diff --git a/.github/workflows/quicktest-dev-pr.yml b/.github/workflows/quicktest-dev-pr.yml index 80ac0b61e6d1a6f469f7d5bb0f1d50ce9c56565b..d188007465cd27662ffadfb3ece0d8bf2e8e28be 100644 --- a/.github/workflows/quicktest-dev-pr.yml +++ b/.github/workflows/quicktest-dev-pr.yml @@ -50,4 +50,4 @@ jobs: - name: DockerRunQuicktest run: | - docker run --init --hostname finn_gha -v $(pwd):/workspace/finn -e FINN_BUILD_DIR=/tmp/finn_gha -e FINN_INST_NAME=finn_gha finn_gha quicktest.sh + docker run --init --hostname finn_gha -w $(pwd) -v $(pwd):$(pwd) -e FINN_BUILD_DIR=/tmp/finn_gha -e FINN_INST_NAME=finn_gha finn_gha quicktest.sh diff --git a/.gitignore b/.gitignore index 225fb5cfa3df45124797da425df14974308b90c2..126321cf4deccaa01ab0f2025460e53519d4c06f 100644 --- a/.gitignore +++ b/.gitignore @@ -96,3 +96,6 @@ MANIFEST # generated files as part of end2end notebooks /notebooks/end2end_example/**/*.onnx + +# downloaded dep repos +/deps/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 143514b36ba31cb2b292f3a1961187709798efbf..f5998e98d00f7ea2e89ae3f0fcddd5862454f876 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -29,11 +29,11 @@ exclude: '^docs/conf.py' default_language_version: - python: python3 + python: python3.8 repos: -- repo: git://github.com/pre-commit/pre-commit-hooks - rev: v3.2.0 +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.2.0 hooks: - id: trailing-whitespace exclude: '\.dat$' @@ -50,21 +50,29 @@ repos: - id: mixed-line-ending args: ['--fix=no'] -- repo: git://github.com/PyCQA/isort - rev: 5.5.3 +- repo: https://github.com/PyCQA/isort + rev: 5.10.1 hooks: - id: isort -- repo: git://github.com/psf/black - rev: stable +- repo: https://github.com/psf/black + rev: 22.3.0 hooks: - id: black language_version: python3 - repo: https://gitlab.com/pycqa/flake8 - rev: 3.8.3 + rev: 3.9.2 hooks: - id: flake8 # black-compatible flake-8 config args: ['--max-line-length=88', # black default '--extend-ignore=E203'] # E203 is not PEP8 compliant + +- repo: local + hooks: + - id: jupyter-nb-clear-output + name: jupyter-nb-clear-output + files: \.ipynb$ + language: system + entry: jupyter nbconvert --ClearOutputPreprocessor.enabled=True --inplace diff --git a/AUTHORS.rst b/AUTHORS.rst index 533ed62e1dbda2799f74805f2100769f9c4fecfc..1d42d35a3b269176fcab79d8239b84ac8442fa43 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -13,3 +13,12 @@ Contributors * Suranga Mahesh (@surangamh) * Peter Lehnhardt (@pete-lennart) * Neil Kim Nielsen (@neilkimn) +* Jon Ander Lezeta (@jalezeta) +* John Terry (@jterry-x) +* Alina Vasilciuc (@alinavalinav) +* Alessandro Pappalardo (@volcacius) +* Giuseppe Franco (@Giuseppe5) +* Syed Asad Alam (@asadalam) +* Javier Duarte (@jmduarte) +* Uma Maheshwari (@umav1511) +* José Rosa (@pinxau1000) diff --git a/README.md b/README.md index 10ac25cb8f9e23520830efa4f2f7a58a21370e29..4cc995fc8c991ccc851e95fd30897aeea8ca266a 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ <img align="left" src="https://raw.githubusercontent.com/Xilinx/finn/github-pages/docs/img/finn-stack.png" alt="drawing" style="margin-right: 20px" width="250"/> -[](https://gitter.im/xilinx-finn/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge) +[](https://github.com/Xilinx/finn/discussions) [](http://finn.readthedocs.io/) FINN is an experimental framework from Xilinx Research Labs to explore deep neural network @@ -24,9 +24,9 @@ Please see the [Getting Started](https://finn.readthedocs.io/en/latest/getting_s ## What's New in FINN? +* **2021-11-05:** v0.7 is released, introducing QONNX support, three new example networks and many other improvements. Read more on the [v0.7 release blog post](https://xilinx.github.io/finn//2021/11/05/finn-v07-is-released.html). * **2021-06-15:** v0.6 is released, with ResNet-50 on U250 and ZCU104 MobileNet-v1 in finn-examples showcasing new features plus a lot more. Read more on the [v0.6 release blog post](https://xilinx.github.io/finn//2021/06/15/finn-v06-is-released.html). * **2020-12-17:** v0.5b (beta) is released, with a new [examples repo](https://github.com/Xilinx/finn-examples) including MobileNet-v1. Read more on the <a href="https://xilinx.github.io/finn/2020/12/17/finn-v05b-beta-is-released.html">release blog post</a>. -* **2020-09-21:** v0.4b (beta) is released. Read more on the <a href="https://xilinx.github.io/finn/2020/09/21/finn-v04b-beta-is-released.html">release blog post</a>. ## Documentation @@ -34,9 +34,12 @@ You can view the documentation on [readthedocs](https://finn.readthedocs.io) or ## Community -We have a [gitter channel](https://gitter.im/xilinx-finn/community) where you can ask questions. You can use the GitHub issue tracker to report bugs, but please don't file issues to ask questions as this is better handled in the gitter channel. +We have [GitHub discussions](https://github.com/Xilinx/finn/discussions) where you can ask questions. You can use the GitHub issue tracker to report bugs, but please don't file issues to ask questions as this is better handled in GitHub discussions. + +We also heartily welcome contributions to the project, please check out the [contribution guidelines](CONTRIBUTING.md) and the [list of open issues](https://github.com/Xilinx/finn/issues). Don't hesitate to get in touch over [GitHub discussions](https://github.com/Xilinx/finn/discussions) to discuss your ideas. + +In the past, we also had a [Gitter channel](https://gitter.im/xilinx-finn/community). Please be aware that this is no longer maintained by us but can still be used to search for questions previous users had. -We also heartily welcome contributions to the project, please check out the [contribution guidelines](CONTRIBUTING.md) and the [list of open issues](https://github.com/Xilinx/finn/issues). Don't hesitate to get in touch over [Gitter](https://gitter.im/xilinx-finn/community) to discuss your ideas. ## Citation diff --git a/custom_hls/checksum.cpp b/custom_hls/checksum.cpp new file mode 100644 index 0000000000000000000000000000000000000000..071d9bfbe3a93bf822ec1f0d64605941a4248e85 --- /dev/null +++ b/custom_hls/checksum.cpp @@ -0,0 +1,36 @@ +/****************************************************************************** + * Copyright (c) 2022, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION). HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * @brief Validation top-level module for checksum component. + * @author Thomas B. Preußer <tpreusse@amd.com> + * + *******************************************************************************/ +#include "checksum.hpp" +CHECKSUM_TOP(WORDS_PER_FRAME, WORD_SIZE, ITEMS_PER_WORD) diff --git a/custom_hls/checksum.hpp b/custom_hls/checksum.hpp new file mode 100644 index 0000000000000000000000000000000000000000..bf580f31a6228ffd446221ff5c7cd5f29e439837 --- /dev/null +++ b/custom_hls/checksum.hpp @@ -0,0 +1,131 @@ +/****************************************************************************** + * Copyright (c) 2022, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION). HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * @brief Checksum over stream-carried data frames. + * @author Thomas B. Preußer <tpreusse@amd.com> + * + *******************************************************************************/ +#include <hls_stream.h> +#include <ap_int.h> + + +/** + * Computes a checksum over a forwarded stream assumed to carry frames of + * N words further subdivided into K subwords. + * - Subword slicing can be customized typically by using a lambda. + * The provided DefaultSubwordSlicer assumes an `ap_(u)int`-like word + * type with a member `width` and a range-based slicing operator. It + * further assumes a little-endian arrangement of subwords within words + * for the canonical subword stream order. + * - Subwords wider than 23 bits are folded using bitwise XOR across + * slices of 23 bits starting from the LSB. + * - The folded subword values are weighted according to their position + * in the stream relative to the start of frame by a periodic weight + * sequence 1, 2, 3, ... + * - The weighted folded subword values are reduced to a checksum by an + * accumulation module 2^24. + * - A checksum is emitted for each completed frame. It is the concatenation + * of an 8-bit (modulo 256) frame counter and the 24-bit frame checksum. + */ +template<typename T, unsigned K> class DefaultSubwordSlicer { + static_assert(T::width%K == 0, "Word size must be subword multiple."); + static constexpr unsigned W = T::width/K; +public: + ap_uint<W> operator()(T const &x, unsigned const j) const { +#pragma HLS inline + return x((j+1)*W-1, j*W); + } +}; + +template< + unsigned N, // number of data words in a frame + unsigned K, // subword count per data word + typename T, // type of stream-carried data words + typename F = DefaultSubwordSlicer<T, K> // f(T(), j) to extract subwords +> +void checksum( + hls::stream<T> &src, + hls::stream<T> &dst, + ap_uint<32> &chk, + F&& f = F() +) { + ap_uint<2> coeff[3] = { 1, 2, 3 }; + ap_uint<24> s = 0; + + for(unsigned i = 0; i < N; i++) { +#pragma HLS pipeline II=1 style=flp + T const x = src.read(); + + // Pass-thru copy + dst.write(x); + + // Actual checksum update + for(unsigned j = 0; j < K; j++) { +#pragma HLS unroll + auto const v0 = f(x, j); + constexpr unsigned W = 1 + (decltype(v0)::width-1)/23; + ap_uint<K*23> v = v0; + ap_uint< 23> w = 0; + for(unsigned k = 0; k < W; k++) { + w ^= v(23*k+22, 23*k); + } + s += (coeff[j%3][1]? (w, ap_uint<1>(0)) : ap_uint<24>(0)) + (coeff[j%3][0]? w : ap_uint<23>(0)); + } + + // Re-align coefficients + for(unsigned j = 0; j < 3; j++) { +#pragma HLS unroll + ap_uint<3> const cc = coeff[j] + ap_uint<3>(K%3); + coeff[j] = cc(1, 0) + cc[2]; + } + } + + // Frame counter & output + static ap_uint<8> cnt = 0; +#pragma HLS reset variable=cnt + chk = (cnt++, s); +} + +#define CHECKSUM_TOP_(WORDS_PER_FRAME, WORD_SIZE, ITEMS_PER_WORD) \ + using T = ap_uint<WORD_SIZE>; \ + void checksum_ ## WORDS_PER_FRAME ## _ ## WORD_SIZE ## _ ## ITEMS_PER_WORD ( \ + hls::stream<T> &src, \ + hls::stream<T> &dst, \ + ap_uint<32> &chk \ + ) { \ + _Pragma("HLS interface port=src axis") \ + _Pragma("HLS interface port=dst axis") \ + _Pragma("HLS interface port=chk s_axilite") \ + _Pragma("HLS interface port=return ap_ctrl_none") \ + _Pragma("HLS dataflow") \ + checksum<WORDS_PER_FRAME, ITEMS_PER_WORD>(src, dst, chk); \ + } +#define CHECKSUM_TOP(WORDS_PER_FRAME, WORD_SIZE, ITEMS_PER_WORD) \ + CHECKSUM_TOP_(WORDS_PER_FRAME, WORD_SIZE, ITEMS_PER_WORD) diff --git a/custom_hls/checksum_tb.sv b/custom_hls/checksum_tb.sv new file mode 100644 index 0000000000000000000000000000000000000000..cec4e1b5bbc099326f784b9a14e325fa582a9f0e --- /dev/null +++ b/custom_hls/checksum_tb.sv @@ -0,0 +1,136 @@ +/****************************************************************************** + * Copyright (c) 2022, Xilinx, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION). HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * @brief Testbench for checksum component. + * @author Thomas B. Preußer <tpreusse@amd.com> + * + *******************************************************************************/ +module checksum_tb; + + //----------------------------------------------------------------------- + // Global Control + logic clk = 0; + always #5ns clk = !clk; + logic rst; + + //----------------------------------------------------------------------- + // DUT + localparam int unsigned N = 60; // words per frame + localparam int unsigned K = 4; // subwords per word + localparam int unsigned W = 8; // subword size + + logic [K-1:0][W-1:0] src_TDATA; + logic src_TVALID; + uwire src_TREADY; + + uwire [K-1:0][W-1:0] dst_TDATA; + uwire dst_TVALID; + logic dst_TREADY; + + uwire [31:0] chk; + uwire chk_vld; + + checksum_top dut ( + .ap_clk(clk), .ap_rst_n(!rst), + .src_TDATA, .src_TVALID, .src_TREADY, + .dst_TDATA, .dst_TVALID, .dst_TREADY, + .chk, .chk_ap_vld(chk_vld), + .ap_local_block(), .ap_local_deadlock() + ); + + //----------------------------------------------------------------------- + // Stimulus + logic [K-1:0][W-1:0] Bypass [$] = {}; + logic [31:0] Checksum[$] = {}; + initial begin + src_TDATA = 'x; + src_TVALID = 0; + + rst = 1; + repeat(9) @(posedge clk); + rst <= 0; + + for(int unsigned r = 0; r < 311; r++) begin + automatic logic [23:0] sum = 0; + src_TVALID <= 1; + for(int unsigned i = 0; i < N; i++) begin + for(int unsigned k = 0; k < K; k++) begin + automatic logic [W-1:0] v = $urandom()>>17; + src_TDATA[k] <= v; + sum += ((K*i+k)%3 + 1) * v; + end + @(posedge clk iff src_TREADY); + Bypass.push_back(src_TDATA); + end + src_TVALID <= 0; + $display("Expect: %02x:%06x", r[7:0], sum); + Checksum.push_back({r, sum}); + end + + repeat(8) @(posedge clk); + $finish; + end + + //----------------------------------------------------------------------- + // Output Validation + + // Drain and check pass-thru stream + assign dst_TREADY = 1; + always_ff @(posedge clk iff dst_TVALID) begin + assert(Bypass.size()) begin + automatic logic [K-1:0][W-1:0] exp = Bypass.pop_front(); + assert(dst_TDATA === exp) else begin + $error("Unexpected output %0x instead of %0x.", dst_TDATA, exp); + $stop; + end + end + else begin + $error("Spurious data output."); + $stop; + end + end + + // Validate checksum reports + always_ff @(posedge clk iff chk_vld) begin + $display("Check: %02x:%06x", chk[31:24], chk[23:0]); + assert(Checksum.size()) begin + automatic logic [31:0] exp = Checksum.pop_front(); + assert(chk === exp) else begin + $error("Unexpected checksum %0x instead of %0x.", chk, exp); + $stop; + end + end + else begin + $error("Spurious checksum output."); + $stop; + end + end + +endmodule : checksum_tb diff --git a/docker/Dockerfile.finn b/docker/Dockerfile.finn index 4d03e2fbb5c4cce7dbda6a757aea8dce3e15e569..6036f2e744f53dfaf287b97d2789bb20bdd9d9f7 100644 --- a/docker/Dockerfile.finn +++ b/docker/Dockerfile.finn @@ -39,24 +39,29 @@ WORKDIR /workspace ENV TZ="Europe/Dublin" RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone -RUN apt-get update -RUN apt-get -y upgrade -RUN apt-get install -y build-essential -RUN apt-get install -y libglib2.0-0 -RUN apt-get install -y libsm6 -RUN apt-get install -y libxext6 -RUN apt-get install -y libxrender-dev -RUN apt-get install -y verilator -RUN apt-get install -y nano -RUN apt-get install -y zsh -RUN apt-get install -y rsync -RUN apt-get install -y git -RUN apt-get install -y sshpass -RUN apt-get install -y wget -RUN apt-get install -y sudo -RUN apt-get install -y unzip -RUN apt-get install -y zip +RUN apt-get update && \ + apt-get install -y \ + build-essential \ + libc6-dev-i386 \ + libglib2.0-0 \ + libsm6 \ + libxext6 \ + libxrender-dev \ + verilator \ + nano \ + zsh \ + rsync \ + git \ + openssh-client \ + sshpass \ + wget \ + sudo \ + unzip \ + zip \ + locales \ + lsb-core RUN echo "StrictHostKeyChecking no" >> /etc/ssh/ssh_config +RUN locale-gen "en_US.UTF-8" # install XRT RUN wget https://www.xilinx.com/bin/public/openDownload?filename=$XRT_DEB_VERSION.deb -O /tmp/$XRT_DEB_VERSION.deb @@ -72,11 +77,12 @@ RUN rm requirements.txt RUN pip install pygments==2.4.1 RUN pip install ipykernel==5.5.5 RUN pip install jupyter==1.0.0 +RUN pip install markupsafe==2.0.1 RUN pip install matplotlib==3.3.1 --ignore-installed RUN pip install pytest-dependency==0.5.1 RUN pip install sphinx==3.1.2 RUN pip install sphinx_rtd_theme==0.5.0 -RUN pip install pytest-xdist==2.0.0 +RUN pip install pytest-xdist[setproctitle]==2.4.0 RUN pip install pytest-parallel==0.1.0 RUN pip install "netron>=5.0.0" RUN pip install pandas==1.1.5 @@ -84,70 +90,21 @@ RUN pip install scikit-learn==0.24.1 RUN pip install tqdm==4.31.1 RUN pip install -e git+https://github.com/fbcotter/dataset_loading.git@0.0.4#egg=dataset_loading -# git-based Python repo dependencies -# these are installed in editable mode for easier co-development -ARG FINN_BASE_COMMIT="e8facdd719b55839cca46da2cc4f4a4a372afb41" -ARG QONNX_COMMIT="9f9eff95227cc57aadc6eafcbd44b7acda89f067" -ARG FINN_EXP_COMMIT="af6102769226b82b639f243dc36f065340991513" -ARG BREVITAS_COMMIT="a5b71d6de1389d3e7db898fef72e014842670f03" -ARG PYVERILATOR_COMMIT="0c3eb9343500fc1352a02c020a736c8c2db47e8e" -ARG CNPY_COMMIT="4e8810b1a8637695171ed346ce68f6984e585ef4" -ARG HLSLIB_COMMIT="966d17d3fddd801927b2167627d23a9a15ed1461" -ARG OMX_COMMIT="1dfc4aa2f2895632742cd5751520c6b472feb74e" -ARG AVNET_BDF_COMMIT="2d49cfc25766f07792c0b314489f21fe916b639b" - -# finn-base -RUN git clone https://github.com/Xilinx/finn-base.git /workspace/finn-base -RUN git -C /workspace/finn-base checkout $FINN_BASE_COMMIT -RUN pip install -e /workspace/finn-base -# Install qonnx without dependencies, currently its only dependency is finn-base -RUN git clone https://github.com/fastmachinelearning/qonnx.git /workspace/qonnx -RUN git -C /workspace/qonnx checkout $QONNX_COMMIT -RUN pip install --no-dependencies -e /workspace/qonnx +# extra dependencies from other FINN deps +# installed in Docker image to make entrypoint script go faster # finn-experimental -RUN git clone https://github.com/Xilinx/finn-experimental.git /workspace/finn-experimental -RUN git -C /workspace/finn-experimental checkout $FINN_EXP_COMMIT -RUN pip install -e /workspace/finn-experimental +RUN pip install deap==1.3.1 +RUN pip install mip==1.13.0 +RUN pip install networkx==2.8 # brevitas -RUN git clone https://github.com/Xilinx/brevitas.git /workspace/brevitas -RUN git -C /workspace/brevitas checkout $BREVITAS_COMMIT -RUN pip install -e /workspace/brevitas +RUN pip install future-annotations==1.0.0 +RUN pip install dependencies==2.0.1 +RUN pip install tokenize-rt==4.2.1 # pyverilator -RUN git clone https://github.com/maltanar/pyverilator.git /workspace/pyverilator -RUN git -C /workspace/pyverilator checkout $PYVERILATOR_COMMIT -RUN pip install -e /workspace/pyverilator -# other git-based dependencies (non-Python) -# cnpy -RUN git clone https://github.com/rogersce/cnpy.git /workspace/cnpy -RUN git -C /workspace/cnpy checkout $CNPY_COMMIT -# finn-hlslib -RUN git clone https://github.com/Xilinx/finn-hlslib.git /workspace/finn-hlslib -RUN git -C /workspace/finn-hlslib checkout $HLSLIB_COMMIT -# oh-my-xilinx -RUN git clone https://bitbucket.org/maltanar/oh-my-xilinx.git /workspace/oh-my-xilinx -RUN git -C /workspace/oh-my-xilinx checkout $OMX_COMMIT -# board files -RUN cd /tmp; \ - wget -q https://github.com/cathalmccabe/pynq-z1_board_files/raw/master/pynq-z1.zip; \ - wget -q https://dpoauwgwqsy2x.cloudfront.net/Download/pynq-z2.zip; \ - unzip -q pynq-z1.zip; \ - unzip -q pynq-z2.zip; \ - mkdir /workspace/board_files; \ - mv pynq-z1/ /workspace/board_files/; \ - mv pynq-z2/ /workspace/board_files/; \ - rm pynq-z1.zip; \ - rm pynq-z2.zip; \ - git clone https://github.com/Avnet/bdf.git /workspace/avnet-bdf; \ - git -C /workspace/avnet-bdf checkout $AVNET_BDF_COMMIT; \ - mv /workspace/avnet-bdf/* /workspace/board_files/; - +RUN pip install tclwrapper==0.0.1 # extra environment variables for FINN compiler ENV VIVADO_IP_CACHE "/tmp/vivado_ip_cache" -ENV PATH "${PATH}:/workspace/oh-my-xilinx" -ENV OHMYXILINX "/workspace/oh-my-xilinx" - -WORKDIR /workspace/finn COPY docker/finn_entrypoint.sh /usr/local/bin/ COPY docker/quicktest.sh /usr/local/bin/ diff --git a/docker/finn_entrypoint.sh b/docker/finn_entrypoint.sh index a2312d025b616acd285b94f1b56b83f0c35cc0ae..5cd2b962ce92afe7d7954ab962694d5c5e70e21e 100644 --- a/docker/finn_entrypoint.sh +++ b/docker/finn_entrypoint.sh @@ -28,11 +28,14 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -export FINN_ROOT=/workspace/finn export HOME=/tmp/home_dir export SHELL=/bin/bash +export LANG="en_US.UTF-8" +export LC_ALL="en_US.UTF-8" +export LANGUAGE="en_US:en" # colorful terminal output export PS1='\[\033[1;36m\]\u\[\033[1;31m\]@\[\033[1;32m\]\h:\[\033[1;35m\]\w\[\033[1;31m\]\$\[\033[0m\] ' +export PATH=$PATH:$OHMYXILINX YELLOW='\033[0;33m' GREEN='\033[0;32m' @@ -51,12 +54,23 @@ recho () { echo -e "${RED}ERROR: $1${NC}" } -if [ -f "$FINN_ROOT/setup.py" ];then +# finn-base +pip install --user -e ${FINN_ROOT}/deps/finn-base +# Install qonnx without dependencies, currently its only dependency is finn-base +pip install --user --no-dependencies -e ${FINN_ROOT}/deps/qonnx +# finn-experimental +pip install --user -e ${FINN_ROOT}/deps/finn-experimental +# brevitas +pip install --user -e ${FINN_ROOT}/deps/brevitas +# pyverilator +pip install --user -e ${FINN_ROOT}/deps/pyverilator + +if [ -f "${FINN_ROOT}/setup.py" ];then # run pip install for finn - pip install --user -e $FINN_ROOT + pip install --user -e ${FINN_ROOT} else - recho "Unable to find FINN source code in /workspace/finn" - recho "Ensure you have passed -v <path-to-finn-repo>:/workspace/finn to the docker run command" + recho "Unable to find FINN source code in ${FINN_ROOT}" + recho "Ensure you have passed -v <path-to-finn-repo>:<path-to-finn-repo> to the docker run command" exit -1 fi @@ -90,5 +104,16 @@ else fi fi +if [ -f "$HLS_PATH/settings64.sh" ];then + # source Vitis HLS env.vars + source $HLS_PATH/settings64.sh + gecho "Found Vitis HLS at $HLS_PATH" +else + yecho "Unable to find $HLS_PATH/settings64.sh" + yecho "Functionality dependent on Vitis HLS will not be available." + yecho "Please note that FINN needs at least version 2020.2 for Vitis HLS support." + yecho "If you need Vitis HLS, ensure HLS_PATH is set correctly and mounted into the Docker container." +fi + # execute the provided command(s) as root exec "$@" diff --git a/docker/jenkins/Jenkinsfile b/docker/jenkins/Jenkinsfile index f3211941890d634b12142ed13c0f0cf49a9003d8..dab0833166234fc8ec9f123adf8c6157acdf5d5d 100644 --- a/docker/jenkins/Jenkinsfile +++ b/docker/jenkins/Jenkinsfile @@ -1,108 +1,46 @@ -pipeline { - agent any - parameters { - string(name: 'FINN_CI_BRANCH', defaultValue: '', description: 'FINN branch to build') - string(name: 'FINN_XILINX_PATH', defaultValue: '', description: 'Path to Xilinx tool installation') - string(name: 'FINN_XILINX_VERSION', defaultValue: '2020.1', description: 'Xilinx tool version') - string(name: 'PYNQ_BOARD', defaultValue: 'Pynq-Z1', description: 'PYNQ board type') - string(name: 'PYNQ_IP', defaultValue: '', description: 'PYNQ board IP address') - string(name: 'PYNQ_USERNAME', defaultValue: 'xilinx', description: 'PYNQ board username') - string(name: 'PYNQ_PASSWORD', defaultValue: 'xilinx', description: 'PYNQ board password') - string(name: 'PYNQ_TARGET_DIR', defaultValue: '/home/xilinx/finn', description: 'PYNQ board target deployment directory') - string(name: 'NUM_DEFAULT_WORKERS', defaultValue: '1', description: 'Number of cores for parallel transformations') - // main test: everything except rtlsim and end2end tests, parallel run with xdist, no parallel transformations to save on memory - string(name: 'DOCKER_CMD_MAIN', defaultValue: """python setup.py test --addopts "-k 'not (rtlsim or end2end)' --dist=loadfile -n auto" """, description: 'Main test command') - // rtlsim tests: parallel run with pytest-parallel, no parallel transformations to save on memory - string(name: 'DOCKER_CMD_RTLSIM', defaultValue: """python setup.py test --addopts "-k rtlsim --workers auto" """, description: 'rtlsim test command') - // end2end tests: no parallel testing, use NUM_DEFAULT_WORKERS for parallel transformations - string(name: 'DOCKER_CMD_END2END', defaultValue: """python setup.py test --addopts "-k end2end" """, description: 'end2end test command') - // allow specifying where to mount the cloned folder from, since Jenkins and FINN may be running in separate containers - string(name: 'WORKSPACE_MOUNT', defaultValue: '/var/jenkins_home/workspace/finn', description: 'Path to Jenkins workspace mount') +node { + def app + stage('Clone repository') { + /* Let's make sure we have the repository cloned to our workspace */ + checkout scm } - environment { - DOCKER_TAG='finn_ci:$BUILD_ID' - DOCKER_INST_NAME='finn_ci' - BUILD_PATH='/tmp/finn_ci' - VIVADO_PATH=${params.FINN_XILINX_PATH}/Vivado/${params.FINN_XILINX_VERSION} - VITIS_PATH=${params.FINN_XILINX_PATH}/Vitis/${params.FINN_XILINX_VERSION} - } - stages { - stage("Clone") { - steps { - git branch: "${params.FINN_CI_BRANCH}", url: 'https://github.com/Xilinx/finn.git' - } - } - stage('Build') { - steps { - sh """ - docker build -t $DOCKER_TAG -f docker/Dockerfile.finn_ci \ - --build-arg BUILD_PATH=$BUILD_PATH \ - . - """ + withEnv([ + "FINN_XILINX_PATH=/proj/xbuilds/SWIP/2022.1_0420_0327/installs/lin64", + "FINN_XILINX_VERSION=2022.1", + "FINN_DOCKER_TAG=xilinx/finn:jenkins", + "FINN_HOST_BUILD_DIR=/scratch/users/finn_ci", + "PLATFORM_REPO_PATHS=/opt/xilinx/dsa" + ]){ + parallel firstBranch: { + stage('Brevitas export') { + dir("${env.WORKSPACE}") { + sh("bash run-docker.sh python setup.py test --addopts -mbrevitas_export") + } } - } - stage('test-main') { - steps { - catchError(buildResult: 'SUCCESS', stageResult: 'FAILURE') { - sh """ - docker run --init \ - --hostname $DOCKER_INST_NAME \ - -v ${params.WORKSPACE_MOUNT}:/workspace/finn \ - -v ${params.FINN_XILINX_PATH}:${params.FINN_XILINX_PATH}:ro \ - -e NUM_DEFAULT_WORKERS=1 \ - -e FINN_INST_NAME=$DOCKER_INST_NAME \ - -e VIVADO_PATH=$VIVADO_PATH \ - -e VITIS_PATH=$VITIS_PATH \ - -e PYNQ_BOARD=${params.PYNQ_BOARD} \ - -e PYNQ_IP=${params.PYNQ_IP} \ - -e PYNQ_USERNAME=${params.PYNQ_USERNAME} \ - -e PYNQ_PASSWORD=${params.PYNQ_PASSWORD} \ - -e PYNQ_TARGET_DIR=${params.PYNQ_TARGET_DIR} \ - $DOCKER_TAG ${params.DOCKER_CMD_MAIN} - """} + }, secondBranch: { + stage('Streamlining transformations') { + dir("${env.WORKSPACE}") { + sh("bash run-docker.sh python setup.py test --addopts -mstreamline") + } + } + }, thirdBranch: { + stage('Util functions') { + dir("${env.WORKSPACE}") { + sh("bash run-docker.sh python setup.py test --addopts -mutil") + } } - } - stage('test-rtlsim') { - steps { - catchError(buildResult: 'SUCCESS', stageResult: 'FAILURE') { - sh """ - docker run --init \ - --hostname $DOCKER_INST_NAME \ - -v ${params.WORKSPACE_MOUNT}:/workspace/finn \ - -v $VIVADO_PATH:$VIVADO_PATH:ro \ - -e NUM_DEFAULT_WORKERS=1 \ - -e FINN_INST_NAME=$DOCKER_INST_NAME \ - -e VIVADO_PATH=$VIVADO_PATH \ - -e VITIS_PATH=$VITIS_PATH \ - -e PYNQ_BOARD=${params.PYNQ_BOARD} \ - -e PYNQ_IP=${params.PYNQ_IP} \ - -e PYNQ_USERNAME=${params.PYNQ_USERNAME} \ - -e PYNQ_PASSWORD=${params.PYNQ_PASSWORD} \ - -e PYNQ_TARGET_DIR=${params.PYNQ_TARGET_DIR} \ - $DOCKER_TAG ${params.DOCKER_CMD_RTLSIM} - """} + }, fourthBranch: { + stage('General transformations') { + dir("${env.WORKSPACE}") { + sh("bash run-docker.sh python setup.py test --addopts -mtransform") + } } - } - stage('test-end2end') { - steps { - catchError(buildResult: 'SUCCESS', stageResult: 'FAILURE') { - sh """ - docker run --init \ - --hostname $DOCKER_INST_NAME \ - -v ${params.WORKSPACE_MOUNT}:/workspace/finn \ - -v $VIVADO_PATH:$VIVADO_PATH:ro \ - -e NUM_DEFAULT_WORKERS=${params.NUM_DEFAULT_WORKERS} \ - -e FINN_INST_NAME=$DOCKER_INST_NAME \ - -e VIVADO_PATH=$VIVADO_PATH \ - -e VITIS_PATH=$VITIS_PATH \ - -e PYNQ_BOARD=${params.PYNQ_BOARD} \ - -e PYNQ_IP=${params.PYNQ_IP} \ - -e PYNQ_USERNAME=${params.PYNQ_USERNAME} \ - -e PYNQ_PASSWORD=${params.PYNQ_PASSWORD} \ - -e PYNQ_TARGET_DIR=${params.PYNQ_TARGET_DIR} \ - $DOCKER_TAG ${params.DOCKER_CMD_END2END} - """ } + }, fifthBranch: { + stage('Fpgadataflow transformations and simulations') { + dir("${env.WORKSPACE}") { + sh("bash run-docker.sh python setup.py test --addopts -mfpgadataflow") + } } - } + } } } diff --git a/docker/quicktest.sh b/docker/quicktest.sh index b4ad37232fa69754a86e9064d7592d7474e8617e..f625f2b1ef722f386180a8409a9eb9e759a2f3b6 100755 --- a/docker/quicktest.sh +++ b/docker/quicktest.sh @@ -2,7 +2,7 @@ : ${PYTEST_PARALLEL=auto} -cd $FINN_ROOT +cd $FINN_ROOT/finn # check if command line argument is empty or not present if [ -z $1 ]; then echo "Running quicktest: not (vivado or slow or board) with pytest-xdist" diff --git a/docs/finn/command_line.rst b/docs/finn/command_line.rst index ccb891a0ab42eebdd85f10c14384aaa217e8ed8b..54ffca9430a57ed4513ce822afbe0f1642b77404 100644 --- a/docs/finn/command_line.rst +++ b/docs/finn/command_line.rst @@ -186,20 +186,23 @@ This is possible by using the `build_custom` entry as follows: outside the FINN repo folder for cleaner separation. Let's call this folder ``custom_build_dir``. -2. Create a ``custom_build_dir/build.py`` file that will perform the build when -executed. You should also put any ONNX model(s) or other Python modules you -may want to include in your build flow in this folder (so that they get mounted -into the Docker container while building). Besides the filename and data placement, +2. Create one or more Python files under this directory that perform the build(s) +you would like when executed, for instance ``custom_build_dir/build.py`` and +``custom_build_dir/build_quick.py``. +You should also put any ONNX model(s) or other +Python modules you may want to include in your build flow in this folder (so that they get +mounted into the Docker container while building). Besides the data placement, you have complete freedom on how to implement the build flow here, including calling the steps from the simple dataflow build mode above, making calls to FINN library functions, preprocessing and altering models, building several variants etc. -You can find a basic example of build.py under ``src/finn/qnn-data/build_dataflow/build.py``. +You can find a basic example of a build flow under ``src/finn/qnn-data/build_dataflow/build.py``. -You can launch the custom build flow using: +You can launch the desired custom build flow using: :: - ./run-docker.sh build_custom <path/to/custom_build_dir/> + ./run-docker.sh build_custom <path/to/custom_build_dir> <name-of-build-flow> This will mount the specified folder into the FINN Docker container and launch -your ``build.py``. +the build flow. If ``<name-of-build-flow>`` is not specified it will default to ``build`` +and thus execute ``build.py``. If it is specified, it will be ``<name-of-build-flow>.py``. diff --git a/docs/finn/developers.rst b/docs/finn/developers.rst index 508cd86a31b6284e072499987ae45864d3942e16..2e05761d1fc1b9a23abb29f7bc062cf99a8acf5c 100644 --- a/docs/finn/developers.rst +++ b/docs/finn/developers.rst @@ -63,40 +63,44 @@ Docker images If you want to add new dependencies (packages, repos) to FINN it's important to understand how we handle this in Docker. -There are currently two Docker images used in FINN: - -* The finn.dev image, used for deploying and developing the FINN compiler. Details described below. -* The finn.ci image, which is used for continuous integration testing. Almost identical to finn.dev image, key differences are no user setup and fewer packages installed (e.g. no Jupyter). The finn.dev image is built and launched as follows: -1. run-docker.sh launches the build of the Docker image with `docker build` +1. run-docker.sh launches fetch-repos.sh to checkout dependency git repos at correct commit hashes (unless ``FINN_SKIP_DEP_REPOS=1``) -2. Docker image is built from docker/Dockerfile.finn_dev using the following steps: +2. run-docker.sh launches the build of the Docker image with `docker build` (unless ``FINN_DOCKER_PREBUILT=1``). Docker image is built from docker/Dockerfile.finn using the following steps: * Base: PyTorch dev image * Set up apt dependencies: apt-get install a few packages for verilator and * Set up pip dependencies: Python packages FINN depends on are listed in requirements.txt, which is copied into the container and pip-installed. Some additional packages (such as Jupyter and Netron) are also installed. - * Do user setup: Switch to the same user running the container to avoid running as root. - * Clone dependency repos: These include Brevitas, finn-hlslib, finn-base, pyverilator and oh-my-xilinx. The correct commit version will be checked out by the entrypoint script. * Install XRT deps, if needed: For Vitis builds we need to install the extra dependencies for XRT. This is only triggered if the image is built with the INSTALL_XRT_DEPS=1 argument. 3. Docker image is ready, run-docker.sh can now launch a container from this image with `docker run`. It sets up certain environment variables and volume mounts: * Vivado/Vitis is mounted from the host into the container (on the same path). - * The finn root folder is mounted under /workspace/finn. This allows modifying the source code on the host and testing inside the container. + * The finn root folder is mounted into the container (on the same path). This allows modifying the source code on the host and testing inside the container. * The build folder is mounted under /tmp/finn_dev_username (can be overridden by defining FINN_HOST_BUILD_DIR). This will be used for generated files. Mounting on the host allows easy examination of the generated files, and keeping the generated files after the container exits. * Various environment variables are set up for use inside the container. See the run-docker.sh script for a complete list. 4. Entrypoint script (docker/finn_entrypoint.sh) upon launching container performs the following: - * Update and checkout the dependency repos at specified commits. + * Do `pip install` on the dependency git repos at specified commits. * Source Vivado settings64.sh from specified path to make vivado and vivado_hls available. * Download PYNQ board files into the finn root directory, unless they already exist. * Source Vitits settings64.sh if Vitis is mounted. 5. Depending on the arguments to run-docker.sh a different application is launched. run-docker.sh notebook launches a Jupyter server for the tutorials, whereas run-docker.sh build_custom and run-docker.sh build_dataflow trigger a dataflow build (see documentation). Running without arguments yields an interactive shell. See run-docker.sh for other options. +(Re-)launching builds outside of Docker +====================================== + +It is possible to launch builds for FINN-generated HLS IP and stitched-IP folders outside of the Docker container. +This may be necessary for visual inspection of the generated designs inside the Vivado GUI, if you run into licensing +issues during synthesis, or other environmental problems. +Simply set the ``FINN_ROOT`` environment variable to the location where the FINN compiler is installed on the host +computer, and you should be able to launch the various .tcl scripts or .xpr project files without using the FINN +Docker container as well. + Linting ======= diff --git a/docs/finn/faq.rst b/docs/finn/faq.rst index 87e36e0722e4db6b2efd5de5df343b7bdf68a719..3ddd13664432ceefdd0379004d856abd096f93ff 100644 --- a/docs/finn/faq.rst +++ b/docs/finn/faq.rst @@ -4,68 +4,109 @@ Frequently Asked Questions *********************** -.. note:: **This page is under construction.** +Can't find the answer to your question here? Check `FINN GitHub Discussions <https://github.com/Xilinx/finn/discussions>`_. -Can I install FINN out of the Docker container? -=============================================== -We do not support out of the Docker implementations at the moment. This is due -to the high complexity of the FINN project dependencies. +Can I install FINN out of the Docker container? + We do not support out of the Docker implementations at the moment. This is due + to the high complexity of the FINN project dependencies. Since FINN uses ONNX, can I compile any model from the ONNX Model Zoo to an FPGA accelerator? -============================================================================================= + The short answer is no. FINN uses ONNX in a specific (non-standard) way, including custom layer + types and quantization annotations. Networks must be first quantized using Brevitas and exported + to FINN-ONNX to be converted to FPGA accelerators. -The short answer is no. FINN uses ONNX in a specific (non-standard) way, including custom layer -types and quantization annotations. Networks must be first quantized using Brevitas and exported -to FINN-ONNX to be converted to FPGA accelerators. +Can I install FINN out of the Docker container? + We do not support out of the Docker implementations at the moment. This is due + to the high complexity of the FINN project dependencies. -Can I deploy custom NNs with arbitrary precisions and layers using FINN? -========================================================================= +Since FINN uses ONNX, can I compile any model from the ONNX Model Zoo to an FPGA accelerator? + The short answer is no. FINN uses ONNX in a specific (non-standard) way, including custom layer + types and quantization annotations. Networks must be first quantized using Brevitas and exported + to FINN-ONNX to be converted to FPGA accelerators. -Yes, though the effort required and quality of results will vary. -Although we do support arbitrary -precision, the way we create the hardware isn't typically practical for more than -4 bits, or very large networks for a single FPGA. -In terms of layers, only a subset of quantized layers covered by the various FINN examples -are currently supported. -It is possible to add support for new layers, though we don't have tutorials for this in place -just yet. -Does FINN only work with the example networks? -============================================== +Can I deploy custom NNs with arbitrary precisions and layers using FINN? + Yes, though the effort required and quality of results will vary. + Although we do support arbitrary + precision, the way we create the hardware isn't typically practical for more than + 4 bits, or very large networks for a single FPGA. + In terms of layers, only a subset of quantized layers covered by the various FINN examples + are currently supported. + It is possible to add support for new layers, though we don't have tutorials for this in place + just yet. -FINN isn't restricted to the example networks; -rather, it's restricted to certain patterns (e.g. certain layer types and their combinations). -The current best practice for custom networks is to take a working network and gradually modify it. +Does FINN only work with the example networks? + FINN isn't restricted to the example networks; + rather, it's restricted to certain patterns (e.g. certain layer types and their combinations). + The current best practice for custom networks is to take a working network and gradually modify it. What is the expected background for using FINN? -=============================================== - -Some general knowledge of Python, Docker, machine learning with neural networks and Jupyter notebooks -is expected. -Our goal is to make the tool in a shape and form so that no hardware/FPGA background -should be necessary, although having some knowledge would give better results. + Some general knowledge of Python, Docker, machine learning with neural networks and Jupyter notebooks + is expected. + Our goal is to make the tool in a shape and form so that no hardware/FPGA background + should be necessary, although having some knowledge would give better results. What operating systems are supported by FINN? -============================================= - -FINN should work fine under any Linux-based OS capable of running Vivado/Vitis, as long -as you install Docker (``docker-ce``) on your machine . + FINN should work fine under any Linux-based OS capable of running Vivado/Vitis, as long + as you install Docker (``docker-ce``) on your machine. I am getting DocNav and Model_Composer errors when launching the Docker image. -============================================================================== - -We do not mount those particular directories into the Docker container because they are not -used. The errors are Vivado related but you can safely ignore them. + We do not mount those particular directories into the Docker container because they are not + used. The errors are Vivado related but you can safely ignore them. What board do you recommend to start working with FINN? -======================================================= - -Our preferred target platforms are those supported by `PYNQ <http://www.pynq.io/board.html>`_. -For those boards we can offer end-to-end (DNN-to-bitstream) deployment, -see the `finn-examples <https://github.com/Xilinx/finn-examples>`_ repository for some examples. -However, FINN also supports Vivado IP Integrator designs. The IPs connect using AXI stream (FIFO) -in-and-out interfaces. This means that it can be integrated onto any Xilinx FPGA board, -though you will have to do the system integration manually. + Our preferred target platforms are those supported by `PYNQ <http://www.pynq.io/board.html>`_. + For those boards we can offer end-to-end (DNN-to-bitstream) deployment, + see the `finn-examples <https://github.com/Xilinx/finn-examples>`_ repository for some examples. + However, FINN also supports Vivado IP Integrator designs. The IPs connect using AXI stream (FIFO) + in-and-out interfaces. This means that it can be integrated onto any Xilinx FPGA board, + though you will have to do the system integration manually. + +FINN-generated builds break after I restart my computer, because ``/tmp`` gets wiped. + See https://github.com/Xilinx/finn/discussions/404 + +How can I target an arbitrary Xilinx FPGA without PYNQ support? + See https://github.com/Xilinx/finn/discussions/387 + +Why does FINN-generated architectures need FIFOs between layers? + See https://github.com/Xilinx/finn/discussions/383 + +How do I tell FINN to utilize DSPs instead of LUTs for MAC operations in particular layers? + This is done with the ``resType="dsp"`` attribute on ``MatrixVectorActivation`` and ``Vector_Vector_Activate`` instances. + When using the ``build_dataflow`` system, this can be specified at a per layer basis by specifying it as part of one or more layers’ + folding config (:py:mod:`finn.builder.build_dataflow_config.DataflowBuildConfig.folding_config_file`). + This is a good idea for layers with more weight/input act bits and high PE*SIMD. + See the `MobileNet-v1 build config for ZCU104 in finn-examples <https://github.com/Xilinx/finn-examples/blob/main/build/mobilenet-v1/folding_config/ZCU104_folding_config.json#L15>`_ for reference. + + +How do I tell FINN to utilize a particular type of memory resource in particular layers? + This is done with the ``ram_style`` attribute. Check the particular ``HLSCustomOp`` attribute definition to see + which modes are supported (`example for MatrixVectorActivation <https://github.com/Xilinx/finn/blob/dev/src/finn/custom_op/fpgadataflow/matrixvectoractivation.py#L101>`_). + When using the ``build_dataflow`` system, this can be specified at a per layer basis by specifying it as part of one or more layers’ + folding config (:py:mod:`finn.builder.build_dataflow_config.DataflowBuildConfig.folding_config_file`). + See the `MobileNet-v1 build config for ZCU104 in finn-examples <https://github.com/Xilinx/finn-examples/blob/main/build/mobilenet-v1/folding_config/ZCU104_folding_config.json#L15>`_ for reference. + +Which data layout do FINN-generated accelerators use? Big-endian? Little-endian? + The data layout used by FINN does not correspond to system-level big or little endian due to difficulties in defining what + the “word size†is and bit packing for smaller datatypes. FINN’s “word size†is dependent on the parallelization of the + first/last layers. For instance, if the first HLS layer is using SIMD=3 this means the “innermost dimension†in the + data packing functions will be of size 3. + When you use the verification infrastructure or the generated PYNQ Python drivers that FINN provides, the tool normally + takes care of any required data layout conversion on standard numpy arrays before presenting the data to the accelerator, + and vice versa on the output side. Doing this data packing and layout conversion manually can be messy at the moment. + If you need to do this manually, first examine how the `FINN PYNQ Python drivers <https://github.com/Xilinx/finn-examples/blob/main/finn_examples/driver.py#L379>`_ do this – notice how the input data is + first reshaped to create the “folded input shape†that reflects the word size of the first layer based on how much it + was parallelized, then data packing is applied to obtain a raw byte array (with some reversals going on) that can be + fed directly to the hardware. Another example of this is the `npy_to_rtlsim_input <https://github.com/Xilinx/finn-base/blob/dev/src/finn/util/data_packing.py#L289>`_ function, which converts npy arrays to lists of Python arbitrary-precision integers that we feed into pyverilator for rtl simulation: + +Why does FIFO sizing take so long for my network? Is something wrong? + The automatic FIFO sizing in FINN can take quite long. It unfortunately doesn’t really parallelize on multiple cores since + it’s based on running an rtl simulation with lots of inputs and very large FIFOs, then observing the max occupancy/count + in each FIFO. + +What's a good starting point for the folding configuration if I want to make manual changes? + First, enable automatic folding options in ``build_dataflow`` such ``target_fps``. This should find a decent set of + folding factors and save them to ``output_folder/auto_folding_config.json`` which you can use as a basis for creating the desired config. diff --git a/docs/finn/getting_started.rst b/docs/finn/getting_started.rst index 9e1df209ba25287cd7740be34d21ed3257500053..3e730924c032765ebf8f58afaa9ae2e694fb3d11 100644 --- a/docs/finn/getting_started.rst +++ b/docs/finn/getting_started.rst @@ -12,7 +12,8 @@ Quickstart 3. Clone the FINN compiler from the repo: ``git clone https://github.com/Xilinx/finn/`` and go into the directory where it is cloned 4. Execute ``./run-docker.sh quicktest`` to verify your installation. 5. Optionally, follow the instructions on :ref:`PYNQ board first-time setup` or :ref:`Alveo first-time setup` for board setup. -6. All done! See :ref:`Running FINN in Docker` for the various options on how to run the FINN compiler. +6. Optionally, set up a `Vivado/Vitis license`_. +7. All done! See :ref:`Running FINN in Docker` for the various options on how to run the FINN compiler. How do I use FINN? @@ -111,6 +112,8 @@ These are summarized below: * (optional) ``FINN_DOCKER_TAG`` (autogenerated) specifies the Docker image tag to use. * (optional) ``FINN_DOCKER_RUN_AS_ROOT`` (default 0) if set to 1 then run Docker container as root, default is the current user. * (optional) ``FINN_DOCKER_GPU`` (autodetected) if not 0 then expose all Nvidia GPUs or those selected by ``NVIDIA_VISIBLE_DEVICES`` to Docker container for accelerated DNN training. Requires `Nvidia Container Toolkit <https://github.com/NVIDIA/nvidia-docker>`_ +* (optional) ``FINN_DOCKER_EXTRA`` (default "") pass extra arguments to the ``docker run`` command when executing ``./run-docker.sh`` +* (optional) ``FINN_SKIP_DEP_REPOS`` (default "0") skips the download of FINN dependency repos (uses the ones already downloaded under deps/. * (optional) ``NVIDIA_VISIBLE_DEVICES`` (default "") specifies specific Nvidia GPUs to use in Docker container. Possible values are a comma-separated list of GPU UUID(s) or index(es) e.g. ``0,1,2``, ``all``, ``none``, or void/empty/unset. * (optional) ``DOCKER_BUILDKIT`` (default "1") enables `Docker BuildKit <https://docs.docker.com/develop/develop-images/build_enhancements/>`_ for faster Docker image rebuilding (recommended). @@ -119,7 +122,7 @@ General FINN Docker tips * Several folders including the root directory of the FINN compiler and the ``FINN_HOST_BUILD_DIR`` will be mounted into the Docker container and can be used to exchange files. * Do not use ``sudo`` to launch the FINN Docker. Instead, setup Docker to run `without root <https://docs.docker.com/engine/install/linux-postinstall/#manage-docker-as-a-non-root-user>`_. * If you want a new terminal on an already-running container, you can do this with `docker exec -it <name_of_container> bash`. -* The container is spawned with the `--rm` option, so make sure that any important files you created inside the container are either in the /workspace/finn folder (which is mounted from the host computer) or otherwise backed up. +* The container is spawned with the `--rm` option, so make sure that any important files you created inside the container are either in the finn compiler folder (which is mounted from the host computer) or otherwise backed up. Using a prebuilt image ********************** @@ -152,7 +155,7 @@ Start on the target side: Continue on the host side (replace the ``<PYNQ_IP>`` and ``<PYNQ_USERNAME>`` with the IP address and username of your board from the first step): 1. Launch the Docker container from where you cloned finn with ``./run-docker.sh`` -2. Go into the `ssh_keys` directory (e.g. ``cd /workspace/finn/ssh_keys``) +2. Go into the `ssh_keys` directory (e.g. ``cd /path/to/finn/ssh_keys``) 3. Run ``ssh-keygen`` to create a key pair e.g. ``id_rsa`` private and ``id_rsa.pub`` public key 4. Run ``ssh-copy-id -i id_rsa.pub <PYNQ_USERNAME>@<PYNQ_IP>`` to install the keys on the remote system 5. Test that you can ``ssh <PYNQ_USERNAME>@<PYNQ_IP>`` without having to enter the password. Pass the ``-v`` flag to the ssh command if it doesn't work to help you debug. @@ -181,15 +184,26 @@ On the host side: 5. `Set up public key authentication <https://www.digitalocean.com/community/tutorials/how-to-configure-ssh-key-based-authentication-on-a-linux-server>`_. Copy your private key to the ``finn/ssh_keys`` folder on the host to get password-less deployment and remote execution. 6. Done! You can try the ``test_end2end_vitis`` tests in the FINN Docker to verify your setup, although this will take some time. +Vivado/Vitis license +********************* +If you are targeting Xilinx FPGA parts that needs specific licenses (non-WebPack) you can make these available to the +FINN Docker container by passing extra arguments. To do this, you can use the ``FINN_DOCKER_EXTRA`` environment variable as follows: +:: + + export FINN_DOCKER_EXTRA=" -v /path/to/licenses:/path/to/licenses -e XILINXD_LICENSE_FILE=/path/to/licenses " + +The above example mounts ``/path/to/licenses`` from the host into the same path on the Docker container, and sets the +value of the ``XILINXD_LICENSE_FILE`` environment variable. System Requirements ==================== * Ubuntu 18.04 with ``bash`` installed * Docker `without root <https://docs.docker.com/engine/install/linux-postinstall/#manage-docker-as-a-non-root-user>`_ -* A working Vivado 2019.1 or 2020.1 installation -* A ``VIVADO_PATH`` environment variable pointing to the Vivado installation directory (e.g. the directory where settings64.sh is located) +* A working Vivado 2020.1 installation +* ``FINN_XILINX_PATH`` and ``FINN_XILINX_VERSION`` environment variables correctly set, see `Quickstart`_ +* *(optional)* `Vivado/Vitis license`_ if targeting non-WebPack FPGA parts. * *(optional)* A PYNQ board with a network connection, see `PYNQ board first-time setup`_ * *(optional)* An Alveo board, and a working Vitis 2020.1 installation if you want to use Vitis and Alveo (see `Alveo first-time setup`_ ) diff --git a/docs/finn/internals.rst b/docs/finn/internals.rst index 9305f7840216f6d076a11337ddb3cfa588f1a062..356b5613fe35d3020c3e024e4d77e5d4f2e3d469 100644 --- a/docs/finn/internals.rst +++ b/docs/finn/internals.rst @@ -146,10 +146,10 @@ A transformation passes changes (transforms) the given model, it gets the model .. _mem_mode: -StreamingFCLayer *mem_mode* +MatrixVectorActivation *mem_mode* =========================== -FINN supports two types of the so-called *mem_mode* attrıbute for the node StreamingFCLayer. This mode controls how the weight values are accessed during the execution. That means the mode setting has direct influence on the resulting circuit. Currently two settings for the *mem_mode* are supported in FINN: +FINN supports two types of the so-called *mem_mode* attrıbute for the node MatrixVectorActivation. This mode controls how the weight values are accessed during the execution. That means the mode setting has direct influence on the resulting circuit. Currently two settings for the *mem_mode* are supported in FINN: * "const" @@ -163,7 +163,7 @@ The following picture shows the idea behind the two modes. Const mode ---------- -In *const* mode the weights are "baked in" into the Matrix-Vector-Activate-Unit (MVAU), which means they are part of the HLS code. During the IP block generation the weight values are integrated as *params.h* file in the HLS code and synthesized together with it. For the *const* mode IP block generation the `StreamingFCLayer_Batch function <https://github.com/Xilinx/finn-hlslib/blob/07a8353f6cdfd8bcdd81e309a5581044c2a93d3b/fclayer.h#L94>`_ from the finn-hls library is used, which implements a standard MVAU. The resulting IP block has an input and an output stream, as shown in the above picture on the left. FIFOs in the form of verilog components are connected to these. +In *const* mode the weights are "baked in" into the Matrix-Vector-Activate-Unit (MVAU), which means they are part of the HLS code. During the IP block generation the weight values are integrated as *params.h* file in the HLS code and synthesized together with it. For the *const* mode IP block generation the `Matrix_Vector_Activate_Batch function <https://github.com/Xilinx/finn-hlslib/blob/19fa1197c09bca24a0f77a7fa04b8d7cb5cc1c1d/mvau.hpp#L93>`_ from the finn-hls library is used, which implements a standard MVAU. The resulting IP block has an input and an output stream, as shown in the above picture on the left. FIFOs in the form of verilog components are connected to these. Advantages: @@ -185,7 +185,7 @@ In *decoupled* mode a different variant of the MVAU with three ports is used. Be Advantages: -* better control over the used memory primivites used (see the ram_style attribute in StreamingFCLayer) +* better control over the used memory primivites used (see the ram_style attribute in MatrixVectorActivation) * potentially faster HLS synthesis time since weight array shape is no longer part of HLS synthesis diff --git a/docs/finn/nw_prep.rst b/docs/finn/nw_prep.rst index f5c64e76a4412e1b74ba321d5be1f3e29be1063e..9a10895dd8e5e74d0f047bea30d4fbddd4215af1 100644 --- a/docs/finn/nw_prep.rst +++ b/docs/finn/nw_prep.rst @@ -35,7 +35,7 @@ After this transformation the ONNX model is streamlined and contains now custom Convert to HLS Layers ===================== -Pairs of binary XNORPopcountMatMul layers are converted to StreamingFCLayers and following Multithreshold layers are absorbed into the Matrix-Vector-Activate-Unit (MVAU). The result is a model consisting of a mixture of HLS and non-HLS layers. For more details, see :py:mod:`finn.transformation.fpgadataflow.convert_to_hls_layers`. The MVAU can be implemented in two different modes, *const* and *decoupled*, see chapter :ref:`mem_mode`. +Pairs of binary XNORPopcountMatMul layers are converted to MatrixVectorActivation layers and following Multithreshold layers are absorbed into the Matrix-Vector-Activate-Unit (MVAU). The result is a model consisting of a mixture of HLS and non-HLS layers. For more details, see :py:mod:`finn.transformation.fpgadataflow.convert_to_hls_layers`. The MVAU can be implemented in two different modes, *const* and *decoupled*, see chapter :ref:`mem_mode`. Dataflow Partitioning ===================== diff --git a/docs/finn/source_code/finn.custom_op.fpgadataflow.rst b/docs/finn/source_code/finn.custom_op.fpgadataflow.rst index 34a6285f227690c87c568855e7ca70ddb9b2764c..7de038248d418e1964effd7678bc1cad4cb48c14 100644 --- a/docs/finn/source_code/finn.custom_op.fpgadataflow.rst +++ b/docs/finn/source_code/finn.custom_op.fpgadataflow.rst @@ -127,10 +127,10 @@ finn.custom\_op.fpgadataflow.streamingdatawidthconverter\_batch :undoc-members: :show-inheritance: -finn.custom\_op.fpgadataflow.streamingfclayer\_batch +finn.custom\_op.fpgadataflow.matrixvectoractivation ----------------------------------------------------------- -.. automodule:: finn.custom_op.fpgadataflow.streamingfclayer_batch +.. automodule:: finn.custom_op.fpgadataflow.matrixvectoractivation :members: :undoc-members: :show-inheritance: @@ -184,10 +184,10 @@ finn.custom\_op.fpgadataflow.upsampler :undoc-members: :show-inheritance: -finn.custom\_op.fpgadataflow.vector\_vector\_activate\_batch +finn.custom\_op.fpgadataflow.vectorvectoractivation ----------------------------------------------- -.. automodule:: finn.custom_op.fpgadataflow.vector_vector_activate_batch +.. automodule:: finn.custom_op.fpgadataflow.vectorvectoractivation :members: :undoc-members: :show-inheritance: diff --git a/fetch-repos.sh b/fetch-repos.sh new file mode 100755 index 0000000000000000000000000000000000000000..f8c136a32340d427c3f742261847079ed7b645a5 --- /dev/null +++ b/fetch-repos.sh @@ -0,0 +1,132 @@ +#!/bin/bash +# Copyright (c) 2020-2022, Advanced Micro Devices +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of FINN nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +FINN_BASE_COMMIT="fde240556165bbbce27bb7c0c894839877186d52" +QONNX_COMMIT="9f9eff95227cc57aadc6eafcbd44b7acda89f067" +FINN_EXP_COMMIT="9cbd2787b5160e2b44e0e8164a0df1457dbd5366" +BREVITAS_COMMIT="a5b71d6de1389d3e7db898fef72e014842670f03" +PYVERILATOR_COMMIT="0c3eb9343500fc1352a02c020a736c8c2db47e8e" +CNPY_COMMIT="4e8810b1a8637695171ed346ce68f6984e585ef4" +HLSLIB_COMMIT="5db5c8d480ae82bbbd05dd216b85272b6c6af091" +OMX_COMMIT="a97f0bf145a2f7e57ca416ea76c9e45df4e9aa37" +AVNET_BDF_COMMIT="2d49cfc25766f07792c0b314489f21fe916b639b" +EXP_BOARD_FILES_MD5="ac1811ae93b03f5f09a505283ff989a3" + +FINN_BASE_URL="https://github.com/Xilinx/finn-base.git" +QONNX_URL="https://github.com/fastmachinelearning/qonnx.git" +FINN_EXP_URL="https://github.com/Xilinx/finn-experimental.git" +BREVITAS_URL="https://github.com/Xilinx/brevitas.git" +PYVERILATOR_URL="https://github.com/maltanar/pyverilator.git" +CNPY_URL="https://github.com/rogersce/cnpy.git" +HLSLIB_URL="https://github.com/Xilinx/finn-hlslib.git" +OMX_URL="https://github.com/maltanar/oh-my-xilinx.git" +AVNET_BDF_URL="https://github.com/Avnet/bdf.git" + +FINN_BASE_DIR="finn-base" +QONNX_DIR="qonnx" +FINN_EXP_DIR="finn-experimental" +BREVITAS_DIR="brevitas" +PYVERILATOR_DIR="pyverilator" +CNPY_DIR="cnpy" +HLSLIB_DIR="finn-hlslib" +OMX_DIR="oh-my-xilinx" +AVNET_BDF_DIR="avnet-bdf" + +# absolute path to this script, e.g. /home/user/bin/foo.sh +SCRIPT=$(readlink -f "$0") +# absolute path this script is in, thus /home/user/bin +SCRIPTPATH=$(dirname "$SCRIPT") + +fetch_repo() { + # URL for git repo to be cloned + REPO_URL=$1 + # commit hash for repo + REPO_COMMIT=$2 + # directory to clone to under deps/ + REPO_DIR=$3 + # absolute path for the repo local copy + CLONE_TO=$SCRIPTPATH/deps/$REPO_DIR + + # clone repo if dir not found + if [ ! -d "$CLONE_TO" ]; then + git clone $REPO_URL $CLONE_TO + fi + # verify and try to pull repo if not at correct commit + CURRENT_COMMIT=$(git -C $CLONE_TO rev-parse HEAD) + if [ $CURRENT_COMMIT != $REPO_COMMIT ]; then + git -C $CLONE_TO pull + # checkout the expected commit + git -C $CLONE_TO checkout $REPO_COMMIT + fi + # verify one last time + CURRENT_COMMIT=$(git -C $CLONE_TO rev-parse HEAD) + if [ $CURRENT_COMMIT == $REPO_COMMIT ]; then + echo "Successfully checked out $REPO_DIR at commit $CURRENT_COMMIT" + else + echo "Could not check out $REPO_DIR. Check your internet connection and try again." + fi +} + +fetch_board_files() { + echo "Downloading and extracting board files..." + mkdir -p "$SCRIPTPATH/deps/board_files" + OLD_PWD=$(pwd) + cd "$SCRIPTPATH/deps/board_files" + wget -q https://github.com/cathalmccabe/pynq-z1_board_files/raw/master/pynq-z1.zip + wget -q https://dpoauwgwqsy2x.cloudfront.net/Download/pynq-z2.zip + unzip -q pynq-z1.zip + unzip -q pynq-z2.zip + cp -r $SCRIPTPATH/deps/avnet-bdf/* $SCRIPTPATH/deps/board_files/ + cd $OLD_PWD +} + +fetch_repo $FINN_BASE_URL $FINN_BASE_COMMIT $FINN_BASE_DIR +fetch_repo $QONNX_URL $QONNX_COMMIT $QONNX_DIR +fetch_repo $FINN_EXP_URL $FINN_EXP_COMMIT $FINN_EXP_DIR +fetch_repo $BREVITAS_URL $BREVITAS_COMMIT $BREVITAS_DIR +fetch_repo $PYVERILATOR_URL $PYVERILATOR_COMMIT $PYVERILATOR_DIR +fetch_repo $CNPY_URL $CNPY_COMMIT $CNPY_DIR +fetch_repo $HLSLIB_URL $HLSLIB_COMMIT $HLSLIB_DIR +fetch_repo $OMX_URL $OMX_COMMIT $OMX_DIR +fetch_repo $AVNET_BDF_URL $AVNET_BDF_COMMIT $AVNET_BDF_DIR + +# download extra Pynq board files and extract if needed +if [ ! -d "$SCRIPTPATH/deps/board_files" ]; then + fetch_board_files +else + cd $SCRIPTPATH + BOARD_FILES_MD5=$(find deps/board_files/ -type f -exec md5sum {} \; | sort -k 2 | md5sum | cut -d' ' -f 1) + if [ "$BOARD_FILES_MD5" = "$EXP_BOARD_FILES_MD5" ]; then + echo "Verified board files folder content md5: $BOARD_FILES_MD5" + else + echo "Board files folder content mismatch, removing and re-downloading" + rm -rf deps/board_files/ + fetch_board_files + fi +fi diff --git a/finn-rtllib/axi_info/component.xml b/finn-rtllib/axi_info/component.xml new file mode 100644 index 0000000000000000000000000000000000000000..d22637534ff1fac1e3659c5dca6620d2f044ab87 --- /dev/null +++ b/finn-rtllib/axi_info/component.xml @@ -0,0 +1,708 @@ +<?xml version="1.0" encoding="UTF-8"?> +<spirit:component xmlns:xilinx="http://www.xilinx.com" xmlns:spirit="http://www.spiritconsortium.org/XMLSchema/SPIRIT/1685-2009" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> + <spirit:vendor>AMD</spirit:vendor> + <spirit:library>user</spirit:library> + <spirit:name>axi_info_top</spirit:name> + <spirit:version>1.0</spirit:version> + <spirit:busInterfaces> + <spirit:busInterface> + <spirit:name>s_axi</spirit:name> + <spirit:busType spirit:vendor="xilinx.com" spirit:library="interface" spirit:name="aximm" spirit:version="1.0"/> + <spirit:abstractionType spirit:vendor="xilinx.com" spirit:library="interface" spirit:name="aximm_rtl" spirit:version="1.0"/> + <spirit:slave> + <spirit:memoryMapRef spirit:memoryMapRef="s_axi"/> + </spirit:slave> + <spirit:portMaps> + <spirit:portMap> + <spirit:logicalPort> + <spirit:name>AWADDR</spirit:name> + </spirit:logicalPort> + <spirit:physicalPort> + <spirit:name>s_axi_AWADDR</spirit:name> + </spirit:physicalPort> + </spirit:portMap> + <spirit:portMap> + <spirit:logicalPort> + <spirit:name>AWVALID</spirit:name> + </spirit:logicalPort> + <spirit:physicalPort> + <spirit:name>s_axi_AWVALID</spirit:name> + </spirit:physicalPort> + </spirit:portMap> + <spirit:portMap> + <spirit:logicalPort> + <spirit:name>AWREADY</spirit:name> + </spirit:logicalPort> + <spirit:physicalPort> + <spirit:name>s_axi_AWREADY</spirit:name> + </spirit:physicalPort> + </spirit:portMap> + <spirit:portMap> + <spirit:logicalPort> + <spirit:name>WDATA</spirit:name> + </spirit:logicalPort> + <spirit:physicalPort> + <spirit:name>s_axi_WDATA</spirit:name> + </spirit:physicalPort> + </spirit:portMap> + <spirit:portMap> + <spirit:logicalPort> + <spirit:name>WSTRB</spirit:name> + </spirit:logicalPort> + <spirit:physicalPort> + <spirit:name>s_axi_WSTRB</spirit:name> + </spirit:physicalPort> + </spirit:portMap> + <spirit:portMap> + <spirit:logicalPort> + <spirit:name>WVALID</spirit:name> + </spirit:logicalPort> + <spirit:physicalPort> + <spirit:name>s_axi_WVALID</spirit:name> + </spirit:physicalPort> + </spirit:portMap> + <spirit:portMap> + <spirit:logicalPort> + <spirit:name>WREADY</spirit:name> + </spirit:logicalPort> + <spirit:physicalPort> + <spirit:name>s_axi_WREADY</spirit:name> + </spirit:physicalPort> + </spirit:portMap> + <spirit:portMap> + <spirit:logicalPort> + <spirit:name>BRESP</spirit:name> + </spirit:logicalPort> + <spirit:physicalPort> + <spirit:name>s_axi_BRESP</spirit:name> + </spirit:physicalPort> + </spirit:portMap> + <spirit:portMap> + <spirit:logicalPort> + <spirit:name>BVALID</spirit:name> + </spirit:logicalPort> + <spirit:physicalPort> + <spirit:name>s_axi_BVALID</spirit:name> + </spirit:physicalPort> + </spirit:portMap> + <spirit:portMap> + <spirit:logicalPort> + <spirit:name>BREADY</spirit:name> + </spirit:logicalPort> + <spirit:physicalPort> + <spirit:name>s_axi_BREADY</spirit:name> + </spirit:physicalPort> + </spirit:portMap> + <spirit:portMap> + <spirit:logicalPort> + <spirit:name>ARADDR</spirit:name> + </spirit:logicalPort> + <spirit:physicalPort> + <spirit:name>s_axi_ARADDR</spirit:name> + </spirit:physicalPort> + </spirit:portMap> + <spirit:portMap> + <spirit:logicalPort> + <spirit:name>ARVALID</spirit:name> + </spirit:logicalPort> + <spirit:physicalPort> + <spirit:name>s_axi_ARVALID</spirit:name> + </spirit:physicalPort> + </spirit:portMap> + <spirit:portMap> + <spirit:logicalPort> + <spirit:name>ARREADY</spirit:name> + </spirit:logicalPort> + <spirit:physicalPort> + <spirit:name>s_axi_ARREADY</spirit:name> + </spirit:physicalPort> + </spirit:portMap> + <spirit:portMap> + <spirit:logicalPort> + <spirit:name>RDATA</spirit:name> + </spirit:logicalPort> + <spirit:physicalPort> + <spirit:name>s_axi_RDATA</spirit:name> + </spirit:physicalPort> + </spirit:portMap> + <spirit:portMap> + <spirit:logicalPort> + <spirit:name>RRESP</spirit:name> + </spirit:logicalPort> + <spirit:physicalPort> + <spirit:name>s_axi_RRESP</spirit:name> + </spirit:physicalPort> + </spirit:portMap> + <spirit:portMap> + <spirit:logicalPort> + <spirit:name>RVALID</spirit:name> + </spirit:logicalPort> + <spirit:physicalPort> + <spirit:name>s_axi_RVALID</spirit:name> + </spirit:physicalPort> + </spirit:portMap> + <spirit:portMap> + <spirit:logicalPort> + <spirit:name>RREADY</spirit:name> + </spirit:logicalPort> + <spirit:physicalPort> + <spirit:name>s_axi_RREADY</spirit:name> + </spirit:physicalPort> + </spirit:portMap> + </spirit:portMaps> + </spirit:busInterface> + <spirit:busInterface> + <spirit:name>ap_rst_n</spirit:name> + <spirit:busType spirit:vendor="xilinx.com" spirit:library="signal" spirit:name="reset" spirit:version="1.0"/> + <spirit:abstractionType spirit:vendor="xilinx.com" spirit:library="signal" spirit:name="reset_rtl" spirit:version="1.0"/> + <spirit:slave/> + <spirit:portMaps> + <spirit:portMap> + <spirit:logicalPort> + <spirit:name>RST</spirit:name> + </spirit:logicalPort> + <spirit:physicalPort> + <spirit:name>ap_rst_n</spirit:name> + </spirit:physicalPort> + </spirit:portMap> + </spirit:portMaps> + <spirit:parameters> + <spirit:parameter> + <spirit:name>POLARITY</spirit:name> + <spirit:value spirit:id="BUSIFPARAM_VALUE.AP_RST_N.POLARITY" spirit:choiceRef="choice_list_9d8b0d81">ACTIVE_LOW</spirit:value> + </spirit:parameter> + </spirit:parameters> + </spirit:busInterface> + <spirit:busInterface> + <spirit:name>ap_clk</spirit:name> + <spirit:busType spirit:vendor="xilinx.com" spirit:library="signal" spirit:name="clock" spirit:version="1.0"/> + <spirit:abstractionType spirit:vendor="xilinx.com" spirit:library="signal" spirit:name="clock_rtl" spirit:version="1.0"/> + <spirit:slave/> + <spirit:portMaps> + <spirit:portMap> + <spirit:logicalPort> + <spirit:name>CLK</spirit:name> + </spirit:logicalPort> + <spirit:physicalPort> + <spirit:name>ap_clk</spirit:name> + </spirit:physicalPort> + </spirit:portMap> + </spirit:portMaps> + <spirit:parameters> + <spirit:parameter> + <spirit:name>ASSOCIATED_RESET</spirit:name> + <spirit:value spirit:id="BUSIFPARAM_VALUE.AP_CLK.ASSOCIATED_RESET">ap_rst_n</spirit:value> + </spirit:parameter> + <spirit:parameter> + <spirit:name>ASSOCIATED_BUSIF</spirit:name> + <spirit:value spirit:id="BUSIFPARAM_VALUE.AP_CLK.ASSOCIATED_BUSIF">s_axi</spirit:value> + </spirit:parameter> + </spirit:parameters> + </spirit:busInterface> + </spirit:busInterfaces> + <spirit:memoryMaps> + <spirit:memoryMap> + <spirit:name>s_axi</spirit:name> + <spirit:displayName>s_axi</spirit:displayName> + <spirit:addressBlock> + <spirit:name>reg0</spirit:name> + <spirit:displayName>reg0</spirit:displayName> + <spirit:baseAddress spirit:format="bitString" spirit:bitStringLength="1">0x0</spirit:baseAddress> + <spirit:range spirit:format="bitString" spirit:bitStringLength="13" spirit:minimum="4096" spirit:rangeType="long">0x1000</spirit:range> + <spirit:width spirit:format="long">32</spirit:width> + <spirit:usage>register</spirit:usage> + </spirit:addressBlock> + </spirit:memoryMap> + </spirit:memoryMaps> + <spirit:model> + <spirit:views> + <spirit:view> + <spirit:name>xilinx_anylanguagesynthesis</spirit:name> + <spirit:displayName>Synthesis</spirit:displayName> + <spirit:envIdentifier>:vivado.xilinx.com:synthesis</spirit:envIdentifier> + <spirit:language>SystemVerilog</spirit:language> + <spirit:modelName>axi_info_top</spirit:modelName> + <spirit:fileSetRef> + <spirit:localName>xilinx_anylanguagesynthesis_view_fileset</spirit:localName> + </spirit:fileSetRef> + <spirit:parameters> + <spirit:parameter> + <spirit:name>viewChecksum</spirit:name> + <spirit:value>7d682dfc</spirit:value> + </spirit:parameter> + </spirit:parameters> + </spirit:view> + <spirit:view> + <spirit:name>xilinx_anylanguagebehavioralsimulation</spirit:name> + <spirit:displayName>Simulation</spirit:displayName> + <spirit:envIdentifier>:vivado.xilinx.com:simulation</spirit:envIdentifier> + <spirit:language>SystemVerilog</spirit:language> + <spirit:modelName>axi_info_top</spirit:modelName> + <spirit:fileSetRef> + <spirit:localName>xilinx_anylanguagebehavioralsimulation_view_fileset</spirit:localName> + </spirit:fileSetRef> + <spirit:parameters> + <spirit:parameter> + <spirit:name>viewChecksum</spirit:name> + <spirit:value>7d682dfc</spirit:value> + </spirit:parameter> + </spirit:parameters> + </spirit:view> + <spirit:view> + <spirit:name>xilinx_xpgui</spirit:name> + <spirit:displayName>UI Layout</spirit:displayName> + <spirit:envIdentifier>:vivado.xilinx.com:xgui.ui</spirit:envIdentifier> + <spirit:fileSetRef> + <spirit:localName>xilinx_xpgui_view_fileset</spirit:localName> + </spirit:fileSetRef> + <spirit:parameters> + <spirit:parameter> + <spirit:name>viewChecksum</spirit:name> + <spirit:value>e11f9727</spirit:value> + </spirit:parameter> + </spirit:parameters> + </spirit:view> + </spirit:views> + <spirit:ports> + <spirit:port> + <spirit:name>ap_clk</spirit:name> + <spirit:wire> + <spirit:direction>in</spirit:direction> + <spirit:wireTypeDefs> + <spirit:wireTypeDef> + <spirit:typeName>logic</spirit:typeName> + <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef> + <spirit:viewNameRef>xilinx_anylanguagebehavioralsimulation</spirit:viewNameRef> + </spirit:wireTypeDef> + </spirit:wireTypeDefs> + </spirit:wire> + </spirit:port> + <spirit:port> + <spirit:name>ap_rst_n</spirit:name> + <spirit:wire> + <spirit:direction>in</spirit:direction> + <spirit:wireTypeDefs> + <spirit:wireTypeDef> + <spirit:typeName>logic</spirit:typeName> + <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef> + <spirit:viewNameRef>xilinx_anylanguagebehavioralsimulation</spirit:viewNameRef> + </spirit:wireTypeDef> + </spirit:wireTypeDefs> + </spirit:wire> + </spirit:port> + <spirit:port> + <spirit:name>s_axi_AWVALID</spirit:name> + <spirit:wire> + <spirit:direction>in</spirit:direction> + <spirit:wireTypeDefs> + <spirit:wireTypeDef> + <spirit:typeName>logic</spirit:typeName> + <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef> + <spirit:viewNameRef>xilinx_anylanguagebehavioralsimulation</spirit:viewNameRef> + </spirit:wireTypeDef> + </spirit:wireTypeDefs> + <spirit:driver> + <spirit:defaultValue spirit:format="long">0</spirit:defaultValue> + </spirit:driver> + </spirit:wire> + </spirit:port> + <spirit:port> + <spirit:name>s_axi_AWREADY</spirit:name> + <spirit:wire> + <spirit:direction>out</spirit:direction> + <spirit:wireTypeDefs> + <spirit:wireTypeDef> + <spirit:typeName>logic</spirit:typeName> + <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef> + <spirit:viewNameRef>xilinx_anylanguagebehavioralsimulation</spirit:viewNameRef> + </spirit:wireTypeDef> + </spirit:wireTypeDefs> + </spirit:wire> + </spirit:port> + <spirit:port> + <spirit:name>s_axi_AWADDR</spirit:name> + <spirit:wire> + <spirit:direction>in</spirit:direction> + <spirit:vector> + <spirit:left spirit:format="long">4</spirit:left> + <spirit:right spirit:format="long">0</spirit:right> + </spirit:vector> + <spirit:wireTypeDefs> + <spirit:wireTypeDef> + <spirit:typeName>logic</spirit:typeName> + <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef> + <spirit:viewNameRef>xilinx_anylanguagebehavioralsimulation</spirit:viewNameRef> + </spirit:wireTypeDef> + </spirit:wireTypeDefs> + <spirit:driver> + <spirit:defaultValue spirit:format="long">0</spirit:defaultValue> + </spirit:driver> + </spirit:wire> + </spirit:port> + <spirit:port> + <spirit:name>s_axi_WVALID</spirit:name> + <spirit:wire> + <spirit:direction>in</spirit:direction> + <spirit:wireTypeDefs> + <spirit:wireTypeDef> + <spirit:typeName>logic</spirit:typeName> + <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef> + <spirit:viewNameRef>xilinx_anylanguagebehavioralsimulation</spirit:viewNameRef> + </spirit:wireTypeDef> + </spirit:wireTypeDefs> + <spirit:driver> + <spirit:defaultValue spirit:format="long">0</spirit:defaultValue> + </spirit:driver> + </spirit:wire> + </spirit:port> + <spirit:port> + <spirit:name>s_axi_WREADY</spirit:name> + <spirit:wire> + <spirit:direction>out</spirit:direction> + <spirit:wireTypeDefs> + <spirit:wireTypeDef> + <spirit:typeName>logic</spirit:typeName> + <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef> + <spirit:viewNameRef>xilinx_anylanguagebehavioralsimulation</spirit:viewNameRef> + </spirit:wireTypeDef> + </spirit:wireTypeDefs> + </spirit:wire> + </spirit:port> + <spirit:port> + <spirit:name>s_axi_WDATA</spirit:name> + <spirit:wire> + <spirit:direction>in</spirit:direction> + <spirit:vector> + <spirit:left spirit:format="long">31</spirit:left> + <spirit:right spirit:format="long">0</spirit:right> + </spirit:vector> + <spirit:wireTypeDefs> + <spirit:wireTypeDef> + <spirit:typeName>logic</spirit:typeName> + <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef> + <spirit:viewNameRef>xilinx_anylanguagebehavioralsimulation</spirit:viewNameRef> + </spirit:wireTypeDef> + </spirit:wireTypeDefs> + <spirit:driver> + <spirit:defaultValue spirit:format="long">0</spirit:defaultValue> + </spirit:driver> + </spirit:wire> + </spirit:port> + <spirit:port> + <spirit:name>s_axi_WSTRB</spirit:name> + <spirit:wire> + <spirit:direction>in</spirit:direction> + <spirit:vector> + <spirit:left spirit:format="long">3</spirit:left> + <spirit:right spirit:format="long">0</spirit:right> + </spirit:vector> + <spirit:wireTypeDefs> + <spirit:wireTypeDef> + <spirit:typeName>logic</spirit:typeName> + <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef> + <spirit:viewNameRef>xilinx_anylanguagebehavioralsimulation</spirit:viewNameRef> + </spirit:wireTypeDef> + </spirit:wireTypeDefs> + <spirit:driver> + <spirit:defaultValue spirit:format="long">1</spirit:defaultValue> + </spirit:driver> + </spirit:wire> + </spirit:port> + <spirit:port> + <spirit:name>s_axi_BVALID</spirit:name> + <spirit:wire> + <spirit:direction>out</spirit:direction> + <spirit:wireTypeDefs> + <spirit:wireTypeDef> + <spirit:typeName>logic</spirit:typeName> + <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef> + <spirit:viewNameRef>xilinx_anylanguagebehavioralsimulation</spirit:viewNameRef> + </spirit:wireTypeDef> + </spirit:wireTypeDefs> + </spirit:wire> + </spirit:port> + <spirit:port> + <spirit:name>s_axi_BREADY</spirit:name> + <spirit:wire> + <spirit:direction>in</spirit:direction> + <spirit:wireTypeDefs> + <spirit:wireTypeDef> + <spirit:typeName>logic</spirit:typeName> + <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef> + <spirit:viewNameRef>xilinx_anylanguagebehavioralsimulation</spirit:viewNameRef> + </spirit:wireTypeDef> + </spirit:wireTypeDefs> + <spirit:driver> + <spirit:defaultValue spirit:format="long">0</spirit:defaultValue> + </spirit:driver> + </spirit:wire> + </spirit:port> + <spirit:port> + <spirit:name>s_axi_BRESP</spirit:name> + <spirit:wire> + <spirit:direction>out</spirit:direction> + <spirit:vector> + <spirit:left spirit:format="long">1</spirit:left> + <spirit:right spirit:format="long">0</spirit:right> + </spirit:vector> + <spirit:wireTypeDefs> + <spirit:wireTypeDef> + <spirit:typeName>logic</spirit:typeName> + <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef> + <spirit:viewNameRef>xilinx_anylanguagebehavioralsimulation</spirit:viewNameRef> + </spirit:wireTypeDef> + </spirit:wireTypeDefs> + </spirit:wire> + </spirit:port> + <spirit:port> + <spirit:name>s_axi_ARVALID</spirit:name> + <spirit:wire> + <spirit:direction>in</spirit:direction> + <spirit:wireTypeDefs> + <spirit:wireTypeDef> + <spirit:typeName>logic</spirit:typeName> + <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef> + <spirit:viewNameRef>xilinx_anylanguagebehavioralsimulation</spirit:viewNameRef> + </spirit:wireTypeDef> + </spirit:wireTypeDefs> + <spirit:driver> + <spirit:defaultValue spirit:format="long">0</spirit:defaultValue> + </spirit:driver> + </spirit:wire> + </spirit:port> + <spirit:port> + <spirit:name>s_axi_ARREADY</spirit:name> + <spirit:wire> + <spirit:direction>out</spirit:direction> + <spirit:wireTypeDefs> + <spirit:wireTypeDef> + <spirit:typeName>logic</spirit:typeName> + <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef> + <spirit:viewNameRef>xilinx_anylanguagebehavioralsimulation</spirit:viewNameRef> + </spirit:wireTypeDef> + </spirit:wireTypeDefs> + </spirit:wire> + </spirit:port> + <spirit:port> + <spirit:name>s_axi_ARADDR</spirit:name> + <spirit:wire> + <spirit:direction>in</spirit:direction> + <spirit:vector> + <spirit:left spirit:format="long">4</spirit:left> + <spirit:right spirit:format="long">0</spirit:right> + </spirit:vector> + <spirit:wireTypeDefs> + <spirit:wireTypeDef> + <spirit:typeName>logic</spirit:typeName> + <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef> + <spirit:viewNameRef>xilinx_anylanguagebehavioralsimulation</spirit:viewNameRef> + </spirit:wireTypeDef> + </spirit:wireTypeDefs> + <spirit:driver> + <spirit:defaultValue spirit:format="long">0</spirit:defaultValue> + </spirit:driver> + </spirit:wire> + </spirit:port> + <spirit:port> + <spirit:name>s_axi_RVALID</spirit:name> + <spirit:wire> + <spirit:direction>out</spirit:direction> + <spirit:wireTypeDefs> + <spirit:wireTypeDef> + <spirit:typeName>logic</spirit:typeName> + <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef> + <spirit:viewNameRef>xilinx_anylanguagebehavioralsimulation</spirit:viewNameRef> + </spirit:wireTypeDef> + </spirit:wireTypeDefs> + </spirit:wire> + </spirit:port> + <spirit:port> + <spirit:name>s_axi_RREADY</spirit:name> + <spirit:wire> + <spirit:direction>in</spirit:direction> + <spirit:wireTypeDefs> + <spirit:wireTypeDef> + <spirit:typeName>logic</spirit:typeName> + <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef> + <spirit:viewNameRef>xilinx_anylanguagebehavioralsimulation</spirit:viewNameRef> + </spirit:wireTypeDef> + </spirit:wireTypeDefs> + <spirit:driver> + <spirit:defaultValue spirit:format="long">0</spirit:defaultValue> + </spirit:driver> + </spirit:wire> + </spirit:port> + <spirit:port> + <spirit:name>s_axi_RDATA</spirit:name> + <spirit:wire> + <spirit:direction>out</spirit:direction> + <spirit:vector> + <spirit:left spirit:format="long">31</spirit:left> + <spirit:right spirit:format="long">0</spirit:right> + </spirit:vector> + <spirit:wireTypeDefs> + <spirit:wireTypeDef> + <spirit:typeName>logic</spirit:typeName> + <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef> + <spirit:viewNameRef>xilinx_anylanguagebehavioralsimulation</spirit:viewNameRef> + </spirit:wireTypeDef> + </spirit:wireTypeDefs> + </spirit:wire> + </spirit:port> + <spirit:port> + <spirit:name>s_axi_RRESP</spirit:name> + <spirit:wire> + <spirit:direction>out</spirit:direction> + <spirit:vector> + <spirit:left spirit:format="long">1</spirit:left> + <spirit:right spirit:format="long">0</spirit:right> + </spirit:vector> + <spirit:wireTypeDefs> + <spirit:wireTypeDef> + <spirit:typeName>logic</spirit:typeName> + <spirit:viewNameRef>xilinx_anylanguagesynthesis</spirit:viewNameRef> + <spirit:viewNameRef>xilinx_anylanguagebehavioralsimulation</spirit:viewNameRef> + </spirit:wireTypeDef> + </spirit:wireTypeDefs> + </spirit:wire> + </spirit:port> + </spirit:ports> + <spirit:modelParameters> + <spirit:modelParameter xsi:type="spirit:nameValueTypeType" spirit:dataType="bit(31 0)"> + <spirit:name>SIG_CUSTOMER</spirit:name> + <spirit:displayName>Sig Customer</spirit:displayName> + <spirit:value spirit:format="long" spirit:resolve="generated" spirit:id="MODELPARAM_VALUE.SIG_CUSTOMER">0</spirit:value> + </spirit:modelParameter> + <spirit:modelParameter spirit:dataType="bit(31 0)"> + <spirit:name>SIG_APPLICATION</spirit:name> + <spirit:displayName>Sig Application</spirit:displayName> + <spirit:value spirit:format="long" spirit:resolve="generated" spirit:id="MODELPARAM_VALUE.SIG_APPLICATION">0</spirit:value> + </spirit:modelParameter> + <spirit:modelParameter spirit:dataType="bit(31 0)"> + <spirit:name>VERSION</spirit:name> + <spirit:displayName>Version</spirit:displayName> + <spirit:value spirit:format="long" spirit:resolve="generated" spirit:id="MODELPARAM_VALUE.VERSION">0</spirit:value> + </spirit:modelParameter> + <spirit:modelParameter spirit:dataType="bit(31 0)"> + <spirit:name>CHECKSUM_COUNT</spirit:name> + <spirit:displayName>Checksum Count</spirit:displayName> + <spirit:value spirit:format="long" spirit:resolve="generated" spirit:id="MODELPARAM_VALUE.CHECKSUM_COUNT">0</spirit:value> + </spirit:modelParameter> + </spirit:modelParameters> + </spirit:model> + <spirit:choices> + <spirit:choice> + <spirit:name>choice_list_9d8b0d81</spirit:name> + <spirit:enumeration>ACTIVE_HIGH</spirit:enumeration> + <spirit:enumeration>ACTIVE_LOW</spirit:enumeration> + </spirit:choice> + </spirit:choices> + <spirit:fileSets> + <spirit:fileSet> + <spirit:name>xilinx_anylanguagesynthesis_view_fileset</spirit:name> + <spirit:file> + <spirit:name>hdl/axi_info.sv</spirit:name> + <spirit:fileType>systemVerilogSource</spirit:fileType> + </spirit:file> + <spirit:file> + <spirit:name>hdl/axi_info_top.sv</spirit:name> + <spirit:fileType>systemVerilogSource</spirit:fileType> + <spirit:userFileType>CHECKSUM_ec9ff0da</spirit:userFileType> + </spirit:file> + </spirit:fileSet> + <spirit:fileSet> + <spirit:name>xilinx_anylanguagebehavioralsimulation_view_fileset</spirit:name> + <spirit:file> + <spirit:name>hdl/axi_info.sv</spirit:name> + <spirit:fileType>systemVerilogSource</spirit:fileType> + </spirit:file> + <spirit:file> + <spirit:name>hdl/axi_info_top.sv</spirit:name> + <spirit:fileType>systemVerilogSource</spirit:fileType> + </spirit:file> + </spirit:fileSet> + <spirit:fileSet> + <spirit:name>xilinx_xpgui_view_fileset</spirit:name> + <spirit:file> + <spirit:name>xgui/axi_info_top_v1_0.tcl</spirit:name> + <spirit:fileType>tclSource</spirit:fileType> + <spirit:userFileType>CHECKSUM_e11f9727</spirit:userFileType> + <spirit:userFileType>XGUI_VERSION_2</spirit:userFileType> + </spirit:file> + </spirit:fileSet> + </spirit:fileSets> + <spirit:description>axi_info_top_v1_0</spirit:description> + <spirit:parameters> + <spirit:parameter> + <spirit:name>SIG_CUSTOMER</spirit:name> + <spirit:displayName>Sig Customer</spirit:displayName> + <spirit:value spirit:format="long" spirit:resolve="user" spirit:id="PARAM_VALUE.SIG_CUSTOMER">0</spirit:value> + </spirit:parameter> + <spirit:parameter> + <spirit:name>SIG_APPLICATION</spirit:name> + <spirit:displayName>Sig Application</spirit:displayName> + <spirit:value spirit:format="long" spirit:resolve="user" spirit:id="PARAM_VALUE.SIG_APPLICATION">0</spirit:value> + </spirit:parameter> + <spirit:parameter> + <spirit:name>VERSION</spirit:name> + <spirit:displayName>Version</spirit:displayName> + <spirit:value spirit:format="long" spirit:resolve="user" spirit:id="PARAM_VALUE.VERSION">0</spirit:value> + </spirit:parameter> + <spirit:parameter> + <spirit:name>CHECKSUM_COUNT</spirit:name> + <spirit:displayName>Checksum Count</spirit:displayName> + <spirit:value spirit:format="long" spirit:resolve="user" spirit:id="PARAM_VALUE.CHECKSUM_COUNT">0</spirit:value> + </spirit:parameter> + <spirit:parameter> + <spirit:name>Component_Name</spirit:name> + <spirit:value spirit:resolve="user" spirit:id="PARAM_VALUE.Component_Name" spirit:order="1">axi_info_top_v1_0</spirit:value> + </spirit:parameter> + </spirit:parameters> + <spirit:vendorExtensions> + <xilinx:coreExtensions> + <xilinx:supportedFamilies> + <xilinx:family xilinx:lifeCycle="Production">virtex7</xilinx:family> + <xilinx:family xilinx:lifeCycle="Production">qvirtex7</xilinx:family> + <xilinx:family xilinx:lifeCycle="Production">versal</xilinx:family> + <xilinx:family xilinx:lifeCycle="Production">kintex7</xilinx:family> + <xilinx:family xilinx:lifeCycle="Production">kintex7l</xilinx:family> + <xilinx:family xilinx:lifeCycle="Production">qkintex7</xilinx:family> + <xilinx:family xilinx:lifeCycle="Production">qkintex7l</xilinx:family> + <xilinx:family xilinx:lifeCycle="Production">akintex7</xilinx:family> + <xilinx:family xilinx:lifeCycle="Production">artix7</xilinx:family> + <xilinx:family xilinx:lifeCycle="Production">artix7l</xilinx:family> + <xilinx:family xilinx:lifeCycle="Production">aartix7</xilinx:family> + <xilinx:family xilinx:lifeCycle="Production">qartix7</xilinx:family> + <xilinx:family xilinx:lifeCycle="Production">zynq</xilinx:family> + <xilinx:family xilinx:lifeCycle="Production">qzynq</xilinx:family> + <xilinx:family xilinx:lifeCycle="Production">azynq</xilinx:family> + <xilinx:family xilinx:lifeCycle="Production">spartan7</xilinx:family> + <xilinx:family xilinx:lifeCycle="Production">aspartan7</xilinx:family> + <xilinx:family xilinx:lifeCycle="Production">virtexu</xilinx:family> + <xilinx:family xilinx:lifeCycle="Production">zynquplus</xilinx:family> + <xilinx:family xilinx:lifeCycle="Production">virtexuplus</xilinx:family> + <xilinx:family xilinx:lifeCycle="Production">virtexuplusHBM</xilinx:family> + <xilinx:family xilinx:lifeCycle="Production">virtexuplus58g</xilinx:family> + <xilinx:family xilinx:lifeCycle="Production">kintexuplus</xilinx:family> + <xilinx:family xilinx:lifeCycle="Production">artixuplus</xilinx:family> + <xilinx:family xilinx:lifeCycle="Production">kintexu</xilinx:family> + </xilinx:supportedFamilies> + <xilinx:taxonomies> + <xilinx:taxonomy>/UserIP</xilinx:taxonomy> + </xilinx:taxonomies> + <xilinx:displayName>axi_info_top_v1_0</xilinx:displayName> + <xilinx:definitionSource>package_project</xilinx:definitionSource> + <xilinx:coreRevision>5</xilinx:coreRevision> + <xilinx:coreCreationDateTime>2022-05-30T14:16:13Z</xilinx:coreCreationDateTime> + </xilinx:coreExtensions> + <xilinx:packagingInfo> + <xilinx:xilinxVersion>2022.1</xilinx:xilinxVersion> + <xilinx:checksum xilinx:scope="busInterfaces" xilinx:value="919b2cd5"/> + <xilinx:checksum xilinx:scope="memoryMaps" xilinx:value="c930e363"/> + <xilinx:checksum xilinx:scope="fileGroups" xilinx:value="5ec5459d"/> + <xilinx:checksum xilinx:scope="ports" xilinx:value="bd3646cb"/> + <xilinx:checksum xilinx:scope="hdlParameters" xilinx:value="eab94b69"/> + <xilinx:checksum xilinx:scope="parameters" xilinx:value="ba692e87"/> + </xilinx:packagingInfo> + </spirit:vendorExtensions> +</spirit:component> diff --git a/finn-rtllib/axi_info/hdl/axi_info.sv b/finn-rtllib/axi_info/hdl/axi_info.sv new file mode 100644 index 0000000000000000000000000000000000000000..293563293651162e55df4f1886d1e2a17e0b3996 --- /dev/null +++ b/finn-rtllib/axi_info/hdl/axi_info.sv @@ -0,0 +1,119 @@ +/****************************************************************************** + * Copyright (c) 2022, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION). HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * @brief Read-only exposure of compiled-in info data on AXI-lite. + * @author Thomas B. Preußer <tpreusse@amd.com> + * + *******************************************************************************/ +module axi_info #( + int unsigned N, + int unsigned S_AXI_DATA_WIDTH = 32, + bit [S_AXI_DATA_WIDTH-1:0] DATA[N] +)( + //- Global Control ------------------ + input logic ap_clk, + input logic ap_rst_n, + + //- AXI Lite ------------------------ + // Writing + input logic s_axi_AWVALID, + output logic s_axi_AWREADY, + input logic [$clog2(N)+1:0] s_axi_AWADDR, + + input logic s_axi_WVALID, + output logic s_axi_WREADY, + input logic [S_AXI_DATA_WIDTH -1:0] s_axi_WDATA, + input logic [S_AXI_DATA_WIDTH/8-1:0] s_axi_WSTRB, + + output logic s_axi_BVALID, + input logic s_axi_BREADY, + output logic [1:0] s_axi_BRESP, + + // Reading + input logic s_axi_ARVALID, + output logic s_axi_ARREADY, + input logic [$clog2(N)+1:0] s_axi_ARADDR, + + output logic s_axi_RVALID, + input logic s_axi_RREADY, + output logic [S_AXI_DATA_WIDTH-1:0] s_axi_RDATA, + output logic [ 1:0] s_axi_RRESP +); + + uwire clk = ap_clk; + uwire rst = !ap_rst_n; + + //----------------------------------------------------------------------- + // Error out all Writes + if(1) begin : blkKillWrites + logic WABusy = 0; + logic WDBusy = 0; + uwire clr = rst || (WABusy && WDBusy && s_axi_BREADY); + always_ff @(posedge clk) begin : blockName + if(clr) begin + WABusy <= 0; + WDBusy <= 0; + end + else begin + WABusy <= WABusy || s_axi_AWVALID; + WDBusy <= WDBusy || s_axi_WVALID; + end + end + assign s_axi_AWREADY = !WABusy; + assign s_axi_WREADY = !WDBusy; + assign s_axi_BVALID = WABusy && WDBusy; + assign s_axi_BRESP = '1; // DECERR + + end : blkKillWrites + + //----------------------------------------------------------------------- + // Answer Reads + if(1) begin : blkRead + logic RValid = 0; + logic [S_AXI_DATA_WIDTH-1:0] RData;// = 'x; + always_ff @(posedge clk) begin + if(rst) begin + RValid <= 0; + RData <= 'x; + end + else if(s_axi_ARREADY) begin + automatic logic [$left(s_axi_ARADDR):2] addr_eff = s_axi_ARADDR[$left(s_axi_ARADDR):2]; + RValid <= s_axi_ARVALID; + RData <= (addr_eff < N)? DATA[addr_eff] : 32'hDEADDEAD; + end + end + assign s_axi_ARREADY = !RValid || s_axi_RREADY; + assign s_axi_RVALID = RValid; + assign s_axi_RDATA = RData; + assign s_axi_RRESP = '0; // OKAY + + end : blkRead + +endmodule : axi_info diff --git a/finn-rtllib/axi_info/hdl/axi_info_top.sv b/finn-rtllib/axi_info/hdl/axi_info_top.sv new file mode 100644 index 0000000000000000000000000000000000000000..2032e1105ce2c352c7ff3d5c2292dee338fa0bb1 --- /dev/null +++ b/finn-rtllib/axi_info/hdl/axi_info_top.sv @@ -0,0 +1,95 @@ +/****************************************************************************** + * Copyright (c) 2022, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION). HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * @author Thomas B. Preußer <tpreusse@amd.com> + * + *******************************************************************************/ +module axi_info_top #( + bit [31:0] SIG_CUSTOMER, + bit [31:0] SIG_APPLICATION, + bit [31:0] VERSION, + bit [31:0] CHECKSUM_COUNT +)( + //- Global Control ------------------ + input logic ap_clk, + input logic ap_rst_n, + + //- AXI Lite ------------------------ + // Writing + input logic s_axi_AWVALID, + output logic s_axi_AWREADY, + input logic [4:0] s_axi_AWADDR, + + input logic s_axi_WVALID, + output logic s_axi_WREADY, + input logic [31:0] s_axi_WDATA, + input logic [ 3:0] s_axi_WSTRB, + + output logic s_axi_BVALID, + input logic s_axi_BREADY, + output logic [1:0] s_axi_BRESP, + + // Reading + input logic s_axi_ARVALID, + output logic s_axi_ARREADY, + input logic [4:0] s_axi_ARADDR, + + output logic s_axi_RVALID, + input logic s_axi_RREADY, + output logic [31:0] s_axi_RDATA, + output logic [ 1:0] s_axi_RRESP +); + + axi_info #( + .N(6), + .S_AXI_DATA_WIDTH(32), + .DATA('{ + 32'h4649_4E4E, + SIG_CUSTOMER, + SIG_APPLICATION, + VERSION, + 32'h0, + CHECKSUM_COUNT + }) + )( + //- Global Control ------------------ + .ap_clk, .ap_rst_n, + + //- AXI Lite ------------------------ + // Writing + .s_axi_AWVALID, .s_axi_AWREADY, .s_axi_AWADDR, + .s_axi_WVALID, .s_axi_WREADY, .s_axi_WDATA, .s_axi_WSTRB, + .s_axi_BVALID, .s_axi_BREADY, .s_axi_BRESP, + // Reading + .s_axi_ARVALID, .s_axi_ARREADY, .s_axi_ARADDR, + .s_axi_RVALID, .s_axi_RREADY, .s_axi_RDATA, .s_axi_RRESP + ); + +endmodule : axi_info_top diff --git a/finn-rtllib/axi_info/xgui/axi_info_top_v1_0.tcl b/finn-rtllib/axi_info/xgui/axi_info_top_v1_0.tcl new file mode 100644 index 0000000000000000000000000000000000000000..22ae5a71538ca0f5983ec0adf6f75d1bdfbd4f72 --- /dev/null +++ b/finn-rtllib/axi_info/xgui/axi_info_top_v1_0.tcl @@ -0,0 +1,70 @@ +# Definitional proc to organize widgets for parameters. +proc init_gui { IPINST } { + ipgui::add_param $IPINST -name "Component_Name" + #Adding Page + set Page_0 [ipgui::add_page $IPINST -name "Page 0"] + ipgui::add_param $IPINST -name "CHECKSUM_COUNT" -parent ${Page_0} + ipgui::add_param $IPINST -name "SIG_APPLICATION" -parent ${Page_0} + ipgui::add_param $IPINST -name "SIG_CUSTOMER" -parent ${Page_0} + ipgui::add_param $IPINST -name "VERSION" -parent ${Page_0} + + +} + +proc update_PARAM_VALUE.CHECKSUM_COUNT { PARAM_VALUE.CHECKSUM_COUNT } { + # Procedure called to update CHECKSUM_COUNT when any of the dependent parameters in the arguments change +} + +proc validate_PARAM_VALUE.CHECKSUM_COUNT { PARAM_VALUE.CHECKSUM_COUNT } { + # Procedure called to validate CHECKSUM_COUNT + return true +} + +proc update_PARAM_VALUE.SIG_APPLICATION { PARAM_VALUE.SIG_APPLICATION } { + # Procedure called to update SIG_APPLICATION when any of the dependent parameters in the arguments change +} + +proc validate_PARAM_VALUE.SIG_APPLICATION { PARAM_VALUE.SIG_APPLICATION } { + # Procedure called to validate SIG_APPLICATION + return true +} + +proc update_PARAM_VALUE.SIG_CUSTOMER { PARAM_VALUE.SIG_CUSTOMER } { + # Procedure called to update SIG_CUSTOMER when any of the dependent parameters in the arguments change +} + +proc validate_PARAM_VALUE.SIG_CUSTOMER { PARAM_VALUE.SIG_CUSTOMER } { + # Procedure called to validate SIG_CUSTOMER + return true +} + +proc update_PARAM_VALUE.VERSION { PARAM_VALUE.VERSION } { + # Procedure called to update VERSION when any of the dependent parameters in the arguments change +} + +proc validate_PARAM_VALUE.VERSION { PARAM_VALUE.VERSION } { + # Procedure called to validate VERSION + return true +} + + +proc update_MODELPARAM_VALUE.SIG_CUSTOMER { MODELPARAM_VALUE.SIG_CUSTOMER PARAM_VALUE.SIG_CUSTOMER } { + # Procedure called to set VHDL generic/Verilog parameter value(s) based on TCL parameter value + set_property value [get_property value ${PARAM_VALUE.SIG_CUSTOMER}] ${MODELPARAM_VALUE.SIG_CUSTOMER} +} + +proc update_MODELPARAM_VALUE.SIG_APPLICATION { MODELPARAM_VALUE.SIG_APPLICATION PARAM_VALUE.SIG_APPLICATION } { + # Procedure called to set VHDL generic/Verilog parameter value(s) based on TCL parameter value + set_property value [get_property value ${PARAM_VALUE.SIG_APPLICATION}] ${MODELPARAM_VALUE.SIG_APPLICATION} +} + +proc update_MODELPARAM_VALUE.VERSION { MODELPARAM_VALUE.VERSION PARAM_VALUE.VERSION } { + # Procedure called to set VHDL generic/Verilog parameter value(s) based on TCL parameter value + set_property value [get_property value ${PARAM_VALUE.VERSION}] ${MODELPARAM_VALUE.VERSION} +} + +proc update_MODELPARAM_VALUE.CHECKSUM_COUNT { MODELPARAM_VALUE.CHECKSUM_COUNT PARAM_VALUE.CHECKSUM_COUNT } { + # Procedure called to set VHDL generic/Verilog parameter value(s) based on TCL parameter value + set_property value [get_property value ${PARAM_VALUE.CHECKSUM_COUNT}] ${MODELPARAM_VALUE.CHECKSUM_COUNT} +} + diff --git a/finn-rtllib/memstream/component.xml b/finn-rtllib/memstream/component.xml index 1e5b710dc86bde4d442ce9e83b188aeed24388c5..63a8540a76a100201c67d7a1dcbaec15f10e1c0e 100644 --- a/finn-rtllib/memstream/component.xml +++ b/finn-rtllib/memstream/component.xml @@ -1677,6 +1677,7 @@ <xilinx:family xilinx:lifeCycle="Production">qzynq</xilinx:family> <xilinx:family xilinx:lifeCycle="Production">qzynqplus</xilinx:family> <xilinx:family xilinx:lifeCycle="Production">versal</xilinx:family> + <xilinx:family xilinx:lifeCycle="Production">versalprime</xilinx:family> <xilinx:family xilinx:lifeCycle="Production">virtex7</xilinx:family> <xilinx:family xilinx:lifeCycle="Production">virtexu</xilinx:family> <xilinx:family xilinx:lifeCycle="Production">virtexuplus</xilinx:family> diff --git a/finn-rtllib/memstream/hdl/memstream_singleblock.v b/finn-rtllib/memstream/hdl/memstream_singleblock.v index 6bb3a97115325d81d4292c5af3c33921c2680a30..c9b8770aaa58dc3355bc259e5c5fece702125490 100644 --- a/finn-rtllib/memstream/hdl/memstream_singleblock.v +++ b/finn-rtllib/memstream/hdl/memstream_singleblock.v @@ -192,7 +192,11 @@ end else begin: bypass reg [MEM_WIDTH-1:0] singleval[0:0]; initial begin - $readmemh({MEM_INIT,"memblock_0.dat"}, singleval, 0, 0); + `ifdef SYNTHESIS + $readmemh({MEM_INIT,"memblock_synth_0.dat"}, singleval, 0, 0); + `else + $readmemh({MEM_INIT,"memblock_sim_0.dat"}, singleval, 0, 0); + `endif end always @(posedge aclk) diff --git a/finn-rtllib/memstream/hdl/ramb18_sdp.v b/finn-rtllib/memstream/hdl/ramb18_sdp.v index 63a349f7d56197a9b5a66c837a2f003a6e8475e6..8d2fbf9a988c0e9702e1ed83f2b4e79efb1c5a85 100644 --- a/finn-rtllib/memstream/hdl/ramb18_sdp.v +++ b/finn-rtllib/memstream/hdl/ramb18_sdp.v @@ -71,15 +71,15 @@ initial begin //MEM_INIT path must be terminated by / `ifdef SYNTHESIS if (ID < 10) - $readmemh({MEM_INIT,"memblock_",idx+8'd48,".dat"}, mem, 0, DEPTH-1); + $readmemh({MEM_INIT,"memblock_synth_",idx+8'd48,".dat"}, mem, 0, DEPTH-1); else - $readmemh({MEM_INIT,"memblock_",(idx/10)+8'd48,(idx%10)+8'd48,".dat"}, mem, 0, DEPTH-1); + $readmemh({MEM_INIT,"memblock_synth_",(idx/10)+8'd48,(idx%10)+8'd48,".dat"}, mem, 0, DEPTH-1); `else $sformat(idx,"%0d",ID); if (ID < 10) - $readmemh({MEM_INIT,"memblock_",idx[7:0],".dat"}, mem, 0, DEPTH-1); + $readmemh({MEM_INIT,"memblock_sim_",idx[7:0],".dat"}, mem, 0, DEPTH-1); else - $readmemh({MEM_INIT,"memblock_",idx,".dat"}, mem, 0, DEPTH-1); + $readmemh({MEM_INIT,"memblock_sim_",idx,".dat"}, mem, 0, DEPTH-1); `endif end diff --git a/notebooks/FCLayer_graph.onnx b/notebooks/FCLayer_graph.onnx deleted file mode 100644 index 950c78a9de7224b83ff46da4920da1baa5d80d61..0000000000000000000000000000000000000000 Binary files a/notebooks/FCLayer_graph.onnx and /dev/null differ diff --git a/notebooks/advanced/0_custom_analysis_pass.ipynb b/notebooks/advanced/0_custom_analysis_pass.ipynb index 617bfa0897f64f13a0cf34be469ff8d0bfe2ee40..684b3fea792de55c7f7fb87a4764e94e10f32964 100644 --- a/notebooks/advanced/0_custom_analysis_pass.ipynb +++ b/notebooks/advanced/0_custom_analysis_pass.ipynb @@ -13,7 +13,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -48,38 +48,9 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Serving '../LFCW1A1.onnx' at http://0.0.0.0:8081\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " <iframe\n", - " width=\"100%\"\n", - " height=\"400\"\n", - " src=\"http://0.0.0.0:8081/\"\n", - " frameborder=\"0\"\n", - " allowfullscreen\n", - " ></iframe>\n", - " " - ], - "text/plain": [ - "<IPython.lib.display.IFrame at 0x7f14142de3c8>" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "showInNetron(\"../LFCW1A1.onnx\")" ] @@ -93,7 +64,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -110,7 +81,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -140,20 +111,9 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " def analysis(self, analysis_fxn):\n", - " \"\"\"Runs given anaylsis_fxn on this model and return resulting dict.\"\"\"\n", - " return analysis_fxn(self)\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "showSrc(ModelWrapper.analysis)" ] @@ -167,17 +127,9 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'Shape': 1, 'Gather': 1, 'Unsqueeze': 5, 'Concat': 1, 'Reshape': 1, 'Mul': 5, 'Sub': 1, 'Sign': 4, 'MatMul': 4, 'BatchNormalization': 3, 'Squeeze': 3}\n" - ] - } - ], + "outputs": [], "source": [ "print(model.analysis(count_equal_nodes))" ] diff --git a/notebooks/advanced/1_custom_transformation_pass.ipynb b/notebooks/advanced/1_custom_transformation_pass.ipynb index 9d9bc74633975076b9464dcc38da920204f05c06..f0c5f80d826a41c429ecbb465844e738faa62c9b 100644 --- a/notebooks/advanced/1_custom_transformation_pass.ipynb +++ b/notebooks/advanced/1_custom_transformation_pass.ipynb @@ -13,7 +13,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -42,32 +42,9 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " def transform(self, transformation, make_deepcopy=True):\n", - " \"\"\"Applies given Transformation repeatedly until no more changes can be made\n", - " and returns a transformed ModelWrapper instance.\n", - "\n", - " If make_deepcopy is specified, operates on a new (deep)copy of model.\n", - " \"\"\"\n", - " transformed_model = self\n", - " if make_deepcopy:\n", - " transformed_model = copy.deepcopy(self)\n", - " model_was_changed = True\n", - " while model_was_changed:\n", - " (transformed_model, model_was_changed) = transformation.apply(\n", - " transformed_model\n", - " )\n", - " return transformed_model\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "from finn.core.modelwrapper import ModelWrapper\n", "showSrc(ModelWrapper.transform)" @@ -98,27 +75,9 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "class Transformation(ABC):\n", - " \"\"\"Transformation class all transformations are based on. Contains only\n", - " abstract method apply() every transformation has to fill.\"\"\"\n", - "\n", - " def __init__(self):\n", - " super().__init__()\n", - "\n", - " @abstractmethod\n", - " def apply(self, model):\n", - " pass\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "from finn.transformation.base import Transformation\n", "\n", @@ -145,7 +104,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -157,45 +116,16 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Serving '../LFCW1A1.onnx' at http://0.0.0.0:8081\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " <iframe\n", - " width=\"100%\"\n", - " height=\"400\"\n", - " src=\"http://0.0.0.0:8081/\"\n", - " frameborder=\"0\"\n", - " allowfullscreen\n", - " ></iframe>\n", - " " - ], - "text/plain": [ - "<IPython.lib.display.IFrame at 0x7fc625ac0a20>" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "showInNetron('../LFCW1A1.onnx')" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -232,7 +162,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -242,40 +172,9 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Stopping http://0.0.0.0:8081\n", - "Serving '/tmp/LFCW1A1_changed.onnx' at http://0.0.0.0:8081\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " <iframe\n", - " width=\"100%\"\n", - " height=\"400\"\n", - " src=\"http://0.0.0.0:8081/\"\n", - " frameborder=\"0\"\n", - " allowfullscreen\n", - " ></iframe>\n", - " " - ], - "text/plain": [ - "<IPython.lib.display.IFrame at 0x7fc625ac09b0>" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "showInNetron('/tmp/LFCW1A1_changed.onnx')" ] @@ -291,66 +190,9 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "class NodeLocalTransformation(Transformation):\n", - " \"\"\"\n", - " Parent class for transformations, which can be executed locally to one node\n", - " by accessing and modifying the attributes of only that node.\n", - " This class can then automatically parallelize the transformation.\n", - " Transformations sublcassing NodeLocalTransformation must implement the\n", - " abstract method applyNodeLocal().\n", - "\n", - " To control the degree of parallelization, specify the num_workers argument\n", - " in the constructor, using one of the following values:\n", - " * None: use NUM_DEFAULT_WORKERS environment variable\n", - " * 0: use all available CPU cores\n", - " * (any other int>0): set number of parallel workers\n", - " \"\"\"\n", - "\n", - " def __init__(self, num_workers=None):\n", - " super().__init__()\n", - " if num_workers is None:\n", - " self._num_workers = get_num_default_workers()\n", - " else:\n", - " self._num_workers = num_workers\n", - " assert self._num_workers >= 0, \"Number of workers must be nonnegative.\"\n", - " if self._num_workers == 0:\n", - " self._num_workers = mp.cpu_count()\n", - "\n", - " @abstractmethod\n", - " def applyNodeLocal(self, node):\n", - " pass\n", - "\n", - " def apply(self, model):\n", - " # Remove old nodes from the current model\n", - " old_nodes = []\n", - " for i in range(len(model.graph.node)):\n", - " old_nodes.append(model.graph.node.pop())\n", - "\n", - " # Execute transformation in parallel\n", - " with mp.Pool(self._num_workers) as p:\n", - " new_nodes_and_bool = p.map(self.applyNodeLocal, old_nodes, chunksize=1)\n", - "\n", - " # extract nodes and check if the transformation needs to run again\n", - " # Note: .pop() had initially reversed the node order\n", - " run_again = False\n", - " for node, run in reversed(new_nodes_and_bool):\n", - " # Reattach new nodes to old model\n", - " model.graph.node.append(node)\n", - " if run is True:\n", - " run_again = True\n", - "\n", - " return (model, run_again)\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "from finn.transformation.base import NodeLocalTransformation\n", "\n", @@ -370,59 +212,9 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "class CompileCppSim(NodeLocalTransformation):\n", - " \"\"\"For every node: compile C++ code in node attribute \"code_gen_dir_cppsim\"\n", - " and save path to executables in node attribute \"executable_path\".\n", - " All nodes in the graph must have the fpgadataflow backend attribute.\n", - "\n", - " To use these executables, exec_mode must be set to \"cppsim\" (using transformation\n", - " SetExecMode) and the model has to be executed using execute_onnx() from\n", - " finn.core.onnx_exec\n", - "\n", - " * num_workers (int or None) number of parallel workers, see documentation in\n", - " NodeLocalTransformation for more details.\n", - " \"\"\"\n", - "\n", - " def __init__(self, num_workers=None):\n", - " super().__init__(num_workers=num_workers)\n", - "\n", - " def applyNodeLocal(self, node):\n", - " op_type = node.op_type\n", - " if is_fpgadataflow_node(node) is True:\n", - " try:\n", - " # lookup op_type in registry of CustomOps\n", - " inst = registry.getCustomOp(node)\n", - " # ensure that code is generated\n", - " assert (\n", - " inst.get_nodeattr(\"code_gen_dir_cppsim\") != \"\"\n", - " ), \"\"\"Node\n", - " attribute \"code_gen_dir_cppsim\" is not set. Please run\n", - " Transformation PrepareCppSim first.\"\"\"\n", - " # call the compilation function for this node\n", - " inst.compile_singlenode_code()\n", - " # ensure that executable path is now set\n", - " assert (\n", - " inst.get_nodeattr(\"executable_path\") != \"\"\n", - " ), \"\"\"Transformation\n", - " compile was not successful, there is no path to executables set\n", - " in node attribute \"executable_path\".\"\"\"\n", - " except KeyError:\n", - " # exception if op_type is not supported\n", - " raise Exception(\n", - " \"Custom op_type %s is currently not supported.\" % op_type\n", - " )\n", - " return (node, False)\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim\n", "\n", diff --git a/notebooks/advanced/2_custom_op.ipynb b/notebooks/advanced/2_custom_op.ipynb index 57f2601c73853ba9acc5f8ff85e0a18efc7e9a17..5f2bdc4bf4dac1b3471a75f2af7c786b69ed6cd0 100644 --- a/notebooks/advanced/2_custom_op.ipynb +++ b/notebooks/advanced/2_custom_op.ipynb @@ -28,57 +28,9 @@ }, { "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['__abstractmethods__',\n", - " '__class__',\n", - " '__delattr__',\n", - " '__dict__',\n", - " '__dir__',\n", - " '__doc__',\n", - " '__eq__',\n", - " '__format__',\n", - " '__ge__',\n", - " '__getattribute__',\n", - " '__gt__',\n", - " '__hash__',\n", - " '__init__',\n", - " '__init_subclass__',\n", - " '__le__',\n", - " '__lt__',\n", - " '__module__',\n", - " '__ne__',\n", - " '__new__',\n", - " '__reduce__',\n", - " '__reduce_ex__',\n", - " '__repr__',\n", - " '__setattr__',\n", - " '__sizeof__',\n", - " '__slots__',\n", - " '__str__',\n", - " '__subclasshook__',\n", - " '__weakref__',\n", - " '_abc_impl',\n", - " 'execute_node',\n", - " 'get_nodeattr',\n", - " 'get_nodeattr_allowed_values',\n", - " 'get_nodeattr_def',\n", - " 'get_nodeattr_types',\n", - " 'infer_node_datatype',\n", - " 'make_shape_compatible_op',\n", - " 'set_nodeattr',\n", - " 'verify_node']" - ] - }, - "execution_count": 1, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "from finn.custom_op.base import CustomOp\n", "dir(CustomOp)" @@ -95,7 +47,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -183,7 +135,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -200,27 +152,9 @@ }, { "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'DebugMarker': finn.custom_op.general.debugmarker.DebugMarker,\n", - " 'QuantAvgPool2d': finn.custom_op.general.quantavgpool2d.QuantAvgPool2d,\n", - " 'MaxPoolNHWC': finn.custom_op.general.maxpoolnhwc.MaxPoolNHWC,\n", - " 'GenericPartition': finn.custom_op.general.genericpartition.GenericPartition,\n", - " 'MultiThreshold': finn.custom_op.general.multithreshold.MultiThreshold,\n", - " 'XnorPopcountMatMul': finn.custom_op.general.xnorpopcount.XnorPopcountMatMul,\n", - " 'Im2Col': finn.custom_op.general.im2col.Im2Col,\n", - " 'MyPythonPowerOp': __main__.MyPythonPowerOp}" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "general.custom_op" ] @@ -238,7 +172,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -283,34 +217,9 @@ }, { "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[input: \"inp\"\n", - "output: \"outp\"\n", - "op_type: \"MyPythonPowerOp\"\n", - "attribute {\n", - " name: \"exec_mode\"\n", - " s: \"python\"\n", - " type: STRING\n", - "}\n", - "attribute {\n", - " name: \"exponent\"\n", - " i: 2\n", - " type: INT\n", - "}\n", - "domain: \"finn.custom_op.general\"\n", - "]" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "# generate a small graph with our custom op\n", "input_shape = (1, 2, 4)\n", @@ -327,21 +236,9 @@ }, { "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[[ 0., -3., 1., -8.],\n", - " [ 2., -2., -4., -8.]]], dtype=float32)" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "from finn.core.datatype import DataType\n", "from finn.util.basic import gen_finn_dt_tensor\n", @@ -360,21 +257,9 @@ }, { "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'outp': array([[[ 0., 9., 1., 64.],\n", - " [ 4., 4., 16., 64.]]], dtype=float32)}" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "from finn.core.onnx_exec import execute_onnx\n", "\n", @@ -406,7 +291,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -521,34 +406,9 @@ }, { "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[input: \"inp\"\n", - "output: \"outp\"\n", - "op_type: \"MyMixedPowerOp\"\n", - "attribute {\n", - " name: \"exec_mode\"\n", - " s: \"python\"\n", - " type: STRING\n", - "}\n", - "attribute {\n", - " name: \"exponent\"\n", - " i: 2\n", - " type: INT\n", - "}\n", - "domain: \"finn.custom_op.general\"\n", - "]" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "# register our new op\n", "general.custom_op[\"MyMixedPowerOp\"] = MyMixedPowerOp\n", @@ -567,19 +427,9 @@ }, { "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Available functions: ['__abstractmethods__', '__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__slots__', '__str__', '__subclasshook__', '__weakref__', '_abc_impl', 'execute_node', 'get_nodeattr', 'get_nodeattr_allowed_values', 'get_nodeattr_def', 'get_nodeattr_types', 'infer_node_datatype', 'make_shape_compatible_op', 'my_custom_cpp_gen', 'onnx_node', 'set_nodeattr', 'verify_node']\n", - "codegen_dir: \n", - "exec_mode: python\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "from finn.custom_op.registry import getCustomOp\n", "\n", @@ -602,7 +452,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -641,7 +491,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -657,17 +507,9 @@ }, { "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "/tmp/finn_dev_maltanar/my_custom_oppswiou3i\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "new_op_inst = getCustomOp(mixedop_graph_new.graph.node[0])\n", "codegen_dir = new_op_inst.get_nodeattr(\"codegen_dir\")\n", @@ -683,17 +525,9 @@ }, { "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "compile.sh node_model\ttop.cpp\r\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "! ls {codegen_dir}" ] @@ -707,39 +541,9 @@ }, { "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r\n", - "#include <iostream>\r\n", - "#include <fstream>\r\n", - "using namespace std;\r\n", - "#define EXPONENT 2\r\n", - "\r\n", - "int main(int argc, char **argv) {\r\n", - " ifstream infile(\"input.txt\");\r\n", - " ofstream outfile(\"output.txt\");\r\n", - " \r\n", - " float elem;\r\n", - " while (infile >> elem)\r\n", - " {\r\n", - " float res = 1.0;\r\n", - " for(int i=0; i < EXPONENT; i++) {\r\n", - " res *= elem;\r\n", - " }\r\n", - " outfile << res << \"\\n\";\r\n", - " }\r\n", - "\r\n", - " return 0;\r\n", - "}\r\n", - " " - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "! cat {codegen_dir}/top.cpp" ] @@ -757,7 +561,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -766,7 +570,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -775,26 +579,16 @@ }, { "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "49\r\n", - "64\r\n", - "81\r\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "! cat {codegen_dir}/output.txt" ] }, { "cell_type": "code", - "execution_count": 20, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -812,21 +606,9 @@ }, { "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[[-6., 3., 2., -5.],\n", - " [ 5., 2., 0., -2.]]], dtype=float32)" - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "# generate a random input of e.g signed 4-bit values\n", "random_input = gen_finn_dt_tensor(DataType[\"INT4\"], input_shape)\n", @@ -842,21 +624,9 @@ }, { "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'outp': array([[[36., 9., 4., 25.],\n", - " [25., 4., 0., 4.]]], dtype=float32)}" - ] - }, - "execution_count": 22, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "# run with FINN's execute_onnx, custom node will use Python execution\n", "new_op_inst.set_nodeattr(\"exec_mode\", \"python\")\n", @@ -874,21 +644,9 @@ }, { "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'outp': array([[[36., 9., 4., 25.],\n", - " [25., 4., 0., 4.]]])}" - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "# run with FINN's execute_onnx, custom node will use c++ execution\n", "new_op_inst.set_nodeattr(\"exec_mode\", \"c++\")\n", diff --git a/notebooks/basics/0_how_to_work_with_onnx.ipynb b/notebooks/basics/0_how_to_work_with_onnx.ipynb index 58f53c32981edcba669f15a6c52499a8f89938d6..aae98ec771c1b38d16e593241f16f6dccfe142d7 100644 --- a/notebooks/basics/0_how_to_work_with_onnx.ipynb +++ b/notebooks/basics/0_how_to_work_with_onnx.ipynb @@ -31,7 +31,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -56,7 +56,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -98,7 +98,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -119,7 +119,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -154,7 +154,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -171,7 +171,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -180,40 +180,9 @@ }, { "cell_type": "code", - "execution_count": 7, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Serving '/tmp/simple_model.onnx' at http://0.0.0.0:8081\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " <iframe\n", - " width=\"100%\"\n", - " height=\"400\"\n", - " src=\"http://0.0.0.0:8081/\"\n", - " frameborder=\"0\"\n", - " allowfullscreen\n", - " ></iframe>\n", - " " - ], - "text/plain": [ - "<IPython.lib.display.IFrame at 0x7fcdfc956b70>" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "showInNetron('/tmp/simple_model.onnx')" ] @@ -229,7 +198,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -252,7 +221,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -270,7 +239,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -289,7 +258,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -308,29 +277,9 @@ }, { "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The output of the ONNX model is: \n", - "[[22. 13. 21. 8.]\n", - " [ 0. 8. 11. 1.]\n", - " [ 3. 12. 8. 2.]\n", - " [ 0. 6. 1. 4.]]\n", - "\n", - "The output of the reference function is: \n", - "[[22. 13. 21. 8.]\n", - " [ 0. 8. 11. 1.]\n", - " [ 3. 12. 8. 2.]\n", - " [ 0. 6. 1. 4.]]\n", - "\n", - "The results are the same!\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "ref_output= expected_output(in1_values, in2_values, in3_values)\n", "print(\"The output of the ONNX model is: \\n{}\".format(output[0]))\n", @@ -369,7 +318,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -386,7 +335,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -410,7 +359,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -433,19 +382,9 @@ }, { "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Found adder node: Add1\n", - "Found adder node: Add2\n", - "Found adder node: Add3\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "add_nodes = identify_adder_nodes(finn_model)\n", "for node in add_nodes:\n", @@ -461,7 +400,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -490,7 +429,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -520,19 +459,9 @@ }, { "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Found following pair that could be replaced by a sum node:\n", - "Add1\n", - "Add2\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "for node in add_nodes:\n", " add_pairs = adder_pair(finn_model, node)\n", @@ -556,18 +485,9 @@ }, { "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The new node gets the following inputs: \n", - "['in1', 'in2', 'in3']\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "input_list = []\n", "for i in range(len(substitute_pair)):\n", @@ -591,7 +511,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -607,7 +527,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -628,7 +548,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -656,7 +576,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -666,40 +586,9 @@ }, { "cell_type": "code", - "execution_count": 26, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Stopping http://0.0.0.0:8081\n", - "Serving '/tmp/simple_model1.onnx' at http://0.0.0.0:8081\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " <iframe\n", - " width=\"100%\"\n", - " height=\"400\"\n", - " src=\"http://0.0.0.0:8081/\"\n", - " frameborder=\"0\"\n", - " allowfullscreen\n", - " ></iframe>\n", - " " - ], - "text/plain": [ - "<IPython.lib.display.IFrame at 0x7fcdfc130cc0>" - ] - }, - "execution_count": 26, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "showInNetron('/tmp/simple_model1.onnx')" ] @@ -713,7 +602,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -723,29 +612,9 @@ }, { "cell_type": "code", - "execution_count": 28, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The output of the manipulated ONNX model is: \n", - "[[22. 13. 21. 8.]\n", - " [ 0. 8. 11. 1.]\n", - " [ 3. 12. 8. 2.]\n", - " [ 0. 6. 1. 4.]]\n", - "\n", - "The output of the reference function is: \n", - "[[22. 13. 21. 8.]\n", - " [ 0. 8. 11. 1.]\n", - " [ 3. 12. 8. 2.]\n", - " [ 0. 6. 1. 4.]]\n", - "\n", - "The results are the same!\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "print(\"The output of the manipulated ONNX model is: \\n{}\".format(output[0]))\n", "print(\"\\nThe output of the reference function is: \\n{}\".format(ref_output))\n", diff --git a/notebooks/end2end_example/bnn-pynq/cnv_end2end_example.ipynb b/notebooks/end2end_example/bnn-pynq/cnv_end2end_example.ipynb index 2d668f3e041e54bd82e79a79efca8a82210bfcbc..e2762024a751ef573ebaf8dadf64d63e6abb6d83 100644 --- a/notebooks/end2end_example/bnn-pynq/cnv_end2end_example.ipynb +++ b/notebooks/end2end_example/bnn-pynq/cnv_end2end_example.ipynb @@ -55,14 +55,15 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from finn.util.basic import make_build_dir\n", "from finn.util.visualization import showInNetron\n", + "import os\n", " \n", - "build_dir = \"/workspace/finn\"" + "build_dir = os.environ[\"FINN_ROOT\"]" ] }, { @@ -76,20 +77,9 @@ }, { "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/workspace/brevitas/src/brevitas_examples/bnn_pynq/models/CNV.py:106: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect.\n", - " x = 2.0 * x - torch.tensor([1.0], device=x.device)\n", - "/workspace/brevitas/src/brevitas/quant_tensor/__init__.py:74: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect.\n", - " training = torch.tensor(training, dtype=torch.bool)\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "import onnx\n", "from finn.util.test import get_test_model_trained\n", @@ -119,38 +109,9 @@ }, { "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Serving '/workspace/finn/end2end_cnv_w1a1_tidy.onnx' at http://0.0.0.0:8081\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " <iframe\n", - " width=\"100%\"\n", - " height=\"400\"\n", - " src=\"http://localhost:8081/\"\n", - " frameborder=\"0\"\n", - " allowfullscreen\n", - " ></iframe>\n", - " " - ], - "text/plain": [ - "<IPython.lib.display.IFrame at 0x7f912af76550>" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "showInNetron(build_dir+\"/end2end_cnv_w1a1_tidy.onnx\")" ] @@ -173,18 +134,9 @@ }, { "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/workspace/finn-base/src/finn/transformation/infer_data_layouts.py:114: UserWarning: Assuming 4D input is NCHW\n", - " warnings.warn(\"Assuming 4D input is NCHW\")\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "from finn.util.pytorch import ToTensor\n", "from finn.transformation.merge_onnx_models import MergeONNXModels\n", @@ -208,7 +160,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -230,39 +182,9 @@ }, { "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Stopping http://0.0.0.0:8081\n", - "Serving '/workspace/finn/end2end_cnv_w1a1_pre_post.onnx' at http://0.0.0.0:8081\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " <iframe\n", - " width=\"100%\"\n", - " height=\"400\"\n", - " src=\"http://localhost:8081/\"\n", - " frameborder=\"0\"\n", - " allowfullscreen\n", - " ></iframe>\n", - " " - ], - "text/plain": [ - "<IPython.lib.display.IFrame at 0x7f8ffd85a760>" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "showInNetron(build_dir+\"/end2end_cnv_w1a1_pre_post.onnx\")" ] @@ -285,7 +207,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -328,39 +250,9 @@ }, { "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Stopping http://0.0.0.0:8081\n", - "Serving '/workspace/finn/end2end_cnv_w1a1_streamlined.onnx' at http://0.0.0.0:8081\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " <iframe\n", - " width=\"100%\"\n", - " height=\"400\"\n", - " src=\"http://localhost:8081/\"\n", - " frameborder=\"0\"\n", - " allowfullscreen\n", - " ></iframe>\n", - " " - ], - "text/plain": [ - "<IPython.lib.display.IFrame at 0x7f91ac6e6f70>" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "showInNetron(build_dir+\"/end2end_cnv_w1a1_streamlined.onnx\")" ] @@ -376,18 +268,9 @@ }, { "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/workspace/finn/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py:591: UserWarning: Clipping some thresholds in \n", - " warnings.warn(\"Clipping some thresholds in %s\" % self.onnx_node.name)\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls\n", "from finn.transformation.fpgadataflow.create_dataflow_partition import (\n", @@ -401,8 +284,8 @@ "mem_mode = \"decoupled\"\n", "\n", "model = ModelWrapper(build_dir + \"/end2end_cnv_w1a1_streamlined.onnx\")\n", - "model = model.transform(to_hls.InferBinaryStreamingFCLayer(mem_mode))\n", - "model = model.transform(to_hls.InferQuantizedStreamingFCLayer(mem_mode))\n", + "model = model.transform(to_hls.InferBinaryMatrixVectorActivation(mem_mode))\n", + "model = model.transform(to_hls.InferQuantizedMatrixVectorActivation(mem_mode))\n", "# TopK to LabelSelect\n", "model = model.transform(to_hls.InferLabelSelectLayer())\n", "# input quantization (if any) to standalone thresholding\n", @@ -429,46 +312,14 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Notice the additional `RemoveCNVtoFCFlatten` transformation that was not used for TFC-w1a1. In the last Netron visualization you may have noticed a `Reshape` operation towards the end of the network where the convolutional part of the network ends and the fully-connected layers started. That `Reshape` is essentialy a tensor flattening operation, which we can remove for the purposes of hardware implementation. We can examine the contents of the dataflow partition with Netron, and observe the `ConvolutionInputGenerator`, `StreamingFCLayer_Batch` and `StreamingMaxPool_Batch` nodes that implement the sliding window, matrix multiply and maxpool operations in hlslib. *Note that the StreamingFCLayer instances following the ConvolutionInputGenerator nodes are really implementing the convolutions, despite the name. The final three StreamingFCLayer instances implement actual FC layers.*" + "Notice the additional `RemoveCNVtoFCFlatten` transformation that was not used for TFC-w1a1. In the last Netron visualization you may have noticed a `Reshape` operation towards the end of the network where the convolutional part of the network ends and the fully-connected layers started. That `Reshape` is essentialy a tensor flattening operation, which we can remove for the purposes of hardware implementation. We can examine the contents of the dataflow partition with Netron, and observe the `ConvolutionInputGenerator`, `MatrixVectorActivation` and `StreamingMaxPool_Batch` nodes that implement the sliding window, matrix multiply and maxpool operations in hlslib. *Note that the MatrixVectorActivation instances following the ConvolutionInputGenerator nodes are really implementing the convolutions, despite the name. The final three MatrixVectorActivation instances implement actual FC layers.*" ] }, { "cell_type": "code", - "execution_count": 10, - "metadata": { - "scrolled": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Stopping http://0.0.0.0:8081\n", - "Serving '/workspace/finn/end2end_cnv_w1a1_dataflow_parent.onnx' at http://0.0.0.0:8081\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " <iframe\n", - " width=\"100%\"\n", - " height=\"400\"\n", - " src=\"http://localhost:8081/\"\n", - " frameborder=\"0\"\n", - " allowfullscreen\n", - " ></iframe>\n", - " " - ], - "text/plain": [ - "<IPython.lib.display.IFrame at 0x7f8ffd85ae20>" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "showInNetron(build_dir + \"/end2end_cnv_w1a1_dataflow_parent.onnx\")" ] @@ -482,39 +333,9 @@ }, { "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Stopping http://0.0.0.0:8081\n", - "Serving '/workspace/finn/end2end_cnv_w1a1_dataflow_model.onnx' at http://0.0.0.0:8081\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " <iframe\n", - " width=\"100%\"\n", - " height=\"400\"\n", - " src=\"http://localhost:8081/\"\n", - " frameborder=\"0\"\n", - " allowfullscreen\n", - " ></iframe>\n", - " " - ], - "text/plain": [ - "<IPython.lib.display.IFrame at 0x7f8ffd832280>" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "showInNetron(build_dir + \"/end2end_cnv_w1a1_dataflow_model.onnx\")" ] @@ -528,12 +349,12 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ "model = ModelWrapper(build_dir + \"/end2end_cnv_w1a1_dataflow_model.onnx\")\n", - "fc_layers = model.get_nodes_by_op_type(\"StreamingFCLayer_Batch\")\n", + "fc_layers = model.get_nodes_by_op_type(\"MatrixVectorActivation\")\n", "# each tuple is (PE, SIMD, in_fifo_depth) for a layer\n", "folding = [\n", " (16, 3, 128),\n", @@ -567,44 +388,14 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Below we visualize in Netron to observe the `StreamingDataWidthConverter` and `StreamingFIFO` nodes that have been inserted into graph, as well as the folding factors in the `PE` and `SIMD` attributes of each `StreamingFCLayer_Batch`." + "Below we visualize in Netron to observe the `StreamingDataWidthConverter` and `StreamingFIFO` nodes that have been inserted into graph, as well as the folding factors in the `PE` and `SIMD` attributes of each `MatrixVectorActivation`." ] }, { "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Stopping http://0.0.0.0:8081\n", - "Serving '/workspace/finn/end2end_cnv_w1a1_folded.onnx' at http://0.0.0.0:8081\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " <iframe\n", - " width=\"100%\"\n", - " height=\"400\"\n", - " src=\"http://localhost:8081/\"\n", - " frameborder=\"0\"\n", - " allowfullscreen\n", - " ></iframe>\n", - " " - ], - "text/plain": [ - "<IPython.lib.display.IFrame at 0x7f8ff1243af0>" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "showInNetron(build_dir + \"/end2end_cnv_w1a1_folded.onnx\")" ] @@ -627,22 +418,9 @@ }, { "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/workspace/finn/src/finn/transformation/fpgadataflow/floorplan.py:107: UserWarning: 32 nodes have no entry in the provided floorplan, SLR was set to -1\n", - " warnings.warn(\n", - "/workspace/finn/src/finn/transformation/fpgadataflow/insert_fifo.py:154: UserWarning: Overriding input FIFO depth to 32\n", - " warnings.warn(\"Overriding input FIFO depth to 32\")\n", - "/workspace/finn/src/finn/transformation/fpgadataflow/insert_fifo.py:200: UserWarning: Overriding output FIFO depth to 32\n", - " warnings.warn(\"Overriding output FIFO depth to 32\")\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "test_pynq_board = \"Pynq-Z2\"\n", "target_clk_ns = 10\n", @@ -666,22 +444,9 @@ }, { "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Welcome to PYNQ Linux, based on Ubuntu 18.04 (GNU/Linux 4.19.0-xilinx-v2019.1 armv7l)\r\n", - "\r\n", - " * Super-optimized for small spaces - read how we shrank the memory\r\n", - " footprint of MicroK8s to make it the smallest full K8s around.\r\n", - "\r\n", - " https://ubuntu.com/blog/microk8s-memory-optimisation\r\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "import os\n", "\n", @@ -701,7 +466,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -714,20 +479,9 @@ }, { "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'/home/xilinx/finn_dev_jduarte/pynq_deployment_yrxnwrak'" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "target_dir_pynq = target_dir + \"/\" + model.get_metadata_prop(\"pynq_deployment_dir\").split(\"/\")[-1]\n", "target_dir_pynq" @@ -735,24 +489,9 @@ }, { "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "total 4240\r\n", - "-rw-rw-r-- 1 xilinx xilinx 18616 Jun 28 20:42 driver_base.py\r\n", - "-rw-r--r-- 1 xilinx xilinx 4868 Jun 28 20:42 driver.py\r\n", - "drwxr-xr-x 4 xilinx xilinx 4096 Jun 28 20:42 finn\r\n", - "-rw-r--r-- 1 xilinx xilinx 4045671 Jun 28 20:42 resizer.bit\r\n", - "-rw-r--r-- 1 xilinx xilinx 247083 Jun 28 20:42 resizer.hwh\r\n", - "drwxr-xr-x 2 xilinx xilinx 4096 Jun 28 20:42 runtime_weights\r\n", - "-rw-rw-r-- 1 xilinx xilinx 4107 Jun 28 20:42 validate.py\r\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "! ssh {options} {username}@{ip} -p {port} 'ls -l {target_dir_pynq}'" ] @@ -766,32 +505,9 @@ }, { "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "<matplotlib.image.AxesImage at 0x7f917faeb6d0>" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAPsAAAD5CAYAAADhukOtAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/d3fzzAAAACXBIWXMAAAsTAAALEwEAmpwYAAAe8klEQVR4nO2da4yc53Xf/2feuex9Z5dLLpdXURJlRVZiSqFVO1EV2akDRUkgGwhcu4ChAEYUBBEQA+kHwQVqF+gHp6ht+EPhgq5VK4ZrWbUtSEiE1LYcRDDsSKJu1IW6ULxIJJdcksu97+zcTj/MyKXU5//sksudpf38fwDB2efs875nnnnPvLPPf8455u4QQvz6k1tvB4QQnUHBLkQiKNiFSAQFuxCJoGAXIhEU7EIkQn41k83sDgBfA5AB+B/u/qXY7/d3533DQDF8rPh5Ltq3mKTo4Lbouci06PH40eJGj70Px/wP2yx2MjIHAGLK7KXJttyP2NHcL/4aaB2TrQenGX3Sl+ZH7NkxSzPiBvNxer6OxaVG0MlLDnYzywD8NwAfA3AcwNNm9qi7v8LmbBgo4gv/7vrw8bxJz1UshN20HA+IanWJ2uqNGj9XMfxmBACNZthHj7wqlmtQWy6jJnitlx8T/JiFYiU4nkVeastx/xvNOrXV6vw1azZJUBj3ox6+RgEAS+x4WC5wwz7G3tSrVX59NBqRdYxcw7nIa1Yl19U8X3osVMPH+/ZPTkR8uHRuAXDI3Q+7exXAgwDuWsXxhBBryGqCfSuAty/4+Xh7TAhxBbLmG3Rmdo+Z7Tez/XOLkc8lQog1ZTXBfgLA9gt+3tYeexfuvs/d97r73r7uVe0HCiFWwWqC/WkAu81sl5kVAXwKwKOXxy0hxOXmkm+17l43s3sB/B+0pLf73f3l6BwYquT9xX2RTyS7lSXwHesc+FZ3Ph/ZIb8ExcsKfNJStUpt9WbEx4j0lkV28fNkmjX5DjPqXLmI7SI3I/5XrSs43shKfE7seA2+HtbkPhpRE7oir1neuC2XjygXtcgaG/8T1skae0RnyLKwjzFlYlWfq939MQCPreYYQojOoG/QCZEICnYhEkHBLkQiKNiFSAQFuxCJ0OFvuTicJVY4l3+8EZ5jDS7VNGtc8sq6IzIOeDIDk7yaEemnWChQW925rVmLPLfI+er1sM0imVy5iMxnGU8M8iwsrwHAYiMssZ06x+Wp+Sr3cW6Oz8ucr0d/V3gdi8Zf54GebmrrLnEJrZnj11wuKqOFfeRXB1BjyVcR7U13diESQcEuRCIo2IVIBAW7EImgYBciETq6G2/uyDfIrnsW2S0mSRylLJIfn49tS0YSHUiCAQCaCFOPFQvLcT8KRb7ru/mq66htZuostZ09txA+V57vqucQSU6p80tk0bn/B4+FffTSMJ1Ty3hiU7WP7/zPTU9S24mJqeB4X4k/r8ap8BwA2DHK13FDP1/HrnysnFX4Oi5GLuEGUSBi5bZ0ZxciERTsQiSCgl2IRFCwC5EICnYhEkHBLkQirEO517A0YPkyn0HkhHqsA0eOy3LVOk9YKEZqpDUapFZYJDEFESmkGKmD9q/+zceo7Zmf/4LaTk6dC47PRyS0eoNLXseOn6G2Iyd495FSeSw4vm10F53jpX5qq+b561Lo20ht9cpccPzcxEk6p6fM5cHjc6eprUJqJQLAaD9Pa+kphBNhGrWwjAoArIlPpJOX7uxCpIKCXYhEULALkQgKdiESQcEuRCIo2IVIhFVJb2Z2FMAsgAaAurvvjf1+03JYyoXllemFHjqvQdoTDfVxeW0g43JYPlKPrRmR5ZisQevqIZ5Ft7Bwntp++vePUNvpKV6v7/Rc+HzHTvBzHRt/m9qyrj5qa2QD1NY7MBIcL/Tw4+W7eBZdKdKSqSvHpcOz1XBbsbFtO+icyuI8tR05wqW3yekKtWXGn/dVG8O2QoNLecbqMkak3suhs3/E3XnOpRDiikAf44VIhNUGuwP4kZk9Y2b3XA6HhBBrw2o/xt/q7ifMbBOAH5vZq+7+xIW/0H4TuAcAhvp5lQ8hxNqyqju7u59o/z8B4GEAtwR+Z5+773X3vX3d6/BVfCEEgFUEu5n1mln/O48B/AGAly6XY0KIy8tqbrWjAB5ub/XnAfwvd//H2IR603BmMZzhM1kr03lP/Pyfg+O/sZtLLh95f1j6AYChSHHLJslsA4AcadOTy/GMpobztkURNQlHjh2htslFngHmPUPB8ayPSz+5oVlq6y4PUlu1wqWmKmmvNDDEX7OBPm6bOHWK2mbO84KT/cXwJd7VzWW+t85zcanQv4nazpx6i9r6TvM13jwQ9qXbIpmKpAgrIrLyJQe7ux8G8IFLnS+E6CyS3oRIBAW7EImgYBciERTsQiSCgl2IROhsr7eshPxguODgwjn+vlMrhgsKTi6EpTAAWKjy3mADRZ7Z1iR9t9rG4HCW8Yy9SpVLPGd48hrOznIJMFYQcWhjOJtrvjlD54yA+5hFMtGqBb6Olfmw1FSZ437sHN1AbQtEQgOACZLZBgBWCMuU05O8mCMiBUQX53lGXFbk18HEDM86HCfZcjtH+PWdYwlxsRaH3CSE+HVCwS5EIijYhUgEBbsQiaBgFyIROrob39Xdi/f91v+XBQsAOP4vr9F5fYPh3fhbPhw+FgD0ZMeorUp2igEgl+dJLVYI70w3vEzn9G/aTm3PHzhEbX1lvjO9def7qc1z4d3nQmTnvLkUbhkFANVqpMVWZK0yksTx8gsH6JyBUqRFUi9PkumN1LU7eSpcM65OlBUAyMgOPgAM9XN1YrrBk57OT3LbkVPTwfEto5vpnDxTlCLZVbqzC5EICnYhEkHBLkQiKNiFSAQFuxCJoGAXIhE6Kr3lsjx6BsOS0s6rr6PzFolqsWPXtXTOSI1LK1NHuCxXiyTCNOrhRIdbbvs4nbPjat4Ra9dvHqW2Z557gdqG+rgkc3IiXD8t77yMd6nAJS/wZcRcJClkmtSFG+rl54qcCo2IVDayMSzNAsBSLfx6nj0flrsAwCItu/ojdfLyGQ+naoUn3hx++3hwfGOZy3y7t4XbqHnk/q07uxCJoGAXIhEU7EIkgoJdiERQsAuRCAp2IRJhWenNzO4H8McAJtz9xvbYMIDvAbgKwFEAn3R3XmTrnWPlcshK4Qylk6cP0nl7fvuDwfHeQV7zK5s9QW2NeqRFTqTW2eG3w9lytw6F6+oBAHq2UVN/L5djuvI8k6s7Uuusq0gytiJ11bZuGaO2V958k9qKRV7nb2Y2vFZXbdtN51x3/Q3UNjnJL6++gTK1nTw1ERy3HK/vVh7iNf6mI7Xksohk191TprbF2fB1cIhcbwDQXQyfq1aPZClSy//jWwDueM/YfQAed/fdAB5v/yyEuIJZNtjb/dbf+w2JuwA80H78AICPX163hBCXm0v9m33U3cfbj0+h1dFVCHEFs+oNOnd3RL7paGb3mNl+M9s/Pc1rhgsh1pZLDfbTZjYGAO3/w7sgANx9n7vvdfe9g4MDl3g6IcRqudRgfxTA3e3HdwN45PK4I4RYK1YivX0XwO0ARszsOIAvAPgSgIfM7LMAjgH45EpOZpah0BW+u1cqvCDi0lI47a0QkaB6evmniN5IS6NSxrPe+vLhfk3f2vdNOudP/u291FaYP0VtxVIkeynHfdx19dbg+MTkSTqnMsez1zZvGqG2yRkuHS5Vw6/n1dfyTMVrruWZj9PPPUtt87Nz1DYzH/ax3uAS1eJiuB0TAJTLg9TWcC6VDZR5tl+9Gn49sxzvD3Z8PPxhukqy/IAVBLu7f5qYfn+5uUKIKwd9g06IRFCwC5EICnYhEkHBLkQiKNiFSISOFpyEGSwLSxALEfmnsrAYHC9EenLNnuNZXsi49FYAL0Q4Vg5nSr1xkPdsO3mc27DA5bBjx49S202beY+7rTvDxSi3TPBvNM8f4gU4h0tlausvc1nu8OGjwfGxLWFpEACmZvg3LGsRqez0Gd6rrukWHLdIcciFiPRmOX5dhc/UojdSqBLNcJZd0cLXPQBUz4VlW4+U7dSdXYhEULALkQgKdiESQcEuRCIo2IVIBAW7EInQWenNAZCeXZlzaWVsJNwfrqeLS28/PcALJQ5FivLtHubZSV2lsOxSzHOp5szEUWprLvHihTuu4UUss8jz7hkYCo6PjPLCl+cmedbYdCSzrRFRNzeS/mv5iFxaIdlfQDyba7HCs8PqxEk2DgCVJZ6BWa/z++OGkU3UZsavq6KFr5+SRfoOejjjsxApeqk7uxCJoGAXIhEU7EIkgoJdiERQsAuRCB3djTcDCvlwMslgH09OKfeHbdbku5UzzhMPzp7nKQsj/XxJeovhHdVGLlwjDwCOnjxKbaNDvJ7Zzmt5K6QKPx2eeibcRuvEON/57+8L7+ADQKHAWzy9fOgt7gi5jzQj95elyG783DxPCikP83ZNdZIIM36aFkRGbz9/XfIZTzTp6eE1EYusLRcA1MKJPI35KTpldFN/cDxf4G2tdGcXIhEU7EIkgoJdiERQsAuRCAp2IRJBwS5EIqyk/dP9AP4YwIS739ge+yKAPwdwpv1rn3f3x1ZywszCUsjmTeHaaS0niYwTSYAY28YTSfZH5LAp45KdZ+E6eYMjPKlicIAnQBS6wvIJAFwVkd76BsOJQQDwP+//dnB8IbJWM4uT1LawyGsDFiJXz+ah8POuTPJ6d/Mk0QgABgf46/Lqa29Q2+nTZ4LjM5GWUeUyf2IDvX3UljnXRAtVvo4ZqUW4sZcfb7ArHEf5yO17JXf2bwG4IzD+VXff0/63okAXQqwfywa7uz8BgL/1CyF+JVjN3+z3mtkBM7vfzPhXsIQQVwSXGuxfB3ANgD0AxgF8mf2imd1jZvvNbP/U1NQlnk4IsVouKdjd/bS7N9y9CeAbAGjXAnff5+573X1vuVy+RDeFEKvlkoLdzMYu+PETAF66PO4IIdaKlUhv3wVwO4ARMzsO4AsAbjezPWhVlTsK4C9WcrJcLkezfwaGuPRWb4TdLOV5JtF1u3ZQ2/5nuOQ1U7iW2po2Gxwf3crltVcO/gu1/c7v/Rm1/eLnfN78fKRNUvVscHzi1Nt0Tuw9f67GbXlwaWgoF86y29rNfZ8+wyW0esa3hUY3cVujEc6kW4y0eKos8rp785EaevUml/NqlRPUtqkQzujb0sez6Jbq4Tmxu/eywe7unw4Mf3O5eUKIKwt9g06IRFCwC5EICnYhEkHBLkQiKNiFSISOFpzM5XLo7QtnLw2NjNB5dQu7WckV6ZyuvgFqK5d5QcG33j5Fbbd+8P1hP+Z4O6me/nDWFQCMnzhObYdef53a6g3enihH6g3Oz0zTOf0bxqhteprLUIN9vBjl+667MTj+9Auv0jnPvnqU2m69/Q+prVDkEtXhQ4eC49Oz/HnFimJWFrm8tnOUS7rdvbyg6vBweJ7neQHOejVc+NJJVimgO7sQyaBgFyIRFOxCJIKCXYhEULALkQgKdiESoaPSm3sTzXpY8hgc5oX85hfDhQgXGrzvVpbx97Ed27dR2+sv88yr6YWwxNbXyzPstl9DTTj2Oi++eOLkOLV9+MMfpLaFhbA01L9lK50zvIUX53xrkktli0tcciz2hvuvDWzcTufc1M9flzNnwv3QAODosReobX4xLFNOTXMJbePGjdQ26Px12dnHJdFNA7wHW8HCmYDVGu9v10skthx4TOjOLkQiKNiFSAQFuxCJoGAXIhEU7EIkQkd345v1GmbPhXczuyO1vZYq4V1Oa3L3zfiu5Mgwb5/0eu4wtU1Mhlv4nMv4rvRgH6+td/2NPCHn8DFeM67GuyRhaiasduzevZvO2b2LSwbHxnkCzcsvv0ht586Gk1OKJa66DPXxRJLjL3NV4NQ5XtfOSLJUFmm9FWsdtpPnmWBHP08M6srxpJalSvj6aTZ5bcNanRyPX/a6swuRCgp2IRJBwS5EIijYhUgEBbsQiaBgFyIRVtL+aTuAvwMwitbG/j53/5qZDQP4HoCr0GoB9Ul3D/f8abO0tITDh8LS1o7dv0HndeXC0luzyhMF8l0RGSRi6+/n0lDfQLiu3fXXv4/O+cmPHqO2hWle765neBO1HTo+QW3bt4WTcna972Y6p1Tkl8HVO3iSz9Qkf7lfORhOKGo61w1PTPFEkhmSDAUAlQaXbWemwlLkps086eatc7w+3fB2LpeeK3E/0OTPbaoefm6e59fpEjleFTzhZiV39jqAv3H3GwB8CMBfmdkNAO4D8Li77wbwePtnIcQVyrLB7u7j7v5s+/EsgIMAtgK4C8AD7V97AMDH18hHIcRl4KL+ZjezqwDcBOBJAKPuv0zuPYXWx3whxBXKioPdzPoA/ADA59z9Xd9PdHcH+aKemd1jZvvNbP/sLC8YIIRYW1YU7GZWQCvQv+PuP2wPnzazsbZ9DEBw18jd97n7XnffG9v8EkKsLcsGu5kZWv3YD7r7Vy4wPQrg7vbjuwE8cvndE0JcLlaS9fa7AD4D4EUze7499nkAXwLwkJl9FsAxAJ9c7kALS3U8fygsG+248RY6r4lwtpmxzB8AaPL0n5nZWWqbmjpLbRuG9wTH77zjI3TOng9cT20P/fBhajPjEsrg4BC1bd0SlpT6Bsp0TlYPry8ADG/ml8jYrhq1TXeHZaPnXuD14sbneEqZF3g7r8HNPItx5JqwVJZFZK2Gcz9e83D7MgA4dIrLg8WMH3OxUgmOL0Qu73ozfH3MNnh24LLB7u4/A8A8/f3l5gshrgz0DTohEkHBLkQiKNiFSAQFuxCJoGAXIhE6WnCy0jC8Pt0dtJ1t8AKAXghLE7kqL4boRJoAgFyO27aM8Wyzf/074cyxrgKXXHbt5G2X/uhPP0Vt33/4H6jt7Cn+vMenw8ULK5VDdE4RXOOZXOS2Q8d41h6qYVnOR3iG4NCmcJFKAGhGKim2vvNF5nWFj9m0cCFKAKhF2opNN/i5ugr8mF15Lr3NWzjLrlbg5/JmeH0bEclWd3YhEkHBLkQiKNiFSAQFuxCJoGAXIhEU7EIkQkelt6WG4fWp8PvLIz/jfcP27BwJjm8u8gyknkIkW2sz7782NsKzq665mhQpdF5McPzMOWq7/0Eurz37/CvUxnrfAQBNBHT+vu4NfrxGia9HI8eloTzCEms9Ig3Vc+E5ANAVu1IjWWqVavh5e47PyUcy4rIm7+vnFS5T1sHnFZphHzPjr1m1FvY/0uJQd3YhUkHBLkQiKNiFSAQFuxCJoGAXIhE6uhvfgGEuF04WePzZ1+m8N94Mt4y647dvoHOu2cLb9Bw5HG5NBAC3ffBGausiiQmzVb7D/NA/Pk1tz71yktoW6pFWQpHd4lwh/P7djNTkyxnfRY7tWjeaPAFoieww1xp8jhmvabeESFKI8+eWz5Od7ozf53p6eEJLEdz/Bt9wR8N4qDXIxHqNvy7F/nJw3HL8PLqzC5EICnYhEkHBLkQiKNiFSAQFuxCJoGAXIhGWld7MbDuAv0OrJbMD2OfuXzOzLwL4cwBn2r/6eXd/LHqyfB4bRjYGbZPnuXwyfn4qOP7zF3irm0ZtZ8QTLq1s3EySXQBYFpbDntr/Ep3zDz/9BbUtNXnNNeS59JbLXfx7dGOJJ7t4RJZrRuS1mOTFWigV8vySs4xLmMj4a5aPzMuy8PliTUazyPrmnMuDjUiyUTMiHTLNbvNmLh/3D4Rtb5Yi68Q9+CV1AH/j7s+aWT+AZ8zsx23bV939v67gGEKIdWYlvd7GAYy3H8+a2UEAvGSqEOKK5KI+D5rZVQBuAvBke+heMztgZvebGW8tKoRYd1Yc7GbWB+AHAD7n7jMAvg7gGgB70Lrzf5nMu8fM9pvZ/voib5UshFhbVhTs1qrC/wMA33H3HwKAu59294a7NwF8A0Cwwbq773P3ve6+N9/NG0EIIdaWZYPdzAzANwEcdPevXDA+dsGvfQIA35IWQqw7K9mN/10AnwHwopk93x77PIBPm9ketOS4owD+YrkDmRmVSQoFLjXVK2E54ejpGTpnaf4gtd1283XU1l0eo7bpSlgi+ecn99M5FeeZS7U6l3FKJZ7Z1ozUQVtYCLcSipFFMrKMJ70h0pEJJSJ5xbKyELFZicuU3d28dl2eSH21SEbZ7Pw8tTUiMuVSnb8ug0PhOooAMDoWtvVFCu8tzob/JPbItbGS3fifAQi95FFNXQhxZaFv0AmRCAp2IRJBwS5EIijYhUgEBbsQidDRgpNwR7NOsqhiGUNZWIaqgmc7TcwtUduzr/FCj3cucGll1sNyx4nz/JuBpT6eXVVf4P5Xlrj/PT0RqYm0vYodz3Lcj1ykXVMsg82JjOaR+0shIjfO1Xj2XbXOpTImy8Uy9mIS2nyk9VZfmctr5Y285Vi1Hj7ma6/yrM4CyUasVbl/urMLkQgKdiESQcEuRCIo2IVIBAW7EImgYBciETosvQFgWUPO5Y4sCxfrazqXhRo5XuDv6ASXyu5/iOf3fPT2vcHxIyfPBMcBYKERK0IYkaG6eOHArMhtPaSHWbGby1qLs1y6imWHeUSiKpCMrSzPX7PYubJIUclYH7vFhbmLnhM7V3lomNo2jPKMybPnJqlt6uyp8PhbvCfhtbt2hQ0RSVF3diESQcEuRCIo2IVIBAW7EImgYBciERTsQiRCR6W3LJ9huFwO2ioVLofNL4YzeYoZz/6qR2ShXKS45RNPHaC2IyfD2XLT87xw5OTcIrWRZCcAQG9vJFsuUlSwVAo/t3xEruvq5hllWSQjLl/gx2yQ+0g9InlZxObOfWzU+PpXa+FF7u7iUuTIhg3UNjTC5bVqJHNzqRgpHkn6szXzXD6er4Svq2ZEwtadXYhEULALkQgKdiESQcEuRCIo2IVIhGV3482sC8ATAErt3/++u3/BzHYBeBDABgDPAPiMu0f2lwFvOpbILmIp8raz1AjvthYyvhtc55vI8Bw/Wa6b74IfIwkvuUhyR73Gd5hjikGlUqG2+Uh7ohx5bmyXHgB6i3zXtzuSQJPLcf+LXeHzdffw9a1WeSLM2UmeSNIEn5cvhNdjaKCXzhkdLlPb5s08EWZqntf5m506T21z01PB8fIwP9fZM2eD4/VIMtFK7uxLAD7q7h9Aqz3zHWb2IQB/C+Cr7n4tgPMAPruCYwkh1ollg91bvJMnWGj/cwAfBfD99vgDAD6+Fg4KIS4PK+3PnrU7uE4A+DGANwFMuf+yRelxAFvXxEMhxGVhRcHu7g133wNgG4BbAFy/0hOY2T1mtt/M9tcWeItlIcTaclG78e4+BeCfAHwYQNnsl429twE4Qebsc/e97r630DOwGl+FEKtg2WA3s41mVm4/7gbwMQAH0Qr6P23/2t0AHlkjH4UQl4GVJMKMAXjAzDK03hwecve/N7NXADxoZv8ZwHMAvrncgZrNJpYWw5JSKTM6r4d42azxJJNI1yI0wSWjWCJBk7SbqlcjCRwN/rxiLYhitmYkEYZJb+fPc+lnMrKOA31cohqM1GMbILXwusClvEaTS1d5iyTrlPiLvVQJH7OU569L7Fz1hemIjfs/N3WO2pokWaerxCXRCquTZ5HnRS1t3P0AgJsC44fR+vtdCPErgL5BJ0QiKNiFSAQFuxCJoGAXIhEU7EIkgsUknst+MrMzAI61fxwBEE7d6Szy493Ij3fzq+bHTnffGDJ0NNjfdWKz/e4ebp4mP+SH/LjsfuhjvBCJoGAXIhHWM9j3reO5L0R+vBv58W5+bfxYt7/ZhRCdRR/jhUiEdQl2M7vDzF4zs0Nmdt96+ND246iZvWhmz5vZ/g6e934zmzCzly4YGzazH5vZG+3/h9bJjy+a2Yn2mjxvZnd2wI/tZvZPZvaKmb1sZn/dHu/omkT86OiamFmXmT1lZi+0/fhP7fFdZvZkO26+Z2a84moId+/oPwAZWmWtrgZQBPACgBs67Ufbl6MARtbhvLcBuBnASxeM/RcA97Uf3wfgb9fJjy8C+PcdXo8xADe3H/cDeB3ADZ1ek4gfHV0TAAagr/24AOBJAB8C8BCAT7XH/zuAv7yY467Hnf0WAIfc/bC3Sk8/COCudfBj3XD3JwC8tzbyXWgV7gQ6VMCT+NFx3H3c3Z9tP55FqzjKVnR4TSJ+dBRvcdmLvK5HsG8F8PYFP69nsUoH8CMze8bM7lknH95h1N3H249PARhdR1/uNbMD7Y/5a/7nxIWY2VVo1U94Euu4Ju/xA+jwmqxFkdfUN+hudfebAfwhgL8ys9vW2yGg9c6O1hvRevB1ANeg1SNgHMCXO3ViM+sD8AMAn3P3d1Un7eSaBPzo+Jr4Koq8MtYj2E8A2H7Bz7RY5Vrj7ifa/08AeBjrW3nntJmNAUD7/4n1cMLdT7cvtCaAb6BDa2JmBbQC7Dvu/sP2cMfXJOTHeq1J+9xTuMgir4z1CPanAexu7ywWAXwKwKOddsLMes2s/53HAP4AwEvxWWvKo2gV7gTWsYDnO8HV5hPowJqYmaFVw/Cgu3/lAlNH14T50ek1WbMir53aYXzPbuOdaO10vgngP6yTD1ejpQS8AODlTvoB4LtofRysofW312fR6pn3OIA3APwEwPA6+fFtAC8COIBWsI11wI9b0fqIfgDA8+1/d3Z6TSJ+dHRNAPwWWkVcD6D1xvIfL7hmnwJwCMD/BlC6mOPqG3RCJELqG3RCJIOCXYhEULALkQgKdiESQcEuRCIo2IVIBAW7EImgYBciEf4vt7E0CllzrOkAAAAASUVORK5CYII=\n", - "text/plain": [ - "<Figure size 432x288 with 1 Axes>" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "import pkg_resources as pk\n", "import matplotlib.pyplot as plt\n", @@ -812,7 +528,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -829,20 +545,9 @@ }, { "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[3.]], dtype=float32)" - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "ret[oname]" ] @@ -874,20 +579,9 @@ }, { "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[sudo] password for xilinx: Requirement already satisfied: dataset_loading from git+https://github.com/fbcotter/dataset_loading.git@0.0.4#egg=dataset_loading in /usr/local/lib/python3.6/dist-packages\n", - "Requirement already satisfied: Pillow in /usr/lib/python3/dist-packages (from dataset_loading)\n", - "Requirement already satisfied: scipy in /usr/lib/python3/dist-packages (from dataset_loading)\n", - "Connection to 99.121.248.96 closed.\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "! ssh {options} -t {username}@{ip} -p {port} 'echo {password} | sudo -S pip3 install git+https://github.com/fbcotter/dataset_loading.git@0.0.4#egg=dataset_loading'" ] @@ -905,31 +599,9 @@ }, { "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[sudo] password for xilinx: Tar File found in dest_dir. Not Downloading again\n", - "Extracting Python CIFAR10 data.\n", - "Files extracted\n", - "batch 1 / 10 : total OK 851 NOK 149\n", - "batch 2 / 10 : total OK 1683 NOK 317\n", - "batch 3 / 10 : total OK 2522 NOK 478\n", - "batch 4 / 10 : total OK 3370 NOK 630\n", - "batch 5 / 10 : total OK 4207 NOK 793\n", - "batch 6 / 10 : total OK 5044 NOK 956\n", - "batch 7 / 10 : total OK 5887 NOK 1113\n", - "batch 8 / 10 : total OK 6728 NOK 1272\n", - "batch 9 / 10 : total OK 7570 NOK 1430\n", - "batch 10 / 10 : total OK 8419 NOK 1581\n", - "Final accuracy: 84.190000\n", - "Connection to 99.121.248.96 closed.\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "! ssh {options} -t {username}@{ip} -p {port} 'cd {target_dir_pynq}; echo {password} | sudo -S python3.6 validate.py --dataset cifar10 --batchsize 1000'" ] diff --git a/notebooks/end2end_example/bnn-pynq/tfc_end2end_example.ipynb b/notebooks/end2end_example/bnn-pynq/tfc_end2end_example.ipynb index a1a8450225f6bd375443a10a66739ca9dd00017e..2e19cda3dce3366ee729aa0c4640e5221886f457 100644 --- a/notebooks/end2end_example/bnn-pynq/tfc_end2end_example.ipynb +++ b/notebooks/end2end_example/bnn-pynq/tfc_end2end_example.ipynb @@ -42,15 +42,15 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from finn.util.visualization import showSrc, showInNetron\n", "from finn.util.basic import make_build_dir\n", - "\n", + "import os\n", " \n", - "build_dir = \"/workspace/finn\"" + "build_dir = os.environ[\"FINN_ROOT\"]" ] }, { @@ -77,27 +77,16 @@ }, { "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Downloading: \"https://github.com/Xilinx/brevitas/releases/download/bnn_pynq-r1/tfc_1w1a-45185b4d.pth\" to /home/maltanar/.cache/torch/checkpoints/tfc_1w1a-45185b4d.pth\n", - "100%|██████████| 249073/249073 [00:00<00:00, 767315.58it/s]\n", - "/workspace/brevitas/brevitas_examples/bnn_pynq/models/FC.py:84: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect.\n", - " x = 2.0 * x - torch.tensor([1.0], device=x.device)\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "import onnx\n", "from finn.util.test import get_test_model_trained\n", "import brevitas.onnx as bo\n", "\n", "tfc = get_test_model_trained(\"TFC\", 1, 1)\n", - "bo.export_finn_onnx(tfc, (1, 1, 28, 28), build_dir+\"/tfc_w1_a1.onnx\")" + "bo.export_finn_onnx(tfc, (1, 1, 28, 28), build_dir+\"/tfc_w1_a1.onnx\"); # semicolon added to suppress log" ] }, { @@ -110,38 +99,9 @@ }, { "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Serving '/workspace/finn/tfc_w1_a1.onnx' at http://0.0.0.0:8081\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " <iframe\n", - " width=\"100%\"\n", - " height=\"400\"\n", - " src=\"http://0.0.0.0:8081/\"\n", - " frameborder=\"0\"\n", - " allowfullscreen\n", - " ></iframe>\n", - " " - ], - "text/plain": [ - "<IPython.lib.display.IFrame at 0x7fe30c65e828>" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "showInNetron(build_dir+\"/tfc_w1_a1.onnx\")" ] @@ -155,7 +115,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -243,7 +203,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -271,40 +231,9 @@ }, { "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Stopping http://0.0.0.0:8081\n", - "Serving '/workspace/finn/tfc_w1_a1_tidy.onnx' at http://0.0.0.0:8081\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " <iframe\n", - " width=\"100%\"\n", - " height=\"400\"\n", - " src=\"http://0.0.0.0:8081/\"\n", - " frameborder=\"0\"\n", - " allowfullscreen\n", - " ></iframe>\n", - " " - ], - "text/plain": [ - "<IPython.lib.display.IFrame at 0x7fe2d26a7da0>" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "showInNetron(build_dir+\"/tfc_w1_a1_tidy.onnx\")" ] @@ -324,48 +253,9 @@ }, { "cell_type": "code", - "execution_count": 109, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Stopping http://0.0.0.0:8081\n", - "Serving '/workspace/finn/tfc_w1_a1_with_preproc.onnx' at http://0.0.0.0:8081\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/workspace/finn/src/finn/transformation/infer_data_layouts.py:113: UserWarning: Assuming 4D input is NCHW\n", - " warnings.warn(\"Assuming 4D input is NCHW\")\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " <iframe\n", - " width=\"100%\"\n", - " height=\"400\"\n", - " src=\"http://0.0.0.0:8081/\"\n", - " frameborder=\"0\"\n", - " allowfullscreen\n", - " ></iframe>\n", - " " - ], - "text/plain": [ - "<IPython.lib.display.IFrame at 0x7fe264171f98>" - ] - }, - "execution_count": 109, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "from finn.util.pytorch import ToTensor\n", "from finn.transformation.merge_onnx_models import MergeONNXModels\n", @@ -401,40 +291,9 @@ }, { "cell_type": "code", - "execution_count": 110, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Stopping http://0.0.0.0:8081\n", - "Serving '/workspace/finn/tfc_w1_a1_pre_post.onnx' at http://0.0.0.0:8081\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " <iframe\n", - " width=\"100%\"\n", - " height=\"400\"\n", - " src=\"http://0.0.0.0:8081/\"\n", - " frameborder=\"0\"\n", - " allowfullscreen\n", - " ></iframe>\n", - " " - ], - "text/plain": [ - "<IPython.lib.display.IFrame at 0x7fe2640f4588>" - ] - }, - "execution_count": 110, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "from finn.transformation.insert_topk import InsertTopK\n", "\n", @@ -472,49 +331,9 @@ }, { "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "class Streamline(Transformation):\n", - " \"\"\"Apply the streamlining transform, see arXiv:1709.04060.\"\"\"\n", - "\n", - " def apply(self, model):\n", - " streamline_transformations = [\n", - " ConvertSubToAdd(),\n", - " ConvertDivToMul(),\n", - " BatchNormToAffine(),\n", - " ConvertSignToThres(),\n", - " AbsorbSignBiasIntoMultiThreshold(),\n", - " MoveAddPastMul(),\n", - " MoveScalarAddPastMatMul(),\n", - " MoveAddPastConv(),\n", - " MoveScalarMulPastMatMul(),\n", - " MoveScalarMulPastConv(),\n", - " MoveAddPastMul(),\n", - " CollapseRepeatedAdd(),\n", - " CollapseRepeatedMul(),\n", - " AbsorbAddIntoMultiThreshold(),\n", - " FactorOutMulSignMagnitude(),\n", - " AbsorbMulIntoMultiThreshold(),\n", - " Absorb1BitMulIntoMatMul(),\n", - " Absorb1BitMulIntoConv(),\n", - " RoundAndClipThresholds(),\n", - " ]\n", - " for trn in streamline_transformations:\n", - " model = model.transform(trn)\n", - " model = model.transform(RemoveIdentityOps())\n", - " model = model.transform(GiveUniqueNodeNames())\n", - " model = model.transform(GiveReadableTensorNames())\n", - " model = model.transform(InferDataTypes())\n", - " return (model, False)\n", - "\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "from finn.transformation.streamline import Streamline\n", "showSrc(Streamline)" @@ -531,40 +350,9 @@ }, { "cell_type": "code", - "execution_count": 26, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Stopping http://0.0.0.0:8081\n", - "Serving '/workspace/finn/tfc_w1_a1_streamlined.onnx' at http://0.0.0.0:8081\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " <iframe\n", - " width=\"100%\"\n", - " height=\"400\"\n", - " src=\"http://0.0.0.0:8081/\"\n", - " frameborder=\"0\"\n", - " allowfullscreen\n", - " ></iframe>\n", - " " - ], - "text/plain": [ - "<IPython.lib.display.IFrame at 0x7fe2640f4d30>" - ] - }, - "execution_count": 26, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "from finn.transformation.streamline.reorder import MoveScalarLinearPastInvariants\n", "import finn.transformation.streamline.absorb as absorb\n", @@ -591,40 +379,9 @@ }, { "cell_type": "code", - "execution_count": 28, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Stopping http://0.0.0.0:8081\n", - "Serving '/workspace/finn/tfc_w1a1_ready_for_hls_conversion.onnx' at http://0.0.0.0:8081\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " <iframe\n", - " width=\"100%\"\n", - " height=\"400\"\n", - " src=\"http://0.0.0.0:8081/\"\n", - " frameborder=\"0\"\n", - " allowfullscreen\n", - " ></iframe>\n", - " " - ], - "text/plain": [ - "<IPython.lib.display.IFrame at 0x7fe30c65e898>" - ] - }, - "execution_count": 28, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "from finn.transformation.bipolar_to_xnor import ConvertBipolarMatMulToXnorPopcount\n", "from finn.transformation.streamline.round_thresholds import RoundAndClipThresholds\n", @@ -658,60 +415,27 @@ "metadata": {}, "source": [ "### Conversion to HLS layers <a id='hls_layers'></a>\n", - "Converts the nodes to HLS layers that correspond to the functions in [finn-hls library](https://finn-hlslib.readthedocs.io/en/latest/). In our case this transformation converts pairs of binary XnorPopcountMatMul layers to StreamingFCLayer_Batch layers. Any immediately following MultiThreshold layers will also be absorbed into the MVTU.\n", + "Converts the nodes to HLS layers that correspond to the functions in [finn-hls library](https://finn-hlslib.readthedocs.io/en/latest/). In our case this transformation converts pairs of binary XnorPopcountMatMul layers to MatrixVectorActivation layers. Any immediately following MultiThreshold layers will also be absorbed into the MVTU.\n", "\n", - "Below is the code for the transformation and the network is visualized using netron to create the new structure with `StreamingFCLayer_Batch` nodes, which will correspond to a function call from the [finn-hlslib](https://finn-hlslib.readthedocs.io/en/latest/library/fclayer.html#_CPPv4I_j_j_j_j000_i_i000E22StreamingFCLayer_BatchvRN3hls6streamI7ap_uintI9InStreamWEEERN3hls6streamI7ap_uintI10OutStreamWEEERK2TWRK2TAKjRK1R) library." + "Below is the code for the transformation and the network is visualized using netron to create the new structure with `MatrixVectorActivation` nodes, which will correspond to a function call from the [finn-hlslib](https://finn-hlslib.readthedocs.io/en/latest/library/matrixvector.html) library." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "**Note:** The transformation `to_hls.InferBinaryStreamingFCLayer` gets the string \"decoupled\" as argument, this indicates the `mem_mode` for the weights. In FINN there are different options to set the way the weights are stored and accessed. For details please have a look on the [FINN readthedocs website](https://finn.readthedocs.io/) under Internals." + "**Note:** The transformation `to_hls.InferBinaryMatrixVectorActivation` gets the string \"decoupled\" as argument, this indicates the `mem_mode` for the weights. In FINN there are different options to set the way the weights are stored and accessed. For details please have a look on the [FINN readthedocs website](https://finn.readthedocs.io/) under Internals." ] }, { "cell_type": "code", - "execution_count": 29, - "metadata": { - "scrolled": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Stopping http://0.0.0.0:8081\n", - "Serving '/workspace/finn/tfc_w1_a1_hls_layers.onnx' at http://0.0.0.0:8081\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " <iframe\n", - " width=\"100%\"\n", - " height=\"400\"\n", - " src=\"http://0.0.0.0:8081/\"\n", - " frameborder=\"0\"\n", - " allowfullscreen\n", - " ></iframe>\n", - " " - ], - "text/plain": [ - "<IPython.lib.display.IFrame at 0x7fe30c65e748>" - ] - }, - "execution_count": 29, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls\n", "model = ModelWrapper(build_dir+\"/tfc_w1a1_ready_for_hls_conversion.onnx\")\n", - "model = model.transform(to_hls.InferBinaryStreamingFCLayer(\"decoupled\"))\n", + "model = model.transform(to_hls.InferBinaryMatrixVectorActivation(\"decoupled\"))\n", "# TopK to LabelSelect\n", "model = model.transform(to_hls.InferLabelSelectLayer())\n", "# input quantization (if any) to standalone thresholding\n", @@ -724,7 +448,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Each StreamingFCLayer_Batch node has two attributes that specify the degree of folding, PE and SIMD. In all nodes the values for these attributes are set as default to 1, which would correspond to a maximum folding (time multiplexing) and thus minimum performance. We will shortly cover how these can be adjusted, but first we want to separate the HLS layers from the non-HLS layers in this network." + "Each MatrixVectorActivation node has two attributes that specify the degree of folding, PE and SIMD. In all nodes the values for these attributes are set as default to 1, which would correspond to a maximum folding (time multiplexing) and thus minimum performance. We will shortly cover how these can be adjusted, but first we want to separate the HLS layers from the non-HLS layers in this network." ] }, { @@ -733,45 +457,14 @@ "source": [ "### Creating a Dataflow Partition <a id='dataflow_partition'></a>\n", "\n", - "In the graph above, you can see that there is a mixture of FINN HLS layers (StreamingFCLayer_Batch) with regular ONNX layers (Reshape, Mul, Add). To create a bitstream, FINN needs a model with only HLS layers. In order to achieve this, we will use the `CreateDataflowPartition` transformation to create a \"dataflow partition\" in this graph, separating out the HLS layers into another model, and replacing them with a placeholder layer called StreamingDataflowPartition:" + "In the graph above, you can see that there is a mixture of FINN HLS layers (MatrixVectorActivation) with regular ONNX layers (Reshape, Mul, Add). To create a bitstream, FINN needs a model with only HLS layers. In order to achieve this, we will use the `CreateDataflowPartition` transformation to create a \"dataflow partition\" in this graph, separating out the HLS layers into another model, and replacing them with a placeholder layer called StreamingDataflowPartition:" ] }, { "cell_type": "code", - "execution_count": 30, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Stopping http://0.0.0.0:8081\n", - "Serving '/workspace/finn/tfc_w1_a1_dataflow_parent.onnx' at http://0.0.0.0:8081\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " <iframe\n", - " width=\"100%\"\n", - " height=\"400\"\n", - " src=\"http://0.0.0.0:8081/\"\n", - " frameborder=\"0\"\n", - " allowfullscreen\n", - " ></iframe>\n", - " " - ], - "text/plain": [ - "<IPython.lib.display.IFrame at 0x7fe2640abc88>" - ] - }, - "execution_count": 30, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "from finn.transformation.fpgadataflow.create_dataflow_partition import CreateDataflowPartition\n", "\n", @@ -785,45 +478,14 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "We can see that the StreamingFCLayer instances have all been replaced with a single `StreamingDataflowPartition`, which has an attribute `model` that points to the extracted, HLS dataflow-only graph:" + "We can see that the MatrixVectorActivation instances have all been replaced with a single `StreamingDataflowPartition`, which has an attribute `model` that points to the extracted, HLS dataflow-only graph:" ] }, { "cell_type": "code", - "execution_count": 33, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Stopping http://0.0.0.0:8081\n", - "Serving '/tmp/finn_dev_maltanar/dataflow_partition0_q1ym9aul/df_model.onnx' at http://0.0.0.0:8081\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " <iframe\n", - " width=\"100%\"\n", - " height=\"400\"\n", - " src=\"http://0.0.0.0:8081/\"\n", - " frameborder=\"0\"\n", - " allowfullscreen\n", - " ></iframe>\n", - " " - ], - "text/plain": [ - "<IPython.lib.display.IFrame at 0x7fe264098f60>" - ] - }, - "execution_count": 33, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "from finn.custom_op.registry import getCustomOp\n", "sdp_node = parent_model.get_nodes_by_op_type(\"StreamingDataflowPartition\")[0]\n", @@ -836,12 +498,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "We can see all the extracted `StreamingFCLayer` instances have been moved to the child (dataflow) model. We will load the child model with `ModelWrapper` and continue working on it." + "We can see all the extracted `MatrixVectorActivation` instances have been moved to the child (dataflow) model. We will load the child model with `ModelWrapper` and continue working on it." ] }, { "cell_type": "code", - "execution_count": 34, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -856,7 +518,7 @@ "\n", "*Folding* in FINN describes how much a layer is time-multiplexed in terms of execution resources. There are several *folding factors* for each layer, controlled by the PE (parallelization over outputs) and SIMD (parallelization over inputs) parameters as described by the original [FINN paper](https://arxiv.org/pdf/1612.07119). The higher the PE and SIMD values are set, the faster the generated accelerator will run, and the more FPGA resources it will consume. \n", "\n", - "Since the folding parameters are node attributes, they can be easily accessed and changed using a helper function of the `ModelWrapper`. But first we take a closer look at one of the nodes that implement a StreamingFCLayer_Batch operation. This is where the Netron visualization helps us, in the above diagram we can see that the first four nodes are StreamingFCLayer_Batch. So as an example we extract the first node." + "Since the folding parameters are node attributes, they can be easily accessed and changed using a helper function of the `ModelWrapper`. But first we take a closer look at one of the nodes that implement a MatrixVectorActivation operation. This is where the Netron visualization helps us, in the above diagram we can see that the first four nodes are MatrixVectorActivation. So as an example we extract the first node." ] }, { @@ -868,51 +530,9 @@ }, { "cell_type": "code", - "execution_count": 35, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CustomOp wrapper is of class Thresholding_Batch\n" - ] - }, - { - "data": { - "text/plain": [ - "{'PE': ('i', True, 0),\n", - " 'NumChannels': ('i', True, 0),\n", - " 'ram_style': ('s', False, 'distributed'),\n", - " 'inputDataType': ('s', True, ''),\n", - " 'outputDataType': ('s', True, ''),\n", - " 'inFIFODepth': ('i', False, 2),\n", - " 'outFIFODepth': ('i', False, 2),\n", - " 'numInputVectors': ('ints', False, [1]),\n", - " 'ActVal': ('i', False, 0),\n", - " 'backend': ('s', True, 'fpgadataflow'),\n", - " 'code_gen_dir_cppsim': ('s', False, ''),\n", - " 'code_gen_dir_ipgen': ('s', False, ''),\n", - " 'executable_path': ('s', False, ''),\n", - " 'ipgen_path': ('s', False, ''),\n", - " 'ip_path': ('s', False, ''),\n", - " 'ip_vlnv': ('s', False, ''),\n", - " 'exec_mode': ('s', False, ''),\n", - " 'cycles_rtlsim': ('i', False, 0),\n", - " 'cycles_estimate': ('i', False, 0),\n", - " 'rtlsim_trace': ('s', False, ''),\n", - " 'res_estimate': ('s', False, ''),\n", - " 'res_hls': ('s', False, ''),\n", - " 'res_synth': ('s', False, ''),\n", - " 'rtlsim_so': ('s', False, ''),\n", - " 'partition_id': ('i', False, 0)}" - ] - }, - "execution_count": 35, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "fc0 = model.graph.node[0]\n", "fc0w = getCustomOp(fc0)\n", @@ -932,11 +552,11 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "fc_layers = model.get_nodes_by_op_type(\"StreamingFCLayer_Batch\")\n", + "fc_layers = model.get_nodes_by_op_type(\"MatrixVectorActivation\")\n", "# (PE, SIMD, in_fifo_depth, out_fifo_depth, ramstyle) for each layer\n", "config = [\n", " (16, 49, 16, 64, \"block\"),\n", @@ -977,42 +597,9 @@ }, { "cell_type": "code", - "execution_count": 42, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Stopping http://0.0.0.0:8081\n", - "Serving '/workspace/finn/tfc_w1_a1_set_folding_factors.onnx' at http://0.0.0.0:8081\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " <iframe\n", - " width=\"100%\"\n", - " height=\"400\"\n", - " src=\"http://0.0.0.0:8081/\"\n", - " frameborder=\"0\"\n", - " allowfullscreen\n", - " ></iframe>\n", - " " - ], - "text/plain": [ - "<IPython.lib.display.IFrame at 0x7fe2640712e8>" - ] - }, - "execution_count": 42, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "model.save(build_dir+\"/tfc_w1_a1_set_folding_factors.onnx\")\n", "showInNetron(build_dir+\"/tfc_w1_a1_set_folding_factors.onnx\")" @@ -1038,17 +625,9 @@ }, { "cell_type": "code", - "execution_count": 43, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "dict_keys(['Ultra96', 'Pynq-Z1', 'Pynq-Z2', 'ZCU102', 'ZCU104'])\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "# print the names of the supported PYNQ boards\n", "from finn.util.basic import pynq_part_map\n", @@ -1057,7 +636,7 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -1076,7 +655,7 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -1087,7 +666,7 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -1105,40 +684,9 @@ }, { "cell_type": "code", - "execution_count": 99, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Stopping http://0.0.0.0:8081\n", - "Serving '/workspace/finn/tfc_w1_a1_post_synthesis.onnx' at http://0.0.0.0:8081\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " <iframe\n", - " width=\"100%\"\n", - " height=\"400\"\n", - " src=\"http://0.0.0.0:8081/\"\n", - " frameborder=\"0\"\n", - " allowfullscreen\n", - " ></iframe>\n", - " " - ], - "text/plain": [ - "<IPython.lib.display.IFrame at 0x7fe2ef58eb00>" - ] - }, - "execution_count": 99, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "showInNetron(build_dir + \"/tfc_w1_a1_post_synthesis.onnx\")" ] @@ -1152,40 +700,9 @@ }, { "cell_type": "code", - "execution_count": 102, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Stopping http://0.0.0.0:8081\n", - "Serving '/tmp/finn_dev_maltanar/dataflow_partition2_b6c72_s0/df_model.onnx' at http://0.0.0.0:8081\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " <iframe\n", - " width=\"100%\"\n", - " height=\"400\"\n", - " src=\"http://0.0.0.0:8081/\"\n", - " frameborder=\"0\"\n", - " allowfullscreen\n", - " ></iframe>\n", - " " - ], - "text/plain": [ - "<IPython.lib.display.IFrame at 0x7fe2ef5a0e48>" - ] - }, - "execution_count": 102, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "model = ModelWrapper(build_dir + \"/tfc_w1_a1_post_synthesis.onnx\")\n", "sdp_node_middle = getCustomOp(model.graph.node[1])\n", @@ -1203,34 +720,9 @@ }, { "cell_type": "code", - "execution_count": 103, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[key: \"pynq_driver_dir\"\n", - "value: \"/tmp/finn_dev_maltanar/pynq_driver_kl300vbh\"\n", - ", key: \"vivado_stitch_proj\"\n", - "value: \"/tmp/finn_dev_maltanar/vivado_stitch_proj_yy5ixo91\"\n", - ", key: \"clk_ns\"\n", - "value: \"10\"\n", - ", key: \"wrapper_filename\"\n", - "value: \"/tmp/finn_dev_maltanar/vivado_stitch_proj_yy5ixo91/finn_vivado_stitch_proj.srcs/sources_1/bd/StreamingDataflowPartition_1/hdl/StreamingDataflowPartition_1_wrapper.v\"\n", - ", key: \"vivado_stitch_vlnv\"\n", - "value: \"xilinx_finn:finn:StreamingDataflowPartition_1:1.0\"\n", - ", key: \"vivado_stitch_ifnames\"\n", - "value: \"{\\'clk\\': [\\'ap_clk\\'], \\'rst\\': [\\'ap_rst_n\\'], \\'s_axis\\': [\\'s_axis_0\\'], \\'m_axis\\': [\\'m_axis_0\\'], \\'aximm\\': [], \\'axilite\\': []}\"\n", - ", key: \"platform\"\n", - "value: \"zynq-iodma\"\n", - "]" - ] - }, - "execution_count": 103, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "model = ModelWrapper(postsynth_layers)\n", "model.model.metadata_props" @@ -1252,32 +744,9 @@ }, { "cell_type": "code", - "execution_count": 97, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[key: \"pynq_driver_dir\"\n", - "value: \"/tmp/finn_dev_maltanar/pynq_driver_kl300vbh\"\n", - ", key: \"vivado_pynq_proj\"\n", - "value: \"/tmp/finn_dev_maltanar/vivado_zynq_proj_kdf60v6f\"\n", - ", key: \"bitfile\"\n", - "value: \"/tmp/finn_dev_maltanar/vivado_zynq_proj_kdf60v6f/resizer.bit\"\n", - ", key: \"hw_handoff\"\n", - "value: \"/tmp/finn_dev_maltanar/vivado_zynq_proj_kdf60v6f/resizer.hwh\"\n", - ", key: \"vivado_synth_rpt\"\n", - "value: \"/tmp/finn_dev_maltanar/vivado_zynq_proj_kdf60v6f/synth_report.xml\"\n", - ", key: \"platform\"\n", - "value: \"zynq-iodma\"\n", - "]" - ] - }, - "execution_count": 97, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "model = ModelWrapper(build_dir + \"/tfc_w1_a1_post_synthesis.onnx\")\n", "model.model.metadata_props" @@ -1292,20 +761,9 @@ }, { "cell_type": "code", - "execution_count": 98, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "NA\t\t\t finn_zynq_link.runs resizer.bit\t vivado.jou\r\n", - "finn_zynq_link.cache\t finn_zynq_link.srcs resizer.hwh\t vivado.log\r\n", - "finn_zynq_link.hw\t finn_zynq_link.xpr synth_project.sh\r\n", - "finn_zynq_link.ip_user_files ip_config.tcl\t synth_report.xml\r\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "! ls {model.get_metadata_prop(\"vivado_pynq_proj\")}" ] @@ -1344,21 +802,9 @@ }, { "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Welcome to PYNQ Linux, based on Ubuntu 18.04 (GNU/Linux 5.4.0-xilinx-v2020.1 armv7l)\r\n", - "\r\n", - " * Pure upstream Kubernetes 1.21, smallest, simplest cluster ops!\r\n", - "\r\n", - " https://microk8s.io/\r\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "import os\n", "\n", @@ -1378,7 +824,7 @@ }, { "cell_type": "code", - "execution_count": 47, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -1397,68 +843,18 @@ }, { "cell_type": "code", - "execution_count": 48, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[key: \"pynq_driver_dir\"\n", - "value: \"/tmp/finn_dev_maltanar/pynq_driver_kl300vbh\"\n", - ", key: \"vivado_pynq_proj\"\n", - "value: \"/tmp/finn_dev_maltanar/vivado_zynq_proj_kdf60v6f\"\n", - ", key: \"bitfile\"\n", - "value: \"/tmp/finn_dev_maltanar/vivado_zynq_proj_kdf60v6f/resizer.bit\"\n", - ", key: \"hw_handoff\"\n", - "value: \"/tmp/finn_dev_maltanar/vivado_zynq_proj_kdf60v6f/resizer.hwh\"\n", - ", key: \"vivado_synth_rpt\"\n", - "value: \"/tmp/finn_dev_maltanar/vivado_zynq_proj_kdf60v6f/synth_report.xml\"\n", - ", key: \"platform\"\n", - "value: \"zynq-iodma\"\n", - ", key: \"pynq_ip\"\n", - "value: \"192.168.2.99\"\n", - ", key: \"pynq_port\"\n", - "value: \"22\"\n", - ", key: \"pynq_username\"\n", - "value: \"xilinx\"\n", - ", key: \"pynq_password\"\n", - "value: \"xilinx\"\n", - ", key: \"pynq_target_dir\"\n", - "value: \"/home/xilinx/finn_tfc_end2end_example\"\n", - ", key: \"pynq_deployment_dir\"\n", - "value: \"/tmp/finn_dev_maltanar/pynq_deployment_3wrnn2sp\"\n", - ", key: \"pynq_deploy_dir\"\n", - "value: \"/tmp/finn_dev_maltanar/pynq_deployment_3wrnn2sp\"\n", - ", key: \"exec_mode\"\n", - "value: \"remote_pynq\"\n", - "]" - ] - }, - "execution_count": 48, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "model.model.metadata_props" ] }, { "cell_type": "code", - "execution_count": 106, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'/home/xilinx/finn_tfc_end2end_example/pynq_deployment_3wrnn2sp'" - ] - }, - "execution_count": 106, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "target_dir_pynq = target_dir + \"/\" + model.get_metadata_prop(\"pynq_deployment_dir\").split(\"/\")[-1]\n", "target_dir_pynq" @@ -1466,27 +862,9 @@ }, { "cell_type": "code", - "execution_count": 107, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "total 4236\r\n", - "-rw-r--r-- 1 xilinx xilinx 8490 Sep 21 11:06 driver.py\r\n", - "drwxr-xr-x 4 xilinx xilinx 4096 Sep 21 11:06 finn\r\n", - "-rw-r--r-- 1 xilinx xilinx 3264 Sep 21 12:05 input.npy\r\n", - "-rw-r--r-- 1 root root 205 Sep 21 12:34 nw_metrics.txt\r\n", - "-rw-r--r-- 1 root root 84 Sep 21 12:06 output.npy\r\n", - "drwxrwxr-x 2 xilinx xilinx 4096 Sep 21 11:34 __pycache__\r\n", - "-rw-r--r-- 1 xilinx xilinx 4045671 Sep 21 11:06 resizer.bit\r\n", - "-rw-r--r-- 1 xilinx xilinx 246211 Sep 21 11:06 resizer.hwh\r\n", - "-rw-r--r-- 1 root root 32 Sep 21 12:34 sds_trace_data.dat\r\n", - "-rw-r--r-- 1 xilinx xilinx 1727 Sep 21 11:06 validate.py\r\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "! ssh {options} {username}@{ip} -p {port} 'ls -l {target_dir_pynq}'" ] @@ -1500,32 +878,9 @@ }, { "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "<matplotlib.image.AxesImage at 0x7fcb96004cc0>" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAPsAAAD4CAYAAAAq5pAIAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/d3fzzAAAACXBIWXMAAAsTAAALEwEAmpwYAAARYElEQVR4nO3dfYyVZXrH8d/FoDAw8iYRCaisG/5QqmUbgk1KyOKmxlUMbKJm/aPauAmarMmqTVqz/UOSaqJVa/pH3YStL9CsmiWoq0a7a82mWo1GNFQQW1CULGR4E5H3t+HqH/NgZ3We6549z3nOc9z7+0kmM3Ouec65OTM/zsv13Pdt7i4Af/xGNT0AAJ1B2IFMEHYgE4QdyARhBzIxupM3Zma89Z+ZUaPKH09OnTpV23VXvf6enp6wPjAw0PJ1183dbbjLK4XdzK6U9M+SeiT9q7vfV+X6cmU27O/mS6k/6ip/eKNHx38CqcCk6r29vaW1Q4cOhcem9PX1hfUDBw6U1lIt50mTJoX1zz77LKx3o5afxptZj6R/kfR9SRdLusHMLm7XwAC0V5XX7PMlfeTuW9z9uKSnJS1pz7AAtFuVsM+Q9Lsh328rLvs9ZrbMzNaa2doKtwWgotrfoHP3FZJWSLxBBzSpyiP7dknnDfl+ZnEZgC5UJezvSJptZt8yszMl/VDS8+0ZFoB2a/lpvLufNLPbJP1ag623x9z9g7aNLCPjx48P6wcPHmz5useMGRPWjx07FtZTbcFx48aF9ai9lmoppqSOj9prqT76vn37WhlSV6v0mt3dX5L0UpvGAqBGnC4LZIKwA5kg7EAmCDuQCcIOZIKwA5mwTq4um+vpsqled6qXffTo0bA+duzYlo9Nia676vWfffbZYb3qNNLofp06dWp47O7du8N6amrwyZMnw3qdyuaz88gOZIKwA5kg7EAmCDuQCcIOZIKwA5mg9fYNkGrNVfkd1nnddUtNDa6yem1q6m5qanCTS03TegMyR9iBTBB2IBOEHcgEYQcyQdiBTBB2IBP02TvgrLPOCuvRbqOSNHHixLB+4sSJ0lpqN9LUFNbPP/88rC9YsCCs33rrraW1VC/6jjvuCOtbt24N601OM20SfXYgc4QdyARhBzJB2IFMEHYgE4QdyARhBzJBn/0b4JFHHgnrUS871Wuuuox1b29vWI+ktk2+5JJLwvqmTZvC+vHjx0trZ5xxRnhsdO6ClP53HzlyJKzXqazPXmnLZjP7VNIBSQOSTrr7vCrXB6A+lcJeWOTue9pwPQBqxGt2IBNVw+6SfmNm75rZsuF+wMyWmdlaM1tb8bYAVFD1afwCd99uZudIesXM/sfdXxv6A+6+QtIKiTfogCZVemR39+3F512SnpU0vx2DAtB+LYfdzMab2Vmnv5Z0haQN7RoYgPaq8jR+mqRniz7taElPuvu/t2VUf2RSWzYvWrQorF922WVhPeqVHzx4MDw21W/u6+sL66nzNKI566m11x999NGWr1uS7rzzztLaW2+9FR5b93bSTWg57O6+RdKftnEsAGpE6w3IBGEHMkHYgUwQdiAThB3IBFNcu0Bqqubs2bPD+v79+0trEyZMCI+NpoFK6SmwVbZ8TrX9UlJLcO/du7e0tnTp0vDYdevWhfVUSzLV8qwTS0kDmSPsQCYIO5AJwg5kgrADmSDsQCYIO5CJdiw42TFRT7fOfnBK6thU/ZZbbgnrq1atCuszZ85s+bZTffZ77rknrK9evTqsn3nmmaW1K664Ijz2wQcfDOuprbCj2168eHF47LZt28L6nj3fvDVWeWQHMkHYgUwQdiAThB3IBGEHMkHYgUwQdiATHZ/Pnup3Rzo51naqOvd54cKFYf2iiy4qrY0bNy48dvTo+FSLNWvWhPUtW7aE9SpSyz3PmTMnrKfu90jq75T57AC6FmEHMkHYgUwQdiAThB3IBGEHMkHYgUx0vM8+alT5/y9V54XXqcpc+lOnTlW67eg+S9VPnjwZHjt+/PiwfujQobCe2o46+p2l5tJfffXVYf3pp58O61X67Kk17VP3a5Na7rOb2WNmtsvMNgy5bIqZvWJmm4vPk9s5WADtN5Kn8U9IuvIrl90l6VV3ny3p1eJ7AF0sGXZ3f03SV/fRWSJpZfH1SklL2zssAO3W6hp009y9v/h6h6RpZT9oZsskLWvxdgC0SeUFJ93dow0b3X2FpBUSGzsCTWq19bbTzKZLUvF5V/uGBKAOrYb9eUk3FV/fJOlX7RkOgLok++xm9pSk70qaKmmnpLslPSfpl5LOl7RV0vXuXr4Z9v9fV21P46uuG1+1Hkn1ZFN7qEf7r1fV29sb1o8cORLWU+cAVDnH4MILLwzrH3/8ccvXnRpXak36lMOHD1c6voqyPnvyNbu731BS+l6lEQHoKE6XBTJB2IFMEHYgE4QdyARhBzLBls2FVAtyYGAgrEd6enrCetVlh6M2UarFlJrCmpK6/mjb5KgmSYsWLWppTKdFv9MTJ06Ex6amuFb5e2gKj+xAJgg7kAnCDmSCsAOZIOxAJgg7kAnCDmSiq/rsdW7nXHU55yrqvu0DBw6U1lL94lSvO3V8qk8fLRedWsb6uuuuC+tHjx4N62PHji2tpfrsqd9Zk1syt4pHdiAThB3IBGEHMkHYgUwQdiAThB3IBGEHMtHxPns0t7ube+XRksmp5ZRT6txW+dJLLw2PnTNnTlhPLSX93HPPhfVI1AeXpIULF4b1Klt4p5ahjs5dkKovwd0EHtmBTBB2IBOEHcgEYQcyQdiBTBB2IBOEHchEx/vs0Zz1OvvoqbnyqXndUU949Oj4bly6dGlYTx2/ZMmSsD5mzJjS2ty5c8NjJ02aFNZTvezXX3+95eNnz54dHptamz3V616/fn1p7fLLLw+Pje5TqTv76CnJR3Yze8zMdpnZhiGXLTez7Wa2rvi4qt5hAqhqJE/jn5B05TCXP+zuc4uPl9o7LADtlgy7u78maW8HxgKgRlXeoLvNzN4vnuZPLvshM1tmZmvNbG2F2wJQUath/5mkb0uaK6lf0kNlP+juK9x9nrvPa/G2ALRBS2F3953uPuDupyT9XNL89g4LQLu1FHYzmz7k2x9I2lD2swC6g6X6qGb2lKTvSpoqaaeku4vv50pySZ9KusXd+5M3ZhbeWKrfnJr3HZk1a1ZYv+aaa8L64sWLS2upedepedupudPR/utSvIZ5X19feGxK1Xnd0e/0iy++CI+dOHFiWE/ZvHlzaW3VqlXhsQ89VPrKVFJ399ndfdiTSpIn1bj7DcNc/GjlEQHoKE6XBTJB2IFMEHYgE4QdyARhBzKRbL219cbMPFp2uc4prnfffXdYX758eVjfs2dPaW3q1KmtDOlLqa2H9+6NpyZE9QsuuCA8NtUWTG3ZnHLs2LHSWmoaaervIdWKjaYtp7Zcfvnll8P6zTffHNab3NK5rPXGIzuQCcIOZIKwA5kg7EAmCDuQCcIOZIKwA5noeJ89qlfZmjg11TLV96yy7fKuXbvC+tatW8P6Aw88ENZXr14d1ufNK18E6OGHHw6PTW3ZPHly6YpjkqRt27aF9eh3+sQTT4THfvLJJ2H92muvDevR1OOq02tffPHFsJ6aMl0n+uxA5gg7kAnCDmSCsAOZIOxAJgg7kAnCDmSio332UaNGeTQ/+vjx4+Hx55xzTmlt9+7d4bGpPntq7nTUL05tB71p06awPmXKlLCeWrY4Wu75/PPPD49NzWdPLe+9b9++sH7jjTeW1l544YXw2JTUOgLRctGLFi0Kj02tMZC6X1LLf9eJPjuQOcIOZIKwA5kg7EAmCDuQCcIOZIKwA5noqvnsVaT6nitXrgzr119/fcvXf/jw4fDYcePGhfXUtsipef4DAwOltdS672+++WZYf/LJJ8P6unXrwvobb7xRWkudX5Dq4ad+59F5G/Pnzw+Pffvtt8P6448/HtZT68rXqeU+u5mdZ2a/NbONZvaBmf2kuHyKmb1iZpuLz/EqBwAaNZKn8Scl/Y27XyzpzyX92MwulnSXpFfdfbakV4vvAXSpZNjdvd/d3yu+PiDpQ0kzJC2RdPq58UpJS2saI4A2iF/0fIWZzZL0HUlvS5rm7v1FaYekaSXHLJO0rMIYAbTBiN+NN7M+SWsk3e7u+4fWfPBdvmHffHP3Fe4+z93LV0UEULsRhd3MztBg0H/h7s8UF+80s+lFfbqkeIlVAI1Ktt5scP7mSkl73f32IZc/IOkzd7/PzO6SNMXd/zZxXeGNnXvuueFYduzYEdYj0fa9kjRz5sywfu+995bWZsyYER6b2nI5tXVxtF20JN1///2ltY0bN4bHpqa4prZFTklNW46k2oYnTpwI69HU49Tf/YQJE8J61SnTdSprvY3kNftfSPorSevNbF1x2U8l3Sfpl2b2I0lbJcWNagCNSobd3f9LUtl/kd9r73AA1IXTZYFMEHYgE4QdyARhBzJB2IFMdHSKa09Pj0d93dRU0aj3uX///tKaJPX19YX1VN806vlW6fdK6Z5v6hyBqJed6uEfO3YsrFcV/b5TyzWnpgan/l6q/M5Sqo6tTiwlDWSOsAOZIOxAJgg7kAnCDmSCsAOZIOxAJrpqKenUHOKol55aVrjqvOzp06eX1vr7+0trI9Hb2xvWU1s213ndqWWsDx06FNarzClPGTUqfqyqMqe86fMTqqDPDmSOsAOZIOxAJgg7kAnCDmSCsAOZIOxAJrqqzw6gOvrsQOYIO5AJwg5kgrADmSDsQCYIO5AJwg5kIhl2MzvPzH5rZhvN7AMz+0lx+XIz225m64qPq+ofLoBWJU+qMbPpkqa7+3tmdpakdyUt1eB+7Afd/cER3xgn1QC1KzupZiT7s/dL6i++PmBmH0qa0d7hAajbH/Sa3cxmSfqOpLeLi24zs/fN7DEzm1xyzDIzW2tma6sNFUAVIz433sz6JP2npHvd/RkzmyZpjySX9A8afKp/c+I6eBoP1KzsafyIwm5mZ0h6UdKv3f2fhqnPkvSiu/9J4noIO1CzlifC2ODyoI9K+nBo0Is37k77gaQNVQcJoD4jeTd+gaTXJa2XdHpt3p9KukHSXA0+jf9U0i3Fm3nRdfHIDtSs0tP4diHsQP2Yzw5kjrADmSDsQCYIO5AJwg5kgrADmSDsQCYIO5AJwg5kgrADmSDsQCYIO5AJwg5kgrADmUguONlmeyRtHfL91OKybtStY+vWcUmMrVXtHNsFZYWOzmf/2o2brXX3eY0NINCtY+vWcUmMrVWdGhtP44FMEHYgE02HfUXDtx/p1rF167gkxtaqjoyt0dfsADqn6Ud2AB1C2IFMNBJ2M7vSzP7XzD4ys7uaGEMZM/vUzNYX21A3uj9dsYfeLjPbMOSyKWb2ipltLj4Pu8deQ2Prim28g23GG73vmt7+vOOv2c2sR9ImSX8paZukdyTd4O4bOzqQEmb2qaR57t74CRhmtlDSQUmrTm+tZWb/KGmvu99X/Ec52d3/rkvGtlx/4DbeNY2tbJvxv1aD9107tz9vRROP7PMlfeTuW9z9uKSnJS1pYBxdz91fk7T3KxcvkbSy+HqlBv9YOq5kbF3B3fvd/b3i6wOSTm8z3uh9F4yrI5oI+wxJvxvy/TZ1137vLuk3ZvaumS1rejDDmDZkm60dkqY1OZhhJLfx7qSvbDPeNfddK9ufV8UbdF+3wN3/TNL3Jf24eLralXzwNVg39U5/JunbGtwDsF/SQ00OpthmfI2k2919/9Bak/fdMOPqyP3WRNi3SzpvyPczi8u6grtvLz7vkvSsBl92dJOdp3fQLT7vang8X3L3ne4+4O6nJP1cDd53xTbjayT9wt2fKS5u/L4bblydut+aCPs7kmab2bfM7ExJP5T0fAPj+BozG1+8cSIzGy/pCnXfVtTPS7qp+PomSb9qcCy/p1u28S7bZlwN33eNb3/u7h3/kHSVBt+R/1jS3zcxhpJxXSjpv4uPD5oem6SnNPi07oQG39v4kaSzJb0qabOk/5A0pYvG9m8a3Nr7fQ0Ga3pDY1ugwafo70taV3xc1fR9F4yrI/cbp8sCmeANOiAThB3IBGEHMkHYgUwQdiAThB3IBGEHMvF/rSIwqVQD1iIAAAAASUVORK5CYII=\n", - "text/plain": [ - "<Figure size 432x288 with 1 Axes>" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "from pkgutil import get_data\n", "import onnx.numpy_helper as nph\n", @@ -1538,17 +893,9 @@ }, { "cell_type": "code", - "execution_count": 92, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Expected network input shape is [1, 784]\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "model = ModelWrapper(build_dir + \"/tfc_w1_a1_pynq_deploy.onnx\")\n", "iname = model.graph.input[0].name\n", @@ -1566,7 +913,7 @@ }, { "cell_type": "code", - "execution_count": 95, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -1579,20 +926,9 @@ }, { "cell_type": "code", - "execution_count": 96, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[2.]], dtype=float32)" - ] - }, - "execution_count": 96, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "ret[oname]" ] @@ -1624,22 +960,9 @@ }, { "cell_type": "code", - "execution_count": 75, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[sudo] password for xilinx: Collecting git+https://github.com/fbcotter/dataset_loading.git@0.0.4\n", - " Cloning https://github.com/fbcotter/dataset_loading.git (to 0.0.4) to /tmp/pip-hhwx4j3n-build\n", - " Requirement already satisfied (use --upgrade to upgrade): dataset-loading==0.0.4 from git+https://github.com/fbcotter/dataset_loading.git@0.0.4 in /usr/local/lib/python3.6/dist-packages\n", - "Requirement already satisfied: Pillow in /usr/lib/python3/dist-packages (from dataset-loading==0.0.4)\n", - "Requirement already satisfied: scipy in /usr/lib/python3/dist-packages (from dataset-loading==0.0.4)\n", - "Connection to 192.168.2.99 closed.\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "! ssh {options} -t {username}@{ip} -p {port} 'echo {password} | sudo -S pip3 install git+https://github.com/fbcotter/dataset_loading.git@0.0.4#egg=dataset_loading'" ] @@ -1657,36 +980,9 @@ }, { "cell_type": "code", - "execution_count": 108, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[sudo] password for xilinx: Looking for Train Imgs\n", - "Tar File found in data_dir. Not Downloading again\n", - "Looking for Train Labels\n", - "Tar File found in data_dir. Not Downloading again\n", - "Looking for Test Imgs\n", - "Tar File found in data_dir. Not Downloading again\n", - "Looking for Test Labels\n", - "Tar File found in data_dir. Not Downloading again\n", - "batch 0 / 10 : total OK 913 NOK 87\n", - "batch 1 / 10 : total OK 1800 NOK 200\n", - "batch 2 / 10 : total OK 2714 NOK 286\n", - "batch 3 / 10 : total OK 3619 NOK 381\n", - "batch 4 / 10 : total OK 4535 NOK 465\n", - "batch 5 / 10 : total OK 5488 NOK 512\n", - "batch 6 / 10 : total OK 6438 NOK 562\n", - "batch 7 / 10 : total OK 7399 NOK 601\n", - "batch 8 / 10 : total OK 8371 NOK 629\n", - "batch 9 / 10 : total OK 9296 NOK 704\n", - "Final accuracy: 92.960000\n", - "Connection to 192.168.2.99 closed.\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "! ssh {options} -t {username}@{ip} -p {port} 'cd {target_dir_pynq}; echo {password} | sudo -S python3.6 validate.py --dataset mnist --batchsize 1000'" ] @@ -1709,23 +1005,9 @@ }, { "cell_type": "code", - "execution_count": 104, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Network metrics:\n", - "runtime[ms]: 10.43391227722168\n", - "throughput[images/s]: 958413.2714850444\n", - "DRAM_in_bandwidth[Mb/s]: 751.3960048442748\n", - "DRAM_out_bandwidth[Mb/s]: 0.9584132714850445\n", - "fclk[mhz]: 100.0\n", - "N: 10000\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "from finn.core.throughput_test import throughput_test_remote\n", "\n", @@ -1745,17 +1027,9 @@ }, { "cell_type": "code", - "execution_count": 105, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "We reach approximately 61% of the ideal performance.\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "II = 64\n", "# frequency in MHz\n", diff --git a/notebooks/end2end_example/bnn-pynq/tfc_end2end_verification.ipynb b/notebooks/end2end_example/bnn-pynq/tfc_end2end_verification.ipynb index 4a5d3dd07a2f6719b51e75d672790ed44883138f..dbb98bc304b3fffc0dad524070e67859726ff406 100644 --- a/notebooks/end2end_example/bnn-pynq/tfc_end2end_verification.ipynb +++ b/notebooks/end2end_example/bnn-pynq/tfc_end2end_verification.ipynb @@ -28,14 +28,15 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from finn.util.basic import make_build_dir\n", "from finn.util.visualization import showSrc, showInNetron\n", - " \n", - "build_dir = \"/workspace/finn\"" + "import os\n", + "\n", + "build_dir = os.environ[\"FINN_ROOT\"]" ] }, { @@ -47,22 +48,9 @@ }, { "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[-1.119972 , -1.7596636, 0.8423852, -1.0705007, -1.3218282,\n", - " -1.5030646, -1.4598225, -1.2803943, -1.0334575, -1.7878995]],\n", - " dtype=float32)" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "from pkgutil import get_data\n", "import onnx\n", @@ -91,42 +79,9 @@ }, { "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "def xnorpopcountmatmul(inp0, inp1):\n", - " \"\"\"Simulates XNOR-popcount matrix multiplication as a regular bipolar\n", - " matrix multiplication followed by some post processing.\"\"\"\n", - " # extract the operand shapes\n", - " # (M, K0) = inp0.shape\n", - " # (K1, N) = inp1.shape\n", - " K0 = inp0.shape[-1]\n", - " K1 = inp1.shape[0]\n", - " # make sure shapes are compatible with matmul\n", - " assert K0 == K1, \"Matrix shapes are not compatible with matmul.\"\n", - " K = K0\n", - " # convert binary inputs to bipolar\n", - " inp0_bipolar = 2.0 * inp0 - 1.0\n", - " inp1_bipolar = 2.0 * inp1 - 1.0\n", - " # call regular numpy matrix multiplication\n", - " out = np.matmul(inp0_bipolar, inp1_bipolar)\n", - " # XNOR-popcount does not produce the regular dot product result --\n", - " # it returns the number of +1s after XNOR. let P be the number of +1s\n", - " # and N be the number of -1s. XNOR-popcount returns P, whereas the\n", - " # regular dot product result from numpy is P-N, so we need to apply\n", - " # some correction.\n", - " # out = P-N\n", - " # K = P+N\n", - " # out + K = 2P, so P = (out + K)/2\n", - " return (out + K) * 0.5\n", - "\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "from finn.custom_op.general.xnorpopcount import xnorpopcountmatmul\n", "showSrc(xnorpopcountmatmul)" @@ -145,7 +100,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -158,25 +113,17 @@ }, { "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Results are the same!\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "import finn.core.onnx_exec as oxe\n", - "output_dict = oxe.execute_onnx(model_for_sim, input_dict)\n", + "output_dict = oxe.execute_onnx(model_for_sim, input_dict, return_full_exec_context=False)\n", "output_pysim = output_dict[list(output_dict.keys())[0]]\n", "\n", "\n", "\n", - "if np.isclose(output_pysim, output_golden, atol=1e-3).all():\n", + "if np.isclose(output_pysim, np.where(output_golden[0]==np.amax(output_golden[0])), atol=1e-3).all():\n", " print(\"Results are the same!\")\n", "else:\n", " print(\"The results are not the same!\")" @@ -200,7 +147,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -218,7 +165,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -240,38 +187,9 @@ }, { "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Serving '/workspace/finn/tfc_w1_a1_for_cppsim.onnx' at http://0.0.0.0:8081\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " <iframe\n", - " width=\"100%\"\n", - " height=\"400\"\n", - " src=\"http://0.0.0.0:8081/\"\n", - " frameborder=\"0\"\n", - " allowfullscreen\n", - " ></iframe>\n", - " " - ], - "text/plain": [ - "<IPython.lib.display.IFrame at 0x7f3cac09d978>" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "model_for_cppsim.save(build_dir+\"/tfc_w1_a1_for_cppsim.onnx\")\n", "showInNetron(build_dir+\"/tfc_w1_a1_for_cppsim.onnx\")" @@ -290,18 +208,9 @@ }, { "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "compile.sh\t\t\t memblock_0.dat thresh.h\r\n", - "execute_StreamingFCLayer_Batch.cpp node_model\t weights.npy\r\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "from finn.custom_op.registry import getCustomOp\n", "\n", @@ -327,7 +236,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -348,26 +257,18 @@ }, { "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Results are the same!\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "parent_model = ModelWrapper(build_dir+\"/tfc_w1_a1_dataflow_parent.onnx\")\n", - "sdp_node = parent_model.graph.node[2]\n", + "sdp_node = parent_model.graph.node[1]\n", "child_model = build_dir + \"/tfc_w1_a1_for_cppsim.onnx\"\n", "getCustomOp(sdp_node).set_nodeattr(\"model\", child_model)\n", "output_dict = oxe.execute_onnx(parent_model, input_dict)\n", "output_cppsim = output_dict[list(output_dict.keys())[0]]\n", "\n", - "if np.isclose(output_cppsim, output_golden, atol=1e-3).all():\n", + "if np.isclose(output_cppsim, np.where(output_golden[0]==np.amax(output_golden[0])), atol=1e-3).all():\n", " print(\"Results are the same!\")\n", "else:\n", " print(\"The results are not the same!\")" @@ -404,7 +305,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -433,14 +334,14 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# parent model\n", "model_for_rtlsim = ModelWrapper(build_dir + \"/tfc_w1_a1_dataflow_parent.onnx\")\n", "# reference child model\n", - "sdp_node = getCustomOp(model_for_rtlsim.graph.node[2])\n", + "sdp_node = getCustomOp(model_for_rtlsim.graph.node[1])\n", "sdp_node.set_nodeattr(\"model\", build_dir + \"/tfc_w1_a1_dataflow_child.onnx\")\n", "\n", "model_for_rtlsim = model_for_rtlsim.transform(SetExecMode(\"rtlsim\"))" @@ -455,22 +356,14 @@ }, { "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Results are the same!\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "output_dict = oxe.execute_onnx(model_for_rtlsim, input_dict)\n", "output_rtlsim = output_dict[list(output_dict.keys())[0]]\n", "\n", - "if np.isclose(output_rtlsim, output_golden, atol=1e-3).all():\n", + "if np.isclose(output_rtlsim, np.where(output_golden[0]==np.amax(output_golden[0])), atol=1e-3).all():\n", " print(\"Results are the same!\")\n", "else:\n", " print(\"The results are not the same!\")" @@ -487,24 +380,9 @@ }, { "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/workspace/finn/src/finn/transformation/fpgadataflow/hlssynth_ip.py:70: UserWarning: Using pre-existing IP for StreamingFCLayer_Batch_3\n", - " warnings.warn(\"Using pre-existing IP for %s\" % node.name)\n", - "/workspace/finn/src/finn/transformation/fpgadataflow/hlssynth_ip.py:70: UserWarning: Using pre-existing IP for StreamingFCLayer_Batch_1\n", - " warnings.warn(\"Using pre-existing IP for %s\" % node.name)\n", - "/workspace/finn/src/finn/transformation/fpgadataflow/hlssynth_ip.py:70: UserWarning: Using pre-existing IP for StreamingFCLayer_Batch_2\n", - " warnings.warn(\"Using pre-existing IP for %s\" % node.name)\n", - "/workspace/finn/src/finn/transformation/fpgadataflow/hlssynth_ip.py:70: UserWarning: Using pre-existing IP for StreamingFCLayer_Batch_0\n", - " warnings.warn(\"Using pre-existing IP for %s\" % node.name)\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "from finn.transformation.fpgadataflow.insert_dwc import InsertDWC\n", "from finn.transformation.fpgadataflow.insert_fifo import InsertFIFO\n", @@ -519,51 +397,36 @@ "child_model = child_model.transform(CreateStitchedIP(test_fpga_part, target_clk_ns))\n", "child_model = child_model.transform(PrepareRTLSim())\n", "child_model.set_metadata_prop(\"exec_mode\",\"rtlsim\")\n", - "child_model.save(build_dir + \"/tfc_w1_a1_dataflow_child.onnx\")" + "child_model.save(build_dir + \"/tfc_w1_a1_dataflow_child.onnx\");" ] }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# parent model\n", "model_for_rtlsim = ModelWrapper(build_dir + \"/tfc_w1_a1_dataflow_parent.onnx\")\n", "# reference child model\n", - "sdp_node = getCustomOp(model_for_rtlsim.graph.node[2])\n", + "sdp_node = getCustomOp(model_for_rtlsim.graph.node[1])\n", "sdp_node.set_nodeattr(\"model\", build_dir + \"/tfc_w1_a1_dataflow_child.onnx\")" ] }, { "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Results are the same!\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "output_dict = oxe.execute_onnx(model_for_rtlsim, input_dict)\n", "output_rtlsim = output_dict[list(output_dict.keys())[0]]\n", "\n", - "if np.isclose(output_rtlsim, output_golden, atol=1e-3).all():\n", + "if np.isclose(output_rtlsim, np.where(output_golden[0]==np.amax(output_golden[0])), atol=1e-3).all():\n", " print(\"Results are the same!\")\n", "else:\n", " print(\"The results are not the same!\")" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { @@ -582,7 +445,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.8" + "version": "3.8.5" } }, "nbformat": 4, diff --git a/notebooks/end2end_example/cybersecurity/1-train-mlp-with-brevitas.ipynb b/notebooks/end2end_example/cybersecurity/1-train-mlp-with-brevitas.ipynb index 2c9f4a99ed3edd05a8e8d32db2fe6bcdad204716..85a4e9556b71ad913080a302c665edf23146faa0 100644 --- a/notebooks/end2end_example/cybersecurity/1-train-mlp-with-brevitas.ipynb +++ b/notebooks/end2end_example/cybersecurity/1-train-mlp-with-brevitas.ipynb @@ -57,7 +57,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -103,27 +103,9 @@ }, { "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "--2021-10-12 15:49:17-- https://zenodo.org/record/4519767/files/unsw_nb15_binarized.npz?download=1\n", - "Resolving zenodo.org (zenodo.org)... 137.138.76.77\n", - "Connecting to zenodo.org (zenodo.org)|137.138.76.77|:443... connected.\n", - "HTTP request sent, awaiting response... 200 OK\n", - "Length: 13391907 (13M) [application/octet-stream]\n", - "Saving to: ‘unsw_nb15_binarized.npz’\n", - "\n", - "unsw_nb15_binarized 100%[===================>] 12.77M 3.56MB/s in 3.7s \n", - "\n", - "2021-10-12 15:49:22 (3.44 MB/s) - ‘unsw_nb15_binarized.npz’ saved [13391907/13391907]\n", - "\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "! wget -O unsw_nb15_binarized.npz https://zenodo.org/record/4519767/files/unsw_nb15_binarized.npz?download=1" ] @@ -137,18 +119,9 @@ }, { "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Samples in each set: train = 175341, test = 82332\n", - "Shape of one input sample: torch.Size([593])\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "import numpy as np\n", "from torch.utils.data import TensorDataset\n", @@ -183,7 +156,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -198,18 +171,9 @@ }, { "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Input shape for 1 batch: torch.Size([1000, 593])\n", - "Label shape for 1 batch: torch.Size([1000])\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "count = 0\n", "for x,y in train_quantized_loader:\n", @@ -220,6 +184,25 @@ " break" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Define a PyTorch Device <a id='define_pytorch_device'></a> \n", + "\n", + "GPUs can significantly speed-up training of deep neural networks. We check for availability of a GPU and if so define it as target device." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", + "print(\"Target device: \" + str(device))" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -236,7 +219,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -258,7 +241,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -282,7 +265,9 @@ " nn.Dropout(0.5),\n", " QuantReLU(bit_width=act_bit_width),\n", " QuantLinear(hidden3, num_classes, bias=True, weight_bit_width=weight_bit_width)\n", - ")\n" + ")\n", + "\n", + "model.to(device)" ] }, { @@ -302,7 +287,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -313,6 +298,7 @@ " \n", " for i, data in enumerate(train_loader, 0): \n", " inputs, target = data\n", + " inputs, target = inputs.to(device), target.to(device)\n", " optimizer.zero_grad() \n", " \n", " # forward pass\n", @@ -324,14 +310,14 @@ " optimizer.step()\n", " \n", " # keep track of loss value\n", - " losses.append(loss.data.numpy()) \n", + " losses.append(loss.data.cpu().numpy()) \n", " \n", " return losses" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -347,12 +333,13 @@ " with torch.no_grad():\n", " for data in test_loader:\n", " inputs, target = data\n", + " inputs, target = inputs.to(device), target.to(device)\n", " output_orig = model(inputs.float())\n", " # run the output through sigmoid\n", " output = torch.sigmoid(output_orig) \n", " # compare against a threshold of 0.5 to generate 0/1\n", - " pred = (output.detach().numpy() > 0.5) * 1\n", - " target = target.float()\n", + " pred = (output.detach().cpu().numpy() > 0.5) * 1\n", + " target = target.cpu().float()\n", " y_true.extend(target.tolist()) \n", " y_pred.extend(pred.reshape(-1).tolist())\n", " \n", @@ -384,7 +371,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -402,30 +389,20 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# loss criterion and optimizer\n", - "criterion = nn.BCEWithLogitsLoss()\n", + "criterion = nn.BCEWithLogitsLoss().to(device)\n", "optimizer = torch.optim.Adam(model.parameters(), lr=lr, betas=(0.9, 0.999))" ] }, { "cell_type": "code", - "execution_count": 12, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Training loss = 0.132918 test accuracy = 0.798341: 100%|██████████| 10/10 [00:44<00:00, 4.45s/it]\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "import numpy as np\n", "from sklearn.metrics import accuracy_score\n", @@ -450,24 +427,9 @@ }, { "cell_type": "code", - "execution_count": 13, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAY4AAAEWCAYAAABxMXBSAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/d3fzzAAAACXBIWXMAAAsTAAALEwEAmpwYAAAofElEQVR4nO3de3Rd5X3m8e+jo5slSzq2JRMsHWMbTIi5SLSGXEsTSlpIO8C0uUCbhLRpmXRKmpY2DWlmpR2mWSuFTpJ2SjowJSSZkFJCksaTQsiNQNKEBBOMb9yM8N1g+Spbsu6/+eNsiSMhyTq2js+R9HzW0tLe776c3z4herz3u/e7FRGYmZlNVVmxCzAzs5nFwWFmZnlxcJiZWV4cHGZmlhcHh5mZ5cXBYWZmeXFwmJ0ASQ9Ium66182zhjdL2jnd+zU7nvJiF2B2qkg6mjNbA/QCg8n8f4mIu6e6r4i4ohDrms0EDg6bMyJi/vC0pK3A70fEd8euJ6k8IgZOZW1mM4kvVdmcN3zJR9JHJL0I3CVpgaRvSuqQdDCZbsnZ5geSfj+Zfp+kH0n6u2TdFyRdcYLrLpf0iKQjkr4r6TZJX5ricbwm+axDkjZJujJn2dskbU72u0vSnyftjcmxHZJ0QNIPJfnvgk3K/4GYZb0KWAicAVxP9v8bdyXzS4FjwD9Osv1rgWeARuAW4E5JOoF1vwz8DFgE/DXwnqkUL6kC+H/At4HFwAeBuyW9OlnlTrKX4+qA84DvJ+1/BuwEmoDTgL8EPA6RTcrBYZY1BPxVRPRGxLGI2B8RX42I7og4AnwC+OVJtt8WEf8nIgaBLwCnk/1DPOV1JS0FLgI+HhF9EfEjYM0U638dMB/4ZLLt94FvAtcmy/uBVZLqI+JgRPw8p/104IyI6I+IH4YHsLPjcHCYZXVERM/wjKQaSbdL2iapE3gESEtKTbD9i8MTEdGdTM7Pc90lwIGcNoAdU6x/CbAjIoZy2rYBzcn0bwFvA7ZJeljS65P2W4EtwLcltUu6aYqfZ3OYg8Msa+y/sv8MeDXw2oioBy5J2ie6/DQd9gALJdXktGWmuO1uIDOmf2IpsAsgIh6LiKvIXsb6N+DepP1IRPxZRKwArgRulPQrJ3cYNts5OMzGV0e2X+OQpIXAXxX6AyNiG7AW+GtJlclZwX+a4uY/BbqBv5BUIenNybb3JPv6HUkNEdEPdJK9NIek35B0VtLHcpjs7clD436CWcLBYTa+zwDzgH3Ao8C3TtHn/g7wemA/8DfAv5J93mRSEdFHNiiuIFvzZ4H3RsTTySrvAbYml90+kHwOwErgu8BR4CfAZyPioWk7GpuV5H4ws9Il6V+BpyOi4Gc8ZlPlMw6zEiLpIklnSiqTdDlwFdk+CbOS4SfHzUrLq4CvkX2OYyfwhxHxRHFLMhvNl6rMzCwvvlRlZmZ5mROXqhobG2PZsmXFLsPMbEZ5/PHH90VE09j2OREcy5YtY+3atcUuw8xsRpG0bbx2X6oyM7O8ODjMzCwvDg4zM8uLg8PMzPLi4DAzs7w4OMzMLC8ODjMzy4uDYxLfWLeLLz067m3MZmZzloNjEt/a+CJ3PNJe7DLMzEqKg2MSrZk02w90c6Crr9ilmJmVDAfHJFpb0gA8ueNQUeswMyslDo5JXNDSQJlgnYPDzGyEg2MStVXlrFxcx5M7DxW7FDOzkuHgOI62TJondxzCL7wyM8tycBxHaybNwe5+th/oLnYpZmYloaDBIelySc9I2iLppnGWf0DSBknrJP1I0qqcZR9NtntG0q9NdZ/TrTXTALifw8xsWMGCQ1IKuA24AlgFXJsbDIkvR8T5EdEG3AJ8Ktl2FXANcC5wOfBZSakp7nNavfq0OqoryhwcZmaJQp5xXAxsiYj2iOgD7gGuyl0hIjpzZmuB4Y6Eq4B7IqI3Il4AtiT7O+4+p1t5qozzmxt8S66ZWaKQwdEM7MiZ35m0jSLpjyQ9T/aM44+Ps+2U9pns93pJayWt7ejoOOGDgOzzHBt3d9I/OHRS+zEzmw2K3jkeEbdFxJnAR4D/No37vSMiVkfE6qamV7xrPS9tS9P0DQzx9J4j01SdmdnMVcjg2AVkcuZbkraJ3ANcfZxt893ntBh+gnydn+cwMytocDwGrJS0XFIl2c7uNbkrSFqZM/vrwHPJ9BrgGklVkpYDK4GfTWWfhdCyYB6LaitZt/1QoT/KzKzklRdqxxExIOkG4EEgBXwuIjZJuhlYGxFrgBskXQb0AweB65JtN0m6F9gMDAB/FBGDAOPts1DHMExS9kFAn3GYmRUuOAAi4n7g/jFtH8+Z/tAk234C+MRU9nkqtGbSfP+ZvXT29FNfXXGqP97MrGQUvXN8pmjLpImADTsPF7sUM7OicnBM0QUtfoLczAwcHFOWrqlkeWOtHwQ0sznPwZGHtkyadR4p18zmOAdHHlpbGth7pJcXO3uKXYqZWdE4OPLQmkkDfpWsmc1tDo48rFpST0VKPOHgMLM5zMGRh6ryFKtOr/cZh5nNaQ6OPLVm0mzYeZjBIXeQm9nc5ODIU1smTVffIFv2Hi12KWZmReHgyJM7yM1srnNw5Gn5olrqq8s9xLqZzVkOjjyVlYnWTNpDrJvZnOXgOAGtLWmeeekIx/oGi12Kmdkp5+A4AW2ZNINDwcbdHinXzOYeB8cJuCCTHSnXHeRmNhc5OE7A4rpqmtPzPMS6mc1JDo4TNDxSrpnZXOPgOEGtmQZ2HjzGvqO9xS7FzOyUKmhwSLpc0jOStki6aZzlN0raLGm9pO9JOiNpf4ukdTk/PZKuTpZ9XtILOcvaCnkME2ltSQOw3s9zmNkcU7DgkJQCbgOuAFYB10paNWa1J4DVEXEBcB9wC0BEPBQRbRHRBlwKdAPfztnuw8PLI2JdoY5hMue3NFAm/DyHmc05hTzjuBjYEhHtEdEH3ANclbtCEhDdyeyjQMs4+3k78EDOeiWhprKcs0+rY91O35JrZnNLIYOjGdiRM78zaZvI+4EHxmm/BviXMW2fSC5vfVpS1Xg7k3S9pLWS1nZ0dORT95S1ZdI86VfJmtkcUxKd45LeDawGbh3TfjpwPvBgTvNHgXOAi4CFwEfG22dE3BERqyNidVNTU0HqbsukOXysn637S+pkyMysoAoZHLuATM58S9I2iqTLgI8BV0bE2FuU3gl8PSL6hxsiYk9k9QJ3kb0kVhQeKdfM5qJCBsdjwEpJyyVVkr3ktCZ3BUkXAreTDY294+zjWsZcpkrOQpAk4Gpg4/SXPjVnn1ZHTWXKz3OY2ZxSXqgdR8SApBvIXmZKAZ+LiE2SbgbWRsQaspem5gNfyeYA2yPiSgBJy8iesTw8Ztd3S2oCBKwDPlCoYzieVJk4r7nBwWFmc0rBggMgIu4H7h/T9vGc6csm2XYr43SmR8Sl01jiSWvLpPn8f2ylb2CIyvKS6DIyMyso/6U7SW2ZNH2DQzy1p7PYpZiZnRIOjpM00kHuJ8jNbI5wcJykJQ3VNM6vcj+Hmc0ZDo6TJMkj5ZrZnOLgmAZtmQbaO7o4fKz/+Cubmc1wDo5pMNzPscHjVpnZHODgmAYXJEOsr9txsLiFmJmdAg6OadAwr4IVTbWs2+EzDjOb/Rwc02S4g9wj5ZrZbOfgmCZtmTT7jvay+3BPsUsxMysoB8c0GX6VrEfKNbPZzsExTV5zej2VqTIHh5nNeg6OaVJZXsaqJfU84eAws1nOwTGN2jJpNuw8zMDgULFLMTMrGAfHNGrLpDnWP8hze48WuxQzs4JxcEwjv0rWzOYCB8c0WraohoZ5FR5i3cxmNQfHNJJEaybNE9sPFbsUM7OCcXBMs7aWBp596QjdfQPFLsXMrCAKGhySLpf0jKQtkm4aZ/mNkjZLWi/pe5LOyFk2KGld8rMmp325pJ8m+/xXSZWFPIZ8tS1NMxSwcZdfJWtms1PBgkNSCrgNuAJYBVwradWY1Z4AVkfEBcB9wC05y45FRFvyc2VO+98Cn46Is4CDwPsLdQwnwiPlmtlsV8gzjouBLRHRHhF9wD3AVbkrRMRDEdGdzD4KtEy2Q0kCLiUbMgBfAK6ezqJPVuP8KloWzONJj5RrZrNUIYOjGdiRM78zaZvI+4EHcuarJa2V9Kikq5O2RcChiBjuQJhwn5KuT7Zf29HRcUIHcKL8Klkzm81KonNc0ruB1cCtOc1nRMRq4LeBz0g6M599RsQdEbE6IlY3NTVNY7XH15ZJs+vQMTqO9J7SzzUzOxUKGRy7gEzOfEvSNoqky4CPAVdGxMhf2ojYlfxuB34AXAjsB9KSyifbZ7H5QUAzm80KGRyPASuTu6AqgWuANbkrSLoQuJ1saOzNaV8gqSqZbgTeCGyO7FuSHgLenqx6HfCNAh7DCTlvSQOpMvlBQDOblQoWHEk/xA3Ag8BTwL0RsUnSzZKG75K6FZgPfGXMbbevAdZKepJsUHwyIjYnyz4C3ChpC9k+jzsLdQwnal5lilefVud+DjOblcqPv8qJi4j7gfvHtH08Z/qyCbb7MXD+BMvayd6xVdJaM2n+ff1uhoaCsjIVuxwzs2lTEp3js9GFmTSdPQNs3d9V7FLMzKaVg6NAhjvIfbnKzGYbB0eBnLV4PrWVKd9ZZWazjoOjQFJl4vyWBp9xmNms4+AooNZMms17OukdGCx2KWZm08bBUUAXZtL0DwZP7TlS7FLMzKaNg6OARjrIt3ukXDObPRwcBfSq+moW11Xx5E6PlGtms4eDo4Ak0ZZJ+84qM5tVHBwF1ppJ076vi8Pd/cUuxcxsWjg4CqxteKRcD3hoZrOEg6PAzm9pQPIQ62Y2ezg4Cqy+uoIzm+b7QUAzmzUcHKdAa0uaJ3ceIvs6ETOzmc3BcQq0LU2z72gfuw4dK3YpZmYnzcFxCrS1pAGPlGtms4OD4xQ45/Q6KsvL3EFuZrOCg+MUqEiVcd6Sep9xmNms4OA4RVozaTbsOszA4FCxSzEzOylTCg5JtZLKkumzJV0pqWIK210u6RlJWyTdNM7yGyVtlrRe0vcknZG0t0n6iaRNybJ35WzzeUkvSFqX/LRN+WiLqC2Tpqd/iGdfOlrsUszMTspUzzgeAaolNQPfBt4DfH6yDSSlgNuAK4BVwLWSVo1Z7QlgdURcANwH3JK0dwPvjYhzgcuBz0hK52z34YhoS37WTfEYiqrNr5I1s1liqsGhiOgGfhP4bES8Azj3ONtcDGyJiPaI6APuAa7KXSEiHkr2C/Ao0JK0PxsRzyXTu4G9QNMUay1JSxfWsKCmwh3kZjbjTTk4JL0e+B3g35O21HG2aQZ25MzvTNom8n7ggXE++GKgEng+p/kTySWsT0uqmqDg6yWtlbS2o6PjOKUWniRaM2mPWWVmM95Ug+NPgI8CX4+ITZJWAA9NVxGS3g2sBm4d03468H+B342I4V7ljwLnABcBC4GPjLfPiLgjIlZHxOqmptI4WWltSfPsS0fo6h0odilmZidsSsEREQ9HxJUR8bdJJ/m+iPjj42y2C8jkzLckbaNIugz4GHBlRPTmtNeTPbv5WEQ8mlPLnsjqBe4ie0lsRmjLpBkK2LDLL3Yys5lrqndVfVlSvaRaYCOwWdKHj7PZY8BKScslVQLXAGvG7PdC4HayobE3p70S+DrwxYi4b8w2pye/BVyd1DMjDL9K1v0cZjaTTfVS1aqI6CT7h/oBYDnZO6smFBEDwA3Ag8BTwL3JZa6bJV2ZrHYrMB/4SnJr7XCwvBO4BHjfOLfd3i1pA7ABaAT+ZorHUHQLaytZurDGd1aZ2YxWPsX1KpLnNq4G/jEi+iUdd6jXiLgfuH9M28dzpi+bYLsvAV+aYNmlU6y5JLVl0qzdeqDYZZiZnbCpnnHcDmwFaoFHkgf1OgtV1GzWmkmz+3APezt7il2KmdkJmWrn+D9ERHNEvC3pmN4GvKXAtc1KbZkGwA8CmtnMNdXO8QZJnxp+LkLS/yR79mF5OndJA+Vl8vMcZjZjTfVS1eeAI2Q7rd9J9jLVXYUqajarrkhxzul1PuMwsxlrqp3jZ0bEb+XM/3dJ6wpQz5zQ2pJmzbrdDA0FZWUqdjlmZnmZ6hnHMUlvGp6R9EbA70E9QW2ZNEd6B2jf11XsUszM8jbVM44PAF+U1JDMHwSuK0xJs1/uSLlnLZ5f3GLMzPI01buqnoyIVuAC4IKIuBCY0c9TFNOKpvnMryr3E+RmNiPl9QbAiOhMniAHuLEA9cwJqTJxQUuD76wysxnpZF4d617dk9CaSfPUnk56+geLXYqZWV5OJjiOO+SITay1JU3/YLB5jx/AN7OZZdLOcUlHGD8gBMwrSEVzxIVL00B2pNxfWLqguMWYmeVh0uCIiLpTVchcc1p9Na+qr/aDgGY245zMpSo7SW2ZtO+sMrMZx8FRRK2ZNFv3d3Oou6/YpZiZTZmDo4haPVKumc1ADo4iuqAljQRP7vA7yM1s5nBwFNH8qnJWLp7vBwHNbEZxcBRZa0uadTsOEeHHYsxsZihocEi6XNIzkrZIummc5TdK2ixpvaTvJa+kHV52naTnkp/rctp/UdKGZJ//IGlGP8HetjTNga4+dh70YMNmNjMULDgkpYDbgCuAVcC1klaNWe0JYHVEXADcB9ySbLsQ+CvgtcDFwF9JGn5K7p+APwBWJj+XF+oYToXWljQAT7iD3MxmiEKecVwMbImI9ojoA+4BrspdISIeiojuZPZRoCWZ/jXgOxFxICIOAt8BLpd0OlAfEY9G9trOF4GrC3gMBffqV9VRVV7m5znMbMYoZHA0Azty5ncmbRN5P/DAcbZtTqaPu09J1w+/I72joyPP0k+dilQZ5zc3ODjMbMYoic5xSe8GVgO3Ttc+I+KOiFgdEaubmpqma7cF0ZpJs2HXYfoHh4pdipnZcRUyOHYBmZz5lqRtFEmXAR8DroyI3uNsu4uXL2dNuM+Zpi2TpndgiGdePFLsUszMjquQwfEYsFLSckmVwDXAmtwVJF0I3E42NPbmLHoQ+FVJC5JO8V8FHoyIPUCnpNcld1O9F/hGAY/hlBh+layf5zCzmaBgwRERA8ANZEPgKeDeiNgk6WZJVyar3QrMB74iaZ2kNcm2B4D/QTZ8HgNuTtoA/ivwz8AW4Hle7heZsVoWzGNhbSXrth8qdilmZsc16bDqJysi7gfuH9P28ZzpyybZ9nPA58ZpXwucN41lFp2k7Ei5PuMwsxmgJDrHLfs8x3N7j3K0d6DYpZiZTcrBUSJaMw1EwHqfdZhZiXNwlIiRDnKPlGtmJc7BUSLSNZUsW1TjBwHNrOQ5OEpIaybtlzqZWclzcJSQtkyaFzt7ePFwT7FLMTObkIOjhLQm/Rw+6zCzUubgKCGrTq+nIiU/z2FmJc3BUUKqK1K85vR6d5CbWUlzcJSY1pY063ceZnDIr5I1s9Lk4CgxbZk0R3sHaO84WuxSzMzG5eAoMe4gN7NS5+AoMSsaa6mrLndwmFnJcnCUmLIy0drikXLNrHQ5OEpQa6aBp/ccoad/sNilmJm9goOjBLW2pBkYCjbt9oCHZlZ6HBwlqG2kg9zBYWalx8FRghbXV7OkodoPAppZSXJwlCiPlGtmpaqgwSHpcknPSNoi6aZxll8i6eeSBiS9Paf9LZLW5fz0SLo6WfZ5SS/kLGsr5DEUS1smzfYD3Rzo6it2KWZmoxQsOCSlgNuAK4BVwLWSVo1ZbTvwPuDLuY0R8VBEtEVEG3Ap0A18O2eVDw8vj4h1hTmC4modeSPgoaLWYWY2ViHPOC4GtkREe0T0AfcAV+WuEBFbI2I9MDTJft4OPBAR3YUrtfSc39xAmfwEuZmVnkIGRzOwI2d+Z9KWr2uAfxnT9glJ6yV9WlLViRZYymqryjn7tDo/CGhmJaekO8clnQ6cDzyY0/xR4BzgImAh8JEJtr1e0lpJazs6OgpeayG0tqR5cschIjxSrpmVjkIGxy4gkzPfkrTl453A1yOif7ghIvZEVi9wF9lLYq8QEXdExOqIWN3U1JTnx5aGtqVpDnb3s/3AnLpKZ2YlrpDB8RiwUtJySZVkLzmtyXMf1zLmMlVyFoIkAVcDG0++1NLU2pIG3M9hZqWlYMEREQPADWQvMz0F3BsRmyTdLOlKAEkXSdoJvAO4XdKm4e0lLSN7xvLwmF3fLWkDsAFoBP6mUMdQbGefNp95FSkHh5mVlPJC7jwi7gfuH9P28Zzpx8hewhpv262M05keEZdOb5WlqzxVxvnNDb4l18xKSkl3jlt2pNyNuzvpH5zsjmUzs1PHwVHiWjNp+gaGeHrPkWKXYmYGODhK3shIuX6ew8xKhIOjxDWn59Gcnsct33qaf/jecxzp6T/+RmZmBeTgKHGS+MLvXcwbzlzEp77zLJfc8hC3P/w8x/r8dkAzKw7NhaeSV69eHWvXri12GSdt/c5DfOo7z/KDZzponF/FDW85k2tfu5Sq8lSxSzOzWUjS4xGx+hXtDo6ZZ+3WA/zdt5/h0fYDLGmo5oZLV/KO1S1UpHwCaWbTx8Exi4Jj2I+37OPWbz/DE9sPsXRhDX9y2UquamsmVaZil2Zms8BEweF/os5gbzirka/94Ru4630XUVddzo33Psmvfvphvrl+N0NDs/8fBGZWHA6OGU4SbzlnMd/84Jv43+/+BVJl4oYvP8Hb/uGHfGfzSx5Z18ymnYNjlpDE5eedzgMfuoS/v6aNnv5B/uCLa7n6tv/gkWc7HCBmNm0cHLNMqkxc1dbMd2/8ZW75rQvYd7SP937uZ7zr9kf5afv+YpdnZrOAO8dnud6BQe59bAf/6/tb2Hukl19a2ciNbz2bC5cuKHZpZlbifFfVHA2OYT39g3zp0W189gfPc6Crj8tes5g/fevZnLukodilmVmJcnDM8eAY1tU7wOd/vJXbH36ezp4Bfv380/nTt67krMV1xS7NzEqMg8PBMcrhY/3c+cN27vzRCxzrH+TqtmY+dNlKzlhUW+zSzKxEODgcHOM60NXH7Q8/zxd+spX+weAdv9jCB39lJc3pecUuzcyKzMHh4JjU3s4ePvuD5/nyT7cDcO3FGf7oLWexuL66yJWZWbE4OBwcU7Lr0DH+8fvP8ZW1O0mVievesIwP/PKZLKytLHZpZnaKOTgcHHnZtr+Lv//uc3x93S5qKlL83puW8/u/tIKGeRXFLs3MTpGiBIeky4G/B1LAP0fEJ8csvwT4DHABcE1E3JezbBDYkMxuj4grk/blwD3AIuBx4D0R0TdZHQ6OE/fcS0f4zHef49837KGuupxLVjZxbnM95zc3cN6SBhb4TMRs1jrlwSEpBTwLvBXYCTwGXBsRm3PWWQbUA38OrBkTHEcjYv44+70X+FpE3CPpfwNPRsQ/TVaLg+Pkbdp9mP/zSDuPbz/IjgPHRtqb0/M4LwmSc5MwaaqrKmKlZjZdJgqO8gJ+5sXAlohoTwq4B7gKGAmOiNiaLBuayg4lCbgU+O2k6QvAXwOTBoedvHOXNPCZay4E4FB3H5t2d7Jh12E27jrMpt2dPLjppZF1X1VfzXnN9Zy7pCF7ZtLcwGn1VWT/5zOzma6QwdEM7MiZ3wm8No/tqyWtBQaAT0bEv5G9PHUoIgZy9tk83saSrgeuB1i6dGl+lduk0jWVvPGsRt54VuNIW2dPP5t3d7IxCZONuzv53tN7GT6hbZxfxXnN9Zy3JBsk5zXX05ye5zAxm4EKGRwn64yI2CVpBfB9SRuAw1PdOCLuAO6A7KWqAtVoifrqCl63YhGvW7FopK2rd4Cn9mTDZMOuTjbtPswPn9vHYPKukAU1FZzX3JBzZlLP0oU1DhOzElfI4NgFZHLmW5K2KYmIXcnvdkk/AC4EvgqkJZUnZx157dNOrdqqclYvW8jqZQtH2nr6B7NhsruTjTsPs3H3Ye78UTv9g9kwqasuT85K6pMzkwaWL6qlzG81NCsZhQyOx4CVyV1Qu4BreLlvYlKSFgDdEdErqRF4I3BLRISkh4C3k72z6jrgGwWp3gqiuiLFhUsXjBqdt3dgkGdfPMrG3YfZsOswm3Yd5gs/2UbfQLbrq7YyxblLGkbu5jp3SQPLGmuoKk8V6zDM5rRC3477NrK326aAz0XEJyTdDKyNiDWSLgK+DiwAeoAXI+JcSW8AbgeGyL4z5DMRcWeyzxVkQ2Mh8ATw7ojonawO31U18/QPDvHcS9kwGe432bynk57+bJiUCTILa1jeWMuKxvmsaKplRWMtK5rmuyPebJr4AUAHx4w3MDhE+74uNu0+zAsdXTy/r4v2ji5e2Hd0JFAAaipT2UBpms/yxlrObMqGy/KmWuZXlXK3nllpKcbtuGbTqjxVxtmn1XH2aaOHgB8aCl7s7BkJkec7umjf18W6HQf55vrd5P7baHFdFSuaalneOD8bKMl0ZsE8ylN+IabZVDg4bMYrKxNL0vNYkp7Hm1Y2jlrW0z/I9gPdtHdkA+WFfV20dxzlgY17ONTdP7JeRUosXVgzEijDZywrmmpZVFvpS19mORwcNqtVV6TGPUsBONjVR/u+o7QnZyjtHUd5YV8XjzzbQd/gy5e+6qrLWdE0nzMba0ddAlveWMu8SnfQ29zj4LA5a0FtJb9Yu5BfPGPhqPbBoWDXwWMjofLCvi7a9x3lJ+37+doTo+/+flV9NWcsqmHZolrOaEx+L6rhjEXuT7HZy/9lm42RKhNLF9WwdFENb3716GXdfQPJ5a5soGzb3822/V187+m97Ds6+ua+xvlVLEtCZFmyv2WLalm2qJaGGo8ybDOXg8MsDzWV5dlnSpY0vGLZ0d4Btu3PhsnW/V1s29fNtgNd/Pj5fXz15z2j1k3XVIwEytjfC92nYiXOwWE2TeZXTRwqw530W/flBMv+bh7fdpD/9+RuhnLu/KqrKueMxrGBkr0EtrjOz6hY8Tk4zE6ByTrpewcG2XnwGNv2d7F1X3c2YPZ3sXl3Jw9ufJGBnFSZV5FK+lCG+1NqWTS/kqGhYGAoGBz5PUT/4Oj5gaFgcPDl9fqHhkbNj1ovmR8YHMpZFgyM7DPb3p8zX1NZzuK6KhbXV3NafRWnJb8X11WzuL6KRbVVpDx0zKzg4DArsqryFGc2zefMple8foaBwSF2H+pJzlC62Jr0qTzf0cVDT4+++ysfqTJRnvykykR5qmykLfd3xSvas/NVFeU5+8i2He0dYPfhHtbtOMT+rle+Wy1VJprmV2XDpL6axXU54VJfzWlJwCysqfTYZCXOwWFWwspTZSMd9dA0atlg8uDjwa4+ylPDf9zLcsLg5T/qYwOh0Je7+gaG6Djay0udPezt7GXvkR5e6uzhpc5s244D3azdeoCDOc/SjBxzmUaduSyuywmXnLYFNRW+bFckDg6zGSpVJprT82hOzyt2Ka9QWV42pdp6+gfpOJINlr1JqLx05OXAeWFfF4+2H+DwsVcGTGWqjKa6qpHLYsNhU1ddTlV5GVXlqezvipenqytSLy+rKBuZriwv82W0PDg4zKxoqitSZBbWkFlYM+l6Pf2D2WAZFTAvTz+39yg/2rKPIz0Dk+5nMhUpvRw25WVUDYdMRU7bmMAZG0zD4VRZXkZFaswZYHJJcOzZ38hZYWr89orUmPXKVPRLeQ4OMyt51RWpnEt2EzvWN0hX3wC9A0P09g9mf4+dHhiktz873TPSnvzuz5ketd0gR3sH2H+0b2R5T866w68AOFUkXhk8EwTUnddddNzvLV8ODjObNeZVpooyDMzQUNA3ODqYcu9G6x97d9pQzvzgOO2D49/l9vKdba9sf+VnZNerqpj+wTsdHGZmJ6msTFSXpaiuSAGzf1QAjyNtZmZ5cXCYmVleHBxmZpYXB4eZmeWloMEh6XJJz0jaIummcZZfIunnkgYkvT2nvU3STyRtkrRe0rtyln1e0guS1iU/bYU8BjMzG61gd1VJSgG3AW8FdgKPSVoTEZtzVtsOvA/48zGbdwPvjYjnJC0BHpf0YEQcSpZ/OCLuK1TtZmY2sULejnsxsCUi2gEk3QNcBYwER0RsTZaNenomIp7Nmd4taS/ZgXoOFbBeMzObgkJeqmoGduTM70za8iLpYqASeD6n+RPJJaxPS6qaYLvrJa2VtLajoyPfjzUzswmU9AOAkk4H/i9wXUQMn5V8FHiRbJjcAXwEuHnsthFxR7IcSR2Stp1gGY3AvhPcdjby9/Eyfxej+fsYbTZ8H2eM11jI4NgFZHLmW5K2KZFUD/w78LGIeHS4PSL2JJO9ku7ilf0jrxARTcdbZ5I61kbE6hPdfrbx9/Eyfxej+fsYbTZ/H4W8VPUYsFLSckmVwDXAmqlsmKz/deCLYzvBk7MQlB2I/2pg43QWbWZmkytYcETEAHAD8CDwFHBvRGySdLOkKwEkXSRpJ/AO4HZJm5LN3wlcArxvnNtu75a0AdhA9lTwbwp1DGZm9kqKiOOvNYdJuj7pLzH8feTydzGav4/RZvP34eAwM7O8eMgRMzPLi4PDzMzy4uCYxPHG2porJGUkPSRpczJ+2IeKXVMpkJSS9ISkbxa7lmKTlJZ0n6SnJT0l6fXFrqlYJP1p8v+TjZL+RVJ1sWuabg6OCeSMtXUFsAq4VtKq4lZVNAPAn0XEKuB1wB/N4e8i14fI3jFo8PfAtyLiHKCVOfq9SGoG/hhYHRHnASmyjyLMKg6OiY2MtRURfcDwWFtzTkTsiYifJ9NHyP5RyHv4mNlEUgvw68A/F7uWYpPUQPb2+TsBIqIvZ0DSuagcmCepHKgBdhe5nmnn4JjYtIy1NdtIWgZcCPy0yKUU22eAvwCGjrPeXLAc6ADuSi7d/bOk2mIXVQwRsQv4O7Ijf+8BDkfEt4tb1fRzcNiUSZoPfBX4k4joLHY9xSLpN4C9EfF4sWspEeXALwD/FBEXAl3AnOwTlLSA7JWJ5cASoFbSu4tb1fRzcEzspMbamm0kVZANjbsj4mvFrqfI3ghcKWkr2UuYl0r6UnFLKqqdwM6IGD4LvY9skMxFlwEvRERHRPQDXwPeUOSapp2DY2InPNbWbJOMC3Yn8FREfKrY9RRbRHw0IloiYhnZ/y6+HxGz7l+VUxURLwI7JL06afoVct67M8dsB14nqSb5/82vMAtvFCjpYdWLKSIGJA2PtZUCPhcRm46z2Wz1RuA9wAZJ65K2v4yI+4tXkpWYD5IdR64SaAd+t8j1FEVE/FTSfcDPyd6N+ATJ6x1mEw85YmZmefGlKjMzy4uDw8zM8uLgMDOzvDg4zMwsLw4OMzPLi4PD7DgkHU1+L5P029O8778cM//j6dy/WSE4OMymbhmQV3AkA91NZlRwRMSse8rYZh8Hh9nUfRL4JUnrkncupCTdKukxSesl/RcASW+W9ENJa0ieoJb0b5IeT97TcH3S9kmyo6iuk3R30jZ8dqNk3xslbZD0rpx9/yDn3Rd3J08oI+mTyTtT1kv6u1P+7dic4SfHzabuJuDPI+I3AJIAOBwRF0mqAv5D0vBIqL8AnBcRLyTzvxcRByTNAx6T9NWIuEnSDRHRNs5n/SbQRvbdFo3JNo8kyy4EziU7XPd/AG+U9BTwn4FzIiIkpaf30M1e5jMOsxP3q8B7k2FYfgosAlYmy36WExoAfyzpSeBRsoNnrmRybwL+JSIGI+Il4GHgopx974yIIWAd2Utoh4Ee4E5Jvwl0n+SxmU3IwWF24gR8MCLakp/lOe9e6BpZSXoz2VFTXx8RrWTHLzqZ14n25kwPAuURMUD25WP3Ab8BfOsk9m82KQeH2dQdAepy5h8E/jAZch5JZ0/wAqMG4GBEdEs6h+zrd4f1D28/xg+BdyX9KE1k37D3s4kKS96V0pAMPPmnZC9xmRWE+zjMpm49MJhccvo82fdsLwN+nnRQdwBXj7Pdt4APJP0Qz5C9XDXsDmC9pJ9HxO/ktH8deD3wJBDAX0TEi0nwjKcO+IakarJnQjee0BGaTYFHxzUzs7z4UpWZmeXFwWFmZnlxcJiZWV4cHGZmlhcHh5mZ5cXBYWZmeXFwmJlZXv4/QAgzW/yBXxUAAAAASUVORK5CYII=\n", - "text/plain": [ - "<Figure size 432x288 with 1 Axes>" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "%matplotlib inline\n", "import matplotlib.pyplot as plt\n", @@ -478,22 +440,9 @@ }, { "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYgAAAEWCAYAAAB8LwAVAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/d3fzzAAAACXBIWXMAAAsTAAALEwEAmpwYAAAot0lEQVR4nO3deXxcZ33v8c9Xuyx5X+LEexI7djYSUMOSsrQ0kFJIKFDqpBRyWxrgEkqhhZv09lIaXuXSjcItebUNNMAtS6CBUtObktKyFgq1Q0xCrHHiOIttNIls2dZYsvbf/eMcyWN5JI1tjWak+b5fr3nNOc9Z5qeJc37znOc5z6OIwMzMbLyacgdgZmaVyQnCzMwKcoIwM7OCnCDMzKwgJwgzMyvICcLMzApygjAzs4KcIGzWk3Qs7zUi6Xje+q+dwfm+JenNpYjVbDapK3cAZmcrIlpHlyU9Abw5Iv6tfBGVlqS6iBgqdxw297kGYXOWpBpJt0p6TNIhSV+UtCTd1iTpM2n5EUnbJZ0j6Y+BFwIfS2sgH5vg3P8gKSvpqKTvSLokb1uzpL+Q9GS6/T8kNafbflbS99PP3CfpprT8pFqLpJsk/Ufeekh6u6RHgUfTso+m5+iWdL+kF+btXyvp99O/PZduXyPpDkl/Me5v2SbpXWf/jdtc4wRhc9k7gFcDLwbOAw4Dd6Tb3gQsBNYAS4G3Ascj4n8C3wVuiYjWiLhlgnP/C7ARWAH8CPhs3rY/B54DvABYArwXGJG0Lj3ur4DlwBXAztP4e14NPBe4OF3fnp5jCfA54B8kNaXb3g3cALwCWAD8BtALfBq4QVINgKRlwC+kx5udxLeYbC57K8mFfj+ApPcDT0n6dWCQJDFcGBEPAvefzokj4q7R5fS8hyUtBHIkF+PnRcSBdJfvp/vdCPxbRHw+LT+Uvor1vyOiKy+Gz+Rt+wtJfwBcBPwYeDPw3ojYnW7/8ehnSjoKvBT4OrAV+FZEPH0acViVcA3C5rJ1wD+mt3OOAO3AMHAO8PfAfcDdkn4q6U8l1Rdz0vT2zYfS2zfdwBPppmXpqwl4rMChayYoL9a+cXH8nqT29DbWEZIa0bIiPuvTwBvS5TeQfBdmp3CCsLlsH/CLEbEo79UUEQciYjAi/igiLia5FfRK4I3pcVMNcXwjcD3JrZmFwPq0XMBBoA+4YIJ4CpUD9ADz8tZXFthnLK60veG9wOuBxRGxCDiaxjDVZ30GuF7Ss4AtwFcm2M+qnBOEzWV/A/xxeu8fScslXZ8u/5ykyyTVAt0kt5xG0uOeBs6f5LzzgX6S20PzgA+OboiIEeAu4MOSzktrG8+X1EjSTvELkl4vqU7SUklXpIfuBF4jaZ6kC4HfnOJvmw8MAZ1AnaT3kbQ1jPoE8AFJG5W4XNLSNMb9JO0Xfw98KSKOT/FZVqWcIGwu+yiwDfhXSTngBySNvJD8Qr+HJDm0A9/mxK2WjwKvk3RY0v8pcN7/CzwJHAB2pefN93vAQyQX4S7gT4CaiHiKpNH4d9PyncCz0mP+EhggSU6f5uRG70LuA74GPJLG0sfJt6A+DHwR+Nf0b/w7oDlv+6eBy/DtJZuEPGGQWfWR9CKSW03rwhcBm4BrEGZVJm2MfyfwCScHm4wThFkVkbQFOAKcC3ykrMFYxfMtJjMzK8g1CDMzK2jOPEm9bNmyWL9+fbnDMDObVe6///6DEbG80LY5kyDWr1/Pjh07yh2GmdmsIunJibb5FpOZmRXkBGFmZgU5QZiZWUFOEGZmVpAThJmZFeQEYWZmBZU0QUi6VtJuSXsk3Vpg+1pJ35T0gKQHJb0ib9tt6XG7Jb28lHGamdmpSvYcRDrO/h3ANcB+YLukbRGxK2+3PwC+GBF/Leli4F5gfbq8FbiEZC7hf5O0KSKGSxWvmRUWETzd3c+D+4/w6DPHaKyrYdG8BhbPqx97XzyvgYXN9dTUaOoT2qxRygflrgL2RMReAEl3k8zClZ8gghOTnCwEfpouXw/cHRH9wOOS9qTn+88SxmtmwDPdfTy4/ygPHTjx6sz1T3mcBAubk2SxaN7J7yeSSd5yS7Ktqb52Bv4qOxOlTBCrOHkCk/2cmKxl1PtJJnN5B9BCMoXj6LH5k7DsT8tOIulm4GaAtWvXTkvQZtWkM9fPTw4cTRPCER46cJSnu5NkUCO4YHkrL9y4jMtXLeSy1QvZvHIBQ8PB4d4BjhwfTN57BzjcM5i8946WDfJ0dx+7szkO9w7QOzBx5b+pviZNJg0saq5nccvJNZNCtZUFzfXUVkFtpW9wmEM9A3QdG+BQTz+Hjg3Q1TPAwZ5+usaWB1i/dB4f3XrltH9+uYfauAH4VET8haTnA38v6dJiD46IO4E7Adra2jwsrdkkDh3rT2oEebWDjqN9QPLr//xlLbzggmVcliaDi89dQEtj4UvEwnn1p/XZ/UPDHEmTx6nJJFk+kiaW3dkcR3oHOXJ8kOGRwv9bS9DaUMf8pjrmN9WzoDl5T9ZPLC8Y9z5W3lxPS0Mt0swmmb7BYbp6BjiUXvBPLA/QlSaAQz3ptmMD9EyQWOtrxdKWRpa0NLC0tYFzFzYX3O9slTJBHADW5K2vTsvy/SZwLUBE/KekJmBZkcea2QQO9wycuEWUJoQDR05MPX3+shau2rAkSQarFnLJqoW0TpAMpkNjXS3nLKjlnAVNRR8zMhLk+odOTSY9SfLI9Q3SfXyIXN8gub4hnsn18VjnELm+IbqPDzI0QXIZVSNobTw5mUyVaOY31bMgr7y2RnT1JL/kD/UMcOhY/ynLB9Nf+l09AxzrHyoYS32tkot9SyNLWxtYt3Te2PLSloaxRLC0pZElrQ3Mb6ybkeRWygSxHdgoaQPJxX0rcOO4fZ4CXgp8Kp3IpIlkEvZtwOckfZikkXoj8F8ljNVszOGeAXbuP8IDTx1h574j9PYPsaA5uTAk78mFJHk/dX1+Ux31tTPXg/xo7yA/+enJt4n2dZ1IBuuXzuPZ6xbzphes47JVi7hk1QIWNJ1eDaAcamrEwuZ6FjbXs27p6R0bEfQNjiRJpO9EEulO33Nj70nZaKL56ZE+cv25sW0T1WCmUlcjlrY2sKSlkaUtyQV/SUsDy1qTX/3JcrJ9SUsDC5pm5oJ/ukqWICJiSNItJJOr1wJ3RcTDkm4HdkTENpLJ2z8u6V0kDdY3pVMgPizpiyQN2kPA292DyUphcHiE3dkcDzx1mAeeOsID+47w+MEeIPmFuemc+SxpaeCZXB97nhm9mAwy1XVjXkNtgURy9gmmu2+Qn4y7TfTkod6x7WuXzOPyVYv4teeu4/K0ZrCwufKTwXSTRHNDLc0NtaxYMPX+hUQEvQPDYwmlUKIZHgkWz2sY+6W/tLWyL/ina87MKNfW1hYe7tumkj3alySDfUd44KnDPHTgKH2DIwAsa23kyrWLkteaxVy+emHBe/CjF47RX56jSWNsPX+5b7DAflP/Mh2fYFoa63iqq3cseQGsXtw81l5w+apFXLpqAYvmNUzvF2ZznqT7I6Kt0LZyN1KblUzf4DAPHTjKA08dZue+5JbRaKNsQ20Nl6xawI1XrRtLCqsWNRf1q08SLY11tDTWce7C04/rTBLM4d4BNp3Tyuues5pL03aDJS1OBlZaThB2kogg291Ha2MdrTPUEDYdIoInDvWyc196q+ipI7R3dI81VK5Z0szPrF+SJoPFbDl3Po115el/f7YJxmymOEHYSe7evo/bvvwQkPzKXtxSnzakpe/z0vfWBpbMaxhrcFvSkvRRr5uhxtnuvkF+nNYKRmsIh3sHAWhpqOVZaxbxlhefz5VrFnPF2kUsa22ckbjM5hInCDvJ/U8eZvG8et72kgvo6hmkK+2r3dUzwEOHj9DVM0B3X+GuepA8STvaLW9xS9JwN/q+ZNzykpYG5jVM/U9weCR45OncWDJ4YN8RHus8RkTSH37jilZedvFKrkhvFW1cMb8qHqIyKzUnCDtJJtvNpasWcvOLLphwn8HhEQ73DNDVmzzh2dU7MPbAz+HepA/44Z4B9nX1Jr/sewYm7JPeVF+T1ERGu/zNO1Fj6RkYZudTR/jx/iNjT+IuaWngyjWLuP5Z53Hl2sVcvmbhrOiyaTYbOUHYmKHhER55+hg3vWD9pPvV19awYkETK4p86Cki6O4bGquJdKUJZPTp0bGaSu8gjx88NvYEaV2NuOS8BfzKc1Zz5drFXLl2EWuXzJs17SJms50ThI154lAPA0MjbF45f1rPK5144GnDspaijukbHEaibA3JZuYEYXnaO3IAbF55hk8WTSOP8GlWfp5RzsZkst3U1YgLVhT3K9/M5jYnCBuT6chxwfJW39YxM8AJwvJksjk2nzu97Q9mNns5QRgAR48PcuDI8YpofzCzyuAEYQDszqYN1K5BmFnKCcIA2J3tBmCLaxBmlnKCMADaszkWzavnnAUes8jMEk4QBkCmo5vNK+f7KWUzG+MEYYyMBLuzOTdQm9lJnCCM/YeP0zMwPO1DbJjZ7OYEYbSnDdSbz3UNwsxOKGmCkHStpN2S9ki6tcD2v5S0M309IulI3rbhvG3bShlntct05JBg0zmt5Q7FzCpIyQbrk1QL3AFcA+wHtkvaFhG7RveJiHfl7f8O4Mq8UxyPiCtKFZ+dkMl2s35pS1GT95hZ9ShlDeIqYE9E7I2IAeBu4PpJ9r8B+HwJ47EJZLI5tz+Y2SlKmSBWAfvy1venZaeQtA7YAHwjr7hJ0g5JP5D06gmOuzndZ0dnZ+c0hV1degeGeOJQj3swmdkpKqWReitwT0QM55Wti4g24EbgI5JOmQMzIu6MiLaIaFu+fPlMxTqnPPJ0Mrezh9gws/FKmSAOAGvy1lenZYVsZdztpYg4kL7vBb7Fye0TNk0yHR5iw8wKK2WC2A5slLRBUgNJEjilN5KkzcBi4D/zyhZLakyXlwFXA7vGH2tnL5PN0dJQy+rFzeUOxcwqTMm6rUTEkKRbgPuAWuCuiHhY0u3AjogYTRZbgbsjIvIO3wL8raQRkiT2ofzeTzZ92ju6uWjlfGpqPMSGmZ2spP0aI+Je4N5xZe8bt/7+Asd9H7islLEZRASZbI5fuvzccodiZhWoUhqprQyy3X0cPT7IFndxNbMCnCCqWKZjdJIgN1Cb2amcIKrY6BhMF7kGYWYFOEFUsUxHjlWLmlnQVF/uUMysAjlBVLFMtpstfkDOzCbgBFGl+oeGeazTQ2yY2cScIKrUnmeOMTwSHmLDzCbkBFGlxnowuQZhZhNwgqhSmWw3jXU1rF86r9yhmFmFcoKoUplsjk3nzKeu1v8EzKwwXx2qVHuHJwkys8k5QVShzlw/B4/1+wlqM5uUE0QV2p1NGqg9BpOZTcYJogplPMSGmRXBCaIKtXfkWDG/kaWtjeUOxcwqmBNEFcpku93+YGZTcoKoMkPDIzz69DG3P5jZlJwgqszjB3sYGB7xEBtmNiUniCrTnvUQG2ZWnJImCEnXStotaY+kWwts/0tJO9PXI5KO5G17k6RH09ebShlnNcl0dFNXIy5Y3lruUMyswtWV6sSSaoE7gGuA/cB2SdsiYtfoPhHxrrz93wFcmS4vAf4QaAMCuD899nCp4q0WmWyOC1e00lDnyqOZTa6UV4mrgD0RsTciBoC7gesn2f8G4PPp8suBr0dEV5oUvg5cW8JYq0amo9tDbJhZUUqZIFYB+/LW96dlp5C0DtgAfON0jpV0s6QdknZ0dnZOS9Bz2dHeQX56tM9dXM2sKJVyn2ErcE9EDJ/OQRFxZ0S0RUTb8uXLSxTa3DH6BLVrEGZWjFImiAPAmrz11WlZIVs5cXvpdI+1ImVGx2ByDcLMilDKBLEd2Chpg6QGkiSwbfxOkjYDi4H/zCu+D3iZpMWSFgMvS8vsLGSy3SyeV8+K+R5iw8ymVrJeTBExJOkWkgt7LXBXRDws6XZgR0SMJoutwN0REXnHdkn6AEmSAbg9IrpKFWu1yGRzbF65AEnlDsXMZoGSJQiAiLgXuHdc2fvGrb9/gmPvAu4qWXBVZmQk2J3N8as/s2bqnc3MqJxGaiuxfYd76R0YdgO1mRXNCaJKtHd4iA0zOz1OEFUik+1Ggk3nuAZhZsVxgqgSmY4cG5a20NxQW+5QzGyWcIKoEskkQa49mFnxJuzFJOk1RRzfl/ZUsgrW0z/Ek129vObZq8sdipnNIpN1c/048E/AZJ3mX8S4bqxWeR55OkeEh9gws9MzWYL4l4j4jckOlvSZaY7HSsBDbJjZmZiwDSIi3jDVwcXsY+WX6eimtbGOVYuayx2Kmc0iRTdSS7pQ0mckfUnS80sZlE2v9myOi1bOp6bGQ2yYWfEma6Ruioi+vKIPAO9Nl78KXFHCuGyaRASZjm5e9azzyh2Kmc0yk9UgvirpjXnrg8B6YB1wWvM2WPl0HO2ju2/IkwSZ2WmbLEFcCyyQ9DVJLwJ+j2Qq0F8Gfm0mgrOzNzpJ0Bb3YDKz0zThLaZ0drePSfp74H8BbwP+ICIem6ng7OyNjsG0yQnCzE7TZG0QzwXeAwwAHwSOA38s6QDwgYg4MiMR2lnJZHOsXtzMgqb6codiZrPMZM9B/C3wCqAV+GREXA1slfRi4Askt5uswmU6uj2Cq5mdkcnaIIY40Sg9MFoYEd+OCCeHWaBvcJi9B3vY4jGYzOwMTFaDuBF4C0lyeOMk+1mF2vPMMYZHwjUIMzsjkzVSPwL87gzGYtNsdIgNj+JqZmdiwltMkv55qoOn2kfStZJ2S9oj6dYJ9nm9pF2SHpb0ubzyYUk709e2qWKxU2U6ummsq2H90pZyh2Jms9Bkt5h+dooLs4CLJ9wo1QJ3ANcA+4HtkrZFxK68fTYCtwFXR8RhSSvyTnE8Iq4o4m+wCWTSITZqPcSGmZ2ByRLE9UUcPzDJtquAPRGxF0DS3ek5d+Xt81vAHRFxGCAininiM61ImWw3P795xdQ7mpkVMFkbxLfP8tyrgH156/uB547bZxOApO8BtcD7I+Jr6bYmSTtIelN9KCK+Mv4DJN0M3Aywdu3aswx3bunM9XPw2IAbqM3sjE1Wg5ipz98IvARYDXxH0mXpQ3jrIuKApPOBb0h6aPxT3BFxJ3AnQFtbW8xo5BVudIgNN1Cb2Zkq5ZzUB4A1eeur07J8+4FtETEYEY8Dj5AkDCLiQPq+F/gWcGUJY51zMukQG65BmNmZmjJBSHqVpDNJJNuBjZI2SGoAtgLjG72/QlJ7QNIykltOeyUtltSYV341J7dd2BTas92cs6CRJS0N5Q7FzGapYi78vwo8KulPJW0u9sQRMQTcAtwHtANfjIiHJd0u6bp0t/uAQ5J2Ad8E3hMRh4AtwA5JP07LP5Tf+8mmlunIufZgZmdlyjaIiHiDpAXADcCnJAXwSeDzEZGb4th7gXvHlb0vbzmAd6ev/H2+D1xW7B9hJxscHmHPM8d44aZl5Q7FzGaxom4dRUQ3cA9wN3AuyZwQP5L0jhLGZmfo8YM9DAyPsMU1CDM7C8W0QVwn6R9JGorrgasi4heBZ+GhOCpSe4d7MJnZ2Summ+trgb+MiO/kF0ZEr6TfLE1YdjYy2Rz1teL8Za3lDsXMZrFiEsT7gY7RFUnNwDkR8URE/HupArMzl+no5oLlrTTUlbIXs5nNdcVcQf4BGMlbH07LrEJlsjm2nOv2BzM7O8UkiLqIyJ8waABw5/oKdaR3gI6jfWz2HNRmdpaKSRCdec8tIOl64GDpQrKzcWIOCNcgzOzsFNMG8Vbgs5I+RjLE9z48w1zFyqQ9mLa4BmFmZ6mYB+UeA54nqTVdP1byqOyMZbI5lrQ0sHx+Y7lDMbNZrqjRXCX9EnAJyRDcAETE7SWMy85QezbH5pXzGf3vZGZ2pop5UO5vSMZjegfJLaZfAdaVOC47AyMjwSNZj8FkZtOjmEbqF0TEG4HDEfFHwPNJJ/qxyvJUVy/HB4f9BLWZTYtiEkRf+t4r6TxgkGQ8JqswY5MEuYHazKZBMW0QX5W0CPgz4EdAAB8vZVB2Zto7ctQINq5wgjCzszdpgkgnCvr3dArQL0n6Z6ApIo7ORHB2ejLZbtYva6G5obbcoZjZHDDpLaaIGAHuyFvvd3KoXJlszkN8m9m0KaYN4t8lvVbuN1nRevqHePJQr9sfzGzaFJMg3kIyOF+/pG5JOUndJY7LTtPupz3EhplNr2KepPZP0lkg05EmCNcgzGyaFPOg3IsKvYo5uaRrJe2WtEfSrRPs83pJuyQ9LOlzeeVvkvRo+npT8X9Sdcpku2ltrGP14uZyh2Jmc0Qx3Vzfk7fcBFwF3A/8/GQHSaolaeC+BtgPbJe0LSJ25e2zEbgNuDoiDktakZYvAf4QaCPpVnt/euzhov+yKpPp8BAbZja9pqxBRMSr8l7XAJcCxVyorwL2RMTedA6Ju4Hrx+3zW8Adoxf+iHgmLX858PWI6Eq3fR24trg/qfpEBO3Zbj9BbWbT6kzmpNwPbCliv1UkQ4PnH7dq3D6bgE2SvifpB5KuPY1jkXSzpB2SdnR2dhb9B8w1Pz3aR65vyGMwmdm0mvIWk6S/IrnNA0lCuYLkierp+vyNwEuA1cB3JF1W7MERcSdwJ0BbW1tMsfucNTYHhGsQZjaNimmD2JG3PAR8PiK+V8RxB4A1eeur07J8+4EfRsQg8LikR0gSxgGSpJF/7LeK+MyqNDqL3KZznCDMbPoUkyDuAfoiYhiSxmdJ8yKid4rjtgMbJW0gueBvBW4ct89XgBuAT0paRnLLaS/wGPBBSYvT/V5G0phtBbR3dLNmSTPzm+rLHYqZzSFFPUkN5PedbAb+baqDImIIuAW4D2gHvhgRD0u6PW+O6/uAQ5J2Ad8E3hMRhyKiC/gASZLZDtyellkBGc8BYWYlUEwNoil/mtGIOCZpXjEnj4h7gXvHlb0vbzmAd6ev8cfeBdxVzOdUs77BYfZ2HuMVl64sdyhmNscUU4PokfTs0RVJzwGOly4kOx17njnGSHiIDTObfsXUIH4H+AdJPyWZcnQlyRSkVgHaOzxJkJmVRjFjMW2XtBm4KC3anfY6sgqQyeZoqq9h3dKWcodiZnNMMWMxvR1oiYifRMRPgFZJ/730oVkxMtluLjpnPrU1HmLDzKZXMW0Qv5XOKAdAOvTFb5UsIitaRNDe4R5MZlYaxSSI2vzJgtJB+BpKF5IVq/NYP109Ax6DycxKophG6q8BX5D0t+n6W9IyK7MTc0C4BmFm06+YBPE/gJuBt6XrXwc+XrKIrGiZrHswmVnpFDPc90hE/E1EvC4iXgfsAv6q9KHZVDIdOVYuaGJxi+/4mdn0K6YGgaQrScZMej3wOPDlUgZlxWnP5tz+YGYlM2GCkLSJJCncABwEvgAoIn5uhmKzSQwOj7DnmRwv3rS83KGY2Rw1WQ0iA3wXeGVE7AGQ9K4ZicqmtLezh8Hh8BwQZlYyk7VBvAboAL4p6eOSXkoy1IZVgBMN1O7BZGalMWGCiIivRMRWYDPJUNy/A6yQ9NeSXjZD8dkE2jty1NeK85d7iA0zK41iejH1RMTnIuJVJDO7PUDS9dXKKJPt5sIV86mvPZNpxc3MpnZaV5eIOBwRd0bES0sVkBUn05Fji59/MLMS8s/PWehwzwDZ7j53cTWzknKCmIUyWQ+xYWal5wQxC431YHINwsxKqKQJQtK1knZL2iPp1gLbb5LUKWln+npz3rbhvPJtpYxztsl05Fja0sDy1sZyh2Jmc1hRQ22ciXRY8DuAa4D9wHZJ2yJi17hdvxARtxQ4xfGIuKJU8c1mmWw3m8+dT94o7GZm066UNYirgD0RsTciBoC7getL+HlVYXgk2P20Jwkys9IrZYJYBezLW9+flo33WkkPSrpH0pq88iZJOyT9QNKrC32ApJvTfXZ0dnZOX+QV7MlDPfQNjniIbzMruXI3Un8VWB8Rl5PMM/HpvG3rIqINuBH4iKQLxh+cPpPRFhFty5dXx6B1u9MeTFvOdQ3CzEqrlAniAJBfI1idlo2JiEMR0Z+ufgJ4Tt62A+n7XuBbwJUljHXWaM/mqBFcuKK13KGY2RxXygSxHdgoaYOkBmArcFJvJEnn5q1eB7Sn5YslNabLy4CrSSYqqnqZjm42LGuhqb623KGY2RxXsl5METEk6RbgPqAWuCsiHpZ0O7AjIrYBvy3pOmAI6AJuSg/fAvytpBGSJPahAr2fqlImm+Oy1QvLHYaZVYGSJQiAiLgXuHdc2fvylm8Dbitw3PeBy0oZ22x0rH+Ip7p6eX3b6nKHYmZVoNyN1HYadnuIDTObQU4Qs4iH2DCzmeQEMYtkOnLMb6xj1aLmcodiZlXACWIW8RAbZjaTnCBmiYgg0+EhNsxs5jhBzBIHjhwn1z/k9gczmzFOELNEpsM9mMxsZjlBzBKjPZgu8iB9ZjZDnCBmifZsjrVL5tHaWNJnG83MxjhBzBKZjm4P8W1mM8oJYhboGxzm8YM9bPYQ32Y2g5wgZoFHnz7GSMAW1yDMbAY5QcwC7WNDbLgGYWYzxwliFsh05Giur2XtknnlDsXMqogTxCyQyXazaeV8ams8xIaZzRwniAoXEbR3dLv9wcxmnBNEhevM9XO4d9BdXM1sxjlBVLj20UmC3EBtZjPMCaLCZTrSHkyuQZjZDCtpgpB0raTdkvZIurXA9pskdUramb7enLftTZIeTV9vKmWclSyTzXHuwiYWzWsodyhmVmVKNrCPpFrgDuAaYD+wXdK2iNg1btcvRMQt445dAvwh0AYEcH967OFSxVup2j3EhpmVSSlrEFcBeyJib0QMAHcD1xd57MuBr0dEV5oUvg5cW6I4K9bA0AiPdR5z+4OZlUUpE8QqYF/e+v60bLzXSnpQ0j2S1pzOsZJulrRD0o7Ozs7pirti7D14jMHhcA3CzMqi3I3UXwXWR8TlJLWET5/OwRFxZ0S0RUTb8uXLSxJgOY1OErTFNQgzK4NSJogDwJq89dVp2ZiIOBQR/enqJ4DnFHtsNWjPdtNQW8OGZS3lDsXMqlApE8R2YKOkDZIagK3AtvwdJJ2bt3od0J4u3we8TNJiSYuBl6VlVSXTkePCFa3U15a7omdm1ahkvZgiYkjSLSQX9lrgroh4WNLtwI6I2Ab8tqTrgCGgC7gpPbZL0gdIkgzA7RHRVapYK1Um283VFy4rdxhmVqVKOn9lRNwL3Duu7H15y7cBt01w7F3AXaWMr5J19QzwdHc/W1a6/cHMysP3LipUZmwOCPdgMrPycIKoUKM9mDa7BmFmZeIEUaEy2W6WtTawfH5juUMxsyrlBFGhMtmcaw9mVlZOEBVoeCTYnc35CWozKysniAr0xKEe+odGPAaTmZWVE0QFOtFA7RqEmZWPE0QFymS7qa0RF65oLXcoZlbFnCAqUCab4/xlLTTV15Y7FDOrYk4QFSiT7eYi314yszJzgqgwub5B9nUd9xDfZlZ2ThAV5pGn3UBtZpXBCaLCtI/2YHINwszKzAmiwmSy3cxvquO8hU3lDsXMqpwTRIXJdOTYsnIBksodiplVOSeIChIRyRhMHuLbzCqAE0QF2X/4OMf6hzxIn5lVBCeICpLJjjZQuwZhZuXnBFFBMh3JLHIXneMEYWblV9IEIelaSbsl7ZF06yT7vVZSSGpL19dLOi5pZ/r6m1LGWSky2Rzrls6jpbGkU4WbmRWlZFciSbXAHcA1wH5gu6RtEbFr3H7zgXcCPxx3isci4opSxVeJ2rPdfkDOzCpGKX+qXgXsiYi9AJLuBq4Hdo3b7wPAnwDvKWEsExoeCQ4e60eCGil9gdL30bIT2xlbn86uqMcHhnniYA+vuvy8aTunmdnZKGWCWAXsy1vfDzw3fwdJzwbWRMT/kzQ+QWyQ9ADQDfxBRHy3FEEe6R3guR/89zM6Nj9pTJ1Q8ren+9ec2H9oZISRgC1uoDazClG2m92SaoAPAzcV2NwBrI2IQ5KeA3xF0iUR0T3uHDcDNwOsXbv2jOJoaazjg798GSMRRAQjASPpe7J+oiwCRkby14vYP68sIhgZOXX/4XS/521YytUXLjujv8PMbLqVMkEcANbkra9Oy0bNBy4FvpXeqlkJbJN0XUTsAPoBIuJ+SY8Bm4Ad+R8QEXcCdwK0tbXFmQTZVF/Ljc89s+RiZjaXlbIX03Zgo6QNkhqArcC20Y0RcTQilkXE+ohYD/wAuC4idkhanjZyI+l8YCOwt4SxmpnZOCWrQUTEkKRbgPuAWuCuiHhY0u3AjojYNsnhLwJulzQIjABvjYiuUsVqZmanUsQZ3ZmpOG1tbbFjx46pdzQzszGS7o+ItkLb/CS1mZkV5ARhZmYFOUGYmVlBThBmZlaQE4SZmRU0Z3oxSeoEnjyLUywDDk5TOLOdv4uT+fs4mb+PE+bCd7EuIpYX2jBnEsTZkrRjoq5e1cbfxcn8fZzM38cJc/278C0mMzMryAnCzMwKcoI44c5yB1BB/F2czN/Hyfx9nDCnvwu3QZiZWUGuQZiZWUFOEGZmVlDVJwhJ10raLWmPpFvLHU85SVoj6ZuSdkl6WNI7yx1TuUmqlfSApH8udyzlJmmRpHskZSS1S3p+uWMqJ0nvSv8/+Ymkz0tqKndM062qE0Q6KdEdwC8CFwM3SLq4vFGV1RDwuxFxMfA84O1V/n0AvBNoL3cQFeKjwNciYjPwLKr4e5G0CvhtoC0iLiWZ82ZreaOaflWdIICrgD0RsTciBoC7gevLHFPZRERHRPwoXc6RXABWlTeq8pG0Gvgl4BPljqXcJC0kmcjr7wAiYiAijpQ1qPKrA5ol1QHzgJ+WOZ5pV+0JYhWwL299P1V8QcwnaT1wJfDDModSTh8B3ksyq2G12wB0Ap9Mb7l9QlJLuYMql4g4APw58BTQARyNiH8tb1TTr9oThBUgqRX4EvA7EdFd7njKQdIrgWci4v5yx1Ih6oBnA38dEVcCPUDVttlJWkxyt2EDcB7QIukN5Y1q+lV7gjgArMlbX52WVS1J9STJ4bMR8eVyx1NGVwPXSXqC5Nbjz0v6THlDKqv9wP6IGK1R3kOSMKrVLwCPR0RnRAwCXwZeUOaYpl21J4jtwEZJGyQ1kDQybStzTGUjSST3mNsj4sPljqecIuK2iFgdEetJ/l18IyLm3C/EYkVEFtgn6aK06KXArjKGVG5PAc+TNC/9/+alzMFG+7pyB1BOETEk6RbgPpJeCHdFxMNlDqucrgZ+HXhI0s607Pcj4t7yhWQV5B3AZ9MfU3uB/1bmeMomIn4o6R7gRyS9/x5gDg674aE2zMysoGq/xWRmZhNwgjAzs4KcIMzMrCAnCDMzK8gJwszMCnKCMEtJOpa+r5d04zSf+/fHrX9/Os9vVgpOEGanWg+cVoJIB2ybzEkJIiLm3FO3Nvc4QZid6kPACyXtTMf8r5X0Z5K2S3pQ0lsAJL1E0nclbSN9qljSVyTdn84TcHNa9iGSUT93SvpsWjZaW1F67p9IekjSr+ad+1t58y98Nn1iF0kfSufseFDSn8/4t2NVo6qfpDabwK3A70XEKwHSC/3RiPgZSY3A9ySNjtz5bODSiHg8Xf+NiOiS1Axsl/SliLhV0i0RcUWBz3oNcAXJ/ArL0mO+k267EriEZBjp7wFXS2oHfhnYHBEhadH0/ulmJ7gGYTa1lwFvTIcf+SGwFNiYbvuvvOQA8NuSfgz8gGQgyI1M7meBz0fEcEQ8DXwb+Jm8c++PiBFgJ8mtr6NAH/B3kl4D9J7l32Y2IScIs6kJeEdEXJG+NuSN/d8ztpP0EpJRPp8fEc8iGZ/nbKah7M9bHgbqImKIZKKre4BXAl87i/ObTcoJwuxUOWB+3vp9wNvSodCRtGmCyXIWAocjolfSZpJpW0cNjh4/zneBX03bOZaTzNr2XxMFls7VsTAdQPFdJLemzErCbRBmp3oQGE5vFX2KZC7m9cCP0obiTuDVBY77GvDWtJ1gN8ltplF3Ag9K+lFE/Fpe+T8Czwd+DATw3ojIpgmmkPnAP0lqIqnZvPuM/kKzIng0VzMzK8i3mMzMrCAnCDMzK8gJwszMCnKCMDOzgpwgzMysICcIMzMryAnCzMwK+v9dS7Ovcb84WwAAAABJRU5ErkJggg==\n", - "text/plain": [ - "<Figure size 432x288 with 1 Axes>" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "acc_per_epoch = [np.mean(acc_per_epoch) for acc_per_epoch in running_test_acc]\n", "display_loss_plot(acc_per_epoch, title=\"Test accuracy\", ylabel=\"Accuracy [%]\")" @@ -501,27 +450,16 @@ }, { "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0.798340863819657" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "test(model, test_quantized_loader)" ] }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -540,23 +478,16 @@ }, { "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "<All keys matched successfully>" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "import torch\n", "\n", + "# Make sure the model is on CPU before loading a pretrained state_dict\n", + "model = model.cpu()\n", + "\n", + "# Load pretrained weights\n", "trained_state_dict = torch.load(\"state_dict.pth\")[\"models_state_dict\"][0]\n", "\n", "model.load_state_dict(trained_state_dict, strict=False)" @@ -564,23 +495,14 @@ }, { "cell_type": "code", - "execution_count": 18, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "data": { - "text/plain": [ - "0.9188772287810328" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ + "# Move the model back to it's target device\n", + "model.to(device)\n", + "\n", + "# Test for accuracy\n", "test(model, test_quantized_loader)" ] }, @@ -600,6 +522,16 @@ "Sometimes, it's desirable to make some changes to our trained network prior to export (this is known in general as \"network surgery\"). This depends on the model and is not generally necessary, but in this case we want to make a couple of changes to get better results with FINN." ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Move the model to CPU before surgery\n", + "model = model.cpu()" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -609,20 +541,9 @@ }, { "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(64, 593)" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "from copy import deepcopy\n", "\n", @@ -634,20 +555,9 @@ }, { "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(64, 600)" - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "import numpy as np\n", "\n", @@ -658,20 +568,9 @@ }, { "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "torch.Size([64, 600])" - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "modified_model[0].weight.data = torch.from_numpy(W_new)\n", "modified_model[0].weight.shape" @@ -690,11 +589,10 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "from brevitas.core.quant import QuantType\n", "from brevitas.nn import QuantIdentity\n", "\n", "\n", @@ -702,23 +600,27 @@ " def __init__(self, my_pretrained_model):\n", " super(CybSecMLPForExport, self).__init__()\n", " self.pretrained = my_pretrained_model\n", - " self.qnt_output = QuantIdentity(quant_type=QuantType.BINARY, bit_width=1, min_val=-1.0, max_val=1.0)\n", + " self.qnt_output = QuantIdentity(\n", + " quant_type='binary', \n", + " scaling_impl_type='const',\n", + " bit_width=1, min_val=-1.0, max_val=1.0)\n", " \n", " def forward(self, x):\n", " # assume x contains bipolar {-1,1} elems\n", " # shift from {-1,1} -> {0,1} since that is the\n", " # input range for the trained network\n", - " x = (x + torch.tensor([1.0])) / 2.0 \n", + " x = (x + torch.tensor([1.0]).to(x.device)) / 2.0 \n", " out_original = self.pretrained(x)\n", " out_final = self.qnt_output(out_original) # output as {-1,1} \n", " return out_final\n", "\n", - "model_for_export = CybSecMLPForExport(modified_model)" + "model_for_export = CybSecMLPForExport(modified_model)\n", + "model_for_export.to(device)" ] }, { "cell_type": "code", - "execution_count": 23, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -731,16 +633,17 @@ " with torch.no_grad():\n", " for data in test_loader:\n", " inputs, target = data\n", + " inputs, target = inputs.to(device), target.to(device)\n", " # pad inputs to 600 elements\n", - " input_padded = np.pad(inputs, [(0,0), (0,7)])\n", + " input_padded = torch.nn.functional.pad(inputs, (0,7,0,0))\n", " # convert inputs to {-1,+1}\n", - " input_scaled = 2*input_padded - 1\n", + " input_scaled = 2 * input_padded - 1\n", " # run the model\n", - " output = model(torch.from_numpy(input_scaled).float())\n", - " y_pred.extend(list(output.flatten()))\n", + " output = model(input_scaled.float())\n", + " y_pred.extend(list(output.flatten().cpu().numpy()))\n", " # make targets bipolar {-1,+1}\n", - " expected = 2*target.float() - 1\n", - " expected = expected.detach().numpy()\n", + " expected = 2 * target.float() - 1\n", + " expected = expected.cpu().numpy()\n", " y_true.extend(list(expected.flatten()))\n", " \n", " return accuracy_score(y_true, y_pred)" @@ -748,20 +651,9 @@ }, { "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0.9188772287810328" - ] - }, - "execution_count": 24, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "test_padded_bipolar(model_for_export, test_quantized_loader)" ] @@ -780,35 +672,16 @@ }, { "cell_type": "code", - "execution_count": 25, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Model saved to cybsec-mlp-ready.onnx\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "<ipython-input-22-78c27bb59095>:15: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect.\n", - " x = (x + torch.tensor([1.0])) / 2.0\n", - "/workspace/brevitas/src/brevitas/quant_tensor/__init__.py:74: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect.\n", - " training = torch.tensor(training, dtype=torch.bool)\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "import brevitas.onnx as bo\n", "from brevitas.quant_tensor import QuantTensor\n", "\n", "ready_model_filename = \"cybsec-mlp-ready.onnx\"\n", "input_shape = (1, 600)\n", + "\n", "# create a QuantTensor instance to mark input as bipolar during export\n", "input_a = np.random.randint(0, 1, size=input_shape).astype(np.float32)\n", "input_a = 2 * input_a - 1\n", @@ -818,6 +691,10 @@ " input_t, scale=torch.tensor(scale), bit_width=torch.tensor(1.0), signed=True\n", ")\n", "\n", + "#Move to CPU before export\n", + "model_for_export.cpu()\n", + "\n", + "# Export to ONNX\n", "bo.export_finn_onnx(\n", " model_for_export, export_path=ready_model_filename, input_t=input_qt\n", ")\n", @@ -843,38 +720,9 @@ }, { "cell_type": "code", - "execution_count": 26, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Serving 'cybsec-mlp-ready.onnx' at http://0.0.0.0:8081\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " <iframe\n", - " width=\"100%\"\n", - " height=\"400\"\n", - " src=\"http://localhost:8081/\"\n", - " frameborder=\"0\"\n", - " allowfullscreen\n", - " ></iframe>\n", - " " - ], - "text/plain": [ - "<IPython.lib.display.IFrame at 0x7fb36398c3a0>" - ] - }, - "execution_count": 26, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "from finn.util.visualization import showInNetron\n", "\n", @@ -888,18 +736,11 @@ "## That's it! <a id=\"thats_it\" ></a>\n", "You created, trained and tested a quantized MLP that is ready to be loaded into FINN, congratulations! You can now proceed to the next notebook." ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -913,7 +754,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.5" + "version": "3.7.0" } }, "nbformat": 4, diff --git a/notebooks/end2end_example/cybersecurity/2-import-into-finn-and-verify.ipynb b/notebooks/end2end_example/cybersecurity/2-import-into-finn-and-verify.ipynb index a0fef1ab6112e734abfc5d5a22a526b41f5503a5..07c8dbb1b9a4fd356aaf6a5bc5679e21a3152c1f 100644 --- a/notebooks/end2end_example/cybersecurity/2-import-into-finn-and-verify.ipynb +++ b/notebooks/end2end_example/cybersecurity/2-import-into-finn-and-verify.ipynb @@ -20,7 +20,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -58,7 +58,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -77,85 +77,9 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['__class__',\n", - " '__delattr__',\n", - " '__dict__',\n", - " '__dir__',\n", - " '__doc__',\n", - " '__eq__',\n", - " '__format__',\n", - " '__ge__',\n", - " '__getattribute__',\n", - " '__gt__',\n", - " '__hash__',\n", - " '__init__',\n", - " '__init_subclass__',\n", - " '__le__',\n", - " '__lt__',\n", - " '__module__',\n", - " '__ne__',\n", - " '__new__',\n", - " '__reduce__',\n", - " '__reduce_ex__',\n", - " '__repr__',\n", - " '__setattr__',\n", - " '__sizeof__',\n", - " '__str__',\n", - " '__subclasshook__',\n", - " '__weakref__',\n", - " '_model_proto',\n", - " 'analysis',\n", - " 'check_all_tensor_shapes_specified',\n", - " 'check_compatibility',\n", - " 'cleanup',\n", - " 'find_consumer',\n", - " 'find_consumers',\n", - " 'find_direct_predecessors',\n", - " 'find_direct_successors',\n", - " 'find_producer',\n", - " 'find_upstream',\n", - " 'get_all_tensor_names',\n", - " 'get_finn_nodes',\n", - " 'get_initializer',\n", - " 'get_metadata_prop',\n", - " 'get_node_index',\n", - " 'get_nodes_by_op_type',\n", - " 'get_non_finn_nodes',\n", - " 'get_tensor_datatype',\n", - " 'get_tensor_fanout',\n", - " 'get_tensor_layout',\n", - " 'get_tensor_shape',\n", - " 'get_tensor_sparsity',\n", - " 'get_tensor_valueinfo',\n", - " 'graph',\n", - " 'is_fork_node',\n", - " 'is_join_node',\n", - " 'make_empty_exec_context',\n", - " 'make_new_valueinfo_name',\n", - " 'model',\n", - " 'rename_tensor',\n", - " 'save',\n", - " 'set_initializer',\n", - " 'set_metadata_prop',\n", - " 'set_tensor_datatype',\n", - " 'set_tensor_layout',\n", - " 'set_tensor_shape',\n", - " 'set_tensor_sparsity',\n", - " 'temporary_fix_oldstyle_domain',\n", - " 'transform']" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "dir(model_for_sim)" ] @@ -169,24 +93,9 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Input tensor name: 0\n", - "Output tensor name: 73\n", - "Input tensor shape: [1, 600]\n", - "Output tensor shape: [1, 1]\n", - "Input tensor datatype: BIPOLAR\n", - "Output tensor datatype: FLOAT32\n", - "List of node operator types in the graph: \n", - "['Mul', 'Add', 'Div', 'MatMul', 'Mul', 'Add', 'BatchNormalization', 'MultiThreshold', 'Mul', 'MatMul', 'Mul', 'Add', 'BatchNormalization', 'MultiThreshold', 'Mul', 'MatMul', 'Mul', 'Add', 'BatchNormalization', 'MultiThreshold', 'Mul', 'MatMul', 'Mul', 'Add', 'MultiThreshold']\n" - ] - } - ], + "outputs": [], "source": [ "from finn.core.datatype import DataType\n", "\n", @@ -226,7 +135,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -262,38 +171,9 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Serving 'cybsec-mlp-verification.onnx' at http://0.0.0.0:8081\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " <iframe\n", - " width=\"100%\"\n", - " height=\"400\"\n", - " src=\"http://localhost:8081/\"\n", - " frameborder=\"0\"\n", - " allowfullscreen\n", - " ></iframe>\n", - " " - ], - "text/plain": [ - "<IPython.lib.display.IFrame at 0x7f3be619b2b0>" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "from finn.util.visualization import showInNetron\n", "\n", @@ -311,20 +191,9 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "torch.Size([100, 593])" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "import numpy as np\n", "from torch.utils.data import TensorDataset\n", @@ -356,20 +225,9 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "<All keys matched successfully>" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "input_size = 593 \n", "hidden1 = 64 \n", @@ -409,7 +267,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -441,7 +299,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -476,17 +334,9 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "ok 100 nok 0: 100%|██████████| 100/100 [00:21<00:00, 4.72it/s]\n" - ] - } - ], + "outputs": [], "source": [ "import numpy as np\n", "from tqdm import trange\n", @@ -511,17 +361,9 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Verification succeeded. Brevitas and FINN-ONNX execution outputs are identical\n" - ] - } - ], + "outputs": [], "source": [ "if ok == n_verification_inputs:\n", " print(\"Verification succeeded. Brevitas and FINN-ONNX execution outputs are identical\")\n", diff --git a/notebooks/end2end_example/cybersecurity/3-build-accelerator-with-finn.ipynb b/notebooks/end2end_example/cybersecurity/3-build-accelerator-with-finn.ipynb index 551c321534cfefa13b8d34b7f1e7685000702ec0..980a770fe2b47aebd9da2fe2fdb8943b542c07b2 100644 --- a/notebooks/end2end_example/cybersecurity/3-build-accelerator-with-finn.ipynb +++ b/notebooks/end2end_example/cybersecurity/3-build-accelerator-with-finn.ipynb @@ -106,17 +106,9 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Previous run results deleted!\n" - ] - } - ], + "outputs": [], "source": [ "import finn.builder.build_dataflow as build\n", "import finn.builder.build_dataflow_config as build_cfg\n", @@ -148,40 +140,9 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Building dataflow accelerator from cybsec-mlp-ready.onnx\n", - "Intermediate outputs will be generated in /tmp/finn_dev_ubuntu\n", - "Final outputs will be generated in output_estimates_only\n", - "Build log is at output_estimates_only/build_dataflow.log\n", - "Running step: step_tidy_up [1/7]\n", - "Running step: step_streamline [2/7]\n", - "Running step: step_convert_to_hls [3/7]\n", - "Running step: step_create_dataflow_partition [4/7]\n", - "Running step: step_target_fps_parallelization [5/7]\n", - "Running step: step_apply_folding_config [6/7]\n", - "Running step: step_generate_estimate_reports [7/7]\n", - "Completed successfully\n", - "CPU times: user 1.84 s, sys: 599 ms, total: 2.44 s\n", - "Wall time: 1.77 s\n" - ] - }, - { - "data": { - "text/plain": [ - "0" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "%%time\n", "build.build_dataflow_cfg(model_file, cfg_estimates)" @@ -196,36 +157,18 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "build_dataflow.log intermediate_models report time_per_step.json\r\n" - ] - } - ], + "outputs": [], "source": [ "! ls {estimates_output_dir}" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "estimate_layer_config_alternatives.json estimate_network_performance.json\r\n", - "estimate_layer_cycles.json\t\t op_and_param_counts.json\r\n", - "estimate_layer_resources.json\r\n" - ] - } - ], + "outputs": [], "source": [ "! ls {estimates_output_dir}/report" ] @@ -239,23 +182,9 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{\r\n", - " \"critical_path_cycles\": 252,\r\n", - " \"max_cycles\": 64,\r\n", - " \"max_cycles_node_name\": \"StreamingFCLayer_Batch_1\",\r\n", - " \"estimated_throughput_fps\": 1562500.0,\r\n", - " \"estimated_latency_ns\": 2520.0\r\n", - "}" - ] - } - ], + "outputs": [], "source": [ "! cat {estimates_output_dir}/report/estimate_network_performance.json" ] @@ -269,7 +198,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -282,23 +211,9 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'StreamingFCLayer_Batch_0': 60,\n", - " 'StreamingFCLayer_Batch_1': 64,\n", - " 'StreamingFCLayer_Batch_2': 64,\n", - " 'StreamingFCLayer_Batch_3': 64}" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "read_json_dict(estimates_output_dir + \"/report/estimate_layer_cycles.json\")" ] @@ -314,44 +229,9 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'StreamingFCLayer_Batch_0': {'BRAM_18K': 36,\n", - " 'BRAM_efficiency': 0.11574074074074074,\n", - " 'LUT': 8184,\n", - " 'URAM': 0,\n", - " 'URAM_efficiency': 1,\n", - " 'DSP': 0},\n", - " 'StreamingFCLayer_Batch_1': {'BRAM_18K': 4,\n", - " 'BRAM_efficiency': 0.1111111111111111,\n", - " 'LUT': 1217,\n", - " 'URAM': 0,\n", - " 'URAM_efficiency': 1,\n", - " 'DSP': 0},\n", - " 'StreamingFCLayer_Batch_2': {'BRAM_18K': 4,\n", - " 'BRAM_efficiency': 0.1111111111111111,\n", - " 'LUT': 1217,\n", - " 'URAM': 0,\n", - " 'URAM_efficiency': 1,\n", - " 'DSP': 0},\n", - " 'StreamingFCLayer_Batch_3': {'BRAM_18K': 1,\n", - " 'BRAM_efficiency': 0.006944444444444444,\n", - " 'LUT': 341,\n", - " 'URAM': 0,\n", - " 'URAM_efficiency': 1,\n", - " 'DSP': 0},\n", - " 'total': {'BRAM_18K': 45.0, 'LUT': 10959.0, 'URAM': 0.0, 'DSP': 0.0}}" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "read_json_dict(estimates_output_dir + \"/report/estimate_layer_resources.json\")" ] @@ -375,7 +255,7 @@ "\n", "<font color=\"red\">**Live FINN tutorial:** These next builds will take about 10 minutes to complete since multiple calls to Vivado and a call to RTL simulation are involved. While this is running, you can examine the generated files with noVNC -- it is running on **(your AWS URL):6080/vnc.html**\n", "\n", - "* Once the `step_hls_codegen [8/16]` below is completed, you can view the generated HLS code under its own folder for each layer: `/tmp/finn_dev_ubuntu/code_gen_ipgen_StreamingFCLayer_Batch_XXXXXX`\n", + "* Once the `step_hls_codegen [8/16]` below is completed, you can view the generated HLS code under its own folder for each layer: `/tmp/finn_dev_ubuntu/code_gen_ipgen_MatrixVectorActivation_XXXXXX`\n", " \n", "* Once the `step_create_stitched_ip [11/16]` below is completed, you can view the generated stitched IP in Vivado under `/home/ubuntu/finn/notebooks/end2end_example/cybersecurity/output_ipstitch_ooc_rtlsim/stitched_ip`\n", "</font> " @@ -383,17 +263,9 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Previous run results deleted!\n" - ] - } - ], + "outputs": [], "source": [ "import finn.builder.build_dataflow as build\n", "import finn.builder.build_dataflow_config as build_cfg\n", @@ -425,49 +297,9 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Building dataflow accelerator from cybsec-mlp-ready.onnx\n", - "Intermediate outputs will be generated in /tmp/finn_dev_ubuntu\n", - "Final outputs will be generated in output_ipstitch_ooc_rtlsim\n", - "Build log is at output_ipstitch_ooc_rtlsim/build_dataflow.log\n", - "Running step: step_tidy_up [1/16]\n", - "Running step: step_streamline [2/16]\n", - "Running step: step_convert_to_hls [3/16]\n", - "Running step: step_create_dataflow_partition [4/16]\n", - "Running step: step_target_fps_parallelization [5/16]\n", - "Running step: step_apply_folding_config [6/16]\n", - "Running step: step_generate_estimate_reports [7/16]\n", - "Running step: step_hls_codegen [8/16]\n", - "Running step: step_hls_ipgen [9/16]\n", - "Running step: step_set_fifo_depths [10/16]\n", - "Running step: step_create_stitched_ip [11/16]\n", - "Running step: step_measure_rtlsim_performance [12/16]\n", - "Running step: step_make_pynq_driver [13/16]\n", - "Running step: step_out_of_context_synthesis [14/16]\n", - "Running step: step_synthesize_bitfile [15/16]\n", - "Running step: step_deployment_package [16/16]\n", - "Completed successfully\n", - "CPU times: user 4.76 s, sys: 710 ms, total: 5.47 s\n", - "Wall time: 8min 5s\n" - ] - }, - { - "data": { - "text/plain": [ - "0" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "%%time\n", "build.build_dataflow_cfg(model_file, cfg_stitched_ip)" @@ -489,22 +321,9 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "all_verilog_srcs.txt\t\t finn_vivado_stitch_proj.xpr\r\n", - "finn_vivado_stitch_proj.cache\t ip\r\n", - "finn_vivado_stitch_proj.hw\t make_project.sh\r\n", - "finn_vivado_stitch_proj.ip_user_files make_project.tcl\r\n", - "finn_vivado_stitch_proj.sim\t vivado.jou\r\n", - "finn_vivado_stitch_proj.srcs\t vivado.log\r\n" - ] - } - ], + "outputs": [], "source": [ "! ls {rtlsim_output_dir}/stitched_ip" ] @@ -518,18 +337,9 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "estimate_layer_resources_hls.json rtlsim_performance.json\r\n", - "ooc_synth_and_timing.json\r\n" - ] - } - ], + "outputs": [], "source": [ "! ls {rtlsim_output_dir}/report" ] @@ -543,27 +353,9 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{\r\n", - " \"vivado_proj_folder\": \"/tmp/finn_dev_ubuntu/synth_out_of_context_iut077er/results_finn_design_wrapper\",\r\n", - " \"LUT\": 8667.0,\r\n", - " \"FF\": 9063.0,\r\n", - " \"DSP\": 0.0,\r\n", - " \"BRAM\": 22.0,\r\n", - " \"WNS\": 0.946,\r\n", - " \"\": 0,\r\n", - " \"fmax_mhz\": 110.44842058758559,\r\n", - " \"estimated_throughput_fps\": 1725756.5716810247\r\n", - "}" - ] - } - ], + "outputs": [], "source": [ "! cat {rtlsim_output_dir}/report/ooc_synth_and_timing.json" ] @@ -577,26 +369,9 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{\r\n", - " \"cycles\": 643,\r\n", - " \"runtime[ms]\": 0.00643,\r\n", - " \"throughput[images/s]\": 1088646.967340591,\r\n", - " \"DRAM_in_bandwidth[Mb/s]\": 81.64852255054431,\r\n", - " \"DRAM_out_bandwidth[Mb/s]\": 0.13608087091757387,\r\n", - " \"fclk[mhz]\": 100.0,\r\n", - " \"N\": 7,\r\n", - " \"latency_cycles\": 211\r\n", - "}" - ] - } - ], + "outputs": [], "source": [ "! cat {rtlsim_output_dir}/report/rtlsim_performance.json" ] @@ -610,62 +385,9 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{\r\n", - " \"Defaults\": {},\r\n", - " \"StreamingFIFO_0\": {\r\n", - " \"ram_style\": \"auto\",\r\n", - " \"depth\": 32,\r\n", - " \"impl_style\": \"rtl\"\r\n", - " },\r\n", - " \"StreamingFCLayer_Batch_0\": {\r\n", - " \"PE\": 16,\r\n", - " \"SIMD\": 40,\r\n", - " \"ram_style\": \"auto\",\r\n", - " \"resType\": \"lut\",\r\n", - " \"mem_mode\": \"decoupled\",\r\n", - " \"runtime_writeable_weights\": 0\r\n", - " },\r\n", - " \"StreamingDataWidthConverter_Batch_0\": {\r\n", - " \"impl_style\": \"hls\"\r\n", - " },\r\n", - " \"StreamingFCLayer_Batch_1\": {\r\n", - " \"PE\": 1,\r\n", - " \"SIMD\": 64,\r\n", - " \"ram_style\": \"auto\",\r\n", - " \"resType\": \"lut\",\r\n", - " \"mem_mode\": \"decoupled\",\r\n", - " \"runtime_writeable_weights\": 0\r\n", - " },\r\n", - " \"StreamingDataWidthConverter_Batch_1\": {\r\n", - " \"impl_style\": \"hls\"\r\n", - " },\r\n", - " \"StreamingFCLayer_Batch_2\": {\r\n", - " \"PE\": 1,\r\n", - " \"SIMD\": 64,\r\n", - " \"ram_style\": \"auto\",\r\n", - " \"resType\": \"lut\",\r\n", - " \"mem_mode\": \"decoupled\",\r\n", - " \"runtime_writeable_weights\": 0\r\n", - " },\r\n", - " \"StreamingFCLayer_Batch_3\": {\r\n", - " \"PE\": 1,\r\n", - " \"SIMD\": 1,\r\n", - " \"ram_style\": \"auto\",\r\n", - " \"resType\": \"lut\",\r\n", - " \"mem_mode\": \"decoupled\",\r\n", - " \"runtime_writeable_weights\": 0\r\n", - " }\r\n", - "}" - ] - } - ], + "outputs": [], "source": [ "! cat {rtlsim_output_dir}/final_hw_config.json" ] @@ -681,7 +403,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -716,49 +438,9 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Building dataflow accelerator from cybsec-mlp-ready.onnx\n", - "Intermediate outputs will be generated in /tmp/finn_dev_ubuntu\n", - "Final outputs will be generated in output_final\n", - "Build log is at output_final/build_dataflow.log\n", - "Running step: step_tidy_up [1/16]\n", - "Running step: step_streamline [2/16]\n", - "Running step: step_convert_to_hls [3/16]\n", - "Running step: step_create_dataflow_partition [4/16]\n", - "Running step: step_target_fps_parallelization [5/16]\n", - "Running step: step_apply_folding_config [6/16]\n", - "Running step: step_generate_estimate_reports [7/16]\n", - "Running step: step_hls_codegen [8/16]\n", - "Running step: step_hls_ipgen [9/16]\n", - "Running step: step_set_fifo_depths [10/16]\n", - "Running step: step_create_stitched_ip [11/16]\n", - "Running step: step_measure_rtlsim_performance [12/16]\n", - "Running step: step_make_pynq_driver [13/16]\n", - "Running step: step_out_of_context_synthesis [14/16]\n", - "Running step: step_synthesize_bitfile [15/16]\n", - "Running step: step_deployment_package [16/16]\n", - "Completed successfully\n", - "CPU times: user 4.47 s, sys: 766 ms, total: 5.24 s\n", - "Wall time: 22min 13s\n" - ] - }, - { - "data": { - "text/plain": [ - "0" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "#%%time\n", "#build.build_dataflow_cfg(model_file, cfg)" @@ -773,17 +455,9 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "finn-accel.bit\tfinn-accel.hwh\r\n" - ] - } - ], + "outputs": [], "source": [ "#! ls {final_output_dir}/bitfile" ] @@ -797,17 +471,9 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "driver.py driver_base.py finn runtime_weights validate.py\r\n" - ] - } - ], + "outputs": [], "source": [ "#! ls {final_output_dir}/driver" ] @@ -821,18 +487,9 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "estimate_layer_resources_hls.json post_synth_resources.xml\r\n", - "post_route_timing.rpt\r\n" - ] - } - ], + "outputs": [], "source": [ "#! ls {final_output_dir}/report" ] @@ -846,17 +503,9 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "bitfile driver\r\n" - ] - } - ], + "outputs": [], "source": [ "#! ls {final_output_dir}/deploy" ] @@ -874,7 +523,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -883,7 +532,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -892,38 +541,18 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "driver.py\tfinn\t\t unsw_nb15_binarized.npz validate.py\r\n", - "driver_base.py\truntime_weights validate-unsw-nb15.py\r\n" - ] - } - ], + "outputs": [], "source": [ "#! ls {final_output_dir}/deploy/driver" ] }, { "cell_type": "code", - "execution_count": 26, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'/workspace/finn/notebooks/end2end_example/cybersecurity/deploy-on-pynq.zip'" - ] - }, - "execution_count": 26, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "#from shutil import make_archive\n", "#make_archive('deploy-on-pynq', 'zip', final_output_dir+\"/deploy\")" @@ -1016,7 +645,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.8" + "version": "3.8.5" } }, "nbformat": 4, diff --git a/requirements.txt b/requirements.txt index da0ec0b63092f0618bb7c9982b95fa90e8f91118..0e51d7ae6d53703e2b485be85956127ca3430a7c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,10 +8,11 @@ numpy==1.18.0 onnx==1.7.0 onnxoptimizer onnxruntime==1.4.0 -pre-commit==2.6.0 +pre-commit==2.9.2 pyscaffold==3.2.1 scipy==1.5.2 setupext-janitor>=1.1.2 toposort==1.5 vcdvcd==1.0.5 wget==3.2 +protobuf==3.20.1 \ No newline at end of file diff --git a/run-docker.sh b/run-docker.sh index a1147fcee55d345850da4c533dd9e88270d727a6..6b7d597cb68c58c28ac568cb39f8e760c94bb885 100755 --- a/run-docker.sh +++ b/run-docker.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (c) 2020, Xilinx +# Copyright (c) 2020-2022, Advanced Micro Devices # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -92,11 +92,13 @@ SCRIPTPATH=$(dirname "$SCRIPT") : ${FINN_DOCKER_PREBUILT="0"} : ${FINN_DOCKER_RUN_AS_ROOT="0"} : ${FINN_DOCKER_GPU="$(docker info | grep nvidia | wc -m)"} +: ${FINN_DOCKER_EXTRA=""} +: ${FINN_SKIP_DEP_REPOS="0"} +: ${OHMYXILINX="${SCRIPTPATH}/deps/oh-my-xilinx"} : ${NVIDIA_VISIBLE_DEVICES=""} : ${DOCKER_BUILDKIT="1"} DOCKER_INTERACTIVE="" -DOCKER_EXTRA="" if [ "$1" = "test" ]; then gecho "Running test suite (all tests)" @@ -112,36 +114,41 @@ elif [ "$1" = "notebook" ]; then JUPYTER_PASSWD_ARG="--NotebookApp.password='$JUPYTER_PASSWD_HASH'" fi DOCKER_CMD="jupyter notebook --allow-root --no-browser --ip=0.0.0.0 --port $JUPYTER_PORT $JUPYTER_PASSWD_ARG notebooks" - DOCKER_EXTRA+="-e JUPYTER_PORT=$JUPYTER_PORT " - DOCKER_EXTRA+="-e NETRON_PORT=$NETRON_PORT " - DOCKER_EXTRA+="-p $JUPYTER_PORT:$JUPYTER_PORT " - DOCKER_EXTRA+="-p $NETRON_PORT:$NETRON_PORT " + FINN_DOCKER_EXTRA+="-e JUPYTER_PORT=$JUPYTER_PORT " + FINN_DOCKER_EXTRA+="-e NETRON_PORT=$NETRON_PORT " + FINN_DOCKER_EXTRA+="-p $JUPYTER_PORT:$JUPYTER_PORT " + FINN_DOCKER_EXTRA+="-p $NETRON_PORT:$NETRON_PORT " elif [ "$1" = "build_dataflow" ]; then BUILD_DATAFLOW_DIR=$(readlink -f "$2") - DOCKER_EXTRA="-v $BUILD_DATAFLOW_DIR:$BUILD_DATAFLOW_DIR " + FINN_DOCKER_EXTRA+="-v $BUILD_DATAFLOW_DIR:$BUILD_DATAFLOW_DIR " DOCKER_INTERACTIVE="-it" #FINN_HOST_BUILD_DIR=$BUILD_DATAFLOW_DIR/build gecho "Running build_dataflow for folder $BUILD_DATAFLOW_DIR" DOCKER_CMD="build_dataflow $BUILD_DATAFLOW_DIR" elif [ "$1" = "build_custom" ]; then BUILD_CUSTOM_DIR=$(readlink -f "$2") - DOCKER_EXTRA="-v $BUILD_CUSTOM_DIR:$BUILD_CUSTOM_DIR -w $BUILD_CUSTOM_DIR " + FLOW_NAME=${3:-build} + FINN_DOCKER_EXTRA+="-v $BUILD_CUSTOM_DIR:$BUILD_CUSTOM_DIR -w $BUILD_CUSTOM_DIR " DOCKER_INTERACTIVE="-it" #FINN_HOST_BUILD_DIR=$BUILD_DATAFLOW_DIR/build - gecho "Running build_custom: $BUILD_CUSTOM_DIR/build.py" - DOCKER_CMD="python -mpdb -cc -cq build.py" + gecho "Running build_custom: $BUILD_CUSTOM_DIR/$FLOW_NAME.py" + DOCKER_CMD="python -mpdb -cc -cq $FLOW_NAME.py" +elif [ -z "$1" ]; then + gecho "Running container only" + DOCKER_CMD="bash" + DOCKER_INTERACTIVE="-it" else - gecho "Running container only" - DOCKER_CMD="bash" - DOCKER_INTERACTIVE="-it" + gecho "Running container with passed arguments" + DOCKER_CMD="$@" fi + if [ "$FINN_DOCKER_GPU" != 0 ];then gecho "nvidia-docker detected, enabling GPUs" if [ ! -z "$NVIDIA_VISIBLE_DEVICES" ];then - DOCKER_EXTRA+="--runtime nvidia -e NVIDIA_VISIBLE_DEVICES=$NVIDIA_VISIBLE_DEVICES " + FINN_DOCKER_EXTRA+="--runtime nvidia -e NVIDIA_VISIBLE_DEVICES=$NVIDIA_VISIBLE_DEVICES " else - DOCKER_EXTRA+="--gpus all " + FINN_DOCKER_EXTRA+="--gpus all " fi fi @@ -161,6 +168,11 @@ gecho "Port-forwarding for Netron $NETRON_PORT:$NETRON_PORT" gecho "Vivado IP cache dir is at $VIVADO_IP_CACHE" gecho "Using default PYNQ board $PYNQ_BOARD" +# Ensure git-based deps are checked out at correct commit +if [ "$FINN_SKIP_DEP_REPOS" = "0" ]; then + ./fetch-repos.sh +fi + # Build the FINN Docker image if [ "$FINN_DOCKER_PREBUILT" = "0" ]; then # Need to ensure this is done within the finn/ root folder: @@ -175,10 +187,11 @@ fi DOCKER_EXEC="docker run -t --rm $DOCKER_INTERACTIVE --tty --init " DOCKER_EXEC+="--hostname $DOCKER_INST_NAME " DOCKER_EXEC+="-e SHELL=/bin/bash " -DOCKER_EXEC+="-v $SCRIPTPATH:/workspace/finn " +DOCKER_EXEC+="-w $SCRIPTPATH " +DOCKER_EXEC+="-v $SCRIPTPATH:$SCRIPTPATH " DOCKER_EXEC+="-v $FINN_HOST_BUILD_DIR:$FINN_HOST_BUILD_DIR " DOCKER_EXEC+="-e FINN_BUILD_DIR=$FINN_HOST_BUILD_DIR " -DOCKER_EXEC+="-e FINN_ROOT="/workspace/finn" " +DOCKER_EXEC+="-e FINN_ROOT="$SCRIPTPATH" " DOCKER_EXEC+="-e LOCALHOST_URL=$LOCALHOST_URL " DOCKER_EXEC+="-e VIVADO_IP_CACHE=$VIVADO_IP_CACHE " DOCKER_EXEC+="-e PYNQ_BOARD=$PYNQ_BOARD " @@ -186,6 +199,7 @@ DOCKER_EXEC+="-e PYNQ_IP=$PYNQ_IP " DOCKER_EXEC+="-e PYNQ_USERNAME=$PYNQ_USERNAME " DOCKER_EXEC+="-e PYNQ_PASSWORD=$PYNQ_PASSWORD " DOCKER_EXEC+="-e PYNQ_TARGET_DIR=$PYNQ_TARGET_DIR " +DOCKER_EXEC+="-e OHMYXILINX=$OHMYXILINX " DOCKER_EXEC+="-e NUM_DEFAULT_WORKERS=$NUM_DEFAULT_WORKERS " if [ "$FINN_DOCKER_RUN_AS_ROOT" = "0" ];then DOCKER_EXEC+="-v /etc/group:/etc/group:ro " @@ -204,11 +218,15 @@ fi if [ ! -z "$FINN_XILINX_PATH" ];then VIVADO_PATH="$FINN_XILINX_PATH/Vivado/$FINN_XILINX_VERSION" VITIS_PATH="$FINN_XILINX_PATH/Vitis/$FINN_XILINX_VERSION" + HLS_PATH="$FINN_XILINX_PATH/Vitis_HLS/$FINN_XILINX_VERSION" DOCKER_EXEC+="-v $FINN_XILINX_PATH:$FINN_XILINX_PATH " if [ -d "$VIVADO_PATH" ];then DOCKER_EXEC+="-e "XILINX_VIVADO=$VIVADO_PATH" " DOCKER_EXEC+="-e VIVADO_PATH=$VIVADO_PATH " fi + if [ -d "$HLS_PATH" ];then + DOCKER_EXEC+="-e HLS_PATH=$HLS_PATH " + fi if [ -d "$VITIS_PATH" ];then DOCKER_EXEC+="-e VITIS_PATH=$VITIS_PATH " fi @@ -222,7 +240,7 @@ if [ ! -z "$FINN_XILINX_PATH" ];then DOCKER_EXEC+="-e ALVEO_TARGET_DIR=$ALVEO_TARGET_DIR " fi fi -DOCKER_EXEC+="$DOCKER_EXTRA " +DOCKER_EXEC+="$FINN_DOCKER_EXTRA " DOCKER_EXEC+="$FINN_DOCKER_TAG $DOCKER_CMD" $DOCKER_EXEC diff --git a/setup.cfg b/setup.cfg index 96618e0ffcb8dcb217185c67948a71a132a7b45a..bcf5364b782447d21eea553ddcc2a6fc9b2636c0 100644 --- a/setup.cfg +++ b/setup.cfg @@ -106,7 +106,6 @@ console_scripts = [test] # py.test options when running `python setup.py test` # addopts = --verbose -extras = True [tool:pytest] # Options for py.test: @@ -120,6 +119,11 @@ markers = vivado: mark tests that require Vivado or Vivado HLS vitis: mark tests that require Vitis board: mark tests that require a PYNQ board + brevitas_export : mark tests that test brevitas export functionality + streamline: mark tests that test streamlining functionality + util: mark tests that test util functions + transform: mark tests that test transformations (before hls layers) + fpgadataflow: mark tests related to hls layers norecursedirs = dist build diff --git a/src/finn/analysis/fpgadataflow/res_estimation.py b/src/finn/analysis/fpgadataflow/res_estimation.py index 31cfeb76a6d4f411808af5dcd265e4f07352ae02..c543361f5dae373c5c581088fa3fdb5be1b5a39d 100644 --- a/src/finn/analysis/fpgadataflow/res_estimation.py +++ b/src/finn/analysis/fpgadataflow/res_estimation.py @@ -62,8 +62,8 @@ def res_estimation_complete(model): op_type = node.op_type inst = registry.getCustomOp(node) if ( - op_type == "StreamingFCLayer_Batch" - or op_type == "Vector_Vector_Activate_Batch" + op_type == "MatrixVectorActivation" + or op_type == "VectorVectorActivation" ): orig_restype = inst.get_nodeattr("resType") res_dict[node.name] = [] diff --git a/src/finn/builder/build_dataflow_config.py b/src/finn/builder/build_dataflow_config.py index 807fd706860d7e4667107ddd2ed46ea2b123c3ec..cc8e6187624e8931ca31c0c78bdab166d5a3bdf5 100644 --- a/src/finn/builder/build_dataflow_config.py +++ b/src/finn/builder/build_dataflow_config.py @@ -34,7 +34,7 @@ from enum import Enum from typing import Any, List, Optional from finn.transformation.fpgadataflow.vitis_build import VitisOptStrategy -from finn.util.basic import alveo_part_map, pynq_part_map +from finn.util.basic import alveo_default_platform, alveo_part_map, pynq_part_map class ShellFlowType(str, Enum): @@ -59,7 +59,7 @@ class DataflowOutputType(str, Enum): class ComputeEngineMemMode(str, Enum): """Memory mode for generated compute engines. See - https://finn.readthedocs.io/en/latest/internals.html#streamingfclayer-mem-mode + https://finn.readthedocs.io/en/latest/internals.html#matrixvectoractivation-mem-mode for more information.""" CONST = "const" @@ -209,6 +209,10 @@ class DataflowBuildConfig: #: the full list of layer IP build directories. By default, synthesis will not run. stitched_ip_gen_dcp: Optional[bool] = False + #: Insert a signature node to the stitched-IP to read/write information + #: to the design: e.g. Customer signature, application signature, version + signature: Optional[tuple] = () + #: (Optional) Control the maximum width of the per-PE MVAU stream while #: exploring the parallelization attributes to reach target_fps #: Only relevant if target_fps is specified. @@ -218,8 +222,8 @@ class DataflowBuildConfig: #: (Optional) Whether thresholding layers (which implement quantized #: activations in FINN) will be implemented as stand-alone HLS layers, - #: instead of being part of StreamingFCLayer. This gives larger flexibility, - #: and makes it possible to have runtime-writable thresholds. + #: instead of being part of MatrixVectorActivation layer. This gives larger + #: flexibility, and makes it possible to have runtime-writable thresholds. standalone_thresholds: Optional[bool] = False #: Target board, only needed for generating full bitfiles where the FINN @@ -257,6 +261,8 @@ class DataflowBuildConfig: #: Which Vitis platform will be used. #: Only relevant when `shell_flow_type = ShellFlowType.VITIS_ALVEO` #: e.g. "xilinx_u250_xdma_201830_2" + #: If not specified but "board" is specified, will use the FINN + #: default (if any) for that Alveo board vitis_platform: Optional[str] = None #: Path to JSON config file assigning each layer to an SLR. @@ -340,7 +346,7 @@ class DataflowBuildConfig: if self.target_fps is None: return None else: - n_clock_cycles_per_sec = 10 ** 9 / self.synth_clk_period_ns + n_clock_cycles_per_sec = 10**9 / self.synth_clk_period_ns n_cycles_per_frame = n_clock_cycles_per_sec / self.target_fps return int(n_cycles_per_frame) @@ -356,6 +362,17 @@ class DataflowBuildConfig: } return name_to_strategy[self.vitis_opt_strategy] + def _resolve_vitis_platform(self): + if self.vitis_platform is not None: + return self.vitis_platform + elif (self.vitis_platform is None) and (self.board is not None): + return alveo_default_platform[self.board] + else: + raise Exception( + "Could not resolve Vitis platform:" + " need either board or vitis_platform specified" + ) + def _resolve_verification_steps(self): if self.verify_steps is None: return [] @@ -376,6 +393,6 @@ class DataflowBuildConfig: ) verify_expected_output_npy = np.load(self.verify_expected_output_npy) return ( - verify_input_npy.astype(np.float32), - verify_expected_output_npy.astype(np.float32), + verify_input_npy, + verify_expected_output_npy, ) diff --git a/src/finn/builder/build_dataflow_steps.py b/src/finn/builder/build_dataflow_steps.py index c977f15e7090f5cae633a013f5eb9e6b3dd34dd2..f2f768b3c3caf7747627d0c0043d3955b417ea67 100644 --- a/src/finn/builder/build_dataflow_steps.py +++ b/src/finn/builder/build_dataflow_steps.py @@ -55,6 +55,7 @@ from finn.builder.build_dataflow_config import ( ) from finn.core.modelwrapper import ModelWrapper from finn.core.onnx_exec import execute_onnx +from finn.core.rtlsim_exec import rtlsim_exec from finn.core.throughput_test import throughput_test_rtlsim from finn.custom_op.registry import getCustomOp from finn.transformation.bipolar_to_xnor import ConvertBipolarMatMulToXnorPopcount @@ -108,7 +109,11 @@ from finn.util.test import execute_parent def verify_step( - model: ModelWrapper, cfg: DataflowBuildConfig, step_name: str, need_parent: bool + model: ModelWrapper, + cfg: DataflowBuildConfig, + step_name: str, + need_parent: bool, + rtlsim_pre_hook=None, ): print("Running verification for " + step_name) verify_out_dir = cfg.output_dir + "/verification_output" @@ -131,7 +136,10 @@ def verify_step( inp_tensor_name = model.graph.input[0].name out_tensor_name = model.graph.output[0].name inp_dict = {inp_tensor_name: in_npy} - out_dict = execute_onnx(model, inp_dict, True) + if rtlsim_pre_hook is not None: + out_dict = rtlsim_exec(model, inp_dict, pre_hook=rtlsim_pre_hook) + else: + out_dict = execute_onnx(model, inp_dict, True) out_npy = out_dict[out_tensor_name] res = np.isclose(exp_out_npy, out_npy, atol=1e-3).all() res_to_str = {True: "SUCCESS", False: "FAIL"} @@ -282,9 +290,9 @@ def step_convert_to_hls(model: ModelWrapper, cfg: DataflowBuildConfig): # doing this first causes all threshold layers to be standalone model = model.transform(to_hls.InferThresholdingLayer()) # needed for bipolar MatMul layers - model = model.transform(to_hls.InferBinaryStreamingFCLayer(mem_mode)) + model = model.transform(to_hls.InferBinaryMatrixVectorActivation(mem_mode)) # needed for non-bipolar MatMul layers - model = model.transform(to_hls.InferQuantizedStreamingFCLayer(mem_mode)) + model = model.transform(to_hls.InferQuantizedMatrixVectorActivation(mem_mode)) # TopK to LabelSelect model = model.transform(to_hls.InferLabelSelectLayer()) # input quantization (if any) as standalone threshold @@ -397,7 +405,7 @@ def step_generate_estimate_reports(model: ModelWrapper, cfg: DataflowBuildConfig model = model.transform(AnnotateCycles()) estimate_network_performance = model.analysis(dataflow_performance) # add some more metrics to estimated performance - n_clock_cycles_per_sec = (10 ** 9) / cfg.synth_clk_period_ns + n_clock_cycles_per_sec = (10**9) / cfg.synth_clk_period_ns est_fps = n_clock_cycles_per_sec / estimate_network_performance["max_cycles"] estimate_network_performance["estimated_throughput_fps"] = est_fps est_latency_ns = ( @@ -451,7 +459,7 @@ def step_set_fifo_depths(model: ModelWrapper, cfg: DataflowBuildConfig): InsertAndSetFIFODepths( cfg._resolve_fpga_part(), cfg._resolve_hls_clk_period(), - vivado_ram_style=cfg.large_fifo_mem_style.value, + vivado_ram_style=cfg.large_fifo_mem_style, ) ) else: @@ -503,6 +511,7 @@ def step_create_stitched_ip(model: ModelWrapper, cfg: DataflowBuildConfig): cfg._resolve_fpga_part(), cfg.synth_clk_period_ns, vitis=cfg.stitched_ip_gen_dcp, + signature=cfg.signature, ) ) # TODO copy all ip sources into output dir? as zip? @@ -599,7 +608,7 @@ def step_out_of_context_synthesis(model: ModelWrapper, cfg: DataflowBuildConfig) estimate_network_performance = model.analysis(dataflow_performance) # add some more metrics to estimated performance - n_clock_cycles_per_sec = float(ooc_res_dict["fmax_mhz"]) * (10 ** 6) + n_clock_cycles_per_sec = float(ooc_res_dict["fmax_mhz"]) * (10**6) est_fps = n_clock_cycles_per_sec / estimate_network_performance["max_cycles"] ooc_res_dict["estimated_throughput_fps"] = est_fps with open(report_dir + "/ooc_synth_and_timing.json", "w") as f: @@ -644,7 +653,7 @@ def step_synthesize_bitfile(model: ModelWrapper, cfg: DataflowBuildConfig): VitisBuild( cfg._resolve_fpga_part(), cfg.synth_clk_period_ns, - cfg.vitis_platform, + cfg._resolve_vitis_platform(), strategy=cfg._resolve_vitis_opt_strategy(), enable_debug=cfg.enable_hw_debug, floorplan_file=cfg.vitis_floorplan_file, diff --git a/src/finn/custom_op/fpgadataflow/__init__.py b/src/finn/custom_op/fpgadataflow/__init__.py index 50746d4834cb1e7b29979f1876da007425352e76..2437f48588275f33d1bf258c973ed28aeba800fa 100644 --- a/src/finn/custom_op/fpgadataflow/__init__.py +++ b/src/finn/custom_op/fpgadataflow/__init__.py @@ -28,6 +28,8 @@ from finn.custom_op.fpgadataflow.addstreams_batch import AddStreams_Batch from finn.custom_op.fpgadataflow.channelwise_op_batch import ChannelwiseOp_Batch +from finn.custom_op.fpgadataflow.checksum import checksum +from finn.custom_op.fpgadataflow.concat import StreamingConcat from finn.custom_op.fpgadataflow.convolutioninputgenerator import ( ConvolutionInputGenerator, ) @@ -44,6 +46,7 @@ from finn.custom_op.fpgadataflow.globalaccpool_batch import GlobalAccPool_Batch from finn.custom_op.fpgadataflow.iodma import IODMA from finn.custom_op.fpgadataflow.labelselect_batch import LabelSelect_Batch from finn.custom_op.fpgadataflow.lookup import Lookup +from finn.custom_op.fpgadataflow.matrixvectoractivation import MatrixVectorActivation from finn.custom_op.fpgadataflow.pool_batch import Pool_Batch from finn.custom_op.fpgadataflow.streamingdataflowpartition import ( StreamingDataflowPartition, @@ -51,15 +54,12 @@ from finn.custom_op.fpgadataflow.streamingdataflowpartition import ( from finn.custom_op.fpgadataflow.streamingdatawidthconverter_batch import ( StreamingDataWidthConverter_Batch, ) -from finn.custom_op.fpgadataflow.streamingfclayer_batch import StreamingFCLayer_Batch from finn.custom_op.fpgadataflow.streamingfifo import StreamingFIFO from finn.custom_op.fpgadataflow.streamingmaxpool_batch import StreamingMaxPool_Batch from finn.custom_op.fpgadataflow.thresholding_batch import Thresholding_Batch from finn.custom_op.fpgadataflow.tlastmarker import TLastMarker from finn.custom_op.fpgadataflow.upsampler import UpsampleNearestNeighbour_Batch -from finn.custom_op.fpgadataflow.vector_vector_activate_batch import ( - Vector_Vector_Activate_Batch, -) +from finn.custom_op.fpgadataflow.vectorvectoractivation import VectorVectorActivation custom_op = dict() @@ -67,7 +67,7 @@ custom_op = dict() # registered and plug in correctly into the infrastructure custom_op["DownSampler"] = DownSampler custom_op["StreamingMaxPool_Batch"] = StreamingMaxPool_Batch -custom_op["StreamingFCLayer_Batch"] = StreamingFCLayer_Batch +custom_op["MatrixVectorActivation"] = MatrixVectorActivation custom_op["ConvolutionInputGenerator"] = ConvolutionInputGenerator custom_op["ConvolutionInputGenerator1D"] = ConvolutionInputGenerator1D custom_op["ConvolutionInputGenerator_rtl"] = ConvolutionInputGenerator_rtl @@ -81,9 +81,11 @@ custom_op["Thresholding_Batch"] = Thresholding_Batch custom_op["AddStreams_Batch"] = AddStreams_Batch custom_op["LabelSelect_Batch"] = LabelSelect_Batch custom_op["DuplicateStreams_Batch"] = DuplicateStreams_Batch -custom_op["Vector_Vector_Activate_Batch"] = Vector_Vector_Activate_Batch +custom_op["VectorVectorActivation"] = VectorVectorActivation custom_op["ChannelwiseOp_Batch"] = ChannelwiseOp_Batch custom_op["IODMA"] = IODMA custom_op["StreamingDataflowPartition"] = StreamingDataflowPartition custom_op["UpsampleNearestNeighbour_Batch"] = UpsampleNearestNeighbour_Batch custom_op["Lookup"] = Lookup +custom_op["StreamingConcat"] = StreamingConcat +custom_op["checksum"] = checksum diff --git a/src/finn/custom_op/fpgadataflow/addstreams_batch.py b/src/finn/custom_op/fpgadataflow/addstreams_batch.py index fa80e47485eef4f289b0272fd73ac185bd1c2c5e..d1da1e0e524986332429079f79d36ae62f7cfd1e 100644 --- a/src/finn/custom_op/fpgadataflow/addstreams_batch.py +++ b/src/finn/custom_op/fpgadataflow/addstreams_batch.py @@ -56,7 +56,7 @@ class AddStreams_Batch(HLSCustomOp): my_attrs.update(super().get_nodeattr_types()) return my_attrs - def get_normal_input_shape(self): + def get_normal_input_shape(self, ind=0): ich = self.get_nodeattr("NumChannels") vecs = list(self.get_nodeattr("numInputVectors")) ishape = tuple(vecs + [ich]) @@ -166,7 +166,6 @@ class AddStreams_Batch(HLSCustomOp): exp_ishape = self.get_normal_input_shape() exp_oshape = self.get_normal_output_shape() folded_ishape = self.get_folded_input_shape() - folded_oshape = self.get_folded_output_shape() if mode == "cppsim": code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") @@ -211,9 +210,8 @@ class AddStreams_Batch(HLSCustomOp): # load output npy file super().npy_to_dynamic_output(context) assert ( - context[node.output[0]].shape == folded_oshape - ), "cppsim did not produce expected folded output shape" - context[node.output[0]] = context[node.output[0]].reshape(*exp_oshape) + context[node.output[0]].shape == exp_oshape + ), "cppsim did not produce expected output shape" elif mode == "rtlsim": sim = self.get_rtlsim() nbits = self.get_instream_width() @@ -340,14 +338,22 @@ class AddStreams_Batch(HLSCustomOp): ] def pragmas(self): - self.code_gen_dict["$PRAGMAS$"] = ["#pragma HLS INTERFACE axis port=in0"] - self.code_gen_dict["$PRAGMAS$"].append("#pragma HLS INTERFACE axis port=in1") - self.code_gen_dict["$PRAGMAS$"].append("#pragma HLS INTERFACE axis port=out") + self.code_gen_dict["$PRAGMAS$"] = [ + "#pragma HLS INTERFACE axis port=in0 name=in0_" + self.hls_sname() + ] + self.code_gen_dict["$PRAGMAS$"].append( + "#pragma HLS INTERFACE axis port=in1 name=in1_" + self.hls_sname() + ) + self.code_gen_dict["$PRAGMAS$"].append( + "#pragma HLS INTERFACE axis port=out name=out_" + self.hls_sname() + ) self.code_gen_dict["$PRAGMAS$"].append( "#pragma HLS INTERFACE ap_ctrl_none port=return" ) def get_verilog_top_module_intf_names(self): intf_names = super().get_verilog_top_module_intf_names() - intf_names["s_axis"].append(("in1_V_V", self.get_instream_width_padded())) + sname = self.hls_sname() + swidth = self.get_instream_width_padded() + intf_names["s_axis"] = [(x + "_" + sname, swidth) for x in ["in0", "in1"]] return intf_names diff --git a/src/finn/custom_op/fpgadataflow/channelwise_op_batch.py b/src/finn/custom_op/fpgadataflow/channelwise_op_batch.py index 4961f6148231252d255c1830ced418308032ce41..462b8b6e6ec845b75e3594460807ccc7f37bbe9e 100644 --- a/src/finn/custom_op/fpgadataflow/channelwise_op_batch.py +++ b/src/finn/custom_op/fpgadataflow/channelwise_op_batch.py @@ -350,13 +350,13 @@ class ChannelwiseOp_Batch(HLSCustomOp): # get desired function func = self.get_nodeattr("Func") if func == "cmp_le": - func_str = "comp::less_equal" + func_str = "comp::less_equal<%s, %s>" % (idt_hls, pdt_hls) elif func == "cmp_ge": - func_str = "std::greater_equal" + func_str = "comp::greater_equal<%s, %s>" % (idt_hls, pdt_hls) elif func == "add": - func_str = "std::plus" + func_str = "comp::add<%s, %s, %s>" % (odt_hls, odt_hls, odt_hls) elif func == "mul": - func_str = "std::multiplies" + func_str = "comp::mul<%s, %s, %s>" % (odt_hls, odt_hls, odt_hls) else: raise Exception( """Invalid value for attribute Func! Is currently set to: {} @@ -373,7 +373,7 @@ class ChannelwiseOp_Batch(HLSCustomOp): idt_hls, pdt_hls, odt_hls, - "%s<%s>" % (func_str, odt_hls), + func_str, ) ) f_params.write(parameters_hls_code) @@ -431,11 +431,8 @@ class ChannelwiseOp_Batch(HLSCustomOp): out = 2 * out - 1 context[node.output[0]] = out assert ( - context[node.output[0]].shape == self.get_folded_output_shape() + context[node.output[0]].shape == self.get_normal_output_shape() ), """Output shape is not as expected""" - # reshape output to have expected shape - oshape = self.get_normal_output_shape() - context[node.output[0]] = context[node.output[0]].reshape(*oshape) elif mode == "rtlsim": sim = self.get_rtlsim() nbits = self.get_instream_width() @@ -514,18 +511,15 @@ class ChannelwiseOp_Batch(HLSCustomOp): # should ImgDim be defined or just filled in here like we do now? ishape = self.get_folded_input_shape() if len(ishape) == 3: - imgdim_h = 1 - imgdim_w = 1 + spatial_dim = 1 elif len(ishape) == 5: - imgdim_h = ishape[1] - imgdim_w = ishape[2] + spatial_dim = ishape[1] * ishape[2] else: raise Exception("""Unexpeted input shape""") self.code_gen_dict["$DOCOMPUTE$"] = [ - """Thresholding_Batch<{}, {}, NumChannels1, PE1, {}, {}> + """Thresholding_Batch<{}, NumChannels1, PE1, {}, {}> (in0, out, threshs, numReps);""".format( - imgdim_h, - imgdim_w, + spatial_dim, tmpl_args["TSrcI"], tmpl_args["TDstI"], ) @@ -574,8 +568,12 @@ class ChannelwiseOp_Batch(HLSCustomOp): ] def pragmas(self): - self.code_gen_dict["$PRAGMAS$"] = ["#pragma HLS INTERFACE axis port=in0"] - self.code_gen_dict["$PRAGMAS$"].append("#pragma HLS INTERFACE axis port=out") + self.code_gen_dict["$PRAGMAS$"] = [ + "#pragma HLS INTERFACE axis port=in0 name=in0_" + self.hls_sname() + ] + self.code_gen_dict["$PRAGMAS$"].append( + "#pragma HLS INTERFACE axis port=out name=out_" + self.hls_sname() + ) self.code_gen_dict["$PRAGMAS$"].append( "#pragma HLS INTERFACE ap_ctrl_none port=return" ) diff --git a/src/finn/custom_op/fpgadataflow/checksum.py b/src/finn/custom_op/fpgadataflow/checksum.py new file mode 100644 index 0000000000000000000000000000000000000000..59d26fdce83d7a3009606da0fd00c84f03110622 --- /dev/null +++ b/src/finn/custom_op/fpgadataflow/checksum.py @@ -0,0 +1,325 @@ +# Copyright (c) 2022, Advanced Micro Devices, Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of FINN nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import numpy as np +import os +import warnings + +from finn.core.datatype import DataType +from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp +from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy + + +class checksum(HLSCustomOp): + """Class that corresponds to custom_hls checksum function.""" + + def __init__(self, onnx_node): + super().__init__(onnx_node) + + def get_nodeattr_types(self): + my_attrs = { + # number of data words in a frame + "words_per_frame": ("i", True, 0), + # subword count per data word + "items_per_word": ("i", True, 0), + # FINN DataTypes for input + "inputDataType": ("s", True, ""), + # folded shape of input/output + "folded_shape": ("ints", True, []), + } + my_attrs.update(super().get_nodeattr_types()) + return my_attrs + + def make_shape_compatible_op(self, model): + oshape = self.get_normal_output_shape() + return super().make_const_shape_op(oshape) + + def infer_node_datatype(self, model): + node = self.onnx_node + idt = model.get_tensor_datatype(node.input[0]) + if idt != self.get_input_datatype(): + warn_str = "inputDataType changing for %s: %s -> %s " % ( + node.name, + str(self.get_input_datatype().name), + str(idt.name), + ) + warnings.warn(warn_str) + self.set_nodeattr("inputDataType", idt.name) + # set output datatype from property + odt = self.get_output_datatype() + model.set_tensor_datatype(node.output[0], odt) + + def verify_node(self): + pass + + def get_input_datatype(self): + """Returns FINN DataType of input.""" + return DataType[self.get_nodeattr("inputDataType")] + + def get_output_datatype(self): + """Returns FINN DataType of output.""" + # here same as input data type + return DataType[self.get_nodeattr("inputDataType")] + + def get_instream_width(self): + dtype = DataType[self.get_nodeattr("inputDataType")] + folded_shape = self.get_nodeattr("folded_shape") + in_width = folded_shape[-1] * dtype.bitwidth() + return in_width + + def get_outstream_width(self): + return self.get_instream_width() + + def get_folded_input_shape(self): + return self.get_nodeattr("folded_shape") + + def get_folded_output_shape(self): + return self.get_nodeattr("folded_shape") + + def get_normal_input_shape(self): + # derive normal shape from folded shape + # checksum nodes are inserted in between fpgadataflow nodes + # the folded shape could be for example (1, nf, pe) + # with nf (neuron folding): mh // pe + # the normal input shape is in this case (1, mh) + # so to achieve this the two inner dimensions are multiplied + # and together with all previous dimensions + # this gives the normal input shape + + folded_shape = self.get_nodeattr("folded_shape") + # extract inner dimension + inner_dim = folded_shape[-1] + # multiply with the next inner dimension + folding_factor = folded_shape[-2] * inner_dim + normal_ishape = [] + # create the normal_ishape + for i in range(len(folded_shape) - 2): + normal_ishape.append(folded_shape[i]) + normal_ishape.append(folding_factor) + + return normal_ishape + + def get_ap_int_max_w(self): + return max(super().get_ap_int_max_w(), 32) + + def get_normal_output_shape(self): + # same shape as input + return self.get_normal_input_shape() + + def get_number_output_values(self): + folded_oshape = self.get_folded_output_shape() + return np.prod(folded_oshape[:-1]) + + def npy_to_dynamic_output(self, context): + super().npy_to_dynamic_output(context) + node = self.onnx_node + code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") + output_checksum = np.load("{}/output_checksum.npy".format(code_gen_dir)) + context[node.output[1]] = output_checksum + + def execute_node(self, context, graph): + mode = self.get_nodeattr("exec_mode") + node = self.onnx_node + inp = context[node.input[0]] + + # TODO ensure codegen dir exists + if mode == "cppsim": + code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") + elif mode == "rtlsim": + code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") + else: + raise Exception( + """Invalid value for attribute exec_mode! Is currently set to: {} + has to be set to one of the following value ("cppsim", "rtlsim")""".format( + mode + ) + ) + + if mode == "cppsim": + self.dynamic_input_to_npy(context, 1) + self.exec_precompiled_singlenode_model() + self.npy_to_dynamic_output(context) + elif mode == "rtlsim": + # create a npy file for the input of the node + assert ( + str(inp.dtype) == "float32" + ), """Input datatype is + not float32 as expected.""" + expected_inp_shape = self.get_folded_input_shape() + reshaped_input = inp.reshape(expected_inp_shape) + if DataType[self.get_nodeattr("inputDataType")] == DataType["BIPOLAR"]: + # store bipolar activations as binary + reshaped_input = (reshaped_input + 1) / 2 + export_idt = DataType["BINARY"] + else: + export_idt = DataType[self.get_nodeattr("inputDataType")] + # make copy before saving the array + reshaped_input = reshaped_input.copy() + np.save(os.path.join(code_gen_dir, "input_0.npy"), reshaped_input) + sim = self.get_rtlsim() + nbits = self.get_instream_width() + inp = npy_to_rtlsim_input( + "{}/input_0.npy".format(code_gen_dir), export_idt, nbits + ) + super().reset_rtlsim(sim) + super().toggle_clk(sim) + io_dict = { + "inputs": {"in0": inp}, + "outputs": {"out": []}, + } + self.rtlsim_multi_io(sim, io_dict) + output = io_dict["outputs"]["out"] + odt = self.get_output_datatype() + target_bits = odt.bitwidth() + packed_bits = self.get_outstream_width() + out_npy_path = "{}/output.npy".format(code_gen_dir) + out_shape = self.get_folded_output_shape() + rtlsim_output_to_npy( + output, out_npy_path, odt, out_shape, packed_bits, target_bits + ) + + # load and reshape output + output = np.load(out_npy_path) + oshape = self.get_normal_output_shape() + output = np.asarray([output], dtype=np.float32).reshape(*oshape) + context[node.output[0]] = output + else: + raise Exception( + """Invalid value for attribute exec_mode! Is currently set to: {} + has to be set to one of the following value ("cppsim", "rtlsim")""".format( + mode + ) + ) + + def global_includes(self): + self.code_gen_dict["$GLOBALS$"] = ['#include "checksum.hpp"'] + + def defines(self, var): + items_per_word = self.get_nodeattr("items_per_word") + words_per_frame = self.get_nodeattr("words_per_frame") + word_size = self.get_instream_width() + my_defines = [] + my_defines.append("#define WORDS_PER_FRAME {}".format(words_per_frame)) + my_defines.append("#define ITEMS_PER_WORD {}".format(items_per_word)) + my_defines.append("#define WORD_SIZE {}".format(word_size)) + self.code_gen_dict["$DEFINES$"] = my_defines + + def read_npy_data(self): + code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") + dtype = self.get_input_datatype() + elem_bits = dtype.bitwidth() + packed_bits = self.get_instream_width() + packed_hls_type = "ap_uint<%d>" % packed_bits + elem_hls_type = dtype.get_hls_datatype_str() + npy_type = "float" + npy_in = "%s/input_0.npy" % code_gen_dir + self.code_gen_dict["$READNPYDATA$"] = [] + # note: the innermost dim is reversed for the input + self.code_gen_dict["$READNPYDATA$"].append( + 'npy2apintstream<%s, %s, %d, %s>("%s", in0, false);' + % (packed_hls_type, elem_hls_type, elem_bits, npy_type, npy_in) + ) + + def strm_decl(self): + self.code_gen_dict["$STREAMDECLARATIONS$"] = [] + self.code_gen_dict["$STREAMDECLARATIONS$"].append( + 'hls::stream<ap_uint<{}>> in0 ("in0");'.format(self.get_instream_width()) + ) + self.code_gen_dict["$STREAMDECLARATIONS$"].append( + 'hls::stream<ap_uint<{}>> out ("out");'.format(self.get_outstream_width()) + ) + self.code_gen_dict["$STREAMDECLARATIONS$"].append("ap_uint<32> chk;") + + def docompute(self): + self.code_gen_dict["$DOCOMPUTE$"] = [ + """checksum<WORDS_PER_FRAME, ITEMS_PER_WORD>(in0, out, chk);""" + ] + + def dataoutstrm(self): + code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") + dtype = self.get_output_datatype() + if dtype == DataType["BIPOLAR"]: + # use binary for bipolar storage + dtype = DataType["BINARY"] + elem_bits = dtype.bitwidth() + packed_bits = self.get_outstream_width() + packed_hls_type = "ap_uint<%d>" % packed_bits + elem_hls_type = dtype.get_hls_datatype_str() + npy_type = "float" + npy_out = "%s/output.npy" % code_gen_dir + shape = tuple(self.get_folded_output_shape()) + shape_cpp_str = str(shape).replace("(", "{").replace(")", "}") + + # note: the innermost dim is not reversed for the output + self.code_gen_dict["$DATAOUTSTREAM$"] = [ + 'apintstream2npy<%s, %s, %d, %s>(out, %s, "%s", false);' + % ( + packed_hls_type, + elem_hls_type, + elem_bits, + npy_type, + shape_cpp_str, + npy_out, + ), + "std::vector<unsigned int> checksum(1);", + "checksum[0] = chk;", + 'cnpy::npy_save("%s/output_checksum.npy",&checksum[0],{1},"w");' + % code_gen_dir, + ] + + def save_as_npy(self): + self.code_gen_dict["$SAVEASCNPY$"] = [] + + def blackboxfunction(self): + self.code_gen_dict["$BLACKBOXFUNCTION$"] = [ + """using T = ap_uint<WORD_SIZE>;\n void {}(hls::stream<T> &in0, + hls::stream<T> &out, ap_uint<32> &chk)""".format( + self.onnx_node.name + ) + ] + + def pragmas(self): + self.code_gen_dict["$PRAGMAS$"] = [ + "#pragma HLS interface axis port=in0 name=in0_" + self.hls_sname() + ] + self.code_gen_dict["$PRAGMAS$"].append( + "#pragma HLS interface axis port=out name=out_" + self.hls_sname() + ) + self.code_gen_dict["$PRAGMAS$"].append( + "#pragma HLS interface s_axilite port=chk bundle=checksum" + ) + self.code_gen_dict["$PRAGMAS$"].append( + "#pragma HLS interface ap_ctrl_none port=return" + ) + self.code_gen_dict["$PRAGMAS$"].append("#pragma HLS dataflow") + + def get_verilog_top_module_intf_names(self): + intf_names = super().get_verilog_top_module_intf_names() + # expose axilite interface + intf_names["axilite"] = ["s_axi_checksum"] + return intf_names diff --git a/src/finn/custom_op/fpgadataflow/concat.py b/src/finn/custom_op/fpgadataflow/concat.py new file mode 100644 index 0000000000000000000000000000000000000000..ee8a2c323238c4e4f91b76c91d1445c69e3cdaa0 --- /dev/null +++ b/src/finn/custom_op/fpgadataflow/concat.py @@ -0,0 +1,376 @@ +# Copyright (c) 2021, Xilinx +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of FINN nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import numpy as np +import os + +from finn.core.datatype import DataType +from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp +from finn.util.basic import roundup_to_integer_multiple +from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy + + +class StreamingConcat(HLSCustomOp): + """Streaming concatenation node with dynamically generated HLS. + Only supports concatenating along the last axis.""" + + def __init__(self, onnx_node): + super().__init__(onnx_node) + + def get_nodeattr_types(self): + my_attrs = { + # number of elements from each stream to concat + "ElemsPerStream": ("ints", True, []), + # FINN DataTypes for inputs; output datatype inferred from input + "inputDataType": ("s", True, ""), + # number of input vectors for non-concat axes, examples: + # [1] is a single vector (like a FC layer with batch=1) + # [4] is four vectors (like a FC layer with batch=4) + # [1, 4, 4] is four * four vectors (like a conv layer with batch=1) + "numInputVectors": ("ints", False, [1]), + } + my_attrs.update(super().get_nodeattr_types()) + return my_attrs + + def get_n_inputs(self): + return len(self.get_nodeattr("ElemsPerStream")) + + def get_total_elems(self): + elems_per_stream = self.get_nodeattr("ElemsPerStream") + return int(np.sum(elems_per_stream)) + + def get_normal_input_shape(self, ind=0): + elems_per_stream = self.get_nodeattr("ElemsPerStream") + elems = elems_per_stream[ind] + vecs = list(self.get_nodeattr("numInputVectors")) + ishape = tuple(vecs + [elems]) + return ishape + + def get_folded_input_shape(self, ind=0): + return self.get_normal_input_shape(ind) + + def get_normal_output_shape(self): + total_elems = self.get_total_elems() + vecs = list(self.get_nodeattr("numInputVectors")) + return tuple(vecs + [total_elems]) + + def get_folded_output_shape(self): + return self.get_normal_output_shape() + + def make_shape_compatible_op(self, model): + # check all input shapes + for i, inp in enumerate(self.onnx_node.input): + exp_ishape = self.get_normal_input_shape(i) + ishape = tuple(model.get_tensor_shape(inp)) + assert ishape == exp_ishape, "Unexpected shape for " + inp + oshape = self.get_normal_output_shape() + return super().make_const_shape_op(oshape) + + def infer_node_datatype(self, model): + # check all input datatypes + for i, inp in enumerate(self.onnx_node.input): + idt = model.get_tensor_datatype(inp) + assert idt == self.get_input_datatype() + odt = self.get_output_datatype() + model.set_tensor_datatype(self.onnx_node.output[0], odt) + + def verify_node(self): + pass + + def get_input_datatype(self, ind=0): + # input dt identical for all inputs + return DataType[self.get_nodeattr("inputDataType")] + + def get_output_datatype(self): + return self.get_input_datatype() + + def get_instream_width(self, ind=0): + elems_per_stream = self.get_nodeattr("ElemsPerStream") + elems = elems_per_stream[ind] + ibits = self.get_input_datatype().bitwidth() + return elems * ibits + + def get_outstream_width(self): + obits = self.get_output_datatype().bitwidth() + total_elems = self.get_total_elems() + out_width = total_elems * obits + return out_width + + def get_number_output_values(self): + return np.prod(self.get_folded_output_shape()[:-1]) + + def get_exp_cycles(self): + return np.prod(self.get_folded_output_shape()[:-1]) + + def generate_params(self, model, path): + elems_per_stream = self.get_nodeattr("ElemsPerStream") + inp_streams = [] + commands = [] + idt = self.get_input_datatype() + total_elems = self.get_total_elems() + total_bw = idt.bitwidth() * total_elems + for (i, elems) in enumerate(elems_per_stream): + bw = idt.bitwidth() * elems + inp_stream = "hls::stream<ap_uint<%d> > &in%d" % (bw, i) + inp_streams.append(inp_stream) + cmd = "in%d.read()" % i + commands.append(cmd) + out_stream = "hls::stream<ap_uint<%d> > &out" % (total_bw) + inp_streams.append(out_stream) + + impl_hls_code = [] + impl_hls_code.append("void StreamingConcat(") + impl_hls_code.append(",".join(inp_streams)) + impl_hls_code.append(", unsigned int numReps) {") + impl_hls_code.append("for(unsigned int i = 0; i < numReps; i++) {") + impl_hls_code.append("#pragma HLS PIPELINE II=1") + impl_hls_code.append("ap_uint<%d> out_elem;" % total_bw) + # FIXME: the order of streams for concatenation works out differently + # for cppsim vs rtlsim, addressed via reversing the order of commands + # for now + impl_hls_code.append("#ifdef __SYNTHESIS__") + impl_hls_code.append("out_elem = (" + ",".join(commands[::-1]) + ");") + impl_hls_code.append("#else") + impl_hls_code.append("out_elem = (" + ",".join(commands) + ");") + impl_hls_code.append("#endif") + impl_hls_code.append("out.write(out_elem);") + impl_hls_code.append("}") + impl_hls_code.append("}") + impl_hls_code = "\n".join(impl_hls_code) + + impl_filename = "{}/concat_impl.hpp".format(path) + f_impl = open(impl_filename, "w") + f_impl.write(impl_hls_code) + f_impl.close() + + def execute_node(self, context, graph): + mode = self.get_nodeattr("exec_mode") + node = self.onnx_node + n_inps = len(self.onnx_node.input) + ishapes = [self.get_normal_input_shape(x) for x in range(n_inps)] + folded_ishapes = [self.get_folded_input_shape(x) for x in range(n_inps)] + exp_oshape = self.get_normal_output_shape() + folded_oshape = self.get_folded_output_shape() + export_idt = self.get_input_datatype() + + if mode == "cppsim": + code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") + elif mode == "rtlsim": + code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") + else: + raise Exception( + """Invalid value for attribute exec_mode! Is currently set to: {} + has to be set to one of the following value ("cppsim", "rtlsim")""".format( + mode + ) + ) + + for i in range(n_inps): + inp = context[node.input[i]] + assert str(inp.dtype) == "float32", "Input datatype is not float32" + assert inp.shape == ishapes[i], "Input shape mismatch for " + node.input[i] + # reshape input into folded form + inp = inp.reshape(folded_ishapes[i]) + # make copy before saving array + reshaped_input = inp.copy() + np.save(os.path.join(code_gen_dir, "input_%d.npy" % i), reshaped_input) + + if mode == "cppsim": + # execute the precompiled model + super().exec_precompiled_singlenode_model() + # load output npy file + super().npy_to_dynamic_output(context) + assert ( + context[node.output[0]].shape == folded_oshape + ), "cppsim did not produce expected folded output shape" + context[node.output[0]] = context[node.output[0]].reshape(*exp_oshape) + elif mode == "rtlsim": + sim = self.get_rtlsim() + io_dict = {"inputs": {}, "outputs": {"out": []}} + for i in range(n_inps): + nbits = self.get_instream_width(i) + rtlsim_inp = npy_to_rtlsim_input( + "%s/input_%d.npy" % (code_gen_dir, i), + export_idt, + nbits, + reverse_inner=True, + ) + io_dict["inputs"]["in%d" % i] = rtlsim_inp + super().reset_rtlsim(sim) + super().toggle_clk(sim) + + self.rtlsim_multi_io(sim, io_dict) + rtlsim_output = io_dict["outputs"]["out"] + odt = self.get_output_datatype() + target_bits = odt.bitwidth() + packed_bits = self.get_outstream_width() + out_npy_path = "{}/output.npy".format(code_gen_dir) + out_shape = self.get_folded_output_shape() + rtlsim_output_to_npy( + rtlsim_output, + out_npy_path, + odt, + out_shape, + packed_bits, + target_bits, + reverse_inner=True, + ) + # load and reshape output + output = np.load(out_npy_path) + output = np.asarray([output], dtype=np.float32).reshape(*exp_oshape) + context[node.output[0]] = output + else: + raise Exception( + """Invalid value for attribute exec_mode! Is currently set to: {} + has to be set to one of the following value ("cppsim", "rtlsim")""".format( + mode + ) + ) + + assert ( + context[node.output[0]].shape == exp_oshape + ), """Output shape doesn't match expected shape.""" + + def global_includes(self): + self.code_gen_dict["$GLOBALS$"] = ['#include "concat_impl.hpp"'] + + def defines(self, var): + num_reps = self.get_nodeattr("numInputVectors") + num_reps = np.prod(num_reps) + self.code_gen_dict["$DEFINES$"] = ["#define NumReps %d" % num_reps] + + def read_npy_data(self): + n_inputs = self.get_n_inputs() + code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") + npy_type = "float" + self.code_gen_dict["$READNPYDATA$"] = [] + idt = self.get_input_datatype() + idt_bw = idt.bitwidth() + elem_hls_type = idt.get_hls_datatype_str() + elem_bits = idt_bw + for i in range(n_inputs): + packed_bits = self.get_instream_width(i) + packed_hls_type = "ap_uint<%d>" % packed_bits + npy_in = "%s/input_%d.npy" % (code_gen_dir, i) + self.code_gen_dict["$READNPYDATA$"].append( + 'npy2apintstream<%s, %s, %d, %s>("%s", in%d);' + % (packed_hls_type, elem_hls_type, elem_bits, npy_type, npy_in, i) + ) + + def strm_decl(self): + self.code_gen_dict["$STREAMDECLARATIONS$"] = [] + n_inputs = self.get_n_inputs() + for i in range(n_inputs): + packed_bits = self.get_instream_width(i) + packed_hls_type = "ap_uint<%d>" % packed_bits + stream_name = "in%d" % i + self.code_gen_dict["$STREAMDECLARATIONS$"].append( + 'hls::stream<%s> %s ("%s");' + % (packed_hls_type, stream_name, stream_name) + ) + self.code_gen_dict["$STREAMDECLARATIONS$"].append( + 'hls::stream<ap_uint<{}>> out ("out");'.format(self.get_outstream_width()) + ) + + def docompute(self): + self.code_gen_dict["$DOCOMPUTE$"] = [] + n_inputs = self.get_n_inputs() + in_stream_names = ["in%d" % x for x in range(n_inputs)] + in_stream_names = ",".join(in_stream_names) + comp_call = "StreamingConcat(%s, out, NumReps);" % (in_stream_names) + self.code_gen_dict["$DOCOMPUTE$"] = [comp_call] + + def dataoutstrm(self): + code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") + dtype = self.get_output_datatype() + elem_bits = dtype.bitwidth() + packed_bits = self.get_outstream_width() + packed_hls_type = "ap_uint<%d>" % packed_bits + elem_hls_type = dtype.get_hls_datatype_str() + npy_type = "float" + npy_out = "%s/output.npy" % code_gen_dir + oshape = self.get_folded_output_shape() + oshape_cpp_str = str(oshape).replace("(", "{").replace(")", "}") + + self.code_gen_dict["$DATAOUTSTREAM$"] = [ + 'apintstream2npy<%s, %s, %d, %s>(out, %s, "%s");' + % ( + packed_hls_type, + elem_hls_type, + elem_bits, + npy_type, + oshape_cpp_str, + npy_out, + ) + ] + + def save_as_npy(self): + self.code_gen_dict["$SAVEASCNPY$"] = [] + + def blackboxfunction(self): + n_inputs = self.get_n_inputs() + in_streams = [] + for i in range(n_inputs): + iwidth = self.get_instream_width(i) + in_streams.append("hls::stream<ap_uint<%d>> &in%d" % (iwidth, i)) + in_streams = ",".join(in_streams) + total_width = self.get_input_datatype().bitwidth() * self.get_total_elems() + out_stream = "hls::stream<ap_uint<%d>> &out" % (total_width) + blackbox_hls = "void %s(%s, %s)" % (self.onnx_node.name, in_streams, out_stream) + self.code_gen_dict["$BLACKBOXFUNCTION$"] = [blackbox_hls] + + def pragmas(self): + n_inputs = self.get_n_inputs() + pragmas = [] + for i in range(n_inputs): + pragmas.append( + "#pragma HLS INTERFACE axis port=in%d name=in%d_%s" + % (i, i, self.hls_sname()) + ) + self.code_gen_dict["$PRAGMAS$"] = pragmas + self.code_gen_dict["$PRAGMAS$"].append( + "#pragma HLS INTERFACE axis port=out name=out_" + self.hls_sname() + ) + self.code_gen_dict["$PRAGMAS$"].append( + "#pragma HLS INTERFACE ap_ctrl_none port=return" + ) + + def get_instream_width_padded(self, ind=0): + in_width = self.get_instream_width(ind) + return roundup_to_integer_multiple(in_width, 8) + + def get_verilog_top_module_intf_names(self): + intf_names = super().get_verilog_top_module_intf_names() + n_inputs = self.get_n_inputs() + sname = self.hls_sname() + intf_names["s_axis"] = [] + for i in range(n_inputs): + intf_names["s_axis"].append( + ("in%d_%s" % (i, sname), self.get_instream_width_padded(i)) + ) + return intf_names diff --git a/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py b/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py index a4018836846257c15ad203b1cef54c03cd081e45..150c3b7198d139c29a342460bab499c73bb84196 100644 --- a/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py +++ b/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py @@ -286,7 +286,6 @@ class ConvolutionInputGenerator(HLSCustomOp): exp_ishape = self.get_normal_input_shape() exp_oshape = self.get_normal_output_shape() folded_ishape = self.get_folded_input_shape() - folded_oshape = self.get_folded_output_shape() # TODO ensure codegen dir exists if mode == "cppsim": @@ -325,10 +324,9 @@ class ConvolutionInputGenerator(HLSCustomOp): # load output npy file super().npy_to_dynamic_output(context) assert ( - context[node.output[0]].shape == folded_oshape + context[node.output[0]].shape == exp_oshape ), "cppsim \ - did not produce expected ofolded utput shape" - context[node.output[0]] = context[node.output[0]].reshape(*exp_oshape) + did not produce expected output shape" elif mode == "rtlsim": sim = self.get_rtlsim() nbits = self.get_instream_width() @@ -489,8 +487,12 @@ class ConvolutionInputGenerator(HLSCustomOp): ] def pragmas(self): - self.code_gen_dict["$PRAGMAS$"] = ["#pragma HLS INTERFACE axis port=in0"] - self.code_gen_dict["$PRAGMAS$"].append("#pragma HLS INTERFACE axis port=out") + self.code_gen_dict["$PRAGMAS$"] = [ + "#pragma HLS INTERFACE axis port=in0 name=in0_" + self.hls_sname() + ] + self.code_gen_dict["$PRAGMAS$"].append( + "#pragma HLS INTERFACE axis port=out name=out_" + self.hls_sname() + ) self.code_gen_dict["$PRAGMAS$"].append( "#pragma HLS INTERFACE ap_ctrl_none port=return" ) diff --git a/src/finn/custom_op/fpgadataflow/convolutioninputgenerator1d.py b/src/finn/custom_op/fpgadataflow/convolutioninputgenerator1d.py index e43d73b1cd3ec7902fc743bfdf4d2fcad1c01dfe..b25246f1eaf73e14836bb6d00a5704f8bd3ce892 100644 --- a/src/finn/custom_op/fpgadataflow/convolutioninputgenerator1d.py +++ b/src/finn/custom_op/fpgadataflow/convolutioninputgenerator1d.py @@ -29,6 +29,7 @@ import math import numpy as np import os +import warnings from finn.core.datatype import DataType from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp @@ -85,6 +86,7 @@ class ConvolutionInputGenerator1D(HLSCustomOp): "distributed", {"auto", "block", "distributed", "ultra"}, ), + "parallel_window": ("i", False, 0, {0, 1}), } my_attrs.update(super().get_nodeattr_types()) return my_attrs @@ -181,18 +183,36 @@ class ConvolutionInputGenerator1D(HLSCustomOp): num_output_elems = np.prod(folded_oshape[:-1]) return num_output_elems + def get_swu_variant(self): + # checks which variant of the 1D ConvolutionInputGenerator (SWU) can be used + # We have 5 variants: ConvolutionInputGenerator_1D_parallel, + # ConvolutionInputGenerator_1D_dws_naive, ConvolutionInputGenerator_1D, + # ConvolutioninputGenerator_1D_dws, ConvolutionInputGenerator_1D_dws_stride + is_dws = self.get_nodeattr("depthwise") + is_strided = np.prod(self.get_nodeattr("Stride")) > 1 + is_stride_2 = np.prod(self.get_nodeattr("Stride")) == 2 + is_dilated = np.prod(self.get_nodeattr("Dilation")) > 1 + if self.use_parallel_window_output(): + return "ConvolutionInputGenerator_1D_parallel" + if not is_dws: + return "ConvolutionInputGenerator_1D" + if is_dws: + if (is_strided and not is_stride_2) or (is_dilated): + return "ConvolutionInputGenerator_1D_dws_naive" + elif is_stride_2: + return "ConvolutionInputGenerator_1D_dws_stride" + else: + return "ConvolutionInputGenerator_1D_dws" + def get_1d_conv_attrs_normalized(self): # support both (1, D) and (D, 1) cases transparently: # For the kernel, presenting the input data of size D as # [H, W] = [Y, X] = [1, D] or [D, 1] - # effectively gives the same result. Because the - # ConvolutionInputGenerator_NonSquare_Dilated(_dws) kernel currently only - # supports dilation>1 along the X-axis and the - # ConvolutionInputGenerator_NonSquare only works for stride>1 along the - # X-axis, we are working with the following assumption: - # the dummy ('1') dimension is the Y-dimension, i.e. - # images and kernels (and their attributes) of dimension - # [H, W] = [Y, X] = [D, 1] or [1, D] are always mapped to [1, D] + # effectively gives the same result. + # For consistency and ease of programming, this function + # returns the attributes of the layer as follows: + # [H, W] = [Y, X] = [1, D] or [D, 1] are always mapped to [1, D]. + # The dummy ('1') dimension is the Y-dimension. ifm_ch = self.get_nodeattr("IFMChannels") k = self.get_nodeattr("ConvKernelDim") ifm_dim = self.get_nodeattr("IFMDim") @@ -217,56 +237,94 @@ class ConvolutionInputGenerator1D(HLSCustomOp): dilation = self.get_nodeattr("Dilation") stride_h, stride_w = stride dilation_h, dilation_w = dilation + ram_style = self.get_nodeattr("ram_style") - if self.get_nodeattr("SIMD") == self.get_nodeattr("IFMChannels"): - if self.get_nodeattr("depthwise") == 0: - if stride_h == 1 and stride_w == 1: - if dilation_h == 1 and dilation_w == 1: - return True - - return False + fully_unfolded = self.get_nodeattr("SIMD") == self.get_nodeattr("IFMChannels") + non_dws = self.get_nodeattr("depthwise") == 0 + no_stride = stride_h == 1 and stride_w == 1 + no_dilation = dilation_h == 1 and dilation_w == 1 + supported_ram_style = ram_style in ["auto", "distributed"] + if self.get_nodeattr("parallel_window") == 1: + if ( + fully_unfolded + and non_dws + and no_stride + and no_dilation + and supported_ram_style + ): + return True + else: + warnings.warn( + "{}: Parallel window output variant is not supported for this node,\ + please inspect requirements in use_parallel_window_output method\ + of the custom_op".format( + self.onnx_node.name + ) + ) + return False + else: + return False def get_exp_cycles(self): simd = self.get_nodeattr("SIMD") ( ifm_ch, - ifm_dim, - ofm_dim, - k, - stride, - dilation, + [ifm_dim_h, ifm_dim_w], + [ofm_dim_h, ofm_dim_w], + [k_h, k_w], + [stride_h, stride_w], + [dilation_h, dilation_w], ) = self.get_1d_conv_attrs_normalized() - ifm_dim_h, ifm_dim_w = ifm_dim - ofm_dim_h, ofm_dim_w = ofm_dim - k_h, k_w = k - stride_h, stride_w = stride - dilation_h, dilation_w = dilation # since mmv != 1 is not supported yet, we set mmv for now to 1 - mmv = 1 + # mmv = 1 # see https://github.com/Xilinx/finn-hlslib/blob/master/slidingwindow.h - if self.use_parallel_window_output(): + swu_variant = self.get_swu_variant() + if swu_variant == "ConvolutionInputGenerator_1D_parallel": exp_cycles = k_w + ofm_dim_w - else: - cycles_write_block = (ofm_dim_w * k_w * k_h * (ifm_ch / simd)) / mmv - cycles_read_block = stride_w * ifm_dim_w * (ifm_ch / simd) - max_cycles = max(cycles_write_block, cycles_read_block) + elif swu_variant == "ConvolutionInputGenerator_1D": + exp_cycles = 1 + ofm_dim_w * k_w * ifm_ch / simd + elif swu_variant in [ + "ConvolutionInputGenerator_1D_dws", + "ConvolutionInputGenerator_1D_dws_stride", + ]: exp_cycles = ( - ifm_dim_w * k_h * dilation_h * (ifm_ch / simd) + ofm_dim_h * max_cycles + 1 + + ofm_dim_w * k_w * ifm_ch / simd + + (ifm_ch / simd) * (k_w - 1) + - (k_w - 1) ) + elif swu_variant == "ConvolutionInputGenerator_1D_dws_naive": + cycles_read_block = ifm_dim_w * ifm_ch / simd + cycles_write_block = ofm_dim_w * k_w * ifm_ch / simd + exp_cycles = cycles_read_block + cycles_write_block return int(exp_cycles) def bram_estimation(self): - # NOTE: not tested for correctness simd = self.get_nodeattr("SIMD") - ifm_ch = self.get_nodeattr("IFMChannels") - ifm_dim = np.prod(self.get_nodeattr("IFMDim")) - k = np.prod(self.get_nodeattr("ConvKernelDim")) - stride = np.prod(self.get_nodeattr("Stride")) + ( + ifm_ch, + [ifm_dim_h, ifm_dim_w], + [ofm_dim_h, ofm_dim_w], + [k_h, k_w], + [stride_h, stride_w], + [dilation_h, dilation_w], + ) = self.get_1d_conv_attrs_normalized() ram_style = self.get_nodeattr("ram_style") + swu_variant = self.get_swu_variant() + if swu_variant == "ConvolutionInputGenerator_1D_parallel": + return 0 if ram_style == "block" or ram_style == "auto": - ram_depth = ifm_dim * ifm_ch / simd + if swu_variant == "ConvolutionInputGenerator_1D": + ram_depth = (k_w - 1) * ifm_ch / simd + elif swu_variant == "ConvolutionInputGenerator_1D_dws_naive": + ram_depth = ifm_dim_w * ifm_ch / simd + elif swu_variant in [ + "ConvolutionInputGenerator_1D_dws", + "ConvolutionInputGenerator_1D_dws_stride", + ]: + ram_depth = k_w * ifm_ch / simd if ram_depth <= 512: ram_width = 36 elif ram_depth <= 1024: @@ -279,53 +337,80 @@ class ConvolutionInputGenerator1D(HLSCustomOp): ram_width = 2 else: ram_width = 1 - return int( - (k + stride) - * ( - math.ceil(simd * self.get_input_datatype().bitwidth() / ram_width) - * math.ceil(ifm_dim * ifm_ch / simd / ram_depth) - ) + width_mul = math.ceil( + simd * self.get_input_datatype().bitwidth() / ram_width ) + depth_mul = math.ceil(ram_depth / 18432) + return width_mul * depth_mul else: return 0 def lut_estimation(self): - # NOTE: not tested for correctness simd = self.get_nodeattr("SIMD") - ifm_ch = self.get_nodeattr("IFMChannels") - ifm_dim = np.prod(self.get_nodeattr("IFMDim")) - k = np.prod(self.get_nodeattr("ConvKernelDim")) - stride = np.prod(self.get_nodeattr("Stride")) + ( + ifm_ch, + [ifm_dim_h, ifm_dim_w], + [ofm_dim_h, ofm_dim_w], + [k_h, k_w], + [stride_h, stride_w], + [dilation_h, dilation_w], + ) = self.get_1d_conv_attrs_normalized() ram_style = self.get_nodeattr("ram_style") - if ram_style == "distributed": - ram_luts = int( - (k + stride) - * ( - simd - * self.get_input_datatype().bitwidth() - * math.ceil(ifm_dim * ifm_ch / simd / 64) - ) + swu_variant = self.get_swu_variant() + if swu_variant == "ConvolutionInputGenerator_1D_parallel": + ram_luts = math.ceil( + simd * self.get_input_datatype().bitwidth() * (k_w + 1) / 64 ) + elif ram_style == "distributed": + if swu_variant == "ConvolutionInputGenerator_1D": + ram_luts = math.ceil( + self.get_input_datatype().bitwidth() * (k_w - 1) * ifm_ch / 64 + ) + elif swu_variant == "ConvolutionInputGenerator_1D_dws_naive": + ram_luts = math.ceil( + self.get_input_datatype().bitwidth() * ifm_dim_w * ifm_ch / 64 + ) + elif swu_variant in [ + "ConvolutionInputGenerator_1D_dws", + "ConvolutionInputGenerator_1D_dws_stride", + ]: + ram_luts = math.ceil( + self.get_input_datatype().bitwidth() * k_w * ifm_ch / 64 + ) else: ram_luts = 0 return 300 + ram_luts def uram_estimation(self): - # NOTE: not tested for correctness simd = self.get_nodeattr("SIMD") - ifm_ch = self.get_nodeattr("IFMChannels") - ifm_dim = np.prod(self.get_nodeattr("IFMDim")) - k = np.prod(self.get_nodeattr("ConvKernelDim")) - stride = np.prod(self.get_nodeattr("Stride")) + ( + ifm_ch, + [ifm_dim_h, ifm_dim_w], + [ofm_dim_h, ofm_dim_w], + [k_h, k_w], + [stride_h, stride_w], + [dilation_h, dilation_w], + ) = self.get_1d_conv_attrs_normalized() ram_style = self.get_nodeattr("ram_style") - if ram_style == "ultra": - return int( - (k + stride) - * ( - math.ceil(simd * self.get_input_datatype().bitwidth() / 64) - * math.ceil(ifm_dim * ifm_ch / simd / 4096) - ) - ) + swu_variant = self.get_swu_variant() + if swu_variant == "ConvolutionInputGenerator_1D_parallel": + return 0 + elif ram_style == "ultra": + if swu_variant == "ConvolutionInputGenerator_1D": + width_mul = math.ceil(simd * self.get_input_datatype().bitwidth() / 72) + depth_mul = math.ceil((k_w - 1) * ifm_ch / simd / 4096) + return width_mul * depth_mul + elif swu_variant == "ConvolutionInputGenerator_1D_dws_naive": + width_mul = math.ceil(simd * self.get_input_datatype().bitwidth() / 72) + depth_mul = math.ceil(ifm_dim_w * ifm_ch / simd / 4096) + return width_mul * depth_mul + elif swu_variant in [ + "ConvolutionInputGenerator_1D_dws", + "ConvolutionInputGenerator_1D_dws_stride", + ]: + width_mul = math.ceil(simd * self.get_input_datatype().bitwidth() / 72) + depth_mul = math.ceil(k_w * ifm_ch / simd / 4096) + return width_mul * depth_mul else: return 0 @@ -335,7 +420,6 @@ class ConvolutionInputGenerator1D(HLSCustomOp): exp_ishape = self.get_normal_input_shape() exp_oshape = self.get_normal_output_shape() folded_ishape = self.get_folded_input_shape() - folded_oshape = self.get_folded_output_shape() # TODO ensure codegen dir exists if mode == "cppsim": @@ -374,10 +458,9 @@ class ConvolutionInputGenerator1D(HLSCustomOp): # load output npy file super().npy_to_dynamic_output(context) assert ( - context[node.output[0]].shape == folded_oshape + context[node.output[0]].shape == exp_oshape ), "cppsim \ - did not produce expected ofolded utput shape" - context[node.output[0]] = context[node.output[0]].reshape(*exp_oshape) + did not produce expected output shape" elif mode == "rtlsim": sim = self.get_rtlsim() nbits = self.get_instream_width() @@ -423,89 +506,83 @@ class ConvolutionInputGenerator1D(HLSCustomOp): numReps = 1 ( ifm_ch, - ifm_dim, - ofm_dim, - k, - stride, - dilation, + [ifm_dim_h, ifm_dim_w], + [ofm_dim_h, ofm_dim_w], + [k_h, k_w], + [stride_h, stride_w], + [dilation_h, dilation_w], ) = self.get_1d_conv_attrs_normalized() simd = self.get_nodeattr("SIMD") ifm_precision = self.get_input_datatype().bitwidth() - ifm_dim_y, ifm_dim_x = ifm_dim - ofm_dim_y, ofm_dim_x = ofm_dim - k_y, k_x = k - dilation_y, dilation_x = dilation - # For a 1d convolution with stride=[S,1] or [1,S], the finn-hlslib function - # of ConvInpGen must be created with [stride_y, stride_x] = [S, S]. - # TODO: changes in finn-hlslib (slidingwindow.h) - stride_y = np.prod(stride) - stride_x = np.prod(stride) - - if dilation_x > 1: - assert ( - dilation_y == 1 - ), "Dilation value greater than 1 along y-axis is not yet supported" + swu_variant = self.get_swu_variant() + + if swu_variant in [ + "ConvolutionInputGenerator_1D_parallel", + "ConvolutionInputGenerator_1D", + "ConvolutionInputGenerator_1D_dws_stride", + ]: self.code_gen_dict["$DEFINES$"] = [ """ #define ConvKernelDim1_x {}\n - #define ConvKernelDim1_y {}\n #define IFMChannels1 {}\n #define Input_precision1 {}\n #define IFMDim1_x {}\n - #define IFMDim1_y {}\n #define OFMDim1_x {}\n - #define OFMDim1_y {}\n - #define SIMD1 {}\n #define Stride1_x {}\n - #define Stride1_y {}\n - #define Dilation1_x {}\n - #define Dilation1_y {}\n + #define SIMD1 {}\n #define numReps {} """.format( - k_x, - k_y, + k_w, ifm_ch, ifm_precision, - ifm_dim_x, - ifm_dim_y, - ofm_dim_x, - ofm_dim_y, + ifm_dim_w, + ofm_dim_w, + stride_w, simd, - stride_x, - stride_y, - dilation_x, - dilation_y, numReps, ) ] - else: - ofm_dim = self.get_nodeattr("OFMDim") + if swu_variant == "ConvolutionInputGenerator_1D_dws": self.code_gen_dict["$DEFINES$"] = [ """ #define ConvKernelDim1_x {}\n - #define ConvKernelDim1_y {}\n #define IFMChannels1 {}\n #define Input_precision1 {}\n #define IFMDim1_x {}\n - #define IFMDim1_y {}\n #define OFMDim1_x {}\n - #define OFMDim1_y {}\n #define SIMD1 {}\n + #define numReps {} + """.format( + k_w, + ifm_ch, + ifm_precision, + ifm_dim_w, + ofm_dim_w, + simd, + numReps, + ) + ] + if swu_variant == "ConvolutionInputGenerator_1D_dws_naive": + self.code_gen_dict["$DEFINES$"] = [ + """ + #define ConvKernelDim1_x {}\n + #define IFMChannels1 {}\n + #define Input_precision1 {}\n + #define IFMDim1_x {}\n + #define OFMDim1_x {}\n #define Stride1_x {}\n - #define Stride1_y {}\n + #define Dilation1_x {}\n + #define SIMD1 {}\n #define numReps {} """.format( - k_x, - k_y, + k_w, ifm_ch, ifm_precision, - ifm_dim_x, - ifm_dim_y, - ofm_dim_x, - ofm_dim_y, + ifm_dim_w, + ofm_dim_w, + stride_w, + dilation_w, simd, - stride_x, - stride_y, numReps, ) ] @@ -546,49 +623,49 @@ class ConvolutionInputGenerator1D(HLSCustomOp): "ultra": "ap_resource_uram()", } hls_ram_style = map_to_hls_ram_style[ram_style] + swu_variant = self.get_swu_variant() # check which ConvolutionInputGenerator is needed - if self.use_parallel_window_output(): - hls_call = "ConvolutionInputGenerator_1D_parallel" + if swu_variant == "ConvolutionInputGenerator_1D_parallel": self.code_gen_dict["$DOCOMPUTE$"] = [ """{}<ConvKernelDim1_x, IFMChannels1, Input_precision1, - IFMDim1_x, OFMDim1_x, SIMD1, Stride1_x> + IFMDim1_x, OFMDim1_x, Stride1_x, SIMD1> (in0, out, numReps, {});""".format( - hls_call, hls_ram_style + swu_variant, hls_ram_style + ) + ] + if swu_variant == "ConvolutionInputGenerator_1D": + self.code_gen_dict["$DOCOMPUTE$"] = [ + """{}<ConvKernelDim1_x, IFMChannels1, Input_precision1, + IFMDim1_x, OFMDim1_x, Stride1_x, SIMD1> + (in0, out, numReps, {});""".format( + swu_variant, hls_ram_style + ) + ] + if swu_variant == "ConvolutionInputGenerator_1D_dws": + self.code_gen_dict["$DOCOMPUTE$"] = [ + """{}<ConvKernelDim1_x, IFMChannels1, Input_precision1, + IFMDim1_x, OFMDim1_x, SIMD1> + (in0, out, numReps, {});""".format( + swu_variant, hls_ram_style + ) + ] + if swu_variant == "ConvolutionInputGenerator_1D_dws_stride": + self.code_gen_dict["$DOCOMPUTE$"] = [ + """{}<ConvKernelDim1_x, IFMChannels1, Input_precision1, + IFMDim1_x, OFMDim1_x, Stride1_x, SIMD1> + (in0, out, numReps, {});""".format( + swu_variant, hls_ram_style + ) + ] + if swu_variant == "ConvolutionInputGenerator_1D_dws_naive": + self.code_gen_dict["$DOCOMPUTE$"] = [ + """{}<ConvKernelDim1_x, IFMChannels1, Input_precision1, + IFMDim1_x, OFMDim1_x, Stride1_x, Dilation1_x, SIMD1> + (in0, out, numReps, {});""".format( + swu_variant, hls_ram_style ) ] - else: - hls_call = "ConvolutionInputGenerator_NonSquare" - dilation_h, dilation_w = self.get_nodeattr("Dilation") - if dilation_h > 1 or dilation_w > 1: - hls_call += "_Dilated" - if self.get_nodeattr("depthwise") == 1: - hls_call += "_dws" - self.code_gen_dict["$DOCOMPUTE$"] = [ - """{}<ConvKernelDim1_x, ConvKernelDim1_y, IFMChannels1, - Input_precision1, IFMDim1_x, IFMDim1_y, OFMDim1_x, OFMDim1_y, - SIMD1, Stride1_x, Stride1_y, Dilation1_x, Dilation1_y> - (in0, out, numReps, {});""".format( - hls_call, hls_ram_style - ) - ] - elif self.get_nodeattr("depthwise") == 1: - hls_call += "_dws" - self.code_gen_dict["$DOCOMPUTE$"] = [ - """{}<ConvKernelDim1_x, ConvKernelDim1_y, IFMChannels1, - Input_precision1, IFMDim1_x, IFMDim1_y, OFMDim1_x, OFMDim1_y, - SIMD1, Stride1_x, Stride1_y> (in0, out, numReps, {});""".format( - hls_call, hls_ram_style - ) - ] - else: - self.code_gen_dict["$DOCOMPUTE$"] = [ - """{}<ConvKernelDim1_x, ConvKernelDim1_y, IFMChannels1, - Input_precision1, IFMDim1_x, IFMDim1_y, OFMDim1_x, OFMDim1_y, - SIMD1, Stride1_x, Stride1_y> (in0, out, numReps, {});""".format( - hls_call, hls_ram_style - ) - ] def dataoutstrm(self): code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") @@ -646,8 +723,12 @@ class ConvolutionInputGenerator1D(HLSCustomOp): ] def pragmas(self): - self.code_gen_dict["$PRAGMAS$"] = ["#pragma HLS INTERFACE axis port=in0"] - self.code_gen_dict["$PRAGMAS$"].append("#pragma HLS INTERFACE axis port=out") + self.code_gen_dict["$PRAGMAS$"] = [ + "#pragma HLS INTERFACE axis port=in0 name=in0_" + self.hls_sname() + ] + self.code_gen_dict["$PRAGMAS$"].append( + "#pragma HLS INTERFACE axis port=out name=out_" + self.hls_sname() + ) self.code_gen_dict["$PRAGMAS$"].append( "#pragma HLS INTERFACE ap_ctrl_none port=return" ) diff --git a/src/finn/custom_op/fpgadataflow/downsampler.py b/src/finn/custom_op/fpgadataflow/downsampler.py index 124b3e4645caa63a2590d91c58f430f8d56bb6a0..aa3bad9e41f78c3d6ae4bcd23d99bb7c4c72800c 100644 --- a/src/finn/custom_op/fpgadataflow/downsampler.py +++ b/src/finn/custom_op/fpgadataflow/downsampler.py @@ -248,8 +248,12 @@ class DownSampler(HLSCustomOp): ] def pragmas(self): - self.code_gen_dict["$PRAGMAS$"] = ["#pragma HLS INTERFACE axis port=in0"] - self.code_gen_dict["$PRAGMAS$"].append("#pragma HLS INTERFACE axis port=out") + self.code_gen_dict["$PRAGMAS$"] = [ + "#pragma HLS INTERFACE axis port=in0 name=in0_" + self.hls_sname() + ] + self.code_gen_dict["$PRAGMAS$"].append( + "#pragma HLS INTERFACE axis port=out name=out_" + self.hls_sname() + ) self.code_gen_dict["$PRAGMAS$"].append( "#pragma HLS INTERFACE ap_ctrl_none port=return" ) @@ -260,7 +264,6 @@ class DownSampler(HLSCustomOp): exp_ishape = self.get_normal_input_shape() exp_oshape = self.get_normal_output_shape() folded_ishape = self.get_folded_input_shape() - folded_oshape = self.get_folded_output_shape() if mode == "cppsim": code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") @@ -291,9 +294,8 @@ class DownSampler(HLSCustomOp): # load output npy file super().npy_to_dynamic_output(context) assert ( - context[node.output[0]].shape == folded_oshape - ), "cppsim did not produce expected folded output shape" - context[node.output[0]] = context[node.output[0]].reshape(*exp_oshape) + context[node.output[0]].shape == exp_oshape + ), "cppsim did not produce expected output shape" elif mode == "rtlsim": sim = self.get_rtlsim() nbits = self.get_instream_width() diff --git a/src/finn/custom_op/fpgadataflow/duplicatestreams_batch.py b/src/finn/custom_op/fpgadataflow/duplicatestreams_batch.py index 3b0fa55b0065e6ceeb8ad2eb7282a413adf443d7..fb15b260e6bdfd57f42e0e4659a1536bb716b526 100644 --- a/src/finn/custom_op/fpgadataflow/duplicatestreams_batch.py +++ b/src/finn/custom_op/fpgadataflow/duplicatestreams_batch.py @@ -29,7 +29,6 @@ import numpy as np import os import warnings -from onnx import TensorProto, helper from finn.core.datatype import DataType from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp @@ -46,6 +45,8 @@ class DuplicateStreams_Batch(HLSCustomOp): my_attrs = { "NumChannels": ("i", True, 0), "PE": ("i", True, 0), + # how many duplicated output streams to create + "NumOutputStreams": ("i", True, 0), # FINN DataTypes for input "inputDataType": ("s", True, ""), # number of input vectors, examples: @@ -57,6 +58,9 @@ class DuplicateStreams_Batch(HLSCustomOp): my_attrs.update(super().get_nodeattr_types()) return my_attrs + def get_num_output_streams(self): + return self.get_nodeattr("NumOutputStreams") + def get_normal_input_shape(self): ch = self.get_nodeattr("NumChannels") vecs = list(self.get_nodeattr("numInputVectors")) @@ -72,36 +76,27 @@ class DuplicateStreams_Batch(HLSCustomOp): folded_ishape = tuple(vecs + [folds, pe]) return folded_ishape - def get_normal_output_shape(self): + def get_normal_output_shape(self, ind=0): + # since the output shape of both out streams are the same + # return independently from index return self.get_normal_input_shape() - def get_folded_output_shape(self): + def get_folded_output_shape(self, ind=0): + # since the output shape of both out streams are the same + # return independently from index return self.get_folded_input_shape() def make_shape_compatible_op(self, model): exp_ishape = self.get_normal_input_shape() ishape = tuple(model.get_tensor_shape(self.onnx_node.input[0])) assert ishape == exp_ishape, "Unexpected input shape." + num_out = self.get_num_output_streams() + assert len(self.onnx_node.output) == num_out, "Unexpected number of outputs" oshape = self.get_normal_output_shape() - values = np.zeros(oshape).astype(np.float32) - split_input = np.concatenate((values, values), axis=0) - - split_in = helper.make_tensor_value_info( - model.make_new_valueinfo_name(), TensorProto.FLOAT, oshape - ) - - model.graph.value_info.append(split_in) # requires clean up - model.set_initializer(split_in.name, split_input) - - shape_comp_node = helper.make_node( - "Split", - inputs=[split_in.name], - outputs=[self.onnx_node.output[0], self.onnx_node.output[1]], - axis=0, - ) - - return shape_comp_node + ret = super().make_const_shape_op(oshape) + ret.output[:] = self.onnx_node.output + return ret def infer_node_datatype(self, model): node = self.onnx_node @@ -115,8 +110,8 @@ class DuplicateStreams_Batch(HLSCustomOp): warnings.warn(warn_str) self.set_nodeattr("inputDataType", idt.name) odt = self.get_output_datatype() - model.set_tensor_datatype(self.onnx_node.output[0], odt) - model.set_tensor_datatype(self.onnx_node.output[1], odt) + for my_out in self.onnx_node.output: + model.set_tensor_datatype(my_out, odt) def verify_node(self): info_messages = [] @@ -133,6 +128,7 @@ class DuplicateStreams_Batch(HLSCustomOp): self.get_nodeattr("executable_path") self.get_nodeattr("NumChannels") self.get_nodeattr("PE") + self.get_nodeattr("NumOutputStreams") self.get_nodeattr("inputDataType") info_messages.append("All necessary attributes exist") except Exception: @@ -165,19 +161,53 @@ class DuplicateStreams_Batch(HLSCustomOp): return out_width def get_number_output_values(self): - return 2 * np.prod(self.get_folded_output_shape()[1:-1]) + return self.get_num_output_streams() * np.prod( + self.get_folded_output_shape()[1:-1] + ) def get_exp_cycles(self): # Channels/PE * batch size * fmdim * fmdim return np.prod(self.get_folded_output_shape()[:-1]) + def generate_params(self, model, path): + n_outputs = self.get_num_output_streams() + inp_streams = [] + commands = [] + o_stream_w = self.get_outstream_width() + i_stream_w = self.get_instream_width() + in_stream = "hls::stream<ap_uint<%d> > &in0" % (i_stream_w) + inp_streams.append(in_stream) + commands.append("ap_uint<%d> e = in0.read();" % i_stream_w) + iters = self.get_number_output_values() // self.get_num_output_streams() + for i in range(n_outputs): + out_stream = "hls::stream<ap_uint<%d> > &out%d" % (o_stream_w, i) + inp_streams.append(out_stream) + cmd = "out%d.write(e);" % i + commands.append(cmd) + + impl_hls_code = [] + impl_hls_code.append("void DuplicateStreamsCustom(") + impl_hls_code.append(",".join(inp_streams)) + impl_hls_code.append(") {") + impl_hls_code.append("for(unsigned int i = 0; i < %d; i++) {" % iters) + impl_hls_code.append("#pragma HLS PIPELINE II=1") + impl_hls_code.append("\n".join(commands)) + impl_hls_code.append("}") + impl_hls_code.append("}") + impl_hls_code = "\n".join(impl_hls_code) + + impl_filename = "{}/duplicate_impl.hpp".format(path) + f_impl = open(impl_filename, "w") + f_impl.write(impl_hls_code) + f_impl.close() + def execute_node(self, context, graph): mode = self.get_nodeattr("exec_mode") node = self.onnx_node exp_ishape = self.get_normal_input_shape() exp_oshape = self.get_normal_output_shape() folded_ishape = self.get_folded_input_shape() - folded_oshape = self.get_folded_output_shape() + n_outputs = self.get_num_output_streams() if mode == "cppsim": code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") @@ -205,17 +235,14 @@ class DuplicateStreams_Batch(HLSCustomOp): # execute the precompiled model super().exec_precompiled_singlenode_model() # load output npy file - super().npy_to_dynamic_outputs(context, ["output0.npy", "output1.npy"]) - assert ( - context[node.output[0]].shape == folded_oshape - ), "cppsim \ - did not produce expected ofolded utput shape" - assert ( - context[node.output[1]].shape == folded_oshape - ), "cppsim \ - did not produce expected ofolded utput shape" - context[node.output[0]] = context[node.output[0]].reshape(*exp_oshape) - context[node.output[1]] = context[node.output[1]].reshape(*exp_oshape) + super().npy_to_dynamic_outputs( + context, ["output%d.npy" % i for i in range(n_outputs)] + ) + for i in range(n_outputs): + assert ( + context[node.output[i]].shape == exp_oshape + ), "cppsim \ + did not produce expected output shape" elif mode == "rtlsim": sim = self.get_rtlsim() nbits = self.get_instream_width() @@ -226,41 +253,30 @@ class DuplicateStreams_Batch(HLSCustomOp): super().toggle_clk(sim) rtlsim_dict = { "inputs": {"in0": rtlsim_inp}, - "outputs": {"out0": [], "out1": []}, + "outputs": {}, } + for i in range(n_outputs): + rtlsim_dict["outputs"]["out%d" % i] = [] self.rtlsim_multi_io(sim, rtlsim_dict) odt = self.get_output_datatype() target_bits = odt.bitwidth() packed_bits = self.get_outstream_width() out_shape = self.get_folded_output_shape() + for i in range(n_outputs): + out_npy_path = "%s/output%d.npy" % (code_gen_dir, i) + rtlsim_output_to_npy( + rtlsim_dict["outputs"]["out%d" % i], + out_npy_path, + odt, + out_shape, + packed_bits, + target_bits, + ) + # load and reshape output 0 + output = np.load(out_npy_path) + output = np.asarray([output], dtype=np.float32).reshape(*exp_oshape) + context[node.output[i]] = output - out_npy_path = "{}/output0.npy".format(code_gen_dir) - rtlsim_output_to_npy( - rtlsim_dict["outputs"]["out0"], - out_npy_path, - odt, - out_shape, - packed_bits, - target_bits, - ) - # load and reshape output 0 - output = np.load(out_npy_path) - output = np.asarray([output], dtype=np.float32).reshape(*exp_oshape) - context[node.output[0]] = output - - out_npy_path = "{}/output1.npy".format(code_gen_dir) - rtlsim_output_to_npy( - rtlsim_dict["outputs"]["out1"], - out_npy_path, - odt, - out_shape, - packed_bits, - target_bits, - ) - # load and reshape output 1 - output = np.load(out_npy_path) - output = np.asarray([output], dtype=np.float32).reshape(*exp_oshape) - context[node.output[1]] = output else: raise Exception( """Invalid value for attribute exec_mode! Is currently set to: {} @@ -277,7 +293,7 @@ class DuplicateStreams_Batch(HLSCustomOp): ), """Output1 shape doesn't match expected shape.""" def global_includes(self): - self.code_gen_dict["$GLOBALS$"] = ['#include "streamtools.h"'] + self.code_gen_dict["$GLOBALS$"] = ['#include "duplicate_impl.hpp"'] def defines(self, var): self.code_gen_dict["$DEFINES$"] = [] @@ -298,24 +314,23 @@ class DuplicateStreams_Batch(HLSCustomOp): ) def strm_decl(self): + n_outputs = self.get_num_output_streams() self.code_gen_dict["$STREAMDECLARATIONS$"] = [] self.code_gen_dict["$STREAMDECLARATIONS$"].append( 'hls::stream<ap_uint<{}>> in0 ("in0");'.format(self.get_instream_width()) ) - self.code_gen_dict["$STREAMDECLARATIONS$"].append( - 'hls::stream<ap_uint<{}>> out0 ("out0");'.format(self.get_outstream_width()) - ) - self.code_gen_dict["$STREAMDECLARATIONS$"].append( - 'hls::stream<ap_uint<{}>> out1 ("out1");'.format(self.get_outstream_width()) - ) + for i in range(n_outputs): + out_name = "out%d" % i + self.code_gen_dict["$STREAMDECLARATIONS$"].append( + 'hls::stream<ap_uint<%d>> %s ("%s");' + % (self.get_outstream_width(), out_name, out_name) + ) def docompute(self): - self.code_gen_dict["$DOCOMPUTE$"] = [ - """DuplicateStreams_Batch<{}, {}> (in0, out0, out1, 1);""".format( - self.get_outstream_width(), - self.get_number_output_values() // 2, - ) - ] + n_outputs = self.get_num_output_streams() + ostreams = ["out%d" % x for x in range(n_outputs)] + dc = "DuplicateStreamsCustom(in0, %s);" % (",".join(ostreams)) + self.code_gen_dict["$DOCOMPUTE$"] = [dc] def dataoutstrm(self): code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") @@ -325,62 +340,71 @@ class DuplicateStreams_Batch(HLSCustomOp): packed_hls_type = "ap_uint<%d>" % packed_bits elem_hls_type = dtype.get_hls_datatype_str() npy_type = "float" - npy_out = "%s/output0.npy" % code_gen_dir - npy_out1 = "%s/output1.npy" % code_gen_dir + n_outputs = self.get_num_output_streams() oshape = self.get_folded_output_shape() oshape_cpp_str = str(oshape).replace("(", "{").replace(")", "}") - - self.code_gen_dict["$DATAOUTSTREAM$"] = [ - 'apintstream2npy<%s, %s, %d, %s>(out0, %s, "%s");' - % ( - packed_hls_type, - elem_hls_type, - elem_bits, - npy_type, - oshape_cpp_str, - npy_out, + outstrm_code = [] + + for i in range(n_outputs): + out_name = "out%d" % i + npy_out = "%s/output%d.npy" % (code_gen_dir, i) + outstrm_code.append( + 'apintstream2npy<%s, %s, %d, %s>(%s, %s, "%s");' + % ( + packed_hls_type, + elem_hls_type, + elem_bits, + npy_type, + out_name, + oshape_cpp_str, + npy_out, + ) ) - ] - self.code_gen_dict["$DATAOUTSTREAM$"] += [ - 'apintstream2npy<%s, %s, %d, %s>(out1, %s, "%s");' - % ( - packed_hls_type, - elem_hls_type, - elem_bits, - npy_type, - oshape_cpp_str, - npy_out1, - ) - ] + self.code_gen_dict["$DATAOUTSTREAM$"] = outstrm_code def save_as_npy(self): self.code_gen_dict["$SAVEASCNPY$"] = [] def blackboxfunction(self): + n_outputs = self.get_num_output_streams() + inp_streams = [] + o_stream_w = self.get_outstream_width() + i_stream_w = self.get_instream_width() + in_stream = "hls::stream<ap_uint<%d> > &in0" % (i_stream_w) + inp_streams.append(in_stream) + for i in range(n_outputs): + out_stream = "hls::stream<ap_uint<%d> > &out%d" % (o_stream_w, i) + inp_streams.append(out_stream) + self.code_gen_dict["$BLACKBOXFUNCTION$"] = [ - """void {}(hls::stream<ap_uint<{}>> &in0, - hls::stream<ap_uint<{}>> &out0, - hls::stream<ap_uint<{}>> &out1)""".format( + """void {}({})""".format( self.onnx_node.name, - self.get_instream_width(), - self.get_outstream_width(), - self.get_outstream_width(), + ",".join(inp_streams), ) ] def pragmas(self): - self.code_gen_dict["$PRAGMAS$"] = ["#pragma HLS INTERFACE axis port=in0"] - self.code_gen_dict["$PRAGMAS$"].append("#pragma HLS INTERFACE axis port=out0") - self.code_gen_dict["$PRAGMAS$"].append("#pragma HLS INTERFACE axis port=out1") + n_outputs = self.get_num_output_streams() + self.code_gen_dict["$PRAGMAS$"] = [ + "#pragma HLS INTERFACE axis port=in0 name=in0_" + self.hls_sname() + ] + for i in range(n_outputs): + self.code_gen_dict["$PRAGMAS$"].append( + "#pragma HLS INTERFACE axis port=out%d name=out%d_%s" + % (i, i, self.hls_sname()) + ) self.code_gen_dict["$PRAGMAS$"].append( "#pragma HLS INTERFACE ap_ctrl_none port=return" ) def get_verilog_top_module_intf_names(self): intf_names = super().get_verilog_top_module_intf_names() - intf_names["m_axis"] = [ - ("out0_V_V", self.get_outstream_width_padded()), - ("out1_V_V", self.get_outstream_width_padded()), - ] + n_outputs = self.get_num_output_streams() + sname = self.hls_sname() + intf_names["m_axis"] = [] + for i in range(n_outputs): + intf_names["m_axis"].append( + ("out%d_%s" % (i, sname), self.get_outstream_width_padded()) + ) return intf_names diff --git a/src/finn/custom_op/fpgadataflow/fmpadding_batch.py b/src/finn/custom_op/fpgadataflow/fmpadding_batch.py index 8ac30524ebee6f503e34f6d92408f3f137a59c72..177ca2acbd60b49658a61741ec042e651b560b27 100644 --- a/src/finn/custom_op/fpgadataflow/fmpadding_batch.py +++ b/src/finn/custom_op/fpgadataflow/fmpadding_batch.py @@ -312,8 +312,12 @@ class FMPadding_Batch(HLSCustomOp): ] def pragmas(self): - self.code_gen_dict["$PRAGMAS$"] = ["#pragma HLS INTERFACE axis port=in0"] - self.code_gen_dict["$PRAGMAS$"].append("#pragma HLS INTERFACE axis port=out") + self.code_gen_dict["$PRAGMAS$"] = [ + "#pragma HLS INTERFACE axis port=in0 name=in0_" + self.hls_sname() + ] + self.code_gen_dict["$PRAGMAS$"].append( + "#pragma HLS INTERFACE axis port=out name=out_" + self.hls_sname() + ) self.code_gen_dict["$PRAGMAS$"].append( "#pragma HLS INTERFACE ap_ctrl_none port=return" ) @@ -324,7 +328,6 @@ class FMPadding_Batch(HLSCustomOp): exp_ishape = self.get_normal_input_shape() exp_oshape = self.get_normal_output_shape() folded_ishape = self.get_folded_input_shape() - folded_oshape = self.get_folded_output_shape() if mode == "cppsim": code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") @@ -355,9 +358,8 @@ class FMPadding_Batch(HLSCustomOp): # load output npy file super().npy_to_dynamic_output(context) assert ( - context[node.output[0]].shape == folded_oshape - ), "cppsim did not produce expected folded output shape" - context[node.output[0]] = context[node.output[0]].reshape(*exp_oshape) + context[node.output[0]].shape == exp_oshape + ), "cppsim did not produce expected output shape" elif mode == "rtlsim": sim = self.get_rtlsim() nbits = self.get_instream_width() diff --git a/src/finn/custom_op/fpgadataflow/globalaccpool_batch.py b/src/finn/custom_op/fpgadataflow/globalaccpool_batch.py index 6d4a55ee5c86b68776f4c7c2e58930034bb0be02..43a7dc211c0fe0689629cb9bb4d4b0664ac9eef9 100644 --- a/src/finn/custom_op/fpgadataflow/globalaccpool_batch.py +++ b/src/finn/custom_op/fpgadataflow/globalaccpool_batch.py @@ -185,7 +185,6 @@ class GlobalAccPool_Batch(HLSCustomOp): exp_ishape = self.get_normal_input_shape() exp_oshape = self.get_normal_output_shape() folded_ishape = self.get_folded_input_shape() - folded_oshape = self.get_folded_output_shape() if mode == "cppsim": code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") @@ -215,10 +214,9 @@ class GlobalAccPool_Batch(HLSCustomOp): # load output npy file super().npy_to_dynamic_output(context) assert ( - context[node.output[0]].shape == folded_oshape + context[node.output[0]].shape == exp_oshape ), "cppsim \ - did not produce expected ofolded utput shape" - context[node.output[0]] = context[node.output[0]].reshape(*exp_oshape) + did not produce expected output shape" elif mode == "rtlsim": sim = self.get_rtlsim() nbits = self.get_instream_width() @@ -331,8 +329,12 @@ class GlobalAccPool_Batch(HLSCustomOp): ] def pragmas(self): - self.code_gen_dict["$PRAGMAS$"] = ["#pragma HLS INTERFACE axis port=in0"] - self.code_gen_dict["$PRAGMAS$"].append("#pragma HLS INTERFACE axis port=out") + self.code_gen_dict["$PRAGMAS$"] = [ + "#pragma HLS INTERFACE axis port=in0 name=in0_" + self.hls_sname() + ] + self.code_gen_dict["$PRAGMAS$"].append( + "#pragma HLS INTERFACE axis port=out name=out_" + self.hls_sname() + ) self.code_gen_dict["$PRAGMAS$"].append( "#pragma HLS INTERFACE ap_ctrl_none port=return" ) diff --git a/src/finn/custom_op/fpgadataflow/hlscustomop.py b/src/finn/custom_op/fpgadataflow/hlscustomop.py index 3aac7f6b451ed12ab265a20a7df1bfa6c1d7b4c7..402f2cce19efe05620c0fcaee761a88c919f822a 100644 --- a/src/finn/custom_op/fpgadataflow/hlscustomop.py +++ b/src/finn/custom_op/fpgadataflow/hlscustomop.py @@ -32,6 +32,7 @@ import os import subprocess from abc import abstractmethod +from finn.core.datatype import DataType from finn.custom_op.base import CustomOp from finn.util.basic import ( CppBuilder, @@ -112,13 +113,23 @@ class HLSCustomOp(CustomOp): # input and output FIFO depths "inFIFODepth": ("i", False, 2), "outFIFODepth": ("i", False, 2), + "output_hook": ("s", False, ""), + # HLS version to be used for IP synthesis + "hls_version": ("s", False, "vitis_hls", {"vivado_hls", "vitis_hls"}), } def get_verilog_top_module_name(self): "Return the Verilog top module name for this node." node = self.onnx_node - prefixed_top_name = "%s_%s" % (node.name, node.name) + hls_version = self.get_nodeattr("hls_version") + if hls_version == "vivado_hls": + prefixed_top_name = "%s_%s" % (node.name, node.name) + elif hls_version == "vitis_hls": + prefixed_top_name = node.name + else: + raise Exception("Unknown hls_version: %s" % hls_version) + return prefixed_top_name def get_verilog_top_module_intf_names(self): @@ -133,8 +144,9 @@ class HLSCustomOp(CustomOp): intf_names = {} intf_names["clk"] = ["ap_clk"] intf_names["rst"] = ["ap_rst_n"] - intf_names["s_axis"] = [("in0_V_V", self.get_instream_width_padded())] - intf_names["m_axis"] = [("out_V_V", self.get_outstream_width_padded())] + sname = self.hls_sname() + intf_names["s_axis"] = [("in0_" + sname, self.get_instream_width_padded())] + intf_names["m_axis"] = [("out_" + sname, self.get_outstream_width_padded())] intf_names["aximm"] = [] intf_names["axilite"] = [] return intf_names @@ -290,10 +302,9 @@ class HLSCustomOp(CustomOp): self.code_gen_dict["$PROJECTNAME$"] = ["project_{}".format(node.name)] self.code_gen_dict["$HWSRCDIR$"] = [code_gen_dir] self.code_gen_dict["$FPGAPART$"] = [fpgapart] - self.code_gen_dict["$FINNHLSLIBDIR$"] = ["/workspace/finn-hlslib"] - self.code_gen_dict["$FINNHLSCUSTOMDIR$"] = ["/workspace/finn/custom_hls"] self.code_gen_dict["$TOPFXN$"] = [node.name] self.code_gen_dict["$CLKPERIOD$"] = [str(clk)] + self.code_gen_dict["$DEFAULT_DIRECTIVES$"] = self.ipgen_default_directives() self.code_gen_dict["$EXTRA_DIRECTIVES$"] = self.ipgen_extra_directives() template = self.ipgentcl_template @@ -308,16 +319,37 @@ class HLSCustomOp(CustomOp): f.close() self.code_gen_dict.clear() + def ipgen_default_directives(self): + """Return list of default HLS synthesis directives, which differ + slightly between vivado_hls and vitis_hls""" + + hls_version = self.get_nodeattr("hls_version") + default_directives = { + "vivado_hls": [ + "config_compile -ignore_long_run_time -disable_unroll_code_size_check", + "config_interface -m_axi_addr64", + "config_rtl -auto_prefix", + ], + "vitis_hls": [ + "set_param hls.enable_hidden_option_error false", + "config_compile -disable_unroll_code_size_check -pipeline_style flp", + "config_interface -m_axi_addr64", + "config_rtl -module_auto_prefix", + "config_rtl -deadlock_detection none", + ], + } + return default_directives[hls_version] + def ipgen_extra_directives(self): "Return a list of extra tcl directives for HLS synthesis." return [] def ipgen_singlenode_code(self): - """Builds the bash script for ip generation using the CallHLS from - finn.util.hls.""" + """Builds the bash script for IP generation using the CallHLS utility.""" node = self.onnx_node code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") - builder = CallHLS() + hls_version = self.get_nodeattr("hls_version") + builder = CallHLS(backend=hls_version) builder.append_tcl(code_gen_dir + "/hls_syn_{}.tcl".format(node.name)) builder.set_ipgen_path(code_gen_dir + "/project_{}".format(node.name)) builder.build(code_gen_dir) @@ -372,15 +404,15 @@ class HLSCustomOp(CustomOp): builder = CppBuilder() # to enable additional debug features please uncommand the next line # builder.append_includes("-DDEBUG") - builder.append_includes("-I/workspace/finn/src/finn/qnn-data/cpp") - builder.append_includes("-I/workspace/cnpy/") - builder.append_includes("-I/workspace/finn-hlslib") - builder.append_includes("-I/workspace/finn/custom_hls") - builder.append_includes("-I{}/include".format(os.environ["VIVADO_PATH"])) - builder.append_includes("--std=c++11") + builder.append_includes("-I$FINN_ROOT/src/finn/qnn-data/cpp") + builder.append_includes("-I$FINN_ROOT/deps/cnpy/") + builder.append_includes("-I$FINN_ROOT/deps/finn-hlslib") + builder.append_includes("-I$FINN_ROOT/custom_hls") + builder.append_includes("-I{}/include".format(os.environ["HLS_PATH"])) + builder.append_includes("--std=c++14") builder.append_includes("-O3") builder.append_sources(code_gen_dir + "/*.cpp") - builder.append_sources("/workspace/cnpy/cnpy.cpp") + builder.append_sources("$FINN_ROOT/deps/cnpy/cnpy.cpp") builder.append_includes("-lz") builder.set_executable_path(code_gen_dir + "/node_model") builder.build(code_gen_dir) @@ -402,10 +434,22 @@ Found no codegen dir for this node, did you run the prepare_cppsim transformatio # assuming dynamic inputs start from 0 for in_ind in range(count): current_input_name = node.input[in_ind] - # make copy before saving array - input_array = context[current_input_name].copy() + input_array = context[current_input_name] + if in_ind == 0: + expected_inp_shape = self.get_folded_input_shape() + idt = self.get_input_datatype() + else: + expected_inp_shape = self.get_folded_input_shape(in_ind) + idt = self.get_input_datatype(in_ind) + reshaped_input = input_array.reshape(expected_inp_shape) + if idt == DataType["BIPOLAR"]: + # store bipolar activations as binary + reshaped_input = (reshaped_input + 1) / 2 + # make copy before saving the array + reshaped_input = reshaped_input.copy() np.save( - os.path.join(code_gen_dir, "input_{}.npy".format(in_ind)), input_array + os.path.join(code_gen_dir, "input_{}.npy".format(in_ind)), + reshaped_input, ) def npy_to_dynamic_output(self, context): @@ -414,7 +458,8 @@ Found no codegen dir for this node, did you run the prepare_cppsim transformatio node = self.onnx_node code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") output = np.load("{}/output.npy".format(code_gen_dir)) - context[node.output[0]] = output + exp_shape = self.get_normal_output_shape() + context[node.output[0]] = output.reshape(exp_shape) def npy_to_dynamic_outputs(self, context, npy_list): """Reads the output from .npy files generated from cppsim and places @@ -425,7 +470,11 @@ Found no codegen dir for this node, did you run the prepare_cppsim transformatio code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") for i in range(len(npy_list)): output = np.load("{}/{}".format(code_gen_dir, npy_list[i])) - context[node.output[i]] = output + if i == 0: + exp_shape = self.get_normal_output_shape() + else: + exp_shape = self.get_normal_output_shape(i) + context[node.output[i]] = output.reshape(exp_shape) def exec_precompiled_singlenode_model(self): """Executes precompiled executable.""" @@ -453,6 +502,17 @@ compilation transformations? sim.io.ap_clk = 1 sim.io.ap_clk = 0 + def hls_sname(self): + """Get the naming convention used by chosen HLS version for stream signals, + decided by the hls_version node attribute. + Example: the TDATA for a stream called "out" would be out_V_V_TDATA + in vivado_hls and out_V_TDATA in vitis_hls. + """ + hls_version = self.get_nodeattr("hls_version") + sname_dict = {"vivado_hls": "V_V", "vitis_hls": "V"} + sname = sname_dict[hls_version] + return sname + def rtlsim(self, sim, inp, inp2=None): """Runs the pyverilator simulation by passing the input values to the simulation, toggle the clock and observing the execution time. Function contains also an @@ -466,7 +526,18 @@ compilation transformations? sim.start_vcd_trace(trace_file) inputs = inp outputs = [] - sim.io.out_V_V_TREADY = 1 + sname = self.hls_sname() + o_ready = "out_" + sname + "_TREADY" + o_valid = "out_" + sname + "_TVALID" + o_data = "out_" + sname + "_TDATA" + in0_ready = "in0_" + sname + "_TREADY" + in0_valid = "in0_" + sname + "_TVALID" + in0_data = "in0_" + sname + "_TDATA" + in1_ready = "in1_" + sname + "_TREADY" + in1_valid = "in1_" + sname + "_TVALID" + in1_data = "in1_" + sname + "_TDATA" + + sim.io[o_ready] = 1 # observe if output is completely calculated # observation_count will contain the number of cycles the calculation ran @@ -481,19 +552,19 @@ compilation transformations? liveness_threshold = pyverilate_get_liveness_threshold_cycles() while not (output_observed): - sim.io.in0_V_V_TVALID = 1 if len(inputs) > 0 else 0 - sim.io.in0_V_V_TDATA = inputs[0] if len(inputs) > 0 else 0 - if sim.io.in0_V_V_TREADY == 1 and sim.io.in0_V_V_TVALID == 1: + sim.io[in0_valid] = 1 if len(inputs) > 0 else 0 + sim.io[in0_data] = inputs[0] if len(inputs) > 0 else 0 + if sim.io[in0_ready] == 1 and sim.io[in0_valid] == 1: inputs = inputs[1:] if inp2 is not None: - sim.io.in1_V_V_TVALID = 1 if len(inp2) > 0 else 0 - sim.io.in1_V_V_TDATA = inp2[0] if len(inp2) > 0 else 0 - if sim.io.in1_V_V_TREADY == 1 and sim.io.in1_V_V_TVALID == 1: + sim.io[in1_valid] = 1 if len(inp2) > 0 else 0 + sim.io[in1_data] = inp2[0] if len(inp2) > 0 else 0 + if sim.io[in1_ready] == 1 and sim.io[in1_valid] == 1: inp2 = inp2[1:] - if sim.io.out_V_V_TVALID == 1 and sim.io.out_V_V_TREADY == 1: - outputs = outputs + [sim.io.out_V_V_TDATA] + if sim.io[o_valid] == 1 and sim.io[o_ready] == 1: + outputs = outputs + [sim.io[o_data]] sim.io.ap_clk = 1 sim.io.ap_clk = 0 @@ -525,11 +596,16 @@ compilation transformations? def rtlsim_multi_io(self, sim, io_dict): "Run rtlsim for this node, supports multiple i/o streams." + # signal naming differs slightly between vivado_hls/vitis_hls + sname = "_" + self.hls_sname() + "_" + trace_file = self.get_nodeattr("rtlsim_trace") if trace_file == "default": trace_file = self.onnx_node.name + ".vcd" num_out_values = self.get_number_output_values() - total_cycle_count = rtlsim_multi_io(sim, io_dict, num_out_values, trace_file) + total_cycle_count = rtlsim_multi_io( + sim, io_dict, num_out_values, trace_file, sname=sname + ) self.set_nodeattr("cycles_rtlsim", total_cycle_count) def execute_node(self, context, graph): @@ -580,7 +656,7 @@ compilation transformations? be filled by every node. var: makes it possible to reuse the function for different c++ code generation. - I.e. if set to "ipgen" in StreamingFCLayer_Batch additional PRAGMA defines are + I.e. if set to "ipgen" in MatrixVectorActivation additional PRAGMA defines are added.""" pass diff --git a/src/finn/custom_op/fpgadataflow/iodma.py b/src/finn/custom_op/fpgadataflow/iodma.py index 802c7e78515336ef884e5ff09356085b5cc6069f..a331caee0193e101dd108299c159dfd97c893cfa 100644 --- a/src/finn/custom_op/fpgadataflow/iodma.py +++ b/src/finn/custom_op/fpgadataflow/iodma.py @@ -83,11 +83,14 @@ class IODMA(HLSCustomOp): "NumChannels": ("i", True, 0), # FINN input datatype "dataType": ("s", True, ""), - # Stream parameters + # Width of input or output stream "streamWidth": ("i", False, 32), # DMA-specific parameters + # width of axi-mm interface "intfWidth": ("i", False, 32), + # burst mode for axi-mm interface (wrap used for DRAM weights) "burstMode": ("s", False, "increment", {"wrap", "increment"}), + # IODMA direction: in = read from DRAM, out = write to DRAM "direction": ("s", False, "in", {"in", "out"}), # shape describing input vecs per execution "numInputVectors": ("ints", False, [1]), @@ -224,20 +227,19 @@ class IODMA(HLSCustomOp): def docompute(self): direction = self.get_nodeattr("direction") mode = self.get_nodeattr("burstMode") + dwc_func = "StreamingDataWidthConverter_Batch" if direction == "in": if mode == "wrap": func = "Mem2Stream_Batch_external_wmem" else: func = "Mem2Stream_Batch" - dwc_func = "WidthAdjustedOutputStream" elif direction == "out": func = "Stream2Mem_Batch" - dwc_func = "WidthAdjustedInputStream" else: raise ValueError("Invalid IODMA direction, please set to in or out") # define templates for instantiation dma_inst_template = func + "<DataWidth1, NumBytes1>(%s, %s, numReps);" - dwc_inst_template = dwc_func + "<%d, %d, %d> %s(%s, numReps);" + dwc_inst_template = dwc_func + "<%d, %d, %d>(%s, %s, numReps);" # do stream infrastructure and instantiations intfw = self.get_nodeattr("intfWidth") strmw = self.get_nodeattr("streamWidth") @@ -246,22 +248,65 @@ class IODMA(HLSCustomOp): # because we use WidthAdjustedInputStream, dtype_bits = self.get_input_datatype().bitwidth() total_bits = dtype_bits * np.prod(self.get_normal_input_shape()) + if direction == "in": - self.code_gen_dict["$DOCOMPUTE$"] = [ - dwc_inst_template - % (width_lcm, strmw, total_bits // width_lcm, "dwc_lcm", "out"), - dwc_inst_template - % (intfw, width_lcm, total_bits // intfw, "dwc_intfw", "dwc_lcm"), - dma_inst_template % ("in0", "dwc_intfw"), - ] + # AXI MM -> IODMA -> (DWCs) -> out + # DWCs depend on AXI MM and out interface width + if strmw == intfw: + # case 0: AXI MM width = out width, no DWCs needed + self.code_gen_dict["$DOCOMPUTE$"] = [dma_inst_template % ("in0", "out")] + elif (strmw % intfw == 0) or (intfw % strmw == 0): + # case 1: AXI MM width divisible by out width or vice versa + # single DWC + single extra stream needed + self.code_gen_dict["$DOCOMPUTE$"] = [ + "hls::stream<ap_uint<%d> > dma2dwc;" % intfw, + dma_inst_template % ("in0", "dma2dwc"), + dwc_inst_template + % (intfw, strmw, total_bits // intfw, "dma2dwc", "out"), + ] + else: + # case 2: AXI MM width not divisible by out width or vice versa + # need 2 DWCs (going through the least common multiple width) + # and 2 streams + self.code_gen_dict["$DOCOMPUTE$"] = [ + "hls::stream<ap_uint<%d> > dma2lcm;" % intfw, + "hls::stream<ap_uint<%d> > lcm2out;" % width_lcm, + dma_inst_template % ("in0", "dma2lcm"), + dwc_inst_template + % (intfw, width_lcm, total_bits // intfw, "dma2lcm", "lcm2out"), + dwc_inst_template + % (width_lcm, strmw, total_bits // width_lcm, "lcm2out", "out"), + ] + elif direction == "out": + # in0 -> (DWCs) -> IODMA -> AXI MM + # DWCs depend on AXI MM and out interface width + if strmw == intfw: + # case 0: in width = AXI MM width, no DWCs needed + self.code_gen_dict["$DOCOMPUTE$"] = [dma_inst_template % ("in0", "out")] + elif (strmw % intfw == 0) or (intfw % strmw == 0): + # case 1: AXI MM width divisible by in width or vice versa + # single DWC + single extra stream needed + self.code_gen_dict["$DOCOMPUTE$"] = [ + "hls::stream<ap_uint<%d> > dwc2dma;" % intfw, + dwc_inst_template + % (strmw, intfw, total_bits // strmw, "in0", "dwc2dma"), + dma_inst_template % ("dwc2dma", "out"), + ] + else: + # case 2: AXI MM width not divisible by out width or vice versa + # need 2 DWCs (going through the least common multiple width) + # and 2 streams + self.code_gen_dict["$DOCOMPUTE$"] = [ + "hls::stream<ap_uint<%d> > in2lcm;" % width_lcm, + "hls::stream<ap_uint<%d> > lcm2dma;" % intfw, + dwc_inst_template + % (strmw, width_lcm, total_bits // strmw, "in0", "in2lcm"), + dwc_inst_template + % (width_lcm, intfw, total_bits // width_lcm, "in2lcm", "lcm2dma"), + dma_inst_template % ("lcm2dma", "out"), + ] else: - self.code_gen_dict["$DOCOMPUTE$"] = [ - dwc_inst_template - % (strmw, width_lcm, total_bits // strmw, "dwc_lcm", "in0"), - dwc_inst_template - % (width_lcm, intfw, total_bits // width_lcm, "dwc_intfw", "dwc_lcm"), - dma_inst_template % ("dwc_intfw", "out"), - ] + raise Exception("Unknown IODMA direction: %s" % direction) def blackboxfunction(self): packed_ibits = self.get_instream_width() @@ -304,11 +349,11 @@ class IODMA(HLSCustomOp): "#pragma HLS INTERFACE s_axilite port=in0 bundle=control" ) self.code_gen_dict["$PRAGMAS$"].append( - "#pragma HLS INTERFACE axis port=out" + "#pragma HLS INTERFACE axis port=out name=out_" + self.hls_sname() ) elif direction == "out": self.code_gen_dict["$PRAGMAS$"].append( - "#pragma HLS INTERFACE axis port=in0" + "#pragma HLS INTERFACE axis port=in0 name=in0_" + self.hls_sname() ) if intfname == "": self.code_gen_dict["$PRAGMAS$"].append( diff --git a/src/finn/custom_op/fpgadataflow/labelselect_batch.py b/src/finn/custom_op/fpgadataflow/labelselect_batch.py index 1eb5962fdbc54092eaeb4796806b3a623c65aea8..bb83311dab44a4942d6bc1b581c21abb1e993493 100644 --- a/src/finn/custom_op/fpgadataflow/labelselect_batch.py +++ b/src/finn/custom_op/fpgadataflow/labelselect_batch.py @@ -182,7 +182,6 @@ class LabelSelect_Batch(HLSCustomOp): exp_ishape = self.get_normal_input_shape() exp_oshape = self.get_normal_output_shape() folded_ishape = self.get_folded_input_shape() - folded_oshape = self.get_folded_output_shape() if mode == "cppsim": code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") @@ -212,10 +211,9 @@ class LabelSelect_Batch(HLSCustomOp): # load output npy file super().npy_to_dynamic_output(context) assert ( - context[node.output[0]].shape == folded_oshape + context[node.output[0]].shape == exp_oshape ), "cppsim \ - did not produce expected ofolded utput shape" - context[node.output[0]] = context[node.output[0]].reshape(*exp_oshape) + did not produce expected output shape" elif mode == "rtlsim": sim = self.get_rtlsim() nbits = self.get_instream_width() @@ -342,8 +340,12 @@ class LabelSelect_Batch(HLSCustomOp): ] def pragmas(self): - self.code_gen_dict["$PRAGMAS$"] = ["#pragma HLS INTERFACE axis port=in0"] - self.code_gen_dict["$PRAGMAS$"].append("#pragma HLS INTERFACE axis port=out") + self.code_gen_dict["$PRAGMAS$"] = [ + "#pragma HLS INTERFACE axis port=in0 name=in0_" + self.hls_sname() + ] + self.code_gen_dict["$PRAGMAS$"].append( + "#pragma HLS INTERFACE axis port=out name=out_" + self.hls_sname() + ) self.code_gen_dict["$PRAGMAS$"].append( "#pragma HLS INTERFACE ap_ctrl_none port=return" ) diff --git a/src/finn/custom_op/fpgadataflow/lookup.py b/src/finn/custom_op/fpgadataflow/lookup.py index 27be06bdfa3ce3d980a139ec91385c7fe85afab3..dcf67e4c4338b8a903fefd7a83a96331d0a5c8e9 100644 --- a/src/finn/custom_op/fpgadataflow/lookup.py +++ b/src/finn/custom_op/fpgadataflow/lookup.py @@ -29,13 +29,14 @@ import numpy as np import os import warnings -from math import ceil +from math import ceil, log2 from finn.core.datatype import DataType from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp from finn.util.data_packing import ( npy_to_rtlsim_input, numpy_to_hls_code, + pack_innermost_dim_as_hex_string, rtlsim_output_to_npy, ) @@ -58,6 +59,13 @@ class Lookup(HLSCustomOp): "InputType": ("s", True, ""), # Input shape "InputShape": ("ints", False, [1]), + # Memory mode + # const : parameters baked into bitfile (BRAM) + # external : lookup performed in external memory over AXI MM + "mem_mode": ("s", False, "const", ["const", "external"]), + # Width for AXI-MM interface + # only relevant when mem_mode="external" + "ext_mem_width": ("i", False, 32), } my_attrs.update(super().get_nodeattr_types()) return my_attrs @@ -72,7 +80,8 @@ class Lookup(HLSCustomOp): def get_normal_output_shape(self): ishape = self.get_normal_input_shape() - oshape = list(ishape) + [self.get_nodeattr("EmbeddingDim")] + emb_dim = self.get_nodeattr("EmbeddingDim") + oshape = list(ishape) + [emb_dim] return tuple(oshape) def get_folded_input_shape(self): @@ -81,7 +90,23 @@ class Lookup(HLSCustomOp): return tuple(folded_ishape) def get_folded_output_shape(self): - return self.get_normal_output_shape() + ishape = self.get_normal_input_shape() + mem_mode = self.get_nodeattr("mem_mode") + emb_dim = self.get_nodeattr("EmbeddingDim") + if mem_mode == "const": + oshape = list(ishape) + [emb_dim] + elif mem_mode == "external": + ext_mem_width = self.get_nodeattr("ext_mem_width") + bits_per_emb_elem = self.get_output_datatype().bitwidth() + assert ext_mem_width % bits_per_emb_elem == 0 + emb_elems_per_ext_mem_width = ext_mem_width // bits_per_emb_elem + oshape = list(ishape) + [ + emb_dim // emb_elems_per_ext_mem_width, + emb_elems_per_ext_mem_width, + ] + else: + raise Exception("Unrecognized mem_mode:" + mem_mode) + return tuple(oshape) def make_shape_compatible_op(self, model): exp_ishape = tuple(self.get_normal_input_shape()) @@ -123,17 +148,20 @@ class Lookup(HLSCustomOp): return ibits def get_outstream_width(self): + folded_oshape = self.get_folded_output_shape() obits = self.get_output_datatype().bitwidth() - ofm_ch = self.get_nodeattr("EmbeddingDim") - return obits * ofm_ch + return obits * folded_oshape[-1] def get_number_output_values(self): folded_oshape = self.get_folded_output_shape() return np.prod(folded_oshape[:-1]) def global_includes(self): - global_incls = ['#include "lookup.hpp"'] - global_incls.append('#include "embeddings.hpp"') + mem_mode = self.get_nodeattr("mem_mode") + global_incls = [] + if mem_mode == "const": + global_incls.append('#include "lookup.hpp"') + global_incls.append('#include "embeddings.hpp"') self.code_gen_dict["$GLOBALS$"] = global_incls def defines(self, var): @@ -142,14 +170,26 @@ class Lookup(HLSCustomOp): elem_hls_type = dtype.get_hls_datatype_str() emb_type = DataType[self.get_nodeattr("EmbeddingType")] emb_hls_type = emb_type.get_hls_datatype_str() + emb_dim = self.get_nodeattr("EmbeddingDim") + mem_mode = self.get_nodeattr("mem_mode") my_defines = [] - my_defines.append( - "#define NumEmbeddings %d" % self.get_nodeattr("NumEmbeddings") - ) - my_defines.append("#define EmbeddingDim %d" % self.get_nodeattr("EmbeddingDim")) my_defines.append("#define NumInputs %d" % n_inputs) - my_defines.append("#define InputType %s" % elem_hls_type) - my_defines.append("#define EmbeddingType %s" % emb_hls_type) + if mem_mode == "external": + ext_mem_width = self.get_nodeattr("ext_mem_width") + ext_mem_emb_size = self.get_folded_output_shape()[-2] + ext_mem_emb_align = ceil(log2(ext_mem_emb_size)) + my_defines.append("#define MemBits %d" % ext_mem_width) + my_defines.append("#define EmbeddingSize %d" % ext_mem_emb_size) + my_defines.append("#define EmbeddingAlign %d" % ext_mem_emb_align) + my_defines.append("#define T_SRC %s" % elem_hls_type) + my_defines.append("#define T_DST ap_uint<MemBits>") + elif mem_mode == "const": + my_defines.append( + "#define NumEmbeddings %d" % self.get_nodeattr("NumEmbeddings") + ) + my_defines.append("#define EmbeddingDim %d" % emb_dim) + my_defines.append("#define InputType %s" % elem_hls_type) + my_defines.append("#define EmbeddingType %s" % emb_hls_type) self.code_gen_dict["$DEFINES$"] = my_defines def read_npy_data(self): @@ -186,7 +226,7 @@ class Lookup(HLSCustomOp): oshape_cpp_str = str(oshape).replace("(", "{").replace(")", "}") self.code_gen_dict["$DATAOUTSTREAM$"] = [ - 'apintstream2npy<%s, %s, %d, %s>(out, %s, "%s");' + 'apintstream2npy<%s, %s, %d, %s>(out, %s, "%s", %s);' % ( packed_hls_type, elem_hls_type, @@ -194,6 +234,7 @@ class Lookup(HLSCustomOp): npy_type, oshape_cpp_str, npy_out, + "false", ) ] @@ -210,43 +251,115 @@ class Lookup(HLSCustomOp): ) def docompute(self): - self.code_gen_dict["$DOCOMPUTE$"] = [ - """StreamingLookup<NumEmbeddings, EmbeddingDim, NumInputs, - InputType, EmbeddingType >(in0, out, embeddings);""" - ] + mem_mode = self.get_nodeattr("mem_mode") + if mem_mode == "const": + self.code_gen_dict["$DOCOMPUTE$"] = [ + """StreamingLookup<NumEmbeddings, EmbeddingDim, NumInputs, + InputType, EmbeddingType >(in0, out, embeddings);""" + ] + elif mem_mode == "external": + hls_impl = """ + if(!in0.empty()) { + ap_uint<T_SRC::width+EmbeddingAlign> const base = + (in0.read(), ap_uint<EmbeddingAlign>(0)); + for(unsigned j = 0; j < EmbeddingSize; j++) { +#pragma HLS PIPELINE II=1 + out.write(mem[base+j]); + } + } + """ + self.code_gen_dict["$DOCOMPUTE$"] = [hls_impl] def blackboxfunction(self): + mem_mode = self.get_nodeattr("mem_mode") ibits = self.get_instream_width() packed_input_hls_type = "ap_uint<%d>" % ibits obits = self.get_outstream_width() packed_output_hls_type = "ap_uint<%d>" % obits - self.code_gen_dict["$BLACKBOXFUNCTION$"] = [ - "void %s(hls::stream<%s > &in0, hls::stream<%s > &out)" - % (self.onnx_node.name, packed_input_hls_type, packed_output_hls_type) - ] + if mem_mode == "const": + self.code_gen_dict["$BLACKBOXFUNCTION$"] = [ + "void %s(hls::stream<%s > &in0, hls::stream<%s > &out)" + % (self.onnx_node.name, packed_input_hls_type, packed_output_hls_type) + ] + elif mem_mode == "external": + self.code_gen_dict["$BLACKBOXFUNCTION$"] = [ + "void " + + self.onnx_node.name + + "(hls::stream<T_SRC> &in0, hls::stream<T_DST> &out, " + + "T_DST const *const mem)" + ] def pragmas(self): - my_pragmas = ["#pragma HLS INTERFACE axis port=in0"] - my_pragmas.append("#pragma HLS INTERFACE axis port=out") + mem_mode = self.get_nodeattr("mem_mode") + my_pragmas = [ + "#pragma HLS INTERFACE axis port=in0 name=in0_" + self.hls_sname() + ] + my_pragmas.append( + "#pragma HLS INTERFACE axis port=out name=out_" + self.hls_sname() + ) my_pragmas.append("#pragma HLS INTERFACE ap_ctrl_none port=return") + if mem_mode == "const": + my_pragmas.append( + "#pragma HLS BIND_STORAGE variable=embeddings type=ROM_2P impl=BRAM" + ) + elif mem_mode == "external": + my_pragmas.append("#pragma HLS INTERFACE m_axi offset=slave port=mem") + my_pragmas.append("#pragma HLS INTERFACE s_axilite port=mem bundle=control") + else: + raise Exception("Unrecognized mem_mode: " + mem_mode) self.code_gen_dict["$PRAGMAS$"] = my_pragmas def generate_params(self, model, path): - code_gen_dir = path + mem_mode = self.get_nodeattr("mem_mode") embeddings = model.get_initializer(self.onnx_node.input[1]) - weight_filename = "{}/embeddings.hpp".format(code_gen_dir) - edt = DataType[self.get_nodeattr("EmbeddingType")] - # obits = self.get_outstream_width() - # packed_output_hls_type = "ap_uint<%d>" % obits - assert np.vectorize(edt.allowed)( - embeddings - ).all(), "Embeddings can't be expressed with type %s" % str(edt) - embeddings_hls_code = numpy_to_hls_code( - embeddings, edt, "embeddings", True, False - ) - f_thresh = open(weight_filename, "w") - f_thresh.write(embeddings_hls_code) - f_thresh.close() + if mem_mode == "const": + code_gen_dir = path + weight_filename = "{}/embeddings.hpp".format(code_gen_dir) + edt = DataType[self.get_nodeattr("EmbeddingType")] + # obits = self.get_outstream_width() + # packed_output_hls_type = "ap_uint<%d>" % obits + assert np.vectorize(edt.allowed)( + embeddings + ).all(), "Embeddings can't be expressed with type %s" % str(edt) + # reverse innertmost dim in embeddings to remain compatible with + # how we normally encode the data in FINN + embeddings_rev = np.flip(embeddings, -1) + embeddings_hls_code = numpy_to_hls_code( + embeddings_rev, edt, "embeddings", True, False + ) + f_thresh = open(weight_filename, "w") + f_thresh.write(embeddings_hls_code) + f_thresh.close() + elif mem_mode == "external": + edt = DataType[self.get_nodeattr("EmbeddingType")] + ext_mem_width = self.get_nodeattr("ext_mem_width") + assert edt.bitwidth() == 8, ( + "Lookup with mem_mode=external " + + "only works with 8-bit embeddings but found " + + str(edt) + ) + emb_dim = self.get_nodeattr("EmbeddingDim") + # need to zero-pad embeddings in external mode for burst alignment + # compute how much padding we need + emb_elems_per_ext_mem_width = self.get_folded_output_shape()[-1] + ext_mem_emb_size = self.get_folded_output_shape()[-2] + ext_mem_emb_align = ceil(log2(ext_mem_emb_size)) + align_factor = int((ext_mem_width / 8) * 2**ext_mem_emb_align) + pad_amount = align_factor - emb_dim + embeddings_padded = np.pad(embeddings, [(0, 0), (0, pad_amount)]) + # reshape for packing the innermost dim + embeddings_padded = embeddings_padded.reshape( + -1, emb_elems_per_ext_mem_width + ) + weight_filename = "%s/%s.dat" % (path, self.onnx_node.name) + ret = pack_innermost_dim_as_hex_string( + embeddings_padded, edt, ext_mem_width, True, prefix="" + ) + with open(weight_filename, "w") as f: + for current_line in ret: + f.write(current_line + "\n") + else: + raise Exception("Unrecognized mem_mode: " + mem_mode) def execute_node(self, context, graph): mode = self.get_nodeattr("exec_mode") @@ -255,6 +368,10 @@ class Lookup(HLSCustomOp): exp_oshape = tuple(self.get_normal_output_shape()) folded_ishape = tuple(self.get_folded_input_shape()) folded_oshape = tuple(self.get_folded_output_shape()) + mem_mode = self.get_nodeattr("mem_mode") + assert ( + mem_mode == "const" + ), "Only mem_mode=const is supported for simulation of Lookup layer" if mode == "cppsim": code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") @@ -306,7 +423,7 @@ class Lookup(HLSCustomOp): out_shape, packed_bits, target_bits, - reverse_inner=False, + reverse_inner=True, ) # load and reshape output output = np.load(out_npy_path) @@ -324,10 +441,16 @@ class Lookup(HLSCustomOp): ), """Output shape doesn't match expected shape.""" def bram_estimation(self): - # current calculation assumes embeddings always stored in BRAM_18Ks - width_factor = ceil(self.get_outstream_width() / 16) - depth_factor = ceil(self.get_nodeattr("NumEmbeddings") / 1024) - return width_factor * depth_factor + mem_mode = self.get_nodeattr("mem_mode") + if mem_mode == "const": + # current calculation assumes embeddings always stored in BRAM_18Ks + # when mem_mode is const + width_factor = ceil(self.get_outstream_width() / 16) + depth_factor = ceil(self.get_nodeattr("NumEmbeddings") / 1024) + return width_factor * depth_factor + else: + # TODO can we estimate BRAMs for the DMA engine? + return 0 def bram_efficiency_estimation(self): bram16_est = self.bram_estimation() @@ -336,3 +459,18 @@ class Lookup(HLSCustomOp): ebits = self.get_outstream_width() * self.get_nodeattr("NumEmbeddings") bram16_est_capacity = bram16_est * 18 * 1024 return ebits / bram16_est_capacity + + def get_ap_int_max_w(self): + parent_max = super().get_ap_int_max_w() + mem_mode = self.get_nodeattr("mem_mode") + ext_mem_width = self.get_nodeattr("ext_mem_width") + if mem_mode == "external": + return max(ext_mem_width, parent_max) + else: + return parent_max + + def get_verilog_top_module_intf_names(self): + intf_names = super().get_verilog_top_module_intf_names() + intf_names["axilite"] = ["s_axi_control"] + intf_names["aximm"] = [("m_axi_gmem", self.get_nodeattr("ext_mem_width"))] + return intf_names diff --git a/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py b/src/finn/custom_op/fpgadataflow/matrixvectoractivation.py similarity index 95% rename from src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py rename to src/finn/custom_op/fpgadataflow/matrixvectoractivation.py index 68cd1ff9ea680e157f59353d0c9d05afc3d9d6d7..4198486b94fe79638081e183ea48375a767b2477 100644 --- a/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py +++ b/src/finn/custom_op/fpgadataflow/matrixvectoractivation.py @@ -48,7 +48,7 @@ from finn.util.data_packing import ( from . import templates -# ONNX i/o tensor shape assumptions for StreamingFCLayer: +# ONNX i/o tensor shape assumptions for MatrixVectorActivation: # input 0 is the input tensor, shape (.., i_size) = (..., MW) # input 1 is the weight tensor, shape (i_size, o_size) = (MW, MH) # (optional) input 2 is the thresholds tensor, shape (o_size, n_thres) @@ -56,8 +56,9 @@ from . import templates # the ... here can be any shape (representing groups of vectors) -class StreamingFCLayer_Batch(HLSCustomOp): - """Class that corresponds to finn-hls StreamingFCLayer_Batch function.""" +class MatrixVectorActivation(HLSCustomOp): + """Class that corresponds to finn-hls Matrix_Vector_Activate(_Stream)_Batch + function.""" def __init__(self, onnx_node): super().__init__(onnx_node) @@ -192,7 +193,7 @@ class StreamingFCLayer_Batch(HLSCustomOp): info_messages.append("All necessary attributes exist") except Exception: info_messages.append( - """The required StreamingFCLayer attributes do not exist.""" + """The required MatrixVectorActivation attributes do not exist.""" ) # verify the number of inputs depending on noActivation value @@ -204,7 +205,7 @@ class StreamingFCLayer_Batch(HLSCustomOp): info_messages.append("The number of inputs is correct") else: info_messages.append( - """StreamingFCLayer_Batch needs in no + """MatrixVectorActivation needs in no activation mode 2 inputs (data input and weights)""" ) elif no_act == 0: @@ -212,7 +213,7 @@ class StreamingFCLayer_Batch(HLSCustomOp): info_messages.append("The number of inputs is correct") else: info_messages.append( - """StreamingFCLayer_Batch needs 3 inputs + """MatrixVectorActivation needs 3 inputs (data input and weights and threshold values)""" ) else: @@ -358,8 +359,8 @@ class StreamingFCLayer_Batch(HLSCustomOp): if noact == 0: odt = self.get_output_datatype() B = odt.bitwidth() - thr_luts = (2 ** B - 1) * acc_bits * math.ceil(self.calc_tmem() / 64) - comp_luts = (2 ** B - 1) * acc_bits + thr_luts = (2**B - 1) * acc_bits * math.ceil(self.calc_tmem() / 64) + comp_luts = (2**B - 1) * acc_bits return int( c0 @@ -393,9 +394,16 @@ class StreamingFCLayer_Batch(HLSCustomOp): exp_cycles = (mh / pe) * (mw / simd) * np.prod(num_inp_vec) / mmv return int(exp_cycles) - def get_input_datatype(self): + def get_input_datatype(self, ind=0): """Returns FINN DataType of input.""" - return DataType[self.get_nodeattr("inputDataType")] + # when performing FIFO insertion on an FC layer with ext weights, the ind + # parameter can be > 0 (referring to the weights) so handle that here + if ind == 0: + return DataType[self.get_nodeattr("inputDataType")] + elif ind == 1: + return DataType[self.get_nodeattr("weightDataType")] + else: + raise Exception("Undefined input ind for this layer type") def get_weight_datatype(self): """Returns FINN DataType of weights.""" @@ -811,16 +819,28 @@ class StreamingFCLayer_Batch(HLSCustomOp): self.make_weight_file(weights, "decoupled_npy", weight_filename_sim) if mem_mode == "decoupled": # also save weights as Verilog .dat file - weight_filename_rtl = "{}/memblock_0.dat".format(code_gen_dir) + # note that we provide two different .dat files, one for synth + # and one for synthesis. this is because URAM-based weights always + # need zero weights for synthesis, otherwise they get inferred + # as BRAM + weight_filename_rtl_synth = "{}/memblock_synth_0.dat".format( + code_gen_dir + ) + weight_filename_rtl_sim = "{}/memblock_sim_0.dat".format(code_gen_dir) + # sim weights are always the true weights + self.make_weight_file( + weights, "decoupled_verilog_dat", weight_filename_rtl_sim + ) ram_style = self.get_nodeattr("ram_style") if ram_style == "ultra": # UltraRAM must have no memory initializer, or only zeroes # otherwise BRAM will be inferred instead of URAM # as a workaround we provide a zero-weight init here - # TODO handle this in Verilog with an if statement - weights = np.zeros_like(weights) + synth_weights = np.zeros_like(weights) + else: + synth_weights = weights self.make_weight_file( - weights, "decoupled_verilog_dat", weight_filename_rtl + synth_weights, "decoupled_verilog_dat", weight_filename_rtl_synth ) else: raise Exception( @@ -871,7 +891,7 @@ class StreamingFCLayer_Batch(HLSCustomOp): tdt_hls, odt_hls, self.get_nodeattr("ActVal"), - "comp::less_equal<%s>" % tdt_hls, + "comp::less_equal<%s, %s>" % (tdt_hls, tdt_hls), ) ) f_thresh.write(thresholds_hls_code) @@ -921,7 +941,7 @@ class StreamingFCLayer_Batch(HLSCustomOp): reshaped_input, ) elif in_ind > 2: - raise Exception("Unexpected input found for StreamingFCLayer") + raise Exception("Unexpected input found for MatrixVectorActivation") in_ind += 1 if mode == "cppsim": @@ -935,11 +955,8 @@ class StreamingFCLayer_Batch(HLSCustomOp): out = 2 * out - 1 context[node.output[0]] = out assert ( - context[node.output[0]].shape == self.get_folded_output_shape() - ), """Output shape is not as expected""" - # reshape output to have expected shape - oshape = self.get_normal_output_shape() - context[node.output[0]] = context[node.output[0]].reshape(*oshape) + context[node.output[0]].shape == self.get_normal_output_shape() + ), "cppsim did not produce expected output shape" elif mode == "rtlsim": sim = self.get_rtlsim() nbits = self.get_instream_width() @@ -994,16 +1011,12 @@ class StreamingFCLayer_Batch(HLSCustomOp): self.code_gen_dict["$GLOBALS$"] += ['#include "activations.hpp"'] mem_mode = self.get_nodeattr("mem_mode") - if mem_mode == "const": - # self.code_gen_dict["$GLOBALS$"] += ['#include "params.h"'] - pass - elif mem_mode == "decoupled" or mem_mode == "external": - self.code_gen_dict["$GLOBALS$"] += ['#include "mvau.hpp"'] - else: + if mem_mode not in ["const", "decoupled", "external"]: raise Exception( """Please set mem_mode to "const", "decoupled", or "external", currently no other parameter value is supported!""" ) + self.code_gen_dict["$GLOBALS$"] += ['#include "mvau.hpp"'] if self.calc_tmem() != 0: # TODO find a better way of checking for no pregenerated thresholds self.code_gen_dict["$GLOBALS$"] += ['#include "thresh.h"'] @@ -1015,7 +1028,7 @@ class StreamingFCLayer_Batch(HLSCustomOp): MW = self.get_nodeattr("MW") condition = SIMD >= (MW / 1024) msg = ( - f"HLS synthesis of StreamingFCLayer_Batch requires: " + f"HLS synthesis of MatrixVectorActivation requires: " f"SIMD >= MW / 1024. This is not fulfilled with: SIMD={SIMD} " f"and MW={MW} for node: {self.onnx_node.name}." ) @@ -1107,11 +1120,9 @@ class StreamingFCLayer_Batch(HLSCustomOp): else: threshs = "threshs" if mem_mode == "const": - node = self.onnx_node self.code_gen_dict["$DOCOMPUTE$"] = [ - """{}<MW1, MH1, SIMD1, PE1, {}, {}, {}> + """Matrix_Vector_Activate_Batch<MW1, MH1, SIMD1, PE1, 1, {}, {}, {}> (in0, out, weights, {}, numReps, {});""".format( - node.op_type, tmpl_args["TSrcI"], tmpl_args["TDstI"], tmpl_args["TWeightI"], @@ -1210,8 +1221,12 @@ class StreamingFCLayer_Batch(HLSCustomOp): def pragmas(self): mem_mode = self.get_nodeattr("mem_mode") ram_style_thresholds = self.get_nodeattr("ram_style_thresholds") - self.code_gen_dict["$PRAGMAS$"] = ["#pragma HLS INTERFACE axis port=in0"] - self.code_gen_dict["$PRAGMAS$"].append("#pragma HLS INTERFACE axis port=out") + self.code_gen_dict["$PRAGMAS$"] = [ + "#pragma HLS INTERFACE axis port=in0 name=in0_" + self.hls_sname() + ] + self.code_gen_dict["$PRAGMAS$"].append( + "#pragma HLS INTERFACE axis port=out name=out_" + self.hls_sname() + ) in_fifo_depth = self.get_nodeattr("inFIFODepth") out_fifo_depth = self.get_nodeattr("outFIFODepth") # insert depth pragmas only if specified @@ -1239,7 +1254,8 @@ class StreamingFCLayer_Batch(HLSCustomOp): ) elif mem_mode == "decoupled" or mem_mode == "external": self.code_gen_dict["$PRAGMAS$"].append( - "#pragma HLS INTERFACE axis port=weights" + "#pragma HLS INTERFACE axis port=weights name=weights_" + + self.hls_sname() ) self.code_gen_dict["$PRAGMAS$"].append( "#pragma HLS stream depth=8 variable=weights" @@ -1302,6 +1318,7 @@ class StreamingFCLayer_Batch(HLSCustomOp): runtime_writable == 1 ), "Layer with URAM weights must have runtime_writeable_weights=1" node_name = self.onnx_node.name + sname = self.hls_sname() # create a hierarchy for this layer, with the same port names clk_name = self.get_verilog_top_module_intf_names()["clk"][0] rst_name = self.get_verilog_top_module_intf_names()["rst"][0] @@ -1355,8 +1372,8 @@ class StreamingFCLayer_Batch(HLSCustomOp): ) cmd.append( "connect_bd_intf_net [get_bd_intf_pins %s/%s/m_axis_0] " - "[get_bd_intf_pins %s/%s/weights_V_V]" - % (node_name, strm_inst, node_name, node_name) + "[get_bd_intf_pins %s/%s/weights_%s]" + % (node_name, strm_inst, node_name, node_name, sname) ) cmd.append( "connect_bd_net [get_bd_pins %s/%s] [get_bd_pins %s/%s/aresetn]" @@ -1404,15 +1421,16 @@ class StreamingFCLayer_Batch(HLSCustomOp): # base class impl sufficient for const/external modes return super().code_generation_ipi() else: - raise Exception("Unrecognized mem_mode for StreamingFCLayer") + raise Exception("Unrecognized mem_mode for MatrixVectorActivation") return cmd def get_verilog_top_module_intf_names(self): intf_names = super().get_verilog_top_module_intf_names() mem_mode = self.get_nodeattr("mem_mode") + sname = self.hls_sname() if mem_mode == "external": intf_names["s_axis"].append( - ("weights_V_V", self.get_weightstream_width_padded()) + ("weights_" + sname, self.get_weightstream_width_padded()) ) if mem_mode == "decoupled": # only expose axilite interface if attribute is set diff --git a/src/finn/custom_op/fpgadataflow/pool_batch.py b/src/finn/custom_op/fpgadataflow/pool_batch.py index ba8a446f2cf7541c0bd2e1dff731afe2397942ef..09d707ae238a90b596f18400b58a6508f0413692 100644 --- a/src/finn/custom_op/fpgadataflow/pool_batch.py +++ b/src/finn/custom_op/fpgadataflow/pool_batch.py @@ -38,7 +38,7 @@ class Pool_Batch(HLSCustomOp): """Class that corresponds to finn-hlslib Pool_batch function. Requires ConvolutionInputGenerator(depthwise == 1) to format its input - Input shape (BatchSize,OutImgDim,OutImgDim,KernelSize^2*Channels) + Input shape (BatchSize,OutImgDim,OutImgDim,TotalKernelSize*Channels) Output shape (BatchSize,OutImgDim,OutImgDim,Channels) Notes: @@ -56,13 +56,13 @@ class Pool_Batch(HLSCustomOp): my_attrs = { "Channels": ("i", True, 0), "PE": ("i", True, 1), - "KernelSize": ("i", True, 0), + "KernelSize": ("ints", True, []), # Function: # - MaxPool # - QuantAvgPool # TODO add support for AvgPool and AccPool "Function": ("s", True, "", {"MaxPool", "QuantAvgPool"}), - "OutImgDim": ("i", True, 0), + "OutImgDims": ("ints", True, []), # FINN DataTypes for inputs/outputs "InputDataType": ("s", True, ""), "OutputDataType": ("s", True, ""), @@ -100,10 +100,11 @@ class Pool_Batch(HLSCustomOp): def get_normal_input_shape(self): ifm_ch = self.get_nodeattr("Channels") - odim = self.get_nodeattr("OutImgDim") + odims = self.get_nodeattr("OutImgDims") batch_size = self.get_nodeattr("BatchSize") k = self.get_nodeattr("KernelSize") - ishape = (batch_size, odim, odim, k * k * ifm_ch) + k_prod = int(np.prod(k)) + ishape = (batch_size, *odims, k_prod * ifm_ch) return ishape def get_folded_input_shape(self): @@ -117,9 +118,9 @@ class Pool_Batch(HLSCustomOp): def get_normal_output_shape(self): ofm_ch = self.get_nodeattr("Channels") - odim = self.get_nodeattr("OutImgDim") + odims = self.get_nodeattr("OutImgDims") batch_size = self.get_nodeattr("BatchSize") - oshape = (batch_size, odim, odim, ofm_ch) + oshape = (batch_size, *odims, ofm_ch) return oshape def get_folded_output_shape(self): @@ -140,9 +141,10 @@ class Pool_Batch(HLSCustomOp): ifm_ch = self.get_nodeattr("Channels") pe = self.get_nodeattr("PE") k = self.get_nodeattr("KernelSize") - odim = self.get_nodeattr("OutImgDim") + k_prod = int(np.prod(k)) + odims = self.get_nodeattr("OutImgDims") batch_size = self.get_nodeattr("BatchSize") - exp_cycles = ((ifm_ch * k * k) / pe) * odim * odim * batch_size + exp_cycles = ((ifm_ch * k_prod) / pe) * np.prod(odims) * batch_size return int(exp_cycles) def get_instream_width(self): @@ -198,7 +200,8 @@ class Pool_Batch(HLSCustomOp): return info_messages def global_includes(self): - self.code_gen_dict["$GLOBALS$"] = ['#include "maxpool.h"'] + self.code_gen_dict["$GLOBALS$"] = ['#include "activations.hpp"'] + self.code_gen_dict["$GLOBALS$"] += ['#include "maxpool.h"'] self.code_gen_dict["$GLOBALS$"] += ['#include "pool.hpp"'] def defines(self, var): @@ -211,10 +214,12 @@ class Pool_Batch(HLSCustomOp): self.code_gen_dict["$DEFINES$"] += ["#define PE {}".format(pe)] k = self.get_nodeattr("KernelSize") - self.code_gen_dict["$DEFINES$"] += ["#define KernelSize {}".format(k)] + k_prod = int(np.prod(k)) + self.code_gen_dict["$DEFINES$"] += ["#define KernelSize {}".format(k_prod)] - odim = self.get_nodeattr("OutImgDim") - self.code_gen_dict["$DEFINES$"] += ["#define OFMDim {}".format(odim)] + odims = self.get_nodeattr("OutImgDims") + total_odim = np.prod(odims) + self.code_gen_dict["$DEFINES$"] += ["#define OFMDimTotal {}".format(total_odim)] numReps = self.get_nodeattr("BatchSize") self.code_gen_dict["$DEFINES$"] += ["#define numReps {}".format(numReps)] @@ -275,7 +280,7 @@ class Pool_Batch(HLSCustomOp): self.code_gen_dict["$DOCOMPUTE$"] += [ """Pool_batch<Channels, PE, KernelSize,Slice<{} >, Slice< {} > > - (in0,out, pool_fxn, OFMDim*OFMDim*numReps);""".format( + (in0,out, pool_fxn, OFMDimTotal*numReps);""".format( i_hls_dt, o_hls_dt ) ] @@ -322,8 +327,12 @@ class Pool_Batch(HLSCustomOp): ] def pragmas(self): - self.code_gen_dict["$PRAGMAS$"] = ["#pragma HLS INTERFACE axis port=in0"] - self.code_gen_dict["$PRAGMAS$"].append("#pragma HLS INTERFACE axis port=out") + self.code_gen_dict["$PRAGMAS$"] = [ + "#pragma HLS INTERFACE axis port=in0 name=in0_" + self.hls_sname() + ] + self.code_gen_dict["$PRAGMAS$"].append( + "#pragma HLS INTERFACE axis port=out name=out_" + self.hls_sname() + ) self.code_gen_dict["$PRAGMAS$"].append( "#pragma HLS INTERFACE ap_ctrl_none port=return" ) @@ -334,7 +343,6 @@ class Pool_Batch(HLSCustomOp): exp_ishape = self.get_normal_input_shape() folded_ishape = self.get_folded_input_shape() exp_oshape = self.get_normal_output_shape() - folded_oshape = self.get_folded_output_shape() # TODO ensure codegen dir exists if mode == "cppsim": @@ -368,9 +376,8 @@ class Pool_Batch(HLSCustomOp): # load output npy file super().npy_to_dynamic_output(context) assert ( - context[node.output[0]].shape == folded_oshape - ), "cppsim did not produce expected folded output shape" - context[node.output[0]] = context[node.output[0]].reshape(*exp_oshape) + context[node.output[0]].shape == exp_oshape + ), "cppsim did not produce expected output shape" elif mode == "rtlsim": sim = self.get_rtlsim() nbits = self.get_instream_width() diff --git a/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter_batch.py b/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter_batch.py index 1791706afa217d5eb453064547c1ea66b306d227..5fabef57be3675c38fcfd74c0db99f50d98340f4 100644 --- a/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter_batch.py +++ b/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter_batch.py @@ -76,24 +76,30 @@ class StreamingDataWidthConverter_Batch(HLSCustomOp): oshape = self.get_nodeattr("shape") return oshape + def check_divisible_iowidths(self): + impl_style = self.get_nodeattr("impl_style") + if impl_style == "hls": + # when using impl_style = hls must have the following + # if inWidth > outWidth: inWidth % outWidth = 0 + # if inWidth < outWidth: outWidth % inWidth = 0 + iwidth = self.get_nodeattr("inWidth") + owidth = self.get_nodeattr("outWidth") + if iwidth > owidth: + assert ( + iwidth % owidth == 0 + ), """DWC InWidth is bigger than OutWidth and is not divisible by it. + Please adjust PE and SIMD values so that InWidth % OutWidth = 0 + or alternatively use impl_style = vivado""" + else: + assert ( + owidth % iwidth == 0 + ), """DWC OutWidth is bigger than InWidth and is not divisible by it. + Please adjust PE and SIMD values so that OutWidth % InWidth = 0 + or alternatively use impl_style = vivado""" + def get_folded_input_shape(self): - # for correct functionality of the dwc node the - # following must apply: - # if inWidth > outWidth: inWidth % outWidth = 0 - # if inWidth < outWidth: outWidth % inWidth = 0 + self.check_divisible_iowidths() iwidth = self.get_nodeattr("inWidth") - owidth = self.get_nodeattr("outWidth") - if iwidth > owidth: - assert ( - iwidth % owidth == 0 - ), """InWidth is bigger than OutWidth and is not divisible by it. - Please adjust PE and SIMD values so that InWidth % OutWidth = 0""" - else: - assert ( - owidth % iwidth == 0 - ), """OutWidth is bigger than InWidth and is not divisible by it. - Please adjust PE and SIMD values so that OutWidth % InWidth = 0""" - ishape = self.get_normal_input_shape() dummy_t = np.random.randn(*ishape) ibits = self.get_input_datatype().bitwidth() @@ -112,23 +118,8 @@ class StreamingDataWidthConverter_Batch(HLSCustomOp): return dummy_t.shape def get_folded_output_shape(self): - # for correct functionality of the dwc node the - # following must apply: - # if inWidth > outWidth: inWidth % outWidth = 0 - # if inWidth < outWidth: outWidth % inWidth = 0 - iwidth = self.get_nodeattr("inWidth") + self.check_divisible_iowidths() owidth = self.get_nodeattr("outWidth") - if iwidth > owidth: - assert ( - iwidth % owidth == 0 - ), """InWidth is bigger than OutWidth and is not divisible by it. - Please adjust PE and SIMD values so that InWidth % OutWidth = 0""" - else: - assert ( - owidth % iwidth == 0 - ), """OutWidth is bigger than InWidth and is not divisible by it. - Please adjust PE and SIMD values so that OutWidth % InWidth = 0""" - oshape = self.get_normal_output_shape() dummy_t = np.random.randn(*oshape) obits = self.get_output_datatype().bitwidth() @@ -287,22 +278,29 @@ class StreamingDataWidthConverter_Batch(HLSCustomOp): ] def pragmas(self): - self.code_gen_dict["$PRAGMAS$"] = ["#pragma HLS INTERFACE axis port=in0"] - self.code_gen_dict["$PRAGMAS$"].append("#pragma HLS INTERFACE axis port=out") + self.code_gen_dict["$PRAGMAS$"] = [ + "#pragma HLS INTERFACE axis port=in0 name=in0_" + self.hls_sname() + ] + self.code_gen_dict["$PRAGMAS$"].append( + "#pragma HLS INTERFACE axis port=out name=out_" + self.hls_sname() + ) self.code_gen_dict["$PRAGMAS$"].append( "#pragma HLS INTERFACE ap_ctrl_none port=return" ) def execute_node(self, context, graph): mode = self.get_nodeattr("exec_mode") + impl_style = self.get_nodeattr("impl_style") node = self.onnx_node exp_shape = self.get_normal_input_shape() folded_ishape = self.get_folded_input_shape() # TODO ensure codegen dir exists if mode == "cppsim": + assert impl_style == "hls", "DWC cppsim only possible when impl_style==hls" code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") elif mode == "rtlsim": + assert impl_style == "hls", "DWC rtlsim only possible when impl_style==hls" code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") else: raise Exception( diff --git a/src/finn/custom_op/fpgadataflow/streamingfifo.py b/src/finn/custom_op/fpgadataflow/streamingfifo.py index 91f6ed5b8d29fd72ea1fbb8a3da94cfc103af88e..ad5300eec1b0b74d9ae4bcc898983a5c429a660d 100644 --- a/src/finn/custom_op/fpgadataflow/streamingfifo.py +++ b/src/finn/custom_op/fpgadataflow/streamingfifo.py @@ -34,6 +34,7 @@ from shutil import copy from finn.core.datatype import DataType from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp +from finn.util.basic import get_finn_root from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy from . import templates @@ -110,7 +111,7 @@ class StreamingFIFO(HLSCustomOp): ) os.makedirs(verilog_dir) # copy Q_srl.v from finn-rtllib to verilog directory - memstream_dir = "/workspace/finn/finn-rtllib/memstream/hdl/" + memstream_dir = get_finn_root() + "/finn-rtllib/memstream/hdl/" Q_file = os.path.join(memstream_dir, "Q_srl.v") copy(Q_file, verilog_dir) @@ -128,6 +129,7 @@ class StreamingFIFO(HLSCustomOp): self.code_gen_dict["$OUT_RANGE$"] = ["[{}:0]".format(in_width - 1)] self.code_gen_dict["$WIDTH$"] = [str(in_width)] self.code_gen_dict["$DEPTH$"] = [str(self.get_nodeattr("depth"))] + self.code_gen_dict["$HLS_SNAME$"] = [self.hls_sname()] template = self.strm_fifo_wrapper @@ -152,6 +154,7 @@ class StreamingFIFO(HLSCustomOp): # note: setting the root dir as absolute can cause path problems # the ipgen script will be invoked from the sources dir so root_dir=. is OK self.code_gen_dict["$VERILOG_DIR$"] = ["."] + self.code_gen_dict["$HLS_SNAME$"] = [self.hls_sname()] for key in self.code_gen_dict: # transform list into long string separated by '\n' code_gen_line = "\n".join(self.code_gen_dict[key]) diff --git a/src/finn/custom_op/fpgadataflow/streamingmaxpool_batch.py b/src/finn/custom_op/fpgadataflow/streamingmaxpool_batch.py old mode 100644 new mode 100755 index 1e66a5c204cc62bb7620907f82fcd5b2072bc184..b9c2350c0c20035358780e90ddb6f2923d171af5 --- a/src/finn/custom_op/fpgadataflow/streamingmaxpool_batch.py +++ b/src/finn/custom_op/fpgadataflow/streamingmaxpool_batch.py @@ -32,9 +32,12 @@ import warnings from finn.core.datatype import DataType from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp -from finn.custom_op.general.im2col import compute_conv_output_dim +from finn.custom_op.general.maxpoolnhwc import compute_pool_output_dim from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy +# TODO: consider splitting this into separate implementations for 1D and 2D +# similar to what we do for ConvolutionInputGenerator + class StreamingMaxPool_Batch(HLSCustomOp): """Class that corresponds to finn-hlslib StreamingMaxPool_batch function.""" @@ -44,6 +47,10 @@ class StreamingMaxPool_Batch(HLSCustomOp): "ImgDim": ("ints", True, []), # [H, W] = [Y, X] "PoolDim": ("ints", True, []), # [H, W] = [Y, X] "NumChannels": ("i", True, 0), + # parallelism control - only supported for 1D maxpool + "PE": ("i", False, 0), + # round up (instead of down) output size - only supported for 1D maxpool + "CeilMode": ("i", False, 0), # FINN DataTypes for inputs/outputs "dataType": ("s", True, ""), } @@ -82,24 +89,30 @@ class StreamingMaxPool_Batch(HLSCustomOp): return ishape def get_folded_input_shape(self): - # even though there is no folding in the current hlslib op, - # insert a time multiplexing axis to remain compatible with the - # shapes produced by the rest of the dataflow pipeline - ret = list(self.get_normal_input_shape()) - ret.insert(-1, 1) - return tuple(ret) + ifm_dim_h, ifm_dim_w = self.get_nodeattr("ImgDim") + ifm_ch = self.get_nodeattr("NumChannels") + pe = self.get_nodeattr("PE") + nf = int(ifm_ch / pe) + if self.is_1d(): + folded_ishape = (1, ifm_dim_h, ifm_dim_w, nf, pe) + else: + folded_ishape = (1, ifm_dim_h, ifm_dim_w, 1, ifm_ch) + return folded_ishape def get_normal_output_shape(self): ifm_dim_h, ifm_dim_w = self.get_nodeattr("ImgDim") k_h, k_w = tuple(self.get_nodeattr("PoolDim")) ifm_ch = self.get_nodeattr("NumChannels") - stride_h = k_h - stride_w = k_w - pad = 0 - assert ifm_dim_h % k_h == 0, "StreamingMaxPool needs ImgDim_h % PoolDim_h == 0" - assert ifm_dim_w % k_w == 0, "StreamingMaxPool needs ImgDim_w % PoolDim_w == 0" - ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, pad) - ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, pad) + ceil_mode = self.get_nodeattr("CeilMode") + if not self.is_1d(): + assert ( + ifm_dim_h % k_h == 0 + ), "StreamingMaxPool needs ImgDim_h % PoolDim_h == 0" + assert ( + ifm_dim_w % k_w == 0 + ), "StreamingMaxPool needs ImgDim_w % PoolDim_w == 0" + ofm_dim_h = compute_pool_output_dim(ifm_dim_h, k_h, k_h, 0, ceil_mode) + ofm_dim_w = compute_pool_output_dim(ifm_dim_w, k_w, k_w, 0, ceil_mode) oshape = (1, ofm_dim_h, ofm_dim_w, ifm_ch) return oshape @@ -107,8 +120,15 @@ class StreamingMaxPool_Batch(HLSCustomOp): # even though there is no folding in the current hlslib op, # insert a time multiplexing axis to remain compatible with the # shapes produced by the rest of the dataflow pipeline + ifm_ch = self.get_nodeattr("NumChannels") + pe = self.get_nodeattr("PE") + nf = int(ifm_ch / pe) ret = list(self.get_normal_output_shape()) - ret.insert(-1, 1) + if self.is_1d(): + ret[-1] = nf + ret.append(pe) + else: + ret.insert(-1, 1) return tuple(ret) def get_number_output_values(self): @@ -118,20 +138,27 @@ class StreamingMaxPool_Batch(HLSCustomOp): def get_exp_cycles(self): # derived from StreamingMaxPool_Batch loop nest ifm_dim, k, ifm_ch = self.get_1d_attrs_normalized() + _, _, ofm_dim_w, nf, _ = self.get_folded_output_shape() + if self.is_1d(): - return int(ifm_dim[1] + k[1]) + exp_cycles = ofm_dim_w * nf * (k[1] + 1) + return int(exp_cycles) else: # TODO: adjust inaccurate formula return int(ifm_dim[1] * (ifm_dim[1] + (ifm_dim[1] / k[1]))) def get_instream_width(self): dt_bits = self.get_input_datatype().bitwidth() + pe = self.get_nodeattr("PE") ifm_ch = self.get_nodeattr("NumChannels") - in_width = int(dt_bits * ifm_ch) + if self.is_1d(): + in_width = int(dt_bits * pe) + else: + in_width = int(dt_bits * ifm_ch) return in_width def get_outstream_width(self): - """For streaming maxpool out stream with is the same as in stream width""" + """For streaming maxpool out stream width is the same as in stream width""" return self.get_instream_width() def make_shape_compatible_op(self, model): @@ -176,18 +203,34 @@ class StreamingMaxPool_Batch(HLSCustomOp): self.code_gen_dict["$GLOBALS$"] = ['#include "maxpool.h"'] def defines(self, var): - numReps = 2 + numReps = 1 ifm_dim, k, ifm_ch = self.get_1d_attrs_normalized() + ceil_mode = self.get_nodeattr("CeilMode") + output_size = compute_pool_output_dim(ifm_dim[1], k[1], k[1], 0, ceil_mode) - self.code_gen_dict["$DEFINES$"] = [ - """#define ImgDim {}\n #define PoolDim {}\n - #define NumChannels {}\n #define numReps {}""".format( - ifm_dim[1], - k[1], - self.get_nodeattr("NumChannels"), - numReps, - ) - ] + if self.is_1d(): + self.code_gen_dict["$DEFINES$"] = [ + """#define ImgDim {}\n #define PoolDim {}\n + #define NumChannels {}\n #define PE {}\n #define OutputSize {} + \n #define numReps {}""".format( + ifm_dim[1], + k[1], + self.get_nodeattr("NumChannels"), + self.get_nodeattr("PE"), + output_size, + numReps, + ) + ] + else: + self.code_gen_dict["$DEFINES$"] = [ + """#define ImgDim {}\n #define PoolDim {}\n + #define NumChannels {}\n #define numReps {}""".format( + ifm_dim[1], + k[1], + self.get_nodeattr("NumChannels"), + numReps, + ) + ] def read_npy_data(self): code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") @@ -222,22 +265,27 @@ class StreamingMaxPool_Batch(HLSCustomOp): if self.is_1d(): raise Exception("Binary 1d MaxPool not implemented on HLS backend") else: - op = "StreamingMaxPool_Batch" + op = "StreamingMaxPool" self.code_gen_dict["$DOCOMPUTE$"] = [ - "%s<ImgDim, PoolDim, NumChannels>(in0, out, numReps);" % (op) + "%s<ImgDim, PoolDim, NumChannels>(in0, out);" % (op) ] else: - if self.is_1d(): - op = "StreamingMaxPool_Precision_Batch_1d" - else: - op = "StreamingMaxPool_Precision_Batch" dtype = self.get_input_datatype() dtype_hls = dtype.get_hls_datatype_str() minval_str = str(int(dtype.min())) - self.code_gen_dict["$DOCOMPUTE$"] = [ - "%s<ImgDim, PoolDim, NumChannels, %s, %s>(in0, out, numReps);" - % (op, dtype_hls, minval_str) - ] + if self.is_1d(): + op = "StreamingMaxPool_Precision_1d" + self.code_gen_dict["$DOCOMPUTE$"] = [ + """%s<ImgDim, PoolDim, NumChannels, PE, + OutputSize, %s, %s>(in0, out);""" + % (op, dtype_hls, minval_str) + ] + else: + op = "StreamingMaxPool_Precision" + self.code_gen_dict["$DOCOMPUTE$"] = [ + "%s<ImgDim, PoolDim, NumChannels, %s, %s>(in0, out);" + % (op, dtype_hls, minval_str) + ] def dataoutstrm(self): code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") @@ -278,8 +326,12 @@ class StreamingMaxPool_Batch(HLSCustomOp): ] def pragmas(self): - self.code_gen_dict["$PRAGMAS$"] = ["#pragma HLS INTERFACE axis port=in0"] - self.code_gen_dict["$PRAGMAS$"].append("#pragma HLS INTERFACE axis port=out") + self.code_gen_dict["$PRAGMAS$"] = [ + "#pragma HLS INTERFACE axis port=in0 name=in0_" + self.hls_sname() + ] + self.code_gen_dict["$PRAGMAS$"].append( + "#pragma HLS INTERFACE axis port=out name=out_" + self.hls_sname() + ) self.code_gen_dict["$PRAGMAS$"].append( "#pragma HLS INTERFACE ap_ctrl_none port=return" ) @@ -289,7 +341,7 @@ class StreamingMaxPool_Batch(HLSCustomOp): node = self.onnx_node exp_ishape = self.get_normal_input_shape() exp_oshape = self.get_normal_output_shape() - folded_oshape = self.get_folded_output_shape() + folded_ishape = self.get_folded_input_shape() # TODO ensure codegen dir exists if mode == "cppsim": @@ -316,9 +368,8 @@ class StreamingMaxPool_Batch(HLSCustomOp): export_idt = DataType["BINARY"] else: export_idt = self.get_input_datatype() - # no reshaping for input since assuming no folding on input - # make copy before saving array - reshaped_input = inp.copy() + + reshaped_input = inp.reshape(folded_ishape) np.save(os.path.join(code_gen_dir, "input_0.npy"), reshaped_input) if mode == "cppsim": @@ -327,10 +378,9 @@ class StreamingMaxPool_Batch(HLSCustomOp): # load output npy file super().npy_to_dynamic_output(context) assert ( - context[node.output[0]].shape == folded_oshape + context[node.output[0]].shape == exp_oshape ), "cppsim \ - did not produce expected ofolded utput shape" - context[node.output[0]] = context[node.output[0]].reshape(*exp_oshape) + did not produce expected output shape" elif mode == "rtlsim": sim = self.get_rtlsim() nbits = self.get_instream_width() @@ -367,4 +417,4 @@ class StreamingMaxPool_Batch(HLSCustomOp): assert ( context[node.output[0]].shape == exp_oshape ), """Output - shape doesn't match expected shape (1, ofm_dim, ofm_dim, k*k*ifm_ch).""" + shape doesn't match expected shape (1, ofm_dim, ofm_dim, ifm_ch).""" diff --git a/src/finn/custom_op/fpgadataflow/templates.py b/src/finn/custom_op/fpgadataflow/templates.py index e253348598d72897c2a8f83f5bee04351eb43d32..e73fa9bb2872d4a5023afb0c4e6953b4e6866b8d 100644 --- a/src/finn/custom_op/fpgadataflow/templates.py +++ b/src/finn/custom_op/fpgadataflow/templates.py @@ -86,23 +86,21 @@ puts "HLS project: $config_proj_name" set config_hwsrcdir "$HWSRCDIR$" puts "HW source dir: $config_hwsrcdir" set config_proj_part "$FPGAPART$" - -set config_bnnlibdir "$FINNHLSLIBDIR$" -set config_customhlsdir "$FINNHLSCUSTOMDIR$" - +set config_bnnlibdir "$::env(FINN_ROOT)/deps/finn-hlslib" +puts "finn-hlslib dir: $config_bnnlibdir" +set config_customhlsdir "$::env(FINN_ROOT)/custom_hls" +puts "custom HLS dir: $config_customhlsdir" set config_toplevelfxn "$TOPFXN$" set config_clkperiod $CLKPERIOD$ open_project $config_proj_name -add_files $config_hwsrcdir/top_$TOPFXN$.cpp -cflags "-std=c++0x -I$config_bnnlibdir -I$config_customhlsdir" +add_files $config_hwsrcdir/top_$TOPFXN$.cpp -cflags "-std=c++14 -I$config_bnnlibdir -I$config_customhlsdir" set_top $config_toplevelfxn open_solution sol1 set_part $config_proj_part -config_compile -ignore_long_run_time -disable_unroll_code_size_check -config_interface -m_axi_addr64 -config_rtl -auto_prefix +$DEFAULT_DIRECTIVES$ $EXTRA_DIRECTIVES$ create_clock -period $config_clkperiod -name default @@ -116,22 +114,22 @@ decoupled_wrapper = """ module $TOPNAME$( ap_clk, ap_rst_n, -in0_V_V_TDATA, -in0_V_V_TVALID, -in0_V_V_TREADY, -out_V_V_TDATA, -out_V_V_TVALID, -out_V_V_TREADY +in0_$HLS_SNAME$_TDATA, +in0_$HLS_SNAME$_TVALID, +in0_$HLS_SNAME$_TREADY, +out_$HLS_SNAME$_TDATA, +out_$HLS_SNAME$_TVALID, +out_$HLS_SNAME$_TREADY ); input ap_clk; input ap_rst_n; -input $IN_RANGE$ in0_V_V_TDATA; -input in0_V_V_TVALID; -output in0_V_V_TREADY; -output $OUT_RANGE$ out_V_V_TDATA; -output out_V_V_TVALID; -input out_V_V_TREADY; +input $IN_RANGE$ in0_$HLS_SNAME$_TDATA; +input in0_$HLS_SNAME$_TVALID; +output in0_$HLS_SNAME$_TREADY; +output $OUT_RANGE$ out_$HLS_SNAME$_TDATA; +output out_$HLS_SNAME$_TVALID; +input out_$HLS_SNAME$_TREADY; reg [31:0] config_address = 0; reg config_ce = 0; @@ -198,15 +196,15 @@ MVA_Stream_U ( .ap_clk(ap_clk), //input .ap_rst_n(ap_rst_n), //input -.in0_V_V_TDATA(in0_V_V_TDATA), //$IN_RANGE$ input -.in0_V_V_TVALID(in0_V_V_TVALID), //input -.in0_V_V_TREADY(in0_V_V_TREADY), //output -.weights_V_V_TDATA(m_axis_0_tdata), //$WEIGHT_RANGE$ input -.weights_V_V_TVALID(m_axis_0_tvalid), //input -.weights_V_V_TREADY(m_axis_0_tready), //output -.out_V_V_TDATA(out_V_V_TDATA), //$OUT_RANGE$ output -.out_V_V_TVALID(out_V_V_TVALID), //output -.out_V_V_TREADY(out_V_V_TREADY) //input +.in0_$HLS_SNAME$_TDATA(in0_$HLS_SNAME$_TDATA), //$IN_RANGE$ input +.in0_$HLS_SNAME$_TVALID(in0_$HLS_SNAME$_TVALID), //input +.in0_$HLS_SNAME$_TREADY(in0_$HLS_SNAME$_TREADY), //output +.weights_$HLS_SNAME$_TDATA(m_axis_0_tdata), //$WEIGHT_RANGE$ input +.weights_$HLS_SNAME$_TVALID(m_axis_0_tvalid), //input +.weights_$HLS_SNAME$_TREADY(m_axis_0_tready), //output +.out_$HLS_SNAME$_TDATA(out_$HLS_SNAME$_TDATA), //$OUT_RANGE$ output +.out_$HLS_SNAME$_TVALID(out_$HLS_SNAME$_TVALID), //output +.out_$HLS_SNAME$_TREADY(out_$HLS_SNAME$_TREADY) //input ); endmodule @@ -248,6 +246,8 @@ set_property supported_families { \ kintex7l Production \ kintexu Production \ kintexuplus Production \ + versal Production \ + versalprime Production \ virtex7 Production \ virtexu Production \ virtexuplus Production \ @@ -301,10 +301,10 @@ ipx::add_ports_from_hdl \ ## Infer interfaces ipx::infer_bus_interface ap_clk xilinx.com:signal:clock_rtl:1.0 [ipx::current_core] ipx::infer_bus_interface ap_rst_n xilinx.com:signal:reset_rtl:1.0 [ipx::current_core] -ipx::infer_bus_interface {in0_V_V_TDATA in0_V_V_TVALID in0_V_V_TREADY} xilinx.com:interface:axis_rtl:1.0 [ipx::current_core] -ipx::infer_bus_interface {out_V_V_TREADY out_V_V_TDATA out_V_V_TVALID} xilinx.com:interface:axis_rtl:1.0 [ipx::current_core] -ipx::associate_bus_interfaces -busif in0_V_V -clock ap_clk [ipx::current_core] -ipx::associate_bus_interfaces -busif out_V_V -clock ap_clk [ipx::current_core] +ipx::infer_bus_interface {in0_$HLS_SNAME$_TDATA in0_$HLS_SNAME$_TVALID in0_$HLS_SNAME$_TREADY} xilinx.com:interface:axis_rtl:1.0 [ipx::current_core] +ipx::infer_bus_interface {out_$HLS_SNAME$_TREADY out_$HLS_SNAME$_TDATA out_$HLS_SNAME$_TVALID} xilinx.com:interface:axis_rtl:1.0 [ipx::current_core] +ipx::associate_bus_interfaces -busif in0_$HLS_SNAME$ -clock ap_clk [ipx::current_core] +ipx::associate_bus_interfaces -busif out_$HLS_SNAME$ -clock ap_clk [ipx::current_core] ## Finalize set_property core_revision 2 [ipx::current_core] @@ -319,23 +319,23 @@ module $TOPNAME$( ap_clk, ap_rst_n, count, -in0_V_V_TDATA, -in0_V_V_TVALID, -in0_V_V_TREADY, -out_V_V_TDATA, -out_V_V_TVALID, -out_V_V_TREADY +in0_$HLS_SNAME$_TDATA, +in0_$HLS_SNAME$_TVALID, +in0_$HLS_SNAME$_TREADY, +out_$HLS_SNAME$_TDATA, +out_$HLS_SNAME$_TVALID, +out_$HLS_SNAME$_TREADY ); input ap_clk; input ap_rst_n; output $COUNT_RANGE$ count; -input $IN_RANGE$ in0_V_V_TDATA; -input in0_V_V_TVALID; -output in0_V_V_TREADY; -output $OUT_RANGE$ out_V_V_TDATA; -output out_V_V_TVALID; -input out_V_V_TREADY; +input $IN_RANGE$ in0_$HLS_SNAME$_TDATA; +input in0_$HLS_SNAME$_TVALID; +output in0_$HLS_SNAME$_TREADY; +output $OUT_RANGE$ out_$HLS_SNAME$_TDATA; +output out_$HLS_SNAME$_TVALID; +input out_$HLS_SNAME$_TREADY; Q_srl #( .depth($DEPTH$), @@ -346,12 +346,12 @@ $LAYER_NAME$ .clock(ap_clk), .reset(!ap_rst_n), .count(count), - .i_d(in0_V_V_TDATA), - .i_v(in0_V_V_TVALID), - .i_r(in0_V_V_TREADY), - .o_d(out_V_V_TDATA), - .o_v(out_V_V_TVALID), - .o_r(out_V_V_TREADY) + .i_d(in0_$HLS_SNAME$_TDATA), + .i_v(in0_$HLS_SNAME$_TVALID), + .i_r(in0_$HLS_SNAME$_TREADY), + .o_d(out_$HLS_SNAME$_TDATA), + .o_v(out_$HLS_SNAME$_TVALID), + .o_r(out_$HLS_SNAME$_TREADY) ); endmodule diff --git a/src/finn/custom_op/fpgadataflow/thresholding_batch.py b/src/finn/custom_op/fpgadataflow/thresholding_batch.py index 610139f44ee7e8be1320b47c99222667fa6ed850..3acfc7d8b004733131ee997f69aa4ac2aac88577 100644 --- a/src/finn/custom_op/fpgadataflow/thresholding_batch.py +++ b/src/finn/custom_op/fpgadataflow/thresholding_batch.py @@ -389,7 +389,7 @@ class Thresholding_Batch(HLSCustomOp): tdt_hls, odt_hls, self.get_nodeattr("ActVal"), - "comp::less_equal<%s>" % tdt_hls, + "comp::less_equal<%s, %s>" % (tdt_hls, tdt_hls), ) ) f_thresh.write(thresholds_hls_code) @@ -465,9 +465,26 @@ class Thresholding_Batch(HLSCustomOp): weight_filename_sim = "{}/thresholds.npy".format(code_gen_dir) self.make_weight_file(thresholds, "decoupled_npy", weight_filename_sim) # also save weights as Verilog .dat file - weight_filename_rtl = "{}/memblock_0.dat".format(code_gen_dir) + # note that we provide two different .dat files, one for synth + # and one for synthesis. this is because URAM-based weights always + # need zero weights for synthesis, otherwise they get inferred + # as BRAM + weight_filename_rtl_synth = "{}/memblock_synth_0.dat".format(code_gen_dir) + weight_filename_rtl_sim = "{}/memblock_sim_0.dat".format(code_gen_dir) + # sim weights are always the true weights self.make_weight_file( - thresholds, "decoupled_verilog_dat", weight_filename_rtl + thresholds, "decoupled_verilog_dat", weight_filename_rtl_sim + ) + ram_style = self.get_nodeattr("ram_style") + if ram_style == "ultra": + # UltraRAM must have no memory initializer, or only zeroes + # otherwise BRAM will be inferred instead of URAM + # as a workaround we provide a zero-weight init here + synth_thresholds = np.zeros_like(thresholds) + else: + synth_thresholds = thresholds + self.make_weight_file( + synth_thresholds, "decoupled_verilog_dat", weight_filename_rtl_synth ) else: raise Exception("Unrecognized mem_mode") @@ -528,12 +545,10 @@ class Thresholding_Batch(HLSCustomOp): out = context[node.output[0]] out = 2 * out - 1 context[node.output[0]] = out + oshape = self.get_normal_output_shape() assert ( - context[node.output[0]].shape == self.get_folded_output_shape() + context[node.output[0]].shape == oshape ), """Output shape is not as expected""" - # reshape output to have expected shape - oshape = self.get_normal_output_shape() - context[node.output[0]] = context[node.output[0]].reshape(*oshape) elif mode == "rtlsim": sim = self.get_rtlsim() nbits = self.get_instream_width() @@ -589,7 +604,7 @@ class Thresholding_Batch(HLSCustomOp): # TODO check and add whatever missing def defines(self, var): numInputVectors = list(self.get_nodeattr("numInputVectors")) - numReps = numInputVectors[0] + numReps = int(np.prod(numInputVectors)) self.code_gen_dict["$DEFINES$"] = [ """#define NumChannels1 {}\n #define PE1 {}\n #define numReps {}""".format( self.get_nodeattr("NumChannels"), @@ -660,34 +675,28 @@ class Thresholding_Batch(HLSCustomOp): # TODO: why put some template parameters into defines and not others? # should ImgDim be defined or just filled in here like we do now? node = self.onnx_node - ishape = self.get_folded_input_shape() - if len(ishape) == 3: - imgdimh = 1 - imgdimw = 1 - elif len(ishape) == 5: - imgdimh = ishape[1] - imgdimw = ishape[2] - else: - raise Exception("""Unexpected input shape""") + inp_vecs = self.get_nodeattr("numInputVectors") + total_spatial_size = int(np.prod(inp_vecs)) mem_mode = self.get_nodeattr("mem_mode") if mem_mode == "const": self.code_gen_dict["$DOCOMPUTE$"] = [ - """{}<{}, {}, NumChannels1, PE1, {}, {}> + """{}<{}, NumChannels1, PE1, {}, {}> (in0, out, threshs, numReps);""".format( node.op_type, - imgdimh, - imgdimw, + total_spatial_size, tmpl_args["TSrcI"], tmpl_args["TDstI"], ) ] elif mem_mode == "decoupled": + # note that numReps is set to 1 in the invocation below, since + # - for cppsim the repetition comes from the threshold stream reader+input + # - for synth the unit runs continuously anyway (ap_ctrl_none) self.code_gen_dict["$DOCOMPUTE$"] = [ - """{}<{}, {}, NumChannels1, PE1, {}, {}, ActVal1, ThresType1, NumSteps1> - (in0, out, weights, numReps);""".format( + """{}<{}, NumChannels1, PE1, {}, {}, ActVal1, ThresType1, NumSteps1> + (in0, out, weights, 1);""".format( "Thresholding_Stream_Batch", - imgdimh, - imgdimw, + total_spatial_size, tmpl_args["TSrcI"], tmpl_args["TDstI"], ) @@ -753,8 +762,12 @@ class Thresholding_Batch(HLSCustomOp): raise Exception("Unrecognized mem_mode") def pragmas(self): - self.code_gen_dict["$PRAGMAS$"] = ["#pragma HLS INTERFACE axis port=in0"] - self.code_gen_dict["$PRAGMAS$"].append("#pragma HLS INTERFACE axis port=out") + self.code_gen_dict["$PRAGMAS$"] = [ + "#pragma HLS INTERFACE axis port=in0 name=in0_" + self.hls_sname() + ] + self.code_gen_dict["$PRAGMAS$"].append( + "#pragma HLS INTERFACE axis port=out name=out_" + self.hls_sname() + ) self.code_gen_dict["$PRAGMAS$"].append( "#pragma HLS INTERFACE ap_ctrl_none port=return" ) @@ -805,7 +818,8 @@ class Thresholding_Batch(HLSCustomOp): ) elif self.get_nodeattr("mem_mode") == "decoupled": self.code_gen_dict["$PRAGMAS$"].append( - "#pragma HLS INTERFACE axis port=weights" + "#pragma HLS INTERFACE axis port=weights name=weights_" + + self.hls_sname() ) def code_generation_ipi(self): @@ -815,6 +829,7 @@ class Thresholding_Batch(HLSCustomOp): if mem_mode == "decoupled": node_name = self.onnx_node.name runtime_writable = self.get_nodeattr("runtime_writeable_weights") == 1 + sname = self.hls_sname() # create a hierarchy for this layer, with the same port names clk_name = self.get_verilog_top_module_intf_names()["clk"][0] rst_name = self.get_verilog_top_module_intf_names()["rst"][0] @@ -868,8 +883,8 @@ class Thresholding_Batch(HLSCustomOp): ) cmd.append( "connect_bd_intf_net [get_bd_intf_pins %s/%s/m_axis_0] " - "[get_bd_intf_pins %s/%s/weights_V_V]" - % (node_name, strm_inst, node_name, node_name) + "[get_bd_intf_pins %s/%s/weights_%s]" + % (node_name, strm_inst, node_name, node_name, sname) ) cmd.append( "connect_bd_net [get_bd_pins %s/%s] [get_bd_pins %s/%s/aresetn]" @@ -940,3 +955,8 @@ class Thresholding_Batch(HLSCustomOp): thres_count = out_features * num_steps ret_dict[thres_param_type] = thres_count return ret_dict + + def ipgen_extra_directives(self): + "Return a list of extra tcl directives for HLS synthesis." + + return ["config_compile -pipeline_style frp"] diff --git a/src/finn/custom_op/fpgadataflow/tlastmarker.py b/src/finn/custom_op/fpgadataflow/tlastmarker.py index 70edaee9cfc0662411d005325e781f13b4f1b510..7386aa7e6311754b653e94f8d2e9b2a910a1370b 100644 --- a/src/finn/custom_op/fpgadataflow/tlastmarker.py +++ b/src/finn/custom_op/fpgadataflow/tlastmarker.py @@ -198,8 +198,12 @@ class TLastMarker(HLSCustomOp): ] def pragmas(self): - self.code_gen_dict["$PRAGMAS$"] = ["#pragma HLS INTERFACE axis port=in0"] - self.code_gen_dict["$PRAGMAS$"].append("#pragma HLS INTERFACE axis port=out") + self.code_gen_dict["$PRAGMAS$"] = [ + "#pragma HLS INTERFACE axis port=in0 name=in0_" + self.hls_sname() + ] + self.code_gen_dict["$PRAGMAS$"].append( + "#pragma HLS INTERFACE axis port=out name=out_" + self.hls_sname() + ) dyn_iters = self.get_nodeattr("DynIters") if dyn_iters == 1: @@ -244,12 +248,9 @@ class TLastMarker(HLSCustomOp): def get_verilog_top_module_intf_names(self): intf_names = super().get_verilog_top_module_intf_names() stream_width = self.get_nodeattr("StreamWidth") - if self.get_nodeattr("Direction") == "in": - intf_names["s_axis"] = [("in0", stream_width)] - intf_names["m_axis"] = [("out_V_V", stream_width)] - else: - intf_names["s_axis"] = [("in0_V_V", stream_width)] - intf_names["m_axis"] = [("out_r", stream_width)] + sname = self.hls_sname() + intf_names["s_axis"] = [("in0_" + sname, stream_width)] + intf_names["m_axis"] = [("out_" + sname, stream_width)] if self.get_nodeattr("DynIters") == 1: intf_names["axilite"] = ["s_axi_control"] return intf_names diff --git a/src/finn/custom_op/fpgadataflow/upsampler.py b/src/finn/custom_op/fpgadataflow/upsampler.py index 7114cd83ed08b53eab2cfe38d98d84944d537168..221725d49440653c5e56287f0d910848ec0b24c5 100644 --- a/src/finn/custom_op/fpgadataflow/upsampler.py +++ b/src/finn/custom_op/fpgadataflow/upsampler.py @@ -231,8 +231,12 @@ class UpsampleNearestNeighbour_Batch(HLSCustomOp): ] def pragmas(self): - self.code_gen_dict["$PRAGMAS$"] = ["#pragma HLS INTERFACE axis port=in0"] - self.code_gen_dict["$PRAGMAS$"].append("#pragma HLS INTERFACE axis port=out") + self.code_gen_dict["$PRAGMAS$"] = [ + "#pragma HLS INTERFACE axis port=in0 name=in0_" + self.hls_sname() + ] + self.code_gen_dict["$PRAGMAS$"].append( + "#pragma HLS INTERFACE axis port=out name=out_" + self.hls_sname() + ) self.code_gen_dict["$PRAGMAS$"].append( "#pragma HLS INTERFACE ap_ctrl_none port=return" ) diff --git a/src/finn/custom_op/fpgadataflow/vector_vector_activate_batch.py b/src/finn/custom_op/fpgadataflow/vectorvectoractivation.py similarity index 96% rename from src/finn/custom_op/fpgadataflow/vector_vector_activate_batch.py rename to src/finn/custom_op/fpgadataflow/vectorvectoractivation.py index e0f789a8883aad83ed8c8b37a16392308bc720cc..f1f3f5b5027678982e5b79b05b1dc47e90a69e3d 100644 --- a/src/finn/custom_op/fpgadataflow/vector_vector_activate_batch.py +++ b/src/finn/custom_op/fpgadataflow/vectorvectoractivation.py @@ -45,7 +45,7 @@ from finn.util.data_packing import ( ) -class Vector_Vector_Activate_Batch(HLSCustomOp): +class VectorVectorActivation(HLSCustomOp): """Class that corresponds to finn-hlslib Vector_Vector_Activate_Batch function""" def __init__(self, onnx_node): @@ -379,7 +379,7 @@ class Vector_Vector_Activate_Batch(HLSCustomOp): tdt_hls, odt_hls, self.get_nodeattr("ActVal"), - "comp::less_equal<%s>" % tdt_hls, + "comp::less_equal<%s, %s>" % (tdt_hls, tdt_hls), ) ) f_thresh.write(thresholds_hls_code) @@ -422,9 +422,7 @@ class Vector_Vector_Activate_Batch(HLSCustomOp): reshaped_input, ) elif in_ind > 2: - raise Exception( - "Unexpected input found for Vector_Vector_Activate_Unit" - ) + raise Exception("Unexpected input found for VectorVectorActivation") in_ind += 1 if mode == "cppsim": @@ -433,11 +431,8 @@ class Vector_Vector_Activate_Batch(HLSCustomOp): # load output npy file super().npy_to_dynamic_output(context) assert ( - context[node.output[0]].shape == self.get_folded_output_shape() - ), """Output shape is not as expected""" - # reshape output to have expected shape - oshape = self.get_normal_output_shape() - context[node.output[0]] = context[node.output[0]].reshape(*oshape) + context[node.output[0]].shape == self.get_normal_output_shape() + ), "cppsim did not produce expected output shape" elif mode == "rtlsim": sim = self.get_rtlsim() nbits = self.get_instream_width() @@ -526,11 +521,9 @@ class Vector_Vector_Activate_Batch(HLSCustomOp): threshs = "PassThroughActivation<%s>()" % odtype_hls_str else: threshs = "threshs" - node = self.onnx_node self.code_gen_dict["$DOCOMPUTE$"] = [ - """{}<Channels1, InnerProdDim, SIMD1, PE1, 1, {}, {}, {}> + """Vector_Vector_Activate_Batch<Channels1, InnerProdDim, SIMD1, PE1, 1, {}, {}, {}> (in0, out, weights, {}, numReps, {});""".format( - node.op_type, tmpl_args["TSrcI"], tmpl_args["TDstI"], tmpl_args["TWeightI"], @@ -579,8 +572,12 @@ class Vector_Vector_Activate_Batch(HLSCustomOp): ] def pragmas(self): - self.code_gen_dict["$PRAGMAS$"] = ["#pragma HLS INTERFACE axis port=in0"] - self.code_gen_dict["$PRAGMAS$"].append("#pragma HLS INTERFACE axis port=out") + self.code_gen_dict["$PRAGMAS$"] = [ + "#pragma HLS INTERFACE axis port=in0 name=in0_" + self.hls_sname() + ] + self.code_gen_dict["$PRAGMAS$"].append( + "#pragma HLS INTERFACE axis port=out name=out_" + self.hls_sname() + ) in_fifo_depth = self.get_nodeattr("inFIFODepth") out_fifo_depth = self.get_nodeattr("outFIFODepth") # insert depth pragmas only if specified @@ -694,8 +691,8 @@ class Vector_Vector_Activate_Batch(HLSCustomOp): if noact == 0: odt = self.get_output_datatype() B = odt.bitwidth() - thr_luts = (2 ** B - 1) * acc_bits * math.ceil(self.calc_tmem() / 64) - comp_luts = (2 ** B - 1) * acc_bits + thr_luts = (2**B - 1) * acc_bits * math.ceil(self.calc_tmem() / 64) + comp_luts = (2**B - 1) * acc_bits return int(c0 + c1 * (P * (mult_luts + acc_luts + thr_luts + comp_luts)) + c2) diff --git a/src/finn/qnn-data/build_dataflow/folding_config.json b/src/finn/qnn-data/build_dataflow/folding_config.json index 1fbe289608f68c296c4d86fd0dbe4a07e3d70277..95167f1a306f1edefc9deb460413b16768dc96d5 100644 --- a/src/finn/qnn-data/build_dataflow/folding_config.json +++ b/src/finn/qnn-data/build_dataflow/folding_config.json @@ -4,22 +4,22 @@ "PE": 49, "ram_style": "distributed" }, - "StreamingFCLayer_Batch_0": { + "MatrixVectorActivation_0": { "PE": 16, "SIMD": 49, "ram_style": "block" }, - "StreamingFCLayer_Batch_1": { + "MatrixVectorActivation_1": { "PE": 8, "SIMD": 8, "ram_style": "auto" }, - "StreamingFCLayer_Batch_2": { + "MatrixVectorActivation_2": { "PE": 8, "SIMD": 8, "ram_style": "auto" }, - "StreamingFCLayer_Batch_3": { + "MatrixVectorActivation_3": { "PE": 10, "SIMD": 8, "ram_style": "distributed" diff --git a/src/finn/qnn-data/mdd-data/finn_design.mdd b/src/finn/qnn-data/mdd-data/finn_design.mdd new file mode 100644 index 0000000000000000000000000000000000000000..517180fa94079ad3e04d3a45776f165fd82cc483 --- /dev/null +++ b/src/finn/qnn-data/mdd-data/finn_design.mdd @@ -0,0 +1,36 @@ +# Copyright (c) 2022 Advanced Micro Devices, Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of Advanced Micro Devices nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +OPTION psf_version = 2.1; + +BEGIN driver finn_design + OPTION supported_peripherals = (finn_design); + OPTION driver_state = ACTIVE; + OPTION VERSION = 1.0; + OPTION NAME = finn_design; +END driver diff --git a/src/finn/qnn-data/mdd-data/finn_design.tcl b/src/finn/qnn-data/mdd-data/finn_design.tcl new file mode 100644 index 0000000000000000000000000000000000000000..b8c55e12b22a2152157cbecd2b0b4bf061e9918a --- /dev/null +++ b/src/finn/qnn-data/mdd-data/finn_design.tcl @@ -0,0 +1,58 @@ +# Copyright (c) 2022 Advanced Micro Devices, Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of Advanced Micro Devices nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# @brief Address range defines for FINN IP. +# @author Thomas B. Preußer <thomas.preusser@amd.com> +## + +proc generate {drv_handle} { + # Bounds of all exposed slave address ranges to xparameters.h + set file_handle [hsi::utils::open_include_file "xparameters.h"] + generate_memrange_parameters $drv_handle $file_handle + close $file_handle +} + +proc generate_memrange_parameters {drv_handle file_handle} { + # Collect unique slave interfaces to custom module + array unset ranges + foreach mem_range [hsi::get_mem_ranges -of_object [hsi::get_cells -hier [hsi::get_sw_processor]] $drv_handle] { + set ranges([common::get_property SLAVE_INTERFACE $mem_range]) [list \ + [common::get_property BASE_NAME $mem_range] \ + [common::get_property BASE_VALUE $mem_range] \ + [common::get_property HIGH_NAME $mem_range] \ + [common::get_property HIGH_VALUE $mem_range] \ + ] + } + + # Produce defines for the address range bounds + set prefix "XPAR_[string toupper $drv_handle]" + foreach {key val} [array get ranges] { + puts $file_handle "#define [format "%s_%s_%s" $prefix $key [lindex $val 0]] [lindex $val 1]" + puts $file_handle "#define [format "%s_%s_%s" $prefix $key [lindex $val 2]] [lindex $val 3]" + } + puts $file_handle "" +} diff --git a/src/finn/qnn-data/test_ext_weights/tfc-w1a1-extw.json b/src/finn/qnn-data/test_ext_weights/tfc-w1a1-extw.json index 299a8be815aeaba70c0f41e4b1b3252b77c6f042..442ea72d9a5877c60a25c15b296787e4ac04ce1b 100644 --- a/src/finn/qnn-data/test_ext_weights/tfc-w1a1-extw.json +++ b/src/finn/qnn-data/test_ext_weights/tfc-w1a1-extw.json @@ -4,22 +4,22 @@ "PE": 49, "ram_style": "distributed" }, - "StreamingFCLayer_Batch_0": { + "MatrixVectorActivation_0": { "PE": 16, "SIMD": 49, "ram_style": "block" }, - "StreamingFCLayer_Batch_1": { + "MatrixVectorActivation_1": { "PE": 8, "SIMD": 8, "mem_mode": "external" }, - "StreamingFCLayer_Batch_2": { + "MatrixVectorActivation_2": { "PE": 8, "SIMD": 8, "mem_mode": "external" }, - "StreamingFCLayer_Batch_3": { + "MatrixVectorActivation_3": { "PE": 10, "SIMD": 8, "ram_style": "distributed" diff --git a/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py b/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py index cd08bb46032ecb86861f26025bb48f26e8b98230..e3faa03ace5dc856e9571773c2b5a907f794fa89 100644 --- a/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py +++ b/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py @@ -51,7 +51,6 @@ class InferConvInpGen(Transformation): def __init__(self, use_rtl_variant=False): super().__init__() self.use_rtl_variant = use_rtl_variant - self.use_rtl_variant = True #testing def apply(self, model): graph = model.graph @@ -225,15 +224,15 @@ class InferConvInpGen(Transformation): depthwise=depthwise, name="ConvolutionInputGenerator_" + n.name, ) - else: # non-square images and/or kernels + else: # 1D images and/or kernels assert is_1d_convolution, ( "%s: ConvolutionInputGenerator1D works only for 1D convs" % n.name ) if dilation_h > 1 or dilation_w > 1: - assert stride_h == 1 and stride_w == 1, ( - """%s: Stride value of greater than 1 is not supported for convolutions - with dilation value greater than 1""" + assert depthwise == 1, ( + """%s: Dilation value > 1 is only supported for + 1D depthwise separable convolutions""" % n.name ) ConvInpGen_node = helper.make_node( @@ -367,20 +366,27 @@ class InferStreamingMaxPool(Transformation): graph = model.graph node_ind = 0 graph_modified = False - for n in graph.node: + for node in graph.node: node_ind += 1 - if n.op_type == "MaxPoolNHWC": - mp_input = n.input[0] - mp_output = n.output[0] + if node.op_type == "MaxPoolNHWC": + mp_input = node.input[0] + mp_output = node.output[0] mp_in_shape = model.get_tensor_shape(mp_input) # mp_out_shape = model.get_tensor_shape(mp_output) dt = model.get_tensor_datatype(mp_input) - mp_inst = getCustomOp(n) + mp_inst = getCustomOp(node) k_h, k_w = mp_inst.get_nodeattr("kernel_shape") ifm_ch = mp_in_shape[-1] ifm_dim_h = mp_in_shape[1] ifm_dim_w = mp_in_shape[2] - if ifm_dim_h % k_h == 0 and ifm_dim_w % k_w == 0: + pe = 1 + ceil_mode = mp_inst.get_nodeattr("ceil_mode") + is_1d = (ifm_dim_h == 1 and k_h == 1) or (ifm_dim_w == 1 and k_w == 1) + is_divisable = (ifm_dim_h % k_h == 0) or (ifm_dim_w % k_w == 0) + is_bipolar = dt == DataType["BIPOLAR"] + pass_1d = is_1d and (not is_bipolar) + pass_2d = (not is_1d) and is_divisable + if pass_1d or pass_2d: # create equivalent StreamingMaxPool_Batch node new_node = helper.make_node( "StreamingMaxPool_Batch", @@ -392,12 +398,16 @@ class InferStreamingMaxPool(Transformation): NumChannels=ifm_ch, ImgDim=(ifm_dim_h, ifm_dim_w), dataType=dt.name, - name="StreamingMaxPool_Batch_" + n.name, + PE=pe, + CeilMode=ceil_mode, + name="StreamingMaxPool_Batch_" + node.name, ) graph.node.insert(node_ind, new_node) # remove old nodes - graph.node.remove(n) + graph.node.remove(node) graph_modified = True + else: + warnings.warn(node.name + ": could not convert to HLS") if graph_modified: model = model.transform(InferShapes()) model = model.transform(InferDataTypes()) @@ -413,62 +423,57 @@ class InferPool_Batch(Transformation): graph = model.graph node_ind = 0 graph_modified = False - for n in graph.node: + for node in graph.node: node_ind += 1 - if n.op_type in ["MaxPool", "QuantAvgPool2d", "MaxPoolNHWC"]: - # extract pool parameters + if node.op_type in ["MaxPool", "QuantAvgPool2d", "MaxPoolNHWC"]: + node_input = node.input[0] + ishape = model.get_tensor_shape(node_input) + node_output = node.output[0] + idt = model.get_tensor_datatype(node_input) + oshape = model.get_tensor_shape(node_output) + # only support 4D input tensors (1D convs need extra dummy dim) + if len(ishape) != 4: + continue - if n.op_type == "MaxPool": - k = get_by_name(n.attribute, "kernel_shape").ints[-1] - stride = get_by_name(n.attribute, "strides").ints[-1] - # assumed datalayout + # extract pool parameters + if node.op_type == "MaxPool": + kh, kw = list(get_by_name(node.attribute, "kernel_shape").ints) + sh, sw = list(get_by_name(node.attribute, "strides").ints) dlayout = "NCHW" - elif n.op_type == "QuantAvgPool2d": - inst = getCustomOp(n) - k = inst.get_nodeattr("kernel") - stride = inst.get_nodeattr("stride") + elif node.op_type == "QuantAvgPool2d": + inst = getCustomOp(node) + # QuantAvgPool2d has a single scalar attribute + # for kernel size and stride (implicit square) + kh = kw = inst.get_nodeattr("kernel") + sh = sw = inst.get_nodeattr("stride") dlayout = inst.get_nodeattr("data_layout") - elif n.op_type == "MaxPoolNHWC": - inst = getCustomOp(n) - k_shape = inst.get_nodeattr("kernel_shape") - strides = inst.get_nodeattr("strides") - assert k_shape[0] == k_shape[1] - assert strides[0] == strides[1] - k = k_shape[0] - stride = strides[0] + elif node.op_type == "MaxPoolNHWC": + inst = getCustomOp(node) + kh, kw = inst.get_nodeattr("kernel_shape") + sh, sw = inst.get_nodeattr("strides") dlayout = "NHWC" try: - pad = get_by_name(n.attribute, "pads").ints[-1] + pad = list(get_by_name(node.attribute, "pads").ints) except AttributeError: - pad = 0 - - node_input = n.input[0] - node_output = n.output[0] - idt = model.get_tensor_datatype(node_input) + pad = [0, 0, 0, 0] if not idt.is_integer(): continue - if k < stride: + if (kh < sh) or (kw < sw): + # TODO check/implement swg support continue - elif k == stride: - warnings.warn( - n.name - + """: Inferring Pool_Batch node for k == stride. - This case can be optimized. - For example, for MaxPool run InferStreamingMaxPool before - InferPool_Batch """ - ) odt = model.get_tensor_datatype(node_output) if dlayout == "NCHW": - ifm_ch = model.get_tensor_shape(n.input[0])[1] + _, ifm_ch, ifm_h, ifm_w = ishape + _, ofm_ch, ofm_h, ofm_w = oshape + elif dlayout == "NHWC": + _, ifm_h, ifm_w, ifm_ch = ishape + _, ofm_h, ofm_w, ofm_ch = oshape else: - ifm_ch = model.get_tensor_shape(n.input[0])[-1] - ofm_ch = ifm_ch - ifm_dim = model.get_tensor_shape(n.input[0])[-2] - ofm_dim = model.get_tensor_shape(n.output[0])[-2] + raise Exception("Unknown dlayout: " + str(dlayout)) # if data layout NCHW, we need transpose nodes surrounding # the hls layer @@ -477,7 +482,7 @@ class InferPool_Batch(Transformation): inp_trans_out = helper.make_tensor_value_info( model.make_new_valueinfo_name(), TensorProto.FLOAT, - (1, ifm_dim, ifm_dim, ifm_ch), # NHWC + (1, ifm_h, ifm_w, ifm_ch), # NHWC ) graph.value_info.append(inp_trans_out) inp_trans_out = inp_trans_out.name @@ -486,7 +491,7 @@ class InferPool_Batch(Transformation): pool_output = helper.make_tensor_value_info( model.make_new_valueinfo_name(), TensorProto.FLOAT, - (1, ofm_dim, ofm_dim, ofm_ch), + (1, ofm_h, ofm_w, ofm_ch), ) graph.value_info.append(pool_output) pool_output = pool_output.name @@ -495,7 +500,7 @@ class InferPool_Batch(Transformation): im2col_out = helper.make_tensor_value_info( model.make_new_valueinfo_name(), TensorProto.FLOAT, - (1, ofm_dim, ofm_dim, ifm_ch * k * k), + (1, ofm_h, ofm_w, ifm_ch * kh * kw), ) graph.value_info.append(im2col_out) im2col_out = im2col_out.name @@ -513,24 +518,28 @@ class InferPool_Batch(Transformation): pool_output = node_output accum_bits = 0 - pool_size_param = k + pool_size_param = 0 # will be overridden if neededs pad_value = 0 - if n.op_type in ["MaxPool", "MaxPoolNHWC"]: + if node.op_type in ["MaxPool", "MaxPoolNHWC"]: pool_fxn = "MaxPool" odt = idt pad_value = idt.min() - elif n.op_type == "QuantAvgPool2d": + elif node.op_type == "QuantAvgPool2d": assert odt.is_integer(), """Output data type for QuantAvgPool2d needs to be integer""" - assert pad == 0, "Padding is not supported for QuantAvgPool2d" - inst = getCustomOp(n) + assert all( + x == 0 for x in pad + ), "Padding is not supported for QuantAvgPool2d" + inst = getCustomOp(node) pool_fxn = "QuantAvgPool" pool_size_param = inst.get_shifts() accum_bits = inst.get_accum_size() else: raise Exception( - "pad_value and pool_fxn not configured for {}".format(n.op_type) + "pad_value and pool_fxn not configured for {}".format( + node.op_type + ) ) # format input tensor @@ -539,13 +548,13 @@ class InferPool_Batch(Transformation): [im2col_in], [im2col_out], domain="finn.custom_op.general", - stride=[stride, stride], - kernel_size=[k, k], - pad_amount=[pad, pad, pad, pad], + stride=[sh, sw], + kernel_size=[kh, kw], + pad_amount=pad, pad_value=pad_value, depthwise=1, - input_shape="(1,{},{},{})".format(ifm_dim, ifm_dim, ifm_ch), - name="Im2Col_" + n.name, + input_shape="(1,{},{},{})".format(ifm_h, ifm_w, ifm_ch), + name="Im2Col_" + node.name, ) # Warning PE has to be equal to ifm_ch until Im2Col is replaced by @@ -562,13 +571,13 @@ class InferPool_Batch(Transformation): OutputDataType=odt.name, Channels=ifm_ch, PE=ifm_ch, - KernelSize=k, + KernelSize=[kh, kw], Function=pool_fxn, - OutImgDim=ofm_dim, + OutImgDims=[ofm_h, ofm_w], AccumBits=accum_bits, Size=pool_size_param, BatchSize=1, - name="Pool_Batch_" + n.name, + name="Pool_Batch_" + node.name, ) if dlayout == "NCHW": @@ -587,7 +596,7 @@ class InferPool_Batch(Transformation): graph.node.insert(node_ind, im2col_node) graph.node.insert(node_ind + 1, pool_node) # remove old node - graph.node.remove(n) + graph.node.remove(node) graph_modified = True if graph_modified: @@ -596,9 +605,9 @@ class InferPool_Batch(Transformation): return (model, graph_modified) -class InferBinaryStreamingFCLayer(Transformation): +class InferBinaryMatrixVectorActivation(Transformation): """Convert XnorPopcountMatMul layers to - StreamingFCLayer_Batch layers. Any immediately following MultiThreshold + MatrixVectorActivation layers. Any immediately following MultiThreshold layers will also be absorbed into the MVTU.""" def __init__(self, mem_mode="const"): @@ -668,9 +677,9 @@ class InferBinaryStreamingFCLayer(Transformation): actval = odt.min() model.set_tensor_shape(mm_input, mm_in_shape) model.set_tensor_shape(mt_output, mt_out_shape) - # create and insert new StreamingFCLayer node + # create and insert new MatrixVectorActivation node new_node = helper.make_node( - "StreamingFCLayer_Batch", + "MatrixVectorActivation", [mm_input, mm_weight, mt_thres], [mt_output], domain="finn.custom_op.fpgadataflow", @@ -699,9 +708,9 @@ class InferBinaryStreamingFCLayer(Transformation): odt = model.get_tensor_datatype(mm_output) model.set_tensor_shape(mm_input, mm_in_shape) model.set_tensor_shape(mm_output, mm_out_shape) - # create and insert new StreamingFCLayer node + # create and insert new MatrixVectorActivation node new_node = helper.make_node( - "StreamingFCLayer_Batch", + "MatrixVectorActivation", [mm_input, mm_weight], [mm_output], domain="finn.custom_op.fpgadataflow", @@ -731,9 +740,9 @@ class InferBinaryStreamingFCLayer(Transformation): return (model, graph_modified) -class InferQuantizedStreamingFCLayer(Transformation): +class InferQuantizedMatrixVectorActivation(Transformation): """Convert MatMul layers with quantized inputs and weights to - StreamingFCLayer_Batch layers. Any immediately following MultiThreshold + MatrixVectorActivation layers. Any immediately following MultiThreshold layers will also be absorbed into the MVTU.""" def __init__(self, mem_mode="const"): @@ -811,9 +820,9 @@ class InferQuantizedStreamingFCLayer(Transformation): # remove bias for bipolar, since # binary->bipolar is achieved by reinterpretation actval = 0 - # create and insert new StreamingFCLayer node + # create and insert new MatrixVectorActivation node new_node = helper.make_node( - "StreamingFCLayer_Batch", + "MatrixVectorActivation", [mm_input, mm_weight, mt_thres], [mt_output], domain="finn.custom_op.fpgadataflow", @@ -830,7 +839,7 @@ class InferQuantizedStreamingFCLayer(Transformation): noActivation=0, numInputVectors=list(mm_in_shape[:-1]), mem_mode=self.mem_mode, - name="StreamingFCLayer_Batch_" + n.name, + name="MatrixVectorActivation_" + n.name, ) graph.node.insert(node_ind, new_node) # remove old nodes @@ -842,9 +851,9 @@ class InferQuantizedStreamingFCLayer(Transformation): odt = model.get_tensor_datatype(mm_output) model.set_tensor_shape(mm_input, mm_in_shape) model.set_tensor_shape(mm_output, mm_out_shape) - # create and insert new StreamingFCLayer node + # create and insert new MatrixVectorActivation node new_node = helper.make_node( - "StreamingFCLayer_Batch", + "MatrixVectorActivation", [mm_input, mm_weight], [mm_output], domain="finn.custom_op.fpgadataflow", @@ -861,7 +870,7 @@ class InferQuantizedStreamingFCLayer(Transformation): noActivation=1, numInputVectors=list(mm_in_shape[:-1]), mem_mode=self.mem_mode, - name="StreamingFCLayer_Batch_" + n.name, + name="MatrixVectorActivation_" + n.name, ) graph.node.insert(node_ind, new_node) # remove old node @@ -874,9 +883,9 @@ class InferQuantizedStreamingFCLayer(Transformation): return (model, graph_modified) -class InferVVAU(Transformation): +class InferVectorVectorActivation(Transformation): """Convert MatMul layers with quantized inputs and weights to - Vector_Vector_Activate_Batch layers, if the sparsity annotation + VectorVectorActivation layers, if the sparsity annotation of the weight matrix indicates that the MatMul layer belongs to a depthwise convolution. Any immediately following MultiThreshold layers will also be absorbed into the VVAU.""" @@ -963,9 +972,9 @@ class InferVVAU(Transformation): ) model.set_tensor_shape(mm_input, mm_in_shape) model.set_tensor_shape(mt_output, mt_out_shape) - # create and insert new Vector_Vector_Activate_Batch node + # create and insert new VectorVectorActivation node new_node = helper.make_node( - "Vector_Vector_Activate_Batch", + "VectorVectorActivation", [mm_input, mm_weight, mt_thres], [mt_output], domain="finn.custom_op.fpgadataflow", @@ -980,7 +989,7 @@ class InferVVAU(Transformation): outputDataType=odt.name, ActVal=actval, noActivation=0, - name="Vector_Vector_Activate_Batch_" + n.name, + name="VectorVectorActivation_" + n.name, ) graph.node.insert(node_ind, new_node) # remove old nodes @@ -994,7 +1003,7 @@ class InferVVAU(Transformation): model.set_tensor_shape(mm_output, mm_out_shape) # create and insert new VVAU node new_node = helper.make_node( - "Vector_Vector_Activate_Batch", + "VectorVectorActivation", [mm_input, mm_weight], [mm_output], domain="finn.custom_op.fpgadataflow", @@ -1009,7 +1018,7 @@ class InferVVAU(Transformation): outputDataType=odt.name, ActVal=0, noActivation=1, - name="Vector_Vector_Activate_Batch_" + n.name, + name="VectorVectorActivation_" + n.name, ) graph.node.insert(node_ind, new_node) # remove old node @@ -1174,7 +1183,7 @@ class InferAddStreamsLayer(Transformation): # create node with no parallelization first pe = 1 - # create and insert new StreamingFCLayer node + # create and insert new AddStreams_Batch node new_node = helper.make_node( "AddStreams_Batch", [in0, in1], @@ -1208,8 +1217,9 @@ class InferDuplicateStreamsLayer(Transformation): for node in graph.node: node_ind += 1 successors = model.find_consumers(node.output[0]) - if successors is not None and len(successors) == 2: + if successors is not None and len(successors) >= 2: output_tensor = node.output[0] + n_outputs = len(successors) dt = model.get_tensor_datatype(output_tensor) @@ -1220,7 +1230,7 @@ class InferDuplicateStreamsLayer(Transformation): # create clone tensors out_shape = model.get_tensor_shape(output_tensor) out_tensor_clones = [] - for i in range(2): + for i in range(n_outputs): clone = helper.make_tensor_value_info( model.make_new_valueinfo_name(), TensorProto.FLOAT, out_shape ) @@ -1243,6 +1253,7 @@ class InferDuplicateStreamsLayer(Transformation): PE=pe, inputDataType=dt.name, numInputVectors=vecs, + NumOutputStreams=n_outputs, name="DuplicateStreams_Batch_" + node.name, ) @@ -1458,7 +1469,7 @@ class InferLabelSelectLayer(Transformation): k = model.get_initializer(k_input)[0] - # create and insert new StreamingFCLayer node + # create and insert new LabelSelect_Batch node new_node = helper.make_node( "LabelSelect_Batch", [fc_input], @@ -1621,3 +1632,60 @@ class InferLookupLayer(Transformation): model = model.transform(InferShapes()) model = model.transform(InferDataTypes()) return (model, graph_modified) + + +class InferConcatLayer(Transformation): + """Convert suitable Concat nodes (operating on last/-1 axis) + into StreamingConcat HLS layers.""" + + def apply(self, model): + graph = model.graph + node_ind = 0 + graph_modified = False + for node in graph.node: + node_ind += 1 + if node.op_type == "Concat": + ishape = model.get_tensor_shape(node.input[0]) + axis = get_by_name(node.attribute, "axis") + if (axis is None) or (ishape is None): + continue + axis = axis.i + last_axis = len(ishape) - 1 + # skip conversion if not using last axis + if (axis != -1) and (axis != last_axis): + continue + # check datatype coherence + dt0 = model.get_tensor_datatype(node.input[0]) + if dt0 is None: + continue + dt_coherent = all( + [model.get_tensor_datatype(x) == dt0 for x in node.input] + ) + if not dt_coherent: + continue + # skip conversion if inputs are not integers + if not dt0.is_integer(): + continue + # ready for conversion + elems_per_stream = [model.get_tensor_shape(x)[-1] for x in node.input] + inp_vec = list(model.get_tensor_shape(node.input[0])[:-1]) + new_node = helper.make_node( + "StreamingConcat", + node.input, + node.output, + domain="finn.custom_op.fpgadataflow", + backend="fpgadataflow", + name="Concat_" + node.name, + ElemsPerStream=elems_per_stream, + inputDataType=dt0.name, + numInputVectors=inp_vec, + ) + graph.node.insert(node_ind, new_node) + # remove old node + graph.node.remove(node) + graph_modified = True + + if graph_modified: + model = model.transform(InferShapes()) + model = model.transform(InferDataTypes()) + return (model, graph_modified) \ No newline at end of file diff --git a/src/finn/transformation/fpgadataflow/create_stitched_ip.py b/src/finn/transformation/fpgadataflow/create_stitched_ip.py index 327c7867fe30485f6df51d5e98dcbbaceea04cd8..0f410ec7a083ce2d68c40a9c5495365a17df4e13 100644 --- a/src/finn/transformation/fpgadataflow/create_stitched_ip.py +++ b/src/finn/transformation/fpgadataflow/create_stitched_ip.py @@ -26,11 +26,14 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +import pkg_resources as pk + import json import multiprocessing as mp import os import subprocess import warnings +from shutil import copytree from finn.custom_op.registry import getCustomOp from finn.transformation.base import Transformation @@ -51,7 +54,7 @@ def is_external_input(model, node, i): if model.get_initializer(node.input[i]) is None: return True else: - if node.op_type == "StreamingFCLayer_Batch": + if node.op_type == "MatrixVectorActivation": if node_inst.get_nodeattr("mem_mode") == "external": return True return False @@ -61,7 +64,9 @@ def is_external_output(model, node, i): # indicate whether output i of node should be made external # True only if output is unconnected consumers = model.find_consumers(node.output[i]) - if consumers is None: + if consumers == []: + # TODO should ideally check if tensor is in top-level + # outputs return True return False @@ -80,12 +85,15 @@ class CreateStitchedIP(Transformation): The packaged block design IP can be found under the ip subdirectory. """ - def __init__(self, fpgapart, clk_ns, ip_name="finn_design", vitis=False): + def __init__( + self, fpgapart, clk_ns, ip_name="finn_design", vitis=False, signature=[] + ): super().__init__() self.fpgapart = fpgapart self.clk_ns = clk_ns self.ip_name = ip_name self.vitis = vitis + self.signature = signature self.has_aximm = False self.has_m_axis = False self.m_axis_idx = 0 @@ -160,6 +168,16 @@ class CreateStitchedIP(Transformation): self.connect_cmds.append( "set_property name m_axi_gmem0 [get_bd_intf_ports m_axi_gmem_0]" ) + self.connect_cmds.append("assign_bd_address") + seg_name = "%s/Data_m_axi_gmem/SEG_m_axi_gmem0_Reg" % (inst_name) + self.connect_cmds.append( + "set_property offset 0 [get_bd_addr_segs {%s}]" % (seg_name) + ) + # TODO should propagate this information from the node instead of 4G + self.connect_cmds.append( + "set_property range 4G [get_bd_addr_segs {%s}]" % (seg_name) + ) + self.intf_names["aximm"] = [("m_axi_gmem0", aximm_intf_name[0][1])] assert self.has_aximm is False, "Currently limited to one AXI-MM interface" self.has_aximm = True @@ -210,12 +228,65 @@ class CreateStitchedIP(Transformation): ) self.s_axis_idx += 1 + def insert_signature(self, checksum_count): + signature_vlnv = "AMD:user:axi_info_top:1.0" + signature_name = "axi_info_top0" + self.create_cmds.append( + "create_bd_cell -type ip -vlnv %s %s" % (signature_vlnv, signature_name) + ) + self.create_cmds.append( + "set_property -dict [list " + "CONFIG.SIG_CUSTOMER {%s} " + "CONFIG.SIG_APPLICATION {%s} " + "CONFIG.VERSION {%s} " + "CONFIG.CHECKSUM_COUNT {%s} " + "] [get_bd_cells %s]" + % ( + self.signature[0], + self.signature[1], + self.signature[2], + checksum_count, + signature_name, + ) + ) + # set clk and reset + self.connect_cmds.append( + "connect_bd_net [get_bd_ports ap_clk] [get_bd_pins %s/ap_clk]" + % signature_name + ) + self.connect_cmds.append( + "connect_bd_net [get_bd_ports ap_rst_n] [get_bd_pins %s/ap_rst_n]" + % signature_name + ) + fclk_mhz = 1 / (self.clk_ns * 0.001) + fclk_hz = fclk_mhz * 1000000 + self.connect_cmds.append( + "set_property -dict [list " + "CONFIG.FREQ_HZ {%f} " + "CONFIG.CLK_DOMAIN {ap_clk} " + "] [get_bd_intf_pins %s/s_axi]" + % ( + fclk_hz, + signature_name, + ) + ) + # make axilite interface external + self.connect_cmds.append( + "make_bd_intf_pins_external [get_bd_intf_pins %s/s_axi]" % signature_name + ) + self.connect_cmds.append( + "set_property name s_axis_info [get_bd_intf_ports s_axi_0]" + ) + self.connect_cmds.append("assign_bd_address") + def apply(self, model): # ensure non-relative readmemh .dat files model = model.transform(ReplaceVerilogRelPaths()) ip_dirs = ["list"] # add RTL streamer IP - ip_dirs.append("/workspace/finn/finn-rtllib/memstream") + ip_dirs.append("$::env(FINN_ROOT)/finn-rtllib/memstream") + if self.signature: + ip_dirs.append("$::env(FINN_ROOT)/finn-rtllib/axi_info") if model.graph.node[0].op_type not in ["StreamingFIFO", "IODMA"]: warnings.warn( """First node is not StreamingFIFO or IODMA. @@ -223,8 +294,8 @@ class CreateStitchedIP(Transformation): behavior. It is strongly recommended to insert FIFOs prior to calling CreateStitchedIP.""" ) - # ensure that all nodes are fpgadataflow, and that IPs are generated for node in model.graph.node: + # ensure that all nodes are fpgadataflow, and that IPs are generated assert is_fpgadataflow_node( node ), "All nodes must be FINN fpgadataflow nodes." @@ -236,9 +307,7 @@ class CreateStitchedIP(Transformation): self.connect_clk_rst(node) self.connect_axi(node) for i in range(len(node.input)): - if is_external_input(model, node, i): - self.connect_s_axis_external(node, idx=i) - else: + if not is_external_input(model, node, i): producer = model.find_producer(node.input[i]) if producer is None: continue @@ -254,10 +323,32 @@ class CreateStitchedIP(Transformation): "[get_bd_intf_pins %s/%s]" % (producer.name, src_intf_name, node.name, dst_intf_name) ) + + # process external inputs and outputs in top-level graph input order + for input in model.graph.input: + inp_name = input.name + inp_cons = model.find_consumers(inp_name) + assert inp_cons != [], "No consumer for input " + inp_name + assert len(inp_cons) == 1, "Multiple consumers for input " + inp_name + node = inp_cons[0] + node_inst = getCustomOp(node) + for i in range(len(node.input)): + if node.input[i] == inp_name: + self.connect_s_axis_external(node, idx=i) + for output in model.graph.output: + out_name = output.name + node = model.find_producer(out_name) + assert node is not None, "No producer for output " + out_name + node_inst = getCustomOp(node) for i in range(len(node.output)): - if is_external_output(model, node, i): + if node.output[i] == out_name: self.connect_m_axis_external(node, idx=i) + if self.signature: + # extract number of checksum layer from graph + checksum_layers = model.get_nodes_by_op_type("checksum") + self.insert_signature(len(checksum_layers)) + # create a temporary folder for the project prjname = "finn_vivado_stitch_proj" vivado_stitch_proj_dir = make_build_dir(prefix="vivado_stitch_proj_") @@ -316,7 +407,10 @@ class CreateStitchedIP(Transformation): tcl.append("write_verilog -force -mode synth_stub %s.v" % block_name) tcl.append("write_checkpoint %s.dcp" % block_name) tcl.append("write_xdc %s.xdc" % block_name) - tcl.append("report_utilization -file %s_partition_util.rpt" % block_name) + tcl.append( + "report_utilization -hierarchical -hierarchical_depth 5 " + "-file %s_partition_util.rpt" % block_name + ) # export block design itself as an IP core block_vendor = "xilinx_finn" block_library = "finn" @@ -414,6 +508,21 @@ class CreateStitchedIP(Transformation): "ipx::add_file dcp/%s.dcp " "[ipx::get_file_groups xilinx_simulationcheckpoint]" % block_name ) + # add a rudimentary driver mdd to get correct ranges in xparameters.h later on + example_data_dir = pk.resource_filename("finn.qnn-data", "mdd-data/") + copytree(example_data_dir, vivado_stitch_proj_dir + "/data") + tcl.append("file copy -force data ip/") + tcl.append("ipx::add_file_group -type software_driver {} [ipx::current_core]") + tcl.append( + "set_property type mdd [ipx::add_file data/finn_design.mdd " + "[ipx::get_file_groups xilinx_softwaredriver -of_objects " + "[ipx::current_core]]]" + ) + tcl.append( + "set_property type tclSource [ipx::add_file data/finn_design.tcl " + "[ipx::get_file_groups xilinx_softwaredriver -of_objects " + "[ipx::current_core]]]" + ) tcl.append("ipx::update_checksums [ipx::find_open_core %s]" % block_vlnv) tcl.append("ipx::save_core [ipx::find_open_core %s]" % block_vlnv) # export list of used Verilog files (for rtlsim later on) @@ -441,4 +550,13 @@ class CreateStitchedIP(Transformation): bash_command = ["bash", make_project_sh] process_compile = subprocess.Popen(bash_command, stdout=subprocess.PIPE) process_compile.communicate() + # wrapper may be created in different location depending on Vivado version + if not os.path.isfile(wrapper_filename): + # check in alternative location (.gen instead of .srcs) + wrapper_filename_alt = wrapper_filename.replace(".srcs", ".gen") + if os.path.isfile(wrapper_filename_alt): + model.set_metadata_prop("wrapper_filename", wrapper_filename_alt) + else: + raise Exception("CreateStitchedIP failed, no wrapper HDL found.") + return (model, False) diff --git a/src/finn/transformation/fpgadataflow/floorplan.py b/src/finn/transformation/fpgadataflow/floorplan.py index 2bda7883130d0863b7f67943d19caa00b7290de5..ec5afef506ab81eeb7bdc45c49bdebbdd3742338 100644 --- a/src/finn/transformation/fpgadataflow/floorplan.py +++ b/src/finn/transformation/fpgadataflow/floorplan.py @@ -151,7 +151,7 @@ class Floorplan(Transformation): partition_cnt += 1 continue elif not ( - node.op_type == "StreamingFCLayer_Batch" + node.op_type == "MatrixVectorActivation" and node_inst.get_nodeattr("mem_mode") is not None and node_inst.get_nodeattr("mem_mode") == "external" ): diff --git a/src/finn/transformation/fpgadataflow/insert_dwc.py b/src/finn/transformation/fpgadataflow/insert_dwc.py index 58efe65eb5f9d96d74cdf40672703fabe76afb0d..627b9b9af016fd33553dfef9155d192b17937e2d 100644 --- a/src/finn/transformation/fpgadataflow/insert_dwc.py +++ b/src/finn/transformation/fpgadataflow/insert_dwc.py @@ -1,4 +1,3 @@ -import warnings from onnx import TensorProto from onnx import helper as oh @@ -46,25 +45,25 @@ class InsertDWC(Transformation): if _suitable_node(n): for output_name in n.output: consumers = model.find_consumers(output_name) - if consumers is None: + if consumers == []: continue - if len(consumers) > 1: - warnings.warn( - n.name - + ": HLS node with fan-out higher than 1 cannot be stitched" - ) - + assert len(consumers) == 1, ( + n.name + + ": HLS node with fan-out higher than 1 cannot be stitched" + ) consumer = consumers[0] if _suitable_node(consumer) is True: n0 = getCustomOp(n) n1 = getCustomOp(consumer) n0_out_shape = n0.get_folded_output_shape() - - # If FC and external mem, it could be connected to input 1 + # in some special cases, we need to get folded shapes of + # non-default inputs for the consumer + # - if FC and external mem, it could be connected to input 1 + # - if concat, could be connected to any input if ( - consumer.op_type == "StreamingFCLayer_Batch" + consumer.op_type == "MatrixVectorActivation" and n1.get_nodeattr("mem_mode") == "external" - ): + ) or (consumer.op_type == "StreamingConcat"): # get input idx in_idx = None for idx, n_input in enumerate(consumer.input): @@ -73,6 +72,7 @@ class InsertDWC(Transformation): assert in_idx is not None, "Malformed model" n1_in_shape = n1.get_folded_input_shape(in_idx) else: + # use default folded input shape n1_in_shape = n1.get_folded_input_shape() if n0_out_shape[-1] != n1_in_shape[-1]: diff --git a/src/finn/transformation/fpgadataflow/insert_fifo.py b/src/finn/transformation/fpgadataflow/insert_fifo.py index c8bb716922823876f5f16ffe62f17c425d49aa74..26613849060e361a6bc93483e3e1d8416e1fd97f 100644 --- a/src/finn/transformation/fpgadataflow/insert_fifo.py +++ b/src/finn/transformation/fpgadataflow/insert_fifo.py @@ -57,21 +57,21 @@ class InsertFIFO(Transformation): graph = model.graph node_ind = -1 graph_modified = False - for n in graph.node: + for first_node in graph.node: node_ind += 1 - if _suitable_node(n): - for n_output in n.output: + if _suitable_node(first_node): + for n_output in first_node.output: consumers = model.find_consumers(n_output) - if consumers is None: + if consumers == []: continue if len(consumers) > 1: warnings.warn( - n.name + first_node.name + ": HLS node with fan-out higher than 1 cannot be stitched" ) consumer = consumers[0] if _suitable_node(consumer) is True: - n0 = getCustomOp(n) + n0 = getCustomOp(first_node) # determine fifo node attributes fld_shape = n0.get_folded_output_shape() dtype = n0.get_output_datatype() @@ -137,47 +137,54 @@ class InsertFIFO(Transformation): graph_modified = True if graph_modified is False: - # insert FIFO as first node, except when first node is DMA - if ( - graph.node[0].op_type != "StreamingFIFO" - and graph.node[0].op_type != "IODMA" - ): - n = graph.node[0] - n_input = n.input[0] - n0 = getCustomOp(n) - # determine fifo node attributes - fld_shape = n0.get_folded_input_shape() - dtype = n0.get_input_datatype() - fifo_depth = n0.get_nodeattr("inFIFODepth") - - if fifo_depth <= 2: - warnings.warn("Overriding input FIFO depth to 32") - fifo_depth = 32 - - # create fifo node - fifo_output_tensor = oh.make_tensor_value_info( - model.make_new_valueinfo_name(), - TensorProto.FLOAT, - n0.get_normal_input_shape(), - ) - graph.value_info.append(fifo_output_tensor) - model.set_tensor_datatype(fifo_output_tensor.name, dtype) - - fifo_node = oh.make_node( - "StreamingFIFO", - [n_input], - [fifo_output_tensor.name], - domain="finn.custom_op.fpgadataflow", - backend="fpgadataflow", - depth=fifo_depth, - folded_shape=fld_shape, - dataType=str(dtype.name), - ) - # insert fifo - graph.node.insert(0, fifo_node) - - # set fifo output tensor as new input tensor of second node - n.input[0] = fifo_output_tensor.name + graph_in_names = [x.name for x in model.graph.input] + for graph_in_name in graph_in_names: + first_node = model.find_consumer(graph_in_name) + # insert FIFO as first node, except when first node is DMA + if ( + first_node.op_type != "StreamingFIFO" + and first_node.op_type != "IODMA" + ): + inp_ind = list(first_node.input).index(graph_in_name) + n_input = first_node.input[inp_ind] + n0 = getCustomOp(first_node) + # determine fifo node attributes + if inp_ind == 0: + fld_shape = n0.get_folded_input_shape() + dtype = n0.get_input_datatype() + else: + fld_shape = n0.get_folded_input_shape(inp_ind) + dtype = n0.get_input_datatype(inp_ind) + fifo_depth = n0.get_nodeattr("inFIFODepth") + + if fifo_depth <= 2: + warnings.warn("Overriding input FIFO depth to 32") + fifo_depth = 32 + + # create fifo node + fifo_output_tensor = oh.make_tensor_value_info( + model.make_new_valueinfo_name(), + TensorProto.FLOAT, + n0.get_normal_input_shape(), + ) + graph.value_info.append(fifo_output_tensor) + model.set_tensor_datatype(fifo_output_tensor.name, dtype) + + fifo_node = oh.make_node( + "StreamingFIFO", + [n_input], + [fifo_output_tensor.name], + domain="finn.custom_op.fpgadataflow", + backend="fpgadataflow", + depth=fifo_depth, + folded_shape=fld_shape, + dataType=str(dtype.name), + ) + # insert fifo + graph.node.insert(0, fifo_node) + + # set fifo output tensor as new input tensor of second node + first_node.input[inp_ind] = fifo_output_tensor.name # insert FIFO as last node, except when last node is DMA graph_out_names = [x.name for x in model.graph.output] diff --git a/src/finn/transformation/fpgadataflow/insert_hook.py b/src/finn/transformation/fpgadataflow/insert_hook.py new file mode 100644 index 0000000000000000000000000000000000000000..c1fce40c574eb58b67e728b78d31454f0c709b78 --- /dev/null +++ b/src/finn/transformation/fpgadataflow/insert_hook.py @@ -0,0 +1,102 @@ +import numpy as np +from onnx import TensorProto +from onnx import helper as oh + +from finn.custom_op.registry import getCustomOp +from finn.transformation.base import Transformation +from finn.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames +from finn.util.fpgadataflow import is_fpgadataflow_node + + +def _is_hook_node(node): + if node.op_type in ["checksum"]: + return True + else: + return False + + +def _suitable_node(node): + if node is not None: + if is_fpgadataflow_node(node) is True: + if _is_hook_node(node) is False: + return True + else: + return False + else: + return False + else: + return False + + +class InsertHook(Transformation): + """Inserting hook layer after each layer that has the node attribute + 'output_hook' specified""" + + def __init__(self): + super().__init__() + + def apply(self, model): + list_supported_hooks = ["checksum"] + graph = model.graph + node_ind = -1 + graph_modified = False + for n in graph.node: + node_ind += 1 + if _suitable_node(n): + for output_name in n.output: + consumers = model.find_consumers(output_name) + assert len(consumers) <= 1, ( + n.name + + ": HLS node with fan-out higher than 1 cannot be stitched" + ) + n0 = getCustomOp(n) + n0_hook = n0.get_nodeattr("output_hook") + if n0_hook in list_supported_hooks: + if n0_hook == "checksum": + if len(consumers) == 1: + if consumers[0].op_type == "checksum": + continue + n0_normal_oshape = n0.get_normal_output_shape() + n0_folded_oshape = n0.get_folded_output_shape() + n0_odt = n0.get_output_datatype() + items_per_word = n0.get_nodeattr("PE") + words_per_frame = np.prod(n0_folded_oshape[:-1]) + chk_otensor = oh.make_tensor_value_info( + model.make_new_valueinfo_name(), + TensorProto.FLOAT, + n0_normal_oshape, + ) + chk_result = oh.make_tensor_value_info( + model.make_new_valueinfo_name(), + TensorProto.FLOAT, + [1], + ) + chk_node = oh.make_node( + "checksum", + [output_name], + outputs=[chk_otensor.name, chk_result.name], + domain="finn.custom_op.fpgadataflow", + backend="fpgadataflow", + words_per_frame=words_per_frame, + items_per_word=items_per_word, + inputDataType=str(n0_odt.name), + folded_shape=n0_folded_oshape, + ) + # insert checksum node + graph.node.insert(node_ind + 1, chk_node) + # insert newly-created tensors + graph.value_info.append(chk_otensor) + graph.value_info.append(chk_result) + + # set chk output tensor as new input tensor of second node + if len(consumers) == 1: + consumers[0].input[0] = chk_otensor.name + else: + model.graph.output.pop() + model.graph.output.append(chk_otensor) + model = model.transform(GiveUniqueNodeNames()) + model = model.transform(GiveReadableTensorNames()) + graph_modified = True + return (model, graph_modified) + + return (model, graph_modified) diff --git a/src/finn/transformation/fpgadataflow/insert_iodma.py b/src/finn/transformation/fpgadataflow/insert_iodma.py index d0ef270816c362af730a75b59be71d0457e0b8e2..cc85f544eb0e3a99bfee8dcfe1f5a8d722b656db 100644 --- a/src/finn/transformation/fpgadataflow/insert_iodma.py +++ b/src/finn/transformation/fpgadataflow/insert_iodma.py @@ -59,11 +59,11 @@ class InsertIODMA(Transformation): . """ - # TODO: refactor this into streamingfclayer_batch.py, could go into + # TODO: refactor this into matrixvectoractivation.py, could go into # make_weight_file except it doesn't write a file but returns a npy # array instead w_shape = weights.shape - assert len(w_shape) == 2, "weights withincorrect number of dims" + assert len(w_shape) == 2, "weights with incorrect number of dims" inp_w, out_w = w_shape assert out_w % pe == 0, "Malformed weight matrix" @@ -94,10 +94,11 @@ class InsertIODMA(Transformation): get_by_name(x.attribute, "backend").s.decode("UTF-8") == "fpgadataflow" for x in all_nodes ) - # parse streamingfclayers looking for external weights with no attached IODMA + # parse matrixvectoractivation layers looking for external weights with no + # attached IODMA fc_extw_nodes = list( filter( - lambda x: x.op_type == "StreamingFCLayer_Batch" + lambda x: x.op_type == "MatrixVectorActivation" and getCustomOp(x).get_nodeattr("mem_mode") == "external" and model.find_producer(x.input[1]) is None, all_nodes, diff --git a/src/finn/transformation/fpgadataflow/insert_tlastmarker.py b/src/finn/transformation/fpgadataflow/insert_tlastmarker.py index 34cb61346dcd5bcd6f41a4272748764cf385a524..0d764b9ed4b3f14850cd678656ebb3ef98162644 100644 --- a/src/finn/transformation/fpgadataflow/insert_tlastmarker.py +++ b/src/finn/transformation/fpgadataflow/insert_tlastmarker.py @@ -97,7 +97,7 @@ class InsertTLastMarker(Transformation): first_node = model.find_consumers(graph_in_name) # skip if no consumers (this may be the case for unused initializers) # TODO: fix this with a cleanup transform - if first_node is None: + if first_node == []: continue assert len(first_node) == 1, "Input fans out to multiple nodes" first_node = first_node[0] @@ -106,7 +106,7 @@ class InsertTLastMarker(Transformation): # the input is in the list of graph inputs because it has an # initializer (TODO: fix this with a clean-up transform) if ( - first_node.op_type == "StreamingFCLayer_Batch" + first_node.op_type == "MatrixVectorActivation" and get_by_name(first_node.attribute, "mem_mode").s.decode("UTF-8") != "external" ): @@ -123,7 +123,7 @@ class InsertTLastMarker(Transformation): inp_idx = list(first_node.input).index(graph_in_name) if inp_idx > 0: if ( - first_node.op_type == "StreamingFCLayer_Batch" + first_node.op_type == "MatrixVectorActivation" and inp_idx == 1 ): stream_width = int(custom_op.get_weightstream_width()) diff --git a/src/finn/transformation/fpgadataflow/make_pynq_driver.py b/src/finn/transformation/fpgadataflow/make_pynq_driver.py index 2c3bd7ee59e23566bbd0acf2241ca67ed2beb3ea..8286f696fb1a6790bd7830b1fdedb43838827040 100644 --- a/src/finn/transformation/fpgadataflow/make_pynq_driver.py +++ b/src/finn/transformation/fpgadataflow/make_pynq_driver.py @@ -267,13 +267,13 @@ class MakePYNQDriver(Transformation): # copy all the dependencies into the driver folder # driver imports utils/data_packing and core/datatype # both of which are in finn-base - # e.g. /workspace/finn-base/src/finn/util/data_packing.py + # e.g. $FINN_ROOT/deps/finn-base/src/finn/util/data_packing.py dpk_root = dpk.__file__ - # e.g. /workspace/finn-base/src/finn/util + # e.g. $FINN_ROOT/deps/finn-base/src/finn/util dpk_root = dpk_root.replace("data_packing.py", "") - # e.g. /workspace/finn-base/src/finn/core/datatype.py + # e.g. $FINN_ROOT/deps/finn-base/src/finn/core/datatype.py dtp_root = dtp.__file__ - # e.g. /workspace/finn-base/src/finn/core + # e.g. $FINN_ROOT/deps/finn-base/src/finn/core dtp_root = dtp_root.replace("datatype.py", "") shutil.copytree(dpk_root, pynq_driver_dir + "/finn/util") shutil.copytree(dtp_root, pynq_driver_dir + "/finn/core") @@ -288,7 +288,7 @@ class MakePYNQDriver(Transformation): dataflow_model = ModelWrapper(dataflow_model_filename) rt_layer_ind = 0 for node in dataflow_model.graph.node: - if node.op_type in ["StreamingFCLayer_Batch", "Thresholding_Batch"]: + if node.op_type in ["MatrixVectorActivation", "Thresholding_Batch"]: node_inst = getCustomOp(node) is_rt_weights = node_inst.get_nodeattr("runtime_writeable_weights") if is_rt_weights == 1: diff --git a/src/finn/transformation/fpgadataflow/make_zynq_proj.py b/src/finn/transformation/fpgadataflow/make_zynq_proj.py index 84d587b6cecea63cb3be41a4a73bcc24aeb822f3..b52e61ff06d5185283cbb615d3018227ca988eca 100644 --- a/src/finn/transformation/fpgadataflow/make_zynq_proj.py +++ b/src/finn/transformation/fpgadataflow/make_zynq_proj.py @@ -62,13 +62,13 @@ def collect_ip_dirs(model, ipstitch_path): ), """The directory that should contain the generated ip blocks doesn't exist.""" ip_dirs += [ip_dir_value] - if node.op_type in ["StreamingFCLayer_Batch", "Thresholding_Batch"]: + if node.op_type in ["MatrixVectorActivation", "Thresholding_Batch"]: if node_inst.get_nodeattr("mem_mode") == "decoupled": need_memstreamer = True ip_dirs += [ipstitch_path + "/ip"] if need_memstreamer: # add RTL streamer IP - ip_dirs.append("/workspace/finn/finn-rtllib/memstream") + ip_dirs.append("$::env(FINN_ROOT)/finn-rtllib/memstream") return ip_dirs @@ -152,11 +152,13 @@ class MakeZYNQProject(Transformation): # define kernel instances # name kernels connected to graph inputs as idmaxx # name kernels connected to graph outputs as odmaxx - if producer is None or consumer is None: + if (producer is None) or (consumer == []): + # TODO not a good way of checking for external inp&out + # should look at the list of top-level in/out instead if producer is None: instance_names[node.name] = "idma" + str(idma_idx) idma_idx += 1 - elif consumer is None: + elif consumer == []: instance_names[node.name] = "odma" + str(odma_idx) odma_idx += 1 config.append( @@ -279,10 +281,16 @@ class MakeZYNQProject(Transformation): copy(bitfile_name, deploy_bitfile_name) # set bitfile attribute model.set_metadata_prop("bitfile", deploy_bitfile_name) - hwh_name = ( + hwh_name_alts = [ vivado_pynq_proj_dir - + "/finn_zynq_link.srcs/sources_1/bd/top/hw_handoff/top.hwh" - ) + + "/finn_zynq_link.srcs/sources_1/bd/top/hw_handoff/top.hwh", + vivado_pynq_proj_dir + + "/finn_zynq_link.gen/sources_1/bd/top/hw_handoff/top.hwh", + ] + hwh_name = None + for hwh_name_cand in hwh_name_alts: + if os.path.isfile(hwh_name_cand): + hwh_name = hwh_name_cand if not os.path.isfile(hwh_name): raise Exception( "Synthesis failed, no bitfile found. Check logs under %s" diff --git a/src/finn/transformation/fpgadataflow/set_fifo_depths.py b/src/finn/transformation/fpgadataflow/set_fifo_depths.py index 39eb049565475b462ea0df9d88b46e3598e6cdd9..f1783c27ab150d6d5a83f458579c919d3b8c787b 100644 --- a/src/finn/transformation/fpgadataflow/set_fifo_depths.py +++ b/src/finn/transformation/fpgadataflow/set_fifo_depths.py @@ -99,7 +99,7 @@ class RemoveShallowFIFOs(Transformation): # bypass shallow fifos shallow_fifos.append(node) consumers = model.find_consumers(node.output[0]) - if consumers is None: + if consumers == []: producer = model.find_producer(node.input[0]) for idx, inp in enumerate(producer.output): if inp == node.input[0]: @@ -137,7 +137,7 @@ class CapConvolutionFIFODepths(Transformation): Background: The simulation-based rtlsim_exec tends to overestimate the required depth of FIFOs between the ConvolutionInputGenerator (here called SWG) and the - StreamingFCLayer (here called MVAU). As the SWG has an internal buffer of 1 + MatrixVectorActivation (here called MVAU). As the SWG has an internal buffer of 1 image row, we use this as a rule of thumb to set FIFO depth to be no larger than 1 row. """ @@ -152,7 +152,7 @@ class CapConvolutionFIFODepths(Transformation): # TODO move this to own transformation for node in model.graph.node: # look for following pattern: - # ConvolutionInputGenerator -> StreamingFIFO -> StreamingFCLayer + # ConvolutionInputGenerator -> StreamingFIFO -> MatrixVectorActivation if node.op_type == "StreamingFIFO": fifo_prod = model.find_producer(node.input[0]) fifo_cons = model.find_consumer(node.output[0]) @@ -162,7 +162,7 @@ class CapConvolutionFIFODepths(Transformation): continue if fifo_cons is None: continue - if fifo_cons.op_type != "StreamingFCLayer_Batch": + if fifo_cons.op_type != "MatrixVectorActivation": continue op_inst = getCustomOp(node) depth = op_inst.get_nodeattr("depth") @@ -222,7 +222,7 @@ class InsertAndSetFIFODepths(Transformation): fpgapart, clk_ns=10.0, max_qsrl_depth=256, - max_depth=2 ** 14, + max_depth=2**14, swg_exception=True, vivado_ram_style="auto", ): @@ -247,7 +247,7 @@ class InsertAndSetFIFODepths(Transformation): node = getCustomOp(node) node.set_nodeattr("inFIFODepth", self.max_depth) node.set_nodeattr("outFIFODepth", self.max_depth) - if node.onnx_node.op_type == "StreamingFCLayer_Batch": + if node.onnx_node.op_type == "MatrixVectorActivation": mmode = node.get_nodeattr("mem_mode") if mmode == "external": modified_fc_nodes.append(node.onnx_node.name) @@ -377,7 +377,7 @@ class InsertAndSetFIFODepths(Transformation): getCustomOp(node).set_nodeattr("outFIFODepth", 0) # for every FC node we changed from external to decoupled, # change back and reset implementation - if node.op_type == "StreamingFCLayer_Batch": + if node.op_type == "MatrixVectorActivation": if node.name in modified_fc_nodes: node_inst = getCustomOp(node) node_inst.set_nodeattr("mem_mode", "external") diff --git a/src/finn/transformation/fpgadataflow/set_folding.py b/src/finn/transformation/fpgadataflow/set_folding.py index 64d7a080724820d58a026bafbe74a4d7567b2179..443d5c255316c5ca5b9b4ceba50981a906818d9a 100644 --- a/src/finn/transformation/fpgadataflow/set_folding.py +++ b/src/finn/transformation/fpgadataflow/set_folding.py @@ -62,13 +62,13 @@ class SetFolding(Transformation): Notable exceptions and special behavior: - * When folding dense convolution/FC compute engines (StreamingFCLayer_Batch), + * When folding dense convolution/FC compute engines ("MVAU"/MatrixVectorActivation), which have two attributes (PE and SIMD): * first increases SIMD while weight stream width per PE is <= mvau_wwidth_max (configurable in the SetFolding initializer, defaults to 36) * then increases PE until the target is met or max PE reached - * When folding depthwise convolutions ("VVAU"/Vector_Vector_Activate_Batch) + * When folding depthwise convolutions ("VVAU"/VectorVectorActivation) or spatial reduction ops (Pool_Batch): * the producer of the node is expected to be a ConvolutionInputGenerator with depthwise=1, whose SIMD value will be set equal to the PE value of @@ -104,16 +104,21 @@ class SetFolding(Transformation): ] # these ops use SIMD parallelism, up to a max value of NumChannels # ConvolutionInputGenerator has a special case when depthwise=1 - simd_ops = ["DownSampler", "FMPadding_Batch", "ConvolutionInputGenerator"] + simd_ops = [ + "DownSampler", + "FMPadding_Batch", + "ConvolutionInputGenerator", + "ConvolutionInputGenerator1D", + ] # these ops are preceded by depthwise SWG and have special behavior, # as explained in the SetFolding docstring - depthwise_op_exceptions = ["Vector_Vector_Activate_Batch", "Pool_Batch"] + depthwise_op_exceptions = ["VectorVectorActivation", "Pool_Batch"] for node in graph.node: if not is_fpgadataflow_node(node): continue op_type = node.op_type node_inst = getCustomOp(node) - if op_type == "StreamingFCLayer_Batch": + if op_type == "MatrixVectorActivation": max_simd = node_inst.get_nodeattr("MW") max_pe = node_inst.get_nodeattr("MH") node_inst.set_nodeattr("PE", 1) @@ -150,12 +155,12 @@ class SetFolding(Transformation): # also set the folding of the upsteam DW SWU # which must be identical to this node swu_node = model.find_producer(node.input[0]) - if swu_node.op_type == "ConvolutionInputGenerator": + if swu_node.op_type.startswith("ConvolutionInputGenerator"): swu_node_inst = getCustomOp(swu_node) pe = node_inst.get_nodeattr("PE") swu_node_inst.set_nodeattr("SIMD", pe) else: - if op_type == "Vector_Vector_Activate_Batch": + if op_type == "VectorVectorActivation": ksize = np.prod(node_inst.get_nodeattr("Kernel")) elif op_type == "Pool_Batch": ksize = node_inst.get_nodeattr("KernelSize") @@ -166,7 +171,10 @@ class SetFolding(Transformation): "Expected SWU on DW op input, found " + swu_node.op_type ) elif op_type in simd_ops: - if op_type == "ConvolutionInputGenerator": + if op_type in [ + "ConvolutionInputGenerator", + "ConvolutionInputGenerator1D", + ]: depthwise = node_inst.get_nodeattr("depthwise") if depthwise == 0: max_simd = node_inst.get_nodeattr("IFMChannels") diff --git a/src/finn/transformation/fpgadataflow/templates.py b/src/finn/transformation/fpgadataflow/templates.py index a12f359c7d3f1c29a17694ef4987a1a349286234..84fad1d8c3831fdfc8e59c6594ac21071eab83ec 100644 --- a/src/finn/transformation/fpgadataflow/templates.py +++ b/src/finn/transformation/fpgadataflow/templates.py @@ -103,8 +103,8 @@ create_project finn_zynq_link ./ -part $FPGA_PART # set board part repo paths to find PYNQ-Z1/Z2 set paths_prop [get_property BOARD_PART_REPO_PATHS [current_project]] set paths_param [get_param board.repoPaths] -lappend paths_prop /workspace/board_files -lappend paths_param /workspace/board_files +lappend paths_prop $::env(FINN_ROOT)/deps/board_files +lappend paths_param $::env(FINN_ROOT)/deps/board_files set_property BOARD_PART_REPO_PATHS $paths_prop [current_project] set_param board.repoPaths $paths_param diff --git a/src/finn/transformation/fpgadataflow/vitis_build.py b/src/finn/transformation/fpgadataflow/vitis_build.py index a2865321418343efbfdae12c111ba4334ecfee28..4dce3ab16c38bfe5dd43f3e23b14ea2ec571f68c 100644 --- a/src/finn/transformation/fpgadataflow/vitis_build.py +++ b/src/finn/transformation/fpgadataflow/vitis_build.py @@ -49,7 +49,6 @@ from finn.transformation.general import ( GiveUniqueNodeNames, RemoveUnusedTensors, ) -from finn.transformation.infer_data_layouts import InferDataLayouts from finn.util.basic import make_build_dir from . import templates @@ -214,11 +213,13 @@ class VitisLink(Transformation): # define kernel instances # name kernels connected to graph inputs as idmaxx # name kernels connected to graph inputs as odmaxx + # TODO not a good way of checking for external in/out + # check top-level in/out list instead if producer is None: instance_names[node.name] = "idma" + str(idma_idx) config.append("nk=%s:1:%s" % (node.name, instance_names[node.name])) idma_idx += 1 - elif consumer is None: + elif consumer == []: instance_names[node.name] = "odma" + str(odma_idx) config.append("nk=%s:1:%s" % (node.name, instance_names[node.name])) odma_idx += 1 @@ -392,8 +393,6 @@ class VitisBuild(Transformation): def apply(self, model): _check_vitis_envvars() - # first infer layouts - model = model.transform(InferDataLayouts()) # prepare at global level, then break up into kernels prep_transforms = [InsertIODMA(512), InsertDWC()] for trn in prep_transforms: diff --git a/src/finn/transformation/move_reshape.py b/src/finn/transformation/move_reshape.py index 6c9a2973376be2c4744bc23db2cc975be8e7d52a..765d842997e1a388bd7d1e758f25dd861d2c4d4a 100644 --- a/src/finn/transformation/move_reshape.py +++ b/src/finn/transformation/move_reshape.py @@ -51,7 +51,7 @@ class RemoveCNVtoFCFlatten(Transformation): producer = model.find_producer(transp_node.input[0]) if _is_fpgadataflow_node(producer) is True: consumer = model.find_consumer(n.output[0]) - if consumer.op_type == "StreamingFCLayer_Batch": + if consumer.op_type == "MatrixVectorActivation": fc_inst = getCustomOp(consumer) mw = fc_inst.get_nodeattr("MW") mh = fc_inst.get_nodeattr("MH") diff --git a/src/finn/transformation/qonnx/fold_quant_weights.py b/src/finn/transformation/qonnx/fold_quant_weights.py index 12c854d3bab2b762abc3649e15beff29ff8de3ac..e8a0f418ae5eb587d6aabae57d8b379357d3a0ca 100644 --- a/src/finn/transformation/qonnx/fold_quant_weights.py +++ b/src/finn/transformation/qonnx/fold_quant_weights.py @@ -103,7 +103,7 @@ class FoldQuantWeights(Transformation): model.set_initializer(node_out, q_node_output) else: # Check next operator type - mul_like_nodes = ["Mul", "Div", "Conv", "MatMul"] + mul_like_nodes = ["Mul", "Div", "Conv", "MatMul", "Gather"] add_like_nodes = ["Add", "Sub"] all_supported_ops = mul_like_nodes.copy() all_supported_ops.extend(add_like_nodes) @@ -146,11 +146,14 @@ class FoldQuantWeights(Transformation): model.set_initializer(mul_tensor.name, scale) successor = model.find_consumers(node_out) - if successor is None: + if successor == []: raise RuntimeError( "Can only constant fold scaled Quant weights " "if a successor exists." ) + assert ( + len(successor) == 1 + ), "Only implemented for a single consumer" successor = successor[0] succ_output_name = successor.output[0] diff --git a/src/finn/transformation/qonnx/infer_quant_avg_pool_2d.py b/src/finn/transformation/qonnx/infer_quant_avg_pool_2d.py index faad31fa06e76b245f25b6f0aa583fec5c0da29a..c234bd38d9679f72b6df73e81df57fba3e8d4554 100644 --- a/src/finn/transformation/qonnx/infer_quant_avg_pool_2d.py +++ b/src/finn/transformation/qonnx/infer_quant_avg_pool_2d.py @@ -230,7 +230,7 @@ class AvgPoolAndTruncToQuantAvgPool(Transformation): # 7c2603a95e90e4de2575020e575c24eab6a15889/src/finn/custom_op/ # general/quantavgpool2d.py#L94 ibits = math.floor( - math.log(2 ** trunc_in_bits / (k_s * k_s), 2) + math.log(2**trunc_in_bits / (k_s * k_s), 2) ) # Get sign signed = _get_signed_from_upstream(model, t_node) diff --git a/src/finn/transformation/qonnx/qonnx_activation_handlers.py b/src/finn/transformation/qonnx/qonnx_activation_handlers.py index 3336b1eee7fa9d54092cd56b9ba0edaf9d0884b1..c8bde7fea8ae8195001a7eccfd48baa4c48997ae 100644 --- a/src/finn/transformation/qonnx/qonnx_activation_handlers.py +++ b/src/finn/transformation/qonnx/qonnx_activation_handlers.py @@ -333,7 +333,7 @@ class QuantReluHandler(QuantActBaseHandler): # Calculate thersholds, see: https://github.com/Xilinx/brevitas/blob/ # a5bfd6dc5e030f0047ac1ee47932b60e8e873e17/src/brevitas/export/ # onnx/finn/handler/act.py#L21 - num_distinct_values = 2 ** bit_width + num_distinct_values = 2**bit_width num_thresholds = int(num_distinct_values - 1) flat_scale = quant_scale.flatten().astype(np.float32) num_scale_channels = flat_scale.shape[0] @@ -468,9 +468,9 @@ class QuantIdentityHandler(QuantActBaseHandler): return thresholds else: if narrow: - num_distinct_values = 2 ** bit_width - 1 + num_distinct_values = 2**bit_width - 1 else: - num_distinct_values = 2 ** bit_width + num_distinct_values = 2**bit_width num_thresholds = int(num_distinct_values - 1) flat_scale = quant_scale.flatten() diff --git a/src/finn/transformation/streamline/absorb.py b/src/finn/transformation/streamline/absorb.py index 97ae3b51a849a4174c9853cb41c0d6d72bdf8dad..32e539d87045520044378b94fd0e3c71486990c7 100644 --- a/src/finn/transformation/streamline/absorb.py +++ b/src/finn/transformation/streamline/absorb.py @@ -627,10 +627,9 @@ class AbsorbTransposeIntoResize(Transformation): graph.node.insert(node_ind + 1, new_transpose) # rewire nodes final_t_cands = model.find_consumers(mt_cand.output[0]) - if final_t_cands is not None: - # rewire next nodes' inputs - for final_t_cand in final_t_cands: - final_t_cand.input[0] = trans_output + # rewire next nodes' inputs + for final_t_cand in final_t_cands: + final_t_cand.input[0] = trans_output mt_cand.output[0] = trans_input graph_modified = True if graph_modified: diff --git a/src/finn/transformation/streamline/reorder.py b/src/finn/transformation/streamline/reorder.py index 0cdd6651d982426b1d81d7313346dcd899294bf7..e922dffe37691a39434e9ebafa5df6d1a11d389e 100644 --- a/src/finn/transformation/streamline/reorder.py +++ b/src/finn/transformation/streamline/reorder.py @@ -670,6 +670,13 @@ class MakeMaxPoolNHWC(Transformation): if consumer is not None and consumer.op_type == "Transpose": perms = list(get_by_name(consumer.attribute, "perm").ints) if perms == [0, 2, 3, 1]: + ceil_mode = get_by_name(n.attribute, "ceil_mode") + if ceil_mode is not None: + ceil_mode = ceil_mode.i + else: + ceil_mode = ( + 0 # default to ceil_mode=0 (equivalent to np.floor) + ) n.op_type = "MaxPoolNHWC" n.domain = "finn.custom_op.general" start_name = n.input[0] @@ -683,12 +690,20 @@ class MakeMaxPoolNHWC(Transformation): n.output[0] = end_name model.set_tensor_shape(mid_name, (b, hi, wi, c)) model.set_tensor_shape(end_name, (b, ho, wo, c)) + getCustomOp(n).set_nodeattr("ceil_mode", ceil_mode) graph.node.remove(consumer) graph.node.insert(node_ind - 1, consumer) graph_modified = True elif producer is not None and producer.op_type == "Transpose": perms = list(get_by_name(producer.attribute, "perm").ints) if perms == [0, 3, 1, 2]: + ceil_mode = get_by_name(n.attribute, "ceil_mode") + if ceil_mode is not None: + ceil_mode = ceil_mode.i + else: + ceil_mode = ( + 0 # default to ceil_mode=0 (equivalent to np.floor) + ) n.op_type = "MaxPoolNHWC" n.domain = "finn.custom_op.general" start_name = producer.input[0] @@ -702,6 +717,7 @@ class MakeMaxPoolNHWC(Transformation): n.output[0] = mid_name model.set_tensor_shape(mid_name, (b, ho, wo, c)) model.set_tensor_shape(end_name, (b, c, ho, wo)) + getCustomOp(n).set_nodeattr("ceil_mode", ceil_mode) graph.node.remove(producer) graph.node.insert(node_ind, producer) graph_modified = True @@ -739,6 +755,7 @@ class MoveOpPastFork(Transformation): # Check case when branches are empty and go # to the same node consumers = model.find_consumers(n.output[0]) + assert len(consumers) > 1, "Must have >1 consumer" unique_consumer = True for consum_node in consumers[1:]: if consumers[0] != consum_node: diff --git a/src/finn/util/create.py b/src/finn/util/create.py index 62229a69b68c26dd191b3e1d4a44f1bb8b19ed07..46bf9980d55e18396809075907fa3e365d426a3d 100644 --- a/src/finn/util/create.py +++ b/src/finn/util/create.py @@ -117,7 +117,7 @@ def hls_mlp_maker(layer_spec): model.graph.output.append(global_out) # there are two ways to implement bipolar weights and inputs for - # StreamingFC: + # MatrixVectorActivation: # - specify their datatypes as such # - specify their datatypes as BINARY as use binaryXnorMode if wdt == DataType["BIPOLAR"] and idt == DataType["BIPOLAR"]: @@ -144,7 +144,7 @@ def hls_mlp_maker(layer_spec): actval = 0 no_act = 1 FCLayer_node = helper.make_node( - "StreamingFCLayer_Batch", + "MatrixVectorActivation", node_inp_list, [current_out_name], domain="finn.custom_op.fpgadataflow", diff --git a/src/finn/util/gdrive.py b/src/finn/util/gdrive.py index 5a904ed7c8e866ec7545064f290c6401a990eb01..d525437300b6aee081bb073d40a517b5e3aa14be 100644 --- a/src/finn/util/gdrive.py +++ b/src/finn/util/gdrive.py @@ -31,9 +31,11 @@ import os import warnings from datetime import datetime +from finn.util.basic import get_finn_root + def upload_to_end2end_dashboard(data_dict): - gdrive_key = "/workspace/finn/gdrive-key/service_account.json" + gdrive_key = get_finn_root() + "/gdrive-key/service_account.json" if not os.path.isfile(gdrive_key): warnings.warn("Google Drive key not found, skipping dashboard upload") return diff --git a/tests/brevitas/test_brevitas_avg_pool_export.py b/tests/brevitas/test_brevitas_avg_pool_export.py index 1b38914a83e7c5d68bb004df7545b518d6a93ddd..6d0c68f0f456c05ab60ffa043277409730b695ce 100644 --- a/tests/brevitas/test_brevitas_avg_pool_export.py +++ b/tests/brevitas/test_brevitas_avg_pool_export.py @@ -47,6 +47,7 @@ from finn.util.basic import gen_finn_dt_tensor base_export_onnx_path = "test_brevitas_avg_pool_export.onnx" +@pytest.mark.brevitas_export @pytest.mark.parametrize("QONNX_export", [False, True]) @pytest.mark.parametrize("kernel_size", [2, 3]) @pytest.mark.parametrize("stride", [1, 2]) diff --git a/tests/brevitas/test_brevitas_cnv.py b/tests/brevitas/test_brevitas_cnv.py index 78ca361366902b37f826b575904126c783adbece..2592d381173ee2112565f17d6631dd98f05e221a 100644 --- a/tests/brevitas/test_brevitas_cnv.py +++ b/tests/brevitas/test_brevitas_cnv.py @@ -47,7 +47,7 @@ from finn.util.test import get_test_model_trained export_onnx_path = "test_brevitas_cnv.onnx" - +@pytest.mark.brevitas_export @pytest.mark.parametrize("abits", [1, 2]) @pytest.mark.parametrize("wbits", [1, 2]) @pytest.mark.parametrize("QONNX_export", [False, True]) diff --git a/tests/brevitas/test_brevitas_debug.py b/tests/brevitas/test_brevitas_debug.py index e42b93babefd9ca6a7a86def18a5cbb21d795c4c..3db1a208456f7209623530681d96d6aa35928900 100644 --- a/tests/brevitas/test_brevitas_debug.py +++ b/tests/brevitas/test_brevitas_debug.py @@ -47,6 +47,7 @@ from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN from finn.util.test import get_test_model_trained +@pytest.mark.brevitas_export @pytest.mark.parametrize("QONNX_export", [False, True]) @pytest.mark.parametrize("QONNX_FINN_conversion", [False, True]) def test_brevitas_debug(QONNX_export, QONNX_FINN_conversion): diff --git a/tests/brevitas/test_brevitas_fc.py b/tests/brevitas/test_brevitas_fc.py index 8e1e3de8d06b24ce946fb0a6726d875d0e75736e..fc0f24b9172eb7882197026420ede8fe5d69bee5 100644 --- a/tests/brevitas/test_brevitas_fc.py +++ b/tests/brevitas/test_brevitas_fc.py @@ -49,6 +49,7 @@ from finn.util.test import get_test_model_trained export_onnx_path = make_build_dir("test_brevitas_fc_") +@pytest.mark.brevitas_export # act bits @pytest.mark.parametrize("abits", [1, 2]) # weight bits diff --git a/tests/brevitas/test_brevitas_mobilenet.py b/tests/brevitas/test_brevitas_mobilenet.py index 108c97c2e83b7f3ca9dd6ead746b3ef8b4d10af5..9c51206ee097e17b85728d6e606b1cf05eb0b9e4 100644 --- a/tests/brevitas/test_brevitas_mobilenet.py +++ b/tests/brevitas/test_brevitas_mobilenet.py @@ -48,15 +48,15 @@ from finn.transformation.infer_datatypes import InferDataTypes from finn.transformation.infer_shapes import InferShapes from finn.transformation.insert_topk import InsertTopK from finn.transformation.merge_onnx_models import MergeONNXModels -from finn.util.basic import make_build_dir +from finn.util.basic import get_finn_root, make_build_dir from finn.util.pytorch import NormalizePreProc from finn.util.test import crop_center, get_test_model_trained, resize_smaller_side - +@pytest.mark.brevitas_export @pytest.mark.xfail def test_brevitas_mobilenet(): # get single image as input and prepare image - img = Image.open("/workspace/finn/tests/brevitas/king_charles.jpg") + img = Image.open(get_finn_root() + "/tests/brevitas/king_charles.jpg") # resize smallest side of the image to 256 pixels and resize larger side # with same ratio img = resize_smaller_side(256, img) diff --git a/tests/brevitas/test_brevitas_non_scaled_quanthardtanh_export.py b/tests/brevitas/test_brevitas_non_scaled_quanthardtanh_export.py index b530b4bd84c548319549a8b16e0c3a79584e075d..4f9d2778028223d85882839ef7243e170ef90dd6 100644 --- a/tests/brevitas/test_brevitas_non_scaled_quanthardtanh_export.py +++ b/tests/brevitas/test_brevitas_non_scaled_quanthardtanh_export.py @@ -47,7 +47,7 @@ from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN export_onnx_path = "test_brevitas_non_scaled_QuantHardTanh_export.onnx" - +@pytest.mark.brevitas_export @pytest.mark.parametrize("abits", [1, 2, 4, 8]) @pytest.mark.parametrize("narrow_range", [False, True]) @pytest.mark.parametrize("max_val", [1.0, 1 - 2 ** (-7)]) diff --git a/tests/brevitas/test_brevitas_qconv2d.py b/tests/brevitas/test_brevitas_qconv2d.py index beaea4e51ecdd4cff9f0d4d0c16735cdecad207c..4d9bd14ae3500fd8c0e78e6c4d377ce1f234d168 100644 --- a/tests/brevitas/test_brevitas_qconv2d.py +++ b/tests/brevitas/test_brevitas_qconv2d.py @@ -49,7 +49,7 @@ from finn.util.basic import gen_finn_dt_tensor export_onnx_path = "test_brevitas_conv.onnx" - +@pytest.mark.brevitas_export @pytest.mark.parametrize("dw", [False, True]) @pytest.mark.parametrize("bias", [True, False]) @pytest.mark.parametrize("in_channels", [32]) diff --git a/tests/brevitas/test_brevitas_qlinear.py b/tests/brevitas/test_brevitas_qlinear.py index 1099d3ec83336e5cd07707b35baea112b7a2aee6..e78262fcb24a1fec1fa876a39c67bd3aa850299c 100644 --- a/tests/brevitas/test_brevitas_qlinear.py +++ b/tests/brevitas/test_brevitas_qlinear.py @@ -46,7 +46,7 @@ from finn.util.basic import gen_finn_dt_tensor export_onnx_path = "test_brevitas_qlinear.onnx" - +@pytest.mark.brevitas_export @pytest.mark.parametrize("bias", [False, True]) @pytest.mark.parametrize("out_features", [4]) @pytest.mark.parametrize("in_features", [3]) diff --git a/tests/brevitas/test_brevitas_relu_act_export.py b/tests/brevitas/test_brevitas_relu_act_export.py index 57ead3b6c047220e90d4276620cc14b8f795fe08..01ba7f382535ea8a12a60f211b7718ca57164db4 100644 --- a/tests/brevitas/test_brevitas_relu_act_export.py +++ b/tests/brevitas/test_brevitas_relu_act_export.py @@ -47,7 +47,7 @@ from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN export_onnx_path = "test_brevitas_relu_act_export.onnx" - +@pytest.mark.brevitas_export @pytest.mark.parametrize("abits", [2, 4, 8]) @pytest.mark.parametrize("max_val", [1.0, 1.5, 1 - 2 ** (-7)]) @pytest.mark.parametrize( @@ -110,7 +110,7 @@ scaling_impl.learned_value": torch.tensor( assert np.isclose(produced, expected, atol=1e-3).all() os.remove(export_onnx_path) - +@pytest.mark.brevitas_export @pytest.mark.parametrize("abits", [2, 4, 8]) @pytest.mark.parametrize("max_val", [1.0, 1.5, 1 - 2 ** (-7)]) @pytest.mark.parametrize("scaling_per_channel", [True, False]) diff --git a/tests/brevitas/test_brevitas_scaled_qhardtanh_export.py b/tests/brevitas/test_brevitas_scaled_qhardtanh_export.py index c6da2e2e971ee97cb73243284920cc87e8b4d7bb..9f17c0f26c42058f314a25c066c8ba37a06e0b65 100644 --- a/tests/brevitas/test_brevitas_scaled_qhardtanh_export.py +++ b/tests/brevitas/test_brevitas_scaled_qhardtanh_export.py @@ -47,7 +47,7 @@ from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN export_onnx_path = "test_brevitas_scaled_QHardTanh_export.onnx" - +@pytest.mark.brevitas_export @pytest.mark.parametrize("abits", [2, 4, 8]) @pytest.mark.parametrize("narrow_range", [False, True]) @pytest.mark.parametrize("min_val", [-1.0, -(1 - 2 ** (-7)), -2]) diff --git a/tests/brevitas/test_brevitas_validate_mobilenet.py b/tests/brevitas/test_brevitas_validate_mobilenet.py index 12e7e7aff2ec2ebae3e2ec7713a24046553dc5f2..67e6b785a70c81717adadd3d2695017e0382edda 100644 --- a/tests/brevitas/test_brevitas_validate_mobilenet.py +++ b/tests/brevitas/test_brevitas_validate_mobilenet.py @@ -61,7 +61,7 @@ mean = [0.485, 0.456, 0.406] std = 0.226 ch = 3 - +@pytest.mark.brevitas_export def test_brevitas_mobilenet_preproc(): if "IMAGENET_VAL_PATH" not in os.environ.keys(): pytest.skip("Can't do validation without IMAGENET_VAL_PATH") @@ -98,6 +98,7 @@ def test_brevitas_mobilenet_preproc(): assert (finn_img == pyt_img).all() +@pytest.mark.brevitas_export @pytest.mark.slow # marked as XFAIL until Brevitas export issues are resolved: # https://github.com/Xilinx/brevitas/issues/173 diff --git a/tests/end2end/test_end2end_bnn_pynq.py b/tests/end2end/test_end2end_bnn_pynq.py index 1fddc7c1c26a0ba04d5849809ccf59b0a926a509..ad54ac9e29d17592d24b62a1f203165bdcfd7d94 100644 --- a/tests/end2end/test_end2end_bnn_pynq.py +++ b/tests/end2end/test_end2end_bnn_pynq.py @@ -90,6 +90,7 @@ from finn.transformation.streamline.reorder import ( MakeMaxPoolNHWC, MoveScalarLinearPastInvariants, ) +from finn.util.basic import get_finn_root from finn.util.gdrive import upload_to_end2end_dashboard from finn.util.pytorch import ToTensor from finn.util.test import ( @@ -136,7 +137,7 @@ def update_dashboard_data(topology, wbits, abits, key, val): def fold_tfc(model): - fc_layers = model.get_nodes_by_op_type("StreamingFCLayer_Batch") + fc_layers = model.get_nodes_by_op_type("MatrixVectorActivation") # (PE, SIMD, ramstyle) for each layer config = [(16, 49, "block"), (8, 8, "auto"), (8, 8, "auto"), (10, 8, "distributed")] for fcl, (pe, simd, ramstyle) in zip(fc_layers, config): @@ -154,7 +155,7 @@ def fold_tfc(model): def fold_lfc(model): - fc_layers = model.get_nodes_by_op_type("StreamingFCLayer_Batch") + fc_layers = model.get_nodes_by_op_type("MatrixVectorActivation") # (PE, SIMD, ramstyle) for each layer config = [ (32, 49, "block"), @@ -176,7 +177,7 @@ def fold_lfc(model): def fold_cnv_large(model): - fc_layers = model.get_nodes_by_op_type("StreamingFCLayer_Batch") + fc_layers = model.get_nodes_by_op_type("MatrixVectorActivation") # each tuple is (PE, SIMD) for a layer folding = [ (16, 3), @@ -203,11 +204,11 @@ def fold_cnv_large(model): def fold_cnv_small(model): - fc_layers = model.get_nodes_by_op_type("StreamingFCLayer_Batch") + fc_layers = model.get_nodes_by_op_type("MatrixVectorActivation") # each tuple is (PE, SIMD) for a layer folding = [ - (8, 3, "auto"), - (16, 16, "auto"), + (8, 3, "distributed"), + (16, 16, "distributed"), (8, 16, "auto"), (8, 16, "block"), (4, 8, "auto"), @@ -259,11 +260,11 @@ def get_golden_io_pair(topology, wbits, abits, preproc=ToTensor(), return_topk=N def measure_top1_accuracy(model_chkpt, dataset, parent_chkpt=None): if dataset == "cifar10": trainx, trainy, testx, testy, valx, valy = cifar.load_cifar_data( - "/workspace/finn/dataset", download=True, one_hot=False + get_finn_root() + "/dataset", download=True, one_hot=False ) elif dataset == "mnist": trainx, trainy, testx, testy, valx, valy = mnist.load_mnist_data( - "/workspace/finn/dataset", download=True, one_hot=False + get_finn_root() + "/dataset", download=True, one_hot=False ) else: raise Exception("Unrecognized dataset") @@ -334,7 +335,7 @@ class TestEnd2End: dtstr = datetime.now().strftime("%Y-%m-%d %H:%M:%S") update_dashboard_data(topology, wbits, abits, "datetime", dtstr) finn_commit = subprocess.check_output( - ["git", "rev-parse", "HEAD"], cwd="/workspace/finn" + ["git", "rev-parse", "HEAD"], cwd=get_finn_root() ) finn_commit = finn_commit.decode("utf-8").strip() update_dashboard_data(topology, wbits, abits, "finn-commit", finn_commit) @@ -425,9 +426,9 @@ class TestEnd2End: # use standalone thresholds for tfc-w1a1 to also exercise that option model = model.transform(to_hls.InferThresholdingLayer()) # needed for bipolar MatMul layers - model = model.transform(to_hls.InferBinaryStreamingFCLayer(mem_mode)) + model = model.transform(to_hls.InferBinaryMatrixVectorActivation(mem_mode)) # needed for non-bipolar MatMul layers - model = model.transform(to_hls.InferQuantizedStreamingFCLayer(mem_mode)) + model = model.transform(to_hls.InferQuantizedMatrixVectorActivation(mem_mode)) # TopK to LabelSelect model = model.transform(to_hls.InferLabelSelectLayer()) # input quantization (if any) to standalone thresholding @@ -450,26 +451,26 @@ class TestEnd2End: "tfc": [ ("Reshape", 1), ("Thresholding_Batch", 1), - ("StreamingFCLayer_Batch", 4), + ("MatrixVectorActivation", 4), ("LabelSelect_Batch", 1), ], "tfc-1-1": [ ("Reshape", 1), ("Thresholding_Batch", 4), - ("StreamingFCLayer_Batch", 4), + ("MatrixVectorActivation", 4), ("LabelSelect_Batch", 1), ], "lfc": [ ("Reshape", 1), ("Thresholding_Batch", 1), - ("StreamingFCLayer_Batch", 4), + ("MatrixVectorActivation", 4), ("LabelSelect_Batch", 1), ], "cnv": [ ("Transpose", 1), ("Thresholding_Batch", 1), ("ConvolutionInputGenerator", 6), - ("StreamingFCLayer_Batch", 9), + ("MatrixVectorActivation", 9), ("StreamingMaxPool_Batch", 2), ("LabelSelect_Batch", 1), ], @@ -765,7 +766,7 @@ class TestEnd2End: ret = dict() # try a range of batch sizes, some may fail due to insufficient DMA # buffers - bsize_range_in = [8 ** i for i in range(5)] + bsize_range_in = [8**i for i in range(5)] bsize_range = [] for bsize in bsize_range_in: res = throughput_test_remote(model, bsize) diff --git a/tests/end2end/test_end2end_cybsec_mlp.py b/tests/end2end/test_end2end_cybsec_mlp.py index e24d87ca6a505de7d0ed50b01157092eb0a26525..2da2ac467478d3a3a1427f1c3837d54dd250ae7f 100644 --- a/tests/end2end/test_end2end_cybsec_mlp.py +++ b/tests/end2end/test_end2end_cybsec_mlp.py @@ -217,8 +217,8 @@ def test_end2end_cybsec_mlp_build(QONNX_export): # examine the report contents with open(est_cycles_report, "r") as f: est_cycles_dict = json.load(f) - assert est_cycles_dict["StreamingFCLayer_Batch_0"] == 80 - assert est_cycles_dict["StreamingFCLayer_Batch_1"] == 64 + assert est_cycles_dict["MatrixVectorActivation_0"] == 80 + assert est_cycles_dict["MatrixVectorActivation_1"] == 64 with open(est_res_report, "r") as f: est_res_dict = json.load(f) assert est_res_dict["total"]["LUT"] == 11360.0 diff --git a/tests/end2end/test_end2end_mobilenet_v1.py b/tests/end2end/test_end2end_mobilenet_v1.py index e459bfbc3e694d5bbc9698db562765b11f6e8c38..792da28a79f52166afbd8fb9d253a0668f4f9a6d 100644 --- a/tests/end2end/test_end2end_mobilenet_v1.py +++ b/tests/end2end/test_end2end_mobilenet_v1.py @@ -66,7 +66,7 @@ from finn.transformation.remove import RemoveIdentityOps from finn.transformation.streamline import Streamline from finn.transformation.streamline.collapse_repeated import CollapseRepeatedMul from finn.transformation.streamline.round_thresholds import RoundAndClipThresholds -from finn.util.basic import alveo_default_platform, alveo_part_map +from finn.util.basic import alveo_default_platform, alveo_part_map, get_finn_root from finn.util.pytorch import NormalizePreProc from finn.util.test import ( crop_center, @@ -114,7 +114,7 @@ def test_end2end_mobilenet_export(): # calculate golden output with pytorch/brevitas and save as .npy # get single image as input and prepare image - img = Image.open("/workspace/finn/tests/brevitas/king_charles.jpg") + img = Image.open(get_finn_root() + "/tests/brevitas/king_charles.jpg") # resize smallest side of the image to 256 pixels and resize larger side # with same ratio img = resize_smaller_side(256, img) @@ -212,8 +212,8 @@ def test_end2end_mobilenet_convert_to_hls_layers(): model = load_test_checkpoint_or_skip(build_dir + "/end2end_mobilenet_lowered.onnx") model = model.transform(to_hls.InferPool_Batch()) model = model.transform(to_hls.InferConvInpGen()) - model = model.transform(to_hls.InferVVAU()) - model = model.transform(to_hls.InferQuantizedStreamingFCLayer(mem_mode)) + model = model.transform(to_hls.InferVectorVectorActivation()) + model = model.transform(to_hls.InferQuantizedMatrixVectorActivation(mem_mode)) model = model.transform(to_hls.InferChannelwiseLinearLayer()) model = model.transform(to_hls.InferLabelSelectLayer()) model = model.transform(InferShapes()) @@ -231,7 +231,7 @@ def test_end2end_mobilenet_folding(): assert extra_fold in [1, 2, 4] # set up folding for the depthwise conv layers impl'd by VVAUs # each value is PE for a layer - fc_layers = model.get_nodes_by_op_type("StreamingFCLayer_Batch") + fc_layers = model.get_nodes_by_op_type("MatrixVectorActivation") # each tuple is (PE, SIMD, ram_style) for a layer folding = [ (32, 3, "block"), @@ -260,7 +260,7 @@ def test_end2end_mobilenet_folding(): getCustomOp(fc_layers[0]).set_nodeattr("resType", first_layer_res_type) # set up folding for the depthwise conv layers impl'd by VVAUs # each value is PE for a layer - vvau_layers = model.get_nodes_by_op_type("Vector_Vector_Activate_Batch") + vvau_layers = model.get_nodes_by_op_type("VectorVectorActivation") folding = [32, 32, 64, 16, 32, 8, 16, 16, 16, 16, 16, 4, 8] for vvau, pe in zip(vvau_layers, folding): vvau_inst = getCustomOp(vvau) diff --git a/tests/fpgadataflow/test_code_gen_trafo.py b/tests/fpgadataflow/test_code_gen_trafo.py index 5ddff3d36f03d17833e17bc98649a64dabf31577..f3649f5f00eb811df524c6ff28ca3728e6fc8faf 100644 --- a/tests/fpgadataflow/test_code_gen_trafo.py +++ b/tests/fpgadataflow/test_code_gen_trafo.py @@ -37,6 +37,7 @@ from finn.core.modelwrapper import ModelWrapper from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim +@pytest.mark.fpgadataflow @pytest.mark.vivado def test_code_gen_trafo(): idt = wdt = odt = DataType["BIPOLAR"] @@ -49,7 +50,7 @@ def test_code_gen_trafo(): outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, [1, mh]) node_inp_list = ["inp", "weights", "thresh"] FCLayer_node = helper.make_node( - "StreamingFCLayer_Batch", + "MatrixVectorActivation", node_inp_list, ["outp"], domain="finn.custom_op.fpgadataflow", diff --git a/tests/fpgadataflow/test_compilation_trafo.py b/tests/fpgadataflow/test_compilation_trafo.py index 81e2ff9a7c5829982cdb6121378e9e9e3af81632..c18fb2ede84b2b9a6401cf9e148e2c8cb8646329 100644 --- a/tests/fpgadataflow/test_compilation_trafo.py +++ b/tests/fpgadataflow/test_compilation_trafo.py @@ -38,6 +38,7 @@ from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim +@pytest.mark.fpgadataflow @pytest.mark.vivado def test_compilation_trafo(): idt = wdt = odt = DataType["BIPOLAR"] @@ -50,7 +51,7 @@ def test_compilation_trafo(): outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, [1, mh]) node_inp_list = ["inp", "weights", "thresh"] FCLayer_node = helper.make_node( - "StreamingFCLayer_Batch", + "MatrixVectorActivation", node_inp_list, ["outp"], domain="finn.custom_op.fpgadataflow", diff --git a/tests/fpgadataflow/test_convert_to_hls_1d_conv_layer.py b/tests/fpgadataflow/test_convert_to_hls_1d_conv_layer.py index c77179218875c7b83963f3aa8df0298f0b15b714..8cb0360bae1790e6dd49f6d34f372ebaea0e79c8 100644 --- a/tests/fpgadataflow/test_convert_to_hls_1d_conv_layer.py +++ b/tests/fpgadataflow/test_convert_to_hls_1d_conv_layer.py @@ -68,6 +68,7 @@ from finn.util.basic import gen_finn_dt_tensor @pytest.mark.parametrize("depthwise", [False, True]) @pytest.mark.parametrize("use_rtl_swg", [False, True]) @pytest.mark.parametrize("exec_mode", ["cppsim", "rtlsim"]) +@pytest.mark.fpgadataflow @pytest.mark.slow @pytest.mark.vivado def test_convert_to_hls_1d_conv_layer(conv_config, depthwise, use_rtl_swg, exec_mode): @@ -144,10 +145,10 @@ def test_convert_to_hls_1d_conv_layer(conv_config, depthwise, use_rtl_swg, exec_ new_model = model.transform(LowerConvsToMatMul()) new_model = new_model.transform(to_hls.InferConvInpGen(use_rtl_variant=use_rtl_swg)) if depthwise is True: - new_model = new_model.transform(to_hls.InferVVAU()) + new_model = new_model.transform(to_hls.InferVectorVectorActivation()) else: - new_model = new_model.transform(to_hls.InferQuantizedStreamingFCLayer()) - fc_node = new_model.get_nodes_by_op_type("StreamingFCLayer_Batch")[0] + new_model = new_model.transform(to_hls.InferQuantizedMatrixVectorActivation()) + fc_node = new_model.get_nodes_by_op_type("MatrixVectorActivation")[0] fc_inst = getCustomOp(fc_node) mw = fc_inst.get_nodeattr("MW") mh = fc_inst.get_nodeattr("MH") @@ -183,7 +184,7 @@ def test_convert_to_hls_1d_conv_layer(conv_config, depthwise, use_rtl_swg, exec_ assert padding_inst.get_nodeattr("SIMD") == in_chn if depthwise is True and exec_mode == "rtlsim": - node = new_model.get_nodes_by_op_type("Vector_Vector_Activate_Batch")[0] + node = new_model.get_nodes_by_op_type("VectorVectorActivation")[0] inst = getCustomOp(node) cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim") exp_cycles_dict = new_model.analysis(exp_cycles_per_layer) diff --git a/tests/fpgadataflow/test_convert_to_hls_channelwise_layer.py b/tests/fpgadataflow/test_convert_to_hls_channelwise_layer.py index bf690d1d68bc0f580663735c3596c1dfc0a651e8..946b748e583297c2e2fa52d73fed5f13fcba14ab 100644 --- a/tests/fpgadataflow/test_convert_to_hls_channelwise_layer.py +++ b/tests/fpgadataflow/test_convert_to_hls_channelwise_layer.py @@ -89,6 +89,7 @@ def make_single_maxpool_modelwrapper(onnx_op_name, ishape, idt, pdt, pshape): @pytest.mark.parametrize("scalar_param", [True, False]) # execution mode @pytest.mark.parametrize("exec_mode", ["cppsim", "rtlsim"]) +@pytest.mark.fpgadataflow @pytest.mark.vivado @pytest.mark.slow def test_convert_to_hls_channelwise_layer( diff --git a/tests/fpgadataflow/test_convert_to_hls_conv_fc_transition.py b/tests/fpgadataflow/test_convert_to_hls_conv_fc_transition.py index 9b0f3d68aed655f0b36857d50a085093ea94aecb..f24659f602fd2bac458dc0408c5aa603c62c1767 100755 --- a/tests/fpgadataflow/test_convert_to_hls_conv_fc_transition.py +++ b/tests/fpgadataflow/test_convert_to_hls_conv_fc_transition.py @@ -75,6 +75,7 @@ def get_multithreshold_rand_params(channels, num_of_thres, seed=None): ) @pytest.mark.parametrize("depthwise", [False, True]) @pytest.mark.parametrize("use_reshape", [False, True]) +@pytest.mark.fpgadataflow @pytest.mark.vivado @pytest.mark.slow def test_convert_to_hls_conv_fc_transition(conv_config, depthwise, use_reshape): @@ -218,8 +219,8 @@ def test_convert_to_hls_conv_fc_transition(conv_config, depthwise, use_reshape): # convert_to_hls if depthwise is True: - new_model = new_model.transform(to_hls.InferVVAU()) - new_model = new_model.transform(to_hls.InferQuantizedStreamingFCLayer()) + new_model = new_model.transform(to_hls.InferVectorVectorActivation()) + new_model = new_model.transform(to_hls.InferQuantizedMatrixVectorActivation()) new_model = new_model.transform(to_hls.InferThresholdingLayer()) new_model = new_model.transform(to_hls.InferConvInpGen()) new_model = new_model.transform(to_hls.InferStreamingMaxPool()) diff --git a/tests/fpgadataflow/test_convert_to_hls_conv_layer.py b/tests/fpgadataflow/test_convert_to_hls_conv_layer.py index d96bc987567cdcfcd18a404986c954c7527c7354..dc102a0e550544a61536ea6fbfc8b0dba0c7457b 100644 --- a/tests/fpgadataflow/test_convert_to_hls_conv_layer.py +++ b/tests/fpgadataflow/test_convert_to_hls_conv_layer.py @@ -58,6 +58,7 @@ from finn.util.basic import gen_finn_dt_tensor ) @pytest.mark.parametrize("depthwise", [False, True]) @pytest.mark.parametrize("exec_mode", ["cppsim", "rtlsim"]) +@pytest.mark.fpgadataflow @pytest.mark.slow @pytest.mark.vivado def test_convert_to_hls_conv_layer(conv_config, depthwise, exec_mode): @@ -123,10 +124,10 @@ def test_convert_to_hls_conv_layer(conv_config, depthwise, exec_mode): new_model = model.transform(LowerConvsToMatMul()) new_model = new_model.transform(to_hls.InferConvInpGen()) if depthwise is True: - new_model = new_model.transform(to_hls.InferVVAU()) + new_model = new_model.transform(to_hls.InferVectorVectorActivation()) else: - new_model = new_model.transform(to_hls.InferQuantizedStreamingFCLayer()) - fc_node = new_model.get_nodes_by_op_type("StreamingFCLayer_Batch")[0] + new_model = new_model.transform(to_hls.InferQuantizedMatrixVectorActivation()) + fc_node = new_model.get_nodes_by_op_type("MatrixVectorActivation")[0] fc_inst = getCustomOp(fc_node) mw = fc_inst.get_nodeattr("MW") mh = fc_inst.get_nodeattr("MH") @@ -172,7 +173,7 @@ def test_convert_to_hls_conv_layer(conv_config, depthwise, exec_mode): assert padding_inst.get_nodeattr("SIMD") == in_chn if depthwise is True and exec_mode == "rtlsim": - node = new_model.get_nodes_by_op_type("Vector_Vector_Activate_Batch")[0] + node = new_model.get_nodes_by_op_type("VectorVectorActivation")[0] inst = getCustomOp(node) cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim") exp_cycles_dict = new_model.analysis(exp_cycles_per_layer) diff --git a/tests/fpgadataflow/test_convert_to_hls_layers_cnv.py b/tests/fpgadataflow/test_convert_to_hls_layers_cnv.py index 3357ee6d6c1e540818549f2d0df8b8554690ca3c..32c338b7189af2d16ba540df026c174f58821e05 100644 --- a/tests/fpgadataflow/test_convert_to_hls_layers_cnv.py +++ b/tests/fpgadataflow/test_convert_to_hls_layers_cnv.py @@ -55,6 +55,7 @@ from finn.util.test import get_test_model_trained export_onnx_path_cnv = "test_convert_to_hls_layers_cnv.onnx" +@pytest.mark.fpgadataflow @pytest.mark.vivado # Standalone or fused thresholding-based activation @pytest.mark.parametrize("fused_activation", [True, False]) @@ -89,10 +90,10 @@ def test_convert_to_hls_layers_cnv_w1a1(fused_activation): # subsequently, the FC inference will generate passthrough MVAUs if not fused_activation: model = model.transform(to_hls.InferThresholdingLayer()) - model = model.transform(to_hls.InferBinaryStreamingFCLayer()) - model = model.transform(to_hls.InferQuantizedStreamingFCLayer()) + model = model.transform(to_hls.InferBinaryMatrixVectorActivation()) + model = model.transform(to_hls.InferQuantizedMatrixVectorActivation()) for node in model.graph.node: - if node.op_type == "StreamingFCLayer_Batch": + if node.op_type == "MatrixVectorActivation": inst = getCustomOp(node) inst.set_nodeattr("mem_mode", "decoupled") mw = inst.get_nodeattr("MW") @@ -121,7 +122,7 @@ def test_convert_to_hls_layers_cnv_w1a1(fused_activation): assert len(non_finn_nodes) == 5 exp_non_finn_nodes = ["Transpose", "Transpose", "Reshape", "Mul", "Add"] assert [x.op_type for x in non_finn_nodes] == exp_non_finn_nodes - fc_nodes = model.get_nodes_by_op_type("StreamingFCLayer_Batch") + fc_nodes = model.get_nodes_by_op_type("MatrixVectorActivation") assert len(fc_nodes) == 9 swg_nodes = model.get_nodes_by_op_type("ConvolutionInputGenerator") assert len(swg_nodes) == 6 diff --git a/tests/fpgadataflow/test_convert_to_hls_layers_fc.py b/tests/fpgadataflow/test_convert_to_hls_layers_fc.py index a1dc11e0eee5aab462beb0ec34b8771ced20a379..2f101fde4775e76caaf57970ab3083589789343a 100644 --- a/tests/fpgadataflow/test_convert_to_hls_layers_fc.py +++ b/tests/fpgadataflow/test_convert_to_hls_layers_fc.py @@ -55,6 +55,7 @@ from finn.util.test import get_test_model_trained export_onnx_path = "test_convert_to_hls_layers_fc.onnx" +@pytest.mark.fpgadataflow @pytest.mark.vivado def test_convert_to_hls_layers_tfc_w1a1(): tfc = get_test_model_trained("TFC", 1, 1) @@ -69,24 +70,24 @@ def test_convert_to_hls_layers_tfc_w1a1(): model = model.transform(absorb.AbsorbAddIntoMultiThreshold()) model = model.transform(absorb.AbsorbMulIntoMultiThreshold()) model = model.transform(RoundAndClipThresholds()) - model = model.transform(to_hls.InferBinaryStreamingFCLayer()) + model = model.transform(to_hls.InferBinaryMatrixVectorActivation()) fc0 = model.graph.node[2] - assert fc0.op_type == "StreamingFCLayer_Batch" + assert fc0.op_type == "MatrixVectorActivation" assert model.get_tensor_shape(fc0.input[0]) == [1, 784] assert model.get_tensor_shape(fc0.input[1]) == [784, 64] assert model.get_tensor_shape(fc0.input[2]) == [64, 1] fc1 = model.graph.node[3] - assert fc1.op_type == "StreamingFCLayer_Batch" + assert fc1.op_type == "MatrixVectorActivation" assert model.get_tensor_shape(fc1.input[0]) == [1, 64] assert model.get_tensor_shape(fc1.input[1]) == [64, 64] assert model.get_tensor_shape(fc1.input[2]) == [64, 1] fc2 = model.graph.node[4] - assert fc2.op_type == "StreamingFCLayer_Batch" + assert fc2.op_type == "MatrixVectorActivation" assert model.get_tensor_shape(fc2.input[0]) == [1, 64] assert model.get_tensor_shape(fc2.input[1]) == [64, 64] assert model.get_tensor_shape(fc2.input[2]) == [64, 1] fc3 = model.graph.node[5] - assert fc3.op_type == "StreamingFCLayer_Batch" + assert fc3.op_type == "MatrixVectorActivation" assert model.get_tensor_shape(fc3.input[0]) == [1, 64] assert model.get_tensor_shape(fc3.input[1]) == [64, 10] @@ -125,6 +126,7 @@ def test_convert_to_hls_layers_tfc_w1a1(): os.remove(export_onnx_path) +@pytest.mark.fpgadataflow @pytest.mark.vivado def test_convert_to_hls_layers_tfc_w1a2(): tfc = get_test_model_trained("TFC", 1, 2) @@ -136,28 +138,28 @@ def test_convert_to_hls_layers_tfc_w1a2(): model = model.transform(GiveReadableTensorNames()) model = model.transform(Streamline()) from finn.transformation.fpgadataflow.convert_to_hls_layers import ( - InferQuantizedStreamingFCLayer, + InferQuantizedMatrixVectorActivation, ) - model = model.transform(InferQuantizedStreamingFCLayer()) + model = model.transform(InferQuantizedMatrixVectorActivation()) fc0 = model.graph.node[2] - assert fc0.op_type == "StreamingFCLayer_Batch" + assert fc0.op_type == "MatrixVectorActivation" assert model.get_tensor_shape(fc0.input[0]) == [1, 784] assert model.get_tensor_shape(fc0.input[1]) == [784, 64] assert model.get_tensor_shape(fc0.input[2]) == [64, 2] fc1 = model.graph.node[3] - assert fc1.op_type == "StreamingFCLayer_Batch" + assert fc1.op_type == "MatrixVectorActivation" assert model.get_tensor_shape(fc1.input[0]) == [1, 64] assert model.get_tensor_shape(fc1.input[1]) == [64, 64] assert model.get_tensor_shape(fc1.input[2]) == [64, 2] fc2 = model.graph.node[4] - assert fc2.op_type == "StreamingFCLayer_Batch" + assert fc2.op_type == "MatrixVectorActivation" assert model.get_tensor_shape(fc2.input[0]) == [1, 64] assert model.get_tensor_shape(fc2.input[1]) == [64, 64] assert model.get_tensor_shape(fc2.input[2]) == [64, 2] fc3 = model.graph.node[5] - assert fc3.op_type == "StreamingFCLayer_Batch" + assert fc3.op_type == "MatrixVectorActivation" assert model.get_tensor_shape(fc3.input[0]) == [1, 64] assert model.get_tensor_shape(fc3.input[1]) == [64, 10] fc0w = getCustomOp(fc0) diff --git a/tests/fpgadataflow/test_convert_to_hls_layers_synthetic.py b/tests/fpgadataflow/test_convert_to_hls_layers_synthetic.py index 6089901566cb412e63cd8acc7a8260081248ba52..4d72dd0a8420a1e5f6b8fce7dde1905fadc433b7 100644 --- a/tests/fpgadataflow/test_convert_to_hls_layers_synthetic.py +++ b/tests/fpgadataflow/test_convert_to_hls_layers_synthetic.py @@ -143,12 +143,13 @@ def make_model(ch, ifmdim): @pytest.mark.parametrize("ch", [16]) # ifmdim @pytest.mark.parametrize("ifmdim", [5]) +@pytest.mark.fpgadataflow @pytest.mark.vivado @pytest.mark.slow def test_convert_to_hls_layers_synthetic(ch, ifmdim, idt): model = make_model(ch, ifmdim) model.save(export_onnx_path) - model = ModelWrapper(export_onnx_path) + model = ModelWrapper(export_onnx_path, fix_float64=True) model = model.transform(InferShapes()) model = model.transform(FoldConstants()) model = model.transform(GiveUniqueNodeNames()) diff --git a/tests/fpgadataflow/test_convert_to_hls_pool_batch.py b/tests/fpgadataflow/test_convert_to_hls_pool_batch.py index 3efafc040df07a7d56638bf5ce0b1ce01887343c..7595275c3be34e947f40415d050c0f3e4a9a7a58 100644 --- a/tests/fpgadataflow/test_convert_to_hls_pool_batch.py +++ b/tests/fpgadataflow/test_convert_to_hls_pool_batch.py @@ -48,22 +48,31 @@ from finn.transformation.infer_shapes import InferShapes from finn.util.basic import gen_finn_dt_tensor -def make_single_maxpool_modelwrapper(k, stride, pad, ifm_ch, ifm_dim, ofm_dim, idt): +def make_single_maxpool_modelwrapper( + k, stride, pad, ifm_ch, ifm_dim, ofm_dim, idt, use_1d=False +): odt = idt - inp = helper.make_tensor_value_info( - "inp", TensorProto.FLOAT, [1, ifm_ch, ifm_dim, ifm_dim] - ) - outp = helper.make_tensor_value_info( - "outp", TensorProto.FLOAT, [1, ifm_ch, ofm_dim, ofm_dim] - ) - + if use_1d: + ishape = [1, ifm_ch, 1, ifm_dim] + oshape = [1, ifm_ch, 1, ofm_dim] + kshape = [1, k] + pads = [0, pad, 0, pad] + strides = [1, stride] + else: + ishape = [1, ifm_ch, ifm_dim, ifm_dim] + oshape = [1, ifm_ch, ofm_dim, ofm_dim] + kshape = [k, k] + pads = [pad, pad, pad, pad] + strides = [stride, stride] + inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, ishape) + outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, oshape) mp_node = helper.make_node( "MaxPool", ["inp"], ["outp"], - kernel_shape=[k, k], - pads=[pad, pad, pad, pad], - strides=[stride, stride], + kernel_shape=kshape, + pads=pads, + strides=strides, ) graph = helper.make_graph( nodes=[mp_node], name="mp_graph", inputs=[inp], outputs=[outp] @@ -128,9 +137,10 @@ def prepare_inputs(input_tensor): # number of out channel computed in parallel @pytest.mark.parametrize("pe", [1, 2, 4]) # pool type -@pytest.mark.parametrize("op_type", ["QuantAvgPool2d", "MaxPool"]) +@pytest.mark.parametrize("op_type", ["QuantAvgPool2d", "MaxPool", "MaxPool1D"]) # execution mode @pytest.mark.parametrize("exec_mode", ["cppsim", "rtlsim"]) +@pytest.mark.fpgadataflow @pytest.mark.slow @pytest.mark.vivado def test_convert_to_hls_pool_batch( @@ -147,7 +157,14 @@ def test_convert_to_hls_pool_batch( np.random.seed(0) ofm_dim = int(((ifm_dim + 2 * pad - k) / stride) + 1) - x = gen_finn_dt_tensor(idt, (1, ifm_ch, ifm_dim, ifm_dim)) + ishape = (1, ifm_ch, ifm_dim, ifm_dim) + use_1d = False + if op_type == "MaxPool1D": + use_1d = True + ishape = (1, ifm_ch, 1, ifm_dim) + op_type = "MaxPool" + + x = gen_finn_dt_tensor(idt, ishape) # prepare input data input_dict = prepare_inputs(x) if op_type == "MaxPool": @@ -159,7 +176,7 @@ def test_convert_to_hls_pool_batch( pytest.skip("Skipping Maxpool with idt != odt") model = make_single_maxpool_modelwrapper( - k, stride, pad, ifm_ch, ifm_dim, ofm_dim, idt + k, stride, pad, ifm_ch, ifm_dim, ofm_dim, idt, use_1d ) elif op_type == "QuantAvgPool2d": if pad != 0: @@ -178,16 +195,40 @@ def test_convert_to_hls_pool_batch( new_model = model.transform(to_hls.InferPool_Batch()) new_model = new_model.transform(GiveUniqueNodeNames()) - if ifm_ch != pe: - new_model = new_model.transform(to_hls.InferConvInpGen()) - # Folding - for n in new_model.graph.node: - if n.op_type == "ConvolutionInputGenerator": - inst = getCustomOp(n) - inst.set_nodeattr("SIMD", pe) - elif n.op_type == "Pool_Batch": - inst = getCustomOp(n) - inst.set_nodeattr("PE", pe) + new_model = new_model.transform(to_hls.InferConvInpGen()) + # Folding + for n in new_model.graph.node: + if n.op_type.startswith("ConvolutionInputGenerator"): + inst = getCustomOp(n) + inst.set_nodeattr("SIMD", pe) + elif n.op_type == "Pool_Batch": + inst = getCustomOp(n) + inst.set_nodeattr("PE", pe) + + if stride <= k: + if pad == 0: + assert len(new_model.graph.node) == 4 + assert new_model.graph.node[0].op_type == "Transpose" + assert new_model.graph.node[1].op_type.startswith( + "ConvolutionInputGenerator" + ) + assert new_model.graph.node[2].op_type == "Pool_Batch" + assert new_model.graph.node[3].op_type == "Transpose" + else: + assert len(new_model.graph.node) == 5 + assert new_model.graph.node[0].op_type == "Transpose" + assert new_model.graph.node[1].op_type == "FMPadding_Batch" + assert new_model.graph.node[2].op_type.startswith( + "ConvolutionInputGenerator" + ) + assert new_model.graph.node[3].op_type == "Pool_Batch" + assert new_model.graph.node[4].op_type == "Transpose" + else: + # not currently converted to HLS, node stays as-is + assert len(new_model.graph.node) == 1 + assert new_model.graph.node[0].op_type in ["MaxPool", "QuantAvgPool2d"] + # no need to exec + return if exec_mode == "cppsim": new_model = new_model.transform(SetExecMode("cppsim")) @@ -205,13 +246,6 @@ def test_convert_to_hls_pool_batch( # execute new_model y_produced = oxe.execute_onnx(new_model, input_dict)["outp"] assert (y_produced == y_expected).all() - if stride <= k: - if pad == 0 or ifm_ch == pe: - assert len(new_model.graph.node) == 4 - else: - assert len(new_model.graph.node) == 5 - else: - assert len(new_model.graph.node) == 1 if exec_mode == "rtlsim": node = new_model.get_nodes_by_op_type("Pool_Batch")[0] diff --git a/tests/fpgadataflow/test_depthwise_convolution.py b/tests/fpgadataflow/test_depthwise_convolution.py index 633db668d3bc5de815a313743c06cd74a7166c9c..24c8b0d028222380bd7fa36887c59383a75b0229 100644 --- a/tests/fpgadataflow/test_depthwise_convolution.py +++ b/tests/fpgadataflow/test_depthwise_convolution.py @@ -40,7 +40,7 @@ from finn.custom_op.registry import getCustomOp from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim from finn.transformation.fpgadataflow.convert_to_hls_layers import ( InferConvInpGen, - InferVVAU, + InferVectorVectorActivation, ) from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim @@ -168,6 +168,7 @@ def set_up_reference_model(act, idt, wdt, k, ifm_dim, ifm_ch, stride, padding): @pytest.mark.parametrize("stride", [1, 2]) # padding @pytest.mark.parametrize("padding", [0, 1]) +@pytest.mark.fpgadataflow @pytest.mark.slow @pytest.mark.vivado def test_depthwise_conv_hls_cppsim(act, pe, k, stride, padding): @@ -182,7 +183,7 @@ def test_depthwise_conv_hls_cppsim(act, pe, k, stride, padding): input_dict = {"inp": input_tensor} new_model = model.transform(InferConvInpGen()) - new_model = new_model.transform(InferVVAU()) + new_model = new_model.transform(InferVectorVectorActivation()) # set SIMD in ConvInputGen node and PE in VVAU node @@ -190,7 +191,7 @@ def test_depthwise_conv_hls_cppsim(act, pe, k, stride, padding): if n.op_type == "ConvolutionInputGenerator": convinputgen_node = getCustomOp(n) convinputgen_node.set_nodeattr("SIMD", pe) - elif n.op_type == "Vector_Vector_Activate_Batch": + elif n.op_type == "VectorVectorActivation": vvau_node = getCustomOp(n) vvau_node.set_nodeattr("PE", pe) new_model = new_model.transform(SetExecMode("cppsim")) @@ -210,6 +211,7 @@ def test_depthwise_conv_hls_cppsim(act, pe, k, stride, padding): @pytest.mark.parametrize("stride", [1, 2]) # padding @pytest.mark.parametrize("padding", [0, 1]) +@pytest.mark.fpgadataflow @pytest.mark.slow @pytest.mark.vivado def test_depthwise_conv_hls_rtlsim(act, pe, k, stride, padding): @@ -224,7 +226,7 @@ def test_depthwise_conv_hls_rtlsim(act, pe, k, stride, padding): input_dict = {"inp": input_tensor} new_model = model.transform(InferConvInpGen()) - new_model = new_model.transform(InferVVAU()) + new_model = new_model.transform(InferVectorVectorActivation()) # set SIMD in ConvInputGen node and PE in VVAU node @@ -232,7 +234,7 @@ def test_depthwise_conv_hls_rtlsim(act, pe, k, stride, padding): if n.op_type == "ConvolutionInputGenerator": convinputgen_node = getCustomOp(n) convinputgen_node.set_nodeattr("SIMD", pe) - elif n.op_type == "Vector_Vector_Activate_Batch": + elif n.op_type == "VectorVectorActivation": vvau_node = getCustomOp(n) vvau_node.set_nodeattr("PE", pe) diff --git a/tests/fpgadataflow/test_fpgadataflow_addstreams.py b/tests/fpgadataflow/test_fpgadataflow_addstreams.py index 8cbf54ec188b12c67e02a33e3540718e9b08f382..a3927cd2aa6a9e87c32068f986ab6030fbacc559 100644 --- a/tests/fpgadataflow/test_fpgadataflow_addstreams.py +++ b/tests/fpgadataflow/test_fpgadataflow_addstreams.py @@ -89,6 +89,7 @@ def prepare_inputs(input1, input2): @pytest.mark.parametrize("fold", [-1, 2, 1]) # execution mode @pytest.mark.parametrize("exec_mode", ["cppsim", "rtlsim"]) +@pytest.mark.fpgadataflow @pytest.mark.vivado def test_fpgadataflow_addstreams(idt, ch, fold, exec_mode): if fold == -1: diff --git a/tests/fpgadataflow/test_fpgadataflow_channelwise_ops.py b/tests/fpgadataflow/test_fpgadataflow_channelwise_ops.py index 949046d4ae313b852471e7d8a93e44fea48f7b0f..f774a4ff53c636419d8eb7dcfba866fd601f0c98 100644 --- a/tests/fpgadataflow/test_fpgadataflow_channelwise_ops.py +++ b/tests/fpgadataflow/test_fpgadataflow_channelwise_ops.py @@ -100,6 +100,7 @@ def make_modelwrapper(C, pe, idt, odt, pdt, func, vecs): @pytest.mark.parametrize("func", ["add", "mul"]) # execution mode @pytest.mark.parametrize("exec_mode", ["cppsim", "rtlsim"]) +@pytest.mark.fpgadataflow @pytest.mark.vivado @pytest.mark.slow def test_fpgadataflow_channelwise_ops(idt, act, pdt, nf, ich, func, vecs, exec_mode): diff --git a/tests/fpgadataflow/test_fpgadataflow_concat.py b/tests/fpgadataflow/test_fpgadataflow_concat.py new file mode 100644 index 0000000000000000000000000000000000000000..8a7d78610132ff71ff92ee6a69ad7e089604463b --- /dev/null +++ b/tests/fpgadataflow/test_fpgadataflow_concat.py @@ -0,0 +1,149 @@ +# Copyright (c) 2021, Xilinx +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of FINN nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import pytest + +import numpy as np +import onnx +import torch +from io import BytesIO +from torch import nn + +from finn.core.datatype import DataType +from finn.core.modelwrapper import ModelWrapper +from finn.core.onnx_exec import execute_onnx +from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim +from finn.transformation.fpgadataflow.convert_to_hls_layers import InferConcatLayer +from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP +from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP +from finn.transformation.fpgadataflow.insert_fifo import InsertFIFO +from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim +from finn.transformation.fpgadataflow.prepare_ip import PrepareIP +from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim +from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode +from finn.transformation.general import GiveUniqueNodeNames +from finn.util.basic import gen_finn_dt_tensor + + +def make_concat_model(i_shapes, idt): + class ConcatModel(nn.Module): + def forward(self, *args): + return torch.cat(args, -1) + + torch_model = ConcatModel() + torch_model.eval() + input_t = [] + for i_shape in i_shapes: + input_t.append(torch.zeros(i_shape, dtype=torch.float32)) + input_t = tuple(input_t) + model_bytes = BytesIO() + torch.onnx.export(torch_model, input_t, model_bytes, opset_version=11) + model = onnx.ModelProto.FromString(model_bytes.getvalue()) + model = ModelWrapper(model) + for inp in model.graph.input: + model.set_tensor_datatype(inp.name, idt) + return model + + +@pytest.mark.parametrize("exec_mode", ["cppsim", "rtlsim"]) +@pytest.mark.parametrize("idt", [DataType["INT4"]]) +@pytest.mark.vivado +@pytest.mark.slow +def test_fpgadataflow_concat(exec_mode, idt): + i_shapes = [(1, 2, 4), (1, 2, 6), (1, 2, 1)] + i_data = [gen_finn_dt_tensor(idt, x) for x in i_shapes] + model = make_concat_model(i_shapes, idt) + assert len(i_shapes) == len(model.graph.input) + assert len(model.graph.output) == 1 + exp_oshape = list(i_shapes[0][:-1]) + [sum(x[-1] for x in i_shapes)] + oname = model.graph.output[0].name + assert model.get_tensor_shape(oname) == exp_oshape + exp_out = np.concatenate(i_data, axis=-1) + inp_dict = {} + for i in range(len(i_shapes)): + inp_dict[model.graph.input[i].name] = i_data[i] + ret = execute_onnx(model, inp_dict) + assert (ret[oname] == exp_out).all() + # call transformation to convert to HLS and verify conversion + model = model.transform(InferConcatLayer()) + assert model.graph.node[0].op_type == "StreamingConcat" + assert model.graph.node[0].domain == "finn.custom_op.fpgadataflow" + if exec_mode == "cppsim": + model = model.transform(PrepareCppSim()) + model = model.transform(CompileCppSim()) + model = model.transform(SetExecMode("cppsim")) + elif exec_mode == "rtlsim": + model = model.transform(GiveUniqueNodeNames()) + model = model.transform(PrepareIP("xc7z020clg400-1", 10)) + model = model.transform(HLSSynthIP()) + model = model.transform(SetExecMode("rtlsim")) + model = model.transform(PrepareRTLSim()) + ret_sim = execute_onnx(model, inp_dict) + assert (exp_out == ret_sim[oname]).all() + + +@pytest.mark.vivado +@pytest.mark.slow +def test_fpgadataflow_concat_stitchedip(): + idt = DataType["INT4"] + fpga_part = "xc7z020clg400-1" + clk_ns = 10 + i_shapes = [(1, 2, 4), (1, 2, 6), (1, 2, 1)] + i_data = [gen_finn_dt_tensor(idt, x) for x in i_shapes] + model = make_concat_model(i_shapes, idt) + assert len(i_shapes) == len(model.graph.input) + assert len(model.graph.output) == 1 + exp_oshape = list(i_shapes[0][:-1]) + [sum(x[-1] for x in i_shapes)] + oname = model.graph.output[0].name + assert model.get_tensor_shape(oname) == exp_oshape + exp_out = np.concatenate(i_data, axis=-1) + inp_dict = {} + for i in range(len(i_shapes)): + inp_dict[model.graph.input[i].name] = i_data[i] + ret = execute_onnx(model, inp_dict) + assert (ret[oname] == exp_out).all() + # call transformation to convert to HLS and verify conversion + model = model.transform(InferConcatLayer()) + assert model.graph.node[0].op_type == "StreamingConcat" + assert model.graph.node[0].domain == "finn.custom_op.fpgadataflow" + model = model.transform(InsertFIFO(create_shallow_fifos=True)) + model = model.transform(GiveUniqueNodeNames()) + model = model.transform(PrepareIP(fpga_part, clk_ns)) + model = model.transform(HLSSynthIP()) + model = model.transform( + CreateStitchedIP( + fpga_part, + clk_ns, + vitis=False, + ) + ) + model.set_metadata_prop("exec_mode", "rtlsim") + model.set_metadata_prop("rtlsim_trace", "trace.vcd") + model.save("dbg.onnx") + ret_sim = execute_onnx(model, inp_dict) + assert (exp_out == ret_sim[oname]).all() diff --git a/tests/fpgadataflow/test_fpgadataflow_convinputgenerator.py b/tests/fpgadataflow/test_fpgadataflow_convinputgenerator.py index 47cd7e7ba1df76cc793cd0946581239a6883874e..afac8dc6f30982b63827dcd5a9ee4b70c92235ae 100644 --- a/tests/fpgadataflow/test_fpgadataflow_convinputgenerator.py +++ b/tests/fpgadataflow/test_fpgadataflow_convinputgenerator.py @@ -149,6 +149,7 @@ def prepare_inputs(input_tensor): @pytest.mark.parametrize("simd", [1, 2]) # depthwise @pytest.mark.parametrize("dw", [0, 1]) +@pytest.mark.fpgadataflow @pytest.mark.slow @pytest.mark.vivado def test_fpgadataflow_slidingwindow( diff --git a/tests/fpgadataflow/test_fpgadataflow_convinputgenerator1d.py b/tests/fpgadataflow/test_fpgadataflow_convinputgenerator1d.py index 8440ac1fe46a0d1ea4db3d76489dfc4ce68ff642..0d8b26632307b2b514c2aacaa96b28989286cd0d 100644 --- a/tests/fpgadataflow/test_fpgadataflow_convinputgenerator1d.py +++ b/tests/fpgadataflow/test_fpgadataflow_convinputgenerator1d.py @@ -46,6 +46,8 @@ from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode from finn.transformation.general import GiveUniqueNodeNames from finn.util.basic import gen_finn_dt_tensor +fpga_part = "xczu3eg-sbva484-1-e" + def make_single_im2col_modelwrapper( k, ifm_ch, ifm_dim, ofm_dim, simd, stride, dilation, idt @@ -90,7 +92,7 @@ def make_single_im2col_modelwrapper( def make_single_slidingwindow_modelwrapper( - k, ifm_ch, ifm_dim, ofm_dim, simd, stride, dilation, idt, dw=0 + k, ifm_ch, ifm_dim, ofm_dim, simd, stride, dilation, idt, parallel_window, dw=0 ): k_h, k_w = k ifm_dim_h, ifm_dim_w = ifm_dim @@ -122,6 +124,7 @@ def make_single_slidingwindow_modelwrapper( inputDataType=idt.name, outputDataType=odt.name, depthwise=dw, + parallel_window=parallel_window, ) graph = helper.make_graph( nodes=[SlidingWindow_node], @@ -155,8 +158,7 @@ def prepare_inputs(input_tensor): # Stride @pytest.mark.parametrize("stride", [[1, 1], [2, 1]]) # Dilation -# @pytest.mark.parametrize("dilation", [[1, 1], [2, 1]]) -@pytest.mark.parametrize("dilation", [[1, 1]]) +@pytest.mark.parametrize("dilation", [[1, 1], [2, 1]]) # execution mode @pytest.mark.parametrize("exec_mode", ["cppsim", "rtlsim"]) # input channel parallelism ("SIMD") @@ -165,10 +167,23 @@ def prepare_inputs(input_tensor): @pytest.mark.parametrize("dw", [0, 1]) # Flip dimensions @pytest.mark.parametrize("flip", [False, True]) +# Use parallel window output variant +@pytest.mark.parametrize("parallel_window", [False, True]) +@pytest.mark.fpgadataflow @pytest.mark.slow @pytest.mark.vivado def test_fpgadataflow_slidingwindow_1d( - idt, k, ifm_dim, ifm_ch, stride, dilation, exec_mode, simd, dw, flip + idt, + k, + ifm_dim, + ifm_ch, + stride, + dilation, + exec_mode, + simd, + dw, + flip, + parallel_window, ): if flip: k = k[::-1] @@ -186,6 +201,11 @@ def test_fpgadataflow_slidingwindow_1d( """Dilation value greater than 1 and stride greater than 1 currently not supported for 1D convolutions""" ) + if (dilation_h > 1 or dilation_w > 1) and dw == 0: + pytest.skip( + """Dilation value greater than 1 currently not supported + for non-dws 1D convolutions""" + ) if simd > ifm_ch: pytest.skip("SIMD cannot be larger than number of input channels") @@ -203,6 +223,7 @@ def test_fpgadataflow_slidingwindow_1d( stride=stride, dilation=dilation, idt=idt, + parallel_window=parallel_window, dw=dw, ) @@ -213,7 +234,7 @@ def test_fpgadataflow_slidingwindow_1d( elif exec_mode == "rtlsim": model = model.transform(SetExecMode("rtlsim")) model = model.transform(GiveUniqueNodeNames()) - model = model.transform(PrepareIP("xc7z020clg400-1", 5)) + model = model.transform(PrepareIP(fpga_part, 5)) model = model.transform(HLSSynthIP()) model = model.transform(PrepareRTLSim()) else: diff --git a/tests/fpgadataflow/test_fpgadataflow_duplicatestreams.py b/tests/fpgadataflow/test_fpgadataflow_duplicatestreams.py index 73bf1165afa9418be0c89f77797de538275fd220..838dec81d32799d5a2afa6cfda8db632b2ac3355 100644 --- a/tests/fpgadataflow/test_fpgadataflow_duplicatestreams.py +++ b/tests/fpgadataflow/test_fpgadataflow_duplicatestreams.py @@ -48,25 +48,32 @@ from finn.transformation.infer_shapes import InferShapes from finn.util.basic import gen_finn_dt_tensor -def make_dupstreams_modelwrapper(ch, pe, idim, idt): +def make_dupstreams_modelwrapper(ch, pe, idim, idt, n_dupl): shape = [1, idim, idim, ch] inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, shape) - outp0 = helper.make_tensor_value_info("outp0", TensorProto.FLOAT, shape) - outp1 = helper.make_tensor_value_info("outp1", TensorProto.FLOAT, shape) + out_names = [] + out_vi = [] + for i in range(n_dupl): + outp_name = "outp%d" % i + out_names.append(outp_name) + out_vi.append( + helper.make_tensor_value_info(outp_name, TensorProto.FLOAT, shape) + ) dupstrm_node = helper.make_node( "DuplicateStreams_Batch", ["inp"], - ["outp0", "outp1"], + out_names, domain="finn.custom_op.fpgadataflow", backend="fpgadataflow", NumChannels=ch, + NumOutputStreams=n_dupl, PE=pe, inputDataType=idt.name, numInputVectors=[1, idim, idim], ) graph = helper.make_graph( - nodes=[dupstrm_node], name="graph", inputs=[inp], outputs=[outp0, outp1] + nodes=[dupstrm_node], name="graph", inputs=[inp], outputs=out_vi ) model = helper.make_model(graph, producer_name="addstreams-model") @@ -92,10 +99,13 @@ def prepare_inputs(input_tensor, idt): @pytest.mark.parametrize("fold", [-1, 2, 1]) # image dimension @pytest.mark.parametrize("imdim", [7]) +# amount of duplication +@pytest.mark.parametrize("n_dupl", [2, 3]) # execution mode @pytest.mark.parametrize("exec_mode", ["cppsim", "rtlsim"]) +@pytest.mark.fpgadataflow @pytest.mark.vivado -def test_fpgadataflow_duplicatestreams(idt, ch, fold, imdim, exec_mode): +def test_fpgadataflow_duplicatestreams(idt, ch, fold, imdim, n_dupl, exec_mode): if fold == -1: pe = 1 else: @@ -105,7 +115,7 @@ def test_fpgadataflow_duplicatestreams(idt, ch, fold, imdim, exec_mode): # generate input data x = gen_finn_dt_tensor(idt, (1, imdim, imdim, ch)) - model = make_dupstreams_modelwrapper(ch, pe, imdim, idt) + model = make_dupstreams_modelwrapper(ch, pe, imdim, idt, n_dupl) if exec_mode == "cppsim": model = model.transform(PrepareCppSim()) @@ -123,12 +133,11 @@ def test_fpgadataflow_duplicatestreams(idt, ch, fold, imdim, exec_mode): # prepare input data and execute input_dict = prepare_inputs(x, idt) output_dict = oxe.execute_onnx(model, input_dict) - y0 = output_dict["outp0"] - y1 = output_dict["outp1"] - expected_y = x - assert (y0 == expected_y).all(), exec_mode + " failed" - assert (y1 == expected_y).all(), exec_mode + " failed" + expected_y = x + for i in range(n_dupl): + y = output_dict["outp%d" % i] + assert (y == expected_y).all(), exec_mode + " failed" if exec_mode == "rtlsim": node = model.get_nodes_by_op_type("DuplicateStreams_Batch")[0] diff --git a/tests/fpgadataflow/test_fpgadataflow_dwc.py b/tests/fpgadataflow/test_fpgadataflow_dwc.py index 248b591eb48d7cfd6f121738a9bca525c38a45f8..973bfcca2e9862769b2b973365682cbfbc4b4512 100644 --- a/tests/fpgadataflow/test_fpgadataflow_dwc.py +++ b/tests/fpgadataflow/test_fpgadataflow_dwc.py @@ -83,6 +83,7 @@ def prepare_inputs(input_tensor, dt): @pytest.mark.parametrize("OUTWidth", [2, 4]) # finn_dtype @pytest.mark.parametrize("finn_dtype", [DataType["BIPOLAR"], DataType["INT2"]]) +@pytest.mark.fpgadataflow @pytest.mark.slow @pytest.mark.vivado def test_fpgadataflow_dwc_rtlsim(Shape, INWidth, OUTWidth, finn_dtype): diff --git a/tests/fpgadataflow/test_fpgadataflow_fifo.py b/tests/fpgadataflow/test_fpgadataflow_fifo.py index 4d3074fe14617df4386f060b6a476734931fb4ca..15e7f594ee4916559324f35d42b07de9acc5a2c6 100644 --- a/tests/fpgadataflow/test_fpgadataflow_fifo.py +++ b/tests/fpgadataflow/test_fpgadataflow_fifo.py @@ -87,6 +87,7 @@ def prepare_inputs(input_tensor, dt): @pytest.mark.parametrize("depth", [16]) # finn_dtype @pytest.mark.parametrize("finn_dtype", [DataType["BIPOLAR"]]) # , DataType["INT2"]]) +@pytest.mark.fpgadataflow @pytest.mark.slow @pytest.mark.vivado def test_fpgadataflow_fifo_rtlsim(Shape, folded_shape, depth, finn_dtype): diff --git a/tests/fpgadataflow/test_fpgadataflow_fmpadding.py b/tests/fpgadataflow/test_fpgadataflow_fmpadding.py index b564273c0927938859dc438dce619e7067a7ad74..ce21ea0c321587b4d73b64dbd2729090f141cce8 100644 --- a/tests/fpgadataflow/test_fpgadataflow_fmpadding.py +++ b/tests/fpgadataflow/test_fpgadataflow_fmpadding.py @@ -111,6 +111,7 @@ def make_single_fmpadding_modelwrapper(idim, padding, num_ch, simd, idt, pad_sty @pytest.mark.parametrize("idt", [DataType["INT2"], DataType["INT4"]]) # execution mode @pytest.mark.parametrize("mode", ["cppsim", "rtlsim"]) +@pytest.mark.fpgadataflow @pytest.mark.slow @pytest.mark.vivado def test_fpgadataflow_fmpadding(idim, pad, num_ch, simd, pad_style, idt, mode): diff --git a/tests/fpgadataflow/test_fpgadataflow_globalaccpool.py b/tests/fpgadataflow/test_fpgadataflow_globalaccpool.py index 2299cc6e8f397df718d2fd65be8a562c2457e42d..fc622b10e9abcc3b050e30fc275ca927b89c7d9c 100644 --- a/tests/fpgadataflow/test_fpgadataflow_globalaccpool.py +++ b/tests/fpgadataflow/test_fpgadataflow_globalaccpool.py @@ -87,6 +87,7 @@ def prepare_inputs(input_tensor, idt): @pytest.mark.parametrize("imdim", [7]) # execution mode @pytest.mark.parametrize("exec_mode", ["cppsim", "rtlsim"]) +@pytest.mark.fpgadataflow @pytest.mark.vivado def test_fpgadataflow_globalaccpool(idt, ch, fold, imdim, exec_mode): if fold == -1: diff --git a/tests/fpgadataflow/test_fpgadataflow_ipstitch.py b/tests/fpgadataflow/test_fpgadataflow_ipstitch.py index a4e75f5254b3bfd96871dbf32b8400edc2d55379..5fc934f1cda9715bd77cf00a39c2fb4dc1268abe 100644 --- a/tests/fpgadataflow/test_fpgadataflow_ipstitch.py +++ b/tests/fpgadataflow/test_fpgadataflow_ipstitch.py @@ -66,7 +66,7 @@ ip_stitch_model_dir = os.environ["FINN_BUILD_DIR"] def create_one_fc_model(mem_mode="const"): - # create a model with a StreamingFCLayer instance with no activation + # create a model with a MatrixVectorActivation instance with no activation # the wider range of the full accumulator makes debugging a bit easier wdt = DataType["INT2"] idt = DataType["INT32"] @@ -82,7 +82,7 @@ def create_one_fc_model(mem_mode="const"): outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, [1, m]) fc0 = helper.make_node( - "StreamingFCLayer_Batch", + "MatrixVectorActivation", ["inp", "w0"], ["outp"], domain="finn.custom_op.fpgadataflow", @@ -120,7 +120,7 @@ def create_one_fc_model(mem_mode="const"): def create_two_fc_model(mem_mode="decoupled"): - # create a model with two StreamingFCLayer instances + # create a model with two MatrixVectorActivation instances wdt = DataType["INT2"] idt = DataType["INT32"] odt = DataType["INT32"] @@ -136,7 +136,7 @@ def create_two_fc_model(mem_mode="decoupled"): outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, [1, m]) fc0 = helper.make_node( - "StreamingFCLayer_Batch", + "MatrixVectorActivation", ["inp", "w0"], ["mid"], domain="finn.custom_op.fpgadataflow", @@ -155,7 +155,7 @@ def create_two_fc_model(mem_mode="decoupled"): ) fc1 = helper.make_node( - "StreamingFCLayer_Batch", + "MatrixVectorActivation", ["mid", "w1"], ["outp"], domain="finn.custom_op.fpgadataflow", @@ -201,6 +201,7 @@ def create_two_fc_model(mem_mode="decoupled"): @pytest.mark.parametrize("mem_mode", ["const", "decoupled"]) +@pytest.mark.fpgadataflow @pytest.mark.vivado def test_fpgadataflow_ipstitch_gen_model(mem_mode): model = create_one_fc_model(mem_mode) @@ -214,7 +215,7 @@ def test_fpgadataflow_ipstitch_gen_model(mem_mode): model = model.transform(GiveUniqueNodeNames()) model = model.transform(PrepareIP(test_fpga_part, 5)) model = model.transform(HLSSynthIP()) - assert model.graph.node[0].op_type == "StreamingFCLayer_Batch" + assert model.graph.node[0].op_type == "MatrixVectorActivation" assert model.graph.node[-1].op_type == "TLastMarker" model.save( ip_stitch_model_dir + "/test_fpgadataflow_ipstitch_gen_model_%s.onnx" % mem_mode @@ -222,6 +223,7 @@ def test_fpgadataflow_ipstitch_gen_model(mem_mode): @pytest.mark.parametrize("mem_mode", ["const", "decoupled"]) +@pytest.mark.fpgadataflow @pytest.mark.vivado def test_fpgadataflow_ipstitch_do_stitch(mem_mode): model = load_test_checkpoint_or_skip( @@ -239,6 +241,7 @@ def test_fpgadataflow_ipstitch_do_stitch(mem_mode): @pytest.mark.parametrize("mem_mode", ["const", "decoupled"]) +@pytest.mark.fpgadataflow @pytest.mark.vivado def test_fpgadataflow_ipstitch_rtlsim(mem_mode): model = load_test_checkpoint_or_skip( @@ -287,6 +290,7 @@ def test_fpgadataflow_ipstitch_rtlsim(mem_mode): @pytest.mark.parametrize("mem_mode", ["const", "decoupled"]) +@pytest.mark.fpgadataflow @pytest.mark.vivado @pytest.mark.slow def test_fpgadataflow_ipstitch_synth_ooc(mem_mode): @@ -308,6 +312,7 @@ def test_fpgadataflow_ipstitch_synth_ooc(mem_mode): assert ret["fmax_mhz"] > 100 +@pytest.mark.fpgadataflow def test_fpgadataflow_ipstitch_iodma_floorplan(): model = create_one_fc_model() if model.graph.node[0].op_type == "StreamingDataflowPartition": @@ -330,10 +335,13 @@ def test_fpgadataflow_ipstitch_iodma_floorplan(): @pytest.mark.parametrize("period_ns", [5]) # override mem_mode to external @pytest.mark.parametrize("extw", [True, False]) +@pytest.mark.fpgadataflow @pytest.mark.slow @pytest.mark.vivado @pytest.mark.vitis -def test_fpgadataflow_ipstitch_vitis(board, period_ns, extw): +# temporarily marked as xfail +@pytest.mark.xfail +def test_fpgadataflow_ipstitch_vitis_end2end(board, period_ns, extw): if "VITIS_PATH" not in os.environ: pytest.skip("VITIS_PATH not set") platform = alveo_default_platform[board] @@ -353,9 +361,12 @@ def test_fpgadataflow_ipstitch_vitis(board, period_ns, extw): # board @pytest.mark.parametrize("board", ["Pynq-Z1"]) +@pytest.mark.fpgadataflow @pytest.mark.slow @pytest.mark.vivado -def test_fpgadataflow_ipstitch_zynqbuild(board): +# temporarily marked as xfail +@pytest.mark.xfail +def test_fpgadataflow_ipstitch_zynqbuild_end2end(board): model = create_two_fc_model() if model.graph.node[0].op_type == "StreamingDataflowPartition": sdp_node = getCustomOp(model.graph.node[0]) diff --git a/tests/fpgadataflow/test_fpgadataflow_labelselect.py b/tests/fpgadataflow/test_fpgadataflow_labelselect.py index 8ed06c8bdf1c0dbfab2f8141bf724132f4a24705..2858426d1ee4b1f91f5de807ccded4ffe35a3a40 100644 --- a/tests/fpgadataflow/test_fpgadataflow_labelselect.py +++ b/tests/fpgadataflow/test_fpgadataflow_labelselect.py @@ -92,6 +92,7 @@ def prepare_inputs(input_tensor, idt): @pytest.mark.parametrize("k", [1, 5]) # execution mode @pytest.mark.parametrize("exec_mode", ["cppsim", "rtlsim"]) +@pytest.mark.fpgadataflow @pytest.mark.vivado def test_fpgadataflow_labelselect(idt, labels, fold, k, exec_mode): np.random.seed(0) diff --git a/tests/fpgadataflow/test_fpgadataflow_lookup.py b/tests/fpgadataflow/test_fpgadataflow_lookup.py index 45678bbdf22c21d794777aba27d9070b42238267..0c284a530319290eb406c6b54a80e4f52d7ed1fa 100644 --- a/tests/fpgadataflow/test_fpgadataflow_lookup.py +++ b/tests/fpgadataflow/test_fpgadataflow_lookup.py @@ -36,8 +36,10 @@ from torch import nn from finn.core.datatype import DataType from finn.core.modelwrapper import ModelWrapper from finn.core.onnx_exec import execute_onnx +from finn.custom_op.registry import getCustomOp from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim from finn.transformation.fpgadataflow.convert_to_hls_layers import InferLookupLayer +from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim from finn.transformation.fpgadataflow.prepare_ip import PrepareIP @@ -87,6 +89,7 @@ def make_lookup_model(embeddings, ishape, idt, edt): ) # execution mode @pytest.mark.parametrize("exec_mode", ["cppsim", "rtlsim"]) +@pytest.mark.fpgadataflow @pytest.mark.vivado @pytest.mark.slow def test_fpgadataflow_lookup(edt, embedding_cfg, exec_mode): @@ -124,9 +127,57 @@ def test_fpgadataflow_lookup(edt, embedding_cfg, exec_mode): model = model.transform(SetExecMode("cppsim")) elif exec_mode == "rtlsim": model = model.transform(GiveUniqueNodeNames()) - model = model.transform(PrepareIP("xc7z020clg400-1", 10)) + model = model.transform(PrepareIP("xczu3eg-sbva484-1-e", 10)) model = model.transform(HLSSynthIP()) model = model.transform(SetExecMode("rtlsim")) model = model.transform(PrepareRTLSim()) ret_sim = execute_onnx(model, {iname: itensor}) assert (exp_out == ret_sim[oname]).all() + + +@pytest.mark.fpgadataflow +@pytest.mark.vivado +@pytest.mark.slow +def test_fpgadataflow_lookup_external(): + fpga_part = "xczu3eg-sbva484-1-e" + edt = DataType["INT8"] + embedding_cfg = (200000, DataType["UINT32"], 300) + ishape = (1, 600) + num_embeddings, idt, embedding_dim = embedding_cfg + eshape = (num_embeddings, embedding_dim) + exp_oshape = tuple(list(ishape) + [embedding_dim]) + embeddings = gen_finn_dt_tensor(edt, eshape) + model = make_lookup_model(embeddings, ishape, idt, edt) + assert len(model.graph.node) == 1 + assert model.graph.node[0].op_type == "Gather" + iname = model.graph.input[0].name + ename = model.graph.node[0].input[0] + oname = model.graph.output[0].name + assert model.get_tensor_datatype(iname) == idt + assert model.get_tensor_datatype(ename) == edt + assert model.get_tensor_datatype(oname) == edt + assert tuple(model.get_tensor_shape(ename)) == eshape + assert tuple(model.get_tensor_shape(oname)) == exp_oshape + assert (model.get_initializer(ename) == embeddings).all() + # itensor = gen_finn_dt_tensor(idt, ishape).astype(np.int64) + # itensor = np.clip(itensor, 0, num_embeddings - 1) + # ret = execute_onnx(model, {iname: itensor}) + # exp_out = np.take(embeddings, itensor, axis=0) + # assert (exp_out == ret[oname]).all() + # call transformation to convert to HLS and verify conversion + model = model.transform(InferLookupLayer()) + assert model.graph.node[0].op_type == "Lookup" + assert model.graph.node[0].input[0] == iname + assert model.graph.node[0].input[1] == ename + assert model.graph.node[0].output[0] == oname + getCustomOp(model.graph.node[0]).set_nodeattr("mem_mode", "external") + model = model.transform(GiveUniqueNodeNames()) + model = model.transform(PrepareIP(fpga_part, 10)) + model = model.transform(HLSSynthIP()) + model = model.transform(CreateStitchedIP(fpga_part, 10.0)) + ifnames = eval(model.get_metadata_prop("vivado_stitch_ifnames")) + # check some generated files/interfaces for the generated stitched IP + assert ifnames["aximm"] == [["m_axi_gmem0", 32]] + assert ifnames["s_axis"] == [["s_axis_0", 32]] + assert ifnames["m_axis"] == [["m_axis_0", 32]] + assert ifnames["axilite"] == ["s_axi_control_0"] diff --git a/tests/fpgadataflow/test_fpgadataflow_fclayer.py b/tests/fpgadataflow/test_fpgadataflow_mvau.py similarity index 97% rename from tests/fpgadataflow/test_fpgadataflow_fclayer.py rename to tests/fpgadataflow/test_fpgadataflow_mvau.py index 02c3a3dc9506152fe999873df0612e76a5c9cefd..2b638384412f6d1198f3a18949ec7a4c695bf0ed 100644 --- a/tests/fpgadataflow/test_fpgadataflow_fclayer.py +++ b/tests/fpgadataflow/test_fpgadataflow_mvau.py @@ -56,7 +56,7 @@ def make_single_fclayer_modelwrapper(W, pe, simd, wdt, idt, odt, T=None, tdt=Non assert mw % simd == 0 # there are two ways to implement bipolar weights and inputs for - # StreamingFC: + # MatrixVectorActivation: # - specify their datatypes as such # - specify their datatypes as BINARY as use binaryXnorMode if wdt == DataType["BIPOLAR"] and idt == DataType["BIPOLAR"]: @@ -85,7 +85,7 @@ def make_single_fclayer_modelwrapper(W, pe, simd, wdt, idt, odt, T=None, tdt=Non actval = 0 no_act = 1 FCLayer_node = helper.make_node( - "StreamingFCLayer_Batch", + "MatrixVectorActivation", node_inp_list, ["outp"], domain="finn.custom_op.fpgadataflow", @@ -146,6 +146,7 @@ def prepare_inputs(input_tensor, idt, wdt): @pytest.mark.parametrize("mw", [16]) # HLS matrix height (output features) @pytest.mark.parametrize("mh", [16]) +@pytest.mark.fpgadataflow @pytest.mark.slow @pytest.mark.vivado def test_fpgadataflow_fclayer_cppsim(mem_mode, idt, wdt, act, nf, sf, mw, mh): @@ -233,6 +234,7 @@ def test_fpgadataflow_fclayer_cppsim(mem_mode, idt, wdt, act, nf, sf, mw, mh): @pytest.mark.parametrize("mw", [16]) # HLS matrix height (output features) @pytest.mark.parametrize("mh", [16]) +@pytest.mark.fpgadataflow @pytest.mark.slow @pytest.mark.vivado def test_fpgadataflow_fclayer_rtlsim(mem_mode, idt, wdt, act, nf, sf, mw, mh): @@ -305,9 +307,9 @@ def test_fpgadataflow_fclayer_rtlsim(mem_mode, idt, wdt, act, nf, sf, mw, mh): assert (y_produced.reshape(y_expected.shape) == y_expected).all(), "rtlsim failed" hls_synt_res_est = model.analysis(hls_synth_res_estimation) - assert "StreamingFCLayer_Batch_0" in hls_synt_res_est + assert "MatrixVectorActivation_0" in hls_synt_res_est - node = model.get_nodes_by_op_type("StreamingFCLayer_Batch")[0] + node = model.get_nodes_by_op_type("MatrixVectorActivation")[0] inst = getCustomOp(node) cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim") exp_cycles_dict = model.analysis(exp_cycles_per_layer) @@ -332,6 +334,7 @@ def test_fpgadataflow_fclayer_rtlsim(mem_mode, idt, wdt, act, nf, sf, mw, mh): @pytest.mark.parametrize("mw", [128]) # HLS matrix height (output features) @pytest.mark.parametrize("mh", [128]) +@pytest.mark.fpgadataflow @pytest.mark.vivado def test_fpgadataflow_fclayer_large_depth_decoupled_mode_rtlsim( mem_mode, idt, wdt, act, nf, sf, mw, mh @@ -405,9 +408,9 @@ def test_fpgadataflow_fclayer_large_depth_decoupled_mode_rtlsim( assert (y_produced.reshape(y_expected.shape) == y_expected).all(), "rtlsim failed" hls_synt_res_est = model.analysis(hls_synth_res_estimation) - assert "StreamingFCLayer_Batch_0" in hls_synt_res_est + assert "MatrixVectorActivation_0" in hls_synt_res_est - node = model.get_nodes_by_op_type("StreamingFCLayer_Batch")[0] + node = model.get_nodes_by_op_type("MatrixVectorActivation")[0] inst = getCustomOp(node) cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim") exp_cycles_dict = model.analysis(exp_cycles_per_layer) diff --git a/tests/fpgadataflow/test_fpgadataflow_res_estimate.py b/tests/fpgadataflow/test_fpgadataflow_res_estimate.py index fe52a73fc07df8551442e975c5eb378c132a56d7..292a7dc7a38ec3e5f18b4d3ad243a34b4909d63f 100644 --- a/tests/fpgadataflow/test_fpgadataflow_res_estimate.py +++ b/tests/fpgadataflow/test_fpgadataflow_res_estimate.py @@ -26,6 +26,8 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +import pytest + from onnx import TensorProto, helper from finn.analysis.fpgadataflow.res_estimation import ( @@ -50,6 +52,7 @@ def check_two_dict_for_equality(dict1, dict2): return True +@pytest.mark.fpgadataflow def test_res_estimate(): mw = mh = 4 simd = 1 @@ -64,7 +67,7 @@ def test_res_estimate(): node_inp_list = ["inp", "weights", "thresh"] FCLayer_node = helper.make_node( - "StreamingFCLayer_Batch", + "MatrixVectorActivation", node_inp_list, ["outp"], domain="finn.custom_op.fpgadataflow", @@ -94,7 +97,7 @@ def test_res_estimate(): model = model.transform(GiveUniqueNodeNames()) prod_resource_estimation = model.analysis(res_estimation) expect_resource_estimation = { - "StreamingFCLayer_Batch_0": { + "MatrixVectorActivation_0": { "BRAM_18K": 0, "BRAM_efficiency": 1, "LUT": 357, @@ -111,7 +114,7 @@ def test_res_estimate(): prod_resource_estimation = model.analysis(res_estimation_complete) expect_resource_estimation = { - "StreamingFCLayer_Batch_0": [ + "MatrixVectorActivation_0": [ { "BRAM_18K": 0, "BRAM_efficiency": 1, diff --git a/tests/fpgadataflow/test_fpgadataflow_thresholding.py b/tests/fpgadataflow/test_fpgadataflow_thresholding.py index 341bd3f37041c9b5a1526e99b2c4bad4d3dd3029..7d95cba11accae4e126d69013850401add4ab9a4 100644 --- a/tests/fpgadataflow/test_fpgadataflow_thresholding.py +++ b/tests/fpgadataflow/test_fpgadataflow_thresholding.py @@ -52,15 +52,21 @@ from finn.transformation.general import GiveUniqueNodeNames from finn.util.basic import gen_finn_dt_tensor from finn.util.pyverilator import axilite_read, axilite_write -test_fpga_part = "xc7z020clg400-1" +test_fpga_part = "xczu3eg-sbva484-1-e" target_clk_ns = 5 -def make_single_thresholding_modelwrapper(T, pe, idt, odt, actval, mem_mode): +def make_single_thresholding_modelwrapper( + T, pe, idt, odt, actval, mem_mode, n_inp_vecs +): NumChannels = T.shape[0] - inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, [1, NumChannels]) - outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, [1, NumChannels]) + inp = helper.make_tensor_value_info( + "inp", TensorProto.FLOAT, n_inp_vecs + [NumChannels] + ) + outp = helper.make_tensor_value_info( + "outp", TensorProto.FLOAT, n_inp_vecs + [NumChannels] + ) node_inp_list = ["inp", "thresh"] @@ -78,6 +84,7 @@ def make_single_thresholding_modelwrapper(T, pe, idt, odt, actval, mem_mode): outputDataType=odt.name, ActVal=actval, mem_mode=mem_mode, + numInputVectors=n_inp_vecs, ) graph = helper.make_graph( nodes=[Thresholding_node], @@ -109,16 +116,18 @@ def make_single_thresholding_modelwrapper(T, pe, idt, odt, actval, mem_mode): @pytest.mark.parametrize("exec_mode", ["cppsim", "rtlsim"]) # memory mode @pytest.mark.parametrize("mem_mode", ["const", "decoupled"]) +@pytest.mark.fpgadataflow @pytest.mark.vivado @pytest.mark.slow def test_fpgadataflow_thresholding(idt, act, nf, ich, exec_mode, mem_mode): if nf == -1: nf = ich pe = ich // nf + n_inp_vecs = [1, 2, 2] assert ich % pe == 0 # generate input data - x = gen_finn_dt_tensor(idt, (1, ich)) + x = gen_finn_dt_tensor(idt, tuple(n_inp_vecs + [ich])) odt = act n_steps = act.get_num_possible_values() - 1 @@ -135,7 +144,9 @@ def test_fpgadataflow_thresholding(idt, act, nf, ich, exec_mode, mem_mode): else: actval = odt.min() - model = make_single_thresholding_modelwrapper(T, pe, idt, odt, actval, mem_mode) + model = make_single_thresholding_modelwrapper( + T, pe, idt, odt, actval, mem_mode, n_inp_vecs + ) if exec_mode == "cppsim": model = model.transform(PrepareCppSim()) @@ -153,7 +164,10 @@ def test_fpgadataflow_thresholding(idt, act, nf, ich, exec_mode, mem_mode): # package input data as dictionary input_dict = {"inp": x} - y = multithreshold(x, T) + # multithreshold util fxn wants NCHW input, not NHWC + y = multithreshold(np.transpose(x, (0, 3, 1, 2)), T) + # convert back to NHWC for comparison to hw outputs + y = np.transpose(y, (0, 2, 3, 1)) if act == DataType["BIPOLAR"]: # binary to bipolar y = 2 * y - 1 @@ -183,8 +197,10 @@ def test_fpgadataflow_thresholding(idt, act, nf, ich, exec_mode, mem_mode): assert exp_cycles != 0 +@pytest.mark.fpgadataflow @pytest.mark.vivado def test_runtime_thresholds_single_layer(): + n_inp_vecs = [1, 2, 2] mem_mode = "decoupled" act = DataType["INT4"] idt = DataType["INT16"] @@ -194,7 +210,7 @@ def test_runtime_thresholds_single_layer(): assert ich % pe == 0 # generate input data - in_tensor = gen_finn_dt_tensor(idt, (1, ich)) + in_tensor = gen_finn_dt_tensor(idt, tuple(n_inp_vecs + [ich])) odt = act n_steps = act.get_num_possible_values() - 1 @@ -207,7 +223,9 @@ def test_runtime_thresholds_single_layer(): else: actval = odt.min() - model = make_single_thresholding_modelwrapper(T, pe, idt, odt, actval, mem_mode) + model = make_single_thresholding_modelwrapper( + T, pe, idt, odt, actval, mem_mode, n_inp_vecs + ) op_inst = getCustomOp(model.graph.node[0]) op_inst.set_nodeattr("runtime_writeable_weights", 1) op_inst.make_weight_file(T, "decoupled_runtime", "old_weights.dat") @@ -227,7 +245,7 @@ def test_runtime_thresholds_single_layer(): # add two copies of the input tensor as the first one is just used to # "flush out" the pipeline (as mvau already starts receiving old weights while # we read/write new ones and reads seem to cause a disturbance too) - in_tensor = np.tile(in_tensor, (2, 1)) + in_tensor = np.tile(in_tensor, (2, 1, 1, 1)) exec_ctx = {"inp": in_tensor} extracted_weight_stream = [] @@ -244,7 +262,13 @@ def test_runtime_thresholds_single_layer(): # only use second batch element in output; first will be invalid due to # old weights (see above) y = exec_ctx["outp"][1] - expected = multithreshold(in_tensor, T)[1] + + # multithreshold util fxn wants NCHW input, not NHWC + expected = multithreshold(np.transpose(in_tensor, (0, 3, 1, 2)), T) + # convert back to NHWC for comparison to hw outputs + expected = np.transpose(expected, (0, 2, 3, 1))[1] + + # expected = multithreshold(in_tensor, T)[1] if act == DataType["BIPOLAR"]: # binary to bipolar expected = 2 * expected - 1 @@ -273,7 +297,10 @@ def test_runtime_thresholds_single_layer(): rtlsim_exec(model, exec_ctx, pre_hook=write_weights) y = exec_ctx["outp"][1] - expected = multithreshold(in_tensor, new_weights)[1] + # multithreshold util fxn wants NCHW input, not NHWC + expected = multithreshold(np.transpose(in_tensor, (0, 3, 1, 2)), new_weights) + # convert back to NHWC for comparison to hw outputs + expected = np.transpose(expected, (0, 2, 3, 1))[1] if act == DataType["BIPOLAR"]: # binary to bipolar expected = 2 * expected - 1 diff --git a/tests/fpgadataflow/test_fpgadataflow_upsampler.py b/tests/fpgadataflow/test_fpgadataflow_upsampler.py index 1709cfe32904a5ed369f8399150a8a1d05f4b781..362d9def1028c46a8ebf1d79649971156b1d57a3 100644 --- a/tests/fpgadataflow/test_fpgadataflow_upsampler.py +++ b/tests/fpgadataflow/test_fpgadataflow_upsampler.py @@ -125,6 +125,7 @@ class PyTorchTestModel(nn.Module): @pytest.mark.parametrize("NumChannels", [4]) # execution mode @pytest.mark.parametrize("exec_mode", ["cppsim", "rtlsim"]) +@pytest.mark.fpgadataflow @pytest.mark.vivado @pytest.mark.slow def test_fpgadataflow_upsampler(dt, IFMDim, scale, NumChannels, exec_mode): diff --git a/tests/fpgadataflow/test_fpgadataflow_vvau.py b/tests/fpgadataflow/test_fpgadataflow_vvau.py index 6f39994bf27594a063a1e66c5bba7867eaabef6e..75e3eab40bac1c1c7edc74c8ae0082fc55b07907 100644 --- a/tests/fpgadataflow/test_fpgadataflow_vvau.py +++ b/tests/fpgadataflow/test_fpgadataflow_vvau.py @@ -62,8 +62,8 @@ def _infer_sparse_weight_tensor(W_conv, k_h, k_w, channels): def _calculate_dot_prod_range(dt_a, dt_b, len): """Returns the (min,max) values a dot product between two (un)signed vectors of types dt_a and dt_b of len elements can take.""" - min_prod = 2 ** 30 - max_prod = -(2 ** 30) + min_prod = 2**30 + max_prod = -(2**30) for a_val in [dt_a.min(), dt_a.max()]: for b_val in [dt_b.min(), dt_b.max()]: prod = a_val * b_val * len @@ -98,7 +98,7 @@ def _make_single_vvau_modelwrapper( actval = 0 VVAU_node = helper.make_node( - "Vector_Vector_Activate_Batch", + "VectorVectorActivation", node_inp_list, ["outp"], domain="finn.custom_op.fpgadataflow", @@ -158,6 +158,7 @@ def prepare_inputs(input_tensor): @pytest.mark.parametrize("channels", [3, 4]) # execution mode @pytest.mark.parametrize("exec_mode", ["cppsim", "rtlsim"]) +@pytest.mark.fpgadataflow @pytest.mark.slow @pytest.mark.vivado def test_fpgadataflow_vvau( @@ -232,7 +233,7 @@ def test_fpgadataflow_vvau( assert (y_produced == y_expected).all(), "cppsim failed" if exec_mode == "rtlsim": - node = model.get_nodes_by_op_type("Vector_Vector_Activate_Batch")[0] + node = model.get_nodes_by_op_type("VectorVectorActivation")[0] inst = getCustomOp(node) cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim") exp_cycles_dict = model.analysis(exp_cycles_per_layer) diff --git a/tests/fpgadataflow/test_layer_streaming_maxpool_batch.py b/tests/fpgadataflow/test_layer_streaming_maxpool_batch.py index 236eb2a0342a2782f106761f4cd356888a2f8630..494aea4dad000ff6d6bf61e9e38440b727d90dc7 100644 --- a/tests/fpgadataflow/test_layer_streaming_maxpool_batch.py +++ b/tests/fpgadataflow/test_layer_streaming_maxpool_batch.py @@ -28,25 +28,29 @@ import pytest -import numpy as np +# import numpy as np from onnx import TensorProto, helper import finn.core.onnx_exec as oxe from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer from finn.core.datatype import DataType from finn.core.modelwrapper import ModelWrapper -from finn.custom_op.registry import getCustomOp +from finn.custom_op.general.maxpoolnhwc import compute_pool_output_dim + +# from finn.custom_op.registry import getCustomOp from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim +from finn.transformation.fpgadataflow.convert_to_hls_layers import InferStreamingMaxPool from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim from finn.transformation.fpgadataflow.prepare_ip import PrepareIP from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode from finn.transformation.general import GiveUniqueNodeNames +from finn.transformation.infer_shapes import InferShapes from finn.util.basic import gen_finn_dt_tensor -def make_single_maxpoolnhwc_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, idt): +def make_single_maxpoolnhwc_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, idt, ceil_mode): k_h, k_w = k ifm_dim_h, ifm_dim_w = ifm_dim ofm_dim_h, ofm_dim_w = ofm_dim @@ -65,6 +69,7 @@ def make_single_maxpoolnhwc_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, idt): domain="finn.custom_op.general", kernel_shape=[k_h, k_w], strides=[k_h, k_w], + ceil_mode=ceil_mode, pads=[0, 0, 0, 0], ) graph = helper.make_graph( @@ -80,7 +85,9 @@ def make_single_maxpoolnhwc_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, idt): return model -def make_single_streamingmaxpool_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, idt): +def make_single_streamingmaxpool_modelwrapper( + k, ifm_ch, pe, ifm_dim, ofm_dim, idt, ceil_mode +): k_h, k_w = k ifm_dim_h, ifm_dim_w = ifm_dim ofm_dim_h, ofm_dim_w = ofm_dim @@ -100,7 +107,9 @@ def make_single_streamingmaxpool_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, idt): backend="fpgadataflow", PoolDim=[k_h, k_w], NumChannels=ifm_ch, + PE=pe, ImgDim=[ifm_dim_h, ifm_dim_w], + CeilMode=ceil_mode, dataType=idt.name, ) graph = helper.make_graph( @@ -127,14 +136,21 @@ def prepare_inputs(input_tensor): # kernel size @pytest.mark.parametrize("k", [2, 4]) # input dimension -@pytest.mark.parametrize("ifm_dim", [4, 8]) +@pytest.mark.parametrize("ifm_dim", [4, 10]) # input channels @pytest.mark.parametrize("ifm_ch", [1, 3]) # 1,3 +# pe +@pytest.mark.parametrize("pe", [1, 3]) +# ceil mode +@pytest.mark.parametrize("ceil_mode", [1]) # execution mode @pytest.mark.parametrize("exec_mode", ["rtlsim", "cppsim"]) +@pytest.mark.fpgadataflow @pytest.mark.slow @pytest.mark.vivado -def test_fpgadataflow_streamingmaxpool(idt, dim_1d, k, ifm_dim, ifm_ch, exec_mode): +def test_fpgadataflow_streamingmaxpool( + idt, dim_1d, k, ifm_dim, ifm_ch, pe, ceil_mode, exec_mode +): ifm_dim_h = ifm_dim k_h = k if dim_1d: @@ -148,22 +164,31 @@ def test_fpgadataflow_streamingmaxpool(idt, dim_1d, k, ifm_dim, ifm_ch, exec_mod stride_h = k_h stride_w = k_w - ofm_dim_h = int(((ifm_dim_h - k_h) / stride_h) + 1) - ofm_dim_w = int(((ifm_dim_w - k_w) / stride_w) + 1) + ofm_dim_h = compute_pool_output_dim(ifm_dim_h, k_h, stride_h, 0, ceil_mode) + ofm_dim_w = compute_pool_output_dim(ifm_dim_w, k_w, stride_w, 0, ceil_mode) ofm_dim = (ofm_dim_h, ofm_dim_w) if idt == DataType["BIPOLAR"] and dim_1d: pytest.skip("Skipping binary StreamingMaxPool_1d (not implemented)") - if ifm_dim_h % k_h != 0 or ifm_dim_w % k_w != 0: - pytest.skip("Skipping StreamingMaxPool test w/ ImgDim % PoolDim != 0") + if (ifm_dim_h % k_h != 0 or ifm_dim_w % k_w != 0) and (not dim_1d): + pytest.skip("StreamingMaxPool_2d test w/ ImgDim % PoolDim != 0 not implemented") + if pe > ifm_ch: + pytest.skip("PE cannot be larger than number of input channels") + if pe > 1 and (not dim_1d): + pytest.skip("PE>1 only supported for StreamingMaxPool_1d") x = gen_finn_dt_tensor(idt, (1, ifm_dim_h, ifm_dim_w, ifm_ch)) # prepare input data input_dict = prepare_inputs(x) - golden = make_single_maxpoolnhwc_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, idt) + golden = make_single_maxpoolnhwc_modelwrapper( + k, ifm_ch, ifm_dim, ofm_dim, idt, ceil_mode + ) y_expected = oxe.execute_onnx(golden, input_dict)["outp"] - model = make_single_streamingmaxpool_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, idt) + model = golden.transform(InferStreamingMaxPool()) + model = model.transform(InferShapes()) + + assert model.graph.node[0].op_type == "StreamingMaxPool_Batch" if exec_mode == "cppsim": model = model.transform(SetExecMode("cppsim")) @@ -172,7 +197,7 @@ def test_fpgadataflow_streamingmaxpool(idt, dim_1d, k, ifm_dim, ifm_ch, exec_mod elif exec_mode == "rtlsim": model = model.transform(SetExecMode("rtlsim")) model = model.transform(GiveUniqueNodeNames()) - model = model.transform(PrepareIP("xc7z020clg400-1", 5)) + model = model.transform(PrepareIP("xczu3eg-sbva484-1-e", 5)) model = model.transform(HLSSynthIP()) model = model.transform(PrepareRTLSim()) else: @@ -184,9 +209,11 @@ def test_fpgadataflow_streamingmaxpool(idt, dim_1d, k, ifm_dim, ifm_ch, exec_mod if exec_mode == "rtlsim": node = model.get_nodes_by_op_type("StreamingMaxPool_Batch")[0] - inst = getCustomOp(node) - cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim") + # inst = getCustomOp(node) + # cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim") exp_cycles_dict = model.analysis(exp_cycles_per_layer) exp_cycles = exp_cycles_dict[node.name] - assert np.isclose(exp_cycles, cycles_rtlsim, atol=15) + # FIXME: maxpool cycles prediction needs a fix + # mostl likely due to some loops not flattening + # assert np.isclose(exp_cycles, cycles_rtlsim, atol=15) assert exp_cycles != 0 diff --git a/tests/fpgadataflow/test_runtime_weights.py b/tests/fpgadataflow/test_runtime_weights.py index 0196a78d5c4254d7cb116641f946bcccb9e1ebc9..f86c58d33568c68ba95c9cfbf31ad5b0c8c900f5 100644 --- a/tests/fpgadataflow/test_runtime_weights.py +++ b/tests/fpgadataflow/test_runtime_weights.py @@ -43,10 +43,11 @@ from finn.util.basic import gen_finn_dt_tensor from finn.util.create import hls_random_mlp_maker from finn.util.pyverilator import axilite_read, axilite_write -test_fpga_part = "xc7z020clg400-1" +test_fpga_part = "xczu3eg-sbva484-1-e" target_clk_ns = 5 +@pytest.mark.fpgadataflow @pytest.mark.vivado def test_runtime_weights_single_layer(): idt = DataType["UINT32"] @@ -67,7 +68,7 @@ def test_runtime_weights_single_layer(): } layer_spec_list = [layer_spec] model = hls_random_mlp_maker(layer_spec_list) - fcl = model.get_nodes_by_op_type("StreamingFCLayer_Batch")[0] + fcl = model.get_nodes_by_op_type("MatrixVectorActivation")[0] op_inst = getCustomOp(fcl) op_inst.set_nodeattr("mem_mode", "decoupled") op_inst.set_nodeattr("runtime_writeable_weights", 1) diff --git a/tests/fpgadataflow/test_set_folding.py b/tests/fpgadataflow/test_set_folding.py index 66fd5b43a1b8b8c8986bf9c9b9d0e9efd7a744a6..63612b9cc06d353b1f944e7ad0ba72add1d9e31f 100644 --- a/tests/fpgadataflow/test_set_folding.py +++ b/tests/fpgadataflow/test_set_folding.py @@ -66,7 +66,7 @@ def make_multi_fclayer_model(ch, wdt, adt, tdt, nnodes): simd = 1 FCLayer_nodes += [ helper.make_node( - "StreamingFCLayer_Batch", + "MatrixVectorActivation", [tensors[i].name, "weights_" + str(i), "thresh_" + str(i)], [tensors[i + 1].name], domain="finn.custom_op.fpgadataflow", @@ -109,9 +109,10 @@ def make_multi_fclayer_model(ch, wdt, adt, tdt, nnodes): # desired frames per second -@pytest.mark.parametrize("target_fps", [30, 10 ** 5, 10 ** 7]) +@pytest.mark.parametrize("target_fps", [30, 10**5, 10**7]) # target chip or board @pytest.mark.parametrize("platform", ["Pynq-Z1", "Ultra96", "U200"]) +@pytest.mark.fpgadataflow def test_set_folding(target_fps, platform): model = make_multi_fclayer_model( @@ -126,7 +127,7 @@ def test_set_folding(target_fps, platform): dataflow_model = load_test_checkpoint_or_skip(dataflow_model_filename) clk_ns = 5 - target_cycles_per_frame = int((10 ** 9 / clk_ns) / target_fps) + target_cycles_per_frame = int((10**9 / clk_ns) / target_fps) dataflow_model = dataflow_model.transform(SetFolding(target_cycles_per_frame)) exp_cycles_dict = dataflow_model.analysis(exp_cycles_per_layer) diff --git a/tests/transformation/streamline/test_absorb_mul_into_topk.py b/tests/transformation/streamline/test_absorb_mul_into_topk.py index bc9a31d49c7edfc20ca3e932efd00df939f1135f..e75f2d21db5cb2fe1b2f93e43ee0e61c7a7681c9 100644 --- a/tests/transformation/streamline/test_absorb_mul_into_topk.py +++ b/tests/transformation/streamline/test_absorb_mul_into_topk.py @@ -39,6 +39,7 @@ from finn.transformation.insert_topk import InsertTopK from finn.transformation.streamline.absorb import AbsorbScalarMulAddIntoTopK +@pytest.mark.streamline # parameter to indicate if mul parameter is negative or positive @pytest.mark.parametrize("mul_positive", [True, False]) # parameter to indicate if mul parameter is scalar or not diff --git a/tests/transformation/streamline/test_absorb_opposite_transposes.py b/tests/transformation/streamline/test_absorb_opposite_transposes.py index 859e691277a261f01b559e2e166763e402c5d689..ca5ed6ba6a85935604750ab35df0ccf30e032c2c 100644 --- a/tests/transformation/streamline/test_absorb_opposite_transposes.py +++ b/tests/transformation/streamline/test_absorb_opposite_transposes.py @@ -26,6 +26,8 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +import pytest + import numpy as np import onnx.helper as oh from onnx import TensorProto @@ -36,6 +38,7 @@ from finn.transformation.infer_shapes import InferShapes from finn.transformation.streamline.absorb import AbsorbConsecutiveTransposes +@pytest.mark.streamline def test_absorb_opposite_transposes(): np.random.seed(0) input_shape = [1, 3, 4, 2] diff --git a/tests/transformation/streamline/test_absorb_transp_into_flatten.py b/tests/transformation/streamline/test_absorb_transp_into_flatten.py index 1e5d5fe5806d2e3f418438b260d2257f5ae31adf..533dc693da0774e89d2dbb44aac52a6bef038990 100644 --- a/tests/transformation/streamline/test_absorb_transp_into_flatten.py +++ b/tests/transformation/streamline/test_absorb_transp_into_flatten.py @@ -13,6 +13,7 @@ from finn.transformation.infer_shapes import InferShapes from finn.transformation.streamline.absorb import AbsorbTransposeIntoFlatten +@pytest.mark.streamline # permutation of transpose node @pytest.mark.parametrize("perm", [[0, 2, 3, 1], [0, 1, 3, 2], [3, 2, 0, 1]]) # reshape or flatten diff --git a/tests/transformation/streamline/test_collapse_repeated_op.py b/tests/transformation/streamline/test_collapse_repeated_op.py index 1741ab6b8f4fc1c3e806a8868f329cd7753eac4d..d48d4ad3c2a30e005c1ccc02eee4f7edcaa8a57b 100644 --- a/tests/transformation/streamline/test_collapse_repeated_op.py +++ b/tests/transformation/streamline/test_collapse_repeated_op.py @@ -38,6 +38,7 @@ from finn.transformation.infer_shapes import InferShapes from finn.transformation.streamline import CollapseRepeatedAdd, CollapseRepeatedMul +@pytest.mark.streamline def test_collapse_repeated_op(): top_in = oh.make_tensor_value_info("top_in", TensorProto.FLOAT, [2]) add_param_0 = oh.make_tensor_value_info("add_param_0", TensorProto.FLOAT, [2]) @@ -74,6 +75,7 @@ def test_collapse_repeated_op(): assert new_model.graph.node[1].op_type == "Mul" +@pytest.mark.streamline @pytest.mark.parametrize( "test_args", [("Add", CollapseRepeatedAdd()), ("Mul", CollapseRepeatedMul())], diff --git a/tests/transformation/streamline/test_factor_out_mul_sign_magnitude.py b/tests/transformation/streamline/test_factor_out_mul_sign_magnitude.py index fca073f5a05e10bd721a18538dada05b4ad0d774..2e5ed2eebfcf7ac7c39ccd8c0f105dee8fb389a8 100644 --- a/tests/transformation/streamline/test_factor_out_mul_sign_magnitude.py +++ b/tests/transformation/streamline/test_factor_out_mul_sign_magnitude.py @@ -26,6 +26,8 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +import pytest + import numpy as np import onnx.helper as oh from onnx import TensorProto @@ -36,6 +38,7 @@ from finn.transformation.infer_shapes import InferShapes from finn.transformation.streamline import FactorOutMulSignMagnitude +@pytest.mark.streamline def test_factor_out_mul_sign_magnitude(): top_in = oh.make_tensor_value_info("top_in", TensorProto.FLOAT, [1, 2]) mul_param = oh.make_tensor_value_info("mul_param", TensorProto.FLOAT, [1, 2]) diff --git a/tests/transformation/streamline/test_linear_past_eltwise.py b/tests/transformation/streamline/test_linear_past_eltwise.py index 098b3f9d4f67a2cbc1a87fbb67a313d00e229777..0e4ad6237b3f293c2ee32dcb4963423f6e8d9f19 100644 --- a/tests/transformation/streamline/test_linear_past_eltwise.py +++ b/tests/transformation/streamline/test_linear_past_eltwise.py @@ -89,6 +89,7 @@ def make_model(shape): return model +@pytest.mark.streamline # channels @pytest.mark.parametrize("ch", [64]) # ifmdim @@ -133,6 +134,7 @@ def test_linear_past_eltwise_add(ch, ifmdim): os.remove(export_onnx_path) +@pytest.mark.streamline @pytest.mark.parametrize("ch", [64, 1]) # ifmdim @pytest.mark.parametrize("ifmdim", [-1, 7]) diff --git a/tests/transformation/streamline/test_maxpool_nhwc.py b/tests/transformation/streamline/test_maxpool_nhwc.py new file mode 100644 index 0000000000000000000000000000000000000000..446302be94d7c5e9c06da1c1fc926de7a3bff578 --- /dev/null +++ b/tests/transformation/streamline/test_maxpool_nhwc.py @@ -0,0 +1,109 @@ +import pytest + +import onnx +import onnx.helper as oh +from onnx import TensorProto + +import finn.core.onnx_exec as oxe +from finn.core.datatype import DataType +from finn.core.modelwrapper import ModelWrapper +from finn.custom_op.general.maxpoolnhwc import compute_pool_output_dim +from finn.transformation.infer_shapes import InferShapes +from finn.transformation.streamline.reorder import MakeMaxPoolNHWC +from finn.util.basic import gen_finn_dt_tensor + + +def create_maxpool(ifm_dim, ifm_ch, kernel_shape, pads, strides, ceil_mode, idt): + ofm_dim_h = compute_pool_output_dim( + ifm_dim[0], kernel_shape[0], strides[0], pads[0], ceil_mode + ) + ofm_dim_w = compute_pool_output_dim( + ifm_dim[1], kernel_shape[1], strides[1], pads[1], ceil_mode + ) + inp = oh.make_tensor_value_info( + "inp", TensorProto.FLOAT, [1, ifm_ch, ifm_dim[0], ifm_dim[1]] + ) + outp_mp = oh.make_tensor_value_info( + "outp_mp", TensorProto.FLOAT, [1, ifm_ch, ofm_dim_h, ofm_dim_w] + ) + outp = oh.make_tensor_value_info( + "outp", TensorProto.FLOAT, [1, ofm_dim_h, ofm_dim_w, ifm_ch] + ) + + maxpool_node = oh.make_node( + "MaxPool", + inputs=["inp"], + outputs=["out_mp"], + ceil_mode=ceil_mode, + kernel_shape=kernel_shape, + pads=pads, + strides=strides, + ) + + transpose_node = onnx.helper.make_node( + "Transpose", + inputs=["out_mp"], + outputs=["outp"], + name="Transpose1", + perm=[0, 2, 3, 1], + ) + + graph = oh.make_graph( + nodes=[maxpool_node, transpose_node], + name="maxpool_graph", + inputs=[inp], + outputs=[outp], + value_info=[outp_mp], + ) + + model = oh.make_model(graph, producer_name="maxpool_model") + model = ModelWrapper(model) + model.set_tensor_datatype("inp", idt) + model.set_tensor_datatype("outp", idt) + + model = model.transform(InferShapes()) + + return model + + +@pytest.mark.streamline +# input dimension +@pytest.mark.parametrize("ifm_dim", [[8, 8], [9, 9]]) +# input channels +@pytest.mark.parametrize("ifm_ch", [3]) +# kernel shape +@pytest.mark.parametrize("kernel_shape", [[2, 2]]) +# padding +@pytest.mark.parametrize("pads", [[0, 0, 0, 0], [1, 1, 1, 1]]) +# strides +@pytest.mark.parametrize("strides", [[2, 2]]) +# ceil_mode +@pytest.mark.parametrize("ceil_mode", [0, 1]) +# input datatype +@pytest.mark.parametrize("idt", [DataType["INT4"]]) +def test_maxpool_nhwc(ifm_dim, ifm_ch, kernel_shape, pads, strides, ceil_mode, idt): + # create MaxPool node + maxpool_model = create_maxpool( + ifm_dim, ifm_ch, kernel_shape, pads, strides, ceil_mode, idt + ) + + # generate input tensor for testing + input_tensor = gen_finn_dt_tensor(idt, [1, ifm_ch, ifm_dim[0], ifm_dim[1]]) + input_dict = {"inp": input_tensor} + + # execute first model + output_dict = oxe.execute_onnx(maxpool_model, input_dict) + expected = output_dict["outp"] + + # transform MaxPool into MaxPoolNHWC + maxpool_model = maxpool_model.transform(MakeMaxPoolNHWC()) + + # execute transformed model + output_node_name = maxpool_model.graph.output[0].name + output_dict = oxe.execute_onnx( + maxpool_model, input_dict, return_full_exec_context=False + ) + output = output_dict[output_node_name] + + # compare outputs + assert (expected == output).all() diff --git a/tests/transformation/streamline/test_move_add_past_mul.py b/tests/transformation/streamline/test_move_add_past_mul.py index 163b9d310a5f12bd0b854f9aa46f53a549bf109e..e0ee449734e523b1e1742c85dd6b9d1bbdd32537 100644 --- a/tests/transformation/streamline/test_move_add_past_mul.py +++ b/tests/transformation/streamline/test_move_add_past_mul.py @@ -26,6 +26,8 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +import pytest + import numpy as np import onnx.helper as oh from onnx import TensorProto @@ -36,6 +38,7 @@ from finn.transformation.infer_shapes import InferShapes from finn.transformation.streamline import MoveAddPastMul +@pytest.mark.streamline def test_move_add_past_mul_single(): top_in = oh.make_tensor_value_info("top_in", TensorProto.FLOAT, [2]) add_param = oh.make_tensor_value_info("add_param", TensorProto.FLOAT, [2]) @@ -65,6 +68,7 @@ def test_move_add_past_mul_single(): assert new_model.graph.node[0].output[0] == new_model.graph.node[1].input[0] +@pytest.mark.streamline def test_move_add_past_mul_multi(): top_in = oh.make_tensor_value_info("top_in", TensorProto.FLOAT, [2]) add_param_0 = oh.make_tensor_value_info("add_param_0", TensorProto.FLOAT, [2]) @@ -103,6 +107,7 @@ def test_move_add_past_mul_multi(): assert new_model.graph.node[i].output[0] == new_model.graph.node[i + 1].input[0] +@pytest.mark.streamline def test_move_add_past_mul_only_if_linear(): top_in = oh.make_tensor_value_info("top_in", TensorProto.FLOAT, [2]) top_out = oh.make_tensor_value_info("top_out", TensorProto.FLOAT, [2]) diff --git a/tests/transformation/streamline/test_move_chw_add_past_conv.py b/tests/transformation/streamline/test_move_chw_add_past_conv.py index e4be8fc3836f18bf95eb193516937c2e9334e2ff..d43531fa7d48a67ed91d1e7843bbdfd726fcf14d 100644 --- a/tests/transformation/streamline/test_move_chw_add_past_conv.py +++ b/tests/transformation/streamline/test_move_chw_add_past_conv.py @@ -38,6 +38,7 @@ from finn.transformation.infer_shapes import InferShapes from finn.transformation.streamline.reorder import MoveAddPastConv +@pytest.mark.streamline # input dimension @pytest.mark.parametrize("idim", [4, 7]) # kernel size diff --git a/tests/transformation/streamline/test_move_flatten_past_affine.py b/tests/transformation/streamline/test_move_flatten_past_affine.py index ef01436dc9435676b562e2b635a8cf12e901046b..1a4cecf1c46fddcb4427975cbf7e31a25628bf9a 100644 --- a/tests/transformation/streamline/test_move_flatten_past_affine.py +++ b/tests/transformation/streamline/test_move_flatten_past_affine.py @@ -42,6 +42,7 @@ from finn.transformation.streamline.reorder import MoveFlattenPastAffine from finn.util.basic import gen_finn_dt_tensor +@pytest.mark.streamline # data layout @pytest.mark.parametrize("data_layout", [DataLayout.NHWC, DataLayout.NCHW]) # batch size diff --git a/tests/transformation/streamline/test_move_flatten_past_topk.py b/tests/transformation/streamline/test_move_flatten_past_topk.py index 6086f7804eda4447de8f5948f521f0b003f65020..e3d8c65434871ecfa87784e69c76d99330c3f554 100644 --- a/tests/transformation/streamline/test_move_flatten_past_topk.py +++ b/tests/transformation/streamline/test_move_flatten_past_topk.py @@ -42,6 +42,7 @@ from finn.transformation.streamline.reorder import MoveFlattenPastTopK from finn.util.basic import gen_finn_dt_tensor +@pytest.mark.streamline # data layout @pytest.mark.parametrize("data_layout", [DataLayout.NHWC, DataLayout.NCHW]) # batch size diff --git a/tests/transformation/streamline/test_move_identical_op_past_join_op.py b/tests/transformation/streamline/test_move_identical_op_past_join_op.py index 60e76b8b07e06048ecf1a15c72134fecf5c97346..1d840ec15403e7a70c8da67a6f57076d8521d587 100644 --- a/tests/transformation/streamline/test_move_identical_op_past_join_op.py +++ b/tests/transformation/streamline/test_move_identical_op_past_join_op.py @@ -60,6 +60,7 @@ def create_model(perm): return model +@pytest.mark.streamline # Permutation of transpose node @pytest.mark.parametrize("perm", [[0, 3, 1, 2], [0, 2, 3, 1]]) def test_move_identical_op_past_join_op(perm): diff --git a/tests/transformation/streamline/test_move_maxpool_past_multithreshold.py b/tests/transformation/streamline/test_move_maxpool_past_multithreshold.py index fca05afa5b155e6a293857c14c10c4a9b80eeaf4..127f0fde7bc8423d7135a94f0d6f2ff1317bff76 100644 --- a/tests/transformation/streamline/test_move_maxpool_past_multithreshold.py +++ b/tests/transformation/streamline/test_move_maxpool_past_multithreshold.py @@ -1,3 +1,5 @@ +import pytest + import numpy as np from onnx import TensorProto, helper @@ -17,7 +19,7 @@ def get_multithreshold_rand_params(channels, num_of_thres, seed=None): thres = ((thres - bias) * steps).astype(np.float32) return thres - +@pytest.mark.streamline def test_move_maxpool_past_multithreshold(): # generate test vectors of correct shape ch = 64 diff --git a/tests/transformation/streamline/test_move_mul_past_dw_conv.py b/tests/transformation/streamline/test_move_mul_past_dw_conv.py index e9e956d845ef8e56d2078bcd738ad3bb0ff72bfa..ee7f840bb4461b9b32f25048c0678da9a68526b5 100644 --- a/tests/transformation/streamline/test_move_mul_past_dw_conv.py +++ b/tests/transformation/streamline/test_move_mul_past_dw_conv.py @@ -12,6 +12,7 @@ from finn.transformation.streamline.reorder import MoveMulPastDWConv from finn.util.basic import gen_finn_dt_tensor +@pytest.mark.streamline # input dimension @pytest.mark.parametrize("ifm_dim", [4, 7]) # input channels diff --git a/tests/transformation/streamline/test_move_mul_past_maxpool.py b/tests/transformation/streamline/test_move_mul_past_maxpool.py index 2c51aaf36a79591fd0fd0cea368d5e23da0d07c3..5f92c514c05b8ea9d75e6c3813dfee998fd8b08b 100755 --- a/tests/transformation/streamline/test_move_mul_past_maxpool.py +++ b/tests/transformation/streamline/test_move_mul_past_maxpool.py @@ -13,6 +13,7 @@ from finn.transformation.streamline.reorder import MoveMulPastMaxPool from finn.util.basic import gen_finn_dt_tensor +@pytest.mark.streamline # input dimension @pytest.mark.parametrize("ifm_dim", [4, 7]) # input channels diff --git a/tests/transformation/streamline/test_move_past_fork.py b/tests/transformation/streamline/test_move_past_fork.py index 364590f933ac27539fd546d64e25325032c885c9..f578234d6200936502e2e00c841b49707a99656b 100644 --- a/tests/transformation/streamline/test_move_past_fork.py +++ b/tests/transformation/streamline/test_move_past_fork.py @@ -9,6 +9,7 @@ from finn.transformation.infer_shapes import InferShapes from finn.transformation.streamline.reorder import MoveLinearPastFork +@pytest.mark.streamline @pytest.mark.parametrize("ch", [64, 1]) # ifmdim @pytest.mark.parametrize("ifmdim", [-1, 7]) diff --git a/tests/transformation/streamline/test_move_scalar_past_conv.py b/tests/transformation/streamline/test_move_scalar_past_conv.py index 5e2ded0174e9aa7a02551ed6b658f97ff070a523..8f725db91a4dadc938fb9296606e7214f02dcb6e 100644 --- a/tests/transformation/streamline/test_move_scalar_past_conv.py +++ b/tests/transformation/streamline/test_move_scalar_past_conv.py @@ -10,6 +10,7 @@ from finn.transformation.infer_shapes import InferShapes from finn.transformation.streamline import MoveAddPastConv, MoveScalarMulPastConv +@pytest.mark.streamline @pytest.mark.parametrize("padding", [False, True]) @pytest.mark.parametrize( "test_args", @@ -90,6 +91,7 @@ def test_move_scalar_past_conv(test_args, padding): assert new_model.graph.node[2].op_type == scalar_op +@pytest.mark.streamline @pytest.mark.parametrize( "test_args", [("Add", MoveAddPastConv()), ("Mul", MoveScalarMulPastConv())], diff --git a/tests/transformation/streamline/test_move_scalar_past_matmul.py b/tests/transformation/streamline/test_move_scalar_past_matmul.py index b15f84303b0dc2e00bd51397543871cfeb99c1f9..4d6dd95173485c234fd6d231e524d30b50ab56de 100644 --- a/tests/transformation/streamline/test_move_scalar_past_matmul.py +++ b/tests/transformation/streamline/test_move_scalar_past_matmul.py @@ -41,6 +41,7 @@ from finn.transformation.streamline import ( ) +@pytest.mark.streamline def test_move_scalar_mul_past_matmul(): top_in = oh.make_tensor_value_info("top_in", TensorProto.FLOAT, [1, 2]) mul_param = oh.make_tensor_value_info("mul_param", TensorProto.FLOAT, [1, 1]) @@ -72,6 +73,7 @@ def test_move_scalar_mul_past_matmul(): assert new_model.graph.node[0].output[0] == new_model.graph.node[1].input[0] +@pytest.mark.streamline def test_move_scalar_add_past_matmul(): top_in = oh.make_tensor_value_info("top_in", TensorProto.FLOAT, [1, 2]) add_param = oh.make_tensor_value_info("add_param", TensorProto.FLOAT, [1, 1]) @@ -103,6 +105,7 @@ def test_move_scalar_add_past_matmul(): assert new_model.graph.node[0].output[0] == new_model.graph.node[1].input[0] +@pytest.mark.streamline @pytest.mark.parametrize( "test_args", [("Add", MoveScalarAddPastMatMul()), ("Mul", MoveScalarMulPastMatMul())], diff --git a/tests/transformation/streamline/test_move_transpose_past_scalar_mul.py b/tests/transformation/streamline/test_move_transpose_past_scalar_mul.py index 9110ede98da81a627127767276db33362503ef84..ad174a4909202f2d62fa2a3c31a7da8ead900e0b 100644 --- a/tests/transformation/streamline/test_move_transpose_past_scalar_mul.py +++ b/tests/transformation/streamline/test_move_transpose_past_scalar_mul.py @@ -13,6 +13,7 @@ from finn.transformation.infer_shapes import InferShapes from finn.transformation.streamline.reorder import MoveTransposePastScalarMul +@pytest.mark.streamline # permutation of transpose node @pytest.mark.parametrize("perm", [[0, 2, 3, 1], [0, 1, 3, 2], [3, 2, 0, 1]]) # scalar mul diff --git a/tests/transformation/streamline/test_round_thresholds.py b/tests/transformation/streamline/test_round_thresholds.py index 2e57f1c85f6ac197ca7a4cf15e595c34cc0fb564..3a533b0694fa81bae846d2d2f6e8dbcb41a8ee6c 100644 --- a/tests/transformation/streamline/test_round_thresholds.py +++ b/tests/transformation/streamline/test_round_thresholds.py @@ -26,6 +26,8 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +import pytest + import numpy as np from onnx import TensorProto, helper @@ -35,6 +37,7 @@ from finn.core.modelwrapper import ModelWrapper from finn.transformation.streamline import RoundAndClipThresholds +@pytest.mark.streamline def test_round_thresholds(): v = helper.make_tensor_value_info("v", TensorProto.FLOAT, [1, 4]) thresholds = helper.make_tensor_value_info("thresholds", TensorProto.FLOAT, [4, 1]) diff --git a/tests/transformation/streamline/test_sign_to_thres.py b/tests/transformation/streamline/test_sign_to_thres.py index 2ffb5713c0363b115dee5c41484fb5826faf803a..aa9254e8d605bbcd1d8a61da4d79cc6d582a1764 100644 --- a/tests/transformation/streamline/test_sign_to_thres.py +++ b/tests/transformation/streamline/test_sign_to_thres.py @@ -26,6 +26,8 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +import pytest + import brevitas.onnx as bo import onnx import onnx.numpy_helper as nph @@ -42,6 +44,7 @@ from finn.util.test import get_test_model_trained export_onnx_path = "test_sign_to_thres.onnx" +@pytest.mark.streamline def test_sign_to_thres(): lfc = get_test_model_trained("LFC", 1, 1) bo.export_finn_onnx(lfc, (1, 1, 28, 28), export_onnx_path) diff --git a/tests/transformation/streamline/test_streamline_cnv.py b/tests/transformation/streamline/test_streamline_cnv.py index ed2595330323bfc8a576af36ae3fea27522ec66c..f2c4921c9ae55fa2206abbbb2661fe20e6068b93 100644 --- a/tests/transformation/streamline/test_streamline_cnv.py +++ b/tests/transformation/streamline/test_streamline_cnv.py @@ -50,6 +50,7 @@ from finn.util.test import get_test_model_trained export_onnx_path = make_build_dir("test_streamline_cnv_") +@pytest.mark.streamline # act bits @pytest.mark.parametrize("abits", [1, 2]) # weight bits diff --git a/tests/transformation/streamline/test_streamline_fc.py b/tests/transformation/streamline/test_streamline_fc.py index 3563b87c45a7ffe99fe6e9bdfd9f54a39e89cb68..875a1c46029b83f59211556dc79c9bac26ff927f 100644 --- a/tests/transformation/streamline/test_streamline_fc.py +++ b/tests/transformation/streamline/test_streamline_fc.py @@ -51,6 +51,7 @@ from finn.util.test import get_test_model_trained export_onnx_path = make_build_dir("test_streamline_fc_") +@pytest.mark.streamline # act bits @pytest.mark.parametrize("abits", [1, 2]) # weight bits diff --git a/tests/transformation/test_batchnorm_to_affine_bnn_pynq.py b/tests/transformation/test_batchnorm_to_affine_bnn_pynq.py index 300ef85faacf664b89c7b949ea2e462f110eef85..bdb988e2aa508ed7464aee33d30b671fa38ebacb 100644 --- a/tests/transformation/test_batchnorm_to_affine_bnn_pynq.py +++ b/tests/transformation/test_batchnorm_to_affine_bnn_pynq.py @@ -26,6 +26,8 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +import pytest + import pkg_resources as pk import brevitas.onnx as bo @@ -44,7 +46,7 @@ from finn.util.test import get_test_model_trained export_onnx_path = "test_output_bn2affine.onnx" - +@pytest.mark.transform def test_batchnorm_to_affine_cnv_w1a1(): lfc = get_test_model_trained("CNV", 1, 1) bo.export_finn_onnx(lfc, (1, 3, 32, 32), export_onnx_path) @@ -69,6 +71,7 @@ def test_batchnorm_to_affine_cnv_w1a1(): os.remove(export_onnx_path) +@pytest.mark.transform def test_batchnorm_to_affine_lfc_w1a1(): lfc = get_test_model_trained("LFC", 1, 1) bo.export_finn_onnx(lfc, (1, 1, 28, 28), export_onnx_path) diff --git a/tests/transformation/test_infer_data_layouts_cnv.py b/tests/transformation/test_infer_data_layouts_cnv.py index 10bc687d13d4a85ce64955cb38c1c0dfdc6d53da..99f6efd4d2bb358508b592e26e691300ef5a784e 100644 --- a/tests/transformation/test_infer_data_layouts_cnv.py +++ b/tests/transformation/test_infer_data_layouts_cnv.py @@ -26,6 +26,8 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +import pytest + import brevitas.onnx as bo import os @@ -46,6 +48,7 @@ from finn.util.test import get_test_model_trained export_onnx_path_cnv = "test_infer_data_layouts.onnx" +@pytest.mark.transform def test_infer_data_layouts_cnv(): cnv = get_test_model_trained("CNV", 1, 1) bo.export_finn_onnx(cnv, (1, 3, 32, 32), export_onnx_path_cnv) @@ -87,8 +90,8 @@ def test_infer_data_layouts_cnv(): model = model.transform(absorb.AbsorbTransposeIntoMultiThreshold()) model = model.transform(ConvertBipolarMatMulToXnorPopcount()) model = model.transform(Streamline()) - model = model.transform(to_hls.InferBinaryStreamingFCLayer()) - model = model.transform(to_hls.InferQuantizedStreamingFCLayer()) + model = model.transform(to_hls.InferBinaryMatrixVectorActivation()) + model = model.transform(to_hls.InferQuantizedMatrixVectorActivation()) model = model.transform(to_hls.InferConvInpGen()) model = model.transform(to_hls.InferStreamingMaxPool()) model = model.transform(GiveUniqueNodeNames()) @@ -103,9 +106,9 @@ def test_infer_data_layouts_cnv(): assert ( model.get_tensor_layout("ConvolutionInputGenerator_0_out0") == DataLayout.NHWC ) - assert model.get_tensor_layout("StreamingFCLayer_Batch_3_out0") == DataLayout.NHWC + assert model.get_tensor_layout("MatrixVectorActivation_3_out0") == DataLayout.NHWC assert model.get_tensor_layout("Reshape_0_out0") == DataLayout.NC - assert model.get_tensor_layout("StreamingFCLayer_Batch_6_out0") == DataLayout.NC + assert model.get_tensor_layout("MatrixVectorActivation_6_out0") == DataLayout.NC assert model.get_tensor_layout("global_out") == DataLayout.NC os.remove(export_onnx_path_cnv) diff --git a/tests/transformation/test_infer_datatypes_lfc.py b/tests/transformation/test_infer_datatypes_lfc.py index 8883dac7a54eafaaa768c8ae991b2030e385b318..3758485860cf0176143fe6f55b71508327ffe762 100644 --- a/tests/transformation/test_infer_datatypes_lfc.py +++ b/tests/transformation/test_infer_datatypes_lfc.py @@ -26,6 +26,8 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +import pytest + import brevitas.onnx as bo import os @@ -40,6 +42,7 @@ from finn.util.test import get_test_model_trained export_onnx_path = "test_infer_datatypes.onnx" +@pytest.mark.transform def test_infer_datatypes_lfc(): lfc = get_test_model_trained("LFC", 1, 1) bo.export_finn_onnx(lfc, (1, 1, 28, 28), export_onnx_path) diff --git a/tests/transformation/test_qonnx_to_finn.py b/tests/transformation/test_qonnx_to_finn.py index df7d63e3d2e139077f0fa20b10714c0a43a24e47..d9443e381677273d15bcb06832b009990a6ad11a 100644 --- a/tests/transformation/test_qonnx_to_finn.py +++ b/tests/transformation/test_qonnx_to_finn.py @@ -88,6 +88,7 @@ def analysis_testing_for_no_quant_nodes(model): return dict() +@pytest.mark.transform # This test currently takes about 4 min and 20 seconds @pytest.mark.parametrize("abits", [1, 2]) @pytest.mark.parametrize("wbits", [1, 2]) diff --git a/tests/util/test_build_dataflow.py b/tests/util/test_build_dataflow.py index de1b3abcc314c0c1451bd86bab8a7b93600ca697..d33a4f2fd6c974b13ac315c7ef621eacb04002c4 100644 --- a/tests/util/test_build_dataflow.py +++ b/tests/util/test_build_dataflow.py @@ -39,7 +39,7 @@ from finn.util.basic import make_build_dir @pytest.mark.slow @pytest.mark.vivado -def test_build_dataflow_directory(): +def test_end2end_build_dataflow_directory(): test_dir = make_build_dir("test_build_dataflow_directory_") target_dir = test_dir + "/build_dataflow" example_data_dir = pk.resource_filename("finn.qnn-data", "build_dataflow/") diff --git a/tests/util/test_create.py b/tests/util/test_create.py index c11e60175ea3ac94b6686ec5f8401a7c134fe53e..655c01f06eecca84d414ce3b995cfe4d1ba58170 100644 --- a/tests/util/test_create.py +++ b/tests/util/test_create.py @@ -32,6 +32,7 @@ import finn.util.create as create from finn.core.datatype import DataType +@pytest.mark.util @pytest.mark.parametrize( "bitwidth", [DataType["BIPOLAR"], DataType["INT2"], DataType["INT4"]] ) diff --git a/tests/util/test_data_packing_hls.py b/tests/util/test_data_packing_hls.py index 7113a3051bffb568e36b01af59945f0956658f76..a29d2ae3f87eaca3e57abe638849f066c7ed671e 100644 --- a/tests/util/test_data_packing_hls.py +++ b/tests/util/test_data_packing_hls.py @@ -38,6 +38,7 @@ from finn.core.datatype import DataType from finn.util.data_packing import numpy_to_hls_code +@pytest.mark.util @pytest.mark.parametrize( "dtype", [ @@ -96,10 +97,10 @@ def test_npy2apintstream(test_shape, dtype): with open(test_dir + "/test.cpp", "w") as f: f.write("\n".join(test_app_string)) cmd_compile = """ -g++ -o test_npy2apintstream test.cpp /workspace/cnpy/cnpy.cpp \ --I/workspace/cnpy/ -I{}/include -I/workspace/finn/src/finn/qnn-data/cpp \ +g++ -o test_npy2apintstream test.cpp $FINN_ROOT/deps/cnpy/cnpy.cpp \ +-I$FINN_ROOT/deps/cnpy/ -I{}/include -I$FINN_ROOT/src/finn/qnn-data/cpp \ --std=c++11 -lz""".format( - os.environ["VIVADO_PATH"] + os.environ["HLS_PATH"] ) with open(test_dir + "/compile.sh", "w") as f: f.write(cmd_compile) @@ -123,6 +124,7 @@ g++ -o test_npy2apintstream test.cpp /workspace/cnpy/cnpy.cpp \ assert success +@pytest.mark.util def test_numpy_to_hls_code(): def remove_all_whitespace(s): return "".join(s.split()) diff --git a/tutorials/fpga_flow/README.md b/tutorials/fpga_flow/README.md new file mode 100644 index 0000000000000000000000000000000000000000..63ca6ac832c556b3e47a15fc3207683886796f23 --- /dev/null +++ b/tutorials/fpga_flow/README.md @@ -0,0 +1,119 @@ +# FINN Example FPGA Flow Using MNIST Numerals + +This example demonstrates how to bring a FINN compiled model into the Vivado FPGA design environment for integration into a larger FPGA application. It extends on the command-line [build_dataflow](https://github.com/Xilinx/finn/tree/master/src/finn/qnn-data/build_dataflow) using a model that was quantized with [Brevitas](https://github.com/Xilinx/brevitas) down to single-bit weight/ativation precision to classify hand-written numerals from the MNIST data set. + +If you are new to the command-line flow, more information can be found [here](https://finn.readthedocs.io/en/latest/command_line.html). + +This demo was created using Vivado 2020.1. + +## Compiling the Model in FINN + +#### Configuration +`build.py` assembles the needed files and configures how the model is compiled when generating the "stitched IP". The following items will need to be set appropriately for specific use cases: +- `output_dir`: defines the directory to be created for FINN compiler output. +- `target_fps`: desired throughput performance target for FINN compiler to achieve. +- `mvau_wwidth_max`: _an optional parameter_ ([described here](https://finn.readthedocs.io/en/latest/source_code/finn.builder.html#finn.builder.build_dataflow_config.DataflowBuildConfig.mvau_wwidth_max)) shown only to illustrate passing additional configuration items to the compiler. +- `folding_config_file`: an optional parameter to pass a json file defining the layer optimizations (PE,SIMD,ramstyle, etc.) to the compiler. +- `synth_clk_period_ns`: set the desired clock period in nS. +- `fpga_part` configures the IP for your target device that the stitched IP will be implemented in. It should be the full string recognized in Vivado: \<device\>-\<package\>-\<temperature_grade\>-\<speed_grade\> +- `generate_outputs`: for integration purposes, the only output needed is `STITCHED_IP`. You might also find the `ESTIMATE_REPORTS` interesting. Other options are documented [here](https://finn.readthedocs.io/en/latest/command_line.html#generated-outputs) and some of them (namely OOC_SYNTH, BITFILE) add substantial runtime and are not needed for this flow. +- `stitched_ip_gen_dcp` : will generate an IP block with a synthesized design checkpoint (.dcp) which makes the design more portable across different machines, but will add some runtime. + + +### Running FINN Compiler + +Prior to running, insure the following prerequisites have been met: +- Install FINN and prerequisites. The [Getting Started](https://finn.readthedocs.io/en/latest/getting_started.html#quickstart) section of the FINN documentation might be helpful for this. +- Ensure you have the `FINN_XILINX_PATH` and `FINN_XILINX_VERSION` env variables set appropriately for your install. For example: +> export FINN_XILINX_PATH=/opt/Xilinx +> export FINN_XILINX_VERSION=2020.1 +- Set the env variable for your `finn` install top directory (where you cloned the FINN compiler repo): +> export FINN_ROOT=/home/foo/finn + +Then, change to `finn` install directory and invoke the build as follows: +> cd ${FINN_ROOT} +> ./run-docker.sh build_custom ${FINN_ROOT}/tutorials/fpga_flow/ + +Alternatively, since the tutorials folder is already part of the FINN compiler installation, you can invoke it from within the Docker container: +> cd ${FINN_ROOT} +> ./run-docker.sh +> cd tutorials/fpga_flow +> python build.py + +The build should finish in about 10 minutes, and the FINN docker will close on success. + +``` + ... + Running step: step_create_stitched_ip [12/18] + Running step: step_measure_rtlsim_performance [13/18] + Running step: step_out_of_context_synthesis [14/18] + Running step: step_synthesize_bitfile [15/18] + Running step: step_make_pynq_driver [16/18] + Running step: step_deployment_package [17/18] + Running step: custom_step_gen_tb_and_io [18/18] + Completed successfully + The program finished and will be restarted +``` + + +### Examine the Stitched IP + +Navigate to the stitched IP project directory: + +> cd ${FINN_ROOT}/tutorials/fpga_flow/output_tfc_w0a1_fpga/stitched_ip + +And, open the project: + +> vivado finn_vivado_stitch_proj.xpr + +Explore the IPI board design and note the interfaces. + + +### Simulating the Stitched IP with a Verilog Test Bench + +You may have noticed that the final build step invoked by FINN is `custom_step_gen_tb_and_io`. +This custom step generates the files we'll need to simulate the FINN design in Vivado, and places +them under `${FINN_ROOT}/tutorials/fpga_flow/output_tfc_w0a1_fpga/sim`. Let's examine these files. + +* `input.dat` and `expected_output.dat`: text files containing hex data for sample input and its expected + output. These are generated from the `input.npy` and `expected_output.npy` files by the FINN compiler. + Notice how the structure of the .dat files reflects the parallelization parameters of the first (for input) + and last (for output) layers of the hardware. The input is fed 49 bytes at a time, over 19 cycles to finish + a sample of 28x28=784 bytes from the MNIST dataset. Note how this matches PE=49 as selected for the first layer in `folding_config.json`. Additionally, note the reversal along each line in the .dat file to align the + byte order with what the FINN-generated hardware expects. + +* `finn_testbench.sv` : created by filling in a testbench template (under `templates/finn_testbench.template.sv`) with + relevant information by the FINN compiler, including the sizes of the input/output streams, folding factors and number of samples in the generated .dat file. + +* `make_sim_proj.tcl` : created by filling in a TCL script template (under `templates/make_sim_proj.template.tcl`) by + the FINN compiler. Used for launching the testbench simulation. + +You can now launch the simulation as follows: + +> cd ${FINN_ROOT}/tutorials/fpga_flow/output_tfc_w0a1_fpga/sim +> vivado -mode gui -source make_sim_proj.tcl + +The simulation should complete with: + +``` + # run all +CHK: Data match 02 == 02 --> 0 + +************************************************************ + SIM COMPLETE + Validated 1 data points + Total error count: ====> 0 <==== +``` + +You can also use the provided testbench skeleton and the custom step in `build.py` to build your own +testbench generators. + +#### Instantiation in Mission Design + +There are any number of ways to bring the stitched IP into larger design. + +FINN already packages the stitched IP block design as a standalone IP-XACT component, which you can find under `${FINN_ROOT}/tutorials/fpga_flow/output_tfc_w0a1_fpga/stitched_ip/ip`. You can add this to the list of IP repos and use it in your own Vivado designs. A good reference for this is [UG1119](https://www.xilinx.com/support/documentation/sw_manuals/xilinx2020_1/ug1119-vivado-creating-packaging-ip-tutorial.pdf) + +Keep in mind that all of the User IP Repo's included in the Stitched IP project (from `$FINN_HOST_BUILD_DIR` which is normally located under `/tmp/finn_dev_<username>`) need to also be brought in as IP Repo's to any project using the stitched IP. It would be prudent to copy those IP repos to an appropriate archive location. You should also set the +`FINN_ROOT` environment variable to point to the compiler installation directory, as some of the build scripts will +use this to access various components. Alternatively, if you don't want to copy all of the dependencies, you can ask FINN to generate the IP-XACT component with a synthesized .dcp checkpoint by passing the [stitched_ip_gen_dcp=True](https://finn-dev.readthedocs.io/en/latest/source_code/finn.builder.html#finn.builder.build_dataflow_config.DataflowBuildConfig.stitched_ip_gen_dcp) option as part of the build configuration. diff --git a/tutorials/fpga_flow/build.py b/tutorials/fpga_flow/build.py new file mode 100644 index 0000000000000000000000000000000000000000..1f8e27ef773e033933543cdc46de475c907a04eb --- /dev/null +++ b/tutorials/fpga_flow/build.py @@ -0,0 +1,145 @@ +# Copyright (c) 2022 Advanced Micro Devices, Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of Xilinx nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +# This file is intended to serve as an example showing how to set up custom builds +# using FINN. The custom build can be launched like this: +# ./run-docker.sh build_custom /path/to/folder + + +import numpy as np +import os + +import finn.builder.build_dataflow as build +import finn.builder.build_dataflow_config as build_cfg +import finn.util.data_packing as dpk +from finn.custom_op.registry import getCustomOp + +model_name = "tfc_w1a1" +platform_name = "fpga" + + +def custom_step_gen_tb_and_io(model, cfg): + sim_output_dir = cfg.output_dir + "/sim" + os.makedirs(sim_output_dir, exist_ok=True) + # load the provided input data + inp_data = np.load("input.npy") + batchsize = inp_data.shape[0] + # permute input image from NCHW -> NHWC format (needed by FINN) + # this example (MNIST) only has 1 channel, which means this doesn't + # really do anything in terms of data layout changes, but provided for + # completeness + inp_data = np.transpose(inp_data, (0, 2, 3, 1)) + # this network is an MLP and takes in flattened input + inp_data = inp_data.reshape(batchsize, -1) + # query the parallelism-dependent folded input shape from the + # node consuming the graph input + inp_name = model.graph.input[0].name + inp_node = getCustomOp(model.find_consumer(inp_name)) + inp_shape_folded = list(inp_node.get_folded_input_shape()) + inp_stream_width = inp_node.get_instream_width_padded() + # fix first dimension (N: batch size) to correspond to input data + # since FINN model itself always uses N=1 + inp_shape_folded[0] = batchsize + inp_shape_folded = tuple(inp_shape_folded) + inp_dtype = model.get_tensor_datatype(inp_name) + # now re-shape input data into the folded shape and do hex packing + inp_data = inp_data.reshape(inp_shape_folded) + inp_data_packed = dpk.pack_innermost_dim_as_hex_string( + inp_data, inp_dtype, inp_stream_width, prefix="", reverse_inner=True + ) + np.savetxt(sim_output_dir + "/input.dat", inp_data_packed, fmt="%s", delimiter="\n") + # load expected output and calculate folded shape + exp_out = np.load("expected_output.npy") + out_name = model.graph.output[0].name + out_node = getCustomOp(model.find_producer(out_name)) + out_shape_folded = list(out_node.get_folded_output_shape()) + out_stream_width = out_node.get_outstream_width_padded() + out_shape_folded[0] = batchsize + out_shape_folded = tuple(out_shape_folded) + out_dtype = model.get_tensor_datatype(out_name) + exp_out = exp_out.reshape(out_shape_folded) + out_data_packed = dpk.pack_innermost_dim_as_hex_string( + exp_out, out_dtype, out_stream_width, prefix="", reverse_inner=True + ) + np.savetxt( + sim_output_dir + "/expected_output.dat", + out_data_packed, + fmt="%s", + delimiter="\n", + ) + # fill in testbench template + with open("templates/finn_testbench.template.sv", "r") as f: + testbench_sv = f.read() + testbench_sv = testbench_sv.replace("@N_SAMPLES@", str(batchsize)) + testbench_sv = testbench_sv.replace("@IN_STREAM_BITWIDTH@", str(inp_stream_width)) + testbench_sv = testbench_sv.replace("@OUT_STREAM_BITWIDTH@", str(out_stream_width)) + testbench_sv = testbench_sv.replace( + "@IN_BEATS_PER_SAMPLE@", str(np.prod(inp_shape_folded[:-1])) + ) + testbench_sv = testbench_sv.replace( + "@OUT_BEATS_PER_SAMPLE@", str(np.prod(out_shape_folded[:-1])) + ) + testbench_sv = testbench_sv.replace("@TIMEOUT_CYCLES@", "1000") + with open(sim_output_dir + "/finn_testbench.sv", "w") as f: + f.write(testbench_sv) + # fill in testbench project creator template + with open("templates/make_sim_proj.template.tcl", "r") as f: + testbench_tcl = f.read() + testbench_tcl = testbench_tcl.replace("@STITCHED_IP_ROOT@", "../stitched_ip") + with open(sim_output_dir + "/make_sim_proj.tcl", "w") as f: + f.write(testbench_tcl) + + return model + + +build_steps = build_cfg.default_build_dataflow_steps + [custom_step_gen_tb_and_io] + + +cfg = build.DataflowBuildConfig( + steps=build_steps, + board=platform_name, + output_dir="output_%s_%s" % (model_name, platform_name), + synth_clk_period_ns=10.0, + folding_config_file="folding_config.json", + fpga_part="xczu3eg-sbva484-1-e", + shell_flow_type=build_cfg.ShellFlowType.VIVADO_ZYNQ, + stitched_ip_gen_dcp=False, + generate_outputs=[ + build_cfg.DataflowOutputType.STITCHED_IP, + ], + verify_steps=[ + build_cfg.VerificationStepType.TIDY_UP_PYTHON, + build_cfg.VerificationStepType.STREAMLINED_PYTHON, + build_cfg.VerificationStepType.FOLDED_HLS_CPPSIM, + build_cfg.VerificationStepType.STITCHED_IP_RTLSIM, + ], + save_intermediate_models=True, +) +model_file = "model.onnx" +build.build_dataflow_cfg(model_file, cfg) diff --git a/tutorials/fpga_flow/expected_output.npy b/tutorials/fpga_flow/expected_output.npy new file mode 100644 index 0000000000000000000000000000000000000000..a8d09384633791b7e3760dc8a2d1ba88a05d526d Binary files /dev/null and b/tutorials/fpga_flow/expected_output.npy differ diff --git a/tutorials/fpga_flow/folding_config.json b/tutorials/fpga_flow/folding_config.json new file mode 100644 index 0000000000000000000000000000000000000000..642200d02b39cf0d5572b3629cf071f29eba20f4 --- /dev/null +++ b/tutorials/fpga_flow/folding_config.json @@ -0,0 +1,30 @@ +{ + "Defaults": {}, + "Thresholding_Batch_0": { + "PE": 49, + "ram_style": "block" + }, + "MatrixVectorActivation_0": { + "PE": 16, + "SIMD": 49, + "ram_style": "block" + }, + "MatrixVectorActivation_1": { + "PE": 8, + "SIMD": 8, + "ram_style": "auto" + }, + "MatrixVectorActivation_2": { + "PE": 8, + "SIMD": 8, + "ram_style": "auto" + }, + "MatrixVectorActivation_3": { + "PE": 10, + "SIMD": 8, + "ram_style": "distributed" + }, + "LabelSelect_Batch_0": { + "PE": 1 + } +} diff --git a/tutorials/fpga_flow/gen_tb_data.py b/tutorials/fpga_flow/gen_tb_data.py new file mode 100755 index 0000000000000000000000000000000000000000..a525d92bfca8f23a74dd6d81af86c8f18dac7fe2 --- /dev/null +++ b/tutorials/fpga_flow/gen_tb_data.py @@ -0,0 +1,56 @@ +#!/usr/bin/python3 +# Copyright (c) 2022 Xilinx, Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of Xilinx nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +import sys +from keras.datasets import mnist + +(train_x, train_y), (test_x, test_y) = mnist.load_data() +print("Loaded MNIST test data successfully") +# print('X_test: ' + str(test_x.shape)) + +if len(sys.argv) != 2: + print("Expected: gen_tb_data.py <path_to_hex_file>") + sys.exit(-1) + +file_name = sys.argv[1] + +with open(file_name, "w") as tb_data: + for i in range(20): + for j in range(28): + for k in range(27, -1, -1): + tb_data.write("{:02X}".format(test_x[i][j][k])) + tb_data.write("\n") + tb_data.write( + "ffffffffffffffffffffffffffffffffffffffffffffffffffffff{:02X}\n".format( + test_y[i] + ) + ) + +print("Testbench data generated at " + file_name) diff --git a/tutorials/fpga_flow/input.npy b/tutorials/fpga_flow/input.npy new file mode 100644 index 0000000000000000000000000000000000000000..edd24de05a33a15ebc330cdab31f3d77d2c47196 Binary files /dev/null and b/tutorials/fpga_flow/input.npy differ diff --git a/tutorials/fpga_flow/model.onnx b/tutorials/fpga_flow/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..c513967dce32d0d4e48556f9c99a80f5ae881454 Binary files /dev/null and b/tutorials/fpga_flow/model.onnx differ diff --git a/tutorials/fpga_flow/numeral.png b/tutorials/fpga_flow/numeral.png new file mode 100644 index 0000000000000000000000000000000000000000..fbc51d21337a427d71a2e085ae7bf8b6b7986dac Binary files /dev/null and b/tutorials/fpga_flow/numeral.png differ diff --git a/tutorials/fpga_flow/templates/finn_testbench.template.sv b/tutorials/fpga_flow/templates/finn_testbench.template.sv new file mode 100644 index 0000000000000000000000000000000000000000..e9476249f3e12f7c52925e5d781a40fdafdf0739 --- /dev/null +++ b/tutorials/fpga_flow/templates/finn_testbench.template.sv @@ -0,0 +1,173 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, this +// list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * Neither the name of AMD nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// `timescale 1 ns / 1 ps +`define INPUT_HEXFILE "input.dat" +`define EXPECTED_OUTPUT_HEXFILE "expected_output.dat" + +// general FINN testbench parameters +parameter N_SAMPLES = @N_SAMPLES@; +parameter IN_STREAM_BITWIDTH = @IN_STREAM_BITWIDTH@; +parameter OUT_STREAM_BITWIDTH = @OUT_STREAM_BITWIDTH@; +parameter IN_BEATS_PER_SAMPLE = @IN_BEATS_PER_SAMPLE@; +parameter OUT_BEATS_PER_SAMPLE = @OUT_BEATS_PER_SAMPLE@; +parameter TIMEOUT_CYCLES = @TIMEOUT_CYCLES@; + +parameter IN_SAMPLE_BITWIDTH = IN_STREAM_BITWIDTH * IN_BEATS_PER_SAMPLE; +parameter OUT_SAMPLE_BITWIDTH = OUT_STREAM_BITWIDTH * OUT_BEATS_PER_SAMPLE; + +module tb (); + + +logic [IN_STREAM_BITWIDTH-1:0] input_data [N_SAMPLES*IN_BEATS_PER_SAMPLE]; +logic [OUT_STREAM_BITWIDTH-1:0] exp_output_data [N_SAMPLES*OUT_BEATS_PER_SAMPLE]; +logic [IN_STREAM_BITWIDTH-1:0] current_input [IN_BEATS_PER_SAMPLE]; +logic [$clog2(N_SAMPLES*OUT_BEATS_PER_SAMPLE):0] rd_ptr=0; +logic [$clog2(N_SAMPLES*OUT_BEATS_PER_SAMPLE):0] wr_ptr=0; +int err_count=0; +int data_count=0; +int i,j; +logic [31:0] input_file_lines; +logic [31:0] exp_output_file_lines; + +logic ap_clk = 0; +logic ap_rst_n = 0; + +logic [OUT_STREAM_BITWIDTH-1:0] dout_tdata; +logic dout_tlast; +logic dout_tready; +logic dout_tvalid; + +logic [IN_STREAM_BITWIDTH-1:0] din_tdata; +logic din_tready; +logic din_tvalid; + + + +finn_design_wrapper finn_design_wrapper ( + .ap_clk (ap_clk ), + .ap_rst_n (ap_rst_n ), + // output stream + .m_axis_0_tdata (dout_tdata ), + .m_axis_0_tready (dout_tready ), + .m_axis_0_tvalid (dout_tvalid ), + // input stream + .s_axis_0_tdata (din_tdata ), + .s_axis_0_tready (din_tready ), + .s_axis_0_tvalid (din_tvalid ) +); + +always #5ns ap_clk = !ap_clk; + +initial begin + // read input hexfile + $readmemh(`INPUT_HEXFILE, input_data); + for (i=0; i<N_SAMPLES*IN_BEATS_PER_SAMPLE; i+=1) if (input_data[i][0] !== 1'bx) input_file_lines = i; + if (input_file_lines[0] === {1'bx}) begin + $display("ERROR: Unable to read dat file: %s",`INPUT_HEXFILE); + $finish; + end + // read expected output hexfile + $readmemh(`EXPECTED_OUTPUT_HEXFILE, exp_output_data); + for (i=0; i<N_SAMPLES*OUT_BEATS_PER_SAMPLE; i+=1) if (exp_output_data[i][0] !== 1'bx) exp_output_file_lines = i; + if (exp_output_file_lines[0] === {1'bx}) begin + $display("ERROR: Unable to read dat file: %s",`EXPECTED_OUTPUT_HEXFILE); + $finish; + end + + din_tvalid = 0; + din_tdata = 0; + dout_tready = 1; + + // perform reset + repeat (100) @(negedge ap_clk); + ap_rst_n = 1; + repeat (100) @(negedge ap_clk); + dout_tready = 1; + + repeat (10) @(negedge ap_clk); + @(negedge ap_clk); + @(negedge ap_clk); + + + // feed all inputs + for (j=0; j<N_SAMPLES; j+=1) begin + // get current input and expected output samples from batch data + for (i=0; i<IN_BEATS_PER_SAMPLE; i+=1) begin + current_input[i] = input_data[j*IN_BEATS_PER_SAMPLE+i]; + end + // put corresponding expected output into queue + // data is already in exp_output_data + for (i=0; i<OUT_BEATS_PER_SAMPLE; i+=1) begin + wr_ptr++; + end + // feed current input + for (i=0; i<IN_BEATS_PER_SAMPLE; i+=1) begin + din_tvalid = 1; + din_tdata = current_input[i]; + @(negedge ap_clk); + // TODO add timeout on input backpressure + while (~din_tready) @(negedge ap_clk); + din_tvalid = 0; + end + end + + din_tdata = 0; + din_tvalid = 0; + + repeat (TIMEOUT_CYCLES) @(negedge ap_clk); + din_tdata = 0; + if (wr_ptr != rd_ptr) begin + $display("ERR: End-sim check: rd_ptr %h != %h wr_ptr",rd_ptr, wr_ptr); + err_count++; + end + + $display("\n************************************************************ "); + $display(" SIM COMPLETE"); + $display(" Validated %0d data points ",data_count); + $display(" Total error count: ====> %0d <====\n",err_count); + $finish; +end + + +// Check the result at each valid output from the model +always @(posedge ap_clk) begin + if (dout_tvalid && ap_rst_n) begin + // TODO implement output folding - current code assumes OUT_BEATS_PER_SAMPLE=1 + if (dout_tdata !== exp_output_data[rd_ptr]) begin + $display("ERR: Data mismatch %h != %h ",dout_tdata, exp_output_data[rd_ptr]); + err_count++; + end else begin + $display("CHK: Data match %h == %h --> %0d",dout_tdata, exp_output_data[rd_ptr], data_count); + end + rd_ptr++; + data_count++; + end +end + +endmodule diff --git a/tutorials/fpga_flow/templates/make_sim_proj.template.tcl b/tutorials/fpga_flow/templates/make_sim_proj.template.tcl new file mode 100644 index 0000000000000000000000000000000000000000..e39031023e9a065551180b70787cdca720049898 --- /dev/null +++ b/tutorials/fpga_flow/templates/make_sim_proj.template.tcl @@ -0,0 +1,39 @@ +# Copyright (c) 2022 Advanced Micro Devices, Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of Xilinx nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# open the stitched IP project to get all the sources +open_project @STITCHED_IP_ROOT@/finn_vivado_stitch_proj.xpr + +read_verilog -quiet [glob -nocomplain *.v] +read_verilog -quiet -sv [glob -nocomplain *.sv] + +save_project_as sim -force +add_files -fileset sim_1 [glob *.dat] +set_property top tb [get_fileset sim_1] +launch_simulation -simset sim_1 -mode behavioral +run all diff --git a/tutorials/fpga_flow/testbench.sv b/tutorials/fpga_flow/testbench.sv new file mode 100644 index 0000000000000000000000000000000000000000..54e7178f4e330698381ee6a4ac63b2d5dc4069ca --- /dev/null +++ b/tutorials/fpga_flow/testbench.sv @@ -0,0 +1,167 @@ +// Copyright (c) 2022 Xilinx, Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, this +// list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * Neither the name of Xilinx nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +`timescale 1 ns / 1 ps +`define HEXFILE "data.hex" + +parameter MAX_FL =4000; + + +module tb (); + +logic [28*8-1:0] data [MAX_FL]; +logic [28*8-1:0] data_row; +logic [28*28*8-1:0] img_data; +logic [7:0] fifo [16]; +logic [3:0] rd_ptr=0; +logic [3:0] wr_ptr=0; +int err_count=0; +int data_count=0; +int i,j; +logic [31:0] file_lines; + +logic ap_clk = 0; +logic ap_rst_n = 0; + +logic [7:0]dout_tdata; +logic dout_tlast; +logic dout_tready; +logic dout_tvalid; + +logic [392-1:0]din_tdata; +logic din_tready; +logic din_tvalid; + + + +finn_design_wrapper finn_design_wrapper ( + .ap_clk (ap_clk ),//i + .ap_rst_n (ap_rst_n ),//i + + .m_axis_0_tdata (dout_tdata ),//o + .m_axis_0_tready (dout_tready ),//i + .m_axis_0_tvalid (dout_tvalid ),//o + + .s_axis_0_tdata (din_tdata ),//i + .s_axis_0_tready (din_tready ),//o + .s_axis_0_tvalid (din_tvalid ) //i +); + +initial begin: AP_CLK + forever begin + ap_clk = #5 ~ap_clk; + end +end + + +initial begin + // Hex file formated for Upper N bits as input data, and lower N bits as expected output data + + $readmemh(`HEXFILE, data); + // Determine how large file actuall is + for (i=0; i<MAX_FL; i+=1) if (data[i][0] !== 1'bx) file_lines = i; + if (file_lines[0] === {1'bx}) begin + $display("ERROR: Unable to read hex file: %s",`HEXFILE); + $finish; + end + + + din_tvalid = 0; + din_tdata = 0; + dout_tready = 1; + + repeat (100) @(negedge ap_clk); + ap_rst_n = 1; + repeat (100) @(negedge ap_clk); + dout_tready = 1; + + repeat (10) @(negedge ap_clk); + //while (~din_tready) @(negedge ap_clk); + @(negedge ap_clk); + @(negedge ap_clk); + + // The hex file is formated in 29 row blocks + // The first 28 rows are the image data + // The 29th row is the ground truth expected result stored in the lowest byte. + // Note that each row's byte-order is saved such that the high-byte is in the upper + // most bits, and the first byte in the lower-most bits. + for (j=0; j<=file_lines; j+=1) begin + if ((j%29) < 28) begin + img_data[(j%29)*28*8+:28*8] = data[j]; + end else begin + // Grab the verifcation result on the 29th row + data_row = data[j]; + //$display("wr_ptr %h, data:%h, j=%d",wr_ptr,data[j],j); + fifo[wr_ptr] = data_row[7:0]; + wr_ptr++; + + // Due to folding factors, the 784 bytes of each image gets fed 49-bytes at a time + // over 16 cycles + for (i=0; i<16; i+=1) begin + din_tvalid = 1; + din_tdata = img_data[392*i+:392]; + @(negedge ap_clk); + while (~din_tready) @(negedge ap_clk); + din_tvalid = 0; + //repeat (200) @(negedge ap_clk); + end + end + end + din_tdata = 0; + din_tvalid = 0; + + repeat (1000) @(negedge ap_clk); + din_tdata = 0; + if (wr_ptr != rd_ptr) begin + $display("ERR: End-sim check: rd_ptr %h != %h wr_ptr",rd_ptr, wr_ptr); + err_count++; + end + + $display("\n************************************************************ "); + $display(" SIM COMPLETE"); + $display(" Validated %0d data points ",data_count); + $display(" Total error count: ====> %0d <====\n",err_count); + $finish; +end + + +// Check the result at each valid output from the model +always @(posedge ap_clk) begin + if (dout_tvalid && ap_rst_n) begin + if (dout_tdata !== fifo[rd_ptr]) begin + $display("ERR: Data mismatch %h != %h ",dout_tdata, fifo[rd_ptr]); + err_count++; + end else begin + $display("CHK: Data match %h == %h --> %0d",dout_tdata, fifo[rd_ptr], data_count); + end + rd_ptr++; + data_count++; + end +end + +endmodule