diff --git a/.github/workflows/quicktest-dev-pr.yml b/.github/workflows/quicktest-dev-pr.yml index cd59a629405c748187cdf478c0bdb0694c58c79f..924fbd24a174df49af4b3e259ad57d0a7907d42b 100644 --- a/.github/workflows/quicktest-dev-pr.yml +++ b/.github/workflows/quicktest-dev-pr.yml @@ -18,4 +18,6 @@ jobs: uses: actions/checkout@v2 - name: DockerRunQuicktest + env: + NUM_DEFAULT_WORKERS: 4 run: sh run-docker.sh quicktest diff --git a/.gitignore b/.gitignore index f838c1695130d232ac6a2b888aed0cea31aafaa7..8b3166a44070a4575aac86c445c4504b594cda08 100644 --- a/.gitignore +++ b/.gitignore @@ -78,3 +78,6 @@ MANIFEST # Jenkins cfg dir /docker/jenkins_home + +# SSH key dir mounted into Docker +/ssh_keys/ diff --git a/docker/Dockerfile.finn_ci b/docker/Dockerfile.finn_ci index 0d610ec66a5f433d156f4e8da976767ce6458aef..d06ff8521555ccd6d09383cab039850f1565fc61 100644 --- a/docker/Dockerfile.finn_ci +++ b/docker/Dockerfile.finn_ci @@ -37,7 +37,7 @@ WORKDIR /workspace RUN apt-get update RUN apt-get -y upgrade RUN apt-get install -y build-essential libglib2.0-0 libsm6 libxext6 libxrender-dev -RUN apt install verilator +RUN apt-get install -y verilator zsh RUN apt-get -y install sshpass RUN echo "StrictHostKeyChecking no" >> /etc/ssh/ssh_config @@ -47,11 +47,13 @@ RUN git clone https://github.com/Xilinx/brevitas.git /workspace/brevitas # CNPY RUN git clone https://github.com/rogersce/cnpy.git /workspace/cnpy # FINN hlslib -RUN git clone https://github.com/maltanar/finn-hlslib.git /workspace/finn-hlslib +RUN git clone https://github.com/Xilinx/finn-hlslib.git /workspace/finn-hlslib # PyVerilator RUN git clone https://github.com/maltanar/pyverilator /workspace/pyverilator # PYNQ-HelloWorld RUN git clone https://github.com/maltanar/PYNQ-HelloWorld.git /workspace/PYNQ-HelloWorld +# oh-my-xilinx +RUN git clone https://bitbucket.org/maltanar/oh-my-xilinx.git /workspace/oh-my-xilinx # checkout desired FINN branch for testing RUN git clone --branch $FINN_CI_BRANCH https://github.com/Xilinx/finn /workspace/finn @@ -59,11 +61,15 @@ RUN git clone --branch $FINN_CI_BRANCH https://github.com/Xilinx/finn /workspace RUN pip install -r /workspace/finn/requirements.txt RUN apt update; apt install nano RUN pip install pytest-dependency +RUN pip install pytest-xdist +RUN pip install pytest-parallel ENV PYTHONPATH "${PYTHONPATH}:/workspace/finn/src" ENV PYTHONPATH "${PYTHONPATH}:/workspace/pyverilator" ENV PYNQSHELL_PATH "/workspace/PYNQ-HelloWorld/boards" ENV VIVADO_IP_CACHE "$BUILD_PATH/vivado_ip_cache" +ENV PATH "${PATH}:/workspace/oh-my-xilinx" +ENV OHMYXILINX "/workspace/oh-my-xilinx" # colorful terminal output RUN echo "PS1='\[\033[1;36m\]\u\[\033[1;31m\]@\[\033[1;32m\]\h:\[\033[1;35m\]\w\[\033[1;31m\]\$\[\033[0m\] '" >> /root/.bashrc diff --git a/docker/Dockerfile.finn_dev b/docker/Dockerfile.finn_dev index 1c2cb19d14137b866b55417522fdebb8e0d7ad90..f8919d7498e0e8ef08a52d1da0782988b56d6df4 100644 --- a/docker/Dockerfile.finn_dev +++ b/docker/Dockerfile.finn_dev @@ -37,16 +37,12 @@ ARG PASSWD ARG JUPYTER_PORT ARG NETRON_PORT -EXPOSE $JUPYTER_PORT -EXPOSE $NETRON_PORT - WORKDIR /workspace RUN apt-get update RUN apt-get -y upgrade RUN apt-get install -y build-essential libglib2.0-0 libsm6 libxext6 libxrender-dev -RUN apt-get install verilator -RUN apt-get install nano +RUN apt-get install -y verilator nano zsh rsync RUN apt-get -y install sshpass RUN echo "StrictHostKeyChecking no" >> /etc/ssh/ssh_config @@ -59,6 +55,8 @@ RUN pip install matplotlib RUN pip install pytest-dependency RUN pip install sphinx RUN pip install 
sphinx_rtd_theme +RUN pip install pytest-xdist +RUN pip install pytest-parallel # switch user RUN groupadd -g $GID $GNAME @@ -76,17 +74,21 @@ RUN git clone https://github.com/Xilinx/brevitas.git /workspace/brevitas # CNPY RUN git clone https://github.com/rogersce/cnpy.git /workspace/cnpy # FINN hlslib -RUN git clone https://github.com/maltanar/finn-hlslib.git /workspace/finn-hlslib +RUN git clone https://github.com/Xilinx/finn-hlslib.git /workspace/finn-hlslib # PyVerilator RUN git clone https://github.com/maltanar/pyverilator /workspace/pyverilator # PYNQ-HelloWorld RUN git clone https://github.com/maltanar/PYNQ-HelloWorld.git /workspace/PYNQ-HelloWorld +# oh-my-xilinx +RUN git clone https://bitbucket.org/maltanar/oh-my-xilinx.git /workspace/oh-my-xilinx # for this developer-oriented Docker container we assume the FINN repo is cloned and mounted from the host # at /workspace/finn -- see run-docker.sh for an example of how to do this. ENV PYTHONPATH "${PYTHONPATH}:/workspace/finn/src" ENV PYTHONPATH "${PYTHONPATH}:/workspace/pyverilator" ENV PYNQSHELL_PATH "/workspace/PYNQ-HelloWorld/boards" +ENV PATH "${PATH}:/workspace/oh-my-xilinx" +ENV OHMYXILINX "/workspace/oh-my-xilinx" WORKDIR /home/$UNAME/finn RUN echo "PS1='\[\033[1;36m\]\u\[\033[1;31m\]@\[\033[1;32m\]\h:\[\033[1;35m\]\w\[\033[1;31m\]\$\[\033[0m\] '" >> /home/$UNAME/.bashrc @@ -100,5 +102,8 @@ RUN chmod 755 /usr/local/bin/finn_entrypoint.sh RUN chmod 755 /usr/local/bin/quicktest.sh USER $UNAME +EXPOSE $JUPYTER_PORT +EXPOSE $NETRON_PORT + ENTRYPOINT ["finn_entrypoint.sh"] CMD ["bash"] diff --git a/docker/Jenkinsfile b/docker/Jenkinsfile index 80be261fb3da057186259598f84d915176577a5d..2215bc79cc7b2c20036d882fdc654fbe8721cab6 100644 --- a/docker/Jenkinsfile +++ b/docker/Jenkinsfile @@ -9,7 +9,12 @@ pipeline { string(name: 'PYNQ_PASSWORD', defaultValue: 'xilinx', description: 'PYNQ board password') string(name: 'PYNQ_TARGET_DIR', defaultValue: '/home/xilinx/finn', description: 'PYNQ board target deployment directory') string(name: 'NUM_DEFAULT_WORKERS', defaultValue: '1', description: 'Number of cores for parallel transformations') - string(name: 'DOCKER_CMD', defaultValue: """python setup.py test""", description: 'Command to run') + // main test: everything except rtlsim and end2end tests, parallel run with xdist, no parallel transformations to save on memory + string(name: 'DOCKER_CMD_MAIN', defaultValue: """python setup.py test --addopts "-k 'not (rtlsim or end2end)' --dist=loadfile -n auto" """, description: 'Main test command') + // rtlsim tests: parallel run with pytest-parallel, no parallel transformations to save on memory + string(name: 'DOCKER_CMD_RTLSIM', defaultValue: """python setup.py test --addopts "-k rtlsim --workers auto" """, description: 'rtlsim test command') + // end2end tests: no parallel testing, use NUM_DEFAULT_WORKERS for parallel transformations + string(name: 'DOCKER_CMD_END2END', defaultValue: """python setup.py test --addopts "-k end2end" """, description: 'end2end test command') } environment { DOCKER_TAG='finn_ci:$BUILD_ID' @@ -32,10 +37,49 @@ pipeline { """ } } - stage('Test') { + stage('test-main') { steps { + catchError { sh """ - docker run --name $DOCKER_INST_NAME --init \ + docker run --init \ + --hostname $DOCKER_INST_NAME \ + -v ${params.VIVADO_PATH}:${params.VIVADO_PATH}:ro \ + -e NUM_DEFAULT_WORKERS=1 \ + -e FINN_INST_NAME=$DOCKER_INST_NAME \ + -e VIVADO_PATH=${params.VIVADO_PATH} \ + -e PYNQ_BOARD=${params.PYNQ_BOARD} \ + -e PYNQ_IP=${params.PYNQ_IP} \ + -e 
PYNQ_USERNAME=${params.PYNQ_USERNAME} \ + -e PYNQ_PASSWORD=${params.PYNQ_PASSWORD} \ + -e PYNQ_TARGET_DIR=${params.PYNQ_TARGET_DIR} \ + $DOCKER_TAG ${params.DOCKER_CMD_MAIN} + """} + } + } + stage('test-rtlsim') { + steps { + catchError { + sh """ + docker run --init \ + --hostname $DOCKER_INST_NAME \ + -v ${params.VIVADO_PATH}:${params.VIVADO_PATH}:ro \ + -e NUM_DEFAULT_WORKERS=1 \ + -e FINN_INST_NAME=$DOCKER_INST_NAME \ + -e VIVADO_PATH=${params.VIVADO_PATH} \ + -e PYNQ_BOARD=${params.PYNQ_BOARD} \ + -e PYNQ_IP=${params.PYNQ_IP} \ + -e PYNQ_USERNAME=${params.PYNQ_USERNAME} \ + -e PYNQ_PASSWORD=${params.PYNQ_PASSWORD} \ + -e PYNQ_TARGET_DIR=${params.PYNQ_TARGET_DIR} \ + $DOCKER_TAG ${params.DOCKER_CMD_RTLSIM} + """} + } + } + stage('test-end2end') { + steps { + catchError { + sh """ + docker run --init \ --hostname $DOCKER_INST_NAME \ -v ${params.VIVADO_PATH}:${params.VIVADO_PATH}:ro \ -e NUM_DEFAULT_WORKERS=${params.NUM_DEFAULT_WORKERS} \ @@ -46,8 +90,8 @@ pipeline { -e PYNQ_USERNAME=${params.PYNQ_USERNAME} \ -e PYNQ_PASSWORD=${params.PYNQ_PASSWORD} \ -e PYNQ_TARGET_DIR=${params.PYNQ_TARGET_DIR} \ - $DOCKER_TAG ${params.DOCKER_CMD} - """ + $DOCKER_TAG ${params.DOCKER_CMD_END2END} + """ } } } } diff --git a/docker/finn_entrypoint.sh b/docker/finn_entrypoint.sh index 81a80155d0b39a247211d5d0c736f33eb5234595..b312737c317517ca0ab19c74cf22284b5977b661 100644 --- a/docker/finn_entrypoint.sh +++ b/docker/finn_entrypoint.sh @@ -13,11 +13,12 @@ gecho () { # checkout the correct dependency repo commits # the repos themselves are cloned in the Dockerfile -BREVITAS_COMMIT=7696326e5f279cacffd5b6ac8d9e8d81deec3978 +BREVITAS_COMMIT=f9a27226d4acf1661dd38bc449f71f89e0983cce CNPY_COMMIT=4e8810b1a8637695171ed346ce68f6984e585ef4 -HLSLIB_COMMIT=6b88db826bb023937506913a23d964775a7606af -PYVERILATOR_COMMIT=1d89cb0d4e0c97469cc6352c611f876ec13edfa6 +HLSLIB_COMMIT=8f9f2018762f654f196b666838aeaf6fc730ad9a +PYVERILATOR_COMMIT=c97a5ba41bbc7c419d6f25c74cdf3bdc3393174f PYNQSHELL_COMMIT=0c82a61b0ec1a07fa275a14146233824ded7a13d +OMX_COMMIT=1bae737669901e762f581af73348332b5c4b2ada gecho "Setting up known-good commit versions for FINN dependencies" @@ -42,6 +43,10 @@ git -C /workspace/pyverilator checkout $PYVERILATOR_COMMIT --quiet gecho "PYNQ shell @ $PYNQSHELL_COMMIT" git -C /workspace/PYNQ-HelloWorld pull --quiet git -C /workspace/PYNQ-HelloWorld checkout $PYNQSHELL_COMMIT --quiet +# oh-my-xilinx +gecho "oh-my-xilinx @ $OMX_COMMIT" +git -C /workspace/oh-my-xilinx pull --quiet +git -C /workspace/oh-my-xilinx checkout $OMX_COMMIT --quiet # source Vivado env.vars source $VIVADO_PATH/settings64.sh diff --git a/docker/quicktest.sh b/docker/quicktest.sh index 4f6a2d3e230de9fcbb947d794722294880a7730d..49b7886836ac4e45dad856dfcd49223276bd831a 100755 --- a/docker/quicktest.sh +++ b/docker/quicktest.sh @@ -1,4 +1,22 @@ #!/bin/bash +: ${PYTEST_PARALLEL=auto} + cd $FINN_ROOT -python setup.py test --addopts "-m 'not (vivado or slow)'" + +# check if command line argument is empty or not present +if [ -z $1 ]; then + echo "Running quicktest: not (vivado or slow) with pytest-xdist" + python setup.py test --addopts "-m 'not (vivado or slow)' --dist=loadfile -n $PYTEST_PARALLEL" +elif [ $1 = "main" ]; then + echo "Running main test suite: not (rtlsim or end2end) with pytest-xdist" + python setup.py test --addopts "-k not (rtlsim or end2end) --dist=loadfile -n $PYTEST_PARALLEL" +elif [ $1 = "rtlsim" ]; then + echo "Running rtlsim test suite with pytest-parallel" + python setup.py test --addopts "-k rtlsim --workers $PYTEST_PARALLEL" 
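(Aside on the test selection used in quicktest.sh and the Jenkins stages above: the -m and -k options filter on pytest marks and test names. A minimal sketch of what such marks look like on the test side; the test names below are made up for illustration and are not taken from this diff.)

import pytest

@pytest.mark.slow        # deselected by quicktest's -m "not (vivado or slow)"
def test_example_slow():
    assert 2 + 2 == 4

@pytest.mark.vivado      # needs a Vivado install, also deselected by quicktest
def test_example_vivado():
    assert True

def test_example_rtlsim_behaviour():
    # selected by -k rtlsim because the substring occurs in the test name
    assert True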
+elif [ $1 = "end2end" ]; then + echo "Running end2end test suite with no parallelism" + python setup.py test --addopts "-k end2end" +else + echo "Unrecognized argument to quicktest.sh" +fi diff --git a/docs/finn-sheduling-and-folding.pptx b/docs/finn-sheduling-and-folding.pptx new file mode 100644 index 0000000000000000000000000000000000000000..30bbe4d55b1cda9df25a791227983dc7cb750e58 Binary files /dev/null and b/docs/finn-sheduling-and-folding.pptx differ diff --git a/docs/finn/example_networks.rst b/docs/finn/example_networks.rst index 9f221871f09bf655db9d81988d6fa83e53473634..86bb2bd11fd805a23a3bdf6da8a8ed686259ecc1 100644 --- a/docs/finn/example_networks.rst +++ b/docs/finn/example_networks.rst @@ -20,17 +20,17 @@ version, this is indicated by an x mark in the table. +-----------------------+------------+----------+----------+----------+----------+----------+----------+ | Export/Import | x | x | x | x | x | x | x | +-----------------------+------------+----------+----------+----------+----------+----------+----------+ -| Streamlining | x | x | x | x | x | | | +| Streamlining | x | x | x | x | x | | x | +-----------------------+------------+----------+----------+----------+----------+----------+----------+ -| Convert to HLS layers | x | x | x | x | x | | | +| Convert to HLS layers | x | x | x | x | x | | x | +-----------------------+------------+----------+----------+----------+----------+----------+----------+ -| Stitched IP | x | x | x | x | x | | | +| Stitched IP | x | x | x | x | x | | x | +-----------------------+------------+----------+----------+----------+----------+----------+----------+ -| Hardware test | x | x | x | | x | | | +| Hardware test | x | x | x | | x | | x | +-----------------------+------------+----------+----------+----------+----------+----------+----------+ -| cppsim | x | x | x | x | x | | | +| cppsim | x | x | x | x | x | | x | +-----------------------+------------+----------+----------+----------+----------+----------+----------+ -| rtlsim node-by-node | x | x | x | x | x | | | +| rtlsim node-by-node | x | x | x | x | x | | x | +-----------------------+------------+----------+----------+----------+----------+----------+----------+ -| rtlsim stitched IP | x | x | x | x | x | | | +| rtlsim stitched IP | x | x | x | x | x | | x | +-----------------------+------------+----------+----------+----------+----------+----------+----------+ diff --git a/docs/finn/getting_started.rst b/docs/finn/getting_started.rst index 95594bb67a2be3a4c3fbba488c75a704f623c136..8b20cebcfc49d14d0afbb26edd678d65425476d3 100644 --- a/docs/finn/getting_started.rst +++ b/docs/finn/getting_started.rst @@ -18,6 +18,7 @@ Requirements * A working Vivado 2019.1 installation * A `VIVADO_PATH` environment variable pointing to the Vivado installation directory (e.g. the directory where settings64.sh is located) * (optional) A PYNQ board with a network connection + * the ``bitstring`` package must be installed on the PYNQ: ``sudo pip3 install bitstring`` Running FINN in Docker ====================== @@ -30,6 +31,7 @@ Getting an interactive shell for development or experimentation sh run_docker.sh Simply running sh run-docker.sh without any additional arguments will clone the dependency repos, create a Docker container and give you a terminal with you can use for development for experimentation. +If you want a new terminal on an already-running container, you can do this with `docker exec -it finn_dev_<username> bash`. .. 
warning:: The Docker container is spawned with the `--rm` option, so make sure that any important files you created inside the container are either in the /workspace/finn folder (which is mounted from the host computer) or otherwise backed up. @@ -71,8 +73,12 @@ from the FINN root directory* as follows: python setup.py test --addopts "-k test_end2end_tfc_w1a2" -Please see the pytest documentation for more about picking tests by marks or -by name. +Finally, if you want to run tests in parallel (e.g. to take advantage of a multi-core CPU), +you can use: + * pytest-parallel for any rtlsim tests, e.g. `python setup.py test --addopts "-k rtlsim --workers auto"` + * pytest-xdist for everything else; make sure to add `--dist=loadfile` if tests in the same file depend on each other, e.g. `python setup.py test --addopts "-k mytest -n auto --dist=loadfile"` + +Please see the pytest documentation for more about picking tests by marks or by name. Environment variables ********************** diff --git a/docs/finn/internals.rst b/docs/finn/internals.rst index 7a4bc687eeb827320991f7d3f1ef8cc35e97f3da..dee62f09a9253380e05300dac8fa34915c20dab5 100644 --- a/docs/finn/internals.rst +++ b/docs/finn/internals.rst @@ -16,6 +16,10 @@ Custom Quantization Annotations ONNX does not support datatypes smaller than 8-bit integers, whereas in FINN we are interested in smaller integers down to ternary and bipolar. To make this work, FINN uses the quantization_annotation field in ONNX to annotate tensors with their FINN DataType (:py:mod:`finn.core.datatype.DataType`) information. However, all tensors are expected to use single-precision floating point (float32) storage in FINN. This means we store even a 1-bit value as floating point for the purposes of representation. The FINN compiler flow is responsible for eventually producing a packed representation for the target hardware, where the 1-bit is actually stored as 1-bit. +Note that FINN uses floating point tensors as a carrier data type to represent integers. Floating point arithmetic can introduce rounding errors, e.g. (int_num * float_scale) / float_scale is not always equal to int_num. +When using the custom ONNX execution flow, FINN will attempt to sanitize any rounding errors for integer tensors. See (:py:mod:`finn.util.basic.sanitize_quant_values`) for more information. +This behavior can be disabled (not recommended!) by setting the environment variable SANITIZE_QUANT_TENSORS=0. + Custom Operations/Nodes ======================= diff --git a/docs/finn/verification.rst b/docs/finn/verification.rst index 391c6f999312839daca0d4161336c7c0ae822f89..c52c0840aa40566d930164490b1fd249d7c07757 100644 --- a/docs/finn/verification.rst +++ b/docs/finn/verification.rst @@ -28,4 +28,15 @@ This simulation can be used for a model containing several HLS custom operations Emulation using PyVerilator =========================== -The emulation using PyVerilator can be used when IP blocks were generated, either node by node or of a whole design. For that purpose PyVerilator gets the generated verilog files. +The emulation using PyVerilator can be used once IP blocks have been generated, either node by node or for a whole (IP-stitched) design. For that purpose, PyVerilator is given the generated Verilog files. + +For debugging purposes, it's possible to generate .vcd trace files that show the value of external & internal signals as the emulation is running.
To enable this: + - for node-by-node rtlsim, set the `rtlsim_trace` attribute of each node of interest to either a file name for the vcd or `default` to use the node name as the filename. + - for IP-stitched rtlsim, set the `rtlsim_trace` metadata_prop for the graph as per above. + +To control the tracing depth in the module hierarchy, use the `RTLSIM_TRACE_DEPTH` environment variable (default is 1): + - level 1 shows top-level input/output streams + - level 2 shows per-layer input/output streams + - level 3 shows per full-layer I/O including FIFO count signals + +Note that deeper tracing will take longer to execute and may produce very large .vcd files. diff --git a/notebooks/end2end_example/tfc_end2end_example.ipynb b/notebooks/end2end_example/tfc_end2end_example.ipynb index d573061487de204084e0d3242da8ad1b791f44d8..c84efc964b1f57b7ed385521fc5214fdc2396590 100644 --- a/notebooks/end2end_example/tfc_end2end_example.ipynb +++ b/notebooks/end2end_example/tfc_end2end_example.ipynb @@ -132,7 +132,7 @@ " " ], "text/plain": [ - "<IPython.lib.display.IFrame at 0x7f8890385828>" + "<IPython.lib.display.IFrame at 0x7f7cc4290940>" ] }, "execution_count": 3, @@ -293,7 +293,7 @@ " " ], "text/plain": [ - "<IPython.lib.display.IFrame at 0x7fe1ad0639e8>" + "<IPython.lib.display.IFrame at 0x7f7c6c567f28>" ] }, "execution_count": 6, @@ -333,9 +333,10 @@ " ConvertDivToMul(),\n", " BatchNormToAffine(),\n", " ConvertSignToThres(),\n", + " AbsorbSignBiasIntoMultiThreshold(),\n", " MoveAddPastMul(),\n", " MoveScalarAddPastMatMul(),\n", - " MoveScalarAddPastConv(),\n", + " MoveAddPastConv(),\n", " MoveScalarMulPastMatMul(),\n", " MoveScalarMulPastConv(),\n", " MoveAddPastMul(),\n", @@ -350,6 +351,7 @@ " ]\n", " for trn in streamline_transformations:\n", " model = model.transform(trn)\n", + " model = model.transform(RemoveIdentityOps())\n", " model = model.transform(GiveUniqueNodeNames())\n", " model = model.transform(GiveReadableTensorNames())\n", " model = model.transform(InferDataTypes())\n", @@ -400,7 +402,7 @@ " " ], "text/plain": [ - "<IPython.lib.display.IFrame at 0x7fe1346e4ef0>" + "<IPython.lib.display.IFrame at 0x7f7c6c0bf898>" ] }, "execution_count": 8, @@ -454,7 +456,7 @@ " " ], "text/plain": [ - "<IPython.lib.display.IFrame at 0x7fe1346f7780>" + "<IPython.lib.display.IFrame at 0x7f7c6c0e5c18>" ] }, "execution_count": 9, diff --git a/requirements.txt b/requirements.txt index 2427f9490a3dd5a7ffe0e0a8cf2ad19af0934cdf..b15d86ed89f7b0e76b772ce42aba6481937310b0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,4 +8,6 @@ pre-commit pyverilator scipy sphinx +toposort +vcdvcd wget diff --git a/run-docker.sh b/run-docker.sh index 186efc322a8f437be0371b5a142a9dd524d1abf3..00ca8f86985a78d8f2af099c51dcd4b80cd2e974 100755 --- a/run-docker.sh +++ b/run-docker.sh @@ -65,6 +65,11 @@ DOCKER_INST_NAME="finn_dev_${DOCKER_UNAME}" # ensure Docker tag and inst. name are all lowercase DOCKER_TAG=$(echo "$DOCKER_TAG" | tr '[:upper:]' '[:lower:]') DOCKER_INST_NAME=$(echo "$DOCKER_INST_NAME" | tr '[:upper:]' '[:lower:]') +# Absolute path to this script, e.g. 
/home/user/bin/foo.sh +SCRIPT=$(readlink -f "$0") +# Absolute path this script is in, thus /home/user/bin +SCRIPTPATH=$(dirname "$SCRIPT") + # the settings below will be taken from environment variables if available, # otherwise the defaults below will be used : ${JUPYTER_PORT=8888} @@ -74,11 +79,7 @@ DOCKER_INST_NAME=$(echo "$DOCKER_INST_NAME" | tr '[:upper:]' '[:lower:]') : ${PYNQ_BOARD="Pynq-Z1"} : ${PYNQ_TARGET_DIR="/home/xilinx/$DOCKER_INST_NAME"} : ${NUM_DEFAULT_WORKERS=1} - -# Absolute path to this script, e.g. /home/user/bin/foo.sh -SCRIPT=$(readlink -f "$0") -# Absolute path this script is in, thus /home/user/bin -SCRIPTPATH=$(dirname "$SCRIPT") +: ${FINN_SSH_KEY_DIR="$SCRIPTPATH/ssh_keys"} BUILD_LOCAL=/tmp/$DOCKER_INST_NAME VIVADO_HLS_LOCAL=$VIVADO_PATH @@ -87,6 +88,7 @@ VIVADO_IP_CACHE=$BUILD_LOCAL/vivado_ip_cache # ensure build dir exists locally mkdir -p $BUILD_LOCAL mkdir -p $VIVADO_IP_CACHE +mkdir -p $FINN_SSH_KEY_DIR gecho "Instance is named as $DOCKER_INST_NAME" gecho "Mounting $BUILD_LOCAL into $BUILD_LOCAL" @@ -96,7 +98,7 @@ gecho "Port-forwarding for Netron $NETRON_PORT:$NETRON_PORT" gecho "Vivado IP cache dir is at $VIVADO_IP_CACHE" gecho "Using default PYNQ board $PYNQ_BOARD" -DOCKER_INTERACTIVE = "" +DOCKER_INTERACTIVE="" if [ "$1" = "test" ]; then gecho "Running test suite (all tests)" @@ -133,6 +135,7 @@ docker run -t --rm --name $DOCKER_INST_NAME $DOCKER_INTERACTIVE --init \ -v $SCRIPTPATH:/workspace/finn \ -v $BUILD_LOCAL:$BUILD_LOCAL \ -v $VIVADO_PATH:$VIVADO_PATH \ +-v $FINN_SSH_KEY_DIR:/home/$DOCKER_UNAME/.ssh \ -e VIVADO_PATH=$VIVADO_PATH \ -e FINN_INST_NAME=$DOCKER_INST_NAME \ -e FINN_ROOT="/workspace/finn" \ diff --git a/src/finn/core/modelwrapper.py b/src/finn/core/modelwrapper.py index 2896b09e0f54d6d0492c5330ec5da4110e257d30..646add188c5d475cf37ccd33cf24d29d61754ae1 100644 --- a/src/finn/core/modelwrapper.py +++ b/src/finn/core/modelwrapper.py @@ -259,11 +259,10 @@ class ModelWrapper: def find_producer(self, tensor_name): """Finds and returns the node that produces the tensor with given name.""" - ret = None for x in self._model_proto.graph.node: if tensor_name in x.output: - ret = x - return ret + return x + return None def find_upstream(self, tensor_name, finder_fxn): """Follow the producer chain upstream, calling finder_fxn on each upstream diff --git a/src/finn/core/onnx_exec.py b/src/finn/core/onnx_exec.py index 44787e1d26049e6075e2222316b45ab3898acbc7..7c3123cd5eb29a54dc5cbfb912225ad3fdb0f219 100644 --- a/src/finn/core/onnx_exec.py +++ b/src/finn/core/onnx_exec.py @@ -39,6 +39,7 @@ from finn.core.remote_exec import remote_exec from finn.core.rtlsim_exec import rtlsim_exec from finn.custom_op.registry import getCustomOp import finn.analysis.topology as ta +from finn.util.basic import sanitize_quant_values, get_sanitize_quant_tensors def execute_node(node, context, graph): @@ -102,15 +103,14 @@ def execute_node(node, context, graph): raise Exception( """Output shapes disagree after node execution: found %s vs expected %s""" - % ( - str(output_list[list_ind].shape.shape), - str(context[outp].shape), - ) + % (str(output_list[list_ind].shape), str(context[outp].shape)) ) context[outp] = output_list[list_ind] -def execute_onnx(model, input_dict, return_full_exec_context=False): +def execute_onnx( + model, input_dict, return_full_exec_context=False, start_node=None, end_node=None +): """Executes given ONNX ModelWrapper with given named inputs. 
If return_full_exec_context is False, a dict of named outputs is returned @@ -118,7 +118,12 @@ def execute_onnx(model, input_dict, return_full_exec_context=False): If return return_full_exec_context is True, the full set of tensors used by the execution (including inputs, weights, activations and final outputs) - will be returned as a dict.""" + will be returned as a dict. + + When start_node and end_node are set to None, the whole graph is executed. + If they are set to particular ONNX nodes, only the subgraph between (and + including) those nodes is executed. + """ if not model.check_all_tensor_shapes_specified(): raise Exception("Found unspecified tensor shapes, try infer_shapes") @@ -161,8 +166,28 @@ def execute_onnx(model, input_dict, return_full_exec_context=False): # execute the model node by node # we can simply walk down the list since the ONNX spec guarantees that it is # topologically sorted - for node in graph.node: + subgraph = [] + if start_node is None: + start_node = model.graph.node[0] + if end_node is None: + end_node = model.graph.node[-1] + # select the nodes between specified start/end nodes + start_ind = model.get_node_index(start_node) + end_ind = model.get_node_index(end_node) + 1 + assert end_ind >= start_ind, "Start/end nodes must define valid subgraph" + subgraph = graph.node[start_ind:end_ind] + for node in subgraph: + if get_sanitize_quant_tensors() != 0: + # round input values to match quantization annotation + execution_context = sanitize_quant_values( + model, node.input, execution_context + ) execute_node(node, execution_context, graph) + if get_sanitize_quant_tensors() != 0: + # round output values to quantization annotation + execution_context = sanitize_quant_values( + model, node.output, execution_context + ) elif model_exec_mode == "remote_pynq": # use remote exec metadata built into model to execute on a remote PYNQ remote_exec(model, execution_context) diff --git a/src/finn/core/rtlsim_exec.py b/src/finn/core/rtlsim_exec.py index e5e6d29bd8d8ed23f6a4958856ed1ddea3617175..1e1bee3aa7435d5cab6cbf5ea23dd37dcdfa4380 100644 --- a/src/finn/core/rtlsim_exec.py +++ b/src/finn/core/rtlsim_exec.py @@ -66,6 +66,11 @@ def rtlsim_exec(model, execution_context): i_stream_w = first_node.get_instream_width() # convert input into time multiplexed shape i_folded_shape = first_node.get_folded_input_shape() + batchsize = i_tensor.shape[0] + # override batch size for input + i_folded_shape = list(i_folded_shape) + i_folded_shape[0] = batchsize + i_folded_shape = tuple(i_folded_shape) # TODO any other layout transformations need to happen here! 
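(To make the new start_node/end_node arguments of execute_onnx shown above concrete, here is a minimal usage sketch. The model file name, input values and node indices are hypothetical; SANITIZE_QUANT_TENSORS is the environment variable described in the internals documentation above.)

import os
import numpy as np
import finn.core.onnx_exec as oxe
from finn.core.modelwrapper import ModelWrapper

# hypothetical model path -- substitute a real FINN-compatible ONNX file
model = ModelWrapper("model.onnx")
iname = model.graph.input[0].name
input_dict = {iname: np.zeros(model.get_tensor_shape(iname), dtype=np.float32)}

# rounding of integer-annotated tensors is on by default; "0" disables it (not recommended)
os.environ["SANITIZE_QUANT_TENSORS"] = "1"

# execute only the first three nodes and return the full execution context
ret = oxe.execute_onnx(
    model,
    input_dict,
    return_full_exec_context=True,
    start_node=model.graph.node[0],
    end_node=model.graph.node[2],
)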
i_tensor = i_tensor.reshape(i_folded_shape) # extract output shape @@ -74,21 +79,27 @@ def rtlsim_exec(model, execution_context): o_dt = model.get_tensor_datatype(o_name) last_node = getCustomOp(model.find_producer(o_name)) o_folded_shape = last_node.get_folded_output_shape() + # override batch size from actual input + o_shape = list(o_shape) + o_shape[0] = batchsize + o_shape = tuple(o_shape) + o_folded_shape = list(o_folded_shape) + o_folded_shape[0] = batchsize + o_folded_shape = tuple(o_folded_shape) o_stream_w = last_node.get_outstream_width() packedBits = o_stream_w targetBits = o_dt.bitwidth() # pack input packed_input = npy_to_rtlsim_input(i_tensor, i_dt, i_stream_w) num_out_values = last_node.get_number_output_values() + num_out_values *= batchsize # prepare pyverilator model rtlsim_so = model.get_metadata_prop("rtlsim_so") if (rtlsim_so is None) or (not os.path.isfile(rtlsim_so)): sim = pyverilate_stitched_ip(model) model.set_metadata_prop("rtlsim_so", sim.lib._name) else: - sim = PyVerilator(rtlsim_so) - _reset_rtlsim(sim) - _toggle_clk(sim) + sim = PyVerilator(rtlsim_so, auto_eval=False) ret = _run_rtlsim(sim, packed_input, num_out_values, trace_file) packed_output = ret[0] model.set_metadata_prop("sim_cycles", str(ret[1])) @@ -104,18 +115,22 @@ def _reset_rtlsim(sim): """Sets reset input in pyverilator to zero, toggles the clock and set it back to one""" sim.io.ap_rst_n_0 = 0 - sim.io.ap_clk_0 = 1 - sim.io.ap_clk_0 = 0 + _toggle_clk(sim) + _toggle_clk(sim) sim.io.ap_rst_n_0 = 1 + _toggle_clk(sim) + _toggle_clk(sim) def _toggle_clk(sim): """Toggles the clock input in pyverilator once.""" - sim.io.ap_clk_0 = 1 sim.io.ap_clk_0 = 0 + sim.eval() + sim.io.ap_clk_0 = 1 + sim.eval() -def _run_rtlsim(sim, inp, num_out_values, trace_file=None): +def _run_rtlsim(sim, inp, num_out_values, trace_file=None, reset=True): """Runs the pyverilator simulation by passing the input values to the simulation, toggle the clock and observing the execution time. Argument num_out_values contains the number of expected output values, so the simulation is closed after all @@ -140,6 +155,8 @@ def _run_rtlsim(sim, inp, num_out_values, trace_file=None): if trace_file is not None: sim.start_vcd_trace(trace_file) + if reset: + _reset_rtlsim(sim) while not (output_observed): sim.io.in0_V_V_0_tvalid = 1 if len(inputs) > 0 else 0 @@ -148,8 +165,7 @@ def _run_rtlsim(sim, inp, num_out_values, trace_file=None): inputs = inputs[1:] if sim.io.out_r_0_tvalid == 1 and sim.io.out_r_0_tready == 1: outputs = outputs + [sim.io.out_r_0_tdata] - sim.io.ap_clk_0 = 1 - sim.io.ap_clk_0 = 0 + _toggle_clk(sim) observation_count = observation_count + 1 no_change_count = no_change_count + 1 diff --git a/src/finn/core/throughput_test.py b/src/finn/core/throughput_test.py index 8d3dabcf8af51327d5d951464c6d9b36e2f67497..4444e7584f843cd0edb016b520d01d71e659b904 100644 --- a/src/finn/core/throughput_test.py +++ b/src/finn/core/throughput_test.py @@ -28,6 +28,10 @@ import os import subprocess +import numpy as np + +from finn.util.basic import gen_finn_dt_tensor +from finn.core.rtlsim_exec import rtlsim_exec def throughput_test(model, batchsize=1000): @@ -88,3 +92,50 @@ def throughput_test(model, batchsize=1000): return res except FileNotFoundError: return None + + +def throughput_test_rtlsim(model, batchsize=100): + """Runs a throughput test for the given IP-stitched model. 
When combined + with tracing, useful to determine bottlenecks and required FIFO sizes.""" + + assert ( + model.get_metadata_prop("exec_mode") == "rtlsim" + ), """Top-level exec_mode + metadata_prop must be set to rtlsim""" + + # create random input + iname = model.graph.input[0].name + ishape = model.get_tensor_shape(iname) + ishape_batch = ishape + ishape_batch[0] = batchsize + idt = model.get_tensor_datatype(iname) + dummy_input = gen_finn_dt_tensor(idt, ishape_batch) + # compute input/output sizes + oname = model.graph.output[0].name + oshape = model.get_tensor_shape(oname) + oshape_batch = oshape + oshape_batch[0] = batchsize + odt = model.get_tensor_datatype(oname) + i_bytes = (np.prod(ishape_batch) * idt.bitwidth()) / 8 + o_bytes = (np.prod(oshape_batch) * odt.bitwidth()) / 8 + # make empty exec context and insert input + ctx = model.make_empty_exec_context() + ctx[iname] = dummy_input + # remove liveness threshold, launch rtlsim + os.environ["LIVENESS_THRESHOLD"] = "-1" + rtlsim_exec(model, ctx) + # extract metrics + cycles = int(model.get_metadata_prop("sim_cycles")) + clk_ns = float(model.get_metadata_prop("clk_ns")) + fclk_mhz = 1 / (clk_ns * 0.001) + runtime_s = (cycles * clk_ns) * (10 ** -9) + res = dict() + res["cycles"] = cycles + res["runtime[ms]"] = runtime_s * 1000 + res["throughput[images/s]"] = batchsize / runtime_s + res["DRAM_in_bandwidth[Mb/s]"] = i_bytes * 0.000001 / runtime_s + res["DRAM_out_bandwidth[Mb/s]"] = o_bytes * 0.000001 / runtime_s + res["fclk[mhz]"] = fclk_mhz + res["N"] = batchsize + + return res diff --git a/src/finn/custom_op/__init__.py b/src/finn/custom_op/__init__.py index ab6e03bee65b8bf5c4041dd8021b1a561e7673d2..4ae7b9ebffaab6ca6be04b8d73f647b2db22dc78 100644 --- a/src/finn/custom_op/__init__.py +++ b/src/finn/custom_op/__init__.py @@ -56,8 +56,15 @@ class CustomOp(ABC): ret = ret.decode("utf-8") return ret else: - # not set, return default value - return def_val + if req: + raise Exception( + """Required attribute %s unspecified in + a %s node""" + % (name, self.onnx_node.op_type) + ) + else: + # not set, return default value + return def_val except KeyError: raise AttributeError("Op has no such attribute: " + name) diff --git a/src/finn/custom_op/fpgadataflow/__init__.py b/src/finn/custom_op/fpgadataflow/__init__.py index 17a55e519ed0440f68e295aecaab179e6adf632f..71c731f96ca45519c443a5f932ead050770e17de 100644 --- a/src/finn/custom_op/fpgadataflow/__init__.py +++ b/src/finn/custom_op/fpgadataflow/__init__.py @@ -88,6 +88,8 @@ class HLSCustomOp(CustomOp): "res_hls": ("s", False, ""), "res_synth": ("s", False, ""), "rtlsim_so": ("s", False, ""), + # partitioning info + "partition_id": ("i", False, 0), # input and output FIFO depths "inFIFODepth": ("i", False, 2), "outFIFODepth": ("i", False, 2), @@ -171,9 +173,15 @@ class HLSCustomOp(CustomOp): of the node as a dictionary.""" ret = dict() ret["BRAM_18K"] = self.bram_estimation() + ret["BRAM_efficiency"] = self.bram_efficiency_estimation() ret["LUT"] = self.lut_estimation() return ret + def bram_efficiency_estimation(self): + """Function for BRAM efficiency estimation: actual parameter storage + needed divided by the allocated BRAM storage (from estimation)""" + return 1 + def bram_estimation(self): """Function for BRAM resource estimation, is member function of HLSCustomOp class but has to be filled by every node""" diff --git a/src/finn/custom_op/fpgadataflow/channelwise_op_batch.py b/src/finn/custom_op/fpgadataflow/channelwise_op_batch.py new file mode 100644 index 
0000000000000000000000000000000000000000..ad68a4bde29123b2498ac7789048bcd2e13bf3bc --- /dev/null +++ b/src/finn/custom_op/fpgadataflow/channelwise_op_batch.py @@ -0,0 +1,576 @@ +# Copyright (c) 2020, Xilinx +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of FINN nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +from math import ceil +import os + +import numpy as np + +from onnx import TensorProto, helper +from finn.core.datatype import DataType +from finn.custom_op.fpgadataflow import HLSCustomOp +from finn.util.data_packing import ( + npy_to_rtlsim_input, + numpy_to_hls_code, + rtlsim_output_to_npy, +) +from . import templates + +# ONNX i/o tensor shape assumptions for channelwise ops: +# input 0 is the input tensor, shape (..., NumChannels) +# input 1 is the channelwise parameter tensor, shape (NumChannels, params_per_channel) +# output 0 is the output tensor, shape (..., NumChannels) - same as input +# the ... here can be any shape (representing groups of vectors) + + +class ChannelwiseOp_Batch(HLSCustomOp): + """Class that corresponds to finn-hls Thresholding_Batch function. + It can implement a variety of channel-wise parametrized operations, + including Add, Mul and multi-thresholding. 
+ """ + + def __init__(self, onnx_node): + super().__init__(onnx_node) + self.decoupled_wrapper = templates.decoupled_wrapper + + def get_nodeattr_types(self): + my_attrs = { + # channelwise "map" function to apply: + # one of cmp_le, cmp_ge, add, mul + "Func": ("s", False, "cmp_le"), + "PE": ("i", True, 0), + "NumChannels": ("i", True, 0), + # string defining memory resource type for parameters + "ram_style": ("s", False, "distributed"), + # FINN DataTypes for inputs, weights, outputs + "inputDataType": ("s", True, ""), + "paramDataType": ("s", True, ""), + "outputDataType": ("s", True, ""), + # input and output FIFO depths + "inFIFODepth": ("i", False, 0), + "outFIFODepth": ("i", False, 0), + # number of input vectors, examples: + # [1] is a single vector (like a FC layer with batch=1) + # [4] is four vectors (like a FC layer with batch=4) + # [1, 4, 4] is four * four vectors (like a conv layer with batch=1) + "numInputVectors": ("ints", False, [1]), + } + my_attrs.update(super().get_nodeattr_types()) + return my_attrs + + def calc_tmem(self): + """Calculates and returns TMEM, the depth of the memory used + to store the channelwise op parameters.""" + chn = self.get_nodeattr("NumChannels") + pe = self.get_nodeattr("PE") + return chn // pe + + def make_shape_compatible_op(self, model): + oshape = self.get_normal_output_shape() + # implement tensor with correct shape + values = np.random.randn(*oshape).astype(np.float32) + return helper.make_node( + "Constant", + inputs=[], + outputs=[self.onnx_node.output[0]], + value=helper.make_tensor( + name="const_tensor", + data_type=TensorProto.FLOAT, + dims=values.shape, + vals=values.flatten().astype(float), + ), + ) + + def infer_node_datatype(self, model): + node = self.onnx_node + # check input datatype against property + idt_name = self.get_input_datatype().name + exp_idt_name = self.get_nodeattr("inputDataType") + assert exp_idt_name == idt_name, "Bad input DataType for ChannelwiseOp layer" + # TODO: dynamically infer/update odt based on idt as done in ConvertToHLSLayers? 
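(A small numeric illustration of the folding arithmetic behind calc_tmem above and the folded shape methods further below; the channel/PE values are made-up examples, not taken from this diff.)

# hypothetical example: 64 channels processed 16 at a time
num_channels = 64
pe = 16
fold = num_channels // pe           # = calc_tmem() = 4 parameter memory entries per PE
assert num_channels % pe == 0       # required, as asserted by the layer

num_input_vectors = [1, 4, 4]       # e.g. a conv-style feature map with batch 1
normal_shape = tuple(num_input_vectors + [num_channels])   # (1, 4, 4, 64)
folded_shape = tuple(num_input_vectors + [fold, pe])       # (1, 4, 4, 4, 16)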
+ # set output datatype from property + odt = self.get_output_datatype() + model.set_tensor_datatype(node.output[0], odt) + + def verify_node(self): + info_messages = [] + # verify that "domain" is set to "finn" + domain_value = self.onnx_node.domain + if domain_value == "finn": + info_messages.append("Attribute domain is set correctly") + else: + info_messages.append('Attribute domain should be set to "finn"') + + # verify that "backend" is set to "fpgadataflow" + backend_value = self.get_nodeattr("backend") + if backend_value == "fpgadataflow": + info_messages.append("Attribute backend is set correctly") + else: + info_messages.append('Attribute backend should be set to "fpgadataflow"') + + # verify that all necessary attributes exist + # TODO collect automatically from get_nodeattr_types + try: + self.get_nodeattr("code_gen_dir_cppsim") + self.get_nodeattr("executable_path") + self.get_nodeattr("NumChannels") + self.get_nodeattr("PE") + self.get_nodeattr("inputDataType") + self.get_nodeattr("paramDataType") + self.get_nodeattr("outputDataType") + info_messages.append("All necessary attributes exist") + except Exception: + info_messages.append( + """The required Threshold_Batch attributes do not exist.""" + ) + + return info_messages + + def bram_estimation(self): + """Calculates BRAM cost if resource set to BRAM""" + style = self.get_nodeattr("ram_style") + P = self.get_nodeattr("PE") + idt = self.get_input_datatype() + A = idt.bitwidth() + tmem = self.calc_tmem() + + if style == "block" and tmem > 1: + return int(ceil(A * P / 16)) * int(ceil(tmem / 1024)) + else: + return 0 + + def lut_estimation(self): + """Calculates LUT cost, taking memory resource type into account """ + # TODO add in/out FIFO contributions + style = self.get_nodeattr("ram_style") + P = self.get_nodeattr("PE") + idt = self.get_input_datatype() + A = idt.bitwidth() + tmem = self.calc_tmem() + # cost of comparators + comparator_cost = A * P + # cost of LUTRAM + if style == "distributed" and tmem > 1: + lutram_cost = P * A * int(ceil(tmem / 64)) + else: + lutram_cost = 0 + # total cost + return comparator_cost + lutram_cost + + def get_input_datatype(self): + """Returns FINN DataType of input.""" + return DataType[self.get_nodeattr("inputDataType")] + + def get_output_datatype(self): + """Returns FINN DataType of output.""" + return DataType[self.get_nodeattr("outputDataType")] + + def get_instream_width(self): + i_bits = self.get_input_datatype().bitwidth() + return i_bits * self.get_nodeattr("PE") + + def get_outstream_width(self): + o_bits = self.get_output_datatype().bitwidth() + return o_bits * self.get_nodeattr("PE") + + def get_folded_input_shape(self): + ich = self.get_nodeattr("NumChannels") + pe = self.get_nodeattr("PE") + fold = ich // pe + vecs = list(self.get_nodeattr("numInputVectors")) + folded_input_shape = tuple(vecs + [fold, pe]) + return folded_input_shape + + def get_folded_output_shape(self): + # same shape as input + return self.get_folded_input_shape() + + def get_normal_input_shape(self): + ich = self.get_nodeattr("NumChannels") + vecs = list(self.get_nodeattr("numInputVectors")) + normal_input_shape = tuple(vecs + [ich]) + return normal_input_shape + + def get_normal_output_shape(self): + # same shape as input + return self.get_normal_input_shape() + + def get_number_output_values(self): + nf = np.prod(self.get_folded_output_shape()[:-1]) + return nf + + def get_template_param_values(self): + """Returns the template parameter values according to input, output and weight + data types.""" + ret 
= dict() + inp_hls_str = self.get_input_datatype().get_hls_datatype_str() + out_hls_str = self.get_output_datatype().get_hls_datatype_str() + # fill in TSrcI + ret["TSrcI"] = "Slice<%s>" % inp_hls_str + # fill in TDstI + ret["TDstI"] = "Slice<%s>" % out_hls_str + + return ret + + def get_hls_compatible_parameter_tensor(self, orig_param_vector): + """Convert the original numpy weight matrix orig_weight_matrix into + a form suitable for passing to the hlslib call: + * ensure chn % PE == 0 + * interleave rows between PEs + * reshape into (PE, TMEM) and return + """ + chn = self.get_nodeattr("NumChannels") + pe = self.get_nodeattr("PE") + tmem = chn // pe + assert chn % pe == 0, "Requirement NumChannels divisable by PE is violated." + assert ( + orig_param_vector.ndim == 1 + ), """Parameter vector dimension is {}. + Expected dimension: 1.""".format( + orig_param_vector.ndim + ) + + # if not self.get_input_datatype().signed(): + # # ensure all thresholds are nonnegative + # assert (orig_param_vector >= 0).all() + + # ensure all thresholds are integer + assert (orig_param_vector.astype(np.int32) == orig_param_vector).all() + ret = orig_param_vector + + assert ( + ret.shape[0] == chn + ), "Cardinality of parameter vector is not as expected (chn)" + + # distribute rows between PEs + ret = ret.reshape(tmem, pe).transpose() + assert ( + ret.shape[0] == pe + ), """First dimension after distribution of the + rows between PEs is not as expected (pe)""" + assert ( + ret.shape[1] == tmem + ), """Second dimension after distribution of the + rows between PEs is not as expected (tmem)""" + + return ret.reshape(1, pe, tmem) + + def generate_params(self, model, path): + code_gen_dir = path + # save thresholds in params.h + parameters = model.get_initializer(self.onnx_node.input[1]) + parameter_tensor = self.get_hls_compatible_parameter_tensor(parameters) + pdt = DataType[self.get_nodeattr("paramDataType")] + + parameters_hls_code = numpy_to_hls_code( + parameter_tensor, pdt, "parameters", False, True + ) + # get input data type + export_idt = self.get_input_datatype() + if self.get_input_datatype() == DataType.BIPOLAR: + export_idt = DataType.BINARY + idt_hls = export_idt.get_hls_datatype_str() + + # write parameters into params.h + f_params = open("{}/params.h".format(code_gen_dir), "w") + pdt_hls = pdt.get_hls_datatype_str() + # use binary to export bipolar activations + export_odt = self.get_output_datatype() + if self.get_output_datatype() == DataType.BIPOLAR: + export_odt = DataType.BINARY + odt_hls = export_odt.get_hls_datatype_str() + # get desired function + func = self.get_nodeattr("Func") + if func == "cmp_le": + func_str = "std::less_equal" + elif func == "cmp_ge": + func_str = "std::greater_equal" + elif func == "add": + func_str = "std::plus" + elif func == "mul": + func_str = "std::multiplies" + else: + raise Exception( + """Invalid value for attribute Func! 
Is currently set to: {} + has to be set to one of the following value + ("cmp_le", "cmp_ge", "add", "mul")""".format( + func + ) + ) + f_params.write( + "static ChannelWiseOperation<{},{},{},{},{},{}> threshs \ + = ".format( + self.calc_tmem(), + self.get_nodeattr("PE"), + idt_hls, + pdt_hls, + odt_hls, + "%s<%s>" % (func_str, odt_hls), + ) + ) + f_params.write(parameters_hls_code) + f_params.close() + + def execute_node(self, context, graph): + mode = self.get_nodeattr("exec_mode") + node = self.onnx_node + + # TODO ensure codegen dir exists + if mode == "cppsim": + code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") + elif mode == "rtlsim": + code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") + else: + raise Exception( + """Invalid value for attribute exec_mode! Is currently set to: {} + has to be set to one of the following value ("cppsim", "rtlsim")""".format( + mode + ) + ) + + # create a npy file fore each input of the node (in_ind is input index) + in_ind = 0 + for inputs in node.input: + # it is assumed that the first input of the node is the data input + # the second input are the weights + # the third input are the thresholds + if in_ind == 0: + assert ( + str(context[inputs].dtype) == "float32" + ), """Input datatype is + not float32 as expected.""" + expected_inp_shape = self.get_folded_input_shape() + reshaped_input = context[inputs].reshape(expected_inp_shape) + export_idt = self.get_input_datatype() + # make copy before saving the array + reshaped_input = reshaped_input.copy() + np.save( + os.path.join(code_gen_dir, "input_{}.npy".format(in_ind)), + reshaped_input, + ) + elif in_ind > 2: + raise Exception("Unexpected input found for ChannelwiseOp_Batch") + in_ind += 1 + + if mode == "cppsim": + # execute the precompiled model + super().exec_precompiled_singlenode_model() + # load output npy file + super().npy_to_dynamic_output(context) + # reinterpret binary output as bipolar where needed + if self.get_output_datatype() == DataType.BIPOLAR: + out = context[node.output[0]] + out = 2 * out - 1 + context[node.output[0]] = out + assert ( + context[node.output[0]].shape == self.get_folded_output_shape() + ), """Output shape is not as expected""" + # reshape output to have expected shape + oshape = self.get_normal_output_shape() + context[node.output[0]] = context[node.output[0]].reshape(*oshape) + elif mode == "rtlsim": + sim = self.get_rtlsim() + nbits = self.get_instream_width() + inp = npy_to_rtlsim_input( + "{}/input_0.npy".format(code_gen_dir), export_idt, nbits + ) + super().reset_rtlsim(sim) + super().toggle_clk(sim) + output = self.rtlsim(sim, inp) + odt = self.get_output_datatype() + target_bits = odt.bitwidth() + packed_bits = self.get_outstream_width() + out_npy_path = "{}/output.npy".format(code_gen_dir) + out_shape = self.get_folded_output_shape() + rtlsim_output_to_npy( + output, out_npy_path, odt, out_shape, packed_bits, target_bits + ) + + # load and reshape output + output = np.load(out_npy_path) + oshape = self.get_normal_output_shape() + output = np.asarray([output], dtype=np.float32).reshape(*oshape) + context[node.output[0]] = output + else: + raise Exception( + """Invalid value for attribute exec_mode! 
Is currently set to: {} + has to be set to one of the following value ("cppsim", "rtlsim")""".format( + mode + ) + ) + + def global_includes(self): + self.code_gen_dict["$GLOBALS$"] = ['#include "activations.hpp"'] + self.code_gen_dict["$GLOBALS$"] += ['#include "params.h"'] + + # TODO check and add whatever missing + def defines(self, var): + numInputVectors = list(self.get_nodeattr("numInputVectors")) + numReps = numInputVectors[0] + self.code_gen_dict["$DEFINES$"] = [ + """#define NumChannels1 {}\n#define PE1 {}\n#define numReps {}""".format( + self.get_nodeattr("NumChannels"), self.get_nodeattr("PE"), numReps, + ) + ] + + def read_npy_data(self): + code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") + dtype = self.get_input_datatype() + elem_bits = dtype.bitwidth() + packed_bits = self.get_instream_width() + packed_hls_type = "ap_uint<%d>" % packed_bits + elem_hls_type = dtype.get_hls_datatype_str() + npy_type = "float" + npy_in = "%s/input_0.npy" % code_gen_dir + self.code_gen_dict["$READNPYDATA$"] = [] + # note: the innermost dim is reversed for the input + self.code_gen_dict["$READNPYDATA$"].append( + 'npy2apintstream<%s, %s, %d, %s>("%s", in0, false);' + % (packed_hls_type, elem_hls_type, elem_bits, npy_type, npy_in) + ) + + def strm_decl(self): + self.code_gen_dict["$STREAMDECLARATIONS$"] = [] + self.code_gen_dict["$STREAMDECLARATIONS$"].append( + 'hls::stream<ap_uint<{}>> in0 ("in0");'.format(self.get_instream_width()) + ) + self.code_gen_dict["$STREAMDECLARATIONS$"].append( + 'hls::stream<ap_uint<{}>> out ("out");'.format(self.get_outstream_width()) + ) + + def docompute(self): + tmpl_args = self.get_template_param_values() + # TODO: why put some template parameters into defines and not others? + # should ImgDim be defined or just filled in here like we do now? 
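(Aside: execute_node above and dataoutstrm below export bipolar tensors using binary storage and reinterpret them afterwards via out = 2 * out - 1. A standalone check of that mapping, independent of FINN:)

import numpy as np

bipolar = np.array([-1.0, 1.0, 1.0, -1.0], dtype=np.float32)
binary = (bipolar + 1) / 2      # {-1, +1} -> {0, 1} for storage on the stream
restored = 2 * binary - 1       # {0, 1} -> {-1, +1}, as done after cppsim execution
assert np.array_equal(bipolar, restored)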
+ ishape = self.get_folded_input_shape() + if len(ishape) == 3: + imgdim = 1 + elif len(ishape) == 5: + imgdim = ishape[1] + else: + raise Exception("""Unexpeted input shape""") + self.code_gen_dict["$DOCOMPUTE$"] = [ + """Thresholding_Batch<{}, NumChannels1, PE1, {}, {}> + (in0, out, threshs, numReps);""".format( + imgdim, tmpl_args["TSrcI"], tmpl_args["TDstI"], + ) + ] + + def dataoutstrm(self): + code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") + dtype = self.get_output_datatype() + if dtype == DataType.BIPOLAR: + # use binary for bipolar storage + dtype = DataType.BINARY + elem_bits = dtype.bitwidth() + packed_bits = self.get_outstream_width() + packed_hls_type = "ap_uint<%d>" % packed_bits + elem_hls_type = dtype.get_hls_datatype_str() + npy_type = "float" + npy_out = "%s/output.npy" % code_gen_dir + shape = self.get_folded_output_shape() + shape_cpp_str = str(shape).replace("(", "{").replace(")", "}") + + # note: the innermost dim is not reversed for the output + self.code_gen_dict["$DATAOUTSTREAM$"] = [ + 'apintstream2npy<%s, %s, %d, %s>(out, %s, "%s", false);' + % ( + packed_hls_type, + elem_hls_type, + elem_bits, + npy_type, + shape_cpp_str, + npy_out, + ) + ] + + def save_as_npy(self): + self.code_gen_dict["$SAVEASCNPY$"] = [] + + def blackboxfunction(self): + self.code_gen_dict["$BLACKBOXFUNCTION$"] = [ + """void {}(hls::stream<ap_uint<{}>> &in0, + hls::stream<ap_uint<{}>> &out + )""".format( + self.onnx_node.name, + self.get_instream_width(), + self.get_outstream_width(), + ) + ] + + def pragmas(self): + self.code_gen_dict["$PRAGMAS$"] = ["#pragma HLS INTERFACE axis port=in0"] + self.code_gen_dict["$PRAGMAS$"].append("#pragma HLS INTERFACE axis port=out") + self.code_gen_dict["$PRAGMAS$"].append( + "#pragma HLS INTERFACE ap_ctrl_none port=return" + ) + + # the channelwise parameter tensor is acc_type [PE][TMEM][N_PARAMS_PER_CHANNEL] + # partition for parallel access along PE and N_PARAMS_PER_CHANNEL + # dimensions (dims 1 and 3) + self.code_gen_dict["$PRAGMAS$"].append( + ( + "#pragma HLS ARRAY_PARTITION variable=threshs.parameters " + "complete dim=1" + ) + ) + # self.code_gen_dict["$PRAGMAS$"].append( + # ( + # "#pragma HLS ARRAY_PARTITION variable=threshs.parameters " + # "complete dim=3" + # ) + # ) + + # set resource type + ram_style = self.get_nodeattr("ram_style") + pe = self.get_nodeattr("PE") + ich = self.get_nodeattr("NumChannels") + # if PE less than NumChannels, assign cores according to ram_style; + # otherwise if PE == NumChannels, Vivado HLS will unroll to FFs + if pe < ich: + if ram_style == "distributed": + self.code_gen_dict["$PRAGMAS$"].append( + ( + "#pragma HLS RESOURCE variable=threshs.parameters " + "core=ROM_2P_LUTRAM" + ) + ) + elif ram_style == "block": + self.code_gen_dict["$PRAGMAS$"].append( + ( + "#pragma HLS RESOURCE variable=threshs.parameters " + "core=ROM_2P_BRAM" + ) + ) + else: + raise Exception( + """Invalid value for attribute ram_style! 
Is currently set to: {} + has to be set to one of ("block", "distributed")""".format( + ram_style + ) + ) diff --git a/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py b/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py index e4d106068d4d128c66b2ce5f3d6c925dfe414b90..3e40ad70208909551365c51324153859ccc79ceb 100644 --- a/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py +++ b/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py @@ -41,10 +41,19 @@ from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy # output 0 is the output tensor, shape NHWC: # = (1, OFMDim, OFMDim, (ConvKernelDim^2)*IFMChannels) +# note: the actual data layout produced by the hlslib kernels is different +# for depthwise and non-depthwise ops. +# * non-depthwise SWG: (1, OFMDim, OFMDim, K, K, IFMChannels/SIMD, SIMD) +# * depthwise SWG: (1, OFMDim, OFMDim, IFMChannels/SIMD, K, K, SIMD) +# see test_fpgadataflow_slidingwindow.py for an example of how to transform +# between the two layouts + class ConvolutionInputGenerator(HLSCustomOp): - """Class that corresponds to finn-hlslib ConvolutionInputGenerator - (sliding window) function.""" + """Class that corresponds to one of the finn-hlslib ConvolutionInputGenerator + (sliding window) function variants. Depending on the combination of + attributes (e.g. depthwise or not, whether k % stride is 0) a different + variant will be picked for the actual HLS implementation.""" def __init__(self, onnx_node): super().__init__(onnx_node) @@ -60,6 +69,7 @@ class ConvolutionInputGenerator(HLSCustomOp): # FINN DataTypes for inputs, weights, outputs "inputDataType": ("s", True, ""), "outputDataType": ("s", True, ""), + "depthwise": ("i", False, 0), # FPGA resource type for ConvolutionInputGenerator input buffer # auto -- let Vivado HLS decide # block -- use BRAM @@ -106,7 +116,6 @@ class ConvolutionInputGenerator(HLSCustomOp): pad = 0 ofm_dim = compute_conv_output_dim(ifm_dim, k, stride, pad) assert ifm_ch % simd == 0, "SIMD must divide IFMChannels" - assert k % stride == 0, "stride must divide kernel size k" wf = int((k * k * ifm_ch) // simd) folded_oshape = (1, ofm_dim, ofm_dim, wf, simd) return folded_oshape @@ -305,12 +314,35 @@ class ConvolutionInputGenerator(HLSCustomOp): def docompute(self): node = self.onnx_node - self.code_gen_dict["$DOCOMPUTE$"] = [ - """{}<ConvKernelDim1, IFMChannels1, Input_precision1, IFMDim1, - OFMDim1, SIMD1, Stride1> (in0, out, numReps);""".format( - node.op_type - ) - ] + ram_style = self.get_nodeattr("ram_style") + map_to_hls_ram_style = { + "auto": "ap_resource_dflt()", + "block": "ap_resource_bram()", + "distributed": "ap_resource_lutram()", + "ultra": "ap_resource_uram()", + } + hls_ram_style = map_to_hls_ram_style[ram_style] + hls_call = node.op_type + # check if non optimized ConvolutionInputGenerator is needed + k = self.get_nodeattr("ConvKernelDim") + stride = self.get_nodeattr("Stride") + if k % stride != 0: + hls_call += "_kernel_stride" + + if self.get_nodeattr("depthwise") == 1: + self.code_gen_dict["$DOCOMPUTE$"] = [ + """{}_dws<ConvKernelDim1, IFMChannels1, Input_precision1, IFMDim1, + OFMDim1, SIMD1, Stride1> (in0, out, numReps, {});""".format( + hls_call, hls_ram_style + ) + ] + else: + self.code_gen_dict["$DOCOMPUTE$"] = [ + """{}<ConvKernelDim1, IFMChannels1, Input_precision1, IFMDim1, + OFMDim1, SIMD1, Stride1> (in0, out, numReps, {});""".format( + hls_call, hls_ram_style + ) + ] def dataoutstrm(self): code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") @@ -356,17 +388,3 
@@ class ConvolutionInputGenerator(HLSCustomOp): self.code_gen_dict["$PRAGMAS$"].append( "#pragma HLS INTERFACE ap_ctrl_none port=return" ) - - def ipgen_extra_directives(self): - # add directive to control input buffer memory resources - ram_style = self.get_nodeattr("ram_style") - map_to_hls_ram_style = { - "auto": "RAM_2P", - "block": "RAM_2P_BRAM", - "distributed": "RAM_2P_LUTRAM", - "ultra": "RAM_2P_URAM", - } - hls_ram_style = map_to_hls_ram_style[ram_style] - directive = "set_directive_resource -core %s " % hls_ram_style - directive += "ConvolutionInputGenerator inputBuf" - return [directive] diff --git a/src/finn/custom_op/fpgadataflow/downsampler.py b/src/finn/custom_op/fpgadataflow/downsampler.py new file mode 100644 index 0000000000000000000000000000000000000000..0ce4379a2c41baa5bc009e9df7623d133ee89a09 --- /dev/null +++ b/src/finn/custom_op/fpgadataflow/downsampler.py @@ -0,0 +1,297 @@ +import os +import numpy as np +from onnx import TensorProto, helper +from finn.core.datatype import DataType +from finn.custom_op.fpgadataflow import HLSCustomOp +from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy + + +class DownSampler(HLSCustomOp): + """Corresponds to finn-hlslib ConvolutionInputGenerator_kernel1 function. + Basically performs a down sampling of the image removing rows and columns.""" + + def __init__(self, onnx_node): + super().__init__(onnx_node) + + def get_nodeattr_types(self): + my_attrs = { + # spatial size of input images + "ImgDim": ("i", True, 0), + # number of channels in input image + "NumChannels": ("i", True, 0), + # Number of input columns computed in parallel + "SIMD": ("i", False, 1), + "Stride": ("i", True, 2), + # FINN input datatype + "inputDataType": ("s", True, ""), + # Batch size + "numInputVectors": ("i", False, 1), + } + my_attrs.update(super().get_nodeattr_types()) + return my_attrs + + def get_downsampled_odim(self): + "Return the down sampled spatial size of the output." + idim = self.get_nodeattr("ImgDim") + stride = self.get_nodeattr("Stride") + return int(np.floor((idim - 1) / stride) + 1) + + def get_normal_input_shape(self): + idim = self.get_nodeattr("ImgDim") + num_ch = self.get_nodeattr("NumChannels") + batch = self.get_nodeattr("numInputVectors") + ishape = (batch, idim, idim, num_ch) + return ishape + + def get_normal_output_shape(self): + odim = self.get_downsampled_odim() + num_ch = self.get_nodeattr("NumChannels") + batch = self.get_nodeattr("numInputVectors") + oshape = (batch, odim, odim, num_ch) + return oshape + + def get_folded_input_shape(self): + normal_ishape = list(self.get_normal_input_shape()) + ifm_ch = self.get_nodeattr("NumChannels") + simd = self.get_nodeattr("SIMD") + assert ifm_ch % simd == 0, "SIMD must divide input channels" + fold = int(normal_ishape[-1] / simd) + folded_ishape = normal_ishape[:-1] + [fold, simd] + return tuple(folded_ishape) + + def get_folded_output_shape(self): + normal_oshape = list(self.get_normal_output_shape()) + ifm_ch = self.get_nodeattr("NumChannels") + simd = self.get_nodeattr("SIMD") + assert ifm_ch % simd == 0, "SIMD must divide input channels" + fold = int(normal_oshape[-1] / simd) + folded_oshape = normal_oshape[:-1] + [fold, simd] + return tuple(folded_oshape) + + def make_shape_compatible_op(self, model): + exp_ishape = self.get_normal_input_shape() + oshape = self.get_normal_output_shape() + ishape = tuple(model.get_tensor_shape(self.onnx_node.input[0])) + assert ishape == exp_ishape, "Unexpect input shape for DownSampler." 
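(A quick numeric check of the get_downsampled_odim formula defined above; the concrete image sizes are illustrative only.)

import numpy as np

def downsampled_odim(idim, stride):
    # same formula as DownSampler.get_downsampled_odim
    return int(np.floor((idim - 1) / stride) + 1)

assert downsampled_odim(32, 2) == 16   # 32x32 input, stride 2
assert downsampled_odim(7, 2) == 4     # odd input dimensions round up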
+ # implement tensor with correct shape + values = np.random.randn(*oshape).astype(np.float32) + return helper.make_node( + "Constant", + inputs=[], + outputs=[self.onnx_node.output[0]], + value=helper.make_tensor( + name="const_tensor", + data_type=TensorProto.FLOAT, + dims=values.shape, + vals=values.flatten().astype(float), + ), + ) + + def infer_node_datatype(self, model): + node = self.onnx_node + # data type stays the same + dtype = model.get_tensor_datatype(node.input[0]) + exp_idtype = self.get_input_datatype() + assert dtype == exp_idtype, "Unexpected datatype for DownSampler" + model.set_tensor_datatype(node.output[0], dtype) + + def verify_node(self): + pass + + def get_input_datatype(self): + """Returns FINN DataType of input.""" + ret = DataType[self.get_nodeattr("inputDataType")] + return ret + + def get_output_datatype(self): + """Returns FINN DataType of output. (Same as input datatype)""" + return self.get_input_datatype() + + def get_instream_width(self): + ibits = self.get_input_datatype().bitwidth() + simd = self.get_nodeattr("SIMD") + return ibits * simd + + def get_outstream_width(self): + obits = self.get_output_datatype().bitwidth() + simd = self.get_nodeattr("SIMD") + return obits * simd + + def get_number_output_values(self): + folded_oshape = self.get_folded_output_shape() + return np.prod(folded_oshape[:-1]) + + def global_includes(self): + self.code_gen_dict["$GLOBALS$"] = ['#include "slidingwindow.h"'] + + def defines(self, var): + self.code_gen_dict["$DEFINES$"] = [] + + ifm_ch = self.get_nodeattr("NumChannels") + self.code_gen_dict["$DEFINES$"] += ["#define IFMChannels {}".format(ifm_ch)] + + ibits = self.get_input_datatype().bitwidth() + self.code_gen_dict["$DEFINES$"] += ["#define Input_precision {}".format(ibits)] + + idim = self.get_nodeattr("ImgDim") + self.code_gen_dict["$DEFINES$"] += ["#define IFMDim {}".format(idim)] + + simd = self.get_nodeattr("SIMD") + self.code_gen_dict["$DEFINES$"] += ["#define SIMD {}".format(simd)] + + stride = self.get_nodeattr("Stride") + self.code_gen_dict["$DEFINES$"] += ["#define Stride {}".format(stride)] + + batch_size = self.get_nodeattr("numInputVectors") + self.code_gen_dict["$DEFINES$"] += ["#define numReps {}".format(batch_size)] + + def read_npy_data(self): + code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") + dtype = self.get_input_datatype() + if dtype == DataType.BIPOLAR: + # use binary for bipolar storage + dtype = DataType.BINARY + elem_bits = dtype.bitwidth() + packed_bits = self.get_instream_width() + packed_hls_type = "ap_uint<%d>" % packed_bits + elem_hls_type = dtype.get_hls_datatype_str() + npy_type = "float" + npy_in = "%s/input_0.npy" % code_gen_dir + self.code_gen_dict["$READNPYDATA$"] = [] + self.code_gen_dict["$READNPYDATA$"].append( + 'npy2apintstream<%s, %s, %d, %s>("%s", in0);' + % (packed_hls_type, elem_hls_type, elem_bits, npy_type, npy_in) + ) + + def strm_decl(self): + self.code_gen_dict["$STREAMDECLARATIONS$"] = [] + self.code_gen_dict["$STREAMDECLARATIONS$"].append( + 'hls::stream<ap_uint<{}>> in0 ("in0");'.format(self.get_instream_width()) + ) + self.code_gen_dict["$STREAMDECLARATIONS$"].append( + 'hls::stream<ap_uint<{}>> out ("out");'.format(self.get_outstream_width()) + ) + + def docompute(self): + self.code_gen_dict["$DOCOMPUTE$"] = [ + """ConvolutionInputGenerator_kernel1<IFMChannels, Input_precision, + IFMDim, SIMD,Stride> (in0, out, numReps);""" + ] + + def dataoutstrm(self): + code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") + dtype = self.get_output_datatype() + if 
dtype == DataType.BIPOLAR: + # use binary for bipolar storage + dtype = DataType.BINARY + elem_bits = dtype.bitwidth() + packed_bits = self.get_outstream_width() + packed_hls_type = "ap_uint<%d>" % packed_bits + elem_hls_type = dtype.get_hls_datatype_str() + npy_type = "float" + npy_out = "%s/output.npy" % code_gen_dir + oshape = self.get_folded_output_shape() + oshape_cpp_str = str(oshape).replace("(", "{").replace(")", "}") + + self.code_gen_dict["$DATAOUTSTREAM$"] = [ + 'apintstream2npy<%s, %s, %d, %s>(out, %s, "%s");' + % ( + packed_hls_type, + elem_hls_type, + elem_bits, + npy_type, + oshape_cpp_str, + npy_out, + ) + ] + + def save_as_npy(self): + self.code_gen_dict["$SAVEASCNPY$"] = [] + + def blackboxfunction(self): + packed_bits = self.get_instream_width() + packed_hls_type = "ap_uint<%d>" % packed_bits + self.code_gen_dict["$BLACKBOXFUNCTION$"] = [ + "void %s(hls::stream<%s > &in0, hls::stream<%s > &out)" + % (self.onnx_node.name, packed_hls_type, packed_hls_type) + ] + + def pragmas(self): + self.code_gen_dict["$PRAGMAS$"] = ["#pragma HLS INTERFACE axis port=in0"] + self.code_gen_dict["$PRAGMAS$"].append("#pragma HLS INTERFACE axis port=out") + self.code_gen_dict["$PRAGMAS$"].append( + "#pragma HLS INTERFACE ap_ctrl_none port=return" + ) + + def execute_node(self, context, graph): + mode = self.get_nodeattr("exec_mode") + node = self.onnx_node + exp_ishape = self.get_normal_input_shape() + exp_oshape = self.get_normal_output_shape() + folded_ishape = self.get_folded_input_shape() + folded_oshape = self.get_folded_output_shape() + + if mode == "cppsim": + code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") + elif mode == "rtlsim": + code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") + else: + raise Exception( + """Invalid value for attribute exec_mode! Is currently set to: {} + has to be set to one of the following value ("cppsim", "rtlsim")""".format( + mode + ) + ) + + inp = context[node.input[0]] + assert str(inp.dtype) == "float32", "Input datatype is not float32" + assert ( + inp.shape == exp_ishape + ), """Input shape doesn't + match expected shape (numInputVectors, ImgDim, ImgDim, NumChannels).""" + export_idt = self.get_input_datatype() + + reshaped_input = inp.reshape(folded_ishape) + np.save(os.path.join(code_gen_dir, "input_0.npy"), reshaped_input) + + if mode == "cppsim": + # execute the precompiled model + super().exec_precompiled_singlenode_model() + # load output npy file + super().npy_to_dynamic_output(context) + assert ( + context[node.output[0]].shape == folded_oshape + ), "cppsim did not produce expected folded output shape" + context[node.output[0]] = context[node.output[0]].reshape(*exp_oshape) + elif mode == "rtlsim": + sim = self.get_rtlsim() + nbits = self.get_instream_width() + rtlsim_inp = npy_to_rtlsim_input( + "{}/input_0.npy".format(code_gen_dir), export_idt, nbits + ) + super().reset_rtlsim(sim) + super().toggle_clk(sim) + rtlsim_output = self.rtlsim(sim, rtlsim_inp) + odt = export_idt + target_bits = odt.bitwidth() + packed_bits = self.get_outstream_width() + out_npy_path = "{}/output.npy".format(code_gen_dir) + out_shape = self.get_folded_output_shape() + rtlsim_output_to_npy( + rtlsim_output, out_npy_path, odt, out_shape, packed_bits, target_bits + ) + # load and reshape output + output = np.load(out_npy_path) + output = np.asarray([output], dtype=np.float32).reshape(*exp_oshape) + context[node.output[0]] = output + else: + raise Exception( + """Invalid value for attribute exec_mode! 
Is currently set to: {} + has to be set to one of the following value ("cppsim", "rtlsim")""".format( + mode + ) + ) + assert ( + context[node.output[0]].shape == exp_oshape + ), """Output shape doesn't match expected shape + (1, OutputDim, OutputDim, NumChannels).""" diff --git a/src/finn/custom_op/fpgadataflow/sameresize_batch.py b/src/finn/custom_op/fpgadataflow/fmpadding_batch.py similarity index 76% rename from src/finn/custom_op/fpgadataflow/sameresize_batch.py rename to src/finn/custom_op/fpgadataflow/fmpadding_batch.py index c459cac1e9c17336200a1fc85aad2af5e14e2c61..d326ae7dfc7830a0081c3b13233d67ef08b12eff 100644 --- a/src/finn/custom_op/fpgadataflow/sameresize_batch.py +++ b/src/finn/custom_op/fpgadataflow/fmpadding_batch.py @@ -6,27 +6,42 @@ from finn.custom_op.fpgadataflow import HLSCustomOp from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy -class SameResize_Batch(HLSCustomOp): - """Class that corresponds to finn-hlslib SameResize function. - Implements 'same' padding on a given input image.""" +class FMPadding_Batch(HLSCustomOp): + """Corresponds to finn-hlslib FMPadding_Batch function. + Pads input image by given amount.""" def __init__(self, onnx_node): super().__init__(onnx_node) def get_nodeattr_types(self): my_attrs = { + # spatial size of input images "ImgDim": ("i", True, 0), - "KernelDim": ("i", True, 0), - "Stride": ("i", True, 0), + # total padding (per dimension) to apply + "Padding": ("i", True, 2), + # number of channels in input image "NumChannels": ("i", True, 0), + # SIMD Input parallelism + "SIMD": ("i", False, 1), # FINN input datatype "inputDataType": ("s", True, ""), - # distribution of added values to achieve "same" padding - "PaddingStyle": ("i", True, 2), + # controls distribution of padded pixels + # in case of uneven padding -- see FMPadding fxn + # in hlslib + "PaddingStyle": ("i", False, 2), + # shape describing input vecs per execution + "numInputVectors": ("i", False, 1), } my_attrs.update(super().get_nodeattr_types()) return my_attrs + def get_padded_odim(self): + "Return the padded spatial size of the output." + + idim = self.get_nodeattr("ImgDim") + pad = self.get_nodeattr("Padding") + return idim + pad + def get_normal_input_shape(self): idim = self.get_nodeattr("ImgDim") num_ch = self.get_nodeattr("NumChannels") @@ -35,33 +50,29 @@ class SameResize_Batch(HLSCustomOp): return ishape def get_normal_output_shape(self): - idim = self.get_nodeattr("ImgDim") + odim = self.get_padded_odim() num_ch = self.get_nodeattr("NumChannels") - kdim = self.get_nodeattr("KernelDim") - stride = self.get_nodeattr("Stride") - assert idim % stride == 0, "Stride must divide input dimension." 
- # number of "same" windows over the input data - same_windows = idim // stride - odim = kdim + stride * (same_windows - 1) oshape = (1, odim, odim, num_ch) return oshape def get_folded_input_shape(self): - # even though there is no folding in the current hlslib op, - # insert a time multiplexing axis to remain compatible with the - # shapes produced by the rest of the dataflow pipeline - ret = list(self.get_normal_input_shape()) - ret.insert(-1, 1) - return tuple(ret) + normal_ishape = list(self.get_normal_input_shape()) + ifm_ch = self.get_nodeattr("NumChannels") + simd = self.get_nodeattr("SIMD") + assert ifm_ch % simd == 0, "SIMD must divide input channels" + fold = int(normal_ishape[-1] / simd) + folded_ishape = normal_ishape[:-1] + [fold, simd] + return tuple(folded_ishape) def get_folded_output_shape(self): - # even though there is no folding in the current hlslib op, - # insert a time multiplexing axis to remain compatible with the - # shapes produced by the rest of the dataflow pipeline - ret = list(self.get_normal_output_shape()) - ret.insert(-1, 1) - return tuple(ret) + normal_oshape = list(self.get_normal_output_shape()) + ifm_ch = self.get_nodeattr("NumChannels") + simd = self.get_nodeattr("SIMD") + assert ifm_ch % simd == 0, "SIMD must divide input channels" + fold = int(normal_oshape[-1] / simd) + folded_oshape = normal_oshape[:-1] + [fold, simd] + return tuple(folded_oshape) def make_shape_compatible_op(self, model): exp_ishape = self.get_normal_input_shape() @@ -87,7 +98,7 @@ class SameResize_Batch(HLSCustomOp): # data type stays the same dtype = model.get_tensor_datatype(node.input[0]) exp_idtype = self.get_input_datatype() - assert dtype == exp_idtype, "Unexpected datatype for SameResize_Batch" + assert dtype == exp_idtype, "Unexpected datatype for FMPadding_Batch" model.set_tensor_datatype(node.output[0], dtype) def verify_node(self): @@ -96,9 +107,9 @@ class SameResize_Batch(HLSCustomOp): def get_input_datatype(self): """Returns FINN DataType of input.""" ret = DataType[self.get_nodeattr("inputDataType")] - # the hlslib op always pads with zeroes, so ensure that the DataType - # is able to represent zeroes - assert ret.allowed(0), "SameResize_Batch DataType must support zero" + # the hlslib op always pads with zeros, so ensure that the DataType + # is able to represent zeros + assert ret.allowed(0), "FMPadding_Batch DataType must support zero" return ret def get_output_datatype(self): @@ -107,15 +118,13 @@ class SameResize_Batch(HLSCustomOp): def get_instream_width(self): ibits = self.get_input_datatype().bitwidth() - num_ch = self.get_nodeattr("NumChannels") - - return ibits * num_ch + simd = self.get_nodeattr("SIMD") + return ibits * simd def get_outstream_width(self): obits = self.get_output_datatype().bitwidth() - num_ch = self.get_nodeattr("NumChannels") - - return obits * num_ch + simd = self.get_nodeattr("SIMD") + return obits * simd def get_number_output_values(self): folded_oshape = self.get_folded_output_shape() @@ -125,18 +134,18 @@ class SameResize_Batch(HLSCustomOp): self.code_gen_dict["$GLOBALS$"] = ['#include "streamtools.h"'] def defines(self, var): - numReps = 1 - assert self.get_nodeattr("PaddingStyle") == 2, "Only PaddingStyle=2 supported" self.code_gen_dict["$DEFINES$"] = [ - """#define ImgDim1 {}\n #define KernelDim1 {}\n - #define Stride1 {}\n #define NumChannels1 {}\n - #define PaddingStyle1 {}\n #define numReps {}""".format( + """#define ImgDim1 {}\n#define OutputDim1 {}\n + #define Padding1 {}\n#define NumChannels1 {}\n + #define PaddingStyle1 
{}\n#define numReps {} + #define SIMD1 {}\n""".format( self.get_nodeattr("ImgDim"), - self.get_nodeattr("KernelDim"), - self.get_nodeattr("Stride"), + self.get_padded_odim(), + self.get_nodeattr("Padding"), self.get_nodeattr("NumChannels"), self.get_nodeattr("PaddingStyle"), - numReps, + self.get_nodeattr("numInputVectors"), + self.get_nodeattr("SIMD"), ) ] @@ -171,8 +180,8 @@ class SameResize_Batch(HLSCustomOp): in_t = self.get_input_datatype().get_hls_datatype_str() node = self.onnx_node self.code_gen_dict["$DOCOMPUTE$"] = [ - """{}<ImgDim1, KernelDim1, Stride1, NumChannels1, - {}, PaddingStyle1> (in0, out, numReps);""".format( + """{}<ImgDim1, OutputDim1, Padding1, NumChannels1,SIMD1, + {}, PaddingStyle1> (in0, out, numReps);""".format( node.op_type, in_t ) ] @@ -227,6 +236,7 @@ class SameResize_Batch(HLSCustomOp): node = self.onnx_node exp_ishape = self.get_normal_input_shape() exp_oshape = self.get_normal_output_shape() + folded_ishape = self.get_folded_input_shape() folded_oshape = self.get_folded_output_shape() if mode == "cppsim": @@ -249,10 +259,8 @@ class SameResize_Batch(HLSCustomOp): match expected shape (1, ImgDim, ImgDim, NumChannels).""" export_idt = self.get_input_datatype() - # no reshaping for input since assuming no folding on input - # make copy before saving array - inp = inp.copy() - np.save(os.path.join(code_gen_dir, "input_0.npy"), inp) + reshaped_input = inp.reshape(folded_ishape) + np.save(os.path.join(code_gen_dir, "input_0.npy"), reshaped_input) if mode == "cppsim": # execute the precompiled model @@ -261,8 +269,7 @@ class SameResize_Batch(HLSCustomOp): super().npy_to_dynamic_output(context) assert ( context[node.output[0]].shape == folded_oshape - ), "cppsim \ - did not produce expected ofolded utput shape" + ), "cppsim did not produce expected folded output shape" context[node.output[0]] = context[node.output[0]].reshape(*exp_oshape) elif mode == "rtlsim": sim = self.get_rtlsim() diff --git a/src/finn/custom_op/fpgadataflow/globalaccpool_batch.py b/src/finn/custom_op/fpgadataflow/globalaccpool_batch.py index 9e6c63dc510aab5f6baff9cb6326a2d0476f67a9..83152dea6cc494b8464c78605399b21b38d48b80 100644 --- a/src/finn/custom_op/fpgadataflow/globalaccpool_batch.py +++ b/src/finn/custom_op/fpgadataflow/globalaccpool_batch.py @@ -75,16 +75,19 @@ class GlobalAccPool_Batch(HLSCustomOp): def get_normal_output_shape(self): ch = self.get_nodeattr("NumChannels") vecs = list(self.get_nodeattr("numInputVectors")) - oshape = tuple([vecs[0]] + [ch]) + if len(vecs) == 1: + oshape = tuple(vecs + [ch]) + elif len(vecs) == 3: + oshape = tuple([vecs[0]] + [1, 1, ch]) return oshape def get_folded_output_shape(self): ch = self.get_nodeattr("NumChannels") pe = self.get_nodeattr("PE") - vecs = list(self.get_nodeattr("numInputVectors")) + unfolded_shape = list(self.get_normal_output_shape()) assert ch % pe == 0, "PE must divide NumChannels" folds = int(ch / pe) - oshape = tuple([vecs[0]] + [folds, pe]) + oshape = tuple(unfolded_shape[:-1] + [folds, pe]) return oshape def make_shape_compatible_op(self, model): diff --git a/src/finn/custom_op/fpgadataflow/iodma.py b/src/finn/custom_op/fpgadataflow/iodma.py new file mode 100644 index 0000000000000000000000000000000000000000..9b718ecbbc490610790b68871080de23a54f4891 --- /dev/null +++ b/src/finn/custom_op/fpgadataflow/iodma.py @@ -0,0 +1,346 @@ +# Copyright (c) 2020, Xilinx +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of FINN nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import numpy as np +import math +from onnx import TensorProto, helper +from finn.core.datatype import DataType +from finn.custom_op.fpgadataflow import HLSCustomOp + + +# the IODMA interfaces between a memory-mapped AXI interface and an AXI stream +# direction "in": pulls data from AXI-MM to AXI stream +# direction "out": pushes data from AXI stream to AXI-MM + +# DMA Addressing +# - burst mode can be "wrap" or "increment" +# - "increment" bursts will increment the address when moving to the next image +# - "wrap" bursts will reinitialize the address to the start address, +# and are useful for e.g. streaming weights, where the same buffer is +# repeatedly read into the FPGA +# - no additional alignment restrictions beyond anything specified in the AXI spec + +# Interfaces +# - AXI-MM name specified by intfName unless this is set to "" (empty, the default) +# in which case output AXI-MM are named "out" and input AXI-MM are named "in0" +# - AXI-MM interface width (in bits) is specified by intfWidth +# - AXI-Stream interface width (in bits) is specified by streamWidth +# - If intfWidth and streamWidth are not equal, the DMA core performs +# width conversion by going up to the least common multiple of bitwidths +# e.g. intfWidth=32b -> 96b -> streamWidth=24b +# - transfers occur in multiples of the AXI-MM interface width, therefore +# the total number of bits in the tensor must be a multiple of intfWidth +# - transfers occur in multiples of the AXI-Stream interface width, therefore +# the total number of bits in the tensor must be a multiple of streamWidth +# - both interface widths must be a multiple of 8b (AXI protocol requirement) +# - in most systems, intfWidth is also restricted to a power of 2 (e.g.
Vitis) +# but this is not universal so we don't check here explicitly + +# Input/output tensor sizes shapes +# - The data being moved is a tensor of shape numInputVectors+[NumChannels] +# - The data type of the tensor elements is specified by dataType +# - on the stream side +# -the normal shape is the same as the ONNX tensor attached to it +# -the folded shape is computed from the stream width and normal shape +# - on the AXI-MM side +# -the normal shape is the same as the one on the stream side +# -the folded shape is not defined + + +class IODMA(HLSCustomOp): + """Class that corresponds to finn-hlslib DMA function(s).""" + + def __init__(self, onnx_node): + super().__init__(onnx_node) + + def get_nodeattr_types(self): + my_attrs = { + "NumChannels": ("i", True, 0), + # FINN input datatype + "dataType": ("s", True, ""), + # Stream parameters + "streamWidth": ("i", False, 32), + # DMA-specific parameters + "intfWidth": ("i", False, 32), + "burstMode": ("s", False, "increment"), + "direction": ("s", False, "in"), + # shape describing input vecs per execution + "numInputVectors": ("ints", False, [1]), + # name of axi-mm interface + "intfName": ("s", False, ""), + } + my_attrs.update(super().get_nodeattr_types()) + return my_attrs + + def get_normal_input_shape(self): + vecs = list(self.get_nodeattr("numInputVectors")) + num_ch = self.get_nodeattr("NumChannels") + ishape = tuple(vecs + [num_ch]) + return ishape + + def get_normal_output_shape(self): + return self.get_normal_input_shape() + + def get_folded_input_shape(self): + if self.get_nodeattr("direction") == "in": + raise ValueError("Folded input shape not defined for input IODMA") + else: + shape = list(self.get_normal_input_shape()) + itype_bits = self.get_input_datatype().bitwidth() + intfw = self.get_nodeattr("streamWidth") + assert ( + intfw % itype_bits == 0 + ), "Input stream width must be a multiple of datatype bits" + elems_per_word = intfw // itype_bits + assert shape[-1] % elems_per_word == 0, "Fold depth must be integer" + fold_depth = shape[-1] // elems_per_word + shape[-1] = fold_depth + shape.append(elems_per_word) + return tuple(shape) + + def get_folded_output_shape(self): + if self.get_nodeattr("direction") == "out": + raise ValueError("Folded output shape not defined for output IODMA") + else: + shape = list(self.get_normal_output_shape()) + itype_bits = self.get_output_datatype().bitwidth() + intfw = self.get_nodeattr("streamWidth") + assert ( + intfw % itype_bits == 0 + ), "Input stream width must be a multiple of datatype bits" + elems_per_word = intfw // itype_bits + assert shape[-1] % elems_per_word == 0, "Fold depth must be integer" + fold_depth = shape[-1] // elems_per_word + shape[-1] = fold_depth + shape.append(elems_per_word) + return tuple(shape) + + def make_shape_compatible_op(self, model): + exp_ishape = self.get_normal_input_shape() + oshape = self.get_normal_output_shape() + ishape = tuple(model.get_tensor_shape(self.onnx_node.input[0])) + assert ishape == exp_ishape, "Unexpected input shape." 
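# A minimal sketch of the width-conversion rule described in the IODMA comments
# above: when intfWidth and streamWidth differ, the core converts via their least
# common multiple. The 32b/24b figures come from the comment; the tensor size is
# an assumed example.
import math

intf_width, stream_width = 32, 24
width_lcm = (intf_width * stream_width) // math.gcd(intf_width, stream_width)
assert width_lcm == 96                      # intfWidth=32b -> 96b -> streamWidth=24b
total_bits = 10 * width_lcm                 # example tensor size in bits (960)
# transfers happen in whole words on both interfaces, so both widths must divide the total:
assert total_bits % intf_width == 0 and total_bits % stream_width == 0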
+ # implement tensor with correct shape + values = np.random.randn(*oshape).astype(np.float32) + return helper.make_node( + "Constant", + inputs=[], + outputs=[self.onnx_node.output[0]], + value=helper.make_tensor( + name="const_tensor", + data_type=TensorProto.FLOAT, + dims=values.shape, + vals=values.flatten().astype(float), + ), + ) + + def infer_node_datatype(self, model): + node = self.onnx_node + # data type stays the same + dtype = model.get_tensor_datatype(node.input[0]) + exp_idtype = self.get_input_datatype() + assert dtype == exp_idtype, "Unexpected datatype." + model.set_tensor_datatype(node.output[0], dtype) + + def verify_node(self): + pass + + def get_input_datatype(self): + """Returns FINN DataType of input.""" + return DataType[self.get_nodeattr("dataType")] + + def get_output_datatype(self): + """Returns FINN DataType of output. (Same as input datatype)""" + return self.get_input_datatype() + + def get_instream_width(self): + if self.get_nodeattr("direction") == "in": + return self.get_nodeattr("intfWidth") + elif self.get_nodeattr("direction") == "out": + return self.get_nodeattr("streamWidth") + else: + raise ValueError("Invalid IODMA direction, please set to in or out") + + def get_outstream_width(self): + if self.get_nodeattr("direction") == "out": + return self.get_nodeattr("intfWidth") + elif self.get_nodeattr("direction") == "in": + return self.get_nodeattr("streamWidth") + else: + raise ValueError("Invalid IODMA direction, please set to in or out") + + def get_number_output_values(self): + oshape = self.get_normal_output_shape() + itype_bits = self.get_input_datatype().bitwidth() + intfw = self.get_nodeattr("intfWidth") + nelems = np.prod(oshape) + nbits = nelems * itype_bits + assert nbits % intfw == 0, "DMA: total transfer size must be word multiple" + ovalues = nbits // intfw + return ovalues + + def global_includes(self): + self.code_gen_dict["$GLOBALS$"] = ['#include "dma.h"'] + self.code_gen_dict["$GLOBALS$"].append('#include "streamtools.h"') + + def defines(self, var): + itype_bits = self.get_input_datatype().bitwidth() + total_bits = itype_bits * np.prod(self.get_normal_input_shape()) + assert total_bits % 8 == 0, "DMA input not a multiple of 1 Byte" + total_bytes = total_bits // 8 + self.code_gen_dict["$DEFINES$"] = [ + """#define NumBytes1 {}\n#define DataWidth1 {}\n""".format( + total_bytes, self.get_nodeattr("intfWidth") + ) + ] + + def get_ap_int_max_w(self): + "Return the maximum width of any ap_int used in this module." 
+ instream = self.get_instream_width() + outstream = self.get_outstream_width() + width_lcm = (instream * outstream) // math.gcd(instream, outstream) + return width_lcm + + def docompute(self): + direction = self.get_nodeattr("direction") + mode = self.get_nodeattr("burstMode") + if direction == "in": + if mode == "wrap": + func = "Mem2Stream_Batch_external_wmem" + else: + func = "Mem2Stream_Batch" + dwc_func = "WidthAdjustedOutputStream" + elif direction == "out": + func = "Stream2Mem_Batch" + dwc_func = "WidthAdjustedInputStream" + else: + raise ValueError("Invalid IODMA direction, please set to in or out") + # define templates for instantiation + dma_inst_template = func + "<DataWidth1, NumBytes1>(%s, %s, numReps);" + dwc_inst_template = dwc_func + "<%d, %d, %d> %s(%s, numReps);" + # do stream infrastructure and instantiations + intfw = self.get_nodeattr("intfWidth") + strmw = self.get_nodeattr("streamWidth") + width_lcm = (strmw * intfw) // math.gcd(strmw, intfw) + # we always need two streams: one of width_lcm, and one of intfw width + # because we use WidthAdjustedInputStream, + dtype_bits = self.get_input_datatype().bitwidth() + total_bits = dtype_bits * np.prod(self.get_normal_input_shape()) + if direction == "in": + self.code_gen_dict["$DOCOMPUTE$"] = [ + dwc_inst_template + % (width_lcm, strmw, total_bits // width_lcm, "dwc_lcm", "out"), + dwc_inst_template + % (intfw, width_lcm, total_bits // intfw, "dwc_intfw", "dwc_lcm"), + dma_inst_template % ("in0", "dwc_intfw"), + ] + else: + self.code_gen_dict["$DOCOMPUTE$"] = [ + dwc_inst_template + % (strmw, width_lcm, total_bits // strmw, "dwc_lcm", "in0"), + dwc_inst_template + % (width_lcm, intfw, total_bits // width_lcm, "dwc_intfw", "dwc_lcm"), + dma_inst_template % ("dwc_intfw", "out"), + ] + + def blackboxfunction(self): + packed_ibits = self.get_instream_width() + packed_hls_type_in = "ap_uint<%d>" % packed_ibits + packed_obits = self.get_outstream_width() + packed_hls_type_out = "ap_uint<%d>" % packed_obits + direction = self.get_nodeattr("direction") + if direction == "in": + self.code_gen_dict["$BLACKBOXFUNCTION$"] = [ + "void %s(%s *in0, hls::stream<%s > &out, unsigned int numReps)" + % (self.onnx_node.name, packed_hls_type_in, packed_hls_type_out) + ] + elif direction == "out": + self.code_gen_dict["$BLACKBOXFUNCTION$"] = [ + "void %s(hls::stream<%s > &in0, %s *out, unsigned int numReps)" + % (self.onnx_node.name, packed_hls_type_in, packed_hls_type_out) + ] + else: + raise ValueError("Invalid IODMA direction, please set to in or out") + + def pragmas(self): + self.code_gen_dict["$PRAGMAS$"] = [ + "#pragma HLS INTERFACE s_axilite port=numReps bundle=control" + ] + self.code_gen_dict["$PRAGMAS$"].append( + "#pragma HLS INTERFACE s_axilite port=return bundle=control" + ) + direction = self.get_nodeattr("direction") + intfname = self.get_nodeattr("intfName") + if direction == "in": + if intfname == "": + self.code_gen_dict["$PRAGMAS$"].append( + "#pragma HLS INTERFACE m_axi offset=slave port=in0" + ) + else: + self.code_gen_dict["$PRAGMAS$"].append( + "#pragma HLS INTERFACE m_axi offset=slave port=%s" % (intfname) + ) + self.code_gen_dict["$PRAGMAS$"].append( + "#pragma HLS INTERFACE s_axilite port=in0 bundle=control" + ) + self.code_gen_dict["$PRAGMAS$"].append( + "#pragma HLS INTERFACE axis port=out" + ) + elif direction == "out": + self.code_gen_dict["$PRAGMAS$"].append( + "#pragma HLS INTERFACE axis port=in0" + ) + if intfname == "": + self.code_gen_dict["$PRAGMAS$"].append( + "#pragma HLS INTERFACE m_axi offset=slave 
port=out" + ) + else: + self.code_gen_dict["$PRAGMAS$"].append( + "#pragma HLS INTERFACE m_axi offset=slave port=%s" % (intfname) + ) + self.code_gen_dict["$PRAGMAS$"].append( + "#pragma HLS INTERFACE s_axilite port=out bundle=control" + ) + else: + raise ValueError("Invalid IODMA direction, please set to in or out") + self.code_gen_dict["$PRAGMAS$"].append("#pragma HLS DATAFLOW") + + def execute_node(self, context, graph): + pass + + def dataoutstrm(self): + pass + + def read_npy_data(self): + pass + + def save_as_npy(self): + pass + + def strm_decl(self): + pass diff --git a/src/finn/custom_op/fpgadataflow/labelselect_batch.py b/src/finn/custom_op/fpgadataflow/labelselect_batch.py index 7591f09d8d0cd1847672fe5aa09616ff1571033d..f61fbf12da889700297006ef2566088d4150c0e4 100644 --- a/src/finn/custom_op/fpgadataflow/labelselect_batch.py +++ b/src/finn/custom_op/fpgadataflow/labelselect_batch.py @@ -41,6 +41,13 @@ class LabelSelect_Batch(HLSCustomOp): def __init__(self, onnx_node): super().__init__(onnx_node) + odt_name = self.get_nodeattr("outputDataType") + if odt_name == "": + # If not provided compute min size + labels = self.get_nodeattr("Labels") + odt = DataType.get_smallest_possible(labels - 1) + odt_name = odt.name + self.set_nodeattr("outputDataType", odt_name) def get_nodeattr_types(self): my_attrs = { @@ -49,6 +56,7 @@ class LabelSelect_Batch(HLSCustomOp): "K": ("i", True, 0), # FINN DataTypes for input "inputDataType": ("s", True, ""), + "outputDataType": ("s", False, ""), # number of input vectors, examples: # [1] is a single vector (like a FC layer with batch=1) # [4] is four vectors (like a FC layer with batch=4) @@ -69,7 +77,6 @@ class LabelSelect_Batch(HLSCustomOp): pe = self.get_nodeattr("PE") vecs = list(self.get_nodeattr("numInputVectors")) assert nlabels % pe == 0, "PE must divide Labels" - assert pe == 1, "LabelSelect currently fails with folding" folds = int(nlabels / pe) folded_ishape = tuple(vecs + [folds, pe]) return folded_ishape @@ -90,7 +97,7 @@ class LabelSelect_Batch(HLSCustomOp): exp_ishape = self.get_normal_input_shape() oshape = self.get_normal_output_shape() ishape = tuple(model.get_tensor_shape(self.onnx_node.input[0])) - assert ishape == exp_ishape, "Unexpect input shape." + assert ishape == exp_ishape, "Unexpected input shape." 
# implement tensor with correct shape values = np.random.randn(*oshape).astype(np.int64) return helper.make_node( @@ -106,9 +113,8 @@ class LabelSelect_Batch(HLSCustomOp): ) def infer_node_datatype(self, model): - # currently set to uint32 to be compatible with hlslib - # enhancement: consider finding smallest power-of-two int for reduced output bandwidth - model.set_tensor_datatype(self.onnx_node.output[0], DataType.UINT32) + odt = self.get_output_datatype() + model.set_tensor_datatype(self.onnx_node.output[0], odt) def verify_node(self): info_messages = [] @@ -134,6 +140,7 @@ class LabelSelect_Batch(HLSCustomOp): self.get_nodeattr("PE") self.get_nodeattr("K") self.get_nodeattr("inputDataType") + self.get_nodeattr("outputDataType") info_messages.append("All necessary attributes exist") except Exception: info_messages.append( @@ -150,12 +157,12 @@ class LabelSelect_Batch(HLSCustomOp): def get_input_datatype(self): """Returns FINN DataType of input.""" ret = DataType[self.get_nodeattr("inputDataType")] - assert ret.signed() is False, "LabelSelect is currently broken for signed inputs" return ret def get_output_datatype(self): """Returns FINN DataType of output.""" - return DataType.UINT32 + ret = DataType[self.get_nodeattr("outputDataType")] + return ret def get_instream_width(self): """Returns input stream width.""" @@ -260,8 +267,13 @@ class LabelSelect_Batch(HLSCustomOp): npy_type = "float" npy_in = "%s/input_0.npy" % code_gen_dir self.code_gen_dict["$READNPYDATA$"] = [] + + # Calling npy2apintstream with reverse_inner = false to have LE packing + # as required by HLS fxn LabelSelect_Batch + # Also notice that StreamingDataWidthConverter_Batch performs LE packing + self.code_gen_dict["$READNPYDATA$"].append( - 'npy2apintstream<%s, %s, %d, %s>("%s", in0);' + 'npy2apintstream<%s, %s, %d, %s>("%s", in0,false);' % (packed_hls_type, elem_hls_type, elem_bits, npy_type, npy_in) ) @@ -277,12 +289,13 @@ class LabelSelect_Batch(HLSCustomOp): def docompute(self): node = self.onnx_node self.code_gen_dict["$DOCOMPUTE$"] = [ - """{}<{}, {}, {}, {}, ap_uint<32>> (in0, out, 1);""".format( + """{}<{}, {}, {}, {}, {} > (in0, out, 1);""".format( node.op_type, self.get_nodeattr("Labels"), self.get_nodeattr("PE"), self.get_nodeattr("K"), self.get_input_datatype().get_hls_datatype_str(), + self.get_output_datatype().get_hls_datatype_str(), ) ] @@ -316,10 +329,11 @@ class LabelSelect_Batch(HLSCustomOp): def blackboxfunction(self): self.code_gen_dict["$BLACKBOXFUNCTION$"] = [ """void {}(hls::stream<ap_uint<{}*{}>> &in0, - hls::stream<ap_uint<32>> &out)""".format( + hls::stream<ap_uint<{}> > &out)""".format( self.onnx_node.name, self.get_nodeattr("PE"), self.get_input_datatype().bitwidth(), + self.get_output_datatype().bitwidth(), ) ] diff --git a/src/finn/custom_op/fpgadataflow/pool_batch.py b/src/finn/custom_op/fpgadataflow/pool_batch.py new file mode 100644 index 0000000000000000000000000000000000000000..c7edc24d0e24eef1154293caca2519ab3aa68358 --- /dev/null +++ b/src/finn/custom_op/fpgadataflow/pool_batch.py @@ -0,0 +1,395 @@ +# Copyright (c) 2020, Xilinx +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. 
+# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of FINN nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import os +import numpy as np + +from finn.custom_op.fpgadataflow import HLSCustomOp +from finn.core.datatype import DataType +from onnx import TensorProto, helper +from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy + + +class Pool_Batch(HLSCustomOp): + """Class that corresponds to finn-hlslib Pool_batch function. + Requires ConvolutionInputGenerator(depthwise == 1) to format its input + + TODO: explain input shape (to reuse im2col code) + Input shape (BatchSize,OutImgDim,OutImgDim,KernelSize^2*Channels) + Output shape (BatchSize,OutImgDim,OutImgDim,Channels) + + # note: the actual data layout produced by the hlslib kernels is different + # for depthwise ops. 
+ # * depthwise SWG: (1, OFMDim, OFMDim, IFMChannels/PE, K, K, PE) + + Channels can be folded using PE (SIMD from the input perspective) + TODO: doc + """ + + def get_nodeattr_types(self): + my_attrs = { + "Channels": ("i", True, 0), + "PE": ("i", True, 1), + "KernelSize": ("i", True, 0), + # Function: + # - MaxPool + # - AvgPool (not yet supported, but HLSLIB does) + # - AccPool (not yet supported, but HLSLIB does) + "Function": ("s", True, ""), + "OutImgDim": ("i", True, 0), + # FINN DataTypes for inputs/outputs + "dataType": ("s", True, ""), + "BatchSize": ("i", False, 1), + } + + my_attrs.update(super().get_nodeattr_types()) + return my_attrs + + def get_input_datatype(self): + """Returns FINN DataType of input.""" + return DataType[self.get_nodeattr("dataType")] + + def get_output_datatype(self): + """Returns FINN DataType of output.""" + fxn = self.get_nodeattr("Function") + if fxn == "MaxPool": + # Same as input + return DataType[self.get_nodeattr("dataType")] + else: + raise Exception("Pool_Batch doesn't currently support " + fxn) + + def get_normal_input_shape(self): + ifm_ch = self.get_nodeattr("Channels") + odim = self.get_nodeattr("OutImgDim") + batch_size = self.get_nodeattr("BatchSize") + k = self.get_nodeattr("KernelSize") + ishape = (batch_size, odim, odim, k * k * ifm_ch) + return ishape + + def get_folded_input_shape(self): + normal_ishape = list(self.get_normal_input_shape()) + ifm_ch = self.get_nodeattr("Channels") + pe = self.get_nodeattr("PE") + assert ifm_ch % pe == 0, "PE must divide input channels" + fold = int(normal_ishape[-1] / pe) + folded_ishape = normal_ishape[:-1] + [fold, pe] + return tuple(folded_ishape) + + def get_normal_output_shape(self): + ofm_ch = self.get_nodeattr("Channels") + odim = self.get_nodeattr("OutImgDim") + batch_size = self.get_nodeattr("BatchSize") + oshape = (batch_size, odim, odim, ofm_ch) + return oshape + + def get_folded_output_shape(self): + normal_oshape = list(self.get_normal_output_shape()) + ifm_ch = self.get_nodeattr("Channels") + pe = self.get_nodeattr("PE") + assert ifm_ch % pe == 0, "PE must divide input channels" + fold = int(ifm_ch / pe) + folded_oshape = normal_oshape[:-1] + [fold, pe] + return tuple(folded_oshape) + + def get_number_output_values(self): + folded_oshape = self.get_folded_output_shape() + return np.prod(folded_oshape[1:-1]) + + def get_instream_width(self): + dt_bits = self.get_input_datatype().bitwidth() + pe = self.get_nodeattr("PE") + # ofm_ch = self.get_nodeattr("Channels") + # k = self.get_nodeattr("KernelSize") + # assert ifm_ch % pe == 0, "PE must divide input channels" + # simd = int(ifm_ch/pe) + in_width = int(dt_bits * pe) + return in_width + + def get_outstream_width(self): + fxn = self.get_nodeattr("Function") + if fxn == "MaxPool": + return self.get_instream_width() + else: + raise Exception("Pool_Batch doesn't currently support " + fxn) + + def make_shape_compatible_op(self, model): + exp_ishape = self.get_normal_input_shape() + oshape = self.get_normal_output_shape() + ishape = tuple(model.get_tensor_shape(self.onnx_node.input[0])) + assert ishape == exp_ishape, "Unexpected input shape for Pool_Batch." 
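# A minimal sketch of the Pool_Batch shapes defined above, assuming Channels=8,
# PE=2, KernelSize=2, OutImgDim=4, BatchSize=1 (example values, not from the patch):
channels, pe, k, odim, batch = 8, 2, 2, 4, 1
normal_ishape = (batch, odim, odim, k * k * channels)                 # (1, 4, 4, 32)
folded_ishape = normal_ishape[:-1] + (normal_ishape[-1] // pe, pe)    # (1, 4, 4, 16, 2)
normal_oshape = (batch, odim, odim, channels)                         # (1, 4, 4, 8)
folded_oshape = normal_oshape[:-1] + (channels // pe, pe)             # (1, 4, 4, 4, 2)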
+ # implement tensor with correct shape + values = np.random.randn(*oshape).astype(np.float32) + return helper.make_node( + "Constant", + inputs=[], + outputs=[self.onnx_node.output[0]], + value=helper.make_tensor( + name="const_tensor", + data_type=TensorProto.FLOAT, + dims=values.shape, + vals=values.flatten().astype(float), + ), + ) + + def infer_node_datatype(self, model): + node = self.onnx_node + # data type stays the same + dtype = self.get_output_datatype() + model.set_tensor_datatype(node.output[0], dtype) + + def verify_node(self): + info_messages = [] + + # verify that "domain" is set to "finn" + domain_value = self.onnx_node.domain + if domain_value == "finn": + info_messages.append("Attribute domain is set correctly") + else: + info_messages.append('Attribute domain should be set to "finn"') + + # verify that "backend" is set to "fpgadataflow" + backend_value = self.get_nodeattr("backend") + if backend_value == "fpgadataflow": + info_messages.append("Attribute backend is set correctly") + else: + info_messages.append('Attribute backend should be set to "fpgadataflow"') + + # verify the number of inputs + if len(self.onnx_node.input) == 1: + info_messages.append("The number of inputs is correct") + else: + info_messages.append("""Pool_Batch needs 1 data input""") + + # check supported function + fnx = self.get_nodeattr("Function") + if fnx == "MaxPool": + info_messages.append( + "Attribute Function contains a supported pool function" + ) + else: + info_messages.append( + "Attribute Function contains an unsupported pool function" + ) + return info_messages + + def global_includes(self): + self.code_gen_dict["$GLOBALS$"] = ['#include "maxpool.h"'] + self.code_gen_dict["$GLOBALS$"] += ['#include "pool.hpp"'] + + def defines(self, var): + self.code_gen_dict["$DEFINES$"] = [] + + ifm_ch = self.get_nodeattr("Channels") + self.code_gen_dict["$DEFINES$"] += ["#define Channels {}".format(ifm_ch)] + + pe = self.get_nodeattr("PE") + self.code_gen_dict["$DEFINES$"] += ["#define PE {}".format(pe)] + + k = self.get_nodeattr("KernelSize") + self.code_gen_dict["$DEFINES$"] += ["#define KernelSize {}".format(k)] + + odim = self.get_nodeattr("OutImgDim") + self.code_gen_dict["$DEFINES$"] += ["#define OFMDim {}".format(odim)] + + numReps = self.get_nodeattr("BatchSize") + self.code_gen_dict["$DEFINES$"] += ["#define numReps {}".format(numReps)] + + def read_npy_data(self): + code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") + dtype = self.get_input_datatype() + if dtype == DataType.BIPOLAR: + # use binary for bipolar storage + dtype = DataType.BINARY + elem_bits = dtype.bitwidth() + packed_bits = self.get_instream_width() + packed_hls_type = "ap_uint<%d>" % packed_bits + elem_hls_type = dtype.get_hls_datatype_str() + npy_type = "float" + npy_in = "%s/input_0.npy" % code_gen_dir + self.code_gen_dict["$READNPYDATA$"] = [] + self.code_gen_dict["$READNPYDATA$"].append( + 'npy2apintstream<%s, %s, %d, %s>("%s", in0,false);' + % (packed_hls_type, elem_hls_type, elem_bits, npy_type, npy_in) + ) + + def strm_decl(self): + self.code_gen_dict["$STREAMDECLARATIONS$"] = [] + self.code_gen_dict["$STREAMDECLARATIONS$"].append( + 'hls::stream<ap_uint<{}>> in0 ("in0");'.format(self.get_instream_width()) + ) + self.code_gen_dict["$STREAMDECLARATIONS$"].append( + 'hls::stream<ap_uint<{}>> out ("out");'.format(self.get_outstream_width()) + ) + + def docompute(self): + idt = self.get_input_datatype() + i_hls_dt = idt.get_hls_datatype_str() + odt = self.get_output_datatype() + o_hls_dt = 
odt.get_hls_datatype_str() + + self.code_gen_dict["$DOCOMPUTE$"] = [] + + fxn = self.get_nodeattr("Function") + if fxn == "MaxPool": + self.code_gen_dict["$DOCOMPUTE$"] += [ + "MaxPoolFunction<{},KernelSize> pool_fxn;".format(i_hls_dt) + ] + else: + raise Exception("Pool_Batch doesn't currently support " + fxn) + + self.code_gen_dict["$DOCOMPUTE$"] += [ + """Pool_batch<Channels, PE, KernelSize,Slice<{} >, Slice< {} > > + (in0,out, pool_fxn, OFMDim*OFMDim*numReps);""".format( + i_hls_dt, o_hls_dt + ) + ] + + def dataoutstrm(self): + code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") + dtype = self.get_output_datatype() + if dtype == DataType.BIPOLAR: + # use binary for bipolar storage + dtype = DataType.BINARY + elem_bits = dtype.bitwidth() + packed_bits = self.get_outstream_width() + packed_hls_type = "ap_uint<%d>" % packed_bits + elem_hls_type = dtype.get_hls_datatype_str() + npy_type = "float" + npy_out = "%s/output.npy" % code_gen_dir + oshape = self.get_folded_output_shape() + oshape_cpp_str = str(oshape).replace("(", "{").replace(")", "}") + + self.code_gen_dict["$DATAOUTSTREAM$"] = [ + 'apintstream2npy<%s, %s, %d, %s>(out, %s, "%s",false);' + % ( + packed_hls_type, + elem_hls_type, + elem_bits, + npy_type, + oshape_cpp_str, + npy_out, + ) + ] + + def save_as_npy(self): + self.code_gen_dict["$SAVEASCNPY$"] = [] + + def blackboxfunction(self): + packed_ibits = self.get_instream_width() + packed_in_hls_type = "ap_uint<%d>" % packed_ibits + + packed_obits = self.get_outstream_width() + packed_out_hls_type = "ap_uint<%d>" % packed_obits + self.code_gen_dict["$BLACKBOXFUNCTION$"] = [ + "void %s(hls::stream<%s > &in0, hls::stream<%s > &out)" + % (self.onnx_node.name, packed_in_hls_type, packed_out_hls_type) + ] + + def pragmas(self): + self.code_gen_dict["$PRAGMAS$"] = ["#pragma HLS INTERFACE axis port=in0"] + self.code_gen_dict["$PRAGMAS$"].append("#pragma HLS INTERFACE axis port=out") + self.code_gen_dict["$PRAGMAS$"].append( + "#pragma HLS INTERFACE ap_ctrl_none port=return" + ) + + def execute_node(self, context, graph): + mode = self.get_nodeattr("exec_mode") + node = self.onnx_node + exp_ishape = self.get_normal_input_shape() + folded_ishape = self.get_folded_input_shape() + exp_oshape = self.get_normal_output_shape() + folded_oshape = self.get_folded_output_shape() + + # TODO ensure codegen dir exists + if mode == "cppsim": + code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") + elif mode == "rtlsim": + code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") + else: + raise Exception( + """Invalid value for attribute exec_mode! 
Is currently set to: {} + has to be set to one of the following value ("cppsim", "rtlsim")""".format( + mode + ) + ) + + inp = context[node.input[0]] + + assert str(inp.dtype) == "float32", "Input datatype is not float32" + assert ( + inp.shape == exp_ishape + ), """Input shape doesn't + match expected shape (batch_size,odim,odim,k*k*ifm_ch).""" + + export_idt = self.get_input_datatype() + reshaped_input = inp.reshape(folded_ishape) + + np.save(os.path.join(code_gen_dir, "input_0.npy"), reshaped_input) + + if mode == "cppsim": + # execute the precompiled model + super().exec_precompiled_singlenode_model() + # load output npy file + super().npy_to_dynamic_output(context) + assert ( + context[node.output[0]].shape == folded_oshape + ), "cppsim did not produce expected folded output shape" + context[node.output[0]] = context[node.output[0]].reshape(*exp_oshape) + elif mode == "rtlsim": + sim = self.get_rtlsim() + nbits = self.get_instream_width() + rtlsim_inp = npy_to_rtlsim_input( + "{}/input_0.npy".format(code_gen_dir), export_idt, nbits + ) + super().reset_rtlsim(sim) + super().toggle_clk(sim) + rtlsim_output = self.rtlsim(sim, rtlsim_inp) + odt = export_idt + target_bits = odt.bitwidth() + packed_bits = self.get_outstream_width() + out_npy_path = "{}/output.npy".format(code_gen_dir) + out_shape = self.get_folded_output_shape() + rtlsim_output_to_npy( + rtlsim_output, out_npy_path, odt, out_shape, packed_bits, target_bits + ) + # load and reshape output + output = np.load(out_npy_path) + output = np.asarray([output], dtype=np.float32).reshape(*exp_oshape) + context[node.output[0]] = output + else: + raise Exception( + """Invalid value for attribute exec_mode! Is currently set to: {} + has to be set to one of the following value ("cppsim", "rtlsim")""".format( + mode + ) + ) + + assert ( + context[node.output[0]].shape == exp_oshape + ), """Output + shape doesn't match expected shape (1, ofm_dim, ofm_dim, k*k*ifm_ch).""" diff --git a/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py b/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py index f650442401b49f1ad0a602b6b2ad3e50fbb5e5c2..a7ebff68749120868cae9ce5ac18d2856fe2cb8a 100644 --- a/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py +++ b/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py @@ -240,11 +240,21 @@ class StreamingFCLayer_Batch(HLSCustomOp): Q = self.get_nodeattr("SIMD") wdt = self.get_weight_datatype() W = wdt.bitwidth() - D_in = self.get_instream_width() - D_out = self.get_outstream_width() + D_in = self.get_nodeattr("MW") + D_out = self.get_nodeattr("MH") omega = (D_in * D_out) / (Q * P) return P * (math.ceil(omega / 512)) * (math.ceil((Q * W) / 36)) + def bram_efficiency_estimation(self): + wdt = self.get_weight_datatype() + W = wdt.bitwidth() + D_in = self.get_nodeattr("MW") + D_out = self.get_nodeattr("MH") + bram16_est = self.bram_estimation() + wbits = W * D_in * D_out + bram16_est_capacity = bram16_est * 36 * 512 + return wbits / bram16_est_capacity + def lut_estimation(self): """Calculates resource estimations for LUTs based on: - FINN-R: An End-to-End Deep-Learning Framework for Fast @@ -290,12 +300,15 @@ class StreamingFCLayer_Batch(HLSCustomOp): return out_width def get_weightstream_width(self): - """Returns weight stream width. Used in decoupled mode.""" - pe = self.get_nodeattr("PE") - simd = self.get_nodeattr("SIMD") - wp = self.get_weight_datatype().bitwidth() - w_width = pe * simd * wp - return w_width + """Returns weight stream width. 
Used only in decoupled mode.""" + if self.get_nodeattr("mem_mode") == "decoupled": + pe = self.get_nodeattr("PE") + simd = self.get_nodeattr("SIMD") + wp = self.get_weight_datatype().bitwidth() + w_width = pe * simd * wp + return w_width + else: + return 0 def get_weightstream_width_padded(self): """Returns weight stream width padded to a multiple of 8. This is required @@ -513,40 +526,44 @@ class StreamingFCLayer_Batch(HLSCustomOp): elif mem_mode == "decoupled": """Saves weights in corresponding file format for cppsim or rtlsim""" # transpose weight tensor from (1, PE, WMEM, SIMD) to (1, WMEM, PE, SIMD) - # and save as unflipped weight tensor to be able to differentiate between - # flipped an unflipped weight tensor (has to be flipped for cppsim) - weight_tensor_unflipped = np.transpose(weight_tensor, (0, 2, 1, 3)) - # flip PE dimension and reverse SIMD flip for saving weights in .npy - weight_tensor_flipped = np.flip(weight_tensor_unflipped, axis=-2) - weight_tensor_flipped = np.flip(weight_tensor_flipped, axis=-1) + # reverse SIMD flip for saving weights in .npy + weight_tensor_simd_flipped = np.flip(weight_tensor_unflipped, axis=-1) + # PE flip for saving weights in .dat + weight_tensor_pe_flipped = np.flip(weight_tensor_unflipped, axis=-2) - # reshape weight tensor (flipped and unflipped) to desired shape + # reshape weight tensor (simd_flipped and pe_flipped) to desired shape pe = self.get_nodeattr("PE") simd = self.get_nodeattr("SIMD") - # unflipped - weight_tensor_unflipped = weight_tensor_unflipped.reshape(1, -1, pe * simd) - weight_tensor_unflipped = weight_tensor_unflipped.copy() + # simd_flipped + weight_tensor_simd_flipped = weight_tensor_simd_flipped.reshape( + 1, -1, pe * simd + ) + weight_tensor_simd_flipped = weight_tensor_simd_flipped.copy() # flipped - weight_tensor_flipped = weight_tensor_flipped.reshape(1, -1, pe * simd) - weight_tensor_flipped = weight_tensor_flipped.copy() + weight_tensor_pe_flipped = weight_tensor_pe_flipped.reshape( + 1, -1, pe * simd + ) + weight_tensor_pe_flipped = weight_tensor_pe_flipped.copy() """Saves weights into .npy file""" - np.save(os.path.join(code_gen_dir, "weights.npy"), weight_tensor_flipped) + np.save( + os.path.join(code_gen_dir, "weights.npy"), weight_tensor_simd_flipped + ) """Saves weights into .dat file""" # convert weight values into hexstring weight_width = self.get_weightstream_width() # pad to nearest 4 bits to get hex strings weight_width_padded = roundup_to_integer_multiple(weight_width, 4) - weight_tensor_unflipped = pack_innermost_dim_as_hex_string( - weight_tensor_unflipped, export_wdt, weight_width_padded, prefix="" + weight_tensor_pe_flipped = pack_innermost_dim_as_hex_string( + weight_tensor_pe_flipped, export_wdt, weight_width_padded, prefix="" ) - weight_stream_len = np.prod(weight_tensor_unflipped.shape) + weight_stream_len = np.prod(weight_tensor_pe_flipped.shape) factor = math.ceil(weight_stream_len / 1024) # add zeroes to pad out file to 1024 entries - weight_stream = weight_tensor_unflipped.flatten() + weight_stream = weight_tensor_pe_flipped.flatten() pad_amt = (factor * 1024) - weight_stream_len weight_stream = np.pad( weight_stream, (0, pad_amt), mode="constant", constant_values="0" diff --git a/src/finn/custom_op/fpgadataflow/streamingfifo.py b/src/finn/custom_op/fpgadataflow/streamingfifo.py index 66190333ce8d71dafba99aaeae4fb2c973d67410..1f734b548f923341687843c538d1887fcc069bee 100644 --- a/src/finn/custom_op/fpgadataflow/streamingfifo.py +++ b/src/finn/custom_op/fpgadataflow/streamingfifo.py @@ -110,6 
+110,8 @@ class StreamingFIFO(HLSCustomOp): ] # make instream width a multiple of 8 for axi interface in_width = self.get_instream_width_padded() + count_width = int(self.get_nodeattr("depth") - 1).bit_length() + self.code_gen_dict["$COUNT_RANGE$"] = ["[{}:0]".format(count_width - 1)] self.code_gen_dict["$IN_RANGE$"] = ["[{}:0]".format(in_width - 1)] self.code_gen_dict["$OUT_RANGE$"] = ["[{}:0]".format(in_width - 1)] self.code_gen_dict["$WIDTH$"] = [str(in_width)] diff --git a/src/finn/custom_op/fpgadataflow/templates.py b/src/finn/custom_op/fpgadataflow/templates.py index 5f526aa2aa1917144c7a048c9d9314aa9288a2d8..1da60a5124fa86b4336bae8fd1a587672f2f2e6f 100644 --- a/src/finn/custom_op/fpgadataflow/templates.py +++ b/src/finn/custom_op/fpgadataflow/templates.py @@ -99,6 +99,7 @@ set_top $config_toplevelfxn open_solution sol1 set_part $config_proj_part +config_compile -ignore_long_run_time -disable_unroll_code_size_check config_interface -m_axi_addr64 config_rtl -auto_prefix $EXTRA_DIRECTIVES$ @@ -408,6 +409,7 @@ strm_fifo_wrapper = """ module $TOPNAME$( ap_clk, ap_rst_n, +count, in0_V_V_TDATA, in0_V_V_TVALID, in0_V_V_TREADY, @@ -418,6 +420,7 @@ out_V_V_TREADY input ap_clk; input ap_rst_n; +output $COUNT_RANGE$ count; input $IN_RANGE$ in0_V_V_TDATA; input in0_V_V_TVALID; output in0_V_V_TREADY; @@ -433,6 +436,7 @@ $LAYER_NAME$ ( .clock(ap_clk), .reset(!ap_rst_n), + .count(count), .i_d(in0_V_V_TDATA), .i_v(in0_V_V_TVALID), .i_r(in0_V_V_TREADY), diff --git a/src/finn/custom_op/fpgadataflow/tlastmarker.py b/src/finn/custom_op/fpgadataflow/tlastmarker.py index 25ea05e3607a52731ae1b64de421837bf137ee2b..17ba44b959577faf573d77ae222f7b2a3be6669d 100644 --- a/src/finn/custom_op/fpgadataflow/tlastmarker.py +++ b/src/finn/custom_op/fpgadataflow/tlastmarker.py @@ -30,20 +30,30 @@ from finn.custom_op.fpgadataflow import HLSCustomOp class TLastMarker(HLSCustomOp): - """Class that corresponds to the TLastMarker node that needs to be - inserted at the end of the model for rtlsim with stitched IP. - It marks the end of the current image/input sample.""" + """Node that adds/removes AXI stream TLAST signals where needed. Its behavior + is transparent in node-by-node execution, only visible in IP-stitched rtlsim or + actual hardware. 
+ This node may be needed at the end of the network to signal a DMA write (needed by the + FINN PYNQ shell) or at the beginning to remove the end-of-burst from DMA read.""" def __init__(self, onnx_node): super().__init__(onnx_node) def get_nodeattr_types(self): my_attrs = { + # number of (static) iterations until TLAST=1 is generated for Direction=out "NumIters": ("i", True, 0), + # whether static or dynamic (from AXI lite) number of iterations are used + "DynIters": ("i", False, 1), + # direction: whether to insert or remove TLAST + "Direction": ("s", False, "out"), # width of input-output data streams, in bits "StreamWidth": ("i", True, 0), # width of individual element in stream, in bits "ElemWidth": ("i", True, 0), + # Protocol: external or internal + # Vitis docs recommend using qdma_axis for external, ap_axiu for internal + "Protocol": ("s", False, "external"), } my_attrs.update(super().get_nodeattr_types()) return my_attrs @@ -76,12 +86,33 @@ class TLastMarker(HLSCustomOp): def defines(self, var): stream_width = self.get_nodeattr("StreamWidth") + direction = self.get_nodeattr("Direction") + protocol = self.get_nodeattr("Protocol") # output stream must have TLAST, so we use this stream data type: # qdma_axis<stream_data_width,0,0,0 > - out_stream_dtype = "qdma_axis<%d,0,0,0>" % stream_width + if direction == "out": + if protocol == "external": + out_stream_dtype = "qdma_axis<%d,0,0,0>" % stream_width + elif protocol == "internal": + out_stream_dtype = "ap_axiu<%d,0,0,0>" % stream_width + else: + raise Exception("Unrecognized Protocol in TLastMarker") + in_stream_dtype = "ap_uint<%d>" % stream_width + elif direction == "in": + out_stream_dtype = "ap_uint<%d>" % stream_width + if protocol == "external": + in_stream_dtype = "qdma_axis<%d,0,0,0>" % stream_width + elif protocol == "internal": + in_stream_dtype = "ap_axiu<%d,0,0,0>" % stream_width + else: + raise Exception("Unrecognized Protocol in TLastMarker") + else: + raise Exception("Unrecognized Direction in TLastMarker") + self.code_gen_dict["$DEFINES$"] = [ "#define StreamWidth %d" % stream_width, "#define OutDType %s" % out_stream_dtype, + "#define InDType %s" % in_stream_dtype, "#define NumItersPerImg %d" % self.get_nodeattr("NumIters"), ] @@ -89,27 +120,60 @@ class TLastMarker(HLSCustomOp): self.code_gen_dict["$READNPYDATA$"] = [] def docompute(self): - self.code_gen_dict["$DOCOMPUTE$"] = [ - "unsigned int n = 1;", - "OutDType t;", - "t.set_keep(-1);", - "io_section: { // start of cycle accurate region", - "#pragma HLS protocol fixed", - "// do a first read from stream before we decide on numIters", - "// giving software a chance to set up the numIters prior to startup", - "t.set_data(in0.read());", - "n = (numIters == 0 ? 
NumItersPerImg : numIters);", - "t.set_last(n==1);", - "out.write(t);", - "} // end of cycle accurate region", - "// do one less iteration than spec since we already did one", - "for(unsigned int i=1; i<n; i++) {", - "#pragma HLS PIPELINE II=1", - "t.set_data(in0.read());", - "t.set_last(i==(n-1));", - "out.write(t);", - "}", - ] + dyn_iters = self.get_nodeattr("DynIters") + direction = self.get_nodeattr("Direction") + use_qdma_axis = self.get_nodeattr("Protocol") == "external" + if direction == "in": + # read from input and just pass data along; ignore tlast + # no dyn iters on input, it doesnt make sense + self.code_gen_dict["$DOCOMPUTE$"] = [ + "for(unsigned int i=0; i<NumItersPerImg; i++) {", + "#pragma HLS PIPELINE II=1", + "out.write(in0.read().get_data());" + if use_qdma_axis + else "out.write(in0.read().data);", + "}", + ] + + elif dyn_iters == 1: + # output, with dynamic iteration counts + self.code_gen_dict["$DOCOMPUTE$"] = [ + "unsigned int n = 1;", + "OutDType t;", + "t.set_keep(-1);" if use_qdma_axis else "t.keep = -1;", + "io_section: { // start of cycle accurate region", + "#pragma HLS protocol fixed", + "// do a first read from stream before we decide on numIters", + "// giving software a chance to set up the numIters prior to startup", + "t.set_data(in0.read());" if use_qdma_axis else "t.data = in0.read();", + "n = (numIters == 0 ? NumItersPerImg : numIters);", + "t.set_last(n==1);" if use_qdma_axis else "t.last = (n==1);", + "out.write(t);", + "} // end of cycle accurate region", + "// do one less iteration than spec since we already did one", + "for(unsigned int i=1; i<n; i++) {", + "#pragma HLS PIPELINE II=1", + "t.set_data(in0.read());" if use_qdma_axis else "t.data = in0.read();", + "t.set_last(i==(n-1));" if use_qdma_axis else "t.last = (i==(n-1));", + "out.write(t);", + "}", + ] + + else: + # output, with static iteration counts + self.code_gen_dict["$DOCOMPUTE$"] = [ + "unsigned int n = 1;", + "OutDType t;", + "t.set_keep(-1);" if use_qdma_axis else "t.keep = -1;", + "for(unsigned int i=0; i<NumItersPerImg; i++) {", + "#pragma HLS PIPELINE II=1", + "t.set_data(in0.read());" if use_qdma_axis else "t.data = in0.read();", + "t.set_last(i==(NumItersPerImg-1));" + if use_qdma_axis + else "t.last = (i==(NumItersPerImg-1));", + "out.write(t);", + "}", + ] def dataoutstrm(self): self.code_gen_dict["$DATAOUTSTREAM$"] = [] @@ -118,18 +182,30 @@ class TLastMarker(HLSCustomOp): self.code_gen_dict["$SAVEASCNPY$"] = [] def blackboxfunction(self): - self.code_gen_dict["$BLACKBOXFUNCTION$"] = [ - """void %s(hls::stream<ap_uint<StreamWidth> > &in0, - hls::stream<OutDType> &out, unsigned int numIters)""" - % self.onnx_node.name - ] + dyn_iters = self.get_nodeattr("DynIters") + + if dyn_iters == 1: + self.code_gen_dict["$BLACKBOXFUNCTION$"] = [ + """void %s(hls::stream<InDType> &in0, + hls::stream<OutDType> &out, unsigned int numIters)""" + % self.onnx_node.name + ] + else: + self.code_gen_dict["$BLACKBOXFUNCTION$"] = [ + """void %s(hls::stream<InDType> &in0, hls::stream<OutDType> &out)""" + % self.onnx_node.name + ] def pragmas(self): self.code_gen_dict["$PRAGMAS$"] = ["#pragma HLS INTERFACE axis port=in0"] self.code_gen_dict["$PRAGMAS$"].append("#pragma HLS INTERFACE axis port=out") - self.code_gen_dict["$PRAGMAS$"].append( - "#pragma HLS INTERFACE s_axilite port=numIters bundle=control" - ) + + dyn_iters = self.get_nodeattr("DynIters") + if dyn_iters == 1: + self.code_gen_dict["$PRAGMAS$"].append( + "#pragma HLS INTERFACE s_axilite port=numIters bundle=control" + ) + 
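The new node attributes introduced above (NumIters, DynIters, Direction, StreamWidth, ElemWidth, Protocol) are ordinary ONNX node attributes; a minimal sketch of constructing such a node by hand, with hypothetical tensor names and widths:

from onnx import helper

# hypothetical static, internal-protocol TLastMarker on the input side
tlast_node = helper.make_node(
    "TLastMarker", ["dma_out"], ["core_in"],
    domain="finn", backend="fpgadataflow",
    NumIters=64, DynIters=0, Direction="in",
    StreamWidth=32, ElemWidth=8, Protocol="internal",
)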
self.code_gen_dict["$PRAGMAS$"].append( "#pragma HLS INTERFACE ap_ctrl_none port=return" ) @@ -158,7 +234,7 @@ class TLastMarker(HLSCustomOp): def strm_decl(self): self.code_gen_dict["$STREAMDECLARATIONS$"] = [] self.code_gen_dict["$STREAMDECLARATIONS$"].append( - 'hls::stream<ap_uint<{}>> in0 ("in0");'.format(self.get_instream_width()) + 'hls::stream<InDType> in0 ("in0");' ) self.code_gen_dict["$STREAMDECLARATIONS$"].append( 'hls::stream<OutDType> out ("out");' diff --git a/src/finn/custom_op/im2col.py b/src/finn/custom_op/im2col.py index b0d94b158ba0ffe57646847a2bd99fa831e2ae20..8ed0041704d421dab587f08bcbcd9e739e8434e9 100644 --- a/src/finn/custom_op/im2col.py +++ b/src/finn/custom_op/im2col.py @@ -21,8 +21,6 @@ def get_im2col_indices_nchw( """Returns im2col indices.""" # First figure out what the size of the output should be N, C, H, W = x_shape - assert (H + 2 * padding - field_height) % stride_y == 0 - assert (W + 2 * padding - field_width) % stride_x == 0 out_height = compute_conv_output_dim(H, field_height, stride_y, padding) out_width = compute_conv_output_dim(W, field_width, stride_x, padding) @@ -70,6 +68,9 @@ def im2col_indices_nchw( # * ifm is the number of input channels # * k is the convolutional kernel size +# note: for the innermost (dot product) dimension of k*k*ifm, we +# assume an internal ordering (k, k, ifm) + class Im2Col(CustomOp): def get_nodeattr_types(self): @@ -79,7 +80,8 @@ class Im2Col(CustomOp): "input_shape": ("s", True, ""), "pad_amount": ("i", False, 0), "pad_value": ("i", False, 0), - "dw": ("i", False, 0), + # depthwise: if != 0, infer ConvolutionInputGenerator with depthwise == 1 + "depthwise": ("i", False, 0), } def make_shape_compatible_op(self, model): diff --git a/src/finn/custom_op/quantavgpool2d.py b/src/finn/custom_op/quantavgpool2d.py new file mode 100644 index 0000000000000000000000000000000000000000..fb5c78bc0c8419ba519c5c3113d9b0c7ae2dd3b7 --- /dev/null +++ b/src/finn/custom_op/quantavgpool2d.py @@ -0,0 +1,128 @@ +import numpy as np +from onnx import TensorProto, helper +import onnxruntime as rt + +from finn.custom_op import CustomOp +from finn.core.datatype import DataType +from finn.custom_op.maxpoolnhwc import compute_pool_output_dim + + +class QuantAvgPool2d(CustomOp): + """Class that corresponds to the quantized average pooling + layer from brevitas""" + + def get_nodeattr_types(self): + return { + "stride": ("i", True, 1), + "kernel": ("i", True, 1), + "ibits": ("i", True, 1), + "obits": ("i", True, 1), + # determines if values are signed (set to "1") or unsigned ("0") + "signed": ("i", True, 0), + # data layout attribute can be set to "NCHW" or "NHWC" + "data_layout": ("s", False, "NCHW"), + } + + def make_shape_compatible_op(self, model): + node = self.onnx_node + k = self.get_nodeattr("kernel") + s = self.get_nodeattr("stride") + data_layout = self.get_nodeattr("data_layout") + if data_layout == "NCHW": + return helper.make_node( + "AveragePool", + inputs=[node.input[0]], + outputs=[node.output[0]], + kernel_shape=[k, k], + strides=[s, s], + ) + elif data_layout == "NHWC": + iname = node.input[0] + ishape = model.get_tensor_shape(iname) + (n, hi, wi, c) = ishape + ho = compute_pool_output_dim(hi, k, s) + wo = compute_pool_output_dim(wi, k, s) + oshape = (n, ho, wo, c) + # implement tensor with correct shape + values = np.random.randn(*oshape).astype(np.float32) + return helper.make_node( + "Constant", + inputs=[], + outputs=[node.output[0]], + value=helper.make_tensor( + name="const_tensor", + data_type=TensorProto.FLOAT, + 
dims=values.shape, + vals=values.flatten().astype(float), + ), + ) + + else: + raise Exception( + """Datalayout for QuantAvgPool2d is set to an invalid value. + Has to be set to "NCHW" or "NHWC".""" + ) + + def infer_node_datatype(self, model): + node = self.onnx_node + bw = self.get_nodeattr("obits") + if bw in [2, 4, 8, 16, 32]: + if self.get_nodeattr("signed") == 0: + dtype = DataType["UINT%d" % bw] + else: + dtype = DataType["INT%d" % bw] + else: + raise Exception("Unsupported output datatype for QuantAvgPool2d") + model.set_tensor_datatype(node.output[0], dtype) + + def execute_node(self, context, graph): + # create a standard average pooling node to help calculate the result + node = self.onnx_node + k = self.get_nodeattr("kernel") + s = self.get_nodeattr("stride") + inp_values = context[node.input[0]] + oshape = context[node.output[0]].shape + if self.get_nodeattr("data_layout") == "NHWC": + inp_values = inp_values.transpose(0, 3, 1, 2) + oshape = (context[node.output[0]]).transpose(0, 3, 1, 2).shape + ishape = inp_values.shape + inp = helper.make_tensor_value_info(node.input[0], TensorProto.FLOAT, ishape) + outp = helper.make_tensor_value_info(node.output[0], TensorProto.FLOAT, oshape) + node_avgpool = helper.make_node( + "AveragePool", + inputs=[node.input[0]], + outputs=[node.output[0]], + kernel_shape=[k, k], + strides=[s, s], + ) + graph_avgpool = helper.make_graph( + nodes=[node_avgpool], + name="single-avgpool-exec", + inputs=[inp], + outputs=[outp], + ) + model_avgpool = helper.make_model(graph_avgpool) + idict = {node.input[0]: inp_values} + sess = rt.InferenceSession(model_avgpool.SerializeToString()) + result_temp = sess.run(None, idict) + # remove scaling introduced by average + result_temp = result_temp[0] * (k * k) + ibits = self.get_nodeattr("ibits") + max_value = 2 ** ibits - 1 + max_value = max_value * k * k + max_bit_width = int(max_value).bit_length() + shift_bits = max_bit_width - self.get_nodeattr("obits") + result = np.right_shift(result_temp.astype(int), shift_bits) + if self.get_nodeattr("data_layout") == "NHWC": + result = result.transpose(0, 2, 3, 1) + context[node.output[0]] = result.astype(np.float32) + + def verify_node(self): + info_messages = [] + # verify that "domain" is set to "finn" + domain_value = self.onnx_node.domain + if domain_value == "finn": + info_messages.append("Attribute domain is set correctly") + else: + info_messages.append('Attribute domain should be set to "finn"') + return info_messages diff --git a/src/finn/custom_op/registry.py b/src/finn/custom_op/registry.py index 238829e03353d79fab7c51e7d1b9dca6e2a96a11..e4317e02d46df90c8fd0c8854262ca6eb0ea4f48 100644 --- a/src/finn/custom_op/registry.py +++ b/src/finn/custom_op/registry.py @@ -31,6 +31,7 @@ from finn.custom_op.fpgadataflow.convolutioninputgenerator import ( ConvolutionInputGenerator, ) +from finn.custom_op.fpgadataflow.downsampler import DownSampler from finn.custom_op.fpgadataflow.streamingfclayer_batch import StreamingFCLayer_Batch from finn.custom_op.fpgadataflow.streamingmaxpool_batch import StreamingMaxPool_Batch from finn.custom_op.fpgadataflow.streamingfifo import StreamingFIFO @@ -44,16 +45,21 @@ from finn.custom_op.fpgadataflow.streamingdatawidthconverter_batch import ( StreamingDataWidthConverter_Batch, ) from finn.custom_op.fpgadataflow.globalaccpool_batch import GlobalAccPool_Batch -from finn.custom_op.fpgadataflow.sameresize_batch import SameResize_Batch +from finn.custom_op.fpgadataflow.pool_batch import Pool_Batch +from 
finn.custom_op.fpgadataflow.fmpadding_batch import FMPadding_Batch from finn.custom_op.fpgadataflow.thresholding_batch import Thresholding_Batch from finn.custom_op.fpgadataflow.addstreams_batch import AddStreams_Batch from finn.custom_op.fpgadataflow.labelselect_batch import LabelSelect_Batch +from finn.custom_op.quantavgpool2d import QuantAvgPool2d from finn.custom_op.fpgadataflow.duplicatestreams_batch import DuplicateStreams_Batch +from finn.custom_op.fpgadataflow.channelwise_op_batch import ChannelwiseOp_Batch +from finn.custom_op.fpgadataflow.iodma import IODMA # create a mapping of all known CustomOp names and classes custom_op = {} custom_op["MultiThreshold"] = MultiThreshold +custom_op["DownSampler"] = DownSampler custom_op["XnorPopcountMatMul"] = XnorPopcountMatMul custom_op["Im2Col"] = Im2Col custom_op["StreamingMaxPool_Batch"] = StreamingMaxPool_Batch @@ -65,11 +71,15 @@ custom_op["MaxPoolNHWC"] = MaxPoolNHWC custom_op["StreamingDataWidthConverter_Batch"] = StreamingDataWidthConverter_Batch custom_op["StreamingFIFO"] = StreamingFIFO custom_op["GlobalAccPool_Batch"] = GlobalAccPool_Batch -custom_op["SameResize_Batch"] = SameResize_Batch +custom_op["Pool_Batch"] = Pool_Batch +custom_op["FMPadding_Batch"] = FMPadding_Batch custom_op["Thresholding_Batch"] = Thresholding_Batch custom_op["AddStreams_Batch"] = AddStreams_Batch custom_op["LabelSelect_Batch"] = LabelSelect_Batch +custom_op["QuantAvgPool2d"] = QuantAvgPool2d custom_op["DuplicateStreams_Batch"] = DuplicateStreams_Batch +custom_op["ChannelwiseOp_Batch"] = ChannelwiseOp_Batch +custom_op["IODMA"] = IODMA def getCustomOp(node): diff --git a/src/finn/transformation/bipolar_to_xnor.py b/src/finn/transformation/bipolar_to_xnor.py index 8b65cfee17edd5d89fcca0bd86da12415d38fe78..80f2a73351f8548c99efd8dedd8a04d44c8558a3 100644 --- a/src/finn/transformation/bipolar_to_xnor.py +++ b/src/finn/transformation/bipolar_to_xnor.py @@ -27,6 +27,7 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. import numpy as np +import warnings from onnx import TensorProto from onnx import helper as oh @@ -66,26 +67,40 @@ class ConvertBipolarMatMulToXnorPopcount(Transformation): mt_chain = model.find_upstream(mm_input, find_prod_mt) if len(mt_chain) == 0: - raise Exception( - """Could not find upstream bipolar - MultiThreshold""" - ) - graph_modified = True - mt = mt_chain[-1] - mt_inst = getCustomOp(mt) - # ensure old scale/bias were correct for BIPOLAR - scale_ok = mt_inst.get_nodeattr("out_scale") == 2.0 - bias_ok = mt_inst.get_nodeattr("out_bias") == -1.0 - assert ( - scale_ok and bias_ok - ), """Unexpected scale/bias - attributes for BIPOLAR MultiThreshold node.""" - # start conversion, set MT output to binary - # (this is what XnorPopcountMatMul expects) - mt_inst.set_nodeattr("out_dtype", "BINARY") - mt_inst.set_nodeattr("out_scale", 1.0) - mt_inst.set_nodeattr("out_bias", 0.0) - model.set_tensor_datatype(mm_input, DataType.BINARY) + if mm_input == graph.input[0].name: + # change input datatype to BINARY + model.set_tensor_datatype(mm_input, DataType.BINARY) + graph_modified = True + warnings.warn( + """IMPORTANT: Changing graph input DataType + to BINARY instead of BIPOLAR. Ensure this is respected + when checking for correctness. + """ + ) + else: + raise Exception( + """Could not find upstream bipolar + MultiThreshold, and the MatMul is not the + first node on graph input. 
Unable to convert + input tensor from BIPOLAR to BINARY.""" + ) + else: + graph_modified = True + mt = mt_chain[-1] + mt_inst = getCustomOp(mt) + # ensure old scale/bias were correct for BIPOLAR + scale_ok = mt_inst.get_nodeattr("out_scale") == 2.0 + bias_ok = mt_inst.get_nodeattr("out_bias") == -1.0 + assert ( + scale_ok and bias_ok + ), """Unexpected scale/bias + attributes for BIPOLAR MultiThreshold node.""" + # start conversion, set MT output to binary + # (this is what XnorPopcountMatMul expects) + mt_inst.set_nodeattr("out_dtype", "BINARY") + mt_inst.set_nodeattr("out_scale", 1.0) + mt_inst.set_nodeattr("out_bias", 0.0) + model.set_tensor_datatype(mm_input, DataType.BINARY) # change node type and domain n.op_type = "XnorPopcountMatMul" n.domain = "finn" diff --git a/src/finn/transformation/change_datalayout.py b/src/finn/transformation/change_datalayout.py new file mode 100644 index 0000000000000000000000000000000000000000..d5b393a25e57122b059a44f70904a6dbe5bbaa3f --- /dev/null +++ b/src/finn/transformation/change_datalayout.py @@ -0,0 +1,110 @@ +# Copyright (c) 2020, Xilinx +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of FINN nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
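The branch above switches the graph input DataType from BIPOLAR to BINARY when the MatMul sits directly on the graph input, so callers then have to feed {0, 1} data instead of {-1, +1}. A minimal numpy sketch of that re-encoding, consistent with the out_scale=2.0 / out_bias=-1.0 convention checked above:

import numpy as np

x_bipolar = np.array([-1.0, 1.0, 1.0, -1.0])  # hypothetical bipolar input sample
x_binary = (x_bipolar + 1.0) / 2.0            # {-1, +1} -> {0, 1}
assert (2.0 * x_binary - 1.0 == x_bipolar).all()  # inverse of scale=2, bias=-1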
+ +from onnx import helper, TensorProto + +from finn.transformation import Transformation +from finn.transformation.infer_shapes import InferShapes +from finn.util.basic import get_by_name + + +class ChangeDataLayoutQuantAvgPool2d(Transformation): + """Replace QuantAvgPool2d with datalayout (N,C,H,W) with Transpose nodes + and QuantAvgPool2dNHWC with datalayout (N,H,W,C)""" + + def apply(self, model): + graph = model.graph + node_ind = 0 + graph_modified = False + for n in graph.node: + node_ind += 1 + if n.op_type == "QuantAvgPool2d" and ( + get_by_name(n.attribute, "data_layout") is None + or get_by_name(n.attribute, "data_layout").s.decode("UTF-8") == "NCHW" + ): + graph_modified = True + node_input = n.input[0] + node_output = n.output[0] + s = get_by_name(n.attribute, "stride").i + k = get_by_name(n.attribute, "kernel").i + ibits = get_by_name(n.attribute, "ibits").i + obits = get_by_name(n.attribute, "obits").i + signed = get_by_name(n.attribute, "signed").i + batchsize = model.get_tensor_shape(n.input[0])[0] # assume NCHW + channels = model.get_tensor_shape(n.input[0])[1] # assume NCHW + idim = model.get_tensor_shape(n.input[0])[-1] # assume NCHW + odim = model.get_tensor_shape(n.output[0])[-1] # assume NCHW + + # create new nodes + # NCHW -> NHWC + # create new intermediate values + inp_trans_out = helper.make_tensor_value_info( + model.make_new_valueinfo_name(), + TensorProto.FLOAT, + (batchsize, idim, idim, channels), # NHWC + ) + graph.value_info.append(inp_trans_out) + inp_trans_out = inp_trans_out.name + quantavg_out = helper.make_tensor_value_info( + model.make_new_valueinfo_name(), + TensorProto.FLOAT, + (batchsize, odim, odim, channels), + ) + graph.value_info.append(quantavg_out) + quantavg_out = quantavg_out.name + inp_trans_node = helper.make_node( + "Transpose", [node_input], [inp_trans_out], perm=[0, 2, 3, 1] + ) + quantavg_node = helper.make_node( + "QuantAvgPool2d", + [inp_trans_out], + [quantavg_out], + domain="finn", + stride=s, + kernel=k, + ibits=ibits, + obits=obits, + signed=signed, + data_layout="NHWC", + ) + # NHWC -> NCHW + out_trans_node = helper.make_node( + "Transpose", [quantavg_out], [node_output], perm=[0, 3, 1, 2] + ) + # insert nodes + graph.node.insert(node_ind, inp_trans_node) + graph.node.insert(node_ind + 1, quantavg_node) + graph.node.insert(node_ind + 2, out_trans_node) + # remove old nodes + graph.node.remove(n) + + # set shapes + model.set_tensor_shape(inp_trans_out, (batchsize, idim, idim, channels)) + model.set_tensor_shape(quantavg_out, (batchsize, odim, odim, channels)) + model = model.transform(InferShapes()) + return (model, graph_modified) diff --git a/src/finn/transformation/fpgadataflow/annotate_resources.py b/src/finn/transformation/fpgadataflow/annotate_resources.py index 207075b00de1871da19ea78472125d435449ed6e..62ee92df54eee2b63d84657515d7fbc3a8808b81 100644 --- a/src/finn/transformation/fpgadataflow/annotate_resources.py +++ b/src/finn/transformation/fpgadataflow/annotate_resources.py @@ -69,6 +69,9 @@ class AnnotateResources(Transformation): total_dict[r_type] += r_amount else: total_dict[r_type] = r_amount + for k in total_dict.keys(): + if "efficiency" in k: + total_dict[k] = total_dict[k] / len(graph.node) model.set_metadata_prop("res_total_" + self.mode, str(total_dict)) for node in graph.node: if _is_fpgadataflow_node(node) and node.name in res_dict.keys(): diff --git a/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py b/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py index 
f986cce1f02535ef4294ecfc387786d6c898b704..73a729f67510627e2dfeb85c9ee08cd9c6b5c2d7 100644 --- a/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py +++ b/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py @@ -26,7 +26,8 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -from onnx import helper +from onnx import helper, TensorProto +import numpy as np from finn.core.datatype import DataType from finn.transformation import Transformation @@ -34,6 +35,9 @@ from finn.custom_op.registry import getCustomOp from finn.transformation.infer_shapes import InferShapes from finn.transformation.infer_datatypes import InferDataTypes import finn.core.data_layout as DataLayout +from finn.util.onnx import nchw_to_nhwc +from finn.util.basic import get_by_name +import warnings class InferConvInpGen(Transformation): @@ -51,35 +55,94 @@ class InferConvInpGen(Transformation): i2c_in_shape = model.get_tensor_shape(i2c_input) i2c_out_shape = model.get_tensor_shape(i2c_output) dt = model.get_tensor_datatype(i2c_input) + if not dt.is_integer(): + warnings.warn("Input is not int. Can't infer ConvInpGen") + continue i2c_inst = getCustomOp(n) stride = i2c_inst.get_nodeattr("stride") k = i2c_inst.get_nodeattr("kernel_size") pad = i2c_inst.get_nodeattr("pad_amount") pad_val = i2c_inst.get_nodeattr("pad_value") + depthwise = i2c_inst.get_nodeattr("depthwise") ifm_ch = i2c_in_shape[-1] ifm_dim = i2c_in_shape[1] ofm_dim = i2c_out_shape[1] - # if padding enabled, ensure pad_val supported by DataType + + # default params for ConvolutionInputGenerator + ConvInpGen_node_idx = node_ind + ConvInpGen_input = i2c_input + ConvInpGen_idim = ifm_dim + if pad > 0: - assert dt.allowed(pad_val), "Im2Col DataType must support pad_val" - # create equivalent ConvolutionInputGenerator node - # TODO support padding - new_node = helper.make_node( - "ConvolutionInputGenerator", - [i2c_input], - [i2c_output], - domain="finn", - backend="fpgadataflow", - ConvKernelDim=k, - IFMChannels=ifm_ch, - IFMDim=ifm_dim, - OFMDim=ofm_dim, - SIMD=ifm_ch, - Stride=stride, - inputDataType=dt.name, - outputDataType=dt.name, - ) - graph.node.insert(node_ind, new_node) + # if padding enabled, ensure pad_val supported by DataType + # assert dt.allowed(pad_val),"""FMPadding_Batch DataType + # must support pad_val""" + assert ( + pad_val == 0 + ), "FMPadding_Batch doesn't currently support pad_val!= 0" + + odim_padding = ifm_dim + 2 * pad + + padding_out = helper.make_tensor_value_info( + model.make_new_valueinfo_name(), + TensorProto.FLOAT, + (1, odim_padding, odim_padding, ifm_ch), + ) + graph.value_info.append(padding_out) + padding_out = padding_out.name + model.set_tensor_datatype(padding_out, dt) + + ConvInpGen_node_idx += 1 + ConvInpGen_input = padding_out + ConvInpGen_idim = odim_padding + + padding_node = helper.make_node( + "FMPadding_Batch", + [i2c_input], + [padding_out], + domain="finn", + backend="fpgadataflow", + ImgDim=ifm_dim, + Padding=2 * pad, + NumChannels=ifm_ch, + inputDataType=dt.name, + ) + graph.node.insert(node_ind, padding_node) + + if stride > 1 and k == 1: + # create DownSampler node + ConvInpGen_node = helper.make_node( + "DownSampler", + [ConvInpGen_input], + [i2c_output], + domain="finn", + backend="fpgadataflow", + ImgDim=ConvInpGen_idim, + NumChannels=ifm_ch, + SIMD=ifm_ch, + Stride=stride, + inputDataType=dt.name, + ) + graph.node.insert(ConvInpGen_node_idx, ConvInpGen_node) + else: + # create equivalent 
ConvolutionInputGenerator node
+                ConvInpGen_node = helper.make_node(
+                    "ConvolutionInputGenerator",
+                    [ConvInpGen_input],
+                    [i2c_output],
+                    domain="finn",
+                    backend="fpgadataflow",
+                    ConvKernelDim=k,
+                    IFMChannels=ifm_ch,
+                    IFMDim=ConvInpGen_idim,
+                    OFMDim=ofm_dim,
+                    SIMD=ifm_ch,
+                    Stride=stride,
+                    inputDataType=dt.name,
+                    outputDataType=dt.name,
+                    depthwise=depthwise,
+                )
+                graph.node.insert(ConvInpGen_node_idx, ConvInpGen_node)
             # remove old nodes
             graph.node.remove(n)
             graph_modified = True
@@ -135,6 +198,137 @@ class InferStreamingMaxPool(Transformation):
         return (model, graph_modified)


+class InferPool_Batch(Transformation):
+    """If kernel_shape > strides, replace the Pool layer with an Im2Col node
+    followed by a Pool node (with kernel_shape == strides), plus Transpose nodes
+    to keep the original data layout."""
+
+    def apply(self, model):
+        graph = model.graph
+        node_ind = 0
+        graph_modified = False
+        for n in graph.node:
+            node_ind += 1
+            if n.op_type in ["MaxPool"]:
+                # extract pool parameters
+                k = get_by_name(n.attribute, "kernel_shape").ints[-1]
+                stride = get_by_name(n.attribute, "strides").ints[-1]
+
+                if k <= stride:
+                    continue
+
+                try:
+                    pad = get_by_name(n.attribute, "pads").ints[-1]
+                except AttributeError:
+                    pad = 0
+
+                node_input = n.input[0]
+                node_output = n.output[0]
+                idt = model.get_tensor_datatype(node_input)
+                if not idt.is_integer():
+                    continue
+
+                # odt = model.get_tensor_datatype(node_output)
+
+                ifm_ch = model.get_tensor_shape(n.input[0])[1]  # assume NCHW
+                ofm_ch = ifm_ch
+                ifm_dim = model.get_tensor_shape(n.input[0])[-1]  # assume NCHW
+                ofm_dim = model.get_tensor_shape(n.output[0])[-1]  # assume NCHW
+                # create new intermediate values
+                inp_trans_out = helper.make_tensor_value_info(
+                    model.make_new_valueinfo_name(),
+                    TensorProto.FLOAT,
+                    (1, ifm_dim, ifm_dim, ifm_ch),  # NHWC
+                )
+                graph.value_info.append(inp_trans_out)
+                inp_trans_out = inp_trans_out.name
+                model.set_tensor_datatype(inp_trans_out, idt)
+
+                im2col_out = helper.make_tensor_value_info(
+                    model.make_new_valueinfo_name(),
+                    TensorProto.FLOAT,
+                    (1, ofm_dim, ofm_dim, ifm_ch * k * k),
+                )
+                graph.value_info.append(im2col_out)
+                im2col_out = im2col_out.name
+                model.set_tensor_datatype(im2col_out, idt)
+
+                pool_output = helper.make_tensor_value_info(
+                    model.make_new_valueinfo_name(),
+                    TensorProto.FLOAT,
+                    (1, ofm_dim, ofm_dim, ofm_ch),
+                )
+                graph.value_info.append(pool_output)
+                pool_output = pool_output.name
+                # model.set_tensor_datatype(pool_output, odt)
+
+                # create new nodes
+                # NCHW -> NHWC
+                inp_trans_node = helper.make_node(
+                    "Transpose", [node_input], [inp_trans_out], perm=[0, 2, 3, 1]
+                )
+
+                if n.op_type == "MaxPool":
+                    pool_fxn = "MaxPool"
+                    pad_value = idt.min()
+                else:
+                    raise Exception(
+                        "pad_value and pool_fxn not configured for {}".format(n.op_type)
+                    )
+
+                # format input tensor
+                im2col_node = helper.make_node(
+                    "Im2Col",
+                    [inp_trans_out],
+                    [im2col_out],
+                    domain="finn",
+                    stride=stride,
+                    kernel_size=k,
+                    pad_amount=pad,
+                    pad_value=pad_value,
+                    depthwise=1,
+                    input_shape="(1,{},{},{})".format(ifm_dim, ifm_dim, ifm_ch),
+                )
+
+                # Warning: PE has to be equal to ifm_ch until Im2Col is replaced by
+                # ConvolutionInputGenerator with depthwise=1. 
+ # For other settings the output will be incorrect due to incorrect input + # data layout + pool_node = helper.make_node( + "Pool_Batch", + [im2col_out], + [pool_output], + domain="finn", + backend="fpgadataflow", + dataType=idt.name, + Channels=ifm_ch, + PE=ifm_ch, + KernelSize=k, + Function=pool_fxn, + OutImgDim=ofm_dim, + BatchSize=1, + ) + + # NHWC -> NCHW + out_trans_node = helper.make_node( + "Transpose", [pool_output], [node_output], perm=[0, 3, 1, 2] + ) + + # insert nodes where the conv is to preserve topological ordering + graph.node.insert(node_ind, inp_trans_node) + graph.node.insert(node_ind + 1, im2col_node) + graph.node.insert(node_ind + 2, pool_node) + graph.node.insert(node_ind + 3, out_trans_node) + # remove old node + graph.node.remove(n) + graph_modified = True + + if graph_modified: + model = model.transform(InferShapes()) + model = model.transform(InferDataTypes()) + return (model, graph_modified) + + class InferBinaryStreamingFCLayer(Transformation): """Convert XnorPopcountMatMul layers to StreamingFCLayer_Batch layers. Any immediately following MultiThreshold @@ -455,3 +649,243 @@ class InferThresholdingLayer(Transformation): model = model.transform(InferShapes()) model = model.transform(InferDataTypes()) return (model, graph_modified) + + +class InferChannelwiseLinearLayer(Transformation): + """Convert any channel-wise Add/Mul into a HLS layer.""" + + def get_smallest_possible(self, vals): + """Returns smallest (fewest bits) possible DataType that can represent + value. Prefers unsigned integers where possible.""" + vals = np.array(vals) + for v in vals: + assert int(v) == v, "Error float value" + + for k in DataType.__members__: + dt = DataType[k] + + if dt in [DataType.BIPOLAR, DataType.TERNARY, DataType.FLOAT32]: + # not currently supported + continue + + if (dt.min() <= vals).all() and (vals <= dt.max()).all(): + return dt + + warnings.warn( + """InferChannelwiseLinearLayer: Output values may not be + representable with supported data types. + Setting maximum width data type available. 
+ This will lead to errors if there are no constrains on the input + """ + ) + + if (0 <= vals).all(): + return DataType.UINT32 + else: + return DataType.INT32 + + def apply(self, model): + graph = model.graph + node_ind = 0 + graph_modified = False + for node in graph.node: + node_ind += 1 + if node.op_type == "Add" or node.op_type == "Mul": + # assuming input[0] is dynamic + ll_input = node.input[0] + ll_output = node.output[0] + ll_in_shape = model.get_tensor_shape(ll_input) + + # check if input 1 has an initializer + ll_const = node.input[1] + if ll_const is not None: + ll_cinit = model.get_initializer(ll_const) + if ll_cinit is None: + # input 1 is also dynamic + continue + else: + continue + + # get number of channels and channel index from input + ll_in_layout = model.get_tensor_layout(ll_input) + if ll_in_layout == DataLayout.NHWC or ll_in_layout == DataLayout.NC: + ch_index = -1 + ch = ll_in_shape[-1] + elif ll_in_layout == DataLayout.NCHW: + ch_index = 1 + ch = ll_in_shape[1] + else: + continue + + # check if the shape of initializer is compatible + ll_cinit_shape = list(ll_cinit.shape) + if np.prod(ll_cinit_shape) == 1: + warnings.warn( + "Broadcasting " + str(node.op_type) + "(" + node.name + ")" + ) + ll_cinit = np.full((ch), ll_cinit.flatten()[0]) + elif np.prod(ll_cinit_shape) != ch or ll_cinit_shape[ch_index] != ch: + # parameter shape not compatible with Channelwise_batch + continue + + # check initializer contains integers as floats + if not (ll_cinit.astype(np.int32) == ll_cinit).all(): + continue + # all initializer conditions are met + + # check inputs + idt = model.get_tensor_datatype(ll_input) + if not idt.is_integer(): + # skip conversion for layers with float input + continue + + # check layout of inputs/outputs, and convert if needed + # check layout and convert if necessary + if ll_in_layout == DataLayout.NCHW: + ll_input = nchw_to_nhwc(ll_input, model, node_ind) + node_ind += 1 + ll_in_shape = model.get_tensor_shape(ll_input) + + # keep track of where we need to insert the HLS Op + # it has to be ahead of the output transform + insert_point = node_ind + ll_output_layout = model.get_tensor_layout(ll_output) + if ll_output_layout == DataLayout.NCHW: + ll_output = nchw_to_nhwc(ll_output, model, node_ind, reverse=True) + node_ind += 1 + + # get parameter data type + param_min = min(ll_cinit.flatten()) + param_max = max(ll_cinit.flatten()) + pdt = self.get_smallest_possible([param_min, param_max]) + + # set function and determine output data type + if node.op_type == "Add": + func = "add" + out_min = idt.min() + param_min + out_max = idt.max() + param_max + odt = self.get_smallest_possible([out_min, out_max]) + elif node.op_type == "Mul": + func = "mul" + possible_limits = [] + possible_limits += [idt.min() * param_min] + possible_limits += [idt.min() * param_max] + possible_limits += [idt.max() * param_min] + possible_limits += [idt.max() * param_max] + odt = self.get_smallest_possible(possible_limits) + + model.set_initializer(ll_const, ll_cinit.reshape(ch)) + model.set_tensor_datatype(ll_output, odt) + + # create node with no parallelization first + pe = 1 + assert ch % pe == 0, "Requirement IFC divisable by PE is violated." 
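get_smallest_possible above walks the integer DataTypes and returns the first range that covers all given values. A standalone sketch of the same selection idea, using an illustrative subset of ranges rather than the actual FINN DataType enum:

candidate_ranges = [
    ("UINT2", 0, 3), ("INT2", -2, 1), ("UINT4", 0, 15), ("INT4", -8, 7),
    ("UINT8", 0, 255), ("INT8", -128, 127),
    ("UINT32", 0, 2 ** 32 - 1), ("INT32", -(2 ** 31), 2 ** 31 - 1),
]

def smallest_possible(vals):
    # return the name of the narrowest listed range covering all values
    for name, lo, hi in candidate_ranges:
        if all(lo <= v <= hi for v in vals):
            return name
    return "INT32"  # fallback, mirroring the warning path above

print(smallest_possible([0, 3]))     # UINT2
print(smallest_possible([-5, 100]))  # INT8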
+ # create and insert node + new_node = helper.make_node( + "ChannelwiseOp_Batch", + [ll_input, ll_const], + [ll_output], + domain="finn", + backend="fpgadataflow", + Func=func, + NumChannels=ch, + PE=pe, + inputDataType=idt.name, + paramDataType=pdt.name, + outputDataType=odt.name, + numInputVectors=list(ll_in_shape[:-1]), + ) + graph.node.insert(insert_point, new_node) + # remove old node + graph.node.remove(node) + graph_modified = True + + if graph_modified: + model = model.transform(InferShapes()) + model = model.transform(InferDataTypes()) + return (model, graph_modified) + + +class InferGlobalAccPoolLayer(Transformation): + """Convert any GlobalAveragePool into a GlobalAccPool HLS layer and a scalar Mul.""" + + def apply(self, model): + graph = model.graph + node_ind = 0 + graph_modified = False + for node in graph.node: + node_ind += 1 + if node.op_type == "GlobalAveragePool": + in0 = node.input[0] + result = node.output[0] + in0_shape = model.get_tensor_shape(in0) + + idt = model.get_tensor_datatype(in0) + + # skip conversion for layers with float input + if not idt.is_integer(): + continue + + # check layout and convert if necessary + in0_layout = model.get_tensor_layout(in0) + result_layout = model.get_tensor_layout(result) + + if in0_layout == DataLayout.NCHW: + in0 = nchw_to_nhwc(in0, model, node_ind) + node_ind += 1 + in0_shape = model.get_tensor_shape(in0) + + # keep track of where we need to insert the HLS Op + # it has to be ahead of the output transform + insert_point = node_ind + + if result_layout == DataLayout.NCHW: + result = nchw_to_nhwc(result, model, node_ind, reverse=True) + node_ind += 1 + + num_ch = int(in0_shape[-1]) + vecs = in0_shape[:-1] + # create node with no parallelization first + pe = 1 + assert ( + num_ch % pe == 0 + ), "Requirement Labels divisable by PE is violated." + + # create an additional tensor of the same shape and layout as result + out_shape = model.get_tensor_shape(result) + pool_out = helper.make_tensor_value_info( + model.make_new_valueinfo_name(), TensorProto.FLOAT, out_shape + ) + model.graph.value_info.append(pool_out) + pool_out = pool_out.name + model.set_tensor_layout(pool_out, model.get_tensor_layout(result)) + + new_pool = helper.make_node( + "GlobalAccPool_Batch", + [in0], + [pool_out], + domain="finn", + backend="fpgadataflow", + NumChannels=num_ch, + PE=pe, + inputDataType=idt.name, + numInputVectors=vecs, + ) + + mul_value = helper.make_tensor_value_info( + model.make_new_valueinfo_name(), TensorProto.FLOAT, [1] + ) + model.graph.value_info.append(mul_value) + model.set_initializer(mul_value.name, np.array(1 / (vecs[1] * vecs[2]))) + new_mul = helper.make_node("Mul", [pool_out, mul_value.name], [result],) + graph.node.insert(insert_point, new_pool) + graph.node.insert(insert_point + 1, new_mul) + node_ind += 1 + # remove old node + graph.node.remove(node) + graph_modified = True + + if graph_modified: + model = model.transform(InferShapes()) + model = model.transform(InferDataTypes()) + return (model, graph_modified) diff --git a/src/finn/transformation/fpgadataflow/floorplan.py b/src/finn/transformation/fpgadataflow/floorplan.py new file mode 100644 index 0000000000000000000000000000000000000000..1d9a51875499d77f384c03f54009a9dd1001dea0 --- /dev/null +++ b/src/finn/transformation/fpgadataflow/floorplan.py @@ -0,0 +1,80 @@ +# Copyright (c) 2020, Xilinx +# All rights reserved. 
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from finn.custom_op.registry import getCustomOp
+from finn.transformation import Transformation
+from finn.util.basic import get_by_name
+
+
+class Floorplan(Transformation):
+    """Perform floorplanning of the dataflow design. Separate DMAs into their own
+    partition IDs; TODO: split the design into sections of defined size."""
+
+    def __init__(self, limits=None):
+        super().__init__()
+        self.resource_limits = limits
+
+    def apply(self, model):
+        target_partition_id = 0
+        # we currently assume that all dataflow nodes belonging to the same partition
+        # are connected to each other and there is a single input/output to/from each. 
+        all_nodes = list(model.graph.node)
+        df_nodes = list(
+            filter(lambda x: get_by_name(x.attribute, "backend") is not None, all_nodes)
+        )
+        dma_nodes = list(filter(lambda x: x.op_type == "IODMA", df_nodes))
+
+        non_dma_nodes = list(filter(lambda x: x not in dma_nodes, df_nodes))
+        # note: DynIters is an integer ("i") node attribute, so compare against 1
+        dyn_tlastmarker_nodes = list(
+            filter(
+                lambda x: x.op_type == "TLastMarker"
+                and getCustomOp(x).get_nodeattr("DynIters") == 1,
+                non_dma_nodes,
+            )
+        )
+
+        non_dma_nodes = list(
+            filter(lambda x: x not in dyn_tlastmarker_nodes, non_dma_nodes)
+        )
+
+        for node in dma_nodes:
+            node_inst = getCustomOp(node)
+            node_inst.set_nodeattr("partition_id", target_partition_id)
+            target_partition_id += 1
+
+        for node in dyn_tlastmarker_nodes:
+            node_inst = getCustomOp(node)
+            node_inst.set_nodeattr("partition_id", target_partition_id)
+            target_partition_id += 1
+
+        for node in non_dma_nodes:
+            # TODO: implement proper floorplanning; for now just a single partition
+            node_inst = getCustomOp(node)
+            node_inst.set_nodeattr("partition_id", target_partition_id)
+
+        return (model, False)
diff --git a/src/finn/transformation/fpgadataflow/insert_iodma.py b/src/finn/transformation/fpgadataflow/insert_iodma.py
new file mode 100644
index 0000000000000000000000000000000000000000..e4368edea717f7499481e9b1c6ac20f7d5bb5f58
--- /dev/null
+++ b/src/finn/transformation/fpgadataflow/insert_iodma.py
@@ -0,0 +1,198 @@
+# Copyright (c) 2020, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
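The Floorplan pass above gives each IODMA and each dynamic TLastMarker its own partition_id and lets all remaining dataflow nodes share the next one; a standalone sketch of that assignment on a hypothetical node list:

nodes = [  # (name, op_type, dynamic_iters), all hypothetical
    ("dma_in", "IODMA", False),
    ("tlast0", "TLastMarker", True),
    ("fc0", "StreamingFCLayer_Batch", False),
    ("fc1", "StreamingFCLayer_Batch", False),
    ("dma_out", "IODMA", False),
]

partition, next_id = {}, 0
for name, op, dyn in nodes:  # one partition per DMA
    if op == "IODMA":
        partition[name], next_id = next_id, next_id + 1
for name, op, dyn in nodes:  # one partition per dynamic TLastMarker
    if op == "TLastMarker" and dyn:
        partition[name], next_id = next_id, next_id + 1
for name, op, dyn in nodes:  # everything else shares a single partition
    if name not in partition:
        partition[name] = next_id

print(partition)
# {'dma_in': 0, 'dma_out': 1, 'tlast0': 2, 'fc0': 3, 'fc1': 3}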
+ +from onnx import TensorProto +from onnx import helper as oh + +from finn.util.basic import get_by_name +from finn.custom_op.registry import getCustomOp +from finn.transformation import Transformation +from finn.transformation.general import SortGraph +import finn.core.data_layout as DataLayout +import math +import numpy as np + + +class InsertIODMA(Transformation): + """Insert DMA nodes on all inputs and outputs.""" + + def __init__(self, max_intfwidth=32): + super().__init__() + assert ( + 2 ** math.log2(max_intfwidth) == max_intfwidth + ), "max_intfwidth must be a power of 2" + self.max_intfwidth = max_intfwidth + + def apply(self, model): + # only makes sense for a pure fpgadataflow graph -- so we check! + all_nodes = list(model.graph.node) + assert all( + get_by_name(x.attribute, "backend").s.decode("UTF-8") == "fpgadataflow" + for x in all_nodes + ) + # parse streamingfclayers looking for external weights with no attached IODMA + fc_extw_nodes = list( + filter( + lambda x: x.op_type == "StreamingFCLayer_Batch" + and get_by_name(x.attribute, "mem_mode") is not None + and get_by_name(x.attribute, "mem_mode").s.decode("UTF-8") == "external" + and model.find_producer(x.input[1]) is None, + all_nodes, + ) + ) + graph_in_name = model.graph.input[0].name + first_node = model.find_consumer(graph_in_name) + graph_out_name = model.graph.output[0].name + final_node = model.find_producer(graph_out_name) + if ( + final_node.op_type == "IODMA" + and first_node.op_type == "IODMA" + and len(fc_extw_nodes) == 0 + ): + # TODO maybe check the correctness of properties + return (model, False) + else: + if final_node.op_type != "IODMA": + # check if tensor is NHWC + assert ( + model.get_tensor_layout(graph_out_name) == DataLayout.NHWC + or model.get_tensor_layout(graph_in_name) == DataLayout.NC + ), "Data layout of tensors must be NHWC or NC" + out_shape = model.get_tensor_shape(graph_out_name) + out_dtype = model.get_tensor_datatype(graph_out_name) + # determine the feasible interface width + transfer_bits = np.prod(out_shape) * out_dtype.bitwidth() + intfwidth = math.gcd(transfer_bits, self.max_intfwidth) + assert ( + intfwidth % 8 == 0 + ), "No feasible interface width for transfer size" + # get width of stream input to DMA + streamWidth = getCustomOp(final_node).get_outstream_width() + # make new buffer + final_node_out = oh.make_tensor_value_info( + model.make_new_valueinfo_name(), TensorProto.FLOAT, out_shape + ) + model.graph.value_info.append(final_node_out) + model.set_tensor_datatype(final_node_out.name, out_dtype) + # reroute final node output to final_node_out_name + final_node.output[0] = final_node_out.name + dma_node = oh.make_node( + "IODMA", + [final_node_out.name], + [graph_out_name], + numInputVectors=out_shape[:-1], + NumChannels=out_shape[-1], + dataType=str(out_dtype.name), + intfWidth=intfwidth, + streamWidth=streamWidth, + direction="out", + domain="finn", + backend="fpgadataflow", + ) + model.graph.node.append(dma_node) + if first_node.op_type != "IODMA": + # check if tensor is NHWC + assert ( + model.get_tensor_layout(graph_in_name) == DataLayout.NHWC + or model.get_tensor_layout(graph_in_name) == DataLayout.NC + ), "Data layout of tensors must be NHWC or NC" + in_shape = model.get_tensor_shape(graph_in_name) + in_dtype = model.get_tensor_datatype(graph_in_name) + # determine the feasible interface width + transfer_bits = np.prod(in_shape) * in_dtype.bitwidth() + intfwidth = math.gcd(transfer_bits, self.max_intfwidth) + assert ( + intfwidth % 8 == 0 + ), "No feasible interface 
width for transfer size" + # get width of stream output from DMA + streamWidth = getCustomOp(first_node).get_instream_width() + # make new buffer + first_node_in = oh.make_tensor_value_info( + model.make_new_valueinfo_name(), TensorProto.FLOAT, in_shape + ) + model.graph.value_info.append(first_node_in) + model.set_tensor_datatype(first_node_in.name, in_dtype) + # reroute final node output to final_node_out_name + first_node.input[0] = first_node_in.name + dma_node = oh.make_node( + "IODMA", + [graph_in_name], + [first_node_in.name], + numInputVectors=in_shape[:-1], + NumChannels=in_shape[-1], + dataType=str(in_dtype.name), + intfWidth=intfwidth, + streamWidth=streamWidth, + direction="in", + domain="finn", + backend="fpgadataflow", + ) + model.graph.node.insert(0, dma_node) + for fc_node in fc_extw_nodes: + # check if tensor is NHWC + assert ( + model.get_tensor_layout(fc_node.input[1]) == DataLayout.NHWC + or model.get_tensor_layout(graph_in_name) == DataLayout.NC + ), "Data layout of tensors must be NHWC or NC" + fc_w_name = fc_node.input[1] + w_shape = model.get_tensor_shape(fc_w_name) + w_dtype = model.get_tensor_datatype(fc_w_name) + # determine the feasible interface width + transfer_bits = np.prod(w_shape) * w_dtype.bitwidth() + intfwidth = math.gcd(transfer_bits, self.max_intfwidth) + assert ( + intfwidth % 8 == 0 + ), "No feasible interface width for transfer size" + # calculate width of stream output from DMA + pe = get_by_name(fc_node.attribute, "PE").i + simd = get_by_name(fc_node.attribute, "SIMD").i + streamWidth = simd * pe * w_dtype.bitwidth() + # make new buffer + fc_node_in = oh.make_tensor_value_info( + model.make_new_valueinfo_name(), TensorProto.FLOAT, w_shape + ) + model.graph.value_info.append(fc_node_in) + model.set_tensor_datatype(fc_node_in.name, w_dtype) + dma_node = oh.make_node( + "IODMA", + [fc_w_name], + [fc_node_in.name], + numInputVectors=w_shape[:-1], + NumChannels=w_shape[-1], + dataType=str(w_dtype.name), + intfWidth=intfwidth, + streamWidth=streamWidth, + direction="in", + burstMode="wrap", + domain="finn", + backend="fpgadataflow", + ) + fc_node.input[1] = fc_node_in.name + model.graph.node.insert(0, dma_node) + model = model.transform(SortGraph()) + return (model, True) diff --git a/src/finn/transformation/fpgadataflow/insert_tlastmarker.py b/src/finn/transformation/fpgadataflow/insert_tlastmarker.py index 32f32ece585a93465ba32fede45d5eb606a2b0a3..04dd437af27b9fbe18b2255c20a8e4acda03b3d0 100644 --- a/src/finn/transformation/fpgadataflow/insert_tlastmarker.py +++ b/src/finn/transformation/fpgadataflow/insert_tlastmarker.py @@ -31,23 +31,34 @@ from onnx import helper as oh from finn.custom_op.registry import getCustomOp from finn.transformation import Transformation +from finn.util.basic import get_by_name + +import numpy as np class InsertTLastMarker(Transformation): - """Ensure that the graph is terminated with a TLastMarker node, inserting - one if necessary.""" + """Ensure that the graph is started/terminated with a TLastMarker node, inserting + one if necessary. Use constructor args to determine type of TLastMarker to be inserted. + More information available on the TLastMarker documentation. + """ - def __init__(self): + def __init__(self, both=False, external=True, dynamic=True): super().__init__() + self.dyniters = dynamic + self.external = external + self.both = both def apply(self, model): # TODO only makes sense for a pure fpgadataflow graph -- check! 
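A minimal sketch of chaining the two insertion passes above on a dataflow model, assuming a hypothetical model file name and using the constructor arguments introduced in this diff:

from finn.core.modelwrapper import ModelWrapper
from finn.transformation.fpgadataflow.insert_iodma import InsertIODMA
from finn.transformation.fpgadataflow.insert_tlastmarker import InsertTLastMarker

model = ModelWrapper("dataflow_model.onnx")  # hypothetical file name
model = model.transform(InsertIODMA(max_intfwidth=64))
model = model.transform(InsertTLastMarker(both=True, external=False, dynamic=False))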
graph_out_name = model.graph.output[0].name final_node = model.find_producer(graph_out_name) - if final_node.op_type == "TLastMarker": - # TODO maybe check the correctness of properties - return (model, False) - else: + graph_modified = False + if final_node.op_type != "TLastMarker" and not ( + final_node.op_type == "IODMA" + and get_by_name(final_node.attribute, "direction").s.decode("UTF-8") + == "out" + ): + custom_op = getCustomOp(final_node) num_iters = int(custom_op.get_number_output_values()) stream_width = int(custom_op.get_outstream_width()) @@ -69,8 +80,51 @@ class InsertTLastMarker(Transformation): NumIters=num_iters, StreamWidth=stream_width, ElemWidth=elem_width, + DynIters=(1 if self.dyniters else 0), + Direction="out", + Protocol=("external" if self.external else "internal"), domain="finn", backend="fpgadataflow", ) model.graph.node.append(tlast_node) - return (model, True) + graph_modified = True + # if both is True, also insert marker on input + if self.both: + graph_in_name = model.graph.input[0].name + first_node = model.find_consumer(graph_in_name) + if first_node.op_type != "TLastMarker" and not ( + first_node.op_type == "IODMA" + and get_by_name(first_node.attribute, "direction").s.decode("UTF-8") + == "in" + ): + + custom_op = getCustomOp(first_node) + num_iters = np.prod(custom_op.get_folded_input_shape()[1:-1]) + stream_width = int(custom_op.get_instream_width()) + in_shape = model.get_tensor_shape(graph_in_name) + in_dtype = model.get_tensor_datatype(graph_in_name) + elem_width = in_dtype.bitwidth() + # make new buffer + first_node_in = oh.make_tensor_value_info( + model.make_new_valueinfo_name(), TensorProto.FLOAT, in_shape + ) + model.graph.value_info.append(first_node_in) + model.set_tensor_datatype(first_node_in.name, in_dtype) + # reroute final node output to first_node_in_name + first_node.input[0] = first_node_in.name + tlast_node = oh.make_node( + "TLastMarker", + [graph_in_name], + [first_node_in.name], + NumIters=num_iters, + StreamWidth=stream_width, + ElemWidth=elem_width, + DynIters=(1 if self.dyniters else 0), + Direction="in", + Protocol=("external" if self.external else "internal"), + domain="finn", + backend="fpgadataflow", + ) + model.graph.node.insert(0, tlast_node) + graph_modified = True + return (model, graph_modified) diff --git a/src/finn/transformation/fpgadataflow/prepare_cppsim.py b/src/finn/transformation/fpgadataflow/prepare_cppsim.py index a1524322ec03a4e96ef41f999144e3eed349c5af..6eae560e1191642cfaf85d92c6d0fcf644630973 100644 --- a/src/finn/transformation/fpgadataflow/prepare_cppsim.py +++ b/src/finn/transformation/fpgadataflow/prepare_cppsim.py @@ -29,9 +29,12 @@ import os import finn.custom_op.registry as registry -from finn.transformation import Transformation from finn.util.basic import make_build_dir from finn.util.fpgadataflow import is_fpgadataflow_node +from finn.transformation import Transformation +from finn.util.basic import get_num_default_workers +import multiprocessing as mp +import copy def _codegen_single_node(node, model): @@ -66,8 +69,39 @@ class PrepareCppSim(Transformation): that contains generated C++ code that can be used to simulate node using cppsim. The subsequent transformation is CompileCppSim""" + def __init__(self, num_workers=None): + super().__init__() + if num_workers is None: + self._num_workers = get_num_default_workers() + else: + self._num_workers = num_workers + assert self._num_workers >= 0, "Number of workers must be nonnegative." 
+ if self._num_workers == 0: + self._num_workers = mp.cpu_count() + + def prepareCppSim_node(self, node): + if is_fpgadataflow_node(node) is True: + _codegen_single_node(node, self.model) + return (node, False) + def apply(self, model): - for node in model.graph.node: - if is_fpgadataflow_node(node) is True: - _codegen_single_node(node, model) - return (model, False) + # Remove old nodes from the current model + self.model = copy.deepcopy(model) + old_nodes = [] + for i in range(len(model.graph.node)): + old_nodes.append(model.graph.node.pop()) + + # Execute transformation in parallel + with mp.Pool(self._num_workers) as p: + new_nodes_and_bool = p.map(self.prepareCppSim_node, old_nodes, chunksize=1) + + # extract nodes and check if the transformation needs to run again + # Note: .pop() had initially reversed the node order + run_again = False + for node, run in reversed(new_nodes_and_bool): + # Reattach new nodes to old model + model.graph.node.append(node) + if run is True: + run_again = True + + return (model, run_again) diff --git a/src/finn/transformation/fpgadataflow/synth_ooc.py b/src/finn/transformation/fpgadataflow/synth_ooc.py new file mode 100644 index 0000000000000000000000000000000000000000..1d49970c819961d1794cc89e998108639ca15593 --- /dev/null +++ b/src/finn/transformation/fpgadataflow/synth_ooc.py @@ -0,0 +1,64 @@ +# Copyright (c) 2020, Xilinx +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of FINN nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
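PrepareCppSim above pops all nodes off the graph (which reverses their order), maps the per-node code generation over a multiprocessing pool, and re-appends the results in reverse to restore the original ordering; a minimal standalone sketch of that pattern with a dummy stand-in for the per-node work:

import multiprocessing as mp

def codegen_stub(node_name):
    # stand-in for _codegen_single_node: pretend to generate code for one node
    return node_name, False

if __name__ == "__main__":
    nodes = ["node0", "node1", "node2", "node3"]
    work = [nodes.pop() for _ in range(len(nodes))]  # pop() reverses the order
    with mp.Pool(2) as p:
        results = p.map(codegen_stub, work, chunksize=1)
    restored = [name for name, _ in reversed(results)]  # undo the reversal
    print(restored)  # ['node0', 'node1', 'node2', 'node3']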
+ +import os +from shutil import copy2 + +from finn.transformation import Transformation +from finn.util.vivado import out_of_context_synth +from finn.util.basic import make_build_dir + + +class SynthOutOfContext(Transformation): + """Run out-of-context Vivado synthesis on a stitched IP design.""" + + def __init__(self, part, clk_period_ns, clk_name="ap_clk_0"): + super().__init__() + self.part = part + self.clk_period_ns = clk_period_ns + self.clk_name = clk_name + + def apply(self, model): + def file_to_basename(x): + return os.path.basename(os.path.realpath(x)) + + vivado_stitch_proj_dir = model.get_metadata_prop("vivado_stitch_proj") + assert vivado_stitch_proj_dir is not None, "Need stitched IP to run." + top_module_name = model.get_metadata_prop("wrapper_filename") + top_module_name = file_to_basename(top_module_name).strip(".v") + build_dir = make_build_dir("synth_out_of_context_") + with open(vivado_stitch_proj_dir + "/all_verilog_srcs.txt", "r") as f: + all_verilog_srcs = f.read().split() + for file in all_verilog_srcs: + if file.endswith(".v"): + copy2(file, build_dir) + ret = out_of_context_synth( + build_dir, top_module_name, self.part, self.clk_name, self.clk_period_ns + ) + model.set_metadata_prop("res_total_ooc_synth", str(ret)) + return (model, False) diff --git a/src/finn/transformation/general.py b/src/finn/transformation/general.py index f51ffbcfd9f62e06bf4942409fbb163e92ff6370..4303eb17f39a9949f5729e895e449bbb6a633033 100644 --- a/src/finn/transformation/general.py +++ b/src/finn/transformation/general.py @@ -28,6 +28,56 @@ import finn.util.basic as util from finn.transformation import Transformation +from toposort import toposort_flatten + + +class RemoveUnusedTensors(Transformation): + """Remove any unused tensors in the graph by removing any initializers, + ValueInfo and tensor annotations associated with it. Unused tensors do not + appear as any input/output for any graph nodes. + """ + + def apply(self, model): + graph_modified = False + onnx_graph = model.model.graph + # build a set of tensors that we actually use in the graph nodes + used_tensors = set() + for node in model.graph.node: + for i in node.input: + used_tensors.add(i) + for o in node.output: + used_tensors.add(o) + # remove initializers, value_info and annotations that are not in the + # used set of tensors, as determined by the graph node i/o + for init in onnx_graph.initializer: + if init.name not in used_tensors: + onnx_graph.initializer.remove(init) + graph_modified = True + for vi in onnx_graph.value_info: + if vi.name not in used_tensors: + onnx_graph.value_info.remove(vi) + graph_modified = True + for qa in onnx_graph.quantization_annotation: + if qa.tensor_name not in used_tensors: + onnx_graph.quantization_annotation.remove(qa) + graph_modified = True + + return (model, graph_modified) + + +class RemoveStaticGraphInputs(Transformation): + "Remove any top-level graph inputs that have initializers." 
+ + def apply(self, model): + graph_modified = False + for i in model.graph.input: + if model.get_initializer(i.name) is not None: + # move ValueInfo to internal (value_info) container + model.graph.value_info.append(i) + model.graph.input.remove(i) + graph_modified = True + + return (model, graph_modified) class GiveUniqueNodeNames(Transformation): @@ -104,11 +154,13 @@ class GiveUniqueParameterTensors(Transformation): # first occurance seen_parameters += [node_input] continue - + new_param_name = model.make_new_valueinfo_name() model.set_initializer(new_param_name, input_init) - model.set_tensor_datatype(new_param_name, model.get_tensor_datatype(node_input)) + model.set_tensor_datatype( + new_param_name, model.get_tensor_datatype(node_input) + ) # point node input to new tensor n.input[input_idx] = new_param_name @@ -116,6 +168,55 @@ class GiveUniqueParameterTensors(Transformation): return (model, graph_modified) +class SortGraph(Transformation): + """ Returns the model with its node list sorted topologically. + Any ONNX graph to be executed must have a topologically sorted node list, + as dictated by the ONNX standard. + """ + + # Notes on SortGraph performance: + # benchmark in tests/transformation/test_sort_graph.py + # The algorithm doesn't move initializers so its performance should only depend on + # the number of nodes + # + # Relative order of magnitudes for time per step: + # - Gather graph structure: base + # - Sort nodes: 0.1 of base + # - Remove and insert in order : 0.001 of base + # + # Notes: + # Remove nodes and insert them in order: + # Probably this is faster than copying initializers and more robust in general + + def apply(self, model): + # Gather graph structure + graph_dependencies = {} + node_list = [ + n for n in model.graph.node + ] # I also need the list to remove the nodes + for node_idx, n in enumerate(node_list): + node_pred = model.find_direct_predecessors(n) + if node_pred is None: + # Will also eliminate nodes that are floating around for some reason + continue + + node_dependencies = [node_list.index(pred) for pred in node_pred] + graph_dependencies[node_idx] = set(node_dependencies) + + # Sort nodes + sorted_node_indexes = toposort_flatten(graph_dependencies) + + # Remove nodes and insert them in order + # Can't remove nodes before if I want to use model.find_direct_predecessors() + for n in node_list: + model.graph.node.remove(n) + + for new_idx, sorted_idx in enumerate(sorted_node_indexes): + model.graph.node.insert(new_idx, node_list[sorted_idx]) + + return model, False + + class ConvertSubToAdd(Transformation): """Convert subtract-a-constant nodes to add-a-constant nodes.""" diff --git a/src/finn/transformation/infer_data_layouts.py b/src/finn/transformation/infer_data_layouts.py index 9ac75578ffb911cc44cfddc2b2119b55e6abf2dd..e7a6b88239a1735d5379e165333f8356ae6f88a1 100644 --- a/src/finn/transformation/infer_data_layouts.py +++ b/src/finn/transformation/infer_data_layouts.py @@ -38,7 +38,7 @@ def _dims_to_layout(model, node, ndims): return DataLayout.NC else: if node.domain == "finn": - if node.op_type == "MultiThreshold": + if node.op_type == "MultiThreshold" or node.op_type == "QuantAvgPool2d": mt_inst = registry.getCustomOp(node) layout = mt_inst.get_nodeattr("data_layout") if layout == "NHWC" and ndims == 4: diff --git a/src/finn/transformation/infer_datatypes.py b/src/finn/transformation/infer_datatypes.py index 1acd4e3abe2d77248810cf15c15475e806a3bd32..39b7a787be8c725e7b6d474757dd96fc4848dfe0 100644 --- 
a/src/finn/transformation/infer_datatypes.py +++ b/src/finn/transformation/infer_datatypes.py @@ -71,7 +71,13 @@ def _infer_node_datatype(model, node): else: # unknown, assume node produces float32 outputs for o in node.output: - model.set_tensor_datatype(o, DataType.FLOAT32) + # check if output datatype is already set to a value != FLOAT32 + odtype = model.get_tensor_datatype(o) + if odtype is not None and odtype != DataType.FLOAT32: + # don't change data type + model.set_tensor_datatype(o, odtype) + else: + model.set_tensor_datatype(o, DataType.FLOAT32) # compare old and new output dtypes to see if anything changed new_odtypes = list(map(lambda x: model.get_tensor_datatype(x), node.output)) graph_modified = new_odtypes != odtypes diff --git a/src/finn/transformation/lower_convs_to_matmul.py b/src/finn/transformation/lower_convs_to_matmul.py index 0cee8eae1f26ba8f78c868343883efa33ba47b16..e5a1f778d0cac48925ecd97ae8b970f7bdab9c4f 100644 --- a/src/finn/transformation/lower_convs_to_matmul.py +++ b/src/finn/transformation/lower_convs_to_matmul.py @@ -104,14 +104,19 @@ class LowerConvsToMatMul(Transformation): inp_trans_out = inp_trans_out.name model.set_tensor_datatype(inp_trans_out, idt) - im2col_out = helper.make_tensor_value_info( - model.make_new_valueinfo_name(), - TensorProto.FLOAT, - (1, ofm_dim, ofm_dim, ifm_ch * k * k), - ) - graph.value_info.append(im2col_out) - im2col_out = im2col_out.name - model.set_tensor_datatype(im2col_out, idt) + need_im2col = True + if k == 1 and pad == 0 and stride == 1: + need_im2col = False + + if need_im2col: + im2col_out = helper.make_tensor_value_info( + model.make_new_valueinfo_name(), + TensorProto.FLOAT, + (1, ofm_dim, ofm_dim, ifm_ch * k * k), + ) + graph.value_info.append(im2col_out) + im2col_out = im2col_out.name + model.set_tensor_datatype(im2col_out, idt) matmul_out = helper.make_tensor_value_info( model.make_new_valueinfo_name(), @@ -128,20 +133,24 @@ class LowerConvsToMatMul(Transformation): "Transpose", [cnv_input], [inp_trans_out], perm=[0, 2, 3, 1] ) # lower input tensor - im2col_node = helper.make_node( - "Im2Col", - [inp_trans_out], - [im2col_out], - domain="finn", - stride=stride, - kernel_size=k, - pad_amount=pad, - input_shape="(1,{},{},{})".format(ifm_dim, ifm_dim, ifm_ch), - dw=dw, - ) + matmul_input = inp_trans_out + if need_im2col: + matmul_input = im2col_out + im2col_node = helper.make_node( + "Im2Col", + [inp_trans_out], + [im2col_out], + domain="finn", + stride=stride, + kernel_size=k, + pad_amount=pad, + input_shape="(1,{},{},{})".format(ifm_dim, ifm_dim, ifm_ch), + depthwise=dw, + ) + # do matmul matmul_node = helper.make_node( - "MatMul", [im2col_out, weight_name], [matmul_out] + "MatMul", [matmul_input, weight_name], [matmul_out] ) # NHWC -> NCHW out_trans_node = helper.make_node( @@ -149,9 +158,13 @@ class LowerConvsToMatMul(Transformation): ) # insert nodes where the conv is to preserve topological ordering graph.node.insert(node_ind, inp_trans_node) - graph.node.insert(node_ind + 1, im2col_node) - graph.node.insert(node_ind + 2, matmul_node) - graph.node.insert(node_ind + 3, out_trans_node) + if need_im2col: + graph.node.insert(node_ind + 1, im2col_node) + graph.node.insert(node_ind + 2, matmul_node) + graph.node.insert(node_ind + 3, out_trans_node) + else: + graph.node.insert(node_ind + 1, matmul_node) + graph.node.insert(node_ind + 2, out_trans_node) # remove old nodes graph.node.remove(n) model = model.transform(InferShapes()) diff --git a/src/finn/transformation/merge_onnx_models.py 
b/src/finn/transformation/merge_onnx_models.py new file mode 100644 index 0000000000000000000000000000000000000000..5dc6127ed189311c72a119932394aca4745e3608 --- /dev/null +++ b/src/finn/transformation/merge_onnx_models.py @@ -0,0 +1,222 @@ +# Copyright (c) 2020, Xilinx +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of FINN nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +import copy +import warnings +from onnx import helper + +from finn.transformation import Transformation +from finn.core.modelwrapper import ModelWrapper +import finn.util.basic as util +from finn.transformation.infer_shapes import InferShapes +from finn.transformation.infer_datatypes import InferDataTypes +from finn.transformation.infer_data_layouts import InferDataLayouts +from finn.transformation.general import ( + GiveReadableTensorNames, + GiveUniqueNodeNames, + GiveUniqueParameterTensors, +) + + +class MergeONNXModels(Transformation): + """Merges two models. The model passed in the transformation will be inserted before + the model the transformation is applied on, the resulting model is returned. + This transformation will try to connect graph.output[0] of the pre model and + graph.input[0] of the post model. + If more than one input or output exists, a warning is raised.""" + + def __init__(self, pre_model): + super().__init__() + # use deep copy of model that should be inserted in the beginning of + # the other model to ensure that it stays unchanged + self.pre_model = copy.deepcopy(pre_model) + + def apply(self, model): + graph_modified = False + pre_model = self.pre_model + post_model = copy.deepcopy(model) + + # check for dynamic outputs of pre model + dyn_outp = [] + for outp in pre_model.graph.output: + init_val = pre_model.get_initializer(outp.name) + if init_val is None: + dyn_outp.append(outp) + + if len(dyn_outp) != 1: + warnings.warn( + "The pre model has more than one dynamic output! The transformation " + "tries to connect the first dynamic output to the first dynamic input " + "of the post model." 
+            )
+
+        # check for dynamic inputs of post model
+        dyn_inp = []
+        for inp in post_model.graph.input:
+            init_val = post_model.get_initializer(inp.name)
+            if init_val is None:
+                dyn_inp.append(inp)
+
+        if len(dyn_inp) != 1:
+            warnings.warn(
+                "The post model has more than one dynamic input! The transformation "
+                "tries to connect the first dynamic input to the first dynamic output "
+                "of the pre model."
+            )
+
+        # erase all node names to avoid conflict
+        for n in pre_model.graph.node:
+            n.name = ""
+        for n in post_model.graph.node:
+            n.name = ""
+
+        # randomize all tensor names
+        names1 = pre_model.get_all_tensor_names()
+        names2 = post_model.get_all_tensor_names()
+        used_names = names1 + names2
+
+        # pre_model
+        for tensor_name in names1:
+            new_name = util.random_string()
+            while new_name in used_names:
+                new_name = util.random_string()
+            pre_model.rename_tensor(tensor_name, new_name)
+            used_names.append(new_name)
+
+        # post_model
+        for tensor_name in names2:
+            new_name = util.random_string()
+            while new_name in used_names:
+                new_name = util.random_string()
+            post_model.rename_tensor(tensor_name, new_name)
+            used_names.append(new_name)
+
+        # check if models can be merged
+        output_model_a = dyn_outp[0].name
+        input_model_b = dyn_inp[0].name
+        output_a_shape = pre_model.get_tensor_shape(output_model_a)
+        input_b_shape = post_model.get_tensor_shape(input_model_b)
+        assert (
+            output_a_shape == input_b_shape
+        ), "Models can't be merged! Shapes don't match."
+
+        # connect output of one model to input of the other
+        for n in pre_model.graph.node:
+            if output_model_a == n.output[0]:
+                n.output[0] = input_model_b
+
+        # extract information for new model
+
+        # nodes
+        node_list_a = pre_model.graph.node
+        node_list_b = post_model.graph.node
+
+        node_list = node_list_a
+        for node in node_list_b:
+            node_list.append(node)
+
+        # in and output
+        inp = pre_model.graph.input[0]
+        outp = post_model.graph.output[0]
+
+        # create new graph and model
+        new_graph = helper.make_graph(
+            nodes=node_list,
+            name="fuse-graph",
+            inputs=[inp],
+            outputs=[outp],
+            value_info=[],
+        )
+
+        new_model = helper.make_model(new_graph, producer_name="fuse_model")
+        new_model = ModelWrapper(new_model)
+
+        # add value info from both models to new model
+        # pre model
+        vi_pre = [x for x in pre_model.graph.input]
+        vi_pre += [x for x in pre_model.graph.output]
+        vi_pre += [x for x in pre_model.graph.value_info]
+        for vi in vi_pre:
+            # preserve initializers, quantization/sparsity annotation, etc.
+            # initializer
+            init_val = pre_model.get_initializer(vi.name)
+            if init_val is not None:
+                new_model.set_initializer(vi.name, init_val)
+            # FINN datatype
+            dtype = pre_model.get_tensor_datatype(vi.name)
+            new_model.set_tensor_datatype(vi.name, dtype)
+            # data layout
+            data_layout = pre_model.get_tensor_layout(vi.name)
+            if data_layout is not None:
+                new_model.set_tensor_layout(vi.name, data_layout)
+            # sparsity
+            sparsity = pre_model.get_tensor_sparsity(vi.name)
+            if sparsity is not None:
+                new_model.set_tensor_sparsity(vi.name, sparsity)
+            # graph input should not be part of graph.value_info, so don't insert
+            # if current vi == inp, but the quantization annotation is preserved
+            if vi == inp:
+                continue
+            new_model.graph.value_info.append(vi)
+
+        # post model
+        vi_model = [x for x in post_model.graph.input]
+        vi_model += [x for x in post_model.graph.output]
+        vi_model += [x for x in post_model.graph.value_info]
+        for vi in vi_model:
+            # preserve initializers, quantization/sparsity annotation, etc.
+ # initializer + init_val = post_model.get_initializer(vi.name) + if init_val is not None: + new_model.set_initializer(vi.name, init_val) + # FINN datatype + dtype = post_model.get_tensor_datatype(vi.name) + new_model.set_tensor_datatype(vi.name, dtype) + # data layout + data_layout = post_model.get_tensor_layout(vi.name) + if data_layout is not None: + new_model.set_tensor_layout(vi.name, data_layout) + # sparsity + sparsity = post_model.get_tensor_sparsity(vi.name) + if sparsity is not None: + new_model.set_tensor_sparsity(vi.name, sparsity) + # graph output should not be part of graph.value_info, so don't insert + # if current vi == outp, but the quantization annotation is preserved + if vi == outp: + continue + new_model.graph.value_info.append(vi) + + # tidy-up new model + model = new_model + model = model.transform(InferShapes()) + model = model.transform(InferDataTypes()) + model = model.transform(InferDataLayouts()) + model = model.transform(GiveUniqueNodeNames()) + model = model.transform(GiveUniqueParameterTensors()) + model = model.transform(GiveReadableTensorNames()) + + return (model, graph_modified) diff --git a/src/finn/transformation/streamline/__init__.py b/src/finn/transformation/streamline/__init__.py index c9c73fa4c8303ee28bc1cc6aee879d633740e01e..d7686eaadcbc800542ab96c5f45145857412b773 100644 --- a/src/finn/transformation/streamline/__init__.py +++ b/src/finn/transformation/streamline/__init__.py @@ -41,6 +41,7 @@ from finn.transformation.streamline.absorb import ( FactorOutMulSignMagnitude, Absorb1BitMulIntoMatMul, Absorb1BitMulIntoConv, + AbsorbSignBiasIntoMultiThreshold, ) from finn.transformation.streamline.collapse_repeated import ( @@ -52,13 +53,14 @@ from finn.transformation.streamline.reorder import ( MoveAddPastMul, MoveScalarMulPastMatMul, MoveScalarAddPastMatMul, - MoveScalarAddPastConv, + MoveAddPastConv, MoveScalarMulPastConv, ) from finn.transformation.streamline.round_thresholds import RoundAndClipThresholds from finn.transformation.streamline.sign_to_thres import ConvertSignToThres from finn.transformation.batchnorm_to_affine import BatchNormToAffine +from finn.transformation.streamline.remove import RemoveIdentityOps class Streamline(Transformation): @@ -70,9 +72,10 @@ class Streamline(Transformation): ConvertDivToMul(), BatchNormToAffine(), ConvertSignToThres(), + AbsorbSignBiasIntoMultiThreshold(), MoveAddPastMul(), MoveScalarAddPastMatMul(), - MoveScalarAddPastConv(), + MoveAddPastConv(), MoveScalarMulPastMatMul(), MoveScalarMulPastConv(), MoveAddPastMul(), @@ -87,6 +90,7 @@ class Streamline(Transformation): ] for trn in streamline_transformations: model = model.transform(trn) + model = model.transform(RemoveIdentityOps()) model = model.transform(GiveUniqueNodeNames()) model = model.transform(GiveReadableTensorNames()) model = model.transform(InferDataTypes()) diff --git a/src/finn/transformation/streamline/absorb.py b/src/finn/transformation/streamline/absorb.py index dbcf97361017144174f9fbfca35a84361b5abd26..8398a277443530e84632d26fbfca6d90ea4b0b9e 100644 --- a/src/finn/transformation/streamline/absorb.py +++ b/src/finn/transformation/streamline/absorb.py @@ -28,14 +28,81 @@ import numpy as np from onnx import helper as oh +import warnings from finn.core.datatype import DataType +import finn.core.data_layout as DataLayout from finn.transformation import Transformation from finn.util.basic import get_by_name from finn.custom_op.registry import getCustomOp +from finn.transformation.infer_shapes import InferShapes from 
finn.transformation.infer_datatypes import InferDataTypes +class AbsorbSignBiasIntoMultiThreshold(Transformation): + """Absorb scalar bias originating from signed int export back into + MultiThreshold and re-evaluate the output datatype.""" + + def apply(self, model): + graph = model.graph + node_ind = 0 + graph_modified = False + for n in graph.node: + # search for (MultiThreshold, Add) pair + node_ind += 1 + if ( + n.op_type == "MultiThreshold" + and not model.is_fork_node(n) + and not model.is_join_node(n) + ): + consumer = model.find_consumer(n.output[0]) + if consumer is not None and consumer.op_type == "Add": + mt_node = n + add_node = consumer + threshold_name = mt_node.input[1] + add_weight_name = add_node.input[1] + T = model.get_initializer(threshold_name) + A = model.get_initializer(add_weight_name) + if (A is None) or (T is None): + warnings.warn("Threshold or add bias not constant, skipping") + continue + end_name = add_node.output[0] + # we can only absorb scalar adds + is_scalar = A.ndim == 0 or all(x == 1 for x in A.shape) + if not is_scalar: + continue + bias = A.flatten()[0] + # set MultiThreshold bias property + mt_inst = getCustomOp(mt_node) + bias += mt_inst.get_nodeattr("out_bias") + mt_inst.set_nodeattr("out_bias", bias) + graph_modified = True + # compute new DataType for MultiThreshold output + steps = T.shape[-1] + new_min = bias + new_max = steps + bias + odt = DataType.get_smallest_possible(steps).name.replace( + "UINT", "INT" + ) + odt = DataType[odt] + assert odt.allowed(new_max) and odt.allowed( + new_min + ), """Could + not compute new MultiThreshold DataType (min = %d max = %d)""" % ( + new_min, + new_max, + ) + mt_inst.set_nodeattr("out_dtype", odt.name) + # remove Add node, rewire MultiThreshold + graph.node.remove(add_node) + mt_node.output[0] = end_name + # set datatype + model.set_tensor_datatype(end_name, odt) + if graph_modified: + model = model.transform(InferDataTypes()) + return (model, graph_modified) + + class AbsorbAddIntoMultiThreshold(Transformation): """Absorb preceding Add ops into MultiThreshold by updating the threshold values. Only scalar/1D add vectors can be absorbed.""" @@ -250,11 +317,13 @@ class AbsorbTransposeIntoMultiThreshold(Transformation): graph_modified = False for n in graph.node: node_ind += 1 - if n.op_type == "Transpose": + if n.op_type == "Transpose" and not model.is_fork_node(n): perms = list(get_by_name(n.attribute, "perm").ints) if perms == [0, 3, 1, 2]: mt_cand = model.find_consumer(n.output[0]) - if mt_cand.op_type == "MultiThreshold": + if mt_cand.op_type == "MultiThreshold" and not model.is_fork_node( + mt_cand + ): final_t_cand = model.find_consumer(mt_cand.output[0]) if final_t_cand.op_type == "Transpose": perms = list( @@ -290,3 +359,182 @@ class AbsorbTransposeIntoMultiThreshold(Transformation): if graph_modified: model = model.transform(InferDataTypes()) return (model, graph_modified) + + +class AbsorbTransposeIntoFlatten(Transformation): + """Absorb transpose node into succeeding flatten node, if H=W=1 and the first + dimension stays the same. 
Can also be applied if flatten is implemented implicitly + by a reshape node with shape [1, -1] and the first input dimension is 1""" + + def apply(self, model): + graph = model.graph + graph_modified = False + node_ind = 0 + for n in graph.node: + node_ind += 1 + if ( + n.op_type == "Reshape" + and (model.get_initializer(n.input[1]) == [1, -1]).all() + ) or n.op_type == "Flatten": + prod = model.find_producer(n.input[0]) + if ( + prod is not None + and prod.op_type == "Transpose" + # we ensure that the first dimension is not changed from the + # transpose operation + and get_by_name(prod.attribute, "perm").ints[0] == 0 + ): + data_layout = model.get_tensor_layout(prod.input[0]) + # check for the data layout to interpret input shape correctly + if data_layout is None: + warnings.warn( + """Data layout for input tensor of Transpose node is not set. + To use AbsorbTransposeIntoFlatten transformation + please set tensor data layout.""" + ) + continue + elif data_layout == DataLayout.NCHW: + (b, c, h, w) = model.get_tensor_shape(prod.input[0]) + # if h=w=1 the transposition can be absorbed, otherwise + # the absorption would lead to an error in the behavior + if h != 1 or w != 1: + continue + # the flatten node from onnx keeps by default the first + # dim and flattens the rest, that is why this transformation + # can only work with b != 1 if the model contains already a + # flatten node and not a reshape node with shape = [1, -1]. + # If the first dim of the input tensor is not 1, flatten and + # reshape (with shape = [1, -1]) would lead to different results + if n.op_type == "Reshape" and b != 1: + continue + elif data_layout == DataLayout.NHWC: + (b, h, w, c) = model.get_tensor_shape(prod.input[0]) + if h != 1 or w != 1: + continue + if n.op_type == "Reshape" and b != 1: + continue + # create single flatten node and remove obsolete nodes + node = oh.make_node("Flatten", [prod.input[0]], [n.output[0]]) + graph.node.remove(n) + graph.node.remove(prod) + graph.node.insert(node_ind, node) + graph_modified = True + if graph_modified: + model = model.transform(InferDataTypes()) + return (model, graph_modified) + + +class AbsorbScalarMulIntoTopK(Transformation): + """Absorb a mul node into a suceeding topk node if the mul is scalar.""" + + def apply(self, model): + graph = model.graph + node_ind = 0 + graph_modified = False + for n in graph.node: + node_ind += 1 + if n.op_type == "TopK": + prod = model.find_producer(n.input[0]) + if prod is not None and prod.op_type == "Mul": + prod_input = prod.input[0] + param_name = prod.input[1] + A = model.get_initializer(param_name) + if A is None: + warnings.warn("Param is not constant, skipping") + continue + if all(x == 1 for x in A.shape) and A > 0: + # if the mul is scalar and positive, we can just delete the + # mul node and rewire the top k node. 
Because the top k node + # works with probabilities and their relation to each other + # the relation doesn't change if every value is multiplied + # with a scalar + graph.node.remove(prod) + n.input[0] = prod_input + # to avoid error the dataype is set to float32 + model.set_tensor_datatype(n.input[0], DataType.FLOAT32) + graph_modified = True + if graph_modified: + model = model.transform(InferShapes()) + model = model.transform(InferDataTypes()) + return (model, graph_modified) + + +class AbsorbConsecutiveTransposes(Transformation): + """Remove (Transpose -> Transpose) patterns when the input and output + of the pattern have the same layout.""" + + def Are_opposite_permutations(self, perms1, perms2): + if len(perms1) != len(perms2): + return False + assert 0 <= max(perms2) < len(perms2), "invalid permutation" + assert 0 <= max(perms1) < len(perms1), "invalid permutation" + + for i, p in enumerate(perms2): + if perms1[p] != i: + return False + + return True + + def apply(self, model): + graph = model.graph + graph_modified = False + for n in graph.node: + if n.op_type == "Transpose": + if model.is_fork_node(n): + next_nodes = model.find_direct_successors(n) + perms1 = list(get_by_name(n.attribute, "perm").ints) + + # check if all nodes after fork are opposite transposes + all_opposite_transposes = True + for next_node in next_nodes: + if next_node is not None and next_node.op_type == "Transpose": + perms2 = list(get_by_name(next_node.attribute, "perm").ints) + if not self.Are_opposite_permutations(perms1, perms2): + all_opposite_transposes = False + break + else: + all_opposite_transposes = False + break + + if not all_opposite_transposes: + continue + + prod = model.find_producer(n.input[0]) + for next_node in next_nodes: + # connect next_node's consumer input to n's producer output + # TODO implement this to allow for forks as producers and + # joins as consumers + cons = model.find_consumer(next_node.output[0]) + cons.input[0] = prod.output[0] + + # remove consumer transpose + graph.node.remove(next_node) + + # remove producer transpose + graph.node.remove(n) + graph_modified = True + + else: + next_node = model.find_consumer(n.output[0]) + if next_node is not None and next_node.op_type == "Transpose": + perms1 = list(get_by_name(n.attribute, "perm").ints) + perms2 = list(get_by_name(next_node.attribute, "perm").ints) + if self.Are_opposite_permutations(perms1, perms2): + + # connect next_node's consumer input to n's producer output + # TODO implement this to allow for forks as producers + consumers = model.find_direct_successors(next_node) + prod = model.find_producer(n.input[0]) + for cons in consumers: + for cons_in in cons.input: + if cons_in == next_node.output[0]: + prod.output[0] = cons_in + break + # remove both transposes + graph.node.remove(n) + graph.node.remove(next_node) + + graph_modified = True + if graph_modified: + model = model.transform(InferDataTypes()) + return (model, graph_modified) diff --git a/src/finn/transformation/streamline/collapse_repeated.py b/src/finn/transformation/streamline/collapse_repeated.py index 67824ad4f633983b93e3178d03118927a1ddd85b..769bed841ce07c1c9c62f762de4b2c0937a6d68f 100644 --- a/src/finn/transformation/streamline/collapse_repeated.py +++ b/src/finn/transformation/streamline/collapse_repeated.py @@ -30,6 +30,7 @@ from onnx import helper as oh from finn.transformation import Transformation from finn.transformation.infer_shapes import InferShapes +from finn.core.datatype import DataType class CollapseRepeatedOp(Transformation): @@ -83,6 
+84,9 @@ class CollapseRepeatedOp(Transformation): graph.node.insert(node_ind, new_node) # replace parameter value model.set_initializer(new_node_param_name, new_param) + # be conservative with param/output DataTypes + model.set_tensor_datatype(new_node_param_name, DataType.FLOAT32) + model.set_tensor_datatype(end_name, DataType.FLOAT32) # remove old nodes graph.node.remove(n) graph.node.remove(consumer) diff --git a/src/finn/transformation/streamline/remove.py b/src/finn/transformation/streamline/remove.py new file mode 100644 index 0000000000000000000000000000000000000000..ddc4233ddafbc70c4d20d316ea72ea6bba1b82a8 --- /dev/null +++ b/src/finn/transformation/streamline/remove.py @@ -0,0 +1,69 @@ +# Copyright (c) 2020, Xilinx +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of FINN nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
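A small illustration (not part of the patch) of why CollapseRepeatedOp above now pins the collapsed parameter and output to FLOAT32: two chained scalar ops whose parameters each fit a narrow integer datatype can collapse into a value that no longer does, so the conservative annotation lets a later InferDataTypes pass re-derive a safe type.

import numpy as np

a0 = np.float32(100.0)  # fits INT8, range [-128, 127]
a1 = np.float32(100.0)  # fits INT8
collapsed = a0 + a1     # 200.0 -- outside INT8, so inheriting INT8 would be wrong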
+
+
+from finn.transformation import Transformation
+from finn.transformation.infer_shapes import InferShapes
+import numpy as np
+
+
+class RemoveIdentityOps(Transformation):
+    """Remove identity ops like Add/Sub with zero or Mul/Div with one"""
+
+    def apply(self, model):
+        graph = model.graph
+        node_ind = 0
+        graph_modified = False
+        for n in graph.node:
+            node_ind += 1
+            if (
+                n.op_type in ["Add", "Sub"]
+                and not model.is_fork_node(n)
+                and not model.is_join_node(n)
+            ):
+                A = model.get_initializer(n.input[1])
+                if A is not None and (A == np.zeros_like(A)).all():
+                    producer = model.find_producer(n.input[0])
+                    # remove node and wire output tensor to
+                    # output of producer node
+                    producer.output[0] = n.output[0]
+                    graph.node.remove(n)
+                    graph_modified = True
+
+            elif (
+                n.op_type in ["Mul", "Div"]
+                and not model.is_fork_node(n)
+                and not model.is_join_node(n)
+            ):
+                A = model.get_initializer(n.input[1])
+                if A is not None and (A == np.ones_like(A)).all():
+                    producer = model.find_producer(n.input[0])
+                    # remove node and wire output tensor to
+                    # output of producer node
+                    producer.output[0] = n.output[0]
+                    graph.node.remove(n)
+                    graph_modified = True
+        model = model.transform(InferShapes())
+        return (model, graph_modified)
diff --git a/src/finn/transformation/streamline/reorder.py b/src/finn/transformation/streamline/reorder.py
index 0b6259a61d3eb67b7b38d4c6939019ce2893a875..2b03532ce3ba7d5159e5ae57e61c2af9c8c37fce 100644
--- a/src/finn/transformation/streamline/reorder.py
+++ b/src/finn/transformation/streamline/reorder.py
@@ -27,12 +27,19 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 import numpy as np
+import warnings
 from onnx import helper as oh
+from onnx import TensorProto
 
 from finn.transformation import Transformation
+import finn.core.data_layout as DataLayout
 from finn.transformation.infer_shapes import InferShapes
+from finn.transformation.infer_datatypes import InferDataTypes
+from finn.transformation.infer_data_layouts import InferDataLayouts
+from finn.core.datatype import DataType
 from finn.core.onnx_exec import execute_node
 from finn.util.basic import get_by_name
+from finn.custom_op.registry import getCustomOp
 
 
 class MoveAddPastMul(Transformation):
@@ -65,8 +72,11 @@ class MoveAddPastMul(Transformation):
                     add_weight_name = n.input[1]
                     A = model.get_initializer(mul_weight_name)
                     B = model.get_initializer(add_weight_name)
-                    assert A is not None, "Initializer for mul weights is not set."
-                    assert B is not None, "Initializer for add weights is not set."
+                    if (A is None) or (B is None):
+                        warnings.warn(
+                            "Mul or add does not have constant params, skipping"
+                        )
+                        continue
                     start_name = n.input[0]
                     middle_name = n.output[0]
                     end_name = consumer.output[0]
@@ -121,8 +131,9 @@ class MoveScalarMulPastMatMul(Transformation):
                     matmul_weight_name = consumer.input[1]
                     A = model.get_initializer(mul_weight_name)
                     W = model.get_initializer(matmul_weight_name)
-                    assert A is not None, "Initializer for mul weights is not set."
-                    assert W is not None, "Initializer for matmul weights is not set."
+                    if (A is None) or (W is None):
+                        warnings.warn("MatMul or Mul params are not constant, skipping")
+                        continue
                     start_name = n.input[0]
                     middle_name = n.output[0]
                     end_name = consumer.output[0]
@@ -178,8 +189,9 @@ class MoveScalarAddPastMatMul(Transformation):
                     matmul_weight_name = consumer.input[1]
                     A = model.get_initializer(add_weight_name)
                     W = model.get_initializer(matmul_weight_name)
-                    assert A is not None, "Initializer for add weights is not set."
-                    assert W is not None, "Initializer for matmul weights is not set."
+ if (A is None) or (W is None): + warnings.warn("MatMul or Add params are not constant, skipping") + continue start_name = n.input[0] middle_name = n.output[0] end_name = consumer.output[0] @@ -213,8 +225,8 @@ class MoveScalarAddPastMatMul(Transformation): return (model, graph_modified) -class MoveScalarAddPastConv(Transformation): - """Move scalar add operations past conv operations. We want to have adds +class MoveAddPastConv(Transformation): + """Move scalar and channelwise add operations past conv operations. We want to have adds next to each other such that they can be collapsed into a single add.""" def apply(self, model): @@ -239,8 +251,12 @@ class MoveScalarAddPastConv(Transformation): add_weight_name = n.input[1] conv_in_name = consumer.input[0] conv_in_shape = model.get_tensor_shape(conv_in_name) + # assume datalayout to be NCHW + channels = conv_in_shape[1] A = model.get_initializer(add_weight_name) - assert A is not None, "Initializer for add weights is not set." + if A is None: + warnings.warn("Add param is not constant, skipping") + continue start_name = n.input[0] end_name = consumer.output[0] conv_out_shape = model.get_tensor_shape(end_name) @@ -249,11 +265,17 @@ class MoveScalarAddPastConv(Transformation): pads = list(get_by_name(consumer.attribute, "pads").ints) if sum(pads) == 0: using_padding = False - if all(x == 1 for x in A.shape) and not using_padding: + if ( + all(x == 1 for x in A.shape) or A.shape == (1, channels, 1, 1) + ) and not using_padding: # create a tensor filled with the add constant, in # the shape expected by the convolution conv_in_const = np.zeros(conv_in_shape, dtype=np.float32) - conv_in_const.fill(A.item()) + if A.shape == (1, channels, 1, 1): + for ch in range(channels): + conv_in_const[0][ch].fill(A[0][ch].item()) + else: + conv_in_const.fill(A.item()) # create an execution context and put in const input exec_ctx = model.make_empty_exec_context() exec_ctx[conv_in_name] = conv_in_const @@ -308,7 +330,9 @@ class MoveScalarMulPastConv(Transformation): ): mul_weight_name = n.input[1] A = model.get_initializer(mul_weight_name) - assert A is not None, "Initializer for mul weights is not set." + if A is None: + warnings.warn("Mul param is not constant, skipping") + continue conv_node = consumer mul_node = n start_name = mul_node.input[0] @@ -336,6 +360,71 @@ class MoveScalarMulPastConv(Transformation): return (model, graph_modified) +class MoveMulPastDWConv(Transformation): + """Move channelwise mul operations past depthwise conv operations. We want to have muls + next to each other such that they can be collapsed into a single mul.""" + + def apply(self, model): + graph = model.graph + node_ind = 0 + graph_modified = False + for n in graph.node: + node_ind += 1 + if ( + n.op_type == "Mul" + and not model.is_fork_node(n) + and not model.is_join_node(n) + ): + consumer = model.find_consumer(n.output[0]) + if ( + consumer is not None + and consumer.op_type == "Conv" + and not model.is_join_node(consumer) + ): + mul_weight_name = n.input[1] + A = model.get_initializer(mul_weight_name) + if A is None: + warnings.warn( + """Mul weight tensor is not set. 
If it is a constant, + please use set_initializer to set the tensor.""" + ) + continue + conv_node = consumer + mul_node = n + start_name = mul_node.input[0] + conv_in_name = conv_node.input[0] + conv_in_shape = model.get_tensor_shape(conv_in_name) + ifm_ch = conv_in_shape[1] + group_attribute = get_by_name(consumer.attribute, "group") + if group_attribute is None: + continue + group_attribute = group_attribute.i + conv_out_name = conv_node.output[0] + conv_out_shape = model.get_tensor_shape(conv_out_name) + if A.shape == (1, ifm_ch, 1, 1) and ifm_ch == group_attribute: + # if the mul is channelwise and conv is depthwise, + # we can simply swap the order of ops + # rewire mul input to be conv input + conv_node.input[0] = start_name + model.set_tensor_shape(start_name, conv_in_shape) + model.set_tensor_datatype(start_name, DataType.FLOAT32) + # use old conv input tensor as conv output + conv_node.output[0] = conv_in_name + model.set_tensor_shape(conv_in_name, conv_out_shape) + model.set_tensor_datatype(conv_in_name, DataType.FLOAT32) + # use new conv output as new mul node input + mul_node.input[0] = conv_in_name + # use old conv output as new mul node output + mul_node.output[0] = conv_out_name + model.set_tensor_datatype(conv_out_name, DataType.FLOAT32) + # move mul node past conv node + graph.node.remove(mul_node) + graph.node.insert(node_ind, mul_node) + graph_modified = True + model = model.transform(InferShapes()) + return (model, graph_modified) + + class MoveLinearPastEltwiseAdd(Transformation): """Move linear operations (mul, add) past elementwise add operations where possible. Specifically,matches and transforms the following patterns: @@ -531,3 +620,279 @@ class MoveMulPastFork(MoveOpPastFork): class MoveLinearPastFork(MoveOpPastFork): def __init__(self): super().__init__(["Add", "Mul"]) + + +class MoveMaxPoolPastMultiThreshold(Transformation): + """Move MaxPool nodes past MultiThreshold nodes on linear segments of the graph.""" + + def apply(self, model): + graph = model.graph + node_ind = 0 + graph_modified = False + nodes = [n for n in graph.node] + for n in nodes: + node_ind += 1 + if n.op_type == "MaxPool" and not model.is_fork_node(n): + consumer = model.find_consumer(n.output[0]) + pads = get_by_name(n.attribute, "pads") + has_padding = False + if pads is not None: + pads = list(pads.ints) + has_padding = np.prod(pads) != 0 + if consumer is not None and consumer.op_type == "MultiThreshold": + mt_out = consumer.output[0] + mt_odt = model.get_tensor_datatype(mt_out) + if mt_odt.signed() and has_padding: + warnings.warn( + "Skipping padded MaxPool + signed-output MultiThreshold" + ) + continue + # check for non-decreasing thresholds and nonnegative + # scale factor in MultiThreshold + # otherwise we cannot do the reordering + T = model.get_initializer(consumer.input[1]) + T_sorted = np.sort(T, axis=1) + assert ( + T == T_sorted + ).all(), "MultiThreshold must have non-decreasing thresholds" + mt_inst = getCustomOp(consumer) + if mt_inst.get_nodeattr("out_scale") < 0: + warnings.warn("Skipping MultiThreshold with negative out_scale") + continue + + # remove old nodes + graph.node.remove(n) + graph.node.remove(consumer) + + # swap conections + group_in = n.input[0] + # new tensor because dims change + group_middle = model.make_new_valueinfo_name() + group_out = consumer.output[0] + + consumer.input[0] = group_in + consumer.output[0] = group_middle + + n.input[0] = group_middle + n.output[0] = group_out + + # insert them back in + graph.node.insert(node_ind - 1, consumer) + 
graph.node.insert(node_ind, n) + + graph_modified = True + + model = model.transform(InferShapes()) + return (model, graph_modified) + +class MoveFlattenPastTopK(Transformation): + """Move flatten node past a succeeding topk node, if the "axis" attribute in topk + is set to -1 and the data layout before the flatten is NHWC with H=W=1""" + + def apply(self, model): + graph = model.graph + node_ind = 0 + graph_modified = False + for n in graph.node: + node_ind += 1 + if n.op_type == "Flatten": + consumer = model.find_consumer(n.output[0]) + if consumer is not None and consumer.op_type == "TopK": + axis = get_by_name(consumer.attribute, "axis") + if axis is None or axis.i != -1: + continue + start_name = n.input[0] + data_layout = model.get_tensor_layout(start_name) + if data_layout != DataLayout.NHWC: + warnings.warn( + """Transformation can't be applied. The input + to flatten has to have DataLayout.NHWC""" + ) + continue + (b, h, w, c) = model.get_tensor_shape(start_name) + if h != 1 or w != 1: + continue + # get parameter k from topk + k = model.get_tensor_shape(consumer.output[1])[-1] + + # swap conections + # new tensor because dims change + middle_name = model.make_new_valueinfo_name() + topk_indices = oh.make_tensor_value_info( + middle_name, TensorProto.INT64, [b, h, w, k] + ) + end_name = consumer.output[1] + graph.value_info.append(topk_indices) + + # remove old nodes + graph.node.remove(n) + graph.node.remove(consumer) + + # set inputs and outputs correctly + consumer.input[0] = start_name + consumer.output[1] = middle_name + model.set_tensor_shape(consumer.output[0], (b, h, w, k)) + + n.input[0] = middle_name + n.output[0] = end_name + + # insert them back in + graph.node.insert(node_ind - 1, consumer) + graph.node.insert(node_ind, n) + + graph_modified = True + + model = model.transform(InferShapes()) + return (model, graph_modified) + +class MoveFlattenPastAffine(Transformation): + """Moves a node that implements a (1, -1) reshape past a MatMul, Mul or Add node.""" + + def apply(self, model): + graph = model.graph + graph_modified = False + node_ind = 0 + for n in graph.node: + node_ind += 1 + if ( + n.op_type == "Flatten" + and not model.is_fork_node(n) + and not model.is_join_node(n) + ): + consumer = model.find_consumer(n.output[0]) + if ( + consumer is not None + and ( + consumer.op_type == "MatMul" + or consumer.op_type == "Mul" + or consumer.op_type == "Add" + ) + and not model.is_join_node(consumer) + ): + # move flatten past operation and rewire tensors + start_name = n.input[0] + # check if datalyout is set to NHWC and H=W=1 + datalayout = model.get_tensor_layout(start_name) + if datalayout == DataLayout.NHWC: + (b, h, w, c) = model.get_tensor_shape(start_name) + if h != 1 or w != 1: + warnings.warn( + """The Transformation can only be performed if + H=W=1.""" + ) + continue + else: + warnings.warn( + """The Transformation can only be performed on + operations that operate on data layout NHWC.""" + ) + continue + middle_name = n.output[0] + end_name = consumer.output[0] + op_param_name = consumer.input[1] + A = model.get_initializer(op_param_name) + if A is None: + warnings.warn("Param is not constant, skipping") + continue + op_in_dt = model.get_tensor_datatype(consumer.input[0]) + op_out_dt = model.get_tensor_datatype(consumer.output[0]) + start_shape = model.get_tensor_shape(start_name) + dummy_in = np.random.uniform(low=0, high=1, size=(start_shape)) + + if consumer.op_type == "MatMul": + dummy_out = np.matmul(dummy_in, A) + elif consumer.op_type == "Mul": + 
dummy_out = dummy_in * A + elif consumer.op_type == "Add": + dummy_out = dummy_in + A + + new_op = oh.make_node( + consumer.op_type, + [start_name, op_param_name], + [middle_name], + name=consumer.name, + ) + new_flatten = oh.make_node("Flatten", [middle_name], [end_name]) + graph.node.insert(node_ind, new_op) + graph.node.insert(node_ind + 1, new_flatten) + model.set_tensor_shape(middle_name, dummy_out.shape) + # because a flatten node doesn't change the datatype we need + # only the datatype of the op node + model.set_tensor_datatype(start_name, op_in_dt) + model.set_tensor_datatype(middle_name, op_out_dt) + model.set_tensor_datatype(end_name, op_out_dt) + # set datalayout + model.set_tensor_layout(start_name, DataLayout.NHWC) + model.set_tensor_layout(middle_name, DataLayout.NHWC) + # remove old nodes + graph.node.remove(n) + graph.node.remove(consumer) + graph_modified = True + + model = model.transform(InferShapes()) + model = model.transform(InferDataTypes()) + model = model.transform(InferDataLayouts()) + return (model, graph_modified) + +class MoveTransposePastScalarMul(Transformation): + """Moves a Transpose node past a scalar Mul node""" + + def apply(self, model): + graph = model.graph + node_ind = 0 + graph_modified = False + for n in graph.node: + node_ind += 1 + if ( + n.op_type == "Transpose" + and not model.is_fork_node(n) + and not model.is_join_node(n) + ): + consumer = model.find_consumer(n.output[0]) + if ( + consumer is not None + and consumer.op_type == "Mul" + and not model.is_join_node(consumer) + ): + mul_weight_name = consumer.input[1] + A = model.get_initializer(mul_weight_name) + if A is None: + warnings.warn("Mul param is not constant, skipping") + continue + transp_node = n + mul_node = consumer + start_name = transp_node.input[0] + middle_name = transp_node.output[0] + end_name = mul_node.output[0] + transp_in_shape = model.get_tensor_shape(start_name) + transp_out_shape = model.get_tensor_shape(middle_name) + transp_in_layout = model.get_tensor_layout(start_name) + transp_out_layout = model.get_tensor_layout(middle_name) + if transp_in_layout is None or transp_out_layout is None: + warnings.warn( + """Datalayout is not set for tensors. + Transformation can't be applied.""" + ) + continue + if all(x == 1 for x in A.shape): + # if the mul is scalar, we can simply swap the order of ops + # rewire transpose input to be mul input + mul_node.input[0] = start_name + model.set_tensor_shape(start_name, transp_in_shape) + model.set_tensor_layout(start_name, transp_in_layout) + mul_node.output[0] = middle_name + model.set_tensor_shape(middle_name, transp_in_shape) + model.set_tensor_layout(middle_name, transp_in_layout) + transp_node.input[0] = middle_name + transp_node.output[0] = end_name + model.set_tensor_shape(end_name, transp_out_shape) + model.set_tensor_layout(end_name, transp_out_layout) + graph.node.remove(transp_node) + graph.node.insert(node_ind, transp_node) + graph_modified = True + + if graph_modified is True: + model = model.transform(InferDataLayouts()) + model = model.transform(InferShapes()) + return (model, graph_modified) + diff --git a/src/finn/util/basic.py b/src/finn/util/basic.py index 3880bb9591e27af5fe9d063dba2485d304e4db54..4a8277e08d3fc21e0b20668edf2ecad947b36647 100644 --- a/src/finn/util/basic.py +++ b/src/finn/util/basic.py @@ -31,6 +31,7 @@ import random import string import subprocess import tempfile +import warnings import numpy as np @@ -56,6 +57,12 @@ def get_rtlsim_trace_depth(): via the RTLSIM_TRACE_DEPTH environment variable. 
If the env.var. is undefined, the default value of 1 is returned. A trace depth of 1 will only show top-level signals and yield smaller .vcd files. + + The following depth values are of interest for whole-network stitched IP + rtlsim: + - level 1 shows top-level input/output streams + - level 2 shows per-layer input/output streams + - level 3 shows per full-layer I/O including FIFO count signals """ try: @@ -64,6 +71,16 @@ def get_rtlsim_trace_depth(): return 1 +def get_remote_vivado(): + """Return the address of the remote Vivado synthesis server as set by the, + REMOTE_VIVADO environment variable, otherwise return None""" + + try: + return os.environ["REMOTE_VIVADO"] + except KeyError: + return None + + def get_num_default_workers(): """Return the number of workers for parallel transformations. Controllable via the NUM_DEFAULT_WORKERS environment variable. If the env.var. is @@ -89,6 +106,25 @@ def get_finn_root(): ) +def get_execution_error_thresh(): + "Return the max error that is allowed for rounding in FINN execution." + try: + return float(os.environ["ERROR_THRESH"]) + except KeyError: + return 1e-2 + + +def get_sanitize_quant_tensors(): + """Return whether tensors with quantization annotations should be sanitized. + Enabled by default, disabling will yield faster ONNX execution but may give + incorrect results. Use with caution.""" + try: + return int(os.environ["SANITIZE_QUANT_TENSORS"]) + except KeyError: + # enabled by default + return 1 + + def make_build_dir(prefix=""): """Creates a temporary folder with given prefix to be used as a build dir. Use this function instead of tempfile.mkdtemp to ensure any generated files @@ -248,6 +284,69 @@ def calculate_signed_dot_prod_range(dt_a, dt_b, len): return (min_prod, max_prod) +def sanitize_quant_values(model, node_tensors, execution_context, check_values=False): + """ Sanitize given list of tensors in execution_context by rounding values + that are supposed to be integers (as indicated by their quantization + annotation). Will raise an assertion if the amount of rounding is too large. + Returns the sanitized execution context. + + If check_values is specified, an extra DataType.allowed() check will be + performed on any rounded tensors. + + Background: + FINN uses floating point tensors as a carrier data type to represent + integers. Floating point arithmetic can introduce rounding errors, e.g. + (int_num * float_scale) / float_scale is not always equal to int_num. + We use this function to ensure that the values that are supposed to be + integers are indeed integers. 
+ """ + + for tensor in node_tensors: + dtype = model.get_tensor_datatype(tensor) + # floats don't need sanitization, skip to next + # introduces less quicker runtime + if dtype == DataType.FLOAT32: + continue + current_values = execution_context[tensor] + updated_values = current_values + has_to_be_rounded = False + # TODO: vectorize with numpy + for value in np.nditer(current_values): + if not dtype.allowed(value): + has_to_be_rounded = True + break + if has_to_be_rounded: + updated_values = np.round(current_values) + warnings.warn( + "The values of tensor {} can't be represented " + "with the set FINN datatype ({}), they will be rounded to match the " + "FINN datatype.".format(tensor, dtype) + ) + # check if rounded values are not too far from original values + max_error = max(np.abs(current_values - updated_values).flatten()) + if max_error <= get_execution_error_thresh(): + if check_values is True: + # check again if values can now be represented with set finn datatype + # TODO: vectorize with numpy + for value in np.nditer(updated_values): + if not dtype.allowed(value): + raise Exception( + """Values can't be represented with set + finn datatype ({}) for input {}""".format( + dtype, tensor + ) + ) + execution_context[tensor] = updated_values + else: + raise Exception( + """Rounding error is too high to match set FINN + datatype ({}) for input {}""".format( + dtype, tensor + ) + ) + return execution_context + + class CppBuilder: """Builds the g++ compiler command to produces the executable of the c++ code in code_gen_dir which is passed to the function build() of this class.""" diff --git a/src/finn/util/create.py b/src/finn/util/create.py new file mode 100644 index 0000000000000000000000000000000000000000..853cdd0d44a05426b34bf1db3caa58d9289b2e9e --- /dev/null +++ b/src/finn/util/create.py @@ -0,0 +1,178 @@ +# Copyright (c) 2020, Xilinx +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of FINN nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
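To make the rounding rule in sanitize_quant_values above concrete, here is an illustrative sketch (not part of the patch). It assumes a ModelWrapper called model whose tensor "act_0" is annotated with an integer FINN datatype such as INT4; the helper and its signature are taken from the hunk above.

import numpy as np
from finn.util.basic import sanitize_quant_values

ctx = {"act_0": np.asarray([2.9999998, -1.0000001], dtype=np.float32)}
# the rounding error here is ~2e-7, below the default ERROR_THRESH of 1e-2,
# so the values are rounded to [3.0, -1.0] and execution continues
ctx = sanitize_quant_values(model, ["act_0"], ctx)
# a value such as 2.4 would raise instead, since its rounding error (0.4)
# exceeds the threshold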
+ +import numpy as np +from finn.core.modelwrapper import ModelWrapper +from onnx import TensorProto, helper +from finn.core.datatype import DataType +from finn.util.basic import calculate_signed_dot_prod_range, gen_finn_dt_tensor + + +def hls_random_mlp_maker(layer_spec): + """Create an MLP of given specification using HLSCustomOp instances. + Generate random weights/thresholds of appropriate size.""" + ret = [] + for l in layer_spec: + idt = l["idt"] + wdt = l["wdt"] + mw = l["mw"] + mh = l["mh"] + act = l["act"] + l["W"] = gen_finn_dt_tensor(wdt, (mw, mh)) + if act is None: + # no activation, produce accumulators + T = None + tdt = None + if wdt == DataType.BIPOLAR and idt == DataType.BIPOLAR: + odt = DataType.UINT32 + else: + odt = DataType.INT32 + else: + odt = act + (min, max) = calculate_signed_dot_prod_range(idt, wdt, mw) + n_steps = act.get_num_possible_values() - 1 + T = np.random.randint(min, max - 1, (mh, n_steps)).astype(np.float32) + # provide non-decreasing thresholds + T = np.sort(T, axis=1) + # generate thresholds for activation + if wdt == DataType.BIPOLAR and idt == DataType.BIPOLAR: + tdt = DataType.UINT32 + # bias thresholds to be positive + T = np.ceil((T + mw) / 2) + assert (T >= 0).all() + else: + tdt = DataType.INT32 + l["T"] = T + l["tdt"] = tdt + l["odt"] = odt + ret.append(l) + + return hls_mlp_maker(ret) + + +def hls_mlp_maker(layer_spec): + """Create an MLP of given specification using HLSCustomOp instances.""" + + current_in_name = "" + current_out_name = "" + i = 0 + + graph = helper.make_graph(nodes=[], name="mlp", inputs=[], outputs=[]) + + model = helper.make_model(graph, producer_name="finn") + model = ModelWrapper(model) + + for l in layer_spec: + current_W_name = "W_%d" % i + current_T_name = "T_%d" % i + current_in_name = "act_%d" % i + current_out_name = "act_%d" % (i + 1) + + W = l["W"] + (mw, mh) = W.shape + T = l["T"] + pe = l["pe"] + simd = l["simd"] + wdt = l["wdt"] + idt = l["idt"] + tdt = l["tdt"] + odt = l["odt"] + + if i == 0: + global_in = helper.make_tensor_value_info( + current_in_name, TensorProto.FLOAT, [1, mw] + ) + model.graph.input.append(global_in) + + if i == len(layer_spec) - 1: + global_out = helper.make_tensor_value_info( + current_out_name, TensorProto.FLOAT, [1, mh] + ) + model.graph.output.append(global_out) + + # there are two ways to implement bipolar weights and inputs for + # StreamingFC: + # - specify their datatypes as such + # - specify their datatypes as BINARY as use binaryXnorMode + if wdt == DataType.BIPOLAR and idt == DataType.BIPOLAR: + # we'll internally convert weights/inputs to binary and specify the + # datatypes as such, and also set the binaryXnorMode attribute to 1 + export_wdt = DataType.BINARY + export_idt = DataType.BINARY + binary_xnor_mode = 1 + else: + export_wdt = wdt + export_idt = idt + binary_xnor_mode = 0 + + if T is not None: + no_act = 0 + node_inp_list = [current_in_name, current_W_name, current_T_name] + if odt == DataType.BIPOLAR: + actval = 0 + else: + actval = odt.min() + else: + # no thresholds + node_inp_list = [current_in_name, current_W_name] + actval = 0 + no_act = 1 + FCLayer_node = helper.make_node( + "StreamingFCLayer_Batch", + node_inp_list, + [current_out_name], + domain="finn", + backend="fpgadataflow", + resType="ap_resource_lut()", + MW=mw, + MH=mh, + SIMD=simd, + PE=pe, + inputDataType=export_idt.name, + weightDataType=export_wdt.name, + outputDataType=odt.name, + ActVal=actval, + binaryXnorMode=binary_xnor_mode, + noActivation=no_act, + ) + + 
model.graph.node.append(FCLayer_node) + model.set_tensor_datatype(current_in_name, idt) + model.set_tensor_datatype(current_out_name, odt) + model.set_tensor_datatype(current_W_name, wdt) + if binary_xnor_mode: + # convert bipolar to binary + model.set_initializer(current_W_name, (W + 1) / 2) + else: + model.set_initializer(current_W_name, W) + if T is not None: + model.set_tensor_datatype(current_T_name, tdt) + model.set_initializer(current_T_name, T) + i += 1 + + return model diff --git a/src/finn/util/fpgadataflow.py b/src/finn/util/fpgadataflow.py index d1669444e55cb0fddb2690e51849c4603d47d32c..3fe747a84985b2702ffb1e5855d9071362efebda 100644 --- a/src/finn/util/fpgadataflow.py +++ b/src/finn/util/fpgadataflow.py @@ -104,6 +104,7 @@ def pyverilate_stitched_ip(model): build_dir=build_dir, trace_depth=get_rtlsim_trace_depth(), top_module_name=top_module_name, + auto_eval=False, ) return sim diff --git a/src/finn/util/onnx.py b/src/finn/util/onnx.py index b9932111d86d7206b23e1d0e49a6aa8451f8ba24..4d7cdd126ededac887639a932c2021ef5f081c02 100644 --- a/src/finn/util/onnx.py +++ b/src/finn/util/onnx.py @@ -28,6 +28,7 @@ import numpy as np import onnx +import finn.core.data_layout as DataLayout def valueinfo_to_tensor(vi): @@ -37,3 +38,38 @@ def valueinfo_to_tensor(vi): return np.zeros( dims, dtype=onnx.mapping.TENSOR_TYPE_TO_NP_TYPE[vi.type.tensor_type.elem_type] ) + + +def nchw_to_nhwc(t, model, idx, reverse=False): + """Converts between NCHW <-> NHWC layouts for tensor t by inserting a transpose. + If reverse=False, t is assumed NCHW and we insert transpose to convert NCHW -> NHWC + If reverse=True, t is assumed NHWC and we insert transpose to convert NHWC -> NCHW. + """ + graph = model.graph + # create new NHWC tensor + t_shape = model.get_tensor_shape(t) + bs = t_shape[0] + ch = t_shape[1] + height = t_shape[2] + width = t_shape[3] + t_trans = onnx.helper.make_tensor_value_info( + model.make_new_valueinfo_name(), + onnx.TensorProto.FLOAT, + (bs, height, width, ch), # NHWC + ) + graph.value_info.append(t_trans) + dt = model.get_tensor_datatype(t) + t_trans = t_trans.name + model.set_tensor_datatype(t_trans, dt) + model.set_tensor_layout(t_trans, DataLayout.NHWC) + # NCHW <-> NHWC transpose + if reverse: + t_trans_node = onnx.helper.make_node( + "Transpose", [t_trans], [t], perm=[0, 3, 1, 2] + ) + else: + t_trans_node = onnx.helper.make_node( + "Transpose", [t], [t_trans], perm=[0, 2, 3, 1] + ) + graph.node.insert(idx, t_trans_node) + return t_trans diff --git a/src/finn/util/vcd.py b/src/finn/util/vcd.py new file mode 100644 index 0000000000000000000000000000000000000000..d9e244422065314ceb790dc6719b57688ff76828 --- /dev/null +++ b/src/finn/util/vcd.py @@ -0,0 +1,184 @@ +# Copyright (c) 2020, Xilinx +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of FINN nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +from vcdvcd import VCDVCD +from finn.util.basic import get_num_default_workers +import multiprocessing as mp + +# string patterns to search for to find particular interfaces +# streaming interfaces +vname = "TVALID" +rname = "TREADY" +# FIFO count signals +fifo_mod_name = "StreamingFIFO" +fifo_cname = "count" + + +def list_stream_if(vcd_file): + "Return a list of stream interface names from given vcd trace." + + sig_names = VCDVCD(vcd_file, print_dumps=False, only_sigs=True).get_signals() + stream_if_names = [] + for cand_name in filter(lambda x: x.endswith(vname), sig_names): + base_name = cand_name.replace(vname, "") + if base_name + rname in sig_names: + stream_if_names.append(base_name) + return stream_if_names + + +def list_fifo_count_signals(vcd_file): + "Return a list of FIFO count signal names from given vcd trace." + + sig_names = VCDVCD(vcd_file, print_dumps=False, only_sigs=True).get_signals() + fifo_cnt_names = [] + for cand_name in filter(lambda x: fifo_cname in x, sig_names): + if fifo_mod_name in cand_name: + fifo_cnt_names.append(cand_name) + return fifo_cnt_names + + +def get_fifo_count_max(vcd_file, fifo_count_signal): + "Return the maximum value of the given FIFO count signal in vcd trace." + + d = VCDVCD(vcd_file, signals=[fifo_count_signal], store_tvs=True).get_data() + assert len(d) != 0, "FIFO count signal not found" + events = list(d.values())[0]["tv"] + max = 0 + for (time, val) in events: + current = int(val, base=2) + if current > max: + max = current + return max + + +def _get_fifo_max(x): + return (x[0], get_fifo_count_max(x[1], x[0])) + + +def get_all_fifo_count_max(vcd_file, fifo_count_signals=None): + """Return a list of max FIFO counts. If fifo_count_signals is None, + all FIFO count signals will be returned, otherwise treated as a list of + signal names to return the stats for.""" + if fifo_count_signals is None: + fifo_count_signals = list_fifo_count_signals(vcd_file) + + with mp.Pool(get_num_default_workers()) as p: + fifo_count_signals = map(lambda x: (x, vcd_file), fifo_count_signals) + all_stats = p.map(_get_fifo_max, fifo_count_signals) + + return all_stats + + +def get_stream_if_stats(vcd_file, if_base_name): + """Return statistics for given streaming interface in vcd trace in the + following dict format: + + <stream_state>: (<num_samples>, <fraction_of_time>), + + where <stream_state> is the combination of (V)alid/(R)eady values, + <num_samples> is the approximate number of rising clock edges spent in <state> + , and <fraction_of_time> is the fraction of <num_samples> to total + amount of time recorded by the trace. 
+
+    Example:
+    {"{'V': 0, 'R': 0}": (5, 0.0006060606060606061),
+    "{'V': 1, 'R': 0}": (0, 0.0),
+    "{'V': 0, 'R': 1}": (7605, 0.9218181818181819),
+    "{'V': 1, 'R': 1}": (640, 0.07757575757575758)}
+
+    Here we can see the stream was transmitting values 7.7% of the time,
+    and 92.2% of the time there was no incoming data (valid 0, ready 1).
+    """
+    if_valid = if_base_name + vname
+    if_ready = if_base_name + rname
+    v = VCDVCD(vcd_file, signals=[if_valid], store_tvs=True)
+    endtime = v.get_endtime()
+    v = v.get_data()
+    assert len(v) != 0, "Streaming interface not found"
+    v = list(v.values())[0]["tv"]
+    v = list(map(lambda x: ("V", x[0], x[1]), v))
+    v.append(("V", endtime, "0"))
+    r = VCDVCD(vcd_file, signals=[if_ready], store_tvs=True).get_data()
+    assert len(r) != 0, "Streaming interface not found"
+    r = list(r.values())[0]["tv"]
+    r = list(map(lambda x: ("R", x[0], x[1]), r))
+    r.append(("R", endtime, "0"))
+    events = sorted(v + r, key=lambda x: x[1])
+    ret = {
+        "{'V': 0, 'R': 0}": 0,
+        "{'V': 1, 'R': 0}": 0,
+        "{'V': 0, 'R': 1}": 0,
+        "{'V': 1, 'R': 1}": 0,
+    }
+    status = {"V": 0, "R": 0}
+    last_time = 0
+    total_rising_clock_edges = 0
+    for (sig, time, val) in events:
+        # pyverilator generates 5 time units per sample
+        time = time / 5
+        # pyverilator generates 4 samples per clock period
+        n_rising_clock_edges = int((time - last_time) / 4)
+        # note that the calculation of n_rising_clock_edges is approximate
+        # doing this exactly would require a cycle-by-cycle walkthrough of the
+        # trace, which can take very long
+        ret[str(status)] += n_rising_clock_edges
+        total_rising_clock_edges += n_rising_clock_edges
+        status[sig] = int(val)
+        last_time = time
+
+    for state in ret:
+        v = ret[state]
+        ret[state] = (v, v / total_rising_clock_edges)
+
+    return ret
+
+
+def _get_stats(x):
+    return (x[0], get_stream_if_stats(x[1], x[0]))
+
+
+def get_all_stream_if_stats(vcd_file, stream_ifs=None, sort_by="{'V': 1, 'R': 0}"):
+    """Return a list of streaming interface stats, sorted by the percentage
+    for the given sort_by key. If stream_ifs is None, all streaming interface
+    stats will be returned, otherwise treated as a list of interface names to
+    return the stats for."""
+
+    if stream_ifs is None:
+        stream_ifs = list_stream_if(vcd_file)
+
+    with mp.Pool(get_num_default_workers()) as p:
+        stream_ifs = map(lambda x: (x, vcd_file), stream_ifs)
+        all_stats = p.map(_get_stats, stream_ifs)
+
+    def sort_key(x):
+        stat = x[1]
+        (samples, percent) = stat[sort_by]
+        return percent
+
+    ret = sorted(all_stats, key=sort_key)
+    return ret
diff --git a/src/finn/util/vivado.py b/src/finn/util/vivado.py
new file mode 100644
index 0000000000000000000000000000000000000000..6b6df3940cfeeed292345382471719c49f725de6
--- /dev/null
+++ b/src/finn/util/vivado.py
@@ -0,0 +1,147 @@
+# Copyright (c) 2020, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import os +import subprocess +import stat +from finn.util.basic import get_remote_vivado + + +def which(program): + "Python equivalent of the shell cmd 'which'." + + # source: + # https://stackoverflow.com/questions/377017/test-if-executable-exists-in-python + def is_exe(fpath): + return os.path.isfile(fpath) and os.access(fpath, os.X_OK) + + fpath, fname = os.path.split(program) + if fpath: + if is_exe(program): + return program + else: + for path in os.environ["PATH"].split(os.pathsep): + exe_file = os.path.join(path, program) + if is_exe(exe_file): + return exe_file + + return None + + +def out_of_context_synth( + verilog_dir, + top_name, + fpga_part="xczu3eg-sbva484-1-e", + clk_name="ap_clk_0", + clk_period_ns=5.0, + remote_server=get_remote_vivado(), +): + "Run out-of-context Vivado synthesis, return resources and slack." + + # ensure that the OH_MY_XILINX envvar is set + if "OHMYXILINX" not in os.environ: + raise Exception("The environment variable OHMYXILINX is not defined.") + # ensure that vivado is in PATH: source $VIVADO_PATH/settings64.sh + if which("vivado") is None: + raise Exception("vivado is not in PATH, ensure settings64.sh is sourced.") + omx_path = os.environ["OHMYXILINX"] + if remote_server is None: + script = "vivadocompile.sh" + else: + script = "vivadoprojgen.sh" + # vivadocompile.sh <top-level-entity> <clock-name (optional)> <fpga-part (optional)> + call_omx = "zsh %s/%s %s %s %s %f" % ( + omx_path, + script, + top_name, + clk_name, + fpga_part, + float(clk_period_ns), + ) + call_omx = call_omx.split() + proc = subprocess.Popen( + call_omx, cwd=verilog_dir, stdout=subprocess.PIPE, env=os.environ + ) + proc.communicate() + + vivado_proj_folder = "%s/results_%s" % (verilog_dir, top_name) + res_counts_path = vivado_proj_folder + "/res.txt" + if remote_server is not None: + print("Using remote Vivado OOC synth, remote server %s" % remote_server) + run_synth = """ +#!/bin/bash +which vivado; +cd %s; +vivado -mode tcl -source %s.tcl -tclargs %s; +cat %s + """ % ( + vivado_proj_folder, + top_name, + top_name, + res_counts_path, + ) + with open(vivado_proj_folder + "/run.sh", "w") as f: + f.write(run_synth) + st = os.stat(vivado_proj_folder + "/run.sh") + os.chmod(vivado_proj_folder + "/run.sh", st.st_mode | stat.S_IEXEC) + # note that this assumes the same temp folder can be created on the + # remote server + # note we set target path as / due to use of -R (relative) + remote_server_uri = remote_server + ":/" + copy_files = "rsync -avzR %s %s" % (verilog_dir + "/", remote_server_uri) + copy_files = copy_files.split() + proc = subprocess.Popen(copy_files, cwd=verilog_dir, env=os.environ) + proc.communicate() + vivado_cmd = "bash -ic %s/run.sh" % vivado_proj_folder + run_vivado = ["ssh", "-t", 
remote_server, vivado_cmd] + proc = subprocess.Popen(run_vivado, cwd=verilog_dir, env=os.environ) + proc.communicate() + remote_server_result = remote_server + ":" + res_counts_path + copy_results = "rsync -avz %s %s" % (remote_server_result, res_counts_path) + copy_results = copy_results.split() + proc = subprocess.Popen(copy_results, cwd=verilog_dir, env=os.environ) + proc.communicate() + + with open(res_counts_path, "r") as myfile: + res_data = myfile.read().split("\n") + ret = {} + ret["vivado_proj_folder"] = vivado_proj_folder + for res_line in res_data: + res_fields = res_line.split("=") + print(res_fields) + try: + ret[res_fields[0]] = float(res_fields[1]) + except ValueError: + ret[res_fields[0]] = 0 + except IndexError: + ret[res_fields[0]] = 0 + if ret["WNS"] == 0: + ret["fmax_mhz"] = 0 + else: + ret["fmax_mhz"] = 1000.0 / (clk_period_ns - ret["WNS"]) + return ret diff --git a/tests/brevitas/test_brevitas_avg_pool_export.py b/tests/brevitas/test_brevitas_avg_pool_export.py new file mode 100644 index 0000000000000000000000000000000000000000..e78812b21a03baa6963f1f0efaefdb4c73e4d0db --- /dev/null +++ b/tests/brevitas/test_brevitas_avg_pool_export.py @@ -0,0 +1,103 @@ +import os + +import onnx # noqa +import torch +import numpy as np +import brevitas.onnx as bo +from brevitas.nn import QuantAvgPool2d +from brevitas.quant_tensor import pack_quant_tensor +from brevitas.core.quant import QuantType +from finn.core.modelwrapper import ModelWrapper +from finn.core.datatype import DataType +from finn.transformation.infer_shapes import InferShapes +from finn.transformation.infer_datatypes import InferDataTypes +from finn.util.basic import gen_finn_dt_tensor +import finn.core.onnx_exec as oxe + +import pytest + +export_onnx_path = "test_brevitas_avg_pool_export.onnx" + + +@pytest.mark.parametrize("kernel_size", [2, 3]) +@pytest.mark.parametrize("stride", [1, 2]) +@pytest.mark.parametrize("signed", [False, True]) +@pytest.mark.parametrize("bit_width", [2, 4]) +@pytest.mark.parametrize("input_bit_width", [4, 8, 32]) +@pytest.mark.parametrize("channels", [2, 4]) +@pytest.mark.parametrize("idim", [7, 8]) +def test_brevitas_avg_pool_export( + kernel_size, stride, signed, bit_width, input_bit_width, channels, idim +): + ishape = (1, channels, idim, idim) + ibw_tensor = torch.Tensor([input_bit_width]) + + b_avgpool = QuantAvgPool2d( + kernel_size=kernel_size, + stride=stride, + signed=signed, + min_overall_bit_width=bit_width, + max_overall_bit_width=bit_width, + quant_type=QuantType.INT, + ) + # call forward pass manually once to cache scale factor and bitwidth + input_tensor = torch.from_numpy(np.zeros(ishape)).float() + scale = np.ones((1, channels, 1, 1)) + output_scale = torch.from_numpy(scale).float() + input_quant_tensor = pack_quant_tensor( + tensor=input_tensor, scale=output_scale, bit_width=ibw_tensor + ) + bo.export_finn_onnx(b_avgpool, ishape, export_onnx_path, input_t=input_quant_tensor) + model = ModelWrapper(export_onnx_path) + + # determine input FINN datatype + if signed is True: + prefix = "INT" + else: + prefix = "UINT" + dt_name = prefix + str(input_bit_width // 2) + dtype = DataType[dt_name] + model = model.transform(InferShapes()) + model = model.transform(InferDataTypes()) + + # execution with input tensor using integers and scale = 1 + # calculate golden output + inp = gen_finn_dt_tensor(dtype, ishape) + input_tensor = torch.from_numpy(inp).float() + input_quant_tensor = pack_quant_tensor( + tensor=input_tensor, scale=output_scale, bit_width=ibw_tensor + ) + b_avgpool.eval() 
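+    # descriptive note: with the layer switched to eval mode, the next call runs
+    # the Brevitas module directly on the quantized input to obtain the
+    # reference ("golden") output that the FINN-based execution below is
+    # compared against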
+ expected = b_avgpool.forward(input_quant_tensor).tensor.detach().numpy() + + # finn execution + idict = {model.graph.input[0].name: inp} + odict = oxe.execute_onnx(model, idict, True) + produced = odict[model.graph.output[0].name] + assert (expected == produced).all() + + # execution with input tensor using float and scale != 1 + scale = np.random.uniform(low=0, high=1, size=(1, channels, 1, 1)).astype( + np.float32 + ) + inp_tensor = inp * scale + input_tensor = torch.from_numpy(inp_tensor).float() + input_scale = torch.from_numpy(scale).float() + input_quant_tensor = pack_quant_tensor( + tensor=input_tensor, scale=input_scale, bit_width=ibw_tensor + ) + # export again to set the scale values correctly + bo.export_finn_onnx(b_avgpool, ishape, export_onnx_path, input_t=input_quant_tensor) + model = ModelWrapper(export_onnx_path) + model = model.transform(InferShapes()) + model = model.transform(InferDataTypes()) + b_avgpool.eval() + expected = b_avgpool.forward(input_quant_tensor).tensor.detach().numpy() + # finn execution + idict = {model.graph.input[0].name: inp_tensor} + odict = oxe.execute_onnx(model, idict, True) + produced = odict[model.graph.output[0].name] + + assert np.isclose(expected, produced).all() + + os.remove(export_onnx_path) diff --git a/tests/brevitas/test_brevitas_cnv.py b/tests/brevitas/test_brevitas_cnv.py index c04e16ad1923609c81240235057cc7a190c90ffb..764671bee13710ef1d9fa21aab5ef600075b9b0d 100644 --- a/tests/brevitas/test_brevitas_cnv.py +++ b/tests/brevitas/test_brevitas_cnv.py @@ -38,11 +38,11 @@ import finn.core.onnx_exec as oxe from finn.core.modelwrapper import ModelWrapper from finn.transformation.fold_constants import FoldConstants from finn.transformation.infer_shapes import InferShapes -from finn.transformation.general import GiveUniqueNodeNames +from finn.transformation.general import GiveUniqueNodeNames, RemoveStaticGraphInputs from finn.transformation.double_to_single_float import DoubleToSingleFloat from finn.util.test import get_test_model_trained -export_onnx_path = "test_output_cnv.onnx" +export_onnx_path = "test_brevitas_cnv.onnx" @pytest.mark.parametrize("abits", [1, 2]) @@ -57,6 +57,9 @@ def test_brevitas_cnv_export_exec(wbits, abits): model = model.transform(DoubleToSingleFloat()) model = model.transform(InferShapes()) model = model.transform(FoldConstants()) + model = model.transform(RemoveStaticGraphInputs()) + assert len(model.graph.input) == 1 + assert len(model.graph.output) == 1 fn = pk.resource_filename("finn", "data/cifar10/cifar10-test-data-class3.npz") input_tensor = np.load(fn)["arr_0"].astype(np.float32) input_tensor = input_tensor / 255 diff --git a/tests/brevitas/test_brevitas_fc.py b/tests/brevitas/test_brevitas_fc.py index db18d91e3590e896e111c9e38bdc4de43872a98c..9369b25385080875efcb286c02291fc579a15a34 100644 --- a/tests/brevitas/test_brevitas_fc.py +++ b/tests/brevitas/test_brevitas_fc.py @@ -39,6 +39,7 @@ import torch import finn.core.onnx_exec as oxe from finn.core.modelwrapper import ModelWrapper from finn.transformation.fold_constants import FoldConstants +from finn.transformation.general import RemoveStaticGraphInputs from finn.transformation.infer_shapes import InferShapes from finn.util.basic import make_build_dir from finn.util.test import get_test_model_trained @@ -63,6 +64,9 @@ def test_brevitas_fc_onnx_export_and_exec(size, wbits, abits): model = ModelWrapper(finn_onnx) model = model.transform(InferShapes()) model = model.transform(FoldConstants()) + model = model.transform(RemoveStaticGraphInputs()) + assert 
len(model.graph.input) == 1 + assert len(model.graph.output) == 1 # load one of the test vectors raw_i = get_data("finn", "data/onnx/mnist-conv/test_data_set_0/input_0.pb") input_tensor = onnx.load_tensor_from_string(raw_i) diff --git a/tests/brevitas/test_brevitas_non_scaled_QuantHardTanh_export.py b/tests/brevitas/test_brevitas_non_scaled_QuantHardTanh_export.py index b66348a9902802bc65b2a35e8bc3e311cc81e0bc..9c7296b7b3b6d36cfb43b6d9e96e7fba6bbce49a 100644 --- a/tests/brevitas/test_brevitas_non_scaled_QuantHardTanh_export.py +++ b/tests/brevitas/test_brevitas_non_scaled_QuantHardTanh_export.py @@ -12,7 +12,7 @@ import finn.core.onnx_exec as oxe from finn.transformation.infer_shapes import InferShapes from brevitas.core.quant import QuantType -export_onnx_path = "test_act.onnx" +export_onnx_path = "test_brevitas_non_scaled_QuantHardTanh_export.onnx" @pytest.mark.parametrize("abits", [1, 2, 4, 8]) diff --git a/tests/brevitas/test_brevitas_relu_act_export.py b/tests/brevitas/test_brevitas_relu_act_export.py index c9d8f2d812bc7bea1a2fd2598a7711099ad421e6..77974dacb51aa8746ce33f9a490becd35390db5a 100644 --- a/tests/brevitas/test_brevitas_relu_act_export.py +++ b/tests/brevitas/test_brevitas_relu_act_export.py @@ -12,7 +12,7 @@ from finn.core.modelwrapper import ModelWrapper import finn.core.onnx_exec as oxe from finn.transformation.infer_shapes import InferShapes -export_onnx_path = "test_act.onnx" +export_onnx_path = "test_brevitas_relu_act_export.onnx" @pytest.mark.parametrize("abits", [1, 2, 4, 8]) @@ -23,6 +23,7 @@ export_onnx_path = "test_act.onnx" def test_brevitas_act_export_relu(abits, max_val, scaling_impl_type): min_val = -1.0 ishape = (1, 15) + b_act = QuantReLU( bit_width=abits, max_val=max_val, @@ -67,3 +68,60 @@ scaling_impl.learned_value": torch.tensor( assert np.isclose(produced, expected, atol=1e-3).all() os.remove(export_onnx_path) + + +@pytest.mark.parametrize("abits", [1, 2, 4, 8]) +@pytest.mark.parametrize("max_val", [1.0, 1.5, 1 - 2 ** (-7)]) +@pytest.mark.parametrize("scaling_per_channel", [True, False]) +def test_brevitas_act_export_relu_imagenet(abits, max_val, scaling_per_channel): + out_channels = 32 + ishape = (1, out_channels, 1, 1) + min_val = -1.0 + b_act = QuantReLU( + bit_width=abits, + quant_type=QuantType.INT, + scaling_impl_type=ScalingImplType.PARAMETER, + scaling_per_channel=scaling_per_channel, + restrict_scaling_type=RestrictValueType.LOG_FP, + scaling_min_val=2e-16, + max_val=6.0, + return_quant_tensor=True, + per_channel_broadcastable_shape=(1, out_channels, 1, 1), + ) + if scaling_per_channel is True: + rand_tensor = (2) * torch.rand((1, out_channels, 1, 1)) + else: + rand_tensor = torch.tensor(1.2398) + checkpoint = { + "act_quant_proxy.fused_activation_quant_proxy.tensor_quant.\ +scaling_impl.learned_value": rand_tensor.type( + torch.FloatTensor + ) + } + b_act.load_state_dict(checkpoint) + bo.export_finn_onnx(b_act, ishape, export_onnx_path) + model = ModelWrapper(export_onnx_path) + model = model.transform(InferShapes()) + inp_tensor = np.random.uniform(low=min_val, high=max_val, size=ishape).astype( + np.float32 + ) + idict = {model.graph.input[0].name: inp_tensor} + odict = oxe.execute_onnx(model, idict, True) + produced = odict[model.graph.output[0].name] + inp_tensor = torch.from_numpy(inp_tensor).float() + b_act.eval() + expected = b_act.forward(inp_tensor).tensor.detach().numpy() + if not np.isclose(produced, expected, atol=1e-3).all(): + print(abits, max_val) + print("scale: ", b_act.quant_act_scale().type(torch.FloatTensor).detach()) + if 
abits < 5: + print( + "thres:", + ", ".join(["{:8.4f}".format(x) for x in b_act.export_thres[0]]), + ) + print("input:", ", ".join(["{:8.4f}".format(x) for x in inp_tensor[0]])) + print("prod :", ", ".join(["{:8.4f}".format(x) for x in produced[0]])) + print("expec:", ", ".join(["{:8.4f}".format(x) for x in expected[0]])) + + assert np.isclose(produced, expected, atol=1e-3).all() + os.remove(export_onnx_path) diff --git a/tests/brevitas/test_brevitas_scaled_QHardTanh_export.py b/tests/brevitas/test_brevitas_scaled_QHardTanh_export.py index d499f1517341477eca9915245da9ad12c346c5a9..e0ec82ebed44e2e984be9f62e02bc1721a7f9c33 100644 --- a/tests/brevitas/test_brevitas_scaled_QHardTanh_export.py +++ b/tests/brevitas/test_brevitas_scaled_QHardTanh_export.py @@ -12,7 +12,7 @@ from finn.core.modelwrapper import ModelWrapper import finn.core.onnx_exec as oxe from finn.transformation.infer_shapes import InferShapes -export_onnx_path = "test_act.onnx" +export_onnx_path = "test_brevitas_scaled_QHardTanh_export.onnx" @pytest.mark.parametrize("abits", [2, 4, 8]) diff --git a/tests/core/test_basic_onnx_exec.py b/tests/core/test_basic_onnx_exec.py index a7b6da9965aa5912870812a8c1f8d6da2ee0d181..ddb2cbfc40c7647970f0c51ecb95340e7d1dddae 100644 --- a/tests/core/test_basic_onnx_exec.py +++ b/tests/core/test_basic_onnx_exec.py @@ -35,6 +35,8 @@ import onnx.numpy_helper as np_helper import finn.core.onnx_exec as oxe from finn.core.modelwrapper import ModelWrapper from finn.transformation.infer_shapes import InferShapes +from finn.core.datatype import DataType +from finn.util.basic import gen_finn_dt_tensor def test_mnist_onnx_download_extract_run(): @@ -47,9 +49,50 @@ def test_mnist_onnx_download_extract_run(): raw_o = get_data("finn", "data/onnx/mnist-conv/test_data_set_0/output_0.pb") input_tensor = onnx.load_tensor_from_string(raw_i) output_tensor = onnx.load_tensor_from_string(raw_o) - # run using FINN-based execution + # run using FINN-based execution (full graph) input_dict = {"Input3": np_helper.to_array(input_tensor)} - output_dict = oxe.execute_onnx(model, input_dict) + output_dict = oxe.execute_onnx(model, input_dict, return_full_exec_context=True) assert np.isclose( np_helper.to_array(output_tensor), output_dict["Plus214_Output_0"], atol=1e-3 ).all() + # test subgraph execution + start_node = model.graph.node[1] + end_node = model.graph.node[3] + subgraph_i_dict = {start_node.input[0]: output_dict[start_node.input[0]]} + subgraph_o_dict = oxe.execute_onnx( + model, + subgraph_i_dict, + return_full_exec_context=True, + start_node=start_node, + end_node=end_node, + ) + assert np.isclose( + subgraph_o_dict[end_node.output[0]], output_dict[end_node.output[0]], atol=1e-3 + ).all() + + +def test_onnx_exec_internal_rounding(): + inp0 = onnx.helper.make_tensor_value_info("inp0", onnx.TensorProto.FLOAT, [2, 2]) + inp1 = onnx.helper.make_tensor_value_info("inp1", onnx.TensorProto.FLOAT, [1]) + outp = onnx.helper.make_tensor_value_info("outp", onnx.TensorProto.FLOAT, [2, 2]) + mul_node = onnx.helper.make_node("Mul", inputs=["inp0", "inp1"], outputs=["outp"]) + graph = onnx.helper.make_graph( + nodes=[mul_node], name="mul_graph", inputs=[inp0, inp1], outputs=[outp] + ) + + model = onnx.helper.make_model(graph, producer_name="mul-model") + model = ModelWrapper(model) + idt = DataType.INT2 + model.set_tensor_datatype("inp0", idt) + model.set_tensor_datatype("inp1", idt) + model.transform(InferShapes()) + + mul_value = np.asarray([-1], dtype=np.float32) + inp_int = gen_finn_dt_tensor(idt, [2, 2]) + scale = 
np.random.uniform(low=0, high=1, size=(2, 2)).astype(np.float32) + inp_rounded = (inp_int * scale) / (scale + 1e-7) + input_dict = {"inp0": inp_rounded, "inp1": mul_value} + output_dict = oxe.execute_onnx(model, input_dict) + produced = output_dict["outp"] + expected = np.multiply(inp_int, mul_value) + assert (produced == expected).all() diff --git a/tests/core/test_modelwrapper.py b/tests/core/test_modelwrapper.py index 5fa9b23bad5c5b67f65530c55f862f889c07b1ac..0fb7ae42f3bd556755f81a02be6c71fd73ffc519 100644 --- a/tests/core/test_modelwrapper.py +++ b/tests/core/test_modelwrapper.py @@ -36,7 +36,7 @@ import finn.core.data_layout as DataLayout from finn.core.modelwrapper import ModelWrapper from finn.util.test import get_test_model_trained -export_onnx_path = "test_output_lfc.onnx" +export_onnx_path = "test_modelwrapper.onnx" def test_modelwrapper(): diff --git a/tests/custom_op/test_xnorpopcountmatmul.py b/tests/custom_op/test_xnorpopcountmatmul.py index 37d9b7e5968bdb70023be9b70515410e941f51ce..745b782d418129d96e21c327a49de04d53aa7c48 100644 --- a/tests/custom_op/test_xnorpopcountmatmul.py +++ b/tests/custom_op/test_xnorpopcountmatmul.py @@ -47,7 +47,7 @@ from finn.transformation.infer_shapes import InferShapes from finn.transformation.streamline.sign_to_thres import ConvertSignToThres from finn.util.test import get_test_model_trained -export_onnx_path = "test_output_lfc.onnx" +export_onnx_path = "test_xnorpopcountmatmul.onnx" def test_xnorpopcountmatmul(): diff --git a/tests/end2end/test_end2end_cnv_w1a1.py b/tests/end2end/test_end2end_cnv_w1a1.py index c3359dcc82650bf0e9e8a5bc5276f5ca770ee96c..a2cfcd3a864c12788c2ac73271b5782ddfa336c1 100644 --- a/tests/end2end/test_end2end_cnv_w1a1.py +++ b/tests/end2end/test_end2end_cnv_w1a1.py @@ -42,7 +42,12 @@ from finn.transformation.double_to_single_float import DoubleToSingleFloat from finn.transformation.infer_shapes import InferShapes from finn.transformation.move_reshape import RemoveCNVtoFCFlatten from finn.transformation.fold_constants import FoldConstants -from finn.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames +from finn.transformation.general import ( + RemoveUnusedTensors, + RemoveStaticGraphInputs, + GiveReadableTensorNames, + GiveUniqueNodeNames, +) from finn.transformation.streamline import Streamline from finn.transformation.lower_convs_to_matmul import LowerConvsToMatMul from finn.transformation.bipolar_to_xnor import ConvertBipolarMatMulToXnorPopcount @@ -72,6 +77,7 @@ from finn.util.test import get_test_model_trained, load_test_checkpoint_or_skip from finn.transformation.fpgadataflow.annotate_resources import AnnotateResources from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim from finn.transformation.fpgadataflow.insert_fifo import InsertFIFO +from finn.core.throughput_test import throughput_test_rtlsim build_dir = "/tmp/" + os.environ["FINN_INST_NAME"] test_pynq_board = os.getenv("PYNQ_BOARD", default="Pynq-Z1") @@ -96,6 +102,7 @@ def test_end2end_cnv_w1a1_import_and_tidy(): model = model.transform(FoldConstants()) model = model.transform(GiveUniqueNodeNames()) model = model.transform(GiveReadableTensorNames()) + model = model.transform(RemoveStaticGraphInputs()) model.save(build_dir + "/end2end_cnv_w1a1_tidy.onnx") @@ -107,6 +114,7 @@ def test_end2end_cnv_w1a1_streamline(): model = model.transform(absorb.AbsorbTransposeIntoMultiThreshold()) model = model.transform(ConvertBipolarMatMulToXnorPopcount()) model = model.transform(Streamline()) + model = 
model.transform(RemoveUnusedTensors()) model.save(build_dir + "/end2end_cnv_w1a1_streamlined.onnx") @@ -142,15 +150,15 @@ def test_end2end_cnv_w1a1_fold_and_tlastmarker(): fc_layers = model.get_nodes_by_op_type("StreamingFCLayer_Batch") # each tuple is (PE, SIMD, in_fifo_depth) for a layer folding = [ - (16, 3, 128), - (32, 32, 128), - (16, 32, 128), - (16, 32, 128), - (4, 32, 81), + (16, 3, 256), + (32, 32, 256), + (16, 32, 256), + (16, 32, 256), + (4, 32, 214), (1, 32, 2), - (1, 4, 2), - (1, 8, 128), - (5, 1, 3), + (1, 4, 126), + (1, 8, 62), + (5, 1, 6), ] for fcl, (pe, simd, ififodepth) in zip(fc_layers, folding): fcl_inst = getCustomOp(fcl) @@ -159,10 +167,12 @@ def test_end2end_cnv_w1a1_fold_and_tlastmarker(): fcl_inst.set_nodeattr("inFIFODepth", ififodepth) swg_layers = model.get_nodes_by_op_type("ConvolutionInputGenerator") + swg_idepth = [2, 51, 9, 106, 2, 2] for i in range(len(swg_layers)): swg_inst = getCustomOp(swg_layers[i]) simd = folding[i][1] swg_inst.set_nodeattr("SIMD", simd) + swg_inst.set_nodeattr("inFIFODepth", swg_idepth[i]) model = model.transform(InsertDWC()) model = model.transform(InsertFIFO()) @@ -221,6 +231,20 @@ def test_end2end_cnv_w1a1_verify_dataflow_part(): assert np.isclose(res_cppsim, res_rtlsim_whole).all() +@pytest.mark.vivado +def test_end2end_cnv_w1a1_throughput_test_rtlsim(): + model = load_test_checkpoint_or_skip( + build_dir + "/end2end_cnv_w1a1_ipstitch_whole_rtlsim.onnx" + ) + model.set_metadata_prop("rtlsim_trace", "rtlsim_trace.vcd") + # os.environ["RTLSIM_TRACE_DEPTH"] = "4" + # run through IP-stitched rtlsim with increasing batch sizes and + # check the number of cycles it takes to execute + ret = throughput_test_rtlsim(model, 10) + # TODO check for expected performance + assert ret["cycles"] > 0 + + @pytest.mark.vivado def test_end2end_cnv_w1a1_verify_all(): # use the streamlined model as the "golden" model for right answers diff --git a/tests/end2end/test_end2end_cnv_w2a2.py b/tests/end2end/test_end2end_cnv_w2a2.py new file mode 100644 index 0000000000000000000000000000000000000000..f45b0a3eccd2f52ea144405865a1df06315952d9 --- /dev/null +++ b/tests/end2end/test_end2end_cnv_w2a2.py @@ -0,0 +1,384 @@ +# Copyright (c) 2020, Xilinx +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of FINN nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import os + +import numpy as np + +# as of Feb'20 there is a bug that segfaults ONNX shape inference if we +# import pytorch before onnx, so we make sure to import onnx first +import onnx # NOQA + +import pytest +import pkg_resources as pk +from finn.custom_op.registry import getCustomOp +from finn.core.onnx_exec import execute_onnx +from finn.transformation.double_to_single_float import DoubleToSingleFloat +from finn.transformation.infer_shapes import InferShapes +from finn.transformation.move_reshape import RemoveCNVtoFCFlatten +from finn.transformation.fold_constants import FoldConstants +from finn.transformation.general import ( + RemoveUnusedTensors, + RemoveStaticGraphInputs, + GiveReadableTensorNames, + GiveUniqueNodeNames, +) +from finn.transformation.streamline import Streamline +from finn.transformation.lower_convs_to_matmul import LowerConvsToMatMul +import finn.transformation.streamline.absorb as absorb +from finn.transformation.streamline.reorder import MakeMaxPoolNHWC +import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls +from finn.transformation.fpgadataflow.create_dataflow_partition import ( + CreateDataflowPartition, +) +from finn.transformation.fpgadataflow.insert_dwc import InsertDWC +from finn.transformation.fpgadataflow.insert_tlastmarker import InsertTLastMarker +from finn.transformation.fpgadataflow.prepare_ip import PrepareIP +from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP +from finn.transformation.fpgadataflow.replace_verilog_relpaths import ( + ReplaceVerilogRelPaths, +) +from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP +from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode +from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim +from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim +from finn.transformation.fpgadataflow.make_pynq_driver import MakePYNQDriver +from finn.transformation.fpgadataflow.make_pynq_proj import MakePYNQProject +from finn.transformation.fpgadataflow.synth_pynq_proj import SynthPYNQProject +from finn.transformation.fpgadataflow.make_deployment import DeployToPYNQ +from finn.util.basic import pynq_part_map +from finn.util.test import get_test_model_trained, load_test_checkpoint_or_skip +from finn.transformation.fpgadataflow.annotate_resources import AnnotateResources +from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim +from finn.transformation.fpgadataflow.insert_fifo import InsertFIFO +from finn.core.throughput_test import throughput_test_rtlsim + +build_dir = "/tmp/" + os.environ["FINN_INST_NAME"] +test_pynq_board = os.getenv("PYNQ_BOARD", default="Pynq-Z1") +test_fpga_part = pynq_part_map[test_pynq_board] +target_clk_ns = 10 +mem_mode = "decoupled" + + +def test_end2end_cnv_w2a2_export(): + import brevitas.onnx as bo + + cnv = get_test_model_trained("CNV", 2, 2) + bo.export_finn_onnx( + cnv, (1, 3, 32, 32), build_dir + "/end2end_cnv_w2a2_export.onnx" + ) + + +def 
test_end2end_cnv_w2a2_import_and_tidy(): + model = load_test_checkpoint_or_skip(build_dir + "/end2end_cnv_w2a2_export.onnx") + model = model.transform(DoubleToSingleFloat()) + model = model.transform(InferShapes()) + model = model.transform(FoldConstants()) + model = model.transform(GiveUniqueNodeNames()) + model = model.transform(GiveReadableTensorNames()) + model = model.transform(RemoveStaticGraphInputs()) + model.save(build_dir + "/end2end_cnv_w2a2_tidy.onnx") + + +def test_end2end_cnv_w2a2_streamline(): + model = load_test_checkpoint_or_skip(build_dir + "/end2end_cnv_w2a2_tidy.onnx") + model = model.transform(Streamline()) + model = model.transform(LowerConvsToMatMul()) + model = model.transform(MakeMaxPoolNHWC()) + model = model.transform(absorb.AbsorbTransposeIntoMultiThreshold()) + model = model.transform(Streamline()) + model = model.transform(RemoveUnusedTensors()) + model.save(build_dir + "/end2end_cnv_w2a2_streamlined.onnx") + + +def test_end2end_cnv_w2a2_convert_to_hls_layers(): + model = load_test_checkpoint_or_skip( + build_dir + "/end2end_cnv_w2a2_streamlined.onnx" + ) + model = model.transform(to_hls.InferQuantizedStreamingFCLayer(mem_mode)) + model = model.transform(to_hls.InferConvInpGen()) + model = model.transform(to_hls.InferStreamingMaxPool()) + model = model.transform(RemoveCNVtoFCFlatten()) + model.save(build_dir + "/end2end_cnv_w2a2_hls_layers.onnx") + + +def test_end2end_cnv_w2a2_create_dataflow_partition(): + model = load_test_checkpoint_or_skip( + build_dir + "/end2end_cnv_w2a2_hls_layers.onnx" + ) + parent_model = model.transform(CreateDataflowPartition()) + parent_model.save(build_dir + "/end2end_cnv_w2a2_dataflow_parent.onnx") + sdp_node = parent_model.get_nodes_by_op_type("StreamingDataflowPartition")[0] + sdp_node = getCustomOp(sdp_node) + dataflow_model_filename = sdp_node.get_nodeattr("model") + dataflow_model = load_test_checkpoint_or_skip(dataflow_model_filename) + dataflow_model.save(build_dir + "/end2end_cnv_w2a2_dataflow_model.onnx") + + +def test_end2end_cnv_w2a2_fold_and_tlastmarker(): + model = load_test_checkpoint_or_skip( + build_dir + "/end2end_cnv_w2a2_dataflow_model.onnx" + ) + fc_layers = model.get_nodes_by_op_type("StreamingFCLayer_Batch") + # each tuple is (PE, SIMD, in_fifo_depth) for a layer + folding = [ + (8, 3, 256, "auto"), + (16, 16, 256, "auto"), + (8, 16, 256, "auto"), + (8, 16, 256, "block"), + (4, 8, 214, "auto"), + (1, 8, 2, "auto"), + (1, 2, 126, "distributed"), + (2, 2, 62, "block"), + (5, 1, 6, "distributed"), + ] + for fcl, (pe, simd, ififodepth, ramstyle) in zip(fc_layers, folding): + fcl_inst = getCustomOp(fcl) + fcl_inst.set_nodeattr("PE", pe) + fcl_inst.set_nodeattr("SIMD", simd) + fcl_inst.set_nodeattr("inFIFODepth", ififodepth) + fcl_inst.set_nodeattr("ram_style", ramstyle) + + swg_layers = model.get_nodes_by_op_type("ConvolutionInputGenerator") + swg_idepth = [2, 51, 9, 106, 2, 2] + for i in range(len(swg_layers)): + swg_inst = getCustomOp(swg_layers[i]) + simd = folding[i][1] + swg_inst.set_nodeattr("SIMD", simd) + swg_inst.set_nodeattr("inFIFODepth", swg_idepth[i]) + + model = model.transform(InsertDWC()) + model = model.transform(InsertFIFO()) + model = model.transform(InsertTLastMarker()) + model = model.transform(GiveUniqueNodeNames()) + model = model.transform(AnnotateResources("estimate")) + model.save(build_dir + "/end2end_cnv_w2a2_folded.onnx") + + +@pytest.mark.slow +@pytest.mark.vivado +def test_end2end_cnv_w2a2_gen_hls_ip(): + model = load_test_checkpoint_or_skip(build_dir + 
"/end2end_cnv_w2a2_folded.onnx") + model = model.transform(PrepareIP(test_fpga_part, target_clk_ns)) + model = model.transform(HLSSynthIP()) + model = model.transform(AnnotateResources("hls")) + model.save(build_dir + "/end2end_cnv_w2a2_ipgen.onnx") + + +@pytest.mark.vivado +def test_end2end_cnv_w2a2_ip_stitch(): + model = load_test_checkpoint_or_skip(build_dir + "/end2end_cnv_w2a2_ipgen.onnx") + model = model.transform(ReplaceVerilogRelPaths()) + model = model.transform(CreateStitchedIP(test_fpga_part, target_clk_ns)) + model.save(build_dir + "/end2end_cnv_w2a2_ipstitch.onnx") + + +@pytest.mark.vivado +def test_end2end_cnv_w2a2_verify_dataflow_part(): + model = load_test_checkpoint_or_skip(build_dir + "/end2end_cnv_w2a2_ipstitch.onnx") + x = np.zeros((1, 32, 32, 3), dtype=np.float32) + inp_name = model.graph.input[0].name + out_name = model.graph.output[0].name + inp_dict = {inp_name: x} + # cppsim + model = model.transform(PrepareCppSim()) + model = model.transform(CompileCppSim()) + model = model.transform(SetExecMode("cppsim")) + model.save(build_dir + "/end2end_cnv_w2a2_ipgen_cppsim.onnx") + ret_cppsim = execute_onnx(model, inp_dict, True) + res_cppsim = ret_cppsim[out_name] + # node-by-node rtlsim + model = model.transform(SetExecMode("rtlsim")) + model = model.transform(PrepareRTLSim()) + model.save(build_dir + "/end2end_cnv_w2a2_ipgen_nodebynode_rtlsim.onnx") + ret_rtlsim_nodebynode = execute_onnx(model, inp_dict, True) + res_rtlsim_nodebynode = ret_rtlsim_nodebynode[out_name] + # whole-network (ip-stitched) rtlsim + model.set_metadata_prop("exec_mode", "rtlsim") + model.save(build_dir + "/end2end_cnv_w2a2_ipstitch_whole_rtlsim.onnx") + # this is a particularly long-running test, set liveness thr. to unlimited + os.environ["LIVENESS_THRESHOLD"] = "-1" + ret_rtlsim_whole = execute_onnx(model, inp_dict, True) + res_rtlsim_whole = ret_rtlsim_whole[out_name] + assert np.isclose(res_cppsim, res_rtlsim_nodebynode).all() + assert np.isclose(res_cppsim, res_rtlsim_whole).all() + + +@pytest.mark.vivado +def test_end2end_cnv_w2a2_throughput_test_rtlsim(): + model = load_test_checkpoint_or_skip( + build_dir + "/end2end_cnv_w2a2_ipstitch_whole_rtlsim.onnx" + ) + model.set_metadata_prop("rtlsim_trace", "rtlsim_trace.vcd") + # os.environ["RTLSIM_TRACE_DEPTH"] = "4" + # run through IP-stitched rtlsim with increasing batch sizes and + # check the number of cycles it takes to execute + ret = throughput_test_rtlsim(model, 10) + # TODO check for expected performance + assert ret["cycles"] > 0 + + +@pytest.mark.vivado +def test_end2end_cnv_w2a2_verify_all(): + # use the streamlined model as the "golden" model for right answers + golden = load_test_checkpoint_or_skip( + build_dir + "/end2end_cnv_w2a2_streamlined.onnx" + ) + iname = golden.graph.input[0].name + oname = golden.graph.output[0].name + # load one of the test vectors + fn = pk.resource_filename("finn", "data/cifar10/cifar10-test-data-class3.npz") + input_tensor = np.load(fn)["arr_0"].astype(np.float32) + input_tensor = input_tensor / 255 + assert input_tensor.shape == (1, 3, 32, 32) + x = input_tensor + # x = np.zeros(ishape, dtype=np.float32) + ret_golden = execute_onnx(golden, {iname: x}, True) + y_golden = ret_golden[oname] + # set up parent+child graph to test + # we'll use models from the previous step as the child model + parent_model = load_test_checkpoint_or_skip( + build_dir + "/end2end_cnv_w2a2_dataflow_parent.onnx" + ) + iname = parent_model.graph.input[0].name + oname = parent_model.graph.output[0].name + # produce results with 
cppsim + sdp_node = parent_model.get_nodes_by_op_type("StreamingDataflowPartition")[0] + sdp_node = getCustomOp(sdp_node) + load_test_checkpoint_or_skip(build_dir + "/end2end_cnv_w2a2_ipgen_cppsim.onnx") + sdp_node.set_nodeattr("model", build_dir + "/end2end_cnv_w2a2_ipgen_cppsim.onnx") + ret_cppsim = execute_onnx(parent_model, {iname: x}, True) + y_cppsim = ret_cppsim[oname] + # produce results with node-by-node rtlsim + load_test_checkpoint_or_skip( + build_dir + "/end2end_cnv_w2a2_ipgen_nodebynode_rtlsim.onnx" + ) + sdp_node.set_nodeattr( + "model", build_dir + "/end2end_cnv_w2a2_ipgen_nodebynode_rtlsim.onnx" + ) + ret_nodebynode_rtlsim = execute_onnx(parent_model, {iname: x}, True) + y_nodebynode_rtlsim = ret_nodebynode_rtlsim[oname] + # produce results with whole-network (stitched ip) rtlsim + load_test_checkpoint_or_skip( + build_dir + "/end2end_cnv_w2a2_ipstitch_whole_rtlsim.onnx" + ) + sdp_node.set_nodeattr( + "model", build_dir + "/end2end_cnv_w2a2_ipstitch_whole_rtlsim.onnx" + ) + # this is a particularly long-running test, set liveness thr. to unlimited + os.environ["LIVENESS_THRESHOLD"] = "-1" + ret_whole_rtlsim = execute_onnx(parent_model, {iname: x}, True) + y_whole_rtlsim = ret_whole_rtlsim[oname] + assert np.isclose(y_golden, y_cppsim).all() + assert np.isclose(y_golden, y_nodebynode_rtlsim).all() + assert np.isclose(y_golden, y_whole_rtlsim).all() + assert np.argmax(y_golden) == 3 + + +@pytest.mark.vivado +def test_end2end_cnv_w2a2_make_pynq_proj(): + model = load_test_checkpoint_or_skip(build_dir + "/end2end_cnv_w2a2_ipstitch.onnx") + model = model.transform(MakePYNQProject(test_pynq_board)) + model.save(build_dir + "/end2end_cnv_w2a2_pynq_project.onnx") + + +@pytest.mark.slow +@pytest.mark.vivado +def test_end2end_cnv_w2a2_synth_pynq_project(): + model = load_test_checkpoint_or_skip( + build_dir + "/end2end_cnv_w2a2_pynq_project.onnx" + ) + model = model.transform(SynthPYNQProject()) + model = model.transform(AnnotateResources("synth")) + model.save(build_dir + "/end2end_cnv_w2a2_synth.onnx") + + +def test_end2end_cnv_w2a2_make_driver(): + model = load_test_checkpoint_or_skip(build_dir + "/end2end_cnv_w2a2_synth.onnx") + model = model.transform(MakePYNQDriver()) + model.save(build_dir + "/end2end_cnv_w2a2_pynq_driver.onnx") + + +def test_end2end_cnv_w2a2_deploy_on_pynq(): + model = load_test_checkpoint_or_skip( + build_dir + "/end2end_cnv_w2a2_pynq_driver.onnx" + ) + try: + ip = os.environ["PYNQ_IP"] # no fault for this one; skip if not defined + if ip == "": + pytest.skip("PYNQ board IP address not specified") + username = os.getenv("PYNQ_USERNAME", "xilinx") + password = os.getenv("PYNQ_PASSWORD", "xilinx") + port = os.getenv("PYNQ_PORT", 22) + target_dir = os.getenv("PYNQ_TARGET_DIR", "/home/xilinx/finn") + model = model.transform(DeployToPYNQ(ip, port, username, password, target_dir)) + # save the model to be able to link it to the parent + model.save(build_dir + "/end2end_cnv_w2a2_pynq_deploy.onnx") + except KeyError: + pytest.skip("PYNQ board IP address not specified") + + +def test_end2end_cnv_w2a2_run_on_pynq(): + # use the streamlined model as the "golden" model for right answers + golden = load_test_checkpoint_or_skip( + build_dir + "/end2end_cnv_w2a2_streamlined.onnx" + ) + iname = golden.graph.input[0].name + oname = golden.graph.output[0].name + # load one of the test vectors + fn = pk.resource_filename("finn", "data/cifar10/cifar10-test-data-class3.npz") + input_tensor = np.load(fn)["arr_0"].astype(np.float32) + input_tensor = input_tensor / 255 + assert 
input_tensor.shape == (1, 3, 32, 32) + x = input_tensor + # run using FINN-based execution + ret_golden = execute_onnx(golden, {iname: x}, True) + y_golden = ret_golden[oname] + # set up parent+child graph to test + # we'll use models from the previous step as the child model + parent_model = load_test_checkpoint_or_skip( + build_dir + "/end2end_cnv_w2a2_dataflow_parent.onnx" + ) + iname = parent_model.graph.input[0].name + oname = parent_model.graph.output[0].name + try: + ip = os.environ["PYNQ_IP"] # NOQA + if ip == "": + pytest.skip("PYNQ board IP address not specified") + # produce results with cppsim + sdp_node = parent_model.get_nodes_by_op_type("StreamingDataflowPartition")[0] + sdp_node = getCustomOp(sdp_node) + load_test_checkpoint_or_skip(build_dir + "/end2end_cnv_w2a2_pynq_deploy.onnx") + sdp_node.set_nodeattr("model", build_dir + "/end2end_cnv_w2a2_pynq_deploy.onnx") + ret = execute_onnx(parent_model, {iname: x}, True) + y = ret[oname] + assert np.isclose(y, y_golden).all() + assert np.argmax(y) == 3 + + except KeyError: + pytest.skip("PYNQ board IP address not specified") diff --git a/tests/end2end/test_end2end_tfc_w1a1.py b/tests/end2end/test_end2end_tfc_w1a1.py index 15c1c41b006c6f87d79a0e7eb6a4458838de5fd2..31659df631e8ab489cb63dbef51200f313bca6b3 100644 --- a/tests/end2end/test_end2end_tfc_w1a1.py +++ b/tests/end2end/test_end2end_tfc_w1a1.py @@ -63,7 +63,12 @@ from finn.transformation.fpgadataflow.replace_verilog_relpaths import ( ) from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode from finn.transformation.fpgadataflow.synth_pynq_proj import SynthPYNQProject -from finn.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames +from finn.transformation.general import ( + RemoveUnusedTensors, + RemoveStaticGraphInputs, + GiveReadableTensorNames, + GiveUniqueNodeNames, +) from finn.transformation.infer_datatypes import InferDataTypes from finn.transformation.infer_shapes import InferShapes from finn.transformation.streamline import Streamline @@ -72,6 +77,8 @@ from finn.util.basic import pynq_part_map from finn.util.test import get_test_model_trained, load_test_checkpoint_or_skip from finn.transformation.fpgadataflow.annotate_resources import AnnotateResources from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim +from finn.core.throughput_test import throughput_test_rtlsim +import finn.util.vcd as vcd build_dir = "/tmp/" + os.environ["FINN_INST_NAME"] test_pynq_board = os.getenv("PYNQ_BOARD", default="Pynq-Z1") @@ -96,12 +103,14 @@ def test_end2end_tfc_w1a1_import_and_tidy(): model = model.transform(GiveUniqueNodeNames()) model = model.transform(GiveReadableTensorNames()) model = model.transform(InferDataTypes()) + model = model.transform(RemoveStaticGraphInputs()) model.save(build_dir + "/end2end_tfc_w1a1_tidy.onnx") def test_end2end_tfc_w1a1_streamline(): model = load_test_checkpoint_or_skip(build_dir + "/end2end_tfc_w1a1_tidy.onnx") model = model.transform(Streamline()) + model = model.transform(RemoveUnusedTensors()) model.save(build_dir + "/end2end_tfc_w1a1_streamlined.onnx") @@ -197,6 +206,8 @@ def test_end2end_tfc_w1a1_verify_dataflow_part(): res_rtlsim_nodebynode = ret_rtlsim_nodebynode[out_name] # whole-network (ip-stitched) rtlsim model.set_metadata_prop("exec_mode", "rtlsim") + model.set_metadata_prop("rtlsim_trace", build_dir + "/tfc_w1a1.vcd") + os.environ["RTLSIM_TRACE_DEPTH"] = "3" model.save(build_dir + "/end2end_tfc_w1a1_ipstitch_whole_rtlsim.onnx") ret_rtlsim_whole = execute_onnx(model, inp_dict, 
True) res_rtlsim_whole = ret_rtlsim_whole[out_name] @@ -204,6 +215,39 @@ def test_end2end_tfc_w1a1_verify_dataflow_part(): assert np.isclose(res_cppsim, res_rtlsim_whole).all() +def test_end2end_tfc_w1a1_verify_fifo_fullness(): + vcdf = build_dir + "/tfc_w1a1.vcd" + if not os.path.isfile(vcdf): + pytest.skip("Cannot find %s, skipping" % vcdf) + stream_ifs = vcd.list_stream_if(vcdf) + fifos = vcd.list_fifo_count_signals(vcdf) + assert len(stream_ifs) == 37 + assert len(fifos) == 6 + fifo_max = vcd.get_all_fifo_count_max(vcdf) + assert fifo_max[0][0] == "TOP.v.finn_design_i.StreamingFIFO_0.count[3:0]" + assert fifo_max[0][1] == 3 + stream_stat = vcd.get_all_stream_if_stats(vcdf) + assert ( + stream_stat[0][0] + == "TOP.v.finn_design_i.StreamingDataWidthConverter_Batch_0_out_V_V_" + ) + + +@pytest.mark.vivado +def test_end2end_tfc_w1a1_throughput_test_rtlsim(): + model = load_test_checkpoint_or_skip( + build_dir + "/end2end_tfc_w1a1_ipstitch_whole_rtlsim.onnx" + ) + # run through IP-stitched rtlsim with increasing batch sizes and + # check the number of cycles it takes to execute + ret = throughput_test_rtlsim(model, 1) + assert ret["cycles"] == 205 + ret = throughput_test_rtlsim(model, 10) + assert ret["cycles"] == 844 + ret = throughput_test_rtlsim(model, 100) + assert ret["cycles"] == 7234 + + @pytest.mark.vivado def test_end2end_tfc_w1a1_verify_all(): # use the streamlined model as the "golden" model for right answers diff --git a/tests/end2end/test_end2end_tfc_w1a2.py b/tests/end2end/test_end2end_tfc_w1a2.py index d4c005a86580fb36e735beb00717fcfdffff21e5..d5579f625a20ae26e18bcdcba0cfaa3042a71b9a 100644 --- a/tests/end2end/test_end2end_tfc_w1a2.py +++ b/tests/end2end/test_end2end_tfc_w1a2.py @@ -61,7 +61,12 @@ from finn.transformation.fpgadataflow.replace_verilog_relpaths import ( ) from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode from finn.transformation.fpgadataflow.synth_pynq_proj import SynthPYNQProject -from finn.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames +from finn.transformation.general import ( + RemoveUnusedTensors, + RemoveStaticGraphInputs, + GiveReadableTensorNames, + GiveUniqueNodeNames, +) from finn.transformation.infer_datatypes import InferDataTypes from finn.transformation.infer_shapes import InferShapes from finn.transformation.streamline import Streamline @@ -93,12 +98,14 @@ def test_end2end_tfc_w1a2_import_and_tidy(): model = model.transform(GiveUniqueNodeNames()) model = model.transform(GiveReadableTensorNames()) model = model.transform(InferDataTypes()) + model = model.transform(RemoveStaticGraphInputs()) model.save(build_dir + "/end2end_tfc_w1a2_tidy.onnx") def test_end2end_tfc_w1a2_streamline(): model = load_test_checkpoint_or_skip(build_dir + "/end2end_tfc_w1a2_tidy.onnx") model = model.transform(Streamline()) + model = model.transform(RemoveUnusedTensors()) model.save(build_dir + "/end2end_tfc_w1a2_streamlined.onnx") diff --git a/tests/end2end/test_end2end_tfc_w2a2.py b/tests/end2end/test_end2end_tfc_w2a2.py index 19d3f86e046658c4080d71984df1cff74008adab..470119f3444987f0156caff81bf556bf4f2f2cbb 100644 --- a/tests/end2end/test_end2end_tfc_w2a2.py +++ b/tests/end2end/test_end2end_tfc_w2a2.py @@ -61,7 +61,12 @@ from finn.transformation.fpgadataflow.replace_verilog_relpaths import ( ) from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode from finn.transformation.fpgadataflow.synth_pynq_proj import SynthPYNQProject -from finn.transformation.general import GiveReadableTensorNames, 
GiveUniqueNodeNames +from finn.transformation.general import ( + RemoveUnusedTensors, + RemoveStaticGraphInputs, + GiveReadableTensorNames, + GiveUniqueNodeNames, +) from finn.transformation.infer_datatypes import InferDataTypes from finn.transformation.infer_shapes import InferShapes from finn.transformation.streamline import Streamline @@ -93,12 +98,14 @@ def test_end2end_tfc_w2a2_import_and_tidy(): model = model.transform(GiveUniqueNodeNames()) model = model.transform(GiveReadableTensorNames()) model = model.transform(InferDataTypes()) + model = model.transform(RemoveStaticGraphInputs()) model.save(build_dir + "/end2end_tfc_w2a2_tidy.onnx") def test_end2end_tfc_w2a2_streamline(): model = load_test_checkpoint_or_skip(build_dir + "/end2end_tfc_w2a2_tidy.onnx") model = model.transform(Streamline()) + model = model.transform(RemoveUnusedTensors()) model.save(build_dir + "/end2end_tfc_w2a2_streamlined.onnx") diff --git a/tests/fpgadataflow/test_convert_to_hls_channelwise_layer.py b/tests/fpgadataflow/test_convert_to_hls_channelwise_layer.py new file mode 100644 index 0000000000000000000000000000000000000000..d09c64a1250f78604c1a0a362cf234712de2cf57 --- /dev/null +++ b/tests/fpgadataflow/test_convert_to_hls_channelwise_layer.py @@ -0,0 +1,115 @@ +import pytest + +from onnx import TensorProto, helper + +import finn.core.onnx_exec as oxe +from finn.core.datatype import DataType +from finn.core.modelwrapper import ModelWrapper +import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls +from finn.transformation.fpgadataflow.prepare_ip import PrepareIP +from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim +from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim +from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP +from finn.transformation.fpgadataflow.replace_verilog_relpaths import ( + ReplaceVerilogRelPaths, +) +from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode + +from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim +from finn.transformation.infer_data_layouts import InferDataLayouts +from finn.transformation.general import GiveUniqueNodeNames +from finn.util.basic import gen_finn_dt_tensor +from finn.transformation.infer_shapes import InferShapes +import numpy as np + + +def prepare_inputs(input_tensor): + return {"inp": input_tensor} + + +def make_single_maxpool_modelwrapper(onnx_op_name, ishape, idt, pdt, pshape): + + inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, ishape) + outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, ishape) + p0 = helper.make_tensor_value_info("p0", TensorProto.FLOAT, pshape) + + model = helper.make_model( + helper.make_graph( + name="test", + inputs=[inp], + outputs=[outp], + value_info=[p0], + nodes=[helper.make_node(onnx_op_name, ["inp", "p0"], ["outp"])], + ) + ) + + model = ModelWrapper(model) + model.set_initializer("p0", gen_finn_dt_tensor(pdt, pshape)) + model.set_tensor_datatype("inp", idt) + model.transform(InferDataLayouts(), make_deepcopy=False) + model.transform(InferShapes(), make_deepcopy=False) + return model + + +# parameter datatype +@pytest.mark.parametrize("pdt", [DataType.BIPOLAR, DataType.UINT4, DataType.INT2]) +# input datatype +@pytest.mark.parametrize("idt", [DataType.INT32, DataType.UINT4, DataType.INT4]) +# function +@pytest.mark.parametrize("onnx_op_name", ["Add", "Mul"]) +# vector parameter or scalar parameter (broadcast) +@pytest.mark.parametrize("scalar_param", [True, False]) +# execution mode 
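+# ("cppsim" executes the converted HLS layer as compiled C++, while "rtlsim"
+# runs the generated Verilog through PyVerilator; see SetExecMode below.
+# As a rough sketch, assuming a standard pytest setup, a single variant could
+# be selected with an expression filter such as:
+#   pytest -k "test_convert_to_hls_channelwise_layer and cppsim")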
+@pytest.mark.parametrize("exec_mode", ["cppsim", "rtlsim"])
+@pytest.mark.vivado
+@pytest.mark.slow
+def test_convert_to_hls_channelwise_layer(
+    pdt, idt, onnx_op_name, scalar_param, exec_mode
+):
+    ifm_ch = 16
+    ifm_dim = 5
+    ishape = (1, ifm_ch, ifm_dim, ifm_dim)
+    if scalar_param:
+        pshape = (1,)
+    else:
+        pshape = (1, ifm_ch, 1, 1)
+
+    np.random.seed(0)
+    model = make_single_maxpool_modelwrapper(onnx_op_name, ishape, idt, pdt, pshape)
+
+    # Since there are no FINN DataTypes with a bit width that is not a power
+    # of 2, there are cases where the input won't use its full range.
+    if idt == DataType.INT32:
+        x = gen_finn_dt_tensor(DataType.INT16, (1, ifm_ch, ifm_dim, ifm_dim))
+    elif idt == DataType.UINT32:
+        x = gen_finn_dt_tensor(DataType.UINT16, (1, ifm_ch, ifm_dim, ifm_dim))
+    else:
+        x = gen_finn_dt_tensor(idt, (1, ifm_ch, ifm_dim, ifm_dim))
+
+    input_dict = prepare_inputs(x)
+    y_expected = oxe.execute_onnx(model, input_dict)["outp"]
+
+    new_model = model.transform(to_hls.InferChannelwiseLinearLayer())
+    new_model = new_model.transform(GiveUniqueNodeNames())
+
+    if exec_mode == "cppsim":
+        new_model = new_model.transform(PrepareCppSim())
+        new_model = new_model.transform(CompileCppSim())
+        new_model = new_model.transform(SetExecMode("cppsim"))
+    elif exec_mode == "rtlsim":
+        new_model = new_model.transform(SetExecMode("rtlsim"))
+        new_model = new_model.transform(GiveUniqueNodeNames())
+        new_model = new_model.transform(PrepareIP("xc7z020clg400-1", 5))
+        new_model = new_model.transform(HLSSynthIP())
+        new_model = new_model.transform(ReplaceVerilogRelPaths())
+        new_model = new_model.transform(PrepareRTLSim())
+    else:
+        raise Exception("Unknown exec_mode")
+
+    ctx_produced = oxe.execute_onnx(
+        new_model, input_dict, return_full_exec_context=True
+    )
+    y_produced = ctx_produced["outp"]
+
+    assert (y_produced == y_expected).all()
+    assert new_model.graph.node[1].op_type == "ChannelwiseOp_Batch"
diff --git a/tests/fpgadataflow/test_convert_to_hls_conv_layer.py b/tests/fpgadataflow/test_convert_to_hls_conv_layer.py
new file mode 100644
index 0000000000000000000000000000000000000000..22c356a5869b25fcc7ae3ef0164ed61b53ef232c
--- /dev/null
+++ b/tests/fpgadataflow/test_convert_to_hls_conv_layer.py
@@ -0,0 +1,112 @@
+from onnx import TensorProto, helper
+import numpy as np
+import pytest
+
+from finn.core.datatype import DataType
+from finn.transformation.infer_shapes import InferShapes
+from finn.transformation.infer_datatypes import InferDataTypes
+from finn.transformation.general import GiveUniqueNodeNames
+from finn.transformation.lower_convs_to_matmul import LowerConvsToMatMul
+
+from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
+from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
+from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
+from finn.transformation.fpgadataflow.replace_verilog_relpaths import (
+    ReplaceVerilogRelPaths,
+)
+import finn.core.onnx_exec as oxe
+from finn.core.modelwrapper import ModelWrapper
+from finn.util.basic import gen_finn_dt_tensor
+import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
+
+from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
+from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
+from finn.custom_op.im2col import compute_conv_output_dim
+
+# conv_config: (kernel_size, stride, pad)
+
+
+@pytest.mark.parametrize(
+    "conv_config", [(1, 2, 0), (1, 3, 0), (3, 2, 1), (3, 1, 0),
(3, 1, 1), (5, 2, 1)] +) +@pytest.mark.parametrize("exec_mode", ["cppsim", "rtlsim"]) +@pytest.mark.slow +@pytest.mark.vivado +def test_convert_to_hls_conv_layer(conv_config, exec_mode): + kernel_size, stride, pad = conv_config + np.random.seed(0) + idt = DataType.UINT4 + + in_feature_dim = 7 + in_chn = 16 + out_chn = 20 + + out_feature_dim = compute_conv_output_dim(in_feature_dim, kernel_size, stride, pad) + + input_shape = [1, in_chn, in_feature_dim, in_feature_dim] + output_shape = [1, out_chn, out_feature_dim, out_feature_dim] + + conv_param_shape = [out_chn, in_chn, kernel_size, kernel_size] + conv_weight_dt = DataType.UINT4 + + conv_config = {} + conv_config["dilations"] = [1, 1] + conv_config["group"] = 1 + conv_config["kernel_shape"] = [kernel_size, kernel_size] + conv_config["pads"] = [pad, pad, pad, pad] + conv_config["strides"] = [stride, stride] + + top_in = helper.make_tensor_value_info("top_in", TensorProto.FLOAT, input_shape) + top_out = helper.make_tensor_value_info("top_out", TensorProto.FLOAT, output_shape) + value_info = [ + helper.make_tensor_value_info("p1", TensorProto.FLOAT, conv_param_shape) + ] + + modelproto = helper.make_model( + helper.make_graph( + name="conv_test", + inputs=[top_in], + outputs=[top_out], + value_info=value_info, + nodes=[ + helper.make_node("Conv", ["top_in", "p1"], ["top_out"], **conv_config) + ], + ) + ) + + model = ModelWrapper(modelproto) + model.set_tensor_datatype("top_in", idt) + model.set_tensor_datatype("top_out", idt) + model.set_tensor_datatype("p1", conv_weight_dt) + model.set_initializer("p1", gen_finn_dt_tensor(conv_weight_dt, conv_param_shape)) + + model = model.transform(InferShapes()) + model = model.transform(InferDataTypes()) + + new_model = model.transform(LowerConvsToMatMul()) + new_model = new_model.transform(to_hls.InferConvInpGen()) + + new_model = new_model.transform(GiveUniqueNodeNames()) + new_model = new_model.transform(InferShapes()) + new_model = new_model.transform(InferDataTypes()) + + if exec_mode == "cppsim": + new_model = new_model.transform(PrepareCppSim()) + new_model = new_model.transform(CompileCppSim()) + new_model = new_model.transform(SetExecMode("cppsim")) + elif exec_mode == "rtlsim": + new_model = new_model.transform(SetExecMode("rtlsim")) + new_model = new_model.transform(GiveUniqueNodeNames()) + new_model = new_model.transform(PrepareIP("xc7z020clg400-1", 5)) + new_model = new_model.transform(HLSSynthIP()) + new_model = new_model.transform(ReplaceVerilogRelPaths()) + new_model = new_model.transform(PrepareRTLSim()) + else: + raise Exception("Unknown exec_mode") + + x = gen_finn_dt_tensor(idt, input_shape) + inp_dict = {model.graph.input[0].name: x} + assert oxe.compare_execution(model, new_model, inp_dict) + if kernel_size == 1 and stride > 1 and pad == 0: + assert new_model.graph.node[1].op_type == "DownSampler" diff --git a/tests/fpgadataflow/test_convert_to_hls_layers_cnv.py b/tests/fpgadataflow/test_convert_to_hls_layers_cnv.py index 48803c9614f53a3a149c6eaac4289d10086513a5..20e3ee08d7ffdd013a89d26bb71d86ccc554a5b4 100644 --- a/tests/fpgadataflow/test_convert_to_hls_layers_cnv.py +++ b/tests/fpgadataflow/test_convert_to_hls_layers_cnv.py @@ -51,7 +51,7 @@ from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode from finn.custom_op.registry import getCustomOp -export_onnx_path_cnv = "test_output_cnv.onnx" +export_onnx_path_cnv = "test_convert_to_hls_layers_cnv.onnx" @pytest.mark.vivado diff --git 
a/tests/fpgadataflow/test_convert_to_hls_layers_fc.py b/tests/fpgadataflow/test_convert_to_hls_layers_fc.py index e261a3114853bf24bdb4c931c46ff92eea4150dd..d77065ad9396d0cc8dd57a39ed823fffcb30ee47 100644 --- a/tests/fpgadataflow/test_convert_to_hls_layers_fc.py +++ b/tests/fpgadataflow/test_convert_to_hls_layers_fc.py @@ -52,8 +52,7 @@ from finn.transformation.streamline.round_thresholds import RoundAndClipThreshol from finn.util.test import get_test_model_trained -export_onnx_path = "test_output_tfc.onnx" -export_onnx_path_cnv = "test_output_cnv.onnx" +export_onnx_path = "test_convert_to_hls_layers_fc.onnx" @pytest.mark.vivado diff --git a/tests/fpgadataflow/test_convert_to_hls_pool_batch.py b/tests/fpgadataflow/test_convert_to_hls_pool_batch.py new file mode 100644 index 0000000000000000000000000000000000000000..c9f78dcea1a1ce364d0657ad64de7d440d41b822 --- /dev/null +++ b/tests/fpgadataflow/test_convert_to_hls_pool_batch.py @@ -0,0 +1,160 @@ +# Copyright (c) 2020, Xilinx +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of FINN nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
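+
+# Reviewer note: the test below lowers a standalone ONNX MaxPool node with
+# to_hls.InferPool_Batch and, when ifm_ch != pe, additionally runs
+# InferConvInpGen; the node-count asserts at the end reflect the extra
+# data-layout and input-generator nodes these passes are expected to insert.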
+
+import pytest
+
+from onnx import TensorProto, helper
+import numpy as np
+import finn.core.onnx_exec as oxe
+from finn.core.datatype import DataType
+from finn.core.modelwrapper import ModelWrapper
+from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
+from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
+from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
+from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
+from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
+import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
+from finn.transformation.general import GiveUniqueNodeNames
+from finn.custom_op.registry import getCustomOp
+from finn.util.basic import gen_finn_dt_tensor
+from finn.transformation.infer_shapes import InferShapes
+
+
+def make_single_maxpool_modelwrapper(k, stride, pad, ifm_ch, ifm_dim, ofm_dim, idt):
+    odt = idt
+    inp = helper.make_tensor_value_info(
+        "inp", TensorProto.FLOAT, [1, ifm_ch, ifm_dim, ifm_dim]
+    )
+    outp = helper.make_tensor_value_info(
+        "outp", TensorProto.FLOAT, [1, ifm_ch, ofm_dim, ofm_dim]
+    )
+
+    mp_node = helper.make_node(
+        "MaxPool",
+        ["inp"],
+        ["outp"],
+        kernel_shape=[k, k],
+        pads=[pad, pad, pad, pad],
+        strides=[stride, stride],
+    )
+    graph = helper.make_graph(
+        nodes=[mp_node], name="mp_graph", inputs=[inp], outputs=[outp]
+    )
+
+    model = helper.make_model(graph, producer_name="mp-model")
+    model = ModelWrapper(model)
+
+    model.set_tensor_datatype("inp", idt)
+    model.set_tensor_datatype("outp", odt)
+    model = model.transform(InferShapes())
+
+    return model
+
+
+def prepare_inputs(input_tensor):
+    return {"inp": input_tensor}
+
+
+# input datatype
+@pytest.mark.parametrize("idt", [DataType.UINT4, DataType.INT4])
+# pool configuration: (k, stride, pad, ifm_dim)
+@pytest.mark.parametrize(
+    "pool_config", [(3, 2, 0, 5), (3, 2, 1, 5), (2, 2, 0, 8), (5, 2, 2, 7)]
+)
+# input channels
+@pytest.mark.parametrize("ifm_ch", [1, 4, 20])
+# number of output channels computed in parallel
+@pytest.mark.parametrize("pe", [1, 4, 20])
+# execution mode
+@pytest.mark.parametrize("exec_mode", ["cppsim", "rtlsim"])
+# pool type
+@pytest.mark.parametrize("op_type", ["MaxPool"])
+@pytest.mark.slow
+@pytest.mark.vivado
+def test_convert_to_hls_pool_batch(idt, pool_config, ifm_ch, pe, exec_mode, op_type):
+    k, stride, pad, ifm_dim = pool_config
+
+    if ifm_ch % pe != 0:
+        pytest.skip("ifm_ch%pe != 0. Skipping")
+
+    if pad != 0 and idt.signed():
+        pytest.skip("No support for pad_val != 0. 
Skipping") + + np.random.seed(0) + ofm_dim = int(((ifm_dim + 2 * pad - k) / stride) + 1) + + x = gen_finn_dt_tensor(idt, (1, ifm_ch, ifm_dim, ifm_dim)) + # prepare input data + input_dict = prepare_inputs(x) + if op_type == "MaxPool": + model = make_single_maxpool_modelwrapper( + k, stride, pad, ifm_ch, ifm_dim, ofm_dim, idt + ) + else: + assert False, "{} is not a supported op_type".format(op_type) + + y_expected = oxe.execute_onnx(model, input_dict)["outp"] + + new_model = model.transform(to_hls.InferPool_Batch()) + new_model = new_model.transform(GiveUniqueNodeNames()) + + if ifm_ch != pe: + new_model = new_model.transform(to_hls.InferConvInpGen()) + # Folding + for n in new_model.graph.node: + if n.op_type == "ConvolutionInputGenerator": + inst = getCustomOp(n) + inst.set_nodeattr("SIMD", pe) + elif n.op_type == "Pool_Batch": + inst = getCustomOp(n) + inst.set_nodeattr("PE", pe) + + if exec_mode == "cppsim": + new_model = new_model.transform(SetExecMode("cppsim")) + new_model = new_model.transform(PrepareCppSim()) + new_model = new_model.transform(CompileCppSim()) + elif exec_mode == "rtlsim": + new_model = new_model.transform(SetExecMode("rtlsim")) + new_model = new_model.transform(GiveUniqueNodeNames()) + new_model = new_model.transform(PrepareIP("xc7z020clg400-1", 5)) + new_model = new_model.transform(HLSSynthIP()) + new_model = new_model.transform(PrepareRTLSim()) + else: + raise Exception("Unknown exec_mode") + + # execute new_model + y_produced = oxe.execute_onnx(new_model, input_dict)["outp"] + assert (y_produced == y_expected).all() + if stride != k: + if pad == 0 or ifm_ch == pe: + assert len(new_model.graph.node) == 4 + else: + assert len(new_model.graph.node) == 5 + else: + assert len(new_model.graph.node) == 1 diff --git a/tests/fpgadataflow/test_fpgadataflow_channelwise_ops.py b/tests/fpgadataflow/test_fpgadataflow_channelwise_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..2ed352e28981552b186bb778b94dcbc07471e14b --- /dev/null +++ b/tests/fpgadataflow/test_fpgadataflow_channelwise_ops.py @@ -0,0 +1,156 @@ +# Copyright (c) 2020, Xilinx +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of FINN nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import pytest + +import numpy as np +from onnx import TensorProto, helper + +import finn.core.onnx_exec as oxe +from finn.analysis.fpgadataflow.hls_synth_res_estimation import hls_synth_res_estimation +from finn.core.datatype import DataType +from finn.core.modelwrapper import ModelWrapper +from finn.transformation.fpgadataflow.prepare_ip import PrepareIP +from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim +from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim +from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP +from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode +from finn.transformation.general import GiveUniqueNodeNames +from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim +from finn.util.basic import gen_finn_dt_tensor +from finn.transformation.fpgadataflow.replace_verilog_relpaths import ( + ReplaceVerilogRelPaths, +) + + +def make_modelwrapper(C, pe, idt, odt, pdt, func, vecs): + NumChannels = C.shape[0] + + inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, vecs + [NumChannels]) + outp = helper.make_tensor_value_info( + "outp", TensorProto.FLOAT, vecs + [NumChannels] + ) + + node_inp_list = ["inp", "const"] + + node = helper.make_node( + "ChannelwiseOp_Batch", + node_inp_list, + ["outp"], + domain="finn", + backend="fpgadataflow", + NumChannels=NumChannels, + Func=func, + PE=pe, + inputDataType=idt.name, + outputDataType=odt.name, + paramDataType=pdt.name, + numInputVectors=vecs, + ) + graph = helper.make_graph(nodes=[node], name="graph", inputs=[inp], outputs=[outp]) + + model = helper.make_model(graph, producer_name="model") + model = ModelWrapper(model) + + model.set_tensor_datatype("inp", idt) + model.set_tensor_datatype("outp", odt) + + model.set_tensor_datatype("const", idt) + model.set_initializer("const", C) + return model + + +# activation: None or DataType +@pytest.mark.parametrize("act", [DataType.INT8]) +# input datatype +@pytest.mark.parametrize("idt", [DataType.INT4]) +# param datatype +@pytest.mark.parametrize("pdt", [DataType.INT4]) +# folding, -1 is maximum possible +@pytest.mark.parametrize("nf", [-1, 2]) +# number of input features +@pytest.mark.parametrize("ich", [16]) +# vecs +@pytest.mark.parametrize("vecs", [[1], [1, 7, 7]]) +# function +@pytest.mark.parametrize("func", ["add", "mul"]) +# execution mode +@pytest.mark.parametrize("exec_mode", ["cppsim", "rtlsim"]) +@pytest.mark.vivado +@pytest.mark.slow +def test_fpgadataflow_channelwise_ops(idt, act, pdt, nf, ich, func, vecs, exec_mode): + if nf == -1: + nf = ich + pe = ich // nf + assert ich % pe == 0 + + # generate input and param data + x = gen_finn_dt_tensor(idt, tuple(vecs + [ich])) + # C = np.random.randint(idt.min(), idt.max() + 1, ich).astype(np.float32) + C = gen_finn_dt_tensor(pdt, (ich)) + + odt = act + + model = make_modelwrapper(C, pe, idt, odt, pdt, func, vecs) + + if exec_mode == "cppsim": + model = model.transform(PrepareCppSim()) + model = 
model.transform(CompileCppSim()) + model = model.transform(SetExecMode("cppsim")) + elif exec_mode == "rtlsim": + model = model.transform(SetExecMode("rtlsim")) + model = model.transform(GiveUniqueNodeNames()) + model = model.transform(PrepareIP("xc7z020clg400-1", 5)) + model = model.transform(HLSSynthIP()) + model = model.transform(ReplaceVerilogRelPaths()) + model = model.transform(PrepareRTLSim()) + else: + raise Exception("Unknown exec_mode") + + # package input data as dictionary + input_dict = {"inp": x} + + oshape = model.get_tensor_shape("outp") + + C_reshaped = np.broadcast_to(C.flatten(), x.shape) + if func == "add": + y = x + C_reshaped + elif func == "mul": + y = x * C_reshaped + + y_expected = y.reshape(oshape) + # execute model + y_produced = oxe.execute_onnx(model, input_dict)["outp"] + + y_produced = y_produced.reshape(y_expected.shape) + + assert (y_produced == y_expected).all(), "cppsim failed" + + if exec_mode == "rtlsim": + hls_synt_res_est = model.analysis(hls_synth_res_estimation) + assert "ChannelwiseOp_Batch_0" in hls_synt_res_est diff --git a/tests/fpgadataflow/test_fpgadataflow_convinputgenerator.py b/tests/fpgadataflow/test_fpgadataflow_convinputgenerator.py index 5051bf34dc690daf8b6186859d3717cc8e217eee..b5fc85caf274edc9e7afc52df962862fa8a99ba3 100644 --- a/tests/fpgadataflow/test_fpgadataflow_convinputgenerator.py +++ b/tests/fpgadataflow/test_fpgadataflow_convinputgenerator.py @@ -78,7 +78,7 @@ def make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, simd, stride, i def make_single_slidingwindow_modelwrapper( - k, ifm_ch, ifm_dim, ofm_dim, simd, stride, idt + k, ifm_ch, ifm_dim, ofm_dim, simd, stride, idt, dw=0 ): odt = idt inp = helper.make_tensor_value_info( @@ -102,6 +102,7 @@ def make_single_slidingwindow_modelwrapper( Stride=stride, inputDataType=idt.name, outputDataType=odt.name, + depthwise=dw, ) graph = helper.make_graph( nodes=[SlidingWindow_node], @@ -126,25 +127,29 @@ def prepare_inputs(input_tensor): # input datatype @pytest.mark.parametrize("idt", [DataType.BIPOLAR, DataType.INT2]) # kernel size -@pytest.mark.parametrize("k", [2, 4]) +@pytest.mark.parametrize("k", [2, 3]) # input dimension -@pytest.mark.parametrize("ifm_dim", [4, 6, 8]) +@pytest.mark.parametrize("ifm_dim", [6, 8]) # input channels -@pytest.mark.parametrize("ifm_ch", [2, 4]) # , 2, 3, 4]) +@pytest.mark.parametrize("ifm_ch", [2, 4]) # Stride @pytest.mark.parametrize("stride", [1, 2]) # execution mode @pytest.mark.parametrize("exec_mode", ["cppsim", "rtlsim"]) # input channel parallelism ("SIMD") @pytest.mark.parametrize("simd", [1, 2]) +# depthwise +@pytest.mark.parametrize("dw", [0, 1]) @pytest.mark.slow @pytest.mark.vivado -def test_fpgadataflow_slidingwindow(idt, k, ifm_dim, ifm_ch, stride, exec_mode, simd): +def test_fpgadataflow_slidingwindow( + idt, k, ifm_dim, ifm_ch, stride, exec_mode, simd, dw +): ofm_dim = int(((ifm_dim - k) / stride) + 1) x = gen_finn_dt_tensor(idt, (1, ifm_dim, ifm_dim, ifm_ch)) model = make_single_slidingwindow_modelwrapper( - k, ifm_ch, ifm_dim, ofm_dim, simd, stride, idt + k, ifm_ch, ifm_dim, ofm_dim, simd, stride, idt, dw ) if exec_mode == "cppsim": @@ -168,6 +173,12 @@ def test_fpgadataflow_slidingwindow(idt, k, ifm_dim, ifm_ch, stride, exec_mode, k, ifm_ch, ifm_dim, ofm_dim, simd, stride, idt ) y_expected = oxe.execute_onnx(golden, input_dict)["outp"] - # if idt == DataType.BIPOLAR: - # y_expected = 2 * y_expected - 1 - assert (y_produced == y_expected).all() + if dw == 0: + assert (y_produced == y_expected).all() + else: + y_expected = 
y_expected.reshape( + 1, ofm_dim, ofm_dim, k * k, ifm_ch // simd, simd + ) + y_expected = y_expected.transpose(0, 1, 2, 4, 3, 5) + y_expected = y_expected.reshape(1, ofm_dim, ofm_dim, ifm_ch * k * k) + assert (y_produced == y_expected).all() diff --git a/tests/fpgadataflow/test_fpgadataflow_sameresize.py b/tests/fpgadataflow/test_fpgadataflow_fmpadding.py similarity index 71% rename from tests/fpgadataflow/test_fpgadataflow_sameresize.py rename to tests/fpgadataflow/test_fpgadataflow_fmpadding.py index ea6130c3891443595b038460233ebb85799ac461..5ff3da87228a2a32a41226bb46e0b16b1a44df50 100644 --- a/tests/fpgadataflow/test_fpgadataflow_sameresize.py +++ b/tests/fpgadataflow/test_fpgadataflow_fmpadding.py @@ -23,9 +23,11 @@ test_fpga_part = pynq_part_map[test_pynq_board] target_clk_ns = 10 -def make_single_sameresize_modelwrapper( - idim, odim, kdim, stride, num_ch, idt, pad_style -): +def make_single_fmpadding_modelwrapper(idim, padding, num_ch, simd, idt, pad_style): + assert pad_style == 2, "only pad_style == 2 supported in hlslib" + assert padding > 0, "Output dim should be greater than input dim" + odim = idim + padding + inp = helper.make_tensor_value_info( "inp", TensorProto.FLOAT, [1, idim, idim, num_ch] ) @@ -33,25 +35,26 @@ def make_single_sameresize_modelwrapper( "outp", TensorProto.FLOAT, [1, odim, odim, num_ch] ) - SameResize_node = helper.make_node( - "SameResize_Batch", + FMPadding = helper.make_node( + "FMPadding_Batch", ["inp"], ["outp"], domain="finn", backend="fpgadataflow", ImgDim=idim, - KernelDim=kdim, - Stride=stride, + Padding=padding, NumChannels=num_ch, inputDataType=str(idt.name), PaddingStyle=pad_style, + numInputVectors=1, + SIMD=simd, ) graph = helper.make_graph( - nodes=[SameResize_node], name="sameresize_graph", inputs=[inp], outputs=[outp] + nodes=[FMPadding], name="fmpadding_graph", inputs=[inp], outputs=[outp] ) - model = helper.make_model(graph, producer_name="sameresize-model") + model = helper.make_model(graph, producer_name="fmpadding-model") model = ModelWrapper(model) model.set_tensor_datatype("inp", idt) @@ -60,34 +63,31 @@ def make_single_sameresize_modelwrapper( return model -# image dimension -@pytest.mark.parametrize("idim", [8, 16]) -# kernel dimension -@pytest.mark.parametrize("kdim", [2, 3]) -# stride -@pytest.mark.parametrize("stride", [1, 2]) +# input image dimension +@pytest.mark.parametrize("idim", [8]) +# number of rows and number of cols to add +@pytest.mark.parametrize("pad", [2, 3]) # number of channels -@pytest.mark.parametrize("num_ch", [1, 2]) +@pytest.mark.parametrize("num_ch", [2, 4]) +# Input parallelism +@pytest.mark.parametrize("simd", [1, 2]) +# PaddingStyle: selects behavior when (odim-idim)%2 != 0 +@pytest.mark.parametrize("pad_style", [2]) # FINN input datatype @pytest.mark.parametrize("idt", [DataType.INT2, DataType.INT4]) # execution mode @pytest.mark.parametrize("mode", ["cppsim", "rtlsim"]) @pytest.mark.slow @pytest.mark.vivado -def test_fpgadataflow_sameresize(idim, kdim, stride, num_ch, idt, mode): - pad_style = 2 - assert idim % stride == 0, "Stride must divide input dimension." 
- # number of "same" windows over the input data - same_windows = idim // stride - odim = kdim + stride * (same_windows - 1) - +def test_fpgadataflow_fmpadding(idim, pad, num_ch, simd, pad_style, idt, mode): + if num_ch % simd != 0: + pytest.skip(" num_ch % simd != 0, skipping") # generate input data x = gen_finn_dt_tensor(idt, [1, idim, idim, num_ch]) input_dict = {"inp": x} + odim = idim + pad - model = make_single_sameresize_modelwrapper( - idim, odim, kdim, stride, num_ch, idt, pad_style - ) + model = make_single_fmpadding_modelwrapper(idim, pad, num_ch, simd, idt, pad_style) model = model.transform(InferShapes()) model = model.transform(SetExecMode(mode)) model = model.transform(GiveUniqueNodeNames()) @@ -103,8 +103,7 @@ def test_fpgadataflow_sameresize(idim, kdim, stride, num_ch, idt, mode): assert y_produced.shape == expected_oshape # calculate reference - # calculate correct padding according to parameters - pad = odim - idim + # calculate correct pad according to parameters if pad_style == 2: if pad % 2 == 0: pad_up = pad // 2 @@ -115,6 +114,7 @@ def test_fpgadataflow_sameresize(idim, kdim, stride, num_ch, idt, mode): else: pad_up = pad // 2 pad_left = pad // 2 + pad_down = pad - pad_up pad_right = pad - pad_left diff --git a/tests/fpgadataflow/test_fpgadataflow_ip_stitch.py b/tests/fpgadataflow/test_fpgadataflow_ipstitch.py similarity index 87% rename from tests/fpgadataflow/test_fpgadataflow_ip_stitch.py rename to tests/fpgadataflow/test_fpgadataflow_ipstitch.py index 16100522aa94fd25d234efa1d03edfdc866ca1bb..a9f5bf5ffa1f816b82ef701800e92249056b7c74 100644 --- a/tests/fpgadataflow/test_fpgadataflow_ip_stitch.py +++ b/tests/fpgadataflow/test_fpgadataflow_ipstitch.py @@ -53,6 +53,11 @@ from finn.transformation.general import GiveUniqueNodeNames from finn.util.basic import gen_finn_dt_tensor, pynq_part_map from finn.util.fpgadataflow import pyverilate_stitched_ip from finn.util.test import load_test_checkpoint_or_skip +from finn.transformation.fpgadataflow.synth_ooc import SynthOutOfContext +from finn.transformation.infer_data_layouts import InferDataLayouts +from finn.transformation.fpgadataflow.insert_iodma import InsertIODMA +from finn.transformation.fpgadataflow.floorplan import Floorplan + test_pynq_board = os.getenv("PYNQ_BOARD", default="Pynq-Z1") test_fpga_part = pynq_part_map[test_pynq_board] @@ -281,6 +286,27 @@ def test_fpgadataflow_ipstitch_rtlsim(): assert (rtlsim_res == x).all() +@pytest.mark.vivado +@pytest.mark.slow +def test_fpgadataflow_ipstitch_synth_ooc(): + model = load_test_checkpoint_or_skip( + ip_stitch_model_dir + "/test_fpgadataflow_ip_stitch.onnx" + ) + model = model.transform(SynthOutOfContext(test_fpga_part, 5)) + ret = model.get_metadata_prop("res_total_ooc_synth") + assert ret is not None + # example expected output: (details may differ based on Vivado version etc) + # "{'vivado_proj_folder': ..., + # 'LUT': 708.0, 'FF': 1516.0, 'DSP': 0.0, 'BRAM': 0.0, 'WNS': 0.152, '': 0, + # 'fmax_mhz': 206.27062706270627}" + ret = eval(ret) + assert ret["LUT"] > 0 + assert ret["FF"] > 0 + assert ret["DSP"] == 0 + assert ret["BRAM"] == 0 + assert ret["fmax_mhz"] > 100 + + @pytest.mark.vivado def test_fpgadataflow_ipstitch_pynq_projgen(): model = load_test_checkpoint_or_skip( @@ -368,3 +394,19 @@ def test_fpgadataflow_ipstitch_remote_execution(): assert np.isclose(outp["outp"], x).all() except KeyError: pytest.skip("PYNQ board IP address not specified") + + +def test_fpgadataflow_ipstitch_iodma_floorplan(): + model = create_one_fc_model() + if 
model.graph.node[0].op_type == "StreamingDataflowPartition": + sdp_node = getCustomOp(model.graph.node[0]) + assert sdp_node.__class__.__name__ == "StreamingDataflowPartition" + assert os.path.isfile(sdp_node.get_nodeattr("model")) + model = load_test_checkpoint_or_skip(sdp_node.get_nodeattr("model")) + model = model.transform(InferDataLayouts()) + model = model.transform(InsertIODMA()) + model = model.transform(Floorplan()) + assert getCustomOp(model.graph.node[0]).get_nodeattr("partition_id") == 0 + assert getCustomOp(model.graph.node[1]).get_nodeattr("partition_id") == 2 + assert getCustomOp(model.graph.node[2]).get_nodeattr("partition_id") == 1 + model.save(ip_stitch_model_dir + "/test_fpgadataflow_ipstitch_iodma_floorplan.onnx") diff --git a/tests/fpgadataflow/test_fpgadataflow_labelselect.py b/tests/fpgadataflow/test_fpgadataflow_labelselect.py index 2df841728395229dafe33d2804c44a3489ef3e45..9bc77cd47fd6115823f9a35d98e8874ee3f98b2d 100644 --- a/tests/fpgadataflow/test_fpgadataflow_labelselect.py +++ b/tests/fpgadataflow/test_fpgadataflow_labelselect.py @@ -27,6 +27,7 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. import pytest +import numpy as np from onnx import TensorProto, helper @@ -70,7 +71,8 @@ def make_labelselect_modelwrapper(labels, pe, k, idt): model = ModelWrapper(model) model.set_tensor_datatype("inp", idt) - model.set_tensor_datatype("outp", DataType.UINT32) + odt = DataType.get_smallest_possible(labels - 1) + model.set_tensor_datatype("outp", odt) return model @@ -79,19 +81,18 @@ def prepare_inputs(input_tensor, idt): return {"inp": input_tensor} -# TODO: folded inputs fail, likely problem in hlslib -# input datatype -- checked by assertion in HLSCustomOp -@pytest.mark.parametrize("idt", [DataType.UINT8, DataType.UINT16]) +@pytest.mark.parametrize("idt", [DataType.UINT8, DataType.UINT16, DataType.INT16]) # labels -@pytest.mark.parametrize("labels", [10, 1000]) +@pytest.mark.parametrize("labels", [10, 100]) # folding -@pytest.mark.parametrize("fold", [-1]) +@pytest.mark.parametrize("fold", [-1, 2, 10]) # number of top labels to select @pytest.mark.parametrize("k", [1, 5]) # execution mode @pytest.mark.parametrize("exec_mode", ["cppsim", "rtlsim"]) @pytest.mark.vivado def test_fpgadataflow_labelselect(idt, labels, fold, k, exec_mode): + np.random.seed(0) if fold == -1: pe = 1 else: diff --git a/tests/fpgadataflow/test_fpgadataflow_res_estimate.py b/tests/fpgadataflow/test_fpgadataflow_res_estimate.py index 38f792ed3cdd52044b28b4c19ac0603da4e502e6..398a17132a2ef6c92e600102ff5c0b71a1f65aaa 100644 --- a/tests/fpgadataflow/test_fpgadataflow_res_estimate.py +++ b/tests/fpgadataflow/test_fpgadataflow_res_estimate.py @@ -92,7 +92,7 @@ def test_res_estimate(): model = model.transform(GiveUniqueNodeNames()) prod_resource_estimation = model.analysis(res_estimation) expect_resource_estimation = { - "StreamingFCLayer_Batch_0": {"BRAM_18K": 1, "LUT": 304.4} + "StreamingFCLayer_Batch_0": {"BRAM_18K": 1, 'BRAM_efficiency': 0.001736111111111111, "LUT": 304.4} } assert check_two_dict_for_equality( diff --git a/tests/pynq/test_pynq_performance_end2end.py b/tests/pynq/test_pynq_performance_end2end.py index 66a93a190061e0142637be19bb2ea841d192745a..3b6ea86741b8adefce4faaa65b791f1d213cf3ae 100644 --- a/tests/pynq/test_pynq_performance_end2end.py +++ b/tests/pynq/test_pynq_performance_end2end.py @@ -10,7 +10,7 @@ from finn.core.throughput_test import throughput_test build_dir = "/tmp/" + os.environ["FINN_INST_NAME"] -@pytest.mark.parametrize("end2end_example", 
["tfc_w1a1", "cnv_w1a1"]) +@pytest.mark.parametrize("end2end_example", ["tfc_w1a1", "cnv_w1a1", "cnv_w2a2"]) @pytest.mark.slow def test_pynq_performance_end2end(end2end_example): model = load_test_checkpoint_or_skip( diff --git a/tests/transformation/streamline/test_streamline_cnv.py b/tests/transformation/streamline/test_streamline_cnv.py index 56dcd26076ec0a5fba6e9be6acac7f5e13572c3d..bcb66a2c22eb4d6a998580129881793bbc86b250 100644 --- a/tests/transformation/streamline/test_streamline_cnv.py +++ b/tests/transformation/streamline/test_streamline_cnv.py @@ -34,7 +34,12 @@ import pkg_resources as pk import finn.core.onnx_exec as oxe from finn.core.modelwrapper import ModelWrapper from finn.transformation.fold_constants import FoldConstants -from finn.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames +from finn.transformation.general import ( + RemoveUnusedTensors, + RemoveStaticGraphInputs, + GiveReadableTensorNames, + GiveUniqueNodeNames, +) from finn.transformation.infer_shapes import InferShapes from finn.transformation.streamline import Streamline from finn.util.test import get_test_model_trained @@ -44,9 +49,9 @@ from finn.transformation.double_to_single_float import DoubleToSingleFloat export_onnx_path = make_build_dir("test_streamline_cnv_") # act bits -@pytest.mark.parametrize("abits", [1]) +@pytest.mark.parametrize("abits", [1, 2]) # weight bits -@pytest.mark.parametrize("wbits", [1]) +@pytest.mark.parametrize("wbits", [1, 2]) # network topology / size @pytest.mark.parametrize("size", ["CNV"]) def test_streamline_cnv(size, wbits, abits): @@ -62,6 +67,7 @@ def test_streamline_cnv(size, wbits, abits): model = model.transform(FoldConstants()) model = model.transform(GiveUniqueNodeNames()) model = model.transform(GiveReadableTensorNames()) + model = model.transform(RemoveStaticGraphInputs()) # load one of the test vectors fn = pk.resource_filename("finn", "data/cifar10/cifar10-test-data-class3.npz") input_tensor = np.load(fn)["arr_0"].astype(np.float32) @@ -73,7 +79,11 @@ def test_streamline_cnv(size, wbits, abits): expected = expected_ctx[model.graph.output[0].name] # model.save("orig_cnv.onnx") model = model.transform(Streamline()) + model = model.transform(RemoveUnusedTensors()) + assert len(model.graph.initializer) == 21 + assert len(model.graph.value_info) == 43 # model.save("streamlined_cnv.onnx") + assert len(model.graph.node) == 23 produced_ctx = oxe.execute_onnx(model, input_dict, True) produced = produced_ctx[model.graph.output[0].name] assert np.isclose(expected, produced, atol=1e-3).all() diff --git a/tests/transformation/streamline/test_streamline_fc.py b/tests/transformation/streamline/test_streamline_fc.py index c68561239b7c30973856fa282d20cd2afaa168ae..dd7e756b4021af26c228804d4b509ecff032347e 100644 --- a/tests/transformation/streamline/test_streamline_fc.py +++ b/tests/transformation/streamline/test_streamline_fc.py @@ -37,7 +37,12 @@ import pytest import finn.core.onnx_exec as oxe from finn.core.modelwrapper import ModelWrapper from finn.transformation.fold_constants import FoldConstants -from finn.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames +from finn.transformation.general import ( + RemoveUnusedTensors, + RemoveStaticGraphInputs, + GiveReadableTensorNames, + GiveUniqueNodeNames, +) from finn.transformation.infer_shapes import InferShapes from finn.transformation.streamline import Streamline from finn.util.test import get_test_model_trained @@ -65,6 +70,7 @@ def test_streamline_fc(size, wbits, abits): model = 
model.transform(FoldConstants()) model = model.transform(GiveUniqueNodeNames()) model = model.transform(GiveReadableTensorNames()) + model = model.transform(RemoveStaticGraphInputs()) # load one of the test vectors raw_i = get_data("finn", "data/onnx/mnist-conv/test_data_set_0/input_0.pb") input_tensor = onnx.load_tensor_from_string(raw_i) @@ -73,6 +79,10 @@ def test_streamline_fc(size, wbits, abits): expected_ctx = oxe.execute_onnx(model, input_dict, True) expected = expected_ctx[model.graph.output[0].name] model = model.transform(Streamline()) + model = model.transform(RemoveUnusedTensors()) + assert len(model.graph.initializer) == 11 + assert len(model.graph.value_info) == 21 + assert len(model.graph.quantization_annotation) == 18 produced_ctx = oxe.execute_onnx(model, input_dict, True) produced = produced_ctx[model.graph.output[0].name] assert np.isclose(expected, produced, atol=1e-3).all() diff --git a/tests/transformation/test_absorb_mul_into_topk.py b/tests/transformation/test_absorb_mul_into_topk.py new file mode 100644 index 0000000000000000000000000000000000000000..1394220f7c336ccea8fe9c494734c4175bf2e847 --- /dev/null +++ b/tests/transformation/test_absorb_mul_into_topk.py @@ -0,0 +1,108 @@ +# Copyright (c) 2020, Xilinx +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of FINN nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
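+
+# Reviewer note: AbsorbScalarMulIntoTopK should only fire for a Mul with a
+# positive scalar parameter in front of TopK; a negative or per-element
+# parameter would change the ordering of the TopK selection, so those graphs
+# are expected to remain unchanged by the transformation.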
+import pytest + +import numpy as np +from onnx import TensorProto, helper + +from finn.core.modelwrapper import ModelWrapper +from finn.transformation.infer_shapes import InferShapes +from finn.transformation.infer_datatypes import InferDataTypes +from finn.transformation.general import GiveUniqueNodeNames, GiveReadableTensorNames +from finn.transformation.insert_topk import InsertTopK +from finn.transformation.streamline.absorb import AbsorbScalarMulIntoTopK +import finn.core.onnx_exec as oxe + +# parameter to indicate if mul parameter is negative or positive +@pytest.mark.parametrize("mul_positive", [True, False]) +# parameter to indicate if mul parameter is scalar or not +@pytest.mark.parametrize("scalar", [True, False]) +def test_absorb_mul_into_topk(mul_positive, scalar): + if scalar is True: + shape = [1] + else: + shape = [1, 1, 1, 1000] + inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, [1, 1, 1, 1000]) + a0 = helper.make_tensor_value_info("a0", TensorProto.FLOAT, shape) + outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, [1, 1, 1, 1000]) + + mul_node = helper.make_node("Mul", ["inp", "a0"], ["outp"]) + mul_graph = helper.make_graph( + nodes=[mul_node], + name="mul-graph", + inputs=[inp], + outputs=[outp], + value_info=[a0], + ) + + model = helper.make_model(mul_graph, producer_name="mul_model") + model = ModelWrapper(model) + # initialize values + if mul_positive is True: + a0_values = np.random.uniform(low=0.1, high=1, size=tuple(shape)).astype( + np.float32 + ) + else: + a0_values = np.random.uniform(low=-1, high=-0.1, size=tuple(shape)).astype( + np.float32 + ) + model.set_initializer("a0", a0_values) + model = model.transform(InsertTopK()) + model = model.transform(InferShapes()) + model = model.transform(InferDataTypes()) + model = model.transform(GiveUniqueNodeNames()) + model = model.transform(GiveReadableTensorNames()) + model_transformed = model.transform(AbsorbScalarMulIntoTopK()) + + # compare execution results + inp_values = np.random.uniform(low=-10, high=10, size=(1, 1, 1, 1000)).astype( + np.float32 + ) + idict = {"global_in": inp_values} + odict = oxe.execute_onnx(model, idict, True) + y_indices = odict["global_out"] + y_values = odict["TopK_0_out0"] + odict = oxe.execute_onnx(model_transformed, idict, True) + y_tr_indices = odict["global_out"] + y_tr_values = odict["TopK_0_out0"] + + # the indices stay the same, if the model is transformed or not + assert (y_indices == y_tr_indices).all() + + if scalar is True and mul_positive is True: + # the values change if the model was transformed + assert (y_values != y_tr_values).all() + + # check for new order + assert model.graph != model_transformed.graph + assert len(model.graph.node) - 1 == len(model_transformed.graph.node) + assert model_transformed.graph.node[0].op_type == "TopK" + + else: + assert (y_values == y_tr_values).all() + assert model.graph == model_transformed.graph diff --git a/tests/transformation/test_absorb_opposite_transposes.py b/tests/transformation/test_absorb_opposite_transposes.py new file mode 100644 index 0000000000000000000000000000000000000000..859e691277a261f01b559e2e166763e402c5d689 --- /dev/null +++ b/tests/transformation/test_absorb_opposite_transposes.py @@ -0,0 +1,76 @@ +# Copyright (c) 2020, Xilinx +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of FINN nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import numpy as np +import onnx.helper as oh +from onnx import TensorProto + +import finn.core.onnx_exec as ox +from finn.core.modelwrapper import ModelWrapper +from finn.transformation.infer_shapes import InferShapes +from finn.transformation.streamline.absorb import AbsorbConsecutiveTransposes + + +def test_absorb_opposite_transposes(): + np.random.seed(0) + input_shape = [1, 3, 4, 2] + top_in = oh.make_tensor_value_info("top_in", TensorProto.FLOAT, input_shape) + top_out = oh.make_tensor_value_info("top_out", TensorProto.FLOAT, input_shape) + value_info = [oh.make_tensor_value_info("add_param_0", TensorProto.FLOAT, [1])] + value_info += [oh.make_tensor_value_info("add_param_1", TensorProto.FLOAT, [1])] + value_info += [oh.make_tensor_value_info("mul_param_0", TensorProto.FLOAT, [1])] + modelproto = oh.make_model( + oh.make_graph( + name="test", + inputs=[top_in], + outputs=[top_out], + value_info=value_info, + nodes=[ + oh.make_node("Add", ["top_in", "add_param_0"], ["t0"]), + oh.make_node("Transpose", ["t0"], ["t1"], perm=[0, 2, 3, 1]), + oh.make_node("Transpose", ["t1"], ["t2"], perm=[0, 3, 1, 2]), + oh.make_node("Add", ["t2", "add_param_1"], ["t3"]), + oh.make_node("Transpose", ["t3"], ["t4"], perm=[0, 2, 3, 1]), + oh.make_node("Transpose", ["t4"], ["t5"], perm=[0, 3, 1, 2]), + oh.make_node("Add", ["t5", "t2"], ["t6"]), + oh.make_node("Mul", ["t6", "mul_param_0"], ["top_out"]), + ], + ) + ) + model = ModelWrapper(modelproto) + model = model.transform(InferShapes()) + model.set_initializer("add_param_0", np.asarray([1], dtype=np.float32)) + model.set_initializer("add_param_1", np.asarray([3], dtype=np.float32)) + model.set_initializer("mul_param_0", np.asarray([2], dtype=np.float32)) + new_model = model.transform(AbsorbConsecutiveTransposes()) + new_model = new_model.transform(InferShapes()) + inp_dict = {"top_in": np.random.rand(*input_shape).astype(np.float32)} + assert ox.compare_execution(model, model, inp_dict) + assert len(new_model.graph.node) == 4 + for n in new_model.graph.node: + assert new_model.graph.node[0].op_type != 
"Transpose" diff --git a/tests/transformation/test_absorb_transp_into_flatten.py b/tests/transformation/test_absorb_transp_into_flatten.py new file mode 100644 index 0000000000000000000000000000000000000000..fbfa15277717c554da01e38608601997407803b2 --- /dev/null +++ b/tests/transformation/test_absorb_transp_into_flatten.py @@ -0,0 +1,99 @@ +import pytest + +import numpy as np +from onnx import TensorProto, helper + +from finn.core.modelwrapper import ModelWrapper +import finn.core.data_layout as DataLayout +from finn.transformation.infer_shapes import InferShapes +from finn.transformation.infer_datatypes import InferDataTypes +from finn.transformation.infer_data_layouts import InferDataLayouts +from finn.transformation.general import GiveUniqueNodeNames, GiveReadableTensorNames +from finn.transformation.streamline.absorb import AbsorbTransposeIntoFlatten +import finn.core.onnx_exec as oxe + +# permutation of transpose node +@pytest.mark.parametrize("perm", [[0, 2, 3, 1], [0, 1, 3, 2], [3, 2, 0, 1]]) +# reshape or flatten +@pytest.mark.parametrize("shape", [None, [1, -1], [-1, 1]]) +# input shape +@pytest.mark.parametrize("ishape", [[1, 1, 1, 4], [2, 4, 1, 1], [1, 2, 2, 4]]) +# datalayout +@pytest.mark.parametrize("data_layout", ["NCHW", "NHWC"]) +def test_absorb_transp_into_flatten(perm, shape, ishape, data_layout): + inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, ishape) + transp_node = helper.make_node("Transpose", ["inp"], ["transp_out"], perm=perm) + dummy_in = np.random.uniform(low=0, high=1, size=tuple(ishape)).astype(np.float32) + if shape is None: + shape_node = helper.make_node("Flatten", ["transp_out"], ["outp"]) + dummy_in = dummy_in.transpose(tuple(perm)) + oshape = dummy_in.reshape(dummy_in.shape[0], -1).shape + outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, oshape) + shape0 = None + else: + shape0 = helper.make_tensor_value_info("shape0", TensorProto.FLOAT, shape) + shape_node = helper.make_node("Reshape", ["transp_out", "shape0"], ["outp"]) + oshape = dummy_in.transpose(tuple(perm)).reshape(tuple(shape)).shape + outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, oshape) + + graph = helper.make_graph( + nodes=[transp_node, shape_node], + name="absorb-transpose-graph", + inputs=[inp], + outputs=[outp], + ) + + model = helper.make_model(graph, producer_name="absorb_transpose_model") + model = ModelWrapper(model) + if shape is not None: + model.graph.value_info.append(shape0) + model.set_initializer("shape0", np.asarray(shape)) + if data_layout == "NCHW": + model.set_tensor_layout("inp", DataLayout.NCHW) + else: + model.set_tensor_layout("inp", DataLayout.NHWC) + model = model.transform(InferShapes()) + model = model.transform(InferDataTypes()) + model = model.transform(InferDataLayouts()) + model = model.transform(GiveUniqueNodeNames()) + model = model.transform(GiveReadableTensorNames()) + model.save("test.onnx") + model_transformed = model.transform(AbsorbTransposeIntoFlatten()) + model_transformed.save("test2.onnx") + + # verify transformation + inp_values = np.random.uniform(low=-1, high=1, size=tuple(ishape)).astype( + np.float32 + ) + idict = {model.graph.input[0].name: inp_values} + assert oxe.compare_execution(model, model_transformed, idict) + + # only some of the parameter combinations lead to a graph that will be changed when + # AbsorbTransposeIntoFlatten is applied + + if shape == [-1, 1]: # not a flatten operation, so the graph will not be changed + assert model.graph == model_transformed.graph + + elif perm == [ + 3, + 
2,
+        0,
+        1,
+    ]:  # the first dimension is also part of the transpose operation
+        # so the graph will not be changed
+        assert model.graph == model_transformed.graph
+
+    # the following cases are the ones in which the model is transformed;
+    # the shape and perm parameters were already checked above, so only ishape
+    # and data_layout need to be considered (the transformed model should only
+    # contain a "Flatten" node)
+    elif ishape == [1, 1, 1, 4] and data_layout == "NHWC":
+        assert model_transformed.graph.node[0].op_type == "Flatten"
+
+    elif ishape == [2, 4, 1, 1] and data_layout == "NCHW" and shape is None:
+        # If the first dimension of the input tensor is not 1, flatten and
+        # reshape (with shape = [1, -1]) would lead to different results
+        assert model_transformed.graph.node[0].op_type == "Flatten"
+
+    # all other cases lead to an unchanged model
+    else:
+        assert model.graph == model_transformed.graph
diff --git a/tests/transformation/test_change_datalayout.py b/tests/transformation/test_change_datalayout.py
new file mode 100644
index 0000000000000000000000000000000000000000..66459d574957575e61ec1bec631fb7030a27cca1
--- /dev/null
+++ b/tests/transformation/test_change_datalayout.py
@@ -0,0 +1,112 @@
+# Copyright (c) 2020, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
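+
+# Reviewer note: ChangeDataLayoutQuantAvgPool2d is expected to switch the
+# QuantAvgPool2d node to NHWC and wrap it in a pair of Transpose nodes, which
+# is why the test below checks for exactly two additional nodes after the
+# transformation.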
+import pytest +from onnx import helper, TensorProto + +from finn.custom_op.maxpoolnhwc import compute_pool_output_dim +from finn.core.modelwrapper import ModelWrapper +from finn.core.datatype import DataType +import finn.core.data_layout as DataLayout +from finn.transformation.change_datalayout import ChangeDataLayoutQuantAvgPool2d +from finn.transformation.infer_shapes import InferShapes +from finn.transformation.infer_datatypes import InferDataTypes +from finn.transformation.infer_data_layouts import InferDataLayouts +from finn.transformation.general import GiveUniqueNodeNames, GiveReadableTensorNames +from finn.util.basic import gen_finn_dt_tensor +from finn.util.basic import get_by_name +import finn.core.onnx_exec as oxe + +# stride +@pytest.mark.parametrize("s", [1, 2]) +# kernel +@pytest.mark.parametrize("k", [3, 4]) +# ibits +@pytest.mark.parametrize("ibits", [4, 8]) +# obits +@pytest.mark.parametrize("obits", [2, 4]) +# signed +@pytest.mark.parametrize("signed", [False, True]) +# channels +@pytest.mark.parametrize("c", [2, 3]) +# input dimension +@pytest.mark.parametrize("idim", [6, 7]) +def test_change_datalayout_quantavgpool(s, k, ibits, obits, signed, c, idim): + n = 1 + odim = compute_pool_output_dim(idim, k, s) + # determine input FINN datatype + if signed is True: + prefix = "INT" + else: + prefix = "UINT" + dt_name = prefix + str(ibits) + dtype = DataType[dt_name] + + inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, [n, c, idim, idim]) + outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, [n, c, odim, odim]) + + node = helper.make_node( + "QuantAvgPool2d", + ["inp"], + ["outp"], + domain="finn", + stride=s, + kernel=k, + ibits=ibits, + obits=obits, + signed=signed, + data_layout="NCHW", + ) + graph = helper.make_graph( + nodes=[node], name="single-quantavgpool", inputs=[inp], outputs=[outp] + ) + + model = helper.make_model(graph) + model = ModelWrapper(model) + model = model.transform(InferShapes()) + model = model.transform(InferDataTypes()) + model = model.transform(InferDataLayouts()) + model = model.transform(GiveUniqueNodeNames()) + model = model.transform(GiveReadableTensorNames()) + model_transformed = model.transform(ChangeDataLayoutQuantAvgPool2d()) + model_transformed = model_transformed.transform(InferShapes()) + model_transformed = model_transformed.transform(InferDataTypes()) + model_transformed = model_transformed.transform(InferDataLayouts()) + model_transformed = model_transformed.transform(GiveUniqueNodeNames()) + model_transformed = model_transformed.transform(GiveReadableTensorNames()) + inp_values = gen_finn_dt_tensor(dtype, [n, c, idim, idim]) + idict = {"inp": inp_values} + assert oxe.compare_execution(model, model_transformed, idict) + assert len(model.graph.node) + 2 == len(model_transformed.graph.node) + assert model_transformed.graph.node[-1].op_type == "Transpose" + assert model_transformed.graph.node[0].op_type == "Transpose" + # check if QuantAvgPool2d node has datalayout set correctly + node = model_transformed.graph.node[1] + d_layout = get_by_name(node.attribute, "data_layout").s.decode("UTF-8") + assert d_layout == "NHWC" + assert model_transformed.get_tensor_layout(node.input[0]) == DataLayout.NHWC + assert model_transformed.get_tensor_layout(node.output[0]) == DataLayout.NHWC diff --git a/tests/transformation/test_conv_lowering.py b/tests/transformation/test_conv_lowering.py index c06a9a2c9def44b5384d6a87935e0ce85ede1bb2..fe0b58a6adc8278a2be298a6a2c7265c9743a94a 100644 --- 
a/tests/transformation/test_conv_lowering.py +++ b/tests/transformation/test_conv_lowering.py @@ -27,13 +27,13 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. import pytest +import onnx.helper as oh +from onnx import TensorProto import os -from onnx import helper, TensorProto import pkg_resources as pk import brevitas.onnx as bo import numpy as np - from finn.core.modelwrapper import ModelWrapper from finn.core.datatype import DataType from finn.transformation.fold_constants import FoldConstants @@ -46,7 +46,7 @@ from finn.custom_op.im2col import compute_conv_output_dim from finn.util.basic import gen_finn_dt_tensor from finn.custom_op.registry import getCustomOp -export_onnx_path = "test_output_cnv.onnx" +export_onnx_path = "test_conv_lowering.onnx" def test_conv_lowering_cnv_w1a1(): @@ -91,16 +91,16 @@ def test_depthwise_conv_lowering(idt, k, ifm_dim, ifm_ch, stride, padding): ofm_dim = compute_conv_output_dim(ifm_dim, k, stride, pad=padding[0]) # set up onnx model - inp = helper.make_tensor_value_info( + inp = oh.make_tensor_value_info( "inp", TensorProto.FLOAT, [1, ifm_ch, ifm_dim, ifm_dim] ) - outp = helper.make_tensor_value_info( + outp = oh.make_tensor_value_info( "outp", TensorProto.FLOAT, [1, ofm_ch, ofm_dim, ofm_dim] ) - W = helper.make_tensor_value_info("W", TensorProto.FLOAT, [ofm_ch, 1, k, k]) + W = oh.make_tensor_value_info("W", TensorProto.FLOAT, [ofm_ch, 1, k, k]) - dw_cnv = helper.make_node( + dw_cnv = oh.make_node( "Conv", inputs=["inp", "W"], outputs=["outp"], @@ -109,7 +109,7 @@ def test_depthwise_conv_lowering(idt, k, ifm_dim, ifm_ch, stride, padding): strides=[stride, stride], group=ifm_ch, ) - graph = helper.make_graph( + graph = oh.make_graph( nodes=[dw_cnv], name="dw_cnv_graph", inputs=[inp], @@ -117,7 +117,7 @@ def test_depthwise_conv_lowering(idt, k, ifm_dim, ifm_ch, stride, padding): value_info=[W], ) - model = helper.make_model(graph, producer_name="dws_cnv-model") + model = oh.make_model(graph, producer_name="dws_cnv-model") model = ModelWrapper(model) model.set_tensor_datatype("inp", idt) model.set_tensor_datatype("outp", odt) @@ -139,4 +139,52 @@ def test_depthwise_conv_lowering(idt, k, ifm_dim, ifm_ch, stride, padding): # check if created nodes have attributes that indicate depthwise conv assert model.get_tensor_sparsity("W") is not None im2col_node = getCustomOp(model.graph.node[1]) - assert im2col_node.get_nodeattr("dw") == 1 + assert im2col_node.get_nodeattr("depthwise") == 1 + + +def test_conv_lowering_conv_1x1(): + np.random.seed(0) + + in_feature_dim = 7 + in_chn = 3 + kernel_size = 1 + out_feature_dim = in_feature_dim + + input_shape = [1, in_chn, in_feature_dim, in_feature_dim] + output_shape = [1, in_chn, out_feature_dim, out_feature_dim] + + conv_param_shape = [in_chn, in_chn, kernel_size, kernel_size] + + conv_config = {} + conv_config["dilations"] = [1, 1] + conv_config["group"] = 1 + conv_config["kernel_shape"] = [kernel_size, kernel_size] + conv_config["pads"] = [0, 0, 0, 0] + conv_config["strides"] = [1, 1] + + top_in = oh.make_tensor_value_info("top_in", TensorProto.FLOAT, input_shape) + top_out = oh.make_tensor_value_info("top_out", TensorProto.FLOAT, output_shape) + + value_info = [oh.make_tensor_value_info("p1", TensorProto.FLOAT, conv_param_shape)] + + modelproto = oh.make_model( + oh.make_graph( + name="test", + inputs=[top_in], + outputs=[top_out], + value_info=value_info, + nodes=[oh.make_node("Conv", ["top_in", "p1"], ["top_out"], **conv_config)], + ) + ) + model = ModelWrapper(modelproto) + model = 
model.transform(InferShapes()) + model.set_initializer("p1", np.random.rand(*conv_param_shape).astype(np.float32)) + + new_model = model.transform(LowerConvsToMatMul()) + inp_dict = {"top_in": np.random.rand(*input_shape).astype(np.float32)} + + assert oxe.compare_execution(model, new_model, inp_dict) + assert new_model.graph.node[0].op_type == "Transpose" + assert new_model.graph.node[1].op_type == "MatMul" + assert new_model.graph.node[2].op_type == "Transpose" + assert len(new_model.graph.node) == 3 diff --git a/tests/transformation/test_fold_constants.py b/tests/transformation/test_fold_constants.py index 685c14a98b9031096aaf5b244c4f484d4f308bca..a976ffd62bce744a474a6fac2a61a6478526777f 100644 --- a/tests/transformation/test_fold_constants.py +++ b/tests/transformation/test_fold_constants.py @@ -40,7 +40,7 @@ from finn.transformation.fold_constants import FoldConstants from finn.transformation.infer_shapes import InferShapes from finn.util.test import get_test_model_untrained -export_onnx_path = "test_output_lfc.onnx" +export_onnx_path = "test_fold_constants.onnx" def test_const_folding(): diff --git a/tests/transformation/test_infer_data_layouts.py b/tests/transformation/test_infer_data_layouts.py index fccc7813da6f98c8af4ade7ae562c99b32247a8b..d6d9920043114c78e970842aee5955e3150cf526 100644 --- a/tests/transformation/test_infer_data_layouts.py +++ b/tests/transformation/test_infer_data_layouts.py @@ -44,7 +44,7 @@ import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls from finn.transformation.infer_data_layouts import InferDataLayouts import finn.core.data_layout as DataLayout -export_onnx_path_cnv = "test_output_cnv.onnx" +export_onnx_path_cnv = "test_infer_data_layouts.onnx" def test_infer_data_layouts(): diff --git a/tests/transformation/test_infer_datatypes.py b/tests/transformation/test_infer_datatypes.py index e3db40289c4318894cf5ad41c2f67b3bff501db9..097ae03f6153843fbb7956a72b38431559d5d0f1 100644 --- a/tests/transformation/test_infer_datatypes.py +++ b/tests/transformation/test_infer_datatypes.py @@ -38,7 +38,7 @@ from finn.transformation.infer_datatypes import InferDataTypes from finn.transformation.infer_shapes import InferShapes from finn.util.test import get_test_model_trained -export_onnx_path = "test_output_lfc.onnx" +export_onnx_path = "test_infer_datatypes.onnx" def test_infer_datatypes(): diff --git a/tests/transformation/test_linear_past_eltwise.py b/tests/transformation/test_linear_past_eltwise.py index b77f59779a5e8559f80e017d13b66bcb67249830..4cff5e5e1d40986a006cc02186fce21a907c2ef1 100644 --- a/tests/transformation/test_linear_past_eltwise.py +++ b/tests/transformation/test_linear_past_eltwise.py @@ -41,7 +41,7 @@ from finn.transformation.double_to_single_float import DoubleToSingleFloat import pytest -export_onnx_path = "test_scalar_past_eltwise.onnx" +export_onnx_path = "test_linear_past_eltwise.onnx" # construct a synthetic graph to test: # topk insertion, topk conversion to hls, add conversion to hls diff --git a/tests/transformation/test_merge_onnx_models.py b/tests/transformation/test_merge_onnx_models.py new file mode 100644 index 0000000000000000000000000000000000000000..db7c990baddfb50a39603937a9c5b73f512a0e59 --- /dev/null +++ b/tests/transformation/test_merge_onnx_models.py @@ -0,0 +1,126 @@ +# Copyright (c) 2020, Xilinx +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of FINN nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +from pkgutil import get_data + +import numpy as np +import onnx +import onnx.numpy_helper as np_helper +from onnx import TensorProto, helper + +from finn.core.modelwrapper import ModelWrapper +from finn.core.datatype import DataType +from finn.transformation.infer_shapes import InferShapes +from finn.transformation.infer_datatypes import InferDataTypes +from finn.transformation.infer_data_layouts import InferDataLayouts +from finn.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames +from finn.transformation.merge_onnx_models import MergeONNXModels +import finn.core.onnx_exec as oxe + + +def test_merge_onnx_models(): + # load pre model + raw_m = get_data("finn", "data/onnx/mnist-conv/model.onnx") + model1 = ModelWrapper(raw_m) + # the input for model1 comes from a uint8 vector so we set the finn datatype + # of the input tensor to DataType.UINT8 to verify that the datatypes are correctly + # preserved in the transformed model + model1.set_tensor_datatype(model1.graph.input[0].name, DataType.UINT8) + model1 = model1.transform(InferShapes()) + model1 = model1.transform(GiveUniqueNodeNames()) + model1 = model1.transform(GiveReadableTensorNames()) + + # set up post model + shape = [1, 10] + inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, shape) + a0 = helper.make_tensor_value_info("a0", TensorProto.FLOAT, []) + a1 = helper.make_tensor_value_info("a1", TensorProto.FLOAT, []) + outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, shape) + + mul_node = helper.make_node("Mul", ["inp", "a0"], ["mul_out"]) + div_node = helper.make_node("Div", ["mul_out", "a1"], ["outp"]) + + graph = helper.make_graph( + nodes=[mul_node, div_node], + name="model2-graph", + inputs=[inp], + outputs=[outp], + value_info=[a0, a1], + ) + + model2 = helper.make_model(graph, producer_name="model2") + model2 = ModelWrapper(model2) + # initialize model2 + a0_value = np.random.uniform(low=0, high=1, size=(1)).astype(np.float32) + model2.set_initializer("a0", a0_value) + a1_value = np.random.uniform(low=0.1, high=1, 
size=(1)).astype(np.float32) + model2.set_initializer("a1", a1_value) + # set a dummy sparsity annotation to check if it gets correctly transferred + # to the merged model + sparsity = {"dw": {"kernel_shape": 0}} + model2.set_tensor_sparsity("a1", sparsity) + model2 = model2.transform(InferShapes()) + model2 = model2.transform(InferDataTypes()) + model2 = model2.transform(InferDataLayouts()) + model2 = model2.transform(GiveUniqueNodeNames()) + model2 = model2.transform(GiveReadableTensorNames()) + + # simulate the models before the merging and pass the output of model1 to model2 + # load one of the test vectors + raw_i = get_data("finn", "data/onnx/mnist-conv/test_data_set_0/input_0.pb") + inp_values = onnx.load_tensor_from_string(raw_i) + inp_values = np_helper.to_array(inp_values) + idict = {model1.graph.input[0].name: inp_values} + odict = oxe.execute_onnx(model1, idict) + temp = odict[model1.graph.output[0].name] + + idict = {model2.graph.input[0].name: temp} + odict = oxe.execute_onnx(model2, idict) + outp = odict[model2.graph.output[0].name] + # merge models + model_transformed = model2.transform(MergeONNXModels(model1)) + + idict = {model_transformed.graph.input[0].name: inp_values} + odict = oxe.execute_onnx(model_transformed, idict) + outp_transformed = odict[model_transformed.graph.output[0].name] + + assert (outp == outp_transformed).all() + assert len(model_transformed.graph.node) == len(model1.graph.node) + len( + model2.graph.node + ) + # to test if the value is preserved we set the sparsity annotation of input[1] + # of the division block to a dummy value, we can now look for the division block + # and check if the sparsity annotation is still the same + for n in model_transformed.graph.node: + if n.op_type == "Div": + tensor_name = n.input[1] + set_sparsity = model_transformed.get_tensor_sparsity(tensor_name) + assert sparsity == set_sparsity + + # check if finn datatype of graph.input[0] is still set to UINT8 + assert model_transformed.get_tensor_datatype("global_in") == DataType.UINT8 diff --git a/tests/transformation/test_move_chw_add_past_conv.py b/tests/transformation/test_move_chw_add_past_conv.py new file mode 100644 index 0000000000000000000000000000000000000000..b626f7e5b8564739ec383aaddfc262d642bf47cc --- /dev/null +++ b/tests/transformation/test_move_chw_add_past_conv.py @@ -0,0 +1,109 @@ +# Copyright (c) 2020, Xilinx +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of FINN nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import pytest + +import numpy as np +from onnx import helper, TensorProto + +from finn.core.modelwrapper import ModelWrapper +from finn.transformation.infer_shapes import InferShapes +from finn.transformation.streamline.reorder import MoveAddPastConv +from finn.custom_op.im2col import compute_conv_output_dim +import finn.core.onnx_exec as oxe + + +# input dimension +@pytest.mark.parametrize("idim", [4, 7]) +# kernel size +@pytest.mark.parametrize("k", [2, 3]) +# stride +@pytest.mark.parametrize("s", [1, 2]) +# input channels +@pytest.mark.parametrize("ich", [2, 4]) +# output channels +@pytest.mark.parametrize("och", [2, 3]) +def test_move_chw_add_past_conv(idim, k, s, ich, och): + odim = compute_conv_output_dim(idim, k, s) + + ishape = [1, ich, idim, idim] + oshape = [1, och, odim, odim] + add_param_shape = [1, ich, 1, 1] + conv_param_shape = [och, ich, k, k] + + inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, ishape) + outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, oshape) + a0 = helper.make_tensor_value_info("a0", TensorProto.FLOAT, add_param_shape) + a1 = helper.make_tensor_value_info("a1", TensorProto.FLOAT, conv_param_shape) + + conv_config = {} + conv_config["dilations"] = [1, 1] + conv_config["group"] = 1 + conv_config["kernel_shape"] = [k, k] + conv_config["pads"] = [0, 0, 0, 0] + conv_config["strides"] = [s, s] + + add_node = helper.make_node("Add", ["inp", "a0"], ["add_out"]) + conv_node = helper.make_node("Conv", ["add_out", "a1"], ["outp"], **conv_config) + + model = helper.make_model( + helper.make_graph( + nodes=[add_node, conv_node], + name="move-add-graph", + inputs=[inp], + outputs=[outp], + value_info=[a0, a1], + ) + ) + + model = ModelWrapper(model) + # initialize model + a0_values = np.random.uniform(low=0, high=1, size=tuple(add_param_shape)).astype( + np.float32 + ) + model.set_initializer("a0", a0_values) + a1_values = np.random.uniform(low=0, high=1, size=tuple(conv_param_shape)).astype( + np.float32 + ) + model.set_initializer("a1", a1_values) + + model = model.transform(InferShapes()) + + # execution before transformation + inp_values = np.random.uniform(low=0, high=1, size=tuple(ishape)).astype(np.float32) + idict = {model.graph.input[0].name: inp_values} + odict = oxe.execute_onnx(model, idict) + y_before = odict[model.graph.output[0].name] + + model = model.transform(MoveAddPastConv()) + odict = oxe.execute_onnx(model, idict) + y_after = odict[model.graph.output[0].name] + + assert np.isclose(y_before, y_after).all() + assert model.graph.node[0].op_type == "Conv" + assert model.graph.node[1].op_type == "Add" diff --git a/tests/transformation/test_move_flatten_past_affine.py b/tests/transformation/test_move_flatten_past_affine.py new file mode 100644 index 0000000000000000000000000000000000000000..b2d5e51613d41f3f2db3dabcef7b982ec2816b19 --- /dev/null +++ b/tests/transformation/test_move_flatten_past_affine.py @@ -0,0 +1,106 @@ +# Copyright (c) 2020, Xilinx +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of FINN nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +import pytest + +import numpy as np +from onnx import TensorProto, helper + +from finn.core.modelwrapper import ModelWrapper +from finn.core.datatype import DataType +import finn.core.data_layout as DataLayout +from finn.util.basic import gen_finn_dt_tensor +from finn.transformation.infer_shapes import InferShapes +from finn.transformation.infer_datatypes import InferDataTypes +from finn.transformation.infer_data_layouts import InferDataLayouts +from finn.transformation.general import GiveUniqueNodeNames, GiveReadableTensorNames +from finn.transformation.streamline.reorder import MoveFlattenPastAffine +import finn.core.onnx_exec as oxe + +# data layout +@pytest.mark.parametrize("data_layout", [DataLayout.NHWC, DataLayout.NCHW]) +# batch size +@pytest.mark.parametrize("batch_size", [1, 2]) +def test_move_flatten_past_affine(data_layout, batch_size): + if data_layout == DataLayout.NHWC: + ishape = [batch_size, 1, 1, 1024] + oshape = [batch_size, 1000] + else: + ishape = [batch_size, 1024, 1, 1] + oshape = [batch_size, 1000] + + inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, ishape) + a0 = helper.make_tensor_value_info("a1", TensorProto.FLOAT, [1024, 1000]) + a1 = helper.make_tensor_value_info("a2", TensorProto.FLOAT, []) + a2 = helper.make_tensor_value_info("a3", TensorProto.FLOAT, [1000]) + outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, oshape) + + flatten_node = helper.make_node("Flatten", ["inp"], ["flatten_out"]) + matmul_node = helper.make_node("MatMul", ["flatten_out", "a0"], ["matmul_out"]) + mul_node = helper.make_node("Mul", ["matmul_out", "a1"], ["mul_out"]) + add_node = helper.make_node("Add", ["mul_out", "a2"], ["outp"]) + + graph = helper.make_graph( + nodes=[flatten_node, matmul_node, mul_node, add_node], + name="move-reshape-graph", + inputs=[inp], + outputs=[outp], + value_info=[a0, a1, a2], + ) + + model = helper.make_model(graph, producer_name="move_reshape_model") + model = ModelWrapper(model) + + # initialize values + a0_values = gen_finn_dt_tensor(DataType.TERNARY, [1024, 1000]) + 
model.set_initializer("a0", a0_values) + a1_values = np.random.uniform(low=0.1, high=0.99, size=(1)).astype(np.float32) + model.set_initializer("a1", a1_values) + a2_values = np.random.uniform(low=-1, high=1, size=(1000)).astype(np.float32) + model.set_initializer("a2", a2_values) + + model.set_tensor_datatype("inp", DataType.INT2) + model.set_tensor_layout("inp", data_layout) + model = model.transform(InferShapes()) + model = model.transform(InferDataTypes()) + model = model.transform(InferDataLayouts()) + model = model.transform(GiveUniqueNodeNames()) + model = model.transform(GiveReadableTensorNames()) + + # compare execution before and after transformation + inp_values = gen_finn_dt_tensor(DataType.INT2, ishape) + idict = {model.graph.input[0].name: inp_values} + model_transformed = model.transform(MoveFlattenPastAffine()) + assert oxe.compare_execution(model, model_transformed, idict) + + # depending on data layout check if graph is transformed or not + if data_layout == DataLayout.NHWC: + # check if nodes have new order in transformed graph + assert model.graph != model_transformed.graph + assert model_transformed.graph.node[-1].op_type == "Flatten" + else: + assert model.graph == model_transformed.graph diff --git a/tests/transformation/test_move_flatten_past_topk.py b/tests/transformation/test_move_flatten_past_topk.py new file mode 100644 index 0000000000000000000000000000000000000000..65da92c22dbe9f6b1c5a49172ffae59fa6e98607 --- /dev/null +++ b/tests/transformation/test_move_flatten_past_topk.py @@ -0,0 +1,89 @@ +# Copyright (c) 2020, Xilinx +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of FINN nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
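# The streamline reorder tests added in this patch (MoveFlattenPastAffine above, MoveFlattenPastTopK
# below, and the MaxPool/Mul/Transpose variants that follow) all rely on
# oxe.compare_execution(model_a, model_b, idict) as the functional check. A rough manual equivalent
# built from execute_onnx is sketched here; check_same_output is an illustrative helper name and is
# not part of this patch.

import numpy as np
import finn.core.onnx_exec as oxe


def check_same_output(model_a, model_b, idict):
    # execute both models on the same input dict and compare their first graph output
    out_a = oxe.execute_onnx(model_a, idict)[model_a.graph.output[0].name]
    out_b = oxe.execute_onnx(model_b, idict)[model_b.graph.output[0].name]
    return np.isclose(out_a, out_b).all()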
+import pytest + +from onnx import TensorProto, helper + +from finn.core.modelwrapper import ModelWrapper +from finn.core.datatype import DataType +import finn.core.data_layout as DataLayout +from finn.util.basic import gen_finn_dt_tensor +from finn.transformation.insert_topk import InsertTopK +from finn.transformation.infer_shapes import InferShapes +from finn.transformation.infer_datatypes import InferDataTypes +from finn.transformation.infer_data_layouts import InferDataLayouts +from finn.transformation.general import GiveUniqueNodeNames, GiveReadableTensorNames +from finn.transformation.streamline.reorder import MoveFlattenPastTopK +import finn.core.onnx_exec as oxe + +# data layout +@pytest.mark.parametrize("data_layout", [DataLayout.NHWC, DataLayout.NCHW]) +# batch size +@pytest.mark.parametrize("batch_size", [1, 2]) +def test_move_flatten_past_topk(data_layout, batch_size): + if data_layout == DataLayout.NHWC: + ishape = [batch_size, 1, 1, 1024] + oshape = [batch_size, 1024] + else: + ishape = [batch_size, 1024, 1, 1] + oshape = [batch_size, 1024] + + inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, ishape) + outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, oshape) + + flatten_node = helper.make_node("Flatten", ["inp"], ["outp"]) + + graph = helper.make_graph( + nodes=[flatten_node], name="move-flatten-graph", inputs=[inp], outputs=[outp], + ) + + model = helper.make_model(graph, producer_name="move_flatten_model") + model = ModelWrapper(model) + + model.set_tensor_datatype("inp", DataType.INT2) + model.set_tensor_layout("inp", data_layout) + model = model.transform(InsertTopK()) + model = model.transform(InferShapes()) + model = model.transform(InferDataTypes()) + model = model.transform(InferDataLayouts()) + model = model.transform(GiveUniqueNodeNames()) + model = model.transform(GiveReadableTensorNames()) + + # compare execution before and after transformation + inp_values = gen_finn_dt_tensor(DataType.INT2, ishape) + idict = {model.graph.input[0].name: inp_values} + model_transformed = model.transform(MoveFlattenPastTopK()) + assert oxe.compare_execution(model, model_transformed, idict) + + # depending on data layout check if graph is transformed or not + if data_layout == DataLayout.NHWC: + # check if nodes have new order in transformed graph + assert model.graph != model_transformed.graph + assert model_transformed.graph.node[-1].op_type == "Flatten" + else: + assert model.graph == model_transformed.graph diff --git a/tests/transformation/test_move_maxpool_past_multithreshold.py b/tests/transformation/test_move_maxpool_past_multithreshold.py new file mode 100644 index 0000000000000000000000000000000000000000..2fc19debf8d6fc89d15e3d731f1e54daa491c321 --- /dev/null +++ b/tests/transformation/test_move_maxpool_past_multithreshold.py @@ -0,0 +1,100 @@ +from onnx import TensorProto, helper +import numpy as np + +import finn.core.onnx_exec as oxe +from finn.core.modelwrapper import ModelWrapper +from finn.transformation.streamline.reorder import MoveMaxPoolPastMultiThreshold +from finn.transformation.infer_shapes import InferShapes +from finn.transformation.infer_datatypes import InferDataTypes + + +def get_multithreshold_rand_params(channels, num_of_thres, seed=None): + if seed is not None: + np.random.seed(seed) + steps = np.random.rand(channels, 1) * 2 + bias = np.random.rand(channels, 1) * 10 + thres = [np.arange(num_of_thres) for chn in range(channels)] + thres = ((thres - bias) * steps).astype(np.float32) + return thres + + +def 
test_move_maxpool_past_multithreshold(): + # generate test vectors of correct shape + ch = 64 + ifmdim = 16 + ofmdim = 16 // 4 + input_shape = (1, ch, ifmdim, ifmdim) + output_shape = (1, ch, ofmdim, ofmdim) + + top_in = helper.make_tensor_value_info("top_in", TensorProto.FLOAT, input_shape) + top_out = helper.make_tensor_value_info("top_out", TensorProto.FLOAT, output_shape) + + maxpool_config = {} + maxpool_config["pads"] = [1, 1, 1, 1] + maxpool_config["kernel_shape"] = [3, 3] + maxpool_config["strides"] = [2, 2] + + value_info = [] + thres1_shape = [1, 1] + value_info += [ + helper.make_tensor_value_info("thres1", TensorProto.FLOAT, thres1_shape) + ] + + thres2_shape = [ch, 14] + value_info += [ + helper.make_tensor_value_info("thres2", TensorProto.FLOAT, thres2_shape) + ] + + nodes = [] + nodes += [helper.make_node("MaxPool", ["top_in"], ["t1"], **maxpool_config)] + nodes += [ + helper.make_node( + "MultiThreshold", + ["t1", "thres1"], + ["t2"], + domain="finn", + out_dtype="BIPOLAR", + out_bias=-1.0, + out_scale=1.0, + ) + ] + nodes += [helper.make_node("MaxPool", ["t2"], ["t3"], **maxpool_config)] + nodes += [ + helper.make_node( + "MultiThreshold", + ["t3", "thres2"], + ["top_out"], + domain="finn", + out_dtype="UINT4", + ) + ] + + modelproto = helper.make_model( + helper.make_graph( + name="test", + inputs=[top_in], + outputs=[top_out], + value_info=value_info, + nodes=nodes, + ) + ) + model = ModelWrapper(modelproto) + model = model.transform(InferShapes()) + model = model.transform(InferDataTypes()) + + model.set_initializer("thres1", np.array([[0]])) + model.set_initializer( + "thres2", get_multithreshold_rand_params(*thres2_shape, seed=0) + ) + + # Transform + new_model = model.transform(MoveMaxPoolPastMultiThreshold()) + inp_dict = {"top_in": np.random.rand(*input_shape).astype(np.float32)} + + # Test + assert oxe.compare_execution(model, new_model, inp_dict) + assert new_model.graph.node[0].op_type == "MaxPool" + assert new_model.graph.node[1].op_type == "MultiThreshold" + assert new_model.graph.node[2].op_type == "MultiThreshold" + assert new_model.graph.node[3].op_type == "MaxPool" + assert len(new_model.graph.node) == 4 diff --git a/tests/transformation/test_move_mul_past_dw_conv.py b/tests/transformation/test_move_mul_past_dw_conv.py new file mode 100644 index 0000000000000000000000000000000000000000..1ae8fbfe89986d58d3d71f5f8735a98469d9d1e3 --- /dev/null +++ b/tests/transformation/test_move_mul_past_dw_conv.py @@ -0,0 +1,93 @@ +import pytest + +from onnx import helper, TensorProto +from finn.custom_op.im2col import compute_conv_output_dim +import finn.core.onnx_exec as oxe +from finn.core.datatype import DataType +from finn.core.modelwrapper import ModelWrapper +from finn.transformation.infer_datatypes import InferDataTypes +from finn.transformation.infer_shapes import InferShapes +from finn.util.basic import gen_finn_dt_tensor +from finn.transformation.streamline.reorder import MoveMulPastDWConv + + +# input dimension +@pytest.mark.parametrize("ifm_dim", [4, 7]) +# input channels +@pytest.mark.parametrize("ifm_ch", [2, 3]) +# kernel size +@pytest.mark.parametrize("k", [2, 3]) +# stride +@pytest.mark.parametrize("stride", [1, 2]) +# padding +@pytest.mark.parametrize("pad_amt", [0, 1]) +# depthwise +@pytest.mark.parametrize("dw", [0, 1]) +def test_move_mul_past_dw_conv(ifm_dim, ifm_ch, k, stride, pad_amt, dw): + if dw == 1: + ofm_ch = ifm_ch + groups = ifm_ch + W_shape = [ofm_ch, 1, k, k] + else: + ofm_ch = ifm_ch + 2 + groups = 1 + W_shape = [ofm_ch, ifm_ch, k, k] + + 
ofm_dim = compute_conv_output_dim(ifm_dim, k, stride, pad_amt) + + # set up onnx model + inp = helper.make_tensor_value_info( + "inp", TensorProto.FLOAT, [1, ifm_ch, ifm_dim, ifm_dim] + ) + mul = helper.make_tensor_value_info("mul", TensorProto.FLOAT, [1, ifm_ch, 1, 1]) + W = helper.make_tensor_value_info("W", TensorProto.FLOAT, W_shape) + outp = helper.make_tensor_value_info( + "outp", TensorProto.FLOAT, [1, ofm_ch, ofm_dim, ofm_dim] + ) + + Mul_node = helper.make_node("Mul", ["inp", "mul"], ["mul_out"]) + + Conv_node = helper.make_node( + "Conv", + ["mul_out", "W"], + ["outp"], + group=groups, + kernel_shape=[k, k], + pads=[pad_amt, pad_amt, pad_amt, pad_amt], + strides=[stride, stride], + ) + + graph = helper.make_graph( + nodes=[Mul_node, Conv_node], + name="mulpastconv_graph", + inputs=[inp], + outputs=[outp], + value_info=[mul, W], + ) + + model = helper.make_model(graph, producer_name="mulpastconv-model") + model = ModelWrapper(model) + inp_values = gen_finn_dt_tensor(DataType.INT2, [1, ifm_ch, ifm_dim, ifm_dim]) + mul_values = gen_finn_dt_tensor(DataType.INT2, [1, ifm_ch, 1, 1]) + W_values = gen_finn_dt_tensor(DataType.INT2, W_shape) + model.set_initializer("W", W_values) + model.set_initializer("mul", mul_values) + model = model.transform(InferShapes()) + model = model.transform(InferDataTypes()) + idict = {"inp": inp_values} + odict = oxe.execute_onnx(model, idict, True) + out_before = odict["outp"] + + # move channelwise multiplication past depthwise conv + model_transformed = model.transform(MoveMulPastDWConv()) + odict = oxe.execute_onnx(model_transformed, idict, True) + out_after = odict["outp"] + + assert (out_before == out_after).all() + + if dw == 0: + assert model.graph.node[0].op_type == model_transformed.graph.node[0].op_type + assert model.graph.node[1].op_type == model_transformed.graph.node[1].op_type + else: + assert model.graph.node[0].op_type == model_transformed.graph.node[1].op_type + assert model.graph.node[1].op_type == model_transformed.graph.node[0].op_type diff --git a/tests/transformation/test_move_scalar_past_conv.py b/tests/transformation/test_move_scalar_past_conv.py index 0f50642d2b9d1583030630cb4927c2b86667e71a..94fee7907d1ed1cccbf95520e903c7d9b43d8f7d 100644 --- a/tests/transformation/test_move_scalar_past_conv.py +++ b/tests/transformation/test_move_scalar_past_conv.py @@ -7,14 +7,14 @@ import finn.core.onnx_exec as ox from finn.core.modelwrapper import ModelWrapper from finn.transformation.infer_shapes import InferShapes from finn.transformation.streamline import ( - MoveScalarAddPastConv, + MoveAddPastConv, MoveScalarMulPastConv, ) @pytest.mark.parametrize("padding", [False, True]) @pytest.mark.parametrize( - "test_args", [("Add", MoveScalarAddPastConv()), ("Mul", MoveScalarMulPastConv())], + "test_args", [("Add", MoveAddPastConv()), ("Mul", MoveScalarMulPastConv())], ) def test_move_scalar_past_conv(test_args, padding): scalar_op = test_args[0] @@ -83,8 +83,8 @@ def test_move_scalar_past_conv(test_args, padding): assert new_model.graph.node[2].op_type == "Conv" else: assert new_model.graph.node[0].op_type == "Conv" - assert new_model.graph.node[1].op_type == scalar_op - assert new_model.graph.node[2].op_type == "Conv" + assert new_model.graph.node[1].op_type == "Conv" + assert new_model.graph.node[2].op_type == scalar_op else: assert new_model.graph.node[0].op_type == "Conv" assert new_model.graph.node[1].op_type == "Conv" @@ -92,7 +92,7 @@ def test_move_scalar_past_conv(test_args, padding): @pytest.mark.parametrize( - "test_args", [("Add", 
MoveScalarAddPastConv()), ("Mul", MoveScalarMulPastConv())], + "test_args", [("Add", MoveAddPastConv()), ("Mul", MoveScalarMulPastConv())], ) def test_move_scalar_past_conv_only_if_linear(test_args): scalar_op = test_args[0] diff --git a/tests/transformation/test_move_transpose_past_scalar_mul.py b/tests/transformation/test_move_transpose_past_scalar_mul.py new file mode 100644 index 0000000000000000000000000000000000000000..e434fc7d4f683120176e18a2bfa9da99d9ee0b0e --- /dev/null +++ b/tests/transformation/test_move_transpose_past_scalar_mul.py @@ -0,0 +1,82 @@ +import pytest + +import numpy as np +from onnx import TensorProto, helper + +from finn.core.modelwrapper import ModelWrapper +import finn.core.data_layout as DataLayout +from finn.transformation.infer_shapes import InferShapes +from finn.transformation.infer_datatypes import InferDataTypes +from finn.transformation.infer_data_layouts import InferDataLayouts +from finn.transformation.general import GiveUniqueNodeNames, GiveReadableTensorNames +from finn.transformation.streamline.reorder import MoveTransposePastScalarMul +import finn.core.onnx_exec as oxe + +# permutation of transpose node +@pytest.mark.parametrize("perm", [[0, 2, 3, 1], [0, 1, 3, 2], [3, 2, 0, 1]]) +# scalar mul +@pytest.mark.parametrize("scalar", [True, False]) +# data layout +@pytest.mark.parametrize("data_layout", [None, DataLayout.NHWC, DataLayout.NCHW]) +def test_move_transpose_past_scalar_mul(perm, scalar, data_layout): + inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, [1, 2, 3, 4]) + # to determine out_size we need to calculate with "perm" for this test case + dummy_in = np.random.uniform(low=0, high=1, size=(1, 2, 3, 4)).astype(np.float32) + out_size = dummy_in.transpose(tuple(perm)).shape + + if scalar is True: + a0_size = [] + else: + a0_size = out_size + a0 = helper.make_tensor_value_info("a0", TensorProto.FLOAT, a0_size) + outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, out_size) + transp_node = helper.make_node("Transpose", ["inp"], ["transp_out"], perm=perm) + mul_node = helper.make_node("Mul", ["transp_out", "a0"], ["outp"]) + + graph = helper.make_graph( + nodes=[transp_node, mul_node], + name="mv-transpose-graph", + inputs=[inp], + outputs=[outp], + value_info=[a0], + ) + + model = helper.make_model(graph, producer_name="mv_transpose_model") + model = ModelWrapper(model) + + # initialize values + a0_values = np.random.uniform(low=0, high=1, size=tuple(a0_size)).astype(np.float32) + model.set_initializer("a0", a0_values) + if data_layout is not None: + model.set_tensor_layout("inp", data_layout) + model = model.transform(InferDataLayouts()) + + model = model.transform(InferShapes()) + model = model.transform(InferDataTypes()) + model = model.transform(GiveUniqueNodeNames()) + model = model.transform(GiveReadableTensorNames()) + + # compare execution before and after transformation + inp_values = np.random.uniform(low=0, high=1, size=(1, 2, 3, 4)).astype(np.float32) + idict = {model.graph.input[0].name: inp_values} + model_transformed = model.transform(MoveTransposePastScalarMul()) + assert oxe.compare_execution(model, model_transformed, idict) + + # check if order changed + if scalar is True and data_layout is not None: + assert model_transformed.graph.node[0] != model.graph.node[0] + assert model_transformed.graph.node[1] != model.graph.node[1] + assert model_transformed.graph.node[0].op_type == "Mul" + assert model_transformed.graph.node[1].op_type == "Transpose" + mul_input = model_transformed.graph.node[0].input[0] + 
mul_output = model_transformed.graph.node[0].output[0] + assert model_transformed.get_tensor_layout(mul_input) == data_layout + assert model_transformed.get_tensor_layout(mul_output) == data_layout + else: + assert model_transformed.graph.node[0] == model.graph.node[0] + assert model_transformed.graph.node[1] == model.graph.node[1] + if data_layout is not None: + mul_input = model_transformed.graph.node[1].input[0] + mul_output = model_transformed.graph.node[1].output[0] + assert model_transformed.get_tensor_layout(mul_input) != data_layout + assert model_transformed.get_tensor_layout(mul_output) != data_layout diff --git a/tests/transformation/test_remove_identity_ops.py b/tests/transformation/test_remove_identity_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..536c1ab0b48fa44388da23f45b528da3c5f3b2f2 --- /dev/null +++ b/tests/transformation/test_remove_identity_ops.py @@ -0,0 +1,81 @@ +import pytest + +import numpy as np +from onnx import helper, TensorProto +import finn.core.onnx_exec as oxe +from finn.core.datatype import DataType +from finn.core.modelwrapper import ModelWrapper +from finn.transformation.infer_datatypes import InferDataTypes +from finn.transformation.infer_shapes import InferShapes +from finn.transformation.streamline.remove import RemoveIdentityOps +from finn.util.basic import gen_finn_dt_tensor + + +def insert_identity_op(model, op): + if op in ["Add", "Sub"]: + val = np.asarray([0.0], dtype=np.float32) + elif op in ["Mul", "Div"]: + val = np.asarray([1.0], dtype=np.float32) + else: + return + + identity_node = helper.make_node(op, ["div_out", "value"], ["ident_out"]) + graph = model.graph + graph.node.insert(3, identity_node) + graph.node[-1].input[0] = "ident_out" + model.set_initializer("value", val) + + return model + + +# identity operations to be inserted +@pytest.mark.parametrize("op", ["Add", "Sub", "Mul", "Div"]) +def test_remove_identity_ops(op): + + # set up onnx model + inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, [1, 4, 1, 1]) + mul = helper.make_tensor_value_info("mul", TensorProto.FLOAT, []) + shape = helper.make_tensor_value_info("shape", TensorProto.FLOAT, [2]) + div = helper.make_tensor_value_info("div", TensorProto.FLOAT, []) + matmul = helper.make_tensor_value_info("matmul", TensorProto.FLOAT, [4, 2]) + outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, [1, 2]) + + mul_node = helper.make_node("Mul", ["inp", "mul"], ["mul_out"]) + reshape_node = helper.make_node("Reshape", ["mul_out", "shape"], ["reshape_out"]) + div_node = helper.make_node("Div", ["reshape_out", "div"], ["div_out"]) + matmul_node = helper.make_node("MatMul", ["div_out", "matmul"], ["outp"]) + + graph = helper.make_graph( + nodes=[mul_node, reshape_node, div_node, matmul_node], + name="identity-graph", + inputs=[inp], + outputs=[outp], + value_info=[mul, shape, div, matmul], + ) + + model = helper.make_model(graph, producer_name="mulpastconv-model") + model = ModelWrapper(model) + inp_values = gen_finn_dt_tensor(DataType.INT2, [1, 4, 1, 1]) + mul_values = np.random.uniform(low=0.1, high=0.99, size=(1)).astype(np.float32) + shape_values = np.asarray([1, -1], dtype=np.int64) + div_values = np.random.uniform(low=0.1, high=0.99, size=(1)).astype(np.float32) + matmul_values = gen_finn_dt_tensor(DataType.INT2, [4, 2]) + model.set_initializer("mul", mul_values) + model.set_initializer("shape", shape_values) + model.set_initializer("div", div_values) + model.set_initializer("matmul", matmul_values) + insert_identity_op(model, op) + 
model = model.transform(InferShapes()) + model = model.transform(InferDataTypes()) + idict = {"inp": inp_values} + odict = oxe.execute_onnx(model, idict) + out_before = odict["outp"] + num_of_nodes_before = len(model.graph.node) + + model = model.transform(RemoveIdentityOps()) + num_of_nodes_after = len(model.graph.node) + assert num_of_nodes_before - 1 == num_of_nodes_after + + odict = oxe.execute_onnx(model, idict) + out_after = odict["outp"] + assert (out_before == out_after).all() diff --git a/tests/transformation/test_sign_to_thres.py b/tests/transformation/test_sign_to_thres.py index b10840df37a695986e54c0bdaa68baa0538f90f2..a92f839e5f6ca8b45eadf939fa35973ac153e0b1 100644 --- a/tests/transformation/test_sign_to_thres.py +++ b/tests/transformation/test_sign_to_thres.py @@ -40,8 +40,7 @@ from finn.transformation.infer_shapes import InferShapes from finn.transformation.streamline import ConvertSignToThres from finn.util.test import get_test_model_trained -export_onnx_path = "test_output_lfc.onnx" -transformed_onnx_path = "test_output_lfc_transformed.onnx" +export_onnx_path = "test_sign_to_thres.onnx" def test_sign_to_thres(): diff --git a/tests/transformation/test_sort_graph.py b/tests/transformation/test_sort_graph.py new file mode 100644 index 0000000000000000000000000000000000000000..05842504c13b144bb34e8084fb12b5086fa84115 --- /dev/null +++ b/tests/transformation/test_sort_graph.py @@ -0,0 +1,150 @@ +from onnx import TensorProto, helper +import numpy as np + +from finn.core.modelwrapper import ModelWrapper +from finn.transformation.general import SortGraph +from finn.transformation.infer_shapes import InferShapes +import pytest +import finn.analysis.topology as ta + + +def make_randomly_sorted_linear_model(num_of_nodes, seed=None): + if seed is not None: + np.random.seed(seed) + + ch = 2 + ifmdim = 16 + input_shape = (1, ch, ifmdim, ifmdim) + + top_in = helper.make_tensor_value_info("t0", TensorProto.FLOAT, input_shape) + top_out = helper.make_tensor_value_info( + "t" + str(num_of_nodes), TensorProto.FLOAT, input_shape + ) + + value_info = [] + nodes = [] + for i in range(num_of_nodes): + nodes += [ + helper.make_node("Add", ["t" + str(i), "p" + str(i)], ["t" + str(i + 1)]) + ] + value_info += [ + helper.make_tensor_value_info("p" + str(i), TensorProto.FLOAT, input_shape) + ] + + nodes = np.random.permutation(nodes) + + modelproto = helper.make_model( + helper.make_graph( + name="test", + inputs=[top_in], + outputs=[top_out], + value_info=value_info, + nodes=nodes, + ) + ) + model = ModelWrapper(modelproto) + model = model.transform(InferShapes()) + + for i in range(num_of_nodes): + model.set_initializer( + "p" + str(i), np.random.rand(*input_shape).astype(np.float32) + ) + + return model + + +@pytest.mark.parametrize("num_of_nodes", [64]) +def test_sort_linear_graph(num_of_nodes): + model = make_randomly_sorted_linear_model(num_of_nodes, seed=0) + new_model = model.transform(SortGraph()) + + # Test + ret = new_model.analysis(ta.nodes_topologically_sorted) + assert ret["nodes_topologically_sorted"], "Nodes are not topologically sorted." 
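# The nodes_topologically_sorted analysis used in the assertion above is the same check that
# SortGraph is validated against; the pair can be applied to any ModelWrapper to re-sort a graph
# only when needed. A minimal sketch follows; ensure_sorted is an illustrative helper name and is
# not part of this patch.

import finn.analysis.topology as ta
from finn.transformation.general import SortGraph


def ensure_sorted(model):
    # the analysis pass returns a dict with a single boolean entry
    ret = model.analysis(ta.nodes_topologically_sorted)
    if not ret["nodes_topologically_sorted"]:
        model = model.transform(SortGraph())
    return model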
+ + +def test_sort_nonlinear_graph(): + ch = 2 + ifmdim = 16 + input_shape = (1, ch, ifmdim, ifmdim) + + top_in = helper.make_tensor_value_info("top_in", TensorProto.FLOAT, input_shape) + top_out = helper.make_tensor_value_info("top_out", TensorProto.FLOAT, input_shape) + + num_of_params = 8 + value_info = [] + for i in range(num_of_params): + value_info += [ + helper.make_tensor_value_info("p" + str(i), TensorProto.FLOAT, input_shape) + ] + + modelproto = helper.make_model( + helper.make_graph( + name="test", + inputs=[top_in], + outputs=[top_out], + value_info=value_info, + nodes=[ + # Not sorted nodes + helper.make_node("Mul", ["fork1", "p2"], ["t3"]), + helper.make_node("Add", ["t4", "p3"], ["t5"]), + helper.make_node("Add", ["t2", "t3"], ["t4"]), + helper.make_node("Add", ["t6", "t7"], ["t8"]), + helper.make_node("Add", ["fork3", "fork3"], ["top_out"]), + helper.make_node("Mul", ["t5", "p4"], ["fork2"]), + helper.make_node("Add", ["top_in", "p0"], ["fork1"]), + helper.make_node("Mul", ["fork1", "p1"], ["t2"]), + helper.make_node("Add", ["fork2", "p5"], ["t6"]), + helper.make_node("Add", ["fork2", "p6"], ["t7"]), + helper.make_node("Mul", ["t8", "p7"], ["fork3"]), + ], + ) + ) + model = ModelWrapper(modelproto) + model = model.transform(InferShapes()) + + np.random.seed(0) + for i in range(num_of_params): + model.set_initializer( + "p" + str(i), np.random.rand(*input_shape).astype(np.float32) + ) + + new_model = model.transform(SortGraph()) + + # Test + ret = new_model.analysis(ta.nodes_topologically_sorted) + assert ret["nodes_topologically_sorted"], "Nodes are not topologically sorted." + + +if __name__ == "__main__": + import time + + sizes = [10, 50, 100, 500, 1000] + times = [] + reps = 10 + + print("SortGraph performance test:") + print("Test sizes", sizes) + print("Repetitions per size:", reps) + for sz in sizes: + acc_time = 0 + print(" Testing size ", sz) + for i in range(reps): + # it should take the same time even with the sorted one + # but better new model each time as it is a more general approach + model = make_randomly_sorted_linear_model(sz) # new model as seed is None + bef = time.time() + new_model = model.transform(SortGraph(), make_deepcopy=False) + acc_time += time.time() - bef + + times += [acc_time / reps] + + # print csv + print("\nnum_of_nodes, seconds") + for sz, tm in zip(sizes, times): + print("{:12d}, {:6.4e}".format(sz, tm)) + + # plot + # import matplotlib.pyplot as plt + # plt.plot(sizes,times,"--o") + # plt.grid(True) diff --git a/tests/transformation/test_topk_insert.py b/tests/transformation/test_topk_insert.py index 1af0f255d8fb1af8a6e571518f18d831aa71298b..a18e63384150f140cb63ec7b438283eb4797266c 100644 --- a/tests/transformation/test_topk_insert.py +++ b/tests/transformation/test_topk_insert.py @@ -18,7 +18,7 @@ from pkgutil import get_data import pytest -export_onnx_path = "test_output_lfc.onnx" +export_onnx_path = "test_topk_insert.onnx" @pytest.mark.parametrize("k", [1, 5, 10]) diff --git a/tests/util/test_create.py b/tests/util/test_create.py new file mode 100644 index 0000000000000000000000000000000000000000..7173add35abf04a35c33b0ef10b42ffdb296a653 --- /dev/null +++ b/tests/util/test_create.py @@ -0,0 +1,64 @@ +# Copyright (c) 2020, Xilinx +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of FINN nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import pytest +import finn.util.create as create +from finn.core.datatype import DataType + + +@pytest.mark.parametrize("bitwidth", [DataType.BIPOLAR, DataType.INT2, DataType.INT4]) +def test_hls_random_mlp_maker(bitwidth): + w = bitwidth + a = bitwidth + layer_spec = [ + { + "mw": 185, + "mh": 100, + "simd": 185, + "pe": 100, + "idt": DataType.BIPOLAR, + "wdt": w, + "act": a, + }, + {"mw": 100, "mh": 100, "simd": 100, "pe": 100, "idt": a, "wdt": w, "act": a}, + {"mw": 100, "mh": 100, "simd": 100, "pe": 100, "idt": a, "wdt": w, "act": a}, + {"mw": 100, "mh": 100, "simd": 100, "pe": 100, "idt": a, "wdt": w, "act": a}, + { + "mw": 100, + "mh": 1, + "simd": 100, + "pe": 1, + "idt": a, + "wdt": w, + "act": DataType.BIPOLAR, + }, + ] + + ret = create.hls_random_mlp_maker(layer_spec) + assert len(ret.graph.node) == 5 + ret.save("mlp-%s.onnx" % str(bitwidth))
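Besides adding new tests, the patch gives each existing test module its own temporary export file (test_conv_lowering.onnx, test_fold_constants.onnx, test_sign_to_thres.onnx, test_topk_insert.onnx, and so on) instead of shared names such as test_output_lfc.onnx, so that tests running concurrently cannot overwrite each other's ONNX files. A minimal sketch of that pattern for a hypothetical new test module; the filename, the toy graph, and the transformation placeholder are illustrative only:

import os

import numpy as np
from onnx import TensorProto, helper

from finn.core.modelwrapper import ModelWrapper

# one distinct temporary file per test module, mirroring the renames in this patch
export_onnx_path = "test_my_transform.onnx"


def test_my_transform():
    # build a small single-node graph as the tests above do
    inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, [1, 4])
    outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, [1, 4])
    a0 = helper.make_tensor_value_info("a0", TensorProto.FLOAT, [1, 4])
    add_node = helper.make_node("Add", ["inp", "a0"], ["outp"])
    graph = helper.make_graph([add_node], "g", [inp], [outp], value_info=[a0])
    model = ModelWrapper(helper.make_model(graph))
    model.set_initializer("a0", np.zeros([1, 4], dtype=np.float32))
    model.save(export_onnx_path)
    # ... run the transformation under test and its assertions here ...
    os.remove(export_onnx_path)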