diff --git a/.github/workflows/quicktest-dev-pr.yml b/.github/workflows/quicktest-dev-pr.yml index cd59a629405c748187cdf478c0bdb0694c58c79f..924fbd24a174df49af4b3e259ad57d0a7907d42b 100644 --- a/.github/workflows/quicktest-dev-pr.yml +++ b/.github/workflows/quicktest-dev-pr.yml @@ -18,4 +18,6 @@ jobs: uses: actions/checkout@v2 - name: DockerRunQuicktest + env: + NUM_DEFAULT_WORKERS: 4 run: sh run-docker.sh quicktest diff --git a/docker/Dockerfile.finn_ci b/docker/Dockerfile.finn_ci index 5772b16abc8b927def1e2dfbbb8193a2f964f87d..d06ff8521555ccd6d09383cab039850f1565fc61 100644 --- a/docker/Dockerfile.finn_ci +++ b/docker/Dockerfile.finn_ci @@ -61,6 +61,8 @@ RUN git clone --branch $FINN_CI_BRANCH https://github.com/Xilinx/finn /workspace RUN pip install -r /workspace/finn/requirements.txt RUN apt update; apt install nano RUN pip install pytest-dependency +RUN pip install pytest-xdist +RUN pip install pytest-parallel ENV PYTHONPATH "${PYTHONPATH}:/workspace/finn/src" ENV PYTHONPATH "${PYTHONPATH}:/workspace/pyverilator" diff --git a/docker/Dockerfile.finn_dev b/docker/Dockerfile.finn_dev index 0e12b504a26ccdb8fd78e162f04cfdeab5a186f1..f8919d7498e0e8ef08a52d1da0782988b56d6df4 100644 --- a/docker/Dockerfile.finn_dev +++ b/docker/Dockerfile.finn_dev @@ -55,6 +55,8 @@ RUN pip install matplotlib RUN pip install pytest-dependency RUN pip install sphinx RUN pip install sphinx_rtd_theme +RUN pip install pytest-xdist +RUN pip install pytest-parallel # switch user RUN groupadd -g $GID $GNAME diff --git a/docker/Jenkinsfile b/docker/Jenkinsfile index 80be261fb3da057186259598f84d915176577a5d..2215bc79cc7b2c20036d882fdc654fbe8721cab6 100644 --- a/docker/Jenkinsfile +++ b/docker/Jenkinsfile @@ -9,7 +9,12 @@ pipeline { string(name: 'PYNQ_PASSWORD', defaultValue: 'xilinx', description: 'PYNQ board password') string(name: 'PYNQ_TARGET_DIR', defaultValue: '/home/xilinx/finn', description: 'PYNQ board target deployment directory') string(name: 'NUM_DEFAULT_WORKERS', defaultValue: '1', 
description: 'Number of cores for parallel transformations') - string(name: 'DOCKER_CMD', defaultValue: """python setup.py test""", description: 'Command to run') + // main test: everything except rtlsim and end2end tests, parallel run with xdist, no parallel transformations to save on memory + string(name: 'DOCKER_CMD_MAIN', defaultValue: """python setup.py test --addopts "-k 'not (rtlsim or end2end)' --dist=loadfile -n auto" """, description: 'Main test command') + // rtlsim tests: parallel run with pytest-parallel, no parallel transformations to save on memory + string(name: 'DOCKER_CMD_RTLSIM', defaultValue: """python setup.py test --addopts "-k rtlsim --workers auto" """, description: 'rtlsim test command') + // end2end tests: no parallel testing, use NUM_DEFAULT_WORKERS for parallel transformations + string(name: 'DOCKER_CMD_END2END', defaultValue: """python setup.py test --addopts "-k end2end" """, description: 'end2end test command') } environment { DOCKER_TAG='finn_ci:$BUILD_ID' @@ -32,10 +37,49 @@ pipeline { """ } } - stage('Test') { + stage('test-main') { steps { + catchError { sh """ - docker run --name $DOCKER_INST_NAME --init \ + docker run --init \ + --hostname $DOCKER_INST_NAME \ + -v ${params.VIVADO_PATH}:${params.VIVADO_PATH}:ro \ + -e NUM_DEFAULT_WORKERS=1 \ + -e FINN_INST_NAME=$DOCKER_INST_NAME \ + -e VIVADO_PATH=${params.VIVADO_PATH} \ + -e PYNQ_BOARD=${params.PYNQ_BOARD} \ + -e PYNQ_IP=${params.PYNQ_IP} \ + -e PYNQ_USERNAME=${params.PYNQ_USERNAME} \ + -e PYNQ_PASSWORD=${params.PYNQ_PASSWORD} \ + -e PYNQ_TARGET_DIR=${params.PYNQ_TARGET_DIR} \ + $DOCKER_TAG ${params.DOCKER_CMD_MAIN} + """} + } + } + stage('test-rtlsim') { + steps { + catchError { + sh """ + docker run --init \ + --hostname $DOCKER_INST_NAME \ + -v ${params.VIVADO_PATH}:${params.VIVADO_PATH}:ro \ + -e NUM_DEFAULT_WORKERS=1 \ + -e FINN_INST_NAME=$DOCKER_INST_NAME \ + -e VIVADO_PATH=${params.VIVADO_PATH} \ + -e PYNQ_BOARD=${params.PYNQ_BOARD} \ + -e PYNQ_IP=${params.PYNQ_IP} \ + -e 
PYNQ_USERNAME=${params.PYNQ_USERNAME} \ + -e PYNQ_PASSWORD=${params.PYNQ_PASSWORD} \ + -e PYNQ_TARGET_DIR=${params.PYNQ_TARGET_DIR} \ + $DOCKER_TAG ${params.DOCKER_CMD_RTLSIM} + """} + } + } + stage('test-end2end') { + steps { + catchError { + sh """ + docker run --init \ --hostname $DOCKER_INST_NAME \ -v ${params.VIVADO_PATH}:${params.VIVADO_PATH}:ro \ -e NUM_DEFAULT_WORKERS=${params.NUM_DEFAULT_WORKERS} \ @@ -46,8 +90,8 @@ pipeline { -e PYNQ_USERNAME=${params.PYNQ_USERNAME} \ -e PYNQ_PASSWORD=${params.PYNQ_PASSWORD} \ -e PYNQ_TARGET_DIR=${params.PYNQ_TARGET_DIR} \ - $DOCKER_TAG ${params.DOCKER_CMD} - """ + $DOCKER_TAG ${params.DOCKER_CMD_END2END} + """ } } } } diff --git a/docker/finn_entrypoint.sh b/docker/finn_entrypoint.sh index 132d5bdaa286ba3e50bbd06971e9139f5859ef11..b312737c317517ca0ab19c74cf22284b5977b661 100644 --- a/docker/finn_entrypoint.sh +++ b/docker/finn_entrypoint.sh @@ -15,7 +15,7 @@ gecho () { # the repos themselves are cloned in the Dockerfile BREVITAS_COMMIT=f9a27226d4acf1661dd38bc449f71f89e0983cce CNPY_COMMIT=4e8810b1a8637695171ed346ce68f6984e585ef4 -HLSLIB_COMMIT=8aed899c278c36c977a249558d71795086cf852c +HLSLIB_COMMIT=8f9f2018762f654f196b666838aeaf6fc730ad9a PYVERILATOR_COMMIT=c97a5ba41bbc7c419d6f25c74cdf3bdc3393174f PYNQSHELL_COMMIT=0c82a61b0ec1a07fa275a14146233824ded7a13d OMX_COMMIT=1bae737669901e762f581af73348332b5c4b2ada diff --git a/docker/quicktest.sh b/docker/quicktest.sh index 4f6a2d3e230de9fcbb947d794722294880a7730d..49b7886836ac4e45dad856dfcd49223276bd831a 100755 --- a/docker/quicktest.sh +++ b/docker/quicktest.sh @@ -1,4 +1,22 @@ #!/bin/bash +: ${PYTEST_PARALLEL=auto} + cd $FINN_ROOT -python setup.py test --addopts "-m 'not (vivado or slow)'" + +# check if command line argument is empty or not present +if [ -z $1 ]; then + echo "Running quicktest: not (vivado or slow) with pytest-xdist" + python setup.py test --addopts "-m 'not (vivado or slow)' --dist=loadfile -n $PYTEST_PARALLEL" +elif [ $1 = "main" ]; then + echo "Running main 
test suite: not (rtlsim or end2end) with pytest-xdist" + python setup.py test --addopts "-k not (rtlsim or end2end) --dist=loadfile -n $PYTEST_PARALLEL" +elif [ $1 = "rtlsim" ]; then + echo "Running rtlsim test suite with pytest-parallel" + python setup.py test --addopts "-k rtlsim --workers $PYTEST_PARALLEL" +elif [ $1 = "end2end" ]; then + echo "Running end2end test suite with no parallelism" + python setup.py test --addopts "-k end2end" +else + echo "Unrecognized argument to quicktest.sh" +fi diff --git a/docs/finn/getting_started.rst b/docs/finn/getting_started.rst index f4fa7a13dcbe4fe8ab9667a111df00c605747710..8b20cebcfc49d14d0afbb26edd678d65425476d3 100644 --- a/docs/finn/getting_started.rst +++ b/docs/finn/getting_started.rst @@ -73,8 +73,12 @@ from the FINN root directory* as follows: python setup.py test --addopts "-k test_end2end_tfc_w1a2" -Please see the pytest documentation for more about picking tests by marks or -by name. +Finally, if you want to run tests in parallel (e.g. to take advantage of a multi-core CPU) +you can use: + * pytest-parallel for any rtlsim tests, e.g. `python setup.py test --addopts "-k rtlsim --workers auto"` + * pytest-xdist for anything else, make sure to add `--dist=loadfile` if you have tests in the same file that have dependencies on each other e.g. `python setup.py test --addopts "-k mytest -n auto --dist=loadfile"` + +Please see the pytest documentation for more about picking tests by marks or by name. 
Environment variables ********************** diff --git a/notebooks/end2end_example/tfc_end2end_example.ipynb b/notebooks/end2end_example/tfc_end2end_example.ipynb index d573061487de204084e0d3242da8ad1b791f44d8..c84efc964b1f57b7ed385521fc5214fdc2396590 100644 --- a/notebooks/end2end_example/tfc_end2end_example.ipynb +++ b/notebooks/end2end_example/tfc_end2end_example.ipynb @@ -132,7 +132,7 @@ " " ], "text/plain": [ - "<IPython.lib.display.IFrame at 0x7f8890385828>" + "<IPython.lib.display.IFrame at 0x7f7cc4290940>" ] }, "execution_count": 3, @@ -293,7 +293,7 @@ " " ], "text/plain": [ - "<IPython.lib.display.IFrame at 0x7fe1ad0639e8>" + "<IPython.lib.display.IFrame at 0x7f7c6c567f28>" ] }, "execution_count": 6, @@ -333,9 +333,10 @@ " ConvertDivToMul(),\n", " BatchNormToAffine(),\n", " ConvertSignToThres(),\n", + " AbsorbSignBiasIntoMultiThreshold(),\n", " MoveAddPastMul(),\n", " MoveScalarAddPastMatMul(),\n", - " MoveScalarAddPastConv(),\n", + " MoveAddPastConv(),\n", " MoveScalarMulPastMatMul(),\n", " MoveScalarMulPastConv(),\n", " MoveAddPastMul(),\n", @@ -350,6 +351,7 @@ " ]\n", " for trn in streamline_transformations:\n", " model = model.transform(trn)\n", + " model = model.transform(RemoveIdentityOps())\n", " model = model.transform(GiveUniqueNodeNames())\n", " model = model.transform(GiveReadableTensorNames())\n", " model = model.transform(InferDataTypes())\n", @@ -400,7 +402,7 @@ " " ], "text/plain": [ - "<IPython.lib.display.IFrame at 0x7fe1346e4ef0>" + "<IPython.lib.display.IFrame at 0x7f7c6c0bf898>" ] }, "execution_count": 8, @@ -454,7 +456,7 @@ " " ], "text/plain": [ - "<IPython.lib.display.IFrame at 0x7fe1346f7780>" + "<IPython.lib.display.IFrame at 0x7f7c6c0e5c18>" ] }, "execution_count": 9, diff --git a/src/finn/custom_op/__init__.py b/src/finn/custom_op/__init__.py index ab6e03bee65b8bf5c4041dd8021b1a561e7673d2..4ae7b9ebffaab6ca6be04b8d73f647b2db22dc78 100644 --- a/src/finn/custom_op/__init__.py +++ b/src/finn/custom_op/__init__.py @@ -56,8 +56,15 @@ 
class CustomOp(ABC): ret = ret.decode("utf-8") return ret else: - # not set, return default value - return def_val + if req: + raise Exception( + """Required attribute %s unspecified in + a %s node""" + % (name, self.onnx_node.op_type) + ) + else: + # not set, return default value + return def_val except KeyError: raise AttributeError("Op has no such attribute: " + name) diff --git a/src/finn/custom_op/fpgadataflow/__init__.py b/src/finn/custom_op/fpgadataflow/__init__.py index 14ba1b813c8d61145f7d221deee9c184aeb9bddc..71c731f96ca45519c443a5f932ead050770e17de 100644 --- a/src/finn/custom_op/fpgadataflow/__init__.py +++ b/src/finn/custom_op/fpgadataflow/__init__.py @@ -88,6 +88,8 @@ class HLSCustomOp(CustomOp): "res_hls": ("s", False, ""), "res_synth": ("s", False, ""), "rtlsim_so": ("s", False, ""), + # partitioning info + "partition_id": ("i", False, 0), # input and output FIFO depths "inFIFODepth": ("i", False, 2), "outFIFODepth": ("i", False, 2), diff --git a/src/finn/custom_op/fpgadataflow/channelwise_op_batch.py b/src/finn/custom_op/fpgadataflow/channelwise_op_batch.py new file mode 100644 index 0000000000000000000000000000000000000000..ad68a4bde29123b2498ac7789048bcd2e13bf3bc --- /dev/null +++ b/src/finn/custom_op/fpgadataflow/channelwise_op_batch.py @@ -0,0 +1,576 @@ +# Copyright (c) 2020, Xilinx +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. 
+# +# * Neither the name of FINN nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +from math import ceil +import os + +import numpy as np + +from onnx import TensorProto, helper +from finn.core.datatype import DataType +from finn.custom_op.fpgadataflow import HLSCustomOp +from finn.util.data_packing import ( + npy_to_rtlsim_input, + numpy_to_hls_code, + rtlsim_output_to_npy, +) +from . import templates + +# ONNX i/o tensor shape assumptions for channelwise ops: +# input 0 is the input tensor, shape (..., NumChannels) +# input 1 is the channelwise parameter tensor, shape (NumChannels, params_per_channel) +# output 0 is the output tensor, shape (..., NumChannels) - same as input +# the ... here can be any shape (representing groups of vectors) + + +class ChannelwiseOp_Batch(HLSCustomOp): + """Class that corresponds to finn-hls Thresholding_Batch function. + It can implement a variety of channel-wise parametrized operations, + including Add, Mul and multi-thresholding. 
+ """ + + def __init__(self, onnx_node): + super().__init__(onnx_node) + self.decoupled_wrapper = templates.decoupled_wrapper + + def get_nodeattr_types(self): + my_attrs = { + # channelwise "map" function to apply: + # one of cmp_le, cmp_ge, add, mul + "Func": ("s", False, "cmp_le"), + "PE": ("i", True, 0), + "NumChannels": ("i", True, 0), + # string defining memory resource type for parameters + "ram_style": ("s", False, "distributed"), + # FINN DataTypes for inputs, weights, outputs + "inputDataType": ("s", True, ""), + "paramDataType": ("s", True, ""), + "outputDataType": ("s", True, ""), + # input and output FIFO depths + "inFIFODepth": ("i", False, 0), + "outFIFODepth": ("i", False, 0), + # number of input vectors, examples: + # [1] is a single vector (like a FC layer with batch=1) + # [4] is four vectors (like a FC layer with batch=4) + # [1, 4, 4] is four * four vectors (like a conv layer with batch=1) + "numInputVectors": ("ints", False, [1]), + } + my_attrs.update(super().get_nodeattr_types()) + return my_attrs + + def calc_tmem(self): + """Calculates and returns TMEM, the depth of the memory used + to store the channelwise op parameters.""" + chn = self.get_nodeattr("NumChannels") + pe = self.get_nodeattr("PE") + return chn // pe + + def make_shape_compatible_op(self, model): + oshape = self.get_normal_output_shape() + # implement tensor with correct shape + values = np.random.randn(*oshape).astype(np.float32) + return helper.make_node( + "Constant", + inputs=[], + outputs=[self.onnx_node.output[0]], + value=helper.make_tensor( + name="const_tensor", + data_type=TensorProto.FLOAT, + dims=values.shape, + vals=values.flatten().astype(float), + ), + ) + + def infer_node_datatype(self, model): + node = self.onnx_node + # check input datatype against property + idt_name = self.get_input_datatype().name + exp_idt_name = self.get_nodeattr("inputDataType") + assert exp_idt_name == idt_name, "Bad input DataType for ChannelwiseOp layer" + # TODO: dynamically 
infer/update odt based on idt as done in ConvertToHLSLayers? + # set output datatype from property + odt = self.get_output_datatype() + model.set_tensor_datatype(node.output[0], odt) + + def verify_node(self): + info_messages = [] + # verify that "domain" is set to "finn" + domain_value = self.onnx_node.domain + if domain_value == "finn": + info_messages.append("Attribute domain is set correctly") + else: + info_messages.append('Attribute domain should be set to "finn"') + + # verify that "backend" is set to "fpgadataflow" + backend_value = self.get_nodeattr("backend") + if backend_value == "fpgadataflow": + info_messages.append("Attribute backend is set correctly") + else: + info_messages.append('Attribute backend should be set to "fpgadataflow"') + + # verify that all necessary attributes exist + # TODO collect automatically from get_nodeattr_types + try: + self.get_nodeattr("code_gen_dir_cppsim") + self.get_nodeattr("executable_path") + self.get_nodeattr("NumChannels") + self.get_nodeattr("PE") + self.get_nodeattr("inputDataType") + self.get_nodeattr("paramDataType") + self.get_nodeattr("outputDataType") + info_messages.append("All necessary attributes exist") + except Exception: + info_messages.append( + """The required Threshold_Batch attributes do not exist.""" + ) + + return info_messages + + def bram_estimation(self): + """Calculates BRAM cost if resource set to BRAM""" + style = self.get_nodeattr("ram_style") + P = self.get_nodeattr("PE") + idt = self.get_input_datatype() + A = idt.bitwidth() + tmem = self.calc_tmem() + + if style == "block" and tmem > 1: + return int(ceil(A * P / 16)) * int(ceil(tmem / 1024)) + else: + return 0 + + def lut_estimation(self): + """Calculates LUT cost, taking memory resource type into account """ + # TODO add in/out FIFO contributions + style = self.get_nodeattr("ram_style") + P = self.get_nodeattr("PE") + idt = self.get_input_datatype() + A = idt.bitwidth() + tmem = self.calc_tmem() + # cost of comparators + comparator_cost 
= A * P + # cost of LUTRAM + if style == "distributed" and tmem > 1: + lutram_cost = P * A * int(ceil(tmem / 64)) + else: + lutram_cost = 0 + # total cost + return comparator_cost + lutram_cost + + def get_input_datatype(self): + """Returns FINN DataType of input.""" + return DataType[self.get_nodeattr("inputDataType")] + + def get_output_datatype(self): + """Returns FINN DataType of output.""" + return DataType[self.get_nodeattr("outputDataType")] + + def get_instream_width(self): + i_bits = self.get_input_datatype().bitwidth() + return i_bits * self.get_nodeattr("PE") + + def get_outstream_width(self): + o_bits = self.get_output_datatype().bitwidth() + return o_bits * self.get_nodeattr("PE") + + def get_folded_input_shape(self): + ich = self.get_nodeattr("NumChannels") + pe = self.get_nodeattr("PE") + fold = ich // pe + vecs = list(self.get_nodeattr("numInputVectors")) + folded_input_shape = tuple(vecs + [fold, pe]) + return folded_input_shape + + def get_folded_output_shape(self): + # same shape as input + return self.get_folded_input_shape() + + def get_normal_input_shape(self): + ich = self.get_nodeattr("NumChannels") + vecs = list(self.get_nodeattr("numInputVectors")) + normal_input_shape = tuple(vecs + [ich]) + return normal_input_shape + + def get_normal_output_shape(self): + # same shape as input + return self.get_normal_input_shape() + + def get_number_output_values(self): + nf = np.prod(self.get_folded_output_shape()[:-1]) + return nf + + def get_template_param_values(self): + """Returns the template parameter values according to input, output and weight + data types.""" + ret = dict() + inp_hls_str = self.get_input_datatype().get_hls_datatype_str() + out_hls_str = self.get_output_datatype().get_hls_datatype_str() + # fill in TSrcI + ret["TSrcI"] = "Slice<%s>" % inp_hls_str + # fill in TDstI + ret["TDstI"] = "Slice<%s>" % out_hls_str + + return ret + + def get_hls_compatible_parameter_tensor(self, orig_param_vector): + """Convert the original numpy 
weight matrix orig_weight_matrix into + a form suitable for passing to the hlslib call: + * ensure chn % PE == 0 + * interleave rows between PEs + * reshape into (PE, TMEM) and return + """ + chn = self.get_nodeattr("NumChannels") + pe = self.get_nodeattr("PE") + tmem = chn // pe + assert chn % pe == 0, "Requirement NumChannels divisable by PE is violated." + assert ( + orig_param_vector.ndim == 1 + ), """Parameter vector dimension is {}. + Expected dimension: 1.""".format( + orig_param_vector.ndim + ) + + # if not self.get_input_datatype().signed(): + # # ensure all thresholds are nonnegative + # assert (orig_param_vector >= 0).all() + + # ensure all thresholds are integer + assert (orig_param_vector.astype(np.int32) == orig_param_vector).all() + ret = orig_param_vector + + assert ( + ret.shape[0] == chn + ), "Cardinality of parameter vector is not as expected (chn)" + + # distribute rows between PEs + ret = ret.reshape(tmem, pe).transpose() + assert ( + ret.shape[0] == pe + ), """First dimension after distribution of the + rows between PEs is not as expected (pe)""" + assert ( + ret.shape[1] == tmem + ), """Second dimension after distribution of the + rows between PEs is not as expected (tmem)""" + + return ret.reshape(1, pe, tmem) + + def generate_params(self, model, path): + code_gen_dir = path + # save thresholds in params.h + parameters = model.get_initializer(self.onnx_node.input[1]) + parameter_tensor = self.get_hls_compatible_parameter_tensor(parameters) + pdt = DataType[self.get_nodeattr("paramDataType")] + + parameters_hls_code = numpy_to_hls_code( + parameter_tensor, pdt, "parameters", False, True + ) + # get input data type + export_idt = self.get_input_datatype() + if self.get_input_datatype() == DataType.BIPOLAR: + export_idt = DataType.BINARY + idt_hls = export_idt.get_hls_datatype_str() + + # write parameters into params.h + f_params = open("{}/params.h".format(code_gen_dir), "w") + pdt_hls = pdt.get_hls_datatype_str() + # use binary to export 
bipolar activations + export_odt = self.get_output_datatype() + if self.get_output_datatype() == DataType.BIPOLAR: + export_odt = DataType.BINARY + odt_hls = export_odt.get_hls_datatype_str() + # get desired function + func = self.get_nodeattr("Func") + if func == "cmp_le": + func_str = "std::less_equal" + elif func == "cmp_ge": + func_str = "std::greater_equal" + elif func == "add": + func_str = "std::plus" + elif func == "mul": + func_str = "std::multiplies" + else: + raise Exception( + """Invalid value for attribute Func! Is currently set to: {} + has to be set to one of the following value + ("cmp_le", "cmp_ge", "add", "mul")""".format( + func + ) + ) + f_params.write( + "static ChannelWiseOperation<{},{},{},{},{},{}> threshs \ + = ".format( + self.calc_tmem(), + self.get_nodeattr("PE"), + idt_hls, + pdt_hls, + odt_hls, + "%s<%s>" % (func_str, odt_hls), + ) + ) + f_params.write(parameters_hls_code) + f_params.close() + + def execute_node(self, context, graph): + mode = self.get_nodeattr("exec_mode") + node = self.onnx_node + + # TODO ensure codegen dir exists + if mode == "cppsim": + code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") + elif mode == "rtlsim": + code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") + else: + raise Exception( + """Invalid value for attribute exec_mode! 
Is currently set to: {} + has to be set to one of the following value ("cppsim", "rtlsim")""".format( + mode + ) + ) + + # create a npy file fore each input of the node (in_ind is input index) + in_ind = 0 + for inputs in node.input: + # it is assumed that the first input of the node is the data input + # the second input are the weights + # the third input are the thresholds + if in_ind == 0: + assert ( + str(context[inputs].dtype) == "float32" + ), """Input datatype is + not float32 as expected.""" + expected_inp_shape = self.get_folded_input_shape() + reshaped_input = context[inputs].reshape(expected_inp_shape) + export_idt = self.get_input_datatype() + # make copy before saving the array + reshaped_input = reshaped_input.copy() + np.save( + os.path.join(code_gen_dir, "input_{}.npy".format(in_ind)), + reshaped_input, + ) + elif in_ind > 2: + raise Exception("Unexpected input found for ChannelwiseOp_Batch") + in_ind += 1 + + if mode == "cppsim": + # execute the precompiled model + super().exec_precompiled_singlenode_model() + # load output npy file + super().npy_to_dynamic_output(context) + # reinterpret binary output as bipolar where needed + if self.get_output_datatype() == DataType.BIPOLAR: + out = context[node.output[0]] + out = 2 * out - 1 + context[node.output[0]] = out + assert ( + context[node.output[0]].shape == self.get_folded_output_shape() + ), """Output shape is not as expected""" + # reshape output to have expected shape + oshape = self.get_normal_output_shape() + context[node.output[0]] = context[node.output[0]].reshape(*oshape) + elif mode == "rtlsim": + sim = self.get_rtlsim() + nbits = self.get_instream_width() + inp = npy_to_rtlsim_input( + "{}/input_0.npy".format(code_gen_dir), export_idt, nbits + ) + super().reset_rtlsim(sim) + super().toggle_clk(sim) + output = self.rtlsim(sim, inp) + odt = self.get_output_datatype() + target_bits = odt.bitwidth() + packed_bits = self.get_outstream_width() + out_npy_path = 
"{}/output.npy".format(code_gen_dir) + out_shape = self.get_folded_output_shape() + rtlsim_output_to_npy( + output, out_npy_path, odt, out_shape, packed_bits, target_bits + ) + + # load and reshape output + output = np.load(out_npy_path) + oshape = self.get_normal_output_shape() + output = np.asarray([output], dtype=np.float32).reshape(*oshape) + context[node.output[0]] = output + else: + raise Exception( + """Invalid value for attribute exec_mode! Is currently set to: {} + has to be set to one of the following value ("cppsim", "rtlsim")""".format( + mode + ) + ) + + def global_includes(self): + self.code_gen_dict["$GLOBALS$"] = ['#include "activations.hpp"'] + self.code_gen_dict["$GLOBALS$"] += ['#include "params.h"'] + + # TODO check and add whatever missing + def defines(self, var): + numInputVectors = list(self.get_nodeattr("numInputVectors")) + numReps = numInputVectors[0] + self.code_gen_dict["$DEFINES$"] = [ + """#define NumChannels1 {}\n#define PE1 {}\n#define numReps {}""".format( + self.get_nodeattr("NumChannels"), self.get_nodeattr("PE"), numReps, + ) + ] + + def read_npy_data(self): + code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") + dtype = self.get_input_datatype() + elem_bits = dtype.bitwidth() + packed_bits = self.get_instream_width() + packed_hls_type = "ap_uint<%d>" % packed_bits + elem_hls_type = dtype.get_hls_datatype_str() + npy_type = "float" + npy_in = "%s/input_0.npy" % code_gen_dir + self.code_gen_dict["$READNPYDATA$"] = [] + # note: the innermost dim is reversed for the input + self.code_gen_dict["$READNPYDATA$"].append( + 'npy2apintstream<%s, %s, %d, %s>("%s", in0, false);' + % (packed_hls_type, elem_hls_type, elem_bits, npy_type, npy_in) + ) + + def strm_decl(self): + self.code_gen_dict["$STREAMDECLARATIONS$"] = [] + self.code_gen_dict["$STREAMDECLARATIONS$"].append( + 'hls::stream<ap_uint<{}>> in0 ("in0");'.format(self.get_instream_width()) + ) + self.code_gen_dict["$STREAMDECLARATIONS$"].append( + 'hls::stream<ap_uint<{}>> out 
("out");'.format(self.get_outstream_width()) + ) + + def docompute(self): + tmpl_args = self.get_template_param_values() + # TODO: why put some template parameters into defines and not others? + # should ImgDim be defined or just filled in here like we do now? + ishape = self.get_folded_input_shape() + if len(ishape) == 3: + imgdim = 1 + elif len(ishape) == 5: + imgdim = ishape[1] + else: + raise Exception("""Unexpeted input shape""") + self.code_gen_dict["$DOCOMPUTE$"] = [ + """Thresholding_Batch<{}, NumChannels1, PE1, {}, {}> + (in0, out, threshs, numReps);""".format( + imgdim, tmpl_args["TSrcI"], tmpl_args["TDstI"], + ) + ] + + def dataoutstrm(self): + code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") + dtype = self.get_output_datatype() + if dtype == DataType.BIPOLAR: + # use binary for bipolar storage + dtype = DataType.BINARY + elem_bits = dtype.bitwidth() + packed_bits = self.get_outstream_width() + packed_hls_type = "ap_uint<%d>" % packed_bits + elem_hls_type = dtype.get_hls_datatype_str() + npy_type = "float" + npy_out = "%s/output.npy" % code_gen_dir + shape = self.get_folded_output_shape() + shape_cpp_str = str(shape).replace("(", "{").replace(")", "}") + + # note: the innermost dim is not reversed for the output + self.code_gen_dict["$DATAOUTSTREAM$"] = [ + 'apintstream2npy<%s, %s, %d, %s>(out, %s, "%s", false);' + % ( + packed_hls_type, + elem_hls_type, + elem_bits, + npy_type, + shape_cpp_str, + npy_out, + ) + ] + + def save_as_npy(self): + self.code_gen_dict["$SAVEASCNPY$"] = [] + + def blackboxfunction(self): + self.code_gen_dict["$BLACKBOXFUNCTION$"] = [ + """void {}(hls::stream<ap_uint<{}>> &in0, + hls::stream<ap_uint<{}>> &out + )""".format( + self.onnx_node.name, + self.get_instream_width(), + self.get_outstream_width(), + ) + ] + + def pragmas(self): + self.code_gen_dict["$PRAGMAS$"] = ["#pragma HLS INTERFACE axis port=in0"] + self.code_gen_dict["$PRAGMAS$"].append("#pragma HLS INTERFACE axis port=out") + 
self.code_gen_dict["$PRAGMAS$"].append( + "#pragma HLS INTERFACE ap_ctrl_none port=return" + ) + + # the channelwise parameter tensor is acc_type [PE][TMEM][N_PARAMS_PER_CHANNEL] + # partition for parallel access along PE and N_PARAMS_PER_CHANNEL + # dimensions (dims 1 and 3) + self.code_gen_dict["$PRAGMAS$"].append( + ( + "#pragma HLS ARRAY_PARTITION variable=threshs.parameters " + "complete dim=1" + ) + ) + # self.code_gen_dict["$PRAGMAS$"].append( + # ( + # "#pragma HLS ARRAY_PARTITION variable=threshs.parameters " + # "complete dim=3" + # ) + # ) + + # set resource type + ram_style = self.get_nodeattr("ram_style") + pe = self.get_nodeattr("PE") + ich = self.get_nodeattr("NumChannels") + # if PE less than NumChannels, assign cores according to ram_style; + # otherwise if PE == NumChannels, Vivado HLS will unroll to FFs + if pe < ich: + if ram_style == "distributed": + self.code_gen_dict["$PRAGMAS$"].append( + ( + "#pragma HLS RESOURCE variable=threshs.parameters " + "core=ROM_2P_LUTRAM" + ) + ) + elif ram_style == "block": + self.code_gen_dict["$PRAGMAS$"].append( + ( + "#pragma HLS RESOURCE variable=threshs.parameters " + "core=ROM_2P_BRAM" + ) + ) + else: + raise Exception( + """Invalid value for attribute ram_style! 
Is currently set to: {} + has to be set to one of ("block", "distributed")""".format( + ram_style + ) + ) diff --git a/src/finn/custom_op/fpgadataflow/downsampler.py b/src/finn/custom_op/fpgadataflow/downsampler.py new file mode 100644 index 0000000000000000000000000000000000000000..0ce4379a2c41baa5bc009e9df7623d133ee89a09 --- /dev/null +++ b/src/finn/custom_op/fpgadataflow/downsampler.py @@ -0,0 +1,297 @@ +import os +import numpy as np +from onnx import TensorProto, helper +from finn.core.datatype import DataType +from finn.custom_op.fpgadataflow import HLSCustomOp +from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy + + +class DownSampler(HLSCustomOp): + """Corresponds to finn-hlslib ConvolutionInputGenerator_kernel1 function. + Basically performs a down sampling of the image removing rows and columns.""" + + def __init__(self, onnx_node): + super().__init__(onnx_node) + + def get_nodeattr_types(self): + my_attrs = { + # spatial size of input images + "ImgDim": ("i", True, 0), + # number of channels in input image + "NumChannels": ("i", True, 0), + # Number of input columns computed in parallel + "SIMD": ("i", False, 1), + "Stride": ("i", True, 2), + # FINN input datatype + "inputDataType": ("s", True, ""), + # Batch size + "numInputVectors": ("i", False, 1), + } + my_attrs.update(super().get_nodeattr_types()) + return my_attrs + + def get_downsampled_odim(self): + "Return the down sampled spatial size of the output." 
+ idim = self.get_nodeattr("ImgDim") + stride = self.get_nodeattr("Stride") + return int(np.floor((idim - 1) / stride) + 1) + + def get_normal_input_shape(self): + idim = self.get_nodeattr("ImgDim") + num_ch = self.get_nodeattr("NumChannels") + batch = self.get_nodeattr("numInputVectors") + ishape = (batch, idim, idim, num_ch) + return ishape + + def get_normal_output_shape(self): + odim = self.get_downsampled_odim() + num_ch = self.get_nodeattr("NumChannels") + batch = self.get_nodeattr("numInputVectors") + oshape = (batch, odim, odim, num_ch) + return oshape + + def get_folded_input_shape(self): + normal_ishape = list(self.get_normal_input_shape()) + ifm_ch = self.get_nodeattr("NumChannels") + simd = self.get_nodeattr("SIMD") + assert ifm_ch % simd == 0, "SIMD must divide input channels" + fold = int(normal_ishape[-1] / simd) + folded_ishape = normal_ishape[:-1] + [fold, simd] + return tuple(folded_ishape) + + def get_folded_output_shape(self): + normal_oshape = list(self.get_normal_output_shape()) + ifm_ch = self.get_nodeattr("NumChannels") + simd = self.get_nodeattr("SIMD") + assert ifm_ch % simd == 0, "SIMD must divide input channels" + fold = int(normal_oshape[-1] / simd) + folded_oshape = normal_oshape[:-1] + [fold, simd] + return tuple(folded_oshape) + + def make_shape_compatible_op(self, model): + exp_ishape = self.get_normal_input_shape() + oshape = self.get_normal_output_shape() + ishape = tuple(model.get_tensor_shape(self.onnx_node.input[0])) + assert ishape == exp_ishape, "Unexpect input shape for DownSampler." 
+ # implement tensor with correct shape + values = np.random.randn(*oshape).astype(np.float32) + return helper.make_node( + "Constant", + inputs=[], + outputs=[self.onnx_node.output[0]], + value=helper.make_tensor( + name="const_tensor", + data_type=TensorProto.FLOAT, + dims=values.shape, + vals=values.flatten().astype(float), + ), + ) + + def infer_node_datatype(self, model): + node = self.onnx_node + # data type stays the same + dtype = model.get_tensor_datatype(node.input[0]) + exp_idtype = self.get_input_datatype() + assert dtype == exp_idtype, "Unexpected datatype for DownSampler" + model.set_tensor_datatype(node.output[0], dtype) + + def verify_node(self): + pass + + def get_input_datatype(self): + """Returns FINN DataType of input.""" + ret = DataType[self.get_nodeattr("inputDataType")] + return ret + + def get_output_datatype(self): + """Returns FINN DataType of output. (Same as input datatype)""" + return self.get_input_datatype() + + def get_instream_width(self): + ibits = self.get_input_datatype().bitwidth() + simd = self.get_nodeattr("SIMD") + return ibits * simd + + def get_outstream_width(self): + obits = self.get_output_datatype().bitwidth() + simd = self.get_nodeattr("SIMD") + return obits * simd + + def get_number_output_values(self): + folded_oshape = self.get_folded_output_shape() + return np.prod(folded_oshape[:-1]) + + def global_includes(self): + self.code_gen_dict["$GLOBALS$"] = ['#include "slidingwindow.h"'] + + def defines(self, var): + self.code_gen_dict["$DEFINES$"] = [] + + ifm_ch = self.get_nodeattr("NumChannels") + self.code_gen_dict["$DEFINES$"] += ["#define IFMChannels {}".format(ifm_ch)] + + ibits = self.get_input_datatype().bitwidth() + self.code_gen_dict["$DEFINES$"] += ["#define Input_precision {}".format(ibits)] + + idim = self.get_nodeattr("ImgDim") + self.code_gen_dict["$DEFINES$"] += ["#define IFMDim {}".format(idim)] + + simd = self.get_nodeattr("SIMD") + self.code_gen_dict["$DEFINES$"] += ["#define SIMD {}".format(simd)] + 
+ stride = self.get_nodeattr("Stride") + self.code_gen_dict["$DEFINES$"] += ["#define Stride {}".format(stride)] + + batch_size = self.get_nodeattr("numInputVectors") + self.code_gen_dict["$DEFINES$"] += ["#define numReps {}".format(batch_size)] + + def read_npy_data(self): + code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") + dtype = self.get_input_datatype() + if dtype == DataType.BIPOLAR: + # use binary for bipolar storage + dtype = DataType.BINARY + elem_bits = dtype.bitwidth() + packed_bits = self.get_instream_width() + packed_hls_type = "ap_uint<%d>" % packed_bits + elem_hls_type = dtype.get_hls_datatype_str() + npy_type = "float" + npy_in = "%s/input_0.npy" % code_gen_dir + self.code_gen_dict["$READNPYDATA$"] = [] + self.code_gen_dict["$READNPYDATA$"].append( + 'npy2apintstream<%s, %s, %d, %s>("%s", in0);' + % (packed_hls_type, elem_hls_type, elem_bits, npy_type, npy_in) + ) + + def strm_decl(self): + self.code_gen_dict["$STREAMDECLARATIONS$"] = [] + self.code_gen_dict["$STREAMDECLARATIONS$"].append( + 'hls::stream<ap_uint<{}>> in0 ("in0");'.format(self.get_instream_width()) + ) + self.code_gen_dict["$STREAMDECLARATIONS$"].append( + 'hls::stream<ap_uint<{}>> out ("out");'.format(self.get_outstream_width()) + ) + + def docompute(self): + self.code_gen_dict["$DOCOMPUTE$"] = [ + """ConvolutionInputGenerator_kernel1<IFMChannels, Input_precision, + IFMDim, SIMD,Stride> (in0, out, numReps);""" + ] + + def dataoutstrm(self): + code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") + dtype = self.get_output_datatype() + if dtype == DataType.BIPOLAR: + # use binary for bipolar storage + dtype = DataType.BINARY + elem_bits = dtype.bitwidth() + packed_bits = self.get_outstream_width() + packed_hls_type = "ap_uint<%d>" % packed_bits + elem_hls_type = dtype.get_hls_datatype_str() + npy_type = "float" + npy_out = "%s/output.npy" % code_gen_dir + oshape = self.get_folded_output_shape() + oshape_cpp_str = str(oshape).replace("(", "{").replace(")", "}") + + 
self.code_gen_dict["$DATAOUTSTREAM$"] = [ + 'apintstream2npy<%s, %s, %d, %s>(out, %s, "%s");' + % ( + packed_hls_type, + elem_hls_type, + elem_bits, + npy_type, + oshape_cpp_str, + npy_out, + ) + ] + + def save_as_npy(self): + self.code_gen_dict["$SAVEASCNPY$"] = [] + + def blackboxfunction(self): + packed_bits = self.get_instream_width() + packed_hls_type = "ap_uint<%d>" % packed_bits + self.code_gen_dict["$BLACKBOXFUNCTION$"] = [ + "void %s(hls::stream<%s > &in0, hls::stream<%s > &out)" + % (self.onnx_node.name, packed_hls_type, packed_hls_type) + ] + + def pragmas(self): + self.code_gen_dict["$PRAGMAS$"] = ["#pragma HLS INTERFACE axis port=in0"] + self.code_gen_dict["$PRAGMAS$"].append("#pragma HLS INTERFACE axis port=out") + self.code_gen_dict["$PRAGMAS$"].append( + "#pragma HLS INTERFACE ap_ctrl_none port=return" + ) + + def execute_node(self, context, graph): + mode = self.get_nodeattr("exec_mode") + node = self.onnx_node + exp_ishape = self.get_normal_input_shape() + exp_oshape = self.get_normal_output_shape() + folded_ishape = self.get_folded_input_shape() + folded_oshape = self.get_folded_output_shape() + + if mode == "cppsim": + code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") + elif mode == "rtlsim": + code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") + else: + raise Exception( + """Invalid value for attribute exec_mode! 
Is currently set to: {} + has to be set to one of the following value ("cppsim", "rtlsim")""".format( + mode + ) + ) + + inp = context[node.input[0]] + assert str(inp.dtype) == "float32", "Input datatype is not float32" + assert ( + inp.shape == exp_ishape + ), """Input shape doesn't + match expected shape (numInputVectors, ImgDim, ImgDim, NumChannels).""" + export_idt = self.get_input_datatype() + + reshaped_input = inp.reshape(folded_ishape) + np.save(os.path.join(code_gen_dir, "input_0.npy"), reshaped_input) + + if mode == "cppsim": + # execute the precompiled model + super().exec_precompiled_singlenode_model() + # load output npy file + super().npy_to_dynamic_output(context) + assert ( + context[node.output[0]].shape == folded_oshape + ), "cppsim did not produce expected folded output shape" + context[node.output[0]] = context[node.output[0]].reshape(*exp_oshape) + elif mode == "rtlsim": + sim = self.get_rtlsim() + nbits = self.get_instream_width() + rtlsim_inp = npy_to_rtlsim_input( + "{}/input_0.npy".format(code_gen_dir), export_idt, nbits + ) + super().reset_rtlsim(sim) + super().toggle_clk(sim) + rtlsim_output = self.rtlsim(sim, rtlsim_inp) + odt = export_idt + target_bits = odt.bitwidth() + packed_bits = self.get_outstream_width() + out_npy_path = "{}/output.npy".format(code_gen_dir) + out_shape = self.get_folded_output_shape() + rtlsim_output_to_npy( + rtlsim_output, out_npy_path, odt, out_shape, packed_bits, target_bits + ) + # load and reshape output + output = np.load(out_npy_path) + output = np.asarray([output], dtype=np.float32).reshape(*exp_oshape) + context[node.output[0]] = output + else: + raise Exception( + """Invalid value for attribute exec_mode! 
Is currently set to: {} + has to be set to one of the following value ("cppsim", "rtlsim")""".format( + mode + ) + ) + assert ( + context[node.output[0]].shape == exp_oshape + ), """Output shape doesn't match expected shape + (1, OutputDim, OutputDim, NumChannels).""" diff --git a/src/finn/custom_op/fpgadataflow/globalaccpool_batch.py b/src/finn/custom_op/fpgadataflow/globalaccpool_batch.py index 9e6c63dc510aab5f6baff9cb6326a2d0476f67a9..83152dea6cc494b8464c78605399b21b38d48b80 100644 --- a/src/finn/custom_op/fpgadataflow/globalaccpool_batch.py +++ b/src/finn/custom_op/fpgadataflow/globalaccpool_batch.py @@ -75,16 +75,19 @@ class GlobalAccPool_Batch(HLSCustomOp): def get_normal_output_shape(self): ch = self.get_nodeattr("NumChannels") vecs = list(self.get_nodeattr("numInputVectors")) - oshape = tuple([vecs[0]] + [ch]) + if len(vecs) == 1: + oshape = tuple(vecs + [ch]) + elif len(vecs) == 3: + oshape = tuple([vecs[0]] + [1, 1, ch]) return oshape def get_folded_output_shape(self): ch = self.get_nodeattr("NumChannels") pe = self.get_nodeattr("PE") - vecs = list(self.get_nodeattr("numInputVectors")) + unfolded_shape = list(self.get_normal_output_shape()) assert ch % pe == 0, "PE must divide NumChannels" folds = int(ch / pe) - oshape = tuple([vecs[0]] + [folds, pe]) + oshape = tuple(unfolded_shape[:-1] + [folds, pe]) return oshape def make_shape_compatible_op(self, model): diff --git a/src/finn/custom_op/fpgadataflow/iodma.py b/src/finn/custom_op/fpgadataflow/iodma.py new file mode 100644 index 0000000000000000000000000000000000000000..9b718ecbbc490610790b68871080de23a54f4891 --- /dev/null +++ b/src/finn/custom_op/fpgadataflow/iodma.py @@ -0,0 +1,346 @@ +# Copyright (c) 2020, Xilinx +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of FINN nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +import numpy as np +import math +from onnx import TensorProto, helper +from finn.core.datatype import DataType +from finn.custom_op.fpgadataflow import HLSCustomOp + + +# the IODMA interfaces a memory-mapped AXI interface and an AXI stream +# direction "in": pulls data from AXI-MM to AXI stream +# direction "out": pushes data from AXI stream to AXI-MM + +# DMA Addressing +# - burst mode can be "wrap" or "increment" +# - "increment" bursts will increment the address when moving to the next image +# - "wrap" bursts will reinitialize the address to the start address, +# and are useful for e.g. streaming weights, where the same buffer is +# repeatedly read into the FPGA +# - no additional alignment restrictions beyond anything specified in the AXI spec + +# Interfaces +# - AXI-MM name specified by intfName unless this is set to "" (empty, the default) +# in which case output AXI-MM are named "out" and input AXI-MM are named "in0" +# - AXI-MM interface width (in bits) is specified by intfWidth +# - AXI-Stream interface width (in bits) is specified by streamWidth +# - If intfWidth and streamWidth are not equal, the DMA core performs +# width conversion by going up to the least common multiple of bitwidths +# e.g. intfWidth=32b -> 96b -> streamWidth=24b +# - transfers occur in multiples of the AXI-MM interface width, therefore +# the total number of bits in the tensor must be a multiple of intfWidth +# - transfers occur in multiples of the AXI-Stream interface width, therefore +# the total number of bits in the tensor must be a multiple of streamWidth +# - both interface widths must be a multiple of 8b (AXI protocol requirement) +# - in most systems, intfWidth is also restricted to a power of 2 (e.g.
Vitis) +# but this is not universal so we don't check here explicitly + +# Input/output tensor sizes shapes +# - The data being moved is a tensor of shape numInputVectors+[NumChannels] +# - The data type of the tensor elements is specified by dataType +# - on the stream side +# -the normal shape is the same as the ONNX tensor attached to it +# -the folded shape is computed from the stream width and normal shape +# - on the AXI-MM side +# -the normal shape is the same as the one on the stream side +# -the folded shape is not defined + + +class IODMA(HLSCustomOp): + """Class that corresponds to finn-hlslib DMA function(s).""" + + def __init__(self, onnx_node): + super().__init__(onnx_node) + + def get_nodeattr_types(self): + my_attrs = { + "NumChannels": ("i", True, 0), + # FINN input datatype + "dataType": ("s", True, ""), + # Stream parameters + "streamWidth": ("i", False, 32), + # DMA-specific parameters + "intfWidth": ("i", False, 32), + "burstMode": ("s", False, "increment"), + "direction": ("s", False, "in"), + # shape describing input vecs per execution + "numInputVectors": ("ints", False, [1]), + # name of axi-mm interface + "intfName": ("s", False, ""), + } + my_attrs.update(super().get_nodeattr_types()) + return my_attrs + + def get_normal_input_shape(self): + vecs = list(self.get_nodeattr("numInputVectors")) + num_ch = self.get_nodeattr("NumChannels") + ishape = tuple(vecs + [num_ch]) + return ishape + + def get_normal_output_shape(self): + return self.get_normal_input_shape() + + def get_folded_input_shape(self): + if self.get_nodeattr("direction") == "in": + raise ValueError("Folded input shape not defined for input IODMA") + else: + shape = list(self.get_normal_input_shape()) + itype_bits = self.get_input_datatype().bitwidth() + intfw = self.get_nodeattr("streamWidth") + assert ( + intfw % itype_bits == 0 + ), "Input stream width must be a multiple of datatype bits" + elems_per_word = intfw // itype_bits + assert shape[-1] % elems_per_word == 0, "Fold 
depth must be integer" + fold_depth = shape[-1] // elems_per_word + shape[-1] = fold_depth + shape.append(elems_per_word) + return tuple(shape) + + def get_folded_output_shape(self): + if self.get_nodeattr("direction") == "out": + raise ValueError("Folded output shape not defined for output IODMA") + else: + shape = list(self.get_normal_output_shape()) + itype_bits = self.get_output_datatype().bitwidth() + intfw = self.get_nodeattr("streamWidth") + assert ( + intfw % itype_bits == 0 + ), "Input stream width must be a multiple of datatype bits" + elems_per_word = intfw // itype_bits + assert shape[-1] % elems_per_word == 0, "Fold depth must be integer" + fold_depth = shape[-1] // elems_per_word + shape[-1] = fold_depth + shape.append(elems_per_word) + return tuple(shape) + + def make_shape_compatible_op(self, model): + exp_ishape = self.get_normal_input_shape() + oshape = self.get_normal_output_shape() + ishape = tuple(model.get_tensor_shape(self.onnx_node.input[0])) + assert ishape == exp_ishape, "Unexpected input shape." + # implement tensor with correct shape + values = np.random.randn(*oshape).astype(np.float32) + return helper.make_node( + "Constant", + inputs=[], + outputs=[self.onnx_node.output[0]], + value=helper.make_tensor( + name="const_tensor", + data_type=TensorProto.FLOAT, + dims=values.shape, + vals=values.flatten().astype(float), + ), + ) + + def infer_node_datatype(self, model): + node = self.onnx_node + # data type stays the same + dtype = model.get_tensor_datatype(node.input[0]) + exp_idtype = self.get_input_datatype() + assert dtype == exp_idtype, "Unexpected datatype." + model.set_tensor_datatype(node.output[0], dtype) + + def verify_node(self): + pass + + def get_input_datatype(self): + """Returns FINN DataType of input.""" + return DataType[self.get_nodeattr("dataType")] + + def get_output_datatype(self): + """Returns FINN DataType of output. 
(Same as input datatype)""" + return self.get_input_datatype() + + def get_instream_width(self): + if self.get_nodeattr("direction") == "in": + return self.get_nodeattr("intfWidth") + elif self.get_nodeattr("direction") == "out": + return self.get_nodeattr("streamWidth") + else: + raise ValueError("Invalid IODMA direction, please set to in or out") + + def get_outstream_width(self): + if self.get_nodeattr("direction") == "out": + return self.get_nodeattr("intfWidth") + elif self.get_nodeattr("direction") == "in": + return self.get_nodeattr("streamWidth") + else: + raise ValueError("Invalid IODMA direction, please set to in or out") + + def get_number_output_values(self): + oshape = self.get_normal_output_shape() + itype_bits = self.get_input_datatype().bitwidth() + intfw = self.get_nodeattr("intfWidth") + nelems = np.prod(oshape) + nbits = nelems * itype_bits + assert nbits % intfw == 0, "DMA: total transfer size must be word multiple" + ovalues = nbits // intfw + return ovalues + + def global_includes(self): + self.code_gen_dict["$GLOBALS$"] = ['#include "dma.h"'] + self.code_gen_dict["$GLOBALS$"].append('#include "streamtools.h"') + + def defines(self, var): + itype_bits = self.get_input_datatype().bitwidth() + total_bits = itype_bits * np.prod(self.get_normal_input_shape()) + assert total_bits % 8 == 0, "DMA input not a multiple of 1 Byte" + total_bytes = total_bits // 8 + self.code_gen_dict["$DEFINES$"] = [ + """#define NumBytes1 {}\n#define DataWidth1 {}\n""".format( + total_bytes, self.get_nodeattr("intfWidth") + ) + ] + + def get_ap_int_max_w(self): + "Return the maximum width of any ap_int used in this module." 
+ instream = self.get_instream_width() + outstream = self.get_outstream_width() + width_lcm = (instream * outstream) // math.gcd(instream, outstream) + return width_lcm + + def docompute(self): + direction = self.get_nodeattr("direction") + mode = self.get_nodeattr("burstMode") + if direction == "in": + if mode == "wrap": + func = "Mem2Stream_Batch_external_wmem" + else: + func = "Mem2Stream_Batch" + dwc_func = "WidthAdjustedOutputStream" + elif direction == "out": + func = "Stream2Mem_Batch" + dwc_func = "WidthAdjustedInputStream" + else: + raise ValueError("Invalid IODMA direction, please set to in or out") + # define templates for instantiation + dma_inst_template = func + "<DataWidth1, NumBytes1>(%s, %s, numReps);" + dwc_inst_template = dwc_func + "<%d, %d, %d> %s(%s, numReps);" + # do stream infrastructure and instantiations + intfw = self.get_nodeattr("intfWidth") + strmw = self.get_nodeattr("streamWidth") + width_lcm = (strmw * intfw) // math.gcd(strmw, intfw) + # we always need two streams: one of width_lcm, and one of intfw width + # because we use WidthAdjustedInputStream, + dtype_bits = self.get_input_datatype().bitwidth() + total_bits = dtype_bits * np.prod(self.get_normal_input_shape()) + if direction == "in": + self.code_gen_dict["$DOCOMPUTE$"] = [ + dwc_inst_template + % (width_lcm, strmw, total_bits // width_lcm, "dwc_lcm", "out"), + dwc_inst_template + % (intfw, width_lcm, total_bits // intfw, "dwc_intfw", "dwc_lcm"), + dma_inst_template % ("in0", "dwc_intfw"), + ] + else: + self.code_gen_dict["$DOCOMPUTE$"] = [ + dwc_inst_template + % (strmw, width_lcm, total_bits // strmw, "dwc_lcm", "in0"), + dwc_inst_template + % (width_lcm, intfw, total_bits // width_lcm, "dwc_intfw", "dwc_lcm"), + dma_inst_template % ("dwc_intfw", "out"), + ] + + def blackboxfunction(self): + packed_ibits = self.get_instream_width() + packed_hls_type_in = "ap_uint<%d>" % packed_ibits + packed_obits = self.get_outstream_width() + packed_hls_type_out = "ap_uint<%d>" % 
packed_obits + direction = self.get_nodeattr("direction") + if direction == "in": + self.code_gen_dict["$BLACKBOXFUNCTION$"] = [ + "void %s(%s *in0, hls::stream<%s > &out, unsigned int numReps)" + % (self.onnx_node.name, packed_hls_type_in, packed_hls_type_out) + ] + elif direction == "out": + self.code_gen_dict["$BLACKBOXFUNCTION$"] = [ + "void %s(hls::stream<%s > &in0, %s *out, unsigned int numReps)" + % (self.onnx_node.name, packed_hls_type_in, packed_hls_type_out) + ] + else: + raise ValueError("Invalid IODMA direction, please set to in or out") + + def pragmas(self): + self.code_gen_dict["$PRAGMAS$"] = [ + "#pragma HLS INTERFACE s_axilite port=numReps bundle=control" + ] + self.code_gen_dict["$PRAGMAS$"].append( + "#pragma HLS INTERFACE s_axilite port=return bundle=control" + ) + direction = self.get_nodeattr("direction") + intfname = self.get_nodeattr("intfName") + if direction == "in": + if intfname == "": + self.code_gen_dict["$PRAGMAS$"].append( + "#pragma HLS INTERFACE m_axi offset=slave port=in0" + ) + else: + self.code_gen_dict["$PRAGMAS$"].append( + "#pragma HLS INTERFACE m_axi offset=slave port=%s" % (intfname) + ) + self.code_gen_dict["$PRAGMAS$"].append( + "#pragma HLS INTERFACE s_axilite port=in0 bundle=control" + ) + self.code_gen_dict["$PRAGMAS$"].append( + "#pragma HLS INTERFACE axis port=out" + ) + elif direction == "out": + self.code_gen_dict["$PRAGMAS$"].append( + "#pragma HLS INTERFACE axis port=in0" + ) + if intfname == "": + self.code_gen_dict["$PRAGMAS$"].append( + "#pragma HLS INTERFACE m_axi offset=slave port=out" + ) + else: + self.code_gen_dict["$PRAGMAS$"].append( + "#pragma HLS INTERFACE m_axi offset=slave port=%s" % (intfname) + ) + self.code_gen_dict["$PRAGMAS$"].append( + "#pragma HLS INTERFACE s_axilite port=out bundle=control" + ) + else: + raise ValueError("Invalid IODMA direction, please set to in or out") + self.code_gen_dict["$PRAGMAS$"].append("#pragma HLS DATAFLOW") + + def execute_node(self, context, graph): + pass + + 
def dataoutstrm(self): + pass + + def read_npy_data(self): + pass + + def save_as_npy(self): + pass + + def strm_decl(self): + pass diff --git a/src/finn/custom_op/registry.py b/src/finn/custom_op/registry.py index 0060e5d400f30055d532671c8cf1680f0668442a..e4317e02d46df90c8fd0c8854262ca6eb0ea4f48 100644 --- a/src/finn/custom_op/registry.py +++ b/src/finn/custom_op/registry.py @@ -31,6 +31,7 @@ from finn.custom_op.fpgadataflow.convolutioninputgenerator import ( ConvolutionInputGenerator, ) +from finn.custom_op.fpgadataflow.downsampler import DownSampler from finn.custom_op.fpgadataflow.streamingfclayer_batch import StreamingFCLayer_Batch from finn.custom_op.fpgadataflow.streamingmaxpool_batch import StreamingMaxPool_Batch from finn.custom_op.fpgadataflow.streamingfifo import StreamingFIFO @@ -51,11 +52,14 @@ from finn.custom_op.fpgadataflow.addstreams_batch import AddStreams_Batch from finn.custom_op.fpgadataflow.labelselect_batch import LabelSelect_Batch from finn.custom_op.quantavgpool2d import QuantAvgPool2d from finn.custom_op.fpgadataflow.duplicatestreams_batch import DuplicateStreams_Batch +from finn.custom_op.fpgadataflow.channelwise_op_batch import ChannelwiseOp_Batch +from finn.custom_op.fpgadataflow.iodma import IODMA # create a mapping of all known CustomOp names and classes custom_op = {} custom_op["MultiThreshold"] = MultiThreshold +custom_op["DownSampler"] = DownSampler custom_op["XnorPopcountMatMul"] = XnorPopcountMatMul custom_op["Im2Col"] = Im2Col custom_op["StreamingMaxPool_Batch"] = StreamingMaxPool_Batch @@ -74,6 +78,8 @@ custom_op["AddStreams_Batch"] = AddStreams_Batch custom_op["LabelSelect_Batch"] = LabelSelect_Batch custom_op["QuantAvgPool2d"] = QuantAvgPool2d custom_op["DuplicateStreams_Batch"] = DuplicateStreams_Batch +custom_op["ChannelwiseOp_Batch"] = ChannelwiseOp_Batch +custom_op["IODMA"] = IODMA def getCustomOp(node): diff --git a/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py 
b/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py index b70b126680d650547cf376dd601c048c73a1cfd4..34a697a43426aae0f984770689552063aa35b9e8 100644 --- a/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py +++ b/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py @@ -27,6 +27,7 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. from onnx import helper, TensorProto +import numpy as np from finn.core.datatype import DataType from finn.transformation import Transformation @@ -34,7 +35,10 @@ from finn.custom_op.registry import getCustomOp from finn.transformation.infer_shapes import InferShapes from finn.transformation.infer_datatypes import InferDataTypes import finn.core.data_layout as DataLayout +from finn.util.onnx import nchw_to_nhwc +import warnings from finn.util.basic import get_by_name +import warnings class InferConvInpGen(Transformation): @@ -52,6 +56,9 @@ class InferConvInpGen(Transformation): i2c_in_shape = model.get_tensor_shape(i2c_input) i2c_out_shape = model.get_tensor_shape(i2c_output) dt = model.get_tensor_datatype(i2c_input) + if not dt.is_integer(): + warnings.warn("Input is not int. 
Can't infer ConvInpGen") + continue i2c_inst = getCustomOp(n) stride = i2c_inst.get_nodeattr("stride") k = i2c_inst.get_nodeattr("kernel_size") @@ -103,24 +110,40 @@ class InferConvInpGen(Transformation): ) graph.node.insert(node_ind, padding_node) - # create equivalent ConvolutionInputGenerator node - ConvInpGen_node = helper.make_node( - "ConvolutionInputGenerator", - [ConvInpGen_input], - [i2c_output], - domain="finn", - backend="fpgadataflow", - ConvKernelDim=k, - IFMChannels=ifm_ch, - IFMDim=ConvInpGen_idim, - OFMDim=ofm_dim, - SIMD=ifm_ch, - Stride=stride, - inputDataType=dt.name, - outputDataType=dt.name, - depthwise=depthwise, - ) - graph.node.insert(ConvInpGen_node_idx, ConvInpGen_node) + if stride > 1 and k == 1: + # create DownSampler node + ConvInpGen_node = helper.make_node( + "DownSampler", + [ConvInpGen_input], + [i2c_output], + domain="finn", + backend="fpgadataflow", + ImgDim=ConvInpGen_idim, + NumChannels=ifm_ch, + SIMD=ifm_ch, + Stride=stride, + inputDataType=dt.name, + ) + graph.node.insert(ConvInpGen_node_idx, ConvInpGen_node) + else: + # create equivalent ConvolutionInputGenerator node + ConvInpGen_node = helper.make_node( + "ConvolutionInputGenerator", + [ConvInpGen_input], + [i2c_output], + domain="finn", + backend="fpgadataflow", + ConvKernelDim=k, + IFMChannels=ifm_ch, + IFMDim=ConvInpGen_idim, + OFMDim=ofm_dim, + SIMD=ifm_ch, + Stride=stride, + inputDataType=dt.name, + outputDataType=dt.name, + depthwise=depthwise, + ) + graph.node.insert(ConvInpGen_node_idx, ConvInpGen_node) # remove old nodes graph.node.remove(n) graph_modified = True @@ -627,3 +650,243 @@ class InferThresholdingLayer(Transformation): model = model.transform(InferShapes()) model = model.transform(InferDataTypes()) return (model, graph_modified) + + +class InferChannelwiseLinearLayer(Transformation): + """Convert any channel-wise Add/Mul into a HLS layer.""" + + def get_smallest_possible(self, vals): + """Returns smallest (fewest bits) possible DataType that can 
represent + value. Prefers unsigned integers where possible.""" + vals = np.array(vals) + for v in vals: + assert int(v) == v, "Error float value" + + for k in DataType.__members__: + dt = DataType[k] + + if dt in [DataType.BIPOLAR, DataType.TERNARY, DataType.FLOAT32]: + # not currently supported + continue + + if (dt.min() <= vals).all() and (vals <= dt.max()).all(): + return dt + + warnings.warn( + """InferChannelwiseLinearLayer: Output values may not be + representable with supported data types. + Setting maximum width data type available. + This will lead to errors if there are no constrains on the input + """ + ) + + if (0 <= vals).all(): + return DataType.UINT32 + else: + return DataType.INT32 + + def apply(self, model): + graph = model.graph + node_ind = 0 + graph_modified = False + for node in graph.node: + node_ind += 1 + if node.op_type == "Add" or node.op_type == "Mul": + # assuming input[0] is dynamic + ll_input = node.input[0] + ll_output = node.output[0] + ll_in_shape = model.get_tensor_shape(ll_input) + + # check if input 1 has an initializer + ll_const = node.input[1] + if ll_const is not None: + ll_cinit = model.get_initializer(ll_const) + if ll_cinit is None: + # input 1 is also dynamic + continue + else: + continue + + # get number of channels and channel index from input + ll_in_layout = model.get_tensor_layout(ll_input) + if ll_in_layout == DataLayout.NHWC or ll_in_layout == DataLayout.NC: + ch_index = -1 + ch = ll_in_shape[-1] + elif ll_in_layout == DataLayout.NCHW: + ch_index = 1 + ch = ll_in_shape[1] + else: + continue + + # check if the shape of initializer is compatible + ll_cinit_shape = list(ll_cinit.shape) + if np.prod(ll_cinit_shape) == 1: + warnings.warn( + "Broadcasting " + str(node.op_type) + "(" + node.name + ")" + ) + ll_cinit = np.full((ch), ll_cinit.flatten()[0]) + elif np.prod(ll_cinit_shape) != ch or ll_cinit_shape[ch_index] != ch: + # parameter shape not compatible with Channelwise_batch + continue + + # check initializer 
contains integers as floats + if not (ll_cinit.astype(np.int32) == ll_cinit).all(): + continue + # all initializer conditions are met + + # check inputs + idt = model.get_tensor_datatype(ll_input) + if not idt.is_integer(): + # skip conversion for layers with float input + continue + + # check layout of inputs/outputs, and convert if needed + # check layout and convert if necessary + if ll_in_layout == DataLayout.NCHW: + ll_input = nchw_to_nhwc(ll_input, model, node_ind) + node_ind += 1 + ll_in_shape = model.get_tensor_shape(ll_input) + + # keep track of where we need to insert the HLS Op + # it has to be ahead of the output transform + insert_point = node_ind + ll_output_layout = model.get_tensor_layout(ll_output) + if ll_output_layout == DataLayout.NCHW: + ll_output = nchw_to_nhwc(ll_output, model, node_ind, reverse=True) + node_ind += 1 + + # get parameter data type + param_min = min(ll_cinit.flatten()) + param_max = max(ll_cinit.flatten()) + pdt = self.get_smallest_possible([param_min, param_max]) + + # set function and determine output data type + if node.op_type == "Add": + func = "add" + out_min = idt.min() + param_min + out_max = idt.max() + param_max + odt = self.get_smallest_possible([out_min, out_max]) + elif node.op_type == "Mul": + func = "mul" + possible_limits = [] + possible_limits += [idt.min() * param_min] + possible_limits += [idt.min() * param_max] + possible_limits += [idt.max() * param_min] + possible_limits += [idt.max() * param_max] + odt = self.get_smallest_possible(possible_limits) + + model.set_initializer(ll_const, ll_cinit.reshape(ch)) + model.set_tensor_datatype(ll_output, odt) + + # create node with no parallelization first + pe = 1 + assert ch % pe == 0, "Requirement IFC divisable by PE is violated." 
+ # create and insert node + new_node = helper.make_node( + "ChannelwiseOp_Batch", + [ll_input, ll_const], + [ll_output], + domain="finn", + backend="fpgadataflow", + Func=func, + NumChannels=ch, + PE=pe, + inputDataType=idt.name, + paramDataType=pdt.name, + outputDataType=odt.name, + numInputVectors=list(ll_in_shape[:-1]), + ) + graph.node.insert(insert_point, new_node) + # remove old node + graph.node.remove(node) + graph_modified = True + + if graph_modified: + model = model.transform(InferShapes()) + model = model.transform(InferDataTypes()) + return (model, graph_modified) + +
+class InferGlobalAccPoolLayer(Transformation):
+    """Convert any GlobalAveragePool into a GlobalAccPool HLS layer and a scalar Mul.
+
+    Nodes with non-integer input are skipped; NCHW tensors are wrapped in Transposes.
+    The Mul multiplies the GlobalAccPool output by 1 / (vecs[1] * vecs[2])."""
+
+    def apply(self, model):
+        graph = model.graph
+        node_ind = 0
+        graph_modified = False
+        for node in graph.node:
+            node_ind += 1
+            if node.op_type == "GlobalAveragePool":
+                in0 = node.input[0]
+                result = node.output[0]
+                in0_shape = model.get_tensor_shape(in0)
+                idt = model.get_tensor_datatype(in0)
+                # skip conversion for layers with float input
+                if not idt.is_integer():
+                    continue
+                # check layout and convert if necessary
+                in0_layout = model.get_tensor_layout(in0)
+                result_layout = model.get_tensor_layout(result)
+                if in0_layout == DataLayout.NCHW:
+                    in0 = nchw_to_nhwc(in0, model, node_ind)
+                    node_ind += 1
+                    in0_shape = model.get_tensor_shape(in0)
+
+                # keep track of where we need to insert the HLS Op
+                # it has to be ahead of the output transform
+                insert_point = node_ind
+                if result_layout == DataLayout.NCHW:
+                    result = nchw_to_nhwc(result, model, node_ind, reverse=True)
+                    node_ind += 1
+                num_ch = int(in0_shape[-1])
+                vecs = in0_shape[:-1]
+                # create node with no parallelization first
+                pe = 1
+                assert (
+                    num_ch % pe == 0
+                ), "Requirement Labels divisable by PE is violated."
+
+                # create an additional tensor of the same shape and layout as result
+                out_shape = model.get_tensor_shape(result)
+                pool_out = helper.make_tensor_value_info(
+                    model.make_new_valueinfo_name(), TensorProto.FLOAT, out_shape
+                )
+                model.graph.value_info.append(pool_out)
+                pool_out = pool_out.name
+                model.set_tensor_layout(pool_out, model.get_tensor_layout(result))
+
+                new_pool = helper.make_node(
+                    "GlobalAccPool_Batch",
+                    [in0],
+                    [pool_out],
+                    domain="finn",
+                    backend="fpgadataflow",
+                    NumChannels=num_ch,
+                    PE=pe,
+                    inputDataType=idt.name,
+                    numInputVectors=vecs,
+                )
+                # scaling constant, i.e. 1 / (H * W) when in0 is laid out as NHWC;
+                # float32 keeps the initializer consistent with its FLOAT value info
+                mul_value = helper.make_tensor_value_info(
+                    model.make_new_valueinfo_name(), TensorProto.FLOAT, [1]
+                )
+                model.graph.value_info.append(mul_value)
+                model.set_initializer(
+                    mul_value.name, np.array(1 / (vecs[1] * vecs[2]), dtype=np.float32)
+                )
+                new_mul = helper.make_node("Mul", [pool_out, mul_value.name], [result])
+                graph.node.insert(insert_point, new_pool)
+                graph.node.insert(insert_point + 1, new_mul)
+                node_ind += 1
+                # remove old node
+                graph.node.remove(node)
+                graph_modified = True
+
+        if graph_modified:
+            model = model.transform(InferShapes())
+            model = model.transform(InferDataTypes())
+        return (model, graph_modified)
diff --git a/src/finn/transformation/fpgadataflow/floorplan.py b/src/finn/transformation/fpgadataflow/floorplan.py new file mode 100644 index 0000000000000000000000000000000000000000..1d9a51875499d77f384c03f54009a9dd1001dea0 --- /dev/null +++ b/src/finn/transformation/fpgadataflow/floorplan.py @@ -0,0 +1,80 @@ +# Copyright (c) 2020, Xilinx +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer.
+# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of FINN nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +from finn.custom_op.registry import getCustomOp +from finn.transformation import Transformation +from finn.util.basic import get_by_name + +
+class Floorplan(Transformation):
+    """Assign a partition_id to every dataflow node of the design.
+
+    Each IODMA node and each TLastMarker node with DynIters == "true" receives a
+    partition id of its own; all remaining dataflow nodes share one further id.
+    TODO: split the design into sections of defined size."""
+
+    def __init__(self, limits=None):
+        super().__init__()
+        # kept on the instance; apply() does not consult it yet
+        self.resource_limits = limits
+
+    def apply(self, model):
+        """Tag the dataflow nodes with partition ids and return (model, False)."""
+        # we currently assume that all dataflow nodes belonging to the same partition
+        # are connected to each other and there is a single input/output to/from each.
+        dataflow_nodes = [
+            n
+            for n in model.graph.node
+            if get_by_name(n.attribute, "backend") is not None
+        ]
+
+        # sort the dataflow nodes into the three groups that are handled differently
+        isolated_dmas, isolated_markers, shared = [], [], []
+        for n in dataflow_nodes:
+            if n.op_type == "IODMA":
+                isolated_dmas.append(n)
+            elif (
+                n.op_type == "TLastMarker"
+                and getCustomOp(n).get_nodeattr("DynIters") == "true"
+            ):
+                isolated_markers.append(n)
+            else:
+                shared.append(n)
+
+        # DMAs first, then dynamic TLastMarkers: one fresh partition id per node
+        next_id = 0
+        for n in isolated_dmas + isolated_markers:
+            getCustomOp(n).set_nodeattr("partition_id", next_id)
+            next_id += 1
+
+        # TODO: implement proper floorplanning; for now just a single partition
+        # (every node of the shared group is tagged with the same final id)
+        for n in shared:
+            getCustomOp(n).set_nodeattr("partition_id", next_id)
+
+        return (model, False)
diff --git a/src/finn/transformation/fpgadataflow/insert_iodma.py b/src/finn/transformation/fpgadataflow/insert_iodma.py new file mode 100644 index 0000000000000000000000000000000000000000..e4368edea717f7499481e9b1c6ac20f7d5bb5f58 --- /dev/null +++ b/src/finn/transformation/fpgadataflow/insert_iodma.py @@ -0,0 +1,198 @@ +# Copyright (c) 2020, Xilinx +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution.
+# +# * Neither the name of FINN nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +from onnx import TensorProto +from onnx import helper as oh + +from finn.util.basic import get_by_name +from finn.custom_op.registry import getCustomOp +from finn.transformation import Transformation +from finn.transformation.general import SortGraph +import finn.core.data_layout as DataLayout +import math +import numpy as np + +
+class InsertIODMA(Transformation):
+    """Insert DMA nodes on all inputs and outputs.
+
+    IODMA nodes are added behind the graph output, in front of the graph input and
+    in front of external StreamingFCLayer_Batch weights that have no producer.
+    max_intfwidth (a power of 2) is the upper bound for the DMA interface width."""
+
+    def __init__(self, max_intfwidth=32):
+        super().__init__()
+        # n & (n - 1) clears the lowest set bit, so it is 0 only for powers of 2;
+        # 2 ** log2(n) == n holds for any positive n (up to rounding error)
+        assert (
+            max_intfwidth > 0 and (max_intfwidth & (max_intfwidth - 1)) == 0
+        ), "max_intfwidth must be a power of 2"
+        self.max_intfwidth = max_intfwidth
+
+    def _feasible_intfwidth(self, shape, dtype):
+        """Return the gcd of the transfer size in bits and max_intfwidth."""
+        transfer_bits = np.prod(shape) * dtype.bitwidth()
+        intfwidth = math.gcd(transfer_bits, self.max_intfwidth)
+        assert intfwidth % 8 == 0, "No feasible interface width for transfer size"
+        return intfwidth
+
+    def apply(self, model):
+        """Return (model, True) if IODMA nodes were inserted, else (model, False)."""
+        # only makes sense for a pure fpgadataflow graph -- so we check!
+        all_nodes = list(model.graph.node)
+        for x in all_nodes:
+            # a missing backend attribute must fail the check, not raise on None
+            backend = get_by_name(x.attribute, "backend")
+            assert (
+                backend is not None and backend.s.decode("UTF-8") == "fpgadataflow"
+            ), "InsertIODMA requires a pure fpgadataflow graph"
+        # parse streamingfclayers looking for external weights with no attached IODMA
+        fc_extw_nodes = [
+            x
+            for x in all_nodes
+            if x.op_type == "StreamingFCLayer_Batch"
+            and get_by_name(x.attribute, "mem_mode") is not None
+            and get_by_name(x.attribute, "mem_mode").s.decode("UTF-8") == "external"
+            and model.find_producer(x.input[1]) is None
+        ]
+        graph_in_name = model.graph.input[0].name
+        first_node = model.find_consumer(graph_in_name)
+        graph_out_name = model.graph.output[0].name
+        final_node = model.find_producer(graph_out_name)
+        if (
+            final_node.op_type == "IODMA"
+            and first_node.op_type == "IODMA"
+            and len(fc_extw_nodes) == 0
+        ):
+            # TODO maybe check the correctness of properties
+            return (model, False)
+        if final_node.op_type != "IODMA":
+            # the output tensor itself must be NHWC or NC
+            out_layout = model.get_tensor_layout(graph_out_name)
+            assert (
+                out_layout == DataLayout.NHWC or out_layout == DataLayout.NC
+            ), "Data layout of tensors must be NHWC or NC"
+            out_shape = model.get_tensor_shape(graph_out_name)
+            out_dtype = model.get_tensor_datatype(graph_out_name)
+            intfwidth = self._feasible_intfwidth(out_shape, out_dtype)
+            # get width of stream input to DMA
+            streamWidth = getCustomOp(final_node).get_outstream_width()
+            # make new buffer
+            final_node_out = oh.make_tensor_value_info(
+                model.make_new_valueinfo_name(), TensorProto.FLOAT, out_shape
+            )
+            model.graph.value_info.append(final_node_out)
+            model.set_tensor_datatype(final_node_out.name, out_dtype)
+            # reroute final node output to the new buffer
+            final_node.output[0] = final_node_out.name
+            dma_node = oh.make_node(
+                "IODMA",
+                [final_node_out.name],
+                [graph_out_name],
+                numInputVectors=out_shape[:-1],
+                NumChannels=out_shape[-1],
+                dataType=str(out_dtype.name),
+                intfWidth=intfwidth,
+                streamWidth=streamWidth,
+                direction="out",
+                domain="finn",
+                backend="fpgadataflow",
+            )
+            model.graph.node.append(dma_node)
+        if first_node.op_type != "IODMA":
+            # the input tensor must be NHWC or NC
+            in_layout = model.get_tensor_layout(graph_in_name)
+            assert (
+                in_layout == DataLayout.NHWC or in_layout == DataLayout.NC
+            ), "Data layout of tensors must be NHWC or NC"
+            in_shape = model.get_tensor_shape(graph_in_name)
+            in_dtype = model.get_tensor_datatype(graph_in_name)
+            intfwidth = self._feasible_intfwidth(in_shape, in_dtype)
+            # get width of stream output from DMA
+            streamWidth = getCustomOp(first_node).get_instream_width()
+            # make new buffer
+            first_node_in = oh.make_tensor_value_info(
+                model.make_new_valueinfo_name(), TensorProto.FLOAT, in_shape
+            )
+            model.graph.value_info.append(first_node_in)
+            model.set_tensor_datatype(first_node_in.name, in_dtype)
+            # reroute first node input to the new buffer
+            first_node.input[0] = first_node_in.name
+            dma_node = oh.make_node(
+                "IODMA",
+                [graph_in_name],
+                [first_node_in.name],
+                numInputVectors=in_shape[:-1],
+                NumChannels=in_shape[-1],
+                dataType=str(in_dtype.name),
+                intfWidth=intfwidth,
+                streamWidth=streamWidth,
+                direction="in",
+                domain="finn",
+                backend="fpgadataflow",
+            )
+            model.graph.node.insert(0, dma_node)
+        for fc_node in fc_extw_nodes:
+            fc_w_name = fc_node.input[1]
+            # NOTE(review): the NC alternative looks at the graph input, not at the
+            # weight tensor; kept unchanged -- confirm which check is intended
+            assert (
+                model.get_tensor_layout(fc_w_name) == DataLayout.NHWC
+                or model.get_tensor_layout(graph_in_name) == DataLayout.NC
+            ), "Data layout of tensors must be NHWC or NC"
+            w_shape = model.get_tensor_shape(fc_w_name)
+            w_dtype = model.get_tensor_datatype(fc_w_name)
+            intfwidth = self._feasible_intfwidth(w_shape, w_dtype)
+            # calculate width of stream output from DMA
+            pe = get_by_name(fc_node.attribute, "PE").i
+            simd = get_by_name(fc_node.attribute, "SIMD").i
+            streamWidth = simd * pe * w_dtype.bitwidth()
+            # make new buffer
+            fc_node_in = oh.make_tensor_value_info(
+                model.make_new_valueinfo_name(), TensorProto.FLOAT, w_shape
+            )
+            model.graph.value_info.append(fc_node_in)
+            model.set_tensor_datatype(fc_node_in.name, w_dtype)
+            dma_node = oh.make_node(
+                "IODMA",
+                [fc_w_name],
+                [fc_node_in.name],
+                numInputVectors=w_shape[:-1],
+                NumChannels=w_shape[-1],
+                dataType=str(w_dtype.name),
+                intfWidth=intfwidth,
+                streamWidth=streamWidth,
+                direction="in",
+                burstMode="wrap",
+                domain="finn",
+                backend="fpgadataflow",
+            )
+            fc_node.input[1] = fc_node_in.name
+            model.graph.node.insert(0, dma_node)
+        model = model.transform(SortGraph())
+        return (model, True)
diff --git a/src/finn/transformation/remove_identity.py b/src/finn/transformation/remove_identity.py deleted file mode 100644 index d7a58d59c1bb8ff643e691442e7eda3c0516aa5c..0000000000000000000000000000000000000000 --- a/src/finn/transformation/remove_identity.py +++ /dev/null @@ -1,62 +0,0 @@ -# Copyright (c) 2020, Xilinx -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution.
-# -# * Neither the name of FINN nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -from finn.transformation import Transformation - - -def _is_identity(node, model): - if node.op_type == "Mul": - scale = model.get_initializer(node.input[1]) - if scale is not None: - return (scale == 1).all() - elif node.op_type == "Add": - bias = model.get_initializer(node.input[1]) - if bias is not None: - return (bias == 0).all() - return False - - -class RemoveIdentity(Transformation): - """Remove nodes that apply identity ops from the graph, including: - * Multiply by 1 - * Add 0 - .""" - - def apply(self, model): - graph = model.graph - node_ind = 0 - graph_modified = False - for node in graph.node: - node_ind += 1 - if _is_identity(node, model): - node_src = node.input[0] - node_dst = node.output[0] - graph.node.remove(node) - model.rename_tensor(node_dst, node_src) - graph_modified = True - return (model, graph_modified) diff --git a/src/finn/transformation/streamline/__init__.py b/src/finn/transformation/streamline/__init__.py index 
d9c12a20975084705b801c0ff027d4b99aff9490..d7686eaadcbc800542ab96c5f45145857412b773 100644 --- a/src/finn/transformation/streamline/__init__.py +++ b/src/finn/transformation/streamline/__init__.py @@ -53,7 +53,7 @@ from finn.transformation.streamline.reorder import ( MoveAddPastMul, MoveScalarMulPastMatMul, MoveScalarAddPastMatMul, - MoveScalarAddPastConv, + MoveAddPastConv, MoveScalarMulPastConv, ) @@ -75,7 +75,7 @@ class Streamline(Transformation): AbsorbSignBiasIntoMultiThreshold(), MoveAddPastMul(), MoveScalarAddPastMatMul(), - MoveScalarAddPastConv(), + MoveAddPastConv(), MoveScalarMulPastMatMul(), MoveScalarMulPastConv(), MoveAddPastMul(), diff --git a/src/finn/transformation/streamline/absorb.py b/src/finn/transformation/streamline/absorb.py index dc01eea411fc1f640e481c9be02a92acdd59533f..f089275c221f769daace3e9628a00bf87b4e5457 100644 --- a/src/finn/transformation/streamline/absorb.py +++ b/src/finn/transformation/streamline/absorb.py @@ -31,6 +31,7 @@ from onnx import helper as oh import warnings from finn.core.datatype import DataType +import finn.core.data_layout as DataLayout from finn.transformation import Transformation from finn.util.basic import get_by_name from finn.custom_op.registry import getCustomOp @@ -357,7 +358,74 @@ class AbsorbTransposeIntoMultiThreshold(Transformation): model = model.transform(InferDataTypes()) return (model, graph_modified)
+class AbsorbTransposeIntoFlatten(Transformation):
+    """Absorb transpose node into succeeding flatten node, if H=W=1 and the first
+    dimension stays the same. Can also be applied if flatten is implemented implicitly
+    by a reshape node with shape [1, -1] and the first input dimension is 1"""
+
+    def apply(self, model):
+        """Return (model, graph_modified) after absorbing all eligible transposes."""
+        graph = model.graph
+        graph_modified = False
+        node_ind = 0
+        for n in graph.node:
+            node_ind += 1
+            is_flatten = n.op_type == "Flatten"
+            if n.op_type == "Reshape":
+                # a Reshape only counts as flatten for a constant shape of exactly
+                # [1, -1]; a missing initializer (dynamic shape) or a shape of another
+                # length is skipped instead of breaking the elementwise comparison
+                shape = model.get_initializer(n.input[1])
+                is_flatten = shape is not None and shape.tolist() == [1, -1]
+            if is_flatten:
+                prod = model.find_producer(n.input[0])
+                perm = None
+                if prod is not None and prod.op_type == "Transpose":
+                    perm = get_by_name(prod.attribute, "perm")
+                # we ensure that the first dimension is not changed from the
+                # transpose operation; a Transpose without perm attribute reverses
+                # all dimensions and is therefore left alone
+                if perm is not None and perm.ints[0] == 0:
+                    data_layout = model.get_tensor_layout(prod.input[0])
+                    # check for the data layout to interpret input shape correctly
+                    if data_layout is None:
+                        warnings.warn(
+                            """Data layout for input tensor of Transpose node is not set.
+                            To use AbsorbTransposeIntoFlatten transformation
+                            please set tensor data layout."""
+                        )
+                        continue
+                    elif data_layout == DataLayout.NCHW:
+                        (b, c, h, w) = model.get_tensor_shape(prod.input[0])
+                        # if h=w=1 the transposition can be absorbed, otherwise
+                        # the absorption would lead to an error in the behavior
+                        if h != 1 or w != 1:
+                            continue
+                        # the flatten node from onnx keeps by default the first
+                        # dim and flattens the rest, that is why this transformation
+                        # can only work with b != 1 if the model contains already a
+                        # flatten node and not a reshape node with shape = [1, -1].
+                        # If the first dim of the input tensor is not 1, flatten and
+                        # reshape (with shape = [1, -1]) would lead to different results
+                        if n.op_type == "Reshape" and b != 1:
+                            continue
+                    elif data_layout == DataLayout.NHWC:
+                        (b, h, w, c) = model.get_tensor_shape(prod.input[0])
+                        if h != 1 or w != 1:
+                            continue
+                        if n.op_type == "Reshape" and b != 1:
+                            continue
+                    # create single flatten node and remove obsolete nodes
+                    node = oh.make_node("Flatten", [prod.input[0]], [n.output[0]])
+                    graph.node.remove(n)
+                    graph.node.remove(prod)
+                    graph.node.insert(node_ind, node)
+                    graph_modified = True
+        if graph_modified:
+            model = model.transform(InferDataTypes())
+        return (model, graph_modified)
+
class AbsorbScalarMulIntoTopK(Transformation): """Absorb a mul node into a suceeding topk node if the mul is scalar.""" diff --git a/src/finn/transformation/streamline/reorder.py b/src/finn/transformation/streamline/reorder.py index a1bd16f6d0b70193122d5d067ccdee395260c7b1..2b03532ce3ba7d5159e5ae57e61c2af9c8c37fce 100644 --- a/src/finn/transformation/streamline/reorder.py +++ b/src/finn/transformation/streamline/reorder.py @@ -29,9 +29,13 @@ import numpy as np import warnings from onnx import helper as oh +from onnx import TensorProto from finn.transformation import Transformation +import finn.core.data_layout as DataLayout from finn.transformation.infer_shapes import InferShapes +from finn.transformation.infer_datatypes import InferDataTypes +from finn.transformation.infer_data_layouts import InferDataLayouts from finn.core.datatype import DataType from finn.core.onnx_exec import execute_node from finn.util.basic import get_by_name @@ -68,8 +72,11 @@ class MoveAddPastMul(Transformation): add_weight_name = n.input[1] A = model.get_initializer(mul_weight_name) B = model.get_initializer(add_weight_name) - assert A is not None, "Initializer for mul weights is not set." - assert B is not None, "Initializer for add weights is not set."
+ if (A is None) or (B is None): + warnings.warn( + "Mul or add does not have constant params, skipping" + ) + continue start_name = n.input[0] middle_name = n.output[0] end_name = consumer.output[0] @@ -124,8 +131,9 @@ class MoveScalarMulPastMatMul(Transformation): matmul_weight_name = consumer.input[1] A = model.get_initializer(mul_weight_name) W = model.get_initializer(matmul_weight_name) - assert A is not None, "Initializer for mul weights is not set." - assert W is not None, "Initializer for matmul weights is not set." + if (A is None) or (W is None): + warnings.warn("MatMul or Mul params are not constant, skipping") + continue start_name = n.input[0] middle_name = n.output[0] end_name = consumer.output[0] @@ -181,8 +189,9 @@ class MoveScalarAddPastMatMul(Transformation): matmul_weight_name = consumer.input[1] A = model.get_initializer(add_weight_name) W = model.get_initializer(matmul_weight_name) - assert A is not None, "Initializer for add weights is not set." - assert W is not None, "Initializer for matmul weights is not set." + if (A is None) or (W is None): + warnings.warn("MatMul or Add params are not constant, skipping") + continue start_name = n.input[0] middle_name = n.output[0] end_name = consumer.output[0] @@ -216,8 +225,8 @@ class MoveScalarAddPastMatMul(Transformation): return (model, graph_modified) -class MoveScalarAddPastConv(Transformation): - """Move scalar add operations past conv operations. We want to have adds +class MoveAddPastConv(Transformation): + """Move scalar and channelwise add operations past conv operations. 
We want to have adds next to each other such that they can be collapsed into a single add.""" def apply(self, model): @@ -242,8 +251,12 @@ class MoveScalarAddPastConv(Transformation): add_weight_name = n.input[1] conv_in_name = consumer.input[0] conv_in_shape = model.get_tensor_shape(conv_in_name) + # assume datalayout to be NCHW + channels = conv_in_shape[1] A = model.get_initializer(add_weight_name) - assert A is not None, "Initializer for add weights is not set." + if A is None: + warnings.warn("Add param is not constant, skipping") + continue start_name = n.input[0] end_name = consumer.output[0] conv_out_shape = model.get_tensor_shape(end_name) @@ -252,11 +265,17 @@ class MoveScalarAddPastConv(Transformation): pads = list(get_by_name(consumer.attribute, "pads").ints) if sum(pads) == 0: using_padding = False - if all(x == 1 for x in A.shape) and not using_padding: + if ( + all(x == 1 for x in A.shape) or A.shape == (1, channels, 1, 1) + ) and not using_padding: # create a tensor filled with the add constant, in # the shape expected by the convolution conv_in_const = np.zeros(conv_in_shape, dtype=np.float32) - conv_in_const.fill(A.item()) + if A.shape == (1, channels, 1, 1): + for ch in range(channels): + conv_in_const[0][ch].fill(A[0][ch].item()) + else: + conv_in_const.fill(A.item()) # create an execution context and put in const input exec_ctx = model.make_empty_exec_context() exec_ctx[conv_in_name] = conv_in_const @@ -311,7 +330,9 @@ class MoveScalarMulPastConv(Transformation): ): mul_weight_name = n.input[1] A = model.get_initializer(mul_weight_name) - assert A is not None, "Initializer for mul weights is not set." 
+ if A is None: + warnings.warn("Mul param is not constant, skipping") + continue conv_node = consumer mul_node = n start_name = mul_node.input[0] @@ -663,3 +684,225 @@ class MoveMaxPoolPastMultiThreshold(Transformation): model = model.transform(InferShapes()) return (model, graph_modified)
+
+class MoveFlattenPastTopK(Transformation):
+    """Move flatten node past a succeeding topk node, if the "axis" attribute in topk
+    is set to -1 and the data layout before the flatten is NHWC with H=W=1"""
+
+    def apply(self, model):
+        """Return (model, graph_modified); InferShapes is always re-run at the end."""
+        graph = model.graph
+        node_ind = 0
+        graph_modified = False
+        for n in graph.node:
+            node_ind += 1
+            if n.op_type == "Flatten":
+                consumer = model.find_consumer(n.output[0])
+                if consumer is not None and consumer.op_type == "TopK":
+                    axis = get_by_name(consumer.attribute, "axis")
+                    # an absent axis attribute is treated like a non-matching one
+                    if axis is None or axis.i != -1:
+                        continue
+                    start_name = n.input[0]
+                    data_layout = model.get_tensor_layout(start_name)
+                    if data_layout != DataLayout.NHWC:
+                        warnings.warn(
+                            """Transformation can't be applied. The input
+                            to flatten has to have DataLayout.NHWC"""
+                        )
+                        continue
+                    (b, h, w, c) = model.get_tensor_shape(start_name)
+                    if h != 1 or w != 1:
+                        continue
+                    # get parameter k from topk
+                    k = model.get_tensor_shape(consumer.output[1])[-1]
+
+                    # swap connections
+                    # new tensor because dims change
+                    middle_name = model.make_new_valueinfo_name()
+                    topk_indices = oh.make_tensor_value_info(
+                        middle_name, TensorProto.INT64, [b, h, w, k]
+                    )
+                    end_name = consumer.output[1]
+                    graph.value_info.append(topk_indices)
+
+                    # remove old nodes
+                    graph.node.remove(n)
+                    graph.node.remove(consumer)
+
+                    # set inputs and outputs correctly
+                    consumer.input[0] = start_name
+                    consumer.output[1] = middle_name
+                    model.set_tensor_shape(consumer.output[0], (b, h, w, k))
+
+                    n.input[0] = middle_name
+                    n.output[0] = end_name
+
+                    # insert them back in
+                    # consumer (TopK) goes first, the flatten node directly behind it
+                    graph.node.insert(node_ind - 1, consumer)
+                    graph.node.insert(node_ind, n)
+
+                    graph_modified = True
+
+        model = model.transform(InferShapes())
+        return (model, graph_modified)
+
+class MoveFlattenPastAffine(Transformation):
+    """Moves a Flatten node past a succeeding MatMul, Mul or Add node.
+
+    Only applied if the Flatten input has data layout NHWC with H=W=1 and the
+    second input of the succeeding node is a constant initializer."""
+
+    def apply(self, model):
+        """Return (model, graph_modified) after rewiring all eligible node pairs."""
+        graph = model.graph
+        graph_modified = False
+        node_ind = 0
+        for n in graph.node:
+            node_ind += 1
+            if (
+                n.op_type == "Flatten"
+                and not model.is_fork_node(n)
+                and not model.is_join_node(n)
+            ):
+                consumer = model.find_consumer(n.output[0])
+                if (
+                    consumer is not None
+                    and (
+                        consumer.op_type == "MatMul"
+                        or consumer.op_type == "Mul"
+                        or consumer.op_type == "Add"
+                    )
+                    and not model.is_join_node(consumer)
+                ):
+                    # move flatten past operation and rewire tensors
+                    start_name = n.input[0]
+                    # check if data layout is set to NHWC and H=W=1
+                    datalayout = model.get_tensor_layout(start_name)
+                    if datalayout == DataLayout.NHWC:
+                        (b, h, w, c) = model.get_tensor_shape(start_name)
+                        if h != 1 or w != 1:
+                            warnings.warn(
+                                """The Transformation can only be performed if
+                            H=W=1."""
+                            )
+                            continue
+                    else:
+                        warnings.warn(
+                            """The Transformation can only be performed on
+                            operations that operate on data layout NHWC."""
+                        )
+                        continue
+                    middle_name = n.output[0]
+                    end_name = consumer.output[0]
+                    op_param_name = consumer.input[1]
+                    A = model.get_initializer(op_param_name)
+                    if A is None:
+                        warnings.warn("Param is not constant, skipping")
+                        continue
+                    op_in_dt = model.get_tensor_datatype(consumer.input[0])
+                    op_out_dt = model.get_tensor_datatype(consumer.output[0])
+                    start_shape = model.get_tensor_shape(start_name)
+                    # the random dummy input only serves to derive the op output shape
+                    dummy_in = np.random.uniform(low=0, high=1, size=(start_shape))
+
+                    if consumer.op_type == "MatMul":
+                        dummy_out = np.matmul(dummy_in, A)
+                    elif consumer.op_type == "Mul":
+                        dummy_out = dummy_in * A
+                    elif consumer.op_type == "Add":
+                        dummy_out = dummy_in + A
+
+                    new_op = oh.make_node(
+                        consumer.op_type,
+                        [start_name, op_param_name],
+                        [middle_name],
+                        name=consumer.name,
+                    )
+                    new_flatten = oh.make_node("Flatten", [middle_name], [end_name])
+                    graph.node.insert(node_ind, new_op)
+                    graph.node.insert(node_ind + 1, new_flatten)
+                    model.set_tensor_shape(middle_name, dummy_out.shape)
+                    # because a flatten node doesn't change the datatype we need
+                    # only the datatype of the op node
+                    model.set_tensor_datatype(start_name, op_in_dt)
+                    model.set_tensor_datatype(middle_name, op_out_dt)
+                    model.set_tensor_datatype(end_name, op_out_dt)
+                    # set datalayout
+                    model.set_tensor_layout(start_name, DataLayout.NHWC)
+                    model.set_tensor_layout(middle_name, DataLayout.NHWC)
+                    # remove old nodes
+                    graph.node.remove(n)
+                    graph.node.remove(consumer)
+                    graph_modified = True
+
+        model = model.transform(InferShapes())
+        model = model.transform(InferDataTypes())
+        model = model.transform(InferDataLayouts())
+        return (model, graph_modified)
+
+class MoveTransposePastScalarMul(Transformation):
+    """Moves a Transpose node past a scalar Mul node"""
+
+    def apply(self, model):
+        """Return (model, graph_modified); only Mul params of all-ones shape move."""
+        graph = model.graph
+        node_ind = 0
+        graph_modified = False
+        for n in graph.node:
+            node_ind += 1
+            if (
+                n.op_type == "Transpose"
+                and not model.is_fork_node(n)
+                and not model.is_join_node(n)
+            ):
+                consumer = model.find_consumer(n.output[0])
+                if (
+                    consumer is not None
+                    and consumer.op_type == "Mul"
+                    and not model.is_join_node(consumer)
+                ):
+                    mul_weight_name = consumer.input[1]
+                    A = model.get_initializer(mul_weight_name)
+                    if A is None:
+                        warnings.warn("Mul param is not constant, skipping")
+                        continue
+                    transp_node = n
+                    mul_node = consumer
+                    start_name = transp_node.input[0]
+                    middle_name = transp_node.output[0]
+                    end_name = mul_node.output[0]
+                    transp_in_shape = model.get_tensor_shape(start_name)
+                    transp_out_shape = model.get_tensor_shape(middle_name)
+                    transp_in_layout = model.get_tensor_layout(start_name)
+                    transp_out_layout = model.get_tensor_layout(middle_name)
+                    if transp_in_layout is None or transp_out_layout is None:
+                        warnings.warn(
+                            """Datalayout is not set for tensors.
+                            Transformation can't be applied."""
+                        )
+                        continue
+                    if all(x == 1 for x in A.shape):
+                        # if the mul is scalar, we can simply swap the order of ops
+                        # rewire transpose input to be mul input
+                        mul_node.input[0] = start_name
+                        model.set_tensor_shape(start_name, transp_in_shape)
+                        model.set_tensor_layout(start_name, transp_in_layout)
+                        mul_node.output[0] = middle_name
+                        model.set_tensor_shape(middle_name, transp_in_shape)
+                        model.set_tensor_layout(middle_name, transp_in_layout)
+                        transp_node.input[0] = middle_name
+                        transp_node.output[0] = end_name
+                        model.set_tensor_shape(end_name, transp_out_shape)
+                        model.set_tensor_layout(end_name, transp_out_layout)
+                        # re-insert the transpose node at index node_ind of the node list
+                        graph.node.remove(transp_node)
+                        graph.node.insert(node_ind, transp_node)
+                        graph_modified = True
+
+        if graph_modified is True:
+            model = model.transform(InferDataLayouts())
+            model = model.transform(InferShapes())
+        return (model, graph_modified)
+
diff --git a/src/finn/util/onnx.py b/src/finn/util/onnx.py index b9932111d86d7206b23e1d0e49a6aa8451f8ba24..4d7cdd126ededac887639a932c2021ef5f081c02 100644 ---
a/src/finn/util/onnx.py +++ b/src/finn/util/onnx.py @@ -28,6 +28,7 @@ import numpy as np import onnx +import finn.core.data_layout as DataLayout def valueinfo_to_tensor(vi): @@ -37,3 +38,43 @@ def valueinfo_to_tensor(vi): return np.zeros( dims, dtype=onnx.mapping.TENSOR_TYPE_TO_NP_TYPE[vi.type.tensor_type.elem_type] )
+
+
+def nchw_to_nhwc(t, model, idx, reverse=False):
+    """Converts between NCHW <-> NHWC layouts for tensor t by inserting a transpose.
+
+    t itself is always read as an NCHW tensor of shape (N, C, H, W); a new tensor
+    of shape (N, H, W, C) with layout annotation NHWC is created alongside it.
+    If reverse=False, the Transpose consumes t and produces the new NHWC tensor.
+    If reverse=True, the Transpose consumes the new NHWC tensor and produces t.
+    The Transpose node is inserted at position idx of model.graph.node.
+    Returns the name of the new NHWC tensor.
+    """
+    graph = model.graph
+    # create new NHWC tensor
+    t_shape = model.get_tensor_shape(t)
+    bs = t_shape[0]
+    ch = t_shape[1]
+    height = t_shape[2]
+    width = t_shape[3]
+    t_trans = onnx.helper.make_tensor_value_info(
+        model.make_new_valueinfo_name(),
+        onnx.TensorProto.FLOAT,
+        (bs, height, width, ch),  # NHWC
+    )
+    graph.value_info.append(t_trans)
+    dt = model.get_tensor_datatype(t)
+    t_trans = t_trans.name
+    model.set_tensor_datatype(t_trans, dt)
+    model.set_tensor_layout(t_trans, DataLayout.NHWC)
+    # NCHW <-> NHWC transpose
+    if reverse:
+        t_trans_node = onnx.helper.make_node(
+            "Transpose", [t_trans], [t], perm=[0, 3, 1, 2]
+        )
+    else:
+        t_trans_node = onnx.helper.make_node(
+            "Transpose", [t], [t_trans], perm=[0, 2, 3, 1]
+        )
+    graph.node.insert(idx, t_trans_node)
+    return t_trans
diff --git a/tests/brevitas/test_brevitas_avg_pool_export.py b/tests/brevitas/test_brevitas_avg_pool_export.py index 24854a2153df9af78feb8352ca119e831a9ac9eb..e78812b21a03baa6963f1f0efaefdb4c73e4d0db 100644 --- a/tests/brevitas/test_brevitas_avg_pool_export.py +++ b/tests/brevitas/test_brevitas_avg_pool_export.py @@ -16,7 +16,7 @@ import finn.core.onnx_exec as oxe import pytest -export_onnx_path = "test_avg_pool.onnx" +export_onnx_path = "test_brevitas_avg_pool_export.onnx" @pytest.mark.parametrize("kernel_size", [2, 3]) diff --git
a/tests/brevitas/test_brevitas_cnv.py b/tests/brevitas/test_brevitas_cnv.py index c04e16ad1923609c81240235057cc7a190c90ffb..f91ca600d3f0ce3b1cda3c29216fe8e0e3f415e4 100644 --- a/tests/brevitas/test_brevitas_cnv.py +++ b/tests/brevitas/test_brevitas_cnv.py @@ -42,7 +42,7 @@ from finn.transformation.general import GiveUniqueNodeNames from finn.transformation.double_to_single_float import DoubleToSingleFloat from finn.util.test import get_test_model_trained -export_onnx_path = "test_output_cnv.onnx" +export_onnx_path = "test_brevitas_cnv.onnx" @pytest.mark.parametrize("abits", [1, 2]) diff --git a/tests/brevitas/test_brevitas_non_scaled_QuantHardTanh_export.py b/tests/brevitas/test_brevitas_non_scaled_QuantHardTanh_export.py index b66348a9902802bc65b2a35e8bc3e311cc81e0bc..9c7296b7b3b6d36cfb43b6d9e96e7fba6bbce49a 100644 --- a/tests/brevitas/test_brevitas_non_scaled_QuantHardTanh_export.py +++ b/tests/brevitas/test_brevitas_non_scaled_QuantHardTanh_export.py @@ -12,7 +12,7 @@ import finn.core.onnx_exec as oxe from finn.transformation.infer_shapes import InferShapes from brevitas.core.quant import QuantType -export_onnx_path = "test_act.onnx" +export_onnx_path = "test_brevitas_non_scaled_QuantHardTanh_export.onnx" @pytest.mark.parametrize("abits", [1, 2, 4, 8]) diff --git a/tests/brevitas/test_brevitas_relu_act_export.py b/tests/brevitas/test_brevitas_relu_act_export.py index c5ddad12ca3e8d353682fbb20449d44358485f69..77974dacb51aa8746ce33f9a490becd35390db5a 100644 --- a/tests/brevitas/test_brevitas_relu_act_export.py +++ b/tests/brevitas/test_brevitas_relu_act_export.py @@ -12,7 +12,7 @@ from finn.core.modelwrapper import ModelWrapper import finn.core.onnx_exec as oxe from finn.transformation.infer_shapes import InferShapes -export_onnx_path = "test_act.onnx" +export_onnx_path = "test_brevitas_relu_act_export.onnx" @pytest.mark.parametrize("abits", [1, 2, 4, 8]) diff --git a/tests/brevitas/test_brevitas_scaled_QHardTanh_export.py 
b/tests/brevitas/test_brevitas_scaled_QHardTanh_export.py index d499f1517341477eca9915245da9ad12c346c5a9..e0ec82ebed44e2e984be9f62e02bc1721a7f9c33 100644 --- a/tests/brevitas/test_brevitas_scaled_QHardTanh_export.py +++ b/tests/brevitas/test_brevitas_scaled_QHardTanh_export.py @@ -12,7 +12,7 @@ from finn.core.modelwrapper import ModelWrapper import finn.core.onnx_exec as oxe from finn.transformation.infer_shapes import InferShapes -export_onnx_path = "test_act.onnx" +export_onnx_path = "test_brevitas_scaled_QHardTanh_export.onnx" @pytest.mark.parametrize("abits", [2, 4, 8]) diff --git a/tests/core/test_modelwrapper.py b/tests/core/test_modelwrapper.py index 5fa9b23bad5c5b67f65530c55f862f889c07b1ac..0fb7ae42f3bd556755f81a02be6c71fd73ffc519 100644 --- a/tests/core/test_modelwrapper.py +++ b/tests/core/test_modelwrapper.py @@ -36,7 +36,7 @@ import finn.core.data_layout as DataLayout from finn.core.modelwrapper import ModelWrapper from finn.util.test import get_test_model_trained -export_onnx_path = "test_output_lfc.onnx" +export_onnx_path = "test_modelwrapper.onnx" def test_modelwrapper(): diff --git a/tests/custom_op/test_xnorpopcountmatmul.py b/tests/custom_op/test_xnorpopcountmatmul.py index 37d9b7e5968bdb70023be9b70515410e941f51ce..745b782d418129d96e21c327a49de04d53aa7c48 100644 --- a/tests/custom_op/test_xnorpopcountmatmul.py +++ b/tests/custom_op/test_xnorpopcountmatmul.py @@ -47,7 +47,7 @@ from finn.transformation.infer_shapes import InferShapes from finn.transformation.streamline.sign_to_thres import ConvertSignToThres from finn.util.test import get_test_model_trained -export_onnx_path = "test_output_lfc.onnx" +export_onnx_path = "test_xnorpopcountmatmul.onnx" def test_xnorpopcountmatmul(): diff --git a/tests/fpgadataflow/test_convert_to_hls_channelwise_layer.py b/tests/fpgadataflow/test_convert_to_hls_channelwise_layer.py new file mode 100644 index 0000000000000000000000000000000000000000..d09c64a1250f78604c1a0a362cf234712de2cf57 --- /dev/null +++ 
b/tests/fpgadataflow/test_convert_to_hls_channelwise_layer.py @@ -0,0 +1,115 @@ +import pytest + +from onnx import TensorProto, helper + +import finn.core.onnx_exec as oxe +from finn.core.datatype import DataType +from finn.core.modelwrapper import ModelWrapper +import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls +from finn.transformation.fpgadataflow.prepare_ip import PrepareIP +from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim +from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim +from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP +from finn.transformation.fpgadataflow.replace_verilog_relpaths import ( + ReplaceVerilogRelPaths, +) +from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode + +from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim +from finn.transformation.infer_data_layouts import InferDataLayouts +from finn.transformation.general import GiveUniqueNodeNames +from finn.util.basic import gen_finn_dt_tensor +from finn.transformation.infer_shapes import InferShapes +import numpy as np + + +def prepare_inputs(input_tensor): + return {"inp": input_tensor} + + +def make_single_maxpool_modelwrapper(onnx_op_name, ishape, idt, pdt, pshape): + + inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, ishape) + outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, ishape) + p0 = helper.make_tensor_value_info("p0", TensorProto.FLOAT, pshape) + + model = helper.make_model( + helper.make_graph( + name="test", + inputs=[inp], + outputs=[outp], + value_info=[p0], + nodes=[helper.make_node(onnx_op_name, ["inp", "p0"], ["outp"])], + ) + ) + + model = ModelWrapper(model) + model.set_initializer("p0", gen_finn_dt_tensor(pdt, pshape)) + model.set_tensor_datatype("inp", idt) + model.transform(InferDataLayouts(), make_deepcopy=False) + model.transform(InferShapes(), make_deepcopy=False) + return model + + +# parameter datatype 
+@pytest.mark.parametrize("pdt", [DataType.BIPOLAR, DataType.UINT4, DataType.INT2]) +# input datatype +@pytest.mark.parametrize("idt", [DataType.INT32, DataType.UINT4, DataType.INT4]) +# function +@pytest.mark.parametrize("onnx_op_name", ["Add", "Mul"]) +# vector parameter or scalar parameter (broadcast) +@pytest.mark.parametrize("scalar_param", [True, False]) +# execution mode +@pytest.mark.parametrize("exec_mode", ["cppsim", "rtlsim"]) +@pytest.mark.vivado +@pytest.mark.slow +def test_convert_to_hls_channelwise_layer( + pdt, idt, onnx_op_name, scalar_param, exec_mode +): + ifm_ch = 16 + ifm_dim = 5 + ishape = (1, ifm_ch, ifm_dim, ifm_dim) + if scalar_param: + pshape = (1,) + else: + pshape = (1, ifm_ch, 1, 1) + + np.random.seed(0) + model = make_single_maxpool_modelwrapper(onnx_op_name, ishape, idt, pdt, pshape) + + # Since there aren't data types with a bit width that is not a power of 2, + # there are cases where the input won't use its full range. + if idt == DataType.INT32: + x = gen_finn_dt_tensor(DataType.INT16, (1, ifm_ch, ifm_dim, ifm_dim)) + elif idt == DataType.UINT32: + x = gen_finn_dt_tensor(DataType.UINT16, (1, ifm_ch, ifm_dim, ifm_dim)) + else: + x = gen_finn_dt_tensor(idt, (1, ifm_ch, ifm_dim, ifm_dim)) + + input_dict = prepare_inputs(x) + y_expected = oxe.execute_onnx(model, input_dict)["outp"] + + new_model = model.transform(to_hls.InferChannelwiseLinearLayer()) + new_model = new_model.transform(GiveUniqueNodeNames()) + + if exec_mode == "cppsim": + new_model = new_model.transform(PrepareCppSim()) + new_model = new_model.transform(CompileCppSim()) + new_model = new_model.transform(SetExecMode("cppsim")) + elif exec_mode == "rtlsim": + new_model = new_model.transform(SetExecMode("rtlsim")) + new_model = new_model.transform(GiveUniqueNodeNames()) + new_model = new_model.transform(PrepareIP("xc7z020clg400-1", 5)) + new_model = new_model.transform(HLSSynthIP()) + new_model = new_model.transform(ReplaceVerilogRelPaths()) + new_model = 
new_model.transform(PrepareRTLSim()) + else: + raise Exception("Unknown exec_mode") + + ctx_produced = oxe.execute_onnx( + new_model, input_dict, return_full_exec_context=True + ) + y_produced = ctx_produced["outp"] + + assert (y_produced == y_expected).all() + assert new_model.graph.node[1].op_type == "ChannelwiseOp_Batch" diff --git a/tests/fpgadataflow/test_convert_to_hls_conv_layer.py b/tests/fpgadataflow/test_convert_to_hls_conv_layer.py index ee65326ec57fb7fa7fa0490a8980dbabb8efc13c..22c356a5869b25fcc7ae3ef0164ed61b53ef232c 100644 --- a/tests/fpgadataflow/test_convert_to_hls_conv_layer.py +++ b/tests/fpgadataflow/test_convert_to_hls_conv_layer.py @@ -5,10 +5,15 @@ import pytest from finn.core.datatype import DataType from finn.transformation.infer_shapes import InferShapes from finn.transformation.infer_datatypes import InferDataTypes -from finn.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames -from finn.transformation.infer_data_layouts import InferDataLayouts +from finn.transformation.general import GiveUniqueNodeNames from finn.transformation.lower_convs_to_matmul import LowerConvsToMatMul +from finn.transformation.fpgadataflow.prepare_ip import PrepareIP +from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim +from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP +from finn.transformation.fpgadataflow.replace_verilog_relpaths import ( + ReplaceVerilogRelPaths, +) import finn.core.onnx_exec as oxe from finn.core.modelwrapper import ModelWrapper from finn.util.basic import gen_finn_dt_tensor @@ -17,47 +22,40 @@ import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode +from finn.custom_op.im2col import compute_conv_output_dim +# conv_config kernel_size,stride, pad 
-@pytest.mark.parametrize("padding", [True, False]) -@pytest.mark.parametrize("kernel_size", [3, 5]) + +@pytest.mark.parametrize( + "conv_config", [(1, 2, 0), (1, 3, 0), (3, 2, 1), (3, 1, 0), (3, 1, 1), (5, 2, 1)] +) +@pytest.mark.parametrize("exec_mode", ["cppsim", "rtlsim"]) @pytest.mark.slow @pytest.mark.vivado -def test_convert_to_hls_conv_layer(padding, kernel_size): - - assert ( - kernel_size % 2 != 0 - ), """test_convert_to_hls_conv_layer test only - supports odd kernel_size""" - +def test_convert_to_hls_conv_layer(conv_config, exec_mode): + kernel_size, stride, pad = conv_config np.random.seed(0) - padding = True idt = DataType.UINT4 in_feature_dim = 7 - in_chn = 3 + in_chn = 16 + out_chn = 20 - stages = 1 # just one convolution - - out_feature_dim = ( - in_feature_dim if padding else in_feature_dim - (kernel_size // 2 * 2) * stages - ) + out_feature_dim = compute_conv_output_dim(in_feature_dim, kernel_size, stride, pad) input_shape = [1, in_chn, in_feature_dim, in_feature_dim] - output_shape = [1, in_chn, out_feature_dim, out_feature_dim] + output_shape = [1, out_chn, out_feature_dim, out_feature_dim] - conv_param_shape = [in_chn, in_chn, kernel_size, kernel_size] + conv_param_shape = [out_chn, in_chn, kernel_size, kernel_size] + conv_weight_dt = DataType.UINT4 conv_config = {} conv_config["dilations"] = [1, 1] conv_config["group"] = 1 conv_config["kernel_shape"] = [kernel_size, kernel_size] - if padding: - pad = kernel_size // 2 - conv_config["pads"] = [pad, pad, pad, pad] - else: - conv_config["pads"] = [0, 0, 0, 0] - conv_config["strides"] = [1, 1] + conv_config["pads"] = [pad, pad, pad, pad] + conv_config["strides"] = [stride, stride] top_in = helper.make_tensor_value_info("top_in", TensorProto.FLOAT, input_shape) top_out = helper.make_tensor_value_info("top_out", TensorProto.FLOAT, output_shape) @@ -80,27 +78,35 @@ def test_convert_to_hls_conv_layer(padding, kernel_size): model = ModelWrapper(modelproto) model.set_tensor_datatype("top_in", idt) 
model.set_tensor_datatype("top_out", idt) - model.set_tensor_datatype("p1", DataType.UINT4) + model.set_tensor_datatype("p1", conv_weight_dt) + model.set_initializer("p1", gen_finn_dt_tensor(conv_weight_dt, conv_param_shape)) model = model.transform(InferShapes()) - model.set_initializer( - "p1", np.round(np.random.rand(*conv_param_shape).astype(np.float32) * 16) - ) - - model.set_tensor_datatype(model.graph.input[0].name, idt) - model = model.transform(InferShapes()) - model = model.transform(InferDataLayouts()) - model = model.transform(GiveUniqueNodeNames()) - model = model.transform(GiveReadableTensorNames()) model = model.transform(InferDataTypes()) new_model = model.transform(LowerConvsToMatMul()) new_model = new_model.transform(to_hls.InferConvInpGen()) - new_model = new_model.transform(PrepareCppSim()) - new_model = new_model.transform(CompileCppSim()) - new_model = new_model.transform(SetExecMode("cppsim")) + new_model = new_model.transform(GiveUniqueNodeNames()) + new_model = new_model.transform(InferShapes()) + new_model = new_model.transform(InferDataTypes()) + + if exec_mode == "cppsim": + new_model = new_model.transform(PrepareCppSim()) + new_model = new_model.transform(CompileCppSim()) + new_model = new_model.transform(SetExecMode("cppsim")) + elif exec_mode == "rtlsim": + new_model = new_model.transform(SetExecMode("rtlsim")) + new_model = new_model.transform(GiveUniqueNodeNames()) + new_model = new_model.transform(PrepareIP("xc7z020clg400-1", 5)) + new_model = new_model.transform(HLSSynthIP()) + new_model = new_model.transform(ReplaceVerilogRelPaths()) + new_model = new_model.transform(PrepareRTLSim()) + else: + raise Exception("Unknown exec_mode") x = gen_finn_dt_tensor(idt, input_shape) inp_dict = {model.graph.input[0].name: x} assert oxe.compare_execution(model, new_model, inp_dict) + if kernel_size == 1 and stride > 1 and pad == 0: + assert new_model.graph.node[1].op_type == "DownSampler" diff --git 
a/tests/fpgadataflow/test_convert_to_hls_layers_cnv.py b/tests/fpgadataflow/test_convert_to_hls_layers_cnv.py index 48803c9614f53a3a149c6eaac4289d10086513a5..20e3ee08d7ffdd013a89d26bb71d86ccc554a5b4 100644 --- a/tests/fpgadataflow/test_convert_to_hls_layers_cnv.py +++ b/tests/fpgadataflow/test_convert_to_hls_layers_cnv.py @@ -51,7 +51,7 @@ from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode from finn.custom_op.registry import getCustomOp -export_onnx_path_cnv = "test_output_cnv.onnx" +export_onnx_path_cnv = "test_convert_to_hls_layers_cnv.onnx" @pytest.mark.vivado diff --git a/tests/fpgadataflow/test_convert_to_hls_layers_fc.py b/tests/fpgadataflow/test_convert_to_hls_layers_fc.py index e261a3114853bf24bdb4c931c46ff92eea4150dd..d77065ad9396d0cc8dd57a39ed823fffcb30ee47 100644 --- a/tests/fpgadataflow/test_convert_to_hls_layers_fc.py +++ b/tests/fpgadataflow/test_convert_to_hls_layers_fc.py @@ -52,8 +52,7 @@ from finn.transformation.streamline.round_thresholds import RoundAndClipThreshol from finn.util.test import get_test_model_trained -export_onnx_path = "test_output_tfc.onnx" -export_onnx_path_cnv = "test_output_cnv.onnx" +export_onnx_path = "test_convert_to_hls_layers_fc.onnx" @pytest.mark.vivado diff --git a/tests/fpgadataflow/test_fpgadataflow_channelwise_ops.py b/tests/fpgadataflow/test_fpgadataflow_channelwise_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..2ed352e28981552b186bb778b94dcbc07471e14b --- /dev/null +++ b/tests/fpgadataflow/test_fpgadataflow_channelwise_ops.py @@ -0,0 +1,156 @@ +# Copyright (c) 2020, Xilinx +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. 
+# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of FINN nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +import pytest + +import numpy as np +from onnx import TensorProto, helper + +import finn.core.onnx_exec as oxe +from finn.analysis.fpgadataflow.hls_synth_res_estimation import hls_synth_res_estimation +from finn.core.datatype import DataType +from finn.core.modelwrapper import ModelWrapper +from finn.transformation.fpgadataflow.prepare_ip import PrepareIP +from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim +from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim +from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP +from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode +from finn.transformation.general import GiveUniqueNodeNames +from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim +from finn.util.basic import gen_finn_dt_tensor +from finn.transformation.fpgadataflow.replace_verilog_relpaths import ( + ReplaceVerilogRelPaths, +) + + +def make_modelwrapper(C, pe, idt, odt, pdt, func, vecs): + NumChannels = C.shape[0] + + inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, vecs + [NumChannels]) + outp = helper.make_tensor_value_info( + "outp", TensorProto.FLOAT, vecs + [NumChannels] + ) + + node_inp_list = ["inp", "const"] + + node = helper.make_node( + "ChannelwiseOp_Batch", + node_inp_list, + ["outp"], + domain="finn", + backend="fpgadataflow", + NumChannels=NumChannels, + Func=func, + PE=pe, + inputDataType=idt.name, + outputDataType=odt.name, + paramDataType=pdt.name, + numInputVectors=vecs, + ) + graph = helper.make_graph(nodes=[node], name="graph", inputs=[inp], outputs=[outp]) + + model = helper.make_model(graph, producer_name="model") + model = ModelWrapper(model) + + model.set_tensor_datatype("inp", idt) + model.set_tensor_datatype("outp", odt) + + model.set_tensor_datatype("const", idt) + model.set_initializer("const", C) + return model + + +# activation: None or DataType +@pytest.mark.parametrize("act", [DataType.INT8]) +# input datatype 
+@pytest.mark.parametrize("idt", [DataType.INT4]) +# param datatype +@pytest.mark.parametrize("pdt", [DataType.INT4]) +# folding, -1 is maximum possible +@pytest.mark.parametrize("nf", [-1, 2]) +# number of input features +@pytest.mark.parametrize("ich", [16]) +# vecs +@pytest.mark.parametrize("vecs", [[1], [1, 7, 7]]) +# function +@pytest.mark.parametrize("func", ["add", "mul"]) +# execution mode +@pytest.mark.parametrize("exec_mode", ["cppsim", "rtlsim"]) +@pytest.mark.vivado +@pytest.mark.slow +def test_fpgadataflow_channelwise_ops(idt, act, pdt, nf, ich, func, vecs, exec_mode): + if nf == -1: + nf = ich + pe = ich // nf + assert ich % pe == 0 + + # generate input and param data + x = gen_finn_dt_tensor(idt, tuple(vecs + [ich])) + # C = np.random.randint(idt.min(), idt.max() + 1, ich).astype(np.float32) + C = gen_finn_dt_tensor(pdt, (ich)) + + odt = act + + model = make_modelwrapper(C, pe, idt, odt, pdt, func, vecs) + + if exec_mode == "cppsim": + model = model.transform(PrepareCppSim()) + model = model.transform(CompileCppSim()) + model = model.transform(SetExecMode("cppsim")) + elif exec_mode == "rtlsim": + model = model.transform(SetExecMode("rtlsim")) + model = model.transform(GiveUniqueNodeNames()) + model = model.transform(PrepareIP("xc7z020clg400-1", 5)) + model = model.transform(HLSSynthIP()) + model = model.transform(ReplaceVerilogRelPaths()) + model = model.transform(PrepareRTLSim()) + else: + raise Exception("Unknown exec_mode") + + # package input data as dictionary + input_dict = {"inp": x} + + oshape = model.get_tensor_shape("outp") + + C_reshaped = np.broadcast_to(C.flatten(), x.shape) + if func == "add": + y = x + C_reshaped + elif func == "mul": + y = x * C_reshaped + + y_expected = y.reshape(oshape) + # execute model + y_produced = oxe.execute_onnx(model, input_dict)["outp"] + + y_produced = y_produced.reshape(y_expected.shape) + + assert (y_produced == y_expected).all(), "cppsim failed" + + if exec_mode == "rtlsim": + hls_synt_res_est = 
model.analysis(hls_synth_res_estimation) + assert "ChannelwiseOp_Batch_0" in hls_synt_res_est diff --git a/tests/fpgadataflow/test_fpgadataflow_ipstitch.py b/tests/fpgadataflow/test_fpgadataflow_ipstitch.py index b830693c32afe629dd6fc70868d0bddacac4c887..a9f5bf5ffa1f816b82ef701800e92249056b7c74 100644 --- a/tests/fpgadataflow/test_fpgadataflow_ipstitch.py +++ b/tests/fpgadataflow/test_fpgadataflow_ipstitch.py @@ -54,6 +54,10 @@ from finn.util.basic import gen_finn_dt_tensor, pynq_part_map from finn.util.fpgadataflow import pyverilate_stitched_ip from finn.util.test import load_test_checkpoint_or_skip from finn.transformation.fpgadataflow.synth_ooc import SynthOutOfContext +from finn.transformation.infer_data_layouts import InferDataLayouts +from finn.transformation.fpgadataflow.insert_iodma import InsertIODMA +from finn.transformation.fpgadataflow.floorplan import Floorplan + test_pynq_board = os.getenv("PYNQ_BOARD", default="Pynq-Z1") test_fpga_part = pynq_part_map[test_pynq_board] @@ -390,3 +394,19 @@ def test_fpgadataflow_ipstitch_remote_execution(): assert np.isclose(outp["outp"], x).all() except KeyError: pytest.skip("PYNQ board IP address not specified") + + +def test_fpgadataflow_ipstitch_iodma_floorplan(): + model = create_one_fc_model() + if model.graph.node[0].op_type == "StreamingDataflowPartition": + sdp_node = getCustomOp(model.graph.node[0]) + assert sdp_node.__class__.__name__ == "StreamingDataflowPartition" + assert os.path.isfile(sdp_node.get_nodeattr("model")) + model = load_test_checkpoint_or_skip(sdp_node.get_nodeattr("model")) + model = model.transform(InferDataLayouts()) + model = model.transform(InsertIODMA()) + model = model.transform(Floorplan()) + assert getCustomOp(model.graph.node[0]).get_nodeattr("partition_id") == 0 + assert getCustomOp(model.graph.node[1]).get_nodeattr("partition_id") == 2 + assert getCustomOp(model.graph.node[2]).get_nodeattr("partition_id") == 1 + model.save(ip_stitch_model_dir + 
"/test_fpgadataflow_ipstitch_iodma_floorplan.onnx") diff --git a/tests/transformation/test_absorb_transp_into_flatten.py b/tests/transformation/test_absorb_transp_into_flatten.py new file mode 100644 index 0000000000000000000000000000000000000000..fbfa15277717c554da01e38608601997407803b2 --- /dev/null +++ b/tests/transformation/test_absorb_transp_into_flatten.py @@ -0,0 +1,99 @@ +import pytest + +import numpy as np +from onnx import TensorProto, helper + +from finn.core.modelwrapper import ModelWrapper +import finn.core.data_layout as DataLayout +from finn.transformation.infer_shapes import InferShapes +from finn.transformation.infer_datatypes import InferDataTypes +from finn.transformation.infer_data_layouts import InferDataLayouts +from finn.transformation.general import GiveUniqueNodeNames, GiveReadableTensorNames +from finn.transformation.streamline.absorb import AbsorbTransposeIntoFlatten +import finn.core.onnx_exec as oxe + +# permutation of transpose node +@pytest.mark.parametrize("perm", [[0, 2, 3, 1], [0, 1, 3, 2], [3, 2, 0, 1]]) +# reshape or flatten +@pytest.mark.parametrize("shape", [None, [1, -1], [-1, 1]]) +# input shape +@pytest.mark.parametrize("ishape", [[1, 1, 1, 4], [2, 4, 1, 1], [1, 2, 2, 4]]) +# datalayout +@pytest.mark.parametrize("data_layout", ["NCHW", "NHWC"]) +def test_absorb_transp_into_flatten(perm, shape, ishape, data_layout): + inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, ishape) + transp_node = helper.make_node("Transpose", ["inp"], ["transp_out"], perm=perm) + dummy_in = np.random.uniform(low=0, high=1, size=tuple(ishape)).astype(np.float32) + if shape is None: + shape_node = helper.make_node("Flatten", ["transp_out"], ["outp"]) + dummy_in = dummy_in.transpose(tuple(perm)) + oshape = dummy_in.reshape(dummy_in.shape[0], -1).shape + outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, oshape) + shape0 = None + else: + shape0 = helper.make_tensor_value_info("shape0", TensorProto.FLOAT, shape) + shape_node = 
helper.make_node("Reshape", ["transp_out", "shape0"], ["outp"]) + oshape = dummy_in.transpose(tuple(perm)).reshape(tuple(shape)).shape + outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, oshape) + + graph = helper.make_graph( + nodes=[transp_node, shape_node], + name="absorb-transpose-graph", + inputs=[inp], + outputs=[outp], + ) + + model = helper.make_model(graph, producer_name="absorb_transpose_model") + model = ModelWrapper(model) + if shape is not None: + model.graph.value_info.append(shape0) + model.set_initializer("shape0", np.asarray(shape)) + if data_layout == "NCHW": + model.set_tensor_layout("inp", DataLayout.NCHW) + else: + model.set_tensor_layout("inp", DataLayout.NHWC) + model = model.transform(InferShapes()) + model = model.transform(InferDataTypes()) + model = model.transform(InferDataLayouts()) + model = model.transform(GiveUniqueNodeNames()) + model = model.transform(GiveReadableTensorNames()) + model.save("test.onnx") + model_transformed = model.transform(AbsorbTransposeIntoFlatten()) + model_transformed.save("test2.onnx") + + # verify transformation + inp_values = np.random.uniform(low=-1, high=1, size=tuple(ishape)).astype( + np.float32 + ) + idict = {model.graph.input[0].name: inp_values} + assert oxe.compare_execution(model, model_transformed, idict) + + # only some of the parameter combinations lead to a graph that will be changed when + # AbsorbTransposeIntoFlatten is applied + + if shape == [-1, 1]: # not a flatten operation, so the graph will not be changed + assert model.graph == model_transformed.graph + + elif perm == [ + 3, + 2, + 0, + 1, + ]: # the first dimension is also part of the transpose operation + # so the graph will not be changed + assert model.graph == model_transformed.graph + + # the following cases are the ones in which the model is transformed + # because we tested the parameters shape and perm before we can only consider ishape + # and data_layout (the transformed model should only contain a "Flatten" 
node) + elif ishape == [1, 1, 1, 4] and data_layout == "NHWC": + assert model_transformed.graph.node[0].op_type == "Flatten" + + elif ishape == [2, 4, 1, 1] and data_layout == "NCHW" and shape is None: + # If the first dimension of the input tensor is not 1, flatten and + # reshape (with shape = [1, -1]) would lead to different results + assert model_transformed.graph.node[0].op_type == "Flatten" + + # all other cases lead to an unchanged model + else: + assert model.graph == model_transformed.graph diff --git a/tests/transformation/test_conv_lowering.py b/tests/transformation/test_conv_lowering.py index 73891ded1b9691c7c48a2075ad6ca4668fcf6bfe..16c574b29b55e314b06661b28e4bb869bd6b7996 100644 --- a/tests/transformation/test_conv_lowering.py +++ b/tests/transformation/test_conv_lowering.py @@ -41,7 +41,7 @@ from finn.transformation.lower_convs_to_matmul import LowerConvsToMatMul from finn.transformation.double_to_single_float import DoubleToSingleFloat import finn.core.onnx_exec as oxe -export_onnx_path = "test_output_cnv.onnx" +export_onnx_path = "test_conv_lowering.onnx" def test_conv_lowering_cnv_w1a1(): diff --git a/tests/transformation/test_fold_constants.py b/tests/transformation/test_fold_constants.py index 685c14a98b9031096aaf5b244c4f484d4f308bca..a976ffd62bce744a474a6fac2a61a6478526777f 100644 --- a/tests/transformation/test_fold_constants.py +++ b/tests/transformation/test_fold_constants.py @@ -40,7 +40,7 @@ from finn.transformation.fold_constants import FoldConstants from finn.transformation.infer_shapes import InferShapes from finn.util.test import get_test_model_untrained -export_onnx_path = "test_output_lfc.onnx" +export_onnx_path = "test_fold_constants.onnx" def test_const_folding(): diff --git a/tests/transformation/test_infer_data_layouts.py b/tests/transformation/test_infer_data_layouts.py index fccc7813da6f98c8af4ade7ae562c99b32247a8b..d6d9920043114c78e970842aee5955e3150cf526 100644 --- a/tests/transformation/test_infer_data_layouts.py +++ 
b/tests/transformation/test_infer_data_layouts.py @@ -44,7 +44,7 @@ import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls from finn.transformation.infer_data_layouts import InferDataLayouts import finn.core.data_layout as DataLayout -export_onnx_path_cnv = "test_output_cnv.onnx" +export_onnx_path_cnv = "test_infer_data_layouts.onnx" def test_infer_data_layouts(): diff --git a/tests/transformation/test_infer_datatypes.py b/tests/transformation/test_infer_datatypes.py index e3db40289c4318894cf5ad41c2f67b3bff501db9..097ae03f6153843fbb7956a72b38431559d5d0f1 100644 --- a/tests/transformation/test_infer_datatypes.py +++ b/tests/transformation/test_infer_datatypes.py @@ -38,7 +38,7 @@ from finn.transformation.infer_datatypes import InferDataTypes from finn.transformation.infer_shapes import InferShapes from finn.util.test import get_test_model_trained -export_onnx_path = "test_output_lfc.onnx" +export_onnx_path = "test_infer_datatypes.onnx" def test_infer_datatypes(): diff --git a/tests/transformation/test_linear_past_eltwise.py b/tests/transformation/test_linear_past_eltwise.py index b77f59779a5e8559f80e017d13b66bcb67249830..4cff5e5e1d40986a006cc02186fce21a907c2ef1 100644 --- a/tests/transformation/test_linear_past_eltwise.py +++ b/tests/transformation/test_linear_past_eltwise.py @@ -41,7 +41,7 @@ from finn.transformation.double_to_single_float import DoubleToSingleFloat import pytest -export_onnx_path = "test_scalar_past_eltwise.onnx" +export_onnx_path = "test_linear_past_eltwise.onnx" # construct a synthetic graph to test: # topk insertion, topk conversion to hls, add conversion to hls diff --git a/tests/transformation/test_move_chw_add_past_conv.py b/tests/transformation/test_move_chw_add_past_conv.py new file mode 100644 index 0000000000000000000000000000000000000000..b626f7e5b8564739ec383aaddfc262d642bf47cc --- /dev/null +++ b/tests/transformation/test_move_chw_add_past_conv.py @@ -0,0 +1,109 @@ +# Copyright (c) 2020, Xilinx +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of FINN nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# input dimension
@pytest.mark.parametrize("idim", [4, 7])
# kernel size
@pytest.mark.parametrize("k", [2, 3])
# stride
@pytest.mark.parametrize("s", [1, 2])
# input channels
@pytest.mark.parametrize("ich", [2, 4])
# output channels
@pytest.mark.parametrize("och", [2, 3])
def test_move_chw_add_past_conv(idim, k, s, ich, och):
    """Check MoveAddPastConv on a two-node Add -> Conv graph.

    The graph is executed before and after the transformation with the same
    random input. The outputs must agree (np.isclose) and the transformed
    graph must list the Conv node first and the Add node second.
    """

    def random_f32(shape):
        # uniform samples in [0, 1), stored as float32
        return np.random.uniform(low=0, high=1, size=tuple(shape)).astype(
            np.float32
        )

    odim = compute_conv_output_dim(idim, k, s)

    in_shape = [1, ich, idim, idim]
    out_shape = [1, och, odim, odim]
    add_shape = [1, ich, 1, 1]
    weight_shape = [och, ich, k, k]

    # unpadded, undilated, ungrouped k x k convolution with stride s
    conv_attrs = {
        "dilations": [1, 1],
        "group": 1,
        "kernel_shape": [k, k],
        "pads": [0, 0, 0, 0],
        "strides": [s, s],
    }

    graph = helper.make_graph(
        nodes=[
            helper.make_node("Add", ["inp", "a0"], ["add_out"]),
            helper.make_node("Conv", ["add_out", "a1"], ["outp"], **conv_attrs),
        ],
        name="move-add-graph",
        inputs=[helper.make_tensor_value_info("inp", TensorProto.FLOAT, in_shape)],
        outputs=[
            helper.make_tensor_value_info("outp", TensorProto.FLOAT, out_shape)
        ],
        value_info=[
            helper.make_tensor_value_info("a0", TensorProto.FLOAT, add_shape),
            helper.make_tensor_value_info("a1", TensorProto.FLOAT, weight_shape),
        ],
    )
    model = ModelWrapper(helper.make_model(graph))

    # initialize the Add operand and the Conv weights (in this order)
    model.set_initializer("a0", random_f32(add_shape))
    model.set_initializer("a1", random_f32(weight_shape))

    model = model.transform(InferShapes())

    # reference execution on the untransformed model
    feed = {model.graph.input[0].name: random_f32(in_shape)}
    result = oxe.execute_onnx(model, feed)
    expected = result[model.graph.output[0].name]

    # same input, transformed model
    model = model.transform(MoveAddPastConv())
    result = oxe.execute_onnx(model, feed)
    produced = result[model.graph.output[0].name]

    assert np.isclose(expected, produced).all()
    first_node, second_node = model.graph.node[0], model.graph.node[1]
    assert first_node.op_type == "Conv"
    assert second_node.op_type == "Add"
# data layout
@pytest.mark.parametrize("data_layout", [DataLayout.NHWC, DataLayout.NCHW])
# batch size
@pytest.mark.parametrize("batch_size", [1, 2])
def test_move_flatten_past_affine(data_layout, batch_size):
    """Check MoveFlattenPastAffine on a Flatten -> MatMul -> Mul -> Add graph.

    The original and transformed models must produce the same output for the
    same input. For an NHWC input the transformed graph must differ from the
    original and end with the Flatten node; for an NCHW input the graph must
    be left unchanged.
    """
    if data_layout == DataLayout.NHWC:
        ishape = [batch_size, 1, 1, 1024]
    else:
        ishape = [batch_size, 1024, 1, 1]
    # the flattened + affine output shape does not depend on the layout
    oshape = [batch_size, 1000]

    inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, ishape)
    # value_info names and shapes must match the tensors the nodes consume
    # and the initializers set below ("a0", "a1", "a2")
    a0 = helper.make_tensor_value_info("a0", TensorProto.FLOAT, [1024, 1000])
    a1 = helper.make_tensor_value_info("a1", TensorProto.FLOAT, [1])
    a2 = helper.make_tensor_value_info("a2", TensorProto.FLOAT, [1000])
    outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, oshape)

    flatten_node = helper.make_node("Flatten", ["inp"], ["flatten_out"])
    matmul_node = helper.make_node("MatMul", ["flatten_out", "a0"], ["matmul_out"])
    mul_node = helper.make_node("Mul", ["matmul_out", "a1"], ["mul_out"])
    add_node = helper.make_node("Add", ["mul_out", "a2"], ["outp"])

    graph = helper.make_graph(
        nodes=[flatten_node, matmul_node, mul_node, add_node],
        name="move-reshape-graph",
        inputs=[inp],
        outputs=[outp],
        value_info=[a0, a1, a2],
    )

    model = helper.make_model(graph, producer_name="move_reshape_model")
    model = ModelWrapper(model)

    # initialize values: ternary weights, one-element scale, per-output bias
    a0_values = gen_finn_dt_tensor(DataType.TERNARY, [1024, 1000])
    model.set_initializer("a0", a0_values)
    a1_values = np.random.uniform(low=0.1, high=0.99, size=(1,)).astype(np.float32)
    model.set_initializer("a1", a1_values)
    a2_values = np.random.uniform(low=-1, high=1, size=(1000,)).astype(np.float32)
    model.set_initializer("a2", a2_values)

    model.set_tensor_datatype("inp", DataType.INT2)
    model.set_tensor_layout("inp", data_layout)
    model = model.transform(InferShapes())
    model = model.transform(InferDataTypes())
    model = model.transform(InferDataLayouts())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())

    # compare execution before and after transformation
    inp_values = gen_finn_dt_tensor(DataType.INT2, ishape)
    idict = {model.graph.input[0].name: inp_values}
    model_transformed = model.transform(MoveFlattenPastAffine())
    assert oxe.compare_execution(model, model_transformed, idict)

    # depending on data layout check if graph is transformed or not
    if data_layout == DataLayout.NHWC:
        # check if nodes have new order in transformed graph
        assert model.graph != model_transformed.graph
        assert model_transformed.graph.node[-1].op_type == "Flatten"
    else:
        assert model.graph == model_transformed.graph
0000000000000000000000000000000000000000..65da92c22dbe9f6b1c5a49172ffae59fa6e98607 --- /dev/null +++ b/tests/transformation/test_move_flatten_past_topk.py @@ -0,0 +1,89 @@ +# Copyright (c) 2020, Xilinx +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of FINN nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# data layout
@pytest.mark.parametrize("data_layout", [DataLayout.NHWC, DataLayout.NCHW])
# batch size
@pytest.mark.parametrize("batch_size", [1, 2])
def test_move_flatten_past_topk(data_layout, batch_size):
    """Check MoveFlattenPastTopK on a Flatten graph extended by InsertTopK.

    The original and transformed models must produce the same output for the
    same input. For an NHWC input the transformed graph must differ from the
    original and end with the Flatten node; for an NCHW input the graph must
    be left unchanged.

    NOTE: this test was previously named ``test_move_flatten_past_affine``
    (copied from the affine test); it is renamed to match the transformation
    it actually exercises.
    """
    if data_layout == DataLayout.NHWC:
        ishape = [batch_size, 1, 1, 1024]
    else:
        ishape = [batch_size, 1024, 1, 1]
    # the flattened output shape does not depend on the layout
    oshape = [batch_size, 1024]

    inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, ishape)
    outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, oshape)

    flatten_node = helper.make_node("Flatten", ["inp"], ["outp"])

    graph = helper.make_graph(
        nodes=[flatten_node],
        name="move-flatten-graph",
        inputs=[inp],
        outputs=[outp],
    )

    model = helper.make_model(graph, producer_name="move_flatten_model")
    model = ModelWrapper(model)

    model.set_tensor_datatype("inp", DataType.INT2)
    model.set_tensor_layout("inp", data_layout)
    # InsertTopK is applied to the single-Flatten graph before inference passes
    model = model.transform(InsertTopK())
    model = model.transform(InferShapes())
    model = model.transform(InferDataTypes())
    model = model.transform(InferDataLayouts())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())

    # compare execution before and after transformation
    inp_values = gen_finn_dt_tensor(DataType.INT2, ishape)
    idict = {model.graph.input[0].name: inp_values}
    model_transformed = model.transform(MoveFlattenPastTopK())
    assert oxe.compare_execution(model, model_transformed, idict)

    # depending on data layout check if graph is transformed or not
    if data_layout == DataLayout.NHWC:
        # check if nodes have new order in transformed graph
        assert model.graph != model_transformed.graph
        assert model_transformed.graph.node[-1].op_type == "Flatten"
    else:
        assert model.graph == model_transformed.graph
# permutation of transpose node
@pytest.mark.parametrize("perm", [[0, 2, 3, 1], [0, 1, 3, 2], [3, 2, 0, 1]])
# scalar mul
@pytest.mark.parametrize("scalar", [True, False])
# data layout
@pytest.mark.parametrize("data_layout", [None, DataLayout.NHWC, DataLayout.NCHW])
def test_move_transpose_past_scalar_mul(perm, scalar, data_layout):
    """Check MoveTransposePastScalarMul on a Transpose -> Mul graph.

    Execution results before and after the transformation must match. The
    two nodes are expected to swap only when the Mul operand is a scalar and
    an input data layout was set; in every other case both nodes must be
    left untouched.
    """
    in_dims = (1, 2, 3, 4)

    # derive the transposed shape by actually transposing a dummy tensor
    probe = np.random.uniform(low=0, high=1, size=in_dims).astype(np.float32)
    out_size = probe.transpose(tuple(perm)).shape

    # the Mul operand is either a scalar or a tensor of the output shape
    a0_size = [] if scalar is True else out_size

    graph = helper.make_graph(
        nodes=[
            helper.make_node("Transpose", ["inp"], ["transp_out"], perm=perm),
            helper.make_node("Mul", ["transp_out", "a0"], ["outp"]),
        ],
        name="mv-transpose-graph",
        inputs=[
            helper.make_tensor_value_info("inp", TensorProto.FLOAT, [1, 2, 3, 4])
        ],
        outputs=[helper.make_tensor_value_info("outp", TensorProto.FLOAT, out_size)],
        value_info=[helper.make_tensor_value_info("a0", TensorProto.FLOAT, a0_size)],
    )
    model = ModelWrapper(helper.make_model(graph, producer_name="mv_transpose_model"))

    # initialize values
    model.set_initializer(
        "a0",
        np.random.uniform(low=0, high=1, size=tuple(a0_size)).astype(np.float32),
    )
    layout_given = data_layout is not None
    if layout_given:
        model.set_tensor_layout("inp", data_layout)
        model = model.transform(InferDataLayouts())

    for preparation in (
        InferShapes(),
        InferDataTypes(),
        GiveUniqueNodeNames(),
        GiveReadableTensorNames(),
    ):
        model = model.transform(preparation)

    # compare execution before and after transformation
    feed = {
        model.graph.input[0].name: np.random.uniform(
            low=0, high=1, size=in_dims
        ).astype(np.float32)
    }
    model_transformed = model.transform(MoveTransposePastScalarMul())
    assert oxe.compare_execution(model, model_transformed, feed)

    before = model.graph.node
    after = model_transformed.graph.node

    # check if order changed
    if not (scalar is True and layout_given):
        # nothing may have moved
        assert after[0] == before[0]
        assert after[1] == before[1]
        if layout_given:
            # Mul still sits behind the Transpose
            mul_input = after[1].input[0]
            mul_output = after[1].output[0]
            assert model_transformed.get_tensor_layout(mul_input) != data_layout
            assert model_transformed.get_tensor_layout(mul_output) != data_layout
        return

    # Mul now comes first and works on tensors in the input layout
    assert after[0] != before[0]
    assert after[1] != before[1]
    assert after[0].op_type == "Mul"
    assert after[1].op_type == "Transpose"
    mul_input = after[0].input[0]
    mul_output = after[0].output[0]
    assert model_transformed.get_tensor_layout(mul_input) == data_layout
    assert model_transformed.get_tensor_layout(mul_output) == data_layout