From 2c4718c883d5139f828f23e9e1020b9e137d77f6 Mon Sep 17 00:00:00 2001 From: Felix Jentzsch <fepaje@mail.upb.de> Date: Wed, 5 May 2021 16:17:52 +0200 Subject: [PATCH] Basic support for flatten layer between conv and fc --- docker/Dockerfile.finn_dev | 2 +- docker/finn_entrypoint.sh | 2 +- .../fpgadataflow/insert_fifo.py | 14 +- src/finn/transformation/move_reshape.py | 73 +++- src/finn/transformation/streamline/absorb.py | 47 ++- .../test_convert_to_hls_conv_fc_transition.py | 337 ++++++++++++++++++ 6 files changed, 433 insertions(+), 42 deletions(-) create mode 100755 tests/fpgadataflow/test_convert_to_hls_conv_fc_transition.py diff --git a/docker/Dockerfile.finn_dev b/docker/Dockerfile.finn_dev index 4c8557b53..7875378ca 100644 --- a/docker/Dockerfile.finn_dev +++ b/docker/Dockerfile.finn_dev @@ -72,7 +72,7 @@ USER $UNAME # cloning dependency repos (as user) # finn-base -RUN git clone https://github.com/Xilinx/finn-base.git /workspace/finn-base +RUN git clone https://github.com/fpjentzsch/finn-base.git /workspace/finn-base # Brevitas RUN git clone https://github.com/Xilinx/brevitas.git /workspace/brevitas # CNPY diff --git a/docker/finn_entrypoint.sh b/docker/finn_entrypoint.sh index bd2338305..13939a2e4 100644 --- a/docker/finn_entrypoint.sh +++ b/docker/finn_entrypoint.sh @@ -12,7 +12,7 @@ gecho () { # checkout the correct dependency repo commits # the repos themselves are cloned in the Dockerfile -FINN_BASE_COMMIT=8908c6a3f6674c4fa790954bd41c23ee5bf053df +FINN_BASE_COMMIT=4b40ff84e7c9210325a11bf73b8b9142b776f94c BREVITAS_COMMIT=aff49758ec445d77c75721c7de3091a2a1797ca8 CNPY_COMMIT=4e8810b1a8637695171ed346ce68f6984e585ef4 HLSLIB_COMMIT=2e49322d1bbc4969ca293843bda1f3f9c05456fc diff --git a/src/finn/transformation/fpgadataflow/insert_fifo.py b/src/finn/transformation/fpgadataflow/insert_fifo.py index c0ac1319d..f51e6212b 100644 --- a/src/finn/transformation/fpgadataflow/insert_fifo.py +++ b/src/finn/transformation/fpgadataflow/insert_fifo.py @@ -29,11 +29,15 @@ def _suitable_node(node): def _suitable_folded_shapes(ishape, oshape): - i_dummy = np.random.rand(*ishape) - o_dummy = np.random.rand(*oshape) - ishape_canonical = np.squeeze(i_dummy).shape - oshape_canonical = np.squeeze(o_dummy).shape - return ishape_canonical == oshape_canonical + matching_stream_width = ishape[-1] == oshape[-1] + matching_size = np.prod(ishape) == np.prod(oshape) + return matching_stream_width and matching_size + + # i_dummy = np.random.rand(*ishape) + # o_dummy = np.random.rand(*oshape) + # ishape_canonical = np.squeeze(i_dummy).shape + # oshape_canonical = np.squeeze(o_dummy).shape + # return ishape_canonical == oshape_canonical class InsertFIFO(Transformation): diff --git a/src/finn/transformation/move_reshape.py b/src/finn/transformation/move_reshape.py index cb8deaeec..87405590e 100644 --- a/src/finn/transformation/move_reshape.py +++ b/src/finn/transformation/move_reshape.py @@ -1,5 +1,6 @@ from finn.transformation.base import Transformation from finn.util.basic import get_by_name, is_finn_op +from finn.custom_op.registry import getCustomOp def _is_fpgadataflow_node(node): @@ -22,29 +23,67 @@ class RemoveCNVtoFCFlatten(Transformation): between two fpgadataflow nodes""" def apply(self, model): - graph = model.graph graph_modified = False for n in graph.node: - if n.op_type == "Reshape": - shape = model.get_initializer(n.input[1]) - if (shape == [1, -1]).all(): - producer = model.find_producer(n.input[0]) - if _is_fpgadataflow_node(producer) is True: - consumer = model.find_consumer(n.output[0]) - if _is_fpgadataflow_node(consumer) is True: - graph_modified = True - consumer.input[0] = n.input[0] - graph.node.remove(n) - elif producer.op_type == "Transpose": - transp_node = producer + if n.op_type == "Flatten": # re-add reshape + # shape = model.get_initializer(n.input[1]) + # if (shape == [1, -1]).all(): + producer = model.find_producer(n.input[0]) + if _is_fpgadataflow_node(producer) is True: + consumer = model.find_consumer(n.output[0]) + if _is_fpgadataflow_node(consumer) is True: + graph_modified = True + consumer.input[0] = n.input[0] + graph.node.remove(n) + elif producer.op_type == "Transpose": + transp_node = producer + + # check if transpose converts NHWC to NCHW + perms = list(get_by_name(transp_node.attribute, "perm").ints) + if perms == [0, 3, 1, 2]: + producer = model.find_producer(transp_node.input[0]) + if _is_fpgadataflow_node(producer) is True: consumer = model.find_consumer(n.output[0]) if _is_fpgadataflow_node(consumer) is True: - graph_modified = True - consumer.input[0] = transp_node.input[0] - graph.node.remove(n) - graph.node.remove(transp_node) + if consumer.op_type == "StreamingFCLayer_Batch": + fc_inst = getCustomOp(consumer) + mw = fc_inst.get_nodeattr("MW") + mh = fc_inst.get_nodeattr("MH") + (b, h, w, c) = model.get_tensor_shape( + transp_node.input[0] + ) + # absorb transpose into weight matrix, allowing FC layer to operate on the NHWC input + W = model.get_initializer(consumer.input[1]) + assert ( + W is not None + ), "Initializer for matmul weights is not set." + print("fc weights before") + print(W.shape) + print(W) + + W_new = W.reshape(c, h, w, mh) + W_new = W_new.transpose((1, 2, 0, 3)) + W_new = W_new.reshape(mw, mh) + + print("fc weights after") + print(W_new.shape) + print(W_new) + + model.set_initializer(consumer.input[1], W_new) + + # remove transpose & flatten nodes + graph_modified = True + consumer.input[0] = transp_node.input[0] + graph.node.remove(n) + graph.node.remove(transp_node) + else: + warnings.warn( + "Could not absorb transpose into node behind flatten layer" + ) + else: + warnings.warn("Unsupported transpose node before flatten layer") return (model, graph_modified) diff --git a/src/finn/transformation/streamline/absorb.py b/src/finn/transformation/streamline/absorb.py index fa2d7a714..979a69163 100644 --- a/src/finn/transformation/streamline/absorb.py +++ b/src/finn/transformation/streamline/absorb.py @@ -338,24 +338,35 @@ class AbsorbTransposeIntoMultiThreshold(Transformation): graph.node.remove(n) graph.node.remove(final_t_cand) graph_modified = True - elif final_t_cand.op_type == "Reshape": - oshape = model.get_tensor_shape(final_t_cand.output[0]) - if len(oshape) == 2: - # transition to FC part, can still use NHWC - mt = getCustomOp(mt_cand) - mt.set_nodeattr("data_layout", "NHWC") - # get rid of first tranpose node - mt_cand.input[0] = n.input[0] - # fix output shape for MultiThreshold - mt_ishape = model.get_tensor_shape(mt_cand.input[0]) - (b, h, w, c) = mt_ishape - assert ( - h == 1 and w == 1 - ), """Untested spatial dim - in conv->fc transition, proceed with caution!""" - model.set_tensor_shape(mt_cand.output[0], mt_ishape) - graph.node.remove(n) - graph_modified = True + elif final_t_cand.op_type == "Flatten": # TODO: re-add reshape + # oshape = model.get_tensor_shape(final_t_cand.output[0]) + # if len(oshape) == 2: + # transition to FC part, can still use NHWC + mt = getCustomOp(mt_cand) + mt.set_nodeattr("data_layout", "NHWC") + # get rid of first tranpose node + mt_cand.input[0] = n.input[0] + # fix output shape for MultiThreshold + mt_ishape = model.get_tensor_shape(mt_cand.input[0]) + (b, h, w, c) = mt_ishape + # assert ( + # h == 1 and w == 1 + # ), """Untested spatial dim + # in conv->fc transition, proceed with caution!""" + model.set_tensor_shape(mt_cand.output[0], mt_ishape) + + graph.node.remove(n) + transpose_output = model.make_new_valueinfo_name() + new_transpose = oh.make_node( + "Transpose", + [mt_cand.output[0]], + [transpose_output], + perm=[0, 3, 1, 2], + ) + graph.node.insert(node_ind + 1, new_transpose) + final_t_cand.input[0] = transpose_output + + graph_modified = True if graph_modified: model = model.transform(InferDataTypes()) return (model, graph_modified) diff --git a/tests/fpgadataflow/test_convert_to_hls_conv_fc_transition.py b/tests/fpgadataflow/test_convert_to_hls_conv_fc_transition.py new file mode 100755 index 000000000..f0b9758d9 --- /dev/null +++ b/tests/fpgadataflow/test_convert_to_hls_conv_fc_transition.py @@ -0,0 +1,337 @@ +# Copyright (c) 2020, Xilinx +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of FINN nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +from onnx import TensorProto, helper +import numpy as np +import pytest + +from finn.core.datatype import DataType +from finn.transformation.infer_shapes import InferShapes +from finn.transformation.infer_datatypes import InferDataTypes +from finn.transformation.general import GiveUniqueNodeNames +from finn.transformation.lower_convs_to_matmul import LowerConvsToMatMul + +from finn.transformation.fpgadataflow.prepare_ip import PrepareIP +from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim +from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP +import finn.core.onnx_exec as oxe +from finn.core.modelwrapper import ModelWrapper +from finn.util.basic import gen_finn_dt_tensor +import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls + +from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim +from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim +from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode +from finn.custom_op.general.im2col import compute_conv_output_dim +from finn.custom_op.registry import getCustomOp +from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer + +import finn.transformation.streamline.absorb as absorb +from finn.transformation.fold_constants import FoldConstants +from finn.transformation.general import ( + ApplyConfig, + GiveReadableTensorNames, + RemoveUnusedTensors, + RemoveStaticGraphInputs, +) +from finn.transformation.streamline import Streamline +from finn.transformation.infer_data_layouts import InferDataLayouts +from finn.transformation.move_reshape import RemoveCNVtoFCFlatten +from finn.transformation.streamline.reorder import ( + MakeMaxPoolNHWC, + MoveScalarLinearPastInvariants, +) + +import finn.core.data_layout as DataLayout + + +def get_multithreshold_rand_params(channels, num_of_thres, seed=None): + if seed is not None: + np.random.seed(seed) + steps = np.random.rand(channels, 1) * 20 + bias = np.random.rand(channels, 1) * -10 + thres = [np.arange(num_of_thres) for chn in range(channels)] + thres = ((thres + bias) * steps).astype(np.float32) + thres = np.round(thres) + return thres + + +# conv_config kernel_size,stride, pad + + +# @pytest.mark.parametrize( +# "conv_config", [(1, 2, 0), (1, 3, 0), (3, 2, 1), (3, 1, 0), (3, 1, 1), (5, 2, 1)] +# ) +# @pytest.mark.parametrize("depthwise", [False, True]) +# @pytest.mark.parametrize("exec_mode", ["cppsim", "rtlsim"]) +@pytest.mark.parametrize("conv_config", [(3, 1, 1)]) +@pytest.mark.parametrize("depthwise", [False]) +@pytest.mark.parametrize("exec_mode", ["cppsim"]) +@pytest.mark.slow +@pytest.mark.vivado +def test_convert_to_hls_conv_fc_transition(conv_config, depthwise, exec_mode): + kernel_size, stride, pad = conv_config + np.random.seed(0) + idt = DataType.UINT4 + odt = DataType.UINT4 + + in_feature_dim = 2 + in_chn = 4 + fc_filters = 8 + + if depthwise is True: + group = out_chn = in_chn + conv_param_shape = [out_chn, 1, kernel_size, kernel_size] + else: + group = 1 + out_chn = 4 + conv_param_shape = [out_chn, in_chn, kernel_size, kernel_size] + + total_pad = 2 * pad + out_feature_dim = compute_conv_output_dim( + in_feature_dim, kernel_size, stride, total_pad + ) + + input_shape = [1, in_chn, in_feature_dim, in_feature_dim] + conv_output_shape = [1, out_chn, out_feature_dim, out_feature_dim] + output_shape = [1, fc_filters] + + fc_param_shape = [out_chn * out_feature_dim * out_feature_dim, fc_filters] + + conv_weight_dt = DataType.INT4 + fc_weight_dt = DataType.INT4 + + conv_config = {} + conv_config["dilations"] = [1, 1] + conv_config["group"] = group + conv_config["kernel_shape"] = [kernel_size, kernel_size] + conv_config["pads"] = [pad, pad, pad, pad] + conv_config["strides"] = [stride, stride] + + global_in = helper.make_tensor_value_info( + "global_in", TensorProto.FLOAT, input_shape + ) + global_out = helper.make_tensor_value_info( + "global_out", TensorProto.FLOAT, output_shape + ) + value_info = [ + helper.make_tensor_value_info( + "conv_param", TensorProto.FLOAT, conv_param_shape + ), + helper.make_tensor_value_info("thres1_param", TensorProto.FLOAT, (out_chn, 15)), + helper.make_tensor_value_info( + "matmul_param", TensorProto.FLOAT, fc_param_shape + ), + helper.make_tensor_value_info( + "thres2_param", TensorProto.FLOAT, (fc_filters, 15) + ), + ] + + modelproto = helper.make_model( + helper.make_graph( + name="test", + inputs=[global_in], + outputs=[global_out], + value_info=value_info, + nodes=[ + helper.make_node( + "Conv", ["global_in", "conv_param"], ["conv_out"], **conv_config + ), + helper.make_node( + "MultiThreshold", + ["conv_out", "thres1_param"], + ["thres1_out"], + domain="finn.custom_op.general", + out_dtype="UINT4", + # out_bias=-7, + # out_scale=1.0 + ), + helper.make_node("Flatten", ["thres1_out"], ["flatten_out"], axis=1), + helper.make_node( + "MatMul", ["flatten_out", "matmul_param"], ["matmul_out"] + ), + helper.make_node( + "MultiThreshold", + ["matmul_out", "thres2_param"], + ["global_out"], + domain="finn.custom_op.general", + out_dtype="UINT4", + # out_bias=-7, + # out_scale=1.0 + ), + ], + ) + ) + + model = ModelWrapper(modelproto) + model.set_tensor_datatype("global_in", idt) + model.set_tensor_layout("global_in", DataLayout.NCHW) + model.set_tensor_datatype("global_out", odt) + model.set_tensor_datatype("conv_param", conv_weight_dt) + model.set_tensor_datatype("matmul_param", fc_weight_dt) + model.set_tensor_datatype("thres1_param", DataType.INT32) + model.set_tensor_datatype("thres2_param", DataType.INT32) + model.set_tensor_datatype( + "flatten_out", DataType.UINT4 + ) # TODO: not inferred automatically (FLOAT32) + model.set_initializer( + "conv_param", gen_finn_dt_tensor(conv_weight_dt, conv_param_shape) + ) + model.set_initializer( + "thres1_param", get_multithreshold_rand_params(out_chn, 15, seed=0) + ) + model.set_initializer( + "thres2_param", get_multithreshold_rand_params(fc_filters, 15, seed=0) + ) + model.set_initializer( + "matmul_param", gen_finn_dt_tensor(fc_weight_dt, fc_param_shape) + ) + + model = model.transform(InferShapes()) + model = model.transform(InferDataTypes()) + model = model.transform(InferDataLayouts()) + + model.save("testmodel_in.onnx") + + x = gen_finn_dt_tensor(idt, input_shape) + inp_dict = {model.graph.input[0].name: x} + output = oxe.execute_onnx(model, inp_dict) + print(output) + + # streamlining step + model = model.transform(MoveScalarLinearPastInvariants()) + model = model.transform(Streamline()) + model = model.transform(LowerConvsToMatMul()) + model = model.transform(MakeMaxPoolNHWC()) + model = model.transform(absorb.AbsorbTransposeIntoMultiThreshold()) + model = model.transform(Streamline()) + + model = model.transform(InferDataLayouts()) + model = model.transform(RemoveUnusedTensors()) + + model.save("testmodel_streamlined.onnx") + + output = oxe.execute_onnx(model, inp_dict) + print(output) + + # convert_to_hls step + model = model.transform(to_hls.InferQuantizedStreamingFCLayer()) + model = model.transform(to_hls.InferThresholdingLayer()) + model = model.transform(to_hls.InferConvInpGen()) + model = model.transform(to_hls.InferStreamingMaxPool()) + model = model.transform(RemoveCNVtoFCFlatten()) + model = model.transform(absorb.AbsorbConsecutiveTransposes()) + + model = model.transform(GiveUniqueNodeNames()) + model = model.transform(InferDataLayouts()) + + if exec_mode == "cppsim": + model = model.transform(PrepareCppSim()) + model = model.transform(CompileCppSim()) + model = model.transform(SetExecMode("cppsim")) + elif exec_mode == "rtlsim": + model = model.transform(SetExecMode("rtlsim")) + model = model.transform(GiveUniqueNodeNames()) + model = model.transform(PrepareIP("xc7z020clg400-1", 5)) + model = model.transform(HLSSynthIP()) + model = model.transform(PrepareRTLSim()) + else: + raise Exception("Unknown exec_mode") + + model.save("testmodel_hls.onnx") + + output = oxe.execute_onnx(model, inp_dict) + print(output) + + model_orig = ModelWrapper("testmodel_in.onnx") + model_hls = ModelWrapper("testmodel_hls.onnx") + + assert oxe.compare_execution(model_orig, model_hls, inp_dict) + + +""" + new_model = model.transform(LowerConvsToMatMul()) + new_model = new_model.transform(to_hls.InferConvInpGen()) + if depthwise is True: + new_model = new_model.transform(to_hls.InferVVAU()) + else: + new_model = new_model.transform(to_hls.InferQuantizedStreamingFCLayer()) + fc_node = new_model.get_nodes_by_op_type("StreamingFCLayer_Batch")[0] + fc_inst = getCustomOp(fc_node) + mw = fc_inst.get_nodeattr("MW") + mh = fc_inst.get_nodeattr("MH") + pe_cands = list(filter(lambda x: mh % x == 0, range(2, mh + 1))) + simd_cands = list(filter(lambda x: mw % x == 0, range(2, mw + 1))) + fc_inst.set_nodeattr("PE", pe_cands[0]) + fc_inst.set_nodeattr("SIMD", simd_cands[0]) + + new_model = new_model.transform(GiveUniqueNodeNames()) + new_model = new_model.transform(InferShapes()) + new_model = new_model.transform(InferDataTypes()) + + if exec_mode == "cppsim": + new_model = new_model.transform(PrepareCppSim()) + new_model = new_model.transform(CompileCppSim()) + new_model = new_model.transform(SetExecMode("cppsim")) + elif exec_mode == "rtlsim": + new_model = new_model.transform(SetExecMode("rtlsim")) + new_model = new_model.transform(GiveUniqueNodeNames()) + new_model = new_model.transform(PrepareIP("xc7z020clg400-1", 5)) + new_model = new_model.transform(HLSSynthIP()) + new_model = new_model.transform(PrepareRTLSim()) + else: + raise Exception("Unknown exec_mode") + + x = gen_finn_dt_tensor(idt, input_shape) + inp_dict = {model.graph.input[0].name: x} + assert oxe.compare_execution(model, new_model, inp_dict) + if kernel_size == 1 and stride > 1 and pad == 0: + assert new_model.graph.node[1].op_type == "DownSampler" + if exec_mode == "rtlsim": + node = new_model.get_nodes_by_op_type("DownSampler")[0] + inst = getCustomOp(node) + cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim") + exp_cycles_dict = new_model.analysis(exp_cycles_per_layer) + exp_cycles = exp_cycles_dict[node.name] + assert np.isclose(exp_cycles, cycles_rtlsim, atol=11) + assert exp_cycles != 0 + + if pad == 1: + padding_node = new_model.get_nodes_by_op_type("FMPadding_Batch")[0] + padding_inst = getCustomOp(padding_node) + assert padding_inst.get_nodeattr("SIMD") == in_chn + + if depthwise is True and exec_mode == "rtlsim": + node = new_model.get_nodes_by_op_type("Vector_Vector_Activate_Batch")[0] + inst = getCustomOp(node) + cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim") + exp_cycles_dict = new_model.analysis(exp_cycles_per_layer) + exp_cycles = exp_cycles_dict[node.name] + assert np.isclose(exp_cycles, cycles_rtlsim, atol=11) + assert exp_cycles != 0 + """ -- GitLab