class InferAddStreamsLayer(Transformation):
    """Convert any eltwise Add of two equally-shaped, equally-typed integer
    streams into an AddStreams_Batch HLS layer.

    Adds with mismatched input shapes or datatypes, or with float inputs,
    are left untouched. NCHW tensors are converted to NHWC (channels-last)
    via transpose nodes before the HLS node is inserted.
    """

    def apply(self, model):
        """Apply the transform to ``model``; returns ``(model, graph_modified)``."""
        graph = model.graph
        node_ind = 0
        graph_modified = False
        for node in graph.node:
            node_ind += 1
            if node.op_type == "Add":
                in0 = node.input[0]
                in1 = node.input[1]
                result = node.output[0]
                in0_shape = model.get_tensor_shape(in0)
                in1_shape = model.get_tensor_shape(in1)

                # skip if different shapes on inputs
                # (AddStreams_Batch has no broadcasting support)
                if in0_shape != in1_shape:
                    continue

                idt0 = model.get_tensor_datatype(in0)
                idt1 = model.get_tensor_datatype(in1)

                # skip if different data types on inputs
                if idt0 != idt1:
                    continue

                idt = idt0

                # skip conversion for layers with float input
                if not idt.is_integer():
                    continue

                # check layout and convert if necessary: the HLS layer
                # expects NHWC (channels-last) streams
                in0_layout = model.get_tensor_layout(in0)
                in1_layout = model.get_tensor_layout(in1)
                result_layout = model.get_tensor_layout(result)

                if in0_layout == DataLayout.NCHW:
                    in0 = nchw_to_nhwc(in0, model, node_ind)
                    node_ind += 1
                    in0_shape = model.get_tensor_shape(in0)

                if in1_layout == DataLayout.NCHW:
                    in1 = nchw_to_nhwc(in1, model, node_ind)
                    node_ind += 1
                    in1_shape = model.get_tensor_shape(in1)

                # keep track of where we need to insert the HLS Op
                # it has to be ahead of the output transform
                insert_point = node_ind

                if result_layout == DataLayout.NCHW:
                    result = nchw_to_nhwc(result, model, node_ind, reverse=True)
                    node_ind += 1

                # now safe to assume num_channels is size of last dimension
                num_channels = int(in0_shape[-1])
                # create node with no parallelization first
                pe = 1
                assert (
                    num_channels % pe == 0
                ), "Requirement Channels divisible by PE is violated."

                # create and insert new AddStreams_Batch node
                new_node = helper.make_node(
                    "AddStreams_Batch",
                    [in0, in1],
                    [result],
                    domain="finn",
                    backend="fpgadataflow",
                    NumChannels=num_channels,
                    PE=pe,
                    inputDataType=idt.name,
                    numInputVectors=in0_shape[:-1],
                )
                graph.node.insert(insert_point, new_node)
                # remove old node
                graph.node.remove(node)
                graph_modified = True

        if graph_modified:
            # nodes were added/removed: re-infer shapes and datatypes
            model = model.transform(InferShapes())
            model = model.transform(InferDataTypes())
        return (model, graph_modified)
class InferDuplicateStreamsLayer(Transformation):
    """Insert a DuplicateStreams HLS layer behind any tensor with fanout == 2,
    rewiring both consumers to read from the duplicated output streams.

    Tensors with a float datatype are skipped (only integer streams are
    supported by the HLS layer).
    """

    def apply(self, model):
        """Apply the transform to ``model``; returns ``(model, graph_modified)``."""
        graph = model.graph
        node_ind = 0
        graph_modified = False
        for node in graph.node:
            node_ind += 1
            successors = model.find_consumers(node.output[0])
            if successors is not None and len(successors) == 2:
                output_tensor = node.output[0]

                dt = model.get_tensor_datatype(output_tensor)

                # skip conversion for layers with float input
                if not dt.is_integer():
                    continue

                # create clone tensors that will carry the duplicated streams
                out_shape = model.get_tensor_shape(output_tensor)
                out_tensor_clones = []
                for _ in range(2):
                    clone = helper.make_tensor_value_info(
                        model.make_new_valueinfo_name(), TensorProto.FLOAT, out_shape
                    )
                    model.graph.value_info.append(clone)
                    out_tensor_clones += [clone.name]

                num_ch = int(out_shape[-1])
                vecs = out_shape[:-1]

                # create node with no parallelization first
                pe = 1
                assert (
                    num_ch % pe == 0
                ), "Requirement channels divisible by PE is violated."

                dup_node = helper.make_node(
                    "DuplicateStreams_Batch",
                    [output_tensor],
                    out_tensor_clones,
                    domain="finn",
                    backend="fpgadataflow",
                    NumChannels=num_ch,
                    PE=pe,
                    inputDataType=dt.name,
                    numInputVectors=vecs,
                )

                graph.node.insert(node_ind, dup_node)

                # connect successors to out tensor clone
                clone_idx = 0
                for successor in successors:
                    for i, succ_input in enumerate(successor.input):
                        if succ_input == output_tensor:
                            successor.input[i] = out_tensor_clones[clone_idx]
                            clone_idx += 1
                            # if one node has multiple connections to the same
                            # output, find_direct_successors returns one node
                            # per input, so breaking the inner loop here still
                            # rewires every connection correctly
                            break

                graph_modified = True

        if graph_modified:
            # the inserted node may be topologically out of order:
            # sort the graph, then re-infer shapes and datatypes
            model = model.transform(SortGraph())
            model = model.transform(InferShapes())
            model = model.transform(InferDataTypes())
        return (model, graph_modified)
class InferLabelSelectLayer(Transformation):
    """Convert any TopK into a LabelSelect_Batch HLS layer.

    Only TopK nodes with an integer input datatype and an unused value
    output are converted (LabelSelect only produces the index output).
    """

    def apply(self, model):
        """Apply the transform to ``model``; returns ``(model, graph_modified)``."""
        graph = model.graph
        node_ind = 0
        graph_modified = False
        for node in graph.node:
            node_ind += 1
            if node.op_type == "TopK":
                fc_input = node.input[0]
                k_input = node.input[1]
                val_output = node.output[0]
                idx_output = node.output[1]
                fc_in_shape = model.get_tensor_shape(fc_input)

                idt = model.get_tensor_datatype(fc_input)

                # skip conversion for layers with float input
                if not idt.is_integer():
                    continue

                # skip conversion if the value output is connected (not supported)
                if model.find_consumer(val_output) is not None:
                    continue

                num_labels = int(fc_in_shape[-1])
                # create node with no parallelization first
                pe = 1
                assert (
                    num_labels % pe == 0
                ), "Requirement Labels divisible by PE is violated."

                # cast to plain int so the ONNX attribute is well-typed
                # (get_initializer returns a numpy array)
                k = int(model.get_initializer(k_input)[0])

                # create and insert new LabelSelect_Batch node
                new_node = helper.make_node(
                    "LabelSelect_Batch",
                    [fc_input],
                    [idx_output],
                    domain="finn",
                    backend="fpgadataflow",
                    Labels=num_labels,
                    PE=pe,
                    K=k,
                    inputDataType=idt.name,
                )
                graph.node.insert(node_ind, new_node)
                # remove old node
                graph.node.remove(node)
                graph_modified = True

        if graph_modified:
            # nodes were added/removed: re-infer shapes and datatypes
            model = model.transform(InferShapes())
            model = model.transform(InferDataTypes())
        return (model, graph_modified)
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import os +import numpy as np + +from onnx import TensorProto, helper + +import finn.core.onnx_exec as oxe +from finn.core.datatype import DataType +from finn.core.modelwrapper import ModelWrapper +from finn.transformation.fold_constants import FoldConstants +from finn.transformation.general import ( + GiveReadableTensorNames, + GiveUniqueNodeNames, + SortGraph, +) +from finn.transformation.streamline.reorder import MoveScalarLinearPastInvariants +from finn.transformation.infer_shapes import InferShapes +from finn.transformation.infer_datatypes import InferDataTypes +from finn.transformation.infer_data_layouts import InferDataLayouts +from finn.util.basic import gen_finn_dt_tensor +from finn.util.test import soft_verify_topk +from finn.transformation.double_to_single_float import DoubleToSingleFloat +from finn.transformation.insert_topk import InsertTopK +import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls +from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim +from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim +from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode +from finn.transformation.streamline.absorb 
def make_model(ch, ifmdim):
    """Build the synthetic test graph:

    inp -> Add(0) -> two parallel Add/Mul branches -> eltwise Add
        -> GlobalAveragePool -> Reshape -> outp

    Intermediate tensors are referenced by name only; they are not
    registered as graph value_info (shapes are inferred later).
    """
    shape = [1, ch, ifmdim, ifmdim]
    inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, shape)
    outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, [1, ch])

    node_list = [
        helper.make_node("Add", ["inp", "inp1_add0_ct"], ["out_add0"]),
        helper.make_node("Add", ["out_add0", "inp1_add_ct"], ["inp1_add"]),
        helper.make_node("Add", ["out_add0", "inp2_add_ct"], ["inp2_add"]),
        helper.make_node("Mul", ["inp1_add", "inp1_mul_ct"], ["inp1_mul"]),
        helper.make_node("Mul", ["inp2_add", "inp2_mul_ct"], ["inp2_mul"]),
        helper.make_node("Add", ["inp1_mul", "inp2_mul"], ["eltwise_add"]),
        helper.make_node("GlobalAveragePool", ["eltwise_add"], ["pool"]),
        helper.make_node("Reshape", ["pool", "reshape_ct"], ["outp"]),
    ]

    graph = helper.make_graph(
        nodes=node_list,
        name="graph",
        inputs=[inp],
        outputs=[outp],
    )

    model = ModelWrapper(helper.make_model(graph, producer_name="add-model"))

    # set initializers for scalar add/mul nodes and the reshape target shape
    initializer_values = [
        ("inp1_add0_ct", [0.0]),
        ("inp1_add_ct", [7.0]),
        ("inp2_add_ct", [8.0]),
        ("inp1_mul_ct", [2.0]),
        ("inp2_mul_ct", [2.0]),
        ("reshape_ct", [1, -1]),
    ]
    for tensor_name, value in initializer_values:
        model.set_initializer(tensor_name, np.array(value))

    return model
# data types
@pytest.mark.parametrize("idt", [DataType.UINT2])
# channels
@pytest.mark.parametrize("ch", [16])
# ifmdim
@pytest.mark.parametrize("ifmdim", [5])
@pytest.mark.vivado
@pytest.mark.slow
def test_convert_to_hls_layers_synthetic(ch, ifmdim, idt):
    """End-to-end check of topk insertion and HLS conversion on the
    synthetic add/mul graph: verify the streamlined float model output,
    convert to HLS layers, check the resulting topology, then compare
    the cppsim top-k result against a soft top-k verification."""
    model = make_model(ch, ifmdim)
    model.save(export_onnx_path)
    model = ModelWrapper(export_onnx_path)
    model = model.transform(DoubleToSingleFloat())
    model = model.transform(InferShapes())
    model = model.transform(FoldConstants())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    model = model.transform(InferDataLayouts())
    # model.save("golden.onnx")
    # generate test vectors of correct shape
    if ifmdim == -1:
        input_tensor_shape = (1, ch)
    else:
        input_tensor_shape = (1, ch, ifmdim, ifmdim)

    x = gen_finn_dt_tensor(idt, input_tensor_shape)

    # generate expected value from streamlined net
    input_dict = {model.graph.input[0].name: x}

    output_dict = oxe.execute_onnx(model, input_dict, True)
    produced_sum = output_dict[model.graph.output[0].name]
    chw_mul = model.get_initializer(model.graph.node[-1].input[1])
    # NOTE(review): chw_mul is read above and then immediately overwritten
    # with 1 — presumably the scalar mul has already been folded away at
    # this point; confirm intent or remove the dead get_initializer call
    chw_mul = 1
    # branches compute (x+7)*2 and (x+8)*2, summed: 2*(2x+15), then
    # averaged over the ifmdim x ifmdim spatial dims (per make_model)
    expected_sum = chw_mul * np.sum(2 * (2 * x + 15.0), axis=(2, 3)) / (ifmdim * ifmdim)
    assert (produced_sum.flatten() == expected_sum.flatten()).all()

    model = model.transform(InferDataLayouts())

    # convert to hls
    model.set_tensor_datatype(model.graph.input[0].name, idt)
    # extra streamlining
    model = model.transform(MoveScalarLinearPastInvariants())
    model = model.transform(MoveAddPastMul())
    model = model.transform(CollapseRepeatedMul())
    model = model.transform(CollapseRepeatedAdd())
    # insert top-k node, which should absorb linear ops before it

    model = model.transform(InferShapes())
    model = model.transform(InferDataLayouts())
    model = model.transform(InferDataTypes())

    model = model.transform(to_hls.InferChannelwiseLinearLayer())
    model = model.transform(to_hls.InferAddStreamsLayer())
    model = model.transform(to_hls.InferGlobalAccPoolLayer())
    model = model.transform(MoveScalarLinearPastInvariants())
    model = model.transform(InsertTopK())
    model = model.transform(AbsorbScalarMulIntoTopK())
    model = model.transform(InferDataTypes())
    model = model.transform(to_hls.InferLabelSelectLayer())
    model = model.transform(AbsorbConsecutiveTransposes())
    model = model.transform(InferDataTypes())
    # NOTE(review): InferLabelSelectLayer is applied a second time here —
    # looks redundant after the first pass; confirm whether the
    # intermediate transforms can expose a new TopK node
    model = model.transform(to_hls.InferLabelSelectLayer())
    model = model.transform(to_hls.InferDuplicateStreamsLayer())

    model = model.transform(SortGraph())

    # model.save("golden_hls.onnx")
    # check topology status

    finn_nodes = model.get_finn_nodes()
    assert len(finn_nodes) == 9
    add_nodes = model.get_nodes_by_op_type("AddStreams_Batch")
    assert len(add_nodes) == 1
    pool_nodes = model.get_nodes_by_op_type("GlobalAccPool_Batch")
    assert len(pool_nodes) == 1
    label_nodes = model.get_nodes_by_op_type("LabelSelect_Batch")
    assert len(label_nodes) == 1
    channelwise_nodes = model.get_nodes_by_op_type("ChannelwiseOp_Batch")
    assert len(channelwise_nodes) == 5
    dup_nodes = model.get_nodes_by_op_type("DuplicateStreams_Batch")
    assert len(dup_nodes) == 1

    # compile and execute the HLS graph via cppsim
    model = model.transform(PrepareCppSim())
    model = model.transform(CompileCppSim())
    model = model.transform(SetExecMode("cppsim"))

    output_dict = oxe.execute_onnx(model, input_dict, True)
    produced_topk_hls = output_dict[model.graph.output[0].name]
    topk_input = output_dict[model.graph.node[-1].input[0]]
    assert soft_verify_topk(topk_input, produced_topk_hls, 5)

    os.remove(export_onnx_path)