diff --git a/src/finn/custom_op/fpgadataflow/labelselect_batch.py b/src/finn/custom_op/fpgadataflow/labelselect_batch.py
new file mode 100644
index 0000000000000000000000000000000000000000..7591f09d8d0cd1847672fe5aa09616ff1571033d
--- /dev/null
+++ b/src/finn/custom_op/fpgadataflow/labelselect_batch.py
@@ -0,0 +1,331 @@
+# Copyright (c) 2020, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import os
+
+import numpy as np
+
+from finn.core.datatype import DataType
+from finn.custom_op.fpgadataflow import HLSCustomOp
+from onnx import TensorProto, helper
+from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy
+
+
+class LabelSelect_Batch(HLSCustomOp):
+    """Class that corresponds to finn-hlslib LabelSelect_Batch function."""
+
+    def __init__(self, onnx_node):
+        super().__init__(onnx_node)
+
+    def get_nodeattr_types(self):
+        my_attrs = {
+            "Labels": ("i", True, 0),
+            "PE": ("i", True, 0),
+            "K": ("i", True, 0),
+            # FINN DataTypes for input
+            "inputDataType": ("s", True, ""),
+            # number of input vectors, examples:
+            # [1] is a single vector (like a FC layer with batch=1)
+            # [4] is four vectors (like a FC layer with batch=4)
+            # [1, 4, 4] is four * four vectors (like a conv layer with batch=1)
+            "numInputVectors": ("ints", False, [1]),
+        }
+        my_attrs.update(super().get_nodeattr_types())
+        return my_attrs
+
+    def get_normal_input_shape(self):
+        nlabels = self.get_nodeattr("Labels")
+        vecs = list(self.get_nodeattr("numInputVectors"))
+        ishape = tuple(vecs + [nlabels])
+        return ishape
+
+    def get_folded_input_shape(self):
+        nlabels = self.get_nodeattr("Labels")
+        pe = self.get_nodeattr("PE")
+        vecs = list(self.get_nodeattr("numInputVectors"))
+        assert nlabels % pe == 0, "PE must divide Labels"
+        assert pe == 1, "LabelSelect currently fails with folding"
+        folds = int(nlabels / pe)
+        folded_ishape = tuple(vecs + [folds, pe])
+        return folded_ishape
+
+    def get_normal_output_shape(self):
+        k = self.get_nodeattr("K")
+        vecs = list(self.get_nodeattr("numInputVectors"))
+        oshape = tuple(vecs + [k])
+        return oshape
+
+    def get_folded_output_shape(self):
+        k = self.get_nodeattr("K")
+        vecs = list(self.get_nodeattr("numInputVectors"))
+        oshape = tuple(vecs + [k, 1])
+        return oshape
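+
+    # illustrative folding example (values assumed, not taken from a model):
+    # with numInputVectors=[1], Labels=10, PE=1 and K=5 the folded input
+    # shape is (1, 10, 1) and the folded output shape is (1, 5, 1); PE=2
+    # would give a folded input shape of (1, 5, 2), but PE > 1 is currently
+    # ruled out by the assertion above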
+
+    def make_shape_compatible_op(self, model):
+        exp_ishape = self.get_normal_input_shape()
+        oshape = self.get_normal_output_shape()
+        ishape = tuple(model.get_tensor_shape(self.onnx_node.input[0]))
+        assert ishape == exp_ishape, "Unexpected input shape."
+        # implement tensor with correct shape
+        values = np.random.randn(*oshape).astype(np.int64)
+        return helper.make_node(
+            "Constant",
+            inputs=[],
+            outputs=[self.onnx_node.output[0]],
+            value=helper.make_tensor(
+                name="const_tensor",
+                data_type=TensorProto.INT64,
+                dims=values.shape,
+                vals=values.flatten(),
+            ),
+        )
+
+    def infer_node_datatype(self, model):
+        # currently set to uint32 to be compatible with hlslib
+        # enhancement: consider finding smallest power-of-two int for reduced output bandwidth
+        model.set_tensor_datatype(self.onnx_node.output[0], DataType.UINT32)
+
+    def verify_node(self):
+        info_messages = []
+        # verify that "domain" is set to "finn"
+        domain_value = self.onnx_node.domain
+        if domain_value == "finn":
+            info_messages.append("Attribute domain is set correctly")
+        else:
+            info_messages.append('Attribute domain should be set to "finn"')
+
+        # verify that "backend" is set to "fpgadataflow"
+        backend_value = self.get_nodeattr("backend")
+        if backend_value == "fpgadataflow":
+            info_messages.append("Attribute backend is set correctly")
+        else:
+            info_messages.append('Attribute backend should be set to "fpgadataflow"')
+
+        # verify that all necessary attributes exist
+        try:
+            self.get_nodeattr("code_gen_dir_cppsim")
+            self.get_nodeattr("executable_path")
+            self.get_nodeattr("Labels")
+            self.get_nodeattr("PE")
+            self.get_nodeattr("K")
+            self.get_nodeattr("inputDataType")
+            info_messages.append("All necessary attributes exist")
+        except Exception:
+            info_messages.append(
+                """The required LabelSelect_Batch attributes do not exist."""
+            )
+
+        # verify that input data is 1D
+        if len(self.get_nodeattr("numInputVectors")) > 1:
+            info_messages.append("""LabelSelect_Batch requires 1D data input.""")
+            raise Exception("LabelSelect_Batch requires 1D data input.")
+
+        return info_messages
+
+    def get_input_datatype(self):
+        """Returns FINN DataType of input."""
+        ret = DataType[self.get_nodeattr("inputDataType")]
+        assert ret.signed() is False, "LabelSelect is currently broken for signed inputs"
+        return ret
+
+    def get_output_datatype(self):
+        """Returns FINN DataType of output."""
+        return DataType.UINT32
+
+    def get_instream_width(self):
+        """Returns input stream width."""
+        ibits = self.get_input_datatype().bitwidth()
+        pe = self.get_nodeattr("PE")
+        in_width = pe * ibits
+        return in_width
+
+    def get_outstream_width(self):
+        """Returns output stream width."""
+        return self.get_output_datatype().bitwidth()
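+
+    # e.g. with PE=1 and inputDataType=UINT8 the input stream is 8 bits wide;
+    # the output stream is always 32 bits wide (one UINT32 index per output)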
+
+    def get_number_output_values(self):
+        return self.get_nodeattr("K")
+
+    def execute_node(self, context, graph):
+        mode = self.get_nodeattr("exec_mode")
+        node = self.onnx_node
+        exp_ishape = self.get_normal_input_shape()
+        exp_oshape = self.get_normal_output_shape()
+        folded_ishape = self.get_folded_input_shape()
+        folded_oshape = self.get_folded_output_shape()
+
+        if mode == "cppsim":
+            code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
+        elif mode == "rtlsim":
+            code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen")
+        else:
+            raise Exception(
+                """Invalid value for attribute exec_mode! Is currently set to: {}
+                has to be set to one of the following values ("cppsim", "rtlsim")""".format(
+                    mode
+                )
+            )
+
+        inp = context[node.input[0]]
+        assert str(inp.dtype) == "float32", "Input datatype is not float32"
+        assert inp.shape == exp_ishape, """Input shape doesn't match expected shape."""
+        export_idt = self.get_input_datatype()
+        # reshape input into folded form
+        inp = inp.reshape(folded_ishape)
+        # make copy before saving array
+        reshaped_input = inp.copy()
+        np.save(os.path.join(code_gen_dir, "input_0.npy"), reshaped_input)
+
+        if mode == "cppsim":
+            # execute the precompiled model
+            super().exec_precompiled_singlenode_model()
+            # load output npy file
+            super().npy_to_dynamic_output(context)
+            assert (
+                context[node.output[0]].shape == folded_oshape
+            ), "cppsim did not produce expected folded output shape"
+            context[node.output[0]] = context[node.output[0]].reshape(*exp_oshape)
+        elif mode == "rtlsim":
+            sim = self.get_rtlsim()
+            nbits = self.get_instream_width()
+            rtlsim_inp = npy_to_rtlsim_input(
+                "{}/input_0.npy".format(code_gen_dir), export_idt, nbits
+            )
+            super().reset_rtlsim(sim)
+            super().toggle_clk(sim)
+            rtlsim_output = self.rtlsim(sim, rtlsim_inp)
+            odt = self.get_output_datatype()
+            target_bits = odt.bitwidth()
+            packed_bits = self.get_outstream_width()
+            out_npy_path = "{}/output.npy".format(code_gen_dir)
+            out_shape = self.get_folded_output_shape()
+            rtlsim_output_to_npy(
+                rtlsim_output, out_npy_path, odt, out_shape, packed_bits, target_bits
+            )
+            # load and reshape output
+            output = np.load(out_npy_path)
+            output = np.asarray([output], dtype=np.float32).reshape(*exp_oshape)
+            context[node.output[0]] = output
+        else:
+            raise Exception(
+                """Invalid value for attribute exec_mode! Is currently set to: {}
+                has to be set to one of the following values ("cppsim", "rtlsim")""".format(
+                    mode
+                )
+            )
+
+        assert (
+            context[node.output[0]].shape == exp_oshape
+        ), """Output shape doesn't match expected shape."""
+
+    def global_includes(self):
+        # LabelSelect_Batch is implemented in maxpool.h in finn-hlslib
+        self.code_gen_dict["$GLOBALS$"] = ['#include "maxpool.h"']
+
+    def defines(self, var):
+        self.code_gen_dict["$DEFINES$"] = []
+
+    def read_npy_data(self):
+        code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
+        dtype = self.get_input_datatype()
+        elem_bits = dtype.bitwidth()
+        packed_bits = self.get_instream_width()
+        packed_hls_type = "ap_uint<%d>" % packed_bits
+        elem_hls_type = dtype.get_hls_datatype_str()
+        npy_type = "float"
+        npy_in = "%s/input_0.npy" % code_gen_dir
+        self.code_gen_dict["$READNPYDATA$"] = []
+        self.code_gen_dict["$READNPYDATA$"].append(
+            'npy2apintstream<%s, %s, %d, %s>("%s", in0);'
+            % (packed_hls_type, elem_hls_type, elem_bits, npy_type, npy_in)
+        )
+
+    def strm_decl(self):
+        self.code_gen_dict["$STREAMDECLARATIONS$"] = []
+        self.code_gen_dict["$STREAMDECLARATIONS$"].append(
+            'hls::stream<ap_uint<{}>> in0 ("in0");'.format(self.get_instream_width())
+        )
+        self.code_gen_dict["$STREAMDECLARATIONS$"].append(
+            'hls::stream<ap_uint<{}>> out ("out");'.format(self.get_outstream_width())
+        )
+
+    def docompute(self):
+        node = self.onnx_node
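+        # sketch of the generated finn-hlslib call, assuming Labels=10, PE=1,
+        # K=5 and a UINT8 input (template arguments: number of classes, PE,
+        # top-k count, input type, output type):
+        #   LabelSelect_Batch<10, 1, 5, ap_uint<8>, ap_uint<32>> (in0, out, 1);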
packed_hls_type = "ap_uint<%d>" % packed_bits + elem_hls_type = dtype.get_hls_datatype_str() + npy_type = "float" + npy_out = "%s/output.npy" % code_gen_dir + oshape = self.get_folded_output_shape() + oshape_cpp_str = str(oshape).replace("(", "{").replace(")", "}") + + self.code_gen_dict["$DATAOUTSTREAM$"] = [ + 'apintstream2npy<%s, %s, %d, %s>(out, %s, "%s");' + % ( + packed_hls_type, + elem_hls_type, + elem_bits, + npy_type, + oshape_cpp_str, + npy_out, + ) + ] + + def save_as_npy(self): + self.code_gen_dict["$SAVEASCNPY$"] = [] + + def blackboxfunction(self): + self.code_gen_dict["$BLACKBOXFUNCTION$"] = [ + """void {}(hls::stream<ap_uint<{}*{}>> &in0, + hls::stream<ap_uint<32>> &out)""".format( + self.onnx_node.name, + self.get_nodeattr("PE"), + self.get_input_datatype().bitwidth(), + ) + ] + + def pragmas(self): + self.code_gen_dict["$PRAGMAS$"] = ["#pragma HLS INTERFACE axis port=in0"] + self.code_gen_dict["$PRAGMAS$"].append("#pragma HLS INTERFACE axis port=out") + self.code_gen_dict["$PRAGMAS$"].append( + "#pragma HLS INTERFACE ap_ctrl_none port=return" + ) diff --git a/src/finn/custom_op/registry.py b/src/finn/custom_op/registry.py index e72cc5b1a23f04bae978c6f32ba7e45760824349..514b0bf62285faaf32c7baa063bbab3622e280f0 100644 --- a/src/finn/custom_op/registry.py +++ b/src/finn/custom_op/registry.py @@ -44,6 +44,7 @@ from finn.custom_op.fpgadataflow.streamingdatawidthconverter_batch import ( StreamingDataWidthConverter_Batch, ) from finn.custom_op.fpgadataflow.globalaccpool_batch import GlobalAccPool_Batch +from finn.custom_op.fpgadataflow.labelselect_batch import LabelSelect_Batch # create a mapping of all known CustomOp names and classes custom_op = {} @@ -60,6 +61,7 @@ custom_op["MaxPoolNHWC"] = MaxPoolNHWC custom_op["StreamingDataWidthConverter_Batch"] = StreamingDataWidthConverter_Batch custom_op["StreamingFIFO"] = StreamingFIFO custom_op["GlobalAccPool_Batch"] = GlobalAccPool_Batch +custom_op["LabelSelect_Batch"] = LabelSelect_Batch def getCustomOp(node): diff --git a/src/finn/util/test.py b/src/finn/util/test.py index f29af66f52e18f8aeed87e19bfb52354ca1b73a7..b0985bddd3306f81ad0e81f0fb582a0ae7fdaf3d 100644 --- a/src/finn/util/test.py +++ b/src/finn/util/test.py @@ -27,6 +27,7 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. from brevitas_examples import bnn_pynq +import numpy as np import pytest import warnings from finn.core.modelwrapper import ModelWrapper @@ -66,6 +67,15 @@ def get_test_model_untrained(netname, wbits, abits): return get_test_model(netname, wbits, abits, pretrained=False) +def soft_verify_topk(invec, idxvec, k): + """Check that the topK indices provided actually point to the topK largest + values in the input vector""" + np_topk = np.flip(invec.flatten().argsort())[:k] + soft_expected = invec.flatten()[np_topk.astype(np.int).flatten()] + soft_produced = invec.flatten()[idxvec.astype(np.int).flatten()] + return (soft_expected == soft_produced).all() + + def load_test_checkpoint_or_skip(filename): "Try to load given .onnx and return ModelWrapper, else skip current test." try: diff --git a/tests/fpgadataflow/test_fpgadataflow_labelselect.py b/tests/fpgadataflow/test_fpgadataflow_labelselect.py new file mode 100644 index 0000000000000000000000000000000000000000..2df841728395229dafe33d2804c44a3489ef3e45 --- /dev/null +++ b/tests/fpgadataflow/test_fpgadataflow_labelselect.py @@ -0,0 +1,127 @@ +# Copyright (c) 2020, Xilinx +# All rights reserved. 
+    np_topk = np.flip(invec.flatten().argsort())[:k]
+    soft_expected = invec.flatten()[np_topk.astype(np.int).flatten()]
+    soft_produced = invec.flatten()[idxvec.astype(np.int).flatten()]
+    return (soft_expected == soft_produced).all()
+
+
 def load_test_checkpoint_or_skip(filename):
     "Try to load given .onnx and return ModelWrapper, else skip current test."
     try:
diff --git a/tests/fpgadataflow/test_fpgadataflow_labelselect.py b/tests/fpgadataflow/test_fpgadataflow_labelselect.py
new file mode 100644
index 0000000000000000000000000000000000000000..2df841728395229dafe33d2804c44a3489ef3e45
--- /dev/null
+++ b/tests/fpgadataflow/test_fpgadataflow_labelselect.py
@@ -0,0 +1,127 @@
+# Copyright (c) 2020, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import pytest
+
+from onnx import TensorProto, helper
+
+import finn.core.onnx_exec as oxe
+from finn.core.datatype import DataType
+from finn.core.modelwrapper import ModelWrapper
+from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
+from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
+from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
+from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
+from finn.transformation.general import GiveUniqueNodeNames
+from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
+from finn.util.basic import gen_finn_dt_tensor
+from finn.util.test import soft_verify_topk
+from finn.transformation.fpgadataflow.replace_verilog_relpaths import (
+    ReplaceVerilogRelPaths,
+)
+
+
+def make_labelselect_modelwrapper(labels, pe, k, idt):
+    inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, [1, labels])
+    outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, [1, k])
+
+    labelselect_node = helper.make_node(
+        "LabelSelect_Batch",
+        ["inp"],
+        ["outp"],
+        domain="finn",
+        backend="fpgadataflow",
+        Labels=labels,
+        PE=pe,
+        K=k,
+        inputDataType=idt.name,
+    )
+    graph = helper.make_graph(
+        nodes=[labelselect_node], name="graph", inputs=[inp], outputs=[outp],
+    )
+
+    model = helper.make_model(graph, producer_name="labelselect-model")
+    model = ModelWrapper(model)
+
+    model.set_tensor_datatype("inp", idt)
+    model.set_tensor_datatype("outp", DataType.UINT32)
+
+    return model
+
+
+def prepare_inputs(input_tensor, idt):
+    return {"inp": input_tensor}
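+
+
+# a hand-worked example of soft_verify_topk as used below (values chosen by hand):
+#   soft_verify_topk(np.array([3, 1, 4, 1, 5]), np.array([4, 2]), 2) -> True
+#   (the two largest inputs are 5 and 4, found at indices 4 and 2)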
+@pytest.mark.parametrize("exec_mode", ["cppsim", "rtlsim"]) +@pytest.mark.vivado +def test_fpgadataflow_labelselect(idt, labels, fold, k, exec_mode): + if fold == -1: + pe = 1 + else: + pe = labels // fold + assert labels % pe == 0 + + if k == -1: + k = labels + + # generate input data + x = gen_finn_dt_tensor(idt, (1, labels)) + + model = make_labelselect_modelwrapper(labels, pe, k, idt) + + if exec_mode == "cppsim": + model = model.transform(PrepareCppSim()) + model = model.transform(CompileCppSim()) + model = model.transform(SetExecMode("cppsim")) + elif exec_mode == "rtlsim": + model = model.transform(SetExecMode("rtlsim")) + model = model.transform(GiveUniqueNodeNames()) + model = model.transform(PrepareIP("xc7z020clg400-1", 5)) + model = model.transform(HLSSynthIP()) + model = model.transform(ReplaceVerilogRelPaths()) + model = model.transform(PrepareRTLSim()) + else: + raise Exception("Unknown exec_mode") + + # prepare input data and execute + input_dict = prepare_inputs(x, idt) + y = oxe.execute_onnx(model, input_dict)["outp"] + + assert soft_verify_topk(x, y, k), exec_mode + " failed"