From 1500107c4e3b6e405f02d25ea5c40c191df48359 Mon Sep 17 00:00:00 2001 From: Yaman Umuroglu <yamanu@xilinx.com> Date: Wed, 13 Oct 2021 19:47:17 +0200 Subject: [PATCH] [CustomOp] sketch CustomOp for lookup --- src/finn/custom_op/fpgadataflow/lookup.py | 294 ++++++++++++++++++++++ 1 file changed, 294 insertions(+) create mode 100644 src/finn/custom_op/fpgadataflow/lookup.py diff --git a/src/finn/custom_op/fpgadataflow/lookup.py b/src/finn/custom_op/fpgadataflow/lookup.py new file mode 100644 index 000000000..e7a957169 --- /dev/null +++ b/src/finn/custom_op/fpgadataflow/lookup.py @@ -0,0 +1,294 @@ +# Copyright (c) 2020, Xilinx +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of FINN nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import numpy as np +import os +import warnings + +from finn.core.datatype import DataType +from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp +from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy + + +class Lookup(HLSCustomOp): + "Streaming elementwise HLS lookup, mapping indices to values." + + def __init__(self, onnx_node): + super().__init__(onnx_node) + + def get_nodeattr_types(self): + my_attrs = { + # Number of embeddings ("memory depth") + "NumEmbeddings": ("i", True, 0), + # Dimensionality of each embedding (part of "memory width") + "EmbeddingDim": ("i", True, 0), + # Datatype for embeddings (part of "memory width") + "EmbeddingType": ("s", True, ""), + # Datatype for inputs + "InputType": ("s", True, ""), + # Input shape + "InputShape": ("ints", False, [1]), + } + my_attrs.update(super().get_nodeattr_types()) + return my_attrs + + def get_exp_cycles(self): + n_inputs = np.prod(self.get_nodeattr("InputShape")) + exp_cycles = int(n_inputs) + return exp_cycles + + def get_normal_input_shape(self): + return self.get_nodeattr("InputShape") + + def get_normal_output_shape(self): + ishape = self.get_normal_input_shape() + oshape = list(ishape) + [self.get_nodeattr("EmbeddingDim")] + return tuple(oshape) + + def get_folded_input_shape(self): + ishape = self.get_normal_input_shape() + folded_ishape = list(ishape) + [1] + return tuple(folded_ishape) + + def get_folded_output_shape(self): + return self.get_normal_output_shape() + + def make_shape_compatible_op(self, model): + exp_ishape = self.get_normal_input_shape() + oshape = self.get_normal_output_shape() + ishape = tuple(model.get_tensor_shape(self.onnx_node.input[0])) + assert ishape == exp_ishape, "Unexpect input shape for Lookup: %s vs %s" % ( + str(exp_ishape), + str(ishape), + ) + return super().make_const_shape_op(oshape) + + def infer_node_datatype(self, model): + node = self.onnx_node + idt = model.get_tensor_datatype(node.input[0]) + if idt != self.get_input_datatype(): + warn_str = "InputType changing for %s: %s -> %s " % ( + node.name, + str(self.get_input_datatype()), + str(idt), + ) + warnings.warn(warn_str) + self.set_nodeattr("InputType", idt.name) + odt = DataType[self.get_nodeattr("EmbeddingType")] + model.set_tensor_datatype(node.output[0], odt) + + def verify_node(self): + pass + + def get_input_datatype(self): + ret = DataType[self.get_nodeattr("InputType")] + return ret + + def get_output_datatype(self): + ret = DataType[self.get_nodeattr("EmbeddingType")] + return ret + + def get_instream_width(self): + ibits = self.get_input_datatype().bitwidth() + return ibits + + def get_outstream_width(self): + obits = self.get_output_datatype().bitwidth() + ofm_ch = self.get_nodeattr("EmbeddingDim") + return obits * ofm_ch + + def get_number_output_values(self): + folded_oshape = self.get_folded_output_shape() + return np.prod(folded_oshape[:-1]) + + def global_includes(self): + self.code_gen_dict["$GLOBALS$"] = ['#include "lookup.hpp"'] + + def defines(self, var): + n_inputs = np.prod(self.get_folded_input_shape()[:-1]) + dtype = self.get_input_datatype() + elem_hls_type = dtype.get_hls_datatype_str() + emb_type = DataType[self.get_nodeattr("EmbeddingType")] + emb_hls_type = emb_type.get_hls_datatype_str() + my_defines = [] + my_defines.append( + "#define NumEmbeddings %d" % self.get_nodeattr("NumEmbeddings") + ) + my_defines.append("#define EmbeddingDim %d" % self.get_nodeattr("EmbeddingDim")) + my_defines.append("#define NumInputs %d" % n_inputs) + my_defines.append("#define InputType %s" % elem_hls_type) + my_defines.append("#define EmbeddingType %s" % emb_hls_type) + self.code_gen_dict["$DEFINES$"] = my_defines + + def read_npy_data(self): + code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") + dtype = self.get_input_datatype() + if dtype == DataType["BIPOLAR"]: + # use binary for bipolar storage + dtype = DataType["BINARY"] + elem_bits = dtype.bitwidth() + packed_bits = self.get_instream_width() + packed_hls_type = "ap_uint<%d>" % packed_bits + elem_hls_type = dtype.get_hls_datatype_str() + npy_type = "float" + npy_in = "%s/input_0.npy" % code_gen_dir + self.code_gen_dict["$READNPYDATA$"] = [] + self.code_gen_dict["$READNPYDATA$"].append( + 'npy2apintstream<%s, %s, %d, %s>("%s", in0);' + % (packed_hls_type, elem_hls_type, elem_bits, npy_type, npy_in) + ) + + def dataoutstrm(self): + code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") + dtype = self.get_output_datatype() + if dtype == DataType["BIPOLAR"]: + # use binary for bipolar storage + dtype = DataType["BINARY"] + elem_bits = dtype.bitwidth() + packed_bits = self.get_outstream_width() + packed_hls_type = "ap_uint<%d>" % packed_bits + elem_hls_type = dtype.get_hls_datatype_str() + npy_type = "float" + npy_out = "%s/output.npy" % code_gen_dir + oshape = self.get_folded_output_shape() + oshape_cpp_str = str(oshape).replace("(", "{").replace(")", "}") + + self.code_gen_dict["$DATAOUTSTREAM$"] = [ + 'apintstream2npy<%s, %s, %d, %s>(out, %s, "%s");' + % ( + packed_hls_type, + elem_hls_type, + elem_bits, + npy_type, + oshape_cpp_str, + npy_out, + ) + ] + + def save_as_npy(self): + self.code_gen_dict["$SAVEASCNPY$"] = [] + + def strm_decl(self): + self.code_gen_dict["$STREAMDECLARATIONS$"] = [] + self.code_gen_dict["$STREAMDECLARATIONS$"].append( + 'hls::stream<ap_uint<{}>> in0 ("in0");'.format(self.get_instream_width()) + ) + self.code_gen_dict["$STREAMDECLARATIONS$"].append( + 'hls::stream<ap_uint<{}>> out ("out");'.format(self.get_outstream_width()) + ) + + def docompute(self): + self.code_gen_dict["$DOCOMPUTE$"] = [ + """StreamingLookup<NumEmbeddings, EmbeddingDim, NumInputs, + InputType, EmbeddingType >(in0, out, embeddings);""" + ] + + def blackboxfunction(self): + ibits = self.get_instream_width() + packed_input_hls_type = "ap_uint<%d>" % ibits + obits = self.get_outstream_width() + packed_output_hls_type = "ap_uint<%d>" % obits + self.code_gen_dict["$BLACKBOXFUNCTION$"] = [ + "void %s(hls::stream<%s > &in0, hls::stream<%s > &out)" + % (self.onnx_node.name, packed_input_hls_type, packed_output_hls_type) + ] + + def pragmas(self): + my_pragmas = ["#pragma HLS INTERFACE axis port=in0"] + my_pragmas.append("#pragma HLS INTERFACE axis port=out") + my_pragmas.append("#pragma HLS INTERFACE ap_ctrl_none port=return") + self.code_gen_dict["$PRAGMAS$"] = my_pragmas + + def execute_node(self, context, graph): + mode = self.get_nodeattr("exec_mode") + node = self.onnx_node + exp_ishape = self.get_normal_input_shape() + exp_oshape = self.get_normal_output_shape() + folded_ishape = self.get_folded_input_shape() + folded_oshape = self.get_folded_output_shape() + + if mode == "cppsim": + code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") + elif mode == "rtlsim": + code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") + else: + raise Exception( + """Invalid value for attribute exec_mode! Is currently set to: {} + has to be set to one of the following value ("cppsim", "rtlsim")""".format( + mode + ) + ) + + inp = context[node.input[0]] + assert str(inp.dtype) == "float32", "Input datatype is not float32" + assert inp.shape == exp_ishape, """Input shape doesn't match expected shape.""" + export_idt = self.get_input_datatype() + + reshaped_input = inp.reshape(folded_ishape) + np.save(os.path.join(code_gen_dir, "input_0.npy"), reshaped_input) + + if mode == "cppsim": + # execute the precompiled model + super().exec_precompiled_singlenode_model() + # load output npy file + super().npy_to_dynamic_output(context) + assert ( + context[node.output[0]].shape == folded_oshape + ), "cppsim did not produce expected folded output shape" + context[node.output[0]] = context[node.output[0]].reshape(*exp_oshape) + elif mode == "rtlsim": + sim = self.get_rtlsim() + nbits = self.get_instream_width() + rtlsim_inp = npy_to_rtlsim_input( + "{}/input_0.npy".format(code_gen_dir), export_idt, nbits + ) + super().reset_rtlsim(sim) + super().toggle_clk(sim) + rtlsim_output = self.rtlsim(sim, rtlsim_inp) + odt = export_idt + target_bits = odt.bitwidth() + packed_bits = self.get_outstream_width() + out_npy_path = "{}/output.npy".format(code_gen_dir) + out_shape = self.get_folded_output_shape() + rtlsim_output_to_npy( + rtlsim_output, out_npy_path, odt, out_shape, packed_bits, target_bits + ) + # load and reshape output + output = np.load(out_npy_path) + output = np.asarray([output], dtype=np.float32).reshape(*exp_oshape) + context[node.output[0]] = output + else: + raise Exception( + """Invalid value for attribute exec_mode! Is currently set to: {} + has to be set to one of the following value ("cppsim", "rtlsim")""".format( + mode + ) + ) + assert ( + context[node.output[0]].shape == exp_oshape + ), """Output shape doesn't match expected shape.""" -- GitLab