diff --git a/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter_batch.py b/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter_batch.py new file mode 100644 index 0000000000000000000000000000000000000000..c94a19fa0cd2db764b6a8d19e2d23288c4649ff0 --- /dev/null +++ b/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter_batch.py @@ -0,0 +1,372 @@ +# Copyright (c) 2020, Xilinx +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of FINN nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import os +import numpy as np +from pyverilator import PyVerilator +from finn.custom_op.fpgadataflow import HLSCustomOp +from finn.core.datatype import DataType +from onnx import TensorProto, helper +from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy + +# does not do anything at the ONNX node-by-node level, and input-output +# tensor shapes are the same. performs data width conversion at the rtlsim level + + +class StreamingDataWidthConverter_Batch(HLSCustomOp): + """Class that corresponds to finn-hlslib StreamingDataWidthConverter_Batch + function.""" + + def get_nodeattr_types(self): + my_attrs = { + # shape of input/output tensors + "shape": ("ints", True, []), + # bit width of input and output streams + "inWidth": ("i", True, 0), + "outWidth": ("i", True, 0), + # FINN DataTypes for inputs/outputs + "dataType": ("s", True, ""), + } + my_attrs.update(super().get_nodeattr_types()) + return my_attrs + + def get_input_datatype(self): + """Returns FINN DataType of input.""" + return DataType[self.get_nodeattr("dataType")] + + def get_output_datatype(self): + """Returns FINN DataType of output.""" + return DataType[self.get_nodeattr("dataType")] + + def get_normal_input_shape(self): + ishape = self.get_nodeattr("shape") + return ishape + + def get_normal_output_shape(self): + oshape = self.get_nodeattr("shape") + return oshape + + def get_folded_input_shape(self): + ishape = self.get_normal_input_shape() + dummy_t = np.random.randn(*ishape) + iwidth = self.get_nodeattr("inWidth") + ibits = self.get_input_datatype().bitwidth() + assert ( + iwidth % ibits == 0 + ), """DWC input width must be divisible by + input element bitwidth""" + ielems = int(iwidth // ibits) + dummy_t = dummy_t.reshape(-1, ielems) + return dummy_t.shape + + def get_folded_output_shape(self): + oshape = self.get_normal_output_shape() + dummy_t = np.random.randn(*oshape) + owidth = self.get_nodeattr("outWidth") + obits = self.get_output_datatype().bitwidth() + assert ( + owidth % obits == 0 + ), """DWC output width must be divisible by + input element bitwidth""" + oelems = int(owidth // obits) + dummy_t = dummy_t.reshape(-1, oelems) + return dummy_t.shape + + def get_number_output_values(self): + folded_oshape = self.get_folded_output_shape() + return np.prod(folded_oshape[:-1]) + + def get_number_input_values(self): + folded_ishape = self.get_folded_input_shape() + return np.prod(folded_ishape[:-1]) + + def get_in_stream_width(self): + return self.get_nodeattr("inWidth") + + def get_out_stream_width(self): + return self.get_nodeattr("outWidth") + + def make_shape_compatible_op(self, model): + exp_ishape = self.get_normal_input_shape() + oshape = self.get_normal_output_shape() + ishape = tuple(model.get_tensor_shape(self.onnx_node.input[0])) + assert ishape == exp_ishape, "Unexpect input shape for StreamingDWC." + # implement tensor with correct shape + values = np.random.randn(*oshape).astype(np.float32) + return helper.make_node( + "Constant", + inputs=[], + outputs=[self.onnx_node.output[0]], + value=helper.make_tensor( + name="const_tensor", + data_type=TensorProto.FLOAT, + dims=values.shape, + vals=values.flatten().astype(float), + ), + ) + + def infer_node_datatype(self, model): + node = self.onnx_node + # data type stays the same + dtype = model.get_tensor_datatype(node.input[0]) + model.set_tensor_datatype(node.output[0], dtype) + + def verify_node(self): + info_messages = [] + + # verify that "domain" is set to "finn" + domain_value = self.onnx_node.domain + if domain_value == "finn": + info_messages.append("Attribute domain is set correctly") + else: + info_messages.append('Attribute domain should be set to "finn"') + + # verify that "backend" is set to "fpgadataflow" + backend_value = self.get_nodeattr("backend") + if backend_value == "fpgadataflow": + info_messages.append("Attribute backend is set correctly") + else: + info_messages.append('Attribute backend should be set to "fpgadataflow"') + + # verify the number of inputs + if len(self.onnx_node.input) == 1: + info_messages.append("The number of inputs is correct") + else: + info_messages.append("""StreamingDWC needs 1 data input""") + + return info_messages + + def bram_estimation(self): + pass + + def lut_estimation(self): + pass + + def global_includes(self): + self.code_gen_dict["$GLOBALS$"] = ['#include "streamtools.h"'] + + def defines(self, var): + numReps = 1 + numInWords = 1 + inWidth = self.get_nodeattr("inWidth") + outWidth = self.get_nodeattr("outWidth") + if outWidth > inWidth: + numInWords = int(outWidth // inWidth) + self.code_gen_dict["$DEFINES$"] = [ + "#define InWidth %d " % inWidth, + "#define OutWidth %d " % outWidth, + "#define NumInWords %d " % numInWords, + "#define numReps %d" % numReps, + ] + + def read_npy_data(self): + code_gen_dir = self.get_nodeattr("code_gen_dir_npysim") + dtype = self.get_input_datatype() + if dtype == DataType.BIPOLAR: + # use binary for bipolar storage + dtype = DataType.BINARY + elem_bits = dtype.bitwidth() + packed_bits = self.get_in_stream_width() + packed_hls_type = "ap_uint<%d>" % packed_bits + elem_hls_type = dtype.get_hls_datatype_str() + npy_type = "float" + npy_in = "%s/input_0.npy" % code_gen_dir + self.code_gen_dict["$READNPYDATA$"] = [] + self.code_gen_dict["$READNPYDATA$"].append( + 'npy2apintstream<%s, %s, %d, %s>("%s", in0);' + % (packed_hls_type, elem_hls_type, elem_bits, npy_type, npy_in) + ) + + def strm_decl(self): + self.code_gen_dict["$STREAMDECLARATIONS$"] = [] + self.code_gen_dict["$STREAMDECLARATIONS$"].append( + 'hls::stream<ap_uint<{}>> in0 ("in0");'.format(self.get_stream_width()) + ) + self.code_gen_dict["$STREAMDECLARATIONS$"].append( + 'hls::stream<ap_uint<{}>> out ("out");'.format(self.get_stream_width()) + ) + + def docompute(self): + # TODO continue with fxns below, they are copy-pasted + op = "StreamingDataWidthConverter_Batch" + self.code_gen_dict["$DOCOMPUTE$"] = [ + "%s<InWidth, OutWidth, NumInWords>(in0, out, numReps);" % (op) + ] + + def dataoutstrm(self): + code_gen_dir = self.get_nodeattr("code_gen_dir_npysim") + dtype = self.get_output_datatype() + if dtype == DataType.BIPOLAR: + # use binary for bipolar storage + dtype = DataType.BINARY + elem_bits = dtype.bitwidth() + packed_bits = self.get_stream_width() + packed_hls_type = "ap_uint<%d>" % packed_bits + elem_hls_type = dtype.get_hls_datatype_str() + npy_type = "float" + npy_out = "%s/output.npy" % code_gen_dir + oshape = self.get_folded_output_shape() + oshape_cpp_str = str(oshape).replace("(", "{").replace(")", "}") + + self.code_gen_dict["$DATAOUTSTREAM$"] = [ + 'apintstream2npy<%s, %s, %d, %s>(out, %s, "%s");' + % ( + packed_hls_type, + elem_hls_type, + elem_bits, + npy_type, + oshape_cpp_str, + npy_out, + ) + ] + + def save_as_npy(self): + self.code_gen_dict["$SAVEASCNPY$"] = [] + + def blackboxfunction(self): + packed_bits = self.get_stream_width() + packed_hls_type = "ap_uint<%d>" % packed_bits + self.code_gen_dict["$BLACKBOXFUNCTION$"] = [ + "void %s(hls::stream<%s > &in0, hls::stream<%s > &out)" + % (self.onnx_node.name, packed_hls_type, packed_hls_type) + ] + + def pragmas(self): + self.code_gen_dict["$PRAGMAS$"] = ["#pragma HLS INTERFACE axis port=in0"] + self.code_gen_dict["$PRAGMAS$"].append("#pragma HLS INTERFACE axis port=out") + self.code_gen_dict["$PRAGMAS$"].append( + "#pragma HLS INTERFACE ap_ctrl_none port=return" + ) + + def execute_node(self, context, graph): + mode = self.get_nodeattr("exec_mode") + node = self.onnx_node + exp_ishape = self.get_normal_input_shape() + exp_oshape = self.get_normal_output_shape() + folded_oshape = self.get_folded_output_shape() + + # TODO ensure codegen dir exists + if mode == "npysim": + code_gen_dir = self.get_nodeattr("code_gen_dir_npysim") + elif mode == "rtlsim": + code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") + else: + raise Exception( + """Invalid value for attribute exec_mode! Is currently set to: {} + has to be set to one of the following value ("npysim", "rtlsim")""".format( + mode + ) + ) + + inp = context[node.input[0]] + assert str(inp.dtype) == "float32", "Input datatype is not float32" + assert ( + inp.shape == exp_ishape + ), """Input shape doesn't + match expected shape (1, ifm_dim, ifm_dim, ifm_ch).""" + if self.get_input_datatype() == DataType.BIPOLAR: + # store bipolar activations as binary + inp = (inp + 1) / 2 + export_idt = DataType.BINARY + else: + export_idt = self.get_input_datatype() + # no reshaping for input since assuming no folding on input + # make copy before saving array + reshaped_input = inp.copy() + np.save(os.path.join(code_gen_dir, "input_0.npy"), reshaped_input) + + if mode == "npysim": + # execute the precompiled model + super().exec_precompiled_singlenode_model() + # load output npy file + super().npy_to_dynamic_output(context) + assert ( + context[node.output[0]].shape == folded_oshape + ), "npysim \ + did not produce expected ofolded utput shape" + context[node.output[0]] = context[node.output[0]].reshape(*exp_oshape) + elif mode == "rtlsim": + prefixed_top_name = "%s_%s" % (node.name, node.name) + # check if needed file exists + verilog_file = "{}/project_{}/sol1/impl/verilog/{}.v".format( + code_gen_dir, node.name, prefixed_top_name + ) + if os.path.isfile(verilog_file): + nbits = self.get_stream_width() + rtlsim_inp = npy_to_rtlsim_input( + "{}/input_0.npy".format(code_gen_dir), export_idt, nbits + ) + sim = PyVerilator.build( + verilog_file, + verilog_path=[ + "{}/project_{}/sol1/impl/verilog/".format( + code_gen_dir, node.name + ) + ], + ) + super().reset_rtlsim(sim) + super().toggle_clk(sim) + rtlsim_output = self.rtlsim(sim, rtlsim_inp) + odt = export_idt + target_bits = odt.bitwidth() + packed_bits = self.get_stream_width() + out_npy_path = "{}/output.npy".format(code_gen_dir) + out_shape = self.get_folded_output_shape() + rtlsim_output_to_npy( + rtlsim_output, + out_npy_path, + odt, + out_shape, + packed_bits, + target_bits, + ) + # load and reshape output + output = np.load(out_npy_path) + output = np.asarray([output], dtype=np.float32).reshape(*exp_oshape) + context[node.output[0]] = output + else: + raise Exception( + """Found no verilog files for this node, + did you run the codegen_ipgen transformation?""" + ) + else: + raise Exception( + """Invalid value for attribute exec_mode! Is currently set to: {} + has to be set to one of the following value ("npysim", "rtlsim")""".format( + mode + ) + ) + # binary -> bipolar if needed + if self.get_output_datatype() == DataType.BIPOLAR: + out = context[node.output[0]] + out = 2 * out - 1 + context[node.output[0]] = out + assert ( + context[node.output[0]].shape == exp_oshape + ), """Output + shape doesn't match expected shape (1, ofm_dim, ofm_dim, k*k*ifm_ch)."""