diff --git a/src/finn/custom_op/fpgadataflow/sameresize_batch.py b/src/finn/custom_op/fpgadataflow/sameresize_batch.py deleted file mode 100644 index cf279dcc889d3afaa4da96833067e36371e6fc01..0000000000000000000000000000000000000000 --- a/src/finn/custom_op/fpgadataflow/sameresize_batch.py +++ /dev/null @@ -1,301 +0,0 @@ -import os -import numpy as np -from onnx import TensorProto, helper -from finn.core.datatype import DataType -from finn.custom_op.fpgadataflow import HLSCustomOp -from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy - - -class SameResize_Batch(HLSCustomOp): - """Class that corresponds to finn-hlslib SameResize function. - Implements 'same' padding on a given input image.""" - - def __init__(self, onnx_node): - super().__init__(onnx_node) - - def get_nodeattr_types(self): - my_attrs = { - "ImgDim": ("i", True, 0), - "KernelDim": ("i", True, 0), - "Stride": ("i", True, 0), - "NumChannels": ("i", True, 0), - # FINN input datatype - "inputDataType": ("s", True, ""), - # distribution of added values to achieve "same" padding - "PaddingStyle": ("i", True, 2), - } - my_attrs.update(super().get_nodeattr_types()) - return my_attrs - - def get_normal_input_shape(self): - idim = self.get_nodeattr("ImgDim") - num_ch = self.get_nodeattr("NumChannels") - - ishape = (1, idim, idim, num_ch) - return ishape - - def get_normal_output_shape(self): - idim = self.get_nodeattr("ImgDim") - num_ch = self.get_nodeattr("NumChannels") - kdim = self.get_nodeattr("KernelDim") - stride = self.get_nodeattr("Stride") - assert idim % stride == 0, "Stride must divide input dimension." - # number of "same" windows over the input data - same_windows = idim // stride - odim = kdim + stride * (same_windows - 1) - - oshape = (1, odim, odim, num_ch) - return oshape - - def get_folded_input_shape(self): - # even though there is no folding in the current hlslib op, - # insert a time multiplexing axis to remain compatible with the - # shapes produced by the rest of the dataflow pipeline - ret = list(self.get_normal_input_shape()) - ret.insert(-1, 1) - return tuple(ret) - - def get_folded_output_shape(self): - # even though there is no folding in the current hlslib op, - # insert a time multiplexing axis to remain compatible with the - # shapes produced by the rest of the dataflow pipeline - ret = list(self.get_normal_output_shape()) - ret.insert(-1, 1) - return tuple(ret) - - def make_shape_compatible_op(self, model): - exp_ishape = self.get_normal_input_shape() - oshape = self.get_normal_output_shape() - ishape = tuple(model.get_tensor_shape(self.onnx_node.input[0])) - assert ishape == exp_ishape, "Unexpect input shape for SameResize." - # implement tensor with correct shape - values = np.random.randn(*oshape).astype(np.float32) - return helper.make_node( - "Constant", - inputs=[], - outputs=[self.onnx_node.output[0]], - value=helper.make_tensor( - name="const_tensor", - data_type=TensorProto.FLOAT, - dims=values.shape, - vals=values.flatten().astype(float), - ), - ) - - def infer_node_datatype(self, model): - node = self.onnx_node - # data type stays the same - dtype = model.get_tensor_datatype(node.input[0]) - model.set_tensor_datatype(node.output[0], dtype) - - def verify_node(self): - pass - - def get_input_datatype(self): - """Returns FINN DataType of input.""" - return DataType[self.get_nodeattr("inputDataType")] - - def get_output_datatype(self): - """Returns FINN DataType of output. (Same as input datatype)""" - return DataType[self.get_nodeattr("inputDataType")] - - def get_instream_width(self): - ibits = self.get_input_datatype().bitwidth() - num_ch = self.get_nodeattr("NumChannels") - - return ibits * num_ch - - def get_outstream_width(self): - obits = self.get_output_datatype().bitwidth() - num_ch = self.get_nodeattr("NumChannels") - - return obits * num_ch - - def get_number_output_values(self): - folded_oshape = self.get_folded_output_shape() - return np.prod(folded_oshape[:-1]) - - def global_includes(self): - self.code_gen_dict["$GLOBALS$"] = ['#include "streamtools.h"'] - - def defines(self, var): - numReps = 1 - self.code_gen_dict["$DEFINES$"] = [ - """#define ImgDim1 {}\n #define KernelDim1 {}\n - #define Stride1 {}\n #define NumChannels1 {}\n - #define PaddingStyle1 {}\n #define numReps {}""".format( - self.get_nodeattr("ImgDim"), - self.get_nodeattr("KernelDim"), - self.get_nodeattr("Stride"), - self.get_nodeattr("NumChannels"), - self.get_nodeattr("PaddingStyle"), - numReps, - ) - ] - - def read_npy_data(self): - code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") - dtype = self.get_input_datatype() - if dtype == DataType.BIPOLAR: - # use binary for bipolar storage - dtype = DataType.BINARY - elem_bits = dtype.bitwidth() - packed_bits = self.get_instream_width() - packed_hls_type = "ap_uint<%d>" % packed_bits - elem_hls_type = dtype.get_hls_datatype_str() - npy_type = "float" - npy_in = "%s/input_0.npy" % code_gen_dir - self.code_gen_dict["$READNPYDATA$"] = [] - self.code_gen_dict["$READNPYDATA$"].append( - 'npy2apintstream<%s, %s, %d, %s>("%s", in0);' - % (packed_hls_type, elem_hls_type, elem_bits, npy_type, npy_in) - ) - - def strm_decl(self): - self.code_gen_dict["$STREAMDECLARATIONS$"] = [] - self.code_gen_dict["$STREAMDECLARATIONS$"].append( - 'hls::stream<ap_uint<{}>> in0 ("in0");'.format(self.get_instream_width()) - ) - self.code_gen_dict["$STREAMDECLARATIONS$"].append( - 'hls::stream<ap_uint<{}>> out ("out");'.format(self.get_outstream_width()) - ) - - def docompute(self): - in_t = self.get_input_datatype().get_hls_datatype_str() - node = self.onnx_node - self.code_gen_dict["$DOCOMPUTE$"] = [ - """{}<ImgDim1, KernelDim1, Stride1, NumChannels1, - {}, PaddingStyle1> (in0, out, numReps);""".format( - node.op_type, in_t - ) - ] - - def dataoutstrm(self): - code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") - dtype = self.get_output_datatype() - if dtype == DataType.BIPOLAR: - # use binary for bipolar storage - dtype = DataType.BINARY - elem_bits = dtype.bitwidth() - packed_bits = self.get_outstream_width() - packed_hls_type = "ap_uint<%d>" % packed_bits - elem_hls_type = dtype.get_hls_datatype_str() - npy_type = "float" - npy_out = "%s/output.npy" % code_gen_dir - oshape = self.get_folded_output_shape() - oshape_cpp_str = str(oshape).replace("(", "{").replace(")", "}") - - self.code_gen_dict["$DATAOUTSTREAM$"] = [ - 'apintstream2npy<%s, %s, %d, %s>(out, %s, "%s");' - % ( - packed_hls_type, - elem_hls_type, - elem_bits, - npy_type, - oshape_cpp_str, - npy_out, - ) - ] - - def save_as_npy(self): - self.code_gen_dict["$SAVEASCNPY$"] = [] - - def blackboxfunction(self): - packed_bits = self.get_instream_width() - packed_hls_type = "ap_uint<%d>" % packed_bits - self.code_gen_dict["$BLACKBOXFUNCTION$"] = [ - "void %s(hls::stream<%s > &in0, hls::stream<%s > &out)" - % (self.onnx_node.name, packed_hls_type, packed_hls_type) - ] - - def pragmas(self): - self.code_gen_dict["$PRAGMAS$"] = ["#pragma HLS INTERFACE axis port=in0"] - self.code_gen_dict["$PRAGMAS$"].append("#pragma HLS INTERFACE axis port=out") - self.code_gen_dict["$PRAGMAS$"].append( - "#pragma HLS INTERFACE ap_ctrl_none port=return" - ) - - def execute_node(self, context, graph): - mode = self.get_nodeattr("exec_mode") - node = self.onnx_node - exp_ishape = self.get_normal_input_shape() - exp_oshape = self.get_normal_output_shape() - folded_oshape = self.get_folded_output_shape() - - if mode == "cppsim": - code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") - elif mode == "rtlsim": - code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") - else: - raise Exception( - """Invalid value for attribute exec_mode! Is currently set to: {} - has to be set to one of the following value ("cppsim", "rtlsim")""".format( - mode - ) - ) - - inp = context[node.input[0]] - assert str(inp.dtype) == "float32", "Input datatype is not float32" - assert ( - inp.shape == exp_ishape - ), """Input shape doesn't - match expected shape (1, ImgDim, ImgDim, NumChannels).""" - if self.get_input_datatype() == DataType.BIPOLAR: - # store bipolar activations as binary - inp = (inp + 1) / 2 - export_idt = DataType.BINARY - else: - export_idt = self.get_input_datatype() - - # no reshaping for input since assuming no folding on input - # make copy before saving array - inp = inp.copy() - np.save(os.path.join(code_gen_dir, "input_0.npy"), inp) - - if mode == "cppsim": - # execute the precompiled model - super().exec_precompiled_singlenode_model() - # load output npy file - super().npy_to_dynamic_output(context) - assert ( - context[node.output[0]].shape == folded_oshape - ), "cppsim \ - did not produce expected ofolded utput shape" - context[node.output[0]] = context[node.output[0]].reshape(*exp_oshape) - elif mode == "rtlsim": - sim = self.get_rtlsim() - nbits = self.get_instream_width() - rtlsim_inp = npy_to_rtlsim_input( - "{}/input_0.npy".format(code_gen_dir), export_idt, nbits - ) - super().reset_rtlsim(sim) - super().toggle_clk(sim) - rtlsim_output = self.rtlsim(sim, rtlsim_inp) - odt = export_idt - target_bits = odt.bitwidth() - packed_bits = self.get_outstream_width() - out_npy_path = "{}/output.npy".format(code_gen_dir) - out_shape = self.get_folded_output_shape() - rtlsim_output_to_npy( - rtlsim_output, out_npy_path, odt, out_shape, packed_bits, target_bits - ) - # load and reshape output - output = np.load(out_npy_path) - output = np.asarray([output], dtype=np.float32).reshape(*exp_oshape) - context[node.output[0]] = output - else: - raise Exception( - """Invalid value for attribute exec_mode! Is currently set to: {} - has to be set to one of the following value ("cppsim", "rtlsim")""".format( - mode - ) - ) - # binary -> bipolar if needed - if self.get_output_datatype() == DataType.BIPOLAR: - out = context[node.output[0]] - out = 2 * out - 1 - context[node.output[0]] = out - assert ( - context[node.output[0]].shape == exp_oshape - ), """Output shape doesn't match expected shape - (1, OutputDim, OutputDim, NumChannels).""" diff --git a/src/finn/custom_op/registry.py b/src/finn/custom_op/registry.py index 238829e03353d79fab7c51e7d1b9dca6e2a96a11..0d62862c222b44d2e507a90a80bfcd4fa405d3fe 100644 --- a/src/finn/custom_op/registry.py +++ b/src/finn/custom_op/registry.py @@ -44,7 +44,6 @@ from finn.custom_op.fpgadataflow.streamingdatawidthconverter_batch import ( StreamingDataWidthConverter_Batch, ) from finn.custom_op.fpgadataflow.globalaccpool_batch import GlobalAccPool_Batch -from finn.custom_op.fpgadataflow.sameresize_batch import SameResize_Batch from finn.custom_op.fpgadataflow.thresholding_batch import Thresholding_Batch from finn.custom_op.fpgadataflow.addstreams_batch import AddStreams_Batch from finn.custom_op.fpgadataflow.labelselect_batch import LabelSelect_Batch @@ -65,7 +64,6 @@ custom_op["MaxPoolNHWC"] = MaxPoolNHWC custom_op["StreamingDataWidthConverter_Batch"] = StreamingDataWidthConverter_Batch custom_op["StreamingFIFO"] = StreamingFIFO custom_op["GlobalAccPool_Batch"] = GlobalAccPool_Batch -custom_op["SameResize_Batch"] = SameResize_Batch custom_op["Thresholding_Batch"] = Thresholding_Batch custom_op["AddStreams_Batch"] = AddStreams_Batch custom_op["LabelSelect_Batch"] = LabelSelect_Batch diff --git a/tests/fpgadataflow/test_fpgadataflow_sameresize.py b/tests/fpgadataflow/test_fpgadataflow_sameresize.py deleted file mode 100644 index 5c4401e1632ad24e7af14729e148c2308762e161..0000000000000000000000000000000000000000 --- a/tests/fpgadataflow/test_fpgadataflow_sameresize.py +++ /dev/null @@ -1,195 +0,0 @@ -import pytest -import os -import numpy as np - -from onnx import TensorProto, helper -from finn.core.datatype import DataType -from finn.core.modelwrapper import ModelWrapper -from finn.util.basic import gen_finn_dt_tensor -import finn.core.onnx_exec as oxe -from finn.transformation.infer_shapes import InferShapes -from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode -from finn.transformation.general import GiveUniqueNodeNames -from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim -from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim -from finn.transformation.fpgadataflow.prepare_ip import PrepareIP -from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP -from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim - -from finn.util.basic import pynq_part_map - -test_pynq_board = os.getenv("PYNQ_BOARD", default="Pynq-Z1") -test_fpga_part = pynq_part_map[test_pynq_board] -target_clk_ns = 10 - - -def make_single_sameresize_modelwrapper( - idim, odim, kdim, stride, num_ch, idt, pad_style -): - inp = helper.make_tensor_value_info( - "inp", TensorProto.FLOAT, [1, idim, idim, num_ch] - ) - outp = helper.make_tensor_value_info( - "outp", TensorProto.FLOAT, [1, odim, odim, num_ch] - ) - - SameResize_node = helper.make_node( - "SameResize_Batch", - ["inp"], - ["outp"], - domain="finn", - backend="fpgadataflow", - ImgDim=idim, - KernelDim=kdim, - Stride=stride, - NumChannels=num_ch, - inputDataType=str(idt.name), - PaddingStyle=pad_style, - ) - - graph = helper.make_graph( - nodes=[SameResize_node], name="sameresize_graph", inputs=[inp], outputs=[outp] - ) - - model = helper.make_model(graph, producer_name="sameresize-model") - model = ModelWrapper(model) - - model.set_tensor_datatype("inp", idt) - model.set_tensor_datatype("outp", idt) - - return model - - -# image dimension -@pytest.mark.parametrize("idim", [8, 16]) -# kernel dimension -@pytest.mark.parametrize("kdim", [2, 3]) -# stride -@pytest.mark.parametrize("stride", [1, 2]) -# number of channels -@pytest.mark.parametrize("num_ch", [1, 2]) -# FINN input datatype -@pytest.mark.parametrize("idt", [DataType.BIPOLAR, DataType.INT4]) -@pytest.mark.slow -@pytest.mark.vivado -def test_fpgadataflow_sameresize_cppsim(idim, kdim, stride, num_ch, idt): - pad_style = 2 - assert idim % stride == 0, "Stride must divide input dimension." - # number of "same" windows over the input data - same_windows = idim // stride - odim = kdim + stride * (same_windows - 1) - - # generate input data - x = gen_finn_dt_tensor(idt, [1, idim, idim, num_ch]) - input_dict = {"inp": x} - - model = make_single_sameresize_modelwrapper( - idim, odim, kdim, stride, num_ch, idt, pad_style - ) - model = model.transform(InferShapes()) - model = model.transform(SetExecMode("cppsim")) - model = model.transform(GiveUniqueNodeNames()) - model = model.transform(PrepareCppSim()) - model = model.transform(CompileCppSim()) - y_produced = oxe.execute_onnx(model, input_dict)["outp"] - expected_oshape = (1, odim, odim, num_ch) - assert y_produced.shape == expected_oshape - - # calculate reference - # calculate correct padding according to parameters - pad = odim - idim - if pad_style == 2: - if pad % 2 == 0: - pad_up = pad // 2 - pad_left = pad // 2 - else: - pad_up = pad // 2 + 1 - pad_left = pad // 2 + 1 - else: - pad_up = pad // 2 - pad_left = pad // 2 - pad_down = pad - pad_up - pad_right = pad - pad_left - - # use numpy padding function as reference - if idt == DataType.BIPOLAR: - y_expected = np.pad( - x, - ((0, 0), (pad_up, pad_down), (pad_left, pad_right), (0, 0)), - "constant", - constant_values=-1, - ) - else: - y_expected = np.pad( - x, ((0, 0), (pad_up, pad_down), (pad_left, pad_right), (0, 0)), "constant" - ) - - assert (y_produced == y_expected).all() - - -# image dimension -@pytest.mark.parametrize("idim", [8, 16]) -# kernel dimension -@pytest.mark.parametrize("kdim", [2, 3]) -# stride -@pytest.mark.parametrize("stride", [1, 2]) -# number of channels -@pytest.mark.parametrize("num_ch", [1, 2]) -# FINN input datatype -@pytest.mark.parametrize("idt", [DataType.BIPOLAR, DataType.INT4]) -@pytest.mark.slow -@pytest.mark.vivado -def test_fpgadataflow_sameresize_rtlsim(idim, kdim, stride, num_ch, idt): - pad_style = 2 - assert idim % stride == 0, "Stride must divide input dimension." - # number of "same" windows over the input data - same_windows = idim // stride - odim = kdim + stride * (same_windows - 1) - - # generate input data - x = gen_finn_dt_tensor(idt, [1, idim, idim, num_ch]) - input_dict = {"inp": x} - - model = make_single_sameresize_modelwrapper( - idim, odim, kdim, stride, num_ch, idt, pad_style - ) - model = model.transform(SetExecMode("rtlsim")) - model = model.transform(GiveUniqueNodeNames()) - model = model.transform(PrepareIP(test_fpga_part, target_clk_ns)) - model = model.transform(HLSSynthIP()) - model = model.transform(PrepareRTLSim()) - y_produced = oxe.execute_onnx(model, input_dict)["outp"] - - expected_oshape = (1, odim, odim, num_ch) - assert y_produced.shape == expected_oshape - - # calculate reference - # calculate correct padding according to parameters - pad = odim - idim - if pad_style == 2: - if pad % 2 == 0: - pad_up = pad // 2 - pad_left = pad // 2 - else: - pad_up = pad // 2 + 1 - pad_left = pad // 2 + 1 - else: - pad_up = pad // 2 - pad_left = pad // 2 - pad_down = pad - pad_up - pad_right = pad - pad_left - - # use numpy padding function as reference - if idt == DataType.BIPOLAR: - y_expected = np.pad( - x, - ((0, 0), (pad_up, pad_down), (pad_left, pad_right), (0, 0)), - "constant", - constant_values=-1, - ) - else: - y_expected = np.pad( - x, ((0, 0), (pad_up, pad_down), (pad_left, pad_right), (0, 0)), "constant" - ) - - assert (y_produced == y_expected).all()