diff --git a/src/finn/custom_op/fpgadataflow/streamingmaxpool_batch.py b/src/finn/custom_op/fpgadataflow/streamingmaxpool_batch.py index daa8319cd3699c9482eed06f1042ae6694dbc5ca..d2e1406d2c4f77e28ee11573cd67831a62b97564 100755 --- a/src/finn/custom_op/fpgadataflow/streamingmaxpool_batch.py +++ b/src/finn/custom_op/fpgadataflow/streamingmaxpool_batch.py @@ -32,6 +32,7 @@ import warnings from finn.core.datatype import DataType from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp +from finn.custom_op.general.maxpoolnhwc import compute_pool_output_dim from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy @@ -44,6 +45,7 @@ class StreamingMaxPool_Batch(HLSCustomOp): "PoolDim": ("ints", True, []), # [H, W] = [Y, X] "NumChannels": ("i", True, 0), "PE": ("i", True, 0), + "CeilMode": ("i", False, 0), # FINN DataTypes for inputs/outputs "dataType": ("s", True, ""), } @@ -96,6 +98,7 @@ class StreamingMaxPool_Batch(HLSCustomOp): ifm_dim_h, ifm_dim_w = self.get_nodeattr("ImgDim") k_h, k_w = tuple(self.get_nodeattr("PoolDim")) ifm_ch = self.get_nodeattr("NumChannels") + ceil_mode = self.get_nodeattr("CeilMode") if not self.is_1d(): assert ( ifm_dim_h % k_h == 0 @@ -103,8 +106,8 @@ class StreamingMaxPool_Batch(HLSCustomOp): assert ( ifm_dim_w % k_w == 0 ), "StreamingMaxPool needs ImgDim_w % PoolDim_w == 0" - ofm_dim_h = int(np.floor(ifm_dim_h / k_w)) - ofm_dim_w = int(np.floor(ifm_dim_w / k_w)) + ofm_dim_h = compute_pool_output_dim(ifm_dim_h, k_h, k_h, 0, ceil_mode) + ofm_dim_w = compute_pool_output_dim(ifm_dim_w, k_w, k_w, 0, ceil_mode) oshape = (1, ofm_dim_h, ofm_dim_w, ifm_ch) return oshape @@ -197,15 +200,19 @@ class StreamingMaxPool_Batch(HLSCustomOp): def defines(self, var): numReps = 1 ifm_dim, k, ifm_ch = self.get_1d_attrs_normalized() + ceil_mode = self.get_nodeattr("CeilMode") + output_size = compute_pool_output_dim(ifm_dim[1], k[1], k[1], 0, ceil_mode) if self.is_1d(): self.code_gen_dict["$DEFINES$"] = [ """#define ImgDim {}\n #define PoolDim {}\n - #define NumChannels {}\n #define PE {}\n #define numReps {}""".format( + #define NumChannels {}\n #define PE {}\n #define OutputSize {} + \n #define numReps {}""".format( ifm_dim[1], k[1], self.get_nodeattr("NumChannels"), self.get_nodeattr("PE"), + output_size, numReps, ) ] @@ -264,7 +271,8 @@ class StreamingMaxPool_Batch(HLSCustomOp): if self.is_1d(): op = "StreamingMaxPool_Precision_1d" self.code_gen_dict["$DOCOMPUTE$"] = [ - "%s<ImgDim, PoolDim, NumChannels, PE, %s, %s>(in0, out);" + """%s<ImgDim, PoolDim, NumChannels, PE, + OutputSize, %s, %s>(in0, out);""" % (op, dtype_hls, minval_str) ] else: diff --git a/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py b/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py index eb9912b48265f771a1d96c2364c33889e7535b29..652136c82303d8b9ca6772f228312fe96efd33ea 100644 --- a/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py +++ b/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py @@ -363,6 +363,7 @@ class InferStreamingMaxPool(Transformation): ifm_dim_h = mp_in_shape[1] ifm_dim_w = mp_in_shape[2] pe = 1 + ceil_mode = mp_inst.get_nodeattr("ceil_mode") is_1d = (ifm_dim_h == 1 and k_h == 1) or (ifm_dim_w == 1 and k_w == 1) is_divisable = ifm_dim_h % k_h == 0 or ifm_dim_w % k_w == 0 if is_1d or is_divisable: @@ -378,6 +379,7 @@ class InferStreamingMaxPool(Transformation): ImgDim=(ifm_dim_h, ifm_dim_w), dataType=dt.name, PE=pe, + CeilMode=ceil_mode, name="StreamingMaxPool_Batch_" + n.name, ) graph.node.insert(node_ind, new_node) diff --git a/src/finn/transformation/streamline/reorder.py b/src/finn/transformation/streamline/reorder.py index 0cdd6651d982426b1d81d7313346dcd899294bf7..9751f41d7eb1d82229a3409f74ea6ef7441b4729 100644 --- a/src/finn/transformation/streamline/reorder.py +++ b/src/finn/transformation/streamline/reorder.py @@ -670,6 +670,13 @@ class MakeMaxPoolNHWC(Transformation): if consumer is not None and consumer.op_type == "Transpose": perms = list(get_by_name(consumer.attribute, "perm").ints) if perms == [0, 2, 3, 1]: + ceil_mode = get_by_name(n.attribute, "ceil_mode") + if ceil_mode is not None: + ceil_mode = ceil_mode.i + else: + ceil_mode = ( + 0 # default to ceil_mode=0 (equivalent to np.floor) + ) n.op_type = "MaxPoolNHWC" n.domain = "finn.custom_op.general" start_name = n.input[0] @@ -683,12 +690,20 @@ class MakeMaxPoolNHWC(Transformation): n.output[0] = end_name model.set_tensor_shape(mid_name, (b, hi, wi, c)) model.set_tensor_shape(end_name, (b, ho, wo, c)) + getCustomOp(n).set_nodeattr("ceil_mode", ceil_mode) graph.node.remove(consumer) graph.node.insert(node_ind - 1, consumer) graph_modified = True elif producer is not None and producer.op_type == "Transpose": perms = list(get_by_name(producer.attribute, "perm").ints) if perms == [0, 3, 1, 2]: + ceil_mode = get_by_name(n.attribute, "ceil_mode") + if ceil_mode is not None: + ceil_mode = ceil_mode.i + else: + ceil_mode = ( + 0 # default to ceil_mode=0 (equivalent to np.floor) + ) n.op_type = "MaxPoolNHWC" n.domain = "finn.custom_op.general" start_name = producer.input[0] @@ -702,6 +717,7 @@ class MakeMaxPoolNHWC(Transformation): n.output[0] = mid_name model.set_tensor_shape(mid_name, (b, ho, wo, c)) model.set_tensor_shape(end_name, (b, c, ho, wo)) + getCustomOp(n).set_nodeattr("ceil_mode", ceil_mode) graph.node.remove(producer) graph.node.insert(node_ind, producer) graph_modified = True diff --git a/tests/fpgadataflow/test_layer_streaming_maxpool_batch.py b/tests/fpgadataflow/test_layer_streaming_maxpool_batch.py index 884e64e06cace43e92c7a62073f59aa00f0d9c0e..313748bff930a06ccc68b8f06c0f24be178990f0 100644 --- a/tests/fpgadataflow/test_layer_streaming_maxpool_batch.py +++ b/tests/fpgadataflow/test_layer_streaming_maxpool_batch.py @@ -35,19 +35,22 @@ import finn.core.onnx_exec as oxe from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer from finn.core.datatype import DataType from finn.core.modelwrapper import ModelWrapper +from finn.custom_op.general.maxpoolnhwc import compute_pool_output_dim # from finn.custom_op.registry import getCustomOp from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim +from finn.transformation.fpgadataflow.convert_to_hls_layers import InferStreamingMaxPool from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim from finn.transformation.fpgadataflow.prepare_ip import PrepareIP from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode from finn.transformation.general import GiveUniqueNodeNames +from finn.transformation.infer_shapes import InferShapes from finn.util.basic import gen_finn_dt_tensor -def make_single_maxpoolnhwc_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, idt): +def make_single_maxpoolnhwc_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, idt, ceil_mode): k_h, k_w = k ifm_dim_h, ifm_dim_w = ifm_dim ofm_dim_h, ofm_dim_w = ofm_dim @@ -66,6 +69,7 @@ def make_single_maxpoolnhwc_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, idt): domain="finn.custom_op.general", kernel_shape=[k_h, k_w], strides=[k_h, k_w], + ceil_mode=ceil_mode, pads=[0, 0, 0, 0], ) graph = helper.make_graph( @@ -81,7 +85,9 @@ def make_single_maxpoolnhwc_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, idt): return model -def make_single_streamingmaxpool_modelwrapper(k, ifm_ch, pe, ifm_dim, ofm_dim, idt): +def make_single_streamingmaxpool_modelwrapper( + k, ifm_ch, pe, ifm_dim, ofm_dim, idt, ceil_mode +): k_h, k_w = k ifm_dim_h, ifm_dim_w = ifm_dim ofm_dim_h, ofm_dim_w = ofm_dim @@ -103,6 +109,7 @@ def make_single_streamingmaxpool_modelwrapper(k, ifm_ch, pe, ifm_dim, ofm_dim, i NumChannels=ifm_ch, PE=pe, ImgDim=[ifm_dim_h, ifm_dim_w], + CeilMode=ceil_mode, dataType=idt.name, ) graph = helper.make_graph( @@ -129,16 +136,20 @@ def prepare_inputs(input_tensor): # kernel size @pytest.mark.parametrize("k", [2, 4]) # input dimension -@pytest.mark.parametrize("ifm_dim", [4, 8]) +@pytest.mark.parametrize("ifm_dim", [4, 10]) # input channels @pytest.mark.parametrize("ifm_ch", [1, 3]) # 1,3 # pe @pytest.mark.parametrize("pe", [1, 3]) +# ceil mode +@pytest.mark.parametrize("ceil_mode", [1]) # execution mode @pytest.mark.parametrize("exec_mode", ["rtlsim", "cppsim"]) @pytest.mark.slow @pytest.mark.vivado -def test_fpgadataflow_streamingmaxpool(idt, dim_1d, k, ifm_dim, ifm_ch, pe, exec_mode): +def test_fpgadataflow_streamingmaxpool( + idt, dim_1d, k, ifm_dim, ifm_ch, pe, ceil_mode, exec_mode +): ifm_dim_h = ifm_dim k_h = k if dim_1d: @@ -152,12 +163,12 @@ def test_fpgadataflow_streamingmaxpool(idt, dim_1d, k, ifm_dim, ifm_ch, pe, exec stride_h = k_h stride_w = k_w - ofm_dim_h = int(((ifm_dim_h - k_h) / stride_h) + 1) - ofm_dim_w = int(((ifm_dim_w - k_w) / stride_w) + 1) + ofm_dim_h = compute_pool_output_dim(ifm_dim_h, k_h, stride_h, 0, ceil_mode) + ofm_dim_w = compute_pool_output_dim(ifm_dim_w, k_w, stride_w, 0, ceil_mode) ofm_dim = (ofm_dim_h, ofm_dim_w) if idt == DataType["BIPOLAR"] and dim_1d: pytest.skip("Skipping binary StreamingMaxPool_1d (not implemented)") - if ifm_dim_h % k_h != 0 or ifm_dim_w % k_w != 0: + if (ifm_dim_h % k_h != 0 or ifm_dim_w % k_w != 0) and (not dim_1d): pytest.skip("Skipping StreamingMaxPool test w/ ImgDim % PoolDim != 0") if pe > ifm_ch: pytest.skip("SIMD cannot be larger than number of input channels") @@ -166,12 +177,13 @@ def test_fpgadataflow_streamingmaxpool(idt, dim_1d, k, ifm_dim, ifm_ch, pe, exec # prepare input data input_dict = prepare_inputs(x) - golden = make_single_maxpoolnhwc_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, idt) + golden = make_single_maxpoolnhwc_modelwrapper( + k, ifm_ch, ifm_dim, ofm_dim, idt, ceil_mode + ) y_expected = oxe.execute_onnx(golden, input_dict)["outp"] - model = make_single_streamingmaxpool_modelwrapper( - k, ifm_ch, pe, ifm_dim, ofm_dim, idt - ) + model = golden.transform(InferStreamingMaxPool()) + model = model.transform(InferShapes()) if exec_mode == "cppsim": model = model.transform(SetExecMode("cppsim")) diff --git a/tests/transformation/streamline/test_maxpool_nhwc.py b/tests/transformation/streamline/test_maxpool_nhwc.py new file mode 100644 index 0000000000000000000000000000000000000000..e600577981f0e7305b8183fcfabec8cc1e042712 --- /dev/null +++ b/tests/transformation/streamline/test_maxpool_nhwc.py @@ -0,0 +1,108 @@ +import pytest + +import onnx +import onnx.helper as oh +from onnx import TensorProto + +import finn.core.onnx_exec as oxe +from finn.core.datatype import DataType +from finn.core.modelwrapper import ModelWrapper +from finn.custom_op.general.maxpoolnhwc import compute_pool_output_dim +from finn.transformation.infer_shapes import InferShapes +from finn.transformation.streamline.reorder import MakeMaxPoolNHWC +from finn.util.basic import gen_finn_dt_tensor + + +def create_maxpool(ifm_dim, ifm_ch, kernel_shape, pads, strides, ceil_mode, idt): + ofm_dim_h = compute_pool_output_dim( + ifm_dim[0], kernel_shape[0], strides[0], pads[0], ceil_mode + ) + ofm_dim_w = compute_pool_output_dim( + ifm_dim[1], kernel_shape[1], strides[1], pads[1], ceil_mode + ) + inp = oh.make_tensor_value_info( + "inp", TensorProto.FLOAT, [1, ifm_ch, ifm_dim[0], ifm_dim[1]] + ) + outp_mp = oh.make_tensor_value_info( + "outp_mp", TensorProto.FLOAT, [1, ifm_ch, ofm_dim_h, ofm_dim_w] + ) + outp = oh.make_tensor_value_info( + "outp", TensorProto.FLOAT, [1, ofm_dim_h, ofm_dim_w, ifm_ch] + ) + + maxpool_node = oh.make_node( + "MaxPool", + inputs=["inp"], + outputs=["out_mp"], + ceil_mode=ceil_mode, + kernel_shape=kernel_shape, + pads=pads, + strides=strides, + ) + + transpose_node = onnx.helper.make_node( + "Transpose", + inputs=["out_mp"], + outputs=["outp"], + name="Transpose1", + perm=[0, 2, 3, 1], + ) + + graph = oh.make_graph( + nodes=[maxpool_node, transpose_node], + name="maxpool_graph", + inputs=[inp], + outputs=[outp], + value_info=[outp_mp], + ) + + model = oh.make_model(graph, producer_name="maxpool_model") + model = ModelWrapper(model) + model.set_tensor_datatype("inp", idt) + model.set_tensor_datatype("outp", idt) + + model = model.transform(InferShapes()) + + return model + + +# input dimension +@pytest.mark.parametrize("ifm_dim", [[8, 8], [9, 9]]) +# input channels +@pytest.mark.parametrize("ifm_ch", [3]) +# kernel shape +@pytest.mark.parametrize("kernel_shape", [[2, 2]]) +# padding +@pytest.mark.parametrize("pads", [[0, 0, 0, 0], [1, 1, 1, 1]]) +# strides +@pytest.mark.parametrize("strides", [[2, 2]]) +# ceil_mode +@pytest.mark.parametrize("ceil_mode", [0, 1]) +# input datatype +@pytest.mark.parametrize("idt", [DataType["INT4"]]) +def test_maxpool_nhwc(ifm_dim, ifm_ch, kernel_shape, pads, strides, ceil_mode, idt): + # create MaxPool node + maxpool_model = create_maxpool( + ifm_dim, ifm_ch, kernel_shape, pads, strides, ceil_mode, idt + ) + + # generate input tensor for testing + input_tensor = gen_finn_dt_tensor(idt, [1, ifm_ch, ifm_dim[0], ifm_dim[1]]) + input_dict = {"inp": input_tensor} + + # execute first model + output_dict = oxe.execute_onnx(maxpool_model, input_dict) + expected = output_dict["outp"] + + # transform MaxPool into MaxPoolNHWC + maxpool_model = maxpool_model.transform(MakeMaxPoolNHWC()) + + # execute transformed model + output_node_name = maxpool_model.graph.output[0].name + output_dict = oxe.execute_onnx( + maxpool_model, input_dict, return_full_exec_context=False + ) + output = output_dict[output_node_name] + + # compare outputs + assert (expected == output).all()