From b5520dc15175bd91517b2dd3209cee9893fe63a0 Mon Sep 17 00:00:00 2001 From: Felix Jentzsch <fepaje@mail.upb.de> Date: Tue, 25 May 2021 16:44:00 +0200 Subject: [PATCH] Further cleanup --- docker/finn_entrypoint.sh | 2 +- .../fpgadataflow/streamingmaxpool_batch.py | 117 +++++++----------- .../fpgadataflow/convert_to_hls_layers.py | 39 ++---- .../test_layer_streaming_maxpool_batch.py | 50 ++++---- 4 files changed, 81 insertions(+), 127 deletions(-) diff --git a/docker/finn_entrypoint.sh b/docker/finn_entrypoint.sh index 3933d8918..46a294cd7 100644 --- a/docker/finn_entrypoint.sh +++ b/docker/finn_entrypoint.sh @@ -16,7 +16,7 @@ FINN_BASE_COMMIT=ac0b86a63eb937b869bfa453a996a8a8b8506546 FINN_EXP_COMMIT=e9f97dcdb4db2f889b0f36af079a6a1792b7d4de BREVITAS_COMMIT=14abbe1e7ef82485d79415871fcf5766b0a40a00 CNPY_COMMIT=4e8810b1a8637695171ed346ce68f6984e585ef4 -HLSLIB_COMMIT=4d74baefa79df48b5a0348d63f39a26df075de51 +HLSLIB_COMMIT=b37337c571b98f40423020bc79f97e189f2661d5 PYVERILATOR_COMMIT=e2ff74030de3992dcac54bf1b6aad2915946e8cb OMX_COMMIT=1bae737669901e762f581af73348332b5c4b2ada diff --git a/src/finn/custom_op/fpgadataflow/streamingmaxpool_batch.py b/src/finn/custom_op/fpgadataflow/streamingmaxpool_batch.py index a15c91af1..dba279878 100644 --- a/src/finn/custom_op/fpgadataflow/streamingmaxpool_batch.py +++ b/src/finn/custom_op/fpgadataflow/streamingmaxpool_batch.py @@ -58,6 +58,23 @@ class StreamingMaxPool_Batch(HLSCustomOp): """Returns FINN DataType of output.""" return DataType[self.get_nodeattr("dataType")] + def get_1d_attrs_normalized(self): + # support both (1, D) and (D, 1) cases transparently: + # assume the dummy ('1') dimension is the Y-dimension, i.e. + # images and kernels (and their attributes) of dimension + # [H, W] = [Y, X] = [D, 1] or [1, D] are always mapped to [1, D] + ifm_dim = self.get_nodeattr("ImgDim") + k = self.get_nodeattr("PoolDim") + ifm_ch = self.get_nodeattr("NumChannels") + if ifm_dim[1] == 1: + ifm_dim = ifm_dim[::-1] + k = k[::-1] + return (ifm_dim, k, ifm_ch) + + def is_1d(self): + ifm_dim, k, ifm_ch = self.get_1d_attrs_normalized() + return (ifm_dim[0] == 1) and (k[0] == 1) + def get_normal_input_shape(self): ifm_dim_h, ifm_dim_w = self.get_nodeattr("ImgDim") ifm_ch = self.get_nodeattr("NumChannels") @@ -73,8 +90,8 @@ class StreamingMaxPool_Batch(HLSCustomOp): return tuple(ret) def get_normal_output_shape(self): - k_h, k_w = self.get_nodeattr("PoolDim") ifm_dim_h, ifm_dim_w = self.get_nodeattr("ImgDim") + k_h, k_w = tuple(self.get_nodeattr("PoolDim")) ifm_ch = self.get_nodeattr("NumChannels") stride_h = k_h stride_w = k_w @@ -100,13 +117,12 @@ class StreamingMaxPool_Batch(HLSCustomOp): def get_exp_cycles(self): # derived from StreamingMaxPool_Batch loop nest - k_h, k_w = self.get_nodeattr("PoolDim") - ifm_dim_h, ifm_dim_w = self.get_nodeattr("ImgDim") - # TODO: correct formula - if k_h == k_w: # todo: better condition - return int(ifm_dim_h * (ifm_dim_h + (ifm_dim_h / k_h))) + ifm_dim, k, ifm_ch = self.get_1d_attrs_normalized() + if self.is_1d(): + return int(ifm_dim[1] + k[1]) else: - return int((ifm_dim_h / k_h) * (k_h + 1)) + # TODO: adjust inaccurate formula + return int(ifm_dim[1] * (ifm_dim[1] + (ifm_dim[1] / k[1]))) def get_instream_width(self): dt_bits = self.get_input_datatype().bitwidth() @@ -173,51 +189,17 @@ class StreamingMaxPool_Batch(HLSCustomOp): def defines(self, var): numReps = 2 - k = self.get_nodeattr("PoolDim") - ifm_dim = self.get_nodeattr("ImgDim") - - if k[0] == k[1]: # todo: better condition - self.code_gen_dict["$DEFINES$"] = [ - """#define ImgDim {}\n #define PoolDim {}\n - #define NumChannels {}\n #define numReps {}""".format( - ifm_dim[0], - k[0], - self.get_nodeattr("NumChannels"), - numReps, - ) - ] - else: - # TODO: use the same convention als convinpgen?: - - # For the kernel, presenting the input data of size D as - # [H, W] = [Y, X] = [1, D] or [D, 1] - # effectively gives the same result. Because the - # ConvolutionInputGenerator_NonSquare_Dilated(_dws) kernel currently only - # supports dilation>1 along the X-axis and the - # ConvolutionInputGenerator_NonSquare only works for stride>1 along the - # X-axis, we are working with the following assumption: - # the dummy ('1') dimension is the Y-dimension, i.e. - # images and kernels (and their attributes) of dimension - # [H, W] = [Y, X] = [D, 1] or [1, D] are always mapped to [1, D] - if ifm_dim[1] == 1: - ifm_dim = ifm_dim[::-1] - k = k[::-1] - - ifm_dim_y, ifm_dim_x = ifm_dim - k_y, k_x = k - - self.code_gen_dict["$DEFINES$"] = [ - """#define ImgDim_x {}\n #define ImgDim_y {}\n - #define PoolDim_x {}\n #define PoolDim_y {}\n - #define NumChannels {}\n #define numReps {}""".format( - ifm_dim_x, - ifm_dim_y, - k_x, - k_y, - self.get_nodeattr("NumChannels"), - numReps, - ) - ] + ifm_dim, k, ifm_ch = self.get_1d_attrs_normalized() + + self.code_gen_dict["$DEFINES$"] = [ + """#define ImgDim {}\n #define PoolDim {}\n + #define NumChannels {}\n #define numReps {}""".format( + ifm_dim[1], + k[1], + self.get_nodeattr("NumChannels"), + numReps, + ) + ] def read_npy_data(self): code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") @@ -248,33 +230,24 @@ class StreamingMaxPool_Batch(HLSCustomOp): def docompute(self): dtype = self.get_input_datatype() - - k = self.get_nodeattr("PoolDim") - # ifm_dim = self.get_nodeattr("ImgDim") - if k[0] == k[1]: # todo: better condition - if dtype.bitwidth() == 1: + if dtype.bitwidth() == 1: + if self.is_1d(): + raise Exception("Binary 1d MaxPool not implemented on HLS backend") + else: op = "StreamingMaxPool_Batch" - self.code_gen_dict["$DOCOMPUTE$"] = [ - "%s<ImgDim, PoolDim, NumChannels>(in0, out, numReps);" % (op) - ] + self.code_gen_dict["$DOCOMPUTE$"] = [ + "%s<ImgDim, PoolDim, NumChannels>(in0, out, numReps);" % (op) + ] + else: + if self.is_1d(): + op = "StreamingMaxPool_Precision_Batch_1d" else: op = "StreamingMaxPool_Precision_Batch" - dtype = self.get_input_datatype() - dtype_hls = dtype.get_hls_datatype_str() - minval_str = str(int(dtype.min())) - self.code_gen_dict["$DOCOMPUTE$"] = [ - "%s<ImgDim, PoolDim, NumChannels, %s, %s>(in0, out, numReps);" - % (op, dtype_hls, minval_str) - ] - else: - # todo: add binary op - op = "StreamingMaxPool_Precision_Batch_NonSquare" dtype = self.get_input_datatype() dtype_hls = dtype.get_hls_datatype_str() minval_str = str(int(dtype.min())) self.code_gen_dict["$DOCOMPUTE$"] = [ - """%s<ImgDim_x, ImgDim_y, PoolDim_x, PoolDim_y, - NumChannels, %s, %s>(in0, out, numReps);""" + "%s<ImgDim, PoolDim, NumChannels, %s, %s>(in0, out, numReps);" % (op, dtype_hls, minval_str) ] diff --git a/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py b/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py index d3989343f..e1c3eced7 100644 --- a/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py +++ b/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py @@ -235,40 +235,23 @@ class InferStreamingMaxPool(Transformation): # mp_out_shape = model.get_tensor_shape(mp_output) dt = model.get_tensor_datatype(mp_input) mp_inst = getCustomOp(n) - # stride = mp_inst.get_nodeattr("strides")[0] k_h, k_w = mp_inst.get_nodeattr("kernel_shape") - # pad = mp_inst.get_nodeattr("pads")[0] ifm_ch = mp_in_shape[-1] ifm_dim_h = mp_in_shape[1] ifm_dim_w = mp_in_shape[2] - # ofm_dim = mp_out_shape[1] if ifm_dim_h % k_h == 0 and ifm_dim_w % k_w == 0: # create equivalent StreamingMaxPool_Batch node - # TODO support non-k strides - if k_h == k_w: # todo: better condition or none at all - new_node = helper.make_node( - "StreamingMaxPool_Batch", - [mp_input], - [mp_output], - domain="finn.custom_op.fpgadataflow", - backend="fpgadataflow", - PoolDim=k_h, - NumChannels=ifm_ch, - ImgDim=ifm_dim_h, - dataType=dt.name, - ) - else: - new_node = helper.make_node( - "StreamingMaxPool_Batch", - [mp_input], - [mp_output], - domain="finn.custom_op.fpgadataflow", - backend="fpgadataflow", - PoolDim=(k_h, k_w), - NumChannels=ifm_ch, - ImgDim=(ifm_dim_h, ifm_dim_w), - dataType=dt.name, - ) + new_node = helper.make_node( + "StreamingMaxPool_Batch", + [mp_input], + [mp_output], + domain="finn.custom_op.fpgadataflow", + backend="fpgadataflow", + PoolDim=(k_h, k_w), + NumChannels=ifm_ch, + ImgDim=(ifm_dim_h, ifm_dim_w), + dataType=dt.name, + ) graph.node.insert(node_ind, new_node) # remove old nodes graph.node.remove(n) diff --git a/tests/fpgadataflow/test_layer_streaming_maxpool_batch.py b/tests/fpgadataflow/test_layer_streaming_maxpool_batch.py index 790c0cb7e..2220bd29e 100644 --- a/tests/fpgadataflow/test_layer_streaming_maxpool_batch.py +++ b/tests/fpgadataflow/test_layer_streaming_maxpool_batch.py @@ -43,6 +43,7 @@ from finn.transformation.general import GiveUniqueNodeNames from finn.util.basic import gen_finn_dt_tensor from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer from finn.custom_op.registry import getCustomOp +import numpy as np def make_single_maxpoolnhwc_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, idt): @@ -120,37 +121,38 @@ def prepare_inputs(input_tensor): # input datatype -# @pytest.mark.parametrize("idt", [DataType.BIPOLAR, DataType.INT2]) -@pytest.mark.parametrize("idt", [DataType.INT4]) +@pytest.mark.parametrize("idt", [DataType.BIPOLAR, DataType.INT4]) +# 1d maxpool +@pytest.mark.parametrize("dim_1d", [False, True]) # kernel size -@pytest.mark.parametrize( - "k", - [ - (2, 1), - ], -) # (4,4)]) +@pytest.mark.parametrize("k", [2, 4]) # input dimension -@pytest.mark.parametrize( - "ifm_dim", - [ - (1024, 1), - ], -) # (6,6), (8,8)]) +@pytest.mark.parametrize("ifm_dim", [4, 8]) # input channels -@pytest.mark.parametrize("ifm_ch", [1, 3]) +@pytest.mark.parametrize("ifm_ch", [1, 3]) # 1,3 # execution mode -# @pytest.mark.parametrize("exec_mode", ["rtlsim", "cppsim"]) -@pytest.mark.parametrize("exec_mode", ["rtlsim"]) +@pytest.mark.parametrize("exec_mode", ["rtlsim", "cppsim"]) @pytest.mark.slow @pytest.mark.vivado -def test_fpgadataflow_streamingmaxpool(idt, k, ifm_dim, ifm_ch, exec_mode): - k_h, k_w = k - ifm_dim_h, ifm_dim_w = ifm_dim +def test_fpgadataflow_streamingmaxpool(idt, dim_1d, k, ifm_dim, ifm_ch, exec_mode): + ifm_dim_h = ifm_dim + k_h = k + if dim_1d: + ifm_dim_w = 1 + k_w = 1 + else: + ifm_dim_w = ifm_dim_h + k_w = k_h + ifm_dim = (ifm_dim_h, ifm_dim_w) + k = (k_h, k_w) + stride_h = k_h stride_w = k_w ofm_dim_h = int(((ifm_dim_h - k_h) / stride_h) + 1) ofm_dim_w = int(((ifm_dim_w - k_w) / stride_w) + 1) ofm_dim = (ofm_dim_h, ofm_dim_w) + if idt == DataType.BIPOLAR and dim_1d: + pytest.skip("Skipping binary StreamingMaxPool_1d (not implemented)") if ifm_dim_h % k_h != 0 or ifm_dim_w % k_w != 0: pytest.skip("Skipping StreamingMaxPool test w/ ImgDim % PoolDim != 0") @@ -186,9 +188,5 @@ def test_fpgadataflow_streamingmaxpool(idt, k, ifm_dim, ifm_ch, exec_mode): cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim") exp_cycles_dict = model.analysis(exp_cycles_per_layer) exp_cycles = exp_cycles_dict[node.name] - # DEBUG: - print("expected vs rtlsim cycles") - print(exp_cycles) - print(cycles_rtlsim) - # assert np.isclose(exp_cycles, cycles_rtlsim, atol=15) - # assert exp_cycles != 0 + assert np.isclose(exp_cycles, cycles_rtlsim, atol=15) + assert exp_cycles != 0 -- GitLab