diff --git a/docker/Dockerfile.finn b/docker/Dockerfile.finn index bb250885c45edc9b4ec47cea8199970f80c217dd..bf1ad4f62d00a7658051be71b415e20bacfcafb1 100644 --- a/docker/Dockerfile.finn +++ b/docker/Dockerfile.finn @@ -91,7 +91,7 @@ ARG FINN_EXP_COMMIT="f82c0d9868bb88ea045dfadb28508d327d287221" ARG BREVITAS_COMMIT="462f86cdc60f9915baf13afd1676fb21da44c2ee" ARG PYVERILATOR_COMMIT="e2ff74030de3992dcac54bf1b6aad2915946e8cb" ARG CNPY_COMMIT="4e8810b1a8637695171ed346ce68f6984e585ef4" -ARG HLSLIB_COMMIT="0acc01d1889a96da6843708d60323d2ee76784fc" +ARG HLSLIB_COMMIT="fbb07135b3d991602e8abe3f2c51212c11fd392b" ARG OMX_COMMIT="1dfc4aa2f2895632742cd5751520c6b472feb74e" ARG AVNET_BDF_COMMIT="2d49cfc25766f07792c0b314489f21fe916b639b" # finn-base diff --git a/src/finn/custom_op/fpgadataflow/streamingmaxpool_batch.py b/src/finn/custom_op/fpgadataflow/streamingmaxpool_batch.py index edbc07300c02c87b47a67297501163766c4cb0dc..19a42fe2d6b53879d401ec8bd462ddd59623dc1e 100644 --- a/src/finn/custom_op/fpgadataflow/streamingmaxpool_batch.py +++ b/src/finn/custom_op/fpgadataflow/streamingmaxpool_batch.py @@ -42,8 +42,8 @@ class StreamingMaxPool_Batch(HLSCustomOp): def get_nodeattr_types(self): my_attrs = { - "ImgDim": ("i", True, 0), - "PoolDim": ("i", True, 0), + "ImgDim": ("ints", True, []), # [H, W] = [Y, X] + "PoolDim": ("ints", True, []), # [H, W] = [Y, X] "NumChannels": ("i", True, 0), # FINN DataTypes for inputs/outputs "dataType": ("s", True, ""), @@ -59,10 +59,27 @@ class StreamingMaxPool_Batch(HLSCustomOp): """Returns FINN DataType of output.""" return DataType[self.get_nodeattr("dataType")] - def get_normal_input_shape(self): + def get_1d_attrs_normalized(self): + # support both (1, D) and (D, 1) cases transparently: + # assume the dummy ('1') dimension is the Y-dimension, i.e. + # images and kernels (and their attributes) of dimension + # [H, W] = [Y, X] = [D, 1] or [1, D] are always mapped to [1, D] ifm_dim = self.get_nodeattr("ImgDim") + k = self.get_nodeattr("PoolDim") + ifm_ch = self.get_nodeattr("NumChannels") + if ifm_dim[1] == 1: + ifm_dim = ifm_dim[::-1] + k = k[::-1] + return (ifm_dim, k, ifm_ch) + + def is_1d(self): + ifm_dim, k, ifm_ch = self.get_1d_attrs_normalized() + return (ifm_dim[0] == 1) and (k[0] == 1) + + def get_normal_input_shape(self): + ifm_dim_h, ifm_dim_w = self.get_nodeattr("ImgDim") ifm_ch = self.get_nodeattr("NumChannels") - ishape = (1, ifm_dim, ifm_dim, ifm_ch) + ishape = (1, ifm_dim_h, ifm_dim_w, ifm_ch) return ishape def get_folded_input_shape(self): @@ -74,14 +91,17 @@ class StreamingMaxPool_Batch(HLSCustomOp): return tuple(ret) def get_normal_output_shape(self): - k = self.get_nodeattr("PoolDim") - ifm_dim = self.get_nodeattr("ImgDim") + ifm_dim_h, ifm_dim_w = self.get_nodeattr("ImgDim") + k_h, k_w = tuple(self.get_nodeattr("PoolDim")) ifm_ch = self.get_nodeattr("NumChannels") - stride = k + stride_h = k_h + stride_w = k_w pad = 0 - assert ifm_dim % k == 0, "StreamingMaxPool needs ImgDim % PoolDim == 0" - ofm_dim = compute_conv_output_dim(ifm_dim, k, stride, pad) - oshape = (1, ofm_dim, ofm_dim, ifm_ch) + assert ifm_dim_h % k_h == 0, "StreamingMaxPool needs ImgDim_h % PoolDim_h == 0" + assert ifm_dim_w % k_w == 0, "StreamingMaxPool needs ImgDim_w % PoolDim_w == 0" + ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, pad) + ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, pad) + oshape = (1, ofm_dim_h, ofm_dim_w, ifm_ch) return oshape def get_folded_output_shape(self): @@ -98,9 +118,12 @@ class StreamingMaxPool_Batch(HLSCustomOp): def get_exp_cycles(self): # derived from StreamingMaxPool_Batch loop nest - k = self.get_nodeattr("PoolDim") - ifm_dim = self.get_nodeattr("ImgDim") - return int(ifm_dim * (ifm_dim + (ifm_dim / k))) + ifm_dim, k, ifm_ch = self.get_1d_attrs_normalized() + if self.is_1d(): + return int(ifm_dim[1] + k[1]) + else: + # TODO: adjust inaccurate formula + return int(ifm_dim[1] * (ifm_dim[1] + (ifm_dim[1] / k[1]))) def get_instream_width(self): dt_bits = self.get_input_datatype().bitwidth() @@ -167,11 +190,13 @@ class StreamingMaxPool_Batch(HLSCustomOp): def defines(self, var): numReps = 2 + ifm_dim, k, ifm_ch = self.get_1d_attrs_normalized() + self.code_gen_dict["$DEFINES$"] = [ """#define ImgDim {}\n #define PoolDim {}\n #define NumChannels {}\n #define numReps {}""".format( - self.get_nodeattr("ImgDim"), - self.get_nodeattr("PoolDim"), + ifm_dim[1], + k[1], self.get_nodeattr("NumChannels"), numReps, ) @@ -207,12 +232,18 @@ class StreamingMaxPool_Batch(HLSCustomOp): def docompute(self): dtype = self.get_input_datatype() if dtype.bitwidth() == 1: - op = "StreamingMaxPool_Batch" + if self.is_1d(): + raise Exception("Binary 1d MaxPool not implemented on HLS backend") + else: + op = "StreamingMaxPool_Batch" self.code_gen_dict["$DOCOMPUTE$"] = [ "%s<ImgDim, PoolDim, NumChannels>(in0, out, numReps);" % (op) ] else: - op = "StreamingMaxPool_Precision_Batch" + if self.is_1d(): + op = "StreamingMaxPool_Precision_Batch_1d" + else: + op = "StreamingMaxPool_Precision_Batch" dtype = self.get_input_datatype() dtype_hls = dtype.get_hls_datatype_str() minval_str = str(int(dtype.min())) diff --git a/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py b/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py index 03d7b73a567ef8e87890d4ecfdc697ab3c6120fd..c749d645dfbf9996c3eea430a0099cb5f12ee60a 100644 --- a/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py +++ b/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py @@ -235,24 +235,21 @@ class InferStreamingMaxPool(Transformation): # mp_out_shape = model.get_tensor_shape(mp_output) dt = model.get_tensor_datatype(mp_input) mp_inst = getCustomOp(n) - # stride = mp_inst.get_nodeattr("strides")[0] - k = mp_inst.get_nodeattr("kernel_shape")[0] - # pad = mp_inst.get_nodeattr("pads")[0] + k_h, k_w = mp_inst.get_nodeattr("kernel_shape") ifm_ch = mp_in_shape[-1] - ifm_dim = mp_in_shape[1] - # ofm_dim = mp_out_shape[1] - if ifm_dim % k == 0: + ifm_dim_h = mp_in_shape[1] + ifm_dim_w = mp_in_shape[2] + if ifm_dim_h % k_h == 0 and ifm_dim_w % k_w == 0: # create equivalent StreamingMaxPool_Batch node - # TODO support non-k strides new_node = helper.make_node( "StreamingMaxPool_Batch", [mp_input], [mp_output], domain="finn.custom_op.fpgadataflow", backend="fpgadataflow", - PoolDim=k, + PoolDim=(k_h, k_w), NumChannels=ifm_ch, - ImgDim=ifm_dim, + ImgDim=(ifm_dim_h, ifm_dim_w), dataType=dt.name, ) graph.node.insert(node_ind, new_node) diff --git a/tests/fpgadataflow/test_layer_streaming_maxpool_batch.py b/tests/fpgadataflow/test_layer_streaming_maxpool_batch.py index 11ca79471d4eb2642a141ecdda9b4c55714ec76c..556e15f13607caa556daff079026f0b2bacb1b2b 100644 --- a/tests/fpgadataflow/test_layer_streaming_maxpool_batch.py +++ b/tests/fpgadataflow/test_layer_streaming_maxpool_batch.py @@ -47,12 +47,15 @@ from finn.util.basic import gen_finn_dt_tensor def make_single_maxpoolnhwc_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, idt): + k_h, k_w = k + ifm_dim_h, ifm_dim_w = ifm_dim + ofm_dim_h, ofm_dim_w = ofm_dim odt = idt inp = helper.make_tensor_value_info( - "inp", TensorProto.FLOAT, [1, ifm_dim, ifm_dim, ifm_ch] + "inp", TensorProto.FLOAT, [1, ifm_dim_h, ifm_dim_w, ifm_ch] ) outp = helper.make_tensor_value_info( - "outp", TensorProto.FLOAT, [1, ofm_dim, ofm_dim, ifm_ch] + "outp", TensorProto.FLOAT, [1, ofm_dim_h, ofm_dim_w, ifm_ch] ) mp_node = helper.make_node( @@ -60,8 +63,8 @@ def make_single_maxpoolnhwc_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, idt): ["inp"], ["outp"], domain="finn.custom_op.general", - kernel_shape=[k, k], - strides=[k, k], + kernel_shape=[k_h, k_w], + strides=[k_h, k_w], pads=[0, 0, 0, 0], ) graph = helper.make_graph( @@ -78,12 +81,15 @@ def make_single_maxpoolnhwc_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, idt): def make_single_streamingmaxpool_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, idt): + k_h, k_w = k + ifm_dim_h, ifm_dim_w = ifm_dim + ofm_dim_h, ofm_dim_w = ofm_dim odt = idt inp = helper.make_tensor_value_info( - "inp", TensorProto.FLOAT, [1, ifm_dim, ifm_dim, ifm_ch] + "inp", TensorProto.FLOAT, [1, ifm_dim_h, ifm_dim_w, ifm_ch] ) outp = helper.make_tensor_value_info( - "outp", TensorProto.FLOAT, [1, ofm_dim, ofm_dim, ifm_ch] + "outp", TensorProto.FLOAT, [1, ofm_dim_h, ofm_dim_w, ifm_ch] ) smp_node = helper.make_node( @@ -92,9 +98,9 @@ def make_single_streamingmaxpool_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, idt): ["outp"], domain="finn.custom_op.fpgadataflow", backend="fpgadataflow", - PoolDim=k, + PoolDim=[k_h, k_w], NumChannels=ifm_ch, - ImgDim=ifm_dim, + ImgDim=[ifm_dim_h, ifm_dim_w], dataType=idt.name, ) graph = helper.make_graph( @@ -115,24 +121,42 @@ def prepare_inputs(input_tensor): # input datatype -@pytest.mark.parametrize("idt", [DataType.BIPOLAR, DataType.INT2]) +@pytest.mark.parametrize("idt", [DataType.BIPOLAR, DataType.INT4]) +# 1d maxpool +@pytest.mark.parametrize("dim_1d", [False, True]) # kernel size @pytest.mark.parametrize("k", [2, 4]) # input dimension -@pytest.mark.parametrize("ifm_dim", [4, 6, 8]) +@pytest.mark.parametrize("ifm_dim", [4, 8]) # input channels -@pytest.mark.parametrize("ifm_ch", [1, 2]) # , 2, 3, 4]) +@pytest.mark.parametrize("ifm_ch", [1, 3]) # 1,3 # execution mode @pytest.mark.parametrize("exec_mode", ["rtlsim", "cppsim"]) @pytest.mark.slow @pytest.mark.vivado -def test_fpgadataflow_streamingmaxpool(idt, k, ifm_dim, ifm_ch, exec_mode): - stride = k - ofm_dim = int(((ifm_dim - k) / stride) + 1) - if ifm_dim % k != 0: +def test_fpgadataflow_streamingmaxpool(idt, dim_1d, k, ifm_dim, ifm_ch, exec_mode): + ifm_dim_h = ifm_dim + k_h = k + if dim_1d: + ifm_dim_w = 1 + k_w = 1 + else: + ifm_dim_w = ifm_dim_h + k_w = k_h + ifm_dim = (ifm_dim_h, ifm_dim_w) + k = (k_h, k_w) + + stride_h = k_h + stride_w = k_w + ofm_dim_h = int(((ifm_dim_h - k_h) / stride_h) + 1) + ofm_dim_w = int(((ifm_dim_w - k_w) / stride_w) + 1) + ofm_dim = (ofm_dim_h, ofm_dim_w) + if idt == DataType.BIPOLAR and dim_1d: + pytest.skip("Skipping binary StreamingMaxPool_1d (not implemented)") + if ifm_dim_h % k_h != 0 or ifm_dim_w % k_w != 0: pytest.skip("Skipping StreamingMaxPool test w/ ImgDim % PoolDim != 0") - x = gen_finn_dt_tensor(idt, (1, ifm_dim, ifm_dim, ifm_ch)) + x = gen_finn_dt_tensor(idt, (1, ifm_dim_h, ifm_dim_w, ifm_ch)) # prepare input data input_dict = prepare_inputs(x) @@ -152,7 +176,7 @@ def test_fpgadataflow_streamingmaxpool(idt, k, ifm_dim, ifm_ch, exec_mode): model = model.transform(HLSSynthIP()) model = model.transform(PrepareRTLSim()) else: - raise Exception("Unknown exec_mode in test_fpgadataflow_slidingwindow") + raise Exception("Unknown exec_mode in test_layer_streaming_maxpool_batch") # execute model y_produced = oxe.execute_onnx(model, input_dict)["outp"]