Skip to content
Snippets Groups Projects
Unverified commit e7ca8464, authored by Yaman Umuroglu and committed by GitHub
Browse files

Merge pull request #506 from mmrahorovic/feature/vitis-swu-1d

Support for 1D SWU optimized variant
parents f9d6e7ea 2bcc816f
No related branches found
No related tags found
No related merge requests found
...@@ -97,7 +97,7 @@ ARG FINN_EXP_COMMIT="af6102769226b82b639f243dc36f065340991513" ...@@ -97,7 +97,7 @@ ARG FINN_EXP_COMMIT="af6102769226b82b639f243dc36f065340991513"
ARG BREVITAS_COMMIT="a5b71d6de1389d3e7db898fef72e014842670f03" ARG BREVITAS_COMMIT="a5b71d6de1389d3e7db898fef72e014842670f03"
ARG PYVERILATOR_COMMIT="0c3eb9343500fc1352a02c020a736c8c2db47e8e" ARG PYVERILATOR_COMMIT="0c3eb9343500fc1352a02c020a736c8c2db47e8e"
ARG CNPY_COMMIT="4e8810b1a8637695171ed346ce68f6984e585ef4" ARG CNPY_COMMIT="4e8810b1a8637695171ed346ce68f6984e585ef4"
ARG HLSLIB_COMMIT="f1eecde2d894a6d8971555924c0df147dc5ba033" ARG HLSLIB_COMMIT="da7b47cd65a967b76554a0dda74c097803c5e550"
ARG OMX_COMMIT="1dfc4aa2f2895632742cd5751520c6b472feb74e" ARG OMX_COMMIT="1dfc4aa2f2895632742cd5751520c6b472feb74e"
ARG AVNET_BDF_COMMIT="2d49cfc25766f07792c0b314489f21fe916b639b" ARG AVNET_BDF_COMMIT="2d49cfc25766f07792c0b314489f21fe916b639b"
......
...@@ -264,7 +264,7 @@ class ConvolutionInputGenerator1D(HLSCustomOp): ...@@ -264,7 +264,7 @@ class ConvolutionInputGenerator1D(HLSCustomOp):
dilation_h, dilation_w = dilation dilation_h, dilation_w = dilation
# since mmv != 1 is not supported yet, we set mmv for now to 1 # since mmv != 1 is not supported yet, we set mmv for now to 1
# mmv = 1 mmv = 1
# see https://github.com/Xilinx/finn-hlslib/blob/master/slidingwindow.h # see https://github.com/Xilinx/finn-hlslib/blob/master/slidingwindow.h
if self.use_parallel_window_output(): if self.use_parallel_window_output():
exp_cycles = k_w + ofm_dim_w exp_cycles = k_w + ofm_dim_w
...@@ -272,10 +272,21 @@ class ConvolutionInputGenerator1D(HLSCustomOp): ...@@ -272,10 +272,21 @@ class ConvolutionInputGenerator1D(HLSCustomOp):
cycles_read_block = ifm_dim_w * ifm_ch / simd cycles_read_block = ifm_dim_w * ifm_ch / simd
cycles_write_block = ofm_dim_w * k_w * ifm_ch / simd cycles_write_block = ofm_dim_w * k_w * ifm_ch / simd
exp_cycles = cycles_read_block + cycles_write_block exp_cycles = cycles_read_block + cycles_write_block
elif self.get_nodeattr("depthwise") == 1:
if stride_h > 1 or stride_w > 1:
cycles_write_block = (ofm_dim_w * k_w * k_h * (ifm_ch / simd)) / mmv
cycles_read_block = stride_w * ifm_dim_w * (ifm_ch / simd)
max_cycles = max(cycles_write_block, cycles_read_block)
exp_cycles = (
ifm_dim_w * k_h * dilation_h * (ifm_ch / simd)
+ ofm_dim_h * max_cycles
)
else:
cycles_read_block = ifm_ch / simd * (k_w - 1) - (k_w - 1)
cycles_write_block = ofm_dim_w * k_w * ifm_ch / simd
exp_cycles = cycles_read_block + cycles_write_block
else: else:
cycles_read_block = ifm_ch / simd * (k_w - 1) - (k_w - 1) exp_cycles = 1 + ofm_dim_w * k_w * ifm_ch / simd
cycles_write_block = ofm_dim_w * k_w * ifm_ch / simd
exp_cycles = cycles_read_block + cycles_write_block
return int(exp_cycles) return int(exp_cycles)
...@@ -561,6 +572,15 @@ class ConvolutionInputGenerator1D(HLSCustomOp): ...@@ -561,6 +572,15 @@ class ConvolutionInputGenerator1D(HLSCustomOp):
"ultra": "ap_resource_uram()", "ultra": "ap_resource_uram()",
} }
hls_ram_style = map_to_hls_ram_style[ram_style] hls_ram_style = map_to_hls_ram_style[ram_style]
(
ifm_ch,
ifm_dim,
ofm_dim,
k,
stride,
dilation,
) = self.get_1d_conv_attrs_normalized()
stride_x = np.prod(stride)
# check which ConvolutionInputGenerator is needed # check which ConvolutionInputGenerator is needed
if self.use_parallel_window_output(): if self.use_parallel_window_output():
...@@ -586,20 +606,34 @@ class ConvolutionInputGenerator1D(HLSCustomOp): ...@@ -586,20 +606,34 @@ class ConvolutionInputGenerator1D(HLSCustomOp):
) )
] ]
else: else:
hls_call = "ConvolutionInputGenerator_1D_dws_lowbuffer" if stride_x > 1:
self.code_gen_dict["$DOCOMPUTE$"] = [ # temporarily use old ConvolutionInputGenerator_NonSquare_dws
"""{}<ConvKernelDim1_x, IFMChannels1, # for depthwise with stride > 1
Input_precision1, IFMDim1_x, OFMDim1_x, # note that both x and y stride are set to same (hlslib bug)
SIMD1> (in0, out, numReps, {});""".format( hls_call = "ConvolutionInputGenerator_NonSquare_dws"
hls_call, hls_ram_style self.code_gen_dict["$DOCOMPUTE$"] = [
) """{}<ConvKernelDim1_x, 1, IFMChannels1,
] Input_precision1, IFMDim1_x, 1, OFMDim1_x, 1,
SIMD1, Stride1_x, Stride1_x
> (in0, out, numReps, {});""".format(
hls_call, hls_ram_style
)
]
else:
hls_call = "ConvolutionInputGenerator_1D_dws_lowbuffer"
self.code_gen_dict["$DOCOMPUTE$"] = [
"""{}<ConvKernelDim1_x, IFMChannels1,
Input_precision1, IFMDim1_x, OFMDim1_x,
SIMD1> (in0, out, numReps, {});""".format(
hls_call, hls_ram_style
)
]
else: else:
hls_call = "ConvolutionInputGenerator_1D_lowbuffer" hls_call = "ConvolutionInputGenerator_1D_lowbuffer"
self.code_gen_dict["$DOCOMPUTE$"] = [ self.code_gen_dict["$DOCOMPUTE$"] = [
"""{}<ConvKernelDim1_x, IFMChannels1, """{}<ConvKernelDim1_x, IFMChannels1,
Input_precision1, IFMDim1_x, OFMDim1_x, Input_precision1, IFMDim1_x, OFMDim1_x,
SIMD1> (in0, out, numReps, {});""".format( Stride1_x, SIMD1> (in0, out, numReps, {});""".format(
hls_call, hls_ram_style hls_call, hls_ram_style
) )
] ]
......
...@@ -213,6 +213,10 @@ class InferConvInpGen(Transformation): ...@@ -213,6 +213,10 @@ class InferConvInpGen(Transformation):
1D depthwise separable convolutions""" 1D depthwise separable convolutions"""
% n.name % n.name
) )
if stride_h > 1 or stride_w > 1:
assert (
stride_h < k_h and stride_w < k_w
), """%s: Stride value must be smaller than kernel dim"""
ConvInpGen_node = helper.make_node( ConvInpGen_node = helper.make_node(
"ConvolutionInputGenerator1D", "ConvolutionInputGenerator1D",
[ConvInpGen_input], [ConvInpGen_input],
......
...@@ -156,8 +156,7 @@ def prepare_inputs(input_tensor): ...@@ -156,8 +156,7 @@ def prepare_inputs(input_tensor):
# input channels # input channels
@pytest.mark.parametrize("ifm_ch", [1, 4]) @pytest.mark.parametrize("ifm_ch", [1, 4])
# Stride # Stride
# @pytest.mark.parametrize("stride", [[1, 1], [2, 1]]) @pytest.mark.parametrize("stride", [[1, 1], [2, 1]])
@pytest.mark.parametrize("stride", [[1, 1]])
# Dilation # Dilation
@pytest.mark.parametrize("dilation", [[1, 1], [2, 1]]) @pytest.mark.parametrize("dilation", [[1, 1], [2, 1]])
# execution mode # execution mode
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment