Commit bda8ae7b authored by Yaman Umuroglu

Merge branch 'feature/convolutioninputgenerator_nonsquare' of https://github.com/mmrahorovic/finn into feature/1d_convs_and_finn_base_update
parents 0def4eb0 a41a6454
@@ -61,12 +61,13 @@ class ConvolutionInputGenerator(HLSCustomOp):
     def get_nodeattr_types(self):
         my_attrs = {
-            "ConvKernelDim": ("i", True, 0),
+            "ConvKernelDim": ("ints", True, []),  # [H, W] = [Y, X]
             "IFMChannels": ("i", True, 0),
-            "IFMDim": ("i", True, 0),
-            "OFMDim": ("i", True, 0),
+            "IFMDim": ("ints", True, []),  # [H, W] = [Y, X]
+            "OFMDim": ("ints", True, []),  # [H, W] = [Y, X]
             "SIMD": ("i", True, 0),
-            "Stride": ("i", True, 0),
+            "Stride": ("ints", True, [1, 1]),  # [H, W] = [Y, X]
+            "Dilation": ("ints", True, [1, 1]),  # [H, W] = [Y, X]
             # FINN DataTypes for inputs, weights, outputs
             "inputDataType": ("s", True, ""),
             "outputDataType": ("s", True, ""),
@@ -87,43 +88,45 @@ class ConvolutionInputGenerator(HLSCustomOp):
         return my_attrs

     def get_normal_input_shape(self):
-        ifm_dim = self.get_nodeattr("IFMDim")
+        ifm_dim_h, ifm_dim_w = self.get_nodeattr("IFMDim")
         ifm_ch = self.get_nodeattr("IFMChannels")
-        ishape = (1, ifm_dim, ifm_dim, ifm_ch)
+        ishape = (1, ifm_dim_h, ifm_dim_w, ifm_ch)
         return ishape

     def get_folded_input_shape(self):
-        ifm_dim = self.get_nodeattr("IFMDim")
+        ifm_dim_h, ifm_dim_w = self.get_nodeattr("IFMDim")
         ifm_ch = self.get_nodeattr("IFMChannels")
         simd = self.get_nodeattr("SIMD")
         assert ifm_ch % simd == 0, "SIMD must divide IFMChannels"
         wf = int(ifm_ch / simd)
-        folded_ishape = (1, ifm_dim, ifm_dim, wf, simd)
+        folded_ishape = (1, ifm_dim_h, ifm_dim_w, wf, simd)
         return folded_ishape

     def get_normal_output_shape(self):
-        k = self.get_nodeattr("ConvKernelDim")
-        ifm_dim = self.get_nodeattr("IFMDim")
+        k_h, k_w = self.get_nodeattr("ConvKernelDim")
+        ifm_dim_h, ifm_dim_w = self.get_nodeattr("IFMDim")
         ifm_ch = self.get_nodeattr("IFMChannels")
-        stride = self.get_nodeattr("Stride")
+        stride_h, stride_w = self.get_nodeattr("Stride")
+        dilation_h, dilation_w = self.get_nodeattr("Dilation")
         pad = 0
-        ofm_dim = compute_conv_output_dim(ifm_dim, k, stride, pad)
-        oshape = (1, ofm_dim, ofm_dim, k * k * ifm_ch)
+        ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, pad, dilation_h)
+        ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, pad, dilation_w)
+        oshape = (1, ofm_dim_h, ofm_dim_w, k_h * k_w * ifm_ch)
         return oshape

     def get_folded_output_shape(self):
-        k = self.get_nodeattr("ConvKernelDim")
-        ifm_dim = self.get_nodeattr("IFMDim")
+        k_h, k_w = self.get_nodeattr("ConvKernelDim")
+        ifm_dim_h, ifm_dim_w = self.get_nodeattr("IFMDim")
         ifm_ch = self.get_nodeattr("IFMChannels")
-        stride = self.get_nodeattr("Stride")
+        stride_h, stride_w = self.get_nodeattr("Stride")
+        dilation_h, dilation_w = self.get_nodeattr("Dilation")
         simd = self.get_nodeattr("SIMD")
         pad = 0
-        ofm_dim = compute_conv_output_dim(ifm_dim, k, stride, pad)
+        ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, pad, dilation_h)
+        ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, pad, dilation_w)
         assert ifm_ch % simd == 0, "SIMD must divide IFMChannels"
-        wf = int((k * k * ifm_ch) // simd)
-        folded_oshape = (1, ofm_dim, ofm_dim, wf, simd)
+        wf = int((k_h * k_w * ifm_ch) // simd)
+        folded_oshape = (1, ofm_dim_h, ofm_dim_w, wf, simd)
         return folded_oshape

     def make_shape_compatible_op(self, model):
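The shape methods above now compute separate H and W output sizes. As a sanity check, here is a small worked example; it assumes compute_conv_output_dim (from finn-base) follows the standard dilated-convolution output-size formula, which is an assumption on my part rather than something stated in this diff.

# Assumed formula: ofm = (ifm + 2*pad - dilation*(k - 1) - 1) // stride + 1
def conv_out_dim(ifm_dim, k, stride, pad=0, dilation=1):
    return (ifm_dim + 2 * pad - dilation * (k - 1) - 1) // stride + 1

# A 1D convolution laid out as [H, W] = [1, 30], kernel [1, 3], stride [1, 2]:
print(conv_out_dim(1, 1, 1))   # height stays 1
print(conv_out_dim(30, 3, 2))  # width becomes 14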
@@ -186,26 +189,31 @@ class ConvolutionInputGenerator(HLSCustomOp):
     def get_exp_cycles(self):
         simd = self.get_nodeattr("SIMD")
         ifm_ch = self.get_nodeattr("IFMChannels")
-        k = self.get_nodeattr("ConvKernelDim")
-        ifm_dim = self.get_nodeattr("IFMDim")
-        ofm_dim = self.get_nodeattr("OFMDim")
-        stride = self.get_nodeattr("Stride")
+        k_h, k_w = self.get_nodeattr("ConvKernelDim")
+        ifm_dim_h, ifm_dim_w = self.get_nodeattr("IFMDim")
+        ofm_dim_h, ofm_dim_w = self.get_nodeattr("OFMDim")
+        stride_h, stride_w = self.get_nodeattr("Stride")
+        dilation_h, dilation_w = self.get_nodeattr("Dilation")
         # since mmv != 1 is not supported yet, we set mmv for now to 1
         mmv = 1
         # see https://github.com/Xilinx/finn-hlslib/blob/master/slidingwindow.h
-        cycles_write_block = (ofm_dim * k * k * (ifm_ch / simd)) / mmv
-        cycles_read_block = stride * ifm_dim * (ifm_ch / simd)
+        cycles_write_block = (ofm_dim_w * k_w * k_h * (ifm_ch / simd)) / mmv
+        cycles_read_block = stride_w * ifm_dim_w * (ifm_ch / simd)
         max_cycles = max(cycles_write_block, cycles_read_block)
-        exp_cycles = ifm_dim * k * (ifm_ch / simd) + ofm_dim * max_cycles
+        exp_cycles = (
+            ifm_dim_w * k_h * dilation_h * (ifm_ch / simd) + ofm_dim_h * max_cycles
+        )
         return int(exp_cycles)

     def bram_estimation(self):
+        # NOTE: only tested with a square convolution
         simd = self.get_nodeattr("SIMD")
         ifm_ch = self.get_nodeattr("IFMChannels")
-        ifm_dim = self.get_nodeattr("IFMDim")
-        k = self.get_nodeattr("ConvKernelDim")
-        stride = self.get_nodeattr("Stride")
+        ifm_dim = self.get_nodeattr("IFMDim")[0]
+        k = self.get_nodeattr("ConvKernelDim")[0]
+        stride = self.get_nodeattr("Stride")[0]
         ram_style = self.get_nodeattr("ram_style")
         if ram_style == "block" or ram_style == "auto":
             ram_depth = ifm_dim * ifm_ch / simd
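To make the updated cycle estimate concrete, here is a standalone restatement of get_exp_cycles with example numbers; it is purely illustrative, and the in-class method above remains the authoritative version.

# Free-function restatement of the cycle estimate above for a quick check;
# parameter values are the illustrative [1, 4] kernel / [1, 32] input case.
def swg_exp_cycles(ifm_ch, simd, k_h, k_w, ifm_dim_w, ofm_dim_h, ofm_dim_w,
                   stride_w, dilation_h, mmv=1):
    cycles_write_block = (ofm_dim_w * k_w * k_h * (ifm_ch / simd)) / mmv
    cycles_read_block = stride_w * ifm_dim_w * (ifm_ch / simd)
    max_cycles = max(cycles_write_block, cycles_read_block)
    return int(ifm_dim_w * k_h * dilation_h * (ifm_ch / simd) + ofm_dim_h * max_cycles)

# 64 read-in cycles + 1 * 232 write-block cycles = 296
print(swg_exp_cycles(ifm_ch=8, simd=4, k_h=1, k_w=4, ifm_dim_w=32,
                     ofm_dim_h=1, ofm_dim_w=29, stride_w=1, dilation_h=1))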
@@ -232,11 +240,12 @@ class ConvolutionInputGenerator(HLSCustomOp):
             return 0

     def lut_estimation(self):
+        # NOTE: only tested with a square convolution
         simd = self.get_nodeattr("SIMD")
         ifm_ch = self.get_nodeattr("IFMChannels")
-        ifm_dim = self.get_nodeattr("IFMDim")
-        k = self.get_nodeattr("ConvKernelDim")
-        stride = self.get_nodeattr("Stride")
+        ifm_dim = self.get_nodeattr("IFMDim")[0]
+        k = self.get_nodeattr("ConvKernelDim")[0]
+        stride = self.get_nodeattr("Stride")[0]
         ram_style = self.get_nodeattr("ram_style")
         if ram_style == "distributed":
             ram_luts = int(
@@ -252,11 +261,12 @@ class ConvolutionInputGenerator(HLSCustomOp):
         return 300 + ram_luts

     def uram_estimation(self):
+        # NOTE: only tested with a square convolution
         simd = self.get_nodeattr("SIMD")
         ifm_ch = self.get_nodeattr("IFMChannels")
-        ifm_dim = self.get_nodeattr("IFMDim")
-        k = self.get_nodeattr("ConvKernelDim")
-        stride = self.get_nodeattr("Stride")
+        ifm_dim = self.get_nodeattr("IFMDim")[0]
+        k = self.get_nodeattr("ConvKernelDim")[0]
+        stride = self.get_nodeattr("Stride")[0]
         ram_style = self.get_nodeattr("ram_style")
         if ram_style == "ultra":
             return int(
@@ -295,7 +305,7 @@ class ConvolutionInputGenerator(HLSCustomOp):
         assert (
             inp.shape == exp_ishape
         ), """Input shape doesn't
-        match expected shape (1, ifm_dim, ifm_dim, ifm_ch)."""
+        match expected shape (1, ifm_dim_h, ifm_dim_w, ifm_ch)."""
         if self.get_input_datatype() == DataType.BIPOLAR:
             # store bipolar activations as binary
             inp = (inp + 1) / 2
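The last context line above stores bipolar activations as binary, which is just the affine map from {-1, +1} to {0, 1}. A one-line check, illustrative only:

import numpy as np

x_bipolar = np.array([-1, 1, 1, -1])
print((x_bipolar + 1) / 2)  # [0. 1. 1. 0.]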
@@ -354,25 +364,33 @@ class ConvolutionInputGenerator(HLSCustomOp):
         assert (
             context[node.output[0]].shape == exp_oshape
         ), """Output
-        shape doesn't match expected shape (1, ofm_dim, ofm_dim, k*k*ifm_ch)."""
+        shape doesn't match expected shape (1, ofm_dim_h, ofm_dim_w, k_h*k_w*ifm_ch)."""

     def global_includes(self):
         self.code_gen_dict["$GLOBALS$"] = ['#include "slidingwindow.h"']

     def defines(self, var):
         numReps = 1
+        ifm_dim = self.get_nodeattr("IFMDim")[0]
+        ifm_ch = self.get_nodeattr("IFMChannels")
+        ofm_dim = self.get_nodeattr("OFMDim")[0]
+        k = self.get_nodeattr("ConvKernelDim")[0]
+        stride = self.get_nodeattr("Stride")[0]
+        simd = self.get_nodeattr("SIMD")
+        ifm_precision = self.get_input_datatype().bitwidth()
         self.code_gen_dict["$DEFINES$"] = [
             """#define ConvKernelDim1 {}\n #define IFMChannels1 {}\n
             #define Input_precision1 {}\n #define IFMDim1 {}\n
             #define OFMDim1 {}\n #define SIMD1 {}\n
             #define Stride1 {}\n #define numReps {}""".format(
-                self.get_nodeattr("ConvKernelDim"),
-                self.get_nodeattr("IFMChannels"),
-                self.get_input_datatype().bitwidth(),
-                self.get_nodeattr("IFMDim"),
-                self.get_nodeattr("OFMDim"),
-                self.get_nodeattr("SIMD"),
-                self.get_nodeattr("Stride"),
+                k,
+                ifm_ch,
+                ifm_precision,
+                ifm_dim,
+                ofm_dim,
+                simd,
+                stride,
                 numReps,
             )
         ]
@@ -415,9 +433,11 @@ class ConvolutionInputGenerator(HLSCustomOp):
         }
         hls_ram_style = map_to_hls_ram_style[ram_style]
         hls_call = node.op_type
-        # check if non optimized ConvolutionInputGenerator is needed
-        k = self.get_nodeattr("ConvKernelDim")
-        stride = self.get_nodeattr("Stride")
+        # check which ConvolutionInputGenerator is needed
+        k = self.get_nodeattr("ConvKernelDim")[0]
+        stride = self.get_nodeattr("Stride")[0]
         if k % stride != 0:
             hls_call += "_kernel_stride"
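The check above picks between the two finn-hlslib sliding-window implementations based on whether the stride divides the kernel size. A tiny restatement of the same rule (the helper name is made up for illustration, not part of the codebase):

# Plain ConvolutionInputGenerator when stride divides the kernel dimension,
# otherwise the "_kernel_stride" variant; mirrors the check above.
def select_hls_call(op_type, k, stride):
    return op_type + ("_kernel_stride" if k % stride != 0 else "")

print(select_hls_call("ConvolutionInputGenerator", 3, 2))  # ..._kernel_stride
print(select_hls_call("ConvolutionInputGenerator", 4, 2))  # plain variant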
@@ -47,7 +47,9 @@ from finn.custom_op.registry import getCustomOp
 from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer


-def make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, simd, stride, idt):
+def make_single_im2col_modelwrapper(
+    k, ifm_ch, ifm_dim, ofm_dim, simd, stride, dilation, idt
+):
     odt = idt
     inp = helper.make_tensor_value_info(
         "inp", TensorProto.FLOAT, [1, ifm_dim, ifm_dim, ifm_ch]
@@ -61,12 +63,12 @@ def make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, simd, stride, i
         ["inp"],
         ["outp"],
         domain="finn.custom_op.general",
         backend="fpgadataflow",
-        stride=stride,
+        stride=[stride, stride],
         kernel_size=[k, k],
         input_shape=str((1, ifm_dim, ifm_dim, ifm_ch)),
         pad_amount=[0, 0, 0, 0],
         pad_value=0,
+        dilations=[dilation, dilation],
     )
     graph = helper.make_graph(
         nodes=[im2col_node], name="im2col_graph", inputs=[inp], outputs=[outp]
@@ -82,7 +84,7 @@ def make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, simd, stride, i
 def make_single_slidingwindow_modelwrapper(
-    k, ifm_ch, ifm_dim, ofm_dim, simd, stride, idt, dw=0
+    k, ifm_ch, ifm_dim, ofm_dim, simd, stride, dilation, idt, dw=0
 ):
     odt = idt
     inp = helper.make_tensor_value_info(
@@ -98,12 +100,13 @@ def make_single_slidingwindow_modelwrapper(
         ["outp"],
         domain="finn.custom_op.fpgadataflow",
         backend="fpgadataflow",
-        ConvKernelDim=k,
+        ConvKernelDim=[k, k],
         IFMChannels=ifm_ch,
-        IFMDim=ifm_dim,
-        OFMDim=ofm_dim,
+        IFMDim=[ifm_dim, ifm_dim],
+        OFMDim=[ofm_dim, ofm_dim],
         SIMD=simd,
-        Stride=stride,
+        Stride=[stride, stride],
+        Dilation=[dilation, dilation],
         inputDataType=idt.name,
         outputDataType=odt.name,
         depthwise=dw,
@@ -138,6 +141,9 @@ def prepare_inputs(input_tensor):
 @pytest.mark.parametrize("ifm_ch", [2, 4])
 # Stride
 @pytest.mark.parametrize("stride", [1, 2])
+# Dilation
+# Currently only dilation value of 1 is supported
+@pytest.mark.parametrize("dilation", [1])
 # execution mode
 @pytest.mark.parametrize("exec_mode", ["cppsim", "rtlsim"])
 # input channel parallelism ("SIMD")
@@ -147,13 +153,13 @@ def prepare_inputs(input_tensor):
 @pytest.mark.slow
 @pytest.mark.vivado
 def test_fpgadataflow_slidingwindow(
-    idt, k, ifm_dim, ifm_ch, stride, exec_mode, simd, dw
+    idt, k, ifm_dim, ifm_ch, stride, dilation, exec_mode, simd, dw
 ):
     ofm_dim = int(((ifm_dim - k) / stride) + 1)
     x = gen_finn_dt_tensor(idt, (1, ifm_dim, ifm_dim, ifm_ch))
     model = make_single_slidingwindow_modelwrapper(
-        k, ifm_ch, ifm_dim, ofm_dim, simd, stride, idt, dw
+        k, ifm_ch, ifm_dim, ofm_dim, simd, stride, dilation, idt, dw
     )
     if exec_mode == "cppsim":
@@ -174,9 +180,10 @@ def test_fpgadataflow_slidingwindow(
     # execute model
     y_produced = oxe.execute_onnx(model, input_dict)["outp"]
     golden = make_single_im2col_modelwrapper(
-        k, ifm_ch, ifm_dim, ofm_dim, simd, stride, idt
+        k, ifm_ch, ifm_dim, ofm_dim, simd, stride, dilation, idt
     )
     y_expected = oxe.execute_onnx(golden, input_dict)["outp"]
     if dw == 0:
         assert (y_produced == y_expected).all()
     else:
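Putting the updated helpers together, a minimal usage sketch for a single configuration; the values mirror the pytest parametrization above and are illustrative, and it assumes the two helper functions from this test module are in scope.

# One configuration of the parametrized test, driven by hand.
from finn.core.datatype import DataType

idt = DataType.INT2
k, ifm_ch, ifm_dim, stride, dilation, simd, dw = 3, 2, 6, 1, 1, 2, 0
ofm_dim = int(((ifm_dim - k) / stride) + 1)  # 4; dilation is fixed to 1 here

dut_model = make_single_slidingwindow_modelwrapper(
    k, ifm_ch, ifm_dim, ofm_dim, simd, stride, dilation, idt, dw
)
golden_model = make_single_im2col_modelwrapper(
    k, ifm_ch, ifm_dim, ofm_dim, simd, stride, dilation, idt
)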