Skip to content
Snippets Groups Projects
Commit 2c4718c8 authored by Felix Jentzsch's avatar Felix Jentzsch
Browse files

Basic support for flatten layer between conv and fc

parent ba7ebdf1
No related branches found
No related tags found
No related merge requests found
......@@ -72,7 +72,7 @@ USER $UNAME
# cloning dependency repos (as user)
# finn-base
RUN git clone https://github.com/Xilinx/finn-base.git /workspace/finn-base
RUN git clone https://github.com/fpjentzsch/finn-base.git /workspace/finn-base
# Brevitas
RUN git clone https://github.com/Xilinx/brevitas.git /workspace/brevitas
# CNPY
......
......@@ -12,7 +12,7 @@ gecho () {
# checkout the correct dependency repo commits
# the repos themselves are cloned in the Dockerfile
FINN_BASE_COMMIT=8908c6a3f6674c4fa790954bd41c23ee5bf053df
FINN_BASE_COMMIT=4b40ff84e7c9210325a11bf73b8b9142b776f94c
BREVITAS_COMMIT=aff49758ec445d77c75721c7de3091a2a1797ca8
CNPY_COMMIT=4e8810b1a8637695171ed346ce68f6984e585ef4
HLSLIB_COMMIT=2e49322d1bbc4969ca293843bda1f3f9c05456fc
......
......@@ -29,11 +29,15 @@ def _suitable_node(node):
def _suitable_folded_shapes(ishape, oshape):
i_dummy = np.random.rand(*ishape)
o_dummy = np.random.rand(*oshape)
ishape_canonical = np.squeeze(i_dummy).shape
oshape_canonical = np.squeeze(o_dummy).shape
return ishape_canonical == oshape_canonical
matching_stream_width = ishape[-1] == oshape[-1]
matching_size = np.prod(ishape) == np.prod(oshape)
return matching_stream_width and matching_size
# i_dummy = np.random.rand(*ishape)
# o_dummy = np.random.rand(*oshape)
# ishape_canonical = np.squeeze(i_dummy).shape
# oshape_canonical = np.squeeze(o_dummy).shape
# return ishape_canonical == oshape_canonical
class InsertFIFO(Transformation):
......
from finn.transformation.base import Transformation
from finn.util.basic import get_by_name, is_finn_op
from finn.custom_op.registry import getCustomOp
def _is_fpgadataflow_node(node):
......@@ -22,29 +23,67 @@ class RemoveCNVtoFCFlatten(Transformation):
between two fpgadataflow nodes"""
def apply(self, model):
graph = model.graph
graph_modified = False
for n in graph.node:
if n.op_type == "Reshape":
shape = model.get_initializer(n.input[1])
if (shape == [1, -1]).all():
producer = model.find_producer(n.input[0])
if _is_fpgadataflow_node(producer) is True:
consumer = model.find_consumer(n.output[0])
if _is_fpgadataflow_node(consumer) is True:
graph_modified = True
consumer.input[0] = n.input[0]
graph.node.remove(n)
elif producer.op_type == "Transpose":
transp_node = producer
if n.op_type == "Flatten": # re-add reshape
# shape = model.get_initializer(n.input[1])
# if (shape == [1, -1]).all():
producer = model.find_producer(n.input[0])
if _is_fpgadataflow_node(producer) is True:
consumer = model.find_consumer(n.output[0])
if _is_fpgadataflow_node(consumer) is True:
graph_modified = True
consumer.input[0] = n.input[0]
graph.node.remove(n)
elif producer.op_type == "Transpose":
transp_node = producer
# check if transpose converts NHWC to NCHW
perms = list(get_by_name(transp_node.attribute, "perm").ints)
if perms == [0, 3, 1, 2]:
producer = model.find_producer(transp_node.input[0])
if _is_fpgadataflow_node(producer) is True:
consumer = model.find_consumer(n.output[0])
if _is_fpgadataflow_node(consumer) is True:
graph_modified = True
consumer.input[0] = transp_node.input[0]
graph.node.remove(n)
graph.node.remove(transp_node)
if consumer.op_type == "StreamingFCLayer_Batch":
fc_inst = getCustomOp(consumer)
mw = fc_inst.get_nodeattr("MW")
mh = fc_inst.get_nodeattr("MH")
(b, h, w, c) = model.get_tensor_shape(
transp_node.input[0]
)
# absorb transpose into weight matrix, allowing FC layer to operate on the NHWC input
W = model.get_initializer(consumer.input[1])
assert (
W is not None
), "Initializer for matmul weights is not set."
print("fc weights before")
print(W.shape)
print(W)
W_new = W.reshape(c, h, w, mh)
W_new = W_new.transpose((1, 2, 0, 3))
W_new = W_new.reshape(mw, mh)
print("fc weights after")
print(W_new.shape)
print(W_new)
model.set_initializer(consumer.input[1], W_new)
# remove transpose & flatten nodes
graph_modified = True
consumer.input[0] = transp_node.input[0]
graph.node.remove(n)
graph.node.remove(transp_node)
else:
warnings.warn(
"Could not absorb transpose into node behind flatten layer"
)
else:
warnings.warn("Unsupported transpose node before flatten layer")
return (model, graph_modified)
......@@ -338,24 +338,35 @@ class AbsorbTransposeIntoMultiThreshold(Transformation):
graph.node.remove(n)
graph.node.remove(final_t_cand)
graph_modified = True
elif final_t_cand.op_type == "Reshape":
oshape = model.get_tensor_shape(final_t_cand.output[0])
if len(oshape) == 2:
# transition to FC part, can still use NHWC
mt = getCustomOp(mt_cand)
mt.set_nodeattr("data_layout", "NHWC")
# get rid of first tranpose node
mt_cand.input[0] = n.input[0]
# fix output shape for MultiThreshold
mt_ishape = model.get_tensor_shape(mt_cand.input[0])
(b, h, w, c) = mt_ishape
assert (
h == 1 and w == 1
), """Untested spatial dim
in conv->fc transition, proceed with caution!"""
model.set_tensor_shape(mt_cand.output[0], mt_ishape)
graph.node.remove(n)
graph_modified = True
elif final_t_cand.op_type == "Flatten": # TODO: re-add reshape
# oshape = model.get_tensor_shape(final_t_cand.output[0])
# if len(oshape) == 2:
# transition to FC part, can still use NHWC
mt = getCustomOp(mt_cand)
mt.set_nodeattr("data_layout", "NHWC")
# get rid of first tranpose node
mt_cand.input[0] = n.input[0]
# fix output shape for MultiThreshold
mt_ishape = model.get_tensor_shape(mt_cand.input[0])
(b, h, w, c) = mt_ishape
# assert (
# h == 1 and w == 1
# ), """Untested spatial dim
# in conv->fc transition, proceed with caution!"""
model.set_tensor_shape(mt_cand.output[0], mt_ishape)
graph.node.remove(n)
transpose_output = model.make_new_valueinfo_name()
new_transpose = oh.make_node(
"Transpose",
[mt_cand.output[0]],
[transpose_output],
perm=[0, 3, 1, 2],
)
graph.node.insert(node_ind + 1, new_transpose)
final_t_cand.input[0] = transpose_output
graph_modified = True
if graph_modified:
model = model.transform(InferDataTypes())
return (model, graph_modified)
......
# Copyright (c) 2020, Xilinx
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# * Neither the name of FINN nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from onnx import TensorProto, helper
import numpy as np
import pytest
from finn.core.datatype import DataType
from finn.transformation.infer_shapes import InferShapes
from finn.transformation.infer_datatypes import InferDataTypes
from finn.transformation.general import GiveUniqueNodeNames
from finn.transformation.lower_convs_to_matmul import LowerConvsToMatMul
from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
import finn.core.onnx_exec as oxe
from finn.core.modelwrapper import ModelWrapper
from finn.util.basic import gen_finn_dt_tensor
import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
from finn.custom_op.general.im2col import compute_conv_output_dim
from finn.custom_op.registry import getCustomOp
from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer
import finn.transformation.streamline.absorb as absorb
from finn.transformation.fold_constants import FoldConstants
from finn.transformation.general import (
ApplyConfig,
GiveReadableTensorNames,
RemoveUnusedTensors,
RemoveStaticGraphInputs,
)
from finn.transformation.streamline import Streamline
from finn.transformation.infer_data_layouts import InferDataLayouts
from finn.transformation.move_reshape import RemoveCNVtoFCFlatten
from finn.transformation.streamline.reorder import (
MakeMaxPoolNHWC,
MoveScalarLinearPastInvariants,
)
import finn.core.data_layout as DataLayout
def get_multithreshold_rand_params(channels, num_of_thres, seed=None):
if seed is not None:
np.random.seed(seed)
steps = np.random.rand(channels, 1) * 20
bias = np.random.rand(channels, 1) * -10
thres = [np.arange(num_of_thres) for chn in range(channels)]
thres = ((thres + bias) * steps).astype(np.float32)
thres = np.round(thres)
return thres
# conv_config kernel_size,stride, pad
# @pytest.mark.parametrize(
# "conv_config", [(1, 2, 0), (1, 3, 0), (3, 2, 1), (3, 1, 0), (3, 1, 1), (5, 2, 1)]
# )
# @pytest.mark.parametrize("depthwise", [False, True])
# @pytest.mark.parametrize("exec_mode", ["cppsim", "rtlsim"])
@pytest.mark.parametrize("conv_config", [(3, 1, 1)])
@pytest.mark.parametrize("depthwise", [False])
@pytest.mark.parametrize("exec_mode", ["cppsim"])
@pytest.mark.slow
@pytest.mark.vivado
def test_convert_to_hls_conv_fc_transition(conv_config, depthwise, exec_mode):
kernel_size, stride, pad = conv_config
np.random.seed(0)
idt = DataType.UINT4
odt = DataType.UINT4
in_feature_dim = 2
in_chn = 4
fc_filters = 8
if depthwise is True:
group = out_chn = in_chn
conv_param_shape = [out_chn, 1, kernel_size, kernel_size]
else:
group = 1
out_chn = 4
conv_param_shape = [out_chn, in_chn, kernel_size, kernel_size]
total_pad = 2 * pad
out_feature_dim = compute_conv_output_dim(
in_feature_dim, kernel_size, stride, total_pad
)
input_shape = [1, in_chn, in_feature_dim, in_feature_dim]
conv_output_shape = [1, out_chn, out_feature_dim, out_feature_dim]
output_shape = [1, fc_filters]
fc_param_shape = [out_chn * out_feature_dim * out_feature_dim, fc_filters]
conv_weight_dt = DataType.INT4
fc_weight_dt = DataType.INT4
conv_config = {}
conv_config["dilations"] = [1, 1]
conv_config["group"] = group
conv_config["kernel_shape"] = [kernel_size, kernel_size]
conv_config["pads"] = [pad, pad, pad, pad]
conv_config["strides"] = [stride, stride]
global_in = helper.make_tensor_value_info(
"global_in", TensorProto.FLOAT, input_shape
)
global_out = helper.make_tensor_value_info(
"global_out", TensorProto.FLOAT, output_shape
)
value_info = [
helper.make_tensor_value_info(
"conv_param", TensorProto.FLOAT, conv_param_shape
),
helper.make_tensor_value_info("thres1_param", TensorProto.FLOAT, (out_chn, 15)),
helper.make_tensor_value_info(
"matmul_param", TensorProto.FLOAT, fc_param_shape
),
helper.make_tensor_value_info(
"thres2_param", TensorProto.FLOAT, (fc_filters, 15)
),
]
modelproto = helper.make_model(
helper.make_graph(
name="test",
inputs=[global_in],
outputs=[global_out],
value_info=value_info,
nodes=[
helper.make_node(
"Conv", ["global_in", "conv_param"], ["conv_out"], **conv_config
),
helper.make_node(
"MultiThreshold",
["conv_out", "thres1_param"],
["thres1_out"],
domain="finn.custom_op.general",
out_dtype="UINT4",
# out_bias=-7,
# out_scale=1.0
),
helper.make_node("Flatten", ["thres1_out"], ["flatten_out"], axis=1),
helper.make_node(
"MatMul", ["flatten_out", "matmul_param"], ["matmul_out"]
),
helper.make_node(
"MultiThreshold",
["matmul_out", "thres2_param"],
["global_out"],
domain="finn.custom_op.general",
out_dtype="UINT4",
# out_bias=-7,
# out_scale=1.0
),
],
)
)
model = ModelWrapper(modelproto)
model.set_tensor_datatype("global_in", idt)
model.set_tensor_layout("global_in", DataLayout.NCHW)
model.set_tensor_datatype("global_out", odt)
model.set_tensor_datatype("conv_param", conv_weight_dt)
model.set_tensor_datatype("matmul_param", fc_weight_dt)
model.set_tensor_datatype("thres1_param", DataType.INT32)
model.set_tensor_datatype("thres2_param", DataType.INT32)
model.set_tensor_datatype(
"flatten_out", DataType.UINT4
) # TODO: not inferred automatically (FLOAT32)
model.set_initializer(
"conv_param", gen_finn_dt_tensor(conv_weight_dt, conv_param_shape)
)
model.set_initializer(
"thres1_param", get_multithreshold_rand_params(out_chn, 15, seed=0)
)
model.set_initializer(
"thres2_param", get_multithreshold_rand_params(fc_filters, 15, seed=0)
)
model.set_initializer(
"matmul_param", gen_finn_dt_tensor(fc_weight_dt, fc_param_shape)
)
model = model.transform(InferShapes())
model = model.transform(InferDataTypes())
model = model.transform(InferDataLayouts())
model.save("testmodel_in.onnx")
x = gen_finn_dt_tensor(idt, input_shape)
inp_dict = {model.graph.input[0].name: x}
output = oxe.execute_onnx(model, inp_dict)
print(output)
# streamlining step
model = model.transform(MoveScalarLinearPastInvariants())
model = model.transform(Streamline())
model = model.transform(LowerConvsToMatMul())
model = model.transform(MakeMaxPoolNHWC())
model = model.transform(absorb.AbsorbTransposeIntoMultiThreshold())
model = model.transform(Streamline())
model = model.transform(InferDataLayouts())
model = model.transform(RemoveUnusedTensors())
model.save("testmodel_streamlined.onnx")
output = oxe.execute_onnx(model, inp_dict)
print(output)
# convert_to_hls step
model = model.transform(to_hls.InferQuantizedStreamingFCLayer())
model = model.transform(to_hls.InferThresholdingLayer())
model = model.transform(to_hls.InferConvInpGen())
model = model.transform(to_hls.InferStreamingMaxPool())
model = model.transform(RemoveCNVtoFCFlatten())
model = model.transform(absorb.AbsorbConsecutiveTransposes())
model = model.transform(GiveUniqueNodeNames())
model = model.transform(InferDataLayouts())
if exec_mode == "cppsim":
model = model.transform(PrepareCppSim())
model = model.transform(CompileCppSim())
model = model.transform(SetExecMode("cppsim"))
elif exec_mode == "rtlsim":
model = model.transform(SetExecMode("rtlsim"))
model = model.transform(GiveUniqueNodeNames())
model = model.transform(PrepareIP("xc7z020clg400-1", 5))
model = model.transform(HLSSynthIP())
model = model.transform(PrepareRTLSim())
else:
raise Exception("Unknown exec_mode")
model.save("testmodel_hls.onnx")
output = oxe.execute_onnx(model, inp_dict)
print(output)
model_orig = ModelWrapper("testmodel_in.onnx")
model_hls = ModelWrapper("testmodel_hls.onnx")
assert oxe.compare_execution(model_orig, model_hls, inp_dict)
"""
new_model = model.transform(LowerConvsToMatMul())
new_model = new_model.transform(to_hls.InferConvInpGen())
if depthwise is True:
new_model = new_model.transform(to_hls.InferVVAU())
else:
new_model = new_model.transform(to_hls.InferQuantizedStreamingFCLayer())
fc_node = new_model.get_nodes_by_op_type("StreamingFCLayer_Batch")[0]
fc_inst = getCustomOp(fc_node)
mw = fc_inst.get_nodeattr("MW")
mh = fc_inst.get_nodeattr("MH")
pe_cands = list(filter(lambda x: mh % x == 0, range(2, mh + 1)))
simd_cands = list(filter(lambda x: mw % x == 0, range(2, mw + 1)))
fc_inst.set_nodeattr("PE", pe_cands[0])
fc_inst.set_nodeattr("SIMD", simd_cands[0])
new_model = new_model.transform(GiveUniqueNodeNames())
new_model = new_model.transform(InferShapes())
new_model = new_model.transform(InferDataTypes())
if exec_mode == "cppsim":
new_model = new_model.transform(PrepareCppSim())
new_model = new_model.transform(CompileCppSim())
new_model = new_model.transform(SetExecMode("cppsim"))
elif exec_mode == "rtlsim":
new_model = new_model.transform(SetExecMode("rtlsim"))
new_model = new_model.transform(GiveUniqueNodeNames())
new_model = new_model.transform(PrepareIP("xc7z020clg400-1", 5))
new_model = new_model.transform(HLSSynthIP())
new_model = new_model.transform(PrepareRTLSim())
else:
raise Exception("Unknown exec_mode")
x = gen_finn_dt_tensor(idt, input_shape)
inp_dict = {model.graph.input[0].name: x}
assert oxe.compare_execution(model, new_model, inp_dict)
if kernel_size == 1 and stride > 1 and pad == 0:
assert new_model.graph.node[1].op_type == "DownSampler"
if exec_mode == "rtlsim":
node = new_model.get_nodes_by_op_type("DownSampler")[0]
inst = getCustomOp(node)
cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim")
exp_cycles_dict = new_model.analysis(exp_cycles_per_layer)
exp_cycles = exp_cycles_dict[node.name]
assert np.isclose(exp_cycles, cycles_rtlsim, atol=11)
assert exp_cycles != 0
if pad == 1:
padding_node = new_model.get_nodes_by_op_type("FMPadding_Batch")[0]
padding_inst = getCustomOp(padding_node)
assert padding_inst.get_nodeattr("SIMD") == in_chn
if depthwise is True and exec_mode == "rtlsim":
node = new_model.get_nodes_by_op_type("Vector_Vector_Activate_Batch")[0]
inst = getCustomOp(node)
cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim")
exp_cycles_dict = new_model.analysis(exp_cycles_per_layer)
exp_cycles = exp_cycles_dict[node.name]
assert np.isclose(exp_cycles, cycles_rtlsim, atol=11)
assert exp_cycles != 0
"""
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment