Commit 3759727c authored by Tobi-Alonso

Merge remote-tracking branch 'upstream/dev' into feature/Pool_Batch_op

parents c1c580b5 6432379f
......@@ -13,7 +13,7 @@ gecho () {
# checkout the correct dependency repo commits
# the repos themselves are cloned in the Dockerfile
-BREVITAS_COMMIT=7696326e5f279cacffd5b6ac8d9e8d81deec3978
+BREVITAS_COMMIT=026a509186b7e7b0b65d46a2f905043d41069306
CNPY_COMMIT=4e8810b1a8637695171ed346ce68f6984e585ef4
HLSLIB_COMMIT=8aed899c278c36c977a249558d71795086cf852c
PYVERILATOR_COMMIT=c97a5ba41bbc7c419d6f25c74cdf3bdc3393174f
......
......@@ -18,6 +18,7 @@ ONNX does not support datatypes smaller than 8-bit integers, whereas in FINN we
Note that FINN uses floating point tensors as a carrier data type to represent integers. Floating point arithmetic can introduce rounding errors, e.g. (int_num * float_scale) / float_scale is not always equal to int_num.
When using the custom ONNX execution flow, FINN will attempt to sanitize any rounding errors for integer tensors. See (:py:mod:`finn.util.basic.sanitize_quant_values`) for more information.
This behavior can be disabled (not recommended!) by setting the environment variable SANITIZE_QUANT_TENSORS=0.
Custom Operations/Nodes
=======================
......
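A minimal NumPy sketch of the rounding issue described above (values chosen purely for illustration): integer values carried in float32 can drift slightly off the integer grid after scaling and un-scaling, and sanitization simply rounds the carrier values back.

import numpy as np

# integer values carried as float32, as FINN does internally
int_vals = np.array([3.0, 7.0, 11.0], dtype=np.float32)
float_scale = np.float32(0.1)

# (int_num * float_scale) / float_scale is not always exactly int_num in float32
roundtrip = (int_vals * float_scale) / float_scale

# sanitization snaps the carrier values back onto the integer grid
sanitized = np.round(roundtrip)
assert (sanitized == int_vals).all()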
......@@ -39,7 +39,7 @@ from finn.core.remote_exec import remote_exec
from finn.core.rtlsim_exec import rtlsim_exec
from finn.custom_op.registry import getCustomOp
import finn.analysis.topology as ta
-from finn.util.basic import sanitize_quant_values
+from finn.util.basic import sanitize_quant_values, get_sanitize_quant_tensors
def execute_node(node, context, graph):
......@@ -160,14 +160,17 @@ def execute_onnx(model, input_dict, return_full_exec_context=False):
# we can simply walk down the list since the ONNX spec guarantees that it is
# topologically sorted
for node in graph.node:
-    # call util function match input values to quantization annotation
-    execution_context = sanitize_quant_values(
-        model, node.input, execution_context
-    )
+    if get_sanitize_quant_tensors() != 0:
+        # round input values to match quantization annotation
+        execution_context = sanitize_quant_values(
+            model, node.input, execution_context
+        )
     execute_node(node, execution_context, graph)
-    execution_context = sanitize_quant_values(
-        model, node.output, execution_context
-    )
+    if get_sanitize_quant_tensors() != 0:
+        # round output values to quantization annotation
+        execution_context = sanitize_quant_values(
+            model, node.output, execution_context
+        )
elif model_exec_mode == "remote_pynq":
# use remote exec metadata built into model to execute on a remote PYNQ
remote_exec(model, execution_context)
......
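As a usage sketch only (it assumes a ModelWrapper model and a matching input_dict are already prepared; the names are illustrative), sanitization can be switched off through the environment variable before calling execute_onnx. As the documentation above notes, this is faster but not recommended.

import os
import finn.core.onnx_exec as oxe

# not recommended: skip per-node rounding of quantized tensors for speed
os.environ["SANITIZE_QUANT_TENSORS"] = "0"
output_dict = oxe.execute_onnx(model, input_dict)

# restore the default behaviour (sanitization enabled)
os.environ["SANITIZE_QUANT_TENSORS"] = "1"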
import numpy as np
from onnx import TensorProto, helper
import onnxruntime as rt
from finn.custom_op import CustomOp
from finn.core.datatype import DataType
class QuantAvgPool2d(CustomOp):
"""Class that corresponds to the quantized average pooling
layer from brevitas"""
def get_nodeattr_types(self):
return {
"stride": ("i", True, 1),
"kernel": ("i", True, 1),
"ibits": ("i", True, 1),
"obits": ("i", True, 1),
"signed": ("i", True, 0),
}
def make_shape_compatible_op(self, model):
node = self.onnx_node
k = self.get_nodeattr("kernel")
s = self.get_nodeattr("stride")
return helper.make_node(
"AveragePool",
inputs=[node.input[0]],
outputs=[node.output[0]],
kernel_shape=[k, k],
strides=[s, s],
)
def infer_node_datatype(self, model):
node = self.onnx_node
bw = self.get_nodeattr("obits")
if bw in [2, 4, 8, 16, 32]:
if self.get_nodeattr("signed") == 0:
dtype = DataType["UINT%d" % bw]
else:
dtype = DataType["INT%d" % bw]
else:
raise Exception("Unsupported output datatype for QuantAvgPool2d")
model.set_tensor_datatype(node.output[0], dtype)
def execute_node(self, context, graph):
# create a standard average pooling node to help calculate the result
node = self.onnx_node
k = self.get_nodeattr("kernel")
s = self.get_nodeattr("stride")
ishape = context[node.input[0]].shape
oshape = context[node.output[0]].shape
inp = helper.make_tensor_value_info(node.input[0], TensorProto.FLOAT, ishape)
outp = helper.make_tensor_value_info(node.output[0], TensorProto.FLOAT, oshape)
node_avgpool = helper.make_node(
"AveragePool",
inputs=[node.input[0]],
outputs=[node.output[0]],
kernel_shape=[k, k],
strides=[s, s],
)
graph_avgpool = helper.make_graph(
nodes=[node_avgpool],
name="single-avgpool-exec",
inputs=[inp],
outputs=[outp],
)
model_avgpool = helper.make_model(graph_avgpool)
idict = {node.input[0]: context[node.input[0]]}
sess = rt.InferenceSession(model_avgpool.SerializeToString())
result_temp = sess.run(None, idict)
# remove scaling introduced by average
result_temp = result_temp[0] * (k * k)
ibits = self.get_nodeattr("ibits")
max_value = 2 ** ibits - 1
max_value = max_value * k * k
max_bit_width = int(max_value).bit_length()
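# reducing the accumulator to obits amounts to dropping its low-order bits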
shift_bits = max_bit_width - self.get_nodeattr("obits")
result = np.right_shift(result_temp.astype(int), shift_bits)
context[node.output[0]] = result.astype(np.float32)
def verify_node(self):
pass
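A small worked sketch of the requantization arithmetic in execute_node above (the concrete numbers are illustrative): the sum over a k x k window of ibits-wide inputs needs up to max_bit_width bits, and reducing it to obits is a plain right shift.

# illustrative settings: 2x2 window, 4-bit unsigned inputs, 4-bit output
k, ibits, obits = 2, 4, 4

# worst-case window sum and the number of bits needed to hold it
max_value = (2 ** ibits - 1) * k * k            # 15 * 4 = 60
max_bit_width = int(max_value).bit_length()     # 6 bits
shift_bits = max_bit_width - obits              # 2

# sum over one window (AveragePool output times k*k), then right-shift to obits
acc = 15 + 14 + 13 + 12                         # 54
out = acc >> shift_bits                         # 13, fits in 4 bits
print(max_bit_width, shift_bits, out)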
......@@ -49,6 +49,7 @@ from finn.custom_op.fpgadataflow.fmpadding_batch import FMPadding_Batch
from finn.custom_op.fpgadataflow.thresholding_batch import Thresholding_Batch
from finn.custom_op.fpgadataflow.addstreams_batch import AddStreams_Batch
from finn.custom_op.fpgadataflow.labelselect_batch import LabelSelect_Batch
from finn.custom_op.quantavgpool2d import QuantAvgPool2d
from finn.custom_op.fpgadataflow.duplicatestreams_batch import DuplicateStreams_Batch
# create a mapping of all known CustomOp names and classes
......@@ -71,6 +72,7 @@ custom_op["FMPadding_Batch"] = FMPadding_Batch
custom_op["Thresholding_Batch"] = Thresholding_Batch
custom_op["AddStreams_Batch"] = AddStreams_Batch
custom_op["LabelSelect_Batch"] = LabelSelect_Batch
custom_op["QuantAvgPool2d"] = QuantAvgPool2d
custom_op["DuplicateStreams_Batch"] = DuplicateStreams_Batch
......
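Once registered, the wrapper class can be obtained through getCustomOp; a minimal sketch follows (the node constructed here is illustrative, in practice it comes from an exported graph).

from onnx import helper
from finn.custom_op.registry import getCustomOp

node = helper.make_node(
    "QuantAvgPool2d", ["inp"], ["outp"], domain="finn",
    kernel=2, stride=2, ibits=4, obits=4, signed=0,
)
inst = getCustomOp(node)
print(inst.get_nodeattr("kernel"))  # 2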
......@@ -71,7 +71,13 @@ def _infer_node_datatype(model, node):
else:
# unknown, assume node produces float32 outputs
for o in node.output:
-        model.set_tensor_datatype(o, DataType.FLOAT32)
+        # check if output datatype is already set to a value != FLOAT32
+        odtype = model.get_tensor_datatype(o)
+        if odtype is not None and odtype != DataType.FLOAT32:
+            # don't change data type
+            model.set_tensor_datatype(o, odtype)
+        else:
+            model.set_tensor_datatype(o, DataType.FLOAT32)
# compare old and new output dtypes to see if anything changed
new_odtypes = list(map(lambda x: model.get_tensor_datatype(x), node.output))
graph_modified = new_odtypes != odtypes
......
......@@ -30,6 +30,7 @@ from onnx import helper as oh
from finn.transformation import Transformation
from finn.transformation.infer_shapes import InferShapes
from finn.core.datatype import DataType
class CollapseRepeatedOp(Transformation):
......@@ -83,6 +84,9 @@ class CollapseRepeatedOp(Transformation):
graph.node.insert(node_ind, new_node)
# replace parameter value
model.set_initializer(new_node_param_name, new_param)
# be conservative with param/output DataTypes
model.set_tensor_datatype(new_node_param_name, DataType.FLOAT32)
model.set_tensor_datatype(end_name, DataType.FLOAT32)
# remove old nodes
graph.node.remove(n)
graph.node.remove(consumer)
......
# Copyright (c) 2020, Xilinx
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# * Neither the name of FINN nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from finn.transformation import Transformation
from finn.transformation.infer_shapes import InferShapes
import numpy as np
class RemoveIdentityOps(Transformation):
"""Remove identity ops like Add/Sub with zero or Mul/Div with one"""
def apply(self, model):
graph = model.graph
node_ind = 0
graph_modified = False
for n in graph.node:
node_ind += 1
if (
n.op_type in ["Add", "Sub"]
and not model.is_fork_node(n)
and not model.is_join_node(n)
):
A = model.get_initializer(n.input[1])
if A is not None and (A == np.zeros_like(A)).all():
producer = model.find_producer(n.input[0])
# remove node and wire output tensor to
# output of producer node
producer.output[0] = n.output[0]
graph.node.remove(n)
elif (
n.op_type in ["Mul", "Div"]
and not model.is_fork_node(n)
and not model.is_join_node(n)
):
A = model.get_initializer(n.input[1])
if A is not None and (A == np.ones_like(A)).all():
producer = model.find_producer(n.input[0])
# remove node and wire output tensor to
# output of producer node
producer.output[0] = n.output[0]
graph.node.remove(n)
model = model.transform(InferShapes())
return (model, graph_modified)
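Applied like any other FINN transformation (the sketch below assumes an exported ONNX file; the path is illustrative), the pass drops Add/Sub-with-zero and Mul/Div-with-one nodes.

from finn.core.modelwrapper import ModelWrapper
from finn.transformation.streamline.remove import RemoveIdentityOps

model = ModelWrapper("model.onnx")  # illustrative path
num_nodes_before = len(model.graph.node)
model = model.transform(RemoveIdentityOps())
print(num_nodes_before - len(model.graph.node), "identity nodes removed")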
......@@ -106,6 +106,25 @@ def get_finn_root():
)
def get_execution_error_thresh():
"Return the max error that is allowed for rounding in FINN execution."
try:
return float(os.environ["ERROR_THRESH"])
except KeyError:
return 1e-2
def get_sanitize_quant_tensors():
"""Return whether tensors with quantization annotations should be sanitized.
Enabled by default, disabling will yield faster ONNX execution but may give
incorrect results. Use with caution."""
try:
return int(os.environ["SANITIZE_QUANT_TENSORS"])
except KeyError:
# enabled by default
return 1
def make_build_dir(prefix=""):
"""Creates a temporary folder with given prefix to be used as a build dir.
Use this function instead of tempfile.mkdtemp to ensure any generated files
......@@ -305,7 +324,7 @@ def sanitize_quant_values(model, node_tensors, execution_context, check_values=F
)
# check if rounded values are not too far from original values
max_error = max(np.abs(current_values - updated_values).flatten())
if max_error <= 1e-4:
if max_error <= get_execution_error_thresh():
if check_values is True:
# check again if values can now be represented with set finn datatype
# TODO: vectorize with numpy
......
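The two helpers above make execution behaviour configurable from the environment; a short sketch of how they respond (the values set here are illustrative):

import os
from finn.util.basic import get_execution_error_thresh, get_sanitize_quant_tensors

# defaults when the variables are unset
os.environ.pop("ERROR_THRESH", None)
os.environ.pop("SANITIZE_QUANT_TENSORS", None)
print(get_execution_error_thresh())   # 0.01
print(get_sanitize_quant_tensors())   # 1

# tighten the allowed rounding error and disable sanitization
os.environ["ERROR_THRESH"] = "1e-6"
os.environ["SANITIZE_QUANT_TENSORS"] = "0"
print(get_execution_error_thresh())   # 1e-06
print(get_sanitize_quant_tensors())   # 0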
import os
import onnx # noqa
import torch
import numpy as np
import brevitas.onnx as bo
from brevitas.nn import QuantAvgPool2d
from brevitas.quant_tensor import pack_quant_tensor
from brevitas.core.quant import QuantType
from finn.core.modelwrapper import ModelWrapper
from finn.core.datatype import DataType
from finn.transformation.infer_shapes import InferShapes
from finn.transformation.infer_datatypes import InferDataTypes
from finn.util.basic import gen_finn_dt_tensor
import finn.core.onnx_exec as oxe
import pytest
export_onnx_path = "test_avg_pool.onnx"
@pytest.mark.parametrize("kernel_size", [2, 3])
@pytest.mark.parametrize("stride", [1, 2])
@pytest.mark.parametrize("signed", [False, True])
@pytest.mark.parametrize("bit_width", [2, 4])
@pytest.mark.parametrize("input_bit_width", [4, 8, 32])
@pytest.mark.parametrize("channels", [2, 4])
@pytest.mark.parametrize("idim", [7, 8])
def test_brevitas_avg_pool_export(
kernel_size, stride, signed, bit_width, input_bit_width, channels, idim
):
ishape = (1, channels, idim, idim)
ibw_tensor = torch.Tensor([input_bit_width])
b_avgpool = QuantAvgPool2d(
kernel_size=kernel_size,
stride=stride,
signed=signed,
min_overall_bit_width=bit_width,
max_overall_bit_width=bit_width,
quant_type=QuantType.INT,
)
# call forward pass manually once to cache scale factor and bitwidth
input_tensor = torch.from_numpy(np.zeros(ishape)).float()
scale = np.ones((1, channels, 1, 1))
output_scale = torch.from_numpy(scale).float()
input_quant_tensor = pack_quant_tensor(
tensor=input_tensor, scale=output_scale, bit_width=ibw_tensor
)
bo.export_finn_onnx(b_avgpool, ishape, export_onnx_path, input_t=input_quant_tensor)
model = ModelWrapper(export_onnx_path)
# determine input FINN datatype
if signed is True:
prefix = "INT"
else:
prefix = "UINT"
dt_name = prefix + str(input_bit_width // 2)
dtype = DataType[dt_name]
model = model.transform(InferShapes())
model = model.transform(InferDataTypes())
# execution with input tensor using integers and scale = 1
# calculate golden output
inp = gen_finn_dt_tensor(dtype, ishape)
input_tensor = torch.from_numpy(inp).float()
input_quant_tensor = pack_quant_tensor(
tensor=input_tensor, scale=output_scale, bit_width=ibw_tensor
)
b_avgpool.eval()
expected = b_avgpool.forward(input_quant_tensor).tensor.detach().numpy()
# finn execution
idict = {model.graph.input[0].name: inp}
odict = oxe.execute_onnx(model, idict, True)
produced = odict[model.graph.output[0].name]
assert (expected == produced).all()
# execution with input tensor using float and scale != 1
scale = np.random.uniform(low=0, high=1, size=(1, channels, 1, 1)).astype(
np.float32
)
inp_tensor = inp * scale
input_tensor = torch.from_numpy(inp_tensor).float()
input_scale = torch.from_numpy(scale).float()
input_quant_tensor = pack_quant_tensor(
tensor=input_tensor, scale=input_scale, bit_width=ibw_tensor
)
# export again to set the scale values correctly
bo.export_finn_onnx(b_avgpool, ishape, export_onnx_path, input_t=input_quant_tensor)
model = ModelWrapper(export_onnx_path)
model = model.transform(InferShapes())
model = model.transform(InferDataTypes())
b_avgpool.eval()
expected = b_avgpool.forward(input_quant_tensor).tensor.detach().numpy()
# finn execution
idict = {model.graph.input[0].name: inp_tensor}
odict = oxe.execute_onnx(model, idict, True)
produced = odict[model.graph.output[0].name]
assert np.isclose(expected, produced).all()
os.remove(export_onnx_path)
import pytest
import numpy as np
from onnx import helper, TensorProto
import finn.core.onnx_exec as oxe
from finn.core.datatype import DataType
from finn.core.modelwrapper import ModelWrapper
from finn.transformation.infer_datatypes import InferDataTypes
from finn.transformation.infer_shapes import InferShapes
from finn.transformation.streamline.remove import RemoveIdentityOps
from finn.util.basic import gen_finn_dt_tensor
def insert_identity_op(model, op):
if op in ["Add", "Sub"]:
val = np.asarray([0.0], dtype=np.float32)
elif op in ["Mul", "Div"]:
val = np.asarray([1.0], dtype=np.float32)
else:
return
identity_node = helper.make_node(op, ["div_out", "value"], ["ident_out"])
graph = model.graph
graph.node.insert(3, identity_node)
graph.node[-1].input[0] = "ident_out"
model.set_initializer("value", val)
return model
# identity operations to be inserted
@pytest.mark.parametrize("op", ["Add", "Sub", "Mul", "Div"])
def test_remove_identity_ops(op):
# set up onnx model
inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, [1, 4, 1, 1])
mul = helper.make_tensor_value_info("mul", TensorProto.FLOAT, [])
shape = helper.make_tensor_value_info("shape", TensorProto.FLOAT, [2])
div = helper.make_tensor_value_info("div", TensorProto.FLOAT, [])
matmul = helper.make_tensor_value_info("matmul", TensorProto.FLOAT, [4, 2])
outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, [1, 2])
mul_node = helper.make_node("Mul", ["inp", "mul"], ["mul_out"])
reshape_node = helper.make_node("Reshape", ["mul_out", "shape"], ["reshape_out"])
div_node = helper.make_node("Div", ["reshape_out", "div"], ["div_out"])
matmul_node = helper.make_node("MatMul", ["div_out", "matmul"], ["outp"])
graph = helper.make_graph(
nodes=[mul_node, reshape_node, div_node, matmul_node],
name="identity-graph",
inputs=[inp],
outputs=[outp],
value_info=[mul, shape, div, matmul],
)
model = helper.make_model(graph, producer_name="mulpastconv-model")
model = ModelWrapper(model)
inp_values = gen_finn_dt_tensor(DataType.INT2, [1, 4, 1, 1])
mul_values = np.random.uniform(low=0.1, high=0.99, size=(1)).astype(np.float32)
shape_values = np.asarray([1, -1], dtype=np.int64)
div_values = np.random.uniform(low=0.1, high=0.99, size=(1)).astype(np.float32)
matmul_values = gen_finn_dt_tensor(DataType.INT2, [4, 2])
model.set_initializer("mul", mul_values)
model.set_initializer("shape", shape_values)
model.set_initializer("div", div_values)
model.set_initializer("matmul", matmul_values)
insert_identity_op(model, op)
model = model.transform(InferShapes())
model = model.transform(InferDataTypes())
idict = {"inp": inp_values}
odict = oxe.execute_onnx(model, idict)
out_before = odict["outp"]
num_of_nodes_before = len(model.graph.node)
model = model.transform(RemoveIdentityOps())
num_of_nodes_after = len(model.graph.node)
assert num_of_nodes_before - 1 == num_of_nodes_after
odict = oxe.execute_onnx(model, idict)
out_after = odict["outp"]
assert (out_before == out_after).all()