Unverified commit bdb56b28, authored by Yaman Umuroglu, committed by GitHub

Merge pull request #142 from quetric/feature/Add_channelwise_op

Feature/add channelwise op
parents ddd4fdbf 6388cd6e
...
@@ -15,7 +15,7 @@ gecho () {
# the repos themselves are cloned in the Dockerfile
BREVITAS_COMMIT=f9a27226d4acf1661dd38bc449f71f89e0983cce
CNPY_COMMIT=4e8810b1a8637695171ed346ce68f6984e585ef4
-HLSLIB_COMMIT=8aed899c278c36c977a249558d71795086cf852c
+HLSLIB_COMMIT=8f9f2018762f654f196b666838aeaf6fc730ad9a
PYVERILATOR_COMMIT=c97a5ba41bbc7c419d6f25c74cdf3bdc3393174f
PYNQSHELL_COMMIT=0c82a61b0ec1a07fa275a14146233824ded7a13d
OMX_COMMIT=1bae737669901e762f581af73348332b5c4b2ada
...
...
@@ -56,8 +56,15 @@ class CustomOp(ABC):
ret = ret.decode("utf-8")
return ret
else:
-    # not set, return default value
-    return def_val
+    if req:
+        raise Exception(
+            """Required attribute %s unspecified in
+            a %s node"""
+            % (name, self.onnx_node.op_type)
+        )
+    else:
+        # not set, return default value
+        return def_val
except KeyError:
raise AttributeError("Op has no such attribute: " + name)
...
# Copyright (c) 2020, Xilinx
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# * Neither the name of FINN nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from math import ceil
import os
import numpy as np
from onnx import TensorProto, helper
from finn.core.datatype import DataType
from finn.custom_op.fpgadataflow import HLSCustomOp
from finn.util.data_packing import (
npy_to_rtlsim_input,
numpy_to_hls_code,
rtlsim_output_to_npy,
)
from . import templates
# ONNX i/o tensor shape assumptions for channelwise ops:
# input 0 is the input tensor, shape (..., NumChannels)
# input 1 is the channelwise parameter tensor, shape (NumChannels, params_per_channel)
# output 0 is the output tensor, shape (..., NumChannels) - same as input
# the ... here can be any shape (representing groups of vectors)
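#
# A worked example of these shape conventions (illustrative values only):
# for NumChannels=16, PE=4 and numInputVectors=[1, 4, 4] (conv-style input):
#   normal input/output shape = (1, 4, 4, 16)
#   folded input/output shape = (1, 4, 4, 4, 4)  # (..., fold, PE), fold = 16 // 4
#   TMEM (parameter depth)    = 16 // 4 = 4      # see calc_tmem() below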
class ChannelwiseOp_Batch(HLSCustomOp):
"""Class that corresponds to finn-hls Thresholding_Batch function.
It can implement a variety of channel-wise parametrized operations,
including Add, Mul and multi-thresholding.
"""
def __init__(self, onnx_node):
super().__init__(onnx_node)
self.decoupled_wrapper = templates.decoupled_wrapper
def get_nodeattr_types(self):
my_attrs = {
# channelwise "map" function to apply:
# one of cmp_le, cmp_ge, add, mul
"Func": ("s", False, "cmp_le"),
"PE": ("i", True, 0),
"NumChannels": ("i", True, 0),
# string defining memory resource type for parameters
"ram_style": ("s", False, "distributed"),
# FINN DataTypes for inputs, weights, outputs
"inputDataType": ("s", True, ""),
"paramDataType": ("s", True, ""),
"outputDataType": ("s", True, ""),
# input and output FIFO depths
"inFIFODepth": ("i", False, 0),
"outFIFODepth": ("i", False, 0),
# number of input vectors, examples:
# [1] is a single vector (like a FC layer with batch=1)
# [4] is four vectors (like a FC layer with batch=4)
# [1, 4, 4] is four * four vectors (like a conv layer with batch=1)
"numInputVectors": ("ints", False, [1]),
}
my_attrs.update(super().get_nodeattr_types())
return my_attrs
def calc_tmem(self):
"""Calculates and returns TMEM, the depth of the memory used
to store the channelwise op parameters."""
chn = self.get_nodeattr("NumChannels")
pe = self.get_nodeattr("PE")
return chn // pe
def make_shape_compatible_op(self, model):
oshape = self.get_normal_output_shape()
# implement tensor with correct shape
values = np.random.randn(*oshape).astype(np.float32)
return helper.make_node(
"Constant",
inputs=[],
outputs=[self.onnx_node.output[0]],
value=helper.make_tensor(
name="const_tensor",
data_type=TensorProto.FLOAT,
dims=values.shape,
vals=values.flatten().astype(float),
),
)
def infer_node_datatype(self, model):
node = self.onnx_node
# check input datatype against property
idt_name = self.get_input_datatype().name
exp_idt_name = self.get_nodeattr("inputDataType")
assert exp_idt_name == idt_name, "Bad input DataType for ChannelwiseOp layer"
# TODO: dynamically infer/update odt based on idt as done in ConvertToHLSLayers?
# set output datatype from property
odt = self.get_output_datatype()
model.set_tensor_datatype(node.output[0], odt)
def verify_node(self):
info_messages = []
# verify that "domain" is set to "finn"
domain_value = self.onnx_node.domain
if domain_value == "finn":
info_messages.append("Attribute domain is set correctly")
else:
info_messages.append('Attribute domain should be set to "finn"')
# verify that "backend" is set to "fpgadataflow"
backend_value = self.get_nodeattr("backend")
if backend_value == "fpgadataflow":
info_messages.append("Attribute backend is set correctly")
else:
info_messages.append('Attribute backend should be set to "fpgadataflow"')
# verify that all necessary attributes exist
# TODO collect automatically from get_nodeattr_types
try:
self.get_nodeattr("code_gen_dir_cppsim")
self.get_nodeattr("executable_path")
self.get_nodeattr("NumChannels")
self.get_nodeattr("PE")
self.get_nodeattr("inputDataType")
self.get_nodeattr("paramDataType")
self.get_nodeattr("outputDataType")
info_messages.append("All necessary attributes exist")
except Exception:
info_messages.append(
"""The required Threshold_Batch attributes do not exist."""
)
return info_messages
def bram_estimation(self):
"""Calculates BRAM cost if resource set to BRAM"""
style = self.get_nodeattr("ram_style")
P = self.get_nodeattr("PE")
idt = self.get_input_datatype()
A = idt.bitwidth()
tmem = self.calc_tmem()
if style == "block" and tmem > 1:
return int(ceil(A * P / 16)) * int(ceil(tmem / 1024))
else:
return 0
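# Example of the cost model above (illustrative numbers): PE=4, 8-bit input,
# tmem=2048 and ram_style="block" gives ceil(8*4/16) * ceil(2048/1024)
# = 2 * 2 = 4 BRAMs; any configuration with tmem <= 1 or a different
# ram_style costs 0.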
def lut_estimation(self):
"""Calculates LUT cost, taking memory resource type into account """
# TODO add in/out FIFO contributions
style = self.get_nodeattr("ram_style")
P = self.get_nodeattr("PE")
idt = self.get_input_datatype()
A = idt.bitwidth()
tmem = self.calc_tmem()
# cost of comparators
comparator_cost = A * P
# cost of LUTRAM
if style == "distributed" and tmem > 1:
lutram_cost = P * A * int(ceil(tmem / 64))
else:
lutram_cost = 0
# total cost
return comparator_cost + lutram_cost
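# Example of the cost model above (illustrative numbers): PE=4, 4-bit input,
# tmem=16 and ram_style="distributed" gives 4*4 = 16 LUTs of comparators plus
# 4*4*ceil(16/64) = 16 LUTs of LUTRAM, i.e. 32 LUTs in total.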
def get_input_datatype(self):
"""Returns FINN DataType of input."""
return DataType[self.get_nodeattr("inputDataType")]
def get_output_datatype(self):
"""Returns FINN DataType of output."""
return DataType[self.get_nodeattr("outputDataType")]
def get_instream_width(self):
i_bits = self.get_input_datatype().bitwidth()
return i_bits * self.get_nodeattr("PE")
def get_outstream_width(self):
o_bits = self.get_output_datatype().bitwidth()
return o_bits * self.get_nodeattr("PE")
def get_folded_input_shape(self):
ich = self.get_nodeattr("NumChannels")
pe = self.get_nodeattr("PE")
fold = ich // pe
vecs = list(self.get_nodeattr("numInputVectors"))
folded_input_shape = tuple(vecs + [fold, pe])
return folded_input_shape
def get_folded_output_shape(self):
# same shape as input
return self.get_folded_input_shape()
def get_normal_input_shape(self):
ich = self.get_nodeattr("NumChannels")
vecs = list(self.get_nodeattr("numInputVectors"))
normal_input_shape = tuple(vecs + [ich])
return normal_input_shape
def get_normal_output_shape(self):
# same shape as input
return self.get_normal_input_shape()
def get_number_output_values(self):
nf = np.prod(self.get_folded_output_shape()[:-1])
return nf
def get_template_param_values(self):
"""Returns the template parameter values according to input, output and weight
data types."""
ret = dict()
inp_hls_str = self.get_input_datatype().get_hls_datatype_str()
out_hls_str = self.get_output_datatype().get_hls_datatype_str()
# fill in TSrcI
ret["TSrcI"] = "Slice<%s>" % inp_hls_str
# fill in TDstI
ret["TDstI"] = "Slice<%s>" % out_hls_str
return ret
def get_hls_compatible_parameter_tensor(self, orig_param_vector):
"""Convert the original numpy weight matrix orig_weight_matrix into
a form suitable for passing to the hlslib call:
* ensure chn % PE == 0
* interleave rows between PEs
* reshape into (PE, TMEM) and return
"""
chn = self.get_nodeattr("NumChannels")
pe = self.get_nodeattr("PE")
tmem = chn // pe
assert chn % pe == 0, "Requirement NumChannels divisible by PE is violated."
assert (
orig_param_vector.ndim == 1
), """Parameter vector dimension is {}.
Expected dimension: 1.""".format(
orig_param_vector.ndim
)
# if not self.get_input_datatype().signed():
# # ensure all thresholds are nonnegative
# assert (orig_param_vector >= 0).all()
# ensure all parameters are integer
assert (
orig_param_vector.astype(np.int32) == orig_param_vector
).all(), "Parameters must be integers."
ret = orig_param_vector
assert (
ret.shape[0] == chn
), "Cardinality of parameter vector is not as expected (chn)"
# distribute rows between PEs
ret = ret.reshape(tmem, pe).transpose()
assert (
ret.shape[0] == pe
), """First dimension after distribution of the
rows between PEs is not as expected (pe)"""
assert (
ret.shape[1] == tmem
), """Second dimension after distribution of the
rows between PEs is not as expected (tmem)"""
return ret.reshape(1, pe, tmem)
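# Interleaving example for the reshape/transpose above (illustrative values):
# chn=4, pe=2, tmem=2, orig_param_vector=[p0, p1, p2, p3]:
#   reshape(2, 2)  -> [[p0, p1], [p2, p3]]
#   .transpose()   -> [[p0, p2], [p1, p3]]  # PE0 serves channels 0,2; PE1 serves 1,3
#   final shape    -> (1, 2, 2)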
def generate_params(self, model, path):
code_gen_dir = path
# save channelwise parameters in params.h
parameters = model.get_initializer(self.onnx_node.input[1])
parameter_tensor = self.get_hls_compatible_parameter_tensor(parameters)
pdt = DataType[self.get_nodeattr("paramDataType")]
parameters_hls_code = numpy_to_hls_code(
parameter_tensor, pdt, "parameters", False, True
)
# get input data type
export_idt = self.get_input_datatype()
if self.get_input_datatype() == DataType.BIPOLAR:
export_idt = DataType.BINARY
idt_hls = export_idt.get_hls_datatype_str()
# write parameters into params.h
f_params = open("{}/params.h".format(code_gen_dir), "w")
pdt_hls = pdt.get_hls_datatype_str()
# use binary to export bipolar activations
export_odt = self.get_output_datatype()
if self.get_output_datatype() == DataType.BIPOLAR:
export_odt = DataType.BINARY
odt_hls = export_odt.get_hls_datatype_str()
# get desired function
func = self.get_nodeattr("Func")
if func == "cmp_le":
func_str = "std::less_equal"
elif func == "cmp_ge":
func_str = "std::greater_equal"
elif func == "add":
func_str = "std::plus"
elif func == "mul":
func_str = "std::multiplies"
else:
raise Exception(
"""Invalid value for attribute Func! Is currently set to: {}
has to be set to one of the following value
("cmp_le", "cmp_ge", "add", "mul")""".format(
func
)
)
f_params.write(
"static ChannelWiseOperation<{},{},{},{},{},{}> threshs \
= ".format(
self.calc_tmem(),
self.get_nodeattr("PE"),
idt_hls,
pdt_hls,
odt_hls,
"%s<%s>" % (func_str, odt_hls),
)
)
f_params.write(parameters_hls_code)
f_params.close()
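# For illustration, with TMEM=4, PE=4, INT4 input/params, INT8 output and
# Func="add", the declaration written to params.h would look like
# (assuming FINN's usual ap_int HLS type strings):
# static ChannelWiseOperation<4,4,ap_int<4>,ap_int<4>,ap_int<8>,std::plus<ap_int<8>>> threshs = ...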
def execute_node(self, context, graph):
mode = self.get_nodeattr("exec_mode")
node = self.onnx_node
# TODO ensure codegen dir exists
if mode == "cppsim":
code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
elif mode == "rtlsim":
code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen")
else:
raise Exception(
"""Invalid value for attribute exec_mode! Is currently set to: {}
has to be set to one of the following value ("cppsim", "rtlsim")""".format(
mode
)
)
# create an npy file for each input of the node (in_ind is input index)
in_ind = 0
for inputs in node.input:
# it is assumed that the first input of the node is the data input
# the second input is the channelwise parameter tensor
if in_ind == 0:
assert (
str(context[inputs].dtype) == "float32"
), """Input datatype is
not float32 as expected."""
expected_inp_shape = self.get_folded_input_shape()
reshaped_input = context[inputs].reshape(expected_inp_shape)
export_idt = self.get_input_datatype()
# make copy before saving the array
reshaped_input = reshaped_input.copy()
np.save(
os.path.join(code_gen_dir, "input_{}.npy".format(in_ind)),
reshaped_input,
)
elif in_ind > 2:
raise Exception("Unexpected input found for ChannelwiseOp_Batch")
in_ind += 1
if mode == "cppsim":
# execute the precompiled model
super().exec_precompiled_singlenode_model()
# load output npy file
super().npy_to_dynamic_output(context)
# reinterpret binary output as bipolar where needed
if self.get_output_datatype() == DataType.BIPOLAR:
out = context[node.output[0]]
out = 2 * out - 1
context[node.output[0]] = out
assert (
context[node.output[0]].shape == self.get_folded_output_shape()
), """Output shape is not as expected"""
# reshape output to have expected shape
oshape = self.get_normal_output_shape()
context[node.output[0]] = context[node.output[0]].reshape(*oshape)
elif mode == "rtlsim":
sim = self.get_rtlsim()
nbits = self.get_instream_width()
inp = npy_to_rtlsim_input(
"{}/input_0.npy".format(code_gen_dir), export_idt, nbits
)
super().reset_rtlsim(sim)
super().toggle_clk(sim)
output = self.rtlsim(sim, inp)
odt = self.get_output_datatype()
target_bits = odt.bitwidth()
packed_bits = self.get_outstream_width()
out_npy_path = "{}/output.npy".format(code_gen_dir)
out_shape = self.get_folded_output_shape()
rtlsim_output_to_npy(
output, out_npy_path, odt, out_shape, packed_bits, target_bits
)
# load and reshape output
output = np.load(out_npy_path)
oshape = self.get_normal_output_shape()
output = np.asarray([output], dtype=np.float32).reshape(*oshape)
context[node.output[0]] = output
else:
raise Exception(
"""Invalid value for attribute exec_mode! Is currently set to: {}
has to be set to one of the following value ("cppsim", "rtlsim")""".format(
mode
)
)
def global_includes(self):
self.code_gen_dict["$GLOBALS$"] = ['#include "activations.hpp"']
self.code_gen_dict["$GLOBALS$"] += ['#include "params.h"']
# TODO check and add whatever missing
def defines(self, var):
numInputVectors = list(self.get_nodeattr("numInputVectors"))
numReps = numInputVectors[0]
self.code_gen_dict["$DEFINES$"] = [
"""#define NumChannels1 {}\n#define PE1 {}\n#define numReps {}""".format(
self.get_nodeattr("NumChannels"), self.get_nodeattr("PE"), numReps,
)
]
def read_npy_data(self):
code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
dtype = self.get_input_datatype()
elem_bits = dtype.bitwidth()
packed_bits = self.get_instream_width()
packed_hls_type = "ap_uint<%d>" % packed_bits
elem_hls_type = dtype.get_hls_datatype_str()
npy_type = "float"
npy_in = "%s/input_0.npy" % code_gen_dir
self.code_gen_dict["$READNPYDATA$"] = []
# note: the innermost dim is reversed for the input
self.code_gen_dict["$READNPYDATA$"].append(
'npy2apintstream<%s, %s, %d, %s>("%s", in0, false);'
% (packed_hls_type, elem_hls_type, elem_bits, npy_type, npy_in)
)
def strm_decl(self):
self.code_gen_dict["$STREAMDECLARATIONS$"] = []
self.code_gen_dict["$STREAMDECLARATIONS$"].append(
'hls::stream<ap_uint<{}>> in0 ("in0");'.format(self.get_instream_width())
)
self.code_gen_dict["$STREAMDECLARATIONS$"].append(
'hls::stream<ap_uint<{}>> out ("out");'.format(self.get_outstream_width())
)
def docompute(self):
tmpl_args = self.get_template_param_values()
# TODO: why put some template parameters into defines and not others?
# should ImgDim be defined or just filled in here like we do now?
ishape = self.get_folded_input_shape()
if len(ishape) == 3:
imgdim = 1
elif len(ishape) == 5:
imgdim = ishape[1]
else:
raise Exception("""Unexpeted input shape""")
self.code_gen_dict["$DOCOMPUTE$"] = [
"""Thresholding_Batch<{}, NumChannels1, PE1, {}, {}>
(in0, out, threshs, numReps);""".format(
imgdim, tmpl_args["TSrcI"], tmpl_args["TDstI"],
)
]
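# e.g. numInputVectors=[1]       -> folded shape (1, fold, PE),       imgdim = 1
#      numInputVectors=[1, 4, 4] -> folded shape (1, 4, 4, fold, PE), imgdim = 4
# (note this picks ishape[1], i.e. it assumes square spatial dims for conv inputs)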
def dataoutstrm(self):
code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
dtype = self.get_output_datatype()
if dtype == DataType.BIPOLAR:
# use binary for bipolar storage
dtype = DataType.BINARY
elem_bits = dtype.bitwidth()
packed_bits = self.get_outstream_width()
packed_hls_type = "ap_uint<%d>" % packed_bits
elem_hls_type = dtype.get_hls_datatype_str()
npy_type = "float"
npy_out = "%s/output.npy" % code_gen_dir
shape = self.get_folded_output_shape()
shape_cpp_str = str(shape).replace("(", "{").replace(")", "}")
# note: the innermost dim is not reversed for the output
self.code_gen_dict["$DATAOUTSTREAM$"] = [
'apintstream2npy<%s, %s, %d, %s>(out, %s, "%s", false);'
% (
packed_hls_type,
elem_hls_type,
elem_bits,
npy_type,
shape_cpp_str,
npy_out,
)
]
def save_as_npy(self):
self.code_gen_dict["$SAVEASCNPY$"] = []
def blackboxfunction(self):
self.code_gen_dict["$BLACKBOXFUNCTION$"] = [
"""void {}(hls::stream<ap_uint<{}>> &in0,
hls::stream<ap_uint<{}>> &out
)""".format(
self.onnx_node.name,
self.get_instream_width(),
self.get_outstream_width(),
)
]
def pragmas(self):
self.code_gen_dict["$PRAGMAS$"] = ["#pragma HLS INTERFACE axis port=in0"]
self.code_gen_dict["$PRAGMAS$"].append("#pragma HLS INTERFACE axis port=out")
self.code_gen_dict["$PRAGMAS$"].append(
"#pragma HLS INTERFACE ap_ctrl_none port=return"
)
# the channelwise parameter tensor is acc_type [PE][TMEM][N_PARAMS_PER_CHANNEL]
# partition for parallel access along PE and N_PARAMS_PER_CHANNEL
# dimensions (dims 1 and 3)
self.code_gen_dict["$PRAGMAS$"].append(
(
"#pragma HLS ARRAY_PARTITION variable=threshs.parameters "
"complete dim=1"
)
)
# self.code_gen_dict["$PRAGMAS$"].append(
# (
# "#pragma HLS ARRAY_PARTITION variable=threshs.parameters "
# "complete dim=3"
# )
# )
# set resource type
ram_style = self.get_nodeattr("ram_style")
pe = self.get_nodeattr("PE")
ich = self.get_nodeattr("NumChannels")
# if PE less than NumChannels, assign cores according to ram_style;
# otherwise if PE == NumChannels, Vivado HLS will unroll to FFs
if pe < ich:
if ram_style == "distributed":
self.code_gen_dict["$PRAGMAS$"].append(
(
"#pragma HLS RESOURCE variable=threshs.parameters "
"core=ROM_2P_LUTRAM"
)
)
elif ram_style == "block":
self.code_gen_dict["$PRAGMAS$"].append(
(
"#pragma HLS RESOURCE variable=threshs.parameters "
"core=ROM_2P_BRAM"
)
)
else:
raise Exception(
"""Invalid value for attribute ram_style! Is currently set to: {}
has to be set to one of ("block", "distributed")""".format(
ram_style
)
)
...
@@ -51,6 +51,7 @@ from finn.custom_op.fpgadataflow.addstreams_batch import AddStreams_Batch
from finn.custom_op.fpgadataflow.labelselect_batch import LabelSelect_Batch
from finn.custom_op.quantavgpool2d import QuantAvgPool2d
from finn.custom_op.fpgadataflow.duplicatestreams_batch import DuplicateStreams_Batch
+from finn.custom_op.fpgadataflow.channelwise_op_batch import ChannelwiseOp_Batch

# create a mapping of all known CustomOp names and classes
custom_op = {}
@@ -74,6 +75,7 @@ custom_op["AddStreams_Batch"] = AddStreams_Batch
custom_op["LabelSelect_Batch"] = LabelSelect_Batch
custom_op["QuantAvgPool2d"] = QuantAvgPool2d
custom_op["DuplicateStreams_Batch"] = DuplicateStreams_Batch
+custom_op["ChannelwiseOp_Batch"] = ChannelwiseOp_Batch

def getCustomOp(node):
...
...
@@ -27,6 +27,7 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from onnx import helper, TensorProto
+import numpy as np
from finn.core.datatype import DataType
from finn.transformation import Transformation
@@ -34,6 +35,8 @@ from finn.custom_op.registry import getCustomOp
from finn.transformation.infer_shapes import InferShapes
from finn.transformation.infer_datatypes import InferDataTypes
import finn.core.data_layout as DataLayout
+from finn.util.onnx import nchw_to_nhwc
+import warnings
from finn.util.basic import get_by_name
@@ -627,3 +630,158 @@ class InferThresholdingLayer(Transformation):
model = model.transform(InferShapes())
model = model.transform(InferDataTypes())
return (model, graph_modified)
class InferChannelwiseLinearLayer(Transformation):
"""Convert any channel-wise Add/Mul into a HLS layer."""
def get_smallest_possible(self, vals):
"""Returns smallest (fewest bits) possible DataType that can represent
value. Prefers unsigned integers where possible."""
vals = np.array(vals)
for v in vals:
assert int(v) == v, "Error: value %s is not an integer" % str(v)
for k in DataType.__members__:
dt = DataType[k]
if dt in [DataType.BIPOLAR, DataType.TERNARY, DataType.FLOAT32]:
# not currently supported
continue
if (dt.min() <= vals).all() and (vals <= dt.max()).all():
return dt
warnings.warn(
"""InferChannelwiseLinearLayer: Output values may not be
representable with supported data types.
Falling back to the widest data type available.
This will lead to errors if there are no constraints on the input.
"""
)
if (0 <= vals).all():
return DataType.UINT32
else:
return DataType.INT32
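# Illustrative behaviour (assuming the DataType enum declares unsigned and
# narrow types before signed and wide ones, which the "prefers unsigned"
# behaviour above relies on):
#   get_smallest_possible([0, 3])  -> DataType.UINT2
#   get_smallest_possible([-1, 5]) -> DataType.INT4 (no unsigned type fits)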
def apply(self, model):
graph = model.graph
node_ind = 0
graph_modified = False
for node in graph.node:
node_ind += 1
if node.op_type == "Add" or node.op_type == "Mul":
# assuming input[0] is dynamic
ll_input = node.input[0]
ll_output = node.output[0]
ll_in_shape = model.get_tensor_shape(ll_input)
# check if input 1 has an initializer
ll_const = node.input[1]
if ll_const is not None:
ll_cinit = model.get_initializer(ll_const)
if ll_cinit is None:
# input 1 is also dynamic
continue
else:
continue
# get number of channels and channel index from input
ll_in_layout = model.get_tensor_layout(ll_input)
if ll_in_layout == DataLayout.NHWC or ll_in_layout == DataLayout.NC:
ch_index = -1
ch = ll_in_shape[-1]
elif ll_in_layout == DataLayout.NCHW:
ch_index = 1
ch = ll_in_shape[1]
else:
continue
# check if the shape of initializer is compatible
ll_cinit_shape = list(ll_cinit.shape)
if np.prod(ll_cinit_shape) == 1:
warnings.warn(
"Broadcasting " + str(node.op_type) + "(" + node.name + ")"
)
ll_cinit = np.full((ch), ll_cinit.flatten()[0])
elif np.prod(ll_cinit_shape) != ch or ll_cinit_shape[ch_index] != ch:
# parameter shape not compatible with ChannelwiseOp_Batch
continue
# check initializer contains integers as floats
if not (ll_cinit.astype(np.int32) == ll_cinit).all():
continue
# all initializer conditions are met
# check inputs
idt = model.get_tensor_datatype(ll_input)
if not idt.is_integer():
# skip conversion for layers with float input
continue
# check layout of input/output, and convert if needed
if ll_in_layout == DataLayout.NCHW:
ll_input = nchw_to_nhwc(ll_input, model, node_ind)
node_ind += 1
ll_in_shape = model.get_tensor_shape(ll_input)
# keep track of where we need to insert the HLS Op
# it has to be ahead of the output transform
insert_point = node_ind
ll_output_layout = model.get_tensor_layout(ll_output)
if ll_output_layout == DataLayout.NCHW:
ll_output = nchw_to_nhwc(ll_output, model, node_ind, reverse=True)
node_ind += 1
# get parameter data type
param_min = min(ll_cinit.flatten())
param_max = max(ll_cinit.flatten())
pdt = self.get_smallest_possible([param_min, param_max])
# set function and determine output data type
if node.op_type == "Add":
func = "add"
out_min = idt.min() + param_min
out_max = idt.max() + param_max
odt = self.get_smallest_possible([out_min, out_max])
elif node.op_type == "Mul":
func = "mul"
possible_limits = []
possible_limits += [idt.min() * param_min]
possible_limits += [idt.min() * param_max]
possible_limits += [idt.max() * param_min]
possible_limits += [idt.max() * param_max]
odt = self.get_smallest_possible(possible_limits)
model.set_initializer(ll_const, ll_cinit.reshape(ch))
model.set_tensor_datatype(ll_output, odt)
# create node with no parallelization first
pe = 1
assert ch % pe == 0, "Requirement NumChannels divisible by PE is violated."
# create and insert node
new_node = helper.make_node(
"ChannelwiseOp_Batch",
[ll_input, ll_const],
[ll_output],
domain="finn",
backend="fpgadataflow",
Func=func,
NumChannels=ch,
PE=pe,
inputDataType=idt.name,
paramDataType=pdt.name,
outputDataType=odt.name,
numInputVectors=list(ll_in_shape[:-1]),
)
graph.node.insert(insert_point, new_node)
# remove old node
graph.node.remove(node)
graph_modified = True
if graph_modified:
model = model.transform(InferShapes())
model = model.transform(InferDataTypes())
return (model, graph_modified)
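# Worked example of the output datatype derivation above (illustrative values):
# idt = INT4 (range [-8, 7]) and parameters in [-2, 3]:
#   Add: out range = [-8 - 2, 7 + 3] = [-10, 10]
#   Mul: limits = {-8*-2, -8*3, 7*-2, 7*3} = {16, -24, -14, 21}
# both land in INT8, the smallest signed type wide enough given the
# (mostly power-of-two) bit widths available.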
...
@@ -28,6 +28,7 @@
import numpy as np
import onnx
+import finn.core.data_layout as DataLayout

def valueinfo_to_tensor(vi):
@@ -37,3 +38,38 @@ def valueinfo_to_tensor(vi):
return np.zeros(
dims, dtype=onnx.mapping.TENSOR_TYPE_TO_NP_TYPE[vi.type.tensor_type.elem_type]
)
def nchw_to_nhwc(t, model, idx, reverse=False):
"""Converts between NCHW <-> NHWC layouts for tensor t by inserting a transpose.
If reverse=False, t is assumed NCHW and we insert transpose to convert NCHW -> NHWC
If reverse=True, t is assumed NHWC and we insert transpose to convert NHWC -> NCHW.
"""
graph = model.graph
# create new NHWC tensor
t_shape = model.get_tensor_shape(t)
bs = t_shape[0]
ch = t_shape[1]
height = t_shape[2]
width = t_shape[3]
t_trans = onnx.helper.make_tensor_value_info(
model.make_new_valueinfo_name(),
onnx.TensorProto.FLOAT,
(bs, height, width, ch), # NHWC
)
graph.value_info.append(t_trans)
dt = model.get_tensor_datatype(t)
t_trans = t_trans.name
model.set_tensor_datatype(t_trans, dt)
model.set_tensor_layout(t_trans, DataLayout.NHWC)
# NCHW <-> NHWC transpose
if reverse:
t_trans_node = onnx.helper.make_node(
"Transpose", [t_trans], [t], perm=[0, 3, 1, 2]
)
else:
t_trans_node = onnx.helper.make_node(
"Transpose", [t], [t_trans], perm=[0, 2, 3, 1]
)
graph.node.insert(idx, t_trans_node)
return t_trans
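# Usage sketch (hypothetical tensor/model names): convert an NCHW graph input
# "t0" of shape (1, 3, 32, 32) so downstream nodes see (1, 32, 32, 3):
#   t0_nhwc = nchw_to_nhwc("t0", model, node_ind)
# With reverse=True the Transpose is inserted the other way around, so an
# NHWC producer can drive the existing NCHW tensor t.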
import pytest
from onnx import TensorProto, helper
import finn.core.onnx_exec as oxe
from finn.core.datatype import DataType
from finn.core.modelwrapper import ModelWrapper
import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
from finn.transformation.fpgadataflow.replace_verilog_relpaths import (
ReplaceVerilogRelPaths,
)
from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
from finn.transformation.infer_data_layouts import InferDataLayouts
from finn.transformation.general import GiveUniqueNodeNames
from finn.util.basic import gen_finn_dt_tensor
from finn.transformation.infer_shapes import InferShapes
import numpy as np
def prepare_inputs(input_tensor):
return {"inp": input_tensor}
def make_single_channelwise_modelwrapper(onnx_op_name, ishape, idt, pdt, pshape):
inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, ishape)
outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, ishape)
p0 = helper.make_tensor_value_info("p0", TensorProto.FLOAT, pshape)
model = helper.make_model(
helper.make_graph(
name="test",
inputs=[inp],
outputs=[outp],
value_info=[p0],
nodes=[helper.make_node(onnx_op_name, ["inp", "p0"], ["outp"])],
)
)
model = ModelWrapper(model)
model.set_initializer("p0", gen_finn_dt_tensor(pdt, pshape))
model.set_tensor_datatype("inp", idt)
model.transform(InferDataLayouts(), make_deepcopy=False)
model.transform(InferShapes(), make_deepcopy=False)
return model
# parameter datatype
@pytest.mark.parametrize("pdt", [DataType.BIPOLAR, DataType.UINT4, DataType.INT2])
# input datatype
@pytest.mark.parametrize("idt", [DataType.INT32, DataType.UINT4, DataType.INT4])
# function
@pytest.mark.parametrize("onnx_op_name", ["Add", "Mul"])
# vector parameter or scalar parameter (broadcast)
@pytest.mark.parametrize("scalar_param", [True, False])
# execution mode
@pytest.mark.parametrize("exec_mode", ["cppsim", "rtlsim"])
@pytest.mark.vivado
@pytest.mark.slow
def test_convert_to_hls_channelwise_layer(
pdt, idt, onnx_op_name, scalar_param, exec_mode
):
ifm_ch = 16
ifm_dim = 5
ishape = (1, ifm_ch, ifm_dim, ifm_dim)
if scalar_param:
pshape = (1,)
else:
pshape = (1, ifm_ch, 1, 1)
np.random.seed(0)
model = make_single_channelwise_modelwrapper(onnx_op_name, ishape, idt, pdt, pshape)
# Since there are no DataTypes with a non-power-of-2 bit width,
# there are cases where the input won't use its full range.
if idt == DataType.INT32:
x = gen_finn_dt_tensor(DataType.INT16, (1, ifm_ch, ifm_dim, ifm_dim))
elif idt == DataType.UINT32:
x = gen_finn_dt_tensor(DataType.UINT16, (1, ifm_ch, ifm_dim, ifm_dim))
else:
x = gen_finn_dt_tensor(idt, (1, ifm_ch, ifm_dim, ifm_dim))
input_dict = prepare_inputs(x)
y_expected = oxe.execute_onnx(model, input_dict)["outp"]
new_model = model.transform(to_hls.InferChannelwiseLinearLayer())
new_model = new_model.transform(GiveUniqueNodeNames())
if exec_mode == "cppsim":
new_model = new_model.transform(PrepareCppSim())
new_model = new_model.transform(CompileCppSim())
new_model = new_model.transform(SetExecMode("cppsim"))
elif exec_mode == "rtlsim":
new_model = new_model.transform(SetExecMode("rtlsim"))
new_model = new_model.transform(GiveUniqueNodeNames())
new_model = new_model.transform(PrepareIP("xc7z020clg400-1", 5))
new_model = new_model.transform(HLSSynthIP())
new_model = new_model.transform(ReplaceVerilogRelPaths())
new_model = new_model.transform(PrepareRTLSim())
else:
raise Exception("Unknown exec_mode")
ctx_produced = oxe.execute_onnx(
new_model, input_dict, return_full_exec_context=True
)
y_produced = ctx_produced["outp"]
assert (y_produced == y_expected).all()
assert new_model.graph.node[1].op_type == "ChannelwiseOp_Batch"
# Copyright (c) 2020, Xilinx
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# * Neither the name of FINN nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import pytest
import numpy as np
from onnx import TensorProto, helper
import finn.core.onnx_exec as oxe
from finn.analysis.fpgadataflow.hls_synth_res_estimation import hls_synth_res_estimation
from finn.core.datatype import DataType
from finn.core.modelwrapper import ModelWrapper
from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
from finn.transformation.general import GiveUniqueNodeNames
from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
from finn.util.basic import gen_finn_dt_tensor
from finn.transformation.fpgadataflow.replace_verilog_relpaths import (
ReplaceVerilogRelPaths,
)
def make_modelwrapper(C, pe, idt, odt, pdt, func, vecs):
NumChannels = C.shape[0]
inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, vecs + [NumChannels])
outp = helper.make_tensor_value_info(
"outp", TensorProto.FLOAT, vecs + [NumChannels]
)
node_inp_list = ["inp", "const"]
node = helper.make_node(
"ChannelwiseOp_Batch",
node_inp_list,
["outp"],
domain="finn",
backend="fpgadataflow",
NumChannels=NumChannels,
Func=func,
PE=pe,
inputDataType=idt.name,
outputDataType=odt.name,
paramDataType=pdt.name,
numInputVectors=vecs,
)
graph = helper.make_graph(nodes=[node], name="graph", inputs=[inp], outputs=[outp])
model = helper.make_model(graph, producer_name="model")
model = ModelWrapper(model)
model.set_tensor_datatype("inp", idt)
model.set_tensor_datatype("outp", odt)
model.set_tensor_datatype("const", idt)
model.set_initializer("const", C)
return model
# output (activation) datatype
@pytest.mark.parametrize("act", [DataType.INT8])
# input datatype
@pytest.mark.parametrize("idt", [DataType.INT4])
# param datatype
@pytest.mark.parametrize("pdt", [DataType.INT4])
# folding, -1 is maximum possible
@pytest.mark.parametrize("nf", [-1, 2])
# number of input features
@pytest.mark.parametrize("ich", [16])
# vecs
@pytest.mark.parametrize("vecs", [[1], [1, 7, 7]])
# function
@pytest.mark.parametrize("func", ["add", "mul"])
# execution mode
@pytest.mark.parametrize("exec_mode", ["cppsim", "rtlsim"])
@pytest.mark.vivado
@pytest.mark.slow
def test_fpgadataflow_channelwise_ops(idt, act, pdt, nf, ich, func, vecs, exec_mode):
if nf == -1:
nf = ich
pe = ich // nf
assert ich % pe == 0
# generate input and param data
x = gen_finn_dt_tensor(idt, tuple(vecs + [ich]))
# C = np.random.randint(idt.min(), idt.max() + 1, ich).astype(np.float32)
C = gen_finn_dt_tensor(pdt, (ich))
odt = act
model = make_modelwrapper(C, pe, idt, odt, pdt, func, vecs)
if exec_mode == "cppsim":
model = model.transform(PrepareCppSim())
model = model.transform(CompileCppSim())
model = model.transform(SetExecMode("cppsim"))
elif exec_mode == "rtlsim":
model = model.transform(SetExecMode("rtlsim"))
model = model.transform(GiveUniqueNodeNames())
model = model.transform(PrepareIP("xc7z020clg400-1", 5))
model = model.transform(HLSSynthIP())
model = model.transform(ReplaceVerilogRelPaths())
model = model.transform(PrepareRTLSim())
else:
raise Exception("Unknown exec_mode")
# package input data as dictionary
input_dict = {"inp": x}
oshape = model.get_tensor_shape("outp")
C_reshaped = np.broadcast_to(C.flatten(), x.shape)
if func == "add":
y = x + C_reshaped
elif func == "mul":
y = x * C_reshaped
y_expected = y.reshape(oshape)
# execute model
y_produced = oxe.execute_onnx(model, input_dict)["outp"]
y_produced = y_produced.reshape(y_expected.shape)
assert (y_produced == y_expected).all(), exec_mode + " failed"
if exec_mode == "rtlsim":
hls_synt_res_est = model.analysis(hls_synth_res_estimation)
assert "ChannelwiseOp_Batch_0" in hls_synt_res_est