Commit 9f870164 authored by Felix Jentzsch

[VVAU] Fix BIPOLAR/TERNARY compatibility

parent c0cda8f7
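The common thread in the changes below is FINN's storage convention for bipolar values: BIPOLAR {-1, +1} tensors are stored and streamed as BINARY {0, 1} and converted back at the boundaries. A minimal sketch of the two mappings this commit applies throughout (plain numpy; the function names are illustrative, not from the codebase):

import numpy as np

def bipolar_to_binary(x):
    # {-1, +1} -> {0, 1}, as done below for weights and input activations
    return (x + 1) / 2

def binary_to_bipolar(x):
    # {0, 1} -> {-1, +1}, as done below when reinterpreting outputs
    return 2 * x - 1

x = np.array([-1.0, 1.0, 1.0, -1.0])
assert (binary_to_bipolar(bipolar_to_binary(x)) == x).all()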
@@ -225,9 +225,9 @@ class VectorVectorActivation(HLSCustomOp):
     def get_instream_width(self, ind=0):
         i_bits = self.get_input_datatype().bitwidth()
         simd = self.get_nodeattr("SIMD")
-        #if simd > 1:
-        #pe = self.get_nodeattr("Channels")
-        #else:
+        # if simd > 1:
+        # pe = self.get_nodeattr("Channels")
+        # else:
         pe = self.get_nodeattr("PE")
         in_width = i_bits * simd * pe
         return in_width
@@ -242,9 +242,9 @@ class VectorVectorActivation(HLSCustomOp):
         dim_h, dim_w = self.get_nodeattr("Dim")
         ch = self.get_nodeattr("Channels")
         simd = self.get_nodeattr("SIMD")
-        #if simd > 1:
-        #pe = self.get_nodeattr("Channels")
-        #else:
+        # if simd > 1:
+        # pe = self.get_nodeattr("Channels")
+        # else:
         pe = self.get_nodeattr("PE")
         sf = k_h * k_w // simd
         nf = ch // pe
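For context, sf and nf are FINN's synapse and neuron fold factors: the k_h * k_w kernel positions are split across SIMD lanes and the channels across PEs. A quick worked example with parameter values the test below actually sweeps (the channel count is assumed for illustration):

k_h, k_w, ch = 3, 3, 6  # 3x3 kernel, 6 channels
simd, pe = 9, 3
sf = k_h * k_w // simd  # = 1: all 9 kernel positions consumed per cycle
nf = ch // pe           # = 2: channels processed in two folded passes
assert (sf, nf) == (1, 2)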
@@ -351,6 +351,9 @@ class VectorVectorActivation(HLSCustomOp):
         ), """Weights matrix doesn't
         have expected shape (channels, 1, kernel_size, kernel_size)"""
         ret = orig_weight_matrix
+        if self.get_weight_datatype() == DataType["BIPOLAR"]:
+            # convert bipolar to binary
+            ret = (ret + 1) / 2
         ret = ret.reshape(ch, k_h * k_w)
         # distribute rows between PEs
         ret = interleave_matrix_outer_dim_from_partitions(ret, pe)
@@ -649,6 +652,12 @@ class VectorVectorActivation(HLSCustomOp):
                 not float32 as expected."""
                 expected_inp_shape = self.get_folded_input_shape()
                 reshaped_input = context[inputs].reshape(expected_inp_shape)
+                if self.get_input_datatype() == DataType["BIPOLAR"]:
+                    # store bipolar activations as binary
+                    reshaped_input = (reshaped_input + 1) / 2
+                    export_idt = DataType["BINARY"]
+                else:
+                    export_idt = self.get_input_datatype()
                 # make copy before saving the array
                 reshaped_input = reshaped_input.copy()
                 np.save(
@@ -664,14 +673,20 @@ class VectorVectorActivation(HLSCustomOp):
             super().exec_precompiled_singlenode_model()
             # load output npy file
             super().npy_to_dynamic_output(context)
+            # reinterpret binary output as bipolar where needed
+            if self.get_output_datatype() == DataType["BIPOLAR"]:
+                out = context[node.output[0]]
+                out = 2 * out - 1
+                context[node.output[0]] = out
             assert (
                 context[node.output[0]].shape == self.get_normal_output_shape()
             ), "cppsim did not produce expected output shape"
         elif mode == "rtlsim":
             sim = self.get_rtlsim()
             nbits = self.get_instream_width()
-            idt = self.get_input_datatype()
-            inp = npy_to_rtlsim_input("{}/input_0.npy".format(code_gen_dir), idt, nbits)
+            inp = npy_to_rtlsim_input(
+                "{}/input_0.npy".format(code_gen_dir), export_idt, nbits
+            )
             super().reset_rtlsim(sim)
             super().toggle_clk(sim)
@@ -756,6 +771,9 @@ class VectorVectorActivation(HLSCustomOp):
     def read_npy_data(self):
         code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
         dtype = self.get_input_datatype()
+        if dtype == DataType["BIPOLAR"]:
+            # use binary for bipolar storage
+            dtype = DataType["BINARY"]
         elem_bits = dtype.bitwidth()
         packed_bits = self.get_instream_width()
         packed_hls_type = "ap_uint<%d>" % packed_bits
@@ -826,6 +844,11 @@ class VectorVectorActivation(HLSCustomOp):
                 )
             ]
         elif mem_mode == "decoupled" or mem_mode == "external":
+            simd = self.get_nodeattr("SIMD")
+            if simd > 1:
+                raise Exception(
+                    "SIMD parallelism not supported for decoupled or external mode"
+                )
             wdt = self.get_weight_datatype()
             if wdt == DataType["BIPOLAR"]:
                 export_wdt = DataType["BINARY"]
@@ -853,6 +876,9 @@ class VectorVectorActivation(HLSCustomOp):
     def dataoutstrm(self):
         code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
         dtype = self.get_output_datatype()
+        if dtype == DataType["BIPOLAR"]:
+            # use binary for bipolar storage
+            dtype = DataType["BINARY"]
         elem_bits = dtype.bitwidth()
         packed_bits = self.get_outstream_width()
         packed_hls_type = "ap_uint<%d>" % packed_bits
@@ -220,7 +220,7 @@ def unpack_innermost_dim_from_hex_string(
         if conv_dtype == DataType["BIPOLAR"]:
             ar_list = [2 * x - 1 for x in ar_list]
         # interpret values as signed values
-        elif conv_dtype.name.startswith("INT"):
+        elif dtype.signed():
             mask = 2 ** (conv_dtype.bitwidth() - 1)
             ar_list = [-(x & mask) + (x & ~mask) for x in ar_list]
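The new condition, dtype.signed(), also catches signed datatypes whose names do not start with "INT" (TERNARY, for instance, which the commit title calls out). The mask expression itself reinterprets a raw unsigned bit pattern as two's complement; a small standalone check of what it does, assuming a 4-bit width:

# two's-complement reinterpretation of raw 4-bit patterns
bitwidth = 4
mask = 2 ** (bitwidth - 1)  # 0b1000, the sign-bit position
for raw, expected in [(0b0111, 7), (0b1000, -8), (0b1111, -1)]:
    # subtract the sign bit's weight, keep the remaining magnitude bits
    signed = -(raw & mask) + (raw & ~mask)
    assert signed == expected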
@@ -27,30 +27,29 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 import pytest

 import numpy as np
 from onnx import TensorProto, helper
 from qonnx.core.datatype import DataType
 from qonnx.core.modelwrapper import ModelWrapper
 from qonnx.custom_op.general.multithreshold import multithreshold
-# from qonnx.custom_op.registry import getCustomOp
+from qonnx.custom_op.registry import getCustomOp
 from qonnx.transformation.general import GiveUniqueNodeNames
-from qonnx.util.basic import gen_finn_dt_tensor
 from qonnx.transformation.infer_datatypes import InferDataTypes
 from qonnx.transformation.infer_shapes import InferShapes
+from qonnx.util.basic import gen_finn_dt_tensor

 import finn.core.onnx_exec as oxe
-# from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer
+from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer
 from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
 from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
+from finn.transformation.fpgadataflow.minimize_accumulator_width import (
+    MinimizeAccumulatorWidth,
+)
 from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
 from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
 from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
 from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
-from finn.transformation.fpgadataflow.minimize_accumulator_width import (
-    MinimizeAccumulatorWidth,
-)

 def _infer_sparse_weight_tensor(W_conv, k_h, k_w, channels):
@@ -110,7 +109,10 @@ def _make_single_vvau_modelwrapper(
     if T is not None:
         no_act = 0
         node_inp_list = ["inp", "weights", "thresh"]
-        actval = odt.min()
+        if odt == DataType["BIPOLAR"]:
+            actval = 0
+        else:
+            actval = odt.min()
     else:
         no_act = 1
         node_inp_list = ["inp", "weights"]
@@ -167,15 +169,15 @@ def prepare_inputs(input_tensor):
 # input datatype
-@pytest.mark.parametrize("idt", [DataType["UINT4"]])
+@pytest.mark.parametrize("idt", [DataType["BIPOLAR"], DataType["UINT4"]])
 # weight datatype
-@pytest.mark.parametrize("wdt", [DataType["UINT4"]])
+@pytest.mark.parametrize("wdt", [DataType["BIPOLAR"], DataType["UINT4"]])
 # activation: None or DataType
-@pytest.mark.parametrize("act", [DataType["UINT4"], None])
+@pytest.mark.parametrize("act", [DataType["BIPOLAR"], DataType["UINT4"], None])
 # PE
-@pytest.mark.parametrize("pe", [1,2,3,6])
+@pytest.mark.parametrize("pe", [1, 3, 6])
 # SIMD
-@pytest.mark.parametrize("simd", [1,9])
+@pytest.mark.parametrize("simd", [1, 9])
 # Input image shape
 @pytest.mark.parametrize("dim_h", [10])
 @pytest.mark.parametrize("dim_w", [10])
@@ -187,7 +189,7 @@ def prepare_inputs(input_tensor):
 # memory mode
 @pytest.mark.parametrize("mem_mode", ["const"])
 # execution mode
-@pytest.mark.parametrize("exec_mode", ["cppsim","rtlsim"])
+@pytest.mark.parametrize("exec_mode", ["cppsim", "rtlsim"])
 @pytest.mark.fpgadataflow
 @pytest.mark.slow
 @pytest.mark.vivado
@@ -203,9 +205,6 @@ def test_fpgadataflow_vvau(
     if channels % pe != 0:
         pytest.skip("Requirement Channels divisable by PE is violated.")

-    #if pe < channels and simd > 1:
-    #    pytest.skip("Do not apply SIMD parallelism before max PE parallelism")
-
     # Generate weights in expected shape for ONNX and HLS node
     W = gen_finn_dt_tensor(wdt, (channels, 1, k_h, k_w))  # shape: [channels, 1, k, k]
     W_onnx = _infer_sparse_weight_tensor(
@@ -221,14 +220,23 @@ def test_fpgadataflow_vvau(
     if act is None:
         T = None
         tdt = None
-        odt = DataType["INT32"]
+        if wdt == DataType["BIPOLAR"] and idt == DataType["BIPOLAR"]:
+            odt = DataType["UINT32"]
+        else:
+            odt = DataType["INT32"]
     else:
         odt = act
-        (min_v, max_v) = _calculate_dot_prod_range(idt, wdt, k_h * k_w * channels)
+        (min_v, max_v) = _calculate_dot_prod_range(idt, wdt, k_h * k_w)
         n_steps = act.get_num_possible_values() - 1
         T = np.random.randint(min_v, max_v - 1, (channels, n_steps)).astype(np.float32)
         T = np.sort(T, axis=1)
-        tdt = DataType["INT32"]
+        if wdt == DataType["BIPOLAR"] and idt == DataType["BIPOLAR"]:
+            tdt = DataType["UINT32"]
+            # bias thresholds to be positive
+            T = np.ceil((T + (k_h * k_w)) / 2)
+            assert (T >= 0).all()
+        else:
+            tdt = DataType["INT32"]

     model = _make_single_vvau_modelwrapper(
         W, pe, simd, k_h, k_w, channels, dim_h, dim_w, wdt, idt, odt, T, tdt, mem_mode
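The threshold rebias above follows from the XNOR-popcount arithmetic used on the hardware side: the binary-domain accumulator is y_bin = (y_bip + k)/2, so a signed comparison y_bip >= T is equivalent to y_bin >= ceil((T + k)/2), since the accumulator is always an integer. A small numpy check of that equivalence (the values are illustrative):

import numpy as np

k = 9                            # k_h * k_w
y_bip = np.arange(-k, k + 1, 2)  # attainable bipolar dot products
y_bin = (y_bip + k) / 2          # corresponding popcount accumulators
T = 3                            # some signed threshold
T_bin = np.ceil((T + k) / 2)     # rebias as done in the test above
assert ((y_bip >= T) == (y_bin >= T_bin)).all()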
@@ -250,14 +258,25 @@ def test_fpgadataflow_vvau(
     input_dict = prepare_inputs(x_vvau)

     # Calculate output
-    y_expected = np.matmul(x, W_onnx)  # Y is in [N, H, W, C] format
+    if wdt == DataType["BIPOLAR"] and idt == DataType["BIPOLAR"]:
+        # Simulate XNOR-popcount matrix multiplication, see
+        # qonnx.custom_op.general.xnorpopcount (not usable due to sparse W)
+        y_expected = np.matmul(x, W_onnx)
+        y_expected = (y_expected + (k_h * k_w)) / 2
+    else:
+        y_expected = np.matmul(x, W_onnx)  # Y is in [N, H, W, C] format
     if T is not None:
         # Reshape Y, as multithreshold expects Y to be in [N, C, H, W] format
         y_expected = np.transpose(y_expected, (0, 3, 1, 2))
         y_expected = multithreshold(y_expected, T)
         y_expected = np.transpose(y_expected, (0, 2, 3, 1))
-        # signed offset
-        y_expected += act.min()
+        if act == DataType["BIPOLAR"]:
+            # binary to bipolar
+            y_expected = 2 * y_expected - 1
+        else:
+            # signed offset
+            y_expected += act.min()

     y_produced = oxe.execute_onnx(model, input_dict, return_full_exec_context=False)[
         "outp"
@@ -265,11 +284,11 @@ def test_fpgadataflow_vvau(
     assert (y_produced == y_expected).all(), "incorrect result"

-    # if exec_mode == "rtlsim":
-    #     node = model.get_nodes_by_op_type("VectorVectorActivation")[0]
-    #     inst = getCustomOp(node)
-    #     cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim")
-    #     exp_cycles_dict = model.analysis(exp_cycles_per_layer)
-    #     exp_cycles = exp_cycles_dict[node.name]
-    #     assert np.isclose(exp_cycles, cycles_rtlsim, atol=10)
-    #     assert exp_cycles != 0
+    if exec_mode == "rtlsim":
+        node = model.get_nodes_by_op_type("VectorVectorActivation")[0]
+        inst = getCustomOp(node)
+        cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim")
+        exp_cycles_dict = model.analysis(exp_cycles_per_layer)
+        exp_cycles = exp_cycles_dict[node.name]
+        assert np.isclose(exp_cycles, cycles_rtlsim, atol=10)
+        assert exp_cycles != 0