diff --git a/src/finn/custom_op/fpgadataflow/vectorvectoractivation.py b/src/finn/custom_op/fpgadataflow/vectorvectoractivation.py index 813b673b396f3f63e6eb275c7ac9d7f2eb1eef56..6d4b5fb9e64ebdd3b491e4cddd26b35eca50cbee 100644 --- a/src/finn/custom_op/fpgadataflow/vectorvectoractivation.py +++ b/src/finn/custom_op/fpgadataflow/vectorvectoractivation.py @@ -225,9 +225,9 @@ class VectorVectorActivation(HLSCustomOp): def get_instream_width(self, ind=0): i_bits = self.get_input_datatype().bitwidth() simd = self.get_nodeattr("SIMD") - #if simd > 1: - #pe = self.get_nodeattr("Channels") - #else: + # if simd > 1: + # pe = self.get_nodeattr("Channels") + # else: pe = self.get_nodeattr("PE") in_width = i_bits * simd * pe return in_width @@ -242,9 +242,9 @@ class VectorVectorActivation(HLSCustomOp): dim_h, dim_w = self.get_nodeattr("Dim") ch = self.get_nodeattr("Channels") simd = self.get_nodeattr("SIMD") - #if simd > 1: - #pe = self.get_nodeattr("Channels") - #else: + # if simd > 1: + # pe = self.get_nodeattr("Channels") + # else: pe = self.get_nodeattr("PE") sf = k_h * k_w // simd nf = ch // pe @@ -351,6 +351,9 @@ class VectorVectorActivation(HLSCustomOp): ), """Weights matrix doesn't have expected shape (channels, 1, kernel_size, kernel_size)""" ret = orig_weight_matrix + if self.get_weight_datatype() == DataType["BIPOLAR"]: + # convert bipolar to binary + ret = (ret + 1) / 2 ret = ret.reshape(ch, k_h * k_w) # distribute rows between PEs ret = interleave_matrix_outer_dim_from_partitions(ret, pe) @@ -649,6 +652,12 @@ class VectorVectorActivation(HLSCustomOp): not float32 as expected.""" expected_inp_shape = self.get_folded_input_shape() reshaped_input = context[inputs].reshape(expected_inp_shape) + if self.get_input_datatype() == DataType["BIPOLAR"]: + # store bipolar activations as binary + reshaped_input = (reshaped_input + 1) / 2 + export_idt = DataType["BINARY"] + else: + export_idt = self.get_input_datatype() # make copy before saving the array reshaped_input = reshaped_input.copy() np.save( @@ -664,14 +673,20 @@ class VectorVectorActivation(HLSCustomOp): super().exec_precompiled_singlenode_model() # load output npy file super().npy_to_dynamic_output(context) + # reinterpret binary output as bipolar where needed + if self.get_output_datatype() == DataType["BIPOLAR"]: + out = context[node.output[0]] + out = 2 * out - 1 + context[node.output[0]] = out assert ( context[node.output[0]].shape == self.get_normal_output_shape() ), "cppsim did not produce expected output shape" elif mode == "rtlsim": sim = self.get_rtlsim() nbits = self.get_instream_width() - idt = self.get_input_datatype() - inp = npy_to_rtlsim_input("{}/input_0.npy".format(code_gen_dir), idt, nbits) + inp = npy_to_rtlsim_input( + "{}/input_0.npy".format(code_gen_dir), export_idt, nbits + ) super().reset_rtlsim(sim) super().toggle_clk(sim) @@ -756,6 +771,9 @@ class VectorVectorActivation(HLSCustomOp): def read_npy_data(self): code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") dtype = self.get_input_datatype() + if dtype == DataType["BIPOLAR"]: + # use binary for bipolar storage + dtype = DataType["BINARY"] elem_bits = dtype.bitwidth() packed_bits = self.get_instream_width() packed_hls_type = "ap_uint<%d>" % packed_bits @@ -826,6 +844,11 @@ class VectorVectorActivation(HLSCustomOp): ) ] elif mem_mode == "decoupled" or mem_mode == "external": + simd = self.get_nodeattr("SIMD") + if simd > 1: + raise Exception( + "SIMD parallelism not supported for decoupled or external mode" + ) wdt = self.get_weight_datatype() if wdt == DataType["BIPOLAR"]: export_wdt = DataType["BINARY"] @@ -853,6 +876,9 @@ class VectorVectorActivation(HLSCustomOp): def dataoutstrm(self): code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") dtype = self.get_output_datatype() + if dtype == DataType["BIPOLAR"]: + # use binary for bipolar storage + dtype = DataType["BINARY"] elem_bits = dtype.bitwidth() packed_bits = self.get_outstream_width() packed_hls_type = "ap_uint<%d>" % packed_bits diff --git a/src/finn/util/data_packing.py b/src/finn/util/data_packing.py index 65478d2540b53443d3f74b44a22fde3defd8ca93..f7ea2ff9430b1371760e8eb44b38c2c982c1c30a 100644 --- a/src/finn/util/data_packing.py +++ b/src/finn/util/data_packing.py @@ -220,7 +220,7 @@ def unpack_innermost_dim_from_hex_string( if conv_dtype == DataType["BIPOLAR"]: ar_list = [2 * x - 1 for x in ar_list] # interpret values as signed values - elif conv_dtype.name.startswith("INT"): + elif dtype.signed(): mask = 2 ** (conv_dtype.bitwidth() - 1) ar_list = [-(x & mask) + (x & ~mask) for x in ar_list] diff --git a/tests/fpgadataflow/test_fpgadataflow_vvau.py b/tests/fpgadataflow/test_fpgadataflow_vvau.py index ea4be473344bdecd4b200472bfe546aeff864e21..a418de5728d73e22b67f1107ff842421ba680941 100644 --- a/tests/fpgadataflow/test_fpgadataflow_vvau.py +++ b/tests/fpgadataflow/test_fpgadataflow_vvau.py @@ -27,30 +27,29 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. import pytest + import numpy as np from onnx import TensorProto, helper from qonnx.core.datatype import DataType from qonnx.core.modelwrapper import ModelWrapper from qonnx.custom_op.general.multithreshold import multithreshold - -# from qonnx.custom_op.registry import getCustomOp +from qonnx.custom_op.registry import getCustomOp from qonnx.transformation.general import GiveUniqueNodeNames -from qonnx.util.basic import gen_finn_dt_tensor from qonnx.transformation.infer_datatypes import InferDataTypes from qonnx.transformation.infer_shapes import InferShapes +from qonnx.util.basic import gen_finn_dt_tensor import finn.core.onnx_exec as oxe - -# from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer +from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP +from finn.transformation.fpgadataflow.minimize_accumulator_width import ( + MinimizeAccumulatorWidth, +) from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim from finn.transformation.fpgadataflow.prepare_ip import PrepareIP from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode -from finn.transformation.fpgadataflow.minimize_accumulator_width import ( - MinimizeAccumulatorWidth, -) def _infer_sparse_weight_tensor(W_conv, k_h, k_w, channels): @@ -110,7 +109,10 @@ def _make_single_vvau_modelwrapper( if T is not None: no_act = 0 node_inp_list = ["inp", "weights", "thresh"] - actval = odt.min() + if odt == DataType["BIPOLAR"]: + actval = 0 + else: + actval = odt.min() else: no_act = 1 node_inp_list = ["inp", "weights"] @@ -167,15 +169,15 @@ def prepare_inputs(input_tensor): # input datatype -@pytest.mark.parametrize("idt", [DataType["UINT4"]]) +@pytest.mark.parametrize("idt", [DataType["BIPOLAR"], DataType["UINT4"]]) # weight datatype -@pytest.mark.parametrize("wdt", [DataType["UINT4"]]) +@pytest.mark.parametrize("wdt", [DataType["BIPOLAR"], DataType["UINT4"]]) # activation: None or DataType -@pytest.mark.parametrize("act", [DataType["UINT4"], None]) +@pytest.mark.parametrize("act", [DataType["BIPOLAR"], DataType["UINT4"], None]) # PE -@pytest.mark.parametrize("pe", [1,2,3,6]) +@pytest.mark.parametrize("pe", [1, 3, 6]) # SIMD -@pytest.mark.parametrize("simd", [1,9]) +@pytest.mark.parametrize("simd", [1, 9]) # Input image shape @pytest.mark.parametrize("dim_h", [10]) @pytest.mark.parametrize("dim_w", [10]) @@ -187,7 +189,7 @@ def prepare_inputs(input_tensor): # memory mode @pytest.mark.parametrize("mem_mode", ["const"]) # execution mode -@pytest.mark.parametrize("exec_mode", ["cppsim","rtlsim"]) +@pytest.mark.parametrize("exec_mode", ["cppsim", "rtlsim"]) @pytest.mark.fpgadataflow @pytest.mark.slow @pytest.mark.vivado @@ -203,9 +205,6 @@ def test_fpgadataflow_vvau( if channels % pe != 0: pytest.skip("Requirement Channels divisable by PE is violated.") - #if pe < channels and simd > 1: - # pytest.skip("Do not apply SIMD parallelism before max PE parallelism") - # Generate weights in expected shape for ONNX and HLS node W = gen_finn_dt_tensor(wdt, (channels, 1, k_h, k_w)) # shape: [channels, 1, k, k] W_onnx = _infer_sparse_weight_tensor( @@ -221,14 +220,23 @@ def test_fpgadataflow_vvau( if act is None: T = None tdt = None - odt = DataType["INT32"] + if wdt == DataType["BIPOLAR"] and idt == DataType["BIPOLAR"]: + odt = DataType["UINT32"] + else: + odt = DataType["INT32"] else: odt = act - (min_v, max_v) = _calculate_dot_prod_range(idt, wdt, k_h * k_w * channels) + (min_v, max_v) = _calculate_dot_prod_range(idt, wdt, k_h * k_w) n_steps = act.get_num_possible_values() - 1 T = np.random.randint(min_v, max_v - 1, (channels, n_steps)).astype(np.float32) T = np.sort(T, axis=1) - tdt = DataType["INT32"] + if wdt == DataType["BIPOLAR"] and idt == DataType["BIPOLAR"]: + tdt = DataType["UINT32"] + # bias thresholds to be positive + T = np.ceil((T + (k_h * k_w)) / 2) + assert (T >= 0).all() + else: + tdt = DataType["INT32"] model = _make_single_vvau_modelwrapper( W, pe, simd, k_h, k_w, channels, dim_h, dim_w, wdt, idt, odt, T, tdt, mem_mode @@ -250,14 +258,25 @@ def test_fpgadataflow_vvau( input_dict = prepare_inputs(x_vvau) # Calculate output - y_expected = np.matmul(x, W_onnx) # Y is in [N, H, W, C] format + if wdt == DataType["BIPOLAR"] and idt == DataType["BIPOLAR"]: + # Simulate XNOR-popcount matrix multiplication, see + # qonnx.custom_op.general.xnorpopcount (not usable due to sparse W) + y_expected = np.matmul(x, W_onnx) + y_expected = (y_expected + (k_h * k_w)) / 2 + else: + y_expected = np.matmul(x, W_onnx) # Y is in [N, H, W, C] format + if T is not None: # Reshape Y, as multithreshold expects Y to be in [N, C, H, W] format y_expected = np.transpose(y_expected, (0, 3, 1, 2)) y_expected = multithreshold(y_expected, T) y_expected = np.transpose(y_expected, (0, 2, 3, 1)) - # signed offset - y_expected += act.min() + if act == DataType["BIPOLAR"]: + # binary to bipolar + y_expected = 2 * y_expected - 1 + else: + # signed offset + y_expected += act.min() y_produced = oxe.execute_onnx(model, input_dict, return_full_exec_context=False)[ "outp" @@ -265,11 +284,11 @@ def test_fpgadataflow_vvau( assert (y_produced == y_expected).all(), "incorrect result" - # if exec_mode == "rtlsim": - # node = model.get_nodes_by_op_type("VectorVectorActivation")[0] - # inst = getCustomOp(node) - # cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim") - # exp_cycles_dict = model.analysis(exp_cycles_per_layer) - # exp_cycles = exp_cycles_dict[node.name] - # assert np.isclose(exp_cycles, cycles_rtlsim, atol=10) - # assert exp_cycles != 0 + if exec_mode == "rtlsim": + node = model.get_nodes_by_op_type("VectorVectorActivation")[0] + inst = getCustomOp(node) + cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim") + exp_cycles_dict = model.analysis(exp_cycles_per_layer) + exp_cycles = exp_cycles_dict[node.name] + assert np.isclose(exp_cycles, cycles_rtlsim, atol=10) + assert exp_cycles != 0