Commit 12621cc8 authored by auphelia

[Test] ConvInputGen+VVAU node test to check behaviour of depthwise convolution in hls layers and update VVAU node
parent b0e9a1d3
@@ -32,12 +32,6 @@ class Vector_Vector_Activate_Batch(HLSCustomOp):
             "outputDataType": ("s", True, ""),
             # no-activation mode (produce accumulators)
             "noActivation": ("i", False, 0),
-            # FPGA resource type for memories in decoupled mode
-            # auto -- let Vivado decide
-            # block -- use BRAM
-            # distributed -- use LUTRAM
-            # see also https://www.xilinx.com/support/answers/38070.html
-            "ram_style": ("s", False, "auto"),
         }
         my_attrs.update(super().get_nodeattr_types())
         return my_attrs
@@ -154,7 +148,7 @@ class Vector_Vector_Activate_Batch(HLSCustomOp):
         inp_is_bipolar = self.get_input_datatype() == DataType.BIPOLAR
         wt_is_bipolar = self.get_weight_datatype() == DataType.BIPOLAR
         # fill in TSrcI and TWeightI
-        # TODO handle non-bipolar binary inputs
+        # TODO handle bipolar inputs
         if inp_is_bipolar or wt_is_bipolar:
             raise Exception("VVAU node doesn't support bipolar values yet.")
         else:
@@ -178,10 +172,9 @@ class Vector_Vector_Activate_Batch(HLSCustomOp):
             k,
         ), """Weights matrix doesn't
         have expected shape (channels, 1, kernel_size, kernel_size)"""
-        # start by transposing the original weight matrix, since ONNX and
-        # finn-hlslib use different assumptions
         ret = orig_weight_matrix
         ret = ret.reshape(ch, k * k)
+        # distribute rows between PEs
         ret = interleave_matrix_outer_dim_from_partitions(ret, pe)
         ret = ret.reshape(1, pe, wmem, 1)
         return ret
@@ -218,24 +211,18 @@ class Vector_Vector_Activate_Batch(HLSCustomOp):
         weights = model.get_initializer(self.onnx_node.input[1])
         # convert weights into hlslib-compatible format
         weight_tensor = self.get_hls_compatible_weight_tensor(weights)
-        export_wdt = self.get_weight_datatype()
-        # we have converted bipolar weights to binary for export,
-        # so use it as such for weight generation
-        if self.get_weight_datatype() == DataType.BIPOLAR:
-            export_wdt = DataType.BINARY
+        wdt = self.get_weight_datatype()
         code_gen_dir = path
         """Saves weights into params.h"""
-        weight_hls_code = numpy_to_hls_code(
-            weight_tensor, export_wdt, "weights", True, True
-        )
+        weight_hls_code = numpy_to_hls_code(weight_tensor, wdt, "weights", True, True)
         # write weights into params.h
         f_weights = open("{}/params.h".format(code_gen_dir), "w")
-        if export_wdt.bitwidth() != 1:
+        if wdt.bitwidth() != 1:
             f_weights.write(
                 "const FixedPointWeights<1,{},{},{}> weights = ".format(
-                    export_wdt.get_hls_datatype_str(),
+                    wdt.get_hls_datatype_str(),
                     self.get_nodeattr("PE"),
                     self.calc_wmem(),
                 )
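
For reference, a sketch of the declaration this branch emits, using hypothetical values (wdt = INT4, whose HLS type string is assumed to be ap_int<4>, PE = 2, WMEM = 8):

    decl = "const FixedPointWeights<1,{},{},{}> weights = ".format("ap_int<4>", 2, 8)
    print(decl)  # const FixedPointWeights<1,ap_int<4>,2,8> weights =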
@@ -261,7 +248,6 @@ class Vector_Vector_Activate_Batch(HLSCustomOp):
         # write thresholds into thresh.h
         f_thresh = open("{}/thresh.h".format(code_gen_dir), "w")
         tdt_hls = tdt.get_hls_datatype_str()
-        # use binary to export bipolar activations
         odt = self.get_output_datatype()
         odt_hls = odt.get_hls_datatype_str()
         f_thresh.write(
@@ -309,12 +295,6 @@ class Vector_Vector_Activate_Batch(HLSCustomOp):
             not float32 as expected."""
             expected_inp_shape = self.get_folded_input_shape()
             reshaped_input = context[inputs].reshape(expected_inp_shape)
-            if self.get_input_datatype() == DataType.BIPOLAR:
-                # store bipolar activations as binary
-                reshaped_input = (reshaped_input + 1) / 2
-                export_idt = DataType.BINARY
-            else:
-                export_idt = self.get_input_datatype()
             # make copy before saving the array
             reshaped_input = reshaped_input.copy()
             np.save(
@@ -332,11 +312,6 @@ class Vector_Vector_Activate_Batch(HLSCustomOp):
             super().exec_precompiled_singlenode_model()
             # load output npy file
             super().npy_to_dynamic_output(context)
-            # reinterpret binary output as bipolar where needed
-            if self.get_output_datatype() == DataType.BIPOLAR:
-                out = context[node.output[0]]
-                out = 2 * out - 1
-                context[node.output[0]] = out
             assert (
                 context[node.output[0]].shape == self.get_folded_output_shape()
             ), """Output shape is not as expected"""
@@ -346,9 +321,8 @@ class Vector_Vector_Activate_Batch(HLSCustomOp):
         elif mode == "rtlsim":
             sim = self.get_rtlsim()
             nbits = self.get_instream_width()
-            inp = npy_to_rtlsim_input(
-                "{}/input_0.npy".format(code_gen_dir), export_idt, nbits
-            )
+            idt = self.get_input_datatype()
+            inp = npy_to_rtlsim_input("{}/input_0.npy".format(code_gen_dir), idt, nbits)
             super().reset_rtlsim(sim)
             super().toggle_clk(sim)
             output = self.rtlsim(sim, inp)
@@ -396,9 +370,6 @@ class Vector_Vector_Activate_Batch(HLSCustomOp):
     def read_npy_data(self):
         code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
         dtype = self.get_input_datatype()
-        if dtype == DataType.BIPOLAR:
-            # use binary for bipolar storage
-            dtype = DataType.BINARY
         elem_bits = dtype.bitwidth()
         packed_bits = self.get_instream_width()
         packed_hls_type = "ap_uint<%d>" % packed_bits
@@ -444,9 +415,6 @@ class Vector_Vector_Activate_Batch(HLSCustomOp):
     def dataoutstrm(self):
         code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
         dtype = self.get_output_datatype()
-        if dtype == DataType.BIPOLAR:
-            # use binary for bipolar storage
-            dtype = DataType.BINARY
         elem_bits = dtype.bitwidth()
         packed_bits = self.get_outstream_width()
         packed_hls_type = "ap_uint<%d>" % packed_bits
# Copyright (c) 2020, Xilinx
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# * Neither the name of FINN nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import pytest
import onnx.helper as oh
from onnx import TensorProto
import numpy as np
from finn.core.modelwrapper import ModelWrapper
from finn.core.datatype import DataType
from finn.transformation.infer_shapes import InferShapes
from finn.transformation.fpgadataflow.convert_to_hls_layers import (
    InferConvInpGen,
    InferVVAU,
)
from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
import finn.core.onnx_exec as oxe
from finn.custom_op.im2col import compute_conv_output_dim
from finn.util.basic import calculate_signed_dot_prod_range, gen_finn_dt_tensor
from finn.custom_op.registry import getCustomOp
from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
from finn.transformation.general import GiveUniqueNodeNames
from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
from finn.transformation.fpgadataflow.replace_verilog_relpaths import (
    ReplaceVerilogRelPaths,
)

def set_up_reference_model(act, idt, wdt, k, ifm_dim, ifm_ch, stride, padding):
    # set up reference model consisting of Im2Col + MatMul (+ MultiThreshold)
    ofm_ch = ifm_ch
    ofm_dim = compute_conv_output_dim(ifm_dim, k, stride, pad=padding)

    if act is None:
        odt = DataType.INT32
    else:
        odt = act
        out_act = oh.make_tensor_value_info(
            "out_act", TensorProto.FLOAT, [1, ofm_dim, ofm_dim, ofm_ch]
        )
        T = oh.make_tensor_value_info("T", TensorProto.FLOAT, [ofm_ch, 15])
        tdt = DataType.INT32
        thresh_node = oh.make_node(
            "MultiThreshold",
            domain="finn",
            inputs=["outp", "T"],
            outputs=["out_act"],
            data_layout="NHWC",
            out_dtype=odt.name,
            out_scale=1.0,
            out_bias=0.0,
        )

    # set up onnx model
    inp = oh.make_tensor_value_info(
        "inp", TensorProto.FLOAT, [1, ifm_dim, ifm_dim, ifm_ch]
    )
    outp = oh.make_tensor_value_info(
        "outp", TensorProto.FLOAT, [1, ofm_dim, ofm_dim, ofm_ch]
    )

    W_sparse = oh.make_tensor_value_info(
        "W_sparse", TensorProto.FLOAT, [ifm_ch * k * k, ofm_ch]
    )

    im2col_node = oh.make_node(
        "Im2Col",
        domain="finn",
        inputs=["inp"],
        outputs=["im2col_out"],
        kernel_size=k,
        stride=stride,
        pad_amount=padding,
        input_shape="(1, {}, {}, {})".format(ifm_dim, ifm_dim, ifm_ch),
        depthwise=1,
    )

    matmul_node = oh.make_node(
        "MatMul", inputs=["im2col_out", "W_sparse"], outputs=["outp"]
    )

    if act is None:
        node_list = [im2col_node, matmul_node]
        global_out = outp
        value_info = [W_sparse]
    else:
        node_list = [im2col_node, matmul_node, thresh_node]
        global_out = out_act
        value_info = [W_sparse, T]

    graph = oh.make_graph(
        nodes=node_list,
        name="lowered_dw_cnv_graph",
        inputs=[inp],
        outputs=[global_out],
        value_info=value_info,
    )
    model = oh.make_model(graph, producer_name="lowered_dw_cnv-model")
    model = ModelWrapper(model)

    # initialize model
    model.set_tensor_datatype("inp", idt)
    model.set_tensor_datatype(model.graph.output[0].name, odt)
    model.set_tensor_datatype("W_sparse", wdt)

    w_tensor = gen_finn_dt_tensor(wdt, [ofm_ch, 1, k, k])
    # create sparse matrix
    W_matrix = np.zeros((ofm_ch, ifm_ch, k, k))
    for ch in range(ifm_ch):
        W_matrix[ch][ch] = w_tensor[ch][0]
    W_matrix = W_matrix.astype(np.float32)
    W_matrix = W_matrix.transpose(0, 2, 3, 1)
    W_matrix = W_matrix.reshape(ofm_ch, ifm_ch * k * k)

    model.set_initializer("W_sparse", W_matrix.T)
    sparsity = {"dw": {"kernel_shape": k}}
    model.set_tensor_sparsity("W_sparse", sparsity)

    if act is not None:
        (min, max) = calculate_signed_dot_prod_range(idt, wdt, ifm_ch * k * k)
        n_steps = odt.get_num_possible_values() - 1
        T_values = np.random.randint(min, max - 1, (ofm_ch, n_steps)).astype(
            np.float32
        )
        # provide non-decreasing thresholds
        T_values = np.sort(T_values, axis=1)
        model.set_initializer("T", T_values)
        model.set_tensor_datatype("T", tdt)

    model = model.transform(InferShapes())

    return model
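

# Illustration only (hypothetical helper, never called by the tests): a toy
# rendering of the block-diagonal weight layout set_up_reference_model builds.
# Each output channel connects only to its own input channel, so every row of
# W (and every column of the exported W.T) has exactly k*k nonzero entries.
def _sparse_weight_layout_sketch():
    ifm_ch = ofm_ch = 2
    k = 2
    w = np.arange(1, ofm_ch * k * k + 1).reshape(ofm_ch, 1, k, k)
    W = np.zeros((ofm_ch, ifm_ch, k, k), dtype=np.float32)
    for ch in range(ifm_ch):
        W[ch][ch] = w[ch][0]
    W = W.transpose(0, 2, 3, 1).reshape(ofm_ch, ifm_ch * k * k)
    assert list((W != 0).sum(axis=1)) == [k * k] * ofm_ch
    # output spatial size follows the usual convolution formula, e.g. for
    # ifm_dim=6, k=2, stride=2, padding=1: (6 + 2*1 - 2) // 2 + 1 = 4
    return W.T  # shape (ifm_ch*k*k, ofm_ch), as stored in "W_sparse"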


# PE
@pytest.mark.parametrize("pe", [1, 2, 4])
# Output activation
@pytest.mark.parametrize("act", [None, DataType.UINT4])
# kernel size
@pytest.mark.parametrize("k", [2, 4])
# stride
@pytest.mark.parametrize("stride", [1, 2])
# padding
@pytest.mark.parametrize("padding", [0, 1])
@pytest.mark.slow
@pytest.mark.vivado
def test_depthwise_conv_hls_cppsim(act, pe, k, stride, padding):
    idt = wdt = DataType.INT4
    ifm_dim = 6
    ifm_ch = 4

    # set up reference model consisting of Im2Col + MatMul (+ MultiThreshold)
    model = set_up_reference_model(act, idt, wdt, k, ifm_dim, ifm_ch, stride, padding)

    input_tensor = gen_finn_dt_tensor(idt, [1, ifm_dim, ifm_dim, ifm_ch])
    input_dict = {"inp": input_tensor}

    new_model = model.transform(InferConvInpGen())
    new_model = new_model.transform(InferVVAU())

    # set SIMD in ConvInputGen node and PE in VVAU node
    for n in new_model.graph.node:
        if n.op_type == "ConvolutionInputGenerator":
            convinputgen_node = getCustomOp(n)
            convinputgen_node.set_nodeattr("SIMD", pe)
        elif n.op_type == "Vector_Vector_Activate_Batch":
            vvau_node = getCustomOp(n)
            vvau_node.set_nodeattr("PE", pe)

    new_model = new_model.transform(SetExecMode("cppsim"))
    new_model = new_model.transform(PrepareCppSim())
    new_model = new_model.transform(CompileCppSim())

    assert oxe.compare_execution(model, new_model, input_dict)


# PE
@pytest.mark.parametrize("pe", [1, 2, 4])
# Output activation
@pytest.mark.parametrize("act", [None, DataType.UINT4])
# kernel size
@pytest.mark.parametrize("k", [2, 4])
# stride
@pytest.mark.parametrize("stride", [1, 2])
# padding
@pytest.mark.parametrize("padding", [0, 1])
@pytest.mark.slow
@pytest.mark.vivado
def test_depthwise_conv_hls_rtlsim(act, pe, k, stride, padding):
    idt = wdt = DataType.INT4
    ifm_dim = 6
    ifm_ch = 4

    # set up reference model consisting of Im2Col + MatMul (+ MultiThreshold)
    model = set_up_reference_model(act, idt, wdt, k, ifm_dim, ifm_ch, stride, padding)

    input_tensor = gen_finn_dt_tensor(idt, [1, ifm_dim, ifm_dim, ifm_ch])
    input_dict = {"inp": input_tensor}

    new_model = model.transform(InferConvInpGen())
    new_model = new_model.transform(InferVVAU())

    # set SIMD in ConvInputGen node and PE in VVAU node
    for n in new_model.graph.node:
        if n.op_type == "ConvolutionInputGenerator":
            convinputgen_node = getCustomOp(n)
            convinputgen_node.set_nodeattr("SIMD", pe)
        elif n.op_type == "Vector_Vector_Activate_Batch":
            vvau_node = getCustomOp(n)
            vvau_node.set_nodeattr("PE", pe)

    new_model = new_model.transform(SetExecMode("rtlsim"))
    new_model = new_model.transform(GiveUniqueNodeNames())
    new_model = new_model.transform(PrepareIP("xc7z020clg400-1", 5))
    new_model = new_model.transform(HLSSynthIP())
    new_model = new_model.transform(ReplaceVerilogRelPaths())
    new_model = new_model.transform(PrepareRTLSim())

    assert oxe.compare_execution(model, new_model, input_dict)