From 8497814bf2834060cef05bf6832497e022b2e881 Mon Sep 17 00:00:00 2001
From: Yaman Umuroglu <yamanu@xilinx.com>
Date: Sun, 17 Jan 2021 01:51:00 +0100
Subject: [PATCH] Switch hlslib comparison functions (#263)

* [Test] add lfc to end2end tests
* [Deps] update hlslib to get comp:: fxns
* [HLS] use comp:: comparators instead of std::
* [VVAU] hlslib now uses inner prod dim instead of K
* [Thres] manually workaround vivado_hls bug for T[0][0]=0
---
 docker/finn_entrypoint.sh                      |  2 +-
 .../fpgadataflow/channelwise_op_batch.py       |  2 +-
 .../fpgadataflow/streamingfclayer_batch.py     |  9 ++++++-
 src/finn/custom_op/fpgadataflow/templates.py   |  2 +-
 .../fpgadataflow/thresholding_batch.py         |  9 ++++++-
 .../vector_vector_activate_batch.py            | 17 +++++++++---
 src/finn/util/test.py                          |  1 +
 tests/end2end/test_end2end_bnn_pynq.py         | 27 ++++++++++++++++++-
 .../test_fpgadataflow_thresholding.py          |  4 +++
 9 files changed, 63 insertions(+), 10 deletions(-)

diff --git a/docker/finn_entrypoint.sh b/docker/finn_entrypoint.sh
index ee8d88789..1ed8875e8 100644
--- a/docker/finn_entrypoint.sh
+++ b/docker/finn_entrypoint.sh
@@ -15,7 +15,7 @@ gecho () {
 FINN_BASE_COMMIT=1363981654009067790d5f2d0c3dd303b5fa05cb
 BREVITAS_COMMIT=aff49758ec445d77c75721c7de3091a2a1797ca8
 CNPY_COMMIT=4e8810b1a8637695171ed346ce68f6984e585ef4
-HLSLIB_COMMIT=cfafe11a93b79ab1af7529d68f08886913a6466e
+HLSLIB_COMMIT=2e49322d1bbc4969ca293843bda1f3f9c05456fc
 PYVERILATOR_COMMIT=e2ff74030de3992dcac54bf1b6aad2915946e8cb
 OMX_COMMIT=1bae737669901e762f581af73348332b5c4b2ada
diff --git a/src/finn/custom_op/fpgadataflow/channelwise_op_batch.py b/src/finn/custom_op/fpgadataflow/channelwise_op_batch.py
index 635f37d56..097ec336f 100644
--- a/src/finn/custom_op/fpgadataflow/channelwise_op_batch.py
+++ b/src/finn/custom_op/fpgadataflow/channelwise_op_batch.py
@@ -363,7 +363,7 @@ class ChannelwiseOp_Batch(HLSCustomOp):
         # get desired function
         func = self.get_nodeattr("Func")
         if func == "cmp_le":
-            func_str = "std::less_equal"
+            func_str = "comp::less_equal"
         elif func == "cmp_ge":
             func_str = "std::greater_equal"
         elif func == "add":
diff --git a/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py b/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py
index 118d66822..8868002c9 100644
--- a/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py
+++ b/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py
@@ -643,6 +643,13 @@ class StreamingFCLayer_Batch(HLSCustomOp):
         # ensure all thresholds are integer
         assert (orig_thres_matrix.astype(np.int32) == orig_thres_matrix).all()
         ret = orig_thres_matrix
+        # workaround for vivado_hls threshold bug
+        if ret[0][0] == 0:
+            ret = np.copy(ret)
+            ret[0][0] = 1
+            warnings.warn(
+                "Setting 0-valued first threshold to 1 to avoid vivado_hls bug"
+            )
         # ensure channels = mh , duplicating if necessary
         if ret.shape[0] == 1:
             ret = np.tile(ret, (mh, 1))
@@ -846,7 +853,7 @@ class StreamingFCLayer_Batch(HLSCustomOp):
                     tdt_hls,
                     odt_hls,
                     self.get_nodeattr("ActVal"),
-                    "std::less_equal<%s>" % tdt_hls,
+                    "comp::less_equal<%s>" % tdt_hls,
                 )
             )
             f_thresh.write(thresholds_hls_code)
diff --git a/src/finn/custom_op/fpgadataflow/templates.py b/src/finn/custom_op/fpgadataflow/templates.py
index 48ba43419..40221ce3b 100644
--- a/src/finn/custom_op/fpgadataflow/templates.py
+++ b/src/finn/custom_op/fpgadataflow/templates.py
@@ -372,7 +372,7 @@ void Thresholding_Stream_Batch(hls::stream<TI> &in,
   // alternatively: number of vertical matrix chunks
   unsigned const NF = NumChannels / PE;
-  ThresholdsActivation<1, PE, NumSteps, TT, TO, ActVal, std::less_equal<TT>> internal_thr;
+  ThresholdsActivation<1, PE, NumSteps, TT, TO, ActVal, comp::less_equal<TT>> internal_thr;
 #pragma HLS ARRAY_PARTITION variable=internal_thr.m_thresholds complete dim=0
   // everything merged into a common iteration space (one "big" loop instead
diff --git a/src/finn/custom_op/fpgadataflow/thresholding_batch.py b/src/finn/custom_op/fpgadataflow/thresholding_batch.py
index ede3a2e58..30374a7d9 100644
--- a/src/finn/custom_op/fpgadataflow/thresholding_batch.py
+++ b/src/finn/custom_op/fpgadataflow/thresholding_batch.py
@@ -334,6 +334,13 @@ class Thresholding_Batch(HLSCustomOp):
             np.mod(orig_thres_matrix, 1), 0
         ).all(), "Need int threshold tensor"
         ret = orig_thres_matrix
+        # workaround for vivado_hls threshold bug
+        if ret[0][0] == 0:
+            ret = np.copy(ret)
+            ret[0][0] = 1
+            warnings.warn(
+                "Setting 0-valued first threshold to 1 to avoid vivado_hls bug"
+            )
         # ensure channels = mh , duplicating if necessary
         if ret.shape[0] == 1:
             ret = np.tile(ret, (mh, 1))
@@ -394,7 +401,7 @@ class Thresholding_Batch(HLSCustomOp):
                 tdt_hls,
                 odt_hls,
                 self.get_nodeattr("ActVal"),
-                "std::less_equal<%s>" % tdt_hls,
+                "comp::less_equal<%s>" % tdt_hls,
             )
         )
         f_thresh.write(thresholds_hls_code)
diff --git a/src/finn/custom_op/fpgadataflow/vector_vector_activate_batch.py b/src/finn/custom_op/fpgadataflow/vector_vector_activate_batch.py
index c7e6466bc..9a897d9fa 100644
--- a/src/finn/custom_op/fpgadataflow/vector_vector_activate_batch.py
+++ b/src/finn/custom_op/fpgadataflow/vector_vector_activate_batch.py
@@ -278,6 +278,13 @@ class Vector_Vector_Activate_Batch(HLSCustomOp):
         not as expected (2)."""
         n_thres_steps = orig_thres_matrix.shape[1]
         ret = orig_thres_matrix
+        # workaround for vivado_hls threshold bug
+        if ret[0][0] == 0:
+            ret = np.copy(ret)
+            ret[0][0] = 1
+            warnings.warn(
+                "Setting 0-valued first threshold to 1 to avoid vivado_hls bug"
+            )
         # distribute rows between PEs
         ret = interleave_matrix_outer_dim_from_partitions(ret, pe)
         assert (
@@ -352,7 +359,7 @@ class Vector_Vector_Activate_Batch(HLSCustomOp):
                 tdt_hls,
                 odt_hls,
                 self.get_nodeattr("ActVal"),
-                "std::less_equal<%s>" % tdt_hls,
+                "comp::less_equal<%s>" % tdt_hls,
             )
         )
         f_thresh.write(thresholds_hls_code)
@@ -450,11 +457,13 @@ class Vector_Vector_Activate_Batch(HLSCustomOp):
     def defines(self, var):
         dim = self.get_nodeattr("Dim")
         numReps = 1 * dim * dim
+        kernel = self.get_nodeattr("Kernel")
+        innerProdDim = kernel * kernel
         self.code_gen_dict["$DEFINES$"] = [
-            """#define Channels1 {}\n #define Kernel1 {}\n
+            """#define Channels1 {}\n #define InnerProdDim {}\n
             #define SIMD1 1\n #define PE1 {}\n #define numReps {}""".format(
                 self.get_nodeattr("Channels"),
-                self.get_nodeattr("Kernel"),
+                innerProdDim,
                 self.get_nodeattr("PE"),
                 numReps,
             )
@@ -499,7 +508,7 @@ class Vector_Vector_Activate_Batch(HLSCustomOp):
         threshs = "threshs"
         node = self.onnx_node
         self.code_gen_dict["$DOCOMPUTE$"] = [
-            """{}<Channels1, Kernel1, SIMD1, PE1, 1, {}, {}, {}>
+            """{}<Channels1, InnerProdDim, SIMD1, PE1, 1, {}, {}, {}>
             (in0, out, weights, {}, numReps, {});""".format(
                 node.op_type,
                 tmpl_args["TSrcI"],
diff --git a/src/finn/util/test.py b/src/finn/util/test.py
index 498b8b551..0a3475178 100644
--- a/src/finn/util/test.py
+++ b/src/finn/util/test.py
@@ -159,6 +159,7 @@ def get_trained_network_and_ishape(topology, wbits, abits):
     topology_to_ishape = {
         "tfc": (1, 1, 28, 28),
+        "lfc": (1, 1, 28, 28),
         "cnv": (1, 3, 32, 32),
     }
     ishape = topology_to_ishape[topology]
diff --git a/tests/end2end/test_end2end_bnn_pynq.py b/tests/end2end/test_end2end_bnn_pynq.py
index 8ed544e8d..5f54eeacf 100644
--- a/tests/end2end/test_end2end_bnn_pynq.py
+++ b/tests/end2end/test_end2end_bnn_pynq.py
@@ -150,6 +150,27 @@ def fold_tfc(model):
     return model
 
 
+def fold_lfc(model):
+    fc_layers = model.get_nodes_by_op_type("StreamingFCLayer_Batch")
+    # (PE, SIMD, ramstyle) for each layer
+    config = [
+        (32, 49, "block"),
+        (64, 32, "auto"),
+        (32, 64, "auto"),
+        (10, 8, "distributed"),
+    ]
+    for fcl, (pe, simd, ramstyle) in zip(fc_layers, config):
+        fcl_inst = getCustomOp(fcl)
+        fcl_inst.set_nodeattr("PE", pe)
+        fcl_inst.set_nodeattr("SIMD", simd)
+        fcl_inst.set_nodeattr("ram_style", ramstyle)
+    # set parallelism for input quantizer to be same as first layer's SIMD
+    inp_qnt_node = model.get_nodes_by_op_type("Thresholding_Batch")[0]
+    inp_qnt = getCustomOp(inp_qnt_node)
+    inp_qnt.set_nodeattr("PE", 49)
+    return model
+
+
 def fold_cnv_large(model):
     fc_layers = model.get_nodes_by_op_type("StreamingFCLayer_Batch")
     # each tuple is (PE, SIMD) for a layer
@@ -208,6 +229,8 @@ def fold_cnv_small(model):
 def get_folding_function(topology, wbits, abits):
     if "tfc" in topology:
         return fold_tfc
+    elif "lfc" in topology:
+        return fold_lfc
     elif "cnv" in topology:
         if wbits == 1 and abits == 1:
             return fold_cnv_large
@@ -284,11 +307,13 @@ def topology2dataset(topology):
 @pytest.mark.parametrize("wbits", [1, 2])
 @pytest.mark.parametrize("abits", [1, 2])
-@pytest.mark.parametrize("topology", ["tfc", "cnv"])
+@pytest.mark.parametrize("topology", ["lfc", "tfc", "cnv"])
 class TestEnd2End:
     def test_export(self, topology, wbits, abits):
         if wbits > abits:
             pytest.skip("No wbits > abits end2end network configs for now")
+        if topology == "lfc" and wbits > 1:
+            pytest.skip("Skipping non-existing lfc configs")
         (model, ishape) = get_trained_network_and_ishape(topology, wbits, abits)
         chkpt_name = get_checkpoint_name(topology, wbits, abits, "export")
         bo.export_finn_onnx(model, ishape, chkpt_name)
diff --git a/tests/fpgadataflow/test_fpgadataflow_thresholding.py b/tests/fpgadataflow/test_fpgadataflow_thresholding.py
index 5d46f4c3d..bbc7e8227 100644
--- a/tests/fpgadataflow/test_fpgadataflow_thresholding.py
+++ b/tests/fpgadataflow/test_fpgadataflow_thresholding.py
@@ -122,6 +122,10 @@ def test_fpgadataflow_thresholding(idt, act, nf, ich, exec_mode, mem_mode):
     odt = act
     n_steps = act.get_num_possible_values() - 1
     T = np.random.randint(idt.min(), idt.max() + 1, (ich, n_steps)).astype(np.float32)
+    # make the vivado_hls threshold bug appear (incorrect rtlsim result when first
+    # threshold of first channel is zero, while using BIPOLAR output)
+    if act == DataType.BIPOLAR:
+        T[0][0] = 0
     # provide non-decreasing thresholds
     T = np.sort(T, axis=1)
-- 
GitLab
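
Note: the vivado_hls workaround added in the three get_hls_compatible_threshold_tensor hunks above follows the same pattern each time. A minimal standalone sketch of that pattern is below; the helper name apply_threshold_workaround is illustrative only and not part of the FINN API.

    import warnings

    import numpy as np


    def apply_threshold_workaround(thresholds):
        # Mirror the hunks above: if the first threshold of the first channel
        # is 0, bump it to 1 (on a copy) to avoid the vivado_hls T[0][0]=0 bug.
        # The matrix layout is (channels, threshold steps).
        ret = np.asarray(thresholds)
        if ret[0][0] == 0:
            ret = np.copy(ret)
            ret[0][0] = 1
            warnings.warn(
                "Setting 0-valued first threshold to 1 to avoid vivado_hls bug"
            )
        return ret


    # example: a bipolar-style threshold matrix whose first entry is zero
    T = np.zeros((3, 1), dtype=np.float32)
    print(apply_threshold_workaround(T))  # first channel's threshold becomes 1.0

Only T[0][0] is patched, matching the hunks above, since the bug is triggered specifically by a zero first threshold in the first channel (see the comment added in test_fpgadataflow_thresholding.py).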