From 68e9b60133bf9278fcd9763c41c82ccc88823ae2 Mon Sep 17 00:00:00 2001 From: Felix Thomasmathibalan <felixjohnny.thomasmathibalan@arm.com> Date: Tue, 20 Jul 2021 06:49:51 +0200 Subject: [PATCH] CMSIS-NN: Cleanup of CMSIS-DSP dependencies Leftover CMSIS-DSP dependencies are removed. For functions with just DSP extension optimization, the reference C implementation (if available) is picked for processors with Helium Technology, as auto-vectorization can potentially give better performance. This affects only the non-TFLM functions. Change-Id: I8ededad1d34eeb27c1ac2d65b3250b85562cc0d4 --- CMSIS/NN/Include/arm_nnsupportfunctions.h | 23 +++++- .../Source/ActivationFunctions/arm_relu_q15.c | 8 +- .../Source/ActivationFunctions/arm_relu_q7.c | 12 +-- .../arm_elementwise_add_s8.c | 9 +-- .../arm_elementwise_mul_s8.c | 6 +- .../arm_convolve_1x1_HWC_q7_fast_nonsquare.c | 6 +- .../arm_convolve_HWC_q15_basic.c | 6 +- .../arm_convolve_HWC_q15_fast.c | 6 +- .../arm_convolve_HWC_q15_fast_nonsquare.c | 6 +- .../arm_convolve_HWC_q7_RGB.c | 15 ++-- .../arm_convolve_HWC_q7_basic.c | 8 +- .../arm_convolve_HWC_q7_basic_nonsquare.c | 6 +- .../arm_convolve_HWC_q7_fast.c | 6 +- .../arm_convolve_HWC_q7_fast_nonsquare.c | 6 +- .../arm_depthwise_separable_conv_HWC_q7.c | 6 +- ...epthwise_separable_conv_HWC_q7_nonsquare.c | 6 +- .../arm_fully_connected_mat_q7_vec_q15.c | 6 +- .../arm_fully_connected_mat_q7_vec_q15_opt.c | 6 +- .../arm_fully_connected_q15.c | 6 +- .../arm_fully_connected_q15_opt.c | 8 +- .../arm_fully_connected_q7.c | 6 +- .../arm_fully_connected_q7_opt.c | 9 ++- .../arm_nn_accumulate_q7_to_q15.c | 21 +++--- .../Source/NNSupportFunctions/arm_nn_add_q7.c | 6 +- .../NNSupportFunctions/arm_nn_mult_q15.c | 73 +------------------ .../NNSupportFunctions/arm_nn_mult_q7.c | 47 +----------- .../arm_q7_to_q15_reordered_no_shift.c | 16 ++-- .../Source/PoolingFunctions/arm_max_pool_s8.c | 10 +-- .../Source/PoolingFunctions/arm_pool_q7_HWC.c | 18 ++--- CMSIS/NN/Tests/UnitTest/CMakeLists.txt 
| 1 + 30 files changed, 143 insertions(+), 225 deletions(-) diff --git a/CMSIS/NN/Include/arm_nnsupportfunctions.h b/CMSIS/NN/Include/arm_nnsupportfunctions.h index 669419f90..ee59d14c2 100644 --- a/CMSIS/NN/Include/arm_nnsupportfunctions.h +++ b/CMSIS/NN/Include/arm_nnsupportfunctions.h @@ -21,8 +21,8 @@ * Title: arm_nnsupportfunctions.h * Description: Public header file of support functions for CMSIS NN Library * - * $Date: 5. July 2021 - * $Revision: V.5.6.0 + * $Date: 20. July 2021 + * $Revision: V.5.7.0 * * Target Processor: Cortex-M CPUs * -------------------------------------------------------------------- */ @@ -48,6 +48,13 @@ extern "C" { #define CLAMP(x, h, l) MAX(MIN((x), (h)), (l)) #define REDUCE_MULTIPLIER(_mult) ((_mult < 0x7FFF0000) ? ((_mult + (1 << 15)) >> 16) : 0x7FFF) +/** + * @brief definition to pack four 8 bit values. + */ +#define PACK_Q7x4_32x1(v0, v1, v2, v3) \ + ((((int32_t)(v0) << 0) & (int32_t)0x000000FF) | (((int32_t)(v1) << 8) & (int32_t)0x0000FF00) | \ + (((int32_t)(v2) << 16) & (int32_t)0x00FF0000) | (((int32_t)(v3) << 24) & (int32_t)0xFF000000)) + /** * @brief Union for SIMD access of q31/q15/q7 types */ @@ -539,6 +546,18 @@ __STATIC_FORCEINLINE q31_t arm_nn_read_q7x4(const q7_t *in_q7) return (val); } +/** + @brief Write four q7 to q7 pointer and increment pointer afterwards. + @param[in] in Double pointer to input value + @param[in] value Four bytes to copy + @return none + */ +__STATIC_FORCEINLINE void arm_nn_write_q7x4_ia(q7_t **in, q31_t value) +{ + memcpy(*in, &value, 4); + *in += 4; +} + /** * @brief memset optimized for MVE * @param[in, out] dst Destination pointer diff --git a/CMSIS/NN/Source/ActivationFunctions/arm_relu_q15.c b/CMSIS/NN/Source/ActivationFunctions/arm_relu_q15.c index d62117c78..1d4ea4e08 100644 --- a/CMSIS/NN/Source/ActivationFunctions/arm_relu_q15.c +++ b/CMSIS/NN/Source/ActivationFunctions/arm_relu_q15.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved. 
+ * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -21,7 +21,7 @@ * Title: arm_relu_q15.c * Description: Q15 version of ReLU * - * $Date: 09. October 2020 + * $Date: 20. July 2021 * $Revision: V.1.0.2 * * Target Processor: Cortex-M cores @@ -54,7 +54,7 @@ void arm_relu_q15(q15_t *data, uint16_t size) { -#if defined(ARM_MATH_DSP) +#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI) /* Run the following code for M cores with DSP extension */ uint16_t i = size >> 1; @@ -66,7 +66,7 @@ void arm_relu_q15(q15_t *data, uint16_t size) while (i) { - in = read_q15x2_ia(&input); + in = arm_nn_read_q15x2_ia((const q15_t **)&input); /* extract the first bit */ buf = __ROR(in & 0x80008000, 15); diff --git a/CMSIS/NN/Source/ActivationFunctions/arm_relu_q7.c b/CMSIS/NN/Source/ActivationFunctions/arm_relu_q7.c index 75be35d99..a3163cddc 100644 --- a/CMSIS/NN/Source/ActivationFunctions/arm_relu_q7.c +++ b/CMSIS/NN/Source/ActivationFunctions/arm_relu_q7.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved. + * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -21,8 +21,8 @@ * Title: arm_relu_q7.c * Description: Q7 version of ReLU * - * $Date: 09. October 2020 - * $Revision: V.1.0.3 + * $Date: 20. 
July 2021 + * $Revision: V.1.1.3 * * Target Processor: Cortex-M cores * @@ -54,7 +54,7 @@ void arm_relu_q7(q7_t *data, uint16_t size) { -#if defined(ARM_MATH_DSP) +#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI) /* Run the following code for M cores with DSP extension */ uint16_t i = size >> 2; @@ -66,7 +66,7 @@ void arm_relu_q7(q7_t *data, uint16_t size) while (i) { - in = read_q7x4_ia(&input); + in = arm_nn_read_q7x4_ia((const q7_t **)&input); /* extract the first bit */ buf = (int32_t)__ROR((uint32_t)in & 0x80808080, 7); @@ -74,7 +74,7 @@ void arm_relu_q7(q7_t *data, uint16_t size) /* if MSB=1, mask will be 0xFF, 0x0 otherwise */ mask = __QSUB8(0x00000000, buf); - write_q7x4_ia(&output, in & (~mask)); + arm_nn_write_q7x4_ia(&output, in & (~mask)); i--; } diff --git a/CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_add_s8.c b/CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_add_s8.c index 85740edb8..6bade7ba6 100644 --- a/CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_add_s8.c +++ b/CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_add_s8.c @@ -21,8 +21,8 @@ * Title: arm_elementwise_add_s8 * Description: Element wise add * - * $Date: 01. March 2021 - * $Revision: V.2.5.3 + * $Date: 20. 
July 2021 + * $Revision: V.2.5.4 * * Target Processor: Cortex-M CPUs * @@ -30,9 +30,6 @@ #include "arm_nnfunctions.h" #include "arm_nnsupportfunctions.h" -#if defined(ARM_MATH_MVEI) -#include "arm_helium_utils.h" -#endif #if defined(ARM_MATH_MVEI) #define SAT_INPUT_VECT(__INPUT_V, __MULT, __SHIFT) \ @@ -209,7 +206,7 @@ arm_status arm_elementwise_add_s8(const int8_t *input_1_vect, sum = MIN(sum, out_activation_max); r4 = (q7_t)sum; - write_q7x4_ia(&output, __PACKq7(r1, r2, r3, r4)); + arm_nn_write_q7x4_ia(&output, PACK_Q7x4_32x1(r1, r2, r3, r4)); loop_count--; } diff --git a/CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_mul_s8.c b/CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_mul_s8.c index 7c560fe5c..3e3a63b90 100644 --- a/CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_mul_s8.c +++ b/CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_mul_s8.c @@ -21,8 +21,8 @@ * Title: arm_elementwise_mul_s8 * Description: Element wise multiplication * - * $Date: January 26, 2021 - * $Revision: V.1.0.5 + * $Date: July 20, 2021 + * $Revision: V.1.0.6 * * Target Processor: Cortex-M cores * @@ -163,7 +163,7 @@ arm_status arm_elementwise_mul_s8(const int8_t *input_1_vect, mul_res = MIN(mul_res, out_activation_max); r4 = (q7_t)mul_res; - write_q7x4_ia(&output, __PACKq7(r1, r2, r3, r4)); + arm_nn_write_q7x4_ia(&output, PACK_Q7x4_32x1(r1, r2, r3, r4)); loop_count--; } diff --git a/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_1x1_HWC_q7_fast_nonsquare.c b/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_1x1_HWC_q7_fast_nonsquare.c index 6418707f1..3db3ba4c6 100644 --- a/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_1x1_HWC_q7_fast_nonsquare.c +++ b/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_1x1_HWC_q7_fast_nonsquare.c @@ -21,8 +21,8 @@ * Title: arm_convolve_1x1_HWC_q7_fast_nonsquare.c * Description: Fast Q7 version of 1x1 convolution (non-square shape) * - * $Date: January 26, 2021 - * $Revision: V.1.0.2 + * $Date: July 20, 2021 + * $Revision: V.1.1.2 * * 
Target Processor: Cortex-M cores * @@ -100,7 +100,7 @@ arm_status arm_convolve_1x1_HWC_q7_fast_nonsquare(const q7_t *Im_in, q7_t *bufferB) { (void)bufferB; -#if defined(ARM_MATH_DSP) +#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI) /* Run the following code for Cortex-M4 and Cortex-M7 */ (void)dim_im_in_y; int16_t i_out_y, i_out_x; diff --git a/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q15_basic.c b/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q15_basic.c index e3502ebf4..0a6868a21 100644 --- a/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q15_basic.c +++ b/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q15_basic.c @@ -21,8 +21,8 @@ * Title: arm_convolve_HWC_q15_basic.c * Description: Q15 version of convolution * - * $Date: January 26, 2021 - * $Revision: V.1.0.2 + * $Date: July 20, 2021 + * $Revision: V.1.1.2 * * Target Processor: Cortex-M cores * @@ -88,7 +88,7 @@ arm_status arm_convolve_HWC_q15_basic(const q15_t *Im_in, q7_t *bufferB) { (void)bufferB; -#if defined(ARM_MATH_DSP) +#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI) /* Run the following code for Cortex-M4 and Cortex-M7 */ int16_t i_out_y, i_out_x, i_ker_y, i_ker_x; diff --git a/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q15_fast.c b/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q15_fast.c index ac007e4a9..6a3222945 100644 --- a/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q15_fast.c +++ b/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q15_fast.c @@ -21,8 +21,8 @@ * Title: arm_convolve_HWC_q15_fast.c * Description: Fast Q15 version of convolution * - * $Date: January 26, 2021 - * $Revision: V.1.0.2 + * $Date: July 20, 2021 + * $Revision: V.1.1.2 * * Target Processor: Cortex-M cores * @@ -93,7 +93,7 @@ arm_status arm_convolve_HWC_q15_fast(const q15_t *Im_in, q7_t *bufferB) { (void)bufferB; -#if defined(ARM_MATH_DSP) +#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI) int16_t i_out_y, i_out_x, i_ker_y, i_ker_x; q15_t 
*pBuffer = bufferA; diff --git a/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q15_fast_nonsquare.c b/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q15_fast_nonsquare.c index 27947e848..7babe51ea 100644 --- a/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q15_fast_nonsquare.c +++ b/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q15_fast_nonsquare.c @@ -21,8 +21,8 @@ * Title: arm_convolve_HWC_q15_fast.c * Description: Fast Q15 version of convolution * - * $Date: January 26, 2021 - * $Revision: V.1.0.2 + * $Date: July 20, 2021 + * $Revision: V.1.1.2 * * Target Processor: Cortex-M cores * @@ -103,7 +103,7 @@ arm_status arm_convolve_HWC_q15_fast_nonsquare(const q15_t *Im_in, q7_t *bufferB) { (void)bufferB; -#if defined(ARM_MATH_DSP) +#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI) int16_t i_out_y, i_out_x, i_ker_y, i_ker_x; q15_t *pBuffer = bufferA; diff --git a/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_RGB.c b/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_RGB.c index 46e9a7788..618f49230 100644 --- a/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_RGB.c +++ b/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_RGB.c @@ -21,8 +21,8 @@ * Title: arm_convolve_HWC_q7_RGB.c * Description: Q7 version of convolution for RGB image * - * $Date: January 26, 2021 - * $Revision: V.1.0.2 + * $Date: July 20, 2021 + * $Revision: V.1.1.2 * * Target Processor: Cortex-M cores * @@ -94,7 +94,7 @@ arm_status arm_convolve_HWC_q7_RGB(const q7_t *Im_in, q7_t *bufferB) { (void)bufferB; -#if defined(ARM_MATH_DSP) +#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI) /* Run the following code for Cortex-M4 and Cortex-M7 */ int16_t i_out_y, i_out_x, i_ker_y, i_ker_x; @@ -122,8 +122,7 @@ arm_status arm_convolve_HWC_q7_RGB(const q7_t *Im_in, if (i_ker_y < 0 || i_ker_y >= dim_im_in || i_ker_x < 0 || i_ker_x >= dim_im_in) { /* Equivalent to arm_fill_q15(0, pBuffer, ch_im_in) with assumption: ch_im_in = 3 */ - 
*__SIMD32(pBuffer) = 0x0; - *(pBuffer + 2) = 0; + arm_memset_q7((q7_t *)pBuffer, (q7_t)0, 3 * sizeof(q15_t)); pBuffer += 3; } else @@ -155,7 +154,8 @@ arm_status arm_convolve_HWC_q7_RGB(const q7_t *Im_in, * version 2, no weight shuffling required */ *pBuffer++ = top.half_words[0]; - *__SIMD32(pBuffer) = __PKHBT(bottom.word, top.word, 0); + int32_t packed_word = __PKHBT(bottom.word, top.word, 0); + arm_memcpy_q7((q7_t *)pBuffer, (q7_t *)&packed_word, 4); #else /* * big-endian, | 1st | 2nd | 3rd | omit | @@ -169,7 +169,8 @@ arm_status arm_convolve_HWC_q7_RGB(const q7_t *Im_in, * version 2, no weight shuffling required */ *pBuffer++ = bottom.half_words[0]; - *__SIMD32(pBuffer) = __PKHTB(top.word, bottom.word, 0); + int32_t packed_word = __PKHTB(top.word, bottom.word, 0); + arm_memcpy_q7((q7_t *)pBuffer, (q7_t *)&packed_word, 4); #endif pBuffer += 2; } diff --git a/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_basic.c b/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_basic.c index 942682e09..e274413ad 100644 --- a/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_basic.c +++ b/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_basic.c @@ -21,8 +21,8 @@ * Title: arm_convolve_HWC_q7_basic.c * Description: Q7 version of convolution * - * $Date: 09. October 2020 - * $Revision: V.1.0.1 + * $Date: 20. 
July 2021 + * $Revision: V.1.1.1 * * Target Processor: Cortex-M cores * @@ -88,7 +88,7 @@ arm_status arm_convolve_HWC_q7_basic(const q7_t *Im_in, q7_t *bufferB) { (void)bufferB; -#if defined(ARM_MATH_DSP) +#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI) /* Run the following code for Cortex-M4 and Cortex-M7 */ int16_t i_out_y, i_out_x, i_ker_y, i_ker_x; @@ -182,7 +182,7 @@ arm_status arm_convolve_HWC_q7_basic(const q7_t *Im_in, } #else /* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */ - + (void)bufferA; int i, j, k, l, m, n; int conv_out; int in_row, in_col; diff --git a/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_basic_nonsquare.c b/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_basic_nonsquare.c index cd9f78fd0..b42a57dca 100644 --- a/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_basic_nonsquare.c +++ b/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_basic_nonsquare.c @@ -21,8 +21,8 @@ * Title: arm_convolve_HWC_q7_basic.c * Description: Q7 version of convolution * - * $Date: January 26, 2021 - * $Revision: V.1.0.2 + * $Date: July 20, 2021 + * $Revision: V.1.1.2 * * Target Processor: Cortex-M cores * @@ -87,7 +87,7 @@ arm_status arm_convolve_HWC_q7_basic_nonsquare(const q7_t *Im_in, q7_t *bufferB) { (void)bufferB; -#if defined(ARM_MATH_DSP) +#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI) /* Run the following code for Cortex-M4 and Cortex-M7 */ int16_t i_out_y, i_out_x, i_ker_y, i_ker_x; diff --git a/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_fast.c b/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_fast.c index bd9959f2c..51d98fd85 100644 --- a/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_fast.c +++ b/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_fast.c @@ -21,8 +21,8 @@ * Title: arm_convolve_HWC_q7_fast.c * Description: Fast Q7 version of convolution * - * $Date: January 26, 2021 - * $Revision: V.1.0.2 + * $Date: July 20, 2021 + * 
$Revision: V.1.1.2 * * Target Processor: Cortex-M cores * @@ -105,7 +105,7 @@ arm_status arm_convolve_HWC_q7_fast(const q7_t *Im_in, q7_t *bufferB) { (void)bufferB; -#if defined(ARM_MATH_DSP) +#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI) /* Run the following code for Cortex-M4 and Cortex-M7 */ int16_t i_out_y, i_out_x, i_ker_y, i_ker_x; diff --git a/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_fast_nonsquare.c b/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_fast_nonsquare.c index 6ad061b10..25f17bb45 100644 --- a/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_fast_nonsquare.c +++ b/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_fast_nonsquare.c @@ -21,8 +21,8 @@ * Title: arm_convolve_HWC_q7_fast_nonsquare.c * Description: Fast Q7 version of convolution (non-sqaure shape) * - * $Date: January 26, 2021 - * $Revision: V.1.0.2 + * $Date: July 20, 2021 + * $Revision: V.1.1.2 * * Target Processor: Cortex-M cores * @@ -93,7 +93,7 @@ arm_status arm_convolve_HWC_q7_fast_nonsquare(const q7_t *Im_in, q7_t *bufferB) { (void)bufferB; -#if defined(ARM_MATH_DSP) +#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI) /* Run the following code for Cortex-M4 and Cortex-M7 */ int16_t i_out_y, i_out_x, i_ker_y, i_ker_x; diff --git a/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_separable_conv_HWC_q7.c b/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_separable_conv_HWC_q7.c index de0ef8fec..729147fdc 100644 --- a/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_separable_conv_HWC_q7.c +++ b/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_separable_conv_HWC_q7.c @@ -21,8 +21,8 @@ * Title: arm_depthwise_separable_conv_HWC_q7.c * Description: Q7 depthwise separable convolution function * - * $Date: January 26, 2021 - * $Revision: V.1.0.2 + * $Date: July 20, 2021 + * $Revision: V.1.1.2 * * Target Processor: Cortex-M cores * @@ -96,7 +96,7 @@ arm_status arm_depthwise_separable_conv_HWC_q7(const q7_t *Im_in, q7_t *bufferB) { 
(void)bufferB; -#if defined(ARM_MATH_DSP) +#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI) /* Run the following code for Cortex-M4 and Cortex-M7 */ int16_t i_out_y, i_out_x; diff --git a/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_separable_conv_HWC_q7_nonsquare.c b/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_separable_conv_HWC_q7_nonsquare.c index 9cf89b303..829acf900 100644 --- a/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_separable_conv_HWC_q7_nonsquare.c +++ b/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_separable_conv_HWC_q7_nonsquare.c @@ -21,8 +21,8 @@ * Title: arm_depthwise_separable_conv_HWC_q7_nonsquare.c * Description: Q7 depthwise separable convolution function (non-square shape) * - * $Date: January 26, 2021 - * $Revision: V.1.0.2 + * $Date: July 20, 2021 + * $Revision: V.1.1.2 * * Target Processor: Cortex-M cores * @@ -95,7 +95,7 @@ arm_status arm_depthwise_separable_conv_HWC_q7_nonsquare(const q7_t *Im_in, (void)bufferB; -#if defined(ARM_MATH_DSP) +#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI) /* Run the following code for Cortex-M4 and Cortex-M7 */ /* diff --git a/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_mat_q7_vec_q15.c b/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_mat_q7_vec_q15.c index fa9f775b2..9eb02ebe6 100644 --- a/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_mat_q7_vec_q15.c +++ b/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_mat_q7_vec_q15.c @@ -21,8 +21,8 @@ * Title: arm_fully_connected_mat_q7_vec_q15.c * Description: Mixed Q15-Q7 fully-connected layer function * - * $Date: 09. October 2020 - * $Revision: V.1.0.1 + * $Date: 20. 
July 2021 + * $Revision: V.1.1.1 * * Target Processor: Cortex-M cores * @@ -76,7 +76,7 @@ arm_status arm_fully_connected_mat_q7_vec_q15(const q15_t *pV, q15_t *vec_buffer) { (void)vec_buffer; -#if defined(ARM_MATH_DSP) +#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI) /* Run the following code for Cortex-M4 and Cortex-M7 */ const q7_t *pB = pM; diff --git a/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_mat_q7_vec_q15_opt.c b/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_mat_q7_vec_q15_opt.c index 2826ac5f6..a2da77298 100644 --- a/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_mat_q7_vec_q15_opt.c +++ b/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_mat_q7_vec_q15_opt.c @@ -21,8 +21,8 @@ * Title: arm_fully_connected_mat_q7_vec_q15_opt.c * Description: Mixed Q15-Q7 opt fully-connected layer function * - * $Date: 09. October 2020 - * $Revision: V.1.0.1 + * $Date: 20. July 2021 + * $Revision: V.1.1.1 * * Target Processor: Cortex-M cores * @@ -125,7 +125,7 @@ arm_status arm_fully_connected_mat_q7_vec_q15_opt(const q15_t *pV, { (void)vec_buffer; -#if defined(ARM_MATH_DSP) +#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI) /* Run the following code for Cortex-M4 and Cortex-M7 */ const q7_t *pB = pM; diff --git a/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q15.c b/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q15.c index 67d70ec12..d8b6887b5 100644 --- a/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q15.c +++ b/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q15.c @@ -21,8 +21,8 @@ * Title: arm_fully_connected_q15.c * Description: Q15 basic fully-connected layer function * - * $Date: 09. October 2020 - * $Revision: V.1.0.1 + * $Date: 20. 
July 2021 + * $Revision: V.1.1.1 * * Target Processor: Cortex-M cores * @@ -73,7 +73,7 @@ arm_status arm_fully_connected_q15(const q15_t *pV, q15_t *vec_buffer) { (void)vec_buffer; -#if defined(ARM_MATH_DSP) +#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI) /* Run the following code for Cortex-M4 and Cortex-M7 */ const q15_t *pB = pM; diff --git a/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q15_opt.c b/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q15_opt.c index 9de861825..f6c9b1699 100644 --- a/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q15_opt.c +++ b/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q15_opt.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved. + * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -21,8 +21,8 @@ * Title: arm_fully_connected_q15_opt.c * Description: Q15 opt fully-connected layer function * - * $Date: 09. October 2020 - * $Revision: V.1.0.1 + * $Date: 20. 
July 2021 + * $Revision: V.1.1.1 * * Target Processor: Cortex-M cores * @@ -104,7 +104,7 @@ arm_status arm_fully_connected_q15_opt(const q15_t *pV, q15_t *vec_buffer) { (void)vec_buffer; -#if defined(ARM_MATH_DSP) +#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI) /* Run the following code for Cortex-M4 and Cortex-M7 */ const q15_t *pB = pM; diff --git a/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q7.c b/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q7.c index 178102dac..d500efe9d 100644 --- a/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q7.c +++ b/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q7.c @@ -21,8 +21,8 @@ * Title: arm_fully_connected_q7.c * Description: Q7 basic fully-connected layer function * - * $Date: January 26, 2021 - * $Revision: V.1.0.2 + * $Date: July 20, 2021 + * $Revision: V.1.1.2 * * Target Processor: Cortex-M cores * @@ -75,7 +75,7 @@ arm_status arm_fully_connected_q7(const q7_t *pV, q15_t *vec_buffer) { -#if defined(ARM_MATH_DSP) +#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI) /* Run the following code for Cortex-M4 and Cortex-M7 */ const q7_t *pB = pM; diff --git a/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q7_opt.c b/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q7_opt.c index 77c338636..2f3d6539e 100644 --- a/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q7_opt.c +++ b/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q7_opt.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved. + * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -21,8 +21,8 @@ * Title: arm_fully_connected_q7_opt.c * Description: Q7 basic fully-connected layer function * - * $Date: 09. October 2020 - * $Revision: V.1.0.1 + * $Date: 20. 
July 2021 + * $Revision: V.1.1.1 * * Target Processor: Cortex-M cores * @@ -136,7 +136,7 @@ arm_status arm_fully_connected_q7_opt(const q7_t *pV, q15_t *vec_buffer) { -#if defined(ARM_MATH_DSP) +#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI) /* Run the following code for Cortex-M4 and Cortex-M7 */ const q7_t *pB = pM; @@ -382,6 +382,7 @@ arm_status arm_fully_connected_q7_opt(const q7_t *pV, #else /* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */ + (void)vec_buffer; uint16_t rowCnt = num_of_rows >> 2; const q7_t *pB = pM; const q7_t *pA; diff --git a/CMSIS/NN/Source/NNSupportFunctions/arm_nn_accumulate_q7_to_q15.c b/CMSIS/NN/Source/NNSupportFunctions/arm_nn_accumulate_q7_to_q15.c index 82c295281..c3f666aa2 100644 --- a/CMSIS/NN/Source/NNSupportFunctions/arm_nn_accumulate_q7_to_q15.c +++ b/CMSIS/NN/Source/NNSupportFunctions/arm_nn_accumulate_q7_to_q15.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved. + * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -21,8 +21,8 @@ * Title: arm_nn_accumulate_q7_to_q15.c * Description: Accumulate q7 vector into q15 one. * - * $Date: 09. 
October 2020 - * $Revision: V.1.0.2 + * $Date: 20 July 2021 + * $Revision: V.1.1.2 * * pSrc Processor: Cortex-M CPUs * @@ -44,11 +44,13 @@ void arm_nn_accumulate_q7_to_q15(q15_t *pDst, const q7_t *pSrc, uint32_t length) { q15_t *pCnt = pDst; const q7_t *pV = pSrc; + int32_t count = length; +#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI) q31_t v1, v2, vo1, vo2; - int32_t cnt = length >> 2; + count = length >> 2; q31_t in; - while (cnt > 0l) + while (count > 0l) { q31_t value = arm_nn_read_q7x4_ia(&pV); v1 = __SXTB16(__ROR((uint32_t)value, 8)); @@ -67,13 +69,14 @@ void arm_nn_accumulate_q7_to_q15(q15_t *pDst, const q7_t *pSrc, uint32_t length) in = arm_nn_read_q15x2(pCnt); arm_nn_write_q15x2_ia(&pCnt, __QADD16(vo2, in)); - cnt--; + count--; } - cnt = length & 0x3; - while (cnt > 0l) + count = length & 0x3; +#endif + while (count > 0l) { *pCnt++ += *pV++; - cnt--; + count--; } } diff --git a/CMSIS/NN/Source/NNSupportFunctions/arm_nn_add_q7.c b/CMSIS/NN/Source/NNSupportFunctions/arm_nn_add_q7.c index 86cf5475c..511e58633 100644 --- a/CMSIS/NN/Source/NNSupportFunctions/arm_nn_add_q7.c +++ b/CMSIS/NN/Source/NNSupportFunctions/arm_nn_add_q7.c @@ -21,8 +21,8 @@ * Title: arm_nn_add_q7.c * Description: Non saturating addition of elements of a q7 vector. * - * $Date: 09. October 2020 - * $Revision: V.1.0.1 + * $Date: 20. 
July 2021 + * $Revision: V.1.1.1 * * Target Processor: Cortex-M cores * @@ -44,7 +44,7 @@ void arm_nn_add_q7(const q7_t *input, q31_t *output, uint32_t block_size) { uint32_t block_count; q31_t result = 0; -#if defined(ARM_MATH_DSP) +#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI) /* Loop unrolling: Compute 4 outputs at a time */ block_count = block_size >> 2U; diff --git a/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mult_q15.c b/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mult_q15.c index 6c54618bb..d6a45efe4 100644 --- a/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mult_q15.c +++ b/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mult_q15.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved. + * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -21,8 +21,8 @@ * Title: arm_nn_mult_q15.c * Description: Q15 vector multiplication with variable output shifts * - * $Date: 09. October 2020 - * $Revision: V.1.0.2 + * $Date: 20. July 2021 + * $Revision: V.1.1.2 * * Target Processor: Cortex-M cores * @@ -55,72 +55,7 @@ void arm_nn_mult_q15(q15_t *pSrcA, q15_t *pSrcB, q15_t *pDst, const uint16_t out_shift, uint32_t blockSize) { - uint32_t blkCnt; /* loop counters */ - -#if defined(ARM_MATH_DSP) - - /* Run the below code for Cortex-M4 and Cortex-M3 */ - q31_t inA1, inA2, inB1, inB2; /* temporary input variables */ - q15_t out1, out2, out3, out4; /* temporary output variables */ - q31_t mul1, mul2, mul3, mul4; /* temporary variables */ - - /* loop Unrolling */ - blkCnt = blockSize >> 2U; - - /* First part of the processing with loop unrolling. Compute 4 outputs at a time. - ** a second loop below computes the remaining 1 to 3 samples. 
*/ - while (blkCnt > 0U) - { - /* read two samples at a time from sourceA */ - inA1 = arm_nn_read_q15x2_ia((const q15_t **)&pSrcA); - /* read two samples at a time from sourceB */ - inB1 = arm_nn_read_q15x2_ia((const q15_t **)&pSrcB); - /* read two samples at a time from sourceA */ - inA2 = arm_nn_read_q15x2_ia((const q15_t **)&pSrcA); - /* read two samples at a time from sourceB */ - inB2 = arm_nn_read_q15x2_ia((const q15_t **)&pSrcB); - - /* multiply mul = sourceA * sourceB */ - mul1 = (q31_t)((q15_t)(inA1 >> 16) * (q15_t)(inB1 >> 16)); - mul2 = (q31_t)((q15_t)inA1 * (q15_t)inB1); - mul3 = (q31_t)((q15_t)(inA2 >> 16) * (q15_t)(inB2 >> 16)); - mul4 = (q31_t)((q15_t)inA2 * (q15_t)inB2); - - /* saturate result to 16 bit */ - out1 = (q15_t)__SSAT((q31_t)(mul1 + NN_ROUND(out_shift)) >> out_shift, 16); - out2 = (q15_t)__SSAT((q31_t)(mul2 + NN_ROUND(out_shift)) >> out_shift, 16); - out3 = (q15_t)__SSAT((q31_t)(mul3 + NN_ROUND(out_shift)) >> out_shift, 16); - out4 = (q15_t)__SSAT((q31_t)(mul4 + NN_ROUND(out_shift)) >> out_shift, 16); - - /* store the result */ -#ifndef ARM_MATH_BIG_ENDIAN - - *__SIMD32(pDst)++ = __PKHBT(out2, out1, 16); - *__SIMD32(pDst)++ = __PKHBT(out4, out3, 16); - -#else - - *__SIMD32(pDst)++ = __PKHBT(out2, out1, 16); - *__SIMD32(pDst)++ = __PKHBT(out4, out3, 16); - -#endif /* #ifndef ARM_MATH_BIG_ENDIAN */ - - /* Decrement the blockSize loop counter */ - blkCnt--; - } - - /* If the blockSize is not a multiple of 4, compute any remaining output samples here. - ** No loop unrolling is used. 
*/ - blkCnt = blockSize % 0x4U; - -#else - - /* Run the below code for Cortex-M0 */ - - /* Initialize blkCnt with number of samples */ - blkCnt = blockSize; - -#endif /* #if defined (ARM_MATH_DSP) */ + uint32_t blkCnt = blockSize; /* loop counters */ while (blkCnt > 0U) { diff --git a/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mult_q7.c b/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mult_q7.c index 40dd1cdad..fdced4cf5 100644 --- a/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mult_q7.c +++ b/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mult_q7.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved. + * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -21,8 +21,8 @@ * Title: arm_nn_mult_q7.c * Description: Q7 vector multiplication with variable output shifts * - * $Date: 09. October 2020 - * $Revision: V.1.0.2 + * $Date: 20. July 2021 + * $Revision: V.1.1.2 * * Target Processor: Cortex-M cores * @@ -55,46 +55,7 @@ void arm_nn_mult_q7(q7_t *pSrcA, q7_t *pSrcB, q7_t *pDst, const uint16_t out_shift, uint32_t blockSize) { - uint32_t blkCnt; /* loop counters */ - -#if defined(ARM_MATH_DSP) - - /* Run the below code for Cortex-M4 and Cortex-M3 */ - q7_t out1, out2, out3, out4; /* Temporary variables to store the product */ - - /* loop Unrolling */ - blkCnt = blockSize >> 2U; - - /* First part of the processing with loop unrolling. Compute 4 outputs at a time. - ** a second loop below computes the remaining 1 to 3 samples. 
*/ - while (blkCnt > 0U) - { - /* C = A * B */ - /* Multiply the inputs and store the results in temporary variables */ - out1 = (q7_t)__SSAT(((q15_t)((q15_t)(*pSrcA++) * (*pSrcB++) + NN_ROUND(out_shift)) >> out_shift), 8); - out2 = (q7_t)__SSAT(((q15_t)((q15_t)(*pSrcA++) * (*pSrcB++) + NN_ROUND(out_shift)) >> out_shift), 8); - out3 = (q7_t)__SSAT(((q15_t)((q15_t)(*pSrcA++) * (*pSrcB++) + NN_ROUND(out_shift)) >> out_shift), 8); - out4 = (q7_t)__SSAT(((q15_t)((q15_t)(*pSrcA++) * (*pSrcB++) + NN_ROUND(out_shift)) >> out_shift), 8); - - /* Store the results of 4 inputs in the destination buffer in single cycle by packing */ - *__SIMD32(pDst)++ = __PACKq7(out1, out2, out3, out4); - - /* Decrement the blockSize loop counter */ - blkCnt--; - } - - /* If the blockSize is not a multiple of 4, compute any remaining output samples here. - ** No loop unrolling is used. */ - blkCnt = blockSize % 0x4U; - -#else - - /* Run the below code for Cortex-M0 */ - - /* Initialize blkCnt with number of samples */ - blkCnt = blockSize; - -#endif /* #if defined (ARM_MATH_DSP) */ + uint32_t blkCnt = blockSize; /* loop counters */ while (blkCnt > 0U) { diff --git a/CMSIS/NN/Source/NNSupportFunctions/arm_q7_to_q15_reordered_no_shift.c b/CMSIS/NN/Source/NNSupportFunctions/arm_q7_to_q15_reordered_no_shift.c index 9017970df..8abbc3a5a 100644 --- a/CMSIS/NN/Source/NNSupportFunctions/arm_q7_to_q15_reordered_no_shift.c +++ b/CMSIS/NN/Source/NNSupportFunctions/arm_q7_to_q15_reordered_no_shift.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved. + * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved. 
* * SPDX-License-Identifier: Apache-2.0 * @@ -21,8 +21,8 @@ * Title: arm_q7_to_q15_reordered_no_shift.c * Description: Converts the elements of the Q7 vector to reordered Q15 vector without left-shift * - * $Date: May 29, 2020 - * $Revision: V.1.0.1 + * $Date: July 20, 2021 + * $Revision: V.1.1.1 * * Target Processor: Cortex-M cores * @@ -79,7 +79,7 @@ void arm_q7_to_q15_reordered_no_shift(const q7_t *pSrc, q15_t *pDst, uint32_t bl const q7_t *pIn = pSrc; /* Src pointer */ uint32_t blkCnt; /* loop counter */ -#ifndef ARM_MATH_CM0_FAMILY +#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI) q31_t in; q31_t in1, in2; @@ -103,11 +103,11 @@ void arm_q7_to_q15_reordered_no_shift(const q7_t *pSrc, q15_t *pDst, uint32_t bl in2 = __SXTB16(in); #ifndef ARM_MATH_BIG_ENDIAN - *__SIMD32(pDst)++ = in2; - *__SIMD32(pDst)++ = in1; + arm_nn_write_q7x4_ia((q7_t **)&pDst, in2); + arm_nn_write_q7x4_ia((q7_t **)&pDst, in1); #else - *__SIMD32(pDst)++ = in1; - *__SIMD32(pDst)++ = in2; + arm_nn_write_q7x4_ia((q7_t **)&pDst, in1); + arm_nn_write_q7x4_ia((q7_t **)&pDst, in2); #endif /* Decrement the loop counter */ diff --git a/CMSIS/NN/Source/PoolingFunctions/arm_max_pool_s8.c b/CMSIS/NN/Source/PoolingFunctions/arm_max_pool_s8.c index 36163667b..78e6fca03 100644 --- a/CMSIS/NN/Source/PoolingFunctions/arm_max_pool_s8.c +++ b/CMSIS/NN/Source/PoolingFunctions/arm_max_pool_s8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved. + * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -21,8 +21,8 @@ * Title: arm_max_pool_s8.c * Description: Pooling function implementations * - * $Date: 19. Februari 2021 - * $Revision: V.2.0.2 + * $Date: 20. 
July 2021 + * $Revision: V.2.0.3 * * Target Processor: Cortex-M CPUs * @@ -75,7 +75,7 @@ static void compare_and_replace_if_larger_q7(q7_t *base, const q7_t *target, int ref_max.bytes[3] = comp_max.bytes[3]; } - write_q7x4_ia(&dst, ref_max.word); + arm_nn_write_q7x4_ia(&dst, ref_max.word); cnt--; } @@ -127,7 +127,7 @@ static void clamp_output(q7_t *source, int32_t length, const int32_t act_min, co in.bytes[3] = MAX(in.bytes[3], act_min); in.bytes[3] = MIN(in.bytes[3], act_max); - write_q7x4_ia(&source, in.word); + arm_nn_write_q7x4_ia(&source, in.word); cnt--; } diff --git a/CMSIS/NN/Source/PoolingFunctions/arm_pool_q7_HWC.c b/CMSIS/NN/Source/PoolingFunctions/arm_pool_q7_HWC.c index 7546049ce..5a3b1afd3 100644 --- a/CMSIS/NN/Source/PoolingFunctions/arm_pool_q7_HWC.c +++ b/CMSIS/NN/Source/PoolingFunctions/arm_pool_q7_HWC.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved. + * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -21,8 +21,8 @@ * Title: arm_pool_q7_HWC.c * Description: Pooling function implementations * - * $Date: 09. October 2020 - * $Revision: V.1.0.1 + * $Date: 20. 
July 2021 + * $Revision: V.1.1.1 * * Target Processor: Cortex-M cores * @@ -31,7 +31,7 @@ #include "arm_nnfunctions.h" #include "arm_nnsupportfunctions.h" -#if defined(ARM_MATH_DSP) +#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI) /** * @brief A few utility functions used by pooling functions @@ -75,7 +75,7 @@ static void compare_and_replace_if_larger_q7(q7_t *base, // base data if (com.bytes[3] > in.bytes[3]) in.bytes[3] = com.bytes[3]; - *__SIMD32(pIn)++ = in.word; + arm_nn_write_q7x4_ia(&pIn, in.word); cnt--; } @@ -119,10 +119,10 @@ static void accumulate_q7_to_q15(q15_t *base, q7_t *target, const uint16_t lengt #endif in = arm_nn_read_q15x2(pCnt); - *__SIMD32(pCnt)++ = __QADD16(vo1, in); + arm_nn_write_q15x2_ia(&pCnt, __QADD16(vo1, in)); in = arm_nn_read_q15x2(pCnt); - *__SIMD32(pCnt)++ = __QADD16(vo2, in); + arm_nn_write_q15x2_ia(&pCnt, __QADD16(vo2, in)); cnt--; } @@ -178,7 +178,7 @@ void arm_maxpool_q7_HWC(q7_t *Im_in, q7_t *Im_out) { (void)bufferA; -#if defined(ARM_MATH_DSP) +#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI) /* Run the following code for Cortex-M4 and Cortex-M7 */ int16_t i_x, i_y; @@ -334,7 +334,7 @@ void arm_avepool_q7_HWC(q7_t *Im_in, q7_t *Im_out) { -#if defined(ARM_MATH_DSP) +#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI) /* Run the following code for Cortex-M4 and Cortex-M7 */ q15_t *buffer = (q15_t *)bufferA; diff --git a/CMSIS/NN/Tests/UnitTest/CMakeLists.txt b/CMSIS/NN/Tests/UnitTest/CMakeLists.txt index 2c426bd75..ffd8f8fc8 100644 --- a/CMSIS/NN/Tests/UnitTest/CMakeLists.txt +++ b/CMSIS/NN/Tests/UnitTest/CMakeLists.txt @@ -26,6 +26,7 @@ add_compile_options(-O0 -Werror -Wimplicit-function-declaration -Wunused-variable + -Wunused-function -Wno-redundant-decls) option(BUILD_CMSIS_NN_UNIT "If building the unit tests from another project, i.e. \ -- GitLab