mirror of
https://github.com/RT-Thread/rt-thread.git
synced 2026-03-27 09:32:28 +08:00
remove the useless large fold
This commit is contained in:
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,29 +0,0 @@
|
||||
|
||||
SET TMP=C:\Temp
|
||||
SET TEMP=C:\Temp
|
||||
|
||||
SET UVEXE=C:\Keil\UV4\UV4.EXE
|
||||
|
||||
@echo Building DSP Library for Cortex-M0 Little Endian
|
||||
%UVEXE% -rb arm_cortexM0x_math.uvproj -t"DSP_Lib CM0 LE" -o"DSP_Lib CM0 LE.txt" -j0
|
||||
|
||||
@echo Building DSP Library for Cortex-M0 Big Endian
|
||||
%UVEXE% -rb arm_cortexM0x_math.uvproj -t"DSP_Lib CM0 BE" -o"DSP_Lib CM0 BE.txt" -j0
|
||||
|
||||
@echo Building DSP Library for Cortex-M3 Little Endian
|
||||
%UVEXE% -rb arm_cortexM3x_math.uvproj -t"DSP_Lib CM3 LE" -o"DSP_Lib CM3 LE.txt" -j0
|
||||
|
||||
@echo Building DSP Library for Cortex-M3 Big Endian
|
||||
%UVEXE% -rb arm_cortexM3x_math.uvproj -t"DSP_Lib CM3 BE" -o"DSP_Lib CM3 BE.txt" -j0
|
||||
|
||||
@echo Building DSP Library for Cortex-M4 Little Endian
|
||||
%UVEXE% -rb arm_cortexM4x_math.uvproj -t"DSP_Lib CM4 LE" -o"DSP_Lib CM4 LE.txt" -j0
|
||||
|
||||
@echo Building DSP Library for Cortex-M4 Big Endian
|
||||
%UVEXE% -rb arm_cortexM4x_math.uvproj -t"DSP_Lib CM4 BE" -o"DSP_Lib CM4 BE.txt" -j0
|
||||
|
||||
@echo Building DSP Library for Cortex-M4 with FPU Little Endian
|
||||
%UVEXE% -rb arm_cortexM4x_math.uvproj -t"DSP_Lib CM4 LE FPU" -o"DSP_Lib CM4 LE FPU.txt" -j0
|
||||
|
||||
@echo Building DSP Library for Cortex-M4 with FPU Big Endian
|
||||
%UVEXE% -rb arm_cortexM4x_math.uvproj -t"DSP_Lib CM4 BE FPU" -o"DSP_Lib CM4 BE FPU.txt" -j0
|
||||
@@ -1,159 +0,0 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_abs_f32.c
|
||||
*
|
||||
* Description: Vector absolute value.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 0.0.7 2010/06/10
|
||||
* Misra-C changes done
|
||||
* ---------------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
#include <math.h>
|
||||
|
||||
/**
|
||||
* @ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup BasicAbs Vector Absolute Value
|
||||
*
|
||||
* Computes the absolute value of a vector on an element-by-element basis.
|
||||
*
|
||||
* <pre>
|
||||
* pDst[n] = abs(pSrcA[n]), 0 <= n < blockSize.
|
||||
* </pre>
|
||||
*
|
||||
* The operation can be done in-place by setting the input and output pointers to the same buffer.
|
||||
* There are separate functions for floating-point, Q7, Q15, and Q31 data types.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup BasicAbs
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Floating-point vector absolute value.
|
||||
* @param[in] *pSrc points to the input buffer
|
||||
* @param[out] *pDst points to the output buffer
|
||||
* @param[in] blockSize number of samples in each vector
|
||||
* @return none.
|
||||
*/
|
||||
|
||||
void arm_abs_f32(
|
||||
float32_t * pSrc,
|
||||
float32_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
float32_t in1, in2, in3, in4; /* temporary variables */
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = |A| */
|
||||
/* Calculate absolute and then store the results in the destination buffer. */
|
||||
/* read sample from source */
|
||||
in1 = *pSrc;
|
||||
in2 = *(pSrc + 1);
|
||||
in3 = *(pSrc + 2);
|
||||
|
||||
/* find absolute value */
|
||||
in1 = fabsf(in1);
|
||||
|
||||
/* read sample from source */
|
||||
in4 = *(pSrc + 3);
|
||||
|
||||
/* find absolute value */
|
||||
in2 = fabsf(in2);
|
||||
|
||||
/* read sample from source */
|
||||
*pDst = in1;
|
||||
|
||||
/* find absolute value */
|
||||
in3 = fabsf(in3);
|
||||
|
||||
/* find absolute value */
|
||||
in4 = fabsf(in4);
|
||||
|
||||
/* store result to destination */
|
||||
*(pDst + 1) = in2;
|
||||
|
||||
/* store result to destination */
|
||||
*(pDst + 2) = in3;
|
||||
|
||||
/* store result to destination */
|
||||
*(pDst + 3) = in4;
|
||||
|
||||
|
||||
/* Update source pointer to process next sampels */
|
||||
pSrc += 4u;
|
||||
|
||||
/* Update destination pointer to process next sampels */
|
||||
pDst += 4u;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = |A| */
|
||||
/* Calculate absolute and then store the results in the destination buffer. */
|
||||
*pDst++ = fabsf(*pSrc++);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of BasicAbs group
|
||||
*/
|
||||
@@ -1,173 +0,0 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_abs_q15.c
|
||||
*
|
||||
* Description: Q15 vector absolute value.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 0.0.7 2010/06/10
|
||||
* Misra-C changes done
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup BasicAbs
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Q15 vector absolute value.
|
||||
* @param[in] *pSrc points to the input buffer
|
||||
* @param[out] *pDst points to the output buffer
|
||||
* @param[in] blockSize number of samples in each vector
|
||||
* @return none.
|
||||
*
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
* \par
|
||||
* The function uses saturating arithmetic.
|
||||
* The Q15 value -1 (0x8000) will be saturated to the maximum allowable positive value 0x7FFF.
|
||||
*/
|
||||
|
||||
void arm_abs_q15(
|
||||
q15_t * pSrc,
|
||||
q15_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
|
||||
q15_t in1; /* Input value1 */
|
||||
q15_t in2; /* Input value2 */
|
||||
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = |A| */
|
||||
/* Read two inputs */
|
||||
in1 = *pSrc++;
|
||||
in2 = *pSrc++;
|
||||
|
||||
|
||||
/* Store the Absolute result in the destination buffer by packing the two values, in a single cycle */
|
||||
|
||||
#ifndef ARM_MATH_BIG_ENDIAN
|
||||
|
||||
*__SIMD32(pDst)++ =
|
||||
__PKHBT(((in1 > 0) ? in1 : __QSUB16(0, in1)),
|
||||
((in2 > 0) ? in2 : __QSUB16(0, in2)), 16);
|
||||
|
||||
#else
|
||||
|
||||
|
||||
*__SIMD32(pDst)++ =
|
||||
__PKHBT(((in2 > 0) ? in2 : __QSUB16(0, in2)),
|
||||
((in1 > 0) ? in1 : __QSUB16(0, in1)), 16);
|
||||
|
||||
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
||||
|
||||
in1 = *pSrc++;
|
||||
in2 = *pSrc++;
|
||||
|
||||
|
||||
#ifndef ARM_MATH_BIG_ENDIAN
|
||||
|
||||
*__SIMD32(pDst)++ =
|
||||
__PKHBT(((in1 > 0) ? in1 : __QSUB16(0, in1)),
|
||||
((in2 > 0) ? in2 : __QSUB16(0, in2)), 16);
|
||||
|
||||
#else
|
||||
|
||||
|
||||
*__SIMD32(pDst)++ =
|
||||
__PKHBT(((in2 > 0) ? in2 : __QSUB16(0, in2)),
|
||||
((in1 > 0) ? in1 : __QSUB16(0, in1)), 16);
|
||||
|
||||
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = |A| */
|
||||
/* Read the input */
|
||||
in1 = *pSrc++;
|
||||
|
||||
/* Calculate absolute value of input and then store the result in the destination buffer. */
|
||||
*pDst++ = (in1 > 0) ? in1 : __QSUB16(0, in1);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
q15_t in; /* Temporary input variable */
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = |A| */
|
||||
/* Read the input */
|
||||
in = *pSrc++;
|
||||
|
||||
/* Calculate absolute value of input and then store the result in the destination buffer. */
|
||||
*pDst++ = (in > 0) ? in : ((in == (q15_t) 0x8000) ? 0x7fff : -in);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of BasicAbs group
|
||||
*/
|
||||
@@ -1,125 +0,0 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_abs_q31.c
|
||||
*
|
||||
* Description: Q31 vector absolute value.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 0.0.7 2010/06/10
|
||||
* Misra-C changes done
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup BasicAbs
|
||||
* @{
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @brief Q31 vector absolute value.
|
||||
* @param[in] *pSrc points to the input buffer
|
||||
* @param[out] *pDst points to the output buffer
|
||||
* @param[in] blockSize number of samples in each vector
|
||||
* @return none.
|
||||
*
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
* \par
|
||||
* The function uses saturating arithmetic.
|
||||
* The Q31 value -1 (0x80000000) will be saturated to the maximum allowable positive value 0x7FFFFFFF.
|
||||
*/
|
||||
|
||||
void arm_abs_q31(
|
||||
q31_t * pSrc,
|
||||
q31_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
q31_t in; /* Input value */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
q31_t in1, in2, in3, in4;
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = |A| */
|
||||
/* Calculate absolute of input (if -1 then saturated to 0x7fffffff) and then store the results in the destination buffer. */
|
||||
in1 = *pSrc++;
|
||||
in2 = *pSrc++;
|
||||
in3 = *pSrc++;
|
||||
in4 = *pSrc++;
|
||||
|
||||
*pDst++ = (in1 > 0) ? in1 : __QSUB(0, in1);
|
||||
*pDst++ = (in2 > 0) ? in2 : __QSUB(0, in2);
|
||||
*pDst++ = (in3 > 0) ? in3 : __QSUB(0, in3);
|
||||
*pDst++ = (in4 > 0) ? in4 : __QSUB(0, in4);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = |A| */
|
||||
/* Calculate absolute value of the input (if -1 then saturated to 0x7fffffff) and then store the results in the destination buffer. */
|
||||
in = *pSrc++;
|
||||
*pDst++ = (in > 0) ? in : ((in == 0x80000000) ? 0x7fffffff : -in);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of BasicAbs group
|
||||
*/
|
||||
@@ -1,152 +0,0 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_abs_q7.c
|
||||
*
|
||||
* Description: Q7 vector absolute value.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 0.0.7 2010/06/10
|
||||
* Misra-C changes done
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup BasicAbs
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Q7 vector absolute value.
|
||||
* @param[in] *pSrc points to the input buffer
|
||||
* @param[out] *pDst points to the output buffer
|
||||
* @param[in] blockSize number of samples in each vector
|
||||
* @return none.
|
||||
*
|
||||
* \par Conditions for optimum performance
|
||||
* Input and output buffers should be aligned by 32-bit
|
||||
*
|
||||
*
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
* \par
|
||||
* The function uses saturating arithmetic.
|
||||
* The Q7 value -1 (0x80) will be saturated to the maximum allowable positive value 0x7F.
|
||||
*/
|
||||
|
||||
void arm_abs_q7(
|
||||
q7_t * pSrc,
|
||||
q7_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
q7_t in; /* Input value1 */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
q31_t in1, in2, in3, in4; /* temporary input variables */
|
||||
q31_t out1, out2, out3, out4; /* temporary output variables */
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = |A| */
|
||||
/* Read inputs */
|
||||
in1 = (q31_t) * pSrc;
|
||||
in2 = (q31_t) * (pSrc + 1);
|
||||
in3 = (q31_t) * (pSrc + 2);
|
||||
|
||||
/* find absolute value */
|
||||
out1 = (in1 > 0) ? in1 : __QSUB8(0, in1);
|
||||
|
||||
/* read input */
|
||||
in4 = (q31_t) * (pSrc + 3);
|
||||
|
||||
/* find absolute value */
|
||||
out2 = (in2 > 0) ? in2 : __QSUB8(0, in2);
|
||||
|
||||
/* store result to destination */
|
||||
*pDst = (q7_t) out1;
|
||||
|
||||
/* find absolute value */
|
||||
out3 = (in3 > 0) ? in3 : __QSUB8(0, in3);
|
||||
|
||||
/* find absolute value */
|
||||
out4 = (in4 > 0) ? in4 : __QSUB8(0, in4);
|
||||
|
||||
/* store result to destination */
|
||||
*(pDst + 1) = (q7_t) out2;
|
||||
|
||||
/* store result to destination */
|
||||
*(pDst + 2) = (q7_t) out3;
|
||||
|
||||
/* store result to destination */
|
||||
*(pDst + 3) = (q7_t) out4;
|
||||
|
||||
/* update pointers to process next samples */
|
||||
pSrc += 4u;
|
||||
pDst += 4u;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif // #define ARM_MATH_CM0
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = |A| */
|
||||
/* Read the input */
|
||||
in = *pSrc++;
|
||||
|
||||
/* Store the Absolute result in the destination buffer */
|
||||
*pDst++ = (in > 0) ? in : ((in == (q7_t) 0x80) ? 0x7f : -in);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of BasicAbs group
|
||||
*/
|
||||
@@ -1,145 +0,0 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_add_f32.c
|
||||
*
|
||||
* Description: Floating-point vector addition.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 0.0.7 2010/06/10
|
||||
* Misra-C changes done
|
||||
* ---------------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup BasicAdd Vector Addition
|
||||
*
|
||||
* Element-by-element addition of two vectors.
|
||||
*
|
||||
* <pre>
|
||||
* pDst[n] = pSrcA[n] + pSrcB[n], 0 <= n < blockSize.
|
||||
* </pre>
|
||||
*
|
||||
* There are separate functions for floating-point, Q7, Q15, and Q31 data types.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup BasicAdd
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Floating-point vector addition.
|
||||
* @param[in] *pSrcA points to the first input vector
|
||||
* @param[in] *pSrcB points to the second input vector
|
||||
* @param[out] *pDst points to the output vector
|
||||
* @param[in] blockSize number of samples in each vector
|
||||
* @return none.
|
||||
*/
|
||||
|
||||
void arm_add_f32(
|
||||
float32_t * pSrcA,
|
||||
float32_t * pSrcB,
|
||||
float32_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
float32_t inA1, inA2, inA3, inA4; /* temporary input variabels */
|
||||
float32_t inB1, inB2, inB3, inB4; /* temporary input variables */
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A + B */
|
||||
/* Add and then store the results in the destination buffer. */
|
||||
|
||||
/* read four inputs from sourceA and four inputs from sourceB */
|
||||
inA1 = *pSrcA;
|
||||
inB1 = *pSrcB;
|
||||
inA2 = *(pSrcA + 1);
|
||||
inB2 = *(pSrcB + 1);
|
||||
inA3 = *(pSrcA + 2);
|
||||
inB3 = *(pSrcB + 2);
|
||||
inA4 = *(pSrcA + 3);
|
||||
inB4 = *(pSrcB + 3);
|
||||
|
||||
/* C = A + B */
|
||||
/* add and store result to destination */
|
||||
*pDst = inA1 + inB1;
|
||||
*(pDst + 1) = inA2 + inB2;
|
||||
*(pDst + 2) = inA3 + inB3;
|
||||
*(pDst + 3) = inA4 + inB4;
|
||||
|
||||
/* update pointers to process next samples */
|
||||
pSrcA += 4u;
|
||||
pSrcB += 4u;
|
||||
pDst += 4u;
|
||||
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A + B */
|
||||
/* Add and then store the results in the destination buffer. */
|
||||
*pDst++ = (*pSrcA++) + (*pSrcB++);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of BasicAdd group
|
||||
*/
|
||||
@@ -1,135 +0,0 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_add_q15.c
|
||||
*
|
||||
* Description: Q15 vector addition
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 0.0.7 2010/06/10
|
||||
* Misra-C changes done
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup BasicAdd
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Q15 vector addition.
|
||||
* @param[in] *pSrcA points to the first input vector
|
||||
* @param[in] *pSrcB points to the second input vector
|
||||
* @param[out] *pDst points to the output vector
|
||||
* @param[in] blockSize number of samples in each vector
|
||||
* @return none.
|
||||
*
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
* \par
|
||||
* The function uses saturating arithmetic.
|
||||
* Results outside of the allowable Q15 range [0x8000 0x7FFF] will be saturated.
|
||||
*/
|
||||
|
||||
void arm_add_q15(
|
||||
q15_t * pSrcA,
|
||||
q15_t * pSrcB,
|
||||
q15_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
q31_t inA1, inA2, inB1, inB2;
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A + B */
|
||||
/* Add and then store the results in the destination buffer. */
|
||||
inA1 = *__SIMD32(pSrcA)++;
|
||||
inA2 = *__SIMD32(pSrcA)++;
|
||||
inB1 = *__SIMD32(pSrcB)++;
|
||||
inB2 = *__SIMD32(pSrcB)++;
|
||||
|
||||
*__SIMD32(pDst)++ = __QADD16(inA1, inB1);
|
||||
*__SIMD32(pDst)++ = __QADD16(inA2, inB2);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A + B */
|
||||
/* Add and then store the results in the destination buffer. */
|
||||
*pDst++ = (q15_t) __QADD16(*pSrcA++, *pSrcB++);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A + B */
|
||||
/* Add and then store the results in the destination buffer. */
|
||||
*pDst++ = (q15_t) __SSAT(((q31_t) * pSrcA++ + *pSrcB++), 16);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of BasicAdd group
|
||||
*/
|
||||
@@ -1,143 +0,0 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_add_q31.c
|
||||
*
|
||||
* Description: Q31 vector addition.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 0.0.7 2010/06/10
|
||||
* Misra-C changes done
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup BasicAdd
|
||||
* @{
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @brief Q31 vector addition.
|
||||
* @param[in] *pSrcA points to the first input vector
|
||||
* @param[in] *pSrcB points to the second input vector
|
||||
* @param[out] *pDst points to the output vector
|
||||
* @param[in] blockSize number of samples in each vector
|
||||
* @return none.
|
||||
*
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
* \par
|
||||
* The function uses saturating arithmetic.
|
||||
* Results outside of the allowable Q31 range[0x80000000 0x7FFFFFFF] will be saturated.
|
||||
*/
|
||||
|
||||
void arm_add_q31(
|
||||
q31_t * pSrcA,
|
||||
q31_t * pSrcB,
|
||||
q31_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
q31_t inA1, inA2, inA3, inA4;
|
||||
q31_t inB1, inB2, inB3, inB4;
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A + B */
|
||||
/* Add and then store the results in the destination buffer. */
|
||||
inA1 = *pSrcA++;
|
||||
inA2 = *pSrcA++;
|
||||
inB1 = *pSrcB++;
|
||||
inB2 = *pSrcB++;
|
||||
|
||||
inA3 = *pSrcA++;
|
||||
inA4 = *pSrcA++;
|
||||
inB3 = *pSrcB++;
|
||||
inB4 = *pSrcB++;
|
||||
|
||||
*pDst++ = __QADD(inA1, inB1);
|
||||
*pDst++ = __QADD(inA2, inB2);
|
||||
*pDst++ = __QADD(inA3, inB3);
|
||||
*pDst++ = __QADD(inA4, inB4);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A + B */
|
||||
/* Add and then store the results in the destination buffer. */
|
||||
*pDst++ = __QADD(*pSrcA++, *pSrcB++);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A + B */
|
||||
/* Add and then store the results in the destination buffer. */
|
||||
*pDst++ = (q31_t) clip_q63_to_q31((q63_t) * pSrcA++ + *pSrcB++);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of BasicAdd group
|
||||
*/
|
||||
@@ -1,129 +0,0 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_add_q7.c
|
||||
*
|
||||
* Description: Q7 vector addition.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 0.0.7 2010/06/10
|
||||
* Misra-C changes done
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup BasicAdd
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Q7 vector addition.
|
||||
* @param[in] *pSrcA points to the first input vector
|
||||
* @param[in] *pSrcB points to the second input vector
|
||||
* @param[out] *pDst points to the output vector
|
||||
* @param[in] blockSize number of samples in each vector
|
||||
* @return none.
|
||||
*
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
* \par
|
||||
* The function uses saturating arithmetic.
|
||||
* Results outside of the allowable Q7 range [0x80 0x7F] will be saturated.
|
||||
*/
|
||||
|
||||
void arm_add_q7(
|
||||
q7_t * pSrcA,
|
||||
q7_t * pSrcB,
|
||||
q7_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A + B */
|
||||
/* Add and then store the results in the destination buffer. */
|
||||
*__SIMD32(pDst)++ = __QADD8(*__SIMD32(pSrcA)++, *__SIMD32(pSrcB)++);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A + B */
|
||||
/* Add and then store the results in the destination buffer. */
|
||||
*pDst++ = (q7_t) __SSAT(*pSrcA++ + *pSrcB++, 8);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A + B */
|
||||
/* Add and then store the results in the destination buffer. */
|
||||
*pDst++ = (q7_t) __SSAT((q15_t) * pSrcA++ + *pSrcB++, 8);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of BasicAdd group
|
||||
*/
|
||||
@@ -1,125 +0,0 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_dot_prod_f32.c
|
||||
*
|
||||
* Description: Floating-point dot product.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 0.0.7 2010/06/10
|
||||
* Misra-C changes done
|
||||
* ---------------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup dot_prod Vector Dot Product
|
||||
*
|
||||
* Computes the dot product of two vectors.
|
||||
* The vectors are multiplied element-by-element and then summed.
|
||||
* There are separate functions for floating-point, Q7, Q15, and Q31 data types.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup dot_prod
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Dot product of floating-point vectors.
|
||||
* @param[in] *pSrcA points to the first input vector
|
||||
* @param[in] *pSrcB points to the second input vector
|
||||
* @param[in] blockSize number of samples in each vector
|
||||
* @param[out] *result output result returned here
|
||||
* @return none.
|
||||
*/
|
||||
|
||||
|
||||
void arm_dot_prod_f32(
|
||||
float32_t * pSrcA,
|
||||
float32_t * pSrcB,
|
||||
uint32_t blockSize,
|
||||
float32_t * result)
|
||||
{
|
||||
float32_t sum = 0.0f; /* Temporary result storage */
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
|
||||
/* Calculate dot product and then store the result in a temporary buffer */
|
||||
sum += (*pSrcA++) * (*pSrcB++);
|
||||
sum += (*pSrcA++) * (*pSrcB++);
|
||||
sum += (*pSrcA++) * (*pSrcB++);
|
||||
sum += (*pSrcA++) * (*pSrcB++);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
|
||||
/* Calculate dot product and then store the result in a temporary buffer. */
|
||||
sum += (*pSrcA++) * (*pSrcB++);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
/* Store the result back in the destination buffer */
|
||||
*result = sum;
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of dot_prod group
|
||||
*/
|
||||
@@ -1,135 +0,0 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_dot_prod_q15.c
|
||||
*
|
||||
* Description: Q15 dot product.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 0.0.7 2010/06/10
|
||||
* Misra-C changes done
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup dot_prod
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Dot product of Q15 vectors.
|
||||
* @param[in] *pSrcA points to the first input vector
|
||||
* @param[in] *pSrcB points to the second input vector
|
||||
* @param[in] blockSize number of samples in each vector
|
||||
* @param[out] *result output result returned here
|
||||
* @return none.
|
||||
*
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
* \par
|
||||
* The intermediate multiplications are in 1.15 x 1.15 = 2.30 format and these
|
||||
* results are added to a 64-bit accumulator in 34.30 format.
|
||||
* Nonsaturating additions are used and given that there are 33 guard bits in the accumulator
|
||||
* there is no risk of overflow.
|
||||
* The return result is in 34.30 format.
|
||||
*/
|
||||
|
||||
void arm_dot_prod_q15(
|
||||
q15_t * pSrcA,
|
||||
q15_t * pSrcB,
|
||||
uint32_t blockSize,
|
||||
q63_t * result)
|
||||
{
|
||||
q63_t sum = 0; /* Temporary result storage */
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
|
||||
/* Calculate dot product and then store the result in a temporary buffer. */
|
||||
sum = __SMLALD(*__SIMD32(pSrcA)++, *__SIMD32(pSrcB)++, sum);
|
||||
sum = __SMLALD(*__SIMD32(pSrcA)++, *__SIMD32(pSrcB)++, sum);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
|
||||
/* Calculate dot product and then store the results in a temporary buffer. */
|
||||
sum = __SMLALD(*pSrcA++, *pSrcB++, sum);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
|
||||
/* Calculate dot product and then store the results in a temporary buffer. */
|
||||
sum += (q63_t) ((q31_t) * pSrcA++ * *pSrcB++);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
/* Store the result in the destination buffer in 34.30 format */
|
||||
*result = sum;
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of dot_prod group
|
||||
*/
|
||||
@@ -1,138 +0,0 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_dot_prod_q31.c
|
||||
*
|
||||
* Description: Q31 dot product.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 0.0.7 2010/06/10
|
||||
* Misra-C changes done
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup dot_prod
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Dot product of Q31 vectors.
|
||||
* @param[in] *pSrcA points to the first input vector
|
||||
* @param[in] *pSrcB points to the second input vector
|
||||
* @param[in] blockSize number of samples in each vector
|
||||
* @param[out] *result output result returned here
|
||||
* @return none.
|
||||
*
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
* \par
|
||||
* The intermediate multiplications are in 1.31 x 1.31 = 2.62 format and these
|
||||
* are truncated to 2.48 format by discarding the lower 14 bits.
|
||||
* The 2.48 result is then added without saturation to a 64-bit accumulator in 16.48 format.
|
||||
* There are 15 guard bits in the accumulator and there is no risk of overflow as long as
|
||||
* the length of the vectors is less than 2^16 elements.
|
||||
* The return result is in 16.48 format.
|
||||
*/
|
||||
|
||||
void arm_dot_prod_q31(
|
||||
q31_t * pSrcA,
|
||||
q31_t * pSrcB,
|
||||
uint32_t blockSize,
|
||||
q63_t * result)
|
||||
{
|
||||
q63_t sum = 0; /* Temporary result storage */
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
q31_t inA1, inA2, inA3, inA4;
|
||||
q31_t inB1, inB2, inB3, inB4;
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
|
||||
/* Calculate dot product and then store the result in a temporary buffer. */
|
||||
inA1 = *pSrcA++;
|
||||
inA2 = *pSrcA++;
|
||||
inA3 = *pSrcA++;
|
||||
inA4 = *pSrcA++;
|
||||
inB1 = *pSrcB++;
|
||||
inB2 = *pSrcB++;
|
||||
inB3 = *pSrcB++;
|
||||
inB4 = *pSrcB++;
|
||||
|
||||
sum += ((q63_t) inA1 * inB1) >> 14u;
|
||||
sum += ((q63_t) inA2 * inB2) >> 14u;
|
||||
sum += ((q63_t) inA3 * inB3) >> 14u;
|
||||
sum += ((q63_t) inA4 * inB4) >> 14u;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
|
||||
/* Calculate dot product and then store the result in a temporary buffer. */
|
||||
sum += ((q63_t) * pSrcA++ * *pSrcB++) >> 14u;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Store the result in the destination buffer in 16.48 format */
|
||||
*result = sum;
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of dot_prod group
|
||||
*/
|
||||
@@ -1,154 +0,0 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_dot_prod_q7.c
|
||||
*
|
||||
* Description: Q7 dot product.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 0.0.7 2010/06/10
|
||||
* Misra-C changes done
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup dot_prod
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Dot product of Q7 vectors.
|
||||
* @param[in] *pSrcA points to the first input vector
|
||||
* @param[in] *pSrcB points to the second input vector
|
||||
* @param[in] blockSize number of samples in each vector
|
||||
* @param[out] *result output result returned here
|
||||
* @return none.
|
||||
*
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
* \par
|
||||
* The intermediate multiplications are in 1.7 x 1.7 = 2.14 format and these
|
||||
* results are added to an accumulator in 18.14 format.
|
||||
* Nonsaturating additions are used and there is no danger of wrap around as long as
|
||||
* the vectors are less than 2^18 elements long.
|
||||
* The return result is in 18.14 format.
|
||||
*/
|
||||
|
||||
void arm_dot_prod_q7(
|
||||
q7_t * pSrcA,
|
||||
q7_t * pSrcB,
|
||||
uint32_t blockSize,
|
||||
q31_t * result)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
q31_t sum = 0; /* Temporary variables to store output */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
|
||||
q31_t input1, input2; /* Temporary variables to store input */
|
||||
q31_t inA1, inA2, inB1, inB2; /* Temporary variables to store input */
|
||||
|
||||
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* read 4 samples at a time from sourceA */
|
||||
input1 = *__SIMD32(pSrcA)++;
|
||||
/* read 4 samples at a time from sourceB */
|
||||
input2 = *__SIMD32(pSrcB)++;
|
||||
|
||||
/* extract two q7_t samples to q15_t samples */
|
||||
inA1 = __SXTB16(__ROR(input1, 8));
|
||||
/* extract reminaing two samples */
|
||||
inA2 = __SXTB16(input1);
|
||||
/* extract two q7_t samples to q15_t samples */
|
||||
inB1 = __SXTB16(__ROR(input2, 8));
|
||||
/* extract reminaing two samples */
|
||||
inB2 = __SXTB16(input2);
|
||||
|
||||
/* multiply and accumulate two samples at a time */
|
||||
sum = __SMLAD(inA1, inB1, sum);
|
||||
sum = __SMLAD(inA2, inB2, sum);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
|
||||
/* Dot product and then store the results in a temporary buffer. */
|
||||
sum = __SMLAD(*pSrcA++, *pSrcB++, sum);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
|
||||
/* Dot product and then store the results in a temporary buffer. */
|
||||
sum += (q31_t) ((q15_t) * pSrcA++ * *pSrcB++);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
|
||||
/* Store the result in the destination buffer in 18.14 format */
|
||||
*result = sum;
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of dot_prod group
|
||||
*/
|
||||
@@ -1,172 +0,0 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_mult_f32.c
|
||||
*
|
||||
* Description: Floating-point vector multiplication.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 0.0.5 2010/04/26
|
||||
* incorporated review comments and updated with latest CMSIS layer
|
||||
*
|
||||
* Version 0.0.3 2010/03/10
|
||||
* Initial version
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup BasicMult Vector Multiplication
|
||||
*
|
||||
* Element-by-element multiplication of two vectors.
|
||||
*
|
||||
* <pre>
|
||||
* pDst[n] = pSrcA[n] * pSrcB[n], 0 <= n < blockSize.
|
||||
* </pre>
|
||||
*
|
||||
* There are separate functions for floating-point, Q7, Q15, and Q31 data types.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup BasicMult
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Floating-point vector multiplication.
|
||||
* @param[in] *pSrcA points to the first input vector
|
||||
* @param[in] *pSrcB points to the second input vector
|
||||
* @param[out] *pDst points to the output vector
|
||||
* @param[in] blockSize number of samples in each vector
|
||||
* @return none.
|
||||
*/
|
||||
|
||||
void arm_mult_f32(
|
||||
float32_t * pSrcA,
|
||||
float32_t * pSrcB,
|
||||
float32_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counters */
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
float32_t inA1, inA2, inA3, inA4; /* temporary input variables */
|
||||
float32_t inB1, inB2, inB3, inB4; /* temporary input variables */
|
||||
float32_t out1, out2, out3, out4; /* temporary output variables */
|
||||
|
||||
/* loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A * B */
|
||||
/* Multiply the inputs and store the results in output buffer */
|
||||
/* read sample from sourceA */
|
||||
inA1 = *pSrcA;
|
||||
/* read sample from sourceB */
|
||||
inB1 = *pSrcB;
|
||||
/* read sample from sourceA */
|
||||
inA2 = *(pSrcA + 1);
|
||||
/* read sample from sourceB */
|
||||
inB2 = *(pSrcB + 1);
|
||||
|
||||
/* out = sourceA * sourceB */
|
||||
out1 = inA1 * inB1;
|
||||
|
||||
/* read sample from sourceA */
|
||||
inA3 = *(pSrcA + 2);
|
||||
/* read sample from sourceB */
|
||||
inB3 = *(pSrcB + 2);
|
||||
|
||||
/* out = sourceA * sourceB */
|
||||
out2 = inA2 * inB2;
|
||||
|
||||
/* read sample from sourceA */
|
||||
inA4 = *(pSrcA + 3);
|
||||
|
||||
/* store result to destination buffer */
|
||||
*pDst = out1;
|
||||
|
||||
/* read sample from sourceB */
|
||||
inB4 = *(pSrcB + 3);
|
||||
|
||||
/* out = sourceA * sourceB */
|
||||
out3 = inA3 * inB3;
|
||||
|
||||
/* store result to destination buffer */
|
||||
*(pDst + 1) = out2;
|
||||
|
||||
/* out = sourceA * sourceB */
|
||||
out4 = inA4 * inB4;
|
||||
/* store result to destination buffer */
|
||||
*(pDst + 2) = out3;
|
||||
/* store result to destination buffer */
|
||||
*(pDst + 3) = out4;
|
||||
|
||||
|
||||
/* update pointers to process next samples */
|
||||
pSrcA += 4u;
|
||||
pSrcB += 4u;
|
||||
pDst += 4u;
|
||||
|
||||
/* Decrement the blockSize loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A * B */
|
||||
/* Multiply the inputs and store the results in output buffer */
|
||||
*pDst++ = (*pSrcA++) * (*pSrcB++);
|
||||
|
||||
/* Decrement the blockSize loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of BasicMult group
|
||||
*/
|
||||
@@ -1,152 +0,0 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_mult_q15.c
|
||||
*
|
||||
* Description: Q15 vector multiplication.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 0.0.5 2010/04/26
|
||||
* incorporated review comments and updated with latest CMSIS layer
|
||||
*
|
||||
* Version 0.0.3 2010/03/10
|
||||
* Initial version
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup BasicMult
|
||||
* @{
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @brief Q15 vector multiplication
|
||||
* @param[in] *pSrcA points to the first input vector
|
||||
* @param[in] *pSrcB points to the second input vector
|
||||
* @param[out] *pDst points to the output vector
|
||||
* @param[in] blockSize number of samples in each vector
|
||||
* @return none.
|
||||
*
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
* \par
|
||||
* The function uses saturating arithmetic.
|
||||
* Results outside of the allowable Q15 range [0x8000 0x7FFF] will be saturated.
|
||||
*/
|
||||
|
||||
void arm_mult_q15(
|
||||
q15_t * pSrcA,
|
||||
q15_t * pSrcB,
|
||||
q15_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counters */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
q31_t inA1, inA2, inB1, inB2; /* temporary input variables */
|
||||
q15_t out1, out2, out3, out4; /* temporary output variables */
|
||||
q31_t mul1, mul2, mul3, mul4; /* temporary variables */
|
||||
|
||||
/* loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* read two samples at a time from sourceA */
|
||||
inA1 = *__SIMD32(pSrcA)++;
|
||||
/* read two samples at a time from sourceB */
|
||||
inB1 = *__SIMD32(pSrcB)++;
|
||||
/* read two samples at a time from sourceA */
|
||||
inA2 = *__SIMD32(pSrcA)++;
|
||||
/* read two samples at a time from sourceB */
|
||||
inB2 = *__SIMD32(pSrcB)++;
|
||||
|
||||
/* multiply mul = sourceA * sourceB */
|
||||
mul1 = (q31_t) ((q15_t) (inA1 >> 16) * (q15_t) (inB1 >> 16));
|
||||
mul2 = (q31_t) ((q15_t) inA1 * (q15_t) inB1);
|
||||
mul3 = (q31_t) ((q15_t) (inA2 >> 16) * (q15_t) (inB2 >> 16));
|
||||
mul4 = (q31_t) ((q15_t) inA2 * (q15_t) inB2);
|
||||
|
||||
/* saturate result to 16 bit */
|
||||
out1 = (q15_t) __SSAT(mul1 >> 15, 16);
|
||||
out2 = (q15_t) __SSAT(mul2 >> 15, 16);
|
||||
out3 = (q15_t) __SSAT(mul3 >> 15, 16);
|
||||
out4 = (q15_t) __SSAT(mul4 >> 15, 16);
|
||||
|
||||
/* store the result */
|
||||
#ifndef ARM_MATH_BIG_ENDIAN
|
||||
|
||||
*__SIMD32(pDst)++ = __PKHBT(out2, out1, 16);
|
||||
*__SIMD32(pDst)++ = __PKHBT(out4, out3, 16);
|
||||
|
||||
#else
|
||||
|
||||
*__SIMD32(pDst)++ = __PKHBT(out2, out1, 16);
|
||||
*__SIMD32(pDst)++ = __PKHBT(out4, out3, 16);
|
||||
|
||||
#endif // #ifndef ARM_MATH_BIG_ENDIAN
|
||||
|
||||
/* Decrement the blockSize loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A * B */
|
||||
/* Multiply the inputs and store the result in the destination buffer */
|
||||
*pDst++ = (q15_t) __SSAT((((q31_t) (*pSrcA++) * (*pSrcB++)) >> 15), 16);
|
||||
|
||||
/* Decrement the blockSize loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of BasicMult group
|
||||
*/
|
||||
@@ -1,143 +0,0 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_mult_q31.c
|
||||
*
|
||||
* Description: Q31 vector multiplication.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 0.0.5 2010/04/26
|
||||
* incorporated review comments and updated with latest CMSIS layer
|
||||
*
|
||||
* Version 0.0.3 2010/03/10
|
||||
* Initial version
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup BasicMult
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Q31 vector multiplication.
|
||||
* @param[in] *pSrcA points to the first input vector
|
||||
* @param[in] *pSrcB points to the second input vector
|
||||
* @param[out] *pDst points to the output vector
|
||||
* @param[in] blockSize number of samples in each vector
|
||||
* @return none.
|
||||
*
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
* \par
|
||||
* The function uses saturating arithmetic.
|
||||
* Results outside of the allowable Q31 range[0x80000000 0x7FFFFFFF] will be saturated.
|
||||
*/
|
||||
|
||||
void arm_mult_q31(
|
||||
q31_t * pSrcA,
|
||||
q31_t * pSrcB,
|
||||
q31_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counters */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
q31_t inA1, inA2, inA3, inA4; /* temporary input variables */
|
||||
q31_t inB1, inB2, inB3, inB4; /* temporary input variables */
|
||||
q31_t out1, out2, out3, out4; /* temporary output variables */
|
||||
|
||||
/* loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A * B */
|
||||
/* Multiply the inputs and then store the results in the destination buffer. */
|
||||
inA1 = *pSrcA++;
|
||||
inA2 = *pSrcA++;
|
||||
inA3 = *pSrcA++;
|
||||
inA4 = *pSrcA++;
|
||||
inB1 = *pSrcB++;
|
||||
inB2 = *pSrcB++;
|
||||
inB3 = *pSrcB++;
|
||||
inB4 = *pSrcB++;
|
||||
|
||||
out1 = ((q63_t) inA1 * inB1) >> 32;
|
||||
out2 = ((q63_t) inA2 * inB2) >> 32;
|
||||
out3 = ((q63_t) inA3 * inB3) >> 32;
|
||||
out4 = ((q63_t) inA4 * inB4) >> 32;
|
||||
|
||||
out1 = __SSAT(out1, 31);
|
||||
out2 = __SSAT(out2, 31);
|
||||
out3 = __SSAT(out3, 31);
|
||||
out4 = __SSAT(out4, 31);
|
||||
|
||||
*pDst++ = out1 << 1u;
|
||||
*pDst++ = out2 << 1u;
|
||||
*pDst++ = out3 << 1u;
|
||||
*pDst++ = out4 << 1u;
|
||||
|
||||
/* Decrement the blockSize loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A * B */
|
||||
/* Multiply the inputs and then store the results in the destination buffer. */
|
||||
*pDst++ =
|
||||
(q31_t) clip_q63_to_q31(((q63_t) (*pSrcA++) * (*pSrcB++)) >> 31);
|
||||
|
||||
/* Decrement the blockSize loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of BasicMult group
|
||||
*/
|
||||
@@ -1,128 +0,0 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_mult_q7.c
|
||||
*
|
||||
* Description: Q7 vector multiplication.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 0.0.7 2010/06/10
|
||||
* Misra-C changes done
|
||||
*
|
||||
* Version 0.0.5 2010/04/26
|
||||
* incorporated review comments and updated with latest CMSIS layer
|
||||
*
|
||||
* Version 0.0.3 2010/03/10 DP
|
||||
* Initial version
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup BasicMult
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Q7 vector multiplication
|
||||
* @param[in] *pSrcA points to the first input vector
|
||||
* @param[in] *pSrcB points to the second input vector
|
||||
* @param[out] *pDst points to the output vector
|
||||
* @param[in] blockSize number of samples in each vector
|
||||
* @return none.
|
||||
*
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
* \par
|
||||
* The function uses saturating arithmetic.
|
||||
* Results outside of the allowable Q7 range [0x80 0x7F] will be saturated.
|
||||
*/
|
||||
|
||||
void arm_mult_q7(
|
||||
q7_t * pSrcA,
|
||||
q7_t * pSrcB,
|
||||
q7_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counters */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
q7_t out1, out2, out3, out4; /* Temporary variables to store the product */
|
||||
|
||||
/* loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A * B */
|
||||
/* Multiply the inputs and store the results in temporary variables */
|
||||
out1 = (q7_t) __SSAT((((q15_t) (*pSrcA++) * (*pSrcB++)) >> 7), 8);
|
||||
out2 = (q7_t) __SSAT((((q15_t) (*pSrcA++) * (*pSrcB++)) >> 7), 8);
|
||||
out3 = (q7_t) __SSAT((((q15_t) (*pSrcA++) * (*pSrcB++)) >> 7), 8);
|
||||
out4 = (q7_t) __SSAT((((q15_t) (*pSrcA++) * (*pSrcB++)) >> 7), 8);
|
||||
|
||||
/* Store the results of 4 inputs in the destination buffer in single cycle by packing */
|
||||
*__SIMD32(pDst)++ = __PACKq7(out1, out2, out3, out4);
|
||||
|
||||
/* Decrement the blockSize loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A * B */
|
||||
/* Multiply the inputs and store the result in the destination buffer */
|
||||
*pDst++ = (q7_t) __SSAT((((q15_t) (*pSrcA++) * (*pSrcB++)) >> 7), 8);
|
||||
|
||||
/* Decrement the blockSize loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of BasicMult group
|
||||
*/
|
||||
@@ -1,137 +0,0 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_negate_f32.c
|
||||
*
|
||||
* Description: Negates floating-point vectors.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 0.0.7 2010/06/10
|
||||
* Misra-C changes done
|
||||
* ---------------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup negate Vector Negate
|
||||
*
|
||||
* Negates the elements of a vector.
|
||||
*
|
||||
* <pre>
|
||||
* pDst[n] = -pSrc[n], 0 <= n < blockSize.
|
||||
* </pre>
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup negate
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Negates the elements of a floating-point vector.
|
||||
* @param[in] *pSrc points to the input vector
|
||||
* @param[out] *pDst points to the output vector
|
||||
* @param[in] blockSize number of samples in the vector
|
||||
* @return none.
|
||||
*/
|
||||
|
||||
void arm_negate_f32(
|
||||
float32_t * pSrc,
|
||||
float32_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
float32_t in1, in2, in3, in4; /* temporary variables */
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* read inputs from source */
|
||||
in1 = *pSrc;
|
||||
in2 = *(pSrc + 1);
|
||||
in3 = *(pSrc + 2);
|
||||
in4 = *(pSrc + 3);
|
||||
|
||||
/* negate the input */
|
||||
in1 = -in1;
|
||||
in2 = -in2;
|
||||
in3 = -in3;
|
||||
in4 = -in4;
|
||||
|
||||
/* store the result to destination */
|
||||
*pDst = in1;
|
||||
*(pDst + 1) = in2;
|
||||
*(pDst + 2) = in3;
|
||||
*(pDst + 3) = in4;
|
||||
|
||||
/* update pointers to process next samples */
|
||||
pSrc += 4u;
|
||||
pDst += 4u;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = -A */
|
||||
/* Negate and then store the results in the destination buffer. */
|
||||
*pDst++ = -*pSrc++;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of negate group
|
||||
*/
|
||||
@@ -1,137 +0,0 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_negate_q15.c
|
||||
*
|
||||
* Description: Negates Q15 vectors.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 0.0.7 2010/06/10
|
||||
* Misra-C changes done
|
||||
* -------------------------------------------------------------------- */
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup negate
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Negates the elements of a Q15 vector.
|
||||
* @param[in] *pSrc points to the input vector
|
||||
* @param[out] *pDst points to the output vector
|
||||
* @param[in] blockSize number of samples in the vector
|
||||
* @return none.
|
||||
*
|
||||
* \par Conditions for optimum performance
|
||||
* Input and output buffers should be aligned by 32-bit
|
||||
*
|
||||
*
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
* \par
|
||||
* The function uses saturating arithmetic.
|
||||
* The Q15 value -1 (0x8000) will be saturated to the maximum allowable positive value 0x7FFF.
|
||||
*/
|
||||
|
||||
void arm_negate_q15(
|
||||
q15_t * pSrc,
|
||||
q15_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
q15_t in;
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
|
||||
q31_t in1, in2; /* Temporary variables */
|
||||
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = -A */
|
||||
/* Read two inputs at a time */
|
||||
in1 = _SIMD32_OFFSET(pSrc);
|
||||
in2 = _SIMD32_OFFSET(pSrc + 2);
|
||||
|
||||
/* negate two samples at a time */
|
||||
in1 = __QSUB16(0, in1);
|
||||
|
||||
/* negate two samples at a time */
|
||||
in2 = __QSUB16(0, in2);
|
||||
|
||||
/* store the result to destination 2 samples at a time */
|
||||
_SIMD32_OFFSET(pDst) = in1;
|
||||
/* store the result to destination 2 samples at a time */
|
||||
_SIMD32_OFFSET(pDst + 2) = in2;
|
||||
|
||||
|
||||
/* update pointers to process next samples */
|
||||
pSrc += 4u;
|
||||
pDst += 4u;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = -A */
|
||||
/* Negate and then store the result in the destination buffer. */
|
||||
in = *pSrc++;
|
||||
*pDst++ = (in == (q15_t) 0x8000) ? 0x7fff : -in;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of negate group
|
||||
*/
|
||||
@@ -1,124 +0,0 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_negate_q31.c
|
||||
*
|
||||
* Description: Negates Q31 vectors.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 0.0.7 2010/06/10
|
||||
* Misra-C changes done
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup negate
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Negates the elements of a Q31 vector.
|
||||
* @param[in] *pSrc points to the input vector
|
||||
* @param[out] *pDst points to the output vector
|
||||
* @param[in] blockSize number of samples in the vector
|
||||
* @return none.
|
||||
*
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
* \par
|
||||
* The function uses saturating arithmetic.
|
||||
* The Q31 value -1 (0x80000000) will be saturated to the maximum allowable positive value 0x7FFFFFFF.
|
||||
*/
|
||||
|
||||
void arm_negate_q31(
|
||||
q31_t * pSrc,
|
||||
q31_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
q31_t in; /* Temporary variable */
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
q31_t in1, in2, in3, in4;
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = -A */
|
||||
/* Negate and then store the results in the destination buffer. */
|
||||
in1 = *pSrc++;
|
||||
in2 = *pSrc++;
|
||||
in3 = *pSrc++;
|
||||
in4 = *pSrc++;
|
||||
|
||||
*pDst++ = __QSUB(0, in1);
|
||||
*pDst++ = __QSUB(0, in2);
|
||||
*pDst++ = __QSUB(0, in3);
|
||||
*pDst++ = __QSUB(0, in4);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = -A */
|
||||
/* Negate and then store the result in the destination buffer. */
|
||||
in = *pSrc++;
|
||||
*pDst++ = (in == 0x80000000) ? 0x7fffffff : -in;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of negate group
|
||||
*/
|
||||
@@ -1,120 +0,0 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_negate_q7.c
|
||||
*
|
||||
* Description: Negates Q7 vectors.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 0.0.7 2010/06/10
|
||||
* Misra-C changes done
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup negate
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Negates the elements of a Q7 vector.
|
||||
* @param[in] *pSrc points to the input vector
|
||||
* @param[out] *pDst points to the output vector
|
||||
* @param[in] blockSize number of samples in the vector
|
||||
* @return none.
|
||||
*
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
* \par
|
||||
* The function uses saturating arithmetic.
|
||||
* The Q7 value -1 (0x80) will be saturated to the maximum allowable positive value 0x7F.
|
||||
*/
|
||||
|
||||
void arm_negate_q7(
|
||||
q7_t * pSrc,
|
||||
q7_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
q7_t in;
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
q31_t input; /* Input values1-4 */
|
||||
q31_t zero = 0x00000000;
|
||||
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = -A */
|
||||
/* Read four inputs */
|
||||
input = *__SIMD32(pSrc)++;
|
||||
|
||||
/* Store the Negated results in the destination buffer in a single cycle by packing the results */
|
||||
*__SIMD32(pDst)++ = __QSUB8(zero, input);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = -A */
|
||||
/* Negate and then store the results in the destination buffer. */ \
|
||||
in = *pSrc++;
|
||||
*pDst++ = (in == (q7_t) 0x80) ? 0x7f : -in;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of negate group
|
||||
*/
|
||||
@@ -1,158 +0,0 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_offset_f32.c
|
||||
*
|
||||
* Description: Floating-point vector offset.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 0.0.7 2010/06/10
|
||||
* Misra-C changes done
|
||||
* ---------------------------------------------------------------------------- */
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup offset Vector Offset
|
||||
*
|
||||
* Adds a constant offset to each element of a vector.
|
||||
*
|
||||
* <pre>
|
||||
* pDst[n] = pSrc[n] + offset, 0 <= n < blockSize.
|
||||
* </pre>
|
||||
*
|
||||
* There are separate functions for floating-point, Q7, Q15, and Q31 data types.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup offset
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Adds a constant offset to a floating-point vector.
|
||||
* @param[in] *pSrc points to the input vector
|
||||
* @param[in] offset is the offset to be added
|
||||
* @param[out] *pDst points to the output vector
|
||||
* @param[in] blockSize number of samples in the vector
|
||||
* @return none.
|
||||
*/
|
||||
|
||||
|
||||
void arm_offset_f32(
|
||||
float32_t * pSrc,
|
||||
float32_t offset,
|
||||
float32_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
float32_t in1, in2, in3, in4;
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A + offset */
|
||||
/* Add offset and then store the results in the destination buffer. */
|
||||
/* read samples from source */
|
||||
in1 = *pSrc;
|
||||
in2 = *(pSrc + 1);
|
||||
|
||||
/* add offset to input */
|
||||
in1 = in1 + offset;
|
||||
|
||||
/* read samples from source */
|
||||
in3 = *(pSrc + 2);
|
||||
|
||||
/* add offset to input */
|
||||
in2 = in2 + offset;
|
||||
|
||||
/* read samples from source */
|
||||
in4 = *(pSrc + 3);
|
||||
|
||||
/* add offset to input */
|
||||
in3 = in3 + offset;
|
||||
|
||||
/* store result to destination */
|
||||
*pDst = in1;
|
||||
|
||||
/* add offset to input */
|
||||
in4 = in4 + offset;
|
||||
|
||||
/* store result to destination */
|
||||
*(pDst + 1) = in2;
|
||||
|
||||
/* store result to destination */
|
||||
*(pDst + 2) = in3;
|
||||
|
||||
/* store result to destination */
|
||||
*(pDst + 3) = in4;
|
||||
|
||||
/* update pointers to process next samples */
|
||||
pSrc += 4u;
|
||||
pDst += 4u;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A + offset */
|
||||
/* Add offset and then store the result in the destination buffer. */
|
||||
*pDst++ = (*pSrc++) + offset;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of offset group
|
||||
*/
|
||||
@@ -1,131 +0,0 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_offset_q15.c
|
||||
*
|
||||
* Description: Q15 vector offset.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 0.0.7 2010/06/10
|
||||
* Misra-C changes done
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup offset
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Adds a constant offset to a Q15 vector.
|
||||
* @param[in] *pSrc points to the input vector
|
||||
* @param[in] offset is the offset to be added
|
||||
* @param[out] *pDst points to the output vector
|
||||
* @param[in] blockSize number of samples in the vector
|
||||
* @return none.
|
||||
*
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
* \par
|
||||
* The function uses saturating arithmetic.
|
||||
* Results outside of the allowable Q15 range [0x8000 0x7FFF] are saturated.
|
||||
*/
|
||||
|
||||
void arm_offset_q15(
|
||||
q15_t * pSrc,
|
||||
q15_t offset,
|
||||
q15_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
q31_t offset_packed; /* Offset packed to 32 bit */
|
||||
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* Offset is packed to 32 bit in order to use SIMD32 for addition */
|
||||
offset_packed = __PKHBT(offset, offset, 16);
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A + offset */
|
||||
/* Add offset and then store the results in the destination buffer, 2 samples at a time. */
|
||||
*__SIMD32(pDst)++ = __QADD16(*__SIMD32(pSrc)++, offset_packed);
|
||||
*__SIMD32(pDst)++ = __QADD16(*__SIMD32(pSrc)++, offset_packed);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A + offset */
|
||||
/* Add offset and then store the results in the destination buffer. */
|
||||
*pDst++ = (q15_t) __QADD16(*pSrc++, offset);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A + offset */
|
||||
/* Add offset and then store the results in the destination buffer. */
|
||||
*pDst++ = (q15_t) __SSAT(((q31_t) * pSrc++ + offset), 16);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of offset group
|
||||
*/
|
||||
@@ -1,135 +0,0 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_offset_q31.c
|
||||
*
|
||||
* Description: Q31 vector offset.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 0.0.7 2010/06/10
|
||||
* Misra-C changes done
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup offset
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Adds a constant offset to a Q31 vector.
|
||||
* @param[in] *pSrc points to the input vector
|
||||
* @param[in] offset is the offset to be added
|
||||
* @param[out] *pDst points to the output vector
|
||||
* @param[in] blockSize number of samples in the vector
|
||||
* @return none.
|
||||
*
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
* \par
|
||||
* The function uses saturating arithmetic.
|
||||
* Results outside of the allowable Q31 range [0x80000000 0x7FFFFFFF] are saturated.
|
||||
*/
|
||||
|
||||
void arm_offset_q31(
|
||||
q31_t * pSrc,
|
||||
q31_t offset,
|
||||
q31_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
q31_t in1, in2, in3, in4;
|
||||
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A + offset */
|
||||
/* Add offset and then store the results in the destination buffer. */
|
||||
in1 = *pSrc++;
|
||||
in2 = *pSrc++;
|
||||
in3 = *pSrc++;
|
||||
in4 = *pSrc++;
|
||||
|
||||
*pDst++ = __QADD(in1, offset);
|
||||
*pDst++ = __QADD(in2, offset);
|
||||
*pDst++ = __QADD(in3, offset);
|
||||
*pDst++ = __QADD(in4, offset);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A + offset */
|
||||
/* Add offset and then store the result in the destination buffer. */
|
||||
*pDst++ = __QADD(*pSrc++, offset);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A + offset */
|
||||
/* Add offset and then store the result in the destination buffer. */
|
||||
*pDst++ = (q31_t) clip_q63_to_q31((q63_t) * pSrc++ + offset);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of offset group
|
||||
*/
|
||||
@@ -1,130 +0,0 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_offset_q7.c
|
||||
*
|
||||
* Description: Q7 vector offset.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 0.0.7 2010/06/10
|
||||
* Misra-C changes done
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup offset
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Adds a constant offset to a Q7 vector.
|
||||
* @param[in] *pSrc points to the input vector
|
||||
* @param[in] offset is the offset to be added
|
||||
* @param[out] *pDst points to the output vector
|
||||
* @param[in] blockSize number of samples in the vector
|
||||
* @return none.
|
||||
*
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
* \par
|
||||
* The function uses saturating arithmetic.
|
||||
* Results outside of the allowable Q7 range [0x80 0x7F] are saturated.
|
||||
*/
|
||||
|
||||
void arm_offset_q7(
|
||||
q7_t * pSrc,
|
||||
q7_t offset,
|
||||
q7_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
q31_t offset_packed; /* Offset packed to 32 bit */
|
||||
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* Offset is packed to 32 bit in order to use SIMD32 for addition */
|
||||
offset_packed = __PACKq7(offset, offset, offset, offset);
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A + offset */
|
||||
/* Add offset and then store the results in the destination bufferfor 4 samples at a time. */
|
||||
*__SIMD32(pDst)++ = __QADD8(*__SIMD32(pSrc)++, offset_packed);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A + offset */
|
||||
/* Add offset and then store the result in the destination buffer. */
|
||||
*pDst++ = (q7_t) __SSAT(*pSrc++ + offset, 8);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A + offset */
|
||||
/* Add offset and then store the result in the destination buffer. */
|
||||
*pDst++ = (q7_t) __SSAT((q15_t) * pSrc++ + offset, 8);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of offset group
|
||||
*/
|
||||
@@ -1,161 +0,0 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_scale_f32.c
|
||||
*
|
||||
* Description: Multiplies a floating-point vector by a scalar.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated
|
||||
*
|
||||
* Version 0.0.7 2010/06/10
|
||||
* Misra-C changes done
|
||||
* ---------------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup scale Vector Scale
|
||||
*
|
||||
* Multiply a vector by a scalar value. For floating-point data, the algorithm used is:
|
||||
*
|
||||
* <pre>
|
||||
* pDst[n] = pSrc[n] * scale, 0 <= n < blockSize.
|
||||
* </pre>
|
||||
*
|
||||
* In the fixed-point Q7, Q15, and Q31 functions, <code>scale</code> is represented by
|
||||
* a fractional multiplication <code>scaleFract</code> and an arithmetic shift <code>shift</code>.
|
||||
* The shift allows the gain of the scaling operation to exceed 1.0.
|
||||
* The algorithm used with fixed-point data is:
|
||||
*
|
||||
* <pre>
|
||||
* pDst[n] = (pSrc[n] * scaleFract) << shift, 0 <= n < blockSize.
|
||||
* </pre>
|
||||
*
|
||||
* The overall scale factor applied to the fixed-point data is
|
||||
* <pre>
|
||||
* scale = scaleFract * 2^shift.
|
||||
* </pre>
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup scale
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Multiplies a floating-point vector by a scalar.
|
||||
* @param[in] *pSrc points to the input vector
|
||||
* @param[in] scale scale factor to be applied
|
||||
* @param[out] *pDst points to the output vector
|
||||
* @param[in] blockSize number of samples in the vector
|
||||
* @return none.
|
||||
*/
|
||||
|
||||
|
||||
void arm_scale_f32(
|
||||
float32_t * pSrc,
|
||||
float32_t scale,
|
||||
float32_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
float32_t in1, in2, in3, in4; /* temporary variabels */
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A * scale */
|
||||
/* Scale the input and then store the results in the destination buffer. */
|
||||
/* read input samples from source */
|
||||
in1 = *pSrc;
|
||||
in2 = *(pSrc + 1);
|
||||
|
||||
/* multiply with scaling factor */
|
||||
in1 = in1 * scale;
|
||||
|
||||
/* read input sample from source */
|
||||
in3 = *(pSrc + 2);
|
||||
|
||||
/* multiply with scaling factor */
|
||||
in2 = in2 * scale;
|
||||
|
||||
/* read input sample from source */
|
||||
in4 = *(pSrc + 3);
|
||||
|
||||
/* multiply with scaling factor */
|
||||
in3 = in3 * scale;
|
||||
in4 = in4 * scale;
|
||||
/* store the result to destination */
|
||||
*pDst = in1;
|
||||
*(pDst + 1) = in2;
|
||||
*(pDst + 2) = in3;
|
||||
*(pDst + 3) = in4;
|
||||
|
||||
/* update pointers to process next samples */
|
||||
pSrc += 4u;
|
||||
pDst += 4u;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A * scale */
|
||||
/* Scale the input and then store the result in the destination buffer. */
|
||||
*pDst++ = (*pSrc++) * scale;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of scale group
|
||||
*/
|
||||
@@ -1,157 +0,0 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_scale_q15.c
|
||||
*
|
||||
* Description: Multiplies a Q15 vector by a scalar.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated
|
||||
*
|
||||
* Version 0.0.7 2010/06/10
|
||||
* Misra-C changes done
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup scale
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Multiplies a Q15 vector by a scalar.
|
||||
* @param[in] *pSrc points to the input vector
|
||||
* @param[in] scaleFract fractional portion of the scale value
|
||||
* @param[in] shift number of bits to shift the result by
|
||||
* @param[out] *pDst points to the output vector
|
||||
* @param[in] blockSize number of samples in the vector
|
||||
* @return none.
|
||||
*
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
* \par
|
||||
* The input data <code>*pSrc</code> and <code>scaleFract</code> are in 1.15 format.
|
||||
* These are multiplied to yield a 2.30 intermediate result and this is shifted with saturation to 1.15 format.
|
||||
*/
|
||||
|
||||
|
||||
void arm_scale_q15(
|
||||
q15_t * pSrc,
|
||||
q15_t scaleFract,
|
||||
int8_t shift,
|
||||
q15_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
int8_t kShift = 15 - shift; /* shift to apply after scaling */
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
q15_t in1, in2, in3, in4;
|
||||
q31_t inA1, inA2; /* Temporary variables */
|
||||
q31_t out1, out2, out3, out4;
|
||||
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* Reading 2 inputs from memory */
|
||||
inA1 = *__SIMD32(pSrc)++;
|
||||
inA2 = *__SIMD32(pSrc)++;
|
||||
|
||||
/* C = A * scale */
|
||||
/* Scale the inputs and then store the 2 results in the destination buffer
|
||||
* in single cycle by packing the outputs */
|
||||
out1 = (q31_t) ((q15_t) (inA1 >> 16) * scaleFract);
|
||||
out2 = (q31_t) ((q15_t) inA1 * scaleFract);
|
||||
out3 = (q31_t) ((q15_t) (inA2 >> 16) * scaleFract);
|
||||
out4 = (q31_t) ((q15_t) inA2 * scaleFract);
|
||||
|
||||
/* apply shifting */
|
||||
out1 = out1 >> kShift;
|
||||
out2 = out2 >> kShift;
|
||||
out3 = out3 >> kShift;
|
||||
out4 = out4 >> kShift;
|
||||
|
||||
/* saturate the output */
|
||||
in1 = (q15_t) (__SSAT(out1, 16));
|
||||
in2 = (q15_t) (__SSAT(out2, 16));
|
||||
in3 = (q15_t) (__SSAT(out3, 16));
|
||||
in4 = (q15_t) (__SSAT(out4, 16));
|
||||
|
||||
/* store the result to destination */
|
||||
*__SIMD32(pDst)++ = __PKHBT(in2, in1, 16);
|
||||
*__SIMD32(pDst)++ = __PKHBT(in4, in3, 16);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A * scale */
|
||||
/* Scale the input and then store the result in the destination buffer. */
|
||||
*pDst++ = (q15_t) (__SSAT(((*pSrc++) * scaleFract) >> kShift, 16));
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A * scale */
|
||||
/* Scale the input and then store the result in the destination buffer. */
|
||||
*pDst++ = (q15_t) (__SSAT(((q31_t) * pSrc++ * scaleFract) >> kShift, 16));
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of scale group
|
||||
*/
|
||||
@@ -1,221 +0,0 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_scale_q31.c
|
||||
*
|
||||
* Description: Multiplies a Q31 vector by a scalar.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated
|
||||
*
|
||||
* Version 0.0.7 2010/06/10
|
||||
* Misra-C changes done
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup scale
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Multiplies a Q31 vector by a scalar.
|
||||
* @param[in] *pSrc points to the input vector
|
||||
* @param[in] scaleFract fractional portion of the scale value
|
||||
* @param[in] shift number of bits to shift the result by
|
||||
* @param[out] *pDst points to the output vector
|
||||
* @param[in] blockSize number of samples in the vector
|
||||
* @return none.
|
||||
*
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
* \par
|
||||
* The input data <code>*pSrc</code> and <code>scaleFract</code> are in 1.31 format.
|
||||
* These are multiplied to yield a 2.62 intermediate result and this is shifted with saturation to 1.31 format.
|
||||
*/
|
||||
|
||||
void arm_scale_q31(
|
||||
q31_t * pSrc,
|
||||
q31_t scaleFract,
|
||||
int8_t shift,
|
||||
q31_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
int8_t kShift = shift + 1; /* Shift to apply after scaling */
|
||||
int8_t sign = (kShift & 0x80);
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
q31_t in, out;
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
|
||||
q31_t in1, in2, in3, in4; /* temporary input variables */
|
||||
q31_t out1, out2, out3, out4; /* temporary output variabels */
|
||||
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
if(sign == 0u)
|
||||
{
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* read four inputs from source */
|
||||
in1 = *pSrc;
|
||||
in2 = *(pSrc + 1);
|
||||
in3 = *(pSrc + 2);
|
||||
in4 = *(pSrc + 3);
|
||||
|
||||
/* multiply input with scaler value */
|
||||
in1 = ((q63_t) in1 * scaleFract) >> 32;
|
||||
in2 = ((q63_t) in2 * scaleFract) >> 32;
|
||||
in3 = ((q63_t) in3 * scaleFract) >> 32;
|
||||
in4 = ((q63_t) in4 * scaleFract) >> 32;
|
||||
|
||||
/* apply shifting */
|
||||
out1 = in1 << kShift;
|
||||
out2 = in2 << kShift;
|
||||
|
||||
/* saturate the results. */
|
||||
if(in1 != (out1 >> kShift))
|
||||
out1 = 0x7FFFFFFF ^ (in1 >> 31);
|
||||
|
||||
if(in2 != (out2 >> kShift))
|
||||
out2 = 0x7FFFFFFF ^ (in2 >> 31);
|
||||
|
||||
out3 = in3 << kShift;
|
||||
out4 = in4 << kShift;
|
||||
|
||||
*pDst = out1;
|
||||
*(pDst + 1) = out2;
|
||||
|
||||
if(in3 != (out3 >> kShift))
|
||||
out3 = 0x7FFFFFFF ^ (in3 >> 31);
|
||||
|
||||
if(in4 != (out4 >> kShift))
|
||||
out4 = 0x7FFFFFFF ^ (in4 >> 31);
|
||||
|
||||
/* Store result destination */
|
||||
*(pDst + 2) = out3;
|
||||
*(pDst + 3) = out4;
|
||||
|
||||
/* Update pointers to process next sampels */
|
||||
pSrc += 4u;
|
||||
pDst += 4u;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
kShift = -kShift;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* read four inputs from source */
|
||||
in1 = *pSrc;
|
||||
in2 = *(pSrc + 1);
|
||||
in3 = *(pSrc + 2);
|
||||
in4 = *(pSrc + 3);
|
||||
|
||||
/* multiply input with scaler value */
|
||||
in1 = ((q63_t) in1 * scaleFract) >> 32;
|
||||
in2 = ((q63_t) in2 * scaleFract) >> 32;
|
||||
in3 = ((q63_t) in3 * scaleFract) >> 32;
|
||||
in4 = ((q63_t) in4 * scaleFract) >> 32;
|
||||
|
||||
/* apply shifting */
|
||||
out1 = in1 >> kShift;
|
||||
out2 = in2 >> kShift;
|
||||
|
||||
out3 = in3 >> kShift;
|
||||
out4 = in4 >> kShift;
|
||||
|
||||
/* Store result destination */
|
||||
*pDst = out1;
|
||||
*(pDst + 1) = out2;
|
||||
|
||||
*(pDst + 2) = out3;
|
||||
*(pDst + 3) = out4;
|
||||
|
||||
/* Update pointers to process next sampels */
|
||||
pSrc += 4u;
|
||||
pDst += 4u;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A * scale */
|
||||
/* Scale the input and then store the result in the destination buffer. */
|
||||
in = *pSrc++;
|
||||
in = ((q63_t) in * scaleFract) >> 32;
|
||||
|
||||
if(sign == 0)
|
||||
{
|
||||
out = in << kShift;
|
||||
if(in != (out >> kShift))
|
||||
out = 0x7FFFFFFF ^ (in >> 31);
|
||||
}
|
||||
else
|
||||
{
|
||||
out = in >> kShift;
|
||||
}
|
||||
|
||||
*pDst++ = out;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of scale group
|
||||
*/
|
||||
@@ -1,144 +0,0 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_scale_q7.c
|
||||
*
|
||||
* Description: Multiplies a Q7 vector by a scalar.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated
|
||||
*
|
||||
* Version 0.0.7 2010/06/10
|
||||
* Misra-C changes done
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup scale
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Multiplies a Q7 vector by a scalar.
|
||||
* @param[in] *pSrc points to the input vector
|
||||
* @param[in] scaleFract fractional portion of the scale value
|
||||
* @param[in] shift number of bits to shift the result by
|
||||
* @param[out] *pDst points to the output vector
|
||||
* @param[in] blockSize number of samples in the vector
|
||||
* @return none.
|
||||
*
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
* \par
|
||||
* The input data <code>*pSrc</code> and <code>scaleFract</code> are in 1.7 format.
|
||||
* These are multiplied to yield a 2.14 intermediate result and this is shifted with saturation to 1.7 format.
|
||||
*/
|
||||
|
||||
void arm_scale_q7(
|
||||
q7_t * pSrc,
|
||||
q7_t scaleFract,
|
||||
int8_t shift,
|
||||
q7_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
int8_t kShift = 7 - shift; /* shift to apply after scaling */
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
q7_t in1, in2, in3, in4, out1, out2, out3, out4; /* Temporary variables to store input & output */
|
||||
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* Reading 4 inputs from memory */
|
||||
in1 = *pSrc++;
|
||||
in2 = *pSrc++;
|
||||
in3 = *pSrc++;
|
||||
in4 = *pSrc++;
|
||||
|
||||
/* C = A * scale */
|
||||
/* Scale the inputs and then store the results in the temporary variables. */
|
||||
out1 = (q7_t) (__SSAT(((in1) * scaleFract) >> kShift, 8));
|
||||
out2 = (q7_t) (__SSAT(((in2) * scaleFract) >> kShift, 8));
|
||||
out3 = (q7_t) (__SSAT(((in3) * scaleFract) >> kShift, 8));
|
||||
out4 = (q7_t) (__SSAT(((in4) * scaleFract) >> kShift, 8));
|
||||
|
||||
/* Packing the individual outputs into 32bit and storing in
|
||||
* destination buffer in single write */
|
||||
*__SIMD32(pDst)++ = __PACKq7(out1, out2, out3, out4);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A * scale */
|
||||
/* Scale the input and then store the result in the destination buffer. */
|
||||
*pDst++ = (q7_t) (__SSAT(((*pSrc++) * scaleFract) >> kShift, 8));
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A * scale */
|
||||
/* Scale the input and then store the result in the destination buffer. */
|
||||
*pDst++ = (q7_t) (__SSAT((((q15_t) * pSrc++ * scaleFract) >> kShift), 8));
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of scale group
|
||||
*/
|
||||
@@ -1,243 +0,0 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_shift_q15.c
|
||||
*
|
||||
* Description: Shifts the elements of a Q15 vector by a specified number of bits.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 0.0.7 2010/06/10
|
||||
* Misra-C changes done
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup shift
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Shifts the elements of a Q15 vector a specified number of bits.
|
||||
* @param[in] *pSrc points to the input vector
|
||||
* @param[in] shiftBits number of bits to shift. A positive value shifts left; a negative value shifts right.
|
||||
* @param[out] *pDst points to the output vector
|
||||
* @param[in] blockSize number of samples in the vector
|
||||
* @return none.
|
||||
*
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
* \par
|
||||
* The function uses saturating arithmetic.
|
||||
* Results outside of the allowable Q15 range [0x8000 0x7FFF] will be saturated.
|
||||
*/
|
||||
|
||||
void arm_shift_q15(
|
||||
q15_t * pSrc,
|
||||
int8_t shiftBits,
|
||||
q15_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
uint8_t sign; /* Sign of shiftBits */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
|
||||
q15_t in1, in2; /* Temporary variables */
|
||||
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* Getting the sign of shiftBits */
|
||||
sign = (shiftBits & 0x80);
|
||||
|
||||
/* If the shift value is positive then do right shift else left shift */
|
||||
if(sign == 0u)
|
||||
{
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* Read 2 inputs */
|
||||
in1 = *pSrc++;
|
||||
in2 = *pSrc++;
|
||||
/* C = A << shiftBits */
|
||||
/* Shift the inputs and then store the results in the destination buffer. */
|
||||
#ifndef ARM_MATH_BIG_ENDIAN
|
||||
|
||||
*__SIMD32(pDst)++ = __PKHBT(__SSAT((in1 << shiftBits), 16),
|
||||
__SSAT((in2 << shiftBits), 16), 16);
|
||||
|
||||
#else
|
||||
|
||||
*__SIMD32(pDst)++ = __PKHBT(__SSAT((in2 << shiftBits), 16),
|
||||
__SSAT((in1 << shiftBits), 16), 16);
|
||||
|
||||
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
||||
|
||||
in1 = *pSrc++;
|
||||
in2 = *pSrc++;
|
||||
|
||||
#ifndef ARM_MATH_BIG_ENDIAN
|
||||
|
||||
*__SIMD32(pDst)++ = __PKHBT(__SSAT((in1 << shiftBits), 16),
|
||||
__SSAT((in2 << shiftBits), 16), 16);
|
||||
|
||||
#else
|
||||
|
||||
*__SIMD32(pDst)++ = __PKHBT(__SSAT((in2 << shiftBits), 16),
|
||||
__SSAT((in1 << shiftBits), 16), 16);
|
||||
|
||||
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A << shiftBits */
|
||||
/* Shift and then store the results in the destination buffer. */
|
||||
*pDst++ = __SSAT((*pSrc++ << shiftBits), 16);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* Read 2 inputs */
|
||||
in1 = *pSrc++;
|
||||
in2 = *pSrc++;
|
||||
|
||||
/* C = A >> shiftBits */
|
||||
/* Shift the inputs and then store the results in the destination buffer. */
|
||||
#ifndef ARM_MATH_BIG_ENDIAN
|
||||
|
||||
*__SIMD32(pDst)++ = __PKHBT((in1 >> -shiftBits),
|
||||
(in2 >> -shiftBits), 16);
|
||||
|
||||
#else
|
||||
|
||||
*__SIMD32(pDst)++ = __PKHBT((in2 >> -shiftBits),
|
||||
(in1 >> -shiftBits), 16);
|
||||
|
||||
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
||||
|
||||
in1 = *pSrc++;
|
||||
in2 = *pSrc++;
|
||||
|
||||
#ifndef ARM_MATH_BIG_ENDIAN
|
||||
|
||||
*__SIMD32(pDst)++ = __PKHBT((in1 >> -shiftBits),
|
||||
(in2 >> -shiftBits), 16);
|
||||
|
||||
#else
|
||||
|
||||
*__SIMD32(pDst)++ = __PKHBT((in2 >> -shiftBits),
|
||||
(in1 >> -shiftBits), 16);
|
||||
|
||||
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A >> shiftBits */
|
||||
/* Shift the inputs and then store the results in the destination buffer. */
|
||||
*pDst++ = (*pSrc++ >> -shiftBits);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Getting the sign of shiftBits */
|
||||
sign = (shiftBits & 0x80);
|
||||
|
||||
/* If the shift value is positive then do right shift else left shift */
|
||||
if(sign == 0u)
|
||||
{
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A << shiftBits */
|
||||
/* Shift and then store the results in the destination buffer. */
|
||||
*pDst++ = __SSAT(((q31_t) * pSrc++ << shiftBits), 16);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A >> shiftBits */
|
||||
/* Shift the inputs and then store the results in the destination buffer. */
|
||||
*pDst++ = (*pSrc++ >> -shiftBits);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of shift group
|
||||
*/
|
||||
@@ -1,195 +0,0 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_shift_q31.c
|
||||
*
|
||||
* Description: Shifts the elements of a Q31 vector by a specified number of bits.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 0.0.7 2010/06/10
|
||||
* Misra-C changes done
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupMath
|
||||
*/
|
||||
/**
|
||||
* @defgroup shift Vector Shift
|
||||
*
|
||||
* Shifts the elements of a fixed-point vector by a specified number of bits.
|
||||
* There are separate functions for Q7, Q15, and Q31 data types.
|
||||
* The underlying algorithm used is:
|
||||
*
|
||||
* <pre>
|
||||
* pDst[n] = pSrc[n] << shift, 0 <= n < blockSize.
|
||||
* </pre>
|
||||
*
|
||||
* If <code>shift</code> is positive then the elements of the vector are shifted to the left.
|
||||
* If <code>shift</code> is negative then the elements of the vector are shifted to the right.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup shift
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Shifts the elements of a Q31 vector a specified number of bits.
|
||||
* @param[in] *pSrc points to the input vector
|
||||
* @param[in] shiftBits number of bits to shift. A positive value shifts left; a negative value shifts right.
|
||||
* @param[out] *pDst points to the output vector
|
||||
* @param[in] blockSize number of samples in the vector
|
||||
* @return none.
|
||||
*
|
||||
*
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
* \par
|
||||
* The function uses saturating arithmetic.
|
||||
* Results outside of the allowable Q31 range [0x80000000 0x7FFFFFFF] will be saturated.
|
||||
*/
|
||||
|
||||
void arm_shift_q31(
|
||||
q31_t * pSrc,
|
||||
int8_t shiftBits,
|
||||
q31_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
uint8_t sign = (shiftBits & 0x80); /* Sign of shiftBits */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
q31_t in1, in2, in3, in4; /* Temporary input variables */
|
||||
q31_t out1, out2, out3, out4; /* Temporary output variables */
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
|
||||
if(sign == 0u)
|
||||
{
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A << shiftBits */
|
||||
/* Shift the input and then store the results in the destination buffer. */
|
||||
in1 = *pSrc;
|
||||
in2 = *(pSrc + 1);
|
||||
out1 = in1 << shiftBits;
|
||||
in3 = *(pSrc + 2);
|
||||
out2 = in2 << shiftBits;
|
||||
in4 = *(pSrc + 3);
|
||||
if(in1 != (out1 >> shiftBits))
|
||||
out1 = 0x7FFFFFFF ^ (in1 >> 31);
|
||||
|
||||
if(in2 != (out2 >> shiftBits))
|
||||
out2 = 0x7FFFFFFF ^ (in2 >> 31);
|
||||
|
||||
*pDst = out1;
|
||||
out3 = in3 << shiftBits;
|
||||
*(pDst + 1) = out2;
|
||||
out4 = in4 << shiftBits;
|
||||
|
||||
if(in3 != (out3 >> shiftBits))
|
||||
out3 = 0x7FFFFFFF ^ (in3 >> 31);
|
||||
|
||||
if(in4 != (out4 >> shiftBits))
|
||||
out4 = 0x7FFFFFFF ^ (in4 >> 31);
|
||||
|
||||
*(pDst + 2) = out3;
|
||||
*(pDst + 3) = out4;
|
||||
|
||||
/* Update destination pointer to process next sampels */
|
||||
pSrc += 4u;
|
||||
pDst += 4u;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A >> shiftBits */
|
||||
/* Shift the input and then store the results in the destination buffer. */
|
||||
in1 = *pSrc;
|
||||
in2 = *(pSrc + 1);
|
||||
in3 = *(pSrc + 2);
|
||||
in4 = *(pSrc + 3);
|
||||
|
||||
*pDst = (in1 >> -shiftBits);
|
||||
*(pDst + 1) = (in2 >> -shiftBits);
|
||||
*(pDst + 2) = (in3 >> -shiftBits);
|
||||
*(pDst + 3) = (in4 >> -shiftBits);
|
||||
|
||||
|
||||
pSrc += 4u;
|
||||
pDst += 4u;
|
||||
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A (>> or <<) shiftBits */
|
||||
/* Shift the input and then store the result in the destination buffer. */
|
||||
*pDst++ = (sign == 0u) ? clip_q63_to_q31((q63_t) * pSrc++ << shiftBits) :
|
||||
(*pSrc++ >> -shiftBits);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of shift group
|
||||
*/
|
||||
@@ -1,215 +0,0 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_shift_q7.c
|
||||
*
|
||||
* Description: Processing function for the Q7 Shifting
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 0.0.7 2010/06/10
|
||||
* Misra-C changes done
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup shift
|
||||
* @{
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @brief Shifts the elements of a Q7 vector a specified number of bits.
|
||||
* @param[in] *pSrc points to the input vector
|
||||
* @param[in] shiftBits number of bits to shift. A positive value shifts left; a negative value shifts right.
|
||||
* @param[out] *pDst points to the output vector
|
||||
* @param[in] blockSize number of samples in the vector
|
||||
* @return none.
|
||||
*
|
||||
* \par Conditions for optimum performance
|
||||
* Input and output buffers should be aligned by 32-bit
|
||||
*
|
||||
*
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
* \par
|
||||
* The function uses saturating arithmetic.
|
||||
* Results outside of the allowable Q7 range [0x8 0x7F] will be saturated.
|
||||
*/
|
||||
|
||||
void arm_shift_q7(
|
||||
q7_t * pSrc,
|
||||
int8_t shiftBits,
|
||||
q7_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
uint8_t sign; /* Sign of shiftBits */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
q7_t in1; /* Input value1 */
|
||||
q7_t in2; /* Input value2 */
|
||||
q7_t in3; /* Input value3 */
|
||||
q7_t in4; /* Input value4 */
|
||||
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* Getting the sign of shiftBits */
|
||||
sign = (shiftBits & 0x80);
|
||||
|
||||
/* If the shift value is positive then do right shift else left shift */
|
||||
if(sign == 0u)
|
||||
{
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A << shiftBits */
|
||||
/* Read 4 inputs */
|
||||
in1 = *pSrc;
|
||||
in2 = *(pSrc + 1);
|
||||
in3 = *(pSrc + 2);
|
||||
in4 = *(pSrc + 3);
|
||||
|
||||
/* Store the Shifted result in the destination buffer in single cycle by packing the outputs */
|
||||
*__SIMD32(pDst)++ = __PACKq7(__SSAT((in1 << shiftBits), 8),
|
||||
__SSAT((in2 << shiftBits), 8),
|
||||
__SSAT((in3 << shiftBits), 8),
|
||||
__SSAT((in4 << shiftBits), 8));
|
||||
/* Update source pointer to process next sampels */
|
||||
pSrc += 4u;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A << shiftBits */
|
||||
/* Shift the input and then store the result in the destination buffer. */
|
||||
*pDst++ = (q7_t) __SSAT((*pSrc++ << shiftBits), 8);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
shiftBits = -shiftBits;
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A >> shiftBits */
|
||||
/* Read 4 inputs */
|
||||
in1 = *pSrc;
|
||||
in2 = *(pSrc + 1);
|
||||
in3 = *(pSrc + 2);
|
||||
in4 = *(pSrc + 3);
|
||||
|
||||
/* Store the Shifted result in the destination buffer in single cycle by packing the outputs */
|
||||
*__SIMD32(pDst)++ = __PACKq7((in1 >> shiftBits), (in2 >> shiftBits),
|
||||
(in3 >> shiftBits), (in4 >> shiftBits));
|
||||
|
||||
|
||||
pSrc += 4u;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A >> shiftBits */
|
||||
/* Shift the input and then store the result in the destination buffer. */
|
||||
in1 = *pSrc++;
|
||||
*pDst++ = (in1 >> shiftBits);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Getting the sign of shiftBits */
|
||||
sign = (shiftBits & 0x80);
|
||||
|
||||
/* If the shift value is positive then do right shift else left shift */
|
||||
if(sign == 0u)
|
||||
{
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A << shiftBits */
|
||||
/* Shift the input and then store the result in the destination buffer. */
|
||||
*pDst++ = (q7_t) __SSAT(((q15_t) * pSrc++ << shiftBits), 8);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A >> shiftBits */
|
||||
/* Shift the input and then store the result in the destination buffer. */
|
||||
*pDst++ = (*pSrc++ >> -shiftBits);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of shift group
|
||||
*/
|
||||
@@ -1,145 +0,0 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_sub_f32.c
|
||||
*
|
||||
* Description: Floating-point vector subtraction.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 0.0.7 2010/06/10
|
||||
* Misra-C changes done
|
||||
* ---------------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup BasicSub Vector Subtraction
|
||||
*
|
||||
* Element-by-element subtraction of two vectors.
|
||||
*
|
||||
* <pre>
|
||||
* pDst[n] = pSrcA[n] - pSrcB[n], 0 <= n < blockSize.
|
||||
* </pre>
|
||||
*
|
||||
* There are separate functions for floating-point, Q7, Q15, and Q31 data types.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup BasicSub
|
||||
* @{
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @brief Floating-point vector subtraction.
|
||||
* @param[in] *pSrcA points to the first input vector
|
||||
* @param[in] *pSrcB points to the second input vector
|
||||
* @param[out] *pDst points to the output vector
|
||||
* @param[in] blockSize number of samples in each vector
|
||||
* @return none.
|
||||
*/
|
||||
|
||||
void arm_sub_f32(
|
||||
float32_t * pSrcA,
|
||||
float32_t * pSrcB,
|
||||
float32_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
float32_t inA1, inA2, inA3, inA4; /* temporary variables */
|
||||
float32_t inB1, inB2, inB3, inB4; /* temporary variables */
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A - B */
|
||||
/* Subtract and then store the results in the destination buffer. */
|
||||
/* Read 4 input samples from sourceA and sourceB */
|
||||
inA1 = *pSrcA;
|
||||
inB1 = *pSrcB;
|
||||
inA2 = *(pSrcA + 1);
|
||||
inB2 = *(pSrcB + 1);
|
||||
inA3 = *(pSrcA + 2);
|
||||
inB3 = *(pSrcB + 2);
|
||||
inA4 = *(pSrcA + 3);
|
||||
inB4 = *(pSrcB + 3);
|
||||
|
||||
/* dst = srcA - srcB */
|
||||
/* subtract and store the result */
|
||||
*pDst = inA1 - inB1;
|
||||
*(pDst + 1) = inA2 - inB2;
|
||||
*(pDst + 2) = inA3 - inB3;
|
||||
*(pDst + 3) = inA4 - inB4;
|
||||
|
||||
|
||||
/* Update pointers to process next sampels */
|
||||
pSrcA += 4u;
|
||||
pSrcB += 4u;
|
||||
pDst += 4u;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A - B */
|
||||
/* Subtract and then store the results in the destination buffer. */
|
||||
*pDst++ = (*pSrcA++) - (*pSrcB++);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of BasicSub group
|
||||
*/
|
||||
@@ -1,135 +0,0 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_sub_q15.c
|
||||
*
|
||||
* Description: Q15 vector subtraction.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 0.0.7 2010/06/10
|
||||
* Misra-C changes done
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup BasicSub
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Q15 vector subtraction.
|
||||
* @param[in] *pSrcA points to the first input vector
|
||||
* @param[in] *pSrcB points to the second input vector
|
||||
* @param[out] *pDst points to the output vector
|
||||
* @param[in] blockSize number of samples in each vector
|
||||
* @return none.
|
||||
*
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
* \par
|
||||
* The function uses saturating arithmetic.
|
||||
* Results outside of the allowable Q15 range [0x8000 0x7FFF] will be saturated.
|
||||
*/
|
||||
|
||||
void arm_sub_q15(
|
||||
q15_t * pSrcA,
|
||||
q15_t * pSrcB,
|
||||
q15_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
q31_t inA1, inA2;
|
||||
q31_t inB1, inB2;
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A - B */
|
||||
/* Subtract and then store the results in the destination buffer two samples at a time. */
|
||||
inA1 = *__SIMD32(pSrcA)++;
|
||||
inA2 = *__SIMD32(pSrcA)++;
|
||||
inB1 = *__SIMD32(pSrcB)++;
|
||||
inB2 = *__SIMD32(pSrcB)++;
|
||||
|
||||
*__SIMD32(pDst)++ = __QSUB16(inA1, inB1);
|
||||
*__SIMD32(pDst)++ = __QSUB16(inA2, inB2);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A - B */
|
||||
/* Subtract and then store the result in the destination buffer. */
|
||||
*pDst++ = (q15_t) __QSUB16(*pSrcA++, *pSrcB++);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A - B */
|
||||
/* Subtract and then store the result in the destination buffer. */
|
||||
*pDst++ = (q15_t) __SSAT(((q31_t) * pSrcA++ - *pSrcB++), 16);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of BasicSub group
|
||||
*/
|
||||
@@ -1,141 +0,0 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_sub_q31.c
|
||||
*
|
||||
* Description: Q31 vector subtraction.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 0.0.7 2010/06/10
|
||||
* Misra-C changes done
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup BasicSub
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Q31 vector subtraction.
|
||||
* @param[in] *pSrcA points to the first input vector
|
||||
* @param[in] *pSrcB points to the second input vector
|
||||
* @param[out] *pDst points to the output vector
|
||||
* @param[in] blockSize number of samples in each vector
|
||||
* @return none.
|
||||
*
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
* \par
|
||||
* The function uses saturating arithmetic.
|
||||
* Results outside of the allowable Q31 range [0x80000000 0x7FFFFFFF] will be saturated.
|
||||
*/
|
||||
|
||||
void arm_sub_q31(
|
||||
q31_t * pSrcA,
|
||||
q31_t * pSrcB,
|
||||
q31_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
q31_t inA1, inA2, inA3, inA4;
|
||||
q31_t inB1, inB2, inB3, inB4;
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A - B */
|
||||
/* Subtract and then store the results in the destination buffer. */
|
||||
inA1 = *pSrcA++;
|
||||
inA2 = *pSrcA++;
|
||||
inB1 = *pSrcB++;
|
||||
inB2 = *pSrcB++;
|
||||
|
||||
inA3 = *pSrcA++;
|
||||
inA4 = *pSrcA++;
|
||||
inB3 = *pSrcB++;
|
||||
inB4 = *pSrcB++;
|
||||
|
||||
*pDst++ = __QSUB(inA1, inB1);
|
||||
*pDst++ = __QSUB(inA2, inB2);
|
||||
*pDst++ = __QSUB(inA3, inB3);
|
||||
*pDst++ = __QSUB(inA4, inB4);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A - B */
|
||||
/* Subtract and then store the result in the destination buffer. */
|
||||
*pDst++ = __QSUB(*pSrcA++, *pSrcB++);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A - B */
|
||||
/* Subtract and then store the result in the destination buffer. */
|
||||
*pDst++ = (q31_t) clip_q63_to_q31((q63_t) * pSrcA++ - *pSrcB++);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of BasicSub group
|
||||
*/
|
||||
@@ -1,126 +0,0 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_sub_q7.c
|
||||
*
|
||||
* Description: Q7 vector subtraction.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 0.0.7 2010/06/10
|
||||
* Misra-C changes done
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup BasicSub
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Q7 vector subtraction.
|
||||
* @param[in] *pSrcA points to the first input vector
|
||||
* @param[in] *pSrcB points to the second input vector
|
||||
* @param[out] *pDst points to the output vector
|
||||
* @param[in] blockSize number of samples in each vector
|
||||
* @return none.
|
||||
*
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
* \par
|
||||
* The function uses saturating arithmetic.
|
||||
* Results outside of the allowable Q7 range [0x80 0x7F] will be saturated.
|
||||
*/
|
||||
|
||||
void arm_sub_q7(
|
||||
q7_t * pSrcA,
|
||||
q7_t * pSrcB,
|
||||
q7_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A - B */
|
||||
/* Subtract and then store the results in the destination buffer 4 samples at a time. */
|
||||
*__SIMD32(pDst)++ = __QSUB8(*__SIMD32(pSrcA)++, *__SIMD32(pSrcB)++);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A - B */
|
||||
/* Subtract and then store the result in the destination buffer. */
|
||||
*pDst++ = __SSAT(*pSrcA++ - *pSrcB++, 8);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A - B */
|
||||
/* Subtract and then store the result in the destination buffer. */
|
||||
*pDst++ = (q7_t) __SSAT((q15_t) * pSrcA++ - *pSrcB++, 8);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of BasicSub group
|
||||
*/
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,174 +0,0 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_cmplx_conj_f32.c
|
||||
*
|
||||
* Description: Floating-point complex conjugate.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
* ---------------------------------------------------------------------------- */
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupCmplxMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup cmplx_conj Complex Conjugate
|
||||
*
|
||||
* Conjugates the elements of a complex data vector.
|
||||
*
|
||||
* The <code>pSrc</code> points to the source data and
|
||||
* <code>pDst</code> points to the where the result should be written.
|
||||
* <code>numSamples</code> specifies the number of complex samples
|
||||
* and the data in each array is stored in an interleaved fashion
|
||||
* (real, imag, real, imag, ...).
|
||||
* Each array has a total of <code>2*numSamples</code> values.
|
||||
* The underlying algorithm is used:
|
||||
*
|
||||
* <pre>
|
||||
* for(n=0; n<numSamples; n++) {
|
||||
* pDst[(2*n)+0)] = pSrc[(2*n)+0]; // real part
|
||||
* pDst[(2*n)+1)] = -pSrc[(2*n)+1]; // imag part
|
||||
* }
|
||||
* </pre>
|
||||
*
|
||||
* There are separate functions for floating-point, Q15, and Q31 data types.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup cmplx_conj
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Floating-point complex conjugate.
|
||||
* @param *pSrc points to the input vector
|
||||
* @param *pDst points to the output vector
|
||||
* @param numSamples number of complex samples in each vector
|
||||
* @return none.
|
||||
*/
|
||||
|
||||
void arm_cmplx_conj_f32(
|
||||
float32_t * pSrc,
|
||||
float32_t * pDst,
|
||||
uint32_t numSamples)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
float32_t inR1, inR2, inR3, inR4;
|
||||
float32_t inI1, inI2, inI3, inI4;
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = numSamples >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C[0]+jC[1] = A[0]+ j (-1) A[1] */
|
||||
/* Calculate Complex Conjugate and then store the results in the destination buffer. */
|
||||
/* read real input samples */
|
||||
inR1 = pSrc[0];
|
||||
/* store real samples to destination */
|
||||
pDst[0] = inR1;
|
||||
inR2 = pSrc[2];
|
||||
pDst[2] = inR2;
|
||||
inR3 = pSrc[4];
|
||||
pDst[4] = inR3;
|
||||
inR4 = pSrc[6];
|
||||
pDst[6] = inR4;
|
||||
|
||||
/* read imaginary input samples */
|
||||
inI1 = pSrc[1];
|
||||
inI2 = pSrc[3];
|
||||
|
||||
/* conjugate input */
|
||||
inI1 = -inI1;
|
||||
|
||||
/* read imaginary input samples */
|
||||
inI3 = pSrc[5];
|
||||
|
||||
/* conjugate input */
|
||||
inI2 = -inI2;
|
||||
|
||||
/* read imaginary input samples */
|
||||
inI4 = pSrc[7];
|
||||
|
||||
/* conjugate input */
|
||||
inI3 = -inI3;
|
||||
|
||||
/* store imaginary samples to destination */
|
||||
pDst[1] = inI1;
|
||||
pDst[3] = inI2;
|
||||
|
||||
/* conjugate input */
|
||||
inI4 = -inI4;
|
||||
|
||||
/* store imaginary samples to destination */
|
||||
pDst[5] = inI3;
|
||||
|
||||
/* increment source pointer by 8 to process next sampels */
|
||||
pSrc += 8u;
|
||||
|
||||
/* store imaginary sample to destination */
|
||||
pDst[7] = inI4;
|
||||
|
||||
/* increment destination pointer by 8 to store next samples */
|
||||
pDst += 8u;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the numSamples is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = numSamples % 0x4u;
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
blkCnt = numSamples;
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* realOut + j (imagOut) = realIn + j (-1) imagIn */
|
||||
/* Calculate Complex Conjugate and then store the results in the destination buffer. */
|
||||
*pDst++ = *pSrc++;
|
||||
*pDst++ = -*pSrc++;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of cmplx_conj group
|
||||
*/
|
||||
@@ -1,153 +0,0 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_cmplx_conj_q15.c
|
||||
*
|
||||
* Description: Q15 complex conjugate.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
* ---------------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupCmplxMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup cmplx_conj
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Q15 complex conjugate.
|
||||
* @param *pSrc points to the input vector
|
||||
* @param *pDst points to the output vector
|
||||
* @param numSamples number of complex samples in each vector
|
||||
* @return none.
|
||||
*
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
* \par
|
||||
* The function uses saturating arithmetic.
|
||||
* The Q15 value -1 (0x8000) will be saturated to the maximum allowable positive value 0x7FFF.
|
||||
*/
|
||||
|
||||
void arm_cmplx_conj_q15(
|
||||
q15_t * pSrc,
|
||||
q15_t * pDst,
|
||||
uint32_t numSamples)
|
||||
{
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
q31_t in1, in2, in3, in4;
|
||||
q31_t zero = 0;
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = numSamples >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C[0]+jC[1] = A[0]+ j (-1) A[1] */
|
||||
/* Calculate Complex Conjugate and then store the results in the destination buffer. */
|
||||
in1 = *__SIMD32(pSrc)++;
|
||||
in2 = *__SIMD32(pSrc)++;
|
||||
in3 = *__SIMD32(pSrc)++;
|
||||
in4 = *__SIMD32(pSrc)++;
|
||||
|
||||
#ifndef ARM_MATH_BIG_ENDIAN
|
||||
|
||||
in1 = __QASX(zero, in1);
|
||||
in2 = __QASX(zero, in2);
|
||||
in3 = __QASX(zero, in3);
|
||||
in4 = __QASX(zero, in4);
|
||||
|
||||
#else
|
||||
|
||||
in1 = __QSAX(zero, in1);
|
||||
in2 = __QSAX(zero, in2);
|
||||
in3 = __QSAX(zero, in3);
|
||||
in4 = __QSAX(zero, in4);
|
||||
|
||||
#endif // #ifndef ARM_MATH_BIG_ENDIAN
|
||||
|
||||
in1 = ((uint32_t) in1 >> 16) | ((uint32_t) in1 << 16);
|
||||
in2 = ((uint32_t) in2 >> 16) | ((uint32_t) in2 << 16);
|
||||
in3 = ((uint32_t) in3 >> 16) | ((uint32_t) in3 << 16);
|
||||
in4 = ((uint32_t) in4 >> 16) | ((uint32_t) in4 << 16);
|
||||
|
||||
*__SIMD32(pDst)++ = in1;
|
||||
*__SIMD32(pDst)++ = in2;
|
||||
*__SIMD32(pDst)++ = in3;
|
||||
*__SIMD32(pDst)++ = in4;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the numSamples is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = numSamples % 0x4u;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C[0]+jC[1] = A[0]+ j (-1) A[1] */
|
||||
/* Calculate Complex Conjugate and then store the results in the destination buffer. */
|
||||
*pDst++ = *pSrc++;
|
||||
*pDst++ = __SSAT(-*pSrc++, 16);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
q15_t in;
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
while(numSamples > 0u)
|
||||
{
|
||||
/* realOut + j (imagOut) = realIn+ j (-1) imagIn */
|
||||
/* Calculate Complex Conjugate and then store the results in the destination buffer. */
|
||||
*pDst++ = *pSrc++;
|
||||
in = *pSrc++;
|
||||
*pDst++ = (in == (q15_t) 0x8000) ? 0x7fff : -in;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
numSamples--;
|
||||
}
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of cmplx_conj group
|
||||
*/
|
||||
@@ -1,172 +0,0 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_cmplx_conj_q31.c
|
||||
*
|
||||
* Description: Q31 complex conjugate.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
* ---------------------------------------------------------------------------- */
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupCmplxMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup cmplx_conj
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Q31 complex conjugate.
|
||||
* @param *pSrc points to the input vector
|
||||
* @param *pDst points to the output vector
|
||||
* @param numSamples number of complex samples in each vector
|
||||
* @return none.
|
||||
*
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
* \par
|
||||
* The function uses saturating arithmetic.
|
||||
* The Q31 value -1 (0x80000000) will be saturated to the maximum allowable positive value 0x7FFFFFFF.
|
||||
*/
|
||||
|
||||
void arm_cmplx_conj_q31(
|
||||
q31_t * pSrc,
|
||||
q31_t * pDst,
|
||||
uint32_t numSamples)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
q31_t in; /* Input value */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
q31_t inR1, inR2, inR3, inR4; /* Temporary real variables */
|
||||
q31_t inI1, inI2, inI3, inI4; /* Temporary imaginary variables */
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = numSamples >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C[0]+jC[1] = A[0]+ j (-1) A[1] */
|
||||
/* Calculate Complex Conjugate and then store the results in the destination buffer. */
|
||||
/* Saturated to 0x7fffffff if the input is -1(0x80000000) */
|
||||
/* read real input sample */
|
||||
inR1 = pSrc[0];
|
||||
/* store real input sample */
|
||||
pDst[0] = inR1;
|
||||
|
||||
/* read imaginary input sample */
|
||||
inI1 = pSrc[1];
|
||||
|
||||
/* read real input sample */
|
||||
inR2 = pSrc[2];
|
||||
/* store real input sample */
|
||||
pDst[2] = inR2;
|
||||
|
||||
/* read imaginary input sample */
|
||||
inI2 = pSrc[3];
|
||||
|
||||
/* negate imaginary input sample */
|
||||
inI1 = __QSUB(0, inI1);
|
||||
|
||||
/* read real input sample */
|
||||
inR3 = pSrc[4];
|
||||
/* store real input sample */
|
||||
pDst[4] = inR3;
|
||||
|
||||
/* read imaginary input sample */
|
||||
inI3 = pSrc[5];
|
||||
|
||||
/* negate imaginary input sample */
|
||||
inI2 = __QSUB(0, inI2);
|
||||
|
||||
/* read real input sample */
|
||||
inR4 = pSrc[6];
|
||||
/* store real input sample */
|
||||
pDst[6] = inR4;
|
||||
|
||||
/* negate imaginary input sample */
|
||||
inI3 = __QSUB(0, inI3);
|
||||
|
||||
/* store imaginary input sample */
|
||||
inI4 = pSrc[7];
|
||||
|
||||
/* store imaginary input samples */
|
||||
pDst[1] = inI1;
|
||||
|
||||
/* negate imaginary input sample */
|
||||
inI4 = __QSUB(0, inI4);
|
||||
|
||||
/* store imaginary input samples */
|
||||
pDst[3] = inI2;
|
||||
|
||||
/* increment source pointer by 8 to proecess next samples */
|
||||
pSrc += 8u;
|
||||
|
||||
/* store imaginary input samples */
|
||||
pDst[5] = inI3;
|
||||
pDst[7] = inI4;
|
||||
|
||||
/* increment destination pointer by 8 to process next samples */
|
||||
pDst += 8u;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the numSamples is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = numSamples % 0x4u;
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
blkCnt = numSamples;
|
||||
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C[0]+jC[1] = A[0]+ j (-1) A[1] */
|
||||
/* Calculate Complex Conjugate and then store the results in the destination buffer. */
|
||||
/* Saturated to 0x7fffffff if the input is -1(0x80000000) */
|
||||
*pDst++ = *pSrc++;
|
||||
in = *pSrc++;
|
||||
*pDst++ = (in == 0x80000000) ? 0x7fffffff : -in;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of cmplx_conj group
|
||||
*/
|
||||
@@ -1,160 +0,0 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_cmplx_dot_prod_f32.c
|
||||
*
|
||||
* Description: Floating-point complex dot product
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
* ---------------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupCmplxMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup cmplx_dot_prod Complex Dot Product
|
||||
*
|
||||
* Computes the dot product of two complex vectors.
|
||||
* The vectors are multiplied element-by-element and then summed.
|
||||
*
|
||||
* The <code>pSrcA</code> points to the first complex input vector and
|
||||
* <code>pSrcB</code> points to the second complex input vector.
|
||||
* <code>numSamples</code> specifies the number of complex samples
|
||||
* and the data in each array is stored in an interleaved fashion
|
||||
* (real, imag, real, imag, ...).
|
||||
* Each array has a total of <code>2*numSamples</code> values.
|
||||
*
|
||||
* The underlying algorithm is used:
|
||||
* <pre>
|
||||
* realResult=0;
|
||||
* imagResult=0;
|
||||
* for(n=0; n<numSamples; n++) {
|
||||
* realResult += pSrcA[(2*n)+0]*pSrcB[(2*n)+0] - pSrcA[(2*n)+1]*pSrcB[(2*n)+1];
|
||||
* imagResult += pSrcA[(2*n)+0]*pSrcB[(2*n)+1] + pSrcA[(2*n)+1]*pSrcB[(2*n)+0];
|
||||
* }
|
||||
* </pre>
|
||||
*
|
||||
* There are separate functions for floating-point, Q15, and Q31 data types.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup cmplx_dot_prod
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Floating-point complex dot product
|
||||
* @param *pSrcA points to the first input vector
|
||||
* @param *pSrcB points to the second input vector
|
||||
* @param numSamples number of complex samples in each vector
|
||||
* @param *realResult real part of the result returned here
|
||||
* @param *imagResult imaginary part of the result returned here
|
||||
* @return none.
|
||||
*/
|
||||
|
||||
void arm_cmplx_dot_prod_f32(
|
||||
float32_t * pSrcA,
|
||||
float32_t * pSrcB,
|
||||
uint32_t numSamples,
|
||||
float32_t * realResult,
|
||||
float32_t * imagResult)
|
||||
{
|
||||
float32_t real_sum = 0.0f, imag_sum = 0.0f; /* Temporary result storage */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = numSamples >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* CReal = A[0]* B[0] + A[2]* B[2] + A[4]* B[4] + .....+ A[numSamples-2]* B[numSamples-2] */
|
||||
real_sum += (*pSrcA++) * (*pSrcB++);
|
||||
/* CImag = A[1]* B[1] + A[3]* B[3] + A[5]* B[5] + .....+ A[numSamples-1]* B[numSamples-1] */
|
||||
imag_sum += (*pSrcA++) * (*pSrcB++);
|
||||
|
||||
real_sum += (*pSrcA++) * (*pSrcB++);
|
||||
imag_sum += (*pSrcA++) * (*pSrcB++);
|
||||
|
||||
real_sum += (*pSrcA++) * (*pSrcB++);
|
||||
imag_sum += (*pSrcA++) * (*pSrcB++);
|
||||
|
||||
real_sum += (*pSrcA++) * (*pSrcB++);
|
||||
imag_sum += (*pSrcA++) * (*pSrcB++);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the numSamples is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = numSamples % 0x4u;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* CReal = A[0]* B[0] + A[2]* B[2] + A[4]* B[4] + .....+ A[numSamples-2]* B[numSamples-2] */
|
||||
real_sum += (*pSrcA++) * (*pSrcB++);
|
||||
/* CImag = A[1]* B[1] + A[3]* B[3] + A[5]* B[5] + .....+ A[numSamples-1]* B[numSamples-1] */
|
||||
imag_sum += (*pSrcA++) * (*pSrcB++);
|
||||
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
while(numSamples > 0u)
|
||||
{
|
||||
/* CReal = A[0]* B[0] + A[2]* B[2] + A[4]* B[4] + .....+ A[numSamples-2]* B[numSamples-2] */
|
||||
real_sum += (*pSrcA++) * (*pSrcB++);
|
||||
/* CImag = A[1]* B[1] + A[3]* B[3] + A[5]* B[5] + .....+ A[numSamples-1]* B[numSamples-1] */
|
||||
imag_sum += (*pSrcA++) * (*pSrcB++);
|
||||
|
||||
|
||||
/* Decrement the loop counter */
|
||||
numSamples--;
|
||||
}
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
/* Store the real and imaginary results in the destination buffers */
|
||||
*realResult = real_sum;
|
||||
*imagResult = imag_sum;
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of cmplx_dot_prod group
|
||||
*/
|
||||
@@ -1,144 +0,0 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_cmplx_dot_prod_q15.c
|
||||
*
|
||||
* Description: Processing function for the Q15 Complex Dot product
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupCmplxMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup cmplx_dot_prod
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Q15 complex dot product
|
||||
* @param *pSrcA points to the first input vector
|
||||
* @param *pSrcB points to the second input vector
|
||||
* @param numSamples number of complex samples in each vector
|
||||
* @param *realResult real part of the result returned here
|
||||
* @param *imagResult imaginary part of the result returned here
|
||||
* @return none.
|
||||
*
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
* \par
|
||||
* The function is implemented using an internal 64-bit accumulator.
|
||||
* The intermediate 1.15 by 1.15 multiplications are performed with full precision and yield a 2.30 result.
|
||||
* These are accumulated in a 64-bit accumulator with 34.30 precision.
|
||||
* As a final step, the accumulators are converted to 8.24 format.
|
||||
* The return results <code>realResult</code> and <code>imagResult</code> are in 8.24 format.
|
||||
*/
|
||||
|
||||
void arm_cmplx_dot_prod_q15(
|
||||
q15_t * pSrcA,
|
||||
q15_t * pSrcB,
|
||||
uint32_t numSamples,
|
||||
q31_t * realResult,
|
||||
q31_t * imagResult)
|
||||
{
|
||||
q63_t real_sum = 0, imag_sum = 0; /* Temporary result storage */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = numSamples >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* CReal = A[0]* B[0] + A[2]* B[2] + A[4]* B[4] + .....+ A[numSamples-2]* B[numSamples-2] */
|
||||
real_sum += ((q31_t) * pSrcA++ * *pSrcB++);
|
||||
|
||||
/* CImag = A[1]* B[1] + A[3]* B[3] + A[5]* B[5] + .....+ A[numSamples-1]* B[numSamples-1] */
|
||||
imag_sum += ((q31_t) * pSrcA++ * *pSrcB++);
|
||||
|
||||
real_sum += ((q31_t) * pSrcA++ * *pSrcB++);
|
||||
imag_sum += ((q31_t) * pSrcA++ * *pSrcB++);
|
||||
|
||||
real_sum += ((q31_t) * pSrcA++ * *pSrcB++);
|
||||
imag_sum += ((q31_t) * pSrcA++ * *pSrcB++);
|
||||
|
||||
real_sum += ((q31_t) * pSrcA++ * *pSrcB++);
|
||||
imag_sum += ((q31_t) * pSrcA++ * *pSrcB++);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the numSamples is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = numSamples % 0x4u;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* CReal = A[0]* B[0] + A[2]* B[2] + A[4]* B[4] + .....+ A[numSamples-2]* B[numSamples-2] */
|
||||
real_sum += ((q31_t) * pSrcA++ * *pSrcB++);
|
||||
/* CImag = A[1]* B[1] + A[3]* B[3] + A[5]* B[5] + .....+ A[numSamples-1]* B[numSamples-1] */
|
||||
imag_sum += ((q31_t) * pSrcA++ * *pSrcB++);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
while(numSamples > 0u)
|
||||
{
|
||||
/* CReal = A[0]* B[0] + A[2]* B[2] + A[4]* B[4] + .....+ A[numSamples-2]* B[numSamples-2] */
|
||||
real_sum += ((q31_t) * pSrcA++ * *pSrcB++);
|
||||
/* CImag = A[1]* B[1] + A[3]* B[3] + A[5]* B[5] + .....+ A[numSamples-1]* B[numSamples-1] */
|
||||
imag_sum += ((q31_t) * pSrcA++ * *pSrcB++);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
numSamples--;
|
||||
}
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
/* Store the real and imaginary results in 8.24 format */
|
||||
/* Convert real data in 34.30 to 8.24 by 6 right shifts */
|
||||
*realResult = (q31_t) (real_sum) >> 6;
|
||||
/* Convert imaginary data in 34.30 to 8.24 by 6 right shifts */
|
||||
*imagResult = (q31_t) (imag_sum) >> 6;
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of cmplx_dot_prod group
|
||||
*/
|
||||
@@ -1,145 +0,0 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_cmplx_dot_prod_q31.c
|
||||
*
|
||||
* Description: Q31 complex dot product
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupCmplxMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup cmplx_dot_prod
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Q31 complex dot product
|
||||
* @param *pSrcA points to the first input vector
|
||||
* @param *pSrcB points to the second input vector
|
||||
* @param numSamples number of complex samples in each vector
|
||||
* @param *realResult real part of the result returned here
|
||||
* @param *imagResult imaginary part of the result returned here
|
||||
* @return none.
|
||||
*
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
* \par
|
||||
* The function is implemented using an internal 64-bit accumulator.
|
||||
* The intermediate 1.31 by 1.31 multiplications are performed with 64-bit precision and then shifted to 16.48 format.
|
||||
* The internal real and imaginary accumulators are in 16.48 format and provide 15 guard bits.
|
||||
* Additions are nonsaturating and no overflow will occur as long as <code>numSamples</code> is less than 32768.
|
||||
* The return results <code>realResult</code> and <code>imagResult</code> are in 16.48 format.
|
||||
* Input down scaling is not required.
|
||||
*/
|
||||
|
||||
void arm_cmplx_dot_prod_q31(
|
||||
q31_t * pSrcA,
|
||||
q31_t * pSrcB,
|
||||
uint32_t numSamples,
|
||||
q63_t * realResult,
|
||||
q63_t * imagResult)
|
||||
{
|
||||
q63_t real_sum = 0, imag_sum = 0; /* Temporary result storage */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = numSamples >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* CReal = A[0]* B[0] + A[2]* B[2] + A[4]* B[4] + .....+ A[numSamples-2]* B[numSamples-2] */
|
||||
/* Convert real data in 2.62 to 16.48 by 14 right shifts */
|
||||
real_sum += (q63_t) * pSrcA++ * (*pSrcB++) >> 14;
|
||||
/* CImag = A[1]* B[1] + A[3]* B[3] + A[5]* B[5] + .....+ A[numSamples-1]* B[numSamples-1] */
|
||||
/* Convert imag data in 2.62 to 16.48 by 14 right shifts */
|
||||
imag_sum += (q63_t) * pSrcA++ * (*pSrcB++) >> 14;
|
||||
|
||||
real_sum += (q63_t) * pSrcA++ * (*pSrcB++) >> 14;
|
||||
imag_sum += (q63_t) * pSrcA++ * (*pSrcB++) >> 14;
|
||||
|
||||
real_sum += (q63_t) * pSrcA++ * (*pSrcB++) >> 14;
|
||||
imag_sum += (q63_t) * pSrcA++ * (*pSrcB++) >> 14;
|
||||
|
||||
real_sum += (q63_t) * pSrcA++ * (*pSrcB++) >> 14;
|
||||
imag_sum += (q63_t) * pSrcA++ * (*pSrcB++) >> 14;
|
||||
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the numSamples is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = numSamples % 0x4u;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* CReal = A[0]* B[0] + A[2]* B[2] + A[4]* B[4] + .....+ A[numSamples-2]* B[numSamples-2] */
|
||||
real_sum += (q63_t) * pSrcA++ * (*pSrcB++) >> 14;
|
||||
/* CImag = A[1]* B[1] + A[3]* B[3] + A[5]* B[5] + .....+ A[numSamples-1]* B[numSamples-1] */
|
||||
imag_sum += (q63_t) * pSrcA++ * (*pSrcB++) >> 14;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
while(numSamples > 0u)
|
||||
{
|
||||
/* outReal = realA[0]* realB[0] + realA[2]* realB[2] + realA[4]* realB[4] + .....+ realA[numSamples-2]* realB[numSamples-2] */
|
||||
real_sum += (q63_t) * pSrcA++ * (*pSrcB++) >> 14;
|
||||
/* outImag = imagA[1]* imagB[1] + imagA[3]* imagB[3] + imagA[5]* imagB[5] + .....+ imagA[numSamples-1]* imagB[numSamples-1] */
|
||||
imag_sum += (q63_t) * pSrcA++ * (*pSrcB++) >> 14;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
numSamples--;
|
||||
}
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
/* Store the real and imaginary results in 16.48 format */
|
||||
*realResult = real_sum;
|
||||
*imagResult = imag_sum;
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of cmplx_dot_prod group
|
||||
*/
|
||||
@@ -1,157 +0,0 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_cmplx_mag_f32.c
|
||||
*
|
||||
* Description: Floating-point complex magnitude.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
* ---------------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupCmplxMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup cmplx_mag Complex Magnitude
|
||||
*
|
||||
* Computes the magnitude of the elements of a complex data vector.
|
||||
*
|
||||
* The <code>pSrc</code> points to the source data and
|
||||
* <code>pDst</code> points to the where the result should be written.
|
||||
* <code>numSamples</code> specifies the number of complex samples
|
||||
* in the input array and the data is stored in an interleaved fashion
|
||||
* (real, imag, real, imag, ...).
|
||||
* The input array has a total of <code>2*numSamples</code> values;
|
||||
* the output array has a total of <code>numSamples</code> values.
|
||||
* The underlying algorithm is used:
|
||||
*
|
||||
* <pre>
|
||||
* for(n=0; n<numSamples; n++) {
|
||||
* pDst[n] = sqrt(pSrc[(2*n)+0]^2 + pSrc[(2*n)+1]^2);
|
||||
* }
|
||||
* </pre>
|
||||
*
|
||||
* There are separate functions for floating-point, Q15, and Q31 data types.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup cmplx_mag
|
||||
* @{
|
||||
*/
|
||||
/**
|
||||
* @brief Floating-point complex magnitude.
|
||||
* @param[in] *pSrc points to complex input buffer
|
||||
* @param[out] *pDst points to real output buffer
|
||||
* @param[in] numSamples number of complex samples in the input vector
|
||||
* @return none.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
void arm_cmplx_mag_f32(
|
||||
float32_t * pSrc,
|
||||
float32_t * pDst,
|
||||
uint32_t numSamples)
|
||||
{
|
||||
float32_t realIn, imagIn; /* Temporary variables to hold input values */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = numSamples >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
|
||||
/* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */
|
||||
realIn = *pSrc++;
|
||||
imagIn = *pSrc++;
|
||||
/* store the result in the destination buffer. */
|
||||
arm_sqrt_f32((realIn * realIn) + (imagIn * imagIn), pDst++);
|
||||
|
||||
realIn = *pSrc++;
|
||||
imagIn = *pSrc++;
|
||||
arm_sqrt_f32((realIn * realIn) + (imagIn * imagIn), pDst++);
|
||||
|
||||
realIn = *pSrc++;
|
||||
imagIn = *pSrc++;
|
||||
arm_sqrt_f32((realIn * realIn) + (imagIn * imagIn), pDst++);
|
||||
|
||||
realIn = *pSrc++;
|
||||
imagIn = *pSrc++;
|
||||
arm_sqrt_f32((realIn * realIn) + (imagIn * imagIn), pDst++);
|
||||
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the numSamples is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = numSamples % 0x4u;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */
|
||||
realIn = *pSrc++;
|
||||
imagIn = *pSrc++;
|
||||
/* store the result in the destination buffer. */
|
||||
arm_sqrt_f32((realIn * realIn) + (imagIn * imagIn), pDst++);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
while(numSamples > 0u)
|
||||
{
|
||||
/* out = sqrt((real * real) + (imag * imag)) */
|
||||
realIn = *pSrc++;
|
||||
imagIn = *pSrc++;
|
||||
/* store the result in the destination buffer. */
|
||||
arm_sqrt_f32((realIn * realIn) + (imagIn * imagIn), pDst++);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
numSamples--;
|
||||
}
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of cmplx_mag group
|
||||
*/
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user