|
老版本DSP库没有这些宏定义的,这个是新版本增加的。
ARM_MATH_BIG_ENDIAN:
大端格式。
ARM_MATH_MATRIX_CHECK:
检测矩阵的输入输出大小
ARM_MATH_NEON:
ARM_MATH_NEON_EXPERIMENTAL:
这两个暂时用不到,因为M0,M3,M4和M7内核不支持NEON指令(NEON是Cortex-A系列处理器的SIMD扩展),Cortex-M内核要用向量指令,需要等待升级到ARMv8.1-M架构,该架构提供的是MVE(Helium)向量扩展。
ARM_MATH_ROUNDING:
主要用在浮点数转Q31,Q15和Q7时,类似四舍五入的处理上,其它函数没用到。
ARM_MATH_LOOPUNROLL:
用于4个为一组的小批量处理上,加快执行速度。
通过下面的求绝对值函数,可以方便的看出区别:
- void arm_abs_f32(
- const float32_t * pSrc,
- float32_t * pDst,
- uint32_t blockSize)
- {
- uint32_t blkCnt; /* Loop counter */
- #if defined(ARM_MATH_NEON)
- float32x4_t vec1;
- float32x4_t res;
- /* Compute 4 outputs at a time */
- blkCnt = blockSize >> 2U;
- while (blkCnt > 0U)
- {
- /* C = |A| */
- /* Calculate absolute values and then store the results in the destination buffer. */
- vec1 = vld1q_f32(pSrc);
- res = vabsq_f32(vec1);
- vst1q_f32(pDst, res);
- /* Increment pointers */
- pSrc += 4;
- pDst += 4;
-
- /* Decrement the loop counter */
- blkCnt--;
- }
- /* Tail */
- blkCnt = blockSize & 0x3;
- #else
- #if defined (ARM_MATH_LOOPUNROLL)
- /* Loop unrolling: Compute 4 outputs at a time */
- blkCnt = blockSize >> 2U;
- while (blkCnt > 0U)
- {
- /* C = |A| */
- /* Calculate absolute and store result in destination buffer. */
- *pDst++ = fabsf(*pSrc++);
- *pDst++ = fabsf(*pSrc++);
- *pDst++ = fabsf(*pSrc++);
- *pDst++ = fabsf(*pSrc++);
- /* Decrement loop counter */
- blkCnt--;
- }
- /* Loop unrolling: Compute remaining outputs */
- blkCnt = blockSize % 0x4U;
- #else
- /* Initialize blkCnt with number of samples */
- blkCnt = blockSize;
- #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
- #endif /* #if defined(ARM_MATH_NEON) */
- while (blkCnt > 0U)
- {
- /* C = |A| */
- /* Calculate absolute and store result in destination buffer. */
- *pDst++ = fabsf(*pSrc++);
- /* Decrement loop counter */
- blkCnt--;
- }
- }
复制代码
|
|