|
楼主 |
发表于 2015-3-19 10:54:22
|
显示全部楼层
9.5 比例因子(Vector Scale)
这部分函数主要用于实现数据的比例放大和缩小,浮点数据公式描述如下:
pDst[n] = pSrc[n] * scale, 0 <= n < blockSize.
如果是Q31,Q15,Q7格式的数据,公式描述如下:
pDst[n] = (pSrc[n] * scaleFract) << shift, 0 <= n < blockSize.
这种情况下,比例因子就是:
scale = scaleFract * 2^shift.
注意,这部分函数支持目标指针和源指针指向相同的缓冲区。
9.5.1 arm_scale_f32
这个函数用于求32位浮点数的比例放缩,源代码分析如下:- /**
- * @brief Multiplies a floating-point vector by a scalar.
- *
- * Computes pDst[n] = pSrc[n] * scale for 0 <= n < blockSize. Unless
- * ARM_MATH_CM0_FAMILY is defined, samples are processed four at a time and
- * the remaining blockSize % 4 samples are handled by the final loop;
- * otherwise the final loop processes every sample.
- *
- * @param[in] *pSrc points to the input vector
- * @param[in] scale scale factor to be applied
- * @param[out] *pDst points to the output vector
- * @param[in] blockSize number of samples in the vector
- * @return none.
- */
-
-
- void arm_scale_f32(
- float32_t * pSrc,
- float32_t scale,
- float32_t * pDst,
- uint32_t blockSize)
- {
- uint32_t blkCnt; /* loop counter */
- #ifndef ARM_MATH_CM0_FAMILY
-
- /* Run the below code for Cortex-M4 and Cortex-M3 */
- float32_t in1, in2, in3, in4; /* temporary variables */
-
- /* Loop unrolling: number of complete groups of four samples */
- blkCnt = blockSize >> 2u;
-
- /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
- ** a second loop below computes the remaining 1 to 3 samples. */
- while(blkCnt > 0u)
- {
- /* C = A * scale */
- /* Scale the input and then store the results in the destination buffer. */
- /* read input samples from source */
- in1 = *pSrc;
- in2 = *(pSrc + 1);
-
- /* multiply with scaling factor */ (1)
- in1 = in1 * scale;
-
- /* read input sample from source */
- in3 = *(pSrc + 2);
-
- /* multiply with scaling factor */
- in2 = in2 * scale;
-
- /* read input sample from source */
- in4 = *(pSrc + 3);
-
- /* multiply with scaling factor */
- in3 = in3 * scale;
- in4 = in4 * scale;
- /* store the result to destination (all four inputs were read before the first store) */
- *pDst = in1;
- *(pDst + 1) = in2;
- *(pDst + 2) = in3;
- *(pDst + 3) = in4;
-
- /* update pointers to process next samples */
- pSrc += 4u;
- pDst += 4u;
-
- /* Decrement the loop counter */
- blkCnt--;
- }
-
- /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
- ** No loop unrolling is used. */
- blkCnt = blockSize % 0x4u;
-
- #else
-
- /* Run the below code for Cortex-M0 */
-
- /* Initialize blkCnt with number of samples */
- blkCnt = blockSize;
-
- #endif /* #ifndef ARM_MATH_CM0_FAMILY */
-
- /* Tail loop: leftover samples after unrolling, or every sample on Cortex-M0 */
- while(blkCnt > 0u)
- {
- /* C = A * scale */
- /* Scale the input and then store the result in the destination buffer. */
- *pDst++ = (*pSrc++) * scale;
-
- /* Decrement the loop counter */
- blkCnt--;
- }
- }
复制代码 1. 浮点数据的比例放缩计算比较简单,源浮点数乘以相应的比例因子即可。
9.5.2 arm_scale_q31
这个函数用于求32位定点数的比例放缩,源代码分析如下:
- /**
- * @brief Multiplies a Q31 vector by a scalar.
- * @param[in] *pSrc points to the input vector
- * @param[in] scaleFract fractional portion of the scale value
- * @param[in] shift number of bits to shift the result by
- * @param[out] *pDst points to the output vector
- * @param[in] blockSize number of samples in the vector
- * @return none.
- *
- * <b>Scaling and Overflow Behavior:</b> (1)
- * \par
- * The input data <code>*pSrc</code> and <code>scaleFract</code> are in 1.31 format.
- * These are multiplied to yield a 2.62 intermediate result and this is shifted with saturation to 1.31 format.
- *
- * Implementation note: each 64-bit product is shifted right by 32, which keeps
- * its upper 32 bits (2.30 format). One extra left shift brings that back to
- * 1.31, hence kShift = shift + 1. A non-negative kShift is applied as a left
- * shift followed by a saturation check; a negative kShift is applied as a
- * right shift by -kShift with no saturation step.
- */
-
- void arm_scale_q31(
- q31_t * pSrc,
- q31_t scaleFract,
- int8_t shift,
- q31_t * pDst,
- uint32_t blockSize)
- {
- int8_t kShift = shift + 1; /* Shift to apply after scaling: +1 turns the 2.30 upper product word back into 1.31 */ (2)
- int8_t sign = (kShift & 0x80); /* non-zero when bit 7 of kShift is set, i.e. kShift is negative */
- uint32_t blkCnt; /* loop counter */
- q31_t in, out;
-
- #ifndef ARM_MATH_CM0_FAMILY
-
- /* Run the below code for Cortex-M4 and Cortex-M3 */
-
- q31_t in1, in2, in3, in4; /* temporary input variables */
- q31_t out1, out2, out3, out4; /* temporary output variables */
-
-
- /* Loop unrolling: number of complete groups of four samples */
- blkCnt = blockSize >> 2u;
-
- if(sign == 0u) (3)
- {
- /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
- ** a second loop below computes the remaining 1 to 3 samples. */
- while(blkCnt > 0u)
- {
- /* read four inputs from source */
- in1 = *pSrc;
- in2 = *(pSrc + 1);
- in3 = *(pSrc + 2);
- in4 = *(pSrc + 3);
-
- /* multiply input with scalar value, keeping the upper 32 bits of the 64-bit product */ (4)
- in1 = ((q63_t) in1 * scaleFract) >> 32;
- in2 = ((q63_t) in2 * scaleFract) >> 32;
- in3 = ((q63_t) in3 * scaleFract) >> 32;
- in4 = ((q63_t) in4 * scaleFract) >> 32;
-
- /* apply shifting */
- out1 = in1 << kShift;
- out2 = in2 << kShift;
-
- /* saturate the results: if shifting back does not reproduce the input, the
- * left shift lost bits. 0x7FFFFFFF ^ (in >> 31) then yields 0x7FFFFFFF for a
- * non-negative input and 0x80000000 for a negative one (this assumes >> on a
- * negative value is an arithmetic shift). */
- if(in1 != (out1 >> kShift)) (5)
- out1 = 0x7FFFFFFF ^ (in1 >> 31);
-
- if(in2 != (out2 >> kShift))
- out2 = 0x7FFFFFFF ^ (in2 >> 31);
-
- out3 = in3 << kShift;
- out4 = in4 << kShift;
-
- *pDst = out1;
- *(pDst + 1) = out2;
-
- if(in3 != (out3 >> kShift))
- out3 = 0x7FFFFFFF ^ (in3 >> 31);
-
- if(in4 != (out4 >> kShift))
- out4 = 0x7FFFFFFF ^ (in4 >> 31);
-
- /* Store result destination */
- *(pDst + 2) = out3;
- *(pDst + 3) = out4;
-
- /* Update pointers to process next samples */
- pSrc += 4u;
- pDst += 4u;
-
- /* Decrement the loop counter */
- blkCnt--;
- }
-
- }
- else {
- /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
- ** a second loop below computes the remaining 1 to 3 samples. */
- while(blkCnt > 0u)
- {
- /* read four inputs from source */
- in1 = *pSrc;
- in2 = *(pSrc + 1);
- in3 = *(pSrc + 2);
- in4 = *(pSrc + 3);
-
- /* multiply input with scalar value, keeping the upper 32 bits of the 64-bit product */
- in1 = ((q63_t) in1 * scaleFract) >> 32;
- in2 = ((q63_t) in2 * scaleFract) >> 32;
- in3 = ((q63_t) in3 * scaleFract) >> 32;
- in4 = ((q63_t) in4 * scaleFract) >> 32;
-
- /* apply shifting: kShift is negative in this branch, so -kShift is the right-shift count */ (6)
- out1 = in1 >> -kShift;
- out2 = in2 >> -kShift;
-
- out3 = in3 >> -kShift;
- out4 = in4 >> -kShift;
-
- /* Store result destination */
- *pDst = out1;
- *(pDst + 1) = out2;
-
- *(pDst + 2) = out3;
- *(pDst + 3) = out4;
-
- /* Update pointers to process next samples */
- pSrc += 4u;
- pDst += 4u;
-
- /* Decrement the loop counter */
- blkCnt--;
- }
- }
- /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
- ** No loop unrolling is used. */
- blkCnt = blockSize % 0x4u;
-
- #else
-
- /* Run the below code for Cortex-M0 */
-
- /* Initialize blkCnt with number of samples */
- blkCnt = blockSize;
-
- #endif /* #ifndef ARM_MATH_CM0_FAMILY */
-
- /* Tail processing: same two cases as above, one sample per iteration */
- if(sign == 0)
- {
- while(blkCnt > 0u)
- {
- /* C = A * scale */
- /* Scale the input and then store the result in the destination buffer. */
- in = *pSrc++;
- in = ((q63_t) in * scaleFract) >> 32;
-
- out = in << kShift;
- if(in != (out >> kShift)) /* left shift lost bits: saturate by the sign of in */
- out = 0x7FFFFFFF ^ (in >> 31);
-
- *pDst++ = out;
-
- /* Decrement the loop counter */
- blkCnt--;
- }
- }
- else
- {
- while(blkCnt > 0u)
- {
- /* C = A * scale */
- /* Scale the input and then store the result in the destination buffer. */
- in = *pSrc++;
- in = ((q63_t) in * scaleFract) >> 32;
-
- out = in >> -kShift; /* kShift is negative here; no saturation step follows */
-
- *pDst++ = out;
-
- /* Decrement the loop counter */
- blkCnt--;
- }
-
- }
- }
复制代码 1. 源数据和比例因子都是Q31格式。这样他们的乘积就是1.31 * 1.31 = 2.62格式。由于输出结果也是Q31格式,那么源数据和比例因子的乘积需要右移32位,并且输出结果需要饱和处理。
2. 这里加1的原因:两个Q31数的乘积是2.62格式,右移32位后保留的是64位乘积的高32位,即2.30格式;要恢复成1.31(Q31)格式还需要再左移1位,所以实际的移位量是kShift = shift + 1。
3. 如果位移是正值,那么就是左移位,否则就是右移位。
4. 将源数据和比例因子的乘积右移32位,取64位乘积的高32位(此时为2.30格式),再通过后面的kShift移位恢复为Q31格式。
5. 这里是对结果的饱和处理。
6. kShift为负值时实际执行的是右移。数值的右移不存在溢出,因此不需要饱和处理;由于kShift本身是负数,这里直接取反(-kShift)作为右移的位数即可。
9.5.3 arm_scale_q15
这个函数用于求16位定点数的比例放缩,源代码分析如下:
- /**
- * @brief Multiplies a Q15 vector by a scalar.
- * @param[in] *pSrc points to the input vector
- * @param[in] scaleFract fractional portion of the scale value
- * @param[in] shift number of bits to shift the result by
- * @param[out] *pDst points to the output vector
- * @param[in] blockSize number of samples in the vector
- * @return none.
- *
- * <b>Scaling and Overflow Behavior:</b> (1)
- * \par
- * The input data <code>*pSrc</code> and <code>scaleFract</code> are in 1.15 format.
- * These are multiplied to yield a 2.30 intermediate result and this is shifted with saturation to 1.15 format.
- *
- * Implementation note: the 2.30 product needs a right shift by 15 to return
- * to 1.15, and the requested left shift is folded into it, so a single right
- * shift by kShift = 15 - shift is applied before the __SSAT(..., 16) step.
- * NOTE(review): a shift greater than 15 makes kShift negative, and a right
- * shift by a negative count is undefined in C - confirm callers stay in range.
- */
-
-
- void arm_scale_q15(
- q15_t * pSrc,
- q15_t scaleFract,
- int8_t shift,
- q15_t * pDst,
- uint32_t blockSize)
- {
- int8_t kShift = 15 - shift; /* shift to apply after scaling: 15 bits back to 1.15, minus the requested left shift */ (2)
- uint32_t blkCnt; /* loop counter */
-
- #ifndef ARM_MATH_CM0_FAMILY
-
- /* Run the below code for Cortex-M4 and Cortex-M3 */
- q15_t in1, in2, in3, in4;
- q31_t inA1, inA2; /* Temporary variables */
- q31_t out1, out2, out3, out4;
-
-
- /* Loop unrolling: number of complete groups of four samples */
- blkCnt = blockSize >> 2u;
-
- /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
- ** a second loop below computes the remaining 1 to 3 samples. */
- while(blkCnt > 0u)
- {
- /* Reading 2 inputs per 32-bit access (four q15 samples in total) */
- inA1 = *__SIMD32(pSrc)++; (3)
- inA2 = *__SIMD32(pSrc)++;
-
- /* C = A * scale */
- /* Scale the inputs and then store the 2 results in the destination buffer
- * in single cycle by packing the outputs.
- * The upper halfword (>> 16) and the lower halfword (cast to q15_t) of each
- * 32-bit word are multiplied separately. */
- out1 = (q31_t) ((q15_t) (inA1 >> 16) * scaleFract); (4)
- out2 = (q31_t) ((q15_t) inA1 * scaleFract);
- out3 = (q31_t) ((q15_t) (inA2 >> 16) * scaleFract);
- out4 = (q31_t) ((q15_t) inA2 * scaleFract);
-
- /* apply shifting */
- out1 = out1 >> kShift;
- out2 = out2 >> kShift;
- out3 = out3 >> kShift;
- out4 = out4 >> kShift;
-
- /* saturate the output (__SSAT is called with a width argument of 16) */
- in1 = (q15_t) (__SSAT(out1, 16)); (5)
- in2 = (q15_t) (__SSAT(out2, 16));
- in3 = (q15_t) (__SSAT(out3, 16));
- in4 = (q15_t) (__SSAT(out4, 16));
-
- /* store the result to destination, two packed q15 results per 32-bit write */ (6)
- *__SIMD32(pDst)++ = __PKHBT(in2, in1, 16);
- *__SIMD32(pDst)++ = __PKHBT(in4, in3, 16);
-
- /* Decrement the loop counter */
- blkCnt--;
- }
-
- /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
- ** No loop unrolling is used. */
- blkCnt = blockSize % 0x4u;
-
- while(blkCnt > 0u)
- {
- /* C = A * scale */
- /* Scale the input and then store the result in the destination buffer. */
- *pDst++ = (q15_t) (__SSAT(((*pSrc++) * scaleFract) >> kShift, 16));
-
- /* Decrement the loop counter */
- blkCnt--;
- }
-
- #else
-
- /* Run the below code for Cortex-M0 */
-
- /* Initialize blkCnt with number of samples */
- blkCnt = blockSize;
-
- while(blkCnt > 0u)
- {
- /* C = A * scale */
- /* Scale the input and then store the result in the destination buffer. */
- *pDst++ = (q15_t) (__SSAT(((q31_t) * pSrc++ * scaleFract) >> kShift, 16));
-
- /* Decrement the loop counter */
- blkCnt--;
- }
-
- #endif /* #ifndef ARM_MATH_CM0_FAMILY */
-
- }
复制代码 1. 源数据和比例因子的数据格式都是Q15,这样的话,输出结果就是1.15 * 1.15 = 2.30格式,由于输出结果也是Q15格式,所以输出结果需要饱和处理。
2. 这个变量设计很巧妙:2.30格式的乘积右移15位即可回到Q15格式,再把shift合并进去就得到kShift = 15 - shift,这样下面处理正数左移和负数右移就很方便了,可以直接使用一个右移kShift位来实现。
3. 每次读取一个32位字,其中包含两个Q15格式的数据;连续读取两次,共得到四个Q15数据。
4. 将源数据乘以比例因子后赋值给Q31格式的变量。
5. 对输出结果做饱和处理。
6. 通过调用一次__PKHBT指令,将两个Q15格式的数据都赋值给目的变量。
9.5.4 arm_scale_q7
这个函数用于求8位定点数的比例放缩,源代码分析如下:
- /**
- * @brief Multiplies a Q7 vector by a scalar.
- * @param[in] *pSrc points to the input vector
- * @param[in] scaleFract fractional portion of the scale value
- * @param[in] shift number of bits to shift the result by
- * @param[out] *pDst points to the output vector
- * @param[in] blockSize number of samples in the vector
- * @return none.
- *
- * <b>Scaling and Overflow Behavior:</b> (1)
- * \par
- * The input data <code>*pSrc</code> and <code>scaleFract</code> are in 1.7 format.
- * These are multiplied to yield a 2.14 intermediate result and this is shifted with saturation to 1.7 format.
- *
- * Implementation note: the 2.14 product needs a right shift by 7 to return to
- * 1.7, and the requested left shift is folded into it, so a single right
- * shift by kShift = 7 - shift is applied before the __SSAT(..., 8) step.
- * NOTE(review): a shift greater than 7 makes kShift negative, and a right
- * shift by a negative count is undefined in C - confirm callers stay in range.
- */
-
- void arm_scale_q7(
- q7_t * pSrc,
- q7_t scaleFract,
- int8_t shift,
- q7_t * pDst,
- uint32_t blockSize)
- {
- int8_t kShift = 7 - shift; /* shift to apply after scaling: 7 bits back to 1.7, minus the requested left shift */ (2)
- uint32_t blkCnt; /* loop counter */
-
- #ifndef ARM_MATH_CM0_FAMILY
-
- /* Run the below code for Cortex-M4 and Cortex-M3 */
- q7_t in1, in2, in3, in4, out1, out2, out3, out4; /* Temporary variables to store input & output */
-
-
- /* Loop unrolling: number of complete groups of four samples */
- blkCnt = blockSize >> 2u;
-
-
- /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
- ** a second loop below computes the remaining 1 to 3 samples. */
- while(blkCnt > 0u)
- {
- /* Reading 4 inputs from memory */
- in1 = *pSrc++;
- in2 = *pSrc++;
- in3 = *pSrc++;
- in4 = *pSrc++;
-
- /* C = A * scale */
- /* Scale the inputs and then store the results in the temporary variables.
- * __SSAT is called with a width argument of 8 before the cast to q7_t. */
- out1 = (q7_t) (__SSAT(((in1) * scaleFract) >> kShift, 8)); (3)
- out2 = (q7_t) (__SSAT(((in2) * scaleFract) >> kShift, 8));
- out3 = (q7_t) (__SSAT(((in3) * scaleFract) >> kShift, 8));
- out4 = (q7_t) (__SSAT(((in4) * scaleFract) >> kShift, 8));
-
- /* Packing the individual outputs into 32bit and storing in
- * destination buffer in single write */
- *__SIMD32(pDst)++ = __PACKq7(out1, out2, out3, out4); (4)
-
- /* Decrement the loop counter */
- blkCnt--;
- }
-
- /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
- ** No loop unrolling is used. */
- blkCnt = blockSize % 0x4u;
-
- while(blkCnt > 0u)
- {
- /* C = A * scale */
- /* Scale the input and then store the result in the destination buffer. */
- *pDst++ = (q7_t) (__SSAT(((*pSrc++) * scaleFract) >> kShift, 8));
-
- /* Decrement the loop counter */
- blkCnt--;
- }
-
- #else
-
- /* Run the below code for Cortex-M0 */
-
- /* Initialize blkCnt with number of samples */
- blkCnt = blockSize;
-
- while(blkCnt > 0u)
- {
- /* C = A * scale */
- /* Scale the input and then store the result in the destination buffer. */
- *pDst++ = (q7_t) (__SSAT((((q15_t) * pSrc++ * scaleFract) >> kShift), 8));
-
- /* Decrement the loop counter */
- blkCnt--;
- }
-
- #endif /* #ifndef ARM_MATH_CM0_FAMILY */
-
- }
复制代码 1. 源数据和比例因子的数据格式都是Q7,这样的话,输出结果就是1.7 * 1.7 = 2.14格式,由于输出结果也是Q7格式,所以输出结果需要饱和处理。
2. 这个变量设计很巧妙:2.14格式的乘积右移7位即可回到Q7格式,再把shift合并进去就得到kShift = 7 - shift,这样下面处理正数左移和负数右移就很方便了,可以直接使用一个右移kShift位来实现。
3. 对源数据和比例因子的输出结果做8位精度的饱和处理。
9.5.5 实例讲解
实验目的:
1. 四种类型数据(F32、Q31、Q15、Q7)的比例放缩。
实验内容:
1. 按下按键DOWN,串口打印输出结果。
实验现象:
通过串口上位机软件SecureCRT(V5光盘里面有此软件)查看打印信息,现象如下:
程序设计:
- /*
- *********************************************************************************************************
- * Function : DSP_Scale
- * Purpose  : Vector-scale demo. Calls arm_scale_f32, arm_scale_q31, arm_scale_q15 and arm_scale_q7 on
- *            five-element test vectors and prints element 0 of each result with printf.
- * Params   : none
- * Returns  : none
- * Note     : All locals are static, so each scale factor is incremented again on every call and the
- *            printed values change from one invocation to the next.
- *********************************************************************************************************
- */
- static void DSP_Scale(void)
- {
- /* Floating-point test data */
- static float32_t pSrcA[5] = {1.0f,1.0f,1.0f,1.0f,1.0f};
- static float32_t scale = 0.0f;
- static float32_t pDst[5];
-
- /* Q31 test data */
- static q31_t pSrcA1[5] = {0x6fffffff,1,1,1,1};
- static q31_t scale1 = 0x6fffffff;
- static q31_t pDst1[5];
-
- /* Q15 test data */
- static q15_t pSrcA2[5] = {0x6fff,1,1,1,1};
- static q15_t scale2 = 0x6fff;
- static q15_t pDst2[5];
-
- /* Q7 test data */
- static q7_t pSrcA3[5] = {0x70,1,1,1,1};
- static q7_t scale3 = 0x6f;
- static q7_t pDst3[5];
-
- /* The label names the function actually called, and each line ends with CR+LF. */
- scale += 0.1f;
- arm_scale_f32(pSrcA, scale, pDst, 5);
- printf("arm_scale_f32 = %f\r\n", pDst[0]);
-
- /* %x expects an unsigned int, so the fixed-point results are cast explicitly. */
- scale1 += 1;
- arm_scale_q31(pSrcA1, scale1, 0, pDst1, 5);
- printf("arm_scale_q31 = %x\r\n", (unsigned int) pDst1[0]);
-
- scale2 += 1;
- arm_scale_q15(pSrcA2, scale2, 0, pDst2, 5);
- printf("arm_scale_q15 = %x\r\n", (unsigned int) pDst2[0]);
-
- scale3 += 1;
- arm_scale_q7(pSrcA3, scale3, 0, pDst3, 5);
- printf("arm_scale_q7 = %x\r\n", (unsigned int) pDst3[0]);
- printf("***********************************\r\n");
- }
复制代码 |
|