Commit 4cf2719b, authored by Laurent THOMAS, committed by Robert Schmidt

Scale in DFT16 as per normal scaling: sqrt(16) = 4, i.e. an arithmetic right shift by 2

parent f1f02ec0
...@@ -1268,17 +1268,16 @@ __attribute__((always_inline)) static inline void dft16_simd256(int16_t *x, int1 ...@@ -1268,17 +1268,16 @@ __attribute__((always_inline)) static inline void dft16_simd256(int16_t *x, int1
x02t = simde_mm256_adds_epi16(xtmp0,xtmp2); x02t = simde_mm256_adds_epi16(xtmp0,xtmp2);
x13t = simde_mm256_adds_epi16(xtmp1,xtmp3); x13t = simde_mm256_adds_epi16(xtmp1,xtmp3);
ytmp0 = simde_mm256_adds_epi16(x02t,x13t); ytmp0 = simde_mm256_srai_epi16(simde_mm256_adds_epi16(x02t, x13t), 2);
ytmp2 = simde_mm256_subs_epi16(x02t,x13t); ytmp2 = simde_mm256_srai_epi16(simde_mm256_subs_epi16(x02t, x13t), 2);
x1_flip = simde_mm256_sign_epi16(xtmp1, *(simde__m256i *)conjugatedft); x1_flip = simde_mm256_sign_epi16(xtmp1, *(simde__m256i *)conjugatedft);
x1_flip = simde_mm256_shuffle_epi8(x1_flip,complex_shuffle); x1_flip = simde_mm256_shuffle_epi8(x1_flip,complex_shuffle);
x3_flip = simde_mm256_sign_epi16(xtmp3, *(simde__m256i *)conjugatedft); x3_flip = simde_mm256_sign_epi16(xtmp3, *(simde__m256i *)conjugatedft);
x3_flip = simde_mm256_shuffle_epi8(x3_flip,complex_shuffle); x3_flip = simde_mm256_shuffle_epi8(x3_flip,complex_shuffle);
x02t = simde_mm256_subs_epi16(xtmp0,xtmp2); x02t = simde_mm256_subs_epi16(xtmp0,xtmp2);
x13t = simde_mm256_subs_epi16(x1_flip,x3_flip); x13t = simde_mm256_subs_epi16(x1_flip,x3_flip);
ytmp1 = simde_mm256_adds_epi16(x02t,x13t); // x0 + x1f - x2 - x3f ytmp1 = simde_mm256_srai_epi16(simde_mm256_adds_epi16(x02t, x13t), 2); // x0 + x1f - x2 - x3f
ytmp3 = simde_mm256_subs_epi16(x02t,x13t); // x0 - x1f - x2 + x3f ytmp3 = simde_mm256_srai_epi16(simde_mm256_subs_epi16(x02t, x13t), 2); // x0 - x1f - x2 + x3f
// [y0 y1 y2 y3 y16 y17 y18 y19] // [y0 y1 y2 y3 y16 y17 y18 y19]
// [y4 y5 y6 y7 y20 y21 y22 y23] // [y4 y5 y6 y7 y20 y21 y22 y23]
...@@ -1652,14 +1651,14 @@ void dft64(int16_t *x,int16_t *y,unsigned char scale) ...@@ -1652,14 +1651,14 @@ void dft64(int16_t *x,int16_t *y,unsigned char scale)
if (scale>0) { if (scale>0) {
y256[0] = shiftright_int16_simd256(y256[0],3); y256[0] = shiftright_int16_simd256(y256[0], 1);
y256[1] = shiftright_int16_simd256(y256[1],3); y256[1] = shiftright_int16_simd256(y256[1], 1);
y256[2] = shiftright_int16_simd256(y256[2],3); y256[2] = shiftright_int16_simd256(y256[2], 1);
y256[3] = shiftright_int16_simd256(y256[3],3); y256[3] = shiftright_int16_simd256(y256[3], 1);
y256[4] = shiftright_int16_simd256(y256[4],3); y256[4] = shiftright_int16_simd256(y256[4], 1);
y256[5] = shiftright_int16_simd256(y256[5],3); y256[5] = shiftright_int16_simd256(y256[5], 1);
y256[6] = shiftright_int16_simd256(y256[6],3); y256[6] = shiftright_int16_simd256(y256[6], 1);
y256[7] = shiftright_int16_simd256(y256[7],3); y256[7] = shiftright_int16_simd256(y256[7], 1);
} }
simde_mm_empty(); simde_mm_empty();
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment