Commit 3efdb277, authored by dir, committed by Thomas Schlichter

NR SC-FDMA changes for higher RB sizes (RB sizes up to 100 MHz)

parent 403db5f6
...@@ -325,10 +325,10 @@ void nr_ue_layer_mapping(NR_UE_ULSCH_t **ulsch_ue, ...@@ -325,10 +325,10 @@ void nr_ue_layer_mapping(NR_UE_ULSCH_t **ulsch_ue,
void nr_dft(int32_t *z, int32_t *d, uint32_t Msc_PUSCH) void nr_dft(int32_t *z, int32_t *d, uint32_t Msc_PUSCH)
{ {
#if defined(__x86_64__) || defined(__i386__) #if defined(__x86_64__) || +defined(__i386__)
__m128i dft_in128[1][1200], dft_out128[1][1200]; __m128i dft_in128[1][3240], dft_out128[1][3240];
#elif defined(__arm__) #elif defined(__arm__)
int16x8_t dft_in128[1][1200], dft_out128[1][1200]; int16x8_t dft_in128[1][3240], dft_out128[1][3240];
#endif #endif
uint32_t *dft_in0 = (uint32_t*)dft_in128[0], *dft_out0 = (uint32_t*)dft_out128[0]; uint32_t *dft_in0 = (uint32_t*)dft_in128[0], *dft_out0 = (uint32_t*)dft_out128[0];
...@@ -340,8 +340,10 @@ void nr_dft(int32_t *z, int32_t *d, uint32_t Msc_PUSCH) ...@@ -340,8 +340,10 @@ void nr_dft(int32_t *z, int32_t *d, uint32_t Msc_PUSCH)
int16x8_t norm128; int16x8_t norm128;
#endif #endif
for (i = 0, ip = 0; i < Msc_PUSCH; i++, ip+=4) { if ((Msc_PUSCH % 1536) > 0) {
dft_in0[ip] = d[i]; for (i = 0, ip = 0; i < Msc_PUSCH; i++, ip+=4) {
dft_in0[ip] = d[i];
}
} }
switch (Msc_PUSCH) { switch (Msc_PUSCH) {
...@@ -480,7 +482,7 @@ void nr_dft(int32_t *z, int32_t *d, uint32_t Msc_PUSCH) ...@@ -480,7 +482,7 @@ void nr_dft(int32_t *z, int32_t *d, uint32_t Msc_PUSCH)
break; break;
case 972: case 972:
dft(DFT_960,(int16_t*)dft_in0, (int16_t*)dft_out0, 1); dft(DFT_972,(int16_t*)dft_in0, (int16_t*)dft_out0, 1);
break; break;
case 1080: case 1080:
...@@ -494,11 +496,98 @@ void nr_dft(int32_t *z, int32_t *d, uint32_t Msc_PUSCH) ...@@ -494,11 +496,98 @@ void nr_dft(int32_t *z, int32_t *d, uint32_t Msc_PUSCH)
case 1200: case 1200:
dft(DFT_1200,(int16_t*)dft_in0, (int16_t*)dft_out0, 1); dft(DFT_1200,(int16_t*)dft_in0, (int16_t*)dft_out0, 1);
break; break;
case 1296:
dft(DFT_1296,(int16_t*)dft_in0, (int16_t*)dft_out0, 1);
break;
case 1440:
dft(DFT_1440,(int16_t*)dft_in0, (int16_t*)dft_out0, 1);
break;
case 1500:
dft(DFT_1500,(int16_t*)dft_in0, (int16_t*)dft_out0, 1);
break;
case 1536:
//dft(DFT_1536,(int16_t*)dft_in0, (int16_t*)dft_out0, 1);
dft(DFT_1536,(int16_t*)d, (int16_t*)z, 1);
break;
case 1620:
dft(DFT_1620,(int16_t*)dft_in0, (int16_t*)dft_out0, 1);
break;
case 1728:
dft(DFT_1728,(int16_t*)dft_in0, (int16_t*)dft_out0, 1);
break;
case 1800:
dft(DFT_1800,(int16_t*)dft_in0, (int16_t*)dft_out0, 1);
break;
case 1920:
dft(DFT_1920,(int16_t*)dft_in0, (int16_t*)dft_out0, 1);
break;
case 1944:
dft(DFT_1944,(int16_t*)dft_in0, (int16_t*)dft_out0, 1);
break;
case 2160:
dft(DFT_2160,(int16_t*)dft_in0, (int16_t*)dft_out0, 1);
break;
case 2304:
dft(DFT_2304,(int16_t*)dft_in0, (int16_t*)dft_out0, 1);
break;
case 2400:
dft(DFT_2400,(int16_t*)dft_in0, (int16_t*)dft_out0, 1);
break;
case 2592:
dft(DFT_2592,(int16_t*)dft_in0, (int16_t*)dft_out0, 1);
break;
case 2700:
dft(DFT_2700,(int16_t*)dft_in0, (int16_t*)dft_out0, 1);
break;
case 2880:
dft(DFT_2880,(int16_t*)dft_in0, (int16_t*)dft_out0, 1);
break;
case 2916:
dft(DFT_2916,(int16_t*)dft_in0, (int16_t*)dft_out0, 1);
break;
case 3000:
dft(DFT_3000,(int16_t*)dft_in0, (int16_t*)dft_out0, 1);
break;
case 3072:
//dft(DFT_3072,(int16_t*)dft_in0, (int16_t*)dft_out0, 1);
dft(DFT_3072,(int16_t*)d, (int16_t*)z, 1);
break;
case 3240:
dft(DFT_3240,(int16_t*)dft_in0, (int16_t*)dft_out0, 1);
break;
default:
// should not be reached
LOG_E( PHY, "Unsupported Msc_PUSCH value of %"PRIu16"\n", Msc_PUSCH );
return;
} }
for (i = 0, ip = 0; i < Msc_PUSCH; i++, ip+=4) {
z[i] = dft_out0[ip]; if ((Msc_PUSCH % 1536) > 0) {
for (i = 0, ip = 0; i < Msc_PUSCH; i++, ip+=4)
z[i] = dft_out0[ip];
} }
} }
......
...@@ -16,10 +16,10 @@ void nr_idft(int32_t *z, uint32_t Msc_PUSCH) ...@@ -16,10 +16,10 @@ void nr_idft(int32_t *z, uint32_t Msc_PUSCH)
{ {
#if defined(__x86_64__) || defined(__i386__) #if defined(__x86_64__) || defined(__i386__)
__m128i idft_in128[1][1200], idft_out128[1][1200]; __m128i idft_in128[1][3240], idft_out128[1][3240];
__m128i norm128; __m128i norm128;
#elif defined(__arm__) #elif defined(__arm__)
int16x8_t idft_in128[1][1200], idft_out128[1][1200]; int16x8_t idft_in128[1][3240], idft_out128[1][3240];
int16x8_t norm128; int16x8_t norm128;
#endif #endif
int16_t *idft_in0 = (int16_t*)idft_in128[0], *idft_out0 = (int16_t*)idft_out128[0]; int16_t *idft_in0 = (int16_t*)idft_in128[0], *idft_out0 = (int16_t*)idft_out128[0];
...@@ -28,20 +28,19 @@ void nr_idft(int32_t *z, uint32_t Msc_PUSCH) ...@@ -28,20 +28,19 @@ void nr_idft(int32_t *z, uint32_t Msc_PUSCH)
LOG_T(PHY,"Doing lte_idft for Msc_PUSCH %d\n",Msc_PUSCH); LOG_T(PHY,"Doing lte_idft for Msc_PUSCH %d\n",Msc_PUSCH);
// conjugate input if ((Msc_PUSCH % 1536) > 0) {
for (i = 0; i < (Msc_PUSCH>>2); i++) { // conjugate input
for (i = 0; i < (Msc_PUSCH>>2); i++) {
#if defined(__x86_64__)||defined(__i386__) #if defined(__x86_64__)||defined(__i386__)
*&(((__m128i*)z)[i]) = _mm_sign_epi16(*&(((__m128i*)z)[i]), *(__m128i*)&conjugate2[0]); *&(((__m128i*)z)[i]) = _mm_sign_epi16(*&(((__m128i*)z)[i]), *(__m128i*)&conjugate2[0]);
#elif defined(__arm__) #elif defined(__arm__)
*&(((int16x8_t*)z)[i]) = vmulq_s16(*&(((int16x8_t*)z)[i]), *(int16x8_t*)&conjugate2[0]); *&(((int16x8_t*)z)[i]) = vmulq_s16(*&(((int16x8_t*)z)[i]), *(int16x8_t*)&conjugate2[0]);
#endif #endif
}
for (i = 0, ip = 0; i < Msc_PUSCH; i++, ip+=4)
((uint32_t*)idft_in0)[ip+0] = z[i];
} }
for (i=0,ip=0; i<Msc_PUSCH; i++, ip+=4) {
((int32_t*)idft_in0)[ip+0] = z[i];
}
switch (Msc_PUSCH) { switch (Msc_PUSCH) {
case 12: case 12:
dft(DFT_12,(int16_t *)idft_in0, (int16_t *)idft_out0,0); dft(DFT_12,(int16_t *)idft_in0, (int16_t *)idft_out0,0);
...@@ -194,25 +193,102 @@ void nr_idft(int32_t *z, uint32_t Msc_PUSCH) ...@@ -194,25 +193,102 @@ void nr_idft(int32_t *z, uint32_t Msc_PUSCH)
dft(DFT_1200,idft_in0, idft_out0, 1); dft(DFT_1200,idft_in0, idft_out0, 1);
break; break;
case 1296:
dft(DFT_1296,(int16_t*)idft_in0, (int16_t*)idft_out0, 1);
break;
case 1440:
dft(DFT_1440,(int16_t*)idft_in0, (int16_t*)idft_out0, 1);
break;
case 1500:
dft(DFT_1500,(int16_t*)idft_in0, (int16_t*)idft_out0, 1);
break;
case 1536:
//dft(DFT_1536,(int16_t*)idft_in0, (int16_t*)idft_out0, 1);
idft(IDFT_1536,(int16_t*)z, (int16_t*)z, 1);
break;
case 1620:
dft(DFT_1620,(int16_t*)idft_in0, (int16_t*)idft_out0, 1);
break;
case 1728:
dft(DFT_1728,(int16_t*)idft_in0, (int16_t*)idft_out0, 1);
break;
case 1800:
dft(DFT_1800,(int16_t*)idft_in0, (int16_t*)idft_out0, 1);
break;
case 1920:
dft(DFT_1920,(int16_t*)idft_in0, (int16_t*)idft_out0, 1);
break;
case 1944:
dft(DFT_1944,(int16_t*)idft_in0, (int16_t*)idft_out0, 1);
break;
case 2160:
dft(DFT_2160,(int16_t*)idft_in0, (int16_t*)idft_out0, 1);
break;
case 2304:
dft(DFT_2304,(int16_t*)idft_in0, (int16_t*)idft_out0, 1);
break;
case 2400:
dft(DFT_2400,(int16_t*)idft_in0, (int16_t*)idft_out0, 1);
break;
case 2592:
dft(DFT_2592,(int16_t*)idft_in0, (int16_t*)idft_out0, 1);
break;
case 2700:
dft(DFT_2700,(int16_t*)idft_in0, (int16_t*)idft_out0, 1);
break;
case 2880:
dft(DFT_2880,(int16_t*)idft_in0, (int16_t*)idft_out0, 1);
break;
case 2916:
dft(DFT_2916,(int16_t*)idft_in0, (int16_t*)idft_out0, 1);
break;
case 3000:
dft(DFT_3000,(int16_t*)idft_in0, (int16_t*)idft_out0, 1);
break;
case 3072:
//dft(DFT_3072,(int16_t*)idft_in0, (int16_t*)idft_out0, 1);
idft(IDFT_3072,(int16_t*)z, (int16_t*)z, 1);
break;
case 3240:
dft(DFT_3240,(int16_t*)idft_in0, (int16_t*)idft_out0, 1);
break;
default: default:
// should not be reached // should not be reached
LOG_E( PHY, "Unsupported Msc_PUSCH value of %"PRIu16"\n", Msc_PUSCH ); LOG_E( PHY, "Unsupported Msc_PUSCH value of %"PRIu16"\n", Msc_PUSCH );
return; return;
} }
if ((Msc_PUSCH % 1536) > 0) {
for (i = 0, ip = 0; i < Msc_PUSCH; i++, ip+=4)
z[i] = ((uint32_t*)idft_out0)[ip];
// conjugate output
for (i = 0, ip = 0; i < Msc_PUSCH; i++, ip+=4) { for (i = 0; i < (Msc_PUSCH>>2); i++) {
z[i] = ((int32_t*)idft_out0)[ip];
}
// conjugate output
for (i = 0; i < (Msc_PUSCH>>2); i++) {
#if defined(__x86_64__) || defined(__i386__) #if defined(__x86_64__) || defined(__i386__)
((__m128i*)z)[i] = _mm_sign_epi16(((__m128i*)z)[i], *(__m128i*)&conjugate2[0]); ((__m128i*)z)[i] = _mm_sign_epi16(((__m128i*)z)[i], *(__m128i*)&conjugate2[0]);
#elif defined(__arm__) #elif defined(__arm__)
*&(((int16x8_t*)z)[i]) = vmulq_s16(*&(((int16x8_t*)z)[i]), *(int16x8_t*)&conjugate2[0]); *&(((int16x8_t*)z)[i]) = vmulq_s16(*&(((int16x8_t*)z)[i]), *(int16x8_t*)&conjugate2[0]);
#endif #endif
}
} }
#if defined(__x86_64__) || defined(__i386__) #if defined(__x86_64__) || defined(__i386__)
...@@ -222,6 +298,7 @@ void nr_idft(int32_t *z, uint32_t Msc_PUSCH) ...@@ -222,6 +298,7 @@ void nr_idft(int32_t *z, uint32_t Msc_PUSCH)
} }
void nr_ulsch_extract_rbs_single(int32_t **rxdataF, void nr_ulsch_extract_rbs_single(int32_t **rxdataF,
NR_gNB_PUSCH *pusch_vars, NR_gNB_PUSCH *pusch_vars,
unsigned char symbol, unsigned char symbol,
......
...@@ -5952,8 +5952,6 @@ void idft32768(int16_t *x,int16_t *y,unsigned char scale) ...@@ -5952,8 +5952,6 @@ void idft32768(int16_t *x,int16_t *y,unsigned char scale)
#endif #endif
int16_t twa1536[1024],twb1536[1024]; int16_t twa1536[1024],twb1536[1024];
// 512 x 3 // 512 x 3
...@@ -9471,145 +9469,1058 @@ void dft1200(int16_t *x,int16_t *y,unsigned char scale_flag) ...@@ -9471,145 +9469,1058 @@ void dft1200(int16_t *x,int16_t *y,unsigned char scale_flag)
} }
void init_rad4(int N,int16_t *tw) {
int16_t *twa = tw; static int16_t twa1296[431*2*4];
int16_t *twb = twa+(N/2); static int16_t twb1296[431*2*4];
int16_t *twc = twb+(N/2);
int i;
for (i=0;i<(N/4);i++) { void dft1296(int16_t *x,int16_t *y,unsigned char scale_flag) //432 * 3
*twa = (int16_t)round(32767.0*cos(2*M_PI*i/N)); twa++; {
*twa = -(int16_t)round(32767.0*sin(2*M_PI*i/N)); twa++;
*twb = (int16_t)round(32767.0*cos(2*M_PI*2*i/N)); twb++;
*twb = -(int16_t)round(32767.0*sin(2*M_PI*2*i/N)); twb++;
*twc = (int16_t)round(32767.0*cos(2*M_PI*3*i/N)); twc++;
*twc = -(int16_t)round(32767.0*sin(2*M_PI*3*i/N)); twc++;
}
}
void init_rad4_rep(int N,int16_t *twa,int16_t *twb,int16_t *twc) {
int i,j; int i,j;
simd_q15_t *x128=(simd_q15_t *)x;
simd_q15_t *y128=(simd_q15_t *)y;
simd_q15_t *twa128=(simd_q15_t *)&twa1296[0];
simd_q15_t *twb128=(simd_q15_t *)&twb1296[0];
simd_q15_t x2128[1296];// = (simd_q15_t *)&x2128array[0];
simd_q15_t ytmp128[1296];//=&ytmp128array3[0];
for (i=1;i<(N/4);i++) {
twa[0] = (int16_t)round(32767.0*cos(2*M_PI*i/N));
twa[1] = -(int16_t)round(32767.0*sin(2*M_PI*i/N)); for (i=0,j=0; i<432; i++,j+=3) {
twb[0] = (int16_t)round(32767.0*cos(2*M_PI*2*i/N)); x2128[i] = x128[j];
twb[1] = -(int16_t)round(32767.0*sin(2*M_PI*2*i/N)); x2128[i+432] = x128[j+1];
twc[0] = (int16_t)round(32767.0*cos(2*M_PI*3*i/N)); x2128[i+864] = x128[j+2];
twc[1] = -(int16_t)round(32767.0*sin(2*M_PI*3*i/N));
for (j=1;j<4;j++) {
((int32_t*)twa)[j]=((int32_t*)twa)[0];
((int32_t*)twb)[j]=((int32_t*)twb)[0];
((int32_t*)twc)[j]=((int32_t*)twc)[0];
}
twa+=8;
twb+=8;
twc+=8;
} }
}
void init_rad2(int N,int16_t *tw) { dft432((int16_t *)x2128,(int16_t *)ytmp128,1);
dft432((int16_t *)(x2128+432),(int16_t *)(ytmp128+432),1);
dft432((int16_t *)(x2128+864),(int16_t *)(ytmp128+864),1);
int16_t *twa = tw; bfly3_tw1(ytmp128,ytmp128+432,ytmp128+864,y128,y128+432,y128+864);
int i;
for (i=0;i<(N>>1);i++) { for (i=1,j=0; i<432; i++,j++) {
*twa = (int16_t)round(32767.0*cos(2*M_PI*i/N)); twa++; bfly3(ytmp128+i,
*twa = -(int16_t)round(32767.0*sin(2*M_PI*i/N)); twa++; ytmp128+432+i,
ytmp128+864+i,
y128+i,
y128+432+i,
y128+864+i,
twa128+j,
twb128+j);
} }
}
void init_rad2_rep(int N,int16_t *twa) { if (scale_flag==1) {
norm128 = set1_int16(dft_norm_table[14]);
for (i=0; i<1296; i++) {
y128[i] = mulhi_int16(y128[i],norm128);
}
}
_mm_empty();
_m_empty();
};
static int16_t twa1440[479*2*4];
static int16_t twb1440[479*2*4];
void dft1440(int16_t *x,int16_t *y,unsigned char scale_flag) // 480 x 3
{
int i,j; int i,j;
simd_q15_t *x128=(simd_q15_t *)x;
simd_q15_t *y128=(simd_q15_t *)y;
simd_q15_t *twa128=(simd_q15_t *)&twa1440[0];
simd_q15_t *twb128=(simd_q15_t *)&twb1440[0];
simd_q15_t x2128[1440];// = (simd_q15_t *)&x2128array[0];
simd_q15_t ytmp128[1440];//=&ytmp128array3[0];
for (i=1;i<(N/2);i++) {
twa[0] = (int16_t)round(32767.0*cos(2*M_PI*i/N));
twa[1] = -(int16_t)round(32767.0*sin(2*M_PI*i/N)); for (i=0,j=0; i<480; i++,j+=3) {
for (j=1;j<4;j++) { x2128[i] = x128[j];
((int32_t*)twa)[j]=((int32_t*)twa)[0]; x2128[i+480] = x128[j+1];
x2128[i+960] = x128[j+2];
}
dft480((int16_t *)x2128,(int16_t *)ytmp128,1);
dft480((int16_t *)(x2128+480),(int16_t *)(ytmp128+480),1);
dft480((int16_t *)(x2128+960),(int16_t *)(ytmp128+960),1);
bfly3_tw1(ytmp128,ytmp128+480,ytmp128+960,y128,y128+480,y128+960);
for (i=1,j=0; i<480; i++,j++) {
bfly3(ytmp128+i,
ytmp128+480+i,
ytmp128+960+i,
y128+i,
y128+480+i,
y128+960+i,
twa128+j,
twb128+j);
}
if (scale_flag==1) {
norm128 = set1_int16(dft_norm_table[14]);
for (i=0; i<1440; i++) {
y128[i] = mulhi_int16(y128[i],norm128);
} }
twa+=8;
} }
}
void init_rad3(int N,int16_t *twa,int16_t *twb) { _mm_empty();
_m_empty();
int i; };
for (i=0;i<(N/3);i++) { static int16_t twa1500[2392]__attribute__((aligned(32)));
*twa = (int16_t)round(32767.0*cos(2*M_PI*i/N)); twa++; static int16_t twb1500[2392]__attribute__((aligned(32)));
*twa = -(int16_t)round(32767.0*sin(2*M_PI*i/N)); twa++; static int16_t twc1500[2392]__attribute__((aligned(32)));
*twb = (int16_t)round(32767.0*cos(2*M_PI*2*i/N)); twb++; static int16_t twd1500[2392]__attribute__((aligned(32)));
*twb = -(int16_t)round(32767.0*sin(2*M_PI*2*i/N)); twb++;
void dft1500(int16_t *x,int16_t *y,unsigned char scale_flag)
{
int i,j;
simd_q15_t *x128=(simd_q15_t *)x;
simd_q15_t *y128=(simd_q15_t *)y;
simd_q15_t *twa128=(simd_q15_t *)&twa1500[0];
simd_q15_t *twb128=(simd_q15_t *)&twb1500[0];
simd_q15_t *twc128=(simd_q15_t *)&twc1500[0];
simd_q15_t *twd128=(simd_q15_t *)&twd1500[0];
simd_q15_t x2128[1500];// = (simd_q15_t *)&x2128array[0];
simd_q15_t ytmp128[1500];//=&ytmp128array2[0];
for (i=0,j=0; i<300; i++,j+=5) {
x2128[i] = x128[j];
x2128[i+300] = x128[j+1];
x2128[i+600] = x128[j+2];
x2128[i+900] = x128[j+3];
x2128[i+1200] = x128[j+4];
}
dft300((int16_t *)x2128,(int16_t *)ytmp128,1);
dft300((int16_t *)(x2128+300),(int16_t *)(ytmp128+300),1);
dft300((int16_t *)(x2128+600),(int16_t *)(ytmp128+600),1);
dft300((int16_t *)(x2128+900),(int16_t *)(ytmp128+900),1);
dft300((int16_t *)(x2128+1200),(int16_t *)(ytmp128+1200),1);
bfly5_tw1(ytmp128,ytmp128+300,ytmp128+600,ytmp128+900,ytmp128+1200,y128,y128+300,y128+600,y128+900,y128+1200);
for (i=1,j=0; i<300; i++,j++) {
bfly5(ytmp128+i,
ytmp128+300+i,
ytmp128+600+i,
ytmp128+900+i,
ytmp128+1200+i,
y128+i,
y128+300+i,
y128+600+i,
y128+900+i,
y128+1200+i,
twa128+j,
twb128+j,
twc128+j,
twd128+j);
}
if (scale_flag==1) {
norm128 = set1_int16(dft_norm_table[15]);
for (i=0; i<1500; i++) {
y128[i] = mulhi_int16(y128[i],norm128);
}
} }
_mm_empty();
_m_empty();
} }
void init_rad3_rep(int N,int16_t *twa,int16_t *twb) { static int16_t twa1620[539*2*4];
static int16_t twb1620[539*2*4];
void dft1620(int16_t *x,int16_t *y,unsigned char scale_flag) // 540 x 3
{
int i,j; int i,j;
simd_q15_t *x128=(simd_q15_t *)x;
simd_q15_t *y128=(simd_q15_t *)y;
simd_q15_t *twa128=(simd_q15_t *)&twa1620[0];
simd_q15_t *twb128=(simd_q15_t *)&twb1620[0];
simd_q15_t x2128[1620];// = (simd_q15_t *)&x2128array[0];
simd_q15_t ytmp128[1620];//=&ytmp128array3[0];
for (i=1;i<(N/3);i++) {
twa[0] = (int16_t)round(32767.0*cos(2*M_PI*i/N));
twa[1] = -(int16_t)round(32767.0*sin(2*M_PI*i/N)); for (i=0,j=0; i<540; i++,j+=3) {
twb[0] = (int16_t)round(32767.0*cos(2*M_PI*2*i/N)); x2128[i] = x128[j];
twb[1] = -(int16_t)round(32767.0*sin(2*M_PI*2*i/N)); x2128[i+540] = x128[j+1];
for (j=1;j<4;j++) { x2128[i+1080] = x128[j+2];
((int32_t*)twa)[j]=((int32_t*)twa)[0]; }
((int32_t*)twb)[j]=((int32_t*)twb)[0];
dft540((int16_t *)x2128,(int16_t *)ytmp128,1);
dft540((int16_t *)(x2128+540),(int16_t *)(ytmp128+540),1);
dft540((int16_t *)(x2128+1080),(int16_t *)(ytmp128+1080),1);
bfly3_tw1(ytmp128,ytmp128+540,ytmp128+1080,y128,y128+540,y128+1080);
for (i=1,j=0; i<540; i++,j++) {
bfly3(ytmp128+i,
ytmp128+540+i,
ytmp128+1080+i,
y128+i,
y128+540+i,
y128+1080+i,
twa128+j,
twb128+j);
}
if (scale_flag==1) {
norm128 = set1_int16(dft_norm_table[14]);
for (i=0; i<1620; i++) {
y128[i] = mulhi_int16(y128[i],norm128);
} }
twa+=8;
twb+=8;
} }
}
void init_rad5_rep(int N,int16_t *twa,int16_t *twb,int16_t *twc,int16_t *twd) { _mm_empty();
_m_empty();
};
static int16_t twa1728[575*2*4];
static int16_t twb1728[575*2*4];
void dft1728(int16_t *x,int16_t *y,unsigned char scale_flag) // 576 x 3
{
int i,j; int i,j;
simd_q15_t *x128=(simd_q15_t *)x;
simd_q15_t *y128=(simd_q15_t *)y;
simd_q15_t *twa128=(simd_q15_t *)&twa1728[0];
simd_q15_t *twb128=(simd_q15_t *)&twb1728[0];
simd_q15_t x2128[1728];// = (simd_q15_t *)&x2128array[0];
simd_q15_t ytmp128[1728];//=&ytmp128array3[0];
for (i=1;i<(N/5);i++) {
twa[0] = (int16_t)round(32767.0*cos(2*M_PI*i/N));
twa[1] = -(int16_t)round(32767.0*sin(2*M_PI*i/N)); for (i=0,j=0; i<576; i++,j+=3) {
twb[0] = (int16_t)round(32767.0*cos(2*M_PI*2*i/N)); x2128[i] = x128[j];
twb[1] = -(int16_t)round(32767.0*sin(2*M_PI*2*i/N)); x2128[i+576] = x128[j+1];
twc[0] = (int16_t)round(32767.0*cos(2*M_PI*3*i/N)); x2128[i+1152] = x128[j+2];
twc[1] = -(int16_t)round(32767.0*sin(2*M_PI*3*i/N)); }
twd[0] = (int16_t)round(32767.0*cos(2*M_PI*4*i/N));
twd[1] = -(int16_t)round(32767.0*sin(2*M_PI*4*i/N)); dft576((int16_t *)x2128,(int16_t *)ytmp128,1);
for (j=1;j<4;j++) { dft576((int16_t *)(x2128+576),(int16_t *)(ytmp128+576),1);
((int32_t*)twa)[j]=((int32_t*)twa)[0]; dft576((int16_t *)(x2128+1152),(int16_t *)(ytmp128+1152),1);
((int32_t*)twb)[j]=((int32_t*)twb)[0];
((int32_t*)twc)[j]=((int32_t*)twc)[0]; bfly3_tw1(ytmp128,ytmp128+576,ytmp128+1152,y128,y128+576,y128+1152);
((int32_t*)twd)[j]=((int32_t*)twd)[0];
for (i=1,j=0; i<576; i++,j++) {
bfly3(ytmp128+i,
ytmp128+576+i,
ytmp128+1152+i,
y128+i,
y128+576+i,
y128+1152+i,
twa128+j,
twb128+j);
}
if (scale_flag==1) {
norm128 = set1_int16(dft_norm_table[14]);
for (i=0; i<1728; i++) {
y128[i] = mulhi_int16(y128[i],norm128);
} }
twa+=8;
twb+=8;
twc+=8;
twd+=8;
} }
}
/*----------------------------------------------------------------*/
/* dft library entry points: */
int dfts_autoinit(void) _mm_empty();
_m_empty();
};
static int16_t twa1800[599*2*4];
static int16_t twb1800[599*2*4];
void dft1800(int16_t *x,int16_t *y,unsigned char scale_flag) // 600 x 3
{ {
init_rad4(1024,tw1024); int i,j;
init_rad2(2048,tw2048); simd_q15_t *x128=(simd_q15_t *)x;
init_rad4(4096,tw4096); simd_q15_t *y128=(simd_q15_t *)y;
init_rad2(8192,tw8192); simd_q15_t *twa128=(simd_q15_t *)&twa1800[0];
init_rad4(16384,tw16384); simd_q15_t *twb128=(simd_q15_t *)&twb1800[0];
init_rad2(32768,tw32768); simd_q15_t x2128[1800];// = (simd_q15_t *)&x2128array[0];
simd_q15_t ytmp128[1800];//=&ytmp128array3[0];
init_rad3(1536,twa1536,twb1536);
init_rad3(3072,twa3072,twb3072);
init_rad3(6144,twa6144,twb6144);
init_rad3(12288,twa12288,twb12288); for (i=0,j=0; i<600; i++,j+=3) {
init_rad3(18432,twa18432,twb18432); x2128[i] = x128[j];
init_rad3(24576,twa24576,twb24576); x2128[i+600] = x128[j+1];
init_rad3(36864,twa36864,twb36864); x2128[i+1200] = x128[j+2];
init_rad3(49152,twa49152,twb49152); }
init_rad3(98304,twa98304,twb98304);
dft600((int16_t *)x2128,(int16_t *)ytmp128,1);
dft600((int16_t *)(x2128+600),(int16_t *)(ytmp128+600),1);
dft600((int16_t *)(x2128+1200),(int16_t *)(ytmp128+1200),1);
bfly3_tw1(ytmp128,ytmp128+600,ytmp128+1200,y128,y128+600,y128+1200);
for (i=1,j=0; i<600; i++,j++) {
bfly3(ytmp128+i,
ytmp128+600+i,
ytmp128+1200+i,
y128+i,
y128+600+i,
y128+1200+i,
twa128+j,
twb128+j);
}
if (scale_flag==1) {
norm128 = set1_int16(dft_norm_table[14]);
for (i=0; i<1800; i++) {
y128[i] = mulhi_int16(y128[i],norm128);
}
}
_mm_empty();
_m_empty();
};
/* Twiddle tables for dft1920: 479 entries of 2*4 int16_t each, indexed as
 * simd_q15_t by (butterfly index - 1).  Explicitly 32-byte aligned because
 * dft1920 reads them through simd_q15_t pointers.
 * NOTE(review): the code that fills these tables is not visible here - confirm. */
static int16_t twa1920[479*2*4] __attribute__((aligned(32)));
static int16_t twb1920[479*2*4] __attribute__((aligned(32)));
static int16_t twc1920[479*2*4] __attribute__((aligned(32)));

/**
 * 1920-point DFT built from four 480-point DFTs and radix-4 butterflies (480 x 4).
 *
 * @param x          input buffer, read as 1920 simd_q15_t elements
 * @param y          output buffer, written as 1920 simd_q15_t elements
 * @param scale_flag if 1, each output element is scaled with mulhi_int16 by
 *                   dft_norm_table[13]
 *
 * All of x is copied into a local buffer before anything is stored to y.
 */
void dft1920(int16_t *x,int16_t *y,unsigned char scale_flag) // 480 x 4
{
  int i,j;
  simd_q15_t *x128=(simd_q15_t *)x;
  simd_q15_t *y128=(simd_q15_t *)y;
  simd_q15_t *twa128=(simd_q15_t *)&twa1920[0];
  simd_q15_t *twb128=(simd_q15_t *)&twb1920[0];
  simd_q15_t *twc128=(simd_q15_t *)&twc1920[0];
  simd_q15_t x2128[1920];   // input regrouped into four blocks of 480 (index mod 4)
  simd_q15_t ytmp128[1920]; // results of the four 480-point DFTs

  // de-interleave: element 4*i+k goes to block k, position i
  for (i=0,j=0; i<480; i++,j+=4) {
    x2128[i]      = x128[j];
    x2128[i+480]  = x128[j+1];
    x2128[i+960]  = x128[j+2];
    x2128[i+1440] = x128[j+3];
  }

  dft480((int16_t *)x2128,(int16_t *)ytmp128,1);
  dft480((int16_t *)(x2128+480),(int16_t *)(ytmp128+480),1);
  dft480((int16_t *)(x2128+960),(int16_t *)(ytmp128+960),1);
  dft480((int16_t *)(x2128+1440),(int16_t *)(ytmp128+1440),1);

  // position 0 of each block is combined by bfly4_tw1, which takes no twiddle arguments
  bfly4_tw1(ytmp128,ytmp128+480,ytmp128+960,ytmp128+1440,y128,y128+480,y128+960,y128+1440);

  // positions 1..479 use twiddle entry j = i-1
  for (i=1,j=0; i<480; i++,j++) {
    bfly4(ytmp128+i,
          ytmp128+480+i,
          ytmp128+960+i,
          ytmp128+1440+i,
          y128+i,
          y128+480+i,
          y128+960+i,
          y128+1440+i,
          twa128+j,
          twb128+j,
          twc128+j);
  }

  if (scale_flag==1) {
    norm128 = set1_int16(dft_norm_table[13]);

    for (i=0; i<1920; i++) {
      y128[i] = mulhi_int16(y128[i],norm128);
    }
  }

  _mm_empty();
  _m_empty();
}
/* Twiddle tables for dft1944: 647 entries of 2*4 int16_t each, indexed as
 * simd_q15_t by (butterfly index - 1).  Explicitly 32-byte aligned because
 * dft1944 reads them through simd_q15_t pointers.
 * NOTE(review): the code that fills these tables is not visible here - confirm. */
static int16_t twa1944[647*2*4] __attribute__((aligned(32)));
static int16_t twb1944[647*2*4] __attribute__((aligned(32)));

/**
 * 1944-point DFT built from three 648-point DFTs and radix-3 butterflies (648 x 3).
 *
 * @param x          input buffer, read as 1944 simd_q15_t elements
 * @param y          output buffer, written as 1944 simd_q15_t elements
 * @param scale_flag if 1, each output element is scaled with mulhi_int16 by
 *                   dft_norm_table[14]
 *
 * All of x is copied into a local buffer before anything is stored to y.
 */
void dft1944(int16_t *x,int16_t *y,unsigned char scale_flag) // 648 x 3
{
  int i,j;
  simd_q15_t *x128=(simd_q15_t *)x;
  simd_q15_t *y128=(simd_q15_t *)y;
  simd_q15_t *twa128=(simd_q15_t *)&twa1944[0];
  simd_q15_t *twb128=(simd_q15_t *)&twb1944[0];
  simd_q15_t x2128[1944];   // input regrouped into three blocks of 648 (index mod 3)
  simd_q15_t ytmp128[1944]; // results of the three 648-point DFTs

  // de-interleave: element 3*i+k goes to block k, position i
  for (i=0,j=0; i<648; i++,j+=3) {
    x2128[i]      = x128[j];
    x2128[i+648]  = x128[j+1];
    x2128[i+1296] = x128[j+2];
  }

  dft648((int16_t *)x2128,(int16_t *)ytmp128,1);
  dft648((int16_t *)(x2128+648),(int16_t *)(ytmp128+648),1);
  dft648((int16_t *)(x2128+1296),(int16_t *)(ytmp128+1296),1);

  // position 0 of each block is combined by bfly3_tw1, which takes no twiddle arguments
  bfly3_tw1(ytmp128,ytmp128+648,ytmp128+1296,y128,y128+648,y128+1296);

  // positions 1..647 use twiddle entry j = i-1
  for (i=1,j=0; i<648; i++,j++) {
    bfly3(ytmp128+i,
          ytmp128+648+i,
          ytmp128+1296+i,
          y128+i,
          y128+648+i,
          y128+1296+i,
          twa128+j,
          twb128+j);
  }

  if (scale_flag==1) {
    norm128 = set1_int16(dft_norm_table[14]);

    for (i=0; i<1944; i++) {
      y128[i] = mulhi_int16(y128[i],norm128);
    }
  }

  _mm_empty();
  _m_empty();
}
/* Twiddle tables for dft2160: 719 entries of 2*4 int16_t each, indexed as
 * simd_q15_t by (butterfly index - 1).  Explicitly 32-byte aligned because
 * dft2160 reads them through simd_q15_t pointers.
 * NOTE(review): the code that fills these tables is not visible here - confirm. */
static int16_t twa2160[719*2*4] __attribute__((aligned(32)));
static int16_t twb2160[719*2*4] __attribute__((aligned(32)));

/**
 * 2160-point DFT built from three 720-point DFTs and radix-3 butterflies (720 x 3).
 *
 * @param x          input buffer, read as 2160 simd_q15_t elements
 * @param y          output buffer, written as 2160 simd_q15_t elements
 * @param scale_flag if 1, each output element is scaled with mulhi_int16 by
 *                   dft_norm_table[14]
 *
 * All of x is copied into a local buffer before anything is stored to y.
 */
void dft2160(int16_t *x,int16_t *y,unsigned char scale_flag) // 720 x 3
{
  int i,j;
  simd_q15_t *x128=(simd_q15_t *)x;
  simd_q15_t *y128=(simd_q15_t *)y;
  simd_q15_t *twa128=(simd_q15_t *)&twa2160[0];
  simd_q15_t *twb128=(simd_q15_t *)&twb2160[0];
  simd_q15_t x2128[2160];   // input regrouped into three blocks of 720 (index mod 3)
  simd_q15_t ytmp128[2160]; // results of the three 720-point DFTs

  // de-interleave: element 3*i+k goes to block k, position i
  for (i=0,j=0; i<720; i++,j+=3) {
    x2128[i]      = x128[j];
    x2128[i+720]  = x128[j+1];
    x2128[i+1440] = x128[j+2];
  }

  dft720((int16_t *)x2128,(int16_t *)ytmp128,1);
  dft720((int16_t *)(x2128+720),(int16_t *)(ytmp128+720),1);
  dft720((int16_t *)(x2128+1440),(int16_t *)(ytmp128+1440),1);

  // position 0 of each block is combined by bfly3_tw1, which takes no twiddle arguments
  bfly3_tw1(ytmp128,ytmp128+720,ytmp128+1440,y128,y128+720,y128+1440);

  // positions 1..719 use twiddle entry j = i-1
  for (i=1,j=0; i<720; i++,j++) {
    bfly3(ytmp128+i,
          ytmp128+720+i,
          ytmp128+1440+i,
          y128+i,
          y128+720+i,
          y128+1440+i,
          twa128+j,
          twb128+j);
  }

  if (scale_flag==1) {
    norm128 = set1_int16(dft_norm_table[14]);

    for (i=0; i<2160; i++) {
      y128[i] = mulhi_int16(y128[i],norm128);
    }
  }

  _mm_empty();
  _m_empty();
}
/* Twiddle tables for dft2304: 767 entries of 2*4 int16_t each, indexed as
 * simd_q15_t by (butterfly index - 1).  Explicitly 32-byte aligned because
 * dft2304 reads them through simd_q15_t pointers.
 * NOTE(review): the code that fills these tables is not visible here - confirm. */
static int16_t twa2304[767*2*4] __attribute__((aligned(32)));
static int16_t twb2304[767*2*4] __attribute__((aligned(32)));

/**
 * 2304-point DFT built from three 768-point DFTs and radix-3 butterflies (768 x 3).
 *
 * @param x          input buffer, read as 2304 simd_q15_t elements
 * @param y          output buffer, written as 2304 simd_q15_t elements
 * @param scale_flag if 1, each output element is scaled with mulhi_int16 by
 *                   dft_norm_table[14]
 *
 * All of x is copied into a local buffer before anything is stored to y.
 */
void dft2304(int16_t *x,int16_t *y,unsigned char scale_flag) // 768 x 3
{
  int i,j;
  simd_q15_t *x128=(simd_q15_t *)x;
  simd_q15_t *y128=(simd_q15_t *)y;
  simd_q15_t *twa128=(simd_q15_t *)&twa2304[0];
  simd_q15_t *twb128=(simd_q15_t *)&twb2304[0];
  simd_q15_t x2128[2304];   // input regrouped into three blocks of 768 (index mod 3)
  simd_q15_t ytmp128[2304]; // results of the three 768-point DFTs

  // de-interleave: element 3*i+k goes to block k, position i
  for (i=0,j=0; i<768; i++,j+=3) {
    x2128[i]      = x128[j];
    x2128[i+768]  = x128[j+1];
    x2128[i+1536] = x128[j+2];
  }

  dft768((int16_t *)x2128,(int16_t *)ytmp128,1);
  dft768((int16_t *)(x2128+768),(int16_t *)(ytmp128+768),1);
  dft768((int16_t *)(x2128+1536),(int16_t *)(ytmp128+1536),1);

  // position 0 of each block is combined by bfly3_tw1, which takes no twiddle arguments
  bfly3_tw1(ytmp128,ytmp128+768,ytmp128+1536,y128,y128+768,y128+1536);

  // positions 1..767 use twiddle entry j = i-1
  for (i=1,j=0; i<768; i++,j++) {
    bfly3(ytmp128+i,
          ytmp128+768+i,
          ytmp128+1536+i,
          y128+i,
          y128+768+i,
          y128+1536+i,
          twa128+j,
          twb128+j);
  }

  if (scale_flag==1) {
    norm128 = set1_int16(dft_norm_table[14]);

    for (i=0; i<2304; i++) {
      y128[i] = mulhi_int16(y128[i],norm128);
    }
  }

  _mm_empty();
  _m_empty();
}
/* Twiddle tables for dft2400: 599 entries of 2*4 int16_t each, indexed as
 * simd_q15_t by (butterfly index - 1).  Explicitly 32-byte aligned because
 * dft2400 reads them through simd_q15_t pointers.
 * NOTE(review): the code that fills these tables is not visible here - confirm. */
static int16_t twa2400[599*2*4] __attribute__((aligned(32)));
static int16_t twb2400[599*2*4] __attribute__((aligned(32)));
static int16_t twc2400[599*2*4] __attribute__((aligned(32)));

/**
 * 2400-point DFT built from four 600-point DFTs and radix-4 butterflies (600 x 4).
 *
 * @param x          input buffer, read as 2400 simd_q15_t elements
 * @param y          output buffer, written as 2400 simd_q15_t elements
 * @param scale_flag if 1, each output element is scaled with mulhi_int16 by
 *                   dft_norm_table[13]
 *
 * All of x is copied into a local buffer before anything is stored to y.
 */
void dft2400(int16_t *x,int16_t *y,unsigned char scale_flag) // 600 x 4
{
  int i,j;
  simd_q15_t *x128=(simd_q15_t *)x;
  simd_q15_t *y128=(simd_q15_t *)y;
  simd_q15_t *twa128=(simd_q15_t *)&twa2400[0];
  simd_q15_t *twb128=(simd_q15_t *)&twb2400[0];
  simd_q15_t *twc128=(simd_q15_t *)&twc2400[0];
  simd_q15_t x2128[2400];   // input regrouped into four blocks of 600 (index mod 4)
  simd_q15_t ytmp128[2400]; // results of the four 600-point DFTs

  // de-interleave: element 4*i+k goes to block k, position i
  for (i=0,j=0; i<600; i++,j+=4) {
    x2128[i]      = x128[j];
    x2128[i+600]  = x128[j+1];
    x2128[i+1200] = x128[j+2];
    x2128[i+1800] = x128[j+3];
  }

  dft600((int16_t *)x2128,(int16_t *)ytmp128,1);
  dft600((int16_t *)(x2128+600),(int16_t *)(ytmp128+600),1);
  dft600((int16_t *)(x2128+1200),(int16_t *)(ytmp128+1200),1);
  dft600((int16_t *)(x2128+1800),(int16_t *)(ytmp128+1800),1);

  // position 0 of each block is combined by bfly4_tw1, which takes no twiddle arguments
  bfly4_tw1(ytmp128,ytmp128+600,ytmp128+1200,ytmp128+1800,y128,y128+600,y128+1200,y128+1800);

  // positions 1..599 use twiddle entry j = i-1
  for (i=1,j=0; i<600; i++,j++) {
    bfly4(ytmp128+i,
          ytmp128+600+i,
          ytmp128+1200+i,
          ytmp128+1800+i,
          y128+i,
          y128+600+i,
          y128+1200+i,
          y128+1800+i,
          twa128+j,
          twb128+j,
          twc128+j);
  }

  if (scale_flag==1) {
    norm128 = set1_int16(dft_norm_table[13]);

    for (i=0; i<2400; i++) {
      y128[i] = mulhi_int16(y128[i],norm128);
    }
  }

  _mm_empty();
  _m_empty();
}
/* Twiddle tables for dft2592: 863 entries of 2*4 int16_t each, indexed as
 * simd_q15_t by (butterfly index - 1).  Explicitly 32-byte aligned because
 * dft2592 reads them through simd_q15_t pointers.
 * NOTE(review): the code that fills these tables is not visible here - confirm. */
static int16_t twa2592[863*2*4] __attribute__((aligned(32)));
static int16_t twb2592[863*2*4] __attribute__((aligned(32)));

/**
 * 2592-point DFT built from three 864-point DFTs and radix-3 butterflies (864 x 3).
 *
 * @param x          input buffer, read as 2592 simd_q15_t elements
 * @param y          output buffer, written as 2592 simd_q15_t elements
 * @param scale_flag if 1, each output element is scaled with mulhi_int16 by
 *                   dft_norm_table[14]
 *
 * All of x is copied into a local buffer before anything is stored to y.
 */
void dft2592(int16_t *x,int16_t *y,unsigned char scale_flag) // 864 x 3
{
  int i,j;
  simd_q15_t *x128=(simd_q15_t *)x;
  simd_q15_t *y128=(simd_q15_t *)y;
  simd_q15_t *twa128=(simd_q15_t *)&twa2592[0];
  simd_q15_t *twb128=(simd_q15_t *)&twb2592[0];
  simd_q15_t x2128[2592];   // input regrouped into three blocks of 864 (index mod 3)
  simd_q15_t ytmp128[2592]; // results of the three 864-point DFTs

  // de-interleave: element 3*i+k goes to block k, position i
  for (i=0,j=0; i<864; i++,j+=3) {
    x2128[i]      = x128[j];
    x2128[i+864]  = x128[j+1];
    x2128[i+1728] = x128[j+2];
  }

  dft864((int16_t *)x2128,(int16_t *)ytmp128,1);
  dft864((int16_t *)(x2128+864),(int16_t *)(ytmp128+864),1);
  dft864((int16_t *)(x2128+1728),(int16_t *)(ytmp128+1728),1);

  // position 0 of each block is combined by bfly3_tw1, which takes no twiddle arguments
  bfly3_tw1(ytmp128,ytmp128+864,ytmp128+1728,y128,y128+864,y128+1728);

  // positions 1..863 use twiddle entry j = i-1
  for (i=1,j=0; i<864; i++,j++) {
    bfly3(ytmp128+i,
          ytmp128+864+i,
          ytmp128+1728+i,
          y128+i,
          y128+864+i,
          y128+1728+i,
          twa128+j,
          twb128+j);
  }

  if (scale_flag==1) {
    norm128 = set1_int16(dft_norm_table[14]);

    for (i=0; i<2592; i++) {
      y128[i] = mulhi_int16(y128[i],norm128);
    }
  }

  _mm_empty();
  _m_empty();
}
/* Twiddle tables for dft2700: 899 entries of 2*4 int16_t each, indexed as
 * simd_q15_t by (butterfly index - 1).  Explicitly 32-byte aligned because
 * dft2700 reads them through simd_q15_t pointers.
 * NOTE(review): the code that fills these tables is not visible here - confirm. */
static int16_t twa2700[899*2*4] __attribute__((aligned(32)));
static int16_t twb2700[899*2*4] __attribute__((aligned(32)));

/**
 * 2700-point DFT built from three 900-point DFTs and radix-3 butterflies (900 x 3).
 *
 * @param x          input buffer, read as 2700 simd_q15_t elements
 * @param y          output buffer, written as 2700 simd_q15_t elements
 * @param scale_flag if 1, each output element is scaled with mulhi_int16 by
 *                   dft_norm_table[14]
 *
 * All of x is copied into a local buffer before anything is stored to y.
 */
void dft2700(int16_t *x,int16_t *y,unsigned char scale_flag) // 900 x 3
{
  int i,j;
  simd_q15_t *x128=(simd_q15_t *)x;
  simd_q15_t *y128=(simd_q15_t *)y;
  simd_q15_t *twa128=(simd_q15_t *)&twa2700[0];
  simd_q15_t *twb128=(simd_q15_t *)&twb2700[0];
  simd_q15_t x2128[2700];   // input regrouped into three blocks of 900 (index mod 3)
  simd_q15_t ytmp128[2700]; // results of the three 900-point DFTs

  // de-interleave: element 3*i+k goes to block k, position i
  for (i=0,j=0; i<900; i++,j+=3) {
    x2128[i]      = x128[j];
    x2128[i+900]  = x128[j+1];
    x2128[i+1800] = x128[j+2];
  }

  dft900((int16_t *)x2128,(int16_t *)ytmp128,1);
  dft900((int16_t *)(x2128+900),(int16_t *)(ytmp128+900),1);
  dft900((int16_t *)(x2128+1800),(int16_t *)(ytmp128+1800),1);

  // position 0 of each block is combined by bfly3_tw1, which takes no twiddle arguments
  bfly3_tw1(ytmp128,ytmp128+900,ytmp128+1800,y128,y128+900,y128+1800);

  // positions 1..899 use twiddle entry j = i-1
  for (i=1,j=0; i<900; i++,j++) {
    bfly3(ytmp128+i,
          ytmp128+900+i,
          ytmp128+1800+i,
          y128+i,
          y128+900+i,
          y128+1800+i,
          twa128+j,
          twb128+j);
  }

  if (scale_flag==1) {
    norm128 = set1_int16(dft_norm_table[14]);

    for (i=0; i<2700; i++) {
      y128[i] = mulhi_int16(y128[i],norm128);
    }
  }

  _mm_empty();
  _m_empty();
}
/* Twiddle tables for dft2880: 959 entries of 2*4 int16_t each, indexed as
 * simd_q15_t by (butterfly index - 1).  Explicitly 32-byte aligned because
 * dft2880 reads them through simd_q15_t pointers.
 * NOTE(review): the code that fills these tables is not visible here - confirm. */
static int16_t twa2880[959*2*4] __attribute__((aligned(32)));
static int16_t twb2880[959*2*4] __attribute__((aligned(32)));

/**
 * 2880-point DFT built from three 960-point DFTs and radix-3 butterflies (960 x 3).
 *
 * @param x          input buffer, read as 2880 simd_q15_t elements
 * @param y          output buffer, written as 2880 simd_q15_t elements
 * @param scale_flag if 1, each output element is scaled with mulhi_int16 by
 *                   dft_norm_table[14]
 *
 * All of x is copied into a local buffer before anything is stored to y.
 */
void dft2880(int16_t *x,int16_t *y,unsigned char scale_flag) // 960 x 3
{
  int i,j;
  simd_q15_t *x128=(simd_q15_t *)x;
  simd_q15_t *y128=(simd_q15_t *)y;
  simd_q15_t *twa128=(simd_q15_t *)&twa2880[0];
  simd_q15_t *twb128=(simd_q15_t *)&twb2880[0];
  simd_q15_t x2128[2880];   // input regrouped into three blocks of 960 (index mod 3)
  simd_q15_t ytmp128[2880]; // results of the three 960-point DFTs

  // de-interleave: element 3*i+k goes to block k, position i
  for (i=0,j=0; i<960; i++,j+=3) {
    x2128[i]      = x128[j];
    x2128[i+960]  = x128[j+1];
    x2128[i+1920] = x128[j+2];
  }

  dft960((int16_t *)x2128,(int16_t *)ytmp128,1);
  dft960((int16_t *)(x2128+960),(int16_t *)(ytmp128+960),1);
  dft960((int16_t *)(x2128+1920),(int16_t *)(ytmp128+1920),1);

  // position 0 of each block is combined by bfly3_tw1, which takes no twiddle arguments
  bfly3_tw1(ytmp128,ytmp128+960,ytmp128+1920,y128,y128+960,y128+1920);

  // positions 1..959 use twiddle entry j = i-1
  for (i=1,j=0; i<960; i++,j++) {
    bfly3(ytmp128+i,
          ytmp128+960+i,
          ytmp128+1920+i,
          y128+i,
          y128+960+i,
          y128+1920+i,
          twa128+j,
          twb128+j);
  }

  if (scale_flag==1) {
    norm128 = set1_int16(dft_norm_table[14]);

    for (i=0; i<2880; i++) {
      y128[i] = mulhi_int16(y128[i],norm128);
    }
  }

  _mm_empty();
  _m_empty();
}
/* Twiddle tables for dft2916: 971 entries of 2*4 int16_t each, indexed as
 * simd_q15_t by (butterfly index - 1).  Explicitly 32-byte aligned because
 * dft2916 reads them through simd_q15_t pointers.
 * NOTE(review): the code that fills these tables is not visible here - confirm. */
static int16_t twa2916[971*2*4] __attribute__((aligned(32)));
static int16_t twb2916[971*2*4] __attribute__((aligned(32)));

/**
 * 2916-point DFT built from three 972-point DFTs and radix-3 butterflies (972 x 3).
 *
 * @param x          input buffer, read as 2916 simd_q15_t elements
 * @param y          output buffer, written as 2916 simd_q15_t elements
 * @param scale_flag if 1, each output element is scaled with mulhi_int16 by
 *                   dft_norm_table[14]
 *
 * All of x is copied into a local buffer before anything is stored to y.
 */
void dft2916(int16_t *x,int16_t *y,unsigned char scale_flag) // 972 x 3
{
  int i,j;
  simd_q15_t *x128=(simd_q15_t *)x;
  simd_q15_t *y128=(simd_q15_t *)y;
  simd_q15_t *twa128=(simd_q15_t *)&twa2916[0];
  simd_q15_t *twb128=(simd_q15_t *)&twb2916[0];
  simd_q15_t x2128[2916];   // input regrouped into three blocks of 972 (index mod 3)
  simd_q15_t ytmp128[2916]; // results of the three 972-point DFTs

  // de-interleave: element 3*i+k goes to block k, position i
  for (i=0,j=0; i<972; i++,j+=3) {
    x2128[i]      = x128[j];
    x2128[i+972]  = x128[j+1];
    x2128[i+1944] = x128[j+2];
  }

  dft972((int16_t *)x2128,(int16_t *)ytmp128,1);
  dft972((int16_t *)(x2128+972),(int16_t *)(ytmp128+972),1);
  dft972((int16_t *)(x2128+1944),(int16_t *)(ytmp128+1944),1);

  // position 0 of each block is combined by bfly3_tw1, which takes no twiddle arguments
  bfly3_tw1(ytmp128,ytmp128+972,ytmp128+1944,y128,y128+972,y128+1944);

  // positions 1..971 use twiddle entry j = i-1
  for (i=1,j=0; i<972; i++,j++) {
    bfly3(ytmp128+i,
          ytmp128+972+i,
          ytmp128+1944+i,
          y128+i,
          y128+972+i,
          y128+1944+i,
          twa128+j,
          twb128+j);
  }

  if (scale_flag==1) {
    norm128 = set1_int16(dft_norm_table[14]);

    for (i=0; i<2916; i++) {
      y128[i] = mulhi_int16(y128[i],norm128);
    }
  }

  _mm_empty();
  _m_empty();
}
static int16_t twa3000[599*8]__attribute__((aligned(32)));
static int16_t twb3000[599*8]__attribute__((aligned(32)));
static int16_t twc3000[599*8]__attribute__((aligned(32)));
static int16_t twd3000[599*8]__attribute__((aligned(32)));
void dft3000(int16_t *x,int16_t *y,unsigned char scale_flag) // 600 * 5
{
int i,j;
simd_q15_t *x128=(simd_q15_t *)x;
simd_q15_t *y128=(simd_q15_t *)y;
simd_q15_t *twa128=(simd_q15_t *)&twa3000[0];
simd_q15_t *twb128=(simd_q15_t *)&twb3000[0];
simd_q15_t *twc128=(simd_q15_t *)&twc3000[0];
simd_q15_t *twd128=(simd_q15_t *)&twd3000[0];
simd_q15_t x2128[3000];// = (simd_q15_t *)&x2128array[0];
simd_q15_t ytmp128[3000];//=&ytmp128array2[0];
for (i=0,j=0; i<600; i++,j+=5) {
x2128[i] = x128[j];
x2128[i+600] = x128[j+1];
x2128[i+1200] = x128[j+2];
x2128[i+1800] = x128[j+3];
x2128[i+2400] = x128[j+4];
}
dft600((int16_t *)x2128,(int16_t *)ytmp128,1);
dft600((int16_t *)(x2128+600),(int16_t *)(ytmp128+600),1);
dft600((int16_t *)(x2128+1200),(int16_t *)(ytmp128+1200),1);
dft600((int16_t *)(x2128+1800),(int16_t *)(ytmp128+1800),1);
dft600((int16_t *)(x2128+2400),(int16_t *)(ytmp128+2400),1);
bfly5_tw1(ytmp128,ytmp128+600,ytmp128+1200,ytmp128+1800,ytmp128+2400,y128,y128+600,y128+1200,y128+1800,y128+2400);
for (i=1,j=0; i<600; i++,j++) {
bfly5(ytmp128+i,
ytmp128+600+i,
ytmp128+1200+i,
ytmp128+1800+i,
ytmp128+2400+i,
y128+i,
y128+600+i,
y128+1200+i,
y128+1800+i,
y128+2400+i,
twa128+j,
twb128+j,
twc128+j,
twd128+j);
}
if (scale_flag==1) {
norm128 = set1_int16(dft_norm_table[15]);
for (i=0; i<3000; i++) {
y128[i] = mulhi_int16(y128[i],norm128);
}
}
_mm_empty();
_m_empty();
}
/* Twiddle tables for dft3240: 1079 entries of 2*4 int16_t each, indexed as
 * simd_q15_t by (butterfly index - 1).  Explicitly 32-byte aligned because
 * dft3240 reads them through simd_q15_t pointers.
 * NOTE(review): the code that fills these tables is not visible here - confirm. */
static int16_t twa3240[1079*2*4] __attribute__((aligned(32)));
static int16_t twb3240[1079*2*4] __attribute__((aligned(32)));

/**
 * 3240-point DFT built from three 1080-point DFTs and radix-3 butterflies (1080 x 3).
 *
 * @param x          input buffer, read as 3240 simd_q15_t elements
 * @param y          output buffer, written as 3240 simd_q15_t elements
 * @param scale_flag if 1, each output element is scaled with mulhi_int16 by
 *                   dft_norm_table[14]
 *
 * All of x is copied into a local buffer before anything is stored to y.
 */
void dft3240(int16_t *x,int16_t *y,unsigned char scale_flag) // 1080 x 3
{
  int i,j;
  simd_q15_t *x128=(simd_q15_t *)x;
  simd_q15_t *y128=(simd_q15_t *)y;
  simd_q15_t *twa128=(simd_q15_t *)&twa3240[0];
  simd_q15_t *twb128=(simd_q15_t *)&twb3240[0];
  simd_q15_t x2128[3240];   // input regrouped into three blocks of 1080 (index mod 3)
  simd_q15_t ytmp128[3240]; // results of the three 1080-point DFTs

  // de-interleave: element 3*i+k goes to block k, position i
  for (i=0,j=0; i<1080; i++,j+=3) {
    x2128[i]      = x128[j];
    x2128[i+1080] = x128[j+1];
    x2128[i+2160] = x128[j+2];
  }

  dft1080((int16_t *)x2128,(int16_t *)ytmp128,1);
  dft1080((int16_t *)(x2128+1080),(int16_t *)(ytmp128+1080),1);
  dft1080((int16_t *)(x2128+2160),(int16_t *)(ytmp128+2160),1);

  // position 0 of each block is combined by bfly3_tw1, which takes no twiddle arguments
  bfly3_tw1(ytmp128,ytmp128+1080,ytmp128+2160,y128,y128+1080,y128+2160);

  // positions 1..1079 use twiddle entry j = i-1
  for (i=1,j=0; i<1080; i++,j++) {
    bfly3(ytmp128+i,
          ytmp128+1080+i,
          ytmp128+2160+i,
          y128+i,
          y128+1080+i,
          y128+2160+i,
          twa128+j,
          twb128+j);
  }

  if (scale_flag==1) {
    norm128 = set1_int16(dft_norm_table[14]);

    for (i=0; i<3240; i++) {
      y128[i] = mulhi_int16(y128[i],norm128);
    }
  }

  _mm_empty();
  _m_empty();
}
void init_rad4(int N,int16_t *tw) {
int16_t *twa = tw;
int16_t *twb = twa+(N/2);
int16_t *twc = twb+(N/2);
int i;
for (i=0;i<(N/4);i++) {
*twa = (int16_t)round(32767.0*cos(2*M_PI*i/N)); twa++;
*twa = -(int16_t)round(32767.0*sin(2*M_PI*i/N)); twa++;
*twb = (int16_t)round(32767.0*cos(2*M_PI*2*i/N)); twb++;
*twb = -(int16_t)round(32767.0*sin(2*M_PI*2*i/N)); twb++;
*twc = (int16_t)round(32767.0*cos(2*M_PI*3*i/N)); twc++;
*twc = -(int16_t)round(32767.0*sin(2*M_PI*3*i/N)); twc++;
}
}
void init_rad4_rep(int N,int16_t *twa,int16_t *twb,int16_t *twc) {
int i,j;
for (i=1;i<(N/4);i++) {
twa[0] = (int16_t)round(32767.0*cos(2*M_PI*i/N));
twa[1] = -(int16_t)round(32767.0*sin(2*M_PI*i/N));
twb[0] = (int16_t)round(32767.0*cos(2*M_PI*2*i/N));
twb[1] = -(int16_t)round(32767.0*sin(2*M_PI*2*i/N));
twc[0] = (int16_t)round(32767.0*cos(2*M_PI*3*i/N));
twc[1] = -(int16_t)round(32767.0*sin(2*M_PI*3*i/N));
for (j=1;j<4;j++) {
((int32_t*)twa)[j]=((int32_t*)twa)[0];
((int32_t*)twb)[j]=((int32_t*)twb)[0];
((int32_t*)twc)[j]=((int32_t*)twc)[0];
}
twa+=8;
twb+=8;
twc+=8;
}
}
void init_rad2(int N,int16_t *tw) {
int16_t *twa = tw;
int i;
for (i=0;i<(N>>1);i++) {
*twa = (int16_t)round(32767.0*cos(2*M_PI*i/N)); twa++;
*twa = -(int16_t)round(32767.0*sin(2*M_PI*i/N)); twa++;
}
}
void init_rad2_rep(int N,int16_t *twa) {
int i,j;
for (i=1;i<(N/2);i++) {
twa[0] = (int16_t)round(32767.0*cos(2*M_PI*i/N));
twa[1] = -(int16_t)round(32767.0*sin(2*M_PI*i/N));
for (j=1;j<4;j++) {
((int32_t*)twa)[j]=((int32_t*)twa)[0];
}
twa+=8;
}
}
void init_rad3(int N,int16_t *twa,int16_t *twb) {
int i;
for (i=0;i<(N/3);i++) {
*twa = (int16_t)round(32767.0*cos(2*M_PI*i/N)); twa++;
*twa = -(int16_t)round(32767.0*sin(2*M_PI*i/N)); twa++;
*twb = (int16_t)round(32767.0*cos(2*M_PI*2*i/N)); twb++;
*twb = -(int16_t)round(32767.0*sin(2*M_PI*2*i/N)); twb++;
}
}
void init_rad3_rep(int N,int16_t *twa,int16_t *twb) {
int i,j;
for (i=1;i<(N/3);i++) {
twa[0] = (int16_t)round(32767.0*cos(2*M_PI*i/N));
twa[1] = -(int16_t)round(32767.0*sin(2*M_PI*i/N));
twb[0] = (int16_t)round(32767.0*cos(2*M_PI*2*i/N));
twb[1] = -(int16_t)round(32767.0*sin(2*M_PI*2*i/N));
for (j=1;j<4;j++) {
((int32_t*)twa)[j]=((int32_t*)twa)[0];
((int32_t*)twb)[j]=((int32_t*)twb)[0];
}
twa+=8;
twb+=8;
}
}
void init_rad5_rep(int N,int16_t *twa,int16_t *twb,int16_t *twc,int16_t *twd) {
int i,j;
for (i=1;i<(N/5);i++) {
twa[0] = (int16_t)round(32767.0*cos(2*M_PI*i/N));
twa[1] = -(int16_t)round(32767.0*sin(2*M_PI*i/N));
twb[0] = (int16_t)round(32767.0*cos(2*M_PI*2*i/N));
twb[1] = -(int16_t)round(32767.0*sin(2*M_PI*2*i/N));
twc[0] = (int16_t)round(32767.0*cos(2*M_PI*3*i/N));
twc[1] = -(int16_t)round(32767.0*sin(2*M_PI*3*i/N));
twd[0] = (int16_t)round(32767.0*cos(2*M_PI*4*i/N));
twd[1] = -(int16_t)round(32767.0*sin(2*M_PI*4*i/N));
for (j=1;j<4;j++) {
((int32_t*)twa)[j]=((int32_t*)twa)[0];
((int32_t*)twb)[j]=((int32_t*)twb)[0];
((int32_t*)twc)[j]=((int32_t*)twc)[0];
((int32_t*)twd)[j]=((int32_t*)twd)[0];
}
twa+=8;
twb+=8;
twc+=8;
twd+=8;
}
}
/*----------------------------------------------------------------*/
/* dft library entry points: */
/*
 * Fill the twiddle tables used by the DFT routines by calling the init_rad*
 * helpers once per supported transform size.  Takes no arguments and always
 * returns 0.
 *
 * NOTE(review): in this listing several statements appear twice on one line
 * and a "...@@" marker interrupts the body; this looks like side-by-side diff
 * rendering, so the list of calls shown here is probably not complete.
 */
int dfts_autoinit(void)
{
/* Non-replicated tables: one (cos, -sin) pair per index. */
init_rad4(1024,tw1024);
init_rad2(2048,tw2048);
init_rad4(4096,tw4096);
init_rad2(8192,tw8192);
init_rad4(16384,tw16384);
init_rad2(32768,tw32768);
init_rad3(1536,twa1536,twb1536);
init_rad3(3072,twa3072,twb3072);
init_rad3(6144,twa6144,twb6144);
init_rad3(12288,twa12288,twb12288);
init_rad3(18432,twa18432,twb18432);
init_rad3(24576,twa24576,twb24576);
init_rad3(36864,twa36864,twb36864);
init_rad3(49152,twa49152,twb49152);
init_rad3(98304,twa98304,twb98304);
/* Replicated ("_rep") tables: each pair stored four times, index 0 skipped. */
init_rad2_rep(24,tw24); init_rad2_rep(24,tw24);
...@@ -9645,6 +10556,24 @@ int dfts_autoinit(void) ...@@ -9645,6 +10556,24 @@ int dfts_autoinit(void)
init_rad3_rep(1080,twa1080,twb1080); init_rad3_rep(1080,twa1080,twb1080);
init_rad4_rep(1152,twa1152,twb1152,twc1152); init_rad4_rep(1152,twa1152,twb1152,twc1152);
init_rad4_rep(1200,twa1200,twb1200,twc1200); init_rad4_rep(1200,twa1200,twb1200,twc1200);
/* Sizes 1296 .. 3240: the radix passed here (3, 4 or 5) selects which helper
   fills the tables for the final stage of the matching dftNNNN routine. */
init_rad3_rep(1296,twa1296,twb1296);
init_rad3_rep(1440,twa1440,twb1440);
init_rad5_rep(1500,twa1500,twb1500,twc1500,twd1500);
init_rad3_rep(1620,twa1620,twb1620);
init_rad3_rep(1728,twa1728,twb1728);
init_rad3_rep(1800,twa1800,twb1800);
init_rad4_rep(1920,twa1920,twb1920, twc1920);
init_rad3_rep(1944,twa1944,twb1944);
init_rad3_rep(2160,twa2160,twb2160);
init_rad3_rep(2304,twa2304,twb2304);
init_rad4_rep(2400,twa2400,twb2400,twc2400);
init_rad3_rep(2592,twa2592,twb2592);
init_rad3_rep(2700,twa2700,twb2700);
init_rad3_rep(2880,twa2880,twb2880);
init_rad3_rep(2916,twa2916,twb2916);
init_rad5_rep(3000,twa3000,twb3000,twc3000,twd3000);
init_rad3_rep(3240,twa3240,twb3240);
return 0; return 0;
} }
......
...@@ -226,9 +226,26 @@ void dft1024(int16_t *x,int16_t *y,uint8_t scale_flag); ...@@ -226,9 +226,26 @@ void dft1024(int16_t *x,int16_t *y,uint8_t scale_flag);
void dft1080(int16_t *x,int16_t *y,uint8_t scale_flag); void dft1080(int16_t *x,int16_t *y,uint8_t scale_flag);
void dft1152(int16_t *x,int16_t *y,uint8_t scale_flag); void dft1152(int16_t *x,int16_t *y,uint8_t scale_flag);
void dft1200(int16_t *x,int16_t *y,uint8_t scale_flag); void dft1200(int16_t *x,int16_t *y,uint8_t scale_flag);
void dft1296(int16_t *x,int16_t *y,uint8_t scale_flag);
void dft1440(int16_t *x,int16_t *y,uint8_t scale_flag);
void dft1500(int16_t *x,int16_t *y,uint8_t scale_flag);
void dft1536(int16_t *sigF,int16_t *sig,uint8_t scale_flag); void dft1536(int16_t *sigF,int16_t *sig,uint8_t scale_flag);
void dft1620(int16_t *x,int16_t *y,uint8_t scale_flag);
void dft1728(int16_t *x,int16_t *y,uint8_t scale_flag);
void dft1800(int16_t *x,int16_t *y,uint8_t scale_flag);
void dft1920(int16_t *x,int16_t *y,uint8_t scale_flag);
void dft1944(int16_t *x,int16_t *y,uint8_t scale_flag);
void dft2048(int16_t *x,int16_t *y,uint8_t scale_flag); void dft2048(int16_t *x,int16_t *y,uint8_t scale_flag);
void dft2160(int16_t *x,int16_t *y,uint8_t scale_flag);
void dft2304(int16_t *x,int16_t *y,uint8_t scale_flag);
void dft2400(int16_t *x,int16_t *y,uint8_t scale_flag);
void dft2592(int16_t *x,int16_t *y,uint8_t scale_flag);
void dft2700(int16_t *x,int16_t *y,uint8_t scale_flag);
void dft2880(int16_t *x,int16_t *y,uint8_t scale_flag);
void dft2916(int16_t *x,int16_t *y,uint8_t scale_flag);
void dft3000(int16_t *x,int16_t *y,uint8_t scale_flag);
void dft3072(int16_t *sigF,int16_t *sig,uint8_t scale_flag); void dft3072(int16_t *sigF,int16_t *sig,uint8_t scale_flag);
void dft3240(int16_t *x,int16_t *y,uint8_t scale_flag);
void dft4096(int16_t *x,int16_t *y,uint8_t scale_flag); void dft4096(int16_t *x,int16_t *y,uint8_t scale_flag);
void dft6144(int16_t *sigF,int16_t *sig,uint8_t scale_flag); void dft6144(int16_t *sigF,int16_t *sig,uint8_t scale_flag);
void dft8192(int16_t *x,int16_t *y,uint8_t scale_flag); void dft8192(int16_t *x,int16_t *y,uint8_t scale_flag);
...@@ -283,21 +300,25 @@ typedef enum DFT_size_idx { ...@@ -283,21 +300,25 @@ typedef enum DFT_size_idx {
DFT_108, DFT_120, DFT_128, DFT_144, DFT_180, DFT_192, DFT_216, DFT_240, DFT_108, DFT_120, DFT_128, DFT_144, DFT_180, DFT_192, DFT_216, DFT_240,
DFT_256, DFT_288, DFT_300, DFT_324, DFT_360, DFT_384, DFT_432, DFT_480, DFT_256, DFT_288, DFT_300, DFT_324, DFT_360, DFT_384, DFT_432, DFT_480,
DFT_512, DFT_540, DFT_576, DFT_600, DFT_648, DFT_720, DFT_768, DFT_864, DFT_512, DFT_540, DFT_576, DFT_600, DFT_648, DFT_720, DFT_768, DFT_864,
DFT_900, DFT_960, DFT_972, DFT_1024, DFT_1080, DFT_1152, DFT_1200, DFT_1536, DFT_900, DFT_960, DFT_972, DFT_1024, DFT_1080, DFT_1152, DFT_1200, DFT_1296,
DFT_2048, DFT_3072, DFT_4096, DFT_6144, DFT_8192, DFT_9216, DFT_12288, DFT_18432, DFT_1440, DFT_1500, DFT_1536, DFT_1620, DFT_1728, DFT_1800, DFT_1920, DFT_1944,
DFT_24576, DFT_36864, DFT_49152, DFT_73728, DFT_98304, DFT_2048, DFT_2160, DFT_2304, DFT_2400, DFT_2592, DFT_2700, DFT_2880, DFT_2916,
DFT_3000, DFT_3072, DFT_3240, DFT_4096, DFT_6144, DFT_8192, DFT_9216, DFT_12288,
DFT_18432, DFT_24576, DFT_36864, DFT_49152, DFT_73728, DFT_98304,
DFT_SIZE_IDXTABLESIZE DFT_SIZE_IDXTABLESIZE
} dft_size_idx_t; } dft_size_idx_t;
/* Table of DFT routines, defined only when OAIDFTS_MAIN is set.  The entries
   are listed in the same order as the DFT_* enumerators of dft_size_idx_t, so
   the table is presumably indexed by that enum; a size added to one list must
   be added at the same position in the other.  TODO confirm against the dft()
   dispatcher. */
#ifdef OAIDFTS_MAIN #ifdef OAIDFTS_MAIN
adftfunc_t dft_ftab[]={ adftfunc_t dft_ftab[]={
dft12, dft24, dft36, dft48, dft60, dft72, dft96, dft12, dft24, dft36, dft48, dft60, dft72, dft96,
dft108, dft120, dft128, dft144, dft180, dft192, dft216, dft240, dft108, dft120, dft128, dft144, dft180, dft192, dft216, dft240,
dft256, dft288, dft300, dft324, dft360, dft384, dft432, dft480, dft256, dft288, dft300, dft324, dft360, dft384, dft432, dft480,
dft512, dft540, dft576, dft600, dft648, dft720, dft768, dft864, dft512, dft540, dft576, dft600, dft648, dft720, dft768, dft864,
dft900, dft960, dft972, dft1024, dft1080, dft1152, dft1200, dft1536, dft900, dft960, dft972, dft1024, dft1080, dft1152, dft1200, dft1296,
dft2048, dft3072, dft4096, dft6144, dft8192, dft9216, dft12288, dft18432, dft1440, dft1500, dft1536, dft1620, dft1728, dft1800, dft1920, dft1944,
dft24576, dft36864, dft49152, dft73728, dft98304 dft2048, dft2160, dft2304, dft2400, dft2592, dft2700, dft2880, dft2916,
dft3000, dft3072, dft3240, dft4096, dft6144, dft8192, dft9216, dft12288,
dft18432, dft24576, dft36864, dft49152, dft73728, dft98304
}; };
#endif #endif
...@@ -309,9 +330,9 @@ typedef enum idft_size_idx { ...@@ -309,9 +330,9 @@ typedef enum idft_size_idx {
} idft_size_idx_t; } idft_size_idx_t;
/* Table of inverse-DFT routines, defined only when OAIDFTS_MAIN is set.
   Presumably indexed by idft_size_idx_t, whose enumerators are not visible
   in this listing.  TODO confirm that the entry order matches that enum. */
#ifdef OAIDFTS_MAIN #ifdef OAIDFTS_MAIN
aidftfunc_t idft_ftab[]={ aidftfunc_t idft_ftab[]={
idft128, idft256, idft512, idft1024, idft1536, idft2048, idft3072, idft4096, idft128, idft256, idft512, idft1024, idft1536, idft2048, idft3072, idft4096,
idft6144, idft8192, idft9216, idft12288, idft18432, idft24576, idft36864, idft49152, idft6144, idft8192, idft9216, idft12288, idft18432, idft24576, idft36864, idft49152,
idft73728, idft98304 idft73728, idft98304
}; };
#endif #endif
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment