Commit 1a2be8fc authored by Laurent THOMAS's avatar Laurent THOMAS Committed by Robert Schmidt

fix dft modifications related to alignment

parent f46240ca
...@@ -155,8 +155,6 @@ typedef struct { ...@@ -155,8 +155,6 @@ typedef struct {
uint8_t h[MAX_NUM_CHANNEL_BITS]; uint8_t h[MAX_NUM_CHANNEL_BITS];
/// Scrambled "b"-sequences (for definition see 36-211 V8.6 2009-03, p.14) /// Scrambled "b"-sequences (for definition see 36-211 V8.6 2009-03, p.14)
uint8_t b_tilde[MAX_NUM_CHANNEL_BITS]; uint8_t b_tilde[MAX_NUM_CHANNEL_BITS];
/// Modulated "d"-sequences (for definition see 36-211 V8.6 2009-03, p.14)
int32_t d[MAX_NUM_RE];
/// Transform-coded "z"-sequences (for definition see 36-211 V8.6 2009-03, p.14-15) /// Transform-coded "z"-sequences (for definition see 36-211 V8.6 2009-03, p.14-15)
int32_t z[MAX_NUM_RE]; int32_t z[MAX_NUM_RE];
/// "q" sequences for CQI/PMI (for definition see 36-212 V8.6 2009-03, p.27) /// "q" sequences for CQI/PMI (for definition see 36-212 V8.6 2009-03, p.27)
......
...@@ -42,7 +42,7 @@ ...@@ -42,7 +42,7 @@
//#define DEBUG_ULSCH_MODULATION //#define DEBUG_ULSCH_MODULATION
void dft_lte(int32_t *z,int32_t *d, int32_t Msc_PUSCH, uint8_t Nsymb) void dft_lte(int32_t *z,struct complex16 *input, int32_t Msc_PUSCH, uint8_t Nsymb)
{ {
#if defined(__x86_64__) || defined(__i386__) #if defined(__x86_64__) || defined(__i386__)
...@@ -66,7 +66,7 @@ void dft_lte(int32_t *z,int32_t *d, int32_t Msc_PUSCH, uint8_t Nsymb) ...@@ -66,7 +66,7 @@ void dft_lte(int32_t *z,int32_t *d, int32_t Msc_PUSCH, uint8_t Nsymb)
#endif #endif
// printf("Doing lte_dft for Msc_PUSCH %d\n",Msc_PUSCH); // printf("Doing lte_dft for Msc_PUSCH %d\n",Msc_PUSCH);
d0 = (uint32_t *)d; d0 = (uint32_t *)input;
d1 = d0+Msc_PUSCH; d1 = d0+Msc_PUSCH;
d2 = d1+Msc_PUSCH; d2 = d1+Msc_PUSCH;
d3 = d2+Msc_PUSCH; d3 = d2+Msc_PUSCH;
...@@ -476,7 +476,8 @@ void ulsch_modulation(int32_t **txdataF, ...@@ -476,7 +476,8 @@ void ulsch_modulation(int32_t **txdataF,
// Modulation // Modulation
ulsch_Msymb = G/Q_m; ulsch_Msymb = G/Q_m;
/// Modulated "d"-sequences (for definition see 36-211 V8.6 2009-03, p.14)
struct complex16 d[MAX_NUM_RE] __attribute__((aligned(32)));
if(ulsch->cooperation_flag == 2) if(ulsch->cooperation_flag == 2)
// For Distributed Alamouti Scheme in Collabrative Communication // For Distributed Alamouti Scheme in Collabrative Communication
{ {
...@@ -488,14 +489,14 @@ void ulsch_modulation(int32_t **txdataF, ...@@ -488,14 +489,14 @@ void ulsch_modulation(int32_t **txdataF,
//UE1, -x1* //UE1, -x1*
((int16_t*)&ulsch->d[i])[0] = (ulsch->b_tilde[j] == 1) ? (gain_lin_QPSK) : -gain_lin_QPSK; d[i].r = (ulsch->b_tilde[j] == 1) ? (gain_lin_QPSK) : -gain_lin_QPSK;
((int16_t*)&ulsch->d[i])[1] = (ulsch->b_tilde[j+1] == 1)? (-gain_lin_QPSK) : gain_lin_QPSK; d[i].i = (ulsch->b_tilde[j+1] == 1)? (-gain_lin_QPSK) : gain_lin_QPSK;
// if (i<Msc_PUSCH) // if (i<Msc_PUSCH)
// printf("input %d (%p): %d,%d\n", i,&ulsch->d[i],((int16_t*)&ulsch->d[i])[0],((int16_t*)&ulsch->d[i])[1]); // printf("input %d (%p): %d,%d\n", i,&ulsch->d[i],((int16_t*)&ulsch->d[i])[0],((int16_t*)&ulsch->d[i])[1]);
// UE1, x0* // UE1, x0*
((int16_t*)&ulsch->d[i+1])[0] = (ulsch->b_tilde[j-2] == 1) ? (-gain_lin_QPSK) : gain_lin_QPSK; d[i+1].r = (ulsch->b_tilde[j-2] == 1) ? (-gain_lin_QPSK) : gain_lin_QPSK;
((int16_t*)&ulsch->d[i+1])[1] = (ulsch->b_tilde[j-1] == 1)? (gain_lin_QPSK) : -gain_lin_QPSK; d[i+1].i = (ulsch->b_tilde[j-1] == 1)? (gain_lin_QPSK) : -gain_lin_QPSK;
break; break;
...@@ -521,8 +522,8 @@ void ulsch_modulation(int32_t **txdataF, ...@@ -521,8 +522,8 @@ void ulsch_modulation(int32_t **txdataF,
qam16_table_offset_im+=1; qam16_table_offset_im+=1;
((int16_t*)&ulsch->d[i])[0]=-(int16_t)(((int32_t)amp*qam16_table[qam16_table_offset_re])>>15); d[i].r =-(int16_t)(((int32_t)amp*qam16_table[qam16_table_offset_re])>>15);
((int16_t*)&ulsch->d[i])[1]=(int16_t)(((int32_t)amp*qam16_table[qam16_table_offset_im])>>15); d[i].i =(int16_t)(((int32_t)amp*qam16_table[qam16_table_offset_im])>>15);
//UE1,x0* //UE1,x0*
qam16_table_offset_re = 0; qam16_table_offset_re = 0;
...@@ -544,8 +545,8 @@ void ulsch_modulation(int32_t **txdataF, ...@@ -544,8 +545,8 @@ void ulsch_modulation(int32_t **txdataF,
// ((int16_t*)&ulsch->d[i+1])[0]=-(int16_t)(((int32_t)amp*qam16_table[qam16_table_offset_re])>>15); // ((int16_t*)&ulsch->d[i+1])[0]=-(int16_t)(((int32_t)amp*qam16_table[qam16_table_offset_re])>>15);
// ((int16_t*)&ulsch->d[i+1])[1]=(int16_t)(((int32_t)amp*qam16_table[qam16_table_offset_im])>>15); // ((int16_t*)&ulsch->d[i+1])[1]=(int16_t)(((int32_t)amp*qam16_table[qam16_table_offset_im])>>15);
((int16_t*)&ulsch->d[i+1])[0]=(int16_t)(((int32_t)amp*qam16_table[qam16_table_offset_re])>>15); d[i+1].r=(int16_t)(((int32_t)amp*qam16_table[qam16_table_offset_re])>>15);
((int16_t*)&ulsch->d[i+1])[1]=-(int16_t)(((int32_t)amp*qam16_table[qam16_table_offset_im])>>15); d[i+1].i=-(int16_t)(((int32_t)amp*qam16_table[qam16_table_offset_im])>>15);
break; break;
...@@ -578,8 +579,8 @@ void ulsch_modulation(int32_t **txdataF, ...@@ -578,8 +579,8 @@ void ulsch_modulation(int32_t **txdataF,
qam64_table_offset_im+=1; qam64_table_offset_im+=1;
((int16_t*)&ulsch->d[i])[0]=-(int16_t)(((int32_t)amp*qam64_table[qam64_table_offset_re])>>15); d[i].r=-(int16_t)(((int32_t)amp*qam64_table[qam64_table_offset_re])>>15);
((int16_t*)&ulsch->d[i])[1]=(int16_t)(((int32_t)amp*qam64_table[qam64_table_offset_im])>>15); d[i].i=(int16_t)(((int32_t)amp*qam64_table[qam64_table_offset_im])>>15);
//UE1,x0* //UE1,x0*
qam64_table_offset_re = 0; qam64_table_offset_re = 0;
...@@ -605,8 +606,8 @@ void ulsch_modulation(int32_t **txdataF, ...@@ -605,8 +606,8 @@ void ulsch_modulation(int32_t **txdataF,
qam64_table_offset_im+=1; qam64_table_offset_im+=1;
((int16_t*)&ulsch->d[i+1])[0]=(int16_t)(((int32_t)amp*qam64_table[qam64_table_offset_re])>>15); d[i+1].r=(int16_t)(((int32_t)amp*qam64_table[qam64_table_offset_re])>>15);
((int16_t*)&ulsch->d[i+1])[1]=-(int16_t)(((int32_t)amp*qam64_table[qam64_table_offset_im])>>15); d[i+1].i=-(int16_t)(((int32_t)amp*qam64_table[qam64_table_offset_im])>>15);
break; break;
...@@ -621,8 +622,8 @@ void ulsch_modulation(int32_t **txdataF, ...@@ -621,8 +622,8 @@ void ulsch_modulation(int32_t **txdataF,
case 2: case 2:
// TODO: this has to be updated!!! // TODO: this has to be updated!!!
((int16_t*)&ulsch->d[i])[0] = (ulsch->b_tilde[j] == 1) ? (-gain_lin_QPSK) : gain_lin_QPSK; d[i].r = (ulsch->b_tilde[j] == 1) ? (-gain_lin_QPSK) : gain_lin_QPSK;
((int16_t*)&ulsch->d[i])[1] = (ulsch->b_tilde[j+1] == 1)? (-gain_lin_QPSK) : gain_lin_QPSK; d[i].i = (ulsch->b_tilde[j+1] == 1)? (-gain_lin_QPSK) : gain_lin_QPSK;
// if (i<Msc_PUSCH) // if (i<Msc_PUSCH)
// printf("input %d/%d Msc_PUSCH %d (%p): %d,%d\n", i,Msymb,Msc_PUSCH,&ulsch->d[i],((int16_t*)&ulsch->d[i])[0],((int16_t*)&ulsch->d[i])[1]); // printf("input %d/%d Msc_PUSCH %d (%p): %d,%d\n", i,Msymb,Msc_PUSCH,&ulsch->d[i],((int16_t*)&ulsch->d[i])[0],((int16_t*)&ulsch->d[i])[1]);
...@@ -646,8 +647,8 @@ void ulsch_modulation(int32_t **txdataF, ...@@ -646,8 +647,8 @@ void ulsch_modulation(int32_t **txdataF,
qam16_table_offset_im+=1; qam16_table_offset_im+=1;
((int16_t*)&ulsch->d[i])[0]=(int16_t)(((int32_t)amp*qam16_table[qam16_table_offset_re])>>15); d[i].r=(int16_t)(((int32_t)amp*qam16_table[qam16_table_offset_re])>>15);
((int16_t*)&ulsch->d[i])[1]=(int16_t)(((int32_t)amp*qam16_table[qam16_table_offset_im])>>15); d[i].i=(int16_t)(((int32_t)amp*qam16_table[qam16_table_offset_im])>>15);
// printf("input(16qam) %d (%p): %d,%d\n", i,&ulsch->d[i],((int16_t*)&ulsch->d[i])[0],((int16_t*)&ulsch->d[i])[1]); // printf("input(16qam) %d (%p): %d,%d\n", i,&ulsch->d[i],((int16_t*)&ulsch->d[i])[0],((int16_t*)&ulsch->d[i])[1]);
break; break;
...@@ -676,8 +677,8 @@ void ulsch_modulation(int32_t **txdataF, ...@@ -676,8 +677,8 @@ void ulsch_modulation(int32_t **txdataF,
qam64_table_offset_im+=1; qam64_table_offset_im+=1;
((int16_t*)&ulsch->d[i])[0]=(int16_t)(((int32_t)amp*qam64_table[qam64_table_offset_re])>>15); d[i].r=(int16_t)(((int32_t)amp*qam64_table[qam64_table_offset_re])>>15);
((int16_t*)&ulsch->d[i])[1]=(int16_t)(((int32_t)amp*qam64_table[qam64_table_offset_im])>>15); d[i].i=(int16_t)(((int32_t)amp*qam64_table[qam64_table_offset_im])>>15);
break; break;
...@@ -688,7 +689,7 @@ void ulsch_modulation(int32_t **txdataF, ...@@ -688,7 +689,7 @@ void ulsch_modulation(int32_t **txdataF,
// Transform Precoding // Transform Precoding
dft_lte(ulsch->z,ulsch->d,Msc_PUSCH,ulsch->Nsymb_pusch); dft_lte(ulsch->z,d,Msc_PUSCH,ulsch->Nsymb_pusch);
DevAssert(txdataF); DevAssert(txdataF);
......
...@@ -91,7 +91,7 @@ int slot_fep(PHY_VARS_UE *ue, ...@@ -91,7 +91,7 @@ int slot_fep(PHY_VARS_UE *ue,
} }
// subframe_offset_F = frame_parms->ofdm_symbol_size * frame_parms->symbols_per_tti * (Ns>>1); // subframe_offset_F = frame_parms->ofdm_symbol_size * frame_parms->symbols_per_tti * (Ns>>1);
if (l<0 || l>=7-frame_parms->Ncp) { if (l<0 || l>=7-frame_parms->Ncp) {
printf("slot_fep: l must be between 0 and %d\n",7-frame_parms->Ncp); printf("slot_fep: l must be between 0 and %d\n",7-frame_parms->Ncp);
return(-1); return(-1);
......
...@@ -7082,43 +7082,40 @@ static inline void dft12f(simd_q15_t *x0, ...@@ -7082,43 +7082,40 @@ static inline void dft12f(simd_q15_t *x0,
simd_q15_t tmp_dft12[12]; simd_q15_t tmp_dft12[12];
simd_q15_t *tmp_dft12_ptr = &tmp_dft12[0];
// msg("dft12\n"); // msg("dft12\n");
bfly4_tw1(x0, bfly4_tw1(x0,
x3, x3,
x6, x6,
x9, x9,
tmp_dft12_ptr, tmp_dft12,
tmp_dft12_ptr+3, tmp_dft12+3,
tmp_dft12_ptr+6, tmp_dft12+6,
tmp_dft12_ptr+9); tmp_dft12+9);
bfly4_tw1(x1, bfly4_tw1(x1,
x4, x4,
x7, x7,
x10, x10,
tmp_dft12_ptr+1, tmp_dft12+1,
tmp_dft12_ptr+4, tmp_dft12+4,
tmp_dft12_ptr+7, tmp_dft12+7,
tmp_dft12_ptr+10); tmp_dft12+10);
bfly4_tw1(x2, bfly4_tw1(x2,
x5, x5,
x8, x8,
x11, x11,
tmp_dft12_ptr+2, tmp_dft12+2,
tmp_dft12_ptr+5, tmp_dft12+5,
tmp_dft12_ptr+8, tmp_dft12+8,
tmp_dft12_ptr+11); tmp_dft12+11);
// k2=0; // k2=0;
bfly3_tw1(tmp_dft12_ptr, bfly3_tw1(tmp_dft12,
tmp_dft12_ptr+1, tmp_dft12+1,
tmp_dft12_ptr+2, tmp_dft12+2,
y0, y0,
y4, y4,
y8); y8);
...@@ -7126,9 +7123,9 @@ static inline void dft12f(simd_q15_t *x0, ...@@ -7126,9 +7123,9 @@ static inline void dft12f(simd_q15_t *x0,
// k2=1; // k2=1;
bfly3(tmp_dft12_ptr+3, bfly3(tmp_dft12+3,
tmp_dft12_ptr+4, tmp_dft12+4,
tmp_dft12_ptr+5, tmp_dft12+5,
y1, y1,
y5, y5,
y9, y9,
...@@ -7138,9 +7135,9 @@ static inline void dft12f(simd_q15_t *x0, ...@@ -7138,9 +7135,9 @@ static inline void dft12f(simd_q15_t *x0,
// k2=2; // k2=2;
bfly3(tmp_dft12_ptr+6, bfly3(tmp_dft12+6,
tmp_dft12_ptr+7, tmp_dft12+7,
tmp_dft12_ptr+8, tmp_dft12+8,
y2, y2,
y6, y6,
y10, y10,
...@@ -7148,9 +7145,9 @@ static inline void dft12f(simd_q15_t *x0, ...@@ -7148,9 +7145,9 @@ static inline void dft12f(simd_q15_t *x0,
W4_12); W4_12);
// k2=3; // k2=3;
bfly3(tmp_dft12_ptr+9, bfly3(tmp_dft12+9,
tmp_dft12_ptr+10, tmp_dft12+10,
tmp_dft12_ptr+11, tmp_dft12+11,
y3, y3,
y7, y7,
y11, y11,
...@@ -10607,11 +10604,19 @@ int dfts_autoinit(void) ...@@ -10607,11 +10604,19 @@ int dfts_autoinit(void)
#ifndef MR_MAIN #ifndef MR_MAIN
void dft(uint8_t sizeidx, int16_t *input,int16_t *output,unsigned char scale_flag){ void dft(uint8_t sizeidx, int16_t *input,int16_t *output,unsigned char scale_flag){
AssertFatal((sizeidx>=0 && sizeidx<DFT_SIZE_IDXTABLESIZE),"Invalid dft size index %i\n",sizeidx); AssertFatal((sizeidx >= 0 && sizeidx<DFT_SIZE_IDXTABLESIZE),"Invalid dft size index %i\n",sizeidx);
AssertFatal( ((intptr_t)output&0x1F)==0,"Buffers should be 32 bytes aligned %p",output); int algn=0xF;
if ((intptr_t)input&0x1F) { #ifdef __AVX2__
if ( (dft_ftab[sizeidx].size%3) != 0 ) // there is no AVX2 implementation for multiples of 3 DFTs
algn=0x1F;
#endif
AssertFatal(((intptr_t)output&algn)==0,"Buffers should be aligned %p",output);
if (((intptr_t)input)&algn) {
LOG_D(PHY, "DFT called with input not aligned, add a memcpy, size %d\n", sizeidx); LOG_D(PHY, "DFT called with input not aligned, add a memcpy, size %d\n", sizeidx);
int16_t tmp[dft_ftab[sizeidx].size*2] __attribute__ ((aligned(32))); // input and output are not in right type (int16_t instead of c16_t) int sz=dft_ftab[sizeidx].size;
if (sizeidx==DFT_12) // This case does 8 DFTs in //
sz*=8;
int16_t tmp[sz*2] __attribute__ ((aligned(32))); // input and output are not in right type (int16_t instead of c16_t)
memcpy(tmp, input, sizeof tmp); memcpy(tmp, input, sizeof tmp);
dft_ftab[sizeidx].func(tmp,output,scale_flag); dft_ftab[sizeidx].func(tmp,output,scale_flag);
} else } else
...@@ -10620,10 +10625,15 @@ void dft(uint8_t sizeidx, int16_t *input,int16_t *output,unsigned char scale_fla ...@@ -10620,10 +10625,15 @@ void dft(uint8_t sizeidx, int16_t *input,int16_t *output,unsigned char scale_fla
void idft(uint8_t sizeidx, int16_t *input,int16_t *output,unsigned char scale_flag){ void idft(uint8_t sizeidx, int16_t *input,int16_t *output,unsigned char scale_flag){
AssertFatal((sizeidx>=0 && sizeidx<DFT_SIZE_IDXTABLESIZE),"Invalid idft size index %i\n",sizeidx); AssertFatal((sizeidx>=0 && sizeidx<DFT_SIZE_IDXTABLESIZE),"Invalid idft size index %i\n",sizeidx);
AssertFatal( ((intptr_t)output&0x1F)==0,"Buffers should be 32 bytes aligned %p",output); int algn=0xF;
if ((intptr_t)input&0x1F) { #ifdef __AVX2__
algn=0x1F;
#endif
AssertFatal( ((intptr_t)output&algn)==0,"Buffers should be 16 bytes aligned %p",output);
if (((intptr_t)input)&algn ) {
LOG_D(PHY, "DFT called with input not aligned, add a memcpy\n"); LOG_D(PHY, "DFT called with input not aligned, add a memcpy\n");
int16_t tmp[idft_ftab[sizeidx].size*2] __attribute__ ((aligned(32))); // input and output are not in right type (int16_t instead of c16_t) int sz=idft_ftab[sizeidx].size;
int16_t tmp[sz*2] __attribute__ ((aligned(32))); // input and output are not in right type (int16_t instead of c16_t)
memcpy(tmp, input, sizeof tmp); memcpy(tmp, input, sizeof tmp);
dft_ftab[sizeidx].func(tmp,output,scale_flag); dft_ftab[sizeidx].func(tmp,output,scale_flag);
} else } else
......
...@@ -253,7 +253,7 @@ This function performs optimized fixed-point radix-2 FFT/IFFT. ...@@ -253,7 +253,7 @@ This function performs optimized fixed-point radix-2 FFT/IFFT.
SZ_DEF(73728) \ SZ_DEF(73728) \
SZ_DEF(98304) SZ_DEF(98304)
#define FOREACH_IDFTSZ(SZ_DEF) \ #define FOREACH_IDFTSZ(SZ_DEF)\
SZ_DEF(64) \ SZ_DEF(64) \
SZ_DEF(128) \ SZ_DEF(128) \
SZ_DEF(256) \ SZ_DEF(256) \
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment