Commit 1a2be8fc authored by Laurent THOMAS's avatar Laurent THOMAS Committed by Robert Schmidt

fix dft modifications related to alignment

parent f46240ca
......@@ -155,8 +155,6 @@ typedef struct {
uint8_t h[MAX_NUM_CHANNEL_BITS];
/// Scrambled "b"-sequences (for definition see 36-211 V8.6 2009-03, p.14)
uint8_t b_tilde[MAX_NUM_CHANNEL_BITS];
/// Modulated "d"-sequences (for definition see 36-211 V8.6 2009-03, p.14)
int32_t d[MAX_NUM_RE];
/// Transform-coded "z"-sequences (for definition see 36-211 V8.6 2009-03, p.14-15)
int32_t z[MAX_NUM_RE];
/// "q" sequences for CQI/PMI (for definition see 36-212 V8.6 2009-03, p.27)
......
......@@ -42,7 +42,7 @@
//#define DEBUG_ULSCH_MODULATION
void dft_lte(int32_t *z,int32_t *d, int32_t Msc_PUSCH, uint8_t Nsymb)
void dft_lte(int32_t *z,struct complex16 *input, int32_t Msc_PUSCH, uint8_t Nsymb)
{
#if defined(__x86_64__) || defined(__i386__)
......@@ -66,7 +66,7 @@ void dft_lte(int32_t *z,int32_t *d, int32_t Msc_PUSCH, uint8_t Nsymb)
#endif
// printf("Doing lte_dft for Msc_PUSCH %d\n",Msc_PUSCH);
d0 = (uint32_t *)d;
d0 = (uint32_t *)input;
d1 = d0+Msc_PUSCH;
d2 = d1+Msc_PUSCH;
d3 = d2+Msc_PUSCH;
......@@ -476,7 +476,8 @@ void ulsch_modulation(int32_t **txdataF,
// Modulation
ulsch_Msymb = G/Q_m;
/// Modulated "d"-sequences (for definition see 36-211 V8.6 2009-03, p.14)
struct complex16 d[MAX_NUM_RE] __attribute__((aligned(32)));
if(ulsch->cooperation_flag == 2)
// For Distributed Alamouti Scheme in Collabrative Communication
{
......@@ -488,14 +489,14 @@ void ulsch_modulation(int32_t **txdataF,
//UE1, -x1*
((int16_t*)&ulsch->d[i])[0] = (ulsch->b_tilde[j] == 1) ? (gain_lin_QPSK) : -gain_lin_QPSK;
((int16_t*)&ulsch->d[i])[1] = (ulsch->b_tilde[j+1] == 1)? (-gain_lin_QPSK) : gain_lin_QPSK;
d[i].r = (ulsch->b_tilde[j] == 1) ? (gain_lin_QPSK) : -gain_lin_QPSK;
d[i].i = (ulsch->b_tilde[j+1] == 1)? (-gain_lin_QPSK) : gain_lin_QPSK;
// if (i<Msc_PUSCH)
// printf("input %d (%p): %d,%d\n", i,&ulsch->d[i],((int16_t*)&ulsch->d[i])[0],((int16_t*)&ulsch->d[i])[1]);
// UE1, x0*
((int16_t*)&ulsch->d[i+1])[0] = (ulsch->b_tilde[j-2] == 1) ? (-gain_lin_QPSK) : gain_lin_QPSK;
((int16_t*)&ulsch->d[i+1])[1] = (ulsch->b_tilde[j-1] == 1)? (gain_lin_QPSK) : -gain_lin_QPSK;
d[i+1].r = (ulsch->b_tilde[j-2] == 1) ? (-gain_lin_QPSK) : gain_lin_QPSK;
d[i+1].i = (ulsch->b_tilde[j-1] == 1)? (gain_lin_QPSK) : -gain_lin_QPSK;
break;
......@@ -521,8 +522,8 @@ void ulsch_modulation(int32_t **txdataF,
qam16_table_offset_im+=1;
((int16_t*)&ulsch->d[i])[0]=-(int16_t)(((int32_t)amp*qam16_table[qam16_table_offset_re])>>15);
((int16_t*)&ulsch->d[i])[1]=(int16_t)(((int32_t)amp*qam16_table[qam16_table_offset_im])>>15);
d[i].r =-(int16_t)(((int32_t)amp*qam16_table[qam16_table_offset_re])>>15);
d[i].i =(int16_t)(((int32_t)amp*qam16_table[qam16_table_offset_im])>>15);
//UE1,x0*
qam16_table_offset_re = 0;
......@@ -544,8 +545,8 @@ void ulsch_modulation(int32_t **txdataF,
// ((int16_t*)&ulsch->d[i+1])[0]=-(int16_t)(((int32_t)amp*qam16_table[qam16_table_offset_re])>>15);
// ((int16_t*)&ulsch->d[i+1])[1]=(int16_t)(((int32_t)amp*qam16_table[qam16_table_offset_im])>>15);
((int16_t*)&ulsch->d[i+1])[0]=(int16_t)(((int32_t)amp*qam16_table[qam16_table_offset_re])>>15);
((int16_t*)&ulsch->d[i+1])[1]=-(int16_t)(((int32_t)amp*qam16_table[qam16_table_offset_im])>>15);
d[i+1].r=(int16_t)(((int32_t)amp*qam16_table[qam16_table_offset_re])>>15);
d[i+1].i=-(int16_t)(((int32_t)amp*qam16_table[qam16_table_offset_im])>>15);
break;
......@@ -578,8 +579,8 @@ void ulsch_modulation(int32_t **txdataF,
qam64_table_offset_im+=1;
((int16_t*)&ulsch->d[i])[0]=-(int16_t)(((int32_t)amp*qam64_table[qam64_table_offset_re])>>15);
((int16_t*)&ulsch->d[i])[1]=(int16_t)(((int32_t)amp*qam64_table[qam64_table_offset_im])>>15);
d[i].r=-(int16_t)(((int32_t)amp*qam64_table[qam64_table_offset_re])>>15);
d[i].i=(int16_t)(((int32_t)amp*qam64_table[qam64_table_offset_im])>>15);
//UE1,x0*
qam64_table_offset_re = 0;
......@@ -605,8 +606,8 @@ void ulsch_modulation(int32_t **txdataF,
qam64_table_offset_im+=1;
((int16_t*)&ulsch->d[i+1])[0]=(int16_t)(((int32_t)amp*qam64_table[qam64_table_offset_re])>>15);
((int16_t*)&ulsch->d[i+1])[1]=-(int16_t)(((int32_t)amp*qam64_table[qam64_table_offset_im])>>15);
d[i+1].r=(int16_t)(((int32_t)amp*qam64_table[qam64_table_offset_re])>>15);
d[i+1].i=-(int16_t)(((int32_t)amp*qam64_table[qam64_table_offset_im])>>15);
break;
......@@ -621,8 +622,8 @@ void ulsch_modulation(int32_t **txdataF,
case 2:
// TODO: this has to be updated!!!
((int16_t*)&ulsch->d[i])[0] = (ulsch->b_tilde[j] == 1) ? (-gain_lin_QPSK) : gain_lin_QPSK;
((int16_t*)&ulsch->d[i])[1] = (ulsch->b_tilde[j+1] == 1)? (-gain_lin_QPSK) : gain_lin_QPSK;
d[i].r = (ulsch->b_tilde[j] == 1) ? (-gain_lin_QPSK) : gain_lin_QPSK;
d[i].i = (ulsch->b_tilde[j+1] == 1)? (-gain_lin_QPSK) : gain_lin_QPSK;
// if (i<Msc_PUSCH)
// printf("input %d/%d Msc_PUSCH %d (%p): %d,%d\n", i,Msymb,Msc_PUSCH,&ulsch->d[i],((int16_t*)&ulsch->d[i])[0],((int16_t*)&ulsch->d[i])[1]);
......@@ -646,8 +647,8 @@ void ulsch_modulation(int32_t **txdataF,
qam16_table_offset_im+=1;
((int16_t*)&ulsch->d[i])[0]=(int16_t)(((int32_t)amp*qam16_table[qam16_table_offset_re])>>15);
((int16_t*)&ulsch->d[i])[1]=(int16_t)(((int32_t)amp*qam16_table[qam16_table_offset_im])>>15);
d[i].r=(int16_t)(((int32_t)amp*qam16_table[qam16_table_offset_re])>>15);
d[i].i=(int16_t)(((int32_t)amp*qam16_table[qam16_table_offset_im])>>15);
// printf("input(16qam) %d (%p): %d,%d\n", i,&ulsch->d[i],((int16_t*)&ulsch->d[i])[0],((int16_t*)&ulsch->d[i])[1]);
break;
......@@ -676,8 +677,8 @@ void ulsch_modulation(int32_t **txdataF,
qam64_table_offset_im+=1;
((int16_t*)&ulsch->d[i])[0]=(int16_t)(((int32_t)amp*qam64_table[qam64_table_offset_re])>>15);
((int16_t*)&ulsch->d[i])[1]=(int16_t)(((int32_t)amp*qam64_table[qam64_table_offset_im])>>15);
d[i].r=(int16_t)(((int32_t)amp*qam64_table[qam64_table_offset_re])>>15);
d[i].i=(int16_t)(((int32_t)amp*qam64_table[qam64_table_offset_im])>>15);
break;
......@@ -688,7 +689,7 @@ void ulsch_modulation(int32_t **txdataF,
// Transform Precoding
dft_lte(ulsch->z,ulsch->d,Msc_PUSCH,ulsch->Nsymb_pusch);
dft_lte(ulsch->z,d,Msc_PUSCH,ulsch->Nsymb_pusch);
DevAssert(txdataF);
......
......@@ -91,7 +91,7 @@ int slot_fep(PHY_VARS_UE *ue,
}
// subframe_offset_F = frame_parms->ofdm_symbol_size * frame_parms->symbols_per_tti * (Ns>>1);
if (l<0 || l>=7-frame_parms->Ncp) {
printf("slot_fep: l must be between 0 and %d\n",7-frame_parms->Ncp);
return(-1);
......
......@@ -7082,43 +7082,40 @@ static inline void dft12f(simd_q15_t *x0,
simd_q15_t tmp_dft12[12];
simd_q15_t *tmp_dft12_ptr = &tmp_dft12[0];
// msg("dft12\n");
bfly4_tw1(x0,
x3,
x6,
x9,
tmp_dft12_ptr,
tmp_dft12_ptr+3,
tmp_dft12_ptr+6,
tmp_dft12_ptr+9);
tmp_dft12,
tmp_dft12+3,
tmp_dft12+6,
tmp_dft12+9);
bfly4_tw1(x1,
x4,
x7,
x10,
tmp_dft12_ptr+1,
tmp_dft12_ptr+4,
tmp_dft12_ptr+7,
tmp_dft12_ptr+10);
tmp_dft12+1,
tmp_dft12+4,
tmp_dft12+7,
tmp_dft12+10);
bfly4_tw1(x2,
x5,
x8,
x11,
tmp_dft12_ptr+2,
tmp_dft12_ptr+5,
tmp_dft12_ptr+8,
tmp_dft12_ptr+11);
tmp_dft12+2,
tmp_dft12+5,
tmp_dft12+8,
tmp_dft12+11);
// k2=0;
bfly3_tw1(tmp_dft12_ptr,
tmp_dft12_ptr+1,
tmp_dft12_ptr+2,
bfly3_tw1(tmp_dft12,
tmp_dft12+1,
tmp_dft12+2,
y0,
y4,
y8);
......@@ -7126,9 +7123,9 @@ static inline void dft12f(simd_q15_t *x0,
// k2=1;
bfly3(tmp_dft12_ptr+3,
tmp_dft12_ptr+4,
tmp_dft12_ptr+5,
bfly3(tmp_dft12+3,
tmp_dft12+4,
tmp_dft12+5,
y1,
y5,
y9,
......@@ -7138,9 +7135,9 @@ static inline void dft12f(simd_q15_t *x0,
// k2=2;
bfly3(tmp_dft12_ptr+6,
tmp_dft12_ptr+7,
tmp_dft12_ptr+8,
bfly3(tmp_dft12+6,
tmp_dft12+7,
tmp_dft12+8,
y2,
y6,
y10,
......@@ -7148,9 +7145,9 @@ static inline void dft12f(simd_q15_t *x0,
W4_12);
// k2=3;
bfly3(tmp_dft12_ptr+9,
tmp_dft12_ptr+10,
tmp_dft12_ptr+11,
bfly3(tmp_dft12+9,
tmp_dft12+10,
tmp_dft12+11,
y3,
y7,
y11,
......@@ -10607,11 +10604,19 @@ int dfts_autoinit(void)
#ifndef MR_MAIN
void dft(uint8_t sizeidx, int16_t *input,int16_t *output,unsigned char scale_flag){
AssertFatal((sizeidx>=0 && sizeidx<DFT_SIZE_IDXTABLESIZE),"Invalid dft size index %i\n",sizeidx);
AssertFatal( ((intptr_t)output&0x1F)==0,"Buffers should be 32 bytes aligned %p",output);
if ((intptr_t)input&0x1F) {
AssertFatal((sizeidx >= 0 && sizeidx<DFT_SIZE_IDXTABLESIZE),"Invalid dft size index %i\n",sizeidx);
int algn=0xF;
#ifdef __AVX2__
if ( (dft_ftab[sizeidx].size%3) != 0 ) // there is no AVX2 implementation for multiples of 3 DFTs
algn=0x1F;
#endif
AssertFatal(((intptr_t)output&algn)==0,"Buffers should be aligned %p",output);
if (((intptr_t)input)&algn) {
LOG_D(PHY, "DFT called with input not aligned, add a memcpy, size %d\n", sizeidx);
int16_t tmp[dft_ftab[sizeidx].size*2] __attribute__ ((aligned(32))); // input and output are not in right type (int16_t instead of c16_t)
int sz=dft_ftab[sizeidx].size;
if (sizeidx==DFT_12) // This case does 8 DFTs in //
sz*=8;
int16_t tmp[sz*2] __attribute__ ((aligned(32))); // input and output are not in right type (int16_t instead of c16_t)
memcpy(tmp, input, sizeof tmp);
dft_ftab[sizeidx].func(tmp,output,scale_flag);
} else
......@@ -10620,10 +10625,15 @@ void dft(uint8_t sizeidx, int16_t *input,int16_t *output,unsigned char scale_fla
void idft(uint8_t sizeidx, int16_t *input,int16_t *output,unsigned char scale_flag){
AssertFatal((sizeidx>=0 && sizeidx<DFT_SIZE_IDXTABLESIZE),"Invalid idft size index %i\n",sizeidx);
AssertFatal( ((intptr_t)output&0x1F)==0,"Buffers should be 32 bytes aligned %p",output);
if ((intptr_t)input&0x1F) {
int algn=0xF;
#ifdef __AVX2__
algn=0x1F;
#endif
AssertFatal( ((intptr_t)output&algn)==0,"Buffers should be 16 bytes aligned %p",output);
if (((intptr_t)input)&algn ) {
LOG_D(PHY, "DFT called with input not aligned, add a memcpy\n");
int16_t tmp[idft_ftab[sizeidx].size*2] __attribute__ ((aligned(32))); // input and output are not in right type (int16_t instead of c16_t)
int sz=idft_ftab[sizeidx].size;
int16_t tmp[sz*2] __attribute__ ((aligned(32))); // input and output are not in right type (int16_t instead of c16_t)
memcpy(tmp, input, sizeof tmp);
dft_ftab[sizeidx].func(tmp,output,scale_flag);
} else
......
......@@ -253,7 +253,7 @@ This function performs optimized fixed-point radix-2 FFT/IFFT.
SZ_DEF(73728) \
SZ_DEF(98304)
#define FOREACH_IDFTSZ(SZ_DEF) \
#define FOREACH_IDFTSZ(SZ_DEF)\
SZ_DEF(64) \
SZ_DEF(128) \
SZ_DEF(256) \
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment