Commit de284e9e authored by Raymond Knoppp's avatar Raymond Knoppp

OTA synchronization debugging (aquila)

parent 22eccb1c
......@@ -483,14 +483,17 @@ int ru_sync_time_init(RU_t *ru) // LTE_UE_COMMON *common_vars
*/
int32_t dmrs[ru->frame_parms.ofdm_symbol_size*14] __attribute__((aligned(32)));
int32_t *dmrsp[2] = {&dmrs[(3-ru->frame_parms.Ncp)*ru->frame_parms.ofdm_symbol_size],NULL};
// int32_t *dmrsp[2] = {&dmrs[(3-ru->frame_parms.Ncp)*ru->frame_parms.ofdm_symbol_size],NULL};
int32_t *dmrsp[2] = {&dmrs[0],NULL};
generate_ul_ref_sigs();
ru->dmrssync = (int16_t*)malloc16_clear(ru->frame_parms.ofdm_symbol_size*2*sizeof(int16_t));
ru->dmrs_corr = (uint64_t*)malloc16_clear(ru->frame_parms.samples_per_tti*10*sizeof(uint64_t));
generate_drs_pusch(NULL,NULL,
&ru->frame_parms,
dmrsp,/*(int32_t**)dmrsp,*/
dmrsp,
0,
AMP,
0,
......@@ -500,28 +503,28 @@ int ru_sync_time_init(RU_t *ru) // LTE_UE_COMMON *common_vars
switch (ru->frame_parms.N_RB_DL) {
case 6:
idft128(dmrs, /// complex input
idft128((int16_t*)(&dmrsp[0][3*ru->frame_parms.ofdm_symbol_size]),
ru->dmrssync, /// complex output
1);
break;
case 25:
idft512(dmrs,
idft512((int16_t*)(&dmrsp[0][3*ru->frame_parms.ofdm_symbol_size]),
ru->dmrssync, /// complex output
1);
break;
case 50:
idft1024((int16_t*)dmrsp,/*dmrs,*/
idft1024((int16_t*)(&dmrsp[0][3*ru->frame_parms.ofdm_symbol_size]),
ru->dmrssync, /// complex output
1);
break;
case 75:
idft1536(dmrs,
idft1536((int16_t*)(&dmrsp[0][3*ru->frame_parms.ofdm_symbol_size]),
ru->dmrssync,
1); /// complex output
break;
case 100:
idft2048(dmrs,
idft2048((int16_t*)(&dmrsp[0][3*ru->frame_parms.ofdm_symbol_size]),
ru->dmrssync, /// complex output
1);
break;
......@@ -537,6 +540,7 @@ void ru_sync_time_free(RU_t *ru) {
AssertFatal(ru->dmrssync!=NULL,"ru->dmrssync is NULL\n");
free(ru->dmrssync);
if (ru->dmrs_corr) free(ru->dmrs_corr);
}
//#define DEBUG_PHY
......@@ -631,6 +635,12 @@ int lte_sync_time_eNB(int32_t **rxdata, ///rx data in time domain
}
static inline int64_t abs64(int64_t x)
{
return (((int64_t)((int32_t*)&x)[0])*((int64_t)((int32_t*)&x)[0]) + ((int64_t)
((int32_t*)&x)[1])*((int64_t)((int32_t*)&x)[1]));
}
int ru_sync_time(RU_t *ru,
int64_t *lev,
int64_t *avg)
......@@ -653,38 +663,42 @@ int ru_sync_time(RU_t *ru,
int32_t magtmp0,maxlev0=0;
int maxpos0=0;
int64_t avg0=0;
int32_t result;
int64_t result;
int64_t dmrs_corr;
int maxval=0;
for (int i=0;i<2*(frame_parms->ofdm_symbol_size);i++) {
maxval = max(maxval,ru->dmrssync[i]);
maxval = max(maxval,-ru->dmrssync[i]);
}
int shift = log2_approx(maxval);
for (int n=0; n<length; n+=4) {
tmp0 = 0;
dmrs_corr = 0;
//calculate dot product of primary_synch0_time and rxdata[ar][n] (ar=0..nb_ant_rx) and store the sum in temp[n];
for (int ar=0; ar<ru->nb_rx; ar++) {
result = dot_product(ru->dmrssync,
(int16_t*) &ru->common.rxdata[ar][n],
frame_parms->ofdm_symbol_size,
11);
((int16_t*)&tmp0)[0] += ((int16_t*) &result)[0];
((int16_t*)&tmp0)[1] += ((int16_t*) &result)[1];
result = dot_product64(ru->dmrssync,
(int16_t*) &ru->common.rxdata[ar][n],
frame_parms->ofdm_symbol_size,
shift);
dmrs_corr += abs64(result);
}
if (ru->dmrs_corr != NULL) ru->dmrs_corr[n] = dmrs_corr;
// tmpi holds <synchi,rx0>+<synci,rx1>+...+<synchi,rx_{nbrx-1}>
magtmp0 = abs32(tmp0);
// this does max |tmp0(n)|^2 and argmax |tmp0(n)|^2
if (magtmp0>maxlev0) { maxlev0 = magtmp0; maxpos0 = n; }
avg0 += magtmp0;
if (dmrs_corr>maxlev0) { maxlev0 = dmrs_corr; maxpos0 = n; }
avg0 += dmrs_corr;
}
avg0/=(length/4);
int dmrsoffset = 3*(frame_parms->ofdm_symbol_size+frame_parms->nb_prefix_samples) + frame_parms->nb_prefix_samples0;
int dmrsoffset = frame_parms->samples_per_tti + (3*frame_parms->ofdm_symbol_size)+(2*frame_parms->nb_prefix_samples) + frame_parms->nb_prefix_samples0;
if ((int64_t)maxlev0 > (5*avg0)) {*lev = maxlev0; *avg=avg0; return((length+maxpos0-dmrsoffset)%length);}
if ((int64_t)maxlev0 > (10*avg0)) {*lev = maxlev0; *avg=avg0; return((length+maxpos0-dmrsoffset)%length);}
return(-1);
......
......@@ -159,6 +159,127 @@ int32_t dot_product(int16_t *x,
}
int64_t dot_product64(int16_t *x,
int16_t *y,
uint32_t N, //must be a multiple of 8
uint8_t output_shift)
{
uint32_t n;
#if defined(__x86_64__) || defined(__i386__)
__m128i *x128,*y128,mmtmp1,mmtmp2,mmtmp3,mmcumul,mmcumul_re,mmcumul_im;
__m128i minus_i = _mm_set_epi16(-1,1,-1,1,-1,1,-1,1);
int32_t result;
x128 = (__m128i*) x;
y128 = (__m128i*) y;
mmcumul_re = _mm_setzero_si128();
mmcumul_im = _mm_setzero_si128();
for (n=0; n<(N>>2); n++) {
// printf("n=%d, x128=%p, y128=%p\n",n,x128,y128);
// print_shorts("x",&x128[0]);
// print_shorts("y",&y128[0]);
// this computes Re(z) = Re(x)*Re(y) + Im(x)*Im(y)
mmtmp1 = _mm_madd_epi16(x128[0],y128[0]);
// print_ints("retmp",&mmtmp1);
// mmtmp1 contains real part of 4 consecutive outputs (32-bit)
// shift and accumulate results
mmtmp1 = _mm_srai_epi32(mmtmp1,output_shift);
mmcumul_re = _mm_add_epi32(mmcumul_re,mmtmp1);
//print_ints("re",&mmcumul_re);
// this computes Im(z) = Re(x)*Im(y) - Re(y)*Im(x)
mmtmp2 = _mm_shufflelo_epi16(y128[0],_MM_SHUFFLE(2,3,0,1));
//print_shorts("y",&mmtmp2);
mmtmp2 = _mm_shufflehi_epi16(mmtmp2,_MM_SHUFFLE(2,3,0,1));
//print_shorts("y",&mmtmp2);
mmtmp2 = _mm_sign_epi16(mmtmp2,minus_i);
// print_shorts("y",&mmtmp2);
mmtmp3 = _mm_madd_epi16(x128[0],mmtmp2);
//print_ints("imtmp",&mmtmp3);
// mmtmp3 contains imag part of 4 consecutive outputs (32-bit)
// shift and accumulate results
mmtmp3 = _mm_srai_epi32(mmtmp3,output_shift);
mmcumul_im = _mm_add_epi32(mmcumul_im,mmtmp3);
//print_ints("im",&mmcumul_im);
x128++;
y128++;
}
// this gives Re Re Im Im
mmcumul = _mm_hadd_epi32(mmcumul_re,mmcumul_im);
//print_ints("cumul1",&mmcumul);
// this gives Re Im Re Im
mmcumul = _mm_hadd_epi32(mmcumul,mmcumul);
//print_ints("cumul2",&mmcumul);
//mmcumul = _mm_srai_epi32(mmcumul,output_shift);
// extract the lower half
result = _mm_extract_epi64(mmcumul,0);
//printf("result: (%d,%d)\n",((int32_t*)&result)[0],((int32_t*)&result)[1]);
_mm_empty();
_m_empty();
return(result);
#elif defined(__arm__)
int16x4_t *x_128=(int16x4_t*)x;
int16x4_t *y_128=(int16x4_t*)y;
int32x4_t tmp_re,tmp_im;
int32x4_t tmp_re1,tmp_im1;
int32x4_t re_cumul,im_cumul;
int32x2_t re_cumul2,im_cumul2;
int32x4_t shift = vdupq_n_s32(-output_shift);
int32x2x2_t result2;
int16_t conjug[4]__attribute__((aligned(16))) = {-1,1,-1,1} ;
re_cumul = vdupq_n_s32(0);
im_cumul = vdupq_n_s32(0);
for (n=0; n<(N>>2); n++) {
tmp_re = vmull_s16(*x_128++, *y_128++);
//tmp_re = [Re(x[0])Re(y[0]) Im(x[0])Im(y[0]) Re(x[1])Re(y[1]) Im(x[1])Im(y[1])]
tmp_re1 = vmull_s16(*x_128++, *y_128++);
//tmp_re1 = [Re(x1[1])Re(x2[1]) Im(x1[1])Im(x2[1]) Re(x1[1])Re(x2[2]) Im(x1[1])Im(x2[2])]
tmp_re = vcombine_s32(vpadd_s32(vget_low_s32(tmp_re),vget_high_s32(tmp_re)),
vpadd_s32(vget_low_s32(tmp_re1),vget_high_s32(tmp_re1)));
//tmp_re = [Re(ch[0])Re(rx[0])+Im(ch[0])Im(ch[0]) Re(ch[1])Re(rx[1])+Im(ch[1])Im(ch[1]) Re(ch[2])Re(rx[2])+Im(ch[2]) Im(ch[2]) Re(ch[3])Re(rx[3])+Im(ch[3])Im(ch[3])]
tmp_im = vmull_s16(vrev32_s16(vmul_s16(*x_128++,*(int16x4_t*)conjug)),*y_128++);
//tmp_im = [-Im(ch[0])Re(rx[0]) Re(ch[0])Im(rx[0]) -Im(ch[1])Re(rx[1]) Re(ch[1])Im(rx[1])]
tmp_im1 = vmull_s16(vrev32_s16(vmul_s16(*x_128++,*(int16x4_t*)conjug)),*y_128++);
//tmp_im1 = [-Im(ch[2])Re(rx[2]) Re(ch[2])Im(rx[2]) -Im(ch[3])Re(rx[3]) Re(ch[3])Im(rx[3])]
tmp_im = vcombine_s32(vpadd_s32(vget_low_s32(tmp_im),vget_high_s32(tmp_im)),
vpadd_s32(vget_low_s32(tmp_im1),vget_high_s32(tmp_im1)));
//tmp_im = [-Im(ch[0])Re(rx[0])+Re(ch[0])Im(rx[0]) -Im(ch[1])Re(rx[1])+Re(ch[1])Im(rx[1]) -Im(ch[2])Re(rx[2])+Re(ch[2])Im(rx[2]) -Im(ch[3])Re(rx[3])+Re(ch[3])Im(rx[3])]
re_cumul = vqaddq_s32(re_cumul,vqshlq_s32(tmp_re,shift));
im_cumul = vqaddq_s32(im_cumul,vqshlq_s32(tmp_im,shift));
}
re_cumul2 = vpadd_s32(vget_low_s32(re_cumul),vget_high_s32(re_cumul));
im_cumul2 = vpadd_s32(vget_low_s32(im_cumul),vget_high_s32(im_cumul));
re_cumul2 = vpadd_s32(re_cumul2,re_cumul2);
im_cumul2 = vpadd_s32(im_cumul2,im_cumul2);
result2 = vzip_s32(re_cumul2,im_cumul2);
return(vget_lane_s32(result2.val[0],0));
#endif
}
#ifdef MAIN
void print_bytes(char *s,__m128i *x)
{
......
......@@ -597,6 +597,12 @@ int16_t dB_fixed_times10(uint32_t x)
return dB_power;
}
uint8_t dB_fixed64(uint64_t x) {
if ((x<(((uint64_t)1)<<32))) return(dB_fixed((uint32_t)x));
else return(4*dB_table[255]+dB_fixed((uint32_t)(x>>32)));
}
int8_t dB_fixed(uint32_t x)
{
......
......@@ -336,6 +336,7 @@ Compensate the phase rotation of the RF. WARNING: This function is currently unu
int8_t dB_fixed(uint32_t x);
uint8_t dB_fixed64(uint64_t x);
int8_t dB_fixed2(uint32_t x,uint32_t y);
......@@ -349,6 +350,11 @@ int32_t dot_product(int16_t *x,
uint32_t N, //must be a multiple of 8
uint8_t output_shift);
int64_t dot_product64(int16_t *x,
int16_t *y,
uint32_t N, //must be a multiple of 8
uint8_t output_shift);
void dft12(int16_t *x,int16_t *y);
void dft24(int16_t *x,int16_t *y,uint8_t scale_flag);
void dft36(int16_t *x,int16_t *y,uint8_t scale_flag);
......
......@@ -464,6 +464,8 @@ typedef struct RU_t_s{
pthread_t ru_stats_thread;
/// OTA synchronization signal
int16_t *dmrssync;
/// OTA synchronization correlator output
uint64_t *dmrs_corr;
} RU_t;
......
......@@ -1880,19 +1880,25 @@ void *ru_thread_synch(void *arg) {
ru->rx_offset = ru_sync_time(ru,
&peak_val,
&avg);
LOG_I(PHY,"RU synch cnt %d: %d, val %llu\n",cnt,ru->rx_offset,(unsigned long long)peak_val);
LOG_I(PHY,"RU synch cnt %d: %d, val %llu (%d dB,%d dB)\n",cnt,ru->rx_offset,(unsigned long long)peak_val,dB_fixed64(peak_val),dB_fixed64(avg));
cnt++;
if (ru->rx_offset >= 0 && cnt>100) {
if (ru->rx_offset >= 0 && cnt>50) {
LOG_I(PHY,"Estimated peak_val %d dB, avg %d => timing offset %llu\n",dB_fixed(peak_val),dB_fixed(avg),(unsigned long long int)ru->rx_offset);
ru->in_synch = 1;
/*
LOG_M("ru_sync_rx.m","rurx",&ru->common.rxdata[0][0],LTE_NUMBER_OF_SUBFRAMES_PER_FRAME*fp->samples_per_tti,1,1);
exit(-1);
LOG_M("ru_sync_corr.m","sync_corr",ru->dmrs_corr,LTE_NUMBER_OF_SUBFRAMES_PER_FRAME*fp->samples_per_tti,1,6);
LOG_M("ru_dmrs.m","rudmrs",&ru->dmrssync[0],fp->ofdm_symbol_size,1,1);
exit(-1);*/
} // sync_pos > 0
else //AssertFatal(cnt<1000,"Cannot find synch reference\n");
{
if (cnt>200) {
LOG_M("ru_sync_rx.m","rurx",&ru->common.rxdata[0][0],LTE_NUMBER_OF_SUBFRAMES_PER_FRAME*fp->samples_per_tti,1,1);
LOG_M("ru_sync_corr.m","sync_corr",ru->dmrs_corr,LTE_NUMBER_OF_SUBFRAMES_PER_FRAME*fp->samples_per_tti,1,6);
LOG_M("ru_dmrs.m","rudmrs",&ru->dmrssync[0],fp->ofdm_symbol_size,1,1);
exit(-1);
}
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment