Commit 4c83082e authored by magounak's avatar magounak

Merge branch 'ru_rau_enhancement-external-timing' of...

Merge branch 'ru_rau_enhancement-external-timing' of https://gitlab.eurecom.fr/oai/openairinterface5g into ru_rau_enhancement-external-timing
parents d24be4d6 f43149e1
......@@ -483,14 +483,17 @@ int ru_sync_time_init(RU_t *ru) // LTE_UE_COMMON *common_vars
*/
int32_t dmrs[ru->frame_parms.ofdm_symbol_size*14] __attribute__((aligned(32)));
int32_t *dmrsp[2] = {&dmrs[(3-ru->frame_parms.Ncp)*ru->frame_parms.ofdm_symbol_size],NULL};
// int32_t *dmrsp[2] = {&dmrs[(3-ru->frame_parms.Ncp)*ru->frame_parms.ofdm_symbol_size],NULL};
int32_t *dmrsp[2] = {&dmrs[0],NULL};
generate_ul_ref_sigs();
ru->dmrssync = (int16_t*)malloc16_clear(ru->frame_parms.ofdm_symbol_size*2*sizeof(int16_t));
ru->dmrs_corr = (uint64_t*)malloc16_clear(ru->frame_parms.samples_per_tti*10*sizeof(uint64_t));
generate_drs_pusch(NULL,NULL,
&ru->frame_parms,
dmrsp,/*(int32_t**)dmrsp,*/
dmrsp,
0,
AMP,
0,
......@@ -500,28 +503,28 @@ int ru_sync_time_init(RU_t *ru) // LTE_UE_COMMON *common_vars
switch (ru->frame_parms.N_RB_DL) {
case 6:
idft128(dmrs, /// complex input
idft128((int16_t*)(&dmrsp[0][3*ru->frame_parms.ofdm_symbol_size]),
ru->dmrssync, /// complex output
1);
break;
case 25:
idft512(dmrs,
idft512((int16_t*)(&dmrsp[0][3*ru->frame_parms.ofdm_symbol_size]),
ru->dmrssync, /// complex output
1);
break;
case 50:
idft1024((int16_t*)dmrsp,/*dmrs,*/
idft1024((int16_t*)(&dmrsp[0][3*ru->frame_parms.ofdm_symbol_size]),
ru->dmrssync, /// complex output
1);
break;
case 75:
idft1536(dmrs,
idft1536((int16_t*)(&dmrsp[0][3*ru->frame_parms.ofdm_symbol_size]),
ru->dmrssync,
1); /// complex output
break;
case 100:
idft2048(dmrs,
idft2048((int16_t*)(&dmrsp[0][3*ru->frame_parms.ofdm_symbol_size]),
ru->dmrssync, /// complex output
1);
break;
......@@ -537,6 +540,7 @@ void ru_sync_time_free(RU_t *ru) {
AssertFatal(ru->dmrssync!=NULL,"ru->dmrssync is NULL\n");
free(ru->dmrssync);
if (ru->dmrs_corr) free(ru->dmrs_corr);
}
//#define DEBUG_PHY
......@@ -631,6 +635,12 @@ int lte_sync_time_eNB(int32_t **rxdata, ///rx data in time domain
}
static inline int64_t abs64(int64_t x)
{
return (((int64_t)((int32_t*)&x)[0])*((int64_t)((int32_t*)&x)[0]) + ((int64_t)
((int32_t*)&x)[1])*((int64_t)((int32_t*)&x)[1]));
}
int ru_sync_time(RU_t *ru,
int64_t *lev,
int64_t *avg)
......@@ -653,38 +663,42 @@ int ru_sync_time(RU_t *ru,
int32_t magtmp0,maxlev0=0;
int maxpos0=0;
int64_t avg0=0;
int32_t result;
int64_t result;
int64_t dmrs_corr;
int maxval=0;
for (int i=0;i<2*(frame_parms->ofdm_symbol_size);i++) {
maxval = max(maxval,ru->dmrssync[i]);
maxval = max(maxval,-ru->dmrssync[i]);
}
int shift = log2_approx(maxval);
for (int n=0; n<length; n+=4) {
tmp0 = 0;
dmrs_corr = 0;
//calculate dot product of primary_synch0_time and rxdata[ar][n] (ar=0..nb_ant_rx) and store the sum in temp[n];
for (int ar=0; ar<ru->nb_rx; ar++) {
result = dot_product(ru->dmrssync,
result = dot_product64(ru->dmrssync,
(int16_t*) &ru->common.rxdata[ar][n],
frame_parms->ofdm_symbol_size,
11);
((int16_t*)&tmp0)[0] += ((int16_t*) &result)[0];
((int16_t*)&tmp0)[1] += ((int16_t*) &result)[1];
shift);
dmrs_corr += abs64(result);
}
if (ru->dmrs_corr != NULL) ru->dmrs_corr[n] = dmrs_corr;
// tmpi holds <synchi,rx0>+<synci,rx1>+...+<synchi,rx_{nbrx-1}>
magtmp0 = abs32(tmp0);
// this does max |tmp0(n)|^2 and argmax |tmp0(n)|^2
if (magtmp0>maxlev0) { maxlev0 = magtmp0; maxpos0 = n; }
avg0 += magtmp0;
if (dmrs_corr>maxlev0) { maxlev0 = dmrs_corr; maxpos0 = n; }
avg0 += dmrs_corr;
}
avg0/=(length/4);
int dmrsoffset = 3*(frame_parms->ofdm_symbol_size+frame_parms->nb_prefix_samples) + frame_parms->nb_prefix_samples0;
int dmrsoffset = frame_parms->samples_per_tti + (3*frame_parms->ofdm_symbol_size)+(2*frame_parms->nb_prefix_samples) + frame_parms->nb_prefix_samples0;
if ((int64_t)maxlev0 > (5*avg0)) {*lev = maxlev0; *avg=avg0; return((length+maxpos0-dmrsoffset)%length);}
if ((int64_t)maxlev0 > (10*avg0)) {*lev = maxlev0; *avg=avg0; return((length+maxpos0-dmrsoffset)%length);}
return(-1);
......
......@@ -159,6 +159,127 @@ int32_t dot_product(int16_t *x,
}
int64_t dot_product64(int16_t *x,
int16_t *y,
uint32_t N, //must be a multiple of 8
uint8_t output_shift)
{
uint32_t n;
#if defined(__x86_64__) || defined(__i386__)
__m128i *x128,*y128,mmtmp1,mmtmp2,mmtmp3,mmcumul,mmcumul_re,mmcumul_im;
__m128i minus_i = _mm_set_epi16(-1,1,-1,1,-1,1,-1,1);
int32_t result;
x128 = (__m128i*) x;
y128 = (__m128i*) y;
mmcumul_re = _mm_setzero_si128();
mmcumul_im = _mm_setzero_si128();
for (n=0; n<(N>>2); n++) {
// printf("n=%d, x128=%p, y128=%p\n",n,x128,y128);
// print_shorts("x",&x128[0]);
// print_shorts("y",&y128[0]);
// this computes Re(z) = Re(x)*Re(y) + Im(x)*Im(y)
mmtmp1 = _mm_madd_epi16(x128[0],y128[0]);
// print_ints("retmp",&mmtmp1);
// mmtmp1 contains real part of 4 consecutive outputs (32-bit)
// shift and accumulate results
mmtmp1 = _mm_srai_epi32(mmtmp1,output_shift);
mmcumul_re = _mm_add_epi32(mmcumul_re,mmtmp1);
//print_ints("re",&mmcumul_re);
// this computes Im(z) = Re(x)*Im(y) - Re(y)*Im(x)
mmtmp2 = _mm_shufflelo_epi16(y128[0],_MM_SHUFFLE(2,3,0,1));
//print_shorts("y",&mmtmp2);
mmtmp2 = _mm_shufflehi_epi16(mmtmp2,_MM_SHUFFLE(2,3,0,1));
//print_shorts("y",&mmtmp2);
mmtmp2 = _mm_sign_epi16(mmtmp2,minus_i);
// print_shorts("y",&mmtmp2);
mmtmp3 = _mm_madd_epi16(x128[0],mmtmp2);
//print_ints("imtmp",&mmtmp3);
// mmtmp3 contains imag part of 4 consecutive outputs (32-bit)
// shift and accumulate results
mmtmp3 = _mm_srai_epi32(mmtmp3,output_shift);
mmcumul_im = _mm_add_epi32(mmcumul_im,mmtmp3);
//print_ints("im",&mmcumul_im);
x128++;
y128++;
}
// this gives Re Re Im Im
mmcumul = _mm_hadd_epi32(mmcumul_re,mmcumul_im);
//print_ints("cumul1",&mmcumul);
// this gives Re Im Re Im
mmcumul = _mm_hadd_epi32(mmcumul,mmcumul);
//print_ints("cumul2",&mmcumul);
//mmcumul = _mm_srai_epi32(mmcumul,output_shift);
// extract the lower half
result = _mm_extract_epi64(mmcumul,0);
//printf("result: (%d,%d)\n",((int32_t*)&result)[0],((int32_t*)&result)[1]);
_mm_empty();
_m_empty();
return(result);
#elif defined(__arm__)
int16x4_t *x_128=(int16x4_t*)x;
int16x4_t *y_128=(int16x4_t*)y;
int32x4_t tmp_re,tmp_im;
int32x4_t tmp_re1,tmp_im1;
int32x4_t re_cumul,im_cumul;
int32x2_t re_cumul2,im_cumul2;
int32x4_t shift = vdupq_n_s32(-output_shift);
int32x2x2_t result2;
int16_t conjug[4]__attribute__((aligned(16))) = {-1,1,-1,1} ;
re_cumul = vdupq_n_s32(0);
im_cumul = vdupq_n_s32(0);
for (n=0; n<(N>>2); n++) {
tmp_re = vmull_s16(*x_128++, *y_128++);
//tmp_re = [Re(x[0])Re(y[0]) Im(x[0])Im(y[0]) Re(x[1])Re(y[1]) Im(x[1])Im(y[1])]
tmp_re1 = vmull_s16(*x_128++, *y_128++);
//tmp_re1 = [Re(x1[1])Re(x2[1]) Im(x1[1])Im(x2[1]) Re(x1[1])Re(x2[2]) Im(x1[1])Im(x2[2])]
tmp_re = vcombine_s32(vpadd_s32(vget_low_s32(tmp_re),vget_high_s32(tmp_re)),
vpadd_s32(vget_low_s32(tmp_re1),vget_high_s32(tmp_re1)));
//tmp_re = [Re(ch[0])Re(rx[0])+Im(ch[0])Im(ch[0]) Re(ch[1])Re(rx[1])+Im(ch[1])Im(ch[1]) Re(ch[2])Re(rx[2])+Im(ch[2]) Im(ch[2]) Re(ch[3])Re(rx[3])+Im(ch[3])Im(ch[3])]
tmp_im = vmull_s16(vrev32_s16(vmul_s16(*x_128++,*(int16x4_t*)conjug)),*y_128++);
//tmp_im = [-Im(ch[0])Re(rx[0]) Re(ch[0])Im(rx[0]) -Im(ch[1])Re(rx[1]) Re(ch[1])Im(rx[1])]
tmp_im1 = vmull_s16(vrev32_s16(vmul_s16(*x_128++,*(int16x4_t*)conjug)),*y_128++);
//tmp_im1 = [-Im(ch[2])Re(rx[2]) Re(ch[2])Im(rx[2]) -Im(ch[3])Re(rx[3]) Re(ch[3])Im(rx[3])]
tmp_im = vcombine_s32(vpadd_s32(vget_low_s32(tmp_im),vget_high_s32(tmp_im)),
vpadd_s32(vget_low_s32(tmp_im1),vget_high_s32(tmp_im1)));
//tmp_im = [-Im(ch[0])Re(rx[0])+Re(ch[0])Im(rx[0]) -Im(ch[1])Re(rx[1])+Re(ch[1])Im(rx[1]) -Im(ch[2])Re(rx[2])+Re(ch[2])Im(rx[2]) -Im(ch[3])Re(rx[3])+Re(ch[3])Im(rx[3])]
re_cumul = vqaddq_s32(re_cumul,vqshlq_s32(tmp_re,shift));
im_cumul = vqaddq_s32(im_cumul,vqshlq_s32(tmp_im,shift));
}
re_cumul2 = vpadd_s32(vget_low_s32(re_cumul),vget_high_s32(re_cumul));
im_cumul2 = vpadd_s32(vget_low_s32(im_cumul),vget_high_s32(im_cumul));
re_cumul2 = vpadd_s32(re_cumul2,re_cumul2);
im_cumul2 = vpadd_s32(im_cumul2,im_cumul2);
result2 = vzip_s32(re_cumul2,im_cumul2);
return(vget_lane_s32(result2.val[0],0));
#endif
}
#ifdef MAIN
void print_bytes(char *s,__m128i *x)
{
......
......@@ -597,6 +597,12 @@ int16_t dB_fixed_times10(uint32_t x)
return dB_power;
}
uint8_t dB_fixed64(uint64_t x) {
if ((x<(((uint64_t)1)<<32))) return(dB_fixed((uint32_t)x));
else return(4*dB_table[255]+dB_fixed((uint32_t)(x>>32)));
}
int8_t dB_fixed(uint32_t x)
{
......
......@@ -336,6 +336,7 @@ Compensate the phase rotation of the RF. WARNING: This function is currently unu
int8_t dB_fixed(uint32_t x);
uint8_t dB_fixed64(uint64_t x);
int8_t dB_fixed2(uint32_t x,uint32_t y);
......@@ -349,6 +350,11 @@ int32_t dot_product(int16_t *x,
uint32_t N, //must be a multiple of 8
uint8_t output_shift);
int64_t dot_product64(int16_t *x,
int16_t *y,
uint32_t N, //must be a multiple of 8
uint8_t output_shift);
void dft12(int16_t *x,int16_t *y);
void dft24(int16_t *x,int16_t *y,uint8_t scale_flag);
void dft36(int16_t *x,int16_t *y,uint8_t scale_flag);
......
......@@ -464,6 +464,8 @@ typedef struct RU_t_s{
pthread_t ru_stats_thread;
/// OTA synchronization signal
int16_t *dmrssync;
/// OTA synchronization correlator output
uint64_t *dmrs_corr;
} RU_t;
......
......@@ -97,44 +97,26 @@ void feptx0(RU_t *ru,int slot) {
*/
if (ru->generate_dmrs_sync == 1 && slot == 0 && subframe == 1 && aa==0) {
int32_t dmrs[ru->frame_parms.ofdm_symbol_size*14] __attribute__((aligned(32)));
int32_t *dmrsp[2] = {&dmrs[(3-ru->frame_parms.Ncp)*ru->frame_parms.ofdm_symbol_size],NULL};
//int32_t dmrs[ru->frame_parms.ofdm_symbol_size*14] __attribute__((aligned(32)));
//int32_t *dmrsp[2] ={dmrs,NULL}; //{&dmrs[(3-ru->frame_parms.Ncp)*ru->frame_parms.ofdm_symbol_size],NULL};
//generate_ul_ref_sigs();
//ru->dmrssync = (int16_t*)malloc16_clear(ru->frame_parms.ofdm_symbol_size*2*sizeof(int16_t));
generate_drs_pusch((PHY_VARS_UE *)NULL,
(UE_rxtx_proc_t*)NULL,
fp,
dmrsp,//ru->common.txdataF_BF,
ru->common.txdataF_BF,
0,
AMP,
1,
0,
0,
fp->N_RB_DL,
aa);
idft1024((int16_t*)dmrsp[0],
(int16_t*)&ru->common.txdata[aa][slot_offset],
1);
/*normal_prefix_mod((int16_t*)dmrsp[0],
(int*)&ru->common.txdata[aa][slot_offset],
1,
fp);
*/
normal_prefix_mod(&ru->common.txdataF_BF[aa][slot*slot_sizeF],
(int*)&ru->common.txdata[aa][slot_offset],
3,
fp);
}
else {
normal_prefix_mod(&ru->common.txdataF_BF[aa][slot*slot_sizeF],
(int*)&ru->common.txdata[aa][slot_offset],
7,
fp);
}
}
/*
len = fp->samples_per_tti>>1;
......
......@@ -966,8 +966,6 @@ extern "C" {
#endif
extern "C" {
* \param openair0_cfg RF frontend parameters set by application
*/
int device_init(openair0_device *device, openair0_config_t *openair0_cfg) {
LOG_D(PHY, "openair0_cfg[0].sdr_addrs == '%s'\n", openair0_cfg[0].sdr_addrs);
LOG_D(PHY, "openair0_cfg[0].clock_source == '%d'\n", openair0_cfg[0].clock_source);
......@@ -1103,11 +1101,14 @@ extern "C" {
s->usrp = uhd::usrp::multi_usrp::make(args);
// lock mboard clocks
if (openair0_cfg[0].clock_source == internal)
if (openair0_cfg[0].clock_source == internal) {
s->usrp->set_clock_source("internal");
else
printf("Setting clock source to internal\n");
}
else {
s->usrp->set_clock_source("external");
printf("Setting clock source to external\n");
}
if (device->type==USRP_X300_DEV) {
openair0_cfg[0].rx_gain_calib_table = calib_table_x310;
#if defined(USRP_REC_PLAY)
......
......@@ -506,7 +506,7 @@ int wakeup_txfh(eNB_rxtx_proc_t *proc,PHY_VARS_eNB *eNB) {
fp = &eNB->RU_list[ru_id]->frame_parms;
if ((fp->frame_type == TDD) && (subframe_select(fp,proc->subframe_tx)==SF_UL)) continue;
// skip the RUs that are not synced
if (eNB->RU_list[ru_id]->state == RU_SYNC) { LOG_I(PHY,"wakeup_txfh: eNB %d : Skipping ru %d\n",eNB->Mod_id,ru_id); continue; }
if (eNB->RU_list[ru_id]->state == RU_SYNC) { LOG_D(PHY,"wakeup_txfh: eNB %d : Skipping ru %d\n",eNB->Mod_id,ru_id); continue; }
if(ru_proc == NULL)
return(0);
......
......@@ -720,7 +720,10 @@ void tx_rf(RU_t *ru) {
int siglen=fp->samples_per_tti,flags=1;
if (SF_type == SF_S) {
siglen = fp->dl_symbols_in_S_subframe*(fp->ofdm_symbol_size+fp->nb_prefix_samples0);
int txsymb = fp->dl_symbols_in_S_subframe+(ru->is_slave==0 ? 1 : 0);
AssertFatal(txsymb>0,"illegal txsymb %d\n",txsymb);
siglen = fp->nb_prefix_samples0 + (txsymb*fp->ofdm_symbol_size) + (txsymb-1)*fp->nb_prefix_samples;
//siglen = fp->dl_symbols_in_S_subframe*(fp->ofdm_symbol_size+fp->nb_prefix_samples0);
flags=3; // end of burst
}
if ((fp->frame_type == TDD) &&
......@@ -797,10 +800,10 @@ void tx_rf(RU_t *ru) {
ru->nb_tx,
flags);
ru->south_out_cnt++;
//int se = dB_fixed(signal_energy(txp[0],siglen+sf_extension));
int se = dB_fixed(signal_energy(txp[0],siglen+sf_extension));
//LOG_D(PHY,"[TXPATH] RU %d tx_rf (en %d,len %d), writing to TS %llu, frame %d, unwrapped_frame %d, subframe %d\n",ru->idx,se,siglen+sf_extension,
// (long long unsigned int)proc->timestamp_tx,proc->frame_tx,proc->frame_tx_unwrap,proc->subframe_tx);
if (SF_type == SF_S) LOG_D(PHY,"[TXPATH] RU %d tx_rf (en %d,len %d), writing to TS %llu, frame %d, unwrapped_frame %d, subframe %d\n",ru->idx,se,siglen+sf_extension,
(long long unsigned int)proc->timestamp_tx,proc->frame_tx,proc->frame_tx_unwrap,proc->subframe_tx);
VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME( VCD_SIGNAL_DUMPER_FUNCTIONS_TRX_WRITE, 0 );
......@@ -1664,7 +1667,7 @@ static void* ru_thread( void* param ) {
if ((ru->is_slave == 1) && (ru->if_south == LOCAL_RF)) do_ru_synch(ru);
LOG_I(PHY,"Starting steady-state operation\n");
LOG_D(PHY,"Starting steady-state operation\n");
// This is a forever while loop, it loops over subframes which are scheduled by incoming samples from HW devices
while (ru->state == RU_RUN) {
......@@ -1880,19 +1883,25 @@ void *ru_thread_synch(void *arg) {
ru->rx_offset = ru_sync_time(ru,
&peak_val,
&avg);
LOG_I(PHY,"RU synch cnt %d: %d, val %llu\n",cnt,ru->rx_offset,(unsigned long long)peak_val);
LOG_I(PHY,"RU synch cnt %d: %d, val %llu (%d dB,%d dB)\n",cnt,ru->rx_offset,(unsigned long long)peak_val,dB_fixed64(peak_val),dB_fixed64(avg));
cnt++;
if (ru->rx_offset >= 0 && cnt>100) {
if (ru->rx_offset >= 0 && cnt>50) {
LOG_I(PHY,"Estimated peak_val %d dB, avg %d => timing offset %llu\n",dB_fixed(peak_val),dB_fixed(avg),(unsigned long long int)ru->rx_offset);
ru->in_synch = 1;
/*
LOG_M("ru_sync_rx.m","rurx",&ru->common.rxdata[0][0],LTE_NUMBER_OF_SUBFRAMES_PER_FRAME*fp->samples_per_tti,1,1);
exit(-1);
LOG_M("ru_sync_corr.m","sync_corr",ru->dmrs_corr,LTE_NUMBER_OF_SUBFRAMES_PER_FRAME*fp->samples_per_tti,1,6);
LOG_M("ru_dmrs.m","rudmrs",&ru->dmrssync[0],fp->ofdm_symbol_size,1,1);
exit(-1);*/
} // sync_pos > 0
else //AssertFatal(cnt<1000,"Cannot find synch reference\n");
{
if (cnt>200) {
LOG_M("ru_sync_rx.m","rurx",&ru->common.rxdata[0][0],LTE_NUMBER_OF_SUBFRAMES_PER_FRAME*fp->samples_per_tti,1,1);
LOG_M("ru_sync_corr.m","sync_corr",ru->dmrs_corr,LTE_NUMBER_OF_SUBFRAMES_PER_FRAME*fp->samples_per_tti,1,6);
LOG_M("ru_dmrs.m","rudmrs",&ru->dmrssync[0],fp->ofdm_symbol_size,1,1);
exit(-1);
}
}
......@@ -2639,11 +2648,9 @@ void init_RU(char *rf_config_file, clock_source_t clock_source,clock_source_t ti
ru->ts_offset = 0;
if (ru->is_slave == 1) {
ru->in_synch = 0;
ru->dmrssync = 0;
}
else {
ru->in_synch = 1;
ru->dmrssync = 0;
ru->generate_dmrs_sync=send_dmrssync;
}
ru->cmd = EMPTY;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment