OTA synchronization debugging (aquila)

de284e9e · Raymond Knoppp · 22eccb1c · de284e9e · de284e9e · de284e9e
Commit de284e9e authored Oct 22, 2018 by Raymond Knoppp
6 changed files
--- a/openair1/PHY/LTE_ESTIMATION/lte_sync_time.c
+++ b/openair1/PHY/LTE_ESTIMATION/lte_sync_time.c
@@ -483,14 +483,17 @@ int ru_sync_time_init(RU_t *ru)   // LTE_UE_COMMON *common_vars
  */

  int32_t dmrs[ru->frame_parms.ofdm_symbol_size*14] __attribute__((aligned(32)));
-  int32_t *dmrsp[2] = {&dmrs[(3-ru->frame_parms.Ncp)*ru->frame_parms.ofdm_symbol_size],NULL};
+//  int32_t *dmrsp[2] = {&dmrs[(3-ru->frame_parms.Ncp)*ru->frame_parms.ofdm_symbol_size],NULL};
+  int32_t *dmrsp[2] = {&dmrs[0],NULL};

  generate_ul_ref_sigs();
 
  ru->dmrssync = (int16_t*)malloc16_clear(ru->frame_parms.ofdm_symbol_size*2*sizeof(int16_t)); 
+  ru->dmrs_corr = (uint64_t*)malloc16_clear(ru->frame_parms.samples_per_tti*10*sizeof(uint64_t));
+
  generate_drs_pusch(NULL,NULL,
 		     &ru->frame_parms,
-		     dmrsp,/*(int32_t**)dmrsp,*/
+		     dmrsp,
 		     0,
 		     AMP,
 		     0,
@@ -500,28 +503,28 @@ int ru_sync_time_init(RU_t *ru)   // LTE_UE_COMMON *common_vars

  switch (ru->frame_parms.N_RB_DL) {
  case 6:
-    idft128(dmrs,          /// complex input
+    idft128((int16_t*)(&dmrsp[0][3*ru->frame_parms.ofdm_symbol_size]),
 	    ru->dmrssync, /// complex output
 	    1);
    break;
  case 25:
-    idft512(dmrs,
+    idft512((int16_t*)(&dmrsp[0][3*ru->frame_parms.ofdm_symbol_size]),
 	    ru->dmrssync, /// complex output
 	    1);
    break;
  case 50:
-    idft1024((int16_t*)dmrsp,/*dmrs,*/
+    idft1024((int16_t*)(&dmrsp[0][3*ru->frame_parms.ofdm_symbol_size]),
 	    ru->dmrssync, /// complex output
 	    1);
    break;
-    
+     
  case 75:
-    idft1536(dmrs,
+    idft1536((int16_t*)(&dmrsp[0][3*ru->frame_parms.ofdm_symbol_size]),
 	     ru->dmrssync,
 	     1); /// complex output
    break;
  case 100:
-    idft2048(dmrs,
+    idft2048((int16_t*)(&dmrsp[0][3*ru->frame_parms.ofdm_symbol_size]),
 	     ru->dmrssync, /// complex output
 	     1);
    break;
@@ -537,6 +540,7 @@ void ru_sync_time_free(RU_t *ru) {

  AssertFatal(ru->dmrssync!=NULL,"ru->dmrssync is NULL\n");
  free(ru->dmrssync);
+  if (ru->dmrs_corr) free(ru->dmrs_corr);
 }

 //#define DEBUG_PHY
@@ -631,6 +635,12 @@ int lte_sync_time_eNB(int32_t **rxdata, ///rx data in time domain
 }


+static inline int64_t abs64(int64_t x)
+{
+  return (((int64_t)((int32_t*)&x)[0])*((int64_t)((int32_t*)&x)[0]) + ((int64_t)
+((int32_t*)&x)[1])*((int64_t)((int32_t*)&x)[1]));
+}
+
 int ru_sync_time(RU_t *ru,
 		 int64_t *lev,
 		 int64_t *avg)
@@ -653,38 +663,42 @@ int ru_sync_time(RU_t *ru,
  int32_t magtmp0,maxlev0=0;
  int     maxpos0=0;
  int64_t avg0=0;
-  int32_t result;
+  int64_t result;
+  int64_t dmrs_corr;

+  int maxval=0;
+  for (int i=0;i<2*(frame_parms->ofdm_symbol_size);i++) {
+    maxval = max(maxval,ru->dmrssync[i]);
+    maxval = max(maxval,-ru->dmrssync[i]);
+  }
+  int shift = log2_approx(maxval);

  for (int n=0; n<length; n+=4) {

-    tmp0 = 0;
+    dmrs_corr = 0;

    //calculate dot product of primary_synch0_time and rxdata[ar][n] (ar=0..nb_ant_rx) and store the sum in temp[n];
    for (int ar=0; ar<ru->nb_rx; ar++) {
      
-      result  = dot_product(ru->dmrssync,
-			    (int16_t*) &ru->common.rxdata[ar][n],
-			    frame_parms->ofdm_symbol_size,
-			    11);     
-      ((int16_t*)&tmp0)[0] += ((int16_t*) &result)[0];
-      ((int16_t*)&tmp0)[1] += ((int16_t*) &result)[1];
+      result  = dot_product64(ru->dmrssync,
+			      (int16_t*) &ru->common.rxdata[ar][n],
+			      frame_parms->ofdm_symbol_size,
+			      shift);     
+      dmrs_corr += abs64(result);
    }
+    if (ru->dmrs_corr != NULL) ru->dmrs_corr[n] = dmrs_corr;

    // tmpi holds <synchi,rx0>+<synci,rx1>+...+<synchi,rx_{nbrx-1}>

-    magtmp0 = abs32(tmp0);

-    // this does max |tmp0(n)|^2  and argmax |tmp0(n)|^2 
-      
-    if (magtmp0>maxlev0) { maxlev0 = magtmp0; maxpos0 = n; }
-    avg0 += magtmp0;
+    if (dmrs_corr>maxlev0) { maxlev0 = dmrs_corr; maxpos0 = n; }
+    avg0 += dmrs_corr;
  }
  avg0/=(length/4);

-  int dmrsoffset = 3*(frame_parms->ofdm_symbol_size+frame_parms->nb_prefix_samples) + frame_parms->nb_prefix_samples0;
+  int dmrsoffset = frame_parms->samples_per_tti + (3*frame_parms->ofdm_symbol_size)+(2*frame_parms->nb_prefix_samples) + frame_parms->nb_prefix_samples0;
  
-  if ((int64_t)maxlev0 > (5*avg0)) {*lev = maxlev0; *avg=avg0; return((length+maxpos0-dmrsoffset)%length);}
+  if ((int64_t)maxlev0 > (10*avg0)) {*lev = maxlev0; *avg=avg0; return((length+maxpos0-dmrsoffset)%length);}

  return(-1);


--- a/openair1/PHY/TOOLS/cdot_prod.c
+++ b/openair1/PHY/TOOLS/cdot_prod.c
@@ -159,6 +159,127 @@ int32_t dot_product(int16_t *x,
 }


+int64_t dot_product64(int16_t *x,
+                      int16_t *y,
+                      uint32_t N, //must be a multiple of 8
+                      uint8_t output_shift)
+{
+
+  uint32_t n;
+
+#if defined(__x86_64__) || defined(__i386__)
+  __m128i *x128,*y128,mmtmp1,mmtmp2,mmtmp3,mmcumul,mmcumul_re,mmcumul_im;
+  __m128i minus_i = _mm_set_epi16(-1,1,-1,1,-1,1,-1,1);
+  int32_t result;
+
+  x128 = (__m128i*) x;
+  y128 = (__m128i*) y;
+
+  mmcumul_re = _mm_setzero_si128();
+  mmcumul_im = _mm_setzero_si128();
+
+  for (n=0; n<(N>>2); n++) {
+
+//    printf("n=%d, x128=%p, y128=%p\n",n,x128,y128);
+  //      print_shorts("x",&x128[0]);
+    //    print_shorts("y",&y128[0]);
+
+    // this computes Re(z) = Re(x)*Re(y) + Im(x)*Im(y)
+    mmtmp1 = _mm_madd_epi16(x128[0],y128[0]);
+      //  print_ints("retmp",&mmtmp1);
+    // mmtmp1 contains real part of 4 consecutive outputs (32-bit)
+
+    // shift and accumulate results
+    mmtmp1 = _mm_srai_epi32(mmtmp1,output_shift);
+    mmcumul_re = _mm_add_epi32(mmcumul_re,mmtmp1);
+        //print_ints("re",&mmcumul_re);
+
+
+    // this computes Im(z) = Re(x)*Im(y) - Re(y)*Im(x)
+    mmtmp2 = _mm_shufflelo_epi16(y128[0],_MM_SHUFFLE(2,3,0,1));
+        //print_shorts("y",&mmtmp2);
+    mmtmp2 = _mm_shufflehi_epi16(mmtmp2,_MM_SHUFFLE(2,3,0,1));
+        //print_shorts("y",&mmtmp2);
+    mmtmp2 = _mm_sign_epi16(mmtmp2,minus_i);
+          //  print_shorts("y",&mmtmp2);
+
+    mmtmp3 = _mm_madd_epi16(x128[0],mmtmp2);
+            //print_ints("imtmp",&mmtmp3);
+    // mmtmp3 contains imag part of 4 consecutive outputs (32-bit)
+
+    // shift and accumulate results
+    mmtmp3 = _mm_srai_epi32(mmtmp3,output_shift);
+    mmcumul_im = _mm_add_epi32(mmcumul_im,mmtmp3);
+        //print_ints("im",&mmcumul_im);
+
+    x128++;
+    y128++;
+  }
+
+  // this gives Re Re Im Im
+  mmcumul = _mm_hadd_epi32(mmcumul_re,mmcumul_im);
+    //print_ints("cumul1",&mmcumul);
+
+  // this gives Re Im Re Im
+  mmcumul = _mm_hadd_epi32(mmcumul,mmcumul);
+
+    //print_ints("cumul2",&mmcumul);
+
+
+  //mmcumul = _mm_srai_epi32(mmcumul,output_shift);
+  // extract the lower half
+  result = _mm_extract_epi64(mmcumul,0);
+  //printf("result: (%d,%d)\n",((int32_t*)&result)[0],((int32_t*)&result)[1]); 
+  _mm_empty();
+  _m_empty();
+ 
+  return(result);
+#elif defined(__arm__)
+  int16x4_t *x_128=(int16x4_t*)x;
+  int16x4_t *y_128=(int16x4_t*)y;
+  int32x4_t tmp_re,tmp_im;
+  int32x4_t tmp_re1,tmp_im1;
+  int32x4_t re_cumul,im_cumul;
+  int32x2_t re_cumul2,im_cumul2;
+  int32x4_t shift = vdupq_n_s32(-output_shift); 
+  int32x2x2_t result2;
+  int16_t conjug[4]__attribute__((aligned(16))) = {-1,1,-1,1} ;
+
+  re_cumul = vdupq_n_s32(0);
+  im_cumul = vdupq_n_s32(0); 
+
+  for (n=0; n<(N>>2); n++) {
+
+    tmp_re  = vmull_s16(*x_128++, *y_128++);
+    //tmp_re = [Re(x[0])Re(y[0]) Im(x[0])Im(y[0]) Re(x[1])Re(y[1]) Im(x[1])Im(y[1])] 
+    tmp_re1 = vmull_s16(*x_128++, *y_128++);
+    //tmp_re1 = [Re(x1[1])Re(x2[1]) Im(x1[1])Im(x2[1]) Re(x1[1])Re(x2[2]) Im(x1[1])Im(x2[2])] 
+    tmp_re  = vcombine_s32(vpadd_s32(vget_low_s32(tmp_re),vget_high_s32(tmp_re)),
+                           vpadd_s32(vget_low_s32(tmp_re1),vget_high_s32(tmp_re1)));
+    //tmp_re = [Re(ch[0])Re(rx[0])+Im(ch[0])Im(ch[0]) Re(ch[1])Re(rx[1])+Im(ch[1])Im(ch[1]) Re(ch[2])Re(rx[2])+Im(ch[2]) Im(ch[2]) Re(ch[3])Re(rx[3])+Im(ch[3])Im(ch[3])] 
+
+    tmp_im  = vmull_s16(vrev32_s16(vmul_s16(*x_128++,*(int16x4_t*)conjug)),*y_128++);
+    //tmp_im = [-Im(ch[0])Re(rx[0]) Re(ch[0])Im(rx[0]) -Im(ch[1])Re(rx[1]) Re(ch[1])Im(rx[1])]
+    tmp_im1 = vmull_s16(vrev32_s16(vmul_s16(*x_128++,*(int16x4_t*)conjug)),*y_128++);
+    //tmp_im1 = [-Im(ch[2])Re(rx[2]) Re(ch[2])Im(rx[2]) -Im(ch[3])Re(rx[3]) Re(ch[3])Im(rx[3])]
+    tmp_im  = vcombine_s32(vpadd_s32(vget_low_s32(tmp_im),vget_high_s32(tmp_im)),
+                           vpadd_s32(vget_low_s32(tmp_im1),vget_high_s32(tmp_im1)));
+    //tmp_im = [-Im(ch[0])Re(rx[0])+Re(ch[0])Im(rx[0]) -Im(ch[1])Re(rx[1])+Re(ch[1])Im(rx[1]) -Im(ch[2])Re(rx[2])+Re(ch[2])Im(rx[2]) -Im(ch[3])Re(rx[3])+Re(ch[3])Im(rx[3])]
+
+    re_cumul = vqaddq_s32(re_cumul,vqshlq_s32(tmp_re,shift));
+    im_cumul = vqaddq_s32(im_cumul,vqshlq_s32(tmp_im,shift));
+  }
+  
+  re_cumul2 = vpadd_s32(vget_low_s32(re_cumul),vget_high_s32(re_cumul));
+  im_cumul2 = vpadd_s32(vget_low_s32(im_cumul),vget_high_s32(im_cumul));
+  re_cumul2 = vpadd_s32(re_cumul2,re_cumul2);
+  im_cumul2 = vpadd_s32(im_cumul2,im_cumul2);
+  result2   = vzip_s32(re_cumul2,im_cumul2);
+  return(vget_lane_s32(result2.val[0],0));
+#endif
+}
+
+
 #ifdef MAIN
 void print_bytes(char *s,__m128i *x)
 {

--- a/openair1/PHY/TOOLS/dB_routines.c
+++ b/openair1/PHY/TOOLS/dB_routines.c
@@ -597,6 +597,12 @@ int16_t dB_fixed_times10(uint32_t x)
  return dB_power;
 }

+uint8_t dB_fixed64(uint64_t x) {
+
+  if ((x<(((uint64_t)1)<<32))) return(dB_fixed((uint32_t)x));
+  else                         return(4*dB_table[255]+dB_fixed((uint32_t)(x>>32)));
+
+}
 int8_t dB_fixed(uint32_t x)
 {


--- a/openair1/PHY/TOOLS/tools_defs.h
+++ b/openair1/PHY/TOOLS/tools_defs.h
@@ -336,6 +336,7 @@ Compensate the phase rotation of the RF. WARNING: This function is currently unu


 int8_t dB_fixed(uint32_t x);
+uint8_t dB_fixed64(uint64_t x);

 int8_t dB_fixed2(uint32_t x,uint32_t y);

@@ -349,6 +350,11 @@ int32_t dot_product(int16_t *x,
                    uint32_t N, //must be a multiple of 8
                    uint8_t output_shift);

+int64_t dot_product64(int16_t *x,
+                      int16_t *y,
+                      uint32_t N, //must be a multiple of 8
+                      uint8_t output_shift);
+
 void dft12(int16_t *x,int16_t *y);
 void dft24(int16_t *x,int16_t *y,uint8_t scale_flag);
 void dft36(int16_t *x,int16_t *y,uint8_t scale_flag);

--- a/openair1/PHY/defs_eNB.h
+++ b/openair1/PHY/defs_eNB.h
@@ -464,6 +464,8 @@ typedef struct RU_t_s{
  pthread_t            ru_stats_thread;
  /// OTA synchronization signal
  int16_t *dmrssync;
+  /// OTA synchronization correlator output
+  uint64_t *dmrs_corr;
 } RU_t;



--- a/targets/RT/USER/lte-ru.c
+++ b/targets/RT/USER/lte-ru.c
@@ -1880,19 +1880,25 @@ void *ru_thread_synch(void *arg) {
      ru->rx_offset = ru_sync_time(ru,
 				   &peak_val,
 				   &avg);
-      LOG_I(PHY,"RU synch cnt %d: %d, val %llu\n",cnt,ru->rx_offset,(unsigned long long)peak_val);
+      LOG_I(PHY,"RU synch cnt %d: %d, val %llu (%d dB,%d dB)\n",cnt,ru->rx_offset,(unsigned long long)peak_val,dB_fixed64(peak_val),dB_fixed64(avg));
      cnt++;
-      if (ru->rx_offset >= 0 && cnt>100) {
+      if (ru->rx_offset >= 0 && cnt>50) {

 	LOG_I(PHY,"Estimated peak_val %d dB, avg %d => timing offset %llu\n",dB_fixed(peak_val),dB_fixed(avg),(unsigned long long int)ru->rx_offset);
 	ru->in_synch = 1;
+/*
        LOG_M("ru_sync_rx.m","rurx",&ru->common.rxdata[0][0],LTE_NUMBER_OF_SUBFRAMES_PER_FRAME*fp->samples_per_tti,1,1);
-        exit(-1);
+        LOG_M("ru_sync_corr.m","sync_corr",ru->dmrs_corr,LTE_NUMBER_OF_SUBFRAMES_PER_FRAME*fp->samples_per_tti,1,6);
+        LOG_M("ru_dmrs.m","rudmrs",&ru->dmrssync[0],fp->ofdm_symbol_size,1,1);
+        exit(-1);*/
      } // sync_pos > 0
      else //AssertFatal(cnt<1000,"Cannot find synch reference\n");
          { 
              if (cnt>200) {
                 LOG_M("ru_sync_rx.m","rurx",&ru->common.rxdata[0][0],LTE_NUMBER_OF_SUBFRAMES_PER_FRAME*fp->samples_per_tti,1,1);
+                 LOG_M("ru_sync_corr.m","sync_corr",ru->dmrs_corr,LTE_NUMBER_OF_SUBFRAMES_PER_FRAME*fp->samples_per_tti,1,6);
+                 LOG_M("ru_dmrs.m","rudmrs",&ru->dmrssync[0],fp->ofdm_symbol_size,1,1);
+
                 exit(-1);
              }
          }