From 132018a1369a674062efa0ada99eeb20e2f86768 Mon Sep 17 00:00:00 2001
From: Elena_Lukashova <elena.lukashova@eurecom.fr>
Date: Fri, 10 Mar 2017 16:20:41 +0100
Subject: [PATCH] Adding Rank estimation for TM4. Right now no feedback, only
 calculation. To see the values, enable DEBUG_RANK_EST in lte_ue_measurements.

---
 .../PHY/LTE_ESTIMATION/lte_ue_measurements.c  | 682 ++++++++++++++++--
 openair1/PHY/LTE_TRANSPORT/extern.h           |   2 +
 openair1/PHY/LTE_TRANSPORT/proto.h            | 180 +++--
 openair1/PHY/LTE_TRANSPORT/vars.h             |   6 +-
 4 files changed, 739 insertions(+), 131 deletions(-)

diff --git a/openair1/PHY/LTE_ESTIMATION/lte_ue_measurements.c b/openair1/PHY/LTE_ESTIMATION/lte_ue_measurements.c
index d075d0ca50..33f7b2e5ef 100644
--- a/openair1/PHY/LTE_ESTIMATION/lte_ue_measurements.c
+++ b/openair1/PHY/LTE_ESTIMATION/lte_ue_measurements.c
@@ -32,8 +32,9 @@
 #define k1 ((long long int) 1000)
 #define k2 ((long long int) (1024-k1))
 
-//#define DEBUG_MEAS_RRC
-//#define DEBUG_MEAS_UE
+#define DEBUG_MEAS_RRC
+#define DEBUG_MEAS_UE
+//#define DEBUG_RANK_EST
 
 #ifdef USER_MODE
 void print_shorts(char *s,short *x)
@@ -198,45 +199,45 @@ void ue_rrc_measurements(PHY_VARS_UE *ue,
           if (ue->frame_parms.Ncp==NORMAL) {
             for (aarx=0; aarx<ue->frame_parms.nb_antennas_rx; aarx++) {
 
-	      rxF_sss = (int16_t *)&ue->common_vars.common_vars_rx_data_per_thread[subframe&0x1].rxdataF[aarx][(5*ue->frame_parms.ofdm_symbol_size)];
-	      rxF_pss = (int16_t *)&ue->common_vars.common_vars_rx_data_per_thread[subframe&0x1].rxdataF[aarx][(6*ue->frame_parms.ofdm_symbol_size)];
+              rxF_sss = (int16_t *)&ue->common_vars.common_vars_rx_data_per_thread[subframe&0x1].rxdataF[aarx][(5*ue->frame_parms.ofdm_symbol_size)];
+              rxF_pss = (int16_t *)&ue->common_vars.common_vars_rx_data_per_thread[subframe&0x1].rxdataF[aarx][(6*ue->frame_parms.ofdm_symbol_size)];
 
               //-ve spectrum from SSS
-	      //	      printf("slot %d: SSS DTX: %d,%d, non-DTX %d,%d\n",slot,rxF_pss[-72],rxF_pss[-71],rxF_pss[-36],rxF_pss[-35]);
+              //              printf("slot %d: SSS DTX: %d,%d, non-DTX %d,%d\n",slot,rxF_pss[-72],rxF_pss[-71],rxF_pss[-36],rxF_pss[-35]);
 
-	      //              ue->measurements.n0_power[aarx] = (((int32_t)rxF_pss[-72]*rxF_pss[-72])+((int32_t)rxF_pss[-71]*rxF_pss[-71]));
-	      //	      printf("sssn36 %d\n",ue->measurements.n0_power[aarx]);
+              //              ue->measurements.n0_power[aarx] = (((int32_t)rxF_pss[-72]*rxF_pss[-72])+((int32_t)rxF_pss[-71]*rxF_pss[-71]));
+              //              printf("sssn36 %d\n",ue->measurements.n0_power[aarx]);
               ue->measurements.n0_power[aarx] = (((int32_t)rxF_pss[-70]*rxF_pss[-70])+((int32_t)rxF_pss[-69]*rxF_pss[-69]));
               ue->measurements.n0_power[aarx] += (((int32_t)rxF_pss[-68]*rxF_pss[-68])+((int32_t)rxF_pss[-67]*rxF_pss[-67]));
               ue->measurements.n0_power[aarx] += (((int32_t)rxF_pss[-66]*rxF_pss[-66])+((int32_t)rxF_pss[-65]*rxF_pss[-65]));
-	      //              ue->measurements.n0_power[aarx] += (((int32_t)rxF_pss[-64]*rxF_pss[-64])+((int32_t)rxF_pss[-63]*rxF_pss[-63]));
-	      //	      printf("sssm32 %d\n",ue->measurements.n0_power[aarx]);
+              //              ue->measurements.n0_power[aarx] += (((int32_t)rxF_pss[-64]*rxF_pss[-64])+((int32_t)rxF_pss[-63]*rxF_pss[-63]));
+              //              printf("sssm32 %d\n",ue->measurements.n0_power[aarx]);
               //+ve spectrum from SSS
-	      ue->measurements.n0_power[aarx] += (((int32_t)rxF_sss[2+70]*rxF_sss[2+70])+((int32_t)rxF_sss[2+69]*rxF_sss[2+69]));
+              ue->measurements.n0_power[aarx] += (((int32_t)rxF_sss[2+70]*rxF_sss[2+70])+((int32_t)rxF_sss[2+69]*rxF_sss[2+69]));
               ue->measurements.n0_power[aarx] += (((int32_t)rxF_sss[2+68]*rxF_sss[2+68])+((int32_t)rxF_sss[2+67]*rxF_sss[2+67]));
               ue->measurements.n0_power[aarx] += (((int32_t)rxF_sss[2+66]*rxF_sss[2+66])+((int32_t)rxF_sss[2+65]*rxF_sss[2+65]));
-	      //	      ue->measurements.n0_power[aarx] += (((int32_t)rxF_sss[2+64]*rxF_sss[2+64])+((int32_t)rxF_sss[2+63]*rxF_sss[2+63]));
-	      //	      printf("sssp32 %d\n",ue->measurements.n0_power[aarx]);
+              //              ue->measurements.n0_power[aarx] += (((int32_t)rxF_sss[2+64]*rxF_sss[2+64])+((int32_t)rxF_sss[2+63]*rxF_sss[2+63]));
+              //              printf("sssp32 %d\n",ue->measurements.n0_power[aarx]);
               //+ve spectrum from PSS
               ue->measurements.n0_power[aarx] += (((int32_t)rxF_pss[2+70]*rxF_pss[2+70])+((int32_t)rxF_pss[2+69]*rxF_pss[2+69]));
               ue->measurements.n0_power[aarx] += (((int32_t)rxF_pss[2+68]*rxF_pss[2+68])+((int32_t)rxF_pss[2+67]*rxF_pss[2+67]));
               ue->measurements.n0_power[aarx] += (((int32_t)rxF_pss[2+66]*rxF_pss[2+66])+((int32_t)rxF_pss[2+65]*rxF_pss[2+65]));
-	      //              ue->measurements.n0_power[aarx] += (((int32_t)rxF_pss[2+64]*rxF_pss[2+64])+((int32_t)rxF_pss[2+63]*rxF_pss[2+63]));
-	      //	      printf("pss32 %d\n",ue->measurements.n0_power[aarx]);              //-ve spectrum from PSS
+              //              ue->measurements.n0_power[aarx] += (((int32_t)rxF_pss[2+64]*rxF_pss[2+64])+((int32_t)rxF_pss[2+63]*rxF_pss[2+63]));
+              //              printf("pss32 %d\n",ue->measurements.n0_power[aarx]);              //-ve spectrum from PSS
               rxF_pss = (int16_t *)&ue->common_vars.common_vars_rx_data_per_thread[subframe&0x1].rxdataF[aarx][(7*ue->frame_parms.ofdm_symbol_size)];
-	      //              ue->measurements.n0_power[aarx] += (((int32_t)rxF_pss[-72]*rxF_pss[-72])+((int32_t)rxF_pss[-71]*rxF_pss[-71]));
-	      //	      printf("pssm36 %d\n",ue->measurements.n0_power[aarx]);
+              //              ue->measurements.n0_power[aarx] += (((int32_t)rxF_pss[-72]*rxF_pss[-72])+((int32_t)rxF_pss[-71]*rxF_pss[-71]));
+              //              printf("pssm36 %d\n",ue->measurements.n0_power[aarx]);
               ue->measurements.n0_power[aarx] += (((int32_t)rxF_pss[-70]*rxF_pss[-70])+((int32_t)rxF_pss[-69]*rxF_pss[-69]));
               ue->measurements.n0_power[aarx] += (((int32_t)rxF_pss[-68]*rxF_pss[-68])+((int32_t)rxF_pss[-67]*rxF_pss[-67]));
               ue->measurements.n0_power[aarx] += (((int32_t)rxF_pss[-66]*rxF_pss[-66])+((int32_t)rxF_pss[-65]*rxF_pss[-65]));
-	      //              ue->measurements.n0_power[aarx] += (((int32_t)rxF_pss[-64]*rxF_pss[-64])+((int32_t)rxF_pss[-63]*rxF_pss[-63]));
-	      //	      printf("pssm32 %d\n",ue->measurements.n0_power[aarx]);
+              //              ue->measurements.n0_power[aarx] += (((int32_t)rxF_pss[-64]*rxF_pss[-64])+((int32_t)rxF_pss[-63]*rxF_pss[-63]));
+              //              printf("pssm32 %d\n",ue->measurements.n0_power[aarx]);
               ue->measurements.n0_power_dB[aarx] = (unsigned short) dB_fixed(ue->measurements.n0_power[aarx]/12);
               ue->measurements.n0_power_tot /*+=*/ = ue->measurements.n0_power[aarx];
             }
 
-	    ue->measurements.n0_power_tot_dB = (unsigned short) dB_fixed(ue->measurements.n0_power_tot/(12*aarx));
-	    ue->measurements.n0_power_tot_dBm = ue->measurements.n0_power_tot_dB - ue->rx_total_gain_dB - dB_fixed(ue->frame_parms.ofdm_symbol_size);
+            ue->measurements.n0_power_tot_dB = (unsigned short) dB_fixed(ue->measurements.n0_power_tot/(12*aarx));
+            ue->measurements.n0_power_tot_dBm = ue->measurements.n0_power_tot_dB - ue->rx_total_gain_dB - dB_fixed(ue->frame_parms.ofdm_symbol_size);
           } else {
             LOG_E(PHY, "Not yet implemented: noise power calculation when prefix length = EXTENDED\n");
           }
@@ -295,30 +296,30 @@ void ue_rrc_measurements(PHY_VARS_UE *ue,
           if (ue->frame_parms.Ncp==NORMAL) {
             for (aarx=0; aarx<ue->frame_parms.nb_antennas_rx; aarx++) {
 
-	      rxF_sss = (int16_t *)&ue->common_vars.common_vars_rx_data_per_thread[subframe&0x1].rxdataF[aarx][(6*ue->frame_parms.ofdm_symbol_size)];
-	      // note this is a dummy pointer, the pss is not really there!
-	      // in FDD the pss is in the symbol after the sss, but not in TDD
+              rxF_sss = (int16_t *)&ue->common_vars.common_vars_rx_data_per_thread[subframe&0x1].rxdataF[aarx][(6*ue->frame_parms.ofdm_symbol_size)];
+              // note this is a dummy pointer, the pss is not really there!
+              // in FDD the pss is in the symbol after the sss, but not in TDD
 
-	      rxF_pss = (int16_t *)&ue->common_vars.common_vars_rx_data_per_thread[subframe&0x1].rxdataF[aarx][(7*ue->frame_parms.ofdm_symbol_size)];
+              rxF_pss = (int16_t *)&ue->common_vars.common_vars_rx_data_per_thread[subframe&0x1].rxdataF[aarx][(7*ue->frame_parms.ofdm_symbol_size)];
 
-	      //-ve spectrum from SSS
-	      //              ue->measurements.n0_power[aarx] = (((int32_t)rxF_pss[-72]*rxF_pss[-72])+((int32_t)rxF_pss[-71]*rxF_pss[-71]));
+              //-ve spectrum from SSS
+              //              ue->measurements.n0_power[aarx] = (((int32_t)rxF_pss[-72]*rxF_pss[-72])+((int32_t)rxF_pss[-71]*rxF_pss[-71]));
               ue->measurements.n0_power[aarx] = (((int32_t)rxF_pss[-70]*rxF_pss[-70])+((int32_t)rxF_pss[-69]*rxF_pss[-69]));
               ue->measurements.n0_power[aarx] += (((int32_t)rxF_pss[-68]*rxF_pss[-68])+((int32_t)rxF_pss[-67]*rxF_pss[-67]));
               ue->measurements.n0_power[aarx] += (((int32_t)rxF_pss[-66]*rxF_pss[-66])+((int32_t)rxF_pss[-65]*rxF_pss[-65]));
-	      //              ue->measurements.n0_power[aarx] += (((int32_t)rxF_pss[-64]*rxF_pss[-64])+((int32_t)rxF_pss[-63]*rxF_pss[-63]));
-	      //+ve spectrum from SSS
-	      //	      ue->measurements.n0_power[aarx] += (((int32_t)rxF_sss[2+72]*rxF_sss[2+72])+((int32_t)rxF_sss[2+71]*rxF_sss[2+71]));
-	      ue->measurements.n0_power[aarx] = (((int32_t)rxF_sss[2+70]*rxF_sss[2+70])+((int32_t)rxF_sss[2+69]*rxF_sss[2+69]));
-	      ue->measurements.n0_power[aarx] += (((int32_t)rxF_sss[2+68]*rxF_sss[2+68])+((int32_t)rxF_sss[2+67]*rxF_sss[2+67]));
-	      ue->measurements.n0_power[aarx] += (((int32_t)rxF_sss[2+66]*rxF_sss[2+66])+((int32_t)rxF_sss[2+65]*rxF_sss[2+65]));
-	      //	      ue->measurements.n0_power[aarx] += (((int32_t)rxF_sss[2+64]*rxF_sss[2+64])+((int32_t)rxF_sss[2+63]*rxF_sss[2+63]));
+              //              ue->measurements.n0_power[aarx] += (((int32_t)rxF_pss[-64]*rxF_pss[-64])+((int32_t)rxF_pss[-63]*rxF_pss[-63]));
+              //+ve spectrum from SSS
+              //              ue->measurements.n0_power[aarx] += (((int32_t)rxF_sss[2+72]*rxF_sss[2+72])+((int32_t)rxF_sss[2+71]*rxF_sss[2+71]));
+              ue->measurements.n0_power[aarx] = (((int32_t)rxF_sss[2+70]*rxF_sss[2+70])+((int32_t)rxF_sss[2+69]*rxF_sss[2+69]));
+              ue->measurements.n0_power[aarx] += (((int32_t)rxF_sss[2+68]*rxF_sss[2+68])+((int32_t)rxF_sss[2+67]*rxF_sss[2+67]));
+              ue->measurements.n0_power[aarx] += (((int32_t)rxF_sss[2+66]*rxF_sss[2+66])+((int32_t)rxF_sss[2+65]*rxF_sss[2+65]));
+              //              ue->measurements.n0_power[aarx] += (((int32_t)rxF_sss[2+64]*rxF_sss[2+64])+((int32_t)rxF_sss[2+63]*rxF_sss[2+63]));
 
-	      ue->measurements.n0_power_dB[aarx] = (unsigned short) dB_fixed(ue->measurements.n0_power[aarx]/(6));
-	      ue->measurements.n0_power_tot +=  ue->measurements.n0_power[aarx];
-	    }
-	    ue->measurements.n0_power_tot_dB = (unsigned short) dB_fixed(ue->measurements.n0_power_tot/(6*aarx));
-	    ue->measurements.n0_power_tot_dBm = ue->measurements.n0_power_tot_dB - ue->rx_total_gain_dB - dB_fixed(ue->frame_parms.ofdm_symbol_size);
+              ue->measurements.n0_power_dB[aarx] = (unsigned short) dB_fixed(ue->measurements.n0_power[aarx]/(6));
+              ue->measurements.n0_power_tot +=  ue->measurements.n0_power[aarx];
+            }
+            ue->measurements.n0_power_tot_dB = (unsigned short) dB_fixed(ue->measurements.n0_power_tot/(6*aarx));
+            ue->measurements.n0_power_tot_dBm = ue->measurements.n0_power_tot_dB - ue->rx_total_gain_dB - dB_fixed(ue->frame_parms.ofdm_symbol_size);
 
 
           }
@@ -361,8 +362,8 @@ void ue_rrc_measurements(PHY_VARS_UE *ue,
 
               ue->measurements.rsrp[eNB_offset] += (((int32_t)(rxF[off])*rxF[off])+((int32_t)(rxF[off+1])*rxF[off+1]));
               //        printf("rb %d, off %d : %d\n",rb,off,((((int32_t)rxF[off])*rxF[off])+((int32_t)(rxF[off+1])*rxF[off+1])));
-	      //	      if ((ue->frame_rx&0x3ff) == 0)
-	      //                printf("rb %d, off %d : %d\n",rb,off,((rxF[off]*rxF[off])+(rxF[off+1]*rxF[off+1])));
+              //              if ((ue->frame_rx&0x3ff) == 0)
+              //                printf("rb %d, off %d : %d\n",rb,off,((rxF[off]*rxF[off])+(rxF[off+1]*rxF[off+1])));
 
 
               off+=12;
@@ -428,7 +429,7 @@ void ue_rrc_measurements(PHY_VARS_UE *ue,
     //    if (slot == 0) {
 
       if (eNB_offset == 0)
-        LOG_I(PHY,"[UE %d] Frame %d, subframe %d RRC Measurements => rssi %3.1f dBm (digital: %3.1f dB, gain %d), N0 %d dBm\n",ue->Mod_id,
+        printf("[UE %d] Frame %d, subframe %d RRC Measurements => rssi %3.1f dBm (digital: %3.1f dB, gain %d), N0 %d dBm\n",ue->Mod_id,
               ue->proc.proc_rxtx[subframe&1].frame_rx,subframe,10*log10(ue->measurements.rssi)-ue->rx_total_gain_dB,
               10*log10(ue->measurements.rssi),
               ue->rx_total_gain_dB,
@@ -458,7 +459,7 @@ void lte_ue_measurements(PHY_VARS_UE *ue,
                          unsigned int subframe_offset,
                          unsigned char N0_symbol,
                          unsigned char abstraction_flag,
-						 uint8_t subframe)
+                         uint8_t subframe)
 {
 
 
@@ -472,9 +473,15 @@ void lte_ue_measurements(PHY_VARS_UE *ue,
   int16x8_t *dl_ch0_128, *dl_ch1_128;
 #endif
   int *dl_ch0,*dl_ch1;
+
   LTE_DL_FRAME_PARMS *frame_parms = &ue->frame_parms;
   int nb_subbands,subband_size,last_subband_size;
   int N_RB_DL = frame_parms->N_RB_DL;
+
+
+  int rank_tm4;
+
+
   ue->measurements.nb_antennas_rx = frame_parms->nb_antennas_rx;
 
     if (ue->transmission_mode[eNB_id]!=4)
@@ -543,6 +550,18 @@ void lte_ue_measurements(PHY_VARS_UE *ue,
 
   } //eNB_id
 
+
+
+  rank_tm4 = rank_estimation_tm4(&ue->common_vars.common_vars_rx_data_per_thread[subframe&0x1].dl_ch_estimates[eNB_id][0][4],
+                                 &ue->common_vars.common_vars_rx_data_per_thread[subframe&0x1].dl_ch_estimates[eNB_id][2][4],
+                                 &ue->common_vars.common_vars_rx_data_per_thread[subframe&0x1].dl_ch_estimates[eNB_id][1][4],
+                                 &ue->common_vars.common_vars_rx_data_per_thread[subframe&0x1].dl_ch_estimates[eNB_id][3][4],
+                                 N_RB_DL);
+
+#ifdef DEBUG_RANK_EST
+  printf("rank tm4 %d\n", rank_tm4);
+#endif
+
   // filter to remove jitter
   if (ue->init_averaging == 0) {
     for (eNB_id = 0; eNB_id < ue->n_connected_eNB; eNB_id++)
@@ -567,13 +586,13 @@ void lte_ue_measurements(PHY_VARS_UE *ue,
     ue->measurements.wideband_cqi_avg[eNB_id] = dB_fixed2(ue->measurements.rx_power_avg[eNB_id],ue->measurements.n0_power_avg);
     ue->measurements.rx_rssi_dBm[eNB_id] = ue->measurements.rx_power_avg_dB[eNB_id] - ue->rx_total_gain_dB;
 #ifdef DEBUG_MEAS_UE
-      LOG_D(PHY,"[eNB %d] RSSI %d dBm, RSSI (digital) %d dB, WBandCQI %d dB, rxPwrAvg %d, n0PwrAvg %d\n",
+     printf("[eNB %d] RSSI %d dBm, RSSI (digital) %d dB, WBandCQI %d dB, rxPwrAvg %d, n0PwrAvg %d\n",
             eNB_id,
             ue->measurements.rx_rssi_dBm[eNB_id],
             ue->measurements.rx_power_avg_dB[eNB_id],
             ue->measurements.wideband_cqi_avg[eNB_id],
             ue->measurements.rx_power_avg[eNB_id],
-            ue->measurements.n0_power_avg);
+            ue->measurements.n0_power_tot);
 #endif
   }
 
@@ -634,7 +653,7 @@ void lte_ue_measurements(PHY_VARS_UE *ue,
       }
 
       for (aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) {
-	//printf("aarx=%d", aarx);
+        //printf("aarx=%d", aarx);
         // skip the first 4 RE due to interpolation filter length of 5 (not possible to skip 5 due to 128i alignment, must be multiple of 128bit)
 
 #if defined(__x86_64__) || defined(__i386__)
@@ -655,12 +674,12 @@ void lte_ue_measurements(PHY_VARS_UE *ue,
           // pmi
 #if defined(__x86_64__) || defined(__i386__)
 
-	  pmi128_re = _mm_xor_si128(pmi128_re,pmi128_re);
+          pmi128_re = _mm_xor_si128(pmi128_re,pmi128_re);
           pmi128_im = _mm_xor_si128(pmi128_im,pmi128_im);
 #elif defined(__arm__)
 
           pmi128_re = vdupq_n_s32(0);
-	  pmi128_im = vdupq_n_s32(0);
+          pmi128_im = vdupq_n_s32(0);
 #endif
           // limit is the number of groups of 4 REs in a subband (12 = 4 RBs, 3 = 1 RB)
           // for 5 MHz channelization, there are 7 subbands, 6 of size 4 RBs and 1 of size 1 RB
@@ -672,48 +691,48 @@ void lte_ue_measurements(PHY_VARS_UE *ue,
           for (i=0; i<limit; i++) {
 
 #if defined(__x86_64__) || defined(__i386__)
-	      mmtmpPMI0 = _mm_xor_si128(mmtmpPMI0,mmtmpPMI0);
+              mmtmpPMI0 = _mm_xor_si128(mmtmpPMI0,mmtmpPMI0);
               mmtmpPMI1 = _mm_xor_si128(mmtmpPMI1,mmtmpPMI1);
 
             // For each RE in subband perform ch0 * conj(ch1)
             // multiply by conjugated channel
-		//  print_ints("ch0",&dl_ch0_128[0]);
-		//  print_ints("ch1",&dl_ch1_128[0]);
+                //  print_ints("ch0",&dl_ch0_128[0]);
+                //  print_ints("ch1",&dl_ch1_128[0]);
 
-	    mmtmpPMI0 = _mm_madd_epi16(dl_ch0_128[0],dl_ch1_128[0]);
-	         //  print_ints("re",&mmtmpPMI0);
+            mmtmpPMI0 = _mm_madd_epi16(dl_ch0_128[0],dl_ch1_128[0]);
+                 //  print_ints("re",&mmtmpPMI0);
             mmtmpPMI1 = _mm_shufflelo_epi16(dl_ch1_128[0],_MM_SHUFFLE(2,3,0,1));
               //  print_ints("_mm_shufflelo_epi16",&mmtmpPMI1);
             mmtmpPMI1 = _mm_shufflehi_epi16(mmtmpPMI1,_MM_SHUFFLE(2,3,0,1));
-	        //  print_ints("_mm_shufflehi_epi16",&mmtmpPMI1);
+                //  print_ints("_mm_shufflehi_epi16",&mmtmpPMI1);
             mmtmpPMI1 = _mm_sign_epi16(mmtmpPMI1,*(__m128i*)&conjugate[0]);
-	       //  print_ints("_mm_sign_epi16",&mmtmpPMI1);
+               //  print_ints("_mm_sign_epi16",&mmtmpPMI1);
             mmtmpPMI1 = _mm_madd_epi16(mmtmpPMI1,dl_ch0_128[0]);
-	       //   print_ints("mm_madd_epi16",&mmtmpPMI1);
+               //   print_ints("mm_madd_epi16",&mmtmpPMI1);
             // mmtmpPMI1 contains imag part of 4 consecutive outputs (32-bit)
             pmi128_re = _mm_add_epi32(pmi128_re,mmtmpPMI0);
-	     //   print_ints(" pmi128_re 0",&pmi128_re);
+             //   print_ints(" pmi128_re 0",&pmi128_re);
             pmi128_im = _mm_add_epi32(pmi128_im,mmtmpPMI1);
-	       //   print_ints(" pmi128_im 0 ",&pmi128_im);
+               //   print_ints(" pmi128_im 0 ",&pmi128_im);
 
-	  /*  mmtmpPMI0 = _mm_xor_si128(mmtmpPMI0,mmtmpPMI0);
+          /*  mmtmpPMI0 = _mm_xor_si128(mmtmpPMI0,mmtmpPMI0);
             mmtmpPMI1 = _mm_xor_si128(mmtmpPMI1,mmtmpPMI1);
 
-	    mmtmpPMI0 = _mm_madd_epi16(dl_ch0_128[1],dl_ch1_128[1]);
-	         //  print_ints("re",&mmtmpPMI0);
+            mmtmpPMI0 = _mm_madd_epi16(dl_ch0_128[1],dl_ch1_128[1]);
+                 //  print_ints("re",&mmtmpPMI0);
             mmtmpPMI1 = _mm_shufflelo_epi16(dl_ch1_128[1],_MM_SHUFFLE(2,3,0,1));
               //  print_ints("_mm_shufflelo_epi16",&mmtmpPMI1);
             mmtmpPMI1 = _mm_shufflehi_epi16(mmtmpPMI1,_MM_SHUFFLE(2,3,0,1));
-	        //  print_ints("_mm_shufflehi_epi16",&mmtmpPMI1);
+                //  print_ints("_mm_shufflehi_epi16",&mmtmpPMI1);
             mmtmpPMI1 = _mm_sign_epi16(mmtmpPMI1,*(__m128i*)&conjugate);
-	       //  print_ints("_mm_sign_epi16",&mmtmpPMI1);
+               //  print_ints("_mm_sign_epi16",&mmtmpPMI1);
             mmtmpPMI1 = _mm_madd_epi16(mmtmpPMI1,dl_ch0_128[1]);
-	       //   print_ints("mm_madd_epi16",&mmtmpPMI1);
+               //   print_ints("mm_madd_epi16",&mmtmpPMI1);
             // mmtmpPMI1 contains imag part of 4 consecutive outputs (32-bit)
             pmi128_re = _mm_add_epi32(pmi128_re,mmtmpPMI0);
-	        //  print_ints(" pmi128_re 1",&pmi128_re);
+                //  print_ints(" pmi128_re 1",&pmi128_re);
             pmi128_im = _mm_add_epi32(pmi128_im,mmtmpPMI1);
-	    //print_ints(" pmi128_im 1 ",&pmi128_im);*/
+            //print_ints(" pmi128_im 1 ",&pmi128_im);*/
 
 #elif defined(__arm__)
 
@@ -807,3 +826,536 @@ void lte_ue_measurements_emul(PHY_VARS_UE *ue,uint8_t subframe,uint8_t eNB_id)
   msg("[PHY] EMUL UE lte_ue_measurements_emul subframe %d, eNB_id %d\n",subframe,eNB_id);
 }
 
+
+//We write a function that takes complement conjugate of the complex channel estimate ch0 and multiplies it with another complex channel estimate ch1 and stores the result in ch0conj_ch1.
+
+uint8_t rank_estimation_tm4(int *dl_ch_estimates_00, // please respect the order of channel estimates
+                            int *dl_ch_estimates_01,
+                            int *dl_ch_estimates_10,
+                            int *dl_ch_estimates_11,
+                            unsigned short nb_rb) {
+
+  int i=0;
+  int rank=1;
+  int N_RB=nb_rb;
+  int *ch00_rank, *ch01_rank, *ch10_rank, *ch11_rank;
+
+  int32_t shift;
+  int avg_0[2];
+  int avg_1[2];
+
+  int count=0;
+
+  int32_t conjch00_ch01[12*N_RB], conjch01_ch00[12*N_RB], conjch10_ch11[12*N_RB], conjch11_ch10[12*N_RB];
+  int32_t conjch00_ch00[12*N_RB], conjch01_ch01[12*N_RB], conjch10_ch10[12*N_RB], conjch11_ch11[12*N_RB];
+  int32_t af_mf_00[12*N_RB], af_mf_00_sq[12*N_RB], af_mf_01_sq[12*N_RB], af_mf_10_sq[12*N_RB];
+  int32_t af_mf_11_sq[12*N_RB], af_mf_01[12*N_RB], af_mf_10[12*N_RB], af_mf_11[12*N_RB];
+  int32_t determ_fin[12*N_RB], denum_db[12*N_RB];
+  int32_t numer_fin[12*N_RB], numer_db[12*N_RB];
+  int32_t cond_db[12*N_RB];
+
+  ch00_rank = dl_ch_estimates_00;
+  ch01_rank = dl_ch_estimates_01;
+  ch10_rank = dl_ch_estimates_10;
+  ch11_rank = dl_ch_estimates_11;
+
+  dlsch_channel_level_TM34_meas(ch00_rank,
+                                ch01_rank,
+                                ch10_rank,
+                                ch11_rank,
+                                avg_0,
+                                avg_1,
+                                N_RB);
+
+  avg_0[0] = (log2_approx(avg_0[0])/2);
+  shift = cmax(avg_0[0],0);
+
+#ifdef DEBUG_RANK_EST
+  printf("\n shift %d \n" , shift);
+  printf("\n conj(ch00)ch01 \n");
+#endif
+
+  conjch0_mult_ch1(ch00_rank,
+                   ch01_rank,
+                   conjch00_ch01,
+                   N_RB,
+                   shift); // this is an arbitrary shift to avoid overflow. can be changed.
+
+#ifdef DEBUG_RANK_EST
+  printf("\n conj(ch01)ch00 \n");
+#endif
+
+  conjch0_mult_ch1(ch01_rank,
+                   ch00_rank,
+                   conjch01_ch00,
+                   N_RB,
+                   shift);
+
+#ifdef DEBUG_RANK_EST
+  printf("\n conj(ch10)ch11 \n");
+#endif
+
+
+  conjch0_mult_ch1(ch10_rank,
+                   ch11_rank,
+                   conjch10_ch11,
+                   N_RB,
+                   shift);
+
+#ifdef DEBUG_RANK_EST
+  printf("\n conj(ch11)ch10 \n");
+#endif
+
+  conjch0_mult_ch1(ch11_rank,
+                   ch10_rank,
+                   conjch11_ch10,
+                   N_RB,
+                   shift);
+
+#ifdef DEBUG_RANK_EST
+  printf("\n conj(ch00)ch00 \n");
+#endif
+
+  conjch0_mult_ch1(ch00_rank,
+                   ch00_rank,
+                   conjch00_ch00,
+                   N_RB,
+                   shift);
+
+#ifdef DEBUG_RANK_EST
+  printf("\n conj(ch01)ch01 \n");
+#endif
+
+  conjch0_mult_ch1(ch01_rank,
+                   ch01_rank,
+                   conjch01_ch01,
+                   N_RB,
+                   shift);
+
+#ifdef DEBUG_RANK_EST
+  printf("\n conj(ch10)ch10 \n");
+#endif
+
+  conjch0_mult_ch1(ch10_rank,
+                   ch10_rank,
+                   conjch10_ch10,
+                   N_RB,
+                   shift);
+#ifdef DEBUG_RANK_EST
+  printf("\n conj(ch11)ch11 \n");
+#endif
+
+  conjch0_mult_ch1(ch11_rank,
+                   ch11_rank,
+                   conjch11_ch11,
+                   N_RB,
+                   shift);
+
+  construct_HhH_elements(conjch00_ch00,
+                         conjch01_ch01,
+                         conjch11_ch11,
+                         conjch10_ch10,
+                         conjch00_ch01,
+                         conjch01_ch00,
+                         conjch10_ch11,
+                         conjch11_ch10,
+                         af_mf_00,
+                         af_mf_01,
+                         af_mf_10,
+                         af_mf_11,
+                         N_RB);
+#ifdef DEBUG_RANK_EST
+  printf("\n |HhH00|^2 \n");
+#endif
+
+  squared_matrix_element(af_mf_00,
+                         af_mf_00_sq,
+                         N_RB);
+
+#ifdef DEBUG_RANK_EST
+  printf("\n |HhH01|^2 \n");
+#endif
+
+  squared_matrix_element(af_mf_01,
+                         af_mf_01_sq,
+                         N_RB);
+
+#ifdef DEBUG_RANK_EST
+  printf("\n |HhH10|^2 \n");
+#endif
+
+  squared_matrix_element(af_mf_10,
+                         af_mf_10_sq,
+                         N_RB);
+
+#ifdef DEBUG_RANK_EST
+  printf("\n |HhH11|^2 \n");
+#endif
+
+  squared_matrix_element(af_mf_11,
+                         af_mf_11_sq,
+                         N_RB);
+
+  det_HhH(af_mf_00,
+          af_mf_01,
+          af_mf_10,
+          af_mf_11,
+          determ_fin,
+          N_RB);
+
+  numer(af_mf_00_sq,
+        af_mf_01_sq,
+        af_mf_10_sq,
+        af_mf_11_sq,
+        numer_fin,
+        N_RB);
+
+  for (i=1; i<12*N_RB; i++)
+  {
+    denum_db[i]=dB_fixed(determ_fin[i]);
+    numer_db[i]=dB_fixed(numer_fin[i]);
+    cond_db[i]=(numer_db[i]-denum_db[i]);
+    if (cond_db[i] < 11)
+      count++;
+#ifdef DEBUG_RANK_EST
+    printf("i %d  numer_db[i] = %d \n", i, numer_db[i]);
+    printf("i %d  denum_db[i] = %d \n", i, denum_db[i]);
+    printf("i %d  cond_db[i] =  %d \n", i, cond_db[i]);
+    printf("i %d counter = %d \n", i, count);
+#endif
+  }
+
+  if (count >= 6*N_RB) // conditional number is lower 10dB in half on more Res Blocks
+    rank=2;
+
+#ifdef DEBUG_RANK_EST
+    printf(" rank = %d \n", rank);
+#endif
+   return(rank);
+}
+
+void conjch0_mult_ch1(int *ch0,
+                      int *ch1,
+                      int32_t *ch0conj_ch1,
+                      unsigned short nb_rb,
+                      unsigned char output_shift0)
+{
+  //This function is used to compute multiplications in Hhermitian * H matrix
+  unsigned short rb;
+  __m128i *dl_ch0_128,*dl_ch1_128, *ch0conj_ch1_128, mmtmpD0,mmtmpD1,mmtmpD2,mmtmpD3;
+
+  dl_ch0_128 = (__m128i *)ch0;
+  dl_ch1_128 = (__m128i *)ch1;
+
+  ch0conj_ch1_128 = (__m128i *)ch0conj_ch1;
+
+  for (rb=0; rb<3*nb_rb; rb++) {
+
+    mmtmpD0 = _mm_madd_epi16(dl_ch0_128[0],dl_ch1_128[0]);
+    mmtmpD1 = _mm_shufflelo_epi16(dl_ch0_128[0],_MM_SHUFFLE(2,3,0,1));
+    mmtmpD1 = _mm_shufflehi_epi16(mmtmpD1,_MM_SHUFFLE(2,3,0,1));
+    mmtmpD1 = _mm_sign_epi16(mmtmpD1,*(__m128i*)&conjugate[0]);
+    mmtmpD1 = _mm_madd_epi16(mmtmpD1,dl_ch1_128[0]);
+    mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift0);
+    mmtmpD1 = _mm_srai_epi32(mmtmpD1,output_shift0);
+    mmtmpD2 = _mm_unpacklo_epi32(mmtmpD0,mmtmpD1);
+    mmtmpD3 = _mm_unpackhi_epi32(mmtmpD0,mmtmpD1);
+
+    ch0conj_ch1_128[0] = _mm_packs_epi32(mmtmpD2,mmtmpD3);
+
+#ifdef DEBUG_RANK_EST
+    printf("\n Computing conjugates \n");
+    print_shorts("ch0:",(int16_t*)&dl_ch0_128[0]);
+    print_shorts("ch1:",(int16_t*)&dl_ch1_128[0]);
+    print_shorts("pack:",(int16_t*)&ch0conj_ch1_128[0]);
+#endif
+
+    dl_ch0_128+=1;
+    dl_ch1_128+=1;
+    ch0conj_ch1_128+=1;
+  }
+  _mm_empty();
+  _m_empty();
+}
+
+void construct_HhH_elements(int *ch0conj_ch0, //00_00
+                            int *ch1conj_ch1,//01_01
+                            int *ch2conj_ch2,//11_11
+                            int *ch3conj_ch3,//10_10
+                            int *ch0conj_ch1,//00_01
+                            int *ch1conj_ch0,//01_00
+                            int *ch2conj_ch3,//10_11
+                            int *ch3conj_ch2,//11_10
+                            int32_t *after_mf_00,
+                            int32_t *after_mf_01,
+                            int32_t *after_mf_10,
+                            int32_t *after_mf_11,
+                            unsigned short nb_rb)
+{
+  unsigned short rb;
+  __m128i *ch0conj_ch0_128, *ch1conj_ch1_128, *ch2conj_ch2_128, *ch3conj_ch3_128;
+  __m128i *ch0conj_ch1_128, *ch1conj_ch0_128, *ch2conj_ch3_128, *ch3conj_ch2_128;
+  __m128i *after_mf_00_128, *after_mf_01_128, *after_mf_10_128, *after_mf_11_128;
+
+  ch0conj_ch0_128 = (__m128i *)ch0conj_ch0;
+  ch1conj_ch1_128 = (__m128i *)ch1conj_ch1;
+  ch2conj_ch2_128 = (__m128i *)ch2conj_ch2;
+  ch3conj_ch3_128 = (__m128i *)ch3conj_ch3;
+  ch0conj_ch1_128 = (__m128i *)ch0conj_ch1;
+  ch1conj_ch0_128 = (__m128i *)ch1conj_ch0;
+  ch2conj_ch3_128 = (__m128i *)ch2conj_ch3;
+  ch3conj_ch2_128 = (__m128i *)ch3conj_ch2;
+  after_mf_00_128 = (__m128i *)after_mf_00;
+  after_mf_01_128 = (__m128i *)after_mf_01;
+  after_mf_10_128 = (__m128i *)after_mf_10;
+  after_mf_11_128 = (__m128i *)after_mf_11;
+
+  for (rb=0; rb<3*nb_rb; rb++) {
+
+    after_mf_00_128[0] =_mm_adds_epi16(_mm_srai_epi16(ch0conj_ch0_128[0],1),_mm_srai_epi16(ch3conj_ch3_128[0],1));// _mm_adds_epi32(ch0conj_ch0_128[0], ch3conj_ch3_128[0]); //00_00 + 10_10
+    after_mf_11_128[0] =_mm_adds_epi16(ch1conj_ch1_128[0], ch2conj_ch2_128[0]); //01_01 + 11_11
+    after_mf_01_128[0] =_mm_adds_epi16(ch0conj_ch1_128[0], ch2conj_ch3_128[0]);//00_01 + 10_11
+    after_mf_10_128[0] =_mm_adds_epi16(ch1conj_ch0_128[0], ch3conj_ch2_128[0]);//01_00 + 11_10
+
+#ifdef DEBUG_RANK_EST
+    printf(" \n construct_HhH_elements \n");
+    print_shorts("ch0conj_ch0_128:",(int16_t*)&ch0conj_ch0_128[0]);
+    print_shorts("ch1conj_ch1_128:",(int16_t*)&ch1conj_ch1_128[0]);
+    print_shorts("ch2conj_ch2_128:",(int16_t*)&ch2conj_ch2_128[0]);
+    print_shorts("ch3conj_ch3_128:",(int16_t*)&ch3conj_ch3_128[0]);
+    print_shorts("ch0conj_ch1_128:",(int16_t*)&ch0conj_ch1_128[0]);
+    print_shorts("ch1conj_ch0_128:",(int16_t*)&ch1conj_ch0_128[0]);
+    print_shorts("ch2conj_ch3_128:",(int16_t*)&ch2conj_ch3_128[0]);
+    print_shorts("ch3conj_ch2_128:",(int16_t*)&ch3conj_ch2_128[0]);
+    print_shorts("after_mf_00_128:",(int16_t*)&after_mf_00_128[0]);
+    print_shorts("after_mf_01_128:",(int16_t*)&after_mf_01_128[0]);
+    print_shorts("after_mf_10_128:",(int16_t*)&after_mf_10_128[0]);
+    print_shorts("after_mf_11_128:",(int16_t*)&after_mf_11_128[0]);
+#endif
+
+    ch0conj_ch0_128+=1;
+    ch1conj_ch1_128+=1;
+    ch2conj_ch2_128+=1;
+    ch3conj_ch3_128+=1;
+    ch0conj_ch1_128+=1;
+    ch1conj_ch0_128+=1;
+    ch2conj_ch3_128+=1;
+    ch3conj_ch2_128+=1;
+
+    after_mf_00_128+=1;
+    after_mf_01_128+=1;
+    after_mf_10_128+=1;
+    after_mf_11_128+=1;
+  }
+  _mm_empty();
+  _m_empty();
+}
+
+
+void squared_matrix_element(int32_t *Hh_h_00,
+                            int32_t *Hh_h_00_sq,
+                            unsigned short nb_rb)
+{
+   unsigned short rb;
+  __m128i *Hh_h_00_128,*Hh_h_00_sq_128;
+
+  Hh_h_00_128 = (__m128i *)Hh_h_00;
+  Hh_h_00_sq_128 = (__m128i *)Hh_h_00_sq;
+
+  for (rb=0; rb<3*nb_rb; rb++) {
+
+    Hh_h_00_sq_128[0] = _mm_madd_epi16(Hh_h_00_128[0],Hh_h_00_128[0]);
+
+#ifdef DEBUG_RANK_EST
+    printf("\n Computing squared_matrix_element \n");
+    print_shorts("Hh_h_00_128:",(int16_t*)&Hh_h_00_128[0]);
+    print_ints("Hh_h_00_sq_128:",(int32_t*)&Hh_h_00_sq_128[0]);
+#endif
+
+    Hh_h_00_sq_128+=1;
+    Hh_h_00_128+=1;
+  }
+  _mm_empty();
+  _m_empty();
+}
+
+
+
+void det_HhH(int32_t *after_mf_00,
+             int32_t *after_mf_01,
+             int32_t *after_mf_10,
+             int32_t *after_mf_11,
+             int32_t *det_fin,
+             unsigned short nb_rb)
+
+{
+  unsigned short rb;
+  __m128i *after_mf_00_128,*after_mf_01_128, *after_mf_10_128, *after_mf_11_128, ad_re_128, bc_re_128;
+  __m128i *det_fin_128, det_128;
+
+  after_mf_00_128 = (__m128i *)after_mf_00;
+  after_mf_01_128 = (__m128i *)after_mf_01;
+  after_mf_10_128 = (__m128i *)after_mf_10;
+  after_mf_11_128 = (__m128i *)after_mf_11;
+
+  det_fin_128 = (__m128i *)det_fin;
+
+  for (rb=0; rb<3*nb_rb; rb++) {
+
+    ad_re_128 = _mm_madd_epi16(after_mf_00_128[0],after_mf_11_128[0]);
+    bc_re_128 = _mm_madd_epi16(after_mf_01_128[0],after_mf_01_128[0]);
+    det_128 = _mm_sub_epi32(ad_re_128, bc_re_128);
+    det_fin_128[0] = _mm_abs_epi32(det_128);
+
+#ifdef DEBUG_RANK_EST
+    printf("\n Computing denominator \n");
+    print_shorts("after_mf_00_128:",(int16_t*)&after_mf_00_128[0]);
+    print_shorts("after_mf_01_128:",(int16_t*)&after_mf_01_128[0]);
+    print_shorts("after_mf_10_128:",(int16_t*)&after_mf_10_128[0]);
+    print_shorts("after_mf_11_128:",(int16_t*)&after_mf_11_128[0]);
+    print_ints("ad_re_128:",(int32_t*)&ad_re_128);
+    print_ints("bc_re_128:",(int32_t*)&bc_re_128);
+    print_ints("det_fin_128:",(int32_t*)&det_fin_128[0]);
+#endif
+
+    det_fin_128+=1;
+    after_mf_00_128+=1;
+    after_mf_01_128+=1;
+    after_mf_10_128+=1;
+    after_mf_11_128+=1;
+  }
+  _mm_empty();
+  _m_empty();
+}
+
+void numer(int32_t *Hh_h_00_sq,
+           int32_t *Hh_h_01_sq,
+           int32_t *Hh_h_10_sq,
+           int32_t *Hh_h_11_sq,
+           int32_t *num_fin,
+           unsigned short nb_rb)
+
+{
+  unsigned short rb;
+  __m128i *h_h_00_sq_128, *h_h_01_sq_128, *h_h_10_sq_128, *h_h_11_sq_128;
+  __m128i *num_fin_128, sq_a_plus_sq_d_128, sq_b_plus_sq_c_128;
+
+  h_h_00_sq_128 = (__m128i *)Hh_h_00_sq;
+  h_h_01_sq_128 = (__m128i *)Hh_h_01_sq;
+  h_h_10_sq_128 = (__m128i *)Hh_h_10_sq;
+  h_h_11_sq_128 = (__m128i *)Hh_h_11_sq;
+
+  num_fin_128 = (__m128i *)num_fin;
+
+  for (rb=0; rb<3*nb_rb; rb++) {
+
+    sq_a_plus_sq_d_128 = _mm_add_epi32(h_h_00_sq_128[0],h_h_11_sq_128[0]);
+    sq_b_plus_sq_c_128 = _mm_add_epi32(h_h_01_sq_128[0],h_h_10_sq_128[0]);
+    num_fin_128[0] = _mm_add_epi32(sq_a_plus_sq_d_128, sq_b_plus_sq_c_128);
+
+#ifdef DEBUG_RANK_EST
+    printf("\n Computing numerator \n");
+    print_ints("h_h_00_sq_128:",(int32_t*)&h_h_00_sq_128[0]);
+    print_ints("h_h_01_sq_128:",(int32_t*)&h_h_01_sq_128[0]);
+    print_ints("h_h_10_sq_128:",(int32_t*)&h_h_10_sq_128[0]);
+    print_ints("h_h_11_sq_128:",(int32_t*)&h_h_11_sq_128[0]);
+    print_shorts("sq_a_plus_sq_d_128:",(int16_t*)&sq_a_plus_sq_d_128);
+    print_shorts("sq_b_plus_sq_c_128:",(int16_t*)&sq_b_plus_sq_c_128);
+    print_shorts("num_fin_128:",(int16_t*)&num_fin_128[0]);
+#endif
+
+    num_fin_128+=1;
+    h_h_00_sq_128+=1;
+    h_h_01_sq_128+=1;
+    h_h_10_sq_128+=1;
+    h_h_11_sq_128+=1;
+  }
+  _mm_empty();
+  _m_empty();
+}
+
+
+
+
+
+void dlsch_channel_level_TM34_meas(int *ch00,
+                                   int *ch01,
+                                   int *ch10,
+                                   int *ch11,
+                                   int *avg_0,
+                                   int *avg_1,
+                                   unsigned short nb_rb)
+{
+
+#if defined(__x86_64__)||defined(__i386__)
+
+  short rb;
+  unsigned char nre=12;
+  __m128i *ch00_128, *ch01_128, *ch10_128, *ch11_128;
+  __m128i avg_0_row0_128D, avg_1_row0_128D, avg_0_row1_128D, avg_1_row1_128D;
+  __m128i ch00_128_tmp, ch01_128_tmp, ch10_128_tmp, ch11_128_tmp;
+
+  avg_0[0] = 0;
+  avg_0[1] = 0;
+  avg_1[0] = 0;
+  avg_1[1] = 0;
+
+  ch00_128 = (__m128i *)ch00;
+  ch01_128 = (__m128i *)ch01;
+  ch10_128 = (__m128i *)ch10;
+  ch11_128 = (__m128i *)ch11;
+
+  avg_0_row0_128D = _mm_setzero_si128();
+  avg_1_row0_128D = _mm_setzero_si128();
+  avg_0_row1_128D = _mm_setzero_si128();
+  avg_1_row1_128D = _mm_setzero_si128();
+
+  for (rb=0; rb<3*nb_rb; rb++) {
+    ch00_128_tmp = _mm_load_si128(&ch00_128[0]);
+    ch01_128_tmp = _mm_load_si128(&ch01_128[0]);
+    ch10_128_tmp = _mm_load_si128(&ch10_128[0]);
+    ch11_128_tmp = _mm_load_si128(&ch11_128[0]);
+
+    avg_0_row0_128D = _mm_add_epi32(avg_0_row0_128D,_mm_madd_epi16(ch00_128_tmp,ch00_128_tmp));
+    avg_1_row0_128D = _mm_add_epi32(avg_1_row0_128D,_mm_madd_epi16(ch01_128_tmp,ch01_128_tmp));
+    avg_0_row1_128D = _mm_add_epi32(avg_0_row1_128D,_mm_madd_epi16(ch10_128_tmp,ch10_128_tmp));
+    avg_1_row1_128D = _mm_add_epi32(avg_1_row1_128D,_mm_madd_epi16(ch11_128_tmp,ch11_128_tmp));
+
+    ch00_128+=1;
+    ch01_128+=1;
+    ch10_128+=1;
+    ch11_128+=1;
+  }
+
+  avg_0[0] = (((int*)&avg_0_row0_128D)[0])/(nb_rb*nre) +
+           (((int*)&avg_0_row0_128D)[1])/(nb_rb*nre) +
+           (((int*)&avg_0_row0_128D)[2])/(nb_rb*nre) +
+           (((int*)&avg_0_row0_128D)[3])/(nb_rb*nre);
+
+  avg_1[0] = (((int*)&avg_1_row0_128D)[0])/(nb_rb*nre) +
+           (((int*)&avg_1_row0_128D)[1])/(nb_rb*nre) +
+           (((int*)&avg_1_row0_128D)[2])/(nb_rb*nre) +
+           (((int*)&avg_1_row0_128D)[3])/(nb_rb*nre);
+
+  avg_0[1] = (((int*)&avg_0_row1_128D)[0])/(nb_rb*nre) +
+           (((int*)&avg_0_row1_128D)[1])/(nb_rb*nre) +
+           (((int*)&avg_0_row1_128D)[2])/(nb_rb*nre) +
+           (((int*)&avg_0_row1_128D)[3])/(nb_rb*nre);
+
+  avg_1[1] = (((int*)&avg_1_row1_128D)[0])/(nb_rb*nre) +
+           (((int*)&avg_1_row1_128D)[1])/(nb_rb*nre) +
+           (((int*)&avg_1_row1_128D)[2])/(nb_rb*nre) +
+           (((int*)&avg_1_row1_128D)[3])/(nb_rb*nre);
+
+  avg_0[0] = avg_0[0] + avg_0[1];
+  avg_1[0] = avg_1[0] + avg_1[1];
+  avg_0[0] = min (avg_0[0], avg_1[0]);
+  avg_1[0] = avg_0[0];
+
+  _mm_empty();
+  _m_empty();
+
+#elif defined(__arm__)
+
+#endif
+}
diff --git a/openair1/PHY/LTE_TRANSPORT/extern.h b/openair1/PHY/LTE_TRANSPORT/extern.h
index 88c034e9db..af4dc20df6 100644
--- a/openair1/PHY/LTE_TRANSPORT/extern.h
+++ b/openair1/PHY/LTE_TRANSPORT/extern.h
@@ -23,6 +23,8 @@ extern unsigned int dlsch_tbs25[27][25],TBStable[27][110],TBStable1C[32];
 extern unsigned short lte_cqi_eff1024[16];
 extern char lte_cqi_snr_dB[15];
 extern short conjugate[8],conjugate2[8];
+extern short minus_one[8];
+extern short minus_one[8];
 extern short *ul_ref_sigs[30][2][33];
 extern short *ul_ref_sigs_rx[30][2][33];
 extern unsigned short dftsizes[33];
diff --git a/openair1/PHY/LTE_TRANSPORT/proto.h b/openair1/PHY/LTE_TRANSPORT/proto.h
index 5fc8dea986..d580597b20 100644
--- a/openair1/PHY/LTE_TRANSPORT/proto.h
+++ b/openair1/PHY/LTE_TRANSPORT/proto.h
@@ -112,7 +112,7 @@ LTE_UE_ULSCH_t *new_ue_ulsch(unsigned char N_RB_UL, uint8_t abstraction_flag);
     @returns status
 */
 int32_t dlsch_encoding(PHY_VARS_eNB *eNB,
-		       uint8_t *a,
+                       uint8_t *a,
                        uint8_t num_pdcch_symbols,
                        LTE_eNB_DLSCH_t *dlsch,
                        int frame,
@@ -157,14 +157,14 @@ int32_t dlsch_encoding_SIC(PHY_VARS_UE *ue,
     @returns status
 */
 int32_t dlsch_encoding_2threads(PHY_VARS_eNB *eNB,
-				uint8_t *a,
-				uint8_t num_pdcch_symbols,
-				LTE_eNB_DLSCH_t *dlsch,
-				int frame,
-				uint8_t subframe,
-				time_stats_t *rm_stats,
-				time_stats_t *te_stats,
-				time_stats_t *i_stats);
+                                uint8_t *a,
+                                uint8_t num_pdcch_symbols,
+                                LTE_eNB_DLSCH_t *dlsch,
+                                int frame,
+                                uint8_t subframe,
+                                time_stats_t *rm_stats,
+                                time_stats_t *te_stats,
+                                time_stats_t *i_stats);
 
 void dlsch_encoding_emul(PHY_VARS_eNB *phy_vars_eNB,
                          uint8_t *DLSCH_pdu,
@@ -263,11 +263,11 @@ int32_t allocate_REs_in_RB(PHY_VARS_eNB* phy_vars_eNB,
                            uint32_t *re_allocated,
                            uint8_t skip_dc,
                            uint8_t skip_half,
-			   uint8_t lprime,
-			   uint8_t mprime,
-			   uint8_t Ns,
-			   int *P1_SHIFT,
-			   int *P2_SHIFT);
+                           uint8_t lprime,
+                           uint8_t mprime,
+                           uint8_t Ns,
+                           int *P1_SHIFT,
+                           int *P2_SHIFT);
 
 
 /** \fn int32_t dlsch_modulation(int32_t **txdataF,
@@ -387,7 +387,7 @@ int32_t generate_pilots_slot(PHY_VARS_eNB *phy_vars_eNB,
 
 int32_t generate_mbsfn_pilot(PHY_VARS_eNB *phy_vars_eNB,
                              eNB_rxtx_proc_t *proc,
-			     int32_t **txdataF,
+                             int32_t **txdataF,
                              int16_t amp);
 
 void generate_ue_spec_pilots(PHY_VARS_eNB *phy_vars_eNB,
@@ -395,7 +395,7 @@ void generate_ue_spec_pilots(PHY_VARS_eNB *phy_vars_eNB,
                              int32_t **txdataF,
                              int16_t amp,
                              uint16_t Ntti,
-		             uint8_t beamforming_mode);
+                             uint8_t beamforming_mode);
 
 int32_t generate_pss(int32_t **txdataF,
                      int16_t amp,
@@ -1138,6 +1138,58 @@ void dlsch_dual_stream_correlationTM34(LTE_DL_FRAME_PARMS *frame_parms,
                                    int **dl_ch_rho_ext,
                                    unsigned char output_shift0,
                                    unsigned char output_shift1);
+//This function is used to compute multiplications in Hhermitian * H matrix
+void conjch0_mult_ch1(int *ch0,
+                      int *ch1,
+                      int32_t *ch0conj_ch1,
+                      unsigned short nb_rb,
+                      unsigned char output_shift0);
+
+void construct_HhH_elements(int *ch0conj_ch0,
+                         int *ch1conj_ch1,
+                         int *ch2conj_ch2,
+                         int *ch3conj_ch3,
+                         int *ch0conj_ch1,
+                         int *ch1conj_ch0,
+                         int *ch2conj_ch3,
+                         int *ch3conj_ch2,
+                         int32_t *after_mf_00,
+                         int32_t *after_mf_01,
+                         int32_t *after_mf_10,
+                         int32_t *after_mf_11,
+                         unsigned short nb_rb);
+
+void squared_matrix_element(int32_t *Hh_h_00,
+                            int32_t *Hh_h_00_sq,
+                            unsigned short nb_rb);
+
+void dlsch_channel_level_TM34_meas(int *ch00,
+                                   int *ch01,
+                                   int *ch10,
+                                   int *ch11,
+                                   int *avg_0,
+                                   int *avg_1,
+                                   unsigned short nb_rb);
+
+void det_HhH(int32_t *after_mf_00,
+             int32_t *after_mf_01,
+             int32_t *after_mf_10,
+             int32_t *after_mf_11,
+             int32_t *det_fin_128,
+             unsigned short nb_rb);
+
+void numer(int32_t *Hh_h_00_sq,
+           int32_t *Hh_h_01_sq,
+           int32_t *Hh_h_10_sq,
+           int32_t *Hh_h_11_sq,
+           int32_t *num_fin,
+           unsigned short nb_rb);
+
+uint8_t rank_estimation_tm4(int *dl_ch_estimates_00,
+                            int *dl_ch_estimates_01,
+                            int *dl_ch_estimates_10,
+                            int *dl_ch_estimates_11,
+                            unsigned short nb_rb);
 
 void dlsch_channel_compensation_TM56(int **rxdataF_ext,
                                      int **dl_ch_estimates_ext,
@@ -1295,7 +1347,7 @@ int32_t rx_pdcch(LTE_UE_COMMON *lte_ue_common_vars,
 int pss_sss_extract(PHY_VARS_UE *phy_vars_ue,
                     int32_t pss_ext[4][72],
                     int32_t sss_ext[4][72],
-					uint8_t subframe);
+                                        uint8_t subframe);
 
 /*! \brief Extract only PSS resource elements
   @param phy_vars_ue Pointer to UE variables
@@ -1564,7 +1616,7 @@ int32_t generate_srs_tx(PHY_VARS_UE *phy_vars_ue,
 */
 
 int32_t generate_drs_pusch(PHY_VARS_UE *phy_vars_ue,
-			   UE_rxtx_proc_t *proc,
+                           UE_rxtx_proc_t *proc,
                            uint8_t eNB_id,
                            int16_t amp,
                            uint32_t subframe,
@@ -1647,7 +1699,7 @@ int generate_ue_ulsch_params_from_dci(void *dci_pdu,
                                       DCI_format_t dci_format,
                                       PHY_VARS_UE *phy_vars_ue,
                                       UE_rxtx_proc_t *proc,
-				      uint16_t si_rnti,
+                                      uint16_t si_rnti,
                                       uint16_t ra_rnti,
                                       uint16_t p_rnti,
                                       uint16_t cba_rnti,
@@ -1655,21 +1707,21 @@ int generate_ue_ulsch_params_from_dci(void *dci_pdu,
                                       uint8_t use_srs);
 
 int32_t generate_ue_ulsch_params_from_rar(PHY_VARS_UE *phy_vars_ue,
-					  UE_rxtx_proc_t *proc,
-					  uint8_t eNB_id);
+                                          UE_rxtx_proc_t *proc,
+                                          uint8_t eNB_id);
 double sinr_eff_cqi_calc(PHY_VARS_UE *phy_vars_ue,
                          uint8_t eNB_id,
-						 uint8_t subframe);
+                                                 uint8_t subframe);
 
 uint8_t sinr2cqi(double sinr,uint8_t trans_mode);
 
 int generate_eNB_ulsch_params_from_dci(PHY_VARS_eNB *PHY_vars_eNB,
-				       eNB_rxtx_proc_t *proc,
-				       void *dci_pdu,
+                                       eNB_rxtx_proc_t *proc,
+                                       void *dci_pdu,
                                        rnti_t rnti,
-				       DCI_format_t dci_format,
+                                       DCI_format_t dci_format,
                                        uint8_t UE_id,
-				       uint16_t si_rnti,
+                                       uint16_t si_rnti,
                                        uint16_t ra_rnti,
                                        uint16_t p_rnti,
                                        uint16_t cba_rnti,
@@ -1725,13 +1777,13 @@ int initial_sync(PHY_VARS_UE *phy_vars_ue, runmode_t mode);
 
 void rx_ulsch(PHY_VARS_eNB *phy_vars_eNB,
               eNB_rxtx_proc_t *proc,
-	      uint8_t eNB_id,  // this is the effective sector id
+              uint8_t eNB_id,  // this is the effective sector id
               uint8_t UE_id,
               LTE_eNB_ULSCH_t **ulsch,
               uint8_t cooperation_flag);
 
 void rx_ulsch_emul(PHY_VARS_eNB *phy_vars_eNB,
-		   eNB_rxtx_proc_t *proc,
+                   eNB_rxtx_proc_t *proc,
                    uint8_t sect_id,
                    uint8_t UE_index);
 
@@ -1779,7 +1831,7 @@ int32_t ulsch_encoding_emul(uint8_t *ulsch_buffer,
   @returns 0 on success
 */
 unsigned int  ulsch_decoding(PHY_VARS_eNB *phy_vars_eNB,
-			     eNB_rxtx_proc_t *proc,
+                             eNB_rxtx_proc_t *proc,
                              uint8_t UE_id,
                              uint8_t control_only_flag,
                              uint8_t Nbundled,
@@ -1794,9 +1846,9 @@ unsigned int  ulsch_decoding(PHY_VARS_eNB *phy_vars_eNB,
   @returns 0 on success
 */
 int ulsch_decoding_data_2thread(PHY_VARS_eNB *eNB,
-				int UE_id,
-				int harq_pid,
-				int llr8_flag);
+                                int UE_id,
+                                int harq_pid,
+                                int llr8_flag);
 
 /*!
   \brief Decoding of ULSCH data component from 36-212. This one is single thread.
@@ -1807,17 +1859,17 @@ int ulsch_decoding_data_2thread(PHY_VARS_eNB *eNB,
   @returns 0 on success
 */
 int ulsch_decoding_data(PHY_VARS_eNB *eNB,
-			int UE_id,
-			int harq_pid,
-			int llr8_flag);
+                        int UE_id,
+                        int harq_pid,
+                        int llr8_flag);
 
 uint32_t ulsch_decoding_emul(PHY_VARS_eNB *phy_vars_eNB,
                              eNB_rxtx_proc_t *proc,
-			     uint8_t UE_index,
+                             uint8_t UE_index,
                              uint16_t *crnti);
 
 void generate_phich_top(PHY_VARS_eNB *phy_vars_eNB,
-			eNB_rxtx_proc_t *proc,
+                        eNB_rxtx_proc_t *proc,
                         int16_t amp,
                         uint8_t sect_id);
 
@@ -1829,7 +1881,7 @@ void generate_phich_top(PHY_VARS_eNB *phy_vars_eNB,
 */
 
 void rx_phich(PHY_VARS_UE *phy_vars_ue,
-	      UE_rxtx_proc_t *proc,
+              UE_rxtx_proc_t *proc,
               uint8_t subframe,
               uint8_t eNB_id);
 
@@ -1941,32 +1993,32 @@ void dlsch_unscrambling(LTE_DL_FRAME_PARMS *frame_parms,
 void init_ncs_cell(LTE_DL_FRAME_PARMS *frame_parms,uint8_t ncs_cell[20][7]);
 
 void generate_pucch1x(int32_t **txdataF,
-		      LTE_DL_FRAME_PARMS *frame_parms,
-		      uint8_t ncs_cell[20][7],
-		      PUCCH_FMT_t fmt,
-		      PUCCH_CONFIG_DEDICATED *pucch_config_dedicated,
-		      uint16_t n1_pucch,
-		      uint8_t shortened_format,
-		      uint8_t *payload,
-		      int16_t amp,
-		      uint8_t subframe);
+                      LTE_DL_FRAME_PARMS *frame_parms,
+                      uint8_t ncs_cell[20][7],
+                      PUCCH_FMT_t fmt,
+                      PUCCH_CONFIG_DEDICATED *pucch_config_dedicated,
+                      uint16_t n1_pucch,
+                      uint8_t shortened_format,
+                      uint8_t *payload,
+                      int16_t amp,
+                      uint8_t subframe);
 
 void generate_pucch2x(int32_t **txdataF,
-		      LTE_DL_FRAME_PARMS *fp,
-		      uint8_t ncs_cell[20][7],
-		      PUCCH_FMT_t fmt,
-		      PUCCH_CONFIG_DEDICATED *pucch_config_dedicated,
-		      uint16_t n2_pucch,
-		      uint8_t *payload,
-		      int A,
-		      int B2,
-		      int16_t amp,
-		      uint8_t subframe,
-		      uint16_t rnti);
+                      LTE_DL_FRAME_PARMS *fp,
+                      uint8_t ncs_cell[20][7],
+                      PUCCH_FMT_t fmt,
+                      PUCCH_CONFIG_DEDICATED *pucch_config_dedicated,
+                      uint16_t n2_pucch,
+                      uint8_t *payload,
+                      int A,
+                      int B2,
+                      int16_t amp,
+                      uint8_t subframe,
+                      uint16_t rnti);
 
 
 void generate_pucch_emul(PHY_VARS_UE *phy_vars_ue,
-			 UE_rxtx_proc_t *proc,
+                         UE_rxtx_proc_t *proc,
                          PUCCH_FMT_t format,
                          uint8_t ncs1,
                          uint8_t *pucch_ack_payload,
@@ -1986,11 +2038,11 @@ uint32_t rx_pucch(PHY_VARS_eNB *phy_vars_eNB,
                   uint8_t pucch1_thres);
 
 int32_t rx_pucch_emul(PHY_VARS_eNB *phy_vars_eNB,
-		      eNB_rxtx_proc_t *proc,
-		      uint8_t UE_index,
-		      PUCCH_FMT_t fmt,
-		      uint8_t n1_pucch_sel,
-		      uint8_t *payload);
+                      eNB_rxtx_proc_t *proc,
+                      uint8_t UE_index,
+                      PUCCH_FMT_t fmt,
+                      uint8_t n1_pucch_sel,
+                      uint8_t *payload);
 
 
 /*!
diff --git a/openair1/PHY/LTE_TRANSPORT/vars.h b/openair1/PHY/LTE_TRANSPORT/vars.h
index ba58939878..98caf16d3e 100644
--- a/openair1/PHY/LTE_TRANSPORT/vars.h
+++ b/openair1/PHY/LTE_TRANSPORT/vars.h
@@ -59,8 +59,10 @@ char lte_cqi_snr_dB[15] = { -2,
 
 unsigned char ue_power_offsets[25] = {14,11,9,8,7,6,6,5,4,4,4,3,3,3,2,2,2,1,1,1,1,1,0,0,0};
 
-short conjugate[8]__attribute__((aligned(16))) = {-1,1,-1,1,-1,1,-1,1} ;
-short conjugate2[8]__attribute__((aligned(16))) = {1,-1,1,-1,1,-1,1,-1} ;
+short conjugate[8]__attribute__((aligned(16))) = {-1,1,-1,1,-1,1,-1,1};
+short conjugate2[8]__attribute__((aligned(16))) = {1,-1,1,-1,1,-1,1,-1};
+short minus_one[8]__attribute__((aligned(16))) = {-1,-1,-1,-1,-1,-1,-1,-1};
+short plus_one[8]__attribute__((aligned(16))) = {1,1,1,1,1,1,1,1};
 
 int qam64_table[8],qam16_table[4];
 
-- 
2.26.2