From c77b860097282ae6440f7c87214e9e64ce88d5d6 Mon Sep 17 00:00:00 2001
From: Luis Ariza <lfarizav@unal.edu.co>
Date: Fri, 23 Feb 2018 16:02:29 -0500
Subject: [PATCH] resolving SIMD channel problems

---
 openair1/PHY/TOOLS/defs.h                     |   4 +-
 openair1/PHY/TOOLS/lte_phy_scope.c            |   5 +-
 openair1/SIMULATION/RF/rf.c                   |   6 +-
 openair1/SIMULATION/TOOLS/abstraction.c       | 247 +++++++++---------
 openair1/SIMULATION/TOOLS/defs.h              |   6 +
 openair1/SIMULATION/TOOLS/multipath_channel.c |  30 +--
 openair1/SIMULATION/TOOLS/random_channel.c    |  12 +-
 .../rru.band7.tm1.if4p5.50PRB.oaisim.conf     |  16 +-
 targets/SIMU/USER/channel_sim.c               |   4 +-
 targets/SIMU/USER/oaisim.c                    |   9 +
 targets/SIMU/USER/oaisim_functions.c          |   8 +-
 11 files changed, 191 insertions(+), 156 deletions(-)

diff --git a/openair1/PHY/TOOLS/defs.h b/openair1/PHY/TOOLS/defs.h
index 4168af9b06..221d55c578 100644
--- a/openair1/PHY/TOOLS/defs.h
+++ b/openair1/PHY/TOOLS/defs.h
@@ -43,8 +43,8 @@ struct complex {
 #endif
 
 struct complexf {
-  float r;
-  float i;
+  float x[1200]; // real parts, one per frequency sample; kept as a flat array so four neighbours can be moved with one _mm_loadu_ps/_mm_storeu_ps
+  float y[1200]; // imaginary parts, same indexing as x. NOTE(review): 1200 looks like 12*100 RB while the LUTs use 12*nb_rb+1 samples - confirm index 1200 is never addressed
 };
 
 struct complex16 {
diff --git a/openair1/PHY/TOOLS/lte_phy_scope.c b/openair1/PHY/TOOLS/lte_phy_scope.c
index 0595c1bf04..6e1513a2d8 100644
--- a/openair1/PHY/TOOLS/lte_phy_scope.c
+++ b/openair1/PHY/TOOLS/lte_phy_scope.c
@@ -562,7 +562,10 @@ void phy_scope_UE(FD_lte_phy_scope_ue *form,
   llr_pdcch = (float*) calloc(12*frame_parms->N_RB_DL*num_pdcch_symbols*2,sizeof(float)); // init to zero
   bit_pdcch = (float*) calloc(12*frame_parms->N_RB_DL*num_pdcch_symbols*2,sizeof(float));
 
-  rxsig_t = (int16_t**) phy_vars_ue->common_vars.rxdata;
+  if (phy_vars_ue->do_ofdm_mod)
+  	rxsig_t = (int16_t**) phy_vars_ue->common_vars.common_vars_rx_data_per_thread[subframe&0x1].rxdataF;
+  else	
+  	rxsig_t = (int16_t**) phy_vars_ue->common_vars.rxdata;
   chest_t = (int16_t**) phy_vars_ue->common_vars.common_vars_rx_data_per_thread[phy_vars_ue->current_thread_id[subframe]].dl_ch_estimates_time[eNB_id];
   chest_f = (int16_t**) phy_vars_ue->common_vars.common_vars_rx_data_per_thread[phy_vars_ue->current_thread_id[subframe]].dl_ch_estimates[eNB_id];
   pbch_llr = (int8_t*) phy_vars_ue->pbch_vars[eNB_id]->llr;
diff --git a/openair1/SIMULATION/RF/rf.c b/openair1/SIMULATION/RF/rf.c
index 749c9780a6..51d8ae13f6 100644
--- a/openair1/SIMULATION/RF/rf.c
+++ b/openair1/SIMULATION/RF/rf.c
@@ -419,8 +419,10 @@ clock_t start=clock();*/
 		      rx128_re =  _mm_loadu_ps(&r_re[a][4*i]);//r_re[a][i],r_re[a][i+1]
 		      rx128_im =  _mm_loadu_ps(&r_im[a][4*i]);//r_im[a][i],r_im[a][i+1]
 		      rx128_gain_lin = _mm_set1_ps(rx_gain_lin);
-		      gauss_0_128_sqrt_NOW = _mm_set1_ps(ziggurat(0.0,1.0));
-		      gauss_1_128_sqrt_NOW = _mm_set1_ps(ziggurat(0.0,1.0));
+		      //start_meas(&desc->ziggurat);
+		      gauss_0_128_sqrt_NOW = _mm_set_ps(ziggurat(0.0,1.0),ziggurat(0.0,1.0),ziggurat(0.0,1.0),ziggurat(0.0,1.0));
+		      gauss_1_128_sqrt_NOW = _mm_set_ps(ziggurat(0.0,1.0),ziggurat(0.0,1.0),ziggurat(0.0,1.0),ziggurat(0.0,1.0));
+		      //stop_meas(&desc->ziggurat);
 		      gauss_0_128_sqrt_NOW = _mm_mul_ps(gauss_0_128_sqrt_NOW,_mm_set1_ps(sqrt_NOW));
 		      gauss_1_128_sqrt_NOW = _mm_mul_ps(gauss_1_128_sqrt_NOW,_mm_set1_ps(sqrt_NOW));
 		      // Amplify by receiver gain and apply 3rd order non-linearity
diff --git a/openair1/SIMULATION/TOOLS/abstraction.c b/openair1/SIMULATION/TOOLS/abstraction.c
index 8a82f2c8fa..720803a5ab 100644
--- a/openair1/SIMULATION/TOOLS/abstraction.c
+++ b/openair1/SIMULATION/TOOLS/abstraction.c
@@ -32,94 +32,10 @@
 // NEW code with lookup table for sin/cos based on delay profile (TO BE TESTED)
 
 static double **cos_lut=NULL,**sin_lut=NULL;
-
+static float **cos_lut_f=NULL,**sin_lut_f=NULL;
 
 //#if 1
 
-//#define abstraction_SSE
-#ifdef  abstraction_SSE//abstraction_SSE is not working.
-int init_freq_channel(channel_desc_t *desc,uint16_t nb_rb,int16_t n_samples)
-{
-
-  static int first_run=1;
-  double delta_f,freq,twopi;  // 90 kHz spacing
-  double delay;
-  int16_t f;
-  uint8_t l;
-  __m128d cos_lut128,sin_lut128;
-  //static int count=0;
-
-  if ((n_samples&1)==0) {
-    fprintf(stderr, "freq_channel_init: n_samples has to be odd\n");
-    return(-1); 
-  }
-  delta_f = nb_rb*180000/(n_samples-1);
-
-  if (first_run)
-  {
-	cos_lut = (double **)malloc16(n_samples*sizeof(double*));
-	sin_lut = (double **)malloc16(n_samples*sizeof(double*));
-	for (f=-(n_samples>>1); f<=(n_samples>>1); f++) {
-	    cos_lut[f+(n_samples>>1)] = (double *)malloc16_clear((int)desc->nb_taps*sizeof(double));
-	    sin_lut[f+(n_samples>>1)] = (double *)malloc16_clear((int)desc->nb_taps*sizeof(double));
-	}
-	first_run=0;
-  }
-  twopi=2*M_PI*1e-6*delta_f;
-  for (f=-(n_samples>>2); f<0; f++) {
-    //count++;
-    //freq=delta_f*(double)f*1e-6;// due to the fact that delays is in mus
-    for (l=0; l<(int)desc->nb_taps; l++) {
-
-      if (desc->nb_taps==1)
-        delay = desc->delays[l];
-      else
-        delay = desc->delays[l]+NB_SAMPLES_CHANNEL_OFFSET/desc->sampling_rate;
-
-       sincos_ps(_mm_set_pd(cos(twopi*(2*f)*delay),cos(twopi*(2*f)*delay)), &sin_lut128, &cos_lut128);
-      _mm_storeu_pd(&cos_lut[2*f+(n_samples>>1)][l],_mm_set_pd(cos(twopi*(2*f)*delay),cos(twopi*(2*f)*delay)));
-      _mm_storeu_pd(&sin_lut[2*f+(n_samples>>1)][l],_mm_set_pd(sin(twopi*(2*f)*delay),sin(twopi*(2*f)*delay)));
-      //cos_lut[f+(n_samples>>1)][l] = cos(2*M_PI*freq*delay);
-      //sin_lut[f+(n_samples>>1)][l] = sin(2*M_PI*freq*delay);
-      printf("arg %e, f %d, values cos:%e, sin:%e, cos# %e\n",twopi*(2*f)*delay,2*f+(n_samples>>1), cos_lut[2*f+(n_samples>>1)][l], sin_lut[2*f+(n_samples>>1)][l],cos(twopi*(2*f)*delay));
-      printf("arg %e, f %d, values cos:%e, sin:%e, cos# %e\n",twopi*(2*f+1)*delay,2*f+1+(n_samples>>1), cos_lut[2*f+1+(n_samples>>1)][l], sin_lut[2*f+1+(n_samples>>1)][l],cos(twopi*(2*f+1)*delay));
-      //printf("f %d, cos0 %e, cos1 %e\n",2*f,(double) &cos_lut128[0],(double) &cos_lut128[1]);
-      //printf("f %d, sin0 %e, sin1 %e\n",2*f+1,(double) &sin_lut128[0],(double) &sin_lut128[1]);
-    }
-  }
-  for (l=0; l<(int)desc->nb_taps; l++) 
-  {
-      cos_lut[(n_samples>>1)][l] = 1;
-      sin_lut[(n_samples>>1)][l] = 0;
-      printf("f %d,l %d (cos,sin) (%e,%e):\n",2*f,l,cos_lut[(n_samples>>1)][l],sin_lut[(n_samples>>1)][l]);
-  }
-
-  for (f=1; f<=(n_samples>>2); f++) {
-    //count++;
-    //freq=delta_f*(double)f*1e-6;// due to the fact that delays is in mus
-    for (l=0; l<(int)desc->nb_taps; l++) {
-      if (desc->nb_taps==1)
-        delay = desc->delays[l];
-      else
-        delay = desc->delays[l]+NB_SAMPLES_CHANNEL_OFFSET/desc->sampling_rate;
-      cos_lut128=_mm_set_pd(cos(twopi*2*(f+1)*delay),cos(twopi*(2*f)*delay));
-      sin_lut128=_mm_set_pd(sin(twopi*2*(f+1)*delay),sin(twopi*(2*f)*delay));
-      _mm_storeu_pd(&cos_lut[2*f+(n_samples>>1)][l],cos_lut128);
-      _mm_storeu_pd(&sin_lut[2*f+(n_samples>>1)][l],sin_lut128);
-      //cos_lut[f+(n_samples>>1)][l] = cos(2*M_PI*freq*delay);
-      //sin_lut[f+(n_samples>>1)][l] = sin(2*M_PI*freq*delay);
-      //printf("values cos:%d, sin:%d\n", cos_lut[f][l], sin_lut[f][l]);
-
-    }
-  }
-  for (f=-(n_samples>>1); f<=(n_samples>>1); f++) {
-    for (l=0; l<(int)desc->nb_taps; l++) {  
-      printf("f %d, l %d (cos,sin) (%e,%e):\n",f,l,cos_lut[f+(n_samples>>1)][l],sin_lut[f+(n_samples>>1)][l]);
-    }
-  }
-  return(0);
-}
-#else
 int init_freq_channel(channel_desc_t *desc,uint16_t nb_rb,int16_t n_samples)
 {
 
@@ -165,16 +81,16 @@ int init_freq_channel(channel_desc_t *desc,uint16_t nb_rb,int16_t n_samples)
   //printf("count %d\n",count);
   return(0);
 }
-#endif
-/*#ifdef abstraction_SSE
+
 int freq_channel(channel_desc_t *desc,uint16_t nb_rb,int16_t n_samples)
 {
+
+
   int16_t f,f2,d;
   uint8_t aarx,aatx,l;
   double *clut,*slut;
   static int freq_channel_init=0;
   static int n_samples_max=0;
-  __m128d clut128,slut128,chFx_128,chFy_128;
 
   // do some error checking
   // n_samples has to be a odd number because we assume the spectrum is symmetric around the DC and includes the DC
@@ -201,25 +117,23 @@ int freq_channel(channel_desc_t *desc,uint16_t nb_rb,int16_t n_samples)
 
   start_meas(&desc->interp_freq);
 
-  for (f=-(n_samples_max>>2),f2=-(n_samples>>2); f<(n_samples_max>>2); f+=d,f2++) {
-    //clut = cos_lut[(n_samples_max>>1)+f];
-    //slut = sin_lut[(n_samples_max>>1)+f];
+  for (f=-(n_samples_max>>1),f2=-(n_samples>>1); f<(n_samples_max>>1); f+=d,f2++) {
+    clut = cos_lut[(n_samples_max>>1)+f];
+    slut = sin_lut[(n_samples_max>>1)+f];
+
     for (aarx=0; aarx<desc->nb_rx; aarx++) {
       for (aatx=0; aatx<desc->nb_tx; aatx++) {
-	chFx_128=_mm_setzero_pd();
-	chFy_128=_mm_setzero_pd();
-        //desc->chF[aarx+(aatx*desc->nb_rx)][(n_samples>>1)+f2].x=0.0;
-        //desc->chF[aarx+(aatx*desc->nb_rx)][(n_samples>>1)+f2].y=0.0;
+        desc->chF[aarx+(aatx*desc->nb_rx)][(n_samples>>1)+f2].x=0.0;
+        desc->chF[aarx+(aatx*desc->nb_rx)][(n_samples>>1)+f2].y=0.0;
+
         for (l=0; l<(int)desc->nb_taps; l++) {
-          //desc->chF[aarx+(aatx*desc->nb_rx)][(n_samples>>1)+f2].x+=(desc->a[l][aarx+(aatx*desc->nb_rx)].x*clut[l]+
-          //    desc->a[l][aarx+(aatx*desc->nb_rx)].y*slut[l]);
-          //desc->chF[aarx+(aatx*desc->nb_rx)][(n_samples>>1)+f2].y+=(-desc->a[l][aarx+(aatx*desc->nb_rx)].x*slut[l]+
-          //    desc->a[l][aarx+(aatx*desc->nb_rx)].y*clut[l]);
-	  chFx_128=_mm_add_pd(chFx_128,_mm_add_pd(_mm_mul_pd(_mm_set1_pd(desc->a[l][aarx+(aatx*desc->nb_rx)].x),_mm_loadu_pd(&cos_lut[(n_samples_max>>1)+2*f][l])),_mm_mul_pd(_mm_set1_pd(desc->a[l][aarx+(aatx*desc->nb_rx)].y),_mm_loadu_pd(&sin_lut[(n_samples_max>>1)+2*f][l]))));  
-	  chFy_128=_mm_add_pd(chFy_128,_mm_sub_pd(_mm_mul_pd(_mm_set1_pd(desc->a[l][aarx+(aatx*desc->nb_rx)].y),_mm_loadu_pd(&cos_lut[(n_samples_max>>1)+2*f][l])),_mm_mul_pd(_mm_set1_pd(desc->a[l][aarx+(aatx*desc->nb_rx)].x),_mm_loadu_pd(&sin_lut[(n_samples_max>>1)+2*f][l]))));  
+
+          desc->chF[aarx+(aatx*desc->nb_rx)][(n_samples>>1)+f2].x+=(desc->a[l][aarx+(aatx*desc->nb_rx)].x*clut[l]+
+              desc->a[l][aarx+(aatx*desc->nb_rx)].y*slut[l]);
+          desc->chF[aarx+(aatx*desc->nb_rx)][(n_samples>>1)+f2].y+=(-desc->a[l][aarx+(aatx*desc->nb_rx)].x*slut[l]+
+              desc->a[l][aarx+(aatx*desc->nb_rx)].y*clut[l]);
         }
-	_mm_storeu_pd(&desc->chF[aarx+(aatx*desc->nb_rx)][(n_samples>>1)+2*f2].x,chFx_128);
-	_mm_storeu_pd(&desc->chF[aarx+(aatx*desc->nb_rx)][(n_samples>>1)+2*f2].y,chFy_128);
+        // chF[aarx+aatx*nb_rx][(n_samples>>1)+f2] now holds sum over taps l of a[l]*(clut[l] - j*slut[l]) for this antenna pair
       }
     }
   }
@@ -228,16 +142,101 @@ int freq_channel(channel_desc_t *desc,uint16_t nb_rb,int16_t n_samples)
 
   return(0);
 }
-#else*/
-int freq_channel(channel_desc_t *desc,uint16_t nb_rb,int16_t n_samples)
+
+/**
+ * Build the single-precision cos/sin lookup tables read by
+ * freq_channel_SSE_float().
+ *
+ * The tables are stored tap-major: for tap l and frequency index k in
+ * [-(n_samples>>1), +(n_samples>>1)],
+ *   cos_lut_f[l][k+(n_samples>>1)] = cos(twopi*k*delay_l)
+ *   sin_lut_f[l][k+(n_samples>>1)] = sin(twopi*k*delay_l)
+ * so four consecutive frequency samples of one tap are contiguous and can
+ * be fetched with a single _mm_loadu_ps().
+ * The DC entry (k=0) is forced to exactly cos=1, sin=0.
+ *
+ * The row-pointer arrays have nb_taps entries, so exactly nb_taps rows of
+ * n_samples floats are allocated (allocating one row per frequency index
+ * would write past the end of the pointer arrays).
+ *
+ * Every entry of every row is written, one k at a time. Filling in groups
+ * of four up to 4*(n_samples>>3) on each side of DC would leave the band
+ * edges zero whenever (n_samples>>1) is not a multiple of 4 (for example
+ * n_samples=301). cos()/sin() are scalar calls either way, so nothing is
+ * lost by not packing the results into __m128 first.
+ *
+ * NOTE(review): the tables are allocated once (static first_run) using the
+ * nb_taps and n_samples of the first call; a later call with a larger
+ * geometry would overrun them. Confirm all callers share one geometry.
+ *
+ * @param desc      channel descriptor; nb_taps, delays, sampling_rate are read
+ * @param nb_rb     number of resource blocks (180000 Hz each in delta_f)
+ * @param n_samples number of frequency samples; must be odd and >= 3
+ * @return 0 on success, -1 when n_samples is rejected
+ */
+int init_freq_channel_SSE_float(channel_desc_t *desc,uint16_t nb_rb,int16_t n_samples)
 {
 
+  static int first_run=1;
+  const int half=n_samples>>1;          // table index of the DC sample
+  const int nb_taps=(int)desc->nb_taps;
+  float delta_f,twopi;
+  float delay;
+  int k,l;
+
+  if ((n_samples&1)==0) {
+    fprintf(stderr, "freq_channel_init: n_samples has to be odd\n");
+    return(-1);
+  }
+  // n_samples==1 would divide by zero below; negative sizes make no sense
+  if (n_samples<3) {
+    fprintf(stderr, "freq_channel_init: n_samples has to be at least 3\n");
+    return(-1);
+  }
+  // integer division, as before; equals 15000 when n_samples==12*nb_rb+1
+  delta_f = nb_rb*180000/(n_samples-1);
 
+  if (first_run) {
+    // one row per tap, n_samples frequency entries per row
+    cos_lut_f = (float **)malloc16(nb_taps*sizeof(float*));
+    sin_lut_f = (float **)malloc16(nb_taps*sizeof(float*));
+    for (l=0; l<nb_taps; l++) {
+      cos_lut_f[l] = (float *)malloc16_clear(n_samples*sizeof(float));
+      sin_lut_f[l] = (float *)malloc16_clear(n_samples*sizeof(float));
+    }
+    first_run=0;
+  }
+
+  // the 1e-6 factor is there because the delays are given in microseconds
+  twopi=2*M_PI*1e-6*delta_f;
+
+  for (l=0; l<nb_taps; l++) {
+    // a single-tap channel is used as is, without the extra sample offset
+    if (desc->nb_taps==1) {
+      delay = desc->delays[l];
+    } else {
+      delay = desc->delays[l]+NB_SAMPLES_CHANNEL_OFFSET/desc->sampling_rate;
+    }
+
+    // fill the whole row, band edges included
+    for (k=-half; k<=half; k++) {
+      cos_lut_f[l][k+half] = (float)cos(twopi*k*delay);
+      sin_lut_f[l][k+half] = (float)sin(twopi*k*delay);
+    }
+    // keep the DC entry exact regardless of rounding in cos()/sin()
+    cos_lut_f[l][half] = 1;
+    sin_lut_f[l][half] = 0;
+  }
+
+  return(0);
+}
+int freq_channel_SSE_float(channel_desc_t *desc,uint16_t nb_rb,int16_t n_samples)
+{
   int16_t f,f2,d;
   uint8_t aarx,aatx,l;
-  double *clut,*slut;
   static int freq_channel_init=0;
   static int n_samples_max=0;
+  __m128 chFx_128,chFy_128;
 
   // do some error checking
   // n_samples has to be a odd number because we assume the spectrum is symmetric around the DC and includes the DC
@@ -252,7 +251,7 @@ int freq_channel(channel_desc_t *desc,uint16_t nb_rb,int16_t n_samples)
     // we are initializing the lut for the largets possible n_samples=12*nb_rb+1
     // if called with n_samples<12*nb_rb+1, we decimate the lut
     n_samples_max=12*nb_rb+1;
-    if (init_freq_channel(desc,nb_rb,n_samples_max)==0)
+    if (init_freq_channel_SSE_float(desc,nb_rb,n_samples_max)==0)
       freq_channel_init=1;
     else
       return(-1);
@@ -264,31 +263,37 @@ int freq_channel(channel_desc_t *desc,uint16_t nb_rb,int16_t n_samples)
 
   start_meas(&desc->interp_freq);
 
-  for (f=-(n_samples_max>>1),f2=-(n_samples>>1); f<(n_samples_max>>1); f+=d,f2++) {
-    clut = cos_lut[(n_samples_max>>1)+f];
-    slut = sin_lut[(n_samples_max>>1)+f];
-
+  for (f=-(n_samples_max>>3),f2=-(n_samples>>3); f<(n_samples_max>>3); f+=d,f2++) {
+    //clut = cos_lut[(n_samples_max>>1)+f];
+    //slut = sin_lut[(n_samples_max>>1)+f];
     for (aarx=0; aarx<desc->nb_rx; aarx++) {
       for (aatx=0; aatx<desc->nb_tx; aatx++) {
-        desc->chF[aarx+(aatx*desc->nb_rx)][(n_samples>>1)+f2].x=0.0;
-        desc->chF[aarx+(aatx*desc->nb_rx)][(n_samples>>1)+f2].y=0.0;
-
+	chFx_128=_mm_setzero_ps();
+	chFy_128=_mm_setzero_ps();
+        //desc->chF[aarx+(aatx*desc->nb_rx)][(n_samples>>1)+f2].x=0.0;
+        //desc->chF[aarx+(aatx*desc->nb_rx)][(n_samples>>1)+f2].y=0.0;
         for (l=0; l<(int)desc->nb_taps; l++) {
-
-          desc->chF[aarx+(aatx*desc->nb_rx)][(n_samples>>1)+f2].x+=(desc->a[l][aarx+(aatx*desc->nb_rx)].x*clut[l]+
-              desc->a[l][aarx+(aatx*desc->nb_rx)].y*slut[l]);
-          desc->chF[aarx+(aatx*desc->nb_rx)][(n_samples>>1)+f2].y+=(-desc->a[l][aarx+(aatx*desc->nb_rx)].x*slut[l]+
-              desc->a[l][aarx+(aatx*desc->nb_rx)].y*clut[l]);
+          //desc->chF[aarx+(aatx*desc->nb_rx)][(n_samples>>1)+f2].x+=(desc->a[l][aarx+(aatx*desc->nb_rx)].x*clut[l]+
+          //    desc->a[l][aarx+(aatx*desc->nb_rx)].y*slut[l]);
+          //desc->chF[aarx+(aatx*desc->nb_rx)][(n_samples>>1)+f2].y+=(-desc->a[l][aarx+(aatx*desc->nb_rx)].x*slut[l]+
+          //    desc->a[l][aarx+(aatx*desc->nb_rx)].y*clut[l]);
+	  chFx_128=_mm_add_ps(chFx_128,_mm_add_ps(_mm_mul_ps(_mm_set1_ps(desc->a[l][aarx+(aatx*desc->nb_rx)].x),_mm_loadu_ps(&cos_lut_f[l][(n_samples_max>>1)+4*f])),_mm_mul_ps(_mm_set1_ps(desc->a[l][aarx+(aatx*desc->nb_rx)].y),_mm_loadu_ps(&sin_lut_f[l][(n_samples_max>>1)+4*f]))));  
+	  chFy_128=_mm_add_ps(chFy_128,_mm_sub_ps(_mm_mul_ps(_mm_set1_ps(desc->a[l][aarx+(aatx*desc->nb_rx)].y),_mm_loadu_ps(&cos_lut_f[l][(n_samples_max>>1)+4*f])),_mm_mul_ps(_mm_set1_ps(desc->a[l][aarx+(aatx*desc->nb_rx)].x),_mm_loadu_ps(&sin_lut_f[l][(n_samples_max>>1)+4*f]))));  
         }
+	_mm_storeu_ps(&desc->chFf[aarx+(aatx*desc->nb_rx)].x[(n_samples>>1)+4*f],chFx_128);
+	_mm_storeu_ps(&desc->chFf[aarx+(aatx*desc->nb_rx)].y[(n_samples>>1)+4*f],chFy_128);
+	//printf("chFx %e,%e,%e,%e\n",chFx_128[0],chFx_128[1],chFx_128[2],chFx_128[3]);
+	//printf("chFy %e,%e,%e,%e\n",chFy_128[0],chFy_128[1],chFy_128[2],chFy_128[3]);
       }
     }
   }
-
   stop_meas(&desc->interp_freq);
 
+  /*for (f=-(n_samples>>1); f<(n_samples>>1); f++) { 
+      printf("f %d, (chF.x,chF.y) (%e,%e):\n",f,desc->chFf[0].x[(n_samples>>1)+f],desc->chFf[0].y[(n_samples>>1)+f]);
+  }*/
   return(0);
 }
-//#endif
 
 int init_freq_channel_prach(channel_desc_t *desc,uint16_t nb_rb,int16_t n_samples,int16_t prach_fmt,int16_t n_ra_prb)
 {
diff --git a/openair1/SIMULATION/TOOLS/defs.h b/openair1/SIMULATION/TOOLS/defs.h
index 2cd6b5871e..c54bdc93a7 100644
--- a/openair1/SIMULATION/TOOLS/defs.h
+++ b/openair1/SIMULATION/TOOLS/defs.h
@@ -59,6 +59,7 @@ typedef struct {
   struct complex **ch;
   ///Sampled frequency response (90 kHz resolution)
   struct complex **chF;
+  struct complexf *chFf;
   ///Sampled prach frequency response (frequency analysis)
   struct complex **chF_prach;
   ///Maximum path delay in mus.
@@ -95,6 +96,8 @@ typedef struct {
   time_stats_t interp_freq;
   time_stats_t interp_freq_PRACH;
   time_stats_t convolution;
+  time_stats_t ziggurat;
+  time_stats_t ziggurat_PRACH;
   /// frequency measurements
   time_stats_t DL_multipath_channel_freq;
   time_stats_t DL_dac_fixed_gain;
@@ -473,9 +476,12 @@ double uniformrandom(void);
 void uniformrandomSSE(__m128d *d1,__m128d *d2);
 double ziggurat(double mean, double variance);
 int freq_channel(channel_desc_t *desc,uint16_t nb_rb, int16_t n_samples);
+int freq_channel_SSE_float(channel_desc_t *desc,uint16_t nb_rb, int16_t n_samples);
 int freq_channel_prach(channel_desc_t *desc,uint16_t nb_rb,int16_t n_samples,int16_t prach_fmt,int16_t n_ra_prb);
 int init_freq_channel(channel_desc_t *desc,uint16_t nb_rb,int16_t n_samples);
+int init_freq_channel_SSE_float(channel_desc_t *desc,uint16_t nb_rb,int16_t n_samples);
 int init_freq_channel_prach(channel_desc_t *desc,uint16_t nb_rb,int16_t n_samples,int16_t prach_fmt,int16_t n_ra_prb);
+
 uint8_t multipath_channel_nosigconv(channel_desc_t *desc);
 void multipath_tv_channel(channel_desc_t *desc,
                           double **tx_sig_re,
diff --git a/openair1/SIMULATION/TOOLS/multipath_channel.c b/openair1/SIMULATION/TOOLS/multipath_channel.c
index 3dbb113d2a..c9d865f133 100644
--- a/openair1/SIMULATION/TOOLS/multipath_channel.c
+++ b/openair1/SIMULATION/TOOLS/multipath_channel.c
@@ -294,6 +294,7 @@ void multipath_channel_freq(channel_desc_t *desc,
       	sum=(sum+stop-start);*/
         random_channel_freq(desc,0);
   	freq_channel(desc,nb_rb,n_samples);//Find desc->chF
+        printf("MULTICHANNEL\n");
   	//freq_channel_prach(desc,nb_rb,n_samples,1,44);//Find desc->chF
   }
   //clock_t start=clock();
@@ -319,8 +320,8 @@ void multipath_channel_freq(channel_desc_t *desc,
 					//RX_IM(k) += TX_IM(k).chF(k).x + TX_RE(k).chF(k).y
 					tx128_re = _mm_loadu_pd(&tx_sig_re[j][(2*f+1)]);
             				tx128_im = _mm_loadu_pd(&tx_sig_im[j][(2*f+1)]);
-          				chF128_x = _mm_set1_pd(desc->ch[ii+(j*desc->nb_rx)][(2*(f%(ofdm_symbol_size>>1)))+(n_samples>>1)].x);
-          				chF128_y = _mm_set1_pd(desc->ch[ii+(j*desc->nb_rx)][(2*(f%(ofdm_symbol_size>>1)))+(n_samples>>1)].y);
+          				chF128_x = _mm_set1_pd(desc->chF[ii+(j*desc->nb_rx)][(2*(f%(ofdm_symbol_size>>1)))+(n_samples>>1)].x); // double path: freq_channel() above fills chF, not chFf
+          				chF128_y = _mm_set1_pd(desc->chF[ii+(j*desc->nb_rx)][(2*(f%(ofdm_symbol_size>>1)))+(n_samples>>1)].y); // chFf is only written by freq_channel_SSE_float()
 					//rx_tmp.x += (tx_sig_re[j][f+k*ofdm_symbol_size] * desc->chF[ii+(j*desc->nb_rx)][f+(n_samples>>1)-1].x)//tx128_re*ch128_x
 					//	     -(tx_sig_im[j][f+k*ofdm_symbol_size] * desc->chF[ii+(j*desc->nb_rx)][f+(n_samples>>1)-1].y);//-tx128_im*ch128_y
 					//rx_tmp.y += (tx_sig_im[j][f+k*ofdm_symbol_size] * desc->chF[ii+(j*desc->nb_rx)][f+(n_samples>>1)-1].x)//tx128_im*ch128_x
@@ -357,8 +358,8 @@ void multipath_channel_freq(channel_desc_t *desc,
 					//RX_IM(k) += TX_IM(k).chF(k).x + TX_RE(k).chF(k).y
 					tx128_re = _mm_loadu_pd(&tx_sig_re[j][2*f]);
             				tx128_im = _mm_loadu_pd(&tx_sig_im[j][2*f]);
-          				chF128_x = _mm_set1_pd(desc->ch[ii+(j*desc->nb_rx)][2*(f%(ofdm_symbol_size>>1)-((ofdm_symbol_size>>1)-(n_samples>>1)))].x);
-          				chF128_y = _mm_set1_pd(desc->ch[ii+(j*desc->nb_rx)][2*(f%(ofdm_symbol_size>>1)-((ofdm_symbol_size>>1)-(n_samples>>1)))].y);
+          				chF128_x = _mm_set1_pd(desc->chF[ii+(j*desc->nb_rx)][2*(f%(ofdm_symbol_size>>1)-((ofdm_symbol_size>>1)-(n_samples>>1)))].x); // double path: freq_channel() fills chF, not chFf
+          				chF128_y = _mm_set1_pd(desc->chF[ii+(j*desc->nb_rx)][2*(f%(ofdm_symbol_size>>1)-((ofdm_symbol_size>>1)-(n_samples>>1)))].y); // chFf is only written by freq_channel_SSE_float()
 					//rx_tmp.x += (tx_sig_re[j][f+k*ofdm_symbol_size] * desc->chF[ii+(j*desc->nb_rx)][f2].x)
 					//	     -(tx_sig_im[j][f+k*ofdm_symbol_size] * desc->chF[ii+(j*desc->nb_rx)][f2].y);
 					//rx_tmp.y += (tx_sig_im[j][f+k*ofdm_symbol_size] * desc->chF[ii+(j*desc->nb_rx)][f2].x)
@@ -427,6 +428,7 @@ void multipath_channel_freq(channel_desc_t *desc,
   } else {
         random_channel_freq(desc,0);
   	freq_channel(desc,nb_rb,n_samples);//Find desc->chF
+        printf("MULTICHANNEL\n");
   }
   //clock_t start=clock();
   //printf("symbols_per_tti is %d\n",symbols_per_tti);
@@ -526,7 +528,7 @@ void multipath_channel_freq_SSE_float(channel_desc_t *desc,
   	// do nothing - keep channel
   } else {
         random_channel_freq(desc,0);
-  	freq_channel(desc,nb_rb,n_samples);//Find desc->chF
+  	freq_channel_SSE_float(desc,nb_rb,n_samples);//Find desc->chF
   }
 	for (j=0;j<(symbols_per_tti>>2);j++){
 		for (ii=0; ii<desc->nb_rx; ii++) {
@@ -549,8 +551,8 @@ void multipath_channel_freq_SSE_float(channel_desc_t *desc,
 					//RX_IM(k) += TX_IM(k).chF(k).x + TX_RE(k).chF(k).y
 					tx128_re = _mm_loadu_ps(&tx_sig_re[j][(4*f+1)]);
             				tx128_im = _mm_loadu_ps(&tx_sig_im[j][(4*f+1)]);
-          				chF128_x = _mm_set1_ps(desc->ch[ii+(j*desc->nb_rx)][(4*(f%(ofdm_symbol_size>>2)))+(n_samples>>2)].x);
-          				chF128_y = _mm_set1_ps(desc->ch[ii+(j*desc->nb_rx)][(4*(f%(ofdm_symbol_size>>2)))+(n_samples>>2)].y);
+          				chF128_x = _mm_set1_ps(desc->chFf[ii+(j*desc->nb_rx)].x[(4*(f%(ofdm_symbol_size>>2)))+(n_samples>>2)]);
+          				chF128_y = _mm_set1_ps(desc->chFf[ii+(j*desc->nb_rx)].y[(4*(f%(ofdm_symbol_size>>2)))+(n_samples>>2)]);
 					//rx_tmp.x += (tx_sig_re[j][f+k*ofdm_symbol_size] * desc->chF[ii+(j*desc->nb_rx)][f+(n_samples>>1)-1].x)//tx128_re*ch128_x
 					//	     -(tx_sig_im[j][f+k*ofdm_symbol_size] * desc->chF[ii+(j*desc->nb_rx)][f+(n_samples>>1)-1].y);//-tx128_im*ch128_y
 					//rx_tmp.y += (tx_sig_im[j][f+k*ofdm_symbol_size] * desc->chF[ii+(j*desc->nb_rx)][f+(n_samples>>1)-1].x)//tx128_im*ch128_x
@@ -587,8 +589,8 @@ void multipath_channel_freq_SSE_float(channel_desc_t *desc,
 					//RX_IM(k) += TX_IM(k).chF(k).x + TX_RE(k).chF(k).y
 					tx128_re = _mm_loadu_ps(&tx_sig_re[j][4*f]);
             				tx128_im = _mm_loadu_ps(&tx_sig_im[j][4*f]);
-          				chF128_x = _mm_set1_ps(desc->ch[ii+(j*desc->nb_rx)][4*(f%(ofdm_symbol_size>>2)-((ofdm_symbol_size>>2)-(n_samples>>2)))].x);
-          				chF128_y = _mm_set1_ps(desc->ch[ii+(j*desc->nb_rx)][4*(f%(ofdm_symbol_size>>2)-((ofdm_symbol_size>>2)-(n_samples>>2)))].y);
+          				chF128_x = _mm_set1_ps(desc->chFf[ii+(j*desc->nb_rx)].x[4*(f%(ofdm_symbol_size>>2)-((ofdm_symbol_size>>2)-(n_samples>>2)))]);
+          				chF128_y = _mm_set1_ps(desc->chFf[ii+(j*desc->nb_rx)].y[4*(f%(ofdm_symbol_size>>2)-((ofdm_symbol_size>>2)-(n_samples>>2)))]);
 					//rx_tmp.x += (tx_sig_re[j][f+k*ofdm_symbol_size] * desc->chF[ii+(j*desc->nb_rx)][f2].x)
 					//	     -(tx_sig_im[j][f+k*ofdm_symbol_size] * desc->chF[ii+(j*desc->nb_rx)][f2].y);
 					//rx_tmp.y += (tx_sig_im[j][f+k*ofdm_symbol_size] * desc->chF[ii+(j*desc->nb_rx)][f2].x)
@@ -630,7 +632,6 @@ void multipath_channel_prach(channel_desc_t *desc,
 
   int ii,j,f;
   __m128d rx_tmp128_re_f,rx_tmp128_im_f,rx_tmp128_re,rx_tmp128_im, rx_tmp128_1,rx_tmp128_2,rx_tmp128_3,rx_tmp128_4,tx128_re,tx128_im,chF128_x,chF128_y,pathloss128;
-  struct complex rx_tmp;
   double path_loss = pow(10,desc->path_loss_dB/20);
   pathloss128 = _mm_set1_pd(path_loss);
   int nb_rb, n_samples;
@@ -658,8 +659,8 @@ void multipath_channel_prach(channel_desc_t *desc,
 						//RX_IM(k) = TX_IM(k).chF(k).x + TX_RE(k).chF(k).y
 						tx128_re = _mm_loadu_pd(&tx_sig_re[j][(2*f)]);
             					tx128_im = _mm_loadu_pd(&tx_sig_im[j][(2*f)]);
-          					chF128_x = _mm_set1_pd(desc->ch[ii+(j*desc->nb_rx)][2*f+(prach_fmt<4)?13:3].x);
-          					chF128_y = _mm_set1_pd(desc->ch[ii+(j*desc->nb_rx)][2*f+(prach_fmt<4)?13:3].y);	
+          					chF128_x = _mm_set1_pd(desc->chFf[ii+(j*desc->nb_rx)].x[2*f+(prach_fmt<4)?13:3]);
+          					chF128_y = _mm_set1_pd(desc->chFf[ii+(j*desc->nb_rx)].y[2*f+(prach_fmt<4)?13:3]);	
 						//rx_tmp.x += (tx_sig_re[ii][f] * desc->chF_prach[ii+(j*desc->nb_rx)][f+(prach_fmt<4)?13:3].x)-(tx_sig_im[ii][f] * desc->chF_prach[ii+(j*desc->nb_rx)][f+(prach_fmt<4)?13:3].y);
 						//rx_tmp.y += (tx_sig_im[ii][f] * desc->chF_prach[ii+(j*desc->nb_rx)][f+(prach_fmt<4)?13:3].x)+(tx_sig_re[ii][f] * desc->chF_prach[ii+(j*desc->nb_rx)][f+(prach_fmt<4)?13:3].y);
 						rx_tmp128_1    = _mm_mul_pd(tx128_re,chF128_x);
@@ -748,7 +749,6 @@ void multipath_channel_prach_SSE_float(channel_desc_t *desc,
 
   int ii,j,f;
   __m128 rx_tmp128_re_f,rx_tmp128_im_f,rx_tmp128_re,rx_tmp128_im, rx_tmp128_1,rx_tmp128_2,rx_tmp128_3,rx_tmp128_4,tx128_re,tx128_im,chF128_x,chF128_y,pathloss128;
-  struct complex rx_tmp;
   float path_loss = pow(10,desc->path_loss_dB/20);
   pathloss128 = _mm_set1_ps(path_loss);
   int nb_rb, n_samples;
@@ -776,8 +776,8 @@ void multipath_channel_prach_SSE_float(channel_desc_t *desc,
 						//RX_IM(k) = TX_IM(k).chF(k).x + TX_RE(k).chF(k).y
 						tx128_re = _mm_loadu_ps(&tx_sig_re[j][(4*f)]);
             					tx128_im = _mm_loadu_ps(&tx_sig_im[j][(4*f)]);
-          					chF128_x = _mm_set1_ps(desc->ch[ii+(j*desc->nb_rx)][4*f+(prach_fmt<4)?13:3].x);
-          					chF128_y = _mm_set1_ps(desc->ch[ii+(j*desc->nb_rx)][4*f+(prach_fmt<4)?13:3].y);	
+          					chF128_x = _mm_set1_ps(desc->chFf[ii+(j*desc->nb_rx)].x[4*f+(prach_fmt<4)?13:3]);
+          					chF128_y = _mm_set1_ps(desc->chFf[ii+(j*desc->nb_rx)].y[4*f+(prach_fmt<4)?13:3]);	
 						//rx_tmp.x += (tx_sig_re[ii][f] * desc->chF_prach[ii+(j*desc->nb_rx)][f+(prach_fmt<4)?13:3].x)-(tx_sig_im[ii][f] * desc->chF_prach[ii+(j*desc->nb_rx)][f+(prach_fmt<4)?13:3].y);
 						//rx_tmp.y += (tx_sig_im[ii][f] * desc->chF_prach[ii+(j*desc->nb_rx)][f+(prach_fmt<4)?13:3].x)+(tx_sig_re[ii][f] * desc->chF_prach[ii+(j*desc->nb_rx)][f+(prach_fmt<4)?13:3].y);
 						rx_tmp128_1    = _mm_mul_ps(tx128_re,chF128_x);
diff --git a/openair1/SIMULATION/TOOLS/random_channel.c b/openair1/SIMULATION/TOOLS/random_channel.c
index 8cd93facc5..443f85e619 100644
--- a/openair1/SIMULATION/TOOLS/random_channel.c
+++ b/openair1/SIMULATION/TOOLS/random_channel.c
@@ -89,6 +89,7 @@ void fill_channel_desc(channel_desc_t *chan_desc,
   chan_desc->max_Doppler                = max_Doppler;
   chan_desc->ch                         = (struct complex**) malloc(nb_tx*nb_rx*sizeof(struct complex*));
   chan_desc->chF                        = (struct complex**) malloc(nb_tx*nb_rx*sizeof(struct complex*));
+  chan_desc->chFf                       = (struct complexf*) malloc(nb_tx*nb_rx*sizeof(struct complexf));
   chan_desc->chF_prach                  = (struct complex**) malloc(nb_tx*nb_rx*sizeof(struct complex*));
   chan_desc->a                          = (struct complex**) malloc(nb_taps*sizeof(struct complex*));
 
@@ -302,6 +303,7 @@ channel_desc_t *new_channel_desc_scm(uint8_t nb_tx,
     chan_desc->random_aoa     = 0;
     chan_desc->ch             = (struct complex**) malloc(nb_tx*nb_rx*sizeof(struct complex*));
     chan_desc->chF            = (struct complex**) malloc(nb_tx*nb_rx*sizeof(struct complex*));
+    chan_desc->chFf           = (struct complexf*) malloc(nb_tx*nb_rx*sizeof(struct complexf));
     chan_desc->chF_prach      = (struct complex**) malloc(nb_tx*nb_rx*sizeof(struct complex*));
     chan_desc->a              = (struct complex**) malloc(chan_desc->nb_taps*sizeof(struct complex*));
     for (i = 0; i<nb_tx*nb_rx; i++)
@@ -356,6 +358,7 @@ channel_desc_t *new_channel_desc_scm(uint8_t nb_tx,
     chan_desc->random_aoa     = 0;
     chan_desc->ch             = (struct complex**) malloc(nb_tx*nb_rx*sizeof(struct complex*));
     chan_desc->chF            = (struct complex**) malloc(nb_tx*nb_rx*sizeof(struct complex*));
+    chan_desc->chFf           = (struct complexf*) malloc(nb_tx*nb_rx*sizeof(struct complexf));
     chan_desc->chF_prach      = (struct complex**) malloc(nb_tx*nb_rx*sizeof(struct complex*));
     chan_desc->a              = (struct complex**) malloc(chan_desc->nb_taps*sizeof(struct complex*));
     for (i = 0; i<nb_tx*nb_rx; i++)
@@ -409,6 +412,7 @@ channel_desc_t *new_channel_desc_scm(uint8_t nb_tx,
     chan_desc->random_aoa     = 0;
     chan_desc->ch             = (struct complex**) malloc(nb_tx*nb_rx*sizeof(struct complex*));
     chan_desc->chF            = (struct complex**) malloc(nb_tx*nb_rx*sizeof(struct complex*));
+    chan_desc->chFf           = (struct complexf*) malloc(nb_tx*nb_rx*sizeof(struct complexf));
     chan_desc->chF_prach      = (struct complex**) malloc(nb_tx*nb_rx*sizeof(struct complex*));
     chan_desc->a              = (struct complex**) malloc(chan_desc->nb_taps*sizeof(struct complex*));
     for (i = 0; i<nb_tx*nb_rx; i++)
@@ -454,6 +458,7 @@ channel_desc_t *new_channel_desc_scm(uint8_t nb_tx,
     chan_desc->random_aoa     = 0;
     chan_desc->ch             = (struct complex**) malloc(nb_tx*nb_rx*sizeof(struct complex*));
     chan_desc->chF            = (struct complex**) malloc(nb_tx*nb_rx*sizeof(struct complex*));
+    chan_desc->chFf           = (struct complexf*) malloc(nb_tx*nb_rx*sizeof(struct complexf));
     chan_desc->chF_prach      = (struct complex**) malloc(nb_tx*nb_rx*sizeof(struct complex*));
     chan_desc->a              = (struct complex**) malloc(chan_desc->nb_taps*sizeof(struct complex*));
     for (i = 0; i<nb_tx*nb_rx; i++)
@@ -502,6 +507,7 @@ channel_desc_t *new_channel_desc_scm(uint8_t nb_tx,
     chan_desc->random_aoa     = 0;
     chan_desc->ch             = (struct complex**) malloc(nb_tx*nb_rx*sizeof(struct complex*));
     chan_desc->chF            = (struct complex**) malloc(nb_tx*nb_rx*sizeof(struct complex*));
+    chan_desc->chFf           = (struct complexf*) malloc(nb_tx*nb_rx*sizeof(struct complexf));
     chan_desc->chF_prach      = (struct complex**) malloc(nb_tx*nb_rx*sizeof(struct complex*));
     chan_desc->a              = (struct complex**) malloc(chan_desc->nb_taps*sizeof(struct complex*));
     for (i = 0; i<nb_tx*nb_rx; i++)
@@ -550,6 +556,7 @@ channel_desc_t *new_channel_desc_scm(uint8_t nb_tx,
     chan_desc->random_aoa     = 0;
     chan_desc->ch             = (struct complex**) malloc(nb_tx*nb_rx*sizeof(struct complex*));
     chan_desc->chF            = (struct complex**) malloc(nb_tx*nb_rx*sizeof(struct complex*));
+    chan_desc->chFf           = (struct complexf*) malloc(nb_tx*nb_rx*sizeof(struct complexf));
     chan_desc->chF_prach      = (struct complex**) malloc(nb_tx*nb_rx*sizeof(struct complex*));
     chan_desc->a              = (struct complex**) malloc(chan_desc->nb_taps*sizeof(struct complex*));
     for (i = 0; i<nb_tx*nb_rx; i++)
@@ -597,6 +604,7 @@ channel_desc_t *new_channel_desc_scm(uint8_t nb_tx,
     chan_desc->random_aoa     = 0;
     chan_desc->ch             = (struct complex**) malloc(nb_tx*nb_rx*sizeof(struct complex*));
     chan_desc->chF            = (struct complex**) malloc(nb_tx*nb_rx*sizeof(struct complex*));
+    chan_desc->chFf           = (struct complexf*) malloc(nb_tx*nb_rx*sizeof(struct complexf));
     chan_desc->chF_prach      = (struct complex**) malloc(nb_tx*nb_rx*sizeof(struct complex*));
     chan_desc->a              = (struct complex**) malloc(chan_desc->nb_taps*sizeof(struct complex*));
     for (i = 0; i<nb_tx*nb_rx; i++)
@@ -642,6 +650,7 @@ channel_desc_t *new_channel_desc_scm(uint8_t nb_tx,
     chan_desc->random_aoa     = 0;
     chan_desc->ch             = (struct complex**) malloc(nb_tx*nb_rx*sizeof(struct complex*));
     chan_desc->chF            = (struct complex**) malloc(nb_tx*nb_rx*sizeof(struct complex*));
+    chan_desc->chFf           = (struct complexf*) malloc(nb_tx*nb_rx*sizeof(struct complexf));
     chan_desc->chF_prach      = (struct complex**) malloc(nb_tx*nb_rx*sizeof(struct complex*));
     chan_desc->a              = (struct complex**) malloc(chan_desc->nb_taps*sizeof(struct complex*));
     for (i = 0; i<nb_tx*nb_rx; i++)
@@ -687,6 +696,7 @@ channel_desc_t *new_channel_desc_scm(uint8_t nb_tx,
     chan_desc->random_aoa     = 0;
     chan_desc->ch             = (struct complex**) malloc(nb_tx*nb_rx*sizeof(struct complex*));
     chan_desc->chF            = (struct complex**) malloc(nb_tx*nb_rx*sizeof(struct complex*));
+    chan_desc->chFf           = (struct complexf*) malloc(nb_tx*nb_rx*sizeof(struct complexf));
     chan_desc->chF_prach      = (struct complex**) malloc(nb_tx*nb_rx*sizeof(struct complex*));
     chan_desc->a              = (struct complex**) malloc(chan_desc->nb_taps*sizeof(struct complex*));
     for (i = 0; i<nb_tx*nb_rx; i++)
@@ -1398,7 +1408,7 @@ int random_channel(channel_desc_t *desc, uint8_t abstraction_flag) {
 
 int random_channel_freq(channel_desc_t *desc, uint8_t abstraction_flag) {
 
-  int i,k,l,aarx,aatx;
+  int i,aarx,aatx;
   struct complex anew[NB_ANTENNAS_TX*NB_ANTENNAS_RX],acorr[NB_ANTENNAS_TX*NB_ANTENNAS_RX];
   struct complex phase, alpha, beta;
 
diff --git a/targets/PROJECTS/GENERIC-LTE-EPC/CONF/rru.band7.tm1.if4p5.50PRB.oaisim.conf b/targets/PROJECTS/GENERIC-LTE-EPC/CONF/rru.band7.tm1.if4p5.50PRB.oaisim.conf
index d26e528671..04a864f604 100644
--- a/targets/PROJECTS/GENERIC-LTE-EPC/CONF/rru.band7.tm1.if4p5.50PRB.oaisim.conf
+++ b/targets/PROJECTS/GENERIC-LTE-EPC/CONF/rru.band7.tm1.if4p5.50PRB.oaisim.conf
@@ -139,7 +139,7 @@ eNBs =
 
 
     ////////// MME parameters:
-    mme_ip_address      = ( { ipv4       = "168.176.26.144";
+    mme_ip_address      = ( { ipv4       = "192.168.13.11";
                               ipv6       = "192:168:30::17";
                               active     = "yes";
                               preference = "ipv4";
@@ -149,18 +149,18 @@ eNBs =
     NETWORK_INTERFACES :
     {
 
-        ENB_INTERFACE_NAME_FOR_S1_MME            = "eth0";
-        ENB_IPV4_ADDRESS_FOR_S1_MME              = "168.176.27.98/24";
-        ENB_INTERFACE_NAME_FOR_S1U               = "eth0";
-        ENB_IPV4_ADDRESS_FOR_S1U                 = "168.176.27.98/24";
+        ENB_INTERFACE_NAME_FOR_S1_MME            = "lo";
+        ENB_IPV4_ADDRESS_FOR_S1_MME              = "192.168.12.170/24";
+        ENB_INTERFACE_NAME_FOR_S1U               = "lo";
+        ENB_IPV4_ADDRESS_FOR_S1U                 = "192.168.12.170/24";
         ENB_PORT_FOR_S1U                         = 2152; # Spec 2152
     };
 
     rrh_gw_config = (
     {			  
-      	local_if_name = "eth0";			  
-      	remote_address = "168.176.27.114";
-    	local_address = "168.176.27.98"; 
+      	local_if_name = "lo";			  
+      	remote_address = "192.168.12.171";
+    	local_address = "192.168.12.170"; 
     	local_port = 50000;	#for raw option local port must be the same to remote	       
     	remote_port = 50000; 
     	rrh_gw_active = "yes";
diff --git a/targets/SIMU/USER/channel_sim.c b/targets/SIMU/USER/channel_sim.c
index 4e26ac8c59..c65fc47ae6 100644
--- a/targets/SIMU/USER/channel_sim.c
+++ b/targets/SIMU/USER/channel_sim.c
@@ -406,9 +406,9 @@ void do_DL_sig_freq(channel_desc_t *eNB2UE[NUMBER_OF_eNB_MAX][NUMBER_OF_UE_MAX][
 
   uint8_t eNB_id=0;
 #ifdef    SSE_float
-  double tx_pwr;
-#else
   float tx_pwr;
+#else
+  double tx_pwr;
 #endif
   //double rx_pwr;
   //int32_t rx_pwr0,rx_pwr1,rx_pwr2, rx_pwr3;
diff --git a/targets/SIMU/USER/oaisim.c b/targets/SIMU/USER/oaisim.c
index 83b2fece61..ff17ec8619 100644
--- a/targets/SIMU/USER/oaisim.c
+++ b/targets/SIMU/USER/oaisim.c
@@ -1478,11 +1478,14 @@ reset_opp_meas_oaisim (void)
       reset_meas (&eNB2UE[eNB_id][UE_id][0]->interp_time);
       reset_meas (&eNB2UE[eNB_id][UE_id][0]->interp_freq);
       reset_meas (&eNB2UE[eNB_id][UE_id][0]->convolution);
+      reset_meas (&eNB2UE[eNB_id][UE_id][0]->ziggurat);
       reset_meas (&UE2eNB[UE_id][eNB_id][0]->random_channel);
       reset_meas (&UE2eNB[UE_id][eNB_id][0]->interp_time);
       reset_meas (&UE2eNB[UE_id][eNB_id][0]->interp_freq);
       reset_meas (&UE2eNB[UE_id][eNB_id][0]->interp_freq_PRACH);
       reset_meas (&UE2eNB[UE_id][eNB_id][0]->convolution);
+      reset_meas (&UE2eNB[UE_id][eNB_id][0]->ziggurat);
+      reset_meas (&UE2eNB[UE_id][eNB_id][0]->ziggurat_PRACH);
       //Time consuming in Frequency analysis
       //Downlink
       reset_meas (&eNB2UE[eNB_id][UE_id][0]->DL_multipath_channel_freq);
@@ -1591,6 +1594,8 @@ print_opp_meas_oaisim (void)
                   "[DL][interp_freq]", &oaisim_stats, &oaisim_stats_f);
       print_meas (&eNB2UE[eNB_id][UE_id][0]->convolution,
                   "[DL][convolution]", &oaisim_stats, &oaisim_stats_f);
+      print_meas (&eNB2UE[eNB_id][UE_id][0]->ziggurat,
+                  "[DL][ziggurat]", &oaisim_stats, &oaisim_stats_f);
 
       print_meas (&UE2eNB[UE_id][eNB_id][0]->random_channel,
                   "[UL][random_channel]", &oaisim_stats, &oaisim_stats_f);
@@ -1602,6 +1607,10 @@ print_opp_meas_oaisim (void)
                   "[UL][interp_freq_PRACH]", &oaisim_stats, &oaisim_stats_f);
       print_meas (&UE2eNB[UE_id][eNB_id][0]->convolution,
                   "[UL][convolution]", &oaisim_stats, &oaisim_stats_f);
+      print_meas (&UE2eNB[UE_id][eNB_id][0]->ziggurat,
+                  "[UL][ziggurat]", &oaisim_stats, &oaisim_stats_f);
+      print_meas (&UE2eNB[UE_id][eNB_id][0]->ziggurat_PRACH,
+                  "[UL][ziggurat_PRACH]", &oaisim_stats, &oaisim_stats_f); // own label, distinct from the [UL][ziggurat] timer
 
       //Time consuming in Frequency analysis
       //Downlink
diff --git a/targets/SIMU/USER/oaisim_functions.c b/targets/SIMU/USER/oaisim_functions.c
index 8be4ddbe4f..0b8fc15d57 100644
--- a/targets/SIMU/USER/oaisim_functions.c
+++ b/targets/SIMU/USER/oaisim_functions.c
@@ -1701,8 +1701,8 @@ void init_ocm(void)
 			       0);
 	if (do_ofdm_mod)
 	{
-		random_channel(eNB2UE[eNB_id][UE_id][CC_id],abstraction_flag);//Find a(l)
-		freq_channel(eNB2UE[eNB_id][UE_id][CC_id],nb_rb,n_samples);//Find desc->chF
+		random_channel_freq(eNB2UE[eNB_id][UE_id][CC_id],abstraction_flag);//Find a(l)
+		freq_channel_SSE_float(eNB2UE[eNB_id][UE_id][CC_id],nb_rb,n_samples);//Find desc->chF
 	}
 
 	else
@@ -1722,8 +1722,8 @@ void init_ocm(void)
 			       0);
 	if (do_ofdm_mod)
 	{
-		random_channel(UE2eNB[UE_id][eNB_id][CC_id],abstraction_flag);//Find a(l)
-		freq_channel(UE2eNB[UE_id][eNB_id][CC_id],nb_rb,n_samples);//Find desc->chF
+		random_channel_freq(UE2eNB[UE_id][eNB_id][CC_id],abstraction_flag);//Find a(l)
+		freq_channel_SSE_float(UE2eNB[UE_id][eNB_id][CC_id],nb_rb,n_samples);//Find desc->chF
 	}
 	else
         	random_channel(UE2eNB[UE_id][eNB_id][CC_id],abstraction_flag);
-- 
2.26.2