fix CI discovered bug in avx2 rotate

980bb242 · Laurent Thomas · laurent · bf219fc2 · 980bb242 · 980bb242
Commit 980bb242 authored Jun 24, 2022 by Laurent Thomas Committed by laurent Jun 27, 2022
8 changed files
--- a/openair1/PHY/LTE_ESTIMATION/lte_ul_channel_estimation.c
+++ b/openair1/PHY/LTE_ESTIMATION/lte_ul_channel_estimation.c
@@ -41,10 +41,10 @@ extern int16_t *ul_ref_sigs_rx[30][2][34];

 int32_t lte_ul_channel_estimation(LTE_DL_FRAME_PARMS *frame_parms,
                                  L1_rxtx_proc_t *proc,
-                        				  LTE_eNB_ULSCH_t * ulsch,
-				                          int32_t **ul_ch_estimates,
-				                          int32_t **ul_ch_estimates_time,
-				                          int32_t **rxdataF_ext,
+				  LTE_eNB_ULSCH_t * ulsch,
+				  int32_t **ul_ch_estimates,
+				  int32_t **ul_ch_estimates_time,
+				  int32_t **rxdataF_ext,
                                  module_id_t UE_id,
                                  unsigned char l,
                                  unsigned char Ns) {
@@ -88,7 +88,7 @@ int32_t lte_ul_channel_estimation(LTE_DL_FRAME_PARMS *frame_parms,
  }

  uint16_t N_rb_alloc = ulsch->harq_processes[harq_pid]->nb_rb;
-  int32_t tmp_estimates[N_rb_alloc*12] __attribute__((aligned(16)));
+  int32_t tmp_estimates[N_rb_alloc*12] __attribute__((aligned(32)));
  Msc_RS = N_rb_alloc*12;
  cyclic_shift = (frame_parms->pusch_config_common.ul_ReferenceSignalsPUSCH.cyclicShift +
                  ulsch->harq_processes[harq_pid]->n_DMRS2 +
@@ -334,14 +334,14 @@ int32_t lte_ul_channel_estimation(LTE_DL_FRAME_PARMS *frame_parms,
            current_phase2 = cmin(abs(current_phase2),127);
            //          msg("sym: %d, current_phase1: %d, ru: %d + j%d, current_phase2: %d, ru: %d + j%d\n",k,current_phase1,ru1[2*current_phase1],ru1[2*current_phase1+1],current_phase2,ru2[2*current_phase2],ru2[2*current_phase2+1]);
            // rotate channel estimates by estimated phase
-            rotate_cpx_vector((int16_t *) ul_ch1,
-                              &ru1[2*current_phase1],
-                              (int16_t *) &ul_ch_estimates[aa][frame_parms->N_RB_UL*12*k],
+            rotate_cpx_vector((c16_t *) ul_ch1,
+                              (c16_t *)&ru1[2*current_phase1],
+                              (c16_t *) &ul_ch_estimates[aa][frame_parms->N_RB_UL*12*k],
                              Msc_RS,
                              15);
-            rotate_cpx_vector((int16_t *) ul_ch2,
-                              &ru2[2*current_phase2],
-                              (int16_t *) &tmp_estimates[0],
+            rotate_cpx_vector((c16_t *) ul_ch2,
+                              (c16_t *)&ru2[2*current_phase2],
+                              (c16_t *) &tmp_estimates[0],
                              Msc_RS,
                              15);
            // Combine the two rotated estimates
@@ -657,14 +657,14 @@ int32_t lte_ul_channel_estimation_RRU(LTE_DL_FRAME_PARMS *frame_parms,
          current_phase2 = cmin(abs(current_phase2),127);
          //          msg("sym: %d, current_phase1: %d, ru: %d + j%d, current_phase2: %d, ru: %d + j%d\n",k,current_phase1,ru1[2*current_phase1],ru1[2*current_phase1+1],current_phase2,ru2[2*current_phase2],ru2[2*current_phase2+1]);
          // rotate channel estimates by estimated phase
-          rotate_cpx_vector((int16_t *) ul_ch1,
-                            &ru1[2*current_phase1],
-                            (int16_t *) &ul_ch_estimates[aa][frame_parms->N_RB_UL*12*k],
+          rotate_cpx_vector((c16_t *) ul_ch1,
+                            (c16_t *)&ru1[2*current_phase1],
+                            (c16_t *) &ul_ch_estimates[aa][frame_parms->N_RB_UL*12*k],
                            Msc_RS,
                            15);
-          rotate_cpx_vector((int16_t *) ul_ch2,
-                            &ru2[2*current_phase2],
-                            (int16_t *) &tmp_estimates[0],
+          rotate_cpx_vector((c16_t *) ul_ch2,
+                            (c16_t *)&ru2[2*current_phase2],
+                            (c16_t *) &tmp_estimates[0],
                            Msc_RS,
                            15);
          // Combine the two rotated estimates

--- a/openair1/PHY/MODULATION/ofdm_mod.c
+++ b/openair1/PHY/MODULATION/ofdm_mod.c
@@ -360,9 +360,9 @@ void apply_nr_rotation(NR_DL_FRAME_PARMS *fp,
      symbol_rotation[2 * (sidx + first_symbol + symb_offset)],
      symbol_rotation[1 + 2 * (sidx + first_symbol + symb_offset)]);

-    rotate_cpx_vector(trxdata + (sidx * length * 2),
-                      &symbol_rotation[2 * (sidx + first_symbol + symb_offset)],
-                      trxdata + (sidx * length * 2),
+    rotate_cpx_vector((c16_t*)trxdata + (sidx * length * 2),
+                      (c16_t*)&symbol_rotation[2 * (sidx + first_symbol + symb_offset)],
+		      (c16_t*) trxdata + (sidx * length * 2),
                      length,
                      15);
  }

--- a/openair1/PHY/MODULATION/slot_fep_nr.c
+++ b/openair1/PHY/MODULATION/slot_fep_nr.c
@@ -98,18 +98,18 @@ int nr_slot_fep(PHY_VARS_NR_UE *ue,
    stop_meas(&ue->rx_dft_stats);

    int symb_offset = (Ns%frame_parms->slots_per_subframe)*frame_parms->symbols_per_slot;
-    int32_t rot2 = ((uint32_t*)frame_parms->symbol_rotation[0])[symbol+symb_offset];
-    ((int16_t*)&rot2)[1]=-((int16_t*)&rot2)[1];
+    c16_t rot2 = ((c16_t*)frame_parms->symbol_rotation[0])[symbol+symb_offset];
+    rot2.i=-rot2.i;

 #ifdef DEBUG_FEP
    //  if (ue->frame <100)
    printf("slot_fep: slot %d, symbol %d rx_offset %u, rotation symbol %d %d.%d\n", Ns,symbol, rx_offset,
-	   symbol+symb_offset,((int16_t*)&rot2)[0],((int16_t*)&rot2)[1]);
+	   symbol+symb_offset,rot2.r,rot2.i);
 #endif

-    rotate_cpx_vector((int16_t *)&common_vars->common_vars_rx_data_per_thread[proc->thread_id].rxdataF[aa][frame_parms->ofdm_symbol_size*symbol],
-		      (int16_t*)&rot2,
-		      (int16_t *)&common_vars->common_vars_rx_data_per_thread[proc->thread_id].rxdataF[aa][frame_parms->ofdm_symbol_size*symbol],
+    rotate_cpx_vector((c16_t *)&common_vars->common_vars_rx_data_per_thread[proc->thread_id].rxdataF[aa][frame_parms->ofdm_symbol_size*symbol],
+		      &rot2,
+		      (c16_t *)&common_vars->common_vars_rx_data_per_thread[proc->thread_id].rxdataF[aa][frame_parms->ofdm_symbol_size*symbol],
 		      frame_parms->ofdm_symbol_size,
 		      15);

@@ -214,18 +214,18 @@ int nr_slot_fep_init_sync(PHY_VARS_NR_UE *ue,
    stop_meas(&ue->rx_dft_stats);

    int symb_offset = (Ns%frame_parms->slots_per_subframe)*frame_parms->symbols_per_slot;
-    int32_t rot2 = ((uint32_t*)frame_parms->symbol_rotation[0])[symbol + symb_offset];
-    ((int16_t*)&rot2)[1]=-((int16_t*)&rot2)[1];
+    c16_t rot2 = ((c16_t*)frame_parms->symbol_rotation[0])[symbol + symb_offset];
+    rot2.i=-rot2.i;

 #ifdef DEBUG_FEP
    //  if (ue->frame <100)
    printf("slot_fep: slot %d, symbol %d rx_offset %u, rotation symbol %d %d.%d\n", Ns,symbol, rx_offset,
-	   symbol+symb_offset,((int16_t*)&rot2)[0],((int16_t*)&rot2)[1]);
+	   symbol+symb_offset,rot2.r,rot2.i);
 #endif

-    rotate_cpx_vector((int16_t *)&common_vars->common_vars_rx_data_per_thread[proc->thread_id].rxdataF[aa][frame_parms->ofdm_symbol_size*symbol],
-		      (int16_t*)&rot2,
-		      (int16_t *)&common_vars->common_vars_rx_data_per_thread[proc->thread_id].rxdataF[aa][frame_parms->ofdm_symbol_size*symbol],
+    rotate_cpx_vector((c16_t *)&common_vars->common_vars_rx_data_per_thread[proc->thread_id].rxdataF[aa][frame_parms->ofdm_symbol_size*symbol],
+		      &rot2,
+		      (c16_t *)&common_vars->common_vars_rx_data_per_thread[proc->thread_id].rxdataF[aa][frame_parms->ofdm_symbol_size*symbol],
 		      frame_parms->ofdm_symbol_size,
 		      15);
  }
@@ -310,12 +310,12 @@ void apply_nr_rotation_ul(NR_DL_FRAME_PARMS *frame_parms,

  for (int symbol=first_symbol;symbol<nsymb;symbol++) {
    
-    uint32_t rot2 = ((uint32_t*)frame_parms->symbol_rotation[1])[symbol + symb_offset];
-    ((int16_t*)&rot2)[1]=-((int16_t*)&rot2)[1];
-    LOG_D(PHY,"slot %d, symb_offset %d rotating by %d.%d\n",slot,symb_offset,((int16_t*)&rot2)[0],((int16_t*)&rot2)[1]);
-    rotate_cpx_vector((int16_t *)&rxdataF[soffset+(frame_parms->ofdm_symbol_size*symbol)],
-		      (int16_t*)&rot2,
-		      (int16_t *)&rxdataF[soffset+(frame_parms->ofdm_symbol_size*symbol)],
+    c16_t rot2 = ((c16_t*)frame_parms->symbol_rotation[1])[symbol + symb_offset];
+    rot2.i=-rot2.i;
+    LOG_D(PHY,"slot %d, symb_offset %d rotating by %d.%d\n",slot,symb_offset,rot2.r,rot2.i);
+    rotate_cpx_vector((c16_t *)&rxdataF[soffset+(frame_parms->ofdm_symbol_size*symbol)],
+		      &rot2,
+		      (c16_t *)&rxdataF[soffset+(frame_parms->ofdm_symbol_size*symbol)],
 		      length,
 		      15);


--- a/openair1/PHY/NR_ESTIMATION/nr_ul_channel_estimation.c
+++ b/openair1/PHY/NR_ESTIMATION/nr_ul_channel_estimation.c
@@ -854,7 +854,7 @@ void nr_pusch_ptrs_processing(PHY_VARS_gNB *gNB,
                              uint32_t nb_re_pusch)
 {
  //#define DEBUG_UL_PTRS 1
-  int16_t *phase_per_symbol = NULL;
+  c16_t *phase_per_symbol = NULL;
  int32_t *ptrs_re_symbol   = NULL;
  int8_t   ret = 0;

@@ -871,20 +871,20 @@ void nr_pusch_ptrs_processing(PHY_VARS_gNB *gNB,
  uint8_t  *ptrsReOffset    = &rel15_ul->pusch_ptrs.ptrs_ports_list[0].ptrs_re_offset;
  /* loop over antennas */
  for (int aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) {
-    phase_per_symbol = (int16_t*)gNB->pusch_vars[ulsch_id]->ptrs_phase_per_slot[aarx];
+    phase_per_symbol = (c16_t*)gNB->pusch_vars[ulsch_id]->ptrs_phase_per_slot[aarx];
    ptrs_re_symbol = &gNB->pusch_vars[ulsch_id]->ptrs_re_per_slot;
    *ptrs_re_symbol = 0;
-    phase_per_symbol[(2*symbol)+1] = 0; // Imag
+    phase_per_symbol[symbol].i = 0; 
    /* set DMRS estimates to 0 angle with magnitude 1 */
    if(is_dmrs_symbol(symbol,*dmrsSymbPos)) {
      /* set DMRS real estimation to 32767 */
-      phase_per_symbol[2*symbol]=(int16_t)((1<<15)-1); // 32767
+      phase_per_symbol[symbol].r=INT16_MAX; // 32767
 #ifdef DEBUG_UL_PTRS
-      printf("[PHY][PTRS]: DMRS Symbol %d -> %4d + j*%4d\n", symbol, phase_per_symbol[2*symbol],phase_per_symbol[(2*symbol)+1]);
+      printf("[PHY][PTRS]: DMRS Symbol %d -> %4d + j*%4d\n", symbol, phase_per_symbol[symbol].r,phase_per_symbol[symbol].i);
 #endif
    }
    else {// real ptrs value is set to 0
-      phase_per_symbol[2*symbol] = 0; // Real
+      phase_per_symbol[symbol].r = 0; 
    }

    if(symbol == *startSymbIndex) {
@@ -909,7 +909,7 @@ void nr_pusch_ptrs_processing(PHY_VARS_gNB *gNB,
                             symbol,frame_parms->ofdm_symbol_size,
                             (int16_t*)&gNB->pusch_vars[ulsch_id]->rxdataF_comp[aarx][(symbol * nb_re_pusch)],
                             gNB->nr_gold_pusch_dmrs[rel15_ul->scid][nr_tti_rx][symbol],
-                             &phase_per_symbol[2* symbol],
+                             (int16_t*)&phase_per_symbol[symbol],
                             ptrs_re_symbol);
    }
    /* For last OFDM symbol at each antenna perform interpolation and compensation for the slot*/
@@ -919,7 +919,7 @@ void nr_pusch_ptrs_processing(PHY_VARS_gNB *gNB,
      /*------------------------------------------------------------------------------------------------------- */
      /* If L-PTRS is > 0 then we need interpolation */
      if(*L_ptrs > 0) {
-        ret = nr_ptrs_process_slot(*dmrsSymbPos, *ptrsSymbPos, phase_per_symbol, *startSymbIndex, *nbSymb);
+        ret = nr_ptrs_process_slot(*dmrsSymbPos, *ptrsSymbPos, (int16_t*)phase_per_symbol, *startSymbIndex, *nbSymb);
        if(ret != 0) {
          LOG_W(PHY,"[PTRS] Compensation is skipped due to error in PTRS slot processing !!\n");
        }
@@ -938,11 +938,11 @@ void nr_pusch_ptrs_processing(PHY_VARS_gNB *gNB,
        /* Skip rotation if the slot processing is wrong */
        if((!is_dmrs_symbol(i,*dmrsSymbPos)) && (ret == 0)) {
 #ifdef DEBUG_UL_PTRS
-          printf("[PHY][UL][PTRS]: Rotate Symbol %2d with  %d + j* %d\n", i, phase_per_symbol[2* i],phase_per_symbol[(2* i) +1]);
+          printf("[PHY][UL][PTRS]: Rotate Symbol %2d with  %d + j* %d\n", i, phase_per_symbol[i].r,phase_per_symbol[i].i);
 #endif
-          rotate_cpx_vector((int16_t*)&gNB->pusch_vars[ulsch_id]->rxdataF_comp[aarx][(i * rel15_ul->rb_size * NR_NB_SC_PER_RB)],
-                            &phase_per_symbol[2* i],
-                            (int16_t*)&gNB->pusch_vars[ulsch_id]->rxdataF_comp[aarx][(i * rel15_ul->rb_size * NR_NB_SC_PER_RB)],
+          rotate_cpx_vector((c16_t*)&gNB->pusch_vars[ulsch_id]->rxdataF_comp[aarx][(i * rel15_ul->rb_size * NR_NB_SC_PER_RB)],
+                            &phase_per_symbol[i],
+                            (c16_t*)&gNB->pusch_vars[ulsch_id]->rxdataF_comp[aarx][(i * rel15_ul->rb_size * NR_NB_SC_PER_RB)],
                            ((*nb_rb) * NR_NB_SC_PER_RB), 15);
        }// if not DMRS Symbol
      }// symbol loop

--- a/openair1/PHY/NR_UE_ESTIMATION/nr_dl_channel_estimation.c
+++ b/openair1/PHY/NR_UE_ESTIMATION/nr_dl_channel_estimation.c
@@ -1657,7 +1657,6 @@ void nr_pdsch_ptrs_processing(PHY_VARS_NR_UE *ue,
                              RX_type_t rx_type)
 {
  //#define DEBUG_DL_PTRS 1
-  int16_t *phase_per_symbol = NULL;
  int32_t *ptrs_re_symbol = NULL;
  int8_t   ret = 0;
  /* harq specific variables */
@@ -1701,20 +1700,20 @@ void nr_pdsch_ptrs_processing(PHY_VARS_NR_UE *ue,
  }
  /* loop over antennas */
  for (int aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) {
-    phase_per_symbol = (int16_t*)pdsch_vars[gNB_id]->ptrs_phase_per_slot[aarx];
+    c16_t *phase_per_symbol = (c16_t*)pdsch_vars[gNB_id]->ptrs_phase_per_slot[aarx];
    ptrs_re_symbol = (int32_t*)pdsch_vars[gNB_id]->ptrs_re_per_slot[aarx];
    ptrs_re_symbol[symbol] = 0;
-    phase_per_symbol[(2*symbol)+1] = 0; // Imag
+    phase_per_symbol[symbol].i = 0; // Imag
    /* set DMRS estimates to 0 angle with magnitude 1 */
    if(is_dmrs_symbol(symbol,*dmrsSymbPos)) {
      /* set DMRS real estimation to 32767 */
-      phase_per_symbol[2*symbol]=(int16_t)((1<<15)-1); // 32767
+      phase_per_symbol[symbol].r=INT16_MAX; // 32767
 #ifdef DEBUG_DL_PTRS
-      printf("[PHY][PTRS]: DMRS Symbol %d -> %4d + j*%4d\n", symbol, phase_per_symbol[2*symbol],phase_per_symbol[(2*symbol)+1]);
+      printf("[PHY][PTRS]: DMRS Symbol %d -> %4d + j*%4d\n", symbol, phase_per_symbol[symbol].r,phase_per_symbol[symbol].i);
 #endif
    }
    else { // real ptrs value is set to 0
-      phase_per_symbol[2*symbol] = 0; // Real
+      phase_per_symbol[symbol].r = 0; // Real
    }

    if(dlsch0_harq->status == ACTIVE) {
@@ -1740,7 +1739,7 @@ void nr_pdsch_ptrs_processing(PHY_VARS_NR_UE *ue,
                               symbol,frame_parms->ofdm_symbol_size,
                               (int16_t*)&pdsch_vars[gNB_id]->rxdataF_comp0[aarx][(symbol * nb_re_pdsch)],
                               ue->nr_gold_pdsch[gNB_id][nr_slot_rx][symbol][0],
-                               &phase_per_symbol[2* symbol],
+                               (int16_t*)&phase_per_symbol[symbol],
                               &ptrs_re_symbol[symbol]);
      }
    }// HARQ 0
@@ -1752,7 +1751,7 @@ void nr_pdsch_ptrs_processing(PHY_VARS_NR_UE *ue,
      /*------------------------------------------------------------------------------------------------------- */
      /* If L-PTRS is > 0 then we need interpolation */
      if(*L_ptrs > 0) {
-        ret = nr_ptrs_process_slot(*dmrsSymbPos, *ptrsSymbPos, phase_per_symbol, *startSymbIndex, *nbSymb);
+        ret = nr_ptrs_process_slot(*dmrsSymbPos, *ptrsSymbPos, (int16_t*)phase_per_symbol, *startSymbIndex, *nbSymb);
        if(ret != 0) {
          LOG_W(PHY,"[PTRS] Compensation is skipped due to error in PTRS slot processing !!\n");
        }
@@ -1771,11 +1770,11 @@ void nr_pdsch_ptrs_processing(PHY_VARS_NR_UE *ue,
        /* Skip rotation if the slot processing is wrong */
        if((!is_dmrs_symbol(i,*dmrsSymbPos)) && (ret == 0)) {
 #ifdef DEBUG_DL_PTRS
-          printf("[PHY][DL][PTRS]: Rotate Symbol %2d with  %d + j* %d\n", i, phase_per_symbol[2* i],phase_per_symbol[(2* i) +1]);
+          printf("[PHY][DL][PTRS]: Rotate Symbol %2d with  %d + j* %d\n", i, phase_per_symbol[i].r,phase_per_symbol[i].i);
 #endif
-          rotate_cpx_vector((int16_t*)&pdsch_vars[gNB_id]->rxdataF_comp0[aarx][(i * (*nb_rb) * NR_NB_SC_PER_RB)],
-                            &phase_per_symbol[2* i],
-                            (int16_t*)&pdsch_vars[gNB_id]->rxdataF_comp0[aarx][(i * (*nb_rb) * NR_NB_SC_PER_RB)],
+          rotate_cpx_vector((c16_t*)&pdsch_vars[gNB_id]->rxdataF_comp0[aarx][(i * (*nb_rb) * NR_NB_SC_PER_RB)],
+                            &phase_per_symbol[i],
+                            (c16_t*)&pdsch_vars[gNB_id]->rxdataF_comp0[aarx][(i * (*nb_rb) * NR_NB_SC_PER_RB)],
                            ((*nb_rb) * NR_NB_SC_PER_RB), 15);
        }// if not DMRS Symbol
      }// symbol loop

--- a/openair1/PHY/NR_UE_TRANSPORT/nr_ulsch_ue.c
+++ b/openair1/PHY/NR_UE_TRANSPORT/nr_ulsch_ue.c
@@ -597,17 +597,16 @@ uint8_t nr_ue_pusch_common_procedures(PHY_VARS_NR_UE *UE,
  int symb_offset = (slot%frame_parms->slots_per_subframe)*frame_parms->symbols_per_slot;
  for(ap = 0; ap < n_antenna_ports; ap++) {
    for (int s=0;s<NR_NUMBER_OF_SYMBOLS_PER_SLOT;s++){
-
+      c16_t rot=((c16_t*)frame_parms->symbol_rotation[1])[s + symb_offset];
      LOG_D(PHY,"In %s: rotating txdataF symbol %d (%d) => (%d.%d)\n",
-        __FUNCTION__,
-        s,
-        s + symb_offset,
-        frame_parms->symbol_rotation[1][2 * (s + symb_offset)],
-        frame_parms->symbol_rotation[1][1 + (2 * (s + symb_offset))]);
-
-      rotate_cpx_vector((int16_t *)&txdataF[ap][frame_parms->ofdm_symbol_size * s],
-                        &frame_parms->symbol_rotation[1][2 * (s + symb_offset)],
-                        (int16_t *)&txdataF[ap][frame_parms->ofdm_symbol_size * s],
+	    __FUNCTION__,
+	    s,
+	    s + symb_offset,
+	    rot.r, rot.i);
+
+      rotate_cpx_vector((c16_t *)&txdataF[ap][frame_parms->ofdm_symbol_size * s],
+                        &rot,
+                        (c16_t *)&txdataF[ap][frame_parms->ofdm_symbol_size * s],
                        frame_parms->ofdm_symbol_size,
                        15);
    }

--- a/openair1/PHY/TOOLS/cmult_sv.c
+++ b/openair1/PHY/TOOLS/cmult_sv.c
@@ -145,40 +145,9 @@ void multadd_real_four_symbols_vector_complex_scalar(int16_t *x,

 }
 #ifdef __AVX2__
-void rotate_cpx_vector(int16_t *x,
-                      int16_t *alpha,
-                      int16_t *y,
-                      uint32_t N,
-                      uint16_t output_shift)
-{
-  // multiply a complex vector with a complex value (alpha)
-  // stores result in y
-  // N is the number of complex numbers
-  // output_shift reduces the result of the multiplication by this number of bits
-  AssertFatal(N%8==0, "To be developped");
-  const c16_t for_re={alpha[0], -alpha[1]};
-  __m256i const alpha_for_real =  _mm256_set1_epi32(*(uint32_t*)&for_re);
-  const c16_t for_im={alpha[1], alpha[0]};
-  __m256i const alpha_for_im= _mm256_set1_epi32(*(uint32_t*)&for_im);
-  __m256i const perm_mask =
-    _mm256_set_epi8(31,30,23,22,29,28,21,20,27,26,19,18,25,24,17,16,
-		    15,14,7,6,13,12,5,4,11,10,3,2,9,8,1,0);
- __m256i* xd= (__m256i*)x;
-  const __m256i *end=xd+N/8;
-  for( __m256i* yd = (__m256i *)y; xd<end ; yd++, xd++) {
-    const __m256i xre = _mm256_srai_epi32(_mm256_madd_epi16(*xd,alpha_for_real),
-					  output_shift);
-    const __m256i xim = _mm256_srai_epi32(_mm256_madd_epi16(*xd,alpha_for_im),
-					  output_shift);
-    // a bit faster than unpacklo+unpackhi+packs
-    const __m256i tmp=_mm256_packs_epi32(xre,xim);
-    *yd=_mm256_shuffle_epi8(tmp,perm_mask);
-  }
-}
-#else 
-void rotate_cpx_vector(int16_t *x,
-                      int16_t *alpha,
-                      int16_t *y,
+void rotate_cpx_vector(c16_t *x,
+                      c16_t *alpha,
+                      c16_t *y,
                      uint32_t N,
                      uint16_t output_shift)
 {
@@ -206,28 +175,28 @@ void rotate_cpx_vector(int16_t *x,
  __m128i shift = _mm_cvtsi32_si128(output_shift);
  register simd_q15_t m0,m1,m2,m3;

-  ((int16_t *)&alpha_128)[0] = alpha[0];
-  ((int16_t *)&alpha_128)[1] = -alpha[1];
-  ((int16_t *)&alpha_128)[2] = alpha[1];
-  ((int16_t *)&alpha_128)[3] = alpha[0];
-  ((int16_t *)&alpha_128)[4] = alpha[0];
-  ((int16_t *)&alpha_128)[5] = -alpha[1];
-  ((int16_t *)&alpha_128)[6] = alpha[1];
-  ((int16_t *)&alpha_128)[7] = alpha[0];
+  ((int16_t *)&alpha_128)[0] = alpha->r;
+  ((int16_t *)&alpha_128)[1] = -alpha->i;
+  ((int16_t *)&alpha_128)[2] = alpha->i;
+  ((int16_t *)&alpha_128)[3] = alpha->r;
+  ((int16_t *)&alpha_128)[4] = alpha->r;
+  ((int16_t *)&alpha_128)[5] = -alpha->i;
+  ((int16_t *)&alpha_128)[6] = alpha->i;
+  ((int16_t *)&alpha_128)[7] = alpha->r;
 #elif defined(__arm__)
  int32x4_t shift;
  int32x4_t ab_re0,ab_re1,ab_im0,ab_im1,re32,im32;
  int16_t reflip[8]  __attribute__((aligned(16))) = {1,-1,1,-1,1,-1,1,-1};
  int32x4x2_t xtmp;

-  ((int16_t *)&alpha_128)[0] = alpha[0];
-  ((int16_t *)&alpha_128)[1] = alpha[1];
-  ((int16_t *)&alpha_128)[2] = alpha[0];
-  ((int16_t *)&alpha_128)[3] = alpha[1];
-  ((int16_t *)&alpha_128)[4] = alpha[0];
-  ((int16_t *)&alpha_128)[5] = alpha[1];
-  ((int16_t *)&alpha_128)[6] = alpha[0];
-  ((int16_t *)&alpha_128)[7] = alpha[1];
+  ((int16_t *)&alpha_128)[0] = alpha->r;
+  ((int16_t *)&alpha_128)[1] = alpha->i;
+  ((int16_t *)&alpha_128)[2] = alpha->r;
+  ((int16_t *)&alpha_128)[3] = alpha->i;
+  ((int16_t *)&alpha_128)[4] = alpha->r;
+  ((int16_t *)&alpha_128)[5] = alpha->i;
+  ((int16_t *)&alpha_128)[6] = alpha->r;
+  ((int16_t *)&alpha_128)[7] = alpha->i;
  int16x8_t bflip = vrev32q_s16(alpha_128);
  int16x8_t bconj = vmulq_s16(alpha_128,*(int16x8_t *)reflip);
  shift = vdupq_n_s32(-output_shift);
@@ -370,7 +339,7 @@ main ()
  int16_t input[256] __attribute__((aligned(16)));
  int16_t input2[256] __attribute__((aligned(16)));
  int16_t output[256] __attribute__((aligned(16)));
-  int16_t alpha[2];
+  c16_t alpha;

  int i;

@@ -408,8 +377,8 @@ main ()
  input2[14] = 1000;
  input2[15] = 2000;

-  alpha[0]=32767;
-  alpha[1]=0;
+  alpha->r=32767;
+  alpha->i=0;

  //mult_cpx_vector(input,input2,output,L,0);
  rotate_cpx_vector_norep(input,alpha,input,L,15);

--- a/openair1/PHY/TOOLS/tools_defs.h
+++ b/openair1/PHY/TOOLS/tools_defs.h
@@ -453,7 +453,7 @@ idft_size_idx_t get_idft(int ofdm_symbol_size)
 }


-/*!\fn int32_t rotate_cpx_vector(int16_t *x,int16_t *alpha,int16_t *y,uint32_t N,uint16_t output_shift)
+/*!\fn int32_t rotate_cpx_vector(c16_t *x,c16_t *alpha,c16_t *y,uint32_t N,uint16_t output_shift)
 This function performs componentwise multiplication of a vector with a complex scalar.
 @param x Vector input (Q1.15)  in the format  |Re0  Im0|,......,|Re(N-1) Im(N-1)|
 @param alpha Scalar input (Q1.15) in the format  |Re0 Im0|
@@ -463,11 +463,11 @@ This function performs componentwise multiplication of a vector with a complex s

 The function implemented is : \f$\mathbf{y} = \alpha\mathbf{x}\f$
 */
-void rotate_cpx_vector(int16_t *x,
-                          int16_t *alpha,
-                          int16_t *y,
-                          uint32_t N,
-                          uint16_t output_shift);
+void rotate_cpx_vector(c16_t *x,
+		       c16_t *alpha,
+		       c16_t *y,
+		       uint32_t N,
+		       uint16_t output_shift);


 //cadd_sv.c