Further improvements

Additional 50% speed improvement with SIMD vectors.

Further improvements
Additional 50% speed improvement with SIMD vectors.
fbd4df01 · Sakthivel Velumani · 82be3653 · fbd4df01 · fbd4df01 · fbd4df01
Commit fbd4df01 authored Aug 03, 2020 by Sakthivel Velumani
10 changed files
--- a/openair1/PHY/MODULATION/nr_modulation.c
+++ b/openair1/PHY/MODULATION/nr_modulation.c
@@ -20,6 +20,7 @@
 */

 #include "nr_modulation.h"
+#include "PHY/NR_REFSIG/nr_mod_table.h"

 extern short nr_mod_table[NR_MOD_TABLE_SIZE_SHORT];

@@ -30,16 +31,29 @@ void nr_modulation(uint32_t *in,
 {
  uint16_t offset;
  uint16_t mask = ((1<<mod_order)-1);
+  int32_t* nr_mod_table32;
+  int32_t* out32 = (int32_t*) out;
+  uint8_t* in_bytes = (uint8_t*) in;
  uint8_t idx;
+  uint16_t u = 1;
+  uint8_t shift_lut[3] = {0, 2, 4};
+    
+#if defined(__SSE2__)
+  __m128i *nr_mod_table128;
+  __m128i *out128;
+  __m64   *nr_mod_table64;
+  __m64   *out64;
+#endif

  offset = (mod_order==2)? NR_MOD_TABLE_QPSK_OFFSET : (mod_order==4)? NR_MOD_TABLE_QAM16_OFFSET : \
                    (mod_order==6)? NR_MOD_TABLE_QAM64_OFFSET: (mod_order==8)? NR_MOD_TABLE_QAM256_OFFSET : 0;

  LOG_D(PHY,"nr_modulation: length %d, mod_order %d\n",length,mod_order);

-  if (mod_order==6) {
-    uint16_t u = 1;
-    uint8_t shift_lut[3] = {0, 2, 4};
+  switch (mod_order) {
+
+  case 6:
+    nr_mod_table32 = (int32_t*) nr_mod_table;
    for (int i=0; i<length/mod_order; i++)
    {
      idx = ((in[i*mod_order/32]>>((i*mod_order)&0x1f)) & mask);
@@ -47,18 +61,42 @@ void nr_modulation(uint32_t *in,
        idx |= (in[(i*mod_order/32)+1]<<shift_lut[(u++)%3]) & 0x3f;
      else if (((i+1)*mod_order)==32*u) u++;

-      out[i<<1] = nr_mod_table[(offset+idx)<<1];
-      out[(i<<1)+1] = nr_mod_table[((offset+idx)<<1)+1];
+      out32[i] = nr_mod_table32[(offset+idx)];
    }
+    return;
+
+  case 8:
+    nr_mod_table32 = (int32_t*) nr_mod_table;
+    for (int i=0; i<length/8; i++)
+      out32[i] = nr_mod_table32[(offset+in_bytes[i])];
+    return;
+
+#if defined(__SSE2__)
+  case 2:
+    nr_mod_table128 = (__m128i*) nr_qpsk_byte_mod_table;
+    out128 = (__m128i*) out;
+    for (int i=0; i<length/8; i++)
+      out128[i] = nr_mod_table128[in_bytes[i]];
+    return;
+
+  case 4:
+    nr_mod_table64 = (__m64*) nr_qam16_byte_mod_table;
+    out64 = (__m64*) out;
+    for (int i=0; i<length/8; i++)
+      out64[i] = nr_mod_table64[in_bytes[i]];
+    return;
+#endif
+
+  default:
+    break;
  }
-  else {
-    for (int i=0; i<length/mod_order; i++)
-    {
-      idx = ((in[i*mod_order/32]>>((i*mod_order)&0x1f)) & mask);

-      out[i<<1] = nr_mod_table[(offset+idx)<<1];
-      out[(i<<1)+1] = nr_mod_table[((offset+idx)<<1)+1];
-    }
+  nr_mod_table32 = (int32_t*) nr_mod_table;
+  for (int i=0; i<length/mod_order; i++)
+  {
+    idx = ((in[i*mod_order/32]>>((i*mod_order)&0x1f)) & mask);
+
+    out32[i] = nr_mod_table32[(offset+idx)];
  }
 }


--- a/openair1/PHY/NR_REFSIG/nr_mod_table.h
+++ b/openair1/PHY/NR_REFSIG/nr_mod_table.h
--- a/openair1/PHY/NR_TRANSPORT/nr_dci.c
+++ b/openair1/PHY/NR_TRANSPORT/nr_dci.c
@@ -161,7 +161,7 @@ uint8_t nr_generate_dci_top(nfapi_nr_dl_tti_pdcch_pdu *pdcch_pdu,
                            int16_t amp,
                            NR_DL_FRAME_PARMS frame_parms) {

-  int16_t mod_dmrs[NR_MAX_CSET_DURATION][NR_MAX_PDCCH_DMRS_LENGTH>>1]; // 3 for the max coreset duration
+  int16_t mod_dmrs[NR_MAX_CSET_DURATION][NR_MAX_PDCCH_DMRS_LENGTH>>1] __attribute__((aligned(128))); // 3 for the max coreset duration
  uint16_t cset_start_sc;
  uint8_t cset_start_symb, cset_nsymb;
  int k,l,k_prime,dci_idx, dmrs_idx;
@@ -260,7 +260,7 @@ uint8_t nr_generate_dci_top(nfapi_nr_dl_tti_pdcch_pdu *pdcch_pdu,
 	   scrambled_output[6], scrambled_output[7], scrambled_output[8], scrambled_output[9], scrambled_output[10],scrambled_output[11] );
 #endif
    /// QPSK modulation
-    int16_t mod_dci[NR_MAX_DCI_SIZE>>1];
+    int16_t mod_dci[NR_MAX_DCI_SIZE>>1] __attribute__((aligned(128)));
    nr_modulation(scrambled_output, encoded_length, DMRS_MOD_ORDER, mod_dci); //Qm = 2 as DMRS is QPSK modulated
 #ifdef DEBUG_DCI
    

--- a/openair1/SIMULATION/NR_PHY/dlschsim.c
+++ b/openair1/SIMULATION/NR_PHY/dlschsim.c
@@ -37,7 +37,6 @@
 #include "PHY/INIT/phy_init.h"
 #include "PHY/MODULATION/modulation_eNB.h"
 #include "PHY/MODULATION/modulation_UE.h"
-#include "PHY/NR_REFSIG/nr_mod_table.h"
 #include "PHY/NR_REFSIG/refsig_defs_ue.h"
 #include "PHY/NR_TRANSPORT/nr_dlsch.h"
 #include "PHY/NR_TRANSPORT/nr_transport_proto.h"

--- a/openair1/SIMULATION/NR_PHY/dlsim.c
+++ b/openair1/SIMULATION/NR_PHY/dlsim.c
@@ -39,7 +39,6 @@
 #include "PHY/INIT/phy_init.h"
 #include "PHY/MODULATION/modulation_eNB.h"
 #include "PHY/MODULATION/modulation_UE.h"
-#include "PHY/NR_REFSIG/nr_mod_table.h"
 #include "PHY/NR_REFSIG/refsig_defs_ue.h"
 #include "PHY/NR_TRANSPORT/nr_transport_proto.h"
 #include "PHY/NR_UE_TRANSPORT/nr_transport_proto_ue.h"

--- a/openair1/SIMULATION/NR_PHY/pbchsim.c
+++ b/openair1/SIMULATION/NR_PHY/pbchsim.c
@@ -33,7 +33,6 @@
 #include "PHY/defs_nr_UE.h"
 #include "PHY/defs_gNB.h"
 #include "PHY/NR_REFSIG/refsig_defs_ue.h"
-#include "PHY/NR_REFSIG/nr_mod_table.h"
 #include "PHY/MODULATION/modulation_eNB.h"
 #include "PHY/MODULATION/modulation_UE.h"
 #include "PHY/INIT/phy_init.h"

--- a/openair1/SIMULATION/NR_PHY/prachsim.c
+++ b/openair1/SIMULATION/NR_PHY/prachsim.c
@@ -37,7 +37,6 @@
 #include "SCHED_NR_UE/phy_frame_config_nr.h"
 #include "PHY/phy_vars_nr_ue.h"
 #include "PHY/NR_REFSIG/refsig_defs_ue.h"
-#include "PHY/NR_REFSIG/nr_mod_table.h"
 #include "PHY/MODULATION/modulation_eNB.h"
 #include "PHY/MODULATION/modulation_UE.h"
 #include "PHY/INIT/phy_init.h"

--- a/openair1/SIMULATION/NR_PHY/pucchsim.c
+++ b/openair1/SIMULATION/NR_PHY/pucchsim.c
@@ -33,7 +33,6 @@
 #include "PHY/defs_nr_UE.h"
 #include "PHY/defs_gNB.h"
 #include "PHY/NR_REFSIG/refsig_defs_ue.h"
-#include "PHY/NR_REFSIG/nr_mod_table.h"
 #include "PHY/MODULATION/modulation_eNB.h"
 #include "PHY/MODULATION/modulation_UE.h"
 #include "PHY/INIT/phy_init.h"

--- a/openair1/SIMULATION/NR_PHY/ulschsim.c
+++ b/openair1/SIMULATION/NR_PHY/ulschsim.c
@@ -34,7 +34,6 @@
 #include "PHY/defs_gNB.h"
 #include "PHY/INIT/phy_init.h"
 #include "PHY/NR_REFSIG/refsig_defs_ue.h"
-#include "PHY/NR_REFSIG/nr_mod_table.h"
 #include "PHY/MODULATION/modulation_eNB.h"
 #include "PHY/MODULATION/modulation_UE.h"
 #include "PHY/NR_TRANSPORT/nr_transport_proto.h"

--- a/openair1/SIMULATION/NR_PHY/ulsim.c
+++ b/openair1/SIMULATION/NR_PHY/ulsim.c
@@ -37,7 +37,6 @@
 #include "PHY/MODULATION/modulation_UE.h"
 #include "PHY/MODULATION/nr_modulation.h"
 #include "PHY/NR_REFSIG/dmrs_nr.h"
-#include "PHY/NR_REFSIG/nr_mod_table.h"
 #include "PHY/NR_REFSIG/refsig_defs_ue.h"
 #include "PHY/NR_TRANSPORT/nr_dlsch.h"
 #include "PHY/NR_TRANSPORT/nr_sch_dmrs.h"