chore / simplify the code and add the comment

- simplify the buffer alignment - add the comment for llr - clean unused variable

chore / simplify the code and add the comment
- simplify the buffer alignment - add the comment for llr - clean unused variable
fc2432e9 · Tsung Yu Chan · 0263b4a8 · fc2432e9 · fc2432e9 · fc2432e9
Commit fc2432e9 authored Sep 25, 2023 by Tsung Yu Chan
6 changed files
--- a/openair1/PHY/INIT/nr_init.c
+++ b/openair1/PHY/INIT/nr_init.c
@@ -686,9 +686,7 @@ int phy_init_nr_gNB(PHY_VARS_gNB *gNB)
  int n_buf = Prx*max_ul_mimo_layers;

  int nb_re_pusch = N_RB_UL * NR_NB_SC_PER_RB;
-  int nb_re_pusch2 = nb_re_pusch;
-  if (nb_re_pusch & 7)
-    nb_re_pusch2 += 8 - (nb_re_pusch & 7);
+  int nb_re_pusch2 = (nb_re_pusch + 7) & ~7;

  gNB->pusch_vars = (NR_gNB_PUSCH *)malloc16_clear(gNB->max_nb_pusch * sizeof(NR_gNB_PUSCH));
  for (int ULSCH_id = 0; ULSCH_id < gNB->max_nb_pusch; ULSCH_id++) {

--- a/openair1/PHY/NR_ESTIMATION/nr_freq_equalization.c
+++ b/openair1/PHY/NR_ESTIMATION/nr_freq_equalization.c
@@ -26,7 +26,7 @@
 // Reference of openair1/PHY/LTE_ESTIMATION/freq_equalization.c

 // This is 4096/(1:4096) in simde__m128i format
-simde__m128i nr_inv_ch[4096];/* = {0, 4096/1, 4096/2, 4096/3, 4096/4...}*/
+static simde__m128i nr_inv_ch[4096]; /* = {0, 4096/1, 4096/2, 4096/3, 4096/4...}*/

 void nr_init_fde() 
 {
@@ -51,8 +51,7 @@ void nr_freq_equalization (NR_DL_FRAME_PARMS *frame_parms,
  AssertFatal(Msc_RS <= frame_parms->N_RB_UL*12, "Msc_RS %d >= %d\n",
              Msc_RS, frame_parms->N_RB_UL*12);

-  for (uint16_t re = 0; re < (Msc_RS >> 2); re++) 
-  {
+  for (uint16_t re = 0; re < (Msc_RS >> 2); re++) {
    int16_t amp = (*((int16_t*)&ul_ch_mag128[re]));

    if (amp > 4095)
@@ -62,12 +61,10 @@ void nr_freq_equalization (NR_DL_FRAME_PARMS *frame_parms,

    if (Qm == 4)
      ul_ch_mag128[re]  = simde_mm_set1_epi16(324);  // this is 512*2/sqrt(10)
-    else if (Qm == 6)
-    {
+    else if (Qm == 6) {
      ul_ch_mag128[re]  = simde_mm_set1_epi16(316);  // this is 512*4/sqrt(42)
      ul_ch_magb128[re] = simde_mm_set1_epi16(158);  // this is 512*2/sqrt(42)
-    }
-    else if(Qm != 2)
+    } else if(Qm != 2)
      AssertFatal(1, "nr_freq_equalization(), Qm = %d, should be 2, 4 or 6. symbol=%d, Msc_RS=%d\n", Qm, symbol, Msc_RS);
  }
 }
--- a/openair1/PHY/NR_TRANSPORT/nr_scrambling.c
+++ b/openair1/PHY/NR_TRANSPORT/nr_scrambling.c
@@ -81,17 +81,16 @@ void nr_codeword_unscrambling_init(int16_t *s2, uint32_t size, uint8_t q, uint32
 {
  uint32_t x1;
  uint32_t x2 = (n_RNTI << 15) + (q << 14) + Nid;
-  uint32_t s = 0;
+  simde__m128i *s128=(simde__m128i *)s2;

+  uint32_t s = lte_gold_generic(&x1, &x2, 1);
  uint8_t *s8=(uint8_t *)&s;
-  s = lte_gold_generic(&x1, &x2, 1);
-  simde__m128i *s128=(simde__m128i *)s2;

-  for (int i = 0, j = 0; i < ((size >> 5) + ((size & 0x1f) > 0 ? 1 : 0)); i++, j += 4) {
-    s128[j]   = byte2m128i[s8[0]];
-    s128[j+1] = byte2m128i[s8[1]];
-    s128[j+2] = byte2m128i[s8[2]];
-    s128[j+3] = byte2m128i[s8[3]];
+  for (int i = 0; i < (size +31) >> 5; i++) {
+    *s128++ = byte2m128i[s8[0]];
+    *s128++ = byte2m128i[s8[1]];
+    *s128++ = byte2m128i[s8[2]];
+    *s128++ = byte2m128i[s8[3]];
    s = lte_gold_generic(&x1, &x2, 0);
  }
 }
--- a/openair1/PHY/NR_TRANSPORT/nr_ulsch_demodulation.c
+++ b/openair1/PHY/NR_TRANSPORT/nr_ulsch_demodulation.c
@@ -1277,11 +1277,7 @@ static void inner_rx (PHY_VARS_gNB *gNB,
  int nb_layer = rel15_ul->nrOfLayers;
  int nb_rx_ant = frame_parms->nb_antennas_rx;
  int dmrs_symbol_flag = (rel15_ul->ul_dmrs_symb_pos >> symbol) & 0x01;
-
-  int buffer_length = rel15_ul->rb_size * NR_NB_SC_PER_RB;
-  if (buffer_length & 7)
-    buffer_length += (8 - (buffer_length & 7));
-  
+  int buffer_length = (rel15_ul->rb_size * NR_NB_SC_PER_RB + 7) & ~7;
  c16_t rxFext[nb_rx_ant][buffer_length] __attribute__((aligned(32)));
  c16_t chFext[nb_layer][nb_rx_ant][buffer_length] __attribute__((aligned(32)));


--- a/openair1/PHY/NR_TRANSPORT/nr_ulsch_llr_computation.c
+++ b/openair1/PHY/NR_TRANSPORT/nr_ulsch_llr_computation.c
@@ -65,25 +65,28 @@ void nr_ulsch_16qam_llr(int32_t *rxdataF_comp,
 {
  simde__m256i *rxF_256 = (simde__m256i*)rxdataF_comp;
  simde__m256i *ch_mag = (simde__m256i*)ul_ch_mag;
-  simde__m64 *llr_64 = (simde__m64*)ulsch_llr;
+  int64_t *llr_64 = (int64_t*)ulsch_llr;

  simde__m256i xmm0, xmm1, xmm2;

-  for (int i = 0; i < ((nb_re>>3) + ((nb_re&7) ? 1 : 0)); i++) {
-    xmm0 = simde_mm256_abs_epi16(rxF_256[i]); // registers of even index in xmm0-> |y_R|, registers of odd index in xmm0-> |y_I|
+  for (int i = 0; i < ((nb_re + 7) >> 3); i++) {
+    xmm0 = simde_mm256_abs_epi16(rxF_256[i]);       // registers of even index in xmm0-> |y_R|, registers of odd index in xmm0-> |y_I|
    xmm0 = simde_mm256_subs_epi16(ch_mag[i], xmm0); // registers of even index in xmm0-> |y_R|-|h|^2, registers of odd index in xmm0-> |y_I|-|h|^2
 
-    xmm1 = simde_mm256_unpacklo_epi32(rxF_256[i], xmm0); // llr128[0] contains the llrs of the 1st,2nd,5th and 6th REs
-    xmm2 = simde_mm256_unpackhi_epi32(rxF_256[i], xmm0); // llr128[1] contains the llrs of the 3rd, 4th, 7th and 8th REs
+    xmm1 = simde_mm256_unpacklo_epi32(rxF_256[i], xmm0);
+    xmm2 = simde_mm256_unpackhi_epi32(rxF_256[i], xmm0);
+
+    // xmm1 |1st 2ed 3rd 4th  9th 10th 13rd 14th|
+    // xmm2 |5th 6th 7th 8th 11st 12ed 15th 16th|
    
-    *llr_64++ = (simde__m64)simde_mm256_extract_epi64(xmm1, 0);
-    *llr_64++ = (simde__m64)simde_mm256_extract_epi64(xmm1, 1);
-    *llr_64++ = (simde__m64)simde_mm256_extract_epi64(xmm2, 0);
-    *llr_64++ = (simde__m64)simde_mm256_extract_epi64(xmm2, 1);
-    *llr_64++ = (simde__m64)simde_mm256_extract_epi64(xmm1, 2);
-    *llr_64++ = (simde__m64)simde_mm256_extract_epi64(xmm1, 3);
-    *llr_64++ = (simde__m64)simde_mm256_extract_epi64(xmm2, 2);
-    *llr_64++ = (simde__m64)simde_mm256_extract_epi64(xmm2, 3);
+    *llr_64++ = simde_mm256_extract_epi64(xmm1, 0);
+    *llr_64++ = simde_mm256_extract_epi64(xmm1, 1);
+    *llr_64++ = simde_mm256_extract_epi64(xmm2, 0);
+    *llr_64++ = simde_mm256_extract_epi64(xmm2, 1);
+    *llr_64++ = simde_mm256_extract_epi64(xmm1, 2);
+    *llr_64++ = simde_mm256_extract_epi64(xmm1, 3);
+    *llr_64++ = simde_mm256_extract_epi64(xmm2, 2);
+    *llr_64++ = simde_mm256_extract_epi64(xmm2, 3);
  }
 }

@@ -106,13 +109,16 @@ void nr_ulsch_64qam_llr(int32_t *rxdataF_comp,

  int32_t *llr_32 = (int32_t *)ulsch_llr;

-  for (int i = 0; i < ((nb_re>>3) + ((nb_re&7) ? 1 : 0)); i++) {
+  for (int i = 0; i < ((nb_re + 7) >> 3); i++) {
    xmm0 = rxF[i];
-    xmm1 = simde_mm256_abs_epi16(xmm0);
-    xmm1 = simde_mm256_subs_epi16(ch_maga[i], xmm1);
+    xmm1 = simde_mm256_abs_epi16(xmm0);              // registers of even index in xmm0-> |y_R|, registers of odd index in xmm0-> |y_I|
+    xmm1 = simde_mm256_subs_epi16(ch_maga[i], xmm1); // registers of even index in xmm0-> |y_R|-|h|^2, registers of odd index in xmm0-> |y_I|-|h|^2
    xmm2 = simde_mm256_abs_epi16(xmm1);
    xmm2 = simde_mm256_subs_epi16(ch_magb[i], xmm2);
-    
+    // xmm0 |1st 4th 7th 10th 13th 16th 19th 22ed|
+    // xmm1 |2ed 5th 8th 11th 14th 17th 20th 23rd|
+    // xmm2 |3rd 6th 9th 12th 15th 18th 21st 24th|
+
    *llr_32++ = simde_mm256_extract_epi32(xmm0,0);
    *llr_32++ = simde_mm256_extract_epi32(xmm1,0);
    *llr_32++ = simde_mm256_extract_epi32(xmm2,0);
@@ -163,7 +169,7 @@ void nr_ulsch_256qam_llr(int32_t *rxdataF_comp,
  simde__m256i* ch_magb = (simde__m256i*)ul_ch_magb;
  simde__m256i* ch_magc = (simde__m256i*)ul_ch_magc;

-  for (int i = 0; i < ((nb_re>>3) + ((nb_re&7) ? 1 : 0)); i++) {
+  for (int i = 0; i < ((nb_re + 7) >> 3); i++) {
    xmm0 = simde_mm256_abs_epi16(rxF[i]); // registers of even index in xmm0-> |y_R|, registers of odd index in xmm0-> |y_I|
    xmm0 = simde_mm256_subs_epi16(ch_maga[i], xmm0); // registers of even index in xmm0-> |y_R|-|h|^2, registers of odd index in xmm0-> |y_I|-|h|^2
    //  xmmtmpD2 contains 16 LLRs

--- a/openair1/PHY/defs_gNB.h
+++ b/openair1/PHY/defs_gNB.h
@@ -733,7 +733,6 @@ typedef struct PHY_VARS_gNB_s {
  time_stats_t ul_indication_stats;
  time_stats_t schedule_response_stats;
  time_stats_t ulsch_decoding_stats;
-  time_stats_t ulsch_rate_unmatching_stats;
  time_stats_t ulsch_ldpc_decoding_stats;
  time_stats_t ulsch_deinterleaving_stats;
  time_stats_t ulsch_channel_estimation_stats;