Commit 2b36f2d8 authored by Roberto Louro Magueta

Move nr_det_A_MF_2x2, nr_squared_matrix_element and nr_numer_2x2 to csi_rx.c

parent d864cf94
@@ -43,6 +43,89 @@
//#define NR_CSIRS_DEBUG
//#define NR_CSIIM_DEBUG
void nr_det_A_MF_2x2(int32_t *a_mf_00,
                     int32_t *a_mf_01,
                     int32_t *a_mf_10,
                     int32_t *a_mf_11,
                     int32_t *det_fin,
                     unsigned short nb_rb) {
  int16_t nr_conjug2[8]__attribute__((aligned(16))) = {1,-1,1,-1,1,-1,1,-1} ;
  __m128i ad_re_128, bc_re_128, det_re_128;
  __m128i *a_mf_00_128 = (__m128i *)a_mf_00;
  __m128i *a_mf_01_128 = (__m128i *)a_mf_01;
  __m128i *a_mf_10_128 = (__m128i *)a_mf_10;
  __m128i *a_mf_11_128 = (__m128i *)a_mf_11;
  __m128i *det_fin_128 = (__m128i *)det_fin;
  for (int rb = 0; rb<3*nb_rb; rb++) {
    //complex multiplication (I_a+jQ_a)(I_d+jQ_d) = (I_aI_d - Q_aQ_d) + j(Q_aI_d + I_aQ_d)
    //The imag part is often zero, we compute only the real part
    ad_re_128 = _mm_sign_epi16(a_mf_00_128[0],*(__m128i*)&nr_conjug2[0]);
    ad_re_128 = _mm_madd_epi16(ad_re_128,a_mf_11_128[0]); //Re: I_a0*I_d0 - Q_a1*Q_d1
    //complex multiplication (I_b+jQ_b)(I_c+jQ_c) = (I_bI_c - Q_bQ_c) + j(Q_bI_c + I_bQ_c)
    //The imag part is often zero, we compute only the real part
    bc_re_128 = _mm_sign_epi16(a_mf_01_128[0],*(__m128i*)&nr_conjug2[0]);
    bc_re_128 = _mm_madd_epi16(bc_re_128,a_mf_10_128[0]); //Re: I_b0*I_c0 - Q_b1*Q_c1
    det_re_128 = _mm_sub_epi32(ad_re_128, bc_re_128);
    //det in Q30 format
    det_fin_128[0] = _mm_abs_epi32(det_re_128);
    det_fin_128+=1;
    a_mf_00_128+=1;
    a_mf_01_128+=1;
    a_mf_10_128+=1;
    a_mf_11_128+=1;
  }
  _mm_empty();
  _m_empty();
}
void nr_squared_matrix_element(int32_t *a,
                               int32_t *a_sq,
                               unsigned short nb_rb) {
  __m128i *a_128 = (__m128i *)a;
  __m128i *a_sq_128 = (__m128i *)a_sq;
  for (int rb=0; rb<3*nb_rb; rb++) {
    a_sq_128[0] = _mm_madd_epi16(a_128[0], a_128[0]);
    a_sq_128+=1;
    a_128+=1;
  }
  _mm_empty();
  _m_empty();
}
void nr_numer_2x2(int32_t *a_00_sq,
                  int32_t *a_01_sq,
                  int32_t *a_10_sq,
                  int32_t *a_11_sq,
                  int32_t *num_fin,
                  unsigned short nb_rb) {
  __m128i *a_00_sq_128 = (__m128i *)a_00_sq;
  __m128i *a_01_sq_128 = (__m128i *)a_01_sq;
  __m128i *a_10_sq_128 = (__m128i *)a_10_sq;
  __m128i *a_11_sq_128 = (__m128i *)a_11_sq;
  __m128i *num_fin_128 = (__m128i *)num_fin;
  for (int rb=0; rb<3*nb_rb; rb++) {
    __m128i sq_a_plus_sq_d_128 = _mm_add_epi32(a_00_sq_128[0], a_11_sq_128[0]);
    __m128i sq_b_plus_sq_c_128 = _mm_add_epi32(a_01_sq_128[0], a_10_sq_128[0]);
    num_fin_128[0] = _mm_add_epi32(sq_a_plus_sq_d_128, sq_b_plus_sq_c_128);
    num_fin_128+=1;
    a_00_sq_128+=1;
    a_01_sq_128+=1;
    a_10_sq_128+=1;
    a_11_sq_128+=1;
  }
  _mm_empty();
  _m_empty();
}
bool is_csi_rs_in_symbol(fapi_nr_dl_config_csirs_pdu_rel15_t csirs_config_pdu, int symbol) {
  bool ret = false;
......
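Note (not part of the diff): nr_det_A_MF_2x2 works on interleaved 16-bit I/Q samples, four complex values per __m128i, so the 3*nb_rb loop covers the 12 subcarriers of each RB. Per subcarrier it stores |Re(a00*a11 - a01*a10)| in Q30, with the _mm_sign_epi16 / _mm_madd_epi16 pair producing only the real part of each product. A minimal scalar sketch of the same arithmetic, assuming Q15 I/Q packing as the intrinsics imply (the cpx16_t type and function name below are illustrative, not from the codebase):

  #include <stdint.h>

  /* Assumed Q15 I/Q pair layout, matching what the intrinsics imply. */
  typedef struct { int16_t r; int16_t i; } cpx16_t;

  /* Scalar reference for one subcarrier: |Re(a00*a11 - a01*a10)| in Q30,
   * i.e. the value the SIMD loop above writes to det_fin. */
  int32_t det_A_MF_2x2_scalar(cpx16_t a00, cpx16_t a01,
                              cpx16_t a10, cpx16_t a11) {
    int32_t ad_re = (int32_t)a00.r * a11.r - (int32_t)a00.i * a11.i; /* Re(a00*a11) */
    int32_t bc_re = (int32_t)a01.r * a10.r - (int32_t)a01.i * a10.i; /* Re(a01*a10) */
    int32_t det_re = ad_re - bc_re;
    return det_re < 0 ? -det_re : det_re; /* same effect as _mm_abs_epi32 on the packed result */
  }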
@@ -1870,89 +1870,6 @@ void nr_dlsch_detection_mrc(int **rxdataF_comp,
#endif
}
void nr_det_A_MF_2x2(int32_t *a_mf_00,
                     int32_t *a_mf_01,
                     int32_t *a_mf_10,
                     int32_t *a_mf_11,
                     int32_t *det_fin,
                     unsigned short nb_rb) {
  int16_t nr_conjug2[8]__attribute__((aligned(16))) = {1,-1,1,-1,1,-1,1,-1} ;
  __m128i ad_re_128, bc_re_128, det_re_128;
  __m128i *a_mf_00_128 = (__m128i *)a_mf_00;
  __m128i *a_mf_01_128 = (__m128i *)a_mf_01;
  __m128i *a_mf_10_128 = (__m128i *)a_mf_10;
  __m128i *a_mf_11_128 = (__m128i *)a_mf_11;
  __m128i *det_fin_128 = (__m128i *)det_fin;
  for (int rb = 0; rb<3*nb_rb; rb++) {
    //complex multiplication (I_a+jQ_a)(I_d+jQ_d) = (I_aI_d - Q_aQ_d) + j(Q_aI_d + I_aQ_d)
    //The imag part is often zero, we compute only the real part
    ad_re_128 = _mm_sign_epi16(a_mf_00_128[0],*(__m128i*)&nr_conjug2[0]);
    ad_re_128 = _mm_madd_epi16(ad_re_128,a_mf_11_128[0]); //Re: I_a0*I_d0 - Q_a1*Q_d1
    //complex multiplication (I_b+jQ_b)(I_c+jQ_c) = (I_bI_c - Q_bQ_c) + j(Q_bI_c + I_bQ_c)
    //The imag part is often zero, we compute only the real part
    bc_re_128 = _mm_sign_epi16(a_mf_01_128[0],*(__m128i*)&nr_conjug2[0]);
    bc_re_128 = _mm_madd_epi16(bc_re_128,a_mf_10_128[0]); //Re: I_b0*I_c0 - Q_b1*Q_c1
    det_re_128 = _mm_sub_epi32(ad_re_128, bc_re_128);
    //det in Q30 format
    det_fin_128[0] = _mm_abs_epi32(det_re_128);
    det_fin_128+=1;
    a_mf_00_128+=1;
    a_mf_01_128+=1;
    a_mf_10_128+=1;
    a_mf_11_128+=1;
  }
  _mm_empty();
  _m_empty();
}
void nr_squared_matrix_element(int32_t *a,
                               int32_t *a_sq,
                               unsigned short nb_rb) {
  __m128i *a_128 = (__m128i *)a;
  __m128i *a_sq_128 = (__m128i *)a_sq;
  for (int rb=0; rb<3*nb_rb; rb++) {
    a_sq_128[0] = _mm_madd_epi16(a_128[0], a_128[0]);
    a_sq_128+=1;
    a_128+=1;
  }
  _mm_empty();
  _m_empty();
}
void nr_numer_2x2(int32_t *a_00_sq,
                  int32_t *a_01_sq,
                  int32_t *a_10_sq,
                  int32_t *a_11_sq,
                  int32_t *num_fin,
                  unsigned short nb_rb) {
  __m128i *a_00_sq_128 = (__m128i *)a_00_sq;
  __m128i *a_01_sq_128 = (__m128i *)a_01_sq;
  __m128i *a_10_sq_128 = (__m128i *)a_10_sq;
  __m128i *a_11_sq_128 = (__m128i *)a_11_sq;
  __m128i *num_fin_128 = (__m128i *)num_fin;
  for (int rb=0; rb<3*nb_rb; rb++) {
    __m128i sq_a_plus_sq_d_128 = _mm_add_epi32(a_00_sq_128[0], a_11_sq_128[0]);
    __m128i sq_b_plus_sq_c_128 = _mm_add_epi32(a_01_sq_128[0], a_10_sq_128[0]);
    num_fin_128[0] = _mm_add_epi32(sq_a_plus_sq_d_128, sq_b_plus_sq_c_128);
    num_fin_128+=1;
    a_00_sq_128+=1;
    a_01_sq_128+=1;
    a_10_sq_128+=1;
    a_11_sq_128+=1;
  }
  _mm_empty();
  _m_empty();
}
/* Zero Forcing Rx function: nr_a_sum_b()
 * Compute the complex addition x=x+y
 *
......
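Note (not part of the diff): the two companions removed here (and re-added in csi_rx.c above) are equally simple per subcarrier: nr_squared_matrix_element computes |a|^2 = I^2 + Q^2 in Q30 via _mm_madd_epi16(a, a), and nr_numer_2x2 sums the four squared 2x2 entries. Scalar equivalents under the same assumed Q15 I/Q packing (the names below are illustrative, not from the codebase):

  #include <stdint.h>

  /* Assumed Q15 I/Q pair layout, matching what the intrinsics imply. */
  typedef struct { int16_t r; int16_t i; } cpx16_t;

  /* Per-subcarrier equivalent of nr_squared_matrix_element: |a|^2 in Q30. */
  int32_t squared_matrix_element_scalar(cpx16_t a) {
    return (int32_t)a.r * a.r + (int32_t)a.i * a.i;
  }

  /* Per-subcarrier equivalent of nr_numer_2x2:
   * numerator = |a00|^2 + |a11|^2 + |a01|^2 + |a10|^2. */
  int32_t numer_2x2_scalar(int32_t a00_sq, int32_t a01_sq,
                           int32_t a10_sq, int32_t a11_sq) {
    return (a00_sq + a11_sq) + (a01_sq + a10_sq);
  }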
@@ -891,24 +891,6 @@ void nr_a_sum_b(__m128i *input_x,
                __m128i *input_y,
                unsigned short nb_rb);
void nr_det_A_MF_2x2(int32_t *a_mf_00,
                     int32_t *a_mf_01,
                     int32_t *a_mf_10,
                     int32_t *a_mf_11,
                     int32_t *det_fin,
                     unsigned short nb_rb);
void nr_squared_matrix_element(int32_t *a,
                               int32_t *a_sq,
                               unsigned short nb_rb);
void nr_numer_2x2(int32_t *a_00_sq,
                  int32_t *a_01_sq,
                  int32_t *a_10_sq,
                  int32_t *a_11_sq,
                  int32_t *num_fin,
                  unsigned short nb_rb);
uint8_t rank_estimation_tm3_tm4(int *dl_ch_estimates_00,
                                int *dl_ch_estimates_01,
                                int *dl_ch_estimates_10,
......