Move generation of precoding matrices to the MAC and adapt it to the SCF interface (1st version)

c8afb25d · francescomani · 7514bb91 · c8afb25d · c8afb25d · c8afb25d
Commit c8afb25d authored Mar 08, 2023 by francescomani
14 changed files
--- a/common/utils/nr/nr_common.h
+++ b/common/utils/nr/nr_common.h
@@ -76,6 +76,8 @@ static inline const char *rnti_types(nr_rnti_type_t rr)
 }
 #undef R
+#define NR_MAX_NB_LAYERS 4 // 8
 typedef enum {
  nr_FR1 = 0,
  nr_FR2

--- a/nfapi/open-nFAPI/nfapi/public_inc/nfapi_nr_interface_scf.h
+++ b/nfapi/open-nFAPI/nfapi/public_inc/nfapi_nr_interface_scf.h
@@ -74,17 +74,17 @@ typedef enum {
  NFAPI_NR_PHY_MSG_TYPE_RACH_INDICATION= 0X89,
  //RESERVED 0X8a ~ 0xff
  NFAPI_NR_PHY_MSG_TYPE_PNF_PARAM_REQUEST = 0x0100,
-	NFAPI_NR_PHY_MSG_TYPE_PNF_PARAM_RESPONSE = 0x0101,
+  NFAPI_NR_PHY_MSG_TYPE_PNF_PARAM_RESPONSE = 0x0101,
-	NFAPI_NR_PHY_MSG_TYPE_PNF_CONFIG_REQUEST= 0x0102,
+  NFAPI_NR_PHY_MSG_TYPE_PNF_CONFIG_REQUEST= 0x0102,
-	NFAPI_NR_PHY_MSG_TYPE_PNF_CONFIG_RESPONSE= 0x0103,
+  NFAPI_NR_PHY_MSG_TYPE_PNF_CONFIG_RESPONSE= 0x0103,
-	NFAPI_NR_PHY_MSG_TYPE_PNF_START_REQUEST= 0x0104,
+  NFAPI_NR_PHY_MSG_TYPE_PNF_START_REQUEST= 0x0104,
-	NFAPI_NR_PHY_MSG_TYPE_PNF_START_RESPONSE= 0x0105,
+  NFAPI_NR_PHY_MSG_TYPE_PNF_START_RESPONSE= 0x0105,
-	NFAPI_NR_PHY_MSG_TYPE_PNF_STOP_REQUEST= 0x0106,
+  NFAPI_NR_PHY_MSG_TYPE_PNF_STOP_REQUEST= 0x0106,
-	NFAPI_NR_PHY_MSG_TYPE_PNF_STOP_RESPONSE= 0x0107,
+  NFAPI_NR_PHY_MSG_TYPE_PNF_STOP_RESPONSE= 0x0107,
  NFAPI_NR_PHY_MSG_TYPE_UL_NODE_SYNC = 0x0180,
-	NFAPI_NR_PHY_MSG_TYPE_DL_NODE_SYNC,
+  NFAPI_NR_PHY_MSG_TYPE_DL_NODE_SYNC,
-	NFAPI_NR_PHY_MSG_TYPE_TIMING_INFO
+  NFAPI_NR_PHY_MSG_TYPE_TIMING_INFO
 } nfapi_nr_phy_msg_type_e;
 // SCF222_5G-FAPI_PHY_SPI_Specification.pdf Section 3.3
@@ -449,6 +449,26 @@ typedef struct
 } nfapi_nr_measurement_config_t;
+// Table 3–62 Precoding matrix (PM) PDU (v.222.10.04)
+typedef struct {
+  int16_t precoder_weight_Re;
+  int16_t precoder_weight_Im;
+} nfapi_nr_pm_weights_t;
+typedef struct {
+  uint16_t pm_idx;
+  uint16_t numLayers;
+  uint16_t num_ant_ports;
+  nfapi_nr_pm_weights_t weights[4][4]; // TODO temporary hardcoding
+} nfapi_nr_pm_pdu_t;
+typedef struct {
+  uint16_t num_pm_idx;
+  nfapi_nr_pm_pdu_t *pmi_pdu;
+} nfapi_nr_pm_list_t;
 // ERROR enums
 typedef enum {    // Table 2-22
  NFAPI_NR_PARAM_MSG_OK = 0, 
@@ -560,6 +580,7 @@ typedef struct {
  nfapi_nr_tdd_table_t          tdd_table;
  nfapi_nr_measurement_config_t measurement_config;
  nfapi_nr_nfapi_t              nfapi_config;
+  nfapi_nr_pm_list_t            pmi_list;
 } nfapi_nr_config_request_scf_t;
@@ -592,24 +613,24 @@ typedef struct {
 //3.3.4 STOP
 typedef struct {
-	nfapi_p4_p5_message_header_t header;
+  nfapi_p4_p5_message_header_t header;
-	nfapi_vendor_extension_tlv_t vendor_extension;
+  nfapi_vendor_extension_tlv_t vendor_extension;
 } nfapi_nr_stop_request_t;
 typedef struct {
-	nfapi_p4_p5_message_header_t header;
+  nfapi_p4_p5_message_header_t header;
-	nfapi_vendor_extension_tlv_t vendor_extension;
+  nfapi_vendor_extension_tlv_t vendor_extension;
 } nfapi_nr_stop_indication_t;
 typedef enum {
-	NFAPI_NR_STOP_MSG_INVALID_STATE
+  NFAPI_NR_STOP_MSG_INVALID_STATE
 } nfapi_nr_stop_errors_e;
 //3.3.5 PHY Notifications
 typedef enum {
  NFAPI_NR_PHY_API_MSG_OK              =0x0,
-	NFAPI_NR_PHY_API_MSG_INVALID_STATE   =0x1,
+  NFAPI_NR_PHY_API_MSG_INVALID_STATE   =0x1,
  NFAPI_NR_PHY_API_MSG_INVALID_CONFIG  =0x2,
  NFAPI_NR_PHY_API_SFN_OUT_OF_SYNC     =0X3,
  NFAPI_NR_PHY_API_MSG_SLOR_ERR        =0X4,
@@ -620,7 +641,7 @@ typedef enum {
 } nfapi_nr_phy_notifications_errors_e;
 typedef struct {
-	uint16_t sfn; //0~1023
+  uint16_t sfn; //0~1023
  uint16_t slot;//0~319
  nfapi_nr_phy_msg_type_e msg_id;//Indicate which message received by the PHY has an error. Values taken from Table 3-4.
  nfapi_nr_phy_notifications_errors_e error_code;
@@ -632,42 +653,22 @@ typedef struct {
 //table 3-32
 //? 
 typedef struct {
-	uint16_t beam_idx;     //0~65535
+  uint16_t beam_idx;     //0~65535
 } nfapi_nr_dig_beam_t;
 typedef struct {
-	uint16_t dig_beam_weight_Re;
+  uint16_t dig_beam_weight_Re;
  uint16_t dig_beam_weight_Im;
 } nfapi_nr_txru_t;
 typedef struct {
-	uint16_t num_dig_beams; //0~65535
+  uint16_t num_dig_beams; //0~65535
  uint16_t num_txrus;    //0~65535
  nfapi_nr_dig_beam_t* dig_beam_list;
  nfapi_nr_txru_t*  txru_list;
 } nfapi_nr_dbt_pdu_t;
-//table 3-33
-//?
-typedef struct {
-  uint16_t num_ant_ports;
-	int16_t precoder_weight_Re;
-  int16_t precoder_weight_Im;
-} nfapi_nr_num_ant_ports_t;
-typedef struct {
-  uint16_t numLayers;   //0~65535
-	nfapi_nr_num_ant_ports_t* num_ant_ports_list;
-} nfapi_nr_num_layers_t;
-typedef struct {
-	uint16_t pm_idx;       //0~65535
-  nfapi_nr_num_layers_t* num_layers_list;   //0~65535
-  //nfapi_nr_num_ant_ports_t* num_ant_ports_list;
-} nfapi_nr_pm_pdu_t;
 // Section 3.4
 // Section 3.4.1 slot indication
@@ -678,7 +679,7 @@ typedef struct {
 typedef struct {
  nfapi_p7_message_header_t header;
-	uint16_t sfn; //0->1023   
+  uint16_t sfn; //0->1023
  uint16_t slot;//0->319
 } nfapi_nr_slot_indication_scf_t;
@@ -1441,7 +1442,7 @@ typedef struct {
 //3.4.5 slot_errors
 typedef enum {
-	NFAPI_NR_SLOT_UL_TTI_MSG_INVALID_STATE,
+  NFAPI_NR_SLOT_UL_TTI_MSG_INVALID_STATE,
  NFAPI_NR_SLOT_UL_TTI_SFN_OUT_OF_SYNC,
  NFAPI_NR_SLOT_UL_TTI_MSG_BCH_MISSING,
  NFAPI_NR_SLOT_UL_TTI_MSG_SLOT_ERR
@@ -1449,14 +1450,14 @@ typedef enum {
 } nfapi_nr_slot_errors_ul_tti_e;
 typedef enum {
-	NFAPI_NR_SLOT_DL_TTI_MSG_INVALID_STATE,
+  NFAPI_NR_SLOT_DL_TTI_MSG_INVALID_STATE,
  NFAPI_NR_SLOT_DL_TTI_MSG_SLOT_ERR
 } nfapi_nr_slot_errors_dl_tti_e;
 typedef enum {
-	NFAPI_NR_SLOT_UL_DCI_MSG_INVALID_STATE,
+  NFAPI_NR_SLOT_UL_DCI_MSG_INVALID_STATE,
  NFAPI_NR_SLOT_UL_DCI_MSG_INVALID_SFN,
  NFAPI_NR_SLOT_UL_DCI_MSG_UL_DCI_ERR

--- a/openair1/PHY/INIT/nr_init.c
+++ b/openair1/PHY/INIT/nr_init.c
--- a/openair1/PHY/MODULATION/nr_modulation.c
+++ b/openair1/PHY/MODULATION/nr_modulation.c
@@ -703,13 +703,17 @@ c16_t nr_layer_precoder_cm(int n_layers,
                           int n_symbols,
                           int symSz,
                           c16_t datatx_F_precoding[n_layers][n_symbols][symSz],
-                           c16_t *prec_matrix,
+                           int ap,
+                           nfapi_nr_pm_pdu_t *pmi_pdu,
                           int symbol,
                           int offset)
 {
  c16_t precodatatx_F = {0};
-  for (int al = 0; al < n_layers; al++)
+  for (int al = 0; al < n_layers; al++) {
-    precodatatx_F = c16maddShift(datatx_F_precoding[al][symbol][offset], prec_matrix[al], precodatatx_F, 15);
+    nfapi_nr_pm_weights_t *w = &pmi_pdu->weights[al][ap];
+    c16_t prec_weight = {.r = w->precoder_weight_Re, .i = w->precoder_weight_Im};
+    precodatatx_F = c16maddShift(datatx_F_precoding[al][symbol][offset], prec_weight, precodatatx_F, 15);
+  }
  return precodatatx_F;
 }
@@ -717,69 +721,77 @@ void nr_layer_precoder_simd(const int n_layers,
                           const int n_symbols,
                           const int symSz,
                           const c16_t txdataF_res_mapped[n_layers][n_symbols][symSz],
-                           const c16_t prec_matrix[n_layers],
+                           const int ant,
+                           const nfapi_nr_pm_pdu_t *pmi_pdu,
                           const int symbol,
                           const int sc_offset,
                           const int re_cnt,
                           c16_t *txdataF_precoded)
 {
  uint32_t sc = sc_offset;
+  c16_t prec_weight = {0};
  // For x86, use 256 SIMD for every 8 RE and 128 SIMD for last 4 RE
  // For aarch64, use 128 SIMD for every 4 RE
  // 256 SIMD: Do 8 RE in one iteration, 3 iterations for 2 RB
-  #ifdef __AVX2__
+#ifdef __AVX2__
-    const uint32_t re_cnt_align8 = re_cnt & ~7;
+  const uint32_t re_cnt_align8 = re_cnt & ~7;
-    for(; sc<sc_offset+(re_cnt_align8); sc+=sizeof(simde__m256i)/sizeof(*prec_matrix)){
+  for(; sc < sc_offset + (re_cnt_align8); sc += sizeof(simde__m256i) / sizeof(prec_weight)) {
-      // Matrix multiplication for 4 elements of the result (sizeof(simde__m256i) / sizeof(*prec_matrix) = 8)
+    // Matrix multiplication for 8 elements of the result (sizeof(simde__m256i) / sizeof(prec_weight) = 8)
-      simde__m256i y = simde_mm256_set1_epi16(0); // Y = W[0]*X[0] + W[1]*X[1] + ... + W[nrOfLayers-1]*X[nrOfLayers-1]
+    simde__m256i y = simde_mm256_set1_epi16(0); // Y = W[0]*X[0] + W[1]*X[1] + ... + W[nrOfLayers-1]*X[nrOfLayers-1]
-      for(int nl=0; nl<n_layers; nl++){
+    for(int nl = 0; nl < n_layers; nl++) {
-        const simde__m256i x = simde_mm256_loadu_epi32(&txdataF_res_mapped[nl][symbol][sc]);
+      prec_weight.r = pmi_pdu->weights[nl][ant].precoder_weight_Re;
+      prec_weight.i = pmi_pdu->weights[nl][ant].precoder_weight_Im;
-        // Rearrange precoding matrix weight to match complex multiplication and broadcast it to match SIMD size
-        const simde__m256i w_c   = simde_mm256_set1_epi32(c16toI32(c16conj(prec_matrix[nl])));   // broadcast conjugate of w
+      const simde__m256i x = simde_mm256_loadu_epi32(&txdataF_res_mapped[nl][symbol][sc]);
-        const simde__m256i w_s   = simde_mm256_set1_epi32(c16toI32(c16swap(prec_matrix[nl])));   // broadcast swapped real and img of w
+      // Rearrange precoding matrix weight to match complex multiplication and broadcast it to match SIMD size
-        // Multiplication and shift
+      const simde__m256i w_c   = simde_mm256_set1_epi32(c16toI32(c16conj(prec_weight)));   // broadcast conjugate of w
-        const simde__m256i reals = simde_mm256_srai_epi32(simde_mm256_madd_epi16(x, w_c), 15); // (int32_t) .r = (x.r * w.r - x.i * w.i) >> 15
+      const simde__m256i w_s   = simde_mm256_set1_epi32(c16toI32(c16swap(prec_weight)));   // broadcast swapped real and img of w
-        const simde__m256i imags = simde_mm256_slli_epi32(simde_mm256_madd_epi16(x, w_s), 1);  // (int32_t) .i = (x.r * w.i + x.i * w.r) << 1, since higher 16 bit of each 32 bit is taken by blend_epi16
+      // Multiplication and shift
-        // Re-arrange to match c16_t format
+      const simde__m256i reals = simde_mm256_srai_epi32(simde_mm256_madd_epi16(x, w_c), 15); // (int32_t) .r = (x.r * w.r - x.i * w.i) >> 15
-        const simde__m256i produ = simde_mm256_blend_epi16(reals, imags, 0xAA);
+      const simde__m256i imags = simde_mm256_slli_epi32(simde_mm256_madd_epi16(x, w_s), 1);  // (int32_t) .i = (x.r * w.i + x.i * w.r) << 1, since higher 16 bit of each 32 bit is taken by blend_epi16
-        // Accumulate the product
+      // Re-arrange to match c16_t format
-        y = simde_mm256_adds_epi16(y, produ);
+      const simde__m256i produ = simde_mm256_blend_epi16(reals, imags, 0xAA);
-      }
-      // Store the result to txdataF
+      // Accumulate the product
-      simde_mm256_storeu_si256(&txdataF_precoded[sc], y);
+      y = simde_mm256_adds_epi16(y, produ);
    }
-  #endif
+    // Store the result to txdataF
+    simde_mm256_storeu_si256(&txdataF_precoded[sc], y);
+  }
+#endif
  // 128 SIMD: Do 4 RE in one iteration, 3 iterations for 1 RB
  const uint32_t re_cnt_align4 = re_cnt & ~3;
-  for(; sc<sc_offset+re_cnt_align4; sc+=sizeof(simde__m128i)/sizeof(*prec_matrix)){
+  for(; sc < sc_offset+re_cnt_align4; sc += sizeof(simde__m128i) / sizeof(prec_weight)) {
    #ifdef DEBUG_DLSCH_PRECODING_PRINT_WITH_TRIVIAL // Get result with trivial solution, TODO: To be removed
      c16_t y_triv[4];
-      for(int i=0; i<4; i++)
+      for(int i = 0; i < 4; i++)
        y_triv[i] = nr_layer_precoder_cm(n_layers,
-                                  NR_SYMBOLS_PER_SLOT,
+                                         NR_SYMBOLS_PER_SLOT,
-                                  symSz,
+                                         symSz,
-                                  txdataF_res_mapped,
+                                         txdataF_res_mapped,
-                                  prec_matrix,
+                                         ant,
-                                  symbol,
+                                         pmi_pdu,
-                                  sc + i);
+                                         symbol,
+                                         sc + i);
      memcpy(&txdataF_precoded[sc], y_triv, sizeof(y_triv));
    #endif
    // Matrix multiplication for 4 elements of the result (sizeof(simde__m128i) / sizeof(c16_t) = 4)
    simde__m128i y = simde_mm_set1_epi16(0); // Y = W[0]*X[0] + W[1]*X[1] + ... + W[nrOfLayers-1]*X[nrOfLayers-1]
-    for(int nl=0; nl<n_layers; nl++){
+    for(int nl = 0; nl < n_layers; nl++) {
+      prec_weight.r = pmi_pdu->weights[nl][ant].precoder_weight_Re;
+      prec_weight.i = pmi_pdu->weights[nl][ant].precoder_weight_Im;
      const simde__m128i x = simde_mm_loadu_epi32(&txdataF_res_mapped[nl][symbol][sc]);
      // Rearrange precoding matrix weight to match complex multiplication and broadcast it to match SIMD size
-      const simde__m128i w_c   = simde_mm_set1_epi32(c16toI32(c16conj(prec_matrix[nl])));   // broadcast conjugate of w
+      const simde__m128i w_c   = simde_mm_set1_epi32(c16toI32(c16conj(prec_weight)));   // broadcast conjugate of w
-      const simde__m128i w_s   = simde_mm_set1_epi32(c16toI32(c16swap(prec_matrix[nl])));   // broadcast swapped real and img of w
+      const simde__m128i w_s   = simde_mm_set1_epi32(c16toI32(c16swap(prec_weight)));   // broadcast swapped real and img of w
      // Multiplication and shift
      const simde__m128i reals = simde_mm_srai_epi32(simde_mm_madd_epi16(x, w_c), 15); // (int32_t) .r = (x.r * w.r - x.i * w.i) >> 15

--- a/openair1/PHY/MODULATION/nr_modulation.h
+++ b/openair1/PHY/MODULATION/nr_modulation.h
@@ -138,11 +138,13 @@ void apply_nr_rotation_RX(NR_DL_FRAME_PARMS *frame_parms,
  @param[in] n_layers, number of DLSCH layers
 */
 int nr_layer_precoder(int16_t **datatx_F_precoding, const char *prec_matrix, uint8_t n_layers, int32_t re_offset);
 c16_t nr_layer_precoder_cm(int n_layers,
                           int n_symbols,
                           int symSz,
                           c16_t datatx_F_precoding[n_layers][n_symbols][symSz],
-                           c16_t *prec_matrix,
+                           int ap,
+                           nfapi_nr_pm_pdu_t *pmi_pdu,
                           int symbol,
                           int offset);
@@ -156,7 +158,8 @@ void nr_layer_precoder_simd(const int n_layers,
                           const int n_symbols,
                           const int symSz,
                           const c16_t txdataF_res_mapped[n_layers][n_symbols][symSz],
-                           const c16_t prec_matrix[n_layers],
+                           const int ant,
+                           const nfapi_nr_pm_pdu_t *pmi_pdu,
                           const int symbol,
                           const int sc_offset,
                           const int re_cnt,

--- a/openair1/PHY/NR_TRANSPORT/nr_dlsch.c
+++ b/openair1/PHY/NR_TRANSPORT/nr_dlsch.c
@@ -535,58 +535,52 @@ void nr_generate_pdsch(processingData_L1tx_t *msgTx, int frame, int slot)
            }
          }
          else { // non-unitary Precoding
-            if(frame_parms->nb_antennas_tx == 1){ // no precoding matrix defined
+            AssertFatal(frame_parms->nb_antennas_tx > 1, "No precoding can be done with a single antenna port\n");
-              memcpy(&txdataF[ant][txdataF_offset_per_symbol + subCarrier],
+            //get the precoding matrix weights:
-                     &txdataF_precoding[ant][l_symbol][subCarrier],
+            nfapi_nr_pm_pdu_t *pmi_pdu = &gNB->gNB_config.pmi_list.pmi_pdu[pmi - 1]; // pmi 0 is identity matrix
-                     re_cnt * sizeof(**txdataF));
+            AssertFatal(pmi == pmi_pdu->pm_idx, "PMI %d doesn't match to the one in precoding matrix %d\n",
+                        pmi, pmi_pdu->pm_idx);
+            AssertFatal(ant < pmi_pdu->num_ant_ports, "Antenna port index %d exceeds precoding matrix AP size %d\n",
+                        ant, pmi_pdu->num_ant_ports);
+            AssertFatal(rel15->nrOfLayers == pmi_pdu->numLayers, "Number of layers %d doesn't match to the one in precoding matrix %d\n",
+                        rel15->nrOfLayers, pmi_pdu->numLayers);
+            if((subCarrier + re_cnt) < frame_parms->ofdm_symbol_size){ // within ofdm_symbol_size, use SIMDe
+              nr_layer_precoder_simd(rel15->nrOfLayers,
+                                     NR_SYMBOLS_PER_SLOT,
+                                     frame_parms->ofdm_symbol_size,
+                                     txdataF_precoding,
+                                     ant,
+                                     pmi_pdu,
+                                     l_symbol,
+                                     subCarrier,
+                                     re_cnt,
+                                     &txdataF[ant][txdataF_offset_per_symbol]);
              subCarrier += re_cnt;
-              if (subCarrier >= frame_parms->ofdm_symbol_size) {
-                 subCarrier -= frame_parms->ofdm_symbol_size;
-              }
            }
-            else { // precoding with more than 1 tx
+            else{ // crossing ofdm_symbol_size, use simple arithmetic operations
-              //get the precoding matrix weights:
+              for (int i = 0; i < re_cnt; i++) {
-              c16_t **mat = (c16_t**)gNB->nr_mimo_precoding_matrix[rel15->nrOfLayers - 1];
+                txdataF[ant][txdataF_offset_per_symbol + subCarrier] =
-              //i_row =0,...,dl_antenna_port
+                    nr_layer_precoder_cm(rel15->nrOfLayers,
-              //j_col =0,...,nrOfLayers
+                                         NR_SYMBOLS_PER_SLOT,
-              //mat[pmi][i_rows*2+j_col]
+                                         frame_parms->ofdm_symbol_size,
-              c16_t *W_prec = &mat[pmi][ant * rel15->nrOfLayers];
+                                         txdataF_precoding,
-              if((subCarrier + re_cnt) < frame_parms->ofdm_symbol_size){ // within ofdm_symbol_size, use SIMDe
+                                         ant,
-                nr_layer_precoder_simd(rel15->nrOfLayers,
+                                         pmi_pdu,
-                                      NR_SYMBOLS_PER_SLOT,
+                                         l_symbol,
-                                      frame_parms->ofdm_symbol_size,
+                                         subCarrier);
-                                      txdataF_precoding,
-                                      W_prec,
-                                      l_symbol,
-                                      subCarrier,
-                                      re_cnt,
-                                      &txdataF[ant][txdataF_offset_per_symbol]);
-                subCarrier += re_cnt;
-              }
-              else{ // crossing ofdm_symbol_size, use simple arithmetic operations
-                for (int i = 0; i < re_cnt; i++) {
-                  txdataF[ant][txdataF_offset_per_symbol + subCarrier] =
-                      nr_layer_precoder_cm(rel15->nrOfLayers,
-                                           NR_SYMBOLS_PER_SLOT,
-                                           frame_parms->ofdm_symbol_size,
-                                           txdataF_precoding,
-                                           W_prec,
-                                           l_symbol,
-                                           subCarrier);
 #ifdef DEBUG_DLSCH_MAPPING
-                  printf("antenna %d\t l %d \t subCarrier %d \t txdataF: %d %d\n",
+                printf("antenna %d\t l %d \t subCarrier %d \t txdataF: %d %d\n",
-                        ant,
+                       ant,
-                        symbol,
+                       symbol,
-                        subCarrier,
+                       subCarrier,
-                        txdataF[ant][l_symbol * frame_parms->ofdm_symbol_size + subCarrier + txdataF_offset].r,
+                       txdataF[ant][l_symbol * frame_parms->ofdm_symbol_size + subCarrier + txdataF_offset].r,
-                        txdataF[ant][l_symbol * frame_parms->ofdm_symbol_size + subCarrier + txdataF_offset].i);
+                       txdataF[ant][l_symbol * frame_parms->ofdm_symbol_size + subCarrier + txdataF_offset].i);
 #endif
-                  if (++subCarrier >= frame_parms->ofdm_symbol_size) {
+                if (++subCarrier >= frame_parms->ofdm_symbol_size) {
-                    subCarrier -= frame_parms->ofdm_symbol_size;
+                  subCarrier -= frame_parms->ofdm_symbol_size;
-                  }
                }
-              } // else{ // crossing ofdm_symbol_size, use simple arithmetic operations
+              }
-            } // else { // precoding with more than 1 tx
+            } // else{ // crossing ofdm_symbol_size, use simple arithmetic operations
          } // else { // non-unitary Precoding
          rb += rb_step;

--- a/openair1/PHY/defs_gNB.h
+++ b/openair1/PHY/defs_gNB.h
@@ -619,13 +619,6 @@ typedef struct PHY_VARS_gNB_s {
  /// PDSCH DMRS sequence
  uint32_t ****nr_gold_pdsch_dmrs;
-  /// PDSCH codebook I precoding LUTs
-  /// first dimension: Rank number [0,...,noOfLayers-1[
-  /// second dimension: PMI [0,...,CodeSize-1[
-  /// third dimension: [i_rows*noOfLayers+j_col], i_rows=0,...pdsch_AntennaPorts-1 and j_col=0,...,noOfLayers-1
-  int32_t ***nr_mimo_precoding_matrix;
-  int pmiq_size[NR_MAX_NB_LAYERS];
  /// PUSCH DMRS
  uint32_t ****nr_gold_pusch_dmrs;

--- a/openair1/PHY/defs_nr_common.h
+++ b/openair1/PHY/defs_nr_common.h
@@ -79,7 +79,6 @@
 #define NR_MAX_PDCCH_AGG_LEVEL 16 // 3GPP TS 38.211 V15.8 Section 7.3.2 Table 7.3.2.1-1: Supported PDCCH aggregation levels
-#define NR_MAX_NB_LAYERS 4 // 8
 #define NR_MAX_NB_PORTS 32
 #define NR_MAX_PDSCH_TBS 3824

--- a/openair1/SIMULATION/NR_PHY/dlsim.c
+++ b/openair1/SIMULATION/NR_PHY/dlsim.c
@@ -223,35 +223,21 @@ nrUE_params_t *get_nrUE_params(void) {
 }
-void validate_input_pmi(nr_pdsch_AntennaPorts_t pdsch_AntennaPorts, int nrOfLayers, int pmi)
+void validate_input_pmi(nfapi_nr_config_request_scf_t *gNB_config,
+                        nr_pdsch_AntennaPorts_t pdsch_AntennaPorts,
+                        int nrOfLayers,
+                        int pmi)
 {
  if (pmi == 0)
    return;
+  nfapi_nr_pm_pdu_t *pmi_pdu = &gNB_config->pmi_list.pmi_pdu[pmi - 1]; // pmi 0 is identity matrix
+  AssertFatal(pmi == pmi_pdu->pm_idx, "PMI %d doesn't match to the one in precoding matrix %d\n", pmi, pmi_pdu->pm_idx);
+  AssertFatal(nrOfLayers == pmi_pdu->numLayers, "Number of layers %d doesn't match to the one in precoding matrix %d for PMI %d\n",
+              nrOfLayers, pmi_pdu->numLayers, pmi);
  int num_antenna_ports = pdsch_AntennaPorts.N1 * pdsch_AntennaPorts.N2 * pdsch_AntennaPorts.XP;
-  int N1 = pdsch_AntennaPorts.N1;
+  AssertFatal(num_antenna_ports == pmi_pdu->num_ant_ports, "Configured antenna ports %d does not match precoding matrix AP size %d for PMI %d\n",
-  int N2 = pdsch_AntennaPorts.N2;
+              num_antenna_ports, pmi_pdu->num_ant_ports, pmi);
-  int O1 = N1 > 1 ? 4 : 1;
-  int O2 = N2 > 1 ? 4 : 1;
-  int K1, K2;
-  if (num_antenna_ports > 2)
-    get_K1_K2(N1, N2, &K1, &K2);
-  else {
-    K1 = 1; K2 = 1;
-  }
-  int num_pmi = 1; // pmi = 0 is the identity matrix
-  switch (nrOfLayers) {
-    case 1 :
-      num_pmi += N1 * O1 * N2 * O2 * 4;
-      AssertFatal(pmi < num_pmi, "Input PMI index %d exceeds the limit of configured matrices %d for %d layers\n", pmi, num_pmi, nrOfLayers);
-      return;
-    case 2 :
-      num_pmi += N1 * O1 * N2 * O2 * K1 * K2 * 2;
-      AssertFatal(pmi < num_pmi, "Input PMI index %d exceeds the limit of conigured matrices %d for %d layers\n", pmi, num_pmi, nrOfLayers);
-      break;
-    default :
-      AssertFatal(false, "Precoding with more than 2 nrOfLayers not yet supported\n");
-  }
 }
@@ -709,7 +695,7 @@ int main(int argc, char **argv)
  gNB->ap_N2 = pdsch_AntennaPorts.N2;
  gNB->ap_XP = pdsch_AntennaPorts.XP;
-  validate_input_pmi(pdsch_AntennaPorts, g_nrOfLayers, g_pmi);
+  validate_input_pmi(&gNB_mac->config[0], pdsch_AntennaPorts, g_nrOfLayers, g_pmi);
  NR_UE_NR_Capability_t* UE_Capability_nr = CALLOC(1,sizeof(NR_UE_NR_Capability_t));
  prepare_sim_uecap(UE_Capability_nr,scc,mu,

--- a/openair2/LAYER2/NR_MAC_gNB/config.c
+++ b/openair2/LAYER2/NR_MAC_gNB/config.c
--- a/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_dlsch.c
+++ b/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_dlsch.c
@@ -668,8 +668,7 @@ static void pf_dl(module_id_t module_id,
      else
        sched_pdsch->mcs = get_mcs_from_bler(bo, stats, &sched_ctrl->dl_bler_stats, max_mcs, frame);
      sched_pdsch->nrOfLayers = get_dl_nrOfLayers(sched_ctrl, current_BWP->dci_format);
-      sched_pdsch->pm_index =
+      sched_pdsch->pm_index = mac->identity_pm ? 0 : get_pm_index(mac, UE, sched_pdsch->nrOfLayers, mac->radio_config.pdsch_AntennaPorts.XP);
-          mac->identity_pm ? 0 : get_pm_index(UE, sched_pdsch->nrOfLayers, mac->radio_config.pdsch_AntennaPorts.XP);
      const uint8_t Qm = nr_get_Qm_dl(sched_pdsch->mcs, current_BWP->mcsTableIdx);
      const uint16_t R = nr_get_code_rate_dl(sched_pdsch->mcs, current_BWP->mcsTableIdx);
      uint32_t tbs = nr_compute_tbs(Qm,

--- a/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_primitives.c
+++ b/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_primitives.c
@@ -129,29 +129,36 @@ uint8_t get_dl_nrOfLayers(const NR_UE_sched_ctrl_t *sched_ctrl,
 }
-uint16_t get_pm_index(const NR_UE_info_t *UE,
+uint16_t get_pm_index(const gNB_MAC_INST *nrmac,
+                      const NR_UE_info_t *UE,
                      int layers,
-                      int xp_pdsch_antenna_ports) {
+                      int xp_pdsch_antenna_ports)
+{
-  if (layers == 1) return 0;
  const NR_UE_sched_ctrl_t *sched_ctrl = &UE->UE_sched_ctrl;
  const int report_id = sched_ctrl->CSI_report.cri_ri_li_pmi_cqi_report.csi_report_id;
  const nr_csi_report_t *csi_report = &UE->csi_report_template[report_id];
  const int N1 = csi_report->N1;
  const int N2 = csi_report->N2;
-  const int antenna_ports = (N1*N2)<<1;
+  const int antenna_ports = (N1 * N2) << 1;
-  if (xp_pdsch_antenna_ports == 1 &&
+  if (xp_pdsch_antenna_ports == 1)
-      antenna_ports>1)
    return 0; //identity matrix (basic 5G configuration handled by PMI report is with XP antennas)
  const int x1 = sched_ctrl->CSI_report.cri_ri_li_pmi_cqi_report.pmi_x1;
  const int x2 = sched_ctrl->CSI_report.cri_ri_li_pmi_cqi_report.pmi_x2;
-  LOG_D(NR_MAC,"PMI report: x1 %d x2 %d\n",x1,x2);
+  LOG_D(NR_MAC,"PMI report: x1 %d x2 %d layers: %d\n", x1, x2, layers);
+  int prev_layers_size = 0;
+  for (int i = 1; i < layers; i++)
+    prev_layers_size += nrmac->precoding_matrix_size[i - 1];
+  // need to return PM index to matrix initialized in init_DL_MIMO_codebook
+  // index 0 is for identity matrix
+  // order of matrices depends on layers to be transmitted
+  // elements from 1 to n for 1 layer
+  // elements from n+1 to m for 2 layers etc.
  if (antenna_ports == 2)
-    return x2;
+    return 1 + prev_layers_size + x2;  // 0 for identity matrix
  else
    AssertFatal(1==0,"More than 2 antenna ports not yet supported\n");
 }

--- a/openair2/LAYER2/NR_MAC_gNB/mac_proto.h
+++ b/openair2/LAYER2/NR_MAC_gNB/mac_proto.h
@@ -377,7 +377,8 @@ NR_pdsch_dmrs_t get_dl_dmrs_params(const NR_ServingCellConfigCommon_t *scc,
                                   const NR_tda_info_t *tda_info,
                                   const int Layers);
-uint16_t get_pm_index(const NR_UE_info_t *UE,
+uint16_t get_pm_index(const gNB_MAC_INST *nrmac,
+                      const NR_UE_info_t *UE,
                      int layers,
                      int xp_pdsch_antenna_ports);

--- a/openair2/LAYER2/NR_MAC_gNB/nr_mac_gNB.h
+++ b/openair2/LAYER2/NR_MAC_gNB/nr_mac_gNB.h
@@ -852,6 +852,7 @@ typedef struct gNB_MAC_INST_s {
  uint8_t min_grant_prb;
  uint8_t min_grant_mcs;
  bool identity_pm;
+  int precoding_matrix_size[NR_MAX_NB_LAYERS];
  nr_mac_rrc_ul_if_t mac_rrc;
  f1_config_t f1_config;