Commit 651ef3da authored by Robert Schmidt

Merge remote-tracking branch 'origin/resource_mapping_optim' into integration_2025_w07 (!3127)

Optimizations of PDSCH Resource Mapping in nr_dlsch.c/nr_modulation.c

These changes add SIMD optimizations for Neon/AVX2/AVX512 in the PDSCH
transmit path. The timing improvements listed below were measured with the

    nr_dlsim -e25 -R273 -b273 -s30 -x "layers" -y 4 -z 4 -P

benchmark, run with "layers" set to 2, 3, and 4, comparing the "PHY proc tx" time:

273 PRBs, MCS 25, 64QAM

peafowl (gcc11, AMD EPYC 9374F)

    2-layer, 4 TX : 431 us (develop 565 us)
    3-layer, 4 TX : 692 us (develop 849 us)
    4-layer, 4 TX : 963 us (develop 1172 us)

stupix (gcc10, Xeon Gold 6354)

    2-layer, 4 TX : 568 us (develop 652 us)
    3-layer, 4 TX : 901 us (develop 1030 us)
    4-layer, 4 TX : 1250 us (develop 1396 us)

matix (gcc14, Ryzen 9 PRO 7945)

    2-layer, 4 TX : 317 us (develop 505 us)
    3-layer, 4 TX : 538 us (develop 779 us)
    4-layer, 4 TX : 767 us (develop 1233 us)
parents f0d6d42a 8a63b013
This diff is collapsed.
...@@ -60,8 +60,7 @@ void nr_layer_mapping(int nbCodes, ...@@ -60,8 +60,7 @@ void nr_layer_mapping(int nbCodes,
uint8_t n_layers, uint8_t n_layers,
int layerSz, int layerSz,
uint32_t n_symbs, uint32_t n_symbs,
c16_t tx_layers[layerSz], c16_t tx_layers[][layerSz]);
int l);
/*! \brief Perform NR layer mapping. TS 38.211 V15.4.0 subclause 7.3.1.3 /*! \brief Perform NR layer mapping. TS 38.211 V15.4.0 subclause 7.3.1.3
@param[in] ulsch_ue, double Pointer to NR_UE_ULSCH_t struct @param[in] ulsch_ue, double Pointer to NR_UE_ULSCH_t struct
...@@ -136,12 +135,10 @@ void apply_nr_rotation_RX(const NR_DL_FRAME_PARMS *frame_parms, ...@@ -136,12 +135,10 @@ void apply_nr_rotation_RX(const NR_DL_FRAME_PARMS *frame_parms,
c16_t nr_layer_precoder(int sz, c16_t datatx_F_precoding[][sz], const char *prec_matrix, uint8_t n_layers, int32_t re_offset); c16_t nr_layer_precoder(int sz, c16_t datatx_F_precoding[][sz], const char *prec_matrix, uint8_t n_layers, int32_t re_offset);
c16_t nr_layer_precoder_cm(int n_layers, c16_t nr_layer_precoder_cm(int n_layers,
int n_symbols,
int symSz, int symSz,
c16_t datatx_F_precoding[n_layers][n_symbols][symSz], c16_t datatx_F_precoding[n_layers][symSz],
int ap, int ap,
nfapi_nr_pm_pdu_t *pmi_pdu, nfapi_nr_pm_pdu_t *pmi_pdu,
int symbol,
int offset); int offset);
/*! \brief Precoding with SIMDe, txdataF_precoded[] = prec_matrix[] * txdataF_res_mapped[] /*! \brief Precoding with SIMDe, txdataF_precoded[] = prec_matrix[] * txdataF_res_mapped[]
...@@ -151,13 +148,11 @@ c16_t nr_layer_precoder_cm(int n_layers, ...@@ -151,13 +148,11 @@ c16_t nr_layer_precoder_cm(int n_layers,
@param[out] txdataF_precoded Precoded antenna data @param[out] txdataF_precoded Precoded antenna data
*/ */
void nr_layer_precoder_simd(const int n_layers, void nr_layer_precoder_simd(const int n_layers,
const int n_symbols, const int symSz,
const int symSz, const c16_t txdataF_res_mapped[n_layers][symSz],
const c16_t txdataF_res_mapped[n_layers][n_symbols][symSz], const int ant,
const int ant, const nfapi_nr_pm_pdu_t *pmi_pdu,
const nfapi_nr_pm_pdu_t *pmi_pdu, const int sc_offset,
const int symbol, const int re_cnt,
const int sc_offset, c16_t *txdataF_precoded);
const int re_cnt,
c16_t *txdataF_precoded);
#endif #endif
...@@ -249,9 +249,11 @@ void nr_generate_dci_top(processingData_L1tx_t *msgTx, int slot, int txdataF_off ...@@ -249,9 +249,11 @@ void nr_generate_dci_top(processingData_L1tx_t *msgTx, int slot, int txdataF_off
{ {
PHY_VARS_gNB *gNB = msgTx->gNB; PHY_VARS_gNB *gNB = msgTx->gNB;
NR_DL_FRAME_PARMS *frame_parms = &gNB->frame_parms; NR_DL_FRAME_PARMS *frame_parms = &gNB->frame_parms;
start_meas(&gNB->dci_generation_stats);
for (int i = 0; i < msgTx->num_ul_pdcch; i++) for (int i = 0; i < msgTx->num_ul_pdcch; i++)
nr_generate_dci(msgTx->gNB, &msgTx->ul_pdcch_pdu[i].pdcch_pdu.pdcch_pdu_rel15, txdataF_offset, frame_parms, slot); nr_generate_dci(msgTx->gNB, &msgTx->ul_pdcch_pdu[i].pdcch_pdu.pdcch_pdu_rel15, txdataF_offset, frame_parms, slot);
for (int i = 0; i < msgTx->num_dl_pdcch; i++) for (int i = 0; i < msgTx->num_dl_pdcch; i++)
nr_generate_dci(msgTx->gNB, &msgTx->pdcch_pdu[i].pdcch_pdu_rel15, txdataF_offset, frame_parms, slot); nr_generate_dci(msgTx->gNB, &msgTx->pdcch_pdu[i].pdcch_pdu_rel15, txdataF_offset, frame_parms, slot);
stop_meas(&gNB->dci_generation_stats);
} }
This diff is collapsed.
...@@ -520,6 +520,8 @@ typedef struct PHY_VARS_gNB_s { ...@@ -520,6 +520,8 @@ typedef struct PHY_VARS_gNB_s {
time_stats_t dlsch_interleaving_stats; time_stats_t dlsch_interleaving_stats;
time_stats_t dlsch_segmentation_stats; time_stats_t dlsch_segmentation_stats;
time_stats_t dci_generation_stats;
time_stats_t phase_comp_stats;
time_stats_t rx_pusch_stats; time_stats_t rx_pusch_stats;
time_stats_t rx_pusch_init_stats; time_stats_t rx_pusch_init_stats;
time_stats_t rx_pusch_symbol_processing_stats; time_stats_t rx_pusch_symbol_processing_stats;
......
...@@ -290,6 +290,7 @@ void phy_procedures_gNB_TX(processingData_L1tx_t *msgTx, ...@@ -290,6 +290,7 @@ void phy_procedures_gNB_TX(processingData_L1tx_t *msgTx,
//apply the OFDM symbol rotation here //apply the OFDM symbol rotation here
if (gNB->phase_comp) { if (gNB->phase_comp) {
start_meas(&gNB->phase_comp_stats);
for(int i = 0; i < gNB->common_vars.num_beams_period; ++i) { for(int i = 0; i < gNB->common_vars.num_beams_period; ++i) {
for (int aa = 0; aa < cfg->carrier_config.num_tx_ant.value; aa++) { for (int aa = 0; aa < cfg->carrier_config.num_tx_ant.value; aa++) {
apply_nr_rotation_TX(fp, apply_nr_rotation_TX(fp,
...@@ -304,6 +305,7 @@ void phy_procedures_gNB_TX(processingData_L1tx_t *msgTx, ...@@ -304,6 +305,7 @@ void phy_procedures_gNB_TX(processingData_L1tx_t *msgTx,
T_INT(aa), T_BUFFER(&gNB->common_vars.txdataF[aa][txdataF_offset], fp->samples_per_slot_wCP*sizeof(int32_t))); T_INT(aa), T_BUFFER(&gNB->common_vars.txdataF[aa][txdataF_offset], fp->samples_per_slot_wCP*sizeof(int32_t)));
} }
} }
stop_meas(&gNB->phase_comp_stats);
} }
VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_PHY_PROCEDURES_gNB_TX + gNB->CC_id, 0); VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_PHY_PROCEDURES_gNB_TX + gNB->CC_id, 0);
......
...@@ -965,10 +965,12 @@ printf("%d\n", slot); ...@@ -965,10 +965,12 @@ printf("%d\n", slot);
reset_meas(&gNB->dlsch_segmentation_stats); reset_meas(&gNB->dlsch_segmentation_stats);
reset_meas(&gNB->dlsch_modulation_stats); reset_meas(&gNB->dlsch_modulation_stats);
reset_meas(&gNB->dlsch_encoding_stats); reset_meas(&gNB->dlsch_encoding_stats);
reset_meas(&gNB->dci_generation_stats);
reset_meas(&gNB->tinput); reset_meas(&gNB->tinput);
reset_meas(&gNB->tprep); reset_meas(&gNB->tprep);
reset_meas(&gNB->tparity); reset_meas(&gNB->tparity);
reset_meas(&gNB->toutput); reset_meas(&gNB->toutput);
reset_meas(&gNB->phase_comp_stats);
uint32_t errors_scrambling[16] = {0}; uint32_t errors_scrambling[16] = {0};
int n_errors[16] = {0}; int n_errors[16] = {0};
...@@ -1264,6 +1266,7 @@ printf("%d\n", slot); ...@@ -1264,6 +1266,7 @@ printf("%d\n", slot);
UE->dl_harq_processes[0][slot].C, UE->dl_harq_processes[0][slot].C,
msgDataTx->dlsch[0][0].harq_process.pdsch_pdu.pdsch_pdu_rel15.TBSize[0] << 3); msgDataTx->dlsch[0][0].harq_process.pdsch_pdu.pdsch_pdu_rel15.TBSize[0] << 3);
printDistribution(&gNB->phy_proc_tx,table_tx,"PHY proc tx"); printDistribution(&gNB->phy_proc_tx,table_tx,"PHY proc tx");
printStatIndent2(&gNB->dci_generation_stats, "DCI encoding time");
printStatIndent2(&gNB->dlsch_encoding_stats,"DLSCH encoding time"); printStatIndent2(&gNB->dlsch_encoding_stats,"DLSCH encoding time");
printStatIndent3(&gNB->dlsch_segmentation_stats,"DLSCH segmentation time"); printStatIndent3(&gNB->dlsch_segmentation_stats,"DLSCH segmentation time");
printStatIndent3(&gNB->tinput,"DLSCH LDPC input processing time"); printStatIndent3(&gNB->tinput,"DLSCH LDPC input processing time");
...@@ -1274,8 +1277,9 @@ printf("%d\n", slot); ...@@ -1274,8 +1277,9 @@ printf("%d\n", slot);
printStatIndent3(&gNB->dlsch_interleaving_stats, "DLSCH Interleaving time"); printStatIndent3(&gNB->dlsch_interleaving_stats, "DLSCH Interleaving time");
printStatIndent2(&gNB->dlsch_modulation_stats,"DLSCH modulation time"); printStatIndent2(&gNB->dlsch_modulation_stats,"DLSCH modulation time");
printStatIndent2(&gNB->dlsch_scrambling_stats, "DLSCH scrambling time"); printStatIndent2(&gNB->dlsch_scrambling_stats, "DLSCH scrambling time");
printStatIndent2(&gNB->dlsch_resource_mapping_stats, "DLSCH Resource Mapping time"); printStatIndent2(&gNB->dlsch_precoding_stats,"DLSCH Mapping/Precoding time");
printStatIndent2(&gNB->dlsch_precoding_stats,"DLSCH Layer Precoding time"); if (gNB->phase_comp)
printStatIndent2(&gNB->phase_comp_stats, "Phase Compensation");
printf("\nUE function statistics (per %d us slot)\n", 1000 >> *scc->ssbSubcarrierSpacing); printf("\nUE function statistics (per %d us slot)\n", 1000 >> *scc->ssbSubcarrierSpacing);
for (int i = RX_PDSCH_STATS; i <= DLSCH_PROCEDURES_STATS; i++) { for (int i = RX_PDSCH_STATS; i <= DLSCH_PROCEDURES_STATS; i++) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment