Commit 651ef3da authored by Robert Schmidt

Merge remote-tracking branch 'origin/resource_mapping_optim' into integration_2025_w07 (!3127)

Optimizations of PDSCH Resource Mapping in nr_dlsch.c/nr_modulation.c

These changes add SIMD optimizations for Neon/AVX2/AVX512 in the PDSCH
transmit path. The timing improvements are listed here based on the

    nr_dlsim -e25 -R273 -b273 -s30 -x "layers" -y 4 -z 4 -P

benchmark, with "layers" set to 2, 3 and 4, and comparing the "PHY proc tx" time:

273 PRBs, MCS 25, 64QAM

peafowl (gcc11,AMD EPYC 9374F)

    2-layer, 4 TX : 431 us (develop 565 us)
    3-layer, 4 TX : 692 us (develop 849 us)
    4-layer, 4 TX : 963 us (develop 1172 us)

stupix (gcc10, Xeon Gold 6354)

    2-layer, 4 TX : 568 us (develop 652 us)
    3-layer, 4 TX : 901 us (develop 1030 us)
    4-layer, 4 TX : 1250 us (develop 1396 us)

matix (gcc14, Ryzen 9 PRO 7945)

    2-layer, 4 TX : 317 us (develop 505 us)
    3-layer, 4 TX : 538 us (develop 779 us)
    4-layer, 4 TX : 767 us (develop 1233 us)
parents f0d6d42a 8a63b013
This diff is collapsed.
......@@ -60,8 +60,7 @@ void nr_layer_mapping(int nbCodes,
uint8_t n_layers,
int layerSz,
uint32_t n_symbs,
c16_t tx_layers[layerSz],
int l);
c16_t tx_layers[][layerSz]);
/*! \brief Perform NR layer mapping. TS 38.211 V15.4.0 subclause 7.3.1.3
@param[in] ulsch_ue, double Pointer to NR_UE_ULSCH_t struct
......@@ -136,12 +135,10 @@ void apply_nr_rotation_RX(const NR_DL_FRAME_PARMS *frame_parms,
c16_t nr_layer_precoder(int sz, c16_t datatx_F_precoding[][sz], const char *prec_matrix, uint8_t n_layers, int32_t re_offset);
c16_t nr_layer_precoder_cm(int n_layers,
int n_symbols,
int symSz,
c16_t datatx_F_precoding[n_layers][n_symbols][symSz],
c16_t datatx_F_precoding[n_layers][symSz],
int ap,
nfapi_nr_pm_pdu_t *pmi_pdu,
int symbol,
int offset);
/*! \brief Precoding with SIMDe, txdataF_precoded[] = prec_matrix[] * txdataF_res_mapped[]
......@@ -151,13 +148,11 @@ c16_t nr_layer_precoder_cm(int n_layers,
@param[out] txdataF_precoded Precoded antenna data
*/
void nr_layer_precoder_simd(const int n_layers,
const int n_symbols,
const int symSz,
const c16_t txdataF_res_mapped[n_layers][n_symbols][symSz],
const int ant,
const nfapi_nr_pm_pdu_t *pmi_pdu,
const int symbol,
const int sc_offset,
const int re_cnt,
c16_t *txdataF_precoded);
const int symSz,
const c16_t txdataF_res_mapped[n_layers][symSz],
const int ant,
const nfapi_nr_pm_pdu_t *pmi_pdu,
const int sc_offset,
const int re_cnt,
c16_t *txdataF_precoded);
#endif
......@@ -249,9 +249,11 @@ void nr_generate_dci_top(processingData_L1tx_t *msgTx, int slot, int txdataF_off
{
PHY_VARS_gNB *gNB = msgTx->gNB;
NR_DL_FRAME_PARMS *frame_parms = &gNB->frame_parms;
start_meas(&gNB->dci_generation_stats);
for (int i = 0; i < msgTx->num_ul_pdcch; i++)
nr_generate_dci(msgTx->gNB, &msgTx->ul_pdcch_pdu[i].pdcch_pdu.pdcch_pdu_rel15, txdataF_offset, frame_parms, slot);
for (int i = 0; i < msgTx->num_dl_pdcch; i++)
nr_generate_dci(msgTx->gNB, &msgTx->pdcch_pdu[i].pdcch_pdu_rel15, txdataF_offset, frame_parms, slot);
stop_meas(&gNB->dci_generation_stats);
}
This diff is collapsed.
......@@ -520,6 +520,8 @@ typedef struct PHY_VARS_gNB_s {
time_stats_t dlsch_interleaving_stats;
time_stats_t dlsch_segmentation_stats;
time_stats_t dci_generation_stats;
time_stats_t phase_comp_stats;
time_stats_t rx_pusch_stats;
time_stats_t rx_pusch_init_stats;
time_stats_t rx_pusch_symbol_processing_stats;
......
......@@ -290,6 +290,7 @@ void phy_procedures_gNB_TX(processingData_L1tx_t *msgTx,
//apply the OFDM symbol rotation here
if (gNB->phase_comp) {
start_meas(&gNB->phase_comp_stats);
for(int i = 0; i < gNB->common_vars.num_beams_period; ++i) {
for (int aa = 0; aa < cfg->carrier_config.num_tx_ant.value; aa++) {
apply_nr_rotation_TX(fp,
......@@ -304,6 +305,7 @@ void phy_procedures_gNB_TX(processingData_L1tx_t *msgTx,
T_INT(aa), T_BUFFER(&gNB->common_vars.txdataF[aa][txdataF_offset], fp->samples_per_slot_wCP*sizeof(int32_t)));
}
}
stop_meas(&gNB->phase_comp_stats);
}
VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_PHY_PROCEDURES_gNB_TX + gNB->CC_id, 0);
......
......@@ -965,10 +965,12 @@ printf("%d\n", slot);
reset_meas(&gNB->dlsch_segmentation_stats);
reset_meas(&gNB->dlsch_modulation_stats);
reset_meas(&gNB->dlsch_encoding_stats);
reset_meas(&gNB->dci_generation_stats);
reset_meas(&gNB->tinput);
reset_meas(&gNB->tprep);
reset_meas(&gNB->tparity);
reset_meas(&gNB->toutput);
reset_meas(&gNB->phase_comp_stats);
uint32_t errors_scrambling[16] = {0};
int n_errors[16] = {0};
......@@ -1264,6 +1266,7 @@ printf("%d\n", slot);
UE->dl_harq_processes[0][slot].C,
msgDataTx->dlsch[0][0].harq_process.pdsch_pdu.pdsch_pdu_rel15.TBSize[0] << 3);
printDistribution(&gNB->phy_proc_tx,table_tx,"PHY proc tx");
printStatIndent2(&gNB->dci_generation_stats, "DCI encoding time");
printStatIndent2(&gNB->dlsch_encoding_stats,"DLSCH encoding time");
printStatIndent3(&gNB->dlsch_segmentation_stats,"DLSCH segmentation time");
printStatIndent3(&gNB->tinput,"DLSCH LDPC input processing time");
......@@ -1274,8 +1277,9 @@ printf("%d\n", slot);
printStatIndent3(&gNB->dlsch_interleaving_stats, "DLSCH Interleaving time");
printStatIndent2(&gNB->dlsch_modulation_stats,"DLSCH modulation time");
printStatIndent2(&gNB->dlsch_scrambling_stats, "DLSCH scrambling time");
printStatIndent2(&gNB->dlsch_resource_mapping_stats, "DLSCH Resource Mapping time");
printStatIndent2(&gNB->dlsch_precoding_stats,"DLSCH Layer Precoding time");
printStatIndent2(&gNB->dlsch_precoding_stats,"DLSCH Mapping/Precoding time");
if (gNB->phase_comp)
printStatIndent2(&gNB->phase_comp_stats, "Phase Compensation");
printf("\nUE function statistics (per %d us slot)\n", 1000 >> *scc->ssbSubcarrierSpacing);
for (int i = RX_PDSCH_STATS; i <= DLSCH_PROCEDURES_STATS; i++) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment