Commit 1be24d48 authored by Robert Schmidt's avatar Robert Schmidt

Merge remote-tracking branch 'origin/remove-globale-ul_ch_estimates' into...

Merge remote-tracking branch 'origin/remove-globale-ul_ch_estimates' into integration_2025_w12 (!3310)

remove globale llr_layers to save memory, improve CPU, simplify code
parents e4cb39d4 78b44a8e
......@@ -205,7 +205,6 @@ void phy_init_nr_gNB(PHY_VARS_gNB *gNB)
pusch->ul_ch_estimates = (int32_t **)malloc16(n_buf * sizeof(int32_t *));
pusch->ptrs_phase_per_slot = (int32_t **)malloc16(n_buf * sizeof(int32_t *));
pusch->rxdataF_comp = (int32_t **)malloc16(n_buf * sizeof(int32_t *));
pusch->llr_layers = (int16_t **)malloc16(max_ul_mimo_layers * sizeof(int32_t *));
for (int i = 0; i < n_buf; i++) {
pusch->ul_ch_estimates[i] = (int32_t *)malloc16_clear(sizeof(int32_t) * fp->ofdm_symbol_size * fp->symbols_per_slot);
pusch->ptrs_phase_per_slot[i] = (int32_t *)malloc16_clear(sizeof(int32_t) * fp->symbols_per_slot); // symbols per slot
......@@ -213,8 +212,6 @@ void phy_init_nr_gNB(PHY_VARS_gNB *gNB)
}
for (int i = 0; i < max_ul_mimo_layers; i++) {
pusch->llr_layers[i] = (int16_t *)malloc16_clear((8 * ((3 * 8 * 6144) + 12))
* sizeof(int16_t)); // [hna] 6144 is LTE and (8*((3*8*6144)+12)) is not clear
}
pusch->llr = (int16_t *)malloc16_clear((8 * ((3 * 8 * 6144) + 12))
* sizeof(int16_t)); // [hna] 6144 is LTE and (8*((3*8*6144)+12)) is not clear
......@@ -273,14 +270,11 @@ void phy_free_nr_gNB(PHY_VARS_gNB *gNB)
for (int ULSCH_id = 0; ULSCH_id < gNB->max_nb_pusch; ULSCH_id++) {
NR_gNB_PUSCH *pusch_vars = &gNB->pusch_vars[ULSCH_id];
for (int i=0; i< max_ul_mimo_layers; i++)
free_and_zero(pusch_vars->llr_layers[i]);
for (int i = 0; i < n_buf; i++) {
free_and_zero(pusch_vars->ul_ch_estimates[i]);
free_and_zero(pusch_vars->ptrs_phase_per_slot[i]);
free_and_zero(pusch_vars->rxdataF_comp[i]);
}
free_and_zero(pusch_vars->llr_layers);
free_and_zero(pusch_vars->ul_ch_estimates);
free_and_zero(pusch_vars->ptrs_phase_per_slot);
free_and_zero(pusch_vars->ul_valid_re_per_slot);
......
......@@ -239,14 +239,14 @@ void reset_active_ulsch(PHY_VARS_gNB *gNB, int frame);
void nr_ulsch_compute_ML_llr(NR_gNB_PUSCH *pusch_vars,
uint32_t symbol,
c16_t* rxdataF_comp0,
c16_t* rxdataF_comp1,
c16_t* ul_ch_mag0,
c16_t* ul_ch_mag1,
c16_t* llr_layers0,
c16_t* llr_layers1,
c16_t* rho0,
c16_t* rho1,
c16_t *rxdataF_comp0,
c16_t *rxdataF_comp1,
c16_t *ul_ch_mag0,
c16_t *ul_ch_mag1,
int16_t *llr_layers0,
int16_t *llr_layers1,
c16_t *rho0,
c16_t *rho1,
uint32_t nb_re,
uint8_t mod_order);
......
......@@ -1030,8 +1030,8 @@ static void inner_rx(PHY_VARS_gNB *gNB,
c16_t rxFext[nb_rx_ant][buffer_length] __attribute__((aligned(32)));
c16_t chFext[nb_layer][nb_rx_ant][buffer_length] __attribute__((aligned(32)));
memset(rxFext, 0, sizeof(c16_t) * nb_rx_ant * buffer_length);
memset(chFext, 0, sizeof(c16_t) * nb_layer * nb_rx_ant* buffer_length);
memset(rxFext, 0, sizeof(rxFext));
memset(chFext, 0, sizeof(chFext));
int dmrs_symbol;
if (gNB->chest_time == 0)
dmrs_symbol = dmrs_symbol_flag ? symbol : get_valid_dmrs_idx_for_channel_est(rel15_ul->ul_dmrs_symb_pos, symbol);
......@@ -1059,10 +1059,10 @@ static void inner_rx(PHY_VARS_gNB *gNB,
c16_t rxF_ch_magb [nb_layer][buffer_length] __attribute__((aligned(32)));
c16_t rxF_ch_magc [nb_layer][buffer_length] __attribute__((aligned(32)));
memset(rho, 0, sizeof(c16_t) * nb_layer * nb_layer* buffer_length);
memset(rxF_ch_maga, 0, sizeof(c16_t) * nb_layer * buffer_length);
memset(rxF_ch_magb, 0, sizeof(c16_t) * nb_layer * buffer_length);
memset(rxF_ch_magc, 0, sizeof(c16_t) * nb_layer * buffer_length);
memset(rho, 0, sizeof(rho));
memset(rxF_ch_maga, 0, sizeof(rxF_ch_maga));
memset(rxF_ch_magb, 0, sizeof(rxF_ch_magb));
memset(rxF_ch_magc, 0, sizeof(rxF_ch_magc));
for (int i = 0; i < nb_layer; i++)
memset(&pusch_vars->rxdataF_comp[i*nb_rx_ant][symbol * buffer_length], 0, sizeof(int32_t) * buffer_length);
......@@ -1105,12 +1105,12 @@ static void inner_rx(PHY_VARS_gNB *gNB,
if (rel15_ul->qam_mod_order <= 6) {
nr_ulsch_compute_ML_llr(pusch_vars,
symbol,
(c16_t*)&pusch_vars->rxdataF_comp[0][symbol * buffer_length],
(c16_t*)&pusch_vars->rxdataF_comp[nb_rx_ant][symbol * buffer_length],
(c16_t *)&pusch_vars->rxdataF_comp[0][symbol * buffer_length],
(c16_t *)&pusch_vars->rxdataF_comp[nb_rx_ant][symbol * buffer_length],
rxF_ch_maga[0],
rxF_ch_maga[1],
(c16_t*)&llr[0][pusch_vars->llr_offset[symbol]],
(c16_t*)&llr[1][pusch_vars->llr_offset[symbol]],
llr[0],
llr[1],
rho[0][1],
rho[1][0],
pusch_vars->ul_valid_re_per_slot[symbol],
......@@ -1134,15 +1134,15 @@ static void inner_rx(PHY_VARS_gNB *gNB,
}
}
if (nb_layer != 2 || rel15_ul->qam_mod_order > 6)
for (int aatx = 0; aatx < nb_layer; aatx++)
nr_ulsch_compute_llr((int32_t*)&pusch_vars->rxdataF_comp[aatx * nb_rx_ant][symbol * buffer_length],
(int32_t*)rxF_ch_maga[aatx],
(int32_t*)rxF_ch_magb[aatx],
(int32_t*)rxF_ch_magc[aatx],
&llr[aatx][pusch_vars->llr_offset[symbol]],
pusch_vars->ul_valid_re_per_slot[symbol],
symbol,
rel15_ul->qam_mod_order);
for (int aatx = 0; aatx < nb_layer; aatx++)
nr_ulsch_compute_llr((int32_t *)&pusch_vars->rxdataF_comp[aatx * nb_rx_ant][symbol * buffer_length],
(int32_t *)rxF_ch_maga[aatx],
(int32_t *)rxF_ch_magb[aatx],
(int32_t *)rxF_ch_magc[aatx],
llr[aatx],
pusch_vars->ul_valid_re_per_slot[symbol],
symbol,
rel15_ul->qam_mod_order);
}
typedef struct puschSymbolProc_s {
......@@ -1154,7 +1154,6 @@ typedef struct puschSymbolProc_s {
int startSymbol;
int numSymbols;
int16_t *llr;
int16_t **llr_layers;
int16_t *scramblingSequence;
uint32_t nvar;
int beam_nb;
......@@ -1175,6 +1174,12 @@ static void nr_pusch_symbol_processing(void *arg)
if (gNB->pusch_vars[ulsch_id].ul_valid_re_per_slot[symbol] == 0)
continue;
int soffset = (slot % RU_RX_SLOT_DEPTH) * frame_parms->symbols_per_slot * frame_parms->ofdm_symbol_size;
int buffer_length = ceil_mod(pusch_vars->ul_valid_re_per_slot[symbol] * NR_NB_SC_PER_RB, 16);
int16_t llrs[rel15_ul->nrOfLayers][ceil_mod(buffer_length * rel15_ul->qam_mod_order, 64)];
int16_t *llrss[rel15_ul->nrOfLayers];
for (int l = 0; l < rel15_ul->nrOfLayers; l++)
llrss[l] = llrs[l];
inner_rx(gNB,
ulsch_id,
slot,
......@@ -1182,8 +1187,8 @@ static void nr_pusch_symbol_processing(void *arg)
pusch_vars,
rel15_ul,
gNB->common_vars.rxdataF[rdata->beam_nb],
(c16_t**)gNB->pusch_vars[ulsch_id].ul_ch_estimates,
rdata->llr_layers,
(c16_t **)gNB->pusch_vars[ulsch_id].ul_ch_estimates,
llrss,
soffset,
gNB->pusch_vars[ulsch_id].ul_valid_re_per_slot[symbol],
symbol,
......@@ -1192,13 +1197,14 @@ static void nr_pusch_symbol_processing(void *arg)
int nb_re_pusch = gNB->pusch_vars[ulsch_id].ul_valid_re_per_slot[symbol];
// layer de-mapping
int16_t* llr_ptr = &rdata->llr_layers[0][pusch_vars->llr_offset[symbol]];
int16_t *llr_ptr = llrs[0];
if (rel15_ul->nrOfLayers != 1) {
llr_ptr = &rdata->llr[pusch_vars->llr_offset[symbol] * rel15_ul->nrOfLayers];
for (int i = 0; i < (nb_re_pusch); i++)
for (int l = 0; l < rel15_ul->nrOfLayers; l++)
for (int m = 0; m < rel15_ul->qam_mod_order; m++)
llr_ptr[i*rel15_ul->nrOfLayers*rel15_ul->qam_mod_order+l*rel15_ul->qam_mod_order+m] = rdata->llr_layers[l][pusch_vars->llr_offset[symbol] + i*rel15_ul->qam_mod_order+m];
for (int i = 0; i < (nb_re_pusch); i++)
for (int l = 0; l < rel15_ul->nrOfLayers; l++)
for (int m = 0; m < rel15_ul->qam_mod_order; m++)
llr_ptr[i * rel15_ul->nrOfLayers * rel15_ul->qam_mod_order + l * rel15_ul->qam_mod_order + m] =
llrss[l][i * rel15_ul->qam_mod_order + m];
}
// unscrambling
int16_t *llr16 = (int16_t*)&rdata->llr[pusch_vars->llr_offset[symbol] * rel15_ul->nrOfLayers];
......@@ -1499,7 +1505,6 @@ int nr_rx_pusch_tp(PHY_VARS_gNB *gNB,
rdata->numSymbols = task_index == loop_iter - 1 ? rel15_ul->nr_of_symbols - (loop_iter - 1) * numSymbols : numSymbols;
rdata->ulsch_id = ulsch_id;
rdata->llr = pusch_vars->llr;
rdata->llr_layers = pusch_vars->llr_layers;
rdata->scramblingSequence = scramblingSequence;
rdata->nvar = nvar;
rdata->beam_nb = beam_nb;
......
......@@ -79,7 +79,7 @@ void nr_ulsch_compute_llr(int32_t *rxdataF_comp,
* Output:
* stream0_out: Output LLRs for 1st stream
*/
void nr_ulsch_qpsk_qpsk(c16_t *stream0_in, c16_t *stream1_in, c16_t *stream0_out, c16_t *rho01, uint32_t length)
void nr_ulsch_qpsk_qpsk(c16_t *stream0_in, c16_t *stream1_in, int16_t *stream0_out, c16_t *rho01, uint32_t length)
{
#ifdef USE_128BIT
simde__m128i *rho01_128i = (simde__m128i *)rho01;
......@@ -610,7 +610,7 @@ void nr_ulsch_qam16_qam16(c16_t *stream0_in,
c16_t *stream1_in,
c16_t *ch_mag,
c16_t *ch_mag_i,
c16_t *stream0_out,
int16_t *stream0_out,
c16_t *rho01,
uint32_t length)
{
......@@ -1076,7 +1076,7 @@ void nr_ulsch_qam64_qam64(c16_t *stream0_in,
c16_t *stream1_in,
c16_t *ch_mag,
c16_t *ch_mag_i,
c16_t *stream0_out,
int16_t *stream0_out,
c16_t *rho01,
uint32_t length)
{
......@@ -1415,15 +1415,13 @@ void nr_ulsch_qam64_qam64(c16_t *stream0_in,
simde__m128i y2i = simde_mm_subs_epi16(logmax_num_re0, logmax_den_re0);
// Map to output stream, difficult to do in SIMD since we have 6 16bit LLRs
int idx0 = 12 * i;
for (int re = 0; re < 8; re++) {
stream0_out[idx0 + 0].r = ((short *)&y0r)[re];
stream0_out[idx0 + 0].i = ((short *)&y1r)[re];
stream0_out[idx0 + 1].r = ((short *)&y2r)[re];
stream0_out[idx0 + 1].i = ((short *)&y0i)[re];
stream0_out[idx0 + 2].r = ((short *)&y1i)[re];
stream0_out[idx0 + 2].i = ((short *)&y2i)[re];
idx0 += 3;
*stream0_out++ = ((short *)&y0r)[re];
*stream0_out++ = ((short *)&y1r)[re];
*stream0_out++ = ((short *)&y2r)[re];
*stream0_out++ = ((short *)&y0i)[re];
*stream0_out++ = ((short *)&y1i)[re];
*stream0_out++ = ((short *)&y2i)[re];
}
}
#else
......@@ -1763,36 +1761,39 @@ void nr_ulsch_qam64_qam64(c16_t *stream0_in,
simde__m256i y2i = simde_mm256_subs_epi16(logmax_num_re0, logmax_den_re0);
// Map to output stream, difficult to do in SIMD since we have 6 16bit LLRs
int idx0 = 24 * i;
for (int re = 0; re < 16; re++) {
stream0_out[idx0 + 0].r = ((short *)&y0r)[re];
stream0_out[idx0 + 0].i = ((short *)&y1r)[re];
stream0_out[idx0 + 1].r = ((short *)&y2r)[re];
stream0_out[idx0 + 1].i = ((short *)&y0i)[re];
stream0_out[idx0 + 2].r = ((short *)&y1i)[re];
stream0_out[idx0 + 2].i = ((short *)&y2i)[re];
idx0 += 3;
*stream0_out++ = ((short *)&y0r)[re];
*stream0_out++ = ((short *)&y1r)[re];
*stream0_out++ = ((short *)&y2r)[re];
*stream0_out++ = ((short *)&y0i)[re];
*stream0_out++ = ((short *)&y1i)[re];
*stream0_out++ = ((short *)&y2i)[re];
}
}
#endif
}
static void nr_ulsch_shift_llr(int16_t **llr_layers, uint32_t nb_re, uint32_t rxdataF_ext_offset, uint8_t mod_order, int shift)
static void nr_ulsch_shift_llr(int16_t *llr_layer0,
int16_t *llr_layer1,
uint32_t nb_re,
uint32_t rxdataF_ext_offset,
uint8_t mod_order,
int shift)
{
simde__m128i *llr_layers0 = (simde__m128i *)&llr_layers[0][rxdataF_ext_offset * mod_order];
simde__m128i *llr_layers1 = (simde__m128i *)&llr_layers[1][rxdataF_ext_offset * mod_order];
simde__m128i *llr_layers0 = (simde__m128i *)llr_layer0;
simde__m128i *llr_layers1 = (simde__m128i *)llr_layer1;
uint8_t mem_offset = ((16 - ((long)llr_layers0)) & 0xF) >> 2;
if (mem_offset > 0) {
c16_t *llr_layers0_c16 = (c16_t *)&llr_layers[0][rxdataF_ext_offset * mod_order];
c16_t *llr_layers1_c16 = (c16_t *)&llr_layers[1][rxdataF_ext_offset * mod_order];
c16_t *llr_layers0_c16 = (c16_t *)llr_layer0;
c16_t *llr_layers1_c16 = (c16_t *)llr_layer1;
for (int i = 0; i < mem_offset; i++) {
llr_layers0_c16[i] = c16Shift(llr_layers0_c16[i], shift);
llr_layers1_c16[i] = c16Shift(llr_layers1_c16[i], shift);
}
llr_layers0 = (simde__m128i *)&llr_layers[0][rxdataF_ext_offset * mod_order + (mem_offset << 1)];
llr_layers1 = (simde__m128i *)&llr_layers[1][rxdataF_ext_offset * mod_order + (mem_offset << 1)];
llr_layers0 = (simde__m128i *)&llr_layer0[mem_offset * 2];
llr_layers1 = (simde__m128i *)&llr_layer1[mem_offset * 2];
}
for (int i = 0; i < nb_re >> 2; i++) {
......@@ -1803,14 +1804,14 @@ static void nr_ulsch_shift_llr(int16_t **llr_layers, uint32_t nb_re, uint32_t rx
void nr_ulsch_compute_ML_llr(NR_gNB_PUSCH *pusch_vars,
uint32_t symbol,
c16_t* rxdataF_comp0,
c16_t* rxdataF_comp1,
c16_t* ul_ch_mag0,
c16_t* ul_ch_mag1,
c16_t* llr_layers0,
c16_t* llr_layers1,
c16_t* rho0,
c16_t* rho1,
c16_t *rxdataF_comp0,
c16_t *rxdataF_comp1,
c16_t *ul_ch_mag0,
c16_t *ul_ch_mag1,
int16_t *llr_layers0,
int16_t *llr_layers1,
c16_t *rho0,
c16_t *rho1,
uint32_t nb_re,
uint8_t mod_order)
{
......@@ -1818,7 +1819,7 @@ void nr_ulsch_compute_ML_llr(NR_gNB_PUSCH *pusch_vars,
case 2:
nr_ulsch_qpsk_qpsk(rxdataF_comp0, rxdataF_comp1, llr_layers0, rho0, nb_re);
nr_ulsch_qpsk_qpsk(rxdataF_comp1, rxdataF_comp0, llr_layers1, rho1, nb_re);
nr_ulsch_shift_llr(pusch_vars->llr_layers, nb_re, pusch_vars->llr_offset[symbol] >> 1, 2, 4);
nr_ulsch_shift_llr((int16_t *)llr_layers0, (int16_t *)llr_layers1, nb_re, pusch_vars->llr_offset[symbol] >> 1, 2, 4);
break;
case 4:
nr_ulsch_qam16_qam16(rxdataF_comp0, rxdataF_comp1, ul_ch_mag0, ul_ch_mag1, llr_layers0, rho0, nb_re);
......
......@@ -318,10 +318,6 @@ typedef struct {
/// \brief llr values.
/// - first index: ? [0..1179743] (hard coded)
int16_t *llr;
/// \brief llr values per layer.
/// - first index: ? [0..3] (hard coded)
/// - first index: ? [0..1179743] (hard coded)
int16_t **llr_layers;
// PTRS symbol index, to be updated every PTRS symbol within a slot.
uint8_t ptrs_symbol_index;
/// bit mask of PT-RS ofdm symbol indicies
......
......@@ -1326,13 +1326,6 @@ int main(int argc, char *argv[])
1,
1);
LOG_M("rxsigF0_llrlayers0.m",
"rxsF0_llrlayers0",
&pusch_vars->llr_layers[0][0],
(nb_symb_sch - 1) * NR_NB_SC_PER_RB * pusch_pdu->rb_size * mod_order,
1,
0);
if (precod_nbr_layers == 2) {
LOG_M("chestF3.m",
......@@ -1348,13 +1341,6 @@ int main(int argc, char *argv[])
nb_symb_sch * (off + (NR_NB_SC_PER_RB * pusch_pdu->rb_size)),
1,
1);
LOG_M("rxsigF0_llrlayers1.m",
"rxsF0_llrlayers1",
&pusch_vars->llr_layers[1][0],
(nb_symb_sch - 1) * NR_NB_SC_PER_RB * pusch_pdu->rb_size * mod_order,
1,
0);
}
if (precod_nbr_layers == 4) {
......@@ -1396,24 +1382,6 @@ int main(int argc, char *argv[])
nb_symb_sch * (off + (NR_NB_SC_PER_RB * pusch_pdu->rb_size)),
1,
1);
LOG_M("rxsigF0_llrlayers1.m",
"rxsF0_llrlayers1",
&pusch_vars->llr_layers[1][0],
(nb_symb_sch - 1) * NR_NB_SC_PER_RB * pusch_pdu->rb_size * mod_order,
1,
0);
LOG_M("rxsigF0_llrlayers2.m",
"rxsF0_llrlayers2",
&pusch_vars->llr_layers[2][0],
(nb_symb_sch - 1) * NR_NB_SC_PER_RB * pusch_pdu->rb_size * mod_order,
1,
0);
LOG_M("rxsigF0_llrlayers3.m",
"rxsF0_llrlayers3",
&pusch_vars->llr_layers[3][0],
(nb_symb_sch - 1) * NR_NB_SC_PER_RB * pusch_pdu->rb_size * mod_order,
1,
0);
}
LOG_M("rxsigF0_llr.m",
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment