Commit 58d497e8 authored by Robert Schmidt

Merge remote-tracking branch 'origin/pusch-llr-parallelize' into integration_2023_w39

parents 8e59ef77 fc2432e9
......@@ -1040,6 +1040,7 @@ set(PHY_SRC_UE
${OPENAIR1_DIR}/PHY/NR_REFSIG/dmrs_nr.c
${OPENAIR1_DIR}/PHY/NR_REFSIG/ptrs_nr.c
${OPENAIR1_DIR}/PHY/NR_ESTIMATION/nr_ul_channel_estimation.c
${OPENAIR1_DIR}/PHY/NR_ESTIMATION/nr_freq_equalization.c
${OPENAIR1_DIR}/PHY/NR_ESTIMATION/nr_measurements_gNB.c
${OPENAIR1_DIR}/PHY/TOOLS/file_output.c
${OPENAIR1_DIR}/PHY/TOOLS/cadd_vv.c
......
......@@ -475,10 +475,12 @@ void init_gNB_Tpool(int inst) {
PHY_VARS_gNB *gNB;
gNB = RC.gNB[inst];
gNB_L1_proc_t *proc = &gNB->proc;
// Number of PUSCH symbols per thread should be derived from the number of threads in the pool
gNB->num_pusch_symbols_per_thread = 1;
// ULSCH decoding threadpool
initTpool(get_softmodem_params()->threadPoolConfig, &gNB->threadPool, cpumeas(CPUMEAS_GETSTATE));
// ULSCH decoder result FIFO
initNotifiedFIFO(&gNB->respPuschSymb);
initNotifiedFIFO(&gNB->respDecode);
// L1 RX result FIFO
......
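The hunk above hard-codes num_pusch_symbols_per_thread = 1; as the comment notes, it should eventually depend on the pool size. A minimal sketch of that calculation, assuming a hypothetical num_threads value taken from the thread-pool configuration (not part of this merge):
// Hypothetical helper, illustrative only: spread the PUSCH symbols of a slot
// evenly over the worker threads, rounding up so that every symbol is covered.
static int pusch_symbols_per_thread(int symbols_per_slot, int num_threads)
{
  if (num_threads < 1)
    num_threads = 1;
  return (symbols_per_slot + num_threads - 1) / num_threads; // ceiling division
}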
......@@ -28,6 +28,7 @@
#include "PHY/CODING/nrPolar_tools/nr_polar_pbch_defs.h"
#include "PHY/NR_TRANSPORT/nr_transport_proto.h"
#include "PHY/NR_TRANSPORT/nr_transport_common_proto.h"
#include "PHY/NR_ESTIMATION/nr_ul_estimation.h"
#include "openair1/PHY/MODULATION/nr_modulation.h"
#include "openair1/PHY/defs_RU.h"
#include "openair1/PHY/CODING/nrLDPC_extern.h"
......@@ -525,6 +526,8 @@ int phy_init_nr_gNB(PHY_VARS_gNB *gNB)
init_scrambling_luts();
init_pucch2_luts();
nr_init_fde(); // Init array for frequency-domain equalization of transform-precoded PUSCH
load_nrLDPClib(NULL);
if (gNB->ldpc_offload_flag)
......@@ -683,7 +686,7 @@ int phy_init_nr_gNB(PHY_VARS_gNB *gNB)
int n_buf = Prx*max_ul_mimo_layers;
int nb_re_pusch = N_RB_UL * NR_NB_SC_PER_RB;
int nb_re_pusch2 = nb_re_pusch + (nb_re_pusch&7);
int nb_re_pusch2 = (nb_re_pusch + 7) & ~7;
gNB->pusch_vars = (NR_gNB_PUSCH *)malloc16_clear(gNB->max_nb_pusch * sizeof(NR_gNB_PUSCH));
for (int ULSCH_id = 0; ULSCH_id < gNB->max_nb_pusch; ULSCH_id++) {
......
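The hunk above replaces nb_re_pusch + (nb_re_pusch & 7) with (nb_re_pusch + 7) & ~7. Only the latter reliably rounds up to the next multiple of 8 (presumably for SIMD-friendly sizing of the per-symbol buffers); the old expression fails for e.g. 9 REs. A tiny self-contained check, for illustration only:
#include <assert.h>
// Illustrative check of the alignment formula used for nb_re_pusch2.
static inline int round_up_to_8(int n) { return (n + 7) & ~7; }
static void check_round_up_to_8(void)
{
  assert(round_up_to_8(8) == 8);
  assert(round_up_to_8(9) == 16);   // old formula: 9 + (9 & 7) = 10, not a multiple of 8
  assert(round_up_to_8(12) == 16);  // old formula: 12 + (12 & 7) = 16, happens to match
}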
/*
* Licensed to the OpenAirInterface (OAI) Software Alliance under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The OpenAirInterface Software Alliance licenses this file to You under
* the OAI Public License, Version 1.1 (the "License"); you may not use this file
* except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.openairinterface.org/?page_id=698
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*-------------------------------------------------------------------------------
* For more information about the OpenAirInterface (OAI) Software Alliance:
* contact@openairinterface.org
*/
#include "PHY/defs_eNB.h"
#include "PHY/sse_intrin.h"
#include "PHY/NR_ESTIMATION/nr_ul_estimation.h"
// Based on openair1/PHY/LTE_ESTIMATION/freq_equalization.c
// This is 4096/(1:4096) in simde__m128i format
static simde__m128i nr_inv_ch[4096]; /* = {0, 4096/1, 4096/2, 4096/3, 4096/4...}*/
void nr_init_fde()
{
for (int i = 1;i < 4096; i++)
nr_inv_ch[i] = simde_mm_set1_epi16(4096/i);
}
void nr_freq_equalization (NR_DL_FRAME_PARMS *frame_parms,
int32_t *rxdataF_comp,
int32_t *ul_ch_mag,
int32_t *ul_ch_magb,
uint8_t symbol,
uint16_t Msc_RS,
uint8_t Qm)
{
simde__m128i *rxdataF_comp128 = (simde__m128i *)rxdataF_comp;
simde__m128i *ul_ch_mag128 = (simde__m128i *)ul_ch_mag;
simde__m128i *ul_ch_magb128 = (simde__m128i *)ul_ch_magb;
AssertFatal(symbol < frame_parms->symbols_per_slot, "symbol %d >= %d\n",
symbol, frame_parms->symbols_per_slot);
AssertFatal(Msc_RS <= frame_parms->N_RB_UL*12, "Msc_RS %d > %d\n",
Msc_RS, frame_parms->N_RB_UL*12);
for (uint16_t re = 0; re < (Msc_RS >> 2); re++) {
int16_t amp = (*((int16_t*)&ul_ch_mag128[re]));
if (amp > 4095)
amp = 4095;
rxdataF_comp128[re] = simde_mm_srai_epi16(simde_mm_mullo_epi16(rxdataF_comp128[re],nr_inv_ch[amp]),3);
if (Qm == 4)
ul_ch_mag128[re] = simde_mm_set1_epi16(324); // this is 512*2/sqrt(10)
else if (Qm == 6) {
ul_ch_mag128[re] = simde_mm_set1_epi16(316); // this is 512*4/sqrt(42)
ul_ch_magb128[re] = simde_mm_set1_epi16(158); // this is 512*2/sqrt(42)
} else if(Qm != 2)
AssertFatal(1, "nr_freq_equalization(), Qm = %d, should be 2, 4 or 6. symbol=%d, Msc_RS=%d\n", Qm, symbol, Msc_RS);
}
}
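For reference, a scalar sketch of what each lane of the SIMD loop above computes: the compensated sample is divided by the channel magnitude through the precomputed 4096/amp table, with the factor of 4096 removed by the fixed-point scaling and the final shift. Illustrative only; note that simde_mm_mullo_epi16 keeps only the low 16 bits of each product, whereas this sketch uses 32-bit arithmetic.
// Scalar equivalent of one equalized sample (sketch, not part of the patch):
static inline int16_t nr_fde_scalar(int16_t in, int16_t amp)
{
  if (amp > 4095)
    amp = 4095;                 // same clipping as the SIMD path
  if (amp < 1)
    amp = 1;                    // nr_inv_ch[0] is never used by the SIMD code
  return (int16_t)(((int32_t)in * (4096 / amp)) >> 3);
}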
......@@ -486,6 +486,7 @@ int nr_pusch_channel_estimation(PHY_VARS_gNB *gNB,
* 2) Interpolate PTRS estimated value in TD after all PTRS symbols
* 3) Compensate DMRS-based estimated signal with PTRS estimation for the slot
*********************************************************************/
// #define DEBUG_UL_PTRS
void nr_pusch_ptrs_processing(PHY_VARS_gNB *gNB,
NR_DL_FRAME_PARMS *frame_parms,
nfapi_nr_pusch_pdu_t *rel15_ul,
......@@ -495,7 +496,6 @@ void nr_pusch_ptrs_processing(PHY_VARS_gNB *gNB,
uint32_t nb_re_pusch)
{
NR_gNB_PUSCH *pusch_vars = &gNB->pusch_vars[ulsch_id];
//#define DEBUG_UL_PTRS 1
int32_t *ptrs_re_symbol = NULL;
int8_t ret = 0;
uint8_t symbInSlot = rel15_ul->start_symbol_index + rel15_ul->nr_of_symbols;
......@@ -573,13 +573,6 @@ void nr_pusch_ptrs_processing(PHY_VARS_gNB *gNB,
}
}
#ifdef DEBUG_UL_PTRS
LOG_M("ptrsEstUl.m", "est", pusch_vars->ptrs_phase_per_slot[aarx], frame_parms->symbols_per_slot, 1, 1);
LOG_M("rxdataF_bf_ptrs_comp_ul.m","bf_ptrs_cmp",
&gNB->pusch_vars[0]->rxdataF_comp[aarx][rel15_ul->start_symbol_index * NR_NB_SC_PER_RB * rel15_ul->rb_size],
rel15_ul->nr_of_symbols * NR_NB_SC_PER_RB * rel15_ul->rb_size,1,1);
#endif
/*------------------------------------------------------------------------------------------------------- */
/* 3) Compensate DMRS-based estimated signal with PTRS estimation */
/*--------------------------------------------------------------------------------------------------------*/
......@@ -590,15 +583,15 @@ void nr_pusch_ptrs_processing(PHY_VARS_gNB *gNB,
#ifdef DEBUG_UL_PTRS
printf("[PHY][UL][PTRS]: Rotate Symbol %2d with %d + j* %d\n", i, phase_per_symbol[i].r,phase_per_symbol[i].i);
#endif
rotate_cpx_vector((c16_t *)&pusch_vars->rxdataF_comp[aarx][(i * rel15_ul->rb_size * NR_NB_SC_PER_RB)],
rotate_cpx_vector((c16_t *)&pusch_vars->rxdataF_comp[aarx][i * nb_re_pusch],
&phase_per_symbol[i],
(c16_t *)&pusch_vars->rxdataF_comp[aarx][(i * rel15_ul->rb_size * NR_NB_SC_PER_RB)],
(c16_t *)&pusch_vars->rxdataF_comp[aarx][i * nb_re_pusch],
((*nb_rb) * NR_NB_SC_PER_RB),
15);
}// if not DMRS Symbol
}// symbol loop
}// last symbol check
}//Antenna loop
} // if not DMRS Symbol
} // symbol loop
} // last symbol check
} // Antenna loop
}
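The rotate_cpx_vector call above de-rotates the compensated data REs of a symbol by the common phase estimated from PTRS. A scalar sketch of that Q15 rotation for one sample (the shift of 15 matches the last argument passed above); c16_t carries .r/.i int16 components as used elsewhere in this patch:
// Illustrative scalar equivalent of rotate_cpx_vector for a single RE:
// out = in * phase, brought back to Q15 by the final >> 15 (no saturation shown).
static inline c16_t rotate_one_re(c16_t in, c16_t phase)
{
  c16_t out;
  out.r = (int16_t)(((int32_t)in.r * phase.r - (int32_t)in.i * phase.i) >> 15);
  out.i = (int16_t)(((int32_t)in.r * phase.i + (int32_t)in.i * phase.r) >> 15);
  return out;
}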
uint32_t calc_power(const int16_t *x, const uint32_t size) {
......
......@@ -83,4 +83,15 @@ int nr_srs_channel_estimation(const PHY_VARS_gNB *gNB,
int32_t srs_estimated_channel_time_shifted[][1<<srs_pdu->num_ant_ports][gNB->frame_parms.ofdm_symbol_size],
int8_t *snr_per_rb,
int8_t *snr);
void nr_freq_equalization(NR_DL_FRAME_PARMS *frame_parms,
int *rxdataF_comp,
int *ul_ch_mag,
int *ul_ch_mag_b,
unsigned char symbol,
unsigned short Msc_RS,
unsigned char Qm);
void nr_init_fde(void);
#endif
......@@ -76,3 +76,21 @@ void nr_codeword_unscrambling(int16_t* llr, uint32_t size, uint8_t q, uint32_t N
}
#endif
}
void nr_codeword_unscrambling_init(int16_t *s2, uint32_t size, uint8_t q, uint32_t Nid, uint32_t n_RNTI)
{
uint32_t x1;
uint32_t x2 = (n_RNTI << 15) + (q << 14) + Nid;
simde__m128i *s128=(simde__m128i *)s2;
uint32_t s = lte_gold_generic(&x1, &x2, 1);
uint8_t *s8=(uint8_t *)&s;
for (int i = 0; i < (size +31) >> 5; i++) {
*s128++ = byte2m128i[s8[0]];
*s128++ = byte2m128i[s8[1]];
*s128++ = byte2m128i[s8[2]];
*s128++ = byte2m128i[s8[3]];
s = lte_gold_generic(&x1, &x2, 0);
}
}
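nr_codeword_unscrambling_init expands the Gold sequence into a vector of +/-1 int16 values (8 lanes per scrambling byte via byte2m128i), so that descrambling later reduces to an element-wise multiply of the LLRs, as done in nr_pusch_symbol_processing further down in this merge. A hedged scalar sketch of that use:
// Illustrative only: apply a precomputed +/-1 unscrambling sequence to LLRs,
// mirroring the "llr16[i] = llr_ptr[i] * rdata->s[i]" loop later in this patch.
static void apply_unscrambling_seq(int16_t *llr, const int16_t *seq, int num_llr)
{
  for (int i = 0; i < num_llr; i++)
    llr[i] = llr[i] * seq[i];   // seq[i] is +1 or -1
}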
......@@ -73,6 +73,7 @@ void nr_codeword_scrambling(uint8_t *in,
uint32_t* out);
void nr_codeword_unscrambling(int16_t* llr, uint32_t size, uint8_t q, uint32_t Nid, uint32_t n_RNTI);
void nr_codeword_unscrambling_init(int16_t *s, uint32_t size, uint8_t q, uint32_t Nid, uint32_t n_RNTI);
/**@}*/
......
......@@ -125,83 +125,12 @@ void free_gNB_dlsch(NR_gNB_DLSCH_t *dlsch, uint16_t N_RB, const NR_DL_FRAME_PARM
@param slot Slot number
@param harq_pid HARQ process ID
*/
void nr_rx_pusch(PHY_VARS_gNB *gNB,
uint8_t UE_id,
int nr_rx_pusch_tp(PHY_VARS_gNB *gNB,
uint8_t ulsch_id,
uint32_t frame,
uint8_t slot,
unsigned char harq_pid);
/** \brief This function performs RB extraction (signal and channel estimates) (currently signal only until channel estimation and compensation are implemented)
@param rxdataF pointer to the received frequency domain signal
@param rxdataF_ext pointer to the extracted frequency domain signal
@param rb_alloc RB allocation map (used for Resource Allocation Type 0 in NR)
@param symbol Symbol on which to act (within nr_TTI_rx)
@param start_rb The starting RB in the RB allocation (used for Resource Allocation Type 1 in NR)
@param nb_rb_pusch The number of RBs allocated (used for Resource Allocation Type 1 in NR)
@param frame_parms Pointer to frame descriptor structure
*/
void nr_ulsch_extract_rbs(c16_t **rxdataF,
NR_gNB_PUSCH *pusch_vars,
int slot,
unsigned char symbol,
uint8_t is_dmrs_symbol,
nfapi_nr_pusch_pdu_t *pusch_pdu,
NR_DL_FRAME_PARMS *frame_parms);
void nr_ulsch_scale_channel(int32_t **ul_ch_estimates_ext,
NR_DL_FRAME_PARMS *frame_parms,
NR_gNB_ULSCH_t *ulsch_gNB,
uint8_t symbol,
uint8_t is_dmrs_symbol,
uint32_t len,
uint8_t nrOfLayers,
uint16_t nb_rb,
int shift_ch_ext);
/** \brief This function computes the average channel level over all allocated RBs and antennas (TX/RX) in order to compute output shift for compensated signal
@param ul_ch_estimates_ext Channel estimates in allocated RBs
@param frame_parms Pointer to frame descriptor
@param avg Pointer to average signal strength
@param pilots_flag Flag to indicate pilots in symbol
@param nb_rb Number of allocated RBs
*/
void nr_ulsch_channel_level(int **ul_ch_estimates_ext,
NR_DL_FRAME_PARMS *frame_parms,
int32_t *avg,
uint8_t symbol,
uint32_t len,
uint8_t nrOfLayers,
unsigned short nb_rb);
/** \brief This function performs channel compensation (matched filtering) on the received RBs for this allocation. In addition, it computes the squared-magnitude of the channel with weightings for 16QAM/64QAM detection as well as dual-stream detection (cross-correlation)
@param rxdataF_ext Frequency-domain received signal in RBs to be demodulated
@param ul_ch_estimates_ext Frequency-domain channel estimates in RBs to be demodulated
@param ul_ch_mag First Channel magnitudes (16QAM/64QAM/256QAM)
@param ul_ch_magb Second weighted Channel magnitudes (64QAM/256QAM)
@param ul_ch_magc Third weighted Channel magnitudes (256QAM)
@param rxdataF_comp Compensated received waveform
@param frame_parms Pointer to frame descriptor
@param symbol Symbol on which to operate
@param Qm Modulation order of allocation
@param nb_rb Number of RBs in allocation
@param output_shift Rescaling for compensated output (should be energy-normalizing)
*/
void nr_ulsch_channel_compensation(int **rxdataF_ext,
int **ul_ch_estimates_ext,
int **ul_ch_mag,
int **ul_ch_magb,
int **ul_ch_magc,
int **rxdataF_comp,
int ***rho,
NR_DL_FRAME_PARMS *frame_parms,
unsigned char symbol,
int length,
uint8_t is_dmrs_symbol,
unsigned char mod_order,
uint8_t nrOfLayers,
unsigned short nb_rb,
unsigned char output_shift);
/*!
\brief This function implements the IDFT for transform precoding in PUSCH
\param z Pointer to input in frequency domain; it is also the output in time domain
......@@ -209,6 +138,28 @@ void nr_ulsch_channel_compensation(int **rxdataF_ext,
*/
void nr_idft(int32_t *z, uint32_t Msc_PUSCH);
void nr_ulsch_qpsk_qpsk(c16_t *stream0_in,
c16_t *stream1_in,
c16_t *stream0_out,
c16_t *rho01,
uint32_t length);
void nr_ulsch_qam16_qam16(c16_t *stream0_in,
c16_t *stream1_in,
c16_t *ch_mag,
c16_t *ch_mag_i,
c16_t *stream0_out,
c16_t *rho01,
uint32_t length);
void nr_ulsch_qam64_qam64(c16_t *stream0_in,
c16_t *stream1_in,
c16_t *ch_mag,
c16_t *ch_mag_i,
c16_t *stream0_out,
c16_t *rho01,
uint32_t length);
/** \brief This function generates log-likelihood ratios (decoder input) for single-stream QPSK received waveforms.
@param rxdataF_comp Compensated channel output
@param ulsch_llr llr output
......@@ -231,11 +182,9 @@ void nr_ulsch_qpsk_llr(int32_t *rxdataF_comp,
void nr_ulsch_16qam_llr(int32_t *rxdataF_comp,
int32_t **ul_ch_mag,
int16_t *ulsch_llr,
uint32_t nb_rb,
uint32_t nb_re,
uint8_t symbol);
/** \brief This function generates log-likelihood ratios (decoder input) for single-stream 64 QAM received waveforms.
@param rxdataF_comp Compensated channel output
@param ul_ch_mag uplink channel magnitude multiplied by the 1st amplitude threshold in QAM 64
......@@ -248,7 +197,6 @@ void nr_ulsch_64qam_llr(int32_t *rxdataF_comp,
int32_t **ul_ch_mag,
int32_t **ul_ch_magb,
int16_t *ulsch_llr,
uint32_t nb_rb,
uint32_t nb_re,
uint8_t symbol);
......@@ -266,7 +214,6 @@ void nr_ulsch_256qam_llr(int32_t *rxdataF_comp,
int32_t **ul_ch_magb,
int32_t **ul_ch_magc,
int16_t *ulsch_llr,
uint32_t nb_rb,
uint32_t nb_re,
uint8_t symbol);
......@@ -284,7 +231,6 @@ void nr_ulsch_compute_llr(int32_t *rxdataF_comp,
int32_t *ul_ch_magb,
int32_t *ul_ch_magc,
int16_t *ulsch_llr,
uint32_t nb_rb,
uint32_t nb_re,
uint8_t symbol,
uint8_t mod_order);
......@@ -292,15 +238,17 @@ void nr_ulsch_compute_llr(int32_t *rxdataF_comp,
void reset_active_stats(PHY_VARS_gNB *gNB, int frame);
void reset_active_ulsch(PHY_VARS_gNB *gNB, int frame);
void nr_ulsch_compute_ML_llr(int32_t **rxdataF_comp,
int32_t **ul_ch_mag,
int32_t ***rho,
int16_t **llr_layers,
uint8_t nb_antennas_rx,
uint32_t rb_size,
void nr_ulsch_compute_ML_llr(NR_gNB_PUSCH *pusch_vars,
uint32_t symbol,
c16_t* rxdataF_comp0,
c16_t* rxdataF_comp1,
c16_t* ul_ch_mag0,
c16_t* ul_ch_mag1,
c16_t* llr_layers0,
c16_t* llr_layers1,
c16_t* rho0,
c16_t* rho1,
uint32_t nb_re,
uint8_t symbol,
uint32_t rxdataF_ext_offset,
uint8_t mod_order);
void nr_ulsch_shift_llr(int16_t **llr_layers, uint32_t nb_re, uint32_t rxdataF_ext_offset, uint8_t mod_order, int shift);
......
......@@ -122,7 +122,6 @@ NR_gNB_ULSCH_t new_gNB_ulsch(uint8_t max_ldpc_iterations, uint16_t N_RB_UL)
static void nr_processULSegment(void *arg)
{
ldpcDecode_t *rdata = (ldpcDecode_t *)arg;
PHY_VARS_gNB *phy_vars_gNB = rdata->gNB;
NR_UL_gNB_HARQ_t *ulsch_harq = rdata->ulsch_harq;
t_nrLDPC_dec_params *p_decoderParms = &rdata->decoderParms;
int length_dec;
......@@ -156,8 +155,6 @@ static void nr_processULSegment(void *arg)
t_nrLDPC_time_stats procTime = {0};
t_nrLDPC_time_stats *p_procTime = &procTime;
// start_meas(&phy_vars_gNB->ulsch_deinterleaving_stats);
////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////// nr_deinterleaving_ldpc ///////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////
......@@ -172,7 +169,6 @@ static void nr_processULSegment(void *arg)
// for (int i =0; i<16; i++)
// printf("rx output deinterleaving w[%d]= %d r_offset %d\n", i,ulsch_harq->w[r][i], r_offset);
stop_meas(&phy_vars_gNB->ulsch_deinterleaving_stats);
//////////////////////////////////////////////////////////////////////////////////////////
......@@ -182,7 +178,6 @@ static void nr_processULSegment(void *arg)
///////////////////////// ulsch_harq->e =====> ulsch_harq->d /////////////////////////
// start_meas(&phy_vars_gNB->ulsch_rate_unmatching_stats);
if (nr_rate_matching_ldpc_rx(rdata->tbslbrm,
p_decoderParms->BG,
......@@ -196,13 +191,10 @@ static void nr_processULSegment(void *arg)
ulsch_harq->F,
Kr - ulsch_harq->F - 2 * (p_decoderParms->Z))
== -1) {
stop_meas(&phy_vars_gNB->ulsch_rate_unmatching_stats);
LOG_E(PHY, "ulsch_decoding.c: Problem in rate_matching\n");
rdata->decodeIterations = max_ldpc_iterations + 1;
return;
} else {
stop_meas(&phy_vars_gNB->ulsch_rate_unmatching_stats);
}
ulsch_harq->d_to_be_cleared[r] = false;
......@@ -221,7 +213,6 @@ static void nr_processULSegment(void *arg)
length_dec = (ulsch_harq->B + 24 * ulsch_harq->C) / ulsch_harq->C;
}
// start_meas(&phy_vars_gNB->ulsch_ldpc_decoding_stats);
// set first 2*Z_c bits to zeros
memset(&z[0], 0, 2 * ulsch_harq->Z * sizeof(int16_t));
......@@ -248,7 +239,6 @@ static void nr_processULSegment(void *arg)
if (rdata->decodeIterations <= p_decoderParms->numMaxIter)
memcpy(ulsch_harq->c[r],llrProcBuf, Kr>>3);
//stop_meas(&phy_vars_gNB->ulsch_ldpc_decoding_stats);
}
int nr_ulsch_decoding(PHY_VARS_gNB *phy_vars_gNB,
......
......@@ -11,11 +11,6 @@
#include <openair1/PHY/TOOLS/phy_scope_interface.h>
#include "PHY/sse_intrin.h"
//#define DEBUG_CH_COMP
//#define DEBUG_RB_EXT
//#define DEBUG_CH_MAG
//#define ML_DEBUG
#define INVALID_VALUE 255
void nr_idft(int32_t *z, uint32_t Msc_PUSCH)
......@@ -281,103 +276,111 @@ void nr_idft(int32_t *z, uint32_t Msc_PUSCH)
}
void nr_ulsch_extract_rbs(c16_t **rxdataF,
NR_gNB_PUSCH *pusch_vars,
int slot,
unsigned char symbol,
uint8_t is_dmrs_symbol,
static void nr_ulsch_extract_rbs (c16_t* const rxdataF,
c16_t* const chF,
c16_t *rxFext,
c16_t *chFext,
int rxoffset,
int choffset,
int aarx,
int is_dmrs_symbol,
nfapi_nr_pusch_pdu_t *pusch_pdu,
NR_DL_FRAME_PARMS *frame_parms) {
unsigned short start_re, re, nb_re_pusch;
unsigned char aarx, aatx;
uint32_t rxF_ext_index = 0;
uint32_t ul_ch0_ext_index = 0;
uint32_t ul_ch0_index = 0;
int16_t *rxF,*rxF_ext;
int *ul_ch0,*ul_ch0_ext;
int soffset = (slot&3)*frame_parms->symbols_per_slot*frame_parms->ofdm_symbol_size;
#ifdef DEBUG_RB_EXT
printf("--------------------symbol = %d-----------------------\n", symbol);
printf("--------------------ch_ext_index = %d-----------------------\n", symbol*NR_NB_SC_PER_RB * pusch_pdu->rb_size);
#endif
uint8_t is_data_re;
start_re = (frame_parms->first_carrier_offset + (pusch_pdu->rb_start + pusch_pdu->bwp_start) * NR_NB_SC_PER_RB)%frame_parms->ofdm_symbol_size;
nb_re_pusch = NR_NB_SC_PER_RB * pusch_pdu->rb_size;
NR_DL_FRAME_PARMS *frame_parms)
{
uint8_t delta = 0;
int nb_re_pusch2 = nb_re_pusch + (nb_re_pusch&7);
int start_re = (frame_parms->first_carrier_offset + (pusch_pdu->rb_start + pusch_pdu->bwp_start) * NR_NB_SC_PER_RB)%frame_parms->ofdm_symbol_size;
int nb_re_pusch = NR_NB_SC_PER_RB * pusch_pdu->rb_size;
for (aarx = 0; aarx < frame_parms->nb_antennas_rx; aarx++) {
rxF = (int16_t *)&rxdataF[aarx][soffset+(symbol * frame_parms->ofdm_symbol_size)];
rxF_ext = (int16_t *)&pusch_vars->rxdataF_ext[aarx][symbol * nb_re_pusch2]; // [hna] rxdataF_ext isn't contiguous in order to solve an alignment problem ib llr computation in case of mod_order = 4, 6
c16_t *rxF = &rxdataF[rxoffset];
c16_t *rxF_ext = &rxFext[0];
c16_t *ul_ch0 = &chF[choffset];
c16_t *ul_ch0_ext = &chFext[0];
if (is_dmrs_symbol == 0) {
if (start_re + nb_re_pusch <= frame_parms->ofdm_symbol_size) {
memcpy((void*)rxF_ext, (void*)&rxF[start_re*2], nb_re_pusch*sizeof(int32_t));
} else {
int neg_length = frame_parms->ofdm_symbol_size-start_re;
int pos_length = nb_re_pusch-neg_length;
memcpy((void*)rxF_ext,(void*)&rxF[start_re*2],neg_length*sizeof(int32_t));
memcpy((void*)&rxF_ext[2*neg_length],(void*)rxF,pos_length*sizeof(int32_t));
if (start_re + nb_re_pusch <= frame_parms->ofdm_symbol_size)
memcpy(rxF_ext, &rxF[start_re], nb_re_pusch * sizeof(c16_t));
else
{
int neg_length = frame_parms->ofdm_symbol_size - start_re;
int pos_length = nb_re_pusch - neg_length;
memcpy(rxF_ext, &rxF[start_re], neg_length * sizeof(c16_t));
memcpy(&rxF_ext[neg_length], rxF, pos_length * sizeof(c16_t));
}
for (aatx = 0; aatx < pusch_pdu->nrOfLayers; aatx++) {
ul_ch0 = &pusch_vars->ul_ch_estimates[aatx*frame_parms->nb_antennas_rx+aarx][pusch_vars->dmrs_symbol*frame_parms->ofdm_symbol_size]; // update channel estimates if new dmrs symbol are available
ul_ch0_ext = &pusch_vars->ul_ch_estimates_ext[aatx*frame_parms->nb_antennas_rx+aarx][symbol*nb_re_pusch2];
memcpy((void*)ul_ch0_ext,(void*)ul_ch0,nb_re_pusch*sizeof(int32_t));
memcpy(ul_ch0_ext, ul_ch0, nb_re_pusch * sizeof(c16_t));
}
} else {
for (aatx = 0; aatx < pusch_pdu->nrOfLayers; aatx++) {
ul_ch0 = &pusch_vars->ul_ch_estimates[aatx*frame_parms->nb_antennas_rx+aarx][pusch_vars->dmrs_symbol*frame_parms->ofdm_symbol_size]; // update channel estimates if new dmrs symbol are available
ul_ch0_ext = &pusch_vars->ul_ch_estimates_ext[aatx*frame_parms->nb_antennas_rx+aarx][symbol*nb_re_pusch2];
rxF_ext_index = 0;
ul_ch0_ext_index = 0;
ul_ch0_index = 0;
for (re = 0; re < nb_re_pusch; re++) {
uint16_t k = start_re + re;
is_data_re = allowed_xlsch_re_in_dmrs_symbol(k, start_re, frame_parms->ofdm_symbol_size, pusch_pdu->num_dmrs_cdm_grps_no_data, pusch_pdu->dmrs_config_type);
if (++k >= frame_parms->ofdm_symbol_size) {
k -= frame_parms->ofdm_symbol_size;
else if (pusch_pdu->dmrs_config_type == pusch_dmrs_type1) // 6 REs / PRB
{
AssertFatal(delta == 0 || delta == 1, "Illegal delta %d\n",delta);
c16_t *rxF32 = &rxF[start_re];
if (start_re + nb_re_pusch < frame_parms->ofdm_symbol_size) {
for (int idx = 1 - delta; idx < nb_re_pusch; idx += 2)
{
*rxF_ext++ = rxF32[idx];
*ul_ch0_ext++ = ul_ch0[idx];
}
#ifdef DEBUG_RB_EXT
printf("re = %d, is_dmrs_symbol = %d, symbol = %d\n", re, is_dmrs_symbol, symbol);
#endif
// save only data and respective channel estimates
if (is_data_re == 1) {
if (aatx == 0) {
rxF_ext[rxF_ext_index] = (rxF[ ((start_re + re)*2) % (frame_parms->ofdm_symbol_size*2)]);
rxF_ext[rxF_ext_index + 1] = (rxF[(((start_re + re)*2) + 1) % (frame_parms->ofdm_symbol_size*2)]);
rxF_ext_index +=2;
}
ul_ch0_ext[ul_ch0_ext_index] = ul_ch0[ul_ch0_index];
ul_ch0_ext_index++;
#ifdef DEBUG_RB_EXT
printf("dmrs symb %d: rxF_ext[%u] = (%d,%d), ul_ch0_ext[%u] = (%d,%d)\n",
is_dmrs_symbol,rxF_ext_index>>1, rxF_ext[rxF_ext_index],rxF_ext[rxF_ext_index+1],
ul_ch0_ext_index, ((int16_t*)&ul_ch0_ext[ul_ch0_ext_index])[0], ((int16_t*)&ul_ch0_ext[ul_ch0_ext_index])[1]);
#endif
else // handle the two pieces around DC
{
int neg_length = frame_parms->ofdm_symbol_size - start_re;
int pos_length = nb_re_pusch - neg_length;
int idx, idx2;
for (idx = 1 - delta; idx < neg_length; idx += 2)
{
*rxF_ext++ = rxF32[idx];
*ul_ch0_ext++= ul_ch0[idx];
}
ul_ch0_index++;
rxF32 = rxF;
idx2 = idx;
for (idx = 1 - delta; idx < pos_length; idx += 2, idx2 += 2)
{
*rxF_ext++ = rxF32[idx];
*ul_ch0_ext++ = ul_ch0[idx2];
}
}
}
else if (pusch_pdu->dmrs_config_type == pusch_dmrs_type2) // 8 REs / PRB
{
AssertFatal(delta==0||delta==2||delta==4,"Illegal delta %d\n",delta);
if (start_re + nb_re_pusch < frame_parms->ofdm_symbol_size)
{
for (int idx = 0; idx < nb_re_pusch; idx ++)
{
if (idx % 6 == 2 * delta || idx % 6 == 2 * delta + 1)
continue;
*rxF_ext++ = rxF[idx];
*ul_ch0_ext++ = ul_ch0[idx];
}
}
else
{
int neg_length = frame_parms->ofdm_symbol_size - start_re;
int pos_length = nb_re_pusch - neg_length;
c16_t *rxF64 = &rxF[start_re];
int idx, idx2;
for (idx = 0; idx < neg_length; idx ++)
{
if (idx % 6 == 2 * delta || idx % 6 == 2 * delta + 1)
continue;
*rxF_ext++ = rxF64[idx];
*ul_ch0_ext++ = ul_ch0[idx];
}
rxF64 = rxF;
idx2 = idx;
for (idx = 0; idx < pos_length; idx++, idx2++)
{
if (idx % 6 == 2 * delta || idx % 6 == 2 * delta + 1)
continue;
*rxF_ext++ = rxF64[idx];
*ul_ch0_ext++ = ul_ch0[idx2];
}
}
}
}
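In the rewritten nr_ulsch_extract_rbs above, a DMRS symbol with config type 1 keeps every second RE (the comb not used by DMRS), with delta selecting the comb. A scalar illustration of that pattern, assuming a single CDM group without data (hypothetical helper, not in the patch):
// Illustrative: DMRS config type 1, one CDM group without data.
// DMRS sits on the comb selected by delta, so data occupies the other comb;
// the kept indices are 1-delta, 3-delta, 5-delta, ... giving 6 data REs per PRB.
static int extract_type1_data_res(const c16_t *in, c16_t *out, int nb_re, int delta)
{
  int n = 0;
  for (int idx = 1 - delta; idx < nb_re; idx += 2)
    out[n++] = in[idx];
  return n;
}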
void nr_ulsch_scale_channel(int **ul_ch_estimates_ext,
static void nr_ulsch_scale_channel(int **ul_ch_estimates_ext,
NR_DL_FRAME_PARMS *frame_parms,
NR_gNB_ULSCH_t *ulsch_gNB,
uint8_t symbol,
uint8_t is_dmrs_symbol,
uint32_t len,
......@@ -400,60 +403,55 @@ void nr_ulsch_scale_channel(int **ul_ch_estimates_ext,
simde__m128i ch_amp128 = simde_mm_set1_epi16(ch_amp); // Q3.13
LOG_D(PHY, "Scaling PUSCH Chest in OFDM symbol %d by %d, pilots %d nb_rb %d NCP %d symbol %d\n", symbol, ch_amp, is_dmrs_symbol, nb_rb, frame_parms->Ncp, symbol);
uint32_t nb_rb_0 = len / 12 + ((len % 12) ? 1 : 0);
int off = ((nb_rb & 1) == 1) ? 4 : 0;
for (int aatx = 0; aatx < nrOfLayers; aatx++) {
for (int aarx = 0; aarx < frame_parms->nb_antennas_rx; aarx++) {
simde__m128i *ul_ch128 = (simde__m128i *)&ul_ch_estimates_ext[aatx * frame_parms->nb_antennas_rx + aarx][symbol * (off + (nb_rb * NR_NB_SC_PER_RB))];
for (int rb = 0; rb < nb_rb_0; rb++) {
ul_ch128[0] = simde_mm_mulhi_epi16(ul_ch128[0], ch_amp128);
ul_ch128[0] = simde_mm_slli_epi16(ul_ch128[0], b);
ul_ch128[1] = simde_mm_mulhi_epi16(ul_ch128[1], ch_amp128);
ul_ch128[1] = simde_mm_slli_epi16(ul_ch128[1], b);
ul_ch128[2] = simde_mm_mulhi_epi16(ul_ch128[2], ch_amp128);
ul_ch128[2] = simde_mm_slli_epi16(ul_ch128[2], b);
ul_ch128 += 3;
simde__m128i *ul_ch128 = (simde__m128i *)&ul_ch_estimates_ext[aatx * frame_parms->nb_antennas_rx + aarx][symbol * len];
for (int i = 0; i < len >> 2; i++) {
ul_ch128[i] = simde_mm_mulhi_epi16(ul_ch128[i], ch_amp128);
ul_ch128[i] = simde_mm_slli_epi16(ul_ch128[i], b);
}
}
}
}
//compute average channel_level on each (TX,RX) antenna pair
void nr_ulsch_channel_level(int **ul_ch_estimates_ext,
static int get_nb_re_pusch (NR_DL_FRAME_PARMS *frame_parms, nfapi_nr_pusch_pdu_t *rel15_ul,int symbol)
{
uint8_t dmrs_symbol_flag = (rel15_ul->ul_dmrs_symb_pos >> symbol) & 0x01;
if (dmrs_symbol_flag == 1) {
if ((rel15_ul->ul_dmrs_symb_pos >> ((symbol + 1) % frame_parms->symbols_per_slot)) & 0x01)
AssertFatal(1==0,"Double DMRS configuration is not yet supported\n");
if (rel15_ul->dmrs_config_type == 0) {
// if num_dmrs_cdm_grps_no_data is 1, only the even REs carry no data
// if num_dmrs_cdm_grps_no_data is 2, both even and odd REs carry no data
return(rel15_ul->rb_size *(12 - (rel15_ul->num_dmrs_cdm_grps_no_data*6)));
}
else return(rel15_ul->rb_size *(12 - (rel15_ul->num_dmrs_cdm_grps_no_data*4)));
} else return(rel15_ul->rb_size * NR_NB_SC_PER_RB);
}
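A quick worked example of the helper above, with illustrative numbers not taken from the patch:
// Illustrative return values of get_nb_re_pusch for rb_size = 10:
//   DMRS symbol, type 1, num_dmrs_cdm_grps_no_data = 1 : 10 * (12 - 1*6) = 60
//   DMRS symbol, type 2, num_dmrs_cdm_grps_no_data = 2 : 10 * (12 - 2*4) = 40
//   non-DMRS symbol                                    : 10 * 12         = 120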
// compute average channel_level on each (TX,RX) antenna pair
static void nr_ulsch_channel_level(int **ul_ch_estimates_ext,
NR_DL_FRAME_PARMS *frame_parms,
int32_t *avg,
uint8_t symbol,
uint32_t len,
uint8_t nrOfLayers,
unsigned short nb_rb)
uint8_t nrOfLayers)
{
short rb;
unsigned char aatx, aarx;
simde__m128i *ul_ch128, avg128U;
int16_t x = factor2(len);
int16_t y = (len)>>x;
uint32_t nb_rb_0 = len/12 + ((len%12)?1:0);
int off = ((nb_rb&1) == 1)? 4:0;
for (aatx = 0; aatx < nrOfLayers; aatx++) {
for (aarx = 0; aarx < frame_parms->nb_antennas_rx; aarx++) {
for (int aatx = 0; aatx < nrOfLayers; aatx++) {
for (int aarx = 0; aarx < frame_parms->nb_antennas_rx; aarx++) {
//clear average level
avg128U = simde_mm_setzero_si128();
ul_ch128=(simde__m128i *)&ul_ch_estimates_ext[aatx*frame_parms->nb_antennas_rx+aarx][symbol*(off+(nb_rb*12))];
ul_ch128 = (simde__m128i *)&ul_ch_estimates_ext[aatx*frame_parms->nb_antennas_rx+aarx][symbol * len];
for (rb = 0; rb < nb_rb_0; rb++) {
avg128U = simde_mm_add_epi32(avg128U, simde_mm_srai_epi32(simde_mm_madd_epi16(ul_ch128[0], ul_ch128[0]), x));
avg128U = simde_mm_add_epi32(avg128U, simde_mm_srai_epi32(simde_mm_madd_epi16(ul_ch128[1], ul_ch128[1]), x));
avg128U = simde_mm_add_epi32(avg128U, simde_mm_srai_epi32(simde_mm_madd_epi16(ul_ch128[2], ul_ch128[2]), x));
ul_ch128+=3;
for (int i = 0; i < len >> 2; i++) {
avg128U = simde_mm_add_epi32(avg128U, simde_mm_srai_epi32(simde_mm_madd_epi16(ul_ch128[i], ul_ch128[i]), x));
}
avg[aatx*frame_parms->nb_antennas_rx+aarx] = (((int32_t*)&avg128U)[0] +
......@@ -465,298 +463,109 @@ void nr_ulsch_channel_level(int **ul_ch_estimates_ext,
simde_mm_empty();
simde_m_empty();
}
static simde__m128i a_mult_conjb(simde__m128i a, simde__m128i b, unsigned char output_shift)
{
simde__m128i mmtmpD0 = simde_mm_madd_epi16(b, a);
simde__m128i mmtmpD1 = simde_mm_shufflelo_epi16(b, SIMDE_MM_SHUFFLE(2, 3, 0, 1));
mmtmpD1 = simde_mm_shufflehi_epi16(mmtmpD1, SIMDE_MM_SHUFFLE(2, 3, 0, 1));
mmtmpD1 = simde_mm_sign_epi16(mmtmpD1, *(simde__m128i *)&conjugate[0]);
mmtmpD1 = simde_mm_madd_epi16(mmtmpD1, a);
mmtmpD0 = simde_mm_srai_epi32(mmtmpD0, output_shift);
mmtmpD1 = simde_mm_srai_epi32(mmtmpD1, output_shift);
simde__m128i mmtmpD2 = simde_mm_unpacklo_epi32(mmtmpD0, mmtmpD1);
simde__m128i mmtmpD3 = simde_mm_unpackhi_epi32(mmtmpD0, mmtmpD1);
return simde_mm_packs_epi32(mmtmpD2, mmtmpD3);
}
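For reference, a scalar sketch of what a_mult_conjb returns per complex sample (the SIMD version processes four samples per call and saturates when packing; both are omitted here):
// Illustrative scalar equivalent: (a * conj(b)) >> output_shift, per component.
static inline c16_t a_mult_conjb_scalar(c16_t a, c16_t b, unsigned char output_shift)
{
  c16_t out;
  out.r = (int16_t)(((int32_t)a.r * b.r + (int32_t)a.i * b.i) >> output_shift);
  out.i = (int16_t)(((int32_t)a.i * b.r - (int32_t)a.r * b.i) >> output_shift);
  return out;
}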
//==============================================================================================
// Pre-processing for LLR computation
//==============================================================================================
void nr_ulsch_channel_compensation(int **rxdataF_ext,
int **ul_ch_estimates_ext,
int **ul_ch_mag,
int **ul_ch_magb,
int **ul_ch_magc,
int **rxdataF_comp,
int ***rho,
static void nr_ulsch_channel_compensation(c16_t *rxFext,
c16_t *chFext,
c16_t *ul_ch_maga,
c16_t *ul_ch_magb,
c16_t *ul_ch_magc,
int32_t **rxComp,
c16_t *rho,
NR_DL_FRAME_PARMS *frame_parms,
unsigned char symbol,
int length,
uint8_t is_dmrs_symbol,
unsigned char mod_order,
uint8_t nrOfLayers,
unsigned short nb_rb,
unsigned char output_shift) {
int off = ((nb_rb&1) == 1)? 4:0;
#ifdef DEBUG_CH_COMP
int16_t *rxF, *ul_ch;
int prnt_idx;
for (int nl=0; nl<nrOfLayers; nl++) {
for (int aarx = 0; aarx < frame_parms->nb_antennas_rx; aarx++) {
rxF = (int16_t *) &rxdataF_ext[aarx][symbol * (off + (nb_rb * 12))];
ul_ch = (int16_t *) &ul_ch_estimates_ext[nl * frame_parms->nb_antennas_rx + aarx][symbol * (off + (nb_rb * 12))];
printf("--------symbol = %d, mod_order = %d, output_shift = %d, layer %i, antenna rx = %d -----------\n",
symbol, mod_order, output_shift, nl, aarx);
printf("----------------Before compensation------------------\n");
for (prnt_idx = 0; prnt_idx < 12 * 5 * 2; prnt_idx += 2) {
printf("rxF[%d] = (%d,%d)\n", prnt_idx >> 1, rxF[prnt_idx], rxF[prnt_idx + 1]);
printf("ul_ch[%d] = (%d,%d)\n", prnt_idx >> 1, ul_ch[prnt_idx], ul_ch[prnt_idx + 1]);
}
}
}
#endif
#ifdef DEBUG_CH_MAG
int16_t *ch_mag;
int print_idx;
for (int ant=0; ant<frame_parms->nb_antennas_rx; ant++) {
ch_mag = (int16_t *)&ul_ch_mag[ant][symbol*(off+(nb_rb*12))];
printf("--------------------symbol = %d, mod_order = %d-----------------------\n", symbol, mod_order);
printf("----------------Before computation------------------\n");
for (print_idx=0;print_idx<5;print_idx++){
printf("ch_mag[%d] = %d\n", print_idx, ch_mag[print_idx]);
}
}
#endif
nfapi_nr_pusch_pdu_t* rel15_ul,
uint32_t symbol,
uint32_t buffer_length,
uint32_t output_shift)
{
int mod_order = rel15_ul->qam_mod_order;
int nrOfLayers = rel15_ul->nrOfLayers;
int nb_rx_ant = frame_parms->nb_antennas_rx;
unsigned short rb;
unsigned char aatx,aarx;
simde__m128i *ul_ch128,*ul_ch128_2,*ul_ch_mag128,*ul_ch_mag128b,*ul_ch_mag128c,*rxdataF128,*rxdataF_comp128,*rho128;
simde__m128i mmtmpD0,mmtmpD1,mmtmpD2,mmtmpD3,QAM_amp128={0},QAM_amp128b={0},QAM_amp128c={0};
QAM_amp128b = simde_mm_setzero_si128();
simde__m256i QAM_ampa_256 = simde_mm256_setzero_si256();
simde__m256i QAM_ampb_256 = simde_mm256_setzero_si256();
simde__m256i QAM_ampc_256 = simde_mm256_setzero_si256();
uint32_t nb_rb_0 = length/12 + ((length%12)?1:0);
for (aatx=0; aatx<nrOfLayers; aatx++) {
if (mod_order == 4) {
QAM_amp128 = simde_mm_set1_epi16(QAM16_n1); // 2/sqrt(10)
QAM_amp128b = simde_mm_setzero_si128();
QAM_amp128c = simde_mm_setzero_si128();
QAM_ampa_256 = simde_mm256_set1_epi16(QAM16_n1);
QAM_ampb_256 = simde_mm256_setzero_si256();
QAM_ampc_256 = simde_mm256_setzero_si256();
}
else if (mod_order == 6) {
QAM_amp128 = simde_mm_set1_epi16(QAM64_n1); //
QAM_amp128b = simde_mm_set1_epi16(QAM64_n2);
QAM_amp128c = simde_mm_setzero_si128();
QAM_ampa_256 = simde_mm256_set1_epi16(QAM64_n1);
QAM_ampb_256 = simde_mm256_set1_epi16(QAM64_n2);
QAM_ampc_256 = simde_mm256_setzero_si256();
}
else if (mod_order == 8) {
QAM_amp128 = simde_mm_set1_epi16(QAM256_n1); //
QAM_amp128b = simde_mm_set1_epi16(QAM256_n2);
QAM_amp128c = simde_mm_set1_epi16(QAM256_n3);
}
// printf("comp: rxdataF_comp %p, symbol %d\n",rxdataF_comp[0],symbol);
for (aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) {
ul_ch128 = (simde__m128i *)&ul_ch_estimates_ext[aatx*frame_parms->nb_antennas_rx+aarx][symbol*(off+(nb_rb*12))];
ul_ch_mag128 = (simde__m128i *)&ul_ch_mag[aatx*frame_parms->nb_antennas_rx+aarx][symbol*(off+(nb_rb*12))];
ul_ch_mag128b = (simde__m128i *)&ul_ch_magb[aatx*frame_parms->nb_antennas_rx+aarx][symbol*(off+(nb_rb*12))];
ul_ch_mag128c = (simde__m128i *)&ul_ch_magc[aatx*frame_parms->nb_antennas_rx+aarx][symbol*(off+(nb_rb*12))];
rxdataF128 = (simde__m128i *)&rxdataF_ext[aarx][symbol*(off+(nb_rb*12))];
rxdataF_comp128 = (simde__m128i *)&rxdataF_comp[aatx*frame_parms->nb_antennas_rx+aarx][symbol*(off+(nb_rb*12))];
for (rb=0; rb<nb_rb_0; rb++) {
if (mod_order>2) {
// get channel amplitude if not QPSK
//print_shorts("ch:",(int16_t*)&ul_ch128[0]);
mmtmpD0 = simde_mm_madd_epi16(ul_ch128[0],ul_ch128[0]);
mmtmpD0 = simde_mm_srai_epi32(mmtmpD0,output_shift);
mmtmpD1 = simde_mm_madd_epi16(ul_ch128[1],ul_ch128[1]);
mmtmpD1 = simde_mm_srai_epi32(mmtmpD1,output_shift);
mmtmpD0 = simde_mm_packs_epi32(mmtmpD0,mmtmpD1);
// store channel magnitude here in a new field of ulsch
ul_ch_mag128[0] = simde_mm_unpacklo_epi16(mmtmpD0,mmtmpD0);
ul_ch_mag128b[0] = ul_ch_mag128[0];
ul_ch_mag128c[0] = ul_ch_mag128[0];
ul_ch_mag128[0] = simde_mm_mulhrs_epi16(ul_ch_mag128[0],QAM_amp128);
ul_ch_mag128b[0] = simde_mm_mulhrs_epi16(ul_ch_mag128b[0],QAM_amp128b);
ul_ch_mag128c[0] = simde_mm_mulhrs_epi16(ul_ch_mag128c[0],QAM_amp128c);
// print_ints("ch: = ",(int32_t*)&mmtmpD0);
// print_shorts("QAM_amp:",(int16_t*)&QAM_amp128);
// print_shorts("mag:",(int16_t*)&ul_ch_mag128[0]);
ul_ch_mag128[1] = simde_mm_unpackhi_epi16(mmtmpD0,mmtmpD0);
ul_ch_mag128b[1] = ul_ch_mag128[1];
ul_ch_mag128c[1] = ul_ch_mag128[1];
ul_ch_mag128[1] = simde_mm_mulhrs_epi16(ul_ch_mag128[1],QAM_amp128);
ul_ch_mag128b[1] = simde_mm_mulhrs_epi16(ul_ch_mag128b[1],QAM_amp128b);
ul_ch_mag128c[1] = simde_mm_mulhrs_epi16(ul_ch_mag128c[1],QAM_amp128c);
mmtmpD0 = simde_mm_madd_epi16(ul_ch128[2],ul_ch128[2]);
mmtmpD0 = simde_mm_srai_epi32(mmtmpD0,output_shift);
mmtmpD1 = simde_mm_packs_epi32(mmtmpD0,mmtmpD0);
ul_ch_mag128[2] = simde_mm_unpacklo_epi16(mmtmpD1,mmtmpD1);
ul_ch_mag128b[2] = ul_ch_mag128[2];
ul_ch_mag128c[2] = ul_ch_mag128[2];
ul_ch_mag128[2] = simde_mm_mulhrs_epi16(ul_ch_mag128[2],QAM_amp128);
ul_ch_mag128b[2] = simde_mm_mulhrs_epi16(ul_ch_mag128b[2],QAM_amp128b);
ul_ch_mag128c[2] = simde_mm_mulhrs_epi16(ul_ch_mag128c[2],QAM_amp128c);
}
// Multiply received data by conjugated channel
rxdataF_comp128[0] = a_mult_conjb(rxdataF128[0], ul_ch128[0], output_shift);
rxdataF_comp128[1] = a_mult_conjb(rxdataF128[1], ul_ch128[1], output_shift);
rxdataF_comp128[2] = a_mult_conjb(rxdataF128[2], ul_ch128[2], output_shift);
ul_ch128 += 3;
ul_ch_mag128 += 3;
ul_ch_mag128b += 3;
ul_ch_mag128c += 3;
rxdataF128 += 3;
rxdataF_comp128 += 3;
}
}
QAM_ampa_256 = simde_mm256_set1_epi16(QAM256_n1);
QAM_ampb_256 = simde_mm256_set1_epi16(QAM256_n2);
QAM_ampc_256 = simde_mm256_set1_epi16(QAM256_n3);
}
if (rho) {
//we compute the Tx correlation matrix for each Rx antenna
//As an example the 2x2 MIMO case requires
//rho[aarx][nb_aatx*nb_aatx] = [cov(H_aarx_0,H_aarx_0) cov(H_aarx_0,H_aarx_1)
// cov(H_aarx_1,H_aarx_0) cov(H_aarx_1,H_aarx_1)], aarx=0,...,nb_antennas_rx-1
int avg_rho_re[frame_parms->nb_antennas_rx][nrOfLayers*nrOfLayers];
int avg_rho_im[frame_parms->nb_antennas_rx][nrOfLayers*nrOfLayers];
for (aarx=0; aarx < frame_parms->nb_antennas_rx; aarx++) {
for (aatx=0; aatx < nrOfLayers; aatx++) {
for (int atx=0; atx< nrOfLayers; atx++) {
avg_rho_re[aarx][aatx*nrOfLayers+atx] = 0;
avg_rho_im[aarx][aatx*nrOfLayers+atx] = 0;
rho128 = (simde__m128i *)&rho[aarx][aatx*nrOfLayers+atx][symbol*(off+(nb_rb*12))];
ul_ch128 = (simde__m128i *)&ul_ch_estimates_ext[aatx*frame_parms->nb_antennas_rx+aarx][symbol*(off+(nb_rb*12))];
ul_ch128_2 = (simde__m128i *)&ul_ch_estimates_ext[atx*frame_parms->nb_antennas_rx+aarx][symbol*(off+(nb_rb*12))];
for (rb=0; rb<nb_rb_0; rb++) {
// multiply by conjugated channel
mmtmpD0 = simde_mm_madd_epi16(ul_ch128[0],ul_ch128_2[0]);
// print_ints("re",&mmtmpD0);
// mmtmpD0 contains real part of 4 consecutive outputs (32-bit)
mmtmpD1 = simde_mm_shufflelo_epi16(ul_ch128[0], SIMDE_MM_SHUFFLE(2,3,0,1));
mmtmpD1 = simde_mm_shufflehi_epi16(mmtmpD1, SIMDE_MM_SHUFFLE(2,3,0,1));
mmtmpD1 = simde_mm_sign_epi16(mmtmpD1,*(simde__m128i*)&conjugate[0]);
// print_ints("im",&mmtmpD1);
mmtmpD1 = simde_mm_madd_epi16(mmtmpD1,ul_ch128_2[0]);
// mmtmpD1 contains imag part of 4 consecutive outputs (32-bit)
mmtmpD0 = simde_mm_srai_epi32(mmtmpD0,output_shift);
// print_ints("re(shift)",&mmtmpD0);
mmtmpD1 = simde_mm_srai_epi32(mmtmpD1,output_shift);
// print_ints("im(shift)",&mmtmpD1);
mmtmpD2 = simde_mm_unpacklo_epi32(mmtmpD0,mmtmpD1);
mmtmpD3 = simde_mm_unpackhi_epi32(mmtmpD0,mmtmpD1);
// print_ints("c0",&mmtmpD2);
// print_ints("c1",&mmtmpD3);
rho128[0] = simde_mm_packs_epi32(mmtmpD2,mmtmpD3);
//print_shorts("rx:",ul_ch128_2);
//print_shorts("ch:",ul_ch128);
//print_shorts("pack:",rho128);
avg_rho_re[aarx][aatx*nrOfLayers+atx] +=(((int16_t*)&rho128[0])[0]+
((int16_t*)&rho128[0])[2] +
((int16_t*)&rho128[0])[4] +
((int16_t*)&rho128[0])[6])/16;//
avg_rho_im[aarx][aatx*nrOfLayers+atx] +=(((int16_t*)&rho128[0])[1]+
((int16_t*)&rho128[0])[3] +
((int16_t*)&rho128[0])[5] +
((int16_t*)&rho128[0])[7])/16;//
// multiply by conjugated channel
mmtmpD0 = simde_mm_madd_epi16(ul_ch128[1],ul_ch128_2[1]);
// mmtmpD0 contains real part of 4 consecutive outputs (32-bit)
mmtmpD1 = simde_mm_shufflelo_epi16(ul_ch128[1], SIMDE_MM_SHUFFLE(2,3,0,1));
mmtmpD1 = simde_mm_shufflehi_epi16(mmtmpD1, SIMDE_MM_SHUFFLE(2,3,0,1));
mmtmpD1 = simde_mm_sign_epi16(mmtmpD1,*(simde__m128i*)conjugate);
mmtmpD1 = simde_mm_madd_epi16(mmtmpD1,ul_ch128_2[1]);
// mmtmpD1 contains imag part of 4 consecutive outputs (32-bit)
mmtmpD0 = simde_mm_srai_epi32(mmtmpD0,output_shift);
mmtmpD1 = simde_mm_srai_epi32(mmtmpD1,output_shift);
mmtmpD2 = simde_mm_unpacklo_epi32(mmtmpD0,mmtmpD1);
mmtmpD3 = simde_mm_unpackhi_epi32(mmtmpD0,mmtmpD1);
rho128[1] =simde_mm_packs_epi32(mmtmpD2,mmtmpD3);
//print_shorts("rx:",ul_ch128_2+1);
//print_shorts("ch:",ul_ch128+1);
//print_shorts("pack:",rho128+1);
simde__m256i xmmp0, xmmp1, xmmp2, xmmp3, xmmp4;
simde__m256i complex_shuffle256 = simde_mm256_set_epi8(29,28,31,30,25,24,27,26,21,20,23,22,17,16,19,18,13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2);
simde__m256i conj256 = simde_mm256_set_epi16(1,-1,1,-1,1,-1,1,-1,1,-1,1,-1,1,-1,1,-1);
for (int aatx = 0; aatx < nrOfLayers; aatx++) {
simde__m256i *rxComp_256 = (simde__m256i*) &rxComp[aatx * nb_rx_ant][symbol * buffer_length];
simde__m256i *rxF_ch_maga_256 = (simde__m256i*)&ul_ch_maga[aatx * buffer_length];
simde__m256i *rxF_ch_magb_256 = (simde__m256i*)&ul_ch_magb[aatx * buffer_length];
simde__m256i *rxF_ch_magc_256 = (simde__m256i*)&ul_ch_magc[aatx * buffer_length];
for (int aarx = 0; aarx < nb_rx_ant; aarx++) {
simde__m256i *rxF_256 = (simde__m256i*) &rxFext[aarx * buffer_length];
simde__m256i *chF_256 = (simde__m256i*) &chFext[(aatx * nb_rx_ant + aarx) * buffer_length];
for (int i = 0; i < buffer_length >> 3; i++)
{
xmmp0 = simde_mm256_madd_epi16(chF_256[i], rxF_256[i]);
// xmmp0 contains real part of 8 consecutive outputs (32-bit) of conj(H_m[i])*R_m[i]
xmmp1 = simde_mm256_shuffle_epi8(chF_256[i], complex_shuffle256);
xmmp1 = simde_mm256_sign_epi16(xmmp1, conj256);
xmmp1 = simde_mm256_madd_epi16(xmmp1, rxF_256[i]);
// xmmp1 contains imag part of 8 consecutive outputs (32-bit) of conj(H_m[i])*R_m[i]
xmmp0 = simde_mm256_srai_epi32(xmmp0, output_shift);
xmmp1 = simde_mm256_srai_epi32(xmmp1, output_shift);
xmmp2 = simde_mm256_unpacklo_epi32(xmmp0, xmmp1);
xmmp3 = simde_mm256_unpackhi_epi32(xmmp0, xmmp1);
xmmp4 = simde_mm256_packs_epi32(xmmp2, xmmp3);
xmmp0 = simde_mm256_madd_epi16(chF_256[i], chF_256[i]); // |h|^2
xmmp0 = simde_mm256_srai_epi32(xmmp0, output_shift);
xmmp0 = simde_mm256_packs_epi32(xmmp0, xmmp0);
xmmp1 = simde_mm256_unpacklo_epi16(xmmp0, xmmp0);
xmmp2 = simde_mm256_mulhrs_epi16(xmmp1, QAM_ampa_256);
xmmp3 = simde_mm256_mulhrs_epi16(xmmp1, QAM_ampb_256);
xmmp1 = simde_mm256_mulhrs_epi16(xmmp1, QAM_ampc_256);
// MRC
rxComp_256[i] = simde_mm256_add_epi16(rxComp_256[i], xmmp4);
if (mod_order > 2)
rxF_ch_maga_256[i] = simde_mm256_add_epi16(rxF_ch_maga_256[i], xmmp2);
if (mod_order > 4)
rxF_ch_magb_256[i] = simde_mm256_add_epi16(rxF_ch_magb_256[i], xmmp3);
if (mod_order > 6)
rxF_ch_magc_256[i] = simde_mm256_add_epi16(rxF_ch_magc_256[i], xmmp1);
}
if (rho != NULL) {
for (int atx = 0; atx < nrOfLayers; atx++) {
simde__m256i *rho_256 = (simde__m256i * )&rho[(aatx * nrOfLayers + atx) * buffer_length];
simde__m256i *chF_256 = (simde__m256i *)&chFext[(aatx * nb_rx_ant + aarx) * buffer_length];
simde__m256i *chF2_256 = (simde__m256i *)&chFext[ (atx * nb_rx_ant + aarx) * buffer_length];
for (int i = 0; i < buffer_length >> 3; i++) {
// multiply by conjugated channel
avg_rho_re[aarx][aatx*nrOfLayers+atx] +=(((int16_t*)&rho128[1])[0]+
((int16_t*)&rho128[1])[2] +
((int16_t*)&rho128[1])[4] +
((int16_t*)&rho128[1])[6])/16;
avg_rho_im[aarx][aatx*nrOfLayers+atx] +=(((int16_t*)&rho128[1])[1]+
((int16_t*)&rho128[1])[3] +
((int16_t*)&rho128[1])[5] +
((int16_t*)&rho128[1])[7])/16;
mmtmpD0 = simde_mm_madd_epi16(ul_ch128[2],ul_ch128_2[2]);
// mmtmpD0 contains real part of 4 consecutive outputs (32-bit)
mmtmpD1 = simde_mm_shufflelo_epi16(ul_ch128[2], SIMDE_MM_SHUFFLE(2,3,0,1));
mmtmpD1 = simde_mm_shufflehi_epi16(mmtmpD1, SIMDE_MM_SHUFFLE(2,3,0,1));
mmtmpD1 = simde_mm_sign_epi16(mmtmpD1,*(simde__m128i*)conjugate);
mmtmpD1 = simde_mm_madd_epi16(mmtmpD1,ul_ch128_2[2]);
// mmtmpD1 contains imag part of 4 consecutive outputs (32-bit)
mmtmpD0 = simde_mm_srai_epi32(mmtmpD0,output_shift);
mmtmpD1 = simde_mm_srai_epi32(mmtmpD1,output_shift);
mmtmpD2 = simde_mm_unpacklo_epi32(mmtmpD0,mmtmpD1);
mmtmpD3 = simde_mm_unpackhi_epi32(mmtmpD0,mmtmpD1);
rho128[2] = simde_mm_packs_epi32(mmtmpD2,mmtmpD3);
//print_shorts("rx:",ul_ch128_2+2);
//print_shorts("ch:",ul_ch128+2);
//print_shorts("pack:",rho128+2);
avg_rho_re[aarx][aatx*nrOfLayers+atx] +=(((int16_t*)&rho128[2])[0]+
((int16_t*)&rho128[2])[2] +
((int16_t*)&rho128[2])[4] +
((int16_t*)&rho128[2])[6])/16;
xmmp0 = simde_mm256_madd_epi16(chF_256[i], chF2_256[i]);
// xmmp0 contains real part of 8 consecutive outputs (32-bit)
xmmp1 = simde_mm256_shuffle_epi8(chF_256[i], complex_shuffle256);
xmmp1 = simde_mm256_sign_epi16(xmmp1, conj256);
xmmp1 = simde_mm256_madd_epi16(xmmp1, chF2_256[i]);
// xmmp1 contains imag part of 8 consecutive outputs (32-bit)
xmmp0 = simde_mm256_srai_epi32(xmmp0, output_shift);
xmmp1 = simde_mm256_srai_epi32(xmmp1, output_shift);
xmmp2 = simde_mm256_unpacklo_epi32(xmmp0, xmmp1);
xmmp3 = simde_mm256_unpackhi_epi32(xmmp0, xmmp1);
avg_rho_im[aarx][aatx*nrOfLayers+atx] +=(((int16_t*)&rho128[2])[1]+
((int16_t*)&rho128[2])[3] +
((int16_t*)&rho128[2])[5] +
((int16_t*)&rho128[2])[7])/16;
ul_ch128+=3;
ul_ch128_2+=3;
rho128+=3;
}
if (is_dmrs_symbol==1) {
//measurements->rx_correlation[0][0][aarx] = signal_energy(&rho[aarx][aatx*nb_aatx+atx][symbol*nb_rb*12],rb*12);
avg_rho_re[aarx][aatx*nrOfLayers+atx] = 16*avg_rho_re[aarx][aatx*nrOfLayers+atx]/(nb_rb*12);
avg_rho_im[aarx][aatx*nrOfLayers+atx] = 16*avg_rho_im[aarx][aatx*nrOfLayers+atx]/(nb_rb*12);
//printf("rho[rx]%d tx%d tx%d = Re: %d Im: %d\n",aarx, aatx,atx, avg_rho_re[aarx][aatx*nb_aatx+atx], avg_rho_im[aarx][aatx*nb_aatx+atx]);
rho_256[i] = simde_mm256_adds_epi16(rho_256[i], simde_mm256_packs_epi32(xmmp2, xmmp3));
}
}
}
......@@ -765,107 +574,10 @@ void nr_ulsch_channel_compensation(int **rxdataF_ext,
simde_mm_empty();
simde_m_empty();
#ifdef DEBUG_CH_COMP
for (int nl2=0; nl2<nrOfLayers; nl2++) {
for (int aarx2=0; aarx2<frame_parms->nb_antennas_rx; aarx2++) {
rxF = (int16_t *)&rxdataF_comp[nl2*frame_parms->nb_antennas_rx+aarx2][(symbol*(off+(nb_rb*12)))];
printf("--------After compansation, layer %i, antenna rx %i----------\n", nl2, aarx2);
for (prnt_idx=0;prnt_idx<12*5*2;prnt_idx+=2){
printf("rxF[%d] = (%d,%d)\n", prnt_idx>>1, rxF[prnt_idx],rxF[prnt_idx+1]);
}
}
}
#endif
#ifdef DEBUG_CH_MAG
for (int ant=0; ant<frame_parms->nb_antennas_rx; ant++) {
ch_mag = (int16_t *)&ul_ch_mag[ant][(symbol*(off+(nb_rb*12)))];
printf("----------------After computation------------------\n");
for (print_idx=0;print_idx<12*5*2;print_idx+=2){
printf("ch_mag[%d] = (%d,%d)\n", print_idx>>1, ch_mag[print_idx],ch_mag[print_idx+1]);
}
}
#endif
}
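A scalar reference of what the AVX2 loop above accumulates per RE for each (layer, RX antenna) pair: matched filtering of the received sample with the conjugated channel, summed over RX antennas (MRC), plus the |h|^2 terms that feed the 16/64/256-QAM magnitude buffers. Sketch only; the mulhrs rounding and the 16-bit saturation of the real code are not modelled.
// Illustrative scalar model of the MRC/compensation step for one RE:
static void mrc_one_re(c16_t h, c16_t r, int output_shift,
                       int32_t *comp_re, int32_t *comp_im, int32_t *mag)
{
  // conj(h) * r, scaled down as in the SIMD code
  *comp_re += ((int32_t)h.r * r.r + (int32_t)h.i * r.i) >> output_shift;
  *comp_im += ((int32_t)h.r * r.i - (int32_t)h.i * r.r) >> output_shift;
  // channel energy used for the higher-order QAM magnitude terms
  *mag     += ((int32_t)h.r * h.r + (int32_t)h.i * h.i) >> output_shift;
}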
void nr_ulsch_detection_mrc(NR_DL_FRAME_PARMS *frame_parms,
int32_t **rxdataF_comp,
int32_t **ul_ch_mag,
int32_t **ul_ch_magb,
int32_t **ul_ch_magc,
int32_t ***rho,
uint8_t nrOfLayers,
uint8_t symbol,
uint16_t nb_rb,
int length) {
int n_rx = frame_parms->nb_antennas_rx;
simde__m128i *rxdataF_comp128[2],*ul_ch_mag128[2],*ul_ch_mag128b[2],*ul_ch_mag128c[2];
int32_t i;
uint32_t nb_rb_0 = length/12 + ((length%12)?1:0);
int off = ((nb_rb&1) == 1)? 4:0;
if (n_rx > 1) {
int nb_re = nb_rb * 12;
for (int aatx = 0; aatx < nrOfLayers; aatx++) {
rxdataF_comp128[0] = (simde__m128i *)&rxdataF_comp[aatx*frame_parms->nb_antennas_rx][(symbol*(nb_re + off))];
ul_ch_mag128[0] = (simde__m128i *)&ul_ch_mag[aatx*frame_parms->nb_antennas_rx][(symbol*(nb_re + off))];
ul_ch_mag128b[0] = (simde__m128i *)&ul_ch_magb[aatx*frame_parms->nb_antennas_rx][(symbol*(nb_re + off))];
ul_ch_mag128c[0] = (simde__m128i *)&ul_ch_magc[aatx*frame_parms->nb_antennas_rx][(symbol*(nb_re + off))];
for (int aa=1;aa < n_rx;aa++) {
rxdataF_comp128[1] = (simde__m128i *)&rxdataF_comp[aatx*frame_parms->nb_antennas_rx+aa][(symbol*(nb_re + off))];
ul_ch_mag128[1] = (simde__m128i *)&ul_ch_mag[aatx*frame_parms->nb_antennas_rx+aa][(symbol*(nb_re + off))];
ul_ch_mag128b[1] = (simde__m128i *)&ul_ch_magb[aatx*frame_parms->nb_antennas_rx+aa][(symbol*(nb_re + off))];
ul_ch_mag128c[1] = (simde__m128i *)&ul_ch_magc[aatx*frame_parms->nb_antennas_rx+aa][(symbol*(nb_re + off))];
// MRC on each re of rb, both on MF output and magnitude (for 16QAM/64QAM llr computation)
for (i=0; i<nb_rb_0*3; i++) {
rxdataF_comp128[0][i] = simde_mm_adds_epi16(rxdataF_comp128[0][i],rxdataF_comp128[1][i]);
ul_ch_mag128[0][i] = simde_mm_adds_epi16(ul_ch_mag128[0][i],ul_ch_mag128[1][i]);
ul_ch_mag128b[0][i] = simde_mm_adds_epi16(ul_ch_mag128b[0][i],ul_ch_mag128b[1][i]);
ul_ch_mag128c[0][i] = simde_mm_adds_epi16(ul_ch_mag128c[0][i],ul_ch_mag128c[1][i]);
//rxdataF_comp128[0][i] = _mm_add_epi16(rxdataF_comp128_0[i],(*(__m128i *)&jitterc[0]));
}
}
if (rho) {
simde__m128i *rho128[2];
for (int aatx2 = 0; aatx2 < nrOfLayers; aatx2++) {
rho128[0] = (simde__m128i *) &rho[0][aatx * nrOfLayers + aatx2][(symbol * (nb_re + off))];
for (int aa = 1; aa < n_rx; aa++) {
rho128[1] = (simde__m128i *) &rho[aa][aatx * nrOfLayers + aatx2][(symbol * (nb_re + off))];
for (i = 0; i < nb_rb_0 * 3; i++) {
rho128[0][i] = simde_mm_adds_epi16(rho128[0][i], rho128[1][i]);
}
}
}
}
}
}
}
/* Zero Forcing Rx function: nr_det_HhH()
*
*
* */
void nr_ulsch_det_HhH(int32_t *after_mf_00,//a
// Zero Forcing Rx function: nr_det_HhH()
static void nr_ulsch_det_HhH (int32_t *after_mf_00,//a
int32_t *after_mf_01,//b
int32_t *after_mf_10,//c
int32_t *after_mf_11,//d
......@@ -927,49 +639,11 @@ void nr_ulsch_det_HhH(int32_t *after_mf_00,//a
simde_m_empty();
}
/* Zero Forcing Rx function: nr_inv_comp_muli
* Complex number multiplication: z = x*y
* = (x_re*y_re - x_im*y_im) + j(x_im*y_re + x_re*y_im)
* */
simde__m128i nr_ulsch_inv_comp_muli(simde__m128i input_x,
simde__m128i input_y)
{
int16_t nr_conjug2[8]__attribute__((aligned(16))) = {1,-1,1,-1,1,-1,1,-1} ;
simde__m128i xy_re_128, xy_im_128;
simde__m128i output_z, tmp_z0, tmp_z1;
// complex multiplication (x_re + jx_im)*(y_re + jy_im) = (x_re*y_re - x_im*y_im) + j(x_im*y_re + x_re*y_im)
// the real part
xy_re_128 = simde_mm_sign_epi16(input_x,*(simde__m128i*)&nr_conjug2[0]);
xy_re_128 = simde_mm_madd_epi16(xy_re_128,input_y); //Re: (x_re*y_re - x_im*y_im)
// the imag part
xy_im_128 = simde_mm_shufflelo_epi16(input_x, SIMDE_MM_SHUFFLE(2,3,0,1));//permutes IQs for the low 64 bits as [I_a0 Q_a1 I_a2 Q_a3]_64bits to [Q_a1 I_a0 Q_a3 I_a2]_64bits
xy_im_128 = simde_mm_shufflehi_epi16(xy_im_128, SIMDE_MM_SHUFFLE(2,3,0,1));//permutes IQs for the high 64 bits as [I_a0 Q_a1 I_a2 Q_a3]_64bits to [Q_a1 I_a0 Q_a3 I_a2]_64bits
xy_im_128 = simde_mm_madd_epi16(xy_im_128,input_y);//Im: (x_im*y_re + x_re*y_im)
//convert back to Q15 before packing
xy_re_128 = simde_mm_srai_epi32(xy_re_128,4);//(2^15/64*2*16)
xy_im_128 = simde_mm_srai_epi32(xy_im_128,4);
tmp_z0 = simde_mm_unpacklo_epi32(xy_re_128,xy_im_128);
//print_ints("unpack lo:",&tmp_z0[0]);
tmp_z1 = simde_mm_unpackhi_epi32(xy_re_128,xy_im_128);
//print_ints("unpack hi:",&tmp_z1[0]);
output_z = simde_mm_packs_epi32(tmp_z0,tmp_z1);
simde_mm_empty();
simde_m_empty();
return(output_z);
}
/* Zero Forcing Rx function: nr_conjch0_mult_ch1()
*
*
* */
void nr_ulsch_conjch0_mult_ch1(int *ch0,
static void nr_ulsch_conjch0_mult_ch1(int *ch0,
int *ch1,
int32_t *ch0conj_ch1,
unsigned short nb_rb,
......@@ -1011,7 +685,8 @@ void nr_ulsch_conjch0_mult_ch1(int *ch0,
simde_mm_empty();
simde_m_empty();
}
simde__m128i nr_ulsch_comp_muli_sum(simde__m128i input_x,
static simde__m128i nr_ulsch_comp_muli_sum(simde__m128i input_x,
simde__m128i input_y,
simde__m128i input_w,
simde__m128i input_z,
......@@ -1073,11 +748,12 @@ simde__m128i nr_ulsch_comp_muli_sum(simde__m128i input_x,
simde_m_empty();
return(output);
}
/* Zero Forcing Rx function: nr_construct_HhH_elements()
*
*
* */
void nr_ulsch_construct_HhH_elements(int *conjch00_ch00,
static void nr_ulsch_construct_HhH_elements(int *conjch00_ch00,
int *conjch01_ch01,
int *conjch11_ch11,
int *conjch10_ch10,//
......@@ -1190,10 +866,8 @@ void nr_ulsch_construct_HhH_elements(int *conjch00_ch00,
simde_m_empty();
}
/*
* MMSE Rx function: nr_ulsch_mmse_2layers()
*/
uint8_t nr_ulsch_mmse_2layers(NR_DL_FRAME_PARMS *frame_parms,
// MMSE Rx function: nr_ulsch_mmse_2layers()
static uint8_t nr_ulsch_mmse_2layers(NR_DL_FRAME_PARMS *frame_parms,
int **rxdataF_comp,
int **ul_ch_mag,
int **ul_ch_magb,
......@@ -1205,14 +879,13 @@ uint8_t nr_ulsch_mmse_2layers(NR_DL_FRAME_PARMS *frame_parms,
int shift,
unsigned char symbol,
int length,
uint32_t noise_var)
uint32_t noise_var,
uint32_t buffer_length)
{
int *ch00, *ch01, *ch10, *ch11;
int *ch20, *ch30, *ch21, *ch31;
uint32_t nb_rb_0 = length/12 + ((length%12)?1:0);
int off = ((nb_rb&1) == 1)? 4:0;
/* we need at least alignment to 16 bytes, let's put 32 to be sure
* (maybe not necessary but doesn't hurt)
*/
......@@ -1241,10 +914,10 @@ uint8_t nr_ulsch_mmse_2layers(NR_DL_FRAME_PARMS *frame_parms,
switch (n_rx) {
case 2://
ch00 = (int *)&ul_ch_estimates_ext[0][symbol*(off+nb_rb*12)];
ch01 = (int *)&ul_ch_estimates_ext[2][symbol*(off+nb_rb*12)];
ch10 = (int *)&ul_ch_estimates_ext[1][symbol*(off+nb_rb*12)];
ch11 = (int *)&ul_ch_estimates_ext[3][symbol*(off+nb_rb*12)];
ch00 = &((int *)ul_ch_estimates_ext)[0 * buffer_length];
ch01 = &((int *)ul_ch_estimates_ext)[2 * buffer_length];
ch10 = &((int *)ul_ch_estimates_ext)[1 * buffer_length];
ch11 = &((int *)ul_ch_estimates_ext)[3 * buffer_length];
ch20 = NULL;
ch21 = NULL;
ch30 = NULL;
......@@ -1252,14 +925,14 @@ uint8_t nr_ulsch_mmse_2layers(NR_DL_FRAME_PARMS *frame_parms,
break;
case 4://
ch00 = (int *)&ul_ch_estimates_ext[0][symbol*(off+nb_rb*12)];
ch01 = (int *)&ul_ch_estimates_ext[4][symbol*(off+nb_rb*12)];
ch10 = (int *)&ul_ch_estimates_ext[1][symbol*(off+nb_rb*12)];
ch11 = (int *)&ul_ch_estimates_ext[5][symbol*(off+nb_rb*12)];
ch20 = (int *)&ul_ch_estimates_ext[2][symbol*(off+nb_rb*12)];
ch21 = (int *)&ul_ch_estimates_ext[6][symbol*(off+nb_rb*12)];
ch30 = (int *)&ul_ch_estimates_ext[3][symbol*(off+nb_rb*12)];
ch31 = (int *)&ul_ch_estimates_ext[7][symbol*(off+nb_rb*12)];
ch00 = &((int *)ul_ch_estimates_ext)[0 * buffer_length];
ch01 = &((int *)ul_ch_estimates_ext)[4 * buffer_length];
ch10 = &((int *)ul_ch_estimates_ext)[1 * buffer_length];
ch11 = &((int *)ul_ch_estimates_ext)[5 * buffer_length];
ch20 = &((int *)ul_ch_estimates_ext)[2 * buffer_length];
ch21 = &((int *)ul_ch_estimates_ext)[6 * buffer_length];
ch30 = &((int *)ul_ch_estimates_ext)[3 * buffer_length];
ch31 = &((int *)ul_ch_estimates_ext)[7 * buffer_length];
break;
default:
......@@ -1468,14 +1141,14 @@ uint8_t nr_ulsch_mmse_2layers(NR_DL_FRAME_PARMS *frame_parms,
simde__m128i *determ_fin_128 = (simde__m128i *)&determ_fin[0];
simde__m128i *rxdataF_comp128_0 = (simde__m128i *)&rxdataF_comp[0][symbol * (off + nb_rb * 12)]; // aatx=0 @ aarx =0
simde__m128i *rxdataF_comp128_1 = (simde__m128i *)&rxdataF_comp[n_rx][symbol * (off + nb_rb * 12)]; // aatx=1 @ aarx =0
simde__m128i *after_mf_a_128 = (simde__m128i *)af_mf_00;
simde__m128i *after_mf_b_128 = (simde__m128i *)af_mf_01;
simde__m128i *after_mf_c_128 = (simde__m128i *)af_mf_10;
simde__m128i *after_mf_d_128 = (simde__m128i *)af_mf_11;
int rxComp_aligned = ((nb_rb * 12) % 8) ? (8 - (nb_rb * 12) % 8) : 0;
simde__m128i *rxdataF_comp128_0 = (simde__m128i *)&rxdataF_comp[0][symbol * (nb_rb * 12 + rxComp_aligned)];
simde__m128i *rxdataF_comp128_1 = (simde__m128i *)&rxdataF_comp[n_rx][symbol * (nb_rb * 12 + rxComp_aligned)];
if (mod_order > 2) {
if (mod_order == 4) {
......@@ -1491,12 +1164,12 @@ uint8_t nr_ulsch_mmse_2layers(NR_DL_FRAME_PARMS *frame_parms,
QAM_amp128b = simde_mm_set1_epi16(QAM256_n2);
QAM_amp128c = simde_mm_set1_epi16(QAM256_n3);
}
ul_ch_mag128_0 = (simde__m128i *)&ul_ch_mag[0][symbol * (off + nb_rb * 12)];
ul_ch_mag128b_0 = (simde__m128i *)&ul_ch_magb[0][symbol * (off + nb_rb * 12)];
ul_ch_mag128c_0 = (simde__m128i *)&ul_ch_magc[0][symbol * (off + nb_rb * 12)];
ul_ch_mag128_1 = (simde__m128i *)&ul_ch_mag[frame_parms->nb_antennas_rx][symbol * (off + nb_rb * 12)];
ul_ch_mag128b_1 = (simde__m128i *)&ul_ch_magb[frame_parms->nb_antennas_rx][symbol * (off + nb_rb * 12)];
ul_ch_mag128c_1 = (simde__m128i *)&ul_ch_magc[frame_parms->nb_antennas_rx][symbol * (off + nb_rb * 12)];
ul_ch_mag128_0 = (simde__m128i *) &ul_ch_mag[0];
ul_ch_mag128b_0 = (simde__m128i *)&ul_ch_magb[0];
ul_ch_mag128c_0 = (simde__m128i *)&ul_ch_magc[0];
ul_ch_mag128_1 = (simde__m128i *) &((int *)ul_ch_mag)[1 * buffer_length];
ul_ch_mag128b_1 = (simde__m128i *)&((int *)ul_ch_magb)[1 * buffer_length];
ul_ch_mag128c_1 = (simde__m128i *)&((int *)ul_ch_magc)[1 * buffer_length];
}
for (int rb = 0; rb < 3 * nb_rb_0; rb++) {
......@@ -1586,38 +1259,212 @@ uint8_t nr_ulsch_mmse_2layers(NR_DL_FRAME_PARMS *frame_parms,
return(0);
}
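For context, the 2-layer receiver above applies the usual regularized matrix inverse per RE. Writing H for the 2-column effective channel, y for the received vector and sigma^2 for noise_var, a minimal sketch of the underlying math (not a statement of the exact fixed-point implementation) is
  x_hat = (H^H H + sigma^2 I)^-1 H^H y = adj(H^H H + sigma^2 I) H^H y / det(H^H H + sigma^2 I),
where the helpers above (nr_ulsch_conjch0_mult_ch1, nr_ulsch_construct_HhH_elements, nr_ulsch_det_HhH) build the entries and the determinant of the 2x2 matrix in fixed point.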
//==============================================================================================
static void inner_rx (PHY_VARS_gNB *gNB,
int ulsch_id,
int slot,
NR_DL_FRAME_PARMS *frame_parms,
NR_gNB_PUSCH *pusch_vars,
nfapi_nr_pusch_pdu_t *rel15_ul,
c16_t **rxF,
c16_t **ul_ch,
int16_t **llr,
int soffset,
int length,
int symbol,
int output_shift,
uint32_t nvar)
{
int nb_layer = rel15_ul->nrOfLayers;
int nb_rx_ant = frame_parms->nb_antennas_rx;
int dmrs_symbol_flag = (rel15_ul->ul_dmrs_symb_pos >> symbol) & 0x01;
int buffer_length = (rel15_ul->rb_size * NR_NB_SC_PER_RB + 7) & ~7;
c16_t rxFext[nb_rx_ant][buffer_length] __attribute__((aligned(32)));
c16_t chFext[nb_layer][nb_rx_ant][buffer_length] __attribute__((aligned(32)));
memset(rxFext, 0, sizeof(c16_t) * nb_rx_ant * buffer_length);
memset(chFext, 0, sizeof(c16_t) * nb_layer * nb_rx_ant* buffer_length);
for (int aarx = 0; aarx < nb_rx_ant; aarx++) {
for (int aatx = 0; aatx < nb_layer; aatx++) {
nr_ulsch_extract_rbs(rxF[aarx],
(c16_t *)pusch_vars->ul_ch_estimates[aatx * nb_rx_ant + aarx],
rxFext[aarx],
chFext[aatx][aarx],
soffset+(symbol * frame_parms->ofdm_symbol_size),
pusch_vars->dmrs_symbol * frame_parms->ofdm_symbol_size,
aarx,
dmrs_symbol_flag,
rel15_ul,
frame_parms);
}
}
c16_t rho[nb_layer][nb_layer][buffer_length] __attribute__((aligned(32)));
c16_t rxF_ch_maga [nb_layer][buffer_length] __attribute__((aligned(32)));
c16_t rxF_ch_magb [nb_layer][buffer_length] __attribute__((aligned(32)));
c16_t rxF_ch_magc [nb_layer][buffer_length] __attribute__((aligned(32)));
memset(rho, 0, sizeof(c16_t) * nb_layer * nb_layer* buffer_length);
memset(rxF_ch_maga, 0, sizeof(c16_t) * nb_layer * buffer_length);
memset(rxF_ch_magb, 0, sizeof(c16_t) * nb_layer * buffer_length);
memset(rxF_ch_magc, 0, sizeof(c16_t) * nb_layer * buffer_length);
for (int i = 0; i < nb_layer; i++)
memset(&pusch_vars->rxdataF_comp[i*nb_rx_ant][symbol * buffer_length], 0, sizeof(int32_t) * buffer_length);
nr_ulsch_channel_compensation((c16_t*)rxFext,
(c16_t*)chFext,
(c16_t*)rxF_ch_maga,
(c16_t*)rxF_ch_magb,
(c16_t*)rxF_ch_magc,
pusch_vars->rxdataF_comp,
(nb_layer == 1) ? NULL : (c16_t*)rho,
frame_parms,
rel15_ul,
symbol,
buffer_length,
output_shift);
if (nb_layer == 1 && rel15_ul->transform_precoding == transformPrecoder_enabled && rel15_ul->qam_mod_order <= 6) {
if (rel15_ul->qam_mod_order > 2)
nr_freq_equalization(frame_parms,
&pusch_vars->rxdataF_comp[0][symbol * buffer_length],
(int *)rxF_ch_maga,
(int *)rxF_ch_magb,
symbol,
pusch_vars->ul_valid_re_per_slot[symbol],
rel15_ul->qam_mod_order);
nr_idft(&pusch_vars->rxdataF_comp[0][symbol * buffer_length], pusch_vars->ul_valid_re_per_slot[symbol]);
}
if (rel15_ul->pdu_bit_map & PUSCH_PDU_BITMAP_PUSCH_PTRS) {
nr_pusch_ptrs_processing(gNB,
frame_parms,
rel15_ul,
ulsch_id,
slot,
symbol,
buffer_length);
pusch_vars->ul_valid_re_per_slot[symbol] -= pusch_vars->ptrs_re_per_slot;
}
/* Main Function */
void nr_rx_pusch(PHY_VARS_gNB *gNB,
if (nb_layer == 2) {
if (rel15_ul->qam_mod_order < 6) {
nr_ulsch_compute_ML_llr(pusch_vars,
symbol,
(c16_t*)&pusch_vars->rxdataF_comp[0][symbol * buffer_length],
(c16_t*)&pusch_vars->rxdataF_comp[nb_rx_ant][symbol * buffer_length],
rxF_ch_maga[0],
rxF_ch_maga[1],
(c16_t*)&llr[0][pusch_vars->llr_offset[symbol]],
(c16_t*)&llr[1][pusch_vars->llr_offset[symbol]],
rho[0][1],
rho[1][0],
pusch_vars->ul_valid_re_per_slot[symbol],
rel15_ul->qam_mod_order);
}
else {
nr_ulsch_mmse_2layers(frame_parms,
(int32_t **)pusch_vars->rxdataF_comp,
(int **)rxF_ch_maga,
(int **)rxF_ch_magb,
(int **)rxF_ch_magc,
(int **)chFext,
rel15_ul->rb_size,
frame_parms->nb_antennas_rx,
rel15_ul->qam_mod_order,
pusch_vars->log2_maxh,
symbol,
pusch_vars->ul_valid_re_per_slot[symbol],
nvar,
buffer_length);
}
}
if (nb_layer != 2 || rel15_ul->qam_mod_order >= 6)
for (int aatx = 0; aatx < nb_layer; aatx++)
nr_ulsch_compute_llr((int32_t*)&pusch_vars->rxdataF_comp[aatx * nb_rx_ant][symbol * buffer_length],
(int32_t*)rxF_ch_maga[aatx],
(int32_t*)rxF_ch_magb[aatx],
(int32_t*)rxF_ch_magc[aatx],
&llr[aatx][pusch_vars->llr_offset[symbol]],
pusch_vars->ul_valid_re_per_slot[symbol],
symbol,
rel15_ul->qam_mod_order);
}
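The scratch buffers in inner_rx() are sized with (rel15_ul->rb_size * NR_NB_SC_PER_RB + 7) & ~7, i.e. the RE count rounded up to the next multiple of 8 so that the 256-bit SIMD loops downstream never step past a partial vector. A minimal standalone sketch of that rounding, with illustrative names that are not part of the OAI code:

#include <assert.h>
#include <stdio.h>

/* Round an RE count up to the next multiple of 8
 * (8 complex 16-bit samples fill one 256-bit SIMD register). */
static int round_up_to_8(int nb_re)
{
  return (nb_re + 7) & ~7;
}

int main(void)
{
  /* 106 PRBs * 12 subcarriers = 1272 REs, already a multiple of 8. */
  assert(round_up_to_8(1272) == 1272);
  /* An odd PRB count such as 51 gives 612 REs, padded to 616. */
  assert(round_up_to_8(612) == 616);
  printf("alignment sketch OK\n");
  return 0;
}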
static void nr_pusch_symbol_processing(void *arg)
{
puschSymbolProc_t *rdata=(puschSymbolProc_t*)arg;
PHY_VARS_gNB *gNB = rdata->gNB;
NR_DL_FRAME_PARMS *frame_parms = rdata->frame_parms;
nfapi_nr_pusch_pdu_t *rel15_ul = rdata->rel15_ul;
int ulsch_id = rdata->ulsch_id;
int slot = rdata->slot;
NR_gNB_PUSCH *pusch_vars = &gNB->pusch_vars[ulsch_id];
for (int symbol = rdata->startSymbol; symbol < rdata->startSymbol+rdata->numSymbols; symbol++) {
int dmrs_symbol_flag = (rel15_ul->ul_dmrs_symb_pos >> symbol) & 0x01;
if (dmrs_symbol_flag == 1)
{
if ((rel15_ul->ul_dmrs_symb_pos >> ((symbol + 1) % frame_parms->symbols_per_slot)) & 0x01)
AssertFatal(1==0,"Double DMRS configuration is not yet supported\n");
gNB->pusch_vars[ulsch_id].dmrs_symbol = symbol;
}
if (gNB->pusch_vars[ulsch_id].ul_valid_re_per_slot[symbol] == 0)
continue;
int soffset = (slot&3)*frame_parms->symbols_per_slot*frame_parms->ofdm_symbol_size;
inner_rx(gNB,
ulsch_id,
slot,
frame_parms,
pusch_vars,
rel15_ul,
gNB->common_vars.rxdataF,
(c16_t**)gNB->pusch_vars[ulsch_id].ul_ch_estimates,
rdata->llr_layers,
soffset,
gNB->pusch_vars[ulsch_id].ul_valid_re_per_slot[symbol],
symbol,
gNB->pusch_vars[ulsch_id].log2_maxh,
rdata->nvar);
int nb_re_pusch = gNB->pusch_vars[ulsch_id].ul_valid_re_per_slot[symbol];
// layer de-mapping
int16_t* llr_ptr = &rdata->llr_layers[0][pusch_vars->llr_offset[symbol]];
if (rel15_ul->nrOfLayers != 1) {
llr_ptr = &rdata->llr[pusch_vars->llr_offset[symbol] * rel15_ul->nrOfLayers];
for (int i = 0; i < (nb_re_pusch); i++)
for (int l = 0; l < rel15_ul->nrOfLayers; l++)
for (int m = 0; m < rel15_ul->qam_mod_order; m++)
llr_ptr[i*rel15_ul->nrOfLayers*rel15_ul->qam_mod_order+l*rel15_ul->qam_mod_order+m] = rdata->llr_layers[l][pusch_vars->llr_offset[symbol] + i*rel15_ul->qam_mod_order+m];
}
// unscrambling
int16_t *llr16 = (int16_t*)&rdata->llr[pusch_vars->llr_offset[symbol] * rel15_ul->nrOfLayers];
for (int i = 0; i < (nb_re_pusch * rel15_ul->qam_mod_order * rel15_ul->nrOfLayers); i++)
llr16[i] = llr_ptr[i] * rdata->s[i];
}
}
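The tail of nr_pusch_symbol_processing() does two scalar passes: layer de-mapping interleaves the per-layer LLRs RE by RE into codeword order, and descrambling multiplies each LLR by the matching entry of s, which (judging from the multiplication) carries the scrambling sequence mapped to +/-1. A self-contained toy version of those two loops, with made-up sizes and values:

#include <stdint.h>
#include <stdio.h>

#define NB_RE 4
#define NB_LAYERS 2
#define QM 2 /* QPSK: 2 LLRs per RE per layer */

int main(void)
{
  int16_t llr_layers[NB_LAYERS][NB_RE * QM];
  int16_t llr[NB_RE * NB_LAYERS * QM];
  int16_t s[NB_RE * NB_LAYERS * QM];

  for (int l = 0; l < NB_LAYERS; l++)
    for (int k = 0; k < NB_RE * QM; k++)
      llr_layers[l][k] = (int16_t)(100 * (l + 1) + k); /* recognizable test values */
  for (int i = 0; i < NB_RE * NB_LAYERS * QM; i++)
    s[i] = (i & 1) ? -1 : 1; /* stand-in scrambling sequence */

  /* layer de-mapping: same index arithmetic as the loop above */
  for (int i = 0; i < NB_RE; i++)
    for (int l = 0; l < NB_LAYERS; l++)
      for (int m = 0; m < QM; m++)
        llr[i * NB_LAYERS * QM + l * QM + m] = llr_layers[l][i * QM + m];

  /* descrambling by sign flip */
  for (int i = 0; i < NB_RE * NB_LAYERS * QM; i++)
    llr[i] *= s[i];

  for (int i = 0; i < NB_RE * NB_LAYERS * QM; i++)
    printf("%d ", llr[i]);
  printf("\n");
  return 0;
}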
int nr_rx_pusch_tp(PHY_VARS_gNB *gNB,
uint8_t ulsch_id,
uint32_t frame,
uint8_t slot,
unsigned char harq_pid)
{
uint8_t aarx, aatx;
uint32_t nb_re_pusch, bwp_start_subcarrier;
int avgs = 0;
uint8_t aarx;
uint32_t bwp_start_subcarrier;
NR_DL_FRAME_PARMS *frame_parms = &gNB->frame_parms;
NR_gNB_ULSCH_t *ulsch = &gNB->ulsch[ulsch_id];
nfapi_nr_pusch_pdu_t *rel15_ul = &ulsch->harq_process->ulsch_pdu;
int avg[frame_parms->nb_antennas_rx*rel15_ul->nrOfLayers];
nfapi_nr_pusch_pdu_t *rel15_ul = &gNB->ulsch[ulsch_id].harq_process->ulsch_pdu;
NR_gNB_PUSCH *pusch_vars = &gNB->pusch_vars[ulsch_id];
pusch_vars->dmrs_symbol = INVALID_VALUE;
pusch_vars->cl_done = 0;
gNB->nbSymb=0;
bwp_start_subcarrier = ((rel15_ul->rb_start + rel15_ul->bwp_start)*NR_NB_SC_PER_RB + frame_parms->first_carrier_offset) % frame_parms->ofdm_symbol_size;
LOG_D(PHY,"pusch %d.%d : bwp_start_subcarrier %d, rb_start %d, first_carrier_offset %d\n", frame,slot,bwp_start_subcarrier, rel15_ul->rb_start, frame_parms->first_carrier_offset);
LOG_D(PHY,"pusch %d.%d : ul_dmrs_symb_pos %x\n",frame,slot,rel15_ul->ul_dmrs_symb_pos);
LOG_D(PHY,"ulsch RX %x : start_rb %d nb_rb %d mcs %d Nl %d Tpmi %d bwp_start %d start_sc %d start_symbol %d num_symbols %d cdmgrpsnodata %d num_dmrs %d dmrs_ports %d\n",
rel15_ul->rnti,rel15_ul->rb_start,rel15_ul->rb_size,rel15_ul->mcs_index,
rel15_ul->nrOfLayers,0,rel15_ul->bwp_start,0,rel15_ul->start_symbol_index,rel15_ul->nr_of_symbols,
rel15_ul->num_dmrs_cdm_grps_no_data,rel15_ul->ul_dmrs_symb_pos,rel15_ul->dmrs_ports);
//----------------------------------------------------------
//--------------------- Channel estimation ---------------------
//------------------- Channel estimation -------------------
//----------------------------------------------------------
start_meas(&gNB->ulsch_channel_estimation_stats);
int max_ch = 0;
......@@ -1643,279 +1490,199 @@ void nr_rx_pusch(PHY_VARS_gNB *gNB,
&nvar_tmp);
nvar += nvar_tmp;
}
nr_gnb_measurements(gNB, ulsch, pusch_vars, symbol, rel15_ul->nrOfLayers);
// measure the SNR from the channel estimation
nr_gnb_measurements(gNB,
&gNB->ulsch[ulsch_id],
pusch_vars,
symbol,
rel15_ul->nrOfLayers);
allocCast2D(n0_subband_power,
unsigned int,
gNB->measurements.n0_subband_power,
frame_parms->nb_antennas_rx,
frame_parms->N_RB_UL,
false);
for (aarx = 0; aarx < frame_parms->nb_antennas_rx; aarx++) {
if (symbol == rel15_ul->start_symbol_index) {
for (aarx = 0; aarx < frame_parms->nb_antennas_rx; aarx++)
{
if (symbol == rel15_ul->start_symbol_index)
{
pusch_vars->ulsch_power[aarx] = 0;
pusch_vars->ulsch_noise_power[aarx] = 0;
}
for (aatx = 0; aatx < rel15_ul->nrOfLayers; aatx++) {
for (int aatx = 0; aatx < rel15_ul->nrOfLayers; aatx++) {
pusch_vars->ulsch_power[aarx] += signal_energy_nodc(
&pusch_vars->ul_ch_estimates[aatx * gNB->frame_parms.nb_antennas_rx + aarx][symbol * frame_parms->ofdm_symbol_size],
rel15_ul->rb_size * 12);
}
for (int rb = 0; rb < rel15_ul->rb_size; rb++) {
for (int rb = 0; rb < rel15_ul->rb_size; rb++)
pusch_vars->ulsch_noise_power[aarx] +=
n0_subband_power[aarx][rel15_ul->bwp_start + rel15_ul->rb_start + rb] / rel15_ul->rb_size;
}
LOG_D(PHY,
"aa %d, bwp_start%d, rb_start %d, rb_size %d: ulsch_power %d, ulsch_noise_power %d\n",
aarx,
rel15_ul->bwp_start,
rel15_ul->rb_start,
rel15_ul->rb_size,
pusch_vars->ulsch_power[aarx],
pusch_vars->ulsch_noise_power[aarx]);
}
}
}
nvar /= (rel15_ul->nr_of_symbols * rel15_ul->nrOfLayers * frame_parms->nb_antennas_rx);
if (gNB->chest_time == 1) { // averaging time domain channel estimates
// averaging time domain channel estimates
if (gNB->chest_time == 1)
{
nr_chest_time_domain_avg(frame_parms,
pusch_vars->ul_ch_estimates,
rel15_ul->nr_of_symbols,
rel15_ul->start_symbol_index,
rel15_ul->ul_dmrs_symb_pos,
rel15_ul->rb_size);
pusch_vars->dmrs_symbol =
get_next_dmrs_symbol_in_slot(rel15_ul->ul_dmrs_symb_pos, rel15_ul->start_symbol_index, rel15_ul->nr_of_symbols);
pusch_vars->dmrs_symbol = get_next_dmrs_symbol_in_slot(rel15_ul->ul_dmrs_symb_pos,
rel15_ul->start_symbol_index,
rel15_ul->nr_of_symbols);
}
stop_meas(&gNB->ulsch_channel_estimation_stats);
int off = ((rel15_ul->rb_size&1) == 1)? 4:0;
uint32_t rxdataF_ext_offset = 0;
uint8_t shift_ch_ext = rel15_ul->nrOfLayers > 1 ? log2_approx(max_ch >> 11) : 0;
start_meas(&gNB->rx_pusch_init_stats);
// Flag to select the receiver: (true) nonlinear ML receiver, (false) linear MMSE receiver
// By default the nonlinear ML receiver is used, except
// - for 256QAM, since the nonlinear ML receiver is not implemented for 256QAM
// - for 64QAM, since the nonlinear ML receiver needs more processing time than MMSE and many machines are not powerful enough
bool ml_rx = true;
if (rel15_ul->nrOfLayers != 2 || rel15_ul->qam_mod_order >= 6) {
ml_rx = false;
}
// Scrambling initialization
int number_dmrs_symbols = 0;
for (int l = rel15_ul->start_symbol_index; l < rel15_ul->start_symbol_index + rel15_ul->nr_of_symbols; l++)
number_dmrs_symbols += ((rel15_ul->ul_dmrs_symb_pos)>>l) & 0x01;
int nb_re_dmrs;
if (rel15_ul->dmrs_config_type == pusch_dmrs_type1)
nb_re_dmrs = 6*rel15_ul->num_dmrs_cdm_grps_no_data;
else
nb_re_dmrs = 4*rel15_ul->num_dmrs_cdm_grps_no_data;
int ad_shift = 0;
if (rel15_ul->nrOfLayers == 1) {
ad_shift = 1 + log2_approx(frame_parms->nb_antennas_rx >> 2);
} else if (ml_rx == false) {
ad_shift = -3; // For 2-layers, we are already doing a bit shift in the nr_ulsch_mmse_2layers() function, so we can use more bits
}
// get how many coded bits are in the slot //
int G = nr_get_G(rel15_ul->rb_size,
rel15_ul->nr_of_symbols,
nb_re_dmrs,
number_dmrs_symbols, // number of dmrs symbols irrespective of single or double symbol dmrs
rel15_ul->qam_mod_order,
rel15_ul->nrOfLayers);
// initialize scrambling sequence //
int16_t s[G+96] __attribute__((aligned(32)));
int num_re_total = 0;
for(uint8_t symbol = rel15_ul->start_symbol_index; symbol < (rel15_ul->start_symbol_index + rel15_ul->nr_of_symbols); symbol++) {
uint8_t dmrs_symbol_flag = (rel15_ul->ul_dmrs_symb_pos >> symbol) & 0x01;
if (dmrs_symbol_flag == 1) {
if ((rel15_ul->ul_dmrs_symb_pos >> ((symbol + 1) % frame_parms->symbols_per_slot)) & 0x01)
AssertFatal(1==0,"Double DMRS configuration is not yet supported\n");
nr_codeword_unscrambling_init(s, G, 0, rel15_ul->data_scrambling_id, rel15_ul->rnti);
if (gNB->chest_time == 0) // Non averaging time domain channel estimates
pusch_vars->dmrs_symbol = symbol;
// first the computation of channel levels
if (rel15_ul->dmrs_config_type == 0) {
// if num_dmrs_cdm_grps_no_data is 1, only the even REs of the DMRS symbol carry no data
// if num_dmrs_cdm_grps_no_data is 2, both even and odd REs carry no data
nb_re_pusch = rel15_ul->rb_size *(12 - (rel15_ul->num_dmrs_cdm_grps_no_data*6));
}
else {
nb_re_pusch = rel15_ul->rb_size *(12 - (rel15_ul->num_dmrs_cdm_grps_no_data*4));
}
}
else {
nb_re_pusch = rel15_ul->rb_size * NR_NB_SC_PER_RB;
}
int nb_re_pusch = 0, meas_symbol = -1;
for(meas_symbol = rel15_ul->start_symbol_index;
meas_symbol < (rel15_ul->start_symbol_index + rel15_ul->nr_of_symbols);
meas_symbol++)
if ((nb_re_pusch = get_nb_re_pusch(frame_parms,rel15_ul,meas_symbol)) > 0)
break;
num_re_total += nb_re_pusch;
pusch_vars->ul_valid_re_per_slot[symbol] = nb_re_pusch;
LOG_D(PHY, "symbol %d: nb_re_pusch %d, DMRS symbl used for Chest :%d \n", symbol, nb_re_pusch, pusch_vars->dmrs_symbol);
AssertFatal(nb_re_pusch>0 && meas_symbol>=0,"nb_re_pusch %d cannot be 0 or meas_symbol %d cannot be negative here\n",nb_re_pusch,meas_symbol);
//----------------------------------------------------------
//--------------------- RBs extraction ---------------------
//----------------------------------------------------------
if (nb_re_pusch > 0) {
start_meas(&gNB->ulsch_rbs_extraction_stats);
nr_ulsch_extract_rbs(gNB->common_vars.rxdataF, pusch_vars, slot, symbol, dmrs_symbol_flag, rel15_ul, frame_parms);
stop_meas(&gNB->ulsch_rbs_extraction_stats);
// extract the first dmrs for the channel level computation
// extract the data from the OFDM frame to the start of the array
int soffset = (slot&3)*frame_parms->symbols_per_slot*frame_parms->ofdm_symbol_size;
if (nb_re_pusch & 7)
nb_re_pusch += 8 - (nb_re_pusch & 7);
for (int aarx = 0; aarx < frame_parms->nb_antennas_rx; aarx++)
for (int aatx = 0; aatx < rel15_ul->nrOfLayers; aatx++)
nr_ulsch_extract_rbs(gNB->common_vars.rxdataF[aarx],
(c16_t *)pusch_vars->ul_ch_estimates[aatx * frame_parms->nb_antennas_rx + aarx],
(c16_t*)&pusch_vars->rxdataF_ext[aarx][meas_symbol * nb_re_pusch],
(c16_t*)&pusch_vars->ul_ch_estimates_ext[aatx*frame_parms->nb_antennas_rx+aarx][meas_symbol * nb_re_pusch],
soffset + meas_symbol * frame_parms->ofdm_symbol_size,
pusch_vars->dmrs_symbol * frame_parms->ofdm_symbol_size,
aarx,
(rel15_ul->ul_dmrs_symb_pos >> meas_symbol) & 0x01,
rel15_ul,
frame_parms);
int avgs = 0;
int avg[frame_parms->nb_antennas_rx*rel15_ul->nrOfLayers];
uint8_t shift_ch_ext = rel15_ul->nrOfLayers > 1 ? log2_approx(max_ch >> 11) : 0;
//----------------------------------------------------------
//--------------------- Channel Scaling --------------------
//----------------------------------------------------------
nr_ulsch_scale_channel(pusch_vars->ul_ch_estimates_ext,
frame_parms,
ulsch,
symbol,
dmrs_symbol_flag,
meas_symbol,
(rel15_ul->ul_dmrs_symb_pos >> meas_symbol) & 0x01,
nb_re_pusch,
rel15_ul->nrOfLayers,
rel15_ul->rb_size,
shift_ch_ext);
if (pusch_vars->cl_done == 0) {
nr_ulsch_channel_level(pusch_vars->ul_ch_estimates_ext,
frame_parms,
avg,
symbol,
nb_re_pusch,
rel15_ul->nrOfLayers,
rel15_ul->rb_size);
meas_symbol, // index of the start symbol
nb_re_pusch, // number of REs in the PUSCH symbol
rel15_ul->nrOfLayers);
avgs = 0;
for (int aatx = 0; aatx < rel15_ul->nrOfLayers; aatx++)
for (int aarx = 0; aarx < frame_parms->nb_antennas_rx; aarx++)
avgs = cmax(avgs, avg[aatx*frame_parms->nb_antennas_rx+aarx]);
for (aatx=0;aatx<rel15_ul->nrOfLayers;aatx++)
for (aarx=0;aarx<frame_parms->nb_antennas_rx;aarx++)
avgs = cmax(avgs,avg[aatx*frame_parms->nb_antennas_rx+aarx]);
pusch_vars->log2_maxh = (log2_approx(avgs) >> 1);
pusch_vars->log2_maxh = (log2_approx(avgs) >> 1) + ad_shift;
if (pusch_vars->log2_maxh < 0) {
pusch_vars->log2_maxh = 0;
}
pusch_vars->cl_done = 1;
}
if (rel15_ul->nrOfLayers == 2 && rel15_ul->qam_mod_order >= 6)
pusch_vars->log2_maxh = (log2_approx(avgs) >> 1) - 3; // for MMSE
else if (rel15_ul->nrOfLayers == 1)
pusch_vars->log2_maxh = (log2_approx(avgs) >> 1) + 1 + log2_approx(frame_parms->nb_antennas_rx >> 2);
//----------------------------------------------------------
//--------------------- Channel Compensation ---------------
//----------------------------------------------------------
start_meas(&gNB->ulsch_channel_compensation_stats);
LOG_D(PHY, "Doing channel compensations log2_maxh %d, avgs %d (%d,%d)\n" ,pusch_vars->log2_maxh, avgs,avg[0], avg[1]);
nr_ulsch_channel_compensation(pusch_vars->rxdataF_ext,
pusch_vars->ul_ch_estimates_ext,
pusch_vars->ul_ch_mag0,
pusch_vars->ul_ch_magb0,
pusch_vars->ul_ch_magc0,
pusch_vars->rxdataF_comp,
(rel15_ul->nrOfLayers > 1) ? pusch_vars->rho : NULL,
frame_parms,
symbol,
nb_re_pusch,
dmrs_symbol_flag,
rel15_ul->qam_mod_order,
rel15_ul->nrOfLayers,
rel15_ul->rb_size,
pusch_vars->log2_maxh);
stop_meas(&gNB->ulsch_channel_compensation_stats);
start_meas(&gNB->ulsch_mrc_stats);
nr_ulsch_detection_mrc(frame_parms,
pusch_vars->rxdataF_comp,
pusch_vars->ul_ch_mag0,
pusch_vars->ul_ch_magb0,
pusch_vars->ul_ch_magc0,
(rel15_ul->nrOfLayers > 1) ? pusch_vars->rho : NULL,
rel15_ul->nrOfLayers,
symbol,
rel15_ul->rb_size,
nb_re_pusch);
// Apply MMSE for 2 Tx layers
if (ml_rx == false && rel15_ul->nrOfLayers == 2) {
nr_ulsch_mmse_2layers(frame_parms,
pusch_vars->rxdataF_comp,
pusch_vars->ul_ch_mag0,
pusch_vars->ul_ch_magb0,
pusch_vars->ul_ch_magc0,
pusch_vars->ul_ch_estimates_ext,
rel15_ul->rb_size,
frame_parms->nb_antennas_rx,
rel15_ul->qam_mod_order,
pusch_vars->log2_maxh,
symbol,
nb_re_pusch,
nvar);
}
if (pusch_vars->log2_maxh < 0)
pusch_vars->log2_maxh = 0;
stop_meas(&gNB->ulsch_mrc_stats);
stop_meas(&gNB->rx_pusch_init_stats);
if (rel15_ul->transform_precoding == transformPrecoder_enabled) {
// For an odd number of resource blocks, the RE count needs to be aligned to a multiple of 8
int nb_re_pusch2 = nb_re_pusch + (nb_re_pusch&7);
start_meas(&gNB->rx_pusch_symbol_processing_stats);
int numSymbols = gNB->num_pusch_symbols_per_thread;
// perform IDFT operation on the compensated rxdata if transform precoding is enabled
nr_idft(&pusch_vars->rxdataF_comp[0][symbol * nb_re_pusch2], nb_re_pusch);
LOG_D(PHY,"Transform precoding being done on data- symbol: %d, nb_re_pusch: %d\n", symbol, nb_re_pusch);
}
for(uint8_t symbol = rel15_ul->start_symbol_index;
symbol < (rel15_ul->start_symbol_index + rel15_ul->nr_of_symbols);
symbol += numSymbols)
{
int total_res = 0;
for (int s = 0; s < numSymbols;s++) {
pusch_vars->ul_valid_re_per_slot[symbol+s] = get_nb_re_pusch(frame_parms,rel15_ul,symbol+s);
pusch_vars->llr_offset[symbol+s] = ((symbol+s) == rel15_ul->start_symbol_index) ?
0 :
pusch_vars->llr_offset[symbol+s-1] + pusch_vars->ul_valid_re_per_slot[symbol+s-1] * rel15_ul->qam_mod_order;
total_res+=pusch_vars->ul_valid_re_per_slot[symbol+s];
}
if (total_res > 0) {
union puschSymbolReqUnion id = {.s={ulsch_id,frame,slot,0}};
id.p=1+symbol;
notifiedFIFO_elt_t *req = newNotifiedFIFO_elt(sizeof(puschSymbolProc_t), id.p, &gNB->respPuschSymb, &nr_pusch_symbol_processing); // create a job for Tpool
puschSymbolProc_t *rdata = (puschSymbolProc_t*)NotifiedFifoData(req); // data for the job
rdata->gNB = gNB;
rdata->frame_parms = frame_parms;
rdata->rel15_ul = rel15_ul;
rdata->slot = slot;
rdata->startSymbol = symbol;
rdata->numSymbols = numSymbols;
rdata->ulsch_id = ulsch_id;
rdata->llr = pusch_vars->llr;
rdata->llr_layers = pusch_vars->llr_layers;
rdata->s = &s[pusch_vars->llr_offset[symbol]*rel15_ul->nrOfLayers];
rdata->nvar = nvar;
//----------------------------------------------------------
//--------------------- PTRS Processing --------------------
//----------------------------------------------------------
/* If PTRS is enabled, the LLRs are calculated after the PTRS symbols are processed, *
* otherwise the LLRs are calculated for each symbol based on the DMRS channel estimates only. */
if (rel15_ul->pdu_bit_map & PUSCH_PDU_BITMAP_PUSCH_PTRS) {
start_meas(&gNB->ulsch_ptrs_processing_stats);
nr_pusch_ptrs_processing(gNB,
frame_parms,
rel15_ul,
ulsch_id,
slot,
symbol,
nb_re_pusch);
stop_meas(&gNB->ulsch_ptrs_processing_stats);
/* Subtract the total number of PTRS REs in the symbol from the PUSCH REs */
pusch_vars->ul_valid_re_per_slot[symbol] -= pusch_vars->ptrs_re_per_slot;
}
/*---------------------------------------------------------------------------------------------------- */
/*-------------------- LLRs computation -------------------------------------------------------------*/
/*-----------------------------------------------------------------------------------------------------*/
start_meas(&gNB->ulsch_llr_stats);
if (ml_rx == false || rel15_ul->nrOfLayers == 1) {
for (aatx=0; aatx < rel15_ul->nrOfLayers; aatx++) {
nr_ulsch_compute_llr(&pusch_vars->rxdataF_comp[aatx * frame_parms->nb_antennas_rx][symbol * (off + rel15_ul->rb_size * NR_NB_SC_PER_RB)],
pusch_vars->ul_ch_mag0[aatx * frame_parms->nb_antennas_rx],
pusch_vars->ul_ch_magb0[aatx * frame_parms->nb_antennas_rx],
pusch_vars->ul_ch_magc0[aatx * frame_parms->nb_antennas_rx],
&pusch_vars->llr_layers[aatx][rxdataF_ext_offset * rel15_ul->qam_mod_order],
rel15_ul->rb_size,
pusch_vars->ul_valid_re_per_slot[symbol],
symbol,
rel15_ul->qam_mod_order);
}
nr_pusch_symbol_processing(rdata);
} else {
nr_ulsch_compute_ML_llr(pusch_vars->rxdataF_comp,
pusch_vars->ul_ch_mag0,
pusch_vars->rho,
pusch_vars->llr_layers,
frame_parms->nb_antennas_rx,
rel15_ul->rb_size,
nb_re_pusch,
symbol,
rxdataF_ext_offset,
rel15_ul->qam_mod_order);
if (rel15_ul->qam_mod_order == 2) {
nr_ulsch_shift_llr(pusch_vars->llr_layers, nb_re_pusch, rxdataF_ext_offset, rel15_ul->qam_mod_order, 4);
pushTpool(&gNB->threadPool, req);
gNB->nbSymb++;
}
#ifdef ML_DEBUG
c16_t *llr_layers0 = (c16_t *)&pusch_vars->llr_layers[0][rxdataF_ext_offset * rel15_ul->qam_mod_order];
c16_t *llr_layers1 = (c16_t *)&pusch_vars->llr_layers[1][rxdataF_ext_offset * rel15_ul->qam_mod_order];
printf("===============================\n");
printf("AFTER nr_ulsch_compute_ML_llr()\n");
printf("===============================\n");
for (int k = 0; k < nb_re_pusch; k++) {
printf("[%3i] llr_layers0 = (%6i, %6i), llr_layers1 = (%6i, %6i)\n",
k, llr_layers0[k].r, llr_layers0[k].i, llr_layers1[k].r, llr_layers1[k].i);
}
printf("\n");
#endif
}
stop_meas(&gNB->ulsch_llr_stats);
rxdataF_ext_offset += pusch_vars->ul_valid_re_per_slot[symbol];
LOG_D(PHY,"%d.%d Added symbol %d (count %d) to process, in pipe\n",frame,slot,symbol,gNB->nbSymb);
}
} // symbol loop
if (!(frame % 128)) {
int num_llr = num_re_total*rel15_ul->qam_mod_order;
GnbScopeUpdate(gNB, puschLLRe, num_llr);
GnbScopeUpdate(gNB, puschIQe, num_re_total);
while (gNB->nbSymb > 0) {
notifiedFIFO_elt_t *req = pullTpool(&gNB->respPuschSymb, &gNB->threadPool);
gNB->nbSymb--;
delNotifiedFIFO_elt(req);
}
stop_meas(&gNB->rx_pusch_symbol_processing_stats);
return 0;
}
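nr_rx_pusch_tp() groups the PUSCH symbols into chunks of num_pusch_symbols_per_thread, pushes one job per chunk to the thread pool and finally drains respPuschSymb to wait for all of them before decoding starts. The sketch below shows the same fork/join shape with plain pthreads instead of the OAI thread-pool and notified-FIFO API; it is an illustration, not a drop-in replacement:

#include <pthread.h>
#include <stdio.h>

/* Each worker handles a contiguous group of OFDM symbols; the caller
 * then waits for all groups, which plays the role of draining the
 * respPuschSymb FIFO. */
typedef struct {
  int start_symbol;
  int num_symbols;
} symb_job_t;

static void *process_symbols(void *arg)
{
  symb_job_t *job = (symb_job_t *)arg;
  for (int s = job->start_symbol; s < job->start_symbol + job->num_symbols; s++)
    printf("worker: processing symbol %d\n", s); /* placeholder for the per-symbol RX work */
  return NULL;
}

int main(void)
{
  const int start = 0, nb_symbols = 12, symbols_per_job = 3;
  const int nb_jobs = nb_symbols / symbols_per_job;
  pthread_t th[nb_jobs];
  symb_job_t jobs[nb_jobs];

  for (int j = 0; j < nb_jobs; j++) {
    jobs[j] = (symb_job_t){ .start_symbol = start + j * symbols_per_job,
                            .num_symbols = symbols_per_job };
    pthread_create(&th[j], NULL, process_symbols, &jobs[j]);
  }
  for (int j = 0; j < nb_jobs; j++) /* join before decoding would start */
    pthread_join(th[j], NULL);
  return 0;
}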
......@@ -30,6 +30,7 @@
* \warning
*/
#include "PHY/defs_gNB.h"
#include "PHY/defs_nr_common.h"
#include "PHY/sse_intrin.h"
#include "PHY/impl_defs_top.h"
......@@ -46,16 +47,9 @@ void nr_ulsch_qpsk_llr(int32_t *rxdataF_comp,
{
c16_t *rxF = (c16_t *)rxdataF_comp;
c16_t *llr32 = (c16_t *)ulsch_llr;
if (!llr32) {
LOG_E(PHY,"nr_ulsch_qpsk_llr: llr is null, symbol %d, llr32 = %p\n",symbol, llr32);
}
for (int i = 0; i < nb_re; i++) {
//*llr32 = *rxF;
llr32->r = rxF->r >> 3;
llr32->i = rxF->i >> 3;
rxF++;
llr32++;
llr32[i].r = rxF[i].r >> 3;
llr32[i].i = rxF[i].i >> 3;
}
}
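For Gray-mapped QPSK the max-log LLRs of the two bits are simply the real and imaginary parts of the compensated symbol, which is why the function above only copies and right-shifts. A scalar reference version, assuming the same fixed-point scaling (the local c16 type stands in for OAI's c16_t):

#include <stdint.h>
#include <stdio.h>

typedef struct { int16_t r, i; } c16; /* local stand-in for c16_t */

/* Reference form of the QPSK LLR computation: llr(b0) ~ y_R, llr(b1) ~ y_I,
 * scaled down by 2^3 as in the function above. */
static void qpsk_llr_ref(const c16 *rxF_comp, c16 *llr, int nb_re)
{
  for (int k = 0; k < nb_re; k++) {
    llr[k].r = rxF_comp[k].r >> 3;
    llr[k].i = rxF_comp[k].i >> 3;
  }
}

int main(void)
{
  c16 y[2] = { { 400, -360 }, { -80, 56 } };
  c16 llr[2];
  qpsk_llr_ref(y, llr, 2);
  for (int k = 0; k < 2; k++)
    printf("RE %d: llr = (%d, %d)\n", k, llr[k].r, llr[k].i);
  return 0;
}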
......@@ -66,72 +60,34 @@ void nr_ulsch_qpsk_llr(int32_t *rxdataF_comp,
void nr_ulsch_16qam_llr(int32_t *rxdataF_comp,
int32_t *ul_ch_mag,
int16_t *ulsch_llr,
uint32_t nb_rb,
uint32_t nb_re,
uint8_t symbol)
{
simde__m256i *rxF = (simde__m256i*)rxdataF_comp;
simde__m256i *ch_mag;
simde__m256i llr256[2];
register simde__m256i xmm0;
uint32_t *llr32;
int i;
int off = ((nb_rb&1) == 1)? 4:0;
llr32 = (uint32_t*)ulsch_llr;
ch_mag = (simde__m256i*)&ul_ch_mag[(symbol*(off+(nb_rb*12)))];
unsigned char len_mod8 = nb_re&7;
nb_re >>= 3; // length in 256-bit words (8 REs each)
nb_re += (len_mod8 == 0 ? 0 : 1);
for (i=0; i<nb_re; i++) {
xmm0 = simde_mm256_abs_epi16(rxF[i]); // registers of even index in xmm0-> |y_R|, registers of odd index in xmm0-> |y_I|
xmm0 = simde_mm256_subs_epi16(ch_mag[i],xmm0); // registers of even index in xmm0-> |y_R|-|h|^2, registers of odd index in xmm0-> |y_I|-|h|^2
llr256[0] = simde_mm256_unpacklo_epi32(rxF[i],xmm0); // llr128[0] contains the llrs of the 1st,2nd,5th and 6th REs
llr256[1] = simde_mm256_unpackhi_epi32(rxF[i],xmm0); // llr128[1] contains the llrs of the 3rd, 4th, 7th and 8th REs
// 1st RE
llr32[0] = simde_mm256_extract_epi32(llr256[0],0); // llr32[0] low 16 bits-> y_R , high 16 bits-> y_I
llr32[1] = simde_mm256_extract_epi32(llr256[0],1); // llr32[1] low 16 bits-> |h|-|y_R|^2, high 16 bits-> |h|-|y_I|^2
// 2nd RE
llr32[2] = simde_mm256_extract_epi32(llr256[0],2); // llr32[2] low 16 bits-> y_R , high 16 bits-> y_I
llr32[3] = simde_mm256_extract_epi32(llr256[0],3); // llr32[3] low 16 bits-> |h|-|y_R|^2, high 16 bits-> |h|-|y_I|^2
// 3rd RE
llr32[4] = simde_mm256_extract_epi32(llr256[1],0); // llr32[4] low 16 bits-> y_R , high 16 bits-> y_I
llr32[5] = simde_mm256_extract_epi32(llr256[1],1); // llr32[5] low 16 bits-> |h|-|y_R|^2, high 16 bits-> |h|-|y_I|^2
// 4th RE
llr32[6] = simde_mm256_extract_epi32(llr256[1],2); // llr32[6] low 16 bits-> y_R , high 16 bits-> y_I
llr32[7] = simde_mm256_extract_epi32(llr256[1],3); // llr32[7] low 16 bits-> |h|-|y_R|^2, high 16 bits-> |h|-|y_I|^2
// 5th RE
llr32[8] = simde_mm256_extract_epi32(llr256[0],4); // llr32[8] low 16 bits-> y_R , high 16 bits-> y_I
llr32[9] = simde_mm256_extract_epi32(llr256[0],5); // llr32[9] low 16 bits-> |h|-|y_R|^2, high 16 bits-> |h|-|y_I|^2
// 6th RE
llr32[10] = simde_mm256_extract_epi32(llr256[0],6); // llr32[10] low 16 bits-> y_R , high 16 bits-> y_I
llr32[11] = simde_mm256_extract_epi32(llr256[0],7); // llr32[11] low 16 bits-> |h|-|y_R|^2, high 16 bits-> |h|-|y_I|^2
// 7th RE
llr32[12] = simde_mm256_extract_epi32(llr256[1],4); // llr32[12] low 16 bits-> y_R , high 16 bits-> y_I
llr32[13] = simde_mm256_extract_epi32(llr256[1],5); // llr32[13] low 16 bits-> |h|-|y_R|^2, high 16 bits-> |h|-|y_I|^2
// 8th RE
llr32[14] = simde_mm256_extract_epi32(llr256[1],6); // llr32[14] low 16 bits-> y_R , high 16 bits-> y_I
llr32[15] = simde_mm256_extract_epi32(llr256[1],7); // llr32[15] low 16 bits-> |h|-|y_R|^2, high 16 bits-> |h|-|y_I|^2
llr32+=16;
simde__m256i *rxF_256 = (simde__m256i*)rxdataF_comp;
simde__m256i *ch_mag = (simde__m256i*)ul_ch_mag;
int64_t *llr_64 = (int64_t*)ulsch_llr;
simde__m256i xmm0, xmm1, xmm2;
for (int i = 0; i < ((nb_re + 7) >> 3); i++) {
xmm0 = simde_mm256_abs_epi16(rxF_256[i]); // registers of even index in xmm0-> |y_R|, registers of odd index in xmm0-> |y_I|
xmm0 = simde_mm256_subs_epi16(ch_mag[i], xmm0); // registers of even index in xmm0-> |y_R|-|h|^2, registers of odd index in xmm0-> |y_I|-|h|^2
xmm1 = simde_mm256_unpacklo_epi32(rxF_256[i], xmm0);
xmm2 = simde_mm256_unpackhi_epi32(rxF_256[i], xmm0);
// xmm1 |1st 2nd 3rd 4th 9th 10th 11th 12th|
// xmm2 |5th 6th 7th 8th 13th 14th 15th 16th|
*llr_64++ = simde_mm256_extract_epi64(xmm1, 0);
*llr_64++ = simde_mm256_extract_epi64(xmm1, 1);
*llr_64++ = simde_mm256_extract_epi64(xmm2, 0);
*llr_64++ = simde_mm256_extract_epi64(xmm2, 1);
*llr_64++ = simde_mm256_extract_epi64(xmm1, 2);
*llr_64++ = simde_mm256_extract_epi64(xmm1, 3);
*llr_64++ = simde_mm256_extract_epi64(xmm2, 2);
*llr_64++ = simde_mm256_extract_epi64(xmm2, 3);
}
simde_mm_empty();
simde_m_empty();
}
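Per RE the vector code above computes the 16QAM max-log LLRs as the pair (y_R, y_I) followed by (m - |y_R|, m - |y_I|), where m is the magnitude reference carried in ul_ch_mag; the unpack/extract sequence only restores the RE order. A scalar sketch of that per-RE arithmetic (saturation is omitted for brevity):

#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>

/* Scalar per-RE version of the 16QAM LLR arithmetic vectorized above:
 * llr0 = y_R, llr1 = y_I, llr2 = m - |y_R|, llr3 = m - |y_I|,
 * with m the |h|^2-scaled threshold taken from ul_ch_mag. */
static void qam16_llr_ref(int16_t y_r, int16_t y_i, int16_t m, int16_t llr[4])
{
  llr[0] = y_r;
  llr[1] = y_i;
  llr[2] = (int16_t)(m - abs(y_r));
  llr[3] = (int16_t)(m - abs(y_i));
}

int main(void)
{
  int16_t llr[4];
  qam16_llr_ref(300, -120, 200, llr); /* made-up compensated symbol and reference */
  printf("llr = %d %d %d %d\n", llr[0], llr[1], llr[2], llr[3]);
  return 0;
}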
//----------------------------------------------------------------------------------------------
......@@ -142,116 +98,59 @@ void nr_ulsch_64qam_llr(int32_t *rxdataF_comp,
int32_t *ul_ch_mag,
int32_t *ul_ch_magb,
int16_t *ulsch_llr,
uint32_t nb_rb,
uint32_t nb_re,
uint8_t symbol)
{
int off = ((nb_rb&1) == 1)? 4:0;
simde__m256i *rxF = (simde__m256i*)rxdataF_comp;
simde__m256i *ch_mag,*ch_magb;
register simde__m256i xmm0,xmm1,xmm2;
int i;
simde__m256i xmm0, xmm1, xmm2;
ch_mag = (simde__m256i*)&ul_ch_mag[(symbol*(off+(nb_rb*12)))];
ch_magb = (simde__m256i*)&ul_ch_magb[(symbol*(off+(nb_rb*12)))];
simde__m256i *ch_maga = (simde__m256i*)ul_ch_mag;
simde__m256i *ch_magb = (simde__m256i*)ul_ch_magb;
int len_mod8 = nb_re&7;
nb_re = nb_re>>3; // length in 256-bit words (8 REs each)
nb_re += ((len_mod8 == 0) ? 0 : 1);
int32_t *llr_32 = (int32_t *)ulsch_llr;
for (i=0; i<nb_re; i++) {
for (int i = 0; i < ((nb_re + 7) >> 3); i++) {
xmm0 = rxF[i];
xmm1 = simde_mm256_abs_epi16(xmm0);
xmm1 = simde_mm256_subs_epi16(ch_mag[i],xmm1);
xmm1 = simde_mm256_abs_epi16(xmm0); // registers of even index in xmm0-> |y_R|, registers of odd index in xmm0-> |y_I|
xmm1 = simde_mm256_subs_epi16(ch_maga[i], xmm1); // registers of even index in xmm0-> |y_R|-|h|^2, registers of odd index in xmm0-> |y_I|-|h|^2
xmm2 = simde_mm256_abs_epi16(xmm1);
xmm2 = simde_mm256_subs_epi16(ch_magb[i],xmm2);
// ---------------------------------------
// 1st RE
// ---------------------------------------
ulsch_llr[0] = simde_mm256_extract_epi16(xmm0,0);
ulsch_llr[1] = simde_mm256_extract_epi16(xmm0,1);
ulsch_llr[2] = simde_mm256_extract_epi16(xmm1,0);
ulsch_llr[3] = simde_mm256_extract_epi16(xmm1,1);
ulsch_llr[4] = simde_mm256_extract_epi16(xmm2,0);
ulsch_llr[5] = simde_mm256_extract_epi16(xmm2,1);
// ---------------------------------------
ulsch_llr+=6;
// ---------------------------------------
// 2nd RE
// ---------------------------------------
ulsch_llr[0] = simde_mm256_extract_epi16(xmm0,2);
ulsch_llr[1] = simde_mm256_extract_epi16(xmm0,3);
ulsch_llr[2] = simde_mm256_extract_epi16(xmm1,2);
ulsch_llr[3] = simde_mm256_extract_epi16(xmm1,3);
ulsch_llr[4] = simde_mm256_extract_epi16(xmm2,2);
ulsch_llr[5] = simde_mm256_extract_epi16(xmm2,3);
// ---------------------------------------
ulsch_llr+=6;
// ---------------------------------------
// 3rd RE
// ---------------------------------------
ulsch_llr[0] = simde_mm256_extract_epi16(xmm0,4);
ulsch_llr[1] = simde_mm256_extract_epi16(xmm0,5);
ulsch_llr[2] = simde_mm256_extract_epi16(xmm1,4);
ulsch_llr[3] = simde_mm256_extract_epi16(xmm1,5);
ulsch_llr[4] = simde_mm256_extract_epi16(xmm2,4);
ulsch_llr[5] = simde_mm256_extract_epi16(xmm2,5);
// ---------------------------------------
ulsch_llr+=6;
// ---------------------------------------
// 4th RE
// ---------------------------------------
ulsch_llr[0] = simde_mm256_extract_epi16(xmm0,6);
ulsch_llr[1] = simde_mm256_extract_epi16(xmm0,7);
ulsch_llr[2] = simde_mm256_extract_epi16(xmm1,6);
ulsch_llr[3] = simde_mm256_extract_epi16(xmm1,7);
ulsch_llr[4] = simde_mm256_extract_epi16(xmm2,6);
ulsch_llr[5] = simde_mm256_extract_epi16(xmm2,7);
// ---------------------------------------
ulsch_llr+=6;
ulsch_llr[0] = simde_mm256_extract_epi16(xmm0,8);
ulsch_llr[1] = simde_mm256_extract_epi16(xmm0,9);
ulsch_llr[2] = simde_mm256_extract_epi16(xmm1,8);
ulsch_llr[3] = simde_mm256_extract_epi16(xmm1,9);
ulsch_llr[4] = simde_mm256_extract_epi16(xmm2,8);
ulsch_llr[5] = simde_mm256_extract_epi16(xmm2,9);
ulsch_llr[6] = simde_mm256_extract_epi16(xmm0,10);
ulsch_llr[7] = simde_mm256_extract_epi16(xmm0,11);
ulsch_llr[8] = simde_mm256_extract_epi16(xmm1,10);
ulsch_llr[9] = simde_mm256_extract_epi16(xmm1,11);
ulsch_llr[10] = simde_mm256_extract_epi16(xmm2,10);
ulsch_llr[11] = simde_mm256_extract_epi16(xmm2,11);
ulsch_llr[12] = simde_mm256_extract_epi16(xmm0,12);
ulsch_llr[13] = simde_mm256_extract_epi16(xmm0,13);
ulsch_llr[14] = simde_mm256_extract_epi16(xmm1,12);
ulsch_llr[15] = simde_mm256_extract_epi16(xmm1,13);
ulsch_llr[16] = simde_mm256_extract_epi16(xmm2,12);
ulsch_llr[17] = simde_mm256_extract_epi16(xmm2,13);
ulsch_llr[18] = simde_mm256_extract_epi16(xmm0,14);
ulsch_llr[19] = simde_mm256_extract_epi16(xmm0,15);
ulsch_llr[20] = simde_mm256_extract_epi16(xmm1,14);
ulsch_llr[21] = simde_mm256_extract_epi16(xmm1,15);
ulsch_llr[22] = simde_mm256_extract_epi16(xmm2,14);
ulsch_llr[23] = simde_mm256_extract_epi16(xmm2,15);
ulsch_llr+=24;
xmm2 = simde_mm256_subs_epi16(ch_magb[i], xmm2);
// xmm0 |1st 4th 7th 10th 13th 16th 19th 22nd|
// xmm1 |2nd 5th 8th 11th 14th 17th 20th 23rd|
// xmm2 |3rd 6th 9th 12th 15th 18th 21st 24th|
*llr_32++ = simde_mm256_extract_epi32(xmm0,0);
*llr_32++ = simde_mm256_extract_epi32(xmm1,0);
*llr_32++ = simde_mm256_extract_epi32(xmm2,0);
*llr_32++ = simde_mm256_extract_epi32(xmm0,1);
*llr_32++ = simde_mm256_extract_epi32(xmm1,1);
*llr_32++ = simde_mm256_extract_epi32(xmm2,1);
*llr_32++ = simde_mm256_extract_epi32(xmm0,2);
*llr_32++ = simde_mm256_extract_epi32(xmm1,2);
*llr_32++ = simde_mm256_extract_epi32(xmm2,2);
*llr_32++ = simde_mm256_extract_epi32(xmm0,3);
*llr_32++ = simde_mm256_extract_epi32(xmm1,3);
*llr_32++ = simde_mm256_extract_epi32(xmm2,3);
*llr_32++ = simde_mm256_extract_epi32(xmm0,4);
*llr_32++ = simde_mm256_extract_epi32(xmm1,4);
*llr_32++ = simde_mm256_extract_epi32(xmm2,4);
*llr_32++ = simde_mm256_extract_epi32(xmm0,5);
*llr_32++ = simde_mm256_extract_epi32(xmm1,5);
*llr_32++ = simde_mm256_extract_epi32(xmm2,5);
*llr_32++ = simde_mm256_extract_epi32(xmm0,6);
*llr_32++ = simde_mm256_extract_epi32(xmm1,6);
*llr_32++ = simde_mm256_extract_epi32(xmm2,6);
*llr_32++ = simde_mm256_extract_epi32(xmm0,7);
*llr_32++ = simde_mm256_extract_epi32(xmm1,7);
*llr_32++ = simde_mm256_extract_epi32(xmm2,7);
}
simde_mm_empty();
simde_m_empty();
}
void nr_ulsch_256qam_llr(int32_t *rxdataF_comp,
......@@ -259,115 +158,50 @@ void nr_ulsch_256qam_llr(int32_t *rxdataF_comp,
int32_t *ul_ch_magb,
int32_t *ul_ch_magc,
int16_t *ulsch_llr,
uint32_t nb_rb,
uint32_t nb_re,
uint8_t symbol)
{
int off = ((nb_rb&1) == 1)? 4:0;
simde__m256i *rxF = (simde__m256i*)rxdataF_comp;
simde__m256i *ch_mag,*ch_magb,*ch_magc;
register simde__m256i xmm0,xmm1,xmm2,xmm3,xmm4,xmm5,xmm6;
simde__m256i xmm0,xmm1,xmm2,xmm3,xmm4,xmm5,xmm6;
simde__m256i *llr256=(simde__m256i*)ulsch_llr;
ch_mag = (simde__m256i*)&ul_ch_mag[(symbol*(off+(nb_rb*12)))];
ch_magb = (simde__m256i*)&ul_ch_magb[(symbol*(off+(nb_rb*12)))];
ch_magc = (simde__m256i*)&ul_ch_magc[(symbol*(off+(nb_rb*12)))];
int len_mod8 = nb_re&7;
int nb_re256 = nb_re>>3; // length in 256-bit words (8 REs)
simde__m256i* ch_maga = (simde__m256i*)ul_ch_mag;
simde__m256i* ch_magb = (simde__m256i*)ul_ch_magb;
simde__m256i* ch_magc = (simde__m256i*)ul_ch_magc;
for (int i=0; i<nb_re256; i++) {
for (int i = 0; i < ((nb_re + 7) >> 3); i++) {
xmm0 = simde_mm256_abs_epi16(rxF[i]); // registers of even index in xmm0-> |y_R|, registers of odd index in xmm0-> |y_I|
xmm0 = simde_mm256_subs_epi16(ch_mag[i],xmm0); // registers of even index in xmm0-> |y_R|-|h|^2, registers of odd index in xmm0-> |y_I|-|h|^2
xmm0 = simde_mm256_subs_epi16(ch_maga[i], xmm0); // registers of even index in xmm0-> |y_R|-|h|^2, registers of odd index in xmm0-> |y_I|-|h|^2
// xmm0 now contains 16 LLRs
xmm1 = simde_mm256_abs_epi16(xmm0);
xmm1 = simde_mm256_subs_epi16(ch_magb[i],xmm1); // contains 16 LLRs
xmm1 = simde_mm256_subs_epi16(ch_magb[i], xmm1); // contains 16 LLRs
xmm2 = simde_mm256_abs_epi16(xmm1);
xmm2 = simde_mm256_subs_epi16(ch_magc[i],xmm2); // contains 16 LLRs
xmm2 = simde_mm256_subs_epi16(ch_magc[i], xmm2); // contains 16 LLRs
// rxF[i] A0 A1 A2 A3 A4 A5 A6 A7 bits 7,6
// xmm0 B0 B1 B2 B3 B4 B5 B6 B7 bits 5,4
// xmm1 C0 C1 C2 C3 C4 C5 C6 C7 bits 3,2
// xmm2 D0 D1 D2 D3 D4 D5 D6 D7 bits 1,0
xmm3 = simde_mm256_unpacklo_epi32(rxF[i],xmm0); // A0 B0 A1 B1 A4 B4 A5 B5
xmm4 = simde_mm256_unpackhi_epi32(rxF[i],xmm0); // A2 B2 A3 B3 A6 B6 A7 B7
xmm5 = simde_mm256_unpacklo_epi32(xmm1,xmm2); // C0 D0 C1 D1 C4 D4 C5 D5
xmm6 = simde_mm256_unpackhi_epi32(xmm1,xmm2); // C2 D2 C3 D3 C6 D6 C7 D7
xmm0 = simde_mm256_unpacklo_epi64(xmm3,xmm5); // A0 B0 C0 D0 A4 B4 C4 D4
xmm1 = simde_mm256_unpackhi_epi64(xmm3,xmm5); // A1 B1 C1 D1 A5 B5 C5 D5
xmm2 = simde_mm256_unpacklo_epi64(xmm4,xmm6); // A2 B2 C2 D2 A6 B6 C6 D6
xmm3 = simde_mm256_unpackhi_epi64(xmm4,xmm6); // A3 B3 C3 D3 A7 B7 C7 D7
llr256[0] = simde_mm256_permute2x128_si256(xmm0, xmm1, 0x20); // A0 B0 C0 D0 A1 B1 C1 D1
llr256[1] = simde_mm256_permute2x128_si256(xmm2, xmm3, 0x20); // A2 B2 C2 D2 A3 B3 C3 D3
llr256[2] = simde_mm256_permute2x128_si256(xmm0, xmm1, 0x31); // A4 B4 C4 D4 A5 B5 C5 D5
llr256[3] = simde_mm256_permute2x128_si256(xmm2, xmm3, 0x31); // A6 B6 C6 D6 A7 B7 C7 D7
llr256+=4;
}
simde__m128i *llr128 = (simde__m128i*)llr256;
if (len_mod8 >= 4) {
int nb_re128 = nb_re>>2;
simde__m128i xmm0,xmm1,xmm2,xmm3,xmm4,xmm5,xmm6;
simde__m128i *rxF = (simde__m128i*)rxdataF_comp;
simde__m128i *ch_mag = (simde__m128i*)&ul_ch_mag[(symbol*(off+(nb_rb*12)))];
simde__m128i *ch_magb = (simde__m128i*)&ul_ch_magb[(symbol*(off+(nb_rb*12)))];
simde__m128i *ch_magc = (simde__m128i*)&ul_ch_magc[(symbol*(off+(nb_rb*12)))];
xmm0 = simde_mm_abs_epi16(rxF[nb_re128-1]); // registers of even index in xmm0-> |y_R|, registers of odd index in xmm0-> |y_I|
xmm0 = simde_mm_subs_epi16(ch_mag[nb_re128-1],xmm0); // registers of even index in xmm0-> |y_R|-|h|^2, registers of odd index in xmm0-> |y_I|-|h|^2
// xmm0 now contains 8 LLRs
xmm1 = simde_mm_abs_epi16(xmm0);
xmm1 = simde_mm_subs_epi16(ch_magb[nb_re128-1],xmm1); // contains 8 LLRs
xmm2 = simde_mm_abs_epi16(xmm1);
xmm2 = simde_mm_subs_epi16(ch_magc[nb_re128-1],xmm2); // contains 8 LLRs
// rxF[i] A0 A1 A2 A3
// xmm0 B0 B1 B2 B3
// xmm1 C0 C1 C2 C3
// xmm2 D0 D1 D2 D3
xmm3 = simde_mm_unpacklo_epi32(rxF[nb_re128-1],xmm0); // A0 B0 A1 B1
xmm4 = simde_mm_unpackhi_epi32(rxF[nb_re128-1],xmm0); // A2 B2 A3 B3
xmm5 = simde_mm_unpacklo_epi32(xmm1,xmm2); // C0 D0 C1 D1
xmm6 = simde_mm_unpackhi_epi32(xmm1,xmm2); // C2 D2 C3 D3
llr128[0] = simde_mm_unpacklo_epi64(xmm3,xmm5); // A0 B0 C0 D0
llr128[1] = simde_mm_unpackhi_epi64(xmm3,xmm5); // A1 B1 C1 D1
llr128[2] = simde_mm_unpacklo_epi64(xmm4,xmm6); // A2 B2 C2 D2
llr128[3] = simde_mm_unpackhi_epi64(xmm4,xmm6); // A3 B3 C3 D3
llr128+=4;
}
if (len_mod8 == 6) {
int nb_re64 = nb_re>>1;
simde__m64 *llr64 = (simde__m64 *)llr128;
simde__m64 xmm0,xmm1,xmm2;
simde__m64 *rxF = (simde__m64*)rxdataF_comp;
simde__m64 *ch_mag = (simde__m64*)&ul_ch_mag[(symbol*(off+(nb_rb*12)))];
simde__m64 *ch_magb = (simde__m64*)&ul_ch_magb[(symbol*(off+(nb_rb*12)))];
simde__m64 *ch_magc = (simde__m64*)&ul_ch_magc[(symbol*(off+(nb_rb*12)))];
xmm0 = simde_mm_abs_pi16(rxF[nb_re64-1]); // registers of even index in xmm0-> |y_R|, registers of odd index in xmm0-> |y_I|
xmm0 = simde_mm_subs_pi16(ch_mag[nb_re64-1],xmm0); // registers of even index in xmm0-> |y_R|-|h|^2, registers of odd index in xmm0-> |y_I|-|h|^2
// xmm0 now contains 4 LLRs
xmm1 = simde_mm_abs_pi16(xmm0);
xmm1 = simde_mm_subs_pi16(ch_magb[nb_re64-1],xmm1); // contains 4 LLRs
xmm2 = simde_mm_abs_pi16(xmm1);
xmm2 = simde_mm_subs_pi16(ch_magc[nb_re64-1],xmm2); // contains 4 LLRs
// rxF[i] A0 A1
// xmm0 B0 B1
// xmm1 C0 C1
// xmm2 D0 D1
llr64[0] = simde_m_punpckldq(rxF[nb_re64-1],xmm0); // A0 B0
llr64[2] = simde_m_punpckhdq(rxF[nb_re64-1],xmm0); // A1 B1
llr64[1] = simde_m_punpckldq(xmm1,xmm2); // C0 D0
llr64[3] = simde_m_punpckhdq(xmm1,xmm2); // C1 D1
xmm3 = simde_mm256_unpacklo_epi32(rxF[i], xmm0); // A0 B0 A1 B1 A4 B4 A5 B5
xmm4 = simde_mm256_unpackhi_epi32(rxF[i], xmm0); // A2 B2 A3 B3 A6 B6 A7 B7
xmm5 = simde_mm256_unpacklo_epi32(xmm1, xmm2); // C0 D0 C1 D1 C4 D4 C5 D5
xmm6 = simde_mm256_unpackhi_epi32(xmm1, xmm2); // C2 D2 C3 D3 C6 D6 C7 D7
xmm0 = simde_mm256_unpacklo_epi64(xmm3, xmm5); // A0 B0 C0 D0 A4 B4 C4 D4
xmm1 = simde_mm256_unpackhi_epi64(xmm3, xmm5); // A1 B1 C1 D1 A5 B5 C5 D5
xmm2 = simde_mm256_unpacklo_epi64(xmm4, xmm6); // A2 B2 C2 D2 A6 B6 C6 D6
xmm3 = simde_mm256_unpackhi_epi64(xmm4, xmm6); // A3 B3 C3 D3 A7 B7 C7 D7
*llr256++ = simde_mm256_permute2x128_si256(xmm0, xmm1, 0x20); // A0 B0 C0 D0 A1 B1 C1 D1
*llr256++ = simde_mm256_permute2x128_si256(xmm2, xmm3, 0x20); // A2 B2 C2 D2 A3 B3 C3 D3
*llr256++ = simde_mm256_permute2x128_si256(xmm0, xmm1, 0x31); // A4 B4 C4 D4 A5 B5 C5 D5
*llr256++ = simde_mm256_permute2x128_si256(xmm2, xmm3, 0x31); // A6 B6 C6 D6 A7 B7 C7 D7
}
}
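The 64QAM and 256QAM variants extend the same idea: each extra bit pair adds one more "subtract the next magnitude reference from the absolute value of the previous level" stage, giving 6 or 8 LLRs per RE, and the unpack/permute steps only put the per-RE groups back in order. A generic scalar sketch of that recursion, parameterized by the list of magnitude references (mag[0] ~ ul_ch_mag, mag[1] ~ ul_ch_magb, mag[2] ~ ul_ch_magc):

#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>

/* Generic scalar sketch of the max-log LLR recursion used above:
 * level 0 is the compensated symbol itself, each further level is
 * mag[l] - |previous level|, computed separately for the real and
 * imaginary parts. nb_levels = 2 -> 4 LLRs (16QAM),
 * 3 -> 6 LLRs (64QAM), 4 -> 8 LLRs (256QAM). */
static void qam_llr_ref(int16_t y_r, int16_t y_i,
                        const int16_t *mag, int nb_levels, int16_t *llr)
{
  int16_t cur_r = y_r, cur_i = y_i;
  for (int l = 0; l < nb_levels; l++) {
    llr[2 * l]     = cur_r;
    llr[2 * l + 1] = cur_i;
    if (l + 1 < nb_levels) {
      cur_r = (int16_t)(mag[l] - abs(cur_r));
      cur_i = (int16_t)(mag[l] - abs(cur_i));
    }
  }
}

int main(void)
{
  const int16_t mag[3] = { 400, 200, 100 }; /* made-up magnitude references */
  int16_t llr[8];
  qam_llr_ref(520, -310, mag, 4, llr); /* 256QAM: 8 LLRs per RE */
  for (int b = 0; b < 8; b++)
    printf("%d ", llr[b]);
  printf("\n");
  return 0;
}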
void nr_ulsch_compute_llr(int32_t *rxdataF_comp,
int32_t *ul_ch_mag,
int32_t *ul_ch_magb,
int32_t *ul_ch_magc,
int16_t *ulsch_llr,
uint32_t nb_rb,
uint32_t nb_re,
uint8_t symbol,
uint8_t mod_order)
......@@ -383,7 +217,6 @@ void nr_ulsch_compute_llr(int32_t *rxdataF_comp,
nr_ulsch_16qam_llr(rxdataF_comp,
ul_ch_mag,
ulsch_llr,
nb_rb,
nb_re,
symbol);
break;
......@@ -392,7 +225,6 @@ void nr_ulsch_compute_llr(int32_t *rxdataF_comp,
ul_ch_mag,
ul_ch_magb,
ulsch_llr,
nb_rb,
nb_re,
symbol);
break;
......@@ -402,7 +234,6 @@ void nr_ulsch_compute_llr(int32_t *rxdataF_comp,
ul_ch_magb,
ul_ch_magc,
ulsch_llr,
nb_rb,
nb_re,
symbol);
break;
......@@ -3786,46 +3617,7 @@ void nr_ulsch_qam64_qam64(c16_t *stream0_in,
simde_m_empty();
}
void nr_ulsch_compute_ML_llr(int32_t **rxdataF_comp,
int32_t **ul_ch_mag,
int32_t ***rho,
int16_t **llr_layers,
uint8_t nb_antennas_rx,
uint32_t rb_size,
uint32_t nb_re,
uint8_t symbol,
uint32_t rxdataF_ext_offset,
uint8_t mod_order)
{
int off = ((rb_size & 1) == 1) ? 4 : 0;
c16_t *rxdataF_comp0 = (c16_t *)&rxdataF_comp[0][symbol * (off + (rb_size * NR_NB_SC_PER_RB))];
c16_t *rxdataF_comp1 = (c16_t *)&rxdataF_comp[nb_antennas_rx][symbol * (off + (rb_size * NR_NB_SC_PER_RB))];
c16_t *ul_ch_mag0 = (c16_t *)&ul_ch_mag[0][symbol * (off + (rb_size * NR_NB_SC_PER_RB))];
c16_t *ul_ch_mag1 = (c16_t *)&ul_ch_mag[nb_antennas_rx][symbol * (off + (rb_size * NR_NB_SC_PER_RB))];
c16_t *llr_layers0 = (c16_t *)&llr_layers[0][rxdataF_ext_offset * mod_order];
c16_t *llr_layers1 = (c16_t *)&llr_layers[1][rxdataF_ext_offset * mod_order];
c16_t *rho0 = (c16_t *)&rho[0][1][symbol * (off + (rb_size * NR_NB_SC_PER_RB))];
c16_t *rho1 = (c16_t *)&rho[0][2][symbol * (off + (rb_size * NR_NB_SC_PER_RB))];
switch (mod_order) {
case 2:
nr_ulsch_qpsk_qpsk(rxdataF_comp0, rxdataF_comp1, llr_layers0, rho0, nb_re);
nr_ulsch_qpsk_qpsk(rxdataF_comp1, rxdataF_comp0, llr_layers1, rho1, nb_re);
break;
case 4:
nr_ulsch_qam16_qam16(rxdataF_comp0, rxdataF_comp1, ul_ch_mag0, ul_ch_mag1, llr_layers0, rho0, nb_re);
nr_ulsch_qam16_qam16(rxdataF_comp1, rxdataF_comp0, ul_ch_mag1, ul_ch_mag0, llr_layers1, rho1, nb_re);
break;
case 6:
nr_ulsch_qam64_qam64(rxdataF_comp0, rxdataF_comp1, ul_ch_mag0, ul_ch_mag1, llr_layers0, rho0, nb_re);
nr_ulsch_qam64_qam64(rxdataF_comp1, rxdataF_comp0, ul_ch_mag1, ul_ch_mag0, llr_layers1, rho1, nb_re);
break;
default:
AssertFatal(1 == 0, "nr_ulsch_compute_llr: invalid Qm value, symbol = %d, Qm = %d\n", symbol, mod_order);
}
}
void nr_ulsch_shift_llr(int16_t **llr_layers, uint32_t nb_re, uint32_t rxdataF_ext_offset, uint8_t mod_order, int shift)
static void nr_ulsch_shift_llr(int16_t **llr_layers, uint32_t nb_re, uint32_t rxdataF_ext_offset, uint8_t mod_order, int shift)
{
simde__m128i *llr_layers0 = (simde__m128i *)&llr_layers[0][rxdataF_ext_offset * mod_order];
simde__m128i *llr_layers1 = (simde__m128i *)&llr_layers[1][rxdataF_ext_offset * mod_order];
......@@ -3848,3 +3640,35 @@ void nr_ulsch_shift_llr(int16_t **llr_layers, uint32_t nb_re, uint32_t rxdataF_e
llr_layers1[i] = simde_mm_srai_epi16(llr_layers1[i], shift);
}
}
void nr_ulsch_compute_ML_llr(NR_gNB_PUSCH *pusch_vars,
uint32_t symbol,
c16_t* rxdataF_comp0,
c16_t* rxdataF_comp1,
c16_t* ul_ch_mag0,
c16_t* ul_ch_mag1,
c16_t* llr_layers0,
c16_t* llr_layers1,
c16_t* rho0,
c16_t* rho1,
uint32_t nb_re,
uint8_t mod_order)
{
switch (mod_order) {
case 2:
nr_ulsch_qpsk_qpsk(rxdataF_comp0, rxdataF_comp1, llr_layers0, rho0, nb_re);
nr_ulsch_qpsk_qpsk(rxdataF_comp1, rxdataF_comp0, llr_layers1, rho1, nb_re);
nr_ulsch_shift_llr(pusch_vars->llr_layers, nb_re, pusch_vars->llr_offset[symbol] >> 1, 2, 4);
break;
case 4:
nr_ulsch_qam16_qam16(rxdataF_comp0, rxdataF_comp1, ul_ch_mag0, ul_ch_mag1, llr_layers0, rho0, nb_re);
nr_ulsch_qam16_qam16(rxdataF_comp1, rxdataF_comp0, ul_ch_mag1, ul_ch_mag0, llr_layers1, rho1, nb_re);
break;
case 6:
nr_ulsch_qam64_qam64(rxdataF_comp0, rxdataF_comp1, ul_ch_mag0, ul_ch_mag1, llr_layers0, rho0, nb_re);
nr_ulsch_qam64_qam64(rxdataF_comp1, rxdataF_comp0, ul_ch_mag1, ul_ch_mag0, llr_layers1, rho1, nb_re);
break;
default:
AssertFatal(1 == 0, "nr_ulsch_compute_llr: invalid Qm value, Qm = %d\n", mod_order);
}
}
......@@ -409,8 +409,8 @@ typedef struct {
/// \brief Total RE count after DMRS/PTRS REs are extracted from the respective symbol.
/// - first index: ? [0...14] symbol per slot
int16_t *ul_valid_re_per_slot;
/// flag to verify if channel level computation is done
uint8_t cl_done;
/// \brief offset for llr corresponding to each symbol
int llr_offset[14];
/// flag to indicate DTX on reception
int DTX;
} NR_gNB_PUSCH;
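The llr_offset[] field added above is filled in nr_rx_pusch_tp() as a running sum of ul_valid_re_per_slot[] times the modulation order, so every symbol knows where its LLRs start in the slot-wide buffer. A small standalone illustration of that prefix sum, with made-up RE counts:

#include <stdio.h>

int main(void)
{
  const int qm = 4; /* 16QAM */
  const int start_symbol = 2, nb_symbols = 5;
  int valid_re[14] = { 0, 0, 132, 144, 144, 132, 144, 0 }; /* illustrative values */
  int llr_offset[14] = { 0 };

  /* same accumulation as the symbol loop in nr_rx_pusch_tp() */
  for (int s = start_symbol; s < start_symbol + nb_symbols; s++)
    llr_offset[s] = (s == start_symbol)
                        ? 0
                        : llr_offset[s - 1] + valid_re[s - 1] * qm;

  for (int s = start_symbol; s < start_symbol + nb_symbols; s++)
    printf("symbol %d: llr_offset %d\n", s, llr_offset[s]);
  return 0;
}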
......@@ -728,18 +728,14 @@ typedef struct PHY_VARS_gNB_s {
time_stats_t dlsch_segmentation_stats;
time_stats_t rx_pusch_stats;
time_stats_t rx_pusch_init_stats;
time_stats_t rx_pusch_symbol_processing_stats;
time_stats_t ul_indication_stats;
time_stats_t schedule_response_stats;
time_stats_t ulsch_decoding_stats;
time_stats_t ulsch_rate_unmatching_stats;
time_stats_t ulsch_ldpc_decoding_stats;
time_stats_t ulsch_deinterleaving_stats;
time_stats_t ulsch_unscrambling_stats;
time_stats_t ulsch_channel_estimation_stats;
time_stats_t ulsch_ptrs_processing_stats;
time_stats_t ulsch_channel_compensation_stats;
time_stats_t ulsch_rbs_extraction_stats;
time_stats_t ulsch_mrc_stats;
time_stats_t ulsch_llr_stats;
time_stats_t rx_srs_stats;
time_stats_t generate_srs_stats;
......@@ -754,6 +750,7 @@ typedef struct PHY_VARS_gNB_s {
time_stats_t rx_dft_stats;
time_stats_t ulsch_freq_offset_estimation_stats;
*/
notifiedFIFO_t respPuschSymb;
notifiedFIFO_t respDecode;
notifiedFIFO_t resp_L1;
notifiedFIFO_t L1_tx_free;
......@@ -761,6 +758,8 @@ typedef struct PHY_VARS_gNB_s {
notifiedFIFO_t L1_tx_out;
notifiedFIFO_t resp_RU_tx;
tpool_t threadPool;
int nbSymb;
int num_pusch_symbols_per_thread;
pthread_t L1_rx_thread;
int L1_rx_thread_core;
pthread_t L1_tx_thread;
......@@ -771,6 +770,32 @@ typedef struct PHY_VARS_gNB_s {
rt_L1_profiling_t rt_L1_profiling;
} PHY_VARS_gNB;
typedef struct puschSymbolProc_s {
PHY_VARS_gNB *gNB;
NR_DL_FRAME_PARMS *frame_parms;
nfapi_nr_pusch_pdu_t *rel15_ul;
int ulsch_id;
int slot;
int startSymbol;
int numSymbols;
int16_t *llr;
int16_t **llr_layers;
int16_t *s;
uint32_t nvar;
} puschSymbolProc_t;
struct puschSymbolReqId {
uint16_t ulsch_id;
uint16_t frame;
uint8_t slot;
uint16_t spare;
} __attribute__((packed));
union puschSymbolReqUnion {
struct puschSymbolReqId s;
uint64_t p;
};
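puschSymbolReqUnion packs the job identifier into a single uint64_t, and that value (id.p) is what gets passed as the key argument of newNotifiedFIFO_elt() in nr_rx_pusch_tp(). The standalone sketch below shows the same packed-struct-in-a-64-bit-key pattern; the field names mirror puschSymbolReqId, but the program is only an illustration:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Packed identifier overlaid with a 64-bit key, so one integer can
 * carry the (ulsch_id, frame, slot) tuple through a response FIFO. */
struct req_id {
  uint16_t ulsch_id;
  uint16_t frame;
  uint8_t slot;
  uint16_t spare;
} __attribute__((packed));

union req_key {
  struct req_id s;
  uint64_t p;
};

int main(void)
{
  _Static_assert(sizeof(struct req_id) <= sizeof(uint64_t),
                 "packed id must fit in the 64-bit key");
  union req_key k;
  memset(&k, 0, sizeof(k));
  k.s.ulsch_id = 3;
  k.s.frame = 812;
  k.s.slot = 7;
  printf("key = 0x%016llx\n", (unsigned long long)k.p);
  return 0;
}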
typedef struct LDPCDecode_s {
PHY_VARS_gNB *gNB;
NR_UL_gNB_HARQ_t *ulsch_harq;
......
......@@ -398,18 +398,6 @@ static int nr_ulsch_procedures(PHY_VARS_gNB *gNB, int frame_rx, int slot_rx, int
pusch_pdu->qam_mod_order,
pusch_pdu->nrOfLayers);
nr_ulsch_layer_demapping(gNB->pusch_vars[ULSCH_id].llr,
pusch_pdu->nrOfLayers,
pusch_pdu->qam_mod_order,
G,
gNB->pusch_vars[ULSCH_id].llr_layers);
//----------------------------------------------------------
//------------------- ULSCH unscrambling -------------------
//----------------------------------------------------------
start_meas(&gNB->ulsch_unscrambling_stats);
nr_ulsch_unscrambling(gNB->pusch_vars[ULSCH_id].llr, G, pusch_pdu->data_scrambling_id, pusch_pdu->rnti);
stop_meas(&gNB->ulsch_unscrambling_stats);
//----------------------------------------------------------
//--------------------- ULSCH decoding ---------------------
//----------------------------------------------------------
......@@ -417,7 +405,7 @@ static int nr_ulsch_procedures(PHY_VARS_gNB *gNB, int frame_rx, int slot_rx, int
start_meas(&gNB->ulsch_decoding_stats);
int nbDecode =
nr_ulsch_decoding(gNB, ULSCH_id, gNB->pusch_vars[ULSCH_id].llr, frame_parms, pusch_pdu, frame_rx, slot_rx, harq_pid, G);
stop_meas(&gNB->ulsch_decoding_stats);
return nbDecode;
}
......@@ -899,7 +887,7 @@ int phy_procedures_gNB_uespec_RX(PHY_VARS_gNB *gNB, int frame_rx, int slot_rx)
VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_NR_RX_PUSCH, 1);
start_meas(&gNB->rx_pusch_stats);
nr_rx_pusch(gNB, ULSCH_id, frame_rx, slot_rx, ulsch->harq_pid);
nr_rx_pusch_tp(gNB, ULSCH_id, frame_rx, slot_rx, ulsch->harq_pid);
NR_gNB_PUSCH *pusch_vars = &gNB->pusch_vars[ULSCH_id];
pusch_vars->ulsch_power_tot = 0;
pusch_vars->ulsch_noise_power_tot = 0;
......@@ -963,6 +951,7 @@ int phy_procedures_gNB_uespec_RX(PHY_VARS_gNB *gNB, int frame_rx, int slot_rx)
totalDecode--;
}
}
stop_meas(&gNB->ulsch_decoding_stats);
for (int i = 0; i < gNB->max_nb_srs; i++) {
NR_gNB_SRS_t *srs = &gNB->srs[i];
if (srs) {
......
......@@ -561,10 +561,12 @@ int main(int argc, char *argv[])
RC.gNB[0] = calloc(1,sizeof(PHY_VARS_gNB));
gNB = RC.gNB[0];
gNB->ofdm_offset_divisor = UINT_MAX;
initNotifiedFIFO(&gNB->respDecode);
gNB->num_pusch_symbols_per_thread = 1;
initFloatingCoresTpool(threadCnt, &gNB->threadPool, false, "gNB-tpool");
initNotifiedFIFO(&gNB->respDecode);
initNotifiedFIFO(&gNB->respPuschSymb);
initNotifiedFIFO(&gNB->L1_tx_free);
initNotifiedFIFO(&gNB->L1_tx_filled);
initNotifiedFIFO(&gNB->L1_tx_out);
......@@ -719,7 +721,6 @@ int main(int argc, char *argv[])
NR_gNB_ULSCH_t *ulsch_gNB = &gNB->ulsch[UE_id];
// nfapi_nr_ul_config_ulsch_pdu *rel15_ul = &ulsch_gNB->harq_process->ulsch_pdu;
NR_Sched_Rsp_t *Sched_INFO = malloc(sizeof(*Sched_INFO));
memset((void*)Sched_INFO,0,sizeof(*Sched_INFO));
nfapi_nr_ul_tti_request_t *UL_tti_req = &Sched_INFO->UL_tti_req;
......@@ -927,15 +928,10 @@ int main(int argc, char *argv[])
roundStats = 0;
reset_meas(&gNB->phy_proc_rx);
reset_meas(&gNB->rx_pusch_stats);
reset_meas(&gNB->rx_pusch_init_stats);
reset_meas(&gNB->rx_pusch_symbol_processing_stats);
reset_meas(&gNB->ulsch_decoding_stats);
reset_meas(&gNB->ulsch_deinterleaving_stats);
reset_meas(&gNB->ulsch_rate_unmatching_stats);
reset_meas(&gNB->ulsch_ldpc_decoding_stats);
reset_meas(&gNB->ulsch_unscrambling_stats);
reset_meas(&gNB->ulsch_channel_estimation_stats);
reset_meas(&gNB->ulsch_llr_stats);
reset_meas(&gNB->ulsch_channel_compensation_stats);
reset_meas(&gNB->ulsch_rbs_extraction_stats);
reset_meas(&UE->ulsch_ldpc_encoding_stats);
reset_meas(&UE->ulsch_rate_matching_stats);
reset_meas(&UE->ulsch_interleaving_stats);
......@@ -1588,25 +1584,22 @@ int main(int argc, char *argv[])
dump_pusch_stats(fd,gNB);
fclose(fd);
if (print_perf==1) {
printDistribution(&gNB->phy_proc_rx,table_rx,"Total PHY proc rx");
printStatIndent(&gNB->rx_pusch_stats,"RX PUSCH time");
printStatIndent2(&gNB->ulsch_channel_estimation_stats,"ULSCH channel estimation time");
printStatIndent2(&gNB->ulsch_ptrs_processing_stats,"ULSCH PTRS Processing time");
printStatIndent2(&gNB->ulsch_rbs_extraction_stats,"ULSCH rbs extraction time");
printStatIndent2(&gNB->ulsch_channel_compensation_stats,"ULSCH channel compensation time");
printStatIndent2(&gNB->ulsch_mrc_stats,"ULSCH mrc computation");
printStatIndent2(&gNB->ulsch_llr_stats,"ULSCH llr computation");
printStatIndent(&gNB->ulsch_unscrambling_stats,"ULSCH unscrambling");
if (print_perf==1)
{
printf("gNB RX\n");
printDistribution(&gNB->phy_proc_rx,table_rx, "Total PHY proc rx");
printStatIndent(&gNB->rx_pusch_stats, "RX PUSCH time");
printStatIndent2(&gNB->ulsch_channel_estimation_stats, "ULSCH channel estimation time");
printStatIndent2(&gNB->rx_pusch_init_stats, "RX PUSCH Initialization time");
printStatIndent2(&gNB->rx_pusch_symbol_processing_stats, "RX PUSCH Symbol Processing time");
printStatIndent(&gNB->ulsch_decoding_stats,"ULSCH total decoding time");
printf("\nUE TX\n");
printStatIndent(&UE->ulsch_encoding_stats,"ULSCH total encoding time");
printStatIndent2(&UE->ulsch_segmentation_stats,"ULSCH segmentation time");
printStatIndent2(&UE->ulsch_ldpc_encoding_stats,"ULSCH LDPC encoder time");
printStatIndent2(&UE->ulsch_rate_matching_stats,"ULSCH rate-matching time");
printStatIndent2(&UE->ulsch_interleaving_stats,"ULSCH interleaving time");
//printStatIndent2(&gNB->ulsch_deinterleaving_stats,"ULSCH deinterleaving");
//printStatIndent2(&gNB->ulsch_rate_unmatching_stats,"ULSCH rate matching rx");
//printStatIndent2(&gNB->ulsch_ldpc_decoding_stats,"ULSCH ldpc decoding");
printStatIndent(&gNB->rx_srs_stats,"RX SRS time");
printStatIndent2(&gNB->generate_srs_stats,"Generate SRS sequence time");
printStatIndent2(&gNB->get_srs_signal_stats,"Get SRS signal time");
......