Commit 7cb89883 authored by Laurent THOMAS

one-step-cleaning-gNB-ulsch-decoding

parent 9127258a
......@@ -125,34 +125,20 @@ static void nr_processULSegment(void *arg)
PHY_VARS_gNB *phy_vars_gNB = rdata->gNB;
NR_UL_gNB_HARQ_t *ulsch_harq = rdata->ulsch_harq;
t_nrLDPC_dec_params *p_decoderParms = &rdata->decoderParms;
int length_dec;
int Kr;
int Kr_bytes;
int K_bits_F;
uint8_t crc_type;
int i;
int j;
int r = rdata->segment_r;
int A = rdata->A;
int E = rdata->E;
int Qm = rdata->Qm;
int rv_index = rdata->rv_index;
int r_offset = rdata->r_offset;
uint8_t kc = rdata->Kc;
const int Kr = ulsch_harq->K;
const int Kr_bytes = Kr >> 3;
const int K_bits_F = Kr - ulsch_harq->F;
const int r = rdata->segment_r;
const int A = rdata->A;
const int E = rdata->E;
const int Qm = rdata->Qm;
const int rv_index = rdata->rv_index;
const int r_offset = rdata->r_offset;
const uint8_t kc = rdata->Kc;
short *ulsch_llr = rdata->ulsch_llr;
int max_ldpc_iterations = p_decoderParms->numMaxIter;
const int max_ldpc_iterations = p_decoderParms->numMaxIter;
int8_t llrProcBuf[OAI_UL_LDPC_MAX_NUM_LLR] __attribute__((aligned(32)));
int16_t z[68 * 384 + 16] __attribute__((aligned(16)));
int8_t l[68 * 384 + 16] __attribute__((aligned(16)));
simde__m128i *pv = (simde__m128i *)&z;
simde__m128i *pl = (simde__m128i *)&l;
Kr = ulsch_harq->K;
Kr_bytes = Kr >> 3;
K_bits_F = Kr - ulsch_harq->F;
t_nrLDPC_time_stats procTime = {0};
t_nrLDPC_time_stats *p_procTime = &procTime;
......@@ -211,28 +197,33 @@ static void nr_processULSegment(void *arg)
if (ulsch_harq->C == 1) {
if (A > 3824)
crc_type = CRC24_A;
p_decoderParms->crc_type = CRC24_A;
else
crc_type = CRC16;
length_dec = ulsch_harq->B;
p_decoderParms->crc_type = CRC16;
p_decoderParms->block_length = ulsch_harq->B;
} else {
crc_type = CRC24_B;
length_dec = (ulsch_harq->B + 24 * ulsch_harq->C) / ulsch_harq->C;
p_decoderParms->crc_type = CRC24_B;
p_decoderParms->block_length = (ulsch_harq->B + 24 * ulsch_harq->C) / ulsch_harq->C;
}
// start_meas(&phy_vars_gNB->ulsch_ldpc_decoding_stats);
// set first 2*Z_c bits to zeros
memset(&z[0], 0, 2 * ulsch_harq->Z * sizeof(int16_t));
int16_t z[68 * 384 + 16] __attribute__((aligned(16)));
memset(z, 0, 2 * ulsch_harq->Z * sizeof(*z));
// set Filler bits
memset((&z[0] + K_bits_F), 127, ulsch_harq->F * sizeof(int16_t));
memset(z + K_bits_F, 127, ulsch_harq->F * sizeof(*z));
// Move coded bits before filler bits
memcpy((&z[0] + 2 * ulsch_harq->Z), ulsch_harq->d[r], (K_bits_F - 2 * ulsch_harq->Z) * sizeof(int16_t));
memcpy(z + 2 * ulsch_harq->Z, ulsch_harq->d[r], (K_bits_F - 2 * ulsch_harq->Z) * sizeof(*z));
// skip filler bits
memcpy((&z[0] + Kr), ulsch_harq->d[r] + (Kr - 2 * ulsch_harq->Z), (kc * ulsch_harq->Z - Kr) * sizeof(int16_t));
memcpy(z + Kr, ulsch_harq->d[r] + (Kr - 2 * ulsch_harq->Z), (kc * ulsch_harq->Z - Kr) * sizeof(*z));
// Saturate coded bits before decoding into 8 bits values
for (i = 0, j = 0; j < ((kc * ulsch_harq->Z) >> 4) + 1; i += 2, j++) {
simde__m128i *pv = (simde__m128i *)&z;
int8_t l[68 * 384 + 16] __attribute__((aligned(16)));
simde__m128i *pl = (simde__m128i *)&l;
for (int i = 0, j = 0; j < ((kc * ulsch_harq->Z) >> 4) + 1; i += 2, j++) {
pl[j] = simde_mm_packs_epi16(pv[i], pv[i + 1]);
}
//////////////////////////////////////////////////////////////////////////////////////////
......@@ -242,153 +233,34 @@ static void nr_processULSegment(void *arg)
//////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////// pl =====> llrProcBuf //////////////////////////////////
p_decoderParms->block_length = length_dec;
p_decoderParms->crc_type = crc_type;
rdata->decodeIterations = nrLDPC_decoder(p_decoderParms, (int8_t *)pl, llrProcBuf, p_procTime, &ulsch_harq->abort_decode);
rdata->decodeIterations = nrLDPC_decoder(p_decoderParms, l, llrProcBuf, p_procTime, &ulsch_harq->abort_decode);
if (rdata->decodeIterations <= p_decoderParms->numMaxIter)
memcpy(ulsch_harq->c[r],llrProcBuf, Kr>>3);
//stop_meas(&phy_vars_gNB->ulsch_ldpc_decoding_stats);
}
int nr_ulsch_decoding(PHY_VARS_gNB *phy_vars_gNB,
int decode_offload(PHY_VARS_gNB *phy_vars_gNB,
uint8_t ULSCH_id,
short *ulsch_llr,
NR_DL_FRAME_PARMS *frame_parms,
nfapi_nr_pusch_pdu_t *pusch_pdu,
uint32_t frame,
uint8_t nr_tti_rx,
t_nrLDPC_dec_params *decParams,
uint8_t harq_pid,
uint32_t G)
{
if (!ulsch_llr) {
LOG_E(PHY, "ulsch_decoding.c: NULL ulsch_llr pointer\n");
return -1;
}
VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_PHY_gNB_ULSCH_DECODING, 1);
{
NR_gNB_ULSCH_t *ulsch = &phy_vars_gNB->ulsch[ULSCH_id];
NR_gNB_PUSCH *pusch = &phy_vars_gNB->pusch_vars[ULSCH_id];
NR_UL_gNB_HARQ_t *harq_process = ulsch->harq_process;
if (!harq_process) {
LOG_E(PHY, "ulsch_decoding.c: NULL harq_process pointer\n");
return -1;
}
uint8_t dtx_det = 0;
int Kr;
int Kr_bytes;
harq_process->processedSegments = 0;
// ------------------------------------------------------------------
uint16_t nb_rb = pusch_pdu->rb_size;
uint8_t Qm = pusch_pdu->qam_mod_order;
uint8_t mcs = pusch_pdu->mcs_index;
uint8_t n_layers = pusch_pdu->nrOfLayers;
// ------------------------------------------------------------------
harq_process->TBS = pusch_pdu->pusch_data.tb_size;
dtx_det = 0;
uint32_t A = (harq_process->TBS) << 3;
// target_code_rate is in 0.1 units
float Coderate = (float) pusch_pdu->target_code_rate / 10240.0f;
LOG_D(PHY,"ULSCH Decoding, harq_pid %d rnti %x TBS %d G %d mcs %d Nl %d nb_rb %d, Qm %d, Coderate %f RV %d round %d new RX %d\n",
harq_pid, ulsch->rnti, A, G, mcs, n_layers, nb_rb, Qm, Coderate, pusch_pdu->pusch_data.rv_index, harq_process->round, harq_process->harq_to_be_cleared);
t_nrLDPC_dec_params decParams = {0};
decParams.BG = pusch_pdu->maintenance_parms_v3.ldpcBaseGraph;
int kc;
if (decParams.BG == 2) {
kc = 52;
} else {
kc = 68;
}
NR_gNB_PHY_STATS_t *stats = get_phy_stats(phy_vars_gNB, ulsch->rnti);
if (stats) {
stats->frame = frame;
stats->ulsch_stats.round_trials[harq_process->round]++;
for (int aarx = 0; aarx < frame_parms->nb_antennas_rx; aarx++) {
stats->ulsch_stats.power[aarx] = dB_fixed_x10(pusch->ulsch_power[aarx]);
stats->ulsch_stats.noise_power[aarx] = dB_fixed_x10(pusch->ulsch_noise_power[aarx]);
}
if (!harq_process->harq_to_be_cleared) {
stats->ulsch_stats.current_Qm = Qm;
stats->ulsch_stats.current_RI = n_layers;
stats->ulsch_stats.total_bytes_tx += harq_process->TBS;
}
}
if (A > 3824)
harq_process->B = A+24;
else
harq_process->B = A+16;
// [hna] Perform nr_segmenation with input and output set to NULL to calculate only (B, C, K, Z, F)
nr_segmentation(NULL,
NULL,
harq_process->B,
&harq_process->C,
&harq_process->K,
&harq_process->Z, // [hna] Z is Zc
&harq_process->F,
decParams.BG);
if (harq_process->C>MAX_NUM_NR_DLSCH_SEGMENTS_PER_LAYER*n_layers) {
LOG_E(PHY,"nr_segmentation.c: too many segments %d, B %d\n",harq_process->C,harq_process->B);
return(-1);
}
#ifdef DEBUG_ULSCH_DECODING
printf("ulsch decoding nr segmentation Z %d\n", harq_process->Z);
if (!frame%100)
printf("K %d C %d Z %d \n", harq_process->K, harq_process->C, harq_process->Z);
#endif
decParams.Z = harq_process->Z;
decParams.numMaxIter = ulsch->max_ldpc_iterations;
decParams.outMode = 0;
uint32_t r_offset = 0;
uint16_t a_segments = MAX_NUM_NR_ULSCH_SEGMENTS_PER_LAYER*n_layers; //number of segments to be allocated
if (nb_rb != 273) {
a_segments = a_segments*nb_rb;
a_segments = a_segments/273 +1;
}
if (harq_process->C > a_segments) {
LOG_E(PHY,"Illegal harq_process->C %d > %d\n",harq_process->C,a_segments);
return -1;
}
#ifdef DEBUG_ULSCH_DECODING
printf("Segmentation: C %d, K %d\n",harq_process->C,harq_process->K);
#endif
if (harq_process->harq_to_be_cleared) {
for (int r = 0; r < harq_process->C; r++)
harq_process->d_to_be_cleared[r] = true;
harq_process->harq_to_be_cleared = false;
}
Kr = harq_process->K;
Kr_bytes = Kr >> 3;
uint32_t offset = 0;
if (phy_vars_gNB->ldpc_offload_flag && mcs > 9) {
int8_t llrProcBuf[22 * 384];
// if (dtx_det==0) {
int16_t z_ol[68 * 384];
int8_t l_ol[68 * 384];
int8_t llrProcBuf[22 * 384] __attribute__((aligned(32)));
int16_t z_ol[68 * 384] __attribute__((aligned(32)));
int8_t l_ol[68 * 384] __attribute__((aligned(32)));
int crc_type;
int length_dec;
uint8_t Qm = pusch_pdu->qam_mod_order;
uint8_t n_layers = pusch_pdu->nrOfLayers;
const int Kr = harq_process->K;
const int Kr_bytes = Kr >> 3;
const int kc = decParams->BG == 2 ? 52 : 68;
const uint32_t A = (harq_process->TBS) << 3;
if (harq_process->C == 1) {
if (A > 3824)
......@@ -402,22 +274,38 @@ int nr_ulsch_decoding(PHY_VARS_gNB *phy_vars_gNB,
length_dec = (harq_process->B + 24 * harq_process->C) / harq_process->C;
}
int decodeIterations = 2;
int dtx_det = 0;
int r_offset = 0, offset = 0;
for (int r = 0; r < harq_process->C; r++) {
int E = nr_get_E(G, harq_process->C, Qm, n_layers, r);
memset(harq_process->c[r], 0, Kr_bytes);
decParams.R = nr_get_R_ldpc_decoder(pusch_pdu->pusch_data.rv_index, E, decParams.BG, decParams.Z, &harq_process->llrLen, harq_process->round);
decParams->R = nr_get_R_ldpc_decoder(pusch_pdu->pusch_data.rv_index,
E,
decParams->BG,
decParams->Z,
&harq_process->llrLen,
harq_process->round);
if ((dtx_det == 0) && (pusch_pdu->pusch_data.rv_index == 0)) {
// if (dtx_det==0){
memcpy((&z_ol[0]), ulsch_llr + r_offset, E * sizeof(short));
memcpy(z_ol, ulsch_llr + r_offset, E * sizeof(short));
simde__m128i *pv_ol128 = (simde__m128i *)&z_ol;
simde__m128i *pl_ol128 = (simde__m128i *)&l_ol;
for (int i = 0, j = 0; j < ((kc * harq_process->Z) >> 4) + 1; i += 2, j++) {
pl_ol128[j] = simde_mm_packs_epi16(pv_ol128[i], pv_ol128[i + 1]);
}
int ret = nrLDPC_decoder_offload(&decParams, harq_pid, ULSCH_id, r, pusch_pdu->pusch_data.rv_index, harq_process->F, E, Qm, (int8_t *)&pl_ol128[0], llrProcBuf, 1);
int ret = nrLDPC_decoder_offload(decParams,
harq_pid,
ULSCH_id,
r,
pusch_pdu->pusch_data.rv_index,
harq_process->F,
E,
Qm,
(int8_t *)&pl_ol128[0],
llrProcBuf,
1);
if (ret < 0) {
LOG_E(PHY, "ulsch_decoding.c: Problem in LDPC decoder offload\n");
......@@ -463,23 +351,8 @@ int nr_ulsch_decoding(PHY_VARS_gNB *phy_vars_gNB,
VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_PHY_gNB_ULSCH_DECODING, 0);
bool crc_valid = false;
if (harq_process->processedSegments == harq_process->C) {
// When the number of code blocks is 1 (C = 1) and ulsch_harq->processedSegments = 1, we can assume a good TB because of the
// CRC check made by the LDPC for early termination, so, no need to perform CRC check twice for a single code block
crc_valid = true;
if (harq_process->C > 1) {
// Check ULSCH transport block CRC
crc_type = CRC16;
if (A > 3824) {
crc_type = CRC24_A;
}
crc_valid = check_crc(harq_process->b, harq_process->B, crc_type);
}
}
if (crc_valid) {
LOG_D(PHY, "[gNB %d] ULSCH: Setting ACK for slot %d TBS %d\n", phy_vars_gNB->Mod_id, ulsch->slot, harq_process->TBS);
LOG_D(PHY, "ULSCH: Setting ACK for slot %d TBS %d\n", ulsch->slot, harq_process->TBS);
ulsch->active = false;
harq_process->round = 0;
......@@ -502,22 +375,145 @@ int nr_ulsch_decoding(PHY_VARS_gNB *phy_vars_gNB,
nr_fill_indication(phy_vars_gNB, ulsch->frame, ulsch->slot, ULSCH_id, harq_pid, 1, 0);
}
ulsch->last_iteration_cnt = decodeIterations;
return 0;
}
int nr_ulsch_decoding(PHY_VARS_gNB *phy_vars_gNB,
uint8_t ULSCH_id,
short *ulsch_llr,
NR_DL_FRAME_PARMS *frame_parms,
nfapi_nr_pusch_pdu_t *pusch_pdu,
uint32_t frame,
uint8_t nr_tti_rx,
uint8_t harq_pid,
uint32_t G)
{
if (!ulsch_llr) {
LOG_E(PHY, "ulsch_decoding.c: NULL ulsch_llr pointer\n");
return -1;
}
else {
dtx_det = 0;
VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_PHY_gNB_ULSCH_DECODING, 1);
NR_gNB_ULSCH_t *ulsch = &phy_vars_gNB->ulsch[ULSCH_id];
NR_gNB_PUSCH *pusch = &phy_vars_gNB->pusch_vars[ULSCH_id];
NR_UL_gNB_HARQ_t *harq_process = ulsch->harq_process;
if (!harq_process) {
LOG_E(PHY, "ulsch_decoding.c: NULL harq_process pointer\n");
return -1;
}
// ------------------------------------------------------------------
const uint16_t nb_rb = pusch_pdu->rb_size;
const uint8_t Qm = pusch_pdu->qam_mod_order;
const uint8_t mcs = pusch_pdu->mcs_index;
const uint8_t n_layers = pusch_pdu->nrOfLayers;
// ------------------------------------------------------------------
harq_process->processedSegments = 0;
harq_process->TBS = pusch_pdu->pusch_data.tb_size;
t_nrLDPC_dec_params decParams = {0};
decParams.BG = pusch_pdu->maintenance_parms_v3.ldpcBaseGraph;
const uint32_t A = (harq_process->TBS) << 3;
if (A > 3824)
harq_process->B = A + 24;
else
harq_process->B = A + 16;
NR_gNB_PHY_STATS_t *stats = get_phy_stats(phy_vars_gNB, ulsch->rnti);
if (stats) {
stats->frame = frame;
stats->ulsch_stats.round_trials[harq_process->round]++;
for (int aarx = 0; aarx < frame_parms->nb_antennas_rx; aarx++) {
stats->ulsch_stats.power[aarx] = dB_fixed_x10(pusch->ulsch_power[aarx]);
stats->ulsch_stats.noise_power[aarx] = dB_fixed_x10(pusch->ulsch_noise_power[aarx]);
}
if (!harq_process->harq_to_be_cleared) {
stats->ulsch_stats.current_Qm = Qm;
stats->ulsch_stats.current_RI = n_layers;
stats->ulsch_stats.total_bytes_tx += harq_process->TBS;
}
}
LOG_D(PHY,
"ULSCH Decoding, harq_pid %d rnti %x TBS %d G %d mcs %d Nl %d nb_rb %d, Qm %d, Coderate %f RV %d round %d new RX %d\n",
harq_pid,
ulsch->rnti,
A,
G,
mcs,
n_layers,
nb_rb,
Qm,
pusch_pdu->target_code_rate / 10240.0f,
pusch_pdu->pusch_data.rv_index,
harq_process->round,
harq_process->harq_to_be_cleared);
// [hna] Perform nr_segmenation with input and output set to NULL to calculate only (B, C, K, Z, F)
nr_segmentation(NULL,
NULL,
harq_process->B,
&harq_process->C,
&harq_process->K,
&harq_process->Z, // [hna] Z is Zc
&harq_process->F,
decParams.BG);
uint16_t a_segments = MAX_NUM_NR_ULSCH_SEGMENTS_PER_LAYER * n_layers; // number of segments to be allocated
if (harq_process->C > a_segments) {
LOG_E(PHY,"nr_segmentation.c: too many segments %d, B %d\n",harq_process->C,harq_process->B);
return(-1);
}
if (nb_rb != 273) {
a_segments = a_segments*nb_rb;
a_segments = a_segments/273 +1;
}
if (harq_process->C > a_segments) {
LOG_E(PHY,"Illegal harq_process->C %d > %d\n",harq_process->C,a_segments);
return -1;
}
#ifdef DEBUG_ULSCH_DECODING
printf("ulsch decoding nr segmentation Z %d\n", harq_process->Z);
if (!frame % 100)
printf("K %d C %d Z %d \n", harq_process->K, harq_process->C, harq_process->Z);
printf("Segmentation: C %d, K %d\n",harq_process->C,harq_process->K);
#endif
decParams.Z = harq_process->Z;
decParams.numMaxIter = ulsch->max_ldpc_iterations;
decParams.outMode = 0;
if (harq_process->harq_to_be_cleared) {
for (int r = 0; r < harq_process->C; r++)
harq_process->d_to_be_cleared[r] = true;
harq_process->harq_to_be_cleared = false;
}
if (phy_vars_gNB->ldpc_offload_flag && mcs > 9)
return decode_offload(phy_vars_gNB, ULSCH_id, ulsch_llr, pusch_pdu, &decParams, harq_pid, G);
uint32_t offset = 0, r_offset = 0;
set_abort(&harq_process->abort_decode, false);
for (int r = 0; r < harq_process->C; r++) {
int E = nr_get_E(G, harq_process->C, Qm, n_layers, r);
union ldpcReqUnion id = {.s = {ulsch->rnti, frame, nr_tti_rx, 0, 0}};
notifiedFIFO_elt_t *req = newNotifiedFIFO_elt(sizeof(ldpcDecode_t), id.p, &phy_vars_gNB->respDecode, &nr_processULSegment);
ldpcDecode_t *rdata = (ldpcDecode_t *)NotifiedFifoData(req);
decParams.R = nr_get_R_ldpc_decoder(pusch_pdu->pusch_data.rv_index, E, decParams.BG, decParams.Z, &harq_process->llrLen, harq_process->round);
decParams.R = nr_get_R_ldpc_decoder(pusch_pdu->pusch_data.rv_index,
E,
decParams.BG,
decParams.Z,
&harq_process->llrLen,
harq_process->round);
rdata->gNB = phy_vars_gNB;
rdata->ulsch_harq = harq_process;
rdata->decoderParms = decParams;
rdata->ulsch_llr = ulsch_llr;
rdata->Kc = kc;
rdata->Kc = decParams.BG == 2 ? 52 : 68;
rdata->harq_pid = harq_pid;
rdata->segment_r = r;
rdata->nbSegments = harq_process->C;
......@@ -525,7 +521,7 @@ int nr_ulsch_decoding(PHY_VARS_gNB *phy_vars_gNB,
rdata->A = A;
rdata->Qm = Qm;
rdata->r_offset = r_offset;
rdata->Kr_bytes = Kr_bytes;
rdata->Kr_bytes = harq_process->K >> 3;
rdata->rv_index = pusch_pdu->pusch_data.rv_index;
rdata->offset = offset;
rdata->ulsch = ulsch;
......@@ -534,9 +530,7 @@ int nr_ulsch_decoding(PHY_VARS_gNB *phy_vars_gNB,
pushTpool(&phy_vars_gNB->threadPool, req);
LOG_D(PHY, "Added a block to decode, in pipe: %d\n", r);
r_offset += E;
offset += (Kr_bytes - (harq_process->F >> 3) - ((harq_process->C > 1) ? 3 : 0));
//////////////////////////////////////////////////////////////////////////////////////////
}
offset += ((harq_process->K >> 3) - (harq_process->F >> 3) - ((harq_process->C > 1) ? 3 : 0));
}
return harq_process->C;
}
......@@ -953,7 +953,6 @@ int phy_procedures_gNB_uespec_RX(PHY_VARS_gNB *gNB, int frame_rx, int slot_rx)
VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_NR_ULSCH_PROCEDURES_RX, 0);
}
}
if (totalDecode > 0 && gNB->ldpc_offload_flag == 0) {
while (totalDecode > 0) {
notifiedFIFO_elt_t *req = pullTpool(&gNB->respDecode, &gNB->threadPool);
if (req == NULL)
......@@ -962,7 +961,6 @@ int phy_procedures_gNB_uespec_RX(PHY_VARS_gNB *gNB, int frame_rx, int slot_rx)
delNotifiedFIFO_elt(req);
totalDecode--;
}
}
for (int i = 0; i < gNB->max_nb_srs; i++) {
NR_gNB_SRS_t *srs = &gNB->srs[i];
if (srs) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment