Commit 973a0795 authored by Hongzhi Wang's avatar Hongzhi Wang

update ue dlsch decoding multi-threading

parent 08e91003
...@@ -742,6 +742,7 @@ void init_UE(int nb_inst) { ...@@ -742,6 +742,7 @@ void init_UE(int nb_inst) {
int inst; int inst;
NR_UE_MAC_INST_t *mac_inst; NR_UE_MAC_INST_t *mac_inst;
pthread_t threads[nb_inst]; pthread_t threads[nb_inst];
pthread_t dlsch0_threads;
for (inst=0; inst < nb_inst; inst++) { for (inst=0; inst < nb_inst; inst++) {
PHY_VARS_NR_UE *UE = PHY_vars_UE_g[inst][0]; PHY_VARS_NR_UE *UE = PHY_vars_UE_g[inst][0];
...@@ -763,6 +764,11 @@ void init_UE(int nb_inst) { ...@@ -763,6 +764,11 @@ void init_UE(int nb_inst) {
mac_inst->initial_bwp_ul.cyclic_prefix = UE->frame_parms.Ncp; mac_inst->initial_bwp_ul.cyclic_prefix = UE->frame_parms.Ncp;
LOG_I(PHY,"Intializing UE Threads for instance %d (%p,%p)...\n",inst,PHY_vars_UE_g[inst],PHY_vars_UE_g[inst][0]); LOG_I(PHY,"Intializing UE Threads for instance %d (%p,%p)...\n",inst,PHY_vars_UE_g[inst],PHY_vars_UE_g[inst][0]);
threadCreate(&threads[inst], UE_thread, (void *)UE, "UEthread", -1, OAI_PRIORITY_RT_MAX); threadCreate(&threads[inst], UE_thread, (void *)UE, "UEthread", -1, OAI_PRIORITY_RT_MAX);
#ifdef UE_DLSCH_PARALLELISATION
threadCreate(&dlsch0_threads, dlsch_thread, (void *)UE, "DLthread", -1, OAI_PRIORITY_RT_MAX-1);
#endif
} }
printf("UE threads created by %ld\n", gettid()); printf("UE threads created by %ld\n", gettid());
......
...@@ -49,6 +49,9 @@ ...@@ -49,6 +49,9 @@
static uint64_t nb_total_decod =0; static uint64_t nb_total_decod =0;
static uint64_t nb_error_decod =0; static uint64_t nb_error_decod =0;
notifiedFIFO_t freeBlocks;
notifiedFIFO_elt_t *msgToPush;
//extern double cpuf; //extern double cpuf;
void free_nr_ue_dlsch(NR_UE_DLSCH_t *dlsch) void free_nr_ue_dlsch(NR_UE_DLSCH_t *dlsch)
...@@ -686,11 +689,10 @@ uint32_t nr_dlsch_decoding_mthread(PHY_VARS_NR_UE *phy_vars_ue, ...@@ -686,11 +689,10 @@ uint32_t nr_dlsch_decoding_mthread(PHY_VARS_NR_UE *phy_vars_ue,
uint32_t G; uint32_t G;
uint32_t ret,offset; uint32_t ret,offset;
//short dummy_w[MAX_NUM_DLSCH_SEGMENTS][3*(8448+64)]; //short dummy_w[MAX_NUM_DLSCH_SEGMENTS][3*(8448+64)];
uint32_t r,r_offset=0,Kr,Kr_bytes,err_flag=0,K_bytes_F; uint32_t r,r_offset=0,Kr=8424,Kr_bytes,err_flag=0,K_bytes_F;
uint8_t crc_type; uint8_t crc_type;
//UE_rxtx_proc_t *proc = &phy_vars_ue->proc; //UE_rxtx_proc_t *proc = &phy_vars_ue->proc;
int32_t no_iteration_ldpc; int32_t no_iteration_ldpc,length_dec;
int Cby2;
/*uint8_t C; /*uint8_t C;
uint8_t Qm; uint8_t Qm;
uint8_t Nl; uint8_t Nl;
...@@ -711,16 +713,19 @@ uint32_t nr_dlsch_decoding_mthread(PHY_VARS_NR_UE *phy_vars_ue, ...@@ -711,16 +713,19 @@ uint32_t nr_dlsch_decoding_mthread(PHY_VARS_NR_UE *phy_vars_ue,
uint8_t kb, kc; uint8_t kb, kc;
uint8_t Ilbrm = 0; uint8_t Ilbrm = 0;
uint32_t Tbslbrm = 950984; uint32_t Tbslbrm = 950984;
uint16_t nb_rb = 30; //to update uint16_t nb_rb = 30;
//uint16_t nb_symb_sch = 12; double Coderate = 0.0;
uint8_t nb_re_dmrs = 6; nfapi_nr_config_request_t *cfg = &phy_vars_ue->nrUE_config;
uint16_t length_dmrs = 1; uint8_t dmrs_type = cfg->pdsch_config.dmrs_type.value;
uint8_t nb_re_dmrs = (dmrs_type==NFAPI_NR_DMRS_TYPE1)?6:4;
uint16_t length_dmrs = 1; //cfg->pdsch_config.dmrs_max_length.value;
uint32_t i,j; uint32_t i,j;
//uint32_t k;
__m128i *pv = (__m128i*)&z; __m128i *pv = (__m128i*)&z;
__m128i *pl = (__m128i*)&l; __m128i *pl = (__m128i*)&l;
notifiedFIFO_t nf;
initNotifiedFIFO(&nf);
if (!dlsch_llr) { if (!dlsch_llr) {
...@@ -766,7 +771,7 @@ uint32_t nr_dlsch_decoding_mthread(PHY_VARS_NR_UE *phy_vars_ue, ...@@ -766,7 +771,7 @@ uint32_t nr_dlsch_decoding_mthread(PHY_VARS_NR_UE *phy_vars_ue,
harq_process->TBS = nr_compute_tbs(harq_process->mcs,nb_rb,nb_symb_sch,nb_re_dmrs,length_dmrs, harq_process->Nl); harq_process->TBS = nr_compute_tbs(harq_process->mcs,nb_rb,nb_symb_sch,nb_re_dmrs,length_dmrs, harq_process->Nl);
A = harq_process->TBS; //2072 for QPSK 1/3 A = harq_process->TBS;
ret = dlsch->max_ldpc_iterations; ret = dlsch->max_ldpc_iterations;
...@@ -774,6 +779,9 @@ uint32_t nr_dlsch_decoding_mthread(PHY_VARS_NR_UE *phy_vars_ue, ...@@ -774,6 +779,9 @@ uint32_t nr_dlsch_decoding_mthread(PHY_VARS_NR_UE *phy_vars_ue,
G = harq_process->G; G = harq_process->G;
LOG_I(PHY,"DLSCH Decoding main, harq_pid %d TBS %d G %d mcs %d Nl %d nb_symb_sch %d nb_rb %d\n",harq_pid,A,G, harq_process->mcs, harq_process->Nl, nb_symb_sch,nb_rb);
proc->decoder_main_available = 1; proc->decoder_main_available = 1;
proc->decoder_thread_available = 0; proc->decoder_thread_available = 0;
proc->decoder_thread_available1 = 0; proc->decoder_thread_available1 = 0;
...@@ -796,28 +804,43 @@ uint32_t nr_dlsch_decoding_mthread(PHY_VARS_NR_UE *phy_vars_ue, ...@@ -796,28 +804,43 @@ uint32_t nr_dlsch_decoding_mthread(PHY_VARS_NR_UE *phy_vars_ue,
} }
kb = harq_process->K/harq_process->Z; Coderate = (float) A /(float) G;
if ( kb==22){ if ((A <=292) || ((A<=3824) && (Coderate <= 0.6667)) || Coderate <= 0.25)
p_decParams->BG = 1; {
p_decParams->R = 13; p_decParams->BG = 2;
kc = 68; if (Coderate < 0.3333){
} p_decParams->R = 15;
else{ kc = 52;
p_decParams->BG = 2; }
p_decParams->R = 13; else if (Coderate <0.6667){
kc = 52; p_decParams->R = 13;
} kc = 32;
}
else {
p_decParams->R = 23;
kc = 17;
}
}
else{
p_decParams->BG = 1;
if (Coderate < 0.6667){
p_decParams->R = 13;
kc = 68;
}
else if (Coderate <0.8889){
p_decParams->R = 23;
kc = 35;
}
else {
p_decParams->R = 89;
kc = 27;
}
}
p_decParams->numMaxIter = 2; //printf("coderate %f kc %d \n", Coderate, kc);
Kr = p_decParams->Z*kb; p_decParams->numMaxIter = dlsch->max_ldpc_iterations;
p_decParams->outMode= 0; p_decParams->outMode= 0;
/*
else {
printf("dlsch_decoding.c: Ndi>0 not checked yet!!\n");
return(max_ldpc_iterations);
}
*/
err_flag = 0; err_flag = 0;
r_offset = 0; r_offset = 0;
...@@ -841,13 +864,39 @@ uint32_t nr_dlsch_decoding_mthread(PHY_VARS_NR_UE *phy_vars_ue, ...@@ -841,13 +864,39 @@ uint32_t nr_dlsch_decoding_mthread(PHY_VARS_NR_UE *phy_vars_ue,
printf("Segmentation: C %d, K %d\n",harq_process->C,harq_process->K); printf("Segmentation: C %d, K %d\n",harq_process->C,harq_process->K);
#endif #endif
notifiedFIFO_elt_t *res;
opp_enabled=1; opp_enabled=1;
if (harq_process->C>1) { // wakeup worker if more than 1 segment if (harq_process->C>1) {
if (pthread_mutex_lock(&proc->mutex_dlsch_td) != 0) { for (int nb_seg =1 ; nb_seg<harq_process->C; nb_seg++){
LOG_E( PHY, "[SCHED][UE %d][Slot0] error locking mutex for UE dlsch td\n",phy_vars_ue->Mod_id ); printf("mthread pool C >1\n");
exit_fun("nothing to add"); displayList(&Tpool->incomingFifo);
} if ( (res=tryPullTpool(&nf, Tpool)) != NULL ) {
printf("mthread pool non null\n");
pushNotifiedFIFO_nothreadSafe(&freeBlocks,res);
}
printf("mthread after push\n");
displayList(&freeBlocks);
AssertFatal((msgToPush=pullNotifiedFIFO_nothreadSafe(&freeBlocks)) != NULL,"chained list failure");
nr_rxtx_thread_data_t *curMsg=(nr_rxtx_thread_data_t *)NotifiedFifoData(msgToPush);
curMsg->UE=phy_vars_ue;
memset(&curMsg->proc, 0, sizeof(curMsg->proc));
printf("mthread frame %d slot %d\n", proc->frame_rx, proc->nr_tti_rx);
curMsg->proc.frame_rx = proc->frame_rx;
curMsg->proc.nr_tti_rx = proc->nr_tti_rx;
curMsg->proc.num_seg = nb_seg;
curMsg->proc.eNB_id= eNB_id;
curMsg->proc.harq_pid=harq_pid;
curMsg->proc.llr8_flag = llr8_flag;
printf("mthread after pull");
msgToPush->key=nb_seg;
pushTpool(Tpool, msgToPush);
printf("mthread after pushTpool\n");
displayList(&Tpool->incomingFifo);
/*Qm= harq_process->Qm; /*Qm= harq_process->Qm;
Nl=harq_process->Nl; Nl=harq_process->Nl;
r_thread = harq_process->C/2-1; r_thread = harq_process->C/2-1;
...@@ -857,7 +906,6 @@ uint32_t nr_dlsch_decoding_mthread(PHY_VARS_NR_UE *phy_vars_ue, ...@@ -857,7 +906,6 @@ uint32_t nr_dlsch_decoding_mthread(PHY_VARS_NR_UE *phy_vars_ue,
GpmodC = Gp%C; GpmodC = Gp%C;
if (r_thread < (C-(GpmodC))) if (r_thread < (C-(GpmodC)))
Er = Nl*Qm * (Gp/C); Er = Nl*Qm * (Gp/C);
else else
...@@ -865,108 +913,10 @@ uint32_t nr_dlsch_decoding_mthread(PHY_VARS_NR_UE *phy_vars_ue, ...@@ -865,108 +913,10 @@ uint32_t nr_dlsch_decoding_mthread(PHY_VARS_NR_UE *phy_vars_ue,
printf("mthread Er %d\n", Er); printf("mthread Er %d\n", Er);
printf("mthread instance_cnt_dlsch_td %d\n", proc->instance_cnt_dlsch_td);*/ printf("mthread instance_cnt_dlsch_td %d\n", proc->instance_cnt_dlsch_td);*/
}
proc->instance_cnt_dlsch_td++;
proc->eNB_id = eNB_id;
proc->harq_pid = harq_pid;
proc->llr8_flag = llr8_flag;
//proc->r[0] = 1;
if (proc->instance_cnt_dlsch_td == 0)
{
LOG_D(PHY,"unblock dlsch td processing thread blocked on instance_cnt_dlsch_td : %d \n", proc->instance_cnt_dlsch_td );
if (pthread_cond_signal(&proc->cond_dlsch_td) != 0) {
LOG_E( PHY, "[SCHED][UE %d][Slot0] ERROR pthread_cond_signal for UE dlsch td\n", phy_vars_ue->Mod_id);
exit_fun("nothing to add");
}
if (pthread_mutex_unlock(&proc->mutex_dlsch_td) != 0) {
LOG_E( PHY, "[SCHED][UE %d][Slot0] error unlocking mutex for UE dlsch td \n",phy_vars_ue->Mod_id );
exit_fun("nothing to add");
}
} else
{
LOG_E( PHY, "[SCHED][UE %d] UE dlsch td thread busy (IC %d)!!\n", phy_vars_ue->Mod_id, proc->instance_cnt_dlsch_td);
if (proc->instance_cnt_dlsch_td > 4)
exit_fun("instance_cnt_dlsch_td > 4");
}
//AssertFatal(pthread_cond_signal(&proc->cond_slot1_dl_processing) ==0 ,"");
AssertFatal(pthread_mutex_unlock(&proc->mutex_dlsch_td) ==0,"");
if (harq_process->C>2) {
if (pthread_mutex_lock(&proc->mutex_dlsch_td1) != 0) {
LOG_E( PHY, "[SCHED][UE %d][Slot0] error locking mutex for UE dlsch td\n",phy_vars_ue->Mod_id );
exit_fun("nothing to add");
}
proc->instance_cnt_dlsch_td1++;
proc->eNB_id = eNB_id;
proc->harq_pid = harq_pid;
proc->llr8_flag = llr8_flag;
// proc->Er = Er;
if (proc->instance_cnt_dlsch_td1 == 0)
{
LOG_D(PHY,"unblock slot1 dl processing thread blocked on instance_cnt_dlsch_td : %d \n", proc->instance_cnt_dlsch_td1 );
if (pthread_cond_signal(&proc->cond_dlsch_td1) != 0) {
LOG_E( PHY, "[SCHED][UE %d][Slot0] ERROR pthread_cond_signal for UE dlsch td\n", phy_vars_ue->Mod_id);
exit_fun("nothing to add");
}
if (pthread_mutex_unlock(&proc->mutex_dlsch_td1) != 0) {
LOG_E( PHY, "[SCHED][UE %d][Slot0] error unlocking mutex for UE dlsch td \n",phy_vars_ue->Mod_id );
exit_fun("nothing to add");
}
} else
{
LOG_E( PHY, "[SCHED][UE %d] UE dlsch td thread 1 busy (IC %d)!!\n", phy_vars_ue->Mod_id, proc->instance_cnt_dlsch_td1);
if (proc->instance_cnt_dlsch_td1 > 4)
exit_fun("instance_cnt_dlsch_td1 > 4");
}
AssertFatal(pthread_mutex_unlock(&proc->mutex_dlsch_td1) ==0,"");
}
/*
if (pthread_mutex_timedlock(&proc->mutex_td,&wait) != 0) {
printf("[eNB] ERROR pthread_mutex_lock for TD thread (IC %d)\n", proc->instance_cnt_td);
exit_fun( "error locking mutex_fep" );
return -1;
}
if (proc->instance_cnt_td==0) {
printf("[UE] TD thread busy\n");
exit_fun("TD thread busy");
pthread_mutex_unlock( &proc->mutex_td );
return -1;
}
++proc->instance_cnt_td;
proc->tdp.UE = phy_vars_ue;
proc->tdp.eNB_id = eNB_id;
proc->tdp.harq_pid = harq_pid;
proc->tdp.llr8_flag = llr8_flag;
printf("----- 2thread llr flag %d tdp flag %d\n",llr8_flag, proc->tdp.llr8_flag);
// wakeup worker to do second half segments
if (pthread_cond_signal(&proc->cond_td) != 0) {
printf("[UE] ERROR pthread_cond_signal for td thread exit\n");
exit_fun( "ERROR pthread_cond_signal" );
return (1+dlsch->last_iteration_cnt);
}
pthread_mutex_unlock( &proc->mutex_td );*/
Cby2 = 1; //harq_process->C/2;
//proc->decoder_main_available = 1; //proc->decoder_main_available = 1;
} }
else {
Cby2 = 1;
}
//for (r=0; r<Cby2; r++) {
r = 0; r = 0;
if (r==0) r_offset =0; if (r==0) r_offset =0;
...@@ -1067,45 +1017,46 @@ uint32_t nr_dlsch_decoding_mthread(PHY_VARS_NR_UE *phy_vars_ue, ...@@ -1067,45 +1017,46 @@ uint32_t nr_dlsch_decoding_mthread(PHY_VARS_NR_UE *phy_vars_ue,
memset(harq_process->c[r],0,Kr_bytes); memset(harq_process->c[r],0,Kr_bytes);
// printf("done\n"); // printf("done\n");
if (harq_process->C == 1) if (harq_process->C == 1){
crc_type = CRC24_A; crc_type = CRC24_A;
else length_dec = harq_process->B;
}
else{
crc_type = CRC24_B; crc_type = CRC24_B;
length_dec = (harq_process->B+24*harq_process->C)/harq_process->C;
}
//#ifndef __AVX2__ //#ifndef __AVX2__
if (err_flag == 0) { if (err_flag == 0) {
/* /*
LOG_I(PHY, "turbo algo Kr=%d cb_cnt=%d C=%d nbRB=%d crc_type %d TBSInput=%d TBSHarq=%d TBSplus24=%d mcs=%d Qm=%d RIV=%d round=%d maxIter %d\n", LOG_I(PHY, "LDPC algo Kr=%d cb_cnt=%d C=%d nbRB=%d crc_type %d TBSInput=%d TBSHarq=%d TBSplus24=%d mcs=%d Qm=%d RIV=%d round=%d maxIter %d\n",
Kr,r,harq_process->C,harq_process->nb_rb,crc_type,A,harq_process->TBS, Kr,r,harq_process->C,harq_process->nb_rb,crc_type,A,harq_process->TBS,
harq_process->B,harq_process->mcs,harq_process->Qm,harq_process->rvidx,harq_process->round,dlsch->max_ldpc_iterations); harq_process->B,harq_process->mcs,harq_process->Qm,harq_process->rvidx,harq_process->round,dlsch->max_ldpc_iterations);
*/ */
if (llr8_flag) {
AssertFatal (Kr >= 256, "turbo algo issue Kr=%d cb_cnt=%d C=%d nbRB=%d TBSInput=%d TBSHarq=%d TBSplus24=%d mcs=%d Qm=%d RIV=%d round=%d\n",
Kr,r,harq_process->C,harq_process->nb_rb,A,harq_process->TBS,harq_process->B,harq_process->mcs,harq_process->Qm,harq_process->rvidx,harq_process->round);
}
#if UE_TIMING_TRACE #if UE_TIMING_TRACE
start_meas(dlsch_turbo_decoding_stats); start_meas(dlsch_turbo_decoding_stats);
#endif #endif
LOG_D(PHY,"mthread AbsSubframe %d.%d Start turbo segment %d/%d \n",frame%1024,nr_tti_rx,r,harq_process->C-1); LOG_D(PHY,"mthread AbsSubframe %d.%d Start LDPC segment %d/%d \n",frame%1024,nr_tti_rx,r,harq_process->C-1);
for (int cnt =0; cnt < (kc-2)*p_decParams->Z; cnt++){ /*for (int cnt =0; cnt < (kc-2)*p_decParams->Z; cnt++){
inv_d[cnt] = (1)*harq_process->d[r][cnt]; inv_d[cnt] = (1)*harq_process->d[r][cnt];
} }*/
memset(pv,0,2*p_decParams->Z*sizeof(int16_t)); memset(pv,0,2*p_decParams->Z*sizeof(int16_t));
//memset(pl,0,2*p_decParams->Z*sizeof(int8_t)); //memset(pl,0,2*p_decParams->Z*sizeof(int8_t));
memset((pv+K_bytes_F),127,harq_process->F*sizeof(int16_t)); memset((pv+K_bytes_F),127,harq_process->F*sizeof(int16_t));
for (i=((2*p_decParams->Z)>>3), j = 0; i < K_bytes_F+((2*p_decParams->Z)>>3); i++, j++) for (i=((2*p_decParams->Z)>>3), j = 0; i < K_bytes_F; i++, j++)
{ {
pv[i]= _mm_loadu_si128((__m128i*)(&inv_d[8*j])); pv[i]= _mm_loadu_si128((__m128i*)(&harq_process->d[r][8*j]));
} }
for (i=Kr_bytes+((2*p_decParams->Z)>>3),j=Kr_bytes; i < ((kc*p_decParams->Z)>>3); i++, j++) for (i=Kr_bytes,j=K_bytes_F-((2*p_decParams->Z)>>3); i < ((kc*p_decParams->Z)>>3); i++, j++)
{ {
pv[i]= _mm_loadu_si128((__m128i*)(&inv_d[8*j])); pv[i]= _mm_loadu_si128((__m128i*)(&harq_process->d[r][8*j]));
} }
for (i=0, j=0; j < ((kc*p_decParams->Z)>>4); i+=2, j++) for (i=0, j=0; j < ((kc*p_decParams->Z)>>4); i+=2, j++)
...@@ -1128,8 +1079,8 @@ uint32_t nr_dlsch_decoding_mthread(PHY_VARS_NR_UE *phy_vars_ue, ...@@ -1128,8 +1079,8 @@ uint32_t nr_dlsch_decoding_mthread(PHY_VARS_NR_UE *phy_vars_ue,
ret=2; ret=2;
} }
if (check_crc(llrProcBuf,harq_process->B,harq_process->F,crc_type)) { if (check_crc((uint8_t*)llrProcBuf,length_dec,harq_process->F,crc_type)) {
printf("CRC OK\n"); printf("Segment %d CRC OK\n",r);
ret = 2; ret = 2;
} }
else { else {
...@@ -1245,7 +1196,7 @@ uint32_t nr_dlsch_decoding_mthread(PHY_VARS_NR_UE *phy_vars_ue, ...@@ -1245,7 +1196,7 @@ uint32_t nr_dlsch_decoding_mthread(PHY_VARS_NR_UE *phy_vars_ue,
printf("C %d\n",harq_process->C); printf("C %d\n",harq_process->C);
*/ */
uint32_t wait = 0; uint32_t wait = 0;
if (harq_process->C==2){ /*if (harq_process->C==2){
while((proc->decoder_thread_available == 0) ) while((proc->decoder_thread_available == 0) )
{ {
usleep(1); usleep(1);
...@@ -1258,15 +1209,21 @@ uint32_t nr_dlsch_decoding_mthread(PHY_VARS_NR_UE *phy_vars_ue, ...@@ -1258,15 +1209,21 @@ uint32_t nr_dlsch_decoding_mthread(PHY_VARS_NR_UE *phy_vars_ue,
usleep(1); usleep(1);
wait++; wait++;
} }
} }*/
/*notifiedFIFO_elt_t *res1=tryPullTpool(&nf, Tpool);
if (!res1) {
printf("mthread trypull null\n");
usleep(1);
wait++;
}*/
proc->decoder_main_available = 0; proc->decoder_main_available = 0;
Kr = harq_process->K; //to check if same K in all segments
Kr_bytes = Kr>>3;
for (r=0; r<harq_process->C; r++) { for (r=0; r<harq_process->C; r++) {
Kr = harq_process->K; //to check if same K in all segments
Kr_bytes = Kr>>3;
memcpy(harq_process->b+offset, memcpy(harq_process->b+offset,
harq_process->c[r], harq_process->c[r],
Kr_bytes- - (harq_process->F>>3) -((harq_process->C>1)?3:0)); Kr_bytes- - (harq_process->F>>3) -((harq_process->C>1)?3:0));
...@@ -1286,27 +1243,20 @@ uint32_t nr_dlsch_decoding_mthread(PHY_VARS_NR_UE *phy_vars_ue, ...@@ -1286,27 +1243,20 @@ uint32_t nr_dlsch_decoding_mthread(PHY_VARS_NR_UE *phy_vars_ue,
dlsch->last_iteration_cnt = ret; dlsch->last_iteration_cnt = ret;
//proc->decoder_thread_available = 0; //proc->decoder_thread_available = 0;
//proc->decoder_main_available = 0; //proc->decoder_main_available = 0;
//wait for worker to finish
//wait_on_busy_condition(&proc->mutex_td,&proc->cond_td,&proc->instance_cnt_dlsch td,"dlsch td thread");
//return( (ret>proc->tdp.ret) ? ret : proc->tdp.ret );
return(ret); return(ret);
} }
#endif #endif
#ifdef UE_DLSCH_PARALLELISATION #ifdef UE_DLSCH_PARALLELISATION
#define FIFO_PRIORITY 39 void *nr_dlsch_decoding_process(void *arg)
void *nr_dlsch_decoding_2thread0(void *arg)
{ {
static __thread int UE_dlsch_td_retval; nr_rxtx_thread_data_t *rxtxD= (nr_rxtx_thread_data_t *)arg;
struct nr_rxtx_thread_data *rtd = arg; UE_nr_rxtx_proc_t *proc = &rxtxD->proc;
UE_nr_rxtx_proc_t *proc = rtd->proc; PHY_VARS_NR_UE *phy_vars_ue = rxtxD->UE;
PHY_VARS_NR_UE *phy_vars_ue = rtd->UE;
NR_DL_FRAME_PARMS *frame_parms = &phy_vars_ue->frame_parms; NR_DL_FRAME_PARMS *frame_parms = &phy_vars_ue->frame_parms;
int llr8_flag1; int llr8_flag1;
int32_t no_iteration_ldpc; int32_t no_iteration_ldpc,length_dec;
t_nrLDPC_dec_params decParams; t_nrLDPC_dec_params decParams;
t_nrLDPC_dec_params* p_decParams = &decParams; t_nrLDPC_dec_params* p_decParams = &decParams;
t_nrLDPC_time_stats procTime; t_nrLDPC_time_stats procTime;
...@@ -1322,6 +1272,7 @@ void *nr_dlsch_decoding_2thread0(void *arg) ...@@ -1322,6 +1272,7 @@ void *nr_dlsch_decoding_2thread0(void *arg)
uint8_t Ilbrm = 0; uint8_t Ilbrm = 0;
uint32_t Tbslbrm = 950984; uint32_t Tbslbrm = 950984;
uint16_t nb_rb = 30; //to update uint16_t nb_rb = 30; //to update
double Coderate = 0.0;
uint16_t nb_symb_sch = 12; uint16_t nb_symb_sch = 12;
uint8_t nb_re_dmrs = 6; uint8_t nb_re_dmrs = 6;
uint16_t length_dmrs = 1; uint16_t length_dmrs = 1;
...@@ -1333,23 +1284,10 @@ void *nr_dlsch_decoding_2thread0(void *arg) ...@@ -1333,23 +1284,10 @@ void *nr_dlsch_decoding_2thread0(void *arg)
__m128i *pl = (__m128i*)&l; __m128i *pl = (__m128i*)&l;
proc->instance_cnt_dlsch_td=-1; proc->instance_cnt_dlsch_td=-1;
proc->nr_tti_rx=proc->sub_frame_start; //proc->nr_tti_rx=proc->sub_frame_start;
proc->decoder_thread_available = 0; proc->decoder_thread_available = 1;
char threadname[256];
sprintf(threadname,"UE_thread_dlsch_td_%d", proc->sub_frame_start);
cpu_set_t cpuset;
CPU_ZERO(&cpuset);
if ( (proc->sub_frame_start+1)%RX_NB_TH == 0 && threads.dlsch_td_one != -1 )
CPU_SET(threads.dlsch_td_one, &cpuset);
if ( (proc->sub_frame_start+1)%RX_NB_TH == 1 && threads.dlsch_td_two != -1 )
CPU_SET(threads.dlsch_td_two, &cpuset);
if ( (proc->sub_frame_start+1)%RX_NB_TH == 2 && threads.dlsch_td_three != -1 )
CPU_SET(threads.dlsch_td_three, &cpuset);
#if UE_TIMING_TRACE #if UE_TIMING_TRACE
time_stats_t *dlsch_rate_unmatching_stats=&phy_vars_ue->dlsch_rate_unmatching_stats; time_stats_t *dlsch_rate_unmatching_stats=&phy_vars_ue->dlsch_rate_unmatching_stats;
...@@ -1367,45 +1305,21 @@ void *nr_dlsch_decoding_2thread0(void *arg) ...@@ -1367,45 +1305,21 @@ void *nr_dlsch_decoding_2thread0(void *arg)
uint8_t Nl; uint8_t Nl;
//uint32_t Er; //uint32_t Er;
init_thread(900000,1000000 , FIFO_PRIORITY-1, &cpuset, threadname); int eNB_id = proc->eNB_id;
while (!oai_exit) { int harq_pid = proc->harq_pid;
llr8_flag1 = proc->llr8_flag;
//proc->decoder_thread_available = 1; int frame = proc->frame_rx;
int slot = proc->nr_tti_rx;
if (pthread_mutex_lock(&proc->mutex_dlsch_td) != 0) { r = proc->num_seg;
LOG_E( PHY, "[SCHED][UE] error locking mutex for UE dlsch td\n" );
exit_fun("nothing to add");
}
while (proc->instance_cnt_dlsch_td < 0) {
// most of the time, the thread is waiting here
pthread_cond_wait( &proc->cond_dlsch_td, &proc->mutex_dlsch_td );
}
if (pthread_mutex_unlock(&proc->mutex_dlsch_td) != 0) {
LOG_E( PHY, "[SCHED][UE] error unlocking mutex for UE dlsch_td \n" );
exit_fun("nothing to add");
}
uint32_t wait = 0; NR_UE_DLSCH_t *dlsch = phy_vars_ue->dlsch[phy_vars_ue->current_thread_id[slot]][eNB_id][0];
while(proc->decoder_main_available == 0) NR_DL_UE_HARQ_t *harq_process = dlsch->harq_processes[harq_pid];
{ short *dlsch_llr = phy_vars_ue->pdsch_vars[phy_vars_ue->current_thread_id[slot]][eNB_id]->llr[0];
usleep(1); //printf("2thread0 llr flag %d tdp flag %d\n",llr8_flag1, tdp->llr8_flag);
wait++; p_nrLDPC_procBuf = harq_process->p_nrLDPC_procBuf[r];
} nb_symb_sch = harq_process->nb_symbols;
printf("dlsch decoding process frame %d slot %d segment %d r %d nb symb %d \n", frame, proc->nr_tti_rx, proc->num_seg, r, harq_process->nb_symbols);
//proc->decoder_thread_available = 0;
//PHY_VARS_NR_UE *phy_vars_ue = tdp->UE;
int eNB_id = proc->eNB_id;
int harq_pid = proc->harq_pid;
llr8_flag1 = proc->llr8_flag;
//r_offset = proc->Er;
//UE_rxtx_proc_t *proc = tdp->proc;
int frame = proc->frame_rx;
int subframe = proc->nr_tti_rx;
NR_UE_DLSCH_t *dlsch = phy_vars_ue->dlsch[phy_vars_ue->current_thread_id[subframe]][eNB_id][0];
NR_DL_UE_HARQ_t *harq_process = dlsch->harq_processes[harq_pid];
short *dlsch_llr = phy_vars_ue->pdsch_vars[phy_vars_ue->current_thread_id[subframe]][eNB_id]->llr[0];
//printf("2thread0 llr flag %d tdp flag %d\n",llr8_flag1, tdp->llr8_flag);
p_nrLDPC_procBuf = harq_process->p_nrLDPC_procBuf[1];
/* /*
if (nb_rb > frame_parms->N_RB_DL) { if (nb_rb > frame_parms->N_RB_DL) {
...@@ -1428,10 +1342,14 @@ void *nr_dlsch_decoding_2thread0(void *arg) ...@@ -1428,10 +1342,14 @@ void *nr_dlsch_decoding_2thread0(void *arg)
A = harq_process->TBS; //2072 for QPSK 1/3 A = harq_process->TBS; //2072 for QPSK 1/3
ret = dlsch->max_ldpc_iterations; ret = dlsch->max_ldpc_iterations;
harq_process->G = nr_get_G(nb_rb, nb_symb_sch, nb_re_dmrs, length_dmrs, harq_process->Qm,harq_process->Nl); harq_process->G = nr_get_G(nb_rb, nb_symb_sch, nb_re_dmrs, length_dmrs, harq_process->Qm,harq_process->Nl);
G = harq_process->G; G = harq_process->G;
LOG_I(PHY,"DLSCH Decoding process, harq_pid %d TBS %d G %d mcs %d Nl %d nb_symb_sch %d nb_rb %d\n",harq_pid,A,G, harq_process->mcs, harq_process->Nl, nb_symb_sch,nb_rb);
if (harq_process->round == 0) { if (harq_process->round == 0) {
// This is a new packet, so compute quantities regarding segmentation // This is a new packet, so compute quantities regarding segmentation
...@@ -1448,20 +1366,40 @@ void *nr_dlsch_decoding_2thread0(void *arg) ...@@ -1448,20 +1366,40 @@ void *nr_dlsch_decoding_2thread0(void *arg)
} }
kb = harq_process->K/harq_process->Z; Coderate = (float) A /(float) G;
if ( kb==22){ if ((A <=292) || ((A<=3824) && (Coderate <= 0.6667)) || Coderate <= 0.25)
p_decParams->BG = 1; {
p_decParams->BG = 2;
if (Coderate < 0.3333){
p_decParams->R = 15;
kc = 52;
}
else if (Coderate <0.6667){
p_decParams->R = 13; p_decParams->R = 13;
kc = 68; kc = 32;
} }
else{ else {
p_decParams->BG = 2; p_decParams->R = 23;
kc = 17;
}
}
else{
p_decParams->BG = 1;
if (Coderate < 0.6667){
p_decParams->R = 13; p_decParams->R = 13;
kc = 52; kc = 68;
} }
else if (Coderate <0.8889){
p_decParams->R = 23;
kc = 35;
}
else {
p_decParams->R = 89;
kc = 27;
}
}
p_decParams->numMaxIter = 2; p_decParams->numMaxIter = dlsch->max_ldpc_iterations;
Kr = p_decParams->Z*kb;
p_decParams->outMode= 0; p_decParams->outMode= 0;
/* /*
...@@ -1511,8 +1449,9 @@ void *nr_dlsch_decoding_2thread0(void *arg) ...@@ -1511,8 +1449,9 @@ void *nr_dlsch_decoding_2thread0(void *arg)
// printf("thread0 r_offset %d\n",r_offset); // printf("thread0 r_offset %d\n",r_offset);
//for (r=(harq_process->C/2); r<harq_process->C; r++) { //for (r=(harq_process->C/2); r<harq_process->C; r++) {
r=1; //(harq_process->C/2); // r=1; //(harq_process->C/2);
r_offset = r*r_offset;
Kr = harq_process->K; Kr = harq_process->K;
Kr_bytes = Kr>>3; Kr_bytes = Kr>>3;
...@@ -1530,8 +1469,10 @@ void *nr_dlsch_decoding_2thread0(void *arg) ...@@ -1530,8 +1469,10 @@ void *nr_dlsch_decoding_2thread0(void *arg)
harq_process->w[r], harq_process->w[r],
dlsch_llr+r_offset); dlsch_llr+r_offset);
//for (int i =0; i<16; i++) #ifdef DEBUG_DLSCH_DECODING
// printf("rx output deinterleaving w[%d]= %d r_offset %d\n", i,harq_process->w[r][i], r_offset); for (int i =0; i<16; i++)
printf("rx output thread 0 deinterleaving w[%d]= %d r_offset %d\n", i,harq_process->w[r][i], r_offset);
#endif
#if UE_TIMING_TRACE #if UE_TIMING_TRACE
stop_meas(dlsch_deinterleaving_stats); stop_meas(dlsch_deinterleaving_stats);
...@@ -1596,14 +1537,15 @@ void *nr_dlsch_decoding_2thread0(void *arg) ...@@ -1596,14 +1537,15 @@ void *nr_dlsch_decoding_2thread0(void *arg)
// printf("Clearing c, %p\n",harq_process->c[r]); // printf("Clearing c, %p\n",harq_process->c[r]);
memset(harq_process->c[r],0,Kr_bytes); memset(harq_process->c[r],0,Kr_bytes);
// printf("done\n"); if (harq_process->C == 1){
if (harq_process->C == 1)
crc_type = CRC24_A; crc_type = CRC24_A;
else length_dec = harq_process->B;
}
else{
crc_type = CRC24_B; crc_type = CRC24_B;
length_dec = (harq_process->B+24*harq_process->C)/harq_process->C;
}
#if 1
if (err_flag == 0) { if (err_flag == 0) {
/* /*
LOG_I(PHY, "turbo algo Kr=%d cb_cnt=%d C=%d nbRB=%d crc_type %d TBSInput=%d TBSHarq=%d TBSplus24=%d mcs=%d Qm=%d RIV=%d round=%d maxIter %d\n", LOG_I(PHY, "turbo algo Kr=%d cb_cnt=%d C=%d nbRB=%d crc_type %d TBSInput=%d TBSHarq=%d TBSplus24=%d mcs=%d Qm=%d RIV=%d round=%d maxIter %d\n",
...@@ -1627,29 +1569,37 @@ void *nr_dlsch_decoding_2thread0(void *arg) ...@@ -1627,29 +1569,37 @@ void *nr_dlsch_decoding_2thread0(void *arg)
//memset(pl,0,2*p_decParams->Z*sizeof(int8_t)); //memset(pl,0,2*p_decParams->Z*sizeof(int8_t));
memset((pv+K_bytes_F),127,harq_process->F*sizeof(int16_t)); memset((pv+K_bytes_F),127,harq_process->F*sizeof(int16_t));
for (i=((2*p_decParams->Z)>>3), j = 0; i < K_bytes_F+((2*p_decParams->Z)>>3); i++, j++) for (i=((2*p_decParams->Z)>>3), j = 0; i < K_bytes_F; i++, j++)
{ {
pv[i]= _mm_loadu_si128((__m128i*)(&inv_d[8*j])); pv[i]= _mm_loadu_si128((__m128i*)(&harq_process->d[r][8*j]));
} }
for (i=Kr_bytes+((2*p_decParams->Z)>>3),j=Kr_bytes; i < ((kc*p_decParams->Z)>>3); i++, j++) for (i=Kr_bytes,j=K_bytes_F-((2*p_decParams->Z)>>3); i < ((kc*p_decParams->Z)>>3); i++, j++)
{ {
pv[i]= _mm_loadu_si128((__m128i*)(&inv_d[8*j])); pv[i]= _mm_loadu_si128((__m128i*)(&harq_process->d[r][8*j]));
} }
for (i=0, j=0; j < ((kc*p_decParams->Z)>>4); i+=2, j++)
{
pl[j] = _mm_packs_epi16(pv[i],pv[i+1]);
} for (i=0, j=0; j < ((kc*p_decParams->Z)>>4); i+=2, j++)
{
pl[j] = _mm_packs_epi16(pv[i],pv[i+1]);
}
no_iteration_ldpc = nrLDPC_decoder(p_decParams, no_iteration_ldpc = nrLDPC_decoder(p_decParams,
(int8_t*)&pl[0], (int8_t*)&pl[0],
llrProcBuf, llrProcBuf,
p_nrLDPC_procBuf, p_nrLDPC_procBuf,
p_procTime); p_procTime);
// Fixme: correct type is unsigned, but nrLDPC_decoder and all called behind use signed int
if (check_crc((uint8_t*)llrProcBuf,length_dec,harq_process->F,crc_type)) {
printf("Segment %d CRC OK\n",r);
ret = 2;
}
else {
printf("CRC NOK\n");
ret = 1+dlsch->max_ldpc_iterations;
}
if (no_iteration_ldpc > 10) if (no_iteration_ldpc > 10)
printf("Error number of iteration LPDC %d\n", no_iteration_ldpc); printf("Error number of iteration LPDC %d\n", no_iteration_ldpc);
//else //else
...@@ -1691,552 +1641,46 @@ void *nr_dlsch_decoding_2thread0(void *arg) ...@@ -1691,552 +1641,46 @@ void *nr_dlsch_decoding_2thread0(void *arg)
} }
//} //}
/*int32_t frame_rx_prev = frame; proc->decoder_thread_available = 1;
int32_t subframe_rx_prev = subframe - 1; //proc->decoder_main_available = 0;
if (subframe_rx_prev < 0) {
frame_rx_prev--;
subframe_rx_prev += 10;
}
frame_rx_prev = frame_rx_prev%1024;*/
#if 0
if (err_flag == 1) {
//#if UE_DEBUG_TRACE
LOG_I(PHY,"[UE %d] THREAD 0 DLSCH: Setting NAK for SFN/SF %d/%d (pid %d, status %d, round %d, TBS %d, mcs %d) Kr %d r %d harq_process->round %d\n",
phy_vars_ue->Mod_id, frame, subframe, harq_pid,harq_process->status, harq_process->round,harq_process->TBS,harq_process->mcs,Kr,r,harq_process->round);
//#endif
dlsch->harq_ack[subframe].ack = 0;
dlsch->harq_ack[subframe].harq_id = harq_pid;
dlsch->harq_ack[subframe].send_harq_status = 1;
harq_process->errors[harq_process->round]++;
harq_process->round++;
// printf("Rate: [UE %d] DLSCH: Setting NACK for subframe %d (pid %d, round %d)\n",phy_vars_ue->Mod_id,subframe,harq_pid,harq_process->round);
if (harq_process->round >= dlsch->Mdlharq) {
harq_process->status = SCH_IDLE;
harq_process->round = 0;
}
/* if(is_crnti)
{
LOG_D(PHY,"[UE %d] DLSCH: Setting NACK for subframe %d (pid %d, pid status %d, round %d/Max %d, TBS %d)\n",
phy_vars_ue->Mod_id,subframe,harq_pid,harq_process->status,harq_process->round,dlsch->Mdlharq,harq_process->TBS);
}*/
//return((1+dlsch->max_ldpc_iterations));
} else {
#if UE_DEBUG_TRACE
LOG_I(PHY,"[UE %d] THREAD 0 DLSCH: Setting ACK for subframe %d TBS %d mcs %d nb_rb %d\n",
phy_vars_ue->Mod_id,subframe,harq_process->TBS,harq_process->mcs,harq_process->nb_rb);
#endif
harq_process->status = SCH_IDLE; }
harq_process->round = 0;
dlsch->harq_ack[subframe].ack = 1;
dlsch->harq_ack[subframe].harq_id = harq_pid;
dlsch->harq_ack[subframe].send_harq_status = 1;
//LOG_I(PHY,"[UE %d] DLSCH: Setting ACK for SFN/SF %d/%d (pid %d, status %d, round %d, TBS %d, mcs %d)\n",
// phy_vars_ue->Mod_id, frame, subframe, harq_pid, harq_process->status, harq_process->round,harq_process->TBS,harq_process->mcs);
/* if(is_crnti) void *dlsch_thread(void *arg) {
{ //this thread should be over the processing thread to keep in real time
LOG_D(PHY,"[UE %d] DLSCH: Setting ACK for subframe %d (pid %d, round %d, TBS %d)\n",phy_vars_ue->Mod_id,subframe,harq_pid,harq_process->round,harq_process->TBS); PHY_VARS_NR_UE *UE = (PHY_VARS_NR_UE *) arg;
} notifiedFIFO_t nf;
LOG_D(PHY,"[UE %d] DLSCH: Setting ACK for subframe %d (pid %d, round %d)\n",phy_vars_ue->Mod_id,subframe,harq_pid,harq_process->round); initNotifiedFIFO(&nf);
int nbDlProcessing=0;
initNotifiedFIFO_nothreadSafe(&freeBlocks);
}*/ for (int i=0; i<RX_NB_TH_DL+1; i++)
pushNotifiedFIFO_nothreadSafe(&freeBlocks,
newNotifiedFIFO_elt(sizeof(nr_rxtx_thread_data_t), 0,&nf,nr_dlsch_decoding_process));
printf("dlsch_thread\n");
displayList(&freeBlocks);
// Reassembly of Transport block here while (!oai_exit) {
offset = 0;
/* notifiedFIFO_elt_t *res;
printf("harq_pid %d\n",harq_pid);
printf("F %d, Fbytes %d\n",harq_process->F,harq_process->F>>3);
printf("C %d\n",harq_process->C);
*/
for (r=0; r<harq_process->C; r++) {
if (r<harq_process->Cminus)
Kr = harq_process->Kminus;
else
Kr = harq_process->Kplus;
Kr_bytes = Kr>>3; while (nbDlProcessing >= RX_NB_TH_DL) {
if ( (res=tryPullTpool(&nf, Tpool)) != NULL ) {
printf("dlsch thread trypull non null\n");
nr_rxtx_thread_data_t *tmp=(nr_rxtx_thread_data_t *)res->msgData;
nbDlProcessing--;
pushNotifiedFIFO_nothreadSafe(&freeBlocks,res);
}
// printf("Segment %d : Kr= %d bytes\n",r,Kr_bytes); usleep(200);
if (r==0) {
memcpy(harq_process->b,
&harq_process->c[0][(harq_process->F>>3)],
Kr_bytes - (harq_process->F>>3)- ((harq_process->C>1)?3:0));
offset = Kr_bytes - (harq_process->F>>3) - ((harq_process->C>1)?3:0);
// printf("copied %d bytes to b sequence (harq_pid %d)\n",
// Kr_bytes - (harq_process->F>>3),harq_pid);
// printf("b[0] = %x,c[%d] = %x\n",
// harq_process->b[0],
// harq_process->F>>3,
// harq_process->c[0][(harq_process->F>>3)]);
} else {
memcpy(harq_process->b+offset,
harq_process->c[r],
Kr_bytes- ((harq_process->C>1)?3:0));
offset += (Kr_bytes - ((harq_process->C>1)?3:0));
} }
}
dlsch->last_iteration_cnt = ret;
//return(ret); nbDlProcessing++;
} //msgToPush->key=0;
#endif //pushTpool(Tpool, msgToPush);
proc->decoder_thread_available = 1; } // while !oai_exit
//proc->decoder_main_available = 0;
if (pthread_mutex_lock(&proc->mutex_dlsch_td) != 0) {
LOG_E( PHY, "[SCHED][UE] error locking mutex for UE RXTX\n" );
exit_fun("noting to add");
}
proc->instance_cnt_dlsch_td--;
if (pthread_mutex_unlock(&proc->mutex_dlsch_td) != 0) {
LOG_E( PHY, "[SCHED][UE] error unlocking mutex for UE td1\n" );
exit_fun("noting to add");
}
}
// thread finished
free(arg);
return &UE_dlsch_td_retval;
} }
#endif
#ifdef UE_DLSCH_PARALLELISATION
#define FIFO_PRIORITY 39
void *nr_dlsch_decoding_2thread1(void *arg)
{
static __thread int UE_dlsch_td_retval1;
struct nr_rxtx_thread_data *rtd = arg;
UE_nr_rxtx_proc_t *proc = rtd->proc;
PHY_VARS_NR_UE *phy_vars_ue = rtd->UE;
NR_DL_FRAME_PARMS *frame_parms = &phy_vars_ue->frame_parms;
int llr8_flag1;
int32_t no_iteration_ldpc;
t_nrLDPC_dec_params decParams;
t_nrLDPC_dec_params* p_decParams = &decParams;
t_nrLDPC_time_stats procTime;
t_nrLDPC_time_stats* p_procTime =&procTime ;
t_nrLDPC_procBuf* p_nrLDPC_procBuf;
int8_t llrProcBuf[OAI_LDPC_MAX_NUM_LLR] __attribute__ ((aligned(32)));
int16_t z [68*384];
int8_t l [68*384];
//__m128i l;
int16_t inv_d [68*384];
//int16_t *p_invd =&inv_d;
uint8_t kb, kc;
uint8_t Ilbrm = 0;
uint32_t Tbslbrm = 950984;
uint16_t nb_rb = 30; //to update
uint16_t nb_symb_sch = 12;
uint8_t nb_re_dmrs = 6;
uint16_t length_dmrs = 1;
uint32_t i,j;
//uint32_t k;
__m128i *pv = (__m128i*)&z;
__m128i *pl = (__m128i*)&l;
proc->instance_cnt_dlsch_td1=-1;
proc->nr_tti_rx=proc->sub_frame_start;
printf("start thread 1\n");
proc->decoder_thread_available1 = 0;
char threadname[256];
sprintf(threadname,"UE_thread_dlsch_td1_%d", proc->sub_frame_start);
cpu_set_t cpuset;
CPU_ZERO(&cpuset);
if ( (proc->sub_frame_start+1)%RX_NB_TH == 0 && threads.dlsch_td_one != -1 )
CPU_SET(threads.dlsch_td_one, &cpuset);
if ( (proc->sub_frame_start+1)%RX_NB_TH == 1 && threads.dlsch_td_two != -1 )
CPU_SET(threads.dlsch_td_two, &cpuset);
if ( (proc->sub_frame_start+1)%RX_NB_TH == 2 && threads.dlsch_td_three != -1 )
CPU_SET(threads.dlsch_td_three, &cpuset);
#if UE_TIMING_TRACE
time_stats_t *dlsch_rate_unmatching_stats=&phy_vars_ue->dlsch_rate_unmatching_stats;
time_stats_t *dlsch_turbo_decoding_stats=&phy_vars_ue->dlsch_turbo_decoding_stats;
time_stats_t *dlsch_deinterleaving_stats=&phy_vars_ue->dlsch_deinterleaving_stats;
#endif
uint32_t A,E;
uint32_t G;
uint32_t ret,offset;
uint32_t r,r_offset=0,Kr,Kr_bytes,err_flag=0,K_bytes_F;
uint8_t crc_type;
uint8_t C,Cprime;
uint8_t Qm;
uint8_t Nl;
//uint32_t Er;
init_thread(900000,1000000 , FIFO_PRIORITY-1, &cpuset, threadname);
printf("2thread1 oai_exit %d\n", oai_exit);
while (!oai_exit) {
if (pthread_mutex_lock(&proc->mutex_dlsch_td1) != 0) {
LOG_E( PHY, "[SCHED][UE] error locking mutex for UE dlsch td\n" );
exit_fun("nothing to add");
}
while (proc->instance_cnt_dlsch_td1 < 0) {
// most of the time, the thread is waiting here
pthread_cond_wait( &proc->cond_dlsch_td1, &proc->mutex_dlsch_td1 );
}
if (pthread_mutex_unlock(&proc->mutex_dlsch_td1) != 0) {
LOG_E( PHY, "[SCHED][UE] error unlocking mutex for UE dlsch_td \n" );
exit_fun("nothing to add");
}
//printf("2thread1 main available %d\n", proc->decoder_main_available);
uint32_t wait = 0;
while(proc->decoder_main_available == 0)
{
usleep(1);
wait++;
}
//proc->decoder_thread_available1 = 0;
//PHY_VARS_NR_UE *phy_vars_ue = tdp->UE;
int eNB_id = proc->eNB_id;
int harq_pid = proc->harq_pid;
llr8_flag1 = proc->llr8_flag;
//r_offset = proc->Er;
//UE_rxtx_proc_t *proc = tdp->proc;
int frame = proc->frame_rx;
int subframe = proc->nr_tti_rx;
NR_UE_DLSCH_t *dlsch = phy_vars_ue->dlsch[phy_vars_ue->current_thread_id[subframe]][eNB_id][0];
NR_DL_UE_HARQ_t *harq_process = dlsch->harq_processes[harq_pid];
short *dlsch_llr = phy_vars_ue->pdsch_vars[phy_vars_ue->current_thread_id[subframe]][eNB_id]->llr[0];
//printf("2thread0 llr flag %d tdp flag %d\n",llr8_flag1, tdp->llr8_flag);
//printf("2thread1 nr_tti_tx %d subframe %d SF thread id %d r_offset %d\n", proc->nr_tti_rx, subframe, phy_vars_ue->current_thread_id[subframe], r_offset);
p_nrLDPC_procBuf = harq_process->p_nrLDPC_procBuf[2];
/*
if (nb_rb > frame_parms->N_RB_DL) {
printf("dlsch_decoding.c: Illegal nb_rb %d\n",nb_rb);
return(max_ldpc_iterations);
}*/
/*harq_pid = dlsch->current_harq_pid[phy_vars_ue->current_thread_id[subframe]];
if (harq_pid >= 8) {
printf("dlsch_decoding.c: Illegal harq_pid %d\n",harq_pid);
return(max_ldpc_iterations);
}
*/
nb_rb = harq_process->nb_rb;
harq_process->trials[harq_process->round]++;
harq_process->TBS = nr_compute_tbs(harq_process->mcs,nb_rb,nb_symb_sch,nb_re_dmrs,length_dmrs, harq_process->Nl);
A = harq_process->TBS; //2072 for QPSK 1/3
ret = dlsch->max_ldpc_iterations;
harq_process->G = nr_get_G(nb_rb, nb_symb_sch, nb_re_dmrs, length_dmrs, harq_process->Qm,harq_process->Nl);
G = harq_process->G;
//get_G(frame_parms,nb_rb,dlsch->rb_alloc,mod_order,num_pdcch_symbols,phy_vars_ue->frame,subframe);
//printf("DLSCH Decoding, A %d harq_pid %d G %d\n",A, harq_pid,harq_process->G);
if (harq_process->round == 0) {
// This is a new packet, so compute quantities regarding segmentation
harq_process->B = A+24;
nr_segmentation(NULL,
NULL,
harq_process->B,
&harq_process->C,
&harq_process->K,
&harq_process->Z,
&harq_process->F);
p_decParams->Z = harq_process->Z;
}
kb = harq_process->K/harq_process->Z;
if ( kb==22){
p_decParams->BG = 1;
p_decParams->R = 89;
kc = 68;
}
else{
p_decParams->BG = 2;
p_decParams->R = 13;
kc = 52;
}
p_decParams->numMaxIter = 2;
Kr = p_decParams->Z*kb;
p_decParams->outMode= 0;
/*
else {
printf("dlsch_decoding.c: Ndi>0 not checked yet!!\n");
return(max_ldpc_iterations);
}
*/
err_flag = 0;
//r_offset = 0;
/*
unsigned char bw_scaling =1;
switch (frame_parms->N_RB_DL) {
case 106:
bw_scaling =2;
break;
default:
bw_scaling =1;
break;
}
if (harq_process->C > MAX_NUM_DLSCH_SEGMENTS/bw_scaling) {
LOG_E(PHY,"Illegal harq_process->C %d > %d\n",harq_process->C,MAX_NUM_DLSCH_SEGMENTS/bw_scaling);
return((1+dlsch->max_ldpc_iterations));
}*/
#ifdef DEBUG_DLSCH_DECODING
printf("Segmentation: C %d, Cminus %d, Kminus %d, Kplus %d\n",harq_process->C,harq_process->Cminus,harq_process->Kminus,harq_process->Kplus);
#endif
opp_enabled=1;
Qm= harq_process->Qm;
Nl=harq_process->Nl;
//r_thread = harq_process->C/2-1;
C= harq_process->C;
Cprime = C; //assume CBGTI not present
if (r <= Cprime - ((G/(Nl*Qm))%Cprime) - 1)
r_offset = Nl*Qm*(G/(Nl*Qm*Cprime));
else
r_offset = Nl*Qm*((G/(Nl*Qm*Cprime))+1);
//printf("sub thread r_offset %d\n", r_offset);
//for (r=(harq_process->C/2); r<harq_process->C; r++) {
r=2; //(harq_process->C/2);
r_offset = r*r_offset;
//printf("thread1 r=%d r_offset %d \n",r, r_offset);
Kr = harq_process->K;
Kr_bytes = Kr>>3;
K_bytes_F = Kr_bytes-(harq_process->F>>3);
Tbslbrm = nr_compute_tbs(28,nb_rb,frame_parms->symbols_per_slot,0,0, harq_process->Nl);
E = nr_get_E(G, harq_process->C, harq_process->Qm, harq_process->Nl, r);
/*
printf("Subblock deinterleaving, d %p w %p\n",
harq_process->d[r],
harq_process->w);
*/
#if UE_TIMING_TRACE
start_meas(dlsch_deinterleaving_stats);
#endif
nr_deinterleaving_ldpc(E,
harq_process->Qm,
harq_process->w[r],
dlsch_llr+r_offset);
//for (int i =0; i<16; i++)
// printf("rx output deinterleaving w[%d]= %d r_offset %d\n", i,harq_process->w[r][i], r_offset);
#if UE_TIMING_TRACE
stop_meas(dlsch_deinterleaving_stats);
#endif
#if UE_TIMING_TRACE
start_meas(dlsch_rate_unmatching_stats);
#endif
#ifdef DEBUG_DLSCH_DECODING
LOG_D(PHY,"HARQ_PID %d Rate Matching Segment %d (coded bits %d,unpunctured/repeated bits %d, TBS %d, mod_order %d, nb_rb %d, Nl %d, rv %d, round %d)...\n",
harq_pid,r, G,
Kr*3,
harq_process->TBS,
harq_process->Qm,
harq_process->nb_rb,
harq_process->Nl,
harq_process->rvidx,
harq_process->round);
#endif
if (nr_rate_matching_ldpc_rx(Ilbrm,
Tbslbrm,
p_decParams->BG,
p_decParams->Z,
harq_process->d[r],
harq_process->w[r],
harq_process->C,
harq_process->rvidx,
(harq_process->round==0)?1:0,
E)==-1) {
#if UE_TIMING_TRACE
stop_meas(dlsch_rate_unmatching_stats);
#endif
LOG_E(PHY,"dlsch_decoding.c: Problem in rate_matching\n");
//return(dlsch->max_ldpc_iterations);
} else
{
#if UE_TIMING_TRACE
stop_meas(dlsch_rate_unmatching_stats);
#endif
}
//for (int i =0; i<16; i++)
// printf("rx output ratematching d[%d]= %d r_offset %d\n", i,harq_process->d[r][i], r_offset);
//r_offset += E;
#ifdef DEBUG_DLSCH_DECODING
if (r==0) {
write_output("decoder_llr.m","decllr",dlsch_llr,G,1,0);
write_output("decoder_in.m","dec",&harq_process->d[0][0],(3*8*Kr_bytes)+12,1,0);
}
printf("decoder input(segment %d) :",r);
int i; for (i=0;i<(3*8*Kr_bytes)+12;i++)
printf("%d : %d\n",i,harq_process->d[r][i]);
printf("\n");
#endif
// printf("Clearing c, %p\n",harq_process->c[r]);
memset(harq_process->c[r],0,Kr_bytes);
// printf("done\n");
if (harq_process->C == 1)
crc_type = CRC24_A;
else
crc_type = CRC24_B;
if (err_flag == 0) {
/*
LOG_I(PHY, "turbo algo Kr=%d cb_cnt=%d C=%d nbRB=%d crc_type %d TBSInput=%d TBSHarq=%d TBSplus24=%d mcs=%d Qm=%d RIV=%d round=%d maxIter %d\n",
Kr,r,harq_process->C,harq_process->nb_rb,crc_type,A,harq_process->TBS,
harq_process->B,harq_process->mcs,harq_process->Qm,harq_process->rvidx,harq_process->round,dlsch->max_ldpc_iterations);
*/
if (llr8_flag1) {
AssertFatal (Kr >= 256, "turbo algo issue Kr=%d cb_cnt=%d C=%d nbRB=%d TBSInput=%d TBSHarq=%d TBSplus24=%d mcs=%d Qm=%d RIV=%d round=%d\n",
Kr,r,harq_process->C,harq_process->nb_rb,A,harq_process->TBS,harq_process->B,harq_process->mcs,harq_process->Qm,harq_process->rvidx,harq_process->round);
}
#if UE_TIMING_TRACE
start_meas(dlsch_turbo_decoding_stats);
#endif
// LOG_D(PHY,"AbsSubframe %d.%d Start turbo segment %d/%d \n",frame%1024,subframe,r,harq_process->C-1);
memset(pv,0,2*p_decParams->Z*sizeof(int16_t));
//memset(pl,0,2*p_decParams->Z*sizeof(int8_t));
memset((pv+K_bytes_F),127,harq_process->F*sizeof(int16_t));
for (i=((2*p_decParams->Z)>>3), j = 0; i < K_bytes_F+((2*p_decParams->Z)>>3); i++, j++)
{
pv[i]= _mm_loadu_si128((__m128i*)(&inv_d[8*j]));
}
for (i=Kr_bytes+((2*p_decParams->Z)>>3),j=Kr_bytes; i < ((kc*p_decParams->Z)>>3); i++, j++)
{
pv[i]= _mm_loadu_si128((__m128i*)(&inv_d[8*j]));
}
for (i=0, j=0; j < ((kc*p_decParams->Z)>>4); i+=2, j++)
{
pl[j] = _mm_packs_epi16(pv[i],pv[i+1]);
}
no_iteration_ldpc = nrLDPC_decoder(p_decParams,
(int8_t*)&pl[0],
llrProcBuf,
p_nrLDPC_procBuf,
p_procTime);
if (no_iteration_ldpc > 10)
printf("Error number of iteration LPDC %d\n", no_iteration_ldpc);
//else
// printf("OK number of iteration LPDC %d\n", no_iteration_ldpc);
for (int m=0; m < Kr>>3; m ++)
{
harq_process->c[r][m]= (uint8_t) llrProcBuf[m];
}
/*for (int u=0; u < Kr>>3; u ++)
{
ullrProcBuf[u]= (uint8_t) llrProcBuf[u];
}
printf("output unsigned ullrProcBuf \n");
for (int j=0; j < Kr>>3; j ++)
{
printf(" %d \n", ullrProcBuf[j]);
}
printf(" \n");*/
#endif
//printf("output channel decoder %d %d %d %d %d \n", harq_process->c[r][0], harq_process->c[r][1], harq_process->c[r][2],harq_process->c[r][3], harq_process->c[r][4]);
//printf("output decoder %d %d %d %d %d \n", harq_process->c[r][0], harq_process->c[r][1], harq_process->c[r][2],harq_process->c[r][3], harq_process->c[r][4]);
#if UE_TIMING_TRACE
stop_meas(dlsch_turbo_decoding_stats);
#endif
}
if ((err_flag == 0) && (ret>=(1+dlsch->max_ldpc_iterations))) {// a Code segment is in error so break;
// LOG_D(PHY,"AbsSubframe %d.%d CRC failed, segment %d/%d \n",frame%1024,subframe,r,harq_process->C-1);
err_flag = 1;
}
//}
/*int32_t frame_rx_prev = frame;
int32_t subframe_rx_prev = subframe - 1;
if (subframe_rx_prev < 0) {
frame_rx_prev--;
subframe_rx_prev += 10;
}
frame_rx_prev = frame_rx_prev%1024;*/
proc->decoder_thread_available1 = 1;
//proc->decoder_main_available = 0;
//printf("2thread1 proc->instance_cnt_dlsch_td1 %d\n", proc->instance_cnt_dlsch_td1);
if (pthread_mutex_lock(&proc->mutex_dlsch_td1) != 0) {
LOG_E( PHY, "[SCHED][UE] error locking mutex for UE RXTX\n" );
exit_fun("noting to add");
}
proc->instance_cnt_dlsch_td1--;
if (pthread_mutex_unlock(&proc->mutex_dlsch_td1) != 0) {
LOG_E( PHY, "[SCHED][UE] error unlocking mutex for UE td1\n" );
exit_fun("noting to add");
}
//printf("end 2thread1 proc->instance_cnt_dlsch_td1 %d\n", proc->instance_cnt_dlsch_td1);
}
//printf("after 2thread1 after oai exit proc->instance_cnt_dlsch_td %d\n", proc->instance_cnt_dlsch_td1);
// thread finished
free(arg);
return &UE_dlsch_td_retval1;
}
#endif #endif
...@@ -1818,6 +1818,6 @@ int nr_extract_dci_info(PHY_VARS_NR_UE *ue, ...@@ -1818,6 +1818,6 @@ int nr_extract_dci_info(PHY_VARS_NR_UE *ue,
uint16_t n_RB_DLBWP, uint16_t n_RB_DLBWP,
uint16_t crc_scrambled_values[TOTAL_NBR_SCRAMBLED_VALUES]); uint16_t crc_scrambled_values[TOTAL_NBR_SCRAMBLED_VALUES]);
void *dlsch_thread(void *arg);
/**@}*/ /**@}*/
#endif #endif
...@@ -80,6 +80,7 @@ ...@@ -80,6 +80,7 @@
#define RX_NB_TH_MAX 2 #define RX_NB_TH_MAX 2
#define RX_NB_TH 2 #define RX_NB_TH 2
#define RX_NB_TH_DL 2
#define LTE_SLOTS_PER_SUBFRAME 2 #define LTE_SLOTS_PER_SUBFRAME 2
......
...@@ -1221,10 +1221,10 @@ typedef struct { ...@@ -1221,10 +1221,10 @@ typedef struct {
/* this structure is used to pass both UE phy vars and /* this structure is used to pass both UE phy vars and
* proc to the function UE_thread_rxn_txnp4 * proc to the function UE_thread_rxn_txnp4
*/ */
struct nr_rxtx_thread_data { typedef struct nr_rxtx_thread_data_s {
UE_nr_rxtx_proc_t proc;
PHY_VARS_NR_UE *UE; PHY_VARS_NR_UE *UE;
UE_nr_rxtx_proc_t *proc; } nr_rxtx_thread_data_t;
};
/*static inline int wait_on_condition(pthread_mutex_t *mutex,pthread_cond_t *cond,int *instance_cnt,char *name) { /*static inline int wait_on_condition(pthread_mutex_t *mutex,pthread_cond_t *cond,int *instance_cnt,char *name) {
......
...@@ -60,7 +60,7 @@ typedef struct { ...@@ -60,7 +60,7 @@ typedef struct {
uint8_t decoder_thread_available; uint8_t decoder_thread_available;
uint8_t decoder_main_available; uint8_t decoder_main_available;
uint8_t decoder_switch; uint8_t decoder_switch;
int counter_decoder; int num_seg;
uint8_t channel_level; uint8_t channel_level;
int eNB_id; int eNB_id;
int harq_pid; int harq_pid;
......
...@@ -44,7 +44,7 @@ ...@@ -44,7 +44,7 @@
#define OPENAIR_THREAD_STACK_SIZE PTHREAD_STACK_MIN //4096 //RTL_PTHREAD_STACK_MIN*6 #define OPENAIR_THREAD_STACK_SIZE PTHREAD_STACK_MIN //4096 //RTL_PTHREAD_STACK_MIN*6
//#define DLC_THREAD_STACK_SIZE 4096 //DLC stack size //#define DLC_THREAD_STACK_SIZE 4096 //DLC stack size
//#define UE_SLOT_PARALLELISATION //#define UE_SLOT_PARALLELISATION
//#define UE_DLSCH_PARALLELISATION #define UE_DLSCH_PARALLELISATION//
/*enum openair_SCHED_STATUS { /*enum openair_SCHED_STATUS {
openair_SCHED_STOPPED=1, openair_SCHED_STOPPED=1,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment