Commit 85aa5183 authored by Raymond Knopp's avatar Raymond Knopp

optimization of ldpc deinterleaving and llr computation

parent 84275574
......@@ -310,15 +310,68 @@ void nr_interleaving_ldpc(uint32_t E, uint8_t Qm, uint8_t *e,uint8_t *f)
void nr_deinterleaving_ldpc(uint32_t E, uint8_t Qm, int16_t *e,int16_t *f)
{
uint32_t EQm;
EQm = E/Qm;
for (int j = 0; j< EQm; j++){
for (int i = 0; i< Qm; i++){
e[(i*EQm + j)] = f[(i+j*Qm)];
}
int j2;
int16_t *e1,*e2,*e3,*e4,*e5,*e6,*e7;
switch(Qm) {
case 2:
e1=e+(E/2);
for (int j = 0,j2=0; j< E/2; j+=2,j2+=4){
e[j] = f[j2];
e1[j] = f[j2+1];
e[j+1] = f[j2+2];
e1[j+1] = f[j2+3];
}
break;
case 4:
e1=e+(E/4);
e2=e1+(E/4);
e3=e2+(E/4);
for (int j = 0,j2=0; j< E/4; j++,j2+=4){
e[j] = f[j2];
e1[j] = f[j2+1];
e2[j] = f[j2+2];
e3[j] = f[j2+3];
}
break;
case 6:
e1=e+(E/6);
e2=e1+(E/6);
e3=e2+(E/6);
e4=e3+(E/6);
e5=e4+(E/6);
for (int j = 0,j2=0; j< E/6; j++,j2+=6){
e[j] = f[j2];
e1[j] = f[j2+1];
e2[j] = f[j2+2];
e3[j] = f[j2+3];
e4[j] = f[j2+4];
e5[j] = f[j2+5];
}
break;
case 8:
e1=e+(E/6);
e2=e1+(E/6);
e3=e2+(E/6);
e4=e3+(E/6);
e5=e4+(E/6);
e6=e5+(E/6);
e7=e6+(E/6);
for (int j = 0,j2=0; j< E/8; j++,j2+=8){
e[j] = f[j2];
e1[j] = f[j2+1];
e2[j] = f[j2+2];
e3[j] = f[j2+3];
e4[j] = f[j2+4];
e5[j] = f[j2+5];
e6[j] = f[j2+6];
e7[j] = f[j2+7];
}
break;
default:
AssertFatal(1==0,"Should not get here : Qm %d\n",Qm);
break;
}
}
......
......@@ -185,10 +185,11 @@ void nr_ulsch_64qam_llr(int32_t *rxdataF_comp,
#if defined(__x86_64__) || defined(__i386__)
__m128i *rxF = (__m128i*)rxdataF_comp;
__m128i *ch_mag,*ch_magb;
register __m128i xmm0,xmm1,xmm2;
#elif defined(__arm__)
int16x8_t *rxF = (int16x8_t*)&rxdataF_comp;
int16x8_t *ch_mag,*ch_magb; // [hna] This should be uncommented once channel estimation is implemented
int16x8_t xmm1,xmm2;
int16x8_t xmm0,xmm1,xmm2;
#endif
int i;
......@@ -208,14 +209,14 @@ void nr_ulsch_64qam_llr(int32_t *rxdataF_comp,
for (i=0; i<nb_re; i++) {
xmm0 = rxF[i];
#if defined(__x86_64__) || defined(__i386__)
xmm1 = _mm_abs_epi16(rxF[i]);
xmm1 = _mm_abs_epi16(xmm0);
xmm1 = _mm_subs_epi16(ch_mag[i],xmm1);
xmm2 = _mm_abs_epi16(xmm1);
xmm2 = _mm_subs_epi16(ch_magb[i],xmm2);
#elif defined(__arm__)
xmm1 = vabsq_s16(rxF[i]);
xmm1 = vabsq_s16(xmm0);
xmm1 = vsubq_s16(ch_mag[i],xmm1);
xmm2 = vabsq_s16(xmm1);
xmm2 = vsubq_s16(ch_magb[i],xmm2);
......@@ -224,14 +225,16 @@ void nr_ulsch_64qam_llr(int32_t *rxdataF_comp,
// ---------------------------------------
// 1st RE
// ---------------------------------------
ulsch_llr[0] = ((short *)&rxF[i])[0];
ulsch_llr[1] = ((short *)&rxF[i])[1];
#if defined(__x86_64__) || defined(__i386__)
ulsch_llr[0] = _mm_extract_epi16(xmm0,0);
ulsch_llr[1] = _mm_extract_epi16(xmm0,1);
ulsch_llr[2] = _mm_extract_epi16(xmm1,0);
ulsch_llr[3] = _mm_extract_epi16(xmm1,1);
ulsch_llr[4] = _mm_extract_epi16(xmm2,0);
ulsch_llr[5] = _mm_extract_epi16(xmm2,1);
#elif defined(__arm__)
ulsch_llr[0] = vgetq_lane_s16(xmm0,0);
ulsch_llr[1] = vgetq_lane_s16(xmm0,1);
ulsch_llr[2] = vgetq_lane_s16(xmm1,0);
ulsch_llr[3] = vgetq_lane_s16(xmm1,1);
ulsch_llr[4] = vgetq_lane_s16(xmm2,0);
......@@ -244,14 +247,16 @@ void nr_ulsch_64qam_llr(int32_t *rxdataF_comp,
// ---------------------------------------
// 2nd RE
// ---------------------------------------
ulsch_llr[0] = ((short *)&rxF[i])[2];
ulsch_llr[1] = ((short *)&rxF[i])[3];
#if defined(__x86_64__) || defined(__i386__)
ulsch_llr[0] = _mm_extract_epi16(xmm0,2);
ulsch_llr[1] = _mm_extract_epi16(xmm0,3);
ulsch_llr[2] = _mm_extract_epi16(xmm1,2);
ulsch_llr[3] = _mm_extract_epi16(xmm1,3);
ulsch_llr[4] = _mm_extract_epi16(xmm2,2);
ulsch_llr[5] = _mm_extract_epi16(xmm2,3);
#elif defined(__arm__)
ulsch_llr[2] = vgetq_lane_s16(xmm0,2);
ulsch_llr[3] = vgetq_lane_s16(xmm0,3);
ulsch_llr[2] = vgetq_lane_s16(xmm1,2);
ulsch_llr[3] = vgetq_lane_s16(xmm1,3);
ulsch_llr[4] = vgetq_lane_s16(xmm2,2);
......@@ -264,14 +269,16 @@ void nr_ulsch_64qam_llr(int32_t *rxdataF_comp,
// ---------------------------------------
// 3rd RE
// ---------------------------------------
ulsch_llr[0] = ((short *)&rxF[i])[4];
ulsch_llr[1] = ((short *)&rxF[i])[5];
#if defined(__x86_64__) || defined(__i386__)
ulsch_llr[0] = _mm_extract_epi16(xmm0,4);
ulsch_llr[1] = _mm_extract_epi16(xmm0,5);
ulsch_llr[2] = _mm_extract_epi16(xmm1,4);
ulsch_llr[3] = _mm_extract_epi16(xmm1,5);
ulsch_llr[4] = _mm_extract_epi16(xmm2,4);
ulsch_llr[5] = _mm_extract_epi16(xmm2,5);
#elif defined(__arm__)
ulsch_llr[0] = vgetq_lane_s16(xmm0,4);
ulsch_llr[1] = vgetq_lane_s16(xmm0,5);
ulsch_llr[2] = vgetq_lane_s16(xmm1,4);
ulsch_llr[3] = vgetq_lane_s16(xmm1,5);
ulsch_llr[4] = vgetq_lane_s16(xmm2,4);
......@@ -284,14 +291,16 @@ void nr_ulsch_64qam_llr(int32_t *rxdataF_comp,
// ---------------------------------------
// 4th RE
// ---------------------------------------
ulsch_llr[0] = ((short *)&rxF[i])[6];
ulsch_llr[1] = ((short *)&rxF[i])[7];
#if defined(__x86_64__) || defined(__i386__)
ulsch_llr[0] = _mm_extract_epi16(xmm0,6);
ulsch_llr[1] = _mm_extract_epi16(xmm0,7);
ulsch_llr[2] = _mm_extract_epi16(xmm1,6);
ulsch_llr[3] = _mm_extract_epi16(xmm1,7);
ulsch_llr[4] = _mm_extract_epi16(xmm2,6);
ulsch_llr[5] = _mm_extract_epi16(xmm2,7);
#elif defined(__arm__)
ulsch_llr[0] = vgetq_lane_s16(xmm0,6);
ulsch_llr[1] = vgetq_lane_s16(xmm0,7);
ulsch_llr[2] = vgetq_lane_s16(xmm1,6);
ulsch_llr[3] = vgetq_lane_s16(xmm1,7);
ulsch_llr[4] = vgetq_lane_s16(xmm2,6);
......
......@@ -775,6 +775,7 @@ typedef struct PHY_VARS_gNB_s {
time_stats_t dlsch_interleaving_stats;
time_stats_t dlsch_segmentation_stats;
time_stats_t rx_pusch_stats;
time_stats_t ulsch_decoding_stats;
time_stats_t ulsch_rate_unmatching_stats;
time_stats_t ulsch_ldpc_decoding_stats;
......
......@@ -538,6 +538,7 @@ void phy_procedures_gNB_uespec_RX(PHY_VARS_gNB *gNB, int frame_rx, int slot_rx)
uint8_t symbol_start = ulsch_harq->ulsch_pdu.start_symbol_index;
uint8_t symbol_end = symbol_start + ulsch_harq->ulsch_pdu.nr_of_symbols;
VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_NR_RX_PUSCH,1);
start_meas(&gNB->rx_pusch_stats);
for(uint8_t symbol = symbol_start; symbol < symbol_end; symbol++) {
no_sig = nr_rx_pusch(gNB, ULSCH_id, frame_rx, slot_rx, symbol, harq_pid);
if (no_sig) {
......@@ -546,6 +547,7 @@ void phy_procedures_gNB_uespec_RX(PHY_VARS_gNB *gNB, int frame_rx, int slot_rx)
return;
}
}
stop_meas(&gNB->rx_pusch_stats);
VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_NR_RX_PUSCH,0);
//LOG_M("rxdataF_comp.m","rxF_comp",gNB->pusch_vars[0]->rxdataF_comp[0],6900,1,1);
//LOG_M("rxdataF_ext.m","rxF_ext",gNB->pusch_vars[0]->rxdataF_ext[0],6900,1,1);
......
......@@ -655,6 +655,7 @@ int main(int argc, char **argv)
gNB->ulsch[0][0]->harq_processes[harq_pid]->round = round;
rv_index = nr_rv_round_map[round];
reset_meas(&gNB->phy_proc_rx);
reset_meas(&gNB->rx_pusch_stats);
reset_meas(&gNB->ulsch_decoding_stats);
reset_meas(&gNB->ulsch_deinterleaving_stats);
reset_meas(&gNB->ulsch_rate_unmatching_stats);
......@@ -993,10 +994,11 @@ int main(int argc, char **argv)
if (print_perf==1) {
printDistribution(&gNB->phy_proc_rx,table_rx,"Total PHY proc rx");
printStatIndent(&gNB->ulsch_channel_estimation_stats,"ULSCH channel estimation time");
printStatIndent(&gNB->ulsch_rbs_extraction_stats,"ULSCH rbs extraction time");
printStatIndent(&gNB->ulsch_channel_compensation_stats,"ULSCH channel compensation time");
printStatIndent(&gNB->ulsch_llr_stats,"ULSCH llr computation");
printStatIndent(&gNB->rx_pusch_stats,"RX PUSCH time");
printStatIndent2(&gNB->ulsch_channel_estimation_stats,"ULSCH channel estimation time");
printStatIndent2(&gNB->ulsch_rbs_extraction_stats,"ULSCH rbs extraction time");
printStatIndent2(&gNB->ulsch_channel_compensation_stats,"ULSCH channel compensation time");
printStatIndent2(&gNB->ulsch_llr_stats,"ULSCH llr computation");
printStatIndent(&gNB->ulsch_unscrambling_stats,"ULSCH unscrambling");
printStatIndent(&gNB->ulsch_decoding_stats,"ULSCH total decoding time");
printStatIndent2(&gNB->ulsch_deinterleaving_stats,"ULSCH deinterleaving");
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment