Commit a9d49ef0 authored by Raymond Knopp

further minor optimizations

parent eecd718b
......@@ -415,51 +415,71 @@ uint8_t nr_generate_pdsch(processingData_L1tx_t *msgTx,
}
else { // no PTRS or DMRS in this symbol
// Loop Over SCs:
__m64 *txF=(__m64*)&txdataF_precoding[ap][((l*frame_parms->ofdm_symbol_size+start_sc+txdataF_offset)<<1)];
int upper_limit=rel15->rbSize*NR_NB_SC_PER_RB;
int remaining_re = 0;
if (start_sc + upper_limit > frame_parms->ofdm_symbol_size) {
remaining_re = upper_limit + start_sc - frame_parms->ofdm_symbol_size;
upper_limit = frame_parms->ofdm_symbol_size - start_sc;
}
__m64 *txl = (__m64*)&tx_layers[ap][m<<1];
__m64 amp64=_mm_set1_pi16(amp);
for (int i=0; i<(upper_limit>>1); i++) {
if (frame_parms->N_RB_DL&1==0) {
__m128i *txF=(__m128i*)&txdataF_precoding[ap][((l*frame_parms->ofdm_symbol_size+start_sc+txdataF_offset)<<1)];
__m128i *txl = (__m128i*)&tx_layers[ap][m<<1];
__m128i amp128=_mm_set1_epi16(amp);
for (int i=0; i<(upper_limit>>2); i++) {
txF[i] = _mm_mulhrs_epi16(amp128,txl[i]);
} //RE loop, first part
m+=upper_limit;
if (remaining_re > 0) {
txF = (__m128i*)&txdataF_precoding[ap][((l*frame_parms->ofdm_symbol_size+txdataF_offset)<<1)];
txl = (__m128i*)&tx_layers[ap][m<<1];
for (int i=0; i<(remaining_re>>2); i++) {
txF[i] = _mm_mulhrs_epi16(amp128,txl[i]);
}
}
}
else {
__m64 *txF=(__m64*)&txdataF_precoding[ap][((l*frame_parms->ofdm_symbol_size+start_sc+txdataF_offset)<<1)];
__m64 *txl = (__m64*)&tx_layers[ap][m<<1];
__m64 amp64=_mm_set1_pi16(amp);
for (int i=0; i<(upper_limit>>1); i++) {
txF[i] = _mm_mulhrs_pi16(amp64,txl[i]);
txF[i] = _mm_mulhrs_pi16(amp64,txl[i]);
#ifdef DEBUG_DLSCH_MAPPING
if ((i&1) > 0)
printf("m %d\t l %d \t k %d \t txdataF: %d %d\n",
if ((i&1) > 0)
printf("m %d\t l %d \t k %d \t txdataF: %d %d\n",
m, l, start_sc+(i>>1), txdataF_precoding[ap][((l*frame_parms->ofdm_symbol_size + start_sc+(i>>1))<<1) + (2*txdataF_offset)],
txdataF_precoding[ap][((l*frame_parms->ofdm_symbol_size + start_sc+(i>>1))<<1) + 1 + (2*txdataF_offset)]);
txdataF_precoding[ap][((l*frame_parms->ofdm_symbol_size + start_sc+(i>>1))<<1) + 1 + (2*txdataF_offset)]);
#endif
/* handle this, mute RE */
/*else {
txdataF_precoding[ap][((l*frame_parms->ofdm_symbol_size + k)<<1) + (2*txdataF_offset)] = 0;
txdataF_precoding[ap][((l*frame_parms->ofdm_symbol_size + k)<<1) + 1 + (2*txdataF_offset)] = 0;
}*/
} //RE loop, first part
m+=upper_limit;
if (remaining_re > 0) {
txF = (__m64*)&txdataF_precoding[ap][((l*frame_parms->ofdm_symbol_size+txdataF_offset)<<1)];
txl = (__m64*)&tx_layers[ap][m<<1];
for (int i=0; i<(remaining_re>>1); i++) {
txF[i] = _mm_mulhrs_pi16(amp64,txl[i]);
/* handle this, mute RE */
/*else {
txdataF_precoding[ap][((l*frame_parms->ofdm_symbol_size + k)<<1) + (2*txdataF_offset)] = 0;
txdataF_precoding[ap][((l*frame_parms->ofdm_symbol_size + k)<<1) + 1 + (2*txdataF_offset)] = 0;
}*/
} //RE loop, first part
m+=upper_limit;
if (remaining_re > 0) {
txF = (__m64*)&txdataF_precoding[ap][((l*frame_parms->ofdm_symbol_size+txdataF_offset)<<1)];
txl = (__m64*)&tx_layers[ap][m<<1];
for (int i=0; i<(remaining_re>>1); i++) {
txF[i] = _mm_mulhrs_pi16(amp64,txl[i]);
#ifdef DEBUG_DLSCH_MAPPING
if ((i&1) > 0)
printf("m %d\t l %d \t k %d \t txdataF: %d %d\n",
m, l, i>>1, txdataF_precoding[ap][((l*frame_parms->ofdm_symbol_size + (i>>1))<<1) + (2*txdataF_offset)],
txdataF_precoding[ap][((l*frame_parms->ofdm_symbol_size + (i>>1))<<1) + 1 + (2*txdataF_offset)]);
if ((i&1) > 0)
printf("m %d\t l %d \t k %d \t txdataF: %d %d\n",
m, l, i>>1, txdataF_precoding[ap][((l*frame_parms->ofdm_symbol_size + (i>>1))<<1) + (2*txdataF_offset)],
txdataF_precoding[ap][((l*frame_parms->ofdm_symbol_size + (i>>1))<<1) + 1 + (2*txdataF_offset)]);
#endif
/* handle this, mute RE */
/*else {
txdataF_precoding[ap][((l*frame_parms->ofdm_symbol_size + k)<<1) + (2*txdataF_offset)] = 0;
txdataF_precoding[ap][((l*frame_parms->ofdm_symbol_size + k)<<1) + 1 + (2*txdataF_offset)] = 0;
}*/
} //RE loop, second part
}
m+=remaining_re;
}
/* handle this, mute RE */
/*else {
txdataF_precoding[ap][((l*frame_parms->ofdm_symbol_size + k)<<1) + (2*txdataF_offset)] = 0;
txdataF_precoding[ap][((l*frame_parms->ofdm_symbol_size + k)<<1) + 1 + (2*txdataF_offset)] = 0;
}*/
} //RE loop, second part
} //
m+=remaining_re;
} // N_RB_DL even
} // no DMRS/PTRS in symbol
} // symbol loop
}// layer loop
stop_meas(&gNB->dlsch_resource_mapping_stats);
......
......@@ -380,7 +380,7 @@ int nr_dlsch_encoding(PHY_VARS_gNB *gNB,
impp.tparity = tparity;
impp.toutput = toutput;
for(int j=0;j<(harq->C/8+1);j++) {
for(int j=0;j<(harq->C/8+((harq->C&7)==0 ? 0 : 1));j++) {
impp.macro_num=j;
nrLDPC_encoder(harq->c,harq->d,*Zc,Kb,Kr,harq->BG,&impp);
}
......
......@@ -1301,9 +1301,10 @@ int main(int argc, char **argv)
printf("\n");
if (print_perf==1) {
printf("\ngNB TX function statistics (per %d us slot, NPRB %d, mcs %d, TBS %d, Kr %d (Zc %d))\n",
printf("\ngNB TX function statistics (per %d us slot, NPRB %d, mcs %d, TBS %d, C %d, Kr %d (Zc %d))\n",
1000>>*scc->ssbSubcarrierSpacing, g_rbSize, g_mcsIndex,
msgDataTx->dlsch[0][0]->harq_process.pdsch_pdu.pdsch_pdu_rel15.TBSize[0]<<3,
msgDataTx->dlsch[0][0]->harq_process.C,
msgDataTx->dlsch[0][0]->harq_process.K,
msgDataTx->dlsch[0][0]->harq_process.K/((msgDataTx->dlsch[0][0]->harq_process.pdsch_pdu.pdsch_pdu_rel15.TBSize[0]<<3)>3824?22:10));
printDistribution(gNB->phy_proc_tx_0,table_tx,"PHY proc tx");
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment