Commit a9d49ef0 authored by Raymond Knopp

further minor optimizations

parent eecd718b
...@@ -415,51 +415,71 @@ uint8_t nr_generate_pdsch(processingData_L1tx_t *msgTx, ...@@ -415,51 +415,71 @@ uint8_t nr_generate_pdsch(processingData_L1tx_t *msgTx,
} }
else { // no PTRS or DMRS in this symbol else { // no PTRS or DMRS in this symbol
// Loop Over SCs: // Loop Over SCs:
__m64 *txF=(__m64*)&txdataF_precoding[ap][((l*frame_parms->ofdm_symbol_size+start_sc+txdataF_offset)<<1)];
int upper_limit=rel15->rbSize*NR_NB_SC_PER_RB; int upper_limit=rel15->rbSize*NR_NB_SC_PER_RB;
int remaining_re = 0; int remaining_re = 0;
if (start_sc + upper_limit > frame_parms->ofdm_symbol_size) { if (start_sc + upper_limit > frame_parms->ofdm_symbol_size) {
remaining_re = upper_limit + start_sc - frame_parms->ofdm_symbol_size; remaining_re = upper_limit + start_sc - frame_parms->ofdm_symbol_size;
upper_limit = frame_parms->ofdm_symbol_size - start_sc; upper_limit = frame_parms->ofdm_symbol_size - start_sc;
} }
__m64 *txl = (__m64*)&tx_layers[ap][m<<1]; if (frame_parms->N_RB_DL&1==0) {
__m64 amp64=_mm_set1_pi16(amp); __m128i *txF=(__m128i*)&txdataF_precoding[ap][((l*frame_parms->ofdm_symbol_size+start_sc+txdataF_offset)<<1)];
for (int i=0; i<(upper_limit>>1); i++) {
__m128i *txl = (__m128i*)&tx_layers[ap][m<<1];
__m128i amp128=_mm_set1_epi16(amp);
for (int i=0; i<(upper_limit>>2); i++) {
txF[i] = _mm_mulhrs_epi16(amp128,txl[i]);
} //RE loop, first part
m+=upper_limit;
if (remaining_re > 0) {
txF = (__m128i*)&txdataF_precoding[ap][((l*frame_parms->ofdm_symbol_size+txdataF_offset)<<1)];
txl = (__m128i*)&tx_layers[ap][m<<1];
for (int i=0; i<(remaining_re>>2); i++) {
txF[i] = _mm_mulhrs_epi16(amp128,txl[i]);
}
}
}
else {
__m64 *txF=(__m64*)&txdataF_precoding[ap][((l*frame_parms->ofdm_symbol_size+start_sc+txdataF_offset)<<1)];
__m64 *txl = (__m64*)&tx_layers[ap][m<<1];
__m64 amp64=_mm_set1_pi16(amp);
for (int i=0; i<(upper_limit>>1); i++) {
txF[i] = _mm_mulhrs_pi16(amp64,txl[i]); txF[i] = _mm_mulhrs_pi16(amp64,txl[i]);
#ifdef DEBUG_DLSCH_MAPPING #ifdef DEBUG_DLSCH_MAPPING
if ((i&1) > 0) if ((i&1) > 0)
printf("m %d\t l %d \t k %d \t txdataF: %d %d\n", printf("m %d\t l %d \t k %d \t txdataF: %d %d\n",
m, l, start_sc+(i>>1), txdataF_precoding[ap][((l*frame_parms->ofdm_symbol_size + start_sc+(i>>1))<<1) + (2*txdataF_offset)], m, l, start_sc+(i>>1), txdataF_precoding[ap][((l*frame_parms->ofdm_symbol_size + start_sc+(i>>1))<<1) + (2*txdataF_offset)],
txdataF_precoding[ap][((l*frame_parms->ofdm_symbol_size + start_sc+(i>>1))<<1) + 1 + (2*txdataF_offset)]); txdataF_precoding[ap][((l*frame_parms->ofdm_symbol_size + start_sc+(i>>1))<<1) + 1 + (2*txdataF_offset)]);
#endif #endif
/* handle this, mute RE */ /* handle this, mute RE */
/*else { /*else {
txdataF_precoding[ap][((l*frame_parms->ofdm_symbol_size + k)<<1) + (2*txdataF_offset)] = 0; txdataF_precoding[ap][((l*frame_parms->ofdm_symbol_size + k)<<1) + (2*txdataF_offset)] = 0;
txdataF_precoding[ap][((l*frame_parms->ofdm_symbol_size + k)<<1) + 1 + (2*txdataF_offset)] = 0; txdataF_precoding[ap][((l*frame_parms->ofdm_symbol_size + k)<<1) + 1 + (2*txdataF_offset)] = 0;
}*/ }*/
} //RE loop, first part } //RE loop, first part
m+=upper_limit; m+=upper_limit;
if (remaining_re > 0) { if (remaining_re > 0) {
txF = (__m64*)&txdataF_precoding[ap][((l*frame_parms->ofdm_symbol_size+txdataF_offset)<<1)]; txF = (__m64*)&txdataF_precoding[ap][((l*frame_parms->ofdm_symbol_size+txdataF_offset)<<1)];
txl = (__m64*)&tx_layers[ap][m<<1]; txl = (__m64*)&tx_layers[ap][m<<1];
for (int i=0; i<(remaining_re>>1); i++) { for (int i=0; i<(remaining_re>>1); i++) {
txF[i] = _mm_mulhrs_pi16(amp64,txl[i]); txF[i] = _mm_mulhrs_pi16(amp64,txl[i]);
#ifdef DEBUG_DLSCH_MAPPING #ifdef DEBUG_DLSCH_MAPPING
if ((i&1) > 0) if ((i&1) > 0)
printf("m %d\t l %d \t k %d \t txdataF: %d %d\n", printf("m %d\t l %d \t k %d \t txdataF: %d %d\n",
m, l, i>>1, txdataF_precoding[ap][((l*frame_parms->ofdm_symbol_size + (i>>1))<<1) + (2*txdataF_offset)], m, l, i>>1, txdataF_precoding[ap][((l*frame_parms->ofdm_symbol_size + (i>>1))<<1) + (2*txdataF_offset)],
txdataF_precoding[ap][((l*frame_parms->ofdm_symbol_size + (i>>1))<<1) + 1 + (2*txdataF_offset)]); txdataF_precoding[ap][((l*frame_parms->ofdm_symbol_size + (i>>1))<<1) + 1 + (2*txdataF_offset)]);
#endif #endif
/* handle this, mute RE */ /* handle this, mute RE */
/*else { /*else {
txdataF_precoding[ap][((l*frame_parms->ofdm_symbol_size + k)<<1) + (2*txdataF_offset)] = 0; txdataF_precoding[ap][((l*frame_parms->ofdm_symbol_size + k)<<1) + (2*txdataF_offset)] = 0;
txdataF_precoding[ap][((l*frame_parms->ofdm_symbol_size + k)<<1) + 1 + (2*txdataF_offset)] = 0; txdataF_precoding[ap][((l*frame_parms->ofdm_symbol_size + k)<<1) + 1 + (2*txdataF_offset)] = 0;
}*/ }*/
} //RE loop, second part } //RE loop, second part
} } //
m+=remaining_re; m+=remaining_re;
} } // N_RB_DL even
} // no DMRS/PTRS in symbol
} // symbol loop } // symbol loop
}// layer loop }// layer loop
stop_meas(&gNB->dlsch_resource_mapping_stats); stop_meas(&gNB->dlsch_resource_mapping_stats);
......
...@@ -380,7 +380,7 @@ int nr_dlsch_encoding(PHY_VARS_gNB *gNB, ...@@ -380,7 +380,7 @@ int nr_dlsch_encoding(PHY_VARS_gNB *gNB,
impp.tparity = tparity; impp.tparity = tparity;
impp.toutput = toutput; impp.toutput = toutput;
for(int j=0;j<(harq->C/8+1);j++) { for(int j=0;j<(harq->C/8+((harq->C&7)==0 ? 0 : 1));j++) {
impp.macro_num=j; impp.macro_num=j;
nrLDPC_encoder(harq->c,harq->d,*Zc,Kb,Kr,harq->BG,&impp); nrLDPC_encoder(harq->c,harq->d,*Zc,Kb,Kr,harq->BG,&impp);
} }
......
...@@ -1301,9 +1301,10 @@ int main(int argc, char **argv) ...@@ -1301,9 +1301,10 @@ int main(int argc, char **argv)
printf("\n"); printf("\n");
if (print_perf==1) { if (print_perf==1) {
printf("\ngNB TX function statistics (per %d us slot, NPRB %d, mcs %d, TBS %d, Kr %d (Zc %d))\n", printf("\ngNB TX function statistics (per %d us slot, NPRB %d, mcs %d, TBS %d, C %d, Kr %d (Zc %d))\n",
1000>>*scc->ssbSubcarrierSpacing, g_rbSize, g_mcsIndex, 1000>>*scc->ssbSubcarrierSpacing, g_rbSize, g_mcsIndex,
msgDataTx->dlsch[0][0]->harq_process.pdsch_pdu.pdsch_pdu_rel15.TBSize[0]<<3, msgDataTx->dlsch[0][0]->harq_process.pdsch_pdu.pdsch_pdu_rel15.TBSize[0]<<3,
msgDataTx->dlsch[0][0]->harq_process.C,
msgDataTx->dlsch[0][0]->harq_process.K, msgDataTx->dlsch[0][0]->harq_process.K,
msgDataTx->dlsch[0][0]->harq_process.K/((msgDataTx->dlsch[0][0]->harq_process.pdsch_pdu.pdsch_pdu_rel15.TBSize[0]<<3)>3824?22:10)); msgDataTx->dlsch[0][0]->harq_process.K/((msgDataTx->dlsch[0][0]->harq_process.pdsch_pdu.pdsch_pdu_rel15.TBSize[0]<<3)>3824?22:10));
printDistribution(gNB->phy_proc_tx_0,table_tx,"PHY proc tx"); printDistribution(gNB->phy_proc_tx_0,table_tx,"PHY proc tx");
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment