Commit 20484b5c authored by Raymond Knopp's avatar Raymond Knopp

NR PDCSH scrambling optimization (factor 10 reduction)

parent 1a83cc2f
......@@ -65,6 +65,47 @@ void nr_pdsch_codeword_scrambling(uint8_t *in,
}
void nr_pdsch_codeword_scrambling_optim(uint8_t *in,
uint32_t size,
uint8_t q,
uint32_t Nid,
uint32_t n_RNTI,
uint32_t* out) {
uint32_t x1, x2, s=0,in32;
x2 = (n_RNTI<<15) + (q<<14) + Nid;
s=lte_gold_generic(&x1, &x2, 1);
#if defined(__AVX2__)
for (int i=0; i<((size>>5)+((size&0x1f) > 0 ? 1 : 0)); i++) {
in32=_mm256_movemask_epi8(_mm256_slli_epi16(((__m256i*)in)[i],7));
out[i]=(in32^s);
// printf("in[%d] %x => %x\n",i,in32,out[i]);
s=lte_gold_generic(&x1, &x2, 0);
}
#elif defined(__SSE4__)
_m128i *in128;
for (int i=0; i<((size>>5)+((size&0x1f) > 0 ? 1 : 0)); i++) {
in128=&((__m128i*)in)[i<<1];
((uint16_t*)&in32)[0] = _mm128_movemask_epi8(_mm256_slli_epi16(in128[0],7));
((uint16_t*)&in32)[1] = _mm128_movemask_epi8(_mm256_slli_epi16(in128[1],7));
out[i]=(in32^s);
s=lte_gold_generic(&x1, &x2, 0);
}
//#elsif defined(__arm__) || defined(__aarch64)
#else
nr_pdsch_codeword_scrambling(in,
size,
q,
Nid,
n_RNTI,
out);
#endif
}
uint8_t nr_generate_pdsch(NR_gNB_DLSCH_t *dlsch,
uint32_t ***pdsch_dmrs,
......@@ -133,13 +174,13 @@ uint8_t nr_generate_pdsch(NR_gNB_DLSCH_t *dlsch,
for (int q=0; q<rel15->NrOfCodewords; q++)
memset((void*)scrambled_output[q], 0, (encoded_length>>5)*sizeof(uint32_t));
for (int q=0; q<rel15->NrOfCodewords; q++)
nr_pdsch_codeword_scrambling(harq->f,
encoded_length,
q,
rel15->dlDmrsScramblingId,
rel15->rnti,
scrambled_output[q]);
nr_pdsch_codeword_scrambling_optim(harq->f,
encoded_length,
q,
rel15->dlDmrsScramblingId,
rel15->rnti,
scrambled_output[q]);
stop_meas(dlsch_scrambling_stats);
#ifdef DEBUG_DLSCH
printf("PDSCH scrambling:\n");
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment