Commit f8b9ad88 authored by Laurent THOMAS's avatar Laurent THOMAS Committed by laurent

Remove some code, Rebase gone wrong?

parent b83afc15
......@@ -319,21 +319,6 @@ int nr_pusch_channel_estimation(PHY_VARS_gNB *gNB,
ul_ch[n] = c16mulShift(ul_ls_est[n], ul_delay_table[n % 6], 8);
}
// Treat last pilot specially (right edge)
c16_t ch_l=c16mulShift(*pil,
rx,
15);
*ul_ch = ch_l;
ul_ch++;
ch_offset++;
multadd_real_four_symbols_vector_complex_scalar(filt8_rr1,
&ch_l,
ul_ch);
multadd_real_four_symbols_vector_complex_scalar(filt8_rr2,
&ch_r,
ul_ch);
simde__m128i *ul_ch_128 = (simde__m128i *)&ul_ch_estimates[p*gNB->frame_parms.nb_antennas_rx+aarx][ch_offset];
ul_ch_128[0] = simde_mm_slli_epi16 (ul_ch_128[0], 2);
}
else if (pusch_pdu->dmrs_config_type == pusch_dmrs_type1) { // this is case without frequency-domain linear interpolation, just take average of LS channel estimates of 6 DMRS REs and use a common value for the whole PRB
......
......@@ -1046,190 +1046,7 @@ void nr_dlsch_channel_compensation(uint32_t rx_size_symbol,
}
}
}
_mm_empty();
_m_empty();
#elif defined(__arm__) || defined(__aarch64__)
unsigned short rb;
int16x4_t *dl_ch128,*dl_ch128_2,*rxdataF128;
int32x4_t mmtmpD0,mmtmpD1,mmtmpD0b,mmtmpD1b;
int16x8_t *dl_ch_mag128,*dl_ch_mag128b,mmtmpD2,mmtmpD3,mmtmpD4;
int16x8_t QAM_amp128,QAM_amp128b;
int16x4x2_t *rxdataF_comp128,*rho128;
int16_t conj[4]__attribute__((aligned(16))) = {1,-1,1,-1};
int32x4_t output_shift128 = vmovq_n_s32(-(int32_t)output_shift);
unsigned char symbol_mod = (symbol>=(7-frame_parms->Ncp)) ? symbol-(7-frame_parms->Ncp) : symbol;
uint32_t nb_rb_0 = length/12 + ((length%12)?1:0);
for (int l = 0; l < n_layers; l++) {
if (mod_order == 4) {
QAM_amp128 = vmovq_n_s16(QAM16_n1); // 2/sqrt(10)
QAM_amp128b = vmovq_n_s16(0);
} else if (mod_order == 6) {
QAM_amp128 = vmovq_n_s16(QAM64_n1); //
QAM_amp128b = vmovq_n_s16(QAM64_n2);
}
// printf("comp: rxdataF_comp %p, symbol %d\n",rxdataF_comp[0],symbol);
for (int aarx = 0; aarx < frame_parms->nb_antennas_rx; aarx++) {
dl_ch128 = (int16x4_t*)dl_ch_estimates_ext[(l << 1) + aarx];
dl_ch_mag128 = (int16x8_t *)dl_ch_mag[l][aarx];
dl_ch_mag128b = (int16x8_t *)dl_ch_magb[l][aarx];
rxdataF128 = (int16x4_t *)rxdataF_ext[aarx];
rxdataF_comp128 = (int16x4x2_t *)(rxdataF_comp[l][aarx] + symbol * nb_rb * 12);
for (rb=0; rb<nb_rb_0; rb++) {
if (mod_order>2) {
// get channel amplitude if not QPSK
mmtmpD0 = vmull_s16(dl_ch128[0], dl_ch128[0]);
// mmtmpD0 = [ch0*ch0,ch1*ch1,ch2*ch2,ch3*ch3];
mmtmpD0 = vqshlq_s32(vqaddq_s32(mmtmpD0,vrev64q_s32(mmtmpD0)),output_shift128);
// mmtmpD0 = [ch0*ch0 + ch1*ch1,ch0*ch0 + ch1*ch1,ch2*ch2 + ch3*ch3,ch2*ch2 + ch3*ch3]>>output_shift128 on 32-bits
mmtmpD1 = vmull_s16(dl_ch128[1], dl_ch128[1]);
mmtmpD1 = vqshlq_s32(vqaddq_s32(mmtmpD1,vrev64q_s32(mmtmpD1)),output_shift128);
mmtmpD2 = vcombine_s16(vmovn_s32(mmtmpD0),vmovn_s32(mmtmpD1));
// mmtmpD2 = [ch0*ch0 + ch1*ch1,ch0*ch0 + ch1*ch1,ch2*ch2 + ch3*ch3,ch2*ch2 + ch3*ch3,ch4*ch4 + ch5*ch5,ch4*ch4 + ch5*ch5,ch6*ch6 + ch7*ch7,ch6*ch6 + ch7*ch7]>>output_shift128 on 16-bits
mmtmpD0 = vmull_s16(dl_ch128[2], dl_ch128[2]);
mmtmpD0 = vqshlq_s32(vqaddq_s32(mmtmpD0,vrev64q_s32(mmtmpD0)),output_shift128);
mmtmpD1 = vmull_s16(dl_ch128[3], dl_ch128[3]);
mmtmpD1 = vqshlq_s32(vqaddq_s32(mmtmpD1,vrev64q_s32(mmtmpD1)),output_shift128);
mmtmpD3 = vcombine_s16(vmovn_s32(mmtmpD0),vmovn_s32(mmtmpD1));
mmtmpD0 = vmull_s16(dl_ch128[4], dl_ch128[4]);
mmtmpD0 = vqshlq_s32(vqaddq_s32(mmtmpD0,vrev64q_s32(mmtmpD0)),output_shift128);
mmtmpD1 = vmull_s16(dl_ch128[5], dl_ch128[5]);
mmtmpD1 = vqshlq_s32(vqaddq_s32(mmtmpD1,vrev64q_s32(mmtmpD1)),output_shift128);
mmtmpD4 = vcombine_s16(vmovn_s32(mmtmpD0),vmovn_s32(mmtmpD1));
dl_ch_mag128b[0] = vqdmulhq_s16(mmtmpD2,QAM_amp128b);
dl_ch_mag128b[1] = vqdmulhq_s16(mmtmpD3,QAM_amp128b);
dl_ch_mag128[0] = vqdmulhq_s16(mmtmpD2,QAM_amp128);
dl_ch_mag128[1] = vqdmulhq_s16(mmtmpD3,QAM_amp128);
dl_ch_mag128b[2] = vqdmulhq_s16(mmtmpD4,QAM_amp128b);
dl_ch_mag128[2] = vqdmulhq_s16(mmtmpD4,QAM_amp128);
}
mmtmpD0 = vmull_s16(dl_ch128[0], rxdataF128[0]);
//mmtmpD0 = [Re(ch[0])Re(rx[0]) Im(ch[0])Im(ch[0]) Re(ch[1])Re(rx[1]) Im(ch[1])Im(ch[1])]
mmtmpD1 = vmull_s16(dl_ch128[1], rxdataF128[1]);
//mmtmpD1 = [Re(ch[2])Re(rx[2]) Im(ch[2])Im(ch[2]) Re(ch[3])Re(rx[3]) Im(ch[3])Im(ch[3])]
mmtmpD0 = vcombine_s32(vpadd_s32(vget_low_s32(mmtmpD0),vget_high_s32(mmtmpD0)),
vpadd_s32(vget_low_s32(mmtmpD1),vget_high_s32(mmtmpD1)));
//mmtmpD0 = [Re(ch[0])Re(rx[0])+Im(ch[0])Im(ch[0]) Re(ch[1])Re(rx[1])+Im(ch[1])Im(ch[1]) Re(ch[2])Re(rx[2])+Im(ch[2])Im(ch[2]) Re(ch[3])Re(rx[3])+Im(ch[3])Im(ch[3])]
mmtmpD0b = vmull_s16(vrev32_s16(vmul_s16(dl_ch128[0],*(int16x4_t*)conj)), rxdataF128[0]);
//mmtmpD0 = [-Im(ch[0])Re(rx[0]) Re(ch[0])Im(rx[0]) -Im(ch[1])Re(rx[1]) Re(ch[1])Im(rx[1])]
mmtmpD1b = vmull_s16(vrev32_s16(vmul_s16(dl_ch128[1],*(int16x4_t*)conj)), rxdataF128[1]);
//mmtmpD0 = [-Im(ch[2])Re(rx[2]) Re(ch[2])Im(rx[2]) -Im(ch[3])Re(rx[3]) Re(ch[3])Im(rx[3])]
mmtmpD1 = vcombine_s32(vpadd_s32(vget_low_s32(mmtmpD0b),vget_high_s32(mmtmpD0b)),
vpadd_s32(vget_low_s32(mmtmpD1b),vget_high_s32(mmtmpD1b)));
//mmtmpD1 = [-Im(ch[0])Re(rx[0])+Re(ch[0])Im(rx[0]) -Im(ch[1])Re(rx[1])+Re(ch[1])Im(rx[1]) -Im(ch[2])Re(rx[2])+Re(ch[2])Im(rx[2]) -Im(ch[3])Re(rx[3])+Re(ch[3])Im(rx[3])]
mmtmpD0 = vqshlq_s32(mmtmpD0,output_shift128);
mmtmpD1 = vqshlq_s32(mmtmpD1,output_shift128);
rxdataF_comp128[0] = vzip_s16(vmovn_s32(mmtmpD0),vmovn_s32(mmtmpD1));
mmtmpD0 = vmull_s16(dl_ch128[2], rxdataF128[2]);
mmtmpD1 = vmull_s16(dl_ch128[3], rxdataF128[3]);
mmtmpD0 = vcombine_s32(vpadd_s32(vget_low_s32(mmtmpD0),vget_high_s32(mmtmpD0)),
vpadd_s32(vget_low_s32(mmtmpD1),vget_high_s32(mmtmpD1)));
mmtmpD0b = vmull_s16(vrev32_s16(vmul_s16(dl_ch128[2],*(int16x4_t*)conj)), rxdataF128[2]);
mmtmpD1b = vmull_s16(vrev32_s16(vmul_s16(dl_ch128[3],*(int16x4_t*)conj)), rxdataF128[3]);
mmtmpD1 = vcombine_s32(vpadd_s32(vget_low_s32(mmtmpD0b),vget_high_s32(mmtmpD0b)),
vpadd_s32(vget_low_s32(mmtmpD1b),vget_high_s32(mmtmpD1b)));
mmtmpD0 = vqshlq_s32(mmtmpD0,output_shift128);
mmtmpD1 = vqshlq_s32(mmtmpD1,output_shift128);
rxdataF_comp128[1] = vzip_s16(vmovn_s32(mmtmpD0),vmovn_s32(mmtmpD1));
mmtmpD0 = vmull_s16(dl_ch128[4], rxdataF128[4]);
mmtmpD1 = vmull_s16(dl_ch128[5], rxdataF128[5]);
mmtmpD0 = vcombine_s32(vpadd_s32(vget_low_s32(mmtmpD0),vget_high_s32(mmtmpD0)),
vpadd_s32(vget_low_s32(mmtmpD1),vget_high_s32(mmtmpD1)));
mmtmpD0b = vmull_s16(vrev32_s16(vmul_s16(dl_ch128[4],*(int16x4_t*)conj)), rxdataF128[4]);
mmtmpD1b = vmull_s16(vrev32_s16(vmul_s16(dl_ch128[5],*(int16x4_t*)conj)), rxdataF128[5]);
mmtmpD1 = vcombine_s32(vpadd_s32(vget_low_s32(mmtmpD0b),vget_high_s32(mmtmpD0b)),
vpadd_s32(vget_low_s32(mmtmpD1b),vget_high_s32(mmtmpD1b)));
mmtmpD0 = vqshlq_s32(mmtmpD0,output_shift128);
mmtmpD1 = vqshlq_s32(mmtmpD1,output_shift128);
rxdataF_comp128[2] = vzip_s16(vmovn_s32(mmtmpD0),vmovn_s32(mmtmpD1));
dl_ch128+=6;
dl_ch_mag128+=3;
dl_ch_mag128b+=3;
rxdataF128+=6;
rxdataF_comp128+=3;
}
}
}
if (rho) {
for (aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) {
rho128 = (int16x4x2_t*)&rho[aarx][symbol*frame_parms->N_RB_DL*12];
dl_ch128 = (int16x4_t*)dl_ch_estimates_ext[aarx];
dl_ch128_2 = (int16x4_t*)dl_ch_estimates_ext[2+aarx];
for (rb=0; rb<nb_rb_0; rb++) {
mmtmpD0 = vmull_s16(dl_ch128[0], dl_ch128_2[0]);
mmtmpD1 = vmull_s16(dl_ch128[1], dl_ch128_2[1]);
mmtmpD0 = vcombine_s32(vpadd_s32(vget_low_s32(mmtmpD0),vget_high_s32(mmtmpD0)),
vpadd_s32(vget_low_s32(mmtmpD1),vget_high_s32(mmtmpD1)));
mmtmpD0b = vmull_s16(vrev32_s16(vmul_s16(dl_ch128[0],*(int16x4_t*)conj)), dl_ch128_2[0]);
mmtmpD1b = vmull_s16(vrev32_s16(vmul_s16(dl_ch128[1],*(int16x4_t*)conj)), dl_ch128_2[1]);
mmtmpD1 = vcombine_s32(vpadd_s32(vget_low_s32(mmtmpD0b),vget_high_s32(mmtmpD0b)),
vpadd_s32(vget_low_s32(mmtmpD1b),vget_high_s32(mmtmpD1b)));
mmtmpD0 = vqshlq_s32(mmtmpD0,output_shift128);
mmtmpD1 = vqshlq_s32(mmtmpD1,output_shift128);
rho128[0] = vzip_s16(vmovn_s32(mmtmpD0),vmovn_s32(mmtmpD1));
mmtmpD0 = vmull_s16(dl_ch128[2], dl_ch128_2[2]);
mmtmpD1 = vmull_s16(dl_ch128[3], dl_ch128_2[3]);
mmtmpD0 = vcombine_s32(vpadd_s32(vget_low_s32(mmtmpD0),vget_high_s32(mmtmpD0)),
vpadd_s32(vget_low_s32(mmtmpD1),vget_high_s32(mmtmpD1)));
mmtmpD0b = vmull_s16(vrev32_s16(vmul_s16(dl_ch128[2],*(int16x4_t*)conj)), dl_ch128_2[2]);
mmtmpD1b = vmull_s16(vrev32_s16(vmul_s16(dl_ch128[3],*(int16x4_t*)conj)), dl_ch128_2[3]);
mmtmpD1 = vcombine_s32(vpadd_s32(vget_low_s32(mmtmpD0b),vget_high_s32(mmtmpD0b)),
vpadd_s32(vget_low_s32(mmtmpD1b),vget_high_s32(mmtmpD1b)));
mmtmpD0 = vqshlq_s32(mmtmpD0,output_shift128);
mmtmpD1 = vqshlq_s32(mmtmpD1,output_shift128);
rho128[1] = vzip_s16(vmovn_s32(mmtmpD0),vmovn_s32(mmtmpD1));
mmtmpD0 = vmull_s16(dl_ch128[0], dl_ch128_2[0]);
mmtmpD1 = vmull_s16(dl_ch128[1], dl_ch128_2[1]);
mmtmpD0 = vcombine_s32(vpadd_s32(vget_low_s32(mmtmpD0),vget_high_s32(mmtmpD0)),
vpadd_s32(vget_low_s32(mmtmpD1),vget_high_s32(mmtmpD1)));
mmtmpD0b = vmull_s16(vrev32_s16(vmul_s16(dl_ch128[4],*(int16x4_t*)conj)), dl_ch128_2[4]);
mmtmpD1b = vmull_s16(vrev32_s16(vmul_s16(dl_ch128[5],*(int16x4_t*)conj)), dl_ch128_2[5]);
mmtmpD1 = vcombine_s32(vpadd_s32(vget_low_s32(mmtmpD0b),vget_high_s32(mmtmpD0b)),
vpadd_s32(vget_low_s32(mmtmpD1b),vget_high_s32(mmtmpD1b)));
mmtmpD0 = vqshlq_s32(mmtmpD0,output_shift128);
mmtmpD1 = vqshlq_s32(mmtmpD1,output_shift128);
rho128[2] = vzip_s16(vmovn_s32(mmtmpD0),vmovn_s32(mmtmpD1));
dl_ch128+=6;
dl_ch128_2+=6;
rho128+=3;
}
if (first_symbol_flag==1) {
measurements->rx_correlation[0][aarx] = signal_energy(&rho[aarx][symbol*frame_parms->N_RB_DL*12],rb*12);
}
}
}
#endif
}
void nr_dlsch_scale_channel(uint32_t rx_size_symbol,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment