Commit 7dc97108 authored by sfn's avatar sfn

Fix indentation issues in nr_dlsch_demodulation.c

parent 3a83e8df
...@@ -355,13 +355,12 @@ int nr_rx_pdsch(PHY_VARS_NR_UE *ue, ...@@ -355,13 +355,12 @@ int nr_rx_pdsch(PHY_VARS_NR_UE *ue,
symbol, symbol,
pilots, pilots,
config_type, config_type,
start_rb, start_rb + dlsch0_harq->BWPStart,
nb_rb_pdsch, nb_rb_pdsch,
dlsch0_harq->n_dmrs_cdm_groups, dlsch0_harq->n_dmrs_cdm_groups,
dlsch0_harq->Nl, dlsch0_harq->Nl,
frame_parms, frame_parms,
dlsch0_harq->dlDmrsSymbPos); dlsch0_harq->dlDmrsSymbPos);
else// one layer else// one layer
nb_rb = nr_dlsch_extract_rbs_single(common_vars->common_vars_rx_data_per_thread[proc->thread_id].rxdataF, nb_rb = nr_dlsch_extract_rbs_single(common_vars->common_vars_rx_data_per_thread[proc->thread_id].rxdataF,
pdsch_vars[gNB_id]->dl_ch_estimates, pdsch_vars[gNB_id]->dl_ch_estimates,
...@@ -375,11 +374,11 @@ int nr_rx_pdsch(PHY_VARS_NR_UE *ue, ...@@ -375,11 +374,11 @@ int nr_rx_pdsch(PHY_VARS_NR_UE *ue,
dlsch0_harq->n_dmrs_cdm_groups, dlsch0_harq->n_dmrs_cdm_groups,
frame_parms, frame_parms,
dlsch0_harq->dlDmrsSymbPos); dlsch0_harq->dlDmrsSymbPos);
}
} /*else if(beamforming_mode>7) { else if(beamforming_mode>7) {
LOG_W(PHY,"dlsch_demodulation: beamforming mode not supported yet.\n"); LOG_W(PHY,"dlsch_demodulation: beamforming mode not supported yet.\n");
}*/ }
//printf("nb_rb = %d, gNB_id %d\n",nb_rb,gNB_id); //printf("nb_rb = %d, gNB_id %d\n",nb_rb,gNB_id);
if (nb_rb==0) { if (nb_rb==0) {
LOG_D(PHY,"dlsch_demodulation.c: nb_rb=0\n"); LOG_D(PHY,"dlsch_demodulation.c: nb_rb=0\n");
...@@ -447,21 +446,21 @@ int nr_rx_pdsch(PHY_VARS_NR_UE *ue, ...@@ -447,21 +446,21 @@ int nr_rx_pdsch(PHY_VARS_NR_UE *ue,
pdsch_vars[gNB_id]->log2_maxh = (log2_approx(avgs)/2)+3; pdsch_vars[gNB_id]->log2_maxh = (log2_approx(avgs)/2)+3;
//LOG_I(PHY, "avgs Power per SC is %d lg2_maxh %d\n", avgs, pdsch_vars[gNB_id]->log2_maxh); //LOG_I(PHY, "avgs Power per SC is %d lg2_maxh %d\n", avgs, pdsch_vars[gNB_id]->log2_maxh);
if (dlsch0_harq->mimo_mode == NR_DUALSTREAM) { if (dlsch0_harq->mimo_mode == NR_DUALSTREAM) {
nr_dlsch_channel_level_median(pdsch_vars[gNB_id]->dl_ch_estimates_ext, nr_dlsch_channel_level_median(pdsch_vars[gNB_id]->dl_ch_estimates_ext,
median, median,
n_tx, n_tx,
n_rx, n_rx,
len, len,
symbol*nb_rb*12); symbol*nb_rb*12);
for (aatx = 0; aatx < n_tx; aatx++) { for (aatx = 0; aatx < n_tx; aatx++) {
for (aarx = 0; aarx < n_rx; aarx++) { for (aarx = 0; aarx < n_rx; aarx++) {
avgs = cmax(avgs, median[aatx*n_rx + aarx]); avgs = cmax(avgs, median[aatx*n_rx + aarx]);
} }
} }
pdsch_vars[gNB_id]->log2_maxh = (log2_approx(avgs)/2) + 1; pdsch_vars[gNB_id]->log2_maxh = (log2_approx(avgs)/2) + 1;
} }
} }
LOG_D(PHY,"[DLSCH] AbsSubframe %d.%d log2_maxh = %d [log2_maxh0 %d log2_maxh1 %d] (%d,%d)\n", LOG_D(PHY,"[DLSCH] AbsSubframe %d.%d log2_maxh = %d [log2_maxh0 %d log2_maxh1 %d] (%d,%d)\n",
frame%1024, frame%1024,
...@@ -503,28 +502,30 @@ int nr_rx_pdsch(PHY_VARS_NR_UE *ue, ...@@ -503,28 +502,30 @@ int nr_rx_pdsch(PHY_VARS_NR_UE *ue,
pdsch_vars[gNB_id]->dl_ch_magr0, pdsch_vars[gNB_id]->dl_ch_magr0,
pdsch_vars[gNB_id]->rxdataF_comp0, pdsch_vars[gNB_id]->rxdataF_comp0,
(n_tx>1) ? pdsch_vars[gNB_id]->rho : NULL, (n_tx>1) ? pdsch_vars[gNB_id]->rho : NULL,
frame_parms, frame_parms,
n_tx, n_tx,
symbol, symbol,
pilots, pilots,
first_symbol_flag, first_symbol_flag,
dlsch0_harq->Qm, dlsch0_harq->Qm,
nb_rb, nb_rb,
pdsch_vars[gNB_id]->log2_maxh, pdsch_vars[gNB_id]->log2_maxh,
measurements); // log2_maxh+I0_shift measurements); // log2_maxh+I0_shift
} else if (dlsch0_harq->mimo_mode == NR_DUALSTREAM){ }
else if (dlsch0_harq->mimo_mode == NR_DUALSTREAM) {
nr_dlsch_channel_compensation_core(pdsch_vars[gNB_id]->rxdataF_ext, nr_dlsch_channel_compensation_core(pdsch_vars[gNB_id]->rxdataF_ext,
pdsch_vars[gNB_id]->dl_ch_estimates_ext, pdsch_vars[gNB_id]->dl_ch_estimates_ext,
pdsch_vars[gNB_id]->dl_ch_mag0, pdsch_vars[gNB_id]->dl_ch_mag0,
pdsch_vars[gNB_id]->dl_ch_magb0, pdsch_vars[gNB_id]->dl_ch_magb0,
pdsch_vars[gNB_id]->rxdataF_comp0, //rxdataF_comp pdsch_vars[gNB_id]->rxdataF_comp0, //rxdataF_comp
NULL, NULL,
n_tx, n_tx,
n_rx, n_rx,
dlsch0_harq->Qm, dlsch0_harq->Qm,
pdsch_vars[gNB_id]->log2_maxh, pdsch_vars[gNB_id]->log2_maxh,
2*len, // subcarriers Re Im 2*len, // subcarriers Re Im
0); // we start from the beginning of the vector 0); // we start from the beginning of the vector
// compute correlation between signal and interference channels (rho12 and rho21) // compute correlation between signal and interference channels (rho12 and rho21)
nr_dlsch_dual_stream_correlation_core(pdsch_vars[gNB_id]->dl_ch_estimates_ext, nr_dlsch_dual_stream_correlation_core(pdsch_vars[gNB_id]->dl_ch_estimates_ext,
&(pdsch_vars[gNB_id]->dl_ch_estimates_ext[2]), &(pdsch_vars[gNB_id]->dl_ch_estimates_ext[2]),
...@@ -550,41 +551,40 @@ int nr_rx_pdsch(PHY_VARS_NR_UE *ue, ...@@ -550,41 +551,40 @@ int nr_rx_pdsch(PHY_VARS_NR_UE *ue,
#endif #endif
if (frame_parms->nb_antennas_rx > 1) { if (frame_parms->nb_antennas_rx > 1) {
if (dlsch0_harq->mimo_mode<NR_DUALSTREAM) { if (dlsch0_harq->mimo_mode<NR_DUALSTREAM) {
nr_dlsch_detection_mrc(pdsch_vars[gNB_id]->rxdataF_comp0, nr_dlsch_detection_mrc(pdsch_vars[gNB_id]->rxdataF_comp0,
(n_tx>1)? pdsch_vars[gNB_id]->rho : NULL, (n_tx>1)? pdsch_vars[gNB_id]->rho : NULL,
pdsch_vars[gNB_id]->dl_ch_mag0, pdsch_vars[gNB_id]->dl_ch_mag0,
pdsch_vars[gNB_id]->dl_ch_magb0, pdsch_vars[gNB_id]->dl_ch_magb0,
n_tx, n_tx,
n_rx, n_rx,
symbol, symbol,
nb_rb); nb_rb);
if (n_tx == 2)//Apply zero forcing for 2 Tx layers
if (n_tx == 2) nr_zero_forcing_rx_2layers(pdsch_vars[gNB_id]->rxdataF_comp0,
nr_zero_forcing_rx_2layers(pdsch_vars[gNB_id]->rxdataF_comp0,
pdsch_vars[gNB_id]->dl_ch_mag0,
pdsch_vars[gNB_id]->dl_ch_magb0,
pdsch_vars[gNB_id]->dl_ch_estimates_ext,
nb_rb,
n_rx,
dlsch0_harq->Qm,
pdsch_vars[gNB_id]->log2_maxh,
symbol);
} else if (dlsch0_harq->mimo_mode == NR_DUALSTREAM){
nr_dlsch_detection_mrc_core(pdsch_vars[gNB_id]->rxdataF_comp0,
NULL,
pdsch_vars[gNB_id]->dl_ch_rho_ext[harq_pid][round],
pdsch_vars[gNB_id]->dl_ch_rho2_ext,
pdsch_vars[gNB_id]->dl_ch_mag0, pdsch_vars[gNB_id]->dl_ch_mag0,
pdsch_vars[gNB_id]->dl_ch_magb0, pdsch_vars[gNB_id]->dl_ch_magb0,
NULL, pdsch_vars[gNB_id]->dl_ch_estimates_ext,
NULL, nb_rb,
n_tx,
n_rx, n_rx,
2*len, dlsch0_harq->Qm,
0); pdsch_vars[gNB_id]->log2_maxh,
} symbol);
}
else if (dlsch0_harq->mimo_mode == NR_DUALSTREAM) {
nr_dlsch_detection_mrc_core(pdsch_vars[gNB_id]->rxdataF_comp0,
NULL,
pdsch_vars[gNB_id]->dl_ch_rho_ext[harq_pid][round],
pdsch_vars[gNB_id]->dl_ch_rho2_ext,
pdsch_vars[gNB_id]->dl_ch_mag0,
pdsch_vars[gNB_id]->dl_ch_magb0,
NULL,
NULL,
n_tx,
n_rx,
2*len,
0);
}
} }
//printf("start compute LLR\n"); //printf("start compute LLR\n");
if (dlsch0_harq->mimo_mode == NR_DUALSTREAM) { if (dlsch0_harq->mimo_mode == NR_DUALSTREAM) {
...@@ -841,9 +841,9 @@ void nr_dlsch_channel_compensation(int **rxdataF_ext, ...@@ -841,9 +841,9 @@ void nr_dlsch_channel_compensation(int **rxdataF_ext,
dl_ch_mag128r[0] = dl_ch_mag128[0]; dl_ch_mag128r[0] = dl_ch_mag128[0];
dl_ch_mag128[0] = _mm_mulhi_epi16(dl_ch_mag128[0],QAM_amp128); dl_ch_mag128[0] = _mm_mulhi_epi16(dl_ch_mag128[0],QAM_amp128);
dl_ch_mag128[0] = _mm_slli_epi16(dl_ch_mag128[0],1); dl_ch_mag128[0] = _mm_slli_epi16(dl_ch_mag128[0],1);
//print_ints("Re(ch):",(int16_t*)&mmtmpD0); //print_ints("Re(ch):",(int16_t*)&mmtmpD0);
//print_shorts("QAM_amp:",(int16_t*)&QAM_amp128); //print_shorts("QAM_amp:",(int16_t*)&QAM_amp128);
//print_shorts("mag:",(int16_t*)&dl_ch_mag128[0]); //print_shorts("mag:",(int16_t*)&dl_ch_mag128[0]);
dl_ch_mag128[1] = _mm_unpackhi_epi16(mmtmpD0,mmtmpD0); dl_ch_mag128[1] = _mm_unpackhi_epi16(mmtmpD0,mmtmpD0);
dl_ch_mag128b[1] = dl_ch_mag128[1]; dl_ch_mag128b[1] = dl_ch_mag128[1];
dl_ch_mag128r[1] = dl_ch_mag128[1]; dl_ch_mag128r[1] = dl_ch_mag128[1];
...@@ -866,22 +866,23 @@ void nr_dlsch_channel_compensation(int **rxdataF_ext, ...@@ -866,22 +866,23 @@ void nr_dlsch_channel_compensation(int **rxdataF_ext,
dl_ch_mag128b[0] = _mm_mulhi_epi16(dl_ch_mag128b[0],QAM_amp128b); dl_ch_mag128b[0] = _mm_mulhi_epi16(dl_ch_mag128b[0],QAM_amp128b);
dl_ch_mag128b[0] = _mm_slli_epi16(dl_ch_mag128b[0],1); dl_ch_mag128b[0] = _mm_slli_epi16(dl_ch_mag128b[0],1);
dl_ch_mag128b[1] = _mm_mulhi_epi16(dl_ch_mag128b[1],QAM_amp128b); dl_ch_mag128b[1] = _mm_mulhi_epi16(dl_ch_mag128b[1],QAM_amp128b);
dl_ch_mag128b[1] = _mm_slli_epi16(dl_ch_mag128b[1],1); dl_ch_mag128b[1] = _mm_slli_epi16(dl_ch_mag128b[1],1);
dl_ch_mag128r[0] = _mm_mulhi_epi16(dl_ch_mag128r[0],QAM_amp128r); dl_ch_mag128r[0] = _mm_mulhi_epi16(dl_ch_mag128r[0],QAM_amp128r);
dl_ch_mag128r[0] = _mm_slli_epi16(dl_ch_mag128r[0],1); dl_ch_mag128r[0] = _mm_slli_epi16(dl_ch_mag128r[0],1);
dl_ch_mag128r[1] = _mm_mulhi_epi16(dl_ch_mag128r[1],QAM_amp128r); dl_ch_mag128r[1] = _mm_mulhi_epi16(dl_ch_mag128r[1],QAM_amp128r);
dl_ch_mag128r[1] = _mm_slli_epi16(dl_ch_mag128r[1],1); dl_ch_mag128r[1] = _mm_slli_epi16(dl_ch_mag128r[1],1);
if (pilots==0) { if (pilots==0) {
dl_ch_mag128b[2] = _mm_mulhi_epi16(dl_ch_mag128b[2],QAM_amp128b); dl_ch_mag128b[2] = _mm_mulhi_epi16(dl_ch_mag128b[2],QAM_amp128b);
dl_ch_mag128b[2] = _mm_slli_epi16(dl_ch_mag128b[2],1); dl_ch_mag128b[2] = _mm_slli_epi16(dl_ch_mag128b[2],1);
dl_ch_mag128r[2] = _mm_mulhi_epi16(dl_ch_mag128r[2],QAM_amp128r); dl_ch_mag128r[2] = _mm_mulhi_epi16(dl_ch_mag128r[2],QAM_amp128r);
dl_ch_mag128r[2] = _mm_slli_epi16(dl_ch_mag128r[2],1); dl_ch_mag128r[2] = _mm_slli_epi16(dl_ch_mag128r[2],1);
} }
} }
// multiply by conjugated channel // multiply by conjugated channel
...@@ -893,67 +894,72 @@ void nr_dlsch_channel_compensation(int **rxdataF_ext, ...@@ -893,67 +894,72 @@ void nr_dlsch_channel_compensation(int **rxdataF_ext,
mmtmpD1 = _mm_shufflehi_epi16(mmtmpD1,_MM_SHUFFLE(2,3,0,1)); mmtmpD1 = _mm_shufflehi_epi16(mmtmpD1,_MM_SHUFFLE(2,3,0,1));
mmtmpD1 = _mm_sign_epi16(mmtmpD1,*(__m128i*)&conjugate[0]); mmtmpD1 = _mm_sign_epi16(mmtmpD1,*(__m128i*)&conjugate[0]);
// print_ints("im",&mmtmpD1); // print_ints("im",&mmtmpD1);
mmtmpD1 = _mm_madd_epi16(mmtmpD1,rxdataF128[0]); mmtmpD1 = _mm_madd_epi16(mmtmpD1,rxdataF128[0]);
// mmtmpD1 contains imag part of 4 consecutive outputs (32-bit) // mmtmpD1 contains imag part of 4 consecutive outputs (32-bit)
mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift); mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift);
// print_ints("re(shift)",&mmtmpD0); // print_ints("re(shift)",&mmtmpD0);
mmtmpD1 = _mm_srai_epi32(mmtmpD1,output_shift); mmtmpD1 = _mm_srai_epi32(mmtmpD1,output_shift);
// print_ints("im(shift)",&mmtmpD1); // print_ints("im(shift)",&mmtmpD1);
mmtmpD2 = _mm_unpacklo_epi32(mmtmpD0,mmtmpD1); mmtmpD2 = _mm_unpacklo_epi32(mmtmpD0,mmtmpD1);
mmtmpD3 = _mm_unpackhi_epi32(mmtmpD0,mmtmpD1); mmtmpD3 = _mm_unpackhi_epi32(mmtmpD0,mmtmpD1);
// print_ints("c0",&mmtmpD2); // print_ints("c0",&mmtmpD2);
// print_ints("c1",&mmtmpD3); // print_ints("c1",&mmtmpD3);
rxdataF_comp128[0] = _mm_packs_epi32(mmtmpD2,mmtmpD3); rxdataF_comp128[0] = _mm_packs_epi32(mmtmpD2,mmtmpD3);
//printf("arx%d atx%d rb_index %d symbol%d\n",aarx,aatx,rb,symbol);
//printf("arx%d atx%d rb_index %d symbol%d\n",aarx,aatx,rb,symbol); // print_shorts("rx:",rxdataF128);
// print_shorts("ch:",dl_ch128);
// print_shorts("pack:",rxdataF_comp128);
//printf("arx%d atx%d rb_index %d symbol%d\n",aarx,aatx,rb,symbol);
//print_shorts("rx:",(int16_t*)&rxdataF128[0]); //print_shorts("rx:",(int16_t*)&rxdataF128[0]);
//print_shorts("ch:",(int16_t*)&dl_ch128[0]); //print_shorts("ch:",(int16_t*)&dl_ch128[0]);
//print_shorts("pack:",(int16_t*)&rxdataF_comp128[0]); //print_shorts("pack:",(int16_t*)&rxdataF_comp128[0]);
// multiply by conjugated channel // multiply by conjugated channel
mmtmpD0 = _mm_madd_epi16(dl_ch128[1],rxdataF128[1]); mmtmpD0 = _mm_madd_epi16(dl_ch128[1],rxdataF128[1]);
// mmtmpD0 contains real part of 4 consecutive outputs (32-bit) // mmtmpD0 contains real part of 4 consecutive outputs (32-bit)
mmtmpD1 = _mm_shufflelo_epi16(dl_ch128[1],_MM_SHUFFLE(2,3,0,1)); mmtmpD1 = _mm_shufflelo_epi16(dl_ch128[1],_MM_SHUFFLE(2,3,0,1));
mmtmpD1 = _mm_shufflehi_epi16(mmtmpD1,_MM_SHUFFLE(2,3,0,1)); mmtmpD1 = _mm_shufflehi_epi16(mmtmpD1,_MM_SHUFFLE(2,3,0,1));
mmtmpD1 = _mm_sign_epi16(mmtmpD1,*(__m128i*)conjugate); mmtmpD1 = _mm_sign_epi16(mmtmpD1,*(__m128i*)conjugate);
mmtmpD1 = _mm_madd_epi16(mmtmpD1,rxdataF128[1]); mmtmpD1 = _mm_madd_epi16(mmtmpD1,rxdataF128[1]);
// mmtmpD1 contains imag part of 4 consecutive outputs (32-bit) // mmtmpD1 contains imag part of 4 consecutive outputs (32-bit)
mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift); mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift);
mmtmpD1 = _mm_srai_epi32(mmtmpD1,output_shift); mmtmpD1 = _mm_srai_epi32(mmtmpD1,output_shift);
mmtmpD2 = _mm_unpacklo_epi32(mmtmpD0,mmtmpD1); mmtmpD2 = _mm_unpacklo_epi32(mmtmpD0,mmtmpD1);
mmtmpD3 = _mm_unpackhi_epi32(mmtmpD0,mmtmpD1); mmtmpD3 = _mm_unpackhi_epi32(mmtmpD0,mmtmpD1);
rxdataF_comp128[1] = _mm_packs_epi32(mmtmpD2,mmtmpD3); rxdataF_comp128[1] = _mm_packs_epi32(mmtmpD2,mmtmpD3);
//print_shorts("rx:",(int16_t*)&rxdataF128[1]); //print_shorts("rx:",(int16_t*)&rxdataF128[1]);
//print_shorts("ch:",(int16_t*)&dl_ch128[1]); //print_shorts("ch:",(int16_t*)&dl_ch128[1]);
//print_shorts("pack:",(int16_t*)&rxdataF_comp128[1]); //print_shorts("pack:",(int16_t*)&rxdataF_comp128[1]);
if (pilots==0) { if (pilots==0) {
// multiply by conjugated channel // multiply by conjugated channel
mmtmpD0 = _mm_madd_epi16(dl_ch128[2],rxdataF128[2]); mmtmpD0 = _mm_madd_epi16(dl_ch128[2],rxdataF128[2]);
// mmtmpD0 contains real part of 4 consecutive outputs (32-bit) // mmtmpD0 contains real part of 4 consecutive outputs (32-bit)
mmtmpD1 = _mm_shufflelo_epi16(dl_ch128[2],_MM_SHUFFLE(2,3,0,1)); mmtmpD1 = _mm_shufflelo_epi16(dl_ch128[2],_MM_SHUFFLE(2,3,0,1));
mmtmpD1 = _mm_shufflehi_epi16(mmtmpD1,_MM_SHUFFLE(2,3,0,1)); mmtmpD1 = _mm_shufflehi_epi16(mmtmpD1,_MM_SHUFFLE(2,3,0,1));
mmtmpD1 = _mm_sign_epi16(mmtmpD1,*(__m128i*)conjugate); mmtmpD1 = _mm_sign_epi16(mmtmpD1,*(__m128i*)conjugate);
mmtmpD1 = _mm_madd_epi16(mmtmpD1,rxdataF128[2]); mmtmpD1 = _mm_madd_epi16(mmtmpD1,rxdataF128[2]);
// mmtmpD1 contains imag part of 4 consecutive outputs (32-bit) // mmtmpD1 contains imag part of 4 consecutive outputs (32-bit)
mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift); mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift);
mmtmpD1 = _mm_srai_epi32(mmtmpD1,output_shift); mmtmpD1 = _mm_srai_epi32(mmtmpD1,output_shift);
mmtmpD2 = _mm_unpacklo_epi32(mmtmpD0,mmtmpD1); mmtmpD2 = _mm_unpacklo_epi32(mmtmpD0,mmtmpD1);
mmtmpD3 = _mm_unpackhi_epi32(mmtmpD0,mmtmpD1); mmtmpD3 = _mm_unpackhi_epi32(mmtmpD0,mmtmpD1);
rxdataF_comp128[2] = _mm_packs_epi32(mmtmpD2,mmtmpD3); rxdataF_comp128[2] = _mm_packs_epi32(mmtmpD2,mmtmpD3);
//print_shorts("rx:",(int16_t*)&rxdataF128[2]); //print_shorts("rx:",(int16_t*)&rxdataF128[2]);
//print_shorts("ch:",(int16_t*)&dl_ch128[2]); //print_shorts("ch:",(int16_t*)&dl_ch128[2]);
//print_shorts("pack:",(int16_t*)&rxdataF_comp128[2]); //print_shorts("pack:",(int16_t*)&rxdataF_comp128[2]);
dl_ch128+=3; dl_ch128+=3;
dl_ch_mag128+=3; dl_ch_mag128+=3;
dl_ch_mag128b+=3; dl_ch_mag128b+=3;
dl_ch_mag128r+=3; dl_ch_mag128r+=3;
rxdataF128+=3; rxdataF128+=3;
rxdataF_comp128+=3; rxdataF_comp128+=3;
} else { // we have a smaller PDSCH in symbols with pilots so skip last group of 4 REs and increment less }
dl_ch128+=2; else { // we have a smaller PDSCH in symbols with pilots so skip last group of 4 REs and increment less
dl_ch128+=2;
dl_ch_mag128+=2; dl_ch_mag128+=2;
dl_ch_mag128b+=2; dl_ch_mag128b+=2;
dl_ch_mag128r+=2; dl_ch_mag128r+=2;
...@@ -1564,41 +1570,41 @@ void nr_dlsch_channel_level(int **dl_ch_estimates_ext, ...@@ -1564,41 +1570,41 @@ void nr_dlsch_channel_level(int **dl_ch_estimates_ext,
#if defined(__x86_64__)||defined(__i386__) #if defined(__x86_64__)||defined(__i386__)
short rb; short rb;
unsigned char aatx,aarx; unsigned char aatx,aarx;
__m128i *dl_ch128, avg128D; __m128i *dl_ch128, avg128D;
//nb_rb*nre = y * 2^x //nb_rb*nre = y * 2^x
int16_t x = factor2(len); int16_t x = factor2(len);
//x = (x>4) ? 4 : x; //x = (x>4) ? 4 : x;
int16_t y = (len)>>x; int16_t y = (len)>>x;
//printf("len = %d = %d * 2^(%d)\n",len,y,x); //printf("len = %d = %d * 2^(%d)\n",len,y,x);
AssertFatal(y!=0,"Cannot divide by zero: in function %s of file %s\n", __func__, __FILE__); AssertFatal(y!=0,"Cannot divide by zero: in function %s of file %s\n", __func__, __FILE__);
for (aatx=0; aatx<n_tx; aatx++) for (aatx=0; aatx<n_tx; aatx++)
for (aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) { for (aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) {
//clear average level //clear average level
avg128D = _mm_setzero_si128(); avg128D = _mm_setzero_si128();
dl_ch128=(__m128i *)&dl_ch_estimates_ext[(aatx*frame_parms->nb_antennas_rx)+aarx][symbol*nb_rb*12]; dl_ch128=(__m128i *)&dl_ch_estimates_ext[(aatx*frame_parms->nb_antennas_rx)+aarx][symbol*nb_rb*12];
for (rb=0;rb<nb_rb;rb++) {
avg128D = _mm_add_epi32(avg128D,_mm_srai_epi32(_mm_madd_epi16(dl_ch128[0],dl_ch128[0]),x));
avg128D = _mm_add_epi32(avg128D,_mm_srai_epi32(_mm_madd_epi16(dl_ch128[1],dl_ch128[1]),x));
avg128D = _mm_add_epi32(avg128D,_mm_srai_epi32(_mm_madd_epi16(dl_ch128[2],dl_ch128[2]),x));
dl_ch128+=3;
}
avg[(aatx*frame_parms->nb_antennas_rx)+aarx] =(((int32_t*)&avg128D)[0] +
((int32_t*)&avg128D)[1] +
((int32_t*)&avg128D)[2] +
((int32_t*)&avg128D)[3])/y;
// printf("Channel level : %d\n",avg[(aatx<<1)+aarx]);
}
_mm_empty(); for (rb=0;rb<nb_rb;rb++) {
_m_empty(); avg128D = _mm_add_epi32(avg128D,_mm_srai_epi32(_mm_madd_epi16(dl_ch128[0],dl_ch128[0]),x));
avg128D = _mm_add_epi32(avg128D,_mm_srai_epi32(_mm_madd_epi16(dl_ch128[1],dl_ch128[1]),x));
avg128D = _mm_add_epi32(avg128D,_mm_srai_epi32(_mm_madd_epi16(dl_ch128[2],dl_ch128[2]),x));
dl_ch128+=3;
}
avg[(aatx*frame_parms->nb_antennas_rx)+aarx] =(((int32_t*)&avg128D)[0] +
((int32_t*)&avg128D)[1] +
((int32_t*)&avg128D)[2] +
((int32_t*)&avg128D)[3])/y;
// printf("Channel level : %d\n",avg[(aatx<<1)+aarx]);
}
_mm_empty();
_m_empty();
#elif defined(__arm__) #elif defined(__arm__)
...@@ -1669,48 +1675,46 @@ void nr_dlsch_channel_level_median(int **dl_ch_estimates_ext, ...@@ -1669,48 +1675,46 @@ void nr_dlsch_channel_level_median(int **dl_ch_estimates_ext,
#if defined(__x86_64__)||defined(__i386__) #if defined(__x86_64__)||defined(__i386__)
short ii; short ii;
int aatx,aarx; int aatx,aarx;
int length2; int length2;
int max = 0, min=0; int max = 0, min=0;
int norm_pack; int norm_pack;
__m128i *dl_ch128, norm128D; __m128i *dl_ch128, norm128D;
for (aatx=0; aatx<n_tx; aatx++){
for (aarx=0; aarx<n_rx; aarx++) {
max = median[aatx*n_rx + aarx];//initialize the med point for max
min = median[aatx*n_rx + aarx];//initialize the med point for min
norm128D = _mm_setzero_si128();
dl_ch128=(__m128i *)&dl_ch_estimates_ext[aatx*n_rx + aarx][start_point];
length2 = length>>2;//length = number of REs, hence length2=nb_REs*(32/128) in SIMD loop for (aatx=0; aatx<n_tx; aatx++) {
for (aarx=0; aarx<n_rx; aarx++) {
max = median[aatx*n_rx + aarx];//initialize the med point for max
min = median[aatx*n_rx + aarx];//initialize the med point for min
norm128D = _mm_setzero_si128();
for (ii=0;ii<length2;ii++) { dl_ch128=(__m128i *)&dl_ch_estimates_ext[aatx*n_rx + aarx][start_point];
norm128D = _mm_srai_epi32( _mm_madd_epi16(dl_ch128[0],dl_ch128[0]), 2);//[|H_0|²/4 |H_1|²/4 |H_2|²/4 |H_3|²/4]
//print_ints("norm128D",&norm128D[0]);
norm_pack = ((int32_t*)&norm128D)[0] + length2 = length>>2;//length = number of REs, hence length2=nb_REs*(32/128) in SIMD loop
((int32_t*)&norm128D)[1] +
((int32_t*)&norm128D)[2] +
((int32_t*)&norm128D)[3];// compute the sum
if (norm_pack > max) for (ii=0;ii<length2;ii++) {
max = norm_pack;//store values more than max norm128D = _mm_srai_epi32( _mm_madd_epi16(dl_ch128[0],dl_ch128[0]), 2);//[|H_0|²/4 |H_1|²/4 |H_2|²/4 |H_3|²/4]
if (norm_pack < min) //print_ints("norm128D",&norm128D[0]);
min = norm_pack;//store values less than min
dl_ch128+=1; norm_pack = ((int32_t*)&norm128D)[0] +
} ((int32_t*)&norm128D)[1] +
((int32_t*)&norm128D)[2] +
((int32_t*)&norm128D)[3];// compute the sum
median[aatx*n_rx + aarx] = (max+min)>>1; if (norm_pack > max)
max = norm_pack;//store values more than max
if (norm_pack < min)
min = norm_pack;//store values less than min
dl_ch128+=1;
}
//printf("Channel level median [%d]: %d max = %d min = %d\n",aatx*n_rx + aarx, median[aatx*n_rx + aarx],max,min); median[aatx*n_rx + aarx] = (max+min)>>1;
} //printf("Channel level median [%d]: %d max = %d min = %d\n",aatx*n_rx + aarx, median[aatx*n_rx + aarx],max,min);
} }
}
_mm_empty(); _mm_empty();
_m_empty(); _m_empty();
#elif defined(__arm__) #elif defined(__arm__)
...@@ -2068,7 +2072,7 @@ unsigned short nr_dlsch_extract_rbs_single(int **rxdataF, ...@@ -2068,7 +2072,7 @@ unsigned short nr_dlsch_extract_rbs_single(int **rxdataF,
unsigned char j=0; unsigned char j=0;
if (config_type==pdsch_dmrs_type1){ if (config_type==pdsch_dmrs_type1) {
AssertFatal(n_dmrs_cdm_groups == 1 || n_dmrs_cdm_groups == 2, AssertFatal(n_dmrs_cdm_groups == 1 || n_dmrs_cdm_groups == 2,
"n_dmrs_cdm_groups %d is illegal\n",n_dmrs_cdm_groups); "n_dmrs_cdm_groups %d is illegal\n",n_dmrs_cdm_groups);
nushift = n_dmrs_cdm_groups -1;//delta in Table 7.4.1.1.2-1 nushift = n_dmrs_cdm_groups -1;//delta in Table 7.4.1.1.2-1
...@@ -2077,7 +2081,7 @@ unsigned short nr_dlsch_extract_rbs_single(int **rxdataF, ...@@ -2077,7 +2081,7 @@ unsigned short nr_dlsch_extract_rbs_single(int **rxdataF,
AssertFatal(n_dmrs_cdm_groups == 1 || n_dmrs_cdm_groups == 2 || n_dmrs_cdm_groups == 3, AssertFatal(n_dmrs_cdm_groups == 1 || n_dmrs_cdm_groups == 2 || n_dmrs_cdm_groups == 3,
"n_dmrs_cdm_groups %d is illegal\n",n_dmrs_cdm_groups); "n_dmrs_cdm_groups %d is illegal\n",n_dmrs_cdm_groups);
nushift = (n_dmrs_cdm_groups -1)<<1;//delta in Table 7.4.1.1.2-2 nushift = (n_dmrs_cdm_groups -1)<<1;//delta in Table 7.4.1.1.2-2
} }
for (aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) { for (aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) {
...@@ -2139,7 +2143,6 @@ unsigned short nr_dlsch_extract_rbs_single(int **rxdataF, ...@@ -2139,7 +2143,6 @@ unsigned short nr_dlsch_extract_rbs_single(int **rxdataF,
} }
} }
} }
return(nb_rb_pdsch); return(nb_rb_pdsch);
} }
...@@ -2163,11 +2166,10 @@ unsigned short nr_dlsch_extract_rbs_multiple(int **rxdataF, ...@@ -2163,11 +2166,10 @@ unsigned short nr_dlsch_extract_rbs_multiple(int **rxdataF,
int *dl_ch0,*dl_ch0_ext,*rxF,*rxF_ext; int *dl_ch0,*dl_ch0_ext,*rxF,*rxF_ext;
int8_t validDmrsEst = 0; //store last DMRS Symbol index int8_t validDmrsEst = 0; //store last DMRS Symbol index
if (config_type==pdsch_dmrs_type1){ if (config_type==pdsch_dmrs_type1) {
AssertFatal(n_dmrs_cdm_groups == 1 || n_dmrs_cdm_groups == 2, AssertFatal(n_dmrs_cdm_groups == 1 || n_dmrs_cdm_groups == 2,
"n_dmrs_cdm_groups %d is illegal\n",n_dmrs_cdm_groups); "n_dmrs_cdm_groups %d is illegal\n",n_dmrs_cdm_groups);
nushift = n_dmrs_cdm_groups -1;//delta in Table 7.4.1.1.2-1 nushift = n_dmrs_cdm_groups -1;//delta in Table 7.4.1.1.2-1
} else { } else {
AssertFatal(n_dmrs_cdm_groups == 1 || n_dmrs_cdm_groups == 2 || n_dmrs_cdm_groups == 3, AssertFatal(n_dmrs_cdm_groups == 1 || n_dmrs_cdm_groups == 2 || n_dmrs_cdm_groups == 3,
"n_dmrs_cdm_groups %d is illegal\n",n_dmrs_cdm_groups); "n_dmrs_cdm_groups %d is illegal\n",n_dmrs_cdm_groups);
...@@ -2190,8 +2192,8 @@ unsigned short nr_dlsch_extract_rbs_multiple(int **rxdataF, ...@@ -2190,8 +2192,8 @@ unsigned short nr_dlsch_extract_rbs_multiple(int **rxdataF,
dl_ch0 = &dl_ch_estimates[(aatx*frame_parms->nb_antennas_rx)+aarx][(validDmrsEst*(frame_parms->ofdm_symbol_size))]; dl_ch0 = &dl_ch_estimates[(aatx*frame_parms->nb_antennas_rx)+aarx][(validDmrsEst*(frame_parms->ofdm_symbol_size))];
dl_ch0_ext = &dl_ch_estimates_ext[(aatx*frame_parms->nb_antennas_rx)+aarx][symbol*(nb_rb_pdsch*NR_NB_SC_PER_RB)]; dl_ch0_ext = &dl_ch_estimates_ext[(aatx*frame_parms->nb_antennas_rx)+aarx][symbol*(nb_rb_pdsch*NR_NB_SC_PER_RB)];
for (rb = 0; rb < nb_rb_pdsch; rb++) { for (rb = 0; rb < nb_rb_pdsch; rb++)
{
if (pilots==0) {//data symbol only if (pilots==0) {//data symbol only
if (aatx==0) {//Extract Rx signal only if (aatx==0) {//Extract Rx signal only
memcpy((void*)rxF_ext,(void*)rxF,12*sizeof(*rxF_ext)); memcpy((void*)rxF_ext,(void*)rxF,12*sizeof(*rxF_ext));
...@@ -2199,7 +2201,8 @@ unsigned short nr_dlsch_extract_rbs_multiple(int **rxdataF, ...@@ -2199,7 +2201,8 @@ unsigned short nr_dlsch_extract_rbs_multiple(int **rxdataF,
} }
memcpy((void*)dl_ch0_ext,(void*)dl_ch0,12*sizeof(*dl_ch0_ext));//Extract Channel Estimate memcpy((void*)dl_ch0_ext,(void*)dl_ch0,12*sizeof(*dl_ch0_ext));//Extract Channel Estimate
dl_ch0_ext+=12; dl_ch0_ext+=12;
} else {//the symbol contains DMRS }
else {//the symbol contains DMRS
j=0; j=0;
if (config_type==pdsch_dmrs_type1) { if (config_type==pdsch_dmrs_type1) {
if (nushift == 0) {//data is multiplexed if (nushift == 0) {//data is multiplexed
...@@ -2211,7 +2214,8 @@ unsigned short nr_dlsch_extract_rbs_multiple(int **rxdataF, ...@@ -2211,7 +2214,8 @@ unsigned short nr_dlsch_extract_rbs_multiple(int **rxdataF,
dl_ch0_ext+=6; dl_ch0_ext+=6;
if (aatx==0) rxF_ext+=6; if (aatx==0) rxF_ext+=6;
} }
} else {//pdsch_dmrs_type2 }
else {//pdsch_dmrs_type2
for (i = (2+nushift); i<6; i++) { for (i = (2+nushift); i<6; i++) {
if (aatx==0) rxF_ext[j]=rxF[i]; if (aatx==0) rxF_ext[j]=rxF[i];
dl_ch0_ext[j]=dl_ch0[i]; dl_ch0_ext[j]=dl_ch0[i];
...@@ -2236,53 +2240,50 @@ unsigned short nr_dlsch_extract_rbs_multiple(int **rxdataF, ...@@ -2236,53 +2240,50 @@ unsigned short nr_dlsch_extract_rbs_multiple(int **rxdataF,
rxF = &rxdataF[aarx][k+(symbol*(frame_parms->ofdm_symbol_size))]; rxF = &rxdataF[aarx][k+(symbol*(frame_parms->ofdm_symbol_size))];
} }
} }
}//rb }//rb
}//aatx }//aatx
}//aarx }//aarx
return(nb_rb_pdsch); return(nb_rb_pdsch);
} }
void nr_dlsch_detection_mrc(int **rxdataF_comp, void nr_dlsch_detection_mrc(int **rxdataF_comp,
int **rho, int **rho,
int **dl_ch_mag, int **dl_ch_mag,
int **dl_ch_magb, int **dl_ch_magb,
short n_tx, short n_tx,
short n_rx, short n_rx,
unsigned char symbol, unsigned char symbol,
unsigned short nb_rb) { unsigned short nb_rb) {
#if defined(__x86_64__)||defined(__i386__) #if defined(__x86_64__)||defined(__i386__)
unsigned char aatx, aarx; unsigned char aatx, aarx;
int i; int i;
__m128i *rxdataF_comp128_0,*rxdataF_comp128_1,*dl_ch_mag128_0,*dl_ch_mag128_1,*dl_ch_mag128_0b,*dl_ch_mag128_1b; __m128i *rxdataF_comp128_0,*rxdataF_comp128_1,*dl_ch_mag128_0,*dl_ch_mag128_1,*dl_ch_mag128_0b,*dl_ch_mag128_1b;
if (n_rx>1) { if (n_rx>1) {
for (aatx=0; aatx<n_tx; aatx++) { for (aatx=0; aatx<n_tx; aatx++) {
rxdataF_comp128_0 = (__m128i *)&rxdataF_comp[(aatx*n_rx)][symbol*nb_rb*12];//aarx=0 rxdataF_comp128_0 = (__m128i *)&rxdataF_comp[(aatx*n_rx)][symbol*nb_rb*12];//aarx=0
dl_ch_mag128_0 = (__m128i *)&dl_ch_mag[(aatx*n_rx)][symbol*nb_rb*12];//aarx=0 dl_ch_mag128_0 = (__m128i *)&dl_ch_mag[(aatx*n_rx)][symbol*nb_rb*12];//aarx=0
dl_ch_mag128_0b = (__m128i *)&dl_ch_magb[(aatx*n_rx)][symbol*nb_rb*12]; dl_ch_mag128_0b = (__m128i *)&dl_ch_magb[(aatx*n_rx)][symbol*nb_rb*12];
for (aarx=1; aarx<n_rx; aarx++) {
for (aarx=1; aarx<n_rx; aarx++) { rxdataF_comp128_1 = (__m128i *)&rxdataF_comp[(aatx*n_rx)+aarx][symbol*nb_rb*12];// aarx=1,..., n_rx-1
rxdataF_comp128_1 = (__m128i *)&rxdataF_comp[(aatx*n_rx)+aarx][symbol*nb_rb*12];// aarx=1,..., n_rx-1 dl_ch_mag128_1 = (__m128i *)&dl_ch_mag[(aatx*n_rx)+aarx][symbol*nb_rb*12];
dl_ch_mag128_1 = (__m128i *)&dl_ch_mag[(aatx*n_rx)+aarx][symbol*nb_rb*12]; dl_ch_mag128_1b = (__m128i *)&dl_ch_magb[(aatx*n_rx)+aarx][symbol*nb_rb*12];
dl_ch_mag128_1b = (__m128i *)&dl_ch_magb[(aatx*n_rx)+aarx][symbol*nb_rb*12];
// MRC on each re of rb, both on MF output and magnitude (for 16QAM/64QAM llr computation)
// MRC on each re of rb, both on MF output and magnitude (for 16QAM/64QAM llr computation) for (i=0; i<nb_rb*3; i++) {
for (i=0; i<nb_rb*3; i++) { rxdataF_comp128_0[i] = _mm_adds_epi16(rxdataF_comp128_0[i],rxdataF_comp128_1[i]);
rxdataF_comp128_0[i] = _mm_adds_epi16(rxdataF_comp128_0[i],rxdataF_comp128_1[i]); dl_ch_mag128_0[i] = _mm_adds_epi16(dl_ch_mag128_0[i],dl_ch_mag128_1[i]);
dl_ch_mag128_0[i] = _mm_adds_epi16(dl_ch_mag128_0[i],dl_ch_mag128_1[i]); dl_ch_mag128_0b[i] = _mm_adds_epi16(dl_ch_mag128_0b[i],dl_ch_mag128_1b[i]);
dl_ch_mag128_0b[i] = _mm_adds_epi16(dl_ch_mag128_0b[i],dl_ch_mag128_1b[i]); /*if (i==0) {
/*if (i==0) { * printf("atx%d symbol%d\n",aatx,symbol);
printf("atx%d symbol%d\n",aatx,symbol); * printf("mrc comp0 re: %d mrc comp0 im: %d \n",((int16_t*)&rxdataF_comp128_0[0])[0],((int16_t*)&rxdataF_comp128_0[0])[1]);
printf("mrc comp0 re: %d mrc comp0 im: %d \n",((int16_t*)&rxdataF_comp128_0[0])[0],((int16_t*)&rxdataF_comp128_0[0])[1]); * printf("mrc mag0 = %d = %d \n",((int16_t*)&dl_ch_mag128_0[0])[0],((int16_t*)&dl_ch_mag128_0[0])[1]);
printf("mrc mag0 = %d = %d \n",((int16_t*)&dl_ch_mag128_0[0])[0],((int16_t*)&dl_ch_mag128_0[0])[1]); * printf("mrc mag0b = %d = %d \n",((int16_t*)&dl_ch_mag128_0b[0])[0],((int16_t*)&dl_ch_mag128_0b[0])[1]);
printf("mrc mag0b = %d = %d \n",((int16_t*)&dl_ch_mag128_0b[0])[0],((int16_t*)&dl_ch_mag128_0b[0])[1]); * }*/
}*/ }
} }
} }
} if (rho) {
if (rho) {
/*rho128_0 = (__m128i *) &rho[0][symbol*frame_parms->N_RB_DL*12]; /*rho128_0 = (__m128i *) &rho[0][symbol*frame_parms->N_RB_DL*12];
rho128_1 = (__m128i *) &rho[1][symbol*frame_parms->N_RB_DL*12]; rho128_1 = (__m128i *) &rho[1][symbol*frame_parms->N_RB_DL*12];
for (i=0; i<nb_rb*3; i++) { for (i=0; i<nb_rb*3; i++) {
...@@ -2290,7 +2291,7 @@ void nr_dlsch_detection_mrc(int **rxdataF_comp, ...@@ -2290,7 +2291,7 @@ void nr_dlsch_detection_mrc(int **rxdataF_comp,
// print_shorts("mrc rho1:",&rho128_1[i]); // print_shorts("mrc rho1:",&rho128_1[i]);
rho128_0[i] = _mm_adds_epi16(_mm_srai_epi16(rho128_0[i],1),_mm_srai_epi16(rho128_1[i],1)); rho128_0[i] = _mm_adds_epi16(_mm_srai_epi16(rho128_0[i],1),_mm_srai_epi16(rho128_1[i],1));
}*/ }*/
} }
_mm_empty(); _mm_empty();
_m_empty(); _m_empty();
} }
...@@ -2323,16 +2324,16 @@ void nr_det_HhH(int32_t *after_mf_00,//a ...@@ -2323,16 +2324,16 @@ void nr_det_HhH(int32_t *after_mf_00,//a
for (rb=0; rb<3*nb_rb; rb++) { for (rb=0; rb<3*nb_rb; rb++) {
// complex multiplication (I_a+jQ_a)(I_d+jQ_d) = (I_aI_d - Q_aQ_d) + j(Q_aI_d + I_aQ_d) //complex multiplication (I_a+jQ_a)(I_d+jQ_d) = (I_aI_d - Q_aQ_d) + j(Q_aI_d + I_aQ_d)
// The imag part is often zero, we compute only the real part //The imag part is often zero, we compute only the real part
ad_re_128 = _mm_sign_epi16(after_mf_00_128[0],*(__m128i*)&nr_conjug2[0]); ad_re_128 = _mm_sign_epi16(after_mf_00_128[0],*(__m128i*)&nr_conjug2[0]);
ad_re_128 = _mm_madd_epi16(ad_re_128,after_mf_11_128[0]); //Re: I_a0*I_d0 - Q_a1*Q_d1 ad_re_128 = _mm_madd_epi16(ad_re_128,after_mf_11_128[0]); //Re: I_a0*I_d0 - Q_a1*Q_d1
//ad_im_128 = _mm_shufflelo_epi16(after_mf_00_128[0],_MM_SHUFFLE(2,3,0,1));//permutes IQs for the low 64 bits as [I_a0 Q_a1 I_a2 Q_a3]_64bits to [Q_a1 I_a0 Q_a3 I_a2]_64bits //ad_im_128 = _mm_shufflelo_epi16(after_mf_00_128[0],_MM_SHUFFLE(2,3,0,1));//permutes IQs for the low 64 bits as [I_a0 Q_a1 I_a2 Q_a3]_64bits to [Q_a1 I_a0 Q_a3 I_a2]_64bits
//ad_im_128 = _mm_shufflehi_epi16(ad_im_128,_MM_SHUFFLE(2,3,0,1));//permutes IQs for the high 64 bits as [I_a0 Q_a1 I_a2 Q_a3]_64bits to [Q_a1 I_a0 Q_a3 I_a2]_64bits //ad_im_128 = _mm_shufflehi_epi16(ad_im_128,_MM_SHUFFLE(2,3,0,1));//permutes IQs for the high 64 bits as [I_a0 Q_a1 I_a2 Q_a3]_64bits to [Q_a1 I_a0 Q_a3 I_a2]_64bits
//ad_im_128 = _mm_madd_epi16(ad_im_128,after_mf_11_128[0]);//Im: (Q_aI_d + I_aQ_d) //ad_im_128 = _mm_madd_epi16(ad_im_128,after_mf_11_128[0]);//Im: (Q_aI_d + I_aQ_d)
// complex multiplication (I_b+jQ_b)(I_c+jQ_c) = (I_bI_c - Q_bQ_c) + j(Q_bI_c + I_bQ_c) //complex multiplication (I_b+jQ_b)(I_c+jQ_c) = (I_bI_c - Q_bQ_c) + j(Q_bI_c + I_bQ_c)
// The imag part is often zero, we compute only the real part //The imag part is often zero, we compute only the real part
bc_re_128 = _mm_sign_epi16(after_mf_01_128[0],*(__m128i*)&nr_conjug2[0]); bc_re_128 = _mm_sign_epi16(after_mf_01_128[0],*(__m128i*)&nr_conjug2[0]);
bc_re_128 = _mm_madd_epi16(bc_re_128,after_mf_10_128[0]); //Re: I_b0*I_c0 - Q_b1*Q_c1 bc_re_128 = _mm_madd_epi16(bc_re_128,after_mf_10_128[0]); //Re: I_b0*I_c0 - Q_b1*Q_c1
//bc_im_128 = _mm_shufflelo_epi16(after_mf_01_128[0],_MM_SHUFFLE(2,3,0,1));//permutes IQs for the low 64 bits as [I_b0 Q_b1 I_b2 Q_b3]_64bits to [Q_b1 I_b0 Q_b3 I_b2]_64bits //bc_im_128 = _mm_shufflelo_epi16(after_mf_01_128[0],_MM_SHUFFLE(2,3,0,1));//permutes IQs for the low 64 bits as [I_b0 Q_b1 I_b2 Q_b3]_64bits to [Q_b1 I_b0 Q_b3 I_b2]_64bits
...@@ -2354,11 +2355,11 @@ void nr_det_HhH(int32_t *after_mf_00,//a ...@@ -2354,11 +2355,11 @@ void nr_det_HhH(int32_t *after_mf_00,//a
det_fin_128[0] = _mm_abs_epi32(det_re_128); det_fin_128[0] = _mm_abs_epi32(det_re_128);
/*if ((rb==0)&&(symbol==1)) { /*if ((rb==0)&&(symbol==1)) {
printf("\n Computing det_HhH_inv \n"); * printf("\n Computing det_HhH_inv \n");
print_ints("det_re_128:",(int32_t*)&det_re_128); * print_ints("det_re_128:",(int32_t*)&det_re_128);
//print_ints("det_im_128:",(int32_t*)&det_im_128); * print_ints("det_im_128:",(int32_t*)&det_im_128);
print_ints("det_fin_128:",(int32_t*)&det_fin_128[0]); * print_ints("det_fin_128:",(int32_t*)&det_fin_128[0]);
}*/ * }*/
det_fin_128+=1; det_fin_128+=1;
after_mf_00_128+=1; after_mf_00_128+=1;
...@@ -2370,7 +2371,6 @@ void nr_det_HhH(int32_t *after_mf_00,//a ...@@ -2370,7 +2371,6 @@ void nr_det_HhH(int32_t *after_mf_00,//a
_m_empty(); _m_empty();
} }
/* Zero Forcing Rx function: nr_inv_comp_muli /* Zero Forcing Rx function: nr_inv_comp_muli
* Complex number multi: z = x*y * Complex number multi: z = x*y
* = (x_re*y_re - x_im*y_im) + j(x_im*y_re + x_re*y_im) * = (x_re*y_re - x_im*y_im) + j(x_im*y_re + x_re*y_im)
...@@ -2395,7 +2395,6 @@ __m128i nr_inv_comp_muli(__m128i input_x, ...@@ -2395,7 +2395,6 @@ __m128i nr_inv_comp_muli(__m128i input_x,
xy_im_128 = _mm_madd_epi16(xy_im_128,input_y);//Im: (x_im*y_re + x_re*y_im) xy_im_128 = _mm_madd_epi16(xy_im_128,input_y);//Im: (x_im*y_re + x_re*y_im)
//convert back to Q15 before packing //convert back to Q15 before packing
xy_re_128 = _mm_srai_epi32(xy_re_128,4);//(2^15/64*2*16) xy_re_128 = _mm_srai_epi32(xy_re_128,4);//(2^15/64*2*16)
xy_im_128 = _mm_srai_epi32(xy_im_128,4); xy_im_128 = _mm_srai_epi32(xy_im_128,4);
...@@ -2404,6 +2403,7 @@ __m128i nr_inv_comp_muli(__m128i input_x, ...@@ -2404,6 +2403,7 @@ __m128i nr_inv_comp_muli(__m128i input_x,
tmp_z1 = _mm_unpackhi_epi32(xy_re_128,xy_im_128); tmp_z1 = _mm_unpackhi_epi32(xy_re_128,xy_im_128);
//print_ints("unpack hi:",&tmp_z1[0]); //print_ints("unpack hi:",&tmp_z1[0]);
output_z = _mm_packs_epi32(tmp_z0,tmp_z1); output_z = _mm_packs_epi32(tmp_z0,tmp_z1);
_mm_empty(); _mm_empty();
_m_empty(); _m_empty();
return(output_z); return(output_z);
...@@ -2461,27 +2461,27 @@ void nr_conjch0_mult_ch1(int *ch0, ...@@ -2461,27 +2461,27 @@ void nr_conjch0_mult_ch1(int *ch0,
* *
* */ * */
void nr_construct_HhH_elements(int *conjch00_ch00, void nr_construct_HhH_elements(int *conjch00_ch00,
int *conjch01_ch01, int *conjch01_ch01,
int *conjch11_ch11, int *conjch11_ch11,
int *conjch10_ch10,// int *conjch10_ch10,//
int *conjch20_ch20, int *conjch20_ch20,
int *conjch21_ch21, int *conjch21_ch21,
int *conjch30_ch30, int *conjch30_ch30,
int *conjch31_ch31, int *conjch31_ch31,
int *conjch00_ch01,//00_01 int *conjch00_ch01,//00_01
int *conjch01_ch00,//01_00 int *conjch01_ch00,//01_00
int *conjch10_ch11,//10_11 int *conjch10_ch11,//10_11
int *conjch11_ch10,//11_10 int *conjch11_ch10,//11_10
int *conjch20_ch21, int *conjch20_ch21,
int *conjch21_ch20, int *conjch21_ch20,
int *conjch30_ch31, int *conjch30_ch31,
int *conjch31_ch30, int *conjch31_ch30,
int32_t *after_mf_00, int32_t *after_mf_00,
int32_t *after_mf_01, int32_t *after_mf_01,
int32_t *after_mf_10, int32_t *after_mf_10,
int32_t *after_mf_11, int32_t *after_mf_11,
unsigned short nb_rb, unsigned short nb_rb,
unsigned char symbol) unsigned char symbol)
{ {
//This function is used to construct the (H_hermitian * H matrix) matrix elements //This function is used to construct the (H_hermitian * H matrix) matrix elements
unsigned short rb; unsigned short rb;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment