diff --git a/openair1/PHY/NR_UE_TRANSPORT/csi_rx.c b/openair1/PHY/NR_UE_TRANSPORT/csi_rx.c
index 5465e228743ab3ea3932955419ab54534a725d8f..6b7c04506198df6857795b906af8b78555e5be53 100644
--- a/openair1/PHY/NR_UE_TRANSPORT/csi_rx.c
+++ b/openair1/PHY/NR_UE_TRANSPORT/csi_rx.c
@@ -372,6 +372,12 @@ int nr_csi_rs_ri_estimation(PHY_VARS_NR_UE *ue,
    * | conjch01 conjch11 | | ch10 ch11 | | conjch01*ch00+conjch11*ch10 conjch01*ch01+conjch11*ch11 |
    */
 
+  for(uint16_t port_tx_conjch = 0; port_tx_conjch < nr_csi_rs_info->N_ports; port_tx_conjch++) {
+    for(uint16_t port_tx_ch = 0; port_tx_ch < nr_csi_rs_info->N_ports; port_tx_ch++) {
+      memset(nr_csi_rs_info->csi_rs_estimated_A_MF[port_tx_conjch][port_tx_ch],0,NR_MAX_OFDM_SYMBOL_SIZE*sizeof(int32_t));
+    }
+  }
+
   for (int rb = csirs_config_pdu->start_rb; rb < (csirs_config_pdu->start_rb+csirs_config_pdu->nr_of_rbs); rb++) {
 
     if (csirs_config_pdu->freq_density <= 1 && csirs_config_pdu->freq_density != (rb % 2)) {
@@ -379,66 +385,48 @@ int nr_csi_rs_ri_estimation(PHY_VARS_NR_UE *ue,
     }
     uint16_t k = (frame_parms->first_carrier_offset + rb*NR_NB_SC_PER_RB) % frame_parms->ofdm_symbol_size;
 
-    // conjch x ch computation
     for (int ant_rx_conjch = 0; ant_rx_conjch < frame_parms->nb_antennas_rx; ant_rx_conjch++) {
       for(uint16_t port_tx_conjch = 0; port_tx_conjch < nr_csi_rs_info->N_ports; port_tx_conjch++) {
         for (int ant_rx_ch = 0; ant_rx_ch < frame_parms->nb_antennas_rx; ant_rx_ch++) {
           for(uint16_t port_tx_ch = 0; port_tx_ch < nr_csi_rs_info->N_ports; port_tx_ch++) {
+
+            // conjch x ch computation
             nr_conjch0_mult_ch1(&csi_rs_estimated_channel_freq[ant_rx_conjch][port_tx_conjch][k],
                                 &csi_rs_estimated_channel_freq[ant_rx_ch][port_tx_ch][k],
                                 &nr_csi_rs_info->csi_rs_estimated_conjch_ch[ant_rx_conjch][port_tx_conjch][ant_rx_ch][port_tx_ch][k],
                                 1,
                                 0);
+
+            // construct Hh x H elements
+            if(ant_rx_conjch == ant_rx_ch) {
+              nr_a_sum_b((__m128i *)&nr_csi_rs_info->csi_rs_estimated_A_MF[port_tx_conjch][port_tx_ch][k],
+                         (__m128i *)&nr_csi_rs_info->csi_rs_estimated_conjch_ch[ant_rx_conjch][port_tx_conjch][ant_rx_ch][port_tx_ch][k],
+                         1);
+            }
           }
         }
       }
     }
 
-    // construct Hh x H elements
-    nr_construct_HhH_elements(0 < frame_parms->nb_antennas_rx && 0 < nr_csi_rs_info->N_ports ? &nr_csi_rs_info->csi_rs_estimated_conjch_ch[0][0][0][0][k] : NULL,
-                              0 < frame_parms->nb_antennas_rx && 1 < nr_csi_rs_info->N_ports ? &nr_csi_rs_info->csi_rs_estimated_conjch_ch[0][1][0][1][k] : NULL,
-                              1 < frame_parms->nb_antennas_rx && 1 < nr_csi_rs_info->N_ports ? &nr_csi_rs_info->csi_rs_estimated_conjch_ch[1][1][1][1][k] : NULL,
-                              1 < frame_parms->nb_antennas_rx && 0 < nr_csi_rs_info->N_ports ? &nr_csi_rs_info->csi_rs_estimated_conjch_ch[1][0][1][0][k] : NULL,
-                              2 < frame_parms->nb_antennas_rx && 0 < nr_csi_rs_info->N_ports ? &nr_csi_rs_info->csi_rs_estimated_conjch_ch[2][0][2][0][k] : NULL,
-                              2 < frame_parms->nb_antennas_rx && 1 < nr_csi_rs_info->N_ports ? &nr_csi_rs_info->csi_rs_estimated_conjch_ch[2][1][2][1][k] : NULL,
-                              3 < frame_parms->nb_antennas_rx && 0 < nr_csi_rs_info->N_ports ? &nr_csi_rs_info->csi_rs_estimated_conjch_ch[3][0][3][0][k] : NULL,
-                              3 < frame_parms->nb_antennas_rx && 1 < nr_csi_rs_info->N_ports ? &nr_csi_rs_info->csi_rs_estimated_conjch_ch[3][1][3][1][k] : NULL,
-                              0 < frame_parms->nb_antennas_rx && 1 < nr_csi_rs_info->N_ports ? &nr_csi_rs_info->csi_rs_estimated_conjch_ch[0][0][0][1][k] : NULL,
-                              0 < frame_parms->nb_antennas_rx && 1 < nr_csi_rs_info->N_ports ? &nr_csi_rs_info->csi_rs_estimated_conjch_ch[0][1][0][0][k] : NULL,
-                              1 < frame_parms->nb_antennas_rx && 1 < nr_csi_rs_info->N_ports ? &nr_csi_rs_info->csi_rs_estimated_conjch_ch[1][0][1][1][k] : NULL,
-                              1 < frame_parms->nb_antennas_rx && 1 < nr_csi_rs_info->N_ports ? &nr_csi_rs_info->csi_rs_estimated_conjch_ch[1][1][1][0][k] : NULL,
-                              2 < frame_parms->nb_antennas_rx && 1 < nr_csi_rs_info->N_ports ? &nr_csi_rs_info->csi_rs_estimated_conjch_ch[2][0][2][1][k] : NULL,
-                              2 < frame_parms->nb_antennas_rx && 1 < nr_csi_rs_info->N_ports ? &nr_csi_rs_info->csi_rs_estimated_conjch_ch[2][1][2][0][k] : NULL,
-                              3 < frame_parms->nb_antennas_rx && 1 < nr_csi_rs_info->N_ports ? &nr_csi_rs_info->csi_rs_estimated_conjch_ch[3][0][3][1][k] : NULL,
-                              3 < frame_parms->nb_antennas_rx && 1 < nr_csi_rs_info->N_ports ? &nr_csi_rs_info->csi_rs_estimated_conjch_ch[3][1][3][0][k] : NULL,
-                              &nr_csi_rs_info->csi_rs_estimated_A_MF[0][0][k],
-                              &nr_csi_rs_info->csi_rs_estimated_A_MF[0][1][k],
-                              &nr_csi_rs_info->csi_rs_estimated_A_MF[1][0][k],
-                              &nr_csi_rs_info->csi_rs_estimated_A_MF[1][1][k],
-                              1,
-                              0);
-
     // compute the determinant of A_MF (denominator)
-    nr_det_HhH(&nr_csi_rs_info->csi_rs_estimated_A_MF[0][0][k],
-               &nr_csi_rs_info->csi_rs_estimated_A_MF[0][1][k],
-               &nr_csi_rs_info->csi_rs_estimated_A_MF[1][0][k],
-               &nr_csi_rs_info->csi_rs_estimated_A_MF[1][1][k],
-               &nr_csi_rs_info->csi_rs_estimated_determ_fin[k],
-               1,
-               0,
-               0);
+    nr_det_A_MF_2x2(&nr_csi_rs_info->csi_rs_estimated_A_MF[0][0][k],
+                    &nr_csi_rs_info->csi_rs_estimated_A_MF[0][1][k],
+                    &nr_csi_rs_info->csi_rs_estimated_A_MF[1][0][k],
+                    &nr_csi_rs_info->csi_rs_estimated_A_MF[1][1][k],
+                    &nr_csi_rs_info->csi_rs_estimated_determ_fin[k],
+                    1);
 
     // compute the square of A_MF (numerator)
-    squared_matrix_element(&nr_csi_rs_info->csi_rs_estimated_A_MF[0][0][k], &nr_csi_rs_info->csi_rs_estimated_A_MF_sq[0][0][k], 1);
-    squared_matrix_element(&nr_csi_rs_info->csi_rs_estimated_A_MF[0][1][k], &nr_csi_rs_info->csi_rs_estimated_A_MF_sq[0][1][k], 1);
-    squared_matrix_element(&nr_csi_rs_info->csi_rs_estimated_A_MF[1][0][k], &nr_csi_rs_info->csi_rs_estimated_A_MF_sq[1][0][k], 1);
-    squared_matrix_element(&nr_csi_rs_info->csi_rs_estimated_A_MF[1][1][k], &nr_csi_rs_info->csi_rs_estimated_A_MF_sq[1][1][k], 1);
-    numer(&nr_csi_rs_info->csi_rs_estimated_A_MF_sq[0][0][k],
-          &nr_csi_rs_info->csi_rs_estimated_A_MF_sq[0][1][k],
-          &nr_csi_rs_info->csi_rs_estimated_A_MF_sq[1][0][k],
-          &nr_csi_rs_info->csi_rs_estimated_A_MF_sq[1][1][k],
-          &nr_csi_rs_info->csi_rs_estimated_numer_fin[k],
-          1);
+    nr_squared_matrix_element(&nr_csi_rs_info->csi_rs_estimated_A_MF[0][0][k], &nr_csi_rs_info->csi_rs_estimated_A_MF_sq[0][0][k], 1);
+    nr_squared_matrix_element(&nr_csi_rs_info->csi_rs_estimated_A_MF[0][1][k], &nr_csi_rs_info->csi_rs_estimated_A_MF_sq[0][1][k], 1);
+    nr_squared_matrix_element(&nr_csi_rs_info->csi_rs_estimated_A_MF[1][0][k], &nr_csi_rs_info->csi_rs_estimated_A_MF_sq[1][0][k], 1);
+    nr_squared_matrix_element(&nr_csi_rs_info->csi_rs_estimated_A_MF[1][1][k], &nr_csi_rs_info->csi_rs_estimated_A_MF_sq[1][1][k], 1);
+    nr_numer_2x2(&nr_csi_rs_info->csi_rs_estimated_A_MF_sq[0][0][k],
+                 &nr_csi_rs_info->csi_rs_estimated_A_MF_sq[0][1][k],
+                 &nr_csi_rs_info->csi_rs_estimated_A_MF_sq[1][0][k],
+                 &nr_csi_rs_info->csi_rs_estimated_A_MF_sq[1][1][k],
+                 &nr_csi_rs_info->csi_rs_estimated_numer_fin[k],
+                 1);
 
     // compute the conditional number
     for (int sc_idx=0; sc_idx < NR_NB_SC_PER_RB; sc_idx++) {
diff --git a/openair1/PHY/NR_UE_TRANSPORT/nr_dlsch_demodulation.c b/openair1/PHY/NR_UE_TRANSPORT/nr_dlsch_demodulation.c
index 69eccaf02e7c541c826a1d99a5c1c4a0faa97d68..07e7e5c3bbe6b7b0c36bb1626676880a35670323 100644
--- a/openair1/PHY/NR_UE_TRANSPORT/nr_dlsch_demodulation.c
+++ b/openair1/PHY/NR_UE_TRANSPORT/nr_dlsch_demodulation.c
@@ -1869,6 +1869,93 @@ void nr_dlsch_detection_mrc(int **rxdataF_comp,
 #endif
 }
 
+/* RI estimation: nr_det_A_MF_2x2()
+ * Compute det_fin = |Re(a_mf_00*a_mf_11) - Re(a_mf_01*a_mf_10)| for the 2x2 matrix A_MF.
+ *
+ * Every int32_t entry is handled as a packed (real, imag) pair of int16_t and
+ * only the real part of each complex product is evaluated.
+ * 12*nb_rb entries of each buffer are accessed (3 vectors of 4 entries per RB).
+ * Unaligned loads/stores are used, so the buffers need no 16-byte alignment.
+ */
+void nr_det_A_MF_2x2(int32_t *a_mf_00,
+                     int32_t *a_mf_01,
+                     int32_t *a_mf_10,
+                     int32_t *a_mf_11,
+                     int32_t *det_fin,
+                     unsigned short nb_rb) {
+
+  // sign mask (+,-) per sample: _mm_sign_epi16() with it negates the imag halves
+  const __m128i neg_imag_128 = _mm_setr_epi16(1, -1, 1, -1, 1, -1, 1, -1);
+
+  const __m128i *a_mf_00_128 = (const __m128i *)a_mf_00;
+  const __m128i *a_mf_01_128 = (const __m128i *)a_mf_01;
+  const __m128i *a_mf_10_128 = (const __m128i *)a_mf_10;
+  const __m128i *a_mf_11_128 = (const __m128i *)a_mf_11;
+  __m128i *det_fin_128 = (__m128i *)det_fin;
+
+  for (int i = 0; i < 3*nb_rb; i++) {
+
+    //complex multiplication (I_a+jQ_a)(I_d+jQ_d) = (I_aI_d - Q_aQ_d) + j(Q_aI_d + I_aQ_d)
+    //The imag part is often zero, we compute only the real part
+    __m128i ad_re_128 = _mm_sign_epi16(_mm_loadu_si128(&a_mf_00_128[i]), neg_imag_128);
+    ad_re_128 = _mm_madd_epi16(ad_re_128, _mm_loadu_si128(&a_mf_11_128[i])); //Re: I_a*I_d - Q_a*Q_d
+
+    //complex multiplication (I_b+jQ_b)(I_c+jQ_c) = (I_bI_c - Q_bQ_c) + j(Q_bI_c + I_bQ_c)
+    //The imag part is often zero, we compute only the real part
+    __m128i bc_re_128 = _mm_sign_epi16(_mm_loadu_si128(&a_mf_01_128[i]), neg_imag_128);
+    bc_re_128 = _mm_madd_epi16(bc_re_128, _mm_loadu_si128(&a_mf_10_128[i])); //Re: I_b*I_c - Q_b*Q_c
+
+    //det in Q30 format
+    _mm_storeu_si128(&det_fin_128[i], _mm_abs_epi32(_mm_sub_epi32(ad_re_128, bc_re_128)));
+  }
+
+  // _m_empty() is an alias of _mm_empty(), a single call is enough
+  _mm_empty();
+}
+
+/* RI estimation: nr_squared_matrix_element()
+ * Compute a_sq = real^2 + imag^2 of every packed (real, imag) int16_t entry of a,
+ * for 12*nb_rb entries. Unaligned loads/stores are used.
+ */
+void nr_squared_matrix_element(int32_t *a,
+                               int32_t *a_sq,
+                               unsigned short nb_rb) {
+  const __m128i *a_128 = (const __m128i *)a;
+  __m128i *a_sq_128 = (__m128i *)a_sq;
+
+  for (int i = 0; i < 3*nb_rb; i++) {
+    const __m128i a_i_128 = _mm_loadu_si128(&a_128[i]);
+    _mm_storeu_si128(&a_sq_128[i], _mm_madd_epi16(a_i_128, a_i_128));
+  }
+
+  _mm_empty();
+}
+
+/* RI estimation: nr_numer_2x2()
+ * Compute num_fin = a_00_sq + a_01_sq + a_10_sq + a_11_sq with 32-bit additions,
+ * for 12*nb_rb entries. Unaligned loads/stores are used.
+ */
+void nr_numer_2x2(int32_t *a_00_sq,
+                  int32_t *a_01_sq,
+                  int32_t *a_10_sq,
+                  int32_t *a_11_sq,
+                  int32_t *num_fin,
+                  unsigned short nb_rb) {
+  const __m128i *a_00_sq_128 = (const __m128i *)a_00_sq;
+  const __m128i *a_01_sq_128 = (const __m128i *)a_01_sq;
+  const __m128i *a_10_sq_128 = (const __m128i *)a_10_sq;
+  const __m128i *a_11_sq_128 = (const __m128i *)a_11_sq;
+  __m128i *num_fin_128 = (__m128i *)num_fin;
+
+  for (int i = 0; i < 3*nb_rb; i++) {
+    const __m128i sq_a_plus_sq_d_128 = _mm_add_epi32(_mm_loadu_si128(&a_00_sq_128[i]), _mm_loadu_si128(&a_11_sq_128[i]));
+    const __m128i sq_b_plus_sq_c_128 = _mm_add_epi32(_mm_loadu_si128(&a_01_sq_128[i]), _mm_loadu_si128(&a_10_sq_128[i]));
+    _mm_storeu_si128(&num_fin_128[i], _mm_add_epi32(sq_a_plus_sq_d_128, sq_b_plus_sq_c_128));
+  }
+
+  _mm_empty();
+}
+
 /* Zero Forcing Rx function: nr_a_sum_b()
  * Compute the complex addition x=x+y
  *
diff --git a/openair1/PHY/NR_UE_TRANSPORT/nr_transport_proto_ue.h b/openair1/PHY/NR_UE_TRANSPORT/nr_transport_proto_ue.h
index e1df279abf47f7c5935bcef0e0a7d3e102e23419..c2bf41c92321711a462e5dfea9e36b81efb8f9ce 100644
--- a/openair1/PHY/NR_UE_TRANSPORT/nr_transport_proto_ue.h
+++ b/openair1/PHY/NR_UE_TRANSPORT/nr_transport_proto_ue.h
@@ -851,10 +851,6 @@ void construct_HhH_elements(int *ch0conj_ch0,
                             int32_t *after_mf_11,
                             unsigned short nb_rb);
 
-void squared_matrix_element(int32_t *Hh_h_00,
-                            int32_t *Hh_h_00_sq,
-                            unsigned short nb_rb);
-
 void dlsch_channel_level_TM34_meas(int *ch00,
                                    int *ch01,
                                    int *ch10,
@@ -881,19 +877,33 @@ void nr_dlsch_detection_mrc(int **rxdataF_comp,
                             unsigned short nb_rb,
                             int length);
 
-void det_HhH(int32_t *after_mf_00,
-             int32_t *after_mf_01,
-             int32_t *after_mf_10,
-             int32_t *after_mf_11,
-             int32_t *det_fin_128,
-             unsigned short nb_rb);
-
-void numer(int32_t *Hh_h_00_sq,
-           int32_t *Hh_h_01_sq,
-           int32_t *Hh_h_10_sq,
-           int32_t *Hh_h_11_sq,
-           int32_t *num_fin,
-           unsigned short nb_rb);
+void nr_conjch0_mult_ch1(int *ch0,
+                         int *ch1,
+                         int32_t *ch0conj_ch1,
+                         unsigned short nb_rb,
+                         unsigned char output_shift0);
+
+void nr_a_sum_b(__m128i *input_x,
+                __m128i *input_y,
+                unsigned short nb_rb);
+
+void nr_det_A_MF_2x2(int32_t *a_mf_00,
+                     int32_t *a_mf_01,
+                     int32_t *a_mf_10,
+                     int32_t *a_mf_11,
+                     int32_t *det_fin,
+                     unsigned short nb_rb);
+
+void nr_squared_matrix_element(int32_t *a,
+                               int32_t *a_sq,
+                               unsigned short nb_rb);
+
+void nr_numer_2x2(int32_t *a_00_sq,
+                  int32_t *a_01_sq,
+                  int32_t *a_10_sq,
+                  int32_t *a_11_sq,
+                  int32_t *num_fin,
+                  unsigned short nb_rb);
 
 uint8_t rank_estimation_tm3_tm4(int *dl_ch_estimates_00,
                                 int *dl_ch_estimates_01,