From ba55fcac27fb34dad97beea1bcd609a6dfa8a88d Mon Sep 17 00:00:00 2001 From: hbilel <haithem.bilel@alcatelonetouch.com> Date: Mon, 6 Mar 2017 12:13:20 +0100 Subject: [PATCH] [OAI-UE] Mimo LLR computation AVX2 + fix in TM3 deprecoding --- cmake_targets/CMakeLists.txt | 1 + .../PHY/LTE_TRANSPORT/dlsch_demodulation.c | 23 ++++++++------ .../PHY/LTE_TRANSPORT/dlsch_llr_computation.c | 31 ++++++++++++++++++- openair1/PHY/LTE_TRANSPORT/proto.h | 16 ++++++++++ openair1/PHY/Makefile.inc | 1 + openair3/NAS/UE/ESM/esm_ebr_context.c | 2 +- targets/RT/USER/lte-softmodem.c | 8 ++++- 7 files changed, 70 insertions(+), 12 deletions(-) diff --git a/cmake_targets/CMakeLists.txt b/cmake_targets/CMakeLists.txt index e4d3acf4b1..0bf63cfb1a 100644 --- a/cmake_targets/CMakeLists.txt +++ b/cmake_targets/CMakeLists.txt @@ -1034,6 +1034,7 @@ set(PHY_SRC ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/dlsch_modulation.c ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/dlsch_demodulation.c ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/dlsch_llr_computation.c + ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/dlsch_llr_computation_avx2.c ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/power_control.c ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/dlsch_decoding.c ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/dlsch_scrambling.c diff --git a/openair1/PHY/LTE_TRANSPORT/dlsch_demodulation.c b/openair1/PHY/LTE_TRANSPORT/dlsch_demodulation.c index 9ae212cd27..7b1fb3cd50 100644 --- a/openair1/PHY/LTE_TRANSPORT/dlsch_demodulation.c +++ b/openair1/PHY/LTE_TRANSPORT/dlsch_demodulation.c @@ -42,6 +42,7 @@ #define NOCYGWIN_STATIC #endif +extern int16_t dlsch_demod_shift; //#define DEBUG_HARQ //#undef LOG_D @@ -402,8 +403,8 @@ int rx_pdsch(PHY_VARS_UE *ue, LOG_D(PHY,"Channel Level TM34 avg_0 %d, avg_1 %d, rx_type %d, rx_standard %d, interf_unaw_shift %d \n", avg_0[0], avg_1[0], rx_type, rx_standard, interf_unaw_shift); if (rx_type>rx_standard) { - avg_0[0] = (log2_approx(avg_0[0])/2) - 5 + 2 ;//+ 2; - avg_1[0] = (log2_approx(avg_1[0])/2) - 5 + 2 ;//+ 2; + avg_0[0] = (log2_approx(avg_0[0])/2) + dlsch_demod_shift;// + 2 ;//+ 4; + avg_1[0] = (log2_approx(avg_1[0])/2) + dlsch_demod_shift;// + 2 ;//+ 4; pdsch_vars[eNB_id]->log2_maxh0 = cmax(avg_0[0],0); pdsch_vars[eNB_id]->log2_maxh1 = cmax(avg_1[0],0); //printf("TM4 I-A log2_maxh0 = %d\n", pdsch_vars[eNB_id]->log2_maxh0); @@ -1067,9 +1068,9 @@ int rx_pdsch(PHY_VARS_UE *ue, write_output("dl_ch_estimates_ext10.m", "dl_ch_estimates_ext10", &pdsch_vars[eNB_id]->dl_ch_estimates_ext[2][0],14*frame_parms->N_RB_DL*12,1,1); write_output("dl_ch_estimates_ext11.m", "dl_ch_estimates_ext11", &pdsch_vars[eNB_id]->dl_ch_estimates_ext[3][0],14*frame_parms->N_RB_DL*12,1,1); write_output("rxdataF_comp00.m","rxdataF_comp00", &pdsch_vars[eNB_id]->rxdataF_comp0[0][0],14*frame_parms->N_RB_DL*12,1,1); - write_output("rxdataF_comp01.m","rxdataF_comp01", &pdsch_vars[eNB_id]->rxdataF_comp0[0][0],14*frame_parms->N_RB_DL*12,1,1); - write_output("rxdataF_comp10.m","rxdataF_comp10", &pdsch_vars[eNB_id]->rxdataF_comp0[0][0],14*frame_parms->N_RB_DL*12,1,1); - write_output("rxdataF_comp11.m","rxdataF_comp11", &pdsch_vars[eNB_id]->rxdataF_comp0[0][0],14*frame_parms->N_RB_DL*12,1,1); + write_output("rxdataF_comp01.m","rxdataF_comp01", &pdsch_vars[eNB_id]->rxdataF_comp0[1][0],14*frame_parms->N_RB_DL*12,1,1); + write_output("rxdataF_comp10.m","rxdataF_comp10", &pdsch_vars[eNB_id]->rxdataF_comp1[harq_pid][round][0][0],14*frame_parms->N_RB_DL*12,1,1); + write_output("rxdataF_comp11.m","rxdataF_comp11", &pdsch_vars[eNB_id]->rxdataF_comp1[harq_pid][round][1][0],14*frame_parms->N_RB_DL*12,1,1); #endif write_output("llr0.m","llr0", &pdsch_vars[eNB_id]->llr[0][0],(14*nb_rb*12*dlsch1_harq->Qm) - 4*(nb_rb*4*dlsch1_harq->Qm),1,0); write_output("llr1.m","llr1", &pdsch_vars[eNB_id]->llr[1][0],(14*nb_rb*12*dlsch1_harq->Qm) - 4*(nb_rb*4*dlsch1_harq->Qm),1,0); @@ -1666,9 +1667,8 @@ void prec2A_TM3_128(__m128i *ch0,__m128i *ch1) { __m128i tmp0,tmp1; - // sqrt(2) is already taken into account in computation sqrt_rho_a, sqrt_rho_b, - //so divide by 2 is replaced by divide by sqrt(2). +//_mm_mulhi_epi16 // print_shorts("prec2A_TM3 ch0 (before):",ch0); // print_shorts("prec2A_TM3 ch1 (before):",ch1); @@ -1679,6 +1679,11 @@ void prec2A_TM3_128(__m128i *ch0,__m128i *ch1) { ch0[0] = _mm_adds_epi16(ch0[0],tmp1); ch1[0] = _mm_subs_epi16(tmp0,tmp1); + ch0[0] = _mm_mulhi_epi16(ch0[0],amp); + ch0[0] = _mm_slli_epi16(ch0[0],1); + + ch1[0] = _mm_mulhi_epi16(ch1[0],amp); + ch1[0] = _mm_slli_epi16(ch1[0],1); // print_shorts("prec2A_TM3 ch0 (mid):",&tmp0); // print_shorts("prec2A_TM3 ch1 (mid):",ch1); @@ -1688,8 +1693,8 @@ void prec2A_TM3_128(__m128i *ch0,__m128i *ch1) { ch1[0] = _mm_mulhi_epi16(ch1[0],amp); ch1[0] = _mm_slli_epi16(ch1[0],1); - // ch0[0] = _mm_srai_epi16(ch0[0],1); - // ch1[0] = _mm_srai_epi16(ch1[0],1); + //ch0[0] = _mm_srai_epi16(ch0[0],1); + //ch1[0] = _mm_srai_epi16(ch1[0],1); // print_shorts("prec2A_TM3 ch0 (after):",ch0); // print_shorts("prec2A_TM3 ch1 (after):",ch1); diff --git a/openair1/PHY/LTE_TRANSPORT/dlsch_llr_computation.c b/openair1/PHY/LTE_TRANSPORT/dlsch_llr_computation.c index 0b73e2b023..c33dec87d3 100644 --- a/openair1/PHY/LTE_TRANSPORT/dlsch_llr_computation.c +++ b/openair1/PHY/LTE_TRANSPORT/dlsch_llr_computation.c @@ -8831,6 +8831,7 @@ int dlsch_64qam_64qam_llr(LTE_DL_FRAME_PARMS *frame_parms, len = (nb_rb*12) - pbch_pss_sss_adjust; } +#if 0 qam64_qam64((short *)rxF, (short *)rxF_i, (short *)ch_mag, @@ -8838,7 +8839,35 @@ int dlsch_64qam_64qam_llr(LTE_DL_FRAME_PARMS *frame_parms, (short *)llr16, (short *)rho, len); - +#else + // Round length up to multiple of 16 words + uint32_t len256i = ((len+16)>>4)*16; + int32_t *rxF_256i = (int32_t*) malloc16_clear(len256i*4); + int32_t *rxF_i_256i = (int32_t*) malloc16_clear(len256i*4); + int32_t *ch_mag_256i = (int32_t*) malloc16_clear(len256i*4); + int32_t *ch_mag_i_256i = (int32_t*) malloc16_clear(len256i*4); + int32_t *rho_256i = (int32_t*) malloc16_clear(len256i*4); + + memcpy(rxF_256i, rxF, len*4); + memcpy(rxF_i_256i, rxF_i, len*4); + memcpy(ch_mag_256i, ch_mag, len*4); + memcpy(ch_mag_i_256i, ch_mag_i, len*4); + memcpy(rho_256i, rho, len*4); + + qam64_qam64_avx2((int32_t *)rxF_256i, + (int32_t *)rxF_i_256i, + (int32_t *)ch_mag_256i, + (int32_t *)ch_mag_i_256i, + (int16_t *)llr16, + (int32_t *) rho_256i, + len); + + free16(rxF_256i, sizeof(rxF_256i)); + free16(rxF_i_256i, sizeof(rxF_i_256i)); + free16(ch_mag_256i, sizeof(ch_mag_256i)); + free16(ch_mag_i_256i, sizeof(ch_mag_i_256i)); + free16(rho_256i, sizeof(rho_256i)); +#endif llr16 += (6*len); *llr16p = (short *)llr16; return(0); diff --git a/openair1/PHY/LTE_TRANSPORT/proto.h b/openair1/PHY/LTE_TRANSPORT/proto.h index 5fc8dea986..a5aa145e17 100644 --- a/openair1/PHY/LTE_TRANSPORT/proto.h +++ b/openair1/PHY/LTE_TRANSPORT/proto.h @@ -746,6 +746,22 @@ void qam64_qam64(short *stream0_in, short *rho01, int length); +/** \brief This function computes the LLRs for ML (max-logsum approximation) dual-stream 64QAM/64QAM reception. + @param stream0_in Input from channel compensated (MR combined) stream 0 + @param stream1_in Input from channel compensated (MR combined) stream 1 + @param ch_mag Input from scaled channel magnitude square of h0'*g0 + @param ch_mag_i Input from scaled channel magnitude square of h0'*g1 + @param stream0_out Output from LLR unit for stream0 + @param rho01 Cross-correlation between channels (MR combined) + @param length in complex channel outputs*/ +void qam64_qam64_avx2(int32_t *stream0_in, + int32_t *stream1_in, + int32_t *ch_mag, + int32_t *ch_mag_i, + int16_t *stream0_out, + int32_t *rho01, + int length); + /** \brief This function perform LLR computation for dual-stream (64QAM/64QAM) transmission. @param frame_parms Frame descriptor structure @param rxdataF_comp Compensated channel output diff --git a/openair1/PHY/Makefile.inc b/openair1/PHY/Makefile.inc index 1586f353c0..90094b31f8 100644 --- a/openair1/PHY/Makefile.inc +++ b/openair1/PHY/Makefile.inc @@ -7,6 +7,7 @@ PHY_OBJS += $(TOP_DIR)/PHY/LTE_TRANSPORT/dlsch_coding.o PHY_OBJS += $(TOP_DIR)/PHY/LTE_TRANSPORT/dlsch_modulation.o PHY_OBJS += $(TOP_DIR)/PHY/LTE_TRANSPORT/dlsch_demodulation.o PHY_OBJS += $(TOP_DIR)/PHY/LTE_TRANSPORT/dlsch_llr_computation.o +PHY_OBJS += $(TOP_DIR)/PHY/LTE_TRANSPORT/dlsch_llr_computation_avx2.o PHY_OBJS += $(TOP_DIR)/PHY/LTE_TRANSPORT/power_control.o PHY_OBJS += $(TOP_DIR)/PHY/LTE_TRANSPORT/dlsch_decoding.o PHY_OBJS += $(TOP_DIR)/PHY/LTE_TRANSPORT/dlsch_scrambling.o diff --git a/openair3/NAS/UE/ESM/esm_ebr_context.c b/openair3/NAS/UE/ESM/esm_ebr_context.c index bfa4a04cc8..31f215e359 100644 --- a/openair3/NAS/UE/ESM/esm_ebr_context.c +++ b/openair3/NAS/UE/ESM/esm_ebr_context.c @@ -286,7 +286,7 @@ int esm_ebr_context_create( LOG_TRACE(INFO, "ESM-PROC - executing %s ", command_line); - if (system(command_line)) ; /* TODO: what to do? */ + //if (system(command_line)) ; /* TODO: what to do? */ break; diff --git a/targets/RT/USER/lte-softmodem.c b/targets/RT/USER/lte-softmodem.c index 8303d88218..5761577259 100644 --- a/targets/RT/USER/lte-softmodem.c +++ b/targets/RT/USER/lte-softmodem.c @@ -151,6 +151,8 @@ uint8_t usim_test = 0; uint8_t nb_antenna_tx = 1; uint8_t nb_antenna_rx = 1; +int16_t dlsch_demod_shift = 0; + char ref[128] = "internal"; char channels[128] = "0"; @@ -635,6 +637,7 @@ static void get_options (int argc, char **argv) { LONG_OPTION_THREADIQ, LONG_OPTION_THREADODDSUBFRAME, LONG_OPTION_THREADEVENSUBFRAME, + LONG_OPTION_DEMOD_SHIFT, #if T_TRACER LONG_OPTION_T_PORT, LONG_OPTION_T_NOWAIT, @@ -670,6 +673,7 @@ static void get_options (int argc, char **argv) { {"threadIQ", required_argument, NULL, LONG_OPTION_THREADIQ}, {"threadOddSubframe", required_argument, NULL, LONG_OPTION_THREADODDSUBFRAME}, {"threadEvenSubframe", required_argument, NULL, LONG_OPTION_THREADEVENSUBFRAME}, + {"dlsch-demod-shift", required_argument, NULL, LONG_OPTION_DEMOD_SHIFT}, #if T_TRACER {"T_port", required_argument, 0, LONG_OPTION_T_PORT}, {"T_nowait", no_argument, 0, LONG_OPTION_T_NOWAIT}, @@ -800,7 +804,9 @@ static void get_options (int argc, char **argv) { case LONG_OPTION_THREADEVENSUBFRAME: threads.even=atoi(optarg); break; - + case LONG_OPTION_DEMOD_SHIFT: + dlsch_demod_shift = atof(optarg); + break; #if T_TRACER case LONG_OPTION_T_PORT: { extern int T_port; -- 2.26.2