Commit 04073b03 authored by Tsung-Yu Chan's avatar Tsung-Yu Chan Committed by Tsung Yu Chan

feat / add the MMSE and precoding

  - avoid the finction duplication
  - add the transform predoding to the nr_rx_pusch_tp
  - add the MMSE for 2 layer which modulation greater than 16 qam
  - can support to 256 qam for 1 layer and 2 layer
parent 1a42fa53
...@@ -1040,6 +1040,7 @@ set(PHY_SRC_UE ...@@ -1040,6 +1040,7 @@ set(PHY_SRC_UE
${OPENAIR1_DIR}/PHY/NR_REFSIG/dmrs_nr.c ${OPENAIR1_DIR}/PHY/NR_REFSIG/dmrs_nr.c
${OPENAIR1_DIR}/PHY/NR_REFSIG/ptrs_nr.c ${OPENAIR1_DIR}/PHY/NR_REFSIG/ptrs_nr.c
${OPENAIR1_DIR}/PHY/NR_ESTIMATION/nr_ul_channel_estimation.c ${OPENAIR1_DIR}/PHY/NR_ESTIMATION/nr_ul_channel_estimation.c
${OPENAIR1_DIR}/PHY/NR_ESTIMATION/nr_freq_equalization.c
${OPENAIR1_DIR}/PHY/NR_ESTIMATION/nr_measurements_gNB.c ${OPENAIR1_DIR}/PHY/NR_ESTIMATION/nr_measurements_gNB.c
${OPENAIR1_DIR}/PHY/TOOLS/file_output.c ${OPENAIR1_DIR}/PHY/TOOLS/file_output.c
${OPENAIR1_DIR}/PHY/TOOLS/cadd_vv.c ${OPENAIR1_DIR}/PHY/TOOLS/cadd_vv.c
......
...@@ -479,6 +479,7 @@ void init_gNB_Tpool(int inst) { ...@@ -479,6 +479,7 @@ void init_gNB_Tpool(int inst) {
// ULSCH decoding threadpool // ULSCH decoding threadpool
initTpool(get_softmodem_params()->threadPoolConfig, &gNB->threadPool, cpumeas(CPUMEAS_GETSTATE)); initTpool(get_softmodem_params()->threadPoolConfig, &gNB->threadPool, cpumeas(CPUMEAS_GETSTATE));
// ULSCH decoder result FIFO // ULSCH decoder result FIFO
initNotifiedFIFO(&gNB->respPuschSymb);
initNotifiedFIFO(&gNB->respDecode); initNotifiedFIFO(&gNB->respDecode);
// L1 RX result FIFO // L1 RX result FIFO
......
...@@ -28,6 +28,7 @@ ...@@ -28,6 +28,7 @@
#include "PHY/CODING/nrPolar_tools/nr_polar_pbch_defs.h" #include "PHY/CODING/nrPolar_tools/nr_polar_pbch_defs.h"
#include "PHY/NR_TRANSPORT/nr_transport_proto.h" #include "PHY/NR_TRANSPORT/nr_transport_proto.h"
#include "PHY/NR_TRANSPORT/nr_transport_common_proto.h" #include "PHY/NR_TRANSPORT/nr_transport_common_proto.h"
#include "PHY/NR_ESTIMATION/nr_ul_estimation.h"
#include "openair1/PHY/MODULATION/nr_modulation.h" #include "openair1/PHY/MODULATION/nr_modulation.h"
#include "openair1/PHY/defs_RU.h" #include "openair1/PHY/defs_RU.h"
#include "openair1/PHY/CODING/nrLDPC_extern.h" #include "openair1/PHY/CODING/nrLDPC_extern.h"
...@@ -525,6 +526,8 @@ int phy_init_nr_gNB(PHY_VARS_gNB *gNB) ...@@ -525,6 +526,8 @@ int phy_init_nr_gNB(PHY_VARS_gNB *gNB)
init_scrambling_luts(); init_scrambling_luts();
init_pucch2_luts(); init_pucch2_luts();
nr_init_fde(); // Init array for frequency equalization of transform precoding of PUSCH
load_nrLDPClib(NULL); load_nrLDPClib(NULL);
if (gNB->ldpc_offload_flag) if (gNB->ldpc_offload_flag)
......
/*
* Licensed to the OpenAirInterface (OAI) Software Alliance under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The OpenAirInterface Software Alliance licenses this file to You under
* the OAI Public License, Version 1.1 (the "License"); you may not use this file
* except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.openairinterface.org/?page_id=698
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*-------------------------------------------------------------------------------
* For more information about the OpenAirInterface (OAI) Software Alliance:
* contact@openairinterface.org
*/
#include "PHY/defs_eNB.h"
#include "PHY/sse_intrin.h"
#include "PHY/NR_ESTIMATION/nr_ul_estimation.h"
// Reference of openair1/PHY/LTE_ESTIMATION/freq_equalization.c
// This is 4096/(1:4096) in simde__m128i format
simde__m128i nr_inv_ch[4096];/* = {0, 4096/1, 4096/2, 4096/3, 4096/4...}*/
void nr_init_fde()
{
for (int i = 1;i < 4096; i++)
nr_inv_ch[i] = simde_mm_set1_epi16(4096/i);
}
void nr_freq_equalization (NR_DL_FRAME_PARMS *frame_parms,
int32_t *rxdataF_comp,
int32_t *ul_ch_mag,
int32_t *ul_ch_magb,
uint8_t symbol,
uint16_t Msc_RS,
uint8_t Qm)
{
simde__m128i *rxdataF_comp128 = (simde__m128i *)rxdataF_comp;
simde__m128i *ul_ch_mag128 = (simde__m128i *)ul_ch_mag;
simde__m128i *ul_ch_magb128 = (simde__m128i *)ul_ch_magb;
AssertFatal(symbol < frame_parms->symbols_per_slot, "symbol %d >= %d\n",
symbol, frame_parms->symbols_per_slot);
AssertFatal(Msc_RS <= frame_parms->N_RB_UL*12, "Msc_RS %d >= %d\n",
Msc_RS, frame_parms->N_RB_UL*12);
for (uint16_t re = 0; re < (Msc_RS >> 2); re++)
{
int16_t amp = (*((int16_t*)&ul_ch_mag128[re]));
if (amp > 4095)
amp = 4095;
rxdataF_comp128[re] = simde_mm_srai_epi16(simde_mm_mullo_epi16(rxdataF_comp128[re],nr_inv_ch[amp]),3);
if (Qm == 4)
ul_ch_mag128[re] = simde_mm_set1_epi16(324); // this is 512*2/sqrt(10)
else if (Qm == 6)
{
ul_ch_mag128[re] = simde_mm_set1_epi16(316); // this is 512*4/sqrt(42)
ul_ch_magb128[re] = simde_mm_set1_epi16(158); // this is 512*2/sqrt(42)
}
else if(Qm != 2)
AssertFatal(1, "nr_freq_equalization(), Qm = %d, should be 2, 4 or 6. symbol=%d, Msc_RS=%d\n", Qm, symbol, Msc_RS);
}
}
...@@ -83,4 +83,15 @@ int nr_srs_channel_estimation(const PHY_VARS_gNB *gNB, ...@@ -83,4 +83,15 @@ int nr_srs_channel_estimation(const PHY_VARS_gNB *gNB,
int32_t srs_estimated_channel_time_shifted[][1<<srs_pdu->num_ant_ports][gNB->frame_parms.ofdm_symbol_size], int32_t srs_estimated_channel_time_shifted[][1<<srs_pdu->num_ant_ports][gNB->frame_parms.ofdm_symbol_size],
int8_t *snr_per_rb, int8_t *snr_per_rb,
int8_t *snr); int8_t *snr);
void nr_freq_equalization(NR_DL_FRAME_PARMS *frame_parms,
int *rxdataF_comp,
int *ul_ch_mag,
int *ul_ch_mag_b,
unsigned char symbol,
unsigned short Msc_RS,
unsigned char Qm);
void nr_init_fde(void);
#endif #endif
This source diff could not be displayed because it is too large. You can view the blob instead.
...@@ -70,68 +70,28 @@ void nr_ulsch_16qam_llr(int32_t *rxdataF_comp, ...@@ -70,68 +70,28 @@ void nr_ulsch_16qam_llr(int32_t *rxdataF_comp,
uint32_t nb_re, uint32_t nb_re,
uint8_t symbol) uint8_t symbol)
{ {
simde__m256i *rxF_256 = (simde__m256i*)rxdataF_comp;
simde__m256i *ch_mag = (simde__m256i*)ul_ch_mag;
simde__m64 *llr_64 = (simde__m64*)ulsch_llr;
simde__m256i *rxF = (simde__m256i*)rxdataF_comp; simde__m256i xmm0, xmm1, xmm2;
simde__m256i *ch_mag;
simde__m256i llr256[2];
register simde__m256i xmm0;
uint32_t *llr32;
int i;
int off = ((nb_rb&1) == 1)? 4:0;
llr32 = (uint32_t*)ulsch_llr;
ch_mag = (simde__m256i*)&ul_ch_mag[(symbol*(off+(nb_rb*12)))]; for (int i = 0; i < ((nb_re>>3) + ((nb_re&7) ? 1 : 0)); i++) {
unsigned char len_mod8 = nb_re&7; xmm0 = simde_mm256_abs_epi16(rxF_256[i]); // registers of even index in xmm0-> |y_R|, registers of odd index in xmm0-> |y_I|
nb_re >>= 3; // length in quad words (4 REs) xmm0 = simde_mm256_subs_epi16(ch_mag[i], xmm0); // registers of even index in xmm0-> |y_R|-|h|^2, registers of odd index in xmm0-> |y_I|-|h|^2
nb_re += (len_mod8 == 0 ? 0 : 1);
for (i=0; i<nb_re; i++) {
xmm0 = simde_mm256_abs_epi16(rxF[i]); // registers of even index in xmm0-> |y_R|, registers of odd index in xmm0-> |y_I|
xmm0 = simde_mm256_subs_epi16(ch_mag[i],xmm0); // registers of even index in xmm0-> |y_R|-|h|^2, registers of odd index in xmm0-> |y_I|-|h|^2
llr256[0] = simde_mm256_unpacklo_epi32(rxF[i],xmm0); // llr128[0] contains the llrs of the 1st,2nd,5th and 6th REs xmm1 = simde_mm256_unpacklo_epi32(rxF_256[i], xmm0); // llr128[0] contains the llrs of the 1st,2nd,5th and 6th REs
llr256[1] = simde_mm256_unpackhi_epi32(rxF[i],xmm0); // llr128[1] contains the llrs of the 3rd, 4th, 7th and 8th REs xmm2 = simde_mm256_unpackhi_epi32(rxF_256[i], xmm0); // llr128[1] contains the llrs of the 3rd, 4th, 7th and 8th REs
// 1st RE *llr_64++ = (simde__m64)simde_mm256_extract_epi64(xmm1, 0);
llr32[0] = simde_mm256_extract_epi32(llr256[0],0); // llr32[0] low 16 bits-> y_R , high 16 bits-> y_I *llr_64++ = (simde__m64)simde_mm256_extract_epi64(xmm1, 1);
llr32[1] = simde_mm256_extract_epi32(llr256[0],1); // llr32[1] low 16 bits-> |h|-|y_R|^2, high 16 bits-> |h|-|y_I|^2 *llr_64++ = (simde__m64)simde_mm256_extract_epi64(xmm2, 0);
*llr_64++ = (simde__m64)simde_mm256_extract_epi64(xmm2, 1);
// 2nd RE *llr_64++ = (simde__m64)simde_mm256_extract_epi64(xmm1, 2);
llr32[2] = simde_mm256_extract_epi32(llr256[0],2); // llr32[2] low 16 bits-> y_R , high 16 bits-> y_I *llr_64++ = (simde__m64)simde_mm256_extract_epi64(xmm1, 3);
llr32[3] = simde_mm256_extract_epi32(llr256[0],3); // llr32[3] low 16 bits-> |h|-|y_R|^2, high 16 bits-> |h|-|y_I|^2 *llr_64++ = (simde__m64)simde_mm256_extract_epi64(xmm2, 2);
*llr_64++ = (simde__m64)simde_mm256_extract_epi64(xmm2, 3);
// 3rd RE
llr32[4] = simde_mm256_extract_epi32(llr256[1],0); // llr32[4] low 16 bits-> y_R , high 16 bits-> y_I
llr32[5] = simde_mm256_extract_epi32(llr256[1],1); // llr32[5] low 16 bits-> |h|-|y_R|^2, high 16 bits-> |h|-|y_I|^2
// 4th RE
llr32[6] = simde_mm256_extract_epi32(llr256[1],2); // llr32[6] low 16 bits-> y_R , high 16 bits-> y_I
llr32[7] = simde_mm256_extract_epi32(llr256[1],3); // llr32[7] low 16 bits-> |h|-|y_R|^2, high 16 bits-> |h|-|y_I|^2
// 5th RE
llr32[8] = simde_mm256_extract_epi32(llr256[0],4); // llr32[8] low 16 bits-> y_R , high 16 bits-> y_I
llr32[9] = simde_mm256_extract_epi32(llr256[0],5); // llr32[9] low 16 bits-> |h|-|y_R|^2, high 16 bits-> |h|-|y_I|^2
// 6th RE
llr32[10] = simde_mm256_extract_epi32(llr256[0],6); // llr32[10] low 16 bits-> y_R , high 16 bits-> y_I
llr32[11] = simde_mm256_extract_epi32(llr256[0],7); // llr32[11] low 16 bits-> |h|-|y_R|^2, high 16 bits-> |h|-|y_I|^2
// 7th RE
llr32[12] = simde_mm256_extract_epi32(llr256[1],4); // llr32[12] low 16 bits-> y_R , high 16 bits-> y_I
llr32[13] = simde_mm256_extract_epi32(llr256[1],5); // llr32[13] low 16 bits-> |h|-|y_R|^2, high 16 bits-> |h|-|y_I|^2
// 8th RE
llr32[14] = simde_mm256_extract_epi32(llr256[1],6); // llr32[14] low 16 bits-> y_R , high 16 bits-> y_I
llr32[15] = simde_mm256_extract_epi32(llr256[1],7); // llr32[15] low 16 bits-> |h|-|y_R|^2, high 16 bits-> |h|-|y_I|^2
llr32+=16;
} }
simde_mm_empty();
simde_m_empty();
} }
//---------------------------------------------------------------------------------------------- //----------------------------------------------------------------------------------------------
...@@ -146,112 +106,53 @@ void nr_ulsch_64qam_llr(int32_t *rxdataF_comp, ...@@ -146,112 +106,53 @@ void nr_ulsch_64qam_llr(int32_t *rxdataF_comp,
uint32_t nb_re, uint32_t nb_re,
uint8_t symbol) uint8_t symbol)
{ {
int off = ((nb_rb&1) == 1)? 4:0;
simde__m256i *rxF = (simde__m256i*)rxdataF_comp; simde__m256i *rxF = (simde__m256i*)rxdataF_comp;
simde__m256i *ch_mag,*ch_magb; simde__m256i xmm0, xmm1, xmm2;
register simde__m256i xmm0,xmm1,xmm2;
int i; simde__m256i *ch_maga = (simde__m256i*)ul_ch_mag;
simde__m256i *ch_magb = (simde__m256i*)ul_ch_magb;
ch_mag = (simde__m256i*)&ul_ch_mag[(symbol*(off+(nb_rb*12)))]; int32_t *llr_32 = (int32_t *)ulsch_llr;
ch_magb = (simde__m256i*)&ul_ch_magb[(symbol*(off+(nb_rb*12)))];
int len_mod8 = nb_re&7; for (int i = 0; i < ((nb_re>>3) + ((nb_re&7) ? 1 : 0)); i++) {
nb_re = nb_re>>3; // length in quad words (4 REs)
nb_re += ((len_mod8 == 0) ? 0 : 1);
for (i=0; i<nb_re; i++) {
xmm0 = rxF[i]; xmm0 = rxF[i];
xmm1 = simde_mm256_abs_epi16(xmm0); xmm1 = simde_mm256_abs_epi16(xmm0);
xmm1 = simde_mm256_subs_epi16(ch_mag[i],xmm1); xmm1 = simde_mm256_subs_epi16(ch_maga[i], xmm1);
xmm2 = simde_mm256_abs_epi16(xmm1); xmm2 = simde_mm256_abs_epi16(xmm1);
xmm2 = simde_mm256_subs_epi16(ch_magb[i],xmm2); xmm2 = simde_mm256_subs_epi16(ch_magb[i], xmm2);
// ---------------------------------------
// 1st RE
// ---------------------------------------
ulsch_llr[0] = simde_mm256_extract_epi16(xmm0,0);
ulsch_llr[1] = simde_mm256_extract_epi16(xmm0,1);
ulsch_llr[2] = simde_mm256_extract_epi16(xmm1,0);
ulsch_llr[3] = simde_mm256_extract_epi16(xmm1,1);
ulsch_llr[4] = simde_mm256_extract_epi16(xmm2,0);
ulsch_llr[5] = simde_mm256_extract_epi16(xmm2,1);
// ---------------------------------------
ulsch_llr+=6;
// --------------------------------------- *llr_32++ = simde_mm256_extract_epi32(xmm0,0);
// 2nd RE *llr_32++ = simde_mm256_extract_epi32(xmm1,0);
// --------------------------------------- *llr_32++ = simde_mm256_extract_epi32(xmm2,0);
ulsch_llr[0] = simde_mm256_extract_epi16(xmm0,2);
ulsch_llr[1] = simde_mm256_extract_epi16(xmm0,3);
ulsch_llr[2] = simde_mm256_extract_epi16(xmm1,2);
ulsch_llr[3] = simde_mm256_extract_epi16(xmm1,3);
ulsch_llr[4] = simde_mm256_extract_epi16(xmm2,2);
ulsch_llr[5] = simde_mm256_extract_epi16(xmm2,3);
// ---------------------------------------
ulsch_llr+=6;
// ---------------------------------------
// 3rd RE
// ---------------------------------------
ulsch_llr[0] = simde_mm256_extract_epi16(xmm0,4);
ulsch_llr[1] = simde_mm256_extract_epi16(xmm0,5);
ulsch_llr[2] = simde_mm256_extract_epi16(xmm1,4);
ulsch_llr[3] = simde_mm256_extract_epi16(xmm1,5);
ulsch_llr[4] = simde_mm256_extract_epi16(xmm2,4);
ulsch_llr[5] = simde_mm256_extract_epi16(xmm2,5);
// ---------------------------------------
ulsch_llr+=6;
// ---------------------------------------
// 4th RE
// ---------------------------------------
ulsch_llr[0] = simde_mm256_extract_epi16(xmm0,6);
ulsch_llr[1] = simde_mm256_extract_epi16(xmm0,7);
ulsch_llr[2] = simde_mm256_extract_epi16(xmm1,6);
ulsch_llr[3] = simde_mm256_extract_epi16(xmm1,7);
ulsch_llr[4] = simde_mm256_extract_epi16(xmm2,6);
ulsch_llr[5] = simde_mm256_extract_epi16(xmm2,7);
// ---------------------------------------
ulsch_llr+=6;
ulsch_llr[0] = simde_mm256_extract_epi16(xmm0,8);
ulsch_llr[1] = simde_mm256_extract_epi16(xmm0,9);
ulsch_llr[2] = simde_mm256_extract_epi16(xmm1,8);
ulsch_llr[3] = simde_mm256_extract_epi16(xmm1,9);
ulsch_llr[4] = simde_mm256_extract_epi16(xmm2,8);
ulsch_llr[5] = simde_mm256_extract_epi16(xmm2,9);
ulsch_llr[6] = simde_mm256_extract_epi16(xmm0,10);
ulsch_llr[7] = simde_mm256_extract_epi16(xmm0,11);
ulsch_llr[8] = simde_mm256_extract_epi16(xmm1,10);
ulsch_llr[9] = simde_mm256_extract_epi16(xmm1,11);
ulsch_llr[10] = simde_mm256_extract_epi16(xmm2,10);
ulsch_llr[11] = simde_mm256_extract_epi16(xmm2,11);
ulsch_llr[12] = simde_mm256_extract_epi16(xmm0,12);
ulsch_llr[13] = simde_mm256_extract_epi16(xmm0,13);
ulsch_llr[14] = simde_mm256_extract_epi16(xmm1,12);
ulsch_llr[15] = simde_mm256_extract_epi16(xmm1,13);
ulsch_llr[16] = simde_mm256_extract_epi16(xmm2,12);
ulsch_llr[17] = simde_mm256_extract_epi16(xmm2,13);
ulsch_llr[18] = simde_mm256_extract_epi16(xmm0,14);
ulsch_llr[19] = simde_mm256_extract_epi16(xmm0,15);
ulsch_llr[20] = simde_mm256_extract_epi16(xmm1,14);
ulsch_llr[21] = simde_mm256_extract_epi16(xmm1,15);
ulsch_llr[22] = simde_mm256_extract_epi16(xmm2,14);
ulsch_llr[23] = simde_mm256_extract_epi16(xmm2,15);
ulsch_llr+=24;
}
simde_mm_empty(); *llr_32++ = simde_mm256_extract_epi32(xmm0,1);
simde_m_empty(); *llr_32++ = simde_mm256_extract_epi32(xmm1,1);
*llr_32++ = simde_mm256_extract_epi32(xmm2,1);
*llr_32++ = simde_mm256_extract_epi32(xmm0,2);
*llr_32++ = simde_mm256_extract_epi32(xmm1,2);
*llr_32++ = simde_mm256_extract_epi32(xmm2,2);
*llr_32++ = simde_mm256_extract_epi32(xmm0,3);
*llr_32++ = simde_mm256_extract_epi32(xmm1,3);
*llr_32++ = simde_mm256_extract_epi32(xmm2,3);
*llr_32++ = simde_mm256_extract_epi32(xmm0,4);
*llr_32++ = simde_mm256_extract_epi32(xmm1,4);
*llr_32++ = simde_mm256_extract_epi32(xmm2,4);
*llr_32++ = simde_mm256_extract_epi32(xmm0,5);
*llr_32++ = simde_mm256_extract_epi32(xmm1,5);
*llr_32++ = simde_mm256_extract_epi32(xmm2,5);
*llr_32++ = simde_mm256_extract_epi32(xmm0,6);
*llr_32++ = simde_mm256_extract_epi32(xmm1,6);
*llr_32++ = simde_mm256_extract_epi32(xmm2,6);
*llr_32++ = simde_mm256_extract_epi32(xmm0,7);
*llr_32++ = simde_mm256_extract_epi32(xmm1,7);
*llr_32++ = simde_mm256_extract_epi32(xmm2,7);
}
} }
void nr_ulsch_256qam_llr(int32_t *rxdataF_comp, void nr_ulsch_256qam_llr(int32_t *rxdataF_comp,
...@@ -263,105 +164,42 @@ void nr_ulsch_256qam_llr(int32_t *rxdataF_comp, ...@@ -263,105 +164,42 @@ void nr_ulsch_256qam_llr(int32_t *rxdataF_comp,
uint32_t nb_re, uint32_t nb_re,
uint8_t symbol) uint8_t symbol)
{ {
int off = ((nb_rb&1) == 1)? 4:0;
simde__m256i *rxF = (simde__m256i*)rxdataF_comp; simde__m256i *rxF = (simde__m256i*)rxdataF_comp;
simde__m256i *ch_mag,*ch_magb,*ch_magc; simde__m256i xmm0,xmm1,xmm2,xmm3,xmm4,xmm5,xmm6;
register simde__m256i xmm0,xmm1,xmm2,xmm3,xmm4,xmm5,xmm6;
simde__m256i *llr256=(simde__m256i*)ulsch_llr; simde__m256i *llr256=(simde__m256i*)ulsch_llr;
ch_mag = (simde__m256i*)&ul_ch_mag[(symbol*(off+(nb_rb*12)))]; simde__m256i* ch_maga = (simde__m256i*)ul_ch_mag;
ch_magb = (simde__m256i*)&ul_ch_magb[(symbol*(off+(nb_rb*12)))]; simde__m256i* ch_magb = (simde__m256i*)ul_ch_magb;
ch_magc = (simde__m256i*)&ul_ch_magc[(symbol*(off+(nb_rb*12)))]; simde__m256i* ch_magc = (simde__m256i*)ul_ch_magc;
int len_mod8 = nb_re&7;
int nb_re256 = nb_re>>3; // length in 256-bit words (8 REs)
for (int i=0; i<nb_re256; i++) {
xmm0 = simde_mm256_abs_epi16(rxF[i]); // registers of even index in xmm0-> |y_R|, registers of odd index in xmm0-> |y_I|
xmm0 = simde_mm256_subs_epi16(ch_mag[i],xmm0); // registers of even index in xmm0-> |y_R|-|h|^2, registers of odd index in xmm0-> |y_I|-|h|^2
// xmmtmpD2 contains 16 LLRs
xmm1 = simde_mm256_abs_epi16(xmm0);
xmm1 = simde_mm256_subs_epi16(ch_magb[i],xmm1); // contains 16 LLRs
xmm2 = simde_mm256_abs_epi16(xmm1);
xmm2 = simde_mm256_subs_epi16(ch_magc[i],xmm2); // contains 16 LLRs
// rxF[i] A0 A1 A2 A3 A4 A5 A6 A7 bits 7,6
// xmm0 B0 B1 B2 B3 B4 B5 B6 B7 bits 5,4
// xmm1 C0 C1 C2 C3 C4 C5 C6 C7 bits 3,2
// xmm2 D0 D1 D2 D3 D4 D5 D6 D7 bits 1,0
xmm3 = simde_mm256_unpacklo_epi32(rxF[i],xmm0); // A0 B0 A1 B1 A4 B4 A5 B5
xmm4 = simde_mm256_unpackhi_epi32(rxF[i],xmm0); // A2 B2 A3 B3 A6 B6 A7 B7
xmm5 = simde_mm256_unpacklo_epi32(xmm1,xmm2); // C0 D0 C1 D1 C4 D4 C5 D5
xmm6 = simde_mm256_unpackhi_epi32(xmm1,xmm2); // C2 D2 C3 D3 C6 D6 C7 D7
xmm0 = simde_mm256_unpacklo_epi64(xmm3,xmm5); // A0 B0 C0 D0 A4 B4 C4 D4
xmm1 = simde_mm256_unpackhi_epi64(xmm3,xmm5); // A1 B1 C1 D1 A5 B5 C5 D5
xmm2 = simde_mm256_unpacklo_epi64(xmm4,xmm6); // A2 B2 C2 D2 A6 B6 C6 D6
xmm3 = simde_mm256_unpackhi_epi64(xmm4,xmm6); // A3 B3 C3 D3 A7 B7 C7 D7
llr256[0] = simde_mm256_permute2x128_si256(xmm0, xmm1, 0x20); // A0 B0 C0 D0 A1 B1 C1 D1
llr256[1] = simde_mm256_permute2x128_si256(xmm2, xmm3, 0x20); // A2 B2 C2 D2 A3 B3 C3 D3
llr256[2] = simde_mm256_permute2x128_si256(xmm0, xmm1, 0x31); // A4 B4 C4 D4 A5 B5 C5 D5
llr256[3] = simde_mm256_permute2x128_si256(xmm2, xmm3, 0x31); // A6 B6 C6 D6 A7 B7 C7 D7
llr256+=4;
for (int i = 0; i < ((nb_re>>3) + ((nb_re&7) ? 1 : 0)); i++) {
xmm0 = simde_mm256_abs_epi16(rxF[i]); // registers of even index in xmm0-> |y_R|, registers of odd index in xmm0-> |y_I|
xmm0 = simde_mm256_subs_epi16(ch_maga[i], xmm0); // registers of even index in xmm0-> |y_R|-|h|^2, registers of odd index in xmm0-> |y_I|-|h|^2
// xmmtmpD2 contains 16 LLRs
xmm1 = simde_mm256_abs_epi16(xmm0);
xmm1 = simde_mm256_subs_epi16(ch_magb[i], xmm1); // contains 16 LLRs
xmm2 = simde_mm256_abs_epi16(xmm1);
xmm2 = simde_mm256_subs_epi16(ch_magc[i], xmm2); // contains 16 LLRs
// rxF[i] A0 A1 A2 A3 A4 A5 A6 A7 bits 7,6
// xmm0 B0 B1 B2 B3 B4 B5 B6 B7 bits 5,4
// xmm1 C0 C1 C2 C3 C4 C5 C6 C7 bits 3,2
// xmm2 D0 D1 D2 D3 D4 D5 D6 D7 bits 1,0
xmm3 = simde_mm256_unpacklo_epi32(rxF[i], xmm0); // A0 B0 A1 B1 A4 B4 A5 B5
xmm4 = simde_mm256_unpackhi_epi32(rxF[i], xmm0); // A2 B2 A3 B3 A6 B6 A7 B7
xmm5 = simde_mm256_unpacklo_epi32(xmm1, xmm2); // C0 D0 C1 D1 C4 D4 C5 D5
xmm6 = simde_mm256_unpackhi_epi32(xmm1, xmm2); // C2 D2 C3 D3 C6 D6 C7 D7
xmm0 = simde_mm256_unpacklo_epi64(xmm3, xmm5); // A0 B0 C0 D0 A4 B4 C4 D4
xmm1 = simde_mm256_unpackhi_epi64(xmm3, xmm5); // A1 B1 C1 D1 A5 B5 C5 D5
xmm2 = simde_mm256_unpacklo_epi64(xmm4, xmm6); // A2 B2 C2 D2 A6 B6 C6 D6
xmm3 = simde_mm256_unpackhi_epi64(xmm4, xmm6); // A3 B3 C3 D3 A7 B7 C7 D7
*llr256++ = simde_mm256_permute2x128_si256(xmm0, xmm1, 0x20); // A0 B0 C0 D0 A1 B1 C1 D1
*llr256++ = simde_mm256_permute2x128_si256(xmm2, xmm3, 0x20); // A2 B2 C2 D2 A3 B3 C3 D3
*llr256++ = simde_mm256_permute2x128_si256(xmm0, xmm1, 0x31); // A4 B4 C4 D4 A5 B5 C5 D5
*llr256++ = simde_mm256_permute2x128_si256(xmm2, xmm3, 0x31); // A6 B6 C6 D6 A7 B7 C7 D7
} }
simde__m128i *llr128 = (simde__m128i*)llr256;
if (len_mod8 >= 4) {
int nb_re128 = nb_re>>2;
simde__m128i xmm0,xmm1,xmm2,xmm3,xmm4,xmm5,xmm6;
simde__m128i *rxF = (simde__m128i*)rxdataF_comp;
simde__m128i *ch_mag = (simde__m128i*)&ul_ch_mag[(symbol*(off+(nb_rb*12)))];
simde__m128i *ch_magb = (simde__m128i*)&ul_ch_magb[(symbol*(off+(nb_rb*12)))];
simde__m128i *ch_magc = (simde__m128i*)&ul_ch_magc[(symbol*(off+(nb_rb*12)))];
xmm0 = simde_mm_abs_epi16(rxF[nb_re128-1]); // registers of even index in xmm0-> |y_R|, registers of odd index in xmm0-> |y_I|
xmm0 = simde_mm_subs_epi16(ch_mag[nb_re128-1],xmm0); // registers of even index in xmm0-> |y_R|-|h|^2, registers of odd index in xmm0-> |y_I|-|h|^2
// xmmtmpD2 contains 8 LLRs
xmm1 = simde_mm_abs_epi16(xmm0);
xmm1 = simde_mm_subs_epi16(ch_magb[nb_re128-1],xmm1); // contains 8 LLRs
xmm2 = simde_mm_abs_epi16(xmm1);
xmm2 = simde_mm_subs_epi16(ch_magc[nb_re128-1],xmm2); // contains 8 LLRs
// rxF[i] A0 A1 A2 A3
// xmm0 B0 B1 B2 B3
// xmm1 C0 C1 C2 C3
// xmm2 D0 D1 D2 D3
xmm3 = simde_mm_unpacklo_epi32(rxF[nb_re128-1],xmm0); // A0 B0 A1 B1
xmm4 = simde_mm_unpackhi_epi32(rxF[nb_re128-1],xmm0); // A2 B2 A3 B3
xmm5 = simde_mm_unpacklo_epi32(xmm1,xmm2); // C0 D0 C1 D1
xmm6 = simde_mm_unpackhi_epi32(xmm1,xmm2); // C2 D2 C3 D3
llr128[0] = simde_mm_unpacklo_epi64(xmm3,xmm5); // A0 B0 C0 D0
llr128[1] = simde_mm_unpackhi_epi64(xmm3,xmm5); // A1 B1 C1 D1
llr128[2] = simde_mm_unpacklo_epi64(xmm4,xmm6); // A2 B2 C2 D2
llr128[3] = simde_mm_unpackhi_epi64(xmm4,xmm6); // A3 B3 C3 D3
llr128+=4;
}
if (len_mod8 == 6) {
int nb_re64 = nb_re>>1;
simde__m64 *llr64 = (simde__m64 *)llr128;
simde__m64 xmm0,xmm1,xmm2;
simde__m64 *rxF = (simde__m64*)rxdataF_comp;
simde__m64 *ch_mag = (simde__m64*)&ul_ch_mag[(symbol*(off+(nb_rb*12)))];
simde__m64 *ch_magb = (simde__m64*)&ul_ch_magb[(symbol*(off+(nb_rb*12)))];
simde__m64 *ch_magc = (simde__m64*)&ul_ch_magc[(symbol*(off+(nb_rb*12)))];
xmm0 = simde_mm_abs_pi16(rxF[nb_re64-1]); // registers of even index in xmm0-> |y_R|, registers of odd index in xmm0-> |y_I|
xmm0 = simde_mm_subs_pi16(ch_mag[nb_re-1],xmm0); // registers of even index in xmm0-> |y_R|-|h|^2, registers of odd index in xmm0-> |y_I|-|h|^2
// xmmtmpD2 contains 4 LLRs
xmm1 = simde_mm_abs_pi16(xmm0);
xmm1 = simde_mm_subs_pi16(ch_magb[nb_re64-1],xmm1); // contains 4 LLRs
xmm2 = simde_mm_abs_pi16(xmm1);
xmm2 = simde_mm_subs_pi16(ch_magc[nb_re64-1],xmm2); // contains 4 LLRs
// rxF[i] A0 A1
// xmm0 B0 B1
// xmm1 C0 C1
// xmm2 D0 D1
llr64[0] = simde_m_punpckldq(rxF[nb_re64-1],xmm0); // A0 B0
llr64[2] = simde_m_punpckhdq(rxF[nb_re64-1],xmm0); // A1 B1
llr64[1] = simde_m_punpckldq(xmm1,xmm2); // C0 D0
llr64[3] = simde_m_punpckhdq(xmm1,xmm2); // C1 D1
}
} }
void nr_ulsch_compute_llr(int32_t *rxdataF_comp, void nr_ulsch_compute_llr(int32_t *rxdataF_comp,
int32_t *ul_ch_mag, int32_t *ul_ch_mag,
int32_t *ul_ch_magb, int32_t *ul_ch_magb,
......
...@@ -760,7 +760,7 @@ typedef struct PHY_VARS_gNB_s { ...@@ -760,7 +760,7 @@ typedef struct PHY_VARS_gNB_s {
time_stats_t rx_dft_stats; time_stats_t rx_dft_stats;
time_stats_t ulsch_freq_offset_estimation_stats; time_stats_t ulsch_freq_offset_estimation_stats;
*/ */
notifiedFIFO_t *respPuschSymb; notifiedFIFO_t respPuschSymb;
notifiedFIFO_t respDecode; notifiedFIFO_t respDecode;
notifiedFIFO_t resp_L1; notifiedFIFO_t resp_L1;
notifiedFIFO_t L1_tx_free; notifiedFIFO_t L1_tx_free;
...@@ -769,7 +769,6 @@ typedef struct PHY_VARS_gNB_s { ...@@ -769,7 +769,6 @@ typedef struct PHY_VARS_gNB_s {
notifiedFIFO_t resp_RU_tx; notifiedFIFO_t resp_RU_tx;
tpool_t threadPool; tpool_t threadPool;
int nbSymb; int nbSymb;
int use_pusch_tp;
int num_pusch_symbols_per_thread; int num_pusch_symbols_per_thread;
pthread_t L1_rx_thread; pthread_t L1_rx_thread;
int L1_rx_thread_core; int L1_rx_thread_core;
...@@ -792,6 +791,7 @@ typedef struct puschSymbolProc_s { ...@@ -792,6 +791,7 @@ typedef struct puschSymbolProc_s {
int16_t *llr; int16_t *llr;
int16_t **llr_layers; int16_t **llr_layers;
int16_t *s; int16_t *s;
uint32_t nvar;
} puschSymbolProc_t; } puschSymbolProc_t;
struct puschSymbolReqId { struct puschSymbolReqId {
......
...@@ -398,20 +398,20 @@ static int nr_ulsch_procedures(PHY_VARS_gNB *gNB, int frame_rx, int slot_rx, int ...@@ -398,20 +398,20 @@ static int nr_ulsch_procedures(PHY_VARS_gNB *gNB, int frame_rx, int slot_rx, int
pusch_pdu->qam_mod_order, pusch_pdu->qam_mod_order,
pusch_pdu->nrOfLayers); pusch_pdu->nrOfLayers);
if (gNB->use_pusch_tp == 0 )
{
nr_ulsch_layer_demapping(gNB->pusch_vars[ULSCH_id].llr, // nr_ulsch_layer_demapping(gNB->pusch_vars[ULSCH_id].llr,
pusch_pdu->nrOfLayers, // pusch_pdu->nrOfLayers,
pusch_pdu->qam_mod_order, // pusch_pdu->qam_mod_order,
G, // G,
gNB->pusch_vars[ULSCH_id].llr_layers); // gNB->pusch_vars[ULSCH_id].llr_layers);
//---------------------------------------------------------- // //----------------------------------------------------------
//------------------- ULSCH unscrambling ------------------- // //------------------- ULSCH unscrambling -------------------
//---------------------------------------------------------- // //----------------------------------------------------------
start_meas(&gNB->ulsch_unscrambling_stats); // start_meas(&gNB->ulsch_unscrambling_stats);
nr_ulsch_unscrambling(gNB->pusch_vars[ULSCH_id].llr, G, pusch_pdu->data_scrambling_id, pusch_pdu->rnti); // nr_ulsch_unscrambling(gNB->pusch_vars[ULSCH_id].llr, G, pusch_pdu->data_scrambling_id, pusch_pdu->rnti);
stop_meas(&gNB->ulsch_unscrambling_stats); // stop_meas(&gNB->ulsch_unscrambling_stats);
}
//---------------------------------------------------------- //----------------------------------------------------------
//--------------------- ULSCH decoding --------------------- //--------------------- ULSCH decoding ---------------------
...@@ -902,8 +902,9 @@ int phy_procedures_gNB_uespec_RX(PHY_VARS_gNB *gNB, int frame_rx, int slot_rx) ...@@ -902,8 +902,9 @@ int phy_procedures_gNB_uespec_RX(PHY_VARS_gNB *gNB, int frame_rx, int slot_rx)
VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_NR_RX_PUSCH, 1); VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_NR_RX_PUSCH, 1);
start_meas(&gNB->rx_pusch_stats); start_meas(&gNB->rx_pusch_stats);
if (gNB->use_pusch_tp) nr_rx_pusch_tp(gNB, ULSCH_id, frame_rx, slot_rx, ulsch->harq_pid); // if (gNB->use_pusch_tp) nr_rx_pusch_tp(gNB, ULSCH_id, frame_rx, slot_rx, ulsch->harq_pid);
else nr_rx_pusch(gNB, ULSCH_id, frame_rx, slot_rx, ulsch->harq_pid); // else nr_rx_pusch(gNB, ULSCH_id, frame_rx, slot_rx, ulsch->harq_pid);
nr_rx_pusch_tp(gNB, ULSCH_id, frame_rx, slot_rx, ulsch->harq_pid);
NR_gNB_PUSCH *pusch_vars = &gNB->pusch_vars[ULSCH_id]; NR_gNB_PUSCH *pusch_vars = &gNB->pusch_vars[ULSCH_id];
pusch_vars->ulsch_power_tot = 0; pusch_vars->ulsch_power_tot = 0;
pusch_vars->ulsch_noise_power_tot = 0; pusch_vars->ulsch_noise_power_tot = 0;
......
...@@ -222,7 +222,6 @@ int main(int argc, char *argv[]) ...@@ -222,7 +222,6 @@ int main(int argc, char *argv[])
int ibwp_rboffset=41; int ibwp_rboffset=41;
int params_from_file = 0; int params_from_file = 0;
int threadCnt=0; int threadCnt=0;
int use_tpool = 0;
int max_ldpc_iterations = 5; int max_ldpc_iterations = 5;
if ( load_configmodule(argc,argv,CONFIG_ENABLECMDLINEONLY) == 0 ) { if ( load_configmodule(argc,argv,CONFIG_ENABLECMDLINEONLY) == 0 ) {
exit_fun("[NR_ULSIM] Error, configuration module init failed\n"); exit_fun("[NR_ULSIM] Error, configuration module init failed\n");
...@@ -364,7 +363,6 @@ int main(int argc, char *argv[]) ...@@ -364,7 +363,6 @@ int main(int argc, char *argv[])
case 'C': case 'C':
threadCnt = atoi(optarg); threadCnt = atoi(optarg);
use_tpool = 1;
break; break;
case 'u': case 'u':
...@@ -563,14 +561,12 @@ int main(int argc, char *argv[]) ...@@ -563,14 +561,12 @@ int main(int argc, char *argv[])
RC.gNB[0] = calloc(1,sizeof(PHY_VARS_gNB)); RC.gNB[0] = calloc(1,sizeof(PHY_VARS_gNB));
gNB = RC.gNB[0]; gNB = RC.gNB[0];
gNB->ofdm_offset_divisor = UINT_MAX; gNB->ofdm_offset_divisor = UINT_MAX;
initNotifiedFIFO(&gNB->respDecode);
gNB->use_pusch_tp = use_tpool;
gNB->num_pusch_symbols_per_thread = 1; gNB->num_pusch_symbols_per_thread = 1;
initFloatingCoresTpool(threadCnt, &gNB->threadPool, false, "gNB-tpool"); initFloatingCoresTpool(threadCnt, &gNB->threadPool, false, "gNB-tpool");
initNotifiedFIFO(&gNB->respDecode); initNotifiedFIFO(&gNB->respDecode);
gNB->respPuschSymb = (notifiedFIFO_t*) malloc(sizeof(notifiedFIFO_t));
initNotifiedFIFO(gNB->respPuschSymb); initNotifiedFIFO(&gNB->respPuschSymb);
initNotifiedFIFO(&gNB->L1_tx_free); initNotifiedFIFO(&gNB->L1_tx_free);
initNotifiedFIFO(&gNB->L1_tx_filled); initNotifiedFIFO(&gNB->L1_tx_filled);
initNotifiedFIFO(&gNB->L1_tx_out); initNotifiedFIFO(&gNB->L1_tx_out);
...@@ -1602,20 +1598,16 @@ int main(int argc, char *argv[]) ...@@ -1602,20 +1598,16 @@ int main(int argc, char *argv[])
printDistribution(&gNB->phy_proc_rx,table_rx,"Total PHY proc rx"); printDistribution(&gNB->phy_proc_rx,table_rx,"Total PHY proc rx");
printStatIndent(&gNB->rx_pusch_stats,"RX PUSCH time"); printStatIndent(&gNB->rx_pusch_stats,"RX PUSCH time");
printStatIndent2(&gNB->ulsch_channel_estimation_stats,"ULSCH channel estimation time"); printStatIndent2(&gNB->ulsch_channel_estimation_stats,"ULSCH channel estimation time");
if (use_tpool == 1) printStatIndent2(&gNB->rx_pusch_init_stats,"RX PUSCH Initialization time");
{ printStatIndent2(&gNB->rx_pusch_symbol_processing_stats,"RX PUSCH Symbol Processing time");
printStatIndent2(&gNB->rx_pusch_init_stats,"RX PUSCH Initialization time");
printStatIndent2(&gNB->rx_pusch_symbol_processing_stats,"RX PUSCH Symbol Processing time"); // printStatIndent2(&gNB->ulsch_ptrs_processing_stats,"ULSCH PTRS Processing time");
} // printStatIndent2(&gNB->ulsch_rbs_extraction_stats,"ULSCH rbs extraction time");
else // printStatIndent2(&gNB->ulsch_channel_compensation_stats,"ULSCH channel compensation time");
{ // printStatIndent2(&gNB->ulsch_mrc_stats,"ULSCH mrc computation");
printStatIndent2(&gNB->ulsch_ptrs_processing_stats,"ULSCH PTRS Processing time"); // printStatIndent2(&gNB->ulsch_llr_stats,"ULSCH llr computation");
printStatIndent2(&gNB->ulsch_rbs_extraction_stats,"ULSCH rbs extraction time"); // printStatIndent(&gNB->ulsch_unscrambling_stats,"ULSCH unscrambling");
printStatIndent2(&gNB->ulsch_channel_compensation_stats,"ULSCH channel compensation time");
printStatIndent2(&gNB->ulsch_mrc_stats,"ULSCH mrc computation");
printStatIndent2(&gNB->ulsch_llr_stats,"ULSCH llr computation");
printStatIndent(&gNB->ulsch_unscrambling_stats,"ULSCH unscrambling");
}
printStatIndent(&gNB->ulsch_decoding_stats,"ULSCH total decoding time"); printStatIndent(&gNB->ulsch_decoding_stats,"ULSCH total decoding time");
// printStatIndent2(&gNB->ulsch_deinterleaving_stats,"ULSCH deinterleaving"); // printStatIndent2(&gNB->ulsch_deinterleaving_stats,"ULSCH deinterleaving");
// printStatIndent2(&gNB->ulsch_rate_unmatching_stats,"ULSCH rate matching rx"); // printStatIndent2(&gNB->ulsch_rate_unmatching_stats,"ULSCH rate matching rx");
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment