Commit 453e257d authored by Sakthivel Velumani's avatar Sakthivel Velumani

Merge branch 'NR_RRC_harq_hacks' of...

Merge branch 'NR_RRC_harq_hacks' of https://gitlab.eurecom.fr/oai/openairinterface5g into NR_RRC_harq_hacks
parents eb5a7a9a 8f8894d7
......@@ -113,8 +113,8 @@ int nr_phy_init_RU(RU_t *ru) {
ru->prach_rxsigF = (int16_t**)malloc(ru->nb_rx * sizeof(int16_t*));
for (i=0; i<ru->nb_rx; i++) {
// for preamble format 1 and 2, more memory should be allocated
ru->prach_rxsigF[i] = (int16_t*)malloc16_clear( fp->ofdm_symbol_size*12*(1<<mu)*2*sizeof(int16_t) );
// largest size for PRACH FFT is 4x98304 (16*24576)
ru->prach_rxsigF[i] = (int16_t*)malloc16_clear( 4*98304*2*sizeof(int16_t) );
LOG_D(PHY,"[INIT] prach_vars->rxsigF[%d] = %p\n",i,ru->prach_rxsigF[i]);
}
......
......@@ -503,6 +503,7 @@ void rx_nr_prach_ru(RU_t *ru,
}
//Coherent combining of PRACH repetitions (assumes channel does not change, to be revisited for "long" PRACH)
LOG_D(PHY,"Doing PRACH combining of %d reptitions N_ZC %d\n",reps,N_ZC);
int16_t rxsigF_tmp[N_ZC<<1];
// if (k+N_ZC > dftlen) { // PRACH signal is split around DC
int16_t *rxsigF2=rxsigF[aa];
......@@ -583,8 +584,6 @@ void rx_nr_prach(PHY_VARS_gNB *gNB,
restricted_set = cfg->restricted_set_config.value;
AssertFatal(prach_sequence_length == 1, "no support yet for long prachSequenceLength\n");
uint8_t prach_fmt = prach_pdu->prach_format;
uint16_t N_ZC = (prach_sequence_length==0)?839:139;
......@@ -624,7 +623,7 @@ void rx_nr_prach(PHY_VARS_gNB *gNB,
if (LOG_DEBUGFLAG(PRACH)){
int en = dB_fixed(signal_energy((int32_t*)&rxsigF[0][0],(N_ZC==839) ? 840: 140));
if (en>60) LOG_I(PHY,"frame %d, subframe %d : Trying preamble %d \n",frame,subframe,preamble_index);
if (en>60) LOG_D(PHY,"frame %d, subframe %d : Trying preamble %d \n",frame,subframe,preamble_index);
}
if (restricted_set == 0) {
// This is the relative offset in the root sequence table (5.7.2-4 from 36.211) for the given preamble index
......@@ -750,10 +749,10 @@ void rx_nr_prach(PHY_VARS_gNB *gNB,
prach_ifft[i] += ((int32_t)prach_ifft_tmp[i<<1]*(int32_t)prach_ifft_tmp[(i<<1)] + (int32_t)prach_ifft_tmp[1+(i<<1)]*(int32_t)prach_ifft_tmp[1+(i<<1)])>>10;
}
if (LOG_DUMPFLAG(PRACH)) {
if (LOG_DUMPFLAG(PRACH)) {
if (aa==0) LOG_M("prach_rxF_comp0.m","prach_rxF_comp0",prachF,1024,1,1);
if (aa==1) LOG_M("prach_rxF_comp1.m","prach_rxF_comp1",prachF,1024,1,1);
}
}
}// antennas_rx
} // new dft
......@@ -766,6 +765,7 @@ void rx_nr_prach(PHY_VARS_gNB *gNB,
lev = (int32_t)prach_ifft[(preamble_shift2+i)];
levdB = dB_fixed_times10(lev);
if (levdB>*max_preamble_energy) {
LOG_D(PHY,"preamble_index %d, delay %d en %d dB > %d dB\n",preamble_index,i,levdB,*max_preamble_energy);
*max_preamble_energy = levdB;
*max_preamble_delay = i; // Note: This has to be normalized to the 30.72 Ms/s sampling rate
*max_preamble = preamble_index;
......
......@@ -60,6 +60,7 @@ extern int64_t table_6_3_3_2_4_prachConfig_Index [256][10];
extern uint16_t nr_du[838];
extern int16_t nr_ru[2*839];
extern const char *prachfmt[9];
extern const char *prachfmt03[4];
// Note:
// - prach_fmt_id is an ID used to map to the corresponding PRACH format value in prachfmt
......@@ -210,9 +211,9 @@ int32_t generate_nr_prach(PHY_VARS_NR_UE *ue, uint8_t gNB_id, uint8_t slot){
#ifdef NR_PRACH_DEBUG
if (NCS>0)
LOG_I(PHY, "PRACH [UE %d] generate PRACH for RootSeqIndex %d, Preamble Index %d, PRACH Format %s, NCS %d (N_ZC %d): Preamble_offset %d, Preamble_shift %d\n", Mod_id,
rootSequenceIndex,
preamble_index,
prachfmt[prach_fmt_id],
rootSequenceIndex,
preamble_index,
prach_sequence_length == 0 ? prachfmt03[prach_fmt_id] : prachfmt[prach_fmt_id],
NCS,
N_ZC,
preamble_offset,
......
# Stand-alone SSE4.1 test executable built from lte_dfts.c (gcc-7, -O3, -DMR_MAIN).
lte_dfts_sse4: lte_dfts.c
gcc-7 -O3 -std=gnu99 -msse4.1 -o lte_dfts_sse4 lte_dfts.c time_meas.c ../../SIMULATION/TOOLS/taus.c -I$$OPENAIR_HOME -I$$OPENAIR1_DIR -I$$OPENAIR_TARGETS -I$$OPENAIR2_DIR -I$$OPENAIR2_DIR/COMMON -I$$OPENAIR_HOME/common/utils -I$$OPENAIR_HOME/common/utils/T -DMR_MAIN -DNB_ANTENNAS_RX=1 -lm -lpthread # -DD256STATS #-DD64STATS
# Stand-alone SSE4.1 test executable built from oai_dfts.c (gcc-7, -O3, -DMR_MAIN).
oai_dfts_sse4: oai_dfts.c
gcc-7 -O3 -std=gnu99 -msse4.1 -o oai_dfts_sse4 oai_dfts.c time_meas.c ../../SIMULATION/TOOLS/taus.c -I$$OPENAIR_HOME -I$$OPENAIR1_DIR -I$$OPENAIR_TARGETS -I$$OPENAIR_TARGETS/COMMON -I$$OPENAIR_TARGETS/ARCH/COMMON -I$$OPENAIR2_DIR -I$$OPENAIR2_DIR/COMMON -I$$OPENAIR_HOME/common/utils -I$$OPENAIR_HOME/common/utils/T -I$$OPENAIR_HOME/common/utils/msc -I$$OPENAIR_HOME/nfapi/open-nFAPI/nfapi/public_inc -DMR_MAIN -DNB_ANTENNAS_RX=1 -lm -lpthread # -DD256STATS #-DD64STATS
# Stand-alone AVX2 test executable built from lte_dfts.c (-O2 with debug info, links backtrace.c).
lte_dfts_avx2: lte_dfts.c
gcc -O2 -std=gnu99 -mavx2 -g -ggdb -o lte_dfts_avx2 lte_dfts.c time_meas.c ../../SIMULATION/TOOLS/taus.c $$OPENAIR_HOME/common/utils/backtrace.c -I$$OPENAIR_HOME -I$$OPENAIR1_DIR -I$$OPENAIR_TARGETS -I$$OPENAIR2_DIR -I$$OPENAIR2_DIR/COMMON -I$$OPENAIR_HOME/common/utils -I$$OPENAIR_HOME/common/utils/T -DMR_MAIN -DNB_ANTENNAS_RX=1 -lm -lpthread # -DD256STATS #-DD64STATS
# Stand-alone AVX2 test executable built from oai_dfts.c (-O2 with debug info, links backtrace.c).
oai_dfts_avx2: oai_dfts.c
gcc -O2 -std=gnu99 -mavx2 -g -ggdb -o oai_dfts_avx2 oai_dfts.c time_meas.c ../../SIMULATION/TOOLS/taus.c $$OPENAIR_HOME/common/utils/backtrace.c -I$$OPENAIR_HOME -I$$OPENAIR1_DIR -I$$OPENAIR_TARGETS -I$$OPENAIR_TARGETS/COMMON -I$$OPENAIR_TARGETS/ARCH/COMMON -I$$OPENAIR2_DIR -I$$OPENAIR2_DIR/COMMON -I$$OPENAIR_HOME/common/utils -I$$OPENAIR_HOME/common/utils/T -I$$OPENAIR_HOME/common/utils/msc -I$$OPENAIR_HOME/nfapi/open-nFAPI/nfapi/public_inc -DMR_MAIN -DNB_ANTENNAS_RX=1 -lm -lpthread # -DD256STATS #-DD64STATS
# Assembly listing (-S) of the AVX2 build of lte_dfts.c.
lte_dfts_avx2.s: lte_dfts.c
gcc -O2 -std=gnu99 -mavx2 -S lte_dfts.c time_meas.c ../../SIMULATION/TOOLS/taus.c $$OPENAIR_HOME/common/utils/backtrace.c -I$$OPENAIR_HOME -I$$OPENAIR1_DIR -I$$OPENAIR_TARGETS -I$$OPENAIR2_DIR -I$$OPENAIR2_DIR/COMMON -I$$OPENAIR_HOME/common/utils -I$$OPENAIR_HOME/common/utils/T -DMR_MAIN -DNB_ANTENNAS_RX=1 -lm -lpthread # -DD256STATS #-DD64STATS
# Assembly listing (-S) of the AVX2 build of oai_dfts.c.
oai_dfts_avx2.s: oai_dfts.c
gcc -O2 -std=gnu99 -mavx2 -S oai_dfts.c time_meas.c ../../SIMULATION/TOOLS/taus.c $$OPENAIR_HOME/common/utils/backtrace.c -I$$OPENAIR_HOME -I$$OPENAIR1_DIR -I$$OPENAIR_TARGETS -I$$OPENAIR_TARGETS/COMMON -I$$OPENAIR_TARGETS/ARCH/COMMON -I$$OPENAIR2_DIR -I$$OPENAIR2_DIR/COMMON -I$$OPENAIR_HOME/common/utils -I$$OPENAIR_HOME/common/utils/T -I$$OPENAIR_HOME/common/utils/msc -I$$OPENAIR_HOME/nfapi/open-nFAPI/nfapi/public_inc -DMR_MAIN -DNB_ANTENNAS_RX=1 -lm -lpthread # -DD256STATS #-DD64STATS
# Assembly listing (-S) of the SSE4.1 build of lte_dfts.c.
lte_dfts_sse4.s: lte_dfts.c
gcc -O2 -std=gnu99 -msse4.1 -S lte_dfts.c time_meas.c ../../SIMULATION/TOOLS/taus.c $$OPENAIR_HOME/common/utils/backtrace.c -I$$OPENAIR_HOME -I$$OPENAIR1_DIR -I$$OPENAIR_TARGETS -I$$OPENAIR2_DIR -I$$OPENAIR2_DIR/COMMON -I$$OPENAIR_HOME/common/utils -I$$OPENAIR_HOME/common/utils/T -DMR_MAIN -DNB_ANTENNAS_RX=1 -lm -lpthread # -DD256STATS #-DD64STATS
# Assembly listing (-S) of the SSE4.1 build of oai_dfts.c.
oai_dfts_sse4.s: oai_dfts.c
gcc -O2 -std=gnu99 -msse4.1 -S oai_dfts.c time_meas.c ../../SIMULATION/TOOLS/taus.c $$OPENAIR_HOME/common/utils/backtrace.c -I$$OPENAIR_HOME -I$$OPENAIR1_DIR -I$$OPENAIR_TARGETS -I$$OPENAIR_TARGETS/COMMON -I$$OPENAIR_TARGETS/ARCH/COMMON -I$$OPENAIR2_DIR -I$$OPENAIR2_DIR/COMMON -I$$OPENAIR_HOME/common/utils -I$$OPENAIR_HOME/common/utils/T -I$$OPENAIR_HOME/common/utils/msc -I$$OPENAIR_HOME/nfapi/open-nFAPI/nfapi/public_inc -DMR_MAIN -DNB_ANTENNAS_RX=1 -lm -lpthread # -DD256STATS #-DD64STATS
# Run the lte_dfts AVX2 test executable and keep only the output lines containing "cycles".
dft_cycles_avx2: lte_dfts_avx2
./lte_dfts_avx2 | egrep cycles
# Run the oai_dfts AVX2 test executable and keep only the output lines containing "cycles".
# NOTE(review): this reuses the target name of the rule just above; a target can carry only one recipe - confirm which one is intended.
dft_cycles_avx2: oai_dfts_avx2
./oai_dfts_avx2 | egrep cycles
......@@ -5353,6 +5353,607 @@ void idft8192(int16_t *x,int16_t *y,unsigned char scale)
#endif
// Table handed to bfly4/ibfly4 (and the _256 variants) by dft16384/idft16384.
// Those functions reinterpret it as simd_q15_t / simd256_q15_t vectors, so it
// is explicitly 32-byte aligned, like tw32768.
int16_t tw16384[3*2*4096] __attribute__((aligned(32)));
#ifndef __AVX2__
void dft16384(int16_t *x,int16_t *y,unsigned char scale)
{
simd_q15_t xtmp[4096],ytmp[4096],*tw16384_128p=(simd_q15_t *)tw16384,*x128=(simd_q15_t *)x,*y128=(simd_q15_t *)y,*y128p=(simd_q15_t *)y;
simd_q15_t *ytmpp = &ytmp[0];
int i,j;
for (i=0,j=0; i<4096; i+=4,j++) {
transpose16_ooff(x128+i,xtmp+j,1024);
}
dft4096((int16_t*)(xtmp),(int16_t*)(ytmp),1);
dft4096((int16_t*)(xtmp+1024),(int16_t*)(ytmp+1024),1);
dft4096((int16_t*)(xtmp+2048),(int16_t*)(ytmp+2048),1);
dft4096((int16_t*)(xtmp+3072),(int16_t*)(ytmp+3072),1);
for (i=0; i<1024; i++) {
bfly4(ytmpp,ytmpp+1024,ytmpp+2048,ytmpp+3072,
y128p,y128p+1024,y128p+2048,y128p+3072,
tw16384_128p,tw16384_128p+1024,tw16384_128p+2048);
tw16384_128p++;
y128p++;
ytmpp++;
}
if (scale>0) {
for (i=0; i<256; i++) {
y128[0] = shiftright_int16(y128[0],1);
y128[1] = shiftright_int16(y128[1],1);
y128[2] = shiftright_int16(y128[2],1);
y128[3] = shiftright_int16(y128[3],1);
y128[4] = shiftright_int16(y128[4],1);
y128[5] = shiftright_int16(y128[5],1);
y128[6] = shiftright_int16(y128[6],1);
y128[7] = shiftright_int16(y128[7],1);
y128[8] = shiftright_int16(y128[8],1);
y128[9] = shiftright_int16(y128[9],1);
y128[10] = shiftright_int16(y128[10],1);
y128[11] = shiftright_int16(y128[11],1);
y128[12] = shiftright_int16(y128[12],1);
y128[13] = shiftright_int16(y128[13],1);
y128[14] = shiftright_int16(y128[14],1);
y128[15] = shiftright_int16(y128[15],1);
y128+=16;
}
}
_mm_empty();
_m_empty();
}
void idft16384(int16_t *x,int16_t *y,unsigned char scale)
{
simd_q15_t xtmp[4096],ytmp[4096],*tw16384_128p=(simd_q15_t *)tw16384,*x128=(simd_q15_t *)x,*y128=(simd_q15_t *)y,*y128p=(simd_q15_t *)y;
simd_q15_t *ytmpp = &ytmp[0];
int i,j;
for (i=0,j=0; i<4096; i+=4,j++) {
transpose16_ooff(x128+i,xtmp+j,1024);
}
idft4096((int16_t*)(xtmp),(int16_t*)(ytmp),1);
idft4096((int16_t*)(xtmp+1024),(int16_t*)(ytmp+1024),1);
idft4096((int16_t*)(xtmp+2048),(int16_t*)(ytmp+2048),1);
idft4096((int16_t*)(xtmp+3072),(int16_t*)(ytmp+3072),1);
for (i=0; i<1024; i++) {
ibfly4(ytmpp,ytmpp+1024,ytmpp+2048,ytmpp+3072,
y128p,y128p+1024,y128p+2048,y128p+3072,
tw16384_128p,tw16384_128p+1024,tw16384_128p+2048);
tw16384_128p++;
y128p++;
ytmpp++;
}
if (scale>0) {
for (i=0; i<256; i++) {
y128[0] = shiftright_int16(y128[0],scale);
y128[1] = shiftright_int16(y128[1],scale);
y128[2] = shiftright_int16(y128[2],scale);
y128[3] = shiftright_int16(y128[3],scale);
y128[4] = shiftright_int16(y128[4],scale);
y128[5] = shiftright_int16(y128[5],scale);
y128[6] = shiftright_int16(y128[6],scale);
y128[7] = shiftright_int16(y128[7],scale);
y128[8] = shiftright_int16(y128[8],scale);
y128[9] = shiftright_int16(y128[9],scale);
y128[10] = shiftright_int16(y128[10],scale);
y128[11] = shiftright_int16(y128[11],scale);
y128[12] = shiftright_int16(y128[12],scale);
y128[13] = shiftright_int16(y128[13],scale);
y128[14] = shiftright_int16(y128[14],scale);
y128[15] = shiftright_int16(y128[15],scale);
y128+=16;
}
}
_mm_empty();
_m_empty();
}
#else //__AVX2__
void dft16384(int16_t *x,int16_t *y,unsigned char scale)
{
simd256_q15_t xtmp[2048],ytmp[2048],*tw16384_256p=(simd256_q15_t *)tw16384,*x256=(simd256_q15_t *)x,*y256=(simd256_q15_t *)y,*y256p=(simd256_q15_t *)y;
simd256_q15_t *ytmpp = &ytmp[0];
int i,j;
for (i=0,j=0; i<2048; i+=4,j++) {
transpose16_ooff_simd256(x256+i,xtmp+j,512);
}
dft4096((int16_t*)(xtmp),(int16_t*)(ytmp),1);
dft4096((int16_t*)(xtmp+512),(int16_t*)(ytmp+512),1);
dft4096((int16_t*)(xtmp+1024),(int16_t*)(ytmp+1024),1);
dft4096((int16_t*)(xtmp+1536),(int16_t*)(ytmp+1536),1);
for (i=0; i<512; i++) {
bfly4_256(ytmpp,ytmpp+512,ytmpp+1024,ytmpp+1536,
y256p,y256p+512,y256p+1024,y256p+1536,
tw16384_256p,tw16384_256p+512,tw16384_256p+1024);
tw16384_256p++;
y256p++;
ytmpp++;
}
if (scale>0) {
for (i=0; i<128; i++) {
y256[0] = shiftright_int16_simd256(y256[0],1);
y256[1] = shiftright_int16_simd256(y256[1],1);
y256[2] = shiftright_int16_simd256(y256[2],1);
y256[3] = shiftright_int16_simd256(y256[3],1);
y256[4] = shiftright_int16_simd256(y256[4],1);
y256[5] = shiftright_int16_simd256(y256[5],1);
y256[6] = shiftright_int16_simd256(y256[6],1);
y256[7] = shiftright_int16_simd256(y256[7],1);
y256[8] = shiftright_int16_simd256(y256[8],1);
y256[9] = shiftright_int16_simd256(y256[9],1);
y256[10] = shiftright_int16_simd256(y256[10],1);
y256[11] = shiftright_int16_simd256(y256[11],1);
y256[12] = shiftright_int16_simd256(y256[12],1);
y256[13] = shiftright_int16_simd256(y256[13],1);
y256[14] = shiftright_int16_simd256(y256[14],1);
y256[15] = shiftright_int16_simd256(y256[15],1);
y256+=16;
}
}
_mm_empty();
_m_empty();
}
void idft16384(int16_t *x,int16_t *y,unsigned char scale)
{
simd256_q15_t xtmp[2048],ytmp[2048],*tw16384_256p=(simd256_q15_t *)tw16384,*x256=(simd256_q15_t *)x,*y256=(simd256_q15_t *)y,*y256p=(simd256_q15_t *)y;
simd256_q15_t *ytmpp = &ytmp[0];
int i,j;
for (i=0,j=0; i<2048; i+=4,j++) {
transpose16_ooff_simd256(x256+i,xtmp+j,512);
}
idft4096((int16_t*)(xtmp),(int16_t*)(ytmp),1);
idft4096((int16_t*)(xtmp+512),(int16_t*)(ytmp+512),1);
idft4096((int16_t*)(xtmp+1024),(int16_t*)(ytmp+1024),1);
idft4096((int16_t*)(xtmp+1536),(int16_t*)(ytmp+1536),1);
for (i=0; i<512; i++) {
ibfly4_256(ytmpp,ytmpp+512,ytmpp+1024,ytmpp+1536,
y256p,y256p+512,y256p+1024,y256p+1536,
tw16384_256p,tw16384_256p+512,tw16384_256p+1024);
tw16384_256p++;
y256p++;
ytmpp++;
}
if (scale>0) {
for (i=0; i<128; i++) {
y256[0] = shiftright_int16_simd256(y256[0],1);
y256[1] = shiftright_int16_simd256(y256[1],1);
y256[2] = shiftright_int16_simd256(y256[2],1);
y256[3] = shiftright_int16_simd256(y256[3],1);
y256[4] = shiftright_int16_simd256(y256[4],1);
y256[5] = shiftright_int16_simd256(y256[5],1);
y256[6] = shiftright_int16_simd256(y256[6],1);
y256[7] = shiftright_int16_simd256(y256[7],1);
y256[8] = shiftright_int16_simd256(y256[8],1);
y256[9] = shiftright_int16_simd256(y256[9],1);
y256[10] = shiftright_int16_simd256(y256[10],1);
y256[11] = shiftright_int16_simd256(y256[11],1);
y256[12] = shiftright_int16_simd256(y256[12],1);
y256[13] = shiftright_int16_simd256(y256[13],1);
y256[14] = shiftright_int16_simd256(y256[14],1);
y256[15] = shiftright_int16_simd256(y256[15],1);
y256+=16;
}
}
_mm_empty();
_m_empty();
}
#endif //__AVX2__
// Table handed to bfly2/ibfly2 (and the _256 variants) by dft32768/idft32768,
// which reinterpret it as simd_q15_t / simd256_q15_t vectors.
int16_t tw32768[2*16384] __attribute__((aligned(32)));
#ifndef __AVX2__
void dft32768(int16_t *x,int16_t *y,unsigned char scale)
{
simdshort_q15_t xtmp[16384],*xtmpp,*x64 = (simdshort_q15_t *)x;
simd_q15_t ytmp[8192],*tw32768_128p=(simd_q15_t *)tw32768,*y128=(simd_q15_t *)y,*y128p=(simd_q15_t *)y;
simd_q15_t *ytmpp = &ytmp[0];
int i;
simd_q15_t ONE_OVER_SQRT2_Q15_128 = set1_int16(ONE_OVER_SQRT2_Q15);
xtmpp = xtmp;
for (i=0; i<256; i++) {
transpose4_ooff(x64 ,xtmpp,8192);
transpose4_ooff(x64+2,xtmpp+1,8192);
transpose4_ooff(x64+4,xtmpp+2,8192);
transpose4_ooff(x64+6,xtmpp+3,8192);
transpose4_ooff(x64+8,xtmpp+4,8192);
transpose4_ooff(x64+10,xtmpp+5,8192);
transpose4_ooff(x64+12,xtmpp+6,8192);
transpose4_ooff(x64+14,xtmpp+7,8192);
transpose4_ooff(x64+16,xtmpp+8,8192);
transpose4_ooff(x64+18,xtmpp+9,8192);
transpose4_ooff(x64+20,xtmpp+10,8192);
transpose4_ooff(x64+22,xtmpp+11,8192);
transpose4_ooff(x64+24,xtmpp+12,8192);
transpose4_ooff(x64+26,xtmpp+13,8192);
transpose4_ooff(x64+28,xtmpp+14,8192);
transpose4_ooff(x64+30,xtmpp+15,8192);
transpose4_ooff(x64+32,xtmpp+16,8192);
transpose4_ooff(x64+34,xtmpp+17,8192);
transpose4_ooff(x64+36,xtmpp+18,8192);
transpose4_ooff(x64+38,xtmpp+19,8192);
transpose4_ooff(x64+40,xtmpp+20,8192);
transpose4_ooff(x64+42,xtmpp+21,8192);
transpose4_ooff(x64+44,xtmpp+22,8192);
transpose4_ooff(x64+46,xtmpp+23,8192);
transpose4_ooff(x64+48,xtmpp+24,8192);
transpose4_ooff(x64+50,xtmpp+25,8192);
transpose4_ooff(x64+52,xtmpp+26,8192);
transpose4_ooff(x64+54,xtmpp+27,8192);
transpose4_ooff(x64+56,xtmpp+28,8192);
transpose4_ooff(x64+58,xtmpp+29,8192);
transpose4_ooff(x64+60,xtmpp+30,8192);
transpose4_ooff(x64+62,xtmpp+31,8192);
x64+=64;
xtmpp+=32;
}
dft16384((int16_t*)(xtmp),(int16_t*)ytmp,1);
dft16384((int16_t*)(xtmp+8192),(int16_t*)(ytmp+4096),1);
for (i=0; i<4096; i++) {
bfly2(ytmpp,ytmpp+4096,
y128p,y128p+4096,
tw32768_128p);
tw32768_128p++;
y128p++;
ytmpp++;
}
if (scale>0) {
y128p = y128;
for (i=0; i<512; i++) {
y128p[0] = mulhi_int16(y128p[0],ONE_OVER_SQRT2_Q15_128);
y128p[1] = mulhi_int16(y128p[1],ONE_OVER_SQRT2_Q15_128);
y128p[2] = mulhi_int16(y128p[2],ONE_OVER_SQRT2_Q15_128);
y128p[3] = mulhi_int16(y128p[3],ONE_OVER_SQRT2_Q15_128);
y128p[4] = mulhi_int16(y128p[4],ONE_OVER_SQRT2_Q15_128);
y128p[5] = mulhi_int16(y128p[5],ONE_OVER_SQRT2_Q15_128);
y128p[6] = mulhi_int16(y128p[6],ONE_OVER_SQRT2_Q15_128);
y128p[7] = mulhi_int16(y128p[7],ONE_OVER_SQRT2_Q15_128);
y128p[8] = mulhi_int16(y128p[8],ONE_OVER_SQRT2_Q15_128);
y128p[9] = mulhi_int16(y128p[9],ONE_OVER_SQRT2_Q15_128);
y128p[10] = mulhi_int16(y128p[10],ONE_OVER_SQRT2_Q15_128);
y128p[11] = mulhi_int16(y128p[11],ONE_OVER_SQRT2_Q15_128);
y128p[12] = mulhi_int16(y128p[12],ONE_OVER_SQRT2_Q15_128);
y128p[13] = mulhi_int16(y128p[13],ONE_OVER_SQRT2_Q15_128);
y128p[14] = mulhi_int16(y128p[14],ONE_OVER_SQRT2_Q15_128);
y128p[15] = mulhi_int16(y128p[15],ONE_OVER_SQRT2_Q15_128);
y128p+=16;
}
}
_mm_empty();
_m_empty();
}
void idft32768(int16_t *x,int16_t *y,unsigned char scale)
{
simdshort_q15_t xtmp[16384],*xtmpp,*x64 = (simdshort_q15_t *)x;
simd_q15_t ytmp[8192],*tw32768_128p=(simd_q15_t *)tw32768,*y128=(simd_q15_t *)y,*y128p=(simd_q15_t *)y;
simd_q15_t *ytmpp = &ytmp[0];
int i;
simd_q15_t ONE_OVER_SQRT2_Q15_128 = set1_int16(ONE_OVER_SQRT2_Q15);
xtmpp = xtmp;
for (i=0; i<256; i++) {
transpose4_ooff(x64 ,xtmpp,8192);
transpose4_ooff(x64+2,xtmpp+1,8192);
transpose4_ooff(x64+4,xtmpp+2,8192);
transpose4_ooff(x64+6,xtmpp+3,8192);
transpose4_ooff(x64+8,xtmpp+4,8192);
transpose4_ooff(x64+10,xtmpp+5,8192);
transpose4_ooff(x64+12,xtmpp+6,8192);
transpose4_ooff(x64+14,xtmpp+7,8192);
transpose4_ooff(x64+16,xtmpp+8,8192);
transpose4_ooff(x64+18,xtmpp+9,8192);
transpose4_ooff(x64+20,xtmpp+10,8192);
transpose4_ooff(x64+22,xtmpp+11,8192);
transpose4_ooff(x64+24,xtmpp+12,8192);
transpose4_ooff(x64+26,xtmpp+13,8192);
transpose4_ooff(x64+28,xtmpp+14,8192);
transpose4_ooff(x64+30,xtmpp+15,8192);
transpose4_ooff(x64+32,xtmpp+16,8192);
transpose4_ooff(x64+34,xtmpp+17,8192);
transpose4_ooff(x64+36,xtmpp+18,8192);
transpose4_ooff(x64+38,xtmpp+19,8192);
transpose4_ooff(x64+40,xtmpp+20,8192);
transpose4_ooff(x64+42,xtmpp+21,8192);
transpose4_ooff(x64+44,xtmpp+22,8192);
transpose4_ooff(x64+46,xtmpp+23,8192);
transpose4_ooff(x64+48,xtmpp+24,8192);
transpose4_ooff(x64+50,xtmpp+25,8192);
transpose4_ooff(x64+52,xtmpp+26,8192);
transpose4_ooff(x64+54,xtmpp+27,8192);
transpose4_ooff(x64+56,xtmpp+28,8192);
transpose4_ooff(x64+58,xtmpp+29,8192);
transpose4_ooff(x64+60,xtmpp+30,8192);
transpose4_ooff(x64+62,xtmpp+31,8192);
x64+=64;
xtmpp+=32;
}
idft16384((int16_t*)(xtmp),(int16_t*)ytmp,1);
idft16384((int16_t*)(xtmp+8192),(int16_t*)(ytmp+4096),1);
for (i=0; i<4096; i++) {
ibfly2(ytmpp,ytmpp+4096,
y128p,y128p+4096,
tw32768_128p);
tw32768_128p++;
y128p++;
ytmpp++;
}
if (scale>0) {
y128p = y128;
for (i=0; i<512; i++) {
y128p[0] = mulhi_int16(y128p[0],ONE_OVER_SQRT2_Q15_128);
y128p[1] = mulhi_int16(y128p[1],ONE_OVER_SQRT2_Q15_128);
y128p[2] = mulhi_int16(y128p[2],ONE_OVER_SQRT2_Q15_128);
y128p[3] = mulhi_int16(y128p[3],ONE_OVER_SQRT2_Q15_128);
y128p[4] = mulhi_int16(y128p[4],ONE_OVER_SQRT2_Q15_128);
y128p[5] = mulhi_int16(y128p[5],ONE_OVER_SQRT2_Q15_128);
y128p[6] = mulhi_int16(y128p[6],ONE_OVER_SQRT2_Q15_128);
y128p[7] = mulhi_int16(y128p[7],ONE_OVER_SQRT2_Q15_128);
y128p[8] = mulhi_int16(y128p[8],ONE_OVER_SQRT2_Q15_128);
y128p[9] = mulhi_int16(y128p[9],ONE_OVER_SQRT2_Q15_128);
y128p[10] = mulhi_int16(y128p[10],ONE_OVER_SQRT2_Q15_128);
y128p[11] = mulhi_int16(y128p[11],ONE_OVER_SQRT2_Q15_128);
y128p[12] = mulhi_int16(y128p[12],ONE_OVER_SQRT2_Q15_128);
y128p[13] = mulhi_int16(y128p[13],ONE_OVER_SQRT2_Q15_128);
y128p[14] = mulhi_int16(y128p[14],ONE_OVER_SQRT2_Q15_128);
y128p[15] = mulhi_int16(y128p[15],ONE_OVER_SQRT2_Q15_128);
y128p+=16;
}
}
_mm_empty();
_m_empty();
}
#else // __AVX2__
/*
 * Forward DFT of size 32768 (per the function name), AVX2 build.
 * Built from two dft16384 calls followed by one pass of bfly2_256.
 *
 * x     : input, read as 4096 simd256_q15_t vectors
 * y     : output, written as 4096 simd256_q15_t vectors
 * scale : when non-zero, every output vector is passed through
 *         mulhi_int16_simd256 with ONE_OVER_SQRT2_Q15
 *
 * Loop bounds now agree with the AVX2 idft32768:
 *  - 64*32 = 2048 transpose calls. The previous 256*32 advanced the write
 *    pointer 8192 vectors through the 4096-entry xtmp and read 16384 input
 *    vectors, i.e. it ran past both buffers.
 *  - the scaling pass covers all 4096 output vectors written by the bfly2_256
 *    loop. The previous 64*16 = 1024 left three quarters of the output unscaled.
 */
void dft32768(int16_t *x,int16_t *y,unsigned char scale)
{
  simd256_q15_t xtmp[4096],*x256 = (simd256_q15_t *)x;
  simd256_q15_t ytmp[4096],*tw32768_256p=(simd256_q15_t *)tw32768,*y256=(simd256_q15_t *)y;
  simd256_q15_t ONE_OVER_SQRT2_Q15_128 = set1_int16_simd256(ONE_OVER_SQRT2_Q15);
  int i;

  // Feed the input 2 vectors at a time to transpose4_ooff_simd256 (output offset 2048)
  for (i=0; i<2048; i++) {
    transpose4_ooff_simd256(x256+2*i,xtmp+i,2048);
  }

  // One dft16384 per half of the rearranged input
  dft16384((int16_t*)(xtmp),(int16_t*)ytmp,1);
  dft16384((int16_t*)(xtmp+2048),(int16_t*)(ytmp+2048),1);

  // Combine the two halves into the output
  for (i=0; i<2048; i++) {
    bfly2_256(ytmp+i,ytmp+2048+i,
              y256+i,y256+2048+i,
              tw32768_256p+i);
  }

  if (scale>0) {
    for (i=0; i<4096; i++) {
      y256[i] = mulhi_int16_simd256(y256[i],ONE_OVER_SQRT2_Q15_128);
    }
  }

  _mm_empty();
  _m_empty();
}
void idft32768(int16_t *x,int16_t *y,unsigned char scale)
{
simd256_q15_t xtmp[4096],*xtmpp,*x256 = (simd256_q15_t *)x;
simd256_q15_t ytmp[4096],*tw32768_256p=(simd256_q15_t *)tw32768,*y256=(simd256_q15_t *)y,*y256p=(simd256_q15_t *)y;
simd256_q15_t *ytmpp = &ytmp[0];
int i;
simd256_q15_t ONE_OVER_SQRT2_Q15_128 = set1_int16_simd256(ONE_OVER_SQRT2_Q15);
xtmpp = xtmp;
for (i=0; i<64; i++) {
transpose4_ooff_simd256(x256 ,xtmpp,2048);
transpose4_ooff_simd256(x256+2,xtmpp+1,2048);
transpose4_ooff_simd256(x256+4,xtmpp+2,2048);
transpose4_ooff_simd256(x256+6,xtmpp+3,2048);
transpose4_ooff_simd256(x256+8,xtmpp+4,2048);
transpose4_ooff_simd256(x256+10,xtmpp+5,2048);
transpose4_ooff_simd256(x256+12,xtmpp+6,2048);
transpose4_ooff_simd256(x256+14,xtmpp+7,2048);
transpose4_ooff_simd256(x256+16,xtmpp+8,2048);
transpose4_ooff_simd256(x256+18,xtmpp+9,2048);
transpose4_ooff_simd256(x256+20,xtmpp+10,2048);
transpose4_ooff_simd256(x256+22,xtmpp+11,2048);
transpose4_ooff_simd256(x256+24,xtmpp+12,2048);
transpose4_ooff_simd256(x256+26,xtmpp+13,2048);
transpose4_ooff_simd256(x256+28,xtmpp+14,2048);
transpose4_ooff_simd256(x256+30,xtmpp+15,2048);
transpose4_ooff_simd256(x256+32,xtmpp+16,2048);
transpose4_ooff_simd256(x256+34,xtmpp+17,2048);
transpose4_ooff_simd256(x256+36,xtmpp+18,2048);
transpose4_ooff_simd256(x256+38,xtmpp+19,2048);
transpose4_ooff_simd256(x256+40,xtmpp+20,2048);
transpose4_ooff_simd256(x256+42,xtmpp+21,2048);
transpose4_ooff_simd256(x256+44,xtmpp+22,2048);
transpose4_ooff_simd256(x256+46,xtmpp+23,2048);
transpose4_ooff_simd256(x256+48,xtmpp+24,2048);
transpose4_ooff_simd256(x256+50,xtmpp+25,2048);
transpose4_ooff_simd256(x256+52,xtmpp+26,2048);
transpose4_ooff_simd256(x256+54,xtmpp+27,2048);
transpose4_ooff_simd256(x256+56,xtmpp+28,2048);
transpose4_ooff_simd256(x256+58,xtmpp+29,2048);
transpose4_ooff_simd256(x256+60,xtmpp+30,2048);
transpose4_ooff_simd256(x256+62,xtmpp+31,2048);
x256+=64;
xtmpp+=32;
}
idft16384((int16_t*)(xtmp),(int16_t*)ytmp,1);
idft16384((int16_t*)(xtmp+2048),(int16_t*)(ytmp+2048),1);
for (i=0; i<2048; i++) {
ibfly2_256(ytmpp,ytmpp+2048,
y256p,y256p+2048,
tw32768_256p);
tw32768_256p++;
y256p++;
ytmpp++;
}
if (scale>0) {
y256p = y256;
for (i=0; i<256; i++) {
y256p[0] = mulhi_int16_simd256(y256p[0],ONE_OVER_SQRT2_Q15_128);
y256p[1] = mulhi_int16_simd256(y256p[1],ONE_OVER_SQRT2_Q15_128);
y256p[2] = mulhi_int16_simd256(y256p[2],ONE_OVER_SQRT2_Q15_128);
y256p[3] = mulhi_int16_simd256(y256p[3],ONE_OVER_SQRT2_Q15_128);
y256p[4] = mulhi_int16_simd256(y256p[4],ONE_OVER_SQRT2_Q15_128);
y256p[5] = mulhi_int16_simd256(y256p[5],ONE_OVER_SQRT2_Q15_128);
y256p[6] = mulhi_int16_simd256(y256p[6],ONE_OVER_SQRT2_Q15_128);
y256p[7] = mulhi_int16_simd256(y256p[7],ONE_OVER_SQRT2_Q15_128);
y256p[8] = mulhi_int16_simd256(y256p[8],ONE_OVER_SQRT2_Q15_128);
y256p[9] = mulhi_int16_simd256(y256p[9],ONE_OVER_SQRT2_Q15_128);
y256p[10] = mulhi_int16_simd256(y256p[10],ONE_OVER_SQRT2_Q15_128);
y256p[11] = mulhi_int16_simd256(y256p[11],ONE_OVER_SQRT2_Q15_128);
y256p[12] = mulhi_int16_simd256(y256p[12],ONE_OVER_SQRT2_Q15_128);
y256p[13] = mulhi_int16_simd256(y256p[13],ONE_OVER_SQRT2_Q15_128);
y256p[14] = mulhi_int16_simd256(y256p[14],ONE_OVER_SQRT2_Q15_128);
y256p[15] = mulhi_int16_simd256(y256p[15],ONE_OVER_SQRT2_Q15_128);
y256p+=16;
}
}
_mm_empty();
_m_empty();
}
#endif
int16_t twa1536[1024],twb1536[1024];
// 512 x 3
......@@ -6176,12 +6777,99 @@ int16_t twb49152[32768] __attribute__((aligned(32)));
// 16384 x 3
/*
 * Forward DFT of size 49152 = 16384 x 3, built from three dft16384 calls
 * followed by one pass of bfly3.
 *
 * input  : read as 49152 32-bit words
 * output : written as 12288 simd_q15_t vectors (98304 int16_t)
 * scale  : when equal to 1, every output vector is passed through mulhi_int16
 *          with ONE_OVER_SQRT3_Q15
 *
 * The leading AssertFatal(1==0,"Need to do this ..") placeholder was removed:
 * it aborted unconditionally and made the implementation below unreachable.
 * NOTE(review): the two work buffers take 2*3*16384*4 bytes (384 KiB) of stack.
 */
void dft49152(int16_t *input, int16_t *output,uint8_t scale) {
  int i,i2;
  uint32_t tmp[3][16384] __attribute__((aligned(32)));
  uint32_t tmpo[3][16384] __attribute__((aligned(32)));
  uint32_t *in32=(uint32_t *)input;
  simd_q15_t *y128p=(simd_q15_t*)output;
  simd_q15_t ONE_OVER_SQRT3_Q15_128 = set1_int16(ONE_OVER_SQRT3_Q15);

  // De-interleave: 32-bit word 3*i+r of the input goes to tmp[r][i]
  for (i=0; i<16384; i++) {
    tmp[0][i] = in32[3*i];
    tmp[1][i] = in32[3*i+1];
    tmp[2][i] = in32[3*i+2];
  }

  dft16384((int16_t*)(tmp[0]),(int16_t*)(tmpo[0]),1);
  dft16384((int16_t*)(tmp[1]),(int16_t*)(tmpo[1]),1);
  dft16384((int16_t*)(tmp[2]),(int16_t*)(tmpo[2]),1);

  // i counts int16_t elements (8 per step), i2 counts 32-bit words (4 per step)
  for (i=0,i2=0; i<32768; i+=8,i2+=4) {
    bfly3((simd_q15_t*)(&tmpo[0][i2]),(simd_q15_t*)(&tmpo[1][i2]),((simd_q15_t*)&tmpo[2][i2]),
          (simd_q15_t*)(output+i),(simd_q15_t*)(output+32768+i),(simd_q15_t*)(output+65536+i),
          (simd_q15_t*)(twa49152+i),(simd_q15_t*)(twb49152+i));
  }

  if (scale==1) {
    for (i=0; i<12288; i++) {
      y128p[i] = mulhi_int16(y128p[i],ONE_OVER_SQRT3_Q15_128);
    }
  }

  _mm_empty();
  _m_empty();
}
/*
 * Inverse DFT of size 49152 = 16384 x 3, built from three idft16384 calls
 * followed by one pass of ibfly3.
 *
 * input  : read as 49152 32-bit words
 * output : written as 12288 simd_q15_t vectors (98304 int16_t)
 * scale  : when equal to 1, every output vector is passed through mulhi_int16
 *          with ONE_OVER_SQRT3_Q15
 *
 * The leading AssertFatal(1==0,"Need to do this ..") placeholder was removed:
 * it aborted unconditionally and made the implementation below unreachable.
 * NOTE(review): the two work buffers take 2*3*16384*4 bytes (384 KiB) of stack.
 */
void idft49152(int16_t *input, int16_t *output,uint8_t scale) {
  int i,i2;
  uint32_t tmp[3][16384] __attribute__((aligned(32)));
  uint32_t tmpo[3][16384] __attribute__((aligned(32)));
  uint32_t *in32=(uint32_t *)input;
  simd_q15_t *y128p=(simd_q15_t*)output;
  simd_q15_t ONE_OVER_SQRT3_Q15_128 = set1_int16(ONE_OVER_SQRT3_Q15);

  // De-interleave: 32-bit word 3*i+r of the input goes to tmp[r][i]
  for (i=0; i<16384; i++) {
    tmp[0][i] = in32[3*i];
    tmp[1][i] = in32[3*i+1];
    tmp[2][i] = in32[3*i+2];
  }

  idft16384((int16_t*)(tmp[0]),(int16_t*)(tmpo[0]),1);
  idft16384((int16_t*)(tmp[1]),(int16_t*)(tmpo[1]),1);
  idft16384((int16_t*)(tmp[2]),(int16_t*)(tmpo[2]),1);

  // i counts int16_t elements (8 per step), i2 counts 32-bit words (4 per step)
  for (i=0,i2=0; i<32768; i+=8,i2+=4) {
    ibfly3((simd_q15_t*)(&tmpo[0][i2]),(simd_q15_t*)(&tmpo[1][i2]),((simd_q15_t*)&tmpo[2][i2]),
           (simd_q15_t*)(output+i),(simd_q15_t*)(output+32768+i),(simd_q15_t*)(output+65536+i),
           (simd_q15_t*)(twa49152+i),(simd_q15_t*)(twb49152+i));
  }

  if (scale==1) {
    for (i=0; i<12288; i++) {
      y128p[i] = mulhi_int16(y128p[i],ONE_OVER_SQRT3_Q15_128);
    }
  }

  _mm_empty();
  _m_empty();
}
int16_t twa73728[49152] __attribute__((aligned(32)));
......@@ -6198,17 +6886,104 @@ void idft73728(int16_t *input, int16_t *output,uint8_t scale) {
}
// Tables handed to bfly3/ibfly3 by dft98304/idft98304. Those loops read 8
// int16_t at a time from element offsets 0..65528, so each table needs 65536
// entries; the conflicting 49152-entry definitions are dropped.
int16_t twa98304[65536] __attribute__((aligned(32)));
int16_t twb98304[65536] __attribute__((aligned(32)));
// 32768 x 3
/*
 * Forward DFT of size 98304 = 32768 x 3, built from three dft32768 calls
 * followed by one pass of bfly3.
 *
 * input  : read as 98304 32-bit words
 * output : written as 24576 simd_q15_t vectors (196608 int16_t)
 * scale  : when equal to 1, every output vector is passed through mulhi_int16
 *          with ONE_OVER_SQRT3_Q15
 *
 * The leading AssertFatal(1==0,"Need to do this ..") placeholder was removed:
 * it aborted unconditionally and made the implementation below unreachable.
 * NOTE(review): the two work buffers take 2*3*32768*4 bytes (768 KiB) of stack.
 */
void dft98304(int16_t *input, int16_t *output,uint8_t scale) {
  int i,i2;
  uint32_t tmp[3][32768] __attribute__((aligned(32)));
  uint32_t tmpo[3][32768] __attribute__((aligned(32)));
  uint32_t *in32=(uint32_t *)input;
  simd_q15_t *y128p=(simd_q15_t*)output;
  simd_q15_t ONE_OVER_SQRT3_Q15_128 = set1_int16(ONE_OVER_SQRT3_Q15);

  // De-interleave: 32-bit word 3*i+r of the input goes to tmp[r][i]
  for (i=0; i<32768; i++) {
    tmp[0][i] = in32[3*i];
    tmp[1][i] = in32[3*i+1];
    tmp[2][i] = in32[3*i+2];
  }

  dft32768((int16_t*)(tmp[0]),(int16_t*)(tmpo[0]),1);
  dft32768((int16_t*)(tmp[1]),(int16_t*)(tmpo[1]),1);
  dft32768((int16_t*)(tmp[2]),(int16_t*)(tmpo[2]),1);

  // i counts int16_t elements (8 per step), i2 counts 32-bit words (4 per step)
  for (i=0,i2=0; i<65536; i+=8,i2+=4) {
    bfly3((simd_q15_t*)(&tmpo[0][i2]),(simd_q15_t*)(&tmpo[1][i2]),((simd_q15_t*)&tmpo[2][i2]),
          (simd_q15_t*)(output+i),(simd_q15_t*)(output+65536+i),(simd_q15_t*)(output+131072+i),
          (simd_q15_t*)(twa98304+i),(simd_q15_t*)(twb98304+i));
  }

  if (scale==1) {
    for (i=0; i<24576; i++) {
      y128p[i] = mulhi_int16(y128p[i],ONE_OVER_SQRT3_Q15_128);
    }
  }

  _mm_empty();
  _m_empty();
}
void idft98304(int16_t *input, int16_t *output,uint8_t scale) {
AssertFatal(1==0,"Need to do this ..\n");
int i,i2,j;
uint32_t tmp[3][32768] __attribute__((aligned(32)));
uint32_t tmpo[3][32768] __attribute__((aligned(32)));
simd_q15_t *y128p=(simd_q15_t*)output;
simd_q15_t ONE_OVER_SQRT3_Q15_128 = set1_int16(ONE_OVER_SQRT3_Q15);
for (i=0,j=0; i<32768; i++) {
tmp[0][i] = ((uint32_t *)input)[j++];
tmp[1][i] = ((uint32_t *)input)[j++];
tmp[2][i] = ((uint32_t *)input)[j++];
}
idft32768((int16_t*)(tmp[0]),(int16_t*)(tmpo[0]),1);
idft32768((int16_t*)(tmp[1]),(int16_t*)(tmpo[1]),1);
idft32768((int16_t*)(tmp[2]),(int16_t*)(tmpo[2]),1);
for (i=0,i2=0; i<65536; i+=8,i2+=4) {
ibfly3((simd_q15_t*)(&tmpo[0][i2]),(simd_q15_t*)(&tmpo[1][i2]),((simd_q15_t*)&tmpo[2][i2]),
(simd_q15_t*)(output+i),(simd_q15_t*)(output+65536+i),(simd_q15_t*)(output+131072+i),
(simd_q15_t*)(twa98304+i),(simd_q15_t*)(twb98304+i));
}
if (scale==1) {
for (i=0; i<1536; i++) {
y128p[0] = mulhi_int16(y128p[0],ONE_OVER_SQRT3_Q15_128);
y128p[1] = mulhi_int16(y128p[1],ONE_OVER_SQRT3_Q15_128);
y128p[2] = mulhi_int16(y128p[2],ONE_OVER_SQRT3_Q15_128);
y128p[3] = mulhi_int16(y128p[3],ONE_OVER_SQRT3_Q15_128);
y128p[4] = mulhi_int16(y128p[4],ONE_OVER_SQRT3_Q15_128);
y128p[5] = mulhi_int16(y128p[5],ONE_OVER_SQRT3_Q15_128);
y128p[6] = mulhi_int16(y128p[6],ONE_OVER_SQRT3_Q15_128);
y128p[7] = mulhi_int16(y128p[7],ONE_OVER_SQRT3_Q15_128);
y128p[8] = mulhi_int16(y128p[8],ONE_OVER_SQRT3_Q15_128);
y128p[9] = mulhi_int16(y128p[9],ONE_OVER_SQRT3_Q15_128);
y128p[10] = mulhi_int16(y128p[10],ONE_OVER_SQRT3_Q15_128);
y128p[11] = mulhi_int16(y128p[11],ONE_OVER_SQRT3_Q15_128);
y128p[12] = mulhi_int16(y128p[12],ONE_OVER_SQRT3_Q15_128);
y128p[13] = mulhi_int16(y128p[13],ONE_OVER_SQRT3_Q15_128);
y128p[14] = mulhi_int16(y128p[14],ONE_OVER_SQRT3_Q15_128);
y128p[15] = mulhi_int16(y128p[15],ONE_OVER_SQRT3_Q15_128);
y128p+=16;
}
}
_mm_empty();
_m_empty();
}
......@@ -8823,6 +9598,8 @@ int dfts_autoinit(void)
init_rad2(2048,tw2048);
init_rad4(4096,tw4096);
init_rad2(8192,tw8192);
init_rad4(16384,tw16384);
init_rad2(32768,tw32768);
init_rad3(1536,twa1536,twb1536);
init_rad3(3072,twa3072,twb3072);
......@@ -8830,6 +9607,9 @@ int dfts_autoinit(void)
init_rad3(12288,twa12288,twb12288);
init_rad3(18432,twa18432,twb18432);
init_rad3(24576,twa24576,twb24576);
init_rad3(49152,twa49152,twb49152);
init_rad3(98304,twa98304,twb98304);
init_rad2_rep(24,tw24);
init_rad3_rep(36,twa36,twb36);
......@@ -8869,6 +9649,7 @@ int dfts_autoinit(void)
#ifndef MR_MAIN
void dft(uint8_t sizeidx, int16_t *sigF,int16_t *sig,unsigned char scale_flag){
AssertFatal((sizeidx>=0 && sizeidx<(int)DFT_SIZE_IDXTABLESIZE),"Invalid dft size index %i\n",sizeidx);
......@@ -8879,6 +9660,7 @@ void idft(uint8_t sizeidx, int16_t *sigF,int16_t *sig,unsigned char scale_flag){
AssertFatal((sizeidx>=0 && sizeidx<(int)IDFT_SIZE_IDXTABLESIZE),"Invalid idft size index %i\n",sizeidx);
idft_ftab[sizeidx](sigF,sig,scale_flag);
};
#endif
/*---------------------------------------------------------------------------------------*/
......@@ -9049,9 +9831,9 @@ int main(int argc, char**argv)
time_stats_t ts;
#ifdef __AVX2__
simd256_q15_t x[4096],x2[4096],y[4096],tw0,tw1,tw2,tw3;
simd256_q15_t x[16384],x2[16384],y[16384],tw0,tw1,tw2,tw3;
#else
simd_q15_t x[8192],y[8192],tw0,tw1,tw2,tw3;
simd_q15_t x[32768],y[32768],tw0,tw1,tw2,tw3;
#endif
int i;
simd_q15_t *x128=(simd_q15_t*)x,*y128=(simd_q15_t*)y;
......@@ -9613,10 +10395,34 @@ int main(int argc, char**argv)
stop_meas(&ts);
}
printf("\n\n1536-point(%f cycles)\n",(double)ts.diff/(double)ts.trials);
printf("\n\n8192-point(%f cycles)\n",(double)ts.diff/(double)ts.trials);
LOG_M("y8192.m","y8192",y,8192,1,1);
LOG_M("x8192.m","x8192",x,8192,1,1);
memset((void*)x,0,16384*sizeof(int32_t));
for (i=2;i<9602;i++) {
if ((taus() & 1)==0)
((int16_t*)x)[i] = 364;
else
((int16_t*)x)[i] = -364;
}
for (i=2*(16384-4800);i<32768;i++) {
if ((taus() & 1)==0)
((int16_t*)x)[i] = 364;
else
((int16_t*)x)[i] = -364;
}
reset_meas(&ts);
for (i=0; i<10000; i++) {
start_meas(&ts);
dft16384((int16_t *)x,(int16_t *)y,1);
stop_meas(&ts);
}
printf("\n\n16384-point(%f cycles)\n",(double)ts.diff/(double)ts.trials);
LOG_M("y16384.m","y16384",y,16384,1,1);
LOG_M("x16384.m","x16384",x,16384,1,1);
memset((void*)x,0,1536*sizeof(int32_t));
for (i=2;i<1202;i++) {
if ((taus() & 1)==0)
......@@ -9765,6 +10571,30 @@ int main(int argc, char**argv)
LOG_M("y24576.m","y24576",y,24576,1,1);
LOG_M("x24576.m","x24576",x,24576,1,1);
memset((void*)x,0,49152*sizeof(int32_t));
for (i=2;i<28402;i++) {
if ((taus() & 1)==0)
((int16_t*)x)[i] = 364;
else
((int16_t*)x)[i] = -364;
}
for (i=2*(49152-14400);i<98304;i++) {
if ((taus() & 1)==0)
((int16_t*)x)[i] = 364;
else
((int16_t*)x)[i] = -364;
}
reset_meas(&ts);
for (i=0; i<10000; i++) {
start_meas(&ts);
idft49152((int16_t *)x,(int16_t *)y,1);
stop_meas(&ts);
}
printf("\n\n49152-point(%f cycles)\n",(double)ts.diff/(double)ts.trials);
LOG_M("y49152.m","y49152",y,49152,1,1);
LOG_M("x49152.m","x49152",x,49152,1,1);
/*
int dftsizes[33]={24,36,48,60,72,96,108,120,144,180,192,216,240,288,300,324,360,384,432,480,540,576,600,648,720,768,864,900,960,972,1080,1152,1200};
void (*dft)(int16_t *x,int16_t *y,uint8_t scale)[33] = {dft24,dft36,dft48,dft60,dft72,dft96,dft108,dft120,dft144,dft180,dft192,dft216,dft240,dft288,dft300,dft324,dft360,dft384,dft432,dft480,dft540,dft576,dft600,dft648,dft720,dft768,dft864,dft900,dft960,dft972,dft1080,dft1152,dft1200};
for (int n=0;n<33;n++) {
......@@ -9797,7 +10627,7 @@ int main(int argc, char**argv)
LOG_M(ystr,ystr2,y,dftsizes[n]*4,1,1);
LOG_M(xstr,xstr2,x,dftsizes[n]*4,1,1);
}
*/
return(0);
}
......
......@@ -89,7 +89,7 @@ int main(int argc, char **argv){
int i, aa, aarx, **txdata, trial, n_frames = 1, prach_start, rx_prach_start; //, ntrials=1;
int N_RB_UL = 106, delay = 0, NCS_config = 13, rootSequenceIndex = 1, threequarter_fs = 0, mu = 1, fd_occasion = 0, loglvl = OAILOG_INFO, numRA = 0, prachStartSymbol = 0;
uint8_t snr1set = 0, ue_speed1set = 0, transmission_mode = 1, n_tx = 1, n_rx = 1, awgn_flag = 0, msg1_frequencystart = 0, num_prach_fd_occasions = 1, prach_format;
uint8_t frame = 1, subframe = 19, config_index = 98, prach_sequence_length = 1, num_root_sequences = 16, restrictedSetConfig = 0, N_dur, N_t_slot, start_symbol;
uint8_t frame = 1, subframe = 9, slot=19, config_index = 98, prach_sequence_length = 1, num_root_sequences = 16, restrictedSetConfig = 0, N_dur, N_t_slot, start_symbol;
uint16_t Nid_cell = 0, preamble_tx = 0, preamble_delay, format, format0, format1;
uint32_t tx_lev = 10000, prach_errors = 0, samp_count; //,tx_lev_dB;
uint64_t SSB_positions = 0x01, absoluteFrequencyPointA = 640000;
......@@ -118,7 +118,7 @@ int main(int argc, char **argv){
randominit(0);
while ((c = getopt (argc, argv, "hHaA:Cr:p:g:n:s:S:t:x:y:v:V:z:N:F:d:Z:L:R:E")) != -1) {
while ((c = getopt (argc, argv, "hHaA:Cc:r:p:g:n:s:S:t:x:y:v:V:z:N:F:d:Z:L:R:E")) != -1) {
switch (c) {
case 'a':
printf("Running AWGN simulation\n");
......@@ -127,6 +127,10 @@ int main(int argc, char **argv){
//ntrials=1;
break;
case 'c':
config_index = atoi(optarg);
break;
case 'd':
delay = atoi(optarg);
break;
......@@ -313,6 +317,11 @@ int main(int argc, char **argv){
}
}
if (config_index<67) { prach_sequence_length=0; slot = subframe*2; }
printf("Config_index %d, prach_sequence_length %d\n",config_index,prach_sequence_length);
// Configure log
logInit();
set_glog(loglvl);
......@@ -373,11 +382,11 @@ int main(int argc, char **argv){
gNB->gNB_config.prach_config.num_prach_fd_occasions.value = num_prach_fd_occasions;
gNB->gNB_config.prach_config.num_prach_fd_occasions_list = (nfapi_nr_num_prach_fd_occasions_t *) malloc(num_prach_fd_occasions*sizeof(nfapi_nr_num_prach_fd_occasions_t));
gNB->proc.slot_rx = subframe;
gNB->proc.slot_rx = slot;
get_nr_prach_info_from_index(config_index,
(int)frame,
(int)subframe,
(int)slot,
absoluteFrequencyPointA,
mu,
frame_parms->frame_type,
......@@ -562,7 +571,7 @@ int main(int argc, char **argv){
UE_nr_rxtx_proc_t proc={0};
proc.frame_tx = frame;
proc.nr_tti_tx = subframe;
proc.nr_tti_tx = slot;
nr_ue_prach_procedures(UE,&proc,0,0);
/* tx_lev_dB not used later, no need to set */
......@@ -571,19 +580,19 @@ int main(int argc, char **argv){
if (mu == 0)
samp_count = frame_parms->samples_per_subframe;
else
samp_count = (subframe%(frame_parms->slots_per_subframe/2)) ? frame_parms->samples_per_slotN0 : frame_parms->samples_per_slot0;
samp_count = ((slot)%(frame_parms->slots_per_subframe/2)) ? frame_parms->samples_per_slotN0 : frame_parms->samples_per_slot0;
prach_start = subframe*samp_count - UE->N_TA_offset;
prach_start = slot*samp_count - UE->N_TA_offset;
#ifdef NR_PRACH_DEBUG
LOG_M("txsig0.m", "txs0", &txdata[0][prach_start], samp_count, 1, 1);
LOG_M("txsig0.m", "txs0", &txdata[0][prach_start], frame_parms->samples_per_subframe, 1, 1);
//LOG_M("txsig1.m","txs1", txdata[1],FRAME_LENGTH_COMPLEX_SAMPLES,1,1);
#endif
// multipath channel
// dump_nr_prach_config(&gNB->frame_parms,subframe);
for (i = 0; i < samp_count<<1; i++) {
for (i = 0; i < frame_parms->samples_per_subframe<<1; i++) {
for (aa=0; aa<1; aa++) {
if (awgn_flag == 0) {
s_re[aa][i] = ((double)(((short *)&txdata[aa][prach_start]))[(i<<1)]);
......@@ -618,7 +627,10 @@ int main(int argc, char **argv){
ue_speed1 = ue_speed0 + 50;
}
rx_prach_start = subframe*frame_parms->get_samples_per_slot(subframe,frame_parms);
rx_prach_start = slot*frame_parms->get_samples_per_slot(slot,frame_parms);
if (n_frames==1) printf("slot %d, rx_prach_start %d\n",slot,rx_prach_start);
uint16_t preamble_rx, preamble_energy, N_ZC;
N_ZC = prach_sequence_length == 0 ? 839 : 139;
for (SNR=snr0; SNR<snr1; SNR+=.1) {
for (ue_speed=ue_speed0; ue_speed<ue_speed1; ue_speed+=10) {
......@@ -630,9 +642,8 @@ int main(int argc, char **argv){
for (trial=0; trial<n_frames; trial++) {
uint16_t preamble_rx, preamble_energy, N_ZC;
sigma2_dB = 10*log10((double)tx_lev) - SNR;
sigma2_dB = 10*log10((double)tx_lev) - SNR - 10*log10(N_RB_UL*12/N_ZC);
if (n_frames==1)
printf("sigma2_dB %f (SNR %f dB) tx_lev_dB %f\n",sigma2_dB,SNR,10*log10((double)tx_lev));
......@@ -651,20 +662,20 @@ int main(int argc, char **argv){
10*log10(tx_lev));
}
for (i = 0; i< frame_parms->get_samples_per_slot(subframe,frame_parms); i++) {
for (i = 0; i< frame_parms->samples_per_subframe; i++) {
for (aa = 0; aa < frame_parms->nb_antennas_rx; aa++) {
((short*) &gNB->common_vars.rxdata[aa][rx_prach_start])[2*i] = (short) (.167*(r_re[aa][i] +sqrt(sigma2/2)*gaussdouble(0.0,1.0)));
((short*) &gNB->common_vars.rxdata[aa][rx_prach_start])[2*i+1] = (short) (.167*(r_im[aa][i] + (iqim*r_re[aa][i]) + sqrt(sigma2/2)*gaussdouble(0.0,1.0)));
((short*) &ru->common.rxdata[aa][rx_prach_start])[2*i] = (short) (.167*(r_re[aa][i] +sqrt(sigma2/2)*gaussdouble(0.0,1.0)));
((short*) &ru->common.rxdata[aa][rx_prach_start])[2*i+1] = (short) (.167*(r_im[aa][i] + (iqim*r_re[aa][i]) + sqrt(sigma2/2)*gaussdouble(0.0,1.0)));
}
}
rx_nr_prach_ru(ru, prach_format, numRA, prachStartSymbol, frame, subframe);
rx_nr_prach_ru(ru, prach_format, numRA, prachStartSymbol, frame, slot);
gNB->prach_vars.rxsigF = ru->prach_rxsigF;
rx_nr_prach(gNB, prach_pdu, frame, subframe, &preamble_rx, &preamble_energy, &preamble_delay);
printf(" preamble_energy %d preamble_rx %d preamble_tx %d \n", preamble_energy, preamble_rx, preamble_tx);
// printf(" preamble_energy %d preamble_rx %d preamble_tx %d \n", preamble_energy, preamble_rx, preamble_tx);
if (preamble_rx != preamble_tx)
prach_errors++;
......
......@@ -21,7 +21,8 @@
#include <time.h>
#include <stdlib.h>
#include "SIMULATION/TOOLS/sim.h"
//#include "SIMULATION/TOOLS/sim.h"
unsigned int s0, s1, s2, b;
......
......@@ -46,6 +46,7 @@ uint16_t NCS_restricted_TypeB_delta_f_RA_5[14] = {36,57,60,63,65,68,71,77,81,8
uint16_t NCS_unrestricted_delta_f_RA_15[16] = {0,2,4,6,8,10,12,13,15,17,19,23,27,34,46,69};
const char *prachfmt[]={"A1","A2","A3","B1","B2","B3","B4","C0","C2"};
const char *prachfmt03[]={"0","1","2","3"};
uint16_t get_NCS(uint8_t index, uint16_t format0, uint8_t restricted_set_config) {
......
......@@ -290,8 +290,7 @@ void config_common(int Mod_idP, int pdsch_AntennaPorts, NR_ServingCellConfigComm
scc->tdd_UL_DL_ConfigurationCommon->pattern1.nrofDownlinkSlots,
scc->tdd_UL_DL_ConfigurationCommon->pattern1.nrofDownlinkSymbols,
scc->tdd_UL_DL_ConfigurationCommon->pattern1.nrofUplinkSlots,
scc->tdd_UL_DL_ConfigurationCommon->pattern1.nrofUplinkSymbols
);
scc->tdd_UL_DL_ConfigurationCommon->pattern1.nrofUplinkSymbols);
if (return_tdd !=0){
LOG_E(PHY,"TDD configuration can not be done\n");
......
......@@ -305,12 +305,14 @@ void nr_schedule_pucch(int Mod_idP,
NR_BWP_Uplink_t *ubwp=secondaryCellGroup->spCellConfig->spCellConfigDedicated->uplinkConfig->uplinkBWP_ToAddModList->list.array[bwp_id-1];
nfapi_nr_ul_tti_request_t *UL_tti_req = &RC.nrmac[Mod_idP]->UL_tti_req[0];
NR_sched_pucch *curr_pucch = UE_list->UE_sched_ctrl[UE_id].sched_pucch;
NR_sched_pucch *temp_pucch;
int release_pucch = 0;
if (curr_pucch != NULL) {
if ((frameP == curr_pucch->frame) && (slotP == curr_pucch->ul_slot)) {
NR_sched_pucch *curr_pucch;
int nr_ulmix_slots = scc->tdd_UL_DL_ConfigurationCommon->pattern1.nrofUplinkSlots;
if (scc->tdd_UL_DL_ConfigurationCommon->pattern1.nrofUplinkSymbols!=0)
nr_ulmix_slots++;
for (int k=0; k<nr_ulmix_slots; k++) {
curr_pucch = &UE_list->UE_sched_ctrl[UE_id].sched_pucch[k];
if ((curr_pucch->dai_c > 0) && (frameP == curr_pucch->frame) && (slotP == curr_pucch->ul_slot)) {
UL_tti_req->SFN = frameP;
UL_tti_req->Slot = slotP;
UL_tti_req->pdus_list[UL_tti_req->n_pdus].pdu_type = NFAPI_NR_UL_CONFIG_PUCCH_PDU_TYPE;
......@@ -331,16 +333,9 @@ void nr_schedule_pucch(int Mod_idP,
O_ack,
SR_flag);
release_pucch = 1;
curr_pucch->dai_c = 0;
}
}
if (release_pucch) {
temp_pucch = UE_list->UE_sched_ctrl[UE_id].sched_pucch;
UE_list->UE_sched_ctrl[UE_id].sched_pucch = UE_list->UE_sched_ctrl[UE_id].sched_pucch->next_sched_pucch;
free(temp_pucch);
}
}
bool is_xlsch_in_slot(uint64_t bitmap, sub_frame_t slot){
......@@ -366,7 +361,7 @@ void gNB_dlsch_ulsch_scheduler(module_id_t module_idP,
int UE_id;
uint64_t *dlsch_in_slot_bitmap=NULL;
uint64_t *ulsch_in_slot_bitmap=NULL;
NR_sched_pucch *pucch_sched = (NR_sched_pucch*) malloc(sizeof(NR_sched_pucch));
int pucch_sched;
UE_id=0;
int bwp_id = 1;
......@@ -378,6 +373,18 @@ void gNB_dlsch_ulsch_scheduler(module_id_t module_idP,
NR_ServingCellConfigCommon_t *scc = cc->ServingCellConfigCommon;
int num_slots_per_tdd = (nr_slots_per_frame[*scc->ssbSubcarrierSpacing])>>(7-scc->tdd_UL_DL_ConfigurationCommon->pattern1.dl_UL_TransmissionPeriodicity);
int nr_ulmix_slots = scc->tdd_UL_DL_ConfigurationCommon->pattern1.nrofUplinkSlots;
if (scc->tdd_UL_DL_ConfigurationCommon->pattern1.nrofUplinkSymbols!=0)
nr_ulmix_slots++;
if (slot_txP == 0) {
for (int k=0; k<nr_ulmix_slots; k++) {
memset((void *) &UE_list->UE_sched_ctrl[UE_id].sched_pucch[k],
0,
sizeof(NR_sched_pucch));
}
}
start_meas(&RC.nrmac[module_idP]->eNB_scheduler);
VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_ENB_DLSCH_ULSCH_SCHEDULER,VCD_FUNCTION_IN);
......@@ -460,18 +467,18 @@ void gNB_dlsch_ulsch_scheduler(module_id_t module_idP,
// Phytest scheduling
if (get_softmodem_params()->phy_test && (is_xlsch_in_slot(*dlsch_in_slot_bitmap,slot_txP%num_slots_per_tdd))) {
nr_update_pucch_scheduling(module_idP, UE_id, frame_txP, slot_txP, num_slots_per_tdd,pucch_sched);
ue_sched_ctl->current_harq_pid = slot_txP % num_slots_per_tdd;
nr_schedule_uss_dlsch_phytest(module_idP, frame_txP, slot_txP, pucch_sched, NULL);
nr_update_pucch_scheduling(module_idP, UE_id, frame_txP, slot_txP, num_slots_per_tdd,&pucch_sched);
nr_schedule_uss_dlsch_phytest(module_idP, frame_txP, slot_txP, &UE_list->UE_sched_ctrl[UE_id].sched_pucch[pucch_sched], NULL);
// resetting ta flag
gNB->ta_len = 0;
}
// Test DL scheduling
if (get_softmodem_params()->phy_test == 0 && slot_txP == 1 && UE_list->fiveG_connected[UE_id]) {
nr_update_pucch_scheduling(module_idP, UE_id, frame_txP, slot_txP, num_slots_per_tdd,pucch_sched);
ue_sched_ctl->current_harq_pid = slot_txP % num_slots_per_tdd;
nr_schedule_uss_dlsch_phytest(module_idP, frame_txP, slot_txP, pucch_sched, NULL);
nr_update_pucch_scheduling(module_idP, UE_id, frame_txP, slot_txP, num_slots_per_tdd,&pucch_sched);
nr_schedule_uss_dlsch_phytest(module_idP, frame_txP, slot_txP, &UE_list->UE_sched_ctrl[UE_id].sched_pucch[pucch_sched], NULL);
// resetting ta flag
gNB->ta_len = 0;
UE_list->fiveG_connected[UE_id] = false;
......
......@@ -1330,6 +1330,11 @@ int add_new_nr_ue(module_id_t mod_idP, rnti_t rntiP){
int UE_id;
int i;
NR_UE_list_t *UE_list = &RC.nrmac[mod_idP]->UE_list;
NR_COMMON_channels_t *cc = RC.nrmac[mod_idP]->common_channels;
NR_ServingCellConfigCommon_t *scc = cc->ServingCellConfigCommon;
int num_slots_ul = scc->tdd_UL_DL_ConfigurationCommon->pattern1.nrofUplinkSlots;
if (scc->tdd_UL_DL_ConfigurationCommon->pattern1.nrofUplinkSymbols>0)
num_slots_ul++;
LOG_I(MAC, "[gNB %d] Adding UE with rnti %x (next avail %d, num_UEs %d)\n",
mod_idP,
rntiP,
......@@ -1348,6 +1353,7 @@ int add_new_nr_ue(module_id_t mod_idP, rnti_t rntiP){
memset((void *) &UE_list->UE_sched_ctrl[UE_id],
0,
sizeof(NR_UE_sched_ctrl_t));
UE_list->UE_sched_ctrl[UE_id].sched_pucch = (NR_sched_pucch *)malloc(num_slots_ul*sizeof(NR_sched_pucch));
LOG_I(MAC, "gNB %d] Add NR UE_id %d : rnti %x\n",
mod_idP,
UE_id,
......@@ -1420,15 +1426,14 @@ void nr_update_pucch_scheduling(int Mod_idP,
frame_t frameP,
sub_frame_t slotP,
int slots_per_tdd,
NR_sched_pucch *sched_pucch) {
int *pucch_id) {
NR_ServingCellConfigCommon_t *scc = RC.nrmac[Mod_idP]->common_channels->ServingCellConfigCommon;
NR_UE_list_t *UE_list = &RC.nrmac[Mod_idP]->UE_list;
int first_ul_slot_tdd,k;
NR_sched_pucch *curr_pucch;
int first_ul_slot_tdd,k,i;
uint8_t pdsch_to_harq_feedback[8];
int found = 0;
int i = 0;
int nr_ulmix_slots = scc->tdd_UL_DL_ConfigurationCommon->pattern1.nrofUplinkSlots;
if (scc->tdd_UL_DL_ConfigurationCommon->pattern1.nrofUplinkSymbols!=0)
nr_ulmix_slots++;
......@@ -1437,100 +1442,35 @@ void nr_update_pucch_scheduling(int Mod_idP,
NR_SearchSpace__searchSpaceType_PR ss_type = NR_SearchSpace__searchSpaceType_PR_ue_Specific;
get_pdsch_to_harq_feedback(Mod_idP,UE_id,ss_type,pdsch_to_harq_feedback);
// if the list of pucch to be scheduled is empty
if (UE_list->UE_sched_ctrl[UE_id].sched_pucch == NULL) {
sched_pucch->frame = frameP;
sched_pucch->next_sched_pucch = NULL;
sched_pucch->dai_c = 1;
sched_pucch->resource_indicator = 0; // in phytest with only 1 UE we are using just the 1st resource
if ( nr_ulmix_slots > 0 ) {
// for each possible ul or mixed slot
for (k=0; k<nr_ulmix_slots; k++) {
curr_pucch = &UE_list->UE_sched_ctrl[UE_id].sched_pucch[k];
// if there is free room in current pucch structure
if (curr_pucch->dai_c<MAX_ACK_BITS) {
curr_pucch->frame = frameP;
curr_pucch->dai_c++;
curr_pucch->resource_indicator = 0; // in phytest with only 1 UE we are using just the 1st resource
// first pucch occasion in first UL or MIXED slot
first_ul_slot_tdd = scc->tdd_UL_DL_ConfigurationCommon->pattern1.nrofDownlinkSlots;
for (k=0; k<nr_ulmix_slots; k++) { // for each possible UL or mixed slot
while (i<8 && found == 0) { // look if timing indicator is among allowed values
if (pdsch_to_harq_feedback[i]==(first_ul_slot_tdd+k)-(slotP % slots_per_tdd))
found = 1;
if (found == 0) i++;
}
if (found == 1) break;
}
if (found == 1) {
// computing slot in which pucch is scheduled
sched_pucch->ul_slot = first_ul_slot_tdd + k + (slotP - (slotP % slots_per_tdd));
sched_pucch->timing_indicator = i; // index in the list of timing indicators
}
else
AssertFatal(1==0,"No Uplink slot available in accordance to allowed timing indicator\n");
}
else
AssertFatal(1==0,"No Uplink Slots in this Frame\n");
UE_list->UE_sched_ctrl[UE_id].sched_pucch = sched_pucch;
}
else { // to be tested
curr_pucch = UE_list->UE_sched_ctrl[UE_id].sched_pucch;
if (curr_pucch->dai_c<MAX_ACK_BITS) { // we are scheduling at most MAX_UCI_BITS harq-ack in the same pucch
while (i<8 && found == 0) { // look if timing indicator is among allowed values for current pucch
if (pdsch_to_harq_feedback[i]==(curr_pucch->ul_slot % slots_per_tdd)-(slotP % slots_per_tdd))
i = 0;
while (i<8 && found == 0) { // look if timing indicator is among allowed values
if (pdsch_to_harq_feedback[i]==(first_ul_slot_tdd+k)-(slotP % slots_per_tdd))
found = 1;
if (found == 0) i++;
}
if (found == 1) { // scheduling this harq-ack in current pucch
sched_pucch = curr_pucch;
sched_pucch->dai_c = 1 + sched_pucch->dai_c;
sched_pucch->timing_indicator = pdsch_to_harq_feedback[i];
}
}
if (curr_pucch->dai_c==MAX_ACK_BITS || found == 0) { // if current pucch is full or no timing indicator allowed
// look for pucch occasions in other UL of mixed slots
for (k=scc->tdd_UL_DL_ConfigurationCommon->pattern1.nrofDownlinkSlots; k<slots_per_tdd; k++) { // for each possible UL or mixed slot
if (k!=(curr_pucch->ul_slot % slots_per_tdd)) { // skip current scheduled slot (already checked)
i = 0;
while (i<8 && found == 0) { // look if timing indicator is among allowed values
if (pdsch_to_harq_feedback[i]==k-(slotP % slots_per_tdd))
found = 1;
if (found == 0) i++;
}
if (found == 1) {
if (k<(curr_pucch->ul_slot % slots_per_tdd)) { // we need to add a pucch occasion before current pucch
sched_pucch->frame = frameP;
sched_pucch->ul_slot = k + (slotP - (slotP % slots_per_tdd));
sched_pucch->next_sched_pucch = curr_pucch;
sched_pucch->dai_c = 1;
sched_pucch->resource_indicator = 0; // in phytest with only 1 UE we are using just the 1st resource
sched_pucch->timing_indicator = pdsch_to_harq_feedback[i];
UE_list->UE_sched_ctrl[UE_id].sched_pucch = sched_pucch;
}
else {
while (curr_pucch->next_sched_pucch != NULL && k!=(curr_pucch->ul_slot % slots_per_tdd))
curr_pucch = curr_pucch->next_sched_pucch;
if (curr_pucch == NULL) { // creating a new item in the list
sched_pucch->frame = frameP;
sched_pucch->next_sched_pucch = NULL;
sched_pucch->dai_c = 1;
sched_pucch->timing_indicator = pdsch_to_harq_feedback[i];
sched_pucch->resource_indicator = 0; // in phytest with only 1 UE we are using just the 1st resource
sched_pucch->ul_slot = k + (slotP - (slotP % slots_per_tdd));
curr_pucch->next_sched_pucch = (NR_sched_pucch*) malloc(sizeof(NR_sched_pucch));
curr_pucch->next_sched_pucch = sched_pucch;
}
else {
if (curr_pucch->dai_c==MAX_ACK_BITS)
found = 0; // if pucch at index k is already full we have to find a new one in a following occasion
else { // scheduling this harq-ack in current pucch
sched_pucch = curr_pucch;
sched_pucch->dai_c = 1 + sched_pucch->dai_c;
sched_pucch->timing_indicator = pdsch_to_harq_feedback[i];
}
}
}
}
}
if (found == 1) {
// computing slot in which pucch is scheduled
curr_pucch->ul_slot = first_ul_slot_tdd + k + (slotP - (slotP % slots_per_tdd));
curr_pucch->timing_indicator = i; // index in the list of timing indicators
*pucch_id = k;
return;
}
}
}
AssertFatal(1==0,"No Uplink slot available in accordance to allowed timing indicator\n");
}
void find_monitoring_periodicity_offset_common(NR_SearchSpace_t *ss,
uint16_t *slot_period,
uint16_t *offset) {
......
......@@ -157,7 +157,7 @@ void nr_update_pucch_scheduling(int Mod_idP,
frame_t frameP,
sub_frame_t slotP,
int slots_per_tdd,
NR_sched_pucch *sched_pucch);
int *pucch_id);
void get_pdsch_to_harq_feedback(int Mod_idP,
int UE_id,
......
......@@ -185,7 +185,6 @@ typedef struct NR_sched_pucch {
uint8_t dai_c;
uint8_t timing_indicator;
uint8_t resource_indicator;
struct NR_sched_pucch *next_sched_pucch;
} NR_sched_pucch;
typedef struct NR_UE_harq {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment