Commit 3284e42e authored by Francesco Mani

Merge branch 'NR_RRC_harq_hacks' of...

Merge branch 'NR_RRC_harq_hacks' of https://gitlab.eurecom.fr/oai/openairinterface5g into NR_RRC_harq_hacks
parents b0ff2cef 6fb19586
...@@ -113,8 +113,8 @@ int nr_phy_init_RU(RU_t *ru) { ...@@ -113,8 +113,8 @@ int nr_phy_init_RU(RU_t *ru) {
ru->prach_rxsigF = (int16_t**)malloc(ru->nb_rx * sizeof(int16_t*)); ru->prach_rxsigF = (int16_t**)malloc(ru->nb_rx * sizeof(int16_t*));
for (i=0; i<ru->nb_rx; i++) { for (i=0; i<ru->nb_rx; i++) {
// for preamble format 1 and 2, more memory should be allocated // largest size for PRACH FFT is 4x98304 (16*24576)
ru->prach_rxsigF[i] = (int16_t*)malloc16_clear( fp->ofdm_symbol_size*12*(1<<mu)*2*sizeof(int16_t) ); ru->prach_rxsigF[i] = (int16_t*)malloc16_clear( 4*98304*2*sizeof(int16_t) );
LOG_D(PHY,"[INIT] prach_vars->rxsigF[%d] = %p\n",i,ru->prach_rxsigF[i]); LOG_D(PHY,"[INIT] prach_vars->rxsigF[%d] = %p\n",i,ru->prach_rxsigF[i]);
} }
......
...@@ -283,6 +283,7 @@ void rx_nr_prach_ru(RU_t *ru, ...@@ -283,6 +283,7 @@ void rx_nr_prach_ru(RU_t *ru,
if (prachFormat == 0 || prachFormat == 1 || prachFormat == 2) { if (prachFormat == 0 || prachFormat == 1 || prachFormat == 2) {
dftlen=49152; dftlen=49152;
dft(DFT_49152,prach2,rxsigF[aa],1); dft(DFT_49152,prach2,rxsigF[aa],1);
LOG_M("prach_rxsigF.m","prach_rxF0",rxsigF[aa],49152,1,1);
} }
if (prachFormat == 1 || prachFormat == 2) { if (prachFormat == 1 || prachFormat == 2) {
dft(DFT_49152,prach2+98304,rxsigF[aa]+98304,1); dft(DFT_49152,prach2+98304,rxsigF[aa]+98304,1);
...@@ -503,6 +504,7 @@ void rx_nr_prach_ru(RU_t *ru, ...@@ -503,6 +504,7 @@ void rx_nr_prach_ru(RU_t *ru,
} }
//Coherent combining of PRACH repetitions (assumes channel does not change, to be revisted for "long" PRACH) //Coherent combining of PRACH repetitions (assumes channel does not change, to be revisted for "long" PRACH)
LOG_D(PHY,"Doing PRACH combining of %d reptitions N_ZC %d\n",reps,N_ZC);
int16_t rxsigF_tmp[N_ZC<<1]; int16_t rxsigF_tmp[N_ZC<<1];
// if (k+N_ZC > dftlen) { // PRACH signal is split around DC // if (k+N_ZC > dftlen) { // PRACH signal is split around DC
int16_t *rxsigF2=rxsigF[aa]; int16_t *rxsigF2=rxsigF[aa];
...@@ -583,8 +585,6 @@ void rx_nr_prach(PHY_VARS_gNB *gNB, ...@@ -583,8 +585,6 @@ void rx_nr_prach(PHY_VARS_gNB *gNB,
restricted_set = cfg->restricted_set_config.value; restricted_set = cfg->restricted_set_config.value;
AssertFatal(prach_sequence_length == 1, "no support yet for long prachSequenceLength\n");
uint8_t prach_fmt = prach_pdu->prach_format; uint8_t prach_fmt = prach_pdu->prach_format;
uint16_t N_ZC = (prach_sequence_length==0)?839:139; uint16_t N_ZC = (prach_sequence_length==0)?839:139;
...@@ -624,7 +624,7 @@ void rx_nr_prach(PHY_VARS_gNB *gNB, ...@@ -624,7 +624,7 @@ void rx_nr_prach(PHY_VARS_gNB *gNB,
if (LOG_DEBUGFLAG(PRACH)){ if (LOG_DEBUGFLAG(PRACH)){
int en = dB_fixed(signal_energy((int32_t*)&rxsigF[0][0],(N_ZC==839) ? 840: 140)); int en = dB_fixed(signal_energy((int32_t*)&rxsigF[0][0],(N_ZC==839) ? 840: 140));
if (en>60) LOG_I(PHY,"frame %d, subframe %d : Trying preamble %d \n",frame,subframe,preamble_index); if (en>60) LOG_D(PHY,"frame %d, subframe %d : Trying preamble %d \n",frame,subframe,preamble_index);
} }
if (restricted_set == 0) { if (restricted_set == 0) {
// This is the relative offset in the root sequence table (5.7.2-4 from 36.211) for the given preamble index // This is the relative offset in the root sequence table (5.7.2-4 from 36.211) for the given preamble index
...@@ -750,10 +750,10 @@ void rx_nr_prach(PHY_VARS_gNB *gNB, ...@@ -750,10 +750,10 @@ void rx_nr_prach(PHY_VARS_gNB *gNB,
prach_ifft[i] += ((int32_t)prach_ifft_tmp[i<<1]*(int32_t)prach_ifft_tmp[(i<<1)] + (int32_t)prach_ifft_tmp[1+(i<<1)]*(int32_t)prach_ifft_tmp[1+(i<<1)])>>10; prach_ifft[i] += ((int32_t)prach_ifft_tmp[i<<1]*(int32_t)prach_ifft_tmp[(i<<1)] + (int32_t)prach_ifft_tmp[1+(i<<1)]*(int32_t)prach_ifft_tmp[1+(i<<1)])>>10;
} }
if (LOG_DUMPFLAG(PRACH)) { // if (LOG_DUMPFLAG(PRACH)) {
if (aa==0) LOG_M("prach_rxF_comp0.m","prach_rxF_comp0",prachF,1024,1,1); if (aa==0) LOG_M("prach_rxF_comp0.m","prach_rxF_comp0",prachF,1024,1,1);
if (aa==1) LOG_M("prach_rxF_comp1.m","prach_rxF_comp1",prachF,1024,1,1); if (aa==1) LOG_M("prach_rxF_comp1.m","prach_rxF_comp1",prachF,1024,1,1);
} // }
}// antennas_rx }// antennas_rx
} // new dft } // new dft
...@@ -766,6 +766,7 @@ void rx_nr_prach(PHY_VARS_gNB *gNB, ...@@ -766,6 +766,7 @@ void rx_nr_prach(PHY_VARS_gNB *gNB,
lev = (int32_t)prach_ifft[(preamble_shift2+i)]; lev = (int32_t)prach_ifft[(preamble_shift2+i)];
levdB = dB_fixed_times10(lev); levdB = dB_fixed_times10(lev);
if (levdB>*max_preamble_energy) { if (levdB>*max_preamble_energy) {
LOG_D(PHY,"preamble_index %d, delay %d en %d dB > %d dB\n",preamble_index,i,levdB,*max_preamble_energy);
*max_preamble_energy = levdB; *max_preamble_energy = levdB;
*max_preamble_delay = i; // Note: This has to be normalized to the 30.72 Ms/s sampling rate *max_preamble_delay = i; // Note: This has to be normalized to the 30.72 Ms/s sampling rate
*max_preamble = preamble_index; *max_preamble = preamble_index;
......
...@@ -60,6 +60,7 @@ extern int64_t table_6_3_3_2_4_prachConfig_Index [256][10]; ...@@ -60,6 +60,7 @@ extern int64_t table_6_3_3_2_4_prachConfig_Index [256][10];
extern uint16_t nr_du[838]; extern uint16_t nr_du[838];
extern int16_t nr_ru[2*839]; extern int16_t nr_ru[2*839];
extern const char *prachfmt[9]; extern const char *prachfmt[9];
extern const char *prachfmt03[4];
// Note: // Note:
// - prach_fmt_id is an ID used to map to the corresponding PRACH format value in prachfmt // - prach_fmt_id is an ID used to map to the corresponding PRACH format value in prachfmt
...@@ -210,9 +211,9 @@ int32_t generate_nr_prach(PHY_VARS_NR_UE *ue, uint8_t gNB_id, uint8_t slot){ ...@@ -210,9 +211,9 @@ int32_t generate_nr_prach(PHY_VARS_NR_UE *ue, uint8_t gNB_id, uint8_t slot){
#ifdef NR_PRACH_DEBUG #ifdef NR_PRACH_DEBUG
if (NCS>0) if (NCS>0)
LOG_I(PHY, "PRACH [UE %d] generate PRACH for RootSeqIndex %d, Preamble Index %d, PRACH Format %s, NCS %d (N_ZC %d): Preamble_offset %d, Preamble_shift %d\n", Mod_id, LOG_I(PHY, "PRACH [UE %d] generate PRACH for RootSeqIndex %d, Preamble Index %d, PRACH Format %s, NCS %d (N_ZC %d): Preamble_offset %d, Preamble_shift %d\n", Mod_id,
rootSequenceIndex, rootSequenceIndex,
preamble_index, preamble_index,
prachfmt[prach_fmt_id], prach_sequence_length == 0 ? prachfmt03[prach_fmt_id] : prachfmt[prach_fmt_id],
NCS, NCS,
N_ZC, N_ZC,
preamble_offset, preamble_offset,
......
lte_dfts_sse4: lte_dfts.c oai_dfts_sse4: oai_dfts.c
gcc-7 -O3 -std=gnu99 -msse4.1 -o lte_dfts_sse4 lte_dfts.c time_meas.c ../../SIMULATION/TOOLS/taus.c -I$$OPENAIR_HOME -I$$OPENAIR1_DIR -I$$OPENAIR_TARGETS -I$$OPENAIR2_DIR -I$$OPENAIR2_DIR/COMMON -I$$OPENAIR_HOME/common/utils -I$$OPENAIR_HOME/common/utils/T -DMR_MAIN -DNB_ANTENNAS_RX=1 -lm -lpthread # -DD256STATS #-DD64STATS gcc-7 -O3 -std=gnu99 -msse4.1 -o oai_dfts_sse4 oai_dfts.c time_meas.c ../../SIMULATION/TOOLS/taus.c -I$$OPENAIR_HOME -I$$OPENAIR1_DIR -I$$OPENAIR_TARGETS -I$$OPENAIR_TARGETS/COMMON -I$$OPENAIR_TARGETS/ARCH/COMMON -I$$OPENAIR2_DIR -I$$OPENAIR2_DIR/COMMON -I$$OPENAIR_HOME/common/utils -I$$OPENAIR_HOME/common/utils/T -I$$OPENAIR_HOME/common/utils/msc -I$$OPENAIR_HOME/nfapi/open-nFAPI/nfapi/public_inc -DMR_MAIN -DNB_ANTENNAS_RX=1 -lm -lpthread # -DD256STATS #-DD64STATS
lte_dfts_avx2: lte_dfts.c oai_dfts_avx2: oai_dfts.c
gcc -O2 -std=gnu99 -mavx2 -g -ggdb -o lte_dfts_avx2 lte_dfts.c time_meas.c ../../SIMULATION/TOOLS/taus.c $$OPENAIR_HOME/common/utils/backtrace.c -I$$OPENAIR_HOME -I$$OPENAIR1_DIR -I$$OPENAIR_TARGETS -I$$OPENAIR2_DIR -I$$OPENAIR2_DIR/COMMON -I$$OPENAIR_HOME/common/utils -I$$OPENAIR_HOME/common/utils/T -DMR_MAIN -DNB_ANTENNAS_RX=1 -lm -lpthread # -DD256STATS #-DD64STATS gcc -O2 -std=gnu99 -mavx2 -g -ggdb -o oai_dfts_avx2 oai_dfts.c time_meas.c ../../SIMULATION/TOOLS/taus.c $$OPENAIR_HOME/common/utils/backtrace.c -I$$OPENAIR_HOME -I$$OPENAIR1_DIR -I$$OPENAIR_TARGETS -I$$OPENAIR_TARGETS/COMMON -I$$OPENAIR_TARGETS/ARCH/COMMON -I$$OPENAIR2_DIR -I$$OPENAIR2_DIR/COMMON -I$$OPENAIR_HOME/common/utils -I$$OPENAIR_HOME/common/utils/T -I$$OPENAIR_HOME/common/utils/msc -I$$OPENAIR_HOME/nfapi/open-nFAPI/nfapi/public_inc -DMR_MAIN -DNB_ANTENNAS_RX=1 -lm -lpthread # -DD256STATS #-DD64STATS
lte_dfts_avx2.s: lte_dfts.c oai_dfts_avx2.s: oai_dfts.c
gcc -O2 -std=gnu99 -mavx2 -S lte_dfts.c time_meas.c ../../SIMULATION/TOOLS/taus.c $$OPENAIR_HOME/common/utils/backtrace.c -I$$OPENAIR_HOME -I$$OPENAIR1_DIR -I$$OPENAIR_TARGETS -I$$OPENAIR2_DIR -I$$OPENAIR2_DIR/COMMON -I$$OPENAIR_HOME/common/utils -I$$OPENAIR_HOME/common/utils/T -DMR_MAIN -DNB_ANTENNAS_RX=1 -lm -lpthread # -DD256STATS #-DD64STATS gcc -O2 -std=gnu99 -mavx2 -S oai_dfts.c time_meas.c ../../SIMULATION/TOOLS/taus.c $$OPENAIR_HOME/common/utils/backtrace.c -I$$OPENAIR_HOME -I$$OPENAIR1_DIR -I$$OPENAIR_TARGETS -I$$OPENAIR_TARGETS/COMMON -I$$OPENAIR_TARGETS/ARCH/COMMON -I$$OPENAIR2_DIR -I$$OPENAIR2_DIR/COMMON -I$$OPENAIR_HOME/common/utils -I$$OPENAIR_HOME/common/utils/T -I$$OPENAIR_HOME/common/utils/msc -I$$OPENAIR_HOME/nfapi/open-nFAPI/nfapi/public_inc -DMR_MAIN -DNB_ANTENNAS_RX=1 -lm -lpthread # -DD256STATS #-DD64STATS
lte_dfts_sse4.s: lte_dfts.c oai_dfts_sse4.s: oai_dfts.c
gcc -O2 -std=gnu99 -msse4.1 -S lte_dfts.c time_meas.c ../../SIMULATION/TOOLS/taus.c $$OPENAIR_HOME/common/utils/backtrace.c -I$$OPENAIR_HOME -I$$OPENAIR1_DIR -I$$OPENAIR_TARGETS -I$$OPENAIR2_DIR -I$$OPENAIR2_DIR/COMMON -I$$OPENAIR_HOME/common/utils -I$$OPENAIR_HOME/common/utils/T -DMR_MAIN -DNB_ANTENNAS_RX=1 -lm -lpthread # -DD256STATS #-DD64STATS gcc -O2 -std=gnu99 -msse4.1 -S oai_dfts.c time_meas.c ../../SIMULATION/TOOLS/taus.c $$OPENAIR_HOME/common/utils/backtrace.c -I$$OPENAIR_HOME -I$$OPENAIR1_DIR -I$$OPENAIR_TARGETS -I$$OPENAIR_TARGETS/COMMON -I$$OPENAIR_TARGETS/ARCH/COMMON -I$$OPENAIR2_DIR -I$$OPENAIR2_DIR/COMMON -I$$OPENAIR_HOME/common/utils -I$$OPENAIR_HOME/common/utils/T -I$$OPENAIR_HOME/common/utils/msc -I$$OPENAIR_HOME/nfapi/open-nFAPI/nfapi/public_inc -DMR_MAIN -DNB_ANTENNAS_RX=1 -lm -lpthread # -DD256STATS #-DD64STATS
dft_cycles_avx2: lte_dfts_avx2 dft_cycles_avx2: oai_dfts_avx2
./lte_dfts_avx2 | egrep cycles ./oai_dfts_avx2 | egrep cycles
...@@ -5353,6 +5353,607 @@ void idft8192(int16_t *x,int16_t *y,unsigned char scale) ...@@ -5353,6 +5353,607 @@ void idft8192(int16_t *x,int16_t *y,unsigned char scale)
#endif #endif
// Twiddle table used by dft16384/idft16384. It is read through SIMD vector
// pointers as three consecutive sub-tables of 2*4096 int16_t each (the three
// twiddle arguments handed to the radix-4 butterfly). Because it is accessed
// through 128-bit and 256-bit vector pointers it is 32-byte aligned, like the
// other large twiddle tables in this file (e.g. tw32768).
// The table contents are filled in elsewhere (not visible here).
int16_t tw16384[3*2*4096] __attribute__((aligned(32)));
#ifndef __AVX2__
void dft16384(int16_t *x,int16_t *y,unsigned char scale)
{
simd_q15_t xtmp[4096],ytmp[4096],*tw16384_128p=(simd_q15_t *)tw16384,*x128=(simd_q15_t *)x,*y128=(simd_q15_t *)y,*y128p=(simd_q15_t *)y;
simd_q15_t *ytmpp = &ytmp[0];
int i,j;
for (i=0,j=0; i<4096; i+=4,j++) {
transpose16_ooff(x128+i,xtmp+j,1024);
}
dft4096((int16_t*)(xtmp),(int16_t*)(ytmp),1);
dft4096((int16_t*)(xtmp+1024),(int16_t*)(ytmp+1024),1);
dft4096((int16_t*)(xtmp+2048),(int16_t*)(ytmp+2048),1);
dft4096((int16_t*)(xtmp+3072),(int16_t*)(ytmp+3072),1);
for (i=0; i<1024; i++) {
bfly4(ytmpp,ytmpp+1024,ytmpp+2048,ytmpp+3072,
y128p,y128p+1024,y128p+2048,y128p+3072,
tw16384_128p,tw16384_128p+1024,tw16384_128p+2048);
tw16384_128p++;
y128p++;
ytmpp++;
}
if (scale>0) {
for (i=0; i<256; i++) {
y128[0] = shiftright_int16(y128[0],1);
y128[1] = shiftright_int16(y128[1],1);
y128[2] = shiftright_int16(y128[2],1);
y128[3] = shiftright_int16(y128[3],1);
y128[4] = shiftright_int16(y128[4],1);
y128[5] = shiftright_int16(y128[5],1);
y128[6] = shiftright_int16(y128[6],1);
y128[7] = shiftright_int16(y128[7],1);
y128[8] = shiftright_int16(y128[8],1);
y128[9] = shiftright_int16(y128[9],1);
y128[10] = shiftright_int16(y128[10],1);
y128[11] = shiftright_int16(y128[11],1);
y128[12] = shiftright_int16(y128[12],1);
y128[13] = shiftright_int16(y128[13],1);
y128[14] = shiftright_int16(y128[14],1);
y128[15] = shiftright_int16(y128[15],1);
y128+=16;
}
}
_mm_empty();
_m_empty();
}
// 16384-point inverse DFT, 128-bit SIMD build (used when __AVX2__ is not defined).
//
// x     : input buffer, read as 4096 simd_q15_t vectors
// y     : output buffer, written as 4096 simd_q15_t vectors
// scale : on/off flag; when non-zero, every output vector is passed through
//         shiftright_int16(...,1) after the butterfly stage
//
// The shift amount is fixed at 1 so that this build behaves like dft16384 and
// like the AVX2 builds of both transforms, all of which shift by 1 whenever
// scale is non-zero. Shifting by `scale` itself made the two builds disagree
// for any scale value other than 0 or 1.
void idft16384(int16_t *x,int16_t *y,unsigned char scale)
{
  simd_q15_t xtmp[4096],ytmp[4096];
  simd_q15_t *tw16384_128p = (simd_q15_t *)tw16384;
  simd_q15_t *x128 = (simd_q15_t *)x;
  simd_q15_t *y128 = (simd_q15_t *)y;
  int i;

  // One transpose16_ooff call per group of four input vectors.
  for (i = 0; i < 1024; i++) {
    transpose16_ooff(x128 + 4*i, xtmp + i, 1024);
  }

  // Four independent 4096-point inverse transforms, one per quarter.
  for (i = 0; i < 4; i++) {
    idft4096((int16_t *)(xtmp + 1024*i), (int16_t *)(ytmp + 1024*i), 1);
  }

  // Radix-4 combination of the four partial transforms.
  for (i = 0; i < 1024; i++) {
    ibfly4(ytmp + i, ytmp + 1024 + i, ytmp + 2048 + i, ytmp + 3072 + i,
           y128 + i, y128 + 1024 + i, y128 + 2048 + i, y128 + 3072 + i,
           tw16384_128p + i, tw16384_128p + 1024 + i, tw16384_128p + 2048 + i);
  }

  // Optional output scaling over all 4096 output vectors (fixed shift of 1).
  if (scale > 0) {
    for (i = 0; i < 4096; i++) {
      y128[i] = shiftright_int16(y128[i], 1);
    }
  }

  _mm_empty();
  _m_empty();
}
#else //__AVX2__
void dft16384(int16_t *x,int16_t *y,unsigned char scale)
{
simd256_q15_t xtmp[2048],ytmp[2048],*tw16384_256p=(simd256_q15_t *)tw16384,*x256=(simd256_q15_t *)x,*y256=(simd256_q15_t *)y,*y256p=(simd256_q15_t *)y;
simd256_q15_t *ytmpp = &ytmp[0];
int i,j;
for (i=0,j=0; i<2048; i+=4,j++) {
transpose16_ooff_simd256(x256+i,xtmp+j,512);
}
dft4096((int16_t*)(xtmp),(int16_t*)(ytmp),1);
dft4096((int16_t*)(xtmp+512),(int16_t*)(ytmp+512),1);
dft4096((int16_t*)(xtmp+1024),(int16_t*)(ytmp+1024),1);
dft4096((int16_t*)(xtmp+1536),(int16_t*)(ytmp+1536),1);
for (i=0; i<512; i++) {
bfly4_256(ytmpp,ytmpp+512,ytmpp+1024,ytmpp+1536,
y256p,y256p+512,y256p+1024,y256p+1536,
tw16384_256p,tw16384_256p+512,tw16384_256p+1024);
tw16384_256p++;
y256p++;
ytmpp++;
}
if (scale>0) {
for (i=0; i<128; i++) {
y256[0] = shiftright_int16_simd256(y256[0],1);
y256[1] = shiftright_int16_simd256(y256[1],1);
y256[2] = shiftright_int16_simd256(y256[2],1);
y256[3] = shiftright_int16_simd256(y256[3],1);
y256[4] = shiftright_int16_simd256(y256[4],1);
y256[5] = shiftright_int16_simd256(y256[5],1);
y256[6] = shiftright_int16_simd256(y256[6],1);
y256[7] = shiftright_int16_simd256(y256[7],1);
y256[8] = shiftright_int16_simd256(y256[8],1);
y256[9] = shiftright_int16_simd256(y256[9],1);
y256[10] = shiftright_int16_simd256(y256[10],1);
y256[11] = shiftright_int16_simd256(y256[11],1);
y256[12] = shiftright_int16_simd256(y256[12],1);
y256[13] = shiftright_int16_simd256(y256[13],1);
y256[14] = shiftright_int16_simd256(y256[14],1);
y256[15] = shiftright_int16_simd256(y256[15],1);
y256+=16;
}
}
_mm_empty();
_m_empty();
}
void idft16384(int16_t *x,int16_t *y,unsigned char scale)
{
simd256_q15_t xtmp[2048],ytmp[2048],*tw16384_256p=(simd256_q15_t *)tw16384,*x256=(simd256_q15_t *)x,*y256=(simd256_q15_t *)y,*y256p=(simd256_q15_t *)y;
simd256_q15_t *ytmpp = &ytmp[0];
int i,j;
for (i=0,j=0; i<2048; i+=4,j++) {
transpose16_ooff_simd256(x256+i,xtmp+j,512);
}
idft4096((int16_t*)(xtmp),(int16_t*)(ytmp),1);
idft4096((int16_t*)(xtmp+512),(int16_t*)(ytmp+512),1);
idft4096((int16_t*)(xtmp+1024),(int16_t*)(ytmp+1024),1);
idft4096((int16_t*)(xtmp+1536),(int16_t*)(ytmp+1536),1);
for (i=0; i<512; i++) {
ibfly4_256(ytmpp,ytmpp+512,ytmpp+1024,ytmpp+1536,
y256p,y256p+512,y256p+1024,y256p+1536,
tw16384_256p,tw16384_256p+512,tw16384_256p+1024);
tw16384_256p++;
y256p++;
ytmpp++;
}
if (scale>0) {
for (i=0; i<128; i++) {
y256[0] = shiftright_int16_simd256(y256[0],1);
y256[1] = shiftright_int16_simd256(y256[1],1);
y256[2] = shiftright_int16_simd256(y256[2],1);
y256[3] = shiftright_int16_simd256(y256[3],1);
y256[4] = shiftright_int16_simd256(y256[4],1);
y256[5] = shiftright_int16_simd256(y256[5],1);
y256[6] = shiftright_int16_simd256(y256[6],1);
y256[7] = shiftright_int16_simd256(y256[7],1);
y256[8] = shiftright_int16_simd256(y256[8],1);
y256[9] = shiftright_int16_simd256(y256[9],1);
y256[10] = shiftright_int16_simd256(y256[10],1);
y256[11] = shiftright_int16_simd256(y256[11],1);
y256[12] = shiftright_int16_simd256(y256[12],1);
y256[13] = shiftright_int16_simd256(y256[13],1);
y256[14] = shiftright_int16_simd256(y256[14],1);
y256[15] = shiftright_int16_simd256(y256[15],1);
y256+=16;
}
}
_mm_empty();
_m_empty();
}
#endif //__AVX2__
// Twiddle table handed to the radix-2 butterfly stage of dft32768/idft32768.
// It is read through SIMD vector pointers, hence the 32-byte alignment.
// The table contents are filled in elsewhere (not visible here).
__attribute__((aligned(32))) int16_t tw32768[2 * 16384];
#ifndef __AVX2__
void dft32768(int16_t *x,int16_t *y,unsigned char scale)
{
simdshort_q15_t xtmp[16384],*xtmpp,*x64 = (simdshort_q15_t *)x;
simd_q15_t ytmp[8192],*tw32768_128p=(simd_q15_t *)tw32768,*y128=(simd_q15_t *)y,*y128p=(simd_q15_t *)y;
simd_q15_t *ytmpp = &ytmp[0];
int i;
simd_q15_t ONE_OVER_SQRT2_Q15_128 = set1_int16(ONE_OVER_SQRT2_Q15);
xtmpp = xtmp;
for (i=0; i<256; i++) {
transpose4_ooff(x64 ,xtmpp,8192);
transpose4_ooff(x64+2,xtmpp+1,8192);
transpose4_ooff(x64+4,xtmpp+2,8192);
transpose4_ooff(x64+6,xtmpp+3,8192);
transpose4_ooff(x64+8,xtmpp+4,8192);
transpose4_ooff(x64+10,xtmpp+5,8192);
transpose4_ooff(x64+12,xtmpp+6,8192);
transpose4_ooff(x64+14,xtmpp+7,8192);
transpose4_ooff(x64+16,xtmpp+8,8192);
transpose4_ooff(x64+18,xtmpp+9,8192);
transpose4_ooff(x64+20,xtmpp+10,8192);
transpose4_ooff(x64+22,xtmpp+11,8192);
transpose4_ooff(x64+24,xtmpp+12,8192);
transpose4_ooff(x64+26,xtmpp+13,8192);
transpose4_ooff(x64+28,xtmpp+14,8192);
transpose4_ooff(x64+30,xtmpp+15,8192);
transpose4_ooff(x64+32,xtmpp+16,8192);
transpose4_ooff(x64+34,xtmpp+17,8192);
transpose4_ooff(x64+36,xtmpp+18,8192);
transpose4_ooff(x64+38,xtmpp+19,8192);
transpose4_ooff(x64+40,xtmpp+20,8192);
transpose4_ooff(x64+42,xtmpp+21,8192);
transpose4_ooff(x64+44,xtmpp+22,8192);
transpose4_ooff(x64+46,xtmpp+23,8192);
transpose4_ooff(x64+48,xtmpp+24,8192);
transpose4_ooff(x64+50,xtmpp+25,8192);
transpose4_ooff(x64+52,xtmpp+26,8192);
transpose4_ooff(x64+54,xtmpp+27,8192);
transpose4_ooff(x64+56,xtmpp+28,8192);
transpose4_ooff(x64+58,xtmpp+29,8192);
transpose4_ooff(x64+60,xtmpp+30,8192);
transpose4_ooff(x64+62,xtmpp+31,8192);
x64+=64;
xtmpp+=32;
}
dft16384((int16_t*)(xtmp),(int16_t*)ytmp,1);
dft16384((int16_t*)(xtmp+8192),(int16_t*)(ytmp+4096),1);
for (i=0; i<4096; i++) {
bfly2(ytmpp,ytmpp+4096,
y128p,y128p+4096,
tw32768_128p);
tw32768_128p++;
y128p++;
ytmpp++;
}
if (scale>0) {
y128p = y128;
for (i=0; i<512; i++) {
y128p[0] = mulhi_int16(y128p[0],ONE_OVER_SQRT2_Q15_128);
y128p[1] = mulhi_int16(y128p[1],ONE_OVER_SQRT2_Q15_128);
y128p[2] = mulhi_int16(y128p[2],ONE_OVER_SQRT2_Q15_128);
y128p[3] = mulhi_int16(y128p[3],ONE_OVER_SQRT2_Q15_128);
y128p[4] = mulhi_int16(y128p[4],ONE_OVER_SQRT2_Q15_128);
y128p[5] = mulhi_int16(y128p[5],ONE_OVER_SQRT2_Q15_128);
y128p[6] = mulhi_int16(y128p[6],ONE_OVER_SQRT2_Q15_128);
y128p[7] = mulhi_int16(y128p[7],ONE_OVER_SQRT2_Q15_128);
y128p[8] = mulhi_int16(y128p[8],ONE_OVER_SQRT2_Q15_128);
y128p[9] = mulhi_int16(y128p[9],ONE_OVER_SQRT2_Q15_128);
y128p[10] = mulhi_int16(y128p[10],ONE_OVER_SQRT2_Q15_128);
y128p[11] = mulhi_int16(y128p[11],ONE_OVER_SQRT2_Q15_128);
y128p[12] = mulhi_int16(y128p[12],ONE_OVER_SQRT2_Q15_128);
y128p[13] = mulhi_int16(y128p[13],ONE_OVER_SQRT2_Q15_128);
y128p[14] = mulhi_int16(y128p[14],ONE_OVER_SQRT2_Q15_128);
y128p[15] = mulhi_int16(y128p[15],ONE_OVER_SQRT2_Q15_128);
y128p+=16;
}
}
_mm_empty();
_m_empty();
}
void idft32768(int16_t *x,int16_t *y,unsigned char scale)
{
simdshort_q15_t xtmp[16384],*xtmpp,*x64 = (simdshort_q15_t *)x;
simd_q15_t ytmp[8192],*tw32768_128p=(simd_q15_t *)tw32768,*y128=(simd_q15_t *)y,*y128p=(simd_q15_t *)y;
simd_q15_t *ytmpp = &ytmp[0];
int i;
simd_q15_t ONE_OVER_SQRT2_Q15_128 = set1_int16(ONE_OVER_SQRT2_Q15);
xtmpp = xtmp;
for (i=0; i<256; i++) {
transpose4_ooff(x64 ,xtmpp,8192);
transpose4_ooff(x64+2,xtmpp+1,8192);
transpose4_ooff(x64+4,xtmpp+2,8192);
transpose4_ooff(x64+6,xtmpp+3,8192);
transpose4_ooff(x64+8,xtmpp+4,8192);
transpose4_ooff(x64+10,xtmpp+5,8192);
transpose4_ooff(x64+12,xtmpp+6,8192);
transpose4_ooff(x64+14,xtmpp+7,8192);
transpose4_ooff(x64+16,xtmpp+8,8192);
transpose4_ooff(x64+18,xtmpp+9,8192);
transpose4_ooff(x64+20,xtmpp+10,8192);
transpose4_ooff(x64+22,xtmpp+11,8192);
transpose4_ooff(x64+24,xtmpp+12,8192);
transpose4_ooff(x64+26,xtmpp+13,8192);
transpose4_ooff(x64+28,xtmpp+14,8192);
transpose4_ooff(x64+30,xtmpp+15,8192);
transpose4_ooff(x64+32,xtmpp+16,8192);
transpose4_ooff(x64+34,xtmpp+17,8192);
transpose4_ooff(x64+36,xtmpp+18,8192);
transpose4_ooff(x64+38,xtmpp+19,8192);
transpose4_ooff(x64+40,xtmpp+20,8192);
transpose4_ooff(x64+42,xtmpp+21,8192);
transpose4_ooff(x64+44,xtmpp+22,8192);
transpose4_ooff(x64+46,xtmpp+23,8192);
transpose4_ooff(x64+48,xtmpp+24,8192);
transpose4_ooff(x64+50,xtmpp+25,8192);
transpose4_ooff(x64+52,xtmpp+26,8192);
transpose4_ooff(x64+54,xtmpp+27,8192);
transpose4_ooff(x64+56,xtmpp+28,8192);
transpose4_ooff(x64+58,xtmpp+29,8192);
transpose4_ooff(x64+60,xtmpp+30,8192);
transpose4_ooff(x64+62,xtmpp+31,8192);
x64+=64;
xtmpp+=32;
}
idft16384((int16_t*)(xtmp),(int16_t*)ytmp,1);
idft16384((int16_t*)(xtmp+8192),(int16_t*)(ytmp+4096),1);
for (i=0; i<4096; i++) {
ibfly2(ytmpp,ytmpp+4096,
y128p,y128p+4096,
tw32768_128p);
tw32768_128p++;
y128p++;
ytmpp++;
}
if (scale>0) {
y128p = y128;
for (i=0; i<512; i++) {
y128p[0] = mulhi_int16(y128p[0],ONE_OVER_SQRT2_Q15_128);
y128p[1] = mulhi_int16(y128p[1],ONE_OVER_SQRT2_Q15_128);
y128p[2] = mulhi_int16(y128p[2],ONE_OVER_SQRT2_Q15_128);
y128p[3] = mulhi_int16(y128p[3],ONE_OVER_SQRT2_Q15_128);
y128p[4] = mulhi_int16(y128p[4],ONE_OVER_SQRT2_Q15_128);
y128p[5] = mulhi_int16(y128p[5],ONE_OVER_SQRT2_Q15_128);
y128p[6] = mulhi_int16(y128p[6],ONE_OVER_SQRT2_Q15_128);
y128p[7] = mulhi_int16(y128p[7],ONE_OVER_SQRT2_Q15_128);
y128p[8] = mulhi_int16(y128p[8],ONE_OVER_SQRT2_Q15_128);
y128p[9] = mulhi_int16(y128p[9],ONE_OVER_SQRT2_Q15_128);
y128p[10] = mulhi_int16(y128p[10],ONE_OVER_SQRT2_Q15_128);
y128p[11] = mulhi_int16(y128p[11],ONE_OVER_SQRT2_Q15_128);
y128p[12] = mulhi_int16(y128p[12],ONE_OVER_SQRT2_Q15_128);
y128p[13] = mulhi_int16(y128p[13],ONE_OVER_SQRT2_Q15_128);
y128p[14] = mulhi_int16(y128p[14],ONE_OVER_SQRT2_Q15_128);
y128p[15] = mulhi_int16(y128p[15],ONE_OVER_SQRT2_Q15_128);
y128p+=16;
}
}
_mm_empty();
_m_empty();
}
#else // __AVX2__
void dft32768(int16_t *x,int16_t *y,unsigned char scale)
{
simd256_q15_t xtmp[4096],*xtmpp,*x256 = (simd256_q15_t *)x;
simd256_q15_t ytmp[4096],*tw32768_256p=(simd256_q15_t *)tw32768,*y256=(simd256_q15_t *)y,*y256p=(simd256_q15_t *)y;
simd256_q15_t *ytmpp = &ytmp[0];
int i;
simd256_q15_t ONE_OVER_SQRT2_Q15_128 = set1_int16_simd256(ONE_OVER_SQRT2_Q15);
xtmpp = xtmp;
for (i=0; i<256; i++) {
transpose4_ooff_simd256(x256 ,xtmpp,2048);
transpose4_ooff_simd256(x256+2,xtmpp+1,2048);
transpose4_ooff_simd256(x256+4,xtmpp+2,2048);
transpose4_ooff_simd256(x256+6,xtmpp+3,2048);
transpose4_ooff_simd256(x256+8,xtmpp+4,2048);
transpose4_ooff_simd256(x256+10,xtmpp+5,2048);
transpose4_ooff_simd256(x256+12,xtmpp+6,2048);
transpose4_ooff_simd256(x256+14,xtmpp+7,2048);
transpose4_ooff_simd256(x256+16,xtmpp+8,2048);
transpose4_ooff_simd256(x256+18,xtmpp+9,2048);
transpose4_ooff_simd256(x256+20,xtmpp+10,2048);
transpose4_ooff_simd256(x256+22,xtmpp+11,2048);
transpose4_ooff_simd256(x256+24,xtmpp+12,2048);
transpose4_ooff_simd256(x256+26,xtmpp+13,2048);
transpose4_ooff_simd256(x256+28,xtmpp+14,2048);
transpose4_ooff_simd256(x256+30,xtmpp+15,2048);
transpose4_ooff_simd256(x256+32,xtmpp+16,2048);
transpose4_ooff_simd256(x256+34,xtmpp+17,2048);
transpose4_ooff_simd256(x256+36,xtmpp+18,2048);
transpose4_ooff_simd256(x256+38,xtmpp+19,2048);
transpose4_ooff_simd256(x256+40,xtmpp+20,2048);
transpose4_ooff_simd256(x256+42,xtmpp+21,2048);
transpose4_ooff_simd256(x256+44,xtmpp+22,2048);
transpose4_ooff_simd256(x256+46,xtmpp+23,2048);
transpose4_ooff_simd256(x256+48,xtmpp+24,2048);
transpose4_ooff_simd256(x256+50,xtmpp+25,2048);
transpose4_ooff_simd256(x256+52,xtmpp+26,2048);
transpose4_ooff_simd256(x256+54,xtmpp+27,2048);
transpose4_ooff_simd256(x256+56,xtmpp+28,2048);
transpose4_ooff_simd256(x256+58,xtmpp+29,2048);
transpose4_ooff_simd256(x256+60,xtmpp+30,2048);
transpose4_ooff_simd256(x256+62,xtmpp+31,2048);
x256+=64;
xtmpp+=32;
}
dft16384((int16_t*)(xtmp),(int16_t*)ytmp,1);
dft16384((int16_t*)(xtmp+2048),(int16_t*)(ytmp+2048),1);
for (i=0; i<2048; i++) {
bfly2_256(ytmpp,ytmpp+2048,
y256p,y256p+2048,
tw32768_256p);
tw32768_256p++;
y256p++;
ytmpp++;
}
if (scale>0) {
y256p = y256;
for (i=0; i<64; i++) {
y256p[0] = mulhi_int16_simd256(y256p[0],ONE_OVER_SQRT2_Q15_128);
y256p[1] = mulhi_int16_simd256(y256p[1],ONE_OVER_SQRT2_Q15_128);
y256p[2] = mulhi_int16_simd256(y256p[2],ONE_OVER_SQRT2_Q15_128);
y256p[3] = mulhi_int16_simd256(y256p[3],ONE_OVER_SQRT2_Q15_128);
y256p[4] = mulhi_int16_simd256(y256p[4],ONE_OVER_SQRT2_Q15_128);
y256p[5] = mulhi_int16_simd256(y256p[5],ONE_OVER_SQRT2_Q15_128);
y256p[6] = mulhi_int16_simd256(y256p[6],ONE_OVER_SQRT2_Q15_128);
y256p[7] = mulhi_int16_simd256(y256p[7],ONE_OVER_SQRT2_Q15_128);
y256p[8] = mulhi_int16_simd256(y256p[8],ONE_OVER_SQRT2_Q15_128);
y256p[9] = mulhi_int16_simd256(y256p[9],ONE_OVER_SQRT2_Q15_128);
y256p[10] = mulhi_int16_simd256(y256p[10],ONE_OVER_SQRT2_Q15_128);
y256p[11] = mulhi_int16_simd256(y256p[11],ONE_OVER_SQRT2_Q15_128);
y256p[12] = mulhi_int16_simd256(y256p[12],ONE_OVER_SQRT2_Q15_128);
y256p[13] = mulhi_int16_simd256(y256p[13],ONE_OVER_SQRT2_Q15_128);
y256p[14] = mulhi_int16_simd256(y256p[14],ONE_OVER_SQRT2_Q15_128);
y256p[15] = mulhi_int16_simd256(y256p[15],ONE_OVER_SQRT2_Q15_128);
y256p+=16;
}
}
_mm_empty();
_m_empty();
}
void idft32768(int16_t *x,int16_t *y,unsigned char scale)
{
simd256_q15_t xtmp[4096],*xtmpp,*x256 = (simd256_q15_t *)x;
simd256_q15_t ytmp[4096],*tw32768_256p=(simd256_q15_t *)tw32768,*y256=(simd256_q15_t *)y,*y256p=(simd256_q15_t *)y;
simd256_q15_t *ytmpp = &ytmp[0];
int i;
simd256_q15_t ONE_OVER_SQRT2_Q15_128 = set1_int16_simd256(ONE_OVER_SQRT2_Q15);
xtmpp = xtmp;
for (i=0; i<64; i++) {
transpose4_ooff_simd256(x256 ,xtmpp,2048);
transpose4_ooff_simd256(x256+2,xtmpp+1,2048);
transpose4_ooff_simd256(x256+4,xtmpp+2,2048);
transpose4_ooff_simd256(x256+6,xtmpp+3,2048);
transpose4_ooff_simd256(x256+8,xtmpp+4,2048);
transpose4_ooff_simd256(x256+10,xtmpp+5,2048);
transpose4_ooff_simd256(x256+12,xtmpp+6,2048);
transpose4_ooff_simd256(x256+14,xtmpp+7,2048);
transpose4_ooff_simd256(x256+16,xtmpp+8,2048);
transpose4_ooff_simd256(x256+18,xtmpp+9,2048);
transpose4_ooff_simd256(x256+20,xtmpp+10,2048);
transpose4_ooff_simd256(x256+22,xtmpp+11,2048);
transpose4_ooff_simd256(x256+24,xtmpp+12,2048);
transpose4_ooff_simd256(x256+26,xtmpp+13,2048);
transpose4_ooff_simd256(x256+28,xtmpp+14,2048);
transpose4_ooff_simd256(x256+30,xtmpp+15,2048);
transpose4_ooff_simd256(x256+32,xtmpp+16,2048);
transpose4_ooff_simd256(x256+34,xtmpp+17,2048);
transpose4_ooff_simd256(x256+36,xtmpp+18,2048);
transpose4_ooff_simd256(x256+38,xtmpp+19,2048);
transpose4_ooff_simd256(x256+40,xtmpp+20,2048);
transpose4_ooff_simd256(x256+42,xtmpp+21,2048);
transpose4_ooff_simd256(x256+44,xtmpp+22,2048);
transpose4_ooff_simd256(x256+46,xtmpp+23,2048);
transpose4_ooff_simd256(x256+48,xtmpp+24,2048);
transpose4_ooff_simd256(x256+50,xtmpp+25,2048);
transpose4_ooff_simd256(x256+52,xtmpp+26,2048);
transpose4_ooff_simd256(x256+54,xtmpp+27,2048);
transpose4_ooff_simd256(x256+56,xtmpp+28,2048);
transpose4_ooff_simd256(x256+58,xtmpp+29,2048);
transpose4_ooff_simd256(x256+60,xtmpp+30,2048);
transpose4_ooff_simd256(x256+62,xtmpp+31,2048);
x256+=64;
xtmpp+=32;
}
idft16384((int16_t*)(xtmp),(int16_t*)ytmp,1);
idft16384((int16_t*)(xtmp+2048),(int16_t*)(ytmp+2048),1);
for (i=0; i<2048; i++) {
ibfly2_256(ytmpp,ytmpp+2048,
y256p,y256p+2048,
tw32768_256p);
tw32768_256p++;
y256p++;
ytmpp++;
}
if (scale>0) {
y256p = y256;
for (i=0; i<256; i++) {
y256p[0] = mulhi_int16_simd256(y256p[0],ONE_OVER_SQRT2_Q15_128);
y256p[1] = mulhi_int16_simd256(y256p[1],ONE_OVER_SQRT2_Q15_128);
y256p[2] = mulhi_int16_simd256(y256p[2],ONE_OVER_SQRT2_Q15_128);
y256p[3] = mulhi_int16_simd256(y256p[3],ONE_OVER_SQRT2_Q15_128);
y256p[4] = mulhi_int16_simd256(y256p[4],ONE_OVER_SQRT2_Q15_128);
y256p[5] = mulhi_int16_simd256(y256p[5],ONE_OVER_SQRT2_Q15_128);
y256p[6] = mulhi_int16_simd256(y256p[6],ONE_OVER_SQRT2_Q15_128);
y256p[7] = mulhi_int16_simd256(y256p[7],ONE_OVER_SQRT2_Q15_128);
y256p[8] = mulhi_int16_simd256(y256p[8],ONE_OVER_SQRT2_Q15_128);
y256p[9] = mulhi_int16_simd256(y256p[9],ONE_OVER_SQRT2_Q15_128);
y256p[10] = mulhi_int16_simd256(y256p[10],ONE_OVER_SQRT2_Q15_128);
y256p[11] = mulhi_int16_simd256(y256p[11],ONE_OVER_SQRT2_Q15_128);
y256p[12] = mulhi_int16_simd256(y256p[12],ONE_OVER_SQRT2_Q15_128);
y256p[13] = mulhi_int16_simd256(y256p[13],ONE_OVER_SQRT2_Q15_128);
y256p[14] = mulhi_int16_simd256(y256p[14],ONE_OVER_SQRT2_Q15_128);
y256p[15] = mulhi_int16_simd256(y256p[15],ONE_OVER_SQRT2_Q15_128);
y256p+=16;
}
}
_mm_empty();
_m_empty();
}
#endif
int16_t twa1536[1024],twb1536[1024]; int16_t twa1536[1024],twb1536[1024];
// 512 x 3 // 512 x 3
...@@ -6176,12 +6777,99 @@ int16_t twb49152[32768] __attribute__((aligned(32))); ...@@ -6176,12 +6777,99 @@ int16_t twb49152[32768] __attribute__((aligned(32)));
// 16384 x 3 // 16384 x 3
// 49152-point forward DFT, computed as 3 x 16384.
//
// input  : 49152 samples, read as 32-bit words (one word per sample)
// output : 49152 samples, written as 12288 simd_q15_t vectors
// scale  : when equal to 1, every output vector is multiplied by
//          ONE_OVER_SQRT3_Q15 via mulhi_int16 after the butterfly stage
//
// This replaces the former placeholder body that aborted unconditionally with
// AssertFatal(1==0,"Need to do this ..").
//
// NOTE(review): the two scratch buffers take 2 * 3 * 16384 * 4 bytes of stack
// on top of what dft16384 itself uses; confirm the calling threads have
// enough stack.
void dft49152(int16_t *input, int16_t *output,uint8_t scale) {
  int i,i2;
  uint32_t tmp[3][16384] __attribute__((aligned(32)));
  uint32_t tmpo[3][16384] __attribute__((aligned(32)));
  uint32_t *in32 = (uint32_t *)input;
  simd_q15_t *y128p = (simd_q15_t *)output;
  simd_q15_t ONE_OVER_SQRT3_Q15_128 = set1_int16(ONE_OVER_SQRT3_Q15);

  // De-interleave: 32-bit word 3*i+k of the input goes to tmp[k][i].
  for (i = 0; i < 16384; i++) {
    tmp[0][i] = in32[3*i];
    tmp[1][i] = in32[3*i + 1];
    tmp[2][i] = in32[3*i + 2];
  }

  // Three independent 16384-point transforms.
  dft16384((int16_t *)(tmp[0]), (int16_t *)(tmpo[0]), 1);
  dft16384((int16_t *)(tmp[1]), (int16_t *)(tmpo[1]), 1);
  dft16384((int16_t *)(tmp[2]), (int16_t *)(tmpo[2]), 1);

  // Radix-3 combination, one 128-bit vector per step: i counts int16_t
  // elements (8 per vector), i2 counts 32-bit words (4 per vector).
  for (i = 0, i2 = 0; i < 32768; i += 8, i2 += 4) {
    bfly3((simd_q15_t *)(&tmpo[0][i2]), (simd_q15_t *)(&tmpo[1][i2]), (simd_q15_t *)(&tmpo[2][i2]),
          (simd_q15_t *)(output + i), (simd_q15_t *)(output + 32768 + i), (simd_q15_t *)(output + 65536 + i),
          (simd_q15_t *)(twa49152 + i), (simd_q15_t *)(twb49152 + i));
  }

  // Optional output scaling over all 12288 output vectors.
  if (scale == 1) {
    for (i = 0; i < 12288; i++) {
      y128p[i] = mulhi_int16(y128p[i], ONE_OVER_SQRT3_Q15_128);
    }
  }

  _mm_empty();
  _m_empty();
}
/*
 * 49152-point inverse DFT, computed as three 16384-point inverse DFTs followed
 * by a radix-3 inverse butterfly stage (ibfly3).
 *
 * input  : 49152 32-bit words, read through a uint32_t view of the int16_t
 *          buffer (presumably one packed 16-bit re/im sample per word - confirm).
 * output : 98304 int16_t values; written in three consecutive sections of
 *          32768 int16_t each by the butterfly stage.
 * scale  : when 1, every output vector is multiplied by ONE_OVER_SQRT3_Q15
 *          through mulhi_int16(). The three sub-IDFTs are always invoked with
 *          their own scale argument set to 1, independent of this flag.
 *
 * NOTE: tmp and tmpo together take 2*3*16384*4 = 393216 bytes of automatic
 * storage, so the calling thread needs a correspondingly large stack.
 * The twiddle tables twa49152/twb49152 are filled by
 * init_rad3(49152,twa49152,twb49152) in dfts_autoinit().
 */
void idft49152(int16_t *input, int16_t *output,uint8_t scale) {
  int i,i2;
  uint32_t tmp[3][16384] __attribute__((aligned(32)));
  uint32_t tmpo[3][16384] __attribute__((aligned(32)));
  const uint32_t *in32=(const uint32_t *)input;
  simd_q15_t *y128p=(simd_q15_t*)output;
  simd_q15_t ONE_OVER_SQRT3_Q15_128 = set1_int16(ONE_OVER_SQRT3_Q15);

  // De-interleave by 3: word 3*i+k of the input goes to sub-sequence k.
  for (i=0; i<16384; i++) {
    tmp[0][i] = in32[3*i];
    tmp[1][i] = in32[3*i+1];
    tmp[2][i] = in32[3*i+2];
  }

  idft16384((int16_t*)(tmp[0]),(int16_t*)(tmpo[0]),1);
  idft16384((int16_t*)(tmp[1]),(int16_t*)(tmpo[1]),1);
  idft16384((int16_t*)(tmp[2]),(int16_t*)(tmpo[2]),1);

  // Radix-3 recombination: i advances in int16_t units (8 per step) through
  // output and the twiddle tables, i2 in 32-bit words (4 per step) through tmpo.
  for (i=0,i2=0; i<32768; i+=8,i2+=4) {
    ibfly3((simd_q15_t*)(&tmpo[0][i2]),(simd_q15_t*)(&tmpo[1][i2]),((simd_q15_t*)&tmpo[2][i2]),
           (simd_q15_t*)(output+i),(simd_q15_t*)(output+32768+i),(simd_q15_t*)(output+65536+i),
           (simd_q15_t*)(twa49152+i),(simd_q15_t*)(twb49152+i));
  }

  if (scale==1) {
    // 768*16 vectors, i.e. the same span the former 16-way unrolled loop covered
    // (the whole output if simd_q15_t holds 8 int16_t lanes - confirm).
    for (i=0; i<768*16; i++) {
      y128p[i] = mulhi_int16(y128p[i],ONE_OVER_SQRT3_Q15_128);
    }
  }

  _mm_empty();
  _m_empty();
}
int16_t twa73728[49152] __attribute__((aligned(32))); int16_t twa73728[49152] __attribute__((aligned(32)));
...@@ -6198,17 +6886,104 @@ void idft73728(int16_t *input, int16_t *output,uint8_t scale) { ...@@ -6198,17 +6886,104 @@ void idft73728(int16_t *input, int16_t *output,uint8_t scale) {
} }
// Radix-3 twiddle tables for the 98304-point transforms. They hold 65536
// int16_t each because the butterfly loops below read twa98304+i / twb98304+i
// for i in [0,65536); a 49152-entry table would be read out of bounds.
// Filled by init_rad3(98304,twa98304,twb98304) in dfts_autoinit().
int16_t twa98304[65536] __attribute__((aligned(32)));
int16_t twb98304[65536] __attribute__((aligned(32)));

// 32768 x 3
/*
 * 98304-point DFT, computed as three 32768-point DFTs followed by a radix-3
 * butterfly stage.
 *
 * input  : 98304 32-bit words, read through a uint32_t view of the int16_t
 *          buffer (presumably one packed 16-bit re/im sample per word - confirm).
 * output : 196608 int16_t values; written in three consecutive sections of
 *          65536 int16_t each by the butterfly stage.
 * scale  : when 1, every output vector is multiplied by ONE_OVER_SQRT3_Q15
 *          through mulhi_int16(). The three sub-DFTs are always invoked with
 *          their own scale argument set to 1, independent of this flag.
 *
 * NOTE: tmp and tmpo together take 2*3*32768*4 = 786432 bytes of automatic
 * storage, so the calling thread needs a correspondingly large stack.
 */
void dft98304(int16_t *input, int16_t *output,uint8_t scale) {
  int i,i2;
  uint32_t tmp[3][32768] __attribute__((aligned(32)));
  uint32_t tmpo[3][32768] __attribute__((aligned(32)));
  const uint32_t *in32=(const uint32_t *)input;
  simd_q15_t *y128p=(simd_q15_t*)output;
  simd_q15_t ONE_OVER_SQRT3_Q15_128 = set1_int16(ONE_OVER_SQRT3_Q15);

  // De-interleave by 3: word 3*i+k of the input goes to sub-sequence k.
  for (i=0; i<32768; i++) {
    tmp[0][i] = in32[3*i];
    tmp[1][i] = in32[3*i+1];
    tmp[2][i] = in32[3*i+2];
  }

  dft32768((int16_t*)(tmp[0]),(int16_t*)(tmpo[0]),1);
  dft32768((int16_t*)(tmp[1]),(int16_t*)(tmpo[1]),1);
  dft32768((int16_t*)(tmp[2]),(int16_t*)(tmpo[2]),1);

  // Radix-3 recombination: i advances in int16_t units (8 per step) through
  // output and the twiddle tables, i2 in 32-bit words (4 per step) through tmpo.
  for (i=0,i2=0; i<65536; i+=8,i2+=4) {
    bfly3((simd_q15_t*)(&tmpo[0][i2]),(simd_q15_t*)(&tmpo[1][i2]),((simd_q15_t*)&tmpo[2][i2]),
          (simd_q15_t*)(output+i),(simd_q15_t*)(output+65536+i),(simd_q15_t*)(output+131072+i),
          (simd_q15_t*)(twa98304+i),(simd_q15_t*)(twb98304+i));
  }

  if (scale==1) {
    // 1536*16 vectors, i.e. the same span the former 16-way unrolled loop covered
    // (the whole output if simd_q15_t holds 8 int16_t lanes - confirm).
    for (i=0; i<1536*16; i++) {
      y128p[i] = mulhi_int16(y128p[i],ONE_OVER_SQRT3_Q15_128);
    }
  }

  _mm_empty();
  _m_empty();
}
/*
 * 98304-point inverse DFT, computed as three 32768-point inverse DFTs followed
 * by a radix-3 inverse butterfly stage (ibfly3).
 *
 * input  : 98304 32-bit words, read through a uint32_t view of the int16_t
 *          buffer (presumably one packed 16-bit re/im sample per word - confirm).
 * output : 196608 int16_t values; written in three consecutive sections of
 *          65536 int16_t each by the butterfly stage.
 * scale  : when 1, every output vector is multiplied by ONE_OVER_SQRT3_Q15
 *          through mulhi_int16(). The three sub-IDFTs are always invoked with
 *          their own scale argument set to 1, independent of this flag.
 *
 * NOTE: tmp and tmpo together take 2*3*32768*4 = 786432 bytes of automatic
 * storage, so the calling thread needs a correspondingly large stack.
 * The butterfly loop reads twa98304+i / twb98304+i for i in [0,65536), so both
 * twiddle tables must hold at least 65536 int16_t entries.
 */
void idft98304(int16_t *input, int16_t *output,uint8_t scale) {
  int i,i2;
  uint32_t tmp[3][32768] __attribute__((aligned(32)));
  uint32_t tmpo[3][32768] __attribute__((aligned(32)));
  const uint32_t *in32=(const uint32_t *)input;
  simd_q15_t *y128p=(simd_q15_t*)output;
  simd_q15_t ONE_OVER_SQRT3_Q15_128 = set1_int16(ONE_OVER_SQRT3_Q15);

  // De-interleave by 3: word 3*i+k of the input goes to sub-sequence k.
  for (i=0; i<32768; i++) {
    tmp[0][i] = in32[3*i];
    tmp[1][i] = in32[3*i+1];
    tmp[2][i] = in32[3*i+2];
  }

  idft32768((int16_t*)(tmp[0]),(int16_t*)(tmpo[0]),1);
  idft32768((int16_t*)(tmp[1]),(int16_t*)(tmpo[1]),1);
  idft32768((int16_t*)(tmp[2]),(int16_t*)(tmpo[2]),1);

  // Radix-3 recombination: i advances in int16_t units (8 per step) through
  // output and the twiddle tables, i2 in 32-bit words (4 per step) through tmpo.
  for (i=0,i2=0; i<65536; i+=8,i2+=4) {
    ibfly3((simd_q15_t*)(&tmpo[0][i2]),(simd_q15_t*)(&tmpo[1][i2]),((simd_q15_t*)&tmpo[2][i2]),
           (simd_q15_t*)(output+i),(simd_q15_t*)(output+65536+i),(simd_q15_t*)(output+131072+i),
           (simd_q15_t*)(twa98304+i),(simd_q15_t*)(twb98304+i));
  }

  if (scale==1) {
    // 1536*16 vectors, i.e. the same span the former 16-way unrolled loop covered
    // (the whole output if simd_q15_t holds 8 int16_t lanes - confirm).
    for (i=0; i<1536*16; i++) {
      y128p[i] = mulhi_int16(y128p[i],ONE_OVER_SQRT3_Q15_128);
    }
  }

  _mm_empty();
  _m_empty();
}
...@@ -8823,6 +9598,8 @@ int dfts_autoinit(void) ...@@ -8823,6 +9598,8 @@ int dfts_autoinit(void)
init_rad2(2048,tw2048); init_rad2(2048,tw2048);
init_rad4(4096,tw4096); init_rad4(4096,tw4096);
init_rad2(8192,tw8192); init_rad2(8192,tw8192);
init_rad4(16384,tw16384);
init_rad2(32768,tw32768);
init_rad3(1536,twa1536,twb1536); init_rad3(1536,twa1536,twb1536);
init_rad3(3072,twa3072,twb3072); init_rad3(3072,twa3072,twb3072);
...@@ -8830,6 +9607,9 @@ int dfts_autoinit(void) ...@@ -8830,6 +9607,9 @@ int dfts_autoinit(void)
init_rad3(12288,twa12288,twb12288); init_rad3(12288,twa12288,twb12288);
init_rad3(18432,twa18432,twb18432); init_rad3(18432,twa18432,twb18432);
init_rad3(24576,twa24576,twb24576); init_rad3(24576,twa24576,twb24576);
init_rad3(49152,twa49152,twb49152);
init_rad3(98304,twa98304,twb98304);
init_rad2_rep(24,tw24); init_rad2_rep(24,tw24);
init_rad3_rep(36,twa36,twb36); init_rad3_rep(36,twa36,twb36);
...@@ -8869,6 +9649,7 @@ int dfts_autoinit(void) ...@@ -8869,6 +9649,7 @@ int dfts_autoinit(void)
#ifndef MR_MAIN
void dft(uint8_t sizeidx, int16_t *sigF,int16_t *sig,unsigned char scale_flag){ void dft(uint8_t sizeidx, int16_t *sigF,int16_t *sig,unsigned char scale_flag){
AssertFatal((sizeidx>=0 && sizeidx<(int)DFT_SIZE_IDXTABLESIZE),"Invalid dft size index %i\n",sizeidx); AssertFatal((sizeidx>=0 && sizeidx<(int)DFT_SIZE_IDXTABLESIZE),"Invalid dft size index %i\n",sizeidx);
...@@ -8879,6 +9660,7 @@ void idft(uint8_t sizeidx, int16_t *sigF,int16_t *sig,unsigned char scale_flag){ ...@@ -8879,6 +9660,7 @@ void idft(uint8_t sizeidx, int16_t *sigF,int16_t *sig,unsigned char scale_flag){
AssertFatal((sizeidx>=0 && sizeidx<(int)IDFT_SIZE_IDXTABLESIZE),"Invalid idft size index %i\n",sizeidx); AssertFatal((sizeidx>=0 && sizeidx<(int)IDFT_SIZE_IDXTABLESIZE),"Invalid idft size index %i\n",sizeidx);
idft_ftab[sizeidx](sigF,sig,scale_flag); idft_ftab[sizeidx](sigF,sig,scale_flag);
}; };
#endif
/*---------------------------------------------------------------------------------------*/ /*---------------------------------------------------------------------------------------*/
...@@ -9049,9 +9831,9 @@ int main(int argc, char**argv) ...@@ -9049,9 +9831,9 @@ int main(int argc, char**argv)
time_stats_t ts; time_stats_t ts;
#ifdef __AVX2__ #ifdef __AVX2__
simd256_q15_t x[4096],x2[4096],y[4096],tw0,tw1,tw2,tw3; simd256_q15_t x[16384],x2[16384],y[16384],tw0,tw1,tw2,tw3;
#else #else
simd_q15_t x[8192],y[8192],tw0,tw1,tw2,tw3; simd_q15_t x[32768],y[32768],tw0,tw1,tw2,tw3;
#endif #endif
int i; int i;
simd_q15_t *x128=(simd_q15_t*)x,*y128=(simd_q15_t*)y; simd_q15_t *x128=(simd_q15_t*)x,*y128=(simd_q15_t*)y;
...@@ -9613,10 +10395,34 @@ int main(int argc, char**argv) ...@@ -9613,10 +10395,34 @@ int main(int argc, char**argv)
stop_meas(&ts); stop_meas(&ts);
} }
printf("\n\n1536-point(%f cycles)\n",(double)ts.diff/(double)ts.trials); printf("\n\n8192-point(%f cycles)\n",(double)ts.diff/(double)ts.trials);
LOG_M("y8192.m","y8192",y,8192,1,1); LOG_M("y8192.m","y8192",y,8192,1,1);
LOG_M("x8192.m","x8192",x,8192,1,1); LOG_M("x8192.m","x8192",x,8192,1,1);
memset((void*)x,0,16384*sizeof(int32_t));
for (i=2;i<9602;i++) {
if ((taus() & 1)==0)
((int16_t*)x)[i] = 364;
else
((int16_t*)x)[i] = -364;
}
for (i=2*(16384-4800);i<32768;i++) {
if ((taus() & 1)==0)
((int16_t*)x)[i] = 364;
else
((int16_t*)x)[i] = -364;
}
reset_meas(&ts);
for (i=0; i<10000; i++) {
start_meas(&ts);
dft16384((int16_t *)x,(int16_t *)y,1);
stop_meas(&ts);
}
printf("\n\n16384-point(%f cycles)\n",(double)ts.diff/(double)ts.trials);
LOG_M("y16384.m","y16384",y,16384,1,1);
LOG_M("x16384.m","x16384",x,16384,1,1);
memset((void*)x,0,1536*sizeof(int32_t)); memset((void*)x,0,1536*sizeof(int32_t));
for (i=2;i<1202;i++) { for (i=2;i<1202;i++) {
if ((taus() & 1)==0) if ((taus() & 1)==0)
...@@ -9765,6 +10571,30 @@ int main(int argc, char**argv) ...@@ -9765,6 +10571,30 @@ int main(int argc, char**argv)
LOG_M("y24576.m","y24576",y,24576,1,1); LOG_M("y24576.m","y24576",y,24576,1,1);
LOG_M("x24576.m","x24576",x,24576,1,1); LOG_M("x24576.m","x24576",x,24576,1,1);
memset((void*)x,0,49152*sizeof(int32_t));
for (i=2;i<28402;i++) {
if ((taus() & 1)==0)
((int16_t*)x)[i] = 364;
else
((int16_t*)x)[i] = -364;
}
for (i=2*(49152-14400);i<98304;i++) {
if ((taus() & 1)==0)
((int16_t*)x)[i] = 364;
else
((int16_t*)x)[i] = -364;
}
reset_meas(&ts);
for (i=0; i<10000; i++) {
start_meas(&ts);
idft49152((int16_t *)x,(int16_t *)y,1);
stop_meas(&ts);
}
printf("\n\n49152-point(%f cycles)\n",(double)ts.diff/(double)ts.trials);
LOG_M("y49152.m","y49152",y,49152,1,1);
LOG_M("x49152.m","x49152",x,49152,1,1);
/*
int dftsizes[33]={24,36,48,60,72,96,108,120,144,180,192,216,240,288,300,324,360,384,432,480,540,576,600,648,720,768,864,900,960,972,1080,1152,1200}; int dftsizes[33]={24,36,48,60,72,96,108,120,144,180,192,216,240,288,300,324,360,384,432,480,540,576,600,648,720,768,864,900,960,972,1080,1152,1200};
void (*dft)(int16_t *x,int16_t *y,uint8_t scale)[33] = {dft24,dft36,dft48,dft60,dft72,dft96,dft108,dft120,dft144,dft180,dft192,dft216,dft240,dft288,dft300,dft324,dft360,dft384,dft432,dft480,dft540,dft576,dft600,dft648,dft720,dft768,dft864,dft900,dft960,dft972,dft1080,dft1152,dft1200}; void (*dft)(int16_t *x,int16_t *y,uint8_t scale)[33] = {dft24,dft36,dft48,dft60,dft72,dft96,dft108,dft120,dft144,dft180,dft192,dft216,dft240,dft288,dft300,dft324,dft360,dft384,dft432,dft480,dft540,dft576,dft600,dft648,dft720,dft768,dft864,dft900,dft960,dft972,dft1080,dft1152,dft1200};
for (int n=0;n<33;n++) { for (int n=0;n<33;n++) {
...@@ -9797,7 +10627,7 @@ int main(int argc, char**argv) ...@@ -9797,7 +10627,7 @@ int main(int argc, char**argv)
LOG_M(ystr,ystr2,y,dftsizes[n]*4,1,1); LOG_M(ystr,ystr2,y,dftsizes[n]*4,1,1);
LOG_M(xstr,xstr2,x,dftsizes[n]*4,1,1); LOG_M(xstr,xstr2,x,dftsizes[n]*4,1,1);
} }
*/
return(0); return(0);
} }
......
...@@ -89,7 +89,7 @@ int main(int argc, char **argv){ ...@@ -89,7 +89,7 @@ int main(int argc, char **argv){
int i, aa, aarx, **txdata, trial, n_frames = 1, prach_start, rx_prach_start; //, ntrials=1; int i, aa, aarx, **txdata, trial, n_frames = 1, prach_start, rx_prach_start; //, ntrials=1;
int N_RB_UL = 106, delay = 0, NCS_config = 13, rootSequenceIndex = 1, threequarter_fs = 0, mu = 1, fd_occasion = 0, loglvl = OAILOG_INFO, numRA = 0, prachStartSymbol = 0; int N_RB_UL = 106, delay = 0, NCS_config = 13, rootSequenceIndex = 1, threequarter_fs = 0, mu = 1, fd_occasion = 0, loglvl = OAILOG_INFO, numRA = 0, prachStartSymbol = 0;
uint8_t snr1set = 0, ue_speed1set = 0, transmission_mode = 1, n_tx = 1, n_rx = 1, awgn_flag = 0, msg1_frequencystart = 0, num_prach_fd_occasions = 1, prach_format; uint8_t snr1set = 0, ue_speed1set = 0, transmission_mode = 1, n_tx = 1, n_rx = 1, awgn_flag = 0, msg1_frequencystart = 0, num_prach_fd_occasions = 1, prach_format;
uint8_t frame = 1, subframe = 19, config_index = 98, prach_sequence_length = 1, num_root_sequences = 16, restrictedSetConfig = 0, N_dur, N_t_slot, start_symbol; uint8_t frame = 1, subframe = 9, slot=19, config_index = 98, prach_sequence_length = 1, num_root_sequences = 16, restrictedSetConfig = 0, N_dur, N_t_slot, start_symbol;
uint16_t Nid_cell = 0, preamble_tx = 0, preamble_delay, format, format0, format1; uint16_t Nid_cell = 0, preamble_tx = 0, preamble_delay, format, format0, format1;
uint32_t tx_lev = 10000, prach_errors = 0, samp_count; //,tx_lev_dB; uint32_t tx_lev = 10000, prach_errors = 0, samp_count; //,tx_lev_dB;
uint64_t SSB_positions = 0x01, absoluteFrequencyPointA = 640000; uint64_t SSB_positions = 0x01, absoluteFrequencyPointA = 640000;
...@@ -118,7 +118,7 @@ int main(int argc, char **argv){ ...@@ -118,7 +118,7 @@ int main(int argc, char **argv){
randominit(0); randominit(0);
while ((c = getopt (argc, argv, "hHaA:Cr:p:g:n:s:S:t:x:y:v:V:z:N:F:d:Z:L:R:E")) != -1) { while ((c = getopt (argc, argv, "hHaA:Cc:r:p:g:n:s:S:t:x:y:v:V:z:N:F:d:Z:L:R:E")) != -1) {
switch (c) { switch (c) {
case 'a': case 'a':
printf("Running AWGN simulation\n"); printf("Running AWGN simulation\n");
...@@ -127,6 +127,10 @@ int main(int argc, char **argv){ ...@@ -127,6 +127,10 @@ int main(int argc, char **argv){
//ntrials=1; //ntrials=1;
break; break;
case 'c':
config_index = atoi(optarg);
break;
case 'd': case 'd':
delay = atoi(optarg); delay = atoi(optarg);
break; break;
...@@ -313,6 +317,11 @@ int main(int argc, char **argv){ ...@@ -313,6 +317,11 @@ int main(int argc, char **argv){
} }
} }
if (config_index<67) { prach_sequence_length=0; slot = subframe*2; }
printf("Config_index %d, prach_sequence_length %d\n",config_index,prach_sequence_length);
// Configure log // Configure log
logInit(); logInit();
set_glog(loglvl); set_glog(loglvl);
...@@ -373,11 +382,11 @@ int main(int argc, char **argv){ ...@@ -373,11 +382,11 @@ int main(int argc, char **argv){
gNB->gNB_config.prach_config.num_prach_fd_occasions.value = num_prach_fd_occasions; gNB->gNB_config.prach_config.num_prach_fd_occasions.value = num_prach_fd_occasions;
gNB->gNB_config.prach_config.num_prach_fd_occasions_list = (nfapi_nr_num_prach_fd_occasions_t *) malloc(num_prach_fd_occasions*sizeof(nfapi_nr_num_prach_fd_occasions_t)); gNB->gNB_config.prach_config.num_prach_fd_occasions_list = (nfapi_nr_num_prach_fd_occasions_t *) malloc(num_prach_fd_occasions*sizeof(nfapi_nr_num_prach_fd_occasions_t));
gNB->proc.slot_rx = subframe; gNB->proc.slot_rx = slot;
get_nr_prach_info_from_index(config_index, get_nr_prach_info_from_index(config_index,
(int)frame, (int)frame,
(int)subframe, (int)slot,
absoluteFrequencyPointA, absoluteFrequencyPointA,
mu, mu,
frame_parms->frame_type, frame_parms->frame_type,
...@@ -562,7 +571,7 @@ int main(int argc, char **argv){ ...@@ -562,7 +571,7 @@ int main(int argc, char **argv){
UE_nr_rxtx_proc_t proc={0}; UE_nr_rxtx_proc_t proc={0};
proc.frame_tx = frame; proc.frame_tx = frame;
proc.nr_tti_tx = subframe; proc.nr_tti_tx = slot;
nr_ue_prach_procedures(UE,&proc,0,0); nr_ue_prach_procedures(UE,&proc,0,0);
/* tx_lev_dB not used later, no need to set */ /* tx_lev_dB not used later, no need to set */
...@@ -571,19 +580,19 @@ int main(int argc, char **argv){ ...@@ -571,19 +580,19 @@ int main(int argc, char **argv){
if (mu == 0) if (mu == 0)
samp_count = frame_parms->samples_per_subframe; samp_count = frame_parms->samples_per_subframe;
else else
samp_count = (subframe%(frame_parms->slots_per_subframe/2)) ? frame_parms->samples_per_slotN0 : frame_parms->samples_per_slot0; samp_count = ((slot)%(frame_parms->slots_per_subframe/2)) ? frame_parms->samples_per_slotN0 : frame_parms->samples_per_slot0;
prach_start = subframe*samp_count - UE->N_TA_offset; prach_start = slot*samp_count - UE->N_TA_offset;
#ifdef NR_PRACH_DEBUG #ifdef NR_PRACH_DEBUG
LOG_M("txsig0.m", "txs0", &txdata[0][prach_start], samp_count, 1, 1); LOG_M("txsig0.m", "txs0", &txdata[0][prach_start], frame_parms->samples_per_subframe, 1, 1);
//LOG_M("txsig1.m","txs1", txdata[1],FRAME_LENGTH_COMPLEX_SAMPLES,1,1); //LOG_M("txsig1.m","txs1", txdata[1],FRAME_LENGTH_COMPLEX_SAMPLES,1,1);
#endif #endif
// multipath channel // multipath channel
// dump_nr_prach_config(&gNB->frame_parms,subframe); // dump_nr_prach_config(&gNB->frame_parms,subframe);
for (i = 0; i < samp_count<<1; i++) { for (i = 0; i < frame_parms->samples_per_subframe<<1; i++) {
for (aa=0; aa<1; aa++) { for (aa=0; aa<1; aa++) {
if (awgn_flag == 0) { if (awgn_flag == 0) {
s_re[aa][i] = ((double)(((short *)&txdata[aa][prach_start]))[(i<<1)]); s_re[aa][i] = ((double)(((short *)&txdata[aa][prach_start]))[(i<<1)]);
...@@ -618,7 +627,10 @@ int main(int argc, char **argv){ ...@@ -618,7 +627,10 @@ int main(int argc, char **argv){
ue_speed1 = ue_speed0 + 50; ue_speed1 = ue_speed0 + 50;
} }
rx_prach_start = subframe*frame_parms->get_samples_per_slot(subframe,frame_parms); rx_prach_start = slot*frame_parms->get_samples_per_slot(slot,frame_parms);
if (n_frames==1) printf("slot %d, rx_prach_start %d\n",slot,rx_prach_start);
uint16_t preamble_rx, preamble_energy, N_ZC;
N_ZC = prach_sequence_length == 0 ? 839 : 139;
for (SNR=snr0; SNR<snr1; SNR+=.1) { for (SNR=snr0; SNR<snr1; SNR+=.1) {
for (ue_speed=ue_speed0; ue_speed<ue_speed1; ue_speed+=10) { for (ue_speed=ue_speed0; ue_speed<ue_speed1; ue_speed+=10) {
...@@ -630,9 +642,8 @@ int main(int argc, char **argv){ ...@@ -630,9 +642,8 @@ int main(int argc, char **argv){
for (trial=0; trial<n_frames; trial++) { for (trial=0; trial<n_frames; trial++) {
uint16_t preamble_rx, preamble_energy, N_ZC;
sigma2_dB = 10*log10((double)tx_lev) - SNR; sigma2_dB = 10*log10((double)tx_lev) - SNR - 10*log10(N_RB_UL*12/N_ZC);
if (n_frames==1) if (n_frames==1)
printf("sigma2_dB %f (SNR %f dB) tx_lev_dB %f\n",sigma2_dB,SNR,10*log10((double)tx_lev)); printf("sigma2_dB %f (SNR %f dB) tx_lev_dB %f\n",sigma2_dB,SNR,10*log10((double)tx_lev));
...@@ -651,20 +662,20 @@ int main(int argc, char **argv){ ...@@ -651,20 +662,20 @@ int main(int argc, char **argv){
10*log10(tx_lev)); 10*log10(tx_lev));
} }
for (i = 0; i< frame_parms->get_samples_per_slot(subframe,frame_parms); i++) { for (i = 0; i< frame_parms->samples_per_subframe; i++) {
for (aa = 0; aa < frame_parms->nb_antennas_rx; aa++) { for (aa = 0; aa < frame_parms->nb_antennas_rx; aa++) {
((short*) &gNB->common_vars.rxdata[aa][rx_prach_start])[2*i] = (short) (.167*(r_re[aa][i] +sqrt(sigma2/2)*gaussdouble(0.0,1.0))); ((short*) &ru->common.rxdata[aa][rx_prach_start])[2*i] = (short) (.167*(r_re[aa][i] +sqrt(sigma2/2)*gaussdouble(0.0,1.0)));
((short*) &gNB->common_vars.rxdata[aa][rx_prach_start])[2*i+1] = (short) (.167*(r_im[aa][i] + (iqim*r_re[aa][i]) + sqrt(sigma2/2)*gaussdouble(0.0,1.0))); ((short*) &ru->common.rxdata[aa][rx_prach_start])[2*i+1] = (short) (.167*(r_im[aa][i] + (iqim*r_re[aa][i]) + sqrt(sigma2/2)*gaussdouble(0.0,1.0)));
} }
} }
rx_nr_prach_ru(ru, prach_format, numRA, prachStartSymbol, frame, subframe); rx_nr_prach_ru(ru, prach_format, numRA, prachStartSymbol, frame, slot);
gNB->prach_vars.rxsigF = ru->prach_rxsigF; gNB->prach_vars.rxsigF = ru->prach_rxsigF;
rx_nr_prach(gNB, prach_pdu, frame, subframe, &preamble_rx, &preamble_energy, &preamble_delay); rx_nr_prach(gNB, prach_pdu, frame, subframe, &preamble_rx, &preamble_energy, &preamble_delay);
printf(" preamble_energy %d preamble_rx %d preamble_tx %d \n", preamble_energy, preamble_rx, preamble_tx); // printf(" preamble_energy %d preamble_rx %d preamble_tx %d \n", preamble_energy, preamble_rx, preamble_tx);
if (preamble_rx != preamble_tx) if (preamble_rx != preamble_tx)
prach_errors++; prach_errors++;
......
...@@ -21,7 +21,8 @@ ...@@ -21,7 +21,8 @@
#include <time.h> #include <time.h>
#include <stdlib.h> #include <stdlib.h>
#include "SIMULATION/TOOLS/sim.h" //#include "SIMULATION/TOOLS/sim.h"
unsigned int s0, s1, s2, b; unsigned int s0, s1, s2, b;
......
...@@ -46,6 +46,7 @@ uint16_t NCS_restricted_TypeB_delta_f_RA_5[14] = {36,57,60,63,65,68,71,77,81,8 ...@@ -46,6 +46,7 @@ uint16_t NCS_restricted_TypeB_delta_f_RA_5[14] = {36,57,60,63,65,68,71,77,81,8
uint16_t NCS_unrestricted_delta_f_RA_15[16] = {0,2,4,6,8,10,12,13,15,17,19,23,27,34,46,69}; uint16_t NCS_unrestricted_delta_f_RA_15[16] = {0,2,4,6,8,10,12,13,15,17,19,23,27,34,46,69};
const char *prachfmt[]={"A1","A2","A3","B1","B2","B3","B4","C0","C2"}; const char *prachfmt[]={"A1","A2","A3","B1","B2","B3","B4","C0","C2"};
const char *prachfmt03[]={"0","1","2","3"};
uint16_t get_NCS(uint8_t index, uint16_t format0, uint8_t restricted_set_config) { uint16_t get_NCS(uint8_t index, uint16_t format0, uint8_t restricted_set_config) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment