Commit 5dbe3bef authored by Raymond Knopp's avatar Raymond Knopp

debugging and testing of AVX2-optimized DFTS (radix 2/4 only).

parent 47a6fa6e
......@@ -134,7 +134,7 @@ else (CMAKE_SYSTEM_PROCESSOR STREQUAL "armv7l")
set(C_FLAGS_PROCESSOR "${C_FLAGS_PROCESSOR} -mavx2")
endif()
if (CPUINFO MATCHES "sse4_2")
set(C_FLAGS_PROCESSOR "${C_FLAGS_PROCESSOR} -msse4.2")
set(C_FLAGS_PROCESSOR "${C_FLAGS_PROCESSOR} -mavx2 -msse4.2")
endif()
if (CPUINFO MATCHES "sse4_1")
set(C_FLAGS_PROCESSOR "${C_FLAGS_PROCESSOR} -msse4.1")
......
......@@ -203,8 +203,9 @@ int lte_dl_channel_estimation(PHY_VARS_UE *phy_vars_ue,
multadd_complex_vector_real_scalar(dl_ch-(phy_vars_ue->lte_frame_parms.ofdm_symbol_size<<1),
phy_vars_ue->ch_est_alpha,dl_ch-(phy_vars_ue->lte_frame_parms.ofdm_symbol_size<<1),
1,phy_vars_ue->lte_frame_parms.ofdm_symbol_size);
#ifdef DEBUG_CH
printf("k %d, first_carrier %d\n",k,phy_vars_ue->lte_frame_parms.first_carrier_offset);
#endif
if ((phy_vars_ue->lte_frame_parms.N_RB_DL==6) ||
(phy_vars_ue->lte_frame_parms.N_RB_DL==50) ||
(phy_vars_ue->lte_frame_parms.N_RB_DL==100)) {
......@@ -213,7 +214,9 @@ int lte_dl_channel_estimation(PHY_VARS_UE *phy_vars_ue,
// Treat first 2 pilots specially (left edge)
ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15);
ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15);
// printf("pilot 0 : rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]);
#ifdef DEBUG_CH
printf("pilot 0 : rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]);
#endif
multadd_real_vector_complex_scalar(fl,
ch,
dl_ch,
......@@ -224,7 +227,9 @@ int lte_dl_channel_estimation(PHY_VARS_UE *phy_vars_ue,
ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15);
ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15);
// printf("pilot 1 : rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]);
#ifdef DEBUG_CH
printf("pilot 1 : rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]);
#endif
multadd_real_vector_complex_scalar(f2l2,
ch,
dl_ch,
......@@ -235,15 +240,13 @@ int lte_dl_channel_estimation(PHY_VARS_UE *phy_vars_ue,
for (pilot_cnt=2; pilot_cnt<((phy_vars_ue->lte_frame_parms.N_RB_DL)-1); pilot_cnt+=2) {
// printf("%d\n",dl_ch-(int16_t *)&dl_ch_estimates[(p<<1)+aarx][ch_offset]);
// printf("pilot[%d][%d] (%d,%d)\n",p,pilot_cnt,pil[0],pil[1]);
// printf("rx[%d] -> (%d,%d)\n", k, rxF[0], rxF[1]);
ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15); //Re
ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15); //Im
// printf("**rb %d %d\n",rb,dl_ch-(int16_t *)&dl_ch_estimates[(p<<1)+aarx][ch_offset]);
#ifdef DEBUG_CH
printf("pilot %d : rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",pilot_cnt,rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]);
#endif
multadd_real_vector_complex_scalar(f,
ch,
dl_ch,
......@@ -254,13 +257,11 @@ int lte_dl_channel_estimation(PHY_VARS_UE *phy_vars_ue,
rxF+=12;
dl_ch+=8;
// printf("pilot[%d][%d] (%d,%d)\n",p,rb,pil[0],pil[1]);
// printf("rx[%d] -> (%d,%d)\n", k+6, rxF[0], rxF[1]);
ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15);
ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15);
// printf("**rb %d %d\n",rb,dl_ch-(int16_t *)&dl_ch_estimates[(p<<1)+aarx][ch_offset]);
#ifdef DEBUG_CH
printf("pilot %d : rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",pilot_cnt+1,rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]);
#endif
multadd_real_vector_complex_scalar(f2,
ch,
dl_ch,
......@@ -281,15 +282,17 @@ int lte_dl_channel_estimation(PHY_VARS_UE *phy_vars_ue,
rxF = (int16_t *)&rxdataF[aarx][((symbol_offset+1+k))];
#ifdef DEBUG_CH
printf("second half k %d\n",k);
#endif
for (pilot_cnt=0; pilot_cnt<((phy_vars_ue->lte_frame_parms.N_RB_DL)-3); pilot_cnt+=2) {
// printf("pilot[%d][%d] (%d,%d)\n",p,pilot_cnt,pil[0],pil[1]);
// printf("rx[%d] -> (%d,%d)\n", k+6, rxF[0], rxF[1]);
ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15);
ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15);
// printf("**rb %d %d\n",rb,dl_ch-(int16_t *)&dl_ch_estimates[(p<<1)+aarx][ch_offset]);
#ifdef DEBUG_CH
printf("pilot %d : rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",pilot_cnt,rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]);
#endif
multadd_real_vector_complex_scalar(f,
ch,
dl_ch,
......@@ -300,8 +303,9 @@ int lte_dl_channel_estimation(PHY_VARS_UE *phy_vars_ue,
ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15);
ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15);
// printf("**rb %d %d\n",rb,dl_ch-(int16_T *)&dl_ch_estimates[(p<<1)+aarx][ch_offset]);
#ifdef DEBUG_CH
printf("pilot %d : rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",pilot_cnt+1,rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]);
#endif
multadd_real_vector_complex_scalar(f2,
ch,
dl_ch,
......@@ -314,8 +318,9 @@ int lte_dl_channel_estimation(PHY_VARS_UE *phy_vars_ue,
ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15);
ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15);
// printf("pilot 49: rxF -> (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]);
#ifdef DEBUG_CH
printf("pilot %d: rxF -> (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",pilot_cnt,rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]);
#endif
multadd_real_vector_complex_scalar(fr,
ch,
dl_ch,
......@@ -326,7 +331,9 @@ int lte_dl_channel_estimation(PHY_VARS_UE *phy_vars_ue,
ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15);
ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15);
// printf("pilot 50: rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]);
#ifdef DEBUG_CH
printf("pilot %d: rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",pilot_cnt+1,rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]);
#endif
multadd_real_vector_complex_scalar(f2r2,
ch,
dl_ch,
......
......@@ -116,7 +116,7 @@ LTE_UE_DLSCH_t *new_ue_dlsch(uint8_t Kmimo,uint8_t Mdlharq,uint8_t max_turbo_ite
dlsch->max_turbo_iterations = max_turbo_iterations;
for (i=0; i<Mdlharq; i++) {
// msg("new_ue_dlsch: Harq process %d\n",i);
// printf("new_ue_dlsch: Harq process %d\n",i);
dlsch->harq_processes[i] = (LTE_DL_UE_HARQ_t *)malloc16(sizeof(LTE_DL_UE_HARQ_t));
if (dlsch->harq_processes[i]) {
......@@ -155,7 +155,7 @@ LTE_UE_DLSCH_t *new_ue_dlsch(uint8_t Kmimo,uint8_t Mdlharq,uint8_t max_turbo_ite
return(dlsch);
}
msg("new_ue_dlsch with size %zu: exit_flag = %u\n",sizeof(LTE_DL_UE_HARQ_t), exit_flag);
printf("new_ue_dlsch with size %zu: exit_flag = %u\n",sizeof(LTE_DL_UE_HARQ_t), exit_flag);
free_ue_dlsch(dlsch);
return(NULL);
......@@ -204,22 +204,22 @@ uint32_t dlsch_decoding(PHY_VARS_UE *phy_vars_ue,
time_stats_t *);
if (!dlsch_llr) {
msg("dlsch_decoding.c: NULL dlsch_llr pointer\n");
printf("dlsch_decoding.c: NULL dlsch_llr pointer\n");
return(dlsch->max_turbo_iterations);
}
if (!harq_process) {
msg("dlsch_decoding.c: NULL harq_process pointer\n");
printf("dlsch_decoding.c: NULL harq_process pointer\n");
return(dlsch->max_turbo_iterations);
}
if (!frame_parms) {
msg("dlsch_decoding.c: NULL frame_parms pointer\n");
printf("dlsch_decoding.c: NULL frame_parms pointer\n");
return(dlsch->max_turbo_iterations);
}
if (subframe>9) {
msg("dlsch_decoding.c: Illegal subframe index %d\n",subframe);
printf("dlsch_decoding.c: Illegal subframe index %d\n",subframe);
return(dlsch->max_turbo_iterations);
}
......@@ -232,13 +232,13 @@ uint32_t dlsch_decoding(PHY_VARS_UE *phy_vars_ue,
/*
if (nb_rb > frame_parms->N_RB_DL) {
msg("dlsch_decoding.c: Illegal nb_rb %d\n",nb_rb);
printf("dlsch_decoding.c: Illegal nb_rb %d\n",nb_rb);
return(max_turbo_iterations);
}*/
/*harq_pid = dlsch->current_harq_pid;
if (harq_pid >= 8) {
msg("dlsch_decoding.c: Illegal harq_pid %d\n",harq_pid);
printf("dlsch_decoding.c: Illegal harq_pid %d\n",harq_pid);
return(max_turbo_iterations);
}
*/
......@@ -250,7 +250,7 @@ uint32_t dlsch_decoding(PHY_VARS_UE *phy_vars_ue,
G = harq_process->G;
//get_G(frame_parms,nb_rb,dlsch->rb_alloc,mod_order,num_pdcch_symbols,phy_vars_ue->frame,subframe);
// msg("DLSCH Decoding, harq_pid %d Ndi %d\n",harq_pid,harq_process->Ndi);
// printf("DLSCH Decoding, harq_pid %d Ndi %d\n",harq_pid,harq_process->Ndi);
if (harq_process->round == 0) {
// This is a new packet, so compute quantities regarding segmentation
......@@ -269,7 +269,7 @@ uint32_t dlsch_decoding(PHY_VARS_UE *phy_vars_ue,
/*
else {
msg("dlsch_decoding.c: Ndi>0 not checked yet!!\n");
printf("dlsch_decoding.c: Ndi>0 not checked yet!!\n");
return(max_turbo_iterations);
}
*/
......@@ -296,7 +296,7 @@ uint32_t dlsch_decoding(PHY_VARS_UE *phy_vars_ue,
break;
}
if (harq_process->C >= MAX_NUM_DLSCH_SEGMENTS/bw_scaling) {
if (harq_process->C > MAX_NUM_DLSCH_SEGMENTS/bw_scaling) {
LOG_E(PHY,"Illegal harq_process->C %d > %d\n",harq_process->C,MAX_NUM_DLSCH_SEGMENTS/bw_scaling);
return((1+dlsch->max_turbo_iterations));
}
......@@ -320,7 +320,7 @@ uint32_t dlsch_decoding(PHY_VARS_UE *phy_vars_ue,
else if (Kr_bytes <= 768)
iind = 123 + ((Kr_bytes-256)>>3);
else {
msg("dlsch_decoding: Illegal codeword size %d!!!\n",Kr_bytes);
printf("dlsch_decoding: Illegal codeword size %d!!!\n",Kr_bytes);
return(dlsch->max_turbo_iterations);
}
......
......@@ -91,7 +91,7 @@ void PHY_ofdm_mod(int *input, /// pointer to complex input
)
{
static short temp[2048*4] __attribute__((aligned(16)));
static short temp[2048*4] __attribute__((aligned(32)));
unsigned short i,j;
short k;
......@@ -139,12 +139,18 @@ void PHY_ofdm_mod(int *input, /// pointer to complex input
msg("[PHY] symbol %d/%d (%p,%p -> %p)\n",i,nb_symbols,input,&input[i<<log2fftsize],&output[(i<<log2fftsize) + ((i)*nb_prefix_samples)]);
#endif
#ifndef __AVX2__
// handle 128-bit alignment for 128-bit SIMD (SSE4,NEON,AltiVEC)
idft((int16_t *)&input[i<<log2fftsize],
(log2fftsize==7) ? (int16_t *)temp : (int16_t *)&output[(i<<log2fftsize) + ((1+i)*nb_prefix_samples)],
1);
// write_output("fft_out.m","fftout",temp,(1<<log2fftsize)*2,1,1);
#else
// on AVX2 need 256-bit alignment
idft((int16_t *)&input[i<<log2fftsize],
(log2fftsize<=9) ? (int16_t *)temp : (int16_t *)&output[(i<<log2fftsize) + ((1+i)*nb_prefix_samples)],
1);
//memset(temp,0,1<<log2fftsize);
#endif
// Copy to frame buffer with Cyclic Extension
......@@ -158,12 +164,20 @@ void PHY_ofdm_mod(int *input, /// pointer to complex input
// msg("Doing cyclic prefix method\n");
if (log2fftsize==7) {
#ifndef __AVX2__
if (log2fftsize==7)
#else
if (log2fftsize<=9)
#endif
{
for (j=0; j<((1<<log2fftsize)) ; j++) {
output_ptr[j] = temp_ptr[j];
}
}
j=(1<<log2fftsize);
for (k=-1; k>=-nb_prefix_samples; k--) {
......
......@@ -56,7 +56,7 @@ int slot_fep(PHY_VARS_UE *phy_vars_ue,
unsigned int rx_offset;
void (*dft)(int16_t *,int16_t *, int);
int tmp_dft_in[256]; // This is for misalignment issues for 6 and 15 PRBs
int tmp_dft_in[2048]; // This is for misalignment issues for 6 and 15 PRBs
switch (frame_parms->log2_symbol_size) {
case 7:
......@@ -96,12 +96,12 @@ int slot_fep(PHY_VARS_UE *phy_vars_ue,
if (l<0 || l>=7-frame_parms->Ncp) {
msg("slot_fep: l must be between 0 and %d\n",7-frame_parms->Ncp);
printf("slot_fep: l must be between 0 and %d\n",7-frame_parms->Ncp);
return(-1);
}
if (Ns<0 || Ns>=20) {
msg("slot_fep: Ns must be between 0 and 19\n");
printf("slot_fep: Ns must be between 0 and 19\n");
return(-1);
}
......@@ -111,12 +111,12 @@ int slot_fep(PHY_VARS_UE *phy_vars_ue,
memset(&ue_common_vars->rxdataF[aa][frame_parms->ofdm_symbol_size*symbol],0,frame_parms->ofdm_symbol_size*sizeof(int));
rx_offset = sample_offset + slot_offset + nb_prefix_samples0 + subframe_offset - SOFFSET;
// Align with 128 bit
rx_offset = rx_offset - rx_offset % 4;
// Align with 256 bit
// rx_offset = rx_offset&0xfffffff8;
#ifdef DEBUG_FEP
// if (phy_vars_ue->frame <100)
msg("slot_fep: frame %d: slot %d, symbol %d, nb_prefix_samples %d, nb_prefix_samples0 %d, slot_offset %d, subframe_offset %d, sample_offset %d,rx_offset %d\n", phy_vars_ue->frame_rx,Ns, symbol,
printf("slot_fep: frame %d: slot %d, symbol %d, nb_prefix_samples %d, nb_prefix_samples0 %d, slot_offset %d, subframe_offset %d, sample_offset %d,rx_offset %d\n", phy_vars_ue->frame_rx,Ns, symbol,
nb_prefix_samples,nb_prefix_samples0,slot_offset,subframe_offset,sample_offset,rx_offset);
#endif
......@@ -127,9 +127,9 @@ int slot_fep(PHY_VARS_UE *phy_vars_ue,
(short *)&ue_common_vars->rxdata[aa][0],
frame_parms->ofdm_symbol_size*sizeof(int));
if ((rx_offset&3)!=0) { // if input to dft is not 128-bit aligned, issue for size 6 and 15 PRBs
if ((rx_offset&7)!=0) { // if input to dft is not 256-bit aligned, issue for size 6,15 and 25 PRBs
memcpy((void *)tmp_dft_in,
(void *)&ue_common_vars->rxdata[aa][(rx_offset-nb_prefix_samples0) % frame_length_samples],
(void *)&ue_common_vars->rxdata[aa][rx_offset % frame_length_samples],
frame_parms->ofdm_symbol_size*sizeof(int));
dft((int16_t *)tmp_dft_in,
(int16_t *)&ue_common_vars->rxdataF[aa][frame_parms->ofdm_symbol_size*symbol],1);
......@@ -142,12 +142,12 @@ int slot_fep(PHY_VARS_UE *phy_vars_ue,
}
} else {
rx_offset += (frame_parms->ofdm_symbol_size+nb_prefix_samples) +
(frame_parms->ofdm_symbol_size+nb_prefix_samples)*(l-1);
rx_offset += (frame_parms->ofdm_symbol_size+nb_prefix_samples)*l;// +
// (frame_parms->ofdm_symbol_size+nb_prefix_samples)*(l-1);
#ifdef DEBUG_FEP
// if (phy_vars_ue->frame <100)
msg("slot_fep: frame %d: slot %d, symbol %d, nb_prefix_samples %d, nb_prefix_samples0 %d, slot_offset %d, subframe_offset %d, sample_offset %d,rx_offset %d\n", phy_vars_ue->frame_rx,Ns, symbol,
printf("slot_fep: frame %d: slot %d, symbol %d, nb_prefix_samples %d, nb_prefix_samples0 %d, slot_offset %d, subframe_offset %d, sample_offset %d,rx_offset %d\n", phy_vars_ue->frame_rx,Ns, symbol,
nb_prefix_samples,nb_prefix_samples0,slot_offset,subframe_offset,sample_offset,rx_offset);
#endif
......@@ -158,7 +158,7 @@ int slot_fep(PHY_VARS_UE *phy_vars_ue,
start_meas(&phy_vars_ue->rx_dft_stats);
if ((rx_offset&3)!=0) { // if input to dft is not 128-bit aligned, issue for size 6 and 15 PRBs
if ((rx_offset&7)!=0) { // if input to dft is not 128-bit aligned, issue for size 6 and 15 PRBs
memcpy((void *)tmp_dft_in,
(void *)&ue_common_vars->rxdata[aa][(rx_offset) % frame_length_samples],
frame_parms->ofdm_symbol_size*sizeof(int));
......@@ -182,7 +182,7 @@ int slot_fep(PHY_VARS_UE *phy_vars_ue,
for (aa=0; aa<frame_parms->nb_antennas_tx_eNB; aa++) {
#ifdef DEBUG_FEP
msg("Channel estimation eNB %d, aatx %d, slot %d, symbol %d\n",eNB_id,aa,Ns,l);
printf("Channel estimation eNB %d, aatx %d, slot %d, symbol %d\n",eNB_id,aa,Ns,l);
#endif
start_meas(&phy_vars_ue->dlsch_channel_estimation_stats);
lte_dl_channel_estimation(phy_vars_ue,eNB_id,0,
......@@ -205,7 +205,7 @@ int slot_fep(PHY_VARS_UE *phy_vars_ue,
// do frequency offset estimation here!
// use channel estimates from current symbol (=ch_t) and last symbol (ch_{t-1})
#ifdef DEBUG_FEP
msg("Frequency offset estimation\n");
printf("Frequency offset estimation\n");
#endif
if (l==(4-frame_parms->Ncp)) {
......@@ -222,7 +222,7 @@ int slot_fep(PHY_VARS_UE *phy_vars_ue,
}
#ifdef DEBUG_FEP
msg("slot_fep: done\n");
printf("slot_fep: done\n");
#endif
return(0);
}
lte_dfts: lte_dfts.c
gcc -O2 -mavx2 -g -ggdb -o lte_dfts lte_dfts.c time_meas.c file_output.c ../../SIMULATION/TOOLS/taus.c -I$$OPENAIR1_DIR -I$$OPENAIR_TARGETS -I$$OPENAIR2_DIR/COMMON -DUSER_MODE -DMR_MAIN -DNB_ANTENNAS_RX=1 # -DD256STATS #-DD64STATS
lte_dfts_sse4: lte_dfts.c
gcc -O2 -msse4.1 -g -ggdb -o lte_dfts_sse4 lte_dfts.c time_meas.c file_output.c ../../SIMULATION/TOOLS/taus.c -I$$OPENAIR1_DIR -I$$OPENAIR_TARGETS -I$$OPENAIR2_DIR/COMMON -DUSER_MODE -DMR_MAIN -DNB_ANTENNAS_RX=1 # -DD256STATS #-DD64STATS
lte_dfts.s: lte_dfts.c
lte_dfts_avx2: lte_dfts.c
gcc -O2 -mavx2 -g -ggdb -o lte_dfts_avx2 lte_dfts.c time_meas.c file_output.c ../../SIMULATION/TOOLS/taus.c -I$$OPENAIR1_DIR -I$$OPENAIR_TARGETS -I$$OPENAIR2_DIR/COMMON -DUSER_MODE -DMR_MAIN -DNB_ANTENNAS_RX=1 # -DD256STATS #-DD64STATS
lte_dfts_avx2.s: lte_dfts.c
gcc -O2 -mavx2 -S lte_dfts.c time_meas.c file_output.c ../../SIMULATION/TOOLS/taus.c -I$$OPENAIR1_DIR -I$$OPENAIR_TARGETS -I$$OPENAIR2_DIR/COMMON -DUSER_MODE -DMR_MAIN -DNB_ANTENNAS_RX=1 # -DD256STATS #-DD64STATS
dft_cycles: lte_dfts
./lte_dfts | egrep cycles
lte_dfts_sse4.s: lte_dfts.c
gcc -O2 -msse4.1 -S lte_dfts.c time_meas.c file_output.c ../../SIMULATION/TOOLS/taus.c -I$$OPENAIR1_DIR -I$$OPENAIR_TARGETS -I$$OPENAIR2_DIR/COMMON -DUSER_MODE -DMR_MAIN -DNB_ANTENNAS_RX=1 # -DD256STATS #-DD64STATS
dft_cycles_avx2: lte_dfts_avx2
./lte_dfts_avx2 | egrep cycles
This diff is collapsed.
This diff is collapsed.
......@@ -3336,7 +3336,7 @@ PMI_FEEDBACK:
PHY_vars_UE->dlsch_ue[0][cw]->harq_processes[PHY_vars_UE->dlsch_ue[0][cw]->current_harq_pid]->G = coded_bits_per_codeword;
/*
// calculate uncoded BLER
uncoded_ber=0;
for (i=0;i<coded_bits_per_codeword;i++)
......@@ -3352,7 +3352,7 @@ PMI_FEEDBACK:
if (n_frames==1)
write_output("uncoded_ber_bit.m","uncoded_ber_bit",uncoded_ber_bit,coded_bits_per_codeword,1,0);
*/
start_meas(&PHY_vars_UE->dlsch_unscrambling_stats);
dlsch_unscrambling(&PHY_vars_UE->lte_frame_parms,
......@@ -3463,10 +3463,10 @@ PMI_FEEDBACK:
}
sprintf(fname,"rxsig0_r%d.m",round);
sprintf(vname,"rxs0_r%d.m",round);
sprintf(vname,"rxs0_r%d",round);
write_output(fname,vname, &PHY_vars_UE->lte_ue_common_vars.rxdata[0][0],10*PHY_vars_UE->lte_frame_parms.samples_per_tti,1,1);
sprintf(fname,"rxsigF0_r%d.m",round);
sprintf(vname,"rxs0F_r%d.m",round);
sprintf(vname,"rxs0F_r%d",round);
write_output(fname,vname, &PHY_vars_UE->lte_ue_common_vars.rxdataF[0][0],2*PHY_vars_UE->lte_frame_parms.ofdm_symbol_size*nsymb,2,1);
if (PHY_vars_UE->lte_frame_parms.nb_antennas_rx>1) {
......@@ -3479,14 +3479,14 @@ PMI_FEEDBACK:
}
sprintf(fname,"dlsch00_r%d.m",round);
sprintf(vname,"dl00_r%d.m",round);
sprintf(vname,"dl00_r%d",round);
write_output(fname,vname,
&(PHY_vars_UE->lte_ue_common_vars.dl_ch_estimates[eNB_id][0][0]),
PHY_vars_UE->lte_frame_parms.ofdm_symbol_size*nsymb,1,1);
if (PHY_vars_UE->lte_frame_parms.nb_antennas_rx>1) {
sprintf(fname,"dlsch01_r%d.m",round);
sprintf(vname,"dl01_r%d.m",round);
sprintf(vname,"dl01_r%d",round);
write_output(fname,vname,
&(PHY_vars_UE->lte_ue_common_vars.dl_ch_estimates[eNB_id][1][0]),
PHY_vars_UE->lte_frame_parms.ofdm_symbol_size*nsymb/2,1,1);
......@@ -3494,7 +3494,7 @@ PMI_FEEDBACK:
if (PHY_vars_eNB->lte_frame_parms.nb_antennas_tx>1) {
sprintf(fname,"dlsch10_r%d.m",round);
sprintf(vname,"dl10_r%d.m",round);
sprintf(vname,"dl10_r%d",round);
write_output(fname,vname,
&(PHY_vars_UE->lte_ue_common_vars.dl_ch_estimates[eNB_id][2][0]),
PHY_vars_UE->lte_frame_parms.ofdm_symbol_size*nsymb/2,1,1);
......@@ -3502,7 +3502,7 @@ PMI_FEEDBACK:
if ((PHY_vars_UE->lte_frame_parms.nb_antennas_rx>1) && (PHY_vars_eNB->lte_frame_parms.nb_antennas_tx>1)) {
sprintf(fname,"dlsch11_r%d.m",round);
sprintf(vname,"dl11_r%d.m",round);
sprintf(vname,"dl11_r%d",round);
write_output(fname,vname,
&(PHY_vars_UE->lte_ue_common_vars.dl_ch_estimates[eNB_id][3][0]),
PHY_vars_UE->lte_frame_parms.ofdm_symbol_size*nsymb/2,1,1);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment