Commit 5dbe3bef authored by Raymond Knopp's avatar Raymond Knopp

debugging and testing of AVX2-optimized DFTS (radix 2/4 only).

parent 47a6fa6e
...@@ -134,7 +134,7 @@ else (CMAKE_SYSTEM_PROCESSOR STREQUAL "armv7l") ...@@ -134,7 +134,7 @@ else (CMAKE_SYSTEM_PROCESSOR STREQUAL "armv7l")
set(C_FLAGS_PROCESSOR "${C_FLAGS_PROCESSOR} -mavx2") set(C_FLAGS_PROCESSOR "${C_FLAGS_PROCESSOR} -mavx2")
endif() endif()
if (CPUINFO MATCHES "sse4_2") if (CPUINFO MATCHES "sse4_2")
set(C_FLAGS_PROCESSOR "${C_FLAGS_PROCESSOR} -msse4.2") set(C_FLAGS_PROCESSOR "${C_FLAGS_PROCESSOR} -mavx2 -msse4.2")
endif() endif()
if (CPUINFO MATCHES "sse4_1") if (CPUINFO MATCHES "sse4_1")
set(C_FLAGS_PROCESSOR "${C_FLAGS_PROCESSOR} -msse4.1") set(C_FLAGS_PROCESSOR "${C_FLAGS_PROCESSOR} -msse4.1")
......
...@@ -203,8 +203,9 @@ int lte_dl_channel_estimation(PHY_VARS_UE *phy_vars_ue, ...@@ -203,8 +203,9 @@ int lte_dl_channel_estimation(PHY_VARS_UE *phy_vars_ue,
multadd_complex_vector_real_scalar(dl_ch-(phy_vars_ue->lte_frame_parms.ofdm_symbol_size<<1), multadd_complex_vector_real_scalar(dl_ch-(phy_vars_ue->lte_frame_parms.ofdm_symbol_size<<1),
phy_vars_ue->ch_est_alpha,dl_ch-(phy_vars_ue->lte_frame_parms.ofdm_symbol_size<<1), phy_vars_ue->ch_est_alpha,dl_ch-(phy_vars_ue->lte_frame_parms.ofdm_symbol_size<<1),
1,phy_vars_ue->lte_frame_parms.ofdm_symbol_size); 1,phy_vars_ue->lte_frame_parms.ofdm_symbol_size);
#ifdef DEBUG_CH
printf("k %d, first_carrier %d\n",k,phy_vars_ue->lte_frame_parms.first_carrier_offset);
#endif
if ((phy_vars_ue->lte_frame_parms.N_RB_DL==6) || if ((phy_vars_ue->lte_frame_parms.N_RB_DL==6) ||
(phy_vars_ue->lte_frame_parms.N_RB_DL==50) || (phy_vars_ue->lte_frame_parms.N_RB_DL==50) ||
(phy_vars_ue->lte_frame_parms.N_RB_DL==100)) { (phy_vars_ue->lte_frame_parms.N_RB_DL==100)) {
...@@ -213,7 +214,9 @@ int lte_dl_channel_estimation(PHY_VARS_UE *phy_vars_ue, ...@@ -213,7 +214,9 @@ int lte_dl_channel_estimation(PHY_VARS_UE *phy_vars_ue,
// Treat first 2 pilots specially (left edge) // Treat first 2 pilots specially (left edge)
ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15); ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15);
ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15); ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15);
// printf("pilot 0 : rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]); #ifdef DEBUG_CH
printf("pilot 0 : rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]);
#endif
multadd_real_vector_complex_scalar(fl, multadd_real_vector_complex_scalar(fl,
ch, ch,
dl_ch, dl_ch,
...@@ -224,7 +227,9 @@ int lte_dl_channel_estimation(PHY_VARS_UE *phy_vars_ue, ...@@ -224,7 +227,9 @@ int lte_dl_channel_estimation(PHY_VARS_UE *phy_vars_ue,
ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15); ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15);
ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15); ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15);
// printf("pilot 1 : rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]); #ifdef DEBUG_CH
printf("pilot 1 : rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]);
#endif
multadd_real_vector_complex_scalar(f2l2, multadd_real_vector_complex_scalar(f2l2,
ch, ch,
dl_ch, dl_ch,
...@@ -235,15 +240,13 @@ int lte_dl_channel_estimation(PHY_VARS_UE *phy_vars_ue, ...@@ -235,15 +240,13 @@ int lte_dl_channel_estimation(PHY_VARS_UE *phy_vars_ue,
for (pilot_cnt=2; pilot_cnt<((phy_vars_ue->lte_frame_parms.N_RB_DL)-1); pilot_cnt+=2) { for (pilot_cnt=2; pilot_cnt<((phy_vars_ue->lte_frame_parms.N_RB_DL)-1); pilot_cnt+=2) {
// printf("%d\n",dl_ch-(int16_t *)&dl_ch_estimates[(p<<1)+aarx][ch_offset]);
// printf("pilot[%d][%d] (%d,%d)\n",p,pilot_cnt,pil[0],pil[1]);
// printf("rx[%d] -> (%d,%d)\n", k, rxF[0], rxF[1]);
ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15); //Re ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15); //Re
ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15); //Im ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15); //Im
// printf("**rb %d %d\n",rb,dl_ch-(int16_t *)&dl_ch_estimates[(p<<1)+aarx][ch_offset]); #ifdef DEBUG_CH
printf("pilot %d : rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",pilot_cnt,rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]);
#endif
multadd_real_vector_complex_scalar(f, multadd_real_vector_complex_scalar(f,
ch, ch,
dl_ch, dl_ch,
...@@ -254,13 +257,11 @@ int lte_dl_channel_estimation(PHY_VARS_UE *phy_vars_ue, ...@@ -254,13 +257,11 @@ int lte_dl_channel_estimation(PHY_VARS_UE *phy_vars_ue,
rxF+=12; rxF+=12;
dl_ch+=8; dl_ch+=8;
// printf("pilot[%d][%d] (%d,%d)\n",p,rb,pil[0],pil[1]);
// printf("rx[%d] -> (%d,%d)\n", k+6, rxF[0], rxF[1]);
ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15); ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15);
ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15); ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15);
// printf("**rb %d %d\n",rb,dl_ch-(int16_t *)&dl_ch_estimates[(p<<1)+aarx][ch_offset]); #ifdef DEBUG_CH
printf("pilot %d : rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",pilot_cnt+1,rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]);
#endif
multadd_real_vector_complex_scalar(f2, multadd_real_vector_complex_scalar(f2,
ch, ch,
dl_ch, dl_ch,
...@@ -281,15 +282,17 @@ int lte_dl_channel_estimation(PHY_VARS_UE *phy_vars_ue, ...@@ -281,15 +282,17 @@ int lte_dl_channel_estimation(PHY_VARS_UE *phy_vars_ue,
rxF = (int16_t *)&rxdataF[aarx][((symbol_offset+1+k))]; rxF = (int16_t *)&rxdataF[aarx][((symbol_offset+1+k))];
#ifdef DEBUG_CH
printf("second half k %d\n",k);
#endif
for (pilot_cnt=0; pilot_cnt<((phy_vars_ue->lte_frame_parms.N_RB_DL)-3); pilot_cnt+=2) { for (pilot_cnt=0; pilot_cnt<((phy_vars_ue->lte_frame_parms.N_RB_DL)-3); pilot_cnt+=2) {
// printf("pilot[%d][%d] (%d,%d)\n",p,pilot_cnt,pil[0],pil[1]);
// printf("rx[%d] -> (%d,%d)\n", k+6, rxF[0], rxF[1]);
ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15); ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15);
ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15); ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15);
#ifdef DEBUG_CH
// printf("**rb %d %d\n",rb,dl_ch-(int16_t *)&dl_ch_estimates[(p<<1)+aarx][ch_offset]); printf("pilot %d : rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",pilot_cnt,rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]);
#endif
multadd_real_vector_complex_scalar(f, multadd_real_vector_complex_scalar(f,
ch, ch,
dl_ch, dl_ch,
...@@ -300,8 +303,9 @@ int lte_dl_channel_estimation(PHY_VARS_UE *phy_vars_ue, ...@@ -300,8 +303,9 @@ int lte_dl_channel_estimation(PHY_VARS_UE *phy_vars_ue,
ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15); ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15);
ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15); ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15);
#ifdef DEBUG_CH
// printf("**rb %d %d\n",rb,dl_ch-(int16_T *)&dl_ch_estimates[(p<<1)+aarx][ch_offset]); printf("pilot %d : rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",pilot_cnt+1,rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]);
#endif
multadd_real_vector_complex_scalar(f2, multadd_real_vector_complex_scalar(f2,
ch, ch,
dl_ch, dl_ch,
...@@ -314,8 +318,9 @@ int lte_dl_channel_estimation(PHY_VARS_UE *phy_vars_ue, ...@@ -314,8 +318,9 @@ int lte_dl_channel_estimation(PHY_VARS_UE *phy_vars_ue,
ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15); ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15);
ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15); ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15);
// printf("pilot 49: rxF -> (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]); #ifdef DEBUG_CH
printf("pilot %d: rxF -> (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",pilot_cnt,rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]);
#endif
multadd_real_vector_complex_scalar(fr, multadd_real_vector_complex_scalar(fr,
ch, ch,
dl_ch, dl_ch,
...@@ -326,7 +331,9 @@ int lte_dl_channel_estimation(PHY_VARS_UE *phy_vars_ue, ...@@ -326,7 +331,9 @@ int lte_dl_channel_estimation(PHY_VARS_UE *phy_vars_ue,
ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15); ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15);
ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15); ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15);
// printf("pilot 50: rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]); #ifdef DEBUG_CH
printf("pilot %d: rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",pilot_cnt+1,rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]);
#endif
multadd_real_vector_complex_scalar(f2r2, multadd_real_vector_complex_scalar(f2r2,
ch, ch,
dl_ch, dl_ch,
......
...@@ -116,7 +116,7 @@ LTE_UE_DLSCH_t *new_ue_dlsch(uint8_t Kmimo,uint8_t Mdlharq,uint8_t max_turbo_ite ...@@ -116,7 +116,7 @@ LTE_UE_DLSCH_t *new_ue_dlsch(uint8_t Kmimo,uint8_t Mdlharq,uint8_t max_turbo_ite
dlsch->max_turbo_iterations = max_turbo_iterations; dlsch->max_turbo_iterations = max_turbo_iterations;
for (i=0; i<Mdlharq; i++) { for (i=0; i<Mdlharq; i++) {
// msg("new_ue_dlsch: Harq process %d\n",i); // printf("new_ue_dlsch: Harq process %d\n",i);
dlsch->harq_processes[i] = (LTE_DL_UE_HARQ_t *)malloc16(sizeof(LTE_DL_UE_HARQ_t)); dlsch->harq_processes[i] = (LTE_DL_UE_HARQ_t *)malloc16(sizeof(LTE_DL_UE_HARQ_t));
if (dlsch->harq_processes[i]) { if (dlsch->harq_processes[i]) {
...@@ -155,7 +155,7 @@ LTE_UE_DLSCH_t *new_ue_dlsch(uint8_t Kmimo,uint8_t Mdlharq,uint8_t max_turbo_ite ...@@ -155,7 +155,7 @@ LTE_UE_DLSCH_t *new_ue_dlsch(uint8_t Kmimo,uint8_t Mdlharq,uint8_t max_turbo_ite
return(dlsch); return(dlsch);
} }
msg("new_ue_dlsch with size %zu: exit_flag = %u\n",sizeof(LTE_DL_UE_HARQ_t), exit_flag); printf("new_ue_dlsch with size %zu: exit_flag = %u\n",sizeof(LTE_DL_UE_HARQ_t), exit_flag);
free_ue_dlsch(dlsch); free_ue_dlsch(dlsch);
return(NULL); return(NULL);
...@@ -204,22 +204,22 @@ uint32_t dlsch_decoding(PHY_VARS_UE *phy_vars_ue, ...@@ -204,22 +204,22 @@ uint32_t dlsch_decoding(PHY_VARS_UE *phy_vars_ue,
time_stats_t *); time_stats_t *);
if (!dlsch_llr) { if (!dlsch_llr) {
msg("dlsch_decoding.c: NULL dlsch_llr pointer\n"); printf("dlsch_decoding.c: NULL dlsch_llr pointer\n");
return(dlsch->max_turbo_iterations); return(dlsch->max_turbo_iterations);
} }
if (!harq_process) { if (!harq_process) {
msg("dlsch_decoding.c: NULL harq_process pointer\n"); printf("dlsch_decoding.c: NULL harq_process pointer\n");
return(dlsch->max_turbo_iterations); return(dlsch->max_turbo_iterations);
} }
if (!frame_parms) { if (!frame_parms) {
msg("dlsch_decoding.c: NULL frame_parms pointer\n"); printf("dlsch_decoding.c: NULL frame_parms pointer\n");
return(dlsch->max_turbo_iterations); return(dlsch->max_turbo_iterations);
} }
if (subframe>9) { if (subframe>9) {
msg("dlsch_decoding.c: Illegal subframe index %d\n",subframe); printf("dlsch_decoding.c: Illegal subframe index %d\n",subframe);
return(dlsch->max_turbo_iterations); return(dlsch->max_turbo_iterations);
} }
...@@ -232,13 +232,13 @@ uint32_t dlsch_decoding(PHY_VARS_UE *phy_vars_ue, ...@@ -232,13 +232,13 @@ uint32_t dlsch_decoding(PHY_VARS_UE *phy_vars_ue,
/* /*
if (nb_rb > frame_parms->N_RB_DL) { if (nb_rb > frame_parms->N_RB_DL) {
msg("dlsch_decoding.c: Illegal nb_rb %d\n",nb_rb); printf("dlsch_decoding.c: Illegal nb_rb %d\n",nb_rb);
return(max_turbo_iterations); return(max_turbo_iterations);
}*/ }*/
/*harq_pid = dlsch->current_harq_pid; /*harq_pid = dlsch->current_harq_pid;
if (harq_pid >= 8) { if (harq_pid >= 8) {
msg("dlsch_decoding.c: Illegal harq_pid %d\n",harq_pid); printf("dlsch_decoding.c: Illegal harq_pid %d\n",harq_pid);
return(max_turbo_iterations); return(max_turbo_iterations);
} }
*/ */
...@@ -250,7 +250,7 @@ uint32_t dlsch_decoding(PHY_VARS_UE *phy_vars_ue, ...@@ -250,7 +250,7 @@ uint32_t dlsch_decoding(PHY_VARS_UE *phy_vars_ue,
G = harq_process->G; G = harq_process->G;
//get_G(frame_parms,nb_rb,dlsch->rb_alloc,mod_order,num_pdcch_symbols,phy_vars_ue->frame,subframe); //get_G(frame_parms,nb_rb,dlsch->rb_alloc,mod_order,num_pdcch_symbols,phy_vars_ue->frame,subframe);
// msg("DLSCH Decoding, harq_pid %d Ndi %d\n",harq_pid,harq_process->Ndi); // printf("DLSCH Decoding, harq_pid %d Ndi %d\n",harq_pid,harq_process->Ndi);
if (harq_process->round == 0) { if (harq_process->round == 0) {
// This is a new packet, so compute quantities regarding segmentation // This is a new packet, so compute quantities regarding segmentation
...@@ -269,7 +269,7 @@ uint32_t dlsch_decoding(PHY_VARS_UE *phy_vars_ue, ...@@ -269,7 +269,7 @@ uint32_t dlsch_decoding(PHY_VARS_UE *phy_vars_ue,
/* /*
else { else {
msg("dlsch_decoding.c: Ndi>0 not checked yet!!\n"); printf("dlsch_decoding.c: Ndi>0 not checked yet!!\n");
return(max_turbo_iterations); return(max_turbo_iterations);
} }
*/ */
...@@ -296,7 +296,7 @@ uint32_t dlsch_decoding(PHY_VARS_UE *phy_vars_ue, ...@@ -296,7 +296,7 @@ uint32_t dlsch_decoding(PHY_VARS_UE *phy_vars_ue,
break; break;
} }
if (harq_process->C >= MAX_NUM_DLSCH_SEGMENTS/bw_scaling) { if (harq_process->C > MAX_NUM_DLSCH_SEGMENTS/bw_scaling) {
LOG_E(PHY,"Illegal harq_process->C %d > %d\n",harq_process->C,MAX_NUM_DLSCH_SEGMENTS/bw_scaling); LOG_E(PHY,"Illegal harq_process->C %d > %d\n",harq_process->C,MAX_NUM_DLSCH_SEGMENTS/bw_scaling);
return((1+dlsch->max_turbo_iterations)); return((1+dlsch->max_turbo_iterations));
} }
...@@ -320,7 +320,7 @@ uint32_t dlsch_decoding(PHY_VARS_UE *phy_vars_ue, ...@@ -320,7 +320,7 @@ uint32_t dlsch_decoding(PHY_VARS_UE *phy_vars_ue,
else if (Kr_bytes <= 768) else if (Kr_bytes <= 768)
iind = 123 + ((Kr_bytes-256)>>3); iind = 123 + ((Kr_bytes-256)>>3);
else { else {
msg("dlsch_decoding: Illegal codeword size %d!!!\n",Kr_bytes); printf("dlsch_decoding: Illegal codeword size %d!!!\n",Kr_bytes);
return(dlsch->max_turbo_iterations); return(dlsch->max_turbo_iterations);
} }
......
...@@ -91,7 +91,7 @@ void PHY_ofdm_mod(int *input, /// pointer to complex input ...@@ -91,7 +91,7 @@ void PHY_ofdm_mod(int *input, /// pointer to complex input
) )
{ {
static short temp[2048*4] __attribute__((aligned(16))); static short temp[2048*4] __attribute__((aligned(32)));
unsigned short i,j; unsigned short i,j;
short k; short k;
...@@ -139,12 +139,18 @@ void PHY_ofdm_mod(int *input, /// pointer to complex input ...@@ -139,12 +139,18 @@ void PHY_ofdm_mod(int *input, /// pointer to complex input
msg("[PHY] symbol %d/%d (%p,%p -> %p)\n",i,nb_symbols,input,&input[i<<log2fftsize],&output[(i<<log2fftsize) + ((i)*nb_prefix_samples)]); msg("[PHY] symbol %d/%d (%p,%p -> %p)\n",i,nb_symbols,input,&input[i<<log2fftsize],&output[(i<<log2fftsize) + ((i)*nb_prefix_samples)]);
#endif #endif
#ifndef __AVX2__
// handle 128-bit alignment for 128-bit SIMD (SSE4,NEON,AltiVEC)
idft((int16_t *)&input[i<<log2fftsize], idft((int16_t *)&input[i<<log2fftsize],
(log2fftsize==7) ? (int16_t *)temp : (int16_t *)&output[(i<<log2fftsize) + ((1+i)*nb_prefix_samples)], (log2fftsize==7) ? (int16_t *)temp : (int16_t *)&output[(i<<log2fftsize) + ((1+i)*nb_prefix_samples)],
1); 1);
// write_output("fft_out.m","fftout",temp,(1<<log2fftsize)*2,1,1); #else
// on AVX2 need 256-bit alignment
idft((int16_t *)&input[i<<log2fftsize],
(log2fftsize<=9) ? (int16_t *)temp : (int16_t *)&output[(i<<log2fftsize) + ((1+i)*nb_prefix_samples)],
1);
//memset(temp,0,1<<log2fftsize); #endif
// Copy to frame buffer with Cyclic Extension // Copy to frame buffer with Cyclic Extension
...@@ -158,12 +164,20 @@ void PHY_ofdm_mod(int *input, /// pointer to complex input ...@@ -158,12 +164,20 @@ void PHY_ofdm_mod(int *input, /// pointer to complex input
// msg("Doing cyclic prefix method\n"); // msg("Doing cyclic prefix method\n");
if (log2fftsize==7) { #ifndef __AVX2__
if (log2fftsize==7)
#else
if (log2fftsize<=9)
#endif
{
for (j=0; j<((1<<log2fftsize)) ; j++) { for (j=0; j<((1<<log2fftsize)) ; j++) {
output_ptr[j] = temp_ptr[j]; output_ptr[j] = temp_ptr[j];
} }
} }
j=(1<<log2fftsize); j=(1<<log2fftsize);
for (k=-1; k>=-nb_prefix_samples; k--) { for (k=-1; k>=-nb_prefix_samples; k--) {
......
...@@ -56,7 +56,7 @@ int slot_fep(PHY_VARS_UE *phy_vars_ue, ...@@ -56,7 +56,7 @@ int slot_fep(PHY_VARS_UE *phy_vars_ue,
unsigned int rx_offset; unsigned int rx_offset;
void (*dft)(int16_t *,int16_t *, int); void (*dft)(int16_t *,int16_t *, int);
int tmp_dft_in[256]; // This is for misalignment issues for 6 and 15 PRBs int tmp_dft_in[2048]; // This is for misalignment issues for 6 and 15 PRBs
switch (frame_parms->log2_symbol_size) { switch (frame_parms->log2_symbol_size) {
case 7: case 7:
...@@ -96,12 +96,12 @@ int slot_fep(PHY_VARS_UE *phy_vars_ue, ...@@ -96,12 +96,12 @@ int slot_fep(PHY_VARS_UE *phy_vars_ue,
if (l<0 || l>=7-frame_parms->Ncp) { if (l<0 || l>=7-frame_parms->Ncp) {
msg("slot_fep: l must be between 0 and %d\n",7-frame_parms->Ncp); printf("slot_fep: l must be between 0 and %d\n",7-frame_parms->Ncp);
return(-1); return(-1);
} }
if (Ns<0 || Ns>=20) { if (Ns<0 || Ns>=20) {
msg("slot_fep: Ns must be between 0 and 19\n"); printf("slot_fep: Ns must be between 0 and 19\n");
return(-1); return(-1);
} }
...@@ -111,12 +111,12 @@ int slot_fep(PHY_VARS_UE *phy_vars_ue, ...@@ -111,12 +111,12 @@ int slot_fep(PHY_VARS_UE *phy_vars_ue,
memset(&ue_common_vars->rxdataF[aa][frame_parms->ofdm_symbol_size*symbol],0,frame_parms->ofdm_symbol_size*sizeof(int)); memset(&ue_common_vars->rxdataF[aa][frame_parms->ofdm_symbol_size*symbol],0,frame_parms->ofdm_symbol_size*sizeof(int));
rx_offset = sample_offset + slot_offset + nb_prefix_samples0 + subframe_offset - SOFFSET; rx_offset = sample_offset + slot_offset + nb_prefix_samples0 + subframe_offset - SOFFSET;
// Align with 128 bit // Align with 256 bit
rx_offset = rx_offset - rx_offset % 4; // rx_offset = rx_offset&0xfffffff8;
#ifdef DEBUG_FEP #ifdef DEBUG_FEP
// if (phy_vars_ue->frame <100) // if (phy_vars_ue->frame <100)
msg("slot_fep: frame %d: slot %d, symbol %d, nb_prefix_samples %d, nb_prefix_samples0 %d, slot_offset %d, subframe_offset %d, sample_offset %d,rx_offset %d\n", phy_vars_ue->frame_rx,Ns, symbol, printf("slot_fep: frame %d: slot %d, symbol %d, nb_prefix_samples %d, nb_prefix_samples0 %d, slot_offset %d, subframe_offset %d, sample_offset %d,rx_offset %d\n", phy_vars_ue->frame_rx,Ns, symbol,
nb_prefix_samples,nb_prefix_samples0,slot_offset,subframe_offset,sample_offset,rx_offset); nb_prefix_samples,nb_prefix_samples0,slot_offset,subframe_offset,sample_offset,rx_offset);
#endif #endif
...@@ -127,9 +127,9 @@ int slot_fep(PHY_VARS_UE *phy_vars_ue, ...@@ -127,9 +127,9 @@ int slot_fep(PHY_VARS_UE *phy_vars_ue,
(short *)&ue_common_vars->rxdata[aa][0], (short *)&ue_common_vars->rxdata[aa][0],
frame_parms->ofdm_symbol_size*sizeof(int)); frame_parms->ofdm_symbol_size*sizeof(int));
if ((rx_offset&3)!=0) { // if input to dft is not 128-bit aligned, issue for size 6 and 15 PRBs if ((rx_offset&7)!=0) { // if input to dft is not 256-bit aligned, issue for size 6,15 and 25 PRBs
memcpy((void *)tmp_dft_in, memcpy((void *)tmp_dft_in,
(void *)&ue_common_vars->rxdata[aa][(rx_offset-nb_prefix_samples0) % frame_length_samples], (void *)&ue_common_vars->rxdata[aa][rx_offset % frame_length_samples],
frame_parms->ofdm_symbol_size*sizeof(int)); frame_parms->ofdm_symbol_size*sizeof(int));
dft((int16_t *)tmp_dft_in, dft((int16_t *)tmp_dft_in,
(int16_t *)&ue_common_vars->rxdataF[aa][frame_parms->ofdm_symbol_size*symbol],1); (int16_t *)&ue_common_vars->rxdataF[aa][frame_parms->ofdm_symbol_size*symbol],1);
...@@ -142,12 +142,12 @@ int slot_fep(PHY_VARS_UE *phy_vars_ue, ...@@ -142,12 +142,12 @@ int slot_fep(PHY_VARS_UE *phy_vars_ue,
} }
} else { } else {
rx_offset += (frame_parms->ofdm_symbol_size+nb_prefix_samples) + rx_offset += (frame_parms->ofdm_symbol_size+nb_prefix_samples)*l;// +
(frame_parms->ofdm_symbol_size+nb_prefix_samples)*(l-1); // (frame_parms->ofdm_symbol_size+nb_prefix_samples)*(l-1);
#ifdef DEBUG_FEP #ifdef DEBUG_FEP
// if (phy_vars_ue->frame <100) // if (phy_vars_ue->frame <100)
msg("slot_fep: frame %d: slot %d, symbol %d, nb_prefix_samples %d, nb_prefix_samples0 %d, slot_offset %d, subframe_offset %d, sample_offset %d,rx_offset %d\n", phy_vars_ue->frame_rx,Ns, symbol, printf("slot_fep: frame %d: slot %d, symbol %d, nb_prefix_samples %d, nb_prefix_samples0 %d, slot_offset %d, subframe_offset %d, sample_offset %d,rx_offset %d\n", phy_vars_ue->frame_rx,Ns, symbol,
nb_prefix_samples,nb_prefix_samples0,slot_offset,subframe_offset,sample_offset,rx_offset); nb_prefix_samples,nb_prefix_samples0,slot_offset,subframe_offset,sample_offset,rx_offset);
#endif #endif
...@@ -158,7 +158,7 @@ int slot_fep(PHY_VARS_UE *phy_vars_ue, ...@@ -158,7 +158,7 @@ int slot_fep(PHY_VARS_UE *phy_vars_ue,
start_meas(&phy_vars_ue->rx_dft_stats); start_meas(&phy_vars_ue->rx_dft_stats);
if ((rx_offset&3)!=0) { // if input to dft is not 128-bit aligned, issue for size 6 and 15 PRBs if ((rx_offset&7)!=0) { // if input to dft is not 128-bit aligned, issue for size 6 and 15 PRBs
memcpy((void *)tmp_dft_in, memcpy((void *)tmp_dft_in,
(void *)&ue_common_vars->rxdata[aa][(rx_offset) % frame_length_samples], (void *)&ue_common_vars->rxdata[aa][(rx_offset) % frame_length_samples],
frame_parms->ofdm_symbol_size*sizeof(int)); frame_parms->ofdm_symbol_size*sizeof(int));
...@@ -182,7 +182,7 @@ int slot_fep(PHY_VARS_UE *phy_vars_ue, ...@@ -182,7 +182,7 @@ int slot_fep(PHY_VARS_UE *phy_vars_ue,
for (aa=0; aa<frame_parms->nb_antennas_tx_eNB; aa++) { for (aa=0; aa<frame_parms->nb_antennas_tx_eNB; aa++) {
#ifdef DEBUG_FEP #ifdef DEBUG_FEP
msg("Channel estimation eNB %d, aatx %d, slot %d, symbol %d\n",eNB_id,aa,Ns,l); printf("Channel estimation eNB %d, aatx %d, slot %d, symbol %d\n",eNB_id,aa,Ns,l);
#endif #endif
start_meas(&phy_vars_ue->dlsch_channel_estimation_stats); start_meas(&phy_vars_ue->dlsch_channel_estimation_stats);
lte_dl_channel_estimation(phy_vars_ue,eNB_id,0, lte_dl_channel_estimation(phy_vars_ue,eNB_id,0,
...@@ -205,7 +205,7 @@ int slot_fep(PHY_VARS_UE *phy_vars_ue, ...@@ -205,7 +205,7 @@ int slot_fep(PHY_VARS_UE *phy_vars_ue,
// do frequency offset estimation here! // do frequency offset estimation here!
// use channel estimates from current symbol (=ch_t) and last symbol (ch_{t-1}) // use channel estimates from current symbol (=ch_t) and last symbol (ch_{t-1})
#ifdef DEBUG_FEP #ifdef DEBUG_FEP
msg("Frequency offset estimation\n"); printf("Frequency offset estimation\n");
#endif #endif
if (l==(4-frame_parms->Ncp)) { if (l==(4-frame_parms->Ncp)) {
...@@ -222,7 +222,7 @@ int slot_fep(PHY_VARS_UE *phy_vars_ue, ...@@ -222,7 +222,7 @@ int slot_fep(PHY_VARS_UE *phy_vars_ue,
} }
#ifdef DEBUG_FEP #ifdef DEBUG_FEP
msg("slot_fep: done\n"); printf("slot_fep: done\n");
#endif #endif
return(0); return(0);
} }
lte_dfts: lte_dfts.c lte_dfts_sse4: lte_dfts.c
gcc -O2 -mavx2 -g -ggdb -o lte_dfts lte_dfts.c time_meas.c file_output.c ../../SIMULATION/TOOLS/taus.c -I$$OPENAIR1_DIR -I$$OPENAIR_TARGETS -I$$OPENAIR2_DIR/COMMON -DUSER_MODE -DMR_MAIN -DNB_ANTENNAS_RX=1 # -DD256STATS #-DD64STATS gcc -O2 -msse4.1 -g -ggdb -o lte_dfts_sse4 lte_dfts.c time_meas.c file_output.c ../../SIMULATION/TOOLS/taus.c -I$$OPENAIR1_DIR -I$$OPENAIR_TARGETS -I$$OPENAIR2_DIR/COMMON -DUSER_MODE -DMR_MAIN -DNB_ANTENNAS_RX=1 # -DD256STATS #-DD64STATS
lte_dfts.s: lte_dfts.c lte_dfts_avx2: lte_dfts.c
gcc -O2 -mavx2 -g -ggdb -o lte_dfts_avx2 lte_dfts.c time_meas.c file_output.c ../../SIMULATION/TOOLS/taus.c -I$$OPENAIR1_DIR -I$$OPENAIR_TARGETS -I$$OPENAIR2_DIR/COMMON -DUSER_MODE -DMR_MAIN -DNB_ANTENNAS_RX=1 # -DD256STATS #-DD64STATS
lte_dfts_avx2.s: lte_dfts.c
gcc -O2 -mavx2 -S lte_dfts.c time_meas.c file_output.c ../../SIMULATION/TOOLS/taus.c -I$$OPENAIR1_DIR -I$$OPENAIR_TARGETS -I$$OPENAIR2_DIR/COMMON -DUSER_MODE -DMR_MAIN -DNB_ANTENNAS_RX=1 # -DD256STATS #-DD64STATS gcc -O2 -mavx2 -S lte_dfts.c time_meas.c file_output.c ../../SIMULATION/TOOLS/taus.c -I$$OPENAIR1_DIR -I$$OPENAIR_TARGETS -I$$OPENAIR2_DIR/COMMON -DUSER_MODE -DMR_MAIN -DNB_ANTENNAS_RX=1 # -DD256STATS #-DD64STATS
dft_cycles: lte_dfts lte_dfts_sse4.s: lte_dfts.c
./lte_dfts | egrep cycles gcc -O2 -msse4.1 -S lte_dfts.c time_meas.c file_output.c ../../SIMULATION/TOOLS/taus.c -I$$OPENAIR1_DIR -I$$OPENAIR_TARGETS -I$$OPENAIR2_DIR/COMMON -DUSER_MODE -DMR_MAIN -DNB_ANTENNAS_RX=1 # -DD256STATS #-DD64STATS
dft_cycles_avx2: lte_dfts_avx2
./lte_dfts_avx2 | egrep cycles
This diff is collapsed.
This diff is collapsed.
...@@ -3336,7 +3336,7 @@ PMI_FEEDBACK: ...@@ -3336,7 +3336,7 @@ PMI_FEEDBACK:
PHY_vars_UE->dlsch_ue[0][cw]->harq_processes[PHY_vars_UE->dlsch_ue[0][cw]->current_harq_pid]->G = coded_bits_per_codeword; PHY_vars_UE->dlsch_ue[0][cw]->harq_processes[PHY_vars_UE->dlsch_ue[0][cw]->current_harq_pid]->G = coded_bits_per_codeword;
/*
// calculate uncoded BLER // calculate uncoded BLER
uncoded_ber=0; uncoded_ber=0;
for (i=0;i<coded_bits_per_codeword;i++) for (i=0;i<coded_bits_per_codeword;i++)
...@@ -3352,7 +3352,7 @@ PMI_FEEDBACK: ...@@ -3352,7 +3352,7 @@ PMI_FEEDBACK:
if (n_frames==1) if (n_frames==1)
write_output("uncoded_ber_bit.m","uncoded_ber_bit",uncoded_ber_bit,coded_bits_per_codeword,1,0); write_output("uncoded_ber_bit.m","uncoded_ber_bit",uncoded_ber_bit,coded_bits_per_codeword,1,0);
*/
start_meas(&PHY_vars_UE->dlsch_unscrambling_stats); start_meas(&PHY_vars_UE->dlsch_unscrambling_stats);
dlsch_unscrambling(&PHY_vars_UE->lte_frame_parms, dlsch_unscrambling(&PHY_vars_UE->lte_frame_parms,
...@@ -3463,10 +3463,10 @@ PMI_FEEDBACK: ...@@ -3463,10 +3463,10 @@ PMI_FEEDBACK:
} }
sprintf(fname,"rxsig0_r%d.m",round); sprintf(fname,"rxsig0_r%d.m",round);
sprintf(vname,"rxs0_r%d.m",round); sprintf(vname,"rxs0_r%d",round);
write_output(fname,vname, &PHY_vars_UE->lte_ue_common_vars.rxdata[0][0],10*PHY_vars_UE->lte_frame_parms.samples_per_tti,1,1); write_output(fname,vname, &PHY_vars_UE->lte_ue_common_vars.rxdata[0][0],10*PHY_vars_UE->lte_frame_parms.samples_per_tti,1,1);
sprintf(fname,"rxsigF0_r%d.m",round); sprintf(fname,"rxsigF0_r%d.m",round);
sprintf(vname,"rxs0F_r%d.m",round); sprintf(vname,"rxs0F_r%d",round);
write_output(fname,vname, &PHY_vars_UE->lte_ue_common_vars.rxdataF[0][0],2*PHY_vars_UE->lte_frame_parms.ofdm_symbol_size*nsymb,2,1); write_output(fname,vname, &PHY_vars_UE->lte_ue_common_vars.rxdataF[0][0],2*PHY_vars_UE->lte_frame_parms.ofdm_symbol_size*nsymb,2,1);
if (PHY_vars_UE->lte_frame_parms.nb_antennas_rx>1) { if (PHY_vars_UE->lte_frame_parms.nb_antennas_rx>1) {
...@@ -3479,14 +3479,14 @@ PMI_FEEDBACK: ...@@ -3479,14 +3479,14 @@ PMI_FEEDBACK:
} }
sprintf(fname,"dlsch00_r%d.m",round); sprintf(fname,"dlsch00_r%d.m",round);
sprintf(vname,"dl00_r%d.m",round); sprintf(vname,"dl00_r%d",round);
write_output(fname,vname, write_output(fname,vname,
&(PHY_vars_UE->lte_ue_common_vars.dl_ch_estimates[eNB_id][0][0]), &(PHY_vars_UE->lte_ue_common_vars.dl_ch_estimates[eNB_id][0][0]),
PHY_vars_UE->lte_frame_parms.ofdm_symbol_size*nsymb,1,1); PHY_vars_UE->lte_frame_parms.ofdm_symbol_size*nsymb,1,1);
if (PHY_vars_UE->lte_frame_parms.nb_antennas_rx>1) { if (PHY_vars_UE->lte_frame_parms.nb_antennas_rx>1) {
sprintf(fname,"dlsch01_r%d.m",round); sprintf(fname,"dlsch01_r%d.m",round);
sprintf(vname,"dl01_r%d.m",round); sprintf(vname,"dl01_r%d",round);
write_output(fname,vname, write_output(fname,vname,
&(PHY_vars_UE->lte_ue_common_vars.dl_ch_estimates[eNB_id][1][0]), &(PHY_vars_UE->lte_ue_common_vars.dl_ch_estimates[eNB_id][1][0]),
PHY_vars_UE->lte_frame_parms.ofdm_symbol_size*nsymb/2,1,1); PHY_vars_UE->lte_frame_parms.ofdm_symbol_size*nsymb/2,1,1);
...@@ -3494,7 +3494,7 @@ PMI_FEEDBACK: ...@@ -3494,7 +3494,7 @@ PMI_FEEDBACK:
if (PHY_vars_eNB->lte_frame_parms.nb_antennas_tx>1) { if (PHY_vars_eNB->lte_frame_parms.nb_antennas_tx>1) {
sprintf(fname,"dlsch10_r%d.m",round); sprintf(fname,"dlsch10_r%d.m",round);
sprintf(vname,"dl10_r%d.m",round); sprintf(vname,"dl10_r%d",round);
write_output(fname,vname, write_output(fname,vname,
&(PHY_vars_UE->lte_ue_common_vars.dl_ch_estimates[eNB_id][2][0]), &(PHY_vars_UE->lte_ue_common_vars.dl_ch_estimates[eNB_id][2][0]),
PHY_vars_UE->lte_frame_parms.ofdm_symbol_size*nsymb/2,1,1); PHY_vars_UE->lte_frame_parms.ofdm_symbol_size*nsymb/2,1,1);
...@@ -3502,7 +3502,7 @@ PMI_FEEDBACK: ...@@ -3502,7 +3502,7 @@ PMI_FEEDBACK:
if ((PHY_vars_UE->lte_frame_parms.nb_antennas_rx>1) && (PHY_vars_eNB->lte_frame_parms.nb_antennas_tx>1)) { if ((PHY_vars_UE->lte_frame_parms.nb_antennas_rx>1) && (PHY_vars_eNB->lte_frame_parms.nb_antennas_tx>1)) {
sprintf(fname,"dlsch11_r%d.m",round); sprintf(fname,"dlsch11_r%d.m",round);
sprintf(vname,"dl11_r%d.m",round); sprintf(vname,"dl11_r%d",round);
write_output(fname,vname, write_output(fname,vname,
&(PHY_vars_UE->lte_ue_common_vars.dl_ch_estimates[eNB_id][3][0]), &(PHY_vars_UE->lte_ue_common_vars.dl_ch_estimates[eNB_id][3][0]),
PHY_vars_UE->lte_frame_parms.ofdm_symbol_size*nsymb/2,1,1); PHY_vars_UE->lte_frame_parms.ofdm_symbol_size*nsymb/2,1,1);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment