Commit e7b2b644 authored by lfarizav's avatar lfarizav

new PRACH channel using AVX instructions

parent 2e2bc97b
...@@ -291,7 +291,7 @@ int init_freq_channel_AVX_float(channel_desc_t *desc,uint16_t nb_rb,int16_t n_sa ...@@ -291,7 +291,7 @@ int init_freq_channel_AVX_float(channel_desc_t *desc,uint16_t nb_rb,int16_t n_sa
else else
delay = desc->delays[l]+NB_SAMPLES_CHANNEL_OFFSET/desc->sampling_rate; delay = desc->delays[l]+NB_SAMPLES_CHANNEL_OFFSET/desc->sampling_rate;
sincos256_ps(_mm256_set_ps(twopi*(4*f+7)*delay,twopi*(4*f+6)*delay,twopi*(4*f+5)*delay,twopi*(4*f+4)*delay,twopi*(4*f+3)*delay,twopi*(4*f+2)*delay,twopi*(4*f+1)*delay,twopi*(4*f)*delay), &sin_lut256, &cos_lut256); sincos256_ps(_mm256_set_ps(twopi*(8*f+7)*delay,twopi*(8*f+6)*delay,twopi*(8*f+5)*delay,twopi*(8*f+4)*delay,twopi*(8*f+3)*delay,twopi*(8*f+2)*delay,twopi*(8*f+1)*delay,twopi*(8*f)*delay), &sin_lut256, &cos_lut256);
//cos_lut256=_mm256_set_ps(cos(twopi*(8*f+7)*delay),cos(twopi*(8*f+6)*delay),cos(twopi*(8*f+5)*delay),cos(twopi*(8*f+4)*delay),cos(twopi*(8*f+3)*delay),cos(twopi*(8*f+2)*delay),cos(twopi*(8*f+1)*delay),cos(twopi*(8*f)*delay)); //cos_lut256=_mm256_set_ps(cos(twopi*(8*f+7)*delay),cos(twopi*(8*f+6)*delay),cos(twopi*(8*f+5)*delay),cos(twopi*(8*f+4)*delay),cos(twopi*(8*f+3)*delay),cos(twopi*(8*f+2)*delay),cos(twopi*(8*f+1)*delay),cos(twopi*(8*f)*delay));
//sin_lut256=_mm256_set_ps(sin(twopi*(8*f+7)*delay),sin(twopi*(8*f+6)*delay),sin(twopi*(8*f+5)*delay),sin(twopi*(8*f+4)*delay),sin(twopi*(8*f+3)*delay),sin(twopi*(8*f+2)*delay),sin(twopi*(8*f+1)*delay),sin(twopi*(8*f)*delay)); //sin_lut256=_mm256_set_ps(sin(twopi*(8*f+7)*delay),sin(twopi*(8*f+6)*delay),sin(twopi*(8*f+5)*delay),sin(twopi*(8*f+4)*delay),sin(twopi*(8*f+3)*delay),sin(twopi*(8*f+2)*delay),sin(twopi*(8*f+1)*delay),sin(twopi*(8*f)*delay));
_mm256_storeu_ps(&cos_lut_f[l][8*f+(n_samples>>1)],cos_lut256); _mm256_storeu_ps(&cos_lut_f[l][8*f+(n_samples>>1)],cos_lut256);
...@@ -580,7 +580,74 @@ int init_freq_channel_prach_SSE_float(channel_desc_t *desc,uint16_t nb_rb,int16_ ...@@ -580,7 +580,74 @@ int init_freq_channel_prach_SSE_float(channel_desc_t *desc,uint16_t nb_rb,int16_
return(0); return(0);
} }
int init_freq_channel_prach_AVX_float(channel_desc_t *desc,uint16_t nb_rb,int16_t n_samples,int16_t prach_fmt,int16_t n_ra_prb)
{
static int first_run=1;
float delta_f,twopi; // 90 kHz spacing
float delay;
int16_t f,f1;
uint8_t l;
int prach_samples, prach_pbr_offset_samples, max_nb_rb_samples;
__m256 cos_lut256,sin_lut256, cos_256, sin_256;
if ((n_samples&1)==0) {
fprintf(stderr, "freq_channel_init: n_samples has to be odd\n");
return(-1);
}
if (nb_rb-n_ra_prb<6) {
fprintf(stderr, "freq_channel_init: Impossible to allocate PRACH, check n_ra_prb value (r_ra_prb=%d)\n",n_ra_prb);
return(-1);
}
prach_samples = (prach_fmt<4)?13+839+12:3+139+2;
if (first_run)
{
cos_lut_f_prach = (float **)malloc16(prach_samples*sizeof(float*));
sin_lut_f_prach = (float **)malloc16(prach_samples*sizeof(float*));
for (f=max_nb_rb_samples/2-prach_pbr_offset_samples,f1=0; f<max_nb_rb_samples/2-prach_pbr_offset_samples+prach_samples; f++,f1++) {
cos_lut_f_prach[f1] = (float *)malloc16_clear((int)desc->nb_taps*sizeof(float));
sin_lut_f_prach[f1] = (float *)malloc16_clear((int)desc->nb_taps*sizeof(float));
}
first_run=0;
}
//cos_lut = (double **)malloc(prach_samples*sizeof(double*));
//sin_lut = (double **)malloc(prach_samples*sizeof(double*));
delta_f = (prach_fmt<4)?nb_rb*180000/((n_samples-1)*12):nb_rb*180000/((n_samples-1)*2);//1.25 khz for preamble format 1,2,3. 7.5 khz for preample format 4
max_nb_rb_samples = nb_rb*180000/delta_f;//7200 if prach_fmt<4
prach_pbr_offset_samples = (n_ra_prb+6)*180000/delta_f;//864 if n_ra_prb=0,7200 if n_ra_prb=44=50-6
twopi=2*M_PI*1e-6*delta_f;
//printf("prach_samples = %d, delta_f = %e, max_nb_rb_samples= %d, prach_pbr_offset_samples = %d, nb_taps = %d\n",prach_samples,delta_f,max_nb_rb_samples,prach_pbr_offset_samples,desc->nb_taps);
for (f=((max_nb_rb_samples/2-prach_pbr_offset_samples)>>3),f1=0; f<((max_nb_rb_samples/2-prach_pbr_offset_samples+prach_samples)>>3); f++,f1++) {//3600-864,3600-864+864|3600-7200,3600-7200+839
//freq=delta_f*(float)f*1e-6;// due to the fact that delays is in mus
//printf("[init_freq_channel_prach] freq %e\n",freq);
//cos_lut[f1] = (double *)malloc((int)desc->nb_taps*sizeof(double));
//sin_lut[f1] = (double *)malloc((int)desc->nb_taps*sizeof(double));
for (l=0; l<(int)desc->nb_taps; l++) {
if (desc->nb_taps==1)
delay = desc->delays[l];
else
delay = desc->delays[l]+NB_SAMPLES_CHANNEL_OFFSET/desc->sampling_rate;
//cos_lut256=_mm256_set_ps(cos(twopi*(8*f+3)*delay),cos(twopi*(8*f+2)*delay),cos(twopi*(8*f+1)*delay),cos(twopi*(8*f)*delay));
//sin_lut256=_mm256_set_ps(sin(twopi*(8*f+3)*delay),sin(twopi*(8*f+2)*delay),sin(twopi*(8*f+1)*delay),sin(twopi*(8*f)*delay));
sincos256_ps(_mm256_set_ps(twopi*(8*f+7)*delay,twopi*(8*f+6)*delay,twopi*(8*f+5)*delay,twopi*(8*f+4)*delay,twopi*(8*f+3)*delay,twopi*(8*f+2)*delay,twopi*(8*f+1)*delay,twopi*(8*f)*delay),&sin_256,&cos_256);
cos_lut256=cos_256;
sin_lut256=sin_256;
_mm256_storeu_ps(&cos_lut_f_prach[l][8*f1],cos_lut256);
_mm256_storeu_ps(&sin_lut_f_prach[l][8*f1],sin_lut256);
//cos_lut[f1][l] = cos(2*M_PI*freq*delay);
//sin_lut[f1][l] = sin(2*M_PI*freq*delay);
//if (f<max_nb_rb_samples/2-prach_pbr_offset_samples+10)
//printf("freq: %e, f1: %d, f: %d, arg_sin_cos = %e, cos () = %e, sin () =n %e)\n",freq, f1,f, 2*M_PI*freq*delay, cos_lut[f1][l], sin_lut[f1][l]);
}
}
return(0);
}
int freq_channel_prach(channel_desc_t *desc,uint16_t nb_rb,int16_t n_samples,int16_t prach_fmt,int16_t n_ra_prb) int freq_channel_prach(channel_desc_t *desc,uint16_t nb_rb,int16_t n_samples,int16_t prach_fmt,int16_t n_ra_prb)
{ {
...@@ -688,6 +755,62 @@ int freq_channel_prach_SSE_float(channel_desc_t *desc,uint16_t nb_rb,int16_t n_s ...@@ -688,6 +755,62 @@ int freq_channel_prach_SSE_float(channel_desc_t *desc,uint16_t nb_rb,int16_t n_s
stop_meas(&desc->interp_freq_PRACH); stop_meas(&desc->interp_freq_PRACH);
return(0); return(0);
} }
int freq_channel_prach_AVX_float(channel_desc_t *desc,uint16_t nb_rb,int16_t n_samples,int16_t prach_fmt,int16_t n_ra_prb)
{
int16_t f;
uint8_t aarx,aatx,l;
int prach_samples;
static int freq_channel_init=0;
static int n_samples_max=0;
__m256 chFx_256,chFy_256;
prach_samples = (prach_fmt<4)?13+839+12:3+139+2;
// do some error checking
if (nb_rb-n_ra_prb<6) {
fprintf(stderr, "freq_channel_init: Impossible to allocate PRACH, check r_ra_prb value (r_ra_prb=%d)\n",n_ra_prb);
return(-1);
}
if (freq_channel_init == 0) {
// we are initializing the lut for the largets possible n_samples=12*nb_rb+1
// if called with n_samples<12*nb_rb+1, we decimate the lut
n_samples_max=12*nb_rb+1;
if (init_freq_channel_prach_AVX_float(desc,nb_rb,n_samples_max,prach_fmt,n_ra_prb)==0)
freq_channel_init=1;
else
return(-1);
}
start_meas(&desc->interp_freq_PRACH);
for (f=0; f<(prach_samples>>3); f++) {
//clut = cos_lut[f];
//slut = sin_lut[f];
for (aarx=0; aarx<desc->nb_rx; aarx++) {
for (aatx=0; aatx<desc->nb_tx; aatx++) {
//desc->chF_prach[aarx+(aatx*desc->nb_rx)].x[f]=0.0;
//desc->chF_prach[aarx+(aatx*desc->nb_rx)].y[f]=0.0;
chFx_256=_mm256_setzero_ps();
chFy_256=_mm256_setzero_ps();
for (l=0; l<(int)desc->nb_taps; l++) {
//desc->chF_prach[aarx+(aatx*desc->nb_rx)].x[f]+=(desc->a[l][aarx+(aatx*desc->nb_rx)].x*cos_lut_f_prach[l][f]+
// desc->a[l][aarx+(aatx*desc->nb_rx)].y*sin_lut_f_prach[l][f]);
//desc->chF_prach[aarx+(aatx*desc->nb_rx)].y[f]+=(-desc->a[l][aarx+(aatx*desc->nb_rx)].x*sin_lut_f_prach[l][f]+
// desc->a[l][aarx+(aatx*desc->nb_rx)].y*cos_lut_f_prach[l][f]);
chFx_256=_mm256_add_ps(chFx_256,_mm256_add_ps(_mm256_mul_ps(_mm256_set1_ps(desc->a[l][aarx+(aatx*desc->nb_rx)].x),_mm256_loadu_ps(&cos_lut_f_prach[l][8*f])),_mm256_mul_ps(_mm256_set1_ps(desc->a[l][aarx+(aatx*desc->nb_rx)].y),_mm256_loadu_ps(&sin_lut_f_prach[l][8*f]))));
chFy_256=_mm256_add_ps(chFy_256,_mm256_sub_ps(_mm256_mul_ps(_mm256_set1_ps(desc->a[l][aarx+(aatx*desc->nb_rx)].y),_mm256_loadu_ps(&cos_lut_f_prach[l][8*f])),_mm256_mul_ps(_mm256_set1_ps(desc->a[l][aarx+(aatx*desc->nb_rx)].x),_mm256_loadu_ps(&sin_lut_f_prach[l][8*f]))));
}
_mm256_storeu_ps(&desc->chF_prach[aarx+(aatx*desc->nb_rx)].x[8*f],chFx_256);
_mm256_storeu_ps(&desc->chF_prach[aarx+(aatx*desc->nb_rx)].y[8*f],chFy_256);
}
}
//if (f<10 || (f>829&&f<839))
// printf("chF_prach[0][%d], (x,y) = (%e,%e)\n",f,desc->chF_prach[0][f].x,desc->chF_prach[0][f].y);
}
stop_meas(&desc->interp_freq_PRACH);
return(0);
}
//#endif //#endif
double compute_pbch_sinr(channel_desc_t *desc, double compute_pbch_sinr(channel_desc_t *desc,
channel_desc_t *desc_i1, channel_desc_t *desc_i1,
......
...@@ -510,11 +510,13 @@ int freq_channel_SSE_float(channel_desc_t *desc,uint16_t nb_rb, int16_t n_sample ...@@ -510,11 +510,13 @@ int freq_channel_SSE_float(channel_desc_t *desc,uint16_t nb_rb, int16_t n_sample
int freq_channel_AVX_float(channel_desc_t *desc,uint16_t nb_rb, int16_t n_samples); int freq_channel_AVX_float(channel_desc_t *desc,uint16_t nb_rb, int16_t n_samples);
int freq_channel_prach(channel_desc_t *desc,uint16_t nb_rb,int16_t n_samples,int16_t prach_fmt,int16_t n_ra_prb); int freq_channel_prach(channel_desc_t *desc,uint16_t nb_rb,int16_t n_samples,int16_t prach_fmt,int16_t n_ra_prb);
int freq_channel_prach_SSE_float(channel_desc_t *desc,uint16_t nb_rb,int16_t n_samples,int16_t prach_fmt,int16_t n_ra_prb); int freq_channel_prach_SSE_float(channel_desc_t *desc,uint16_t nb_rb,int16_t n_samples,int16_t prach_fmt,int16_t n_ra_prb);
int freq_channel_prach_AVX_float(channel_desc_t *desc,uint16_t nb_rb,int16_t n_samples,int16_t prach_fmt,int16_t n_ra_prb);
int init_freq_channel(channel_desc_t *desc,uint16_t nb_rb,int16_t n_samples); int init_freq_channel(channel_desc_t *desc,uint16_t nb_rb,int16_t n_samples);
int init_freq_channel_SSE_float(channel_desc_t *desc,uint16_t nb_rb,int16_t n_samples); int init_freq_channel_SSE_float(channel_desc_t *desc,uint16_t nb_rb,int16_t n_samples);
int init_freq_channel_AVX_float(channel_desc_t *desc,uint16_t nb_rb,int16_t n_samples); int init_freq_channel_AVX_float(channel_desc_t *desc,uint16_t nb_rb,int16_t n_samples);
int init_freq_channel_prach(channel_desc_t *desc,uint16_t nb_rb,int16_t n_samples,int16_t prach_fmt,int16_t n_ra_prb); int init_freq_channel_prach(channel_desc_t *desc,uint16_t nb_rb,int16_t n_samples,int16_t prach_fmt,int16_t n_ra_prb);
int init_freq_channel_prach_SSE_float(channel_desc_t *desc,uint16_t nb_rb,int16_t n_samples,int16_t prach_fmt,int16_t n_ra_prb); int init_freq_channel_prach_SSE_float(channel_desc_t *desc,uint16_t nb_rb,int16_t n_samples,int16_t prach_fmt,int16_t n_ra_prb);
int init_freq_channel_prach_AVX_float(channel_desc_t *desc,uint16_t nb_rb,int16_t n_samples,int16_t prach_fmt,int16_t n_ra_prb);
void sincos_ps(__m128 x, __m128 *s, __m128 *c); void sincos_ps(__m128 x, __m128 *s, __m128 *c);
void sincos256_ps(__m256 x, __m256 *s, __m256 *c); void sincos256_ps(__m256 x, __m256 *s, __m256 *c);
......
...@@ -951,7 +951,7 @@ void multipath_channel_prach_AVX_float(channel_desc_t *desc, ...@@ -951,7 +951,7 @@ void multipath_channel_prach_AVX_float(channel_desc_t *desc,
// do nothing - keep channel // do nothing - keep channel
} else { } else {
random_channel_freq(desc,0); random_channel_freq(desc,0);
freq_channel_prach_SSE_float(desc,nb_rb,n_samples,prach_fmt,n_ra_prb);//Find desc->chF_prach freq_channel_prach_AVX_float(desc,nb_rb,n_samples,prach_fmt,n_ra_prb);//Find desc->chF_prach
} }
for (f=0;f<(length>>3); f++) { for (f=0;f<(length>>3); f++) {
//rx_tmp.x = 0; //rx_tmp.x = 0;
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment