Commit 2e2bc97b authored by lfarizav's avatar lfarizav

avx instructions for multipath_channel, box_muller, dac, adc and rf

parent 1a81fb5c
...@@ -327,6 +327,7 @@ int32_t signal_energy_nodc(int32_t *,uint32_t); ...@@ -327,6 +327,7 @@ int32_t signal_energy_nodc(int32_t *,uint32_t);
*/ */
double signal_energy_fp(double *s_re[2], double *s_im[2], uint32_t nb_antennas, uint32_t length,uint32_t offset); double signal_energy_fp(double *s_re[2], double *s_im[2], uint32_t nb_antennas, uint32_t length,uint32_t offset);
float signal_energy_fp_SSE_float(float *s_re[2], float *s_im[2], uint32_t nb_antennas, uint32_t length,uint32_t offset); float signal_energy_fp_SSE_float(float *s_re[2], float *s_im[2], uint32_t nb_antennas, uint32_t length,uint32_t offset);
float signal_energy_fp_AVX_float(float *s_re[2], float *s_im[2], uint32_t nb_antennas, uint32_t length,uint32_t offset);
/*!\fn double signal_energy_fp2(struct complex *, uint32_t); /*!\fn double signal_energy_fp2(struct complex *, uint32_t);
\brief Computes the signal energy per subcarrier \brief Computes the signal energy per subcarrier
*/ */
......
...@@ -349,7 +349,24 @@ float signal_energy_fp_SSE_float(float *s_re[2],float *s_im[2],uint32_t nb_anten ...@@ -349,7 +349,24 @@ float signal_energy_fp_SSE_float(float *s_re[2],float *s_im[2],uint32_t nb_anten
} }
return((V128[0]+V128[1]+V128[2]+V128[3])/length/nb_antennas); return((V128[0]+V128[1]+V128[2]+V128[3])/length/nb_antennas);
} }
float signal_energy_fp_AVX_float(float *s_re[2],float *s_im[2],uint32_t nb_antennas,uint32_t length,uint32_t offset)
{
int32_t aa,i;
__m256 V256, s_re256,s_im256;
V256 = _mm256_setzero_ps();
for (i=0; i<(length>>3); i++) {
for (aa=0; aa<nb_antennas; aa++) {
// V= V + (s_re[aa][i+offset]*s_re[aa][i+offset]) + (s_im[aa][i+offset]*s_im[aa][i+offset]);
s_re256=_mm256_loadu_ps(&s_re[aa][8*i+offset]);
s_im256=_mm256_loadu_ps(&s_im[aa][8*i+offset]);
s_re256=_mm256_mul_ps(s_re256,s_re256);
s_im256=_mm256_mul_ps(s_im256,s_im256);
V256=_mm256_add_ps(V256,_mm256_add_ps(s_re256,s_im256));
}
}
return((V256[0]+V256[1]+V256[2]+V256[3]+V256[4]+V256[5]+V256[6]+V256[7])/length/nb_antennas);
}
double signal_energy_fp2(struct complex *s,uint32_t length) double signal_energy_fp2(struct complex *s,uint32_t length)
{ {
......
...@@ -125,7 +125,40 @@ void adc_SSE_float(float *r_re[2], ...@@ -125,7 +125,40 @@ void adc_SSE_float(float *r_re[2],
} }
} }
} }
void adc_AVX_float(float *r_re[2],
float *r_im[2],
unsigned int input_offset,
unsigned int output_offset,
unsigned int **output,
unsigned int nb_rx_antennas,
unsigned int length,
unsigned char B,
unsigned int samples,
unsigned int ofdm_symbol_size)
{
int i;
int aa;
__m256 r_re256,r_im256,gain256;
__m256i r_re256i, r_im256i,output256;
float gain = (float)(1<<(B-1));
gain256=_mm256_set1_ps(gain);
for (i=0; i<(length>>3); i++)
{
for (aa=0; aa<nb_rx_antennas; aa++)
{
r_re256=_mm256_loadu_ps(&r_re[aa][8*i+input_offset]);
r_im256=_mm256_loadu_ps(&r_im[aa][8*i+input_offset]);
r_re256=_mm256_mul_ps(r_re256,gain256);
r_im256=_mm256_mul_ps(r_im256,gain256);
r_re256i=_mm256_cvtps_epi32(r_re256);
r_im256i=_mm256_cvtps_epi32(r_im256);
r_re256i=_mm256_packs_epi32(r_re256i,r_re256i);
r_im256i=_mm256_packs_epi32(r_im256i,r_im256i);
output256=_mm256_unpacklo_epi16(r_re256i,r_im256i);
_mm256_storeu_si256((__m256i *)&output[aa][8*i+output_offset],output256);
}
}
}
void adc_freq(double *r_re[2], void adc_freq(double *r_re[2],
double *r_im[2], double *r_im[2],
unsigned int input_offset, unsigned int input_offset,
...@@ -230,7 +263,43 @@ void adc_prach_SSE_float(float *r_re[2], ...@@ -230,7 +263,43 @@ void adc_prach_SSE_float(float *r_re[2],
//printf("Adc outputs %d %e %d \n",i,((short *)output[0])[((i+output_offset)<<1)], ((i+output_offset)<<1) ); //printf("Adc outputs %d %e %d \n",i,((short *)output[0])[((i+output_offset)<<1)], ((i+output_offset)<<1) );
} }
} }
void adc_prach_AVX_float(float *r_re[2],
float *r_im[2],
unsigned int input_offset,
unsigned int output_offset,
unsigned int **output,
unsigned int nb_rx_antennas,
unsigned int length,
unsigned char B)
{
int i;
int aa;
__m256 r_re256,r_im256,gain256;
__m256i r_re256i, r_im256i,output256;
float gain = (double)(1<<(B-1));
gain256=_mm256_set1_ps(gain);
//double gain = 1.0;
for (i=0; i<(length>>3); i++) {
for (aa=0; aa<nb_rx_antennas; aa++) {
//((short *)output[aa])[((i+output_offset/2)<<1)] = (short)(r_re[aa][i+input_offset]*gain);
//((short *)output[aa])[1+((i+output_offset/2)<<1)] = (short)(r_im[aa][i+input_offset]*gain);
r_re256=_mm256_loadu_ps(&r_re[aa][8*i+input_offset]);
r_im256=_mm256_loadu_ps(&r_im[aa][8*i+input_offset]);
r_re256=_mm256_mul_ps(r_re256,gain256);
r_im256=_mm256_mul_ps(r_im256,gain256);
r_re256i=_mm256_cvtps_epi32(r_re256);
r_im256i=_mm256_cvtps_epi32(r_im256);
r_re256i=_mm256_packs_epi32(r_re256i,r_re256i);
r_im256i=_mm256_packs_epi32(r_im256i,r_im256i);
output256=_mm256_unpacklo_epi16(r_re256i,r_im256i);
_mm256_storeu_si256((__m256i *)&output[aa][8*i+output_offset/2],output256);
}
//printf("Adc outputs %d %e %d \n",i,((short *)output[0])[((i+output_offset)<<1)], ((i+output_offset)<<1) );
}
}
/*void adc_freq(double *r_re[2], /*void adc_freq(double *r_re[2],
double *r_im[2], double *r_im[2],
unsigned int input_offset, unsigned int input_offset,
......
...@@ -222,6 +222,48 @@ double dac_fixed_gain_SSE_float(float *s_re[2], ...@@ -222,6 +222,48 @@ double dac_fixed_gain_SSE_float(float *s_re[2],
return(signal_energy_fp_SSE_float(s_re,s_im,nb_tx_antennas,length_meas,0)/NB_RE); return(signal_energy_fp_SSE_float(s_re,s_im,nb_tx_antennas,length_meas,0)/NB_RE);
} }
double dac_fixed_gain_AVX_float(float *s_re[2],
float *s_im[2],
uint32_t **input,
uint32_t input_offset,
uint32_t nb_tx_antennas,
uint32_t length,
uint32_t input_offset_meas,
uint32_t length_meas,
uint8_t B,
float txpwr_dBm,
int NB_RE)
{
int i;
int aa;
float amp,amp1,div;
__m256 input_re256, input_im256;
amp = //sqrt(NB_RE)*pow(10.0,.05*txpwr_dBm)/sqrt(nb_tx_antennas); //this is amp per tx antenna
pow(10.0,.05*txpwr_dBm)/sqrt(nb_tx_antennas); //this is amp per tx antenna
amp1 = 0;
for (aa=0; aa<nb_tx_antennas; aa++) {
amp1 += sqrt((float)signal_energy((int32_t*)&input[aa][input_offset_meas],length_meas)/NB_RE);
}
amp1/=nb_tx_antennas;
div=amp/amp1;
for (i=0; i<(length>>3); i++) {
for (aa=0; aa<nb_tx_antennas; aa++) {
input_re256=_mm256_set_ps(((float)(((short *)input[aa]))[(((8*i+7)+input_offset)<<1)]),((float)(((short *)input[aa]))[(((8*i+6)+input_offset)<<1)]),((float)(((short *)input[aa]))[(((8*i+5)+input_offset)<<1)]),((float)(((short *)input[aa]))[(((8*i+4)+input_offset)<<1)]),((float)(((short *)input[aa]))[(((8*i+3)+input_offset)<<1)]),((float)(((short *)input[aa]))[(((8*i+2)+input_offset)<<1)]),((float)(((short *)input[aa]))[(((8*i+1)+input_offset)<<1)]),((float)(((short *)input[aa]))[(((8*i)+input_offset)<<1)]));
input_im256=_mm256_set_ps(((float)(((short *)input[aa]))[(((8*i+7)+input_offset)<<1)+1]),((float)(((short *)input[aa]))[(((8*i+6)+input_offset)<<1)+1]),((float)(((short *)input[aa]))[(((8*i+5)+input_offset)<<1)+1]),((float)(((short *)input[aa]))[(((8*i+4)+input_offset)<<1)+1]),((float)(((short *)input[aa]))[(((8*i+3)+input_offset)<<1)+1]),((float)(((short *)input[aa]))[(((8*i+2)+input_offset)<<1)+1]),((float)(((short *)input[aa]))[(((8*i+1)+input_offset)<<1)+1]),((float)(((short *)input[aa]))[(((8*i)+input_offset)<<1)+1]));
input_re256=_mm256_mul_ps(input_re256,_mm256_set1_ps(div));
input_im256=_mm256_mul_ps(input_im256,_mm256_set1_ps(div));
_mm256_storeu_ps(&s_re[aa][8*i],input_re256);
_mm256_storeu_ps(&s_im[aa][8*i],input_im256);
}
}
return(signal_energy_fp_AVX_float(s_re,s_im,nb_tx_antennas,length_meas,0)/NB_RE);
}
double dac_fixed_gain_prach(double *s_re[2], double dac_fixed_gain_prach(double *s_re[2],
double *s_im[2], double *s_im[2],
uint32_t *input, uint32_t *input,
...@@ -351,3 +393,68 @@ float dac_fixed_gain_prach_SSE_float(float *s_re[2], ...@@ -351,3 +393,68 @@ float dac_fixed_gain_prach_SSE_float(float *s_re[2],
return(signal_energy_fp_SSE_float(s_re,s_im,nb_tx_antennas,length_meas,0)/NB_RE); return(signal_energy_fp_SSE_float(s_re,s_im,nb_tx_antennas,length_meas,0)/NB_RE);
} }
float dac_fixed_gain_prach_AVX_float(float *s_re[2],
float *s_im[2],
uint32_t *input,
uint32_t input_offset,
uint32_t nb_tx_antennas,
uint32_t length,
uint32_t input_offset_meas,
uint32_t length_meas,
uint8_t B,
float txpwr_dBm,
int NB_RE,
int ofdm_symbol_size)
{
int i;
int aa;
float amp,amp1,div;
__m256 input_re256, input_im256;
amp = //sqrt(NB_RE)*pow(10.0,.05*txpwr_dBm)/sqrt(nb_tx_antennas); //this is amp per tx antenna
pow(10.0,.05*txpwr_dBm)/sqrt(nb_tx_antennas); //this is amp per tx antenna
amp1 = 0;
for (aa=0; aa<nb_tx_antennas; aa++) {
amp1 += sqrt((float)signal_energy_prach((int32_t*)&input[input_offset_meas],length_meas)/NB_RE);
}
amp1/=nb_tx_antennas;
// printf("DAC: amp1 %f dB (%d,%d), tx_power %f\n",20*log10(amp1),input_offset,input_offset_meas,txpwr_dBm);
/*
if (nb_tx_antennas==2)
amp1 = AMP/2;
else if (nb_tx_antennas==4)
amp1 = ((AMP*ONE_OVER_SQRT2_Q15)>>16);
else //assume (nb_tx_antennas==1)
amp1 = ((AMP*ONE_OVER_SQRT2_Q15)>>15);
amp1 = amp1*sqrt(512.0/300.0); //account for loss due to null carriers
//printf("DL: amp1 %f dB (%d,%d), tx_power %f\n",20*log10(amp1),input_offset,input_offset_meas,txpwr_dBm);
*/
div=amp/amp1;
for (i=0; i<(length>>3); i++) {
for (aa=0; aa<nb_tx_antennas; aa++) {
//s_re[aa][i] = div*((float)(((short *)input))[((input_offset+2*i))]); ///(1<<(B-1));
//s_im[aa][i] = div*((float)(((short *)input))[((input_offset+2*i))+1]); ///(1<<(B-1));
input_re256=_mm256_set_ps((float)(((short *)input))[2*(8*i+7)+input_offset],(float)(((short *)input))[2*(8*i+6)+input_offset],(float)(((short *)input))[2*(8*i+5)+input_offset],(float)(((short *)input))[2*(8*i+4)+input_offset],(float)(((short *)input))[2*(8*i+3)+input_offset],(float)(((short *)input))[2*(8*i+2)+input_offset],(float)(((short *)input))[2*(8*i+1)+input_offset],(float)(((short *)input))[2*(8*i)+input_offset]);
input_im256=_mm256_set_ps((float)(((short *)input))[2*(8*i+7)+1+input_offset],(float)(((short *)input))[2*(8*i+6)+1+input_offset],(float)(((short *)input))[2*(8*i+5)+1+input_offset],(float)(((short *)input))[2*(8*i+4)+1+input_offset],(float)(((short *)input))[2*(8*i+3)+1+input_offset],(float)(((short *)input))[2*(8*i+2)+1+input_offset],(float)(((short *)input))[2*(8*i+1)+1+input_offset],(float)(((short *)input))[2*(8*i)+1+input_offset]);
input_re256=_mm256_mul_ps(input_re256,_mm256_set1_ps(div));
input_im256=_mm256_mul_ps(input_im256,_mm256_set1_ps(div));
_mm256_storeu_ps(&s_re[aa][8*i],input_re256);
_mm256_storeu_ps(&s_im[aa][8*i],input_im256);
if (2*i+input_offset==12*2*ofdm_symbol_size)
i=0;
}
}
// printf("ener %e\n",signal_energy_fp(s_re,s_im,nb_tx_antennas,length,0));
return(signal_energy_fp_AVX_float(s_re,s_im,nb_tx_antennas,length_meas,0)/NB_RE);
}
...@@ -81,6 +81,15 @@ void rf_rx_simple_freq_SSE_float(float *r_re[2], ...@@ -81,6 +81,15 @@ void rf_rx_simple_freq_SSE_float(float *r_re[2],
unsigned int symbols_per_tti, unsigned int symbols_per_tti,
unsigned int ofdm_symbol_size, unsigned int ofdm_symbol_size,
unsigned int n_samples); unsigned int n_samples);
void rf_rx_simple_freq_AVX_float(float *r_re[2],
float *r_im[2],
unsigned int nb_rx_antennas,
unsigned int length,
float s_time,
float rx_gain_dB,
unsigned int symbols_per_tti,
unsigned int ofdm_symbol_size,
unsigned int n_samples);
void adc(double *r_re[2], void adc(double *r_re[2],
...@@ -101,6 +110,16 @@ void adc_SSE_float(float *r_re[2], ...@@ -101,6 +110,16 @@ void adc_SSE_float(float *r_re[2],
unsigned char B, unsigned char B,
unsigned int samples, unsigned int samples,
unsigned int ofdm_symbol_size); unsigned int ofdm_symbol_size);
void adc_AVX_float(float *r_re[2],
float *r_im[2],
unsigned int input_offset,
unsigned int output_offset,
int **output,
unsigned int nb_rx_antennas,
unsigned int length,
unsigned char B,
unsigned int samples,
unsigned int ofdm_symbol_size);
void adc_freq(double *r_re[2], void adc_freq(double *r_re[2],
double *r_im[2], double *r_im[2],
unsigned int input_offset, unsigned int input_offset,
...@@ -126,6 +145,14 @@ void adc_prach_SSE_float(float *r_re[2], ...@@ -126,6 +145,14 @@ void adc_prach_SSE_float(float *r_re[2],
unsigned int nb_rx_antennas, unsigned int nb_rx_antennas,
unsigned int length, unsigned int length,
unsigned char B); unsigned char B);
void adc_prach_AVX_float(float *r_re[2],
float *r_im[2],
unsigned int input_offset,
unsigned int output_offset,
int **output,
unsigned int nb_rx_antennas,
unsigned int length,
unsigned char B);
void dac(double *s_re[2], void dac(double *s_re[2],
double *s_im[2], double *s_im[2],
...@@ -172,6 +199,17 @@ float dac_fixed_gain_SSE_float(float *s_re[2], ...@@ -172,6 +199,17 @@ float dac_fixed_gain_SSE_float(float *s_re[2],
unsigned char B, unsigned char B,
float gain, float gain,
int NB_RE); int NB_RE);
float dac_fixed_gain_AVX_float(float *s_re[2],
float *s_im[2],
int **input,
unsigned int input_offset,
unsigned int nb_tx_antennas,
unsigned int length,
unsigned int input_offset_meas,
unsigned int length_meas,
unsigned char B,
float gain,
int NB_RE);
double dac_fixed_gain_prach(double *s_re[2], double dac_fixed_gain_prach(double *s_re[2],
double *s_im[2], double *s_im[2],
...@@ -197,3 +235,15 @@ float dac_fixed_gain_prach_SSE_float(float *s_re[2], ...@@ -197,3 +235,15 @@ float dac_fixed_gain_prach_SSE_float(float *s_re[2],
float gain, float gain,
int NB_RE, int NB_RE,
int ofdm_symbol_size); int ofdm_symbol_size);
float dac_fixed_gain_prach_AVX_float(float *s_re[2],
float *s_im[2],
int *input,
unsigned int input_offset,
unsigned int nb_tx_antennas,
unsigned int length,
unsigned int input_offset_meas,
unsigned int length_meas,
unsigned char B,
float gain,
int NB_RE,
int ofdm_symbol_size);
...@@ -473,6 +473,109 @@ clock_t start=clock();*/ ...@@ -473,6 +473,109 @@ clock_t start=clock();*/
printf("do_DL_sig time is %f s, AVERAGE time is %f s, count %d, sum %e\n",(float) (stop-start)/CLOCKS_PER_SEC,(float) (sum+stop-start)/(count*CLOCKS_PER_SEC),count,sum+stop-start); printf("do_DL_sig time is %f s, AVERAGE time is %f s, count %d, sum %e\n",(float) (stop-start)/CLOCKS_PER_SEC,(float) (sum+stop-start)/(count*CLOCKS_PER_SEC),count,sum+stop-start);
sum=(sum+stop-start);*/ sum=(sum+stop-start);*/
} }
void rf_rx_simple_freq_AVX_float(float *r_re[2],
float *r_im[2],
unsigned int nb_rx_antennas,
unsigned int length,
float s_time,
float rx_gain_dB,
unsigned int symbols_per_tti,
unsigned int ofdm_symbol_size,
unsigned int n_samples)
{
/* static int first_run=0;
static double sum;
static int count;
if (!first_run)
{
first_run=1;
sum=0;
count=0;
}
count++;*/
__m256 rx256_re,rx256_im,rx256_gain_lin,gauss_0_256_sqrt_NOW,gauss_1_256_sqrt_NOW;//double
int i,a;
float rx_gain_lin = pow(10.0,.05*rx_gain_dB);
//static float out[4] __attribute__((aligned(16)));
//static float out1[4] __attribute__((aligned(16)));
//double rx_gain_lin = 1.0;
float N0W = pow(10.0,.1*(-174.0 - 10*log10(s_time*1e-9)));
float sqrt_NOW = rx_gain_lin*sqrt(.5*N0W);
//double N0W = 0.0;
// printf("s_time=%f, N0W=%g\n",s_time,10*log10(N0W));
//Loop over input
#ifdef DEBUG_RF
printf("N0W = %f dBm\n",10*log10(N0W));
printf("rx_gain = %f dB(%f)\n",rx_gain_dB,rx_gain_lin);
#endif
//rx128_gain_lin=mm_loadu_pd(rx_gain_lin);
/*count++;
clock_t start=clock();*/
rx256_gain_lin = _mm256_set1_ps(rx_gain_lin);
for (i=0; i<(length>>3); i++) {
for (a=0; a<nb_rx_antennas; a++) {
/*if (i%(ofdm_symbol_size>>2)>(n_samples>>2) && i%(ofdm_symbol_size>>2)<(ofdm_symbol_size>>2)-(n_samples>>2))
{
//printf("i = %d\n",i);
//_mm_storeu_pd(&r_re[a][2*i],_mm_setzero_pd());
//_mm_storeu_pd(&r_im[a][2*i],_mm_setzero_pd());
break;
}
else
{*/
rx256_re = _mm256_loadu_ps(&r_re[a][8*i]);//r_re[a][i],r_re[a][i+1]
rx256_im = _mm256_loadu_ps(&r_im[a][8*i]);//r_im[a][i],r_im[a][i+1]
//start_meas(&desc->ziggurat);
//gauss_0_128_sqrt_NOW = _mm_set1_ps(1);
//gauss_1_128_sqrt_NOW = _mm_set1_ps(1);
//gauss_0_128_sqrt_NOW = _mm_set_ps(gaussdouble(0.0,1.0),gaussdouble(0.0,1.0),gaussdouble(0.0,1.0),gaussdouble(0.0,1.0));
//gauss_1_128_sqrt_NOW = _mm_set_ps(gaussdouble(0.0,1.0),gaussdouble(0.0,1.0),gaussdouble(0.0,1.0),gaussdouble(0.0,1.0));
//gauss_0_128_sqrt_NOW = _mm_set_ps(ziggurat(0.0,1.0),ziggurat(0.0,1.0),ziggurat(0.0,1.0),ziggurat(0.0,1.0));
//gauss_1_128_sqrt_NOW = _mm_set_ps(ziggurat(0.0,1.0),ziggurat(0.0,1.0),ziggurat(0.0,1.0),ziggurat(0.0,1.0));
boxmuller_AVX_float(&gauss_0_256_sqrt_NOW, &gauss_1_256_sqrt_NOW);
//gauss_0_256_sqrt_NOW = ziggurat_SSE_float();
//gauss_1_256_sqrt_NOW = ziggurat_SSE_float();
//stop_meas(&desc->ziggurat);
gauss_0_256_sqrt_NOW = _mm256_mul_ps(gauss_0_256_sqrt_NOW,_mm256_set1_ps(sqrt_NOW));
gauss_1_256_sqrt_NOW = _mm256_mul_ps(gauss_1_256_sqrt_NOW,_mm256_set1_ps(sqrt_NOW));
// Amplify by receiver gain and apply 3rd order non-linearity
rx256_re = _mm256_add_ps(_mm256_mul_ps(rx256_re,rx256_gain_lin),gauss_0_256_sqrt_NOW);
rx256_im = _mm256_add_ps(_mm256_mul_ps(rx256_im,rx256_gain_lin),gauss_1_256_sqrt_NOW);
_mm256_storeu_ps(&r_re[a][8*i],rx256_re);
_mm256_storeu_ps(&r_im[a][8*i],rx256_im);
//}
}
}
/*rx128_re = _mm_loadu_ps(&r_re[a][4*i+ofdm_symbol_size*j]);//r_re[a][i],r_re[a][i+1]
rx128_im = _mm_loadu_ps(&r_im[a][4*i+ofdm_symbol_size*j]);//r_im[a][i],r_im[a][i+1]
rx128_re_1 = _mm_loadu_ps(&r_re[a][(ofdm_symbol_size-n_samples)+4*i+ofdm_symbol_size*j]);//r_re[a][i],r_re[a][i+1]
rx128_im_1 = _mm_loadu_ps(&r_im[a][(ofdm_symbol_size-n_samples)+4*i+ofdm_symbol_size*j]);//r_im[a][i],r_im[a][i+1]
//start_meas(&desc->ziggurat);
//gauss_0_128_sqrt_NOW = _mm_set_ps(ziggurat(0.0,1.0),ziggurat(0.0,1.0),ziggurat(0.0,1.0),ziggurat(0.0,1.0));
//gauss_1_128_sqrt_NOW = _mm_set_ps(ziggurat(0.0,1.0),ziggurat(0.0,1.0),ziggurat(0.0,1.0),ziggurat(0.0,1.0));
boxmuller_SSE_float(&gauss_0_128_sqrt_NOW, &gauss_1_128_sqrt_NOW);
boxmuller_SSE_float(&gauss_0_128_sqrt_NOW_1, &gauss_1_128_sqrt_NOW_1);
//stop_meas(&desc->ziggurat);
gauss_0_128_sqrt_NOW = _mm_mul_ps(gauss_0_128_sqrt_NOW,_mm_set1_ps(sqrt_NOW));
gauss_1_128_sqrt_NOW = _mm_mul_ps(gauss_1_128_sqrt_NOW,_mm_set1_ps(sqrt_NOW));
gauss_0_128_sqrt_NOW_1 = _mm_mul_ps(gauss_0_128_sqrt_NOW_1,_mm_set1_ps(sqrt_NOW));
gauss_1_128_sqrt_NOW_1 = _mm_mul_ps(gauss_1_128_sqrt_NOW_1,_mm_set1_ps(sqrt_NOW));
// Amplify by receiver gain and apply 3rd order non-linearity
rx128_re = _mm_add_ps(_mm_mul_ps(rx128_re,rx128_gain_lin),gauss_0_128_sqrt_NOW);
rx128_im = _mm_add_ps(_mm_mul_ps(rx128_im,rx128_gain_lin),gauss_1_128_sqrt_NOW);
rx128_re_1 = _mm_add_ps(_mm_mul_ps(rx128_re_1,rx128_gain_lin),gauss_0_128_sqrt_NOW_1);
rx128_im_1 = _mm_add_ps(_mm_mul_ps(rx128_im_1,rx128_gain_lin),gauss_1_128_sqrt_NOW_1);
_mm_storeu_ps(&r_re[a][4*i+ofdm_symbol_size*j],rx128_re);
_mm_storeu_ps(&r_im[a][4*i+ofdm_symbol_size*j],rx128_im);
_mm_storeu_ps(&r_re[a][(ofdm_symbol_size-n_samples)+4*i+ofdm_symbol_size*j],rx128_re_1);
_mm_storeu_ps(&r_im[a][(ofdm_symbol_size-n_samples)+4*i+ofdm_symbol_size*j],rx128_im_1);*/
/*clock_t stop=clock();
printf("do_DL_sig time is %f s, AVERAGE time is %f s, count %d, sum %e\n",(float) (stop-start)/CLOCKS_PER_SEC,(float) (sum+stop-start)/(count*CLOCKS_PER_SEC),count,sum+stop-start);
sum=(sum+stop-start);*/
}
#ifdef RF_MAIN #ifdef RF_MAIN
#define INPUT_dBm -70.0 #define INPUT_dBm -70.0
......
...@@ -153,7 +153,7 @@ int init_freq_channel_SSE_float(channel_desc_t *desc,uint16_t nb_rb,int16_t n_sa ...@@ -153,7 +153,7 @@ int init_freq_channel_SSE_float(channel_desc_t *desc,uint16_t nb_rb,int16_t n_sa
float delay; float delay;
int16_t f; int16_t f;
uint8_t l; uint8_t l;
__m128 cos_lut128,sin_lut128; __m128 cos_lut128,sin_lut128;//,cos_lut128_tmp,sin_lut128_tmp;
/*__m128 x128, log128, exp128; /*__m128 x128, log128, exp128;
__m256 x256, log256, exp256; __m256 x256, log256, exp256;
x128 = _mm_set_ps(1.0,2.0,3.0,4.0); x128 = _mm_set_ps(1.0,2.0,3.0,4.0);
...@@ -214,12 +214,15 @@ int init_freq_channel_SSE_float(channel_desc_t *desc,uint16_t nb_rb,int16_t n_sa ...@@ -214,12 +214,15 @@ int init_freq_channel_SSE_float(channel_desc_t *desc,uint16_t nb_rb,int16_t n_sa
delay = desc->delays[l]; delay = desc->delays[l];
else else
delay = desc->delays[l]+NB_SAMPLES_CHANNEL_OFFSET/desc->sampling_rate; delay = desc->delays[l]+NB_SAMPLES_CHANNEL_OFFSET/desc->sampling_rate;
//sincos_ps(_mm_set_ps(twopi*(4*f+3)*delay,twopi*(4*f+2)*delay,twopi*(4*f+1)*delay,twopi*(4*f)*delay), &sin_lut128, &cos_lut128); sincos_ps(_mm_set_ps(twopi*(4*f+3)*delay,twopi*(4*f+2)*delay,twopi*(4*f+1)*delay,twopi*(4*f)*delay), &sin_lut128, &cos_lut128);
cos_lut128=_mm_set_ps(cos(twopi*(4*f+3)*delay),cos(twopi*(4*f+2)*delay),cos(twopi*(4*f+1)*delay),cos(twopi*(4*f)*delay)); //cos_lut128=_mm_set_ps(cos(twopi*(4*f+3)*delay),cos(twopi*(4*f+2)*delay),cos(twopi*(4*f+1)*delay),cos(twopi*(4*f)*delay));
sin_lut128=_mm_set_ps(sin(twopi*(4*f+3)*delay),sin(twopi*(4*f+2)*delay),sin(twopi*(4*f+1)*delay),sin(twopi*(4*f)*delay)); //sin_lut128=_mm_set_ps(sin(twopi*(4*f+3)*delay),sin(twopi*(4*f+2)*delay),sin(twopi*(4*f+1)*delay),sin(twopi*(4*f)*delay));
_mm_storeu_ps(&cos_lut_f[l][4*f+(n_samples>>1)],cos_lut128); _mm_storeu_ps(&cos_lut_f[l][4*f+(n_samples>>1)],cos_lut128);
_mm_storeu_ps(&sin_lut_f[l][4*f+(n_samples>>1)],sin_lut128); _mm_storeu_ps(&sin_lut_f[l][4*f+(n_samples>>1)],sin_lut128);
/*printf("sin128 %e,%e,%e,%e\n",sin_lut128_tmp[0],sin_lut128_tmp[1],sin_lut128_tmp[2],sin_lut128_tmp[3]);
printf("cos128 %e,%e,%e,%e\n",cos_lut128_tmp[0],cos_lut128_tmp[1],cos_lut128_tmp[2],cos_lut128_tmp[3]);
printf("sin %e,%e,%e,%e\n",sin_lut128[0],sin_lut128[1],sin_lut128[2],sin_lut128[3]);
printf("cos %e,%e,%e,%e\n",cos_lut128[0],cos_lut128[1],cos_lut128[2],cos_lut128[3]);*/
} }
} }
for (l=0; l<(int)desc->nb_taps; l++) for (l=0; l<(int)desc->nb_taps; l++)
...@@ -229,7 +232,7 @@ int init_freq_channel_SSE_float(channel_desc_t *desc,uint16_t nb_rb,int16_t n_sa ...@@ -229,7 +232,7 @@ int init_freq_channel_SSE_float(channel_desc_t *desc,uint16_t nb_rb,int16_t n_sa
//printf("f %d,l %d (cos,sin) (%e,%e):\n",4*f,l,cos_lut_f[(n_samples>>1)][l],sin_lut_f[(n_samples>>1)][l]); //printf("f %d,l %d (cos,sin) (%e,%e):\n",4*f,l,cos_lut_f[(n_samples>>1)][l],sin_lut_f[(n_samples>>1)][l]);
} }
for (f=1; f<=(n_samples>>3); f++) { for (f=1; f<=(n_samples>>3)+1; f++) {
//count++; //count++;
//freq=delta_f*(double)f*1e-6;// due to the fact that delays is in mus //freq=delta_f*(double)f*1e-6;// due to the fact that delays is in mus
for (l=0; l<(int)desc->nb_taps; l++) { for (l=0; l<(int)desc->nb_taps; l++) {
...@@ -237,8 +240,9 @@ int init_freq_channel_SSE_float(channel_desc_t *desc,uint16_t nb_rb,int16_t n_sa ...@@ -237,8 +240,9 @@ int init_freq_channel_SSE_float(channel_desc_t *desc,uint16_t nb_rb,int16_t n_sa
delay = desc->delays[l]; delay = desc->delays[l];
else else
delay = desc->delays[l]+NB_SAMPLES_CHANNEL_OFFSET/desc->sampling_rate; delay = desc->delays[l]+NB_SAMPLES_CHANNEL_OFFSET/desc->sampling_rate;
cos_lut128=_mm_set_ps(cos(twopi*(4*f)*delay),cos(twopi*(4*f-1)*delay),cos(twopi*(4*f-2)*delay),cos(twopi*(4*f-3)*delay)); sincos_ps(_mm_set_ps(twopi*(4*f)*delay,twopi*(4*f-1)*delay,twopi*(4*f-2)*delay,twopi*(4*f-3)*delay), &sin_lut128, &cos_lut128);
sin_lut128=_mm_set_ps(sin(twopi*(4*f)*delay),sin(twopi*(4*f-1)*delay),sin(twopi*(4*f-2)*delay),sin(twopi*(4*f-3)*delay)); //cos_lut128=_mm_set_ps(cos(twopi*(4*f)*delay),cos(twopi*(4*f-1)*delay),cos(twopi*(4*f-2)*delay),cos(twopi*(4*f-3)*delay));
//sin_lut128=_mm_set_ps(sin(twopi*(4*f)*delay),sin(twopi*(4*f-1)*delay),sin(twopi*(4*f-2)*delay),sin(twopi*(4*f-3)*delay));
_mm_storeu_ps(&cos_lut_f[l][4*f-3+(n_samples>>1)],cos_lut128); _mm_storeu_ps(&cos_lut_f[l][4*f-3+(n_samples>>1)],cos_lut128);
_mm_storeu_ps(&sin_lut_f[l][4*f-3+(n_samples>>1)],sin_lut128); _mm_storeu_ps(&sin_lut_f[l][4*f-3+(n_samples>>1)],sin_lut128);
} }
...@@ -287,9 +291,9 @@ int init_freq_channel_AVX_float(channel_desc_t *desc,uint16_t nb_rb,int16_t n_sa ...@@ -287,9 +291,9 @@ int init_freq_channel_AVX_float(channel_desc_t *desc,uint16_t nb_rb,int16_t n_sa
else else
delay = desc->delays[l]+NB_SAMPLES_CHANNEL_OFFSET/desc->sampling_rate; delay = desc->delays[l]+NB_SAMPLES_CHANNEL_OFFSET/desc->sampling_rate;
//sincos_ps(_mm_set_ps(twopi*(4*f+3)*delay,twopi*(4*f+2)*delay,twopi*(4*f+1)*delay,twopi*(4*f)*delay), &sin_lut128, &cos_lut128); sincos256_ps(_mm256_set_ps(twopi*(4*f+7)*delay,twopi*(4*f+6)*delay,twopi*(4*f+5)*delay,twopi*(4*f+4)*delay,twopi*(4*f+3)*delay,twopi*(4*f+2)*delay,twopi*(4*f+1)*delay,twopi*(4*f)*delay), &sin_lut256, &cos_lut256);
cos_lut256=_mm256_set_ps(cos(twopi*(8*f+7)*delay),cos(twopi*(8*f+6)*delay),cos(twopi*(8*f+5)*delay),cos(twopi*(8*f+4)*delay),cos(twopi*(8*f+3)*delay),cos(twopi*(8*f+2)*delay),cos(twopi*(8*f+1)*delay),cos(twopi*(8*f)*delay)); //cos_lut256=_mm256_set_ps(cos(twopi*(8*f+7)*delay),cos(twopi*(8*f+6)*delay),cos(twopi*(8*f+5)*delay),cos(twopi*(8*f+4)*delay),cos(twopi*(8*f+3)*delay),cos(twopi*(8*f+2)*delay),cos(twopi*(8*f+1)*delay),cos(twopi*(8*f)*delay));
sin_lut256=_mm256_set_ps(sin(twopi*(8*f+7)*delay),sin(twopi*(8*f+6)*delay),sin(twopi*(8*f+5)*delay),sin(twopi*(8*f+4)*delay),sin(twopi*(8*f+3)*delay),sin(twopi*(8*f+2)*delay),sin(twopi*(8*f+1)*delay),sin(twopi*(8*f)*delay)); //sin_lut256=_mm256_set_ps(sin(twopi*(8*f+7)*delay),sin(twopi*(8*f+6)*delay),sin(twopi*(8*f+5)*delay),sin(twopi*(8*f+4)*delay),sin(twopi*(8*f+3)*delay),sin(twopi*(8*f+2)*delay),sin(twopi*(8*f+1)*delay),sin(twopi*(8*f)*delay));
_mm256_storeu_ps(&cos_lut_f[l][8*f+(n_samples>>1)],cos_lut256); _mm256_storeu_ps(&cos_lut_f[l][8*f+(n_samples>>1)],cos_lut256);
_mm256_storeu_ps(&sin_lut_f[l][8*f+(n_samples>>1)],sin_lut256); _mm256_storeu_ps(&sin_lut_f[l][8*f+(n_samples>>1)],sin_lut256);
...@@ -302,7 +306,7 @@ int init_freq_channel_AVX_float(channel_desc_t *desc,uint16_t nb_rb,int16_t n_sa ...@@ -302,7 +306,7 @@ int init_freq_channel_AVX_float(channel_desc_t *desc,uint16_t nb_rb,int16_t n_sa
//printf("f %d,l %d (cos,sin) (%e,%e):\n",4*f,l,cos_lut_f[(n_samples>>1)][l],sin_lut_f[(n_samples>>1)][l]); //printf("f %d,l %d (cos,sin) (%e,%e):\n",4*f,l,cos_lut_f[(n_samples>>1)][l],sin_lut_f[(n_samples>>1)][l]);
} }
for (f=1; f<=(n_samples>>4); f++) { for (f=1; f<=(n_samples>>4)+1; f++) {
//count++; //count++;
//freq=delta_f*(double)f*1e-6;// due to the fact that delays is in mus //freq=delta_f*(double)f*1e-6;// due to the fact that delays is in mus
for (l=0; l<(int)desc->nb_taps; l++) { for (l=0; l<(int)desc->nb_taps; l++) {
...@@ -310,8 +314,9 @@ int init_freq_channel_AVX_float(channel_desc_t *desc,uint16_t nb_rb,int16_t n_sa ...@@ -310,8 +314,9 @@ int init_freq_channel_AVX_float(channel_desc_t *desc,uint16_t nb_rb,int16_t n_sa
delay = desc->delays[l]; delay = desc->delays[l];
else else
delay = desc->delays[l]+NB_SAMPLES_CHANNEL_OFFSET/desc->sampling_rate; delay = desc->delays[l]+NB_SAMPLES_CHANNEL_OFFSET/desc->sampling_rate;
cos_lut256=_mm256_set_ps(cos(twopi*(4*f)*delay),cos(twopi*(4*f-1)*delay),cos(twopi*(4*f-2)*delay),cos(twopi*(4*f-3)*delay),cos(twopi*(4*f-4)*delay),cos(twopi*(4*f-5)*delay),cos(twopi*(4*f-6)*delay),cos(twopi*(4*f-7)*delay)); sincos256_ps(_mm256_set_ps(twopi*(4*f)*delay,twopi*(4*f-1)*delay,twopi*(4*f-2)*delay,twopi*(4*f-3)*delay,twopi*(4*f-4)*delay,twopi*(4*f-5)*delay,twopi*(4*f-6)*delay,twopi*(4*f-7)*delay), &sin_lut256, &cos_lut256);
sin_lut256=_mm256_set_ps(sin(twopi*(4*f)*delay),sin(twopi*(4*f-1)*delay),sin(twopi*(4*f-2)*delay),sin(twopi*(4*f-3)*delay),sin(twopi*(4*f-4)*delay),sin(twopi*(4*f-5)*delay),sin(twopi*(4*f-6)*delay),sin(twopi*(4*f-7)*delay)); //cos_lut256=_mm256_set_ps(cos(twopi*(4*f)*delay),cos(twopi*(4*f-1)*delay),cos(twopi*(4*f-2)*delay),cos(twopi*(4*f-3)*delay),cos(twopi*(4*f-4)*delay),cos(twopi*(4*f-5)*delay),cos(twopi*(4*f-6)*delay),cos(twopi*(4*f-7)*delay));
//sin_lut256=_mm256_set_ps(sin(twopi*(4*f)*delay),sin(twopi*(4*f-1)*delay),sin(twopi*(4*f-2)*delay),sin(twopi*(4*f-3)*delay),sin(twopi*(4*f-4)*delay),sin(twopi*(4*f-5)*delay),sin(twopi*(4*f-6)*delay),sin(twopi*(4*f-7)*delay));
_mm256_storeu_ps(&cos_lut_f[l][8*f-7+(n_samples>>1)],cos_lut256); _mm256_storeu_ps(&cos_lut_f[l][8*f-7+(n_samples>>1)],cos_lut256);
_mm256_storeu_ps(&sin_lut_f[l][8*f-7+(n_samples>>1)],sin_lut256); _mm256_storeu_ps(&sin_lut_f[l][8*f-7+(n_samples>>1)],sin_lut256);
} }
......
...@@ -371,6 +371,17 @@ void multipath_channel_prach_SSE_float(channel_desc_t *desc, ...@@ -371,6 +371,17 @@ void multipath_channel_prach_SSE_float(channel_desc_t *desc,
uint8_t eNB_id, uint8_t eNB_id,
uint8_t prach_fmt, uint8_t prach_fmt,
uint8_t n_ra_prb); uint8_t n_ra_prb);
void multipath_channel_prach_AVX_float(channel_desc_t *desc,
float *tx_sig_re[2],
float *tx_sig_im[2],
float *rx_sig_re[2],
float *rx_sig_im[2],
LTE_DL_FRAME_PARMS* const fp,
uint32_t length,
uint8_t keep_channel,
uint8_t eNB_id,
uint8_t prach_fmt,
uint8_t n_ra_prb);
/* /*
\fn double compute_pbch_sinr(channel_desc_t *desc, \fn double compute_pbch_sinr(channel_desc_t *desc,
channel_desc_t *desc_i1, channel_desc_t *desc_i1,
......
...@@ -528,14 +528,14 @@ void multipath_channel_freq_SSE_float(channel_desc_t *desc, ...@@ -528,14 +528,14 @@ void multipath_channel_freq_SSE_float(channel_desc_t *desc,
// do nothing - keep channel // do nothing - keep channel
} else { } else {
random_channel_freq(desc,0); random_channel_freq(desc,0);
freq_channel_SSE_float(desc,nb_rb,n_samples);//Find desc->chF freq_channel_AVX_float(desc,nb_rb,n_samples);//Find desc->chF
} }
for (j=0;j<(symbols_per_tti>>2);j++){ /*for (j=0;j<(ofdm_symbol_size>>2);j++){
for (ii=0; ii<desc->nb_rx; ii++) { for (ii=0; ii<desc->nb_rx; ii++) {
_mm_storeu_ps(&rx_sig_re[ii][4*j*ofdm_symbol_size],_mm_setzero_ps()); _mm_storeu_ps(&rx_sig_re[ii][4*j*symbols_per_tti],_mm_setzero_ps());
_mm_storeu_ps(&rx_sig_im[ii][4*j*ofdm_symbol_size],_mm_setzero_ps()); _mm_storeu_ps(&rx_sig_im[ii][4*j*symbols_per_tti],_mm_setzero_ps());
} }
} }*/
for (f=0;f<((ofdm_symbol_size*symbols_per_tti)>>2); f++) {//f2 = 0-1024*14-1 ---- for 10 Mhz BW for (f=0;f<((ofdm_symbol_size*symbols_per_tti)>>2); f++) {//f2 = 0-1024*14-1 ---- for 10 Mhz BW
//printf("f is %d\n",f); //printf("f is %d\n",f);
for (ii=0; ii<desc->nb_rx; ii++) { for (ii=0; ii<desc->nb_rx; ii++) {
...@@ -649,14 +649,14 @@ void multipath_channel_freq_AVX_float(channel_desc_t *desc, ...@@ -649,14 +649,14 @@ void multipath_channel_freq_AVX_float(channel_desc_t *desc,
// do nothing - keep channel // do nothing - keep channel
} else { } else {
random_channel_freq(desc,0); random_channel_freq(desc,0);
freq_channel_SSE_float(desc,nb_rb,n_samples);//Find desc->chF freq_channel_AVX_float(desc,nb_rb,n_samples);//Find desc->chF
} }
for (j=0;j<(symbols_per_tti>>2);j++){ /*for (j=0;j<(symbols_per_tti>>2);j++){
for (ii=0; ii<desc->nb_rx; ii++) { for (ii=0; ii<desc->nb_rx; ii++) {
_mm256_storeu_ps(&rx_sig_re[ii][4*j*ofdm_symbol_size],_mm256_setzero_ps()); _mm256_storeu_ps(&rx_sig_re[ii][4*j*ofdm_symbol_size],_mm256_setzero_ps());
_mm256_storeu_ps(&rx_sig_im[ii][4*j*ofdm_symbol_size],_mm256_setzero_ps()); _mm256_storeu_ps(&rx_sig_im[ii][4*j*ofdm_symbol_size],_mm256_setzero_ps());
} }
} }*/
for (f=0;f<((ofdm_symbol_size*symbols_per_tti)>>3); f++) {//f2 = 0-1024*14-1 ---- for 10 Mhz BW for (f=0;f<((ofdm_symbol_size*symbols_per_tti)>>3); f++) {//f2 = 0-1024*14-1 ---- for 10 Mhz BW
//printf("f is %d\n",f); //printf("f is %d\n",f);
for (ii=0; ii<desc->nb_rx; ii++) { for (ii=0; ii<desc->nb_rx; ii++) {
...@@ -664,16 +664,16 @@ void multipath_channel_freq_AVX_float(channel_desc_t *desc, ...@@ -664,16 +664,16 @@ void multipath_channel_freq_AVX_float(channel_desc_t *desc,
//rx_tmp.y = 0; //rx_tmp.y = 0;
rx_tmp256_re_f = _mm256_setzero_ps(); rx_tmp256_re_f = _mm256_setzero_ps();
rx_tmp256_im_f = _mm256_setzero_ps(); rx_tmp256_im_f = _mm256_setzero_ps();
if (f%(ofdm_symbol_size>>2)<(n_samples>>2))//1-300 if (f%(ofdm_symbol_size>>3)<(n_samples>>3))//1-300
{ {
for (j=0; j<desc->nb_tx; j++) { for (j=0; j<desc->nb_tx; j++) {
//first n_samples>>1 samples of each frequency ofdm symbol out of ofdm_symbol_size //first n_samples>>1 samples of each frequency ofdm symbol out of ofdm_symbol_size
//RX_RE(k) += TX_RE(k).chF(k).x - TX_IM(k).chF(k).y //RX_RE(k) += TX_RE(k).chF(k).x - TX_IM(k).chF(k).y
//RX_IM(k) += TX_IM(k).chF(k).x + TX_RE(k).chF(k).y //RX_IM(k) += TX_IM(k).chF(k).x + TX_RE(k).chF(k).y
tx256_re = _mm256_loadu_ps(&tx_sig_re[j][(4*f+1)]); tx256_re = _mm256_loadu_ps(&tx_sig_re[j][(8*f+1)]);
tx256_im = _mm256_loadu_ps(&tx_sig_im[j][(4*f+1)]); tx256_im = _mm256_loadu_ps(&tx_sig_im[j][(8*f+1)]);
chF256_x = _mm256_set1_ps(desc->chFf[ii+(j*desc->nb_rx)].x[(4*(f%(ofdm_symbol_size>>2)))+(n_samples>>2)]); chF256_x = _mm256_set1_ps(desc->chFf[ii+(j*desc->nb_rx)].x[(8*(f%(ofdm_symbol_size>>3)))+(n_samples>>3)]);
chF256_y = _mm256_set1_ps(desc->chFf[ii+(j*desc->nb_rx)].y[(4*(f%(ofdm_symbol_size>>2)))+(n_samples>>2)]); chF256_y = _mm256_set1_ps(desc->chFf[ii+(j*desc->nb_rx)].y[(8*(f%(ofdm_symbol_size>>3)))+(n_samples>>3)]);
//rx_tmp.x += (tx_sig_re[j][f+k*ofdm_symbol_size] * desc->chF[ii+(j*desc->nb_rx)][f+(n_samples>>1)-1].x)//tx128_re*ch128_x //rx_tmp.x += (tx_sig_re[j][f+k*ofdm_symbol_size] * desc->chF[ii+(j*desc->nb_rx)][f+(n_samples>>1)-1].x)//tx128_re*ch128_x
// -(tx_sig_im[j][f+k*ofdm_symbol_size] * desc->chF[ii+(j*desc->nb_rx)][f+(n_samples>>1)-1].y);//-tx128_im*ch128_y // -(tx_sig_im[j][f+k*ofdm_symbol_size] * desc->chF[ii+(j*desc->nb_rx)][f+(n_samples>>1)-1].y);//-tx128_im*ch128_y
//rx_tmp.y += (tx_sig_im[j][f+k*ofdm_symbol_size] * desc->chF[ii+(j*desc->nb_rx)][f+(n_samples>>1)-1].x)//tx128_im*ch128_x //rx_tmp.y += (tx_sig_im[j][f+k*ofdm_symbol_size] * desc->chF[ii+(j*desc->nb_rx)][f+(n_samples>>1)-1].x)//tx128_im*ch128_x
...@@ -691,10 +691,10 @@ void multipath_channel_freq_AVX_float(channel_desc_t *desc, ...@@ -691,10 +691,10 @@ void multipath_channel_freq_AVX_float(channel_desc_t *desc,
//rx_sig_im[ii][f+k*ofdm_symbol_size] = rx_tmp.y*path_loss; //rx_sig_im[ii][f+k*ofdm_symbol_size] = rx_tmp.y*path_loss;
rx_tmp256_re_f = _mm256_mul_ps(rx_tmp256_re_f,pathloss256); rx_tmp256_re_f = _mm256_mul_ps(rx_tmp256_re_f,pathloss256);
rx_tmp256_im_f = _mm256_mul_ps(rx_tmp256_im_f,pathloss256); rx_tmp256_im_f = _mm256_mul_ps(rx_tmp256_im_f,pathloss256);
_mm256_storeu_ps(&rx_sig_re[ii][(4*f+1)],rx_tmp256_re_f); _mm256_storeu_ps(&rx_sig_re[ii][(8*f+1)],rx_tmp256_re_f);
_mm256_storeu_ps(&rx_sig_im[ii][(4*f+1)],rx_tmp256_im_f); _mm256_storeu_ps(&rx_sig_im[ii][(8*f+1)],rx_tmp256_im_f);
} }
else if (f%(ofdm_symbol_size>>2)>(n_samples>>2) && f%(ofdm_symbol_size>>2)<(ofdm_symbol_size>>2)-(n_samples>>2)) else if (f%(ofdm_symbol_size>>3)>(n_samples>>3) && f%(ofdm_symbol_size>>3)<(ofdm_symbol_size>>3)-(n_samples>>3))
{ {
//rx_sig_re[ii][f+k*ofdm_symbol_size] = 0; //rx_sig_re[ii][f+k*ofdm_symbol_size] = 0;
//rx_sig_im[ii][f+k*ofdm_symbol_size] = 0; //rx_sig_im[ii][f+k*ofdm_symbol_size] = 0;
...@@ -708,10 +708,10 @@ void multipath_channel_freq_AVX_float(channel_desc_t *desc, ...@@ -708,10 +708,10 @@ void multipath_channel_freq_AVX_float(channel_desc_t *desc,
//last n_samples>>1 samples of each frequency ofdm symbol out of ofdm_symbol_size //last n_samples>>1 samples of each frequency ofdm symbol out of ofdm_symbol_size
//RX_RE(k) += TX_RE(k).chF(k).x - TX_IM(k).chF(k).y //RX_RE(k) += TX_RE(k).chF(k).x - TX_IM(k).chF(k).y
//RX_IM(k) += TX_IM(k).chF(k).x + TX_RE(k).chF(k).y //RX_IM(k) += TX_IM(k).chF(k).x + TX_RE(k).chF(k).y
tx256_re = _mm256_loadu_ps(&tx_sig_re[j][4*f]); tx256_re = _mm256_loadu_ps(&tx_sig_re[j][8*f]);
tx256_im = _mm256_loadu_ps(&tx_sig_im[j][4*f]); tx256_im = _mm256_loadu_ps(&tx_sig_im[j][8*f]);
chF256_x = _mm256_set1_ps(desc->chFf[ii+(j*desc->nb_rx)].x[4*(f%(ofdm_symbol_size>>2)-((ofdm_symbol_size>>2)-(n_samples>>2)))]); chF256_x = _mm256_set1_ps(desc->chFf[ii+(j*desc->nb_rx)].x[8*(f%(ofdm_symbol_size>>3)-((ofdm_symbol_size>>3)-(n_samples>>3)))]);
chF256_y = _mm256_set1_ps(desc->chFf[ii+(j*desc->nb_rx)].y[4*(f%(ofdm_symbol_size>>2)-((ofdm_symbol_size>>2)-(n_samples>>2)))]); chF256_y = _mm256_set1_ps(desc->chFf[ii+(j*desc->nb_rx)].y[8*(f%(ofdm_symbol_size>>3)-((ofdm_symbol_size>>3)-(n_samples>>3)))]);
//rx_tmp.x += (tx_sig_re[j][f+k*ofdm_symbol_size] * desc->chF[ii+(j*desc->nb_rx)][f2].x) //rx_tmp.x += (tx_sig_re[j][f+k*ofdm_symbol_size] * desc->chF[ii+(j*desc->nb_rx)][f2].x)
// -(tx_sig_im[j][f+k*ofdm_symbol_size] * desc->chF[ii+(j*desc->nb_rx)][f2].y); // -(tx_sig_im[j][f+k*ofdm_symbol_size] * desc->chF[ii+(j*desc->nb_rx)][f2].y);
//rx_tmp.y += (tx_sig_im[j][f+k*ofdm_symbol_size] * desc->chF[ii+(j*desc->nb_rx)][f2].x) //rx_tmp.y += (tx_sig_im[j][f+k*ofdm_symbol_size] * desc->chF[ii+(j*desc->nb_rx)][f2].x)
...@@ -729,8 +729,8 @@ void multipath_channel_freq_AVX_float(channel_desc_t *desc, ...@@ -729,8 +729,8 @@ void multipath_channel_freq_AVX_float(channel_desc_t *desc,
//rx_sig_im[ii][f+k*ofdm_symbol_size] = rx_tmp.y*path_loss; //rx_sig_im[ii][f+k*ofdm_symbol_size] = rx_tmp.y*path_loss;
rx_tmp256_re_f = _mm256_mul_ps(rx_tmp256_re_f,pathloss256); rx_tmp256_re_f = _mm256_mul_ps(rx_tmp256_re_f,pathloss256);
rx_tmp256_im_f = _mm256_mul_ps(rx_tmp256_im_f,pathloss256); rx_tmp256_im_f = _mm256_mul_ps(rx_tmp256_im_f,pathloss256);
_mm256_storeu_ps(&rx_sig_re[ii][4*f],rx_tmp256_re_f); _mm256_storeu_ps(&rx_sig_re[ii][8*f],rx_tmp256_re_f);
_mm256_storeu_ps(&rx_sig_im[ii][4*f],rx_tmp256_im_f); _mm256_storeu_ps(&rx_sig_im[ii][8*f],rx_tmp256_im_f);
} }
} // ii } // ii
} // f,f2,f3 } // f,f2,f3
...@@ -921,6 +921,73 @@ void multipath_channel_prach_SSE_float(channel_desc_t *desc, ...@@ -921,6 +921,73 @@ void multipath_channel_prach_SSE_float(channel_desc_t *desc,
} // ii } // ii
} // f } // f
} }
void multipath_channel_prach_AVX_float(channel_desc_t *desc,
float *tx_sig_re[2],
float *tx_sig_im[2],
float *rx_sig_re[2],
float *rx_sig_im[2],
LTE_DL_FRAME_PARMS* const fp,
uint32_t length,
uint8_t keep_channel,
uint8_t eNB_id,
uint8_t prach_fmt,
uint8_t n_ra_prb)
{
int ii,j,f;
__m256 rx_tmp256_re_f,rx_tmp256_im_f,rx_tmp256_re,rx_tmp256_im, rx_tmp256_1,rx_tmp256_2,rx_tmp256_3,rx_tmp256_4,tx256_re,tx256_im,chF256_x,chF256_y,pathloss256;
float path_loss = pow(10,desc->path_loss_dB/20);
pathloss256 = _mm256_set1_ps(path_loss);
int nb_rb, n_samples;
nb_rb=fp->N_RB_DL;
n_samples=fp->N_RB_DL*12+1;
#ifdef DEBUG_CH
printf("[CHANNEL_PRACH] keep = %d : path_loss = %g (%f), nb_rx %d, nb_tx %d, len %d \n",keep_channel,path_loss,desc->path_loss_dB,desc->nb_rx,desc->nb_tx,desc->channel_length);
#endif
if (keep_channel) {
// do nothing - keep channel
} else {
random_channel_freq(desc,0);
freq_channel_prach_SSE_float(desc,nb_rb,n_samples,prach_fmt,n_ra_prb);//Find desc->chF_prach
}
for (f=0;f<(length>>3); f++) {
//rx_tmp.x = 0;
//rx_tmp.y = 0;
rx_tmp256_re_f = _mm256_setzero_ps();
rx_tmp256_im_f = _mm256_setzero_ps();
for (ii=0; ii<desc->nb_rx; ii++) {
for (j=0; j<desc->nb_tx; j++) {
//RX_RE(k) = TX_RE(k).chF(k).x - TX_IM(k).chF(k).y
//RX_IM(k) = TX_IM(k).chF(k).x + TX_RE(k).chF(k).y
tx256_re = _mm256_loadu_ps(&tx_sig_re[j][(8*f)]);
tx256_im = _mm256_loadu_ps(&tx_sig_im[j][(8*f)]);
chF256_x = _mm256_set1_ps(desc->chF_prach[ii+(j*desc->nb_rx)].x[8*f+(prach_fmt<4)?13:3]);
chF256_y = _mm256_set1_ps(desc->chF_prach[ii+(j*desc->nb_rx)].y[8*f+(prach_fmt<4)?13:3]);
//rx_tmp.x += (tx_sig_re[ii][f] * desc->chF_prach[ii+(j*desc->nb_rx)][f+(prach_fmt<4)?13:3].x)-(tx_sig_im[ii][f] * desc->chF_prach[ii+(j*desc->nb_rx)][f+(prach_fmt<4)?13:3].y);
//rx_tmp.y += (tx_sig_im[ii][f] * desc->chF_prach[ii+(j*desc->nb_rx)][f+(prach_fmt<4)?13:3].x)+(tx_sig_re[ii][f] * desc->chF_prach[ii+(j*desc->nb_rx)][f+(prach_fmt<4)?13:3].y);
rx_tmp256_1 = _mm256_mul_ps(tx256_re,chF256_x);
rx_tmp256_2 = _mm256_mul_ps(tx256_im,chF256_y);
rx_tmp256_3 = _mm256_mul_ps(tx256_im,chF256_x);
rx_tmp256_4 = _mm256_mul_ps(tx256_re,chF256_y);
rx_tmp256_re = _mm256_sub_ps(rx_tmp256_1,rx_tmp256_2);
rx_tmp256_im = _mm256_add_ps(rx_tmp256_3,rx_tmp256_4);
rx_tmp256_re_f = _mm256_add_ps(rx_tmp256_re_f,rx_tmp256_re);
rx_tmp256_im_f = _mm256_add_ps(rx_tmp256_im_f,rx_tmp256_im);
} // j
//printf("[multipath prach] k: %d\n",k/2);
//rx_sig_re[ii][f] = rx_tmp.x*path_loss;
//rx_sig_im[ii][f] = rx_tmp.y*path_loss;
rx_tmp256_re_f = _mm256_mul_ps(rx_tmp256_re_f,pathloss256);
rx_tmp256_im_f = _mm256_mul_ps(rx_tmp256_im_f,pathloss256);
_mm256_storeu_ps(&rx_sig_re[ii][8*f],rx_tmp256_re_f); // max index: length-dd -1 + dd = length -1
_mm256_storeu_ps(&rx_sig_im[ii][8*f],rx_tmp256_im_f);
} // ii
} // f
}
void multipath_channel_freq_test(channel_desc_t *desc, void multipath_channel_freq_test(channel_desc_t *desc,
double *tx_sig_re[2], double *tx_sig_re[2],
double *tx_sig_im[2], double *tx_sig_im[2],
......
This diff is collapsed.
...@@ -1125,9 +1125,9 @@ int eNB_trx_read(openair0_device *device, openair0_timestamp *ptimestamp, void * ...@@ -1125,9 +1125,9 @@ int eNB_trx_read(openair0_device *device, openair0_timestamp *ptimestamp, void *
for (UE_id=0; UE_id<NB_UE_INST; UE_id++){ for (UE_id=0; UE_id<NB_UE_INST; UE_id++){
if (is_prach_subframe(&PHY_vars_UE_g[UE_id][CC_id]->frame_parms,frame,subframe) && PHY_vars_UE_g[UE_id][CC_id]->generate_prach) if (is_prach_subframe(&PHY_vars_UE_g[UE_id][CC_id]->frame_parms,frame,subframe) && PHY_vars_UE_g[UE_id][CC_id]->generate_prach)
{ {
start_meas(&UE2eNB[UE_id][eNB_id][CC_id]->UL_PRACH_channel);
//clock_t start=clock(); //clock_t start=clock();
printf("subframe UL PRACH: %d\n",subframe); printf("subframe UL PRACH: %d\n",subframe);
start_meas(&UE2eNB[UE_id][eNB_id][CC_id]->UL_PRACH_channel);
do_UL_sig_freq_prach(UE2eNB, do_UL_sig_freq_prach(UE2eNB,
enb_data, enb_data,
ue_data, ue_data,
...@@ -1137,11 +1137,11 @@ int eNB_trx_read(openair0_device *device, openair0_timestamp *ptimestamp, void * ...@@ -1137,11 +1137,11 @@ int eNB_trx_read(openair0_device *device, openair0_timestamp *ptimestamp, void *
0, // frame is only used for abstraction 0, // frame is only used for abstraction
eNB_id, eNB_id,
CC_id); CC_id);
stop_meas(&UE2eNB[UE_id][eNB_id][CC_id]->UL_PRACH_channel);
//clock_t stop=clock(); //clock_t stop=clock();
/*printf("do_DL_sig time_prach is %f s, AVERAGE time is %f s, count %d, sum %e\n",(float) (stop-start)/CLOCKS_PER_SEC,(float) (sum+stop-start)/(count1*CLOCKS_PER_SEC),count1,sum+stop-start); /*printf("do_DL_sig time_prach is %f s, AVERAGE time is %f s, count %d, sum %e\n",(float) (stop-start)/CLOCKS_PER_SEC,(float) (sum+stop-start)/(count1*CLOCKS_PER_SEC),count1,sum+stop-start);
sum=(sum+stop-start); sum=(sum+stop-start);
count1++;*/ count1++;*/
stop_meas(&UE2eNB[UE_id][eNB_id][CC_id]->UL_PRACH_channel);
//write_output("txprachF.m","prach_txF", PHY_vars_UE_g[0][CC_id]->prach_vars[0]->prachF,12*frame_parms->ofdm_symbol_size*frame_parms->symbols_per_tti,1,16); //write_output("txprachF.m","prach_txF", PHY_vars_UE_g[0][CC_id]->prach_vars[0]->prachF,12*frame_parms->ofdm_symbol_size*frame_parms->symbols_per_tti,1,16);
break; break;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment