Commit 2e2bc97b authored by lfarizav's avatar lfarizav

avx instructions for multipath_channel, box_muller, dac, adc and rf

parent 1a81fb5c
......@@ -327,6 +327,7 @@ int32_t signal_energy_nodc(int32_t *,uint32_t);
*/
double signal_energy_fp(double *s_re[2], double *s_im[2], uint32_t nb_antennas, uint32_t length,uint32_t offset);
float signal_energy_fp_SSE_float(float *s_re[2], float *s_im[2], uint32_t nb_antennas, uint32_t length,uint32_t offset);
float signal_energy_fp_AVX_float(float *s_re[2], float *s_im[2], uint32_t nb_antennas, uint32_t length,uint32_t offset);
/*!\fn double signal_energy_fp2(struct complex *, uint32_t);
\brief Computes the signal energy per subcarrier
*/
......
......@@ -349,7 +349,24 @@ float signal_energy_fp_SSE_float(float *s_re[2],float *s_im[2],uint32_t nb_anten
}
return((V128[0]+V128[1]+V128[2]+V128[3])/length/nb_antennas);
}
float signal_energy_fp_AVX_float(float *s_re[2],float *s_im[2],uint32_t nb_antennas,uint32_t length,uint32_t offset)
{
int32_t aa,i;
__m256 V256, s_re256,s_im256;
V256 = _mm256_setzero_ps();
for (i=0; i<(length>>3); i++) {
for (aa=0; aa<nb_antennas; aa++) {
// V= V + (s_re[aa][i+offset]*s_re[aa][i+offset]) + (s_im[aa][i+offset]*s_im[aa][i+offset]);
s_re256=_mm256_loadu_ps(&s_re[aa][8*i+offset]);
s_im256=_mm256_loadu_ps(&s_im[aa][8*i+offset]);
s_re256=_mm256_mul_ps(s_re256,s_re256);
s_im256=_mm256_mul_ps(s_im256,s_im256);
V256=_mm256_add_ps(V256,_mm256_add_ps(s_re256,s_im256));
}
}
return((V256[0]+V256[1]+V256[2]+V256[3]+V256[4]+V256[5]+V256[6]+V256[7])/length/nb_antennas);
}
double signal_energy_fp2(struct complex *s,uint32_t length)
{
......
......@@ -125,7 +125,40 @@ void adc_SSE_float(float *r_re[2],
}
}
}
void adc_AVX_float(float *r_re[2],
float *r_im[2],
unsigned int input_offset,
unsigned int output_offset,
unsigned int **output,
unsigned int nb_rx_antennas,
unsigned int length,
unsigned char B,
unsigned int samples,
unsigned int ofdm_symbol_size)
{
int i;
int aa;
__m256 r_re256,r_im256,gain256;
__m256i r_re256i, r_im256i,output256;
float gain = (float)(1<<(B-1));
gain256=_mm256_set1_ps(gain);
for (i=0; i<(length>>3); i++)
{
for (aa=0; aa<nb_rx_antennas; aa++)
{
r_re256=_mm256_loadu_ps(&r_re[aa][8*i+input_offset]);
r_im256=_mm256_loadu_ps(&r_im[aa][8*i+input_offset]);
r_re256=_mm256_mul_ps(r_re256,gain256);
r_im256=_mm256_mul_ps(r_im256,gain256);
r_re256i=_mm256_cvtps_epi32(r_re256);
r_im256i=_mm256_cvtps_epi32(r_im256);
r_re256i=_mm256_packs_epi32(r_re256i,r_re256i);
r_im256i=_mm256_packs_epi32(r_im256i,r_im256i);
output256=_mm256_unpacklo_epi16(r_re256i,r_im256i);
_mm256_storeu_si256((__m256i *)&output[aa][8*i+output_offset],output256);
}
}
}
void adc_freq(double *r_re[2],
double *r_im[2],
unsigned int input_offset,
......@@ -230,7 +263,43 @@ void adc_prach_SSE_float(float *r_re[2],
//printf("Adc outputs %d %e %d \n",i,((short *)output[0])[((i+output_offset)<<1)], ((i+output_offset)<<1) );
}
}
void adc_prach_AVX_float(float *r_re[2],
float *r_im[2],
unsigned int input_offset,
unsigned int output_offset,
unsigned int **output,
unsigned int nb_rx_antennas,
unsigned int length,
unsigned char B)
{
int i;
int aa;
__m256 r_re256,r_im256,gain256;
__m256i r_re256i, r_im256i,output256;
float gain = (double)(1<<(B-1));
gain256=_mm256_set1_ps(gain);
//double gain = 1.0;
for (i=0; i<(length>>3); i++) {
for (aa=0; aa<nb_rx_antennas; aa++) {
//((short *)output[aa])[((i+output_offset/2)<<1)] = (short)(r_re[aa][i+input_offset]*gain);
//((short *)output[aa])[1+((i+output_offset/2)<<1)] = (short)(r_im[aa][i+input_offset]*gain);
r_re256=_mm256_loadu_ps(&r_re[aa][8*i+input_offset]);
r_im256=_mm256_loadu_ps(&r_im[aa][8*i+input_offset]);
r_re256=_mm256_mul_ps(r_re256,gain256);
r_im256=_mm256_mul_ps(r_im256,gain256);
r_re256i=_mm256_cvtps_epi32(r_re256);
r_im256i=_mm256_cvtps_epi32(r_im256);
r_re256i=_mm256_packs_epi32(r_re256i,r_re256i);
r_im256i=_mm256_packs_epi32(r_im256i,r_im256i);
output256=_mm256_unpacklo_epi16(r_re256i,r_im256i);
_mm256_storeu_si256((__m256i *)&output[aa][8*i+output_offset/2],output256);
}
//printf("Adc outputs %d %e %d \n",i,((short *)output[0])[((i+output_offset)<<1)], ((i+output_offset)<<1) );
}
}
/*void adc_freq(double *r_re[2],
double *r_im[2],
unsigned int input_offset,
......
......@@ -222,6 +222,48 @@ double dac_fixed_gain_SSE_float(float *s_re[2],
return(signal_energy_fp_SSE_float(s_re,s_im,nb_tx_antennas,length_meas,0)/NB_RE);
}
double dac_fixed_gain_AVX_float(float *s_re[2],
float *s_im[2],
uint32_t **input,
uint32_t input_offset,
uint32_t nb_tx_antennas,
uint32_t length,
uint32_t input_offset_meas,
uint32_t length_meas,
uint8_t B,
float txpwr_dBm,
int NB_RE)
{
int i;
int aa;
float amp,amp1,div;
__m256 input_re256, input_im256;
amp = //sqrt(NB_RE)*pow(10.0,.05*txpwr_dBm)/sqrt(nb_tx_antennas); //this is amp per tx antenna
pow(10.0,.05*txpwr_dBm)/sqrt(nb_tx_antennas); //this is amp per tx antenna
amp1 = 0;
for (aa=0; aa<nb_tx_antennas; aa++) {
amp1 += sqrt((float)signal_energy((int32_t*)&input[aa][input_offset_meas],length_meas)/NB_RE);
}
amp1/=nb_tx_antennas;
div=amp/amp1;
for (i=0; i<(length>>3); i++) {
for (aa=0; aa<nb_tx_antennas; aa++) {
input_re256=_mm256_set_ps(((float)(((short *)input[aa]))[(((8*i+7)+input_offset)<<1)]),((float)(((short *)input[aa]))[(((8*i+6)+input_offset)<<1)]),((float)(((short *)input[aa]))[(((8*i+5)+input_offset)<<1)]),((float)(((short *)input[aa]))[(((8*i+4)+input_offset)<<1)]),((float)(((short *)input[aa]))[(((8*i+3)+input_offset)<<1)]),((float)(((short *)input[aa]))[(((8*i+2)+input_offset)<<1)]),((float)(((short *)input[aa]))[(((8*i+1)+input_offset)<<1)]),((float)(((short *)input[aa]))[(((8*i)+input_offset)<<1)]));
input_im256=_mm256_set_ps(((float)(((short *)input[aa]))[(((8*i+7)+input_offset)<<1)+1]),((float)(((short *)input[aa]))[(((8*i+6)+input_offset)<<1)+1]),((float)(((short *)input[aa]))[(((8*i+5)+input_offset)<<1)+1]),((float)(((short *)input[aa]))[(((8*i+4)+input_offset)<<1)+1]),((float)(((short *)input[aa]))[(((8*i+3)+input_offset)<<1)+1]),((float)(((short *)input[aa]))[(((8*i+2)+input_offset)<<1)+1]),((float)(((short *)input[aa]))[(((8*i+1)+input_offset)<<1)+1]),((float)(((short *)input[aa]))[(((8*i)+input_offset)<<1)+1]));
input_re256=_mm256_mul_ps(input_re256,_mm256_set1_ps(div));
input_im256=_mm256_mul_ps(input_im256,_mm256_set1_ps(div));
_mm256_storeu_ps(&s_re[aa][8*i],input_re256);
_mm256_storeu_ps(&s_im[aa][8*i],input_im256);
}
}
return(signal_energy_fp_AVX_float(s_re,s_im,nb_tx_antennas,length_meas,0)/NB_RE);
}
double dac_fixed_gain_prach(double *s_re[2],
double *s_im[2],
uint32_t *input,
......@@ -351,3 +393,68 @@ float dac_fixed_gain_prach_SSE_float(float *s_re[2],
return(signal_energy_fp_SSE_float(s_re,s_im,nb_tx_antennas,length_meas,0)/NB_RE);
}
float dac_fixed_gain_prach_AVX_float(float *s_re[2],
float *s_im[2],
uint32_t *input,
uint32_t input_offset,
uint32_t nb_tx_antennas,
uint32_t length,
uint32_t input_offset_meas,
uint32_t length_meas,
uint8_t B,
float txpwr_dBm,
int NB_RE,
int ofdm_symbol_size)
{
int i;
int aa;
float amp,amp1,div;
__m256 input_re256, input_im256;
amp = //sqrt(NB_RE)*pow(10.0,.05*txpwr_dBm)/sqrt(nb_tx_antennas); //this is amp per tx antenna
pow(10.0,.05*txpwr_dBm)/sqrt(nb_tx_antennas); //this is amp per tx antenna
amp1 = 0;
for (aa=0; aa<nb_tx_antennas; aa++) {
amp1 += sqrt((float)signal_energy_prach((int32_t*)&input[input_offset_meas],length_meas)/NB_RE);
}
amp1/=nb_tx_antennas;
// printf("DAC: amp1 %f dB (%d,%d), tx_power %f\n",20*log10(amp1),input_offset,input_offset_meas,txpwr_dBm);
/*
if (nb_tx_antennas==2)
amp1 = AMP/2;
else if (nb_tx_antennas==4)
amp1 = ((AMP*ONE_OVER_SQRT2_Q15)>>16);
else //assume (nb_tx_antennas==1)
amp1 = ((AMP*ONE_OVER_SQRT2_Q15)>>15);
amp1 = amp1*sqrt(512.0/300.0); //account for loss due to null carriers
//printf("DL: amp1 %f dB (%d,%d), tx_power %f\n",20*log10(amp1),input_offset,input_offset_meas,txpwr_dBm);
*/
div=amp/amp1;
for (i=0; i<(length>>3); i++) {
for (aa=0; aa<nb_tx_antennas; aa++) {
//s_re[aa][i] = div*((float)(((short *)input))[((input_offset+2*i))]); ///(1<<(B-1));
//s_im[aa][i] = div*((float)(((short *)input))[((input_offset+2*i))+1]); ///(1<<(B-1));
input_re256=_mm256_set_ps((float)(((short *)input))[2*(8*i+7)+input_offset],(float)(((short *)input))[2*(8*i+6)+input_offset],(float)(((short *)input))[2*(8*i+5)+input_offset],(float)(((short *)input))[2*(8*i+4)+input_offset],(float)(((short *)input))[2*(8*i+3)+input_offset],(float)(((short *)input))[2*(8*i+2)+input_offset],(float)(((short *)input))[2*(8*i+1)+input_offset],(float)(((short *)input))[2*(8*i)+input_offset]);
input_im256=_mm256_set_ps((float)(((short *)input))[2*(8*i+7)+1+input_offset],(float)(((short *)input))[2*(8*i+6)+1+input_offset],(float)(((short *)input))[2*(8*i+5)+1+input_offset],(float)(((short *)input))[2*(8*i+4)+1+input_offset],(float)(((short *)input))[2*(8*i+3)+1+input_offset],(float)(((short *)input))[2*(8*i+2)+1+input_offset],(float)(((short *)input))[2*(8*i+1)+1+input_offset],(float)(((short *)input))[2*(8*i)+1+input_offset]);
input_re256=_mm256_mul_ps(input_re256,_mm256_set1_ps(div));
input_im256=_mm256_mul_ps(input_im256,_mm256_set1_ps(div));
_mm256_storeu_ps(&s_re[aa][8*i],input_re256);
_mm256_storeu_ps(&s_im[aa][8*i],input_im256);
if (2*i+input_offset==12*2*ofdm_symbol_size)
i=0;
}
}
// printf("ener %e\n",signal_energy_fp(s_re,s_im,nb_tx_antennas,length,0));
return(signal_energy_fp_AVX_float(s_re,s_im,nb_tx_antennas,length_meas,0)/NB_RE);
}
......@@ -81,6 +81,15 @@ void rf_rx_simple_freq_SSE_float(float *r_re[2],
unsigned int symbols_per_tti,
unsigned int ofdm_symbol_size,
unsigned int n_samples);
void rf_rx_simple_freq_AVX_float(float *r_re[2],
float *r_im[2],
unsigned int nb_rx_antennas,
unsigned int length,
float s_time,
float rx_gain_dB,
unsigned int symbols_per_tti,
unsigned int ofdm_symbol_size,
unsigned int n_samples);
void adc(double *r_re[2],
......@@ -101,6 +110,16 @@ void adc_SSE_float(float *r_re[2],
unsigned char B,
unsigned int samples,
unsigned int ofdm_symbol_size);
void adc_AVX_float(float *r_re[2],
float *r_im[2],
unsigned int input_offset,
unsigned int output_offset,
int **output,
unsigned int nb_rx_antennas,
unsigned int length,
unsigned char B,
unsigned int samples,
unsigned int ofdm_symbol_size);
void adc_freq(double *r_re[2],
double *r_im[2],
unsigned int input_offset,
......@@ -126,6 +145,14 @@ void adc_prach_SSE_float(float *r_re[2],
unsigned int nb_rx_antennas,
unsigned int length,
unsigned char B);
void adc_prach_AVX_float(float *r_re[2],
float *r_im[2],
unsigned int input_offset,
unsigned int output_offset,
int **output,
unsigned int nb_rx_antennas,
unsigned int length,
unsigned char B);
void dac(double *s_re[2],
double *s_im[2],
......@@ -172,6 +199,17 @@ float dac_fixed_gain_SSE_float(float *s_re[2],
unsigned char B,
float gain,
int NB_RE);
float dac_fixed_gain_AVX_float(float *s_re[2],
float *s_im[2],
int **input,
unsigned int input_offset,
unsigned int nb_tx_antennas,
unsigned int length,
unsigned int input_offset_meas,
unsigned int length_meas,
unsigned char B,
float gain,
int NB_RE);
double dac_fixed_gain_prach(double *s_re[2],
double *s_im[2],
......@@ -197,3 +235,15 @@ float dac_fixed_gain_prach_SSE_float(float *s_re[2],
float gain,
int NB_RE,
int ofdm_symbol_size);
float dac_fixed_gain_prach_AVX_float(float *s_re[2],
float *s_im[2],
int *input,
unsigned int input_offset,
unsigned int nb_tx_antennas,
unsigned int length,
unsigned int input_offset_meas,
unsigned int length_meas,
unsigned char B,
float gain,
int NB_RE,
int ofdm_symbol_size);
......@@ -473,6 +473,109 @@ clock_t start=clock();*/
printf("do_DL_sig time is %f s, AVERAGE time is %f s, count %d, sum %e\n",(float) (stop-start)/CLOCKS_PER_SEC,(float) (sum+stop-start)/(count*CLOCKS_PER_SEC),count,sum+stop-start);
sum=(sum+stop-start);*/
}
void rf_rx_simple_freq_AVX_float(float *r_re[2],
float *r_im[2],
unsigned int nb_rx_antennas,
unsigned int length,
float s_time,
float rx_gain_dB,
unsigned int symbols_per_tti,
unsigned int ofdm_symbol_size,
unsigned int n_samples)
{
/* static int first_run=0;
static double sum;
static int count;
if (!first_run)
{
first_run=1;
sum=0;
count=0;
}
count++;*/
__m256 rx256_re,rx256_im,rx256_gain_lin,gauss_0_256_sqrt_NOW,gauss_1_256_sqrt_NOW;//double
int i,a;
float rx_gain_lin = pow(10.0,.05*rx_gain_dB);
//static float out[4] __attribute__((aligned(16)));
//static float out1[4] __attribute__((aligned(16)));
//double rx_gain_lin = 1.0;
float N0W = pow(10.0,.1*(-174.0 - 10*log10(s_time*1e-9)));
float sqrt_NOW = rx_gain_lin*sqrt(.5*N0W);
//double N0W = 0.0;
// printf("s_time=%f, N0W=%g\n",s_time,10*log10(N0W));
//Loop over input
#ifdef DEBUG_RF
printf("N0W = %f dBm\n",10*log10(N0W));
printf("rx_gain = %f dB(%f)\n",rx_gain_dB,rx_gain_lin);
#endif
//rx128_gain_lin=mm_loadu_pd(rx_gain_lin);
/*count++;
clock_t start=clock();*/
rx256_gain_lin = _mm256_set1_ps(rx_gain_lin);
for (i=0; i<(length>>3); i++) {
for (a=0; a<nb_rx_antennas; a++) {
/*if (i%(ofdm_symbol_size>>2)>(n_samples>>2) && i%(ofdm_symbol_size>>2)<(ofdm_symbol_size>>2)-(n_samples>>2))
{
//printf("i = %d\n",i);
//_mm_storeu_pd(&r_re[a][2*i],_mm_setzero_pd());
//_mm_storeu_pd(&r_im[a][2*i],_mm_setzero_pd());
break;
}
else
{*/
rx256_re = _mm256_loadu_ps(&r_re[a][8*i]);//r_re[a][i],r_re[a][i+1]
rx256_im = _mm256_loadu_ps(&r_im[a][8*i]);//r_im[a][i],r_im[a][i+1]
//start_meas(&desc->ziggurat);
//gauss_0_128_sqrt_NOW = _mm_set1_ps(1);
//gauss_1_128_sqrt_NOW = _mm_set1_ps(1);
//gauss_0_128_sqrt_NOW = _mm_set_ps(gaussdouble(0.0,1.0),gaussdouble(0.0,1.0),gaussdouble(0.0,1.0),gaussdouble(0.0,1.0));
//gauss_1_128_sqrt_NOW = _mm_set_ps(gaussdouble(0.0,1.0),gaussdouble(0.0,1.0),gaussdouble(0.0,1.0),gaussdouble(0.0,1.0));
//gauss_0_128_sqrt_NOW = _mm_set_ps(ziggurat(0.0,1.0),ziggurat(0.0,1.0),ziggurat(0.0,1.0),ziggurat(0.0,1.0));
//gauss_1_128_sqrt_NOW = _mm_set_ps(ziggurat(0.0,1.0),ziggurat(0.0,1.0),ziggurat(0.0,1.0),ziggurat(0.0,1.0));
boxmuller_AVX_float(&gauss_0_256_sqrt_NOW, &gauss_1_256_sqrt_NOW);
//gauss_0_256_sqrt_NOW = ziggurat_SSE_float();
//gauss_1_256_sqrt_NOW = ziggurat_SSE_float();
//stop_meas(&desc->ziggurat);
gauss_0_256_sqrt_NOW = _mm256_mul_ps(gauss_0_256_sqrt_NOW,_mm256_set1_ps(sqrt_NOW));
gauss_1_256_sqrt_NOW = _mm256_mul_ps(gauss_1_256_sqrt_NOW,_mm256_set1_ps(sqrt_NOW));
// Amplify by receiver gain and apply 3rd order non-linearity
rx256_re = _mm256_add_ps(_mm256_mul_ps(rx256_re,rx256_gain_lin),gauss_0_256_sqrt_NOW);
rx256_im = _mm256_add_ps(_mm256_mul_ps(rx256_im,rx256_gain_lin),gauss_1_256_sqrt_NOW);
_mm256_storeu_ps(&r_re[a][8*i],rx256_re);
_mm256_storeu_ps(&r_im[a][8*i],rx256_im);
//}
}
}
/*rx128_re = _mm_loadu_ps(&r_re[a][4*i+ofdm_symbol_size*j]);//r_re[a][i],r_re[a][i+1]
rx128_im = _mm_loadu_ps(&r_im[a][4*i+ofdm_symbol_size*j]);//r_im[a][i],r_im[a][i+1]
rx128_re_1 = _mm_loadu_ps(&r_re[a][(ofdm_symbol_size-n_samples)+4*i+ofdm_symbol_size*j]);//r_re[a][i],r_re[a][i+1]
rx128_im_1 = _mm_loadu_ps(&r_im[a][(ofdm_symbol_size-n_samples)+4*i+ofdm_symbol_size*j]);//r_im[a][i],r_im[a][i+1]
//start_meas(&desc->ziggurat);
//gauss_0_128_sqrt_NOW = _mm_set_ps(ziggurat(0.0,1.0),ziggurat(0.0,1.0),ziggurat(0.0,1.0),ziggurat(0.0,1.0));
//gauss_1_128_sqrt_NOW = _mm_set_ps(ziggurat(0.0,1.0),ziggurat(0.0,1.0),ziggurat(0.0,1.0),ziggurat(0.0,1.0));
boxmuller_SSE_float(&gauss_0_128_sqrt_NOW, &gauss_1_128_sqrt_NOW);
boxmuller_SSE_float(&gauss_0_128_sqrt_NOW_1, &gauss_1_128_sqrt_NOW_1);
//stop_meas(&desc->ziggurat);
gauss_0_128_sqrt_NOW = _mm_mul_ps(gauss_0_128_sqrt_NOW,_mm_set1_ps(sqrt_NOW));
gauss_1_128_sqrt_NOW = _mm_mul_ps(gauss_1_128_sqrt_NOW,_mm_set1_ps(sqrt_NOW));
gauss_0_128_sqrt_NOW_1 = _mm_mul_ps(gauss_0_128_sqrt_NOW_1,_mm_set1_ps(sqrt_NOW));
gauss_1_128_sqrt_NOW_1 = _mm_mul_ps(gauss_1_128_sqrt_NOW_1,_mm_set1_ps(sqrt_NOW));
// Amplify by receiver gain and apply 3rd order non-linearity
rx128_re = _mm_add_ps(_mm_mul_ps(rx128_re,rx128_gain_lin),gauss_0_128_sqrt_NOW);
rx128_im = _mm_add_ps(_mm_mul_ps(rx128_im,rx128_gain_lin),gauss_1_128_sqrt_NOW);
rx128_re_1 = _mm_add_ps(_mm_mul_ps(rx128_re_1,rx128_gain_lin),gauss_0_128_sqrt_NOW_1);
rx128_im_1 = _mm_add_ps(_mm_mul_ps(rx128_im_1,rx128_gain_lin),gauss_1_128_sqrt_NOW_1);
_mm_storeu_ps(&r_re[a][4*i+ofdm_symbol_size*j],rx128_re);
_mm_storeu_ps(&r_im[a][4*i+ofdm_symbol_size*j],rx128_im);
_mm_storeu_ps(&r_re[a][(ofdm_symbol_size-n_samples)+4*i+ofdm_symbol_size*j],rx128_re_1);
_mm_storeu_ps(&r_im[a][(ofdm_symbol_size-n_samples)+4*i+ofdm_symbol_size*j],rx128_im_1);*/
/*clock_t stop=clock();
printf("do_DL_sig time is %f s, AVERAGE time is %f s, count %d, sum %e\n",(float) (stop-start)/CLOCKS_PER_SEC,(float) (sum+stop-start)/(count*CLOCKS_PER_SEC),count,sum+stop-start);
sum=(sum+stop-start);*/
}
#ifdef RF_MAIN
#define INPUT_dBm -70.0
......
......@@ -153,7 +153,7 @@ int init_freq_channel_SSE_float(channel_desc_t *desc,uint16_t nb_rb,int16_t n_sa
float delay;
int16_t f;
uint8_t l;
__m128 cos_lut128,sin_lut128;
__m128 cos_lut128,sin_lut128;//,cos_lut128_tmp,sin_lut128_tmp;
/*__m128 x128, log128, exp128;
__m256 x256, log256, exp256;
x128 = _mm_set_ps(1.0,2.0,3.0,4.0);
......@@ -214,12 +214,15 @@ int init_freq_channel_SSE_float(channel_desc_t *desc,uint16_t nb_rb,int16_t n_sa
delay = desc->delays[l];
else
delay = desc->delays[l]+NB_SAMPLES_CHANNEL_OFFSET/desc->sampling_rate;
//sincos_ps(_mm_set_ps(twopi*(4*f+3)*delay,twopi*(4*f+2)*delay,twopi*(4*f+1)*delay,twopi*(4*f)*delay), &sin_lut128, &cos_lut128);
cos_lut128=_mm_set_ps(cos(twopi*(4*f+3)*delay),cos(twopi*(4*f+2)*delay),cos(twopi*(4*f+1)*delay),cos(twopi*(4*f)*delay));
sin_lut128=_mm_set_ps(sin(twopi*(4*f+3)*delay),sin(twopi*(4*f+2)*delay),sin(twopi*(4*f+1)*delay),sin(twopi*(4*f)*delay));
sincos_ps(_mm_set_ps(twopi*(4*f+3)*delay,twopi*(4*f+2)*delay,twopi*(4*f+1)*delay,twopi*(4*f)*delay), &sin_lut128, &cos_lut128);
//cos_lut128=_mm_set_ps(cos(twopi*(4*f+3)*delay),cos(twopi*(4*f+2)*delay),cos(twopi*(4*f+1)*delay),cos(twopi*(4*f)*delay));
//sin_lut128=_mm_set_ps(sin(twopi*(4*f+3)*delay),sin(twopi*(4*f+2)*delay),sin(twopi*(4*f+1)*delay),sin(twopi*(4*f)*delay));
_mm_storeu_ps(&cos_lut_f[l][4*f+(n_samples>>1)],cos_lut128);
_mm_storeu_ps(&sin_lut_f[l][4*f+(n_samples>>1)],sin_lut128);
/*printf("sin128 %e,%e,%e,%e\n",sin_lut128_tmp[0],sin_lut128_tmp[1],sin_lut128_tmp[2],sin_lut128_tmp[3]);
printf("cos128 %e,%e,%e,%e\n",cos_lut128_tmp[0],cos_lut128_tmp[1],cos_lut128_tmp[2],cos_lut128_tmp[3]);
printf("sin %e,%e,%e,%e\n",sin_lut128[0],sin_lut128[1],sin_lut128[2],sin_lut128[3]);
printf("cos %e,%e,%e,%e\n",cos_lut128[0],cos_lut128[1],cos_lut128[2],cos_lut128[3]);*/
}
}
for (l=0; l<(int)desc->nb_taps; l++)
......@@ -229,7 +232,7 @@ int init_freq_channel_SSE_float(channel_desc_t *desc,uint16_t nb_rb,int16_t n_sa
//printf("f %d,l %d (cos,sin) (%e,%e):\n",4*f,l,cos_lut_f[(n_samples>>1)][l],sin_lut_f[(n_samples>>1)][l]);
}
for (f=1; f<=(n_samples>>3); f++) {
for (f=1; f<=(n_samples>>3)+1; f++) {
//count++;
//freq=delta_f*(double)f*1e-6;// due to the fact that delays is in mus
for (l=0; l<(int)desc->nb_taps; l++) {
......@@ -237,8 +240,9 @@ int init_freq_channel_SSE_float(channel_desc_t *desc,uint16_t nb_rb,int16_t n_sa
delay = desc->delays[l];
else
delay = desc->delays[l]+NB_SAMPLES_CHANNEL_OFFSET/desc->sampling_rate;
cos_lut128=_mm_set_ps(cos(twopi*(4*f)*delay),cos(twopi*(4*f-1)*delay),cos(twopi*(4*f-2)*delay),cos(twopi*(4*f-3)*delay));
sin_lut128=_mm_set_ps(sin(twopi*(4*f)*delay),sin(twopi*(4*f-1)*delay),sin(twopi*(4*f-2)*delay),sin(twopi*(4*f-3)*delay));
sincos_ps(_mm_set_ps(twopi*(4*f)*delay,twopi*(4*f-1)*delay,twopi*(4*f-2)*delay,twopi*(4*f-3)*delay), &sin_lut128, &cos_lut128);
//cos_lut128=_mm_set_ps(cos(twopi*(4*f)*delay),cos(twopi*(4*f-1)*delay),cos(twopi*(4*f-2)*delay),cos(twopi*(4*f-3)*delay));
//sin_lut128=_mm_set_ps(sin(twopi*(4*f)*delay),sin(twopi*(4*f-1)*delay),sin(twopi*(4*f-2)*delay),sin(twopi*(4*f-3)*delay));
_mm_storeu_ps(&cos_lut_f[l][4*f-3+(n_samples>>1)],cos_lut128);
_mm_storeu_ps(&sin_lut_f[l][4*f-3+(n_samples>>1)],sin_lut128);
}
......@@ -287,9 +291,9 @@ int init_freq_channel_AVX_float(channel_desc_t *desc,uint16_t nb_rb,int16_t n_sa
else
delay = desc->delays[l]+NB_SAMPLES_CHANNEL_OFFSET/desc->sampling_rate;
//sincos_ps(_mm_set_ps(twopi*(4*f+3)*delay,twopi*(4*f+2)*delay,twopi*(4*f+1)*delay,twopi*(4*f)*delay), &sin_lut128, &cos_lut128);
cos_lut256=_mm256_set_ps(cos(twopi*(8*f+7)*delay),cos(twopi*(8*f+6)*delay),cos(twopi*(8*f+5)*delay),cos(twopi*(8*f+4)*delay),cos(twopi*(8*f+3)*delay),cos(twopi*(8*f+2)*delay),cos(twopi*(8*f+1)*delay),cos(twopi*(8*f)*delay));
sin_lut256=_mm256_set_ps(sin(twopi*(8*f+7)*delay),sin(twopi*(8*f+6)*delay),sin(twopi*(8*f+5)*delay),sin(twopi*(8*f+4)*delay),sin(twopi*(8*f+3)*delay),sin(twopi*(8*f+2)*delay),sin(twopi*(8*f+1)*delay),sin(twopi*(8*f)*delay));
sincos256_ps(_mm256_set_ps(twopi*(4*f+7)*delay,twopi*(4*f+6)*delay,twopi*(4*f+5)*delay,twopi*(4*f+4)*delay,twopi*(4*f+3)*delay,twopi*(4*f+2)*delay,twopi*(4*f+1)*delay,twopi*(4*f)*delay), &sin_lut256, &cos_lut256);
//cos_lut256=_mm256_set_ps(cos(twopi*(8*f+7)*delay),cos(twopi*(8*f+6)*delay),cos(twopi*(8*f+5)*delay),cos(twopi*(8*f+4)*delay),cos(twopi*(8*f+3)*delay),cos(twopi*(8*f+2)*delay),cos(twopi*(8*f+1)*delay),cos(twopi*(8*f)*delay));
//sin_lut256=_mm256_set_ps(sin(twopi*(8*f+7)*delay),sin(twopi*(8*f+6)*delay),sin(twopi*(8*f+5)*delay),sin(twopi*(8*f+4)*delay),sin(twopi*(8*f+3)*delay),sin(twopi*(8*f+2)*delay),sin(twopi*(8*f+1)*delay),sin(twopi*(8*f)*delay));
_mm256_storeu_ps(&cos_lut_f[l][8*f+(n_samples>>1)],cos_lut256);
_mm256_storeu_ps(&sin_lut_f[l][8*f+(n_samples>>1)],sin_lut256);
......@@ -302,7 +306,7 @@ int init_freq_channel_AVX_float(channel_desc_t *desc,uint16_t nb_rb,int16_t n_sa
//printf("f %d,l %d (cos,sin) (%e,%e):\n",4*f,l,cos_lut_f[(n_samples>>1)][l],sin_lut_f[(n_samples>>1)][l]);
}
for (f=1; f<=(n_samples>>4); f++) {
for (f=1; f<=(n_samples>>4)+1; f++) {
//count++;
//freq=delta_f*(double)f*1e-6;// due to the fact that delays is in mus
for (l=0; l<(int)desc->nb_taps; l++) {
......@@ -310,8 +314,9 @@ int init_freq_channel_AVX_float(channel_desc_t *desc,uint16_t nb_rb,int16_t n_sa
delay = desc->delays[l];
else
delay = desc->delays[l]+NB_SAMPLES_CHANNEL_OFFSET/desc->sampling_rate;
cos_lut256=_mm256_set_ps(cos(twopi*(4*f)*delay),cos(twopi*(4*f-1)*delay),cos(twopi*(4*f-2)*delay),cos(twopi*(4*f-3)*delay),cos(twopi*(4*f-4)*delay),cos(twopi*(4*f-5)*delay),cos(twopi*(4*f-6)*delay),cos(twopi*(4*f-7)*delay));
sin_lut256=_mm256_set_ps(sin(twopi*(4*f)*delay),sin(twopi*(4*f-1)*delay),sin(twopi*(4*f-2)*delay),sin(twopi*(4*f-3)*delay),sin(twopi*(4*f-4)*delay),sin(twopi*(4*f-5)*delay),sin(twopi*(4*f-6)*delay),sin(twopi*(4*f-7)*delay));
sincos256_ps(_mm256_set_ps(twopi*(4*f)*delay,twopi*(4*f-1)*delay,twopi*(4*f-2)*delay,twopi*(4*f-3)*delay,twopi*(4*f-4)*delay,twopi*(4*f-5)*delay,twopi*(4*f-6)*delay,twopi*(4*f-7)*delay), &sin_lut256, &cos_lut256);
//cos_lut256=_mm256_set_ps(cos(twopi*(4*f)*delay),cos(twopi*(4*f-1)*delay),cos(twopi*(4*f-2)*delay),cos(twopi*(4*f-3)*delay),cos(twopi*(4*f-4)*delay),cos(twopi*(4*f-5)*delay),cos(twopi*(4*f-6)*delay),cos(twopi*(4*f-7)*delay));
//sin_lut256=_mm256_set_ps(sin(twopi*(4*f)*delay),sin(twopi*(4*f-1)*delay),sin(twopi*(4*f-2)*delay),sin(twopi*(4*f-3)*delay),sin(twopi*(4*f-4)*delay),sin(twopi*(4*f-5)*delay),sin(twopi*(4*f-6)*delay),sin(twopi*(4*f-7)*delay));
_mm256_storeu_ps(&cos_lut_f[l][8*f-7+(n_samples>>1)],cos_lut256);
_mm256_storeu_ps(&sin_lut_f[l][8*f-7+(n_samples>>1)],sin_lut256);
}
......
......@@ -371,6 +371,17 @@ void multipath_channel_prach_SSE_float(channel_desc_t *desc,
uint8_t eNB_id,
uint8_t prach_fmt,
uint8_t n_ra_prb);
void multipath_channel_prach_AVX_float(channel_desc_t *desc,
float *tx_sig_re[2],
float *tx_sig_im[2],
float *rx_sig_re[2],
float *rx_sig_im[2],
LTE_DL_FRAME_PARMS* const fp,
uint32_t length,
uint8_t keep_channel,
uint8_t eNB_id,
uint8_t prach_fmt,
uint8_t n_ra_prb);
/*
\fn double compute_pbch_sinr(channel_desc_t *desc,
channel_desc_t *desc_i1,
......
......@@ -528,14 +528,14 @@ void multipath_channel_freq_SSE_float(channel_desc_t *desc,
// do nothing - keep channel
} else {
random_channel_freq(desc,0);
freq_channel_SSE_float(desc,nb_rb,n_samples);//Find desc->chF
freq_channel_AVX_float(desc,nb_rb,n_samples);//Find desc->chF
}
for (j=0;j<(symbols_per_tti>>2);j++){
/*for (j=0;j<(ofdm_symbol_size>>2);j++){
for (ii=0; ii<desc->nb_rx; ii++) {
_mm_storeu_ps(&rx_sig_re[ii][4*j*ofdm_symbol_size],_mm_setzero_ps());
_mm_storeu_ps(&rx_sig_im[ii][4*j*ofdm_symbol_size],_mm_setzero_ps());
_mm_storeu_ps(&rx_sig_re[ii][4*j*symbols_per_tti],_mm_setzero_ps());
_mm_storeu_ps(&rx_sig_im[ii][4*j*symbols_per_tti],_mm_setzero_ps());
}
}
}*/
for (f=0;f<((ofdm_symbol_size*symbols_per_tti)>>2); f++) {//f2 = 0-1024*14-1 ---- for 10 Mhz BW
//printf("f is %d\n",f);
for (ii=0; ii<desc->nb_rx; ii++) {
......@@ -649,14 +649,14 @@ void multipath_channel_freq_AVX_float(channel_desc_t *desc,
// do nothing - keep channel
} else {
random_channel_freq(desc,0);
freq_channel_SSE_float(desc,nb_rb,n_samples);//Find desc->chF
freq_channel_AVX_float(desc,nb_rb,n_samples);//Find desc->chF
}
for (j=0;j<(symbols_per_tti>>2);j++){
/*for (j=0;j<(symbols_per_tti>>2);j++){
for (ii=0; ii<desc->nb_rx; ii++) {
_mm256_storeu_ps(&rx_sig_re[ii][4*j*ofdm_symbol_size],_mm256_setzero_ps());
_mm256_storeu_ps(&rx_sig_im[ii][4*j*ofdm_symbol_size],_mm256_setzero_ps());
}
}
}*/
for (f=0;f<((ofdm_symbol_size*symbols_per_tti)>>3); f++) {//f2 = 0-1024*14-1 ---- for 10 Mhz BW
//printf("f is %d\n",f);
for (ii=0; ii<desc->nb_rx; ii++) {
......@@ -664,16 +664,16 @@ void multipath_channel_freq_AVX_float(channel_desc_t *desc,
//rx_tmp.y = 0;
rx_tmp256_re_f = _mm256_setzero_ps();
rx_tmp256_im_f = _mm256_setzero_ps();
if (f%(ofdm_symbol_size>>2)<(n_samples>>2))//1-300
if (f%(ofdm_symbol_size>>3)<(n_samples>>3))//1-300
{
for (j=0; j<desc->nb_tx; j++) {
//first n_samples>>1 samples of each frequency ofdm symbol out of ofdm_symbol_size
//RX_RE(k) += TX_RE(k).chF(k).x - TX_IM(k).chF(k).y
//RX_IM(k) += TX_IM(k).chF(k).x + TX_RE(k).chF(k).y
tx256_re = _mm256_loadu_ps(&tx_sig_re[j][(4*f+1)]);
tx256_im = _mm256_loadu_ps(&tx_sig_im[j][(4*f+1)]);
chF256_x = _mm256_set1_ps(desc->chFf[ii+(j*desc->nb_rx)].x[(4*(f%(ofdm_symbol_size>>2)))+(n_samples>>2)]);
chF256_y = _mm256_set1_ps(desc->chFf[ii+(j*desc->nb_rx)].y[(4*(f%(ofdm_symbol_size>>2)))+(n_samples>>2)]);
tx256_re = _mm256_loadu_ps(&tx_sig_re[j][(8*f+1)]);
tx256_im = _mm256_loadu_ps(&tx_sig_im[j][(8*f+1)]);
chF256_x = _mm256_set1_ps(desc->chFf[ii+(j*desc->nb_rx)].x[(8*(f%(ofdm_symbol_size>>3)))+(n_samples>>3)]);
chF256_y = _mm256_set1_ps(desc->chFf[ii+(j*desc->nb_rx)].y[(8*(f%(ofdm_symbol_size>>3)))+(n_samples>>3)]);
//rx_tmp.x += (tx_sig_re[j][f+k*ofdm_symbol_size] * desc->chF[ii+(j*desc->nb_rx)][f+(n_samples>>1)-1].x)//tx128_re*ch128_x
// -(tx_sig_im[j][f+k*ofdm_symbol_size] * desc->chF[ii+(j*desc->nb_rx)][f+(n_samples>>1)-1].y);//-tx128_im*ch128_y
//rx_tmp.y += (tx_sig_im[j][f+k*ofdm_symbol_size] * desc->chF[ii+(j*desc->nb_rx)][f+(n_samples>>1)-1].x)//tx128_im*ch128_x
......@@ -691,10 +691,10 @@ void multipath_channel_freq_AVX_float(channel_desc_t *desc,
//rx_sig_im[ii][f+k*ofdm_symbol_size] = rx_tmp.y*path_loss;
rx_tmp256_re_f = _mm256_mul_ps(rx_tmp256_re_f,pathloss256);
rx_tmp256_im_f = _mm256_mul_ps(rx_tmp256_im_f,pathloss256);
_mm256_storeu_ps(&rx_sig_re[ii][(4*f+1)],rx_tmp256_re_f);
_mm256_storeu_ps(&rx_sig_im[ii][(4*f+1)],rx_tmp256_im_f);
_mm256_storeu_ps(&rx_sig_re[ii][(8*f+1)],rx_tmp256_re_f);
_mm256_storeu_ps(&rx_sig_im[ii][(8*f+1)],rx_tmp256_im_f);
}
else if (f%(ofdm_symbol_size>>2)>(n_samples>>2) && f%(ofdm_symbol_size>>2)<(ofdm_symbol_size>>2)-(n_samples>>2))
else if (f%(ofdm_symbol_size>>3)>(n_samples>>3) && f%(ofdm_symbol_size>>3)<(ofdm_symbol_size>>3)-(n_samples>>3))
{
//rx_sig_re[ii][f+k*ofdm_symbol_size] = 0;
//rx_sig_im[ii][f+k*ofdm_symbol_size] = 0;
......@@ -708,10 +708,10 @@ void multipath_channel_freq_AVX_float(channel_desc_t *desc,
//last n_samples>>1 samples of each frequency ofdm symbol out of ofdm_symbol_size
//RX_RE(k) += TX_RE(k).chF(k).x - TX_IM(k).chF(k).y
//RX_IM(k) += TX_IM(k).chF(k).x + TX_RE(k).chF(k).y
tx256_re = _mm256_loadu_ps(&tx_sig_re[j][4*f]);
tx256_im = _mm256_loadu_ps(&tx_sig_im[j][4*f]);
chF256_x = _mm256_set1_ps(desc->chFf[ii+(j*desc->nb_rx)].x[4*(f%(ofdm_symbol_size>>2)-((ofdm_symbol_size>>2)-(n_samples>>2)))]);
chF256_y = _mm256_set1_ps(desc->chFf[ii+(j*desc->nb_rx)].y[4*(f%(ofdm_symbol_size>>2)-((ofdm_symbol_size>>2)-(n_samples>>2)))]);
tx256_re = _mm256_loadu_ps(&tx_sig_re[j][8*f]);
tx256_im = _mm256_loadu_ps(&tx_sig_im[j][8*f]);
chF256_x = _mm256_set1_ps(desc->chFf[ii+(j*desc->nb_rx)].x[8*(f%(ofdm_symbol_size>>3)-((ofdm_symbol_size>>3)-(n_samples>>3)))]);
chF256_y = _mm256_set1_ps(desc->chFf[ii+(j*desc->nb_rx)].y[8*(f%(ofdm_symbol_size>>3)-((ofdm_symbol_size>>3)-(n_samples>>3)))]);
//rx_tmp.x += (tx_sig_re[j][f+k*ofdm_symbol_size] * desc->chF[ii+(j*desc->nb_rx)][f2].x)
// -(tx_sig_im[j][f+k*ofdm_symbol_size] * desc->chF[ii+(j*desc->nb_rx)][f2].y);
//rx_tmp.y += (tx_sig_im[j][f+k*ofdm_symbol_size] * desc->chF[ii+(j*desc->nb_rx)][f2].x)
......@@ -729,8 +729,8 @@ void multipath_channel_freq_AVX_float(channel_desc_t *desc,
//rx_sig_im[ii][f+k*ofdm_symbol_size] = rx_tmp.y*path_loss;
rx_tmp256_re_f = _mm256_mul_ps(rx_tmp256_re_f,pathloss256);
rx_tmp256_im_f = _mm256_mul_ps(rx_tmp256_im_f,pathloss256);
_mm256_storeu_ps(&rx_sig_re[ii][4*f],rx_tmp256_re_f);
_mm256_storeu_ps(&rx_sig_im[ii][4*f],rx_tmp256_im_f);
_mm256_storeu_ps(&rx_sig_re[ii][8*f],rx_tmp256_re_f);
_mm256_storeu_ps(&rx_sig_im[ii][8*f],rx_tmp256_im_f);
}
} // ii
} // f,f2,f3
......@@ -921,6 +921,73 @@ void multipath_channel_prach_SSE_float(channel_desc_t *desc,
} // ii
} // f
}
void multipath_channel_prach_AVX_float(channel_desc_t *desc,
float *tx_sig_re[2],
float *tx_sig_im[2],
float *rx_sig_re[2],
float *rx_sig_im[2],
LTE_DL_FRAME_PARMS* const fp,
uint32_t length,
uint8_t keep_channel,
uint8_t eNB_id,
uint8_t prach_fmt,
uint8_t n_ra_prb)
{
int ii,j,f;
__m256 rx_tmp256_re_f,rx_tmp256_im_f,rx_tmp256_re,rx_tmp256_im, rx_tmp256_1,rx_tmp256_2,rx_tmp256_3,rx_tmp256_4,tx256_re,tx256_im,chF256_x,chF256_y,pathloss256;
float path_loss = pow(10,desc->path_loss_dB/20);
pathloss256 = _mm256_set1_ps(path_loss);
int nb_rb, n_samples;
nb_rb=fp->N_RB_DL;
n_samples=fp->N_RB_DL*12+1;
#ifdef DEBUG_CH
printf("[CHANNEL_PRACH] keep = %d : path_loss = %g (%f), nb_rx %d, nb_tx %d, len %d \n",keep_channel,path_loss,desc->path_loss_dB,desc->nb_rx,desc->nb_tx,desc->channel_length);
#endif
if (keep_channel) {
// do nothing - keep channel
} else {
random_channel_freq(desc,0);
freq_channel_prach_SSE_float(desc,nb_rb,n_samples,prach_fmt,n_ra_prb);//Find desc->chF_prach
}
for (f=0;f<(length>>3); f++) {
//rx_tmp.x = 0;
//rx_tmp.y = 0;
rx_tmp256_re_f = _mm256_setzero_ps();
rx_tmp256_im_f = _mm256_setzero_ps();
for (ii=0; ii<desc->nb_rx; ii++) {
for (j=0; j<desc->nb_tx; j++) {
//RX_RE(k) = TX_RE(k).chF(k).x - TX_IM(k).chF(k).y
//RX_IM(k) = TX_IM(k).chF(k).x + TX_RE(k).chF(k).y
tx256_re = _mm256_loadu_ps(&tx_sig_re[j][(8*f)]);
tx256_im = _mm256_loadu_ps(&tx_sig_im[j][(8*f)]);
chF256_x = _mm256_set1_ps(desc->chF_prach[ii+(j*desc->nb_rx)].x[8*f+(prach_fmt<4)?13:3]);
chF256_y = _mm256_set1_ps(desc->chF_prach[ii+(j*desc->nb_rx)].y[8*f+(prach_fmt<4)?13:3]);
//rx_tmp.x += (tx_sig_re[ii][f] * desc->chF_prach[ii+(j*desc->nb_rx)][f+(prach_fmt<4)?13:3].x)-(tx_sig_im[ii][f] * desc->chF_prach[ii+(j*desc->nb_rx)][f+(prach_fmt<4)?13:3].y);
//rx_tmp.y += (tx_sig_im[ii][f] * desc->chF_prach[ii+(j*desc->nb_rx)][f+(prach_fmt<4)?13:3].x)+(tx_sig_re[ii][f] * desc->chF_prach[ii+(j*desc->nb_rx)][f+(prach_fmt<4)?13:3].y);
rx_tmp256_1 = _mm256_mul_ps(tx256_re,chF256_x);
rx_tmp256_2 = _mm256_mul_ps(tx256_im,chF256_y);
rx_tmp256_3 = _mm256_mul_ps(tx256_im,chF256_x);
rx_tmp256_4 = _mm256_mul_ps(tx256_re,chF256_y);
rx_tmp256_re = _mm256_sub_ps(rx_tmp256_1,rx_tmp256_2);
rx_tmp256_im = _mm256_add_ps(rx_tmp256_3,rx_tmp256_4);
rx_tmp256_re_f = _mm256_add_ps(rx_tmp256_re_f,rx_tmp256_re);
rx_tmp256_im_f = _mm256_add_ps(rx_tmp256_im_f,rx_tmp256_im);
} // j
//printf("[multipath prach] k: %d\n",k/2);
//rx_sig_re[ii][f] = rx_tmp.x*path_loss;
//rx_sig_im[ii][f] = rx_tmp.y*path_loss;
rx_tmp256_re_f = _mm256_mul_ps(rx_tmp256_re_f,pathloss256);
rx_tmp256_im_f = _mm256_mul_ps(rx_tmp256_im_f,pathloss256);
_mm256_storeu_ps(&rx_sig_re[ii][8*f],rx_tmp256_re_f); // max index: length-dd -1 + dd = length -1
_mm256_storeu_ps(&rx_sig_im[ii][8*f],rx_tmp256_im_f);
} // ii
} // f
}
void multipath_channel_freq_test(channel_desc_t *desc,
double *tx_sig_re[2],
double *tx_sig_im[2],
......
......@@ -499,7 +499,7 @@ void do_DL_sig_freq(channel_desc_t *eNB2UE[NUMBER_OF_eNB_MAX][NUMBER_OF_UE_MAX][
start_meas(&eNB2UE[eNB_id][UE_id][CC_id]->DL_dac_fixed_gain);
#ifdef SSE_float
tx_pwr = dac_fixed_gain_SSE_float(s_re_f,
tx_pwr = dac_fixed_gain_AVX_float(s_re_f,
s_im_f,
txdataF,
sf_offset,
......@@ -542,7 +542,7 @@ void do_DL_sig_freq(channel_desc_t *eNB2UE[NUMBER_OF_eNB_MAX][NUMBER_OF_UE_MAX][
//clock_t start=clock();
start_meas(&eNB2UE[eNB_id][UE_id][CC_id]->DL_multipath_channel_freq);
#ifdef SSE_float
multipath_channel_freq_SSE_float(eNB2UE[eNB_id][UE_id][CC_id],s_re_f,s_im_f,r_re0_f,r_im0_f,
multipath_channel_freq_AVX_float(eNB2UE[eNB_id][UE_id][CC_id],s_re_f,s_im_f,r_re0_f,r_im0_f,
frame_parms->ofdm_symbol_size*frame_parms->symbols_per_tti,hold_channel,eNB_id,UE_id,CC_id,subframe&0x1);
#else
multipath_channel_freq(eNB2UE[eNB_id][UE_id][CC_id],s_re_f,s_im_f,r_re0_f,r_im0_f,
......@@ -609,7 +609,7 @@ void do_DL_sig_freq(channel_desc_t *eNB2UE[NUMBER_OF_eNB_MAX][NUMBER_OF_UE_MAX][
clock_t start=clock();*/
start_meas(&eNB2UE[eNB_id][UE_id][CC_id]->DL_rf_rx_simple_freq);
#ifdef SSE_float
rf_rx_simple_freq_SSE_float(r_re0_f,
rf_rx_simple_freq_AVX_float(r_re0_f,
r_im0_f,
nb_antennas_rx,
frame_parms->ofdm_symbol_size*frame_parms->symbols_per_tti,
......@@ -682,7 +682,7 @@ void do_DL_sig_freq(channel_desc_t *eNB2UE[NUMBER_OF_eNB_MAX][NUMBER_OF_UE_MAX][
//printf("[ch_sim] sf_offset %d\n",sf_offset);
start_meas(&eNB2UE[eNB_id][UE_id][CC_id]->DL_adc);
#ifdef SSE_float
adc_SSE_float(r_re_p_f,
adc_AVX_float(r_re_p_f,
r_im_p_f,
0,
sf_offset,
......@@ -1074,7 +1074,7 @@ void do_UL_sig_freq(channel_desc_t *UE2eNB[NUMBER_OF_UE_MAX][NUMBER_OF_eNB_MAX][
} else {
start_meas(&UE2eNB[UE_id][eNB_id][CC_id]->UL_dac_fixed_gain);
#ifdef SSE_float
tx_pwr = dac_fixed_gain_SSE_float((float**)s_re_f,
tx_pwr = dac_fixed_gain_AVX_float((float**)s_re_f,
(float**)s_im_f,
txdataF,
sf_offset,
......@@ -1113,7 +1113,7 @@ void do_UL_sig_freq(channel_desc_t *UE2eNB[NUMBER_OF_UE_MAX][NUMBER_OF_eNB_MAX][
//write_output("chsim_s_re_f_UL.m","chsm_sref_UL", s_re_f,10*frame_parms->ofdm_symbol_size*frame_parms->symbols_per_tti,1,16);
start_meas(&UE2eNB[UE_id][eNB_id][CC_id]->UL_multipath_channel_freq);
#ifdef SSE_float
multipath_channel_freq_SSE_float(UE2eNB[UE_id][eNB_id][CC_id],s_re_f,s_im_f,r_re0_f,r_im0_f,
multipath_channel_freq_AVX_float(UE2eNB[UE_id][eNB_id][CC_id],s_re_f,s_im_f,r_re0_f,r_im0_f,
frame_parms->ofdm_symbol_size*frame_parms->symbols_per_tti,hold_channel,eNB_id,UE_id,CC_id,subframe&0x1);//ue timer subframe&0x1
#else
multipath_channel_freq(UE2eNB[UE_id][eNB_id][CC_id],s_re_f,s_im_f,r_re0_f,r_im0_f,
......@@ -1175,7 +1175,7 @@ void do_UL_sig_freq(channel_desc_t *UE2eNB[NUMBER_OF_UE_MAX][NUMBER_OF_eNB_MAX][
#endif
start_meas(&UE2eNB[0][eNB_id][CC_id]->UL_rf_rx_simple_freq);
#ifdef SSE_float
rf_rx_simple_freq_SSE_float(r_re_p_f,
rf_rx_simple_freq_AVX_float(r_re_p_f,
r_im_p_f,
nb_antennas_rx,
frame_parms->ofdm_symbol_size*frame_parms->symbols_per_tti,
......@@ -1210,7 +1210,7 @@ void do_UL_sig_freq(channel_desc_t *UE2eNB[NUMBER_OF_UE_MAX][NUMBER_OF_eNB_MAX][
sf_offset = 0;//subframe*frame_parms->ofdm_symbol_size*frame_parms->symbols_per_tti;
start_meas(&UE2eNB[0][eNB_id][CC_id]->UL_adc);
#ifdef SSE_float
adc_SSE_float(r_re_p_f,
adc_AVX_float(r_re_p_f,
r_im_p_f,
sf_offset,
sf_offset,
......@@ -1385,9 +1385,10 @@ void do_UL_sig_freq_prach(channel_desc_t *UE2eNB[NUMBER_OF_UE_MAX][NUMBER_OF_eNB
//printf("multipath_channel_prach, UE too weak %e\n", ((double)PHY_vars_UE_g[UE_id][CC_id]->tx_power_dBm[subframe] +
//UE2eNB[UE_id][eNB_id][CC_id]->path_loss_dB));
} else {
start_meas(&UE2eNB[0][eNB_id][CC_id]->dac_fixed_gain_PRACH);
#ifdef SSE_float
tx_pwr = dac_fixed_gain_prach_SSE_float((float**)s_re_f_prach,
start_meas(&UE2eNB[0][eNB_id][CC_id]->dac_fixed_gain_PRACH);
tx_pwr = dac_fixed_gain_prach_AVX_float((float**)s_re_f_prach,
(float**)s_im_f_prach,
(int *)tx_prachF,
pointer_firstvalue_PRACH,
......@@ -1399,7 +1400,9 @@ void do_UL_sig_freq_prach(channel_desc_t *UE2eNB[NUMBER_OF_UE_MAX][NUMBER_OF_eNB
(float)PHY_vars_UE_g[UE_id][CC_id]->tx_power_dBm[subframe]-10*log10((double)PHY_vars_UE_g[UE_id][CC_id]->tx_total_RE[subframe]),
PHY_vars_UE_g[UE_id][CC_id]->tx_total_RE[subframe],
PHY_vars_UE_g[UE_id][CC_id]->frame_parms.ofdm_symbol_size); // This make the previous argument the total power
stop_meas(&UE2eNB[0][eNB_id][CC_id]->dac_fixed_gain_PRACH);
#else
start_meas(&UE2eNB[0][eNB_id][CC_id]->dac_fixed_gain_PRACH);
tx_pwr = dac_fixed_gain_prach((double**)s_re_f_prach,
(double**)s_im_f_prach,
(int *)tx_prachF,
......@@ -1412,8 +1415,9 @@ void do_UL_sig_freq_prach(channel_desc_t *UE2eNB[NUMBER_OF_UE_MAX][NUMBER_OF_eNB
(double)PHY_vars_UE_g[UE_id][CC_id]->tx_power_dBm[subframe]-10*log10((double)PHY_vars_UE_g[UE_id][CC_id]->tx_total_RE[subframe]),
PHY_vars_UE_g[UE_id][CC_id]->tx_total_RE[subframe],
PHY_vars_UE_g[UE_id][CC_id]->frame_parms.ofdm_symbol_size); // This make the previous argument the total power
stop_meas(&UE2eNB[0][eNB_id][CC_id]->dac_fixed_gain_PRACH);
#endif
stop_meas(&UE2eNB[0][eNB_id][CC_id]->dac_fixed_gain_PRACH);
//for (int idx=0;idx<10;idx++) printf("dumping raw PRACH UL tx subframe (input) %d: s_f[%d] = (%f,%f)\n", subframe, idx, s_re_f_prach[0][idx],s_im_f_prach[0][idx]);
//for (int idx=829;idx<839;idx++) printf("dumping raw PRACH UL tx subframe (input) %d: s_f[%d] = (%f,%f)\n", subframe, idx, s_re_f_prach[0][idx],s_im_f_prach[0][idx]);
/*LOG_D(OCM,"[SIM][UL] UE %d tx_pwr %f dBm (target %d dBm, num_RE %d) for subframe %d (sf_offset %d)\n",
......@@ -1426,15 +1430,18 @@ void do_UL_sig_freq_prach(channel_desc_t *UE2eNB[NUMBER_OF_UE_MAX][NUMBER_OF_eNB
// write_output("s_re_f_prach.m","s_re_f_prach_txF", s_re_f_prach,frame_parms->ofdm_symbol_size*12,1,1);
start_meas(&UE2eNB[UE_id][eNB_id][CC_id]->multipath_channel_freq_PRACH);
#ifdef SSE_float
multipath_channel_prach_SSE_float(UE2eNB[UE_id][eNB_id][CC_id],s_re_f_prach,s_im_f_prach,r_re0_f_prach,r_im0_f_prach,&PHY_vars_UE_g[UE_id][CC_id]->frame_parms,
start_meas(&UE2eNB[UE_id][eNB_id][CC_id]->multipath_channel_freq_PRACH);
multipath_channel_prach_AVX_float(UE2eNB[UE_id][eNB_id][CC_id],s_re_f_prach,s_im_f_prach,r_re0_f_prach,r_im0_f_prach,&PHY_vars_UE_g[UE_id][CC_id]->frame_parms,
(prach_fmt<4)?13+839+12:3+139+2,hold_channel,eNB_id,prach_fmt,n_ra_prb);
stop_meas(&UE2eNB[UE_id][eNB_id][CC_id]->multipath_channel_freq_PRACH);
#else
start_meas(&UE2eNB[UE_id][eNB_id][CC_id]->multipath_channel_freq_PRACH);
multipath_channel_prach(UE2eNB[UE_id][eNB_id][CC_id],s_re_f_prach,s_im_f_prach,r_re0_f_prach,r_im0_f_prach,&PHY_vars_UE_g[UE_id][CC_id]->frame_parms,
(prach_fmt<4)?13+839+12:3+139+2,hold_channel,eNB_id,prach_fmt,n_ra_prb);
#endif
stop_meas(&UE2eNB[UE_id][eNB_id][CC_id]->multipath_channel_freq_PRACH);
#endif
//for (int idx=0;idx<10;idx++) printf("dumping raw PRACH UL tx subframe (output) %d: r_f[%d] = (%f,%f)\n", subframe, idx, r_re0_f_prach[0][idx],r_im0_f_prach[0][idx]);
//for (int idx=829;idx<839;idx++) printf("dumping raw PRACH UL tx subframe (output) %d: r_f[%d] = (%f,%f)\n", subframe, idx, r_re0_f_prach[0][idx],r_im0_f_prach[0][idx]);
......@@ -1444,7 +1451,7 @@ void do_UL_sig_freq_prach(channel_desc_t *UE2eNB[NUMBER_OF_UE_MAX][NUMBER_OF_eNB
hold_channel,12*frame_parms->N_RB_DL+1,
UE2eNB[UE_id][eNB_id][CC_id]->path_loss_dB);*/
#ifdef SSE_float
rx_pwr = signal_energy_fp_SSE_float(r_re0_f_prach,r_im0_f_prach,nb_antennas_rx,frame_parms->ofdm_symbol_size*frame_parms->symbols_per_tti*12,0);
rx_pwr = signal_energy_fp_AVX_float(r_re0_f_prach,r_im0_f_prach,nb_antennas_rx,frame_parms->ofdm_symbol_size*frame_parms->symbols_per_tti*12,0);
#else
rx_pwr = signal_energy_fp(r_re0_f_prach,r_im0_f_prach,nb_antennas_rx,frame_parms->ofdm_symbol_size*frame_parms->symbols_per_tti*12,0);
#endif
......@@ -1483,9 +1490,9 @@ void do_UL_sig_freq_prach(channel_desc_t *UE2eNB[NUMBER_OF_UE_MAX][NUMBER_OF_eNB
/*for (int idx=0;idx<10;idx++) printf("dumping raw PRACH UL tx subframe (output) %d: r_re_im_p_f_prach[%d] = (%d,%d)\n", subframe, idx, (short)(r_re_p_f_prach[0][idx]),(short)(r_im_p_f_prach[0][idx]));
for (int idx=829;idx<839;idx++) printf("dumping raw PRACH UL tx subframe (output) %d: r_re_im_p_f_prach[%d] = (%d,%d)\n", subframe, idx, (short)(r_re_p_f_prach[0][idx]),(short)(r_im_p_f_prach[0][idx]));*/
//clock_t start=clock();
start_meas(&UE2eNB[0][eNB_id][CC_id]->rf_rx_simple_freq_PRACH);
#ifdef SSE_float
rf_rx_simple_freq_SSE_float(r_re_p_f_prach,
start_meas(&UE2eNB[0][eNB_id][CC_id]->rf_rx_simple_freq_PRACH);
rf_rx_simple_freq_AVX_float(r_re_p_f_prach,
r_im_p_f_prach,
nb_antennas_rx,
(prach_fmt<4)?839:139,
......@@ -1494,7 +1501,9 @@ void do_UL_sig_freq_prach(channel_desc_t *UE2eNB[NUMBER_OF_UE_MAX][NUMBER_OF_eNB
frame_parms->symbols_per_tti,
frame_parms->ofdm_symbol_size,
12.0*frame_parms->N_RB_DL);
stop_meas(&UE2eNB[0][eNB_id][CC_id]->rf_rx_simple_freq_PRACH);
#else
start_meas(&UE2eNB[0][eNB_id][CC_id]->rf_rx_simple_freq_PRACH);
rf_rx_simple_freq(r_re_p_f_prach,
r_im_p_f_prach,
nb_antennas_rx,
......@@ -1504,8 +1513,9 @@ void do_UL_sig_freq_prach(channel_desc_t *UE2eNB[NUMBER_OF_UE_MAX][NUMBER_OF_eNB
frame_parms->symbols_per_tti,
frame_parms->ofdm_symbol_size,
12.0*frame_parms->N_RB_DL);
#endif
stop_meas(&UE2eNB[0][eNB_id][CC_id]->rf_rx_simple_freq_PRACH);
#endif
/*clock_t stop=clock();
printf("UE_PRACH_channel time is %f s, AVERAGE time is %f s, count %d, sum %e, subframe %d\n",(float) (stop-start)/CLOCKS_PER_SEC,(float) (sum+stop-start)/(count*CLOCKS_PER_SEC),count,sum+stop-start,subframe);
......@@ -1518,10 +1528,10 @@ void do_UL_sig_freq_prach(channel_desc_t *UE2eNB[NUMBER_OF_UE_MAX][NUMBER_OF_eNB
#endif
rx_prachF = PHY_vars_eNB_g[eNB_id][CC_id]->prach_vars.rxsigF;
sf_offset = pointer_firstvalue_PRACH;
start_meas(&UE2eNB[0][eNB_id][CC_id]->adc_PRACH);
#ifdef SSE_float
adc_prach_SSE_float(r_re_p_f_prach,
start_meas(&UE2eNB[0][eNB_id][CC_id]->adc_PRACH);
adc_prach_AVX_float(r_re_p_f_prach,
r_im_p_f_prach,
0,
sf_offset,
......@@ -1529,7 +1539,9 @@ void do_UL_sig_freq_prach(channel_desc_t *UE2eNB[NUMBER_OF_UE_MAX][NUMBER_OF_eNB
nb_antennas_rx,
(prach_fmt<4)?839:139,
12);
stop_meas(&UE2eNB[0][eNB_id][CC_id]->adc_PRACH);
#else
start_meas(&UE2eNB[0][eNB_id][CC_id]->adc_PRACH);
adc_prach(r_re_p_f_prach,
r_im_p_f_prach,
0,
......@@ -1538,8 +1550,9 @@ void do_UL_sig_freq_prach(channel_desc_t *UE2eNB[NUMBER_OF_UE_MAX][NUMBER_OF_eNB
nb_antennas_rx,
(prach_fmt<4)?839:139,
12);
stop_meas(&UE2eNB[0][eNB_id][CC_id]->adc_PRACH);
#endif
stop_meas(&UE2eNB[0][eNB_id][CC_id]->adc_PRACH);
//write_output("chsim_rxsigF.m","chsim_rx_sigF", PHY_vars_eNB_g[eNB_id][CC_id]->prach_vars.rxsigF[0],4*frame_parms->ofdm_symbol_size*frame_parms->symbols_per_tti,1,16);
#ifdef DEBUG_SIM
......
......@@ -1125,9 +1125,9 @@ int eNB_trx_read(openair0_device *device, openair0_timestamp *ptimestamp, void *
for (UE_id=0; UE_id<NB_UE_INST; UE_id++){
if (is_prach_subframe(&PHY_vars_UE_g[UE_id][CC_id]->frame_parms,frame,subframe) && PHY_vars_UE_g[UE_id][CC_id]->generate_prach)
{
start_meas(&UE2eNB[UE_id][eNB_id][CC_id]->UL_PRACH_channel);
//clock_t start=clock();
printf("subframe UL PRACH: %d\n",subframe);
start_meas(&UE2eNB[UE_id][eNB_id][CC_id]->UL_PRACH_channel);
do_UL_sig_freq_prach(UE2eNB,
enb_data,
ue_data,
......@@ -1137,11 +1137,11 @@ int eNB_trx_read(openair0_device *device, openair0_timestamp *ptimestamp, void *
0, // frame is only used for abstraction
eNB_id,
CC_id);
stop_meas(&UE2eNB[UE_id][eNB_id][CC_id]->UL_PRACH_channel);
//clock_t stop=clock();
/*printf("do_DL_sig time_prach is %f s, AVERAGE time is %f s, count %d, sum %e\n",(float) (stop-start)/CLOCKS_PER_SEC,(float) (sum+stop-start)/(count1*CLOCKS_PER_SEC),count1,sum+stop-start);
sum=(sum+stop-start);
count1++;*/
stop_meas(&UE2eNB[UE_id][eNB_id][CC_id]->UL_PRACH_channel);
//write_output("txprachF.m","prach_txF", PHY_vars_UE_g[0][CC_id]->prach_vars[0]->prachF,12*frame_parms->ofdm_symbol_size*frame_parms->symbols_per_tti,1,16);
break;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment