Commit eebbd37e authored by lfarizav's avatar lfarizav

new sincos256_ps, log256_ps and exp_256 function using AVX instructions

parent 59582986
This diff is collapsed.
......@@ -313,6 +313,17 @@ void multipath_channel_freq_SSE_float(channel_desc_t *desc,
uint8_t UE_id,
uint8_t CC_id,
uint8_t th_id);
void multipath_channel_freq_AVX_float(channel_desc_t *desc,
float *tx_sig_re[2],
float *tx_sig_im[2],
float *rx_sig_re[2],
float *rx_sig_im[2],
uint32_t length,
uint8_t keep_channel,
uint8_t eNB_id,
uint8_t UE_id,
uint8_t CC_id,
uint8_t th_id);
/**\fn void multipath_channel_prach(channel_desc_t *desc,
double tx_sig_re[2],
double tx_sig_im[2],
......@@ -474,7 +485,9 @@ void table_nor(unsigned long seed);
double nfix(void);
__m128 nfix_SSE(__m128i iz);
__m128 log_ps(__m128 x);
__m256 log256_ps(__m256 x);
__m128 exp_ps(__m128 x);
__m256 exp256_ps(__m256 x);
double uniformrandom(void);
void uniformrandomSSE(__m128d *d1,__m128d *d2);
double ziggurat(double mean, double variance);
......@@ -482,14 +495,17 @@ __m128 ziggurat_SSE_float(void);
void boxmuller_SSE_float(__m128 *data1, __m128 *data2);
int freq_channel(channel_desc_t *desc,uint16_t nb_rb, int16_t n_samples);
int freq_channel_SSE_float(channel_desc_t *desc,uint16_t nb_rb, int16_t n_samples);
int freq_channel_AVX_float(channel_desc_t *desc,uint16_t nb_rb, int16_t n_samples);
int freq_channel_prach(channel_desc_t *desc,uint16_t nb_rb,int16_t n_samples,int16_t prach_fmt,int16_t n_ra_prb);
int freq_channel_prach_SSE_float(channel_desc_t *desc,uint16_t nb_rb,int16_t n_samples,int16_t prach_fmt,int16_t n_ra_prb);
int init_freq_channel(channel_desc_t *desc,uint16_t nb_rb,int16_t n_samples);
int init_freq_channel_SSE_float(channel_desc_t *desc,uint16_t nb_rb,int16_t n_samples);
int init_freq_channel_AVX_float(channel_desc_t *desc,uint16_t nb_rb,int16_t n_samples);
int init_freq_channel_prach(channel_desc_t *desc,uint16_t nb_rb,int16_t n_samples,int16_t prach_fmt,int16_t n_ra_prb);
int init_freq_channel_prach_SSE_float(channel_desc_t *desc,uint16_t nb_rb,int16_t n_samples,int16_t prach_fmt,int16_t n_ra_prb);
void sincos_ps(__m128 x, __m128 *s, __m128 *c);
void sincos256_ps(__m256 x, __m256 *s, __m256 *c);
uint8_t multipath_channel_nosigconv(channel_desc_t *desc);
void multipath_tv_channel(channel_desc_t *desc,
......
......@@ -615,6 +615,127 @@ void multipath_channel_freq_SSE_float(channel_desc_t *desc,
} // f,f2,f3
//}//k
}
void multipath_channel_freq_AVX_float(channel_desc_t *desc,
float *tx_sig_re[2],
float *tx_sig_im[2],
float *rx_sig_re[2],
float *rx_sig_im[2],
uint32_t length,
uint8_t keep_channel,
uint8_t eNB_id,
uint8_t UE_id,
uint8_t CC_id,
uint8_t th_id)
{
int ii,j,f;
__m256 rx_tmp256_re_f,rx_tmp256_im_f,rx_tmp256_re,rx_tmp256_im, rx_tmp256_1,rx_tmp256_2,rx_tmp256_3,rx_tmp256_4,tx256_re,tx256_im,chF256_x,chF256_y,pathloss256;
//struct complex rx_tmp;
float path_loss = pow(10,desc->path_loss_dB/20);
pathloss256 = _mm256_set1_ps(path_loss);
int nb_rb, n_samples, ofdm_symbol_size, symbols_per_tti;
nb_rb=PHY_vars_UE_g[UE_id][CC_id]->frame_parms.N_RB_DL;
n_samples=PHY_vars_UE_g[UE_id][CC_id]->frame_parms.N_RB_DL*12+1;
ofdm_symbol_size=length/PHY_vars_UE_g[UE_id][CC_id]->frame_parms.symbols_per_tti;
symbols_per_tti=length/PHY_vars_UE_g[UE_id][CC_id]->frame_parms.ofdm_symbol_size;
#ifdef DEBUG_CH
printf("[CHANNEL_FREQ] keep = %d : path_loss = %g (%f), nb_rx %d, nb_tx %d, dd %d, len %d \n",keep_channel,path_loss,desc->path_loss_dB,desc->nb_rx,desc->nb_tx,dd,desc->channel_length);
#endif
if (keep_channel) {
// do nothing - keep channel
} else {
random_channel_freq(desc,0);
freq_channel_SSE_float(desc,nb_rb,n_samples);//Find desc->chF
}
for (j=0;j<(symbols_per_tti>>2);j++){
for (ii=0; ii<desc->nb_rx; ii++) {
_mm256_storeu_ps(&rx_sig_re[ii][4*j*ofdm_symbol_size],_mm256_setzero_ps());
_mm256_storeu_ps(&rx_sig_im[ii][4*j*ofdm_symbol_size],_mm256_setzero_ps());
}
}
for (f=0;f<((ofdm_symbol_size*symbols_per_tti)>>3); f++) {//f2 = 0-1024*14-1 ---- for 10 Mhz BW
//printf("f is %d\n",f);
for (ii=0; ii<desc->nb_rx; ii++) {
//rx_tmp.x = 0;
//rx_tmp.y = 0;
rx_tmp256_re_f = _mm256_setzero_ps();
rx_tmp256_im_f = _mm256_setzero_ps();
if (f%(ofdm_symbol_size>>2)<(n_samples>>2))//1-300
{
for (j=0; j<desc->nb_tx; j++) {
//first n_samples>>1 samples of each frequency ofdm symbol out of ofdm_symbol_size
//RX_RE(k) += TX_RE(k).chF(k).x - TX_IM(k).chF(k).y
//RX_IM(k) += TX_IM(k).chF(k).x + TX_RE(k).chF(k).y
tx256_re = _mm256_loadu_ps(&tx_sig_re[j][(4*f+1)]);
tx256_im = _mm256_loadu_ps(&tx_sig_im[j][(4*f+1)]);
chF256_x = _mm256_set1_ps(desc->chFf[ii+(j*desc->nb_rx)].x[(4*(f%(ofdm_symbol_size>>2)))+(n_samples>>2)]);
chF256_y = _mm256_set1_ps(desc->chFf[ii+(j*desc->nb_rx)].y[(4*(f%(ofdm_symbol_size>>2)))+(n_samples>>2)]);
//rx_tmp.x += (tx_sig_re[j][f+k*ofdm_symbol_size] * desc->chF[ii+(j*desc->nb_rx)][f+(n_samples>>1)-1].x)//tx128_re*ch128_x
// -(tx_sig_im[j][f+k*ofdm_symbol_size] * desc->chF[ii+(j*desc->nb_rx)][f+(n_samples>>1)-1].y);//-tx128_im*ch128_y
//rx_tmp.y += (tx_sig_im[j][f+k*ofdm_symbol_size] * desc->chF[ii+(j*desc->nb_rx)][f+(n_samples>>1)-1].x)//tx128_im*ch128_x
// +(tx_sig_re[j][f+k*ofdm_symbol_size] * desc->chF[ii+(j*desc->nb_rx)][f+(n_samples>>1)-1].y);//+tx128_re*ch128_y
rx_tmp256_1 = _mm256_mul_ps(tx256_re,chF256_x);
rx_tmp256_2 = _mm256_mul_ps(tx256_im,chF256_y);
rx_tmp256_3 = _mm256_mul_ps(tx256_im,chF256_x);
rx_tmp256_4 = _mm256_mul_ps(tx256_re,chF256_y);
rx_tmp256_re = _mm256_sub_ps(rx_tmp256_1,rx_tmp256_2);
rx_tmp256_im = _mm256_add_ps(rx_tmp256_3,rx_tmp256_4);
rx_tmp256_re_f = _mm256_add_ps(rx_tmp256_re_f,rx_tmp256_re);
rx_tmp256_im_f = _mm256_add_ps(rx_tmp256_im_f,rx_tmp256_im);
} // j
//rx_sig_re[ii][f+k*ofdm_symbol_size] = rx_tmp.x*path_loss;
//rx_sig_im[ii][f+k*ofdm_symbol_size] = rx_tmp.y*path_loss;
rx_tmp256_re_f = _mm256_mul_ps(rx_tmp256_re_f,pathloss256);
rx_tmp256_im_f = _mm256_mul_ps(rx_tmp256_im_f,pathloss256);
_mm256_storeu_ps(&rx_sig_re[ii][(4*f+1)],rx_tmp256_re_f);
_mm256_storeu_ps(&rx_sig_im[ii][(4*f+1)],rx_tmp256_im_f);
}
else if (f%(ofdm_symbol_size>>2)>(n_samples>>2) && f%(ofdm_symbol_size>>2)<(ofdm_symbol_size>>2)-(n_samples>>2))
{
//rx_sig_re[ii][f+k*ofdm_symbol_size] = 0;
//rx_sig_im[ii][f+k*ofdm_symbol_size] = 0;
//_mm_storeu_pd(&rx_sig_re[ii][2*f],_mm_setzero_pd());
//_mm_storeu_pd(&rx_sig_im[ii][2*f],_mm_setzero_pd());
break;
}
else
{
for (j=0; j<desc->nb_tx; j++) {
//last n_samples>>1 samples of each frequency ofdm symbol out of ofdm_symbol_size
//RX_RE(k) += TX_RE(k).chF(k).x - TX_IM(k).chF(k).y
//RX_IM(k) += TX_IM(k).chF(k).x + TX_RE(k).chF(k).y
tx256_re = _mm256_loadu_ps(&tx_sig_re[j][4*f]);
tx256_im = _mm256_loadu_ps(&tx_sig_im[j][4*f]);
chF256_x = _mm256_set1_ps(desc->chFf[ii+(j*desc->nb_rx)].x[4*(f%(ofdm_symbol_size>>2)-((ofdm_symbol_size>>2)-(n_samples>>2)))]);
chF256_y = _mm256_set1_ps(desc->chFf[ii+(j*desc->nb_rx)].y[4*(f%(ofdm_symbol_size>>2)-((ofdm_symbol_size>>2)-(n_samples>>2)))]);
//rx_tmp.x += (tx_sig_re[j][f+k*ofdm_symbol_size] * desc->chF[ii+(j*desc->nb_rx)][f2].x)
// -(tx_sig_im[j][f+k*ofdm_symbol_size] * desc->chF[ii+(j*desc->nb_rx)][f2].y);
//rx_tmp.y += (tx_sig_im[j][f+k*ofdm_symbol_size] * desc->chF[ii+(j*desc->nb_rx)][f2].x)
// +(tx_sig_re[j][f+k*ofdm_symbol_size] * desc->chF[ii+(j*desc->nb_rx)][f2].y);
rx_tmp256_1 = _mm256_mul_ps(tx256_re,chF256_x);
rx_tmp256_2 = _mm256_mul_ps(tx256_im,chF256_y);
rx_tmp256_3 = _mm256_mul_ps(tx256_im,chF256_x);
rx_tmp256_4 = _mm256_mul_ps(tx256_re,chF256_y);
rx_tmp256_re = _mm256_sub_ps(rx_tmp256_1,rx_tmp256_2);
rx_tmp256_im = _mm256_add_ps(rx_tmp256_3,rx_tmp256_4);
rx_tmp256_re_f = _mm256_add_ps(rx_tmp256_re_f,rx_tmp256_re);
rx_tmp256_im_f = _mm256_add_ps(rx_tmp256_im_f,rx_tmp256_im);
} // j
//rx_sig_re[ii][f+k*ofdm_symbol_size] = rx_tmp.x*path_loss;
//rx_sig_im[ii][f+k*ofdm_symbol_size] = rx_tmp.y*path_loss;
rx_tmp256_re_f = _mm256_mul_ps(rx_tmp256_re_f,pathloss256);
rx_tmp256_im_f = _mm256_mul_ps(rx_tmp256_im_f,pathloss256);
_mm256_storeu_ps(&rx_sig_re[ii][4*f],rx_tmp256_re_f);
_mm256_storeu_ps(&rx_sig_im[ii][4*f],rx_tmp256_im_f);
}
} // ii
} // f,f2,f3
//}//k
}
#ifdef CHANNEL_SSE
void multipath_channel_prach(channel_desc_t *desc,
double *tx_sig_re[2],
......
......@@ -33,7 +33,7 @@
#ifdef USER_MODE
#include <stdio.h>
#include <stdlib.h>
#include <stdin.h>
#include <stdint.h>
#include <math.h>
#include <time.h>
#endif
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment