Commit d243b72e authored by Luis Ariza's avatar Luis Ariza

Starting Ziggurat version SSE2 coding

parent cd07e9eb
......@@ -1155,31 +1155,7 @@ int phy_init_lte_ue(PHY_VARS_UE *ue,
for (i=0; i<fp->nb_antennas_rx; i++) {
common_vars->rxdata[i] = (int32_t*) malloc16_clear( (fp->samples_per_tti*10+2048)*sizeof(int32_t) );
}
/*if (do_ofdm_mod)
{
common_vars->common_vars_rx_data_per_thread[0].rxdataF = (int32_t**)malloc16( fp->nb_antennas_rx*sizeof(int32_t*));
printf("[lte_init_f] address of rxdataF in memory: %p, thread %d\n",&common_vars->common_vars_rx_data_per_thread[0].rxdataF,0);
for (i=0; i<fp->nb_antennas_rx; i++) {
common_vars->common_vars_rx_data_per_thread[0].rxdataF[i] = (int32_t*)malloc16_clear((10*fp->ofdm_symbol_size*fp->symbols_per_tti)*sizeof(int32_t));
printf("[lte_init_f] address of rxdataF[i] in memory: %p, thread %d, antenna %d\n",&common_vars->common_vars_rx_data_per_thread[0].rxdataF[i],0,i);
}
//rxdata_temp. Please remove this dummy allocation when all arrays are size fixed. If not, the multichannel does not work.
rxdataF_temp = (int32_t**)malloc16( fp->nb_antennas_rx*sizeof(int32_t*) );
for (i=0; i<fp->nb_antennas_rx; i++) {
rxdataF_temp[i] = (int32_t*)malloc16_clear((14313)*sizeof(int32_t));
}
common_vars->common_vars_rx_data_per_thread[1].rxdataF = (int32_t**)malloc16( fp->nb_antennas_rx*sizeof(int32_t*) );
printf("[lte_init_f] address of rxdataF in memory: %p, thread %d\n",&common_vars->common_vars_rx_data_per_thread[1].rxdataF,1);
for (i=0; i<fp->nb_antennas_rx; i++) {
common_vars->common_vars_rx_data_per_thread[1].rxdataF[i] = (int32_t*)malloc16_clear((10*fp->ofdm_symbol_size*fp->symbols_per_tti)*sizeof(int32_t));
printf("[lte_init_f address of rxdataF[i] in memory: %p, thread %d, antenna %d\n",&common_vars->common_vars_rx_data_per_thread[1].rxdataF[i],1,i);
}
}
else
//{*/
for (th_id=0; th_id<RX_NB_TH_MAX; th_id++){
common_vars->common_vars_rx_data_per_thread[th_id].rxdataF = (int32_t**)malloc16( fp->nb_antennas_rx*sizeof(int32_t*) );
printf("[lte_init_t] address of rxdataF in memory: %p, thread %d\n",&common_vars->common_vars_rx_data_per_thread[th_id].rxdataF,th_id);
......@@ -1208,19 +1184,6 @@ int phy_init_lte_ue(PHY_VARS_UE *ue,
}
}
}
/*for (i=0; i<fp->ofdm_symbol_size*fp->symbols_per_tti; i++) {
for (int aa=0; aa<fp->nb_antennas_rx; aa++) {
((short *)common_vars->common_vars_rx_data_per_thread[1].rxdataF[aa])[((i+fp->ofdm_symbol_size*fp->symbols_per_tti)<<1)] = (short)(i);
((short *)common_vars->common_vars_rx_data_per_thread[1].rxdataF[aa])[1+((i+fp->ofdm_symbol_size*fp->symbols_per_tti)<<1)] = (short)(1);
if (i < 300) {
printf("[lte-init] rxdataF (thread[%d]) %d: (%d,%d), memory addr %p\n",1,i,((short *)common_vars->common_vars_rx_data_per_thread[1].rxdataF[aa])[((i+fp->ofdm_symbol_size*fp->symbols_per_tti)<<1)],((short *)common_vars->common_vars_rx_data_per_thread[1].rxdataF[aa])[1+((i+fp->ofdm_symbol_size*fp->symbols_per_tti)<<1)],&common_vars->common_vars_rx_data_per_thread[1].rxdataF[aa][1+((i+fp->ofdm_symbol_size*fp->symbols_per_tti)<<1)]);
printf("[lte-init] rxdataF (thread[%d]) %d: (%d,%d), memory addr %p\n",0,i,((short *)common_vars->common_vars_rx_data_per_thread[0].rxdataF[aa])[((i+4+2*fp->ofdm_symbol_size*fp->symbols_per_tti)<<1)],((short *)common_vars->common_vars_rx_data_per_thread[0].rxdataF[aa])[1+((i+4+2*fp->ofdm_symbol_size*fp->symbols_per_tti)<<1)],&common_vars->common_vars_rx_data_per_thread[0].rxdataF[aa][1+((i+4+2*fp->ofdm_symbol_size*fp->symbols_per_tti)<<1)]);
}
}
}*/
}
// DLSCH
......
......@@ -17,6 +17,12 @@
*-------------------------------------------------------------------------------
* For more information about the OpenAirInterface (OAI) Software Alliance:
* contact@openairinterface.org
*-------------------------------------------------------------------------------
* Optimization using SIMD instructions
* Frecuency Domain Analysis
* Luis Felipe Ariza Vesga, email:lfarizav@unal.edu.co
* Functions: adc_SSE_float(), adc_prach(), adc_prach_SSE_float().
*-------------------------------------------------------------------------------
*/
#include <string.h>
#include <stdio.h>
......@@ -154,59 +160,7 @@ void adc_freq(double *r_re[2],
//printf("Adc outputs %d %e %d \n",i,((short *)output[0])[((i+output_offset)<<1)], ((i+output_offset)<<1) );
}
}
/*void adc_freq(double *r_re[2],
double *r_im[2],
unsigned int input_offset,
unsigned int output_offset,
unsigned int **output1,//thread th_id
unsigned int **output2,//thread 0
unsigned int **output3,//thread 1
unsigned int nb_rx_antennas,
unsigned int length,
unsigned char B,
int thread)
{
int i;
//int th_id;
int aa;
double gain = (double)(1<<(B-1));*/
/*int dummy_rx[nb_rx_antennas][length] __attribute__((aligned(32)));
for (aa=0; aa<nb_rx_antennas; aa++) {
memset (&output1[aa][output_offset],0,length*sizeof(int));
}*/
//double gain = 1.0;
/*for (i=0; i<length; i++) {
for (aa=0; aa<nb_rx_antennas; aa++) {
((short *)output1[aa])[((i+output_offset)<<1)] = (short)(r_re[aa][i+input_offset]*gain);
((short *)output1[aa])[1+((i+output_offset)<<1)] = (short)(r_im[aa][i+input_offset]*gain);
if ((r_re[aa][i+input_offset]*gain) > 30000) {
//("Adc outputs %d %e %d \n",i,((short *)output[0])[((i+output_offset)<<1)], ((i+output_offset)<<1) );
}
if (i < 300) {
printf("rxdataF (thread[%d]) %d: (%d,%d)\n",thread,i,((short *)output1[aa])[((i+output_offset)<<1)],((short *)output1[aa])[1+((i+output_offset)<<1)]);
if (thread==0 && output_offset>length)
printf("rxdataF (thread[1]) %d: (%d,%d) \n",i,((short *)output3[aa])[((i+output_offset-length-4)<<1)],((short *)output3[aa])[1+((i+output_offset-length-4)<<1)]);
else if (thread==1)
printf("rxdataF (thread[0]) %d: (%d,%d) \n",i,((short *)output2[aa])[((i+output_offset+length+4)<<1)],((short *)output2[aa])[1+((i+output_offset+length+4)<<1)]);
}
}*/
/*for (aa=0; aa<nb_rx_antennas; aa++) {
for (th_id=1; th_id<2; th_id++)
{
memcpy((void *)output[aa][output_offset],
(void *)output[aa][output_offset],
length*sizeof(int));
}
}*/
/*}
printf("thread %d\n",(unsigned int)thread);
//write_output("adc_rxsigF_frame0.m","adc_rxsF0", output1[0],10*length,1,16);
//write_output("adc_rxsigF_frame1.m","adc_rxsF1", output2[0],10*length,1,16);
}*/
void adc_prach(double *r_re[2],
double *r_im[2],
unsigned int input_offset,
......@@ -251,23 +205,82 @@ void adc_prach_SSE_float(float *r_re[2],
int i;
int aa;
double gain = (double)(1<<(B-1));
__m128 r_re128,r_im128,gain128;
__m128i r_re128i, r_im128i,output128;
float gain = (double)(1<<(B-1));
gain128=_mm_set1_ps(gain);
//double gain = 1.0;
for (i=0; i<length; i++) {
for (i=0; i<(length>>2); i++) {
for (aa=0; aa<nb_rx_antennas; aa++) {
((short *)output[aa])[((i+output_offset/2)<<1)] = (short)(r_re[aa][i+input_offset]*gain);
((short *)output[aa])[1+((i+output_offset/2)<<1)] = (short)(r_im[aa][i+input_offset]*gain);
#ifdef DEBUG_ADC
if (i<10)
printf("[adc_prach]i %d. input (%d,%d), output (%d,%d)\n",i,(short)(r_re[aa][i+input_offset]),(short)(r_im[aa][i+input_offset]),((short *)output[aa])[((i+output_offset/2)<<1)],((short *)output[aa])[1+((i+output_offset/2)<<1)]);
if (i>length-10&&i<length)
#endif
if ((r_re[aa][i+input_offset]*gain) > 30000) {
//("Adc outputs %d %e %d \n",i,((short *)output[0])[((i+output_offset)<<1)], ((i+output_offset)<<1) );
}
//((short *)output[aa])[((i+output_offset/2)<<1)] = (short)(r_re[aa][i+input_offset]*gain);
//((short *)output[aa])[1+((i+output_offset/2)<<1)] = (short)(r_im[aa][i+input_offset]*gain);
r_re128=_mm_loadu_ps(&r_re[aa][4*i+input_offset]);
r_im128=_mm_loadu_ps(&r_im[aa][4*i+input_offset]);
r_re128=_mm_mul_ps(r_re128,gain128);
r_im128=_mm_mul_ps(r_im128,gain128);
r_re128i=_mm_cvtps_epi32(r_re128);
r_im128i=_mm_cvtps_epi32(r_im128);
r_re128i=_mm_packs_epi32(r_re128i,r_re128i);
r_im128i=_mm_packs_epi32(r_im128i,r_im128i);
output128=_mm_unpacklo_epi16(r_re128i,r_im128i);
_mm_storeu_si128((__m128i *)&output[aa][4*i+output_offset/2],output128);
}
//printf("Adc outputs %d %e %d \n",i,((short *)output[0])[((i+output_offset)<<1)], ((i+output_offset)<<1) );
}
}
/*void adc_freq(double *r_re[2],
double *r_im[2],
unsigned int input_offset,
unsigned int output_offset,
unsigned int **output1,//thread th_id
unsigned int **output2,//thread 0
unsigned int **output3,//thread 1
unsigned int nb_rx_antennas,
unsigned int length,
unsigned char B,
int thread)
{
int i;
//int th_id;
int aa;
double gain = (double)(1<<(B-1));*/
/*int dummy_rx[nb_rx_antennas][length] __attribute__((aligned(32)));
for (aa=0; aa<nb_rx_antennas; aa++) {
memset (&output1[aa][output_offset],0,length*sizeof(int));
}*/
//double gain = 1.0;
/*for (i=0; i<length; i++) {
for (aa=0; aa<nb_rx_antennas; aa++) {
((short *)output1[aa])[((i+output_offset)<<1)] = (short)(r_re[aa][i+input_offset]*gain);
((short *)output1[aa])[1+((i+output_offset)<<1)] = (short)(r_im[aa][i+input_offset]*gain);
if ((r_re[aa][i+input_offset]*gain) > 30000) {
//("Adc outputs %d %e %d \n",i,((short *)output[0])[((i+output_offset)<<1)], ((i+output_offset)<<1) );
}
if (i < 300) {
printf("rxdataF (thread[%d]) %d: (%d,%d)\n",thread,i,((short *)output1[aa])[((i+output_offset)<<1)],((short *)output1[aa])[1+((i+output_offset)<<1)]);
if (thread==0 && output_offset>length)
printf("rxdataF (thread[1]) %d: (%d,%d) \n",i,((short *)output3[aa])[((i+output_offset-length-4)<<1)],((short *)output3[aa])[1+((i+output_offset-length-4)<<1)]);
else if (thread==1)
printf("rxdataF (thread[0]) %d: (%d,%d) \n",i,((short *)output2[aa])[((i+output_offset+length+4)<<1)],((short *)output2[aa])[1+((i+output_offset+length+4)<<1)]);
}
}*/
/*for (aa=0; aa<nb_rx_antennas; aa++) {
for (th_id=1; th_id<2; th_id++)
{
memcpy((void *)output[aa][output_offset],
(void *)output[aa][output_offset],
length*sizeof(int));
}
}*/
/*}
printf("thread %d\n",(unsigned int)thread);
//write_output("adc_rxsigF_frame0.m","adc_rxsF0", output1[0],10*length,1,16);
//write_output("adc_rxsigF_frame1.m","adc_rxsF1", output2[0],10*length,1,16);
}*/
......@@ -17,6 +17,12 @@
*-------------------------------------------------------------------------------
* For more information about the OpenAirInterface (OAI) Software Alliance:
* contact@openairinterface.org
*-------------------------------------------------------------------------------
* Optimization using SIMD instructions
* Frecuency Domain Analysis
* Luis Felipe Ariza Vesga, email:lfarizav@unal.edu.co
* Functions: dac_fixed_gain_SSE_float(), dac_fixed_gain_prach(), dac_fixed_gain_prach_SSE_float().
*-------------------------------------------------------------------------------
*/
//#define DEBUG_DAC
......@@ -297,8 +303,8 @@ float dac_fixed_gain_prach_SSE_float(float *s_re[2],
int i;
int aa;
float amp,amp1;
int k;
float amp,amp1,div;
__m128 input_re128, input_im128;
amp = //sqrt(NB_RE)*pow(10.0,.05*txpwr_dBm)/sqrt(nb_tx_antennas); //this is amp per tx antenna
pow(10.0,.05*txpwr_dBm)/sqrt(nb_tx_antennas); //this is amp per tx antenna
......@@ -322,27 +328,26 @@ float dac_fixed_gain_prach_SSE_float(float *s_re[2],
amp1 = amp1*sqrt(512.0/300.0); //account for loss due to null carriers
//printf("DL: amp1 %f dB (%d,%d), tx_power %f\n",20*log10(amp1),input_offset,input_offset_meas,txpwr_dBm);
*/
#ifdef DEBUG_DAC
printf("DAC: input_offset %d, amp %e, amp1 %e\n",input_offset,amp,amp1);
#endif
k=input_offset;
for (i=0; i<length*2; i+=2) {
div=amp/amp1;
for (i=0; i<(length>>2); i++) {
for (aa=0; aa<nb_tx_antennas; aa++) {
s_re[aa][i/2] = amp*((float)(((short *)input))[((k))])/amp1; ///(1<<(B-1));
s_im[aa][i/2] = amp*((float)(((short *)input))[((k))+1])/amp1; ///(1<<(B-1));
#ifdef DEBUG_DAC
if(i<20)
printf("DAC[%d]: input (%d,%d). output (%e,%e)\n",i/2,(((short *)input))[((k))],(((short *)input))[((k))+1],s_re[aa][i/2],s_im[aa][i/2]);
if (i>length*2-20&&i<length*2)
printf("DAC[%d]: input (%d,%d). output (%e,%e)\n",i/2,(((short *)input))[((k))],(((short *)input))[((k))+1],s_re[aa][i/2],s_im[aa][i/2]);
#endif
k+=2;
if (k==12*2*ofdm_symbol_size)
k=0;
//s_re[aa][i] = div*((float)(((short *)input))[((input_offset+2*i))]); ///(1<<(B-1));
//s_im[aa][i] = div*((float)(((short *)input))[((input_offset+2*i))+1]); ///(1<<(B-1));
input_re128=_mm_set_ps((float)(((short *)input))[2*(4*i+3)+input_offset],(float)(((short *)input))[2*(4*i+2)+input_offset],(float)(((short *)input))[2*(4*i+1)+input_offset],(float)(((short *)input))[2*(4*i)+input_offset]);
input_im128=_mm_set_ps((float)(((short *)input))[2*(4*i+3)+1+input_offset],(float)(((short *)input))[2*(4*i+2)+1+input_offset],(float)(((short *)input))[2*(4*i+1)+1+input_offset],(float)(((short *)input))[2*(4*i)+1+input_offset]);
input_re128=_mm_mul_ps(input_re128,_mm_set1_ps(div));
input_im128=_mm_mul_ps(input_im128,_mm_set1_ps(div));
_mm_storeu_ps(&s_re[aa][4*i],input_re128);
_mm_storeu_ps(&s_im[aa][4*i],input_im128);
if (2*i+input_offset==12*2*ofdm_symbol_size)
i=0;
}
}
// printf("ener %e\n",signal_energy_fp(s_re,s_im,nb_tx_antennas,length,0));
return(signal_energy_fp(s_re,s_im,nb_tx_antennas,length_meas,0)/NB_RE);
return(signal_energy_fp_SSE_float(s_re,s_im,nb_tx_antennas,length_meas,0)/NB_RE);
}
......@@ -17,6 +17,12 @@
*-------------------------------------------------------------------------------
* For more information about the OpenAirInterface (OAI) Software Alliance:
* contact@openairinterface.org
*-------------------------------------------------------------------------------
* Optimization using SIMD instructions
* Frecuency Domain Analysis
* Luis Felipe Ariza Vesga, email:lfarizav@unal.edu.co
* Functions: rf_rx_simple_freq(), rf_rx_simple_freq_SSE_float().
*-------------------------------------------------------------------------------
*/
//#include "defs.h"
......@@ -294,17 +300,15 @@ clock_t start=clock();*/
}
else
{
//rx128_gain_lin=mm_mul_set1_ps(rx_gain_lin);
rx128_re = _mm_loadu_pd(&r_re[a][2*i]);//r_re[a][i],r_re[a][i+1]
rx128_im = _mm_loadu_pd(&r_im[a][2*i]);//r_im[a][i],r_im[a][i+1]
rx128_gain_lin = _mm_set1_pd(rx_gain_lin);
gauss_0_128_sqrt_NOW = _mm_set_pd(ziggurat(0.0,1.0),ziggurat(0.0,1.0));
gauss_1_128_sqrt_NOW = _mm_set_pd(ziggurat(0.0,1.0),ziggurat(0.0,1.0));
//gauss_0_128_sqrt_NOW = _mm_set_pd(ziggurat(0.0,1.0),ziggurat(0.0,1.0));
//gauss_1_128_sqrt_NOW = _mm_set_pd(ziggurat(0.0,1.0),ziggurat(0.0,1.0));
boxmuller_SSE_float(&gauss_0_128_sqrt_NOW, &gauss_1_128_sqrt_NOW);
gauss_0_128_sqrt_NOW = _mm_mul_pd(gauss_0_128_sqrt_NOW,_mm_set1_pd(sqrt_NOW));
gauss_1_128_sqrt_NOW = _mm_mul_pd(gauss_1_128_sqrt_NOW,_mm_set1_pd(sqrt_NOW));
// Amplify by receiver gain and apply 3rd order non-linearity
//r_re[a][i] = rx_gain_lin*(r_re[a][i] + sqrt(.5*N0W)*gaussdouble(0.0,1.0));
//r_im[a][i] = rx_gain_lin*(r_im[a][i] + sqrt(.5*N0W)*gaussdouble(0.0,1.0));
rx128_re = _mm_add_pd(rx128_re,gauss_0_128_sqrt_NOW);
rx128_im = _mm_add_pd(rx128_im,gauss_1_128_sqrt_NOW);
rx128_re = _mm_mul_pd(rx128_re,rx128_gain_lin);
......@@ -419,7 +423,6 @@ clock_t start=clock();*/
}
else
{*/
//rx128_gain_lin=mm_mul_set1_ps(rx_gain_lin);
rx128_re = _mm_loadu_ps(&r_re[a][4*i]);//r_re[a][i],r_re[a][i+1]
rx128_im = _mm_loadu_ps(&r_im[a][4*i]);//r_im[a][i],r_im[a][i+1]
//start_meas(&desc->ziggurat);
......@@ -427,17 +430,9 @@ clock_t start=clock();*/
//gauss_1_128_sqrt_NOW = _mm_set_ps(ziggurat(0.0,1.0),ziggurat(0.0,1.0),ziggurat(0.0,1.0),ziggurat(0.0,1.0));
boxmuller_SSE_float(&gauss_0_128_sqrt_NOW, &gauss_1_128_sqrt_NOW);
//stop_meas(&desc->ziggurat);
//_mm_storeu_ps(out,gauss_0_128_sqrt_NOW);
//_mm_storeu_ps(out1,gauss_1_128_sqrt_NOW);
//printf("Ziggurat is %e,%e,%e,%e\n",ziggurat(0.0,1.0),ziggurat(0.0,1.0),ziggurat(0.0,1.0),ziggurat(0.0,1.0));
//printf("boxmuller is %e,%e,%e,%e\n",gaussdouble(0.0,1.0),gaussdouble(0.0,1.0),gaussdouble(0.0,1.0),gaussdouble(0.0,1.0));
//printf("NOR SSE is %e,%e,%e,%e\n",out[0],out[1],out[2],out[3]);
//printf("NOR SSE1 is %e,%e,%e,%e\n",out1[0],out1[1],out1[2],out1[3]);
gauss_0_128_sqrt_NOW = _mm_mul_ps(gauss_0_128_sqrt_NOW,_mm_set1_ps(sqrt_NOW));
gauss_1_128_sqrt_NOW = _mm_mul_ps(gauss_1_128_sqrt_NOW,_mm_set1_ps(sqrt_NOW));
// Amplify by receiver gain and apply 3rd order non-linearity
//r_re[a][i] = rx_gain_lin*(r_re[a][i] + sqrt(.5*N0W)*gaussdouble(0.0,1.0));
//r_im[a][i] = rx_gain_lin*(r_im[a][i] + sqrt(.5*N0W)*gaussdouble(0.0,1.0));
rx128_re = _mm_add_ps(_mm_mul_ps(rx128_re,rx128_gain_lin),gauss_0_128_sqrt_NOW);
rx128_im = _mm_add_ps(_mm_mul_ps(rx128_im,rx128_gain_lin),gauss_1_128_sqrt_NOW);
_mm_storeu_ps(&r_re[a][4*i],rx128_re);
......@@ -445,6 +440,29 @@ clock_t start=clock();*/
//}
}
}
/*rx128_re = _mm_loadu_ps(&r_re[a][4*i+ofdm_symbol_size*j]);//r_re[a][i],r_re[a][i+1]
rx128_im = _mm_loadu_ps(&r_im[a][4*i+ofdm_symbol_size*j]);//r_im[a][i],r_im[a][i+1]
rx128_re_1 = _mm_loadu_ps(&r_re[a][(ofdm_symbol_size-n_samples)+4*i+ofdm_symbol_size*j]);//r_re[a][i],r_re[a][i+1]
rx128_im_1 = _mm_loadu_ps(&r_im[a][(ofdm_symbol_size-n_samples)+4*i+ofdm_symbol_size*j]);//r_im[a][i],r_im[a][i+1]
//start_meas(&desc->ziggurat);
//gauss_0_128_sqrt_NOW = _mm_set_ps(ziggurat(0.0,1.0),ziggurat(0.0,1.0),ziggurat(0.0,1.0),ziggurat(0.0,1.0));
//gauss_1_128_sqrt_NOW = _mm_set_ps(ziggurat(0.0,1.0),ziggurat(0.0,1.0),ziggurat(0.0,1.0),ziggurat(0.0,1.0));
boxmuller_SSE_float(&gauss_0_128_sqrt_NOW, &gauss_1_128_sqrt_NOW);
boxmuller_SSE_float(&gauss_0_128_sqrt_NOW_1, &gauss_1_128_sqrt_NOW_1);
//stop_meas(&desc->ziggurat);
gauss_0_128_sqrt_NOW = _mm_mul_ps(gauss_0_128_sqrt_NOW,_mm_set1_ps(sqrt_NOW));
gauss_1_128_sqrt_NOW = _mm_mul_ps(gauss_1_128_sqrt_NOW,_mm_set1_ps(sqrt_NOW));
gauss_0_128_sqrt_NOW_1 = _mm_mul_ps(gauss_0_128_sqrt_NOW_1,_mm_set1_ps(sqrt_NOW));
gauss_1_128_sqrt_NOW_1 = _mm_mul_ps(gauss_1_128_sqrt_NOW_1,_mm_set1_ps(sqrt_NOW));
// Amplify by receiver gain and apply 3rd order non-linearity
rx128_re = _mm_add_ps(_mm_mul_ps(rx128_re,rx128_gain_lin),gauss_0_128_sqrt_NOW);
rx128_im = _mm_add_ps(_mm_mul_ps(rx128_im,rx128_gain_lin),gauss_1_128_sqrt_NOW);
rx128_re_1 = _mm_add_ps(_mm_mul_ps(rx128_re_1,rx128_gain_lin),gauss_0_128_sqrt_NOW_1);
rx128_im_1 = _mm_add_ps(_mm_mul_ps(rx128_im_1,rx128_gain_lin),gauss_1_128_sqrt_NOW_1);
_mm_storeu_ps(&r_re[a][4*i+ofdm_symbol_size*j],rx128_re);
_mm_storeu_ps(&r_im[a][4*i+ofdm_symbol_size*j],rx128_im);
_mm_storeu_ps(&r_re[a][(ofdm_symbol_size-n_samples)+4*i+ofdm_symbol_size*j],rx128_re_1);
_mm_storeu_ps(&r_im[a][(ofdm_symbol_size-n_samples)+4*i+ofdm_symbol_size*j],rx128_im_1);*/
/*clock_t stop=clock();
printf("do_DL_sig time is %f s, AVERAGE time is %f s, count %d, sum %e\n",(float) (stop-start)/CLOCKS_PER_SEC,(float) (sum+stop-start)/(count*CLOCKS_PER_SEC),count,sum+stop-start);
sum=(sum+stop-start);*/
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -17,6 +17,13 @@
*-------------------------------------------------------------------------------
* For more information about the OpenAirInterface (OAI) Software Alliance:
* contact@openairinterface.org
*-------------------------------------------------------------------------------
* Optimization using SIMD instructions
* Frecuency Domain Analysis
* Luis Felipe Ariza Vesga, email:lfarizav@unal.edu.co
* Functions: do_DL_sig_freq(), do_UL_sig_freq(), init_channel_vars_freq(),
* do_UL_sig_freq_prach.
*-------------------------------------------------------------------------------
*/
#include <string.h>
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment