Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
O
OpenXG-RAN
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
wangwenhui
OpenXG-RAN
Commits
d243b72e
Commit
d243b72e
authored
Mar 12, 2018
by
Luis Ariza
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Starting Ziggurat version SSE2 coding
parent
cd07e9eb
Changes
8
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
429 additions
and
451 deletions
+429
-451
openair1/PHY/INIT/lte_init.c
openair1/PHY/INIT/lte_init.c
+0
-37
openair1/SIMULATION/RF/adc.c
openair1/SIMULATION/RF/adc.c
+77
-64
openair1/SIMULATION/RF/dac.c
openair1/SIMULATION/RF/dac.c
+24
-19
openair1/SIMULATION/RF/rf.c
openair1/SIMULATION/RF/rf.c
+32
-14
openair1/SIMULATION/TOOLS/abstraction.c
openair1/SIMULATION/TOOLS/abstraction.c
+88
-95
openair1/SIMULATION/TOOLS/random_channel.c
openair1/SIMULATION/TOOLS/random_channel.c
+24
-24
openair1/SIMULATION/TOOLS/rangen_double.c
openair1/SIMULATION/TOOLS/rangen_double.c
+177
-198
targets/SIMU/USER/channel_sim.c
targets/SIMU/USER/channel_sim.c
+7
-0
No files found.
openair1/PHY/INIT/lte_init.c
View file @
d243b72e
...
...
@@ -1155,31 +1155,7 @@ int phy_init_lte_ue(PHY_VARS_UE *ue,
for
(
i
=
0
;
i
<
fp
->
nb_antennas_rx
;
i
++
)
{
common_vars
->
rxdata
[
i
]
=
(
int32_t
*
)
malloc16_clear
(
(
fp
->
samples_per_tti
*
10
+
2048
)
*
sizeof
(
int32_t
)
);
}
/*if (do_ofdm_mod)
{
common_vars->common_vars_rx_data_per_thread[0].rxdataF = (int32_t**)malloc16( fp->nb_antennas_rx*sizeof(int32_t*));
printf("[lte_init_f] address of rxdataF in memory: %p, thread %d\n",&common_vars->common_vars_rx_data_per_thread[0].rxdataF,0);
for (i=0; i<fp->nb_antennas_rx; i++) {
common_vars->common_vars_rx_data_per_thread[0].rxdataF[i] = (int32_t*)malloc16_clear((10*fp->ofdm_symbol_size*fp->symbols_per_tti)*sizeof(int32_t));
printf("[lte_init_f] address of rxdataF[i] in memory: %p, thread %d, antenna %d\n",&common_vars->common_vars_rx_data_per_thread[0].rxdataF[i],0,i);
}
//rxdata_temp. Please remove this dummy allocation when all arrays are size fixed. If not, the multichannel does not work.
rxdataF_temp = (int32_t**)malloc16( fp->nb_antennas_rx*sizeof(int32_t*) );
for (i=0; i<fp->nb_antennas_rx; i++) {
rxdataF_temp[i] = (int32_t*)malloc16_clear((14313)*sizeof(int32_t));
}
common_vars->common_vars_rx_data_per_thread[1].rxdataF = (int32_t**)malloc16( fp->nb_antennas_rx*sizeof(int32_t*) );
printf("[lte_init_f] address of rxdataF in memory: %p, thread %d\n",&common_vars->common_vars_rx_data_per_thread[1].rxdataF,1);
for (i=0; i<fp->nb_antennas_rx; i++) {
common_vars->common_vars_rx_data_per_thread[1].rxdataF[i] = (int32_t*)malloc16_clear((10*fp->ofdm_symbol_size*fp->symbols_per_tti)*sizeof(int32_t));
printf("[lte_init_f address of rxdataF[i] in memory: %p, thread %d, antenna %d\n",&common_vars->common_vars_rx_data_per_thread[1].rxdataF[i],1,i);
}
}
else
//{*/
for
(
th_id
=
0
;
th_id
<
RX_NB_TH_MAX
;
th_id
++
){
common_vars
->
common_vars_rx_data_per_thread
[
th_id
].
rxdataF
=
(
int32_t
**
)
malloc16
(
fp
->
nb_antennas_rx
*
sizeof
(
int32_t
*
)
);
printf
(
"[lte_init_t] address of rxdataF in memory: %p, thread %d
\n
"
,
&
common_vars
->
common_vars_rx_data_per_thread
[
th_id
].
rxdataF
,
th_id
);
...
...
@@ -1208,19 +1184,6 @@ int phy_init_lte_ue(PHY_VARS_UE *ue,
}
}
}
/*for (i=0; i<fp->ofdm_symbol_size*fp->symbols_per_tti; i++) {
for (int aa=0; aa<fp->nb_antennas_rx; aa++) {
((short *)common_vars->common_vars_rx_data_per_thread[1].rxdataF[aa])[((i+fp->ofdm_symbol_size*fp->symbols_per_tti)<<1)] = (short)(i);
((short *)common_vars->common_vars_rx_data_per_thread[1].rxdataF[aa])[1+((i+fp->ofdm_symbol_size*fp->symbols_per_tti)<<1)] = (short)(1);
if (i < 300) {
printf("[lte-init] rxdataF (thread[%d]) %d: (%d,%d), memory addr %p\n",1,i,((short *)common_vars->common_vars_rx_data_per_thread[1].rxdataF[aa])[((i+fp->ofdm_symbol_size*fp->symbols_per_tti)<<1)],((short *)common_vars->common_vars_rx_data_per_thread[1].rxdataF[aa])[1+((i+fp->ofdm_symbol_size*fp->symbols_per_tti)<<1)],&common_vars->common_vars_rx_data_per_thread[1].rxdataF[aa][1+((i+fp->ofdm_symbol_size*fp->symbols_per_tti)<<1)]);
printf("[lte-init] rxdataF (thread[%d]) %d: (%d,%d), memory addr %p\n",0,i,((short *)common_vars->common_vars_rx_data_per_thread[0].rxdataF[aa])[((i+4+2*fp->ofdm_symbol_size*fp->symbols_per_tti)<<1)],((short *)common_vars->common_vars_rx_data_per_thread[0].rxdataF[aa])[1+((i+4+2*fp->ofdm_symbol_size*fp->symbols_per_tti)<<1)],&common_vars->common_vars_rx_data_per_thread[0].rxdataF[aa][1+((i+4+2*fp->ofdm_symbol_size*fp->symbols_per_tti)<<1)]);
}
}
}*/
}
// DLSCH
...
...
openair1/SIMULATION/RF/adc.c
View file @
d243b72e
...
...
@@ -17,6 +17,12 @@
*-------------------------------------------------------------------------------
* For more information about the OpenAirInterface (OAI) Software Alliance:
* contact@openairinterface.org
*-------------------------------------------------------------------------------
* Optimization using SIMD instructions
* Frecuency Domain Analysis
* Luis Felipe Ariza Vesga, email:lfarizav@unal.edu.co
* Functions: adc_SSE_float(), adc_prach(), adc_prach_SSE_float().
*-------------------------------------------------------------------------------
*/
#include <string.h>
#include <stdio.h>
...
...
@@ -154,59 +160,7 @@ void adc_freq(double *r_re[2],
//printf("Adc outputs %d %e %d \n",i,((short *)output[0])[((i+output_offset)<<1)], ((i+output_offset)<<1) );
}
}
/*void adc_freq(double *r_re[2],
double *r_im[2],
unsigned int input_offset,
unsigned int output_offset,
unsigned int **output1,//thread th_id
unsigned int **output2,//thread 0
unsigned int **output3,//thread 1
unsigned int nb_rx_antennas,
unsigned int length,
unsigned char B,
int thread)
{
int i;
//int th_id;
int aa;
double gain = (double)(1<<(B-1));*/
/*int dummy_rx[nb_rx_antennas][length] __attribute__((aligned(32)));
for (aa=0; aa<nb_rx_antennas; aa++) {
memset (&output1[aa][output_offset],0,length*sizeof(int));
}*/
//double gain = 1.0;
/*for (i=0; i<length; i++) {
for (aa=0; aa<nb_rx_antennas; aa++) {
((short *)output1[aa])[((i+output_offset)<<1)] = (short)(r_re[aa][i+input_offset]*gain);
((short *)output1[aa])[1+((i+output_offset)<<1)] = (short)(r_im[aa][i+input_offset]*gain);
if ((r_re[aa][i+input_offset]*gain) > 30000) {
//("Adc outputs %d %e %d \n",i,((short *)output[0])[((i+output_offset)<<1)], ((i+output_offset)<<1) );
}
if (i < 300) {
printf("rxdataF (thread[%d]) %d: (%d,%d)\n",thread,i,((short *)output1[aa])[((i+output_offset)<<1)],((short *)output1[aa])[1+((i+output_offset)<<1)]);
if (thread==0 && output_offset>length)
printf("rxdataF (thread[1]) %d: (%d,%d) \n",i,((short *)output3[aa])[((i+output_offset-length-4)<<1)],((short *)output3[aa])[1+((i+output_offset-length-4)<<1)]);
else if (thread==1)
printf("rxdataF (thread[0]) %d: (%d,%d) \n",i,((short *)output2[aa])[((i+output_offset+length+4)<<1)],((short *)output2[aa])[1+((i+output_offset+length+4)<<1)]);
}
}*/
/*for (aa=0; aa<nb_rx_antennas; aa++) {
for (th_id=1; th_id<2; th_id++)
{
memcpy((void *)output[aa][output_offset],
(void *)output[aa][output_offset],
length*sizeof(int));
}
}*/
/*}
printf("thread %d\n",(unsigned int)thread);
//write_output("adc_rxsigF_frame0.m","adc_rxsF0", output1[0],10*length,1,16);
//write_output("adc_rxsigF_frame1.m","adc_rxsF1", output2[0],10*length,1,16);
}*/
void
adc_prach
(
double
*
r_re
[
2
],
double
*
r_im
[
2
],
unsigned
int
input_offset
,
...
...
@@ -251,23 +205,82 @@ void adc_prach_SSE_float(float *r_re[2],
int
i
;
int
aa
;
double
gain
=
(
double
)(
1
<<
(
B
-
1
));
__m128
r_re128
,
r_im128
,
gain128
;
__m128i
r_re128i
,
r_im128i
,
output128
;
float
gain
=
(
double
)(
1
<<
(
B
-
1
));
gain128
=
_mm_set1_ps
(
gain
);
//double gain = 1.0;
for
(
i
=
0
;
i
<
length
;
i
++
)
{
for
(
i
=
0
;
i
<
(
length
>>
2
)
;
i
++
)
{
for
(
aa
=
0
;
aa
<
nb_rx_antennas
;
aa
++
)
{
((
short
*
)
output
[
aa
])[((
i
+
output_offset
/
2
)
<<
1
)]
=
(
short
)(
r_re
[
aa
][
i
+
input_offset
]
*
gain
);
((
short
*
)
output
[
aa
])[
1
+
((
i
+
output_offset
/
2
)
<<
1
)]
=
(
short
)(
r_im
[
aa
][
i
+
input_offset
]
*
gain
);
#ifdef DEBUG_ADC
if
(
i
<
10
)
printf
(
"[adc_prach]i %d. input (%d,%d), output (%d,%d)
\n
"
,
i
,(
short
)(
r_re
[
aa
][
i
+
input_offset
]),(
short
)(
r_im
[
aa
][
i
+
input_offset
]),((
short
*
)
output
[
aa
])[((
i
+
output_offset
/
2
)
<<
1
)],((
short
*
)
output
[
aa
])[
1
+
((
i
+
output_offset
/
2
)
<<
1
)]);
if
(
i
>
length
-
10
&&
i
<
length
)
#endif
if
((
r_re
[
aa
][
i
+
input_offset
]
*
gain
)
>
30000
)
{
//("Adc outputs %d %e %d \n",i,((short *)output[0])[((i+output_offset)<<1)], ((i+output_offset)<<1) );
}
//((short *)output[aa])[((i+output_offset/2)<<1)] = (short)(r_re[aa][i+input_offset]*gain);
//((short *)output[aa])[1+((i+output_offset/2)<<1)] = (short)(r_im[aa][i+input_offset]*gain);
r_re128
=
_mm_loadu_ps
(
&
r_re
[
aa
][
4
*
i
+
input_offset
]);
r_im128
=
_mm_loadu_ps
(
&
r_im
[
aa
][
4
*
i
+
input_offset
]);
r_re128
=
_mm_mul_ps
(
r_re128
,
gain128
);
r_im128
=
_mm_mul_ps
(
r_im128
,
gain128
);
r_re128i
=
_mm_cvtps_epi32
(
r_re128
);
r_im128i
=
_mm_cvtps_epi32
(
r_im128
);
r_re128i
=
_mm_packs_epi32
(
r_re128i
,
r_re128i
);
r_im128i
=
_mm_packs_epi32
(
r_im128i
,
r_im128i
);
output128
=
_mm_unpacklo_epi16
(
r_re128i
,
r_im128i
);
_mm_storeu_si128
((
__m128i
*
)
&
output
[
aa
][
4
*
i
+
output_offset
/
2
],
output128
);
}
//printf("Adc outputs %d %e %d \n",i,((short *)output[0])[((i+output_offset)<<1)], ((i+output_offset)<<1) );
}
}
/*void adc_freq(double *r_re[2],
double *r_im[2],
unsigned int input_offset,
unsigned int output_offset,
unsigned int **output1,//thread th_id
unsigned int **output2,//thread 0
unsigned int **output3,//thread 1
unsigned int nb_rx_antennas,
unsigned int length,
unsigned char B,
int thread)
{
int i;
//int th_id;
int aa;
double gain = (double)(1<<(B-1));*/
/*int dummy_rx[nb_rx_antennas][length] __attribute__((aligned(32)));
for (aa=0; aa<nb_rx_antennas; aa++) {
memset (&output1[aa][output_offset],0,length*sizeof(int));
}*/
//double gain = 1.0;
/*for (i=0; i<length; i++) {
for (aa=0; aa<nb_rx_antennas; aa++) {
((short *)output1[aa])[((i+output_offset)<<1)] = (short)(r_re[aa][i+input_offset]*gain);
((short *)output1[aa])[1+((i+output_offset)<<1)] = (short)(r_im[aa][i+input_offset]*gain);
if ((r_re[aa][i+input_offset]*gain) > 30000) {
//("Adc outputs %d %e %d \n",i,((short *)output[0])[((i+output_offset)<<1)], ((i+output_offset)<<1) );
}
if (i < 300) {
printf("rxdataF (thread[%d]) %d: (%d,%d)\n",thread,i,((short *)output1[aa])[((i+output_offset)<<1)],((short *)output1[aa])[1+((i+output_offset)<<1)]);
if (thread==0 && output_offset>length)
printf("rxdataF (thread[1]) %d: (%d,%d) \n",i,((short *)output3[aa])[((i+output_offset-length-4)<<1)],((short *)output3[aa])[1+((i+output_offset-length-4)<<1)]);
else if (thread==1)
printf("rxdataF (thread[0]) %d: (%d,%d) \n",i,((short *)output2[aa])[((i+output_offset+length+4)<<1)],((short *)output2[aa])[1+((i+output_offset+length+4)<<1)]);
}
}*/
/*for (aa=0; aa<nb_rx_antennas; aa++) {
for (th_id=1; th_id<2; th_id++)
{
memcpy((void *)output[aa][output_offset],
(void *)output[aa][output_offset],
length*sizeof(int));
}
}*/
/*}
printf("thread %d\n",(unsigned int)thread);
//write_output("adc_rxsigF_frame0.m","adc_rxsF0", output1[0],10*length,1,16);
//write_output("adc_rxsigF_frame1.m","adc_rxsF1", output2[0],10*length,1,16);
}*/
openair1/SIMULATION/RF/dac.c
View file @
d243b72e
...
...
@@ -17,6 +17,12 @@
*-------------------------------------------------------------------------------
* For more information about the OpenAirInterface (OAI) Software Alliance:
* contact@openairinterface.org
*-------------------------------------------------------------------------------
* Optimization using SIMD instructions
* Frecuency Domain Analysis
* Luis Felipe Ariza Vesga, email:lfarizav@unal.edu.co
* Functions: dac_fixed_gain_SSE_float(), dac_fixed_gain_prach(), dac_fixed_gain_prach_SSE_float().
*-------------------------------------------------------------------------------
*/
//#define DEBUG_DAC
...
...
@@ -297,8 +303,8 @@ float dac_fixed_gain_prach_SSE_float(float *s_re[2],
int
i
;
int
aa
;
float
amp
,
amp1
;
int
k
;
float
amp
,
amp1
,
div
;
__m128
input_re128
,
input_im128
;
amp
=
//sqrt(NB_RE)*pow(10.0,.05*txpwr_dBm)/sqrt(nb_tx_antennas); //this is amp per tx antenna
pow
(
10
.
0
,.
05
*
txpwr_dBm
)
/
sqrt
(
nb_tx_antennas
);
//this is amp per tx antenna
...
...
@@ -322,27 +328,26 @@ float dac_fixed_gain_prach_SSE_float(float *s_re[2],
amp1 = amp1*sqrt(512.0/300.0); //account for loss due to null carriers
//printf("DL: amp1 %f dB (%d,%d), tx_power %f\n",20*log10(amp1),input_offset,input_offset_meas,txpwr_dBm);
*/
#ifdef DEBUG_DAC
printf
(
"DAC: input_offset %d, amp %e, amp1 %e
\n
"
,
input_offset
,
amp
,
amp1
);
#endif
k
=
input_offset
;
for
(
i
=
0
;
i
<
length
*
2
;
i
+=
2
)
{
div
=
amp
/
amp1
;
for
(
i
=
0
;
i
<
(
length
>>
2
);
i
++
)
{
for
(
aa
=
0
;
aa
<
nb_tx_antennas
;
aa
++
)
{
s_re
[
aa
][
i
/
2
]
=
amp
*
((
float
)(((
short
*
)
input
))[((
k
))])
/
amp1
;
///(1<<(B-1));
s_im
[
aa
][
i
/
2
]
=
amp
*
((
float
)(((
short
*
)
input
))[((
k
))
+
1
])
/
amp1
;
///(1<<(B-1));
#ifdef DEBUG_DAC
if
(
i
<
20
)
printf
(
"DAC[%d]: input (%d,%d). output (%e,%e)
\n
"
,
i
/
2
,(((
short
*
)
input
))[((
k
))],(((
short
*
)
input
))[((
k
))
+
1
],
s_re
[
aa
][
i
/
2
],
s_im
[
aa
][
i
/
2
]);
if
(
i
>
length
*
2
-
20
&&
i
<
length
*
2
)
printf
(
"DAC[%d]: input (%d,%d). output (%e,%e)
\n
"
,
i
/
2
,(((
short
*
)
input
))[((
k
))],(((
short
*
)
input
))[((
k
))
+
1
],
s_re
[
aa
][
i
/
2
],
s_im
[
aa
][
i
/
2
]);
#endif
k
+=
2
;
if
(
k
==
12
*
2
*
ofdm_symbol_size
)
k
=
0
;
//s_re[aa][i] = div*((float)(((short *)input))[((input_offset+2*i))]); ///(1<<(B-1));
//s_im[aa][i] = div*((float)(((short *)input))[((input_offset+2*i))+1]); ///(1<<(B-1));
input_re128
=
_mm_set_ps
((
float
)(((
short
*
)
input
))[
2
*
(
4
*
i
+
3
)
+
input_offset
],(
float
)(((
short
*
)
input
))[
2
*
(
4
*
i
+
2
)
+
input_offset
],(
float
)(((
short
*
)
input
))[
2
*
(
4
*
i
+
1
)
+
input_offset
],(
float
)(((
short
*
)
input
))[
2
*
(
4
*
i
)
+
input_offset
]);
input_im128
=
_mm_set_ps
((
float
)(((
short
*
)
input
))[
2
*
(
4
*
i
+
3
)
+
1
+
input_offset
],(
float
)(((
short
*
)
input
))[
2
*
(
4
*
i
+
2
)
+
1
+
input_offset
],(
float
)(((
short
*
)
input
))[
2
*
(
4
*
i
+
1
)
+
1
+
input_offset
],(
float
)(((
short
*
)
input
))[
2
*
(
4
*
i
)
+
1
+
input_offset
]);
input_re128
=
_mm_mul_ps
(
input_re128
,
_mm_set1_ps
(
div
));
input_im128
=
_mm_mul_ps
(
input_im128
,
_mm_set1_ps
(
div
));
_mm_storeu_ps
(
&
s_re
[
aa
][
4
*
i
],
input_re128
);
_mm_storeu_ps
(
&
s_im
[
aa
][
4
*
i
],
input_im128
);
if
(
2
*
i
+
input_offset
==
12
*
2
*
ofdm_symbol_size
)
i
=
0
;
}
}
// printf("ener %e\n",signal_energy_fp(s_re,s_im,nb_tx_antennas,length,0));
return
(
signal_energy_fp
(
s_re
,
s_im
,
nb_tx_antennas
,
length_meas
,
0
)
/
NB_RE
);
return
(
signal_energy_fp
_SSE_float
(
s_re
,
s_im
,
nb_tx_antennas
,
length_meas
,
0
)
/
NB_RE
);
}
openair1/SIMULATION/RF/rf.c
View file @
d243b72e
...
...
@@ -17,6 +17,12 @@
*-------------------------------------------------------------------------------
* For more information about the OpenAirInterface (OAI) Software Alliance:
* contact@openairinterface.org
*-------------------------------------------------------------------------------
* Optimization using SIMD instructions
* Frecuency Domain Analysis
* Luis Felipe Ariza Vesga, email:lfarizav@unal.edu.co
* Functions: rf_rx_simple_freq(), rf_rx_simple_freq_SSE_float().
*-------------------------------------------------------------------------------
*/
//#include "defs.h"
...
...
@@ -294,17 +300,15 @@ clock_t start=clock();*/
}
else
{
//rx128_gain_lin=mm_mul_set1_ps(rx_gain_lin);
rx128_re
=
_mm_loadu_pd
(
&
r_re
[
a
][
2
*
i
]);
//r_re[a][i],r_re[a][i+1]
rx128_im
=
_mm_loadu_pd
(
&
r_im
[
a
][
2
*
i
]);
//r_im[a][i],r_im[a][i+1]
rx128_gain_lin
=
_mm_set1_pd
(
rx_gain_lin
);
gauss_0_128_sqrt_NOW
=
_mm_set_pd
(
ziggurat
(
0
.
0
,
1
.
0
),
ziggurat
(
0
.
0
,
1
.
0
));
gauss_1_128_sqrt_NOW
=
_mm_set_pd
(
ziggurat
(
0
.
0
,
1
.
0
),
ziggurat
(
0
.
0
,
1
.
0
));
//gauss_0_128_sqrt_NOW = _mm_set_pd(ziggurat(0.0,1.0),ziggurat(0.0,1.0));
//gauss_1_128_sqrt_NOW = _mm_set_pd(ziggurat(0.0,1.0),ziggurat(0.0,1.0));
boxmuller_SSE_float
(
&
gauss_0_128_sqrt_NOW
,
&
gauss_1_128_sqrt_NOW
);
gauss_0_128_sqrt_NOW
=
_mm_mul_pd
(
gauss_0_128_sqrt_NOW
,
_mm_set1_pd
(
sqrt_NOW
));
gauss_1_128_sqrt_NOW
=
_mm_mul_pd
(
gauss_1_128_sqrt_NOW
,
_mm_set1_pd
(
sqrt_NOW
));
// Amplify by receiver gain and apply 3rd order non-linearity
//r_re[a][i] = rx_gain_lin*(r_re[a][i] + sqrt(.5*N0W)*gaussdouble(0.0,1.0));
//r_im[a][i] = rx_gain_lin*(r_im[a][i] + sqrt(.5*N0W)*gaussdouble(0.0,1.0));
rx128_re
=
_mm_add_pd
(
rx128_re
,
gauss_0_128_sqrt_NOW
);
rx128_im
=
_mm_add_pd
(
rx128_im
,
gauss_1_128_sqrt_NOW
);
rx128_re
=
_mm_mul_pd
(
rx128_re
,
rx128_gain_lin
);
...
...
@@ -419,7 +423,6 @@ clock_t start=clock();*/
}
else
{*/
//rx128_gain_lin=mm_mul_set1_ps(rx_gain_lin);
rx128_re
=
_mm_loadu_ps
(
&
r_re
[
a
][
4
*
i
]);
//r_re[a][i],r_re[a][i+1]
rx128_im
=
_mm_loadu_ps
(
&
r_im
[
a
][
4
*
i
]);
//r_im[a][i],r_im[a][i+1]
//start_meas(&desc->ziggurat);
...
...
@@ -427,17 +430,9 @@ clock_t start=clock();*/
//gauss_1_128_sqrt_NOW = _mm_set_ps(ziggurat(0.0,1.0),ziggurat(0.0,1.0),ziggurat(0.0,1.0),ziggurat(0.0,1.0));
boxmuller_SSE_float
(
&
gauss_0_128_sqrt_NOW
,
&
gauss_1_128_sqrt_NOW
);
//stop_meas(&desc->ziggurat);
//_mm_storeu_ps(out,gauss_0_128_sqrt_NOW);
//_mm_storeu_ps(out1,gauss_1_128_sqrt_NOW);
//printf("Ziggurat is %e,%e,%e,%e\n",ziggurat(0.0,1.0),ziggurat(0.0,1.0),ziggurat(0.0,1.0),ziggurat(0.0,1.0));
//printf("boxmuller is %e,%e,%e,%e\n",gaussdouble(0.0,1.0),gaussdouble(0.0,1.0),gaussdouble(0.0,1.0),gaussdouble(0.0,1.0));
//printf("NOR SSE is %e,%e,%e,%e\n",out[0],out[1],out[2],out[3]);
//printf("NOR SSE1 is %e,%e,%e,%e\n",out1[0],out1[1],out1[2],out1[3]);
gauss_0_128_sqrt_NOW
=
_mm_mul_ps
(
gauss_0_128_sqrt_NOW
,
_mm_set1_ps
(
sqrt_NOW
));
gauss_1_128_sqrt_NOW
=
_mm_mul_ps
(
gauss_1_128_sqrt_NOW
,
_mm_set1_ps
(
sqrt_NOW
));
// Amplify by receiver gain and apply 3rd order non-linearity
//r_re[a][i] = rx_gain_lin*(r_re[a][i] + sqrt(.5*N0W)*gaussdouble(0.0,1.0));
//r_im[a][i] = rx_gain_lin*(r_im[a][i] + sqrt(.5*N0W)*gaussdouble(0.0,1.0));
rx128_re
=
_mm_add_ps
(
_mm_mul_ps
(
rx128_re
,
rx128_gain_lin
),
gauss_0_128_sqrt_NOW
);
rx128_im
=
_mm_add_ps
(
_mm_mul_ps
(
rx128_im
,
rx128_gain_lin
),
gauss_1_128_sqrt_NOW
);
_mm_storeu_ps
(
&
r_re
[
a
][
4
*
i
],
rx128_re
);
...
...
@@ -445,6 +440,29 @@ clock_t start=clock();*/
//}
}
}
/*rx128_re = _mm_loadu_ps(&r_re[a][4*i+ofdm_symbol_size*j]);//r_re[a][i],r_re[a][i+1]
rx128_im = _mm_loadu_ps(&r_im[a][4*i+ofdm_symbol_size*j]);//r_im[a][i],r_im[a][i+1]
rx128_re_1 = _mm_loadu_ps(&r_re[a][(ofdm_symbol_size-n_samples)+4*i+ofdm_symbol_size*j]);//r_re[a][i],r_re[a][i+1]
rx128_im_1 = _mm_loadu_ps(&r_im[a][(ofdm_symbol_size-n_samples)+4*i+ofdm_symbol_size*j]);//r_im[a][i],r_im[a][i+1]
//start_meas(&desc->ziggurat);
//gauss_0_128_sqrt_NOW = _mm_set_ps(ziggurat(0.0,1.0),ziggurat(0.0,1.0),ziggurat(0.0,1.0),ziggurat(0.0,1.0));
//gauss_1_128_sqrt_NOW = _mm_set_ps(ziggurat(0.0,1.0),ziggurat(0.0,1.0),ziggurat(0.0,1.0),ziggurat(0.0,1.0));
boxmuller_SSE_float(&gauss_0_128_sqrt_NOW, &gauss_1_128_sqrt_NOW);
boxmuller_SSE_float(&gauss_0_128_sqrt_NOW_1, &gauss_1_128_sqrt_NOW_1);
//stop_meas(&desc->ziggurat);
gauss_0_128_sqrt_NOW = _mm_mul_ps(gauss_0_128_sqrt_NOW,_mm_set1_ps(sqrt_NOW));
gauss_1_128_sqrt_NOW = _mm_mul_ps(gauss_1_128_sqrt_NOW,_mm_set1_ps(sqrt_NOW));
gauss_0_128_sqrt_NOW_1 = _mm_mul_ps(gauss_0_128_sqrt_NOW_1,_mm_set1_ps(sqrt_NOW));
gauss_1_128_sqrt_NOW_1 = _mm_mul_ps(gauss_1_128_sqrt_NOW_1,_mm_set1_ps(sqrt_NOW));
// Amplify by receiver gain and apply 3rd order non-linearity
rx128_re = _mm_add_ps(_mm_mul_ps(rx128_re,rx128_gain_lin),gauss_0_128_sqrt_NOW);
rx128_im = _mm_add_ps(_mm_mul_ps(rx128_im,rx128_gain_lin),gauss_1_128_sqrt_NOW);
rx128_re_1 = _mm_add_ps(_mm_mul_ps(rx128_re_1,rx128_gain_lin),gauss_0_128_sqrt_NOW_1);
rx128_im_1 = _mm_add_ps(_mm_mul_ps(rx128_im_1,rx128_gain_lin),gauss_1_128_sqrt_NOW_1);
_mm_storeu_ps(&r_re[a][4*i+ofdm_symbol_size*j],rx128_re);
_mm_storeu_ps(&r_im[a][4*i+ofdm_symbol_size*j],rx128_im);
_mm_storeu_ps(&r_re[a][(ofdm_symbol_size-n_samples)+4*i+ofdm_symbol_size*j],rx128_re_1);
_mm_storeu_ps(&r_im[a][(ofdm_symbol_size-n_samples)+4*i+ofdm_symbol_size*j],rx128_im_1);*/
/*clock_t stop=clock();
printf("do_DL_sig time is %f s, AVERAGE time is %f s, count %d, sum %e\n",(float) (stop-start)/CLOCKS_PER_SEC,(float) (sum+stop-start)/(count*CLOCKS_PER_SEC),count,sum+stop-start);
sum=(sum+stop-start);*/
...
...
openair1/SIMULATION/TOOLS/abstraction.c
View file @
d243b72e
This diff is collapsed.
Click to expand it.
openair1/SIMULATION/TOOLS/random_channel.c
View file @
d243b72e
This diff is collapsed.
Click to expand it.
openair1/SIMULATION/TOOLS/rangen_double.c
View file @
d243b72e
This diff is collapsed.
Click to expand it.
targets/SIMU/USER/channel_sim.c
View file @
d243b72e
...
...
@@ -17,6 +17,13 @@
*-------------------------------------------------------------------------------
* For more information about the OpenAirInterface (OAI) Software Alliance:
* contact@openairinterface.org
*-------------------------------------------------------------------------------
* Optimization using SIMD instructions
* Frecuency Domain Analysis
* Luis Felipe Ariza Vesga, email:lfarizav@unal.edu.co
* Functions: do_DL_sig_freq(), do_UL_sig_freq(), init_channel_vars_freq(),
* do_UL_sig_freq_prach.
*-------------------------------------------------------------------------------
*/
#include <string.h>
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment