Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
O
OpenXG-RAN
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
canghaiwuhen
OpenXG-RAN
Commits
eebbd37e
Commit
eebbd37e
authored
Jun 12, 2018
by
lfarizav
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
new sincos256_ps, log256_ps and exp_256 function using AVX instructions
parent
59582986
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
529 additions
and
2 deletions
+529
-2
openair1/SIMULATION/TOOLS/abstraction.c
openair1/SIMULATION/TOOLS/abstraction.c
+391
-1
openair1/SIMULATION/TOOLS/defs.h
openair1/SIMULATION/TOOLS/defs.h
+16
-0
openair1/SIMULATION/TOOLS/multipath_channel.c
openair1/SIMULATION/TOOLS/multipath_channel.c
+121
-0
openair1/SIMULATION/TOOLS/rangen_double.c
openair1/SIMULATION/TOOLS/rangen_double.c
+1
-1
No files found.
openair1/SIMULATION/TOOLS/abstraction.c
View file @
eebbd37e
...
...
@@ -154,6 +154,40 @@ int init_freq_channel_SSE_float(channel_desc_t *desc,uint16_t nb_rb,int16_t n_sa
int16_t
f
;
uint8_t
l
;
__m128
cos_lut128
,
sin_lut128
;
__m128
x128
,
log128
,
exp128
;
__m256
x256
,
log256
,
exp256
;
x128
=
_mm_set_ps
(
1
.
0
,
2
.
0
,
3
.
0
,
4
.
0
);
x256
=
_mm256_set_ps
(
1
.
0
,
2
.
0
,
3
.
0
,
4
.
0
,
5
.
0
,
6
.
0
,
7
.
0
,
8
.
0
);
log128
=
log_ps
(
x128
);
log256
=
log256_ps
(
x256
);
exp128
=
exp_ps
(
x128
);
exp256
=
exp256_ps
(
x256
);
printf
(
"log128 %e,%e,%e,%e
\n
"
,
log128
[
0
],
log128
[
1
],
log128
[
2
],
log128
[
3
]);
printf
(
"log %e,%e,%e,%e
\n\n
"
,
log
(
1
),
log
(
2
),
log
(
3
),
log
(
4
));
printf
(
"exp128 %e,%e,%e,%e
\n
"
,
exp128
[
0
],
exp128
[
1
],
exp128
[
2
],
exp128
[
3
]);
printf
(
"exp %e,%e,%e,%e
\n\n
"
,
exp
(
1
),
exp
(
2
),
exp
(
3
),
exp
(
4
));
printf
(
"log256 %e,%e,%e,%e,%e,%e,%e,%e
\n
"
,
log256
[
0
],
log256
[
1
],
log256
[
2
],
log256
[
3
],
log256
[
4
],
log256
[
5
],
log256
[
6
],
log256
[
7
]);
printf
(
"log %e,%e,%e,%e,%e,%e,%e,%e
\n
"
,
log
(
1
),
log
(
2
),
log
(
3
),
log
(
4
),
log
(
5
),
log
(
6
),
log
(
7
),
log
(
8
));
printf
(
"exp256 %e,%e,%e,%e,%e,%e,%e,%e
\n
"
,
exp256
[
0
],
exp256
[
1
],
exp256
[
2
],
exp256
[
3
],
exp256
[
4
],
exp256
[
5
],
exp256
[
6
],
exp256
[
7
]);
printf
(
"exp %e,%e,%e,%e,%e,%e,%e,%e
\n
"
,
exp
(
1
),
exp
(
2
),
exp
(
3
),
exp
(
4
),
exp
(
5
),
exp
(
6
),
exp
(
7
),
exp
(
8
));
/*__m256 x256, sin256, cos256;
__m128 x128, sin128, cos128;
x128 = _mm_set_ps(1.0,2.0,3.0,4.0);
x256 = _mm256_set_ps(1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0);*/
/*printf("sincos in abstraction.c\n");
sincos256_ps(x256,&sin256,&cos256);
sincos_ps(x128,&sin128,&cos128);
printf("sin avx %e,%e,%e,%e,%e,%e,%e,%e\n",sin256[0],sin256[1],sin256[2],sin256[3],sin256[4],sin256[5],sin256[6],sin256[7]);
printf("sin %e,%e,%e,%e,%e,%e,%e,%e\n",sin(1.0),sin(2.0),sin(3.0),sin(4.0),sin(5.0),sin(6.0),sin(7.0),sin(8.0));
printf("cos avx %e,%e,%e,%e,%e,%e,%e,%e\n",cos256[0],cos256[1],cos256[2],cos256[3],cos256[4],cos256[5],cos256[6],cos256[7]);
printf("cos %e,%e,%e,%e,%e,%e,%e,%e\n\n",cos(1.0),cos(2.0),cos(3.0),cos(4.0),cos(5.0),cos(6.0),cos(7.0),cos(8.0));
printf("sin sse %e,%e,%e,%e\n",sin128[0],sin128[1],sin128[2],sin128[3]);
printf("sin %e,%e,%e,%e\n",sin(1.0),sin(2.0),sin(3.0),sin(4.0));
printf("cos sse %e,%e,%e,%e\n",cos128[0],cos128[1],cos128[2],cos128[3]);
printf("cos %e,%e,%e,%e\n",cos(1.0),cos(2.0),cos(3.0),cos(4.0));*/
if
((
n_samples
&
1
)
==
0
)
{
fprintf
(
stderr
,
"freq_channel_init: n_samples has to be odd
\n
"
);
...
...
@@ -181,7 +215,6 @@ int init_freq_channel_SSE_float(channel_desc_t *desc,uint16_t nb_rb,int16_t n_sa
delay
=
desc
->
delays
[
l
];
else
delay
=
desc
->
delays
[
l
]
+
NB_SAMPLES_CHANNEL_OFFSET
/
desc
->
sampling_rate
;
//sincos_ps(_mm_set_ps(twopi*(4*f+3)*delay,twopi*(4*f+2)*delay,twopi*(4*f+1)*delay,twopi*(4*f)*delay), &sin_lut128, &cos_lut128);
cos_lut128
=
_mm_set_ps
(
cos
(
twopi
*
(
4
*
f
+
3
)
*
delay
),
cos
(
twopi
*
(
4
*
f
+
2
)
*
delay
),
cos
(
twopi
*
(
4
*
f
+
1
)
*
delay
),
cos
(
twopi
*
(
4
*
f
)
*
delay
));
sin_lut128
=
_mm_set_ps
(
sin
(
twopi
*
(
4
*
f
+
3
)
*
delay
),
sin
(
twopi
*
(
4
*
f
+
2
)
*
delay
),
sin
(
twopi
*
(
4
*
f
+
1
)
*
delay
),
sin
(
twopi
*
(
4
*
f
)
*
delay
));
...
...
@@ -218,6 +251,79 @@ int init_freq_channel_SSE_float(channel_desc_t *desc,uint16_t nb_rb,int16_t n_sa
}*/
return
(
0
);
}
int
init_freq_channel_AVX_float
(
channel_desc_t
*
desc
,
uint16_t
nb_rb
,
int16_t
n_samples
)
{
static
int
first_run
=
1
;
float
delta_f
,
twopi
;
// 90 kHz spacing
float
delay
;
int16_t
f
;
uint8_t
l
;
__m256
cos_lut256
,
sin_lut256
;
if
((
n_samples
&
1
)
==
0
)
{
fprintf
(
stderr
,
"freq_channel_init: n_samples has to be odd
\n
"
);
return
(
-
1
);
}
delta_f
=
nb_rb
*
180000
/
(
n_samples
-
1
);
if
(
first_run
)
{
cos_lut_f
=
(
float
**
)
malloc16
(((
int
)
desc
->
nb_taps
)
*
sizeof
(
float
*
));
sin_lut_f
=
(
float
**
)
malloc16
(((
int
)
desc
->
nb_taps
)
*
sizeof
(
float
*
));
for
(
f
=-
(
n_samples
>>
1
);
f
<=
(
n_samples
>>
1
);
f
++
)
{
cos_lut_f
[
f
+
(
n_samples
>>
1
)]
=
(
float
*
)
malloc16_clear
(
n_samples
*
sizeof
(
float
));
sin_lut_f
[
f
+
(
n_samples
>>
1
)]
=
(
float
*
)
malloc16_clear
(
n_samples
*
sizeof
(
float
));
}
first_run
=
0
;
}
twopi
=
2
*
M_PI
*
1e-6
*
delta_f
;
for
(
f
=-
(
n_samples
>>
4
);
f
<
0
;
f
++
)
{
//count++;
//freq=delta_f*(double)f*1e-6;// due to the fact that delays is in mus
for
(
l
=
0
;
l
<
(
int
)
desc
->
nb_taps
;
l
++
)
{
if
(
desc
->
nb_taps
==
1
)
delay
=
desc
->
delays
[
l
];
else
delay
=
desc
->
delays
[
l
]
+
NB_SAMPLES_CHANNEL_OFFSET
/
desc
->
sampling_rate
;
//sincos_ps(_mm_set_ps(twopi*(4*f+3)*delay,twopi*(4*f+2)*delay,twopi*(4*f+1)*delay,twopi*(4*f)*delay), &sin_lut128, &cos_lut128);
cos_lut256
=
_mm256_set_ps
(
cos
(
twopi
*
(
8
*
f
+
7
)
*
delay
),
cos
(
twopi
*
(
8
*
f
+
6
)
*
delay
),
cos
(
twopi
*
(
8
*
f
+
5
)
*
delay
),
cos
(
twopi
*
(
8
*
f
+
4
)
*
delay
),
cos
(
twopi
*
(
8
*
f
+
3
)
*
delay
),
cos
(
twopi
*
(
8
*
f
+
2
)
*
delay
),
cos
(
twopi
*
(
8
*
f
+
1
)
*
delay
),
cos
(
twopi
*
(
8
*
f
)
*
delay
));
sin_lut256
=
_mm256_set_ps
(
sin
(
twopi
*
(
8
*
f
+
7
)
*
delay
),
sin
(
twopi
*
(
8
*
f
+
6
)
*
delay
),
sin
(
twopi
*
(
8
*
f
+
5
)
*
delay
),
sin
(
twopi
*
(
8
*
f
+
4
)
*
delay
),
sin
(
twopi
*
(
8
*
f
+
3
)
*
delay
),
sin
(
twopi
*
(
8
*
f
+
2
)
*
delay
),
sin
(
twopi
*
(
8
*
f
+
1
)
*
delay
),
sin
(
twopi
*
(
8
*
f
)
*
delay
));
_mm256_storeu_ps
(
&
cos_lut_f
[
l
][
8
*
f
+
(
n_samples
>>
1
)],
cos_lut256
);
_mm256_storeu_ps
(
&
sin_lut_f
[
l
][
8
*
f
+
(
n_samples
>>
1
)],
sin_lut256
);
}
}
for
(
l
=
0
;
l
<
(
int
)
desc
->
nb_taps
;
l
++
)
{
cos_lut_f
[
l
][(
n_samples
>>
1
)]
=
1
;
sin_lut_f
[
l
][(
n_samples
>>
1
)]
=
0
;
//printf("f %d,l %d (cos,sin) (%e,%e):\n",4*f,l,cos_lut_f[(n_samples>>1)][l],sin_lut_f[(n_samples>>1)][l]);
}
for
(
f
=
1
;
f
<=
(
n_samples
>>
4
);
f
++
)
{
//count++;
//freq=delta_f*(double)f*1e-6;// due to the fact that delays is in mus
for
(
l
=
0
;
l
<
(
int
)
desc
->
nb_taps
;
l
++
)
{
if
(
desc
->
nb_taps
==
1
)
delay
=
desc
->
delays
[
l
];
else
delay
=
desc
->
delays
[
l
]
+
NB_SAMPLES_CHANNEL_OFFSET
/
desc
->
sampling_rate
;
cos_lut256
=
_mm256_set_ps
(
cos
(
twopi
*
(
4
*
f
)
*
delay
),
cos
(
twopi
*
(
4
*
f
-
1
)
*
delay
),
cos
(
twopi
*
(
4
*
f
-
2
)
*
delay
),
cos
(
twopi
*
(
4
*
f
-
3
)
*
delay
),
cos
(
twopi
*
(
4
*
f
-
4
)
*
delay
),
cos
(
twopi
*
(
4
*
f
-
5
)
*
delay
),
cos
(
twopi
*
(
4
*
f
-
6
)
*
delay
),
cos
(
twopi
*
(
4
*
f
-
7
)
*
delay
));
sin_lut256
=
_mm256_set_ps
(
sin
(
twopi
*
(
4
*
f
)
*
delay
),
sin
(
twopi
*
(
4
*
f
-
1
)
*
delay
),
sin
(
twopi
*
(
4
*
f
-
2
)
*
delay
),
sin
(
twopi
*
(
4
*
f
-
3
)
*
delay
),
sin
(
twopi
*
(
4
*
f
-
4
)
*
delay
),
sin
(
twopi
*
(
4
*
f
-
5
)
*
delay
),
sin
(
twopi
*
(
4
*
f
-
6
)
*
delay
),
sin
(
twopi
*
(
4
*
f
-
7
)
*
delay
));
_mm256_storeu_ps
(
&
cos_lut_f
[
l
][
8
*
f
-
7
+
(
n_samples
>>
1
)],
cos_lut256
);
_mm256_storeu_ps
(
&
sin_lut_f
[
l
][
8
*
f
-
7
+
(
n_samples
>>
1
)],
sin_lut256
);
}
}
/*for (f=-(n_samples>>1); f<=(n_samples>>1); f++) {
for (l=0; l<(int)desc->nb_taps; l++) {
printf("f %d, l %d (cos,sin) (%e,%e):\n",f,l,cos_lut_f[l][f+(n_samples>>1)],sin_lut_f[l][f+(n_samples>>1)]);
}
}*/
return
(
0
);
}
int
freq_channel_SSE_float
(
channel_desc_t
*
desc
,
uint16_t
nb_rb
,
int16_t
n_samples
)
{
int16_t
f
,
f2
,
d
;
...
...
@@ -282,7 +388,70 @@ int freq_channel_SSE_float(channel_desc_t *desc,uint16_t nb_rb,int16_t n_samples
}*/
return
(
0
);
}
int
freq_channel_AVX_float
(
channel_desc_t
*
desc
,
uint16_t
nb_rb
,
int16_t
n_samples
)
{
int16_t
f
,
f2
,
d
;
uint8_t
aarx
,
aatx
,
l
;
static
int
freq_channel_init
=
0
;
static
int
n_samples_max
=
0
;
__m256
chFx_256
,
chFy_256
;
// do some error checking
// n_samples has to be a odd number because we assume the spectrum is symmetric around the DC and includes the DC
if
((
n_samples
&
1
)
==
0
)
{
fprintf
(
stderr
,
"freq_channel: n_samples has to be odd
\n
"
);
return
(
-
1
);
}
// printf("no of taps:%d,",(int)desc->nb_taps);
if
(
freq_channel_init
==
0
)
{
// we are initializing the lut for the largets possible n_samples=12*nb_rb+1
// if called with n_samples<12*nb_rb+1, we decimate the lut
n_samples_max
=
12
*
nb_rb
+
1
;
if
(
init_freq_channel_AVX_float
(
desc
,
nb_rb
,
n_samples_max
)
==
0
)
freq_channel_init
=
1
;
else
return
(
-
1
);
}
d
=
(
n_samples_max
-
1
)
/
(
n_samples
-
1
);
//printf("no_samples=%d, n_samples_max=%d, d=%d\n",n_samples,n_samples_max,d);
start_meas
(
&
desc
->
interp_freq
);
for
(
f
=-
(
n_samples_max
>>
4
),
f2
=-
(
n_samples
>>
4
);
f
<
(
n_samples_max
>>
4
);
f
+=
d
,
f2
++
)
{
//clut = cos_lut[(n_samples_max>>1)+f];
//slut = sin_lut[(n_samples_max>>1)+f];
for
(
aarx
=
0
;
aarx
<
desc
->
nb_rx
;
aarx
++
)
{
for
(
aatx
=
0
;
aatx
<
desc
->
nb_tx
;
aatx
++
)
{
chFx_256
=
_mm256_setzero_ps
();
chFy_256
=
_mm256_setzero_ps
();
//desc->chF[aarx+(aatx*desc->nb_rx)][(n_samples>>1)+f2].x=0.0;
//desc->chF[aarx+(aatx*desc->nb_rx)][(n_samples>>1)+f2].y=0.0;
for
(
l
=
0
;
l
<
(
int
)
desc
->
nb_taps
;
l
++
)
{
//desc->chF[aarx+(aatx*desc->nb_rx)][(n_samples>>1)+f2].x+=(desc->a[l][aarx+(aatx*desc->nb_rx)].x*clut[l]+
// desc->a[l][aarx+(aatx*desc->nb_rx)].y*slut[l]);
//desc->chF[aarx+(aatx*desc->nb_rx)][(n_samples>>1)+f2].y+=(-desc->a[l][aarx+(aatx*desc->nb_rx)].x*slut[l]+
// desc->a[l][aarx+(aatx*desc->nb_rx)].y*clut[l]);
chFx_256
=
_mm256_add_ps
(
chFx_256
,
_mm256_add_ps
(
_mm256_mul_ps
(
_mm256_set1_ps
(
desc
->
a
[
l
][
aarx
+
(
aatx
*
desc
->
nb_rx
)].
x
),
_mm256_loadu_ps
(
&
cos_lut_f
[
l
][(
n_samples_max
>>
1
)
+
8
*
f
])),
_mm256_mul_ps
(
_mm256_set1_ps
(
desc
->
a
[
l
][
aarx
+
(
aatx
*
desc
->
nb_rx
)].
y
),
_mm256_loadu_ps
(
&
sin_lut_f
[
l
][(
n_samples_max
>>
1
)
+
8
*
f
]))));
chFy_256
=
_mm256_add_ps
(
chFy_256
,
_mm256_sub_ps
(
_mm256_mul_ps
(
_mm256_set1_ps
(
desc
->
a
[
l
][
aarx
+
(
aatx
*
desc
->
nb_rx
)].
y
),
_mm256_loadu_ps
(
&
cos_lut_f
[
l
][(
n_samples_max
>>
1
)
+
8
*
f
])),
_mm256_mul_ps
(
_mm256_set1_ps
(
desc
->
a
[
l
][
aarx
+
(
aatx
*
desc
->
nb_rx
)].
x
),
_mm256_loadu_ps
(
&
sin_lut_f
[
l
][(
n_samples_max
>>
1
)
+
8
*
f
]))));
}
_mm256_storeu_ps
(
&
desc
->
chFf
[
aarx
+
(
aatx
*
desc
->
nb_rx
)].
x
[(
n_samples
>>
1
)
+
8
*
f
],
chFx_256
);
_mm256_storeu_ps
(
&
desc
->
chFf
[
aarx
+
(
aatx
*
desc
->
nb_rx
)].
y
[(
n_samples
>>
1
)
+
8
*
f
],
chFy_256
);
//printf("chFx %e,%e,%e,%e\n",chFx_128[0],chFx_128[1],chFx_128[2],chFx_128[3]);
//printf("chFy %e,%e,%e,%e\n",chFy_128[0],chFy_128[1],chFy_128[2],chFy_128[3]);
}
}
}
stop_meas
(
&
desc
->
interp_freq
);
/*for (f=-(n_samples>>1); f<(n_samples>>1); f++) {
printf("f %d, (chF.x,chF.y) (%e,%e):\n",f,desc->chFf[0].x[(n_samples>>1)+f],desc->chFf[0].y[(n_samples>>1)+f]);
}*/
return
(
0
);
}
int
init_freq_channel_prach
(
channel_desc_t
*
desc
,
uint16_t
nb_rb
,
int16_t
n_samples
,
int16_t
prach_fmt
,
int16_t
n_ra_prb
)
{
...
...
@@ -792,6 +961,102 @@ void sincos_ps(__m128 x, __m128 *s, __m128 *c) {
*
c
=
_mm_xor_ps
(
xmm2
,
sign_bit_cos
);
}
void
sincos256_ps
(
__m256
x
,
__m256
*
s
,
__m256
*
c
)
{
__m256
xmm1
,
xmm2
,
xmm3
=
_mm256_setzero_ps
(),
sign_bit_sin
,
y
;
__m256i
imm0
,
imm2
,
imm4
;
sign_bit_sin
=
x
;
/* take the absolute value */
x
=
_mm256_and_ps
(
x
,
_mm256_castsi256_ps
(
_mm256_set1_epi32
(
~
0x80000000
)));
//_ps_inv_sign_mask
/* extract the sign bit (upper one) */
sign_bit_sin
=
_mm256_and_ps
(
sign_bit_sin
,
_mm256_castsi256_ps
(
_mm256_set1_epi32
(
0x80000000
)));
//_ps_sign_mask
/* scale by 4/Pi */
y
=
_mm256_mul_ps
(
x
,
_mm256_set1_ps
(
1
.
27323954473516
f
));
/* store the integer part of y in imm2 */
imm2
=
_mm256_cvttps_epi32
(
y
);
/* j=(j+1) & (~1) (see the cephes sources) */
imm2
=
_mm256_add_epi32
(
imm2
,
_mm256_set1_epi32
(
1
));
//_pi32_1
imm2
=
_mm256_and_si256
(
imm2
,
_mm256_set1_epi32
(
~
1
));
//_pi32_inv1
y
=
_mm256_cvtepi32_ps
(
imm2
);
imm4
=
imm2
;
/* get the swap sign flag for the sine */
imm0
=
_mm256_and_si256
(
imm2
,
_mm256_set1_epi32
(
4
));
//_pi32_4
imm0
=
_mm256_slli_epi32
(
imm0
,
29
);
/* get the polynom selection mask for the sine*/
imm2
=
_mm256_and_si256
(
imm2
,
_mm256_set1_epi32
(
2
));
//_pi32_2
imm2
=
_mm256_cmpeq_epi32
(
imm2
,
_mm256_setzero_si256
());
__m256
swap_sign_bit_sin
=
_mm256_castsi256_ps
(
imm0
);
__m256
poly_mask
=
_mm256_castsi256_ps
(
imm2
);
/* The magic pass: "Extended precision modular arithmetic"
x = ((x - y * DP1) - y * DP2) - y * DP3; */
xmm1
=
_mm256_set1_ps
(
-
0
.
78515625
f
);
//_ps_minus_cephes_DP1
xmm2
=
_mm256_set1_ps
(
-
2.4187564849853515625e-4
f
);
//_ps_minus_cephes_DP2
xmm3
=
_mm256_set1_ps
(
-
3.77489497744594108e-8
f
);
//_ps_minus_cephes_DP3
xmm1
=
_mm256_mul_ps
(
y
,
xmm1
);
xmm2
=
_mm256_mul_ps
(
y
,
xmm2
);
xmm3
=
_mm256_mul_ps
(
y
,
xmm3
);
x
=
_mm256_add_ps
(
x
,
xmm1
);
x
=
_mm256_add_ps
(
x
,
xmm2
);
x
=
_mm256_add_ps
(
x
,
xmm3
);
imm4
=
_mm256_sub_epi32
(
imm4
,
_mm256_set1_epi32
(
2
));
//_pi32_2
imm4
=
_mm256_andnot_si256
(
imm4
,
_mm256_set1_epi32
(
4
));
//_pi32_4
imm4
=
_mm256_slli_epi32
(
imm4
,
29
);
__m256
sign_bit_cos
=
_mm256_castsi256_ps
(
imm4
);
sign_bit_sin
=
_mm256_xor_ps
(
sign_bit_sin
,
swap_sign_bit_sin
);
/* Evaluate the first polynom (0 <= x <= Pi/4) */
__m256
z
=
_mm256_mul_ps
(
x
,
x
);
y
=
_mm256_set1_ps
(
2.443315711809948E-005
f
);
//_ps_coscof_p0
y
=
_mm256_mul_ps
(
y
,
z
);
y
=
_mm256_add_ps
(
y
,
_mm256_set1_ps
(
-
1.388731625493765E-003
f
));
//_ps_coscof_p1
y
=
_mm256_mul_ps
(
y
,
z
);
y
=
_mm256_add_ps
(
y
,
_mm256_set1_ps
(
4.166664568298827E-002
f
));
//_ps_coscof_p2
y
=
_mm256_mul_ps
(
y
,
z
);
y
=
_mm256_mul_ps
(
y
,
z
);
__m256
tmp
=
_mm256_mul_ps
(
z
,
_mm256_set1_ps
(
0
.
5
f
));
//_ps_0p5
y
=
_mm256_sub_ps
(
y
,
tmp
);
y
=
_mm256_add_ps
(
y
,
_mm256_set1_ps
(
1
.
f
));
//_ps_1
/* Evaluate the second polynom (Pi/4 <= x <= 0) */
__m256
y2
=
_mm256_set1_ps
(
-
1.9515295891E-4
f
);
//*(__m128*)_ps_sincof_p0;
y2
=
_mm256_mul_ps
(
y2
,
z
);
y2
=
_mm256_add_ps
(
y2
,
_mm256_set1_ps
(
8.3321608736E-3
f
));
//*(__m128*)_ps_sincof_p1);
y2
=
_mm256_mul_ps
(
y2
,
z
);
y2
=
_mm256_add_ps
(
y2
,
_mm256_set1_ps
(
-
1.6666654611E-1
f
));
//_ps_sincof_p2
y2
=
_mm256_mul_ps
(
y2
,
z
);
y2
=
_mm256_mul_ps
(
y2
,
x
);
y2
=
_mm256_add_ps
(
y2
,
x
);
/* select the correct result from the two polynoms */
xmm3
=
poly_mask
;
__m256
ysin2
=
_mm256_and_ps
(
xmm3
,
y2
);
__m256
ysin1
=
_mm256_andnot_ps
(
xmm3
,
y
);
y2
=
_mm256_sub_ps
(
y2
,
ysin2
);
y
=
_mm256_sub_ps
(
y
,
ysin1
);
xmm1
=
_mm256_add_ps
(
ysin1
,
ysin2
);
xmm2
=
_mm256_add_ps
(
y
,
y2
);
/* update the sign */
*
s
=
_mm256_xor_ps
(
xmm1
,
sign_bit_sin
);
*
c
=
_mm256_xor_ps
(
xmm2
,
sign_bit_cos
);
}
__m128
log_ps
(
__m128
x
)
{
...
...
@@ -864,6 +1129,77 @@ __m128 log_ps(__m128 x) {
return
x
;
}
__m256
log256_ps
(
__m256
x
)
{
__m256i
imm0
__attribute__
((
aligned
(
32
)));
__m256
one
__attribute__
((
aligned
(
32
)))
=
_mm256_set1_ps
(
1
.
f
);
__m256
invalid_mask
__attribute__
((
aligned
(
32
)))
=
_mm256_cmp_ps
(
x
,
_mm256_setzero_ps
(),
_CMP_LE_OS
);
x
=
_mm256_max_ps
(
x
,
_mm256_castsi256_ps
(
_mm256_set1_epi32
(
0x00800000
)));
// cut off denormalized stuff
imm0
=
_mm256_srli_epi32
(
_mm256_castps_si256
(
x
),
23
);
// keep only the fractional part
x
=
_mm256_and_ps
(
x
,
_mm256_castsi256_ps
(
_mm256_set1_epi32
(
~
0x7f800000
)));
//printf("ln inside x is %e,%e,%e,%e\n",x[0],x[1],x[2],x[3]);
x
=
_mm256_or_ps
(
x
,
_mm256_set1_ps
(
0
.
5
f
));
//printf("ln inside x is %e,%e,%e,%e\n",x[0],x[1],x[2],x[3]);
// now e=mm0:mm1 contain the really base-2 exponent
imm0
=
_mm256_sub_epi32
(
imm0
,
_mm256_set1_epi32
(
0x7f
));
__m256
e
=
_mm256_cvtepi32_ps
(
imm0
);
e
=
_mm256_add_ps
(
e
,
one
);
__m256
mask
=
_mm256_cmp_ps
(
x
,
_mm256_set1_ps
(
0
.
707106781186547524
f
),
_CMP_LT_OS
);
__m256
tmp
=
_mm256_and_ps
(
x
,
mask
);
x
=
_mm256_sub_ps
(
x
,
one
);
e
=
_mm256_sub_ps
(
e
,
_mm256_and_ps
(
one
,
mask
));
x
=
_mm256_add_ps
(
x
,
tmp
);
__m256
z
=
_mm256_mul_ps
(
x
,
x
);
__m256
y
=
_mm256_set1_ps
(
7.0376836292E-2
f
);
y
=
_mm256_mul_ps
(
y
,
x
);
y
=
_mm256_add_ps
(
y
,
_mm256_set1_ps
(
-
1.1514610310E-1
f
));
y
=
_mm256_mul_ps
(
y
,
x
);
y
=
_mm256_add_ps
(
y
,
_mm256_set1_ps
(
1.1676998740E-1
f
));
y
=
_mm256_mul_ps
(
y
,
x
);
y
=
_mm256_add_ps
(
y
,
_mm256_set1_ps
(
-
1.2420140846E-1
f
));
y
=
_mm256_mul_ps
(
y
,
x
);
y
=
_mm256_add_ps
(
y
,
_mm256_set1_ps
(
1.4249322787E-1
f
));
y
=
_mm256_mul_ps
(
y
,
x
);
y
=
_mm256_add_ps
(
y
,
_mm256_set1_ps
(
-
1.6668057665E-1
f
));
y
=
_mm256_mul_ps
(
y
,
x
);
y
=
_mm256_add_ps
(
y
,
_mm256_set1_ps
(
2.0000714765E-1
f
));
y
=
_mm256_mul_ps
(
y
,
x
);
y
=
_mm256_add_ps
(
y
,
_mm256_set1_ps
(
-
2.4999993993E-1
f
));
y
=
_mm256_mul_ps
(
y
,
x
);
y
=
_mm256_add_ps
(
y
,
_mm256_set1_ps
(
3.3333331174E-1
f
));
y
=
_mm256_mul_ps
(
y
,
x
);
y
=
_mm256_mul_ps
(
y
,
z
);
tmp
=
_mm256_mul_ps
(
e
,
_mm256_set1_ps
(
-
2.12194440e-4
f
));
y
=
_mm256_add_ps
(
y
,
tmp
);
tmp
=
_mm256_mul_ps
(
z
,
_mm256_set1_ps
(
0
.
5
f
));
y
=
_mm256_sub_ps
(
y
,
tmp
);
tmp
=
_mm256_mul_ps
(
e
,
_mm256_set1_ps
(
0
.
693359375
f
));
x
=
_mm256_add_ps
(
x
,
y
);
x
=
_mm256_add_ps
(
x
,
tmp
);
x
=
_mm256_or_ps
(
x
,
invalid_mask
);
// negative arg will be NAN
return
x
;
}
__m128
exp_ps
(
__m128
x
)
{
__m128
tmp
=
_mm_setzero_ps
(),
fx
;
...
...
@@ -918,7 +1254,61 @@ __m128 exp_ps(__m128 x) {
y
=
_mm_mul_ps
(
y
,
pow2n
);
return
y
;
}
__m256
exp256_ps
(
__m256
x
)
{
__m256
tmp
=
_mm256_setzero_ps
(),
fx
;
__m256i
imm0
;
__m256
one
=
_mm256_set1_ps
(
1
.
f
);
x
=
_mm256_min_ps
(
x
,
_mm256_set1_ps
(
88
.
3762626647949
f
));
x
=
_mm256_max_ps
(
x
,
_mm256_set1_ps
(
-
88
.
3762626647949
f
));
/* express exp(x) as exp(g + n*log(2)) */
fx
=
_mm256_mul_ps
(
x
,
_mm256_set1_ps
(
1
.
44269504088896341
f
));
fx
=
_mm256_add_ps
(
fx
,
_mm256_set1_ps
(
0
.
5
f
));
/* how to perform a floorf with SSE: just below */
/*emm0 = _mm_cvttps_epi32(fx);
tmp = _mm_cvtepi32_ps(emm0);*/
tmp
=
_mm256_floor_ps
(
fx
);
/* if greater, substract 1 */
__m256
mask
=
_mm256_cmp_ps
(
tmp
,
fx
,
_CMP_GT_OS
);
mask
=
_mm256_and_ps
(
mask
,
one
);
fx
=
_mm256_sub_ps
(
tmp
,
mask
);
tmp
=
_mm256_mul_ps
(
fx
,
_mm256_set1_ps
(
0
.
693359375
f
));
__m256
z
=
_mm256_mul_ps
(
fx
,
_mm256_set1_ps
(
-
2.12194440e-4
f
));
x
=
_mm256_sub_ps
(
x
,
tmp
);
x
=
_mm256_sub_ps
(
x
,
z
);
z
=
_mm256_mul_ps
(
x
,
x
);
__m256
y
=
_mm256_set1_ps
(
1.9875691500E-4
f
);
y
=
_mm256_mul_ps
(
y
,
x
);
y
=
_mm256_add_ps
(
y
,
_mm256_set1_ps
(
1.3981999507E-3
f
));
y
=
_mm256_mul_ps
(
y
,
x
);
y
=
_mm256_add_ps
(
y
,
_mm256_set1_ps
(
8.3334519073E-3
f
));
y
=
_mm256_mul_ps
(
y
,
x
);
y
=
_mm256_add_ps
(
y
,
_mm256_set1_ps
(
4.1665795894E-2
f
));
y
=
_mm256_mul_ps
(
y
,
x
);
y
=
_mm256_add_ps
(
y
,
_mm256_set1_ps
(
1.6666665459E-1
f
));
y
=
_mm256_mul_ps
(
y
,
x
);
y
=
_mm256_add_ps
(
y
,
_mm256_set1_ps
(
5.0000001201E-1
f
));
y
=
_mm256_mul_ps
(
y
,
z
);
y
=
_mm256_add_ps
(
y
,
x
);
y
=
_mm256_add_ps
(
y
,
one
);
/* build 2^n */
imm0
=
_mm256_cvttps_epi32
(
fx
);
imm0
=
_mm256_add_epi32
(
imm0
,
_mm256_set1_epi32
(
0x7f
));
imm0
=
_mm256_slli_epi32
(
imm0
,
23
);
__m256
pow2n
=
_mm256_castsi256_ps
(
imm0
);
y
=
_mm256_mul_ps
(
y
,
pow2n
);
return
y
;
}
/*#ifdef abstraction_SSE
int freq_channel_prach(channel_desc_t *desc,uint16_t nb_rb,int16_t n_samples,int16_t prach_fmt,int16_t n_ra_prb)
{
...
...
openair1/SIMULATION/TOOLS/defs.h
View file @
eebbd37e
...
...
@@ -313,6 +313,17 @@ void multipath_channel_freq_SSE_float(channel_desc_t *desc,
uint8_t
UE_id
,
uint8_t
CC_id
,
uint8_t
th_id
);
void
multipath_channel_freq_AVX_float
(
channel_desc_t
*
desc
,
float
*
tx_sig_re
[
2
],
float
*
tx_sig_im
[
2
],
float
*
rx_sig_re
[
2
],
float
*
rx_sig_im
[
2
],
uint32_t
length
,
uint8_t
keep_channel
,
uint8_t
eNB_id
,
uint8_t
UE_id
,
uint8_t
CC_id
,
uint8_t
th_id
);
/**\fn void multipath_channel_prach(channel_desc_t *desc,
double tx_sig_re[2],
double tx_sig_im[2],
...
...
@@ -474,7 +485,9 @@ void table_nor(unsigned long seed);
double
nfix
(
void
);
__m128
nfix_SSE
(
__m128i
iz
);
__m128
log_ps
(
__m128
x
);
__m256
log256_ps
(
__m256
x
);
__m128
exp_ps
(
__m128
x
);
__m256
exp256_ps
(
__m256
x
);
double
uniformrandom
(
void
);
void
uniformrandomSSE
(
__m128d
*
d1
,
__m128d
*
d2
);
double
ziggurat
(
double
mean
,
double
variance
);
...
...
@@ -482,14 +495,17 @@ __m128 ziggurat_SSE_float(void);
void
boxmuller_SSE_float
(
__m128
*
data1
,
__m128
*
data2
);
int
freq_channel
(
channel_desc_t
*
desc
,
uint16_t
nb_rb
,
int16_t
n_samples
);
int
freq_channel_SSE_float
(
channel_desc_t
*
desc
,
uint16_t
nb_rb
,
int16_t
n_samples
);
int
freq_channel_AVX_float
(
channel_desc_t
*
desc
,
uint16_t
nb_rb
,
int16_t
n_samples
);
int
freq_channel_prach
(
channel_desc_t
*
desc
,
uint16_t
nb_rb
,
int16_t
n_samples
,
int16_t
prach_fmt
,
int16_t
n_ra_prb
);
int
freq_channel_prach_SSE_float
(
channel_desc_t
*
desc
,
uint16_t
nb_rb
,
int16_t
n_samples
,
int16_t
prach_fmt
,
int16_t
n_ra_prb
);
int
init_freq_channel
(
channel_desc_t
*
desc
,
uint16_t
nb_rb
,
int16_t
n_samples
);
int
init_freq_channel_SSE_float
(
channel_desc_t
*
desc
,
uint16_t
nb_rb
,
int16_t
n_samples
);
int
init_freq_channel_AVX_float
(
channel_desc_t
*
desc
,
uint16_t
nb_rb
,
int16_t
n_samples
);
int
init_freq_channel_prach
(
channel_desc_t
*
desc
,
uint16_t
nb_rb
,
int16_t
n_samples
,
int16_t
prach_fmt
,
int16_t
n_ra_prb
);
int
init_freq_channel_prach_SSE_float
(
channel_desc_t
*
desc
,
uint16_t
nb_rb
,
int16_t
n_samples
,
int16_t
prach_fmt
,
int16_t
n_ra_prb
);
void
sincos_ps
(
__m128
x
,
__m128
*
s
,
__m128
*
c
);
void
sincos256_ps
(
__m256
x
,
__m256
*
s
,
__m256
*
c
);
uint8_t
multipath_channel_nosigconv
(
channel_desc_t
*
desc
);
void
multipath_tv_channel
(
channel_desc_t
*
desc
,
...
...
openair1/SIMULATION/TOOLS/multipath_channel.c
View file @
eebbd37e
...
...
@@ -615,6 +615,127 @@ void multipath_channel_freq_SSE_float(channel_desc_t *desc,
}
// f,f2,f3
//}//k
}
void
multipath_channel_freq_AVX_float
(
channel_desc_t
*
desc
,
float
*
tx_sig_re
[
2
],
float
*
tx_sig_im
[
2
],
float
*
rx_sig_re
[
2
],
float
*
rx_sig_im
[
2
],
uint32_t
length
,
uint8_t
keep_channel
,
uint8_t
eNB_id
,
uint8_t
UE_id
,
uint8_t
CC_id
,
uint8_t
th_id
)
{
int
ii
,
j
,
f
;
__m256
rx_tmp256_re_f
,
rx_tmp256_im_f
,
rx_tmp256_re
,
rx_tmp256_im
,
rx_tmp256_1
,
rx_tmp256_2
,
rx_tmp256_3
,
rx_tmp256_4
,
tx256_re
,
tx256_im
,
chF256_x
,
chF256_y
,
pathloss256
;
//struct complex rx_tmp;
float
path_loss
=
pow
(
10
,
desc
->
path_loss_dB
/
20
);
pathloss256
=
_mm256_set1_ps
(
path_loss
);
int
nb_rb
,
n_samples
,
ofdm_symbol_size
,
symbols_per_tti
;
nb_rb
=
PHY_vars_UE_g
[
UE_id
][
CC_id
]
->
frame_parms
.
N_RB_DL
;
n_samples
=
PHY_vars_UE_g
[
UE_id
][
CC_id
]
->
frame_parms
.
N_RB_DL
*
12
+
1
;
ofdm_symbol_size
=
length
/
PHY_vars_UE_g
[
UE_id
][
CC_id
]
->
frame_parms
.
symbols_per_tti
;
symbols_per_tti
=
length
/
PHY_vars_UE_g
[
UE_id
][
CC_id
]
->
frame_parms
.
ofdm_symbol_size
;
#ifdef DEBUG_CH
printf
(
"[CHANNEL_FREQ] keep = %d : path_loss = %g (%f), nb_rx %d, nb_tx %d, dd %d, len %d
\n
"
,
keep_channel
,
path_loss
,
desc
->
path_loss_dB
,
desc
->
nb_rx
,
desc
->
nb_tx
,
dd
,
desc
->
channel_length
);
#endif
if
(
keep_channel
)
{
// do nothing - keep channel
}
else
{
random_channel_freq
(
desc
,
0
);
freq_channel_SSE_float
(
desc
,
nb_rb
,
n_samples
);
//Find desc->chF
}
for
(
j
=
0
;
j
<
(
symbols_per_tti
>>
2
);
j
++
){
for
(
ii
=
0
;
ii
<
desc
->
nb_rx
;
ii
++
)
{
_mm256_storeu_ps
(
&
rx_sig_re
[
ii
][
4
*
j
*
ofdm_symbol_size
],
_mm256_setzero_ps
());
_mm256_storeu_ps
(
&
rx_sig_im
[
ii
][
4
*
j
*
ofdm_symbol_size
],
_mm256_setzero_ps
());
}
}
for
(
f
=
0
;
f
<
((
ofdm_symbol_size
*
symbols_per_tti
)
>>
3
);
f
++
)
{
//f2 = 0-1024*14-1 ---- for 10 Mhz BW
//printf("f is %d\n",f);
for
(
ii
=
0
;
ii
<
desc
->
nb_rx
;
ii
++
)
{
//rx_tmp.x = 0;
//rx_tmp.y = 0;
rx_tmp256_re_f
=
_mm256_setzero_ps
();
rx_tmp256_im_f
=
_mm256_setzero_ps
();
if
(
f
%
(
ofdm_symbol_size
>>
2
)
<
(
n_samples
>>
2
))
//1-300
{
for
(
j
=
0
;
j
<
desc
->
nb_tx
;
j
++
)
{
//first n_samples>>1 samples of each frequency ofdm symbol out of ofdm_symbol_size
//RX_RE(k) += TX_RE(k).chF(k).x - TX_IM(k).chF(k).y
//RX_IM(k) += TX_IM(k).chF(k).x + TX_RE(k).chF(k).y
tx256_re
=
_mm256_loadu_ps
(
&
tx_sig_re
[
j
][(
4
*
f
+
1
)]);
tx256_im
=
_mm256_loadu_ps
(
&
tx_sig_im
[
j
][(
4
*
f
+
1
)]);
chF256_x
=
_mm256_set1_ps
(
desc
->
chFf
[
ii
+
(
j
*
desc
->
nb_rx
)].
x
[(
4
*
(
f
%
(
ofdm_symbol_size
>>
2
)))
+
(
n_samples
>>
2
)]);
chF256_y
=
_mm256_set1_ps
(
desc
->
chFf
[
ii
+
(
j
*
desc
->
nb_rx
)].
y
[(
4
*
(
f
%
(
ofdm_symbol_size
>>
2
)))
+
(
n_samples
>>
2
)]);
//rx_tmp.x += (tx_sig_re[j][f+k*ofdm_symbol_size] * desc->chF[ii+(j*desc->nb_rx)][f+(n_samples>>1)-1].x)//tx128_re*ch128_x
// -(tx_sig_im[j][f+k*ofdm_symbol_size] * desc->chF[ii+(j*desc->nb_rx)][f+(n_samples>>1)-1].y);//-tx128_im*ch128_y
//rx_tmp.y += (tx_sig_im[j][f+k*ofdm_symbol_size] * desc->chF[ii+(j*desc->nb_rx)][f+(n_samples>>1)-1].x)//tx128_im*ch128_x
// +(tx_sig_re[j][f+k*ofdm_symbol_size] * desc->chF[ii+(j*desc->nb_rx)][f+(n_samples>>1)-1].y);//+tx128_re*ch128_y
rx_tmp256_1
=
_mm256_mul_ps
(
tx256_re
,
chF256_x
);
rx_tmp256_2
=
_mm256_mul_ps
(
tx256_im
,
chF256_y
);
rx_tmp256_3
=
_mm256_mul_ps
(
tx256_im
,
chF256_x
);
rx_tmp256_4
=
_mm256_mul_ps
(
tx256_re
,
chF256_y
);
rx_tmp256_re
=
_mm256_sub_ps
(
rx_tmp256_1
,
rx_tmp256_2
);
rx_tmp256_im
=
_mm256_add_ps
(
rx_tmp256_3
,
rx_tmp256_4
);
rx_tmp256_re_f
=
_mm256_add_ps
(
rx_tmp256_re_f
,
rx_tmp256_re
);
rx_tmp256_im_f
=
_mm256_add_ps
(
rx_tmp256_im_f
,
rx_tmp256_im
);
}
// j
//rx_sig_re[ii][f+k*ofdm_symbol_size] = rx_tmp.x*path_loss;
//rx_sig_im[ii][f+k*ofdm_symbol_size] = rx_tmp.y*path_loss;
rx_tmp256_re_f
=
_mm256_mul_ps
(
rx_tmp256_re_f
,
pathloss256
);
rx_tmp256_im_f
=
_mm256_mul_ps
(
rx_tmp256_im_f
,
pathloss256
);
_mm256_storeu_ps
(
&
rx_sig_re
[
ii
][(
4
*
f
+
1
)],
rx_tmp256_re_f
);
_mm256_storeu_ps
(
&
rx_sig_im
[
ii
][(
4
*
f
+
1
)],
rx_tmp256_im_f
);
}
else
if
(
f
%
(
ofdm_symbol_size
>>
2
)
>
(
n_samples
>>
2
)
&&
f
%
(
ofdm_symbol_size
>>
2
)
<
(
ofdm_symbol_size
>>
2
)
-
(
n_samples
>>
2
))
{
//rx_sig_re[ii][f+k*ofdm_symbol_size] = 0;
//rx_sig_im[ii][f+k*ofdm_symbol_size] = 0;
//_mm_storeu_pd(&rx_sig_re[ii][2*f],_mm_setzero_pd());
//_mm_storeu_pd(&rx_sig_im[ii][2*f],_mm_setzero_pd());
break
;
}
else
{
for
(
j
=
0
;
j
<
desc
->
nb_tx
;
j
++
)
{
//last n_samples>>1 samples of each frequency ofdm symbol out of ofdm_symbol_size
//RX_RE(k) += TX_RE(k).chF(k).x - TX_IM(k).chF(k).y
//RX_IM(k) += TX_IM(k).chF(k).x + TX_RE(k).chF(k).y
tx256_re
=
_mm256_loadu_ps
(
&
tx_sig_re
[
j
][
4
*
f
]);
tx256_im
=
_mm256_loadu_ps
(
&
tx_sig_im
[
j
][
4
*
f
]);
chF256_x
=
_mm256_set1_ps
(
desc
->
chFf
[
ii
+
(
j
*
desc
->
nb_rx
)].
x
[
4
*
(
f
%
(
ofdm_symbol_size
>>
2
)
-
((
ofdm_symbol_size
>>
2
)
-
(
n_samples
>>
2
)))]);
chF256_y
=
_mm256_set1_ps
(
desc
->
chFf
[
ii
+
(
j
*
desc
->
nb_rx
)].
y
[
4
*
(
f
%
(
ofdm_symbol_size
>>
2
)
-
((
ofdm_symbol_size
>>
2
)
-
(
n_samples
>>
2
)))]);
//rx_tmp.x += (tx_sig_re[j][f+k*ofdm_symbol_size] * desc->chF[ii+(j*desc->nb_rx)][f2].x)
// -(tx_sig_im[j][f+k*ofdm_symbol_size] * desc->chF[ii+(j*desc->nb_rx)][f2].y);
//rx_tmp.y += (tx_sig_im[j][f+k*ofdm_symbol_size] * desc->chF[ii+(j*desc->nb_rx)][f2].x)
// +(tx_sig_re[j][f+k*ofdm_symbol_size] * desc->chF[ii+(j*desc->nb_rx)][f2].y);
rx_tmp256_1
=
_mm256_mul_ps
(
tx256_re
,
chF256_x
);
rx_tmp256_2
=
_mm256_mul_ps
(
tx256_im
,
chF256_y
);
rx_tmp256_3
=
_mm256_mul_ps
(
tx256_im
,
chF256_x
);
rx_tmp256_4
=
_mm256_mul_ps
(
tx256_re
,
chF256_y
);
rx_tmp256_re
=
_mm256_sub_ps
(
rx_tmp256_1
,
rx_tmp256_2
);
rx_tmp256_im
=
_mm256_add_ps
(
rx_tmp256_3
,
rx_tmp256_4
);
rx_tmp256_re_f
=
_mm256_add_ps
(
rx_tmp256_re_f
,
rx_tmp256_re
);
rx_tmp256_im_f
=
_mm256_add_ps
(
rx_tmp256_im_f
,
rx_tmp256_im
);
}
// j
//rx_sig_re[ii][f+k*ofdm_symbol_size] = rx_tmp.x*path_loss;
//rx_sig_im[ii][f+k*ofdm_symbol_size] = rx_tmp.y*path_loss;
rx_tmp256_re_f
=
_mm256_mul_ps
(
rx_tmp256_re_f
,
pathloss256
);
rx_tmp256_im_f
=
_mm256_mul_ps
(
rx_tmp256_im_f
,
pathloss256
);
_mm256_storeu_ps
(
&
rx_sig_re
[
ii
][
4
*
f
],
rx_tmp256_re_f
);
_mm256_storeu_ps
(
&
rx_sig_im
[
ii
][
4
*
f
],
rx_tmp256_im_f
);
}
}
// ii
}
// f,f2,f3
//}//k
}
#ifdef CHANNEL_SSE
void
multipath_channel_prach
(
channel_desc_t
*
desc
,
double
*
tx_sig_re
[
2
],
...
...
openair1/SIMULATION/TOOLS/rangen_double.c
View file @
eebbd37e
...
...
@@ -33,7 +33,7 @@
#ifdef USER_MODE
#include <stdio.h>
#include <stdlib.h>
#include <stdin.h>
#include <stdin
t
.h>
#include <math.h>
#include <time.h>
#endif
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment