Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
O
OpenXG-RAN
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ZhouShuya
OpenXG-RAN
Commits
02f1ec69
Commit
02f1ec69
authored
Mar 24, 2018
by
Luis Ariza
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
SSE Ziggurat version improved
parent
fc657e21
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
391 additions
and
123 deletions
+391
-123
openair1/SCHED/phy_procedures_lte_ue.c
openair1/SCHED/phy_procedures_lte_ue.c
+4
-4
openair1/SIMULATION/RF/rf.c
openair1/SIMULATION/RF/rf.c
+3
-4
openair1/SIMULATION/TOOLS/rangen_double.c
openair1/SIMULATION/TOOLS/rangen_double.c
+382
-114
openair2/RRC/LITE/rrc_UE.c
openair2/RRC/LITE/rrc_UE.c
+2
-1
No files found.
openair1/SCHED/phy_procedures_lte_ue.c
View file @
02f1ec69
...
...
@@ -2896,7 +2896,7 @@ void ue_pbch_procedures(uint8_t eNB_id,PHY_VARS_UE *ue,UE_rxtx_proc_t *proc, uin
int
pbch_tx_ant
=
0
;
uint8_t
pbch_phase
;
uint16_t
frame_tx
;
static
uint8_t
first_run
=
1
;
static
uint8_t
first_run
=
0
;
uint8_t
pbch_trials
=
0
;
DevAssert
(
ue
);
...
...
@@ -2985,8 +2985,8 @@ void ue_pbch_procedures(uint8_t eNB_id,PHY_VARS_UE *ue,UE_rxtx_proc_t *proc, uin
//emos_dump_UE.mimo_mode = ue->pbch_vars[eNB_id]->decoded_output[1];
#endif
if
(
first_run
)
{
first_run
=
0
;
if
(
first_run
<=
ue
->
Mod_id
)
{
first_run
++
;
proc
->
frame_rx
=
(
proc
->
frame_rx
&
0xFFFFFC00
)
|
(
frame_tx
&
0x000003FF
);
proc
->
frame_tx
=
proc
->
frame_rx
;
...
...
@@ -5300,7 +5300,6 @@ else
if
((
ue
->
dlsch_SI
[
eNB_id
])
&&
(
ue
->
dlsch_SI
[
eNB_id
]
->
active
==
1
))
{
VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME
(
VCD_SIGNAL_DUMPER_FUNCTIONS_PDSCH_PROC_SI
,
VCD_FUNCTION_IN
);
ue_pdsch_procedures
(
ue
,
proc
,
eNB_id
,
SI_PDSCH
,
...
...
@@ -5410,6 +5409,7 @@ else
if
(
(
subframe_rx
==
0
)
&&
(
ue
->
decode_MIB
==
1
))
{
//printf("[ue_pbch_procedures] subframe_rx %d ,ue->decode_MIB %d, UE %d\n",subframe_rx,ue->decode_MIB,ue->Mod_id);
ue_pbch_procedures
(
eNB_id
,
ue
,
proc
,
abstraction_flag
);
}
...
...
openair1/SIMULATION/RF/rf.c
View file @
02f1ec69
...
...
@@ -303,9 +303,9 @@ clock_t start=clock();*/
rx128_re
=
_mm_loadu_pd
(
&
r_re
[
a
][
2
*
i
]);
//r_re[a][i],r_re[a][i+1]
rx128_im
=
_mm_loadu_pd
(
&
r_im
[
a
][
2
*
i
]);
//r_im[a][i],r_im[a][i+1]
rx128_gain_lin
=
_mm_set1_pd
(
rx_gain_lin
);
//
gauss_0_128_sqrt_NOW = _mm_set_pd(ziggurat(0.0,1.0),ziggurat(0.0,1.0));
//
gauss_1_128_sqrt_NOW = _mm_set_pd(ziggurat(0.0,1.0),ziggurat(0.0,1.0));
boxmuller_SSE_float
(
&
gauss_0_128_sqrt_NOW
,
&
gauss_1_128_sqrt_NOW
);
gauss_0_128_sqrt_NOW
=
_mm_set_pd
(
ziggurat
(
0
.
0
,
1
.
0
),
ziggurat
(
0
.
0
,
1
.
0
));
gauss_1_128_sqrt_NOW
=
_mm_set_pd
(
ziggurat
(
0
.
0
,
1
.
0
),
ziggurat
(
0
.
0
,
1
.
0
));
//
boxmuller_SSE_float(&gauss_0_128_sqrt_NOW, &gauss_1_128_sqrt_NOW);
gauss_0_128_sqrt_NOW
=
_mm_mul_pd
(
gauss_0_128_sqrt_NOW
,
_mm_set1_pd
(
sqrt_NOW
));
gauss_1_128_sqrt_NOW
=
_mm_mul_pd
(
gauss_1_128_sqrt_NOW
,
_mm_set1_pd
(
sqrt_NOW
));
// Amplify by receiver gain and apply 3rd order non-linearity
...
...
@@ -435,7 +435,6 @@ clock_t start=clock();*/
//boxmuller_SSE_float(&gauss_0_128_sqrt_NOW, &gauss_1_128_sqrt_NOW);
gauss_0_128_sqrt_NOW
=
ziggurat_SSE_float
();
gauss_1_128_sqrt_NOW
=
ziggurat_SSE_float
();
//stop_meas(&desc->ziggurat);
gauss_0_128_sqrt_NOW
=
_mm_mul_ps
(
gauss_0_128_sqrt_NOW
,
_mm_set1_ps
(
sqrt_NOW
));
gauss_1_128_sqrt_NOW
=
_mm_mul_ps
(
gauss_1_128_sqrt_NOW
,
_mm_set1_ps
(
sqrt_NOW
));
...
...
openair1/SIMULATION/TOOLS/rangen_double.c
View file @
02f1ec69
...
...
@@ -128,156 +128,119 @@ double nfix(void)
static
uint32_t
jsr4
[
4
]
__attribute__
((
aligned
(
16
)))
=
{
123456789
,
112548569
,
985584512
,
452236879
};
//This initialization depends on the seed for nor_table function in oaisim_functions.c file.
static
uint32_t
iz4
[
4
]
__attribute__
((
aligned
(
16
)));
static
float
out
[
4
]
__attribute__
((
aligned
(
16
)));
static
uint32_t
ssh3_sse4
[
4
]
__attribute__
((
aligned
(
16
)));
static
int32_t
ifabs4
[
4
]
__attribute__
((
aligned
(
16
)));
static
uint32_t
iz1
[
4
]
__attribute__
((
aligned
(
16
)));
static
uint32_t
iz2
[
4
]
__attribute__
((
aligned
(
16
)));
//static float out[4] __attribute__((aligned(16)));
//static int32_t ifabs4[4] __attribute__((aligned(16)));
static
int32_t
hz4
[
4
]
__attribute__
((
aligned
(
16
)));
static
int32_t
abshz4
[
4
]
__attribute__
((
aligned
(
16
)));
static
float
x4_option0
[
4
]
__attribute__
((
aligned
(
16
)));
static
float
x4
[
4
]
__attribute__
((
aligned
(
16
)));
static
int32_t
hz1
[
4
]
__attribute__
((
aligned
(
16
)));
static
int32_t
hz2
[
4
]
__attribute__
((
aligned
(
16
)));
static
int32_t
abshz
[
4
]
__attribute__
((
aligned
(
16
)));
static
int32_t
abshz1
[
4
]
__attribute__
((
aligned
(
16
)));
static
int32_t
abshz2
[
4
]
__attribute__
((
aligned
(
16
)));
static
__m128i
jsr_128
__attribute__
((
aligned
(
16
)));
static
__m128i
jz_128
__attribute__
((
aligned
(
16
)));
static
__m128i
hz_128
__attribute__
((
aligned
(
16
)));
static
__m128i
hz1_128
__attribute__
((
aligned
(
16
)));
static
__m128i
hz2_128
__attribute__
((
aligned
(
16
)));
static
__m128i
abs_hz_128
__attribute__
((
aligned
(
16
)));
static
__m128i
abs_hz1_128
__attribute__
((
aligned
(
16
)));
static
__m128i
abs_hz2_128
__attribute__
((
aligned
(
16
)));
static
__m128i
iz_128
__attribute__
((
aligned
(
16
)));
static
__m128
x128
__attribute__
((
aligned
(
16
)));
static
__m128i
ifabs
__attribute__
((
aligned
(
16
)));
static
__m128i
iz1_128
__attribute__
((
aligned
(
16
)));
static
__m128i
iz2_128
__attribute__
((
aligned
(
16
)));
static
__m128i
cmplt_option0_128
__attribute__
((
aligned
(
16
)));
static
int
count99
=
0
;
static
int
count0
=
0
;
static
int
nfix_first_run
=
0
;
static
__m128
x
__attribute__
((
aligned
(
16
)));
#define SHR3_SSE (jsr_128=_mm_loadu_si128((__m128i *)jsr4),jz_128=jsr_128, jsr_128=_mm_xor_si128(_mm_slli_epi32(jsr_128,13),jsr_128),jsr_128=_mm_xor_si128(_mm_srli_epi32(jsr_128,17),jsr_128),jsr_128=_mm_xor_si128(_mm_slli_epi32(jsr_128,5),jsr_128),_mm_storeu_si128((__m128i *)jsr4,jsr_128),_mm_add_epi32(jz_128,jsr_128))
#define UNI_SSE (_mm_add_ps(_mm_mul_ps(_mm_set1_ps(0.2328306e-9),_mm_cvtepi32_ps(SHR3_SSE)),_mm_set1_ps(0.5)))
#define NOR_SSE (hz_128=SHR3_SSE,_mm_storeu_si128((__m128i *)hz4,hz_128),iz_128=_mm_and_si128(hz_128,_mm_set1_epi32(127)),_mm_storeu_si128((__m128i *)iz4,iz_128),abs_hz_128=_mm_and_si128(hz_128, _mm_set1_epi32(~0x80000000)),
_mm_storeu_si128((__m128i *)abshz4,abs_hz_128
))
#define NOR_SSE (hz_128=SHR3_SSE,_mm_storeu_si128((__m128i *)hz4,hz_128),iz_128=_mm_and_si128(hz_128,_mm_set1_epi32(127)),_mm_storeu_si128((__m128i *)iz4,iz_128),abs_hz_128=_mm_and_si128(hz_128, _mm_set1_epi32(~0x80000000)),
cmplt_option0_128 = _mm_cmplt_epi32(abs_hz_128,_mm_setr_epi32(kn[iz4[0]],kn[iz4[1]],kn[iz4[2]],kn[iz4[3]])),count99=(count99>99)?0:count99+4,nfix_first_run=(count99>99)?0:1,(_mm_testc_si128(cmplt_option0_128,_mm_setr_epi32(0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF)))?x=_mm_mul_ps(_mm_cvtepi32_ps(hz_128),_mm_setr_ps(wn[iz4[0]],wn[iz4[1]],wn[iz4[2]],wn[iz4[3]])):nfix_SSE(
))
//
,ifabs=_mm_cmplt_epi32(_mm_max_epi32(_mm_sub_epi32(_mm_setzero_si128(),hz_128),hz_128),_mm_setr_epi32(kn[iz4[0]],kn[iz4[1]],kn[iz4[2]],kn[iz4[3]])),_mm_storeu_si128((__m128i *)ifabs4,ifabs),abs_hz_128=_mm_and_si128(hz_128, _mm_set1_epi32(~0x80000000)),_mm_storeu_si128((__m128i *)abshz4,abs_hz_128),printf("abs_hz_128 %d,%d,%d,%d\n",abshz4[0],abshz4[1],abshz4[2],abshz4[3]),printf("kn %d,%d,%d,%d\n",kn[iz4[0]],kn[iz4[1]],kn[iz4[2]],kn[iz4[3]]),printf("ifabs %x,%x,%x,%x\n",ifabs4[0],ifabs4[1],ifabs4[2],ifabs4[3]),x128=_mm_and_ps(_mm_cvtepi32_ps(_mm_cmplt_epi32(_mm_max_epi32(_mm_sub_epi32(_mm_setzero_si128(),hz_128),hz_128),_mm_setr_epi32(kn[iz4[0]],kn[iz4[1]],kn[iz4[2]],kn[iz4[3]]))),_mm_mul_ps(_mm_cvtepi32_ps(hz_128),_mm_setr_ps(wn[iz4[0]],wn[iz4[1]],wn[iz4[2]],wn[iz4[3]]))),printf("x128 %e,%e,%e,%e\n",x128[0],x128[1],x128[2],x128[3]),printf("iz %d,%d,%d,%d\n",iz4[0],iz4[1],iz4[2],iz4[3]),printf("wn*hz %e,%e,%e,%e\n",hz4[0]*wn[iz4[0]],hz4[1]*wn[iz4[1]],hz4[2]*wn[iz4[2]],hz4[3]*wn[iz4[3]]
))
//
#define NOR1_SSE (hz1_128=SHR3_SSE,_mm_storeu_si128((__m128i *)hz1,hz1_128),iz1_128=_mm_and_si128(hz1_128,_mm_set1_epi32(127)),_mm_storeu_si128((__m128i *)iz1,iz1_128),abs_hz1_128=_mm_and_si128(hz1_128, _mm_set1_epi32(~0x80000000)),_mm_storeu_si128((__m128i *)abshz1,abs_hz1_128
))
//,_mm_storeu_si128(ssh3_sse4,hz_128),printf("ssh3_sse4 %lu,%lu,%lu,%lu\n",ssh3_sse4[0],ssh3_sse4[1],ssh3_sse4[2],ssh3_sse4[3])
//#define NOR (hz=SHR3, printf("hz %d\n",hz),sign=(hz&128)>>7,printf("sign %s\n",(sign)?"-":"+"),iz=hz&127,printf("iz %d\n",iz), (abs(hz)<kn[iz])? (sign)?(-1)*hz*wn[iz]:hz*wn[iz] : (sign)?(-1)*nfix():nfix())
//#define NOR2_SSE (hz2_128=SHR3_SSE,_mm_storeu_si128((__m128i *)hz2,hz2_128),iz2_128=_mm_and_si128(hz2_128,_mm_set1_epi32(127)),_mm_storeu_si128((__m128i *)iz2,iz2_128),abs_hz2_128=_mm_and_si128(hz2_128, _mm_set1_epi32(~0x80000000)),_mm_storeu_si128((__m128i *)abshz2,abs_hz2_128))
//#define NOR_SSE (hz_128=SHR3_SSE,_mm_storeu_si128((__m128i *)hz4,hz_128),iz_128=_mm_and_si128(hz_128,_mm_set1_epi32(127)),_mm_storeu_si128((__m128i *)iz4,iz_128),abs_hz_128=_mm_and_si128(hz_128, _mm_set1_epi32(~0x80000000)),_mm_storeu_si128((__m128i *)abshz4,abs_hz_128),cmplt_option0_128 = _mm_cmplt_epi32(abs_hz_128,_mm_setr_epi32(kn[iz4[0]],kn[iz4[1]],kn[iz4[2]],kn[iz4[3]])),_mm_storeu_si128((__m128i *)cmplt_option0,cmplt_option0_128),count0=0,(cmplt_option0[0]==0xFFFFFFFF)?count99+=count0++:count0,(cmplt_option0[1]==0xFFFFFFFF)?count99+=count0++:count0,(cmplt_option0[2]==0xFFFFFFFF)?count99+=count0++:count0,(cmplt_option0[3]==0xFFFFFFFF)?count99+=count0++:count0,(cmplt_option0[0]==0xFFFFFFFF && cmplt_option0[1]==0xFFFFFFFF && cmplt_option0[2]==0xFFFFFFFF && cmplt_option0[3]==0xFFFFFFFF && count99<95 && count0==4)?_mm_mul_ps(_mm_cvtepi32_ps(hz_128),_mm_setr_ps(wn[iz4[0]],wn[iz4[1]],wn[iz4[2]],wn[iz4[3]])):nfix_SSE())
static
__m128
x
__attribute__
((
aligned
(
16
)));
static
__m128
y
__attribute__
((
aligned
(
16
)));
static
__m128i
cmplt_option0_128
__attribute__
((
aligned
(
16
)));
static
__m128i
cmplt_option1_128
__attribute__
((
aligned
(
16
)));
static
__m128i
cmplt_option2_128
__attribute__
((
aligned
(
16
)));
static
int32_t
cmplt_option0
[
4
]
__attribute__
((
aligned
(
16
)));
static
int32_t
cmplt_option1
[
4
]
__attribute__
((
aligned
(
16
)));
static
int32_t
cmplt_option2
[
4
]
__attribute__
((
aligned
(
16
)));
static
float
output0
[
4
]
__attribute__
((
aligned
(
16
)));
static
float
output1
[
4
]
__attribute__
((
aligned
(
16
)));
static
float
output2
[
4
]
__attribute__
((
aligned
(
16
)));
static
float
output
[
12
]
__attribute__
((
aligned
(
16
)));
static
int
option
=-
1
;
__m128
option012
(
void
)
{
int
i
;
for
(
i
=
0
;
i
<
4
;
i
++
)
{
if
(
abshz4
[
i
]
<
kn
[
iz4
[
i
]])
{
//,ifabs=_mm_cmplt_epi32(_mm_max_epi32(_mm_sub_epi32(_mm_setzero_si128(),hz_128),hz_128),_mm_setr_epi32(kn[iz4[0]],kn[iz4[1]],kn[iz4[2]],kn[iz4[3]])),_mm_storeu_si128((__m128i *)ifabs4,ifabs),abs_hz_128=_mm_and_si128(hz_128, _mm_set1_epi32(~0x80000000)),_mm_storeu_si128((__m128i *)abshz4,abs_hz_128),printf("abs_hz_128 %d,%d,%d,%d\n",abshz4[0],abshz4[1],abshz4[2],abshz4[3]),printf("kn %d,%d,%d,%d\n",kn[iz4[0]],kn[iz4[1]],kn[iz4[2]],kn[iz4[3]]),printf("ifabs %x,%x,%x,%x\n",ifabs4[0],ifabs4[1],ifabs4[2],ifabs4[3]),x128=_mm_and_ps(_mm_cvtepi32_ps(_mm_cmplt_epi32(_mm_max_epi32(_mm_sub_epi32(_mm_setzero_si128(),hz_128),hz_128),_mm_setr_epi32(kn[iz4[0]],kn[iz4[1]],kn[iz4[2]],kn[iz4[3]]))),_mm_mul_ps(_mm_cvtepi32_ps(hz_128),_mm_setr_ps(wn[iz4[0]],wn[iz4[1]],wn[iz4[2]],wn[iz4[3]]))),printf("x128 %e,%e,%e,%e\n",x128[0],x128[1],x128[2],x128[3]),printf("iz %d,%d,%d,%d\n",iz4[0],iz4[1],iz4[2],iz4[3]),printf("wn*hz %e,%e,%e,%e\n",hz4[0]*wn[iz4[0]],hz4[1]*wn[iz4[1]],hz4[2]*wn[iz4[2]],hz4[3]*wn[iz4[3]]))
}
}
}
//,_mm_storeu_si128(ssh3_sse4,hz_128),printf("ssh3_sse4 %lu,%lu,%lu,%lu\n",ssh3_sse4[0],ssh3_sse4[1],ssh3_sse4[2],ssh3_sse4[3])
//#define NOR (hz=SHR3, printf("hz %d\n",hz),sign=(hz&128)>>7,printf("sign %s\n",(sign)?"-":"+"),iz=hz&127,printf("iz %d\n",iz), (abs(hz)<kn[iz])? (sign)?(-1)*hz*wn[iz]:hz*wn[iz] : (sign)?(-1)*nfix():nfix())
__m128
nfix_SSE
(
void
)
{
static
int
count0
=
0
;
static
int
count1
=
0
;
static
int
count2
=
0
;
static
int
count
=
0
;
static
int
rand0
=
0
;
static
int
rand1
=
0
;
static
int
rand2
=
0
;
static
int
rand3
=
0
;
__m128
y
__attribute__
((
aligned
(
16
)));
__m128i
cmplt_option1_128
__attribute__
((
aligned
(
16
)));
__m128i
cmplt_option2_128
__attribute__
((
aligned
(
16
)));
int32_t
cmplt_option0
[
4
]
__attribute__
((
aligned
(
16
)));
int32_t
cmplt_option1
[
4
]
__attribute__
((
aligned
(
16
)));
int32_t
cmplt_option2
[
4
]
__attribute__
((
aligned
(
16
)));
float
output
[
12
]
__attribute__
((
aligned
(
16
)));
float
x4_option0
[
4
]
__attribute__
((
aligned
(
16
)));
float
x4
[
4
]
__attribute__
((
aligned
(
16
)));
int
i
;
static
float
r
=
3
.
442620
;
for
(;;)
{
if
(
count0
+
count1
+
count2
>
3
)
{
return
_mm_setr_ps
(
output
[
rand0
],
output
[
rand1
],
output
[
rand2
],
output
[
rand3
]);
}
uint32_t
iz4_i
[
4
]
__attribute__
((
aligned
(
16
)))
;
_mm_storeu_si128
((
__m128i
*
)
iz4_i
,
iz_128
);
NOR_SSE
;
//(abs(hz)<kn[iz])? hz*wn[iz]
cmplt_option0_128
=
_mm_cmplt_epi32
(
abs_hz_128
,
_mm_setr_epi32
(
kn
[
iz4
[
0
]],
kn
[
iz4
[
1
]],
kn
[
iz4
[
2
]],
kn
[
iz4
[
3
]]));
_mm_storeu_si128
((
__m128i
*
)
cmplt_option0
,
cmplt_option0_128
);
//x=hz * wn[iz];
x
=
_mm_mul_ps
(
_mm_cvtepi32_ps
(
hz_128
),
_mm_setr_ps
(
wn
[
iz4
[
0
]],
wn
[
iz4
[
1
]],
wn
[
iz4
[
2
]],
wn
[
iz4
[
3
]])
);
_mm_storeu_si128
((
__m128i
*
)
cmplt_option0
,
cmplt_option0_128
);
_mm_storeu_ps
(
x4_option0
,
x
);
count0
=
0
;
for
(
i
=
0
;
i
<
4
;
i
++
)
{
if
(
cmplt_option0
[
i
]
==
0xFFFFFFFF
)
{
//printf("count0 %d\n",count0);
output
[
count0
]
=
hz4
[
i
]
*
wn
[
iz4
[
i
]];
output
[
count0
]
=
hz4
[
i
]
*
wn
[
iz4_i
[
i
]];
count0
++
;
}
}
// if (fn[iz]+UNI*(fn[iz-1]-fn[iz])<exp(-0.5*x*x))
cmplt_option2_128
=
_mm_cvtps_epi32
(
_mm_cmplt_ps
(
_mm_add_ps
(
_mm_setr_ps
(
fn
[
iz4
[
0
]],
fn
[
iz4
[
1
]],
fn
[
iz4
[
2
]],
fn
[
iz4
[
3
]]),
_mm_mul_ps
(
UNI_SSE
,
_mm_sub_ps
(
_mm_setr_ps
(
fn
[
iz4
[
0
]
-
1
],
fn
[
iz4
[
1
]
-
1
],
fn
[
iz4
[
2
]
-
1
],
fn
[
iz4
[
3
]
-
1
]),
_mm_setr_ps
(
fn
[
iz4
[
0
]],
fn
[
iz4
[
1
]],
fn
[
iz4
[
2
]],
fn
[
iz4
[
3
]])))),
exp_ps
(
_mm_mul_ps
(
_mm_mul_ps
(
x
,
x
),
_mm_set1_ps
(
-
0
.
5
f
)))));
_mm_storeu_si128
((
__m128i
*
)
cmplt_option2
,
cmplt_option2_128
);
for
(
i
=
0
;
i
<
4
;
i
++
)
if
((
iz4_i
[
0
]
==
0
||
iz4_i
[
1
]
==
0
||
iz4_i
[
2
]
==
0
||
iz4_i
[
3
]
==
0
)
&&
nfix_first_run
==
0
&&
count0
>
0
)
{
if
(
cmplt_option2
[
i
]
==
0x80000000
)
{
//printf("count1 %d\n",count1);
output
[
count0
+
count1
]
=
x4_option0
[
i
];
count1
++
;
}
nfix_first_run
=
1
;
do
{
//x = - 0.2904764 * log (UNI);
x
=
_mm_mul_ps
(
_mm_set1_ps
(
-
0
.
2904764
f
),
log_ps
(
UNI_SSE
));
_mm_storeu_ps
(
x4
,
x
);
//y = - log (UNI);
y
=
_mm_mul_ps
(
_mm_set1_ps
(
-
1
.
0
f
),
log_ps
(
UNI_SSE
));
//(y+y < x*x)?
cmplt_option1_128
=
_mm_cvtps_epi32
(
_mm_cmplt_ps
(
_mm_add_ps
(
y
,
y
),
_mm_mul_ps
(
x
,
x
)));
_mm_storeu_si128
((
__m128i
*
)
cmplt_option1
,
cmplt_option1_128
);
for
(
i
=
0
;
i
<
4
;
i
++
)
{
if
(
cmplt_option1
[
i
]
==
0x80000000
)
{
output
[
3
]
=
(
hz4
[
i
]
>
0
)
?
x4
[
i
]
+
r
:-
x4
[
i
]
-
r
;
break
;
}
}
}
while
(
cmplt_option1
[
0
]
!=
0x80000000
&&
cmplt_option1
[
1
]
!=
0x80000000
&&
cmplt_option1
[
2
]
!=
0x80000000
&&
cmplt_option1
[
3
]
!=
0x80000000
);
return
_mm_setr_ps
(
output
[
0
],
output
[
1
],
output
[
2
],
output
[
3
]);
}
/*if (iz==0)
{
do
{
x = - 0.2904764 * log (UNI);
y = - log (UNI);
}
while (y+y < x*x);
return (hz>0)? r+x : -r-x;
}*/
//if (iz==0)
if
(
iz4
[
0
]
==
0
||
iz4
[
1
]
==
0
||
iz4
[
2
]
==
0
||
iz4
[
3
]
==
0
)
else
if
(
iz4
[
0
]
>
0
&&
iz4
[
1
]
>
0
&&
iz4
[
2
]
>
0
&&
iz4
[
3
]
>
0
&&
nfix_first_run
==
0
&&
count0
>
0
)
{
do
nfix_first_run
=
1
;
cmplt_option2_128
=
_mm_cvtps_epi32
(
_mm_cmplt_ps
(
_mm_add_ps
(
_mm_setr_ps
(
fn
[
iz4_i
[
0
]],
fn
[
iz4_i
[
1
]],
fn
[
iz4_i
[
2
]],
fn
[
iz4_i
[
3
]]),
_mm_mul_ps
(
UNI_SSE
,
_mm_sub_ps
(
_mm_setr_ps
(
fn
[
iz4_i
[
0
]
-
1
],
fn
[
iz4_i
[
1
]
-
1
],
fn
[
iz4_i
[
2
]
-
1
],
fn
[
iz4_i
[
3
]
-
1
]),
_mm_setr_ps
(
fn
[
iz4_i
[
0
]],
fn
[
iz4_i
[
1
]],
fn
[
iz4_i
[
2
]],
fn
[
iz4_i
[
3
]])))),
exp_ps
(
_mm_mul_ps
(
_mm_mul_ps
(
x
,
x
),
_mm_set1_ps
(
-
0
.
5
f
)))));
_mm_storeu_si128
((
__m128i
*
)
cmplt_option2
,
cmplt_option2_128
);
for
(
i
=
0
;
i
<
4
;
i
++
)
{
//x = - 0.2904764 * log (UNI);
x
=
_mm_mul_ps
(
_mm_set1_ps
(
-
0
.
2904764
f
),
log_ps
(
UNI_SSE
));
_mm_storeu_ps
(
x4
,
x
);
//y = - log (UNI);
y
=
_mm_mul_ps
(
_mm_set1_ps
(
-
1
.
0
f
),
log_ps
(
UNI_SSE
));
//(y+y < x*x)?
cmplt_option1_128
=
_mm_cvtps_epi32
(
_mm_cmplt_ps
(
_mm_add_ps
(
y
,
y
),
_mm_mul_ps
(
x
,
x
)));
_mm_storeu_si128
((
__m128i
*
)
cmplt_option1
,
cmplt_option1_128
);
for
(
i
=
0
;
i
<
4
;
i
++
)
{
if
(
cmplt_option1
[
i
]
==
0x80000000
)
{
printf
(
"count2 %d
\n
"
,
count2
);
output
[
count0
+
count1
+
count2
]
=
(
hz4
[
i
]
>
0
)
?
x4
[
i
]
+
r
:-
x4
[
i
]
-
r
;
count2
++
;
}
}
if
(
cmplt_option2
[
i
]
==
0x80000000
)
{
output
[
3
]
=
x4_option0
[
i
];
break
;
}
}
while
(
cmplt_option1
[
0
]
!=
0x80000000
&&
cmplt_option1
[
1
]
!=
0x80000000
&&
cmplt_option1
[
2
]
!=
0x80000000
&&
cmplt_option1
[
3
]
!=
0x80000000
);
}
count
=
count0
+
count1
+
count2
;
rand0
=
iz4
[
0
]
%
count
;
rand1
=
iz4
[
1
]
%
count
;
rand2
=
iz4
[
2
]
%
count
;
rand3
=
iz4
[
3
]
%
count
;
}
return
_mm_setr_ps
(
output
[
0
],
output
[
1
],
output
[
2
],
output
[
3
]);
}
}
/*!\Procedure to create tables for normal distribution kn,wn and fn. */
void
table_nor
(
unsigned
long
seed
)
{
...
...
@@ -316,7 +279,7 @@ double ziggurat(double mean, double variance)
}
__m128
ziggurat_SSE_float
(
void
)
{
return
nfix_SSE
()
;
return
NOR_SSE
;
}
void
boxmuller_SSE_float
(
__m128
*
data1
,
__m128
*
data2
)
{
...
...
@@ -516,3 +479,308 @@ void randominit(unsigned seed_init)
}
}*/
/*__m128 nfix1_SSE(void)
{
__m128 x1 __attribute__((aligned(16)));
__m128 y1 __attribute__((aligned(16)));
__m128i cmplt_option0_128 __attribute__((aligned(16)));
__m128i cmplt_option1_128 __attribute__((aligned(16)));
__m128i cmplt_option2_128 __attribute__((aligned(16)));
int32_t cmplt_option0[4] __attribute__((aligned(16)));
int32_t cmplt_option1[4] __attribute__((aligned(16)));
int32_t cmplt_option2[4] __attribute__((aligned(16)));
float output1[12] __attribute__((aligned(16)));
float x1_option0[4] __attribute__((aligned(16)));
float x4[4] __attribute__((aligned(16)));
int count0=0;
int count1=0;
int count2=0;
int i;
static float r = 3.442620;
static int nfix_first_run=0;
for (;;)
{
NOR1_SSE;
//(abs(hz)<kn[iz])? hz*wn[iz]
cmplt_option0_128 = _mm_cmplt_epi32(abs_hz1_128,_mm_setr_epi32(kn[iz1[0]],kn[iz1[1]],kn[iz1[2]],kn[iz1[3]]));
_mm_storeu_si128((__m128i *)cmplt_option0,cmplt_option0_128);
//x=hz * wn[iz];
for (i=0;i<4;i++)
{
if (cmplt_option0[i]==0xFFFFFFFF)
{
//printf("count0 %d\n",count0);
output1[count0]=hz1[i]*wn[iz1[i]];
count0++;
}
}
if (count0>3)
{
count99+=4;
if (count99>99)
{
count99=0;
nfix_first_run=0;
}
return _mm_setr_ps(output1[0],output1[1],output1[2],output1[3]);
}
//x=hz * wn[iz];
x1=_mm_mul_ps(_mm_cvtepi32_ps(hz1_128),_mm_setr_ps(wn[iz1[0]],wn[iz1[1]],wn[iz1[2]],wn[iz1[3]]));
_mm_storeu_ps(x1_option0,x1);
//printf("count0 is %d, count1 is %d, count2 is %d,count99 is %d\n",count0,count1,count2,count99);
if ((iz1[0]==0||iz1[1]==0||iz1[2]==0||iz1[3]==0)&&nfix_first_run==0&&count0>0)
{
//printf("\niz == 0 [%d,%d,%d,%d]\n\n",iz4[0],iz4[1],iz4[2],iz4[3]);
nfix_first_run=1;
do
{
//x = - 0.2904764 * log (UNI);
x1 = _mm_mul_ps(_mm_set1_ps(-0.2904764f), log_ps(UNI_SSE));
_mm_storeu_ps(x4,x1);
//y = - log (UNI);
y1 = _mm_mul_ps(_mm_set1_ps(-1.0f), log_ps(UNI_SSE));
//(y+y < x*x)?
cmplt_option1_128 = _mm_cvtps_epi32(_mm_cmplt_ps(_mm_add_ps(y1,y1),_mm_mul_ps(x1,x1)));
_mm_storeu_si128((__m128i *)cmplt_option1,cmplt_option1_128);
for (i=0;i<4;i++)
{
if (cmplt_option1[i]==0x80000000)
{
//printf("count22 %d\n",count2);
output1[3]=(hz1[i]>0)? x4[i]+r:-x4[i]-r;
count2++;
break;
}
}
}
while (cmplt_option1[0]!=0x80000000 || cmplt_option1[1]!=0x80000000 || cmplt_option1[2]!=0x80000000 || cmplt_option1[3]!=0x80000000);
if (count0+count2>3)
{
count99+=4;
if (count99>99)
{
count99=0;
nfix_first_run=0;
}
return _mm_setr_ps(output1[0],output1[1],output1[2],output1[3]);
}
}
if (iz1[0]>0&&iz1[1]>0&&iz1[2]>0&&iz1[3]>0&&nfix_first_run==0&&count0>0)
{
//printf("\niz > 0 [%d,%d,%d,%d]\n\n",iz4[0],iz4[1],iz4[2],iz4[3]);
nfix_first_run=1;
printf("\niz1 > 0 [%d,%d,%d,%d].\nfn [%e,%e,%e,%e].\n\n",iz1[0],iz1[1],iz1[2],iz1[3],fn[iz1[0]],fn[iz1[1]],fn[iz1[2]],fn[iz1[3]]);
printf("fn1 - 1 [%e,%e,%e,%e]\n",fn[iz1[0]-1],fn[iz1[1]-1],fn[iz1[2]-1],fn[iz1[3]-1]);
//if (iz==0)
printf("\niz [%d,%d,%d,%d]\n",iz4[0],iz4[1],iz4[2],iz4[3]);
printf("iz==0 [%d,%d,%d,%d]\n",iz4[0]==0,iz4[1]==0,iz4[2]==0,iz4[3]==0);
printf("iz>0 [%d,%d,%d,%d]\n\n",iz4[0]>0,iz4[1]>0,iz4[2]>0,iz4[3]>0);//
// if (fn[iz]+UNI*(fn[iz-1]-fn[iz])<exp(-0.5*x*x))
//printf("iz [%d,%d,%d,%d] is ok? %d\n",iz4[0],iz4[1],iz4[2],iz4[3],iz4[0]==0&&iz4[1]==0&&iz4[2]==0&&iz4[3]==0);
//printf("iz>0 inside [%d,%d,%d,%d]\n",iz4[0]>0,iz4[1]>0,iz4[2]>0,iz4[3]>0);
//printf("iz-1 [%d,%d,%d,%d]\n",iz4[0]-1,iz4[1]-1,iz4[2]-1,iz4[3]-1);
//printf("x [%e,%e,%e,%e]\n",x[0],x[1],x[2],x[3]);
//printf("exp [%e,%e,%e,%e]\n",exp(-0.5*x[0]*x[0]),exp(-0.5*x[1]*x[1]),exp(-0.5*x[2]*x[2]),exp(-0.5*x[3]*x[3]));//
cmplt_option2_128 = _mm_cvtps_epi32(_mm_cmplt_ps(_mm_add_ps(_mm_setr_ps(fn[iz1[0]],fn[iz1[1]],fn[iz1[2]],fn[iz1[3]]),_mm_mul_ps(UNI_SSE,_mm_sub_ps(_mm_setr_ps(fn[iz1[0]-1],fn[iz1[1]-1],fn[iz1[2]-1],fn[iz1[3]-1]),_mm_setr_ps(fn[iz1[0]],fn[iz1[1]],fn[iz1[2]],fn[iz1[3]])))),exp_ps(_mm_mul_ps(_mm_mul_ps(x1,x1),_mm_set1_ps(-0.5f)))));
//cmplt_option2_128 = _mm_cvtps_epi32(_mm_cmplt_ps(_mm_setr_ps(fn[iz4[0]],fn[iz4[1]],fn[iz4[2]],fn[iz4[3]]),exp_ps(_mm_mul_ps(_mm_mul_ps(x,x),_mm_set1_ps(-0.5f)))));
_mm_storeu_si128((__m128i *)cmplt_option2,cmplt_option2_128);
for (i=0;i<4;i++)
{
if (cmplt_option2[i]==0x80000000)
{
//printf("count1 %d\n",count1);
output1[3]=x1_option0[i];
count1++;
break;
}
}
if (count0+count1>3)
{
count99+=4;
if (count99>109)
{
count99=0;
nfix_first_run=0;
}
return _mm_setr_ps(output1[0],output1[1],output1[2],output1[3]);
}
}
NOR1_SSE;
//(abs(hz)<kn[iz])? hz*wn[iz]
cmplt_option0_128 = _mm_cmplt_epi32(abs_hz1_128,_mm_setr_epi32(kn[iz1[0]],kn[iz1[1]],kn[iz1[2]],kn[iz1[3]]));
_mm_storeu_si128((__m128i *)cmplt_option0,cmplt_option0_128);
for (i=count0;i<3;i++)
{
if (cmplt_option0[i-count0]==0xFFFFFFFF)
{
//printf("count0 %d\n",count0);
output1[count0]=hz1[i-count0]*wn[iz1[i-count0]];
count0++;
}
}
count99+=4;
if (count99>109)
{
count99=0;
nfix_first_run=0;
}
return _mm_setr_ps(output1[0],output1[1],output1[2],output1[3]);
}
}
__m128 nfix2_SSE(void)
{
__m128 x2 __attribute__((aligned(16)));
__m128 y2 __attribute__((aligned(16)));
__m128i cmplt_option0_128 __attribute__((aligned(16)));
__m128i cmplt_option1_128 __attribute__((aligned(16)));
__m128i cmplt_option2_128 __attribute__((aligned(16)));
int32_t cmplt_option0[4] __attribute__((aligned(16)));
int32_t cmplt_option1[4] __attribute__((aligned(16)));
int32_t cmplt_option2[4] __attribute__((aligned(16)));
float output2[12] __attribute__((aligned(16)));
float x2_option0[4] __attribute__((aligned(16)));
float x4[4] __attribute__((aligned(16)));
static int count0=0;
static int count1=0;
static int count2=0;
static int count99=0;
int i;
static float r = 3.442620;
static int nfix_first_run=0;
for (;;)
{
NOR2_SSE;
//(abs(hz)<kn[iz])? hz*wn[iz]
cmplt_option0_128 = _mm_cmplt_epi32(abs_hz2_128,_mm_setr_epi32(kn[iz2[0]],kn[iz2[1]],kn[iz2[2]],kn[iz2[3]]));
_mm_storeu_si128((__m128i *)cmplt_option0,cmplt_option0_128);
//x=hz * wn[iz];
for (i=0;i<4;i++)
{
if (cmplt_option0[i]==0xFFFFFFFF)
{
//printf("count0 %d\n",count0);
output2[count0]=hz2[i]*wn[iz2[i]];
count0++;
}
}
if (count0>3)
{
count99+=4;
if (count99>99)
{
count99=0;
nfix_first_run=0;
}
return _mm_setr_ps(output2[0],output2[1],output2[2],output2[3]);
}
//x=hz * wn[iz];
x2=_mm_mul_ps(_mm_cvtepi32_ps(hz2_128),_mm_setr_ps(wn[iz2[0]],wn[iz2[1]],wn[iz2[2]],wn[iz2[3]]));
_mm_storeu_ps(x2_option0,x2);
//printf("count0 is %d, count1 is %d, count2 is %d,count99 is %d\n",count0,count1,count2,count99);
if ((iz2[0]==0||iz2[1]==0||iz2[2]==0||iz2[3]==0)&&nfix_first_run==0&&count0>0)
{
//printf("\niz == 0 [%d,%d,%d,%d]\n\n",iz4[0],iz4[1],iz4[2],iz4[3]);
nfix_first_run=1;
do
{
//x = - 0.2904764 * log (UNI);
x2 = _mm_mul_ps(_mm_set1_ps(-0.2904764f), log_ps(UNI_SSE));
_mm_storeu_ps(x4,x2);
//y = - log (UNI);
y2 = _mm_mul_ps(_mm_set1_ps(-1.0f), log_ps(UNI_SSE));
//(y+y < x*x)?
cmplt_option1_128 = _mm_cvtps_epi32(_mm_cmplt_ps(_mm_add_ps(y2,y2),_mm_mul_ps(x2,x2)));
_mm_storeu_si128((__m128i *)cmplt_option1,cmplt_option1_128);
for (i=0;i<4;i++)
{
if (cmplt_option1[i]==0x80000000)
{
//printf("count22 %d\n",count2);
output2[3]=(hz2[i]>0)? x4[i]+r:-x4[i]-r;
count2++;
break;
}
}
}
while (cmplt_option1[0]!=0x80000000 || cmplt_option1[1]!=0x80000000 || cmplt_option1[2]!=0x80000000 || cmplt_option1[3]!=0x80000000);
if (count0+count2>3)
{
count99+=4;
if (count99>109)
{
count99=0;
nfix_first_run=0;
}
return _mm_setr_ps(output2[0],output2[1],output2[2],output2[3]);
}
}
if (iz2[0]>0&&iz2[1]>0&&iz2[2]>0&&iz2[3]>0&&nfix_first_run==0&&count0>0)
{
//printf("\niz > 0 [%d,%d,%d,%d]\n\n",iz4[0],iz4[1],iz4[2],iz4[3]);
nfix_first_run=1;
printf("\niz2 > 0 [%d,%d,%d,%d].\nfn [%e,%e,%e,%e].\n\n",iz2[0],iz2[1],iz2[2],iz2[3],fn[iz2[0]],fn[iz2[1]],fn[iz2[2]],fn[iz2[3]]);
printf("fn2 - 1 [%e,%e,%e,%e]\n",fn[iz2[0]-1],fn[iz2[1]-1],fn[iz2[2]-1],fn[iz2[3]-1]);
//if (iz==0)
printf("\niz [%d,%d,%d,%d]\n",iz4[0],iz4[1],iz4[2],iz4[3]);
printf("iz==0 [%d,%d,%d,%d]\n",iz4[0]==0,iz4[1]==0,iz4[2]==0,iz4[3]==0);
printf("iz>0 [%d,%d,%d,%d]\n\n",iz4[0]>0,iz4[1]>0,iz4[2]>0,iz4[3]>0);//
// if (fn[iz]+UNI*(fn[iz-1]-fn[iz])<exp(-0.5*x*x))
//printf("iz [%d,%d,%d,%d] is ok? %d\n",iz4[0],iz4[1],iz4[2],iz4[3],iz4[0]==0&&iz4[1]==0&&iz4[2]==0&&iz4[3]==0);
printf("iz>0 inside [%d,%d,%d,%d]\n",iz4[0]>0,iz4[1]>0,iz4[2]>0,iz4[3]>0);
printf("iz-1 [%d,%d,%d,%d]\n",iz4[0]-1,iz4[1]-1,iz4[2]-1,iz4[3]-1);
printf("x [%e,%e,%e,%e]\n",x[0],x[1],x[2],x[3]);
printf("exp [%e,%e,%e,%e]\n",exp(-0.5*x[0]*x[0]),exp(-0.5*x[1]*x[1]),exp(-0.5*x[2]*x[2]),exp(-0.5*x[3]*x[3]));//
cmplt_option2_128 = _mm_cvtps_epi32(_mm_cmplt_ps(_mm_add_ps(_mm_setr_ps(fn[iz2[0]],fn[iz2[1]],fn[iz2[2]],fn[iz2[3]]),_mm_mul_ps(UNI_SSE,_mm_sub_ps(_mm_setr_ps(fn[iz2[0]-1],fn[iz2[1]-1],fn[iz2[2]-1],fn[iz2[3]-1]),_mm_setr_ps(fn[iz2[0]],fn[iz2[1]],fn[iz2[2]],fn[iz2[3]])))),exp_ps(_mm_mul_ps(_mm_mul_ps(x2,x2),_mm_set1_ps(-0.5f)))));
//cmplt_option2_128 = _mm_cvtps_epi32(_mm_cmplt_ps(_mm_setr_ps(fn[iz4[0]],fn[iz4[1]],fn[iz4[2]],fn[iz4[3]]),exp_ps(_mm_mul_ps(_mm_mul_ps(x,x),_mm_set1_ps(-0.5f)))));
_mm_storeu_si128((__m128i *)cmplt_option2,cmplt_option2_128);
for (i=0;i<4;i++)
{
if (cmplt_option2[i]==0x80000000)
{
//printf("count1 %d\n",count1);
output2[3]=x2_option0[i];
count1++;
break;
}
}
if (count0+count1>3)
{
count99+=4;
if (count99>109)
{
count99=0;
nfix_first_run=0;
}
return _mm_setr_ps(output2[0],output2[1],output2[2],output2[3]);
}
}
NOR2_SSE;
//(abs(hz)<kn[iz])? hz*wn[iz]
cmplt_option0_128 = _mm_cmplt_epi32(abs_hz2_128,_mm_setr_epi32(kn[iz2[0]],kn[iz2[1]],kn[iz2[2]],kn[iz2[3]]));
_mm_storeu_si128((__m128i *)cmplt_option0,cmplt_option0_128);
for (i=count0;i<3;i++)
{
if (cmplt_option0[i-count0]==0xFFFFFFFF)
{
//printf("count0 %d\n",count0);
output2[count0]=hz2[i-count0]*wn[iz2[i-count0]];
count0++;
}
}
count99+=4;
if (count99>109)
{
count99=0;
nfix_first_run=0;
}
return _mm_setr_ps(output2[0],output2[1],output2[2],output2[3]);
}
}*/
openair2/RRC/LITE/rrc_UE.c
View file @
02f1ec69
...
...
@@ -2717,6 +2717,7 @@ int decode_BCCH_DLSCH_Message(
sizeof
(
SystemInformationBlockType1_t
)
);
LOG_D
(
RRC
,
"[UE %"
PRIu8
"] Decoding First SIB1
\n
"
,
ctxt_pP
->
module_id
);
decode_SIB1
(
ctxt_pP
,
eNB_index
,
rsrq
,
rsrp
);
//printf("decode_BCCH_DLSCH_Message \n");
}
}
...
...
@@ -2731,7 +2732,7 @@ int decode_BCCH_DLSCH_Message(
&
bcch_message
->
message
.
choice
.
c1
.
choice
.
systemInformation
,
sizeof
(
SystemInformation_t
)
);
LOG_
D
(
RRC
,
"[UE %"
PRIu8
"] Decoding SI for frameP %"
PRIu32
"
\n
"
,
LOG_
I
(
RRC
,
"[UE %"
PRIu8
"] Decoding SI for frameP %"
PRIu32
"
\n
"
,
ctxt_pP
->
module_id
,
ctxt_pP
->
frame
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment