Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
O
OpenXG-RAN
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ZhouShuya
OpenXG-RAN
Commits
e68cfaf7
Commit
e68cfaf7
authored
Aug 18, 2020
by
Raymond Knopp
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
added AVX2 optimization for 64QAM LLR computation in NR ULSCH
parent
ffae55f6
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
98 additions
and
3 deletions
+98
-3
openair1/PHY/NR_TRANSPORT/nr_ulsch_llr_computation.c
openair1/PHY/NR_TRANSPORT/nr_ulsch_llr_computation.c
+98
-3
No files found.
openair1/PHY/NR_TRANSPORT/nr_ulsch_llr_computation.c
View file @
e68cfaf7
...
@@ -239,10 +239,22 @@ void nr_ulsch_64qam_llr(int32_t *rxdataF_comp,
...
@@ -239,10 +239,22 @@ void nr_ulsch_64qam_llr(int32_t *rxdataF_comp,
uint8_t
symbol
)
uint8_t
symbol
)
{
{
#ifdef __AVX2__
int
off
=
((
nb_rb
&
1
)
==
1
)
?
4
:
0
;
#else
int
off
=
0
;
#endif
#if defined(__x86_64__) || defined(__i386__)
#if defined(__x86_64__) || defined(__i386__)
#ifdef __AVX2__
__m256i
*
rxF
=
(
__m256i
*
)
rxdataF_comp
;
__m256i
*
ch_mag
,
*
ch_magb
;
register
__m256i
xmm0
,
xmm1
,
xmm2
;
#else
__m128i
*
rxF
=
(
__m128i
*
)
rxdataF_comp
;
__m128i
*
rxF
=
(
__m128i
*
)
rxdataF_comp
;
__m128i
*
ch_mag
,
*
ch_magb
;
__m128i
*
ch_mag
,
*
ch_magb
;
register
__m128i
xmm0
,
xmm1
,
xmm2
;
register
__m128i
xmm0
,
xmm1
,
xmm2
;
#endif
#elif defined(__arm__)
#elif defined(__arm__)
int16x8_t
*
rxF
=
(
int16x8_t
*
)
&
rxdataF_comp
;
int16x8_t
*
rxF
=
(
int16x8_t
*
)
&
rxdataF_comp
;
int16x8_t
*
ch_mag
,
*
ch_magb
;
// [hna] This should be uncommented once channel estimation is implemented
int16x8_t
*
ch_mag
,
*
ch_magb
;
// [hna] This should be uncommented once channel estimation is implemented
...
@@ -250,28 +262,44 @@ void nr_ulsch_64qam_llr(int32_t *rxdataF_comp,
...
@@ -250,28 +262,44 @@ void nr_ulsch_64qam_llr(int32_t *rxdataF_comp,
#endif
#endif
int
i
;
int
i
;
unsigned
char
len_mod4
;
#if defined(__x86_64__) || defined(__i386__)
#if defined(__x86_64__) || defined(__i386__)
#ifdef __AVX2__
ch_mag
=
(
__m256i
*
)
&
ul_ch_mag
[
0
][(
symbol
*
(
off
+
(
nb_rb
*
12
)))];
ch_magb
=
(
__m256i
*
)
&
ul_ch_magb
[
0
][(
symbol
*
(
off
+
(
nb_rb
*
12
)))];
#else
ch_mag
=
(
__m128i
*
)
&
ul_ch_mag
[
0
][(
symbol
*
nb_rb
*
12
)];
ch_mag
=
(
__m128i
*
)
&
ul_ch_mag
[
0
][(
symbol
*
nb_rb
*
12
)];
ch_magb
=
(
__m128i
*
)
&
ul_ch_magb
[
0
][(
symbol
*
nb_rb
*
12
)];
ch_magb
=
(
__m128i
*
)
&
ul_ch_magb
[
0
][(
symbol
*
nb_rb
*
12
)];
#endif
#elif defined(__arm__)
#elif defined(__arm__)
ch_mag
=
(
int16x8_t
*
)
&
ul_ch_mag
[
0
][(
symbol
*
nb_rb
*
12
)];
ch_mag
=
(
int16x8_t
*
)
&
ul_ch_mag
[
0
][(
symbol
*
nb_rb
*
12
)];
ch_magb
=
(
int16x8_t
*
)
&
ul_ch_magb
[
0
][(
symbol
*
nb_rb
*
12
)];
ch_magb
=
(
int16x8_t
*
)
&
ul_ch_magb
[
0
][(
symbol
*
nb_rb
*
12
)];
#endif
#endif
len_mod4
=
nb_re
&
3
;
#ifdef __AVX2__
int
len_mod8
=
nb_re
&
7
;
nb_re
=
nb_re
>>
3
;
// length in quad words (4 REs)
nb_re
+=
((
len_mod8
==
0
)
?
0
:
1
);
#else
int
len_mod4
=
nb_re
&
3
;
nb_re
=
nb_re
>>
2
;
// length in quad words (4 REs)
nb_re
=
nb_re
>>
2
;
// length in quad words (4 REs)
nb_re
+=
((
len_mod4
==
0
)
?
0
:
1
);
nb_re
+=
((
len_mod4
==
0
)
?
0
:
1
);
#endif
for
(
i
=
0
;
i
<
nb_re
;
i
++
)
{
for
(
i
=
0
;
i
<
nb_re
;
i
++
)
{
xmm0
=
rxF
[
i
];
xmm0
=
rxF
[
i
];
#if defined(__x86_64__) || defined(__i386__)
#if defined(__x86_64__) || defined(__i386__)
#ifdef __AVX2__
xmm1
=
_mm256_abs_epi16
(
xmm0
);
xmm1
=
_mm256_subs_epi16
(
ch_mag
[
i
],
xmm1
);
xmm2
=
_mm256_abs_epi16
(
xmm1
);
xmm2
=
_mm256_subs_epi16
(
ch_magb
[
i
],
xmm2
);
#else
xmm1
=
_mm_abs_epi16
(
xmm0
);
xmm1
=
_mm_abs_epi16
(
xmm0
);
xmm1
=
_mm_subs_epi16
(
ch_mag
[
i
],
xmm1
);
xmm1
=
_mm_subs_epi16
(
ch_mag
[
i
],
xmm1
);
xmm2
=
_mm_abs_epi16
(
xmm1
);
xmm2
=
_mm_abs_epi16
(
xmm1
);
xmm2
=
_mm_subs_epi16
(
ch_magb
[
i
],
xmm2
);
xmm2
=
_mm_subs_epi16
(
ch_magb
[
i
],
xmm2
);
#endif
#elif defined(__arm__)
#elif defined(__arm__)
xmm1
=
vabsq_s16
(
xmm0
);
xmm1
=
vabsq_s16
(
xmm0
);
xmm1
=
vsubq_s16
(
ch_mag
[
i
],
xmm1
);
xmm1
=
vsubq_s16
(
ch_mag
[
i
],
xmm1
);
...
@@ -283,12 +311,21 @@ void nr_ulsch_64qam_llr(int32_t *rxdataF_comp,
...
@@ -283,12 +311,21 @@ void nr_ulsch_64qam_llr(int32_t *rxdataF_comp,
// 1st RE
// 1st RE
// ---------------------------------------
// ---------------------------------------
#if defined(__x86_64__) || defined(__i386__)
#if defined(__x86_64__) || defined(__i386__)
#ifdef __AVX2__
ulsch_llr
[
0
]
=
_mm256_extract_epi16
(
xmm0
,
0
);
ulsch_llr
[
1
]
=
_mm256_extract_epi16
(
xmm0
,
1
);
ulsch_llr
[
2
]
=
_mm256_extract_epi16
(
xmm1
,
0
);
ulsch_llr
[
3
]
=
_mm256_extract_epi16
(
xmm1
,
1
);
ulsch_llr
[
4
]
=
_mm256_extract_epi16
(
xmm2
,
0
);
ulsch_llr
[
5
]
=
_mm256_extract_epi16
(
xmm2
,
1
);
#else
ulsch_llr
[
0
]
=
_mm_extract_epi16
(
xmm0
,
0
);
ulsch_llr
[
0
]
=
_mm_extract_epi16
(
xmm0
,
0
);
ulsch_llr
[
1
]
=
_mm_extract_epi16
(
xmm0
,
1
);
ulsch_llr
[
1
]
=
_mm_extract_epi16
(
xmm0
,
1
);
ulsch_llr
[
2
]
=
_mm_extract_epi16
(
xmm1
,
0
);
ulsch_llr
[
2
]
=
_mm_extract_epi16
(
xmm1
,
0
);
ulsch_llr
[
3
]
=
_mm_extract_epi16
(
xmm1
,
1
);
ulsch_llr
[
3
]
=
_mm_extract_epi16
(
xmm1
,
1
);
ulsch_llr
[
4
]
=
_mm_extract_epi16
(
xmm2
,
0
);
ulsch_llr
[
4
]
=
_mm_extract_epi16
(
xmm2
,
0
);
ulsch_llr
[
5
]
=
_mm_extract_epi16
(
xmm2
,
1
);
ulsch_llr
[
5
]
=
_mm_extract_epi16
(
xmm2
,
1
);
#endif
#elif defined(__arm__)
#elif defined(__arm__)
ulsch_llr
[
0
]
=
vgetq_lane_s16
(
xmm0
,
0
);
ulsch_llr
[
0
]
=
vgetq_lane_s16
(
xmm0
,
0
);
ulsch_llr
[
1
]
=
vgetq_lane_s16
(
xmm0
,
1
);
ulsch_llr
[
1
]
=
vgetq_lane_s16
(
xmm0
,
1
);
...
@@ -305,12 +342,21 @@ void nr_ulsch_64qam_llr(int32_t *rxdataF_comp,
...
@@ -305,12 +342,21 @@ void nr_ulsch_64qam_llr(int32_t *rxdataF_comp,
// 2nd RE
// 2nd RE
// ---------------------------------------
// ---------------------------------------
#if defined(__x86_64__) || defined(__i386__)
#if defined(__x86_64__) || defined(__i386__)
#ifdef __AVX2__
ulsch_llr
[
0
]
=
_mm256_extract_epi16
(
xmm0
,
2
);
ulsch_llr
[
1
]
=
_mm256_extract_epi16
(
xmm0
,
3
);
ulsch_llr
[
2
]
=
_mm256_extract_epi16
(
xmm1
,
2
);
ulsch_llr
[
3
]
=
_mm256_extract_epi16
(
xmm1
,
3
);
ulsch_llr
[
4
]
=
_mm256_extract_epi16
(
xmm2
,
2
);
ulsch_llr
[
5
]
=
_mm256_extract_epi16
(
xmm2
,
3
);
#else
ulsch_llr
[
0
]
=
_mm_extract_epi16
(
xmm0
,
2
);
ulsch_llr
[
0
]
=
_mm_extract_epi16
(
xmm0
,
2
);
ulsch_llr
[
1
]
=
_mm_extract_epi16
(
xmm0
,
3
);
ulsch_llr
[
1
]
=
_mm_extract_epi16
(
xmm0
,
3
);
ulsch_llr
[
2
]
=
_mm_extract_epi16
(
xmm1
,
2
);
ulsch_llr
[
2
]
=
_mm_extract_epi16
(
xmm1
,
2
);
ulsch_llr
[
3
]
=
_mm_extract_epi16
(
xmm1
,
3
);
ulsch_llr
[
3
]
=
_mm_extract_epi16
(
xmm1
,
3
);
ulsch_llr
[
4
]
=
_mm_extract_epi16
(
xmm2
,
2
);
ulsch_llr
[
4
]
=
_mm_extract_epi16
(
xmm2
,
2
);
ulsch_llr
[
5
]
=
_mm_extract_epi16
(
xmm2
,
3
);
ulsch_llr
[
5
]
=
_mm_extract_epi16
(
xmm2
,
3
);
#endif
#elif defined(__arm__)
#elif defined(__arm__)
ulsch_llr
[
2
]
=
vgetq_lane_s16
(
xmm0
,
2
);
ulsch_llr
[
2
]
=
vgetq_lane_s16
(
xmm0
,
2
);
ulsch_llr
[
3
]
=
vgetq_lane_s16
(
xmm0
,
3
);
ulsch_llr
[
3
]
=
vgetq_lane_s16
(
xmm0
,
3
);
...
@@ -327,12 +373,21 @@ void nr_ulsch_64qam_llr(int32_t *rxdataF_comp,
...
@@ -327,12 +373,21 @@ void nr_ulsch_64qam_llr(int32_t *rxdataF_comp,
// 3rd RE
// 3rd RE
// ---------------------------------------
// ---------------------------------------
#if defined(__x86_64__) || defined(__i386__)
#if defined(__x86_64__) || defined(__i386__)
#ifdef __AVX2__
ulsch_llr
[
0
]
=
_mm256_extract_epi16
(
xmm0
,
4
);
ulsch_llr
[
1
]
=
_mm256_extract_epi16
(
xmm0
,
5
);
ulsch_llr
[
2
]
=
_mm256_extract_epi16
(
xmm1
,
4
);
ulsch_llr
[
3
]
=
_mm256_extract_epi16
(
xmm1
,
5
);
ulsch_llr
[
4
]
=
_mm256_extract_epi16
(
xmm2
,
4
);
ulsch_llr
[
5
]
=
_mm256_extract_epi16
(
xmm2
,
5
);
#else
ulsch_llr
[
0
]
=
_mm_extract_epi16
(
xmm0
,
4
);
ulsch_llr
[
0
]
=
_mm_extract_epi16
(
xmm0
,
4
);
ulsch_llr
[
1
]
=
_mm_extract_epi16
(
xmm0
,
5
);
ulsch_llr
[
1
]
=
_mm_extract_epi16
(
xmm0
,
5
);
ulsch_llr
[
2
]
=
_mm_extract_epi16
(
xmm1
,
4
);
ulsch_llr
[
2
]
=
_mm_extract_epi16
(
xmm1
,
4
);
ulsch_llr
[
3
]
=
_mm_extract_epi16
(
xmm1
,
5
);
ulsch_llr
[
3
]
=
_mm_extract_epi16
(
xmm1
,
5
);
ulsch_llr
[
4
]
=
_mm_extract_epi16
(
xmm2
,
4
);
ulsch_llr
[
4
]
=
_mm_extract_epi16
(
xmm2
,
4
);
ulsch_llr
[
5
]
=
_mm_extract_epi16
(
xmm2
,
5
);
ulsch_llr
[
5
]
=
_mm_extract_epi16
(
xmm2
,
5
);
#endif
#elif defined(__arm__)
#elif defined(__arm__)
ulsch_llr
[
0
]
=
vgetq_lane_s16
(
xmm0
,
4
);
ulsch_llr
[
0
]
=
vgetq_lane_s16
(
xmm0
,
4
);
ulsch_llr
[
1
]
=
vgetq_lane_s16
(
xmm0
,
5
);
ulsch_llr
[
1
]
=
vgetq_lane_s16
(
xmm0
,
5
);
...
@@ -349,12 +404,21 @@ void nr_ulsch_64qam_llr(int32_t *rxdataF_comp,
...
@@ -349,12 +404,21 @@ void nr_ulsch_64qam_llr(int32_t *rxdataF_comp,
// 4th RE
// 4th RE
// ---------------------------------------
// ---------------------------------------
#if defined(__x86_64__) || defined(__i386__)
#if defined(__x86_64__) || defined(__i386__)
#ifdef __AVX2__
ulsch_llr
[
0
]
=
_mm256_extract_epi16
(
xmm0
,
6
);
ulsch_llr
[
1
]
=
_mm256_extract_epi16
(
xmm0
,
7
);
ulsch_llr
[
2
]
=
_mm256_extract_epi16
(
xmm1
,
6
);
ulsch_llr
[
3
]
=
_mm256_extract_epi16
(
xmm1
,
7
);
ulsch_llr
[
4
]
=
_mm256_extract_epi16
(
xmm2
,
6
);
ulsch_llr
[
5
]
=
_mm256_extract_epi16
(
xmm2
,
7
);
#else
ulsch_llr
[
0
]
=
_mm_extract_epi16
(
xmm0
,
6
);
ulsch_llr
[
0
]
=
_mm_extract_epi16
(
xmm0
,
6
);
ulsch_llr
[
1
]
=
_mm_extract_epi16
(
xmm0
,
7
);
ulsch_llr
[
1
]
=
_mm_extract_epi16
(
xmm0
,
7
);
ulsch_llr
[
2
]
=
_mm_extract_epi16
(
xmm1
,
6
);
ulsch_llr
[
2
]
=
_mm_extract_epi16
(
xmm1
,
6
);
ulsch_llr
[
3
]
=
_mm_extract_epi16
(
xmm1
,
7
);
ulsch_llr
[
3
]
=
_mm_extract_epi16
(
xmm1
,
7
);
ulsch_llr
[
4
]
=
_mm_extract_epi16
(
xmm2
,
6
);
ulsch_llr
[
4
]
=
_mm_extract_epi16
(
xmm2
,
6
);
ulsch_llr
[
5
]
=
_mm_extract_epi16
(
xmm2
,
7
);
ulsch_llr
[
5
]
=
_mm_extract_epi16
(
xmm2
,
7
);
#endif
#elif defined(__arm__)
#elif defined(__arm__)
ulsch_llr
[
0
]
=
vgetq_lane_s16
(
xmm0
,
6
);
ulsch_llr
[
0
]
=
vgetq_lane_s16
(
xmm0
,
6
);
ulsch_llr
[
1
]
=
vgetq_lane_s16
(
xmm0
,
7
);
ulsch_llr
[
1
]
=
vgetq_lane_s16
(
xmm0
,
7
);
...
@@ -366,6 +430,37 @@ void nr_ulsch_64qam_llr(int32_t *rxdataF_comp,
...
@@ -366,6 +430,37 @@ void nr_ulsch_64qam_llr(int32_t *rxdataF_comp,
// ---------------------------------------
// ---------------------------------------
ulsch_llr
+=
6
;
ulsch_llr
+=
6
;
#ifdef __AVX2__
ulsch_llr
[
0
]
=
_mm256_extract_epi16
(
xmm0
,
8
);
ulsch_llr
[
1
]
=
_mm256_extract_epi16
(
xmm0
,
9
);
ulsch_llr
[
2
]
=
_mm256_extract_epi16
(
xmm1
,
8
);
ulsch_llr
[
3
]
=
_mm256_extract_epi16
(
xmm1
,
9
);
ulsch_llr
[
4
]
=
_mm256_extract_epi16
(
xmm2
,
8
);
ulsch_llr
[
5
]
=
_mm256_extract_epi16
(
xmm2
,
9
);
ulsch_llr
[
6
]
=
_mm256_extract_epi16
(
xmm0
,
10
);
ulsch_llr
[
7
]
=
_mm256_extract_epi16
(
xmm0
,
11
);
ulsch_llr
[
8
]
=
_mm256_extract_epi16
(
xmm1
,
10
);
ulsch_llr
[
9
]
=
_mm256_extract_epi16
(
xmm1
,
11
);
ulsch_llr
[
10
]
=
_mm256_extract_epi16
(
xmm2
,
10
);
ulsch_llr
[
11
]
=
_mm256_extract_epi16
(
xmm2
,
11
);
ulsch_llr
[
12
]
=
_mm256_extract_epi16
(
xmm0
,
12
);
ulsch_llr
[
13
]
=
_mm256_extract_epi16
(
xmm0
,
13
);
ulsch_llr
[
14
]
=
_mm256_extract_epi16
(
xmm1
,
12
);
ulsch_llr
[
15
]
=
_mm256_extract_epi16
(
xmm1
,
13
);
ulsch_llr
[
16
]
=
_mm256_extract_epi16
(
xmm2
,
12
);
ulsch_llr
[
17
]
=
_mm256_extract_epi16
(
xmm2
,
13
);
ulsch_llr
[
18
]
=
_mm256_extract_epi16
(
xmm0
,
14
);
ulsch_llr
[
19
]
=
_mm256_extract_epi16
(
xmm0
,
15
);
ulsch_llr
[
20
]
=
_mm256_extract_epi16
(
xmm1
,
14
);
ulsch_llr
[
21
]
=
_mm256_extract_epi16
(
xmm1
,
15
);
ulsch_llr
[
22
]
=
_mm256_extract_epi16
(
xmm2
,
14
);
ulsch_llr
[
23
]
=
_mm256_extract_epi16
(
xmm2
,
15
);
ulsch_llr
+=
24
;
#endif
}
}
#if defined(__x86_64__) || defined(__i386__)
#if defined(__x86_64__) || defined(__i386__)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment