Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
O
OpenXG-RAN
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
promise
OpenXG-RAN
Commits
e68cfaf7
Commit
e68cfaf7
authored
4 years ago
by
Raymond Knopp
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
added AVX2 optimization for 64QAM LLR computation in NR ULSCH
parent
ffae55f6
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
98 additions
and
3 deletions
+98
-3
openair1/PHY/NR_TRANSPORT/nr_ulsch_llr_computation.c
openair1/PHY/NR_TRANSPORT/nr_ulsch_llr_computation.c
+98
-3
No files found.
openair1/PHY/NR_TRANSPORT/nr_ulsch_llr_computation.c
View file @
e68cfaf7
...
...
@@ -239,10 +239,22 @@ void nr_ulsch_64qam_llr(int32_t *rxdataF_comp,
uint8_t
symbol
)
{
#ifdef __AVX2__
int
off
=
((
nb_rb
&
1
)
==
1
)
?
4
:
0
;
#else
int
off
=
0
;
#endif
#if defined(__x86_64__) || defined(__i386__)
#ifdef __AVX2__
__m256i
*
rxF
=
(
__m256i
*
)
rxdataF_comp
;
__m256i
*
ch_mag
,
*
ch_magb
;
register
__m256i
xmm0
,
xmm1
,
xmm2
;
#else
__m128i
*
rxF
=
(
__m128i
*
)
rxdataF_comp
;
__m128i
*
ch_mag
,
*
ch_magb
;
register
__m128i
xmm0
,
xmm1
,
xmm2
;
#endif
#elif defined(__arm__)
int16x8_t
*
rxF
=
(
int16x8_t
*
)
&
rxdataF_comp
;
int16x8_t
*
ch_mag
,
*
ch_magb
;
// [hna] This should be uncommented once channel estimation is implemented
...
...
@@ -250,28 +262,44 @@ void nr_ulsch_64qam_llr(int32_t *rxdataF_comp,
#endif
int
i
;
unsigned
char
len_mod4
;
#if defined(__x86_64__) || defined(__i386__)
#ifdef __AVX2__
ch_mag
=
(
__m256i
*
)
&
ul_ch_mag
[
0
][(
symbol
*
(
off
+
(
nb_rb
*
12
)))];
ch_magb
=
(
__m256i
*
)
&
ul_ch_magb
[
0
][(
symbol
*
(
off
+
(
nb_rb
*
12
)))];
#else
ch_mag
=
(
__m128i
*
)
&
ul_ch_mag
[
0
][(
symbol
*
nb_rb
*
12
)];
ch_magb
=
(
__m128i
*
)
&
ul_ch_magb
[
0
][(
symbol
*
nb_rb
*
12
)];
#endif
#elif defined(__arm__)
ch_mag
=
(
int16x8_t
*
)
&
ul_ch_mag
[
0
][(
symbol
*
nb_rb
*
12
)];
ch_magb
=
(
int16x8_t
*
)
&
ul_ch_magb
[
0
][(
symbol
*
nb_rb
*
12
)];
#endif
len_mod4
=
nb_re
&
3
;
#ifdef __AVX2__
int
len_mod8
=
nb_re
&
7
;
nb_re
=
nb_re
>>
3
;
// length in quad words (4 REs)
nb_re
+=
((
len_mod8
==
0
)
?
0
:
1
);
#else
int
len_mod4
=
nb_re
&
3
;
nb_re
=
nb_re
>>
2
;
// length in quad words (4 REs)
nb_re
+=
((
len_mod4
==
0
)
?
0
:
1
);
#endif
for
(
i
=
0
;
i
<
nb_re
;
i
++
)
{
xmm0
=
rxF
[
i
];
#if defined(__x86_64__) || defined(__i386__)
#ifdef __AVX2__
xmm1
=
_mm256_abs_epi16
(
xmm0
);
xmm1
=
_mm256_subs_epi16
(
ch_mag
[
i
],
xmm1
);
xmm2
=
_mm256_abs_epi16
(
xmm1
);
xmm2
=
_mm256_subs_epi16
(
ch_magb
[
i
],
xmm2
);
#else
xmm1
=
_mm_abs_epi16
(
xmm0
);
xmm1
=
_mm_subs_epi16
(
ch_mag
[
i
],
xmm1
);
xmm2
=
_mm_abs_epi16
(
xmm1
);
xmm2
=
_mm_subs_epi16
(
ch_magb
[
i
],
xmm2
);
#endif
#elif defined(__arm__)
xmm1
=
vabsq_s16
(
xmm0
);
xmm1
=
vsubq_s16
(
ch_mag
[
i
],
xmm1
);
...
...
@@ -283,12 +311,21 @@ void nr_ulsch_64qam_llr(int32_t *rxdataF_comp,
// 1st RE
// ---------------------------------------
#if defined(__x86_64__) || defined(__i386__)
#ifdef __AVX2__
ulsch_llr
[
0
]
=
_mm256_extract_epi16
(
xmm0
,
0
);
ulsch_llr
[
1
]
=
_mm256_extract_epi16
(
xmm0
,
1
);
ulsch_llr
[
2
]
=
_mm256_extract_epi16
(
xmm1
,
0
);
ulsch_llr
[
3
]
=
_mm256_extract_epi16
(
xmm1
,
1
);
ulsch_llr
[
4
]
=
_mm256_extract_epi16
(
xmm2
,
0
);
ulsch_llr
[
5
]
=
_mm256_extract_epi16
(
xmm2
,
1
);
#else
ulsch_llr
[
0
]
=
_mm_extract_epi16
(
xmm0
,
0
);
ulsch_llr
[
1
]
=
_mm_extract_epi16
(
xmm0
,
1
);
ulsch_llr
[
2
]
=
_mm_extract_epi16
(
xmm1
,
0
);
ulsch_llr
[
3
]
=
_mm_extract_epi16
(
xmm1
,
1
);
ulsch_llr
[
4
]
=
_mm_extract_epi16
(
xmm2
,
0
);
ulsch_llr
[
5
]
=
_mm_extract_epi16
(
xmm2
,
1
);
#endif
#elif defined(__arm__)
ulsch_llr
[
0
]
=
vgetq_lane_s16
(
xmm0
,
0
);
ulsch_llr
[
1
]
=
vgetq_lane_s16
(
xmm0
,
1
);
...
...
@@ -305,12 +342,21 @@ void nr_ulsch_64qam_llr(int32_t *rxdataF_comp,
// 2nd RE
// ---------------------------------------
#if defined(__x86_64__) || defined(__i386__)
#ifdef __AVX2__
ulsch_llr
[
0
]
=
_mm256_extract_epi16
(
xmm0
,
2
);
ulsch_llr
[
1
]
=
_mm256_extract_epi16
(
xmm0
,
3
);
ulsch_llr
[
2
]
=
_mm256_extract_epi16
(
xmm1
,
2
);
ulsch_llr
[
3
]
=
_mm256_extract_epi16
(
xmm1
,
3
);
ulsch_llr
[
4
]
=
_mm256_extract_epi16
(
xmm2
,
2
);
ulsch_llr
[
5
]
=
_mm256_extract_epi16
(
xmm2
,
3
);
#else
ulsch_llr
[
0
]
=
_mm_extract_epi16
(
xmm0
,
2
);
ulsch_llr
[
1
]
=
_mm_extract_epi16
(
xmm0
,
3
);
ulsch_llr
[
2
]
=
_mm_extract_epi16
(
xmm1
,
2
);
ulsch_llr
[
3
]
=
_mm_extract_epi16
(
xmm1
,
3
);
ulsch_llr
[
4
]
=
_mm_extract_epi16
(
xmm2
,
2
);
ulsch_llr
[
5
]
=
_mm_extract_epi16
(
xmm2
,
3
);
#endif
#elif defined(__arm__)
ulsch_llr
[
2
]
=
vgetq_lane_s16
(
xmm0
,
2
);
ulsch_llr
[
3
]
=
vgetq_lane_s16
(
xmm0
,
3
);
...
...
@@ -327,12 +373,21 @@ void nr_ulsch_64qam_llr(int32_t *rxdataF_comp,
// 3rd RE
// ---------------------------------------
#if defined(__x86_64__) || defined(__i386__)
#ifdef __AVX2__
ulsch_llr
[
0
]
=
_mm256_extract_epi16
(
xmm0
,
4
);
ulsch_llr
[
1
]
=
_mm256_extract_epi16
(
xmm0
,
5
);
ulsch_llr
[
2
]
=
_mm256_extract_epi16
(
xmm1
,
4
);
ulsch_llr
[
3
]
=
_mm256_extract_epi16
(
xmm1
,
5
);
ulsch_llr
[
4
]
=
_mm256_extract_epi16
(
xmm2
,
4
);
ulsch_llr
[
5
]
=
_mm256_extract_epi16
(
xmm2
,
5
);
#else
ulsch_llr
[
0
]
=
_mm_extract_epi16
(
xmm0
,
4
);
ulsch_llr
[
1
]
=
_mm_extract_epi16
(
xmm0
,
5
);
ulsch_llr
[
2
]
=
_mm_extract_epi16
(
xmm1
,
4
);
ulsch_llr
[
3
]
=
_mm_extract_epi16
(
xmm1
,
5
);
ulsch_llr
[
4
]
=
_mm_extract_epi16
(
xmm2
,
4
);
ulsch_llr
[
5
]
=
_mm_extract_epi16
(
xmm2
,
5
);
#endif
#elif defined(__arm__)
ulsch_llr
[
0
]
=
vgetq_lane_s16
(
xmm0
,
4
);
ulsch_llr
[
1
]
=
vgetq_lane_s16
(
xmm0
,
5
);
...
...
@@ -349,12 +404,21 @@ void nr_ulsch_64qam_llr(int32_t *rxdataF_comp,
// 4th RE
// ---------------------------------------
#if defined(__x86_64__) || defined(__i386__)
#ifdef __AVX2__
ulsch_llr
[
0
]
=
_mm256_extract_epi16
(
xmm0
,
6
);
ulsch_llr
[
1
]
=
_mm256_extract_epi16
(
xmm0
,
7
);
ulsch_llr
[
2
]
=
_mm256_extract_epi16
(
xmm1
,
6
);
ulsch_llr
[
3
]
=
_mm256_extract_epi16
(
xmm1
,
7
);
ulsch_llr
[
4
]
=
_mm256_extract_epi16
(
xmm2
,
6
);
ulsch_llr
[
5
]
=
_mm256_extract_epi16
(
xmm2
,
7
);
#else
ulsch_llr
[
0
]
=
_mm_extract_epi16
(
xmm0
,
6
);
ulsch_llr
[
1
]
=
_mm_extract_epi16
(
xmm0
,
7
);
ulsch_llr
[
2
]
=
_mm_extract_epi16
(
xmm1
,
6
);
ulsch_llr
[
3
]
=
_mm_extract_epi16
(
xmm1
,
7
);
ulsch_llr
[
4
]
=
_mm_extract_epi16
(
xmm2
,
6
);
ulsch_llr
[
5
]
=
_mm_extract_epi16
(
xmm2
,
7
);
#endif
#elif defined(__arm__)
ulsch_llr
[
0
]
=
vgetq_lane_s16
(
xmm0
,
6
);
ulsch_llr
[
1
]
=
vgetq_lane_s16
(
xmm0
,
7
);
...
...
@@ -366,6 +430,37 @@ void nr_ulsch_64qam_llr(int32_t *rxdataF_comp,
// ---------------------------------------
ulsch_llr
+=
6
;
#ifdef __AVX2__
ulsch_llr
[
0
]
=
_mm256_extract_epi16
(
xmm0
,
8
);
ulsch_llr
[
1
]
=
_mm256_extract_epi16
(
xmm0
,
9
);
ulsch_llr
[
2
]
=
_mm256_extract_epi16
(
xmm1
,
8
);
ulsch_llr
[
3
]
=
_mm256_extract_epi16
(
xmm1
,
9
);
ulsch_llr
[
4
]
=
_mm256_extract_epi16
(
xmm2
,
8
);
ulsch_llr
[
5
]
=
_mm256_extract_epi16
(
xmm2
,
9
);
ulsch_llr
[
6
]
=
_mm256_extract_epi16
(
xmm0
,
10
);
ulsch_llr
[
7
]
=
_mm256_extract_epi16
(
xmm0
,
11
);
ulsch_llr
[
8
]
=
_mm256_extract_epi16
(
xmm1
,
10
);
ulsch_llr
[
9
]
=
_mm256_extract_epi16
(
xmm1
,
11
);
ulsch_llr
[
10
]
=
_mm256_extract_epi16
(
xmm2
,
10
);
ulsch_llr
[
11
]
=
_mm256_extract_epi16
(
xmm2
,
11
);
ulsch_llr
[
12
]
=
_mm256_extract_epi16
(
xmm0
,
12
);
ulsch_llr
[
13
]
=
_mm256_extract_epi16
(
xmm0
,
13
);
ulsch_llr
[
14
]
=
_mm256_extract_epi16
(
xmm1
,
12
);
ulsch_llr
[
15
]
=
_mm256_extract_epi16
(
xmm1
,
13
);
ulsch_llr
[
16
]
=
_mm256_extract_epi16
(
xmm2
,
12
);
ulsch_llr
[
17
]
=
_mm256_extract_epi16
(
xmm2
,
13
);
ulsch_llr
[
18
]
=
_mm256_extract_epi16
(
xmm0
,
14
);
ulsch_llr
[
19
]
=
_mm256_extract_epi16
(
xmm0
,
15
);
ulsch_llr
[
20
]
=
_mm256_extract_epi16
(
xmm1
,
14
);
ulsch_llr
[
21
]
=
_mm256_extract_epi16
(
xmm1
,
15
);
ulsch_llr
[
22
]
=
_mm256_extract_epi16
(
xmm2
,
14
);
ulsch_llr
[
23
]
=
_mm256_extract_epi16
(
xmm2
,
15
);
ulsch_llr
+=
24
;
#endif
}
#if defined(__x86_64__) || defined(__i386__)
...
...
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment