Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
O
OpenXG-RAN
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
littleBu
OpenXG-RAN
Commits
ce7bdf71
Commit
ce7bdf71
authored
1 month ago
by
Laurent THOMAS
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fix simde lacking refix
parent
34970d38
Branches unavailable
2025.w08
2025.w07
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
105 additions
and
87 deletions
+105
-87
openair1/PHY/MODULATION/nr_modulation.c
openair1/PHY/MODULATION/nr_modulation.c
+75
-58
openair1/PHY/NR_TRANSPORT/nr_dlsch.c
openair1/PHY/NR_TRANSPORT/nr_dlsch.c
+30
-29
No files found.
openair1/PHY/MODULATION/nr_modulation.c
View file @
ce7bdf71
...
...
@@ -22,6 +22,9 @@
#include "nr_modulation.h"
#include "PHY/NR_REFSIG/nr_mod_table.h"
#include "executables/softmodem-common.h"
#include <simde/x86/avx512.h>
// Lacking declaration in present simde external package, will be detected as compilation error when they will add it
#define simde_mm512_extracti64x2_epi64(a...) _mm512_extracti64x2_epi64(a)
// #define DEBUG_DLSCH_PRECODING_PRINT_WITH_TRIVIAL // TODO: For debug, to be removed if want to merge to develop
// #define DEBUG_LAYER_MAPPING
...
...
@@ -263,13 +266,13 @@ void nr_layer_mapping(int nbCodes,
c16_t
*
tx0
=
tx_layers
[
0
];
c16_t
*
tx1
=
tx_layers
[
1
];
#if defined(__AVX512BW__)
__m512i
perm2a
=
_mm512_set_epi32
(
30
,
28
,
26
,
24
,
22
,
20
,
18
,
16
,
14
,
12
,
10
,
8
,
6
,
4
,
2
,
0
);
__m512i
perm2b
=
_mm512_set_epi32
(
31
,
29
,
27
,
25
,
23
,
21
,
19
,
17
,
15
,
13
,
11
,
9
,
7
,
5
,
3
,
1
);
simde__m512i
perm2a
=
simde
_mm512_set_epi32
(
30
,
28
,
26
,
24
,
22
,
20
,
18
,
16
,
14
,
12
,
10
,
8
,
6
,
4
,
2
,
0
);
simde__m512i
perm2b
=
simde
_mm512_set_epi32
(
31
,
29
,
27
,
25
,
23
,
21
,
19
,
17
,
15
,
13
,
11
,
9
,
7
,
5
,
3
,
1
);
for
(;
i
<
(
n_symbs
&
~
31
);
i
+=
32
)
{
__m512i
a
=
*
(
__m512i
*
)(
mod
+
i
);
__m512i
b
=
*
(
__m512i
*
)(
mod
+
i
+
16
);
*
(
__m512i
*
)
tx0
=
_mm512_permutex2var_epi32
(
a
,
perm2a
,
b
);
*
(
__m512i
*
)
tx1
=
_mm512_permutex2var_epi32
(
a
,
perm2b
,
b
);
simde__m512i
a
=
*
(
simde
__m512i
*
)(
mod
+
i
);
simde__m512i
b
=
*
(
simde
__m512i
*
)(
mod
+
i
+
16
);
*
(
simde__m512i
*
)
tx0
=
simde
_mm512_permutex2var_epi32
(
a
,
perm2a
,
b
);
*
(
simde__m512i
*
)
tx1
=
simde
_mm512_permutex2var_epi32
(
a
,
perm2b
,
b
);
tx0
+=
16
;
tx1
+=
16
;
}
...
...
@@ -308,55 +311,69 @@ void nr_layer_mapping(int nbCodes,
c16_t
*
tx1
=
tx_layers
[
1
];
c16_t
*
tx2
=
tx_layers
[
2
];
#if defined(__AVX512BW__)
__m512i
perm3_0
=
_mm512_set_epi32
(
13
+
16
,
10
+
16
,
7
+
16
,
4
+
16
,
1
+
16
,
14
+
16
,
11
+
16
,
8
+
16
,
5
+
16
,
2
+
16
,
15
,
12
,
9
,
6
,
3
,
0
);
__m512i
perm3_0b
=
_mm512_set_epi32
(
13
+
16
,
10
+
16
,
7
+
16
,
4
+
16
,
1
+
16
,
10
,
9
,
8
,
7
,
6
,
5
,
4
,
3
,
2
,
1
,
0
);
__m512i
perm3_1
=
_mm512_set_epi32
(
14
+
16
,
11
+
16
,
8
+
16
,
5
+
16
,
2
+
16
,
15
+
16
,
12
+
16
,
9
+
16
,
6
+
16
,
3
+
16
,
0
+
16
,
13
,
10
,
7
,
4
,
1
);
__m512i
perm3_1b
=
_mm512_set_epi32
(
14
+
16
,
11
+
16
,
8
+
16
,
5
+
16
,
2
+
16
,
10
,
9
,
8
,
7
,
6
,
5
,
4
,
3
,
2
,
1
,
0
);
__m512i
perm3_2
=
_mm512_set_epi32
(
15
+
16
,
12
+
16
,
9
+
16
,
6
+
16
,
3
+
16
,
0
+
16
,
13
+
16
,
10
+
16
,
7
+
16
,
4
+
16
,
1
+
16
,
14
,
11
,
8
,
5
,
2
);
__m512i
perm3_2b
=
_mm512_set_epi32
(
15
+
16
,
12
+
16
,
9
+
16
,
6
+
16
,
3
+
16
,
0
+
16
,
9
,
8
,
7
,
6
,
5
,
4
,
3
,
2
,
1
,
0
);
simde__m512i
perm3_0
=
simde_mm512_set_epi32
(
13
+
16
,
10
+
16
,
7
+
16
,
4
+
16
,
1
+
16
,
14
+
16
,
11
+
16
,
8
+
16
,
5
+
16
,
2
+
16
,
15
,
12
,
9
,
6
,
3
,
0
);
simde__m512i
perm3_0b
=
simde_mm512_set_epi32
(
13
+
16
,
10
+
16
,
7
+
16
,
4
+
16
,
1
+
16
,
10
,
9
,
8
,
7
,
6
,
5
,
4
,
3
,
2
,
1
,
0
);
simde__m512i
perm3_1
=
simde_mm512_set_epi32
(
14
+
16
,
11
+
16
,
8
+
16
,
5
+
16
,
2
+
16
,
15
+
16
,
12
+
16
,
9
+
16
,
6
+
16
,
3
+
16
,
0
+
16
,
13
,
10
,
7
,
4
,
1
);
simde__m512i
perm3_1b
=
simde_mm512_set_epi32
(
14
+
16
,
11
+
16
,
8
+
16
,
5
+
16
,
2
+
16
,
10
,
9
,
8
,
7
,
6
,
5
,
4
,
3
,
2
,
1
,
0
);
simde__m512i
perm3_2
=
simde_mm512_set_epi32
(
15
+
16
,
12
+
16
,
9
+
16
,
6
+
16
,
3
+
16
,
0
+
16
,
13
+
16
,
10
+
16
,
7
+
16
,
4
+
16
,
1
+
16
,
14
,
11
,
8
,
5
,
2
);
simde__m512i
perm3_2b
=
simde_mm512_set_epi32
(
15
+
16
,
12
+
16
,
9
+
16
,
6
+
16
,
3
+
16
,
0
+
16
,
9
,
8
,
7
,
6
,
5
,
4
,
3
,
2
,
1
,
0
);
for
(;
i
<
(
n_symbs
&
~
63
);
i
+=
48
)
{
__m512i
i0
=
*
(
__m512i
*
)(
mod
+
i
);
__m512i
i1
=
*
(
__m512i
*
)(
mod
+
i
+
16
);
__m512i
i2
=
*
(
__m512i
*
)(
mod
+
i
+
32
);
__m512i
d0
=
_mm512_permutex2var_epi32
(
i0
,
perm3_0
,
i1
);
*
(
__m512i
*
)
tx0
=
_mm512_permutex2var_epi32
(
d0
,
perm3_0b
,
i2
);
// 11000000
simde__m512i
i0
=
*
(
simde
__m512i
*
)(
mod
+
i
);
simde__m512i
i1
=
*
(
simde
__m512i
*
)(
mod
+
i
+
16
);
simde__m512i
i2
=
*
(
simde
__m512i
*
)(
mod
+
i
+
32
);
simde__m512i
d0
=
simde
_mm512_permutex2var_epi32
(
i0
,
perm3_0
,
i1
);
*
(
simde__m512i
*
)
tx0
=
simde
_mm512_permutex2var_epi32
(
d0
,
perm3_0b
,
i2
);
// 11000000
tx0
+=
16
;
d0
=
_mm512_permutex2var_epi32
(
i0
,
perm3_1
,
i1
);
*
(
__m512i
*
)
tx1
=
_mm512_permutex2var_epi32
(
d0
,
perm3_1b
,
i2
);
// 11000000
d0
=
simde
_mm512_permutex2var_epi32
(
i0
,
perm3_1
,
i1
);
*
(
simde__m512i
*
)
tx1
=
simde
_mm512_permutex2var_epi32
(
d0
,
perm3_1b
,
i2
);
// 11000000
tx1
+=
16
;
d0
=
_mm512_permutex2var_epi32
(
i0
,
perm3_2
,
i1
);
*
(
__m512i
*
)
tx2
=
_mm512_permutex2var_epi32
(
d0
,
perm3_2b
,
i2
);
// 11000000
d0
=
simde
_mm512_permutex2var_epi32
(
i0
,
perm3_2
,
i1
);
*
(
simde__m512i
*
)
tx2
=
simde
_mm512_permutex2var_epi32
(
d0
,
perm3_2b
,
i2
);
// 11000000
tx2
+=
16
;
}
#endif
...
...
@@ -432,16 +449,16 @@ void nr_layer_mapping(int nbCodes,
c16_t
*
tx2
=
tx_layers
[
2
];
c16_t
*
tx3
=
tx_layers
[
3
];
#if defined(__AVX512BW__)
__m512i
perm4
=
_mm512_set_epi32
(
15
,
11
,
7
,
3
,
14
,
10
,
6
,
2
,
13
,
9
,
5
,
1
,
12
,
8
,
4
,
0
);
simde__m512i
perm4
=
simde
_mm512_set_epi32
(
15
,
11
,
7
,
3
,
14
,
10
,
6
,
2
,
13
,
9
,
5
,
1
,
12
,
8
,
4
,
0
);
for
(;
i
<
(
n_symbs
&
~
15
);
i
+=
16
)
{
__m512i
e
=
_mm512_permutexvar_epi32
(
perm4
,
*
(
__m512i
*
)(
mod
+
i
));
*
(
simde__m128i
*
)
tx0
=
_mm512_extracti64x2_epi64
(
e
,
0
);
simde__m512i
e
=
simde_mm512_permutexvar_epi32
(
perm4
,
*
(
simde
__m512i
*
)(
mod
+
i
));
*
(
simde__m128i
*
)
tx0
=
simde
_mm512_extracti64x2_epi64
(
e
,
0
);
tx0
+=
4
;
*
(
simde__m128i
*
)
tx1
=
_mm512_extracti64x2_epi64
(
e
,
1
);
*
(
simde__m128i
*
)
tx1
=
simde
_mm512_extracti64x2_epi64
(
e
,
1
);
tx1
+=
4
;
*
(
simde__m128i
*
)
tx2
=
_mm512_extracti64x2_epi64
(
e
,
2
);
*
(
simde__m128i
*
)
tx2
=
simde
_mm512_extracti64x2_epi64
(
e
,
2
);
tx2
+=
4
;
*
(
simde__m128i
*
)
tx3
=
_mm512_extracti64x2_epi64
(
e
,
3
);
*
(
simde__m128i
*
)
tx3
=
simde
_mm512_extracti64x2_epi64
(
e
,
3
);
tx3
+=
4
;
}
#endif
...
...
This diff is collapsed.
Click to expand it.
openair1/PHY/NR_TRANSPORT/nr_dlsch.c
View file @
ce7bdf71
...
...
@@ -43,6 +43,7 @@
// #define DEBUG_DLSCH
// #define DEBUG_DLSCH_MAPPING
#include <simde/x86/avx512.h>
#define USE128BIT
static
void
nr_pdsch_codeword_scrambling
(
uint8_t
*
in
,
uint32_t
size
,
uint8_t
q
,
uint32_t
Nid
,
uint32_t
n_RNTI
,
uint32_t
*
out
)
...
...
@@ -106,14 +107,14 @@ static inline int dmrs_case2a(c16_t *txF, c16_t *mod_dmrs, const int amp_dmrs, i
int
i
=
0
;
int
end
=
sz
/
2
;
#if defined(__AVX512BW__)
__m512i
zeros512
=
_mm512_setzero_si512
(),
amp_dmrs512
=
_mm512_set1_epi16
(
amp_dmrs
);
__m512i
perml
=
_mm512_set_epi32
(
23
,
7
,
22
,
6
,
21
,
5
,
20
,
4
,
19
,
3
,
18
,
2
,
17
,
1
,
16
,
0
);
__m512i
permh
=
_mm512_set_epi32
(
31
,
15
,
30
,
14
,
29
,
13
,
28
,
12
,
27
,
11
,
26
,
10
,
25
,
9
,
24
,
8
);
simde__m512i
zeros512
=
simde_mm512_setzero_si512
(),
amp_dmrs512
=
simde
_mm512_set1_epi16
(
amp_dmrs
);
simde__m512i
perml
=
simde
_mm512_set_epi32
(
23
,
7
,
22
,
6
,
21
,
5
,
20
,
4
,
19
,
3
,
18
,
2
,
17
,
1
,
16
,
0
);
simde__m512i
permh
=
simde
_mm512_set_epi32
(
31
,
15
,
30
,
14
,
29
,
13
,
28
,
12
,
27
,
11
,
26
,
10
,
25
,
9
,
24
,
8
);
for
(;
i
<
(
end
&
~
15
);
i
+=
16
)
{
__m512i
d0
=
_mm512_mulhrs_epi16
(
_mm512_loadu_si512
((
__m512i
*
)(
mod_dmrs
+
i
)),
amp_dmrs512
);
_mm512_storeu_si512
((
__m512i
*
)
out
,
_mm512_permutex2var_epi32
(
d0
,
perml
,
zeros512
));
simde__m512i
d0
=
simde_mm512_mulhrs_epi16
(
_mm512_loadu_si512
((
simde
__m512i
*
)(
mod_dmrs
+
i
)),
amp_dmrs512
);
simde_mm512_storeu_si512
((
simde__m512i
*
)
out
,
simde
_mm512_permutex2var_epi32
(
d0
,
perml
,
zeros512
));
out
+=
16
;
_mm512_storeu_si512
((
__m512i
*
)
out
,
_mm512_permutex2var_epi32
(
d0
,
permh
,
zeros512
));
simde_mm512_storeu_si512
((
simde__m512i
*
)
out
,
simde
_mm512_permutex2var_epi32
(
d0
,
permh
,
zeros512
));
out
+=
16
;
}
#endif
...
...
@@ -157,14 +158,14 @@ static inline int dmrs_case2b(c16_t *txF, c16_t *mod_dmrs, const int amp_dmrs, i
int
i
=
0
;
int
end
=
sz
/
2
;
#if defined(__AVX512BW__)
__m512i
zeros512
=
_mm512_setzero_si512
(),
amp_dmrs512
=
_mm512_set1_epi16
(
amp_dmrs
);
__m512i
perml
=
_mm512_set_epi32
(
23
,
7
,
22
,
6
,
21
,
5
,
20
,
4
,
19
,
3
,
18
,
2
,
17
,
1
,
16
,
0
);
__m512i
permh
=
_mm512_set_epi32
(
31
,
15
,
30
,
14
,
29
,
13
,
28
,
12
,
27
,
11
,
26
,
10
,
25
,
9
,
24
,
8
);
simde__m512i
zeros512
=
simde_mm512_setzero_si512
(),
amp_dmrs512
=
simde
_mm512_set1_epi16
(
amp_dmrs
);
simde__m512i
perml
=
simde
_mm512_set_epi32
(
23
,
7
,
22
,
6
,
21
,
5
,
20
,
4
,
19
,
3
,
18
,
2
,
17
,
1
,
16
,
0
);
simde__m512i
permh
=
simde
_mm512_set_epi32
(
31
,
15
,
30
,
14
,
29
,
13
,
28
,
12
,
27
,
11
,
26
,
10
,
25
,
9
,
24
,
8
);
for
(;
i
<
(
end
&
~
15
);
i
+=
16
)
{
__m512i
d0
=
_mm512_mulhrs_epi16
(
_mm512_loadu_si512
((
__m512i
*
)(
mod_dmrs
+
i
)),
amp_dmrs512
);
_mm512_storeu_si512
((
__m512i
*
)
out
,
_mm512_permutex2var_epi32
(
zeros512
,
perml
,
d0
));
simde__m512i
d0
=
simde_mm512_mulhrs_epi16
(
_mm512_loadu_si512
((
simde
__m512i
*
)(
mod_dmrs
+
i
)),
amp_dmrs512
);
simde_mm512_storeu_si512
((
simde__m512i
*
)
out
,
simde
_mm512_permutex2var_epi32
(
zeros512
,
perml
,
d0
));
out
+=
16
;
_mm512_storeu_si512
((
__m512i
*
)
out
,
_mm512_permutex2var_epi32
(
zeros512
,
permh
,
d0
));
simde_mm512_storeu_si512
((
simde__m512i
*
)
out
,
simde
_mm512_permutex2var_epi32
(
zeros512
,
permh
,
d0
));
out
+=
16
;
}
#endif
...
...
@@ -209,16 +210,16 @@ static inline int dmrs_case1a(c16_t *txF, c16_t *txl, c16_t *mod_dmrs, const int
int
i
=
0
;
int
end
=
sz
/
2
;
#if defined(__AVX512BW__)
__m512i
amp_dmrs512
=
_mm512_set1_epi16
(
amp_dmrs
),
amp512
=
_mm512_set1_epi16
(
amp
);
__m512i
perml
=
_mm512_set_epi32
(
23
,
7
,
22
,
6
,
21
,
5
,
20
,
4
,
19
,
3
,
18
,
2
,
17
,
1
,
16
,
0
);
__m512i
permh
=
_mm512_set_epi32
(
31
,
15
,
30
,
14
,
29
,
13
,
28
,
12
,
27
,
11
,
26
,
10
,
25
,
9
,
24
,
8
);
simde__m512i
amp_dmrs512
=
simde_mm512_set1_epi16
(
amp_dmrs
),
amp512
=
simde
_mm512_set1_epi16
(
amp
);
simde__m512i
perml
=
simde
_mm512_set_epi32
(
23
,
7
,
22
,
6
,
21
,
5
,
20
,
4
,
19
,
3
,
18
,
2
,
17
,
1
,
16
,
0
);
simde__m512i
permh
=
simde
_mm512_set_epi32
(
31
,
15
,
30
,
14
,
29
,
13
,
28
,
12
,
27
,
11
,
26
,
10
,
25
,
9
,
24
,
8
);
for
(;
i
<
(
end
&
~
15
);
i
+=
16
)
{
__m512i
d0
=
_mm512_mulhrs_epi16
(
_mm512_loadu_si512
((
__m512i
*
)(
mod_dmrs
+
i
)),
amp_dmrs512
);
__m512i
d1
=
_mm512_mulhrs_epi16
(
_mm512_loadu_si512
((
__m512i
*
)
txl
),
amp512
);
simde__m512i
d0
=
simde_mm512_mulhrs_epi16
(
_mm512_loadu_si512
((
simde
__m512i
*
)(
mod_dmrs
+
i
)),
amp_dmrs512
);
simde__m512i
d1
=
simde_mm512_mulhrs_epi16
(
_mm512_loadu_si512
((
simde
__m512i
*
)
txl
),
amp512
);
txl
+=
16
;
_mm512_storeu_si512
((
__m512i
*
)
out
,
_mm512_permutex2var_epi32
(
d0
,
perml
,
d1
));
simde_mm512_storeu_si512
((
simde__m512i
*
)
out
,
simde
_mm512_permutex2var_epi32
(
d0
,
perml
,
d1
));
out
+=
16
;
_mm512_storeu_si512
((
__m512i
*
)
out
,
_mm512_permutex2var_epi32
(
d0
,
permh
,
d1
));
simde_mm512_storeu_si512
((
simde__m512i
*
)
out
,
simde
_mm512_permutex2var_epi32
(
d0
,
permh
,
d1
));
out
+=
16
;
}
#endif
...
...
@@ -266,16 +267,16 @@ static inline int dmrs_case1b(c16_t *txF, c16_t *txl, c16_t *mod_dmrs, const int
int
i
=
0
;
int
end
=
sz
/
2
;
#if defined(__AVX512BW__)
__m512i
amp_dmrs512
=
_mm512_set1_epi16
(
amp_dmrs
),
amp512
=
_mm512_set1_epi16
(
amp
);
__m512i
perml
=
_mm512_set_epi32
(
23
,
7
,
22
,
6
,
21
,
5
,
20
,
4
,
19
,
3
,
18
,
2
,
17
,
1
,
16
,
0
);
__m512i
permh
=
_mm512_set_epi32
(
31
,
15
,
30
,
14
,
29
,
13
,
28
,
12
,
27
,
11
,
26
,
10
,
25
,
9
,
24
,
8
);
simde__m512i
amp_dmrs512
=
simde_mm512_set1_epi16
(
amp_dmrs
),
amp512
=
simde
_mm512_set1_epi16
(
amp
);
simde__m512i
perml
=
simde
_mm512_set_epi32
(
23
,
7
,
22
,
6
,
21
,
5
,
20
,
4
,
19
,
3
,
18
,
2
,
17
,
1
,
16
,
0
);
simde__m512i
permh
=
simde
_mm512_set_epi32
(
31
,
15
,
30
,
14
,
29
,
13
,
28
,
12
,
27
,
11
,
26
,
10
,
25
,
9
,
24
,
8
);
for
(;
i
<
(
end
&
~
15
);
i
+=
16
)
{
__m512i
d0
=
_mm512_mulhrs_epi16
(
_mm512_loadu_si512
((
__m512i
*
)(
mod_dmrs
+
i
)),
amp_dmrs512
);
__m512i
d1
=
_mm512_mulhrs_epi16
(
_mm512_loadu_si512
((
__m512i
*
)
txl
),
amp512
);
simde__m512i
d0
=
simde_mm512_mulhrs_epi16
(
_mm512_loadu_si512
((
simde
__m512i
*
)(
mod_dmrs
+
i
)),
amp_dmrs512
);
simde__m512i
d1
=
simde_mm512_mulhrs_epi16
(
_mm512_loadu_si512
((
simde
__m512i
*
)
txl
),
amp512
);
txl
+=
16
;
_mm512_storeu_si512
((
__m512i
*
)
out
,
_mm512_permutex2var_epi32
(
d1
,
perml
,
d0
));
simde_mm512_storeu_si512
((
simde__m512i
*
)
out
,
simde
_mm512_permutex2var_epi32
(
d1
,
perml
,
d0
));
out
+=
16
;
_mm512_storeu_si512
((
__m512i
*
)
out
,
_mm512_permutex2var_epi32
(
d1
,
permh
,
d0
));
simde_mm512_storeu_si512
((
simde__m512i
*
)
out
,
simde
_mm512_permutex2var_epi32
(
d1
,
permh
,
d0
));
out
+=
16
;
}
#endif
...
...
@@ -364,10 +365,10 @@ static inline int no_ptrs_dmrs_case(c16_t *txF, c16_t *txl, const int amp, const
// Loop Over SCs:
int
i
=
0
;
#if defined(__AVX512BW__)
__m512i
amp512
=
_mm512_set1_epi16
(
amp
);
simde__m512i
amp512
=
simde
_mm512_set1_epi16
(
amp
);
for
(;
i
<
(
sz
&
~
15
);
i
+=
16
)
{
const
__m512i
txL
=
_mm512_loadu_si512
((
__m512i
*
)(
txl
+
i
));
_mm512_storeu_si512
((
__m512i
*
)(
txF
+
i
),
_mm512_mulhrs_epi16
(
amp512
,
txL
));
const
simde__m512i
txL
=
simde_mm512_loadu_si512
((
simde
__m512i
*
)(
txl
+
i
));
simde_mm512_storeu_si512
((
simde__m512i
*
)(
txF
+
i
),
simde
_mm512_mulhrs_epi16
(
amp512
,
txL
));
}
#endif
#if defined(__AVX2__)
...
...
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment