Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
O
OpenXG-RAN
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
1
Merge Requests
1
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Metrics
Environments
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
OpenXG
OpenXG-RAN
Commits
ce7bdf71
Commit
ce7bdf71
authored
Feb 17, 2025
by
Laurent THOMAS
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fix simde lacking refix
parent
34970d38
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
105 additions
and
87 deletions
+105
-87
openair1/PHY/MODULATION/nr_modulation.c
openair1/PHY/MODULATION/nr_modulation.c
+75
-58
openair1/PHY/NR_TRANSPORT/nr_dlsch.c
openair1/PHY/NR_TRANSPORT/nr_dlsch.c
+30
-29
No files found.
openair1/PHY/MODULATION/nr_modulation.c
View file @
ce7bdf71
...
...
@@ -22,6 +22,9 @@
#include "nr_modulation.h"
#include "PHY/NR_REFSIG/nr_mod_table.h"
#include "executables/softmodem-common.h"
#include <simde/x86/avx512.h>
// Lacking declaration in present simde external package, will be detected as compilation error when they will add it
#define simde_mm512_extracti64x2_epi64(a...) _mm512_extracti64x2_epi64(a)
// #define DEBUG_DLSCH_PRECODING_PRINT_WITH_TRIVIAL // TODO: For debug, to be removed if want to merge to develop
// #define DEBUG_LAYER_MAPPING
...
...
@@ -263,13 +266,13 @@ void nr_layer_mapping(int nbCodes,
c16_t
*
tx0
=
tx_layers
[
0
];
c16_t
*
tx1
=
tx_layers
[
1
];
#if defined(__AVX512BW__)
__m512i
perm2a
=
_mm512_set_epi32
(
30
,
28
,
26
,
24
,
22
,
20
,
18
,
16
,
14
,
12
,
10
,
8
,
6
,
4
,
2
,
0
);
__m512i
perm2b
=
_mm512_set_epi32
(
31
,
29
,
27
,
25
,
23
,
21
,
19
,
17
,
15
,
13
,
11
,
9
,
7
,
5
,
3
,
1
);
simde__m512i
perm2a
=
simde
_mm512_set_epi32
(
30
,
28
,
26
,
24
,
22
,
20
,
18
,
16
,
14
,
12
,
10
,
8
,
6
,
4
,
2
,
0
);
simde__m512i
perm2b
=
simde
_mm512_set_epi32
(
31
,
29
,
27
,
25
,
23
,
21
,
19
,
17
,
15
,
13
,
11
,
9
,
7
,
5
,
3
,
1
);
for
(;
i
<
(
n_symbs
&
~
31
);
i
+=
32
)
{
__m512i
a
=
*
(
__m512i
*
)(
mod
+
i
);
__m512i
b
=
*
(
__m512i
*
)(
mod
+
i
+
16
);
*
(
__m512i
*
)
tx0
=
_mm512_permutex2var_epi32
(
a
,
perm2a
,
b
);
*
(
__m512i
*
)
tx1
=
_mm512_permutex2var_epi32
(
a
,
perm2b
,
b
);
simde__m512i
a
=
*
(
simde
__m512i
*
)(
mod
+
i
);
simde__m512i
b
=
*
(
simde
__m512i
*
)(
mod
+
i
+
16
);
*
(
simde__m512i
*
)
tx0
=
simde
_mm512_permutex2var_epi32
(
a
,
perm2a
,
b
);
*
(
simde__m512i
*
)
tx1
=
simde
_mm512_permutex2var_epi32
(
a
,
perm2b
,
b
);
tx0
+=
16
;
tx1
+=
16
;
}
...
...
@@ -308,55 +311,69 @@ void nr_layer_mapping(int nbCodes,
c16_t
*
tx1
=
tx_layers
[
1
];
c16_t
*
tx2
=
tx_layers
[
2
];
#if defined(__AVX512BW__)
__m512i
perm3_0
=
_mm512_set_epi32
(
13
+
16
,
10
+
16
,
7
+
16
,
4
+
16
,
1
+
16
,
14
+
16
,
11
+
16
,
8
+
16
,
5
+
16
,
2
+
16
,
15
,
12
,
9
,
6
,
3
,
0
);
__m512i
perm3_0b
=
_mm512_set_epi32
(
13
+
16
,
10
+
16
,
7
+
16
,
4
+
16
,
1
+
16
,
10
,
9
,
8
,
7
,
6
,
5
,
4
,
3
,
2
,
1
,
0
);
__m512i
perm3_1
=
_mm512_set_epi32
(
14
+
16
,
11
+
16
,
8
+
16
,
5
+
16
,
2
+
16
,
15
+
16
,
12
+
16
,
9
+
16
,
6
+
16
,
3
+
16
,
0
+
16
,
13
,
10
,
7
,
4
,
1
);
__m512i
perm3_1b
=
_mm512_set_epi32
(
14
+
16
,
11
+
16
,
8
+
16
,
5
+
16
,
2
+
16
,
10
,
9
,
8
,
7
,
6
,
5
,
4
,
3
,
2
,
1
,
0
);
__m512i
perm3_2
=
_mm512_set_epi32
(
15
+
16
,
12
+
16
,
9
+
16
,
6
+
16
,
3
+
16
,
0
+
16
,
13
+
16
,
10
+
16
,
7
+
16
,
4
+
16
,
1
+
16
,
14
,
11
,
8
,
5
,
2
);
__m512i
perm3_2b
=
_mm512_set_epi32
(
15
+
16
,
12
+
16
,
9
+
16
,
6
+
16
,
3
+
16
,
0
+
16
,
9
,
8
,
7
,
6
,
5
,
4
,
3
,
2
,
1
,
0
);
simde__m512i
perm3_0
=
simde_mm512_set_epi32
(
13
+
16
,
10
+
16
,
7
+
16
,
4
+
16
,
1
+
16
,
14
+
16
,
11
+
16
,
8
+
16
,
5
+
16
,
2
+
16
,
15
,
12
,
9
,
6
,
3
,
0
);
simde__m512i
perm3_0b
=
simde_mm512_set_epi32
(
13
+
16
,
10
+
16
,
7
+
16
,
4
+
16
,
1
+
16
,
10
,
9
,
8
,
7
,
6
,
5
,
4
,
3
,
2
,
1
,
0
);
simde__m512i
perm3_1
=
simde_mm512_set_epi32
(
14
+
16
,
11
+
16
,
8
+
16
,
5
+
16
,
2
+
16
,
15
+
16
,
12
+
16
,
9
+
16
,
6
+
16
,
3
+
16
,
0
+
16
,
13
,
10
,
7
,
4
,
1
);
simde__m512i
perm3_1b
=
simde_mm512_set_epi32
(
14
+
16
,
11
+
16
,
8
+
16
,
5
+
16
,
2
+
16
,
10
,
9
,
8
,
7
,
6
,
5
,
4
,
3
,
2
,
1
,
0
);
simde__m512i
perm3_2
=
simde_mm512_set_epi32
(
15
+
16
,
12
+
16
,
9
+
16
,
6
+
16
,
3
+
16
,
0
+
16
,
13
+
16
,
10
+
16
,
7
+
16
,
4
+
16
,
1
+
16
,
14
,
11
,
8
,
5
,
2
);
simde__m512i
perm3_2b
=
simde_mm512_set_epi32
(
15
+
16
,
12
+
16
,
9
+
16
,
6
+
16
,
3
+
16
,
0
+
16
,
9
,
8
,
7
,
6
,
5
,
4
,
3
,
2
,
1
,
0
);
for
(;
i
<
(
n_symbs
&
~
63
);
i
+=
48
)
{
__m512i
i0
=
*
(
__m512i
*
)(
mod
+
i
);
__m512i
i1
=
*
(
__m512i
*
)(
mod
+
i
+
16
);
__m512i
i2
=
*
(
__m512i
*
)(
mod
+
i
+
32
);
__m512i
d0
=
_mm512_permutex2var_epi32
(
i0
,
perm3_0
,
i1
);
*
(
__m512i
*
)
tx0
=
_mm512_permutex2var_epi32
(
d0
,
perm3_0b
,
i2
);
// 11000000
simde__m512i
i0
=
*
(
simde
__m512i
*
)(
mod
+
i
);
simde__m512i
i1
=
*
(
simde
__m512i
*
)(
mod
+
i
+
16
);
simde__m512i
i2
=
*
(
simde
__m512i
*
)(
mod
+
i
+
32
);
simde__m512i
d0
=
simde
_mm512_permutex2var_epi32
(
i0
,
perm3_0
,
i1
);
*
(
simde__m512i
*
)
tx0
=
simde
_mm512_permutex2var_epi32
(
d0
,
perm3_0b
,
i2
);
// 11000000
tx0
+=
16
;
d0
=
_mm512_permutex2var_epi32
(
i0
,
perm3_1
,
i1
);
*
(
__m512i
*
)
tx1
=
_mm512_permutex2var_epi32
(
d0
,
perm3_1b
,
i2
);
// 11000000
d0
=
simde
_mm512_permutex2var_epi32
(
i0
,
perm3_1
,
i1
);
*
(
simde__m512i
*
)
tx1
=
simde
_mm512_permutex2var_epi32
(
d0
,
perm3_1b
,
i2
);
// 11000000
tx1
+=
16
;
d0
=
_mm512_permutex2var_epi32
(
i0
,
perm3_2
,
i1
);
*
(
__m512i
*
)
tx2
=
_mm512_permutex2var_epi32
(
d0
,
perm3_2b
,
i2
);
// 11000000
d0
=
simde
_mm512_permutex2var_epi32
(
i0
,
perm3_2
,
i1
);
*
(
simde__m512i
*
)
tx2
=
simde
_mm512_permutex2var_epi32
(
d0
,
perm3_2b
,
i2
);
// 11000000
tx2
+=
16
;
}
#endif
...
...
@@ -432,16 +449,16 @@ void nr_layer_mapping(int nbCodes,
c16_t
*
tx2
=
tx_layers
[
2
];
c16_t
*
tx3
=
tx_layers
[
3
];
#if defined(__AVX512BW__)
__m512i
perm4
=
_mm512_set_epi32
(
15
,
11
,
7
,
3
,
14
,
10
,
6
,
2
,
13
,
9
,
5
,
1
,
12
,
8
,
4
,
0
);
simde__m512i
perm4
=
simde
_mm512_set_epi32
(
15
,
11
,
7
,
3
,
14
,
10
,
6
,
2
,
13
,
9
,
5
,
1
,
12
,
8
,
4
,
0
);
for
(;
i
<
(
n_symbs
&
~
15
);
i
+=
16
)
{
__m512i
e
=
_mm512_permutexvar_epi32
(
perm4
,
*
(
__m512i
*
)(
mod
+
i
));
*
(
simde__m128i
*
)
tx0
=
_mm512_extracti64x2_epi64
(
e
,
0
);
simde__m512i
e
=
simde_mm512_permutexvar_epi32
(
perm4
,
*
(
simde
__m512i
*
)(
mod
+
i
));
*
(
simde__m128i
*
)
tx0
=
simde
_mm512_extracti64x2_epi64
(
e
,
0
);
tx0
+=
4
;
*
(
simde__m128i
*
)
tx1
=
_mm512_extracti64x2_epi64
(
e
,
1
);
*
(
simde__m128i
*
)
tx1
=
simde
_mm512_extracti64x2_epi64
(
e
,
1
);
tx1
+=
4
;
*
(
simde__m128i
*
)
tx2
=
_mm512_extracti64x2_epi64
(
e
,
2
);
*
(
simde__m128i
*
)
tx2
=
simde
_mm512_extracti64x2_epi64
(
e
,
2
);
tx2
+=
4
;
*
(
simde__m128i
*
)
tx3
=
_mm512_extracti64x2_epi64
(
e
,
3
);
*
(
simde__m128i
*
)
tx3
=
simde
_mm512_extracti64x2_epi64
(
e
,
3
);
tx3
+=
4
;
}
#endif
...
...
openair1/PHY/NR_TRANSPORT/nr_dlsch.c
View file @
ce7bdf71
...
...
@@ -43,6 +43,7 @@
// #define DEBUG_DLSCH
// #define DEBUG_DLSCH_MAPPING
#include <simde/x86/avx512.h>
#define USE128BIT
static
void
nr_pdsch_codeword_scrambling
(
uint8_t
*
in
,
uint32_t
size
,
uint8_t
q
,
uint32_t
Nid
,
uint32_t
n_RNTI
,
uint32_t
*
out
)
...
...
@@ -106,14 +107,14 @@ static inline int dmrs_case2a(c16_t *txF, c16_t *mod_dmrs, const int amp_dmrs, i
int
i
=
0
;
int
end
=
sz
/
2
;
#if defined(__AVX512BW__)
__m512i
zeros512
=
_mm512_setzero_si512
(),
amp_dmrs512
=
_mm512_set1_epi16
(
amp_dmrs
);
__m512i
perml
=
_mm512_set_epi32
(
23
,
7
,
22
,
6
,
21
,
5
,
20
,
4
,
19
,
3
,
18
,
2
,
17
,
1
,
16
,
0
);
__m512i
permh
=
_mm512_set_epi32
(
31
,
15
,
30
,
14
,
29
,
13
,
28
,
12
,
27
,
11
,
26
,
10
,
25
,
9
,
24
,
8
);
simde__m512i
zeros512
=
simde_mm512_setzero_si512
(),
amp_dmrs512
=
simde
_mm512_set1_epi16
(
amp_dmrs
);
simde__m512i
perml
=
simde
_mm512_set_epi32
(
23
,
7
,
22
,
6
,
21
,
5
,
20
,
4
,
19
,
3
,
18
,
2
,
17
,
1
,
16
,
0
);
simde__m512i
permh
=
simde
_mm512_set_epi32
(
31
,
15
,
30
,
14
,
29
,
13
,
28
,
12
,
27
,
11
,
26
,
10
,
25
,
9
,
24
,
8
);
for
(;
i
<
(
end
&
~
15
);
i
+=
16
)
{
__m512i
d0
=
_mm512_mulhrs_epi16
(
_mm512_loadu_si512
((
__m512i
*
)(
mod_dmrs
+
i
)),
amp_dmrs512
);
_mm512_storeu_si512
((
__m512i
*
)
out
,
_mm512_permutex2var_epi32
(
d0
,
perml
,
zeros512
));
simde__m512i
d0
=
simde_mm512_mulhrs_epi16
(
_mm512_loadu_si512
((
simde
__m512i
*
)(
mod_dmrs
+
i
)),
amp_dmrs512
);
simde_mm512_storeu_si512
((
simde__m512i
*
)
out
,
simde
_mm512_permutex2var_epi32
(
d0
,
perml
,
zeros512
));
out
+=
16
;
_mm512_storeu_si512
((
__m512i
*
)
out
,
_mm512_permutex2var_epi32
(
d0
,
permh
,
zeros512
));
simde_mm512_storeu_si512
((
simde__m512i
*
)
out
,
simde
_mm512_permutex2var_epi32
(
d0
,
permh
,
zeros512
));
out
+=
16
;
}
#endif
...
...
@@ -157,14 +158,14 @@ static inline int dmrs_case2b(c16_t *txF, c16_t *mod_dmrs, const int amp_dmrs, i
int
i
=
0
;
int
end
=
sz
/
2
;
#if defined(__AVX512BW__)
__m512i
zeros512
=
_mm512_setzero_si512
(),
amp_dmrs512
=
_mm512_set1_epi16
(
amp_dmrs
);
__m512i
perml
=
_mm512_set_epi32
(
23
,
7
,
22
,
6
,
21
,
5
,
20
,
4
,
19
,
3
,
18
,
2
,
17
,
1
,
16
,
0
);
__m512i
permh
=
_mm512_set_epi32
(
31
,
15
,
30
,
14
,
29
,
13
,
28
,
12
,
27
,
11
,
26
,
10
,
25
,
9
,
24
,
8
);
simde__m512i
zeros512
=
simde_mm512_setzero_si512
(),
amp_dmrs512
=
simde
_mm512_set1_epi16
(
amp_dmrs
);
simde__m512i
perml
=
simde
_mm512_set_epi32
(
23
,
7
,
22
,
6
,
21
,
5
,
20
,
4
,
19
,
3
,
18
,
2
,
17
,
1
,
16
,
0
);
simde__m512i
permh
=
simde
_mm512_set_epi32
(
31
,
15
,
30
,
14
,
29
,
13
,
28
,
12
,
27
,
11
,
26
,
10
,
25
,
9
,
24
,
8
);
for
(;
i
<
(
end
&
~
15
);
i
+=
16
)
{
__m512i
d0
=
_mm512_mulhrs_epi16
(
_mm512_loadu_si512
((
__m512i
*
)(
mod_dmrs
+
i
)),
amp_dmrs512
);
_mm512_storeu_si512
((
__m512i
*
)
out
,
_mm512_permutex2var_epi32
(
zeros512
,
perml
,
d0
));
simde__m512i
d0
=
simde_mm512_mulhrs_epi16
(
_mm512_loadu_si512
((
simde
__m512i
*
)(
mod_dmrs
+
i
)),
amp_dmrs512
);
simde_mm512_storeu_si512
((
simde__m512i
*
)
out
,
simde
_mm512_permutex2var_epi32
(
zeros512
,
perml
,
d0
));
out
+=
16
;
_mm512_storeu_si512
((
__m512i
*
)
out
,
_mm512_permutex2var_epi32
(
zeros512
,
permh
,
d0
));
simde_mm512_storeu_si512
((
simde__m512i
*
)
out
,
simde
_mm512_permutex2var_epi32
(
zeros512
,
permh
,
d0
));
out
+=
16
;
}
#endif
...
...
@@ -209,16 +210,16 @@ static inline int dmrs_case1a(c16_t *txF, c16_t *txl, c16_t *mod_dmrs, const int
int
i
=
0
;
int
end
=
sz
/
2
;
#if defined(__AVX512BW__)
__m512i
amp_dmrs512
=
_mm512_set1_epi16
(
amp_dmrs
),
amp512
=
_mm512_set1_epi16
(
amp
);
__m512i
perml
=
_mm512_set_epi32
(
23
,
7
,
22
,
6
,
21
,
5
,
20
,
4
,
19
,
3
,
18
,
2
,
17
,
1
,
16
,
0
);
__m512i
permh
=
_mm512_set_epi32
(
31
,
15
,
30
,
14
,
29
,
13
,
28
,
12
,
27
,
11
,
26
,
10
,
25
,
9
,
24
,
8
);
simde__m512i
amp_dmrs512
=
simde_mm512_set1_epi16
(
amp_dmrs
),
amp512
=
simde
_mm512_set1_epi16
(
amp
);
simde__m512i
perml
=
simde
_mm512_set_epi32
(
23
,
7
,
22
,
6
,
21
,
5
,
20
,
4
,
19
,
3
,
18
,
2
,
17
,
1
,
16
,
0
);
simde__m512i
permh
=
simde
_mm512_set_epi32
(
31
,
15
,
30
,
14
,
29
,
13
,
28
,
12
,
27
,
11
,
26
,
10
,
25
,
9
,
24
,
8
);
for
(;
i
<
(
end
&
~
15
);
i
+=
16
)
{
__m512i
d0
=
_mm512_mulhrs_epi16
(
_mm512_loadu_si512
((
__m512i
*
)(
mod_dmrs
+
i
)),
amp_dmrs512
);
__m512i
d1
=
_mm512_mulhrs_epi16
(
_mm512_loadu_si512
((
__m512i
*
)
txl
),
amp512
);
simde__m512i
d0
=
simde_mm512_mulhrs_epi16
(
_mm512_loadu_si512
((
simde
__m512i
*
)(
mod_dmrs
+
i
)),
amp_dmrs512
);
simde__m512i
d1
=
simde_mm512_mulhrs_epi16
(
_mm512_loadu_si512
((
simde
__m512i
*
)
txl
),
amp512
);
txl
+=
16
;
_mm512_storeu_si512
((
__m512i
*
)
out
,
_mm512_permutex2var_epi32
(
d0
,
perml
,
d1
));
simde_mm512_storeu_si512
((
simde__m512i
*
)
out
,
simde
_mm512_permutex2var_epi32
(
d0
,
perml
,
d1
));
out
+=
16
;
_mm512_storeu_si512
((
__m512i
*
)
out
,
_mm512_permutex2var_epi32
(
d0
,
permh
,
d1
));
simde_mm512_storeu_si512
((
simde__m512i
*
)
out
,
simde
_mm512_permutex2var_epi32
(
d0
,
permh
,
d1
));
out
+=
16
;
}
#endif
...
...
@@ -266,16 +267,16 @@ static inline int dmrs_case1b(c16_t *txF, c16_t *txl, c16_t *mod_dmrs, const int
int
i
=
0
;
int
end
=
sz
/
2
;
#if defined(__AVX512BW__)
__m512i
amp_dmrs512
=
_mm512_set1_epi16
(
amp_dmrs
),
amp512
=
_mm512_set1_epi16
(
amp
);
__m512i
perml
=
_mm512_set_epi32
(
23
,
7
,
22
,
6
,
21
,
5
,
20
,
4
,
19
,
3
,
18
,
2
,
17
,
1
,
16
,
0
);
__m512i
permh
=
_mm512_set_epi32
(
31
,
15
,
30
,
14
,
29
,
13
,
28
,
12
,
27
,
11
,
26
,
10
,
25
,
9
,
24
,
8
);
simde__m512i
amp_dmrs512
=
simde_mm512_set1_epi16
(
amp_dmrs
),
amp512
=
simde
_mm512_set1_epi16
(
amp
);
simde__m512i
perml
=
simde
_mm512_set_epi32
(
23
,
7
,
22
,
6
,
21
,
5
,
20
,
4
,
19
,
3
,
18
,
2
,
17
,
1
,
16
,
0
);
simde__m512i
permh
=
simde
_mm512_set_epi32
(
31
,
15
,
30
,
14
,
29
,
13
,
28
,
12
,
27
,
11
,
26
,
10
,
25
,
9
,
24
,
8
);
for
(;
i
<
(
end
&
~
15
);
i
+=
16
)
{
__m512i
d0
=
_mm512_mulhrs_epi16
(
_mm512_loadu_si512
((
__m512i
*
)(
mod_dmrs
+
i
)),
amp_dmrs512
);
__m512i
d1
=
_mm512_mulhrs_epi16
(
_mm512_loadu_si512
((
__m512i
*
)
txl
),
amp512
);
simde__m512i
d0
=
simde_mm512_mulhrs_epi16
(
_mm512_loadu_si512
((
simde
__m512i
*
)(
mod_dmrs
+
i
)),
amp_dmrs512
);
simde__m512i
d1
=
simde_mm512_mulhrs_epi16
(
_mm512_loadu_si512
((
simde
__m512i
*
)
txl
),
amp512
);
txl
+=
16
;
_mm512_storeu_si512
((
__m512i
*
)
out
,
_mm512_permutex2var_epi32
(
d1
,
perml
,
d0
));
simde_mm512_storeu_si512
((
simde__m512i
*
)
out
,
simde
_mm512_permutex2var_epi32
(
d1
,
perml
,
d0
));
out
+=
16
;
_mm512_storeu_si512
((
__m512i
*
)
out
,
_mm512_permutex2var_epi32
(
d1
,
permh
,
d0
));
simde_mm512_storeu_si512
((
simde__m512i
*
)
out
,
simde
_mm512_permutex2var_epi32
(
d1
,
permh
,
d0
));
out
+=
16
;
}
#endif
...
...
@@ -364,10 +365,10 @@ static inline int no_ptrs_dmrs_case(c16_t *txF, c16_t *txl, const int amp, const
// Loop Over SCs:
int
i
=
0
;
#if defined(__AVX512BW__)
__m512i
amp512
=
_mm512_set1_epi16
(
amp
);
simde__m512i
amp512
=
simde
_mm512_set1_epi16
(
amp
);
for
(;
i
<
(
sz
&
~
15
);
i
+=
16
)
{
const
__m512i
txL
=
_mm512_loadu_si512
((
__m512i
*
)(
txl
+
i
));
_mm512_storeu_si512
((
__m512i
*
)(
txF
+
i
),
_mm512_mulhrs_epi16
(
amp512
,
txL
));
const
simde__m512i
txL
=
simde_mm512_loadu_si512
((
simde
__m512i
*
)(
txl
+
i
));
simde_mm512_storeu_si512
((
simde__m512i
*
)(
txF
+
i
),
simde
_mm512_mulhrs_epi16
(
amp512
,
txL
));
}
#endif
#if defined(__AVX2__)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment