Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
O
OpenXG-RAN
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
zzha zzha
OpenXG-RAN
Commits
0c499324
Commit
0c499324
authored
Sep 15, 2023
by
Quency Lin
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fix: CI failure due to SIMD misalignment
parent
797f8cec
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
66 additions
and
66 deletions
+66
-66
openair1/PHY/NR_TRANSPORT/nr_ulsch_llr_computation.c
openair1/PHY/NR_TRANSPORT/nr_ulsch_llr_computation.c
+66
-66
No files found.
openair1/PHY/NR_TRANSPORT/nr_ulsch_llr_computation.c
View file @
0c499324
...
@@ -950,30 +950,30 @@ void nr_ulsch_qam16_qam16(c16_t *stream0_in,
...
@@ -950,30 +950,30 @@ void nr_ulsch_qam16_qam16(c16_t *stream0_in,
simde__m128i
ch_mag_over_2
;
simde__m128i
ch_mag_over_2
;
simde__m128i
ch_mag_9_over_10
;
simde__m128i
ch_mag_9_over_10
;
simde__m128i
xmm0
__attribute__
((
aligned
(
16
)))
;
simde__m128i
xmm0
;
simde__m128i
xmm1
__attribute__
((
aligned
(
16
)))
;
simde__m128i
xmm1
;
simde__m128i
xmm2
__attribute__
((
aligned
(
16
)))
;
simde__m128i
xmm2
;
simde__m128i
xmm3
__attribute__
((
aligned
(
16
)))
;
simde__m128i
xmm3
;
simde__m128i
xmm4
__attribute__
((
aligned
(
16
)))
;
simde__m128i
xmm4
;
simde__m128i
xmm5
__attribute__
((
aligned
(
16
)))
;
simde__m128i
xmm5
;
simde__m128i
xmm6
__attribute__
((
aligned
(
16
)))
;
simde__m128i
xmm6
;
simde__m128i
xmm7
__attribute__
((
aligned
(
16
)))
;
simde__m128i
xmm7
;
simde__m128i
rho_rpi
__attribute__
((
aligned
(
16
)))
;
simde__m128i
rho_rpi
;
simde__m128i
rho_rmi
__attribute__
((
aligned
(
16
)))
;
simde__m128i
rho_rmi
;
simde__m128i
rho_rs
[
8
]
__attribute__
((
aligned
(
16
)))
;
simde__m128i
rho_rs
[
8
];
simde__m128i
psi_rs
[
16
]
__attribute__
((
aligned
(
16
)))
;
simde__m128i
psi_rs
[
16
];
simde__m128i
psi_is
[
16
]
__attribute__
((
aligned
(
16
)))
;
simde__m128i
psi_is
[
16
];
simde__m128i
a_rs
[
16
]
__attribute__
((
aligned
(
16
)))
;
simde__m128i
a_rs
[
16
];
simde__m128i
a_is
[
16
]
__attribute__
((
aligned
(
16
)))
;
simde__m128i
a_is
[
16
];
simde__m128i
psi_as
[
16
]
__attribute__
((
aligned
(
16
)))
;
simde__m128i
psi_as
[
16
];
simde__m128i
a_sqs
[
16
]
__attribute__
((
aligned
(
16
)))
;
simde__m128i
a_sqs
[
16
];
simde__m128i
y0_s
[
8
]
__attribute__
((
aligned
(
16
)))
;
simde__m128i
y0_s
[
8
];
simde__m128i
y0r
__attribute__
((
aligned
(
16
)))
;
simde__m128i
y0r
;
simde__m128i
y0i
__attribute__
((
aligned
(
16
)))
;
simde__m128i
y0i
;
simde__m128i
y1r
__attribute__
((
aligned
(
16
)))
;
simde__m128i
y1r
;
simde__m128i
y1i
__attribute__
((
aligned
(
16
)))
;
simde__m128i
y1i
;
// In one iteration, we deal with 8 REs
// In one iteration, we deal with 8 REs
for
(
int
i
=
0
;
i
<
length
>>
2
;
i
+=
2
)
{
for
(
int
i
=
0
;
i
<
length
>>
2
;
i
+=
2
)
{
...
@@ -1070,7 +1070,7 @@ void nr_ulsch_qam16_qam16(c16_t *stream0_in,
...
@@ -1070,7 +1070,7 @@ void nr_ulsch_qam16_qam16(c16_t *stream0_in,
ch_mag_9_over_10
=
simde_mm_slli_epi16
(
ch_mag_9_over_10
,
2
);
ch_mag_9_over_10
=
simde_mm_slli_epi16
(
ch_mag_9_over_10
,
2
);
/// Compute bit metrics (lambda)
/// Compute bit metrics (lambda)
simde__m128i
bit_mets
[
16
]
__attribute__
((
aligned
(
16
)))
;
simde__m128i
bit_mets
[
16
];
for
(
int
j
=
0
;
j
<
8
;
j
+=
4
){
for
(
int
j
=
0
;
j
<
8
;
j
+=
4
){
bit_mets
[
j
+
0
]
=
simde_mm_subs_epi16
(
psi_as
[
j
+
0
],
a_sqs
[
j
+
0
]);
bit_mets
[
j
+
0
]
=
simde_mm_subs_epi16
(
psi_as
[
j
+
0
],
a_sqs
[
j
+
0
]);
bit_mets
[
j
+
0
]
=
simde_mm_adds_epi16
(
bit_mets
[
j
+
0
],
y0_s
[
j
+
0
]);
bit_mets
[
j
+
0
]
=
simde_mm_adds_epi16
(
bit_mets
[
j
+
0
],
y0_s
[
j
+
0
]);
...
@@ -1163,28 +1163,28 @@ void nr_ulsch_qam16_qam16(c16_t *stream0_in,
...
@@ -1163,28 +1163,28 @@ void nr_ulsch_qam16_qam16(c16_t *stream0_in,
simde__m256i
ch_mag_over_2
;
simde__m256i
ch_mag_over_2
;
simde__m256i
ch_mag_9_over_10
;
simde__m256i
ch_mag_9_over_10
;
simde__m256i
xmm2
__attribute__
((
aligned
(
16
)))
;
simde__m256i
xmm2
;
simde__m256i
xmm3
__attribute__
((
aligned
(
16
)))
;
simde__m256i
xmm3
;
simde__m256i
xmm4
__attribute__
((
aligned
(
16
)))
;
simde__m256i
xmm4
;
simde__m256i
xmm5
__attribute__
((
aligned
(
16
)))
;
simde__m256i
xmm5
;
simde__m256i
xmm6
__attribute__
((
aligned
(
16
)))
;
simde__m256i
xmm6
;
simde__m256i
xmm7
__attribute__
((
aligned
(
16
)))
;
simde__m256i
xmm7
;
simde__m256i
rho_rpi
__attribute__
((
aligned
(
16
)))
;
simde__m256i
rho_rpi
;
simde__m256i
rho_rmi
__attribute__
((
aligned
(
16
)))
;
simde__m256i
rho_rmi
;
simde__m256i
rho_rs
[
8
]
__attribute__
((
aligned
(
16
)))
;
simde__m256i
rho_rs
[
8
];
simde__m256i
psi_rs
[
16
]
__attribute__
((
aligned
(
16
)))
;
simde__m256i
psi_rs
[
16
];
simde__m256i
psi_is
[
16
]
__attribute__
((
aligned
(
16
)))
;
simde__m256i
psi_is
[
16
];
simde__m256i
a_rs
[
16
]
__attribute__
((
aligned
(
16
)))
;
simde__m256i
a_rs
[
16
];
simde__m256i
a_is
[
16
]
__attribute__
((
aligned
(
16
)))
;
simde__m256i
a_is
[
16
];
simde__m256i
psi_as
[
16
]
__attribute__
((
aligned
(
16
)))
;
simde__m256i
psi_as
[
16
];
simde__m256i
a_sqs
[
16
]
__attribute__
((
aligned
(
16
)))
;
simde__m256i
a_sqs
[
16
];
simde__m256i
y0_s
[
8
]
__attribute__
((
aligned
(
16
)))
;
simde__m256i
y0_s
[
8
];
simde__m256i
y0r
__attribute__
((
aligned
(
16
)))
;
simde__m256i
y0r
;
simde__m256i
y0i
__attribute__
((
aligned
(
16
)))
;
simde__m256i
y0i
;
simde__m256i
y1r
__attribute__
((
aligned
(
16
)))
;
simde__m256i
y1r
;
simde__m256i
y1i
__attribute__
((
aligned
(
16
)))
;
simde__m256i
y1i
;
// In one iteration, we deal with 8 REs
// In one iteration, we deal with 8 REs
for
(
int
i
=
0
;
i
<
length
>>
3
;
i
+=
2
)
{
for
(
int
i
=
0
;
i
<
length
>>
3
;
i
+=
2
)
{
...
@@ -1282,7 +1282,7 @@ void nr_ulsch_qam16_qam16(c16_t *stream0_in,
...
@@ -1282,7 +1282,7 @@ void nr_ulsch_qam16_qam16(c16_t *stream0_in,
/// Compute bit metrics (lambda)
/// Compute bit metrics (lambda)
simde__m256i
bit_mets
[
16
]
__attribute__
((
aligned
(
16
)))
;
simde__m256i
bit_mets
[
16
];
for
(
int
j
=
0
;
j
<
8
;
j
+=
4
){
for
(
int
j
=
0
;
j
<
8
;
j
+=
4
){
bit_mets
[
j
+
0
]
=
simde_mm256_subs_epi16
(
psi_as
[
j
+
0
],
a_sqs
[
j
+
0
]);
bit_mets
[
j
+
0
]
=
simde_mm256_subs_epi16
(
psi_as
[
j
+
0
],
a_sqs
[
j
+
0
]);
bit_mets
[
j
+
0
]
=
simde_mm256_adds_epi16
(
bit_mets
[
j
+
0
],
y0_s
[
j
+
0
]);
bit_mets
[
j
+
0
]
=
simde_mm256_adds_epi16
(
bit_mets
[
j
+
0
],
y0_s
[
j
+
0
]);
...
@@ -1454,7 +1454,7 @@ void nr_ulsch_qam64_qam64(c16_t *stream0_in,
...
@@ -1454,7 +1454,7 @@ void nr_ulsch_qam64_qam64(c16_t *stream0_in,
simde__m128i
rho_rmi
=
simde_mm_subs_epi16
(
xmm2
,
xmm3
);
// rho* = Re(rho) - Im(rho)
simde__m128i
rho_rmi
=
simde_mm_subs_epi16
(
xmm2
,
xmm3
);
// rho* = Re(rho) - Im(rho)
// Compute the different rhos
// Compute the different rhos
simde__m128i
rho_rs
[
32
]
__attribute__
((
aligned
(
16
)))
;
simde__m128i
rho_rs
[
32
];
rho_rs
[
27
]
=
simde_mm_mulhi_epi16
(
rho_rpi
,
ONE_OVER_SQRT_42
);
rho_rs
[
27
]
=
simde_mm_mulhi_epi16
(
rho_rpi
,
ONE_OVER_SQRT_42
);
rho_rs
[
28
]
=
simde_mm_mulhi_epi16
(
rho_rmi
,
ONE_OVER_SQRT_42
);
rho_rs
[
28
]
=
simde_mm_mulhi_epi16
(
rho_rmi
,
ONE_OVER_SQRT_42
);
rho_rs
[
18
]
=
simde_mm_mulhi_epi16
(
rho_rpi
,
THREE_OVER_SQRT_42
);
rho_rs
[
18
]
=
simde_mm_mulhi_epi16
(
rho_rpi
,
THREE_OVER_SQRT_42
);
...
@@ -1518,14 +1518,14 @@ void nr_ulsch_qam64_qam64(c16_t *stream0_in,
...
@@ -1518,14 +1518,14 @@ void nr_ulsch_qam64_qam64(c16_t *stream0_in,
xmm0
=
simde_mm_set1_epi16
(
0
);
// ZERO for abs_pi16
xmm0
=
simde_mm_set1_epi16
(
0
);
// ZERO for abs_pi16
xmm2
=
simde_mm_subs_epi16
(
rho_rs
[
0
],
y1r
);
xmm2
=
simde_mm_subs_epi16
(
rho_rs
[
0
],
y1r
);
simde__m128i
psi_r_s
[
64
]
__attribute__
((
aligned
(
16
)))
;
simde__m128i
psi_r_s
[
64
];
for
(
int
j
=
0
;
j
<
32
;
j
++
)
// psi_r_s[0~31], rho_rs[0~31]
for
(
int
j
=
0
;
j
<
32
;
j
++
)
// psi_r_s[0~31], rho_rs[0~31]
psi_r_s
[
j
]
=
simde_mm_abs_epi16
(
simde_mm_subs_epi16
(
rho_rs
[
j
],
y1r
));
psi_r_s
[
j
]
=
simde_mm_abs_epi16
(
simde_mm_subs_epi16
(
rho_rs
[
j
],
y1r
));
for
(
int
j
=
32
;
j
<
64
;
j
++
)
// psi_r_s[32~64], rho_rs[31~0]
for
(
int
j
=
32
;
j
<
64
;
j
++
)
// psi_r_s[32~64], rho_rs[31~0]
psi_r_s
[
j
]
=
simde_mm_abs_epi16
(
simde_mm_adds_epi16
(
rho_rs
[
63
-
j
],
y1r
));
psi_r_s
[
j
]
=
simde_mm_abs_epi16
(
simde_mm_adds_epi16
(
rho_rs
[
63
-
j
],
y1r
));
// simde__m128i psi_i calculation from rho_rpi or rho_rmi
// simde__m128i psi_i calculation from rho_rpi or rho_rmi
simde__m128i
psi_i_s
[
64
]
__attribute__
((
aligned
(
16
)))
;
simde__m128i
psi_i_s
[
64
];
const
uint8_t
rho_rs_index
[
32
]
=
{
7
,
15
,
23
,
31
,
24
,
16
,
8
,
0
,
6
,
14
,
22
,
30
,
25
,
17
,
9
,
1
,
5
,
13
,
21
,
29
,
26
,
18
,
10
,
2
,
4
,
12
,
20
,
28
,
27
,
19
,
11
,
3
};
const
uint8_t
rho_rs_index
[
32
]
=
{
7
,
15
,
23
,
31
,
24
,
16
,
8
,
0
,
6
,
14
,
22
,
30
,
25
,
17
,
9
,
1
,
5
,
13
,
21
,
29
,
26
,
18
,
10
,
2
,
4
,
12
,
20
,
28
,
27
,
19
,
11
,
3
};
for
(
int
k
=
0
;
k
<
32
;
k
+=
8
){
// psi_i_s[0~31]
for
(
int
k
=
0
;
k
<
32
;
k
+=
8
){
// psi_i_s[0~31]
for
(
int
j
=
k
;
j
<
k
+
4
;
j
++
)
for
(
int
j
=
k
;
j
<
k
+
4
;
j
++
)
...
@@ -1566,7 +1566,7 @@ void nr_ulsch_qam64_qam64(c16_t *stream0_in,
...
@@ -1566,7 +1566,7 @@ void nr_ulsch_qam64_qam64(c16_t *stream0_in,
y0i_seven_over_sqrt_21
=
simde_mm_mulhi_epi16
(
y0i
,
SEVEN_OVER_SQRT_42
);
y0i_seven_over_sqrt_21
=
simde_mm_mulhi_epi16
(
y0i
,
SEVEN_OVER_SQRT_42
);
y0i_seven_over_sqrt_21
=
simde_mm_slli_epi16
(
y0i_seven_over_sqrt_21
,
2
);
// Q2.14
y0i_seven_over_sqrt_21
=
simde_mm_slli_epi16
(
y0i_seven_over_sqrt_21
,
2
);
// Q2.14
simde__m128i
y0_s
[
64
]
__attribute__
((
aligned
(
16
)))
;
simde__m128i
y0_s
[
64
];
const
simde__m128i
y0r_over_s
[
8
]
=
{
y0r_seven_over_sqrt_21
,
y0r_five_over_sqrt_21
,
y0r_three_over_sqrt_21
,
y0r_one_over_sqrt_21
};
const
simde__m128i
y0r_over_s
[
8
]
=
{
y0r_seven_over_sqrt_21
,
y0r_five_over_sqrt_21
,
y0r_three_over_sqrt_21
,
y0r_one_over_sqrt_21
};
for
(
int
j
=
0
;
j
<
32
;
j
+=
8
){
for
(
int
j
=
0
;
j
<
32
;
j
+=
8
){
y0_s
[
j
+
0
]
=
simde_mm_adds_epi16
(
y0r_over_s
[
j
>>
3
],
y0i_seven_over_sqrt_21
);
y0_s
[
j
+
0
]
=
simde_mm_adds_epi16
(
y0r_over_s
[
j
>>
3
],
y0i_seven_over_sqrt_21
);
...
@@ -1582,10 +1582,10 @@ void nr_ulsch_qam64_qam64(c16_t *stream0_in,
...
@@ -1582,10 +1582,10 @@ void nr_ulsch_qam64_qam64(c16_t *stream0_in,
ch_mag_int_with_sigma2
=
simde_mm_srai_epi16
(
ch_mag_int
,
1
);
// *2
ch_mag_int_with_sigma2
=
simde_mm_srai_epi16
(
ch_mag_int
,
1
);
// *2
two_ch_mag_int_with_sigma2
=
ch_mag_int
;
// *4
two_ch_mag_int_with_sigma2
=
ch_mag_int
;
// *4
three_ch_mag_int_with_sigma2
=
simde_mm_adds_epi16
(
ch_mag_int_with_sigma2
,
two_ch_mag_int_with_sigma2
);
// *6
three_ch_mag_int_with_sigma2
=
simde_mm_adds_epi16
(
ch_mag_int_with_sigma2
,
two_ch_mag_int_with_sigma2
);
// *6
simde__m128i
a_r_s
[
64
]
__attribute__
((
aligned
(
16
)))
;
simde__m128i
a_r_s
[
64
];
simde__m128i
a_i_s
[
64
]
__attribute__
((
aligned
(
16
)))
;
simde__m128i
a_i_s
[
64
];
simde__m128i
psi_a_s
[
64
]
__attribute__
((
aligned
(
16
)))
;
simde__m128i
psi_a_s
[
64
];
simde__m128i
a_sq_s
[
64
]
__attribute__
((
aligned
(
16
)))
;
simde__m128i
a_sq_s
[
64
];
for
(
int
j
=
0
;
j
<
64
;
j
++
){
for
(
int
j
=
0
;
j
<
64
;
j
++
){
// Detection of interference term
// Detection of interference term
a_r_s
[
j
]
=
interference_abs_64qam_epi16
(
psi_r_s
[
j
],
ch_mag_int_with_sigma2
,
two_ch_mag_int_with_sigma2
,
three_ch_mag_int_with_sigma2
,
ONE_OVER_SQRT_2_42
,
THREE_OVER_SQRT_2_42
,
FIVE_OVER_SQRT_2_42
,
SEVEN_OVER_SQRT_2_42
);
a_r_s
[
j
]
=
interference_abs_64qam_epi16
(
psi_r_s
[
j
],
ch_mag_int_with_sigma2
,
two_ch_mag_int_with_sigma2
,
three_ch_mag_int_with_sigma2
,
ONE_OVER_SQRT_2_42
,
THREE_OVER_SQRT_2_42
,
FIVE_OVER_SQRT_2_42
,
SEVEN_OVER_SQRT_2_42
);
...
@@ -1634,7 +1634,7 @@ void nr_ulsch_qam64_qam64(c16_t *stream0_in,
...
@@ -1634,7 +1634,7 @@ void nr_ulsch_qam64_qam64(c16_t *stream0_in,
ch_mag_98_over_42_with_sigma2
=
simde_mm_slli_epi16
(
ch_mag_98_over_42_with_sigma2
,
2
);
ch_mag_98_over_42_with_sigma2
=
simde_mm_slli_epi16
(
ch_mag_98_over_42_with_sigma2
,
2
);
// Computing Metrics
// Computing Metrics
simde__m128i
bit_met_s
[
64
]
__attribute__
((
aligned
(
16
)))
;
simde__m128i
bit_met_s
[
64
];
xmm1
=
simde_mm_adds_epi16
(
simde_mm_subs_epi16
(
psi_a_s
[
0
],
a_sq_s
[
0
]),
y0_s
[
0
]);
xmm1
=
simde_mm_adds_epi16
(
simde_mm_subs_epi16
(
psi_a_s
[
0
],
a_sq_s
[
0
]),
y0_s
[
0
]);
bit_met_s
[
0
]
=
simde_mm_subs_epi16
(
xmm1
,
ch_mag_98_over_42_with_sigma2
);
bit_met_s
[
0
]
=
simde_mm_subs_epi16
(
xmm1
,
ch_mag_98_over_42_with_sigma2
);
...
@@ -1997,7 +1997,7 @@ void nr_ulsch_qam64_qam64(c16_t *stream0_in,
...
@@ -1997,7 +1997,7 @@ void nr_ulsch_qam64_qam64(c16_t *stream0_in,
simde__m256i
rho_rmi
=
simde_mm256_subs_epi16
(
xmm2
,
xmm3
);
// rho* = Re(rho) - Im(rho)
simde__m256i
rho_rmi
=
simde_mm256_subs_epi16
(
xmm2
,
xmm3
);
// rho* = Re(rho) - Im(rho)
// Compute the different rhos
// Compute the different rhos
simde__m256i
rho_rs
[
32
]
__attribute__
((
aligned
(
16
)))
;
simde__m256i
rho_rs
[
32
];
rho_rs
[
27
]
=
simde_mm256_mulhi_epi16
(
rho_rpi
,
ONE_OVER_SQRT_42
);
rho_rs
[
27
]
=
simde_mm256_mulhi_epi16
(
rho_rpi
,
ONE_OVER_SQRT_42
);
rho_rs
[
28
]
=
simde_mm256_mulhi_epi16
(
rho_rmi
,
ONE_OVER_SQRT_42
);
rho_rs
[
28
]
=
simde_mm256_mulhi_epi16
(
rho_rmi
,
ONE_OVER_SQRT_42
);
rho_rs
[
18
]
=
simde_mm256_mulhi_epi16
(
rho_rpi
,
THREE_OVER_SQRT_42
);
rho_rs
[
18
]
=
simde_mm256_mulhi_epi16
(
rho_rpi
,
THREE_OVER_SQRT_42
);
...
@@ -2061,14 +2061,14 @@ void nr_ulsch_qam64_qam64(c16_t *stream0_in,
...
@@ -2061,14 +2061,14 @@ void nr_ulsch_qam64_qam64(c16_t *stream0_in,
xmm0
=
simde_mm256_broadcastw_epi16
(
simde_mm_set1_epi16
(
0
));
// ZERO for abs_pi16
xmm0
=
simde_mm256_broadcastw_epi16
(
simde_mm_set1_epi16
(
0
));
// ZERO for abs_pi16
xmm2
=
simde_mm256_subs_epi16
(
rho_rs
[
0
],
y1r
);
xmm2
=
simde_mm256_subs_epi16
(
rho_rs
[
0
],
y1r
);
simde__m256i
psi_r_s
[
64
]
__attribute__
((
aligned
(
16
)))
;
simde__m256i
psi_r_s
[
64
];
for
(
int
j
=
0
;
j
<
32
;
j
++
)
// psi_r_s[0~31], rho_rs[0~31]
for
(
int
j
=
0
;
j
<
32
;
j
++
)
// psi_r_s[0~31], rho_rs[0~31]
psi_r_s
[
j
]
=
simde_mm256_abs_epi16
(
simde_mm256_subs_epi16
(
rho_rs
[
j
],
y1r
));
psi_r_s
[
j
]
=
simde_mm256_abs_epi16
(
simde_mm256_subs_epi16
(
rho_rs
[
j
],
y1r
));
for
(
int
j
=
32
;
j
<
64
;
j
++
)
// psi_r_s[32~64], rho_rs[31~0]
for
(
int
j
=
32
;
j
<
64
;
j
++
)
// psi_r_s[32~64], rho_rs[31~0]
psi_r_s
[
j
]
=
simde_mm256_abs_epi16
(
simde_mm256_adds_epi16
(
rho_rs
[
63
-
j
],
y1r
));
psi_r_s
[
j
]
=
simde_mm256_abs_epi16
(
simde_mm256_adds_epi16
(
rho_rs
[
63
-
j
],
y1r
));
// simde__m256i psi_i calculation from rho_rpi or rho_rmi
// simde__m256i psi_i calculation from rho_rpi or rho_rmi
simde__m256i
psi_i_s
[
64
]
__attribute__
((
aligned
(
16
)))
;
simde__m256i
psi_i_s
[
64
];
const
uint8_t
rho_rs_index
[
32
]
=
{
7
,
15
,
23
,
31
,
24
,
16
,
8
,
0
,
6
,
14
,
22
,
30
,
25
,
17
,
9
,
1
,
5
,
13
,
21
,
29
,
26
,
18
,
10
,
2
,
4
,
12
,
20
,
28
,
27
,
19
,
11
,
3
};
const
uint8_t
rho_rs_index
[
32
]
=
{
7
,
15
,
23
,
31
,
24
,
16
,
8
,
0
,
6
,
14
,
22
,
30
,
25
,
17
,
9
,
1
,
5
,
13
,
21
,
29
,
26
,
18
,
10
,
2
,
4
,
12
,
20
,
28
,
27
,
19
,
11
,
3
};
for
(
int
k
=
0
;
k
<
32
;
k
+=
8
){
// psi_i_s[0~31]
for
(
int
k
=
0
;
k
<
32
;
k
+=
8
){
// psi_i_s[0~31]
for
(
int
j
=
k
;
j
<
k
+
4
;
j
++
)
for
(
int
j
=
k
;
j
<
k
+
4
;
j
++
)
...
@@ -2109,7 +2109,7 @@ void nr_ulsch_qam64_qam64(c16_t *stream0_in,
...
@@ -2109,7 +2109,7 @@ void nr_ulsch_qam64_qam64(c16_t *stream0_in,
y0i_seven_over_sqrt_21
=
simde_mm256_mulhi_epi16
(
y0i
,
SEVEN_OVER_SQRT_42
);
y0i_seven_over_sqrt_21
=
simde_mm256_mulhi_epi16
(
y0i
,
SEVEN_OVER_SQRT_42
);
y0i_seven_over_sqrt_21
=
simde_mm256_slli_epi16
(
y0i_seven_over_sqrt_21
,
2
);
// Q2.14
y0i_seven_over_sqrt_21
=
simde_mm256_slli_epi16
(
y0i_seven_over_sqrt_21
,
2
);
// Q2.14
simde__m256i
y0_s
[
64
]
__attribute__
((
aligned
(
16
)))
;
simde__m256i
y0_s
[
64
];
const
simde__m256i
y0r_over_s
[
8
]
=
{
y0r_seven_over_sqrt_21
,
y0r_five_over_sqrt_21
,
y0r_three_over_sqrt_21
,
y0r_one_over_sqrt_21
};
const
simde__m256i
y0r_over_s
[
8
]
=
{
y0r_seven_over_sqrt_21
,
y0r_five_over_sqrt_21
,
y0r_three_over_sqrt_21
,
y0r_one_over_sqrt_21
};
for
(
int
j
=
0
;
j
<
32
;
j
+=
8
){
for
(
int
j
=
0
;
j
<
32
;
j
+=
8
){
y0_s
[
j
+
0
]
=
simde_mm256_adds_epi16
(
y0r_over_s
[
j
>>
3
],
y0i_seven_over_sqrt_21
);
y0_s
[
j
+
0
]
=
simde_mm256_adds_epi16
(
y0r_over_s
[
j
>>
3
],
y0i_seven_over_sqrt_21
);
...
@@ -2125,10 +2125,10 @@ void nr_ulsch_qam64_qam64(c16_t *stream0_in,
...
@@ -2125,10 +2125,10 @@ void nr_ulsch_qam64_qam64(c16_t *stream0_in,
ch_mag_int_with_sigma2
=
simde_mm256_srai_epi16
(
ch_mag_int
,
1
);
// *2
ch_mag_int_with_sigma2
=
simde_mm256_srai_epi16
(
ch_mag_int
,
1
);
// *2
two_ch_mag_int_with_sigma2
=
ch_mag_int
;
// *4
two_ch_mag_int_with_sigma2
=
ch_mag_int
;
// *4
three_ch_mag_int_with_sigma2
=
simde_mm256_adds_epi16
(
ch_mag_int_with_sigma2
,
two_ch_mag_int_with_sigma2
);
// *6
three_ch_mag_int_with_sigma2
=
simde_mm256_adds_epi16
(
ch_mag_int_with_sigma2
,
two_ch_mag_int_with_sigma2
);
// *6
simde__m256i
a_r_s
[
64
]
__attribute__
((
aligned
(
16
)))
;
simde__m256i
a_r_s
[
64
];
simde__m256i
a_i_s
[
64
]
__attribute__
((
aligned
(
16
)))
;
simde__m256i
a_i_s
[
64
];
simde__m256i
psi_a_s
[
64
]
__attribute__
((
aligned
(
16
)))
;
simde__m256i
psi_a_s
[
64
];
simde__m256i
a_sq_s
[
64
]
__attribute__
((
aligned
(
16
)))
;
simde__m256i
a_sq_s
[
64
];
for
(
int
j
=
0
;
j
<
64
;
j
++
){
for
(
int
j
=
0
;
j
<
64
;
j
++
){
// Detection of interference term
// Detection of interference term
a_r_s
[
j
]
=
interference_abs_64qam_epi16_256
(
psi_r_s
[
j
],
ch_mag_int_with_sigma2
,
two_ch_mag_int_with_sigma2
,
three_ch_mag_int_with_sigma2
,
ONE_OVER_SQRT_2_42
,
THREE_OVER_SQRT_2_42
,
FIVE_OVER_SQRT_2_42
,
SEVEN_OVER_SQRT_2_42
);
a_r_s
[
j
]
=
interference_abs_64qam_epi16_256
(
psi_r_s
[
j
],
ch_mag_int_with_sigma2
,
two_ch_mag_int_with_sigma2
,
three_ch_mag_int_with_sigma2
,
ONE_OVER_SQRT_2_42
,
THREE_OVER_SQRT_2_42
,
FIVE_OVER_SQRT_2_42
,
SEVEN_OVER_SQRT_2_42
);
...
@@ -2177,7 +2177,7 @@ void nr_ulsch_qam64_qam64(c16_t *stream0_in,
...
@@ -2177,7 +2177,7 @@ void nr_ulsch_qam64_qam64(c16_t *stream0_in,
ch_mag_98_over_42_with_sigma2
=
simde_mm256_slli_epi16
(
ch_mag_98_over_42_with_sigma2
,
2
);
ch_mag_98_over_42_with_sigma2
=
simde_mm256_slli_epi16
(
ch_mag_98_over_42_with_sigma2
,
2
);
// Computing Metrics
// Computing Metrics
simde__m256i
bit_met_s
[
64
]
__attribute__
((
aligned
(
16
)))
;
simde__m256i
bit_met_s
[
64
];
xmm1
=
simde_mm256_adds_epi16
(
simde_mm256_subs_epi16
(
psi_a_s
[
0
],
a_sq_s
[
0
]),
y0_s
[
0
]);
xmm1
=
simde_mm256_adds_epi16
(
simde_mm256_subs_epi16
(
psi_a_s
[
0
],
a_sq_s
[
0
]),
y0_s
[
0
]);
bit_met_s
[
0
]
=
simde_mm256_subs_epi16
(
xmm1
,
ch_mag_98_over_42_with_sigma2
);
bit_met_s
[
0
]
=
simde_mm256_subs_epi16
(
xmm1
,
ch_mag_98_over_42_with_sigma2
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment