zzha zzha / OpenXG-RAN · Commits

Commit e241edf2
Authored Jul 13, 2023 by Quency; committed by Quency Lin on Oct 03, 2023
feat / SISO: Use 128/256 SIMDe for ARM64/x86
parent b4647988
Showing 1 changed file with 164 additions and 5 deletions:
openair1/PHY/NR_TRANSPORT/nr_ulsch_llr_computation.c (+164, -5)
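Per the commit message, these SISO ULSCH LLR kernels now compile to 128-bit SIMDe vectors (which SIMDe maps to NEON on ARM64) when USE_128BIT is defined, and to 256-bit AVX2 vectors otherwise on x86. A minimal sketch of that build-time dispatch pattern, assuming the USE_128BIT flag and an element count that is a multiple of the vector width (the function name and arrays are illustrative, not from this file):

#include <stdint.h>
#include <simde/x86/avx2.h>  /* also provides the 128-bit SSE* wrappers */

/* Saturating 16-bit subtract over two arrays, in the style of the LLR kernels:
 * one source compiles to either vector width. */
void subs_epi16_array(const int16_t *a, const int16_t *b, int16_t *out, int n)
{
#ifdef USE_128BIT
  for (int i = 0; i < n; i += 8) {   /* 8 x int16 per simde__m128i (NEON on ARM64) */
    simde__m128i va = simde_mm_loadu_si128((const simde__m128i *)(a + i));
    simde__m128i vb = simde_mm_loadu_si128((const simde__m128i *)(b + i));
    simde_mm_storeu_si128((simde__m128i *)(out + i), simde_mm_subs_epi16(va, vb));
  }
#else
  for (int i = 0; i < n; i += 16) {  /* 16 x int16 per simde__m256i (AVX2 on x86) */
    simde__m256i va = simde_mm256_loadu_si256((const simde__m256i *)(a + i));
    simde__m256i vb = simde_mm256_loadu_si256((const simde__m256i *)(b + i));
    simde_mm256_storeu_si256((simde__m256i *)(out + i), simde_mm256_subs_epi16(va, vb));
  }
#endif
}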
openair1/PHY/NR_TRANSPORT/nr_ulsch_llr_computation.c @ e241edf2
@@ -66,6 +66,36 @@ void nr_ulsch_16qam_llr(int32_t *rxdataF_comp,
                        uint32_t nb_re,
                        uint8_t symbol)
{
#ifdef USE_128BIT
  simde__m128i *rxF = (simde__m128i *)rxdataF_comp;
  simde__m128i *ch_mag;
  simde__m128i llr128[2];
  register simde__m128i xmm0;
  simde__m64 *llr64 = (simde__m64 *)ulsch_llr;
  int i;
  int nb_rb = nb_re / NR_NB_SC_PER_RB;
  int off = ((nb_rb & 1) == 1) ? 4 : 0;

  ch_mag = (simde__m128i *)&ul_ch_mag[(symbol * (off + (nb_rb * 12)))];
  nb_re >>= 2;  // length in quad words (4 REs)
  nb_re += ((nb_re & 3) == 0 ? 0 : 1);

  // Each iteration does 4 RE (gives 16 16bit-llrs)
  for (i = 0; i < nb_re; i++) {
    xmm0 = simde_mm_abs_epi16(rxF[i]);            // even lanes: |y_R|, odd lanes: |y_I|
    xmm0 = simde_mm_subs_epi16(ch_mag[i], xmm0);  // even lanes: ch_mag - |y_R|, odd lanes: ch_mag - |y_I|

    llr128[0] = simde_mm_unpacklo_epi32(rxF[i], xmm0);  // llr128[0] contains the llrs of the 1st, 2nd, 5th and 6th REs
    llr128[1] = simde_mm_unpackhi_epi32(rxF[i], xmm0);  // llr128[1] contains the llrs of the 3rd, 4th, 7th and 8th REs

    llr64[0] = (simde__m64)simde_mm_extract_epi64(llr128[0], 0);  // low 16 bits-> y_R, high 16 bits-> y_I
    llr64[1] = (simde__m64)simde_mm_extract_epi64(llr128[0], 1);
    llr64[2] = (simde__m64)simde_mm_extract_epi64(llr128[1], 0);
    llr64[3] = (simde__m64)simde_mm_extract_epi64(llr128[1], 1);
    llr64 += 4;
  }
#else
  simde__m256i *rxF_256 = (simde__m256i *)rxdataF_comp;
  simde__m256i *ch_mag = (simde__m256i *)ul_ch_mag;
  int64_t *llr_64 = (int64_t *)ulsch_llr;
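For readability, here is a scalar sketch (my reconstruction, not part of the diff) of what each 16-QAM iteration above produces per resource element: the first LLR pair is the compensated sample itself, and the second pair is the scaled channel magnitude minus the sample's absolute value, with 16-bit saturation matching simde_mm_subs_epi16:

#include <stdint.h>
#include <stdlib.h>

static inline int16_t sat_sub16(int16_t a, int16_t b)
{
  int32_t d = (int32_t)a - (int32_t)b;  /* widen, then saturate to int16 */
  return d > INT16_MAX ? INT16_MAX : (d < INT16_MIN ? INT16_MIN : (int16_t)d);
}

void qam16_llr_scalar(const int16_t *rxF, const int16_t *ch_mag, int16_t *llr, uint32_t nb_re)
{
  for (uint32_t i = 0; i < nb_re; i++) {
    int16_t y_re = rxF[2 * i], y_im = rxF[2 * i + 1];
    llr[4 * i + 0] = y_re;                                              /* bit 0 */
    llr[4 * i + 1] = y_im;                                              /* bit 1 */
    llr[4 * i + 2] = sat_sub16(ch_mag[2 * i], (int16_t)abs(y_re));      /* bit 2 */
    llr[4 * i + 3] = sat_sub16(ch_mag[2 * i + 1], (int16_t)abs(y_im));  /* bit 3 */
  }
}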
@@ -91,6 +121,8 @@ void nr_ulsch_16qam_llr(int32_t *rxdataF_comp,
    *llr_64++ = simde_mm256_extract_epi64(xmm2, 2);
    *llr_64++ = simde_mm256_extract_epi64(xmm2, 3);
  }
#endif
  simde_mm_empty();
}

//----------------------------------------------------------------------------------------------
@@ -104,6 +136,38 @@ void nr_ulsch_64qam_llr(int32_t *rxdataF_comp,
                        uint32_t nb_re,
                        uint8_t symbol)
{
#ifdef USE_128BIT
  int nb_rb = nb_re / NR_NB_SC_PER_RB;
  int off = ((nb_rb & 1) == 1) ? 4 : 0;
  simde__m128i *rxF = (simde__m128i *)rxdataF_comp;
  simde__m128i *ch_mag, *ch_magb;
  register simde__m128i xmm0, xmm1, xmm2;
  int i;

  ch_mag = (simde__m128i *)&ul_ch_mag[(symbol * (off + (nb_rb * 12)))];
  ch_magb = (simde__m128i *)&ul_ch_magb[(symbol * (off + (nb_rb * 12)))];
  nb_re = nb_re >> 2;  // length in 128-bit words (4 REs)
  nb_re += ((nb_re & 3) == 0 ? 0 : 1);
  simde__m64 *llr64 = (simde__m64 *)ulsch_llr;

  // Each iteration does 4 RE (gives 24 16bit-llrs)
  for (i = 0; i < nb_re; i++) {
    xmm0 = rxF[i];
    xmm1 = simde_mm_abs_epi16(xmm0);
    xmm1 = simde_mm_subs_epi16(ch_mag[i], xmm1);
    xmm2 = simde_mm_abs_epi16(xmm1);
    xmm2 = simde_mm_subs_epi16(ch_magb[i], xmm2);

    llr64[0] = simde_mm_set_pi32(simde_mm_extract_epi32(xmm1, 0), simde_mm_extract_epi32(xmm0, 0));
    llr64[1] = simde_mm_set_pi32(simde_mm_extract_epi32(xmm0, 1), simde_mm_extract_epi32(xmm2, 0));
    llr64[2] = simde_mm_set_pi32(simde_mm_extract_epi32(xmm2, 1), simde_mm_extract_epi32(xmm1, 1));
    llr64[3] = simde_mm_set_pi32(simde_mm_extract_epi32(xmm1, 2), simde_mm_extract_epi32(xmm0, 2));
    llr64[4] = simde_mm_set_pi32(simde_mm_extract_epi32(xmm0, 3), simde_mm_extract_epi32(xmm2, 2));
    llr64[5] = simde_mm_set_pi32(simde_mm_extract_epi32(xmm2, 3), simde_mm_extract_epi32(xmm1, 3));
    llr64 += 6;
  }
#else
  simde__m256i *rxF = (simde__m256i *)rxdataF_comp;
  simde__m256i xmm0, xmm1, xmm2;
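Likewise, a hedged scalar equivalent of the 64-QAM chain above: each further bit pair subtracts the next magnitude term from the absolute value of the previous LLR (a = y, b = ch_mag - |a|, c = ch_magb - |b|). The per-RE packing order below is illustrative only; sat_sub16 is reused from the previous sketch:

void qam64_llr_scalar(const int16_t *rxF, const int16_t *ch_mag, const int16_t *ch_magb,
                      int16_t *llr, uint32_t nb_re)
{
  for (uint32_t i = 0; i < 2 * nb_re; i++) {             /* real and imaginary components alike */
    int16_t a = rxF[i];                                  /* level 0: compensated sample */
    int16_t b = sat_sub16(ch_mag[i], (int16_t)abs(a));   /* level 1 */
    int16_t c = sat_sub16(ch_magb[i], (int16_t)abs(b));  /* level 2 */
    uint32_t re = i >> 1, comp = i & 1;
    llr[6 * re + comp]     = a;  /* assumed packing: a_R a_I b_R b_I c_R c_I per RE */
    llr[6 * re + 2 + comp] = b;
    llr[6 * re + 4 + comp] = c;
  }
}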
@@ -154,6 +218,8 @@ void nr_ulsch_64qam_llr(int32_t *rxdataF_comp,
    *llr_32++ = simde_mm256_extract_epi32(xmm1, 7);
    *llr_32++ = simde_mm256_extract_epi32(xmm2, 7);
  }
#endif
  simde_mm_empty();
}

void nr_ulsch_256qam_llr(int32_t *rxdataF_comp,
@@ -164,6 +230,70 @@ void nr_ulsch_256qam_llr(int32_t *rxdataF_comp,
                         uint32_t nb_re,
                         uint8_t symbol)
{
#ifdef USE_128BIT
  int off = ((nb_rb & 1) == 1) ? 4 : 0;
  simde__m128i *rxF = (simde__m128i *)rxdataF_comp;
  simde__m128i *ch_mag, *ch_magb, *ch_magc;
  register simde__m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6;
  simde__m128i *llr128 = (simde__m128i *)ulsch_llr;

  ch_mag = (simde__m128i *)&ul_ch_mag[(symbol * (off + (nb_rb * 12)))];
  ch_magb = (simde__m128i *)&ul_ch_magb[(symbol * (off + (nb_rb * 12)))];
  ch_magc = (simde__m128i *)&ul_ch_magc[(symbol * (off + (nb_rb * 12)))];
  int len_mod4 = nb_re & 3;
  int nb_re128 = nb_re >> 2;  // length in 128-bit words (4 REs)

  for (int i = 0; i < nb_re128; i++) {
    xmm0 = simde_mm_abs_epi16(rxF[i]);             // even lanes: |y_R|, odd lanes: |y_I|
    xmm0 = simde_mm_subs_epi16(ch_mag[i], xmm0);   // even lanes: ch_mag - |y_R|, odd lanes: ch_mag - |y_I|
    xmm1 = simde_mm_abs_epi16(xmm0);
    xmm1 = simde_mm_subs_epi16(ch_magb[i], xmm1);  // contains 8 LLRs
    xmm2 = simde_mm_abs_epi16(xmm1);
    xmm2 = simde_mm_subs_epi16(ch_magc[i], xmm2);  // contains 8 LLRs
    // rxF[i] A0 A1 A2 A3
    // xmm0   B0 B1 B2 B3
    // xmm1   C0 C1 C2 C3
    // xmm2   D0 D1 D2 D3
    xmm3 = simde_mm_unpacklo_epi32(rxF[i], xmm0);  // A0 B0 A1 B1
    xmm4 = simde_mm_unpackhi_epi32(rxF[i], xmm0);  // A2 B2 A3 B3
    xmm5 = simde_mm_unpacklo_epi32(xmm1, xmm2);    // C0 D0 C1 D1
    xmm6 = simde_mm_unpackhi_epi32(xmm1, xmm2);    // C2 D2 C3 D3

    llr128[0] = simde_mm_unpacklo_epi64(xmm3, xmm5);  // A0 B0 C0 D0
    llr128[1] = simde_mm_unpackhi_epi64(xmm3, xmm5);  // A1 B1 C1 D1
    llr128[2] = simde_mm_unpacklo_epi64(xmm4, xmm6);  // A2 B2 C2 D2
    llr128[3] = simde_mm_unpackhi_epi64(xmm4, xmm6);  // A3 B3 C3 D3
    llr128 += 4;
  }

  if (len_mod4) {
    printf("len_mod4=%d\n", len_mod4);
    int nb_re64 = nb_re >> 1;
    simde__m64 *llr64 = (simde__m64 *)llr128;
    simde__m64 xmm0, xmm1, xmm2;
    simde__m64 *rxF = (simde__m64 *)rxdataF_comp;
    simde__m64 *ch_mag = (simde__m64 *)&ul_ch_mag[(symbol * (off + (nb_rb * 12)))];
    simde__m64 *ch_magb = (simde__m64 *)&ul_ch_magb[(symbol * (off + (nb_rb * 12)))];
    simde__m64 *ch_magc = (simde__m64 *)&ul_ch_magc[(symbol * (off + (nb_rb * 12)))];

    xmm0 = simde_mm_abs_pi16(rxF[nb_re64 - 1]);            // even lanes: |y_R|, odd lanes: |y_I|
    xmm0 = simde_mm_subs_pi16(ch_mag[nb_re64 - 1], xmm0);  // even lanes: ch_mag - |y_R|, odd lanes: ch_mag - |y_I|
    xmm1 = simde_mm_abs_pi16(xmm0);
    xmm1 = simde_mm_subs_pi16(ch_magb[nb_re64 - 1], xmm1);  // contains 4 LLRs
    xmm2 = simde_mm_abs_pi16(xmm1);
    xmm2 = simde_mm_subs_pi16(ch_magc[nb_re64 - 1], xmm2);  // contains 4 LLRs
    // rxF    A0 A1
    // xmm0   B0 B1
    // xmm1   C0 C1
    // xmm2   D0 D1
    llr64[0] = simde_m_punpckldq(rxF[nb_re64 - 1], xmm0);  // A0 B0
    llr64[2] = simde_m_punpckhdq(rxF[nb_re64 - 1], xmm0);  // A1 B1
    llr64[1] = simde_m_punpckldq(xmm1, xmm2);              // C0 D0
    llr64[3] = simde_m_punpckhdq(xmm1, xmm2);              // C1 D1
  }
#else
  simde__m256i *rxF = (simde__m256i *)rxdataF_comp;
  simde__m256i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6;
  simde__m256i *llr256 = (simde__m256i *)ulsch_llr;
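The four unpacklo/unpackhi pairs in the loop above are the classic 4x4 transpose of 32-bit lanes: rows A, B, C, D (one LLR level each) become columns holding all levels of one RE. A standalone sketch of the idiom (helper name assumed, not in the file):

#include <simde/x86/sse2.h>

/* Transpose four simde__m128i rows of 32-bit lanes: input row r holds
 * r0 r1 r2 r3; output column k gathers lane k of every row. */
void transpose4x4_epi32(const simde__m128i in[4], simde__m128i out[4])
{
  simde__m128i t0 = simde_mm_unpacklo_epi32(in[0], in[1]);  /* A0 B0 A1 B1 */
  simde__m128i t1 = simde_mm_unpackhi_epi32(in[0], in[1]);  /* A2 B2 A3 B3 */
  simde__m128i t2 = simde_mm_unpacklo_epi32(in[2], in[3]);  /* C0 D0 C1 D1 */
  simde__m128i t3 = simde_mm_unpackhi_epi32(in[2], in[3]);  /* C2 D2 C3 D3 */
  out[0] = simde_mm_unpacklo_epi64(t0, t2);  /* A0 B0 C0 D0 */
  out[1] = simde_mm_unpackhi_epi64(t0, t2);  /* A1 B1 C1 D1 */
  out[2] = simde_mm_unpacklo_epi64(t1, t3);  /* A2 B2 C2 D2 */
  out[3] = simde_mm_unpackhi_epi64(t1, t3);  /* A3 B3 C3 D3 */
}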
@@ -198,6 +328,8 @@ void nr_ulsch_256qam_llr(int32_t *rxdataF_comp,
    *llr256++ = simde_mm256_permute2x128_si256(xmm0, xmm1, 0x31);  // A4 B4 C4 D4 A5 B5 C5 D5
    *llr256++ = simde_mm256_permute2x128_si256(xmm2, xmm3, 0x31);  // A6 B6 C6 D6 A7 B7 C7 D7
  }
#endif
  simde_mm_empty();
}

void nr_ulsch_compute_llr(int32_t *rxdataF_comp,
@@ -568,9 +700,7 @@ void nr_ulsch_qpsk_qpsk(c16_t *stream0_in, c16_t *stream1_in, c16_t *stream0_out
    }
  }
#endif
  simde_mm_empty();
-  simde_m_empty();
}
@@ -1875,7 +2005,6 @@ void nr_ulsch_qam16_qam16(c16_t *stream0_in,
  }
#endif
  simde_mm_empty();
-  simde_m_empty();
}
/*
@@ -6664,9 +6793,7 @@ void nr_ulsch_qam64_qam64(c16_t *stream0_in,
    }
  }
#endif
  simde_mm_empty();
-  simde_m_empty();
}
static void nr_ulsch_shift_llr(int16_t **llr_layers, uint32_t nb_re, uint32_t rxdataF_ext_offset, uint8_t mod_order, int shift)
@@ -6692,3 +6819,35 @@ static void nr_ulsch_shift_llr(int16_t **llr_layers, uint32_t nb_re, uint32_t rx
    llr_layers1[i] = simde_mm_srai_epi16(llr_layers1[i], shift);
  }
}

void nr_ulsch_compute_ML_llr(NR_gNB_PUSCH *pusch_vars,
                             uint32_t symbol,
                             c16_t *rxdataF_comp0,
                             c16_t *rxdataF_comp1,
                             c16_t *ul_ch_mag0,
                             c16_t *ul_ch_mag1,
                             c16_t *llr_layers0,
                             c16_t *llr_layers1,
                             c16_t *rho0,
                             c16_t *rho1,
                             uint32_t nb_re,
                             uint8_t mod_order)
{
  switch (mod_order) {
    case 2:
      nr_ulsch_qpsk_qpsk(rxdataF_comp0, rxdataF_comp1, llr_layers0, rho0, nb_re);
      nr_ulsch_qpsk_qpsk(rxdataF_comp1, rxdataF_comp0, llr_layers1, rho1, nb_re);
      nr_ulsch_shift_llr(pusch_vars->llr_layers, nb_re, pusch_vars->llr_offset[symbol] >> 1, 2, 4);
      break;
    case 4:
      nr_ulsch_qam16_qam16(rxdataF_comp0, rxdataF_comp1, ul_ch_mag0, ul_ch_mag1, llr_layers0, rho0, nb_re);
      nr_ulsch_qam16_qam16(rxdataF_comp1, rxdataF_comp0, ul_ch_mag1, ul_ch_mag0, llr_layers1, rho1, nb_re);
      break;
    case 6:
      nr_ulsch_qam64_qam64(rxdataF_comp0, rxdataF_comp1, ul_ch_mag0, ul_ch_mag1, llr_layers0, rho0, nb_re);
      nr_ulsch_qam64_qam64(rxdataF_comp1, rxdataF_comp0, ul_ch_mag1, ul_ch_mag0, llr_layers1, rho1, nb_re);
      break;
    default:
      AssertFatal(1 == 0, "nr_ulsch_compute_llr: invalid Qm value, Qm = %d\n", mod_order);
  }
}
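Note that nr_ulsch_compute_ML_llr handles only Qm values 2, 4 and 6 (QPSK, 16-QAM, 64-QAM); any other modulation order, including 256-QAM (Qm = 8), falls through to the AssertFatal default branch.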