Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
O
OpenXG-RAN
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
spbro
OpenXG-RAN
Commits
f5dccc2a
Commit
f5dccc2a
authored
Jun 29, 2023
by
rmagueta
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Improvements in nr_dlsch_channel_compensation() function
parent
af498835
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
157 additions
and
230 deletions
+157
-230
openair1/PHY/NR_UE_TRANSPORT/nr_dlsch_demodulation.c
openair1/PHY/NR_UE_TRANSPORT/nr_dlsch_demodulation.c
+157
-230
No files found.
openair1/PHY/NR_UE_TRANSPORT/nr_dlsch_demodulation.c
View file @
f5dccc2a
...
...
@@ -738,6 +738,20 @@ void nr_dlsch_deinterleaving(uint8_t symbol,
// Pre-processing for LLR computation
//==============================================================================================
/*
 * Multiply packed 16-bit samples of `a` by the conjugate of `b`.
 *
 * Both inputs are treated as interleaved (re, im) 16-bit pairs, i.e. four
 * complex samples per 128-bit register. For each pair the function forms
 * two 32-bit sums of products, arithmetically right-shifts them by
 * `output_shift`, re-interleaves them and packs them back to 16 bits with
 * signed saturation.
 *
 * a            : first operand (left unconjugated).
 * b            : second operand (the one that gets swapped/sign-flipped).
 * output_shift : right shift applied to the 32-bit intermediate results.
 *
 * Returns the eight packed 16-bit results.
 *
 * NOTE(review): the sign pattern comes from the file-level `conjugate`
 * table, which is not visible here; presumably it holds {-1, 1, ...} so the
 * second sum is Im(a * conj(b)) -- confirm against its definition.
 */
simde__m128i nr_dlsch_a_mult_conjb(simde__m128i a, simde__m128i b, unsigned char output_shift)
{
  // First sum of products per pair: b[2k]*a[2k] + b[2k+1]*a[2k+1] (real part), 32-bit
  simde__m128i re32 = simde_mm_madd_epi16(b, a);
  re32 = simde_mm_srai_epi32(re32, output_shift);

  // Swap the two 16-bit halves of every pair of b (low 64 bits, then high 64 bits)
  const simde__m128i b_swapped_lo = simde_mm_shufflelo_epi16(b, SIMDE_MM_SHUFFLE(2, 3, 0, 1));
  const simde__m128i b_swapped = simde_mm_shufflehi_epi16(b_swapped_lo, SIMDE_MM_SHUFFLE(2, 3, 0, 1));

  // Apply the per-lane signs stored in the `conjugate` table
  const simde__m128i b_signed = simde_mm_sign_epi16(b_swapped, *(simde__m128i *)conjugate);

  // Second sum of products per pair (imaginary part), 32-bit
  simde__m128i im32 = simde_mm_madd_epi16(b_signed, a);
  im32 = simde_mm_srai_epi32(im32, output_shift);

  // Re-interleave as (re0, im0, re1, im1) and (re2, im2, re3, im3)
  const simde__m128i pairs_lo = simde_mm_unpacklo_epi32(re32, im32);
  const simde__m128i pairs_hi = simde_mm_unpackhi_epi32(re32, im32);

  // Narrow back to 16 bits with signed saturation
  return simde_mm_packs_epi32(pairs_lo, pairs_hi);
}
static
void
nr_dlsch_channel_compensation
(
uint32_t
rx_size_symbol
,
int
nbRx
,
c16_t
rxdataF_ext
[][
rx_size_symbol
],
...
...
@@ -757,21 +771,18 @@ static void nr_dlsch_channel_compensation(uint32_t rx_size_symbol,
unsigned
char
output_shift
,
PHY_NR_MEASUREMENTS
*
measurements
)
{
unsigned
short
rb
;
unsigned
char
aarx
,
atx
;
simde__m128i
*
dl_ch128
,
*
dl_ch128_2
,
*
dl_ch_mag128
,
*
dl_ch_mag128b
,
*
dl_ch_mag128r
,
*
rxdataF128
,
*
rxdataF_comp128
,
*
rho128
;
simde__m128i
mmtmpD0
,
mmtmpD1
,
mmtmpD2
,
mmtmpD3
,
QAM_amp128
=
{
0
},
QAM_amp128b
=
{
0
},
QAM_amp128r
=
{
0
};
simde__m128i
*
dl_ch128
,
*
dl_ch128_2
,
*
dl_ch_mag128
,
*
dl_ch_mag128b
,
*
dl_ch_mag128r
,
*
rxdataF128
,
*
rxdataF_comp128
,
*
rho128
;
simde__m128i
mmtmpD0
,
mmtmpD1
,
mmtmpD2
,
mmtmpD3
,
QAM_amp128
=
{
0
},
QAM_amp128b
=
{
0
},
QAM_amp128r
=
{
0
};
uint32_t
nb_rb_0
=
length
/
12
+
((
length
%
12
)
?
1
:
0
);
for
(
int
l
=
0
;
l
<
n_layers
;
l
++
)
{
if
(
mod_order
==
4
)
{
QAM_amp128
=
simde_mm_set1_epi16
(
QAM16_n1
);
// 2/sqrt(10)
QAM_amp128
=
simde_mm_set1_epi16
(
QAM16_n1
);
// 2/sqrt(10)
QAM_amp128b
=
simde_mm_setzero_si128
();
QAM_amp128r
=
simde_mm_setzero_si128
();
}
else
if
(
mod_order
==
6
)
{
QAM_amp128
=
simde_mm_set1_epi16
(
QAM64_n1
);
//
QAM_amp128
=
simde_mm_set1_epi16
(
QAM64_n1
);
//
QAM_amp128b
=
simde_mm_set1_epi16
(
QAM64_n2
);
QAM_amp128r
=
simde_mm_setzero_si128
();
}
else
if
(
mod_order
==
8
)
{
...
...
@@ -782,257 +793,173 @@ static void nr_dlsch_channel_compensation(uint32_t rx_size_symbol,
// printf("comp: rxdataF_comp %p, symbol %d\n",rxdataF_comp[0],symbol);
for
(
aarx
=
0
;
aarx
<
frame_parms
->
nb_antennas_rx
;
aarx
++
)
{
dl_ch128
=
(
simde__m128i
*
)
dl_ch_estimates_ext
[(
l
*
frame_parms
->
nb_antennas_rx
)
+
aarx
];
for
(
int
aarx
=
0
;
aarx
<
frame_parms
->
nb_antennas_rx
;
aarx
++
)
{
dl_ch128
=
(
simde__m128i
*
)
dl_ch_estimates_ext
[(
l
*
frame_parms
->
nb_antennas_rx
)
+
aarx
];
dl_ch_mag128
=
(
simde__m128i
*
)
dl_ch_mag
[
l
][
aarx
];
dl_ch_mag128b
=
(
simde__m128i
*
)
dl_ch_magb
[
l
][
aarx
];
dl_ch_mag128r
=
(
simde__m128i
*
)
dl_ch_magr
[
l
][
aarx
];
rxdataF128
=
(
simde__m128i
*
)
rxdataF_ext
[
aarx
];
rxdataF128
=
(
simde__m128i
*
)
rxdataF_ext
[
aarx
];
rxdataF_comp128
=
(
simde__m128i
*
)(
rxdataF_comp
[
l
][
aarx
]
+
symbol
*
nb_rb
*
12
);
for
(
rb
=
0
;
rb
<
nb_rb_0
;
rb
++
)
{
if
(
mod_order
>
2
)
{
for
(
int
rb
=
0
;
rb
<
nb_rb_0
;
rb
++
)
{
if
(
mod_order
>
2
)
{
// get channel amplitude if not QPSK
mmtmpD0
=
simde_mm_madd_epi16
(
dl_ch128
[
0
],
dl_ch128
[
0
]);
mmtmpD0
=
simde_mm_srai_epi32
(
mmtmpD0
,
output_shift
);
mmtmpD1
=
simde_mm_madd_epi16
(
dl_ch128
[
1
],
dl_ch128
[
1
]);
mmtmpD1
=
simde_mm_srai_epi32
(
mmtmpD1
,
output_shift
);
mmtmpD0
=
simde_mm_packs_epi32
(
mmtmpD0
,
mmtmpD1
);
//|H[0]|^2 |H[1]|^2 |H[2]|^2 |H[3]|^2 |H[4]|^2 |H[5]|^2 |H[6]|^2 |H[7]|^2
mmtmpD0
=
simde_mm_madd_epi16
(
dl_ch128
[
0
],
dl_ch128
[
0
]);
mmtmpD0
=
simde_mm_srai_epi32
(
mmtmpD0
,
output_shift
);
mmtmpD1
=
simde_mm_madd_epi16
(
dl_ch128
[
1
],
dl_ch128
[
1
]);
mmtmpD1
=
simde_mm_srai_epi32
(
mmtmpD1
,
output_shift
);
mmtmpD0
=
simde_mm_packs_epi32
(
mmtmpD0
,
mmtmpD1
);
//|H[0]|^2 |H[1]|^2 |H[2]|^2 |H[3]|^2 |H[4]|^2 |H[5]|^2 |H[6]|^2 |H[7]|^2
// store channel magnitude here in a new field of dlsch
dl_ch_mag128
[
0
]
=
simde_mm_unpacklo_epi16
(
mmtmpD0
,
mmtmpD0
);
dl_ch_mag128
[
0
]
=
simde_mm_unpacklo_epi16
(
mmtmpD0
,
mmtmpD0
);
dl_ch_mag128b
[
0
]
=
dl_ch_mag128
[
0
];
dl_ch_mag128r
[
0
]
=
dl_ch_mag128
[
0
];
dl_ch_mag128
[
0
]
=
simde_mm_mulhi_epi16
(
dl_ch_mag128
[
0
],
QAM_amp128
);
dl_ch_mag128
[
0
]
=
simde_mm_slli_epi16
(
dl_ch_mag128
[
0
],
1
);
dl_ch_mag128b
[
0
]
=
simde_mm_mulhi_epi16
(
dl_ch_mag128b
[
0
],
QAM_amp128b
);
dl_ch_mag128b
[
0
]
=
simde_mm_slli_epi16
(
dl_ch_mag128b
[
0
],
1
);
dl_ch_mag128
[
0
]
=
simde_mm_mulhrs_epi16
(
dl_ch_mag128
[
0
],
QAM_amp128
);
dl_ch_mag128b
[
0
]
=
simde_mm_mulhrs_epi16
(
dl_ch_mag128b
[
0
],
QAM_amp128b
);
dl_ch_mag128r
[
0
]
=
simde_mm_mulhrs_epi16
(
dl_ch_mag128r
[
0
],
QAM_amp128r
);
dl_ch_mag128r
[
0
]
=
simde_mm_mulhi_epi16
(
dl_ch_mag128r
[
0
],
QAM_amp128r
);
dl_ch_mag128r
[
0
]
=
simde_mm_slli_epi16
(
dl_ch_mag128r
[
0
],
1
);
//print_ints("Re(ch):",(int16_t*)&mmtmpD0);
//print_shorts("QAM_amp:",(int16_t*)&QAM_amp128);
//print_shorts("mag:",(int16_t*)&dl_ch_mag128[0]);
dl_ch_mag128
[
1
]
=
simde_mm_unpackhi_epi16
(
mmtmpD0
,
mmtmpD0
);
dl_ch_mag128
[
1
]
=
simde_mm_unpackhi_epi16
(
mmtmpD0
,
mmtmpD0
);
dl_ch_mag128b
[
1
]
=
dl_ch_mag128
[
1
];
dl_ch_mag128r
[
1
]
=
dl_ch_mag128
[
1
];
dl_ch_mag128
[
1
]
=
simde_mm_mulhi_epi16
(
dl_ch_mag128
[
1
],
QAM_amp128
);
dl_ch_mag128
[
1
]
=
simde_mm_slli_epi16
(
dl_ch_mag128
[
1
],
1
);
dl_ch_mag128b
[
1
]
=
simde_mm_mulhi_epi16
(
dl_ch_mag128b
[
1
],
QAM_amp128b
);
dl_ch_mag128b
[
1
]
=
simde_mm_slli_epi16
(
dl_ch_mag128b
[
1
],
1
);
dl_ch_mag128
[
1
]
=
simde_mm_mulhrs_epi16
(
dl_ch_mag128
[
1
],
QAM_amp128
);
dl_ch_mag128b
[
1
]
=
simde_mm_mulhrs_epi16
(
dl_ch_mag128b
[
1
],
QAM_amp128b
);
dl_ch_mag128r
[
1
]
=
simde_mm_mulhrs_epi16
(
dl_ch_mag128r
[
1
],
QAM_amp128r
);
dl_ch_mag128r
[
1
]
=
simde_mm_mulhi_epi16
(
dl_ch_mag128r
[
1
],
QAM_amp128r
);
dl_ch_mag128r
[
1
]
=
simde_mm_slli_epi16
(
dl_ch_mag128r
[
1
],
1
);
mmtmpD0
=
simde_mm_madd_epi16
(
dl_ch128
[
2
],
dl_ch128
[
2
]);
mmtmpD0
=
simde_mm_srai_epi32
(
mmtmpD0
,
output_shift
);
mmtmpD1
=
simde_mm_packs_epi32
(
mmtmpD0
,
mmtmpD0
);
mmtmpD0
=
simde_mm_madd_epi16
(
dl_ch128
[
2
],
dl_ch128
[
2
]);
//[H_I(0)^2+H_Q(0)^2 H_I(1)^2+H_Q(1)^2 H_I(2)^2+H_Q(2)^2 H_I(3)^2+H_Q(3)^2]
mmtmpD0
=
simde_mm_srai_epi32
(
mmtmpD0
,
output_shift
);
mmtmpD1
=
simde_mm_packs_epi32
(
mmtmpD0
,
mmtmpD0
);
//[|H(0)|^2 |H(1)|^2 |H(2)|^2 |H(3)|^2 |H(0)|^2 |H(1)|^2 |H(2)|^2 |H(3)|^2]
dl_ch_mag128
[
2
]
=
simde_mm_unpacklo_epi16
(
mmtmpD1
,
mmtmpD1
);
//[|H(0)|^2 |H(0)|^2 |H(1)|^2 |H(1)|^2 |H(2)|^2 |H(2)|^2 |H(3)|^2 |H(3)|^2]
dl_ch_mag128
[
2
]
=
simde_mm_unpacklo_epi16
(
mmtmpD1
,
mmtmpD1
);
dl_ch_mag128b
[
2
]
=
dl_ch_mag128
[
2
];
dl_ch_mag128r
[
2
]
=
dl_ch_mag128
[
2
];
dl_ch_mag128
[
2
]
=
simde_mm_mulhi_epi16
(
dl_ch_mag128
[
2
],
QAM_amp128
);
dl_ch_mag128
[
2
]
=
simde_mm_slli_epi16
(
dl_ch_mag128
[
2
],
1
);
dl_ch_mag128b
[
2
]
=
simde_mm_mulhi_epi16
(
dl_ch_mag128b
[
2
],
QAM_amp128b
);
dl_ch_mag128b
[
2
]
=
simde_mm_slli_epi16
(
dl_ch_mag128b
[
2
],
1
);
dl_ch_mag128r
[
2
]
=
simde_mm_mulhi_epi16
(
dl_ch_mag128r
[
2
],
QAM_amp128r
);
dl_ch_mag128r
[
2
]
=
simde_mm_slli_epi16
(
dl_ch_mag128r
[
2
],
1
);
dl_ch_mag128
[
2
]
=
simde_mm_mulhrs_epi16
(
dl_ch_mag128
[
2
],
QAM_amp128
);
dl_ch_mag128b
[
2
]
=
simde_mm_mulhrs_epi16
(
dl_ch_mag128b
[
2
],
QAM_amp128b
);
dl_ch_mag128r
[
2
]
=
simde_mm_mulhrs_epi16
(
dl_ch_mag128r
[
2
],
QAM_amp128r
);
}
// multiply by conjugated channel
mmtmpD0
=
simde_mm_madd_epi16
(
dl_ch128
[
0
],
rxdataF128
[
0
]);
// print_ints("re",&mmtmpD0);
// mmtmpD0 contains real part of 4 consecutive outputs (32-bit)
mmtmpD1
=
simde_mm_shufflelo_epi16
(
dl_ch128
[
0
],
SIMDE_MM_SHUFFLE
(
2
,
3
,
0
,
1
));
mmtmpD1
=
simde_mm_shufflehi_epi16
(
mmtmpD1
,
SIMDE_MM_SHUFFLE
(
2
,
3
,
0
,
1
));
mmtmpD1
=
simde_mm_sign_epi16
(
mmtmpD1
,
*
(
simde__m128i
*
)
&
conjugate
[
0
]);
// print_ints("im",&mmtmpD1);
mmtmpD1
=
simde_mm_madd_epi16
(
mmtmpD1
,
rxdataF128
[
0
]);
// mmtmpD1 contains imag part of 4 consecutive outputs (32-bit)
mmtmpD0
=
simde_mm_srai_epi32
(
mmtmpD0
,
output_shift
);
// print_ints("re(shift)",&mmtmpD0);
mmtmpD1
=
simde_mm_srai_epi32
(
mmtmpD1
,
output_shift
);
// print_ints("im(shift)",&mmtmpD1);
mmtmpD2
=
simde_mm_unpacklo_epi32
(
mmtmpD0
,
mmtmpD1
);
mmtmpD3
=
simde_mm_unpackhi_epi32
(
mmtmpD0
,
mmtmpD1
);
// print_ints("c0",&mmtmpD2);
// print_ints("c1",&mmtmpD3);
rxdataF_comp128
[
0
]
=
simde_mm_packs_epi32
(
mmtmpD2
,
mmtmpD3
);
#ifdef DEBUG_DLSCH_DEMOD
printf
(
"%%arx%d atx%d rb_index %d symbol %d shift %d
\n
"
,
aarx
,
l
,
rb
,
symbol
,
output_shift
);
printf
(
"rx_%d(%d,:)"
,
aarx
+
1
,
rb
+
1
);
print_shorts
(
" "
,(
int16_t
*
)
&
rxdataF128
[
0
]);
printf
(
"ch_%d%d(%d,:)"
,
aarx
+
1
,
l
+
1
,
rb
+
1
);
print_shorts
(
" "
,(
int16_t
*
)
&
dl_ch128
[
0
]);
printf
(
"rx_comp_%d%d(%d,:)"
,
aarx
+
1
,
l
+
1
,
rb
+
1
);
print_shorts
(
" "
,(
int16_t
*
)
&
rxdataF_comp128
[
0
]);
#endif
// multiply by conjugated channel
mmtmpD0
=
simde_mm_madd_epi16
(
dl_ch128
[
1
],
rxdataF128
[
1
]);
// mmtmpD0 contains real part of 4 consecutive outputs (32-bit)
mmtmpD1
=
simde_mm_shufflelo_epi16
(
dl_ch128
[
1
],
SIMDE_MM_SHUFFLE
(
2
,
3
,
0
,
1
));
mmtmpD1
=
simde_mm_shufflehi_epi16
(
mmtmpD1
,
SIMDE_MM_SHUFFLE
(
2
,
3
,
0
,
1
));
mmtmpD1
=
simde_mm_sign_epi16
(
mmtmpD1
,
*
(
simde__m128i
*
)
conjugate
);
mmtmpD1
=
simde_mm_madd_epi16
(
mmtmpD1
,
rxdataF128
[
1
]);
// mmtmpD1 contains imag part of 4 consecutive outputs (32-bit)
mmtmpD0
=
simde_mm_srai_epi32
(
mmtmpD0
,
output_shift
);
mmtmpD1
=
simde_mm_srai_epi32
(
mmtmpD1
,
output_shift
);
mmtmpD2
=
simde_mm_unpacklo_epi32
(
mmtmpD0
,
mmtmpD1
);
mmtmpD3
=
simde_mm_unpackhi_epi32
(
mmtmpD0
,
mmtmpD1
);
rxdataF_comp128
[
1
]
=
simde_mm_packs_epi32
(
mmtmpD2
,
mmtmpD3
);
#ifdef DEBUG_DLSCH_DEMOD
print_shorts
(
"rx:"
,(
int16_t
*
)
&
rxdataF128
[
1
]);
print_shorts
(
"ch:"
,(
int16_t
*
)
&
dl_ch128
[
1
]);
print_shorts
(
"pack:"
,(
int16_t
*
)
&
rxdataF_comp128
[
1
]);
#endif
// multiply by conjugated channel
mmtmpD0
=
simde_mm_madd_epi16
(
dl_ch128
[
2
],
rxdataF128
[
2
]);
// mmtmpD0 contains real part of 4 consecutive outputs (32-bit)
mmtmpD1
=
simde_mm_shufflelo_epi16
(
dl_ch128
[
2
],
SIMDE_MM_SHUFFLE
(
2
,
3
,
0
,
1
));
mmtmpD1
=
simde_mm_shufflehi_epi16
(
mmtmpD1
,
SIMDE_MM_SHUFFLE
(
2
,
3
,
0
,
1
));
mmtmpD1
=
simde_mm_sign_epi16
(
mmtmpD1
,
*
(
simde__m128i
*
)
conjugate
);
mmtmpD1
=
simde_mm_madd_epi16
(
mmtmpD1
,
rxdataF128
[
2
]);
// mmtmpD1 contains imag part of 4 consecutive outputs (32-bit)
mmtmpD0
=
simde_mm_srai_epi32
(
mmtmpD0
,
output_shift
);
mmtmpD1
=
simde_mm_srai_epi32
(
mmtmpD1
,
output_shift
);
mmtmpD2
=
simde_mm_unpacklo_epi32
(
mmtmpD0
,
mmtmpD1
);
mmtmpD3
=
simde_mm_unpackhi_epi32
(
mmtmpD0
,
mmtmpD1
);
rxdataF_comp128
[
2
]
=
simde_mm_packs_epi32
(
mmtmpD2
,
mmtmpD3
);
#ifdef DEBUG_DLSCH_DEMOD
print_shorts
(
"rx:"
,(
int16_t
*
)
&
rxdataF128
[
2
]);
print_shorts
(
"ch:"
,(
int16_t
*
)
&
dl_ch128
[
2
]);
print_shorts
(
"pack:"
,(
int16_t
*
)
&
rxdataF_comp128
[
2
]);
#endif
dl_ch128
+=
3
;
dl_ch_mag128
+=
3
;
dl_ch_mag128b
+=
3
;
dl_ch_mag128r
+=
3
;
rxdataF128
+=
3
;
rxdataF_comp128
+=
3
;
// Multiply received data by conjugated channel
rxdataF_comp128
[
0
]
=
nr_dlsch_a_mult_conjb
(
rxdataF128
[
0
],
dl_ch128
[
0
],
output_shift
);
rxdataF_comp128
[
1
]
=
nr_dlsch_a_mult_conjb
(
rxdataF128
[
1
],
dl_ch128
[
1
],
output_shift
);
rxdataF_comp128
[
2
]
=
nr_dlsch_a_mult_conjb
(
rxdataF128
[
2
],
dl_ch128
[
2
],
output_shift
);
dl_ch128
+=
3
;
dl_ch_mag128
+=
3
;
dl_ch_mag128b
+=
3
;
dl_ch_mag128r
+=
3
;
rxdataF128
+=
3
;
rxdataF_comp128
+=
3
;
}
}
}
if
(
rho
)
{
//we compute the Tx correlation matrix for each Rx antenna
//As an example the 2x2 MIMO case requires
//rho[aarx][nl*nl] = [cov(H_aarx_0,H_aarx_0) cov(H_aarx_0,H_aarx_1)
// cov(H_aarx_1,H_aarx_0) cov(H_aarx_1,H_aarx_1)], aarx=0,...,nb_antennas_rx-1
//int avg_rho_re[frame_parms->nb_antennas_rx][nl*nl];
//int avg_rho_im[frame_parms->nb_antennas_rx][nl*nl];
if
(
rho
)
{
// we compute the Tx correlation matrix for each Rx antenna
// As an example the 2x2 MIMO case requires
// rho[aarx][nl*nl] = [cov(H_aarx_0,H_aarx_0) cov(H_aarx_0,H_aarx_1)
// cov(H_aarx_1,H_aarx_0) cov(H_aarx_1,H_aarx_1)], aarx=0,...,nb_antennas_rx-1
for
(
aarx
=
0
;
aarx
<
frame_parms
->
nb_antennas_rx
;
aarx
++
)
{
// int avg_rho_re[frame_parms->nb_antennas_rx][nl*nl];
// int avg_rho_im[frame_parms->nb_antennas_rx][nl*nl];
for
(
int
aarx
=
0
;
aarx
<
frame_parms
->
nb_antennas_rx
;
aarx
++
)
{
for
(
int
l
=
0
;
l
<
n_layers
;
l
++
)
{
for
(
atx
=
0
;
atx
<
n_layers
;
atx
++
)
{
//avg_rho_re[aarx][l*n_layers+atx] = 0;
//avg_rho_im[aarx][l*n_layers+atx] = 0;
rho128
=
(
simde__m128i
*
)
&
rho
[
aarx
][
l
*
n_layers
+
atx
][
symbol
*
nb_rb
*
12
];
dl_ch128
=
(
simde__m128i
*
)
dl_ch_estimates_ext
[
l
*
frame_parms
->
nb_antennas_rx
+
aarx
];
dl_ch128_2
=
(
simde__m128i
*
)
dl_ch_estimates_ext
[
atx
*
frame_parms
->
nb_antennas_rx
+
aarx
];
for
(
rb
=
0
;
rb
<
nb_rb_0
;
rb
++
)
{
// multiply by conjugated channel
mmtmpD0
=
simde_mm_madd_epi16
(
dl_ch128
[
0
],
dl_ch128_2
[
0
]);
// print_ints("re",&mmtmpD0);
// mmtmpD0 contains real part of 4 consecutive outputs (32-bit)
mmtmpD1
=
simde_mm_shufflelo_epi16
(
dl_ch128
[
0
],
SIMDE_MM_SHUFFLE
(
2
,
3
,
0
,
1
));
mmtmpD1
=
simde_mm_shufflehi_epi16
(
mmtmpD1
,
SIMDE_MM_SHUFFLE
(
2
,
3
,
0
,
1
));
mmtmpD1
=
simde_mm_sign_epi16
(
mmtmpD1
,
*
(
simde__m128i
*
)
&
conjugate
[
0
]);
// print_ints("im",&mmtmpD1);
mmtmpD1
=
simde_mm_madd_epi16
(
mmtmpD1
,
dl_ch128_2
[
0
]);
// mmtmpD1 contains imag part of 4 consecutive outputs (32-bit)
mmtmpD0
=
simde_mm_srai_epi32
(
mmtmpD0
,
output_shift
);
// print_ints("re(shift)",&mmtmpD0);
mmtmpD1
=
simde_mm_srai_epi32
(
mmtmpD1
,
output_shift
);
// print_ints("im(shift)",&mmtmpD1);
mmtmpD2
=
simde_mm_unpacklo_epi32
(
mmtmpD0
,
mmtmpD1
);
mmtmpD3
=
simde_mm_unpackhi_epi32
(
mmtmpD0
,
mmtmpD1
);
// print_ints("c0",&mmtmpD2);
// print_ints("c1",&mmtmpD3);
rho128
[
0
]
=
simde_mm_packs_epi32
(
mmtmpD2
,
mmtmpD3
);
//print_shorts("rx:",dl_ch128_2);
//print_shorts("ch:",dl_ch128);
//print_shorts("pack:",rho128);
/*avg_rho_re[aarx][l*n_layers+atx] +=(((int16_t*)&rho128[0])[0]+
((int16_t*)&rho128[0])[2] +
((int16_t*)&rho128[0])[4] +
((int16_t*)&rho128[0])[6])/16;*/
/*avg_rho_im[aarx][l*n_layers+atx] +=(((int16_t*)&rho128[0])[1]+
((int16_t*)&rho128[0])[3] +
((int16_t*)&rho128[0])[5] +
((int16_t*)&rho128[0])[7])/16;*/
// multiply by conjugated channel
mmtmpD0
=
simde_mm_madd_epi16
(
dl_ch128
[
1
],
dl_ch128_2
[
1
]);
// mmtmpD0 contains real part of 4 consecutive outputs (32-bit)
mmtmpD1
=
simde_mm_shufflelo_epi16
(
dl_ch128
[
1
],
SIMDE_MM_SHUFFLE
(
2
,
3
,
0
,
1
));
mmtmpD1
=
simde_mm_shufflehi_epi16
(
mmtmpD1
,
SIMDE_MM_SHUFFLE
(
2
,
3
,
0
,
1
));
mmtmpD1
=
simde_mm_sign_epi16
(
mmtmpD1
,
*
(
simde__m128i
*
)
conjugate
);
mmtmpD1
=
simde_mm_madd_epi16
(
mmtmpD1
,
dl_ch128_2
[
1
]);
// mmtmpD1 contains imag part of 4 consecutive outputs (32-bit)
mmtmpD0
=
simde_mm_srai_epi32
(
mmtmpD0
,
output_shift
);
mmtmpD1
=
simde_mm_srai_epi32
(
mmtmpD1
,
output_shift
);
mmtmpD2
=
simde_mm_unpacklo_epi32
(
mmtmpD0
,
mmtmpD1
);
mmtmpD3
=
simde_mm_unpackhi_epi32
(
mmtmpD0
,
mmtmpD1
);
rho128
[
1
]
=
simde_mm_packs_epi32
(
mmtmpD2
,
mmtmpD3
);
//print_shorts("rx:",dl_ch128_2+1);
//print_shorts("ch:",dl_ch128+1);
//print_shorts("pack:",rho128+1);
// multiply by conjugated channel
/*avg_rho_re[aarx][l*n_layers+atx] +=(((int16_t*)&rho128[1])[0]+
((int16_t*)&rho128[1])[2] +
((int16_t*)&rho128[1])[4] +
((int16_t*)&rho128[1])[6])/16;*/
/*avg_rho_im[aarx][l*n_layers+atx] +=(((int16_t*)&rho128[1])[1]+
((int16_t*)&rho128[1])[3] +
((int16_t*)&rho128[1])[5] +
((int16_t*)&rho128[1])[7])/16;*/
mmtmpD0
=
simde_mm_madd_epi16
(
dl_ch128
[
2
],
dl_ch128_2
[
2
]);
// mmtmpD0 contains real part of 4 consecutive outputs (32-bit)
mmtmpD1
=
simde_mm_shufflelo_epi16
(
dl_ch128
[
2
],
SIMDE_MM_SHUFFLE
(
2
,
3
,
0
,
1
));
mmtmpD1
=
simde_mm_shufflehi_epi16
(
mmtmpD1
,
SIMDE_MM_SHUFFLE
(
2
,
3
,
0
,
1
));
mmtmpD1
=
simde_mm_sign_epi16
(
mmtmpD1
,
*
(
simde__m128i
*
)
conjugate
);
mmtmpD1
=
simde_mm_madd_epi16
(
mmtmpD1
,
dl_ch128_2
[
2
]);
// mmtmpD1 contains imag part of 4 consecutive outputs (32-bit)
mmtmpD0
=
simde_mm_srai_epi32
(
mmtmpD0
,
output_shift
);
mmtmpD1
=
simde_mm_srai_epi32
(
mmtmpD1
,
output_shift
);
mmtmpD2
=
simde_mm_unpacklo_epi32
(
mmtmpD0
,
mmtmpD1
);
mmtmpD3
=
simde_mm_unpackhi_epi32
(
mmtmpD0
,
mmtmpD1
);
rho128
[
2
]
=
simde_mm_packs_epi32
(
mmtmpD2
,
mmtmpD3
);
//print_shorts("rx:",dl_ch128_2+2);
//print_shorts("ch:",dl_ch128+2);
//print_shorts("pack:",rho128+2);
/*avg_rho_re[aarx][l*n_layers+atx] +=(((int16_t*)&rho128[2])[0]+
((int16_t*)&rho128[2])[2] +
((int16_t*)&rho128[2])[4] +
((int16_t*)&rho128[2])[6])/16;*/
/*avg_rho_im[aarx][l*n_layers+atx] +=(((int16_t*)&rho128[2])[1]+
((int16_t*)&rho128[2])[3] +
((int16_t*)&rho128[2])[5] +
((int16_t*)&rho128[2])[7])/16;*/
for
(
int
atx
=
0
;
atx
<
n_layers
;
atx
++
)
{
// avg_rho_re[aarx][l*n_layers+atx] = 0;
// avg_rho_im[aarx][l*n_layers+atx] = 0;
rho128
=
(
simde__m128i
*
)
&
rho
[
aarx
][
l
*
n_layers
+
atx
][
symbol
*
nb_rb
*
12
];
dl_ch128
=
(
simde__m128i
*
)
dl_ch_estimates_ext
[
l
*
frame_parms
->
nb_antennas_rx
+
aarx
];
dl_ch128_2
=
(
simde__m128i
*
)
dl_ch_estimates_ext
[
atx
*
frame_parms
->
nb_antennas_rx
+
aarx
];
for
(
int
rb
=
0
;
rb
<
nb_rb_0
;
rb
++
)
{
// multiply by conjugated channel
mmtmpD0
=
simde_mm_madd_epi16
(
dl_ch128
[
0
],
dl_ch128_2
[
0
]);
// print_ints("re",&mmtmpD0);
// mmtmpD0 contains real part of 4 consecutive outputs (32-bit)
mmtmpD1
=
simde_mm_shufflelo_epi16
(
dl_ch128
[
0
],
SIMDE_MM_SHUFFLE
(
2
,
3
,
0
,
1
));
mmtmpD1
=
simde_mm_shufflehi_epi16
(
mmtmpD1
,
SIMDE_MM_SHUFFLE
(
2
,
3
,
0
,
1
));
mmtmpD1
=
simde_mm_sign_epi16
(
mmtmpD1
,
*
(
simde__m128i
*
)
&
conjugate
[
0
]);
// print_ints("im",&mmtmpD1);
mmtmpD1
=
simde_mm_madd_epi16
(
mmtmpD1
,
dl_ch128_2
[
0
]);
// mmtmpD1 contains imag part of 4 consecutive outputs (32-bit)
mmtmpD0
=
simde_mm_srai_epi32
(
mmtmpD0
,
output_shift
);
// print_ints("re(shift)",&mmtmpD0);
mmtmpD1
=
simde_mm_srai_epi32
(
mmtmpD1
,
output_shift
);
// print_ints("im(shift)",&mmtmpD1);
mmtmpD2
=
simde_mm_unpacklo_epi32
(
mmtmpD0
,
mmtmpD1
);
mmtmpD3
=
simde_mm_unpackhi_epi32
(
mmtmpD0
,
mmtmpD1
);
// print_ints("c0",&mmtmpD2);
// print_ints("c1",&mmtmpD3);
rho128
[
0
]
=
simde_mm_packs_epi32
(
mmtmpD2
,
mmtmpD3
);
// print_shorts("rx:",dl_ch128_2);
// print_shorts("ch:",dl_ch128);
// print_shorts("pack:",rho128);
/*avg_rho_re[aarx][l*n_layers+atx] +=(((int16_t*)&rho128[0])[0]+
((int16_t*)&rho128[0])[2] +
((int16_t*)&rho128[0])[4] +
((int16_t*)&rho128[0])[6])/16;*/
/*avg_rho_im[aarx][l*n_layers+atx] +=(((int16_t*)&rho128[0])[1]+
((int16_t*)&rho128[0])[3] +
((int16_t*)&rho128[0])[5] +
((int16_t*)&rho128[0])[7])/16;*/
// multiply by conjugated channel
mmtmpD0
=
simde_mm_madd_epi16
(
dl_ch128
[
1
],
dl_ch128_2
[
1
]);
// mmtmpD0 contains real part of 4 consecutive outputs (32-bit)
mmtmpD1
=
simde_mm_shufflelo_epi16
(
dl_ch128
[
1
],
SIMDE_MM_SHUFFLE
(
2
,
3
,
0
,
1
));
mmtmpD1
=
simde_mm_shufflehi_epi16
(
mmtmpD1
,
SIMDE_MM_SHUFFLE
(
2
,
3
,
0
,
1
));
mmtmpD1
=
simde_mm_sign_epi16
(
mmtmpD1
,
*
(
simde__m128i
*
)
conjugate
);
mmtmpD1
=
simde_mm_madd_epi16
(
mmtmpD1
,
dl_ch128_2
[
1
]);
// mmtmpD1 contains imag part of 4 consecutive outputs (32-bit)
mmtmpD0
=
simde_mm_srai_epi32
(
mmtmpD0
,
output_shift
);
mmtmpD1
=
simde_mm_srai_epi32
(
mmtmpD1
,
output_shift
);
mmtmpD2
=
simde_mm_unpacklo_epi32
(
mmtmpD0
,
mmtmpD1
);
mmtmpD3
=
simde_mm_unpackhi_epi32
(
mmtmpD0
,
mmtmpD1
);
rho128
[
1
]
=
simde_mm_packs_epi32
(
mmtmpD2
,
mmtmpD3
);
// print_shorts("rx:",dl_ch128_2+1);
// print_shorts("ch:",dl_ch128+1);
// print_shorts("pack:",rho128+1);
// multiply by conjugated channel
/*avg_rho_re[aarx][l*n_layers+atx] +=(((int16_t*)&rho128[1])[0]+
((int16_t*)&rho128[1])[2] +
((int16_t*)&rho128[1])[4] +
((int16_t*)&rho128[1])[6])/16;*/
/*avg_rho_im[aarx][l*n_layers+atx] +=(((int16_t*)&rho128[1])[1]+
((int16_t*)&rho128[1])[3] +
((int16_t*)&rho128[1])[5] +
((int16_t*)&rho128[1])[7])/16;*/
mmtmpD0
=
simde_mm_madd_epi16
(
dl_ch128
[
2
],
dl_ch128_2
[
2
]);
// mmtmpD0 contains real part of 4 consecutive outputs (32-bit)
mmtmpD1
=
simde_mm_shufflelo_epi16
(
dl_ch128
[
2
],
SIMDE_MM_SHUFFLE
(
2
,
3
,
0
,
1
));
mmtmpD1
=
simde_mm_shufflehi_epi16
(
mmtmpD1
,
SIMDE_MM_SHUFFLE
(
2
,
3
,
0
,
1
));
mmtmpD1
=
simde_mm_sign_epi16
(
mmtmpD1
,
*
(
simde__m128i
*
)
conjugate
);
mmtmpD1
=
simde_mm_madd_epi16
(
mmtmpD1
,
dl_ch128_2
[
2
]);
// mmtmpD1 contains imag part of 4 consecutive outputs (32-bit)
mmtmpD0
=
simde_mm_srai_epi32
(
mmtmpD0
,
output_shift
);
mmtmpD1
=
simde_mm_srai_epi32
(
mmtmpD1
,
output_shift
);
mmtmpD2
=
simde_mm_unpacklo_epi32
(
mmtmpD0
,
mmtmpD1
);
mmtmpD3
=
simde_mm_unpackhi_epi32
(
mmtmpD0
,
mmtmpD1
);
rho128
[
2
]
=
simde_mm_packs_epi32
(
mmtmpD2
,
mmtmpD3
);
// print_shorts("rx:",dl_ch128_2+2);
// print_shorts("ch:",dl_ch128+2);
// print_shorts("pack:",rho128+2);
/*avg_rho_re[aarx][l*n_layers+atx] +=(((int16_t*)&rho128[2])[0]+
((int16_t*)&rho128[2])[2] +
((int16_t*)&rho128[2])[4] +
((int16_t*)&rho128[2])[6])/16;*/
/*avg_rho_im[aarx][l*n_layers+atx] +=(((int16_t*)&rho128[2])[1]+
((int16_t*)&rho128[2])[3] +
((int16_t*)&rho128[2])[5] +
((int16_t*)&rho128[2])[7])/16;*/
dl_ch128
+=
3
;
dl_ch128_2
+=
3
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment