Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
O
OpenXG-RAN
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
常顺宇
OpenXG-RAN
Commits
1cb484f1
Commit
1cb484f1
authored
Jan 22, 2019
by
frtabu
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fix more trivial cppcheck errors and warnings
parent
d171e18c
Changes
12
Show whitespace changes
Inline
Side-by-side
Showing
12 changed files
with
1548 additions
and
2292 deletions
+1548
-2292
common/config/config_cmdline.c
common/config/config_cmdline.c
+2
-1
nfapi/oai_integration/nfapi_vnf.c
nfapi/oai_integration/nfapi_vnf.c
+4
-2
openair1/PHY/CODING/3gpplte_sse.c
openair1/PHY/CODING/3gpplte_sse.c
+118
-180
openair1/PHY/CODING/3gpplte_turbo_decoder_sse.c
openair1/PHY/CODING/3gpplte_turbo_decoder_sse.c
+165
-366
openair1/PHY/CODING/3gpplte_turbo_decoder_sse_16bit.c
openair1/PHY/CODING/3gpplte_turbo_decoder_sse_16bit.c
+307
-458
openair1/PHY/CODING/3gpplte_turbo_decoder_sse_8bit.c
openair1/PHY/CODING/3gpplte_turbo_decoder_sse_8bit.c
+247
-336
openair1/PHY/CODING/ccoding_byte.c
openair1/PHY/CODING/ccoding_byte.c
+50
-77
openair1/PHY/CODING/ccoding_byte_lte.c
openair1/PHY/CODING/ccoding_byte_lte.c
+8
-39
openair1/PHY/CODING/lte_rate_matching.c
openair1/PHY/CODING/lte_rate_matching.c
+22
-89
openair1/PHY/CODING/lte_segmentation.c
openair1/PHY/CODING/lte_segmentation.c
+15
-25
openair1/PHY/LTE_ESTIMATION/lte_dl_channel_estimation.c
openair1/PHY/LTE_ESTIMATION/lte_dl_channel_estimation.c
+218
-329
openair1/PHY/LTE_TRANSPORT/prach.c
openair1/PHY/LTE_TRANSPORT/prach.c
+392
-390
No files found.
common/config/config_cmdline.c
View file @
1cb484f1
...
...
@@ -164,9 +164,10 @@ int processoption(paramdef_t *cfgoptions, char *value) {
*/
int
config_check_unknown_cmdlineopt
(
char
*
prefix
)
{
int
unknowndetected
=
0
;
char
testprefix
[
CONFIG_MAXOPTLENGTH
]
=
""
;
char
testprefix
[
CONFIG_MAXOPTLENGTH
];
int
finalcheck
=
0
;
memset
(
testpref
,
0
,
sizeof
(
testprefix
));
if
(
prefix
!=
NULL
)
{
if
(
strcmp
(
prefix
,
CONFIG_CHECKALLSECTIONS
)
==
0
)
finalcheck
=
1
;
...
...
nfapi/oai_integration/nfapi_vnf.c
View file @
1cb484f1
...
...
@@ -270,6 +270,7 @@ int pnf_param_resp_cb(nfapi_vnf_config_t* config, int p5_idx, nfapi_pnf_param_re
for
(
int
i
=
0
;
i
<
resp
->
pnf_phy
.
number_of_phys
;
++
i
)
{
phy_info
phy
;
memset
(
phy
,
0
,
sizeof
(
phy
));
phy
.
index
=
resp
->
pnf_phy
.
phy
[
i
].
phy_config_index
;
printf
(
"[VNF] (PHY:%d) phy_config_idx:%d
\n
"
,
i
,
resp
->
pnf_phy
.
phy
[
i
].
phy_config_index
);
...
...
@@ -287,6 +288,7 @@ int pnf_param_resp_cb(nfapi_vnf_config_t* config, int p5_idx, nfapi_pnf_param_re
for
(
int
i
=
0
;
i
<
resp
->
pnf_rf
.
number_of_rfs
;
++
i
)
{
rf_info
rf
;
memset
(
rf
,
0
,
sizeof
(
rf
));
rf
.
index
=
resp
->
pnf_rf
.
rf
[
i
].
rf_config_index
;
printf
(
"[VNF] (RF:%d) rf_config_idx:%d
\n
"
,
i
,
resp
->
pnf_rf
.
rf
[
i
].
rf_config_index
);
...
...
@@ -897,7 +899,7 @@ int param_resp_cb(nfapi_vnf_config_t* config, int p5_idx, nfapi_param_response_t
// for now just 1
printf
(
"[VNF] %d.%d pnf p7 %s:%d timing %
d %d %d %d
\n
"
,
p5_idx
,
phy
->
id
,
phy
->
remote_addr
,
phy
->
remote_port
,
p7_vnf
->
timing_window
,
p7_vnf
->
periodic_timing_period
,
p7_vnf
->
aperiodic_timing_enabled
,
p7_vnf
->
periodic_timing_period
);
printf
(
"[VNF] %d.%d pnf p7 %s:%d timing %
u %u %u %u
\n
"
,
p5_idx
,
phy
->
id
,
phy
->
remote_addr
,
phy
->
remote_port
,
p7_vnf
->
timing_window
,
p7_vnf
->
periodic_timing_period
,
p7_vnf
->
aperiodic_timing_enabled
,
p7_vnf
->
periodic_timing_period
);
req
->
header
.
message_id
=
NFAPI_CONFIG_REQUEST
;
req
->
header
.
phy_id
=
phy
->
id
;
...
...
@@ -919,7 +921,7 @@ int param_resp_cb(nfapi_vnf_config_t* config, int p5_idx, nfapi_param_response_t
req
->
nfapi_config
.
timing_window
.
tl
.
tag
=
NFAPI_NFAPI_TIMING_WINDOW_TAG
;
req
->
nfapi_config
.
timing_window
.
value
=
p7_vnf
->
timing_window
;
printf
(
"[VNF] Timing window:%
d
\n
"
,
p7_vnf
->
timing_window
);
printf
(
"[VNF] Timing window:%
u
\n
"
,
p7_vnf
->
timing_window
);
req
->
num_tlv
++
;
if
(
p7_vnf
->
periodic_timing_enabled
||
p7_vnf
->
aperiodic_timing_enabled
)
{
...
...
openair1/PHY/CODING/3gpplte_sse.c
View file @
1cb484f1
...
...
@@ -26,9 +26,9 @@
date: 09.2012
*/
#ifndef TC_MAIN
#include "coding_defs.h"
#include "coding_defs.h"
#else
#include <stdint.h>
#include <stdint.h>
#endif
#include <stdio.h>
#include <string.h>
...
...
@@ -66,11 +66,11 @@ struct treillis {
union
{
uint8x8_t
systematic_andp1_64
[
3
];
char
systematic_andp1_8
[
24
];
}
__attribute__
((
aligned
(
64
)));
}
__attribute__
((
aligned
(
64
)));
union
{
uint8x8_t
parity2_64
[
3
];
char
parity2_8
[
24
];
}
__attribute__
((
aligned
(
64
)));
}
__attribute__
((
aligned
(
64
)));
int
exit_state
;
};
#endif
...
...
@@ -79,23 +79,20 @@ struct treillis all_treillis[8][256];
int
all_treillis_initialized
=
0
;
/*
 * Advance the 3-bit encoder register by one input bit and return the
 * corresponding output bit.
 *
 * input : bit to feed in (the arithmetic below assumes 0 or 1)
 * state : encoder register, read and then overwritten in place; the stored
 *         value is always masked to the range 0..7
 *
 * Returns (input ^ state bit 2 ^ state bit 1) & 1, computed from the state
 * value as it was on entry.
 *
 * For a state already in 0..7 the update amounts to a right shift by one with
 * (input ^ bit 1 ^ bit 0) inserted as the new bit 2.
 */
static inline unsigned char threegpplte_rsc(unsigned char input, unsigned char *state)
{
  /* Snapshot the register so output and next state use the same old value. */
  const unsigned char s = *state;
  const unsigned char output = (input ^ (s >> 2) ^ (s >> 1)) & 1;

  *state = (((input << 2) ^ (s >> 1)) ^ ((s >> 1) << 2) ^ (s << 2)) & 7;
  return output;
}
/*
 * Produce one pair of termination bits from the encoder register and shift
 * the register right by one.
 *
 * x     : out, receives (state bit 0 ^ state bit 1)
 * z     : out, receives (state bit 2 ^ state bit 0)
 * state : encoder register, shifted right by one position on return
 *
 * The three stores are done in the order z, x, state, each re-reading *state,
 * so the results above hold when x, z and state point to distinct bytes.
 */
static inline void threegpplte_rsc_termination(unsigned char *x, unsigned char *z, unsigned char *state)
{
  *z = ((*state >> 2) ^ (*state)) & 1;
  *x = ((*state) ^ (*state >> 1)) & 1;
  *state = (*state) >> 1;
}
static
void
treillis_table_init
(
void
)
{
static
void
treillis_table_init
(
void
)
{
//struct treillis t[][]=all_treillis;
//t=memalign(16,sizeof(struct treillis)*8*256);
int
i
,
j
,
b
;
...
...
@@ -128,14 +125,12 @@ static void treillis_table_init(void)
}
char
interleave_compact_byte
(
short
*
base_interleaver
,
unsigned
char
*
input
,
unsigned
char
*
output
,
int
n
)
{
char
interleave_compact_byte
(
short
*
base_interleaver
,
unsigned
char
*
input
,
unsigned
char
*
output
,
int
n
)
{
char
expandInput
[
768
*
8
]
__attribute__
((
aligned
(
32
)));
int
i
,
loop
=
n
>>
4
;
#if defined(__x86_64__) || defined(__i386__)
#ifndef __AVX2__
__m128i
*
i_128
=
(
__m128i
*
)
input
,
*
o_128
=
(
__m128i
*
)
expandInput
;
__m128i
*
i_128
=
(
__m128i
*
)
input
,
*
o_128
=
(
__m128i
*
)
expandInput
;
__m128i
tmp1
,
tmp2
,
tmp3
,
tmp4
;
__m128i
BIT_MASK
=
_mm_set_epi8
(
0
b00000001
,
0
b00000010
,
...
...
@@ -153,9 +148,8 @@ char interleave_compact_byte(short * base_interleaver,unsigned char * input, uns
0
b00100000
,
0
b01000000
,
0
b10000000
);
#else
__m256i
*
i_256
=
(
__m256i
*
)
input
,
*
o_256
=
(
__m256i
*
)
expandInput
;
__m256i
*
i_256
=
(
__m256i
*
)
input
,
*
o_256
=
(
__m256i
*
)
expandInput
;
__m256i
tmp1
,
tmp2
,
tmp3
,
tmp4
,
tmp5
,
tmp6
,
tmp7
;
__m256i
BIT_MASK
=
_mm256_set_epi8
(
0
b00000001
,
0
b00000010
,
...
...
@@ -211,25 +205,26 @@ char interleave_compact_byte(short * base_interleaver,unsigned char * input, uns
0
b00001000
,
0
b00000100
,
0
b00000010
,
0
b00000001
};
0
b00000001
};
#endif
#ifndef __AVX2__
if
((
n
&
15
)
>
0
)
loop
++
;
#else
loop
=
n
>>
5
;
if
((
n
&
31
)
>
0
)
loop
++
;
#endif
#endif
for
(
i
=
0
;
i
<
loop
;
i
++
)
{
// int cur_byte=i<<3;
// for (b=0;b<8;b++)
// expandInput[cur_byte+b] = (input[i]&(1<<(7-b)))>>(7-b);
#if defined(__x86_64__) || defined(__i386__)
#ifndef __AVX2__
tmp1
=
_mm_load_si128
(
i_128
++
);
// tmp1 = B0,B1,...,B15
...
...
@@ -237,29 +232,22 @@ char interleave_compact_byte(short * base_interleaver,unsigned char * input, uns
tmp3
=
_mm_unpacklo_epi16
(
tmp2
,
tmp2
);
// tmp3 = B0,B0,B0,B0,B1,B1,B1,B1,B2,B2,B2,B2,B3,B3,B3,B3
tmp4
=
_mm_unpacklo_epi32
(
tmp3
,
tmp3
);
// tmp4 - B0,B0,B0,B0,B0,B0,B0,B0,B1,B1,B1,B1,B1,B1,B1,B1
*
o_128
++=
_mm_cmpeq_epi8
(
_mm_and_si128
(
tmp4
,
BIT_MASK
),
BIT_MASK
);
tmp4
=
_mm_unpackhi_epi32
(
tmp3
,
tmp3
);
// tmp4 - B2,B2,B2,B2,B2,B2,B2,B2,B3,B3,B3,B3,B3,B3,B3,B3
*
o_128
++=
_mm_cmpeq_epi8
(
_mm_and_si128
(
tmp4
,
BIT_MASK
),
BIT_MASK
);;
tmp3
=
_mm_unpackhi_epi16
(
tmp2
,
tmp2
);
// tmp3 = B4,B4,B4,B4,B5,B5,B5,B5,B6,B6,B6,B6,B7,B7,B7,B7
tmp4
=
_mm_unpacklo_epi32
(
tmp3
,
tmp3
);
// tmp4 - B4,B4,B4,B4,B4,B4,B4,B4,B5,B5,B5,B5,B5,B5,B5,B5
*
o_128
++=
_mm_cmpeq_epi8
(
_mm_and_si128
(
tmp4
,
BIT_MASK
),
BIT_MASK
);;
tmp4
=
_mm_unpackhi_epi32
(
tmp3
,
tmp3
);
// tmp4 - B6,B6,B6,B6,B6,B6,B6,B6,B7,B7,B7,B7,B7,B7,B7,B7
*
o_128
++=
_mm_cmpeq_epi8
(
_mm_and_si128
(
tmp4
,
BIT_MASK
),
BIT_MASK
);;
tmp2
=
_mm_unpackhi_epi8
(
tmp1
,
tmp1
);
// tmp2 = B8,B8,B9,B9,...,B15,B15
tmp3
=
_mm_unpacklo_epi16
(
tmp2
,
tmp2
);
// tmp3 = B8,B8,B8,B8,B9,B9,B9,B9,B10,B10,B10,B10,B11,B11,B11,B11
tmp4
=
_mm_unpacklo_epi32
(
tmp3
,
tmp3
);
// tmp4 = B8,B8,B8,B8,B8,B8,B8,B8,B9,B9,B9,B9,B9,B9,B9,B9
*
o_128
++=
_mm_cmpeq_epi8
(
_mm_and_si128
(
tmp4
,
BIT_MASK
),
BIT_MASK
);;
tmp4
=
_mm_unpackhi_epi32
(
tmp3
,
tmp3
);
// tmp4 = B10,B10,B10,B10,B10,B10,B10,B10,B11,B11,B11,B11,B11,B11,B11,B11
*
o_128
++=
_mm_cmpeq_epi8
(
_mm_and_si128
(
tmp4
,
BIT_MASK
),
BIT_MASK
);;
tmp3
=
_mm_unpackhi_epi16
(
tmp2
,
tmp2
);
// tmp3 = B12,B12,B12,B12,B13,B13,B13,B13,B14,B14,B14,B14,B15,B15,B15,B15
tmp4
=
_mm_unpacklo_epi32
(
tmp3
,
tmp3
);
// tmp4 = B12,B12,B12,B12,B12,B12,B12,B12,B13,B13,B13,B13,B13,B13,B13,B13
*
o_128
++=
_mm_cmpeq_epi8
(
_mm_and_si128
(
tmp4
,
BIT_MASK
),
BIT_MASK
);;
tmp4
=
_mm_unpackhi_epi32
(
tmp3
,
tmp3
);
// tmp4 = B14,B14,B14,B14,B14,B14,B14,B14,B15,B15,B15,B15,B15,B15,B15,B15
*
o_128
++=
_mm_cmpeq_epi8
(
_mm_and_si128
(
tmp4
,
BIT_MASK
),
BIT_MASK
);;
#else
...
...
@@ -281,7 +269,6 @@ char interleave_compact_byte(short * base_interleaver,unsigned char * input, uns
//print_bytes2("out",(uint8_t*)o_256);
o_256
[
4
]
=
_mm256_cmpeq_epi8
(
_mm256_and_si256
(
tmp7
,
BIT_MASK
),
BIT_MASK
);;
//print_bytes2("out",(uint8_t*)(o_256+4));
tmp3
=
_mm256_unpackhi_epi16
(
tmp2
,
tmp2
);
// tmp3 = B4,B4,B4,B4,B5,B5,B5,B5,B6,B6,B6,B6,B7,B7,B7,B7,B20,B20,B20,B20,...,B23,B23,B23,B23
tmp4
=
_mm256_unpacklo_epi32
(
tmp3
,
tmp3
);
// tmp4 - B4,B4,B4,B4,B4,B4,B4,B4,B5,B5,B5,B5,B5,B5,B5,B5,B20,B20...,B21..,B21
tmp5
=
_mm256_unpackhi_epi32
(
tmp3
,
tmp3
);
// tmp5 - B6,B6,B6,B6,B6,B6,B6,B6,B7,B7,B7,B7,B7,B7,B7,B7,B22...,B22,B23,...,B23
...
...
@@ -297,7 +284,6 @@ char interleave_compact_byte(short * base_interleaver,unsigned char * input, uns
//print_bytes2("out",(uint8_t*)(o_256+1));
o_256
[
5
]
=
_mm256_cmpeq_epi8
(
_mm256_and_si256
(
tmp7
,
BIT_MASK
),
BIT_MASK
);;
//print_bytes2("out",(uint8_t*)(o_256+4));
tmp2
=
_mm256_unpackhi_epi8
(
tmp1
,
tmp1
);
// tmp2 = B8 B9 B10 B11 B12 B13 B14 B15 B25 B26 B27 B28 B29 B30 B31
tmp3
=
_mm256_unpacklo_epi16
(
tmp2
,
tmp2
);
// tmp3 = B8,B9,B10,B11,B26,B27,B28,B29
tmp4
=
_mm256_unpacklo_epi32
(
tmp3
,
tmp3
);
// tmp4 - B8,B9,B26,B27
...
...
@@ -314,7 +300,6 @@ char interleave_compact_byte(short * base_interleaver,unsigned char * input, uns
//print_bytes2("out",(uint8_t*)(o_256+2));
o_256
[
6
]
=
_mm256_cmpeq_epi8
(
_mm256_and_si256
(
tmp7
,
BIT_MASK
),
BIT_MASK
);;
//print_bytes2("out",(uint8_t*)(o_256+4));
tmp3
=
_mm256_unpackhi_epi16
(
tmp2
,
tmp2
);
// tmp3 = B12 B13 B14 B15 B28 B29 B30 B31
tmp4
=
_mm256_unpacklo_epi32
(
tmp3
,
tmp3
);
// tmp4 = B12 B13 B28 B29
tmp5
=
_mm256_unpackhi_epi32
(
tmp3
,
tmp3
);
// tmp5 = B14 B15 B30 B31
...
...
@@ -330,48 +315,35 @@ char interleave_compact_byte(short * base_interleaver,unsigned char * input, uns
//print_bytes2("out",(uint8_t*)(o_256+3));
o_256
[
7
]
=
_mm256_cmpeq_epi8
(
_mm256_and_si256
(
tmp7
,
BIT_MASK
),
BIT_MASK
);;
//print_bytes2("out",(uint8_t*)(o_256+7));
o_256
+=
8
;
#endif
#elif defined(__arm__)
tmp1
=
vld1q_u8
((
uint8_t
*
)
i_128
);
tmp1
=
vld1q_u8
((
uint8_t
*
)
i_128
);
//print_bytes("tmp1:",(uint8_t*)&tmp1);
uint8x16x2_t
temp1
=
vzipq_u8
(
tmp1
,
tmp1
);
tmp2
=
temp1
.
val
[
0
];
uint16x8x2_t
temp2
=
vzipq_u16
((
uint16x8_t
)
tmp2
,(
uint16x8_t
)
tmp2
);
tmp3
=
temp2
.
val
[
0
];
uint32x4x2_t
temp3
=
vzipq_u32
((
uint32x4_t
)
tmp3
,(
uint32x4_t
)
tmp3
);
tmp4
=
temp3
.
val
[
0
];
//print_bytes("tmp4:",(uint8_t*)&tmp4);
*
o_128
++=
vceqq_u8
(
vandq_u8
((
uint8x16_t
)
tmp4
,
BIT_MASK
),
BIT_MASK
);
//1
//print_bytes("o:",(uint8_t*)(o_128-1));
tmp4
=
temp3
.
val
[
1
];
//print_bytes("tmp4:",(uint8_t*)&tmp4);
*
o_128
++=
vceqq_u8
(
vandq_u8
((
uint8x16_t
)
tmp4
,
BIT_MASK
),
BIT_MASK
);
//2
//print_bytes("o:",(uint8_t*)(o_128-1));
tmp3
=
temp2
.
val
[
1
];
temp3
=
vzipq_u32
((
uint32x4_t
)
tmp3
,(
uint32x4_t
)
tmp3
);
tmp4
=
temp3
.
val
[
0
];
//print_bytes("tmp4:",(uint8_t*)&tmp4);
*
o_128
++=
vceqq_u8
(
vandq_u8
((
uint8x16_t
)
tmp4
,
BIT_MASK
),
BIT_MASK
);
//3
//print_bytes("o:",(uint8_t*)(o_128-1));
tmp4
=
temp3
.
val
[
1
];
//print_bytes("tmp4:",(uint8_t*)&tmp4);
*
o_128
++=
vceqq_u8
(
vandq_u8
((
uint8x16_t
)
tmp4
,
BIT_MASK
),
BIT_MASK
);
//4
//and_tmp = vandq_u8((uint8x16_t)tmp4,BIT_MASK); print_bytes("and:",and_tmp);
//print_bytes("o:",(uint8_t*)(o_128-1));
temp1
=
vzipq_u8
(
tmp1
,
tmp1
);
tmp2
=
temp1
.
val
[
1
];
temp2
=
vzipq_u16
((
uint16x8_t
)
tmp2
,(
uint16x8_t
)
tmp2
);
...
...
@@ -379,38 +351,28 @@ char interleave_compact_byte(short * base_interleaver,unsigned char * input, uns
temp3
=
vzipq_u32
((
uint32x4_t
)
tmp3
,(
uint32x4_t
)
tmp3
);
tmp4
=
temp3
.
val
[
0
];
//print_bytes("tmp4:",(uint8_t*)&tmp4);
*
o_128
++=
vceqq_u8
(
vandq_u8
((
uint8x16_t
)
tmp4
,
BIT_MASK
),
BIT_MASK
);
//5
//print_bytes("o:",(uint8_t*)(o_128-1));
tmp4
=
temp3
.
val
[
1
];
//print_bytes("tmp4:",(uint8_t*)&tmp4);
*
o_128
++=
vceqq_u8
(
vandq_u8
((
uint8x16_t
)
tmp4
,
BIT_MASK
),
BIT_MASK
);
//6
//print_bytes("o:",(uint8_t*)(o_128-1));
temp2
=
vzipq_u16
((
uint16x8_t
)
tmp2
,(
uint16x8_t
)
tmp2
);
tmp3
=
temp2
.
val
[
1
];
temp3
=
vzipq_u32
((
uint32x4_t
)
tmp3
,(
uint32x4_t
)
tmp3
);
tmp4
=
temp3
.
val
[
0
];
//print_bytes("tmp4:",(uint8_t*)&tmp4);
*
o_128
++=
vceqq_u8
(
vandq_u8
((
uint8x16_t
)
tmp4
,
BIT_MASK
),
BIT_MASK
);
//7
//print_bytes("o:",(uint8_t*)(o_128-1));
tmp4
=
temp3
.
val
[
1
];
//print_bytes("tmp4:",(uint8_t*)&tmp4);
*
o_128
++=
vceqq_u8
(
vandq_u8
((
uint8x16_t
)
tmp4
,
BIT_MASK
),
BIT_MASK
);
//7
//print_bytes("o:",(uint8_t*)(o_128-1));
i_128
++
;
#endif
}
short
*
ptr_intl
=
base_interleaver
;
short
*
ptr_intl
=
base_interleaver
;
#if defined(__x86_64) || defined(__i386__)
#ifndef __AVX2__
__m128i
tmp
;
...
...
@@ -423,8 +385,7 @@ char interleave_compact_byte(short * base_interleaver,unsigned char * input, uns
uint8x16_t
tmp
;
const
uint8_t
__attribute__
((
aligned
(
16
)))
_Powers
[
16
]
=
{
1
,
2
,
4
,
8
,
16
,
32
,
64
,
128
,
1
,
2
,
4
,
8
,
16
,
32
,
64
,
128
};
// Set the powers of 2 (do it once for all, if applicable)
// Set the powers of 2 (do it once for all, if applicable)
uint8x16_t
Powers
=
vld1q_u8
(
_Powers
);
uint8_t
*
systematic2_ptr
=
(
uint8_t
*
)
output
;
#endif
...
...
@@ -435,8 +396,6 @@ char interleave_compact_byte(short * base_interleaver,unsigned char * input, uns
#endif
for
(
i
=
0
;
i
<
input_length_words
;
i
++
)
{
#if defined(__x86_64__) || defined(__i386__)
#ifndef __AVX2__
tmp
=
_mm_insert_epi8
(
tmp
,
expandInput
[
*
ptr_intl
++
],
7
);
...
...
@@ -465,7 +424,6 @@ char interleave_compact_byte(short * base_interleaver,unsigned char * input, uns
tmp
=
_mm256_insert_epi8
(
tmp
,
expandInput
[
*
ptr_intl
++
],
2
);
tmp
=
_mm256_insert_epi8
(
tmp
,
expandInput
[
*
ptr_intl
++
],
1
);
tmp
=
_mm256_insert_epi8
(
tmp
,
expandInput
[
*
ptr_intl
++
],
0
);
tmp
=
_mm256_insert_epi8
(
tmp
,
expandInput
[
*
ptr_intl
++
],
8
+
7
);
tmp
=
_mm256_insert_epi8
(
tmp
,
expandInput
[
*
ptr_intl
++
],
8
+
6
);
tmp
=
_mm256_insert_epi8
(
tmp
,
expandInput
[
*
ptr_intl
++
],
8
+
5
);
...
...
@@ -474,7 +432,6 @@ char interleave_compact_byte(short * base_interleaver,unsigned char * input, uns
tmp
=
_mm256_insert_epi8
(
tmp
,
expandInput
[
*
ptr_intl
++
],
8
+
2
);
tmp
=
_mm256_insert_epi8
(
tmp
,
expandInput
[
*
ptr_intl
++
],
8
+
1
);
tmp
=
_mm256_insert_epi8
(
tmp
,
expandInput
[
*
ptr_intl
++
],
8
+
0
);
tmp
=
_mm256_insert_epi8
(
tmp
,
expandInput
[
*
ptr_intl
++
],
16
+
7
);
tmp
=
_mm256_insert_epi8
(
tmp
,
expandInput
[
*
ptr_intl
++
],
16
+
6
);
tmp
=
_mm256_insert_epi8
(
tmp
,
expandInput
[
*
ptr_intl
++
],
16
+
5
);
...
...
@@ -483,7 +440,6 @@ char interleave_compact_byte(short * base_interleaver,unsigned char * input, uns
tmp
=
_mm256_insert_epi8
(
tmp
,
expandInput
[
*
ptr_intl
++
],
16
+
2
);
tmp
=
_mm256_insert_epi8
(
tmp
,
expandInput
[
*
ptr_intl
++
],
16
+
1
);
tmp
=
_mm256_insert_epi8
(
tmp
,
expandInput
[
*
ptr_intl
++
],
16
+
0
);
tmp
=
_mm256_insert_epi8
(
tmp
,
expandInput
[
*
ptr_intl
++
],
24
+
7
);
tmp
=
_mm256_insert_epi8
(
tmp
,
expandInput
[
*
ptr_intl
++
],
24
+
6
);
tmp
=
_mm256_insert_epi8
(
tmp
,
expandInput
[
*
ptr_intl
++
],
24
+
5
);
...
...
@@ -492,7 +448,6 @@ char interleave_compact_byte(short * base_interleaver,unsigned char * input, uns
tmp
=
_mm256_insert_epi8
(
tmp
,
expandInput
[
*
ptr_intl
++
],
24
+
2
);
tmp
=
_mm256_insert_epi8
(
tmp
,
expandInput
[
*
ptr_intl
++
],
24
+
1
);
tmp
=
_mm256_insert_epi8
(
tmp
,
expandInput
[
*
ptr_intl
++
],
24
+
0
);
*
systematic2_ptr
++=
(
unsigned
int
)
_mm256_movemask_epi8
(
tmp
);
#endif
#elif defined(__arm__)
...
...
@@ -512,11 +467,10 @@ char interleave_compact_byte(short * base_interleaver,unsigned char * input, uns
tmp
=
vsetq_lane_u8
(
expandInput
[
*
ptr_intl
++
],
tmp
,
8
+
2
);
tmp
=
vsetq_lane_u8
(
expandInput
[
*
ptr_intl
++
],
tmp
,
8
+
1
);
tmp
=
vsetq_lane_u8
(
expandInput
[
*
ptr_intl
++
],
tmp
,
8
+
0
);
// Compute the mask from the input
// Compute the mask from the input
uint64x2_t
Mask
=
vpaddlq_u32
(
vpaddlq_u16
(
vpaddlq_u8
(
vandq_u8
(
tmp
,
Powers
))));
vst1q_lane_u8
(
systematic2_ptr
++
,
(
uint8x16_t
)
Mask
,
0
);
vst1q_lane_u8
(
systematic2_ptr
++
,
(
uint8x16_t
)
Mask
,
8
);
#endif
}
...
...
@@ -537,14 +491,12 @@ char interleave_compact_byte(short * base_interleaver,unsigned char * input, uns
void
threegpplte_turbo_encoder_sse
(
unsigned
char
*
input
,
unsigned
short
input_length_bytes
,
unsigned
char
*
output
,
unsigned
char
F
)
{
unsigned
char
F
)
{
int
i
;
unsigned
char
*
x
;
unsigned
char
state0
=
0
,
state1
=
0
;
unsigned
short
input_length_bits
=
input_length_bytes
<<
3
;
short
*
base_interleaver
;
short
*
base_interleaver
;
if
(
all_treillis_initialized
==
0
)
{
treillis_table_init
();
...
...
@@ -560,15 +512,12 @@ void threegpplte_turbo_encoder_sse(unsigned char *input,
base_interleaver
=
il_tb
+
f1f2mat
[
i
].
beg_index
;
}
unsigned
char
systematic2
[
768
]
__attribute__
((
aligned
(
32
)));
interleave_compact_byte
(
base_interleaver
,
input
,
systematic2
,
input_length_bytes
);
#if defined(__x86_64__) || defined(__i386__)
__m64
*
ptr_output
=
(
__m64
*
)
output
;
__m64
*
ptr_output
=
(
__m64
*
)
output
;
#elif defined(__arm__)
uint8x8_t
*
ptr_output
=
(
uint8x8_t
*
)
output
;
uint8x8_t
*
ptr_output
=
(
uint8x8_t
*
)
output
;
#endif
unsigned
char
cur_s1
,
cur_s2
;
int
code_rate
;
...
...
@@ -584,11 +533,8 @@ void threegpplte_turbo_encoder_sse(unsigned char *input,
_mm_add_pi8(all_treillis[state0][cur_s1].parity1_64[code_rate],
all_treillis[state1][cur_s2].parity2_64[code_rate]));
*/
*
ptr_output
++
=
_mm_add_pi8
(
all_treillis
[
state0
][
cur_s1
].
systematic_andp1_64
[
code_rate
],
all_treillis
[
state1
][
cur_s2
].
parity2_64
[
code_rate
]);
#elif defined(__arm__)
*
ptr_output
++
=
vadd_u8
(
all_treillis
[
state0
][
cur_s1
].
systematic_andp1_64
[
code_rate
],
all_treillis
[
state0
][
cur_s1
].
parity2_64
[
code_rate
]);
...
...
@@ -600,36 +546,30 @@ void threegpplte_turbo_encoder_sse(unsigned char *input,
}
x
=
output
+
(
input_length_bits
*
3
);
// Trellis termination
threegpplte_rsc_termination
(
&
x
[
0
],
&
x
[
1
],
&
state0
);
#ifdef DEBUG_TURBO_ENCODER
printf
(
"term: x0 %
d, x1 %d
, state0 %d
\n
"
,
x
[
0
],
x
[
1
],
state0
);
printf
(
"term: x0 %
u, x1 %u
, state0 %d
\n
"
,
x
[
0
],
x
[
1
],
state0
);
#endif //DEBUG_TURBO_ENCODER
threegpplte_rsc_termination
(
&
x
[
2
],
&
x
[
3
],
&
state0
);
#ifdef DEBUG_TURBO_ENCODER
printf
(
"term: x0 %
d, x1 %d
, state0 %d
\n
"
,
x
[
2
],
x
[
3
],
state0
);
printf
(
"term: x0 %
u, x1 %u
, state0 %d
\n
"
,
x
[
2
],
x
[
3
],
state0
);
#endif //DEBUG_TURBO_ENCODER
threegpplte_rsc_termination
(
&
x
[
4
],
&
x
[
5
],
&
state0
);
#ifdef DEBUG_TURBO_ENCODER
printf
(
"term: x0 %
d, x1 %d
, state0 %d
\n
"
,
x
[
4
],
x
[
5
],
state0
);
printf
(
"term: x0 %
u, x1 %u
, state0 %d
\n
"
,
x
[
4
],
x
[
5
],
state0
);
#endif //DEBUG_TURBO_ENCODER
threegpplte_rsc_termination
(
&
x
[
6
],
&
x
[
7
],
&
state1
);
#ifdef DEBUG_TURBO_ENCODER
printf
(
"term: x0 %
d, x1 %d
, state1 %d
\n
"
,
x
[
6
],
x
[
7
],
state1
);
printf
(
"term: x0 %
u, x1 %u
, state1 %d
\n
"
,
x
[
6
],
x
[
7
],
state1
);
#endif //DEBUG_TURBO_ENCODER
threegpplte_rsc_termination
(
&
x
[
8
],
&
x
[
9
],
&
state1
);
#ifdef DEBUG_TURBO_ENCODER
printf
(
"term: x0 %
d, x1 %d
, state1 %d
\n
"
,
x
[
8
],
x
[
9
],
state1
);
printf
(
"term: x0 %
u, x1 %u
, state1 %d
\n
"
,
x
[
8
],
x
[
9
],
state1
);
#endif //DEBUG_TURBO_ENCODER
threegpplte_rsc_termination
(
&
x
[
10
],
&
x
[
11
],
&
state1
);
#ifdef DEBUG_TURBO_ENCODER
printf
(
"term: x0 %
d, x1 %d
, state1 %d
\n
"
,
x
[
10
],
x
[
11
],
state1
);
printf
(
"term: x0 %
u, x1 %u
, state1 %d
\n
"
,
x
[
10
],
x
[
11
],
state1
);
#endif //DEBUG_TURBO_ENCODER
#if defined(__x86_64__) || defined(__i386__)
_mm_empty
();
...
...
@@ -643,16 +583,17 @@ void init_encoder_sse (void) {
/*
 * Function which will be called by the shared lib loader, to check the shared
 * lib version against the main exec version. A version mismatch is not
 * considered fatal (interfaces are not supposed to change): it is only
 * reported on stderr.
 *
 * mainexec_buildversion : version string of the main executable; must be a
 *                         valid NUL-terminated string (it is passed to strcmp)
 * shlib_buildversion    : out, set to this library's PACKAGE_VERSION string
 *
 * Always returns 0.
 */
int coding_checkbuildver(char *mainexec_buildversion, char **shlib_buildversion)
{
#ifndef PACKAGE_VERSION
#define PACKAGE_VERSION "standalone built: " __DATE__ __TIME__
#endif
  *shlib_buildversion = PACKAGE_VERSION;

  if (strcmp(mainexec_buildversion, *shlib_buildversion) != 0) {
    /* Argument order follows the message: library version first, then main. */
    fprintf(stderr, "[CODING] shared lib version %s, doesn't match main version %s, compatibility should be checked\n",
            *shlib_buildversion, mainexec_buildversion);
  }

  return 0;
}
...
...
@@ -661,9 +602,7 @@ int coding_checkbuildver(char * mainexec_buildversion, char ** shlib_buildversi
#define F1 21
#define F2 120
int
main
(
int
argc
,
char
**
argv
)
{
int
main
(
int
argc
,
char
**
argv
)
{
unsigned
char
input
[
INPUT_LENGTH
+
32
],
state
,
state2
;
unsigned
char
output
[
12
+
(
3
*
(
INPUT_LENGTH
<<
3
))],
x
,
z
;
int
i
;
...
...
@@ -680,16 +619,16 @@ int main(int argc,char **argv)
printf
(
"
\n
"
);
for
(
state
=
0
;
state
<
8
;
state
++
)
{
state2
=
state
;
threegpplte_rsc_termination
(
&
x
,
&
z
,
&
state2
);
printf
(
"Termination: (%d->%d) : (%d,%d)
\n
"
,
state
,
state2
,
x
,
z
);
}
memset
((
void
*
)
input
,
0
,
INPUT_LENGTH
+
16
);
memset
((
void
*
)
input
,
0
,
INPUT_LENGTH
+
16
);
for
(
i
=
0
;
i
<
INPUT_LENGTH
;
i
++
)
{
input
[
i
]
=
i
*
219
;
printf
(
"Input %d : %
d
\n
"
,
i
,
input
[
i
]);
printf
(
"Input %d : %
u
\n
"
,
i
,
input
[
i
]);
}
threegpplte_turbo_encoder_sse
(
&
input
[
0
],
...
...
@@ -697,11 +636,10 @@ int main(int argc,char **argv)
&
output
[
0
],
0
);
for
(
i
=
0
;
i
<
12
+
(
INPUT_LENGTH
*
24
);
i
++
)
printf
(
"%u"
,
output
[
i
]);
for
(
i
=
0
;
i
<
12
+
(
INPUT_LENGTH
*
24
);
i
++
)
printf
(
"%d"
,
output
[
i
]);
printf
(
"
\n
"
);
return
(
0
);
}
...
...
openair1/PHY/CODING/3gpplte_turbo_decoder_sse.c
View file @
1cb484f1
...
...
@@ -38,15 +38,15 @@
#include "PHY/sse_intrin.h"
#ifndef TEST_DEBUG
#include "PHY/defs.h"
#include "PHY/CODING/defs.h"
#include "PHY/CODING/lte_interleaver_inline.h"
#include "PHY/defs.h"
#include "PHY/CODING/defs.h"
#include "PHY/CODING/lte_interleaver_inline.h"
#else
#include "defs.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "defs.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#endif
#define SHUFFLE16(a,b,c,d,e,f,g,h) _mm_set_epi8(h==-1?-1:h*2+1, \
...
...
@@ -75,44 +75,40 @@
#ifdef LLR8
typedef
int8_t
llr_t
;
// internal decoder LLR data is 8-bit fixed
typedef
int8_t
channel_t
;
#define MAX 64
typedef
int8_t
llr_t
;
// internal decoder LLR data is 8-bit fixed
typedef
int8_t
channel_t
;
#define MAX 64
#else
typedef
int16_t
llr_t
;
// internal decoder LLR data is 16-bit fixed
typedef
int16_t
channel_t
;
#define MAX 256
typedef
int16_t
llr_t
;
// internal decoder LLR data is 16-bit fixed
typedef
int16_t
channel_t
;
#define MAX 256
#endif
void
log_map
(
llr_t
*
systematic
,
channel_t
*
y_parity
,
llr_t
*
m11
,
llr_t
*
m10
,
llr_t
*
alpha
,
llr_t
*
beta
,
llr_t
*
ext
,
unsigned
short
frame_length
,
unsigned
char
term_flag
,
unsigned
char
F
,
int
offset8_flag
,
void
log_map
(
llr_t
*
systematic
,
channel_t
*
y_parity
,
llr_t
*
m11
,
llr_t
*
m10
,
llr_t
*
alpha
,
llr_t
*
beta
,
llr_t
*
ext
,
unsigned
short
frame_length
,
unsigned
char
term_flag
,
unsigned
char
F
,
int
offset8_flag
,
time_stats_t
*
alpha_stats
,
time_stats_t
*
beta_stats
,
time_stats_t
*
gamma_stats
,
time_stats_t
*
ext_stats
);
void
compute_gamma
(
llr_t
*
m11
,
llr_t
*
m10
,
llr_t
*
systematic
,
channel_t
*
y_parity
,
unsigned
short
frame_length
,
unsigned
char
term_flag
);
void
compute_alpha
(
llr_t
*
alpha
,
llr_t
*
beta
,
llr_t
*
m11
,
llr_t
*
m10
,
unsigned
short
frame_length
,
unsigned
char
F
);
void
compute_beta
(
llr_t
*
alpha
,
llr_t
*
beta
,
llr_t
*
m11
,
llr_t
*
m10
,
unsigned
short
frame_length
,
unsigned
char
F
,
int
offset8_flag
);
void
compute_ext
(
llr_t
*
alpha
,
llr_t
*
beta
,
llr_t
*
m11
,
llr_t
*
m10
,
llr_t
*
extrinsic
,
llr_t
*
ap
,
unsigned
short
frame_length
);
void
compute_gamma
(
llr_t
*
m11
,
llr_t
*
m10
,
llr_t
*
systematic
,
channel_t
*
y_parity
,
unsigned
short
frame_length
,
unsigned
char
term_flag
);
void
compute_alpha
(
llr_t
*
alpha
,
llr_t
*
beta
,
llr_t
*
m11
,
llr_t
*
m10
,
unsigned
short
frame_length
,
unsigned
char
F
);
void
compute_beta
(
llr_t
*
alpha
,
llr_t
*
beta
,
llr_t
*
m11
,
llr_t
*
m10
,
unsigned
short
frame_length
,
unsigned
char
F
,
int
offset8_flag
);
void
compute_ext
(
llr_t
*
alpha
,
llr_t
*
beta
,
llr_t
*
m11
,
llr_t
*
m10
,
llr_t
*
extrinsic
,
llr_t
*
ap
,
unsigned
short
frame_length
);
/*
 * Print the label s followed by the 16 bytes stored at x, each shown as a
 * signed decimal value, comma separated, on a single line of stdout.
 *
 * s : label printed before the values
 * x : address of the 128-bit vector to dump; read as 16 int8_t elements
 */
void print_bytes(char *s, __m128i *x)
{
  int8_t *tempb = (int8_t *)x;

  printf("%s  : %d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d\n", s,
         tempb[0], tempb[1], tempb[2], tempb[3], tempb[4], tempb[5], tempb[6], tempb[7],
         tempb[8], tempb[9], tempb[10], tempb[11], tempb[12], tempb[13], tempb[14], tempb[15]);
}
void
log_map
(
llr_t
*
systematic
,
channel_t
*
y_parity
,
llr_t
*
m11
,
llr_t
*
m10
,
void
log_map
(
llr_t
*
systematic
,
channel_t
*
y_parity
,
llr_t
*
m11
,
llr_t
*
m10
,
llr_t
*
alpha
,
llr_t
*
beta
,
llr_t
*
ext
,
llr_t
*
ext
,
unsigned
short
frame_length
,
unsigned
char
term_flag
,
unsigned
char
F
,
...
...
@@ -120,13 +116,10 @@ void log_map(llr_t* systematic,
time_stats_t
*
alpha_stats
,
time_stats_t
*
beta_stats
,
time_stats_t
*
gamma_stats
,
time_stats_t
*
ext_stats
)
{
time_stats_t
*
ext_stats
)
{
#ifdef DEBUG_LOGMAP
msg
(
"log_map, frame_length %d
\n
"
,
frame_length
);
#endif
start_meas
(
gamma_stats
)
;
compute_gamma
(
m11
,
m10
,
systematic
,
y_parity
,
frame_length
,
term_flag
)
;
stop_meas
(
gamma_stats
);
...
...
@@ -139,19 +132,15 @@ void log_map(llr_t* systematic,
start_meas
(
ext_stats
)
;
compute_ext
(
alpha
,
beta
,
m11
,
m10
,
ext
,
systematic
,
frame_length
)
;
stop_meas
(
ext_stats
);
}
void
compute_gamma
(
llr_t
*
m11
,
llr_t
*
m10
,
llr_t
*
systematic
,
channel_t
*
y_parity
,
unsigned
short
frame_length
,
unsigned
char
term_flag
)
{
void
compute_gamma
(
llr_t
*
m11
,
llr_t
*
m10
,
llr_t
*
systematic
,
channel_t
*
y_parity
,
unsigned
short
frame_length
,
unsigned
char
term_flag
)
{
int
k
,
K1
;
__m128i
*
systematic128
=
(
__m128i
*
)
systematic
;
__m128i
*
y_parity128
=
(
__m128i
*
)
y_parity
;
__m128i
*
m10_128
=
(
__m128i
*
)
m10
;
__m128i
*
m11_128
=
(
__m128i
*
)
m11
;
#ifdef DEBUG_LOGMAP
msg
(
"compute_gamma, %p,%p,%p,%p,framelength %d
\n
"
,
m11
,
m10
,
systematic
,
y_parity
,
frame_length
);
#endif
...
...
@@ -159,7 +148,6 @@ void compute_gamma(llr_t* m11,llr_t* m10,llr_t* systematic,channel_t* y_parity,
K1
=
frame_length
>>
3
;
for
(
k
=
0
;
k
<
K1
;
k
++
)
{
m11_128
[
k
]
=
_mm_srai_epi16
(
_mm_adds_epi16
(
systematic128
[
k
],
y_parity128
[
k
]),
1
);
m10_128
[
k
]
=
_mm_srai_epi16
(
_mm_subs_epi16
(
systematic128
[
k
],
y_parity128
[
k
]),
1
);
/*
...
...
@@ -206,13 +194,11 @@ void compute_gamma(llr_t* m11,llr_t* m10,llr_t* systematic,channel_t* y_parity,
(int16_t)_mm_extract_epi16(m10_128[k],6),
(int16_t)_mm_extract_epi16(m10_128[k],7));
*/
}
// Termination
m11_128
[
k
]
=
_mm_srai_epi16
(
_mm_adds_epi16
(
systematic128
[
k
+
term_flag
],
y_parity128
[
k
]),
1
);
m10_128
[
k
]
=
_mm_srai_epi16
(
_mm_subs_epi16
(
systematic128
[
k
+
term_flag
],
y_parity128
[
k
]),
1
);
// printf("gamma (term): %d,%d, %d,%d, %d,%d\n",m11[k<<3],m10[k<<3],m11[1+(k<<3)],m10[1+(k<<3)],m11[2+(k<<3)],m10[2+(k<<3)]);
#else
register
__m128i
sl
,
sh
,
ypl
,
yph
;
//K128=_mm_set1_epi8(-128);
...
...
@@ -231,7 +217,6 @@ void compute_gamma(llr_t* m11,llr_t* m10,llr_t* systematic,channel_t* y_parity,
// m10_128[k] = _mm_subs_epi8(systematic128[k],y_parity128[k]);
// m11_128[k] = _mm_sub_epi8(_mm_avg_epu8(_mm_add_epi8(systematic128[k],K128),_mm_add_epi8(y_parity128[k],K128)),K128);
// m10_128[k] = _mm_sub_epi8(_mm_avg_epu8(_mm_add_epi8(systematic128[k],K128),_mm_add_epi8(_mm_sign_epi8(y_parity128[k],K128),K128)),K128);
/*
printf("gamma %d: s %d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d\n",
k,
...
...
@@ -309,7 +294,6 @@ void compute_gamma(llr_t* m11,llr_t* m10,llr_t* systematic,channel_t* y_parity,
}
// Termination
sl
=
_mm_cvtepi8_epi16
(
systematic128
[
k
+
term_flag
]);
sh
=
_mm_cvtepi8_epi16
(
_mm_srli_si128
(
systematic128
[
k
],
8
));
ypl
=
_mm_cvtepi8_epi16
(
y_parity128
[
k
+
term_flag
]);
...
...
@@ -318,7 +302,6 @@ void compute_gamma(llr_t* m11,llr_t* m10,llr_t* systematic,channel_t* y_parity,
_mm_srai_epi16
(
_mm_adds_epi16
(
sh
,
yph
),
1
));
m10_128
[
k
]
=
_mm_packs_epi16
(
_mm_srai_epi16
(
_mm_subs_epi16
(
sl
,
ypl
),
1
),
_mm_srai_epi16
(
_mm_subs_epi16
(
sh
,
yph
),
1
));
// m11_128[k] = _mm_adds_epi8(systematic128[k+term_flag],y_parity128[k]);
// m10_128[k] = _mm_subs_epi8(systematic128[k+term_flag],y_parity128[k]);
// m11_128[k] = _mm_sub_epi8(_mm_avg_epu8(_mm_add_epi8(systematic128[k+term_flag],K128),_mm_add_epi8(y_parity128[k],K128)),K128);
...
...
@@ -383,20 +366,17 @@ void compute_gamma(llr_t* m11,llr_t* m10,llr_t* systematic,channel_t* y_parity,
#endif
_mm_empty
();
_m_empty
();
}
#define L 40
void
compute_alpha
(
llr_t
*
alpha
,
llr_t
*
beta
,
llr_t
*
m_11
,
llr_t
*
m_10
,
unsigned
short
frame_length
,
unsigned
char
F
)
{
void
compute_alpha
(
llr_t
*
alpha
,
llr_t
*
beta
,
llr_t
*
m_11
,
llr_t
*
m_10
,
unsigned
short
frame_length
,
unsigned
char
F
)
{
int
k
,
l
,
l2
,
K1
,
rerun_flag
=
0
;
__m128i
*
alpha128
=
(
__m128i
*
)
alpha
,
*
alpha_ptr
;
__m128i
a0
,
a1
,
a2
,
a3
,
a4
,
a5
,
a6
,
a7
,
*
m11p
,
*
m10p
;
__m128i
m_b0
,
m_b1
,
m_b2
,
m_b3
,
m_b4
,
m_b5
,
m_b6
,
m_b7
;
__m128i
new0
,
new1
,
new2
,
new3
,
new4
,
new5
,
new6
,
new7
;
__m128i
alpha_max
;
#ifndef LLR8
l2
=
L
>>
3
;
K1
=
(
frame_length
>>
3
);
...
...
@@ -439,19 +419,16 @@ void compute_alpha(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sho
}
alpha_ptr
=
&
alpha128
[
0
];
m11p
=
(
__m128i
*
)
m_11
;
m10p
=
(
__m128i
*
)
m_10
;
m11p
=
(
__m128i
*
)
m_11
;
m10p
=
(
__m128i
*
)
m_10
;
for
(
k
=
0
;
k
<
l
;
k
++
)
{
a1
=
_mm_load_si128
(
&
alpha_ptr
[
1
]);
a3
=
_mm_load_si128
(
&
alpha_ptr
[
3
]);
a5
=
_mm_load_si128
(
&
alpha_ptr
[
5
]);
a7
=
_mm_load_si128
(
&
alpha_ptr
[
7
]);
m_b0
=
_mm_adds_epi16
(
a1
,
*
m11p
);
// m11
m_b4
=
_mm_subs_epi16
(
a1
,
*
m11p
);
// m00=-m11
m_b1
=
_mm_subs_epi16
(
a3
,
*
m10p
);
// m01=-m10
...
...
@@ -460,12 +437,10 @@ void compute_alpha(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sho
m_b6
=
_mm_subs_epi16
(
a5
,
*
m10p
);
// m01=-m10
m_b3
=
_mm_subs_epi16
(
a7
,
*
m11p
);
// m00=-m11
m_b7
=
_mm_adds_epi16
(
a7
,
*
m11p
);
// m11
a0
=
_mm_load_si128
(
&
alpha_ptr
[
0
]);
a2
=
_mm_load_si128
(
&
alpha_ptr
[
2
]);
a4
=
_mm_load_si128
(
&
alpha_ptr
[
4
]);
a6
=
_mm_load_si128
(
&
alpha_ptr
[
6
]);
new0
=
_mm_subs_epi16
(
a0
,
*
m11p
);
// m00=-m11
new4
=
_mm_adds_epi16
(
a0
,
*
m11p
);
// m11
new1
=
_mm_adds_epi16
(
a2
,
*
m10p
);
// m10
...
...
@@ -474,7 +449,6 @@ void compute_alpha(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sho
new6
=
_mm_adds_epi16
(
a4
,
*
m10p
);
// m10
new3
=
_mm_adds_epi16
(
a6
,
*
m11p
);
// m11
new7
=
_mm_subs_epi16
(
a6
,
*
m11p
);
// m00=-m11
a0
=
_mm_max_epi16
(
m_b0
,
new0
);
a1
=
_mm_max_epi16
(
m_b1
,
new1
);
a2
=
_mm_max_epi16
(
m_b2
,
new2
);
...
...
@@ -483,7 +457,6 @@ void compute_alpha(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sho
a5
=
_mm_max_epi16
(
m_b5
,
new5
);
a6
=
_mm_max_epi16
(
m_b6
,
new6
);
a7
=
_mm_max_epi16
(
m_b7
,
new7
);
alpha_max
=
_mm_max_epi16
(
a0
,
a1
);
alpha_max
=
_mm_max_epi16
(
alpha_max
,
a2
);
alpha_max
=
_mm_max_epi16
(
alpha_max
,
a3
);
...
...
@@ -491,7 +464,6 @@ void compute_alpha(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sho
alpha_max
=
_mm_max_epi16
(
alpha_max
,
a5
);
alpha_max
=
_mm_max_epi16
(
alpha_max
,
a6
);
alpha_max
=
_mm_max_epi16
(
alpha_max
,
a7
);
alpha_ptr
+=
8
;
m11p
++
;
m10p
++
;
...
...
@@ -503,7 +475,6 @@ void compute_alpha(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sho
alpha_ptr
[
5
]
=
_mm_subs_epi16
(
a5
,
alpha_max
);
alpha_ptr
[
6
]
=
_mm_subs_epi16
(
a6
,
alpha_max
);
alpha_ptr
[
7
]
=
_mm_subs_epi16
(
a7
,
alpha_max
);
}
/*
...
...
@@ -981,9 +952,7 @@ void compute_alpha(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sho
*/
#else
if
(
rerun_flag
==
0
)
{
alpha128
[
0
]
=
_mm_set_epi8
(
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
,
0
);
alpha128
[
1
]
=
_mm_set_epi8
(
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
);
alpha128
[
2
]
=
_mm_set_epi8
(
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
);
...
...
@@ -992,8 +961,6 @@ void compute_alpha(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sho
alpha128
[
5
]
=
_mm_set_epi8
(
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
);
alpha128
[
6
]
=
_mm_set_epi8
(
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
);
alpha128
[
7
]
=
_mm_set_epi8
(
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
,
-
MAX
/
2
);
}
else
{
alpha128
[
0
]
=
_mm_slli_si128
(
alpha128
[(
K1
<<
3
)],
1
);
alpha128
[
1
]
=
_mm_slli_si128
(
alpha128
[
1
+
(
K1
<<
3
)],
1
);
...
...
@@ -1025,15 +992,12 @@ void compute_alpha(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sho
print_bytes("a6:",&alpha_ptr[6]);
print_bytes("a7:",&alpha_ptr[7]);
*/
m11p
=
(
__m128i
*
)
m_11
;
m10p
=
(
__m128i
*
)
m_10
;
m11p
=
(
__m128i
*
)
m_11
;
m10p
=
(
__m128i
*
)
m_10
;
for
(
k
=
0
;
k
<
l
;
k
++
)
{
m_b0
=
_mm_adds_epi8
(
alpha_ptr
[
1
],
*
m11p
);
// m11
m_b4
=
_mm_subs_epi8
(
alpha_ptr
[
1
],
*
m11p
);
// m00=-m11
m_b1
=
_mm_subs_epi8
(
alpha_ptr
[
3
],
*
m10p
);
// m01=-m10
...
...
@@ -1042,7 +1006,6 @@ void compute_alpha(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sho
m_b6
=
_mm_subs_epi8
(
alpha_ptr
[
5
],
*
m10p
);
// m01=-m10
m_b3
=
_mm_subs_epi8
(
alpha_ptr
[
7
],
*
m11p
);
// m00=-m11
m_b7
=
_mm_adds_epi8
(
alpha_ptr
[
7
],
*
m11p
);
// m11
new0
=
_mm_subs_epi8
(
alpha_ptr
[
0
],
*
m11p
);
// m00=-m11
new4
=
_mm_adds_epi8
(
alpha_ptr
[
0
],
*
m11p
);
// m11
new1
=
_mm_adds_epi8
(
alpha_ptr
[
2
],
*
m10p
);
// m10
...
...
@@ -1051,7 +1014,6 @@ void compute_alpha(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sho
new6
=
_mm_adds_epi8
(
alpha_ptr
[
4
],
*
m10p
);
// m10
new3
=
_mm_adds_epi8
(
alpha_ptr
[
6
],
*
m11p
);
// m11
new7
=
_mm_subs_epi8
(
alpha_ptr
[
6
],
*
m11p
);
// m00=-m11
alpha_ptr
+=
8
;
m11p
++
;
m10p
++
;
...
...
@@ -1063,8 +1025,6 @@ void compute_alpha(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sho
alpha_ptr
[
5
]
=
_mm_max_epi8
(
m_b5
,
new5
);
alpha_ptr
[
6
]
=
_mm_max_epi8
(
m_b6
,
new6
);
alpha_ptr
[
7
]
=
_mm_max_epi8
(
m_b7
,
new7
);
// compute and subtract maxima
alpha_max
=
_mm_max_epi8
(
alpha_ptr
[
0
],
alpha_ptr
[
1
]);
alpha_max
=
_mm_max_epi8
(
alpha_max
,
alpha_ptr
[
2
]);
...
...
@@ -1073,7 +1033,6 @@ void compute_alpha(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sho
alpha_max
=
_mm_max_epi8
(
alpha_max
,
alpha_ptr
[
5
]);
alpha_max
=
_mm_max_epi8
(
alpha_max
,
alpha_ptr
[
6
]);
alpha_max
=
_mm_max_epi8
(
alpha_max
,
alpha_ptr
[
7
]);
alpha_ptr
[
0
]
=
_mm_subs_epi8
(
alpha_ptr
[
0
],
alpha_max
);
alpha_ptr
[
1
]
=
_mm_subs_epi8
(
alpha_ptr
[
1
],
alpha_max
);
alpha_ptr
[
2
]
=
_mm_subs_epi8
(
alpha_ptr
[
2
],
alpha_max
);
...
...
@@ -1109,14 +1068,11 @@ void compute_alpha(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sho
}
void
compute_beta
(
llr_t
*
alpha
,
llr_t
*
beta
,
llr_t
*
m_11
,
llr_t
*
m_10
,
unsigned
short
frame_length
,
unsigned
char
F
,
int
offset8_flag
)
{
void
compute_beta
(
llr_t
*
alpha
,
llr_t
*
beta
,
llr_t
*
m_11
,
llr_t
*
m_10
,
unsigned
short
frame_length
,
unsigned
char
F
,
int
offset8_flag
)
{
int
k
,
rerun_flag
=
0
;
__m128i
m11_128
,
m10_128
;
__m128i
m_b0
,
m_b1
,
m_b2
,
m_b3
,
m_b4
,
m_b5
,
m_b6
,
m_b7
;
__m128i
new0
,
new1
,
new2
,
new3
,
new4
,
new5
,
new6
,
new7
;
__m128i
*
beta128
,
*
alpha128
,
*
beta_ptr
;
__m128i
beta_max
;
int16_t
m11
,
m10
,
beta0_16
,
beta1_16
,
beta2_16
,
beta3_16
,
beta4_16
,
beta5_16
,
beta6_16
,
beta7_16
,
beta0_2
,
beta1_2
,
beta2_2
,
beta3_2
,
beta_m
;
...
...
@@ -1124,30 +1080,21 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor
#ifdef LLR8
llr_t
beta2
,
beta3
,
beta4
,
beta5
,
beta6
,
beta7
;
__m128i
beta_16
;
#endif
#ifdef DEBUG_LOGMAP
msg
(
"compute_beta, %p,%p,%p,%p,framelength %d,F %d
\n
"
,
beta
,
m_11
,
m_10
,
alpha
,
frame_length
,
F
);
#endif
// termination for beta initialization
// printf("beta init: offset8 %d\n",offset8_flag);
m11
=
(
int16_t
)
m_11
[
2
+
frame_length
];
m10
=
(
int16_t
)
m_10
[
2
+
frame_length
];
// printf("m11,m10 %d,%d\n",m11,m10);
beta0
=
-
m11
;
//M0T_TERM;
beta1
=
m11
;
//M1T_TERM;
m11
=
(
int16_t
)
m_11
[
1
+
frame_length
];
m10
=
(
int16_t
)
m_10
[
1
+
frame_length
];
// printf("m11,m10 %d,%d\n",m11,m10);
beta0_2
=
beta0
-
m11
;
//+M0T_TERM;
beta1_2
=
beta0
+
m11
;
//+M1T_TERM;
beta2_2
=
beta1
+
m10
;
//M2T_TERM;
...
...
@@ -1155,7 +1102,6 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor
m11
=
(
int16_t
)
m_11
[
frame_length
];
m10
=
(
int16_t
)
m_10
[
frame_length
];
// printf("m11,m10 %d,%d (%p)\n",m11,m10,m_11+frame_length);
beta0_16
=
beta0_2
-
m11
;
//+M0T_TERM;
beta1_16
=
beta0_2
+
m11
;
//+M1T_TERM;
beta2_16
=
beta1_2
+
m10
;
//+M2T_TERM;
...
...
@@ -1164,8 +1110,6 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor
beta5_16
=
beta2_2
+
m10
;
//+M5T_TERM;
beta6_16
=
beta3_2
+
m11
;
//+M6T_TERM;
beta7_16
=
beta3_2
-
m11
;
//+M7T_TERM;
beta_m
=
(
beta0_16
>
beta1_16
)
?
beta0_16
:
beta1_16
;
beta_m
=
(
beta_m
>
beta2_16
)
?
beta_m
:
beta2_16
;
beta_m
=
(
beta_m
>
beta3_16
)
?
beta_m
:
beta3_16
;
...
...
@@ -1173,8 +1117,6 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor
beta_m
=
(
beta_m
>
beta5_16
)
?
beta_m
:
beta5_16
;
beta_m
=
(
beta_m
>
beta6_16
)
?
beta_m
:
beta6_16
;
beta_m
=
(
beta_m
>
beta7_16
)
?
beta_m
:
beta7_16
;
beta0_16
=
beta0_16
-
beta_m
;
beta1_16
=
beta1_16
-
beta_m
;
beta2_16
=
beta2_16
-
beta_m
;
...
...
@@ -1183,7 +1125,6 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor
beta5_16
=
beta5_16
-
beta_m
;
beta6_16
=
beta6_16
-
beta_m
;
beta7_16
=
beta7_16
-
beta_m
;
#ifdef LLR8
beta_16
=
_mm_set_epi16
(
beta7_16
,
beta6_16
,
beta5_16
,
beta4_16
,
beta3_16
,
beta2_16
,
beta1_16
,
beta0_16
);
beta_16
=
_mm_packs_epi16
(
beta_16
,
beta_16
);
...
...
@@ -1199,8 +1140,8 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor
#endif
for
(
rerun_flag
=
0
;;
rerun_flag
=
1
)
{
beta_ptr
=
(
__m128i
*
)
&
beta
[
frame_length
<<
3
];
alpha128
=
(
__m128i
*
)
&
alpha
[
0
];
beta_ptr
=
(
__m128i
*
)
&
beta
[
frame_length
<<
3
];
alpha128
=
(
__m128i
*
)
&
alpha
[
0
];
if
(
rerun_flag
==
0
)
{
#ifndef LLR8
...
...
@@ -1223,9 +1164,8 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor
beta_ptr
[
7
]
=
alpha128
[
7
+
(
frame_length
>>
1
)];
#endif
}
else
{
beta128
=
(
__m128i
*
)
&
beta
[
0
];
beta128
=
(
__m128i
*
)
&
beta
[
0
];
#ifndef LLR8
beta_ptr
[
0
]
=
_mm_srli_si128
(
beta128
[
0
],
2
);
beta_ptr
[
1
]
=
_mm_srli_si128
(
beta128
[
1
],
2
);
beta_ptr
[
2
]
=
_mm_srli_si128
(
beta128
[
2
],
2
);
...
...
@@ -1255,7 +1195,6 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor
beta_ptr
[
5
]
=
_mm_insert_epi16
(
beta_ptr
[
5
],
beta5_16
,
7
);
beta_ptr
[
6
]
=
_mm_insert_epi16
(
beta_ptr
[
6
],
beta6_16
,
7
);
beta_ptr
[
7
]
=
_mm_insert_epi16
(
beta_ptr
[
7
],
beta7_16
,
7
);
/*
beta[7+(frame_length<<3)] = beta0_16;
beta[15+(frame_length<<3)] = beta1_16;
...
...
@@ -1277,18 +1216,15 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor
beta_ptr
[
6
]
=
_mm_insert_epi8
(
beta_ptr
[
6
],
beta6
,
15
);
beta_ptr
[
7
]
=
_mm_insert_epi8
(
beta_ptr
[
7
],
beta7
,
15
);
}
else
{
}
#endif
#ifndef LLR8
int
loopval
=
((
rerun_flag
==
0
)
?
0
:
((
frame_length
-
L
)
>>
3
));
for
(
k
=
(
frame_length
>>
3
)
-
1
;
k
>=
loopval
;
k
--
)
{
m11_128
=
((
__m128i
*
)
m_11
)[
k
];
m10_128
=
((
__m128i
*
)
m_10
)[
k
];
m11_128
=
((
__m128i
*
)
m_11
)[
k
];
m10_128
=
((
__m128i
*
)
m_10
)[
k
];
m_b0
=
_mm_adds_epi16
(
beta_ptr
[
4
],
m11_128
);
//m11
m_b1
=
_mm_subs_epi16
(
beta_ptr
[
4
],
m11_128
);
//m00
m_b2
=
_mm_subs_epi16
(
beta_ptr
[
5
],
m10_128
);
//m01
...
...
@@ -1297,7 +1233,6 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor
m_b5
=
_mm_subs_epi16
(
beta_ptr
[
6
],
m10_128
);
//m01
m_b6
=
_mm_subs_epi16
(
beta_ptr
[
7
],
m11_128
);
//m00
m_b7
=
_mm_adds_epi16
(
beta_ptr
[
7
],
m11_128
);
//m11
new0
=
_mm_subs_epi16
(
beta_ptr
[
0
],
m11_128
);
//m00
new1
=
_mm_adds_epi16
(
beta_ptr
[
0
],
m11_128
);
//m11
new2
=
_mm_adds_epi16
(
beta_ptr
[
1
],
m10_128
);
//m10
...
...
@@ -1306,9 +1241,7 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor
new5
=
_mm_adds_epi16
(
beta_ptr
[
2
],
m10_128
);
//m10
new6
=
_mm_adds_epi16
(
beta_ptr
[
3
],
m11_128
);
//m11
new7
=
_mm_subs_epi16
(
beta_ptr
[
3
],
m11_128
);
//m00
beta_ptr
-=
8
;
beta_ptr
[
0
]
=
_mm_max_epi16
(
m_b0
,
new0
);
beta_ptr
[
1
]
=
_mm_max_epi16
(
m_b1
,
new1
);
beta_ptr
[
2
]
=
_mm_max_epi16
(
m_b2
,
new2
);
...
...
@@ -1317,7 +1250,6 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor
beta_ptr
[
5
]
=
_mm_max_epi16
(
m_b5
,
new5
);
beta_ptr
[
6
]
=
_mm_max_epi16
(
m_b6
,
new6
);
beta_ptr
[
7
]
=
_mm_max_epi16
(
m_b7
,
new7
);
beta_max
=
_mm_max_epi16
(
beta_ptr
[
0
],
beta_ptr
[
1
]);
beta_max
=
_mm_max_epi16
(
beta_max
,
beta_ptr
[
2
]);
beta_max
=
_mm_max_epi16
(
beta_max
,
beta_ptr
[
3
]);
...
...
@@ -1325,7 +1257,6 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor
beta_max
=
_mm_max_epi16
(
beta_max
,
beta_ptr
[
5
]);
beta_max
=
_mm_max_epi16
(
beta_max
,
beta_ptr
[
6
]);
beta_max
=
_mm_max_epi16
(
beta_max
,
beta_ptr
[
7
]);
beta_ptr
[
0
]
=
_mm_subs_epi16
(
beta_ptr
[
0
],
beta_max
);
beta_ptr
[
1
]
=
_mm_subs_epi16
(
beta_ptr
[
1
],
beta_max
);
beta_ptr
[
2
]
=
_mm_subs_epi16
(
beta_ptr
[
2
],
beta_max
);
...
...
@@ -1334,14 +1265,11 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor
beta_ptr
[
5
]
=
_mm_subs_epi16
(
beta_ptr
[
5
],
beta_max
);
beta_ptr
[
6
]
=
_mm_subs_epi16
(
beta_ptr
[
6
],
beta_max
);
beta_ptr
[
7
]
=
_mm_subs_epi16
(
beta_ptr
[
7
],
beta_max
);
}
#else
#ifdef DEBUG_LOGMAP
printf
(
"beta0 %
d
: %03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d
\n
"
,
printf
(
"beta0 %
u
: %03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d
\n
"
,
(
frame_length
>>
4
),
_mm_extract_epi8
(
beta_ptr
[
0
],
0
),
_mm_extract_epi8
(
beta_ptr
[
0
],
1
),
...
...
@@ -1359,7 +1287,7 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor
_mm_extract_epi8
(
beta_ptr
[
0
],
13
),
_mm_extract_epi8
(
beta_ptr
[
0
],
14
),
_mm_extract_epi8
(
beta_ptr
[
0
],
15
));
printf
(
"beta1 %
d
: %03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d
\n
"
,
printf
(
"beta1 %
u
: %03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d
\n
"
,
(
frame_length
>>
4
),
_mm_extract_epi8
(
beta_ptr
[
1
],
0
),
_mm_extract_epi8
(
beta_ptr
[
1
],
1
),
...
...
@@ -1377,7 +1305,7 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor
_mm_extract_epi8
(
beta_ptr
[
1
],
13
),
_mm_extract_epi8
(
beta_ptr
[
1
],
14
),
_mm_extract_epi8
(
beta_ptr
[
1
],
15
));
printf
(
"beta2 %
d
: %03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d
\n
"
,
printf
(
"beta2 %
u
: %03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d
\n
"
,
(
frame_length
>>
4
),
_mm_extract_epi8
(
beta_ptr
[
2
],
0
),
_mm_extract_epi8
(
beta_ptr
[
2
],
1
),
...
...
@@ -1395,7 +1323,7 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor
_mm_extract_epi8
(
beta_ptr
[
2
],
13
),
_mm_extract_epi8
(
beta_ptr
[
2
],
14
),
_mm_extract_epi8
(
beta_ptr
[
2
],
15
));
printf
(
"beta3 %
d
: %03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d
\n
"
,
printf
(
"beta3 %
u
: %03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d
\n
"
,
(
frame_length
>>
4
),
_mm_extract_epi8
(
beta_ptr
[
3
],
0
),
_mm_extract_epi8
(
beta_ptr
[
3
],
1
),
...
...
@@ -1413,7 +1341,7 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor
_mm_extract_epi8
(
beta_ptr
[
3
],
13
),
_mm_extract_epi8
(
beta_ptr
[
3
],
14
),
_mm_extract_epi8
(
beta_ptr
[
3
],
15
));
printf
(
"beta4 %
d
: %03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d
\n
"
,
printf
(
"beta4 %
u
: %03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d
\n
"
,
(
frame_length
>>
4
),
_mm_extract_epi8
(
beta_ptr
[
4
],
0
),
_mm_extract_epi8
(
beta_ptr
[
4
],
1
),
...
...
@@ -1431,7 +1359,7 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor
_mm_extract_epi8
(
beta_ptr
[
4
],
13
),
_mm_extract_epi8
(
beta_ptr
[
4
],
14
),
_mm_extract_epi8
(
beta_ptr
[
4
],
15
));
printf
(
"beta5 %
d
: %03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d
\n
"
,
printf
(
"beta5 %
u
: %03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d
\n
"
,
(
frame_length
>>
4
),
_mm_extract_epi8
(
beta_ptr
[
5
],
0
),
_mm_extract_epi8
(
beta_ptr
[
5
],
1
),
...
...
@@ -1449,7 +1377,7 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor
_mm_extract_epi8
(
beta_ptr
[
5
],
13
),
_mm_extract_epi8
(
beta_ptr
[
5
],
14
),
_mm_extract_epi8
(
beta_ptr
[
5
],
15
));
printf
(
"beta6 %
d
: %03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d
\n
"
,
printf
(
"beta6 %
u
: %03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d
\n
"
,
(
frame_length
>>
4
),
_mm_extract_epi8
(
beta_ptr
[
6
],
0
),
_mm_extract_epi8
(
beta_ptr
[
6
],
1
),
...
...
@@ -1467,7 +1395,7 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor
_mm_extract_epi8
(
beta_ptr
[
6
],
13
),
_mm_extract_epi8
(
beta_ptr
[
6
],
14
),
_mm_extract_epi8
(
beta_ptr
[
6
],
15
));
printf
(
"beta7 %
d
: %03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d
\n
"
,
printf
(
"beta7 %
u
: %03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d
\n
"
,
(
frame_length
>>
4
),
_mm_extract_epi8
(
beta_ptr
[
7
],
0
),
_mm_extract_epi8
(
beta_ptr
[
7
],
1
),
...
...
@@ -1491,9 +1419,8 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor
__m128i
zeros
=
_mm_set1_epi8
(
0
);
for
(
k
=
(
frame_length
>>
4
)
-
1
;
k
>=
loopval
;
k
--
)
{
m11_128
=
((
__m128i
*
)
m_11
)[
k
];
m10_128
=
((
__m128i
*
)
m_10
)[
k
];
m11_128
=
((
__m128i
*
)
m_11
)[
k
];
m10_128
=
((
__m128i
*
)
m_10
)[
k
];
/*
if ((offset8_flag==1) && (k==((frame_length>>4)-9))) {
beta_ptr[0] = _mm_insert_epi8(beta_ptr[0],beta0,15);
...
...
@@ -1506,9 +1433,6 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor
beta_ptr[7] = _mm_insert_epi8(beta_ptr[7],beta7,15);
}*/
// print_bytes("m11:",&m11_128);
m_b0
=
_mm_adds_epi8
(
beta_ptr
[
4
],
m11_128
);
//m11
m_b1
=
_mm_subs_epi8
(
beta_ptr
[
4
],
m11_128
);
//m00
m_b2
=
_mm_subs_epi8
(
beta_ptr
[
5
],
m10_128
);
//m01
...
...
@@ -1517,7 +1441,6 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor
m_b5
=
_mm_subs_epi8
(
beta_ptr
[
6
],
m10_128
);
//m01
m_b6
=
_mm_subs_epi8
(
beta_ptr
[
7
],
m11_128
);
//m00
m_b7
=
_mm_adds_epi8
(
beta_ptr
[
7
],
m11_128
);
//m11
new0
=
_mm_subs_epi8
(
beta_ptr
[
0
],
m11_128
);
//m00
new1
=
_mm_adds_epi8
(
beta_ptr
[
0
],
m11_128
);
//m11
new2
=
_mm_adds_epi8
(
beta_ptr
[
1
],
m10_128
);
//m10
...
...
@@ -1526,9 +1449,7 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor
new5
=
_mm_adds_epi8
(
beta_ptr
[
2
],
m10_128
);
//m10
new6
=
_mm_adds_epi8
(
beta_ptr
[
3
],
m11_128
);
//m11
new7
=
_mm_subs_epi8
(
beta_ptr
[
3
],
m11_128
);
//m00
beta_ptr
-=
8
;
beta_ptr
[
0
]
=
_mm_max_epi8
(
m_b0
,
new0
);
beta_ptr
[
1
]
=
_mm_max_epi8
(
m_b1
,
new1
);
beta_ptr
[
2
]
=
_mm_max_epi8
(
m_b2
,
new2
);
...
...
@@ -1537,7 +1458,6 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor
beta_ptr
[
5
]
=
_mm_max_epi8
(
m_b5
,
new5
);
beta_ptr
[
6
]
=
_mm_max_epi8
(
m_b6
,
new6
);
beta_ptr
[
7
]
=
_mm_max_epi8
(
m_b7
,
new7
);
beta_max
=
_mm_max_epi8
(
beta_ptr
[
0
],
beta_ptr
[
1
]);
beta_max
=
_mm_max_epi8
(
beta_max
,
beta_ptr
[
2
]);
beta_max
=
_mm_max_epi8
(
beta_max
,
beta_ptr
[
3
]);
...
...
@@ -1553,7 +1473,6 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor
beta_ptr
[
5
]
=
_mm_subs_epi8
(
beta_ptr
[
5
],
beta_max
);
beta_ptr
[
6
]
=
_mm_subs_epi8
(
beta_ptr
[
6
],
beta_max
);
beta_ptr
[
7
]
=
_mm_subs_epi8
(
beta_ptr
[
7
],
beta_max
);
/*
printf("beta0 %d: %03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d\n",
k,
...
...
@@ -1700,7 +1619,6 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor
_mm_extract_epi8(beta_ptr[7],14),
_mm_extract_epi8(beta_ptr[7],15));
*/
}
#endif
...
...
@@ -1713,8 +1631,7 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor
_m_empty
();
}
void
compute_ext
(
llr_t
*
alpha
,
llr_t
*
beta
,
llr_t
*
m_11
,
llr_t
*
m_10
,
llr_t
*
ext
,
llr_t
*
systematic
,
unsigned
short
frame_length
)
{
void
compute_ext
(
llr_t
*
alpha
,
llr_t
*
beta
,
llr_t
*
m_11
,
llr_t
*
m_10
,
llr_t
*
ext
,
llr_t
*
systematic
,
unsigned
short
frame_length
)
{
__m128i
*
alpha128
=
(
__m128i
*
)
alpha
;
__m128i
*
beta128
=
(
__m128i
*
)
beta
;
__m128i
*
m11_128
,
*
m10_128
,
*
ext_128
;
...
...
@@ -1724,26 +1641,20 @@ void compute_ext(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext, ll
__m128i
m10_1
,
m10_2
,
m10_3
,
m10_4
;
__m128i
m11_1
,
m11_2
,
m11_3
,
m11_4
;
int
k
;
//
// LLR computation, 8 consequtive bits per loop
//
#ifdef DEBUG_LOGMAP
msg
(
"compute_ext, %p, %p, %p, %p, %p, %p ,framelength %d
\n
"
,
alpha
,
beta
,
m_11
,
m_10
,
ext
,
systematic
,
frame_length
);
#endif
alpha_ptr
=
alpha128
;
beta_ptr
=
&
beta128
[
8
];
#ifndef LLR8
for
(
k
=
0
;
k
<
(
frame_length
>>
3
);
k
++
)
{
m11_128
=
(
__m128i
*
)
&
m_11
[
k
<<
3
];
m10_128
=
(
__m128i
*
)
&
m_10
[
k
<<
3
];
ext_128
=
(
__m128i
*
)
&
ext
[
k
<<
3
];
m11_128
=
(
__m128i
*
)
&
m_11
[
k
<<
3
];
m10_128
=
(
__m128i
*
)
&
m_10
[
k
<<
3
];
ext_128
=
(
__m128i
*
)
&
ext
[
k
<<
3
];
/*
printf("EXT %03d\n",k);
print_shorts("a0:",&alpha_ptr[0]);
...
...
@@ -1809,23 +1720,18 @@ void compute_ext(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext, ll
m11_1
=
_mm_max_epi16
(
m11_1
,
m11_2
);
m11_1
=
_mm_max_epi16
(
m11_1
,
m11_3
);
m11_1
=
_mm_max_epi16
(
m11_1
,
m11_4
);
// print_shorts("m11_1:",&m11_1);
m01_1
=
_mm_subs_epi16
(
m01_1
,
*
m10_128
);
m00_1
=
_mm_subs_epi16
(
m00_1
,
*
m11_128
);
m10_1
=
_mm_adds_epi16
(
m10_1
,
*
m10_128
);
m11_1
=
_mm_adds_epi16
(
m11_1
,
*
m11_128
);
// print_shorts("m10_1:",&m10_1);
// print_shorts("m11_1:",&m11_1);
m01_1
=
_mm_max_epi16
(
m01_1
,
m00_1
);
m10_1
=
_mm_max_epi16
(
m10_1
,
m11_1
);
// print_shorts("m01_1:",&m01_1);
// print_shorts("m10_1:",&m10_1);
*
ext_128
=
_mm_subs_epi16
(
m10_1
,
m01_1
);
/*
print_shorts("ext:",ext_128);
print_shorts("m11:",m11_128);
...
...
@@ -1834,7 +1740,6 @@ void compute_ext(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext, ll
print_shorts("m01_1:",&m01_1);
print_shorts("syst:",systematic_128);
*/
alpha_ptr
+=
8
;
beta_ptr
+=
8
;
}
...
...
@@ -1842,11 +1747,9 @@ void compute_ext(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext, ll
#else
for
(
k
=
0
;
k
<
(
frame_length
>>
4
);
k
++
)
{
m11_128
=
(
__m128i
*
)
&
m_11
[
k
<<
4
];
m10_128
=
(
__m128i
*
)
&
m_10
[
k
<<
4
];
ext_128
=
(
__m128i
*
)
&
ext
[
k
<<
4
];
m11_128
=
(
__m128i
*
)
&
m_11
[
k
<<
4
];
m10_128
=
(
__m128i
*
)
&
m_10
[
k
<<
4
];
ext_128
=
(
__m128i
*
)
&
ext
[
k
<<
4
];
m00_4
=
_mm_adds_epi8
(
alpha_ptr
[
7
],
beta_ptr
[
3
]);
//ALPHA_BETA_4m00;
m11_4
=
_mm_adds_epi8
(
alpha_ptr
[
7
],
beta_ptr
[
7
]);
//ALPHA_BETA_4m11;
m00_3
=
_mm_adds_epi8
(
alpha_ptr
[
6
],
beta_ptr
[
7
]);
//ALPHA_BETA_3m00;
...
...
@@ -1863,7 +1766,6 @@ void compute_ext(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext, ll
m10_2
=
_mm_adds_epi8
(
alpha_ptr
[
3
],
beta_ptr
[
5
]);
//ALPHA_BETA_2m10;
m10_1
=
_mm_adds_epi8
(
alpha_ptr
[
2
],
beta_ptr
[
1
]);
//ALPHA_BETA_1m10;
m01_1
=
_mm_adds_epi8
(
alpha_ptr
[
2
],
beta_ptr
[
5
]);
//ALPHA_BETA_1m01;
m01_1
=
_mm_max_epi8
(
m01_1
,
m01_2
);
m01_1
=
_mm_max_epi8
(
m01_1
,
m01_3
);
m01_1
=
_mm_max_epi8
(
m01_1
,
m01_4
);
...
...
@@ -1876,29 +1778,20 @@ void compute_ext(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext, ll
m11_1
=
_mm_max_epi8
(
m11_1
,
m11_2
);
m11_1
=
_mm_max_epi8
(
m11_1
,
m11_3
);
m11_1
=
_mm_max_epi8
(
m11_1
,
m11_4
);
m01_1
=
_mm_subs_epi8
(
m01_1
,
*
m10_128
);
m00_1
=
_mm_subs_epi8
(
m00_1
,
*
m11_128
);
m10_1
=
_mm_adds_epi8
(
m10_1
,
*
m10_128
);
m11_1
=
_mm_adds_epi8
(
m11_1
,
*
m11_128
);
m01_1
=
_mm_max_epi8
(
m01_1
,
m00_1
);
m10_1
=
_mm_max_epi8
(
m10_1
,
m11_1
);
*
ext_128
=
_mm_subs_epi8
(
m10_1
,
m01_1
);
alpha_ptr
+=
8
;
beta_ptr
+=
8
;
}
#endif
_mm_empty
();
_m_empty
();
}
...
...
@@ -1906,8 +1799,7 @@ void compute_ext(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext, ll
//int pi2[n],pi3[n+8],pi5[n+8],pi4[n+8],pi6[n+8],
int
*
pi2tab
[
188
],
*
pi5tab
[
188
],
*
pi4tab
[
188
],
*
pi6tab
[
188
];
void
free_td
()
{
void
free_td
()
{
int
ind
;
for
(
ind
=
0
;
ind
<
188
;
ind
++
)
{
...
...
@@ -1918,21 +1810,17 @@ void free_td()
}
}
void
init_td
()
{
void
init_td
()
{
int
ind
,
i
,
i2
,
i3
,
j
,
n
,
n2
,
pi
,
pi3
;
short
*
base_interleaver
;
short
*
base_interleaver
;
for
(
ind
=
0
;
ind
<
188
;
ind
++
)
{
n
=
f1f2mat
[
ind
].
nb_bits
;
base_interleaver
=
il_tb
+
f1f2mat
[
ind
].
beg_index
;
pi2tab
[
ind
]
=
malloc
((
n
+
8
)
*
sizeof
(
int
));
pi5tab
[
ind
]
=
malloc
((
n
+
8
)
*
sizeof
(
int
));
pi4tab
[
ind
]
=
malloc
((
n
+
8
)
*
sizeof
(
int
));
pi6tab
[
ind
]
=
malloc
((
n
+
8
)
*
sizeof
(
int
));
#ifdef LLR8
if
((
n
&
15
)
>
0
)
{
...
...
@@ -1941,7 +1829,6 @@ void init_td()
n2
=
n
;
for
(
j
=
0
,
i
=
0
;
i
<
n2
;
i
++
,
j
+=
16
)
{
if
(
j
>=
n2
)
j
-=
(
n2
-
1
);
...
...
@@ -1956,10 +1843,8 @@ void init_td()
j
=
i2
;
for
(
i3
=
0
;
i3
<
(
n
>>
3
);
i3
++
,
i
++
,
j
+=
8
)
{
// if (j>=n)
// j-=(n-1);
pi2tab
[
ind
][
i
]
=
j
;
// printf("pi2[%d] = %d\n",i,j);
}
...
...
@@ -1967,7 +1852,6 @@ void init_td()
#endif
for
(
i
=
0
;
i
<
n2
;
i
++
)
{
pi
=
base_interleaver
[
i
];
//(unsigned int)threegpplte_interleaver(f1,f2,n);
pi3
=
pi2tab
[
ind
][
pi
];
...
...
@@ -1975,7 +1859,6 @@ void init_td()
pi5tab
[
ind
][
pi3
]
=
pi2tab
[
ind
][
i
];
pi6tab
[
ind
][
pi
]
=
pi2tab
[
ind
][
i
];
}
}
}
...
...
@@ -1991,33 +1874,25 @@ unsigned char phy_threegpplte_turbo_decoder(short *y,
time_stats_t
*
gamma_stats
,
time_stats_t
*
ext_stats
,
time_stats_t
*
intl1_stats
,
time_stats_t
*
intl2_stats
)
{
time_stats_t
*
intl2_stats
)
{
/* y is a pointer to the input
decoded_bytes is a pointer to the decoded output
n is the size in bits of the coded block, with the tail */
int
n2
;
#ifdef LLR8
llr_t
y8
[
3
*
(
n
+
16
)]
__attribute__
((
aligned
(
16
)));
#endif
llr_t
systematic0
[
n
+
16
]
__attribute__
((
aligned
(
16
)));
llr_t
systematic1
[
n
+
16
]
__attribute__
((
aligned
(
16
)));
llr_t
systematic2
[
n
+
16
]
__attribute__
((
aligned
(
16
)));
llr_t
yparity1
[
n
+
16
]
__attribute__
((
aligned
(
16
)));
llr_t
yparity2
[
n
+
16
]
__attribute__
((
aligned
(
16
)));
llr_t
ext
[
n
+
128
]
__attribute__
((
aligned
(
16
)));
llr_t
ext2
[
n
+
128
]
__attribute__
((
aligned
(
16
)));
llr_t
alpha
[(
n
+
16
)
*
8
]
__attribute__
((
aligned
(
16
)));
llr_t
beta
[(
n
+
16
)
*
8
]
__attribute__
((
aligned
(
16
)));
llr_t
m11
[
n
+
16
]
__attribute__
((
aligned
(
16
)));
llr_t
m10
[
n
+
16
]
__attribute__
((
aligned
(
16
)));
int
*
pi2_p
,
*
pi4_p
,
*
pi5_p
,
*
pi6_p
;
llr_t
*
s
,
*
s1
,
*
s2
,
*
yp1
,
*
yp2
,
*
yp
;
__m128i
*
yp128
;
...
...
@@ -2026,12 +1901,10 @@ unsigned char phy_threegpplte_turbo_decoder(short *y,
unsigned
int
crc
,
oldcrc
,
crc_len
;
uint8_t
temp
;
__m128i
tmp128
[(
n
+
8
)
>>
3
];
__m128i
tmp
,
zeros
=
_mm_setzero_si128
();
#ifdef LLR8
__m128i
MAX128
=
_mm_set1_epi16
(
MAX
/
2
);
#endif
register
__m128i
tmpe
;
int
offset8_flag
=
0
;
...
...
@@ -2040,9 +1913,7 @@ unsigned char phy_threegpplte_turbo_decoder(short *y,
return
255
;
}
start_meas
(
init_stats
);
#ifdef LLR8
if
((
n
&
15
)
>
0
)
{
...
...
@@ -2087,199 +1958,154 @@ unsigned char phy_threegpplte_turbo_decoder(short *y,
//((__m128i *)y8)[i] = _mm_packs_epi16(((__m128i *)y)[j],((__m128i *)y)[j+1]);
}
yp128
=
(
__m128i
*
)
y8
;
yp128
=
(
__m128i
*
)
y8
;
#else
yp128
=
(
__m128i
*
)
y
;
yp128
=
(
__m128i
*
)
y
;
#endif
s
=
systematic0
;
s1
=
systematic1
;
s2
=
systematic2
;
yp1
=
yparity1
;
yp2
=
yparity2
;
#ifndef LLR8
for
(
i
=
0
;
i
<
n2
;
i
+=
8
)
{
pi2_p
=
&
pi2tab
[
iind
][
i
];
j
=
pi2_p
[
0
];
tmpe
=
_mm_load_si128
(
yp128
);
s
[
j
]
=
_mm_extract_epi16
(
tmpe
,
0
);
yp1
[
j
]
=
_mm_extract_epi16
(
tmpe
,
1
);
yp2
[
j
]
=
_mm_extract_epi16
(
tmpe
,
2
);
// printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]);
j
=
pi2_p
[
1
];
s
[
j
]
=
_mm_extract_epi16
(
tmpe
,
3
);
yp1
[
j
]
=
_mm_extract_epi16
(
tmpe
,
4
);
yp2
[
j
]
=
_mm_extract_epi16
(
tmpe
,
5
);
// printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]);
j
=
pi2_p
[
2
];
s
[
j
]
=
_mm_extract_epi16
(
tmpe
,
6
);
yp1
[
j
]
=
_mm_extract_epi16
(
tmpe
,
7
);
tmpe
=
_mm_load_si128
(
&
yp128
[
1
]);
yp2
[
j
]
=
_mm_extract_epi16
(
tmpe
,
0
);
// printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]);
j
=
pi2_p
[
3
];
s
[
j
]
=
_mm_extract_epi16
(
tmpe
,
1
);
yp1
[
j
]
=
_mm_extract_epi16
(
tmpe
,
2
);
yp2
[
j
]
=
_mm_extract_epi16
(
tmpe
,
3
);
// printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]);
j
=
pi2_p
[
4
];
s
[
j
]
=
_mm_extract_epi16
(
tmpe
,
4
);
yp1
[
j
]
=
_mm_extract_epi16
(
tmpe
,
5
);
yp2
[
j
]
=
_mm_extract_epi16
(
tmpe
,
6
);
// printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]);
j
=
pi2_p
[
5
];
s
[
j
]
=
_mm_extract_epi16
(
tmpe
,
7
);
tmpe
=
_mm_load_si128
(
&
yp128
[
2
]);
yp1
[
j
]
=
_mm_extract_epi16
(
tmpe
,
0
);
yp2
[
j
]
=
_mm_extract_epi16
(
tmpe
,
1
);
// printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]);
j
=
pi2_p
[
6
];
s
[
j
]
=
_mm_extract_epi16
(
tmpe
,
2
);
yp1
[
j
]
=
_mm_extract_epi16
(
tmpe
,
3
);
yp2
[
j
]
=
_mm_extract_epi16
(
tmpe
,
4
);
// printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]);
j
=
pi2_p
[
7
];
s
[
j
]
=
_mm_extract_epi16
(
tmpe
,
5
);
yp1
[
j
]
=
_mm_extract_epi16
(
tmpe
,
6
);
yp2
[
j
]
=
_mm_extract_epi16
(
tmpe
,
7
);
// printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]);
yp128
+=
3
;
}
#else
for
(
i
=
0
;
i
<
n2
;
i
+=
16
)
{
pi2_p
=
&
pi2tab
[
iind
][
i
];
j
=
pi2_p
[
0
];
s
[
j
]
=
_mm_extract_epi8
(
yp128
[
0
],
0
);
yp1
[
j
]
=
_mm_extract_epi8
(
yp128
[
0
],
1
);
yp2
[
j
]
=
_mm_extract_epi8
(
yp128
[
0
],
2
);
// printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]);
j
=
pi2_p
[
1
];
s
[
j
]
=
_mm_extract_epi8
(
yp128
[
0
],
3
);
yp1
[
j
]
=
_mm_extract_epi8
(
yp128
[
0
],
4
);
yp2
[
j
]
=
_mm_extract_epi8
(
yp128
[
0
],
5
);
// printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]);
j
=
pi2_p
[
2
];
s
[
j
]
=
_mm_extract_epi8
(
yp128
[
0
],
6
);
yp1
[
j
]
=
_mm_extract_epi8
(
yp128
[
0
],
7
);
yp2
[
j
]
=
_mm_extract_epi8
(
yp128
[
0
],
8
);
// printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]);
j
=
pi2_p
[
3
];
s
[
j
]
=
_mm_extract_epi8
(
yp128
[
0
],
9
);
yp1
[
j
]
=
_mm_extract_epi8
(
yp128
[
0
],
10
);
yp2
[
j
]
=
_mm_extract_epi8
(
yp128
[
0
],
11
);
// printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]);
j
=
pi2_p
[
4
];
s
[
j
]
=
_mm_extract_epi8
(
yp128
[
0
],
12
);
yp1
[
j
]
=
_mm_extract_epi8
(
yp128
[
0
],
13
);
yp2
[
j
]
=
_mm_extract_epi8
(
yp128
[
0
],
14
);
// printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]);
j
=
pi2_p
[
5
];
s
[
j
]
=
_mm_extract_epi8
(
yp128
[
0
],
15
);
yp1
[
j
]
=
_mm_extract_epi8
(
yp128
[
1
],
0
);
yp2
[
j
]
=
_mm_extract_epi8
(
yp128
[
1
],
1
);
// printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]);
j
=
pi2_p
[
6
];
s
[
j
]
=
_mm_extract_epi8
(
yp128
[
1
],
2
);
yp1
[
j
]
=
_mm_extract_epi8
(
yp128
[
1
],
3
);
yp2
[
j
]
=
_mm_extract_epi8
(
yp128
[
1
],
4
);
// printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]);
j
=
pi2_p
[
7
];
s
[
j
]
=
_mm_extract_epi8
(
yp128
[
1
],
5
);
yp1
[
j
]
=
_mm_extract_epi8
(
yp128
[
1
],
6
);
yp2
[
j
]
=
_mm_extract_epi8
(
yp128
[
1
],
7
);
// printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]);
j
=
pi2_p
[
8
];
s
[
j
]
=
_mm_extract_epi8
(
yp128
[
1
],
8
);
yp1
[
j
]
=
_mm_extract_epi8
(
yp128
[
1
],
9
);
yp2
[
j
]
=
_mm_extract_epi8
(
yp128
[
1
],
10
);
// printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]);
j
=
pi2_p
[
9
];
s
[
j
]
=
_mm_extract_epi8
(
yp128
[
1
],
11
);
yp1
[
j
]
=
_mm_extract_epi8
(
yp128
[
1
],
12
);
yp2
[
j
]
=
_mm_extract_epi8
(
yp128
[
1
],
13
);
// printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]);
j
=
pi2_p
[
10
];
s
[
j
]
=
_mm_extract_epi8
(
yp128
[
1
],
14
);
yp1
[
j
]
=
_mm_extract_epi8
(
yp128
[
1
],
15
);
yp2
[
j
]
=
_mm_extract_epi8
(
yp128
[
2
],
0
);
// printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]);
j
=
pi2_p
[
11
];
s
[
j
]
=
_mm_extract_epi8
(
yp128
[
2
],
1
);
yp1
[
j
]
=
_mm_extract_epi8
(
yp128
[
2
],
2
);
yp2
[
j
]
=
_mm_extract_epi8
(
yp128
[
2
],
3
);
// printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]);
j
=
pi2_p
[
12
];
s
[
j
]
=
_mm_extract_epi8
(
yp128
[
2
],
4
);
yp1
[
j
]
=
_mm_extract_epi8
(
yp128
[
2
],
5
);
yp2
[
j
]
=
_mm_extract_epi8
(
yp128
[
2
],
6
);
// printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]);
j
=
pi2_p
[
13
];
s
[
j
]
=
_mm_extract_epi8
(
yp128
[
2
],
7
);
yp1
[
j
]
=
_mm_extract_epi8
(
yp128
[
2
],
8
);
yp2
[
j
]
=
_mm_extract_epi8
(
yp128
[
2
],
9
);
// printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]);
j
=
pi2_p
[
14
];
s
[
j
]
=
_mm_extract_epi8
(
yp128
[
2
],
10
);
yp1
[
j
]
=
_mm_extract_epi8
(
yp128
[
2
],
11
);
yp2
[
j
]
=
_mm_extract_epi8
(
yp128
[
2
],
12
);
// printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]);
j
=
pi2_p
[
15
];
s
[
j
]
=
_mm_extract_epi8
(
yp128
[
2
],
13
);
yp1
[
j
]
=
_mm_extract_epi8
(
yp128
[
2
],
14
);
yp2
[
j
]
=
_mm_extract_epi8
(
yp128
[
2
],
15
);
// printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]);
yp128
+=
3
;
}
#endif
yp
=
(
llr_t
*
)
yp128
;
yp
=
(
llr_t
*
)
yp128
;
#ifdef LLR8
if
(
n2
>
n
)
{
...
...
@@ -2290,7 +2116,7 @@ unsigned char phy_threegpplte_turbo_decoder(short *y,
s1[n+4]=0;s1[n+5]=0;s1[n+6]=0;s1[n+7]=0;
s2[n]=0;s2[n+1]=0;s2[n+2]=0;s2[n+3]=0;
s2[n+4]=0;s2[n+5]=0;s2[n+6]=0;s2[n+7]=0;*/
yp
=
(
llr_t
*
)(
y8
+
n
);
yp
=
(
llr_t
*
)(
y8
+
n
);
}
#endif
...
...
@@ -2341,68 +2167,55 @@ unsigned char phy_threegpplte_turbo_decoder(short *y,
#ifdef DEBUG_LOGMAP
msg
(
"
\n
"
);
#endif //DEBUG_LOGMAP
stop_meas
(
init_stats
);
// do log_map from first parity bit
log_map
(
systematic0
,
yparity1
,
m11
,
m10
,
alpha
,
beta
,
ext
,
n2
,
0
,
F
,
offset8_flag
,
alpha_stats
,
beta_stats
,
gamma_stats
,
ext_stats
);
while
(
iteration_cnt
++
<
max_iterations
)
{
#ifdef DEBUG_LOGMAP
printf
(
"
\n
*******************ITERATION %d (n %d), ext %p
\n\n
"
,
iteration_cnt
,
n
,
ext
);
#endif //DEBUG_LOGMAP
start_meas
(
intl1_stats
);
#ifndef LLR8
pi4_p
=
pi4tab
[
iind
];
for
(
i
=
0
;
i
<
(
n2
>>
3
);
i
++
)
{
// steady-state portion
((
__m128i
*
)
systematic2
)[
i
]
=
_mm_insert_epi16
(((
__m128i
*
)
systematic2
)[
i
],((
llr_t
*
)
ext
)[
*
pi4_p
++
],
0
);
((
__m128i
*
)
systematic2
)[
i
]
=
_mm_insert_epi16
(((
__m128i
*
)
systematic2
)[
i
],((
llr_t
*
)
ext
)[
*
pi4_p
++
],
1
);
((
__m128i
*
)
systematic2
)[
i
]
=
_mm_insert_epi16
(((
__m128i
*
)
systematic2
)[
i
],((
llr_t
*
)
ext
)[
*
pi4_p
++
],
2
);
((
__m128i
*
)
systematic2
)[
i
]
=
_mm_insert_epi16
(((
__m128i
*
)
systematic2
)[
i
],((
llr_t
*
)
ext
)[
*
pi4_p
++
],
3
);
((
__m128i
*
)
systematic2
)[
i
]
=
_mm_insert_epi16
(((
__m128i
*
)
systematic2
)[
i
],((
llr_t
*
)
ext
)[
*
pi4_p
++
],
4
);
((
__m128i
*
)
systematic2
)[
i
]
=
_mm_insert_epi16
(((
__m128i
*
)
systematic2
)[
i
],((
llr_t
*
)
ext
)[
*
pi4_p
++
],
5
);
((
__m128i
*
)
systematic2
)[
i
]
=
_mm_insert_epi16
(((
__m128i
*
)
systematic2
)[
i
],((
llr_t
*
)
ext
)[
*
pi4_p
++
],
6
);
((
__m128i
*
)
systematic2
)[
i
]
=
_mm_insert_epi16
(((
__m128i
*
)
systematic2
)[
i
],((
llr_t
*
)
ext
)[
*
pi4_p
++
],
7
);
((
__m128i
*
)
systematic2
)[
i
]
=
_mm_insert_epi16
(((
__m128i
*
)
systematic2
)[
i
],((
llr_t
*
)
ext
)[
*
pi4_p
++
],
0
);
((
__m128i
*
)
systematic2
)[
i
]
=
_mm_insert_epi16
(((
__m128i
*
)
systematic2
)[
i
],((
llr_t
*
)
ext
)[
*
pi4_p
++
],
1
);
((
__m128i
*
)
systematic2
)[
i
]
=
_mm_insert_epi16
(((
__m128i
*
)
systematic2
)[
i
],((
llr_t
*
)
ext
)[
*
pi4_p
++
],
2
);
((
__m128i
*
)
systematic2
)[
i
]
=
_mm_insert_epi16
(((
__m128i
*
)
systematic2
)[
i
],((
llr_t
*
)
ext
)[
*
pi4_p
++
],
3
);
((
__m128i
*
)
systematic2
)[
i
]
=
_mm_insert_epi16
(((
__m128i
*
)
systematic2
)[
i
],((
llr_t
*
)
ext
)[
*
pi4_p
++
],
4
);
((
__m128i
*
)
systematic2
)[
i
]
=
_mm_insert_epi16
(((
__m128i
*
)
systematic2
)[
i
],((
llr_t
*
)
ext
)[
*
pi4_p
++
],
5
);
((
__m128i
*
)
systematic2
)[
i
]
=
_mm_insert_epi16
(((
__m128i
*
)
systematic2
)[
i
],((
llr_t
*
)
ext
)[
*
pi4_p
++
],
6
);
((
__m128i
*
)
systematic2
)[
i
]
=
_mm_insert_epi16
(((
__m128i
*
)
systematic2
)[
i
],((
llr_t
*
)
ext
)[
*
pi4_p
++
],
7
);
}
#else
pi4_p
=
pi4tab
[
iind
];
for
(
i
=
0
;
i
<
(
n2
>>
4
);
i
++
)
{
// steady-state portion
tmp
=
_mm_insert_epi8
(
tmp
,((
llr_t
*
)
ext
)[
*
pi4_p
++
],
0
);
tmp
=
_mm_insert_epi8
(
tmp
,((
llr_t
*
)
ext
)[
*
pi4_p
++
],
1
);
tmp
=
_mm_insert_epi8
(
tmp
,((
llr_t
*
)
ext
)[
*
pi4_p
++
],
2
);
tmp
=
_mm_insert_epi8
(
tmp
,((
llr_t
*
)
ext
)[
*
pi4_p
++
],
3
);
tmp
=
_mm_insert_epi8
(
tmp
,((
llr_t
*
)
ext
)[
*
pi4_p
++
],
4
);
tmp
=
_mm_insert_epi8
(
tmp
,((
llr_t
*
)
ext
)[
*
pi4_p
++
],
5
);
tmp
=
_mm_insert_epi8
(
tmp
,((
llr_t
*
)
ext
)[
*
pi4_p
++
],
6
);
tmp
=
_mm_insert_epi8
(
tmp
,((
llr_t
*
)
ext
)[
*
pi4_p
++
],
7
);
tmp
=
_mm_insert_epi8
(
tmp
,((
llr_t
*
)
ext
)[
*
pi4_p
++
],
8
);
tmp
=
_mm_insert_epi8
(
tmp
,((
llr_t
*
)
ext
)[
*
pi4_p
++
],
9
);
tmp
=
_mm_insert_epi8
(
tmp
,((
llr_t
*
)
ext
)[
*
pi4_p
++
],
10
);
tmp
=
_mm_insert_epi8
(
tmp
,((
llr_t
*
)
ext
)[
*
pi4_p
++
],
11
);
tmp
=
_mm_insert_epi8
(
tmp
,((
llr_t
*
)
ext
)[
*
pi4_p
++
],
12
);
tmp
=
_mm_insert_epi8
(
tmp
,((
llr_t
*
)
ext
)[
*
pi4_p
++
],
13
);
tmp
=
_mm_insert_epi8
(
tmp
,((
llr_t
*
)
ext
)[
*
pi4_p
++
],
14
);
((
__m128i
*
)
systematic2
)[
i
]
=
_mm_insert_epi8
(
tmp
,((
llr_t
*
)
ext
)[
*
pi4_p
++
],
15
);
tmp
=
_mm_insert_epi8
(
tmp
,((
llr_t
*
)
ext
)[
*
pi4_p
++
],
0
);
tmp
=
_mm_insert_epi8
(
tmp
,((
llr_t
*
)
ext
)[
*
pi4_p
++
],
1
);
tmp
=
_mm_insert_epi8
(
tmp
,((
llr_t
*
)
ext
)[
*
pi4_p
++
],
2
);
tmp
=
_mm_insert_epi8
(
tmp
,((
llr_t
*
)
ext
)[
*
pi4_p
++
],
3
);
tmp
=
_mm_insert_epi8
(
tmp
,((
llr_t
*
)
ext
)[
*
pi4_p
++
],
4
);
tmp
=
_mm_insert_epi8
(
tmp
,((
llr_t
*
)
ext
)[
*
pi4_p
++
],
5
);
tmp
=
_mm_insert_epi8
(
tmp
,((
llr_t
*
)
ext
)[
*
pi4_p
++
],
6
);
tmp
=
_mm_insert_epi8
(
tmp
,((
llr_t
*
)
ext
)[
*
pi4_p
++
],
7
);
tmp
=
_mm_insert_epi8
(
tmp
,((
llr_t
*
)
ext
)[
*
pi4_p
++
],
8
);
tmp
=
_mm_insert_epi8
(
tmp
,((
llr_t
*
)
ext
)[
*
pi4_p
++
],
9
);
tmp
=
_mm_insert_epi8
(
tmp
,((
llr_t
*
)
ext
)[
*
pi4_p
++
],
10
);
tmp
=
_mm_insert_epi8
(
tmp
,((
llr_t
*
)
ext
)[
*
pi4_p
++
],
11
);
tmp
=
_mm_insert_epi8
(
tmp
,((
llr_t
*
)
ext
)[
*
pi4_p
++
],
12
);
tmp
=
_mm_insert_epi8
(
tmp
,((
llr_t
*
)
ext
)[
*
pi4_p
++
],
13
);
tmp
=
_mm_insert_epi8
(
tmp
,((
llr_t
*
)
ext
)[
*
pi4_p
++
],
14
);
((
__m128i
*
)
systematic2
)[
i
]
=
_mm_insert_epi8
(
tmp
,((
llr_t
*
)
ext
)[
*
pi4_p
++
],
15
);
}
#endif
stop_meas
(
intl1_stats
);
// do log_map from second parity bit
log_map
(
systematic2
,
yparity2
,
m11
,
m10
,
alpha
,
beta
,
ext2
,
n2
,
1
,
F
,
offset8_flag
,
alpha_stats
,
beta_stats
,
gamma_stats
,
ext_stats
);
#ifndef LLR8
pi5_p
=
pi5tab
[
iind
];
...
...
@@ -2415,7 +2228,7 @@ unsigned char phy_threegpplte_turbo_decoder(short *y,
tmp
=
_mm_insert_epi16
(
tmp
,
ext2
[
*
pi5_p
++
],
5
);
tmp
=
_mm_insert_epi16
(
tmp
,
ext2
[
*
pi5_p
++
],
6
);
tmp
=
_mm_insert_epi16
(
tmp
,
ext2
[
*
pi5_p
++
],
7
);
((
__m128i
*
)
systematic1
)[
i
]
=
_mm_adds_epi16
(
_mm_subs_epi16
(
tmp
,((
__m128i
*
)
ext
)[
i
]),((
__m128i
*
)
systematic0
)[
i
]);
((
__m128i
*
)
systematic1
)[
i
]
=
_mm_adds_epi16
(
_mm_subs_epi16
(
tmp
,((
__m128i
*
)
ext
)[
i
]),((
__m128i
*
)
systematic0
)[
i
]);
}
if
(
iteration_cnt
>
1
)
{
...
...
@@ -2423,17 +2236,16 @@ unsigned char phy_threegpplte_turbo_decoder(short *y,
pi6_p
=
pi6tab
[
iind
];
for
(
i
=
0
;
i
<
(
n2
>>
3
);
i
++
)
{
tmp
=
_mm_insert_epi16
(
tmp
,
((
llr_t
*
)
ext2
)[
*
pi6_p
++
],
7
);
tmp
=
_mm_insert_epi16
(
tmp
,
((
llr_t
*
)
ext2
)[
*
pi6_p
++
],
6
);
tmp
=
_mm_insert_epi16
(
tmp
,
((
llr_t
*
)
ext2
)[
*
pi6_p
++
],
5
);
tmp
=
_mm_insert_epi16
(
tmp
,
((
llr_t
*
)
ext2
)[
*
pi6_p
++
],
4
);
tmp
=
_mm_insert_epi16
(
tmp
,
((
llr_t
*
)
ext2
)[
*
pi6_p
++
],
3
);
tmp
=
_mm_insert_epi16
(
tmp
,
((
llr_t
*
)
ext2
)[
*
pi6_p
++
],
2
);
tmp
=
_mm_insert_epi16
(
tmp
,
((
llr_t
*
)
ext2
)[
*
pi6_p
++
],
1
);
tmp
=
_mm_insert_epi16
(
tmp
,
((
llr_t
*
)
ext2
)[
*
pi6_p
++
],
0
);
tmp
=
_mm_insert_epi16
(
tmp
,
((
llr_t
*
)
ext2
)[
*
pi6_p
++
],
7
);
tmp
=
_mm_insert_epi16
(
tmp
,
((
llr_t
*
)
ext2
)[
*
pi6_p
++
],
6
);
tmp
=
_mm_insert_epi16
(
tmp
,
((
llr_t
*
)
ext2
)[
*
pi6_p
++
],
5
);
tmp
=
_mm_insert_epi16
(
tmp
,
((
llr_t
*
)
ext2
)[
*
pi6_p
++
],
4
);
tmp
=
_mm_insert_epi16
(
tmp
,
((
llr_t
*
)
ext2
)[
*
pi6_p
++
],
3
);
tmp
=
_mm_insert_epi16
(
tmp
,
((
llr_t
*
)
ext2
)[
*
pi6_p
++
],
2
);
tmp
=
_mm_insert_epi16
(
tmp
,
((
llr_t
*
)
ext2
)[
*
pi6_p
++
],
1
);
tmp
=
_mm_insert_epi16
(
tmp
,
((
llr_t
*
)
ext2
)[
*
pi6_p
++
],
0
);
tmp
=
_mm_cmpgt_epi8
(
_mm_packs_epi16
(
tmp
,
zeros
),
zeros
);
decoded_bytes
[
i
]
=
(
unsigned
char
)
_mm_movemask_epi8
(
tmp
);
}
}
...
...
@@ -2460,8 +2272,7 @@ unsigned char phy_threegpplte_turbo_decoder(short *y,
tmp
=
_mm_insert_epi8
(
tmp
,
ext2
[
*
pi5_p
++
],
15
);
//decoded_bytes_interl[i]=(uint16_t) _mm_movemask_epi8(_mm_cmpgt_epi8(tmp,zeros));
tmp128
[
i
]
=
_mm_adds_epi8
(((
__m128i
*
)
ext2
)[
i
],((
__m128i
*
)
systematic2
)[
i
]);
((
__m128i
*
)
systematic1
)[
i
]
=
_mm_adds_epi8
(
_mm_subs_epi8
(
tmp
,((
__m128i
*
)
ext
)[
i
]),((
__m128i
*
)
systematic0
)[
i
]);
((
__m128i
*
)
systematic1
)[
i
]
=
_mm_adds_epi8
(
_mm_subs_epi8
(
tmp
,((
__m128i
*
)
ext
)[
i
]),((
__m128i
*
)
systematic0
)[
i
]);
}
/* LT modification, something wrong here
...
...
@@ -2526,7 +2337,6 @@ unsigned char phy_threegpplte_turbo_decoder(short *y,
oldcrc
=
*
((
unsigned
int
*
)(
&
decoded_bytes
[(
n
>>
3
)
-
crc_len
]));
switch
(
crc_type
)
{
case
CRC24_A
:
oldcrc
&=
0x00ffffff
;
crc
=
crc24a
(
&
decoded_bytes
[
F
>>
3
],
...
...
@@ -2573,9 +2383,9 @@ unsigned char phy_threegpplte_turbo_decoder(short *y,
// do log_map from first parity bit
if
(
iteration_cnt
<
max_iterations
)
{
log_map
(
systematic1
,
yparity1
,
m11
,
m10
,
alpha
,
beta
,
ext
,
n2
,
0
,
F
,
offset8_flag
,
alpha_stats
,
beta_stats
,
gamma_stats
,
ext_stats
);
__m128i
*
ext_128
=
(
__m128i
*
)
ext
;
__m128i
*
s1_128
=
(
__m128i
*
)
systematic1
;
__m128i
*
s0_128
=
(
__m128i
*
)
systematic0
;
__m128i
*
ext_128
=
(
__m128i
*
)
ext
;
__m128i
*
s1_128
=
(
__m128i
*
)
systematic1
;
__m128i
*
s0_128
=
(
__m128i
*
)
systematic0
;
#ifndef LLR8
int
myloop
=
n2
>>
3
;
...
...
@@ -2601,27 +2411,21 @@ unsigned char phy_threegpplte_turbo_decoder(short *y,
#ifdef TEST_DEBUG
int
test_logmap8
()
{
int
test_logmap8
()
{
unsigned
char
test
[
8
];
//_declspec(align(16)) char channel_output[512];
//_declspec(align(16)) unsigned char output[512],decoded_output[16], *inPtr, *outPtr;
short
channel_output
[
512
];
unsigned
char
output
[
512
],
decoded_output
[
16
];
unsigned
int
i
,
crc
,
ret
;
test
[
0
]
=
7
;
test
[
1
]
=
0xa5
;
test
[
2
]
=
0x11
;
test
[
3
]
=
0x92
;
test
[
4
]
=
0xfe
;
crc
=
crc24a
(
test
,
40
)
>>
8
;
*
(
unsigned
int
*
)(
&
test
[
5
])
=
crc
;
*
(
unsigned
int
*
)(
&
test
[
5
])
=
crc
;
printf
(
"crc24 = %x
\n
"
,
crc
);
threegpplte_turbo_encoder
(
test
,
//input
8
,
//input length bytes
...
...
@@ -2646,20 +2450,15 @@ int test_logmap8()
0
,
// filler bits
0
);
// decoder instance
for
(
i
=
0
;
i
<
8
;
i
++
)
printf
(
"output %
d
=> %x (input %x)
\n
"
,
i
,
decoded_output
[
i
],
test
[
i
]);
printf
(
"output %
u
=> %x (input %x)
\n
"
,
i
,
decoded_output
[
i
],
test
[
i
]);
}
int
main
()
{
int
main
()
{
test_logmap8
();
return
(
0
);
}
...
...
openair1/PHY/CODING/3gpplte_turbo_decoder_sse_16bit.c
View file @
1cb484f1
...
...
@@ -41,53 +41,53 @@
#include "PHY/sse_intrin.h"
#ifndef TEST_DEBUG
#include "PHY/impl_defs_top.h"
#include "PHY/defs_common.h"
#include "PHY/CODING/coding_defs.h"
#include "PHY/CODING/lte_interleaver_inline.h"
#include "PHY/impl_defs_top.h"
#include "PHY/defs_common.h"
#include "PHY/CODING/coding_defs.h"
#include "PHY/CODING/lte_interleaver_inline.h"
#else
#include "defs.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "defs.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#endif
#ifdef MEX
#include "mex.h"
#include "mex.h"
#endif
//#define DEBUG_LOGMAP
#ifdef DEBUG_LOGMAP
#define print_shorts(s,x) fprintf(fdsse4,"%s %d,%d,%d,%d,%d,%d,%d,%d\n",s,(x)[0],(x)[1],(x)[2],(x)[3],(x)[4],(x)[5],(x)[6],(x)[7])
#define print_shorts(s,x) fprintf(fdsse4,"%s %d,%d,%d,%d,%d,%d,%d,%d\n",s,(x)[0],(x)[1],(x)[2],(x)[3],(x)[4],(x)[5],(x)[6],(x)[7])
#endif
#undef __AVX2__
#ifdef DEBUG_LOGMAP
FILE
*
fdsse4
;
FILE
*
fdsse4
;
#endif
typedef
int16_t
llr_t
;
// internal decoder LLR data is 16-bit fixed
typedef
int16_t
channel_t
;
#define MAX 256
void
log_map16
(
llr_t
*
systematic
,
channel_t
*
y_parity
,
llr_t
*
m11
,
llr_t
*
m10
,
llr_t
*
alpha
,
llr_t
*
beta
,
llr_t
*
ext
,
unsigned
short
frame_length
,
unsigned
char
term_flag
,
unsigned
char
F
,
void
log_map16
(
llr_t
*
systematic
,
channel_t
*
y_parity
,
llr_t
*
m11
,
llr_t
*
m10
,
llr_t
*
alpha
,
llr_t
*
beta
,
llr_t
*
ext
,
unsigned
short
frame_length
,
unsigned
char
term_flag
,
unsigned
char
F
,
int
offset8_flag
,
time_stats_t
*
alpha_stats
,
time_stats_t
*
beta_stats
,
time_stats_t
*
gamma_stats
,
time_stats_t
*
ext_stats
);
void
compute_gamma16
(
llr_t
*
m11
,
llr_t
*
m10
,
llr_t
*
systematic
,
channel_t
*
y_parity
,
unsigned
short
frame_length
,
unsigned
char
term_flag
);
void
compute_alpha16
(
llr_t
*
alpha
,
llr_t
*
beta
,
llr_t
*
m11
,
llr_t
*
m10
,
unsigned
short
frame_length
,
unsigned
char
F
);
void
compute_beta16
(
llr_t
*
alpha
,
llr_t
*
beta
,
llr_t
*
m11
,
llr_t
*
m10
,
unsigned
short
frame_length
,
unsigned
char
F
,
int
offset8_flag
);
void
compute_ext16
(
llr_t
*
alpha
,
llr_t
*
beta
,
llr_t
*
m11
,
llr_t
*
m10
,
llr_t
*
extrinsic
,
llr_t
*
ap
,
unsigned
short
frame_length
);
void
compute_gamma16
(
llr_t
*
m11
,
llr_t
*
m10
,
llr_t
*
systematic
,
channel_t
*
y_parity
,
unsigned
short
frame_length
,
unsigned
char
term_flag
);
void
compute_alpha16
(
llr_t
*
alpha
,
llr_t
*
beta
,
llr_t
*
m11
,
llr_t
*
m10
,
unsigned
short
frame_length
,
unsigned
char
F
);
void
compute_beta16
(
llr_t
*
alpha
,
llr_t
*
beta
,
llr_t
*
m11
,
llr_t
*
m10
,
unsigned
short
frame_length
,
unsigned
char
F
,
int
offset8_flag
);
void
compute_ext16
(
llr_t
*
alpha
,
llr_t
*
beta
,
llr_t
*
m11
,
llr_t
*
m10
,
llr_t
*
extrinsic
,
llr_t
*
ap
,
unsigned
short
frame_length
);
void
log_map16
(
llr_t
*
systematic
,
channel_t
*
y_parity
,
llr_t
*
m11
,
llr_t
*
m10
,
void
log_map16
(
llr_t
*
systematic
,
channel_t
*
y_parity
,
llr_t
*
m11
,
llr_t
*
m10
,
llr_t
*
alpha
,
llr_t
*
beta
,
llr_t
*
ext
,
llr_t
*
ext
,
unsigned
short
frame_length
,
unsigned
char
term_flag
,
unsigned
char
F
,
...
...
@@ -95,13 +95,10 @@ void log_map16(llr_t* systematic,
time_stats_t
*
alpha_stats
,
time_stats_t
*
beta_stats
,
time_stats_t
*
gamma_stats
,
time_stats_t
*
ext_stats
)
{
time_stats_t
*
ext_stats
)
{
#ifdef DEBUG_LOGMAP
fprintf
(
fdsse4
,
"log_map, frame_length %d
\n
"
,
frame_length
);
#endif
start_meas
(
gamma_stats
)
;
compute_gamma16
(
m11
,
m10
,
systematic
,
y_parity
,
frame_length
,
term_flag
)
;
stop_meas
(
gamma_stats
);
...
...
@@ -114,13 +111,10 @@ void log_map16(llr_t* systematic,
start_meas
(
ext_stats
)
;
compute_ext16
(
alpha
,
beta
,
m11
,
m10
,
ext
,
systematic
,
frame_length
)
;
stop_meas
(
ext_stats
);
}
void
compute_gamma16
(
llr_t
*
m11
,
llr_t
*
m10
,
llr_t
*
systematic
,
channel_t
*
y_parity
,
unsigned
short
frame_length
,
unsigned
char
term_flag
)
{
void
compute_gamma16
(
llr_t
*
m11
,
llr_t
*
m10
,
llr_t
*
systematic
,
channel_t
*
y_parity
,
unsigned
short
frame_length
,
unsigned
char
term_flag
)
{
int
k
,
K1
;
#if defined(__x86_64__)||defined(__i386__)
__m128i
*
systematic128
=
(
__m128i
*
)
systematic
;
...
...
@@ -133,18 +127,18 @@ void compute_gamma16(llr_t* m11,llr_t* m10,llr_t* systematic,channel_t* y_parity
int16x8_t
*
m10_128
=
(
int16x8_t
*
)
m10
;
int16x8_t
*
m11_128
=
(
int16x8_t
*
)
m11
;
#endif
#ifdef DEBUG_LOGMAP
fprintf
(
fdsse4
,
"compute_gamma (sse_16bit), %p,%p,%p,%p,framelength %d
\n
"
,
m11
,
m10
,
systematic
,
y_parity
,
frame_length
);
#endif
#ifndef __AVX2__
K1
=
frame_length
>>
3
;
#else
if
((
frame_length
&
15
)
>
0
)
K1
=
(
frame_length
+
1
)
>>
4
;
else
K1
=
frame_length
>>
4
;
#endif
for
(
k
=
0
;
k
<
K1
;
k
++
)
{
...
...
@@ -153,21 +147,20 @@ void compute_gamma16(llr_t* m11,llr_t* m10,llr_t* systematic,channel_t* y_parity
m11_128
[
k
]
=
_mm_srai_epi16
(
_mm_adds_epi16
(
systematic128
[
k
],
y_parity128
[
k
]),
1
);
m10_128
[
k
]
=
_mm_srai_epi16
(
_mm_subs_epi16
(
systematic128
[
k
],
y_parity128
[
k
]),
1
);
#else
((
__m256i
*
)
m11_128
)[
k
]
=
_mm256_srai_epi16
(
_mm256_adds_epi16
(((
__m256i
*
)
systematic128
)[
k
],((
__m256i
*
)
y_parity128
)[
k
]),
1
);
((
__m256i
*
)
m11_128
)[
k
]
=
_mm256_srai_epi16
(
_mm256_adds_epi16
(((
__m256i
*
)
systematic128
)[
k
],((
__m256i
*
)
y_parity128
)[
k
]),
1
);
// ((__m256i*)m10_128)[k] = _mm256_srai_epi16(_mm256_subs_epi16(((__m256i*)y_parity128)[k],((__m256i*)systematic128)[k]),1);
((
__m256i
*
)
m10_128
)[
k
]
=
_mm256_srai_epi16
(
_mm256_subs_epi16
(((
__m256i
*
)
systematic128
)[
k
],((
__m256i
*
)
y_parity128
)[
k
]),
1
);
((
__m256i
*
)
m10_128
)[
k
]
=
_mm256_srai_epi16
(
_mm256_subs_epi16
(((
__m256i
*
)
systematic128
)[
k
],((
__m256i
*
)
y_parity128
)[
k
]),
1
);
#endif
#elif defined(__arm__)
m11_128
[
k
]
=
vhaddq_s16
(
systematic128
[
k
],
y_parity128
[
k
]);
m10_128
[
k
]
=
vhsubq_s16
(
systematic128
[
k
],
y_parity128
[
k
]);
#endif
#ifdef DEBUG_LOGMAP
fprintf
(
fdsse4
,
"Loop index k %d
\n
"
,
k
);
print_shorts
(
"sys"
,(
int16_t
*
)
&
systematic128
[
k
]);
print_shorts
(
"yp"
,(
int16_t
*
)
&
y_parity128
[
k
]);
print_shorts
(
"m11"
,(
int16_t
*
)
&
m11_128
[
k
]);
print_shorts
(
"m10"
,(
int16_t
*
)
&
m10_128
[
k
]);
print_shorts
(
"sys"
,(
int16_t
*
)
&
systematic128
[
k
]);
print_shorts
(
"yp"
,(
int16_t
*
)
&
y_parity128
[
k
]);
print_shorts
(
"m11"
,(
int16_t
*
)
&
m11_128
[
k
]);
print_shorts
(
"m10"
,(
int16_t
*
)
&
m10_128
[
k
]);
#endif
}
...
...
@@ -185,20 +178,18 @@ void compute_gamma16(llr_t* m11,llr_t* m10,llr_t* systematic,channel_t* y_parity
m11_128
[
k
]
=
vhaddq_s16
(
systematic128
[
k
+
term_flag
],
y_parity128
[
k
]);
m10_128
[
k
]
=
vhsubq_s16
(
systematic128
[
k
+
term_flag
],
y_parity128
[
k
]);
#endif
#ifdef DEBUG_LOGMAP
fprintf
(
fdsse4
,
"Loop index k %d (term flag %d)
\n
"
,
k
,
term_flag
);
print_shorts
(
"sys"
,(
int16_t
*
)
&
systematic128
[
k
]);
print_shorts
(
"yp"
,(
int16_t
*
)
&
y_parity128
[
k
]);
print_shorts
(
"m11"
,(
int16_t
*
)
&
m11_128
[
k
]);
print_shorts
(
"m10"
,(
int16_t
*
)
&
m10_128
[
k
]);
fprintf
(
fdsse4
,
"Loop index k %d (term flag %d)
\n
"
,
k
,
term_flag
);
print_shorts
(
"sys"
,(
int16_t
*
)
&
systematic128
[
k
]);
print_shorts
(
"yp"
,(
int16_t
*
)
&
y_parity128
[
k
]);
print_shorts
(
"m11"
,(
int16_t
*
)
&
m11_128
[
k
]);
print_shorts
(
"m10"
,(
int16_t
*
)
&
m10_128
[
k
]);
#endif
}
#define L 40
void
compute_alpha16
(
llr_t
*
alpha
,
llr_t
*
beta
,
llr_t
*
m_11
,
llr_t
*
m_10
,
unsigned
short
frame_length
,
unsigned
char
F
)
{
void
compute_alpha16
(
llr_t
*
alpha
,
llr_t
*
beta
,
llr_t
*
m_11
,
llr_t
*
m_10
,
unsigned
short
frame_length
,
unsigned
char
F
)
{
int
k
,
l
,
l2
,
K1
,
rerun_flag
=
0
;
#if defined(__x86_64__) || defined(__i386__)
__m128i
*
alpha128
=
(
__m128i
*
)
alpha
,
*
alpha_ptr
,
*
m11p
,
*
m10p
;
...
...
@@ -215,7 +206,6 @@ void compute_alpha16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned s
__m256i
m11m10_256
;
__m256i
alpha_max
;
#endif
#elif defined(__arm__)
int16x8_t
*
alpha128
=
(
int16x8_t
*
)
alpha
,
*
alpha_ptr
;
int16x8_t
a0
,
a1
,
a2
,
a3
,
a4
,
a5
,
a6
,
a7
,
*
m11p
,
*
m10p
;
...
...
@@ -228,6 +218,7 @@ void compute_alpha16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned s
#ifdef DEBUG_LOGMAP
fprintf
(
fdsse4
,
"compute_alpha (sse_16bit)
\n
"
);
#endif
for
(
l
=
K1
;;
l
=
l2
,
rerun_flag
=
1
)
{
#if defined(__x86_64__) || defined(__i386__)
alpha128
=
(
__m128i
*
)
alpha
;
...
...
@@ -259,14 +250,14 @@ void compute_alpha16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned s
#endif
#ifdef DEBUG_LOGMAP
fprintf
(
fdsse4
,
"Initial alpha
\n
"
);
print_shorts
(
"a0"
,(
int16_t
*
)
&
alpha128
[
0
]);
print_shorts
(
"a1"
,(
int16_t
*
)
&
alpha128
[
1
]);
print_shorts
(
"a2"
,(
int16_t
*
)
&
alpha128
[
2
]);
print_shorts
(
"a3"
,(
int16_t
*
)
&
alpha128
[
3
]);
print_shorts
(
"a4"
,(
int16_t
*
)
&
alpha128
[
4
]);
print_shorts
(
"a5"
,(
int16_t
*
)
&
alpha128
[
5
]);
print_shorts
(
"a6"
,(
int16_t
*
)
&
alpha128
[
6
]);
print_shorts
(
"a7"
,(
int16_t
*
)
&
alpha128
[
7
]);
print_shorts
(
"a0"
,(
int16_t
*
)
&
alpha128
[
0
]);
print_shorts
(
"a1"
,(
int16_t
*
)
&
alpha128
[
1
]);
print_shorts
(
"a2"
,(
int16_t
*
)
&
alpha128
[
2
]);
print_shorts
(
"a3"
,(
int16_t
*
)
&
alpha128
[
3
]);
print_shorts
(
"a4"
,(
int16_t
*
)
&
alpha128
[
4
]);
print_shorts
(
"a5"
,(
int16_t
*
)
&
alpha128
[
5
]);
print_shorts
(
"a6"
,(
int16_t
*
)
&
alpha128
[
6
]);
print_shorts
(
"a7"
,(
int16_t
*
)
&
alpha128
[
7
]);
#endif
}
else
{
//set initial alpha in columns 1-7 from final alpha from last run in columns 0-6
...
...
@@ -280,14 +271,22 @@ void compute_alpha16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned s
alpha128
[
6
]
=
_mm_slli_si128
(
alpha128
[
6
+
frame_length
],
2
);
alpha128
[
7
]
=
_mm_slli_si128
(
alpha128
[
7
+
frame_length
],
2
);
#elif defined(__arm__)
alpha128
[
0
]
=
(
int16x8_t
)
vshlq_n_s64
((
int64x2_t
)
alpha128
[
frame_length
],
16
);
alpha128
[
0
]
=
vsetq_lane_s16
(
alpha
[
8
],
alpha128
[
0
],
3
);
alpha128
[
1
]
=
(
int16x8_t
)
vshlq_n_s64
((
int64x2_t
)
alpha128
[
1
+
frame_length
],
16
);
alpha128
[
1
]
=
vsetq_lane_s16
(
alpha
[
24
],
alpha128
[
0
],
3
);
alpha128
[
2
]
=
(
int16x8_t
)
vshlq_n_s64
((
int64x2_t
)
alpha128
[
2
+
frame_length
],
16
);
alpha128
[
2
]
=
vsetq_lane_s16
(
alpha
[
40
],
alpha128
[
0
],
3
);
alpha128
[
3
]
=
(
int16x8_t
)
vshlq_n_s64
((
int64x2_t
)
alpha128
[
3
+
frame_length
],
16
);
alpha128
[
3
]
=
vsetq_lane_s16
(
alpha
[
56
],
alpha128
[
0
],
3
);
alpha128
[
4
]
=
(
int16x8_t
)
vshlq_n_s64
((
int64x2_t
)
alpha128
[
4
+
frame_length
],
16
);
alpha128
[
4
]
=
vsetq_lane_s16
(
alpha
[
72
],
alpha128
[
0
],
3
);
alpha128
[
5
]
=
(
int16x8_t
)
vshlq_n_s64
((
int64x2_t
)
alpha128
[
5
+
frame_length
],
16
);
alpha128
[
5
]
=
vsetq_lane_s16
(
alpha
[
88
],
alpha128
[
0
],
3
);
alpha128
[
6
]
=
(
int16x8_t
)
vshlq_n_s64
((
int64x2_t
)
alpha128
[
6
+
frame_length
],
16
);
alpha128
[
6
]
=
vsetq_lane_s16
(
alpha
[
104
],
alpha128
[
0
],
3
);
alpha128
[
7
]
=
(
int16x8_t
)
vshlq_n_s64
((
int64x2_t
)
alpha128
[
7
+
frame_length
],
16
);
alpha128
[
7
]
=
vsetq_lane_s16
(
alpha
[
120
],
alpha128
[
0
],
3
);
alpha128
[
0
]
=
(
int16x8_t
)
vshlq_n_s64
((
int64x2_t
)
alpha128
[
frame_length
],
16
);
alpha128
[
0
]
=
vsetq_lane_s16
(
alpha
[
8
],
alpha128
[
0
],
3
);
alpha128
[
1
]
=
(
int16x8_t
)
vshlq_n_s64
((
int64x2_t
)
alpha128
[
1
+
frame_length
],
16
);
alpha128
[
1
]
=
vsetq_lane_s16
(
alpha
[
24
],
alpha128
[
0
],
3
);
alpha128
[
2
]
=
(
int16x8_t
)
vshlq_n_s64
((
int64x2_t
)
alpha128
[
2
+
frame_length
],
16
);
alpha128
[
2
]
=
vsetq_lane_s16
(
alpha
[
40
],
alpha128
[
0
],
3
);
alpha128
[
3
]
=
(
int16x8_t
)
vshlq_n_s64
((
int64x2_t
)
alpha128
[
3
+
frame_length
],
16
);
alpha128
[
3
]
=
vsetq_lane_s16
(
alpha
[
56
],
alpha128
[
0
],
3
);
alpha128
[
4
]
=
(
int16x8_t
)
vshlq_n_s64
((
int64x2_t
)
alpha128
[
4
+
frame_length
],
16
);
alpha128
[
4
]
=
vsetq_lane_s16
(
alpha
[
72
],
alpha128
[
0
],
3
);
alpha128
[
5
]
=
(
int16x8_t
)
vshlq_n_s64
((
int64x2_t
)
alpha128
[
5
+
frame_length
],
16
);
alpha128
[
5
]
=
vsetq_lane_s16
(
alpha
[
88
],
alpha128
[
0
],
3
);
alpha128
[
6
]
=
(
int16x8_t
)
vshlq_n_s64
((
int64x2_t
)
alpha128
[
6
+
frame_length
],
16
);
alpha128
[
6
]
=
vsetq_lane_s16
(
alpha
[
104
],
alpha128
[
0
],
3
);
alpha128
[
7
]
=
(
int16x8_t
)
vshlq_n_s64
((
int64x2_t
)
alpha128
[
7
+
frame_length
],
16
);
alpha128
[
7
]
=
vsetq_lane_s16
(
alpha
[
120
],
alpha128
[
0
],
3
);
#endif
// set initial alpha in column 0 to (0,-MAX/2,...,-MAX/2)
alpha
[
8
]
=
-
MAX
/
2
;
...
...
@@ -299,31 +298,30 @@ void compute_alpha16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned s
alpha
[
56
]
=
-
MAX
/
2
;
#ifdef DEBUG_LOGMAP
fprintf
(
fdsse4
,
"Second run
\n
"
);
print_shorts
(
"a0"
,(
int16_t
*
)
&
alpha128
[
0
]);
print_shorts
(
"a1"
,(
int16_t
*
)
&
alpha128
[
1
]);
print_shorts
(
"a2"
,(
int16_t
*
)
&
alpha128
[
2
]);
print_shorts
(
"a3"
,(
int16_t
*
)
&
alpha128
[
3
]);
print_shorts
(
"a4"
,(
int16_t
*
)
&
alpha128
[
4
]);
print_shorts
(
"a5"
,(
int16_t
*
)
&
alpha128
[
5
]);
print_shorts
(
"a6"
,(
int16_t
*
)
&
alpha128
[
6
]);
print_shorts
(
"a7"
,(
int16_t
*
)
&
alpha128
[
7
]);
print_shorts
(
"a0"
,(
int16_t
*
)
&
alpha128
[
0
]);
print_shorts
(
"a1"
,(
int16_t
*
)
&
alpha128
[
1
]);
print_shorts
(
"a2"
,(
int16_t
*
)
&
alpha128
[
2
]);
print_shorts
(
"a3"
,(
int16_t
*
)
&
alpha128
[
3
]);
print_shorts
(
"a4"
,(
int16_t
*
)
&
alpha128
[
4
]);
print_shorts
(
"a5"
,(
int16_t
*
)
&
alpha128
[
5
]);
print_shorts
(
"a6"
,(
int16_t
*
)
&
alpha128
[
6
]);
print_shorts
(
"a7"
,(
int16_t
*
)
&
alpha128
[
7
]);
#endif
}
alpha_ptr
=
&
alpha128
[
0
];
//#ifdef __AVX2__
#if defined(__x86_64__) || defined(__i386__)
m11p
=
(
__m128i
*
)
m_11
;
m10p
=
(
__m128i
*
)
m_10
;
m11p
=
(
__m128i
*
)
m_11
;
m10p
=
(
__m128i
*
)
m_10
;
#elif defined(__arm__)
m11p
=
(
int16x8_t
*
)
m_11
;
m10p
=
(
int16x8_t
*
)
m_10
;
m11p
=
(
int16x8_t
*
)
m_11
;
m10p
=
(
int16x8_t
*
)
m_10
;
#endif
for
(
k
=
0
;
k
<
l
;
k
++
)
{
#if defined(__x86_64__) || defined(__i386__)
//#ifndef __AVX2__
#if 1
...
...
@@ -331,7 +329,6 @@ void compute_alpha16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned s
a3
=
_mm_load_si128
(
&
alpha_ptr
[
3
]);
a5
=
_mm_load_si128
(
&
alpha_ptr
[
5
]);
a7
=
_mm_load_si128
(
&
alpha_ptr
[
7
]);
m_b0
=
_mm_adds_epi16
(
a1
,
*
m11p
);
// m11
m_b4
=
_mm_subs_epi16
(
a1
,
*
m11p
);
// m00=-m11
m_b1
=
_mm_subs_epi16
(
a3
,
*
m10p
);
// m01=-m10
...
...
@@ -340,12 +337,10 @@ void compute_alpha16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned s
m_b6
=
_mm_subs_epi16
(
a5
,
*
m10p
);
// m01=-m10
m_b3
=
_mm_subs_epi16
(
a7
,
*
m11p
);
// m00=-m11
m_b7
=
_mm_adds_epi16
(
a7
,
*
m11p
);
// m11
a0
=
_mm_load_si128
(
&
alpha_ptr
[
0
]);
a2
=
_mm_load_si128
(
&
alpha_ptr
[
2
]);
a4
=
_mm_load_si128
(
&
alpha_ptr
[
4
]);
a6
=
_mm_load_si128
(
&
alpha_ptr
[
6
]);
new0
=
_mm_subs_epi16
(
a0
,
*
m11p
);
// m00=-m11
new4
=
_mm_adds_epi16
(
a0
,
*
m11p
);
// m11
new1
=
_mm_adds_epi16
(
a2
,
*
m10p
);
// m10
...
...
@@ -354,7 +349,6 @@ void compute_alpha16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned s
new6
=
_mm_adds_epi16
(
a4
,
*
m10p
);
// m10
new3
=
_mm_adds_epi16
(
a6
,
*
m11p
);
// m11
new7
=
_mm_subs_epi16
(
a6
,
*
m11p
);
// m00=-m11
a0
=
_mm_max_epi16
(
m_b0
,
new0
);
a1
=
_mm_max_epi16
(
m_b1
,
new1
);
a2
=
_mm_max_epi16
(
m_b2
,
new2
);
...
...
@@ -363,7 +357,6 @@ void compute_alpha16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned s
a5
=
_mm_max_epi16
(
m_b5
,
new5
);
a6
=
_mm_max_epi16
(
m_b6
,
new6
);
a7
=
_mm_max_epi16
(
m_b7
,
new7
);
alpha_max
=
_mm_max_epi16
(
a0
,
a1
);
alpha_max
=
_mm_max_epi16
(
alpha_max
,
a2
);
alpha_max
=
_mm_max_epi16
(
alpha_max
,
a3
);
...
...
@@ -378,29 +371,22 @@ void compute_alpha16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned s
a75
=
_mm256_load_si256
(
&
alpha_ptr256
[
3
]);
m11m10_256
=
_mm256_insertf128_si256
(
m11m10_256
,
*
m11p
,
0
);
m11m10_256
=
_mm256_insertf128_si256
(
m11m10_256
,
*
m10p
,
1
);
m_b01
=
_mm256_adds_epi16
(
a13
,
m11m10_256
);
//negative m10
m_b23
=
_mm256_subs_epi16
(
a75
,
m11m10_256
);
//negative m10
m_b45
=
_mm256_subs_epi16
(
a13
,
m11m10_256
);
//negative m10
m_b67
=
_mm256_adds_epi16
(
a75
,
m11m10_256
);
//negative m10
new01
=
_mm256_subs_epi16
(
a02
,
m11m10_256
);
//negative m10
new23
=
_mm256_adds_epi16
(
a64
,
m11m10_256
);
//negative m10
new45
=
_mm256_adds_epi16
(
a02
,
m11m10_256
);
//negative m10
new67
=
_mm256_subs_epi16
(
a64
,
m11m10_256
);
//negative m10
a01
=
_mm256_max_epi16
(
m_b01
,
new01
);
a23
=
_mm256_max_epi16
(
m_b23
,
new23
);
a45
=
_mm256_max_epi16
(
m_b45
,
new45
);
a67
=
_mm256_max_epi16
(
m_b67
,
new67
);
alpha_max
=
_mm256_max_epi16
(
a01
,
a23
);
alpha_max
=
_mm256_max_epi16
(
alpha_max
,
a45
);
alpha_max
=
_mm256_max_epi16
(
alpha_max
,
a67
);
alpha_max
=
_mm256_max_epi16
(
alpha_max
,
_mm256_permutevar8x32_epi32
(
alpha_max
,
_mm256_set_epi32
(
3
,
2
,
1
,
0
,
7
,
6
,
5
,
4
)));
#endif
#elif defined(__arm__)
m_b0
=
vqaddq_s16
(
alpha_ptr
[
1
],
*
m11p
);
// m11
...
...
@@ -411,7 +397,6 @@ void compute_alpha16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned s
m_b6
=
vqsubq_s16
(
alpha_ptr
[
5
],
*
m10p
);
// m01=-m10
m_b3
=
vqsubq_s16
(
alpha_ptr
[
7
],
*
m11p
);
// m00=-m11
m_b7
=
vqaddq_s16
(
alpha_ptr
[
7
],
*
m11p
);
// m11
new0
=
vqsubq_s16
(
alpha_ptr
[
0
],
*
m11p
);
// m00=-m11
new4
=
vqaddq_s16
(
alpha_ptr
[
0
],
*
m11p
);
// m11
new1
=
vqaddq_s16
(
alpha_ptr
[
2
],
*
m10p
);
// m10
...
...
@@ -428,7 +413,6 @@ void compute_alpha16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned s
a5
=
vmaxq_s16
(
m_b5
,
new5
);
a6
=
vmaxq_s16
(
m_b6
,
new6
);
a7
=
vmaxq_s16
(
m_b7
,
new7
);
// compute and subtract maxima
alpha_max
=
vmaxq_s16
(
a0
,
a1
);
alpha_max
=
vmaxq_s16
(
alpha_max
,
a2
);
...
...
@@ -437,9 +421,7 @@ void compute_alpha16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned s
alpha_max
=
vmaxq_s16
(
alpha_max
,
a5
);
alpha_max
=
vmaxq_s16
(
alpha_max
,
a6
);
alpha_max
=
vmaxq_s16
(
alpha_max
,
a7
);
#endif
alpha_ptr
+=
8
;
//#ifdef __AVX2__
m11p
++
;
...
...
@@ -456,12 +438,10 @@ void compute_alpha16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned s
alpha_ptr
[
6
]
=
_mm_subs_epi16
(
a6
,
alpha_max
);
alpha_ptr
[
7
]
=
_mm_subs_epi16
(
a7
,
alpha_max
);
#else
a01
=
_mm256_subs_epi16
(
a01
,
alpha_max
);
a23
=
_mm256_subs_epi16
(
a23
,
alpha_max
);
a45
=
_mm256_subs_epi16
(
a45
,
alpha_max
);
a67
=
_mm256_subs_epi16
(
a67
,
alpha_max
);
alpha_ptr256
[
0
]
=
_mm256_permute2x128_si256
(
a01
,
a23
,
0x20
);
//a02
alpha_ptr256
[
1
]
=
_mm256_permute2x128_si256
(
a01
,
a23
,
0x13
);
//a13
alpha_ptr256
[
2
]
=
_mm256_permute2x128_si256
(
a45
,
a67
,
0x02
);
//a64
...
...
@@ -477,49 +457,44 @@ void compute_alpha16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned s
alpha_ptr
[
6
]
=
vqsubq_s16
(
a6
,
alpha_max
);
alpha_ptr
[
7
]
=
vqsubq_s16
(
a7
,
alpha_max
);
#endif
#ifdef DEBUG_LOGMAP
fprintf
(
fdsse4
,
"Loop index %d
\n
"
,
k
);
print_shorts
(
"mb0"
,(
int16_t
*
)
&
m_b0
);
print_shorts
(
"mb1"
,(
int16_t
*
)
&
m_b1
);
print_shorts
(
"mb2"
,(
int16_t
*
)
&
m_b2
);
print_shorts
(
"mb3"
,(
int16_t
*
)
&
m_b3
);
print_shorts
(
"mb4"
,(
int16_t
*
)
&
m_b4
);
print_shorts
(
"mb5"
,(
int16_t
*
)
&
m_b5
);
print_shorts
(
"mb6"
,(
int16_t
*
)
&
m_b6
);
print_shorts
(
"mb7"
,(
int16_t
*
)
&
m_b7
);
print_shorts
(
"mb0"
,(
int16_t
*
)
&
m_b0
);
print_shorts
(
"mb1"
,(
int16_t
*
)
&
m_b1
);
print_shorts
(
"mb2"
,(
int16_t
*
)
&
m_b2
);
print_shorts
(
"mb3"
,(
int16_t
*
)
&
m_b3
);
print_shorts
(
"mb4"
,(
int16_t
*
)
&
m_b4
);
print_shorts
(
"mb5"
,(
int16_t
*
)
&
m_b5
);
print_shorts
(
"mb6"
,(
int16_t
*
)
&
m_b6
);
print_shorts
(
"mb7"
,(
int16_t
*
)
&
m_b7
);
fprintf
(
fdsse4
,
"Loop index %d, new
\n
"
,
k
);
print_shorts
(
"new0"
,(
int16_t
*
)
&
new0
);
print_shorts
(
"new1"
,(
int16_t
*
)
&
new1
);
print_shorts
(
"new2"
,(
int16_t
*
)
&
new2
);
print_shorts
(
"new3"
,(
int16_t
*
)
&
new3
);
print_shorts
(
"new4"
,(
int16_t
*
)
&
new4
);
print_shorts
(
"new5"
,(
int16_t
*
)
&
new5
);
print_shorts
(
"new6"
,(
int16_t
*
)
&
new6
);
print_shorts
(
"new7"
,(
int16_t
*
)
&
new7
);
print_shorts
(
"new0"
,(
int16_t
*
)
&
new0
);
print_shorts
(
"new1"
,(
int16_t
*
)
&
new1
);
print_shorts
(
"new2"
,(
int16_t
*
)
&
new2
);
print_shorts
(
"new3"
,(
int16_t
*
)
&
new3
);
print_shorts
(
"new4"
,(
int16_t
*
)
&
new4
);
print_shorts
(
"new5"
,(
int16_t
*
)
&
new5
);
print_shorts
(
"new6"
,(
int16_t
*
)
&
new6
);
print_shorts
(
"new7"
,(
int16_t
*
)
&
new7
);
fprintf
(
fdsse4
,
"Loop index %d, after max
\n
"
,
k
);
print_shorts
(
"a0"
,(
int16_t
*
)
&
a0
);
print_shorts
(
"a1"
,(
int16_t
*
)
&
a1
);
print_shorts
(
"a2"
,(
int16_t
*
)
&
a2
);
print_shorts
(
"a3"
,(
int16_t
*
)
&
a3
);
print_shorts
(
"a4"
,(
int16_t
*
)
&
a4
);
print_shorts
(
"a5"
,(
int16_t
*
)
&
a5
);
print_shorts
(
"a6"
,(
int16_t
*
)
&
a6
);
print_shorts
(
"a7"
,(
int16_t
*
)
&
a7
);
print_shorts
(
"a0"
,(
int16_t
*
)
&
a0
);
print_shorts
(
"a1"
,(
int16_t
*
)
&
a1
);
print_shorts
(
"a2"
,(
int16_t
*
)
&
a2
);
print_shorts
(
"a3"
,(
int16_t
*
)
&
a3
);
print_shorts
(
"a4"
,(
int16_t
*
)
&
a4
);
print_shorts
(
"a5"
,(
int16_t
*
)
&
a5
);
print_shorts
(
"a6"
,(
int16_t
*
)
&
a6
);
print_shorts
(
"a7"
,(
int16_t
*
)
&
a7
);
fprintf
(
fdsse4
,
"Loop index %d
\n
"
,
k
);
print_shorts
(
"a0"
,(
int16_t
*
)
&
alpha_ptr
[
0
]);
print_shorts
(
"a1"
,(
int16_t
*
)
&
alpha_ptr
[
1
]);
print_shorts
(
"a2"
,(
int16_t
*
)
&
alpha_ptr
[
2
]);
print_shorts
(
"a3"
,(
int16_t
*
)
&
alpha_ptr
[
3
]);
print_shorts
(
"a4"
,(
int16_t
*
)
&
alpha_ptr
[
4
]);
print_shorts
(
"a5"
,(
int16_t
*
)
&
alpha_ptr
[
5
]);
print_shorts
(
"a6"
,(
int16_t
*
)
&
alpha_ptr
[
6
]);
print_shorts
(
"a7"
,(
int16_t
*
)
&
alpha_ptr
[
7
]);
print_shorts
(
"a0"
,(
int16_t
*
)
&
alpha_ptr
[
0
]);
print_shorts
(
"a1"
,(
int16_t
*
)
&
alpha_ptr
[
1
]);
print_shorts
(
"a2"
,(
int16_t
*
)
&
alpha_ptr
[
2
]);
print_shorts
(
"a3"
,(
int16_t
*
)
&
alpha_ptr
[
3
]);
print_shorts
(
"a4"
,(
int16_t
*
)
&
alpha_ptr
[
4
]);
print_shorts
(
"a5"
,(
int16_t
*
)
&
alpha_ptr
[
5
]);
print_shorts
(
"a6"
,(
int16_t
*
)
&
alpha_ptr
[
6
]);
print_shorts
(
"a7"
,(
int16_t
*
)
&
alpha_ptr
[
7
]);
#endif
}
if
(
rerun_flag
==
1
)
...
...
@@ -528,37 +503,28 @@ void compute_alpha16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned s
}
void
compute_beta16
(
llr_t
*
alpha
,
llr_t
*
beta
,
llr_t
*
m_11
,
llr_t
*
m_10
,
unsigned
short
frame_length
,
unsigned
char
F
,
int
offset8_flag
)
{
void
compute_beta16
(
llr_t
*
alpha
,
llr_t
*
beta
,
llr_t
*
m_11
,
llr_t
*
m_10
,
unsigned
short
frame_length
,
unsigned
char
F
,
int
offset8_flag
)
{
int
k
,
rerun_flag
=
0
;
#if defined(__x86_64__) || defined(__i386__)
__m128i
m11_128
,
m10_128
;
__m128i
m_b0
,
m_b1
,
m_b2
,
m_b3
,
m_b4
,
m_b5
,
m_b6
,
m_b7
;
__m128i
new0
,
new1
,
new2
,
new3
,
new4
,
new5
,
new6
,
new7
;
__m128i
*
beta128
,
*
alpha128
,
*
beta_ptr
;
__m128i
beta_max
;
#elif defined(__arm__)
int16x8_t
m11_128
,
m10_128
;
int16x8_t
m_b0
,
m_b1
,
m_b2
,
m_b3
,
m_b4
,
m_b5
,
m_b6
,
m_b7
;
int16x8_t
new0
,
new1
,
new2
,
new3
,
new4
,
new5
,
new6
,
new7
;
int16x8_t
*
beta128
,
*
alpha128
,
*
beta_ptr
;
int16x8_t
beta_max
;
#endif
int16_t
m11
,
m10
,
beta0_16
,
beta1_16
,
beta2_16
,
beta3_16
,
beta4_16
,
beta5_16
,
beta6_16
,
beta7_16
,
beta0_2
,
beta1_2
,
beta2_2
,
beta3_2
,
beta_m
;
llr_t
beta0
,
beta1
;
#ifdef DEBUG_LOGMAP
fprintf
(
fdsse4
,
"compute_beta, %p,%p,%p,%p,framelength %d,F %d
\n
"
,
beta
,
m_11
,
m_10
,
alpha
,
frame_length
,
F
);
#endif
// termination for beta initialization
// fprintf(fdsse4,"beta init: offset8 %d\n",offset8_flag);
m11
=
(
int16_t
)
m_11
[
2
+
frame_length
];
//#ifndef __AVX2__
...
...
@@ -570,16 +536,13 @@ void compute_beta16(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sh
#ifdef DEBUG_LOGMAP
fprintf
(
fdsse4
,
"m11,m10 %d,%d
\n
"
,
m11
,
m10
);
#endif
beta0
=
-
m11
;
//M0T_TERM;
beta1
=
m11
;
//M1T_TERM;
m11
=
(
int16_t
)
m_11
[
1
+
frame_length
];
m10
=
(
int16_t
)
m_10
[
1
+
frame_length
];
#ifdef DEBUG_LOGMAP
fprintf
(
fdsse4
,
"m11,m10 %d,%d
\n
"
,
m11
,
m10
);
#endif
beta0_2
=
beta0
-
m11
;
//+M0T_TERM;
beta1_2
=
beta0
+
m11
;
//+M1T_TERM;
beta2_2
=
beta1
+
m10
;
//M2T_TERM;
...
...
@@ -597,8 +560,6 @@ void compute_beta16(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sh
beta5_16
=
beta2_2
+
m10
;
//+M5T_TERM;
beta6_16
=
beta3_2
+
m11
;
//+M6T_TERM;
beta7_16
=
beta3_2
-
m11
;
//+M7T_TERM;
beta_m
=
(
beta0_16
>
beta1_16
)
?
beta0_16
:
beta1_16
;
beta_m
=
(
beta_m
>
beta2_16
)
?
beta_m
:
beta2_16
;
beta_m
=
(
beta_m
>
beta3_16
)
?
beta_m
:
beta3_16
;
...
...
@@ -606,8 +567,6 @@ void compute_beta16(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sh
beta_m
=
(
beta_m
>
beta5_16
)
?
beta_m
:
beta5_16
;
beta_m
=
(
beta_m
>
beta6_16
)
?
beta_m
:
beta6_16
;
beta_m
=
(
beta_m
>
beta7_16
)
?
beta_m
:
beta7_16
;
beta0_16
=
beta0_16
-
beta_m
;
beta1_16
=
beta1_16
-
beta_m
;
beta2_16
=
beta2_16
-
beta_m
;
...
...
@@ -619,12 +578,13 @@ void compute_beta16(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sh
for
(
rerun_flag
=
0
;;
rerun_flag
=
1
)
{
#if defined(__x86_64__) || defined(__i386__)
beta_ptr
=
(
__m128i
*
)
&
beta
[
frame_length
<<
3
];
alpha128
=
(
__m128i
*
)
&
alpha
[
0
];
beta_ptr
=
(
__m128i
*
)
&
beta
[
frame_length
<<
3
];
alpha128
=
(
__m128i
*
)
&
alpha
[
0
];
#elif defined(__arm__)
beta_ptr
=
(
int16x8_t
*
)
&
beta
[
frame_length
<<
3
];
alpha128
=
(
int16x8_t
*
)
&
alpha
[
0
];
beta_ptr
=
(
int16x8_t
*
)
&
beta
[
frame_length
<<
3
];
alpha128
=
(
int16x8_t
*
)
&
alpha
[
0
];
#endif
if
(
rerun_flag
==
0
)
{
beta_ptr
[
0
]
=
alpha128
[(
frame_length
)];
beta_ptr
[
1
]
=
alpha128
[
1
+
(
frame_length
)];
...
...
@@ -636,18 +596,18 @@ void compute_beta16(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sh
beta_ptr
[
7
]
=
alpha128
[
7
+
(
frame_length
)];
#ifdef DEBUG_LOGMAP
fprintf
(
fdsse4
,
"beta init
\n
"
);
print_shorts
(
"b0"
,(
int16_t
*
)
&
beta_ptr
[
0
]);
print_shorts
(
"b1"
,(
int16_t
*
)
&
beta_ptr
[
1
]);
print_shorts
(
"b2"
,(
int16_t
*
)
&
beta_ptr
[
2
]);
print_shorts
(
"b3"
,(
int16_t
*
)
&
beta_ptr
[
3
]);
print_shorts
(
"b4"
,(
int16_t
*
)
&
beta_ptr
[
4
]);
print_shorts
(
"b5"
,(
int16_t
*
)
&
beta_ptr
[
5
]);
print_shorts
(
"b6"
,(
int16_t
*
)
&
beta_ptr
[
6
]);
print_shorts
(
"b7"
,(
int16_t
*
)
&
beta_ptr
[
7
]);
print_shorts
(
"b0"
,(
int16_t
*
)
&
beta_ptr
[
0
]);
print_shorts
(
"b1"
,(
int16_t
*
)
&
beta_ptr
[
1
]);
print_shorts
(
"b2"
,(
int16_t
*
)
&
beta_ptr
[
2
]);
print_shorts
(
"b3"
,(
int16_t
*
)
&
beta_ptr
[
3
]);
print_shorts
(
"b4"
,(
int16_t
*
)
&
beta_ptr
[
4
]);
print_shorts
(
"b5"
,(
int16_t
*
)
&
beta_ptr
[
5
]);
print_shorts
(
"b6"
,(
int16_t
*
)
&
beta_ptr
[
6
]);
print_shorts
(
"b7"
,(
int16_t
*
)
&
beta_ptr
[
7
]);
#endif
}
else
{
#if defined(__x86_64__) || defined(__i386__)
beta128
=
(
__m128i
*
)
&
beta
[
0
];
beta128
=
(
__m128i
*
)
&
beta
[
0
];
beta_ptr
[
0
]
=
_mm_srli_si128
(
beta128
[
0
],
2
);
beta_ptr
[
1
]
=
_mm_srli_si128
(
beta128
[
1
],
2
);
beta_ptr
[
2
]
=
_mm_srli_si128
(
beta128
[
2
],
2
);
...
...
@@ -657,31 +617,38 @@ void compute_beta16(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sh
beta_ptr
[
6
]
=
_mm_srli_si128
(
beta128
[
6
],
2
);
beta_ptr
[
7
]
=
_mm_srli_si128
(
beta128
[
7
],
2
);
#elif defined(__arm__)
beta128
=
(
int16x8_t
*
)
&
beta
[
0
];
beta_ptr
=
(
int16x8_t
*
)
&
beta
[
frame_length
<<
3
];
beta_ptr
[
0
]
=
(
int16x8_t
)
vshrq_n_s64
((
int64x2_t
)
beta128
[
0
],
16
);
beta_ptr
[
0
]
=
vsetq_lane_s16
(
beta
[
3
],
beta_ptr
[
0
],
4
);
beta_ptr
[
1
]
=
(
int16x8_t
)
vshrq_n_s64
((
int64x2_t
)
beta128
[
1
],
16
);
beta_ptr
[
1
]
=
vsetq_lane_s16
(
beta
[
11
],
beta_ptr
[
1
],
4
);
beta_ptr
[
2
]
=
(
int16x8_t
)
vshrq_n_s64
((
int64x2_t
)
beta128
[
2
],
16
);
beta_ptr
[
2
]
=
vsetq_lane_s16
(
beta
[
19
],
beta_ptr
[
2
],
4
);
beta_ptr
[
3
]
=
(
int16x8_t
)
vshrq_n_s64
((
int64x2_t
)
beta128
[
3
],
16
);
beta_ptr
[
3
]
=
vsetq_lane_s16
(
beta
[
27
],
beta_ptr
[
3
],
4
);
beta_ptr
[
4
]
=
(
int16x8_t
)
vshrq_n_s64
((
int64x2_t
)
beta128
[
4
],
16
);
beta_ptr
[
4
]
=
vsetq_lane_s16
(
beta
[
35
],
beta_ptr
[
4
],
4
);
beta_ptr
[
5
]
=
(
int16x8_t
)
vshrq_n_s64
((
int64x2_t
)
beta128
[
5
],
16
);
beta_ptr
[
5
]
=
vsetq_lane_s16
(
beta
[
43
],
beta_ptr
[
5
],
4
);
beta_ptr
[
6
]
=
(
int16x8_t
)
vshrq_n_s64
((
int64x2_t
)
beta128
[
6
],
16
);
beta_ptr
[
6
]
=
vsetq_lane_s16
(
beta
[
51
],
beta_ptr
[
6
],
4
);
beta_ptr
[
7
]
=
(
int16x8_t
)
vshrq_n_s64
((
int64x2_t
)
beta128
[
7
],
16
);
beta_ptr
[
7
]
=
vsetq_lane_s16
(
beta
[
59
],
beta_ptr
[
7
],
4
);
beta128
=
(
int16x8_t
*
)
&
beta
[
0
];
beta_ptr
=
(
int16x8_t
*
)
&
beta
[
frame_length
<<
3
];
beta_ptr
[
0
]
=
(
int16x8_t
)
vshrq_n_s64
((
int64x2_t
)
beta128
[
0
],
16
);
beta_ptr
[
0
]
=
vsetq_lane_s16
(
beta
[
3
],
beta_ptr
[
0
],
4
);
beta_ptr
[
1
]
=
(
int16x8_t
)
vshrq_n_s64
((
int64x2_t
)
beta128
[
1
],
16
);
beta_ptr
[
1
]
=
vsetq_lane_s16
(
beta
[
11
],
beta_ptr
[
1
],
4
);
beta_ptr
[
2
]
=
(
int16x8_t
)
vshrq_n_s64
((
int64x2_t
)
beta128
[
2
],
16
);
beta_ptr
[
2
]
=
vsetq_lane_s16
(
beta
[
19
],
beta_ptr
[
2
],
4
);
beta_ptr
[
3
]
=
(
int16x8_t
)
vshrq_n_s64
((
int64x2_t
)
beta128
[
3
],
16
);
beta_ptr
[
3
]
=
vsetq_lane_s16
(
beta
[
27
],
beta_ptr
[
3
],
4
);
beta_ptr
[
4
]
=
(
int16x8_t
)
vshrq_n_s64
((
int64x2_t
)
beta128
[
4
],
16
);
beta_ptr
[
4
]
=
vsetq_lane_s16
(
beta
[
35
],
beta_ptr
[
4
],
4
);
beta_ptr
[
5
]
=
(
int16x8_t
)
vshrq_n_s64
((
int64x2_t
)
beta128
[
5
],
16
);
beta_ptr
[
5
]
=
vsetq_lane_s16
(
beta
[
43
],
beta_ptr
[
5
],
4
);
beta_ptr
[
6
]
=
(
int16x8_t
)
vshrq_n_s64
((
int64x2_t
)
beta128
[
6
],
16
);
beta_ptr
[
6
]
=
vsetq_lane_s16
(
beta
[
51
],
beta_ptr
[
6
],
4
);
beta_ptr
[
7
]
=
(
int16x8_t
)
vshrq_n_s64
((
int64x2_t
)
beta128
[
7
],
16
);
beta_ptr
[
7
]
=
vsetq_lane_s16
(
beta
[
59
],
beta_ptr
[
7
],
4
);
#endif
#ifdef DEBUG_LOGMAP
fprintf
(
fdsse4
,
"beta init (second run)
\n
"
);
print_shorts
(
"b0"
,(
int16_t
*
)
&
beta_ptr
[
0
]);
print_shorts
(
"b1"
,(
int16_t
*
)
&
beta_ptr
[
1
]);
print_shorts
(
"b2"
,(
int16_t
*
)
&
beta_ptr
[
2
]);
print_shorts
(
"b3"
,(
int16_t
*
)
&
beta_ptr
[
3
]);
print_shorts
(
"b4"
,(
int16_t
*
)
&
beta_ptr
[
4
]);
print_shorts
(
"b5"
,(
int16_t
*
)
&
beta_ptr
[
5
]);
print_shorts
(
"b6"
,(
int16_t
*
)
&
beta_ptr
[
6
]);
print_shorts
(
"b7"
,(
int16_t
*
)
&
beta_ptr
[
7
]);
print_shorts
(
"b0"
,(
int16_t
*
)
&
beta_ptr
[
0
]);
print_shorts
(
"b1"
,(
int16_t
*
)
&
beta_ptr
[
1
]);
print_shorts
(
"b2"
,(
int16_t
*
)
&
beta_ptr
[
2
]);
print_shorts
(
"b3"
,(
int16_t
*
)
&
beta_ptr
[
3
]);
print_shorts
(
"b4"
,(
int16_t
*
)
&
beta_ptr
[
4
]);
print_shorts
(
"b5"
,(
int16_t
*
)
&
beta_ptr
[
5
]);
print_shorts
(
"b6"
,(
int16_t
*
)
&
beta_ptr
[
6
]);
print_shorts
(
"b7"
,(
int16_t
*
)
&
beta_ptr
[
7
]);
#endif
}
#if defined(__x86_64__) || defined(__i386__)
beta_ptr
[
0
]
=
_mm_insert_epi16
(
beta_ptr
[
0
],
beta0_16
,
7
);
beta_ptr
[
1
]
=
_mm_insert_epi16
(
beta_ptr
[
1
],
beta1_16
,
7
);
...
...
@@ -701,26 +668,23 @@ void compute_beta16(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sh
beta_ptr
[
6
]
=
vsetq_lane_s16
(
beta6_16
,
beta_ptr
[
6
],
7
);
beta_ptr
[
7
]
=
vsetq_lane_s16
(
beta7_16
,
beta_ptr
[
7
],
7
);
#endif
#ifdef DEBUG_LOGMAP
fprintf
(
fdsse4
,
"beta init (after insert)
\n
"
);
print_shorts
(
"b0"
,(
int16_t
*
)
&
beta_ptr
[
0
]);
print_shorts
(
"b1"
,(
int16_t
*
)
&
beta_ptr
[
1
]);
print_shorts
(
"b2"
,(
int16_t
*
)
&
beta_ptr
[
2
]);
print_shorts
(
"b3"
,(
int16_t
*
)
&
beta_ptr
[
3
]);
print_shorts
(
"b4"
,(
int16_t
*
)
&
beta_ptr
[
4
]);
print_shorts
(
"b5"
,(
int16_t
*
)
&
beta_ptr
[
5
]);
print_shorts
(
"b6"
,(
int16_t
*
)
&
beta_ptr
[
6
]);
print_shorts
(
"b7"
,(
int16_t
*
)
&
beta_ptr
[
7
]);
print_shorts
(
"b0"
,(
int16_t
*
)
&
beta_ptr
[
0
]);
print_shorts
(
"b1"
,(
int16_t
*
)
&
beta_ptr
[
1
]);
print_shorts
(
"b2"
,(
int16_t
*
)
&
beta_ptr
[
2
]);
print_shorts
(
"b3"
,(
int16_t
*
)
&
beta_ptr
[
3
]);
print_shorts
(
"b4"
,(
int16_t
*
)
&
beta_ptr
[
4
]);
print_shorts
(
"b5"
,(
int16_t
*
)
&
beta_ptr
[
5
]);
print_shorts
(
"b6"
,(
int16_t
*
)
&
beta_ptr
[
6
]);
print_shorts
(
"b7"
,(
int16_t
*
)
&
beta_ptr
[
7
]);
#endif
int
loopval
=
((
rerun_flag
==
0
)
?
0
:
((
frame_length
-
L
)
>>
3
));
for
(
k
=
(
frame_length
>>
3
)
-
1
;
k
>=
loopval
;
k
--
)
{
#if defined(__x86_64__) || defined(__i386__)
m11_128
=
((
__m128i
*
)
m_11
)[
k
];
m10_128
=
((
__m128i
*
)
m_10
)[
k
];
m11_128
=
((
__m128i
*
)
m_11
)[
k
];
m10_128
=
((
__m128i
*
)
m_10
)[
k
];
//#ifndef __AVX2__
#if 1
m_b0
=
_mm_adds_epi16
(
beta_ptr
[
4
],
m11_128
);
//m11
...
...
@@ -731,8 +695,6 @@ void compute_beta16(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sh
m_b5
=
_mm_subs_epi16
(
beta_ptr
[
6
],
m10_128
);
//m01
m_b6
=
_mm_subs_epi16
(
beta_ptr
[
7
],
m11_128
);
//m00
m_b7
=
_mm_adds_epi16
(
beta_ptr
[
7
],
m11_128
);
//m11
new0
=
_mm_subs_epi16
(
beta_ptr
[
0
],
m11_128
);
//m00
new1
=
_mm_adds_epi16
(
beta_ptr
[
0
],
m11_128
);
//m11
new2
=
_mm_adds_epi16
(
beta_ptr
[
1
],
m10_128
);
//m10
...
...
@@ -741,16 +703,13 @@ void compute_beta16(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sh
new5
=
_mm_adds_epi16
(
beta_ptr
[
2
],
m10_128
);
//m10
new6
=
_mm_adds_epi16
(
beta_ptr
[
3
],
m11_128
);
//m11
new7
=
_mm_subs_epi16
(
beta_ptr
[
3
],
m11_128
);
//m00
#else
b01
=
_mm256_load_si256
(
&
((
_m256i
*
)
beta_ptr
)[
0
]);
b23
=
_mm256_load_si256
(
&
((
_m256i
*
)
beta_ptr
)[
1
]);
b45
=
_mm256_load_si256
(
&
((
_m256i
*
)
beta_ptr
)[
2
]);
b67
=
_mm256_load_si256
(
&
((
_m256i
*
)
beta_ptr
)[
3
]);
b01
=
_mm256_load_si256
(
&
((
_m256i
*
)
beta_ptr
)[
0
]);
b23
=
_mm256_load_si256
(
&
((
_m256i
*
)
beta_ptr
)[
1
]);
b45
=
_mm256_load_si256
(
&
((
_m256i
*
)
beta_ptr
)[
2
]);
b67
=
_mm256_load_si256
(
&
((
_m256i
*
)
beta_ptr
)[
3
]);
m11m10_256
=
_mm256_insertf128_si256
(
m11m10_256
,
m11_128
,
0
);
m11m10_256
=
_mm256_insertf128_si256
(
m11m10_256
,
m10_128
,
1
);
m_b02
=
_mm256_adds_epi16
(
b45
,
m11m10_256
);
//negative m10
m_b13
=
_mm256_subs_epi16
(
b45
,
m11m10_256
);
//negative m10
m_b64
=
_mm256_subs_epi16
(
b67
,
m11m10_256
);
//negative m10
...
...
@@ -760,9 +719,7 @@ void compute_beta16(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sh
new64
=
_mm256_adds_epi16
(
b23
,
m11m10_256
);
//negative m10
new75
=
_mm256_subs_epi16
(
b24
,
m11m10_256
);
//negative m10
#endif
beta_ptr
-=
8
;
//#ifndef __AVX2__
#if 1
beta_ptr
[
0
]
=
_mm_max_epi16
(
m_b0
,
new0
);
...
...
@@ -773,7 +730,6 @@ void compute_beta16(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sh
beta_ptr
[
5
]
=
_mm_max_epi16
(
m_b5
,
new5
);
beta_ptr
[
6
]
=
_mm_max_epi16
(
m_b6
,
new6
);
beta_ptr
[
7
]
=
_mm_max_epi16
(
m_b7
,
new7
);
beta_max
=
_mm_max_epi16
(
beta_ptr
[
0
],
beta_ptr
[
1
]);
beta_max
=
_mm_max_epi16
(
beta_max
,
beta_ptr
[
2
]);
beta_max
=
_mm_max_epi16
(
beta_max
,
beta_ptr
[
3
]);
...
...
@@ -781,7 +737,6 @@ void compute_beta16(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sh
beta_max
=
_mm_max_epi16
(
beta_max
,
beta_ptr
[
5
]);
beta_max
=
_mm_max_epi16
(
beta_max
,
beta_ptr
[
6
]);
beta_max
=
_mm_max_epi16
(
beta_max
,
beta_ptr
[
7
]);
beta_ptr
[
0
]
=
_mm_subs_epi16
(
beta_ptr
[
0
],
beta_max
);
beta_ptr
[
1
]
=
_mm_subs_epi16
(
beta_ptr
[
1
],
beta_max
);
beta_ptr
[
2
]
=
_mm_subs_epi16
(
beta_ptr
[
2
],
beta_max
);
...
...
@@ -795,26 +750,22 @@ void compute_beta16(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sh
b13
=
_mm256_max_epi16
(
m_b13
,
new13
);
b64
=
_mm256_max_epi16
(
m_b64
,
new64
);
b75
=
_mm256_max_epi16
(
m_b75
,
new75
);
beta_max
=
_mm256_max_epi16
(
b02
,
b13
);
beta_max
=
_mm256_max_epi16
(
beta_max
,
b64
);
beta_max
=
_mm256_max_epi16
(
beta_max
,
b75
);
beta_max
=
_mm256_max_epi16
(
beta_max
,
_mm256_permutevar8x32_epi32
(
betaa_max
,
_mm256_set_epi32
(
3
,
2
,
1
,
0
,
7
,
6
,
5
,
4
)));
b02
=
_mm256_subs_epi16
(
b02
,
beta_max
);
b13
=
_mm256_subs_epi16
(
b13
,
beta_max
);
b64
=
_mm256_subs_epi16
(
b64
,
beta_max
);
b75
=
_mm256_subs_epi16
(
b75
,
beta_max
);
((
_m256i
*
)
beta_ptr
)[
0
])
=
_mm256_permute2x128_si256
(
b02
,
b13
,
0x02
);
//b01
((
_m256i
*
)
beta_ptr
)[
1
])
=
_mm256_permute2x128_si256
(
b02
,
b13
,
0x31
);
//b23
((
_m256i
*
)
beta_ptr
)[
2
])
=
_mm256_permute2x128_si256
(
b64
,
b75
,
0x13
);
//b45
((
_m256i
*
)
beta_ptr
)[
3
])
=
_mm256_permute2x128_si256
(
b64
,
b75
,
0x20
);
//b67
((
_m256i
*
)
beta_ptr
)[
0
])
=
_mm256_permute2x128_si256
(
b02
,
b13
,
0x02
);
//b01
((
_m256i
*
)
beta_ptr
)[
1
])
=
_mm256_permute2x128_si256
(
b02
,
b13
,
0x31
);
//b23
((
_m256i
*
)
beta_ptr
)[
2
])
=
_mm256_permute2x128_si256
(
b64
,
b75
,
0x13
);
//b45
((
_m256i
*
)
beta_ptr
)[
3
])
=
_mm256_permute2x128_si256
(
b64
,
b75
,
0x20
);
//b67
#endif
#elif defined(__arm__)
m11_128
=
((
int16x8_t
*
)
m_11
)[
k
];
m10_128
=
((
int16x8_t
*
)
m_10
)[
k
];
m11_128
=
((
int16x8_t
*
)
m_11
)[
k
];
m10_128
=
((
int16x8_t
*
)
m_10
)[
k
];
m_b0
=
vqaddq_s16
(
beta_ptr
[
4
],
m11_128
);
//m11
m_b1
=
vqsubq_s16
(
beta_ptr
[
4
],
m11_128
);
//m00
m_b2
=
vqsubq_s16
(
beta_ptr
[
5
],
m10_128
);
//m01
...
...
@@ -823,7 +774,6 @@ void compute_beta16(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sh
m_b5
=
vqsubq_s16
(
beta_ptr
[
6
],
m10_128
);
//m01
m_b6
=
vqsubq_s16
(
beta_ptr
[
7
],
m11_128
);
//m00
m_b7
=
vqaddq_s16
(
beta_ptr
[
7
],
m11_128
);
//m11
new0
=
vqsubq_s16
(
beta_ptr
[
0
],
m11_128
);
//m00
new1
=
vqaddq_s16
(
beta_ptr
[
0
],
m11_128
);
//m11
new2
=
vqaddq_s16
(
beta_ptr
[
1
],
m10_128
);
//m10
...
...
@@ -832,9 +782,7 @@ void compute_beta16(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sh
new5
=
vqaddq_s16
(
beta_ptr
[
2
],
m10_128
);
//m10
new6
=
vqaddq_s16
(
beta_ptr
[
3
],
m11_128
);
//m11
new7
=
vqsubq_s16
(
beta_ptr
[
3
],
m11_128
);
//m00
beta_ptr
-=
8
;
beta_ptr
[
0
]
=
vmaxq_s16
(
m_b0
,
new0
);
beta_ptr
[
1
]
=
vmaxq_s16
(
m_b1
,
new1
);
beta_ptr
[
2
]
=
vmaxq_s16
(
m_b2
,
new2
);
...
...
@@ -843,7 +791,6 @@ void compute_beta16(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sh
beta_ptr
[
5
]
=
vmaxq_s16
(
m_b5
,
new5
);
beta_ptr
[
6
]
=
vmaxq_s16
(
m_b6
,
new6
);
beta_ptr
[
7
]
=
vmaxq_s16
(
m_b7
,
new7
);
beta_max
=
vmaxq_s16
(
beta_ptr
[
0
],
beta_ptr
[
1
]);
beta_max
=
vmaxq_s16
(
beta_max
,
beta_ptr
[
2
]);
beta_max
=
vmaxq_s16
(
beta_max
,
beta_ptr
[
3
]);
...
...
@@ -851,7 +798,6 @@ void compute_beta16(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sh
beta_max
=
vmaxq_s16
(
beta_max
,
beta_ptr
[
5
]);
beta_max
=
vmaxq_s16
(
beta_max
,
beta_ptr
[
6
]);
beta_max
=
vmaxq_s16
(
beta_max
,
beta_ptr
[
7
]);
beta_ptr
[
0
]
=
vqsubq_s16
(
beta_ptr
[
0
],
beta_max
);
beta_ptr
[
1
]
=
vqsubq_s16
(
beta_ptr
[
1
],
beta_max
);
beta_ptr
[
2
]
=
vqsubq_s16
(
beta_ptr
[
2
],
beta_max
);
...
...
@@ -861,20 +807,18 @@ void compute_beta16(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sh
beta_ptr
[
6
]
=
vqsubq_s16
(
beta_ptr
[
6
],
beta_max
);
beta_ptr
[
7
]
=
vqsubq_s16
(
beta_ptr
[
7
],
beta_max
);
#endif
#ifdef DEBUG_LOGMAP
fprintf
(
fdsse4
,
"Loop index %d, mb
\n
"
,
k
);
fprintf
(
fdsse4
,
"beta init (after max)
\n
"
);
print_shorts
(
"b0"
,(
int16_t
*
)
&
beta_ptr
[
0
]);
print_shorts
(
"b1"
,(
int16_t
*
)
&
beta_ptr
[
1
]);
print_shorts
(
"b2"
,(
int16_t
*
)
&
beta_ptr
[
2
]);
print_shorts
(
"b3"
,(
int16_t
*
)
&
beta_ptr
[
3
]);
print_shorts
(
"b4"
,(
int16_t
*
)
&
beta_ptr
[
4
]);
print_shorts
(
"b5"
,(
int16_t
*
)
&
beta_ptr
[
5
]);
print_shorts
(
"b6"
,(
int16_t
*
)
&
beta_ptr
[
6
]);
print_shorts
(
"b7"
,(
int16_t
*
)
&
beta_ptr
[
7
]);
print_shorts
(
"b0"
,(
int16_t
*
)
&
beta_ptr
[
0
]);
print_shorts
(
"b1"
,(
int16_t
*
)
&
beta_ptr
[
1
]);
print_shorts
(
"b2"
,(
int16_t
*
)
&
beta_ptr
[
2
]);
print_shorts
(
"b3"
,(
int16_t
*
)
&
beta_ptr
[
3
]);
print_shorts
(
"b4"
,(
int16_t
*
)
&
beta_ptr
[
4
]);
print_shorts
(
"b5"
,(
int16_t
*
)
&
beta_ptr
[
5
]);
print_shorts
(
"b6"
,(
int16_t
*
)
&
beta_ptr
[
6
]);
print_shorts
(
"b7"
,(
int16_t
*
)
&
beta_ptr
[
7
]);
#endif
}
if
(
rerun_flag
==
1
)
...
...
@@ -882,8 +826,7 @@ void compute_beta16(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sh
}
}
void
compute_ext16
(
llr_t
*
alpha
,
llr_t
*
beta
,
llr_t
*
m_11
,
llr_t
*
m_10
,
llr_t
*
ext
,
llr_t
*
systematic
,
unsigned
short
frame_length
)
{
void
compute_ext16
(
llr_t
*
alpha
,
llr_t
*
beta
,
llr_t
*
m_11
,
llr_t
*
m_10
,
llr_t
*
ext
,
llr_t
*
systematic
,
unsigned
short
frame_length
)
{
#if defined(__x86_64__) || defined(__i386__)
__m128i
*
alpha128
=
(
__m128i
*
)
alpha
;
__m128i
*
beta128
=
(
__m128i
*
)
beta
;
...
...
@@ -903,28 +846,21 @@ void compute_ext16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext,
int16x8_t
m10_1
,
m10_2
,
m10_3
,
m10_4
;
int16x8_t
m11_1
,
m11_2
,
m11_3
,
m11_4
;
#endif
int
k
;
//
// LLR computation, 8 consequtive bits per loop
//
#ifdef DEBUG_LOGMAP
fprintf
(
fdsse4
,
"compute_ext (sse_16bit), %p, %p, %p, %p, %p, %p ,framelength %d
\n
"
,
alpha
,
beta
,
m_11
,
m_10
,
ext
,
systematic
,
frame_length
);
#endif
alpha_ptr
=
alpha128
;
beta_ptr
=
&
beta128
[
8
];
for
(
k
=
0
;
k
<
(
frame_length
>>
3
);
k
++
)
{
#if defined(__x86_64__) || defined(__i386__)
m11_128
=
(
__m128i
*
)
&
m_11
[
k
<<
3
];
m10_128
=
(
__m128i
*
)
&
m_10
[
k
<<
3
];
ext_128
=
(
__m128i
*
)
&
ext
[
k
<<
3
];
m11_128
=
(
__m128i
*
)
&
m_11
[
k
<<
3
];
m10_128
=
(
__m128i
*
)
&
m_10
[
k
<<
3
];
ext_128
=
(
__m128i
*
)
&
ext
[
k
<<
3
];
/*
fprintf(fdsse4,"EXT %03d\n",k);
print_shorts("a0:",&alpha_ptr[0]);
...
...
@@ -944,7 +880,6 @@ void compute_ext16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext,
print_shorts("b6:",&beta_ptr[6]);
print_shorts("b7:",&beta_ptr[7]);
*/
//#ifndef __AVX2__
#if 1
m00_4
=
_mm_adds_epi16
(
alpha_ptr
[
7
],
beta_ptr
[
3
]);
//ALPHA_BETA_4m00;
...
...
@@ -964,31 +899,23 @@ void compute_ext16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext,
m10_1
=
_mm_adds_epi16
(
alpha_ptr
[
2
],
beta_ptr
[
1
]);
//ALPHA_BETA_1m10;
m01_1
=
_mm_adds_epi16
(
alpha_ptr
[
2
],
beta_ptr
[
5
]);
//ALPHA_BETA_1m01;
#else
m00_1
=
_mm_adds_epi16
(
alpha_ptr
[
0
],
beta_ptr
[
0
]);
//ALPHA_BETA_1m00;
m10_1
=
_mm_adds_epi16
(
alpha_ptr
[
2
],
beta_ptr
[
1
]);
//ALPHA_BETA_1m10;
m11_1
=
_mm_adds_epi16
(
alpha_ptr
[
0
],
beta_ptr
[
4
]);
//ALPHA_BETA_1m11;
m01_1
=
_mm_adds_epi16
(
alpha_ptr
[
2
],
beta_ptr
[
5
]);
//ALPHA_BETA_1m01;
m11_2
=
_mm_adds_epi16
(
alpha_ptr
[
1
],
beta_ptr
[
0
]);
//ALPHA_BETA_2m11;
m01_2
=
_mm_adds_epi16
(
alpha_ptr
[
3
],
beta_ptr
[
1
]);
//ALPHA_BETA_2m01;
m00_2
=
_mm_adds_epi16
(
alpha_ptr
[
1
],
beta_ptr
[
4
]);
//ALPHA_BETA_2m00;
m10_2
=
_mm_adds_epi16
(
alpha_ptr
[
3
],
beta_ptr
[
5
]);
//ALPHA_BETA_2m10;
m11_3
=
_mm_adds_epi16
(
alpha_ptr
[
6
],
beta_ptr
[
3
]);
//ALPHA_BETA_3m11;
m01_3
=
_mm_adds_epi16
(
alpha_ptr
[
4
],
beta_ptr
[
2
]);
//ALPHA_BETA_3m01;
m00_3
=
_mm_adds_epi16
(
alpha_ptr
[
6
],
beta_ptr
[
7
]);
//ALPHA_BETA_3m00;
m10_3
=
_mm_adds_epi16
(
alpha_ptr
[
4
],
beta_ptr
[
6
]);
//ALPHA_BETA_3m10;
m00_4
=
_mm_adds_epi16
(
alpha_ptr
[
7
],
beta_ptr
[
3
]);
//ALPHA_BETA_4m00;
m10_4
=
_mm_adds_epi16
(
alpha_ptr
[
5
],
beta_ptr
[
2
]);
//ALPHA_BETA_4m10;
m11_4
=
_mm_adds_epi16
(
alpha_ptr
[
7
],
beta_ptr
[
7
]);
//ALPHA_BETA_4m11;
m01_4
=
_mm_adds_epi16
(
alpha_ptr
[
5
],
beta_ptr
[
6
]);
//ALPHA_BETA_4m01;
#endif
/*
print_shorts("m11_1:",&m11_1);
print_shorts("m11_2:",&m11_2);
...
...
@@ -1019,36 +946,30 @@ void compute_ext16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext,
m11_1
=
_mm_max_epi16
(
m11_1
,
m11_2
);
m11_1
=
_mm_max_epi16
(
m11_1
,
m11_3
);
m11_1
=
_mm_max_epi16
(
m11_1
,
m11_4
);
// print_shorts("m11_1:",&m11_1);
m01_1
=
_mm_subs_epi16
(
m01_1
,
*
m10_128
);
m00_1
=
_mm_subs_epi16
(
m00_1
,
*
m11_128
);
m10_1
=
_mm_adds_epi16
(
m10_1
,
*
m10_128
);
m11_1
=
_mm_adds_epi16
(
m11_1
,
*
m11_128
);
// print_shorts("m10_1:",&m10_1);
// print_shorts("m11_1:",&m11_1);
m01_1
=
_mm_max_epi16
(
m01_1
,
m00_1
);
m10_1
=
_mm_max_epi16
(
m10_1
,
m11_1
);
// print_shorts("m01_1:",&m01_1);
// print_shorts("m10_1:",&m10_1);
*
ext_128
=
_mm_subs_epi16
(
m10_1
,
m01_1
);
#ifdef DEBUG_LOGMAP
fprintf
(
fdsse4
,
"ext %p
\n
"
,
ext_128
);
print_shorts
(
"ext:"
,(
int16_t
*
)
ext_128
);
print_shorts
(
"m11:"
,(
int16_t
*
)
m11_128
);
print_shorts
(
"m10:"
,(
int16_t
*
)
m10_128
);
print_shorts
(
"m10_1:"
,(
int16_t
*
)
&
m10_1
);
print_shorts
(
"m01_1:"
,(
int16_t
*
)
&
m01_1
);
print_shorts
(
"ext:"
,(
int16_t
*
)
ext_128
);
print_shorts
(
"m11:"
,(
int16_t
*
)
m11_128
);
print_shorts
(
"m10:"
,(
int16_t
*
)
m10_128
);
print_shorts
(
"m10_1:"
,(
int16_t
*
)
&
m10_1
);
print_shorts
(
"m01_1:"
,(
int16_t
*
)
&
m01_1
);
#endif
#elif defined(__arm__)
m11_128
=
(
int16x8_t
*
)
&
m_11
[
k
<<
3
];
m10_128
=
(
int16x8_t
*
)
&
m_10
[
k
<<
3
];
ext_128
=
(
int16x8_t
*
)
&
ext
[
k
<<
3
];
m11_128
=
(
int16x8_t
*
)
&
m_11
[
k
<<
3
];
m10_128
=
(
int16x8_t
*
)
&
m_10
[
k
<<
3
];
ext_128
=
(
int16x8_t
*
)
&
ext
[
k
<<
3
];
m00_4
=
vqaddq_s16
(
alpha_ptr
[
7
],
beta_ptr
[
3
]);
//ALPHA_BETA_4m00;
m11_4
=
vqaddq_s16
(
alpha_ptr
[
7
],
beta_ptr
[
7
]);
//ALPHA_BETA_4m11;
m00_3
=
vqaddq_s16
(
alpha_ptr
[
6
],
beta_ptr
[
7
]);
//ALPHA_BETA_3m00;
...
...
@@ -1065,7 +986,6 @@ void compute_ext16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext,
m10_2
=
vqaddq_s16
(
alpha_ptr
[
3
],
beta_ptr
[
5
]);
//ALPHA_BETA_2m10;
m10_1
=
vqaddq_s16
(
alpha_ptr
[
2
],
beta_ptr
[
1
]);
//ALPHA_BETA_1m10;
m01_1
=
vqaddq_s16
(
alpha_ptr
[
2
],
beta_ptr
[
5
]);
//ALPHA_BETA_1m01;
m01_1
=
vmaxq_s16
(
m01_1
,
m01_2
);
m01_1
=
vmaxq_s16
(
m01_1
,
m01_3
);
m01_1
=
vmaxq_s16
(
m01_1
,
m01_4
);
...
...
@@ -1078,18 +998,12 @@ void compute_ext16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext,
m11_1
=
vmaxq_s16
(
m11_1
,
m11_2
);
m11_1
=
vmaxq_s16
(
m11_1
,
m11_3
);
m11_1
=
vmaxq_s16
(
m11_1
,
m11_4
);
m01_1
=
vqsubq_s16
(
m01_1
,
*
m10_128
);
m00_1
=
vqsubq_s16
(
m00_1
,
*
m11_128
);
m10_1
=
vqaddq_s16
(
m10_1
,
*
m10_128
);
m11_1
=
vqaddq_s16
(
m11_1
,
*
m11_128
);
m01_1
=
vmaxq_s16
(
m01_1
,
m00_1
);
m10_1
=
vmaxq_s16
(
m10_1
,
m11_1
);
*
ext_128
=
vqsubq_s16
(
m10_1
,
m01_1
);
#endif
alpha_ptr
+=
8
;
...
...
@@ -1102,8 +1016,7 @@ void compute_ext16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext,
//int pi2[n],pi3[n+8],pi5[n+8],pi4[n+8],pi6[n+8],
int
*
pi2tab16
[
188
],
*
pi5tab16
[
188
],
*
pi4tab16
[
188
],
*
pi6tab16
[
188
];
void
free_td16
(
void
)
{
void
free_td16
(
void
)
{
int
ind
;
for
(
ind
=
0
;
ind
<
188
;
ind
++
)
{
...
...
@@ -1114,14 +1027,11 @@ void free_td16(void)
}
}
void
init_td16
(
void
)
{
void
init_td16
(
void
)
{
int
ind
,
i
,
i2
,
i3
,
j
,
n
,
pi
,
pi3
;
short
*
base_interleaver
;
short
*
base_interleaver
;
for
(
ind
=
0
;
ind
<
188
;
ind
++
)
{
n
=
f1f2mat
[
ind
].
nb_bits
;
base_interleaver
=
il_tb
+
f1f2mat
[
ind
].
beg_index
;
#ifdef MEX
...
...
@@ -1141,10 +1051,8 @@ void init_td16(void)
j
=
i2
;
for
(
i3
=
0
;
i3
<
(
n
>>
3
);
i3
++
,
i
++
,
j
+=
8
)
{
// if (j>=n)
// j-=(n-1);
pi2tab16
[
ind
][
i
]
=
j
;
// fprintf(fdsse4,"pi2[%d] = %d\n",i,j);
}
...
...
@@ -1157,7 +1065,6 @@ void init_td16(void)
pi5tab16
[
ind
][
pi3
]
=
pi2tab16
[
ind
][
i
];
pi6tab16
[
ind
][
pi
]
=
pi2tab16
[
ind
][
i
];
}
}
}
...
...
@@ -1176,52 +1083,41 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y,
time_stats_t
*
ext_stats
,
time_stats_t
*
intl1_stats
,
time_stats_t
*
intl2_stats
)
{
/* y is a pointer to the input
decoded_bytes is a pointer to the decoded output
n is the size in bits of the coded block, with the tail */
llr_t
systematic0
[
n
+
16
]
__attribute__
((
aligned
(
32
)));
llr_t
systematic1
[
n
+
16
]
__attribute__
((
aligned
(
32
)));
llr_t
systematic2
[
n
+
16
]
__attribute__
((
aligned
(
32
)));
llr_t
yparity1
[
n
+
16
]
__attribute__
((
aligned
(
32
)));
llr_t
yparity2
[
n
+
16
]
__attribute__
((
aligned
(
32
)));
llr_t
ext
[
n
+
128
]
__attribute__
((
aligned
(
32
)));
llr_t
ext2
[
n
+
128
]
__attribute__
((
aligned
(
32
)));
llr_t
alpha
[(
n
+
16
)
*
8
]
__attribute__
((
aligned
(
32
)));
llr_t
beta
[(
n
+
16
)
*
8
]
__attribute__
((
aligned
(
32
)));
llr_t
m11
[
n
+
32
]
__attribute__
((
aligned
(
32
)));
llr_t
m10
[
n
+
32
]
__attribute__
((
aligned
(
32
)));
int
*
pi2_p
,
*
pi4_p
,
*
pi5_p
,
*
pi6_p
;
llr_t
*
s
,
*
s1
,
*
s2
,
*
yp1
,
*
yp2
,
*
yp
;
unsigned
int
i
,
j
,
iind
;
//,pi;
unsigned
char
iteration_cnt
=
0
;
unsigned
int
crc
,
oldcrc
,
crc_len
;
uint8_t
temp
;
#if defined(__x86_64__) || defined(__i386__)
__m128i
*
yp128
;
__m128i
tmp
,
zeros
=
_mm_setzero_si128
();
__m128i
tmpe
;
#elif defined(__arm__)
int16x8_t
*
yp128
;
// int16x8_t tmp128[(n+8)>>3];
// int16x8_t tmp128[(n+8)>>3];
int16x8_t
tmp
,
zeros
=
vdupq_n_s16
(
0
);
const
uint16_t
__attribute__
((
aligned
(
16
)))
_Powers
[
8
]
=
{
1
,
2
,
4
,
8
,
16
,
32
,
64
,
128
};
uint16x8_t
Powers
=
vld1q_u16
(
_Powers
);
#endif
int
offset8_flag
=
0
;
#ifdef DEBUG_LOGMAP
fdsse4
=
fopen
(
"dump_sse4.txt"
,
"w"
);
printf
(
"tc sse4_16 (y) %p
\n
"
,
y
);
#endif
...
...
@@ -1230,12 +1126,8 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y,
return
255
;
}
start_meas
(
init_stats
);
for
(
iind
=
0
;
iind
<
188
&&
f1f2mat
[
iind
].
nb_bits
!=
n
;
iind
++
);
if
(
iind
==
188
)
{
...
...
@@ -1262,32 +1154,23 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y,
}
#if defined(__x86_64__) || defined(__i386__)
yp128
=
(
__m128i
*
)
y
;
yp128
=
(
__m128i
*
)
y
;
#elif defined(__arm__)
yp128
=
(
int16x8_t
*
)
y
;
yp128
=
(
int16x8_t
*
)
y
;
#endif
s
=
systematic0
;
s1
=
systematic1
;
s2
=
systematic2
;
yp1
=
yparity1
;
yp2
=
yparity2
;
for
(
i
=
0
;
i
<
n
;
i
+=
8
)
{
pi2_p
=
&
pi2tab16
[
iind
][
i
];
j
=
pi2_p
[
0
];
#if defined(__x86_64__) || defined(__i386__)
tmpe
=
_mm_load_si128
(
yp128
);
// fprintf(fdsse4,"yp128 %p\n",yp128);
// print_shorts("tmpe",(int16_t *)&tmpe);
s
[
j
]
=
_mm_extract_epi16
(
tmpe
,
0
);
yp1
[
j
]
=
_mm_extract_epi16
(
tmpe
,
1
);
yp2
[
j
]
=
_mm_extract_epi16
(
tmpe
,
2
);
...
...
@@ -1295,7 +1178,6 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y,
fprintf
(
fdsse4
,
"init0: j %d, s[j] %d yp1[j] %d yp2[j] %d
\n
"
,
j
,
s
[
j
],
yp1
[
j
],
yp2
[
j
]);
#endif
j
=
pi2_p
[
1
];
s
[
j
]
=
_mm_extract_epi16
(
tmpe
,
3
);
yp1
[
j
]
=
_mm_extract_epi16
(
tmpe
,
4
);
yp2
[
j
]
=
_mm_extract_epi16
(
tmpe
,
5
);
...
...
@@ -1303,7 +1185,6 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y,
fprintf
(
fdsse4
,
"init1: j %d, s[j] %d yp1[j] %d yp2[j] %d
\n
"
,
j
,
s
[
j
],
yp1
[
j
],
yp2
[
j
]);
#endif
j
=
pi2_p
[
2
];
s
[
j
]
=
_mm_extract_epi16
(
tmpe
,
6
);
yp1
[
j
]
=
_mm_extract_epi16
(
tmpe
,
7
);
tmpe
=
_mm_load_si128
(
&
yp128
[
1
]);
...
...
@@ -1312,7 +1193,6 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y,
fprintf
(
fdsse4
,
"init2: j %d, s[j] %d yp1[j] %d yp2[j] %d
\n
"
,
j
,
s
[
j
],
yp1
[
j
],
yp2
[
j
]);
#endif
j
=
pi2_p
[
3
];
s
[
j
]
=
_mm_extract_epi16
(
tmpe
,
1
);
yp1
[
j
]
=
_mm_extract_epi16
(
tmpe
,
2
);
yp2
[
j
]
=
_mm_extract_epi16
(
tmpe
,
3
);
...
...
@@ -1320,7 +1200,6 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y,
fprintf
(
fdsse4
,
"init3: j %d, s[j] %d yp1[j] %d yp2[j] %d
\n
"
,
j
,
s
[
j
],
yp1
[
j
],
yp2
[
j
]);
#endif
j
=
pi2_p
[
4
];
s
[
j
]
=
_mm_extract_epi16
(
tmpe
,
4
);
yp1
[
j
]
=
_mm_extract_epi16
(
tmpe
,
5
);
yp2
[
j
]
=
_mm_extract_epi16
(
tmpe
,
6
);
...
...
@@ -1328,7 +1207,6 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y,
fprintf
(
fdsse4
,
"init4: j %d, s[j] %d yp1[j] %d yp2[j] %d
\n
"
,
j
,
s
[
j
],
yp1
[
j
],
yp2
[
j
]);
#endif
j
=
pi2_p
[
5
];
s
[
j
]
=
_mm_extract_epi16
(
tmpe
,
7
);
tmpe
=
_mm_load_si128
(
&
yp128
[
2
]);
yp1
[
j
]
=
_mm_extract_epi16
(
tmpe
,
0
);
...
...
@@ -1336,9 +1214,7 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y,
#ifdef DEBUG_LOGMAP
fprintf
(
fdsse4
,
"init5: j %d, s[j] %d yp1[j] %d yp2[j] %d
\n
"
,
j
,
s
[
j
],
yp1
[
j
],
yp2
[
j
]);
#endif
j
=
pi2_p
[
6
];
s
[
j
]
=
_mm_extract_epi16
(
tmpe
,
2
);
yp1
[
j
]
=
_mm_extract_epi16
(
tmpe
,
3
);
yp2
[
j
]
=
_mm_extract_epi16
(
tmpe
,
4
);
...
...
@@ -1346,60 +1222,49 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y,
fprintf
(
fdsse4
,
"init6: j %d, s[j] %d yp1[j] %d yp2[j] %d
\n
"
,
j
,
s
[
j
],
yp1
[
j
],
yp2
[
j
]);
#endif
j
=
pi2_p
[
7
];
s
[
j
]
=
_mm_extract_epi16
(
tmpe
,
5
);
yp1
[
j
]
=
_mm_extract_epi16
(
tmpe
,
6
);
yp2
[
j
]
=
_mm_extract_epi16
(
tmpe
,
7
);
#ifdef DEBUG_LOGMAP
fprintf
(
fdsse4
,
"init7: j %d, s[j] %d yp1[j] %d yp2[j] %d
\n
"
,
j
,
s
[
j
],
yp1
[
j
],
yp2
[
j
]);
#endif
#elif defined(__arm__)
s
[
j
]
=
vgetq_lane_s16
(
yp128
[
0
],
0
);
yp1
[
j
]
=
vgetq_lane_s16
(
yp128
[
0
],
1
);
yp2
[
j
]
=
vgetq_lane_s16
(
yp128
[
0
],
2
);
j
=
pi2_p
[
1
];
s
[
j
]
=
vgetq_lane_s16
(
yp128
[
0
],
3
);
yp1
[
j
]
=
vgetq_lane_s16
(
yp128
[
0
],
4
);
yp2
[
j
]
=
vgetq_lane_s16
(
yp128
[
0
],
5
);
j
=
pi2_p
[
2
];
s
[
j
]
=
vgetq_lane_s16
(
yp128
[
0
],
6
);
yp1
[
j
]
=
vgetq_lane_s16
(
yp128
[
0
],
7
);
yp2
[
j
]
=
vgetq_lane_s16
(
yp128
[
1
],
0
);
j
=
pi2_p
[
3
];
s
[
j
]
=
vgetq_lane_s16
(
yp128
[
1
],
1
);
yp1
[
j
]
=
vgetq_lane_s16
(
yp128
[
1
],
2
);
yp2
[
j
]
=
vgetq_lane_s16
(
yp128
[
1
],
3
);
j
=
pi2_p
[
4
];
s
[
j
]
=
vgetq_lane_s16
(
yp128
[
1
],
4
);
yp1
[
j
]
=
vgetq_lane_s16
(
yp128
[
1
],
5
);
yp2
[
j
]
=
vgetq_lane_s16
(
yp128
[
1
],
6
);
j
=
pi2_p
[
5
];
s
[
j
]
=
vgetq_lane_s16
(
yp128
[
1
],
7
);
yp1
[
j
]
=
vgetq_lane_s16
(
yp128
[
2
],
0
);
yp2
[
j
]
=
vgetq_lane_s16
(
yp128
[
2
],
1
);
j
=
pi2_p
[
6
];
s
[
j
]
=
vgetq_lane_s16
(
yp128
[
2
],
2
);
yp1
[
j
]
=
vgetq_lane_s16
(
yp128
[
2
],
3
);
yp2
[
j
]
=
vgetq_lane_s16
(
yp128
[
2
],
4
);
j
=
pi2_p
[
7
];
s
[
j
]
=
vgetq_lane_s16
(
yp128
[
2
],
5
);
yp1
[
j
]
=
vgetq_lane_s16
(
yp128
[
2
],
6
);
yp2
[
j
]
=
vgetq_lane_s16
(
yp128
[
2
],
7
);
#endif
yp128
+=
3
;
}
yp
=
(
llr_t
*
)
yp128
;
yp
=
(
llr_t
*
)
yp128
;
// Termination
for
(
i
=
n
;
i
<
n
+
3
;
i
++
)
{
...
...
@@ -1410,7 +1275,7 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y,
yp1
[
i
]
=
*
yp
;
yp
++
;
#ifdef DEBUG_LOGMAP
fprintf
(
fdsse4
,
"Term 1 (%
d
): %d %d
\n
"
,
i
,
s
[
i
],
yp1
[
i
]);
fprintf
(
fdsse4
,
"Term 1 (%
u
): %d %d
\n
"
,
i
,
s
[
i
],
yp1
[
i
]);
#endif //DEBUG_LOGMAP
}
...
...
@@ -1422,32 +1287,25 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y,
yp2
[
i
-
8
]
=
*
yp
;
yp
++
;
#ifdef DEBUG_LOGMAP
fprintf
(
fdsse4
,
"Term 2 (%
d
): %d %d
\n
"
,
i
-
3
,
s
[
i
],
yp2
[
i
-
8
]);
fprintf
(
fdsse4
,
"Term 2 (%
u
): %d %d
\n
"
,
i
-
3
,
s
[
i
],
yp2
[
i
-
8
]);
#endif //DEBUG_LOGMAP
}
#ifdef DEBUG_LOGMAP
fprintf
(
fdsse4
,
"
\n
"
);
#endif //DEBUG_LOGMAP
stop_meas
(
init_stats
);
// do log_map from first parity bit
log_map16
(
systematic0
,
yparity1
,
m11
,
m10
,
alpha
,
beta
,
ext
,
n
,
0
,
F
,
offset8_flag
,
alpha_stats
,
beta_stats
,
gamma_stats
,
ext_stats
);
while
(
iteration_cnt
++
<
max_iterations
)
{
#ifdef DEBUG_LOGMAP
fprintf
(
fdsse4
,
"
\n
*******************ITERATION %d (n %d), ext %p
\n\n
"
,
iteration_cnt
,
n
,
ext
);
#endif //DEBUG_LOGMAP
start_meas
(
intl1_stats
);
pi4_p
=
pi4tab16
[
iind
];
for
(
i
=
0
;
i
<
(
n
>>
3
);
i
++
)
{
// steady-state portion
#if defined(__x86_64__) || defined(__i386__)
((
__m128i
*
)
systematic2
)[
i
]
=
_mm_insert_epi16
(((
__m128i
*
)
systematic2
)[
i
],
ext
[
*
pi4_p
++
],
0
);
((
__m128i
*
)
systematic2
)[
i
]
=
_mm_insert_epi16
(((
__m128i
*
)
systematic2
)[
i
],
ext
[
*
pi4_p
++
],
1
);
...
...
@@ -1457,30 +1315,24 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y,
((
__m128i
*
)
systematic2
)[
i
]
=
_mm_insert_epi16
(((
__m128i
*
)
systematic2
)[
i
],
ext
[
*
pi4_p
++
],
5
);
((
__m128i
*
)
systematic2
)[
i
]
=
_mm_insert_epi16
(((
__m128i
*
)
systematic2
)[
i
],
ext
[
*
pi4_p
++
],
6
);
((
__m128i
*
)
systematic2
)[
i
]
=
_mm_insert_epi16
(((
__m128i
*
)
systematic2
)[
i
],
ext
[
*
pi4_p
++
],
7
);
#elif defined(__arm__)
((
int16x8_t
*
)
systematic2
)[
i
]
=
vsetq_lane_s16
(
ext
[
*
pi4_p
++
],((
int16x8_t
*
)
systematic2
)[
i
],
0
);
((
int16x8_t
*
)
systematic2
)[
i
]
=
vsetq_lane_s16
(
ext
[
*
pi4_p
++
],((
int16x8_t
*
)
systematic2
)[
i
],
1
);
((
int16x8_t
*
)
systematic2
)[
i
]
=
vsetq_lane_s16
(
ext
[
*
pi4_p
++
],((
int16x8_t
*
)
systematic2
)[
i
],
2
);
((
int16x8_t
*
)
systematic2
)[
i
]
=
vsetq_lane_s16
(
ext
[
*
pi4_p
++
],((
int16x8_t
*
)
systematic2
)[
i
],
3
);
((
int16x8_t
*
)
systematic2
)[
i
]
=
vsetq_lane_s16
(
ext
[
*
pi4_p
++
],((
int16x8_t
*
)
systematic2
)[
i
],
4
);
((
int16x8_t
*
)
systematic2
)[
i
]
=
vsetq_lane_s16
(
ext
[
*
pi4_p
++
],((
int16x8_t
*
)
systematic2
)[
i
],
5
);
((
int16x8_t
*
)
systematic2
)[
i
]
=
vsetq_lane_s16
(
ext
[
*
pi4_p
++
],((
int16x8_t
*
)
systematic2
)[
i
],
6
);
((
int16x8_t
*
)
systematic2
)[
i
]
=
vsetq_lane_s16
(
ext
[
*
pi4_p
++
],((
int16x8_t
*
)
systematic2
)[
i
],
7
);
((
int16x8_t
*
)
systematic2
)[
i
]
=
vsetq_lane_s16
(
ext
[
*
pi4_p
++
],((
int16x8_t
*
)
systematic2
)[
i
],
0
);
((
int16x8_t
*
)
systematic2
)[
i
]
=
vsetq_lane_s16
(
ext
[
*
pi4_p
++
],((
int16x8_t
*
)
systematic2
)[
i
],
1
);
((
int16x8_t
*
)
systematic2
)[
i
]
=
vsetq_lane_s16
(
ext
[
*
pi4_p
++
],((
int16x8_t
*
)
systematic2
)[
i
],
2
);
((
int16x8_t
*
)
systematic2
)[
i
]
=
vsetq_lane_s16
(
ext
[
*
pi4_p
++
],((
int16x8_t
*
)
systematic2
)[
i
],
3
);
((
int16x8_t
*
)
systematic2
)[
i
]
=
vsetq_lane_s16
(
ext
[
*
pi4_p
++
],((
int16x8_t
*
)
systematic2
)[
i
],
4
);
((
int16x8_t
*
)
systematic2
)[
i
]
=
vsetq_lane_s16
(
ext
[
*
pi4_p
++
],((
int16x8_t
*
)
systematic2
)[
i
],
5
);
((
int16x8_t
*
)
systematic2
)[
i
]
=
vsetq_lane_s16
(
ext
[
*
pi4_p
++
],((
int16x8_t
*
)
systematic2
)[
i
],
6
);
((
int16x8_t
*
)
systematic2
)[
i
]
=
vsetq_lane_s16
(
ext
[
*
pi4_p
++
],((
int16x8_t
*
)
systematic2
)[
i
],
7
);
#endif
#ifdef DEBUG_LOGMAP
print_shorts
(
"syst2"
,(
int16_t
*
)
&
((
__m128i
*
)
systematic2
)[
i
]);
print_shorts
(
"syst2"
,(
int16_t
*
)
&
((
__m128i
*
)
systematic2
)[
i
]);
#endif
}
stop_meas
(
intl1_stats
);
// do log_map from second parity bit
log_map16
(
systematic2
,
yparity2
,
m11
,
m10
,
alpha
,
beta
,
ext2
,
n
,
1
,
F
,
offset8_flag
,
alpha_stats
,
beta_stats
,
gamma_stats
,
ext_stats
);
pi5_p
=
pi5tab16
[
iind
];
for
(
i
=
0
;
i
<
(
n
>>
3
);
i
++
)
{
...
...
@@ -1493,7 +1345,7 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y,
tmp
=
_mm_insert_epi16
(
tmp
,
ext2
[
*
pi5_p
++
],
5
);
tmp
=
_mm_insert_epi16
(
tmp
,
ext2
[
*
pi5_p
++
],
6
);
tmp
=
_mm_insert_epi16
(
tmp
,
ext2
[
*
pi5_p
++
],
7
);
((
__m128i
*
)
systematic1
)[
i
]
=
_mm_adds_epi16
(
_mm_subs_epi16
(
tmp
,((
__m128i
*
)
ext
)[
i
]),((
__m128i
*
)
systematic0
)[
i
]);
((
__m128i
*
)
systematic1
)[
i
]
=
_mm_adds_epi16
(
_mm_subs_epi16
(
tmp
,((
__m128i
*
)
ext
)[
i
]),((
__m128i
*
)
systematic0
)[
i
]);
#elif defined(__arm__)
tmp
=
vsetq_lane_s16
(
ext2
[
*
pi5_p
++
],
tmp
,
0
);
tmp
=
vsetq_lane_s16
(
ext2
[
*
pi5_p
++
],
tmp
,
1
);
...
...
@@ -1503,10 +1355,10 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y,
tmp
=
vsetq_lane_s16
(
ext2
[
*
pi5_p
++
],
tmp
,
5
);
tmp
=
vsetq_lane_s16
(
ext2
[
*
pi5_p
++
],
tmp
,
6
);
tmp
=
vsetq_lane_s16
(
ext2
[
*
pi5_p
++
],
tmp
,
7
);
((
int16x8_t
*
)
systematic1
)[
i
]
=
vqaddq_s16
(
vqsubq_s16
(
tmp
,((
int16x8_t
*
)
ext
)[
i
]),((
int16x8_t
*
)
systematic0
)[
i
]);
((
int16x8_t
*
)
systematic1
)[
i
]
=
vqaddq_s16
(
vqsubq_s16
(
tmp
,((
int16x8_t
*
)
ext
)[
i
]),((
int16x8_t
*
)
systematic0
)[
i
]);
#endif
#ifdef DEBUG_LOGMAP
print_shorts
(
"syst1"
,(
int16_t
*
)
&
((
__m128i
*
)
systematic1
)[
i
]);
print_shorts
(
"syst1"
,(
int16_t
*
)
&
((
__m128i
*
)
systematic1
)[
i
]);
#endif
}
...
...
@@ -1516,16 +1368,16 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y,
for
(
i
=
0
;
i
<
(
n
>>
3
);
i
++
)
{
#if defined(__x86_64__) || defined(__i386__)
tmp
=
_mm_insert_epi16
(
tmp
,
((
llr_t
*
)
ext2
)[
*
pi6_p
++
],
7
);
tmp
=
_mm_insert_epi16
(
tmp
,
((
llr_t
*
)
ext2
)[
*
pi6_p
++
],
6
);
tmp
=
_mm_insert_epi16
(
tmp
,
((
llr_t
*
)
ext2
)[
*
pi6_p
++
],
5
);
tmp
=
_mm_insert_epi16
(
tmp
,
((
llr_t
*
)
ext2
)[
*
pi6_p
++
],
4
);
tmp
=
_mm_insert_epi16
(
tmp
,
((
llr_t
*
)
ext2
)[
*
pi6_p
++
],
3
);
tmp
=
_mm_insert_epi16
(
tmp
,
((
llr_t
*
)
ext2
)[
*
pi6_p
++
],
2
);
tmp
=
_mm_insert_epi16
(
tmp
,
((
llr_t
*
)
ext2
)[
*
pi6_p
++
],
1
);
tmp
=
_mm_insert_epi16
(
tmp
,
((
llr_t
*
)
ext2
)[
*
pi6_p
++
],
0
);
tmp
=
_mm_insert_epi16
(
tmp
,
((
llr_t
*
)
ext2
)[
*
pi6_p
++
],
7
);
tmp
=
_mm_insert_epi16
(
tmp
,
((
llr_t
*
)
ext2
)[
*
pi6_p
++
],
6
);
tmp
=
_mm_insert_epi16
(
tmp
,
((
llr_t
*
)
ext2
)[
*
pi6_p
++
],
5
);
tmp
=
_mm_insert_epi16
(
tmp
,
((
llr_t
*
)
ext2
)[
*
pi6_p
++
],
4
);
tmp
=
_mm_insert_epi16
(
tmp
,
((
llr_t
*
)
ext2
)[
*
pi6_p
++
],
3
);
tmp
=
_mm_insert_epi16
(
tmp
,
((
llr_t
*
)
ext2
)[
*
pi6_p
++
],
2
);
tmp
=
_mm_insert_epi16
(
tmp
,
((
llr_t
*
)
ext2
)[
*
pi6_p
++
],
1
);
tmp
=
_mm_insert_epi16
(
tmp
,
((
llr_t
*
)
ext2
)[
*
pi6_p
++
],
0
);
#ifdef DEBUG_LOGMAP
print_shorts
(
"tmp"
,(
int16_t
*
)
&
tmp
);
print_shorts
(
"tmp"
,(
int16_t
*
)
&
tmp
);
#endif
tmp
=
_mm_cmpgt_epi8
(
_mm_packs_epi16
(
tmp
,
zeros
),
zeros
);
decoded_bytes
[
i
]
=
(
unsigned
char
)
_mm_movemask_epi8
(
tmp
);
...
...
@@ -1538,18 +1390,18 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y,
tmp
=
vsetq_lane_s16
(
ext2
[
*
pi6_p
++
],
tmp
,
2
);
tmp
=
vsetq_lane_s16
(
ext2
[
*
pi6_p
++
],
tmp
,
1
);
tmp
=
vsetq_lane_s16
(
ext2
[
*
pi6_p
++
],
tmp
,
0
);
// This does:
// [1 2 4 8 16 32 64 128] .* I(ext_i > 0) = 2.^[b0 b1 b2 b3 b4 b5 b6 b7], where bi =I(ext_i > 0)
// [2^b0 + 2^b1 2^b2 + 2^b3 2^b4 + 2^b5 2^b6 + 2^b7]
// [2^b0 + 2^b1 + 2^b2 + 2^b3 2^b4 + 2^b5 + 2^b6 + 2^b7]
// Mask64 = 2^b0 + 2^b1 + 2^b2 + 2^b3 + 2^b4 + 2^b5 + 2^b6 + 2^b7
// This does:
// [1 2 4 8 16 32 64 128] .* I(ext_i > 0) = 2.^[b0 b1 b2 b3 b4 b5 b6 b7], where bi =I(ext_i > 0)
// [2^b0 + 2^b1 2^b2 + 2^b3 2^b4 + 2^b5 2^b6 + 2^b7]
// [2^b0 + 2^b1 + 2^b2 + 2^b3 2^b4 + 2^b5 + 2^b6 + 2^b7]
// Mask64 = 2^b0 + 2^b1 + 2^b2 + 2^b3 + 2^b4 + 2^b5 + 2^b6 + 2^b7
uint64x2_t
Mask
=
vpaddlq_u32
(
vpaddlq_u16
(
vandq_u16
(
vcgtq_s16
(
tmp
,
zeros
),
Powers
)));
uint64x1_t
Mask64
=
vget_high_u64
(
Mask
)
+
vget_low_u64
(
Mask
);
decoded_bytes
[
i
]
=
(
uint8_t
)
Mask64
;
#endif
#ifdef DEBUG_LOGMAP
print_shorts
(
"tmp"
,(
int16_t
*
)
&
tmp
);
fprintf
(
fdsse4
,
"decoded_bytes[%d
] %x
\n
"
,
i
,
decoded_bytes
[
i
]);
print_shorts
(
"tmp"
,(
int16_t
*
)
&
tmp
);
fprintf
(
fdsse4
,
"decoded_bytes[%u
] %x
\n
"
,
i
,
decoded_bytes
[
i
]);
#endif
}
}
...
...
@@ -1559,7 +1411,6 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y,
oldcrc
=
*
((
unsigned
int
*
)(
&
decoded_bytes
[(
n
>>
3
)
-
crc_len
]));
switch
(
crc_type
)
{
case
CRC24_A
:
oldcrc
&=
0x00ffffff
;
crc
=
crc24a
(
&
decoded_bytes
[
F
>>
3
],
...
...
@@ -1610,13 +1461,13 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y,
if
(
iteration_cnt
<
max_iterations
)
{
log_map16
(
systematic1
,
yparity1
,
m11
,
m10
,
alpha
,
beta
,
ext
,
n
,
0
,
F
,
offset8_flag
,
alpha_stats
,
beta_stats
,
gamma_stats
,
ext_stats
);
#if defined(__x86_64__) || defined(__i386__)
__m128i
*
ext_128
=
(
__m128i
*
)
ext
;
__m128i
*
s1_128
=
(
__m128i
*
)
systematic1
;
__m128i
*
s0_128
=
(
__m128i
*
)
systematic0
;
__m128i
*
ext_128
=
(
__m128i
*
)
ext
;
__m128i
*
s1_128
=
(
__m128i
*
)
systematic1
;
__m128i
*
s0_128
=
(
__m128i
*
)
systematic0
;
#elif defined(__arm__)
int16x8_t
*
ext_128
=
(
int16x8_t
*
)
ext
;
int16x8_t
*
s1_128
=
(
int16x8_t
*
)
systematic1
;
int16x8_t
*
s0_128
=
(
int16x8_t
*
)
systematic0
;
int16x8_t
*
ext_128
=
(
int16x8_t
*
)
ext
;
int16x8_t
*
s1_128
=
(
int16x8_t
*
)
systematic1
;
int16x8_t
*
s0_128
=
(
int16x8_t
*
)
systematic0
;
#endif
int
myloop
=
n
>>
3
;
...
...
@@ -1632,11 +1483,9 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y,
}
// fprintf(fdsse4,"crc %x, oldcrc %x\n",crc,oldcrc);
#ifdef DEBUG_LOGMAP
fclose
(
fdsse4
);
#endif
#if defined(__x86_64__) || defined(__i386__)
_mm_empty
();
_m_empty
();
...
...
openair1/PHY/CODING/3gpplte_turbo_decoder_sse_8bit.c
View file @
1cb484f1
...
...
@@ -39,19 +39,19 @@
#include "PHY/sse_intrin.h"
#ifndef TEST_DEBUG
#include "PHY/defs_common.h"
#include "PHY/CODING/coding_defs.h"
#include "PHY/CODING/lte_interleaver_inline.h"
#include "PHY/defs_common.h"
#include "PHY/CODING/coding_defs.h"
#include "PHY/CODING/lte_interleaver_inline.h"
#else
#include "defs.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "defs.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#endif
#ifdef MEX
#include "mex.h"
#include "mex.h"
#endif
#include "common/ran_context.h"
...
...
@@ -86,32 +86,28 @@ typedef int8_t channel_t;
#define MAX8 127
void
log_map8
(
llr_t
*
systematic
,
channel_t
*
y_parity
,
llr_t
*
m11
,
llr_t
*
m10
,
llr_t
*
alpha
,
llr_t
*
beta
,
llr_t
*
ext
,
unsigned
short
frame_length
,
unsigned
char
term_flag
,
unsigned
char
F
,
int
offset8_flag
,
void
log_map8
(
llr_t
*
systematic
,
channel_t
*
y_parity
,
llr_t
*
m11
,
llr_t
*
m10
,
llr_t
*
alpha
,
llr_t
*
beta
,
llr_t
*
ext
,
unsigned
short
frame_length
,
unsigned
char
term_flag
,
unsigned
char
F
,
int
offset8_flag
,
time_stats_t
*
alpha_stats
,
time_stats_t
*
beta_stats
,
time_stats_t
*
gamma_stats
,
time_stats_t
*
ext_stats
);
void
compute_gamma8
(
llr_t
*
m11
,
llr_t
*
m10
,
llr_t
*
systematic
,
channel_t
*
y_parity
,
unsigned
short
frame_length
,
unsigned
char
term_flag
);
void
compute_alpha8
(
llr_t
*
alpha
,
llr_t
*
beta
,
llr_t
*
m11
,
llr_t
*
m10
,
unsigned
short
frame_length
,
unsigned
char
F
);
void
compute_beta8
(
llr_t
*
alpha
,
llr_t
*
beta
,
llr_t
*
m11
,
llr_t
*
m10
,
unsigned
short
frame_length
,
unsigned
char
F
,
int
offset8_flag
);
void
compute_ext8
(
llr_t
*
alpha
,
llr_t
*
beta
,
llr_t
*
m11
,
llr_t
*
m10
,
llr_t
*
extrinsic
,
llr_t
*
ap
,
unsigned
short
frame_length
);
void
print_bytes
(
char
*
s
,
int8_t
*
x
)
{
void
compute_gamma8
(
llr_t
*
m11
,
llr_t
*
m10
,
llr_t
*
systematic
,
channel_t
*
y_parity
,
unsigned
short
frame_length
,
unsigned
char
term_flag
);
void
compute_alpha8
(
llr_t
*
alpha
,
llr_t
*
beta
,
llr_t
*
m11
,
llr_t
*
m10
,
unsigned
short
frame_length
,
unsigned
char
F
);
void
compute_beta8
(
llr_t
*
alpha
,
llr_t
*
beta
,
llr_t
*
m11
,
llr_t
*
m10
,
unsigned
short
frame_length
,
unsigned
char
F
,
int
offset8_flag
);
void
compute_ext8
(
llr_t
*
alpha
,
llr_t
*
beta
,
llr_t
*
m11
,
llr_t
*
m10
,
llr_t
*
extrinsic
,
llr_t
*
ap
,
unsigned
short
frame_length
);
void
print_bytes
(
char
*
s
,
int8_t
*
x
)
{
printf
(
"%s : %d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d
\n
"
,
s
,
x
[
0
],
x
[
1
],
x
[
2
],
x
[
3
],
x
[
4
],
x
[
5
],
x
[
6
],
x
[
7
],
x
[
8
],
x
[
9
],
x
[
10
],
x
[
11
],
x
[
12
],
x
[
13
],
x
[
14
],
x
[
15
]);
}
void
log_map8
(
llr_t
*
systematic
,
channel_t
*
y_parity
,
llr_t
*
m11
,
llr_t
*
m10
,
void
log_map8
(
llr_t
*
systematic
,
channel_t
*
y_parity
,
llr_t
*
m11
,
llr_t
*
m10
,
llr_t
*
alpha
,
llr_t
*
beta
,
llr_t
*
ext
,
llr_t
*
ext
,
unsigned
short
frame_length
,
unsigned
char
term_flag
,
unsigned
char
F
,
...
...
@@ -119,32 +115,38 @@ void log_map8(llr_t* systematic,
time_stats_t
*
alpha_stats
,
time_stats_t
*
beta_stats
,
time_stats_t
*
gamma_stats
,
time_stats_t
*
ext_stats
)
{
time_stats_t
*
ext_stats
)
{
#ifdef DEBUG_LOGMAP
printf
(
"log_map, frame_length %d
\n
"
,
frame_length
);
#endif
if
(
gamma_stats
)
start_meas
(
gamma_stats
)
;
compute_gamma8
(
m11
,
m10
,
systematic
,
y_parity
,
frame_length
,
term_flag
)
;
if
(
gamma_stats
)
stop_meas
(
gamma_stats
);
if
(
alpha_stats
)
start_meas
(
alpha_stats
)
;
compute_alpha8
(
alpha
,
beta
,
m11
,
m10
,
frame_length
,
F
)
;
if
(
alpha_stats
)
stop_meas
(
alpha_stats
);
if
(
beta_stats
)
start_meas
(
beta_stats
)
;
compute_beta8
(
alpha
,
beta
,
m11
,
m10
,
frame_length
,
F
,
offset8_flag
)
;
if
(
beta_stats
)
stop_meas
(
beta_stats
);
if
(
ext_stats
)
start_meas
(
ext_stats
)
;
compute_ext8
(
alpha
,
beta
,
m11
,
m10
,
ext
,
systematic
,
frame_length
)
;
if
(
ext_stats
)
stop_meas
(
ext_stats
);
compute_ext8
(
alpha
,
beta
,
m11
,
m10
,
ext
,
systematic
,
frame_length
)
;
if
(
ext_stats
)
stop_meas
(
ext_stats
);
}
void
compute_gamma8
(
llr_t
*
m11
,
llr_t
*
m10
,
llr_t
*
systematic
,
channel_t
*
y_parity
,
unsigned
short
frame_length
,
unsigned
char
term_flag
)
{
void
compute_gamma8
(
llr_t
*
m11
,
llr_t
*
m10
,
llr_t
*
systematic
,
channel_t
*
y_parity
,
unsigned
short
frame_length
,
unsigned
char
term_flag
)
{
int
k
,
K1
;
#if defined(__x86_64__)||defined(__i386__)
__m128i
*
systematic128
=
(
__m128i
*
)
systematic
;
...
...
@@ -157,11 +159,9 @@ void compute_gamma8(llr_t* m11,llr_t* m10,llr_t* systematic,channel_t* y_parity,
int8x16_t
*
m10_128
=
(
int8x16_t
*
)
m10
;
int8x16_t
*
m11_128
=
(
int8x16_t
*
)
m11
;
#endif
#ifdef DEBUG_LOGMAP
printf
(
"compute_gamma, %p,%p,%p,%p,framelength %d
\n
"
,
m11
,
m10
,
systematic
,
y_parity
,
frame_length
);
#endif
#if defined(__x86_64__) || defined(__i386__)
register
__m128i
sl
,
sh
,
ypl
,
yph
;
//K128=_mm_set1_epi8(-128);
#endif
...
...
@@ -181,11 +181,9 @@ void compute_gamma8(llr_t* m11,llr_t* m10,llr_t* systematic,channel_t* y_parity,
m11_128
[
k
]
=
vhaddq_s8
(
systematic128
[
k
],
y_parity128
[
k
]);
m10_128
[
k
]
=
vhsubq_s8
(
systematic128
[
k
],
y_parity128
[
k
]);
#endif
}
// Termination
#if defined(__x86_64__) || defined(__i386__)
sl
=
_mm_cvtepi8_epi16
(
systematic128
[
k
+
term_flag
]);
sh
=
_mm_cvtepi8_epi16
(
_mm_srli_si128
(
systematic128
[
k
],
8
));
...
...
@@ -199,15 +197,12 @@ void compute_gamma8(llr_t* m11,llr_t* m10,llr_t* systematic,channel_t* y_parity,
m11_128
[
k
]
=
vhaddq_s8
(
systematic128
[
k
+
term_flag
],
y_parity128
[
k
]);
m10_128
[
k
]
=
vhsubq_s8
(
systematic128
[
k
+
term_flag
],
y_parity128
[
k
]);
#endif
}
#define L 16
void
compute_alpha8
(
llr_t
*
alpha
,
llr_t
*
beta
,
llr_t
*
m_11
,
llr_t
*
m_10
,
unsigned
short
frame_length
,
unsigned
char
F
)
{
void
compute_alpha8
(
llr_t
*
alpha
,
llr_t
*
beta
,
llr_t
*
m_11
,
llr_t
*
m_10
,
unsigned
short
frame_length
,
unsigned
char
F
)
{
int
k
,
loopval
,
rerun_flag
;
#if defined(__x86_64__) || defined(__i386__)
__m128i
*
alpha128
=
(
__m128i
*
)
alpha
,
*
alpha_ptr
;
__m128i
*
m11p
,
*
m10p
;
...
...
@@ -223,7 +218,6 @@ void compute_alpha8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sh
#endif
// Set initial state: first colum is known
// the other columns are unknown, so all states are set to same value
#if defined(__x86_64__) || defined(__i386__)
alpha128
[
0
]
=
_mm_set_epi8
(
-
MAX8
/
2
,
-
MAX8
/
2
,
-
MAX8
/
2
,
-
MAX8
/
2
,
-
MAX8
/
2
,
-
MAX8
/
2
,
-
MAX8
/
2
,
-
MAX8
/
2
,
-
MAX8
/
2
,
-
MAX8
/
2
,
-
MAX8
/
2
,
-
MAX8
/
2
,
-
MAX8
/
2
,
-
MAX8
/
2
,
-
MAX8
/
2
,
0
);
alpha128
[
1
]
=
_mm_set_epi8
(
-
MAX8
/
2
,
-
MAX8
/
2
,
-
MAX8
/
2
,
-
MAX8
/
2
,
-
MAX8
/
2
,
-
MAX8
/
2
,
-
MAX8
/
2
,
-
MAX8
/
2
,
-
MAX8
/
2
,
-
MAX8
/
2
,
-
MAX8
/
2
,
-
MAX8
/
2
,
-
MAX8
/
2
,
-
MAX8
/
2
,
-
MAX8
/
2
,
-
MAX8
/
2
);
...
...
@@ -233,12 +227,11 @@ void compute_alpha8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sh
alpha128
[
5
]
=
_mm_set_epi8
(
-
MAX8
/
2
,
-
MAX8
/
2
,
-
MAX8
/
2
,
-
MAX8
/
2
,
-
MAX8
/
2
,
-
MAX8
/
2
,
-
MAX8
/
2
,
-
MAX8
/
2
,
-
MAX8
/
2
,
-
MAX8
/
2
,
-
MAX8
/
2
,
-
MAX8
/
2
,
-
MAX8
/
2
,
-
MAX8
/
2
,
-
MAX8
/
2
,
-
MAX8
/
2
);
alpha128
[
6
]
=
_mm_set_epi8
(
-
MAX8
/
2
,
-
MAX8
/
2
,
-
MAX8
/
2
,
-
MAX8
/
2
,
-
MAX8
/
2
,
-
MAX8
/
2
,
-
MAX8
/
2
,
-
MAX8
/
2
,
-
MAX8
/
2
,
-
MAX8
/
2
,
-
MAX8
/
2
,
-
MAX8
/
2
,
-
MAX8
/
2
,
-
MAX8
/
2
,
-
MAX8
/
2
,
-
MAX8
/
2
);
alpha128
[
7
]
=
_mm_set_epi8
(
-
MAX8
/
2
,
-
MAX8
/
2
,
-
MAX8
/
2
,
-
MAX8
/
2
,
-
MAX8
/
2
,
-
MAX8
/
2
,
-
MAX8
/
2
,
-
MAX8
/
2
,
-
MAX8
/
2
,
-
MAX8
/
2
,
-
MAX8
/
2
,
-
MAX8
/
2
,
-
MAX8
/
2
,
-
MAX8
/
2
,
-
MAX8
/
2
,
-
MAX8
/
2
);
for
(
loopval
=
frame_length
>>
4
,
rerun_flag
=
0
;
rerun_flag
<
2
;
loopval
=
L
,
rerun_flag
++
)
{
for
(
loopval
=
frame_length
>>
4
,
rerun_flag
=
0
;
rerun_flag
<
2
;
loopval
=
L
,
rerun_flag
++
)
{
alpha_ptr
=
&
alpha128
[
0
];
m11p
=
(
__m128i
*
)
m_11
;
m10p
=
(
__m128i
*
)
m_10
;
m11p
=
(
__m128i
*
)
m_11
;
m10p
=
(
__m128i
*
)
m_10
;
for
(
k
=
0
;
k
<
loopval
;
k
++
)
{
m_b0
=
_mm_adds_epi8
(
alpha_ptr
[
1
],
*
m11p
);
// m11
...
...
@@ -249,7 +242,6 @@ void compute_alpha8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sh
m_b6
=
_mm_subs_epi8
(
alpha_ptr
[
5
],
*
m10p
);
// m01=-m10
m_b3
=
_mm_subs_epi8
(
alpha_ptr
[
7
],
*
m11p
);
// m00=-m11
m_b7
=
_mm_adds_epi8
(
alpha_ptr
[
7
],
*
m11p
);
// m11
new0
=
_mm_subs_epi8
(
alpha_ptr
[
0
],
*
m11p
);
// m00=-m11
new4
=
_mm_adds_epi8
(
alpha_ptr
[
0
],
*
m11p
);
// m11
new1
=
_mm_adds_epi8
(
alpha_ptr
[
2
],
*
m10p
);
// m10
...
...
@@ -258,7 +250,6 @@ void compute_alpha8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sh
new6
=
_mm_adds_epi8
(
alpha_ptr
[
4
],
*
m10p
);
// m10
new3
=
_mm_adds_epi8
(
alpha_ptr
[
6
],
*
m11p
);
// m11
new7
=
_mm_subs_epi8
(
alpha_ptr
[
6
],
*
m11p
);
// m00=-m11
alpha_ptr
+=
8
;
m11p
++
;
m10p
++
;
...
...
@@ -270,7 +261,6 @@ void compute_alpha8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sh
alpha_ptr
[
5
]
=
_mm_max_epi8
(
m_b5
,
new5
);
alpha_ptr
[
6
]
=
_mm_max_epi8
(
m_b6
,
new6
);
alpha_ptr
[
7
]
=
_mm_max_epi8
(
m_b7
,
new7
);
// compute and subtract maxima
alpha_max
=
_mm_max_epi8
(
alpha_ptr
[
0
],
alpha_ptr
[
1
]);
alpha_max
=
_mm_max_epi8
(
alpha_max
,
alpha_ptr
[
2
]);
...
...
@@ -279,7 +269,6 @@ void compute_alpha8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sh
alpha_max
=
_mm_max_epi8
(
alpha_max
,
alpha_ptr
[
5
]);
alpha_max
=
_mm_max_epi8
(
alpha_max
,
alpha_ptr
[
6
]);
alpha_max
=
_mm_max_epi8
(
alpha_max
,
alpha_ptr
[
7
]);
alpha_ptr
[
0
]
=
_mm_subs_epi8
(
alpha_ptr
[
0
],
alpha_max
);
alpha_ptr
[
1
]
=
_mm_subs_epi8
(
alpha_ptr
[
1
],
alpha_max
);
alpha_ptr
[
2
]
=
_mm_subs_epi8
(
alpha_ptr
[
2
],
alpha_max
);
...
...
@@ -308,8 +297,8 @@ void compute_alpha8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sh
alpha
[
80
]
=
-
MAX8
/
2
;
alpha
[
96
]
=
-
MAX8
/
2
;
alpha
[
112
]
=
-
MAX8
/
2
;
}
#elif defined(__arm__)
alpha128
[
0
]
=
vdupq_n_s8
(
-
MAX8
/
2
);
alpha128
[
0
]
=
vsetq_lane_s8
(
0
,
alpha128
[
0
],
0
);
...
...
@@ -320,12 +309,11 @@ void compute_alpha8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sh
alpha128
[
5
]
=
vdupq_n_s8
(
-
MAX8
/
2
);
alpha128
[
6
]
=
vdupq_n_s8
(
-
MAX8
/
2
);
alpha128
[
7
]
=
vdupq_n_s8
(
-
MAX8
/
2
);
for
(
loopval
=
frame_length
>>
4
,
rerun_flag
=
0
;
rerun_flag
<
2
;
loopval
=
L
,
rerun_flag
++
)
{
for
(
loopval
=
frame_length
>>
4
,
rerun_flag
=
0
;
rerun_flag
<
2
;
loopval
=
L
,
rerun_flag
++
)
{
alpha_ptr
=
&
alpha128
[
0
];
m11p
=
(
int8x16_t
*
)
m_11
;
m10p
=
(
int8x16_t
*
)
m_10
;
m11p
=
(
int8x16_t
*
)
m_11
;
m10p
=
(
int8x16_t
*
)
m_10
;
for
(
k
=
0
;
k
<
loopval
;
k
++
)
{
m_b0
=
vqaddq_s8
(
alpha_ptr
[
1
],
*
m11p
);
// m11
...
...
@@ -336,7 +324,6 @@ void compute_alpha8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sh
m_b6
=
vqsubq_s8
(
alpha_ptr
[
5
],
*
m10p
);
// m01=-m10
m_b3
=
vqsubq_s8
(
alpha_ptr
[
7
],
*
m11p
);
// m00=-m11
m_b7
=
vqaddq_s8
(
alpha_ptr
[
7
],
*
m11p
);
// m11
new0
=
vqsubq_s8
(
alpha_ptr
[
0
],
*
m11p
);
// m00=-m11
new4
=
vqaddq_s8
(
alpha_ptr
[
0
],
*
m11p
);
// m11
new1
=
vqaddq_s8
(
alpha_ptr
[
2
],
*
m10p
);
// m10
...
...
@@ -345,7 +332,6 @@ void compute_alpha8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sh
new6
=
vqaddq_s8
(
alpha_ptr
[
4
],
*
m10p
);
// m10
new3
=
vqaddq_s8
(
alpha_ptr
[
6
],
*
m11p
);
// m11
new7
=
vqsubq_s8
(
alpha_ptr
[
6
],
*
m11p
);
// m00=-m11
alpha_ptr
+=
8
;
m11p
++
;
m10p
++
;
...
...
@@ -357,7 +343,6 @@ void compute_alpha8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sh
alpha_ptr
[
5
]
=
vmaxq_s8
(
m_b5
,
new5
);
alpha_ptr
[
6
]
=
vmaxq_s8
(
m_b6
,
new6
);
alpha_ptr
[
7
]
=
vmaxq_s8
(
m_b7
,
new7
);
// compute and subtract maxima
alpha_max
=
vmaxq_s8
(
alpha_ptr
[
0
],
alpha_ptr
[
1
]);
alpha_max
=
vmaxq_s8
(
alpha_max
,
alpha_ptr
[
2
]);
...
...
@@ -366,7 +351,6 @@ void compute_alpha8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sh
alpha_max
=
vmaxq_s8
(
alpha_max
,
alpha_ptr
[
5
]);
alpha_max
=
vmaxq_s8
(
alpha_max
,
alpha_ptr
[
6
]);
alpha_max
=
vmaxq_s8
(
alpha_max
,
alpha_ptr
[
7
]);
alpha_ptr
[
0
]
=
vqsubq_s8
(
alpha_ptr
[
0
],
alpha_max
);
alpha_ptr
[
1
]
=
vqsubq_s8
(
alpha_ptr
[
1
],
alpha_max
);
alpha_ptr
[
2
]
=
vqsubq_s8
(
alpha_ptr
[
2
],
alpha_max
);
...
...
@@ -380,14 +364,22 @@ void compute_alpha8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sh
// Set intial state for next iteration from the last state
// as a column end states are the first states of the next column
int
K1
=
frame_length
>>
1
;
alpha128
[
0
]
=
(
int8x16_t
)
vshlq_n_s64
((
int64x2_t
)
alpha128
[
K1
],
8
);
alpha128
[
0
]
=
vsetq_lane_s8
(
alpha
[
8
],
alpha128
[
0
],
7
);
alpha128
[
1
]
=
(
int8x16_t
)
vshlq_n_s64
((
int64x2_t
)
alpha128
[
1
+
K1
],
8
);
alpha128
[
1
]
=
vsetq_lane_s8
(
alpha
[
24
],
alpha128
[
0
],
7
);
alpha128
[
2
]
=
(
int8x16_t
)
vshlq_n_s64
((
int64x2_t
)
alpha128
[
2
+
K1
],
8
);
alpha128
[
2
]
=
vsetq_lane_s8
(
alpha
[
40
],
alpha128
[
0
],
7
);
alpha128
[
3
]
=
(
int8x16_t
)
vshlq_n_s64
((
int64x2_t
)
alpha128
[
3
+
K1
],
8
);
alpha128
[
3
]
=
vsetq_lane_s8
(
alpha
[
56
],
alpha128
[
0
],
7
);
alpha128
[
4
]
=
(
int8x16_t
)
vshlq_n_s64
((
int64x2_t
)
alpha128
[
4
+
K1
],
8
);
alpha128
[
4
]
=
vsetq_lane_s8
(
alpha
[
72
],
alpha128
[
0
],
7
);
alpha128
[
5
]
=
(
int8x16_t
)
vshlq_n_s64
((
int64x2_t
)
alpha128
[
5
+
K1
],
8
);
alpha128
[
5
]
=
vsetq_lane_s8
(
alpha
[
88
],
alpha128
[
0
],
7
);
alpha128
[
6
]
=
(
int8x16_t
)
vshlq_n_s64
((
int64x2_t
)
alpha128
[
6
+
K1
],
8
);
alpha128
[
6
]
=
vsetq_lane_s8
(
alpha
[
104
],
alpha128
[
0
],
7
);
alpha128
[
7
]
=
(
int8x16_t
)
vshlq_n_s64
((
int64x2_t
)
alpha128
[
7
+
K1
],
8
);
alpha128
[
7
]
=
vsetq_lane_s8
(
alpha
[
120
],
alpha128
[
0
],
7
);
alpha128
[
0
]
=
(
int8x16_t
)
vshlq_n_s64
((
int64x2_t
)
alpha128
[
K1
],
8
);
alpha128
[
0
]
=
vsetq_lane_s8
(
alpha
[
8
],
alpha128
[
0
],
7
);
alpha128
[
1
]
=
(
int8x16_t
)
vshlq_n_s64
((
int64x2_t
)
alpha128
[
1
+
K1
],
8
);
alpha128
[
1
]
=
vsetq_lane_s8
(
alpha
[
24
],
alpha128
[
0
],
7
);
alpha128
[
2
]
=
(
int8x16_t
)
vshlq_n_s64
((
int64x2_t
)
alpha128
[
2
+
K1
],
8
);
alpha128
[
2
]
=
vsetq_lane_s8
(
alpha
[
40
],
alpha128
[
0
],
7
);
alpha128
[
3
]
=
(
int8x16_t
)
vshlq_n_s64
((
int64x2_t
)
alpha128
[
3
+
K1
],
8
);
alpha128
[
3
]
=
vsetq_lane_s8
(
alpha
[
56
],
alpha128
[
0
],
7
);
alpha128
[
4
]
=
(
int8x16_t
)
vshlq_n_s64
((
int64x2_t
)
alpha128
[
4
+
K1
],
8
);
alpha128
[
4
]
=
vsetq_lane_s8
(
alpha
[
72
],
alpha128
[
0
],
7
);
alpha128
[
5
]
=
(
int8x16_t
)
vshlq_n_s64
((
int64x2_t
)
alpha128
[
5
+
K1
],
8
);
alpha128
[
5
]
=
vsetq_lane_s8
(
alpha
[
88
],
alpha128
[
0
],
7
);
alpha128
[
6
]
=
(
int8x16_t
)
vshlq_n_s64
((
int64x2_t
)
alpha128
[
6
+
K1
],
8
);
alpha128
[
6
]
=
vsetq_lane_s8
(
alpha
[
104
],
alpha128
[
0
],
7
);
alpha128
[
7
]
=
(
int8x16_t
)
vshlq_n_s64
((
int64x2_t
)
alpha128
[
7
+
K1
],
8
);
alpha128
[
7
]
=
vsetq_lane_s8
(
alpha
[
120
],
alpha128
[
0
],
7
);
alpha
[
16
]
=
-
MAX8
/
2
;
alpha
[
32
]
=
-
MAX8
/
2
;
alpha
[
48
]
=
-
MAX8
/
2
;
...
...
@@ -395,35 +387,28 @@ void compute_alpha8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sh
alpha
[
80
]
=
-
MAX8
/
2
;
alpha
[
96
]
=
-
MAX8
/
2
;
alpha
[
112
]
=
-
MAX8
/
2
;
}
#endif
#endif
}
void
compute_beta8
(
llr_t
*
alpha
,
llr_t
*
beta
,
llr_t
*
m_11
,
llr_t
*
m_10
,
unsigned
short
frame_length
,
unsigned
char
F
,
int
offset8_flag
)
{
void
compute_beta8
(
llr_t
*
alpha
,
llr_t
*
beta
,
llr_t
*
m_11
,
llr_t
*
m_10
,
unsigned
short
frame_length
,
unsigned
char
F
,
int
offset8_flag
)
{
int
k
,
rerun_flag
,
loopval
;
#if defined(__x86_64__) || defined(__i386__)
__m128i
m11_128
,
m10_128
;
__m128i
m_b0
,
m_b1
,
m_b2
,
m_b3
,
m_b4
,
m_b5
,
m_b6
,
m_b7
;
__m128i
new0
,
new1
,
new2
,
new3
,
new4
,
new5
,
new6
,
new7
;
__m128i
*
beta128
,
*
alpha128
,
*
beta_ptr
;
__m128i
beta_max
;
#elif defined(__arm__)
int8x16_t
m11_128
,
m10_128
;
int8x16_t
m_b0
,
m_b1
,
m_b2
,
m_b3
,
m_b4
,
m_b5
,
m_b6
,
m_b7
;
int8x16_t
new0
,
new1
,
new2
,
new3
,
new4
,
new5
,
new6
,
new7
;
int8x16_t
*
beta128
,
*
alpha128
,
*
beta_ptr
;
int8x16_t
beta_max
;
#endif
llr_t
beta0
,
beta1
;
llr_t
beta2
,
beta3
,
beta4
,
beta5
,
beta6
,
beta7
;
if
(
frame_length
>
6144
)
{
...
...
@@ -433,13 +418,12 @@ void compute_beta8(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sho
// we are supposed to run compute_alpha just before compute_beta
// so the initial states of backward computation can be set from last value of alpha states (forward computation)
#if defined(__x86_64__) || defined(__i386__)
beta_ptr
=
(
__m128i
*
)
&
beta
[
frame_length
<<
3
];
alpha128
=
(
__m128i
*
)
&
alpha
[
0
];
beta_ptr
=
(
__m128i
*
)
&
beta
[
frame_length
<<
3
];
alpha128
=
(
__m128i
*
)
&
alpha
[
0
];
#elif defined(__arm__)
beta_ptr
=
(
int8x16_t
*
)
&
beta
[
frame_length
<<
3
];
alpha128
=
(
int8x16_t
*
)
&
alpha
[
0
];
beta_ptr
=
(
int8x16_t
*
)
&
beta
[
frame_length
<<
3
];
alpha128
=
(
int8x16_t
*
)
&
alpha
[
0
];
#endif
beta_ptr
[
0
]
=
alpha128
[(
frame_length
>>
1
)];
beta_ptr
[
1
]
=
alpha128
[
1
+
(
frame_length
>>
1
)];
...
...
@@ -449,18 +433,15 @@ void compute_beta8(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sho
beta_ptr
[
5
]
=
alpha128
[
5
+
(
frame_length
>>
1
)];
beta_ptr
[
6
]
=
alpha128
[
6
+
(
frame_length
>>
1
)];
beta_ptr
[
7
]
=
alpha128
[
7
+
(
frame_length
>>
1
)];
int
overlap
=
(
frame_length
>>
4
)
>
L
?
(
frame_length
>>
4
)
-
L
:
0
;
for
(
rerun_flag
=
0
,
loopval
=
0
;
rerun_flag
<
2
;
loopval
=
overlap
,
rerun_flag
++
)
{
if
(
offset8_flag
==
0
)
{
// FIXME! beta0-beta7 are used uninitialized. FIXME!
// workaround: init with 0
beta0
=
beta1
=
beta2
=
beta3
=
beta4
=
beta5
=
beta6
=
beta7
=
0
;
#if defined(__x86_64__) || defined(__i386__)
beta_ptr
[
0
]
=
_mm_insert_epi8
(
beta_ptr
[
0
],
beta0
,
15
);
beta_ptr
[
1
]
=
_mm_insert_epi8
(
beta_ptr
[
1
],
beta1
,
15
);
...
...
@@ -483,16 +464,17 @@ void compute_beta8(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sho
}
#if defined(__x86_64__) || defined(__i386__)
beta_ptr
=
(
__m128i
*
)
&
beta
[
frame_length
<<
3
];
beta_ptr
=
(
__m128i
*
)
&
beta
[
frame_length
<<
3
];
#elif defined(__arm__)
beta_ptr
=
(
int8x16_t
*
)
&
beta
[
frame_length
<<
3
];
beta_ptr
=
(
int8x16_t
*
)
&
beta
[
frame_length
<<
3
];
#endif
for
(
k
=
(
frame_length
>>
4
)
-
1
;
k
>=
loopval
;
k
--
)
{
#if defined(__x86_64__) || defined(__i386__)
m11_128
=
((
__m128i
*
)
m_11
)[
k
];
m10_128
=
((
__m128i
*
)
m_10
)[
k
];
m11_128
=
((
__m128i
*
)
m_11
)[
k
];
m10_128
=
((
__m128i
*
)
m_10
)[
k
];
m_b0
=
_mm_adds_epi8
(
beta_ptr
[
4
],
m11_128
);
//m11
m_b1
=
_mm_subs_epi8
(
beta_ptr
[
4
],
m11_128
);
//m00
m_b2
=
_mm_subs_epi8
(
beta_ptr
[
5
],
m10_128
);
//m01
...
...
@@ -501,7 +483,6 @@ void compute_beta8(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sho
m_b5
=
_mm_subs_epi8
(
beta_ptr
[
6
],
m10_128
);
//m01
m_b6
=
_mm_subs_epi8
(
beta_ptr
[
7
],
m11_128
);
//m00
m_b7
=
_mm_adds_epi8
(
beta_ptr
[
7
],
m11_128
);
//m11
new0
=
_mm_subs_epi8
(
beta_ptr
[
0
],
m11_128
);
//m00
new1
=
_mm_adds_epi8
(
beta_ptr
[
0
],
m11_128
);
//m11
new2
=
_mm_adds_epi8
(
beta_ptr
[
1
],
m10_128
);
//m10
...
...
@@ -510,9 +491,7 @@ void compute_beta8(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sho
new5
=
_mm_adds_epi8
(
beta_ptr
[
2
],
m10_128
);
//m10
new6
=
_mm_adds_epi8
(
beta_ptr
[
3
],
m11_128
);
//m11
new7
=
_mm_subs_epi8
(
beta_ptr
[
3
],
m11_128
);
//m00
beta_ptr
-=
8
;
beta_ptr
[
0
]
=
_mm_max_epi8
(
m_b0
,
new0
);
beta_ptr
[
1
]
=
_mm_max_epi8
(
m_b1
,
new1
);
beta_ptr
[
2
]
=
_mm_max_epi8
(
m_b2
,
new2
);
...
...
@@ -521,7 +500,6 @@ void compute_beta8(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sho
beta_ptr
[
5
]
=
_mm_max_epi8
(
m_b5
,
new5
);
beta_ptr
[
6
]
=
_mm_max_epi8
(
m_b6
,
new6
);
beta_ptr
[
7
]
=
_mm_max_epi8
(
m_b7
,
new7
);
beta_max
=
_mm_max_epi8
(
beta_ptr
[
0
],
beta_ptr
[
1
]);
beta_max
=
_mm_max_epi8
(
beta_max
,
beta_ptr
[
2
]);
beta_max
=
_mm_max_epi8
(
beta_max
,
beta_ptr
[
3
]);
...
...
@@ -529,7 +507,6 @@ void compute_beta8(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sho
beta_max
=
_mm_max_epi8
(
beta_max
,
beta_ptr
[
5
]);
beta_max
=
_mm_max_epi8
(
beta_max
,
beta_ptr
[
6
]);
beta_max
=
_mm_max_epi8
(
beta_max
,
beta_ptr
[
7
]);
beta_ptr
[
0
]
=
_mm_subs_epi8
(
beta_ptr
[
0
],
beta_max
);
beta_ptr
[
1
]
=
_mm_subs_epi8
(
beta_ptr
[
1
],
beta_max
);
beta_ptr
[
2
]
=
_mm_subs_epi8
(
beta_ptr
[
2
],
beta_max
);
...
...
@@ -539,8 +516,8 @@ void compute_beta8(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sho
beta_ptr
[
6
]
=
_mm_subs_epi8
(
beta_ptr
[
6
],
beta_max
);
beta_ptr
[
7
]
=
_mm_subs_epi8
(
beta_ptr
[
7
],
beta_max
);
#elif defined(__arm__)
m11_128
=
((
int8x16_t
*
)
m_11
)[
k
];
m10_128
=
((
int8x16_t
*
)
m_10
)[
k
];
m11_128
=
((
int8x16_t
*
)
m_11
)[
k
];
m10_128
=
((
int8x16_t
*
)
m_10
)[
k
];
m_b0
=
vqaddq_s8
(
beta_ptr
[
4
],
m11_128
);
//m11
m_b1
=
vqsubq_s8
(
beta_ptr
[
4
],
m11_128
);
//m00
m_b2
=
vqsubq_s8
(
beta_ptr
[
5
],
m10_128
);
//m01
...
...
@@ -549,7 +526,6 @@ void compute_beta8(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sho
m_b5
=
vqsubq_s8
(
beta_ptr
[
6
],
m10_128
);
//m01
m_b6
=
vqsubq_s8
(
beta_ptr
[
7
],
m11_128
);
//m00
m_b7
=
vqaddq_s8
(
beta_ptr
[
7
],
m11_128
);
//m11
new0
=
vqsubq_s8
(
beta_ptr
[
0
],
m11_128
);
//m00
new1
=
vqaddq_s8
(
beta_ptr
[
0
],
m11_128
);
//m11
new2
=
vqaddq_s8
(
beta_ptr
[
1
],
m10_128
);
//m10
...
...
@@ -558,9 +534,7 @@ void compute_beta8(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sho
new5
=
vqaddq_s8
(
beta_ptr
[
2
],
m10_128
);
//m10
new6
=
vqaddq_s8
(
beta_ptr
[
3
],
m11_128
);
//m11
new7
=
vqsubq_s8
(
beta_ptr
[
3
],
m11_128
);
//m00
beta_ptr
-=
8
;
beta_ptr
[
0
]
=
vmaxq_s8
(
m_b0
,
new0
);
beta_ptr
[
1
]
=
vmaxq_s8
(
m_b1
,
new1
);
beta_ptr
[
2
]
=
vmaxq_s8
(
m_b2
,
new2
);
...
...
@@ -569,7 +543,6 @@ void compute_beta8(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sho
beta_ptr
[
5
]
=
vmaxq_s8
(
m_b5
,
new5
);
beta_ptr
[
6
]
=
vmaxq_s8
(
m_b6
,
new6
);
beta_ptr
[
7
]
=
vmaxq_s8
(
m_b7
,
new7
);
beta_max
=
vmaxq_s8
(
beta_ptr
[
0
],
beta_ptr
[
1
]);
beta_max
=
vmaxq_s8
(
beta_max
,
beta_ptr
[
2
]);
beta_max
=
vmaxq_s8
(
beta_max
,
beta_ptr
[
3
]);
...
...
@@ -577,7 +550,6 @@ void compute_beta8(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sho
beta_max
=
vmaxq_s8
(
beta_max
,
beta_ptr
[
5
]);
beta_max
=
vmaxq_s8
(
beta_max
,
beta_ptr
[
6
]);
beta_max
=
vmaxq_s8
(
beta_max
,
beta_ptr
[
7
]);
beta_ptr
[
0
]
=
vqsubq_s8
(
beta_ptr
[
0
],
beta_max
);
beta_ptr
[
1
]
=
vqsubq_s8
(
beta_ptr
[
1
],
beta_max
);
beta_ptr
[
2
]
=
vqsubq_s8
(
beta_ptr
[
2
],
beta_max
);
...
...
@@ -592,10 +564,9 @@ void compute_beta8(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sho
// Set intial state for next iteration from the last state
// as column last states are the first states of the next column
// The initial state of column 0 is coming from tail bits (to be computed)
#if defined(__x86_64__) || defined(__i386__)
beta128
=
(
__m128i
*
)
&
beta
[
0
];
beta_ptr
=
(
__m128i
*
)
&
beta
[
frame_length
<<
3
];
beta128
=
(
__m128i
*
)
&
beta
[
0
];
beta_ptr
=
(
__m128i
*
)
&
beta
[
frame_length
<<
3
];
beta_ptr
[
0
]
=
_mm_srli_si128
(
beta128
[
0
],
1
);
beta_ptr
[
1
]
=
_mm_srli_si128
(
beta128
[
1
],
1
);
beta_ptr
[
2
]
=
_mm_srli_si128
(
beta128
[
2
],
1
);
...
...
@@ -605,23 +576,29 @@ void compute_beta8(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sho
beta_ptr
[
6
]
=
_mm_srli_si128
(
beta128
[
6
],
1
);
beta_ptr
[
7
]
=
_mm_srli_si128
(
beta128
[
7
],
1
);
#elif defined(__arm__)
beta128
=
(
int8x16_t
*
)
&
beta
[
0
];
beta_ptr
=
(
int8x16_t
*
)
&
beta
[
frame_length
<<
3
];
beta_ptr
[
0
]
=
(
int8x16_t
)
vshrq_n_s64
((
int64x2_t
)
beta128
[
0
],
8
);
beta_ptr
[
0
]
=
vsetq_lane_s8
(
beta
[
7
],
beta_ptr
[
0
],
8
);
beta_ptr
[
1
]
=
(
int8x16_t
)
vshrq_n_s64
((
int64x2_t
)
beta128
[
1
],
8
);
beta_ptr
[
1
]
=
vsetq_lane_s8
(
beta
[
23
],
beta_ptr
[
1
],
8
);
beta_ptr
[
2
]
=
(
int8x16_t
)
vshrq_n_s64
((
int64x2_t
)
beta128
[
2
],
8
);
beta_ptr
[
2
]
=
vsetq_lane_s8
(
beta
[
39
],
beta_ptr
[
2
],
8
);
beta_ptr
[
3
]
=
(
int8x16_t
)
vshrq_n_s64
((
int64x2_t
)
beta128
[
3
],
8
);
beta_ptr
[
3
]
=
vsetq_lane_s8
(
beta
[
55
],
beta_ptr
[
3
],
8
);
beta_ptr
[
4
]
=
(
int8x16_t
)
vshrq_n_s64
((
int64x2_t
)
beta128
[
4
],
8
);
beta_ptr
[
4
]
=
vsetq_lane_s8
(
beta
[
71
],
beta_ptr
[
4
],
8
);
beta_ptr
[
5
]
=
(
int8x16_t
)
vshrq_n_s64
((
int64x2_t
)
beta128
[
5
],
8
);
beta_ptr
[
5
]
=
vsetq_lane_s8
(
beta
[
87
],
beta_ptr
[
5
],
8
);
beta_ptr
[
6
]
=
(
int8x16_t
)
vshrq_n_s64
((
int64x2_t
)
beta128
[
6
],
8
);
beta_ptr
[
6
]
=
vsetq_lane_s8
(
beta
[
103
],
beta_ptr
[
6
],
8
);
beta_ptr
[
7
]
=
(
int8x16_t
)
vshrq_n_s64
((
int64x2_t
)
beta128
[
7
],
8
);
beta_ptr
[
7
]
=
vsetq_lane_s8
(
beta
[
119
],
beta_ptr
[
7
],
8
);
beta128
=
(
int8x16_t
*
)
&
beta
[
0
];
beta_ptr
=
(
int8x16_t
*
)
&
beta
[
frame_length
<<
3
];
beta_ptr
[
0
]
=
(
int8x16_t
)
vshrq_n_s64
((
int64x2_t
)
beta128
[
0
],
8
);
beta_ptr
[
0
]
=
vsetq_lane_s8
(
beta
[
7
],
beta_ptr
[
0
],
8
);
beta_ptr
[
1
]
=
(
int8x16_t
)
vshrq_n_s64
((
int64x2_t
)
beta128
[
1
],
8
);
beta_ptr
[
1
]
=
vsetq_lane_s8
(
beta
[
23
],
beta_ptr
[
1
],
8
);
beta_ptr
[
2
]
=
(
int8x16_t
)
vshrq_n_s64
((
int64x2_t
)
beta128
[
2
],
8
);
beta_ptr
[
2
]
=
vsetq_lane_s8
(
beta
[
39
],
beta_ptr
[
2
],
8
);
beta_ptr
[
3
]
=
(
int8x16_t
)
vshrq_n_s64
((
int64x2_t
)
beta128
[
3
],
8
);
beta_ptr
[
3
]
=
vsetq_lane_s8
(
beta
[
55
],
beta_ptr
[
3
],
8
);
beta_ptr
[
4
]
=
(
int8x16_t
)
vshrq_n_s64
((
int64x2_t
)
beta128
[
4
],
8
);
beta_ptr
[
4
]
=
vsetq_lane_s8
(
beta
[
71
],
beta_ptr
[
4
],
8
);
beta_ptr
[
5
]
=
(
int8x16_t
)
vshrq_n_s64
((
int64x2_t
)
beta128
[
5
],
8
);
beta_ptr
[
5
]
=
vsetq_lane_s8
(
beta
[
87
],
beta_ptr
[
5
],
8
);
beta_ptr
[
6
]
=
(
int8x16_t
)
vshrq_n_s64
((
int64x2_t
)
beta128
[
6
],
8
);
beta_ptr
[
6
]
=
vsetq_lane_s8
(
beta
[
103
],
beta_ptr
[
6
],
8
);
beta_ptr
[
7
]
=
(
int8x16_t
)
vshrq_n_s64
((
int64x2_t
)
beta128
[
7
],
8
);
beta_ptr
[
7
]
=
vsetq_lane_s8
(
beta
[
119
],
beta_ptr
[
7
],
8
);
#endif
}
}
void
compute_ext8
(
llr_t
*
alpha
,
llr_t
*
beta
,
llr_t
*
m_11
,
llr_t
*
m_10
,
llr_t
*
ext
,
llr_t
*
systematic
,
unsigned
short
frame_length
)
{
void
compute_ext8
(
llr_t
*
alpha
,
llr_t
*
beta
,
llr_t
*
m_11
,
llr_t
*
m_10
,
llr_t
*
ext
,
llr_t
*
systematic
,
unsigned
short
frame_length
)
{
#if defined(__x86_64__) || defined(__i386__)
__m128i
*
alpha128
=
(
__m128i
*
)
alpha
;
__m128i
*
beta128
=
(
__m128i
*
)
beta
;
...
...
@@ -642,27 +619,20 @@ void compute_ext8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext, l
int8x16_t
m11_1
,
m11_2
,
m11_3
,
m11_4
;
#endif
int
k
;
//
// LLR computation, 8 consequtive bits per loop
//
#ifdef DEBUG_LOGMAP
printf
(
"compute_ext, %p, %p, %p, %p, %p, %p ,framelength %d
\n
"
,
alpha
,
beta
,
m_11
,
m_10
,
ext
,
systematic
,
frame_length
);
#endif
alpha_ptr
=
alpha128
;
beta_ptr
=
&
beta128
[
8
];
for
(
k
=
0
;
k
<
(
frame_length
>>
4
);
k
++
)
{
#if defined(__x86_64__) || defined(__i386__)
m11_128
=
(
__m128i
*
)
&
m_11
[
k
<<
4
];
m10_128
=
(
__m128i
*
)
&
m_10
[
k
<<
4
];
ext_128
=
(
__m128i
*
)
&
ext
[
k
<<
4
];
m11_128
=
(
__m128i
*
)
&
m_11
[
k
<<
4
];
m10_128
=
(
__m128i
*
)
&
m_10
[
k
<<
4
];
ext_128
=
(
__m128i
*
)
&
ext
[
k
<<
4
];
m00_4
=
_mm_adds_epi8
(
alpha_ptr
[
7
],
beta_ptr
[
3
]);
//ALPHA_BETA_4m00;
m11_4
=
_mm_adds_epi8
(
alpha_ptr
[
7
],
beta_ptr
[
7
]);
//ALPHA_BETA_4m11;
m00_3
=
_mm_adds_epi8
(
alpha_ptr
[
6
],
beta_ptr
[
7
]);
//ALPHA_BETA_3m00;
...
...
@@ -679,7 +649,6 @@ void compute_ext8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext, l
m10_2
=
_mm_adds_epi8
(
alpha_ptr
[
3
],
beta_ptr
[
5
]);
//ALPHA_BETA_2m10;
m10_1
=
_mm_adds_epi8
(
alpha_ptr
[
2
],
beta_ptr
[
1
]);
//ALPHA_BETA_1m10;
m01_1
=
_mm_adds_epi8
(
alpha_ptr
[
2
],
beta_ptr
[
5
]);
//ALPHA_BETA_1m01;
m01_1
=
_mm_max_epi8
(
m01_1
,
m01_2
);
m01_1
=
_mm_max_epi8
(
m01_1
,
m01_3
);
m01_1
=
_mm_max_epi8
(
m01_1
,
m01_4
);
...
...
@@ -692,28 +661,19 @@ void compute_ext8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext, l
m11_1
=
_mm_max_epi8
(
m11_1
,
m11_2
);
m11_1
=
_mm_max_epi8
(
m11_1
,
m11_3
);
m11_1
=
_mm_max_epi8
(
m11_1
,
m11_4
);
m01_1
=
_mm_subs_epi8
(
m01_1
,
*
m10_128
);
m00_1
=
_mm_subs_epi8
(
m00_1
,
*
m11_128
);
m10_1
=
_mm_adds_epi8
(
m10_1
,
*
m10_128
);
m11_1
=
_mm_adds_epi8
(
m11_1
,
*
m11_128
);
m01_1
=
_mm_max_epi8
(
m01_1
,
m00_1
);
m10_1
=
_mm_max_epi8
(
m10_1
,
m11_1
);
*
ext_128
=
_mm_subs_epi8
(
m10_1
,
m01_1
);
alpha_ptr
+=
8
;
beta_ptr
+=
8
;
#elif defined(__arm__)
m11_128
=
(
int8x16_t
*
)
&
m_11
[
k
<<
4
];
m10_128
=
(
int8x16_t
*
)
&
m_10
[
k
<<
4
];
ext_128
=
(
int8x16_t
*
)
&
ext
[
k
<<
4
];
m11_128
=
(
int8x16_t
*
)
&
m_11
[
k
<<
4
];
m10_128
=
(
int8x16_t
*
)
&
m_10
[
k
<<
4
];
ext_128
=
(
int8x16_t
*
)
&
ext
[
k
<<
4
];
m00_4
=
vqaddq_s8
(
alpha_ptr
[
7
],
beta_ptr
[
3
]);
//ALPHA_BETA_4m00;
m11_4
=
vqaddq_s8
(
alpha_ptr
[
7
],
beta_ptr
[
7
]);
//ALPHA_BETA_4m11;
m00_3
=
vqaddq_s8
(
alpha_ptr
[
6
],
beta_ptr
[
7
]);
//ALPHA_BETA_3m00;
...
...
@@ -730,7 +690,6 @@ void compute_ext8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext, l
m10_2
=
vqaddq_s8
(
alpha_ptr
[
3
],
beta_ptr
[
5
]);
//ALPHA_BETA_2m10;
m10_1
=
vqaddq_s8
(
alpha_ptr
[
2
],
beta_ptr
[
1
]);
//ALPHA_BETA_1m10;
m01_1
=
vqaddq_s8
(
alpha_ptr
[
2
],
beta_ptr
[
5
]);
//ALPHA_BETA_1m01;
m01_1
=
vmaxq_s8
(
m01_1
,
m01_2
);
m01_1
=
vmaxq_s8
(
m01_1
,
m01_3
);
m01_1
=
vmaxq_s8
(
m01_1
,
m01_4
);
...
...
@@ -743,27 +702,17 @@ void compute_ext8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext, l
m11_1
=
vmaxq_s8
(
m11_1
,
m11_2
);
m11_1
=
vmaxq_s8
(
m11_1
,
m11_3
);
m11_1
=
vmaxq_s8
(
m11_1
,
m11_4
);
m01_1
=
vqsubq_s8
(
m01_1
,
*
m10_128
);
m00_1
=
vqsubq_s8
(
m00_1
,
*
m11_128
);
m10_1
=
vqaddq_s8
(
m10_1
,
*
m10_128
);
m11_1
=
vqaddq_s8
(
m11_1
,
*
m11_128
);
m01_1
=
vmaxq_s8
(
m01_1
,
m00_1
);
m10_1
=
vmaxq_s8
(
m10_1
,
m11_1
);
*
ext_128
=
vqsubq_s8
(
m10_1
,
m01_1
);
alpha_ptr
+=
8
;
beta_ptr
+=
8
;
#endif
}
}
...
...
@@ -771,8 +720,7 @@ void compute_ext8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext, l
//int pi2[n],pi3[n+8],pi5[n+8],pi4[n+8],pi6[n+8],
int
*
pi2tab8
[
188
],
*
pi5tab8
[
188
],
*
pi4tab8
[
188
],
*
pi6tab8
[
188
];
void
free_td8
(
void
)
{
void
free_td8
(
void
)
{
int
ind
;
for
(
ind
=
0
;
ind
<
188
;
ind
++
)
{
...
...
@@ -787,14 +735,11 @@ void free_td8(void)
extern
RAN_CONTEXT_t
RC
;
void
init_td8
(
void
)
{
void
init_td8
(
void
)
{
int
ind
,
i
,
j
,
n
,
n2
,
pi
,
pi3
;
short
*
base_interleaver
;
short
*
base_interleaver
;
for
(
ind
=
0
;
ind
<
188
;
ind
++
)
{
n
=
f1f2mat
[
ind
].
nb_bits
;
base_interleaver
=
il_tb
+
f1f2mat
[
ind
].
beg_index
;
#ifdef MEX
...
...
@@ -816,7 +761,6 @@ void init_td8(void)
n2
=
n
;
for
(
j
=
0
,
i
=
0
;
i
<
n2
;
i
++
,
j
+=
16
)
{
if
(
j
>=
n2
)
j
-=
(
n2
-
1
);
...
...
@@ -831,7 +775,6 @@ void init_td8(void)
pi5tab8
[
ind
][
pi3
]
=
pi2tab8
[
ind
][
i
];
pi6tab8
[
ind
][
pi
]
=
pi2tab8
[
ind
][
i
];
}
}
}
...
...
@@ -853,31 +796,22 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y,
/* y is a pointer to the input
decoded_bytes is a pointer to the decoded output
n is the size in bits of the coded block, with the tail */
int
n2
;
llr_t
y8
[
3
*
(
n
+
16
)]
__attribute__
((
aligned
(
16
)));
llr_t
systematic0
[
n
+
16
]
__attribute__
((
aligned
(
16
)));
llr_t
systematic1
[
n
+
16
]
__attribute__
((
aligned
(
16
)));
llr_t
systematic2
[
n
+
16
]
__attribute__
((
aligned
(
16
)));
llr_t
yparity1
[
n
+
16
]
__attribute__
((
aligned
(
16
)));
llr_t
yparity2
[
n
+
16
]
__attribute__
((
aligned
(
16
)));
llr_t
ext
[
n
+
128
]
__attribute__
((
aligned
(
16
)));
llr_t
ext2
[
n
+
128
]
__attribute__
((
aligned
(
16
)));
llr_t
alpha
[(
n
+
16
)
*
8
]
__attribute__
((
aligned
(
16
)));
llr_t
beta
[(
n
+
16
)
*
8
]
__attribute__
((
aligned
(
16
)));
llr_t
m11
[
n
+
16
]
__attribute__
((
aligned
(
16
)));
llr_t
m10
[
n
+
16
]
__attribute__
((
aligned
(
16
)));
// int *pi2_p,*pi4_p,*pi5_p,*pi6_p;
int
*
pi4_p
,
*
pi5_p
,
*
pi6_p
;
llr_t
*
s
,
*
s1
,
*
s2
,
*
yp1
,
*
yp2
,
*
yp
;
unsigned
int
i
,
j
,
iind
;
//,pi;
unsigned
char
iteration_cnt
=
0
;
unsigned
int
crc
,
oldcrc
,
crc_len
;
...
...
@@ -892,11 +826,9 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y,
int8x16_t
tmp
,
zeros
=
vdupq_n_s8
(
0
);
const
uint8_t
__attribute__
((
aligned
(
16
)))
_Powers
[
16
]
=
{
1
,
2
,
4
,
8
,
16
,
32
,
64
,
128
,
1
,
2
,
4
,
8
,
16
,
32
,
64
,
128
};
// Set the powers of 2 (do it once for all, if applicable)
uint8x16_t
Powers
=
vld1q_u8
(
_Powers
);
#endif
int
offset8_flag
=
0
;
if
(
crc_type
>
3
)
{
...
...
@@ -904,17 +836,14 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y,
return
255
;
}
if
(
init_stats
)
start_meas
(
init_stats
);
if
((
n
&
15
)
>
0
)
{
n2
=
n
+
8
;
offset8_flag
=
1
;
}
else
n2
=
n
;
for
(
iind
=
0
;
iind
<
188
&&
f1f2mat
[
iind
].
nb_bits
!=
n
;
iind
++
);
if
(
iind
==
188
)
{
...
...
@@ -941,13 +870,12 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y,
}
#if defined(__x86_64__) || defined(__i386__)
// note: this makes valgrind freak
__m128i
avg
=
_mm_set1_epi32
(
0
);
for
(
i
=
0
;
i
<
(
3
*
(
n
>>
4
))
+
1
;
i
++
)
{
__m128i
tmp
=
_mm_abs_epi16
(
_mm_unpackhi_epi16
(((
__m128i
*
)
y
)[
i
],((
__m128i
*
)
y
)[
i
]));
avg
=
_mm_add_epi32
(
_mm_cvtepi16_epi32
(
_mm_abs_epi16
(((
__m128i
*
)
y
)[
i
])),
avg
);
__m128i
tmp
=
_mm_abs_epi16
(
_mm_unpackhi_epi16
(((
__m128i
*
)
y
)[
i
],((
__m128i
*
)
y
)[
i
]));
avg
=
_mm_add_epi32
(
_mm_cvtepi16_epi32
(
_mm_abs_epi16
(((
__m128i
*
)
y
)[
i
])),
avg
);
avg
=
_mm_add_epi32
(
_mm_cvtepi16_epi32
(
tmp
),
avg
);
}
...
...
@@ -971,15 +899,13 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y,
for
(
i
=
0
,
j
=
0
;
i
<
(
3
*
(
n2
>>
4
))
+
1
;
i
++
,
j
+=
2
)
((
__m128i
*
)
y8
)[
i
]
=
_mm_packs_epi16
(
_mm_srai_epi16
(((
__m128i
*
)
y
)[
j
],
3
),
_mm_srai_epi16
(((
__m128i
*
)
y
)[
j
+
1
],
4
));
yp128
=
(
__m128i
*
)
y8
;
yp128
=
(
__m128i
*
)
y8
;
#elif defined(__arm__)
int32x4_t
avg
=
vdupq_n_s32
(
0
);
for
(
i
=
0
;
i
<
(
3
*
(
n
>>
4
))
+
1
;
i
++
)
{
int16x8_t
tmp
=
vabsq_s16
(((
int16x8_t
*
)
y
)[
i
]);
avg
=
vqaddq_s32
(
avg
,
vaddl_s16
(((
int16x4_t
*
)
&
tmp
)[
0
],((
int16x4_t
*
)
&
tmp
)[
1
]));
int16x8_t
tmp
=
vabsq_s16
(((
int16x8_t
*
)
y
)[
i
]);
avg
=
vqaddq_s32
(
avg
,
vaddl_s16
(((
int16x4_t
*
)
&
tmp
)[
0
],((
int16x4_t
*
)
&
tmp
)[
1
]));
}
int32_t
round_avg
=
(
vgetq_lane_s32
(
avg
,
0
)
+
vgetq_lane_s32
(
avg
,
1
)
+
vgetq_lane_s32
(
avg
,
2
)
+
vgetq_lane_s32
(
avg
,
3
))
/
(
n
*
3
);
...
...
@@ -999,10 +925,8 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y,
for
(
i
=
0
,
j
=
0
;
i
<
(
3
*
(
n2
>>
3
))
+
1
;
i
++
,
j
+=
2
)
((
int8x8_t
*
)
y8
)[
i
]
=
vqmovn_s16
(
vshrq_n_s16
(((
int16x8_t
*
)
y
)[
j
],
3
));
yp128
=
(
int8x16_t
*
)
y8
;
yp128
=
(
int8x16_t
*
)
y8
;
#endif
s
=
systematic0
;
s1
=
systematic1
;
s2
=
systematic2
;
...
...
@@ -1020,8 +944,7 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y,
}
#endif
yp
=
(
llr_t
*
)
yp128
;
yp
=
(
llr_t
*
)
yp128
;
if
(
n2
>
n
)
{
/*
...
...
@@ -1031,7 +954,7 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y,
s1[n+4]=0;s1[n+5]=0;s1[n+6]=0;s1[n+7]=0;
s2[n]=0;s2[n+1]=0;s2[n+2]=0;s2[n+3]=0;
s2[n+4]=0;s2[n+5]=0;s2[n+6]=0;s2[n+7]=0;*/
yp
=
(
llr_t
*
)(
y8
+
n
);
yp
=
(
llr_t
*
)(
y8
+
n
);
}
// printf("n=%d,n2=%d\n",n,n2);
...
...
@@ -1045,7 +968,7 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y,
yp1
[
i
]
=
*
yp
;
yp
++
;
#ifdef DEBUG_LOGMAP
printf
(
"Term 1 (%
d
): %d %d
\n
"
,
i
,
s
[
i
],
yp1
[
i
]);
printf
(
"Term 1 (%
u
): %d %d
\n
"
,
i
,
s
[
i
],
yp1
[
i
]);
#endif //DEBUG_LOGMAP
}
...
...
@@ -1057,7 +980,7 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y,
yp2
[
i
-
16
]
=
*
yp
;
yp
++
;
#ifdef DEBUG_LOGMAP
printf
(
"Term 2 (%
d
): %d %d
\n
"
,
i
-
16
,
s
[
i
],
yp2
[
i
-
16
]);
printf
(
"Term 2 (%
u
): %d %d
\n
"
,
i
-
16
,
s
[
i
],
yp2
[
i
-
16
]);
#endif //DEBUG_LOGMAP
}
...
...
@@ -1068,63 +991,59 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y,
if
(
init_stats
)
stop_meas
(
init_stats
);
// do log_map from first parity bit
log_map8
(
systematic0
,
yparity1
,
m11
,
m10
,
alpha
,
beta
,
ext
,
n2
,
0
,
F
,
offset8_flag
,
alpha_stats
,
beta_stats
,
gamma_stats
,
ext_stats
);
while
(
iteration_cnt
++
<
max_iterations
)
{
while
(
iteration_cnt
++
<
max_iterations
)
{
#ifdef DEBUG_LOGMAP
printf
(
"
\n
*******************ITERATION %d (n %d, n2 %d), ext %p
\n\n
"
,
iteration_cnt
,
n
,
n2
,
ext
);
#endif //DEBUG_LOGMAP
if
(
intl1_stats
)
start_meas
(
intl1_stats
);
pi4_p
=
pi4tab8
[
iind
];
for
(
i
=
0
;
i
<
(
n2
>>
4
);
i
++
)
{
// steady-state portion
#if defined(__x86_64__) || defined(__i386__)
tmp
=
_mm_insert_epi8
(
tmp
,((
llr_t
*
)
ext
)[
*
pi4_p
++
],
0
);
tmp
=
_mm_insert_epi8
(
tmp
,((
llr_t
*
)
ext
)[
*
pi4_p
++
],
1
);
tmp
=
_mm_insert_epi8
(
tmp
,((
llr_t
*
)
ext
)[
*
pi4_p
++
],
2
);
tmp
=
_mm_insert_epi8
(
tmp
,((
llr_t
*
)
ext
)[
*
pi4_p
++
],
3
);
tmp
=
_mm_insert_epi8
(
tmp
,((
llr_t
*
)
ext
)[
*
pi4_p
++
],
4
);
tmp
=
_mm_insert_epi8
(
tmp
,((
llr_t
*
)
ext
)[
*
pi4_p
++
],
5
);
tmp
=
_mm_insert_epi8
(
tmp
,((
llr_t
*
)
ext
)[
*
pi4_p
++
],
6
);
tmp
=
_mm_insert_epi8
(
tmp
,((
llr_t
*
)
ext
)[
*
pi4_p
++
],
7
);
tmp
=
_mm_insert_epi8
(
tmp
,((
llr_t
*
)
ext
)[
*
pi4_p
++
],
8
);
tmp
=
_mm_insert_epi8
(
tmp
,((
llr_t
*
)
ext
)[
*
pi4_p
++
],
9
);
tmp
=
_mm_insert_epi8
(
tmp
,((
llr_t
*
)
ext
)[
*
pi4_p
++
],
10
);
tmp
=
_mm_insert_epi8
(
tmp
,((
llr_t
*
)
ext
)[
*
pi4_p
++
],
11
);
tmp
=
_mm_insert_epi8
(
tmp
,((
llr_t
*
)
ext
)[
*
pi4_p
++
],
12
);
tmp
=
_mm_insert_epi8
(
tmp
,((
llr_t
*
)
ext
)[
*
pi4_p
++
],
13
);
tmp
=
_mm_insert_epi8
(
tmp
,((
llr_t
*
)
ext
)[
*
pi4_p
++
],
14
);
((
__m128i
*
)
systematic2
)[
i
]
=
_mm_insert_epi8
(
tmp
,((
llr_t
*
)
ext
)[
*
pi4_p
++
],
15
);
tmp
=
_mm_insert_epi8
(
tmp
,((
llr_t
*
)
ext
)[
*
pi4_p
++
],
0
);
tmp
=
_mm_insert_epi8
(
tmp
,((
llr_t
*
)
ext
)[
*
pi4_p
++
],
1
);
tmp
=
_mm_insert_epi8
(
tmp
,((
llr_t
*
)
ext
)[
*
pi4_p
++
],
2
);
tmp
=
_mm_insert_epi8
(
tmp
,((
llr_t
*
)
ext
)[
*
pi4_p
++
],
3
);
tmp
=
_mm_insert_epi8
(
tmp
,((
llr_t
*
)
ext
)[
*
pi4_p
++
],
4
);
tmp
=
_mm_insert_epi8
(
tmp
,((
llr_t
*
)
ext
)[
*
pi4_p
++
],
5
);
tmp
=
_mm_insert_epi8
(
tmp
,((
llr_t
*
)
ext
)[
*
pi4_p
++
],
6
);
tmp
=
_mm_insert_epi8
(
tmp
,((
llr_t
*
)
ext
)[
*
pi4_p
++
],
7
);
tmp
=
_mm_insert_epi8
(
tmp
,((
llr_t
*
)
ext
)[
*
pi4_p
++
],
8
);
tmp
=
_mm_insert_epi8
(
tmp
,((
llr_t
*
)
ext
)[
*
pi4_p
++
],
9
);
tmp
=
_mm_insert_epi8
(
tmp
,((
llr_t
*
)
ext
)[
*
pi4_p
++
],
10
);
tmp
=
_mm_insert_epi8
(
tmp
,((
llr_t
*
)
ext
)[
*
pi4_p
++
],
11
);
tmp
=
_mm_insert_epi8
(
tmp
,((
llr_t
*
)
ext
)[
*
pi4_p
++
],
12
);
tmp
=
_mm_insert_epi8
(
tmp
,((
llr_t
*
)
ext
)[
*
pi4_p
++
],
13
);
tmp
=
_mm_insert_epi8
(
tmp
,((
llr_t
*
)
ext
)[
*
pi4_p
++
],
14
);
((
__m128i
*
)
systematic2
)[
i
]
=
_mm_insert_epi8
(
tmp
,((
llr_t
*
)
ext
)[
*
pi4_p
++
],
15
);
#elif defined(__arm__)
tmp
=
vsetq_lane_s8
(((
llr_t
*
)
ext
)[
*
pi4_p
++
],
tmp
,
0
);
tmp
=
vsetq_lane_s8
(((
llr_t
*
)
ext
)[
*
pi4_p
++
],
tmp
,
1
);
tmp
=
vsetq_lane_s8
(((
llr_t
*
)
ext
)[
*
pi4_p
++
],
tmp
,
2
);
tmp
=
vsetq_lane_s8
(((
llr_t
*
)
ext
)[
*
pi4_p
++
],
tmp
,
3
);
tmp
=
vsetq_lane_s8
(((
llr_t
*
)
ext
)[
*
pi4_p
++
],
tmp
,
4
);
tmp
=
vsetq_lane_s8
(((
llr_t
*
)
ext
)[
*
pi4_p
++
],
tmp
,
5
);
tmp
=
vsetq_lane_s8
(((
llr_t
*
)
ext
)[
*
pi4_p
++
],
tmp
,
6
);
tmp
=
vsetq_lane_s8
(((
llr_t
*
)
ext
)[
*
pi4_p
++
],
tmp
,
7
);
tmp
=
vsetq_lane_s8
(((
llr_t
*
)
ext
)[
*
pi4_p
++
],
tmp
,
8
);
tmp
=
vsetq_lane_s8
(((
llr_t
*
)
ext
)[
*
pi4_p
++
],
tmp
,
9
);
tmp
=
vsetq_lane_s8
(((
llr_t
*
)
ext
)[
*
pi4_p
++
],
tmp
,
10
);
tmp
=
vsetq_lane_s8
(((
llr_t
*
)
ext
)[
*
pi4_p
++
],
tmp
,
11
);
tmp
=
vsetq_lane_s8
(((
llr_t
*
)
ext
)[
*
pi4_p
++
],
tmp
,
12
);
tmp
=
vsetq_lane_s8
(((
llr_t
*
)
ext
)[
*
pi4_p
++
],
tmp
,
13
);
tmp
=
vsetq_lane_s8
(((
llr_t
*
)
ext
)[
*
pi4_p
++
],
tmp
,
14
);
((
int8x16_t
*
)
systematic2
)[
i
]
=
vsetq_lane_s8
(((
llr_t
*
)
ext
)[
*
pi4_p
++
],
tmp
,
15
);
tmp
=
vsetq_lane_s8
(((
llr_t
*
)
ext
)[
*
pi4_p
++
],
tmp
,
0
);
tmp
=
vsetq_lane_s8
(((
llr_t
*
)
ext
)[
*
pi4_p
++
],
tmp
,
1
);
tmp
=
vsetq_lane_s8
(((
llr_t
*
)
ext
)[
*
pi4_p
++
],
tmp
,
2
);
tmp
=
vsetq_lane_s8
(((
llr_t
*
)
ext
)[
*
pi4_p
++
],
tmp
,
3
);
tmp
=
vsetq_lane_s8
(((
llr_t
*
)
ext
)[
*
pi4_p
++
],
tmp
,
4
);
tmp
=
vsetq_lane_s8
(((
llr_t
*
)
ext
)[
*
pi4_p
++
],
tmp
,
5
);
tmp
=
vsetq_lane_s8
(((
llr_t
*
)
ext
)[
*
pi4_p
++
],
tmp
,
6
);
tmp
=
vsetq_lane_s8
(((
llr_t
*
)
ext
)[
*
pi4_p
++
],
tmp
,
7
);
tmp
=
vsetq_lane_s8
(((
llr_t
*
)
ext
)[
*
pi4_p
++
],
tmp
,
8
);
tmp
=
vsetq_lane_s8
(((
llr_t
*
)
ext
)[
*
pi4_p
++
],
tmp
,
9
);
tmp
=
vsetq_lane_s8
(((
llr_t
*
)
ext
)[
*
pi4_p
++
],
tmp
,
10
);
tmp
=
vsetq_lane_s8
(((
llr_t
*
)
ext
)[
*
pi4_p
++
],
tmp
,
11
);
tmp
=
vsetq_lane_s8
(((
llr_t
*
)
ext
)[
*
pi4_p
++
],
tmp
,
12
);
tmp
=
vsetq_lane_s8
(((
llr_t
*
)
ext
)[
*
pi4_p
++
],
tmp
,
13
);
tmp
=
vsetq_lane_s8
(((
llr_t
*
)
ext
)[
*
pi4_p
++
],
tmp
,
14
);
((
int8x16_t
*
)
systematic2
)[
i
]
=
vsetq_lane_s8
(((
llr_t
*
)
ext
)[
*
pi4_p
++
],
tmp
,
15
);
#endif
}
if
(
intl1_stats
)
stop_meas
(
intl1_stats
);
// do log_map from second parity bit
log_map8
(
systematic2
,
yparity2
,
m11
,
m10
,
alpha
,
beta
,
ext2
,
n2
,
1
,
F
,
offset8_flag
,
alpha_stats
,
beta_stats
,
gamma_stats
,
ext_stats
);
pi5_p
=
pi5tab8
[
iind
];
uint16_t
decoded_bytes_interl
[
6144
/
16
]
__attribute__
((
aligned
(
16
)));
...
...
@@ -1148,7 +1067,7 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y,
tmp
=
_mm_insert_epi8
(
tmp
,
ext2
[
*
pi5_p
++
],
14
);
tmp
=
_mm_insert_epi8
(
tmp
,
ext2
[
*
pi5_p
++
],
15
);
decoded_bytes_interl
[
i
]
=
(
uint16_t
)
_mm_movemask_epi8
(
_mm_cmpgt_epi8
(
tmp
,
zeros
));
((
__m128i
*
)
systematic1
)[
i
]
=
_mm_adds_epi8
(
_mm_subs_epi8
(
tmp
,((
__m128i
*
)
ext
)[
i
]),((
__m128i
*
)
systematic0
)[
i
]);
((
__m128i
*
)
systematic1
)[
i
]
=
_mm_adds_epi8
(
_mm_subs_epi8
(
tmp
,((
__m128i
*
)
ext
)[
i
]),((
__m128i
*
)
systematic0
)[
i
]);
#elif defined(__arm__)
tmp
=
vsetq_lane_s8
(
ext2
[
*
pi5_p
++
],
tmp
,
0
);
tmp
=
vsetq_lane_s8
(
ext2
[
*
pi5_p
++
],
tmp
,
1
);
...
...
@@ -1167,12 +1086,11 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y,
tmp
=
vsetq_lane_s8
(
ext2
[
*
pi5_p
++
],
tmp
,
14
);
tmp
=
vsetq_lane_s8
(
ext2
[
*
pi5_p
++
],
tmp
,
15
);
uint64x2_t
Mask
=
vpaddlq_u32
(
vpaddlq_u16
(
vpaddlq_u8
(
vandq_u8
(
vcgtq_s8
(
tmp
,
zeros
),
Powers
))));
vst1q_lane_u8
(
&
((
uint8_t
*
)
&
decoded_bytes
[
i
])[
0
],
(
uint8x16_t
)
Mask
,
0
);
vst1q_lane_u8
(
&
((
uint8_t
*
)
&
decoded_bytes
[
i
])[
1
],
(
uint8x16_t
)
Mask
,
8
);
((
int8x16_t
*
)
systematic1
)[
i
]
=
vqaddq_s8
(
vqsubq_s8
(
tmp
,((
int8x16_t
*
)
ext
)[
i
]),((
int8x16_t
*
)
systematic0
)[
i
]);
vst1q_lane_u8
(
&
((
uint8_t
*
)
&
decoded_bytes
[
i
])[
0
],
(
uint8x16_t
)
Mask
,
0
);
vst1q_lane_u8
(
&
((
uint8_t
*
)
&
decoded_bytes
[
i
])[
1
],
(
uint8x16_t
)
Mask
,
8
);
((
int8x16_t
*
)
systematic1
)[
i
]
=
vqaddq_s8
(
vqsubq_s8
(
tmp
,((
int8x16_t
*
)
ext
)[
i
]),((
int8x16_t
*
)
systematic0
)[
i
]);
#endif
}
}
else
{
for
(
i
=
0
;
i
<
(
n2
>>
4
);
i
++
)
{
#if defined(__x86_64__) || defined(__i386__)
...
...
@@ -1193,8 +1111,7 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y,
tmp
=
_mm_insert_epi8
(
tmp
,
ext2
[
*
pi5_p
++
],
14
);
tmp
=
_mm_insert_epi8
(
tmp
,
ext2
[
*
pi5_p
++
],
15
);
tmp128
[
i
]
=
_mm_adds_epi8
(((
__m128i
*
)
ext2
)[
i
],((
__m128i
*
)
systematic2
)[
i
]);
((
__m128i
*
)
systematic1
)[
i
]
=
_mm_adds_epi8
(
_mm_subs_epi8
(
tmp
,((
__m128i
*
)
ext
)[
i
]),((
__m128i
*
)
systematic0
)[
i
]);
((
__m128i
*
)
systematic1
)[
i
]
=
_mm_adds_epi8
(
_mm_subs_epi8
(
tmp
,((
__m128i
*
)
ext
)[
i
]),((
__m128i
*
)
systematic0
)[
i
]);
#elif defined(__arm__)
tmp
=
vsetq_lane_s8
(
ext2
[
*
pi5_p
++
],
tmp
,
0
);
tmp
=
vsetq_lane_s8
(
ext2
[
*
pi5_p
++
],
tmp
,
1
);
...
...
@@ -1213,9 +1130,7 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y,
tmp
=
vsetq_lane_s8
(
ext2
[
*
pi5_p
++
],
tmp
,
14
);
tmp
=
vsetq_lane_s8
(
ext2
[
*
pi5_p
++
],
tmp
,
15
);
tmp128
[
i
]
=
vqaddq_s8
(((
int8x16_t
*
)
ext2
)[
i
],((
int8x16_t
*
)
systematic2
)[
i
]);
((
int8x16_t
*
)
systematic1
)[
i
]
=
vqaddq_s8
(
vqsubq_s8
(
tmp
,((
int8x16_t
*
)
ext
)[
i
]),((
int8x16_t
*
)
systematic0
)[
i
]);
((
int8x16_t
*
)
systematic1
)[
i
]
=
vqaddq_s8
(
vqsubq_s8
(
tmp
,((
int8x16_t
*
)
ext
)[
i
]),((
int8x16_t
*
)
systematic0
)[
i
]);
#endif
}
}
...
...
@@ -1225,11 +1140,10 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y,
if
(
intl2_stats
)
start_meas
(
intl2_stats
);
if
((
n2
&
0x7f
)
==
0
)
{
// n2 is a multiple of 128 bits
// re-order the decoded bits in theregular order
// as it is presently ordered as 16 sequential columns
#if defined(__x86_64__) || defined(__i386__)
__m128i
*
dbytes
=
(
__m128i
*
)
decoded_bytes_interl
;
__m128i
*
dbytes
=
(
__m128i
*
)
decoded_bytes_interl
;
__m128i
shuffle
=
SHUFFLE16
(
7
,
6
,
5
,
4
,
3
,
2
,
1
,
0
);
__m128i
mask
__attribute__
((
aligned
(
16
)));
int
n_128
=
n2
>>
7
;
...
...
@@ -1239,7 +1153,6 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y,
__m128i
tmp
__attribute__
((
aligned
(
16
)));
tmp
=
_mm_shuffle_epi8
(
dbytes
[
i
],
shuffle
);
__m128i
tmp2
__attribute__
((
aligned
(
16
)))
;
tmp2
=
_mm_and_si128
(
tmp
,
mask
);
tmp2
=
_mm_cmpeq_epi16
(
tmp2
,
mask
);
// printf("decoded_bytes %p\n",decoded_bytes);
...
...
@@ -1253,17 +1166,17 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y,
decoded_bytes
[
n_128
*
j
+
i
]
=
(
uint8_t
)
_mm_movemask_epi8
(
_mm_packs_epi16
(
tmp2
,
zeros
));
}
}
#elif defined(__arm__)
uint8x16_t
*
dbytes
=
(
uint8x16_t
*
)
decoded_bytes_interl
;
uint8x16_t
*
dbytes
=
(
uint8x16_t
*
)
decoded_bytes_interl
;
uint16x8_t
mask
__attribute__
((
aligned
(
16
)));
int
n_128
=
n2
>>
7
;
for
(
i
=
0
;
i
<
n_128
;
i
++
)
{
mask
=
vdupq_n_u16
(
1
);
uint8x16_t
tmp
__attribute__
((
aligned
(
16
)));
tmp
=
vcombine_u8
(
vrev64_u8
(((
uint8x8_t
*
)
&
dbytes
[
i
])[
1
]),
vrev64_u8
(((
uint8x8_t
*
)
&
dbytes
[
i
])[
0
]));
tmp
=
vcombine_u8
(
vrev64_u8
(((
uint8x8_t
*
)
&
dbytes
[
i
])[
1
]),
vrev64_u8
(((
uint8x8_t
*
)
&
dbytes
[
i
])[
0
]));
vst1q_lane_u8
(
&
decoded_bytes
[
n_128
*
0
+
i
],(
uint8x16_t
)
vpaddlq_u32
(
vpaddlq_u16
(
vpaddlq_u8
(
vandq_u8
(
tmp
,
Powers
)))),
0
);
int
j
;
for
(
j
=
1
;
j
<
16
;
j
++
)
{
...
...
@@ -1314,8 +1227,8 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y,
tmp
=
vsetq_lane_s8
(((
llr_t
*
)
tmp128
)[
*
pi6_p
++
],
tmp
,
9
);
tmp
=
vsetq_lane_s8
(((
llr_t
*
)
tmp128
)[
*
pi6_p
++
],
tmp
,
8
);
uint64x2_t
Mask
=
vpaddlq_u32
(
vpaddlq_u16
(
vpaddlq_u8
(
vandq_u8
(
vcgtq_s8
(
tmp
,
zeros
),
Powers
))));
vst1q_lane_u8
(
&
((
uint8_t
*
)
&
decoded_bytes
[
i
])[
0
],
(
uint8x16_t
)
Mask
,
0
);
vst1q_lane_u8
(
&
((
uint8_t
*
)
&
decoded_bytes
[
i
])[
1
],
(
uint8x16_t
)
Mask
,
8
);
vst1q_lane_u8
(
&
((
uint8_t
*
)
&
decoded_bytes
[
i
])[
0
],
(
uint8x16_t
)
Mask
,
0
);
vst1q_lane_u8
(
&
((
uint8_t
*
)
&
decoded_bytes
[
i
])[
1
],
(
uint8x16_t
)
Mask
,
8
);
#endif
}
}
...
...
@@ -1324,7 +1237,6 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y,
oldcrc
=
*
((
unsigned
int
*
)(
&
decoded_bytes
[(
n
>>
3
)
-
crc_len
]));
switch
(
crc_type
)
{
case
CRC24_A
:
oldcrc
&=
0x00ffffff
;
crc
=
crc24a
(
&
decoded_bytes
[
F
>>
3
],
...
...
@@ -1372,13 +1284,13 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y,
if
(
iteration_cnt
<
max_iterations
)
{
log_map8
(
systematic1
,
yparity1
,
m11
,
m10
,
alpha
,
beta
,
ext
,
n2
,
0
,
F
,
offset8_flag
,
alpha_stats
,
beta_stats
,
gamma_stats
,
ext_stats
);
#if defined(__x86_64__) || defined(__i386__)
__m128i
*
ext_128
=
(
__m128i
*
)
ext
;
__m128i
*
s1_128
=
(
__m128i
*
)
systematic1
;
__m128i
*
s0_128
=
(
__m128i
*
)
systematic0
;
__m128i
*
ext_128
=
(
__m128i
*
)
ext
;
__m128i
*
s1_128
=
(
__m128i
*
)
systematic1
;
__m128i
*
s0_128
=
(
__m128i
*
)
systematic0
;
#elif defined(__arm__)
int8x16_t
*
ext_128
=
(
int8x16_t
*
)
ext
;
int8x16_t
*
s1_128
=
(
int8x16_t
*
)
systematic1
;
int8x16_t
*
s0_128
=
(
int8x16_t
*
)
systematic0
;
int8x16_t
*
ext_128
=
(
int8x16_t
*
)
ext
;
int8x16_t
*
s1_128
=
(
int8x16_t
*
)
systematic1
;
int8x16_t
*
s0_128
=
(
int8x16_t
*
)
systematic0
;
#endif
int
myloop
=
n2
>>
4
;
...
...
@@ -1394,5 +1306,4 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y,
}
return
(
iteration_cnt
);
}
openair1/PHY/CODING/ccoding_byte.c
View file @
1cb484f1
...
...
@@ -47,27 +47,20 @@ void
ccodedot11_encode
(
unsigned
int
numbytes
,
unsigned
char
*
inPtr
,
unsigned
char
*
outPtr
,
unsigned
char
puncturing
)
{
unsigned
char
puncturing
)
{
unsigned
int
state
;
unsigned
char
c
,
out
,
shiftbit
=
0
;
// printf("In ccodedot11_encode (%d,%p,%p,%d)\n",numbytes,inPtr,outPtr,puncturing);
#ifdef DEBUG_CCODE
unsigned
int
dummy
;
#endif //DEBUG_CCODE
int
bit_index
;
/* The input bit is shifted in position 8 of the state.
Shiftbit will take values between 1 and 8 */
state
=
0
;
#ifdef DEBUG_CCODE
dummy
=
0
;
#endif //DEBUG_CCODE
/* Do not increment inPtr until we read the next octet */
bit_index
=
0
;
...
...
@@ -80,7 +73,6 @@ ccodedot11_encode (unsigned int numbytes,
switch
(
puncturing
)
{
case
0
:
//rate 1/2
for
(
shiftbit
=
0
;
shiftbit
<
8
;
shiftbit
++
)
{
state
>>=
1
;
if
((
c
&
(
1
<<
shiftbit
))
!=
0
)
{
...
...
@@ -88,22 +80,18 @@ ccodedot11_encode (unsigned int numbytes,
}
out
=
ccodedot11_table
[
state
];
*
outPtr
++
=
out
&
1
;
*
outPtr
++
=
(
out
>>
1
)
&
1
;
#ifdef DEBUG_CCODE
printf
(
"%d: %d -> %d (%d
)
\n
"
,
dummy
,
state
,
out
,
ccodedot11_table
[
state
]);
printf
(
"%u: %u -> %d (%u
)
\n
"
,
dummy
,
state
,
out
,
ccodedot11_table
[
state
]);
dummy
+=
2
;
#endif //DEBUG_CCODE
}
break
;
case
1
:
// rate 3/4
for
(
shiftbit
=
0
;
shiftbit
<
8
;
shiftbit
++
)
{
state
>>=
1
;
if
((
c
&
(
1
<<
shiftbit
))
!=
0
)
{
...
...
@@ -119,10 +107,9 @@ ccodedot11_encode (unsigned int numbytes,
*
outPtr
++
=
(
out
>>
1
)
&
1
;
#ifdef DEBUG_CCODE
printf
(
"%d: %d -> %d (%d
)
\n
"
,
dummy
,
state
,
out
,
ccodedot11_table
[
state
]);
printf
(
"%u: %u -> %d (%u
)
\n
"
,
dummy
,
state
,
out
,
ccodedot11_table
[
state
]);
dummy
+=
2
;
#endif //DEBUG_CCODE
bit_index
=
(
bit_index
==
2
)
?
0
:
(
bit_index
+
1
);
}
...
...
@@ -130,7 +117,6 @@ ccodedot11_encode (unsigned int numbytes,
case
2
:
// rate 2/3
for
(
shiftbit
=
0
;
shiftbit
<
8
;
shiftbit
++
)
{
state
>>=
1
;
if
((
c
&
(
1
<<
shiftbit
))
!=
0
)
{
...
...
@@ -138,19 +124,16 @@ ccodedot11_encode (unsigned int numbytes,
}
out
=
ccodedot11_table
[
state
];
*
outPtr
++
=
out
&
1
;
if
(
bit_index
==
0
)
*
outPtr
++
=
(
out
>>
1
)
&
1
;
#ifdef DEBUG_CCODE
printf
(
"%d: %d -> %d (%d
)
\n
"
,
dummy
,
state
,
out
,
ccodedot11_table
[
state
]);
printf
(
"%d: %u -> %d (%u
)
\n
"
,
dummy
,
state
,
out
,
ccodedot11_table
[
state
]);
dummy
+=
2
;
#endif //DEBUG_CCODE
bit_index
=
(
bit_index
==
0
)
?
1
:
0
;
}
break
;
...
...
@@ -181,8 +164,6 @@ ccodedot11_encode (unsigned int numbytes,
}
*/
}
...
...
@@ -197,8 +178,7 @@ ccodedot11_encode (unsigned int numbytes,
/* Basic code table initialization for constraint length 7 */
/* Input in MSB, followed by state in 6 LSBs */
void
ccodedot11_init
(
void
)
{
void
ccodedot11_init
(
void
)
{
unsigned
int
i
,
j
,
k
,
sum
;
for
(
i
=
0
;
i
<
128
;
i
++
)
{
...
...
@@ -219,8 +199,7 @@ void ccodedot11_init(void)
}
/* Input in LSB, followed by state in 6 MSBs */
void
ccodedot11_init_inv
(
void
)
{
void
ccodedot11_init_inv
(
void
)
{
unsigned
int
i
,
j
,
k
,
sum
;
for
(
i
=
0
;
i
<
128
;
i
++
)
{
...
...
@@ -251,21 +230,15 @@ void ccodedot11_init_inv(void)
#ifdef DEBUG_CCODE
#include <stdio.h>
main
()
{
main
()
{
unsigned
char
test
[]
=
"0Thebigredfox"
;
unsigned
char
output
[
512
],
*
inPtr
,
*
outPtr
;
unsigned
int
i
;
test
[
0
]
=
128
;
test
[
1
]
=
0
;
ccodedot11_init
();
inPtr
=
test
;
outPtr
=
output
;
ccodedot11_encode
(
16
,
inPtr
,
outPtr
,
0
);
for
(
i
=
0
;
i
<
32
;
i
++
)
printf
(
"%x "
,
output
[
i
]);
...
...
openair1/PHY/CODING/ccoding_byte_lte.c
View file @
1cb484f1
...
...
@@ -52,20 +52,16 @@ ccodelte_encode (int32_t numbits,
uint8_t
add_crc
,
uint8_t
*
inPtr
,
uint8_t
*
outPtr
,
uint16_t
rnti
)
{
uint16_t
rnti
)
{
uint32_t
state
;
uint8_t
c
,
out
,
first_bit
;
int8_t
shiftbit
=
0
;
uint16_t
c16
;
uint16_t
next_last_byte
=
0
;
uint32_t
crc
=
0
;
#ifdef DEBUG_CCODE
uint32_t
dummy
=
0
;
#endif //DEBUG_CCODE
/* The input bit is shifted in position 8 of the state.
Shiftbit will take values between 1 and 8 */
state
=
0
;
...
...
@@ -137,17 +133,12 @@ ccodelte_encode (int32_t numbits,
#endif //DEBUG_CCODE
/* Do not increment inPtr until we read the next octet */
while
(
numbits
>
0
)
{
c
=
*
inPtr
++
;
#ifdef DEBUG_CCODE
printf
(
"** %x **
\n
"
,
c
);
#endif //DEBUG_CCODE
// for (shiftbit = 0; (shiftbit<8) && (numbits>0);shiftbit++,numbits--) {
for
(
shiftbit
=
7
;
(
shiftbit
>=
0
)
&&
(
numbits
>
0
);
shiftbit
--
,
numbits
--
)
{
state
>>=
1
;
...
...
@@ -157,23 +148,18 @@ ccodelte_encode (int32_t numbits,
}
out
=
ccodelte_table
[
state
];
*
outPtr
++
=
out
&
1
;
*
outPtr
++
=
(
out
>>
1
)
&
1
;
*
outPtr
++
=
(
out
>>
2
)
&
1
;
#ifdef DEBUG_CCODE
printf
(
"numbits %d, input %d, outbit %d: %d -> %d (%d%d%d)
\n
"
,
numbits
,
state
>>
6
,
dummy
,
state
,
out
,
out
&
1
,(
out
>>
1
)
&
1
,(
out
>>
2
)
&
1
);
dummy
+=
3
;
#endif //DEBUG_CCODE
}
}
// now code 8-bit CRC for UCI
if
(
add_crc
==
1
)
{
c
=
(
uint8_t
)(
crc
>>
24
);
// for (shiftbit = 0; (shiftbit<8);shiftbit++) {
...
...
@@ -185,22 +171,18 @@ ccodelte_encode (int32_t numbits,
}
out
=
ccodelte_table
[
state
];
*
outPtr
++
=
out
&
1
;
*
outPtr
++
=
(
out
>>
1
)
&
1
;
*
outPtr
++
=
(
out
>>
2
)
&
1
;
#ifdef DEBUG_CCODE
printf
(
"crc bit %d input %d, outbit %d: %d -> %d (%
d
)
\n
"
,
shiftbit
,
state
>>
6
,
dummy
,
state
,
out
,
ccodelte_table
[
state
]);
printf
(
"crc bit %d input %d, outbit %d: %d -> %d (%
u
)
\n
"
,
shiftbit
,
state
>>
6
,
dummy
,
state
,
out
,
ccodelte_table
[
state
]);
dummy
+=
3
;
#endif //DEBUG_CCODE
}
}
// now code 16-bit CRC for DCI
if
(
add_crc
==
2
)
{
c16
=
(
uint16_t
)(
crc
>>
16
);
// for (shiftbit = 0; (shiftbit<16);shiftbit++) {
...
...
@@ -212,16 +194,13 @@ ccodelte_encode (int32_t numbits,
}
out
=
ccodelte_table
[
state
];
*
outPtr
++
=
out
&
1
;
*
outPtr
++
=
(
out
>>
1
)
&
1
;
*
outPtr
++
=
(
out
>>
2
)
&
1
;
#ifdef DEBUG_CCODE
printf
(
"crc bit %d input %d, outbit %d: %d -> %d (%
d
)
\n
"
,
shiftbit
,
state
>>
6
,
dummy
,
state
,
out
,
ccodelte_table
[
state
]);
printf
(
"crc bit %d input %d, outbit %d: %d -> %d (%
u
)
\n
"
,
shiftbit
,
state
>>
6
,
dummy
,
state
,
out
,
ccodelte_table
[
state
]);
dummy
+=
3
;
#endif //DEBUG_CCODE
}
}
}
...
...
@@ -238,8 +217,7 @@ ccodelte_encode (int32_t numbits,
/* Basic code table initialization for constraint length 7 */
/* Input in MSB, followed by state in 6 LSBs */
void
ccodelte_init
(
void
)
{
void
ccodelte_init
(
void
)
{
unsigned
int
i
,
j
,
k
,
sum
;
for
(
i
=
0
;
i
<
128
;
i
++
)
{
...
...
@@ -260,8 +238,7 @@ void ccodelte_init(void)
}
/* Input in LSB, followed by state in 6 MSBs */
void
ccodelte_init_inv
(
void
)
{
void
ccodelte_init_inv
(
void
)
{
unsigned
int
i
,
j
,
k
,
sum
;
for
(
i
=
0
;
i
<
128
;
i
++
)
{
...
...
@@ -281,8 +258,7 @@ void ccodelte_init_inv(void)
}
}
void
ccodedab_init
(
void
)
{
void
ccodedab_init
(
void
)
{
unsigned
int
i
,
j
,
k
,
sum
;
for
(
i
=
0
;
i
<
128
;
i
++
)
{
...
...
@@ -303,8 +279,7 @@ void ccodedab_init(void)
}
/* Input in LSB, followed by state in 6 MSBs */
void
ccodedab_init_inv
(
void
)
{
void
ccodedab_init_inv
(
void
)
{
unsigned
int
i
,
j
,
k
,
sum
;
for
(
i
=
0
;
i
<
128
;
i
++
)
{
...
...
@@ -334,21 +309,15 @@ void ccodedab_init_inv(void)
#ifdef CCODE_MAIN
#include <stdio.h>
main
()
{
main
()
{
unsigned
char
test
[]
=
"Thebigredfox"
;
unsigned
char
output
[
512
],
*
inPtr
,
*
outPtr
;
unsigned
int
i
;
test
[
0
]
=
128
;
test
[
1
]
=
0
;
ccodelte_init
();
inPtr
=
test
;
outPtr
=
output
;
ccodelte_encode
(
21
,
inPtr
,
outPtr
);
for
(
i
=
0
;
i
<
21
*
3
;
i
++
)
printf
(
"%x "
,
output
[
i
]);
...
...
openair1/PHY/CODING/lte_rate_matching.c
View file @
1cb484f1
...
...
@@ -25,8 +25,8 @@
date: 21.10.2009
*/
#ifdef MAIN
#include <stdio.h>
#include <stdlib.h>
#include <stdio.h>
#include <stdlib.h>
#endif
#include "PHY/defs_eNB.h"
#include "PHY/LTE_TRANSPORT/transport_common.h"
...
...
@@ -42,9 +42,7 @@ static uint32_t bitrev_cc[32] = {1,17,9,25,5,21,13,29,3,19,11,27,7,23,15,31,0,16
//#define RM_DEBUG2 1
//#define RM_DEBUG_CC 1
uint32_t
sub_block_interleaving_turbo
(
uint32_t
D
,
uint8_t
*
d
,
uint8_t
*
w
)
{
uint32_t
sub_block_interleaving_turbo
(
uint32_t
D
,
uint8_t
*
d
,
uint8_t
*
w
)
{
uint32_t
RTC
=
(
D
>>
5
),
ND
,
ND3
;
uint32_t
row
,
col
,
Kpi
;
uint32_t
index3
,
k
,
k2
;
...
...
@@ -64,7 +62,6 @@ uint32_t sub_block_interleaving_turbo(uint32_t D, uint8_t *d,uint8_t *w)
printf
(
"RTC = %d, Kpi=%d, ND=%d
\n
"
,
RTC
,
Kpi
,
ND
);
#endif
ND3
=
ND
*
3
;
// copy d02 to dD2 (for mod Kpi operation from clause (4), p.16 of 36.212
d
[(
3
*
D
)
+
2
]
=
d
[
2
];
k
=
0
;
...
...
@@ -80,12 +77,9 @@ uint32_t sub_block_interleaving_turbo(uint32_t D, uint8_t *d,uint8_t *w)
index3
=
bitrev_x3
[
col
];
//3*index;
for
(
row
=
0
;
row
<
RTC
;
row
++
)
{
w
[
k
]
=
d1
[
index3
];
//d[index3-ND3];
w
[
Kpi
+
k2
]
=
d2
[
index3
];
//d[index3-ND3+1];
w
[
Kpi
+
1
+
k2
]
=
d3
[
index3
];
//d[index3-ND3+5];
#ifdef RM_DEBUG
printf
(
"row %d, index %d, index-Nd %d index-Nd+1 %d (k,Kpi+2k,Kpi+2k+1) (%d,%d,%d) w(%d,%d,%d)
\n
"
,
row
,
index
,
index
-
ND
,((
index
+
1
)
%
Kpi
)
-
ND
,
k
,
Kpi
+
(
k
<<
1
),
Kpi
+
(
k
<<
1
)
+
1
,
w
[
k
],
w
[
Kpi
+
(
k
<<
1
)],
w
[
Kpi
+
1
+
(
k
<<
1
)]);
...
...
@@ -100,7 +94,8 @@ uint32_t sub_block_interleaving_turbo(uint32_t D, uint8_t *d,uint8_t *w)
#endif
index3
+=
96
;
k
++
;
k2
+=
2
;
k
++
;
k2
+=
2
;
}
}
...
...
@@ -120,9 +115,7 @@ uint32_t sub_block_interleaving_turbo(uint32_t D, uint8_t *d,uint8_t *w)
}
uint32_t
sub_block_interleaving_cc
(
uint32_t
D
,
uint8_t
*
d
,
uint8_t
*
w
)
{
uint32_t
sub_block_interleaving_cc
(
uint32_t
D
,
uint8_t
*
d
,
uint8_t
*
w
)
{
uint32_t
RCC
=
(
D
>>
5
),
ND
,
ND3
;
uint32_t
row
,
col
,
Kpi
,
index
;
uint32_t
index3
,
k
;
...
...
@@ -141,7 +134,6 @@ uint32_t sub_block_interleaving_cc(uint32_t D, uint8_t *d,uint8_t *w)
printf
(
"RCC = %d, Kpi=%d, ND=%d
\n
"
,
RCC
,
Kpi
,
ND
);
#endif
ND3
=
ND
*
3
;
k
=
0
;
for
(
col
=
0
;
col
<
32
;
col
++
)
{
...
...
@@ -180,9 +172,7 @@ uint32_t sub_block_interleaving_cc(uint32_t D, uint8_t *d,uint8_t *w)
return
(
RCC
);
}
void
sub_block_deinterleaving_turbo
(
uint32_t
D
,
int16_t
*
d
,
int16_t
*
w
)
{
void
sub_block_deinterleaving_turbo
(
uint32_t
D
,
int16_t
*
d
,
int16_t
*
w
)
{
uint32_t
RTC
=
(
D
>>
5
),
ND
,
ND3
;
uint32_t
row
,
col
,
Kpi
,
index
;
uint32_t
index3
,
k
,
k2
;
...
...
@@ -199,7 +189,6 @@ void sub_block_deinterleaving_turbo(uint32_t D,int16_t *d,int16_t *w)
printf
(
"RTC = %d, Kpi=%d, ND=%d
\n
"
,
RTC
,
Kpi
,
ND
);
#endif
ND3
=
ND
*
3
;
// copy d02 to dD2 (for mod Kpi operation from clause (4), p.16 of 36.212
k
=
0
;
k2
=
0
;
...
...
@@ -215,7 +204,6 @@ void sub_block_deinterleaving_turbo(uint32_t D,int16_t *d,int16_t *w)
index3
=
bitrev_x3
[
col
];
//3*index;
for
(
row
=
0
;
row
<
RTC
;
row
++
)
{
d1
[
index3
]
=
w
[
k
];
d2
[
index3
]
=
w
[
Kpi
+
k2
];
d3
[
index3
]
=
w
[
Kpi
+
1
+
k2
];
...
...
@@ -229,12 +217,9 @@ void sub_block_deinterleaving_turbo(uint32_t D,int16_t *d,int16_t *w)
// if (ND>0)
// d[2] = LTE_NULL;//d[(3*D)+2];
}
void
sub_block_deinterleaving_cc
(
uint32_t
D
,
int8_t
*
d
,
int8_t
*
w
)
{
void
sub_block_deinterleaving_cc
(
uint32_t
D
,
int8_t
*
d
,
int8_t
*
w
)
{
//WANG_Hao uint32_t RCC = (D>>5), ND, ND3;
uint32_t
RCC
=
(
D
>>
5
);
ptrdiff_t
ND
,
ND3
;
...
...
@@ -251,10 +236,9 @@ void sub_block_deinterleaving_cc(uint32_t D,int8_t *d,int8_t *w)
ND
=
Kpi
-
D
;
#ifdef RM_DEBUG2
printf
(
"sub_block_interleaving_cc : D = %d (%d), d %p, w %p
\n
"
,
D
,
D
*
3
,
d
,
w
);
printf
(
"RCC = %d, Kpi=%d, ND=%d
\n
"
,
RCC
,
Kpi
,
ND
);
printf
(
"RCC = %d, Kpi=%d, ND=%
l
d
\n
"
,
RCC
,
Kpi
,
ND
);
#endif
ND3
=
ND
*
3
;
k
=
0
;
for
(
col
=
0
;
col
<
32
;
col
++
)
{
...
...
@@ -265,24 +249,20 @@ void sub_block_deinterleaving_cc(uint32_t D,int8_t *d,int8_t *w)
index3
=
3
*
index
;
for
(
row
=
0
;
row
<
RCC
;
row
++
)
{
d
[
index3
-
ND3
]
=
w
[
k
];
d
[
index3
-
ND3
+
1
]
=
w
[
Kpi
+
k
];
d
[
index3
-
ND3
+
2
]
=
w
[(
Kpi
<<
1
)
+
k
];
#ifdef RM_DEBUG2
printf
(
"row %d, index %d k %d index3-ND3 %d w(%d,%d,%d)
\n
"
,
row
,
index
,
k
,
index3
-
ND3
,
w
[
k
],
w
[
Kpi
+
k
],
w
[(
Kpi
<<
1
)
+
k
]);
printf
(
"row %d, index %d k %d index3-ND3 %
l
d w(%d,%d,%d)
\n
"
,
row
,
index
,
k
,
index3
-
ND3
,
w
[
k
],
w
[
Kpi
+
k
],
w
[(
Kpi
<<
1
)
+
k
]);
#endif
index3
+=
96
;
index
+=
32
;
k
++
;
}
}
}
uint32_t
generate_dummy_w
(
uint32_t
D
,
uint8_t
*
w
,
uint8_t
F
)
{
uint32_t
generate_dummy_w
(
uint32_t
D
,
uint8_t
*
w
,
uint8_t
F
)
{
uint32_t
RTC
=
(
D
>>
5
),
ND
;
uint32_t
col
,
Kpi
,
index
;
int32_t
k
,
k2
;
...
...
@@ -301,8 +281,6 @@ uint32_t generate_dummy_w(uint32_t D, uint8_t *w,uint8_t F)
printf
(
"dummy sub_block_interleaving_turbo : D = %d (%d)
\n
"
,
D
,
D
*
3
);
printf
(
"RTC = %d, Kpi=%d, ND=%d, F=%d (Nulled %d)
\n
"
,
RTC
,
Kpi
,
ND
,
F
,(
2
*
F
+
3
*
ND
));
#endif
k
=
0
;
k2
=
0
;
wKpi
=
&
w
[
Kpi
];
...
...
@@ -371,9 +349,7 @@ uint32_t generate_dummy_w(uint32_t D, uint8_t *w,uint8_t F)
return
(
RTC
);
}
uint32_t
generate_dummy_w_cc
(
uint32_t
D
,
uint8_t
*
w
)
{
uint32_t
generate_dummy_w_cc
(
uint32_t
D
,
uint8_t
*
w
)
{
uint32_t
RCC
=
(
D
>>
5
),
ND
;
uint32_t
col
,
Kpi
,
index
;
int32_t
k
;
...
...
@@ -392,7 +368,6 @@ uint32_t generate_dummy_w_cc(uint32_t D, uint8_t *w)
printf
(
"RCC = %d, Kpi=%d, ND=%d, (Nulled %d)
\n
"
,
RCC
,
Kpi
,
ND
,
3
*
ND
);
#endif
// ND3 = ND*3;
// copy d02 to dD2 (for mod Kpi operation from clause (4), p.16 of 36.212
k
=
0
;
...
...
@@ -466,8 +441,6 @@ uint32_t lte_rate_matching_turbo(uint32_t RTC,
uint8_t
nb_rb
)
// uint8_t m)
{
uint32_t
Nir
,
Ncb
,
Gp
,
GpmodC
,
E
,
Ncbmod
,
ind
,
k
;
// int cnt=0;
uint8_t
*
e2
;
...
...
@@ -487,11 +460,11 @@ uint32_t lte_rate_matching_turbo(uint32_t RTC,
if
(
Mdlharq
>
0
)
{
// Downlink
Nir
=
Nsoft
/
Kmimo
/
cmin
(
8
,
Mdlharq
);
Ncb
=
cmin
(
Nir
/
C
,
3
*
(
RTC
<<
5
));
}
else
{
// Uplink
}
else
{
// Uplink
Nir
=
0
;
Ncb
=
3
*
(
RTC
<<
5
);
// Kw
}
#ifdef RM_DEBUG_TX
if
(
rvidx
==
0
&&
r
==
0
)
{
...
...
@@ -518,7 +491,6 @@ uint32_t lte_rate_matching_turbo(uint32_t RTC,
AssertFatal
(
Qm
>
0
,
"Qm is 0
\n
"
);
Gp
=
G
/
Nl
/
Qm
;
GpmodC
=
Gp
%
C
;
#ifdef RM_DEBUG
printf
(
"lte_rate_matching_turbo: Ncb %d, Kw %d, Nir/C %d, rvidx %d, G %d, Qm %d, Nl%d, r %d
\n
"
,
Ncb
,
3
*
(
RTC
<<
5
),
Nir
/
C
,
rvidx
,
G
,
Qm
,
Nl
,
r
);
#endif
...
...
@@ -529,16 +501,12 @@ uint32_t lte_rate_matching_turbo(uint32_t RTC,
E
=
Nl
*
Qm
*
((
GpmodC
==
0
?
0
:
1
)
+
(
Gp
/
C
));
Ncbmod
=
Ncb
%
(
RTC
<<
3
);
ind
=
RTC
*
(
2
+
(
rvidx
*
(((
Ncbmod
==
0
)
?
0
:
1
)
+
(
Ncb
/
(
RTC
<<
3
)))
*
2
));
#ifdef RM_DEBUG_TX
printf
(
"lte_rate_matching_turbo: E %d, k0 %d, Ncbmod %d, Ncb/(RTC<<3) %d
\n
"
,
E
,
ind
,
Ncbmod
,
Ncb
/
(
RTC
<<
3
));
#endif
//e2=e+(r*E);
e2
=
e
;
k
=
0
;
for
(;
(
ind
<
Ncb
)
&&
(
k
<
E
);
ind
++
)
{
...
...
@@ -633,25 +601,16 @@ uint32_t lte_rate_matching_turbo(uint32_t RTC,
uint32_t
lte_rate_matching_cc
(
uint32_t
RCC
,
uint16_t
E
,
uint8_t
*
w
,
uint8_t
*
e
)
{
uint8_t
*
e
)
{
uint32_t
ind
=
0
,
k
;
uint16_t
Kw
=
3
*
(
RCC
<<
5
);
#ifdef RM_DEBUG_CC
uint32_t
nulled
=
0
;
printf
(
"lte_rate_matching_cc: Kw %d, E %d
\n
"
,
Kw
,
E
);
#endif
for
(
k
=
0
;
k
<
E
;
k
++
)
{
while
(
w
[
ind
]
==
LTE_NULL
)
{
#ifdef RM_DEBUG_CC
nulled
++
;
printf
(
"RM_TX_CC : ind %d, NULL
\n
"
,
ind
);
...
...
@@ -662,7 +621,6 @@ uint32_t lte_rate_matching_cc(uint32_t RCC,
ind
=
0
;
}
e
[
k
]
=
w
[
ind
];
#ifdef RM_DEBUG_CC
// printf("k %d ind %d, w %c(%d)\n",k,ind,w[ind],w[ind]);
...
...
@@ -695,10 +653,7 @@ int lte_rate_matching_turbo_rx(uint32_t RTC,
uint8_t
Qm
,
uint8_t
Nl
,
uint8_t
r
,
uint32_t
*
E_out
)
{
uint32_t
*
E_out
)
{
uint32_t
Nir
,
Ncb
,
Gp
,
GpmodC
,
E
,
Ncbmod
,
ind
,
k
;
int16_t
*
soft_input2
;
// int32_t w_tmp;
...
...
@@ -715,8 +670,7 @@ int lte_rate_matching_turbo_rx(uint32_t RTC,
if
(
Mdlharq
>
0
)
{
// Downlink
Nir
=
Nsoft
/
Kmimo
/
cmin
(
8
,
Mdlharq
);
Ncb
=
cmin
(
Nir
/
C
,
3
*
(
RTC
<<
5
));
}
else
{
// Uplink
}
else
{
// Uplink
Nir
=
0
;
Ncb
=
3
*
(
RTC
<<
5
);
}
...
...
@@ -726,17 +680,13 @@ int lte_rate_matching_turbo_rx(uint32_t RTC,
Gp
=
G
/
Nl
/
Qm
;
GpmodC
=
Gp
%
C
;
if
(
r
<
(
C
-
(
GpmodC
)))
E
=
Nl
*
Qm
*
(
Gp
/
C
);
else
E
=
Nl
*
Qm
*
((
GpmodC
==
0
?
0
:
1
)
+
(
Gp
/
C
));
Ncbmod
=
Ncb
%
(
RTC
<<
3
);
ind
=
RTC
*
(
2
+
(
rvidx
*
(((
Ncbmod
==
0
)
?
0
:
1
)
+
(
Ncb
/
(
RTC
<<
3
)))
*
2
));
#ifdef RM_DEBUG
printf
(
"lte_rate_matching_turbo_rx: Clear %d, E %d, Ncb %d, Kw %d, rvidx %d, G %d, Qm %d, Nl%d, r %d
\n
"
,
clear
,
E
,
Ncb
,
3
*
(
RTC
<<
5
),
rvidx
,
G
,
Qm
,
Nl
,
r
);
#endif
...
...
@@ -831,10 +781,8 @@ int lte_rate_matching_turbo_rx(uint32_t RTC,
ind=0;
}
*/
*
E_out
=
E
;
return
(
0
);
}
...
...
@@ -842,28 +790,19 @@ void lte_rate_matching_cc_rx(uint32_t RCC,
uint16_t
E
,
int8_t
*
w
,
uint8_t
*
dummy_w
,
int8_t
*
soft_input
)
{
int8_t
*
soft_input
)
{
uint32_t
ind
=
0
,
k
;
uint16_t
Kw
=
3
*
(
RCC
<<
5
);
uint32_t
acc
=
1
;
int16_t
w16
[
Kw
];
#ifdef RM_DEBUG_CC
uint32_t
nulled
=
0
;
printf
(
"lte_rate_matching_cc_rx: Kw %d, E %d, w %p, soft_input %p
\n
"
,
3
*
(
RCC
<<
5
),
E
,
w
,
soft_input
);
#endif
memset
(
w
,
0
,
Kw
);
memset
(
w16
,
0
,
Kw
*
sizeof
(
int16_t
));
for
(
k
=
0
;
k
<
E
;
k
++
)
{
while
(
dummy_w
[
ind
]
==
LTE_NULL
)
{
#ifdef RM_DEBUG_CC
nulled
++
;
...
...
@@ -883,10 +822,7 @@ void lte_rate_matching_cc_rx(uint32_t RCC,
#ifdef RM_DEBUG_CC
printf
(
"RM_RX_CC k %d (%d) ind: %d (%d)
\n
"
,
k
,
soft_input
[
k
],
ind
,
w16
[
ind
]);
#endif
w16
[
ind
]
+=
soft_input
[
k
];
ind
++
;
if
(
ind
==
Kw
)
{
...
...
@@ -907,7 +843,6 @@ void lte_rate_matching_cc_rx(uint32_t RCC,
}
#ifdef RM_DEBUG_CC
printf
(
"Nulled %d
\n
"
,
nulled
);
#endif
}
...
...
@@ -915,8 +850,7 @@ void lte_rate_matching_cc_rx(uint32_t RCC,
#ifdef MAIN
void
main
()
{
void
main
()
{
uint8_t
d
[
96
+
3
+
(
3
*
6144
)];
uint8_t
w
[
3
*
6144
],
e
[
12
*
6144
];
uint32_t
RTC
,
G
,
rvidx
;
...
...
@@ -924,7 +858,6 @@ void main()
uint32_t
mod_order
=
4
;
uint32_t
first_dlsch_symbol
=
2
;
uint32_t
i
;
G
=
(
nb_rb
*
(
12
*
mod_order
)
*
(
12
-
first_dlsch_symbol
-
3
))
;
//( nb_rb * (12 * mod_order) * (14-first_dlsch_symbol-3)) :
// initialize 96 first positions to "LTE_NULL"
...
...
openair1/PHY/CODING/lte_segmentation.c
View file @
1cb484f1
...
...
@@ -38,9 +38,7 @@ int lte_segmentation(unsigned char *input_buffer,
unsigned
int
*
Cminus
,
unsigned
int
*
Kplus
,
unsigned
int
*
Kminus
,
unsigned
int
*
F
)
{
unsigned
int
*
F
)
{
unsigned
int
L
,
Bprime
,
Bprime_by_C
,
r
,
Kr
,
k
,
s
,
crc
;
if
(
B
<=
6144
)
{
...
...
@@ -56,7 +54,7 @@ int lte_segmentation(unsigned char *input_buffer,
Bprime
=
B
+
((
*
C
)
*
L
);
#ifdef DEBUG_SEGMENTATION
printf
(
"Bprime %
d
\n
"
,
Bprime
);
printf
(
"Bprime %
u
\n
"
,
Bprime
);
#endif
}
...
...
@@ -68,7 +66,7 @@ int lte_segmentation(unsigned char *input_buffer,
// Find K+
Bprime_by_C
=
Bprime
/
(
*
C
);
#ifdef DEBUG_SEGMENTATION
printf
(
"Bprime_by_C %
d
\n
"
,
Bprime_by_C
);
printf
(
"Bprime_by_C %
u
\n
"
,
Bprime_by_C
);
#endif
// Bprime = Bprime_by_C>>3;
...
...
@@ -93,17 +91,16 @@ int lte_segmentation(unsigned char *input_buffer,
*
Kminus
=
(
*
Kplus
-
32
);
}
else
if
(
Bprime_by_C
<=
6144
)
{
// increase by 8 bytes til here
*
Kplus
=
(
Bprime_by_C
>>
6
)
<<
6
;
#ifdef DEBUG_SEGMENTATION
printf
(
"Bprime_by_C_by_C %
d , Kplus %d
\n
"
,
Bprime_by_C
,
*
Kplus
);
printf
(
"Bprime_by_C_by_C %
u , Kplus %u
\n
"
,
Bprime_by_C
,
*
Kplus
);
#endif
if
(
*
Kplus
<
Bprime_by_C
)
*
Kplus
=
*
Kplus
+
64
;
#ifdef DEBUG_SEGMENTATION
printf
(
"Bprime_by_C_by_C %
d , Kplus2 %d
\n
"
,
Bprime_by_C
,
*
Kplus
);
printf
(
"Bprime_by_C_by_C %
u , Kplus2 %u
\n
"
,
Bprime_by_C
,
*
Kplus
);
#endif
*
Kminus
=
(
*
Kplus
-
64
);
}
else
{
...
...
@@ -116,25 +113,21 @@ int lte_segmentation(unsigned char *input_buffer,
*
Kminus
=
0
;
*
Cminus
=
0
;
}
else
{
// printf("More than one segment (%d), exiting \n",*C);
// exit(-1);
*
Cminus
=
((
*
C
)
*
(
*
Kplus
)
-
(
Bprime
))
/
((
*
Kplus
)
-
(
*
Kminus
));
*
Cplus
=
(
*
C
)
-
(
*
Cminus
);
}
AssertFatal
(
Bprime
<=
(
*
Cplus
)
*
(
*
Kplus
)
+
(
*
Cminus
)
*
(
*
Kminus
),
"Bprime %d < (*Cplus %d)*(*Kplus %d) + (*Cminus %d)*(*Kminus %d)
\n
"
,
Bprime
,
*
Cplus
,
*
Kplus
,
*
Cminus
,
*
Kminus
);
*
F
=
((
*
Cplus
)
*
(
*
Kplus
)
+
(
*
Cminus
)
*
(
*
Kminus
)
-
(
Bprime
));
#ifdef DEBUG_SEGMENTATION
printf
(
"C %
d, Cplus %d, Cminus %d, Kplus %d, Kminus %d, Bprime_bytes %d, Bprime %d, F %d
\n
"
,
*
C
,
*
Cplus
,
*
Cminus
,
*
Kplus
,
*
Kminus
,
Bprime
>>
3
,
Bprime
,
*
F
);
printf
(
"C %
u, Cplus %u, Cminus %u, Kplus %u, Kminus %u, Bprime_bytes %u, Bprime %u, F %u
\n
"
,
*
C
,
*
Cplus
,
*
Cminus
,
*
Kplus
,
*
Kminus
,
Bprime
>>
3
,
Bprime
,
*
F
);
#endif
if
((
input_buffer
)
&&
(
output_buffers
))
{
for
(
k
=
0
;
k
<*
F
>>
3
;
k
++
)
{
output_buffers
[
0
][
k
]
=
0
;
}
...
...
@@ -142,7 +135,6 @@ int lte_segmentation(unsigned char *input_buffer,
s
=
0
;
for
(
r
=
0
;
r
<*
C
;
r
++
)
{
if
(
r
<*
Cminus
)
Kr
=
*
Kminus
;
else
...
...
@@ -157,11 +149,11 @@ int lte_segmentation(unsigned char *input_buffer,
if
(
*
C
>
1
)
{
// add CRC
crc
=
crc24b
(
output_buffers
[
r
],
Kr
-
24
)
>>
8
;
output_buffers
[
r
][(
Kr
-
24
)
>>
3
]
=
((
uint8_t
*
)
&
crc
)[
2
];
output_buffers
[
r
][
1
+
((
Kr
-
24
)
>>
3
)]
=
((
uint8_t
*
)
&
crc
)[
1
];
output_buffers
[
r
][
2
+
((
Kr
-
24
)
>>
3
)]
=
((
uint8_t
*
)
&
crc
)[
0
];
output_buffers
[
r
][(
Kr
-
24
)
>>
3
]
=
((
uint8_t
*
)
&
crc
)[
2
];
output_buffers
[
r
][
1
+
((
Kr
-
24
)
>>
3
)]
=
((
uint8_t
*
)
&
crc
)[
1
];
output_buffers
[
r
][
2
+
((
Kr
-
24
)
>>
3
)]
=
((
uint8_t
*
)
&
crc
)[
0
];
#ifdef DEBUG_SEGMENTATION
printf
(
"Segment %
d
: CRC %x
\n
"
,
r
,
crc
);
printf
(
"Segment %
u
: CRC %x
\n
"
,
r
,
crc
);
#endif
}
...
...
@@ -175,9 +167,7 @@ int lte_segmentation(unsigned char *input_buffer,
#ifdef MAIN
main
()
{
main
()
{
unsigned
int
Kplus
,
Kminus
,
C
,
Cplus
,
Cminus
,
F
,
Bbytes
;
for
(
Bbytes
=
5
;
Bbytes
<
2
*
768
;
Bbytes
++
)
{
...
...
openair1/PHY/LTE_ESTIMATION/lte_dl_channel_estimation.c
View file @
1cb484f1
...
...
@@ -34,8 +34,7 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue,
unsigned
char
Ns
,
unsigned
char
p
,
unsigned
char
l
,
unsigned
char
symbol
)
{
unsigned
char
symbol
)
{
int
pilot
[
2
][
200
]
__attribute__
((
aligned
(
16
)));
unsigned
char
nu
,
aarx
;
unsigned
short
k
;
...
...
@@ -45,16 +44,14 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue,
// unsigned int n;
// int i;
static
int
interpolateS11S12
=
1
;
uint16_t
Nid_cell
=
(
eNB_offset
==
0
)
?
ue
->
frame_parms
.
Nid_cell
:
ue
->
measurements
.
adj_cell_id
[
eNB_offset
-
1
];
uint8_t
nushift
,
pilot0
,
pilot1
,
pilot2
,
pilot3
;
uint8_t
previous_thread_id
=
ue
->
current_thread_id
[
Ns
>>
1
]
==
0
?
(
RX_NB_TH
-
1
)
:
(
ue
->
current_thread_id
[
Ns
>>
1
]
-
1
);
int
**
dl_ch_estimates
=
ue
->
common_vars
.
common_vars_rx_data_per_thread
[
ue
->
current_thread_id
[
Ns
>>
1
]].
dl_ch_estimates
[
eNB_offset
];
int
**
dl_ch_estimates_previous
=
ue
->
common_vars
.
common_vars_rx_data_per_thread
[
previous_thread_id
].
dl_ch_estimates
[
eNB_offset
];
int
**
rxdataF
=
ue
->
common_vars
.
common_vars_rx_data_per_thread
[
ue
->
current_thread_id
[
Ns
>>
1
]].
rxdataF
;
pilot0
=
0
;
if
(
ue
->
frame_parms
.
Ncp
==
0
)
{
// normal prefix
pilot1
=
4
;
pilot2
=
7
;
...
...
@@ -81,7 +78,6 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue,
return
(
-
1
);
}
//ch_offset = (l*(ue->frame_parms.ofdm_symbol_size));
if
(
ue
->
high_speed_flag
==
0
)
// use second channel estimate position for temporary storage
ch_offset
=
ue
->
frame_parms
.
ofdm_symbol_size
;
...
...
@@ -89,11 +85,10 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue,
ch_offset
=
ue
->
frame_parms
.
ofdm_symbol_size
*
symbol
;
symbol_offset
=
ue
->
frame_parms
.
ofdm_symbol_size
*
symbol
;
k
=
(
nu
+
nushift
)
%
6
;
#ifdef DEBUG_CH
printf
(
"Channel Estimation : ThreadId %d, eNB_offset %d cell_id %d ch_offset %d, OFDM size %d, Ncp=%d, l=%d, Ns=%d, k=%d
\n
"
,
ue
->
current_thread_id
[
Ns
>>
1
],
eNB_offset
,
Nid_cell
,
ch_offset
,
ue
->
frame_parms
.
ofdm_symbol_size
,
printf
(
"Channel Estimation : ThreadId %d, eNB_offset %d cell_id %d ch_offset %d, OFDM size %d, Ncp=%d, l=%d, Ns=%d, k=%d
\n
"
,
ue
->
current_thread_id
[
Ns
>>
1
],
eNB_offset
,
Nid_cell
,
ch_offset
,
ue
->
frame_parms
.
ofdm_symbol_size
,
ue
->
frame_parms
.
Ncp
,
l
,
Ns
,
k
);
#endif
...
...
@@ -107,10 +102,8 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue,
fr
=
filt24_0r2
;
//for first pilot of rightmost RB
// f2r2=filt24_0r2;
f2r2
=
filt24_2r
;
f_dc
=
filt24_0_dcr
;
f2_dc
=
filt24_2_dcl
;
break
;
case
1
:
...
...
@@ -174,8 +167,6 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue,
break
;
}
// generate pilot
lte_dl_cell_spec_rx
(
ue
,
eNB_offset
,
...
...
@@ -184,27 +175,25 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue,
(
l
==
0
)
?
0
:
1
,
p
);
for
(
aarx
=
0
;
aarx
<
ue
->
frame_parms
.
nb_antennas_rx
;
aarx
++
)
{
pil
=
(
int16_t
*
)
&
pilot
[
p
][
0
];
rxF
=
(
int16_t
*
)
&
rxdataF
[
aarx
][((
symbol_offset
+
k
+
ue
->
frame_parms
.
first_carrier_offset
))];
dl_ch
=
(
int16_t
*
)
&
dl_ch_estimates
[(
p
<<
1
)
+
aarx
][
ch_offset
];
// if (eNb_id==0)
memset
(
dl_ch
,
0
,
4
*
(
ue
->
frame_parms
.
ofdm_symbol_size
));
if
(
ue
->
high_speed_flag
==
0
)
// multiply previous channel estimate by ch_est_alpha
multadd_complex_vector_real_scalar
(
dl_ch
-
(
ue
->
frame_parms
.
ofdm_symbol_size
<<
1
),
ue
->
ch_est_alpha
,
dl_ch
-
(
ue
->
frame_parms
.
ofdm_symbol_size
<<
1
),
1
,
ue
->
frame_parms
.
ofdm_symbol_size
);
#ifdef DEBUG_CH
printf
(
"k %d, first_carrier %d
\n
"
,
k
,
ue
->
frame_parms
.
first_carrier_offset
);
#endif
if
((
ue
->
frame_parms
.
N_RB_DL
==
6
)
||
(
ue
->
frame_parms
.
N_RB_DL
==
50
)
||
(
ue
->
frame_parms
.
N_RB_DL
==
100
))
{
//First half of pilots
// Treat first 2 pilots specially (left edge)
ch
[
0
]
=
(
int16_t
)(((
int32_t
)
pil
[
0
]
*
rxF
[
0
]
-
(
int32_t
)
pil
[
1
]
*
rxF
[
1
])
>>
15
);
...
...
@@ -219,7 +208,6 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue,
pil
+=
2
;
// Re Im
rxF
+=
12
;
dl_ch
+=
8
;
ch
[
0
]
=
(
int16_t
)(((
int32_t
)
pil
[
0
]
*
rxF
[
0
]
-
(
int32_t
)
pil
[
1
]
*
rxF
[
1
])
>>
15
);
ch
[
1
]
=
(
int16_t
)(((
int32_t
)
pil
[
0
]
*
rxF
[
1
]
+
(
int32_t
)
pil
[
1
]
*
rxF
[
0
])
>>
15
);
#ifdef DEBUG_CH
...
...
@@ -234,28 +222,22 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue,
dl_ch
+=
16
;
for
(
pilot_cnt
=
2
;
pilot_cnt
<
((
ue
->
frame_parms
.
N_RB_DL
)
-
1
);
pilot_cnt
+=
2
)
{
ch
[
0
]
=
(
int16_t
)(((
int32_t
)
pil
[
0
]
*
rxF
[
0
]
-
(
int32_t
)
pil
[
1
]
*
rxF
[
1
])
>>
15
);
//Re
ch
[
1
]
=
(
int16_t
)(((
int32_t
)
pil
[
0
]
*
rxF
[
1
]
+
(
int32_t
)
pil
[
1
]
*
rxF
[
0
])
>>
15
);
//Im
#ifdef DEBUG_CH
printf
(
"pilot %d
: rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d)
\n
"
,
pilot_cnt
,
rxF
[
0
],
rxF
[
1
],
ch
[
0
],
ch
[
1
],
pil
[
0
],
pil
[
1
]);
printf
(
"pilot %u
: rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d)
\n
"
,
pilot_cnt
,
rxF
[
0
],
rxF
[
1
],
ch
[
0
],
ch
[
1
],
pil
[
0
],
pil
[
1
]);
#endif
multadd_real_vector_complex_scalar
(
f
,
ch
,
dl_ch
,
24
);
pil
+=
2
;
// Re Im
rxF
+=
12
;
dl_ch
+=
8
;
ch
[
0
]
=
(
int16_t
)(((
int32_t
)
pil
[
0
]
*
rxF
[
0
]
-
(
int32_t
)
pil
[
1
]
*
rxF
[
1
])
>>
15
);
ch
[
1
]
=
(
int16_t
)(((
int32_t
)
pil
[
0
]
*
rxF
[
1
]
+
(
int32_t
)
pil
[
1
]
*
rxF
[
0
])
>>
15
);
#ifdef DEBUG_CH
printf
(
"pilot %d
: rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d)
\n
"
,
pilot_cnt
+
1
,
rxF
[
0
],
rxF
[
1
],
ch
[
0
],
ch
[
1
],
pil
[
0
],
pil
[
1
]);
printf
(
"pilot %u
: rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d)
\n
"
,
pilot_cnt
+
1
,
rxF
[
0
],
rxF
[
1
],
ch
[
0
],
ch
[
1
],
pil
[
0
],
pil
[
1
]);
#endif
multadd_real_vector_complex_scalar
(
f2
,
ch
,
...
...
@@ -264,29 +246,25 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue,
pil
+=
2
;
rxF
+=
12
;
dl_ch
+=
16
;
}
// printf("Second half\n");
// Second half of RBs
k
=
(
nu
+
nushift
)
%
6
;
if
(
k
>
6
)
k
-=
6
;
rxF
=
(
int16_t
*
)
&
rxdataF
[
aarx
][((
symbol_offset
+
1
+
k
))];
#ifdef DEBUG_CH
printf
(
"second half k %d
\n
"
,
k
);
#endif
for
(
pilot_cnt
=
0
;
pilot_cnt
<
((
ue
->
frame_parms
.
N_RB_DL
)
-
3
);
pilot_cnt
+=
2
)
{
for
(
pilot_cnt
=
0
;
pilot_cnt
<
((
ue
->
frame_parms
.
N_RB_DL
)
-
3
);
pilot_cnt
+=
2
)
{
ch
[
0
]
=
(
int16_t
)(((
int32_t
)
pil
[
0
]
*
rxF
[
0
]
-
(
int32_t
)
pil
[
1
]
*
rxF
[
1
])
>>
15
);
ch
[
1
]
=
(
int16_t
)(((
int32_t
)
pil
[
0
]
*
rxF
[
1
]
+
(
int32_t
)
pil
[
1
]
*
rxF
[
0
])
>>
15
);
#ifdef DEBUG_CH
printf
(
"pilot %d
: rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d)
\n
"
,
pilot_cnt
,
rxF
[
0
],
rxF
[
1
],
ch
[
0
],
ch
[
1
],
pil
[
0
],
pil
[
1
]);
printf
(
"pilot %u
: rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d)
\n
"
,
pilot_cnt
,
rxF
[
0
],
rxF
[
1
],
ch
[
0
],
ch
[
1
],
pil
[
0
],
pil
[
1
]);
#endif
multadd_real_vector_complex_scalar
(
f
,
ch
,
...
...
@@ -295,11 +273,10 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue,
pil
+=
2
;
rxF
+=
12
;
dl_ch
+=
8
;
ch
[
0
]
=
(
int16_t
)(((
int32_t
)
pil
[
0
]
*
rxF
[
0
]
-
(
int32_t
)
pil
[
1
]
*
rxF
[
1
])
>>
15
);
ch
[
1
]
=
(
int16_t
)(((
int32_t
)
pil
[
0
]
*
rxF
[
1
]
+
(
int32_t
)
pil
[
1
]
*
rxF
[
0
])
>>
15
);
#ifdef DEBUG_CH
printf
(
"pilot %d
: rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d)
\n
"
,
pilot_cnt
+
1
,
rxF
[
0
],
rxF
[
1
],
ch
[
0
],
ch
[
1
],
pil
[
0
],
pil
[
1
]);
printf
(
"pilot %u
: rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d)
\n
"
,
pilot_cnt
+
1
,
rxF
[
0
],
rxF
[
1
],
ch
[
0
],
ch
[
1
],
pil
[
0
],
pil
[
1
]);
#endif
multadd_real_vector_complex_scalar
(
f2
,
ch
,
...
...
@@ -308,13 +285,12 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue,
pil
+=
2
;
rxF
+=
12
;
dl_ch
+=
16
;
}
ch
[
0
]
=
(
int16_t
)(((
int32_t
)
pil
[
0
]
*
rxF
[
0
]
-
(
int32_t
)
pil
[
1
]
*
rxF
[
1
])
>>
15
);
ch
[
1
]
=
(
int16_t
)(((
int32_t
)
pil
[
0
]
*
rxF
[
1
]
+
(
int32_t
)
pil
[
1
]
*
rxF
[
0
])
>>
15
);
#ifdef DEBUG_CH
printf
(
"pilot %
d
: rxF -> (%d,%d) ch -> (%d,%d), pil -> (%d,%d)
\n
"
,
pilot_cnt
,
rxF
[
0
],
rxF
[
1
],
ch
[
0
],
ch
[
1
],
pil
[
0
],
pil
[
1
]);
printf
(
"pilot %
u
: rxF -> (%d,%d) ch -> (%d,%d), pil -> (%d,%d)
\n
"
,
pilot_cnt
,
rxF
[
0
],
rxF
[
1
],
ch
[
0
],
ch
[
1
],
pil
[
0
],
pil
[
1
]);
#endif
multadd_real_vector_complex_scalar
(
fr
,
ch
,
...
...
@@ -323,34 +299,25 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue,
pil
+=
2
;
// Re Im
rxF
+=
12
;
dl_ch
+=
8
;
ch
[
0
]
=
(
int16_t
)(((
int32_t
)
pil
[
0
]
*
rxF
[
0
]
-
(
int32_t
)
pil
[
1
]
*
rxF
[
1
])
>>
15
);
ch
[
1
]
=
(
int16_t
)(((
int32_t
)
pil
[
0
]
*
rxF
[
1
]
+
(
int32_t
)
pil
[
1
]
*
rxF
[
0
])
>>
15
);
#ifdef DEBUG_CH
printf
(
"pilot %
d
: rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d)
\n
"
,
pilot_cnt
+
1
,
rxF
[
0
],
rxF
[
1
],
ch
[
0
],
ch
[
1
],
pil
[
0
],
pil
[
1
]);
printf
(
"pilot %
u
: rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d)
\n
"
,
pilot_cnt
+
1
,
rxF
[
0
],
rxF
[
1
],
ch
[
0
],
ch
[
1
],
pil
[
0
],
pil
[
1
]);
#endif
multadd_real_vector_complex_scalar
(
f2r2
,
ch
,
dl_ch
,
24
);
}
else
if
(
ue
->
frame_parms
.
N_RB_DL
==
25
)
{
}
else
if
(
ue
->
frame_parms
.
N_RB_DL
==
25
)
{
//printf("Channel estimation\n");
// Treat first 2 pilots specially (left edge)
ch
[
0
]
=
(
int16_t
)(((
int32_t
)
pil
[
0
]
*
rxF
[
0
]
-
(
int32_t
)
pil
[
1
]
*
rxF
[
1
])
>>
15
);
ch
[
1
]
=
(
int16_t
)(((
int32_t
)
pil
[
0
]
*
rxF
[
1
]
+
(
int32_t
)
pil
[
1
]
*
rxF
[
0
])
>>
15
);
#ifdef DEBUG_CH
printf
(
"pilot 0 : rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d)
\n
"
,
rxF
[
0
],
rxF
[
1
],
ch
[
0
],
ch
[
1
],
pil
[
0
],
pil
[
1
]);
// ch[0] = 1024;
// ch[1] = -128;
#endif
multadd_real_vector_complex_scalar
(
fl
,
ch
,
dl_ch
,
...
...
@@ -358,17 +325,13 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue,
pil
+=
2
;
// Re Im
rxF
+=
12
;
dl_ch
+=
8
;
ch
[
0
]
=
(
int16_t
)(((
int32_t
)
pil
[
0
]
*
rxF
[
0
]
-
(
int32_t
)
pil
[
1
]
*
rxF
[
1
])
>>
15
);
ch
[
1
]
=
(
int16_t
)(((
int32_t
)
pil
[
0
]
*
rxF
[
1
]
+
(
int32_t
)
pil
[
1
]
*
rxF
[
0
])
>>
15
);
#ifdef DEBUG_CH
printf
(
"pilot 1 : rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d)
\n
"
,
rxF
[
0
],
rxF
[
1
],
ch
[
0
],
ch
[
1
],
pil
[
0
],
pil
[
1
]);
// ch[0] = 1024;
// ch[1] = -128;
#endif
multadd_real_vector_complex_scalar
(
f2l2
,
ch
,
dl_ch
,
...
...
@@ -378,21 +341,15 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue,
dl_ch
+=
16
;
for
(
pilot_cnt
=
2
;
pilot_cnt
<
24
;
pilot_cnt
+=
2
)
{
// printf("pilot[%d][%d] (%d,%d)\n",p,rb,pil[0],pil[1]);
// printf("rx[%d][%d] -> (%d,%d)\n",p,ue->frame_parms.first_carrier_offset + ue->frame_parms.nushift + 6*rb+(3*p),rxF[0],rxF[1]);
ch
[
0
]
=
(
int16_t
)(((
int32_t
)
pil
[
0
]
*
rxF
[
0
]
-
(
int32_t
)
pil
[
1
]
*
rxF
[
1
])
>>
15
);
ch
[
1
]
=
(
int16_t
)(((
int32_t
)
pil
[
0
]
*
rxF
[
1
]
+
(
int32_t
)
pil
[
1
]
*
rxF
[
0
])
>>
15
);
#ifdef DEBUG_CH
printf
(
"pilot %d : rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d)
\n
"
,
pilot_cnt
,
rxF
[
0
],
rxF
[
1
],
ch
[
0
],
ch
[
1
],
pil
[
0
],
pil
[
1
]);
printf
(
"pilot %u : rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d)
\n
"
,
pilot_cnt
,
rxF
[
0
],
rxF
[
1
],
ch
[
0
],
ch
[
1
],
pil
[
0
],
pil
[
1
]);
// ch[0] = 1024;
// ch[1] = -128;
#endif
multadd_real_vector_complex_scalar
(
f
,
ch
,
dl_ch
,
...
...
@@ -400,13 +357,10 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue,
pil
+=
2
;
// Re Im
rxF
+=
12
;
dl_ch
+=
8
;
ch
[
0
]
=
(
int16_t
)(((
int32_t
)
pil
[
0
]
*
rxF
[
0
]
-
(
int32_t
)
pil
[
1
]
*
rxF
[
1
])
>>
15
);
ch
[
1
]
=
(
int16_t
)(((
int32_t
)
pil
[
0
]
*
rxF
[
1
]
+
(
int32_t
)
pil
[
1
]
*
rxF
[
0
])
>>
15
);
#ifdef DEBUG_CH
printf
(
"pilot %d : rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d)
\n
"
,
pilot_cnt
+
1
,
rxF
[
0
],
rxF
[
1
],
ch
[
0
],
ch
[
1
],
pil
[
0
],
pil
[
1
]);
printf
(
"pilot %u : rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d)
\n
"
,
pilot_cnt
+
1
,
rxF
[
0
],
rxF
[
1
],
ch
[
0
],
ch
[
1
],
pil
[
0
],
pil
[
1
]);
// ch[0] = 1024;
// ch[1] = -128;
#endif
...
...
@@ -417,39 +371,31 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue,
pil
+=
2
;
rxF
+=
12
;
dl_ch
+=
16
;
}
ch
[
0
]
=
(
int16_t
)(((
int32_t
)
pil
[
0
]
*
rxF
[
0
]
-
(
int32_t
)
pil
[
1
]
*
rxF
[
1
])
>>
15
);
ch
[
1
]
=
(
int16_t
)(((
int32_t
)
pil
[
0
]
*
rxF
[
1
]
+
(
int32_t
)
pil
[
1
]
*
rxF
[
0
])
>>
15
);
#ifdef DEBUG_CH
printf
(
"pilot 24: rxF -> (%d,%d) ch -> (%d,%d), pil -> (%d,%d)
\n
"
,
rxF
[
0
],
rxF
[
1
],
ch
[
0
],
ch
[
1
],
pil
[
0
],
pil
[
1
]);
// ch[0] = 1024;
// ch[1] = -128;
#endif
multadd_real_vector_complex_scalar
(
f_dc
,
ch
,
dl_ch
,
24
);
pil
+=
2
;
// Re Im
dl_ch
+=
8
;
// printf("Second half\n");
// Second half of RBs
rxF
=
(
int16_t
*
)
&
rxdataF
[
aarx
][((
symbol_offset
+
1
+
k
))];
ch
[
0
]
=
(
int16_t
)(((
int32_t
)
pil
[
0
]
*
rxF
[
0
]
-
(
int32_t
)
pil
[
1
]
*
rxF
[
1
])
>>
15
);
ch
[
1
]
=
(
int16_t
)(((
int32_t
)
pil
[
0
]
*
rxF
[
1
]
+
(
int32_t
)
pil
[
1
]
*
rxF
[
0
])
>>
15
);
#ifdef DEBUG_CH
printf
(
"pilot 25: rxF -> (%d,%d) ch -> (%d,%d), pil -> (%d,%d)
\n
"
,
rxF
[
0
],
rxF
[
1
],
ch
[
0
],
ch
[
1
],
pil
[
0
],
pil
[
1
]);
// ch[0] = 1024;
// ch[1] = -128;
#endif
multadd_real_vector_complex_scalar
(
f2_dc
,
ch
,
dl_ch
,
...
...
@@ -459,19 +405,15 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue,
dl_ch
+=
16
;
for
(
pilot_cnt
=
0
;
pilot_cnt
<
22
;
pilot_cnt
+=
2
)
{
// printf("* pilot[%d][%d] (%d,%d)\n",p,rb,pil[0],pil[1]);
// printf("rx[%d][%d] -> (%d,%d)\n",p,ue->frame_parms.first_carrier_offset + ue->frame_parms.nushift + 6*rb+(3*p),rxF[0],rxF[1]);
ch
[
0
]
=
(
int16_t
)(((
int32_t
)
pil
[
0
]
*
rxF
[
0
]
-
(
int32_t
)
pil
[
1
]
*
rxF
[
1
])
>>
15
);
ch
[
1
]
=
(
int16_t
)(((
int32_t
)
pil
[
0
]
*
rxF
[
1
]
+
(
int32_t
)
pil
[
1
]
*
rxF
[
0
])
>>
15
);
#ifdef DEBUG_CH
printf
(
"pilot %d rxF -> (%d,%d) ch -> (%d,%d), pil -> (%d,%d)
\n
"
,
26
+
pilot_cnt
,
rxF
[
0
],
rxF
[
1
],
ch
[
0
],
ch
[
1
],
pil
[
0
],
pil
[
1
]);
printf
(
"pilot %u rxF -> (%d,%d) ch -> (%d,%d), pil -> (%d,%d)
\n
"
,
26
+
pilot_cnt
,
rxF
[
0
],
rxF
[
1
],
ch
[
0
],
ch
[
1
],
pil
[
0
],
pil
[
1
]);
// ch[0] = 1024;
// ch[1] = -128;
#endif
multadd_real_vector_complex_scalar
(
f
,
ch
,
dl_ch
,
...
...
@@ -479,16 +421,13 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue,
pil
+=
2
;
rxF
+=
12
;
dl_ch
+=
8
;
ch
[
0
]
=
(
int16_t
)(((
int32_t
)
pil
[
0
]
*
rxF
[
0
]
-
(
int32_t
)
pil
[
1
]
*
rxF
[
1
])
>>
15
);
ch
[
1
]
=
(
int16_t
)(((
int32_t
)
pil
[
0
]
*
rxF
[
1
]
+
(
int32_t
)
pil
[
1
]
*
rxF
[
0
])
>>
15
);
#ifdef DEBUG_CH
printf
(
"pilot %d : rxF -> (%d,%d) ch -> (%d,%d), pil -> (%d,%d)
\n
"
,
27
+
pilot_cnt
,
rxF
[
0
],
rxF
[
1
],
ch
[
0
],
ch
[
1
],
pil
[
0
],
pil
[
1
]);
printf
(
"pilot %u : rxF -> (%d,%d) ch -> (%d,%d), pil -> (%d,%d)
\n
"
,
27
+
pilot_cnt
,
rxF
[
0
],
rxF
[
1
],
ch
[
0
],
ch
[
1
],
pil
[
0
],
pil
[
1
]);
// ch[0] = 1024;
// ch[1] = -128;
#endif
multadd_real_vector_complex_scalar
(
f2
,
ch
,
dl_ch
,
...
...
@@ -496,20 +435,15 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue,
pil
+=
2
;
rxF
+=
12
;
dl_ch
+=
16
;
}
ch
[
0
]
=
(
int16_t
)(((
int32_t
)
pil
[
0
]
*
rxF
[
0
]
-
(
int32_t
)
pil
[
1
]
*
rxF
[
1
])
>>
15
);
ch
[
1
]
=
(
int16_t
)(((
int32_t
)
pil
[
0
]
*
rxF
[
1
]
+
(
int32_t
)
pil
[
1
]
*
rxF
[
0
])
>>
15
);
#ifdef DEBUG_CH
printf
(
"pilot 49: rxF -> (%d,%d) ch -> (%d,%d), pil -> (%d,%d)
\n
"
,
rxF
[
0
],
rxF
[
1
],
ch
[
0
],
ch
[
1
],
pil
[
0
],
pil
[
1
]);
// ch[0] = 1024;
// ch[1] = -128;
#endif
multadd_real_vector_complex_scalar
(
fr
,
ch
,
dl_ch
,
...
...
@@ -517,28 +451,20 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue,
pil
+=
2
;
// Re Im
rxF
+=
12
;
dl_ch
+=
8
;
ch
[
0
]
=
(
int16_t
)(((
int32_t
)
pil
[
0
]
*
rxF
[
0
]
-
(
int32_t
)
pil
[
1
]
*
rxF
[
1
])
>>
15
);
ch
[
1
]
=
(
int16_t
)(((
int32_t
)
pil
[
0
]
*
rxF
[
1
]
+
(
int32_t
)
pil
[
1
]
*
rxF
[
0
])
>>
15
);
#ifdef DEBUG_CH
printf
(
"pilot 50: rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d)
\n
"
,
rxF
[
0
],
rxF
[
1
],
ch
[
0
],
ch
[
1
],
pil
[
0
],
pil
[
1
]);
// ch[0] = 1024;
// ch[1] = -128;
#endif
multadd_real_vector_complex_scalar
(
f2r2
,
ch
,
dl_ch
,
24
);
}
else
if
(
ue
->
frame_parms
.
N_RB_DL
==
15
)
{
//printf("First Half\n");
for
(
rb
=
0
;
rb
<
28
;
rb
+=
4
)
{
//printf("aarx=%d\n",aarx);
//printf("pilot[%d][%d] (%d,%d)\n",p,rb,pil[0],pil[1]);
//printf("rx[%d][%d] -> (%d,%d)\n",p,
...
...
@@ -555,7 +481,6 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue,
pil
+=
2
;
// Re Im
rxF
+=
12
;
dl_ch
+=
8
;
ch
[
0
]
=
(
int16_t
)(((
int32_t
)
pil
[
0
]
*
rxF
[
0
]
-
(
int32_t
)
pil
[
1
]
*
rxF
[
1
])
>>
15
);
ch
[
1
]
=
(
int16_t
)(((
int32_t
)
pil
[
0
]
*
rxF
[
1
]
+
(
int32_t
)
pil
[
1
]
*
rxF
[
0
])
>>
15
);
//printf("ch -> (%d,%d)\n",ch[0],ch[1]);
...
...
@@ -566,7 +491,6 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue,
pil
+=
2
;
rxF
+=
12
;
dl_ch
+=
16
;
}
ch
[
0
]
=
(
int16_t
)(((
int32_t
)
pil
[
0
]
*
rxF
[
0
]
-
(
int32_t
)
pil
[
1
]
*
rxF
[
1
])
>>
15
);
...
...
@@ -578,13 +502,11 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue,
24
);
pil
+=
2
;
// Re Im
dl_ch
+=
8
;
//printf("Second half\n");
//Second half of RBs
rxF
=
(
int16_t
*
)
&
rxdataF
[
aarx
][((
symbol_offset
+
1
+
nushift
+
(
3
*
p
)))];
ch
[
0
]
=
(
int16_t
)(((
int32_t
)
pil
[
0
]
*
rxF
[
0
]
-
(
int32_t
)
pil
[
1
]
*
rxF
[
1
])
>>
15
);
ch
[
1
]
=
(
int16_t
)(((
int32_t
)
pil
[
0
]
*
rxF
[
1
]
+
(
int32_t
)
pil
[
1
]
*
rxF
[
0
])
>>
15
);
multadd_real_vector_complex_scalar
(
f2
,
ch
,
dl_ch
,
...
...
@@ -602,7 +524,6 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue,
// rxF[1]);
ch
[
0
]
=
(
int16_t
)(((
int32_t
)
pil
[
0
]
*
rxF
[
0
]
-
(
int32_t
)
pil
[
1
]
*
rxF
[
1
])
>>
15
);
ch
[
1
]
=
(
int16_t
)(((
int32_t
)
pil
[
0
]
*
rxF
[
1
]
+
(
int32_t
)
pil
[
1
]
*
rxF
[
0
])
>>
15
);
multadd_real_vector_complex_scalar
(
f
,
ch
,
dl_ch
,
...
...
@@ -610,10 +531,8 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue,
pil
+=
2
;
rxF
+=
12
;
dl_ch
+=
8
;
ch
[
0
]
=
(
int16_t
)(((
int32_t
)
pil
[
0
]
*
rxF
[
0
]
-
(
int32_t
)
pil
[
1
]
*
rxF
[
1
])
>>
15
);
ch
[
1
]
=
(
int16_t
)(((
int32_t
)
pil
[
0
]
*
rxF
[
1
]
+
(
int32_t
)
pil
[
1
]
*
rxF
[
0
])
>>
15
);
multadd_real_vector_complex_scalar
(
f2
,
ch
,
dl_ch
,
...
...
@@ -621,17 +540,14 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue,
pil
+=
2
;
rxF
+=
12
;
dl_ch
+=
16
;
}
}
else
{
LOG_E
(
PHY
,
"channel estimation not implemented for ue->frame_parms.N_RB_DL = %d
\n
"
,
ue
->
frame_parms
.
N_RB_DL
);
}
if
(
ue
->
perfect_ce
==
0
)
{
// Temporal Interpolation
// printf("ch_offset %d\n",ch_offset);
dl_ch
=
(
int16_t
*
)
&
dl_ch_estimates
[(
p
<<
1
)
+
aarx
][
ch_offset
];
if
(
ue
->
high_speed_flag
==
0
)
{
...
...
@@ -642,14 +558,11 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue,
if
((
symbol
==
0
))
{
// printf("Interpolating %d->0\n",4-ue->frame_parms.Ncp);
// dl_ch_prev = (int16_t *)&dl_ch_estimates[(p<<1)+aarx][(4-ue->frame_parms.Ncp)*(ue->frame_parms.ofdm_symbol_size)];
if
(((
Ns
>>
1
)
!=
0
)
||
(
((
Ns
>>
1
)
==
0
)
&&
interpolateS11S12
))
{
if
(((
Ns
>>
1
)
!=
0
)
||
(
((
Ns
>>
1
)
==
0
)
&&
interpolateS11S12
))
{
//LOG_I(PHY,"Interpolate s11-->s0 to get s12 and s13 Ns %d \n", Ns);
dl_ch_prev
=
(
int16_t
*
)
&
dl_ch_estimates_previous
[(
p
<<
1
)
+
aarx
][
pilot3
*
(
ue
->
frame_parms
.
ofdm_symbol_size
)];
multadd_complex_vector_real_scalar
(
dl_ch_prev
,
21845
,
dl_ch_prev
+
(
2
*
(
ue
->
frame_parms
.
ofdm_symbol_size
)),
1
,
ue
->
frame_parms
.
ofdm_symbol_size
);
multadd_complex_vector_real_scalar
(
dl_ch
,
10923
,
dl_ch_prev
+
(
2
*
(
ue
->
frame_parms
.
ofdm_symbol_size
)),
0
,
ue
->
frame_parms
.
ofdm_symbol_size
);
multadd_complex_vector_real_scalar
(
dl_ch_prev
,
10923
,
dl_ch_prev
+
(
2
*
((
ue
->
frame_parms
.
ofdm_symbol_size
)
<<
1
)),
1
,
ue
->
frame_parms
.
ofdm_symbol_size
);
multadd_complex_vector_real_scalar
(
dl_ch
,
21845
,
dl_ch_prev
+
(
2
*
((
ue
->
frame_parms
.
ofdm_symbol_size
)
<<
1
)),
0
,
ue
->
frame_parms
.
ofdm_symbol_size
);
}
...
...
@@ -661,50 +574,38 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue,
//LOG_I(PHY,"Interpolate s0-->s4 to get s1 s2 and s3 Ns %d \n", Ns);
if
(
ue
->
frame_parms
.
Ncp
==
0
)
{
// pilot spacing 4 symbols (1/4,1/2,3/4 combination)
uint8_t
previous_subframe
;
if
(
Ns
>>
1
==
0
)
previous_subframe
=
9
;
else
previous_subframe
=
((
Ns
>>
1
)
-
1
)
%
9
;
if
((
subframe_select
(
&
ue
->
frame_parms
,
previous_subframe
)
==
SF_UL
))
{
if
((
subframe_select
(
&
ue
->
frame_parms
,
previous_subframe
)
==
SF_UL
))
{
multadd_complex_vector_real_scalar
(
dl_ch_prev
,
328
,
dl_ch_prev
+
(
2
*
(
ue
->
frame_parms
.
ofdm_symbol_size
)),
1
,
ue
->
frame_parms
.
ofdm_symbol_size
);
multadd_complex_vector_real_scalar
(
dl_ch
,
32440
,
dl_ch_prev
+
(
2
*
(
ue
->
frame_parms
.
ofdm_symbol_size
)),
0
,
ue
->
frame_parms
.
ofdm_symbol_size
);
multadd_complex_vector_real_scalar
(
dl_ch_prev
,
328
,
dl_ch_prev
+
(
2
*
((
ue
->
frame_parms
.
ofdm_symbol_size
)
<<
1
)),
1
,
ue
->
frame_parms
.
ofdm_symbol_size
);
multadd_complex_vector_real_scalar
(
dl_ch
,
32440
,
dl_ch_prev
+
(
2
*
((
ue
->
frame_parms
.
ofdm_symbol_size
)
<<
1
)),
0
,
ue
->
frame_parms
.
ofdm_symbol_size
);
multadd_complex_vector_real_scalar
(
dl_ch_prev
,
8192
,
dl_ch_prev
+
(
3
*
2
*
(
ue
->
frame_parms
.
ofdm_symbol_size
)),
1
,
ue
->
frame_parms
.
ofdm_symbol_size
);
multadd_complex_vector_real_scalar
(
dl_ch
,
32440
,
dl_ch_prev
+
(
3
*
2
*
(
ue
->
frame_parms
.
ofdm_symbol_size
)),
0
,
ue
->
frame_parms
.
ofdm_symbol_size
);
}
else
{
}
else
{
multadd_complex_vector_real_scalar
(
dl_ch_prev
,
24576
,
dl_ch_prev
+
(
2
*
(
ue
->
frame_parms
.
ofdm_symbol_size
)),
1
,
ue
->
frame_parms
.
ofdm_symbol_size
);
multadd_complex_vector_real_scalar
(
dl_ch
,
8192
,
dl_ch_prev
+
(
2
*
(
ue
->
frame_parms
.
ofdm_symbol_size
)),
0
,
ue
->
frame_parms
.
ofdm_symbol_size
);
multadd_complex_vector_real_scalar
(
dl_ch_prev
,
16384
,
dl_ch_prev
+
(
2
*
((
ue
->
frame_parms
.
ofdm_symbol_size
)
<<
1
)),
1
,
ue
->
frame_parms
.
ofdm_symbol_size
);
multadd_complex_vector_real_scalar
(
dl_ch
,
16384
,
dl_ch_prev
+
(
2
*
((
ue
->
frame_parms
.
ofdm_symbol_size
)
<<
1
)),
0
,
ue
->
frame_parms
.
ofdm_symbol_size
);
multadd_complex_vector_real_scalar
(
dl_ch_prev
,
8192
,
dl_ch_prev
+
(
3
*
2
*
(
ue
->
frame_parms
.
ofdm_symbol_size
)),
1
,
ue
->
frame_parms
.
ofdm_symbol_size
);
multadd_complex_vector_real_scalar
(
dl_ch
,
24576
,
dl_ch_prev
+
(
3
*
2
*
(
ue
->
frame_parms
.
ofdm_symbol_size
)),
0
,
ue
->
frame_parms
.
ofdm_symbol_size
);
}
}
else
{
multadd_complex_vector_real_scalar
(
dl_ch_prev
,
328
,
dl_ch_prev
+
(
2
*
(
ue
->
frame_parms
.
ofdm_symbol_size
)),
1
,
ue
->
frame_parms
.
ofdm_symbol_size
);
multadd_complex_vector_real_scalar
(
dl_ch
,
21845
,
dl_ch_prev
+
(
2
*
(
ue
->
frame_parms
.
ofdm_symbol_size
)),
0
,
ue
->
frame_parms
.
ofdm_symbol_size
);
multadd_complex_vector_real_scalar
(
dl_ch_prev
,
21845
,
dl_ch_prev
+
(
2
*
(
ue
->
frame_parms
.
ofdm_symbol_size
)
<<
1
),
1
,
ue
->
frame_parms
.
ofdm_symbol_size
);
multadd_complex_vector_real_scalar
(
dl_ch
,
10923
,
dl_ch_prev
+
(
2
*
((
ue
->
frame_parms
.
ofdm_symbol_size
)
<<
1
)),
0
,
ue
->
frame_parms
.
ofdm_symbol_size
);
}
// pilot spacing 3 symbols (1/3,2/3 combination)
}
else
if
(
symbol
==
pilot2
)
{
dl_ch_prev
=
(
int16_t
*
)
&
dl_ch_estimates
[(
p
<<
1
)
+
aarx
][
pilot1
*
(
ue
->
frame_parms
.
ofdm_symbol_size
)];
multadd_complex_vector_real_scalar
(
dl_ch_prev
,
21845
,
dl_ch_prev
+
(
2
*
(
ue
->
frame_parms
.
ofdm_symbol_size
)),
1
,
ue
->
frame_parms
.
ofdm_symbol_size
);
multadd_complex_vector_real_scalar
(
dl_ch
,
10923
,
dl_ch_prev
+
(
2
*
(
ue
->
frame_parms
.
ofdm_symbol_size
)),
0
,
ue
->
frame_parms
.
ofdm_symbol_size
);
multadd_complex_vector_real_scalar
(
dl_ch_prev
,
10923
,
dl_ch_prev
+
(
2
*
((
ue
->
frame_parms
.
ofdm_symbol_size
)
<<
1
)),
1
,
ue
->
frame_parms
.
ofdm_symbol_size
);
multadd_complex_vector_real_scalar
(
dl_ch
,
21845
,
dl_ch_prev
+
(
2
*
((
ue
->
frame_parms
.
ofdm_symbol_size
)
<<
1
)),
0
,
ue
->
frame_parms
.
ofdm_symbol_size
);
}
else
{
// symbol == pilot3
...
...
@@ -714,54 +615,45 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue,
if
(
ue
->
frame_parms
.
Ncp
==
0
)
{
// pilot spacing 4 symbols (1/4,1/2,3/4 combination)
multadd_complex_vector_real_scalar
(
dl_ch_prev
,
24576
,
dl_ch_prev
+
(
2
*
(
ue
->
frame_parms
.
ofdm_symbol_size
)),
1
,
ue
->
frame_parms
.
ofdm_symbol_size
);
multadd_complex_vector_real_scalar
(
dl_ch
,
8192
,
dl_ch_prev
+
(
2
*
(
ue
->
frame_parms
.
ofdm_symbol_size
)),
0
,
ue
->
frame_parms
.
ofdm_symbol_size
);
multadd_complex_vector_real_scalar
(
dl_ch_prev
,
16384
,
dl_ch_prev
+
(
2
*
((
ue
->
frame_parms
.
ofdm_symbol_size
)
<<
1
)),
1
,
ue
->
frame_parms
.
ofdm_symbol_size
);
multadd_complex_vector_real_scalar
(
dl_ch
,
16384
,
dl_ch_prev
+
(
2
*
((
ue
->
frame_parms
.
ofdm_symbol_size
)
<<
1
)),
0
,
ue
->
frame_parms
.
ofdm_symbol_size
);
multadd_complex_vector_real_scalar
(
dl_ch_prev
,
8192
,
dl_ch_prev
+
(
3
*
2
*
(
ue
->
frame_parms
.
ofdm_symbol_size
)),
1
,
ue
->
frame_parms
.
ofdm_symbol_size
);
multadd_complex_vector_real_scalar
(
dl_ch
,
24576
,
dl_ch_prev
+
(
3
*
2
*
(
ue
->
frame_parms
.
ofdm_symbol_size
)),
0
,
ue
->
frame_parms
.
ofdm_symbol_size
);
}
else
{
multadd_complex_vector_real_scalar
(
dl_ch_prev
,
10923
,
dl_ch_prev
+
(
2
*
(
ue
->
frame_parms
.
ofdm_symbol_size
)),
1
,
ue
->
frame_parms
.
ofdm_symbol_size
);
multadd_complex_vector_real_scalar
(
dl_ch
,
21845
,
dl_ch_prev
+
(
2
*
(
ue
->
frame_parms
.
ofdm_symbol_size
)),
0
,
ue
->
frame_parms
.
ofdm_symbol_size
);
multadd_complex_vector_real_scalar
(
dl_ch_prev
,
21845
,
dl_ch_prev
+
(
2
*
(
ue
->
frame_parms
.
ofdm_symbol_size
)
<<
1
),
1
,
ue
->
frame_parms
.
ofdm_symbol_size
);
multadd_complex_vector_real_scalar
(
dl_ch
,
10923
,
dl_ch_prev
+
(
2
*
((
ue
->
frame_parms
.
ofdm_symbol_size
)
<<
1
)),
0
,
ue
->
frame_parms
.
ofdm_symbol_size
);
}
// pilot spacing 3 symbols (1/3,2/3 combination)
if
((
ue
->
rx_offset_diff
!=
0
)
&&
((
Ns
>>
1
)
==
9
))
{
if
((
ue
->
rx_offset_diff
!=
0
)
&&
((
Ns
>>
1
)
==
9
))
{
//LOG_I(PHY,"Extrapolate s7-->s11 to get s12 and s13 Ns %d\n", Ns);
interpolateS11S12
=
0
;
//LOG_E(PHY,"Interpolate s7--s11 s12 s13 pilot 3 Ns %d l %d symbol %d \n", Ns, l, symbol);
int16_t
*
dlChEst_ofdm11
=
(
int16_t
*
)
&
dl_ch_estimates
[(
p
<<
1
)
+
aarx
][
pilot3
*
(
ue
->
frame_parms
.
ofdm_symbol_size
)];
int16_t
*
dlChEst_ofdm7
=
(
int16_t
*
)
&
dl_ch_estimates
[(
p
<<
1
)
+
aarx
][
pilot2
*
(
ue
->
frame_parms
.
ofdm_symbol_size
)];
// interpolate ofdm s12: 5/4*ofdms11 + -1/4*ofdms7 5/4 q1.15 40960 -1/4 q1.15 8192
int16_t
*
dlChEst_ofdm12
=
(
int16_t
*
)
&
dl_ch_estimates
[(
p
<<
1
)
+
aarx
][
12
*
ue
->
frame_parms
.
ofdm_symbol_size
];
for
(
int
i
=
0
;
i
<
(
2
*
ue
->
frame_parms
.
ofdm_symbol_size
);
i
++
)
{
for
(
int
i
=
0
;
i
<
(
2
*
ue
->
frame_parms
.
ofdm_symbol_size
);
i
++
)
{
int64_t
tmp_mult
=
0
;
tmp_mult
=
((
int64_t
)
dlChEst_ofdm11
[
i
]
*
40960
-
(
int64_t
)
dlChEst_ofdm7
[
i
]
*
8192
);
tmp_mult
=
tmp_mult
>>
15
;
dlChEst_ofdm12
[
i
]
=
tmp_mult
;
}
// interpolate ofdm s13: 3/2*ofdms11 + -1/2*ofdms7 3/2 q1.15 49152 1/2 q1.15 16384
int16_t
*
dlChEst_ofdm13
=
(
int16_t
*
)
&
dl_ch_estimates
[(
p
<<
1
)
+
aarx
][
13
*
ue
->
frame_parms
.
ofdm_symbol_size
];
for
(
int
i
=
0
;
i
<
(
2
*
ue
->
frame_parms
.
ofdm_symbol_size
);
i
++
)
{
for
(
int
i
=
0
;
i
<
(
2
*
ue
->
frame_parms
.
ofdm_symbol_size
);
i
++
)
{
int64_t
tmp_mult
=
0
;
tmp_mult
=
((
int64_t
)
dlChEst_ofdm11
[
i
]
*
49152
-
(
int64_t
)
dlChEst_ofdm7
[
i
]
*
16384
);
tmp_mult
=
tmp_mult
>>
15
;
dlChEst_ofdm13
[
i
]
=
tmp_mult
;
}
}
}
}
}
}
...
...
@@ -797,16 +689,14 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue,
break
;
}
if
(
((
Ns
%
2
)
==
0
)
&&
(
l
==
pilot0
))
{
if
(
((
Ns
%
2
)
==
0
)
&&
(
l
==
pilot0
))
{
// do ifft of channel estimate
for
(
aarx
=
0
;
aarx
<
ue
->
frame_parms
.
nb_antennas_rx
;
aarx
++
)
for
(
p
=
0
;
p
<
ue
->
frame_parms
.
nb_antenna_ports_eNB
;
p
++
)
{
if
(
ue
->
common_vars
.
common_vars_rx_data_per_thread
[
ue
->
current_thread_id
[
Ns
>>
1
]].
dl_ch_estimates
[
eNB_offset
][(
p
<<
1
)
+
aarx
])
{
if
(
ue
->
common_vars
.
common_vars_rx_data_per_thread
[
ue
->
current_thread_id
[
Ns
>>
1
]].
dl_ch_estimates
[
eNB_offset
][(
p
<<
1
)
+
aarx
])
{
//LOG_I(PHY,"Channel Impulse Computation Slot %d ThreadId %d Symbol %d \n", Ns, ue->current_thread_id[Ns>>1], l);
idft
((
int16_t
*
)
&
ue
->
common_vars
.
common_vars_rx_data_per_thread
[
ue
->
current_thread_id
[
Ns
>>
1
]].
dl_ch_estimates
[
eNB_offset
][(
p
<<
1
)
+
aarx
][
8
],
(
int16_t
*
)
ue
->
common_vars
.
common_vars_rx_data_per_thread
[
ue
->
current_thread_id
[
Ns
>>
1
]].
dl_ch_estimates_time
[
eNB_offset
][(
p
<<
1
)
+
aarx
],
1
);
idft
((
int16_t
*
)
&
ue
->
common_vars
.
common_vars_rx_data_per_thread
[
ue
->
current_thread_id
[
Ns
>>
1
]].
dl_ch_estimates
[
eNB_offset
][(
p
<<
1
)
+
aarx
][
8
],
(
int16_t
*
)
ue
->
common_vars
.
common_vars_rx_data_per_thread
[
ue
->
current_thread_id
[
Ns
>>
1
]].
dl_ch_estimates_time
[
eNB_offset
][(
p
<<
1
)
+
aarx
],
1
);
}
}
}
...
...
@@ -814,7 +704,6 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue,
T
(
T_UE_PHY_DL_CHANNEL_ESTIMATE
,
T_INT
(
eNB_id
),
T_INT
(
ue
->
proc
.
proc_rxtx
[
ue
->
current_thread_id
[
Ns
>>
1
]].
frame_rx
%
1024
),
T_INT
(
ue
->
proc
.
proc_rxtx
[
ue
->
current_thread_id
[
Ns
>>
1
]].
subframe_rx
),
T_INT
(
0
),
T_BUFFER
(
&
ue
->
common_vars
.
common_vars_rx_data_per_thread
[
ue
->
current_thread_id
[
Ns
>>
1
]].
dl_ch_estimates_time
[
eNB_offset
][
0
][
0
],
512
*
4
));
return
(
0
);
}
openair1/PHY/LTE_TRANSPORT/prach.c
View file @
1cb484f1
...
...
@@ -39,7 +39,7 @@
#include "prach_extern.h"
#if (LTE_RRC_VERSION < MAKE_VERSION(14, 0, 0))
#define rx_prach0 rx_prach
#define rx_prach0 rx_prach
#endif
void
rx_prach0
(
PHY_VARS_eNB
*
eNB
,
...
...
@@ -53,11 +53,8 @@ void rx_prach0(PHY_VARS_eNB *eNB,
,
uint8_t
br_flag
,
uint8_t
ce_level
#endif
)
{
)
{
int
i
;
LTE_DL_FRAME_PARMS
*
fp
;
lte_frame_type_t
frame_type
;
uint16_t
rootSequenceIndex
;
...
...
@@ -69,7 +66,6 @@ void rx_prach0(PHY_VARS_eNB *eNB,
int16_t
*
prachF
=
NULL
;
int16_t
**
rxsigF
=
NULL
;
int
nb_rx
;
int16_t
*
prach2
;
uint8_t
preamble_index
;
uint16_t
NCS
,
NCS2
;
...
...
@@ -93,26 +89,23 @@ void rx_prach0(PHY_VARS_eNB *eNB,
int16_t
levdB
;
int
fft_size
,
log2_ifft_size
;
int16_t
prach_ifft_tmp
[
2048
*
2
]
__attribute__
((
aligned
(
32
)));
int32_t
*
prach_ifft
=
(
int32_t
*
)
NULL
;
int32_t
*
prach_ifft
=
(
int32_t
*
)
NULL
;
int32_t
**
prach_ifftp
=
(
int32_t
**
)
NULL
;
#if (LTE_RRC_VERSION >= MAKE_VERSION(14, 0, 0))
int
prach_ifft_cnt
=
0
;
#endif
if
(
ru
)
{
fp
=
&
ru
->
frame_parms
;
nb_rx
=
ru
->
nb_rx
;
}
else
if
(
eNB
)
{
}
else
if
(
eNB
)
{
fp
=
&
eNB
->
frame_parms
;
nb_rx
=
fp
->
nb_antennas_rx
;
}
else
AssertFatal
(
1
==
0
,
"rx_prach called without valid RU or eNB descriptor
\n
"
);
}
else
AssertFatal
(
1
==
0
,
"rx_prach called without valid RU or eNB descriptor
\n
"
);
frame_type
=
fp
->
frame_type
;
#if (LTE_RRC_VERSION >= MAKE_VERSION(14, 0, 0))
if
(
br_flag
==
1
)
{
AssertFatal
(
fp
->
prach_emtc_config_common
.
prach_Config_enabled
==
1
,
"emtc prach_Config is not enabled
\n
"
);
...
...
@@ -129,8 +122,7 @@ void rx_prach0(PHY_VARS_eNB *eNB,
max_preamble
+=
ce_level
;
max_preamble_energy
+=
ce_level
;
max_preamble_delay
+=
ce_level
;
}
else
}
else
#endif
{
rootSequenceIndex
=
fp
->
prach_config_common
.
rootSequenceIndex
;
...
...
@@ -148,13 +140,16 @@ void rx_prach0(PHY_VARS_eNB *eNB,
if
(
eNB
)
{
#if (LTE_RRC_VERSION >= MAKE_VERSION(14, 0, 0))
if
(
br_flag
==
1
)
{
prach_ifftp
=
eNB
->
prach_vars_br
.
prach_ifft
[
ce_level
];
subframe
=
eNB
->
proc
.
subframe_prach_br
;
prachF
=
eNB
->
prach_vars_br
.
prachF
;
rxsigF
=
eNB
->
prach_vars_br
.
rxsigF
[
ce_level
];
if
(
LOG_DEBUGFLAG
(
PRACH
)){
if
(((
ru
->
proc
.
frame_prach
)
&
1023
)
<
20
)
LOG_I
(
PHY
,
"PRACH (eNB) : running rx_prach (br_flag %d, ce_level %d) for frame %d subframe %d, prach_FreqOffset %d, prach_ConfigIndex %d, rootSequenceIndex %d, repetition number %d,numRepetitionsPrePreambleAttempt %d
\n
"
,
if
(
LOG_DEBUGFLAG
(
PRACH
))
{
if
(((
ru
->
proc
.
frame_prach
)
&
1023
)
<
20
)
LOG_I
(
PHY
,
"PRACH (eNB) : running rx_prach (br_flag %d, ce_level %d) for frame %d subframe %d, prach_FreqOffset %d, prach_ConfigIndex %d, rootSequenceIndex %d, repetition number %d,numRepetitionsPrePreambleAttempt %d
\n
"
,
br_flag
,
ce_level
,
ru
->
proc
.
frame_prach
,
subframe
,
fp
->
prach_emtc_config_common
.
prach_ConfigInfo
.
prach_FreqOffset
[
ce_level
],
prach_ConfigIndex
,
rootSequenceIndex
,
...
...
@@ -168,17 +163,20 @@ void rx_prach0(PHY_VARS_eNB *eNB,
subframe
=
eNB
->
proc
.
subframe_prach
;
prachF
=
eNB
->
prach_vars
.
prachF
;
rxsigF
=
eNB
->
prach_vars
.
rxsigF
[
0
];
if
(
LOG_DEBUGFLAG
(
PRACH
)){
if
(((
ru
->
proc
.
frame_prach
)
&
1023
)
<
20
)
LOG_I
(
PHY
,
"PRACH (eNB) : running rx_prach for subframe %d, prach_FreqOffset %d, prach_ConfigIndex %d , rootSequenceIndex %d
\n
"
,
subframe
,
fp
->
prach_config_common
.
prach_ConfigInfo
.
prach_FreqOffset
,
prach_ConfigIndex
,
rootSequenceIndex
);
}
if
(
LOG_DEBUGFLAG
(
PRACH
))
{
if
(((
ru
->
proc
.
frame_prach
)
&
1023
)
<
20
)
LOG_I
(
PHY
,
"PRACH (eNB) : running rx_prach for subframe %d, prach_FreqOffset %d, prach_ConfigIndex %d , rootSequenceIndex %d
\n
"
,
subframe
,
fp
->
prach_config_common
.
prach_ConfigInfo
.
prach_FreqOffset
,
prach_ConfigIndex
,
rootSequenceIndex
);
}
}
else
{
}
else
{
#if (LTE_RRC_VERSION >= MAKE_VERSION(14, 0, 0))
if
(
br_flag
==
1
)
{
subframe
=
ru
->
proc
.
subframe_prach_br
;
rxsigF
=
ru
->
prach_rxsigF_br
[
ce_level
];
if
(
LOG_DEBUGFLAG
(
PRACH
)){
if
(
LOG_DEBUGFLAG
(
PRACH
))
{
if
(((
ru
->
proc
.
frame_prach
)
&
1023
)
<
20
)
LOG_I
(
PHY
,
"PRACH (RU) : running rx_prach (br_flag %d, ce_level %d) for frame %d subframe %d, prach_FreqOffset %d, prach_ConfigIndex %d
\n
"
,
br_flag
,
ce_level
,
ru
->
proc
.
frame_prach
,
subframe
,
fp
->
prach_emtc_config_common
.
prach_ConfigInfo
.
prach_FreqOffset
[
ce_level
],
prach_ConfigIndex
);
}
...
...
@@ -187,12 +185,12 @@ void rx_prach0(PHY_VARS_eNB *eNB,
{
subframe
=
ru
->
proc
.
subframe_prach
;
rxsigF
=
ru
->
prach_rxsigF
;
if
(
LOG_DEBUGFLAG
(
PRACH
)){
if
(
LOG_DEBUGFLAG
(
PRACH
))
{
if
(((
ru
->
proc
.
frame_prach
)
&
1023
)
<
20
)
LOG_I
(
PHY
,
"PRACH (RU) : running rx_prach for subframe %d, prach_FreqOffset %d, prach_ConfigIndex %d
\n
"
,
subframe
,
fp
->
prach_config_common
.
prach_ConfigInfo
.
prach_FreqOffset
,
prach_ConfigIndex
);
}
}
}
AssertFatal
(
ru
!=
NULL
,
"ru is null
\n
"
);
...
...
@@ -200,21 +198,25 @@ void rx_prach0(PHY_VARS_eNB *eNB,
for
(
aa
=
0
;
aa
<
nb_rx
;
aa
++
)
{
if
(
ru
->
if_south
==
LOCAL_RF
)
{
// set the time-domain signal if we have to use it in this node
// DJP - indexing below in subframe zero takes us off the beginning of the array???
prach
[
aa
]
=
(
int16_t
*
)
&
ru
->
common
.
rxdata
[
aa
][(
subframe
*
fp
->
samples_per_tti
)
-
ru
->
N_TA_offset
];
prach
[
aa
]
=
(
int16_t
*
)
&
ru
->
common
.
rxdata
[
aa
][(
subframe
*
fp
->
samples_per_tti
)
-
ru
->
N_TA_offset
];
if
(
LOG_DUMPFLAG
(
PRACH
))
{
int32_t
en0
=
signal_energy
((
int32_t
*
)
prach
[
aa
],
fp
->
samples_per_tti
);
if
(
LOG_DUMPFLAG
(
PRACH
))
{
int32_t
en0
=
signal_energy
((
int32_t
*
)
prach
[
aa
],
fp
->
samples_per_tti
);
int8_t
dbEn0
=
dB_fixed
(
en0
);
int8_t
rach_dBm
=
dbEn0
-
ru
->
rx_total_gain_dB
;
char
buffer
[
80
];
if
(
dbEn0
>
32
&&
prach
[
0
]
!=
NULL
)
{
static
int
counter
=
0
;
sprintf
(
buffer
,
"%s%d"
,
"/tmp/prach_rx"
,
counter
);
LOG_M
(
buffer
,
"prach_rx"
,
prach
[
0
],
fp
->
samples_per_tti
,
1
,
13
);
}
if
(
dB_fixed
(
en0
)
>
32
)
{
sprintf
(
buffer
,
"rach_dBm:%d"
,
rach_dBm
);
if
(
prach
[
0
]
!=
NULL
)
LOG_M
(
"prach_rx"
,
"prach_rx"
,
prach
[
0
],
fp
->
samples_per_tti
,
1
,
1
);
LOG_I
(
PHY
,
"RU %d, br_flag %d ce_level %d frame %d subframe %d per_tti:%d prach:%p (energy %d) TA:%d %s rxdata:%p index:%d
\n
"
,
ru
->
idx
,
br_flag
,
ce_level
,
ru
->
proc
.
frame_prach
,
subframe
,
fp
->
samples_per_tti
,
prach
[
aa
],
dbEn0
,
ru
->
N_TA_offset
,
buffer
,
ru
->
common
.
rxdata
[
aa
],
...
...
@@ -237,9 +239,7 @@ void rx_prach0(PHY_VARS_eNB *eNB,
if
(
eNB
)
start_meas
(
&
eNB
->
rx_prach
);
prach_root_sequence_map
=
(
prach_fmt
<
4
)
?
prach_root_sequence_map0_3
:
prach_root_sequence_map4
;
// PDP is oversampled, e.g. 1024 sample instead of 839
// Adapt the NCS (zero-correlation zones) with oversampling factor e.g. 1024/839
NCS2
=
(
N_ZC
==
839
)
?
((
NCS
<<
10
)
/
839
)
:
((
NCS
<<
8
)
/
139
);
...
...
@@ -295,16 +295,17 @@ void rx_prach0(PHY_VARS_eNB *eNB,
case
100
:
if
(
fp
->
threequarter_fs
==
1
)
Ncp
=
(
Ncp
*
3
)
>>
2
;
break
;
}
if
(((
eNB
!=
NULL
)
&&
(
ru
->
function
!=
NGFI_RAU_IF4p5
))
||
((
eNB
==
NULL
)
&&
(
ru
->
function
==
NGFI_RRU_IF4p5
)))
{
// compute the DFTs of the PRACH temporal resources
// Do forward transform
if
(
LOG_DEBUGFLAG
(
PRACH
))
{
LOG_D
(
PHY
,
"rx_prach: Doing FFT for N_RB_UL %d nb_rx:%d Ncp:%d
\n
"
,
fp
->
N_RB_UL
,
nb_rx
,
Ncp
);
}
for
(
aa
=
0
;
aa
<
nb_rx
;
aa
++
)
{
AssertFatal
(
prach
[
aa
]
!=
NULL
,
"prach[%d] is null
\n
"
,
aa
);
prach2
=
prach
[
aa
]
+
(
Ncp
<<
1
);
...
...
@@ -412,43 +413,36 @@ void rx_prach0(PHY_VARS_eNB *eNB,
//LOG_D(PHY,"Shifting prach_rxF from %d to 0\n",k);
if
((
k
+
(
839
*
2
))
>
dftsize_x2
)
{
// PRACH signal is split around DC
memmove
((
void
*
)
&
rxsigF
[
aa
][
dftsize_x2
-
k
],(
void
*
)
&
rxsigF
[
aa
][
0
],(
k
+
(
839
*
2
)
-
dftsize_x2
)
*
2
);
memmove
((
void
*
)
&
rxsigF
[
aa
][
0
],(
void
*
)(
&
rxsigF
[
aa
][
k
]),(
dftsize_x2
-
k
)
*
2
);
memmove
((
void
*
)
&
rxsigF
[
aa
][
dftsize_x2
-
k
],(
void
*
)
&
rxsigF
[
aa
][
0
],(
k
+
(
839
*
2
)
-
dftsize_x2
)
*
2
);
memmove
((
void
*
)
&
rxsigF
[
aa
][
0
],(
void
*
)(
&
rxsigF
[
aa
][
k
]),(
dftsize_x2
-
k
)
*
2
);
}
else
// PRACH signal is not split around DC
memmove
((
void
*
)
&
rxsigF
[
aa
][
0
],(
void
*
)(
&
rxsigF
[
aa
][
k
]),
839
*
4
);
}
else
// PRACH signal is not split around DC
memmove
((
void
*
)
&
rxsigF
[
aa
][
0
],(
void
*
)(
&
rxsigF
[
aa
][
k
]),
839
*
4
);
}
}
if
((
eNB
==
NULL
)
&&
(
ru
!=
NULL
)
&&
ru
->
function
==
NGFI_RRU_IF4p5
)
{
if
((
eNB
==
NULL
)
&&
ru
->
function
==
NGFI_RRU_IF4p5
)
{
/// **** send_IF4 of rxsigF to RAU **** ///
#if (LTE_RRC_VERSION >= MAKE_VERSION(14, 0, 0))
if
(
br_flag
==
1
)
send_IF4p5
(
ru
,
ru
->
proc
.
frame_prach
,
ru
->
proc
.
subframe_prach
,
IF4p5_PRACH
+
1
+
ce_level
);
else
#endif
send_IF4p5
(
ru
,
ru
->
proc
.
frame_prach
,
ru
->
proc
.
subframe_prach
,
IF4p5_PRACH
);
return
;
}
else
if
(
eNB
!=
NULL
)
{
if
(
LOG_DEBUGFLAG
(
PRACH
))
{
int
en
=
dB_fixed
(
signal_energy
((
int32_t
*
)
&
rxsigF
[
0
][
0
],
840
));
int
en
=
dB_fixed
(
signal_energy
((
int32_t
*
)
&
rxsigF
[
0
][
0
],
840
));
if
((
en
>
60
)
&&
(
br_flag
==
1
))
LOG_I
(
PHY
,
"PRACH (br_flag %d,ce_level %d, n_ra_prb %d, k %d): Frame %d, Subframe %d => %d dB
\n
"
,
br_flag
,
ce_level
,
n_ra_prb
,
k
,
eNB
->
proc
.
frame_rx
,
eNB
->
proc
.
subframe_rx
,
en
);
}
}
// in case of RAU and prach received rx_thread wakes up prach
// here onwards is for eNodeB_3GPP or NGFI_RAU_IF4p5
preamble_offset_old
=
99
;
uint8_t
update_TA
=
4
;
uint8_t
update_TA2
=
1
;
switch
(
eNB
->
frame_parms
.
N_RB_DL
)
{
case
6
:
update_TA
=
16
;
...
...
@@ -465,18 +459,22 @@ void rx_prach0(PHY_VARS_eNB *eNB,
case
75
:
update_TA
=
3
;
update_TA2
=
2
;
break
;
case
100
:
update_TA
=
1
;
break
;
}
*
max_preamble_energy
=
0
;
for
(
preamble_index
=
0
;
preamble_index
<
64
;
preamble_index
++
)
{
if
(
LOG_DEBUGFLAG
(
PRACH
))
{
int
en
=
dB_fixed
(
signal_energy
((
int32_t
*
)
&
rxsigF
[
0
][
0
],
840
));
if
(
LOG_DEBUGFLAG
(
PRACH
)){
int
en
=
dB_fixed
(
signal_energy
((
int32_t
*
)
&
rxsigF
[
0
][
0
],
840
));
if
(
en
>
60
)
LOG_I
(
PHY
,
"frame %d, subframe %d : Trying preamble %d (br_flag %d)
\n
"
,
ru
->
proc
.
frame_prach
,
subframe
,
preamble_index
,
br_flag
);
}
if
(
restricted_set
==
0
)
{
// This is the relative offset in the root sequence table (5.7.2-4 from 36.211) for the given preamble index
preamble_offset
=
((
NCS
==
0
)
?
preamble_index
:
(
preamble_index
/
(
N_ZC
/
NCS
)));
...
...
@@ -486,9 +484,7 @@ void rx_prach0(PHY_VARS_eNB *eNB,
new_dft
=
1
;
// This is the \nu corresponding to the preamble index
preamble_shift
=
0
;
}
else
{
}
else
{
preamble_shift
-=
NCS
;
if
(
preamble_shift
<
0
)
...
...
@@ -519,7 +515,6 @@ void rx_prach0(PHY_VARS_eNB *eNB,
}
u
=
prach_root_sequence_map
[
index
];
uint16_t
n_group_ra
=
0
;
if
(
(
du
[
u
]
<
(
N_ZC
/
3
))
&&
(
du
[
u
]
>=
NCS
)
)
{
...
...
@@ -560,46 +555,51 @@ void rx_prach0(PHY_VARS_eNB *eNB,
// Compute DFT of RX signal (conjugate input, results in conjugate output) for each new rootSequenceIndex
if
(
LOG_DEBUGFLAG
(
PRACH
))
{
int
en
=
dB_fixed
(
signal_energy
((
int32_t
*
)
&
rxsigF
[
0
][
0
],
840
));
int
en
=
dB_fixed
(
signal_energy
((
int32_t
*
)
&
rxsigF
[
0
][
0
],
840
));
if
(
en
>
60
)
LOG_I
(
PHY
,
"frame %d, subframe %d : preamble index %d: offset %d, preamble shift %d (br_flag %d, en %d)
\n
"
,
ru
->
proc
.
frame_prach
,
subframe
,
preamble_index
,
preamble_offset
,
preamble_shift
,
br_flag
,
en
);
}
log2_ifft_size
=
10
;
fft_size
=
6144
;
if
(
new_dft
==
1
)
{
new_dft
=
0
;
#if (LTE_RRC_VERSION >= MAKE_VERSION(14, 0, 0))
if
(
br_flag
==
1
)
{
Xu
=
(
int16_t
*
)
eNB
->
X_u_br
[
ce_level
][
preamble_offset
-
first_nonzero_root_idx
];
Xu
=
(
int16_t
*
)
eNB
->
X_u_br
[
ce_level
][
preamble_offset
-
first_nonzero_root_idx
];
prach_ifft
=
prach_ifftp
[
prach_ifft_cnt
++
];
if
(
eNB
->
prach_vars_br
.
repetition_number
[
ce_level
]
==
1
)
memset
(
prach_ifft
,
0
,((
N_ZC
==
839
)
?
2048
:
256
)
*
sizeof
(
int32_t
));
}
else
}
else
#endif
{
Xu
=
(
int16_t
*
)
eNB
->
X_u
[
preamble_offset
-
first_nonzero_root_idx
];
Xu
=
(
int16_t
*
)
eNB
->
X_u
[
preamble_offset
-
first_nonzero_root_idx
];
prach_ifft
=
prach_ifftp
[
0
];
memset
(
prach_ifft
,
0
,((
N_ZC
==
839
)
?
2048
:
256
)
*
sizeof
(
int32_t
));
}
memset
(
prachF
,
0
,
sizeof
(
int16_t
)
*
2
*
1024
);
if
(
LOG_DUMPFLAG
(
PRACH
))
{
if
(
prach
[
0
]
!=
NULL
)
LOG_M
(
"prach_rx0.m"
,
"prach_rx0"
,
prach
[
0
],
6144
+
792
,
1
,
1
);
LOG_M
(
"prach_rx1.m"
,
"prach_rx1"
,
prach
[
1
],
6144
+
792
,
1
,
1
);
LOG_M
(
"prach_rxF0.m"
,
"prach_rxF0"
,
rxsigF
[
0
],
24576
,
1
,
1
);
LOG_M
(
"prach_rxF1.m"
,
"prach_rxF1"
,
rxsigF
[
1
],
6144
,
1
,
1
);
}
for
(
aa
=
0
;
aa
<
nb_rx
;
aa
++
)
{
for
(
aa
=
0
;
aa
<
nb_rx
;
aa
++
)
{
// Do componentwise product with Xu* on each antenna
k
=
0
;
for
(
offset
=
0
;
offset
<
(
N_ZC
<<
1
);
offset
+=
2
)
{
prachF
[
offset
]
=
(
int16_t
)(((
int32_t
)
Xu
[
offset
]
*
rxsigF
[
aa
][
k
]
+
(
int32_t
)
Xu
[
offset
+
1
]
*
rxsigF
[
aa
][
k
+
1
])
>>
15
);
prachF
[
offset
+
1
]
=
(
int16_t
)(((
int32_t
)
Xu
[
offset
]
*
rxsigF
[
aa
][
k
+
1
]
-
(
int32_t
)
Xu
[
offset
+
1
]
*
rxsigF
[
aa
][
k
])
>>
15
);
k
+=
2
;
if
(
k
==
(
12
*
2
*
fp
->
ofdm_symbol_size
))
k
=
0
;
}
...
...
@@ -608,19 +608,22 @@ void rx_prach0(PHY_VARS_eNB *eNB,
if
(
N_ZC
==
839
)
{
log2_ifft_size
=
10
;
idft1024
(
prachF
,
prach_ifft_tmp
,
1
);
// compute energy and accumulate over receive antennas and repetitions for BR
for
(
i
=
0
;
i
<
2048
;
i
++
)
for
(
i
=
0
;
i
<
2048
;
i
++
)
prach_ifft
[
i
]
+=
(
prach_ifft_tmp
[
i
<<
1
]
*
prach_ifft_tmp
[
i
<<
1
]
+
prach_ifft_tmp
[
1
+
(
i
<<
1
)]
*
prach_ifft_tmp
[
1
+
(
i
<<
1
)])
>>
10
;
}
else
{
idft256
(
prachF
,
prach_ifft_tmp
,
1
);
log2_ifft_size
=
8
;
// compute energy and accumulate over receive antennas and repetitions for BR
for
(
i
=
0
;
i
<
256
;
i
++
)
for
(
i
=
0
;
i
<
256
;
i
++
)
prach_ifft
[
i
]
+=
(
prach_ifft_tmp
[
i
<<
1
]
*
prach_ifft_tmp
[(
i
<<
1
)]
+
prach_ifft_tmp
[
1
+
(
i
<<
1
)]
*
prach_ifft_tmp
[
1
+
(
i
<<
1
)])
>>
10
;
}
if
(
LOG_DUMPFLAG
(
PRACH
))
{
if
(
aa
==
0
)
LOG_M
(
"prach_rxF_comp0.m"
,
"prach_rxF_comp0"
,
prachF
,
1024
,
1
,
1
);
if
(
aa
==
1
)
LOG_M
(
"prach_rxF_comp1.m"
,
"prach_rxF_comp1"
,
prachF
,
1024
,
1
,
1
);
}
}
// antennas_rx
...
...
@@ -628,17 +631,19 @@ void rx_prach0(PHY_VARS_eNB *eNB,
// check energy in nth time shift, for
#if (LTE_RRC_VERSION >= MAKE_VERSION(14, 0, 0))
if
((
br_flag
==
0
)
||
(
eNB
->
prach_vars_br
.
repetition_number
[
ce_level
]
==
eNB
->
frame_parms
.
prach_emtc_config_common
.
prach_ConfigInfo
.
prach_numRepetitionPerPreambleAttempt
[
ce_level
]))
#endif
{
if
(
LOG_DEBUGFLAG
(
PRACH
)){
int
en
=
dB_fixed
(
signal_energy
((
int32_t
*
)
&
rxsigF
[
0
][
0
],
840
));
if
(
LOG_DEBUGFLAG
(
PRACH
))
{
int
en
=
dB_fixed
(
signal_energy
((
int32_t
*
)
&
rxsigF
[
0
][
0
],
840
));
if
(
en
>
60
)
LOG_I
(
PHY
,
"frame %d, subframe %d: Checking for peak in time-domain (br_flag %d, en %d)
\n
"
,
ru
->
proc
.
frame_prach
,
subframe
,
br_flag
,
en
);
}
preamble_shift2
=
((
preamble_shift
==
0
)
?
0
:
((
preamble_shift
<<
log2_ifft_size
)
/
N_ZC
));
preamble_shift2
=
((
preamble_shift
==
0
)
?
0
:
((
preamble_shift
<<
log2_ifft_size
)
/
N_ZC
));
for
(
i
=
0
;
i
<
NCS2
;
i
++
)
{
lev
=
(
int32_t
)
prach_ifft
[(
preamble_shift2
+
i
)];
...
...
@@ -648,8 +653,10 @@ void rx_prach0(PHY_VARS_eNB *eNB,
*
max_preamble_energy
=
levdB
;
*
max_preamble_delay
=
((
i
*
fft_size
)
>>
log2_ifft_size
)
*
update_TA
/
update_TA2
;
*
max_preamble
=
preamble_index
;
if
(
LOG_DEBUGFLAG
(
PRACH
)){
int
en
=
dB_fixed
(
signal_energy
((
int32_t
*
)
&
rxsigF
[
0
][
0
],
840
));
if
(
LOG_DEBUGFLAG
(
PRACH
))
{
int
en
=
dB_fixed
(
signal_energy
((
int32_t
*
)
&
rxsigF
[
0
][
0
],
840
));
if
((
en
>
60
)
&&
(
br_flag
==
1
))
LOG_D
(
PHY
,
"frame %d, subframe %d : max_preamble_energy %d, max_preamble_delay %d, max_preamble %d (br_flag %d,ce_level %d, levdB %d, lev %d)
\n
"
,
ru
->
proc
.
frame_prach
,
subframe
,
...
...
@@ -658,12 +665,12 @@ void rx_prach0(PHY_VARS_eNB *eNB,
}
}
}
}
}
// preamble_index
if
(
LOG_DUMPFLAG
(
PRACH
))
{
int
en
=
dB_fixed
(
signal_energy
((
int32_t
*
)
&
rxsigF
[
0
][
0
],
840
));
int
en
=
dB_fixed
(
signal_energy
((
int32_t
*
)
&
rxsigF
[
0
][
0
],
840
));
if
(
en
>
60
)
{
k
=
(
12
*
n_ra_prb
)
-
6
*
fp
->
N_RB_UL
;
...
...
@@ -678,8 +685,7 @@ void rx_prach0(PHY_VARS_eNB *eNB,
LOG_M
(
"prach_rxF_comp0.m"
,
"prach_rxF_comp0"
,
prachF
,
1024
,
1
,
1
);
LOG_M
(
"Xu.m"
,
"xu"
,
Xu
,
N_ZC
,
1
,
1
);
LOG_M
(
"prach_ifft0.m"
,
"prach_t0"
,
prach_ifft
,
1024
,
1
,
1
);
}
else
{
}
else
{
LOG_E
(
PHY
,
"Dumping prach (br_flag %d), k = %d (n_ra_prb %d)
\n
"
,
br_flag
,
k
,
n_ra_prb
);
LOG_M
(
"rxsigF_br.m"
,
"prach_rxF_br"
,
&
rxsigF
[
0
][
0
],
12288
,
1
,
1
);
LOG_M
(
"prach_rxF_comp0_br.m"
,
"prach_rxF_comp0_br"
,
prachF
,
1024
,
1
,
1
);
...
...
@@ -687,11 +693,10 @@ void rx_prach0(PHY_VARS_eNB *eNB,
LOG_M
(
"prach_ifft0_br.m"
,
"prach_t0_br"
,
prach_ifft
,
1024
,
1
,
1
);
exit
(
-
1
);
}
}
}
/* LOG_DUMPFLAG(PRACH) */
if
(
eNB
)
stop_meas
(
&
eNB
->
rx_prach
);
if
(
eNB
)
stop_meas
(
&
eNB
->
rx_prach
);
}
#if (LTE_RRC_VERSION >= MAKE_VERSION(14, 0, 0))
...
...
@@ -704,16 +709,15 @@ void rx_prach(PHY_VARS_eNB *eNB,
uint16_t
Nf
,
uint8_t
tdd_mapindex
,
uint8_t
br_flag
)
{
int
i
;
int
prach_mask
=
0
;
if
(
br_flag
==
0
)
{
rx_prach0
(
eNB
,
ru
,
max_preamble
,
max_preamble_energy
,
max_preamble_delay
,
Nf
,
tdd_mapindex
,
0
,
0
);
}
else
{
// This is procedure for eMTC, basically handling the repetitions
}
else
{
// This is procedure for eMTC, basically handling the repetitions
prach_mask
=
is_prach_subframe
(
&
eNB
->
frame_parms
,
eNB
->
proc
.
frame_prach_br
,
eNB
->
proc
.
subframe_prach_br
);
for
(
i
=
0
;
i
<
4
;
i
++
)
{
for
(
i
=
0
;
i
<
4
;
i
++
)
{
if
((
eNB
->
frame_parms
.
prach_emtc_config_common
.
prach_ConfigInfo
.
prach_CElevel_enable
[
i
]
==
1
)
&&
((
prach_mask
&
(
1
<<
(
i
+
1
)))
>
0
))
{
// check that prach CE level is active now
...
...
@@ -722,14 +726,12 @@ void rx_prach(PHY_VARS_eNB *eNB,
// increment repetition number
eNB
->
prach_vars_br
.
repetition_number
[
i
]
++
;
// do basic PRACH reception
rx_prach0
(
eNB
,
ru
,
max_preamble
,
max_preamble_energy
,
max_preamble_delay
,
Nf
,
tdd_mapindex
,
1
,
i
);
// if last repetition, clear counter
if
(
eNB
->
prach_vars_br
.
repetition_number
[
i
]
==
eNB
->
frame_parms
.
prach_emtc_config_common
.
prach_ConfigInfo
.
prach_numRepetitionPerPreambleAttempt
[
i
])
{
eNB
->
prach_vars_br
.
repetition_number
[
i
]
=
0
;
}
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment