Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
O
OpenXG UE
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Michael Black
OpenXG UE
Commits
37d49cdd
Commit
37d49cdd
authored
Jan 01, 2016
by
Raymond Knopp
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
AVX2 updates to turbo encoder interleaver. Also speedup in RSC encoder. 40% speedup improvement.
parent
948239da
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
46 additions
and
37 deletions
+46
-37
openair1/PHY/CODING/3gpplte_sse.c
openair1/PHY/CODING/3gpplte_sse.c
+43
-36
openair1/PHY/CODING/Makefile
openair1/PHY/CODING/Makefile
+1
-1
openair1/PHY/INIT/lte_init.c
openair1/PHY/INIT/lte_init.c
+2
-0
No files found.
openair1/PHY/CODING/3gpplte_sse.c
View file @
37d49cdd
...
@@ -59,16 +59,12 @@ unsigned long long threegpplte_interleaver_tmp;
...
@@ -59,16 +59,12 @@ unsigned long long threegpplte_interleaver_tmp;
#if defined(__x86_64__) || defined(__i386__)
#if defined(__x86_64__) || defined(__i386__)
struct
treillis
{
struct
treillis
{
union
{
union
{
__m64
systematic_64
[
3
];
__m64
systematic_andp1_64
[
3
];
char
systematic_8
[
24
];
uint8_t
systematic_andp1_8
[
24
];
};
union
{
__m64
parity1_64
[
3
];
char
parity1_8
[
24
];
};
};
union
{
union
{
__m64
parity2_64
[
3
];
__m64
parity2_64
[
3
];
char
parity2_8
[
24
];
uint8_t
parity2_8
[
24
];
};
};
int
exit_state
;
int
exit_state
;
}
__attribute__
((
aligned
(
64
)));
}
__attribute__
((
aligned
(
64
)));
...
@@ -77,12 +73,8 @@ struct treillis {
...
@@ -77,12 +73,8 @@ struct treillis {
struct
treillis
{
struct
treillis
{
union
{
union
{
uint8x8_t
systematic_64
[
3
];
uint8x8_t
systematic_andp1_64
[
3
];
char
systematic_8
[
24
];
char
systematic_andp1_8
[
24
];
}
__attribute__
((
aligned
(
64
)));
union
{
uint8x8_t
parity1_64
[
3
];
char
parity1_8
[
24
];
}
__attribute__
((
aligned
(
64
)));
}
__attribute__
((
aligned
(
64
)));
union
{
union
{
uint8x8_t
parity2_64
[
3
];
uint8x8_t
parity2_64
[
3
];
...
@@ -93,6 +85,7 @@ struct treillis {
...
@@ -93,6 +85,7 @@ struct treillis {
#endif
#endif
struct
treillis
all_treillis
[
8
][
256
];
struct
treillis
all_treillis
[
8
][
256
];
int
all_treillis_initialized
=
0
;
int
all_treillis_initialized
=
0
;
static
inline
unsigned
char
threegpplte_rsc
(
unsigned
char
input
,
unsigned
char
*
state
)
static
inline
unsigned
char
threegpplte_rsc
(
unsigned
char
input
,
unsigned
char
*
state
)
...
@@ -118,18 +111,20 @@ void treillis_table_init(void)
...
@@ -118,18 +111,20 @@ void treillis_table_init(void)
unsigned
char
v
,
current_state
;
unsigned
char
v
,
current_state
;
// clear all_treillis
// clear all_treillis
for
(
i
=
0
;
i
<
8
;
i
++
)
for
(
i
=
0
;
i
<
8
;
i
++
)
{
bzero
(
all_treillis
[
i
],
sizeof
(
all_treillis
[
0
])
);
bzero
(
all_treillis
[
i
],
sizeof
(
all_treillis
[
0
])
);
}
for
(
i
=
0
;
i
<
8
;
i
++
)
{
//all possible initial states
for
(
i
=
0
;
i
<
8
;
i
++
)
{
//all possible initial states
for
(
j
=
0
;
j
<=
255
;
j
++
)
{
// all possible values of a byte
for
(
j
=
0
;
j
<=
255
;
j
++
)
{
// all possible values of a byte
current_state
=
i
;
current_state
=
i
;
for
(
b
=
0
;
b
<
8
;
b
++
)
{
// pre-compute the image of the byte j in _m128i vector right place
for
(
b
=
0
;
b
<
8
;
b
++
)
{
// pre-compute the image of the byte j in _m128i vector right place
all_treillis
[
i
][
j
].
systematic_8
[
b
*
3
]
=
(
j
&
(
1
<<
(
7
-
b
)))
>>
(
7
-
b
);
all_treillis
[
i
][
j
].
systematic_
andp1_
8
[
b
*
3
]
=
(
j
&
(
1
<<
(
7
-
b
)))
>>
(
7
-
b
);
v
=
threegpplte_rsc
(
all_treillis
[
i
][
j
].
systematic_8
[
b
*
3
]
,
v
=
threegpplte_rsc
(
all_treillis
[
i
][
j
].
systematic_
andp1_
8
[
b
*
3
]
,
&
current_state
);
&
current_state
);
all_treillis
[
i
][
j
].
parity1_8
[
b
*
3
+
1
]
=
v
;
// for the yparity1
all_treillis
[
i
][
j
].
systematic_andp1_8
[
b
*
3
+
1
]
=
v
;
// for the yparity1
// all_treillis[i][j].parity1_8[b*3+1]=v; // for the yparity1
all_treillis
[
i
][
j
].
parity2_8
[
b
*
3
+
2
]
=
v
;
// for the yparity2
all_treillis
[
i
][
j
].
parity2_8
[
b
*
3
+
2
]
=
v
;
// for the yparity2
}
}
...
@@ -236,10 +231,12 @@ char interleave_compact_byte(short * base_interleaver,unsigned char * input, uns
...
@@ -236,10 +231,12 @@ char interleave_compact_byte(short * base_interleaver,unsigned char * input, uns
if
((
n
&
31
)
>
0
)
if
((
n
&
31
)
>
0
)
loop
++
;
loop
++
;
#endif
#endif
for
(
i
=
0
;
i
<
loop
;
i
++
)
{
for
(
i
=
0
;
i
<
loop
;
i
++
)
{
/
* int cur_byte=i<<3; */
/
/ int cur_byte=i<<3;
/
* for (b=0;b<8;b++) */
/
/ for (b=0;b<8;b++)
/
* expandInput[cur_byte+b] = (input[i]&(1<<(7-b)))>>(7-b); */
/
/ expandInput[cur_byte+b] = (input[i]&(1<<(7-b)))>>(7-b);
#if defined(__x86_64__) || defined(__i386__)
#if defined(__x86_64__) || defined(__i386__)
#ifndef __AVX2__
#ifndef __AVX2__
...
@@ -419,6 +416,7 @@ char interleave_compact_byte(short * base_interleaver,unsigned char * input, uns
...
@@ -419,6 +416,7 @@ char interleave_compact_byte(short * base_interleaver,unsigned char * input, uns
i_128
++
;
i_128
++
;
#endif
#endif
}
}
short
*
ptr_intl
=
base_interleaver
;
short
*
ptr_intl
=
base_interleaver
;
#if defined(__x86_64) || defined(__i386__)
#if defined(__x86_64) || defined(__i386__)
...
@@ -438,7 +436,7 @@ char interleave_compact_byte(short * base_interleaver,unsigned char * input, uns
...
@@ -438,7 +436,7 @@ char interleave_compact_byte(short * base_interleaver,unsigned char * input, uns
uint8x16_t
Powers
=
vld1q_u8
(
_Powers
);
uint8x16_t
Powers
=
vld1q_u8
(
_Powers
);
uint8_t
*
systematic2_ptr
=
(
uint8_t
*
)
output
;
uint8_t
*
systematic2_ptr
=
(
uint8_t
*
)
output
;
#endif
#endif
#ifndef __AVX2
#ifndef __AVX2
__
int
input_length_words
=
n
>>
1
;
int
input_length_words
=
n
>>
1
;
#else
#else
int
input_length_words
=
n
>>
2
;
int
input_length_words
=
n
>>
2
;
...
@@ -473,6 +471,7 @@ char interleave_compact_byte(short * base_interleaver,unsigned char * input, uns
...
@@ -473,6 +471,7 @@ char interleave_compact_byte(short * base_interleaver,unsigned char * input, uns
tmp
=
_mm256_insert_epi8
(
tmp
,
expandInput
[
*
ptr_intl
++
],
2
);
tmp
=
_mm256_insert_epi8
(
tmp
,
expandInput
[
*
ptr_intl
++
],
2
);
tmp
=
_mm256_insert_epi8
(
tmp
,
expandInput
[
*
ptr_intl
++
],
1
);
tmp
=
_mm256_insert_epi8
(
tmp
,
expandInput
[
*
ptr_intl
++
],
1
);
tmp
=
_mm256_insert_epi8
(
tmp
,
expandInput
[
*
ptr_intl
++
],
0
);
tmp
=
_mm256_insert_epi8
(
tmp
,
expandInput
[
*
ptr_intl
++
],
0
);
tmp
=
_mm256_insert_epi8
(
tmp
,
expandInput
[
*
ptr_intl
++
],
8
+
7
);
tmp
=
_mm256_insert_epi8
(
tmp
,
expandInput
[
*
ptr_intl
++
],
8
+
7
);
tmp
=
_mm256_insert_epi8
(
tmp
,
expandInput
[
*
ptr_intl
++
],
8
+
6
);
tmp
=
_mm256_insert_epi8
(
tmp
,
expandInput
[
*
ptr_intl
++
],
8
+
6
);
tmp
=
_mm256_insert_epi8
(
tmp
,
expandInput
[
*
ptr_intl
++
],
8
+
5
);
tmp
=
_mm256_insert_epi8
(
tmp
,
expandInput
[
*
ptr_intl
++
],
8
+
5
);
...
@@ -481,6 +480,7 @@ char interleave_compact_byte(short * base_interleaver,unsigned char * input, uns
...
@@ -481,6 +480,7 @@ char interleave_compact_byte(short * base_interleaver,unsigned char * input, uns
tmp
=
_mm256_insert_epi8
(
tmp
,
expandInput
[
*
ptr_intl
++
],
8
+
2
);
tmp
=
_mm256_insert_epi8
(
tmp
,
expandInput
[
*
ptr_intl
++
],
8
+
2
);
tmp
=
_mm256_insert_epi8
(
tmp
,
expandInput
[
*
ptr_intl
++
],
8
+
1
);
tmp
=
_mm256_insert_epi8
(
tmp
,
expandInput
[
*
ptr_intl
++
],
8
+
1
);
tmp
=
_mm256_insert_epi8
(
tmp
,
expandInput
[
*
ptr_intl
++
],
8
+
0
);
tmp
=
_mm256_insert_epi8
(
tmp
,
expandInput
[
*
ptr_intl
++
],
8
+
0
);
tmp
=
_mm256_insert_epi8
(
tmp
,
expandInput
[
*
ptr_intl
++
],
16
+
7
);
tmp
=
_mm256_insert_epi8
(
tmp
,
expandInput
[
*
ptr_intl
++
],
16
+
7
);
tmp
=
_mm256_insert_epi8
(
tmp
,
expandInput
[
*
ptr_intl
++
],
16
+
6
);
tmp
=
_mm256_insert_epi8
(
tmp
,
expandInput
[
*
ptr_intl
++
],
16
+
6
);
tmp
=
_mm256_insert_epi8
(
tmp
,
expandInput
[
*
ptr_intl
++
],
16
+
5
);
tmp
=
_mm256_insert_epi8
(
tmp
,
expandInput
[
*
ptr_intl
++
],
16
+
5
);
...
@@ -488,7 +488,8 @@ char interleave_compact_byte(short * base_interleaver,unsigned char * input, uns
...
@@ -488,7 +488,8 @@ char interleave_compact_byte(short * base_interleaver,unsigned char * input, uns
tmp
=
_mm256_insert_epi8
(
tmp
,
expandInput
[
*
ptr_intl
++
],
16
+
3
);
tmp
=
_mm256_insert_epi8
(
tmp
,
expandInput
[
*
ptr_intl
++
],
16
+
3
);
tmp
=
_mm256_insert_epi8
(
tmp
,
expandInput
[
*
ptr_intl
++
],
16
+
2
);
tmp
=
_mm256_insert_epi8
(
tmp
,
expandInput
[
*
ptr_intl
++
],
16
+
2
);
tmp
=
_mm256_insert_epi8
(
tmp
,
expandInput
[
*
ptr_intl
++
],
16
+
1
);
tmp
=
_mm256_insert_epi8
(
tmp
,
expandInput
[
*
ptr_intl
++
],
16
+
1
);
tmp
=
_mm256_insert_epi8
(
tmp
,
expandInput
[
*
ptr_intl
++
],
24
+
0
);
tmp
=
_mm256_insert_epi8
(
tmp
,
expandInput
[
*
ptr_intl
++
],
16
+
0
);
tmp
=
_mm256_insert_epi8
(
tmp
,
expandInput
[
*
ptr_intl
++
],
24
+
7
);
tmp
=
_mm256_insert_epi8
(
tmp
,
expandInput
[
*
ptr_intl
++
],
24
+
7
);
tmp
=
_mm256_insert_epi8
(
tmp
,
expandInput
[
*
ptr_intl
++
],
24
+
6
);
tmp
=
_mm256_insert_epi8
(
tmp
,
expandInput
[
*
ptr_intl
++
],
24
+
6
);
tmp
=
_mm256_insert_epi8
(
tmp
,
expandInput
[
*
ptr_intl
++
],
24
+
5
);
tmp
=
_mm256_insert_epi8
(
tmp
,
expandInput
[
*
ptr_intl
++
],
24
+
5
);
...
@@ -497,6 +498,7 @@ char interleave_compact_byte(short * base_interleaver,unsigned char * input, uns
...
@@ -497,6 +498,7 @@ char interleave_compact_byte(short * base_interleaver,unsigned char * input, uns
tmp
=
_mm256_insert_epi8
(
tmp
,
expandInput
[
*
ptr_intl
++
],
24
+
2
);
tmp
=
_mm256_insert_epi8
(
tmp
,
expandInput
[
*
ptr_intl
++
],
24
+
2
);
tmp
=
_mm256_insert_epi8
(
tmp
,
expandInput
[
*
ptr_intl
++
],
24
+
1
);
tmp
=
_mm256_insert_epi8
(
tmp
,
expandInput
[
*
ptr_intl
++
],
24
+
1
);
tmp
=
_mm256_insert_epi8
(
tmp
,
expandInput
[
*
ptr_intl
++
],
24
+
0
);
tmp
=
_mm256_insert_epi8
(
tmp
,
expandInput
[
*
ptr_intl
++
],
24
+
0
);
*
systematic2_ptr
++=
(
unsigned
int
)
_mm256_movemask_epi8
(
tmp
);
*
systematic2_ptr
++=
(
unsigned
int
)
_mm256_movemask_epi8
(
tmp
);
#endif
#endif
#elif defined(__arm__)
#elif defined(__arm__)
...
@@ -552,8 +554,9 @@ void threegpplte_turbo_encoder(unsigned char *input,
...
@@ -552,8 +554,9 @@ void threegpplte_turbo_encoder(unsigned char *input,
unsigned
short
input_length_bits
=
input_length_bytes
<<
3
;
unsigned
short
input_length_bits
=
input_length_bytes
<<
3
;
short
*
base_interleaver
;
short
*
base_interleaver
;
if
(
all_treillis_initialized
==
0
)
if
(
all_treillis_initialized
==
0
)
{
treillis_table_init
();
treillis_table_init
();
}
// look for f1 and f2 precomputed interleaver values
// look for f1 and f2 precomputed interleaver values
for
(
i
=
0
;
i
<
188
&&
f1f2mat
[
i
].
nb_bits
!=
input_length_bits
;
i
++
);
for
(
i
=
0
;
i
<
188
&&
f1f2mat
[
i
].
nb_bits
!=
input_length_bits
;
i
++
);
...
@@ -566,7 +569,7 @@ void threegpplte_turbo_encoder(unsigned char *input,
...
@@ -566,7 +569,7 @@ void threegpplte_turbo_encoder(unsigned char *input,
}
}
unsigned
char
systematic2
[
768
];
unsigned
char
systematic2
[
768
]
__attribute__
((
aligned
(
32
)))
;
interleave_compact_byte
(
base_interleaver
,
input
,
systematic2
,
input_length_bytes
);
interleave_compact_byte
(
base_interleaver
,
input
,
systematic2
,
input_length_bytes
);
#if defined(__x86_64__) || defined(__i386__)
#if defined(__x86_64__) || defined(__i386__)
...
@@ -580,22 +583,26 @@ void threegpplte_turbo_encoder(unsigned char *input,
...
@@ -580,22 +583,26 @@ void threegpplte_turbo_encoder(unsigned char *input,
for
(
state0
=
state1
=
i
=
0
;
i
<
input_length_bytes
;
i
++
)
{
for
(
state0
=
state1
=
i
=
0
;
i
<
input_length_bytes
;
i
++
)
{
cur_s1
=
input
[
i
];
cur_s1
=
input
[
i
];
cur_s2
=
systematic2
[
i
];
cur_s2
=
systematic2
[
i
];
for
(
code_rate
=
0
;
code_rate
<
3
;
code_rate
++
)
{
for
(
code_rate
=
0
;
code_rate
<
3
;
code_rate
++
)
{
#if defined(__x86_64__) || defined(__i386__)
#if defined(__x86_64__) || defined(__i386__)
*
ptr_output
++
=
_mm_add_pi8
(
all_treillis
[
state0
][
cur_s1
].
systematic_64
[
code_rate
],
/*
_mm_add_pi8
(
all_treillis
[
state0
][
cur_s1
].
parity1_64
[
code_rate
],
*ptr_output++ = _mm_add_pi8(all_treillis[state0][cur_s1].systematic_64[code_rate],
all_treillis
[
state1
][
cur_s2
].
parity2_64
[
code_rate
]));
_mm_add_pi8(all_treillis[state0][cur_s1].parity1_64[code_rate],
all_treillis[state1][cur_s2].parity2_64[code_rate]));
*/
*
ptr_output
++
=
_mm_add_pi8
(
all_treillis
[
state0
][
cur_s1
].
systematic_andp1_64
[
code_rate
],
all_treillis
[
state1
][
cur_s2
].
parity2_64
[
code_rate
]);
#elif defined(__arm__)
#elif defined(__arm__)
uint8x8_t
ptmp
=
vadd_u8
(
all_treillis
[
state0
][
cur_s1
].
parity1_64
[
code_rate
],
*
ptr_output
++
=
vadd_u8
(
all_treillis
[
state0
][
cur_s1
].
systematic_andp1_64
[
code_rate
],
all_treillis
[
state1
][
cur_s2
].
parity2_64
[
code_rate
]);
all_treillis
[
state0
][
cur_s1
].
parity1_64
[
code_rate
]);
*
ptr_output
++
=
vadd_u8
(
all_treillis
[
state0
][
cur_s1
].
systematic_64
[
code_rate
],
ptmp
);
#endif
#endif
}
}
state0
=
all_treillis
[
state0
][
cur_s1
].
exit_state
;
state0
=
all_treillis
[
state0
][
cur_s1
].
exit_state
;
state1
=
all_treillis
[
state1
][
cur_s2
].
exit_state
;
state1
=
all_treillis
[
state1
][
cur_s2
].
exit_state
;
}
}
x
=
output
+
(
input_length_bits
*
3
);
x
=
output
+
(
input_length_bits
*
3
);
...
...
openair1/PHY/CODING/Makefile
View file @
37d49cdd
...
@@ -5,7 +5,7 @@ RATE12CC_SRC = ccoding_byte.c viterbi.c crc_byte.c
...
@@ -5,7 +5,7 @@ RATE12CC_SRC = ccoding_byte.c viterbi.c crc_byte.c
all
:
3gpplte_sse
all
:
3gpplte_sse
3gpplte_sse
:
$(TURBO_SRC)
3gpplte_sse
:
$(TURBO_SRC)
gcc
-o
3gpplte_sse 3gpplte_sse.c
-msse4
-Wall
-g
-ggdb
-D
MAIN
gcc
-o
3gpplte_sse 3gpplte_sse.c
-msse4
-Wall
-g
-ggdb
-D
TC_MAIN
-I
../..
...
...
openair1/PHY/INIT/lte_init.c
View file @
37d49cdd
...
@@ -888,6 +888,8 @@ void phy_init_lte_top(LTE_DL_FRAME_PARMS *lte_frame_parms)
...
@@ -888,6 +888,8 @@ void phy_init_lte_top(LTE_DL_FRAME_PARMS *lte_frame_parms)
ccodelte_init
();
ccodelte_init
();
ccodelte_init_inv
();
ccodelte_init_inv
();
treillis_table_init
();
phy_generate_viterbi_tables
();
phy_generate_viterbi_tables
();
phy_generate_viterbi_tables_lte
();
phy_generate_viterbi_tables_lte
();
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment