Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
O
OpenXG-RAN
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
wangjie
OpenXG-RAN
Commits
71a8d19b
Commit
71a8d19b
authored
May 18, 2020
by
Sy
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
use of avx2 & avx512 at CN Processing level
parent
3ed622af
Changes
12
Hide whitespace changes
Inline
Side-by-side
Showing
12 changed files
with
827 additions
and
129 deletions
+827
-129
cmake_targets/CMakeLists.txt
cmake_targets/CMakeLists.txt
+8
-4
openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_decoder.c
openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_decoder.c
+6
-3
openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_avx2/Makefile
...ODING/nrLDPC_decoder/nrLDPC_tools/generator_avx2/Makefile
+5
-5
openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_avx2/cnProc_gen_avx2.c
...DPC_decoder/nrLDPC_tools/generator_avx2/cnProc_gen_avx2.c
+691
-0
openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_avx2/cnProc_gen_avx2.h
...DPC_decoder/nrLDPC_tools/generator_avx2/cnProc_gen_avx2.h
+6
-0
openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_avx2/main.c
.../CODING/nrLDPC_decoder/nrLDPC_tools/generator_avx2/main.c
+17
-0
openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_avx512/Makefile
...ING/nrLDPC_decoder/nrLDPC_tools/generator_avx512/Makefile
+27
-0
openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_avx512/cnProc_gen_avx512.c
...decoder/nrLDPC_tools/generator_avx512/cnProc_gen_avx512.c
+43
-37
openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_avx512/cnProc_gen_avx512.h
...decoder/nrLDPC_tools/generator_avx512/cnProc_gen_avx512.h
+6
-0
openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_avx512/main.c
...ODING/nrLDPC_decoder/nrLDPC_tools/generator_avx512/main.c
+16
-0
openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/nrLDPC_cnProc_BG1_Z384_13.h
...r/nrLDPC_tools/ldpc_gen_files/nrLDPC_cnProc_BG1_Z384_13.h
+2
-1
openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/main.c
openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/main.c
+0
-79
No files found.
cmake_targets/CMakeLists.txt
View file @
71a8d19b
...
...
@@ -1305,26 +1305,30 @@ set(PHY_TURBOIF
# LDPC source list using ldpc_encoder.c, the nrLDPC decoder and the generated
# BG1 / Z=384 / rate-1/3 check-node kernels (plain, AVX2 and AVX512 variants).
set(PHY_LDPC_ORIG_SRC
  ${OPENAIR1_DIR}/PHY/CODING/nrLDPC_decoder/nrLDPC_decoder.c
  ${OPENAIR1_DIR}/PHY/CODING/nrLDPC_encoder/ldpc_encoder.c
  ${OPENAIR1_DIR}/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/nrLDPC_cnProc_BG1_Z384_13.c
  ${OPENAIR1_DIR}/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/nrLDPC_cnProc_BG1_Z384_13_AVX2.c
  ${OPENAIR1_DIR}/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/nrLDPC_cnProc_BG1_Z384_13_AVX512.c
  )
# LDPC source list using ldpc_encoder_optim.c, the nrLDPC decoder and the
# generated BG1 / Z=384 / rate-1/3 check-node kernels (plain, AVX2, AVX512).
set(PHY_LDPC_OPTIM_SRC
  ${OPENAIR1_DIR}/PHY/CODING/nrLDPC_decoder/nrLDPC_decoder.c
  ${OPENAIR1_DIR}/PHY/CODING/nrLDPC_encoder/ldpc_encoder_optim.c
  ${OPENAIR1_DIR}/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/nrLDPC_cnProc_BG1_Z384_13.c
  ${OPENAIR1_DIR}/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/nrLDPC_cnProc_BG1_Z384_13_AVX2.c
  ${OPENAIR1_DIR}/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/nrLDPC_cnProc_BG1_Z384_13_AVX512.c
  )
# LDPC source list using ldpc_encoder_optim8seg.c, the nrLDPC decoder and the
# generated BG1 / Z=384 / rate-1/3 check-node kernels (plain, AVX2, AVX512).
# The last entry must be the AVX512 kernel (it was a duplicate of the AVX2
# entry): nrLDPC_decoder.c calls nrLDPC_cnProc_BG1_Z384_13_AVX512, so leaving
# it out of this list leaves that symbol undefined for targets built from it.
set(PHY_LDPC_OPTIM8SEG_SRC
  ${OPENAIR1_DIR}/PHY/CODING/nrLDPC_decoder/nrLDPC_decoder.c
  ${OPENAIR1_DIR}/PHY/CODING/nrLDPC_encoder/ldpc_encoder_optim8seg.c
  ${OPENAIR1_DIR}/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/nrLDPC_cnProc_BG1_Z384_13.c
  ${OPENAIR1_DIR}/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/nrLDPC_cnProc_BG1_Z384_13_AVX2.c
  ${OPENAIR1_DIR}/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/nrLDPC_cnProc_BG1_Z384_13_AVX512.c
  )
# LDPC source list using ldpc_encoder_optim8segmulti.c, the nrLDPC decoder and
# the generated BG1 / Z=384 / rate-1/3 check-node kernels (plain, AVX2, AVX512).
set(PHY_LDPC_OPTIM8SEGMULTI_SRC
  ${OPENAIR1_DIR}/PHY/CODING/nrLDPC_decoder/nrLDPC_decoder.c
  ${OPENAIR1_DIR}/PHY/CODING/nrLDPC_encoder/ldpc_encoder_optim8segmulti.c
  ${OPENAIR1_DIR}/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/nrLDPC_cnProc_BG1_Z384_13.c
  ${OPENAIR1_DIR}/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/nrLDPC_cnProc_BG1_Z384_13_AVX2.c
  ${OPENAIR1_DIR}/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/nrLDPC_cnProc_BG1_Z384_13_AVX512.c
  )
set
(
PHY_NR_CODINGIF
...
...
openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_decoder.c
View file @
71a8d19b
...
...
@@ -147,7 +147,8 @@ static inline uint32_t nrLDPC_decoder_core(int8_t* p_llr, int8_t* p_out, t_nrLDP
#endif
if
(
BG
==
1
)
{
if
(
Z
==
384
){
nrLDPC_cnProc_BG1_Z384_13
(
p_procBuf
->
cnProcBuf
,
p_procBuf
->
cnProcBufRes
);
nrLDPC_cnProc_BG1_Z384_13_AVX512
(
p_procBuf
->
cnProcBuf
,
p_procBuf
->
cnProcBufRes
);
//nrLDPC_cnProc_BG1_Z384_13_AVX2(p_procBuf->cnProcBuf,p_procBuf->cnProcBufRes);
}
else
{
nrLDPC_cnProc_BG1
(
p_lut
,
p_procBuf
,
Z
);
}
...
...
@@ -250,7 +251,8 @@ static inline uint32_t nrLDPC_decoder_core(int8_t* p_llr, int8_t* p_out, t_nrLDP
if
(
BG
==
1
)
{
if
(
Z
==
384
){
nrLDPC_cnProc_BG1_Z384_13
(
p_procBuf
->
cnProcBuf
,
p_procBuf
->
cnProcBufRes
);
nrLDPC_cnProc_BG1_Z384_13_AVX512
(
p_procBuf
->
cnProcBuf
,
p_procBuf
->
cnProcBufRes
);
//nrLDPC_cnProc_BG1_Z384_13_AVX2(p_procBuf->cnProcBuf,p_procBuf->cnProcBufRes);
}
else
{
nrLDPC_cnProc_BG1
(
p_lut
,
p_procBuf
,
Z
);
}
...
...
@@ -363,7 +365,8 @@ static inline uint32_t nrLDPC_decoder_core(int8_t* p_llr, int8_t* p_out, t_nrLDP
#endif
if
(
BG
==
1
)
{
if
(
Z
==
384
){
nrLDPC_cnProc_BG1_Z384_13
(
p_procBuf
->
cnProcBuf
,
p_procBuf
->
cnProcBufRes
);
nrLDPC_cnProc_BG1_Z384_13_AVX512
(
p_procBuf
->
cnProcBuf
,
p_procBuf
->
cnProcBufRes
);
//nrLDPC_cnProc_BG1_Z384_13_AVX2(p_procBuf->cnProcBuf,p_procBuf->cnProcBufRes);
}
else
{
nrLDPC_cnProc_BG1
(
p_lut
,
p_procBuf
,
Z
);
}
...
...
openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/Makefile
→
openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/
generator_avx2/
Makefile
View file @
71a8d19b
C
=
gcc
CFLAGS
=
-W
-Wall
-mavx2
CFLAGS
=
-W
-Wall
-mavx2
LDFLAGS
=
EXEC
=
cnProc_gen
EXEC
=
cnProc_gen
_avx2
SRC
=
$(
wildcard
*
.c
)
OBJ
=
$(SRC:.c=.o)
all
:
$(EXEC)
cnProc_gen
:
$(OBJ)
cnProc_gen
_avx2
:
$(OBJ)
$(CC)
-o
$@
$^
$(LDFLAGS)
-O2
-pg
main.o
:
cnProc_gen.h
main.o
:
cnProc_gen
_avx2
.h
%.o
:
%.c
$(CC)
-o
$@
-c
$<
$(CFLAGS)
-I
${OPENAIR_HOME}
/openair1
-g
-pg
...
...
@@ -24,4 +24,4 @@ mrproper: clean
rm
-rf
$(EXEC)
zip
:
tar
-zcvf
sauvegarde.tar.gz main.c cnProc_gen
.c cnProc_geno
.h Makefile
tar
-zcvf
sauvegarde.tar.gz main.c cnProc_gen
_avx2.c cnProc_gen_avx2
.h Makefile
openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_avx2/cnProc_gen_avx2.c
0 → 100644
View file @
71a8d19b
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <immintrin.h>
#include "../../nrLDPCdecoder_defs.h"
#include "../../nrLDPC_types.h"
#include "../../nrLDPC_bnProc.h"
#include "cnProc_gen_avx2.h"
/*
 * Layout of one BG1 check-node (CN) group as seen by the generator.
 *   numBn   - number of bit nodes (BNs) attached to every CN of the group
 *   lutStep - distance, in 32-byte vectors, between two consecutive BNs of the
 *             group inside cnProcBuf. These are the steps of the former
 *             hard-coded tables lut_idxCnProcG3 ... lut_idxCnProcG19.
 */
struct cnProcGroupLayout {
  unsigned numBn;
  unsigned lutStep;
};

/*
 * Input offset (in 32-byte vectors) of the k-th BN *other than* BN j.
 * Row j of the former lut_idxCnProcGx tables listed the offsets of all BNs of
 * the group except BN j, in increasing order; skipping index j reproduces
 * exactly those values.
 */
static unsigned cnProcOtherBnOffset(unsigned lutStep, unsigned j, unsigned k)
{
  return ((k < j) ? k : k + 1) * lutStep;
}

/*
 * Emit the statements that compute one output vector for BN j of a group:
 * load every other BN, accumulate the minimum magnitude and the sign product,
 * clamp the magnitude to maxLLR and store the signed result.
 *   fd     - generated source file
 *   grp    - layout of the CN group being processed
 *   j      - BN whose result is produced
 *   inBase - vector index added to each per-BN input offset
 *   outIdx - vector index of the result in cnProcBufRes
 */
static void cnProcEmitBn(FILE *fd, const struct cnProcGroupLayout *grp, unsigned j,
                         unsigned inBase, unsigned outIdx)
{
  // Abs and sign of 32 CNs (first of the other BNs)
  fprintf(fd, " ymm0 = ((__m256i*)cnProcBuf)[%u+i];\n", inBase + cnProcOtherBnOffset(grp->lutStep, j, 0));
  fprintf(fd, " sgn = _mm256_sign_epi8(ones, ymm0);\n");
  fprintf(fd, " min = _mm256_abs_epi8(ymm0);\n");

  // Remaining BNs: a group with numBn BNs has numBn-1 inputs per output
  for (unsigned k = 1; k + 1 < grp->numBn; k++) {
    fprintf(fd, " ymm0 = ((__m256i*)cnProcBuf)[%u+i];\n", inBase + cnProcOtherBnOffset(grp->lutStep, j, k));
    fprintf(fd, " min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));\n");
    fprintf(fd, " sgn = _mm256_sign_epi8(sgn, ymm0);\n");
  }

  // Store result; abs(-128) is 128 as unsigned byte, hence the clamp to 127
  fprintf(fd, " min = _mm256_min_epu8(min, maxLLR);\n");
  fprintf(fd, " ((__m256i*)cnProcBufRes)[%u+i] = _mm256_sign_epi8(min, sgn);\n", outIdx);
}

/*
 * Generate ../ldpc_gen_files/nrLDPC_cnProc_BG1_Z<Z>_<rate>_AVX2.c, a C source
 * file defining
 *   void nrLDPC_cnProc_BG1_Z<Z>_<rate>_AVX2(int8_t* cnProcBuf, int8_t* cnProcBufRes)
 * with all buffer offsets of the BG1 check-node processing resolved to
 * constants at generation time.
 *
 *   Z - lifting size, used for the file/function name and the loop trip counts
 *   R - rate index: 0 -> "13", 1 -> "23", 2 -> "89"
 *
 * The output directory must already exist; it is not created here.
 * The process is aborted on an illegal R, if the file cannot be created, or
 * if writing it fails.
 */
void nrLDPC_cnProc_BG1_generator_AVX2(uint16_t Z, int R)
{
  static const char *const ratestr[3] = {"13", "23", "89"};

  // One entry per BG1 CN group, in the order of the lut_numCnInCnGroups tables.
  // NOTE(review): every lutStep equals (CNs in group for R13) * 384 / 32 per
  // the original table comments, i.e. presumably bitOffsetInGroup - confirm.
  static const struct cnProcGroupLayout groups[] = {
      {3, 12}, {4, 60}, {5, 216}, {6, 96}, {7, 60}, {8, 24}, {9, 24}, {10, 12}, {19, 48}};

  if (R < 0 || R > 2) {
    printf("Illegal R %d\n", R);
    abort();
  }

  // The longest name (5-digit Z) needs 53 bytes including the terminator; the
  // previous 50-byte buffer was already one byte short for Z=384.
  char fname[64];
  const int len = snprintf(fname, sizeof(fname), "../ldpc_gen_files/nrLDPC_cnProc_BG1_Z%d_%s_AVX2.c", Z, ratestr[R]);
  if (len < 0 || len >= (int)sizeof(fname)) {
    printf("File name too long for Z %d\n", Z);
    abort();
  }

  FILE *fd = fopen(fname, "w");
  if (fd == NULL) {
    printf("Cannot create %s\n", fname);
    abort();
  }

  fprintf(fd, "#include <stdint.h>\n");
  fprintf(fd, "#include <immintrin.h>\n");
  fprintf(fd, "void nrLDPC_cnProc_BG1_Z%d_%s_AVX2(int8_t* cnProcBuf,int8_t* cnProcBufRes) {\n", Z, ratestr[R]);

  // Number of CNs per group for the selected rate (R was validated above)
  const uint8_t *lut_numCnInCnGroups;
  const uint32_t *lut_startAddrCnGroups = lut_startAddrCnGroups_BG1;

  if (R == 0)
    lut_numCnInCnGroups = lut_numCnInCnGroups_BG1_R13;
  else if (R == 1)
    lut_numCnInCnGroups = lut_numCnInCnGroups_BG1_R23;
  else
    lut_numCnInCnGroups = lut_numCnInCnGroups_BG1_R89;

  for (unsigned g = 0; g < sizeof(groups) / sizeof(groups[0]); g++) {
    const struct cnProcGroupLayout *grp = &groups[g];

    fprintf(fd, "//Process group with %u BNs\n", grp->numBn);

    if (g == 0) {
      // Local variables of the generated function, emitted once
      fprintf(fd, " __m256i ymm0, min, sgn,ones,maxLLR;\n");
      fprintf(fd, " ones = _mm256_set1_epi8((char)1);\n");
      fprintf(fd, " maxLLR = _mm256_set1_epi8((char)127);\n");
    }

    // Group not present at this rate: only the comment line is emitted
    if (lut_numCnInCnGroups[g] == 0)
      continue;

    // Number of groups of 32 CNs for parallel processing
    // Ceil for values not divisible by 32
    const unsigned M = (lut_numCnInCnGroups[g] * Z + 31) >> 5;
    // Offset to each BN within a group in terms of 32 Byte; always taken from
    // the R13 table and NR_LDPC_ZMAX, independent of the requested R and Z
    const unsigned bitOffsetInGroup = (lut_numCnInCnGroups_BG1_R13[g] * NR_LDPC_ZMAX) >> 5;
    // Start of the group in units of 32-byte vectors
    const unsigned startIdx = lut_startAddrCnGroups[g] >> 5;
    // The 3-BN group is emitted unrolled by two, all other groups are not.
    // NOTE(review): with an odd M the unrolled loop touches vector index M as
    // well; presumably that stays inside the group's stride - confirm.
    const unsigned unroll = (g == 0) ? 2 : 1;

    // Loop over every BN
    for (unsigned j = 0; j < grp->numBn; j++) {
      // Loop over CNs
      if (unroll == 2)
        fprintf(fd, " for (int i=0;i<%u;i+=2) {\n", M);
      else
        fprintf(fd, " for (int i=0;i<%u;i++) {\n", M);

      for (unsigned u = 0; u < unroll; u++)
        cnProcEmitBn(fd, grp, j, startIdx + u, startIdx + (j * bitOffsetInGroup) + u);

      fprintf(fd, " }\n");
    }
  }

  fprintf(fd, "}\n");

  // fclose flushes the stream: a failure here means the generated file is incomplete
  if (fclose(fd) != 0) {
    printf("Error while writing %s\n", fname);
    abort();
  }
}
// end of the function nrLDPC_cnProc_BG1_generator_AVX2
openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/
cnProc_gen
.h
→
openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/
generator_avx2/cnProc_gen_avx2
.h
View file @
71a8d19b
// Declarations for the BG1 check-node (CN) processing code generators.
//
// The guard name is specific to this header so that it cannot mask another
// generator header that also used NRLDPC_CN_GEN.
#ifndef NRLDPC_CN_GEN_AVX2_H
#define NRLDPC_CN_GEN_AVX2_H

// uint16_t is used in the prototypes below; include it here so the header
// compiles regardless of what the including file pulled in before it.
#include <stdint.h>

// Un-suffixed generator entry point.
//   Z: lifting size
//   R: rate selector (the callers visible in this change pass 0 for R = 1/3)
void nrLDPC_cnProc_BG1_generator(uint16_t Z, int R);

// AVX2 variant of the generator; same parameters as above.
void nrLDPC_cnProc_BG1_generator_AVX2(uint16_t Z, int R);

#endif
openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_avx2/main.c
0 → 100644
View file @
71a8d19b
#include <stdio.h>
#include <immintrin.h>
#include "../../nrLDPC_types.h"
#include "../../nrLDPC_init.h"
#include "../../nrLDPC_bnProc.h"
#include "cnProc_gen_avx2.h"
// Entry point of the AVX2 generator tool.
// Runs the BG1 CN-processing generator once, for Z = 384 and R = 1/3
// (rate selector 0). The command-line arguments are not consulted.
int main(int argc, char *argv[])
{
  enum { LIFTING_SIZE_Z = 384, RATE_SELECTOR_R13 = 0 };

  nrLDPC_cnProc_BG1_generator_AVX2(LIFTING_SIZE_Z, RATE_SELECTOR_R13);

  return 0;
}
openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_avx512/Makefile
0 → 100644
View file @
71a8d19b
# Builds the AVX512 CN-processing code generator (cnProc_gen_avx512).
# OPENAIR_HOME must be set (environment or make command line); its openair1
# sub-directory is added to the include path when compiling.

# The recipes below use $(CC), so the compiler has to be assigned to CC.
CC=gcc
CFLAGS=-W -Wall
LDFLAGS=
EXEC=cnProc_gen_avx512
# Every .c file in this directory is compiled and linked into $(EXEC).
SRC= $(wildcard *.c)
OBJ= $(SRC:.c=.o)

all: $(EXEC)

cnProc_gen_avx512: $(OBJ)
	$(CC) -o $@ $^ $(LDFLAGS) -O2 -pg

# main.o must be rebuilt when the generator's prototype changes.
main.o: cnProc_gen_avx512.h

%.o: %.c
	$(CC) -o $@ -c $< $(CFLAGS) -I ${OPENAIR_HOME}/openair1 -g -pg

# None of these targets produce a file of the same name.
.PHONY: all clean mrproper zip

# Remove object files only.
clean:
	rm -rf *.o

# Remove object files and the generator executable.
mrproper: clean
	rm -rf $(EXEC)

# Archive the generator sources.
zip:
	tar -zcvf sauvegarde.tar.gz main.c cnProc_gen_avx512.c cnProc_gen_avx512.h Makefile
openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/
cnProc_gen
.c
→
openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/
generator_avx512/cnProc_gen_avx512
.c
View file @
71a8d19b
#include <stdint.h>
#include <immintrin.h>
#include "../nrLDPCdecoder_defs.h"
#include "../nrLDPC_types.h"
#include "../nrLDPC_bnProc.h"
#include "cnProc_gen.h"
#include "../
../
nrLDPCdecoder_defs.h"
#include "../
../
nrLDPC_types.h"
#include "../
../
nrLDPC_bnProc.h"
#include "cnProc_gen
_avx512
.h"
void
nrLDPC_cnProc_BG1_generator
(
uint16_t
Z
,
int
R
)
void
nrLDPC_cnProc_BG1_generator
_AVX512
(
uint16_t
Z
,
int
R
)
{
const
char
*
ratestr
[
3
]
=
{
"13"
,
"23"
,
"89"
};
if
(
R
<
0
||
R
>
2
)
{
printf
(
"Illegal R %d
\n
"
,
R
);
abort
();}
system
(
"mkdir -p ldpc_gen_files"
);
//
system("mkdir -p ldpc_gen_files");
char
fname
[
50
];
sprintf
(
fname
,
"
ldpc_gen_files/nrLDPC_cnProc_BG1_Z%d_%s
.c"
,
Z
,
ratestr
[
R
]);
sprintf
(
fname
,
"
../ldpc_gen_files/nrLDPC_cnProc_BG1_Z%d_%s_AVX512
.c"
,
Z
,
ratestr
[
R
]);
FILE
*
fd
=
fopen
(
fname
,
"w"
);
if
(
fd
==
NULL
)
{
printf
(
"Cannot create %s
\n
"
);
abort
();}
...
...
@@ -31,7 +31,7 @@ void nrLDPC_cnProc_BG1_generator(uint16_t Z,int R)
fprintf
(
fd
,
"}
\n
"
);
fprintf
(
fd
,
"void nrLDPC_cnProc_BG1_Z%d_%s(int8_t* cnProcBuf,int8_t* cnProcBufRes) {
\n
"
,
Z
,
ratestr
[
R
]);
fprintf
(
fd
,
"void nrLDPC_cnProc_BG1_Z%d_%s
_AVX512
(int8_t* cnProcBuf,int8_t* cnProcBufRes) {
\n
"
,
Z
,
ratestr
[
R
]);
const
uint8_t
*
lut_numCnInCnGroups
;
const
uint32_t
*
lut_startAddrCnGroups
=
lut_startAddrCnGroups_BG1
;
...
...
@@ -48,7 +48,7 @@ void nrLDPC_cnProc_BG1_generator(uint16_t Z,int R)
uint32_t
M
;
uint32_t
j
;
uint32_t
k
;
// Offset to each bit within a group in terms of
32
Byte
// Offset to each bit within a group in terms of
64
Byte
uint32_t
bitOffsetInGroup
;
//__m512i zmm0, min, sgn;
...
...
@@ -79,11 +79,11 @@ void nrLDPC_cnProc_BG1_generator(uint16_t Z,int R)
if
(
lut_numCnInCnGroups
[
0
]
>
0
)
{
// Number of groups of
32
CNs for parallel processing
// Number of groups of
64
CNs for parallel processing
// Ceil for values not divisible by 64
M
=
(
lut_numCnInCnGroups
[
0
]
*
Z
+
63
)
>>
5
;
// Set the offset to each bit within a group in terms of
32
Byte
// Set the offset to each bit within a group in terms of
64
Byte
bitOffsetInGroup
=
(
lut_numCnInCnGroups_BG1_R13
[
0
]
*
NR_LDPC_ZMAX
)
>>
5
;
...
...
@@ -103,7 +103,7 @@ void nrLDPC_cnProc_BG1_generator(uint16_t Z,int R)
// {
fprintf
(
fd
,
" for (int i=0;i<%d;i+=2) {
\n
"
,
M
);
// Abs and sign of
32
CNs (first BN)
// Abs and sign of
64
CNs (first BN)
// zmm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
fprintf
(
fd
,
" zmm0 = ((__m512i*)cnProcBuf)[%d+i];
\n
"
,(
lut_startAddrCnGroups
[
0
]
>>
5
)
+
lut_idxCnProcG3
[
j
][
0
]);
// sgn = _mm512_sign_epi16(ones, zmm0);
...
...
@@ -128,7 +128,7 @@ void nrLDPC_cnProc_BG1_generator(uint16_t Z,int R)
// p_cnProcBufResBit++;
fprintf
(
fd
,
" ((__m512i*)cnProcBufRes)[%d+i] = _mm512_sign_epi16(min, sgn);
\n
"
,(
lut_startAddrCnGroups
[
0
]
>>
5
)
+
(
j
*
bitOffsetInGroup
));
// Abs and sign of
32
CNs (first BN)
// Abs and sign of
64
CNs (first BN)
// zmm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
fprintf
(
fd
,
" zmm0 = ((__m512i*)cnProcBuf)[%d+i];
\n
"
,(
lut_startAddrCnGroups
[
0
]
>>
5
)
+
lut_idxCnProcG3
[
j
][
0
]
+
1
);
// sgn = _mm512_sign_epi16(ones, zmm0);
...
...
@@ -165,11 +165,11 @@ void nrLDPC_cnProc_BG1_generator(uint16_t Z,int R)
if
(
lut_numCnInCnGroups
[
1
]
>
0
)
{
// Number of groups of
32
CNs for parallel processing
// Number of groups of
64
CNs for parallel processing
// Ceil for values not divisible by 64
M
=
(
lut_numCnInCnGroups
[
1
]
*
Z
+
63
)
>>
5
;
// Set the offset to each bit within a group in terms of
32
Byte
// Set the offset to each bit within a group in terms of
64
Byte
bitOffsetInGroup
=
(
lut_numCnInCnGroups_BG1_R13
[
1
]
*
NR_LDPC_ZMAX
)
>>
5
;
...
...
@@ -188,7 +188,7 @@ void nrLDPC_cnProc_BG1_generator(uint16_t Z,int R)
// for (i=0; i<M; i++,iprime++)
// {
fprintf
(
fd
,
" for (int i=0;i<%d;i++) {
\n
"
,
M
);
// Abs and sign of
32
CNs (first BN)
// Abs and sign of
64
CNs (first BN)
// zmm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
fprintf
(
fd
,
" zmm0 = ((__m512i*)cnProcBuf)[%d+i];
\n
"
,(
lut_startAddrCnGroups
[
1
]
>>
5
)
+
lut_idxCnProcG4
[
j
][
0
]);
// sgn = _mm512_sign_epi16(ones, zmm0);
...
...
@@ -230,11 +230,11 @@ void nrLDPC_cnProc_BG1_generator(uint16_t Z,int R)
if
(
lut_numCnInCnGroups
[
2
]
>
0
)
{
// Number of groups of
32
CNs for parallel processing
// Number of groups of
64
CNs for parallel processing
// Ceil for values not divisible by 64
M
=
(
lut_numCnInCnGroups
[
2
]
*
Z
+
63
)
>>
5
;
// Set the offset to each bit within a group in terms of
32
Byte
// Set the offset to each bit within a group in terms of
64
Byte
bitOffsetInGroup
=
(
lut_numCnInCnGroups_BG1_R13
[
2
]
*
NR_LDPC_ZMAX
)
>>
5
;
...
...
@@ -253,7 +253,7 @@ void nrLDPC_cnProc_BG1_generator(uint16_t Z,int R)
// for (i=0; i<M; i++,iprime++)
// {
fprintf
(
fd
,
" for (int i=0;i<%d;i++) {
\n
"
,
M
);
// Abs and sign of
32
CNs (first BN)
// Abs and sign of
64
CNs (first BN)
// zmm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
fprintf
(
fd
,
" zmm0 = ((__m512i*)cnProcBuf)[%d+i];
\n
"
,(
lut_startAddrCnGroups
[
2
]
>>
5
)
+
lut_idxCnProcG5
[
j
][
0
]);
// sgn = _mm512_sign_epi16(ones, zmm0);
...
...
@@ -295,11 +295,11 @@ void nrLDPC_cnProc_BG1_generator(uint16_t Z,int R)
if
(
lut_numCnInCnGroups
[
3
]
>
0
)
{
// Number of groups of
32
CNs for parallel processing
// Number of groups of
64
CNs for parallel processing
// Ceil for values not divisible by 64
M
=
(
lut_numCnInCnGroups
[
3
]
*
Z
+
63
)
>>
5
;
// Set the offset to each bit within a group in terms of
32
Byte
// Set the offset to each bit within a group in terms of
64
Byte
bitOffsetInGroup
=
(
lut_numCnInCnGroups_BG1_R13
[
3
]
*
NR_LDPC_ZMAX
)
>>
5
;
...
...
@@ -318,7 +318,7 @@ void nrLDPC_cnProc_BG1_generator(uint16_t Z,int R)
// for (i=0; i<M; i++,iprime++)
// {
fprintf
(
fd
,
" for (int i=0;i<%d;i++) {
\n
"
,
M
);
// Abs and sign of
32
CNs (first BN)
// Abs and sign of
64
CNs (first BN)
// zmm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
fprintf
(
fd
,
" zmm0 = ((__m512i*)cnProcBuf)[%d+i];
\n
"
,(
lut_startAddrCnGroups
[
3
]
>>
5
)
+
lut_idxCnProcG6
[
j
][
0
]);
// sgn = _mm512_sign_epi16(ones, zmm0);
...
...
@@ -362,11 +362,11 @@ void nrLDPC_cnProc_BG1_generator(uint16_t Z,int R)
if
(
lut_numCnInCnGroups
[
4
]
>
0
)
{
// Number of groups of
32
CNs for parallel processing
// Number of groups of
64
CNs for parallel processing
// Ceil for values not divisible by 64
M
=
(
lut_numCnInCnGroups
[
4
]
*
Z
+
63
)
>>
5
;
// Set the offset to each bit within a group in terms of
32
Byte
// Set the offset to each bit within a group in terms of
64
Byte
bitOffsetInGroup
=
(
lut_numCnInCnGroups_BG1_R13
[
4
]
*
NR_LDPC_ZMAX
)
>>
5
;
...
...
@@ -385,7 +385,7 @@ void nrLDPC_cnProc_BG1_generator(uint16_t Z,int R)
// for (i=0; i<M; i++,iprime++)
// {
fprintf
(
fd
,
" for (int i=0;i<%d;i++) {
\n
"
,
M
);
// Abs and sign of
32
CNs (first BN)
// Abs and sign of
64
CNs (first BN)
// zmm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
fprintf
(
fd
,
" zmm0 = ((__m512i*)cnProcBuf)[%d+i];
\n
"
,(
lut_startAddrCnGroups
[
4
]
>>
5
)
+
lut_idxCnProcG7
[
j
][
0
]);
// sgn = _mm512_sign_epi16(ones, zmm0);
...
...
@@ -430,11 +430,11 @@ void nrLDPC_cnProc_BG1_generator(uint16_t Z,int R)
if
(
lut_numCnInCnGroups
[
5
]
>
0
)
{
// Number of groups of
32
CNs for parallel processing
// Number of groups of
64
CNs for parallel processing
// Ceil for values not divisible by 64
M
=
(
lut_numCnInCnGroups
[
5
]
*
Z
+
63
)
>>
5
;
// Set the offset to each bit within a group in terms of
32
Byte
// Set the offset to each bit within a group in terms of
64
Byte
bitOffsetInGroup
=
(
lut_numCnInCnGroups_BG1_R13
[
5
]
*
NR_LDPC_ZMAX
)
>>
5
;
...
...
@@ -453,7 +453,7 @@ void nrLDPC_cnProc_BG1_generator(uint16_t Z,int R)
// for (i=0; i<M; i++,iprime++)
// {
fprintf
(
fd
,
" for (int i=0;i<%d;i++) {
\n
"
,
M
);
// Abs and sign of
32
CNs (first BN)
// Abs and sign of
64
CNs (first BN)
// zmm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
fprintf
(
fd
,
" zmm0 = ((__m512i*)cnProcBuf)[%d+i];
\n
"
,(
lut_startAddrCnGroups
[
5
]
>>
5
)
+
lut_idxCnProcG8
[
j
][
0
]);
// sgn = _mm512_sign_epi16(ones, zmm0);
...
...
@@ -499,11 +499,11 @@ void nrLDPC_cnProc_BG1_generator(uint16_t Z,int R)
if
(
lut_numCnInCnGroups
[
6
]
>
0
)
{
// Number of groups of
32
CNs for parallel processing
// Number of groups of
64
CNs for parallel processing
// Ceil for values not divisible by 64
M
=
(
lut_numCnInCnGroups
[
6
]
*
Z
+
63
)
>>
5
;
// Set the offset to each bit within a group in terms of
32
Byte
// Set the offset to each bit within a group in terms of
64
Byte
bitOffsetInGroup
=
(
lut_numCnInCnGroups_BG1_R13
[
6
]
*
NR_LDPC_ZMAX
)
>>
5
;
...
...
@@ -522,7 +522,7 @@ void nrLDPC_cnProc_BG1_generator(uint16_t Z,int R)
// for (i=0; i<M; i++,iprime++)
// {
fprintf
(
fd
,
" for (int i=0;i<%d;i++) {
\n
"
,
M
);
// Abs and sign of
32
CNs (first BN)
// Abs and sign of
64
CNs (first BN)
// zmm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
fprintf
(
fd
,
" zmm0 = ((__m512i*)cnProcBuf)[%d+i];
\n
"
,(
lut_startAddrCnGroups
[
6
]
>>
5
)
+
lut_idxCnProcG9
[
j
][
0
]);
// sgn = _mm512_sign_epi16(ones, zmm0);
...
...
@@ -569,11 +569,11 @@ void nrLDPC_cnProc_BG1_generator(uint16_t Z,int R)
if
(
lut_numCnInCnGroups
[
7
]
>
0
)
{
// Number of groups of
32
CNs for parallel processing
// Number of groups of
64
CNs for parallel processing
// Ceil for values not divisible by 64
M
=
(
lut_numCnInCnGroups
[
7
]
*
Z
+
63
)
>>
5
;
// Set the offset to each bit within a group in terms of
32
Byte
// Set the offset to each bit within a group in terms of
64
Byte
bitOffsetInGroup
=
(
lut_numCnInCnGroups_BG1_R13
[
7
]
*
NR_LDPC_ZMAX
)
>>
5
;
...
...
@@ -592,7 +592,7 @@ void nrLDPC_cnProc_BG1_generator(uint16_t Z,int R)
// for (i=0; i<M; i++,iprime++)
// {
fprintf
(
fd
,
" for (int i=0;i<%d;i++) {
\n
"
,
M
);
// Abs and sign of
32
CNs (first BN)
// Abs and sign of
64
CNs (first BN)
// zmm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
fprintf
(
fd
,
" zmm0 = ((__m512i*)cnProcBuf)[%d+i];
\n
"
,(
lut_startAddrCnGroups
[
7
]
>>
5
)
+
lut_idxCnProcG10
[
j
][
0
]);
// sgn = _mm512_sign_epi16(ones, zmm0);
...
...
@@ -642,11 +642,11 @@ void nrLDPC_cnProc_BG1_generator(uint16_t Z,int R)
if
(
lut_numCnInCnGroups
[
8
]
>
0
)
{
// Number of groups of
32
CNs for parallel processing
// Number of groups of
64
CNs for parallel processing
// Ceil for values not divisible by 64
M
=
(
lut_numCnInCnGroups
[
8
]
*
Z
+
63
)
>>
5
;
// Set the offset to each bit within a group in terms of
32
Byte
// Set the offset to each bit within a group in terms of
64
Byte
bitOffsetInGroup
=
(
lut_numCnInCnGroups_BG1_R13
[
8
]
*
NR_LDPC_ZMAX
)
>>
5
;
...
...
@@ -665,7 +665,7 @@ void nrLDPC_cnProc_BG1_generator(uint16_t Z,int R)
// for (i=0; i<M; i++,iprime++)
// {
fprintf
(
fd
,
" for (int i=0;i<%d;i++) {
\n
"
,
M
);
// Abs and sign of
32
CNs (first BN)
// Abs and sign of
64
CNs (first BN)
// zmm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
fprintf
(
fd
,
" zmm0 = ((__m512i*)cnProcBuf)[%d+i];
\n
"
,(
lut_startAddrCnGroups
[
8
]
>>
5
)
+
lut_idxCnProcG19
[
j
][
0
]);
// sgn = _mm512_sign_epi16(ones, zmm0);
...
...
@@ -699,3 +699,9 @@ void nrLDPC_cnProc_BG1_generator(uint16_t Z,int R)
fprintf
(
fd
,
"}
\n
"
);
fclose
(
fd
);
}
//end of the function nrLDPC_cnProc_BG1
openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_avx512/cnProc_gen_avx512.h
0 → 100644
View file @
71a8d19b
// Declaration of the AVX512 BG1 check-node (CN) processing code generator.
//
// The guard name is specific to this header: the AVX2 generator header also
// used NRLDPC_CN_GEN, so including both would silently drop the second one.
#ifndef NRLDPC_CN_GEN_AVX512_H
#define NRLDPC_CN_GEN_AVX512_H

// uint16_t is used in the prototype below; include it here so the header
// compiles regardless of what the including file pulled in before it.
#include <stdint.h>

// Generates the AVX512 CN-processing C source for base graph 1.
//   Z: lifting size (the generator main passes 384)
//   R: rate selector, 0..2 (selecting "13", "23", "89"); the generator
//      aborts on any other value
void nrLDPC_cnProc_BG1_generator_AVX512(uint16_t Z, int R);

#endif
openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_avx512/main.c
0 → 100644
View file @
71a8d19b
#include <stdio.h>
#include <immintrin.h>
//#include "../nrLDPCdecoder_defs.h"
#include "../../nrLDPC_types.h"
#include "../../nrLDPC_init.h"
#include "../../nrLDPC_bnProc.h"
#include "cnProc_gen_avx512.h"
// Entry point of the AVX512 generator tool.
// Runs the BG1 CN-processing generator once, for Z = 384 and R = 1/3
// (rate selector 0). The command-line arguments are not consulted.
int main(int argc, char *argv[])
{
  enum { LIFTING_SIZE_Z = 384, RATE_SELECTOR_R13 = 0 };

  nrLDPC_cnProc_BG1_generator_AVX512(LIFTING_SIZE_Z, RATE_SELECTOR_R13);

  return 0;
}
openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/nrLDPC_cnProc_BG1_Z384_13.h
View file @
71a8d19b
// Prototypes of the generated BG1 check-node (CN) processing routines for
// Z = 384, R = 1/3.
#ifndef NR_CN_PROC_BG1_OPTIM
#define NR_CN_PROC_BG1_OPTIM

// int8_t is used in the prototypes below; include it here so the header
// compiles regardless of what the including file pulled in before it.
#include <stdint.h>

// In the generated AVX2/AVX512 code, cnProcBuf is read and cnProcBufRes is
// written, both through vector-typed pointer casts.
// NOTE(review): the buffers presumably have to be aligned for those vector
// accesses - confirm against the caller's allocation.

// Un-suffixed variant.
void nrLDPC_cnProc_BG1_Z384_13(int8_t *cnProcBuf, int8_t *cnProcBufRes);

// Variant emitted by the AVX512 generator (accesses the buffers as __m512i).
void nrLDPC_cnProc_BG1_Z384_13_AVX512(int8_t *cnProcBuf, int8_t *cnProcBufRes);

// Variant emitted by the AVX2 generator (accesses the buffers as __m256i).
void nrLDPC_cnProc_BG1_Z384_13_AVX2(int8_t *cnProcBuf, int8_t *cnProcBufRes);

#endif
openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/main.c
deleted
100644 → 0
View file @
3ed622af
#include <stdio.h>
#include <immintrin.h>
//#include "../nrLDPCdecoder_defs.h"
#include "../nrLDPC_types.h"
#include "../nrLDPC_init.h"
//#include "../nrLDPC_mPass.h"
//#include "../nrLDPC_cnProc.h"
#include "../nrLDPC_bnProc.h"
#include "cnProc_gen.h"
// Entry point of the generator tool.
// Runs the BG1 CN-processing generator once, for Z = 384 and R = 1/3
// (rate selector 0). The command-line arguments are not consulted.
int main(int argc, char *argv[])
{
  enum { LIFTING_SIZE_Z = 384, RATE_SELECTOR_R13 = 0 };

  nrLDPC_cnProc_BG1_generator(LIFTING_SIZE_Z, RATE_SELECTOR_R13);

  return 0;
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment