Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
O
OpenXG-RAN
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
wangjie
OpenXG-RAN
Commits
265b5c4d
Commit
265b5c4d
authored
May 29, 2020
by
Sy
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Test for AVX512
parent
4542322b
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
68 additions
and
288 deletions
+68
-288
openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_avx512/cnProc_gen_BG1_avx512.c
...der/nrLDPC_tools/generator_avx512/cnProc_gen_BG1_avx512.c
+68
-288
openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_avx512/cnProc_gen_avx512
...C_decoder/nrLDPC_tools/generator_avx512/cnProc_gen_avx512
+0
-0
No files found.
openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_avx512/cnProc_gen_BG1_avx512.c
View file @
265b5c4d
...
...
@@ -20,7 +20,7 @@ void nrLDPC_cnProc_BG1_generator_AVX512(uint16_t Z,int R)
fprintf
(
fd
,
"#include <stdint.h>
\n
"
);
fprintf
(
fd
,
"#include <immintrin.h>
\n
"
);
fprintf
(
fd
,
"#include
\"
../include/avx512fintrin.h
\"\n
"
);
fprintf
(
fd
,
"__m512i _mm512_sign_epi16(__m512i a, __m512i b){
\n
"
);
/* Emulate _mm512_sign_epi16() with instructions that exist in the AVX-512 instruction set */
fprintf
(
fd
,
"b = _mm512_min_epi16(b, _mm512_set1_epi16(1));
\n
"
);
...
...
@@ -69,8 +69,8 @@ void nrLDPC_cnProc_BG1_generator_AVX512(uint16_t Z,int R)
// Offsets are in units of bitOffsetInGroup (1*384/64)=6
// Offsets are in units of bitOffsetInGroup (1*384/64)=6
//
const uint8_t lut_idxCnProcG3[3][2] = {{6,12}, {0,12}, {0,6}};
const
uint8_t
lut_idxCnProcG3
[
3
][
2
]
=
{{
12
,
24
},
{
0
,
24
},
{
0
,
12
}};
const
uint8_t
lut_idxCnProcG3
[
3
][
2
]
=
{{
6
,
12
},
{
0
,
12
},
{
0
,
6
}};
fprintf
(
fd
,
" __m512i zmm0, min, sgn,ones,maxLLR;
\n
"
);
fprintf
(
fd
,
" ones = _mm512_set1_epi8((char)1);
\n
"
);
...
...
@@ -80,17 +80,10 @@ void nrLDPC_cnProc_BG1_generator_AVX512(uint16_t Z,int R)
{
// Number of groups of 64 CNs for parallel processing
// Ceil for values not divisible by 64
<<<<<<<
HEAD
M
=
(
lut_numCnInCnGroups
[
0
]
*
Z
+
63
)
>>
31
;
// Set the offset to each bit within a group in terms of 64 Byte
bitOffsetInGroup
=
(
lut_numCnInCnGroups_BG1_R13
[
0
]
*
NR_LDPC_ZMAX
)
>>
31
;
=======
M
=
(
lut_numCnInCnGroups
[
0
]
*
Z
+
31
)
>>
7
;
M
=
(
lut_numCnInCnGroups
[
0
]
*
Z
+
63
)
>>
5
;
// Set the offset to each bit within a group in terms of 64 Byte
bitOffsetInGroup
=
(
lut_numCnInCnGroups_BG1_R13
[
0
]
*
NR_LDPC_ZMAX
)
>>
7
;
>>>>>>>
6
d9ceaa712033dc4f28050886be43572df7c2b68
bitOffsetInGroup
=
(
lut_numCnInCnGroups_BG1_R13
[
0
]
*
NR_LDPC_ZMAX
)
>>
5
;
// Set pointers to start of group 3
...
...
@@ -111,11 +104,7 @@ void nrLDPC_cnProc_BG1_generator_AVX512(uint16_t Z,int R)
fprintf
(
fd
,
" for (int i=0;i<%d;i+=2) {
\n
"
,
M
);
// Abs and sign of 64 CNs (first BN)
// zmm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
<<<<<<<
HEAD
fprintf
(
fd
,
" zmm0 = ((__m512i*)cnProcBuf)[%d+i];
\n
"
,(
lut_startAddrCnGroups
[
0
]
>>
31
)
+
lut_idxCnProcG3
[
j
][
0
]);
=======
fprintf
(
fd
,
" zmm0 = ((__m512i*)cnProcBuf)[%d+i];
\n
"
,(
lut_startAddrCnGroups
[
0
]
>>
7
)
+
lut_idxCnProcG3
[
j
][
0
]);
>>>>>>>
6
d9ceaa712033dc4f28050886be43572df7c2b68
fprintf
(
fd
,
" zmm0 = ((__m512i*)cnProcBuf)[%d+i];
\n
"
,(
lut_startAddrCnGroups
[
0
]
>>
5
)
+
lut_idxCnProcG3
[
j
][
0
]);
// sgn = _mm512_sign_epi16(ones, zmm0);
fprintf
(
fd
,
" sgn = _mm512_sign_epi16(ones, zmm0);
\n
"
);
// min = _mm512_abs_epi8(zmm0);
...
...
@@ -123,11 +112,7 @@ void nrLDPC_cnProc_BG1_generator_AVX512(uint16_t Z,int R)
// 32 CNs of second BN
// zmm0 = p_cnProcBuf[lut_idxCnProcG3[j][1] + i];
<<<<<<<
HEAD
fprintf
(
fd
,
" zmm0 = ((__m512i*)cnProcBuf)[%d+i];
\n
"
,(
lut_startAddrCnGroups
[
0
]
>>
31
)
+
lut_idxCnProcG3
[
j
][
1
]);
=======
fprintf
(
fd
,
" zmm0 = ((__m512i*)cnProcBuf)[%d+i];
\n
"
,(
lut_startAddrCnGroups
[
0
]
>>
7
)
+
lut_idxCnProcG3
[
j
][
1
]);
>>>>>>>
6
d9ceaa712033dc4f28050886be43572df7c2b68
fprintf
(
fd
,
" zmm0 = ((__m512i*)cnProcBuf)[%d+i];
\n
"
,(
lut_startAddrCnGroups
[
0
]
>>
5
)
+
lut_idxCnProcG3
[
j
][
1
]);
// min = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));
fprintf
(
fd
,
" min = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));
\n
"
);
...
...
@@ -140,19 +125,11 @@ void nrLDPC_cnProc_BG1_generator_AVX512(uint16_t Z,int R)
fprintf
(
fd
,
" min = _mm512_min_epu8(min, maxLLR);
\n
"
);
// *p_cnProcBufResBit = _mm512_sign_epi16(min, sgn);
// p_cnProcBufResBit++;
<<<<<<<
HEAD
fprintf
(
fd
,
" ((__m512i*)cnProcBufRes)[%d+i] = _mm512_sign_epi16(min, sgn);
\n
"
,(
lut_startAddrCnGroups
[
0
]
>>
31
)
+
(
j
*
bitOffsetInGroup
));
fprintf
(
fd
,
" ((__m512i*)cnProcBufRes)[%d+i] = _mm512_sign_epi16(min, sgn);
\n
"
,(
lut_startAddrCnGroups
[
0
]
>>
5
)
+
(
j
*
bitOffsetInGroup
));
// Abs and sign of 64 CNs (first BN)
// zmm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
fprintf
(
fd
,
" zmm0 = ((__m512i*)cnProcBuf)[%d+i];
\n
"
,(
lut_startAddrCnGroups
[
0
]
>>
31
)
+
lut_idxCnProcG3
[
j
][
0
]
+
1
);
=======
fprintf
(
fd
,
" ((__m512i*)cnProcBufRes)[%d+i] = _mm512_sign_epi16(min, sgn);
\n
"
,(
lut_startAddrCnGroups
[
0
]
>>
7
)
+
(
j
*
bitOffsetInGroup
));
// Abs and sign of 64 CNs (first BN)
// zmm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
fprintf
(
fd
,
" zmm0 = ((__m512i*)cnProcBuf)[%d+i];
\n
"
,(
lut_startAddrCnGroups
[
0
]
>>
7
)
+
lut_idxCnProcG3
[
j
][
0
]
+
1
);
>>>>>>>
6
d9ceaa712033dc4f28050886be43572df7c2b68
fprintf
(
fd
,
" zmm0 = ((__m512i*)cnProcBuf)[%d+i];
\n
"
,(
lut_startAddrCnGroups
[
0
]
>>
5
)
+
lut_idxCnProcG3
[
j
][
0
]
+
1
);
// sgn = _mm512_sign_epi16(ones, zmm0);
fprintf
(
fd
,
" sgn = _mm512_sign_epi16(ones, zmm0);
\n
"
);
// min = _mm512_abs_epi8(zmm0);
...
...
@@ -160,11 +137,7 @@ void nrLDPC_cnProc_BG1_generator_AVX512(uint16_t Z,int R)
// 32 CNs of second BN
// zmm0 = p_cnProcBuf[lut_idxCnProcG3[j][1] + i];
<<<<<<<
HEAD
fprintf
(
fd
,
" zmm0 = ((__m512i*)cnProcBuf)[%d+i];
\n
"
,(
lut_startAddrCnGroups
[
0
]
>>
31
)
+
lut_idxCnProcG3
[
j
][
1
]
+
1
);
=======
fprintf
(
fd
,
" zmm0 = ((__m512i*)cnProcBuf)[%d+i];
\n
"
,(
lut_startAddrCnGroups
[
0
]
>>
7
)
+
lut_idxCnProcG3
[
j
][
1
]
+
1
);
>>>>>>>
6
d9ceaa712033dc4f28050886be43572df7c2b68
fprintf
(
fd
,
" zmm0 = ((__m512i*)cnProcBuf)[%d+i];
\n
"
,(
lut_startAddrCnGroups
[
0
]
>>
5
)
+
lut_idxCnProcG3
[
j
][
1
]
+
1
);
// min = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));
fprintf
(
fd
,
" min = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));
\n
"
);
...
...
@@ -177,11 +150,7 @@ void nrLDPC_cnProc_BG1_generator_AVX512(uint16_t Z,int R)
fprintf
(
fd
,
" min = _mm512_min_epu8(min, maxLLR);
\n
"
);
// *p_cnProcBufResBit = _mm512_sign_epi16(min, sgn);
// p_cnProcBufResBit++;
<<<<<<<
HEAD
fprintf
(
fd
,
" ((__m512i*)cnProcBufRes)[%d+i] = _mm512_sign_epi16(min, sgn);
\n
"
,(
lut_startAddrCnGroups
[
0
]
>>
31
)
+
(
j
*
bitOffsetInGroup
)
+
1
);
=======
fprintf
(
fd
,
" ((__m512i*)cnProcBufRes)[%d+i] = _mm512_sign_epi16(min, sgn);
\n
"
,(
lut_startAddrCnGroups
[
0
]
>>
7
)
+
(
j
*
bitOffsetInGroup
)
+
1
);
>>>>>>>
6
d9ceaa712033dc4f28050886be43572df7c2b68
fprintf
(
fd
,
" ((__m512i*)cnProcBufRes)[%d+i] = _mm512_sign_epi16(min, sgn);
\n
"
,(
lut_startAddrCnGroups
[
0
]
>>
5
)
+
(
j
*
bitOffsetInGroup
)
+
1
);
fprintf
(
fd
,
" }
\n
"
);
}
...
...
@@ -191,24 +160,16 @@ void nrLDPC_cnProc_BG1_generator_AVX512(uint16_t Z,int R)
// Process group with 4 BNs
fprintf
(
fd
,
"//Process group with 4 BNs
\n
"
);
// Offset is 5*384/64 = 30
// const uint8_t lut_idxCnProcG4[4][3] = {{30,60,90}, {0,60,90}, {0,30,90}, {0,30,60}};
const
uint8_t
lut_idxCnProcG4
[
4
][
3
]
=
{{
60
,
120
,
180
},
{
0
,
120
,
180
},
{
0
,
60
,
180
},
{
0
,
60
,
120
}};
const
uint8_t
lut_idxCnProcG4
[
4
][
3
]
=
{{
30
,
60
,
90
},
{
0
,
60
,
90
},
{
0
,
30
,
90
},
{
0
,
30
,
60
}};
if
(
lut_numCnInCnGroups
[
1
]
>
0
)
{
// Number of groups of 64 CNs for parallel processing
// Ceil for values not divisible by 64
<<<<<<<
HEAD
M
=
(
lut_numCnInCnGroups
[
1
]
*
Z
+
63
)
>>
31
;
M
=
(
lut_numCnInCnGroups
[
1
]
*
Z
+
63
)
>>
5
;
// Set the offset to each bit within a group in terms of 64 Byte
bitOffsetInGroup
=
(
lut_numCnInCnGroups_BG1_R13
[
1
]
*
NR_LDPC_ZMAX
)
>>
31
;
=======
M
=
(
lut_numCnInCnGroups
[
1
]
*
Z
+
31
)
>>
7
;
// Set the offset to each bit within a group in terms of 64 Byte
bitOffsetInGroup
=
(
lut_numCnInCnGroups_BG1_R13
[
1
]
*
NR_LDPC_ZMAX
)
>>
7
;
>>>>>>>
6
d9ceaa712033dc4f28050886be43572df7c2b68
bitOffsetInGroup
=
(
lut_numCnInCnGroups_BG1_R13
[
1
]
*
NR_LDPC_ZMAX
)
>>
5
;
// Set pointers to start of group 4
...
...
@@ -227,11 +188,7 @@ void nrLDPC_cnProc_BG1_generator_AVX512(uint16_t Z,int R)
fprintf
(
fd
,
" for (int i=0;i<%d;i++) {
\n
"
,
M
);
// Abs and sign of 64 CNs (first BN)
// zmm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
<<<<<<<
HEAD
fprintf
(
fd
,
" zmm0 = ((__m512i*)cnProcBuf)[%d+i];
\n
"
,(
lut_startAddrCnGroups
[
1
]
>>
31
)
+
lut_idxCnProcG4
[
j
][
0
]);
=======
fprintf
(
fd
,
" zmm0 = ((__m512i*)cnProcBuf)[%d+i];
\n
"
,(
lut_startAddrCnGroups
[
1
]
>>
7
)
+
lut_idxCnProcG4
[
j
][
0
]);
>>>>>>>
6
d9ceaa712033dc4f28050886be43572df7c2b68
fprintf
(
fd
,
" zmm0 = ((__m512i*)cnProcBuf)[%d+i];
\n
"
,(
lut_startAddrCnGroups
[
1
]
>>
5
)
+
lut_idxCnProcG4
[
j
][
0
]);
// sgn = _mm512_sign_epi16(ones, zmm0);
fprintf
(
fd
,
" sgn = _mm512_sign_epi16(ones, zmm0);
\n
"
);
// min = _mm512_abs_epi8(zmm0);
...
...
@@ -241,11 +198,7 @@ void nrLDPC_cnProc_BG1_generator_AVX512(uint16_t Z,int R)
// Loop over BNs
for
(
k
=
1
;
k
<
3
;
k
++
)
{
<<<<<<<
HEAD
fprintf
(
fd
,
" zmm0 = ((__m512i*)cnProcBuf)[%d+i];
\n
"
,(
lut_startAddrCnGroups
[
1
]
>>
31
)
+
lut_idxCnProcG4
[
j
][
k
]);
=======
fprintf
(
fd
,
" zmm0 = ((__m512i*)cnProcBuf)[%d+i];
\n
"
,(
lut_startAddrCnGroups
[
1
]
>>
7
)
+
lut_idxCnProcG4
[
j
][
k
]);
>>>>>>>
6
d9ceaa712033dc4f28050886be43572df7c2b68
fprintf
(
fd
,
" zmm0 = ((__m512i*)cnProcBuf)[%d+i];
\n
"
,(
lut_startAddrCnGroups
[
1
]
>>
5
)
+
lut_idxCnProcG4
[
j
][
k
]);
// min = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));
fprintf
(
fd
,
" min = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));
\n
"
);
...
...
@@ -259,11 +212,7 @@ void nrLDPC_cnProc_BG1_generator_AVX512(uint16_t Z,int R)
fprintf
(
fd
,
" min = _mm512_min_epu8(min, maxLLR);
\n
"
);
// *p_cnProcBufResBit = _mm512_sign_epi16(min, sgn);
// p_cnProcBufResBit++;
<<<<<<<
HEAD
fprintf
(
fd
,
" ((__m512i*)cnProcBufRes)[%d+i] = _mm512_sign_epi16(min, sgn);
\n
"
,(
lut_startAddrCnGroups
[
1
]
>>
31
)
+
(
j
*
bitOffsetInGroup
));
=======
fprintf
(
fd
,
" ((__m512i*)cnProcBufRes)[%d+i] = _mm512_sign_epi16(min, sgn);
\n
"
,(
lut_startAddrCnGroups
[
1
]
>>
7
)
+
(
j
*
bitOffsetInGroup
));
>>>>>>>
6
d9ceaa712033dc4f28050886be43572df7c2b68
fprintf
(
fd
,
" ((__m512i*)cnProcBufRes)[%d+i] = _mm512_sign_epi16(min, sgn);
\n
"
,(
lut_startAddrCnGroups
[
1
]
>>
5
)
+
(
j
*
bitOffsetInGroup
));
fprintf
(
fd
,
" }
\n
"
);
}
}
...
...
@@ -273,28 +222,18 @@ void nrLDPC_cnProc_BG1_generator_AVX512(uint16_t Z,int R)
// Process group with 5 BNs
fprintf
(
fd
,
"//Process group with 5 BNs
\n
"
);
// Offset is 18*384/64 = 216
//const uint16_t lut_idxCnProcG5[5][4] = {{108,216,324,432}, {0,216,324,432},
// {0,108,324,432}, {0,108,216,432}, {0,108,216,324}};
const
uint16_t
lut_idxCnProcG5
[
5
][
4
]
=
{{
108
,
216
,
324
,
432
},
{
0
,
216
,
324
,
432
},
{
0
,
108
,
324
,
432
},
{
0
,
108
,
216
,
432
},
{
0
,
108
,
216
,
324
}};
const
uint16_t
lut_idxCnProcG5
[
5
][
4
]
=
{{
216
,
432
,
648
,
864
},
{
0
,
432
,
648
,
864
},
{
0
,
216
,
648
,
864
},
{
0
,
216
,
432
,
864
},
{
0
,
216
,
432
,
648
}};
if
(
lut_numCnInCnGroups
[
2
]
>
0
)
{
// Number of groups of 64 CNs for parallel processing
// Ceil for values not divisible by 64
<<<<<<<
HEAD
M
=
(
lut_numCnInCnGroups
[
2
]
*
Z
+
63
)
>>
31
;
// Set the offset to each bit within a group in terms of 64 Byte
bitOffsetInGroup
=
(
lut_numCnInCnGroups_BG1_R13
[
2
]
*
NR_LDPC_ZMAX
)
>>
31
;
=======
M
=
(
lut_numCnInCnGroups
[
2
]
*
Z
+
31
)
>>
7
;
M
=
(
lut_numCnInCnGroups
[
2
]
*
Z
+
63
)
>>
5
;
// Set the offset to each bit within a group in terms of 64 Byte
bitOffsetInGroup
=
(
lut_numCnInCnGroups_BG1_R13
[
2
]
*
NR_LDPC_ZMAX
)
>>
7
;
>>>>>>>
6
d9ceaa712033dc4f28050886be43572df7c2b68
bitOffsetInGroup
=
(
lut_numCnInCnGroups_BG1_R13
[
2
]
*
NR_LDPC_ZMAX
)
>>
5
;
// Set pointers to start of group 4
...
...
@@ -314,11 +253,7 @@ void nrLDPC_cnProc_BG1_generator_AVX512(uint16_t Z,int R)
fprintf
(
fd
,
" for (int i=0;i<%d;i++) {
\n
"
,
M
);
// Abs and sign of 64 CNs (first BN)
// zmm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
<<<<<<<
HEAD
fprintf
(
fd
,
" zmm0 = ((__m512i*)cnProcBuf)[%d+i];
\n
"
,(
lut_startAddrCnGroups
[
2
]
>>
31
)
+
lut_idxCnProcG5
[
j
][
0
]);
=======
fprintf
(
fd
,
" zmm0 = ((__m512i*)cnProcBuf)[%d+i];
\n
"
,(
lut_startAddrCnGroups
[
2
]
>>
7
)
+
lut_idxCnProcG5
[
j
][
0
]);
>>>>>>>
6
d9ceaa712033dc4f28050886be43572df7c2b68
fprintf
(
fd
,
" zmm0 = ((__m512i*)cnProcBuf)[%d+i];
\n
"
,(
lut_startAddrCnGroups
[
2
]
>>
5
)
+
lut_idxCnProcG5
[
j
][
0
]);
// sgn = _mm512_sign_epi16(ones, zmm0);
fprintf
(
fd
,
" sgn = _mm512_sign_epi16(ones, zmm0);
\n
"
);
// min = _mm512_abs_epi8(zmm0);
...
...
@@ -328,11 +263,7 @@ void nrLDPC_cnProc_BG1_generator_AVX512(uint16_t Z,int R)
// Loop over BNs
for
(
k
=
1
;
k
<
4
;
k
++
)
{
<<<<<<<
HEAD
fprintf
(
fd
,
" zmm0 = ((__m512i*)cnProcBuf)[%d+i];
\n
"
,(
lut_startAddrCnGroups
[
2
]
>>
31
)
+
lut_idxCnProcG5
[
j
][
k
]);
=======
fprintf
(
fd
,
" zmm0 = ((__m512i*)cnProcBuf)[%d+i];
\n
"
,(
lut_startAddrCnGroups
[
2
]
>>
7
)
+
lut_idxCnProcG5
[
j
][
k
]);
>>>>>>>
6
d9ceaa712033dc4f28050886be43572df7c2b68
fprintf
(
fd
,
" zmm0 = ((__m512i*)cnProcBuf)[%d+i];
\n
"
,(
lut_startAddrCnGroups
[
2
]
>>
5
)
+
lut_idxCnProcG5
[
j
][
k
]);
// min = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));
fprintf
(
fd
,
" min = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));
\n
"
);
...
...
@@ -346,11 +277,7 @@ void nrLDPC_cnProc_BG1_generator_AVX512(uint16_t Z,int R)
fprintf
(
fd
,
" min = _mm512_min_epu8(min, maxLLR);
\n
"
);
// *p_cnProcBufResBit = _mm512_sign_epi16(min, sgn);
// p_cnProcBufResBit++;
<<<<<<<
HEAD
fprintf
(
fd
,
" ((__m512i*)cnProcBufRes)[%d+i] = _mm512_sign_epi16(min, sgn);
\n
"
,(
lut_startAddrCnGroups
[
2
]
>>
31
)
+
(
j
*
bitOffsetInGroup
));
=======
fprintf
(
fd
,
" ((__m512i*)cnProcBufRes)[%d+i] = _mm512_sign_epi16(min, sgn);
\n
"
,(
lut_startAddrCnGroups
[
2
]
>>
7
)
+
(
j
*
bitOffsetInGroup
));
>>>>>>>
6
d9ceaa712033dc4f28050886be43572df7c2b68
fprintf
(
fd
,
" ((__m512i*)cnProcBufRes)[%d+i] = _mm512_sign_epi16(min, sgn);
\n
"
,(
lut_startAddrCnGroups
[
2
]
>>
5
)
+
(
j
*
bitOffsetInGroup
));
fprintf
(
fd
,
" }
\n
"
);
}
}
...
...
@@ -359,28 +286,19 @@ void nrLDPC_cnProc_BG1_generator_AVX512(uint16_t Z,int R)
// Process group with 6 BNs
fprintf
(
fd
,
"//Process group with 6 BNs
\n
"
);
// Offset is 8*384/64 = 48
/*
const uint16_t lut_idxCnProcG6[6][5] = {{48,96,144,192,240}, {0,96,144,192,240},
const
uint16_t
lut_idxCnProcG6
[
6
][
5
]
=
{{
48
,
96
,
144
,
192
,
240
},
{
0
,
96
,
144
,
192
,
240
},
{
0
,
48
,
144
,
192
,
240
},
{
0
,
48
,
96
,
192
,
240
},
{0,48,96,144,240}, {0,48,96,144,192}};*/
{
0
,
48
,
96
,
144
,
240
},
{
0
,
48
,
96
,
144
,
192
}};
const
uint16_t
lut_idxCnProcG6
[
6
][
5
]
=
{{
96
,
192
,
288
,
384
,
480
},
{
0
,
192
,
288
,
384
,
480
},
{
0
,
96
,
288
,
384
,
480
},
{
0
,
96
,
192
,
384
,
480
},
{
0
,
96
,
192
,
288
,
480
},
{
0
,
96
,
192
,
288
,
384
}};
if
(
lut_numCnInCnGroups
[
3
]
>
0
)
{
// Number of groups of 64 CNs for parallel processing
// Ceil for values not divisible by 64
<<<<<<<
HEAD
M
=
(
lut_numCnInCnGroups
[
3
]
*
Z
+
63
)
>>
31
;
// Set the offset to each bit within a group in terms of 64 Byte
bitOffsetInGroup
=
(
lut_numCnInCnGroups_BG1_R13
[
3
]
*
NR_LDPC_ZMAX
)
>>
31
;
=======
M
=
(
lut_numCnInCnGroups
[
3
]
*
Z
+
31
)
>>
7
;
M
=
(
lut_numCnInCnGroups
[
3
]
*
Z
+
63
)
>>
5
;
// Set the offset to each bit within a group in terms of 64 Byte
bitOffsetInGroup
=
(
lut_numCnInCnGroups_BG1_R13
[
3
]
*
NR_LDPC_ZMAX
)
>>
7
;
>>>>>>>
6
d9ceaa712033dc4f28050886be43572df7c2b68
bitOffsetInGroup
=
(
lut_numCnInCnGroups_BG1_R13
[
3
]
*
NR_LDPC_ZMAX
)
>>
5
;
// Set pointers to start of group 4
...
...
@@ -400,11 +318,7 @@ void nrLDPC_cnProc_BG1_generator_AVX512(uint16_t Z,int R)
fprintf
(
fd
,
" for (int i=0;i<%d;i++) {
\n
"
,
M
);
// Abs and sign of 64 CNs (first BN)
// zmm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
<<<<<<<
HEAD
fprintf
(
fd
,
" zmm0 = ((__m512i*)cnProcBuf)[%d+i];
\n
"
,(
lut_startAddrCnGroups
[
3
]
>>
31
)
+
lut_idxCnProcG6
[
j
][
0
]);
=======
fprintf
(
fd
,
" zmm0 = ((__m512i*)cnProcBuf)[%d+i];
\n
"
,(
lut_startAddrCnGroups
[
3
]
>>
7
)
+
lut_idxCnProcG6
[
j
][
0
]);
>>>>>>>
6
d9ceaa712033dc4f28050886be43572df7c2b68
fprintf
(
fd
,
" zmm0 = ((__m512i*)cnProcBuf)[%d+i];
\n
"
,(
lut_startAddrCnGroups
[
3
]
>>
5
)
+
lut_idxCnProcG6
[
j
][
0
]);
// sgn = _mm512_sign_epi16(ones, zmm0);
fprintf
(
fd
,
" sgn = _mm512_sign_epi16(ones, zmm0);
\n
"
);
// min = _mm512_abs_epi8(zmm0);
...
...
@@ -414,11 +328,7 @@ void nrLDPC_cnProc_BG1_generator_AVX512(uint16_t Z,int R)
// Loop over BNs
for
(
k
=
1
;
k
<
5
;
k
++
)
{
<<<<<<<
HEAD
fprintf
(
fd
,
" zmm0 = ((__m512i*)cnProcBuf)[%d+i];
\n
"
,(
lut_startAddrCnGroups
[
3
]
>>
31
)
+
lut_idxCnProcG6
[
j
][
k
]);
=======
fprintf
(
fd
,
" zmm0 = ((__m512i*)cnProcBuf)[%d+i];
\n
"
,(
lut_startAddrCnGroups
[
3
]
>>
7
)
+
lut_idxCnProcG6
[
j
][
k
]);
>>>>>>>
6
d9ceaa712033dc4f28050886be43572df7c2b68
fprintf
(
fd
,
" zmm0 = ((__m512i*)cnProcBuf)[%d+i];
\n
"
,(
lut_startAddrCnGroups
[
3
]
>>
5
)
+
lut_idxCnProcG6
[
j
][
k
]);
// min = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));
fprintf
(
fd
,
" min = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));
\n
"
);
...
...
@@ -432,11 +342,7 @@ void nrLDPC_cnProc_BG1_generator_AVX512(uint16_t Z,int R)
fprintf
(
fd
,
" min = _mm512_min_epu8(min, maxLLR);
\n
"
);
// *p_cnProcBufResBit = _mm512_sign_epi16(min, sgn);
// p_cnProcBufResBit++;
<<<<<<<
HEAD
fprintf
(
fd
,
" ((__m512i*)cnProcBufRes)[%d+i] = _mm512_sign_epi16(min, sgn);
\n
"
,(
lut_startAddrCnGroups
[
3
]
>>
31
)
+
(
j
*
bitOffsetInGroup
));
=======
fprintf
(
fd
,
" ((__m512i*)cnProcBufRes)[%d+i] = _mm512_sign_epi16(min, sgn);
\n
"
,(
lut_startAddrCnGroups
[
3
]
>>
7
)
+
(
j
*
bitOffsetInGroup
));
>>>>>>>
6
d9ceaa712033dc4f28050886be43572df7c2b68
fprintf
(
fd
,
" ((__m512i*)cnProcBufRes)[%d+i] = _mm512_sign_epi16(min, sgn);
\n
"
,(
lut_startAddrCnGroups
[
3
]
>>
5
)
+
(
j
*
bitOffsetInGroup
));
fprintf
(
fd
,
" }
\n
"
);
}
}
...
...
@@ -446,32 +352,20 @@ void nrLDPC_cnProc_BG1_generator_AVX512(uint16_t Z,int R)
// Process group with 7 BNs
fprintf
(
fd
,
"//Process group with 7 BNs
\n
"
);
// Offset is 5*384/64 = 30
/*
const uint16_t lut_idxCnProcG7[7][6] = {{30,60,90,120,150,180}, {0,60,90,120,150,180},
const
uint16_t
lut_idxCnProcG7
[
7
][
6
]
=
{{
30
,
60
,
90
,
120
,
150
,
180
},
{
0
,
60
,
90
,
120
,
150
,
180
},
{
0
,
30
,
90
,
120
,
150
,
180
},
{
0
,
30
,
60
,
120
,
150
,
180
},
{
0
,
30
,
60
,
90
,
150
,
180
},
{
0
,
30
,
60
,
90
,
120
,
180
},
{0,30,60,90,120,150}};
*/
{
0
,
30
,
60
,
90
,
120
,
150
}};
const
uint16_t
lut_idxCnProcG7
[
7
][
6
]
=
{{
60
,
120
,
180
,
240
,
300
,
360
},
{
0
,
120
,
180
,
240
,
300
,
360
},
{
0
,
60
,
180
,
240
,
300
,
360
},
{
0
,
60
,
120
,
240
,
300
,
360
},
{
0
,
60
,
120
,
180
,
300
,
360
},
{
0
,
60
,
120
,
180
,
240
,
360
},
{
0
,
60
,
120
,
180
,
240
,
300
}};
if
(
lut_numCnInCnGroups
[
4
]
>
0
)
{
// Number of groups of 64 CNs for parallel processing
// Ceil for values not divisible by 64
<<<<<<<
HEAD
M
=
(
lut_numCnInCnGroups
[
4
]
*
Z
+
63
)
>>
31
;
// Set the offset to each bit within a group in terms of 64 Byte
bitOffsetInGroup
=
(
lut_numCnInCnGroups_BG1_R13
[
4
]
*
NR_LDPC_ZMAX
)
>>
31
;
=======
M
=
(
lut_numCnInCnGroups
[
4
]
*
Z
+
31
)
>>
7
;
M
=
(
lut_numCnInCnGroups
[
4
]
*
Z
+
63
)
>>
5
;
// Set the offset to each bit within a group in terms of 64 Byte
bitOffsetInGroup
=
(
lut_numCnInCnGroups_BG1_R13
[
4
]
*
NR_LDPC_ZMAX
)
>>
7
;
>>>>>>>
6
d9ceaa712033dc4f28050886be43572df7c2b68
bitOffsetInGroup
=
(
lut_numCnInCnGroups_BG1_R13
[
4
]
*
NR_LDPC_ZMAX
)
>>
5
;
// Set pointers to start of group 4
...
...
@@ -491,11 +385,7 @@ void nrLDPC_cnProc_BG1_generator_AVX512(uint16_t Z,int R)
fprintf
(
fd
,
" for (int i=0;i<%d;i++) {
\n
"
,
M
);
// Abs and sign of 64 CNs (first BN)
// zmm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
<<<<<<<
HEAD
fprintf
(
fd
,
" zmm0 = ((__m512i*)cnProcBuf)[%d+i];
\n
"
,(
lut_startAddrCnGroups
[
4
]
>>
31
)
+
lut_idxCnProcG7
[
j
][
0
]);
=======
fprintf
(
fd
,
" zmm0 = ((__m512i*)cnProcBuf)[%d+i];
\n
"
,(
lut_startAddrCnGroups
[
4
]
>>
7
)
+
lut_idxCnProcG7
[
j
][
0
]);
>>>>>>>
6
d9ceaa712033dc4f28050886be43572df7c2b68
fprintf
(
fd
,
" zmm0 = ((__m512i*)cnProcBuf)[%d+i];
\n
"
,(
lut_startAddrCnGroups
[
4
]
>>
5
)
+
lut_idxCnProcG7
[
j
][
0
]);
// sgn = _mm512_sign_epi16(ones, zmm0);
fprintf
(
fd
,
" sgn = _mm512_sign_epi16(ones, zmm0);
\n
"
);
// min = _mm512_abs_epi8(zmm0);
...
...
@@ -505,11 +395,7 @@ void nrLDPC_cnProc_BG1_generator_AVX512(uint16_t Z,int R)
// Loop over BNs
for
(
k
=
1
;
k
<
6
;
k
++
)
{
<<<<<<<
HEAD
fprintf
(
fd
,
" zmm0 = ((__m512i*)cnProcBuf)[%d+i];
\n
"
,(
lut_startAddrCnGroups
[
4
]
>>
31
)
+
lut_idxCnProcG7
[
j
][
k
]);
=======
fprintf
(
fd
,
" zmm0 = ((__m512i*)cnProcBuf)[%d+i];
\n
"
,(
lut_startAddrCnGroups
[
4
]
>>
7
)
+
lut_idxCnProcG7
[
j
][
k
]);
>>>>>>>
6
d9ceaa712033dc4f28050886be43572df7c2b68
fprintf
(
fd
,
" zmm0 = ((__m512i*)cnProcBuf)[%d+i];
\n
"
,(
lut_startAddrCnGroups
[
4
]
>>
5
)
+
lut_idxCnProcG7
[
j
][
k
]);
// min = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));
fprintf
(
fd
,
" min = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));
\n
"
);
...
...
@@ -523,11 +409,7 @@ void nrLDPC_cnProc_BG1_generator_AVX512(uint16_t Z,int R)
fprintf
(
fd
,
" min = _mm512_min_epu8(min, maxLLR);
\n
"
);
// *p_cnProcBufResBit = _mm512_sign_epi16(min, sgn);
// p_cnProcBufResBit++;
<<<<<<<
HEAD
fprintf
(
fd
,
" ((__m512i*)cnProcBufRes)[%d+i] = _mm512_sign_epi16(min, sgn);
\n
"
,(
lut_startAddrCnGroups
[
4
]
>>
31
)
+
(
j
*
bitOffsetInGroup
));
=======
fprintf
(
fd
,
" ((__m512i*)cnProcBufRes)[%d+i] = _mm512_sign_epi16(min, sgn);
\n
"
,(
lut_startAddrCnGroups
[
4
]
>>
7
)
+
(
j
*
bitOffsetInGroup
));
>>>>>>>
6
d9ceaa712033dc4f28050886be43572df7c2b68
fprintf
(
fd
,
" ((__m512i*)cnProcBufRes)[%d+i] = _mm512_sign_epi16(min, sgn);
\n
"
,(
lut_startAddrCnGroups
[
4
]
>>
5
)
+
(
j
*
bitOffsetInGroup
));
fprintf
(
fd
,
" }
\n
"
);
}
}
...
...
@@ -537,15 +419,10 @@ void nrLDPC_cnProc_BG1_generator_AVX512(uint16_t Z,int R)
// Process group with 8 BNs
fprintf
(
fd
,
"//Process group with 8 BNs
\n
"
);
// Offset is 2*384/64 = 12
/*
const uint8_t lut_idxCnProcG8[8][7] = {{12,24,36,48,56,72,84}, {0,24,36,48,56,72,84},
const
uint8_t
lut_idxCnProcG8
[
8
][
7
]
=
{{
12
,
24
,
36
,
48
,
56
,
72
,
84
},
{
0
,
24
,
36
,
48
,
56
,
72
,
84
},
{
0
,
12
,
36
,
48
,
56
,
72
,
84
},
{
0
,
12
,
24
,
48
,
56
,
72
,
84
},
{
0
,
12
,
24
,
36
,
56
,
72
,
84
},
{
0
,
12
,
24
,
36
,
48
,
72
,
84
},
{0,12,24,36,48,56,84}, {0,12,24,36,48,120,72}};*/
const
uint8_t
lut_idxCnProcG8
[
8
][
7
]
=
{{
24
,
48
,
72
,
96
,
120
,
144
,
168
},
{
0
,
48
,
72
,
96
,
120
,
144
,
168
},
{
0
,
24
,
72
,
96
,
120
,
144
,
168
},
{
0
,
24
,
48
,
96
,
120
,
144
,
168
},
{
0
,
24
,
48
,
72
,
120
,
144
,
168
},
{
0
,
24
,
48
,
72
,
96
,
144
,
168
},
{
0
,
24
,
48
,
72
,
96
,
120
,
168
},
{
0
,
24
,
48
,
72
,
96
,
120
,
144
}};
{
0
,
12
,
24
,
36
,
48
,
56
,
84
},
{
0
,
12
,
24
,
36
,
48
,
120
,
72
}};
...
...
@@ -553,17 +430,10 @@ void nrLDPC_cnProc_BG1_generator_AVX512(uint16_t Z,int R)
{
// Number of groups of 64 CNs for parallel processing
// Ceil for values not divisible by 64
<<<<<<<
HEAD
M
=
(
lut_numCnInCnGroups
[
5
]
*
Z
+
63
)
>>
31
;
M
=
(
lut_numCnInCnGroups
[
5
]
*
Z
+
63
)
>>
5
;
// Set the offset to each bit within a group in terms of 64 Byte
bitOffsetInGroup
=
(
lut_numCnInCnGroups_BG1_R13
[
5
]
*
NR_LDPC_ZMAX
)
>>
31
;
=======
M
=
(
lut_numCnInCnGroups
[
5
]
*
Z
+
31
)
>>
7
;
// Set the offset to each bit within a group in terms of 64 Byte
bitOffsetInGroup
=
(
lut_numCnInCnGroups_BG1_R13
[
5
]
*
NR_LDPC_ZMAX
)
>>
7
;
>>>>>>>
6
d9ceaa712033dc4f28050886be43572df7c2b68
bitOffsetInGroup
=
(
lut_numCnInCnGroups_BG1_R13
[
5
]
*
NR_LDPC_ZMAX
)
>>
5
;
// Set pointers to start of group 4
...
...
@@ -583,11 +453,7 @@ void nrLDPC_cnProc_BG1_generator_AVX512(uint16_t Z,int R)
fprintf
(
fd
,
" for (int i=0;i<%d;i++) {
\n
"
,
M
);
// Abs and sign of 64 CNs (first BN)
// zmm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
<<<<<<<
HEAD
fprintf
(
fd
,
" zmm0 = ((__m512i*)cnProcBuf)[%d+i];
\n
"
,(
lut_startAddrCnGroups
[
5
]
>>
31
)
+
lut_idxCnProcG8
[
j
][
0
]);
=======
fprintf
(
fd
,
" zmm0 = ((__m512i*)cnProcBuf)[%d+i];
\n
"
,(
lut_startAddrCnGroups
[
5
]
>>
7
)
+
lut_idxCnProcG8
[
j
][
0
]);
>>>>>>>
6
d9ceaa712033dc4f28050886be43572df7c2b68
fprintf
(
fd
,
" zmm0 = ((__m512i*)cnProcBuf)[%d+i];
\n
"
,(
lut_startAddrCnGroups
[
5
]
>>
5
)
+
lut_idxCnProcG8
[
j
][
0
]);
// sgn = _mm512_sign_epi16(ones, zmm0);
fprintf
(
fd
,
" sgn = _mm512_sign_epi16(ones, zmm0);
\n
"
);
// min = _mm512_abs_epi8(zmm0);
...
...
@@ -597,11 +463,7 @@ void nrLDPC_cnProc_BG1_generator_AVX512(uint16_t Z,int R)
// Loop over BNs
for
(
k
=
1
;
k
<
7
;
k
++
)
{
<<<<<<<
HEAD
fprintf
(
fd
,
" zmm0 = ((__m512i*)cnProcBuf)[%d+i];
\n
"
,(
lut_startAddrCnGroups
[
5
]
>>
31
)
+
lut_idxCnProcG8
[
j
][
k
]);
=======
fprintf
(
fd
,
" zmm0 = ((__m512i*)cnProcBuf)[%d+i];
\n
"
,(
lut_startAddrCnGroups
[
5
]
>>
7
)
+
lut_idxCnProcG8
[
j
][
k
]);
>>>>>>>
6
d9ceaa712033dc4f28050886be43572df7c2b68
fprintf
(
fd
,
" zmm0 = ((__m512i*)cnProcBuf)[%d+i];
\n
"
,(
lut_startAddrCnGroups
[
5
]
>>
5
)
+
lut_idxCnProcG8
[
j
][
k
]);
// min = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));
fprintf
(
fd
,
" min = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));
\n
"
);
...
...
@@ -615,11 +477,7 @@ void nrLDPC_cnProc_BG1_generator_AVX512(uint16_t Z,int R)
fprintf
(
fd
,
" min = _mm512_min_epu8(min, maxLLR);
\n
"
);
// *p_cnProcBufResBit = _mm512_sign_epi16(min, sgn);
// p_cnProcBufResBit++;
<<<<<<<
HEAD
fprintf
(
fd
,
" ((__m512i*)cnProcBufRes)[%d+i] = _mm512_sign_epi16(min, sgn);
\n
"
,(
lut_startAddrCnGroups
[
5
]
>>
31
)
+
(
j
*
bitOffsetInGroup
));
=======
fprintf
(
fd
,
" ((__m512i*)cnProcBufRes)[%d+i] = _mm512_sign_epi16(min, sgn);
\n
"
,(
lut_startAddrCnGroups
[
5
]
>>
7
)
+
(
j
*
bitOffsetInGroup
));
>>>>>>>
6
d9ceaa712033dc4f28050886be43572df7c2b68
fprintf
(
fd
,
" ((__m512i*)cnProcBufRes)[%d+i] = _mm512_sign_epi16(min, sgn);
\n
"
,(
lut_startAddrCnGroups
[
5
]
>>
5
)
+
(
j
*
bitOffsetInGroup
));
fprintf
(
fd
,
" }
\n
"
);
}
}
...
...
@@ -628,34 +486,23 @@ void nrLDPC_cnProc_BG1_generator_AVX512(uint16_t Z,int R)
// Process group with 9 BNs
fprintf
(
fd
,
"//Process group with 9 BNs
\n
"
);
// Offset is 2*384/64 = 12
/*
const uint8_t lut_idxCnProcG9[9][8] = {{12,24,36,48,60,72,84,96}, {0,24,36,48,60,72,84,96},
const
uint8_t
lut_idxCnProcG9
[
9
][
8
]
=
{{
12
,
24
,
36
,
48
,
60
,
72
,
84
,
96
},
{
0
,
24
,
36
,
48
,
60
,
72
,
84
,
96
},
{
0
,
12
,
36
,
48
,
60
,
72
,
84
,
96
},
{
0
,
12
,
24
,
48
,
60
,
72
,
84
,
96
},
{
0
,
12
,
24
,
36
,
60
,
72
,
84
,
96
},
{
0
,
12
,
24
,
36
,
48
,
72
,
84
,
96
},
{
0
,
12
,
24
,
36
,
48
,
60
,
84
,
96
},
{
0
,
12
,
24
,
36
,
48
,
60
,
72
,
96
},
{0,12,24,36,48,60,72,84}};*/
{
0
,
12
,
24
,
36
,
48
,
60
,
72
,
84
}};
const
uint8_t
lut_idxCnProcG9
[
9
][
8
]
=
{{
24
,
48
,
72
,
96
,
120
,
144
,
168
,
192
},
{
0
,
48
,
72
,
96
,
120
,
144
,
168
,
192
},
{
0
,
24
,
72
,
96
,
120
,
144
,
168
,
192
},
{
0
,
24
,
48
,
96
,
120
,
144
,
168
,
192
},
{
0
,
24
,
48
,
72
,
120
,
144
,
168
,
192
},
{
0
,
24
,
48
,
72
,
96
,
144
,
168
,
192
},
{
0
,
24
,
48
,
72
,
96
,
120
,
168
,
192
},
{
0
,
24
,
48
,
72
,
96
,
120
,
144
,
192
},
{
0
,
24
,
48
,
72
,
96
,
120
,
144
,
168
}};
if
(
lut_numCnInCnGroups
[
6
]
>
0
)
{
// Number of groups of 64 CNs for parallel processing
// Ceil for values not divisible by 64
<<<<<<<
HEAD
M
=
(
lut_numCnInCnGroups
[
6
]
*
Z
+
63
)
>>
31
;
M
=
(
lut_numCnInCnGroups
[
6
]
*
Z
+
63
)
>>
5
;
// Set the offset to each bit within a group in terms of 64 Byte
bitOffsetInGroup
=
(
lut_numCnInCnGroups_BG1_R13
[
6
]
*
NR_LDPC_ZMAX
)
>>
31
;
=======
M
=
(
lut_numCnInCnGroups
[
6
]
*
Z
+
31
)
>>
7
;
// Set the offset to each bit within a group in terms of 64 Byte
bitOffsetInGroup
=
(
lut_numCnInCnGroups_BG1_R13
[
6
]
*
NR_LDPC_ZMAX
)
>>
7
;
>>>>>>>
6
d9ceaa712033dc4f28050886be43572df7c2b68
bitOffsetInGroup
=
(
lut_numCnInCnGroups_BG1_R13
[
6
]
*
NR_LDPC_ZMAX
)
>>
5
;
// Set pointers to start of group 9
...
...
@@ -675,11 +522,7 @@ void nrLDPC_cnProc_BG1_generator_AVX512(uint16_t Z,int R)
fprintf
(
fd
,
" for (int i=0;i<%d;i++) {
\n
"
,
M
);
// Abs and sign of 64 CNs (first BN)
// zmm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
<<<<<<<
HEAD
fprintf
(
fd
,
" zmm0 = ((__m512i*)cnProcBuf)[%d+i];
\n
"
,(
lut_startAddrCnGroups
[
6
]
>>
31
)
+
lut_idxCnProcG9
[
j
][
0
]);
=======
fprintf
(
fd
,
" zmm0 = ((__m512i*)cnProcBuf)[%d+i];
\n
"
,(
lut_startAddrCnGroups
[
6
]
>>
7
)
+
lut_idxCnProcG9
[
j
][
0
]);
>>>>>>>
6
d9ceaa712033dc4f28050886be43572df7c2b68
fprintf
(
fd
,
" zmm0 = ((__m512i*)cnProcBuf)[%d+i];
\n
"
,(
lut_startAddrCnGroups
[
6
]
>>
5
)
+
lut_idxCnProcG9
[
j
][
0
]);
// sgn = _mm512_sign_epi16(ones, zmm0);
fprintf
(
fd
,
" sgn = _mm512_sign_epi16(ones, zmm0);
\n
"
);
// min = _mm512_abs_epi8(zmm0);
...
...
@@ -689,11 +532,7 @@ void nrLDPC_cnProc_BG1_generator_AVX512(uint16_t Z,int R)
// Loop over BNs
for
(
k
=
1
;
k
<
8
;
k
++
)
{
<<<<<<<
HEAD
fprintf
(
fd
,
" zmm0 = ((__m512i*)cnProcBuf)[%d+i];
\n
"
,(
lut_startAddrCnGroups
[
6
]
>>
31
)
+
lut_idxCnProcG9
[
j
][
k
]);
=======
fprintf
(
fd
,
" zmm0 = ((__m512i*)cnProcBuf)[%d+i];
\n
"
,(
lut_startAddrCnGroups
[
6
]
>>
7
)
+
lut_idxCnProcG9
[
j
][
k
]);
>>>>>>>
6
d9ceaa712033dc4f28050886be43572df7c2b68
fprintf
(
fd
,
" zmm0 = ((__m512i*)cnProcBuf)[%d+i];
\n
"
,(
lut_startAddrCnGroups
[
6
]
>>
5
)
+
lut_idxCnProcG9
[
j
][
k
]);
// min = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));
fprintf
(
fd
,
" min = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));
\n
"
);
...
...
@@ -707,11 +546,7 @@ void nrLDPC_cnProc_BG1_generator_AVX512(uint16_t Z,int R)
fprintf
(
fd
,
" min = _mm512_min_epu8(min, maxLLR);
\n
"
);
// *p_cnProcBufResBit = _mm512_sign_epi16(min, sgn);
// p_cnProcBufResBit++;
<<<<<<<
HEAD
fprintf
(
fd
,
" ((__m512i*)cnProcBufRes)[%d+i] = _mm512_sign_epi16(min, sgn);
\n
"
,(
lut_startAddrCnGroups
[
6
]
>>
31
)
+
(
j
*
bitOffsetInGroup
));
=======
fprintf
(
fd
,
" ((__m512i*)cnProcBufRes)[%d+i] = _mm512_sign_epi16(min, sgn);
\n
"
,(
lut_startAddrCnGroups
[
6
]
>>
7
)
+
(
j
*
bitOffsetInGroup
));
>>>>>>>
6
d9ceaa712033dc4f28050886be43572df7c2b68
fprintf
(
fd
,
" ((__m512i*)cnProcBufRes)[%d+i] = _mm512_sign_epi16(min, sgn);
\n
"
,(
lut_startAddrCnGroups
[
6
]
>>
5
)
+
(
j
*
bitOffsetInGroup
));
fprintf
(
fd
,
" }
\n
"
);
}
}
...
...
@@ -720,17 +555,11 @@ void nrLDPC_cnProc_BG1_generator_AVX512(uint16_t Z,int R)
// Process group with 10 BNs
fprintf
(
fd
,
"//Process group with 10 BNs
\n
"
);
// Offset is 1*384/64 = 6
/*
const uint8_t lut_idxCnProcG10[10][9] = {{6,12,18,24,30,36,42,48,54}, {0,12,18,24,30,36,42,48,54},
const
uint8_t
lut_idxCnProcG10
[
10
][
9
]
=
{{
6
,
12
,
18
,
24
,
30
,
36
,
42
,
48
,
54
},
{
0
,
12
,
18
,
24
,
30
,
36
,
42
,
48
,
54
},
{
0
,
6
,
18
,
24
,
30
,
36
,
42
,
48
,
54
},
{
0
,
6
,
12
,
24
,
30
,
36
,
42
,
48
,
54
},
{
0
,
6
,
12
,
18
,
30
,
36
,
42
,
48
,
54
},
{
0
,
6
,
12
,
18
,
24
,
36
,
42
,
48
,
54
},
{
0
,
6
,
12
,
18
,
24
,
30
,
42
,
48
,
54
},
{
0
,
6
,
12
,
18
,
24
,
30
,
36
,
48
,
54
},
{0,6,12,18,24,30,36,42,54}, {0,6,12,36,24,30,36,42,48}};*/
const
uint8_t
lut_idxCnProcG10
[
10
][
9
]
=
{{
12
,
24
,
36
,
48
,
60
,
72
,
84
,
96
,
108
},
{
0
,
24
,
36
,
48
,
60
,
72
,
84
,
96
,
108
},
{
0
,
12
,
36
,
48
,
60
,
72
,
84
,
96
,
108
},
{
0
,
12
,
24
,
48
,
60
,
72
,
84
,
96
,
108
},
{
0
,
12
,
24
,
36
,
60
,
72
,
84
,
96
,
108
},
{
0
,
12
,
24
,
36
,
48
,
72
,
84
,
96
,
108
},
{
0
,
12
,
24
,
36
,
48
,
60
,
84
,
96
,
108
},
{
0
,
12
,
24
,
36
,
48
,
60
,
72
,
96
,
108
},
{
0
,
12
,
24
,
36
,
48
,
60
,
72
,
84
,
108
},
{
0
,
12
,
24
,
36
,
48
,
60
,
72
,
84
,
96
}};
{
0
,
6
,
12
,
18
,
24
,
30
,
36
,
42
,
54
},
{
0
,
6
,
12
,
36
,
24
,
30
,
36
,
42
,
48
}};
...
...
@@ -740,17 +569,10 @@ void nrLDPC_cnProc_BG1_generator_AVX512(uint16_t Z,int R)
{
// Number of groups of 64 CNs for parallel processing
// Ceil for values not divisible by 64
<<<<<<<
HEAD
M
=
(
lut_numCnInCnGroups
[
7
]
*
Z
+
63
)
>>
31
;
// Set the offset to each bit within a group in terms of 64 Byte
bitOffsetInGroup
=
(
lut_numCnInCnGroups_BG1_R13
[
7
]
*
NR_LDPC_ZMAX
)
>>
31
;
=======
M
=
(
lut_numCnInCnGroups
[
7
]
*
Z
+
31
)
>>
7
;
M
=
(
lut_numCnInCnGroups
[
7
]
*
Z
+
63
)
>>
5
;
// Set the offset to each bit within a group in terms of 64 Byte
bitOffsetInGroup
=
(
lut_numCnInCnGroups_BG1_R13
[
7
]
*
NR_LDPC_ZMAX
)
>>
7
;
>>>>>>>
6
d9ceaa712033dc4f28050886be43572df7c2b68
bitOffsetInGroup
=
(
lut_numCnInCnGroups_BG1_R13
[
7
]
*
NR_LDPC_ZMAX
)
>>
5
;
// Set pointers to start of group 10
...
...
@@ -770,11 +592,7 @@ void nrLDPC_cnProc_BG1_generator_AVX512(uint16_t Z,int R)
fprintf
(
fd
,
" for (int i=0;i<%d;i++) {
\n
"
,
M
);
// Abs and sign of 64 CNs (first BN)
// zmm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
<<<<<<<
HEAD
fprintf
(
fd
,
" zmm0 = ((__m512i*)cnProcBuf)[%d+i];
\n
"
,(
lut_startAddrCnGroups
[
7
]
>>
31
)
+
lut_idxCnProcG10
[
j
][
0
]);
=======
fprintf
(
fd
,
" zmm0 = ((__m512i*)cnProcBuf)[%d+i];
\n
"
,(
lut_startAddrCnGroups
[
7
]
>>
7
)
+
lut_idxCnProcG10
[
j
][
0
]);
>>>>>>>
6
d9ceaa712033dc4f28050886be43572df7c2b68
fprintf
(
fd
,
" zmm0 = ((__m512i*)cnProcBuf)[%d+i];
\n
"
,(
lut_startAddrCnGroups
[
7
]
>>
5
)
+
lut_idxCnProcG10
[
j
][
0
]);
// sgn = _mm512_sign_epi16(ones, zmm0);
fprintf
(
fd
,
" sgn = _mm512_sign_epi16(ones, zmm0);
\n
"
);
// min = _mm512_abs_epi8(zmm0);
...
...
@@ -784,11 +602,7 @@ void nrLDPC_cnProc_BG1_generator_AVX512(uint16_t Z,int R)
// Loop over BNs
for
(
k
=
1
;
k
<
9
;
k
++
)
{
<<<<<<<
HEAD
fprintf
(
fd
,
" zmm0 = ((__m512i*)cnProcBuf)[%d+i];
\n
"
,(
lut_startAddrCnGroups
[
7
]
>>
31
)
+
lut_idxCnProcG10
[
j
][
k
]);
=======
fprintf
(
fd
,
" zmm0 = ((__m512i*)cnProcBuf)[%d+i];
\n
"
,(
lut_startAddrCnGroups
[
7
]
>>
7
)
+
lut_idxCnProcG10
[
j
][
k
]);
>>>>>>>
6
d9ceaa712033dc4f28050886be43572df7c2b68
fprintf
(
fd
,
" zmm0 = ((__m512i*)cnProcBuf)[%d+i];
\n
"
,(
lut_startAddrCnGroups
[
7
]
>>
5
)
+
lut_idxCnProcG10
[
j
][
k
]);
// min = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));
fprintf
(
fd
,
" min = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));
\n
"
);
...
...
@@ -802,11 +616,7 @@ void nrLDPC_cnProc_BG1_generator_AVX512(uint16_t Z,int R)
fprintf
(
fd
,
" min = _mm512_min_epu8(min, maxLLR);
\n
"
);
// *p_cnProcBufResBit = _mm512_sign_epi16(min, sgn);
// p_cnProcBufResBit++;
<<<<<<<
HEAD
fprintf
(
fd
,
" ((__m512i*)cnProcBufRes)[%d+i] = _mm512_sign_epi16(min, sgn);
\n
"
,(
lut_startAddrCnGroups
[
7
]
>>
31
)
+
(
j
*
bitOffsetInGroup
));
=======
fprintf
(
fd
,
" ((__m512i*)cnProcBufRes)[%d+i] = _mm512_sign_epi16(min, sgn);
\n
"
,(
lut_startAddrCnGroups
[
7
]
>>
7
)
+
(
j
*
bitOffsetInGroup
));
>>>>>>>
6
d9ceaa712033dc4f28050886be43572df7c2b68
fprintf
(
fd
,
" ((__m512i*)cnProcBufRes)[%d+i] = _mm512_sign_epi16(min, sgn);
\n
"
,(
lut_startAddrCnGroups
[
7
]
>>
5
)
+
(
j
*
bitOffsetInGroup
));
fprintf
(
fd
,
" }
\n
"
);
}
}
...
...
@@ -816,7 +626,7 @@ void nrLDPC_cnProc_BG1_generator_AVX512(uint16_t Z,int R)
// Process group with 19 BNs
fprintf
(
fd
,
"//Process group with 19 BNs
\n
"
);
// Offset is 4*384/64 = 24
/*
const uint16_t lut_idxCnProcG19[19][18] = {{24,48,72,96,120,144,168,192,216,240,264,288,312,336,360,384,408,432}, {0,48,72,96,120,144,168,192,216,240,264,288,312,336,360,384,408,432},
const
uint16_t
lut_idxCnProcG19
[
19
][
18
]
=
{{
24
,
48
,
72
,
96
,
120
,
144
,
168
,
192
,
216
,
240
,
264
,
288
,
312
,
336
,
360
,
384
,
408
,
432
},
{
0
,
48
,
72
,
96
,
120
,
144
,
168
,
192
,
216
,
240
,
264
,
288
,
312
,
336
,
360
,
384
,
408
,
432
},
{
0
,
24
,
72
,
96
,
120
,
144
,
168
,
192
,
216
,
240
,
264
,
288
,
312
,
336
,
360
,
384
,
408
,
432
},
{
0
,
24
,
48
,
96
,
120
,
144
,
168
,
192
,
216
,
240
,
264
,
288
,
312
,
336
,
360
,
384
,
408
,
432
},
{
0
,
24
,
48
,
72
,
120
,
144
,
168
,
192
,
216
,
240
,
264
,
288
,
312
,
336
,
360
,
384
,
408
,
432
},
{
0
,
24
,
48
,
72
,
96
,
144
,
168
,
192
,
216
,
240
,
264
,
288
,
312
,
336
,
360
,
384
,
408
,
432
},
{
0
,
24
,
48
,
72
,
96
,
120
,
168
,
192
,
216
,
240
,
264
,
288
,
312
,
336
,
360
,
384
,
408
,
432
},
{
0
,
24
,
48
,
72
,
96
,
120
,
144
,
192
,
216
,
240
,
264
,
288
,
312
,
336
,
360
,
384
,
408
,
432
},
...
...
@@ -825,35 +635,17 @@ void nrLDPC_cnProc_BG1_generator_AVX512(uint16_t Z,int R)
{
0
,
24
,
48
,
72
,
96
,
120
,
144
,
168
,
192
,
216
,
240
,
264
,
312
,
336
,
360
,
384
,
408
,
432
},
{
0
,
24
,
48
,
72
,
96
,
120
,
144
,
168
,
192
,
216
,
240
,
264
,
288
,
336
,
360
,
384
,
408
,
432
},
{
0
,
24
,
48
,
72
,
96
,
120
,
144
,
168
,
192
,
216
,
240
,
264
,
288
,
312
,
360
,
384
,
408
,
432
},
{
0
,
24
,
48
,
72
,
96
,
120
,
144
,
168
,
192
,
216
,
240
,
264
,
288
,
312
,
336
,
384
,
408
,
432
},
{
0
,
24
,
48
,
72
,
96
,
120
,
144
,
168
,
192
,
216
,
240
,
264
,
288
,
312
,
336
,
360
,
408
,
432
},
{
0
,
24
,
48
,
72
,
96
,
120
,
144
,
168
,
192
,
216
,
240
,
264
,
288
,
312
,
336
,
360
,
384
,
432
},
{0,24,48,72,96,120,144,168,192,216,240,264,288,312,336,360,384,408}};*/
const
uint16_t
lut_idxCnProcG19
[
19
][
18
]
=
{{
48
,
96
,
144
,
192
,
240
,
288
,
336
,
384
,
432
,
480
,
528
,
576
,
624
,
672
,
720
,
768
,
816
,
864
},
{
0
,
96
,
144
,
192
,
240
,
288
,
336
,
384
,
432
,
480
,
528
,
576
,
624
,
672
,
720
,
768
,
816
,
864
},
{
0
,
48
,
144
,
192
,
240
,
288
,
336
,
384
,
432
,
480
,
528
,
576
,
624
,
672
,
720
,
768
,
816
,
864
},
{
0
,
48
,
96
,
192
,
240
,
288
,
336
,
384
,
432
,
480
,
528
,
576
,
624
,
672
,
720
,
768
,
816
,
864
},
{
0
,
48
,
96
,
144
,
240
,
288
,
336
,
384
,
432
,
480
,
528
,
576
,
624
,
672
,
720
,
768
,
816
,
864
},
{
0
,
48
,
96
,
144
,
192
,
288
,
336
,
384
,
432
,
480
,
528
,
576
,
624
,
672
,
720
,
768
,
816
,
864
},
{
0
,
48
,
96
,
144
,
192
,
240
,
336
,
384
,
432
,
480
,
528
,
576
,
624
,
672
,
720
,
768
,
816
,
864
},
{
0
,
48
,
96
,
144
,
192
,
240
,
288
,
384
,
432
,
480
,
528
,
576
,
624
,
672
,
720
,
768
,
816
,
864
},
{
0
,
48
,
96
,
144
,
192
,
240
,
288
,
336
,
432
,
480
,
528
,
576
,
624
,
672
,
720
,
768
,
816
,
864
},
{
0
,
48
,
96
,
144
,
192
,
240
,
288
,
336
,
384
,
480
,
528
,
576
,
624
,
672
,
720
,
768
,
816
,
864
},
{
0
,
48
,
96
,
144
,
192
,
240
,
288
,
336
,
384
,
432
,
528
,
576
,
624
,
672
,
720
,
768
,
816
,
864
},
{
0
,
48
,
96
,
144
,
192
,
240
,
288
,
336
,
384
,
432
,
480
,
576
,
624
,
672
,
720
,
768
,
816
,
864
},
{
0
,
48
,
96
,
144
,
192
,
240
,
288
,
336
,
384
,
432
,
480
,
528
,
624
,
672
,
720
,
768
,
816
,
864
},
{
0
,
48
,
96
,
144
,
192
,
240
,
288
,
336
,
384
,
432
,
480
,
528
,
576
,
672
,
720
,
768
,
816
,
864
},
{
0
,
48
,
96
,
144
,
192
,
240
,
288
,
336
,
384
,
432
,
480
,
528
,
576
,
624
,
720
,
768
,
816
,
864
},
{
0
,
48
,
96
,
144
,
192
,
240
,
288
,
336
,
384
,
432
,
480
,
528
,
576
,
624
,
672
,
768
,
816
,
864
},
{
0
,
48
,
96
,
144
,
192
,
240
,
288
,
336
,
384
,
432
,
480
,
528
,
576
,
624
,
672
,
720
,
816
,
864
},
{
0
,
48
,
96
,
144
,
192
,
240
,
288
,
336
,
384
,
432
,
480
,
528
,
576
,
624
,
672
,
720
,
768
,
864
},
{
0
,
48
,
96
,
144
,
192
,
240
,
288
,
336
,
384
,
432
,
480
,
528
,
576
,
624
,
672
,
720
,
768
,
816
}};
{
0
,
24
,
48
,
72
,
96
,
120
,
144
,
168
,
192
,
216
,
240
,
264
,
288
,
312
,
336
,
360
,
384
,
408
}};
if
(
lut_numCnInCnGroups
[
8
]
>
0
)
{
// Number of groups of 64 CNs for parallel processing
// Ceil for values not divisible by 64
<<<<<<<
HEAD
M
=
(
lut_numCnInCnGroups
[
8
]
*
Z
+
63
)
>>
31
;
// Set the offset to each bit within a group in terms of 64 Byte
bitOffsetInGroup
=
(
lut_numCnInCnGroups_BG1_R13
[
8
]
*
NR_LDPC_ZMAX
)
>>
31
;
=======
M
=
(
lut_numCnInCnGroups
[
8
]
*
Z
+
31
)
>>
7
;
M
=
(
lut_numCnInCnGroups
[
8
]
*
Z
+
63
)
>>
5
;
// Set the offset to each bit within a group in terms of 64 Byte
bitOffsetInGroup
=
(
lut_numCnInCnGroups_BG1_R13
[
8
]
*
NR_LDPC_ZMAX
)
>>
7
;
>>>>>>>
6
d9ceaa712033dc4f28050886be43572df7c2b68
bitOffsetInGroup
=
(
lut_numCnInCnGroups_BG1_R13
[
8
]
*
NR_LDPC_ZMAX
)
>>
5
;
// Set pointers to start of group 19
...
...
@@ -873,11 +665,7 @@ const uint16_t lut_idxCnProcG19[19][18] = {{48,96,144,192,240,288,336,384,432,48
fprintf
(
fd
,
" for (int i=0;i<%d;i++) {
\n
"
,
M
);
// Abs and sign of 64 CNs (first BN)
// zmm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
<<<<<<<
HEAD
fprintf
(
fd
,
" zmm0 = ((__m512i*)cnProcBuf)[%d+i];
\n
"
,(
lut_startAddrCnGroups
[
8
]
>>
31
)
+
lut_idxCnProcG19
[
j
][
0
]);
=======
fprintf
(
fd
,
" zmm0 = ((__m512i*)cnProcBuf)[%d+i];
\n
"
,(
lut_startAddrCnGroups
[
8
]
>>
7
)
+
lut_idxCnProcG19
[
j
][
0
]);
>>>>>>>
6
d9ceaa712033dc4f28050886be43572df7c2b68
fprintf
(
fd
,
" zmm0 = ((__m512i*)cnProcBuf)[%d+i];
\n
"
,(
lut_startAddrCnGroups
[
8
]
>>
5
)
+
lut_idxCnProcG19
[
j
][
0
]);
// sgn = _mm512_sign_epi16(ones, zmm0);
fprintf
(
fd
,
" sgn = _mm512_sign_epi16(ones, zmm0);
\n
"
);
// min = _mm512_abs_epi8(zmm0);
...
...
@@ -887,11 +675,7 @@ const uint16_t lut_idxCnProcG19[19][18] = {{48,96,144,192,240,288,336,384,432,48
// Loop over BNs
for
(
k
=
1
;
k
<
18
;
k
++
)
{
<<<<<<<
HEAD
fprintf
(
fd
,
" zmm0 = ((__m512i*)cnProcBuf)[%d+i];
\n
"
,(
lut_startAddrCnGroups
[
8
]
>>
31
)
+
lut_idxCnProcG19
[
j
][
k
]);
=======
fprintf
(
fd
,
" zmm0 = ((__m512i*)cnProcBuf)[%d+i];
\n
"
,(
lut_startAddrCnGroups
[
8
]
>>
7
)
+
lut_idxCnProcG19
[
j
][
k
]);
>>>>>>>
6
d9ceaa712033dc4f28050886be43572df7c2b68
fprintf
(
fd
,
" zmm0 = ((__m512i*)cnProcBuf)[%d+i];
\n
"
,(
lut_startAddrCnGroups
[
8
]
>>
5
)
+
lut_idxCnProcG19
[
j
][
k
]);
// min = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));
fprintf
(
fd
,
" min = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));
\n
"
);
...
...
@@ -905,11 +689,7 @@ const uint16_t lut_idxCnProcG19[19][18] = {{48,96,144,192,240,288,336,384,432,48
fprintf
(
fd
,
" min = _mm512_min_epu8(min, maxLLR);
\n
"
);
// *p_cnProcBufResBit = _mm512_sign_epi16(min, sgn);
// p_cnProcBufResBit++;
<<<<<<<
HEAD
fprintf
(
fd
,
" ((__m512i*)cnProcBufRes)[%d+i] = _mm512_sign_epi16(min, sgn);
\n
"
,(
lut_startAddrCnGroups
[
8
]
>>
31
)
+
(
j
*
bitOffsetInGroup
));
=======
fprintf
(
fd
,
" ((__m512i*)cnProcBufRes)[%d+i] = _mm512_sign_epi16(min, sgn);
\n
"
,(
lut_startAddrCnGroups
[
8
]
>>
7
)
+
(
j
*
bitOffsetInGroup
));
>>>>>>>
6
d9ceaa712033dc4f28050886be43572df7c2b68
fprintf
(
fd
,
" ((__m512i*)cnProcBufRes)[%d+i] = _mm512_sign_epi16(min, sgn);
\n
"
,(
lut_startAddrCnGroups
[
8
]
>>
5
)
+
(
j
*
bitOffsetInGroup
));
fprintf
(
fd
,
" }
\n
"
);
}
}
...
...
openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_avx512/cnProc_gen_avx512
View file @
265b5c4d
No preview for this file type
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment