Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
O
OpenXG-RAN
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
promise
OpenXG-RAN
Commits
fa3a637b
Commit
fa3a637b
authored
May 04, 2020
by
Raymond Knopp
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
cleanup of generator, added directory/file creation for output
parent
550ab500
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
434 additions
and
420 deletions
+434
-420
openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/cnProc_gen.c
openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/cnProc_gen.c
+434
-420
No files found.
openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/cnProc_gen.c
View file @
fa3a637b
...
...
@@ -11,636 +11,650 @@
void
nrLDPC_cnProc_BG1_generator
(
t_nrLDPC_procBuf
*
p_procBuf
,
uint16_t
Z
,
int
R
)
{
printf
(
"void nrLDPC_cnProc_BG1_Z%d
\n
"
,
Z
)
;
const
char
*
ratestr
[
3
]
=
{
"13"
,
"23"
,
"89"
}
;
const
uint8_t
*
lut_numCnInCnGroups
;
const
uint32_t
*
lut_startAddrCnGroups
=
lut_startAddrCnGroups_BG1
;
if
(
R
<
0
||
R
>
2
)
{
printf
(
"Illegal R %d
\n
"
,
R
);
abort
();}
if
(
R
==
0
)
lut_numCnInCnGroups
=
lut_numCnInCnGroups_BG1_R13
;
else
if
(
R
==
1
)
lut_numCnInCnGroups
=
lut_numCnInCnGroups_BG1_R23
;
else
if
(
R
==
2
)
lut_numCnInCnGroups
=
lut_numCnInCnGroups_BG1_R89
;
else
{
printf
(
"aborting, illegal R %d
\n
"
,
R
);
abort
();}
int8_t
*
cnProcBuf
=
p_procBuf
->
cnProcBuf
;
int8_t
*
cnProcBufRes
=
p_procBuf
->
cnProcBufRes
;
system
(
"mkdir -p ldpc_gen_files"
);
//__m256i* p_cnProcBuf;
//__m256i* p_cnProcBufRes;
char
fname
[
50
];
sprintf
(
fname
,
"ldpc_gen_files/nrLDPC_cnProc_BG1_Z%d_%s.c"
,
Z
,
ratestr
[
R
]);
FILE
*
fd
=
fopen
(
fname
,
"w"
);
if
(
fd
==
NULL
)
{
printf
(
"Cannot create %s
\n
"
);
abort
();}
// Number of CNs in Groups
uint32_t
M
;
uint32_t
i
;
uint32_t
j
;
uint32_t
k
;
// Offset to each bit within a group in terms of 32 Byte
uint32_t
bitOffsetInGroup
;
fprintf
(
fd
,
"void nrLDPC_cnProc_BG1_Z%d_%s(t_nrLDPC_procBuf* p_procBuf) {
\n
"
,
Z
,
ratestr
[
R
]);
//__m256i ymm0, min, sgn
;
//__m256i* p_cnProcBufResBit
;
const
uint8_t
*
lut_numCnInCnGroups
;
const
uint32_t
*
lut_startAddrCnGroups
=
lut_startAddrCnGroups_BG1
;
// const __m256i* p_ones = (__m256i*) ones256_epi8;
if
(
R
==
0
)
lut_numCnInCnGroups
=
lut_numCnInCnGroups_BG1_R13
;
else
if
(
R
==
1
)
lut_numCnInCnGroups
=
lut_numCnInCnGroups_BG1_R23
;
else
if
(
R
==
2
)
lut_numCnInCnGroups
=
lut_numCnInCnGroups_BG1_R89
;
else
{
printf
(
"aborting, illegal R %d
\n
"
,
R
);
fclose
(
fd
);
abort
();}
int8_t
*
cnProcBuf
=
p_procBuf
->
cnProcBuf
;
int8_t
*
cnProcBufRes
=
p_procBuf
->
cnProcBufRes
;
//__m256i* p_cnProcBuf;
//__m256i* p_cnProcBufRes;
// Number of CNs in Groups
uint32_t
M
;
uint32_t
i
;
uint32_t
j
;
uint32_t
k
;
// Offset to each bit within a group in terms of 32 Byte
uint32_t
bitOffsetInGroup
;
//__m256i ymm0, min, sgn;
//__m256i* p_cnProcBufResBit;
// const __m256i* p_ones = (__m256i*) ones256_epi8;
// const __m256i* p_maxLLR = (__m256i*) maxLLR256_epi8;
// LUT with offsets for bits that need to be processed
// 1. bit proc requires LLRs of 2. and 3. bit, 2.bits of 1. and 3. etc.
// Offsets are in units of bitOffsetInGroup (1*384/32)
// const uint8_t lut_idxCnProcG3[3][2] = {{12,24}, {0,24}, {0,12}};
// LUT with offsets for bits that need to be processed
// 1. bit proc requires LLRs of 2. and 3. bit, 2.bits of 1. and 3. etc.
// Offsets are in units of bitOffsetInGroup (1*384/32)
// const uint8_t lut_idxCnProcG3[3][2] = {{12,24}, {0,24}, {0,12}};
// =====================================================================
// Process group with 3 BNs
// =====================================================================
// Process group with 3 BNs
// LUT with offsets for bits that need to be processed
// 1. bit proc requires LLRs of 2. and 3. bit, 2.bits of 1. and 3. etc.
// Offsets are in units of bitOffsetInGroup (1*384/32)
const
uint8_t
lut_idxCnProcG3
[
3
][
2
]
=
{{
12
,
24
},
{
0
,
24
},
{
0
,
12
}};
// LUT with offsets for bits that need to be processed
// 1. bit proc requires LLRs of 2. and 3. bit, 2.bits of 1. and 3. etc.
// Offsets are in units of bitOffsetInGroup (1*384/32)
const
uint8_t
lut_idxCnProcG3
[
3
][
2
]
=
{{
12
,
24
},
{
0
,
24
},
{
0
,
12
}};
printf
(
"
__m256i ymm0, min, sgn;
\n
"
);
printf
(
"
const __m256i* p_ones = (__m256i*) ones256_epi8;
\n
"
);
printf
(
"
const __m256i* p_maxLLR = (__m256i*) maxLLR256_epi8;
\n
"
);
fprintf
(
fd
,
"
__m256i ymm0, min, sgn;
\n
"
);
fprintf
(
fd
,
"
const __m256i* p_ones = (__m256i*) ones256_epi8;
\n
"
);
fprintf
(
fd
,
"
const __m256i* p_maxLLR = (__m256i*) maxLLR256_epi8;
\n
"
);
if
(
lut_numCnInCnGroups
[
0
]
>
0
)
if
(
lut_numCnInCnGroups
[
0
]
>
0
)
{
// Number of groups of 32 CNs for parallel processing
// Ceil for values not divisible by 32
M
=
(
lut_numCnInCnGroups
[
0
]
*
Z
+
31
)
>>
5
;
// Number of groups of 32 CNs for parallel processing
// Ceil for values not divisible by 32
M
=
(
lut_numCnInCnGroups
[
0
]
*
Z
+
31
)
>>
5
;
// Set the offset to each bit within a group in terms of 32 Byte
bitOffsetInGroup
=
(
lut_numCnInCnGroups_BG1_R13
[
0
]
*
NR_LDPC_ZMAX
)
>>
5
;
// Set the offset to each bit within a group in terms of 32 Byte
bitOffsetInGroup
=
(
lut_numCnInCnGroups_BG1_R13
[
0
]
*
NR_LDPC_ZMAX
)
>>
5
;
// Set pointers to start of group 3
//p_cnProcBuf = (__m256i*) &cnProcBuf [lut_startAddrCnGroups[0]];
//p_cnProcBufRes = (__m256i*) &cnProcBufRes[lut_startAddrCnGroups[0]];
// Set pointers to start of group 3
//p_cnProcBuf = (__m256i*) &cnProcBuf [lut_startAddrCnGroups[0]];
//p_cnProcBufRes = (__m256i*) &cnProcBufRes[lut_startAddrCnGroups[0]];
// Loop over every BN
int
iprime
=
0
;
for
(
j
=
0
;
j
<
3
;
j
++
)
// Loop over every BN
int
iprime
=
0
;
for
(
j
=
0
;
j
<
3
;
j
++
)
{
// Set of results pointer to correct BN address
//p_cnProcBufResBit = p_cnProcBufRes + (j*bitOffsetInGroup);
// Set of results pointer to correct BN address
//p_cnProcBufResBit = p_cnProcBufRes + (j*bitOffsetInGroup);
// Loop over CNs
for
(
i
=
0
;
i
<
M
;
i
++
,
iprime
++
)
// Loop over CNs
for
(
i
=
0
;
i
<
M
;
i
++
,
iprime
++
)
{
// Abs and sign of 32 CNs (first BN)
// ymm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
printf
(
" ymm0 = ((__m256i*)&cnProcBuf)[%d];
\n
"
,
lut_startAddrCnGroups
[
0
]
+
lut_idxCnProcG3
[
j
][
0
]
+
i
);
// sgn = _mm256_sign_epi8(*p_ones, ymm0);
printf
(
" sgn = _mm256_sign_epi8(*p_ones, ymm0);
\n
"
);
// min = _mm256_abs_epi8(ymm0);
printf
(
" min = _mm256_abs_epi8(ymm0);
\n
"
);
// 32 CNs of second BN
// ymm0 = p_cnProcBuf[lut_idxCnProcG3[j][1] + i];
printf
(
" ymm0 = ((__m256i*)&cnProcBuf)[%d];
\n
"
,
lut_startAddrCnGroups
[
0
]
+
lut_idxCnProcG3
[
j
][
1
]
+
i
);
// min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
printf
(
" min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
\n
"
);
// sgn = _mm256_sign_epi8(sgn, ymm0);
printf
(
" sgn = _mm256_sign_epi8(sgn, ymm0);
\n
"
);
// Store result
// min = _mm256_min_epu8(min, *p_maxLLR); // 128 in epi8 is -128
printf
(
" min = _mm256_min_epu8(min, *p_maxLLR);
\n
"
);
// *p_cnProcBufResBit = _mm256_sign_epi8(min, sgn);
// p_cnProcBufResBit++;
printf
(
" ((__m256i*)cnProcB ufRestBit)[%d] = _mm256_sign_epi8(min, sgn);
\n
"
,
lut_numCnInCnGroups
[
0
]
+
(
j
*
bitOffsetInGroup
)
+
i
);
// Abs and sign of 32 CNs (first BN)
// ymm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
fprintf
(
fd
,
" ymm0 = ((__m256i*)&cnProcBuf)[%d];
\n
"
,
lut_startAddrCnGroups
[
0
]
+
lut_idxCnProcG3
[
j
][
0
]
+
i
);
// sgn = _mm256_sign_epi8(*p_ones, ymm0);
fprintf
(
fd
,
" sgn = _mm256_sign_epi8(*p_ones, ymm0);
\n
"
);
// min = _mm256_abs_epi8(ymm0);
fprintf
(
fd
,
" min = _mm256_abs_epi8(ymm0);
\n
"
);
// 32 CNs of second BN
// ymm0 = p_cnProcBuf[lut_idxCnProcG3[j][1] + i];
fprintf
(
fd
,
" ymm0 = ((__m256i*)&cnProcBuf)[%d];
\n
"
,
lut_startAddrCnGroups
[
0
]
+
lut_idxCnProcG3
[
j
][
1
]
+
i
);
// min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
fprintf
(
fd
,
" min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
\n
"
);
// sgn = _mm256_sign_epi8(sgn, ymm0);
fprintf
(
fd
,
" sgn = _mm256_sign_epi8(sgn, ymm0);
\n
"
);
// Store result
// min = _mm256_min_epu8(min, *p_maxLLR); // 128 in epi8 is -128
fprintf
(
fd
,
" min = _mm256_min_epu8(min, *p_maxLLR);
\n
"
);
// *p_cnProcBufResBit = _mm256_sign_epi8(min, sgn);
// p_cnProcBufResBit++;
fprintf
(
fd
,
" ((__m256i*)cnProcB ufRestBit)[%d] = _mm256_sign_epi8(min, sgn);
\n
"
,
lut_numCnInCnGroups
[
0
]
+
(
j
*
bitOffsetInGroup
)
+
i
);
}
}
}
// =====================================================================
// Process group with 4 BNs
// =====================================================================
// Process group with 4 BNs
// Offset is 5*384/32 = 60
const
uint8_t
lut_idxCnProcG4
[
4
][
3
]
=
{{
60
,
120
,
180
},
{
0
,
120
,
180
},
{
0
,
60
,
180
},
{
0
,
60
,
120
}};
// Offset is 5*384/32 = 60
const
uint8_t
lut_idxCnProcG4
[
4
][
3
]
=
{{
60
,
120
,
180
},
{
0
,
120
,
180
},
{
0
,
60
,
180
},
{
0
,
60
,
120
}};
if
(
lut_numCnInCnGroups
[
1
]
>
0
)
if
(
lut_numCnInCnGroups
[
1
]
>
0
)
{
// Number of groups of 32 CNs for parallel processing
// Ceil for values not divisible by 32
M
=
(
lut_numCnInCnGroups
[
1
]
*
Z
+
31
)
>>
5
;
// Number of groups of 32 CNs for parallel processing
// Ceil for values not divisible by 32
M
=
(
lut_numCnInCnGroups
[
1
]
*
Z
+
31
)
>>
5
;
// Set the offset to each bit within a group in terms of 32 Byte
bitOffsetInGroup
=
(
lut_numCnInCnGroups_BG1_R13
[
1
]
*
NR_LDPC_ZMAX
)
>>
5
;
// Set the offset to each bit within a group in terms of 32 Byte
bitOffsetInGroup
=
(
lut_numCnInCnGroups_BG1_R13
[
1
]
*
NR_LDPC_ZMAX
)
>>
5
;
// Set pointers to start of group 4
//p_cnProcBuf = (__m256i*) &cnProcBuf [lut_startAddrCnGroups[1]];
//p_cnProcBufRes = (__m256i*) &cnProcBufRes[lut_startAddrCnGroups[1]];
// Set pointers to start of group 4
//p_cnProcBuf = (__m256i*) &cnProcBuf [lut_startAddrCnGroups[1]];
//p_cnProcBufRes = (__m256i*) &cnProcBufRes[lut_startAddrCnGroups[1]];
// Loop over every BN
int
iprime
=
0
;
for
(
j
=
0
;
j
<
4
;
j
++
)
// Loop over every BN
int
iprime
=
0
;
for
(
j
=
0
;
j
<
4
;
j
++
)
{
// Set of results pointer to correct BN address
//p_cnProcBufResBit = p_cnProcBufRes + (j*bitOffsetInGroup);
// Set of results pointer to correct BN address
//p_cnProcBufResBit = p_cnProcBufRes + (j*bitOffsetInGroup);
// Loop over CNs
for
(
i
=
0
;
i
<
M
;
i
++
,
iprime
++
)
// Loop over CNs
for
(
i
=
0
;
i
<
M
;
i
++
,
iprime
++
)
{
// Abs and sign of 32 CNs (first BN)
// ymm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
printf
(
" ymm0 = ((__m256i*)&cnProcBuf)[%d];
\n
"
,
lut_startAddrCnGroups
[
1
]
+
lut_idxCnProcG4
[
j
][
0
]
+
i
);
// sgn = _mm256_sign_epi8(*p_ones, ymm0);
printf
(
" sgn = _mm256_sign_epi8(*p_ones, ymm0);
\n
"
);
// min = _mm256_abs_epi8(ymm0);
printf
(
" min = _mm256_abs_epi8(ymm0);
\n
"
);
// Abs and sign of 32 CNs (first BN)
// ymm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
fprintf
(
fd
,
" ymm0 = ((__m256i*)&cnProcBuf)[%d];
\n
"
,
lut_startAddrCnGroups
[
1
]
+
lut_idxCnProcG4
[
j
][
0
]
+
i
);
// sgn = _mm256_sign_epi8(*p_ones, ymm0);
fprintf
(
fd
,
" sgn = _mm256_sign_epi8(*p_ones, ymm0);
\n
"
);
// min = _mm256_abs_epi8(ymm0);
fprintf
(
fd
,
" min = _mm256_abs_epi8(ymm0);
\n
"
);
// Loop over BNs
for
(
k
=
1
;
k
<
3
;
k
++
)
// Loop over BNs
for
(
k
=
1
;
k
<
3
;
k
++
)
{
printf
(
" ymm0 = ((__m256i*)&cnProcBuf)[%d];
\n
"
,
lut_startAddrCnGroups
[
1
]
+
lut_idxCnProcG4
[
j
][
k
]
+
i
);
fprintf
(
fd
,
" ymm0 = ((__m256i*)&cnProcBuf)[%d];
\n
"
,
lut_startAddrCnGroups
[
1
]
+
lut_idxCnProcG4
[
j
][
k
]
+
i
);
// min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
printf
(
" min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
\n
"
);
// min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
fprintf
(
fd
,
" min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
\n
"
);
// sgn = _mm256_sign_epi8(sgn, ymm0);
printf
(
" sgn = _mm256_sign_epi8(sgn, ymm0);
\n
"
);
// sgn = _mm256_sign_epi8(sgn, ymm0);
fprintf
(
fd
,
" sgn = _mm256_sign_epi8(sgn, ymm0);
\n
"
);
}
// Store result
// min = _mm256_min_epu8(min, *p_maxLLR); // 128 in epi8 is -128
printf
(
" min = _mm256_min_epu8(min, *p_maxLLR);
\n
"
);
// *p_cnProcBufResBit = _mm256_sign_epi8(min, sgn);
// p_cnProcBufResBit++;
printf
(
" ((__m256i*)cnProcBufRestBit)[%d] = _mm256_sign_epi8(min, sgn);
\n
"
,
lut_numCnInCnGroups
[
1
]
+
(
j
*
bitOffsetInGroup
)
+
i
);
// Store result
// min = _mm256_min_epu8(min, *p_maxLLR); // 128 in epi8 is -128
fprintf
(
fd
,
" min = _mm256_min_epu8(min, *p_maxLLR);
\n
"
);
// *p_cnProcBufResBit = _mm256_sign_epi8(min, sgn);
// p_cnProcBufResBit++;
fprintf
(
fd
,
" ((__m256i*)cnProcBufRestBit)[%d] = _mm256_sign_epi8(min, sgn);
\n
"
,
lut_numCnInCnGroups
[
1
]
+
(
j
*
bitOffsetInGroup
)
+
i
);
}
}
}
// =====================================================================
// Process group with 5 BNs
// =====================================================================
// Process group with 5 BNs
// Offset is 18*384/32 = 216
const
uint16_t
lut_idxCnProcG5
[
5
][
4
]
=
{{
216
,
432
,
648
,
864
},
{
0
,
432
,
648
,
864
},
{
0
,
216
,
648
,
864
},
{
0
,
216
,
432
,
864
},
{
0
,
216
,
432
,
648
}};
// Offset is 18*384/32 = 216
const
uint16_t
lut_idxCnProcG5
[
5
][
4
]
=
{{
216
,
432
,
648
,
864
},
{
0
,
432
,
648
,
864
},
{
0
,
216
,
648
,
864
},
{
0
,
216
,
432
,
864
},
{
0
,
216
,
432
,
648
}};
if
(
lut_numCnInCnGroups
[
2
]
>
0
)
if
(
lut_numCnInCnGroups
[
2
]
>
0
)
{
// Number of groups of 32 CNs for parallel processing
// Ceil for values not divisible by 32
M
=
(
lut_numCnInCnGroups
[
2
]
*
Z
+
31
)
>>
5
;
// Number of groups of 32 CNs for parallel processing
// Ceil for values not divisible by 32
M
=
(
lut_numCnInCnGroups
[
2
]
*
Z
+
31
)
>>
5
;
// Set the offset to each bit within a group in terms of 32 Byte
bitOffsetInGroup
=
(
lut_numCnInCnGroups_BG1_R13
[
2
]
*
NR_LDPC_ZMAX
)
>>
5
;
// Set the offset to each bit within a group in terms of 32 Byte
bitOffsetInGroup
=
(
lut_numCnInCnGroups_BG1_R13
[
2
]
*
NR_LDPC_ZMAX
)
>>
5
;
// Set pointers to start of group 5
//p_cnProcBuf = (__m256i*) &cnProcBuf [lut_startAddrCnGroups[2]];
//p_cnProcBufRes = (__m256i*) &cnProcBufRes[lut_startAddrCnGroups[2]];
// Set pointers to start of group 5
//p_cnProcBuf = (__m256i*) &cnProcBuf [lut_startAddrCnGroups[2]];
//p_cnProcBufRes = (__m256i*) &cnProcBufRes[lut_startAddrCnGroups[2]];
// Loop over every BN
int
iprime
=
0
;
for
(
j
=
0
;
j
<
5
;
j
++
)
// Loop over every BN
int
iprime
=
0
;
for
(
j
=
0
;
j
<
5
;
j
++
)
{
// Set of results pointer to correct BN address
//p_cnProcBufResBit = p_cnProcBufRes + (j*bitOffsetInGroup);
// Set of results pointer to correct BN address
//p_cnProcBufResBit = p_cnProcBufRes + (j*bitOffsetInGroup);
// Loop over CNs
for
(
i
=
0
;
i
<
M
;
i
++
,
iprime
++
)
// Loop over CNs
for
(
i
=
0
;
i
<
M
;
i
++
,
iprime
++
)
{
// Abs and sign of 32 CNs (first BN)
// ymm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
printf
(
" ymm0 = ((__m256i*)&cnProcBuf)[%d];
\n
"
,
lut_startAddrCnGroups
[
2
]
+
lut_idxCnProcG5
[
j
][
0
]
+
i
);
// sgn = _mm256_sign_epi8(*p_ones, ymm0);
printf
(
" sgn = _mm256_sign_epi8(*p_ones, ymm0);
\n
"
);
// min = _mm256_abs_epi8(ymm0);
printf
(
" min = _mm256_abs_epi8(ymm0);
\n
"
);
// Abs and sign of 32 CNs (first BN)
// ymm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
fprintf
(
fd
,
" ymm0 = ((__m256i*)&cnProcBuf)[%d];
\n
"
,
lut_startAddrCnGroups
[
2
]
+
lut_idxCnProcG5
[
j
][
0
]
+
i
);
// sgn = _mm256_sign_epi8(*p_ones, ymm0);
fprintf
(
fd
,
" sgn = _mm256_sign_epi8(*p_ones, ymm0);
\n
"
);
// min = _mm256_abs_epi8(ymm0);
fprintf
(
fd
,
" min = _mm256_abs_epi8(ymm0);
\n
"
);
// Loop over BNs
for
(
k
=
1
;
k
<
4
;
k
++
)
// Loop over BNs
for
(
k
=
1
;
k
<
4
;
k
++
)
{
printf
(
" ymm0 = ((__m256i*)&cnProcBuf)[%d];
\n
"
,
lut_startAddrCnGroups
[
2
]
+
lut_idxCnProcG5
[
j
][
k
]
+
i
);
fprintf
(
fd
,
" ymm0 = ((__m256i*)&cnProcBuf)[%d];
\n
"
,
lut_startAddrCnGroups
[
2
]
+
lut_idxCnProcG5
[
j
][
k
]
+
i
);
// min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
printf
(
" min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
\n
"
);
// min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
fprintf
(
fd
,
" min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
\n
"
);
// sgn = _mm256_sign_epi8(sgn, ymm0);
printf
(
" sgn = _mm256_sign_epi8(sgn, ymm0);
\n
"
);
// sgn = _mm256_sign_epi8(sgn, ymm0);
fprintf
(
fd
,
" sgn = _mm256_sign_epi8(sgn, ymm0);
\n
"
);
}
// Store result
// min = _mm256_min_epu8(min, *p_maxLLR); // 128 in epi8 is -128
printf
(
" min = _mm256_min_epu8(min, *p_maxLLR);
\n
"
);
// *p_cnProcBufResBit = _mm256_sign_epi8(min, sgn);
// p_cnProcBufResBit++;
printf
(
" ((__m256i*)cnProcBufRestBit)[%d] = _mm256_sign_epi8(min, sgn);
\n
"
,
lut_numCnInCnGroups
[
2
]
+
(
j
*
bitOffsetInGroup
)
+
i
);
// Store result
// min = _mm256_min_epu8(min, *p_maxLLR); // 128 in epi8 is -128
fprintf
(
fd
,
" min = _mm256_min_epu8(min, *p_maxLLR);
\n
"
);
// *p_cnProcBufResBit = _mm256_sign_epi8(min, sgn);
// p_cnProcBufResBit++;
fprintf
(
fd
,
" ((__m256i*)cnProcBufRestBit)[%d] = _mm256_sign_epi8(min, sgn);
\n
"
,
lut_numCnInCnGroups
[
2
]
+
(
j
*
bitOffsetInGroup
)
+
i
);
}
}
}
// =====================================================================
// Process group with 6 BNs
// =====================================================================
// Process group with 6 BNs
// Offset is 8*384/32 = 96
const
uint16_t
lut_idxCnProcG6
[
6
][
5
]
=
{{
96
,
192
,
288
,
384
,
480
},
{
0
,
192
,
288
,
384
,
480
},
{
0
,
96
,
288
,
384
,
480
},
{
0
,
96
,
192
,
384
,
480
},
{
0
,
96
,
192
,
288
,
480
},
{
0
,
96
,
192
,
288
,
384
}};
// Offset is 8*384/32 = 96
const
uint16_t
lut_idxCnProcG6
[
6
][
5
]
=
{{
96
,
192
,
288
,
384
,
480
},
{
0
,
192
,
288
,
384
,
480
},
{
0
,
96
,
288
,
384
,
480
},
{
0
,
96
,
192
,
384
,
480
},
{
0
,
96
,
192
,
288
,
480
},
{
0
,
96
,
192
,
288
,
384
}};
if
(
lut_numCnInCnGroups
[
3
]
>
0
)
if
(
lut_numCnInCnGroups
[
3
]
>
0
)
{
// Number of groups of 32 CNs for parallel processing
// Ceil for values not divisible by 32
M
=
(
lut_numCnInCnGroups
[
3
]
*
Z
+
31
)
>>
5
;
// Number of groups of 32 CNs for parallel processing
// Ceil for values not divisible by 32
M
=
(
lut_numCnInCnGroups
[
3
]
*
Z
+
31
)
>>
5
;
// Set the offset to each bit within a group in terms of 32 Byte
bitOffsetInGroup
=
(
lut_numCnInCnGroups_BG1_R13
[
3
]
*
NR_LDPC_ZMAX
)
>>
5
;
// Set the offset to each bit within a group in terms of 32 Byte
bitOffsetInGroup
=
(
lut_numCnInCnGroups_BG1_R13
[
3
]
*
NR_LDPC_ZMAX
)
>>
5
;
// Set pointers to start of group 6
//p_cnProcBuf = (__m256i*) &cnProcBuf [lut_startAddrCnGroups[3]];
//p_cnProcBufRes = (__m256i*) &cnProcBufRes[lut_startAddrCnGroups[3]];
// Set pointers to start of group 6
//p_cnProcBuf = (__m256i*) &cnProcBuf [lut_startAddrCnGroups[3]];
//p_cnProcBufRes = (__m256i*) &cnProcBufRes[lut_startAddrCnGroups[3]];
// Loop over every BN
int
iprime
=
0
;
for
(
j
=
0
;
j
<
6
;
j
++
)
// Loop over every BN
int
iprime
=
0
;
for
(
j
=
0
;
j
<
6
;
j
++
)
{
// Set of results pointer to correct BN address
//p_cnProcBufResBit = p_cnProcBufRes + (j*bitOffsetInGroup);
// Set of results pointer to correct BN address
//p_cnProcBufResBit = p_cnProcBufRes + (j*bitOffsetInGroup);
// Loop over CNs
for
(
i
=
0
;
i
<
M
;
i
++
,
iprime
++
)
// Loop over CNs
for
(
i
=
0
;
i
<
M
;
i
++
,
iprime
++
)
{
// Abs and sign of 32 CNs (first BN)
// ymm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
printf
(
" ymm0 = ((__m256i*)&cnProcBuf)[%d];
\n
"
,
lut_startAddrCnGroups
[
3
]
+
lut_idxCnProcG6
[
j
][
0
]
+
i
);
// sgn = _mm256_sign_epi8(*p_ones, ymm0);
printf
(
" sgn = _mm256_sign_epi8(*p_ones, ymm0);
\n
"
);
// min = _mm256_abs_epi8(ymm0);
printf
(
" min = _mm256_abs_epi8(ymm0);
\n
"
);
// Abs and sign of 32 CNs (first BN)
// ymm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
fprintf
(
fd
,
" ymm0 = ((__m256i*)&cnProcBuf)[%d];
\n
"
,
lut_startAddrCnGroups
[
3
]
+
lut_idxCnProcG6
[
j
][
0
]
+
i
);
// sgn = _mm256_sign_epi8(*p_ones, ymm0);
fprintf
(
fd
,
" sgn = _mm256_sign_epi8(*p_ones, ymm0);
\n
"
);
// min = _mm256_abs_epi8(ymm0);
fprintf
(
fd
,
" min = _mm256_abs_epi8(ymm0);
\n
"
);
// Loop over BNs
for
(
k
=
1
;
k
<
5
;
k
++
)
// Loop over BNs
for
(
k
=
1
;
k
<
5
;
k
++
)
{
printf
(
" ymm0 = ((__m256i*)&cnProcBuf)[%d];
\n
"
,
lut_startAddrCnGroups
[
3
]
+
lut_idxCnProcG6
[
j
][
k
]
+
i
);
fprintf
(
fd
,
" ymm0 = ((__m256i*)&cnProcBuf)[%d];
\n
"
,
lut_startAddrCnGroups
[
3
]
+
lut_idxCnProcG6
[
j
][
k
]
+
i
);
// min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
printf
(
" min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
\n
"
);
// min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
fprintf
(
fd
,
" min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
\n
"
);
// sgn = _mm256_sign_epi8(sgn, ymm0);
printf
(
" sgn = _mm256_sign_epi8(sgn, ymm0);
\n
"
);
// sgn = _mm256_sign_epi8(sgn, ymm0);
fprintf
(
fd
,
" sgn = _mm256_sign_epi8(sgn, ymm0);
\n
"
);
}
// Store result
// min = _mm256_min_epu8(min, *p_maxLLR); // 128 in epi8 is -128
printf
(
" min = _mm256_min_epu8(min, *p_maxLLR);
\n
"
);
// *p_cnProcBufResBit = _mm256_sign_epi8(min, sgn);
// p_cnProcBufResBit++;
printf
(
" ((__m256i*)cnProcBufRestBit)[%d] = _mm256_sign_epi8(min, sgn);
\n
"
,
lut_numCnInCnGroups
[
3
]
+
(
j
*
bitOffsetInGroup
)
+
i
);
// Store result
// min = _mm256_min_epu8(min, *p_maxLLR); // 128 in epi8 is -128
fprintf
(
fd
,
" min = _mm256_min_epu8(min, *p_maxLLR);
\n
"
);
// *p_cnProcBufResBit = _mm256_sign_epi8(min, sgn);
// p_cnProcBufResBit++;
fprintf
(
fd
,
" ((__m256i*)cnProcBufRestBit)[%d] = _mm256_sign_epi8(min, sgn);
\n
"
,
lut_numCnInCnGroups
[
3
]
+
(
j
*
bitOffsetInGroup
)
+
i
);
}
}
}
// =====================================================================
// Process group with 7 BNs
// =====================================================================
// Process group with 7 BNs
// Offset is 5*384/32 = 60
const
uint16_t
lut_idxCnProcG7
[
7
][
6
]
=
{{
60
,
120
,
180
,
240
,
300
,
360
},
{
0
,
120
,
180
,
240
,
300
,
360
},
{
0
,
60
,
180
,
240
,
300
,
360
},
{
0
,
60
,
120
,
240
,
300
,
360
},
{
0
,
60
,
120
,
180
,
300
,
360
},
{
0
,
60
,
120
,
180
,
240
,
360
},
{
0
,
60
,
120
,
180
,
240
,
300
}};
// Offset is 5*384/32 = 60
const
uint16_t
lut_idxCnProcG7
[
7
][
6
]
=
{{
60
,
120
,
180
,
240
,
300
,
360
},
{
0
,
120
,
180
,
240
,
300
,
360
},
{
0
,
60
,
180
,
240
,
300
,
360
},
{
0
,
60
,
120
,
240
,
300
,
360
},
{
0
,
60
,
120
,
180
,
300
,
360
},
{
0
,
60
,
120
,
180
,
240
,
360
},
{
0
,
60
,
120
,
180
,
240
,
300
}};
if
(
lut_numCnInCnGroups
[
4
]
>
0
)
if
(
lut_numCnInCnGroups
[
4
]
>
0
)
{
// Number of groups of 32 CNs for parallel processing
// Ceil for values not divisible by 32
M
=
(
lut_numCnInCnGroups
[
4
]
*
Z
+
31
)
>>
5
;
// Number of groups of 32 CNs for parallel processing
// Ceil for values not divisible by 32
M
=
(
lut_numCnInCnGroups
[
4
]
*
Z
+
31
)
>>
5
;
// Set the offset to each bit within a group in terms of 32 Byte
bitOffsetInGroup
=
(
lut_numCnInCnGroups_BG1_R13
[
4
]
*
NR_LDPC_ZMAX
)
>>
5
;
// Set the offset to each bit within a group in terms of 32 Byte
bitOffsetInGroup
=
(
lut_numCnInCnGroups_BG1_R13
[
4
]
*
NR_LDPC_ZMAX
)
>>
5
;
// Set pointers to start of group 7
//p_cnProcBuf = (__m256i*) &cnProcBuf [lut_startAddrCnGroups[4]];
//p_cnProcBufRes = (__m256i*) &cnProcBufRes[lut_startAddrCnGroups[4]];
// Set pointers to start of group 7
//p_cnProcBuf = (__m256i*) &cnProcBuf [lut_startAddrCnGroups[4]];
//p_cnProcBufRes = (__m256i*) &cnProcBufRes[lut_startAddrCnGroups[4]];
// Loop over every BN
int
iprime
=
0
;
for
(
j
=
0
;
j
<
7
;
j
++
)
// Loop over every BN
int
iprime
=
0
;
for
(
j
=
0
;
j
<
7
;
j
++
)
{
// Set of results pointer to correct BN address
//p_cnProcBufResBit = p_cnProcBufRes + (j*bitOffsetInGroup);
// Set of results pointer to correct BN address
//p_cnProcBufResBit = p_cnProcBufRes + (j*bitOffsetInGroup);
// Loop over CNs
for
(
i
=
0
;
i
<
M
;
i
++
,
iprime
++
)
// Loop over CNs
for
(
i
=
0
;
i
<
M
;
i
++
,
iprime
++
)
{
// Abs and sign of 32 CNs (first BN)
// ymm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
printf
(
" ymm0 = ((__m256i*)&cnProcBuf)[%d];
\n
"
,
lut_startAddrCnGroups
[
4
]
+
lut_idxCnProcG7
[
j
][
0
]
+
i
);
// sgn = _mm256_sign_epi8(*p_ones, ymm0);
printf
(
" sgn = _mm256_sign_epi8(*p_ones, ymm0);
\n
"
);
// min = _mm256_abs_epi8(ymm0);
printf
(
" min = _mm256_abs_epi8(ymm0);
\n
"
);
// Abs and sign of 32 CNs (first BN)
// ymm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
fprintf
(
fd
,
" ymm0 = ((__m256i*)&cnProcBuf)[%d];
\n
"
,
lut_startAddrCnGroups
[
4
]
+
lut_idxCnProcG7
[
j
][
0
]
+
i
);
// sgn = _mm256_sign_epi8(*p_ones, ymm0);
fprintf
(
fd
,
" sgn = _mm256_sign_epi8(*p_ones, ymm0);
\n
"
);
// min = _mm256_abs_epi8(ymm0);
fprintf
(
fd
,
" min = _mm256_abs_epi8(ymm0);
\n
"
);
// Loop over BNs
for
(
k
=
1
;
k
<
6
;
k
++
)
// Loop over BNs
for
(
k
=
1
;
k
<
6
;
k
++
)
{
printf
(
" ymm0 = ((__m256i*)&cnProcBuf)[%d];
\n
"
,
lut_startAddrCnGroups
[
4
]
+
lut_idxCnProcG7
[
j
][
k
]
+
i
);
fprintf
(
fd
,
" ymm0 = ((__m256i*)&cnProcBuf)[%d];
\n
"
,
lut_startAddrCnGroups
[
4
]
+
lut_idxCnProcG7
[
j
][
k
]
+
i
);
// min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
printf
(
" min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
\n
"
);
// min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
fprintf
(
fd
,
" min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
\n
"
);
// sgn = _mm256_sign_epi8(sgn, ymm0);
printf
(
" sgn = _mm256_sign_epi8(sgn, ymm0);
\n
"
);
// sgn = _mm256_sign_epi8(sgn, ymm0);
fprintf
(
fd
,
" sgn = _mm256_sign_epi8(sgn, ymm0);
\n
"
);
}
// Store result
// min = _mm256_min_epu8(min, *p_maxLLR); // 128 in epi8 is -128
printf
(
" min = _mm256_min_epu8(min, *p_maxLLR);
\n
"
);
// *p_cnProcBufResBit = _mm256_sign_epi8(min, sgn);
// p_cnProcBufResBit++;
printf
(
" ((__m256i*)cnProcBufRestBit)[%d] = _mm256_sign_epi8(min, sgn);
\n
"
,
lut_numCnInCnGroups
[
4
]
+
(
j
*
bitOffsetInGroup
)
+
i
);
// Store result
// min = _mm256_min_epu8(min, *p_maxLLR); // 128 in epi8 is -128
fprintf
(
fd
,
" min = _mm256_min_epu8(min, *p_maxLLR);
\n
"
);
// *p_cnProcBufResBit = _mm256_sign_epi8(min, sgn);
// p_cnProcBufResBit++;
fprintf
(
fd
,
" ((__m256i*)cnProcBufRestBit)[%d] = _mm256_sign_epi8(min, sgn);
\n
"
,
lut_numCnInCnGroups
[
4
]
+
(
j
*
bitOffsetInGroup
)
+
i
);
}
}
}
// =====================================================================
// Process group with 8 BNs
// =====================================================================
// Process group with 8 BNs
// Offset is 2*384/32 = 24
const
uint8_t
lut_idxCnProcG8
[
8
][
7
]
=
{{
24
,
48
,
72
,
96
,
120
,
144
,
168
},
{
0
,
48
,
72
,
96
,
120
,
144
,
168
},
{
0
,
24
,
72
,
96
,
120
,
144
,
168
},
{
0
,
24
,
48
,
96
,
120
,
144
,
168
},
{
0
,
24
,
48
,
72
,
120
,
144
,
168
},
{
0
,
24
,
48
,
72
,
96
,
144
,
168
},
{
0
,
24
,
48
,
72
,
96
,
120
,
168
},
{
0
,
24
,
48
,
72
,
96
,
120
,
144
}};
// Offset is 2*384/32 = 24
const
uint8_t
lut_idxCnProcG8
[
8
][
7
]
=
{{
24
,
48
,
72
,
96
,
120
,
144
,
168
},
{
0
,
48
,
72
,
96
,
120
,
144
,
168
},
{
0
,
24
,
72
,
96
,
120
,
144
,
168
},
{
0
,
24
,
48
,
96
,
120
,
144
,
168
},
{
0
,
24
,
48
,
72
,
120
,
144
,
168
},
{
0
,
24
,
48
,
72
,
96
,
144
,
168
},
{
0
,
24
,
48
,
72
,
96
,
120
,
168
},
{
0
,
24
,
48
,
72
,
96
,
120
,
144
}};
if
(
lut_numCnInCnGroups
[
5
]
>
0
)
if
(
lut_numCnInCnGroups
[
5
]
>
0
)
{
// Number of groups of 32 CNs for parallel processing
// Ceil for values not divisible by 32
M
=
(
lut_numCnInCnGroups
[
5
]
*
Z
+
31
)
>>
5
;
// Number of groups of 32 CNs for parallel processing
// Ceil for values not divisible by 32
M
=
(
lut_numCnInCnGroups
[
5
]
*
Z
+
31
)
>>
5
;
// Set the offset to each bit within a group in terms of 32 Byte
bitOffsetInGroup
=
(
lut_numCnInCnGroups_BG1_R13
[
5
]
*
NR_LDPC_ZMAX
)
>>
5
;
// Set the offset to each bit within a group in terms of 32 Byte
bitOffsetInGroup
=
(
lut_numCnInCnGroups_BG1_R13
[
5
]
*
NR_LDPC_ZMAX
)
>>
5
;
// Set pointers to start of group 8
//p_cnProcBuf = (__m256i*) &cnProcBuf [lut_startAddrCnGroups[5]];
//p_cnProcBufRes = (__m256i*) &cnProcBufRes[lut_startAddrCnGroups[5]];
// Set pointers to start of group 8
//p_cnProcBuf = (__m256i*) &cnProcBuf [lut_startAddrCnGroups[5]];
//p_cnProcBufRes = (__m256i*) &cnProcBufRes[lut_startAddrCnGroups[5]];
// Loop over every BN
int
iprime
=
0
;
for
(
j
=
0
;
j
<
8
;
j
++
)
// Loop over every BN
int
iprime
=
0
;
for
(
j
=
0
;
j
<
8
;
j
++
)
{
// Set of results pointer to correct BN address
//p_cnProcBufResBit = p_cnProcBufRes + (j*bitOffsetInGroup);
// Set of results pointer to correct BN address
//p_cnProcBufResBit = p_cnProcBufRes + (j*bitOffsetInGroup);
// Loop over CNs
for
(
i
=
0
;
i
<
M
;
i
++
,
iprime
++
)
// Loop over CNs
for
(
i
=
0
;
i
<
M
;
i
++
,
iprime
++
)
{
// Abs and sign of 32 CNs (first BN)
// ymm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
printf
(
" ymm0 = ((__m256i*)&cnProcBuf)[%d];
\n
"
,
lut_startAddrCnGroups
[
5
]
+
lut_idxCnProcG8
[
j
][
0
]
+
i
);
// sgn = _mm256_sign_epi8(*p_ones, ymm0);
printf
(
" sgn = _mm256_sign_epi8(*p_ones, ymm0);
\n
"
);
// min = _mm256_abs_epi8(ymm0);
printf
(
" min = _mm256_abs_epi8(ymm0);
\n
"
);
// Abs and sign of 32 CNs (first BN)
// ymm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
fprintf
(
fd
,
" ymm0 = ((__m256i*)&cnProcBuf)[%d];
\n
"
,
lut_startAddrCnGroups
[
5
]
+
lut_idxCnProcG8
[
j
][
0
]
+
i
);
// sgn = _mm256_sign_epi8(*p_ones, ymm0);
fprintf
(
fd
,
" sgn = _mm256_sign_epi8(*p_ones, ymm0);
\n
"
);
// min = _mm256_abs_epi8(ymm0);
fprintf
(
fd
,
" min = _mm256_abs_epi8(ymm0);
\n
"
);
// Loop over BNs
for
(
k
=
1
;
k
<
7
;
k
++
)
// Loop over BNs
for
(
k
=
1
;
k
<
7
;
k
++
)
{
printf
(
" ymm0 = ((__m256i*)&cnProcBuf)[%d];
\n
"
,
lut_startAddrCnGroups
[
5
]
+
lut_idxCnProcG8
[
j
][
k
]
+
i
);
fprintf
(
fd
,
" ymm0 = ((__m256i*)&cnProcBuf)[%d];
\n
"
,
lut_startAddrCnGroups
[
5
]
+
lut_idxCnProcG8
[
j
][
k
]
+
i
);
// min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
printf
(
" min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
\n
"
);
// min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
fprintf
(
fd
,
" min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
\n
"
);
// sgn = _mm256_sign_epi8(sgn, ymm0);
printf
(
" sgn = _mm256_sign_epi8(sgn, ymm0);
\n
"
);
// sgn = _mm256_sign_epi8(sgn, ymm0);
fprintf
(
fd
,
" sgn = _mm256_sign_epi8(sgn, ymm0);
\n
"
);
}
// Store result
// min = _mm256_min_epu8(min, *p_maxLLR); // 128 in epi8 is -128
printf
(
" min = _mm256_min_epu8(min, *p_maxLLR);
\n
"
);
// *p_cnProcBufResBit = _mm256_sign_epi8(min, sgn);
// p_cnProcBufResBit++;
printf
(
" ((__m256i*)cnProcBufRestBit)[%d] = _mm256_sign_epi8(min, sgn);
\n
"
,
lut_numCnInCnGroups
[
5
]
+
(
j
*
bitOffsetInGroup
)
+
i
);
// Store result
// min = _mm256_min_epu8(min, *p_maxLLR); // 128 in epi8 is -128
fprintf
(
fd
,
" min = _mm256_min_epu8(min, *p_maxLLR);
\n
"
);
// *p_cnProcBufResBit = _mm256_sign_epi8(min, sgn);
// p_cnProcBufResBit++;
fprintf
(
fd
,
" ((__m256i*)cnProcBufRestBit)[%d] = _mm256_sign_epi8(min, sgn);
\n
"
,
lut_numCnInCnGroups
[
5
]
+
(
j
*
bitOffsetInGroup
)
+
i
);
}
}
}
// =====================================================================
// Process group with 9 BNs
// =====================================================================
// Process group with 9 BNs
// Offset is 2*384/32 = 24
const
uint8_t
lut_idxCnProcG9
[
9
][
8
]
=
{{
24
,
48
,
72
,
96
,
120
,
144
,
168
,
192
},
{
0
,
48
,
72
,
96
,
120
,
144
,
168
,
192
},
{
0
,
24
,
72
,
96
,
120
,
144
,
168
,
192
},
{
0
,
24
,
48
,
96
,
120
,
144
,
168
,
192
},
{
0
,
24
,
48
,
72
,
120
,
144
,
168
,
192
},
{
0
,
24
,
48
,
72
,
96
,
144
,
168
,
192
},
{
0
,
24
,
48
,
72
,
96
,
120
,
168
,
192
},
{
0
,
24
,
48
,
72
,
96
,
120
,
144
,
192
},
{
0
,
24
,
48
,
72
,
96
,
120
,
144
,
168
}};
// Offset is 2*384/32 = 24
const
uint8_t
lut_idxCnProcG9
[
9
][
8
]
=
{{
24
,
48
,
72
,
96
,
120
,
144
,
168
,
192
},
{
0
,
48
,
72
,
96
,
120
,
144
,
168
,
192
},
{
0
,
24
,
72
,
96
,
120
,
144
,
168
,
192
},
{
0
,
24
,
48
,
96
,
120
,
144
,
168
,
192
},
{
0
,
24
,
48
,
72
,
120
,
144
,
168
,
192
},
{
0
,
24
,
48
,
72
,
96
,
144
,
168
,
192
},
{
0
,
24
,
48
,
72
,
96
,
120
,
168
,
192
},
{
0
,
24
,
48
,
72
,
96
,
120
,
144
,
192
},
{
0
,
24
,
48
,
72
,
96
,
120
,
144
,
168
}};
if
(
lut_numCnInCnGroups
[
6
]
>
0
)
if
(
lut_numCnInCnGroups
[
6
]
>
0
)
{
// Number of groups of 32 CNs for parallel processing
// Ceil for values not divisible by 32
M
=
(
lut_numCnInCnGroups
[
6
]
*
Z
+
31
)
>>
5
;
// Number of groups of 32 CNs for parallel processing
// Ceil for values not divisible by 32
M
=
(
lut_numCnInCnGroups
[
6
]
*
Z
+
31
)
>>
5
;
// Set the offset to each bit within a group in terms of 32 Byte
bitOffsetInGroup
=
(
lut_numCnInCnGroups_BG1_R13
[
6
]
*
NR_LDPC_ZMAX
)
>>
5
;
// Set the offset to each bit within a group in terms of 32 Byte
bitOffsetInGroup
=
(
lut_numCnInCnGroups_BG1_R13
[
6
]
*
NR_LDPC_ZMAX
)
>>
5
;
// Set pointers to start of group 9
//p_cnProcBuf = (__m256i*) &cnProcBuf [lut_startAddrCnGroups[6]];
//p_cnProcBufRes = (__m256i*) &cnProcBufRes[lut_startAddrCnGroups[6]];
// Set pointers to start of group 9
//p_cnProcBuf = (__m256i*) &cnProcBuf [lut_startAddrCnGroups[6]];
//p_cnProcBufRes = (__m256i*) &cnProcBufRes[lut_startAddrCnGroups[6]];
// Loop over every BN
int
iprime
=
0
;
for
(
j
=
0
;
j
<
9
;
j
++
)
// Loop over every BN
int
iprime
=
0
;
for
(
j
=
0
;
j
<
9
;
j
++
)
{
// Set of results pointer to correct BN address
//p_cnProcBufResBit = p_cnProcBufRes + (j*bitOffsetInGroup);
// Set of results pointer to correct BN address
//p_cnProcBufResBit = p_cnProcBufRes + (j*bitOffsetInGroup);
// Loop over CNs
for
(
i
=
0
;
i
<
M
;
i
++
,
iprime
++
)
// Loop over CNs
for
(
i
=
0
;
i
<
M
;
i
++
,
iprime
++
)
{
// Abs and sign of 32 CNs (first BN)
// ymm0 = p_cnProcBuf[lut_idxCnProcG9[j][0] + i];
printf
(
" ymm0 = ((__m256i*)&cnProcBuf)[%d];
\n
"
,
lut_startAddrCnGroups
[
6
]
+
lut_idxCnProcG9
[
j
][
0
]
+
i
);
// sgn = _mm256_sign_epi8(*p_ones, ymm0);
printf
(
" sgn = _mm256_sign_epi8(*p_ones, ymm0);
\n
"
);
// min = _mm256_abs_epi8(ymm0);
printf
(
" min = _mm256_abs_epi8(ymm0);
\n
"
);
// Abs and sign of 32 CNs (first BN)
// ymm0 = p_cnProcBuf[lut_idxCnProcG9[j][0] + i];
fprintf
(
fd
,
" ymm0 = ((__m256i*)&cnProcBuf)[%d];
\n
"
,
lut_startAddrCnGroups
[
6
]
+
lut_idxCnProcG9
[
j
][
0
]
+
i
);
// sgn = _mm256_sign_epi8(*p_ones, ymm0);
fprintf
(
fd
,
" sgn = _mm256_sign_epi8(*p_ones, ymm0);
\n
"
);
// min = _mm256_abs_epi8(ymm0);
fprintf
(
fd
,
" min = _mm256_abs_epi8(ymm0);
\n
"
);
// Loop over BNs
for
(
k
=
1
;
k
<
8
;
k
++
)
// Loop over BNs
for
(
k
=
1
;
k
<
8
;
k
++
)
{
printf
(
" ymm0 = ((__m256i*)&cnProcBuf)[%d];
\n
"
,
lut_startAddrCnGroups
[
6
]
+
lut_idxCnProcG9
[
j
][
k
]
+
i
);
fprintf
(
fd
,
" ymm0 = ((__m256i*)&cnProcBuf)[%d];
\n
"
,
lut_startAddrCnGroups
[
6
]
+
lut_idxCnProcG9
[
j
][
k
]
+
i
);
// min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
printf
(
" min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
\n
"
);
// min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
fprintf
(
fd
,
" min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
\n
"
);
// sgn = _mm256_sign_epi8(sgn, ymm0);
printf
(
" sgn = _mm256_sign_epi8(sgn, ymm0);
\n
"
);
// sgn = _mm256_sign_epi8(sgn, ymm0);
fprintf
(
fd
,
" sgn = _mm256_sign_epi8(sgn, ymm0);
\n
"
);
}
// Store result
// min = _mm256_min_epu8(min, *p_maxLLR); // 128 in epi8 is -127
printf
(
" min = _mm256_min_epu8(min, *p_maxLLR);
\n
"
);
// *p_cnProcBufResBit = _mm256_sign_epi8(min, sgn);
// p_cnProcBufResBit++;
printf
(
" ((__m256i*)cnProcBufRestBit)[%d] = _mm256_sign_epi8(min, sgn);
\n
"
,
lut_numCnInCnGroups
[
6
]
+
(
j
*
bitOffsetInGroup
)
+
i
);
// Store result
// min = _mm256_min_epu8(min, *p_maxLLR); // 128 in epi8 is -127
fprintf
(
fd
,
" min = _mm256_min_epu8(min, *p_maxLLR);
\n
"
);
// *p_cnProcBufResBit = _mm256_sign_epi8(min, sgn);
// p_cnProcBufResBit++;
fprintf
(
fd
,
" ((__m256i*)cnProcBufRestBit)[%d] = _mm256_sign_epi8(min, sgn);
\n
"
,
lut_numCnInCnGroups
[
6
]
+
(
j
*
bitOffsetInGroup
)
+
i
);
}
}
}
// =====================================================================
// Process group with 10 BNs
// =====================================================================
// Process group with 10 BNs
// Offset is 1*384/32 = 12
const
uint8_t
lut_idxCnProcG10
[
10
][
9
]
=
{{
12
,
24
,
36
,
48
,
60
,
72
,
84
,
96
,
108
},
{
0
,
24
,
36
,
48
,
60
,
72
,
84
,
96
,
108
},
{
0
,
12
,
36
,
48
,
60
,
72
,
84
,
96
,
108
},
{
0
,
12
,
24
,
48
,
60
,
72
,
84
,
96
,
108
},
{
0
,
12
,
24
,
36
,
60
,
72
,
84
,
96
,
108
},
{
0
,
12
,
24
,
36
,
48
,
72
,
84
,
96
,
108
},
{
0
,
12
,
24
,
36
,
48
,
60
,
84
,
96
,
108
},
{
0
,
12
,
24
,
36
,
48
,
60
,
72
,
96
,
108
},
{
0
,
12
,
24
,
36
,
48
,
60
,
72
,
84
,
108
},
{
0
,
12
,
24
,
36
,
48
,
60
,
72
,
84
,
96
}};
// Offset is 1*384/32 = 12
const
uint8_t
lut_idxCnProcG10
[
10
][
9
]
=
{{
12
,
24
,
36
,
48
,
60
,
72
,
84
,
96
,
108
},
{
0
,
24
,
36
,
48
,
60
,
72
,
84
,
96
,
108
},
{
0
,
12
,
36
,
48
,
60
,
72
,
84
,
96
,
108
},
{
0
,
12
,
24
,
48
,
60
,
72
,
84
,
96
,
108
},
{
0
,
12
,
24
,
36
,
60
,
72
,
84
,
96
,
108
},
{
0
,
12
,
24
,
36
,
48
,
72
,
84
,
96
,
108
},
{
0
,
12
,
24
,
36
,
48
,
60
,
84
,
96
,
108
},
{
0
,
12
,
24
,
36
,
48
,
60
,
72
,
96
,
108
},
{
0
,
12
,
24
,
36
,
48
,
60
,
72
,
84
,
108
},
{
0
,
12
,
24
,
36
,
48
,
60
,
72
,
84
,
96
}};
if
(
lut_numCnInCnGroups
[
7
]
>
0
)
if
(
lut_numCnInCnGroups
[
7
]
>
0
)
{
// Number of groups of 32 CNs for parallel processing
// Ceil for values not divisible by 32
M
=
(
lut_numCnInCnGroups
[
7
]
*
Z
+
31
)
>>
5
;
// Number of groups of 32 CNs for parallel processing
// Ceil for values not divisible by 32
M
=
(
lut_numCnInCnGroups
[
7
]
*
Z
+
31
)
>>
5
;
// Set the offset to each bit within a group in terms of 32 Byte
bitOffsetInGroup
=
(
lut_numCnInCnGroups_BG1_R13
[
7
]
*
NR_LDPC_ZMAX
)
>>
5
;
// Set the offset to each bit within a group in terms of 32 Byte
bitOffsetInGroup
=
(
lut_numCnInCnGroups_BG1_R13
[
7
]
*
NR_LDPC_ZMAX
)
>>
5
;
// Set pointers to start of group 10
//p_cnProcBuf = (__m256i*) &cnProcBuf [lut_startAddrCnGroups[7]];
//p_cnProcBufRes = (__m256i*) &cnProcBufRes[lut_startAddrCnGroups[7]];
// Set pointers to start of group 10
//p_cnProcBuf = (__m256i*) &cnProcBuf [lut_startAddrCnGroups[7]];
//p_cnProcBufRes = (__m256i*) &cnProcBufRes[lut_startAddrCnGroups[7]];
// Loop over every BN
int
iprime
=
0
;
for
(
j
=
0
;
j
<
10
;
j
++
)
// Loop over every BN
int
iprime
=
0
;
for
(
j
=
0
;
j
<
10
;
j
++
)
{
// Set of results pointer to correct BN address
//p_cnProcBufResBit = p_cnProcBufRes + (j*bitOffsetInGroup);
// Set of results pointer to correct BN address
//p_cnProcBufResBit = p_cnProcBufRes + (j*bitOffsetInGroup);
// Loop over CNs
for
(
i
=
0
;
i
<
M
;
i
++
,
iprime
++
)
// Loop over CNs
for
(
i
=
0
;
i
<
M
;
i
++
,
iprime
++
)
{
// Abs and sign of 32 CNs (first BN)
// ymm0 = p_cnProcBuf[lut_idxCnProcG10[j][0] + i];
printf
(
" ymm0 = ((__m256i*)&cnProcBuf)[%d];
\n
"
,
lut_startAddrCnGroups
[
7
]
+
lut_idxCnProcG10
[
j
][
0
]
+
i
);
// sgn = _mm256_sign_epi8(*p_ones, ymm0);
printf
(
" sgn = _mm256_sign_epi8(*p_ones, ymm0);
\n
"
);
// min = _mm256_abs_epi8(ymm0);
printf
(
" min = _mm256_abs_epi8(ymm0);
\n
"
);
// Abs and sign of 32 CNs (first BN)
// ymm0 = p_cnProcBuf[lut_idxCnProcG10[j][0] + i];
fprintf
(
fd
,
" ymm0 = ((__m256i*)&cnProcBuf)[%d];
\n
"
,
lut_startAddrCnGroups
[
7
]
+
lut_idxCnProcG10
[
j
][
0
]
+
i
);
// sgn = _mm256_sign_epi8(*p_ones, ymm0);
fprintf
(
fd
,
" sgn = _mm256_sign_epi8(*p_ones, ymm0);
\n
"
);
// min = _mm256_abs_epi8(ymm0);
fprintf
(
fd
,
" min = _mm256_abs_epi8(ymm0);
\n
"
);
// Loop over BNs
for
(
k
=
1
;
k
<
9
;
k
++
)
// Loop over BNs
for
(
k
=
1
;
k
<
9
;
k
++
)
{
printf
(
" ymm0 = ((__m256i*)&cnProcBuf)[%d];
\n
"
,
lut_startAddrCnGroups
[
7
]
+
lut_idxCnProcG10
[
j
][
k
]
+
i
);
fprintf
(
fd
,
" ymm0 = ((__m256i*)&cnProcBuf)[%d];
\n
"
,
lut_startAddrCnGroups
[
7
]
+
lut_idxCnProcG10
[
j
][
k
]
+
i
);
// min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
printf
(
" min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
\n
"
);
// min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
fprintf
(
fd
,
" min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
\n
"
);
// sgn = _mm256_sign_epi8(sgn, ymm0);
printf
(
" sgn = _mm256_sign_epi8(sgn, ymm0);
\n
"
);
// sgn = _mm256_sign_epi8(sgn, ymm0);
fprintf
(
fd
,
" sgn = _mm256_sign_epi8(sgn, ymm0);
\n
"
);
}
// Store result
// min = _mm256_min_epu8(min, *p_maxLLR); // 128 in epi8 is -127
printf
(
" min = _mm256_min_epu8(min, *p_maxLLR);
\n
"
);
// *p_cnProcBufResBit = _mm256_sign_epi8(min, sgn);
// p_cnProcBufResBit++;
printf
(
" ((__m256i*)cnProcBufRestBit)[%d] = _mm256_sign_epi8(min, sgn);
\n
"
,
lut_numCnInCnGroups
[
7
]
+
(
j
*
bitOffsetInGroup
)
+
i
);
// Store result
// min = _mm256_min_epu8(min, *p_maxLLR); // 128 in epi8 is -127
fprintf
(
fd
,
" min = _mm256_min_epu8(min, *p_maxLLR);
\n
"
);
// *p_cnProcBufResBit = _mm256_sign_epi8(min, sgn);
// p_cnProcBufResBit++;
fprintf
(
fd
,
" ((__m256i*)cnProcBufRestBit)[%d] = _mm256_sign_epi8(min, sgn);
\n
"
,
lut_numCnInCnGroups
[
7
]
+
(
j
*
bitOffsetInGroup
)
+
i
);
}
}
}
// =====================================================================
// Process group with 19 BNs
// Offset is 4*384/32 = 48
const
uint16_t
lut_idxCnProcG19
[
19
][
18
]
=
{{
48
,
96
,
144
,
192
,
240
,
288
,
336
,
384
,
432
,
480
,
528
,
576
,
624
,
672
,
720
,
768
,
816
,
864
},
{
0
,
96
,
144
,
192
,
240
,
288
,
336
,
384
,
432
,
480
,
528
,
576
,
624
,
672
,
720
,
768
,
816
,
864
},
{
0
,
48
,
144
,
192
,
240
,
288
,
336
,
384
,
432
,
480
,
528
,
576
,
624
,
672
,
720
,
768
,
816
,
864
},
{
0
,
48
,
96
,
192
,
240
,
288
,
336
,
384
,
432
,
480
,
528
,
576
,
624
,
672
,
720
,
768
,
816
,
864
},
{
0
,
48
,
96
,
144
,
240
,
288
,
336
,
384
,
432
,
480
,
528
,
576
,
624
,
672
,
720
,
768
,
816
,
864
},
{
0
,
48
,
96
,
144
,
192
,
288
,
336
,
384
,
432
,
480
,
528
,
576
,
624
,
672
,
720
,
768
,
816
,
864
},
{
0
,
48
,
96
,
144
,
192
,
240
,
336
,
384
,
432
,
480
,
528
,
576
,
624
,
672
,
720
,
768
,
816
,
864
},
{
0
,
48
,
96
,
144
,
192
,
240
,
288
,
384
,
432
,
480
,
528
,
576
,
624
,
672
,
720
,
768
,
816
,
864
},
{
0
,
48
,
96
,
144
,
192
,
240
,
288
,
336
,
432
,
480
,
528
,
576
,
624
,
672
,
720
,
768
,
816
,
864
},
{
0
,
48
,
96
,
144
,
192
,
240
,
288
,
336
,
384
,
480
,
528
,
576
,
624
,
672
,
720
,
768
,
816
,
864
},
{
0
,
48
,
96
,
144
,
192
,
240
,
288
,
336
,
384
,
432
,
528
,
576
,
624
,
672
,
720
,
768
,
816
,
864
},
{
0
,
48
,
96
,
144
,
192
,
240
,
288
,
336
,
384
,
432
,
480
,
576
,
624
,
672
,
720
,
768
,
816
,
864
},
{
0
,
48
,
96
,
144
,
192
,
240
,
288
,
336
,
384
,
432
,
480
,
528
,
624
,
672
,
720
,
768
,
816
,
864
},
{
0
,
48
,
96
,
144
,
192
,
240
,
288
,
336
,
384
,
432
,
480
,
528
,
576
,
672
,
720
,
768
,
816
,
864
},
{
0
,
48
,
96
,
144
,
192
,
240
,
288
,
336
,
384
,
432
,
480
,
528
,
576
,
624
,
720
,
768
,
816
,
864
},
{
0
,
48
,
96
,
144
,
192
,
240
,
288
,
336
,
384
,
432
,
480
,
528
,
576
,
624
,
672
,
768
,
816
,
864
},
{
0
,
48
,
96
,
144
,
192
,
240
,
288
,
336
,
384
,
432
,
480
,
528
,
576
,
624
,
672
,
720
,
816
,
864
},
{
0
,
48
,
96
,
144
,
192
,
240
,
288
,
336
,
384
,
432
,
480
,
528
,
576
,
624
,
672
,
720
,
768
,
864
},
{
0
,
48
,
96
,
144
,
192
,
240
,
288
,
336
,
384
,
432
,
480
,
528
,
576
,
624
,
672
,
720
,
768
,
816
}};
// =====================================================================
// Process group with 19 BNs
// Offset is 4*384/32 = 48
const
uint16_t
lut_idxCnProcG19
[
19
][
18
]
=
{{
48
,
96
,
144
,
192
,
240
,
288
,
336
,
384
,
432
,
480
,
528
,
576
,
624
,
672
,
720
,
768
,
816
,
864
},
{
0
,
96
,
144
,
192
,
240
,
288
,
336
,
384
,
432
,
480
,
528
,
576
,
624
,
672
,
720
,
768
,
816
,
864
},
{
0
,
48
,
144
,
192
,
240
,
288
,
336
,
384
,
432
,
480
,
528
,
576
,
624
,
672
,
720
,
768
,
816
,
864
},
{
0
,
48
,
96
,
192
,
240
,
288
,
336
,
384
,
432
,
480
,
528
,
576
,
624
,
672
,
720
,
768
,
816
,
864
},
{
0
,
48
,
96
,
144
,
240
,
288
,
336
,
384
,
432
,
480
,
528
,
576
,
624
,
672
,
720
,
768
,
816
,
864
},
{
0
,
48
,
96
,
144
,
192
,
288
,
336
,
384
,
432
,
480
,
528
,
576
,
624
,
672
,
720
,
768
,
816
,
864
},
{
0
,
48
,
96
,
144
,
192
,
240
,
336
,
384
,
432
,
480
,
528
,
576
,
624
,
672
,
720
,
768
,
816
,
864
},
{
0
,
48
,
96
,
144
,
192
,
240
,
288
,
384
,
432
,
480
,
528
,
576
,
624
,
672
,
720
,
768
,
816
,
864
},
{
0
,
48
,
96
,
144
,
192
,
240
,
288
,
336
,
432
,
480
,
528
,
576
,
624
,
672
,
720
,
768
,
816
,
864
},
{
0
,
48
,
96
,
144
,
192
,
240
,
288
,
336
,
384
,
480
,
528
,
576
,
624
,
672
,
720
,
768
,
816
,
864
},
{
0
,
48
,
96
,
144
,
192
,
240
,
288
,
336
,
384
,
432
,
528
,
576
,
624
,
672
,
720
,
768
,
816
,
864
},
{
0
,
48
,
96
,
144
,
192
,
240
,
288
,
336
,
384
,
432
,
480
,
576
,
624
,
672
,
720
,
768
,
816
,
864
},
{
0
,
48
,
96
,
144
,
192
,
240
,
288
,
336
,
384
,
432
,
480
,
528
,
624
,
672
,
720
,
768
,
816
,
864
},
{
0
,
48
,
96
,
144
,
192
,
240
,
288
,
336
,
384
,
432
,
480
,
528
,
576
,
672
,
720
,
768
,
816
,
864
},
{
0
,
48
,
96
,
144
,
192
,
240
,
288
,
336
,
384
,
432
,
480
,
528
,
576
,
624
,
720
,
768
,
816
,
864
},
{
0
,
48
,
96
,
144
,
192
,
240
,
288
,
336
,
384
,
432
,
480
,
528
,
576
,
624
,
672
,
768
,
816
,
864
},
{
0
,
48
,
96
,
144
,
192
,
240
,
288
,
336
,
384
,
432
,
480
,
528
,
576
,
624
,
672
,
720
,
816
,
864
},
{
0
,
48
,
96
,
144
,
192
,
240
,
288
,
336
,
384
,
432
,
480
,
528
,
576
,
624
,
672
,
720
,
768
,
864
},
{
0
,
48
,
96
,
144
,
192
,
240
,
288
,
336
,
384
,
432
,
480
,
528
,
576
,
624
,
672
,
720
,
768
,
816
}};
if
(
lut_numCnInCnGroups
[
8
]
>
0
)
if
(
lut_numCnInCnGroups
[
8
]
>
0
)
{
// Number of groups of 32 CNs for parallel processing
// Ceil for values not divisible by 32
M
=
(
lut_numCnInCnGroups
[
8
]
*
Z
+
31
)
>>
5
;
// Number of groups of 32 CNs for parallel processing
// Ceil for values not divisible by 32
M
=
(
lut_numCnInCnGroups
[
8
]
*
Z
+
31
)
>>
5
;
// Set the offset to each bit within a group in terms of 32 Byte
bitOffsetInGroup
=
(
lut_numCnInCnGroups_BG1_R13
[
8
]
*
NR_LDPC_ZMAX
)
>>
5
;
// Set the offset to each bit within a group in terms of 32 Byte
bitOffsetInGroup
=
(
lut_numCnInCnGroups_BG1_R13
[
8
]
*
NR_LDPC_ZMAX
)
>>
5
;
// Set pointers to start of group 19
//p_cnProcBuf = (__m256i*) &cnProcBuf [lut_startAddrCnGroups[8]];
//p_cnProcBufRes = (__m256i*) &cnProcBufRes[lut_startAddrCnGroups[8]];
// Set pointers to start of group 19
//p_cnProcBuf = (__m256i*) &cnProcBuf [lut_startAddrCnGroups[8]];
//p_cnProcBufRes = (__m256i*) &cnProcBufRes[lut_startAddrCnGroups[8]];
// Loop over every BN
int
iprime
=
0
;
for
(
j
=
0
;
j
<
19
;
j
++
)
// Loop over every BN
int
iprime
=
0
;
for
(
j
=
0
;
j
<
19
;
j
++
)
{
// Set of results pointer to correct BN address
//p_cnProcBufResBit = p_cnProcBufRes + (j*bitOffsetInGroup);
// Set of results pointer to correct BN address
//p_cnProcBufResBit = p_cnProcBufRes + (j*bitOffsetInGroup);
// Loop over CNs
for
(
i
=
0
;
i
<
M
;
i
++
,
iprime
++
)
// Loop over CNs
for
(
i
=
0
;
i
<
M
;
i
++
,
iprime
++
)
{
// Abs and sign of 32 CNs (first BN)
// ymm0 = p_cnProcBuf[lut_idxCnProcG19[j][0] + i];
printf
(
" ymm0 = ((__m256i*)&cnProcBuf)[%d];
\n
"
,
lut_startAddrCnGroups
[
8
]
+
lut_idxCnProcG19
[
j
][
0
]
+
i
);
// sgn = _mm256_sign_epi8(*p_ones, ymm0);
printf
(
" sgn = _mm256_sign_epi8(*p_ones, ymm0);
\n
"
);
// min = _mm256_abs_epi8(ymm0);
printf
(
" min = _mm256_abs_epi8(ymm0);
\n
"
);
// Abs and sign of 32 CNs (first BN)
// ymm0 = p_cnProcBuf[lut_idxCnProcG19[j][0] + i];
fprintf
(
fd
,
" ymm0 = ((__m256i*)&cnProcBuf)[%d];
\n
"
,
lut_startAddrCnGroups
[
8
]
+
lut_idxCnProcG19
[
j
][
0
]
+
i
);
// sgn = _mm256_sign_epi8(*p_ones, ymm0);
fprintf
(
fd
,
" sgn = _mm256_sign_epi8(*p_ones, ymm0);
\n
"
);
// min = _mm256_abs_epi8(ymm0);
fprintf
(
fd
,
" min = _mm256_abs_epi8(ymm0);
\n
"
);
// Loop over BNs
for
(
k
=
1
;
k
<
18
;
k
++
)
// Loop over BNs
for
(
k
=
1
;
k
<
18
;
k
++
)
{
printf
(
" ymm0 = ((__m256i*)&cnProcBuf)[%d];
\n
"
,
lut_startAddrCnGroups
[
8
]
+
lut_idxCnProcG19
[
j
][
k
]
+
i
);
fprintf
(
fd
,
" ymm0 = ((__m256i*)&cnProcBuf)[%d];
\n
"
,
lut_startAddrCnGroups
[
8
]
+
lut_idxCnProcG19
[
j
][
k
]
+
i
);
// min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
printf
(
" min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
\n
"
);
// min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
fprintf
(
fd
,
" min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
\n
"
);
// sgn = _mm256_sign_epi8(sgn, ymm0);
printf
(
" sgn = _mm256_sign_epi8(sgn, ymm0);
\n
"
);
// sgn = _mm256_sign_epi8(sgn, ymm0);
fprintf
(
fd
,
" sgn = _mm256_sign_epi8(sgn, ymm0);
\n
"
);
}
// Store result
// min = _mm256_min_epu8(min, *p_maxLLR); // 128 in epi8 is -127
printf
(
" min = _mm256_min_epu8(min, *p_maxLLR);
\n
"
);
// *p_cnProcBufResBit = _mm256_sign_epi8(min, sgn);
// p_cnProcBufResBit++;
printf
(
" ((__m256i*)cnProcBufRestBit)[%d] = _mm256_sign_epi8(min, sgn);
\n
"
,
lut_numCnInCnGroups
[
8
]
+
(
j
*
bitOffsetInGroup
)
+
i
);
// Store result
// min = _mm256_min_epu8(min, *p_maxLLR); // 128 in epi8 is -127
fprintf
(
fd
,
" min = _mm256_min_epu8(min, *p_maxLLR);
\n
"
);
// *p_cnProcBufResBit = _mm256_sign_epi8(min, sgn);
// p_cnProcBufResBit++;
fprintf
(
fd
,
" ((__m256i*)cnProcBufRestBit)[%d] = _mm256_sign_epi8(min, sgn);
\n
"
,
lut_numCnInCnGroups
[
8
]
+
(
j
*
bitOffsetInGroup
)
+
i
);
}
}
}
fprintf
(
fd
,
"}
\n
"
);
fclose
(
fd
);
}
//end of the function nrLDPC_cnProc_BG1
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment