Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
O
OpenXG-RAN
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
lizhongxiao
OpenXG-RAN
Commits
3c013cf1
Commit
3c013cf1
authored
Jan 15, 2023
by
Hongzhi Wang
Committed by
laurent
Apr 11, 2023
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
minor changes to build/run on x86
parent
2c666a8e
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
707 additions
and
248 deletions
+707
-248
openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_decoder.c
openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_decoder.c
+1
-1
openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/bnProcPc_gen_BG1_avx2.c
...der/nrLDPC_tools/generator_bnProc/bnProcPc_gen_BG1_avx2.c
+706
-247
No files found.
openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_decoder.c
View file @
3c013cf1
...
...
@@ -35,7 +35,7 @@
#include "nrLDPC_bnProc.h"
#define UNROLL_CN_PROC 1
#define UNROLL_BN_PROC 1
//
#define UNROLL_BN_PROC_PC 1
#define UNROLL_BN_PROC_PC 1
#define UNROLL_BN2CN_PROC 1
/*----------------------------------------------------------------------
| cn processing files -->AVX512
...
...
openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/bnProcPc_gen_BG1_avx2.c
View file @
3c013cf1
...
...
@@ -80,17 +80,66 @@ void nrLDPC_bnProcPc_BG1_generator_AVX2(const char *dir, int R)
uint32_t
cnOffsetInGroup
;
uint8_t
idxBnGroup
=
0
;
fprintf
(
fd
,
" __m256i ymm0, ymm1, ymmRes0, ymmRes1;
\n
"
);
fprintf
(
fd
,
"
simde__m256
i* p_bnProcBuf;
\n
"
);
fprintf
(
fd
,
"
simde__m256
i* p_llrProcBuf;
\n
"
);
fprintf
(
fd
,
"
simde
__m256i* p_llrRes;
\n
"
);
// fprintf(fd,"
simde
__m256i* p_bnProcBufRes; \n");
// fprintf(fd,"
simde
__m256i* p_llrProcBuf256; \n");
fprintf
(
fd
,
"
__m128
i* p_bnProcBuf;
\n
"
);
fprintf
(
fd
,
"
__m128
i* p_llrProcBuf;
\n
"
);
fprintf
(
fd
,
" __m256i* p_llrRes;
\n
"
);
// fprintf(fd," __m256i* p_bnProcBufRes; \n");
// fprintf(fd," __m256i* p_llrProcBuf256; \n");
fprintf
(
fd
,
" uint32_t M ;
\n
"
);
fprintf
(
fd
,
"// Process group with 1 CNs
\n
"
);
/*
// Process group with 1 CNs
// if (lut_numBnInBnGroups[0] > 0)
// {
// If elements in group move to next address
// idxBnGroup++;
// Number of groups of 32 BNs for parallel processing
fprintf(fd," M = (%d*Z + 31)>>5;\n",lut_numBnInBnGroups[0] );
// Set the offset to each CN within a group in terms of 16 Byte
// cnOffsetInGroup = (lut_numBnInBnGroups[0]*NR_LDPC_ZMAX)>>4;
// Set pointers to start of group 2
fprintf(fd," p_bnProcBuf = (__m128i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
// fprintf(fd," p_bnProcBufRes = (__m256i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
fprintf(fd," p_llrProcBuf = (__m128i*) &llrProcBuf [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
// fprintf(fd," p_llrProcBuf256 = (__m256i*) &llrProcBuf [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
fprintf(fd," p_llrRes = (__m256i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
// Loop over BNs
fprintf(fd," for (int i=0,j=0;i<M;i++,j+=2) {\n");
fprintf(fd," p_bnProcBufRes[i] = p_llrProcBuf256[i];\n");
// First 16 LLRs of first CN
fprintf(fd," ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
fprintf(fd," ymm1 = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
fprintf(fd," ymmRes0 = simde_mm256_adds_epi16(ymm0, ymm1);\n");
// Second 16 LLRs of first CN
fprintf(fd," ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[j + 1 ]);\n");
fprintf(fd," ymm1 = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j + 1 ]);\n");
fprintf(fd," ymmRes1 = simde_mm256_adds_epi16(ymm0, ymm1);\n");
// Pack results back to epi8
fprintf(fd," ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);\n");
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
fprintf(fd," p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);\n");
fprintf(fd,"}\n");
//}
*/
// =====================================================================
// Process group with 2 CNs
...
...
@@ -111,24 +160,39 @@ fprintf(fd, "// Process group with 2 CNs \n");
cnOffsetInGroup
=
(
lut_numBnInBnGroups
[
1
]
*
NR_LDPC_ZMAX
)
>>
4
;
// Set pointers to start of group 2
fprintf
(
fd
,
" p_bnProcBuf = (
simde__m256
i*) &bnProcBuf [%d];
\n
"
,
lut_startAddrBnGroups
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrProcBuf = (
simde__m256
i*) &llrProcBuf [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrRes = (
simde
__m256i*) &llrRes [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
fprintf
(
fd
,
" p_bnProcBuf = (
__m128
i*) &bnProcBuf [%d];
\n
"
,
lut_startAddrBnGroups
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrProcBuf = (
__m128
i*) &llrProcBuf [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrRes = (__m256i*) &llrRes [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
// Loop over BNs
fprintf
(
fd
,
" for (int i=0
;i<M;i++
) {
\n
"
);
fprintf
(
fd
,
" for (int i=0
,j=0;i<M;i++,j+=2
) {
\n
"
);
// First 16 LLRs of first CN
fprintf
(
fd
,
" p_llrRes[i] = p_bnProcBuf [i];
\n
"
);
fprintf
(
fd
,
" ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);
\n
"
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[j + 1]);
\n
"
);
// Loop over CNs
for
(
k
=
1
;
k
<
2
;
k
++
)
{
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_adds_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
\n
"
);
fprintf
(
fd
,
" ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
\n
"
);
}
// Add LLR from receiver input
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_adds_epi8(p_llrRes[i], p_llrProcBuf[i]);
\n
"
);
fprintf
(
fd
,
" ymm0 = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);
\n
"
);
fprintf
(
fd
,
" ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
\n
"
);
fprintf
(
fd
,
" ymm1 = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);
\n
"
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
\n
"
);
// Pack results back to epi8
fprintf
(
fd
,
" ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);
\n
"
);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);
\n
"
);
fprintf
(
fd
,
"}
\n
"
);
...
...
@@ -154,27 +218,39 @@ fprintf(fd, "// Process group with 3 CNs \n");
cnOffsetInGroup
=
(
lut_numBnInBnGroups
[
2
]
*
NR_LDPC_ZMAX
)
>>
4
;
// Set pointers to start of group 2
fprintf
(
fd
,
" p_bnProcBuf = (
simde__m256
i*) &bnProcBuf [%d];
\n
"
,
lut_startAddrBnGroups
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrProcBuf = (
simde__m256
i*) &llrProcBuf [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrRes = (
simde
__m256i*) &llrRes [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
fprintf
(
fd
,
" p_bnProcBuf = (
__m128
i*) &bnProcBuf [%d];
\n
"
,
lut_startAddrBnGroups
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrProcBuf = (
__m128
i*) &llrProcBuf [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrRes = (__m256i*) &llrRes [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
// Loop over BNs
fprintf
(
fd
,
"
for (int i=0;i<M;i++
) {
\n
"
);
fprintf
(
fd
,
"
for (int i=0,j=0;i<M;i++,j+=2
) {
\n
"
);
// First 16 LLRs of first CN
fprintf
(
fd
,
" p_llrRes[i] = p_bnProcBuf [i];
\n
"
);
fprintf
(
fd
,
" ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);
\n
"
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);
\n
"
);
// Loop over CNs
for
(
k
=
1
;
k
<
3
;
k
++
)
{
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_adds_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
\n
"
);
fprintf
(
fd
,
" ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
\n
"
);
}
// Add LLR from receiver input
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_adds_epi8(p_llrRes[i], p_llrProcBuf[i]);
\n
"
);
fprintf
(
fd
,
" ymm0 = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);
\n
"
);
fprintf
(
fd
,
" ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
\n
"
);
fprintf
(
fd
,
" ymm1 = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);
\n
"
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
\n
"
);
// Pack results back to epi8
fprintf
(
fd
,
" ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);
\n
"
);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);
\n
"
);
fprintf
(
fd
,
"}
\n
"
);
}
...
...
@@ -198,24 +274,39 @@ fprintf(fd, "// Process group with 4 CNs \n");
cnOffsetInGroup
=
(
lut_numBnInBnGroups
[
3
]
*
NR_LDPC_ZMAX
)
>>
4
;
// Set pointers to start of group 2
fprintf
(
fd
,
" p_bnProcBuf = (simde__m256i*) &bnProcBuf [%d];
\n
"
,
lut_startAddrBnGroups
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrProcBuf = (simde__m256i*) &llrProcBuf [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrRes = (simde__m256i*) &llrRes [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
fprintf
(
fd
,
" p_bnProcBuf = (__m128i*) &bnProcBuf [%d];
\n
"
,
lut_startAddrBnGroups
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrProcBuf = (__m128i*) &llrProcBuf [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrRes = (__m256i*) &llrRes [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
// Loop over BNs
fprintf
(
fd
,
"
for (int i=0;i<M;i++
) {
\n
"
);
fprintf
(
fd
,
"
for (int i=0,j=0;i<M;i++,j+=2
) {
\n
"
);
// First 16 LLRs of first CN
fprintf
(
fd
,
" p_llrRes[i] = p_bnProcBuf [i];
\n
"
);
fprintf
(
fd
,
" ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);
\n
"
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);
\n
"
);
// Loop over CNs
for
(
k
=
1
;
k
<
4
;
k
++
)
{
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_adds_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
\n
"
);
fprintf
(
fd
,
" ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
\n
"
);
}
// Add LLR from receiver input
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_adds_epi8(p_llrRes[i], p_llrProcBuf[i]);
\n
"
);
fprintf
(
fd
,
" ymm0 = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);
\n
"
);
fprintf
(
fd
,
" ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
\n
"
);
fprintf
(
fd
,
" ymm1 = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);
\n
"
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
\n
"
);
// Pack results back to epi8
fprintf
(
fd
,
" ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);
\n
"
);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);
\n
"
);
fprintf
(
fd
,
"}
\n
"
);
}
...
...
@@ -224,7 +315,7 @@ fprintf(fd, "// Process group with 4 CNs \n");
// =====================================================================
// Process group with 5 CNs
fprintf
(
fd
,
"// Process group with 5 CNs
\n
"
);
fprintf
(
fd
,
"// Process group with 5 CNs
\n
"
);
// Process group with 5 CNs
...
...
@@ -240,24 +331,38 @@ fprintf(fd, "// Process group with 4 CNs \n");
cnOffsetInGroup
=
(
lut_numBnInBnGroups
[
4
]
*
NR_LDPC_ZMAX
)
>>
4
;
// Set pointers to start of group 2
fprintf
(
fd
,
" p_bnProcBuf = (
simde__m256
i*) &bnProcBuf [%d];
\n
"
,
lut_startAddrBnGroups
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrProcBuf = (
simde__m256
i*) &llrProcBuf [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrRes = (
simde
__m256i*) &llrRes [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
fprintf
(
fd
,
" p_bnProcBuf = (
__m128
i*) &bnProcBuf [%d];
\n
"
,
lut_startAddrBnGroups
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrProcBuf = (
__m128
i*) &llrProcBuf [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrRes = (__m256i*) &llrRes [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
// Loop over BNs
fprintf
(
fd
,
"
for (int i=0;i<M;i++
) {
\n
"
);
fprintf
(
fd
,
"
for (int i=0,j=0;i<M;i++,j+=2
) {
\n
"
);
// First 16 LLRs of first CN
fprintf
(
fd
,
" p_llrRes[i] = p_bnProcBuf [i];
\n
"
);
fprintf
(
fd
,
" ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);
\n
"
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);
\n
"
);
// Loop over CNs
for
(
k
=
1
;
k
<
5
;
k
++
)
{
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_adds_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
\n
"
);
fprintf
(
fd
,
" ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
\n
"
);
}
// Add LLR from receiver input
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_adds_epi8(p_llrRes[i], p_llrProcBuf[i]);
\n
"
);
fprintf
(
fd
,
" ymm0 = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);
\n
"
);
fprintf
(
fd
,
" ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
\n
"
);
fprintf
(
fd
,
" ymm1 = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);
\n
"
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
\n
"
);
// Pack results back to epi8
fprintf
(
fd
,
" ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);
\n
"
);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);
\n
"
);
fprintf
(
fd
,
"}
\n
"
);
}
...
...
@@ -283,24 +388,38 @@ fprintf(fd, "// Process group with 6 CNs \n");
cnOffsetInGroup
=
(
lut_numBnInBnGroups
[
5
]
*
NR_LDPC_ZMAX
)
>>
4
;
// Set pointers to start of group 2
fprintf
(
fd
,
" p_bnProcBuf = (
simde__m256
i*) &bnProcBuf [%d];
\n
"
,
lut_startAddrBnGroups
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrProcBuf = (
simde__m256
i*) &llrProcBuf [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrRes = (
simde
__m256i*) &llrRes [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
fprintf
(
fd
,
" p_bnProcBuf = (
__m128
i*) &bnProcBuf [%d];
\n
"
,
lut_startAddrBnGroups
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrProcBuf = (
__m128
i*) &llrProcBuf [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrRes = (__m256i*) &llrRes [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
// Loop over BNs
fprintf
(
fd
,
"
for (int i=0;i<M;i++
) {
\n
"
);
fprintf
(
fd
,
"
for (int i=0,j=0;i<M;i++,j+=2
) {
\n
"
);
// First 16 LLRs of first CN
fprintf
(
fd
,
" p_llrRes[i] = p_bnProcBuf [i];
\n
"
);
fprintf
(
fd
,
" ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);
\n
"
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);
\n
"
);
// Loop over CNs
for
(
k
=
1
;
k
<
6
;
k
++
)
{
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_adds_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
\n
"
);
fprintf
(
fd
,
" ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
\n
"
);
}
// Add LLR from receiver input
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_adds_epi8(p_llrRes[i], p_llrProcBuf[i]);
\n
"
);
fprintf
(
fd
,
" ymm0 = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);
\n
"
);
fprintf
(
fd
,
" ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
\n
"
);
fprintf
(
fd
,
" ymm1 = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);
\n
"
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
\n
"
);
// Pack results back to epi8
fprintf
(
fd
,
" ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);
\n
"
);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);
\n
"
);
fprintf
(
fd
,
"}
\n
"
);
}
...
...
@@ -325,27 +444,44 @@ fprintf(fd, "// Process group with 7 CNs \n");
cnOffsetInGroup
=
(
lut_numBnInBnGroups
[
6
]
*
NR_LDPC_ZMAX
)
>>
4
;
// Set pointers to start of group 2
fprintf
(
fd
,
" p_bnProcBuf = (
simde__m256
i*) &bnProcBuf [%d];
\n
"
,
lut_startAddrBnGroups
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrProcBuf = (
simde__m256
i*) &llrProcBuf [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrRes = (
simde
__m256i*) &llrRes [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
fprintf
(
fd
,
" p_bnProcBuf = (
__m128
i*) &bnProcBuf [%d];
\n
"
,
lut_startAddrBnGroups
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrProcBuf = (
__m128
i*) &llrProcBuf [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrRes = (__m256i*) &llrRes [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
// Loop over BNs
fprintf
(
fd
,
"
for (int i=0;i<M;i++
) {
\n
"
);
fprintf
(
fd
,
"
for (int i=0,j=0;i<M;i++,j+=2
) {
\n
"
);
// First 16 LLRs of first CN
fprintf
(
fd
,
" p_llrRes[i] = p_bnProcBuf [i];
\n
"
);
fprintf
(
fd
,
" ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);
\n
"
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);
\n
"
);
// Loop over CNs
for
(
k
=
1
;
k
<
7
;
k
++
)
{
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_adds_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
\n
"
);
fprintf
(
fd
,
" ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
\n
"
);
}
// Add LLR from receiver input
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_adds_epi8(p_llrRes[i], p_llrProcBuf[i]);
\n
"
);
fprintf
(
fd
,
" ymm0 = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);
\n
"
);
fprintf
(
fd
,
" ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
\n
"
);
fprintf
(
fd
,
" ymm1 = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);
\n
"
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
\n
"
);
// Pack results back to epi8
fprintf
(
fd
,
" ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);
\n
"
);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
//fprintf(fd," (__m256i*) &llrRes[%d + i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);\n",lut_startAddrBnGroupsLlr[idxBnGroup]>>5 );
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);
\n
"
);
fprintf
(
fd
,
"}
\n
"
);
}
// =====================================================================
// Process group with 8 CNs
...
...
@@ -365,27 +501,42 @@ fprintf(fd, "// Process group with 8 CNs \n");
cnOffsetInGroup
=
(
lut_numBnInBnGroups
[
7
]
*
NR_LDPC_ZMAX
)
>>
4
;
// Set pointers to start of group 2
fprintf
(
fd
,
" p_bnProcBuf = (
simde__m256
i*) &bnProcBuf [%d];
\n
"
,
lut_startAddrBnGroups
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrProcBuf = (
simde__m256
i*) &llrProcBuf [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrRes = (
simde
__m256i*) &llrRes [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
fprintf
(
fd
,
" p_bnProcBuf = (
__m128
i*) &bnProcBuf [%d];
\n
"
,
lut_startAddrBnGroups
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrProcBuf = (
__m128
i*) &llrProcBuf [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrRes = (__m256i*) &llrRes [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
// Loop over BNs
fprintf
(
fd
,
"
for (int i=0;i<M;i++
) {
\n
"
);
fprintf
(
fd
,
"
for (int i=0,j=0;i<M;i++,j+=2
) {
\n
"
);
// First 16 LLRs of first CN
fprintf
(
fd
,
" p_llrRes[i] = p_bnProcBuf [i];
\n
"
);
fprintf
(
fd
,
" ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);
\n
"
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);
\n
"
);
// Loop over CNs
for
(
k
=
1
;
k
<
8
;
k
++
)
{
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_adds_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
\n
"
);
fprintf
(
fd
,
" ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
\n
"
);
}
// Add LLR from receiver input
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_adds_epi8(p_llrRes[i], p_llrProcBuf[i]);
\n
"
);
fprintf
(
fd
,
" ymm0 = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);
\n
"
);
fprintf
(
fd
,
" ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
\n
"
);
fprintf
(
fd
,
" ymm1 = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);
\n
"
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
\n
"
);
// Pack results back to epi8
fprintf
(
fd
,
" ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);
\n
"
);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
//fprintf(fd," (__m256i*) &llrRes[%d + i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);\n",lut_startAddrBnGroupsLlr[idxBnGroup]>>5 );
fprintf
(
fd
,
"
}
\n
"
);
fprintf
(
fd
,
"
p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);
\n
"
);
fprintf
(
fd
,
"}
\n
"
);
}
// =====================================================================
...
...
@@ -407,27 +558,41 @@ fprintf(fd, "// Process group with 9 CNs \n");
cnOffsetInGroup
=
(
lut_numBnInBnGroups
[
8
]
*
NR_LDPC_ZMAX
)
>>
4
;
// Set pointers to start of group 2
fprintf
(
fd
,
" p_bnProcBuf = (
simde__m256
i*) &bnProcBuf [%d];
\n
"
,
lut_startAddrBnGroups
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrProcBuf = (
simde__m256
i*) &llrProcBuf [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrRes = (
simde
__m256i*) &llrRes [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
fprintf
(
fd
,
" p_bnProcBuf = (
__m128
i*) &bnProcBuf [%d];
\n
"
,
lut_startAddrBnGroups
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrProcBuf = (
__m128
i*) &llrProcBuf [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrRes = (__m256i*) &llrRes [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
// Loop over BNs
fprintf
(
fd
,
"
for (int i=0;i<M;i++
) {
\n
"
);
fprintf
(
fd
,
"
for (int i=0,j=0;i<M;i++,j+=2
) {
\n
"
);
// First 16 LLRs of first CN
fprintf
(
fd
,
" p_llrRes[i] = p_bnProcBuf [i];
\n
"
);
fprintf
(
fd
,
" ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);
\n
"
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);
\n
"
);
// Loop over CNs
for
(
k
=
1
;
k
<
9
;
k
++
)
{
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_adds_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
\n
"
);
fprintf
(
fd
,
" ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
\n
"
);
}
// Add LLR from receiver input
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_adds_epi8(p_llrRes[i], p_llrProcBuf[i]);
\n
"
);
fprintf
(
fd
,
" ymm0 = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);
\n
"
);
fprintf
(
fd
,
" ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
\n
"
);
fprintf
(
fd
,
" ymm1 = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);
\n
"
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
\n
"
);
// Pack results back to epi8
fprintf
(
fd
,
" ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);
\n
"
);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
//fprintf(fd," (__m256i*) &llrRes[%d + i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);\n",lut_startAddrBnGroupsLlr[idxBnGroup]>>5 );
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);
\n
"
);
fprintf
(
fd
,
"}
\n
"
);
}
...
...
@@ -450,27 +615,40 @@ fprintf(fd, "// Process group with 10 CNs \n");
cnOffsetInGroup
=
(
lut_numBnInBnGroups
[
9
]
*
NR_LDPC_ZMAX
)
>>
4
;
// Set pointers to start of group 2
fprintf
(
fd
,
" p_bnProcBuf = (
simde__m256
i*) &bnProcBuf [%d];
\n
"
,
lut_startAddrBnGroups
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrProcBuf = (
simde__m256
i*) &llrProcBuf [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrRes = (
simde
__m256i*) &llrRes [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
fprintf
(
fd
,
" p_bnProcBuf = (
__m128
i*) &bnProcBuf [%d];
\n
"
,
lut_startAddrBnGroups
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrProcBuf = (
__m128
i*) &llrProcBuf [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrRes = (__m256i*) &llrRes [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
// Loop over BNs
fprintf
(
fd
,
"
for (int i=0;i<M;i++
) {
\n
"
);
fprintf
(
fd
,
"
for (int i=0,j=0;i<M;i++,j+=2
) {
\n
"
);
// First 16 LLRs of first CN
fprintf
(
fd
,
" p_llrRes[i] = p_bnProcBuf [i];
\n
"
);
fprintf
(
fd
,
" ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);
\n
"
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);
\n
"
);
// Loop over CNs
for
(
k
=
1
;
k
<
10
;
k
++
)
{
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_adds_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
\n
"
);
fprintf
(
fd
,
" ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
\n
"
);
}
// Add LLR from receiver input
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_adds_epi8(p_llrRes[i], p_llrProcBuf[i]);
\n
"
);
fprintf
(
fd
,
" ymm0 = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);
\n
"
);
fprintf
(
fd
,
" ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
\n
"
);
fprintf
(
fd
,
" ymm1 = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);
\n
"
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
\n
"
);
// Pack results back to epi8
fprintf
(
fd
,
" ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);
\n
"
);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);
\n
"
);
fprintf
(
fd
,
"}
\n
"
);
}
...
...
@@ -495,24 +673,38 @@ fprintf(fd, "// Process group with 11 CNs \n");
cnOffsetInGroup
=
(
lut_numBnInBnGroups
[
10
]
*
NR_LDPC_ZMAX
)
>>
4
;
// Set pointers to start of group 2
fprintf
(
fd
,
" p_bnProcBuf = (
simde__m256
i*) &bnProcBuf [%d];
\n
"
,
lut_startAddrBnGroups
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrProcBuf = (
simde__m256
i*) &llrProcBuf [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrRes = (
simde
__m256i*) &llrRes [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
fprintf
(
fd
,
" p_bnProcBuf = (
__m128
i*) &bnProcBuf [%d];
\n
"
,
lut_startAddrBnGroups
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrProcBuf = (
__m128
i*) &llrProcBuf [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrRes = (__m256i*) &llrRes [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
// Loop over BNs
fprintf
(
fd
,
" for (int i=0
;i<M;i++
) {
\n
"
);
fprintf
(
fd
,
" for (int i=0
,j=0;i<M;i++,j+=2
) {
\n
"
);
// First 16 LLRs of first CN
fprintf
(
fd
,
" p_llrRes[i] = p_bnProcBuf [i];
\n
"
);
fprintf
(
fd
,
" ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);
\n
"
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);
\n
"
);
// Loop over CNs
for
(
k
=
1
;
k
<
11
;
k
++
)
{
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_adds_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
\n
"
);
fprintf
(
fd
,
" ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
\n
"
);
}
// Add LLR from receiver input
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_adds_epi8(p_llrRes[i], p_llrProcBuf[i]);
\n
"
);
fprintf
(
fd
,
" ymm0 = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);
\n
"
);
fprintf
(
fd
,
" ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
\n
"
);
fprintf
(
fd
,
" ymm1 = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);
\n
"
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
\n
"
);
// Pack results back to epi8
fprintf
(
fd
,
" ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);
\n
"
);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);
\n
"
);
fprintf
(
fd
,
"}
\n
"
);
}
...
...
@@ -522,7 +714,7 @@ fprintf(fd, "// Process group with 11 CNs \n");
fprintf
(
fd
,
"// Process group with 12 CNs
\n
"
);
// Process group with
1
2 CNs
// Process group with 2 CNs
if
(
lut_numBnInBnGroups
[
11
]
>
0
)
{
...
...
@@ -536,24 +728,38 @@ fprintf(fd, "// Process group with 12 CNs \n");
cnOffsetInGroup
=
(
lut_numBnInBnGroups
[
11
]
*
NR_LDPC_ZMAX
)
>>
4
;
// Set pointers to start of group 2
fprintf
(
fd
,
" p_bnProcBuf = (
simde__m256
i*) &bnProcBuf [%d];
\n
"
,
lut_startAddrBnGroups
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrProcBuf = (
simde__m256
i*) &llrProcBuf [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrRes = (
simde
__m256i*) &llrRes [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
fprintf
(
fd
,
" p_bnProcBuf = (
__m128
i*) &bnProcBuf [%d];
\n
"
,
lut_startAddrBnGroups
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrProcBuf = (
__m128
i*) &llrProcBuf [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrRes = (__m256i*) &llrRes [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
// Loop over BNs
fprintf
(
fd
,
" for (int i=0
;i<M;i++
) {
\n
"
);
fprintf
(
fd
,
" for (int i=0
,j=0;i<M;i++,j+=2
) {
\n
"
);
// First 16 LLRs of first CN
fprintf
(
fd
,
" p_llrRes[i] = p_bnProcBuf [i];
\n
"
);
fprintf
(
fd
,
" ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);
\n
"
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);
\n
"
);
// Loop over CNs
for
(
k
=
1
;
k
<
12
;
k
++
)
{
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_adds_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
\n
"
);
fprintf
(
fd
,
" ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
\n
"
);
}
// Add LLR from receiver input
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_adds_epi8(p_llrRes[i], p_llrProcBuf[i]);
\n
"
);
fprintf
(
fd
,
" ymm0 = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);
\n
"
);
fprintf
(
fd
,
" ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
\n
"
);
fprintf
(
fd
,
" ymm1 = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);
\n
"
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
\n
"
);
// Pack results back to epi8
fprintf
(
fd
,
" ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);
\n
"
);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);
\n
"
);
fprintf
(
fd
,
"}
\n
"
);
}
...
...
@@ -564,7 +770,7 @@ fprintf(fd, "// Process group with 12 CNs \n");
fprintf
(
fd
,
"// Process group with 13 CNs
\n
"
);
// Process group with
1
3 CNs
// Process group with 3 CNs
if
(
lut_numBnInBnGroups
[
12
]
>
0
)
{
...
...
@@ -578,24 +784,38 @@ fprintf(fd, "// Process group with 13 CNs \n");
cnOffsetInGroup
=
(
lut_numBnInBnGroups
[
12
]
*
NR_LDPC_ZMAX
)
>>
4
;
// Set pointers to start of group 2
fprintf
(
fd
,
" p_bnProcBuf = (
simde__m256
i*) &bnProcBuf [%d];
\n
"
,
lut_startAddrBnGroups
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrProcBuf = (
simde__m256
i*) &llrProcBuf [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrRes = (
simde
__m256i*) &llrRes [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
fprintf
(
fd
,
" p_bnProcBuf = (
__m128
i*) &bnProcBuf [%d];
\n
"
,
lut_startAddrBnGroups
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrProcBuf = (
__m128
i*) &llrProcBuf [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrRes = (__m256i*) &llrRes [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
// Loop over BNs
fprintf
(
fd
,
"
for (int i=0;i<M;i++
) {
\n
"
);
fprintf
(
fd
,
"
for (int i=0,j=0;i<M;i++,j+=2
) {
\n
"
);
// First 16 LLRs of first CN
fprintf
(
fd
,
" p_llrRes[i] = p_bnProcBuf [i];
\n
"
);
fprintf
(
fd
,
" ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);
\n
"
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);
\n
"
);
// Loop over CNs
for
(
k
=
1
;
k
<
13
;
k
++
)
{
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_adds_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
\n
"
);
fprintf
(
fd
,
" ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
\n
"
);
}
// Add LLR from receiver input
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_adds_epi8(p_llrRes[i], p_llrProcBuf[i]);
\n
"
);
fprintf
(
fd
,
" ymm0 = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);
\n
"
);
fprintf
(
fd
,
" ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
\n
"
);
fprintf
(
fd
,
" ymm1 = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);
\n
"
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
\n
"
);
// Pack results back to epi8
fprintf
(
fd
,
" ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);
\n
"
);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);
\n
"
);
fprintf
(
fd
,
"}
\n
"
);
}
...
...
@@ -603,7 +823,7 @@ fprintf(fd, "// Process group with 13 CNs \n");
// =====================================================================
// Process group with
1
4 CNs
// Process group with 4 CNs
fprintf
(
fd
,
"// Process group with 14 CNs
\n
"
);
...
...
@@ -621,24 +841,38 @@ fprintf(fd, "// Process group with 14 CNs \n");
cnOffsetInGroup
=
(
lut_numBnInBnGroups
[
13
]
*
NR_LDPC_ZMAX
)
>>
4
;
// Set pointers to start of group 2
fprintf
(
fd
,
" p_bnProcBuf = (
simde__m256
i*) &bnProcBuf [%d];
\n
"
,
lut_startAddrBnGroups
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrProcBuf = (
simde__m256
i*) &llrProcBuf [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrRes = (
simde
__m256i*) &llrRes [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
fprintf
(
fd
,
" p_bnProcBuf = (
__m128
i*) &bnProcBuf [%d];
\n
"
,
lut_startAddrBnGroups
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrProcBuf = (
__m128
i*) &llrProcBuf [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrRes = (__m256i*) &llrRes [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
// Loop over BNs
fprintf
(
fd
,
"
for (int i=0;i<M;i++
) {
\n
"
);
fprintf
(
fd
,
"
for (int i=0,j=0;i<M;i++,j+=2
) {
\n
"
);
// First 16 LLRs of first CN
fprintf
(
fd
,
" p_llrRes[i] = p_bnProcBuf [i];
\n
"
);
fprintf
(
fd
,
" ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);
\n
"
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);
\n
"
);
// Loop over CNs
for
(
k
=
1
;
k
<
14
;
k
++
)
{
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_adds_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
\n
"
);
fprintf
(
fd
,
" ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
\n
"
);
}
// Add LLR from receiver input
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_adds_epi8(p_llrRes[i], p_llrProcBuf[i]);
\n
"
);
fprintf
(
fd
,
" ymm0 = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);
\n
"
);
fprintf
(
fd
,
" ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
\n
"
);
fprintf
(
fd
,
" ymm1 = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);
\n
"
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
\n
"
);
// Pack results back to epi8
fprintf
(
fd
,
" ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);
\n
"
);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);
\n
"
);
fprintf
(
fd
,
"}
\n
"
);
}
...
...
@@ -663,24 +897,38 @@ fprintf(fd, "// Process group with 15 CNs \n");
cnOffsetInGroup
=
(
lut_numBnInBnGroups
[
14
]
*
NR_LDPC_ZMAX
)
>>
4
;
// Set pointers to start of group 2
fprintf
(
fd
,
" p_bnProcBuf = (
simde__m256
i*) &bnProcBuf [%d];
\n
"
,
lut_startAddrBnGroups
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrProcBuf = (
simde__m256
i*) &llrProcBuf [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrRes = (
simde
__m256i*) &llrRes [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
fprintf
(
fd
,
" p_bnProcBuf = (
__m128
i*) &bnProcBuf [%d];
\n
"
,
lut_startAddrBnGroups
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrProcBuf = (
__m128
i*) &llrProcBuf [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrRes = (__m256i*) &llrRes [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
// Loop over BNs
fprintf
(
fd
,
"
for (int i=0;i<M;i++
) {
\n
"
);
fprintf
(
fd
,
"
for (int i=0,j=0;i<M;i++,j+=2
) {
\n
"
);
// First 16 LLRs of first CN
fprintf
(
fd
,
" p_llrRes[i] = p_bnProcBuf [i];
\n
"
);
fprintf
(
fd
,
" ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);
\n
"
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);
\n
"
);
// Loop over CNs
for
(
k
=
1
;
k
<
15
;
k
++
)
{
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_adds_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
\n
"
);
fprintf
(
fd
,
" ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
\n
"
);
}
// Add LLR from receiver input
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_adds_epi8(p_llrRes[i], p_llrProcBuf[i]);
\n
"
);
fprintf
(
fd
,
" ymm0 = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);
\n
"
);
fprintf
(
fd
,
" ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
\n
"
);
fprintf
(
fd
,
" ymm1 = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);
\n
"
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
\n
"
);
// Pack results back to epi8
fprintf
(
fd
,
" ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);
\n
"
);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);
\n
"
);
fprintf
(
fd
,
"}
\n
"
);
}
...
...
@@ -706,24 +954,38 @@ fprintf(fd, "// Process group with 16 CNs \n");
cnOffsetInGroup
=
(
lut_numBnInBnGroups
[
15
]
*
NR_LDPC_ZMAX
)
>>
4
;
// Set pointers to start of group 2
fprintf
(
fd
,
" p_bnProcBuf = (
simde__m256
i*) &bnProcBuf [%d];
\n
"
,
lut_startAddrBnGroups
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrProcBuf = (
simde__m256
i*) &llrProcBuf [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrRes = (
simde
__m256i*) &llrRes [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
fprintf
(
fd
,
" p_bnProcBuf = (
__m128
i*) &bnProcBuf [%d];
\n
"
,
lut_startAddrBnGroups
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrProcBuf = (
__m128
i*) &llrProcBuf [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrRes = (__m256i*) &llrRes [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
// Loop over BNs
fprintf
(
fd
,
"
for (int i=0;i<M;i++
) {
\n
"
);
fprintf
(
fd
,
"
for (int i=0,j=0;i<M;i++,j+=2
) {
\n
"
);
// First 16 LLRs of first CN
fprintf
(
fd
,
" p_llrRes[i] = p_bnProcBuf [i];
\n
"
);
fprintf
(
fd
,
" ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);
\n
"
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);
\n
"
);
// Loop over CNs
for
(
k
=
1
;
k
<
16
;
k
++
)
{
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_adds_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
\n
"
);
fprintf
(
fd
,
" ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
\n
"
);
}
// Add LLR from receiver input
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_adds_epi8(p_llrRes[i], p_llrProcBuf[i]);
\n
"
);
fprintf
(
fd
,
" ymm0 = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);
\n
"
);
fprintf
(
fd
,
" ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
\n
"
);
fprintf
(
fd
,
" ymm1 = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);
\n
"
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
\n
"
);
// Pack results back to epi8
fprintf
(
fd
,
" ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);
\n
"
);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);
\n
"
);
fprintf
(
fd
,
"}
\n
"
);
}
...
...
@@ -748,24 +1010,38 @@ fprintf(fd, "// Process group with 17 CNs \n");
cnOffsetInGroup
=
(
lut_numBnInBnGroups
[
16
]
*
NR_LDPC_ZMAX
)
>>
4
;
// Set pointers to start of group 2
fprintf
(
fd
,
" p_bnProcBuf = (
simde__m256
i*) &bnProcBuf [%d];
\n
"
,
lut_startAddrBnGroups
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrProcBuf = (
simde__m256
i*) &llrProcBuf [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrRes = (
simde
__m256i*) &llrRes [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
fprintf
(
fd
,
" p_bnProcBuf = (
__m128
i*) &bnProcBuf [%d];
\n
"
,
lut_startAddrBnGroups
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrProcBuf = (
__m128
i*) &llrProcBuf [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrRes = (__m256i*) &llrRes [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
// Loop over BNs
fprintf
(
fd
,
"
for (int i=0;i<M;i++
) {
\n
"
);
fprintf
(
fd
,
"
for (int i=0,j=0;i<M;i++,j+=2
) {
\n
"
);
// First 16 LLRs of first CN
fprintf
(
fd
,
" p_llrRes[i] = p_bnProcBuf [i];
\n
"
);
fprintf
(
fd
,
" ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);
\n
"
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);
\n
"
);
// Loop over CNs
for
(
k
=
1
;
k
<
17
;
k
++
)
{
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_adds_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
\n
"
);
fprintf
(
fd
,
" ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
\n
"
);
}
// Add LLR from receiver input
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_adds_epi8(p_llrRes[i], p_llrProcBuf[i]);
\n
"
);
fprintf
(
fd
,
" ymm0 = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);
\n
"
);
fprintf
(
fd
,
" ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
\n
"
);
fprintf
(
fd
,
" ymm1 = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);
\n
"
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
\n
"
);
// Pack results back to epi8
fprintf
(
fd
,
" ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);
\n
"
);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);
\n
"
);
fprintf
(
fd
,
"}
\n
"
);
}
...
...
@@ -790,29 +1066,44 @@ fprintf(fd, "// Process group with 18 CNs \n");
cnOffsetInGroup
=
(
lut_numBnInBnGroups
[
17
]
*
NR_LDPC_ZMAX
)
>>
4
;
// Set pointers to start of group 2
fprintf
(
fd
,
" p_bnProcBuf = (simde__m256i*) &bnProcBuf [%d];
\n
"
,
lut_startAddrBnGroups
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrProcBuf = (simde__m256i*) &llrProcBuf [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrRes = (simde__m256i*) &llrRes [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
fprintf
(
fd
,
" for (int i=0;i<M;i++) {
\n
"
);
fprintf
(
fd
,
" p_bnProcBuf = (__m128i*) &bnProcBuf [%d];
\n
"
,
lut_startAddrBnGroups
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrProcBuf = (__m128i*) &llrProcBuf [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrRes = (__m256i*) &llrRes [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
// Loop over BNs
fprintf
(
fd
,
" for (int i=0,j=0;i<M;i++,j+=2) {
\n
"
);
// First 16 LLRs of first CN
fprintf
(
fd
,
" p_llrRes[i] = p_bnProcBuf [i];
\n
"
);
fprintf
(
fd
,
" ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);
\n
"
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);
\n
"
);
// Loop over CNs
for
(
k
=
1
;
k
<
18
;
k
++
)
{
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_adds_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
\n
"
);
fprintf
(
fd
,
" ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
\n
"
);
}
// Add LLR from receiver input
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_adds_epi8(p_llrRes[i], p_llrProcBuf[i]);
\n
"
);
fprintf
(
fd
,
" ymm0 = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);
\n
"
);
fprintf
(
fd
,
" ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
\n
"
);
fprintf
(
fd
,
" ymm1 = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);
\n
"
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
\n
"
);
// Pack results back to epi8
fprintf
(
fd
,
" ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);
\n
"
);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);
\n
"
);
fprintf
(
fd
,
"}
\n
"
);
}
// =====================================================================
// Process group with
1
9 CNs
// Process group with 9 CNs
fprintf
(
fd
,
"// Process group with 19 CNs
\n
"
);
...
...
@@ -830,24 +1121,38 @@ fprintf(fd, "// Process group with 19 CNs \n");
cnOffsetInGroup
=
(
lut_numBnInBnGroups
[
18
]
*
NR_LDPC_ZMAX
)
>>
4
;
// Set pointers to start of group 2
fprintf
(
fd
,
" p_bnProcBuf = (simde__m256i*) &bnProcBuf [%d];
\n
"
,
lut_startAddrBnGroups
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrProcBuf = (simde__m256i*) &llrProcBuf [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrRes = (simde__m256i*) &llrRes [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
// First 16 LLRs of first CN
fprintf
(
fd
,
" p_bnProcBuf = (__m128i*) &bnProcBuf [%d];
\n
"
,
lut_startAddrBnGroups
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrProcBuf = (__m128i*) &llrProcBuf [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrRes = (__m256i*) &llrRes [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
// Loop over BNs
fprintf
(
fd
,
" for (int i=0;i<M;i++) {
\n
"
);
fprintf
(
fd
,
" p_llrRes[i] = p_bnProcBuf [i];
\n
"
);
fprintf
(
fd
,
" for (int i=0,j=0;i<M;i++,j+=2) {
\n
"
);
// First 16 LLRs of first CN
fprintf
(
fd
,
" ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);
\n
"
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);
\n
"
);
// Loop over CNs
for
(
k
=
1
;
k
<
19
;
k
++
)
{
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_adds_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
\n
"
);
fprintf
(
fd
,
" ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
\n
"
);
}
// Add LLR from receiver input
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_adds_epi8(p_llrRes[i], p_llrProcBuf[i]);
\n
"
);
fprintf
(
fd
,
" ymm0 = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);
\n
"
);
fprintf
(
fd
,
" ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
\n
"
);
fprintf
(
fd
,
" ymm1 = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);
\n
"
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
\n
"
);
// Pack results back to epi8
fprintf
(
fd
,
" ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);
\n
"
);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);
\n
"
);
fprintf
(
fd
,
"}
\n
"
);
}
...
...
@@ -872,24 +1177,38 @@ fprintf(fd, "// Process group with 20 CNs \n");
cnOffsetInGroup
=
(
lut_numBnInBnGroups
[
19
]
*
NR_LDPC_ZMAX
)
>>
4
;
// Set pointers to start of group 2
fprintf
(
fd
,
" p_bnProcBuf = (simde__m256i*) &bnProcBuf [%d];
\n
"
,
lut_startAddrBnGroups
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrProcBuf = (simde__m256i*) &llrProcBuf [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrRes = (simde__m256i*) &llrRes [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
// First 16 LLRs of first CN
fprintf
(
fd
,
" p_bnProcBuf = (__m128i*) &bnProcBuf [%d];
\n
"
,
lut_startAddrBnGroups
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrProcBuf = (__m128i*) &llrProcBuf [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrRes = (__m256i*) &llrRes [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
// Loop over BNs
fprintf
(
fd
,
" for (int i=0;i<M;i++) {
\n
"
);
fprintf
(
fd
,
" p_llrRes[i] = p_bnProcBuf [i];
\n
"
);
fprintf
(
fd
,
" for (int i=0,j=0;i<M;i++,j+=2) {
\n
"
);
// First 16 LLRs of first CN
fprintf
(
fd
,
" ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);
\n
"
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);
\n
"
);
// Loop over CNs
for
(
k
=
1
;
k
<
20
;
k
++
)
{
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_adds_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
\n
"
);
fprintf
(
fd
,
" ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
\n
"
);
}
// Add LLR from receiver input
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_adds_epi8(p_llrRes[i], p_llrProcBuf[i]);
\n
"
);
fprintf
(
fd
,
" ymm0 = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);
\n
"
);
fprintf
(
fd
,
" ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
\n
"
);
fprintf
(
fd
,
" ymm1 = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);
\n
"
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
\n
"
);
// Pack results back to epi8
fprintf
(
fd
,
" ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);
\n
"
);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);
\n
"
);
fprintf
(
fd
,
"}
\n
"
);
}
...
...
@@ -918,24 +1237,38 @@ fprintf(fd, "// Process group with 21 CNs \n");
cnOffsetInGroup
=
(
lut_numBnInBnGroups
[
20
]
*
NR_LDPC_ZMAX
)
>>
4
;
// Set pointers to start of group 2
fprintf
(
fd
,
" p_bnProcBuf = (simde__m256i*) &bnProcBuf [%d];
\n
"
,
lut_startAddrBnGroups
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrProcBuf = (simde__m256i*) &llrProcBuf [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrRes = (simde__m256i*) &llrRes [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
// First 16 LLRs of first CN
fprintf
(
fd
,
" p_bnProcBuf = (__m128i*) &bnProcBuf [%d];
\n
"
,
lut_startAddrBnGroups
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrProcBuf = (__m128i*) &llrProcBuf [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrRes = (__m256i*) &llrRes [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
// Loop over BNs
fprintf
(
fd
,
" for (int i=0;i<M;i++) {
\n
"
);
fprintf
(
fd
,
" p_llrRes[i] = p_bnProcBuf [i];
\n
"
);
fprintf
(
fd
,
" for (int i=0,j=0;i<M;i++,j+=2) {
\n
"
);
// First 16 LLRs of first CN
fprintf
(
fd
,
" ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);
\n
"
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);
\n
"
);
// Loop over CNs
for
(
k
=
1
;
k
<
21
;
k
++
)
{
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_adds_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
\n
"
);
fprintf
(
fd
,
" ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
\n
"
);
}
// Add LLR from receiver input
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_adds_epi8(p_llrRes[i], p_llrProcBuf[i]);
\n
"
);
fprintf
(
fd
,
" ymm0 = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);
\n
"
);
fprintf
(
fd
,
" ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
\n
"
);
fprintf
(
fd
,
" ymm1 = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);
\n
"
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
\n
"
);
// Pack results back to epi8
fprintf
(
fd
,
" ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);
\n
"
);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);
\n
"
);
fprintf
(
fd
,
"}
\n
"
);
}
...
...
@@ -959,30 +1292,44 @@ fprintf(fd, "// Process group with 22 CNs \n");
cnOffsetInGroup
=
(
lut_numBnInBnGroups
[
21
]
*
NR_LDPC_ZMAX
)
>>
4
;
// Set pointers to start of group 2
fprintf
(
fd
,
" p_bnProcBuf = (simde__m256i*) &bnProcBuf [%d];
\n
"
,
lut_startAddrBnGroups
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrProcBuf = (simde__m256i*) &llrProcBuf [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrRes = (simde__m256i*) &llrRes [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
// First 16 LLRs of first CN
fprintf
(
fd
,
" p_bnProcBuf = (__m128i*) &bnProcBuf [%d];
\n
"
,
lut_startAddrBnGroups
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrProcBuf = (__m128i*) &llrProcBuf [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrRes = (__m256i*) &llrRes [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
// Loop over BNs
fprintf
(
fd
,
" for (int i=0;i<M;i++) {
\n
"
);
fprintf
(
fd
,
" p_llrRes[i] = p_bnProcBuf [i];
\n
"
);
fprintf
(
fd
,
" for (int i=0,j=0;i<M;i++,j+=2) {
\n
"
);
// First 16 LLRs of first CN
fprintf
(
fd
,
" ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);
\n
"
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);
\n
"
);
// Loop over CNs
for
(
k
=
1
;
k
<
22
;
k
++
)
{
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_adds_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
\n
"
);
fprintf
(
fd
,
" ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
\n
"
);
}
// Add LLR from receiver input
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_adds_epi8(p_llrRes[i], p_llrProcBuf[i]);
\n
"
);
fprintf
(
fd
,
" ymm0 = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);
\n
"
);
fprintf
(
fd
,
" ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
\n
"
);
fprintf
(
fd
,
" ymm1 = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);
\n
"
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
\n
"
);
// Pack results back to epi8
fprintf
(
fd
,
" ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);
\n
"
);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);
\n
"
);
fprintf
(
fd
,
"}
\n
"
);
}
// =====================================================================
// Process group with
2
3 CNs
// Process group with
1
3 CNs
fprintf
(
fd
,
"// Process group with <23 CNs
\n
"
);
...
...
@@ -1001,24 +1348,38 @@ fprintf(fd, "// Process group with <23 CNs \n");
cnOffsetInGroup
=
(
lut_numBnInBnGroups
[
22
]
*
NR_LDPC_ZMAX
)
>>
4
;
// Set pointers to start of group 2
fprintf
(
fd
,
" p_bnProcBuf = (simde__m256i*) &bnProcBuf [%d];
\n
"
,
lut_startAddrBnGroups
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrProcBuf = (simde__m256i*) &llrProcBuf [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrRes = (simde__m256i*) &llrRes [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
// First 16 LLRs of first CN
fprintf
(
fd
,
" p_bnProcBuf = (__m128i*) &bnProcBuf [%d];
\n
"
,
lut_startAddrBnGroups
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrProcBuf = (__m128i*) &llrProcBuf [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrRes = (__m256i*) &llrRes [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
// Loop over BNs
fprintf
(
fd
,
" for (int i=0;i<M;i++) {
\n
"
);
fprintf
(
fd
,
" p_llrRes[i] = p_bnProcBuf [i];
\n
"
);
fprintf
(
fd
,
" for (int i=0,j=0;i<M;i++,j+=2) {
\n
"
);
// First 16 LLRs of first CN
fprintf
(
fd
,
" ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);
\n
"
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);
\n
"
);
// Loop over CNs
for
(
k
=
1
;
k
<
23
;
k
++
)
{
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_adds_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
\n
"
);
fprintf
(
fd
,
" ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
\n
"
);
}
// Add LLR from receiver input
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_adds_epi8(p_llrRes[i], p_llrProcBuf[i]);
\n
"
);
fprintf
(
fd
,
" ymm0 = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);
\n
"
);
fprintf
(
fd
,
" ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
\n
"
);
fprintf
(
fd
,
" ymm1 = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);
\n
"
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
\n
"
);
// Pack results back to epi8
fprintf
(
fd
,
" ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);
\n
"
);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);
\n
"
);
fprintf
(
fd
,
"}
\n
"
);
}
...
...
@@ -1026,11 +1387,11 @@ fprintf(fd, "// Process group with <23 CNs \n");
// =====================================================================
// Process group with
2
4 CNs
// Process group with 4 CNs
fprintf
(
fd
,
"// Process group with 24 CNs
\n
"
);
// Process group with
2
4 CNs
// Process group with 4 CNs
if
(
lut_numBnInBnGroups
[
23
]
>
0
)
{
...
...
@@ -1044,24 +1405,38 @@ fprintf(fd, "// Process group with 24 CNs \n");
cnOffsetInGroup
=
(
lut_numBnInBnGroups
[
23
]
*
NR_LDPC_ZMAX
)
>>
4
;
// Set pointers to start of group 2
fprintf
(
fd
,
" p_bnProcBuf = (simde__m256i*) &bnProcBuf [%d];
\n
"
,
lut_startAddrBnGroups
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrProcBuf = (simde__m256i*) &llrProcBuf [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrRes = (simde__m256i*) &llrRes [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
// First 16 LLRs of first CN
fprintf
(
fd
,
" p_bnProcBuf = (__m128i*) &bnProcBuf [%d];
\n
"
,
lut_startAddrBnGroups
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrProcBuf = (__m128i*) &llrProcBuf [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrRes = (__m256i*) &llrRes [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
// Loop over BNs
fprintf
(
fd
,
" for (int i=0;i<M;i++) {
\n
"
);
fprintf
(
fd
,
" p_llrRes[i] = p_bnProcBuf [i];
\n
"
);
fprintf
(
fd
,
" for (int i=0,j=0;i<M;i++,j+=2) {
\n
"
);
// First 16 LLRs of first CN
fprintf
(
fd
,
" ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);
\n
"
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);
\n
"
);
// Loop over CNs
for
(
k
=
1
;
k
<
24
;
k
++
)
{
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_adds_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
\n
"
);
fprintf
(
fd
,
" ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
\n
"
);
}
// Add LLR from receiver input
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_adds_epi8(p_llrRes[i], p_llrProcBuf[i]);
\n
"
);
fprintf
(
fd
,
" ymm0 = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);
\n
"
);
fprintf
(
fd
,
" ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
\n
"
);
fprintf
(
fd
,
" ymm1 = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);
\n
"
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
\n
"
);
// Pack results back to epi8
fprintf
(
fd
,
" ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);
\n
"
);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);
\n
"
);
fprintf
(
fd
,
"}
\n
"
);
}
...
...
@@ -1086,24 +1461,38 @@ fprintf(fd, "// Process group with 25 CNs \n");
cnOffsetInGroup
=
(
lut_numBnInBnGroups
[
24
]
*
NR_LDPC_ZMAX
)
>>
4
;
// Set pointers to start of group 2
fprintf
(
fd
,
" p_bnProcBuf = (simde__m256i*) &bnProcBuf [%d];
\n
"
,
lut_startAddrBnGroups
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrProcBuf = (simde__m256i*) &llrProcBuf [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrRes = (simde__m256i*) &llrRes [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
// First 16 LLRs of first CN
fprintf
(
fd
,
" p_bnProcBuf = (__m128i*) &bnProcBuf [%d];
\n
"
,
lut_startAddrBnGroups
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrProcBuf = (__m128i*) &llrProcBuf [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrRes = (__m256i*) &llrRes [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
// Loop over BNs
fprintf
(
fd
,
" for (int i=0;i<M;i++) {
\n
"
);
fprintf
(
fd
,
" p_llrRes[i] = p_bnProcBuf [i];
\n
"
);
fprintf
(
fd
,
" for (int i=0,j=0;i<M;i++,j+=2) {
\n
"
);
// First 16 LLRs of first CN
fprintf
(
fd
,
" ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);
\n
"
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);
\n
"
);
// Loop over CNs
for
(
k
=
1
;
k
<
25
;
k
++
)
{
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_adds_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
\n
"
);
fprintf
(
fd
,
" ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
\n
"
);
}
// Add LLR from receiver input
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_adds_epi8(p_llrRes[i], p_llrProcBuf[i]);
\n
"
);
fprintf
(
fd
,
" ymm0 = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);
\n
"
);
fprintf
(
fd
,
" ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
\n
"
);
fprintf
(
fd
,
" ymm1 = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);
\n
"
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
\n
"
);
// Pack results back to epi8
fprintf
(
fd
,
" ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);
\n
"
);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);
\n
"
);
fprintf
(
fd
,
"}
\n
"
);
}
...
...
@@ -1129,35 +1518,49 @@ fprintf(fd, "// Process group with 26 CNs \n");
cnOffsetInGroup
=
(
lut_numBnInBnGroups
[
25
]
*
NR_LDPC_ZMAX
)
>>
4
;
// Set pointers to start of group 2
fprintf
(
fd
,
" p_bnProcBuf = (simde__m256i*) &bnProcBuf [%d];
\n
"
,
lut_startAddrBnGroups
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrProcBuf = (simde__m256i*) &llrProcBuf [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrRes = (simde__m256i*) &llrRes [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
// First 16 LLRs of first CN
fprintf
(
fd
,
" p_bnProcBuf = (__m128i*) &bnProcBuf [%d];
\n
"
,
lut_startAddrBnGroups
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrProcBuf = (__m128i*) &llrProcBuf [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrRes = (__m256i*) &llrRes [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
// Loop over BNs
fprintf
(
fd
,
" for (int i=0;i<M;i++) {
\n
"
);
fprintf
(
fd
,
" p_llrRes[i] = p_bnProcBuf [i];
\n
"
);
fprintf
(
fd
,
" for (int i=0,j=0;i<M;i++,j+=2) {
\n
"
);
// First 16 LLRs of first CN
fprintf
(
fd
,
" ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);
\n
"
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);
\n
"
);
// Loop over CNs
for
(
k
=
1
;
k
<
26
;
k
++
)
{
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_adds_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
\n
"
);
fprintf
(
fd
,
" ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
\n
"
);
}
// Add LLR from receiver input
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_adds_epi8(p_llrRes[i], p_llrProcBuf[i]);
\n
"
);
fprintf
(
fd
,
" ymm0 = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);
\n
"
);
fprintf
(
fd
,
" ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
\n
"
);
fprintf
(
fd
,
" ymm1 = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);
\n
"
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
\n
"
);
// Pack results back to epi8
fprintf
(
fd
,
" ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);
\n
"
);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);
\n
"
);
fprintf
(
fd
,
"}
\n
"
);
}
// =====================================================================
// Process group with
2
7 CNs
// Process group with
1
7 CNs
fprintf
(
fd
,
"// Process group with 27 CNs
\n
"
);
// Process group with
2
7 CNs
// Process group with
1
7 CNs
if
(
lut_numBnInBnGroups
[
26
]
>
0
)
{
...
...
@@ -1171,35 +1574,49 @@ fprintf(fd, "// Process group with 27 CNs \n");
cnOffsetInGroup
=
(
lut_numBnInBnGroups
[
26
]
*
NR_LDPC_ZMAX
)
>>
4
;
// Set pointers to start of group 2
fprintf
(
fd
,
" p_bnProcBuf = (simde__m256i*) &bnProcBuf [%d];
\n
"
,
lut_startAddrBnGroups
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrProcBuf = (simde__m256i*) &llrProcBuf [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrRes = (simde__m256i*) &llrRes [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
// First 16 LLRs of first CN
fprintf
(
fd
,
" p_bnProcBuf = (__m128i*) &bnProcBuf [%d];
\n
"
,
lut_startAddrBnGroups
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrProcBuf = (__m128i*) &llrProcBuf [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrRes = (__m256i*) &llrRes [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
// Loop over BNs
fprintf
(
fd
,
" for (int i=0;i<M;i++) {
\n
"
);
fprintf
(
fd
,
" p_llrRes[i] = p_bnProcBuf [i];
\n
"
);
fprintf
(
fd
,
" for (int i=0,j=0;i<M;i++,j+=2) {
\n
"
);
// First 16 LLRs of first CN
fprintf
(
fd
,
" ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);
\n
"
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);
\n
"
);
// Loop over CNs
for
(
k
=
1
;
k
<
27
;
k
++
)
{
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_adds_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
\n
"
);
fprintf
(
fd
,
" ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
\n
"
);
}
// Add LLR from receiver input
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_adds_epi8(p_llrRes[i], p_llrProcBuf[i]);
\n
"
);
fprintf
(
fd
,
" ymm0 = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);
\n
"
);
fprintf
(
fd
,
" ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
\n
"
);
fprintf
(
fd
,
" ymm1 = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);
\n
"
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
\n
"
);
// Pack results back to epi8
fprintf
(
fd
,
" ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);
\n
"
);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);
\n
"
);
fprintf
(
fd
,
"}
\n
"
);
}
// =====================================================================
// Process group with
2
8 CNs
// Process group with
1
8 CNs
fprintf
(
fd
,
"// Process group with 28 CNs
\n
"
);
// Process group with
2
8 CNs
// Process group with 8 CNs
if
(
lut_numBnInBnGroups
[
27
]
>
0
)
{
...
...
@@ -1213,34 +1630,48 @@ fprintf(fd, "// Process group with 28 CNs \n");
cnOffsetInGroup
=
(
lut_numBnInBnGroups
[
27
]
*
NR_LDPC_ZMAX
)
>>
4
;
// Set pointers to start of group 2
fprintf
(
fd
,
" p_bnProcBuf = (simde__m256i*) &bnProcBuf [%d];
\n
"
,
lut_startAddrBnGroups
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrProcBuf = (simde__m256i*) &llrProcBuf [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrRes = (simde__m256i*) &llrRes [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
// First 16 LLRs of first CN
fprintf
(
fd
,
" p_bnProcBuf = (__m128i*) &bnProcBuf [%d];
\n
"
,
lut_startAddrBnGroups
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrProcBuf = (__m128i*) &llrProcBuf [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrRes = (__m256i*) &llrRes [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
// Loop over BNs
fprintf
(
fd
,
" for (int i=0;i<M;i++) {
\n
"
);
fprintf
(
fd
,
" p_llrRes[i] = p_bnProcBuf [i];
\n
"
);
fprintf
(
fd
,
" for (int i=0,j=0;i<M;i++,j+=2) {
\n
"
);
// First 16 LLRs of first CN
fprintf
(
fd
,
" ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);
\n
"
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);
\n
"
);
// Loop over CNs
for
(
k
=
1
;
k
<
28
;
k
++
)
{
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_adds_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
\n
"
);
fprintf
(
fd
,
" ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
\n
"
);
}
// Add LLR from receiver input
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_adds_epi8(p_llrRes[i], p_llrProcBuf[i]);
\n
"
);
fprintf
(
fd
,
" ymm0 = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);
\n
"
);
fprintf
(
fd
,
" ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
\n
"
);
fprintf
(
fd
,
" ymm1 = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);
\n
"
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
\n
"
);
// Pack results back to epi8
fprintf
(
fd
,
" ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);
\n
"
);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);
\n
"
);
fprintf
(
fd
,
"}
\n
"
);
}
// =====================================================================
// Process group with
2
9 CNs
// Process group with 9 CNs
fprintf
(
fd
,
"// Process group with 29 CNs
\n
"
);
// Process group with
2
9 CNs
// Process group with 9 CNs
if
(
lut_numBnInBnGroups
[
28
]
>
0
)
{
...
...
@@ -1254,35 +1685,49 @@ fprintf(fd, "// Process group with 29 CNs \n");
cnOffsetInGroup
=
(
lut_numBnInBnGroups
[
28
]
*
NR_LDPC_ZMAX
)
>>
4
;
// Set pointers to start of group 2
fprintf
(
fd
,
" p_bnProcBuf = (simde__m256i*) &bnProcBuf [%d];
\n
"
,
lut_startAddrBnGroups
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrProcBuf = (simde__m256i*) &llrProcBuf [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrRes = (simde__m256i*) &llrRes [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
// First 16 LLRs of first CN
fprintf
(
fd
,
" p_bnProcBuf = (__m128i*) &bnProcBuf [%d];
\n
"
,
lut_startAddrBnGroups
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrProcBuf = (__m128i*) &llrProcBuf [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrRes = (__m256i*) &llrRes [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
// Loop over BNs
fprintf
(
fd
,
" for (int i=0;i<M;i++) {
\n
"
);
fprintf
(
fd
,
" p_llrRes[i] = p_bnProcBuf [i];
\n
"
);
fprintf
(
fd
,
" for (int i=0,j=0;i<M;i++,j+=2) {
\n
"
);
// First 16 LLRs of first CN
fprintf
(
fd
,
" ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);
\n
"
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);
\n
"
);
// Loop over CNs
for
(
k
=
1
;
k
<
29
;
k
++
)
{
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_adds_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
\n
"
);
fprintf
(
fd
,
" ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
\n
"
);
}
// Add LLR from receiver input
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_adds_epi8(p_llrRes[i], p_llrProcBuf[i]);
\n
"
);
fprintf
(
fd
,
" ymm0 = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);
\n
"
);
fprintf
(
fd
,
" ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
\n
"
);
fprintf
(
fd
,
" ymm1 = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);
\n
"
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
\n
"
);
// Pack results back to epi8
fprintf
(
fd
,
" ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);
\n
"
);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);
\n
"
);
fprintf
(
fd
,
"}
\n
"
);
}
// =====================================================================
// Process group with
3
0 CNs
// Process group with
2
0 CNs
fprintf
(
fd
,
"// Process group with 30 CNs
\n
"
);
// Process group with
3
0 CNs
// Process group with
2
0 CNs
if
(
lut_numBnInBnGroups
[
29
]
>
0
)
{
...
...
@@ -1296,24 +1741,38 @@ fprintf(fd, "// Process group with 30 CNs \n");
cnOffsetInGroup
=
(
lut_numBnInBnGroups
[
29
]
*
NR_LDPC_ZMAX
)
>>
4
;
// Set pointers to start of group 2
fprintf
(
fd
,
" p_bnProcBuf = (simde__m256i*) &bnProcBuf [%d];
\n
"
,
lut_startAddrBnGroups
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrProcBuf = (simde__m256i*) &llrProcBuf [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrRes = (simde__m256i*) &llrRes [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
// First 16 LLRs of first CN
fprintf
(
fd
,
" p_bnProcBuf = (__m128i*) &bnProcBuf [%d];
\n
"
,
lut_startAddrBnGroups
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrProcBuf = (__m128i*) &llrProcBuf [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
fprintf
(
fd
,
" p_llrRes = (__m256i*) &llrRes [%d];
\n
"
,
lut_startAddrBnGroupsLlr
[
idxBnGroup
]);
// Loop over BNs
fprintf
(
fd
,
" for (int i=0;i<M;i++) {
\n
"
);
fprintf
(
fd
,
" p_llrRes[i] = p_bnProcBuf [i];
\n
"
);
fprintf
(
fd
,
" for (int i=0,j=0;i<M;i++,j+=2) {
\n
"
);
// First 16 LLRs of first CN
fprintf
(
fd
,
" ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);
\n
"
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);
\n
"
);
// Loop over CNs
for
(
k
=
1
;
k
<
30
;
k
++
)
{
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_adds_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
\n
"
);
fprintf
(
fd
,
" ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);
\n
"
,
k
*
cnOffsetInGroup
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
\n
"
);
}
// Add LLR from receiver input
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_adds_epi8(p_llrRes[i], p_llrProcBuf[i]);
\n
"
);
fprintf
(
fd
,
" ymm0 = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);
\n
"
);
fprintf
(
fd
,
" ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
\n
"
);
fprintf
(
fd
,
" ymm1 = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);
\n
"
);
fprintf
(
fd
,
" ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
\n
"
);
// Pack results back to epi8
fprintf
(
fd
,
" ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);
\n
"
);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
fprintf
(
fd
,
" p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);
\n
"
);
fprintf
(
fd
,
"}
\n
"
);
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment