Commit 559e8fdc authored by Sy

degradation of performance in BLER fixed | bnProc & bnProcPc unrolled |...

Fixed BLER performance degradation | unrolled bnProc & bnProcPc | small improvement in execution times
parent 0f704894
/*
* Licensed to the OpenAirInterface (OAI) Software Alliance under one or more
* contributor license agreements. See the NOTICE file distributed with
......@@ -48,7 +49,6 @@
#include "nrLDPC_tools/ldpc_gen_files/cnProc_avx512/nrLDPC_cnProc_BG2_R13_AVX512.h"
#include "nrLDPC_tools/ldpc_gen_files/cnProc_avx512/nrLDPC_cnProc_BG2_R23_AVX512.h"
#else
/*----------------------------------------------------------------------
......@@ -81,7 +81,18 @@
#include "nrLDPC_tools/ldpc_gen_files/bnProcPc/nrLDPC_bnProcPc_BG2_R23_AVX2.h"
//bnProc----------------------------------------------------------------
#ifdef __AVX512BW__
//BG1-------------------------------------------------------------------
#include "nrLDPC_tools/ldpc_gen_files/bnProc_avx512/nrLDPC_bnProc_BG1_R13_AVX512.h"
#include "nrLDPC_tools/ldpc_gen_files/bnProc_avx512/nrLDPC_bnProc_BG1_R23_AVX512.h"
#include "nrLDPC_tools/ldpc_gen_files/bnProc_avx512/nrLDPC_bnProc_BG1_R89_AVX512.h"
//BG2 --------------------------------------------------------------------
#include "nrLDPC_tools/ldpc_gen_files/bnProc_avx512/nrLDPC_bnProc_BG2_R15_AVX512.h"
#include "nrLDPC_tools/ldpc_gen_files/bnProc_avx512/nrLDPC_bnProc_BG2_R13_AVX512.h"
#include "nrLDPC_tools/ldpc_gen_files/bnProc_avx512/nrLDPC_bnProc_BG2_R23_AVX512.h"
#else
#include "nrLDPC_tools/ldpc_gen_files/bnProc/nrLDPC_bnProc_BG1_R13_AVX2.h"
#include "nrLDPC_tools/ldpc_gen_files/bnProc/nrLDPC_bnProc_BG1_R23_AVX2.h"
#include "nrLDPC_tools/ldpc_gen_files/bnProc/nrLDPC_bnProc_BG1_R89_AVX2.h"
......@@ -90,6 +101,7 @@
#include "nrLDPC_tools/ldpc_gen_files/bnProc/nrLDPC_bnProc_BG2_R13_AVX2.h"
#include "nrLDPC_tools/ldpc_gen_files/bnProc/nrLDPC_bnProc_BG2_R23_AVX2.h"
#endif
......@@ -380,17 +392,29 @@ if (BG==1)
{
case 13:
{
#ifdef __AVX512BW__
nrLDPC_bnProc_BG1_R13_AVX512(p_procBuf->bnProcBuf, p_procBuf->bnProcBufRes,p_procBuf->llrRes, Z);
#else
nrLDPC_bnProc_BG1_R13_AVX2(p_procBuf->bnProcBuf, p_procBuf->bnProcBufRes,p_procBuf->llrRes, Z);
#endif
break;
}
case 23:
{
#ifdef __AVX512BW__
nrLDPC_bnProc_BG1_R23_AVX512(p_procBuf->bnProcBuf, p_procBuf->bnProcBufRes,p_procBuf->llrRes, Z);
#else
nrLDPC_bnProc_BG1_R23_AVX2(p_procBuf->bnProcBuf, p_procBuf->bnProcBufRes,p_procBuf->llrRes, Z);
#endif
break;
}
case 89:
{
#ifdef __AVX512BW__
nrLDPC_bnProc_BG1_R89_AVX512(p_procBuf->bnProcBuf, p_procBuf->bnProcBufRes,p_procBuf->llrRes, Z);
#else
nrLDPC_bnProc_BG1_R89_AVX2(p_procBuf->bnProcBuf, p_procBuf->bnProcBufRes,p_procBuf->llrRes, Z);
#endif
break;
}
}
......@@ -401,20 +425,32 @@ if (BG==1)
{
case 15:
{
#ifdef __AVX512BW__
nrLDPC_bnProc_BG2_R15_AVX512(p_procBuf->bnProcBuf, p_procBuf->bnProcBufRes,p_procBuf->llrRes, Z);
#else
nrLDPC_bnProc_BG2_R15_AVX2(p_procBuf->bnProcBuf, p_procBuf->bnProcBufRes,p_procBuf->llrRes, Z);
#endif
break;
}
case 13:
{
#ifdef __AVX512BW__
nrLDPC_bnProc_BG2_R13_AVX512(p_procBuf->bnProcBuf, p_procBuf->bnProcBufRes,p_procBuf->llrRes, Z);
#else
nrLDPC_bnProc_BG2_R13_AVX2(p_procBuf->bnProcBuf, p_procBuf->bnProcBufRes,p_procBuf->llrRes, Z);
#endif
break;
}
case 23:
{
#ifdef __AVX512BW__
nrLDPC_bnProc_BG2_R23_AVX512(p_procBuf->bnProcBuf, p_procBuf->bnProcBufRes,p_procBuf->llrRes, Z);
#else
nrLDPC_bnProc_BG2_R23_AVX2(p_procBuf->bnProcBuf, p_procBuf->bnProcBufRes,p_procBuf->llrRes, Z);
#endif
break;
}
......@@ -634,23 +670,36 @@ if (BG==1)
#endif
// nrLDPC_bnProc(p_lut, p_procBuf, Z);
if (BG==1)
{
switch (R)
{
case 13:
{
#ifdef __AVX512BW__
nrLDPC_bnProc_BG1_R13_AVX512(p_procBuf->bnProcBuf, p_procBuf->bnProcBufRes,p_procBuf->llrRes, Z);
#else
nrLDPC_bnProc_BG1_R13_AVX2(p_procBuf->bnProcBuf, p_procBuf->bnProcBufRes,p_procBuf->llrRes, Z);
#endif
break;
}
case 23:
{
#ifdef __AVX512BW__
nrLDPC_bnProc_BG1_R23_AVX512(p_procBuf->bnProcBuf, p_procBuf->bnProcBufRes,p_procBuf->llrRes, Z);
#else
nrLDPC_bnProc_BG1_R23_AVX2(p_procBuf->bnProcBuf, p_procBuf->bnProcBufRes,p_procBuf->llrRes, Z);
#endif
break;
}
case 89:
{
#ifdef __AVX512BW__
nrLDPC_bnProc_BG1_R89_AVX512(p_procBuf->bnProcBuf, p_procBuf->bnProcBufRes,p_procBuf->llrRes, Z);
#else
nrLDPC_bnProc_BG1_R89_AVX2(p_procBuf->bnProcBuf, p_procBuf->bnProcBufRes,p_procBuf->llrRes, Z);
#endif
break;
}
}
......@@ -661,20 +710,32 @@ if (BG==1)
{
case 15:
{
#ifdef __AVX512BW__
nrLDPC_bnProc_BG2_R15_AVX512(p_procBuf->bnProcBuf, p_procBuf->bnProcBufRes,p_procBuf->llrRes, Z);
#else
nrLDPC_bnProc_BG2_R15_AVX2(p_procBuf->bnProcBuf, p_procBuf->bnProcBufRes,p_procBuf->llrRes, Z);
#endif
break;
}
case 13:
{
#ifdef __AVX512BW__
nrLDPC_bnProc_BG2_R13_AVX512(p_procBuf->bnProcBuf, p_procBuf->bnProcBufRes,p_procBuf->llrRes, Z);
#else
nrLDPC_bnProc_BG2_R13_AVX2(p_procBuf->bnProcBuf, p_procBuf->bnProcBufRes,p_procBuf->llrRes, Z);
#endif
break;
}
case 23:
{
#ifdef __AVX512BW__
nrLDPC_bnProc_BG2_R23_AVX512(p_procBuf->bnProcBuf, p_procBuf->bnProcBufRes,p_procBuf->llrRes, Z);
#else
nrLDPC_bnProc_BG2_R23_AVX2(p_procBuf->bnProcBuf, p_procBuf->bnProcBufRes,p_procBuf->llrRes, Z);
#endif
break;
}
......@@ -684,7 +745,6 @@ if (BG==1)
#ifdef NR_LDPC_PROFILER_DETAIL
stop_meas(&p_profiler->bnProc);
#endif
......@@ -915,23 +975,36 @@ if (BG==1)
//nrLDPC_bnProc(p_lut, p_procBuf, Z);
if (BG==1)
{
switch (R)
{
case 13:
{
#ifdef __AVX512BW__
nrLDPC_bnProc_BG1_R13_AVX512(p_procBuf->bnProcBuf, p_procBuf->bnProcBufRes,p_procBuf->llrRes, Z);
#else
nrLDPC_bnProc_BG1_R13_AVX2(p_procBuf->bnProcBuf, p_procBuf->bnProcBufRes,p_procBuf->llrRes, Z);
#endif
break;
}
case 23:
{
#ifdef __AVX512BW__
nrLDPC_bnProc_BG1_R23_AVX512(p_procBuf->bnProcBuf, p_procBuf->bnProcBufRes,p_procBuf->llrRes, Z);
#else
nrLDPC_bnProc_BG1_R23_AVX2(p_procBuf->bnProcBuf, p_procBuf->bnProcBufRes,p_procBuf->llrRes, Z);
#endif
break;
}
case 89:
{
#ifdef __AVX512BW__
nrLDPC_bnProc_BG1_R89_AVX512(p_procBuf->bnProcBuf, p_procBuf->bnProcBufRes,p_procBuf->llrRes, Z);
#else
nrLDPC_bnProc_BG1_R89_AVX2(p_procBuf->bnProcBuf, p_procBuf->bnProcBufRes,p_procBuf->llrRes, Z);
#endif
break;
}
}
......@@ -942,20 +1015,32 @@ if (BG==1)
{
case 15:
{
#ifdef __AVX512BW__
nrLDPC_bnProc_BG2_R15_AVX512(p_procBuf->bnProcBuf, p_procBuf->bnProcBufRes,p_procBuf->llrRes, Z);
#else
nrLDPC_bnProc_BG2_R15_AVX2(p_procBuf->bnProcBuf, p_procBuf->bnProcBufRes,p_procBuf->llrRes, Z);
#endif
break;
}
case 13:
{
#ifdef __AVX512BW__
nrLDPC_bnProc_BG2_R13_AVX512(p_procBuf->bnProcBuf, p_procBuf->bnProcBufRes,p_procBuf->llrRes, Z);
#else
nrLDPC_bnProc_BG2_R13_AVX2(p_procBuf->bnProcBuf, p_procBuf->bnProcBufRes,p_procBuf->llrRes, Z);
#endif
break;
}
case 23:
{
#ifdef __AVX512BW__
nrLDPC_bnProc_BG2_R23_AVX512(p_procBuf->bnProcBuf, p_procBuf->bnProcBufRes,p_procBuf->llrRes, Z);
#else
nrLDPC_bnProc_BG2_R23_AVX2(p_procBuf->bnProcBuf, p_procBuf->bnProcBufRes,p_procBuf->llrRes, Z);
#endif
break;
}
......@@ -964,7 +1049,6 @@ if (BG==1)
}
#ifdef NR_LDPC_PROFILER_DETAIL
stop_meas(&p_profiler->bnProc);
#endif
......@@ -1052,5 +1136,3 @@ if (BG==1)
# Builds the bnProc_gen_avx512 generator executable from every .c file in
# this directory. OPENAIR_HOME must be set (environment or make variable):
# it is used as the root of the include path when compiling.

# The recipes below expand $(CC), so the compiler has to be assigned to CC.
CC=gcc
CFLAGS=-W -Wall -mavx2
LDFLAGS=
EXEC=bnProc_gen_avx512
SRC= $(wildcard *.c)
OBJ= $(SRC:.c=.o)

# Default target: build the generator.
all: $(EXEC)

# Link all object files into the generator executable.
bnProc_gen_avx512: $(OBJ)
	@$(CC) -o $@ $^ $(LDFLAGS) -O2

# Compile one source file into its object file.
%.o: %.c
	@$(CC) -o $@ -c $< $(CFLAGS) -I ${OPENAIR_HOME}/openair1 -g -std=c99

# None of these targets produce a file named after themselves.
.PHONY: all clean mrproper zip

# Remove the object files only.
clean:
	@rm -rf *.o

# Remove the object files and the executable.
mrproper: clean
	@rm -rf $(EXEC)

# Archive the generator sources together with this Makefile.
zip:
	@tar -zcvf sauvegarde.tar.gz main.c bnProc_gen_BG1_avx512.c bnProc_gen_BG2_avx512.c Makefile
#include <stdint.h>
#include <immintrin.h>
#include "../../nrLDPCdecoder_defs.h"
#include "../../nrLDPC_types.h"
void nrLDPC_bnProc_BG1_generator_AVX512(int R)
{
const char *ratestr[3]={"13","23","89"};
if (R<0 || R>2) {printf("Illegal R %d\n",R); abort();}
// system("mkdir -p ../ldpc_gen_files");
char fname[50];
sprintf(fname,"../ldpc_gen_files/bnProc_avx512/nrLDPC_bnProc_BG1_R%s_AVX512.h",ratestr[R]);
FILE *fd=fopen(fname,"w");
if (fd == NULL) {printf("Cannot create \n");abort();}
//fprintf(fd,"#include <stdint.h>\n");
//fprintf(fd,"#include <immintrin.h>\n");
fprintf(fd,"static inline void nrLDPC_bnProc_BG1_R%s_AVX512(int8_t* bnProcBuf,int8_t* bnProcBufRes, int8_t* llrRes, uint16_t Z ) {\n", ratestr[R]);
const uint8_t* lut_numBnInBnGroups;
const uint32_t* lut_startAddrBnGroups;
const uint16_t* lut_startAddrBnGroupsLlr;
if (R==0) {
lut_numBnInBnGroups = lut_numBnInBnGroups_BG1_R13;
lut_startAddrBnGroups = lut_startAddrBnGroups_BG1_R13;
lut_startAddrBnGroupsLlr = lut_startAddrBnGroupsLlr_BG1_R13;
}
else if (R==1){
lut_numBnInBnGroups = lut_numBnInBnGroups_BG1_R23;
lut_startAddrBnGroups = lut_startAddrBnGroups_BG1_R23;
lut_startAddrBnGroupsLlr = lut_startAddrBnGroupsLlr_BG1_R23;
}
else if (R==2) {
lut_numBnInBnGroups = lut_numBnInBnGroups_BG1_R89;
lut_startAddrBnGroups = lut_startAddrBnGroups_BG1_R89;
lut_startAddrBnGroupsLlr = lut_startAddrBnGroupsLlr_BG1_R89;
}
else { printf("aborting, illegal R %d\n",R); fclose(fd);abort();}
//uint32_t M;
//uint32_t M32rem;
// uint32_t i;
uint32_t k;
// Offset to each CN within a group, in units of 64 bytes (one __m512i)
uint32_t cnOffsetInGroup;
uint8_t idxBnGroup = 0;
fprintf(fd," __m512i* p_bnProcBuf; \n");
fprintf(fd," __m512i* p_bnProcBufRes; \n");
fprintf(fd," __m512i* p_llrRes; \n");
fprintf(fd," __m512i* p_res; \n");
fprintf(fd," uint32_t M, i; \n");
// =====================================================================
// Process group with 1 CN
// Already done in bnProcBufPc
// =====================================================================
// =====================================================================
fprintf(fd, "// Process group with 2 CNs \n");
if (lut_numBnInBnGroups[1] > 0)
{
// If elements in group move to next address
idxBnGroup++;
// Number of groups of 32 BNs or parallel processing
fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[1] );
// Set the offset to each CN within a group in terms of 16 Byte
cnOffsetInGroup = (lut_numBnInBnGroups[1]*NR_LDPC_ZMAX)>>6;
// Set pointers to start of group 2
fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
// Loop over CNs
for (k=0; k<2; k++)
{
fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup);
fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
// Loop over BNs
fprintf(fd," for (i=0;i<M;i++) {\n");
fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup);
fprintf(fd,"}\n");
}
}
// =====================================================================
fprintf(fd, "// Process group with 3 CNs \n");
if (lut_numBnInBnGroups[2] > 0)
{
// If elements in group move to next address
idxBnGroup++;
// Number of groups of 32 BNs for parallel processing
fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[2] );
// Set the offset to each CN within a group in terms of 16 Byte
cnOffsetInGroup = (lut_numBnInBnGroups[2]*NR_LDPC_ZMAX)>>6;
// Set pointers to start of group 2
fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
for (k=0; k<3; k++)
{
fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup);
fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
// Loop over BNs
fprintf(fd," for (i=0;i<M;i++) {\n");
fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup);
fprintf(fd,"}\n");
}
}
// =====================================================================
fprintf(fd, "// Process group with 4 CNs \n");
if (lut_numBnInBnGroups[3] > 0)
{
// If elements in group move to next address
idxBnGroup++;
// Number of groups of 32 BNs for parallel processing
fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[3] );
// Set the offset to each CN within a group in terms of 16 Byte
cnOffsetInGroup = (lut_numBnInBnGroups[3]*NR_LDPC_ZMAX)>>6;
// Set pointers to start of group 2
fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
for (k=0; k<4; k++)
{
fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup);
fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
// Loop over BNs
fprintf(fd," for (i=0;i<M;i++) {\n");
fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup);
fprintf(fd,"}\n");
}
}
// =====================================================================
fprintf(fd, "// Process group with 5 CNs \n");
if (lut_numBnInBnGroups[4] > 0)
{
// If elements in group move to next address
idxBnGroup++;
// Number of groups of 32 BNs for parallel processing
fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[4] );
// Set the offset to each CN within a group in terms of 16 Byte
cnOffsetInGroup = (lut_numBnInBnGroups[4]*NR_LDPC_ZMAX)>>6;
// Set pointers to start of group 2
fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
// Loop over CNs
for (k=0; k<5; k++)
{
fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup);
fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
// Loop over BNs
fprintf(fd," for (i=0;i<M;i++) {\n");
fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup);
fprintf(fd,"}\n");
}
}
// =====================================================================
fprintf(fd, "// Process group with 6 CNs \n");
// Process group with 6 CNs
if (lut_numBnInBnGroups[5] > 0)
{
// If elements in group move to next address
idxBnGroup++;
// Number of groups of 32 BNs for parallel processing
fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[5] );
// Set the offset to each CN within a group in terms of 16 Byte
cnOffsetInGroup = (lut_numBnInBnGroups[5]*NR_LDPC_ZMAX)>>6;
// Set pointers to start of group 2
fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
// Loop over CNs
for (k=0; k<6; k++)
{
fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup);
fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
// Loop over BNs
fprintf(fd," for (i=0;i<M;i++) {\n");
fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup);
fprintf(fd,"}\n");
}
}
// =====================================================================
fprintf(fd, "// Process group with 7 CNs \n");
// Process group with 7 CNs
if (lut_numBnInBnGroups[6] > 0)
{
// If elements in group move to next address
idxBnGroup++;
// Number of groups of 32 BNs for parallel processing
fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[6] );
// Set the offset to each CN within a group in terms of 16 Byte
cnOffsetInGroup = (lut_numBnInBnGroups[6]*NR_LDPC_ZMAX)>>6;
// Set pointers to start of group 2
fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
// Loop over CNs
for (k=0; k<7; k++)
{
fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup);
fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
// Loop over BNs
fprintf(fd," for (i=0;i<M;i++) {\n");
fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup);
fprintf(fd,"}\n");
}
}
// =====================================================================
fprintf(fd, "// Process group with 8 CNs \n");
// Process group with 8 CNs
if (lut_numBnInBnGroups[7] > 0)
{
// If elements in group move to next address
idxBnGroup++;
// Number of groups of 32 BNs for parallel processing
fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[7] );
// Set the offset to each CN within a group in terms of 16 Byte
cnOffsetInGroup = (lut_numBnInBnGroups[7]*NR_LDPC_ZMAX)>>6;
// Set pointers to start of group 2
fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
// Loop over CNs
for (k=0; k<8; k++)
{
fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup);
fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
// Loop over BNs
fprintf(fd," for (i=0;i<M;i++) {\n");
fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup);
fprintf(fd,"}\n");
}
}
// =====================================================================
fprintf(fd, "// Process group with 9 CNs \n");
// Process group with 9 CNs
if (lut_numBnInBnGroups[8] > 0)
{
// If elements in group move to next address
idxBnGroup++;
// Number of groups of 32 BNs for parallel processing
fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[8] );
// Set the offset to each CN within a group in terms of 16 Byte
cnOffsetInGroup = (lut_numBnInBnGroups[8]*NR_LDPC_ZMAX)>>6;
// Set pointers to start of group 2
fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
// Loop over CNs
for (k=0; k<9; k++)
{
fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup);
fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
// Loop over BNs
fprintf(fd," for (i=0;i<M;i++) {\n");
fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup);
fprintf(fd,"}\n");
}
}
// =====================================================================
fprintf(fd, "// Process group with 10 CNs \n");
// Process group with 10 CNs
if (lut_numBnInBnGroups[9] > 0)
{
// If elements in group move to next address
idxBnGroup++;
// Number of groups of 32 BNs for parallel processing
fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[9] );
// Set the offset to each CN within a group in terms of 16 Byte
cnOffsetInGroup = (lut_numBnInBnGroups[9]*NR_LDPC_ZMAX)>>6;
// Set pointers to start of group 2
fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
// Loop over CNs
for (k=0; k<10; k++)
{
fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup);
fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
// Loop over BNs
fprintf(fd," for (i=0;i<M;i++) {\n");
fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup);
fprintf(fd,"}\n");
}
}
// =====================================================================
fprintf(fd, "// Process group with 11 CNs \n");
if (lut_numBnInBnGroups[10] > 0)
{
// If elements in group move to next address
idxBnGroup++;
// Number of groups of 32 BNs for parallel processing
fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[10] );;
// Set the offset to each CN within a group in terms of 16 Byte
cnOffsetInGroup = (lut_numBnInBnGroups[10]*NR_LDPC_ZMAX)>>6;
// Set pointers to start of group 2
fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
// Loop over CNs
for (k=0; k<11; k++)
{
fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup);
fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
// Loop over BNs
fprintf(fd," for (i=0;i<M;i++) {\n");
fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup);
fprintf(fd,"}\n");
}
}
// =====================================================================
fprintf(fd, "// Process group with 12 CNs \n");
if (lut_numBnInBnGroups[11] > 0)
{
// If elements in group move to next address
idxBnGroup++;
// Number of groups of 32 BNs for parallel processing
fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[11] );;
// Set the offset to each CN within a group in terms of 16 Byte
cnOffsetInGroup = (lut_numBnInBnGroups[11]*NR_LDPC_ZMAX)>>6;
// Set pointers to start of group 2
fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
// Loop over CNs
for (k=0; k<12; k++)
{
fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup);
fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
// Loop over BNs
fprintf(fd," for (i=0;i<M;i++) {\n");
fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup);
fprintf(fd,"}\n");
}
}
// =====================================================================
fprintf(fd, "// Process group with 13 CNs \n");
if (lut_numBnInBnGroups[12] > 0)
{
// If elements in group move to next address
idxBnGroup++;
// Number of groups of 32 BNs for parallel processing
fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[12] );;
// Set the offset to each CN within a group in terms of 16 Byte
cnOffsetInGroup = (lut_numBnInBnGroups[12]*NR_LDPC_ZMAX)>>6;
// Set pointers to start of group 2
fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
// Loop over CNs
for (k=0; k<13; k++)
{
fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup);
fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
// Loop over BNs
fprintf(fd," for (i=0;i<M;i++) {\n");
fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup);
fprintf(fd,"}\n");
}
}
// =====================================================================
fprintf(fd, "// Process group with 14 CNs \n");
if (lut_numBnInBnGroups[13] > 0)
{
// If elements in group move to next address
idxBnGroup++;
// Number of groups of 32 BNs for parallel processing
fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[13] );;
// Set the offset to each CN within a group in terms of 16 Byte
cnOffsetInGroup = (lut_numBnInBnGroups[13]*NR_LDPC_ZMAX)>>6;
// Set pointers to start of group 2
fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
// Loop over CNs
for (k=0; k<14; k++)
{
fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup);
fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
// Loop over BNs
fprintf(fd," for (i=0;i<M;i++) {\n");
fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup);
fprintf(fd,"}\n");
}
}
// =====================================================================
fprintf(fd, "// Process group with 15 CNs \n");
if (lut_numBnInBnGroups[14] > 0)
{
// If elements in group move to next address
idxBnGroup++;
// Number of groups of 32 BNs for parallel processing
fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[14] );;
// Set the offset to each CN within a group in terms of 16 Byte
cnOffsetInGroup = (lut_numBnInBnGroups[14]*NR_LDPC_ZMAX)>>6;
// Set pointers to start of group 2
fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
// Loop over CNs
for (k=0; k<15; k++)
{
fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup);
fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
// Loop over BNs
fprintf(fd," for (i=0;i<M;i++) {\n");
fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup);
fprintf(fd,"}\n");
}
}
// =====================================================================
fprintf(fd, "// Process group with 16 CNs \n");
if (lut_numBnInBnGroups[15] > 0)
{
// If elements in group move to next address
idxBnGroup++;
// Number of groups of 32 BNs for parallel processing
fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[15] );;
// Set the offset to each CN within a group in terms of 16 Byte
cnOffsetInGroup = (lut_numBnInBnGroups[15]*NR_LDPC_ZMAX)>>6;
// Set pointers to start of group 2
fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
// Loop over CNs
for (k=0; k<16; k++)
{
fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup);
fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
// Loop over BNs
fprintf(fd," for (i=0;i<M;i++) {\n");
fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup);
fprintf(fd,"}\n");
}
}
// =====================================================================
// Process group with 17 CNs
fprintf(fd, "// Process group with 17 CNs \n");
// Process group with 17 CNs
if (lut_numBnInBnGroups[16] > 0)
{
// If elements in group move to next address
idxBnGroup++;
// Number of groups of 32 BNs for parallel processing
fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[16] );;
// Set the offset to each CN within a group in terms of 16 Byte
cnOffsetInGroup = (lut_numBnInBnGroups[16]*NR_LDPC_ZMAX)>>6;
// Set pointers to start of group 2
fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
// Loop over CNs
for (k=0; k<17; k++)
{
fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup);
fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
// Loop over BNs
fprintf(fd," for (i=0;i<M;i++) {\n");
fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup);
fprintf(fd,"}\n");
}
}
// =====================================================================
fprintf(fd, "// Process group with 18 CNs \n");
// Process group with 18 CNs
if (lut_numBnInBnGroups[17] > 0)
{
// If elements in group move to next address
idxBnGroup++;
// Number of groups of 32 BNs for parallel processing
fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[17] );;
// Set the offset to each CN within a group in terms of 16 Byte
cnOffsetInGroup = (lut_numBnInBnGroups[17]*NR_LDPC_ZMAX)>>6;
// Set pointers to start of group 2
fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
// Loop over CNs
for (k=0; k<18; k++)
{
fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup);
fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
// Loop over BNs
fprintf(fd," for (i=0;i<M;i++) {\n");
fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup);
fprintf(fd,"}\n");
}
}
// =====================================================================
fprintf(fd, "// Process group with 19 CNs \n");
if (lut_numBnInBnGroups[18] > 0)
{
// If elements in group move to next address
idxBnGroup++;
// Number of groups of 32 BNs for parallel processing
fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[18] );;
// Set the offset to each CN within a group in terms of 16 Byte
cnOffsetInGroup = (lut_numBnInBnGroups[18]*NR_LDPC_ZMAX)>>6;
// Set pointers to start of group 2
fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
// Loop over CNs
for (k=0; k<19; k++)
{
fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup);
fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
// Loop over BNs
fprintf(fd," for (i=0;i<M;i++) {\n");
fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup);
fprintf(fd,"}\n");
}
}
// =====================================================================
fprintf(fd, "// Process group with 20 CNs \n");
if (lut_numBnInBnGroups[19] > 0)
{
// If elements in group move to next address
idxBnGroup++;
// Number of groups of 32 BNs for parallel processing
fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[19] );;
// Set the offset to each CN within a group in terms of 16 Byte
cnOffsetInGroup = (lut_numBnInBnGroups[19]*NR_LDPC_ZMAX)>>6;
// Set pointers to start of group 2
fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
// Loop over CNs
for (k=0; k<20; k++)
{
fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup);
fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
// Loop over BNs
fprintf(fd," for (i=0;i<M;i++) {\n");
fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup);
fprintf(fd,"}\n");
}
}
// =====================================================================
fprintf(fd, "// Process group with 21 CNs \n");
if (lut_numBnInBnGroups[20] > 0)
{
// If elements in group move to next address
idxBnGroup++;
// Number of groups of 32 BNs for parallel processing
fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[20] );;
// Set the offset to each CN within a group in terms of 16 Byte
cnOffsetInGroup = (lut_numBnInBnGroups[20]*NR_LDPC_ZMAX)>>6;
// Set pointers to start of group 2
fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
// Loop over CNs
for (k=0; k<21; k++)
{
fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup);
fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
// Loop over BNs
fprintf(fd," for (i=0;i<M;i++) {\n");
fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup);
fprintf(fd,"}\n");
}
}
// =====================================================================
fprintf(fd, "// Process group with 22 CNs \n");
if (lut_numBnInBnGroups[21] > 0)
{
// If elements in group move to next address
idxBnGroup++;
// Number of groups of 32 BNs for parallel processing
fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[21] );;
// Set the offset to each CN within a group in terms of 16 Byte
cnOffsetInGroup = (lut_numBnInBnGroups[21]*NR_LDPC_ZMAX)>>6;
// Set pointers to start of group 2
fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
// Loop over CNs
for (k=0; k<22; k++)
{
fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup);
fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
// Loop over BNs
fprintf(fd," for (i=0;i<M;i++) {\n");
fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup);
fprintf(fd,"}\n");
}
}
// =====================================================================
fprintf(fd, "// Process group with <23 CNs \n");
if (lut_numBnInBnGroups[22] > 0)
{
// If elements in group move to next address
idxBnGroup++;
// Number of groups of 32 BNs for parallel processing
fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[22] );;
// Set the offset to each CN within a group in terms of 16 Byte
cnOffsetInGroup = (lut_numBnInBnGroups[22]*NR_LDPC_ZMAX)>>6;
// Set pointers to start of group 2
fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
// Loop over CNs
for (k=0; k<23; k++)
{
fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup);
fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
// Loop over BNs
fprintf(fd," for (i=0;i<M;i++) {\n");
fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup);
fprintf(fd,"}\n");
}
}
// =====================================================================
fprintf(fd, "// Process group with 24 CNs \n");
// Process group with 24 CNs
if (lut_numBnInBnGroups[23] > 0)
{
// If elements in group move to next address
idxBnGroup++;
// Number of groups of 32 BNs for parallel processing
fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[23] );;
// Set the offset to each CN within a group in terms of 16 Byte
cnOffsetInGroup = (lut_numBnInBnGroups[23]*NR_LDPC_ZMAX)>>6;
// Set pointers to start of group 2
fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
// Loop over CNs
for (k=0; k<24; k++)
{
fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup);
fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
// Loop over BNs
fprintf(fd," for (i=0;i<M;i++) {\n");
fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup);
fprintf(fd,"}\n");
}
}
// =====================================================================
fprintf(fd, "// Process group with 25 CNs \n");
if (lut_numBnInBnGroups[24] > 0)
{
// If elements in group move to next address
idxBnGroup++;
// Number of groups of 32 BNs for parallel processing
fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[24] );;
// Set the offset to each CN within a group in terms of 16 Byte
cnOffsetInGroup = (lut_numBnInBnGroups[24]*NR_LDPC_ZMAX)>>6;
// Set pointers to start of group 2
fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
// Loop over CNs
for (k=0; k<25; k++)
{
fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup);
fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
// Loop over BNs
fprintf(fd," for (i=0;i<M;i++) {\n");
fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup);
fprintf(fd,"}\n");
}
}
// =====================================================================
fprintf(fd, "// Process group with 26 CNs \n");
if (lut_numBnInBnGroups[25] > 0)
{
// If elements in group move to next address
idxBnGroup++;
// Number of groups of 32 BNs for parallel processing
fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[25] );;
// Set the offset to each CN within a group in terms of 16 Byte
cnOffsetInGroup = (lut_numBnInBnGroups[25]*NR_LDPC_ZMAX)>>6;
// Set pointers to start of group 2
fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
// Loop over CNs
for (k=0; k<26; k++)
{
fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup);
fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
// Loop over BNs
fprintf(fd," for (i=0;i<M;i++) {\n");
fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup);
fprintf(fd,"}\n");
}
}
// =====================================================================
fprintf(fd, "// Process group with 27 CNs \n");
// Process group with 27 CNs
if (lut_numBnInBnGroups[26] > 0)
{
// If elements in group move to next address
idxBnGroup++;
// Number of groups of 32 BNs for parallel processing
fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[26] );;
// Set the offset to each CN within a group in terms of 16 Byte
cnOffsetInGroup = (lut_numBnInBnGroups[26]*NR_LDPC_ZMAX)>>6;
// Set pointers to start of group 2
fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
// Loop over CNs
for (k=0; k<27; k++)
{
fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup);
fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
// Loop over BNs
fprintf(fd," for (i=0;i<M;i++) {\n");
fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup);
fprintf(fd,"}\n");
}
}
// =====================================================================
fprintf(fd, "// Process group with 28 CNs \n");
// Process group with 28 CNs
if (lut_numBnInBnGroups[27] > 0)
{
// If elements in group move to next address
idxBnGroup++;
// Number of groups of 32 BNs for parallel processing
fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[27] );;
// Set the offset to each CN within a group in terms of 16 Byte
cnOffsetInGroup = (lut_numBnInBnGroups[27]*NR_LDPC_ZMAX)>>6;
// Set pointers to start of group 2
fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
// Loop over CNs
for (k=0; k<28; k++)
{
fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup);
fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
// Loop over BNs
fprintf(fd," for (i=0;i<M;i++) {\n");
fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup);
fprintf(fd,"}\n");
}
}
// =====================================================================
fprintf(fd, "// Process group with 29 CNs \n");
// Process group with 29 CNs
if (lut_numBnInBnGroups[28] > 0)
{
// If elements in group move to next address
idxBnGroup++;
// Number of groups of 32 BNs for parallel processing
fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[28] );;
// Set the offset to each CN within a group in terms of 16 Byte
cnOffsetInGroup = (lut_numBnInBnGroups[28]*NR_LDPC_ZMAX)>>6;
// Set pointers to start of group 2
fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
// Loop over CNs
for (k=0; k<29; k++)
{
fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup);
fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
// Loop over BNs
fprintf(fd," for (i=0;i<M;i++) {\n");
fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup);
fprintf(fd,"}\n");
}
}
// =====================================================================
fprintf(fd, "// Process group with 30 CNs \n");
// Process group with 30 CNs
if (lut_numBnInBnGroups[29] > 0)
{
// If elements in group move to next address
idxBnGroup++;
// Number of groups of 32 BNs for parallel processing
fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[29] );;
// Set the offset to each CN within a group in terms of 16 Byte
cnOffsetInGroup = (lut_numBnInBnGroups[29]*NR_LDPC_ZMAX)>>6;
// Set pointers to start of group 2
fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
// Loop over CNs
for (k=0; k<30; k++)
{
fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup);
fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
// Loop over BNs
fprintf(fd," for (i=0;i<M;i++) {\n");
fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup);
fprintf(fd,"}\n");
}
}
fprintf(fd,"}\n");
fclose(fd);
}//end of the function nrLDPC_bnProc_BG1
#include <stdint.h>
#include <immintrin.h>
#include "../../nrLDPCdecoder_defs.h"
#include "../../nrLDPC_types.h"
void nrLDPC_bnProc_BG2_generator_AVX512(int R)
{
const char *ratestr[3]={"15","13","23"};
if (R<0 || R>2) {printf("Illegal R %d\n",R); abort();}
// system("mkdir -p ../ldpc_gen_files");
char fname[50];
sprintf(fname,"../ldpc_gen_files/bnProc_avx512/nrLDPC_bnProc_BG2_R%s_AVX512.h",ratestr[R]);
FILE *fd=fopen(fname,"w");
if (fd == NULL) {printf("Cannot create \n");abort();}
fprintf(fd,"#include <stdint.h>\n");
fprintf(fd,"#include <immintrin.h>\n");
fprintf(fd,"void nrLDPC_bnProc_BG2_R%s_AVX512(int8_t* bnProcBuf,int8_t* bnProcBufRes, int8_t* llrRes, uint16_t Z ) {\n",ratestr[R]);
const uint8_t* lut_numBnInBnGroups;
const uint32_t* lut_startAddrBnGroups;
const uint16_t* lut_startAddrBnGroupsLlr;
if (R==0) {
lut_numBnInBnGroups = lut_numBnInBnGroups_BG2_R15;
lut_startAddrBnGroups = lut_startAddrBnGroups_BG2_R15;
lut_startAddrBnGroupsLlr = lut_startAddrBnGroupsLlr_BG2_R15;
}
else if (R==1){
lut_numBnInBnGroups = lut_numBnInBnGroups_BG2_R13;
lut_startAddrBnGroups = lut_startAddrBnGroups_BG2_R13;
lut_startAddrBnGroupsLlr = lut_startAddrBnGroupsLlr_BG2_R13;
}
else if (R==2) {
lut_numBnInBnGroups = lut_numBnInBnGroups_BG2_R23;
lut_startAddrBnGroups = lut_startAddrBnGroups_BG2_R23;
lut_startAddrBnGroupsLlr = lut_startAddrBnGroupsLlr_BG2_R23;
}
else { printf("aborting, illegal R %d\n",R); fclose(fd);abort();}
//uint32_t M;
//uint32_t M32rem;
// uint32_t i;
uint32_t k;
// Offset to each CN within a group, in units of 64 bytes (one __m512i)
uint32_t cnOffsetInGroup;
uint8_t idxBnGroup = 0;
fprintf(fd," __m512i* p_bnProcBuf; \n");
fprintf(fd," __m512i* p_bnProcBufRes; \n");
fprintf(fd," __m512i* p_llrRes; \n");
fprintf(fd," __m512i* p_res; \n");
fprintf(fd," uint32_t M, i; \n");
// =====================================================================
// Process group with 1 CN
// Already done in bnProcBufPc
// =====================================================================
// =====================================================================
fprintf(fd, "// Process group with 2 CNs \n");
if (lut_numBnInBnGroups[1] > 0)
{
// If elements in group move to next address
idxBnGroup++;
// Number of groups of 64 BNs for parallel processing (one __m512i holds 64 int8 values)
fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[1] );
// Set the offset to each CN within a group in units of 64 bytes (one __m512i)
cnOffsetInGroup = (lut_numBnInBnGroups[1]*NR_LDPC_ZMAX)>>6;
// Set pointers to start of group 2
fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
// Loop over CNs
for (k=0; k<2; k++)
{
fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup);
fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
// Loop over BNs
fprintf(fd," for (i=0;i<M;i++) {\n");
fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup);
fprintf(fd,"}\n");
}
}
// =====================================================================
fprintf(fd, "// Process group with 3 CNs \n");
if (lut_numBnInBnGroups[2] > 0)
{
// If elements in group move to next address
idxBnGroup++;
// Number of groups of 32 BNs for parallel processing
fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[2] );
// Set the offset to each CN within a group in terms of 16 Byte
cnOffsetInGroup = (lut_numBnInBnGroups[2]*NR_LDPC_ZMAX)>>6;
// Set pointers to start of group 2
fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
for (k=0; k<3; k++)
{
fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup);
fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
// Loop over BNs
fprintf(fd," for (i=0;i<M;i++) {\n");
fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup);
fprintf(fd,"}\n");
}
}
// =====================================================================
fprintf(fd, "// Process group with 4 CNs \n");
if (lut_numBnInBnGroups[3] > 0)
{
// If elements in group move to next address
idxBnGroup++;
// Number of groups of 32 BNs for parallel processing
fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[3] );
// Set the offset to each CN within a group in terms of 16 Byte
cnOffsetInGroup = (lut_numBnInBnGroups[3]*NR_LDPC_ZMAX)>>6;
// Set pointers to start of group 2
fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
for (k=0; k<4; k++)
{
fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup);
fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
// Loop over BNs
fprintf(fd," for (i=0;i<M;i++) {\n");
fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup);
fprintf(fd,"}\n");
}
}
// =====================================================================
fprintf(fd, "// Process group with 5 CNs \n");
if (lut_numBnInBnGroups[4] > 0)
{
// If elements in group move to next address
idxBnGroup++;
// Number of groups of 32 BNs for parallel processing
fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[4] );
// Set the offset to each CN within a group in terms of 16 Byte
cnOffsetInGroup = (lut_numBnInBnGroups[4]*NR_LDPC_ZMAX)>>6;
// Set pointers to start of group 2
fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
// Loop over CNs
for (k=0; k<5; k++)
{
fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup);
fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
// Loop over BNs
fprintf(fd," for (i=0;i<M;i++) {\n");
fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup);
fprintf(fd,"}\n");
}
}
// =====================================================================
fprintf(fd, "// Process group with 6 CNs \n");
// Process group with 6 CNs
if (lut_numBnInBnGroups[5] > 0)
{
// If elements in group move to next address
idxBnGroup++;
// Number of groups of 32 BNs for parallel processing
fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[5] );
// Set the offset to each CN within a group in terms of 16 Byte
cnOffsetInGroup = (lut_numBnInBnGroups[5]*NR_LDPC_ZMAX)>>6;
// Set pointers to start of group 2
fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
// Loop over CNs
for (k=0; k<6; k++)
{
fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup);
fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
// Loop over BNs
fprintf(fd," for (i=0;i<M;i++) {\n");
fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup);
fprintf(fd,"}\n");
}
}
// =====================================================================
fprintf(fd, "// Process group with 7 CNs \n");
// Process group with 7 CNs
if (lut_numBnInBnGroups[6] > 0)
{
// If elements in group move to next address
idxBnGroup++;
// Number of groups of 32 BNs for parallel processing
fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[6] );
// Set the offset to each CN within a group in terms of 16 Byte
cnOffsetInGroup = (lut_numBnInBnGroups[6]*NR_LDPC_ZMAX)>>6;
// Set pointers to start of group 2
fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
// Loop over CNs
for (k=0; k<7; k++)
{
fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup);
fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
// Loop over BNs
fprintf(fd," for (i=0;i<M;i++) {\n");
fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup);
fprintf(fd,"}\n");
}
}
// =====================================================================
fprintf(fd, "// Process group with 8 CNs \n");
// Process group with 8 CNs
if (lut_numBnInBnGroups[7] > 0)
{
// If elements in group move to next address
idxBnGroup++;
// Number of groups of 32 BNs for parallel processing
fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[7] );
// Set the offset to each CN within a group in terms of 16 Byte
cnOffsetInGroup = (lut_numBnInBnGroups[7]*NR_LDPC_ZMAX)>>6;
// Set pointers to start of group 2
fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
// Loop over CNs
for (k=0; k<8; k++)
{
fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup);
fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
// Loop over BNs
fprintf(fd," for (i=0;i<M;i++) {\n");
fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup);
fprintf(fd,"}\n");
}
}
// =====================================================================
fprintf(fd, "// Process group with 9 CNs \n");
// Process group with 9 CNs
if (lut_numBnInBnGroups[8] > 0)
{
// If elements in group move to next address
idxBnGroup++;
// Number of groups of 32 BNs for parallel processing
fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[8] );
// Set the offset to each CN within a group in terms of 16 Byte
cnOffsetInGroup = (lut_numBnInBnGroups[8]*NR_LDPC_ZMAX)>>6;
// Set pointers to start of group 2
fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
// Loop over CNs
for (k=0; k<9; k++)
{
fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup);
fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
// Loop over BNs
fprintf(fd," for (i=0;i<M;i++) {\n");
fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup);
fprintf(fd,"}\n");
}
}
// =====================================================================
fprintf(fd, "// Process group with 10 CNs \n");
// Process group with 10 CNs
if (lut_numBnInBnGroups[9] > 0)
{
// If elements in group move to next address
idxBnGroup++;
// Number of groups of 32 BNs for parallel processing
fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[9] );
// Set the offset to each CN within a group in terms of 16 Byte
cnOffsetInGroup = (lut_numBnInBnGroups[9]*NR_LDPC_ZMAX)>>6;
// Set pointers to start of group 2
fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
// Loop over CNs
for (k=0; k<10; k++)
{
fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup);
fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
// Loop over BNs
fprintf(fd," for (i=0;i<M;i++) {\n");
fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup);
fprintf(fd,"}\n");
}
}
// =====================================================================
fprintf(fd, "// Process group with 11 CNs \n");
if (lut_numBnInBnGroups[10] > 0)
{
// If elements in group move to next address
idxBnGroup++;
// Number of groups of 32 BNs for parallel processing
fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[10] );;
// Set the offset to each CN within a group in terms of 16 Byte
cnOffsetInGroup = (lut_numBnInBnGroups[10]*NR_LDPC_ZMAX)>>6;
// Set pointers to start of group 2
fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
// Loop over CNs
for (k=0; k<11; k++)
{
fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup);
fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
// Loop over BNs
fprintf(fd," for (i=0;i<M;i++) {\n");
fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup);
fprintf(fd,"}\n");
}
}
// =====================================================================
fprintf(fd, "// Process group with 12 CNs \n");
if (lut_numBnInBnGroups[11] > 0)
{
// If elements in group move to next address
idxBnGroup++;
// Number of groups of 32 BNs for parallel processing
fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[11] );;
// Set the offset to each CN within a group in terms of 16 Byte
cnOffsetInGroup = (lut_numBnInBnGroups[11]*NR_LDPC_ZMAX)>>6;
// Set pointers to start of group 2
fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
// Loop over CNs
for (k=0; k<12; k++)
{
fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup);
fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
// Loop over BNs
fprintf(fd," for (i=0;i<M;i++) {\n");
fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup);
fprintf(fd,"}\n");
}
}
// =====================================================================
fprintf(fd, "// Process group with 13 CNs \n");
if (lut_numBnInBnGroups[12] > 0)
{
// If elements in group move to next address
idxBnGroup++;
// Number of groups of 32 BNs for parallel processing
fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[12] );;
// Set the offset to each CN within a group in terms of 16 Byte
cnOffsetInGroup = (lut_numBnInBnGroups[12]*NR_LDPC_ZMAX)>>6;
// Set pointers to start of group 2
fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
// Loop over CNs
for (k=0; k<13; k++)
{
fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup);
fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
// Loop over BNs
fprintf(fd," for (i=0;i<M;i++) {\n");
fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup);
fprintf(fd,"}\n");
}
}
// =====================================================================
fprintf(fd, "// Process group with 14 CNs \n");
if (lut_numBnInBnGroups[13] > 0)
{
// If elements in group move to next address
idxBnGroup++;
// Number of groups of 32 BNs for parallel processing
fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[13] );;
// Set the offset to each CN within a group in terms of 16 Byte
cnOffsetInGroup = (lut_numBnInBnGroups[13]*NR_LDPC_ZMAX)>>6;
// Set pointers to start of group 2
fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
// Loop over CNs
for (k=0; k<14; k++)
{
fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup);
fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
// Loop over BNs
fprintf(fd," for (i=0;i<M;i++) {\n");
fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup);
fprintf(fd,"}\n");
}
}
// =====================================================================
fprintf(fd, "// Process group with 15 CNs \n");
if (lut_numBnInBnGroups[14] > 0)
{
// If elements in group move to next address
idxBnGroup++;
// Number of groups of 32 BNs for parallel processing
fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[14] );;
// Set the offset to each CN within a group in terms of 16 Byte
cnOffsetInGroup = (lut_numBnInBnGroups[14]*NR_LDPC_ZMAX)>>6;
// Set pointers to start of group 2
fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
// Loop over CNs
for (k=0; k<15; k++)
{
fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup);
fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
// Loop over BNs
fprintf(fd," for (i=0;i<M;i++) {\n");
fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup);
fprintf(fd,"}\n");
}
}
// =====================================================================
fprintf(fd, "// Process group with 16 CNs \n");
if (lut_numBnInBnGroups[15] > 0)
{
// If elements in group move to next address
idxBnGroup++;
// Number of groups of 32 BNs for parallel processing
fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[15] );;
// Set the offset to each CN within a group in terms of 16 Byte
cnOffsetInGroup = (lut_numBnInBnGroups[15]*NR_LDPC_ZMAX)>>6;
// Set pointers to start of group 2
fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
// Loop over CNs
for (k=0; k<16; k++)
{
fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup);
fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
// Loop over BNs
fprintf(fd," for (i=0;i<M;i++) {\n");
fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup);
fprintf(fd,"}\n");
}
}
// =====================================================================
// Process group with 17 CNs
fprintf(fd, "// Process group with 17 CNs \n");
// Process group with 17 CNs
if (lut_numBnInBnGroups[16] > 0)
{
// If elements in group move to next address
idxBnGroup++;
// Number of groups of 32 BNs for parallel processing
fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[16] );;
// Set the offset to each CN within a group in terms of 16 Byte
cnOffsetInGroup = (lut_numBnInBnGroups[16]*NR_LDPC_ZMAX)>>6;
// Set pointers to start of group 2
fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
// Loop over CNs
for (k=0; k<17; k++)
{
fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup);
fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
// Loop over BNs
fprintf(fd," for (i=0;i<M;i++) {\n");
fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup);
fprintf(fd,"}\n");
}
}
// =====================================================================
fprintf(fd, "// Process group with 18 CNs \n");
// Process group with 18 CNs
if (lut_numBnInBnGroups[17] > 0)
{
// If elements in group move to next address
idxBnGroup++;
// Number of groups of 32 BNs for parallel processing
fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[17] );;
// Set the offset to each CN within a group in terms of 16 Byte
cnOffsetInGroup = (lut_numBnInBnGroups[17]*NR_LDPC_ZMAX)>>6;
// Set pointers to start of group 2
fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
// Loop over CNs
for (k=0; k<18; k++)
{
fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup);
fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
// Loop over BNs
fprintf(fd," for (i=0;i<M;i++) {\n");
fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup);
fprintf(fd,"}\n");
}
}
// =====================================================================
fprintf(fd, "// Process group with 19 CNs \n");
if (lut_numBnInBnGroups[18] > 0)
{
// If elements in group move to next address
idxBnGroup++;
// Number of groups of 32 BNs for parallel processing
fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[18] );;
// Set the offset to each CN within a group in terms of 16 Byte
cnOffsetInGroup = (lut_numBnInBnGroups[18]*NR_LDPC_ZMAX)>>6;
// Set pointers to start of group 2
fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
// Loop over CNs
for (k=0; k<19; k++)
{
fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup);
fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
// Loop over BNs
fprintf(fd," for (i=0;i<M;i++) {\n");
fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup);
fprintf(fd,"}\n");
}
}
// =====================================================================
fprintf(fd, "// Process group with 20 CNs \n");
if (lut_numBnInBnGroups[19] > 0)
{
// If elements in group move to next address
idxBnGroup++;
// Number of groups of 32 BNs for parallel processing
fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[19] );;
// Set the offset to each CN within a group in terms of 16 Byte
cnOffsetInGroup = (lut_numBnInBnGroups[19]*NR_LDPC_ZMAX)>>6;
// Set pointers to start of group 2
fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
// Loop over CNs
for (k=0; k<20; k++)
{
fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup);
fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
// Loop over BNs
fprintf(fd," for (i=0;i<M;i++) {\n");
fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup);
fprintf(fd,"}\n");
}
}
// =====================================================================
fprintf(fd, "// Process group with 21 CNs \n");
if (lut_numBnInBnGroups[20] > 0)
{
// If elements in group move to next address
idxBnGroup++;
// Number of groups of 32 BNs for parallel processing
fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[20] );;
// Set the offset to each CN within a group in terms of 16 Byte
cnOffsetInGroup = (lut_numBnInBnGroups[20]*NR_LDPC_ZMAX)>>6;
// Set pointers to start of group 2
fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
// Loop over CNs
for (k=0; k<21; k++)
{
fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup);
fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
// Loop over BNs
fprintf(fd," for (i=0;i<M;i++) {\n");
fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup);
fprintf(fd,"}\n");
}
}
// =====================================================================
fprintf(fd, "// Process group with 22 CNs \n");
if (lut_numBnInBnGroups[21] > 0)
{
// If elements in group move to next address
idxBnGroup++;
// Number of groups of 32 BNs for parallel processing
fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[21] );;
// Set the offset to each CN within a group in terms of 16 Byte
cnOffsetInGroup = (lut_numBnInBnGroups[21]*NR_LDPC_ZMAX)>>6;
// Set pointers to start of group 2
fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
// Loop over CNs
for (k=0; k<22; k++)
{
fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup);
fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
// Loop over BNs
fprintf(fd," for (i=0;i<M;i++) {\n");
fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup);
fprintf(fd,"}\n");
}
}
// =====================================================================
fprintf(fd, "// Process group with <23 CNs \n");
if (lut_numBnInBnGroups[22] > 0)
{
// If elements in group move to next address
idxBnGroup++;
// Number of groups of 32 BNs for parallel processing
fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[22] );;
// Set the offset to each CN within a group in terms of 16 Byte
cnOffsetInGroup = (lut_numBnInBnGroups[22]*NR_LDPC_ZMAX)>>6;
// Set pointers to start of group 2
fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
// Loop over CNs
for (k=0; k<23; k++)
{
fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup);
fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
// Loop over BNs
fprintf(fd," for (i=0;i<M;i++) {\n");
fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup);
fprintf(fd,"}\n");
}
}
// =====================================================================
fprintf(fd, "// Process group with 24 CNs \n");
// Process group with 4 CNs
if (lut_numBnInBnGroups[23] > 0)
{
// If elements in group move to next address
idxBnGroup++;
// Number of groups of 32 BNs for parallel processing
fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[23] );;
// Set the offset to each CN within a group in terms of 16 Byte
cnOffsetInGroup = (lut_numBnInBnGroups[23]*NR_LDPC_ZMAX)>>6;
// Set pointers to start of group 2
fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
// Loop over CNs
for (k=0; k<24; k++)
{
fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup);
fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
// Loop over BNs
fprintf(fd," for (i=0;i<M;i++) {\n");
fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup);
fprintf(fd,"}\n");
}
}
// =====================================================================
fprintf(fd, "// Process group with 25 CNs \n");
if (lut_numBnInBnGroups[24] > 0)
{
// If elements in group move to next address
idxBnGroup++;
// Number of groups of 32 BNs for parallel processing
fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[24] );;
// Set the offset to each CN within a group in terms of 16 Byte
cnOffsetInGroup = (lut_numBnInBnGroups[24]*NR_LDPC_ZMAX)>>6;
// Set pointers to start of group 2
fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
// Loop over CNs
for (k=0; k<25; k++)
{
fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup);
fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
// Loop over BNs
fprintf(fd," for (i=0;i<M;i++) {\n");
fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup);
fprintf(fd,"}\n");
}
}
// =====================================================================
fprintf(fd, "// Process group with 26 CNs \n");
if (lut_numBnInBnGroups[25] > 0)
{
// If elements in group move to next address
idxBnGroup++;
// Number of groups of 32 BNs for parallel processing
fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[25] );;
// Set the offset to each CN within a group in terms of 16 Byte
cnOffsetInGroup = (lut_numBnInBnGroups[25]*NR_LDPC_ZMAX)>>6;
// Set pointers to start of group 2
fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
// Loop over CNs
for (k=0; k<26; k++)
{
fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup);
fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
// Loop over BNs
fprintf(fd," for (i=0;i<M;i++) {\n");
fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup);
fprintf(fd,"}\n");
}
}
// =====================================================================
fprintf(fd, "// Process group with 27 CNs \n");
// Process group with 17 CNs
if (lut_numBnInBnGroups[26] > 0)
{
// If elements in group move to next address
idxBnGroup++;
// Number of groups of 32 BNs for parallel processing
fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[26] );;
// Set the offset to each CN within a group in terms of 16 Byte
cnOffsetInGroup = (lut_numBnInBnGroups[26]*NR_LDPC_ZMAX)>>6;
// Set pointers to start of group 2
fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
// Loop over CNs
for (k=0; k<27; k++)
{
fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup);
fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
// Loop over BNs
fprintf(fd," for (i=0;i<M;i++) {\n");
fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup);
fprintf(fd,"}\n");
}
}
// =====================================================================
fprintf(fd, "// Process group with 28 CNs \n");
// Process group with 8 CNs
if (lut_numBnInBnGroups[27] > 0)
{
// If elements in group move to next address
idxBnGroup++;
// Number of groups of 32 BNs for parallel processing
fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[27] );;
// Set the offset to each CN within a group in terms of 16 Byte
cnOffsetInGroup = (lut_numBnInBnGroups[27]*NR_LDPC_ZMAX)>>6;
// Set pointers to start of group 2
fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
// Loop over CNs
for (k=0; k<28; k++)
{
fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup);
fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
// Loop over BNs
fprintf(fd," for (i=0;i<M;i++) {\n");
fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup);
fprintf(fd,"}\n");
}
}
// =====================================================================
fprintf(fd, "// Process group with 29 CNs \n");
// Process group with 9 CNs
if (lut_numBnInBnGroups[28] > 0)
{
// If elements in group move to next address
idxBnGroup++;
// Number of groups of 32 BNs for parallel processing
fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[28] );;
// Set the offset to each CN within a group in terms of 16 Byte
cnOffsetInGroup = (lut_numBnInBnGroups[28]*NR_LDPC_ZMAX)>>6;
// Set pointers to start of group 2
fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
// Loop over CNs
for (k=0; k<29; k++)
{
fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup);
fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
// Loop over BNs
fprintf(fd," for (i=0;i<M;i++) {\n");
fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup);
fprintf(fd,"}\n");
}
}
// =====================================================================
fprintf(fd, "// Process group with 30 CNs \n");
// Process group with 20 CNs
if (lut_numBnInBnGroups[29] > 0)
{
// If elements in group move to next address
idxBnGroup++;
// Number of groups of 32 BNs for parallel processing
fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[29] );;
// Set the offset to each CN within a group in terms of 16 Byte
cnOffsetInGroup = (lut_numBnInBnGroups[29]*NR_LDPC_ZMAX)>>6;
// Set pointers to start of group 2
fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
// Loop over CNs
for (k=0; k<30; k++)
{
fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup);
fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
// Loop over BNs
fprintf(fd," for (i=0;i<M;i++) {\n");
fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup);
fprintf(fd,"}\n");
}
}
fprintf(fd,"}\n");
fclose(fd);
}//end of the function nrLDPC_bnProc_BG1
#include <stdio.h>
#include <stdint.h>
#define NB_R 3
void nrLDPC_bnProc_BG1_generator_AVX512(int);
void nrLDPC_bnProc_BG2_generator_AVX512(int);
//void nrLDPC_bnProcPc_BG1_generator_AVX2(int);
//void nrLDPC_bnProcPc_BG2_generator_AVX2(int);
/*
 * Driver for the AVX512 bnProc code generators.
 *
 * Calls the BG1 generator and then the BG2 generator once for each entry
 * of R (0, 1, 2 -- presumably code-rate selectors; confirm against the
 * generator sources). The bnProcPc AVX2 generators are not invoked here.
 *
 * Always returns 0.
 */
int main()
{
  const int R[NB_R] = {0, 1, 2};
  int idx = 0;

  while (idx < NB_R) {
    const int sel = R[idx];

    nrLDPC_bnProc_BG1_generator_AVX512(sel);
    nrLDPC_bnProc_BG2_generator_AVX512(sel);
    idx++;
  }

  return 0;
}
/*
 * Unrolled bit-node LLR update for the bit-node group layout hard-coded below.
 *
 * For every bit-node group, the int8 values of all check-node slots in
 * bnProcBuf and the matching int8 values in llrProcBuf are sign-extended to
 * 16 bit, summed with saturating 16-bit adds, packed back to int8 with signed
 * saturation and stored to llrRes. Each loop iteration handles 32 int8 values
 * (two 128-bit loads widened to two 256-bit accumulators).
 *
 * bnProcBuf    : input, read as 16-byte vectors at the fixed byte offsets below
 * bnProcBufRes : only written by the "1 CN" group, which copies llrProcBuf
 *                into it; with the multiplier 0 used here that loop never runs
 * llrRes       : output, written as 32-byte vectors at the fixed byte offsets
 * llrProcBuf   : input, read as 16-byte vectors at the fixed byte offsets
 * Z            : scales the per-group iteration count M = (n*Z + 31) >> 5
 *
 * The vector pointers are dereferenced directly, so the buffers are assumed
 * to be suitably aligned (32 bytes for llrRes) -- TODO confirm with callers.
 *
 * Fix: the "2 CNs" group previously accumulated into ymmRes0/ymmRes1 without
 * initialising them (its two leading loads went to ymm0/ymm1 and were
 * overwritten). It now seeds the accumulators from the first CN slot exactly
 * like the groups with 3..6 CNs do.
 */
static inline void nrLDPC_bnProcPc_BG2_R89_AVX2(int8_t* bnProcBuf,int8_t* bnProcBufRes, int8_t* llrRes , int8_t* llrProcBuf, uint16_t Z ) {
  __m256i ymm0, ymm1, ymmRes0, ymmRes1;
  __m128i* p_bnProcBuf;
  __m256i* p_bnProcBufRes;
  __m128i* p_llrProcBuf;
  __m256i* p_llrProcBuf256;
  __m256i* p_llrRes;
  uint32_t M ;

  // Process group with 1 CNs
  // M evaluates to 0 for every Z, so this loop body is never executed.
  M = (0*Z + 31)>>5;
  p_bnProcBuf     = (__m128i*) &bnProcBuf    [0];
  p_bnProcBufRes  = (__m256i*) &bnProcBufRes [0];
  p_llrProcBuf    = (__m128i*) &llrProcBuf   [0];
  p_llrProcBuf256 = (__m256i*) &llrProcBuf   [0];
  p_llrRes        = (__m256i*) &llrRes       [0];
  for (int i=0,j=0;i<M;i++,j+=2) {
    p_bnProcBufRes[i] = p_llrProcBuf256[i];
    ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]);
    ymm1 = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);
    ymmRes0 = _mm256_adds_epi16(ymm0, ymm1);
    ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j+1]);
    ymm1 = _mm256_cvtepi8_epi16(p_llrProcBuf[j+1]);
    ymmRes1 = _mm256_adds_epi16(ymm0, ymm1);
    // packs works per 128-bit lane; the 0xD8 permute restores linear order
    ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
    *p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
    p_llrRes++;
  }

  // Process group with 2 CNs
  M = (3*Z + 31)>>5;
  p_bnProcBuf  = (__m128i*) &bnProcBuf  [1152];
  p_llrProcBuf = (__m128i*) &llrProcBuf [1152];
  p_llrRes     = (__m256i*) &llrRes     [1152];
  for (int i=0,j=0;i<M;i++,j+=2) {
    // Seed the accumulators from the first CN slot (was missing before)
    ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]);
    ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);
    // Second CN slot
    ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[72 + j]);
    ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);
    ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[72 + j +1]);
    ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);
    // Add the llrProcBuf values
    ymm0 = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);
    ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);
    ymm1 = _mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);
    ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);
    // Saturating pack to int8, then undo the lane interleave
    ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
    *p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
    p_llrRes++;
  }

  // Process group with 3 CNs
  M = (5*Z + 31)>>5;
  p_bnProcBuf  = (__m128i*) &bnProcBuf  [3456];
  p_llrProcBuf = (__m128i*) &llrProcBuf [2304];
  p_llrRes     = (__m256i*) &llrRes     [2304];
  for (int i=0,j=0;i<M;i++,j+=2) {
    ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]);
    ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);
    ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[120 + j]);
    ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);
    ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[120 + j +1]);
    ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);
    ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[240 + j]);
    ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);
    ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[240 + j +1]);
    ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);
    ymm0 = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);
    ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);
    ymm1 = _mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);
    ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);
    ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
    *p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
    p_llrRes++;
  }

  // Process group with 4 CNs
  M = (3*Z + 31)>>5;
  p_bnProcBuf  = (__m128i*) &bnProcBuf  [9216];
  p_llrProcBuf = (__m128i*) &llrProcBuf [4224];
  p_llrRes     = (__m256i*) &llrRes     [4224];
  for (int i=0,j=0;i<M;i++,j+=2) {
    ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]);
    ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);
    ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[72 + j]);
    ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);
    ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[72 + j +1]);
    ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);
    ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[144 + j]);
    ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);
    ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[144 + j +1]);
    ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);
    ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[216 + j]);
    ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);
    ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[216 + j +1]);
    ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);
    ymm0 = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);
    ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);
    ymm1 = _mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);
    ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);
    ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
    *p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
    p_llrRes++;
  }

  // Process group with 5 CNs
  M = (2*Z + 31)>>5;
  p_bnProcBuf  = (__m128i*) &bnProcBuf  [13824];
  p_llrProcBuf = (__m128i*) &llrProcBuf [5376];
  p_llrRes     = (__m256i*) &llrRes     [5376];
  for (int i=0,j=0;i<M;i++,j+=2) {
    ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]);
    ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);
    ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[48 + j]);
    ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);
    ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[48 + j +1]);
    ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);
    ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[96 + j]);
    ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);
    ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[96 + j +1]);
    ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);
    ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[144 + j]);
    ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);
    ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[144 + j +1]);
    ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);
    ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[192 + j]);
    ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);
    ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[192 + j +1]);
    ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);
    ymm0 = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);
    ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);
    ymm1 = _mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);
    ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);
    ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
    *p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
    p_llrRes++;
  }

  // Process group with 6 CNs
  M = (1*Z + 31)>>5;
  p_bnProcBuf  = (__m128i*) &bnProcBuf  [17664];
  p_llrProcBuf = (__m128i*) &llrProcBuf [6144];
  p_llrRes     = (__m256i*) &llrRes     [6144];
  for (int i=0,j=0;i<M;i++,j+=2) {
    ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]);
    ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);
    ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[24 + j]);
    ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);
    ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[24 + j +1]);
    ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);
    ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[48 + j]);
    ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);
    ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[48 + j +1]);
    ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);
    ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[72 + j]);
    ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);
    ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[72 + j +1]);
    ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);
    ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[96 + j]);
    ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);
    ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[96 + j +1]);
    ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);
    ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[120 + j]);
    ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);
    ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[120 + j +1]);
    ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);
    ymm0 = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);
    ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);
    ymm1 = _mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);
    ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);
    ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
    *p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
    p_llrRes++;
  }

  // Process group with 7 CNs
  // Process group with 8 CNs
  // Process group with 9 CNs
  // Process group with 10 CNs
  // Process group with 11 CNs
  // Process group with 12 CNs
  // Process group with 13 CNs
  // Process group with 14 CNs
  // Process group with 15 CNs
  // Process group with 16 CNs
  // Process group with 17 CNs
  // Process group with 18 CNs
  // Process group with 19 CNs
  // Process group with 20 CNs
  // Process group with 21 CNs
  // Process group with 22 CNs
  // Process group with 23 CNs
  // Process group with 24 CNs
  // Process group with 25 CNs
  // Process group with 26 CNs
  // Process group with 27 CNs
  // Process group with 28 CNs
  // Process group with 29 CNs
  // Process group with 30 CNs
}
#include <stdint.h>
#include <immintrin.h>
void nrLDPC_bnProc_BG2_R15_AVX2(int8_t* bnProcBuf,int8_t* bnProcBufRes, int8_t* llrRes, uint16_t Z ) {
__m256i* p_bnProcBuf;
__m256i* p_bnProcBufRes;
__m256i* p_llrRes;
__m256i* p_res;
static inline void nrLDPC_bnProc_BG1_R13_AVX512(int8_t* bnProcBuf,int8_t* bnProcBufRes, int8_t* llrRes, uint16_t Z ) {
__m512i* p_bnProcBuf;
__m512i* p_bnProcBufRes;
__m512i* p_llrRes;
__m512i* p_res;
uint32_t M, i;
// Process group with 2 CNs
// Process group with 3 CNs
// Process group with 4 CNs
M = (1*Z + 63)>>6;
p_bnProcBuf = (__m512i*) &bnProcBuf [16128];
p_bnProcBufRes = (__m512i*) &bnProcBufRes [16128];
p_res = &p_bnProcBufRes[0];
p_llrRes = (__m512i*) &llrRes [16128];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
}
p_res = &p_bnProcBufRes[6];
p_llrRes = (__m512i*) &llrRes [16128];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]);
}
p_res = &p_bnProcBufRes[12];
p_llrRes = (__m512i*) &llrRes [16128];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]);
}
p_res = &p_bnProcBufRes[18];
p_llrRes = (__m512i*) &llrRes [16128];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]);
}
// Process group with 5 CNs
M = (2*Z + 31)>>5;
p_bnProcBuf = (__m256i*) &bnProcBuf [14592];
p_bnProcBufRes = (__m256i*) &bnProcBufRes [14592];
M = (1*Z + 63)>>6;
p_bnProcBuf = (__m512i*) &bnProcBuf [17664];
p_bnProcBufRes = (__m512i*) &bnProcBufRes [17664];
p_res = &p_bnProcBufRes[0];
p_llrRes = (__m256i*) &llrRes [14592];
p_llrRes = (__m512i*) &llrRes [16512];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
}
p_res = &p_bnProcBufRes[24];
p_llrRes = (__m256i*) &llrRes [14592];
p_res = &p_bnProcBufRes[6];
p_llrRes = (__m512i*) &llrRes [16512];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]);
}
p_res = &p_bnProcBufRes[48];
p_llrRes = (__m256i*) &llrRes [14592];
p_res = &p_bnProcBufRes[12];
p_llrRes = (__m512i*) &llrRes [16512];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]);
}
p_res = &p_bnProcBufRes[72];
p_llrRes = (__m256i*) &llrRes [14592];
p_res = &p_bnProcBufRes[18];
p_llrRes = (__m512i*) &llrRes [16512];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]);
}
p_res = &p_bnProcBufRes[96];
p_llrRes = (__m256i*) &llrRes [14592];
p_res = &p_bnProcBufRes[24];
p_llrRes = (__m512i*) &llrRes [16512];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]);
}
// Process group with 6 CNs
M = (1*Z + 31)>>5;
p_bnProcBuf = (__m256i*) &bnProcBuf [18432];
p_bnProcBufRes = (__m256i*) &bnProcBufRes [18432];
M = (2*Z + 63)>>6;
p_bnProcBuf = (__m512i*) &bnProcBuf [19584];
p_bnProcBufRes = (__m512i*) &bnProcBufRes [19584];
p_res = &p_bnProcBufRes[0];
p_llrRes = (__m256i*) &llrRes [15360];
p_llrRes = (__m512i*) &llrRes [16896];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
}
p_res = &p_bnProcBufRes[12];
p_llrRes = (__m256i*) &llrRes [15360];
p_llrRes = (__m512i*) &llrRes [16896];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]);
}
p_res = &p_bnProcBufRes[24];
p_llrRes = (__m256i*) &llrRes [15360];
p_llrRes = (__m512i*) &llrRes [16896];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]);
}
p_res = &p_bnProcBufRes[36];
p_llrRes = (__m256i*) &llrRes [15360];
p_llrRes = (__m512i*) &llrRes [16896];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]);
}
p_res = &p_bnProcBufRes[48];
p_llrRes = (__m256i*) &llrRes [15360];
p_llrRes = (__m512i*) &llrRes [16896];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]);
}
p_res = &p_bnProcBufRes[60];
p_llrRes = (__m256i*) &llrRes [15360];
p_llrRes = (__m512i*) &llrRes [16896];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]);
}
// Process group with 7 CNs
M = (1*Z + 31)>>5;
p_bnProcBuf = (__m256i*) &bnProcBuf [20736];
p_bnProcBufRes = (__m256i*) &bnProcBufRes [20736];
M = (4*Z + 63)>>6;
p_bnProcBuf = (__m512i*) &bnProcBuf [24192];
p_bnProcBufRes = (__m512i*) &bnProcBufRes [24192];
p_res = &p_bnProcBufRes[0];
p_llrRes = (__m256i*) &llrRes [15744];
p_llrRes = (__m512i*) &llrRes [17664];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
}
p_res = &p_bnProcBufRes[12];
p_llrRes = (__m256i*) &llrRes [15744];
p_res = &p_bnProcBufRes[24];
p_llrRes = (__m512i*) &llrRes [17664];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]);
}
p_res = &p_bnProcBufRes[24];
p_llrRes = (__m256i*) &llrRes [15744];
p_res = &p_bnProcBufRes[48];
p_llrRes = (__m512i*) &llrRes [17664];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]);
}
p_res = &p_bnProcBufRes[36];
p_llrRes = (__m256i*) &llrRes [15744];
p_res = &p_bnProcBufRes[72];
p_llrRes = (__m512i*) &llrRes [17664];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]);
}
p_res = &p_bnProcBufRes[48];
p_llrRes = (__m256i*) &llrRes [15744];
p_res = &p_bnProcBufRes[96];
p_llrRes = (__m512i*) &llrRes [17664];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]);
}
p_res = &p_bnProcBufRes[60];
p_llrRes = (__m256i*) &llrRes [15744];
p_res = &p_bnProcBufRes[120];
p_llrRes = (__m512i*) &llrRes [17664];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[120 + i]);
}
p_res = &p_bnProcBufRes[72];
p_llrRes = (__m256i*) &llrRes [15744];
p_res = &p_bnProcBufRes[144];
p_llrRes = (__m512i*) &llrRes [17664];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[144 + i]);
}
// Process group with 8 CNs
M = (1*Z + 31)>>5;
p_bnProcBuf = (__m256i*) &bnProcBuf [23424];
p_bnProcBufRes = (__m256i*) &bnProcBufRes [23424];
M = (3*Z + 63)>>6;
p_bnProcBuf = (__m512i*) &bnProcBuf [34944];
p_bnProcBufRes = (__m512i*) &bnProcBufRes [34944];
p_res = &p_bnProcBufRes[0];
p_llrRes = (__m256i*) &llrRes [16128];
p_llrRes = (__m512i*) &llrRes [19200];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
}
p_res = &p_bnProcBufRes[12];
p_llrRes = (__m256i*) &llrRes [16128];
p_res = &p_bnProcBufRes[18];
p_llrRes = (__m512i*) &llrRes [19200];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]);
}
p_res = &p_bnProcBufRes[24];
p_llrRes = (__m256i*) &llrRes [16128];
p_res = &p_bnProcBufRes[36];
p_llrRes = (__m512i*) &llrRes [19200];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]);
}
p_res = &p_bnProcBufRes[36];
p_llrRes = (__m256i*) &llrRes [16128];
p_res = &p_bnProcBufRes[54];
p_llrRes = (__m512i*) &llrRes [19200];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[54 + i]);
}
p_res = &p_bnProcBufRes[48];
p_llrRes = (__m256i*) &llrRes [16128];
p_res = &p_bnProcBufRes[72];
p_llrRes = (__m512i*) &llrRes [19200];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]);
}
p_res = &p_bnProcBufRes[60];
p_llrRes = (__m256i*) &llrRes [16128];
p_res = &p_bnProcBufRes[90];
p_llrRes = (__m512i*) &llrRes [19200];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[90 + i]);
}
p_res = &p_bnProcBufRes[72];
p_llrRes = (__m256i*) &llrRes [16128];
p_res = &p_bnProcBufRes[108];
p_llrRes = (__m512i*) &llrRes [19200];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[108 + i]);
}
p_res = &p_bnProcBufRes[84];
p_llrRes = (__m256i*) &llrRes [16128];
p_res = &p_bnProcBufRes[126];
p_llrRes = (__m512i*) &llrRes [19200];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[84 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[126 + i]);
}
// Process group with 9 CNs
M = (2*Z + 31)>>5;
p_bnProcBuf = (__m256i*) &bnProcBuf [26496];
p_bnProcBufRes = (__m256i*) &bnProcBufRes [26496];
M = (1*Z + 63)>>6;
p_bnProcBuf = (__m512i*) &bnProcBuf [44160];
p_bnProcBufRes = (__m512i*) &bnProcBufRes [44160];
p_res = &p_bnProcBufRes[0];
p_llrRes = (__m256i*) &llrRes [16512];
p_llrRes = (__m512i*) &llrRes [20352];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
}
p_res = &p_bnProcBufRes[24];
p_llrRes = (__m256i*) &llrRes [16512];
p_res = &p_bnProcBufRes[6];
p_llrRes = (__m512i*) &llrRes [20352];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]);
}
p_res = &p_bnProcBufRes[48];
p_llrRes = (__m256i*) &llrRes [16512];
p_res = &p_bnProcBufRes[12];
p_llrRes = (__m512i*) &llrRes [20352];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]);
}
p_res = &p_bnProcBufRes[72];
p_llrRes = (__m256i*) &llrRes [16512];
p_res = &p_bnProcBufRes[18];
p_llrRes = (__m512i*) &llrRes [20352];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]);
}
p_res = &p_bnProcBufRes[96];
p_llrRes = (__m256i*) &llrRes [16512];
p_res = &p_bnProcBufRes[24];
p_llrRes = (__m512i*) &llrRes [20352];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]);
}
p_res = &p_bnProcBufRes[120];
p_llrRes = (__m256i*) &llrRes [16512];
p_res = &p_bnProcBufRes[30];
p_llrRes = (__m512i*) &llrRes [20352];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[120 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[30 + i]);
}
p_res = &p_bnProcBufRes[144];
p_llrRes = (__m256i*) &llrRes [16512];
p_res = &p_bnProcBufRes[36];
p_llrRes = (__m512i*) &llrRes [20352];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[144 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]);
}
p_res = &p_bnProcBufRes[168];
p_llrRes = (__m256i*) &llrRes [16512];
p_res = &p_bnProcBufRes[42];
p_llrRes = (__m512i*) &llrRes [20352];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[168 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[42 + i]);
}
p_res = &p_bnProcBufRes[192];
p_llrRes = (__m256i*) &llrRes [16512];
p_res = &p_bnProcBufRes[48];
p_llrRes = (__m512i*) &llrRes [20352];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[192 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]);
}
// Process group with 10 CNs
M = (1*Z + 31)>>5;
p_bnProcBuf = (__m256i*) &bnProcBuf [33408];
p_bnProcBufRes = (__m256i*) &bnProcBufRes [33408];
M = (4*Z + 63)>>6;
p_bnProcBuf = (__m512i*) &bnProcBuf [47616];
p_bnProcBufRes = (__m512i*) &bnProcBufRes [47616];
p_res = &p_bnProcBufRes[0];
p_llrRes = (__m256i*) &llrRes [17280];
p_llrRes = (__m512i*) &llrRes [20736];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
}
p_res = &p_bnProcBufRes[12];
p_llrRes = (__m256i*) &llrRes [17280];
p_res = &p_bnProcBufRes[24];
p_llrRes = (__m512i*) &llrRes [20736];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]);
}
p_res = &p_bnProcBufRes[24];
p_llrRes = (__m256i*) &llrRes [17280];
p_res = &p_bnProcBufRes[48];
p_llrRes = (__m512i*) &llrRes [20736];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]);
}
p_res = &p_bnProcBufRes[36];
p_llrRes = (__m256i*) &llrRes [17280];
p_res = &p_bnProcBufRes[72];
p_llrRes = (__m512i*) &llrRes [20736];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]);
}
p_res = &p_bnProcBufRes[48];
p_llrRes = (__m256i*) &llrRes [17280];
p_res = &p_bnProcBufRes[96];
p_llrRes = (__m512i*) &llrRes [20736];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]);
}
p_res = &p_bnProcBufRes[60];
p_llrRes = (__m256i*) &llrRes [17280];
p_res = &p_bnProcBufRes[120];
p_llrRes = (__m512i*) &llrRes [20736];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[120 + i]);
}
p_res = &p_bnProcBufRes[72];
p_llrRes = (__m256i*) &llrRes [17280];
p_res = &p_bnProcBufRes[144];
p_llrRes = (__m512i*) &llrRes [20736];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[144 + i]);
}
p_res = &p_bnProcBufRes[84];
p_llrRes = (__m256i*) &llrRes [17280];
p_res = &p_bnProcBufRes[168];
p_llrRes = (__m512i*) &llrRes [20736];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[84 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[168 + i]);
}
p_res = &p_bnProcBufRes[96];
p_llrRes = (__m256i*) &llrRes [17280];
p_res = &p_bnProcBufRes[192];
p_llrRes = (__m512i*) &llrRes [20736];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[192 + i]);
}
p_res = &p_bnProcBufRes[108];
p_llrRes = (__m256i*) &llrRes [17280];
p_res = &p_bnProcBufRes[216];
p_llrRes = (__m512i*) &llrRes [20736];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[108 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[216 + i]);
}
// Process group with 11 CNs
// Process group with 12 CNs
M = (1*Z + 31)>>5;
p_bnProcBuf = (__m256i*) &bnProcBuf [37248];
p_bnProcBufRes = (__m256i*) &bnProcBufRes [37248];
M = (3*Z + 63)>>6;
p_bnProcBuf = (__m512i*) &bnProcBuf [62976];
p_bnProcBufRes = (__m512i*) &bnProcBufRes [62976];
p_res = &p_bnProcBufRes[0];
p_llrRes = (__m256i*) &llrRes [17664];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
}
p_res = &p_bnProcBufRes[12];
p_llrRes = (__m256i*) &llrRes [17664];
p_llrRes = (__m512i*) &llrRes [22272];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
}
p_res = &p_bnProcBufRes[24];
p_llrRes = (__m256i*) &llrRes [17664];
p_res = &p_bnProcBufRes[18];
p_llrRes = (__m512i*) &llrRes [22272];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]);
}
p_res = &p_bnProcBufRes[36];
p_llrRes = (__m256i*) &llrRes [17664];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]);
}
p_res = &p_bnProcBufRes[48];
p_llrRes = (__m256i*) &llrRes [17664];
p_llrRes = (__m512i*) &llrRes [22272];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]);
}
p_res = &p_bnProcBufRes[60];
p_llrRes = (__m256i*) &llrRes [17664];
p_res = &p_bnProcBufRes[54];
p_llrRes = (__m512i*) &llrRes [22272];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[54 + i]);
}
p_res = &p_bnProcBufRes[72];
p_llrRes = (__m256i*) &llrRes [17664];
p_llrRes = (__m512i*) &llrRes [22272];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]);
}
p_res = &p_bnProcBufRes[84];
p_llrRes = (__m256i*) &llrRes [17664];
p_res = &p_bnProcBufRes[90];
p_llrRes = (__m512i*) &llrRes [22272];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[84 + i]);
}
p_res = &p_bnProcBufRes[96];
p_llrRes = (__m256i*) &llrRes [17664];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[90 + i]);
}
p_res = &p_bnProcBufRes[108];
p_llrRes = (__m256i*) &llrRes [17664];
p_llrRes = (__m512i*) &llrRes [22272];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[108 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[108 + i]);
}
p_res = &p_bnProcBufRes[120];
p_llrRes = (__m256i*) &llrRes [17664];
p_res = &p_bnProcBufRes[126];
p_llrRes = (__m512i*) &llrRes [22272];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[120 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[126 + i]);
}
p_res = &p_bnProcBufRes[132];
p_llrRes = (__m256i*) &llrRes [17664];
p_res = &p_bnProcBufRes[144];
p_llrRes = (__m512i*) &llrRes [22272];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[132 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[144 + i]);
}
// Process group with 13 CNs
M = (1*Z + 31)>>5;
p_bnProcBuf = (__m256i*) &bnProcBuf [41856];
p_bnProcBufRes = (__m256i*) &bnProcBufRes [41856];
p_res = &p_bnProcBufRes[0];
p_llrRes = (__m256i*) &llrRes [18048];
p_res = &p_bnProcBufRes[162];
p_llrRes = (__m512i*) &llrRes [22272];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[162 + i]);
}
p_res = &p_bnProcBufRes[12];
p_llrRes = (__m256i*) &llrRes [18048];
p_res = &p_bnProcBufRes[180];
p_llrRes = (__m512i*) &llrRes [22272];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[180 + i]);
}
p_res = &p_bnProcBufRes[24];
p_llrRes = (__m256i*) &llrRes [18048];
// Process group with 12 CNs
M = (4*Z + 63)>>6;
p_bnProcBuf = (__m512i*) &bnProcBuf [75648];
p_bnProcBufRes = (__m512i*) &bnProcBufRes [75648];
p_res = &p_bnProcBufRes[0];
p_llrRes = (__m512i*) &llrRes [23424];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
}
p_res = &p_bnProcBufRes[36];
p_llrRes = (__m256i*) &llrRes [18048];
p_res = &p_bnProcBufRes[24];
p_llrRes = (__m512i*) &llrRes [23424];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]);
}
p_res = &p_bnProcBufRes[48];
p_llrRes = (__m256i*) &llrRes [18048];
p_llrRes = (__m512i*) &llrRes [23424];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]);
}
p_res = &p_bnProcBufRes[60];
p_llrRes = (__m256i*) &llrRes [18048];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]);
}
p_res = &p_bnProcBufRes[72];
p_llrRes = (__m256i*) &llrRes [18048];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]);
}
p_res = &p_bnProcBufRes[84];
p_llrRes = (__m256i*) &llrRes [18048];
p_llrRes = (__m512i*) &llrRes [23424];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[84 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]);
}
p_res = &p_bnProcBufRes[96];
p_llrRes = (__m256i*) &llrRes [18048];
p_llrRes = (__m512i*) &llrRes [23424];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]);
}
p_res = &p_bnProcBufRes[108];
p_llrRes = (__m256i*) &llrRes [18048];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[108 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]);
}
p_res = &p_bnProcBufRes[120];
p_llrRes = (__m256i*) &llrRes [18048];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[120 + i]);
}
p_res = &p_bnProcBufRes[132];
p_llrRes = (__m256i*) &llrRes [18048];
p_llrRes = (__m512i*) &llrRes [23424];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[132 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[120 + i]);
}
p_res = &p_bnProcBufRes[144];
p_llrRes = (__m256i*) &llrRes [18048];
p_llrRes = (__m512i*) &llrRes [23424];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[144 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[144 + i]);
}
// Process group with 14 CNs
M = (1*Z + 31)>>5;
p_bnProcBuf = (__m256i*) &bnProcBuf [46848];
p_bnProcBufRes = (__m256i*) &bnProcBufRes [46848];
p_res = &p_bnProcBufRes[0];
p_llrRes = (__m256i*) &llrRes [18432];
p_res = &p_bnProcBufRes[168];
p_llrRes = (__m512i*) &llrRes [23424];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[168 + i]);
}
p_res = &p_bnProcBufRes[12];
p_llrRes = (__m256i*) &llrRes [18432];
p_res = &p_bnProcBufRes[192];
p_llrRes = (__m512i*) &llrRes [23424];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[192 + i]);
}
p_res = &p_bnProcBufRes[24];
p_llrRes = (__m256i*) &llrRes [18432];
p_res = &p_bnProcBufRes[216];
p_llrRes = (__m512i*) &llrRes [23424];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[216 + i]);
}
p_res = &p_bnProcBufRes[36];
p_llrRes = (__m256i*) &llrRes [18432];
p_res = &p_bnProcBufRes[240];
p_llrRes = (__m512i*) &llrRes [23424];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[240 + i]);
}
p_res = &p_bnProcBufRes[48];
p_llrRes = (__m256i*) &llrRes [18432];
p_res = &p_bnProcBufRes[264];
p_llrRes = (__m512i*) &llrRes [23424];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[264 + i]);
}
p_res = &p_bnProcBufRes[60];
p_llrRes = (__m256i*) &llrRes [18432];
// Process group with 13 CNs
M = (1*Z + 63)>>6;
p_bnProcBuf = (__m512i*) &bnProcBuf [94080];
p_bnProcBufRes = (__m512i*) &bnProcBufRes [94080];
p_res = &p_bnProcBufRes[0];
p_llrRes = (__m512i*) &llrRes [24960];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
}
p_res = &p_bnProcBufRes[72];
p_llrRes = (__m256i*) &llrRes [18432];
p_res = &p_bnProcBufRes[6];
p_llrRes = (__m512i*) &llrRes [24960];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]);
}
p_res = &p_bnProcBufRes[84];
p_llrRes = (__m256i*) &llrRes [18432];
p_res = &p_bnProcBufRes[12];
p_llrRes = (__m512i*) &llrRes [24960];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[84 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]);
}
p_res = &p_bnProcBufRes[96];
p_llrRes = (__m256i*) &llrRes [18432];
p_res = &p_bnProcBufRes[18];
p_llrRes = (__m512i*) &llrRes [24960];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]);
}
p_res = &p_bnProcBufRes[108];
p_llrRes = (__m256i*) &llrRes [18432];
p_res = &p_bnProcBufRes[24];
p_llrRes = (__m512i*) &llrRes [24960];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[108 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]);
}
p_res = &p_bnProcBufRes[120];
p_llrRes = (__m256i*) &llrRes [18432];
p_res = &p_bnProcBufRes[30];
p_llrRes = (__m512i*) &llrRes [24960];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[120 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[30 + i]);
}
p_res = &p_bnProcBufRes[132];
p_llrRes = (__m256i*) &llrRes [18432];
p_res = &p_bnProcBufRes[36];
p_llrRes = (__m512i*) &llrRes [24960];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[132 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]);
}
p_res = &p_bnProcBufRes[144];
p_llrRes = (__m256i*) &llrRes [18432];
p_res = &p_bnProcBufRes[42];
p_llrRes = (__m512i*) &llrRes [24960];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[144 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[42 + i]);
}
p_res = &p_bnProcBufRes[156];
p_llrRes = (__m256i*) &llrRes [18432];
p_res = &p_bnProcBufRes[48];
p_llrRes = (__m512i*) &llrRes [24960];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[156 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]);
}
// Process group with 15 CNs
// Process group with 16 CNs
M = (1*Z + 31)>>5;
p_bnProcBuf = (__m256i*) &bnProcBuf [52224];
p_bnProcBufRes = (__m256i*) &bnProcBufRes [52224];
p_res = &p_bnProcBufRes[0];
p_llrRes = (__m256i*) &llrRes [18816];
p_res = &p_bnProcBufRes[54];
p_llrRes = (__m512i*) &llrRes [24960];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[54 + i]);
}
p_res = &p_bnProcBufRes[12];
p_llrRes = (__m256i*) &llrRes [18816];
p_res = &p_bnProcBufRes[60];
p_llrRes = (__m512i*) &llrRes [24960];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]);
}
p_res = &p_bnProcBufRes[24];
p_llrRes = (__m256i*) &llrRes [18816];
p_res = &p_bnProcBufRes[66];
p_llrRes = (__m512i*) &llrRes [24960];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[66 + i]);
}
p_res = &p_bnProcBufRes[36];
p_llrRes = (__m256i*) &llrRes [18816];
p_res = &p_bnProcBufRes[72];
p_llrRes = (__m512i*) &llrRes [24960];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]);
}
p_res = &p_bnProcBufRes[48];
p_llrRes = (__m256i*) &llrRes [18816];
// Process group with 14 CNs
// Process group with 15 CNs
// Process group with 16 CNs
// Process group with 17 CNs
// Process group with 18 CNs
// Process group with 19 CNs
// Process group with 20 CNs
// Process group with 21 CNs
// Process group with 22 CNs
// Process group with 23 CNs
// Process group with 24 CNs
// Process group with 25 CNs
// Process group with 26 CNs
// Process group with 27 CNs
// Process group with 28 CNs
M = (1*Z + 63)>>6;
p_bnProcBuf = (__m512i*) &bnProcBuf [99072];
p_bnProcBufRes = (__m512i*) &bnProcBufRes [99072];
p_res = &p_bnProcBufRes[0];
p_llrRes = (__m512i*) &llrRes [25344];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
}
p_res = &p_bnProcBufRes[60];
p_llrRes = (__m256i*) &llrRes [18816];
p_res = &p_bnProcBufRes[6];
p_llrRes = (__m512i*) &llrRes [25344];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]);
}
p_res = &p_bnProcBufRes[72];
p_llrRes = (__m256i*) &llrRes [18816];
p_res = &p_bnProcBufRes[12];
p_llrRes = (__m512i*) &llrRes [25344];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]);
}
p_res = &p_bnProcBufRes[84];
p_llrRes = (__m256i*) &llrRes [18816];
p_res = &p_bnProcBufRes[18];
p_llrRes = (__m512i*) &llrRes [25344];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[84 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]);
}
p_res = &p_bnProcBufRes[96];
p_llrRes = (__m256i*) &llrRes [18816];
p_res = &p_bnProcBufRes[24];
p_llrRes = (__m512i*) &llrRes [25344];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]);
}
p_res = &p_bnProcBufRes[108];
p_llrRes = (__m256i*) &llrRes [18816];
p_res = &p_bnProcBufRes[30];
p_llrRes = (__m512i*) &llrRes [25344];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[108 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[30 + i]);
}
p_res = &p_bnProcBufRes[120];
p_llrRes = (__m256i*) &llrRes [18816];
p_res = &p_bnProcBufRes[36];
p_llrRes = (__m512i*) &llrRes [25344];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[120 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]);
}
p_res = &p_bnProcBufRes[132];
p_llrRes = (__m256i*) &llrRes [18816];
p_res = &p_bnProcBufRes[42];
p_llrRes = (__m512i*) &llrRes [25344];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[132 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[42 + i]);
}
p_res = &p_bnProcBufRes[144];
p_llrRes = (__m256i*) &llrRes [18816];
p_res = &p_bnProcBufRes[48];
p_llrRes = (__m512i*) &llrRes [25344];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[144 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]);
}
p_res = &p_bnProcBufRes[156];
p_llrRes = (__m256i*) &llrRes [18816];
p_res = &p_bnProcBufRes[54];
p_llrRes = (__m512i*) &llrRes [25344];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[156 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[54 + i]);
}
p_res = &p_bnProcBufRes[168];
p_llrRes = (__m256i*) &llrRes [18816];
p_res = &p_bnProcBufRes[60];
p_llrRes = (__m512i*) &llrRes [25344];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[168 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]);
}
p_res = &p_bnProcBufRes[180];
p_llrRes = (__m256i*) &llrRes [18816];
p_res = &p_bnProcBufRes[66];
p_llrRes = (__m512i*) &llrRes [25344];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[180 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[66 + i]);
}
// Process group with 17 CNs
// Process group with 18 CNs
// Process group with 19 CNs
// Process group with 20 CNs
// Process group with 21 CNs
// Process group with 22 CNs
M = (1*Z + 31)>>5;
p_bnProcBuf = (__m256i*) &bnProcBuf [58368];
p_bnProcBufRes = (__m256i*) &bnProcBufRes [58368];
p_res = &p_bnProcBufRes[0];
p_llrRes = (__m256i*) &llrRes [19200];
p_res = &p_bnProcBufRes[72];
p_llrRes = (__m512i*) &llrRes [25344];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]);
}
p_res = &p_bnProcBufRes[12];
p_llrRes = (__m256i*) &llrRes [19200];
p_res = &p_bnProcBufRes[78];
p_llrRes = (__m512i*) &llrRes [25344];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[78 + i]);
}
p_res = &p_bnProcBufRes[24];
p_llrRes = (__m256i*) &llrRes [19200];
p_res = &p_bnProcBufRes[84];
p_llrRes = (__m512i*) &llrRes [25344];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[84 + i]);
}
p_res = &p_bnProcBufRes[36];
p_llrRes = (__m256i*) &llrRes [19200];
p_res = &p_bnProcBufRes[90];
p_llrRes = (__m512i*) &llrRes [25344];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[90 + i]);
}
p_res = &p_bnProcBufRes[48];
p_llrRes = (__m256i*) &llrRes [19200];
p_res = &p_bnProcBufRes[96];
p_llrRes = (__m512i*) &llrRes [25344];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]);
}
p_res = &p_bnProcBufRes[60];
p_llrRes = (__m256i*) &llrRes [19200];
p_res = &p_bnProcBufRes[102];
p_llrRes = (__m512i*) &llrRes [25344];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[102 + i]);
}
p_res = &p_bnProcBufRes[72];
p_llrRes = (__m256i*) &llrRes [19200];
p_res = &p_bnProcBufRes[108];
p_llrRes = (__m512i*) &llrRes [25344];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[108 + i]);
}
p_res = &p_bnProcBufRes[84];
p_llrRes = (__m256i*) &llrRes [19200];
p_res = &p_bnProcBufRes[114];
p_llrRes = (__m512i*) &llrRes [25344];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[84 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[114 + i]);
}
p_res = &p_bnProcBufRes[96];
p_llrRes = (__m256i*) &llrRes [19200];
p_res = &p_bnProcBufRes[120];
p_llrRes = (__m512i*) &llrRes [25344];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[120 + i]);
}
p_res = &p_bnProcBufRes[108];
p_llrRes = (__m256i*) &llrRes [19200];
p_res = &p_bnProcBufRes[126];
p_llrRes = (__m512i*) &llrRes [25344];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[108 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[126 + i]);
}
p_res = &p_bnProcBufRes[120];
p_llrRes = (__m256i*) &llrRes [19200];
p_res = &p_bnProcBufRes[132];
p_llrRes = (__m512i*) &llrRes [25344];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[120 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[132 + i]);
}
p_res = &p_bnProcBufRes[132];
p_llrRes = (__m256i*) &llrRes [19200];
p_res = &p_bnProcBufRes[138];
p_llrRes = (__m512i*) &llrRes [25344];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[132 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[138 + i]);
}
p_res = &p_bnProcBufRes[144];
p_llrRes = (__m256i*) &llrRes [19200];
p_llrRes = (__m512i*) &llrRes [25344];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[144 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[144 + i]);
}
p_res = &p_bnProcBufRes[156];
p_llrRes = (__m256i*) &llrRes [19200];
p_res = &p_bnProcBufRes[150];
p_llrRes = (__m512i*) &llrRes [25344];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[156 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[150 + i]);
}
p_res = &p_bnProcBufRes[168];
p_llrRes = (__m256i*) &llrRes [19200];
p_res = &p_bnProcBufRes[156];
p_llrRes = (__m512i*) &llrRes [25344];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[168 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[156 + i]);
}
p_res = &p_bnProcBufRes[180];
p_llrRes = (__m256i*) &llrRes [19200];
p_res = &p_bnProcBufRes[162];
p_llrRes = (__m512i*) &llrRes [25344];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[180 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[162 + i]);
}
p_res = &p_bnProcBufRes[192];
p_llrRes = (__m256i*) &llrRes [19200];
// Process group with 29 CNs
// Process group with 30 CNs
M = (1*Z + 63)>>6;
p_bnProcBuf = (__m512i*) &bnProcBuf [109824];
p_bnProcBufRes = (__m512i*) &bnProcBufRes [109824];
p_res = &p_bnProcBufRes[0];
p_llrRes = (__m512i*) &llrRes [25728];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[192 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
}
p_res = &p_bnProcBufRes[204];
p_llrRes = (__m256i*) &llrRes [19200];
p_res = &p_bnProcBufRes[6];
p_llrRes = (__m512i*) &llrRes [25728];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[204 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]);
}
p_res = &p_bnProcBufRes[216];
p_llrRes = (__m256i*) &llrRes [19200];
p_res = &p_bnProcBufRes[12];
p_llrRes = (__m512i*) &llrRes [25728];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[216 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]);
}
p_res = &p_bnProcBufRes[228];
p_llrRes = (__m256i*) &llrRes [19200];
p_res = &p_bnProcBufRes[18];
p_llrRes = (__m512i*) &llrRes [25728];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[228 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]);
}
p_res = &p_bnProcBufRes[240];
p_llrRes = (__m256i*) &llrRes [19200];
p_res = &p_bnProcBufRes[24];
p_llrRes = (__m512i*) &llrRes [25728];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[240 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]);
}
p_res = &p_bnProcBufRes[252];
p_llrRes = (__m256i*) &llrRes [19200];
p_res = &p_bnProcBufRes[30];
p_llrRes = (__m512i*) &llrRes [25728];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[252 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[30 + i]);
}
// Process group with 23 CNs
M = (1*Z + 31)>>5;
p_bnProcBuf = (__m256i*) &bnProcBuf [66816];
p_bnProcBufRes = (__m256i*) &bnProcBufRes [66816];
p_res = &p_bnProcBufRes[0];
p_llrRes = (__m256i*) &llrRes [19584];
p_res = &p_bnProcBufRes[36];
p_llrRes = (__m512i*) &llrRes [25728];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]);
}
p_res = &p_bnProcBufRes[12];
p_llrRes = (__m256i*) &llrRes [19584];
p_res = &p_bnProcBufRes[42];
p_llrRes = (__m512i*) &llrRes [25728];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[42 + i]);
}
p_res = &p_bnProcBufRes[24];
p_llrRes = (__m256i*) &llrRes [19584];
p_res = &p_bnProcBufRes[48];
p_llrRes = (__m512i*) &llrRes [25728];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]);
}
p_res = &p_bnProcBufRes[36];
p_llrRes = (__m256i*) &llrRes [19584];
p_res = &p_bnProcBufRes[54];
p_llrRes = (__m512i*) &llrRes [25728];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[54 + i]);
}
p_res = &p_bnProcBufRes[48];
p_llrRes = (__m256i*) &llrRes [19584];
p_res = &p_bnProcBufRes[60];
p_llrRes = (__m512i*) &llrRes [25728];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]);
}
p_res = &p_bnProcBufRes[60];
p_llrRes = (__m256i*) &llrRes [19584];
p_res = &p_bnProcBufRes[66];
p_llrRes = (__m512i*) &llrRes [25728];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[66 + i]);
}
p_res = &p_bnProcBufRes[72];
p_llrRes = (__m256i*) &llrRes [19584];
p_llrRes = (__m512i*) &llrRes [25728];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]);
}
p_res = &p_bnProcBufRes[78];
p_llrRes = (__m512i*) &llrRes [25728];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[78 + i]);
}
p_res = &p_bnProcBufRes[84];
p_llrRes = (__m256i*) &llrRes [19584];
p_llrRes = (__m512i*) &llrRes [25728];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[84 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[84 + i]);
}
p_res = &p_bnProcBufRes[96];
p_llrRes = (__m256i*) &llrRes [19584];
p_res = &p_bnProcBufRes[90];
p_llrRes = (__m512i*) &llrRes [25728];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[90 + i]);
}
p_res = &p_bnProcBufRes[108];
p_llrRes = (__m256i*) &llrRes [19584];
p_res = &p_bnProcBufRes[96];
p_llrRes = (__m512i*) &llrRes [25728];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[108 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]);
}
p_res = &p_bnProcBufRes[120];
p_llrRes = (__m256i*) &llrRes [19584];
p_res = &p_bnProcBufRes[102];
p_llrRes = (__m512i*) &llrRes [25728];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[120 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[102 + i]);
}
p_res = &p_bnProcBufRes[132];
p_llrRes = (__m256i*) &llrRes [19584];
p_res = &p_bnProcBufRes[108];
p_llrRes = (__m512i*) &llrRes [25728];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[132 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[108 + i]);
}
p_res = &p_bnProcBufRes[144];
p_llrRes = (__m256i*) &llrRes [19584];
p_res = &p_bnProcBufRes[114];
p_llrRes = (__m512i*) &llrRes [25728];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[144 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[114 + i]);
}
p_res = &p_bnProcBufRes[156];
p_llrRes = (__m256i*) &llrRes [19584];
p_res = &p_bnProcBufRes[120];
p_llrRes = (__m512i*) &llrRes [25728];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[156 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[120 + i]);
}
p_res = &p_bnProcBufRes[168];
p_llrRes = (__m256i*) &llrRes [19584];
p_res = &p_bnProcBufRes[126];
p_llrRes = (__m512i*) &llrRes [25728];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[168 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[126 + i]);
}
p_res = &p_bnProcBufRes[180];
p_llrRes = (__m256i*) &llrRes [19584];
p_res = &p_bnProcBufRes[132];
p_llrRes = (__m512i*) &llrRes [25728];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[180 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[132 + i]);
}
p_res = &p_bnProcBufRes[192];
p_llrRes = (__m256i*) &llrRes [19584];
p_res = &p_bnProcBufRes[138];
p_llrRes = (__m512i*) &llrRes [25728];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[192 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[138 + i]);
}
p_res = &p_bnProcBufRes[204];
p_llrRes = (__m256i*) &llrRes [19584];
p_res = &p_bnProcBufRes[144];
p_llrRes = (__m512i*) &llrRes [25728];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[204 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[144 + i]);
}
p_res = &p_bnProcBufRes[216];
p_llrRes = (__m256i*) &llrRes [19584];
p_res = &p_bnProcBufRes[150];
p_llrRes = (__m512i*) &llrRes [25728];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[216 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[150 + i]);
}
p_res = &p_bnProcBufRes[228];
p_llrRes = (__m256i*) &llrRes [19584];
p_res = &p_bnProcBufRes[156];
p_llrRes = (__m512i*) &llrRes [25728];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[228 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[156 + i]);
}
p_res = &p_bnProcBufRes[240];
p_llrRes = (__m256i*) &llrRes [19584];
p_res = &p_bnProcBufRes[162];
p_llrRes = (__m512i*) &llrRes [25728];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[240 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[162 + i]);
}
p_res = &p_bnProcBufRes[252];
p_llrRes = (__m256i*) &llrRes [19584];
p_res = &p_bnProcBufRes[168];
p_llrRes = (__m512i*) &llrRes [25728];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[252 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[168 + i]);
}
p_res = &p_bnProcBufRes[264];
p_llrRes = (__m256i*) &llrRes [19584];
p_res = &p_bnProcBufRes[174];
p_llrRes = (__m512i*) &llrRes [25728];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[264 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[174 + i]);
}
// Process group with 24 CNs
// Process group with 25 CNs
// Process group with 26 CNs
// Process group with 27 CNs
// Process group with 28 CNs
// Process group with 29 CNs
// Process group with 30 CNs
}
static inline void nrLDPC_bnProc_BG1_R23_AVX512(int8_t* bnProcBuf,int8_t* bnProcBufRes, int8_t* llrRes, uint16_t Z ) {
__m512i* p_bnProcBuf;
__m512i* p_bnProcBufRes;
__m512i* p_llrRes;
__m512i* p_res;
uint32_t M, i;
// Process group with 2 CNs
M = (1*Z + 63)>>6;
p_bnProcBuf = (__m512i*) &bnProcBuf [3456];
p_bnProcBufRes = (__m512i*) &bnProcBufRes [3456];
p_res = &p_bnProcBufRes[0];
p_llrRes = (__m512i*) &llrRes [3456];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
}
p_res = &p_bnProcBufRes[6];
p_llrRes = (__m512i*) &llrRes [3456];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]);
}
// Process group with 3 CNs
M = (5*Z + 63)>>6;
p_bnProcBuf = (__m512i*) &bnProcBuf [4224];
p_bnProcBufRes = (__m512i*) &bnProcBufRes [4224];
p_res = &p_bnProcBufRes[0];
p_llrRes = (__m512i*) &llrRes [3840];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
}
p_res = &p_bnProcBufRes[30];
p_llrRes = (__m512i*) &llrRes [3840];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[30 + i]);
}
p_res = &p_bnProcBufRes[60];
p_llrRes = (__m512i*) &llrRes [3840];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]);
}
// Process group with 4 CNs
M = (3*Z + 63)>>6;
p_bnProcBuf = (__m512i*) &bnProcBuf [9984];
p_bnProcBufRes = (__m512i*) &bnProcBufRes [9984];
p_res = &p_bnProcBufRes[0];
p_llrRes = (__m512i*) &llrRes [5760];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
}
p_res = &p_bnProcBufRes[18];
p_llrRes = (__m512i*) &llrRes [5760];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]);
}
p_res = &p_bnProcBufRes[36];
p_llrRes = (__m512i*) &llrRes [5760];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]);
}
p_res = &p_bnProcBufRes[54];
p_llrRes = (__m512i*) &llrRes [5760];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[54 + i]);
}
// Process group with 5 CNs
M = (7*Z + 63)>>6;
p_bnProcBuf = (__m512i*) &bnProcBuf [14592];
p_bnProcBufRes = (__m512i*) &bnProcBufRes [14592];
p_res = &p_bnProcBufRes[0];
p_llrRes = (__m512i*) &llrRes [6912];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
}
p_res = &p_bnProcBufRes[42];
p_llrRes = (__m512i*) &llrRes [6912];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[42 + i]);
}
p_res = &p_bnProcBufRes[84];
p_llrRes = (__m512i*) &llrRes [6912];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[84 + i]);
}
p_res = &p_bnProcBufRes[126];
p_llrRes = (__m512i*) &llrRes [6912];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[126 + i]);
}
p_res = &p_bnProcBufRes[168];
p_llrRes = (__m512i*) &llrRes [6912];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[168 + i]);
}
// Process group with 6 CNs
M = (8*Z + 63)>>6;
p_bnProcBuf = (__m512i*) &bnProcBuf [28032];
p_bnProcBufRes = (__m512i*) &bnProcBufRes [28032];
p_res = &p_bnProcBufRes[0];
p_llrRes = (__m512i*) &llrRes [9600];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
}
p_res = &p_bnProcBufRes[48];
p_llrRes = (__m512i*) &llrRes [9600];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]);
}
p_res = &p_bnProcBufRes[96];
p_llrRes = (__m512i*) &llrRes [9600];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]);
}
p_res = &p_bnProcBufRes[144];
p_llrRes = (__m512i*) &llrRes [9600];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[144 + i]);
}
p_res = &p_bnProcBufRes[192];
p_llrRes = (__m512i*) &llrRes [9600];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[192 + i]);
}
p_res = &p_bnProcBufRes[240];
p_llrRes = (__m512i*) &llrRes [9600];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[240 + i]);
}
// Process group with 7 CNs
// Process group with 8 CNs
// Process group with 9 CNs
// Process group with 10 CNs
// Process group with 11 CNs
M = (1*Z + 63)>>6;
p_bnProcBuf = (__m512i*) &bnProcBuf [46464];
p_bnProcBufRes = (__m512i*) &bnProcBufRes [46464];
p_res = &p_bnProcBufRes[0];
p_llrRes = (__m512i*) &llrRes [12672];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
}
p_res = &p_bnProcBufRes[6];
p_llrRes = (__m512i*) &llrRes [12672];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]);
}
p_res = &p_bnProcBufRes[12];
p_llrRes = (__m512i*) &llrRes [12672];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]);
}
p_res = &p_bnProcBufRes[18];
p_llrRes = (__m512i*) &llrRes [12672];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]);
}
p_res = &p_bnProcBufRes[24];
p_llrRes = (__m512i*) &llrRes [12672];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]);
}
p_res = &p_bnProcBufRes[30];
p_llrRes = (__m512i*) &llrRes [12672];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[30 + i]);
}
p_res = &p_bnProcBufRes[36];
p_llrRes = (__m512i*) &llrRes [12672];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]);
}
p_res = &p_bnProcBufRes[42];
p_llrRes = (__m512i*) &llrRes [12672];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[42 + i]);
}
p_res = &p_bnProcBufRes[48];
p_llrRes = (__m512i*) &llrRes [12672];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]);
}
p_res = &p_bnProcBufRes[54];
p_llrRes = (__m512i*) &llrRes [12672];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[54 + i]);
}
p_res = &p_bnProcBufRes[60];
p_llrRes = (__m512i*) &llrRes [12672];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]);
}
// Process group with 12 CNs
M = (1*Z + 63)>>6;
p_bnProcBuf = (__m512i*) &bnProcBuf [50688];
p_bnProcBufRes = (__m512i*) &bnProcBufRes [50688];
p_res = &p_bnProcBufRes[0];
p_llrRes = (__m512i*) &llrRes [13056];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
}
p_res = &p_bnProcBufRes[6];
p_llrRes = (__m512i*) &llrRes [13056];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]);
}
p_res = &p_bnProcBufRes[12];
p_llrRes = (__m512i*) &llrRes [13056];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]);
}
p_res = &p_bnProcBufRes[18];
p_llrRes = (__m512i*) &llrRes [13056];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]);
}
p_res = &p_bnProcBufRes[24];
p_llrRes = (__m512i*) &llrRes [13056];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]);
}
p_res = &p_bnProcBufRes[30];
p_llrRes = (__m512i*) &llrRes [13056];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[30 + i]);
}
p_res = &p_bnProcBufRes[36];
p_llrRes = (__m512i*) &llrRes [13056];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]);
}
p_res = &p_bnProcBufRes[42];
p_llrRes = (__m512i*) &llrRes [13056];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[42 + i]);
}
p_res = &p_bnProcBufRes[48];
p_llrRes = (__m512i*) &llrRes [13056];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]);
}
p_res = &p_bnProcBufRes[54];
p_llrRes = (__m512i*) &llrRes [13056];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[54 + i]);
}
p_res = &p_bnProcBufRes[60];
p_llrRes = (__m512i*) &llrRes [13056];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]);
}
p_res = &p_bnProcBufRes[66];
p_llrRes = (__m512i*) &llrRes [13056];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[66 + i]);
}
// Process group with 13 CNs
// Process group with 14 CNs
// Process group with 15 CNs
// Process group with 16 CNs
// Process group with 17 CNs
// Process group with 18 CNs
// Process group with 19 CNs
// Process group with 20 CNs
// Process group with 21 CNs
// Process group with 22 CNs
// Process group with <23 CNs
// Process group with 24 CNs
// Process group with 25 CNs
// Process group with 26 CNs
// Process group with 27 CNs
// Process group with 28 CNs
// Process group with 29 CNs
// Process group with 30 CNs
}
static inline void nrLDPC_bnProc_BG1_R89_AVX512(int8_t* bnProcBuf,int8_t* bnProcBufRes, int8_t* llrRes, uint16_t Z ) {
__m512i* p_bnProcBuf;
__m512i* p_bnProcBufRes;
__m512i* p_llrRes;
__m512i* p_res;
uint32_t M, i;
// Process group with 2 CNs
M = (3*Z + 63)>>6;
p_bnProcBuf = (__m512i*) &bnProcBuf [384];
p_bnProcBufRes = (__m512i*) &bnProcBufRes [384];
p_res = &p_bnProcBufRes[0];
p_llrRes = (__m512i*) &llrRes [384];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
}
p_res = &p_bnProcBufRes[18];
p_llrRes = (__m512i*) &llrRes [384];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]);
}
// Process group with 3 CNs
M = (21*Z + 63)>>6;
p_bnProcBuf = (__m512i*) &bnProcBuf [2688];
p_bnProcBufRes = (__m512i*) &bnProcBufRes [2688];
p_res = &p_bnProcBufRes[0];
p_llrRes = (__m512i*) &llrRes [1536];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
}
p_res = &p_bnProcBufRes[126];
p_llrRes = (__m512i*) &llrRes [1536];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[126 + i]);
}
p_res = &p_bnProcBufRes[252];
p_llrRes = (__m512i*) &llrRes [1536];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[252 + i]);
}
// Process group with 4 CNs
M = (1*Z + 63)>>6;
p_bnProcBuf = (__m512i*) &bnProcBuf [26880];
p_bnProcBufRes = (__m512i*) &bnProcBufRes [26880];
p_res = &p_bnProcBufRes[0];
p_llrRes = (__m512i*) &llrRes [9600];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
}
p_res = &p_bnProcBufRes[6];
p_llrRes = (__m512i*) &llrRes [9600];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]);
}
p_res = &p_bnProcBufRes[12];
p_llrRes = (__m512i*) &llrRes [9600];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]);
}
p_res = &p_bnProcBufRes[18];
p_llrRes = (__m512i*) &llrRes [9600];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]);
}
// Process group with 5 CNs
M = (1*Z + 63)>>6;
p_bnProcBuf = (__m512i*) &bnProcBuf [28416];
p_bnProcBufRes = (__m512i*) &bnProcBufRes [28416];
p_res = &p_bnProcBufRes[0];
p_llrRes = (__m512i*) &llrRes [9984];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
}
p_res = &p_bnProcBufRes[6];
p_llrRes = (__m512i*) &llrRes [9984];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]);
}
p_res = &p_bnProcBufRes[12];
p_llrRes = (__m512i*) &llrRes [9984];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]);
}
p_res = &p_bnProcBufRes[18];
p_llrRes = (__m512i*) &llrRes [9984];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]);
}
p_res = &p_bnProcBufRes[24];
p_llrRes = (__m512i*) &llrRes [9984];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]);
}
// Process group with 6 CNs
// Process group with 7 CNs
// Process group with 8 CNs
// Process group with 9 CNs
// Process group with 10 CNs
// Process group with 11 CNs
// Process group with 12 CNs
// Process group with 13 CNs
// Process group with 14 CNs
// Process group with 15 CNs
// Process group with 16 CNs
// Process group with 17 CNs
// Process group with 18 CNs
// Process group with 19 CNs
// Process group with 20 CNs
// Process group with 21 CNs
// Process group with 22 CNs
// Process group with <23 CNs
// Process group with 24 CNs
// Process group with 25 CNs
// Process group with 26 CNs
// Process group with 27 CNs
// Process group with 28 CNs
// Process group with 29 CNs
// Process group with 30 CNs
}
#include <stdint.h>
#include <immintrin.h>
void nrLDPC_bnProc_BG2_R13_AVX2(int8_t* bnProcBuf,int8_t* bnProcBufRes, int8_t* llrRes, uint16_t Z ) {
__m256i* p_bnProcBuf;
__m256i* p_bnProcBufRes;
__m256i* p_llrRes;
__m256i* p_res;
void nrLDPC_bnProc_BG2_R13_AVX512(int8_t* bnProcBuf,int8_t* bnProcBufRes, int8_t* llrRes, uint16_t Z ) {
__m512i* p_bnProcBuf;
__m512i* p_bnProcBufRes;
__m512i* p_llrRes;
__m512i* p_res;
uint32_t M, i;
// Process group with 2 CNs
M = (1*Z + 31)>>5;
p_bnProcBuf = (__m256i*) &bnProcBuf [6912];
p_bnProcBufRes = (__m256i*) &bnProcBufRes [6912];
M = (1*Z + 63)>>6;
p_bnProcBuf = (__m512i*) &bnProcBuf [6912];
p_bnProcBufRes = (__m512i*) &bnProcBufRes [6912];
p_res = &p_bnProcBufRes[0];
p_llrRes = (__m256i*) &llrRes [6912];
p_llrRes = (__m512i*) &llrRes [6912];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
}
p_res = &p_bnProcBufRes[12];
p_llrRes = (__m256i*) &llrRes [6912];
p_res = &p_bnProcBufRes[6];
p_llrRes = (__m512i*) &llrRes [6912];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]);
}
// Process group with 3 CNs
// Process group with 4 CNs
M = (2*Z + 31)>>5;
p_bnProcBuf = (__m256i*) &bnProcBuf [7680];
p_bnProcBufRes = (__m256i*) &bnProcBufRes [7680];
M = (2*Z + 63)>>6;
p_bnProcBuf = (__m512i*) &bnProcBuf [7680];
p_bnProcBufRes = (__m512i*) &bnProcBufRes [7680];
p_res = &p_bnProcBufRes[0];
p_llrRes = (__m256i*) &llrRes [7296];
p_llrRes = (__m512i*) &llrRes [7296];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
}
p_res = &p_bnProcBufRes[24];
p_llrRes = (__m256i*) &llrRes [7296];
p_res = &p_bnProcBufRes[12];
p_llrRes = (__m512i*) &llrRes [7296];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]);
}
p_res = &p_bnProcBufRes[48];
p_llrRes = (__m256i*) &llrRes [7296];
p_res = &p_bnProcBufRes[24];
p_llrRes = (__m512i*) &llrRes [7296];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]);
}
p_res = &p_bnProcBufRes[72];
p_llrRes = (__m256i*) &llrRes [7296];
p_res = &p_bnProcBufRes[36];
p_llrRes = (__m512i*) &llrRes [7296];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]);
}
// Process group with 5 CNs
M = (1*Z + 31)>>5;
p_bnProcBuf = (__m256i*) &bnProcBuf [10752];
p_bnProcBufRes = (__m256i*) &bnProcBufRes [10752];
M = (1*Z + 63)>>6;
p_bnProcBuf = (__m512i*) &bnProcBuf [10752];
p_bnProcBufRes = (__m512i*) &bnProcBufRes [10752];
p_res = &p_bnProcBufRes[0];
p_llrRes = (__m256i*) &llrRes [8064];
p_llrRes = (__m512i*) &llrRes [8064];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
}
p_res = &p_bnProcBufRes[12];
p_llrRes = (__m256i*) &llrRes [8064];
p_res = &p_bnProcBufRes[6];
p_llrRes = (__m512i*) &llrRes [8064];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]);
}
p_res = &p_bnProcBufRes[24];
p_llrRes = (__m256i*) &llrRes [8064];
p_res = &p_bnProcBufRes[12];
p_llrRes = (__m512i*) &llrRes [8064];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]);
}
p_res = &p_bnProcBufRes[36];
p_llrRes = (__m256i*) &llrRes [8064];
p_res = &p_bnProcBufRes[18];
p_llrRes = (__m512i*) &llrRes [8064];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]);
}
p_res = &p_bnProcBufRes[48];
p_llrRes = (__m256i*) &llrRes [8064];
p_res = &p_bnProcBufRes[24];
p_llrRes = (__m512i*) &llrRes [8064];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]);
}
// Process group with 6 CNs
M = (5*Z + 31)>>5;
p_bnProcBuf = (__m256i*) &bnProcBuf [12672];
p_bnProcBufRes = (__m256i*) &bnProcBufRes [12672];
M = (5*Z + 63)>>6;
p_bnProcBuf = (__m512i*) &bnProcBuf [12672];
p_bnProcBufRes = (__m512i*) &bnProcBufRes [12672];
p_res = &p_bnProcBufRes[0];
p_llrRes = (__m256i*) &llrRes [8448];
p_llrRes = (__m512i*) &llrRes [8448];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
}
p_res = &p_bnProcBufRes[60];
p_llrRes = (__m256i*) &llrRes [8448];
p_res = &p_bnProcBufRes[30];
p_llrRes = (__m512i*) &llrRes [8448];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[30 + i]);
}
p_res = &p_bnProcBufRes[120];
p_llrRes = (__m256i*) &llrRes [8448];
p_res = &p_bnProcBufRes[60];
p_llrRes = (__m512i*) &llrRes [8448];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[120 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]);
}
p_res = &p_bnProcBufRes[180];
p_llrRes = (__m256i*) &llrRes [8448];
p_res = &p_bnProcBufRes[90];
p_llrRes = (__m512i*) &llrRes [8448];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[180 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[90 + i]);
}
p_res = &p_bnProcBufRes[240];
p_llrRes = (__m256i*) &llrRes [8448];
p_res = &p_bnProcBufRes[120];
p_llrRes = (__m512i*) &llrRes [8448];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[240 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[120 + i]);
}
p_res = &p_bnProcBufRes[300];
p_llrRes = (__m256i*) &llrRes [8448];
p_res = &p_bnProcBufRes[150];
p_llrRes = (__m512i*) &llrRes [8448];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[300 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[150 + i]);
}
// Process group with 7 CNs
M = (1*Z + 31)>>5;
p_bnProcBuf = (__m256i*) &bnProcBuf [24192];
p_bnProcBufRes = (__m256i*) &bnProcBufRes [24192];
M = (1*Z + 63)>>6;
p_bnProcBuf = (__m512i*) &bnProcBuf [24192];
p_bnProcBufRes = (__m512i*) &bnProcBufRes [24192];
p_res = &p_bnProcBufRes[0];
p_llrRes = (__m256i*) &llrRes [10368];
p_llrRes = (__m512i*) &llrRes [10368];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
}
p_res = &p_bnProcBufRes[12];
p_llrRes = (__m256i*) &llrRes [10368];
p_res = &p_bnProcBufRes[6];
p_llrRes = (__m512i*) &llrRes [10368];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]);
}
p_res = &p_bnProcBufRes[24];
p_llrRes = (__m256i*) &llrRes [10368];
p_res = &p_bnProcBufRes[12];
p_llrRes = (__m512i*) &llrRes [10368];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]);
}
p_res = &p_bnProcBufRes[36];
p_llrRes = (__m256i*) &llrRes [10368];
p_res = &p_bnProcBufRes[18];
p_llrRes = (__m512i*) &llrRes [10368];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]);
}
p_res = &p_bnProcBufRes[48];
p_llrRes = (__m256i*) &llrRes [10368];
p_res = &p_bnProcBufRes[24];
p_llrRes = (__m512i*) &llrRes [10368];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]);
}
p_res = &p_bnProcBufRes[60];
p_llrRes = (__m256i*) &llrRes [10368];
p_res = &p_bnProcBufRes[30];
p_llrRes = (__m512i*) &llrRes [10368];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[30 + i]);
}
p_res = &p_bnProcBufRes[72];
p_llrRes = (__m256i*) &llrRes [10368];
p_res = &p_bnProcBufRes[36];
p_llrRes = (__m512i*) &llrRes [10368];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]);
}
// Process group with 8 CNs
M = (1*Z + 31)>>5;
p_bnProcBuf = (__m256i*) &bnProcBuf [26880];
p_bnProcBufRes = (__m256i*) &bnProcBufRes [26880];
M = (1*Z + 63)>>6;
p_bnProcBuf = (__m512i*) &bnProcBuf [26880];
p_bnProcBufRes = (__m512i*) &bnProcBufRes [26880];
p_res = &p_bnProcBufRes[0];
p_llrRes = (__m256i*) &llrRes [10752];
p_llrRes = (__m512i*) &llrRes [10752];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
}
p_res = &p_bnProcBufRes[12];
p_llrRes = (__m256i*) &llrRes [10752];
p_res = &p_bnProcBufRes[6];
p_llrRes = (__m512i*) &llrRes [10752];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]);
}
p_res = &p_bnProcBufRes[24];
p_llrRes = (__m256i*) &llrRes [10752];
p_res = &p_bnProcBufRes[12];
p_llrRes = (__m512i*) &llrRes [10752];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]);
}
p_res = &p_bnProcBufRes[36];
p_llrRes = (__m256i*) &llrRes [10752];
p_res = &p_bnProcBufRes[18];
p_llrRes = (__m512i*) &llrRes [10752];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]);
}
p_res = &p_bnProcBufRes[48];
p_llrRes = (__m256i*) &llrRes [10752];
p_res = &p_bnProcBufRes[24];
p_llrRes = (__m512i*) &llrRes [10752];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]);
}
p_res = &p_bnProcBufRes[60];
p_llrRes = (__m256i*) &llrRes [10752];
p_res = &p_bnProcBufRes[30];
p_llrRes = (__m512i*) &llrRes [10752];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[30 + i]);
}
p_res = &p_bnProcBufRes[72];
p_llrRes = (__m256i*) &llrRes [10752];
p_res = &p_bnProcBufRes[36];
p_llrRes = (__m512i*) &llrRes [10752];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]);
}
p_res = &p_bnProcBufRes[84];
p_llrRes = (__m256i*) &llrRes [10752];
p_res = &p_bnProcBufRes[42];
p_llrRes = (__m512i*) &llrRes [10752];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[84 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[42 + i]);
}
// Process group with 9 CNs
// Process group with 10 CNs
// Process group with 11 CNs
// Process group with 12 CNs
// Process group with 13 CNs
M = (1*Z + 31)>>5;
p_bnProcBuf = (__m256i*) &bnProcBuf [29952];
p_bnProcBufRes = (__m256i*) &bnProcBufRes [29952];
M = (1*Z + 63)>>6;
p_bnProcBuf = (__m512i*) &bnProcBuf [29952];
p_bnProcBufRes = (__m512i*) &bnProcBufRes [29952];
p_res = &p_bnProcBufRes[0];
p_llrRes = (__m256i*) &llrRes [11136];
p_llrRes = (__m512i*) &llrRes [11136];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
}
p_res = &p_bnProcBufRes[12];
p_llrRes = (__m256i*) &llrRes [11136];
p_res = &p_bnProcBufRes[6];
p_llrRes = (__m512i*) &llrRes [11136];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]);
}
p_res = &p_bnProcBufRes[24];
p_llrRes = (__m256i*) &llrRes [11136];
p_res = &p_bnProcBufRes[12];
p_llrRes = (__m512i*) &llrRes [11136];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]);
}
p_res = &p_bnProcBufRes[36];
p_llrRes = (__m256i*) &llrRes [11136];
p_res = &p_bnProcBufRes[18];
p_llrRes = (__m512i*) &llrRes [11136];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]);
}
p_res = &p_bnProcBufRes[48];
p_llrRes = (__m256i*) &llrRes [11136];
p_res = &p_bnProcBufRes[24];
p_llrRes = (__m512i*) &llrRes [11136];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]);
}
p_res = &p_bnProcBufRes[60];
p_llrRes = (__m256i*) &llrRes [11136];
p_res = &p_bnProcBufRes[30];
p_llrRes = (__m512i*) &llrRes [11136];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[30 + i]);
}
p_res = &p_bnProcBufRes[72];
p_llrRes = (__m256i*) &llrRes [11136];
p_res = &p_bnProcBufRes[36];
p_llrRes = (__m512i*) &llrRes [11136];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]);
}
p_res = &p_bnProcBufRes[84];
p_llrRes = (__m256i*) &llrRes [11136];
p_res = &p_bnProcBufRes[42];
p_llrRes = (__m512i*) &llrRes [11136];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[84 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[42 + i]);
}
p_res = &p_bnProcBufRes[96];
p_llrRes = (__m256i*) &llrRes [11136];
p_res = &p_bnProcBufRes[48];
p_llrRes = (__m512i*) &llrRes [11136];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]);
}
p_res = &p_bnProcBufRes[108];
p_llrRes = (__m256i*) &llrRes [11136];
p_res = &p_bnProcBufRes[54];
p_llrRes = (__m512i*) &llrRes [11136];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[108 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[54 + i]);
}
p_res = &p_bnProcBufRes[120];
p_llrRes = (__m256i*) &llrRes [11136];
p_res = &p_bnProcBufRes[60];
p_llrRes = (__m512i*) &llrRes [11136];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[120 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]);
}
p_res = &p_bnProcBufRes[132];
p_llrRes = (__m256i*) &llrRes [11136];
p_res = &p_bnProcBufRes[66];
p_llrRes = (__m512i*) &llrRes [11136];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[132 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[66 + i]);
}
p_res = &p_bnProcBufRes[144];
p_llrRes = (__m256i*) &llrRes [11136];
p_res = &p_bnProcBufRes[72];
p_llrRes = (__m512i*) &llrRes [11136];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[144 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]);
}
// Process group with 14 CNs
M = (1*Z + 31)>>5;
p_bnProcBuf = (__m256i*) &bnProcBuf [34944];
p_bnProcBufRes = (__m256i*) &bnProcBufRes [34944];
M = (1*Z + 63)>>6;
p_bnProcBuf = (__m512i*) &bnProcBuf [34944];
p_bnProcBufRes = (__m512i*) &bnProcBufRes [34944];
p_res = &p_bnProcBufRes[0];
p_llrRes = (__m256i*) &llrRes [11520];
p_llrRes = (__m512i*) &llrRes [11520];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
}
p_res = &p_bnProcBufRes[12];
p_llrRes = (__m256i*) &llrRes [11520];
p_res = &p_bnProcBufRes[6];
p_llrRes = (__m512i*) &llrRes [11520];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]);
}
p_res = &p_bnProcBufRes[24];
p_llrRes = (__m256i*) &llrRes [11520];
p_res = &p_bnProcBufRes[12];
p_llrRes = (__m512i*) &llrRes [11520];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]);
}
p_res = &p_bnProcBufRes[36];
p_llrRes = (__m256i*) &llrRes [11520];
p_res = &p_bnProcBufRes[18];
p_llrRes = (__m512i*) &llrRes [11520];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]);
}
p_res = &p_bnProcBufRes[48];
p_llrRes = (__m256i*) &llrRes [11520];
p_res = &p_bnProcBufRes[24];
p_llrRes = (__m512i*) &llrRes [11520];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]);
}
p_res = &p_bnProcBufRes[60];
p_llrRes = (__m256i*) &llrRes [11520];
p_res = &p_bnProcBufRes[30];
p_llrRes = (__m512i*) &llrRes [11520];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[30 + i]);
}
p_res = &p_bnProcBufRes[72];
p_llrRes = (__m256i*) &llrRes [11520];
p_res = &p_bnProcBufRes[36];
p_llrRes = (__m512i*) &llrRes [11520];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]);
}
p_res = &p_bnProcBufRes[84];
p_llrRes = (__m256i*) &llrRes [11520];
p_res = &p_bnProcBufRes[42];
p_llrRes = (__m512i*) &llrRes [11520];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[84 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[42 + i]);
}
p_res = &p_bnProcBufRes[96];
p_llrRes = (__m256i*) &llrRes [11520];
p_res = &p_bnProcBufRes[48];
p_llrRes = (__m512i*) &llrRes [11520];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]);
}
p_res = &p_bnProcBufRes[108];
p_llrRes = (__m256i*) &llrRes [11520];
p_res = &p_bnProcBufRes[54];
p_llrRes = (__m512i*) &llrRes [11520];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[108 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[54 + i]);
}
p_res = &p_bnProcBufRes[120];
p_llrRes = (__m256i*) &llrRes [11520];
p_res = &p_bnProcBufRes[60];
p_llrRes = (__m512i*) &llrRes [11520];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[120 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]);
}
p_res = &p_bnProcBufRes[132];
p_llrRes = (__m256i*) &llrRes [11520];
p_res = &p_bnProcBufRes[66];
p_llrRes = (__m512i*) &llrRes [11520];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[132 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[66 + i]);
}
p_res = &p_bnProcBufRes[144];
p_llrRes = (__m256i*) &llrRes [11520];
p_res = &p_bnProcBufRes[72];
p_llrRes = (__m512i*) &llrRes [11520];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[144 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]);
}
p_res = &p_bnProcBufRes[156];
p_llrRes = (__m256i*) &llrRes [11520];
p_res = &p_bnProcBufRes[78];
p_llrRes = (__m512i*) &llrRes [11520];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[156 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[78 + i]);
}
// Process group with 15 CNs
// Process group with 16 CNs
M = (1*Z + 31)>>5;
p_bnProcBuf = (__m256i*) &bnProcBuf [40320];
p_bnProcBufRes = (__m256i*) &bnProcBufRes [40320];
M = (1*Z + 63)>>6;
p_bnProcBuf = (__m512i*) &bnProcBuf [40320];
p_bnProcBufRes = (__m512i*) &bnProcBufRes [40320];
p_res = &p_bnProcBufRes[0];
p_llrRes = (__m256i*) &llrRes [11904];
p_llrRes = (__m512i*) &llrRes [11904];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
}
p_res = &p_bnProcBufRes[12];
p_llrRes = (__m256i*) &llrRes [11904];
p_res = &p_bnProcBufRes[6];
p_llrRes = (__m512i*) &llrRes [11904];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]);
}
p_res = &p_bnProcBufRes[24];
p_llrRes = (__m256i*) &llrRes [11904];
p_res = &p_bnProcBufRes[12];
p_llrRes = (__m512i*) &llrRes [11904];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]);
}
p_res = &p_bnProcBufRes[36];
p_llrRes = (__m256i*) &llrRes [11904];
p_res = &p_bnProcBufRes[18];
p_llrRes = (__m512i*) &llrRes [11904];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]);
}
p_res = &p_bnProcBufRes[48];
p_llrRes = (__m256i*) &llrRes [11904];
p_res = &p_bnProcBufRes[24];
p_llrRes = (__m512i*) &llrRes [11904];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]);
}
p_res = &p_bnProcBufRes[60];
p_llrRes = (__m256i*) &llrRes [11904];
p_res = &p_bnProcBufRes[30];
p_llrRes = (__m512i*) &llrRes [11904];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[30 + i]);
}
p_res = &p_bnProcBufRes[72];
p_llrRes = (__m256i*) &llrRes [11904];
p_res = &p_bnProcBufRes[36];
p_llrRes = (__m512i*) &llrRes [11904];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]);
}
p_res = &p_bnProcBufRes[84];
p_llrRes = (__m256i*) &llrRes [11904];
p_res = &p_bnProcBufRes[42];
p_llrRes = (__m512i*) &llrRes [11904];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[84 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[42 + i]);
}
p_res = &p_bnProcBufRes[96];
p_llrRes = (__m256i*) &llrRes [11904];
p_res = &p_bnProcBufRes[48];
p_llrRes = (__m512i*) &llrRes [11904];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]);
}
p_res = &p_bnProcBufRes[108];
p_llrRes = (__m256i*) &llrRes [11904];
p_res = &p_bnProcBufRes[54];
p_llrRes = (__m512i*) &llrRes [11904];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[108 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[54 + i]);
}
p_res = &p_bnProcBufRes[120];
p_llrRes = (__m256i*) &llrRes [11904];
p_res = &p_bnProcBufRes[60];
p_llrRes = (__m512i*) &llrRes [11904];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[120 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]);
}
p_res = &p_bnProcBufRes[132];
p_llrRes = (__m256i*) &llrRes [11904];
p_res = &p_bnProcBufRes[66];
p_llrRes = (__m512i*) &llrRes [11904];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[132 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[66 + i]);
}
p_res = &p_bnProcBufRes[144];
p_llrRes = (__m256i*) &llrRes [11904];
p_res = &p_bnProcBufRes[72];
p_llrRes = (__m512i*) &llrRes [11904];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[144 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]);
}
p_res = &p_bnProcBufRes[156];
p_llrRes = (__m256i*) &llrRes [11904];
p_res = &p_bnProcBufRes[78];
p_llrRes = (__m512i*) &llrRes [11904];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[156 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[78 + i]);
}
p_res = &p_bnProcBufRes[168];
p_llrRes = (__m256i*) &llrRes [11904];
p_res = &p_bnProcBufRes[84];
p_llrRes = (__m512i*) &llrRes [11904];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[168 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[84 + i]);
}
p_res = &p_bnProcBufRes[180];
p_llrRes = (__m256i*) &llrRes [11904];
p_res = &p_bnProcBufRes[90];
p_llrRes = (__m512i*) &llrRes [11904];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[180 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[90 + i]);
}
// Process group with 17 CNs
// Process group with 18 CNs
......
#include <stdint.h>
#include <immintrin.h>
void nrLDPC_bnProc_BG2_R15_AVX512(int8_t* bnProcBuf,int8_t* bnProcBufRes, int8_t* llrRes, uint16_t Z ) {
__m512i* p_bnProcBuf;
__m512i* p_bnProcBufRes;
__m512i* p_llrRes;
__m512i* p_res;
uint32_t M, i;
// Process group with 2 CNs
// Process group with 3 CNs
// Process group with 4 CNs
// Process group with 5 CNs
M = (2*Z + 63)>>6;
p_bnProcBuf = (__m512i*) &bnProcBuf [14592];
p_bnProcBufRes = (__m512i*) &bnProcBufRes [14592];
p_res = &p_bnProcBufRes[0];
p_llrRes = (__m512i*) &llrRes [14592];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
}
p_res = &p_bnProcBufRes[12];
p_llrRes = (__m512i*) &llrRes [14592];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]);
}
p_res = &p_bnProcBufRes[24];
p_llrRes = (__m512i*) &llrRes [14592];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]);
}
p_res = &p_bnProcBufRes[36];
p_llrRes = (__m512i*) &llrRes [14592];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]);
}
p_res = &p_bnProcBufRes[48];
p_llrRes = (__m512i*) &llrRes [14592];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]);
}
// Process group with 6 CNs
M = (1*Z + 63)>>6;
p_bnProcBuf = (__m512i*) &bnProcBuf [18432];
p_bnProcBufRes = (__m512i*) &bnProcBufRes [18432];
p_res = &p_bnProcBufRes[0];
p_llrRes = (__m512i*) &llrRes [15360];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
}
p_res = &p_bnProcBufRes[6];
p_llrRes = (__m512i*) &llrRes [15360];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]);
}
p_res = &p_bnProcBufRes[12];
p_llrRes = (__m512i*) &llrRes [15360];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]);
}
p_res = &p_bnProcBufRes[18];
p_llrRes = (__m512i*) &llrRes [15360];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]);
}
p_res = &p_bnProcBufRes[24];
p_llrRes = (__m512i*) &llrRes [15360];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]);
}
p_res = &p_bnProcBufRes[30];
p_llrRes = (__m512i*) &llrRes [15360];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[30 + i]);
}
// Process group with 7 CNs
M = (1*Z + 63)>>6;
p_bnProcBuf = (__m512i*) &bnProcBuf [20736];
p_bnProcBufRes = (__m512i*) &bnProcBufRes [20736];
p_res = &p_bnProcBufRes[0];
p_llrRes = (__m512i*) &llrRes [15744];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
}
p_res = &p_bnProcBufRes[6];
p_llrRes = (__m512i*) &llrRes [15744];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]);
}
p_res = &p_bnProcBufRes[12];
p_llrRes = (__m512i*) &llrRes [15744];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]);
}
p_res = &p_bnProcBufRes[18];
p_llrRes = (__m512i*) &llrRes [15744];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]);
}
p_res = &p_bnProcBufRes[24];
p_llrRes = (__m512i*) &llrRes [15744];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]);
}
p_res = &p_bnProcBufRes[30];
p_llrRes = (__m512i*) &llrRes [15744];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[30 + i]);
}
p_res = &p_bnProcBufRes[36];
p_llrRes = (__m512i*) &llrRes [15744];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]);
}
// Process group with 8 CNs
M = (1*Z + 63)>>6;
p_bnProcBuf = (__m512i*) &bnProcBuf [23424];
p_bnProcBufRes = (__m512i*) &bnProcBufRes [23424];
p_res = &p_bnProcBufRes[0];
p_llrRes = (__m512i*) &llrRes [16128];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
}
p_res = &p_bnProcBufRes[6];
p_llrRes = (__m512i*) &llrRes [16128];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]);
}
p_res = &p_bnProcBufRes[12];
p_llrRes = (__m512i*) &llrRes [16128];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]);
}
p_res = &p_bnProcBufRes[18];
p_llrRes = (__m512i*) &llrRes [16128];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]);
}
p_res = &p_bnProcBufRes[24];
p_llrRes = (__m512i*) &llrRes [16128];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]);
}
p_res = &p_bnProcBufRes[30];
p_llrRes = (__m512i*) &llrRes [16128];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[30 + i]);
}
p_res = &p_bnProcBufRes[36];
p_llrRes = (__m512i*) &llrRes [16128];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]);
}
p_res = &p_bnProcBufRes[42];
p_llrRes = (__m512i*) &llrRes [16128];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[42 + i]);
}
// Process group with 9 CNs
M = (2*Z + 63)>>6;
p_bnProcBuf = (__m512i*) &bnProcBuf [26496];
p_bnProcBufRes = (__m512i*) &bnProcBufRes [26496];
p_res = &p_bnProcBufRes[0];
p_llrRes = (__m512i*) &llrRes [16512];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
}
p_res = &p_bnProcBufRes[12];
p_llrRes = (__m512i*) &llrRes [16512];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]);
}
p_res = &p_bnProcBufRes[24];
p_llrRes = (__m512i*) &llrRes [16512];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]);
}
p_res = &p_bnProcBufRes[36];
p_llrRes = (__m512i*) &llrRes [16512];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]);
}
p_res = &p_bnProcBufRes[48];
p_llrRes = (__m512i*) &llrRes [16512];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]);
}
p_res = &p_bnProcBufRes[60];
p_llrRes = (__m512i*) &llrRes [16512];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]);
}
p_res = &p_bnProcBufRes[72];
p_llrRes = (__m512i*) &llrRes [16512];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]);
}
p_res = &p_bnProcBufRes[84];
p_llrRes = (__m512i*) &llrRes [16512];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[84 + i]);
}
p_res = &p_bnProcBufRes[96];
p_llrRes = (__m512i*) &llrRes [16512];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]);
}
// Process group with 10 CNs
M = (1*Z + 63)>>6;
p_bnProcBuf = (__m512i*) &bnProcBuf [33408];
p_bnProcBufRes = (__m512i*) &bnProcBufRes [33408];
p_res = &p_bnProcBufRes[0];
p_llrRes = (__m512i*) &llrRes [17280];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
}
p_res = &p_bnProcBufRes[6];
p_llrRes = (__m512i*) &llrRes [17280];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]);
}
p_res = &p_bnProcBufRes[12];
p_llrRes = (__m512i*) &llrRes [17280];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]);
}
p_res = &p_bnProcBufRes[18];
p_llrRes = (__m512i*) &llrRes [17280];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]);
}
p_res = &p_bnProcBufRes[24];
p_llrRes = (__m512i*) &llrRes [17280];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]);
}
p_res = &p_bnProcBufRes[30];
p_llrRes = (__m512i*) &llrRes [17280];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[30 + i]);
}
p_res = &p_bnProcBufRes[36];
p_llrRes = (__m512i*) &llrRes [17280];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]);
}
p_res = &p_bnProcBufRes[42];
p_llrRes = (__m512i*) &llrRes [17280];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[42 + i]);
}
p_res = &p_bnProcBufRes[48];
p_llrRes = (__m512i*) &llrRes [17280];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]);
}
p_res = &p_bnProcBufRes[54];
p_llrRes = (__m512i*) &llrRes [17280];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[54 + i]);
}
// Process group with 11 CNs
// Process group with 12 CNs
M = (1*Z + 63)>>6;
p_bnProcBuf = (__m512i*) &bnProcBuf [37248];
p_bnProcBufRes = (__m512i*) &bnProcBufRes [37248];
p_res = &p_bnProcBufRes[0];
p_llrRes = (__m512i*) &llrRes [17664];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
}
p_res = &p_bnProcBufRes[6];
p_llrRes = (__m512i*) &llrRes [17664];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]);
}
p_res = &p_bnProcBufRes[12];
p_llrRes = (__m512i*) &llrRes [17664];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]);
}
p_res = &p_bnProcBufRes[18];
p_llrRes = (__m512i*) &llrRes [17664];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]);
}
p_res = &p_bnProcBufRes[24];
p_llrRes = (__m512i*) &llrRes [17664];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]);
}
p_res = &p_bnProcBufRes[30];
p_llrRes = (__m512i*) &llrRes [17664];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[30 + i]);
}
p_res = &p_bnProcBufRes[36];
p_llrRes = (__m512i*) &llrRes [17664];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]);
}
p_res = &p_bnProcBufRes[42];
p_llrRes = (__m512i*) &llrRes [17664];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[42 + i]);
}
p_res = &p_bnProcBufRes[48];
p_llrRes = (__m512i*) &llrRes [17664];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]);
}
p_res = &p_bnProcBufRes[54];
p_llrRes = (__m512i*) &llrRes [17664];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[54 + i]);
}
p_res = &p_bnProcBufRes[60];
p_llrRes = (__m512i*) &llrRes [17664];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]);
}
p_res = &p_bnProcBufRes[66];
p_llrRes = (__m512i*) &llrRes [17664];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[66 + i]);
}
// Process group with 13 CNs
M = (1*Z + 63)>>6;
p_bnProcBuf = (__m512i*) &bnProcBuf [41856];
p_bnProcBufRes = (__m512i*) &bnProcBufRes [41856];
p_res = &p_bnProcBufRes[0];
p_llrRes = (__m512i*) &llrRes [18048];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
}
p_res = &p_bnProcBufRes[6];
p_llrRes = (__m512i*) &llrRes [18048];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]);
}
p_res = &p_bnProcBufRes[12];
p_llrRes = (__m512i*) &llrRes [18048];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]);
}
p_res = &p_bnProcBufRes[18];
p_llrRes = (__m512i*) &llrRes [18048];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]);
}
p_res = &p_bnProcBufRes[24];
p_llrRes = (__m512i*) &llrRes [18048];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]);
}
p_res = &p_bnProcBufRes[30];
p_llrRes = (__m512i*) &llrRes [18048];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[30 + i]);
}
p_res = &p_bnProcBufRes[36];
p_llrRes = (__m512i*) &llrRes [18048];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]);
}
p_res = &p_bnProcBufRes[42];
p_llrRes = (__m512i*) &llrRes [18048];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[42 + i]);
}
p_res = &p_bnProcBufRes[48];
p_llrRes = (__m512i*) &llrRes [18048];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]);
}
p_res = &p_bnProcBufRes[54];
p_llrRes = (__m512i*) &llrRes [18048];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[54 + i]);
}
p_res = &p_bnProcBufRes[60];
p_llrRes = (__m512i*) &llrRes [18048];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]);
}
p_res = &p_bnProcBufRes[66];
p_llrRes = (__m512i*) &llrRes [18048];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[66 + i]);
}
p_res = &p_bnProcBufRes[72];
p_llrRes = (__m512i*) &llrRes [18048];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]);
}
// Process group with 14 CNs
M = (1*Z + 63)>>6;
p_bnProcBuf = (__m512i*) &bnProcBuf [46848];
p_bnProcBufRes = (__m512i*) &bnProcBufRes [46848];
p_res = &p_bnProcBufRes[0];
p_llrRes = (__m512i*) &llrRes [18432];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
}
p_res = &p_bnProcBufRes[6];
p_llrRes = (__m512i*) &llrRes [18432];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]);
}
p_res = &p_bnProcBufRes[12];
p_llrRes = (__m512i*) &llrRes [18432];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]);
}
p_res = &p_bnProcBufRes[18];
p_llrRes = (__m512i*) &llrRes [18432];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]);
}
p_res = &p_bnProcBufRes[24];
p_llrRes = (__m512i*) &llrRes [18432];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]);
}
p_res = &p_bnProcBufRes[30];
p_llrRes = (__m512i*) &llrRes [18432];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[30 + i]);
}
p_res = &p_bnProcBufRes[36];
p_llrRes = (__m512i*) &llrRes [18432];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]);
}
p_res = &p_bnProcBufRes[42];
p_llrRes = (__m512i*) &llrRes [18432];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[42 + i]);
}
p_res = &p_bnProcBufRes[48];
p_llrRes = (__m512i*) &llrRes [18432];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]);
}
p_res = &p_bnProcBufRes[54];
p_llrRes = (__m512i*) &llrRes [18432];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[54 + i]);
}
p_res = &p_bnProcBufRes[60];
p_llrRes = (__m512i*) &llrRes [18432];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]);
}
p_res = &p_bnProcBufRes[66];
p_llrRes = (__m512i*) &llrRes [18432];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[66 + i]);
}
p_res = &p_bnProcBufRes[72];
p_llrRes = (__m512i*) &llrRes [18432];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]);
}
p_res = &p_bnProcBufRes[78];
p_llrRes = (__m512i*) &llrRes [18432];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[78 + i]);
}
// Process group with 15 CNs
// Process group with 16 CNs
M = (1*Z + 63)>>6;
p_bnProcBuf = (__m512i*) &bnProcBuf [52224];
p_bnProcBufRes = (__m512i*) &bnProcBufRes [52224];
p_res = &p_bnProcBufRes[0];
p_llrRes = (__m512i*) &llrRes [18816];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
}
p_res = &p_bnProcBufRes[6];
p_llrRes = (__m512i*) &llrRes [18816];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]);
}
p_res = &p_bnProcBufRes[12];
p_llrRes = (__m512i*) &llrRes [18816];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]);
}
p_res = &p_bnProcBufRes[18];
p_llrRes = (__m512i*) &llrRes [18816];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]);
}
p_res = &p_bnProcBufRes[24];
p_llrRes = (__m512i*) &llrRes [18816];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]);
}
p_res = &p_bnProcBufRes[30];
p_llrRes = (__m512i*) &llrRes [18816];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[30 + i]);
}
p_res = &p_bnProcBufRes[36];
p_llrRes = (__m512i*) &llrRes [18816];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]);
}
p_res = &p_bnProcBufRes[42];
p_llrRes = (__m512i*) &llrRes [18816];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[42 + i]);
}
p_res = &p_bnProcBufRes[48];
p_llrRes = (__m512i*) &llrRes [18816];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]);
}
p_res = &p_bnProcBufRes[54];
p_llrRes = (__m512i*) &llrRes [18816];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[54 + i]);
}
p_res = &p_bnProcBufRes[60];
p_llrRes = (__m512i*) &llrRes [18816];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]);
}
p_res = &p_bnProcBufRes[66];
p_llrRes = (__m512i*) &llrRes [18816];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[66 + i]);
}
p_res = &p_bnProcBufRes[72];
p_llrRes = (__m512i*) &llrRes [18816];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]);
}
p_res = &p_bnProcBufRes[78];
p_llrRes = (__m512i*) &llrRes [18816];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[78 + i]);
}
p_res = &p_bnProcBufRes[84];
p_llrRes = (__m512i*) &llrRes [18816];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[84 + i]);
}
p_res = &p_bnProcBufRes[90];
p_llrRes = (__m512i*) &llrRes [18816];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[90 + i]);
}
// Process group with 17 CNs
// Process group with 18 CNs
// Process group with 19 CNs
// Process group with 20 CNs
// Process group with 21 CNs
// Process group with 22 CNs
M = (1*Z + 63)>>6;
p_bnProcBuf = (__m512i*) &bnProcBuf [58368];
p_bnProcBufRes = (__m512i*) &bnProcBufRes [58368];
p_res = &p_bnProcBufRes[0];
p_llrRes = (__m512i*) &llrRes [19200];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
}
p_res = &p_bnProcBufRes[6];
p_llrRes = (__m512i*) &llrRes [19200];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]);
}
p_res = &p_bnProcBufRes[12];
p_llrRes = (__m512i*) &llrRes [19200];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]);
}
p_res = &p_bnProcBufRes[18];
p_llrRes = (__m512i*) &llrRes [19200];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]);
}
p_res = &p_bnProcBufRes[24];
p_llrRes = (__m512i*) &llrRes [19200];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]);
}
p_res = &p_bnProcBufRes[30];
p_llrRes = (__m512i*) &llrRes [19200];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[30 + i]);
}
p_res = &p_bnProcBufRes[36];
p_llrRes = (__m512i*) &llrRes [19200];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]);
}
p_res = &p_bnProcBufRes[42];
p_llrRes = (__m512i*) &llrRes [19200];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[42 + i]);
}
p_res = &p_bnProcBufRes[48];
p_llrRes = (__m512i*) &llrRes [19200];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]);
}
p_res = &p_bnProcBufRes[54];
p_llrRes = (__m512i*) &llrRes [19200];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[54 + i]);
}
p_res = &p_bnProcBufRes[60];
p_llrRes = (__m512i*) &llrRes [19200];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]);
}
p_res = &p_bnProcBufRes[66];
p_llrRes = (__m512i*) &llrRes [19200];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[66 + i]);
}
p_res = &p_bnProcBufRes[72];
p_llrRes = (__m512i*) &llrRes [19200];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]);
}
p_res = &p_bnProcBufRes[78];
p_llrRes = (__m512i*) &llrRes [19200];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[78 + i]);
}
p_res = &p_bnProcBufRes[84];
p_llrRes = (__m512i*) &llrRes [19200];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[84 + i]);
}
p_res = &p_bnProcBufRes[90];
p_llrRes = (__m512i*) &llrRes [19200];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[90 + i]);
}
p_res = &p_bnProcBufRes[96];
p_llrRes = (__m512i*) &llrRes [19200];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]);
}
p_res = &p_bnProcBufRes[102];
p_llrRes = (__m512i*) &llrRes [19200];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[102 + i]);
}
p_res = &p_bnProcBufRes[108];
p_llrRes = (__m512i*) &llrRes [19200];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[108 + i]);
}
p_res = &p_bnProcBufRes[114];
p_llrRes = (__m512i*) &llrRes [19200];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[114 + i]);
}
p_res = &p_bnProcBufRes[120];
p_llrRes = (__m512i*) &llrRes [19200];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[120 + i]);
}
p_res = &p_bnProcBufRes[126];
p_llrRes = (__m512i*) &llrRes [19200];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[126 + i]);
}
// Process group with <23 CNs
M = (1*Z + 63)>>6;
p_bnProcBuf = (__m512i*) &bnProcBuf [66816];
p_bnProcBufRes = (__m512i*) &bnProcBufRes [66816];
p_res = &p_bnProcBufRes[0];
p_llrRes = (__m512i*) &llrRes [19584];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
}
p_res = &p_bnProcBufRes[6];
p_llrRes = (__m512i*) &llrRes [19584];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]);
}
p_res = &p_bnProcBufRes[12];
p_llrRes = (__m512i*) &llrRes [19584];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]);
}
p_res = &p_bnProcBufRes[18];
p_llrRes = (__m512i*) &llrRes [19584];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]);
}
p_res = &p_bnProcBufRes[24];
p_llrRes = (__m512i*) &llrRes [19584];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]);
}
p_res = &p_bnProcBufRes[30];
p_llrRes = (__m512i*) &llrRes [19584];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[30 + i]);
}
p_res = &p_bnProcBufRes[36];
p_llrRes = (__m512i*) &llrRes [19584];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]);
}
p_res = &p_bnProcBufRes[42];
p_llrRes = (__m512i*) &llrRes [19584];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[42 + i]);
}
p_res = &p_bnProcBufRes[48];
p_llrRes = (__m512i*) &llrRes [19584];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]);
}
p_res = &p_bnProcBufRes[54];
p_llrRes = (__m512i*) &llrRes [19584];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[54 + i]);
}
p_res = &p_bnProcBufRes[60];
p_llrRes = (__m512i*) &llrRes [19584];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]);
}
p_res = &p_bnProcBufRes[66];
p_llrRes = (__m512i*) &llrRes [19584];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[66 + i]);
}
p_res = &p_bnProcBufRes[72];
p_llrRes = (__m512i*) &llrRes [19584];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]);
}
p_res = &p_bnProcBufRes[78];
p_llrRes = (__m512i*) &llrRes [19584];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[78 + i]);
}
p_res = &p_bnProcBufRes[84];
p_llrRes = (__m512i*) &llrRes [19584];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[84 + i]);
}
p_res = &p_bnProcBufRes[90];
p_llrRes = (__m512i*) &llrRes [19584];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[90 + i]);
}
p_res = &p_bnProcBufRes[96];
p_llrRes = (__m512i*) &llrRes [19584];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]);
}
p_res = &p_bnProcBufRes[102];
p_llrRes = (__m512i*) &llrRes [19584];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[102 + i]);
}
p_res = &p_bnProcBufRes[108];
p_llrRes = (__m512i*) &llrRes [19584];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[108 + i]);
}
p_res = &p_bnProcBufRes[114];
p_llrRes = (__m512i*) &llrRes [19584];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[114 + i]);
}
p_res = &p_bnProcBufRes[120];
p_llrRes = (__m512i*) &llrRes [19584];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[120 + i]);
}
p_res = &p_bnProcBufRes[126];
p_llrRes = (__m512i*) &llrRes [19584];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[126 + i]);
}
p_res = &p_bnProcBufRes[132];
p_llrRes = (__m512i*) &llrRes [19584];
for (i=0;i<M;i++) {
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[132 + i]);
}
// Process group with 24 CNs
// Process group with 25 CNs
// Process group with 26 CNs
// Process group with 27 CNs
// Process group with 28 CNs
// Process group with 29 CNs
// Process group with 30 CNs
}
#include <stdint.h>
#include <immintrin.h>
void nrLDPC_bnProc_BG2_R23_AVX2(int8_t* bnProcBuf,int8_t* bnProcBufRes, int8_t* llrRes, uint16_t Z ) {
__m256i* p_bnProcBuf;
__m256i* p_bnProcBufRes;
__m256i* p_llrRes;
__m256i* p_res;
void nrLDPC_bnProc_BG2_R23_AVX512(int8_t* bnProcBuf,int8_t* bnProcBufRes, int8_t* llrRes, uint16_t Z ) {
__m512i* p_bnProcBuf;
__m512i* p_bnProcBufRes;
__m512i* p_llrRes;
__m512i* p_res;
uint32_t M, i;
// Process group with 2 CNs
M = (3*Z + 31)>>5;
p_bnProcBuf = (__m256i*) &bnProcBuf [1152];
p_bnProcBufRes = (__m256i*) &bnProcBufRes [1152];
M = (3*Z + 63)>>6;
p_bnProcBuf = (__m512i*) &bnProcBuf [1152];
p_bnProcBufRes = (__m512i*) &bnProcBufRes [1152];
p_res = &p_bnProcBufRes[0];
p_llrRes = (__m256i*) &llrRes [1152];
p_llrRes = (__m512i*) &llrRes [1152];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
}
p_res = &p_bnProcBufRes[36];
p_llrRes = (__m256i*) &llrRes [1152];
p_res = &p_bnProcBufRes[18];
p_llrRes = (__m512i*) &llrRes [1152];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]);
}
// Process group with 3 CNs
M = (5*Z + 31)>>5;
p_bnProcBuf = (__m256i*) &bnProcBuf [3456];
p_bnProcBufRes = (__m256i*) &bnProcBufRes [3456];
M = (5*Z + 63)>>6;
p_bnProcBuf = (__m512i*) &bnProcBuf [3456];
p_bnProcBufRes = (__m512i*) &bnProcBufRes [3456];
p_res = &p_bnProcBufRes[0];
p_llrRes = (__m256i*) &llrRes [2304];
p_llrRes = (__m512i*) &llrRes [2304];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
}
p_res = &p_bnProcBufRes[60];
p_llrRes = (__m256i*) &llrRes [2304];
p_res = &p_bnProcBufRes[30];
p_llrRes = (__m512i*) &llrRes [2304];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[30 + i]);
}
p_res = &p_bnProcBufRes[120];
p_llrRes = (__m256i*) &llrRes [2304];
p_res = &p_bnProcBufRes[60];
p_llrRes = (__m512i*) &llrRes [2304];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[120 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]);
}
// Process group with 4 CNs
M = (3*Z + 31)>>5;
p_bnProcBuf = (__m256i*) &bnProcBuf [9216];
p_bnProcBufRes = (__m256i*) &bnProcBufRes [9216];
M = (3*Z + 63)>>6;
p_bnProcBuf = (__m512i*) &bnProcBuf [9216];
p_bnProcBufRes = (__m512i*) &bnProcBufRes [9216];
p_res = &p_bnProcBufRes[0];
p_llrRes = (__m256i*) &llrRes [4224];
p_llrRes = (__m512i*) &llrRes [4224];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
}
p_res = &p_bnProcBufRes[36];
p_llrRes = (__m256i*) &llrRes [4224];
p_res = &p_bnProcBufRes[18];
p_llrRes = (__m512i*) &llrRes [4224];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]);
}
p_res = &p_bnProcBufRes[72];
p_llrRes = (__m256i*) &llrRes [4224];
p_res = &p_bnProcBufRes[36];
p_llrRes = (__m512i*) &llrRes [4224];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]);
}
p_res = &p_bnProcBufRes[108];
p_llrRes = (__m256i*) &llrRes [4224];
p_res = &p_bnProcBufRes[54];
p_llrRes = (__m512i*) &llrRes [4224];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[108 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[54 + i]);
}
// Process group with 5 CNs
M = (2*Z + 31)>>5;
p_bnProcBuf = (__m256i*) &bnProcBuf [13824];
p_bnProcBufRes = (__m256i*) &bnProcBufRes [13824];
M = (2*Z + 63)>>6;
p_bnProcBuf = (__m512i*) &bnProcBuf [13824];
p_bnProcBufRes = (__m512i*) &bnProcBufRes [13824];
p_res = &p_bnProcBufRes[0];
p_llrRes = (__m256i*) &llrRes [5376];
p_llrRes = (__m512i*) &llrRes [5376];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
}
p_res = &p_bnProcBufRes[24];
p_llrRes = (__m256i*) &llrRes [5376];
p_res = &p_bnProcBufRes[12];
p_llrRes = (__m512i*) &llrRes [5376];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]);
}
p_res = &p_bnProcBufRes[48];
p_llrRes = (__m256i*) &llrRes [5376];
p_res = &p_bnProcBufRes[24];
p_llrRes = (__m512i*) &llrRes [5376];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]);
}
p_res = &p_bnProcBufRes[72];
p_llrRes = (__m256i*) &llrRes [5376];
p_res = &p_bnProcBufRes[36];
p_llrRes = (__m512i*) &llrRes [5376];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]);
}
p_res = &p_bnProcBufRes[96];
p_llrRes = (__m256i*) &llrRes [5376];
p_res = &p_bnProcBufRes[48];
p_llrRes = (__m512i*) &llrRes [5376];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]);
}
// Process group with 6 CNs
M = (1*Z + 31)>>5;
p_bnProcBuf = (__m256i*) &bnProcBuf [17664];
p_bnProcBufRes = (__m256i*) &bnProcBufRes [17664];
M = (1*Z + 63)>>6;
p_bnProcBuf = (__m512i*) &bnProcBuf [17664];
p_bnProcBufRes = (__m512i*) &bnProcBufRes [17664];
p_res = &p_bnProcBufRes[0];
p_llrRes = (__m256i*) &llrRes [6144];
p_llrRes = (__m512i*) &llrRes [6144];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
}
p_res = &p_bnProcBufRes[12];
p_llrRes = (__m256i*) &llrRes [6144];
p_res = &p_bnProcBufRes[6];
p_llrRes = (__m512i*) &llrRes [6144];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]);
}
p_res = &p_bnProcBufRes[24];
p_llrRes = (__m256i*) &llrRes [6144];
p_res = &p_bnProcBufRes[12];
p_llrRes = (__m512i*) &llrRes [6144];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]);
}
p_res = &p_bnProcBufRes[36];
p_llrRes = (__m256i*) &llrRes [6144];
p_res = &p_bnProcBufRes[18];
p_llrRes = (__m512i*) &llrRes [6144];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]);
}
p_res = &p_bnProcBufRes[48];
p_llrRes = (__m256i*) &llrRes [6144];
p_res = &p_bnProcBufRes[24];
p_llrRes = (__m512i*) &llrRes [6144];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]);
}
p_res = &p_bnProcBufRes[60];
p_llrRes = (__m256i*) &llrRes [6144];
p_res = &p_bnProcBufRes[30];
p_llrRes = (__m512i*) &llrRes [6144];
for (i=0;i<M;i++) {
p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]);
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[30 + i]);
}
// Process group with 7 CNs
// Process group with 8 CNs
......
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
#include <stdint.h>
#include <immintrin.h>
static inline void nrLDPC_cnProc_BG1_R89_AVX2(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBuf, uint16_t Z) {
//Process group with 3 BNs
__m256i ymm0, min, sgn,ones,maxLLR;
ones = _mm256_set1_epi8((char)1);
maxLLR = _mm256_set1_epi8((char)127);
uint32_t M;
int8_t* cnProcBuf = p_procBuf->cnProcBuf;
int8_t* cnProcBufRes = p_procBuf->cnProcBufRes;
M = (1*Z + 31)>>5;
for (int i=0;i<M;i+=2) {
ymm0 = ((__m256i*)cnProcBuf)[12+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[24+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[0+i] = _mm256_sign_epi8(min, sgn);
ymm0 = ((__m256i*)cnProcBuf)[13+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[25+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[1+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i+=2) {
ymm0 = ((__m256i*)cnProcBuf)[0+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[24+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[12+i] = _mm256_sign_epi8(min, sgn);
ymm0 = ((__m256i*)cnProcBuf)[1+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[25+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[13+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i+=2) {
ymm0 = ((__m256i*)cnProcBuf)[0+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[12+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[24+i] = _mm256_sign_epi8(min, sgn);
ymm0 = ((__m256i*)cnProcBuf)[1+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[13+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[25+i] = _mm256_sign_epi8(min, sgn);
}
//Process group with 4 BNs
//Process group with 5 BNs
//Process group with 6 BNs
//Process group with 7 BNs
//Process group with 8 BNs
//Process group with 9 BNs
//Process group with 10 BNs
//Process group with 19 BNs
M = (4*Z + 31)>>5;
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[2928+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2976+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3024+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3072+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3120+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3168+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3216+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3264+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3312+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3360+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3408+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3456+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3504+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3552+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3600+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3648+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3696+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3744+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[2880+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[2880+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2976+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3024+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3072+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3120+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3168+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3216+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3264+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3312+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3360+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3408+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3456+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3504+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3552+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3600+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3648+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3696+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3744+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[2928+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[2880+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2928+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3024+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3072+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3120+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3168+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3216+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3264+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3312+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3360+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3408+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3456+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3504+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3552+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3600+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3648+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3696+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3744+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[2976+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[2880+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2928+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2976+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3072+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3120+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3168+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3216+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3264+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3312+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3360+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3408+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3456+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3504+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3552+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3600+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3648+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3696+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3744+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[3024+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[2880+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2928+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2976+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3024+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3120+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3168+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3216+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3264+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3312+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3360+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3408+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3456+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3504+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3552+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3600+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3648+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3696+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3744+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[3072+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[2880+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2928+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2976+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3024+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3072+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3168+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3216+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3264+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3312+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3360+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3408+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3456+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3504+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3552+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3600+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3648+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3696+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3744+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[3120+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[2880+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2928+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2976+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3024+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3072+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3120+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3216+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3264+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3312+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3360+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3408+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3456+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3504+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3552+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3600+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3648+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3696+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3744+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[3168+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[2880+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2928+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2976+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3024+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3072+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3120+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3168+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3264+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3312+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3360+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3408+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3456+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3504+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3552+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3600+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3648+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3696+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3744+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[3216+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[2880+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2928+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2976+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3024+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3072+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3120+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3168+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3216+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3312+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3360+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3408+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3456+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3504+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3552+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3600+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3648+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3696+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3744+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[3264+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[2880+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2928+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2976+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3024+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3072+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3120+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3168+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3216+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3264+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3360+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3408+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3456+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3504+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3552+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3600+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3648+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3696+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3744+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[3312+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[2880+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2928+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2976+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3024+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3072+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3120+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3168+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3216+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3264+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3312+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3408+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3456+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3504+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3552+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3600+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3648+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3696+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3744+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[3360+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[2880+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2928+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2976+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3024+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3072+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3120+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3168+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3216+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3264+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3312+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3360+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3456+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3504+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3552+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3600+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3648+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3696+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3744+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[3408+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[2880+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2928+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2976+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3024+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3072+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3120+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3168+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3216+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3264+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3312+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3360+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3408+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3504+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3552+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3600+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3648+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3696+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3744+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[3456+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[2880+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2928+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2976+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3024+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3072+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3120+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3168+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3216+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3264+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3312+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3360+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3408+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3456+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3552+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3600+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3648+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3696+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3744+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[3504+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[2880+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2928+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2976+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3024+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3072+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3120+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3168+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3216+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3264+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3312+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3360+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3408+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3456+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3504+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3600+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3648+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3696+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3744+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[3552+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[2880+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2928+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2976+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3024+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3072+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3120+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3168+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3216+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3264+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3312+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3360+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3408+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3456+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3504+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3552+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3648+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3696+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3744+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[3600+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[2880+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2928+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2976+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3024+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3072+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3120+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3168+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3216+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3264+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3312+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3360+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3408+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3456+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3504+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3552+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3600+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3696+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3744+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[3648+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[2880+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2928+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2976+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3024+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3072+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3120+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3168+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3216+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3264+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3312+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3360+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3408+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3456+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3504+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3552+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3600+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3648+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3744+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[3696+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[2880+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2928+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2976+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3024+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3072+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3120+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3168+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3216+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3264+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3312+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3360+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3408+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3456+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3504+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3552+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3600+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3648+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[3696+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[3744+i] = _mm256_sign_epi8(min, sgn);
}
}
#include <stdint.h>
#include <immintrin.h>
static inline void nrLDPC_cnProc_BG2_R13_AVX2(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBuf, uint16_t Z) {
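//Process group with 3 BNs: no processing loops follow for this group; the declarations below are used by the loops of the later groups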
__m256i ymm0, min, sgn,ones,maxLLR;
ones = _mm256_set1_epi8((char)1);
maxLLR = _mm256_set1_epi8((char)127);
uint32_t M;
int8_t* cnProcBuf = p_procBuf->cnProcBuf;
int8_t* cnProcBufRes = p_procBuf->cnProcBufRes;
//Process group with 4 BNs
M = (8*Z + 31)>>5;
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[456+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[696+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[936+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[216+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[216+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[696+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[936+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[456+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[216+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[456+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[936+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[696+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[216+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[456+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[696+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[936+i] = _mm256_sign_epi8(min, sgn);
}
//Process group with 5 BNs
M = (7*Z + 31)>>5;
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[1284+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1392+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1500+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1608+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[1176+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[1176+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1392+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1500+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1608+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[1284+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[1176+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1284+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1500+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1608+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[1392+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[1176+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1284+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1392+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1608+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[1500+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[1176+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1284+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1392+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1500+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[1608+i] = _mm256_sign_epi8(min, sgn);
}
//Process group with 6 BNs
M = (3*Z + 31)>>5;
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[1752+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1788+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1824+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1860+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1896+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[1716+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[1716+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1788+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1824+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1860+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1896+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[1752+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[1716+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1752+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1824+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1860+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1896+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[1788+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[1716+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1752+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1788+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1860+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1896+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[1824+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[1716+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1752+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1788+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1824+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1896+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[1860+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[1716+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1752+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1788+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1824+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1860+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[1896+i] = _mm256_sign_epi8(min, sgn);
}
//Process group with 8 BNs
M = (2*Z + 31)>>5;
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[1956+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1980+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2004+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2028+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2052+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2076+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2100+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[1932+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[1932+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1980+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2004+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2028+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2052+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2076+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2100+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[1956+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[1932+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1956+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2004+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2028+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2052+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2076+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2100+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[1980+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[1932+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1956+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1980+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2028+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2052+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2076+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2100+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[2004+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[1932+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1956+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1980+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2004+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2052+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2076+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2100+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[2028+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[1932+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1956+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1980+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2004+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2028+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2076+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2100+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[2052+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[1932+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1956+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1980+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2004+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2028+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2052+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2100+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[2076+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[1932+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1956+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1980+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2004+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2028+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2052+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2076+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[2100+i] = _mm256_sign_epi8(min, sgn);
}
//Process group with 10 BNs
M = (2*Z + 31)>>5;
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[2148+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2172+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2196+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2220+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2244+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2268+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2292+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2316+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2340+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[2124+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[2124+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2172+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2196+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2220+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2244+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2268+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2292+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2316+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2340+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[2148+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[2124+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2148+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2196+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2220+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2244+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2268+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2292+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2316+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2340+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[2172+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[2124+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2148+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2172+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2220+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2244+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2268+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2292+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2316+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2340+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[2196+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[2124+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2148+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2172+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2196+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2244+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2268+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2292+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2316+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2340+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[2220+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[2124+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2148+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2172+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2196+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2220+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2268+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2292+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2316+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2340+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[2244+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[2124+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2148+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2172+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2196+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2220+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2244+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2292+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2316+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2340+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[2268+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[2124+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2148+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2172+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2196+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2220+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2244+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2268+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2316+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2340+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[2292+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[2124+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2148+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2172+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2196+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2220+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2244+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2268+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2292+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2340+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[2316+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[2124+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2148+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2172+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2196+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2220+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2244+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2268+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2292+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2316+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[2340+i] = _mm256_sign_epi8(min, sgn);
}
}
#include <stdint.h>
#include <immintrin.h>
static inline void nrLDPC_cnProc_BG2_R15_AVX2(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBuf, uint16_t Z) {
//Process group with 3 BNs
__m256i ymm0, min, sgn,ones,maxLLR;
ones = _mm256_set1_epi8((char)1);
maxLLR = _mm256_set1_epi8((char)127);
uint32_t M;
int8_t* cnProcBuf = p_procBuf->cnProcBuf;
int8_t* cnProcBufRes = p_procBuf->cnProcBufRes;
M = (6*Z + 31)>>5;
for (int i=0;i<M;i+=2) {
ymm0 = ((__m256i*)cnProcBuf)[72+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[144+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[0+i] = _mm256_sign_epi8(min, sgn);
ymm0 = ((__m256i*)cnProcBuf)[73+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[145+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[1+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i+=2) {
ymm0 = ((__m256i*)cnProcBuf)[0+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[144+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[72+i] = _mm256_sign_epi8(min, sgn);
ymm0 = ((__m256i*)cnProcBuf)[1+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[145+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[73+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i+=2) {
ymm0 = ((__m256i*)cnProcBuf)[0+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[72+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[144+i] = _mm256_sign_epi8(min, sgn);
ymm0 = ((__m256i*)cnProcBuf)[1+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[73+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[145+i] = _mm256_sign_epi8(min, sgn);
}
//Process group with 4 BNs
M = (20*Z + 31)>>5;
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[456+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[696+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[936+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[216+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[216+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[696+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[936+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[456+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[216+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[456+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[936+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[696+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[216+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[456+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[696+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[936+i] = _mm256_sign_epi8(min, sgn);
}
//Process group with 5 BNs
M = (9*Z + 31)>>5;
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[1284+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1392+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1500+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1608+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[1176+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[1176+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1392+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1500+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1608+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[1284+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[1176+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1284+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1500+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1608+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[1392+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[1176+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1284+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1392+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1608+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[1500+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[1176+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1284+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1392+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1500+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[1608+i] = _mm256_sign_epi8(min, sgn);
}
//Process group with 6 BNs
M = (3*Z + 31)>>5;
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[1752+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1788+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1824+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1860+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1896+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[1716+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[1716+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1788+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1824+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1860+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1896+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[1752+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[1716+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1752+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1824+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1860+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1896+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[1788+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[1716+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1752+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1788+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1860+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1896+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[1824+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[1716+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1752+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1788+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1824+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1896+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[1860+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[1716+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1752+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1788+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1824+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1860+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[1896+i] = _mm256_sign_epi8(min, sgn);
}
//Process group with 8 BNs
M = (2*Z + 31)>>5;
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[1956+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1980+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2004+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2028+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2052+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2076+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2100+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[1932+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[1932+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1980+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2004+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2028+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2052+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2076+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2100+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[1956+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[1932+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1956+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2004+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2028+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2052+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2076+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2100+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[1980+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[1932+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1956+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1980+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2028+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2052+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2076+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2100+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[2004+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[1932+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1956+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1980+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2004+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2052+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2076+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2100+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[2028+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[1932+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1956+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1980+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2004+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2028+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2076+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2100+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[2052+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[1932+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1956+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1980+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2004+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2028+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2052+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2100+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[2076+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[1932+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1956+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1980+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2004+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2028+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2052+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2076+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[2100+i] = _mm256_sign_epi8(min, sgn);
}
//Process group with 10 BNs
M = (2*Z + 31)>>5;
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[2148+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2172+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2196+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2220+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2244+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2268+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2292+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2316+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2340+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[2124+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[2124+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2172+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2196+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2220+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2244+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2268+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2292+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2316+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2340+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[2148+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[2124+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2148+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2196+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2220+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2244+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2268+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2292+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2316+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2340+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[2172+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[2124+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2148+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2172+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2220+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2244+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2268+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2292+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2316+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2340+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[2196+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[2124+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2148+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2172+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2196+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2244+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2268+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2292+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2316+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2340+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[2220+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[2124+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2148+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2172+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2196+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2220+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2268+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2292+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2316+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2340+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[2244+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[2124+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2148+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2172+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2196+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2220+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2244+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2292+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2316+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2340+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[2268+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[2124+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2148+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2172+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2196+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2220+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2244+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2268+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2316+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2340+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[2292+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[2124+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2148+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2172+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2196+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2220+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2244+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2268+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2292+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2340+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[2316+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[2124+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2148+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2172+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2196+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2220+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2244+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2268+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2292+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2316+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[2340+i] = _mm256_sign_epi8(min, sgn);
}
}
#include <stdint.h>
#include <immintrin.h>
static inline void nrLDPC_cnProc_BG2_R23_AVX2(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBuf, uint16_t Z) {
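//Process group with 3 BNs: no loop is emitted under this header; the lines below are local declarations shared by the following groups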
__m256i ymm0, min, sgn,ones,maxLLR;
ones = _mm256_set1_epi8((char)1);
maxLLR = _mm256_set1_epi8((char)127);
uint32_t M;
int8_t* cnProcBuf = p_procBuf->cnProcBuf;
int8_t* cnProcBufRes = p_procBuf->cnProcBufRes;
//Process group with 4 BNs
M = (1*Z + 31)>>5;
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[456+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[696+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[936+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[216+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[216+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[696+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[936+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[456+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[216+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[456+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[936+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[696+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[216+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[456+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[696+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[936+i] = _mm256_sign_epi8(min, sgn);
}
//Process group with 5 BNs: no loop is emitted under this header
//Process group with 6 BNs
M = (2*Z + 31)>>5;
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[1752+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1788+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1824+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1860+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1896+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[1716+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[1716+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1788+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1824+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1860+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1896+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[1752+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[1716+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1752+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1824+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1860+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1896+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[1788+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[1716+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1752+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1788+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1860+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1896+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[1824+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[1716+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1752+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1788+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1824+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1896+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[1860+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[1716+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1752+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1788+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1824+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1860+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[1896+i] = _mm256_sign_epi8(min, sgn);
}
//Process group with 8 BNs
M = (2*Z + 31)>>5;
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[1956+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1980+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2004+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2028+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2052+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2076+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2100+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[1932+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[1932+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1980+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2004+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2028+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2052+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2076+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2100+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[1956+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[1932+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1956+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2004+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2028+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2052+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2076+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2100+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[1980+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[1932+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1956+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1980+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2028+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2052+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2076+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2100+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[2004+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[1932+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1956+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1980+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2004+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2052+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2076+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2100+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[2028+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[1932+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1956+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1980+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2004+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2028+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2076+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2100+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[2052+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[1932+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1956+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1980+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2004+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2028+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2052+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2100+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[2076+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[1932+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1956+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[1980+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2004+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2028+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2052+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2076+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[2100+i] = _mm256_sign_epi8(min, sgn);
}
//Process group with 10 BNs
M = (2*Z + 31)>>5;
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[2148+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2172+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2196+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2220+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2244+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2268+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2292+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2316+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2340+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[2124+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[2124+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2172+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2196+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2220+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2244+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2268+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2292+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2316+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2340+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[2148+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[2124+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2148+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2196+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2220+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2244+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2268+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2292+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2316+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2340+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[2172+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[2124+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2148+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2172+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2220+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2244+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2268+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2292+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2316+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2340+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[2196+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[2124+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2148+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2172+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2196+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2244+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2268+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2292+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2316+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2340+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[2220+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[2124+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2148+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2172+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2196+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2220+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2268+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2292+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2316+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2340+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[2244+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[2124+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2148+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2172+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2196+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2220+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2244+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2292+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2316+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2340+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[2268+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[2124+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2148+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2172+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2196+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2220+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2244+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2268+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2316+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2340+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[2292+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[2124+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2148+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2172+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2196+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2220+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2244+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2268+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2292+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2340+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[2316+i] = _mm256_sign_epi8(min, sgn);
}
for (int i=0;i<M;i++) {
ymm0 = ((__m256i*)cnProcBuf)[2124+i];
sgn = _mm256_sign_epi8(ones, ymm0);
min = _mm256_abs_epi8(ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2148+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2172+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2196+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2220+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2244+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2268+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2292+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
ymm0 = ((__m256i*)cnProcBuf)[2316+i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
min = _mm256_min_epu8(min, maxLLR);
((__m256i*)cnProcBufRes)[2340+i] = _mm256_sign_epi8(min, sgn);
}
}
#include <omp.h>
#pragma omp declare simd notinbranch
static inline void nrLDPC_llr2CnProcBuf_BG1_AVX2(t_nrLDPC_lut* p_lut, int8_t* llr, int8_t* cnProcBuf, uint16_t Z) {
const uint16_t (*lut_circShift_CNG3) [lut_numCnInCnGroups_BG1_R13[0]] = (uint16_t(*)[lut_numCnInCnGroups_BG1_R13[0]]) p_lut->circShift[0];
const uint16_t (*lut_circShift_CNG4) [lut_numCnInCnGroups_BG1_R13[1]] = (uint16_t(*)[lut_numCnInCnGroups_BG1_R13[1]]) p_lut->circShift[1];
const uint16_t (*lut_circShift_CNG5) [lut_numCnInCnGroups_BG1_R13[2]] = (uint16_t(*)[lut_numCnInCnGroups_BG1_R13[2]]) p_lut->circShift[2];
const uint16_t (*lut_circShift_CNG6) [lut_numCnInCnGroups_BG1_R13[3]] = (uint16_t(*)[lut_numCnInCnGroups_BG1_R13[3]]) p_lut->circShift[3];
const uint16_t (*lut_circShift_CNG7) [lut_numCnInCnGroups_BG1_R13[4]] = (uint16_t(*)[lut_numCnInCnGroups_BG1_R13[4]]) p_lut->circShift[4];
const uint16_t (*lut_circShift_CNG8) [lut_numCnInCnGroups_BG1_R13[5]] = (uint16_t(*)[lut_numCnInCnGroups_BG1_R13[5]]) p_lut->circShift[5];
const uint16_t (*lut_circShift_CNG9) [lut_numCnInCnGroups_BG1_R13[6]] = (uint16_t(*)[lut_numCnInCnGroups_BG1_R13[6]]) p_lut->circShift[6];
const uint16_t (*lut_circShift_CNG10)[lut_numCnInCnGroups_BG1_R13[7]] = (uint16_t(*)[lut_numCnInCnGroups_BG1_R13[7]]) p_lut->circShift[7];
const uint16_t (*lut_circShift_CNG19)[lut_numCnInCnGroups_BG1_R13[8]] = (uint16_t(*)[lut_numCnInCnGroups_BG1_R13[8]]) p_lut->circShift[8];
const uint8_t (*lut_posBnInCnProcBuf_CNG3) [lut_numCnInCnGroups_BG1_R13[0]] = (uint8_t(*)[lut_numCnInCnGroups_BG1_R13[0]]) p_lut->posBnInCnProcBuf[0];
const uint8_t (*lut_posBnInCnProcBuf_CNG4) [lut_numCnInCnGroups_BG1_R13[1]] = (uint8_t(*)[lut_numCnInCnGroups_BG1_R13[1]]) p_lut->posBnInCnProcBuf[1];
const uint8_t (*lut_posBnInCnProcBuf_CNG5) [lut_numCnInCnGroups_BG1_R13[2]] = (uint8_t(*)[lut_numCnInCnGroups_BG1_R13[2]]) p_lut->posBnInCnProcBuf[2];
const uint8_t (*lut_posBnInCnProcBuf_CNG6) [lut_numCnInCnGroups_BG1_R13[3]] = (uint8_t(*)[lut_numCnInCnGroups_BG1_R13[3]]) p_lut->posBnInCnProcBuf[3];
const uint8_t (*lut_posBnInCnProcBuf_CNG7) [lut_numCnInCnGroups_BG1_R13[4]] = (uint8_t(*)[lut_numCnInCnGroups_BG1_R13[4]]) p_lut->posBnInCnProcBuf[4];
const uint8_t (*lut_posBnInCnProcBuf_CNG8) [lut_numCnInCnGroups_BG1_R13[5]] = (uint8_t(*)[lut_numCnInCnGroups_BG1_R13[5]]) p_lut->posBnInCnProcBuf[5];
const uint8_t (*lut_posBnInCnProcBuf_CNG9) [lut_numCnInCnGroups_BG1_R13[6]] = (uint8_t(*)[lut_numCnInCnGroups_BG1_R13[6]]) p_lut->posBnInCnProcBuf[6];
const uint8_t (*lut_posBnInCnProcBuf_CNG10)[lut_numCnInCnGroups_BG1_R13[7]] = (uint8_t(*)[lut_numCnInCnGroups_BG1_R13[7]]) p_lut->posBnInCnProcBuf[7];
const uint8_t (*lut_posBnInCnProcBuf_CNG19)[lut_numCnInCnGroups_BG1_R13[8]] = (uint8_t(*)[lut_numCnInCnGroups_BG1_R13[8]]) p_lut->posBnInCnProcBuf[8];
int8_t* p_cnProcBuf;
uint32_t idxBn = 0;
p_cnProcBuf= &cnProcBuf[0];
idxBn = lut_posBnInCnProcBuf_CNG3[0][0]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG3[0][0]);
p_cnProcBuf= &cnProcBuf[384];
idxBn = lut_posBnInCnProcBuf_CNG3[1][0]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG3[1][0]);
p_cnProcBuf= &cnProcBuf[768];
idxBn = lut_posBnInCnProcBuf_CNG3[2][0]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG3[2][0]);
p_cnProcBuf= &cnProcBuf[1152];
idxBn = lut_posBnInCnProcBuf_CNG4[0][0]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG4[0][0]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG4[0][1]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG4[0][1]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG4[0][2]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG4[0][2]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG4[0][3]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG4[0][3]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG4[0][4]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG4[0][4]);
p_cnProcBuf += Z;
p_cnProcBuf= &cnProcBuf[3072];
idxBn = lut_posBnInCnProcBuf_CNG4[1][0]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG4[1][0]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG4[1][1]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG4[1][1]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG4[1][2]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG4[1][2]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG4[1][3]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG4[1][3]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG4[1][4]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG4[1][4]);
p_cnProcBuf += Z;
p_cnProcBuf= &cnProcBuf[4992];
idxBn = lut_posBnInCnProcBuf_CNG4[2][0]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG4[2][0]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG4[2][1]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG4[2][1]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG4[2][2]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG4[2][2]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG4[2][3]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG4[2][3]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG4[2][4]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG4[2][4]);
p_cnProcBuf += Z;
p_cnProcBuf= &cnProcBuf[6912];
idxBn = lut_posBnInCnProcBuf_CNG4[3][0]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG4[3][0]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG4[3][1]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG4[3][1]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG4[3][2]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG4[3][2]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG4[3][3]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG4[3][3]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG4[3][4]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG4[3][4]);
p_cnProcBuf += Z;
p_cnProcBuf= &cnProcBuf[8832];
idxBn = lut_posBnInCnProcBuf_CNG5[0][0]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[0][0]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[0][1]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[0][1]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[0][2]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[0][2]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[0][3]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[0][3]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[0][4]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[0][4]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[0][5]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[0][5]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[0][6]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[0][6]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[0][7]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[0][7]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[0][8]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[0][8]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[0][9]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[0][9]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[0][10]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[0][10]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[0][11]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[0][11]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[0][12]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[0][12]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[0][13]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[0][13]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[0][14]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[0][14]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[0][15]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[0][15]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[0][16]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[0][16]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[0][17]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[0][17]);
p_cnProcBuf += Z;
p_cnProcBuf= &cnProcBuf[15744];
idxBn = lut_posBnInCnProcBuf_CNG5[1][0]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[1][0]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[1][1]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[1][1]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[1][2]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[1][2]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[1][3]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[1][3]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[1][4]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[1][4]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[1][5]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[1][5]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[1][6]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[1][6]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[1][7]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[1][7]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[1][8]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[1][8]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[1][9]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[1][9]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[1][10]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[1][10]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[1][11]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[1][11]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[1][12]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[1][12]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[1][13]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[1][13]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[1][14]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[1][14]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[1][15]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[1][15]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[1][16]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[1][16]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[1][17]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[1][17]);
p_cnProcBuf += Z;
p_cnProcBuf= &cnProcBuf[22656];
idxBn = lut_posBnInCnProcBuf_CNG5[2][0]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[2][0]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[2][1]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[2][1]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[2][2]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[2][2]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[2][3]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[2][3]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[2][4]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[2][4]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[2][5]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[2][5]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[2][6]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[2][6]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[2][7]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[2][7]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[2][8]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[2][8]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[2][9]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[2][9]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[2][10]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[2][10]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[2][11]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[2][11]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[2][12]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[2][12]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[2][13]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[2][13]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[2][14]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[2][14]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[2][15]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[2][15]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[2][16]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[2][16]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[2][17]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[2][17]);
p_cnProcBuf += Z;
p_cnProcBuf= &cnProcBuf[29568];
idxBn = lut_posBnInCnProcBuf_CNG5[3][0]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[3][0]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[3][1]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[3][1]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[3][2]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[3][2]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[3][3]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[3][3]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[3][4]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[3][4]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[3][5]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[3][5]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[3][6]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[3][6]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[3][7]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[3][7]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[3][8]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[3][8]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[3][9]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[3][9]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[3][10]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[3][10]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[3][11]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[3][11]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[3][12]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[3][12]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[3][13]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[3][13]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[3][14]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[3][14]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[3][15]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[3][15]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[3][16]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[3][16]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[3][17]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[3][17]);
p_cnProcBuf += Z;
p_cnProcBuf= &cnProcBuf[36480];
idxBn = lut_posBnInCnProcBuf_CNG5[4][0]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[4][0]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[4][1]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[4][1]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[4][2]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[4][2]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[4][3]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[4][3]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[4][4]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[4][4]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[4][5]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[4][5]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[4][6]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[4][6]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[4][7]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[4][7]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[4][8]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[4][8]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[4][9]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[4][9]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[4][10]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[4][10]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[4][11]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[4][11]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[4][12]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[4][12]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[4][13]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[4][13]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[4][14]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[4][14]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[4][15]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[4][15]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[4][16]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[4][16]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG5[4][17]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[4][17]);
p_cnProcBuf += Z;
p_cnProcBuf= &cnProcBuf[43392];
idxBn = lut_posBnInCnProcBuf_CNG6[0][0]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[0][0]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG6[0][1]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[0][1]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG6[0][2]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[0][2]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG6[0][3]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[0][3]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG6[0][4]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[0][4]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG6[0][5]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[0][5]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG6[0][6]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[0][6]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG6[0][7]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[0][7]);
p_cnProcBuf += Z;
p_cnProcBuf= &cnProcBuf[46464];
idxBn = lut_posBnInCnProcBuf_CNG6[1][0]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[1][0]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG6[1][1]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[1][1]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG6[1][2]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[1][2]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG6[1][3]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[1][3]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG6[1][4]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[1][4]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG6[1][5]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[1][5]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG6[1][6]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[1][6]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG6[1][7]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[1][7]);
p_cnProcBuf += Z;
p_cnProcBuf= &cnProcBuf[49536];
idxBn = lut_posBnInCnProcBuf_CNG6[2][0]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[2][0]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG6[2][1]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[2][1]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG6[2][2]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[2][2]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG6[2][3]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[2][3]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG6[2][4]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[2][4]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG6[2][5]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[2][5]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG6[2][6]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[2][6]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG6[2][7]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[2][7]);
p_cnProcBuf += Z;
p_cnProcBuf= &cnProcBuf[52608];
idxBn = lut_posBnInCnProcBuf_CNG6[3][0]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[3][0]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG6[3][1]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[3][1]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG6[3][2]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[3][2]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG6[3][3]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[3][3]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG6[3][4]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[3][4]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG6[3][5]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[3][5]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG6[3][6]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[3][6]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG6[3][7]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[3][7]);
p_cnProcBuf += Z;
p_cnProcBuf= &cnProcBuf[55680];
idxBn = lut_posBnInCnProcBuf_CNG6[4][0]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[4][0]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG6[4][1]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[4][1]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG6[4][2]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[4][2]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG6[4][3]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[4][3]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG6[4][4]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[4][4]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG6[4][5]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[4][5]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG6[4][6]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[4][6]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG6[4][7]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[4][7]);
p_cnProcBuf += Z;
p_cnProcBuf= &cnProcBuf[58752];
idxBn = lut_posBnInCnProcBuf_CNG6[5][0]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[5][0]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG6[5][1]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[5][1]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG6[5][2]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[5][2]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG6[5][3]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[5][3]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG6[5][4]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[5][4]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG6[5][5]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[5][5]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG6[5][6]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[5][6]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG6[5][7]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[5][7]);
p_cnProcBuf += Z;
p_cnProcBuf= &cnProcBuf[61824];
idxBn = lut_posBnInCnProcBuf_CNG7[0][0]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG7[0][0]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG7[0][1]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG7[0][1]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG7[0][2]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG7[0][2]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG7[0][3]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG7[0][3]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG7[0][4]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG7[0][4]);
p_cnProcBuf += Z;
p_cnProcBuf= &cnProcBuf[63744];
idxBn = lut_posBnInCnProcBuf_CNG7[1][0]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG7[1][0]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG7[1][1]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG7[1][1]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG7[1][2]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG7[1][2]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG7[1][3]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG7[1][3]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG7[1][4]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG7[1][4]);
p_cnProcBuf += Z;
p_cnProcBuf= &cnProcBuf[65664];
idxBn = lut_posBnInCnProcBuf_CNG7[2][0]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG7[2][0]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG7[2][1]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG7[2][1]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG7[2][2]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG7[2][2]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG7[2][3]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG7[2][3]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG7[2][4]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG7[2][4]);
p_cnProcBuf += Z;
p_cnProcBuf= &cnProcBuf[67584];
idxBn = lut_posBnInCnProcBuf_CNG7[3][0]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG7[3][0]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG7[3][1]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG7[3][1]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG7[3][2]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG7[3][2]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG7[3][3]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG7[3][3]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG7[3][4]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG7[3][4]);
p_cnProcBuf += Z;
p_cnProcBuf= &cnProcBuf[69504];
idxBn = lut_posBnInCnProcBuf_CNG7[4][0]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG7[4][0]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG7[4][1]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG7[4][1]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG7[4][2]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG7[4][2]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG7[4][3]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG7[4][3]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG7[4][4]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG7[4][4]);
p_cnProcBuf += Z;
p_cnProcBuf= &cnProcBuf[71424];
idxBn = lut_posBnInCnProcBuf_CNG7[5][0]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG7[5][0]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG7[5][1]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG7[5][1]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG7[5][2]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG7[5][2]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG7[5][3]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG7[5][3]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG7[5][4]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG7[5][4]);
p_cnProcBuf += Z;
p_cnProcBuf= &cnProcBuf[73344];
idxBn = lut_posBnInCnProcBuf_CNG7[6][0]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG7[6][0]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG7[6][1]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG7[6][1]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG7[6][2]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG7[6][2]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG7[6][3]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG7[6][3]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG7[6][4]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG7[6][4]);
p_cnProcBuf += Z;
p_cnProcBuf= &cnProcBuf[75264];
idxBn = lut_posBnInCnProcBuf_CNG8[0][0]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG8[0][0]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG8[0][1]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG8[0][1]);
p_cnProcBuf += Z;
p_cnProcBuf= &cnProcBuf[76032];
idxBn = lut_posBnInCnProcBuf_CNG8[1][0]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG8[1][0]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG8[1][1]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG8[1][1]);
p_cnProcBuf += Z;
p_cnProcBuf= &cnProcBuf[76800];
idxBn = lut_posBnInCnProcBuf_CNG8[2][0]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG8[2][0]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG8[2][1]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG8[2][1]);
p_cnProcBuf += Z;
p_cnProcBuf= &cnProcBuf[77568];
idxBn = lut_posBnInCnProcBuf_CNG8[3][0]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG8[3][0]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG8[3][1]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG8[3][1]);
p_cnProcBuf += Z;
p_cnProcBuf= &cnProcBuf[78336];
idxBn = lut_posBnInCnProcBuf_CNG8[4][0]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG8[4][0]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG8[4][1]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG8[4][1]);
p_cnProcBuf += Z;
p_cnProcBuf= &cnProcBuf[79104];
idxBn = lut_posBnInCnProcBuf_CNG8[5][0]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG8[5][0]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG8[5][1]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG8[5][1]);
p_cnProcBuf += Z;
p_cnProcBuf= &cnProcBuf[79872];
idxBn = lut_posBnInCnProcBuf_CNG8[6][0]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG8[6][0]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG8[6][1]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG8[6][1]);
p_cnProcBuf += Z;
p_cnProcBuf= &cnProcBuf[80640];
idxBn = lut_posBnInCnProcBuf_CNG8[7][0]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG8[7][0]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG8[7][1]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG8[7][1]);
p_cnProcBuf += Z;
p_cnProcBuf= &cnProcBuf[81408];
idxBn = lut_posBnInCnProcBuf_CNG9[0][0]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG9[0][0]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG9[0][1]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG9[0][1]);
p_cnProcBuf += Z;
p_cnProcBuf= &cnProcBuf[82176];
idxBn = lut_posBnInCnProcBuf_CNG9[1][0]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG9[1][0]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG9[1][1]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG9[1][1]);
p_cnProcBuf += Z;
p_cnProcBuf= &cnProcBuf[82944];
idxBn = lut_posBnInCnProcBuf_CNG9[2][0]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG9[2][0]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG9[2][1]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG9[2][1]);
p_cnProcBuf += Z;
p_cnProcBuf= &cnProcBuf[83712];
idxBn = lut_posBnInCnProcBuf_CNG9[3][0]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG9[3][0]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG9[3][1]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG9[3][1]);
p_cnProcBuf += Z;
p_cnProcBuf= &cnProcBuf[84480];
idxBn = lut_posBnInCnProcBuf_CNG9[4][0]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG9[4][0]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG9[4][1]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG9[4][1]);
p_cnProcBuf += Z;
p_cnProcBuf= &cnProcBuf[85248];
idxBn = lut_posBnInCnProcBuf_CNG9[5][0]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG9[5][0]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG9[5][1]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG9[5][1]);
p_cnProcBuf += Z;
p_cnProcBuf= &cnProcBuf[86016];
idxBn = lut_posBnInCnProcBuf_CNG9[6][0]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG9[6][0]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG9[6][1]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG9[6][1]);
p_cnProcBuf += Z;
p_cnProcBuf= &cnProcBuf[86784];
idxBn = lut_posBnInCnProcBuf_CNG9[7][0]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG9[7][0]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG9[7][1]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG9[7][1]);
p_cnProcBuf += Z;
p_cnProcBuf= &cnProcBuf[87552];
idxBn = lut_posBnInCnProcBuf_CNG9[8][0]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG9[8][0]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG9[8][1]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG9[8][1]);
p_cnProcBuf += Z;
p_cnProcBuf= &cnProcBuf[88320];
idxBn = lut_posBnInCnProcBuf_CNG10[0][0]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG10[0][0]);
p_cnProcBuf += Z;
p_cnProcBuf= &cnProcBuf[88704];
idxBn = lut_posBnInCnProcBuf_CNG10[1][0]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG10[1][0]);
p_cnProcBuf += Z;
p_cnProcBuf= &cnProcBuf[89088];
idxBn = lut_posBnInCnProcBuf_CNG10[2][0]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG10[2][0]);
p_cnProcBuf += Z;
p_cnProcBuf= &cnProcBuf[89472];
idxBn = lut_posBnInCnProcBuf_CNG10[3][0]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG10[3][0]);
p_cnProcBuf += Z;
p_cnProcBuf= &cnProcBuf[89856];
idxBn = lut_posBnInCnProcBuf_CNG10[4][0]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG10[4][0]);
p_cnProcBuf += Z;
p_cnProcBuf= &cnProcBuf[90240];
idxBn = lut_posBnInCnProcBuf_CNG10[5][0]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG10[5][0]);
p_cnProcBuf += Z;
p_cnProcBuf= &cnProcBuf[90624];
idxBn = lut_posBnInCnProcBuf_CNG10[6][0]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG10[6][0]);
p_cnProcBuf += Z;
p_cnProcBuf= &cnProcBuf[91008];
idxBn = lut_posBnInCnProcBuf_CNG10[7][0]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG10[7][0]);
p_cnProcBuf += Z;
p_cnProcBuf= &cnProcBuf[91392];
idxBn = lut_posBnInCnProcBuf_CNG10[8][0]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG10[8][0]);
p_cnProcBuf += Z;
p_cnProcBuf= &cnProcBuf[91776];
idxBn = lut_posBnInCnProcBuf_CNG10[9][0]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG10[9][0]);
p_cnProcBuf += Z;
p_cnProcBuf= &cnProcBuf[92160];
idxBn = lut_posBnInCnProcBuf_CNG19[0][0]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[0][0]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG19[0][1]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[0][1]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG19[0][2]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[0][2]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG19[0][3]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[0][3]);
p_cnProcBuf += Z;
p_cnProcBuf= &cnProcBuf[93696];
idxBn = lut_posBnInCnProcBuf_CNG19[1][0]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[1][0]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG19[1][1]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[1][1]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG19[1][2]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[1][2]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG19[1][3]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[1][3]);
p_cnProcBuf += Z;
p_cnProcBuf= &cnProcBuf[95232];
idxBn = lut_posBnInCnProcBuf_CNG19[2][0]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[2][0]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG19[2][1]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[2][1]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG19[2][2]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[2][2]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG19[2][3]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[2][3]);
p_cnProcBuf += Z;
p_cnProcBuf= &cnProcBuf[96768];
idxBn = lut_posBnInCnProcBuf_CNG19[3][0]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[3][0]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG19[3][1]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[3][1]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG19[3][2]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[3][2]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG19[3][3]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[3][3]);
p_cnProcBuf += Z;
p_cnProcBuf= &cnProcBuf[98304];
idxBn = lut_posBnInCnProcBuf_CNG19[4][0]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[4][0]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG19[4][1]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[4][1]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG19[4][2]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[4][2]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG19[4][3]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[4][3]);
p_cnProcBuf += Z;
p_cnProcBuf= &cnProcBuf[99840];
idxBn = lut_posBnInCnProcBuf_CNG19[5][0]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[5][0]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG19[5][1]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[5][1]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG19[5][2]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[5][2]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG19[5][3]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[5][3]);
p_cnProcBuf += Z;
p_cnProcBuf= &cnProcBuf[101376];
idxBn = lut_posBnInCnProcBuf_CNG19[6][0]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[6][0]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG19[6][1]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[6][1]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG19[6][2]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[6][2]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG19[6][3]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[6][3]);
p_cnProcBuf += Z;
p_cnProcBuf= &cnProcBuf[102912];
idxBn = lut_posBnInCnProcBuf_CNG19[7][0]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[7][0]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG19[7][1]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[7][1]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG19[7][2]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[7][2]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG19[7][3]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[7][3]);
p_cnProcBuf += Z;
p_cnProcBuf= &cnProcBuf[104448];
idxBn = lut_posBnInCnProcBuf_CNG19[8][0]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[8][0]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG19[8][1]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[8][1]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG19[8][2]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[8][2]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG19[8][3]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[8][3]);
p_cnProcBuf += Z;
p_cnProcBuf= &cnProcBuf[105984];
idxBn = lut_posBnInCnProcBuf_CNG19[9][0]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[9][0]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG19[9][1]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[9][1]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG19[9][2]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[9][2]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG19[9][3]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[9][3]);
p_cnProcBuf += Z;
p_cnProcBuf= &cnProcBuf[107520];
idxBn = lut_posBnInCnProcBuf_CNG19[10][0]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[10][0]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG19[10][1]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[10][1]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG19[10][2]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[10][2]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG19[10][3]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[10][3]);
p_cnProcBuf += Z;
p_cnProcBuf= &cnProcBuf[109056];
idxBn = lut_posBnInCnProcBuf_CNG19[11][0]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[11][0]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG19[11][1]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[11][1]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG19[11][2]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[11][2]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG19[11][3]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[11][3]);
p_cnProcBuf += Z;
p_cnProcBuf= &cnProcBuf[110592];
idxBn = lut_posBnInCnProcBuf_CNG19[12][0]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[12][0]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG19[12][1]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[12][1]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG19[12][2]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[12][2]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG19[12][3]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[12][3]);
p_cnProcBuf += Z;
p_cnProcBuf= &cnProcBuf[112128];
idxBn = lut_posBnInCnProcBuf_CNG19[13][0]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[13][0]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG19[13][1]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[13][1]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG19[13][2]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[13][2]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG19[13][3]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[13][3]);
p_cnProcBuf += Z;
p_cnProcBuf= &cnProcBuf[113664];
idxBn = lut_posBnInCnProcBuf_CNG19[14][0]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[14][0]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG19[14][1]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[14][1]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG19[14][2]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[14][2]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG19[14][3]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[14][3]);
p_cnProcBuf += Z;
p_cnProcBuf= &cnProcBuf[115200];
idxBn = lut_posBnInCnProcBuf_CNG19[15][0]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[15][0]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG19[15][1]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[15][1]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG19[15][2]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[15][2]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG19[15][3]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[15][3]);
p_cnProcBuf += Z;
p_cnProcBuf= &cnProcBuf[116736];
idxBn = lut_posBnInCnProcBuf_CNG19[16][0]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[16][0]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG19[16][1]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[16][1]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG19[16][2]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[16][2]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG19[16][3]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[16][3]);
p_cnProcBuf += Z;
p_cnProcBuf= &cnProcBuf[118272];
idxBn = lut_posBnInCnProcBuf_CNG19[17][0]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[17][0]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG19[17][1]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[17][1]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG19[17][2]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[17][2]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG19[17][3]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[17][3]);
p_cnProcBuf += Z;
p_cnProcBuf= &cnProcBuf[119808];
idxBn = lut_posBnInCnProcBuf_CNG19[18][0]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[18][0]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG19[18][1]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[18][1]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG19[18][2]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[18][2]);
p_cnProcBuf += Z;
idxBn = lut_posBnInCnProcBuf_CNG19[18][3]*Z;
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[18][3]);
p_cnProcBuf += Z;
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment