Commit 3ed622af authored by Sy's avatar Sy

use of avx512 at CN processing level

parent a7e61374
...@@ -2745,8 +2745,8 @@ static inline void nrLDPC_llr2bit(int8_t* out, int8_t* llrOut, uint16_t numLLR) ...@@ -2745,8 +2745,8 @@ static inline void nrLDPC_llr2bit(int8_t* out, int8_t* llrOut, uint16_t numLLR)
uint32_t M = numLLR>>5; uint32_t M = numLLR>>5;
uint32_t Mr = numLLR&31; uint32_t Mr = numLLR&31;
const __m256i* p_zeros = (__m256i*) zeros256_epi8; const __m256i* p_zeros = (__m256i*) zeros512_epi8;
const __m256i* p_ones = (__m256i*) ones256_epi8; const __m256i* p_ones = (__m256i*) ones512_epi8;
for (i=0; i<M; i++) for (i=0; i<M; i++)
{ {
......
...@@ -34,15 +34,15 @@ ...@@ -34,15 +34,15 @@
#include <stdlib.h> #include <stdlib.h>
#include "nrLDPC_types.h" #include "nrLDPC_types.h"
#ifndef malloc32_clear #ifndef malloc64_clear
/** /**
\brief Allocates 32 byte aligned memory and initializes to zero \brief Allocates 64 byte aligned memory and initializes to zero
\param size Input size in bytes \param size Input size in bytes
\return Pointer to memory \return Pointer to memory
*/ */
static inline void* malloc32_clear(size_t size) static inline void* malloc64_clear(size_t size)
{ {
void* ptr = (void*) memalign(32, size+32); void* ptr = (void*) memalign(64, size+64);
memset(ptr, 0, size); memset(ptr, 0, size);
return ptr; return ptr;
} }
...@@ -56,16 +56,16 @@ static inline void* malloc32_clear(size_t size) ...@@ -56,16 +56,16 @@ static inline void* malloc32_clear(size_t size)
*/ */
static inline t_nrLDPC_procBuf* nrLDPC_init_mem(void) static inline t_nrLDPC_procBuf* nrLDPC_init_mem(void)
{ {
t_nrLDPC_procBuf* p_procBuf = (t_nrLDPC_procBuf*) malloc32_clear(sizeof(t_nrLDPC_procBuf)); t_nrLDPC_procBuf* p_procBuf = (t_nrLDPC_procBuf*) malloc64_clear(sizeof(t_nrLDPC_procBuf));
if (p_procBuf) if (p_procBuf)
{ {
p_procBuf->cnProcBuf = (int8_t*) malloc32_clear(NR_LDPC_SIZE_CN_PROC_BUF*sizeof(int8_t)); p_procBuf->cnProcBuf = (int8_t*) malloc64_clear(NR_LDPC_SIZE_CN_PROC_BUF*sizeof(int8_t));
p_procBuf->cnProcBufRes = (int8_t*) malloc32_clear(NR_LDPC_SIZE_CN_PROC_BUF*sizeof(int8_t)); p_procBuf->cnProcBufRes = (int8_t*) malloc64_clear(NR_LDPC_SIZE_CN_PROC_BUF*sizeof(int8_t));
p_procBuf->bnProcBuf = (int8_t*) malloc32_clear(NR_LDPC_SIZE_BN_PROC_BUF*sizeof(int8_t)); p_procBuf->bnProcBuf = (int8_t*) malloc64_clear(NR_LDPC_SIZE_BN_PROC_BUF*sizeof(int8_t));
p_procBuf->bnProcBufRes = (int8_t*) malloc32_clear(NR_LDPC_SIZE_BN_PROC_BUF*sizeof(int8_t)); p_procBuf->bnProcBufRes = (int8_t*) malloc64_clear(NR_LDPC_SIZE_BN_PROC_BUF*sizeof(int8_t));
p_procBuf->llrRes = (int8_t*) malloc32_clear(NR_LDPC_MAX_NUM_LLR *sizeof(int8_t)); p_procBuf->llrRes = (int8_t*) malloc64_clear(NR_LDPC_MAX_NUM_LLR *sizeof(int8_t));
p_procBuf->llrProcBuf = (int8_t*) malloc32_clear(NR_LDPC_MAX_NUM_LLR *sizeof(int8_t)); p_procBuf->llrProcBuf = (int8_t*) malloc64_clear(NR_LDPC_MAX_NUM_LLR *sizeof(int8_t));
} }
return(p_procBuf); return(p_procBuf);
......
#include <stdint.h> #include <stdint.h>
#include <immintrin.h> #include <immintrin.h>
#include "../nrLDPCdecoder_defs.h" #include "../nrLDPCdecoder_defs.h"
#include "../nrLDPC_types.h" #include "../nrLDPC_types.h"
//#include "../nrLDPC_init.h"
//#include "../nrLDPC_mPass.h"
//#include "nrLDPC_cnProc.h"
#include "../nrLDPC_bnProc.h" #include "../nrLDPC_bnProc.h"
#include "cnProc_gen.h" #include "cnProc_gen.h"
...@@ -24,6 +21,16 @@ void nrLDPC_cnProc_BG1_generator(uint16_t Z,int R) ...@@ -24,6 +21,16 @@ void nrLDPC_cnProc_BG1_generator(uint16_t Z,int R)
fprintf(fd,"#include <stdint.h>\n"); fprintf(fd,"#include <stdint.h>\n");
fprintf(fd,"#include <immintrin.h>\n"); fprintf(fd,"#include <immintrin.h>\n");
fprintf(fd, "__m512i _mm512_sign_epi16(__m512i a, __m512i b){ \n"); /* Emulate _mm512_sign_epi16() with instructions that exist in the AVX-512 instruction set */
fprintf(fd, "b = _mm512_min_epi16(b, _mm512_set1_epi16(1)); \n" );
fprintf(fd, "b = _mm512_max_epi16(b, _mm512_set1_epi16(-1)); \n" );
fprintf(fd, " a = _mm512_mullo_epi16(a, b);\n");
fprintf(fd, "return a;\n");
fprintf(fd, "}\n" );
fprintf(fd,"void nrLDPC_cnProc_BG1_Z%d_%s(int8_t* cnProcBuf,int8_t* cnProcBufRes) {\n",Z,ratestr[R]); fprintf(fd,"void nrLDPC_cnProc_BG1_Z%d_%s(int8_t* cnProcBuf,int8_t* cnProcBufRes) {\n",Z,ratestr[R]);
const uint8_t* lut_numCnInCnGroups; const uint8_t* lut_numCnInCnGroups;
...@@ -34,25 +41,25 @@ void nrLDPC_cnProc_BG1_generator(uint16_t Z,int R) ...@@ -34,25 +41,25 @@ void nrLDPC_cnProc_BG1_generator(uint16_t Z,int R)
else if (R==2) lut_numCnInCnGroups = lut_numCnInCnGroups_BG1_R89; else if (R==2) lut_numCnInCnGroups = lut_numCnInCnGroups_BG1_R89;
else { printf("aborting, illegal R %d\n",R); fclose(fd);abort();} else { printf("aborting, illegal R %d\n",R); fclose(fd);abort();}
//__m256i* p_cnProcBuf; //__m512i* p_cnProcBuf;
//__m256i* p_cnProcBufRes; //__m512i* p_cnProcBufRes;
// Number of CNs in Groups // Number of CNs in Groups
uint32_t M; uint32_t M;
uint32_t j; uint32_t j;
uint32_t k; uint32_t k;
// Offset to each bit within a group in terms of 32 Byte // Offset to each bit within a group in terms of 32 Byte
uint32_t bitOffsetInGroup; uint32_t bitOffsetInGroup;
//__m256i ymm0, min, sgn; //__m512i zmm0, min, sgn;
//__m256i* p_cnProcBufResBit; //__m512i* p_cnProcBufResBit;
// const __m256i* p_ones = (__m256i*) ones256_epi8; // const __m512i* p_ones = (__m512i*) ones256_epi8;
// const __m256i* p_maxLLR = (__m256i*) maxLLR256_epi8; // const __m512i* p_maxLLR = (__m512i*) maxLLR256_epi8;
// LUT with offsets for bits that need to be processed // LUT with offsets for bits that need to be processed
// 1. bit proc requires LLRs of 2. and 3. bit, 2.bits of 1. and 3. etc. // 1. bit proc requires LLRs of 2. and 3. bit, 2.bits of 1. and 3. etc.
// Offsets are in units of bitOffsetInGroup (1*384/32) // Offsets are in units of bitOffsetInGroup (1*384/64)
// const uint8_t lut_idxCnProcG3[3][2] = {{12,24}, {0,24}, {0,12}}; // const uint8_t lut_idxCnProcG3[3][2] = {{12,24}, {0,24}, {0,12}};
// ===================================================================== // =====================================================================
...@@ -60,26 +67,29 @@ void nrLDPC_cnProc_BG1_generator(uint16_t Z,int R) ...@@ -60,26 +67,29 @@ void nrLDPC_cnProc_BG1_generator(uint16_t Z,int R)
fprintf(fd,"//Process group with 3 BNs\n"); fprintf(fd,"//Process group with 3 BNs\n");
// LUT with offsets for bits that need to be processed // LUT with offsets for bits that need to be processed
// 1. bit proc requires LLRs of 2. and 3. bit, 2.bits of 1. and 3. etc. // 1. bit proc requires LLRs of 2. and 3. bit, 2.bits of 1. and 3. etc.
// Offsets are in units of bitOffsetInGroup (1*384/32) // Offsets are in units of bitOffsetInGroup (1*384/64)=6
const uint8_t lut_idxCnProcG3[3][2] = {{12,24}, {0,24}, {0,12}}; // Offsets are in units of bitOffsetInGroup (1*384/64)=6
const uint8_t lut_idxCnProcG3[3][2] = {{6,12}, {0,12}, {0,6}};
fprintf(fd," __m256i ymm0, min, sgn,ones,maxLLR;\n");
fprintf(fd," ones = _mm256_set1_epi8((char)1);\n"); fprintf(fd," __m512i zmm0, min, sgn,ones,maxLLR;\n");
fprintf(fd," maxLLR = _mm256_set1_epi8((char)127);\n"); fprintf(fd," ones = _mm512_set1_epi8((char)1);\n");
fprintf(fd," maxLLR = _mm512_set1_epi8((char)127);\n");
if (lut_numCnInCnGroups[0] > 0) if (lut_numCnInCnGroups[0] > 0)
{ {
// Number of groups of 32 CNs for parallel processing // Number of groups of 32 CNs for parallel processing
// Ceil for values not divisible by 32 // Ceil for values not divisible by 64
M = (lut_numCnInCnGroups[0]*Z + 31)>>5; M = (lut_numCnInCnGroups[0]*Z + 63)>>5;
// Set the offset to each bit within a group in terms of 32 Byte // Set the offset to each bit within a group in terms of 32 Byte
bitOffsetInGroup = (lut_numCnInCnGroups_BG1_R13[0]*NR_LDPC_ZMAX)>>5; bitOffsetInGroup = (lut_numCnInCnGroups_BG1_R13[0]*NR_LDPC_ZMAX)>>5;
// Set pointers to start of group 3 // Set pointers to start of group 3
//p_cnProcBuf = (__m256i*) &cnProcBuf [lut_startAddrCnGroups[0]]; //p_cnProcBuf = (__m512i*) &cnProcBuf [lut_startAddrCnGroups[0]];
//p_cnProcBufRes = (__m256i*) &cnProcBufRes[lut_startAddrCnGroups[0]]; //p_cnProcBufRes = (__m512i*) &cnProcBufRes[lut_startAddrCnGroups[0]];
// Loop over every BN // Loop over every BN
int iprime=0; int iprime=0;
...@@ -93,55 +103,55 @@ void nrLDPC_cnProc_BG1_generator(uint16_t Z,int R) ...@@ -93,55 +103,55 @@ void nrLDPC_cnProc_BG1_generator(uint16_t Z,int R)
// { // {
fprintf(fd," for (int i=0;i<%d;i+=2) {\n",M); fprintf(fd," for (int i=0;i<%d;i+=2) {\n",M);
// Abs and sign of 32 CNs (first BN) // Abs and sign of 32 CNs (first BN)
// ymm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i]; // zmm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
fprintf(fd," ymm0 = ((__m256i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[0]>>5)+lut_idxCnProcG3[j][0]); fprintf(fd," zmm0 = ((__m512i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[0]>>5)+lut_idxCnProcG3[j][0]);
// sgn = _mm256_sign_epi8(ones, ymm0); // sgn = _mm512_sign_epi16(ones, zmm0);
fprintf(fd," sgn = _mm256_sign_epi8(ones, ymm0);\n"); fprintf(fd," sgn = _mm512_sign_epi16(ones, zmm0);\n");
// min = _mm256_abs_epi8(ymm0); // min = _mm512_abs_epi8(zmm0);
fprintf(fd," min = _mm256_abs_epi8(ymm0);\n"); fprintf(fd," min = _mm512_abs_epi8(zmm0);\n");
// 32 CNs of second BN // 32 CNs of second BN
// ymm0 = p_cnProcBuf[lut_idxCnProcG3[j][1] + i]; // zmm0 = p_cnProcBuf[lut_idxCnProcG3[j][1] + i];
fprintf(fd," ymm0 = ((__m256i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[0]>>5)+lut_idxCnProcG3[j][1]); fprintf(fd," zmm0 = ((__m512i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[0]>>5)+lut_idxCnProcG3[j][1]);
// min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); // min = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));
fprintf(fd," min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));\n"); fprintf(fd," min = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));\n");
// sgn = _mm256_sign_epi8(sgn, ymm0); // sgn = _mm512_sign_epi16(sgn, zmm0);
fprintf(fd," sgn = _mm256_sign_epi8(sgn, ymm0);\n"); fprintf(fd," sgn = _mm512_sign_epi16(sgn, zmm0);\n");
// Store result // Store result
// min = _mm256_min_epu8(min, maxLLR); // 128 in epi8 is -127 // min = _mm512_min_epu8(min, maxLLR); // 128 in epi8 is -127
fprintf(fd," min = _mm256_min_epu8(min, maxLLR);\n"); fprintf(fd," min = _mm512_min_epu8(min, maxLLR);\n");
// *p_cnProcBufResBit = _mm256_sign_epi8(min, sgn); // *p_cnProcBufResBit = _mm512_sign_epi16(min, sgn);
// p_cnProcBufResBit++; // p_cnProcBufResBit++;
fprintf(fd," ((__m256i*)cnProcBufRes)[%d+i] = _mm256_sign_epi8(min, sgn);\n",(lut_startAddrCnGroups[0]>>5)+(j*bitOffsetInGroup)); fprintf(fd," ((__m512i*)cnProcBufRes)[%d+i] = _mm512_sign_epi16(min, sgn);\n",(lut_startAddrCnGroups[0]>>5)+(j*bitOffsetInGroup));
// Abs and sign of 32 CNs (first BN) // Abs and sign of 32 CNs (first BN)
// ymm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i]; // zmm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
fprintf(fd," ymm0 = ((__m256i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[0]>>5)+lut_idxCnProcG3[j][0]+1); fprintf(fd," zmm0 = ((__m512i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[0]>>5)+lut_idxCnProcG3[j][0]+1);
// sgn = _mm256_sign_epi8(ones, ymm0); // sgn = _mm512_sign_epi16(ones, zmm0);
fprintf(fd," sgn = _mm256_sign_epi8(ones, ymm0);\n"); fprintf(fd," sgn = _mm512_sign_epi16(ones, zmm0);\n");
// min = _mm256_abs_epi8(ymm0); // min = _mm512_abs_epi8(zmm0);
fprintf(fd," min = _mm256_abs_epi8(ymm0);\n"); fprintf(fd," min = _mm512_abs_epi8(zmm0);\n");
// 32 CNs of second BN // 32 CNs of second BN
// ymm0 = p_cnProcBuf[lut_idxCnProcG3[j][1] + i]; // zmm0 = p_cnProcBuf[lut_idxCnProcG3[j][1] + i];
fprintf(fd," ymm0 = ((__m256i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[0]>>5)+lut_idxCnProcG3[j][1]+1); fprintf(fd," zmm0 = ((__m512i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[0]>>5)+lut_idxCnProcG3[j][1]+1);
// min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); // min = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));
fprintf(fd," min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));\n"); fprintf(fd," min = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));\n");
// sgn = _mm256_sign_epi8(sgn, ymm0); // sgn = _mm512_sign_epi16(sgn, zmm0);
fprintf(fd," sgn = _mm256_sign_epi8(sgn, ymm0);\n"); fprintf(fd," sgn = _mm512_sign_epi16(sgn, zmm0);\n");
// Store result // Store result
// min = _mm256_min_epu8(min, maxLLR); // 128 in epi8 is -127 // min = _mm512_min_epu8(min, maxLLR); // 128 in epi8 is -127
fprintf(fd," min = _mm256_min_epu8(min, maxLLR);\n"); fprintf(fd," min = _mm512_min_epu8(min, maxLLR);\n");
// *p_cnProcBufResBit = _mm256_sign_epi8(min, sgn); // *p_cnProcBufResBit = _mm512_sign_epi16(min, sgn);
// p_cnProcBufResBit++; // p_cnProcBufResBit++;
fprintf(fd," ((__m256i*)cnProcBufRes)[%d+i] = _mm256_sign_epi8(min, sgn);\n",(lut_startAddrCnGroups[0]>>5)+(j*bitOffsetInGroup)+1); fprintf(fd," ((__m512i*)cnProcBufRes)[%d+i] = _mm512_sign_epi16(min, sgn);\n",(lut_startAddrCnGroups[0]>>5)+(j*bitOffsetInGroup)+1);
fprintf(fd," }\n"); fprintf(fd," }\n");
} }
...@@ -150,22 +160,22 @@ void nrLDPC_cnProc_BG1_generator(uint16_t Z,int R) ...@@ -150,22 +160,22 @@ void nrLDPC_cnProc_BG1_generator(uint16_t Z,int R)
// ===================================================================== // =====================================================================
// Process group with 4 BNs // Process group with 4 BNs
fprintf(fd,"//Process group with 4 BNs\n"); fprintf(fd,"//Process group with 4 BNs\n");
// Offset is 5*384/32 = 60 // Offset is 5*384/64 = 30
const uint8_t lut_idxCnProcG4[4][3] = {{60,120,180}, {0,120,180}, {0,60,180}, {0,60,120}}; const uint8_t lut_idxCnProcG4[4][3] = {{30,60,90}, {0,60,90}, {0,30,90}, {0,30,60}};
if (lut_numCnInCnGroups[1] > 0) if (lut_numCnInCnGroups[1] > 0)
{ {
// Number of groups of 32 CNs for parallel processing // Number of groups of 32 CNs for parallel processing
// Ceil for values not divisible by 32 // Ceil for values not divisible by 64
M = (lut_numCnInCnGroups[1]*Z + 31)>>5; M = (lut_numCnInCnGroups[1]*Z + 63)>>5;
// Set the offset to each bit within a group in terms of 32 Byte // Set the offset to each bit within a group in terms of 32 Byte
bitOffsetInGroup = (lut_numCnInCnGroups_BG1_R13[1]*NR_LDPC_ZMAX)>>5; bitOffsetInGroup = (lut_numCnInCnGroups_BG1_R13[1]*NR_LDPC_ZMAX)>>5;
// Set pointers to start of group 4 // Set pointers to start of group 4
//p_cnProcBuf = (__m256i*) &cnProcBuf [lut_startAddrCnGroups[1]]; //p_cnProcBuf = (__m512i*) &cnProcBuf [lut_startAddrCnGroups[1]];
//p_cnProcBufRes = (__m256i*) &cnProcBufRes[lut_startAddrCnGroups[1]]; //p_cnProcBufRes = (__m512i*) &cnProcBufRes[lut_startAddrCnGroups[1]];
// Loop over every BN // Loop over every BN
int iprime=0; int iprime=0;
...@@ -178,33 +188,33 @@ void nrLDPC_cnProc_BG1_generator(uint16_t Z,int R) ...@@ -178,33 +188,33 @@ void nrLDPC_cnProc_BG1_generator(uint16_t Z,int R)
// for (i=0; i<M; i++,iprime++) // for (i=0; i<M; i++,iprime++)
// { // {
fprintf(fd," for (int i=0;i<%d;i++) {\n",M); fprintf(fd," for (int i=0;i<%d;i++) {\n",M);
// Abs and sign of 32 CNs (first BN) // Abs and sign of 32 CNs (first BN)
// ymm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i]; // zmm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
fprintf(fd," ymm0 = ((__m256i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[1]>>5)+lut_idxCnProcG4[j][0]); fprintf(fd," zmm0 = ((__m512i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[1]>>5)+lut_idxCnProcG4[j][0]);
// sgn = _mm256_sign_epi8(ones, ymm0); // sgn = _mm512_sign_epi16(ones, zmm0);
fprintf(fd," sgn = _mm256_sign_epi8(ones, ymm0);\n"); fprintf(fd," sgn = _mm512_sign_epi16(ones, zmm0);\n");
// min = _mm256_abs_epi8(ymm0); // min = _mm512_abs_epi8(zmm0);
fprintf(fd," min = _mm256_abs_epi8(ymm0);\n"); fprintf(fd," min = _mm512_abs_epi8(zmm0);\n");
// Loop over BNs // Loop over BNs
for (k=1; k<3; k++) for (k=1; k<3; k++)
{ {
fprintf(fd," ymm0 = ((__m256i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[1]>>5)+lut_idxCnProcG4[j][k]); fprintf(fd," zmm0 = ((__m512i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[1]>>5)+lut_idxCnProcG4[j][k]);
// min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); // min = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));
fprintf(fd," min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));\n"); fprintf(fd," min = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));\n");
// sgn = _mm256_sign_epi8(sgn, ymm0); // sgn = _mm512_sign_epi16(sgn, zmm0);
fprintf(fd," sgn = _mm256_sign_epi8(sgn, ymm0);\n"); fprintf(fd," sgn = _mm512_sign_epi16(sgn, zmm0);\n");
} }
// Store result // Store result
// min = _mm256_min_epu8(min, maxLLR); // 128 in epi8 is -127 // min = _mm512_min_epu8(min, maxLLR); // 128 in epi8 is -127
fprintf(fd," min = _mm256_min_epu8(min, maxLLR);\n"); fprintf(fd," min = _mm512_min_epu8(min, maxLLR);\n");
// *p_cnProcBufResBit = _mm256_sign_epi8(min, sgn); // *p_cnProcBufResBit = _mm512_sign_epi16(min, sgn);
// p_cnProcBufResBit++; // p_cnProcBufResBit++;
fprintf(fd," ((__m256i*)cnProcBufRes)[%d+i] = _mm256_sign_epi8(min, sgn);\n",(lut_startAddrCnGroups[1]>>5)+(j*bitOffsetInGroup)); fprintf(fd," ((__m512i*)cnProcBufRes)[%d+i] = _mm512_sign_epi16(min, sgn);\n",(lut_startAddrCnGroups[1]>>5)+(j*bitOffsetInGroup));
fprintf(fd," }\n"); fprintf(fd," }\n");
} }
} }
...@@ -213,24 +223,24 @@ void nrLDPC_cnProc_BG1_generator(uint16_t Z,int R) ...@@ -213,24 +223,24 @@ void nrLDPC_cnProc_BG1_generator(uint16_t Z,int R)
// ===================================================================== // =====================================================================
// Process group with 5 BNs // Process group with 5 BNs
fprintf(fd,"//Process group with 5 BNs\n"); fprintf(fd,"//Process group with 5 BNs\n");
// Offset is 18*384/32 = 216 // Offset is 18*384/64 = 216
const uint16_t lut_idxCnProcG5[5][4] = {{216,432,648,864}, {0,432,648,864}, const uint16_t lut_idxCnProcG5[5][4] = {{108,216,324,432}, {0,216,324,432},
{0,216,648,864}, {0,216,432,864}, {0,216,432,648}}; {0,108,324,432}, {0,108,216,432}, {0,108,216,324}};
if (lut_numCnInCnGroups[2] > 0) if (lut_numCnInCnGroups[2] > 0)
{ {
// Number of groups of 32 CNs for parallel processing // Number of groups of 32 CNs for parallel processing
// Ceil for values not divisible by 32 // Ceil for values not divisible by 64
M = (lut_numCnInCnGroups[2]*Z + 31)>>5; M = (lut_numCnInCnGroups[2]*Z + 63)>>5;
// Set the offset to each bit within a group in terms of 32 Byte // Set the offset to each bit within a group in terms of 32 Byte
bitOffsetInGroup = (lut_numCnInCnGroups_BG1_R13[2]*NR_LDPC_ZMAX)>>5; bitOffsetInGroup = (lut_numCnInCnGroups_BG1_R13[2]*NR_LDPC_ZMAX)>>5;
// Set pointers to start of group 4 // Set pointers to start of group 4
//p_cnProcBuf = (__m256i*) &cnProcBuf [lut_startAddrCnGroups[1]]; //p_cnProcBuf = (__m512i*) &cnProcBuf [lut_startAddrCnGroups[1]];
//p_cnProcBufRes = (__m256i*) &cnProcBufRes[lut_startAddrCnGroups[1]]; //p_cnProcBufRes = (__m512i*) &cnProcBufRes[lut_startAddrCnGroups[1]];
// Loop over every BN // Loop over every BN
int iprime=0; int iprime=0;
...@@ -243,33 +253,33 @@ void nrLDPC_cnProc_BG1_generator(uint16_t Z,int R) ...@@ -243,33 +253,33 @@ void nrLDPC_cnProc_BG1_generator(uint16_t Z,int R)
// for (i=0; i<M; i++,iprime++) // for (i=0; i<M; i++,iprime++)
// { // {
fprintf(fd," for (int i=0;i<%d;i++) {\n",M); fprintf(fd," for (int i=0;i<%d;i++) {\n",M);
// Abs and sign of 32 CNs (first BN) // Abs and sign of 32 CNs (first BN)
// ymm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i]; // zmm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
fprintf(fd," ymm0 = ((__m256i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[2]>>5)+lut_idxCnProcG5[j][0]); fprintf(fd," zmm0 = ((__m512i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[2]>>5)+lut_idxCnProcG5[j][0]);
// sgn = _mm256_sign_epi8(ones, ymm0); // sgn = _mm512_sign_epi16(ones, zmm0);
fprintf(fd," sgn = _mm256_sign_epi8(ones, ymm0);\n"); fprintf(fd," sgn = _mm512_sign_epi16(ones, zmm0);\n");
// min = _mm256_abs_epi8(ymm0); // min = _mm512_abs_epi8(zmm0);
fprintf(fd," min = _mm256_abs_epi8(ymm0);\n"); fprintf(fd," min = _mm512_abs_epi8(zmm0);\n");
// Loop over BNs // Loop over BNs
for (k=1; k<4; k++) for (k=1; k<4; k++)
{ {
fprintf(fd," ymm0 = ((__m256i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[2]>>5)+lut_idxCnProcG5[j][k]); fprintf(fd," zmm0 = ((__m512i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[2]>>5)+lut_idxCnProcG5[j][k]);
// min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); // min = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));
fprintf(fd," min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));\n"); fprintf(fd," min = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));\n");
// sgn = _mm256_sign_epi8(sgn, ymm0); // sgn = _mm512_sign_epi16(sgn, zmm0);
fprintf(fd," sgn = _mm256_sign_epi8(sgn, ymm0);\n"); fprintf(fd," sgn = _mm512_sign_epi16(sgn, zmm0);\n");
} }
// Store result // Store result
// min = _mm256_min_epu8(min, maxLLR); // 128 in epi8 is -127 // min = _mm512_min_epu8(min, maxLLR); // 128 in epi8 is -127
fprintf(fd," min = _mm256_min_epu8(min, maxLLR);\n"); fprintf(fd," min = _mm512_min_epu8(min, maxLLR);\n");
// *p_cnProcBufResBit = _mm256_sign_epi8(min, sgn); // *p_cnProcBufResBit = _mm512_sign_epi16(min, sgn);
// p_cnProcBufResBit++; // p_cnProcBufResBit++;
fprintf(fd," ((__m256i*)cnProcBufRes)[%d+i] = _mm256_sign_epi8(min, sgn);\n",(lut_startAddrCnGroups[2]>>5)+(j*bitOffsetInGroup)); fprintf(fd," ((__m512i*)cnProcBufRes)[%d+i] = _mm512_sign_epi16(min, sgn);\n",(lut_startAddrCnGroups[2]>>5)+(j*bitOffsetInGroup));
fprintf(fd," }\n"); fprintf(fd," }\n");
} }
} }
...@@ -277,25 +287,25 @@ void nrLDPC_cnProc_BG1_generator(uint16_t Z,int R) ...@@ -277,25 +287,25 @@ void nrLDPC_cnProc_BG1_generator(uint16_t Z,int R)
// ===================================================================== // =====================================================================
// Process group with 6 BNs // Process group with 6 BNs
fprintf(fd,"//Process group with 6 BNs\n"); fprintf(fd,"//Process group with 6 BNs\n");
// Offset is 8*384/32 = 96 // Offset is 8*384/64 = 48
const uint16_t lut_idxCnProcG6[6][5] = {{96,192,288,384,480}, {0,192,288,384,480}, const uint16_t lut_idxCnProcG6[6][5] = {{48,96,144,192,240}, {0,96,144,192,240},
{0,96,288,384,480}, {0,96,192,384,480}, {0,48,144,192,240}, {0,48,96,192,240},
{0,96,192,288,480}, {0,96,192,288,384}}; {0,48,96,144,240}, {0,48,96,144,192}};
if (lut_numCnInCnGroups[3] > 0) if (lut_numCnInCnGroups[3] > 0)
{ {
// Number of groups of 32 CNs for parallel processing // Number of groups of 32 CNs for parallel processing
// Ceil for values not divisible by 32 // Ceil for values not divisible by 64
M = (lut_numCnInCnGroups[3]*Z + 31)>>5; M = (lut_numCnInCnGroups[3]*Z + 63)>>5;
// Set the offset to each bit within a group in terms of 32 Byte // Set the offset to each bit within a group in terms of 32 Byte
bitOffsetInGroup = (lut_numCnInCnGroups_BG1_R13[3]*NR_LDPC_ZMAX)>>5; bitOffsetInGroup = (lut_numCnInCnGroups_BG1_R13[3]*NR_LDPC_ZMAX)>>5;
// Set pointers to start of group 4 // Set pointers to start of group 4
//p_cnProcBuf = (__m256i*) &cnProcBuf [lut_startAddrCnGroups[1]]; //p_cnProcBuf = (__m512i*) &cnProcBuf [lut_startAddrCnGroups[1]];
//p_cnProcBufRes = (__m256i*) &cnProcBufRes[lut_startAddrCnGroups[1]]; //p_cnProcBufRes = (__m512i*) &cnProcBufRes[lut_startAddrCnGroups[1]];
// Loop over every BN // Loop over every BN
int iprime=0; int iprime=0;
...@@ -308,33 +318,33 @@ void nrLDPC_cnProc_BG1_generator(uint16_t Z,int R) ...@@ -308,33 +318,33 @@ void nrLDPC_cnProc_BG1_generator(uint16_t Z,int R)
// for (i=0; i<M; i++,iprime++) // for (i=0; i<M; i++,iprime++)
// { // {
fprintf(fd," for (int i=0;i<%d;i++) {\n",M); fprintf(fd," for (int i=0;i<%d;i++) {\n",M);
// Abs and sign of 32 CNs (first BN) // Abs and sign of 32 CNs (first BN)
// ymm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i]; // zmm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
fprintf(fd," ymm0 = ((__m256i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[3]>>5)+lut_idxCnProcG6[j][0]); fprintf(fd," zmm0 = ((__m512i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[3]>>5)+lut_idxCnProcG6[j][0]);
// sgn = _mm256_sign_epi8(ones, ymm0); // sgn = _mm512_sign_epi16(ones, zmm0);
fprintf(fd," sgn = _mm256_sign_epi8(ones, ymm0);\n"); fprintf(fd," sgn = _mm512_sign_epi16(ones, zmm0);\n");
// min = _mm256_abs_epi8(ymm0); // min = _mm512_abs_epi8(zmm0);
fprintf(fd," min = _mm256_abs_epi8(ymm0);\n"); fprintf(fd," min = _mm512_abs_epi8(zmm0);\n");
// Loop over BNs // Loop over BNs
for (k=1; k<5; k++) for (k=1; k<5; k++)
{ {
fprintf(fd," ymm0 = ((__m256i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[3]>>5)+lut_idxCnProcG6[j][k]); fprintf(fd," zmm0 = ((__m512i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[3]>>5)+lut_idxCnProcG6[j][k]);
// min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); // min = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));
fprintf(fd," min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));\n"); fprintf(fd," min = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));\n");
// sgn = _mm256_sign_epi8(sgn, ymm0); // sgn = _mm512_sign_epi16(sgn, zmm0);
fprintf(fd," sgn = _mm256_sign_epi8(sgn, ymm0);\n"); fprintf(fd," sgn = _mm512_sign_epi16(sgn, zmm0);\n");
} }
// Store result // Store result
// min = _mm256_min_epu8(min, maxLLR); // 128 in epi8 is -127 // min = _mm512_min_epu8(min, maxLLR); // 128 in epi8 is -127
fprintf(fd," min = _mm256_min_epu8(min, maxLLR);\n"); fprintf(fd," min = _mm512_min_epu8(min, maxLLR);\n");
// *p_cnProcBufResBit = _mm256_sign_epi8(min, sgn); // *p_cnProcBufResBit = _mm512_sign_epi16(min, sgn);
// p_cnProcBufResBit++; // p_cnProcBufResBit++;
fprintf(fd," ((__m256i*)cnProcBufRes)[%d+i] = _mm256_sign_epi8(min, sgn);\n",(lut_startAddrCnGroups[3]>>5)+(j*bitOffsetInGroup)); fprintf(fd," ((__m512i*)cnProcBufRes)[%d+i] = _mm512_sign_epi16(min, sgn);\n",(lut_startAddrCnGroups[3]>>5)+(j*bitOffsetInGroup));
fprintf(fd," }\n"); fprintf(fd," }\n");
} }
} }
...@@ -343,27 +353,26 @@ void nrLDPC_cnProc_BG1_generator(uint16_t Z,int R) ...@@ -343,27 +353,26 @@ void nrLDPC_cnProc_BG1_generator(uint16_t Z,int R)
// ===================================================================== // =====================================================================
// Process group with 7 BNs // Process group with 7 BNs
fprintf(fd,"//Process group with 7 BNs\n"); fprintf(fd,"//Process group with 7 BNs\n");
// Offset is 5*384/32 = 60 // Offset is 5*384/64 = 30
const uint16_t lut_idxCnProcG7[7][6] = {{60,120,180,240,300,360}, {0,120,180,240,300,360}, const uint16_t lut_idxCnProcG7[7][6] = {{30,60,90,120,150,180}, {0,60,90,120,150,180},
{0,60,180,240,300,360}, {0,60,120,240,300,360}, {0,30,90,120,150,180}, {0,30,60,120,150,180},
{0,60,120,180,300,360}, {0,60,120,180,240,360}, {0,30,60,90,150,180}, {0,30,60,90,120,180},
{0,60,120,180,240,300}}; {0,30,60,90,120,150}};
if (lut_numCnInCnGroups[4] > 0) if (lut_numCnInCnGroups[4] > 0)
{ {
// Number of groups of 32 CNs for parallel processing // Number of groups of 32 CNs for parallel processing
// Ceil for values not divisible by 32 // Ceil for values not divisible by 64
M = (lut_numCnInCnGroups[4]*Z + 31)>>5; M = (lut_numCnInCnGroups[4]*Z + 63)>>5;
// Set the offset to each bit within a group in terms of 32 Byte // Set the offset to each bit within a group in terms of 32 Byte
bitOffsetInGroup = (lut_numCnInCnGroups_BG1_R13[4]*NR_LDPC_ZMAX)>>5; bitOffsetInGroup = (lut_numCnInCnGroups_BG1_R13[4]*NR_LDPC_ZMAX)>>5;
// Set pointers to start of group 4 // Set pointers to start of group 4
//p_cnProcBuf = (__m256i*) &cnProcBuf [lut_startAddrCnGroups[1]]; //p_cnProcBuf = (__m512i*) &cnProcBuf [lut_startAddrCnGroups[1]];
//p_cnProcBufRes = (__m256i*) &cnProcBufRes[lut_startAddrCnGroups[1]]; //p_cnProcBufRes = (__m512i*) &cnProcBufRes[lut_startAddrCnGroups[1]];
// Loop over every BN // Loop over every BN
int iprime=0; int iprime=0;
...@@ -376,33 +385,33 @@ void nrLDPC_cnProc_BG1_generator(uint16_t Z,int R) ...@@ -376,33 +385,33 @@ void nrLDPC_cnProc_BG1_generator(uint16_t Z,int R)
// for (i=0; i<M; i++,iprime++) // for (i=0; i<M; i++,iprime++)
// { // {
fprintf(fd," for (int i=0;i<%d;i++) {\n",M); fprintf(fd," for (int i=0;i<%d;i++) {\n",M);
// Abs and sign of 32 CNs (first BN) // Abs and sign of 32 CNs (first BN)
// ymm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i]; // zmm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
fprintf(fd," ymm0 = ((__m256i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[4]>>5)+lut_idxCnProcG7[j][0]); fprintf(fd," zmm0 = ((__m512i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[4]>>5)+lut_idxCnProcG7[j][0]);
// sgn = _mm256_sign_epi8(ones, ymm0); // sgn = _mm512_sign_epi16(ones, zmm0);
fprintf(fd," sgn = _mm256_sign_epi8(ones, ymm0);\n"); fprintf(fd," sgn = _mm512_sign_epi16(ones, zmm0);\n");
// min = _mm256_abs_epi8(ymm0); // min = _mm512_abs_epi8(zmm0);
fprintf(fd," min = _mm256_abs_epi8(ymm0);\n"); fprintf(fd," min = _mm512_abs_epi8(zmm0);\n");
// Loop over BNs // Loop over BNs
for (k=1; k<6; k++) for (k=1; k<6; k++)
{ {
fprintf(fd," ymm0 = ((__m256i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[4]>>5)+lut_idxCnProcG7[j][k]); fprintf(fd," zmm0 = ((__m512i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[4]>>5)+lut_idxCnProcG7[j][k]);
// min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); // min = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));
fprintf(fd," min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));\n"); fprintf(fd," min = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));\n");
// sgn = _mm256_sign_epi8(sgn, ymm0); // sgn = _mm512_sign_epi16(sgn, zmm0);
fprintf(fd," sgn = _mm256_sign_epi8(sgn, ymm0);\n"); fprintf(fd," sgn = _mm512_sign_epi16(sgn, zmm0);\n");
} }
// Store result // Store result
// min = _mm256_min_epu8(min, maxLLR); // 128 in epi8 is -127 // min = _mm512_min_epu8(min, maxLLR); // 128 in epi8 is -127
fprintf(fd," min = _mm256_min_epu8(min, maxLLR);\n"); fprintf(fd," min = _mm512_min_epu8(min, maxLLR);\n");
// *p_cnProcBufResBit = _mm256_sign_epi8(min, sgn); // *p_cnProcBufResBit = _mm512_sign_epi16(min, sgn);
// p_cnProcBufResBit++; // p_cnProcBufResBit++;
fprintf(fd," ((__m256i*)cnProcBufRes)[%d+i] = _mm256_sign_epi8(min, sgn);\n",(lut_startAddrCnGroups[4]>>5)+(j*bitOffsetInGroup)); fprintf(fd," ((__m512i*)cnProcBufRes)[%d+i] = _mm512_sign_epi16(min, sgn);\n",(lut_startAddrCnGroups[4]>>5)+(j*bitOffsetInGroup));
fprintf(fd," }\n"); fprintf(fd," }\n");
} }
} }
...@@ -411,28 +420,27 @@ void nrLDPC_cnProc_BG1_generator(uint16_t Z,int R) ...@@ -411,28 +420,27 @@ void nrLDPC_cnProc_BG1_generator(uint16_t Z,int R)
// ===================================================================== // =====================================================================
// Process group with 8 BNs // Process group with 8 BNs
fprintf(fd,"//Process group with 8 BNs\n"); fprintf(fd,"//Process group with 8 BNs\n");
// Offset is 2*384/32 = 24 // Offset is 2*384/64 = 12
const uint8_t lut_idxCnProcG8[8][7] = {{24,48,72,96,120,144,168}, {0,48,72,96,120,144,168}, const uint8_t lut_idxCnProcG8[8][7] = {{12,24,36,48,56,72,84}, {0,24,36,48,56,72,84},
{0,24,72,96,120,144,168}, {0,24,48,96,120,144,168}, {0,12,36,48,56,72,84}, {0,12,24,48,56,72,84},
{0,24,48,72,120,144,168}, {0,24,48,72,96,144,168}, {0,12,24,36,56,72,84}, {0,12,24,36,48,72,84},
{0,24,48,72,96,120,168}, {0,24,48,72,96,120,144}}; {0,12,24,36,48,56,84}, {0,12,24,36,48,120,72}};
if (lut_numCnInCnGroups[5] > 0) if (lut_numCnInCnGroups[5] > 0)
{ {
// Number of groups of 32 CNs for parallel processing // Number of groups of 32 CNs for parallel processing
// Ceil for values not divisible by 32 // Ceil for values not divisible by 64
M = (lut_numCnInCnGroups[5]*Z + 31)>>5; M = (lut_numCnInCnGroups[5]*Z + 63)>>5;
// Set the offset to each bit within a group in terms of 32 Byte // Set the offset to each bit within a group in terms of 32 Byte
bitOffsetInGroup = (lut_numCnInCnGroups_BG1_R13[5]*NR_LDPC_ZMAX)>>5; bitOffsetInGroup = (lut_numCnInCnGroups_BG1_R13[5]*NR_LDPC_ZMAX)>>5;
// Set pointers to start of group 4 // Set pointers to start of group 4
//p_cnProcBuf = (__m256i*) &cnProcBuf [lut_startAddrCnGroups[1]]; //p_cnProcBuf = (__m512i*) &cnProcBuf [lut_startAddrCnGroups[1]];
//p_cnProcBufRes = (__m256i*) &cnProcBufRes[lut_startAddrCnGroups[1]]; //p_cnProcBufRes = (__m512i*) &cnProcBufRes[lut_startAddrCnGroups[1]];
// Loop over every BN // Loop over every BN
int iprime=0; int iprime=0;
...@@ -445,33 +453,33 @@ void nrLDPC_cnProc_BG1_generator(uint16_t Z,int R) ...@@ -445,33 +453,33 @@ void nrLDPC_cnProc_BG1_generator(uint16_t Z,int R)
// for (i=0; i<M; i++,iprime++) // for (i=0; i<M; i++,iprime++)
// { // {
fprintf(fd," for (int i=0;i<%d;i++) {\n",M); fprintf(fd," for (int i=0;i<%d;i++) {\n",M);
// Abs and sign of 32 CNs (first BN) // Abs and sign of 32 CNs (first BN)
// ymm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i]; // zmm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
fprintf(fd," ymm0 = ((__m256i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[5]>>5)+lut_idxCnProcG8[j][0]); fprintf(fd," zmm0 = ((__m512i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[5]>>5)+lut_idxCnProcG8[j][0]);
// sgn = _mm256_sign_epi8(ones, ymm0); // sgn = _mm512_sign_epi16(ones, zmm0);
fprintf(fd," sgn = _mm256_sign_epi8(ones, ymm0);\n"); fprintf(fd," sgn = _mm512_sign_epi16(ones, zmm0);\n");
// min = _mm256_abs_epi8(ymm0); // min = _mm512_abs_epi8(zmm0);
fprintf(fd," min = _mm256_abs_epi8(ymm0);\n"); fprintf(fd," min = _mm512_abs_epi8(zmm0);\n");
// Loop over BNs // Loop over BNs
for (k=1; k<7; k++) for (k=1; k<7; k++)
{ {
fprintf(fd," ymm0 = ((__m256i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[5]>>5)+lut_idxCnProcG8[j][k]); fprintf(fd," zmm0 = ((__m512i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[5]>>5)+lut_idxCnProcG8[j][k]);
// min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); // min = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));
fprintf(fd," min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));\n"); fprintf(fd," min = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));\n");
// sgn = _mm256_sign_epi8(sgn, ymm0); // sgn = _mm512_sign_epi16(sgn, zmm0);
fprintf(fd," sgn = _mm256_sign_epi8(sgn, ymm0);\n"); fprintf(fd," sgn = _mm512_sign_epi16(sgn, zmm0);\n");
} }
// Store result // Store result
// min = _mm256_min_epu8(min, maxLLR); // 128 in epi8 is -127 // min = _mm512_min_epu8(min, maxLLR); // 128 in epi8 is -127
fprintf(fd," min = _mm256_min_epu8(min, maxLLR);\n"); fprintf(fd," min = _mm512_min_epu8(min, maxLLR);\n");
// *p_cnProcBufResBit = _mm256_sign_epi8(min, sgn); // *p_cnProcBufResBit = _mm512_sign_epi16(min, sgn);
// p_cnProcBufResBit++; // p_cnProcBufResBit++;
fprintf(fd," ((__m256i*)cnProcBufRes)[%d+i] = _mm256_sign_epi8(min, sgn);\n",(lut_startAddrCnGroups[5]>>5)+(j*bitOffsetInGroup)); fprintf(fd," ((__m512i*)cnProcBufRes)[%d+i] = _mm512_sign_epi16(min, sgn);\n",(lut_startAddrCnGroups[5]>>5)+(j*bitOffsetInGroup));
fprintf(fd," }\n"); fprintf(fd," }\n");
} }
} }
...@@ -479,30 +487,29 @@ void nrLDPC_cnProc_BG1_generator(uint16_t Z,int R) ...@@ -479,30 +487,29 @@ void nrLDPC_cnProc_BG1_generator(uint16_t Z,int R)
// ===================================================================== // =====================================================================
// Process group with 9 BNs // Process group with 9 BNs
fprintf(fd,"//Process group with 9 BNs\n"); fprintf(fd,"//Process group with 9 BNs\n");
// Offset is 2*384/32 = 24 // Offset is 2*384/64 = 12
const uint8_t lut_idxCnProcG9[9][8] = {{24,48,72,96,120,144,168,192}, {0,48,72,96,120,144,168,192}, const uint8_t lut_idxCnProcG9[9][8] = {{12,24,36,48,60,72,84,96}, {0,24,36,48,60,72,84,96},
{0,24,72,96,120,144,168,192}, {0,24,48,96,120,144,168,192}, {0,12,36,48,60,72,84,96}, {0,12,24,48,60,72,84,96},
{0,24,48,72,120,144,168,192}, {0,24,48,72,96,144,168,192}, {0,12,24,36,60,72,84,96}, {0,12,24,36,48,72,84,96},
{0,24,48,72,96,120,168,192}, {0,24,48,72,96,120,144,192}, {0,12,24,36,48,60,84,96}, {0,12,24,36,48,60,72,96},
{0,24,48,72,96,120,144,168}}; {0,12,24,36,48,60,72,84}};
if (lut_numCnInCnGroups[6] > 0) if (lut_numCnInCnGroups[6] > 0)
{ {
// Number of groups of 32 CNs for parallel processing // Number of groups of 32 CNs for parallel processing
// Ceil for values not divisible by 32 // Ceil for values not divisible by 64
M = (lut_numCnInCnGroups[6]*Z + 31)>>5; M = (lut_numCnInCnGroups[6]*Z + 63)>>5;
// Set the offset to each bit within a group in terms of 32 Byte // Set the offset to each bit within a group in terms of 32 Byte
bitOffsetInGroup = (lut_numCnInCnGroups_BG1_R13[6]*NR_LDPC_ZMAX)>>5; bitOffsetInGroup = (lut_numCnInCnGroups_BG1_R13[6]*NR_LDPC_ZMAX)>>5;
// Set pointers to start of group 9 // Set pointers to start of group 9
//p_cnProcBuf = (__m256i*) &cnProcBuf [lut_startAddrCnGroups[1]]; //p_cnProcBuf = (__m512i*) &cnProcBuf [lut_startAddrCnGroups[1]];
//p_cnProcBufRes = (__m256i*) &cnProcBufRes[lut_startAddrCnGroups[1]]; //p_cnProcBufRes = (__m512i*) &cnProcBufRes[lut_startAddrCnGroups[1]];
// Loop over every BN // Loop over every BN
int iprime=0; int iprime=0;
...@@ -515,33 +522,33 @@ void nrLDPC_cnProc_BG1_generator(uint16_t Z,int R) ...@@ -515,33 +522,33 @@ void nrLDPC_cnProc_BG1_generator(uint16_t Z,int R)
// for (i=0; i<M; i++,iprime++) // for (i=0; i<M; i++,iprime++)
// { // {
fprintf(fd," for (int i=0;i<%d;i++) {\n",M); fprintf(fd," for (int i=0;i<%d;i++) {\n",M);
// Abs and sign of 32 CNs (first BN) // Abs and sign of 32 CNs (first BN)
// ymm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i]; // zmm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
fprintf(fd," ymm0 = ((__m256i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[6]>>5)+lut_idxCnProcG9[j][0]); fprintf(fd," zmm0 = ((__m512i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[6]>>5)+lut_idxCnProcG9[j][0]);
// sgn = _mm256_sign_epi8(ones, ymm0); // sgn = _mm512_sign_epi16(ones, zmm0);
fprintf(fd," sgn = _mm256_sign_epi8(ones, ymm0);\n"); fprintf(fd," sgn = _mm512_sign_epi16(ones, zmm0);\n");
// min = _mm256_abs_epi8(ymm0); // min = _mm512_abs_epi8(zmm0);
fprintf(fd," min = _mm256_abs_epi8(ymm0);\n"); fprintf(fd," min = _mm512_abs_epi8(zmm0);\n");
// Loop over BNs // Loop over BNs
for (k=1; k<8; k++) for (k=1; k<8; k++)
{ {
fprintf(fd," ymm0 = ((__m256i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[6]>>5)+lut_idxCnProcG9[j][k]); fprintf(fd," zmm0 = ((__m512i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[6]>>5)+lut_idxCnProcG9[j][k]);
// min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); // min = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));
fprintf(fd," min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));\n"); fprintf(fd," min = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));\n");
// sgn = _mm256_sign_epi8(sgn, ymm0); // sgn = _mm512_sign_epi16(sgn, zmm0);
fprintf(fd," sgn = _mm256_sign_epi8(sgn, ymm0);\n"); fprintf(fd," sgn = _mm512_sign_epi16(sgn, zmm0);\n");
} }
// Store result // Store result
// min = _mm256_min_epu8(min, maxLLR); // 128 in epi8 is -127 // min = _mm512_min_epu8(min, maxLLR); // 128 in epi8 is -127
fprintf(fd," min = _mm256_min_epu8(min, maxLLR);\n"); fprintf(fd," min = _mm512_min_epu8(min, maxLLR);\n");
// *p_cnProcBufResBit = _mm256_sign_epi8(min, sgn); // *p_cnProcBufResBit = _mm512_sign_epi16(min, sgn);
// p_cnProcBufResBit++; // p_cnProcBufResBit++;
fprintf(fd," ((__m256i*)cnProcBufRes)[%d+i] = _mm256_sign_epi8(min, sgn);\n",(lut_startAddrCnGroups[6]>>5)+(j*bitOffsetInGroup)); fprintf(fd," ((__m512i*)cnProcBufRes)[%d+i] = _mm512_sign_epi16(min, sgn);\n",(lut_startAddrCnGroups[6]>>5)+(j*bitOffsetInGroup));
fprintf(fd," }\n"); fprintf(fd," }\n");
} }
} }
...@@ -549,12 +556,12 @@ void nrLDPC_cnProc_BG1_generator(uint16_t Z,int R) ...@@ -549,12 +556,12 @@ void nrLDPC_cnProc_BG1_generator(uint16_t Z,int R)
// ===================================================================== // =====================================================================
// Process group with 10 BNs // Process group with 10 BNs
fprintf(fd,"//Process group with 10 BNs\n"); fprintf(fd,"//Process group with 10 BNs\n");
// Offset is 1*384/32 = 12 // Offset is 1*384/64 = 6
const uint8_t lut_idxCnProcG10[10][9] = {{12,24,36,48,60,72,84,96,108}, {0,24,36,48,60,72,84,96,108}, const uint8_t lut_idxCnProcG10[10][9] = {{6,12,18,24,30,36,42,48,54}, {0,12,18,24,30,36,42,48,54},
{0,12,36,48,60,72,84,96,108}, {0,12,24,48,60,72,84,96,108}, {0,6,18,24,30,36,42,48,54}, {0,6,12,24,30,36,42,48,54},
{0,12,24,36,60,72,84,96,108}, {0,12,24,36,48,72,84,96,108}, {0,6,12,18,30,36,42,48,54}, {0,6,12,18,24,36,42,48,54},
{0,12,24,36,48,60,84,96,108}, {0,12,24,36,48,60,72,96,108}, {0,6,12,18,24,30,42,48,54}, {0,6,12,18,24,30,36,48,54},
{0,12,24,36,48,60,72,84,108}, {0,12,24,36,48,60,72,84,96}}; {0,6,12,18,24,30,36,42,54}, {0,6,12,36,24,30,36,42,48}};
...@@ -562,17 +569,17 @@ void nrLDPC_cnProc_BG1_generator(uint16_t Z,int R) ...@@ -562,17 +569,17 @@ void nrLDPC_cnProc_BG1_generator(uint16_t Z,int R)
if (lut_numCnInCnGroups[7] > 0) if (lut_numCnInCnGroups[7] > 0)
{ {
// Number of groups of 32 CNs for parallel processing // Number of groups of 32 CNs for parallel processing
// Ceil for values not divisible by 32 // Ceil for values not divisible by 64
M = (lut_numCnInCnGroups[7]*Z + 31)>>5; M = (lut_numCnInCnGroups[7]*Z + 63)>>5;
// Set the offset to each bit within a group in terms of 32 Byte // Set the offset to each bit within a group in terms of 32 Byte
bitOffsetInGroup = (lut_numCnInCnGroups_BG1_R13[7]*NR_LDPC_ZMAX)>>5; bitOffsetInGroup = (lut_numCnInCnGroups_BG1_R13[7]*NR_LDPC_ZMAX)>>5;
// Set pointers to start of group 10 // Set pointers to start of group 10
//p_cnProcBuf = (__m256i*) &cnProcBuf [lut_startAddrCnGroups[1]]; //p_cnProcBuf = (__m512i*) &cnProcBuf [lut_startAddrCnGroups[1]];
//p_cnProcBufRes = (__m256i*) &cnProcBufRes[lut_startAddrCnGroups[1]]; //p_cnProcBufRes = (__m512i*) &cnProcBufRes[lut_startAddrCnGroups[1]];
// Loop over every BN // Loop over every BN
int iprime=0; int iprime=0;
...@@ -585,33 +592,33 @@ void nrLDPC_cnProc_BG1_generator(uint16_t Z,int R) ...@@ -585,33 +592,33 @@ void nrLDPC_cnProc_BG1_generator(uint16_t Z,int R)
// for (i=0; i<M; i++,iprime++) // for (i=0; i<M; i++,iprime++)
// { // {
fprintf(fd," for (int i=0;i<%d;i++) {\n",M); fprintf(fd," for (int i=0;i<%d;i++) {\n",M);
// Abs and sign of 32 CNs (first BN) // Abs and sign of 32 CNs (first BN)
// ymm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i]; // zmm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
fprintf(fd," ymm0 = ((__m256i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[7]>>5)+lut_idxCnProcG10[j][0]); fprintf(fd," zmm0 = ((__m512i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[7]>>5)+lut_idxCnProcG10[j][0]);
// sgn = _mm256_sign_epi8(ones, ymm0); // sgn = _mm512_sign_epi16(ones, zmm0);
fprintf(fd," sgn = _mm256_sign_epi8(ones, ymm0);\n"); fprintf(fd," sgn = _mm512_sign_epi16(ones, zmm0);\n");
// min = _mm256_abs_epi8(ymm0); // min = _mm512_abs_epi8(zmm0);
fprintf(fd," min = _mm256_abs_epi8(ymm0);\n"); fprintf(fd," min = _mm512_abs_epi8(zmm0);\n");
// Loop over BNs // Loop over BNs
for (k=1; k<9; k++) for (k=1; k<9; k++)
{ {
fprintf(fd," ymm0 = ((__m256i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[7]>>5)+lut_idxCnProcG10[j][k]); fprintf(fd," zmm0 = ((__m512i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[7]>>5)+lut_idxCnProcG10[j][k]);
// min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); // min = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));
fprintf(fd," min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));\n"); fprintf(fd," min = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));\n");
// sgn = _mm256_sign_epi8(sgn, ymm0); // sgn = _mm512_sign_epi16(sgn, zmm0);
fprintf(fd," sgn = _mm256_sign_epi8(sgn, ymm0);\n"); fprintf(fd," sgn = _mm512_sign_epi16(sgn, zmm0);\n");
} }
// Store result // Store result
// min = _mm256_min_epu8(min, maxLLR); // 128 in epi8 is -127 // min = _mm512_min_epu8(min, maxLLR); // 128 in epi8 is -127
fprintf(fd," min = _mm256_min_epu8(min, maxLLR);\n"); fprintf(fd," min = _mm512_min_epu8(min, maxLLR);\n");
// *p_cnProcBufResBit = _mm256_sign_epi8(min, sgn); // *p_cnProcBufResBit = _mm512_sign_epi16(min, sgn);
// p_cnProcBufResBit++; // p_cnProcBufResBit++;
fprintf(fd," ((__m256i*)cnProcBufRes)[%d+i] = _mm256_sign_epi8(min, sgn);\n",(lut_startAddrCnGroups[7]>>5)+(j*bitOffsetInGroup)); fprintf(fd," ((__m512i*)cnProcBufRes)[%d+i] = _mm512_sign_epi16(min, sgn);\n",(lut_startAddrCnGroups[7]>>5)+(j*bitOffsetInGroup));
fprintf(fd," }\n"); fprintf(fd," }\n");
} }
} }
...@@ -620,32 +627,32 @@ void nrLDPC_cnProc_BG1_generator(uint16_t Z,int R) ...@@ -620,32 +627,32 @@ void nrLDPC_cnProc_BG1_generator(uint16_t Z,int R)
// ===================================================================== // =====================================================================
// Process group with 19 BNs // Process group with 19 BNs
fprintf(fd,"//Process group with 19 BNs\n"); fprintf(fd,"//Process group with 19 BNs\n");
// Offset is 4*384/32 = 12 // Offset is 4*384/64 = 24
const uint16_t lut_idxCnProcG19[19][18] = {{48,96,144,192,240,288,336,384,432,480,528,576,624,672,720,768,816,864}, {0,96,144,192,240,288,336,384,432,480,528,576,624,672,720,768,816,864}, const uint16_t lut_idxCnProcG19[19][18] = {{24,48,72,96,120,144,168,192,216,240,264,288,312,336,360,384,408,432}, {0,48,72,96,120,144,168,192,216,240,264,288,312,336,360,384,408,432},
{0,48,144,192,240,288,336,384,432,480,528,576,624,672,720,768,816,864}, {0,48,96,192,240,288,336,384,432,480,528,576,624,672,720,768,816,864}, {0,24,72,96,120,144,168,192,216,240,264,288,312,336,360,384,408,432}, {0,24,48,96,120,144,168,192,216,240,264,288,312,336,360,384,408,432},
{0,48,96,144,240,288,336,384,432,480,528,576,624,672,720,768,816,864}, {0,48,96,144,192,288,336,384,432,480,528,576,624,672,720,768,816,864}, {0,24,48,72,120,144,168,192,216,240,264,288,312,336,360,384,408,432}, {0,24,48,72,96,144,168,192,216,240,264,288,312,336,360,384,408,432},
{0,48,96,144,192,240,336,384,432,480,528,576,624,672,720,768,816,864}, {0,48,96,144,192,240,288,384,432,480,528,576,624,672,720,768,816,864}, {0,24,48,72,96,120,168,192,216,240,264,288,312,336,360,384,408,432}, {0,24,48,72,96,120,144,192,216,240,264,288,312,336,360,384,408,432},
{0,48,96,144,192,240,288,336,432,480,528,576,624,672,720,768,816,864}, {0,48,96,144,192,240,288,336,384,480,528,576,624,672,720,768,816,864}, {0,24,48,72,96,120,144,168,216,240,264,288,312,336,360,384,408,432}, {0,24,48,72,96,120,144,168,192,240,264,288,312,336,360,384,408,432},
{0,48,96,144,192,240,288,336,384,432,528,576,624,672,720,768,816,864}, {0,48,96,144,192,240,288,336,384,432,480,576,624,672,720,768,816,864}, {0,24,48,72,96,120,144,168,192,216,264,288,312,336,360,384,408,432}, {0,24,48,72,96,120,144,168,192,216,240,288,312,336,360,384,408,432},
{0,48,96,144,192,240,288,336,384,432,480,528,624,672,720,768,816,864}, {0,48,96,144,192,240,288,336,384,432,480,528,576,672,720,768,816,864}, {0,24,48,72,96,120,144,168,192,216,240,264,312,336,360,384,408,432}, {0,24,48,72,96,120,144,168,192,216,240,264,288,336,360,384,408,432},
{0,48,96,144,192,240,288,336,384,432,480,528,576,624,720,768,816,864}, {0,48,96,144,192,240,288,336,384,432,480,528,576,624,672,768,816,864}, {0,24,48,72,96,120,144,168,192,216,240,264,288,312,360,384,408,432}, {0,24,48,72,96,120,144,168,192,216,240,264,288,312,336,384,408,432},
{0,48,96,144,192,240,288,336,384,432,480,528,576,624,672,720,816,864}, {0,48,96,144,192,240,288,336,384,432,480,528,576,624,672,720,768,864}, {0,24,48,72,96,120,144,168,192,216,240,264,288,312,336,360,408,432}, {0,24,48,72,96,120,144,168,192,216,240,264,288,312,336,360,384,432},
{0,48,96,144,192,240,288,336,384,432,480,528,576,624,672,720,768,816}}; {0,24,48,72,96,120,144,168,192,216,240,264,288,312,336,360,384,408}};
if (lut_numCnInCnGroups[8] > 0) if (lut_numCnInCnGroups[8] > 0)
{ {
// Number of groups of 32 CNs for parallel processing // Number of groups of 32 CNs for parallel processing
// Ceil for values not divisible by 32 // Ceil for values not divisible by 64
M = (lut_numCnInCnGroups[8]*Z + 31)>>5; M = (lut_numCnInCnGroups[8]*Z + 63)>>5;
// Set the offset to each bit within a group in terms of 32 Byte // Set the offset to each bit within a group in terms of 32 Byte
bitOffsetInGroup = (lut_numCnInCnGroups_BG1_R13[8]*NR_LDPC_ZMAX)>>5; bitOffsetInGroup = (lut_numCnInCnGroups_BG1_R13[8]*NR_LDPC_ZMAX)>>5;
// Set pointers to start of group 19 // Set pointers to start of group 19
//p_cnProcBuf = (__m256i*) &cnProcBuf [lut_startAddrCnGroups[1]]; //p_cnProcBuf = (__m512i*) &cnProcBuf [lut_startAddrCnGroups[1]];
//p_cnProcBufRes = (__m256i*) &cnProcBufRes[lut_startAddrCnGroups[1]]; //p_cnProcBufRes = (__m512i*) &cnProcBufRes[lut_startAddrCnGroups[1]];
// Loop over every BN // Loop over every BN
int iprime=0; int iprime=0;
...@@ -658,33 +665,33 @@ void nrLDPC_cnProc_BG1_generator(uint16_t Z,int R) ...@@ -658,33 +665,33 @@ void nrLDPC_cnProc_BG1_generator(uint16_t Z,int R)
// for (i=0; i<M; i++,iprime++) // for (i=0; i<M; i++,iprime++)
// { // {
fprintf(fd," for (int i=0;i<%d;i++) {\n",M); fprintf(fd," for (int i=0;i<%d;i++) {\n",M);
// Abs and sign of 32 CNs (first BN) // Abs and sign of 32 CNs (first BN)
// ymm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i]; // zmm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
fprintf(fd," ymm0 = ((__m256i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[8]>>5)+lut_idxCnProcG19[j][0]); fprintf(fd," zmm0 = ((__m512i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[8]>>5)+lut_idxCnProcG19[j][0]);
// sgn = _mm256_sign_epi8(ones, ymm0); // sgn = _mm512_sign_epi16(ones, zmm0);
fprintf(fd," sgn = _mm256_sign_epi8(ones, ymm0);\n"); fprintf(fd," sgn = _mm512_sign_epi16(ones, zmm0);\n");
// min = _mm256_abs_epi8(ymm0); // min = _mm512_abs_epi8(zmm0);
fprintf(fd," min = _mm256_abs_epi8(ymm0);\n"); fprintf(fd," min = _mm512_abs_epi8(zmm0);\n");
// Loop over BNs // Loop over BNs
for (k=1; k<18; k++) for (k=1; k<18; k++)
{ {
fprintf(fd," ymm0 = ((__m256i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[8]>>5)+lut_idxCnProcG19[j][k]); fprintf(fd," zmm0 = ((__m512i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[8]>>5)+lut_idxCnProcG19[j][k]);
// min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); // min = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));
fprintf(fd," min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));\n"); fprintf(fd," min = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));\n");
// sgn = _mm256_sign_epi8(sgn, ymm0); // sgn = _mm512_sign_epi16(sgn, zmm0);
fprintf(fd," sgn = _mm256_sign_epi8(sgn, ymm0);\n"); fprintf(fd," sgn = _mm512_sign_epi16(sgn, zmm0);\n");
} }
// Store result // Store result
// min = _mm256_min_epu8(min, maxLLR); // 128 in epi8 is -127 // min = _mm512_min_epu8(min, maxLLR); // 128 in epi8 is -127
fprintf(fd," min = _mm256_min_epu8(min, maxLLR);\n"); fprintf(fd," min = _mm512_min_epu8(min, maxLLR);\n");
// *p_cnProcBufResBit = _mm256_sign_epi8(min, sgn); // *p_cnProcBufResBit = _mm512_sign_epi16(min, sgn);
// p_cnProcBufResBit++; // p_cnProcBufResBit++;
fprintf(fd," ((__m256i*)cnProcBufRes)[%d+i] = _mm256_sign_epi8(min, sgn);\n",(lut_startAddrCnGroups[8]>>5)+(j*bitOffsetInGroup)); fprintf(fd," ((__m512i*)cnProcBufRes)[%d+i] = _mm512_sign_epi16(min, sgn);\n",(lut_startAddrCnGroups[8]>>5)+(j*bitOffsetInGroup));
fprintf(fd," }\n"); fprintf(fd," }\n");
} }
} }
......
This source diff could not be displayed because it is too large. You can view the blob instead.
...@@ -191,11 +191,11 @@ static const uint16_t lut_startAddrBnGroupsLlr_BG2_R13[NR_LDPC_NUM_BN_GROUPS_BG2 ...@@ -191,11 +191,11 @@ static const uint16_t lut_startAddrBnGroupsLlr_BG2_R13[NR_LDPC_NUM_BN_GROUPS_BG2
/** Start address for every BN group within the LLR processing buffer for BG2 rate = 2/3 */ /** Start address for every BN group within the LLR processing buffer for BG2 rate = 2/3 */
static const uint16_t lut_startAddrBnGroupsLlr_BG2_R23[NR_LDPC_NUM_BN_GROUPS_BG2_R23] = {0, 1152, 2304, 4224, 5376, 6144}; static const uint16_t lut_startAddrBnGroupsLlr_BG2_R23[NR_LDPC_NUM_BN_GROUPS_BG2_R23] = {0, 1152, 2304, 4224, 5376, 6144};
/** Vector of 32 '1' in int8 for application with AVX2 */ /** Vector of 64 '1' in int8 for application with AVX512 */
static const int8_t ones256_epi8[32] __attribute__ ((aligned(32))) = {1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}; static const int8_t ones512_epi8[64] __attribute__ ((aligned(64))) = {1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1};
/** Vector of 32 '0' in int8 for application with AVX2 */ /** Vector of 64 '0' in int8 for application with AVX512 */
static const int8_t zeros256_epi8[32] __attribute__ ((aligned(32))) = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; static const int8_t zeros512_epi8[64] __attribute__ ((aligned(64))) = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
/** Vector of 32 '127' in int8 for application with AVX2 */ /** Vector of 64 '127' in int8 for application with AVX512 */
static const int8_t maxLLR256_epi8[32] __attribute__ ((aligned(32))) = {127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127}; static const int8_t maxLLR512_epi8[64] __attribute__ ((aligned(64))) = {127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127};
#endif #endif
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment