Commit fa3a637b authored by Raymond Knopp

cleanup of generator, added directory/file creation for output

parent 550ab500
@@ -11,636 +11,650 @@
void nrLDPC_cnProc_BG1_generator(t_nrLDPC_procBuf* p_procBuf, uint16_t Z,int R)
{
printf("void nrLDPC_cnProc_BG1_Z%d\n",Z);
const char *ratestr[3]={"13","23","89"};
const uint8_t* lut_numCnInCnGroups;
const uint32_t* lut_startAddrCnGroups = lut_startAddrCnGroups_BG1;
if (R<0 || R>2) {printf("Illegal R %d\n",R); abort();}
if (R==0) lut_numCnInCnGroups = lut_numCnInCnGroups_BG1_R13;
else if (R==1) lut_numCnInCnGroups = lut_numCnInCnGroups_BG1_R23;
else if (R==2) lut_numCnInCnGroups = lut_numCnInCnGroups_BG1_R89;
else { printf("aborting, illegal R %d\n",R); abort();}
int8_t* cnProcBuf = p_procBuf->cnProcBuf;
int8_t* cnProcBufRes = p_procBuf->cnProcBufRes;
system("mkdir -p ldpc_gen_files");
//__m256i* p_cnProcBuf;
//__m256i* p_cnProcBufRes;
char fname[50];
sprintf(fname,"ldpc_gen_files/nrLDPC_cnProc_BG1_Z%d_%s.c",Z,ratestr[R]);
FILE *fd=fopen(fname,"w");
if (fd == NULL) {printf("Cannot create %s\n");abort();}
// Number of CNs in Groups
uint32_t M;
uint32_t i;
uint32_t j;
uint32_t k;
// Offset to each bit within a group in terms of 32 Byte
uint32_t bitOffsetInGroup;
fprintf(fd,"void nrLDPC_cnProc_BG1_Z%d_%s(t_nrLDPC_procBuf* p_procBuf) {\n",Z,ratestr[R]);
//__m256i ymm0, min, sgn;
//__m256i* p_cnProcBufResBit;
const uint8_t* lut_numCnInCnGroups;
const uint32_t* lut_startAddrCnGroups = lut_startAddrCnGroups_BG1;
// const __m256i* p_ones = (__m256i*) ones256_epi8;
if (R==0) lut_numCnInCnGroups = lut_numCnInCnGroups_BG1_R13;
else if (R==1) lut_numCnInCnGroups = lut_numCnInCnGroups_BG1_R23;
else if (R==2) lut_numCnInCnGroups = lut_numCnInCnGroups_BG1_R89;
else { printf("aborting, illegal R %d\n",R); fclose(fd);abort();}
int8_t* cnProcBuf = p_procBuf->cnProcBuf;
int8_t* cnProcBufRes = p_procBuf->cnProcBufRes;
//__m256i* p_cnProcBuf;
//__m256i* p_cnProcBufRes;
// Number of CNs in Groups
uint32_t M;
uint32_t i;
uint32_t j;
uint32_t k;
// Offset to each bit within a group in terms of 32 Byte
uint32_t bitOffsetInGroup;
//__m256i ymm0, min, sgn;
//__m256i* p_cnProcBufResBit;
// const __m256i* p_ones = (__m256i*) ones256_epi8;
// const __m256i* p_maxLLR = (__m256i*) maxLLR256_epi8;
// LUT with offsets for bits that need to be processed
// 1. bit proc requires LLRs of 2. and 3. bit, 2.bits of 1. and 3. etc.
// Offsets are in units of bitOffsetInGroup (1*384/32)
// const uint8_t lut_idxCnProcG3[3][2] = {{12,24}, {0,24}, {0,12}};
// LUT with offsets for bits that need to be processed
// 1. bit proc requires LLRs of 2. and 3. bit, 2.bits of 1. and 3. etc.
// Offsets are in units of bitOffsetInGroup (1*384/32)
// const uint8_t lut_idxCnProcG3[3][2] = {{12,24}, {0,24}, {0,12}};
// =====================================================================
// Process group with 3 BNs
// =====================================================================
// Process group with 3 BNs
// LUT with offsets for bits that need to be processed
// 1. bit proc requires LLRs of 2. and 3. bit, 2.bits of 1. and 3. etc.
// Offsets are in units of bitOffsetInGroup (1*384/32)
const uint8_t lut_idxCnProcG3[3][2] = {{12,24}, {0,24}, {0,12}};
// LUT with offsets for bits that need to be processed
// 1. bit proc requires LLRs of 2. and 3. bit, 2.bits of 1. and 3. etc.
// Offsets are in units of bitOffsetInGroup (1*384/32)
const uint8_t lut_idxCnProcG3[3][2] = {{12,24}, {0,24}, {0,12}};
printf(" __m256i ymm0, min, sgn;\n");
printf(" const __m256i* p_ones = (__m256i*) ones256_epi8;\n");
printf(" const __m256i* p_maxLLR = (__m256i*) maxLLR256_epi8;\n");
fprintf(fd," __m256i ymm0, min, sgn;\n");
fprintf(fd," const __m256i* p_ones = (__m256i*) ones256_epi8;\n");
fprintf(fd," const __m256i* p_maxLLR = (__m256i*) maxLLR256_epi8;\n");
if (lut_numCnInCnGroups[0] > 0)
if (lut_numCnInCnGroups[0] > 0)
{
// Number of groups of 32 CNs for parallel processing
// Ceil for values not divisible by 32
M = (lut_numCnInCnGroups[0]*Z + 31)>>5;
// Number of groups of 32 CNs for parallel processing
// Ceil for values not divisible by 32
M = (lut_numCnInCnGroups[0]*Z + 31)>>5;
// Set the offset to each bit within a group in terms of 32 Byte
bitOffsetInGroup = (lut_numCnInCnGroups_BG1_R13[0]*NR_LDPC_ZMAX)>>5;
// Set the offset to each bit within a group in terms of 32 Byte
bitOffsetInGroup = (lut_numCnInCnGroups_BG1_R13[0]*NR_LDPC_ZMAX)>>5;
// Set pointers to start of group 3
//p_cnProcBuf = (__m256i*) &cnProcBuf [lut_startAddrCnGroups[0]];
//p_cnProcBufRes = (__m256i*) &cnProcBufRes[lut_startAddrCnGroups[0]];
// Set pointers to start of group 3
//p_cnProcBuf = (__m256i*) &cnProcBuf [lut_startAddrCnGroups[0]];
//p_cnProcBufRes = (__m256i*) &cnProcBufRes[lut_startAddrCnGroups[0]];
// Loop over every BN
int iprime=0;
for (j=0; j<3; j++)
// Loop over every BN
int iprime=0;
for (j=0; j<3; j++)
{
// Set of results pointer to correct BN address
//p_cnProcBufResBit = p_cnProcBufRes + (j*bitOffsetInGroup);
// Set of results pointer to correct BN address
//p_cnProcBufResBit = p_cnProcBufRes + (j*bitOffsetInGroup);
// Loop over CNs
for (i=0; i<M; i++,iprime++)
// Loop over CNs
for (i=0; i<M; i++,iprime++)
{
// Abs and sign of 32 CNs (first BN)
// ymm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
printf(" ymm0 = ((__m256i*)&cnProcBuf)[%d];\n",lut_startAddrCnGroups[0]+lut_idxCnProcG3[j][0] + i);
// sgn = _mm256_sign_epi8(*p_ones, ymm0);
printf(" sgn = _mm256_sign_epi8(*p_ones, ymm0);\n");
// min = _mm256_abs_epi8(ymm0);
printf(" min = _mm256_abs_epi8(ymm0);\n");
// 32 CNs of second BN
// ymm0 = p_cnProcBuf[lut_idxCnProcG3[j][1] + i];
printf(" ymm0 = ((__m256i*)&cnProcBuf)[%d];\n",lut_startAddrCnGroups[0]+lut_idxCnProcG3[j][1] + i);
// min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
printf(" min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));\n");
// sgn = _mm256_sign_epi8(sgn, ymm0);
printf(" sgn = _mm256_sign_epi8(sgn, ymm0);\n");
// Store result
// min = _mm256_min_epu8(min, *p_maxLLR); // 128 in epi8 is -127
printf(" min = _mm256_min_epu8(min, *p_maxLLR);\n");
// *p_cnProcBufResBit = _mm256_sign_epi8(min, sgn);
// p_cnProcBufResBit++;
printf(" ((__m256i*)cnProcB ufRestBit)[%d] = _mm256_sign_epi8(min, sgn);\n",lut_numCnInCnGroups[0]+(j*bitOffsetInGroup)+i);
// Abs and sign of 32 CNs (first BN)
// ymm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
fprintf(fd," ymm0 = ((__m256i*)&cnProcBuf)[%d];\n",lut_startAddrCnGroups[0]+lut_idxCnProcG3[j][0] + i);
// sgn = _mm256_sign_epi8(*p_ones, ymm0);
fprintf(fd," sgn = _mm256_sign_epi8(*p_ones, ymm0);\n");
// min = _mm256_abs_epi8(ymm0);
fprintf(fd," min = _mm256_abs_epi8(ymm0);\n");
// 32 CNs of second BN
// ymm0 = p_cnProcBuf[lut_idxCnProcG3[j][1] + i];
fprintf(fd," ymm0 = ((__m256i*)&cnProcBuf)[%d];\n",lut_startAddrCnGroups[0]+lut_idxCnProcG3[j][1] + i);
// min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
fprintf(fd," min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));\n");
// sgn = _mm256_sign_epi8(sgn, ymm0);
fprintf(fd," sgn = _mm256_sign_epi8(sgn, ymm0);\n");
// Store result
// min = _mm256_min_epu8(min, *p_maxLLR); // 128 in epi8 is -127
fprintf(fd," min = _mm256_min_epu8(min, *p_maxLLR);\n");
// *p_cnProcBufResBit = _mm256_sign_epi8(min, sgn);
// p_cnProcBufResBit++;
fprintf(fd," ((__m256i*)cnProcB ufRestBit)[%d] = _mm256_sign_epi8(min, sgn);\n",lut_numCnInCnGroups[0]+(j*bitOffsetInGroup)+i);
}
}
}
// =====================================================================
// Process group with 4 BNs
// =====================================================================
// Process group with 4 BNs
// Offset is 5*384/32 = 60
const uint8_t lut_idxCnProcG4[4][3] = {{60,120,180}, {0,120,180}, {0,60,180}, {0,60,120}};
// Offset is 5*384/32 = 60
const uint8_t lut_idxCnProcG4[4][3] = {{60,120,180}, {0,120,180}, {0,60,180}, {0,60,120}};
if (lut_numCnInCnGroups[1] > 0)
if (lut_numCnInCnGroups[1] > 0)
{
// Number of groups of 32 CNs for parallel processing
// Ceil for values not divisible by 32
M = (lut_numCnInCnGroups[1]*Z + 31)>>5;
// Number of groups of 32 CNs for parallel processing
// Ceil for values not divisible by 32
M = (lut_numCnInCnGroups[1]*Z + 31)>>5;
// Set the offset to each bit within a group in terms of 32 Byte
bitOffsetInGroup = (lut_numCnInCnGroups_BG1_R13[1]*NR_LDPC_ZMAX)>>5;
// Set the offset to each bit within a group in terms of 32 Byte
bitOffsetInGroup = (lut_numCnInCnGroups_BG1_R13[1]*NR_LDPC_ZMAX)>>5;
// Set pointers to start of group 4
//p_cnProcBuf = (__m256i*) &cnProcBuf [lut_startAddrCnGroups[1]];
//p_cnProcBufRes = (__m256i*) &cnProcBufRes[lut_startAddrCnGroups[1]];
// Set pointers to start of group 4
//p_cnProcBuf = (__m256i*) &cnProcBuf [lut_startAddrCnGroups[1]];
//p_cnProcBufRes = (__m256i*) &cnProcBufRes[lut_startAddrCnGroups[1]];
// Loop over every BN
int iprime=0;
for (j=0; j<4; j++)
// Loop over every BN
int iprime=0;
for (j=0; j<4; j++)
{
// Set of results pointer to correct BN address
//p_cnProcBufResBit = p_cnProcBufRes + (j*bitOffsetInGroup);
// Set of results pointer to correct BN address
//p_cnProcBufResBit = p_cnProcBufRes + (j*bitOffsetInGroup);
// Loop over CNs
for (i=0; i<M; i++,iprime++)
// Loop over CNs
for (i=0; i<M; i++,iprime++)
{
// Abs and sign of 32 CNs (first BN)
// ymm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
printf(" ymm0 = ((__m256i*)&cnProcBuf)[%d];\n",lut_startAddrCnGroups[1]+lut_idxCnProcG4[j][0] + i);
// sgn = _mm256_sign_epi8(*p_ones, ymm0);
printf(" sgn = _mm256_sign_epi8(*p_ones, ymm0);\n");
// min = _mm256_abs_epi8(ymm0);
printf(" min = _mm256_abs_epi8(ymm0);\n");
// Abs and sign of 32 CNs (first BN)
// ymm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
fprintf(fd," ymm0 = ((__m256i*)&cnProcBuf)[%d];\n",lut_startAddrCnGroups[1]+lut_idxCnProcG4[j][0] + i);
// sgn = _mm256_sign_epi8(*p_ones, ymm0);
fprintf(fd," sgn = _mm256_sign_epi8(*p_ones, ymm0);\n");
// min = _mm256_abs_epi8(ymm0);
fprintf(fd," min = _mm256_abs_epi8(ymm0);\n");
// Loop over BNs
for (k=1; k<3; k++)
// Loop over BNs
for (k=1; k<3; k++)
{
printf(" ymm0 = ((__m256i*)&cnProcBuf)[%d];\n",lut_startAddrCnGroups[1]+lut_idxCnProcG4[j][k] + i);
fprintf(fd," ymm0 = ((__m256i*)&cnProcBuf)[%d];\n",lut_startAddrCnGroups[1]+lut_idxCnProcG4[j][k] + i);
// min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
printf(" min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));\n");
// min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
fprintf(fd," min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));\n");
// sgn = _mm256_sign_epi8(sgn, ymm0);
printf(" sgn = _mm256_sign_epi8(sgn, ymm0);\n");
// sgn = _mm256_sign_epi8(sgn, ymm0);
fprintf(fd," sgn = _mm256_sign_epi8(sgn, ymm0);\n");
}
// Store result
// min = _mm256_min_epu8(min, *p_maxLLR); // 128 in epi8 is -127
printf(" min = _mm256_min_epu8(min, *p_maxLLR);\n");
// *p_cnProcBufResBit = _mm256_sign_epi8(min, sgn);
// p_cnProcBufResBit++;
printf(" ((__m256i*)cnProcBufRestBit)[%d] = _mm256_sign_epi8(min, sgn);\n",lut_numCnInCnGroups[1]+(j*bitOffsetInGroup)+i);
// Store result
// min = _mm256_min_epu8(min, *p_maxLLR); // 128 in epi8 is -127
fprintf(fd," min = _mm256_min_epu8(min, *p_maxLLR);\n");
// *p_cnProcBufResBit = _mm256_sign_epi8(min, sgn);
// p_cnProcBufResBit++;
fprintf(fd," ((__m256i*)cnProcBufRestBit)[%d] = _mm256_sign_epi8(min, sgn);\n",lut_numCnInCnGroups[1]+(j*bitOffsetInGroup)+i);
}
}
}
// =====================================================================
// Process group with 5 BNs
// =====================================================================
// Process group with 5 BNs
// Offset is 18*384/32 = 216
const uint16_t lut_idxCnProcG5[5][4] = {{216,432,648,864}, {0,432,648,864},
{0,216,648,864}, {0,216,432,864}, {0,216,432,648}};
// Offset is 18*384/32 = 216
const uint16_t lut_idxCnProcG5[5][4] = {{216,432,648,864}, {0,432,648,864},
{0,216,648,864}, {0,216,432,864}, {0,216,432,648}};
if (lut_numCnInCnGroups[2] > 0)
if (lut_numCnInCnGroups[2] > 0)
{
// Number of groups of 32 CNs for parallel processing
// Ceil for values not divisible by 32
M = (lut_numCnInCnGroups[2]*Z + 31)>>5;
// Number of groups of 32 CNs for parallel processing
// Ceil for values not divisible by 32
M = (lut_numCnInCnGroups[2]*Z + 31)>>5;
// Set the offset to each bit within a group in terms of 32 Byte
bitOffsetInGroup = (lut_numCnInCnGroups_BG1_R13[2]*NR_LDPC_ZMAX)>>5;
// Set the offset to each bit within a group in terms of 32 Byte
bitOffsetInGroup = (lut_numCnInCnGroups_BG1_R13[2]*NR_LDPC_ZMAX)>>5;
// Set pointers to start of group 4
//p_cnProcBuf = (__m256i*) &cnProcBuf [lut_startAddrCnGroups[1]];
//p_cnProcBufRes = (__m256i*) &cnProcBufRes[lut_startAddrCnGroups[1]];
// Set pointers to start of group 4
//p_cnProcBuf = (__m256i*) &cnProcBuf [lut_startAddrCnGroups[1]];
//p_cnProcBufRes = (__m256i*) &cnProcBufRes[lut_startAddrCnGroups[1]];
// Loop over every BN
int iprime=0;
for (j=0; j<5; j++)
// Loop over every BN
int iprime=0;
for (j=0; j<5; j++)
{
// Set of results pointer to correct BN address
//p_cnProcBufResBit = p_cnProcBufRes + (j*bitOffsetInGroup);
// Set of results pointer to correct BN address
//p_cnProcBufResBit = p_cnProcBufRes + (j*bitOffsetInGroup);
// Loop over CNs
for (i=0; i<M; i++,iprime++)
// Loop over CNs
for (i=0; i<M; i++,iprime++)
{
// Abs and sign of 32 CNs (first BN)
// ymm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
printf(" ymm0 = ((__m256i*)&cnProcBuf)[%d];\n",lut_startAddrCnGroups[2]+lut_idxCnProcG5[j][0] + i);
// sgn = _mm256_sign_epi8(*p_ones, ymm0);
printf(" sgn = _mm256_sign_epi8(*p_ones, ymm0);\n");
// min = _mm256_abs_epi8(ymm0);
printf(" min = _mm256_abs_epi8(ymm0);\n");
// Abs and sign of 32 CNs (first BN)
// ymm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
fprintf(fd," ymm0 = ((__m256i*)&cnProcBuf)[%d];\n",lut_startAddrCnGroups[2]+lut_idxCnProcG5[j][0] + i);
// sgn = _mm256_sign_epi8(*p_ones, ymm0);
fprintf(fd," sgn = _mm256_sign_epi8(*p_ones, ymm0);\n");
// min = _mm256_abs_epi8(ymm0);
fprintf(fd," min = _mm256_abs_epi8(ymm0);\n");
// Loop over BNs
for (k=1; k<4; k++)
// Loop over BNs
for (k=1; k<4; k++)
{
printf(" ymm0 = ((__m256i*)&cnProcBuf)[%d];\n",lut_startAddrCnGroups[2]+lut_idxCnProcG5[j][k] + i);
fprintf(fd," ymm0 = ((__m256i*)&cnProcBuf)[%d];\n",lut_startAddrCnGroups[2]+lut_idxCnProcG5[j][k] + i);
// min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
printf(" min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));\n");
// min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
fprintf(fd," min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));\n");
// sgn = _mm256_sign_epi8(sgn, ymm0);
printf(" sgn = _mm256_sign_epi8(sgn, ymm0);\n");
// sgn = _mm256_sign_epi8(sgn, ymm0);
fprintf(fd," sgn = _mm256_sign_epi8(sgn, ymm0);\n");
}
// Store result
// min = _mm256_min_epu8(min, *p_maxLLR); // 128 in epi8 is -127
printf(" min = _mm256_min_epu8(min, *p_maxLLR);\n");
// *p_cnProcBufResBit = _mm256_sign_epi8(min, sgn);
// p_cnProcBufResBit++;
printf(" ((__m256i*)cnProcBufRestBit)[%d] = _mm256_sign_epi8(min, sgn);\n",lut_numCnInCnGroups[2]+(j*bitOffsetInGroup)+i);
// Store result
// min = _mm256_min_epu8(min, *p_maxLLR); // 128 in epi8 is -127
fprintf(fd," min = _mm256_min_epu8(min, *p_maxLLR);\n");
// *p_cnProcBufResBit = _mm256_sign_epi8(min, sgn);
// p_cnProcBufResBit++;
fprintf(fd," ((__m256i*)cnProcBufRestBit)[%d] = _mm256_sign_epi8(min, sgn);\n",lut_numCnInCnGroups[2]+(j*bitOffsetInGroup)+i);
}
}
}
// =====================================================================
// Process group with 6 BNs
// =====================================================================
// Process group with 6 BNs
// Offset is 8*384/32 = 96
const uint16_t lut_idxCnProcG6[6][5] = {{96,192,288,384,480}, {0,192,288,384,480},
{0,96,288,384,480}, {0,96,192,384,480},
{0,96,192,288,480}, {0,96,192,288,384}};
// Offset is 8*384/32 = 96
const uint16_t lut_idxCnProcG6[6][5] = {{96,192,288,384,480}, {0,192,288,384,480},
{0,96,288,384,480}, {0,96,192,384,480},
{0,96,192,288,480}, {0,96,192,288,384}};
if (lut_numCnInCnGroups[3] > 0)
if (lut_numCnInCnGroups[3] > 0)
{
// Number of groups of 32 CNs for parallel processing
// Ceil for values not divisible by 32
M = (lut_numCnInCnGroups[3]*Z + 31)>>5;
// Number of groups of 32 CNs for parallel processing
// Ceil for values not divisible by 32
M = (lut_numCnInCnGroups[3]*Z + 31)>>5;
// Set the offset to each bit within a group in terms of 32 Byte
bitOffsetInGroup = (lut_numCnInCnGroups_BG1_R13[3]*NR_LDPC_ZMAX)>>5;
// Set the offset to each bit within a group in terms of 32 Byte
bitOffsetInGroup = (lut_numCnInCnGroups_BG1_R13[3]*NR_LDPC_ZMAX)>>5;
// Set pointers to start of group 4
//p_cnProcBuf = (__m256i*) &cnProcBuf [lut_startAddrCnGroups[1]];
//p_cnProcBufRes = (__m256i*) &cnProcBufRes[lut_startAddrCnGroups[1]];
// Set pointers to start of group 4
//p_cnProcBuf = (__m256i*) &cnProcBuf [lut_startAddrCnGroups[1]];
//p_cnProcBufRes = (__m256i*) &cnProcBufRes[lut_startAddrCnGroups[1]];
// Loop over every BN
int iprime=0;
for (j=0; j<6; j++)
// Loop over every BN
int iprime=0;
for (j=0; j<6; j++)
{
// Set of results pointer to correct BN address
//p_cnProcBufResBit = p_cnProcBufRes + (j*bitOffsetInGroup);
// Set of results pointer to correct BN address
//p_cnProcBufResBit = p_cnProcBufRes + (j*bitOffsetInGroup);
// Loop over CNs
for (i=0; i<M; i++,iprime++)
// Loop over CNs
for (i=0; i<M; i++,iprime++)
{
// Abs and sign of 32 CNs (first BN)
// ymm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
printf(" ymm0 = ((__m256i*)&cnProcBuf)[%d];\n",lut_startAddrCnGroups[3]+lut_idxCnProcG6[j][0] + i);
// sgn = _mm256_sign_epi8(*p_ones, ymm0);
printf(" sgn = _mm256_sign_epi8(*p_ones, ymm0);\n");
// min = _mm256_abs_epi8(ymm0);
printf(" min = _mm256_abs_epi8(ymm0);\n");
// Abs and sign of 32 CNs (first BN)
// ymm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
fprintf(fd," ymm0 = ((__m256i*)&cnProcBuf)[%d];\n",lut_startAddrCnGroups[3]+lut_idxCnProcG6[j][0] + i);
// sgn = _mm256_sign_epi8(*p_ones, ymm0);
fprintf(fd," sgn = _mm256_sign_epi8(*p_ones, ymm0);\n");
// min = _mm256_abs_epi8(ymm0);
fprintf(fd," min = _mm256_abs_epi8(ymm0);\n");
// Loop over BNs
for (k=1; k<5; k++)
// Loop over BNs
for (k=1; k<5; k++)
{
printf(" ymm0 = ((__m256i*)&cnProcBuf)[%d];\n",lut_startAddrCnGroups[3]+lut_idxCnProcG6[j][k] + i);
fprintf(fd," ymm0 = ((__m256i*)&cnProcBuf)[%d];\n",lut_startAddrCnGroups[3]+lut_idxCnProcG6[j][k] + i);
// min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
printf(" min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));\n");
// min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
fprintf(fd," min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));\n");
// sgn = _mm256_sign_epi8(sgn, ymm0);
printf(" sgn = _mm256_sign_epi8(sgn, ymm0);\n");
// sgn = _mm256_sign_epi8(sgn, ymm0);
fprintf(fd," sgn = _mm256_sign_epi8(sgn, ymm0);\n");
}
// Store result
// min = _mm256_min_epu8(min, *p_maxLLR); // 128 in epi8 is -127
printf(" min = _mm256_min_epu8(min, *p_maxLLR);\n");
// *p_cnProcBufResBit = _mm256_sign_epi8(min, sgn);
// p_cnProcBufResBit++;
printf(" ((__m256i*)cnProcBufRestBit)[%d] = _mm256_sign_epi8(min, sgn);\n",lut_numCnInCnGroups[3]+(j*bitOffsetInGroup)+i);
// Store result
// min = _mm256_min_epu8(min, *p_maxLLR); // 128 in epi8 is -127
fprintf(fd," min = _mm256_min_epu8(min, *p_maxLLR);\n");
// *p_cnProcBufResBit = _mm256_sign_epi8(min, sgn);
// p_cnProcBufResBit++;
fprintf(fd," ((__m256i*)cnProcBufRestBit)[%d] = _mm256_sign_epi8(min, sgn);\n",lut_numCnInCnGroups[3]+(j*bitOffsetInGroup)+i);
}
}
}
// =====================================================================
// Process group with 7 BNs
// =====================================================================
// Process group with 7 BNs
// Offset is 5*384/32 = 60
const uint16_t lut_idxCnProcG7[7][6] = {{60,120,180,240,300,360}, {0,120,180,240,300,360},
{0,60,180,240,300,360}, {0,60,120,240,300,360},
{0,60,120,180,300,360}, {0,60,120,180,240,360},
{0,60,120,180,240,300}};
// Offset is 5*384/32 = 60
const uint16_t lut_idxCnProcG7[7][6] = {{60,120,180,240,300,360}, {0,120,180,240,300,360},
{0,60,180,240,300,360}, {0,60,120,240,300,360},
{0,60,120,180,300,360}, {0,60,120,180,240,360},
{0,60,120,180,240,300}};
if (lut_numCnInCnGroups[4] > 0)
if (lut_numCnInCnGroups[4] > 0)
{
// Number of groups of 32 CNs for parallel processing
// Ceil for values not divisible by 32
M = (lut_numCnInCnGroups[4]*Z + 31)>>5;
// Number of groups of 32 CNs for parallel processing
// Ceil for values not divisible by 32
M = (lut_numCnInCnGroups[4]*Z + 31)>>5;
// Set the offset to each bit within a group in terms of 32 Byte
bitOffsetInGroup = (lut_numCnInCnGroups_BG1_R13[4]*NR_LDPC_ZMAX)>>5;
// Set the offset to each bit within a group in terms of 32 Byte
bitOffsetInGroup = (lut_numCnInCnGroups_BG1_R13[4]*NR_LDPC_ZMAX)>>5;
// Set pointers to start of group 4
//p_cnProcBuf = (__m256i*) &cnProcBuf [lut_startAddrCnGroups[1]];
//p_cnProcBufRes = (__m256i*) &cnProcBufRes[lut_startAddrCnGroups[1]];
// Set pointers to start of group 4
//p_cnProcBuf = (__m256i*) &cnProcBuf [lut_startAddrCnGroups[1]];
//p_cnProcBufRes = (__m256i*) &cnProcBufRes[lut_startAddrCnGroups[1]];
// Loop over every BN
int iprime=0;
for (j=0; j<7; j++)
// Loop over every BN
int iprime=0;
for (j=0; j<7; j++)
{
// Set of results pointer to correct BN address
//p_cnProcBufResBit = p_cnProcBufRes + (j*bitOffsetInGroup);
// Set of results pointer to correct BN address
//p_cnProcBufResBit = p_cnProcBufRes + (j*bitOffsetInGroup);
// Loop over CNs
for (i=0; i<M; i++,iprime++)
// Loop over CNs
for (i=0; i<M; i++,iprime++)
{
// Abs and sign of 32 CNs (first BN)
// ymm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
printf(" ymm0 = ((__m256i*)&cnProcBuf)[%d];\n",lut_startAddrCnGroups[4]+lut_idxCnProcG7[j][0] + i);
// sgn = _mm256_sign_epi8(*p_ones, ymm0);
printf(" sgn = _mm256_sign_epi8(*p_ones, ymm0);\n");
// min = _mm256_abs_epi8(ymm0);
printf(" min = _mm256_abs_epi8(ymm0);\n");
// Abs and sign of 32 CNs (first BN)
// ymm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
fprintf(fd," ymm0 = ((__m256i*)&cnProcBuf)[%d];\n",lut_startAddrCnGroups[4]+lut_idxCnProcG7[j][0] + i);
// sgn = _mm256_sign_epi8(*p_ones, ymm0);
fprintf(fd," sgn = _mm256_sign_epi8(*p_ones, ymm0);\n");
// min = _mm256_abs_epi8(ymm0);
fprintf(fd," min = _mm256_abs_epi8(ymm0);\n");
// Loop over BNs
for (k=1; k<6; k++)
// Loop over BNs
for (k=1; k<6; k++)
{
printf(" ymm0 = ((__m256i*)&cnProcBuf)[%d];\n",lut_startAddrCnGroups[4]+lut_idxCnProcG7[j][k] + i);
fprintf(fd," ymm0 = ((__m256i*)&cnProcBuf)[%d];\n",lut_startAddrCnGroups[4]+lut_idxCnProcG7[j][k] + i);
// min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
printf(" min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));\n");
// min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
fprintf(fd," min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));\n");
// sgn = _mm256_sign_epi8(sgn, ymm0);
printf(" sgn = _mm256_sign_epi8(sgn, ymm0);\n");
// sgn = _mm256_sign_epi8(sgn, ymm0);
fprintf(fd," sgn = _mm256_sign_epi8(sgn, ymm0);\n");
}
// Store result
// min = _mm256_min_epu8(min, *p_maxLLR); // 128 in epi8 is -127
printf(" min = _mm256_min_epu8(min, *p_maxLLR);\n");
// *p_cnProcBufResBit = _mm256_sign_epi8(min, sgn);
// p_cnProcBufResBit++;
printf(" ((__m256i*)cnProcBufRestBit)[%d] = _mm256_sign_epi8(min, sgn);\n",lut_numCnInCnGroups[4]+(j*bitOffsetInGroup)+i);
// Store result
// min = _mm256_min_epu8(min, *p_maxLLR); // 128 in epi8 is -127
fprintf(fd," min = _mm256_min_epu8(min, *p_maxLLR);\n");
// *p_cnProcBufResBit = _mm256_sign_epi8(min, sgn);
// p_cnProcBufResBit++;
fprintf(fd," ((__m256i*)cnProcBufRestBit)[%d] = _mm256_sign_epi8(min, sgn);\n",lut_numCnInCnGroups[4]+(j*bitOffsetInGroup)+i);
}
}
}
// =====================================================================
// Process group with 8 BNs
// =====================================================================
// Process group with 8 BNs
// Offset is 2*384/32 = 24
const uint8_t lut_idxCnProcG8[8][7] = {{24,48,72,96,120,144,168}, {0,48,72,96,120,144,168},
{0,24,72,96,120,144,168}, {0,24,48,96,120,144,168},
{0,24,48,72,120,144,168}, {0,24,48,72,96,144,168},
{0,24,48,72,96,120,168}, {0,24,48,72,96,120,144}};
// Offset is 2*384/32 = 24
const uint8_t lut_idxCnProcG8[8][7] = {{24,48,72,96,120,144,168}, {0,48,72,96,120,144,168},
{0,24,72,96,120,144,168}, {0,24,48,96,120,144,168},
{0,24,48,72,120,144,168}, {0,24,48,72,96,144,168},
{0,24,48,72,96,120,168}, {0,24,48,72,96,120,144}};
if (lut_numCnInCnGroups[5] > 0)
if (lut_numCnInCnGroups[5] > 0)
{
// Number of groups of 32 CNs for parallel processing
// Ceil for values not divisible by 32
M = (lut_numCnInCnGroups[5]*Z + 31)>>5;
// Number of groups of 32 CNs for parallel processing
// Ceil for values not divisible by 32
M = (lut_numCnInCnGroups[5]*Z + 31)>>5;
// Set the offset to each bit within a group in terms of 32 Byte
bitOffsetInGroup = (lut_numCnInCnGroups_BG1_R13[5]*NR_LDPC_ZMAX)>>5;
// Set the offset to each bit within a group in terms of 32 Byte
bitOffsetInGroup = (lut_numCnInCnGroups_BG1_R13[5]*NR_LDPC_ZMAX)>>5;
// Set pointers to start of group 4
//p_cnProcBuf = (__m256i*) &cnProcBuf [lut_startAddrCnGroups[1]];
//p_cnProcBufRes = (__m256i*) &cnProcBufRes[lut_startAddrCnGroups[1]];
// Set pointers to start of group 4
//p_cnProcBuf = (__m256i*) &cnProcBuf [lut_startAddrCnGroups[1]];
//p_cnProcBufRes = (__m256i*) &cnProcBufRes[lut_startAddrCnGroups[1]];
// Loop over every BN
int iprime=0;
for (j=0; j<8; j++)
// Loop over every BN
int iprime=0;
for (j=0; j<8; j++)
{
// Set of results pointer to correct BN address
//p_cnProcBufResBit = p_cnProcBufRes + (j*bitOffsetInGroup);
// Set of results pointer to correct BN address
//p_cnProcBufResBit = p_cnProcBufRes + (j*bitOffsetInGroup);
// Loop over CNs
for (i=0; i<M; i++,iprime++)
// Loop over CNs
for (i=0; i<M; i++,iprime++)
{
// Abs and sign of 32 CNs (first BN)
// ymm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
printf(" ymm0 = ((__m256i*)&cnProcBuf)[%d];\n",lut_startAddrCnGroups[5]+lut_idxCnProcG8[j][0] + i);
// sgn = _mm256_sign_epi8(*p_ones, ymm0);
printf(" sgn = _mm256_sign_epi8(*p_ones, ymm0);\n");
// min = _mm256_abs_epi8(ymm0);
printf(" min = _mm256_abs_epi8(ymm0);\n");
// Abs and sign of 32 CNs (first BN)
// ymm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
fprintf(fd," ymm0 = ((__m256i*)&cnProcBuf)[%d];\n",lut_startAddrCnGroups[5]+lut_idxCnProcG8[j][0] + i);
// sgn = _mm256_sign_epi8(*p_ones, ymm0);
fprintf(fd," sgn = _mm256_sign_epi8(*p_ones, ymm0);\n");
// min = _mm256_abs_epi8(ymm0);
fprintf(fd," min = _mm256_abs_epi8(ymm0);\n");
// Loop over BNs
for (k=1; k<7; k++)
// Loop over BNs
for (k=1; k<7; k++)
{
printf(" ymm0 = ((__m256i*)&cnProcBuf)[%d];\n",lut_startAddrCnGroups[5]+lut_idxCnProcG8[j][k] + i);
fprintf(fd," ymm0 = ((__m256i*)&cnProcBuf)[%d];\n",lut_startAddrCnGroups[5]+lut_idxCnProcG8[j][k] + i);
// min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
printf(" min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));\n");
// min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
fprintf(fd," min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));\n");
// sgn = _mm256_sign_epi8(sgn, ymm0);
printf(" sgn = _mm256_sign_epi8(sgn, ymm0);\n");
// sgn = _mm256_sign_epi8(sgn, ymm0);
fprintf(fd," sgn = _mm256_sign_epi8(sgn, ymm0);\n");
}
// Store result
// min = _mm256_min_epu8(min, *p_maxLLR); // 128 in epi8 is -127
printf(" min = _mm256_min_epu8(min, *p_maxLLR);\n");
// *p_cnProcBufResBit = _mm256_sign_epi8(min, sgn);
// p_cnProcBufResBit++;
printf(" ((__m256i*)cnProcBufRestBit)[%d] = _mm256_sign_epi8(min, sgn);\n",lut_numCnInCnGroups[5]+(j*bitOffsetInGroup)+i);
// Store result
// min = _mm256_min_epu8(min, *p_maxLLR); // 128 in epi8 is -127
fprintf(fd," min = _mm256_min_epu8(min, *p_maxLLR);\n");
// *p_cnProcBufResBit = _mm256_sign_epi8(min, sgn);
// p_cnProcBufResBit++;
fprintf(fd," ((__m256i*)cnProcBufRestBit)[%d] = _mm256_sign_epi8(min, sgn);\n",lut_numCnInCnGroups[5]+(j*bitOffsetInGroup)+i);
}
}
}
// =====================================================================
// Process group with 9 BNs
// =====================================================================
// Process group with 9 BNs
// Offset is 2*384/32 = 24
const uint8_t lut_idxCnProcG9[9][8] = {{24,48,72,96,120,144,168,192}, {0,48,72,96,120,144,168,192},
{0,24,72,96,120,144,168,192}, {0,24,48,96,120,144,168,192},
{0,24,48,72,120,144,168,192}, {0,24,48,72,96,144,168,192},
{0,24,48,72,96,120,168,192}, {0,24,48,72,96,120,144,192},
{0,24,48,72,96,120,144,168}};
// Offset is 2*384/32 = 24
const uint8_t lut_idxCnProcG9[9][8] = {{24,48,72,96,120,144,168,192}, {0,48,72,96,120,144,168,192},
{0,24,72,96,120,144,168,192}, {0,24,48,96,120,144,168,192},
{0,24,48,72,120,144,168,192}, {0,24,48,72,96,144,168,192},
{0,24,48,72,96,120,168,192}, {0,24,48,72,96,120,144,192},
{0,24,48,72,96,120,144,168}};
if (lut_numCnInCnGroups[6] > 0)
if (lut_numCnInCnGroups[6] > 0)
{
// Number of groups of 32 CNs for parallel processing
// Ceil for values not divisible by 32
M = (lut_numCnInCnGroups[6]*Z + 31)>>5;
// Number of groups of 32 CNs for parallel processing
// Ceil for values not divisible by 32
M = (lut_numCnInCnGroups[6]*Z + 31)>>5;
// Set the offset to each bit within a group in terms of 32 Byte
bitOffsetInGroup = (lut_numCnInCnGroups_BG1_R13[6]*NR_LDPC_ZMAX)>>5;
// Set the offset to each bit within a group in terms of 32 Byte
bitOffsetInGroup = (lut_numCnInCnGroups_BG1_R13[6]*NR_LDPC_ZMAX)>>5;
// Set pointers to start of group 9
//p_cnProcBuf = (__m256i*) &cnProcBuf [lut_startAddrCnGroups[1]];
//p_cnProcBufRes = (__m256i*) &cnProcBufRes[lut_startAddrCnGroups[1]];
// Set pointers to start of group 9
//p_cnProcBuf = (__m256i*) &cnProcBuf [lut_startAddrCnGroups[1]];
//p_cnProcBufRes = (__m256i*) &cnProcBufRes[lut_startAddrCnGroups[1]];
// Loop over every BN
int iprime=0;
for (j=0; j<9; j++)
// Loop over every BN
int iprime=0;
for (j=0; j<9; j++)
{
// Set of results pointer to correct BN address
//p_cnProcBufResBit = p_cnProcBufRes + (j*bitOffsetInGroup);
// Set of results pointer to correct BN address
//p_cnProcBufResBit = p_cnProcBufRes + (j*bitOffsetInGroup);
// Loop over CNs
for (i=0; i<M; i++,iprime++)
// Loop over CNs
for (i=0; i<M; i++,iprime++)
{
// Abs and sign of 32 CNs (first BN)
// ymm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
printf(" ymm0 = ((__m256i*)&cnProcBuf)[%d];\n",lut_startAddrCnGroups[6]+lut_idxCnProcG9[j][0] + i);
// sgn = _mm256_sign_epi8(*p_ones, ymm0);
printf(" sgn = _mm256_sign_epi8(*p_ones, ymm0);\n");
// min = _mm256_abs_epi8(ymm0);
printf(" min = _mm256_abs_epi8(ymm0);\n");
// Abs and sign of 32 CNs (first BN)
// ymm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
fprintf(fd," ymm0 = ((__m256i*)&cnProcBuf)[%d];\n",lut_startAddrCnGroups[6]+lut_idxCnProcG9[j][0] + i);
// sgn = _mm256_sign_epi8(*p_ones, ymm0);
fprintf(fd," sgn = _mm256_sign_epi8(*p_ones, ymm0);\n");
// min = _mm256_abs_epi8(ymm0);
fprintf(fd," min = _mm256_abs_epi8(ymm0);\n");
// Loop over BNs
for (k=1; k<8; k++)
// Loop over BNs
for (k=1; k<8; k++)
{
printf(" ymm0 = ((__m256i*)&cnProcBuf)[%d];\n",lut_startAddrCnGroups[6]+lut_idxCnProcG9[j][k] + i);
fprintf(fd," ymm0 = ((__m256i*)&cnProcBuf)[%d];\n",lut_startAddrCnGroups[6]+lut_idxCnProcG9[j][k] + i);
// min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
printf(" min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));\n");
// min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
fprintf(fd," min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));\n");
// sgn = _mm256_sign_epi8(sgn, ymm0);
printf(" sgn = _mm256_sign_epi8(sgn, ymm0);\n");
// sgn = _mm256_sign_epi8(sgn, ymm0);
fprintf(fd," sgn = _mm256_sign_epi8(sgn, ymm0);\n");
}
// Store result
// min = _mm256_min_epu8(min, *p_maxLLR); // 128 in epi8 is -127
printf(" min = _mm256_min_epu8(min, *p_maxLLR);\n");
// *p_cnProcBufResBit = _mm256_sign_epi8(min, sgn);
// p_cnProcBufResBit++;
printf(" ((__m256i*)cnProcBufRestBit)[%d] = _mm256_sign_epi8(min, sgn);\n",lut_numCnInCnGroups[6]+(j*bitOffsetInGroup)+i);
// Store result
// min = _mm256_min_epu8(min, *p_maxLLR); // 128 in epi8 is -127
fprintf(fd," min = _mm256_min_epu8(min, *p_maxLLR);\n");
// *p_cnProcBufResBit = _mm256_sign_epi8(min, sgn);
// p_cnProcBufResBit++;
fprintf(fd," ((__m256i*)cnProcBufRestBit)[%d] = _mm256_sign_epi8(min, sgn);\n",lut_numCnInCnGroups[6]+(j*bitOffsetInGroup)+i);
}
}
}
// =====================================================================
// Process group with 10 BNs
// =====================================================================
// Process group with 10 BNs
// Offset is 1*384/32 = 12
const uint8_t lut_idxCnProcG10[10][9] = {{12,24,36,48,60,72,84,96,108}, {0,24,36,48,60,72,84,96,108},
{0,12,36,48,60,72,84,96,108}, {0,12,24,48,60,72,84,96,108},
{0,12,24,36,60,72,84,96,108}, {0,12,24,36,48,72,84,96,108},
{0,12,24,36,48,60,84,96,108}, {0,12,24,36,48,60,72,96,108},
{0,12,24,36,48,60,72,84,108}, {0,12,24,36,48,60,72,84,96}};
// Offset is 1*384/32 = 12
const uint8_t lut_idxCnProcG10[10][9] = {{12,24,36,48,60,72,84,96,108}, {0,24,36,48,60,72,84,96,108},
{0,12,36,48,60,72,84,96,108}, {0,12,24,48,60,72,84,96,108},
{0,12,24,36,60,72,84,96,108}, {0,12,24,36,48,72,84,96,108},
{0,12,24,36,48,60,84,96,108}, {0,12,24,36,48,60,72,96,108},
{0,12,24,36,48,60,72,84,108}, {0,12,24,36,48,60,72,84,96}};
if (lut_numCnInCnGroups[7] > 0)
if (lut_numCnInCnGroups[7] > 0)
{
// Number of groups of 32 CNs for parallel processing
// Ceil for values not divisible by 32
M = (lut_numCnInCnGroups[7]*Z + 31)>>5;
// Number of groups of 32 CNs for parallel processing
// Ceil for values not divisible by 32
M = (lut_numCnInCnGroups[7]*Z + 31)>>5;
// Set the offset to each bit within a group in terms of 32 Byte
bitOffsetInGroup = (lut_numCnInCnGroups_BG1_R13[7]*NR_LDPC_ZMAX)>>5;
// Set the offset to each bit within a group in terms of 32 Byte
bitOffsetInGroup = (lut_numCnInCnGroups_BG1_R13[7]*NR_LDPC_ZMAX)>>5;
// Set pointers to start of group 10
//p_cnProcBuf = (__m256i*) &cnProcBuf [lut_startAddrCnGroups[1]];
//p_cnProcBufRes = (__m256i*) &cnProcBufRes[lut_startAddrCnGroups[1]];
// Set pointers to start of group 10
//p_cnProcBuf = (__m256i*) &cnProcBuf [lut_startAddrCnGroups[1]];
//p_cnProcBufRes = (__m256i*) &cnProcBufRes[lut_startAddrCnGroups[1]];
// Loop over every BN
int iprime=0;
for (j=0; j<10; j++)
// Loop over every BN
int iprime=0;
for (j=0; j<10; j++)
{
// Set of results pointer to correct BN address
//p_cnProcBufResBit = p_cnProcBufRes + (j*bitOffsetInGroup);
// Set of results pointer to correct BN address
//p_cnProcBufResBit = p_cnProcBufRes + (j*bitOffsetInGroup);
// Loop over CNs
for (i=0; i<M; i++,iprime++)
// Loop over CNs
for (i=0; i<M; i++,iprime++)
{
// Abs and sign of 32 CNs (first BN)
// ymm0 = p_cnProcBuf[lut_idxCnProcG10[j][0] + i];
printf(" ymm0 = ((__m256i*)&cnProcBuf)[%d];\n",lut_startAddrCnGroups[7]+lut_idxCnProcG10[j][0] + i);
// sgn = _mm256_sign_epi8(*p_ones, ymm0);
printf(" sgn = _mm256_sign_epi8(*p_ones, ymm0);\n");
// min = _mm256_abs_epi8(ymm0);
printf(" min = _mm256_abs_epi8(ymm0);\n");
// Abs and sign of 32 CNs (first BN)
// ymm0 = p_cnProcBuf[lut_idxCnProcG10[j][0] + i];
fprintf(fd," ymm0 = ((__m256i*)&cnProcBuf)[%d];\n",lut_startAddrCnGroups[7]+lut_idxCnProcG10[j][0] + i);
// sgn = _mm256_sign_epi8(*p_ones, ymm0);
fprintf(fd," sgn = _mm256_sign_epi8(*p_ones, ymm0);\n");
// min = _mm256_abs_epi8(ymm0);
fprintf(fd," min = _mm256_abs_epi8(ymm0);\n");
// Loop over BNs
for (k=1; k<9; k++)
// Loop over BNs
for (k=1; k<9; k++)
{
printf(" ymm0 = ((__m256i*)&cnProcBuf)[%d];\n",lut_startAddrCnGroups[7]+lut_idxCnProcG10[j][k] + i);
fprintf(fd," ymm0 = ((__m256i*)&cnProcBuf)[%d];\n",lut_startAddrCnGroups[7]+lut_idxCnProcG10[j][k] + i);
// min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
printf(" min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));\n");
// min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
fprintf(fd," min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));\n");
// sgn = _mm256_sign_epi8(sgn, ymm0);
printf(" sgn = _mm256_sign_epi8(sgn, ymm0);\n");
// sgn = _mm256_sign_epi8(sgn, ymm0);
fprintf(fd," sgn = _mm256_sign_epi8(sgn, ymm0);\n");
}
// Store result
// min = _mm256_min_epu8(min, *p_maxLLR); // 128 in epi8 is -127
printf(" min = _mm256_min_epu8(min, *p_maxLLR);\n");
// *p_cnProcBufResBit = _mm256_sign_epi8(min, sgn);
// p_cnProcBufResBit++;
printf(" ((__m256i*)cnProcBufRestBit)[%d] = _mm256_sign_epi8(min, sgn);\n",lut_numCnInCnGroups[7]+(j*bitOffsetInGroup)+i);
// Store result
// min = _mm256_min_epu8(min, *p_maxLLR); // 128 in epi8 is -127
fprintf(fd," min = _mm256_min_epu8(min, *p_maxLLR);\n");
// *p_cnProcBufResBit = _mm256_sign_epi8(min, sgn);
// p_cnProcBufResBit++;
fprintf(fd," ((__m256i*)cnProcBufRestBit)[%d] = _mm256_sign_epi8(min, sgn);\n",lut_numCnInCnGroups[7]+(j*bitOffsetInGroup)+i);
}
}
}
// =====================================================================
// Process group with 19 BNs
// Offset is 4*384/32 = 48
const uint16_t lut_idxCnProcG19[19][18] = {{48,96,144,192,240,288,336,384,432,480,528,576,624,672,720,768,816,864}, {0,96,144,192,240,288,336,384,432,480,528,576,624,672,720,768,816,864},
{0,48,144,192,240,288,336,384,432,480,528,576,624,672,720,768,816,864}, {0,48,96,192,240,288,336,384,432,480,528,576,624,672,720,768,816,864},
{0,48,96,144,240,288,336,384,432,480,528,576,624,672,720,768,816,864}, {0,48,96,144,192,288,336,384,432,480,528,576,624,672,720,768,816,864},
{0,48,96,144,192,240,336,384,432,480,528,576,624,672,720,768,816,864}, {0,48,96,144,192,240,288,384,432,480,528,576,624,672,720,768,816,864},
{0,48,96,144,192,240,288,336,432,480,528,576,624,672,720,768,816,864}, {0,48,96,144,192,240,288,336,384,480,528,576,624,672,720,768,816,864},
{0,48,96,144,192,240,288,336,384,432,528,576,624,672,720,768,816,864}, {0,48,96,144,192,240,288,336,384,432,480,576,624,672,720,768,816,864},
{0,48,96,144,192,240,288,336,384,432,480,528,624,672,720,768,816,864}, {0,48,96,144,192,240,288,336,384,432,480,528,576,672,720,768,816,864},
{0,48,96,144,192,240,288,336,384,432,480,528,576,624,720,768,816,864}, {0,48,96,144,192,240,288,336,384,432,480,528,576,624,672,768,816,864},
{0,48,96,144,192,240,288,336,384,432,480,528,576,624,672,720,816,864}, {0,48,96,144,192,240,288,336,384,432,480,528,576,624,672,720,768,864},
{0,48,96,144,192,240,288,336,384,432,480,528,576,624,672,720,768,816}};
// =====================================================================
// Process group with 19 BNs
// Offset is 4*384/32 = 48
const uint16_t lut_idxCnProcG19[19][18] = {{48,96,144,192,240,288,336,384,432,480,528,576,624,672,720,768,816,864}, {0,96,144,192,240,288,336,384,432,480,528,576,624,672,720,768,816,864},
{0,48,144,192,240,288,336,384,432,480,528,576,624,672,720,768,816,864}, {0,48,96,192,240,288,336,384,432,480,528,576,624,672,720,768,816,864},
{0,48,96,144,240,288,336,384,432,480,528,576,624,672,720,768,816,864}, {0,48,96,144,192,288,336,384,432,480,528,576,624,672,720,768,816,864},
{0,48,96,144,192,240,336,384,432,480,528,576,624,672,720,768,816,864}, {0,48,96,144,192,240,288,384,432,480,528,576,624,672,720,768,816,864},
{0,48,96,144,192,240,288,336,432,480,528,576,624,672,720,768,816,864}, {0,48,96,144,192,240,288,336,384,480,528,576,624,672,720,768,816,864},
{0,48,96,144,192,240,288,336,384,432,528,576,624,672,720,768,816,864}, {0,48,96,144,192,240,288,336,384,432,480,576,624,672,720,768,816,864},
{0,48,96,144,192,240,288,336,384,432,480,528,624,672,720,768,816,864}, {0,48,96,144,192,240,288,336,384,432,480,528,576,672,720,768,816,864},
{0,48,96,144,192,240,288,336,384,432,480,528,576,624,720,768,816,864}, {0,48,96,144,192,240,288,336,384,432,480,528,576,624,672,768,816,864},
{0,48,96,144,192,240,288,336,384,432,480,528,576,624,672,720,816,864}, {0,48,96,144,192,240,288,336,384,432,480,528,576,624,672,720,768,864},
{0,48,96,144,192,240,288,336,384,432,480,528,576,624,672,720,768,816}};
if (lut_numCnInCnGroups[8] > 0)
if (lut_numCnInCnGroups[8] > 0)
{
// Number of groups of 32 CNs for parallel processing
// Ceil for values not divisible by 32
M = (lut_numCnInCnGroups[8]*Z + 31)>>5;
// Number of groups of 32 CNs for parallel processing
// Ceil for values not divisible by 32
M = (lut_numCnInCnGroups[8]*Z + 31)>>5;
// Set the offset to each bit within a group in terms of 32 Byte
bitOffsetInGroup = (lut_numCnInCnGroups_BG1_R13[8]*NR_LDPC_ZMAX)>>5;
// Set the offset to each bit within a group in terms of 32 Byte
bitOffsetInGroup = (lut_numCnInCnGroups_BG1_R13[8]*NR_LDPC_ZMAX)>>5;
// Set pointers to start of group 19
//p_cnProcBuf = (__m256i*) &cnProcBuf [lut_startAddrCnGroups[1]];
//p_cnProcBufRes = (__m256i*) &cnProcBufRes[lut_startAddrCnGroups[1]];
// Set pointers to start of group 19
//p_cnProcBuf = (__m256i*) &cnProcBuf [lut_startAddrCnGroups[1]];
//p_cnProcBufRes = (__m256i*) &cnProcBufRes[lut_startAddrCnGroups[1]];
// Loop over every BN
int iprime=0;
for (j=0; j<19; j++)
// Loop over every BN
int iprime=0;
for (j=0; j<19; j++)
{
// Set of results pointer to correct BN address
//p_cnProcBufResBit = p_cnProcBufRes + (j*bitOffsetInGroup);
// Set of results pointer to correct BN address
//p_cnProcBufResBit = p_cnProcBufRes + (j*bitOffsetInGroup);
// Loop over CNs
for (i=0; i<M; i++,iprime++)
// Loop over CNs
for (i=0; i<M; i++,iprime++)
{
// Abs and sign of 32 CNs (first BN)
// ymm0 = p_cnProcBuf[lut_idxCnProcG19[j][0] + i];
printf(" ymm0 = ((__m256i*)&cnProcBuf)[%d];\n",lut_startAddrCnGroups[8]+lut_idxCnProcG19[j][0] + i);
// sgn = _mm256_sign_epi8(*p_ones, ymm0);
printf(" sgn = _mm256_sign_epi8(*p_ones, ymm0);\n");
// min = _mm256_abs_epi8(ymm0);
printf(" min = _mm256_abs_epi8(ymm0);\n");
// Abs and sign of 32 CNs (first BN)
// ymm0 = p_cnProcBuf[lut_idxCnProcG19[j][0] + i];
fprintf(fd," ymm0 = ((__m256i*)&cnProcBuf)[%d];\n",lut_startAddrCnGroups[8]+lut_idxCnProcG19[j][0] + i);
// sgn = _mm256_sign_epi8(*p_ones, ymm0);
fprintf(fd," sgn = _mm256_sign_epi8(*p_ones, ymm0);\n");
// min = _mm256_abs_epi8(ymm0);
fprintf(fd," min = _mm256_abs_epi8(ymm0);\n");
// Loop over BNs
for (k=1; k<18; k++)
// Loop over BNs
for (k=1; k<18; k++)
{
printf(" ymm0 = ((__m256i*)&cnProcBuf)[%d];\n",lut_startAddrCnGroups[8]+lut_idxCnProcG19[j][k] + i);
fprintf(fd," ymm0 = ((__m256i*)&cnProcBuf)[%d];\n",lut_startAddrCnGroups[8]+lut_idxCnProcG19[j][k] + i);
// min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
printf(" min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));\n");
// min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
fprintf(fd," min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));\n");
// sgn = _mm256_sign_epi8(sgn, ymm0);
printf(" sgn = _mm256_sign_epi8(sgn, ymm0);\n");
// sgn = _mm256_sign_epi8(sgn, ymm0);
fprintf(fd," sgn = _mm256_sign_epi8(sgn, ymm0);\n");
}
// Store result
// min = _mm256_min_epu8(min, *p_maxLLR); // 128 in epi8 is -127
printf(" min = _mm256_min_epu8(min, *p_maxLLR);\n");
// *p_cnProcBufResBit = _mm256_sign_epi8(min, sgn);
// p_cnProcBufResBit++;
printf(" ((__m256i*)cnProcBufRestBit)[%d] = _mm256_sign_epi8(min, sgn);\n",lut_numCnInCnGroups[8]+(j*bitOffsetInGroup)+i);
// Store result
// min = _mm256_min_epu8(min, *p_maxLLR); // 128 in epi8 is -127
fprintf(fd," min = _mm256_min_epu8(min, *p_maxLLR);\n");
// *p_cnProcBufResBit = _mm256_sign_epi8(min, sgn);
// p_cnProcBufResBit++;
fprintf(fd," ((__m256i*)cnProcBufRestBit)[%d] = _mm256_sign_epi8(min, sgn);\n",lut_numCnInCnGroups[8]+(j*bitOffsetInGroup)+i);
}
}
}
fprintf(fd,"}\n");
fclose(fd);
}//end of the function nrLDPC_cnProc_BG1
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment