Commit 0f71ebd2 authored by sebastian's avatar sebastian

Experimental changes for layered processing BG2

parent 6548ae15
...@@ -31,6 +31,570 @@ ...@@ -31,6 +31,570 @@
#ifndef __NR_LDPC_CNPROC__H__ #ifndef __NR_LDPC_CNPROC__H__
#define __NR_LDPC_CNPROC__H__ #define __NR_LDPC_CNPROC__H__
/**
\brief Performs CN processing for BG2 on the CN processing buffer and stores the results in the CN processing results buffer.
\param p_lut Pointer to decoder LUTs
\param p_procBuf Pointer to processing buffers
\param Z Lifting size
*/
/*
static inline void nrLDPC_cnProcCore(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBuf, t_nrLDPC_cnProcGroup* p_cnProcGroup, uint16_t Z)
{
__m256i* p_cnProcBuf = (__m256i*) p_cnProcGroup->cnProcBufGroup;
__m256i* p_cnProcBufRes = (__m256i*) p_cnProcGroup->cnProcBufResGroup;
const uint8_t numConnectedBn = p_cnProcGroup->numConnectedBn;
const uint8_t numCnInCnGroup = p_cnProcGroup->numCnInCnGroup;
const uint8_t numCnInCnGroupMax = p_cnProcGroup->numCnInCnGroupMax;
const uint16_t (*idxCnProcGroup)[numConnectedBn-1] = (uint16_t(*)[numConnectedBn-1]) p_cnProcGroup->idxCnProcGroup;
// Number of CNs in Groups
uint32_t M;
uint32_t i;
uint32_t j;
uint32_t k;
// Offset to each bit within a group in terms of 32 Byte
uint32_t bitOffsetInGroup;
__m256i ymm0, min, sgn;
__m256i* p_cnProcBufResBit;
const __m256i* p_ones = (__m256i*) ones256_epi8;
const __m256i* p_maxLLR = (__m256i*) maxLLR256_epi8;
// =====================================================================
// Process group
if (numCnInCnGroup > 0)
{
// Number of groups of 32 CNs for parallel processing
// Ceil for values not divisible by 32
M = (numCnInCnGroup*Z + 31)>>5;
// Set the offset to each bit within a group in terms of 32 Byte
bitOffsetInGroup = (numCnInCnGroupMax*NR_LDPC_ZMAX)>>5;
// Loop over every BN
for (j=0; j<numConnectedBn; j++)
{
// Set of results pointer to correct BN address
p_cnProcBufResBit = p_cnProcBufRes + (j*bitOffsetInGroup);
// Loop over CNs
for (i=0; i<M; i++)
{
// Abs and sign of 32 CNs (first BN)
ymm0 = p_cnProcBuf[idxCnProcGroup[j][0] + i];
sgn = _mm256_sign_epi8(*p_ones, ymm0);
min = _mm256_abs_epi8(ymm0);
//mexPrintf("idxCnProcGroup[%d][0] = %d\n",j,idxCnProcGroup[j][0]);
// Loop over BNs
for (k=1; k<(numConnectedBn-1); k++)
{
//mexPrintf("idxCnProcGroup[%d][%d] = %d\n",j,k,idxCnProcGroup[j][k]);
ymm0 = p_cnProcBuf[idxCnProcGroup[j][k] + i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
}
// Store result
min = _mm256_min_epu8(min, *p_maxLLR); // 128 in epi8 is -127
*p_cnProcBufResBit = _mm256_sign_epi8(min, sgn);
p_cnProcBufResBit++;
}
}
}
}
*/
/**
\brief Performs CN processing for BG2 on the CN processing buffer and stores the results in the CN processing results buffer.
\param p_lut Pointer to decoder LUTs
\param p_procBuf Pointer to processing buffers
\param Z Lifting size
*/
static inline void nrLDPC_cnProc_BG2_CNG3(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBuf, uint16_t Z)
{
const uint8_t* lut_numCnInCnGroups = p_lut->numCnInCnGroups;
const uint32_t* lut_startAddrCnGroups = p_lut->startAddrCnGroups;
int8_t* cnProcBuf = p_procBuf->cnProcBuf;
int8_t* cnProcBufRes = p_procBuf->cnProcBufRes;
__m256i* p_cnProcBuf;
__m256i* p_cnProcBufRes;
// Number of CNs in Groups
uint32_t M;
uint32_t i;
uint32_t j;
// Offset to each bit within a group in terms of 32 Byte
uint32_t bitOffsetInGroup;
__m256i ymm0, min, sgn;
__m256i* p_cnProcBufResBit;
const __m256i* p_ones = (__m256i*) ones256_epi8;
const __m256i* p_maxLLR = (__m256i*) maxLLR256_epi8;
// LUT with offsets for bits that need to be processed
// 1. bit proc requires LLRs of 2. and 3. bit, 2.bits of 1. and 3. etc.
// Offsets are in units of bitOffsetInGroup
const uint8_t lut_idxCnProcG3[3][2] = {{72,144}, {0,144}, {0,72}};
if (lut_numCnInCnGroups[0] > 0)
{
// Number of groups of 32 CNs for parallel processing
// Ceil for values not divisible by 32
M = (lut_numCnInCnGroups[0]*Z + 31)>>5;
// Set the offset to each bit within a group in terms of 32 Byte
bitOffsetInGroup = (lut_numCnInCnGroups_BG2_R15[0]*NR_LDPC_ZMAX)>>5;
// Set pointers to start of group 3
p_cnProcBuf = (__m256i*) &cnProcBuf [lut_startAddrCnGroups[0]];
p_cnProcBufRes = (__m256i*) &cnProcBufRes[lut_startAddrCnGroups[0]];
// Loop over every BN
for (j=0; j<3; j++)
{
// Set of results pointer to correct BN address
p_cnProcBufResBit = p_cnProcBufRes + (j*bitOffsetInGroup);
// Loop over CNs
for (i=0; i<M; i++)
{
// Abs and sign of 32 CNs (first BN)
ymm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
sgn = _mm256_sign_epi8(*p_ones, ymm0);
min = _mm256_abs_epi8(ymm0);
// 32 CNs of second BN
ymm0 = p_cnProcBuf[lut_idxCnProcG3[j][1] + i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
// Store result
min = _mm256_min_epu8(min, *p_maxLLR); // 128 in epi8 is -127
*p_cnProcBufResBit = _mm256_sign_epi8(min, sgn);
p_cnProcBufResBit++;
}
}
}
}
/**
\brief Performs CN processing for BG2 on the CN processing buffer and stores the results in the CN processing results buffer.
\param p_lut Pointer to decoder LUTs
\param p_procBuf Pointer to processing buffers
\param Z Lifting size
*/
static inline void nrLDPC_cnProc_BG2_CNG4(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBuf, uint16_t Z)
{
const uint8_t* lut_numCnInCnGroups = p_lut->numCnInCnGroups;
const uint32_t* lut_startAddrCnGroups = p_lut->startAddrCnGroups;
int8_t* cnProcBuf = p_procBuf->cnProcBuf;
int8_t* cnProcBufRes = p_procBuf->cnProcBufRes;
__m256i* p_cnProcBuf;
__m256i* p_cnProcBufRes;
// Number of CNs in Groups
uint32_t M;
uint32_t i;
uint32_t j;
uint32_t k;
// Offset to each bit within a group in terms of 32 Byte
uint32_t bitOffsetInGroup;
__m256i ymm0, min, sgn;
__m256i* p_cnProcBufResBit;
const __m256i* p_ones = (__m256i*) ones256_epi8;
const __m256i* p_maxLLR = (__m256i*) maxLLR256_epi8;
// =====================================================================
// Process group with 4 BNs
// Offset is 20*384/32 = 240
const uint16_t lut_idxCnProcG4[4][3] = {{240,480,720}, {0,480,720}, {0,240,720}, {0,240,480}};
if (lut_numCnInCnGroups[1] > 0)
{
// Number of groups of 32 CNs for parallel processing
// Ceil for values not divisible by 32
M = (lut_numCnInCnGroups[1]*Z + 31)>>5;
// Set the offset to each bit within a group in terms of 32 Byte
bitOffsetInGroup = (lut_numCnInCnGroups_BG2_R15[1]*NR_LDPC_ZMAX)>>5;
// Set pointers to start of group 4
p_cnProcBuf = (__m256i*) &cnProcBuf [lut_startAddrCnGroups[1]];
p_cnProcBufRes = (__m256i*) &cnProcBufRes[lut_startAddrCnGroups[1]];
// Loop over every BN
for (j=0; j<4; j++)
{
// Set of results pointer to correct BN address
p_cnProcBufResBit = p_cnProcBufRes + (j*bitOffsetInGroup);
// Loop over CNs
for (i=0; i<M; i++)
{
// Abs and sign of 32 CNs (first BN)
ymm0 = p_cnProcBuf[lut_idxCnProcG4[j][0] + i];
sgn = _mm256_sign_epi8(*p_ones, ymm0);
min = _mm256_abs_epi8(ymm0);
// Loop over BNs
for (k=1; k<3; k++)
{
ymm0 = p_cnProcBuf[lut_idxCnProcG4[j][k] + i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
}
// Store result
min = _mm256_min_epu8(min, *p_maxLLR); // 128 in epi8 is -127
*p_cnProcBufResBit = _mm256_sign_epi8(min, sgn);
p_cnProcBufResBit++;
}
}
}
}
/**
\brief Performs CN processing for BG2 on the CN processing buffer and stores the results in the CN processing results buffer.
\param p_lut Pointer to decoder LUTs
\param p_procBuf Pointer to processing buffers
\param Z Lifting size
*/
static inline void nrLDPC_cnProc_BG2_CNG5(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBuf, uint16_t Z)
{
const uint8_t* lut_numCnInCnGroups = p_lut->numCnInCnGroups;
const uint32_t* lut_startAddrCnGroups = p_lut->startAddrCnGroups;
int8_t* cnProcBuf = p_procBuf->cnProcBuf;
int8_t* cnProcBufRes = p_procBuf->cnProcBufRes;
__m256i* p_cnProcBuf;
__m256i* p_cnProcBufRes;
// Number of CNs in Groups
uint32_t M;
uint32_t i;
uint32_t j;
uint32_t k;
// Offset to each bit within a group in terms of 32 Byte
uint32_t bitOffsetInGroup;
__m256i ymm0, min, sgn;
__m256i* p_cnProcBufResBit;
const __m256i* p_ones = (__m256i*) ones256_epi8;
const __m256i* p_maxLLR = (__m256i*) maxLLR256_epi8;
// =====================================================================
// Process group with 5 BNs
// Offset is 9*384/32 = 108
const uint16_t lut_idxCnProcG5[5][4] = {{108,216,324,432}, {0,216,324,432},
{0,108,324,432}, {0,108,216,432}, {0,108,216,324}};
if (lut_numCnInCnGroups[2] > 0)
{
// Number of groups of 32 CNs for parallel processing
// Ceil for values not divisible by 32
M = (lut_numCnInCnGroups[2]*Z + 31)>>5;
// Set the offset to each bit within a group in terms of 32 Byte
bitOffsetInGroup = (lut_numCnInCnGroups_BG2_R15[2]*NR_LDPC_ZMAX)>>5;
// Set pointers to start of group 5
p_cnProcBuf = (__m256i*) &cnProcBuf [lut_startAddrCnGroups[2]];
p_cnProcBufRes = (__m256i*) &cnProcBufRes[lut_startAddrCnGroups[2]];
// Loop over every BN
for (j=0; j<5; j++)
{
// Set of results pointer to correct BN address
p_cnProcBufResBit = p_cnProcBufRes + (j*bitOffsetInGroup);
// Loop over CNs
for (i=0; i<M; i++)
{
// Abs and sign of 32 CNs (first BN)
ymm0 = p_cnProcBuf[lut_idxCnProcG5[j][0] + i];
sgn = _mm256_sign_epi8(*p_ones, ymm0);
min = _mm256_abs_epi8(ymm0);
// Loop over BNs
for (k=1; k<4; k++)
{
ymm0 = p_cnProcBuf[lut_idxCnProcG5[j][k] + i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
}
// Store result
min = _mm256_min_epu8(min, *p_maxLLR); // 128 in epi8 is -127
*p_cnProcBufResBit = _mm256_sign_epi8(min, sgn);
p_cnProcBufResBit++;
}
}
}
}
/**
\brief Performs CN processing for BG2 on the CN processing buffer and stores the results in the CN processing results buffer.
\param p_lut Pointer to decoder LUTs
\param p_procBuf Pointer to processing buffers
\param Z Lifting size
*/
static inline void nrLDPC_cnProc_BG2_CNG6(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBuf, uint16_t Z)
{
const uint8_t* lut_numCnInCnGroups = p_lut->numCnInCnGroups;
const uint32_t* lut_startAddrCnGroups = p_lut->startAddrCnGroups;
int8_t* cnProcBuf = p_procBuf->cnProcBuf;
int8_t* cnProcBufRes = p_procBuf->cnProcBufRes;
__m256i* p_cnProcBuf;
__m256i* p_cnProcBufRes;
// Number of CNs in Groups
uint32_t M;
uint32_t i;
uint32_t j;
uint32_t k;
// Offset to each bit within a group in terms of 32 Byte
uint32_t bitOffsetInGroup;
__m256i ymm0, min, sgn;
__m256i* p_cnProcBufResBit;
const __m256i* p_ones = (__m256i*) ones256_epi8;
const __m256i* p_maxLLR = (__m256i*) maxLLR256_epi8;
// =====================================================================
// Process group with 6 BNs
// Offset is 3*384/32 = 36
const uint16_t lut_idxCnProcG6[6][5] = {{36,72,108,144,180}, {0,72,108,144,180},
{0,36,108,144,180}, {0,36,72,144,180},
{0,36,72,108,180}, {0,36,72,108,144}};
if (lut_numCnInCnGroups[3] > 0)
{
// Number of groups of 32 CNs for parallel processing
// Ceil for values not divisible by 32
M = (lut_numCnInCnGroups[3]*Z + 31)>>5;
// Set the offset to each bit within a group in terms of 32 Byte
bitOffsetInGroup = (lut_numCnInCnGroups_BG2_R15[3]*NR_LDPC_ZMAX)>>5;
// Set pointers to start of group 6
p_cnProcBuf = (__m256i*) &cnProcBuf [lut_startAddrCnGroups[3]];
p_cnProcBufRes = (__m256i*) &cnProcBufRes[lut_startAddrCnGroups[3]];
// Loop over every BN
for (j=0; j<6; j++)
{
// Set of results pointer to correct BN address
p_cnProcBufResBit = p_cnProcBufRes + (j*bitOffsetInGroup);
// Loop over CNs
for (i=0; i<M; i++)
{
// Abs and sign of 32 CNs (first BN)
ymm0 = p_cnProcBuf[lut_idxCnProcG6[j][0] + i];
sgn = _mm256_sign_epi8(*p_ones, ymm0);
min = _mm256_abs_epi8(ymm0);
// Loop over BNs
for (k=1; k<5; k++)
{
ymm0 = p_cnProcBuf[lut_idxCnProcG6[j][k] + i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
}
// Store result
min = _mm256_min_epu8(min, *p_maxLLR); // 128 in epi8 is -127
*p_cnProcBufResBit = _mm256_sign_epi8(min, sgn);
p_cnProcBufResBit++;
}
}
}
}
/**
\brief Performs CN processing for BG2 on the CN processing buffer and stores the results in the CN processing results buffer.
\param p_lut Pointer to decoder LUTs
\param p_procBuf Pointer to processing buffers
\param Z Lifting size
*/
static inline void nrLDPC_cnProc_BG2_CNG8(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBuf, uint16_t Z)
{
const uint8_t* lut_numCnInCnGroups = p_lut->numCnInCnGroups;
const uint32_t* lut_startAddrCnGroups = p_lut->startAddrCnGroups;
int8_t* cnProcBuf = p_procBuf->cnProcBuf;
int8_t* cnProcBufRes = p_procBuf->cnProcBufRes;
__m256i* p_cnProcBuf;
__m256i* p_cnProcBufRes;
// Number of CNs in Groups
uint32_t M;
uint32_t i;
uint32_t j;
uint32_t k;
// Offset to each bit within a group in terms of 32 Byte
uint32_t bitOffsetInGroup;
__m256i ymm0, min, sgn;
__m256i* p_cnProcBufResBit;
const __m256i* p_ones = (__m256i*) ones256_epi8;
const __m256i* p_maxLLR = (__m256i*) maxLLR256_epi8;
// =====================================================================
// Process group with 8 BNs
// Offset is 2*384/32 = 24
const uint8_t lut_idxCnProcG8[8][7] = {{24,48,72,96,120,144,168}, {0,48,72,96,120,144,168},
{0,24,72,96,120,144,168}, {0,24,48,96,120,144,168},
{0,24,48,72,120,144,168}, {0,24,48,72,96,144,168},
{0,24,48,72,96,120,168}, {0,24,48,72,96,120,144}};
if (lut_numCnInCnGroups[4] > 0)
{
// Number of groups of 32 CNs for parallel processing
// Ceil for values not divisible by 32
M = (lut_numCnInCnGroups[4]*Z + 31)>>5;
// Set the offset to each bit within a group in terms of 32 Byte
bitOffsetInGroup = (lut_numCnInCnGroups_BG2_R15[4]*NR_LDPC_ZMAX)>>5;
// Set pointers to start of group 8
p_cnProcBuf = (__m256i*) &cnProcBuf [lut_startAddrCnGroups[4]];
p_cnProcBufRes = (__m256i*) &cnProcBufRes[lut_startAddrCnGroups[4]];
// Loop over every BN
for (j=0; j<8; j++)
{
// Set of results pointer to correct BN address
p_cnProcBufResBit = p_cnProcBufRes + (j*bitOffsetInGroup);
// Loop over CNs
for (i=0; i<M; i++)
{
// Abs and sign of 32 CNs (first BN)
ymm0 = p_cnProcBuf[lut_idxCnProcG8[j][0] + i];
sgn = _mm256_sign_epi8(*p_ones, ymm0);
min = _mm256_abs_epi8(ymm0);
// Loop over BNs
for (k=1; k<7; k++)
{
ymm0 = p_cnProcBuf[lut_idxCnProcG8[j][k] + i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
}
// Store result
min = _mm256_min_epu8(min, *p_maxLLR); // 128 in epi8 is -127
*p_cnProcBufResBit = _mm256_sign_epi8(min, sgn);
p_cnProcBufResBit++;
}
}
}
}
/**
\brief Performs CN processing for BG2 on the CN processing buffer and stores the results in the CN processing results buffer.
\param p_lut Pointer to decoder LUTs
\param p_procBuf Pointer to processing buffers
\param Z Lifting size
*/
static inline void nrLDPC_cnProc_BG2_CNG10(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBuf, uint16_t Z)
{
const uint8_t* lut_numCnInCnGroups = p_lut->numCnInCnGroups;
const uint32_t* lut_startAddrCnGroups = p_lut->startAddrCnGroups;
int8_t* cnProcBuf = p_procBuf->cnProcBuf;
int8_t* cnProcBufRes = p_procBuf->cnProcBufRes;
__m256i* p_cnProcBuf;
__m256i* p_cnProcBufRes;
// Number of CNs in Groups
uint32_t M;
uint32_t i;
uint32_t j;
uint32_t k;
// Offset to each bit within a group in terms of 32 Byte
uint32_t bitOffsetInGroup;
__m256i ymm0, min, sgn;
__m256i* p_cnProcBufResBit;
const __m256i* p_ones = (__m256i*) ones256_epi8;
const __m256i* p_maxLLR = (__m256i*) maxLLR256_epi8;
// =====================================================================
// Process group with 10 BNs
// Offset is 2*384/32 = 24
const uint8_t lut_idxCnProcG10[10][9] = {{24,48,72,96,120,144,168,192,216}, {0,48,72,96,120,144,168,192,216},
{0,24,72,96,120,144,168,192,216}, {0,24,48,96,120,144,168,192,216},
{0,24,48,72,120,144,168,192,216}, {0,24,48,72,96,144,168,192,216},
{0,24,48,72,96,120,168,192,216}, {0,24,48,72,96,120,144,192,216},
{0,24,48,72,96,120,144,168,216}, {0,24,48,72,96,120,144,168,192}};
if (lut_numCnInCnGroups[5] > 0)
{
// Number of groups of 32 CNs for parallel processing
// Ceil for values not divisible by 32
M = (lut_numCnInCnGroups[5]*Z + 31)>>5;
// Set the offset to each bit within a group in terms of 32 Byte
bitOffsetInGroup = (lut_numCnInCnGroups_BG2_R15[5]*NR_LDPC_ZMAX)>>5;
// Set pointers to start of group 10
p_cnProcBuf = (__m256i*) &cnProcBuf [lut_startAddrCnGroups[5]];
p_cnProcBufRes = (__m256i*) &cnProcBufRes[lut_startAddrCnGroups[5]];
// Loop over every BN
for (j=0; j<10; j++)
{
// Set of results pointer to correct BN address
p_cnProcBufResBit = p_cnProcBufRes + (j*bitOffsetInGroup);
// Loop over CNs
for (i=0; i<M; i++)
{
// Abs and sign of 32 CNs (first BN)
ymm0 = p_cnProcBuf[lut_idxCnProcG10[j][0] + i];
sgn = _mm256_sign_epi8(*p_ones, ymm0);
min = _mm256_abs_epi8(ymm0);
// Loop over BNs
for (k=1; k<9; k++)
{
ymm0 = p_cnProcBuf[lut_idxCnProcG10[j][k] + i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
}
// Store result
min = _mm256_min_epu8(min, *p_maxLLR); // 128 in epi8 is -127
*p_cnProcBufResBit = _mm256_sign_epi8(min, sgn);
p_cnProcBufResBit++;
}
}
}
}
/** /**
\brief Performs CN processing for BG2 on the CN processing buffer and stores the results in the CN processing results buffer. \brief Performs CN processing for BG2 on the CN processing buffer and stores the results in the CN processing results buffer.
\param p_lut Pointer to decoder LUTs \param p_lut Pointer to decoder LUTs
...@@ -44,7 +608,7 @@ static inline void nrLDPC_cnProc_BG2(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_pr ...@@ -44,7 +608,7 @@ static inline void nrLDPC_cnProc_BG2(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_pr
int8_t* cnProcBuf = p_procBuf->cnProcBuf; int8_t* cnProcBuf = p_procBuf->cnProcBuf;
int8_t* cnProcBufRes = p_procBuf->cnProcBufRes; int8_t* cnProcBufRes = p_procBuf->cnProcBufRes;
__m256i* p_cnProcBuf; __m256i* p_cnProcBuf;
__m256i* p_cnProcBufRes; __m256i* p_cnProcBufRes;
...@@ -373,7 +937,7 @@ static inline void nrLDPC_cnProc_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_pr ...@@ -373,7 +937,7 @@ static inline void nrLDPC_cnProc_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_pr
int8_t* cnProcBuf = p_procBuf->cnProcBuf; int8_t* cnProcBuf = p_procBuf->cnProcBuf;
int8_t* cnProcBufRes = p_procBuf->cnProcBufRes; int8_t* cnProcBufRes = p_procBuf->cnProcBufRes;
__m256i* p_cnProcBuf; __m256i* p_cnProcBuf;
__m256i* p_cnProcBufRes; __m256i* p_cnProcBufRes;
...@@ -872,7 +1436,7 @@ static inline uint32_t nrLDPC_cnProcPc_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf ...@@ -872,7 +1436,7 @@ static inline uint32_t nrLDPC_cnProcPc_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf
int8_t* cnProcBuf = p_procBuf->cnProcBuf; int8_t* cnProcBuf = p_procBuf->cnProcBuf;
int8_t* cnProcBufRes = p_procBuf->cnProcBufRes; int8_t* cnProcBufRes = p_procBuf->cnProcBufRes;
__m256i* p_cnProcBuf; __m256i* p_cnProcBuf;
__m256i* p_cnProcBufRes; __m256i* p_cnProcBufRes;
...@@ -1507,7 +2071,7 @@ static inline uint32_t nrLDPC_cnProcPc_BG2(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf ...@@ -1507,7 +2071,7 @@ static inline uint32_t nrLDPC_cnProcPc_BG2(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf
int8_t* cnProcBuf = p_procBuf->cnProcBuf; int8_t* cnProcBuf = p_procBuf->cnProcBuf;
int8_t* cnProcBufRes = p_procBuf->cnProcBufRes; int8_t* cnProcBufRes = p_procBuf->cnProcBufRes;
__m256i* p_cnProcBuf; __m256i* p_cnProcBuf;
__m256i* p_cnProcBufRes; __m256i* p_cnProcBufRes;
......
...@@ -38,7 +38,7 @@ ...@@ -38,7 +38,7 @@
#include "nrLDPC_cnProc.h" #include "nrLDPC_cnProc.h"
#include "nrLDPC_bnProc.h" #include "nrLDPC_bnProc.h"
#define NR_LDPC_ENABLE_PARITY_CHECK //#define NR_LDPC_ENABLE_PARITY_CHECK
//#define NR_LDPC_PROFILER_DETAIL //#define NR_LDPC_PROFILER_DETAIL
#ifdef NR_LDPC_DEBUG_MODE #ifdef NR_LDPC_DEBUG_MODE
...@@ -83,11 +83,13 @@ static inline uint32_t nrLDPC_decoder_core(int8_t* p_llr, int8_t* p_out, t_nrLDP ...@@ -83,11 +83,13 @@ static inline uint32_t nrLDPC_decoder_core(int8_t* p_llr, int8_t* p_out, t_nrLDP
// Minimum number of iterations is 1 // Minimum number of iterations is 1
// 0 iterations means hard-decision on input LLRs // 0 iterations means hard-decision on input LLRs
uint32_t i = 1; uint32_t i = 0;
// Initialize with parity check fail != 0 // Initialize with parity check fail != 0
int32_t pcRes = 1; int32_t pcRes = 1;
int8_t* p_llrOut; int8_t* p_llrOut;
uint32_t l;
if (outMode == nrLDPC_outMode_LLRINT8) if (outMode == nrLDPC_outMode_LLRINT8)
{ {
p_llrOut = p_out; p_llrOut = p_out;
...@@ -100,6 +102,10 @@ static inline uint32_t nrLDPC_decoder_core(int8_t* p_llr, int8_t* p_out, t_nrLDP ...@@ -100,6 +102,10 @@ static inline uint32_t nrLDPC_decoder_core(int8_t* p_llr, int8_t* p_out, t_nrLDP
memset(p_llrOut,0, NR_LDPC_MAX_NUM_LLR*sizeof(int8_t)); memset(p_llrOut,0, NR_LDPC_MAX_NUM_LLR*sizeof(int8_t));
} }
// CN processing layers for BG2
static const t_nrLDPC_cnProcGroup cnProcLayers_BG2[NR_LDPC_NUM_CN_GROUPS_BG2] = {nrLDPC_cnProc_BG2_CNG3, nrLDPC_cnProc_BG2_CNG4, nrLDPC_cnProc_BG2_CNG5, nrLDPC_cnProc_BG2_CNG6, nrLDPC_cnProc_BG2_CNG8, nrLDPC_cnProc_BG2_CNG10};
static const t_nrLDPC_cn2bnProcBufGroup cn2bnProcBufLayers_BG2[NR_LDPC_NUM_CN_GROUPS_BG2] = {nrLDPC_cn2bnProcBuf_BG2_CNG3, nrLDPC_cn2bnProcBuf_BG2_CNG4, nrLDPC_cn2bnProcBuf_BG2_CNG5, nrLDPC_cn2bnProcBuf_BG2_CNG6, nrLDPC_cn2bnProcBuf_BG2_CNG8, nrLDPC_cn2bnProcBuf_BG2_CNG10};
static const t_nrLDPC_bn2cnProcBufGroup bn2cnProcBufLayers_BG2[NR_LDPC_NUM_CN_GROUPS_BG2] = {nrLDPC_bn2cnProcBuf_BG2_CNG3, nrLDPC_bn2cnProcBuf_BG2_CNG4, nrLDPC_bn2cnProcBuf_BG2_CNG5, nrLDPC_bn2cnProcBuf_BG2_CNG6, nrLDPC_bn2cnProcBuf_BG2_CNG8, nrLDPC_bn2cnProcBuf_BG2_CNG10};
// Initialization // Initialization
#ifdef NR_LDPC_PROFILER_DETAIL #ifdef NR_LDPC_PROFILER_DETAIL
...@@ -132,15 +138,208 @@ static inline uint32_t nrLDPC_decoder_core(int8_t* p_llr, int8_t* p_out, t_nrLDP ...@@ -132,15 +138,208 @@ static inline uint32_t nrLDPC_decoder_core(int8_t* p_llr, int8_t* p_out, t_nrLDP
#ifdef NR_LDPC_DEBUG_MODE #ifdef NR_LDPC_DEBUG_MODE
nrLDPC_debug_initBuffer2File(nrLDPC_buffers_CN_PROC); nrLDPC_debug_initBuffer2File(nrLDPC_buffers_CN_PROC);
nrLDPC_debug_writeBuffer2File(nrLDPC_buffers_CN_PROC, p_procBuf); nrLDPC_debug_initBuffer2File(nrLDPC_buffers_CN_PROC_RES);
nrLDPC_debug_initBuffer2File(nrLDPC_buffers_BN_PROC);
nrLDPC_debug_initBuffer2File(nrLDPC_buffers_LLR_RES);
nrLDPC_debug_initBuffer2File(nrLDPC_buffers_BN_PROC_RES);
#endif
#ifdef NR_LDPC_ENABLE_LAYERED_DECODING
// Buffers have to be set to zero otherwise previous call will impact decoding with current implementation
memset(p_procBuf->cnProcBuf,0, NR_LDPC_SIZE_CN_PROC_BUF*sizeof(int8_t));
memset(p_procBuf->cnProcBufRes,0, NR_LDPC_SIZE_CN_PROC_BUF*sizeof(int8_t));
memset(p_procBuf->bnProcBuf,0, NR_LDPC_SIZE_BN_PROC_BUF*sizeof(int8_t));
memset(p_procBuf->bnProcBufRes,0, NR_LDPC_SIZE_BN_PROC_BUF*sizeof(int8_t));
#endif
// Iterations
while ( (i < numMaxIter) && (pcRes != 0) )
{
//mexPrintf("i = %d, pcRes = %d\n", i, pcRes);
#ifdef NR_LDPC_ENABLE_LAYERED_DECODING
// Processing layers
for (l=0; l<6; l++)
{
// CN Processing
#ifdef NR_LDPC_DEBUG_MODE
nrLDPC_debug_writeBuffer2File(nrLDPC_buffers_CN_PROC, p_procBuf);
#endif
#ifdef NR_LDPC_PROFILER_DETAIL
start_meas(&p_profiler->cnProc);
#endif
cnProcLayers_BG2[l](p_lut, p_procBuf, Z);
#ifdef NR_LDPC_PROFILER_DETAIL
stop_meas(&p_profiler->cnProc);
#endif
#ifdef NR_LDPC_DEBUG_MODE
nrLDPC_debug_writeBuffer2File(nrLDPC_buffers_CN_PROC_RES, p_procBuf);
#endif
// CN -> BN
#ifdef NR_LDPC_PROFILER_DETAIL
start_meas(&p_profiler->cn2bnProcBuf);
#endif
cn2bnProcBufLayers_BG2[l](p_lut, p_procBuf, Z);
#ifdef NR_LDPC_PROFILER_DETAIL
stop_meas(&p_profiler->cn2bnProcBuf);
#endif
#ifdef NR_LDPC_DEBUG_MODE
nrLDPC_debug_writeBuffer2File(nrLDPC_buffers_BN_PROC, p_procBuf);
#endif
// BN Processing
#ifdef NR_LDPC_PROFILER_DETAIL
start_meas(&p_profiler->bnProcPc);
#endif
nrLDPC_bnProcPc(p_lut, p_procBuf, Z);
#ifdef NR_LDPC_PROFILER_DETAIL
stop_meas(&p_profiler->bnProcPc);
#endif
#ifdef NR_LDPC_DEBUG_MODE
nrLDPC_debug_writeBuffer2File(nrLDPC_buffers_LLR_RES, p_procBuf);
#endif
#ifdef NR_LDPC_PROFILER_DETAIL
start_meas(&p_profiler->bnProc);
#endif
nrLDPC_bnProc(p_lut, p_procBuf, Z);
#ifdef NR_LDPC_PROFILER_DETAIL
stop_meas(&p_profiler->bnProc);
#endif
#ifdef NR_LDPC_DEBUG_MODE
nrLDPC_debug_writeBuffer2File(nrLDPC_buffers_BN_PROC_RES, p_procBuf);
#endif
// BN -> CN
#ifdef NR_LDPC_PROFILER_DETAIL
start_meas(&p_profiler->bn2cnProcBuf);
#endif
bn2cnProcBufLayers_BG2[l](p_lut, p_procBuf, Z);
#ifdef NR_LDPC_PROFILER_DETAIL
stop_meas(&p_profiler->bn2cnProcBuf);
#endif
}
#else
// No layers
// CN Processing
#ifdef NR_LDPC_PROFILER_DETAIL
start_meas(&p_profiler->cnProc);
#endif
for (l=0; l<6; l++)
{
cnProcLayers_BG2[l](p_lut, p_procBuf, Z);
}
#ifdef NR_LDPC_PROFILER_DETAIL
stop_meas(&p_profiler->cnProc);
#endif
// CN -> BN
#ifdef NR_LDPC_PROFILER_DETAIL
start_meas(&p_profiler->cn2bnProcBuf);
#endif
for (l=0; l<6; l++)
{
cn2bnProcBufLayers_BG2[l](p_lut, p_procBuf, Z);
}
#ifdef NR_LDPC_PROFILER_DETAIL
stop_meas(&p_profiler->cn2bnProcBuf);
#endif
// BN Processing
#ifdef NR_LDPC_PROFILER_DETAIL
start_meas(&p_profiler->bnProcPc);
#endif
nrLDPC_bnProcPc(p_lut, p_procBuf, Z);
#ifdef NR_LDPC_PROFILER_DETAIL
stop_meas(&p_profiler->bnProcPc);
#endif
#ifdef NR_LDPC_PROFILER_DETAIL
start_meas(&p_profiler->bnProc);
#endif
nrLDPC_bnProc(p_lut, p_procBuf, Z);
#ifdef NR_LDPC_PROFILER_DETAIL
stop_meas(&p_profiler->bnProc);
#endif
// BN -> CN
#ifdef NR_LDPC_PROFILER_DETAIL
start_meas(&p_profiler->bn2cnProcBuf);
#endif
for (l=0; l<6; l++)
{
bn2cnProcBufLayers_BG2[l](p_lut, p_procBuf, Z);
}
#ifdef NR_LDPC_PROFILER_DETAIL
stop_meas(&p_profiler->bn2cnProcBuf);
#endif
// END NR_LDPC_ENABLE_LAYERED_DECODING
#endif
// Parity Check
#ifdef NR_LDPC_ENABLE_PARITY_CHECK
#ifdef NR_LDPC_PROFILER_DETAIL
start_meas(&p_profiler->cnProcPc);
#endif
if (BG == 1)
{
pcRes = nrLDPC_cnProcPc_BG1(p_lut, p_procBuf, Z);
}
else
{
pcRes = nrLDPC_cnProcPc_BG2(p_lut, p_procBuf, Z);
}
#ifdef NR_LDPC_PROFILER_DETAIL
stop_meas(&p_profiler->cnProcPc);
#endif
#endif
// Increase iteration counter
i++;
//mexPrintf("pcRes = %d\n", pcRes);
}
// If maximum number of iterations reached an PC still fails increase number of iterations
// Thus, i > numMaxIter indicates that PC has failed
#ifdef NR_LDPC_ENABLE_PARITY_CHECK
if (pcRes != 0)
{
i++;
}
#endif
// Assign results from processing buffer to output
#ifdef NR_LDPC_PROFILER_DETAIL
start_meas(&p_profiler->llrRes2llrOut);
#endif #endif
nrLDPC_llrRes2llrOut(p_lut, p_llrOut, p_procBuf, Z, BG);
#ifdef NR_LDPC_PROFILER_DETAIL
stop_meas(&p_profiler->llrRes2llrOut);
#endif
// Hard-decision
#ifdef NR_LDPC_PROFILER_DETAIL
start_meas(&p_profiler->llr2bit);
#endif
if (outMode == nrLDPC_outMode_BIT)
{
nrLDPC_llr2bitPacked(p_out, p_llrOut, numLLR);
}
else if (outMode == nrLDPC_outMode_BITINT8)
{
nrLDPC_llr2bit(p_out, p_llrOut, numLLR);
}
#ifdef NR_LDPC_PROFILER_DETAIL
stop_meas(&p_profiler->llr2bit);
#endif
return i;
}
/*
// First iteration // First iteration
// CN processing // CN processing
#ifdef NR_LDPC_PROFILER_DETAIL #ifdef NR_LDPC_PROFILER_DETAIL
start_meas(&p_profiler->cnProc); start_meas(&p_profiler->cnProc);
#endif #endif
if (BG == 1) if (BG == 1)
{ {
nrLDPC_cnProc_BG1(p_lut, p_procBuf, Z); nrLDPC_cnProc_BG1(p_lut, p_procBuf, Z);
...@@ -149,6 +348,7 @@ static inline uint32_t nrLDPC_decoder_core(int8_t* p_llr, int8_t* p_out, t_nrLDP ...@@ -149,6 +348,7 @@ static inline uint32_t nrLDPC_decoder_core(int8_t* p_llr, int8_t* p_out, t_nrLDP
{ {
nrLDPC_cnProc_BG2(p_lut, p_procBuf, Z); nrLDPC_cnProc_BG2(p_lut, p_procBuf, Z);
} }
#ifdef NR_LDPC_PROFILER_DETAIL #ifdef NR_LDPC_PROFILER_DETAIL
stop_meas(&p_profiler->cnProc); stop_meas(&p_profiler->cnProc);
#endif #endif
...@@ -248,6 +448,12 @@ static inline uint32_t nrLDPC_decoder_core(int8_t* p_llr, int8_t* p_out, t_nrLDP ...@@ -248,6 +448,12 @@ static inline uint32_t nrLDPC_decoder_core(int8_t* p_llr, int8_t* p_out, t_nrLDP
{ {
nrLDPC_cnProc_BG2(p_lut, p_procBuf, Z); nrLDPC_cnProc_BG2(p_lut, p_procBuf, Z);
} }
for (k = 0; k < 6; k++)
{
cnProcLayers_BG2[k](p_lut, p_procBuf, Z);
}
#ifdef NR_LDPC_PROFILER_DETAIL #ifdef NR_LDPC_PROFILER_DETAIL
stop_meas(&p_profiler->cnProc); stop_meas(&p_profiler->cnProc);
#endif #endif
...@@ -350,6 +556,7 @@ static inline uint32_t nrLDPC_decoder_core(int8_t* p_llr, int8_t* p_out, t_nrLDP ...@@ -350,6 +556,7 @@ static inline uint32_t nrLDPC_decoder_core(int8_t* p_llr, int8_t* p_out, t_nrLDP
#ifdef NR_LDPC_PROFILER_DETAIL #ifdef NR_LDPC_PROFILER_DETAIL
start_meas(&p_profiler->cnProc); start_meas(&p_profiler->cnProc);
#endif #endif
if (BG == 1) if (BG == 1)
{ {
nrLDPC_cnProc_BG1(p_lut, p_procBuf, Z); nrLDPC_cnProc_BG1(p_lut, p_procBuf, Z);
...@@ -358,6 +565,7 @@ static inline uint32_t nrLDPC_decoder_core(int8_t* p_llr, int8_t* p_out, t_nrLDP ...@@ -358,6 +565,7 @@ static inline uint32_t nrLDPC_decoder_core(int8_t* p_llr, int8_t* p_out, t_nrLDP
{ {
nrLDPC_cnProc_BG2(p_lut, p_procBuf, Z); nrLDPC_cnProc_BG2(p_lut, p_procBuf, Z);
} }
#ifdef NR_LDPC_PROFILER_DETAIL #ifdef NR_LDPC_PROFILER_DETAIL
stop_meas(&p_profiler->cnProc); stop_meas(&p_profiler->cnProc);
#endif #endif
...@@ -490,3 +698,4 @@ static inline uint32_t nrLDPC_decoder_core(int8_t* p_llr, int8_t* p_out, t_nrLDP ...@@ -490,3 +698,4 @@ static inline uint32_t nrLDPC_decoder_core(int8_t* p_llr, int8_t* p_out, t_nrLDP
return i; return i;
} }
*/
...@@ -472,6 +472,258 @@ static inline void nrLDPC_llr2CnProcBuf_BG2(t_nrLDPC_lut* p_lut, int8_t* llr, t_ ...@@ -472,6 +472,258 @@ static inline void nrLDPC_llr2CnProcBuf_BG2(t_nrLDPC_lut* p_lut, int8_t* llr, t_
} }
} }
/**
\brief Copies the values in the CN processing results buffer to their corresponding place in the BN processing buffer for BG2.
\param p_lut Pointer to decoder LUTs
\param p_procBuf Pointer to the processing buffers
\param Z Lifting size
*/
static inline void nrLDPC_cn2bnProcBuf_BG2_CNG3(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBuf, uint16_t Z)
{
const uint8_t* lut_numCnInCnGroups = p_lut->numCnInCnGroups;
const uint32_t* lut_startAddrCnGroups = p_lut->startAddrCnGroups;
const uint16_t (*lut_circShift_CNG3) [lut_numCnInCnGroups_BG2_R15[0]] = (uint16_t(*)[lut_numCnInCnGroups_BG2_R15[0]]) p_lut->circShift[0];
const uint32_t (*lut_startAddrBnProcBuf_CNG3) [lut_numCnInCnGroups[0]] = (uint32_t(*)[lut_numCnInCnGroups[0]]) p_lut->startAddrBnProcBuf[0];
const uint8_t (*lut_bnPosBnProcBuf_CNG3) [lut_numCnInCnGroups[0]] = (uint8_t(*)[lut_numCnInCnGroups[0]]) p_lut->bnPosBnProcBuf[0];
int8_t* cnProcBufRes = p_procBuf->cnProcBufRes;
int8_t* bnProcBuf = p_procBuf->bnProcBuf;
int8_t* p_cnProcBufRes;
uint32_t bitOffsetInGroup;
uint32_t i;
uint32_t j;
uint32_t idxBn = 0;
// =====================================================================
// CN group with 3 BNs
bitOffsetInGroup = lut_numCnInCnGroups_BG2_R15[0]*NR_LDPC_ZMAX;
for (j=0; j<3; j++)
{
p_cnProcBufRes = &cnProcBufRes[lut_startAddrCnGroups[0] + j*bitOffsetInGroup];
for (i=0; i<lut_numCnInCnGroups[0]; i++)
{
idxBn = lut_startAddrBnProcBuf_CNG3[j][i] + lut_bnPosBnProcBuf_CNG3[j][i]*Z;
nrLDPC_inv_circ_memcpy(&bnProcBuf[idxBn],p_cnProcBufRes,Z,lut_circShift_CNG3[j][i]);
p_cnProcBufRes += Z;
}
}
}
/**
\brief Copies the values in the CN processing results buffer to their corresponding place in the BN processing buffer for BG2.
\param p_lut Pointer to decoder LUTs
\param p_procBuf Pointer to the processing buffers
\param Z Lifting size
*/
static inline void nrLDPC_cn2bnProcBuf_BG2_CNG4(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBuf, uint16_t Z)
{
const uint8_t* lut_numCnInCnGroups = p_lut->numCnInCnGroups;
const uint32_t* lut_startAddrCnGroups = p_lut->startAddrCnGroups;
const uint16_t (*lut_circShift_CNG4) [lut_numCnInCnGroups_BG2_R15[1]] = (uint16_t(*)[lut_numCnInCnGroups_BG2_R15[1]]) p_lut->circShift[1];
const uint32_t (*lut_startAddrBnProcBuf_CNG4) [lut_numCnInCnGroups[1]] = (uint32_t(*)[lut_numCnInCnGroups[1]]) p_lut->startAddrBnProcBuf[1];
const uint8_t (*lut_bnPosBnProcBuf_CNG4) [lut_numCnInCnGroups[1]] = (uint8_t(*)[lut_numCnInCnGroups[1]]) p_lut->bnPosBnProcBuf[1];
int8_t* cnProcBufRes = p_procBuf->cnProcBufRes;
int8_t* bnProcBuf = p_procBuf->bnProcBuf;
int8_t* p_cnProcBufRes;
uint32_t bitOffsetInGroup;
uint32_t i;
uint32_t j;
uint32_t idxBn = 0;
// =====================================================================
// CN group with 4 BNs
bitOffsetInGroup = lut_numCnInCnGroups_BG2_R15[1]*NR_LDPC_ZMAX;
for (j=0; j<4; j++)
{
p_cnProcBufRes = &cnProcBufRes[lut_startAddrCnGroups[1] + j*bitOffsetInGroup];
for (i=0; i<lut_numCnInCnGroups[1]; i++)
{
idxBn = lut_startAddrBnProcBuf_CNG4[j][i] + lut_bnPosBnProcBuf_CNG4[j][i]*Z;
nrLDPC_inv_circ_memcpy(&bnProcBuf[idxBn],p_cnProcBufRes,Z,lut_circShift_CNG4[j][i]);
p_cnProcBufRes += Z;
}
}
}
/**
\brief Copies the values in the CN processing results buffer to their corresponding place in the BN processing buffer for BG2.
\param p_lut Pointer to decoder LUTs
\param p_procBuf Pointer to the processing buffers
\param Z Lifting size
*/
static inline void nrLDPC_cn2bnProcBuf_BG2_CNG5(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBuf, uint16_t Z)
{
const uint8_t* lut_numCnInCnGroups = p_lut->numCnInCnGroups;
const uint32_t* lut_startAddrCnGroups = p_lut->startAddrCnGroups;
const uint16_t (*lut_circShift_CNG5) [lut_numCnInCnGroups_BG2_R15[2]] = (uint16_t(*)[lut_numCnInCnGroups_BG2_R15[2]]) p_lut->circShift[2];
const uint32_t (*lut_startAddrBnProcBuf_CNG5) [lut_numCnInCnGroups[2]] = (uint32_t(*)[lut_numCnInCnGroups[2]]) p_lut->startAddrBnProcBuf[2];
const uint8_t (*lut_bnPosBnProcBuf_CNG5) [lut_numCnInCnGroups[2]] = (uint8_t(*)[lut_numCnInCnGroups[2]]) p_lut->bnPosBnProcBuf[2];
int8_t* cnProcBufRes = p_procBuf->cnProcBufRes;
int8_t* bnProcBuf = p_procBuf->bnProcBuf;
int8_t* p_cnProcBufRes;
uint32_t bitOffsetInGroup;
uint32_t i;
uint32_t j;
uint32_t idxBn = 0;
// =====================================================================
// CN group with 5 BNs
bitOffsetInGroup = lut_numCnInCnGroups_BG2_R15[2]*NR_LDPC_ZMAX;
for (j=0; j<5; j++)
{
p_cnProcBufRes = &cnProcBufRes[lut_startAddrCnGroups[2] + j*bitOffsetInGroup];
for (i=0; i<lut_numCnInCnGroups[2]; i++)
{
idxBn = lut_startAddrBnProcBuf_CNG5[j][i] + lut_bnPosBnProcBuf_CNG5[j][i]*Z;
nrLDPC_inv_circ_memcpy(&bnProcBuf[idxBn],p_cnProcBufRes,Z,lut_circShift_CNG5[j][i]);
p_cnProcBufRes += Z;
}
}
}
/**
\brief Copies the values in the CN processing results buffer to their corresponding place in the BN processing buffer for BG2.
\param p_lut Pointer to decoder LUTs
\param p_procBuf Pointer to the processing buffers
\param Z Lifting size
*/
static inline void nrLDPC_cn2bnProcBuf_BG2_CNG6(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBuf, uint16_t Z)
{
const uint8_t* lut_numCnInCnGroups = p_lut->numCnInCnGroups;
const uint32_t* lut_startAddrCnGroups = p_lut->startAddrCnGroups;
const uint16_t (*lut_circShift_CNG6) [lut_numCnInCnGroups_BG2_R15[3]] = (uint16_t(*)[lut_numCnInCnGroups_BG2_R15[3]]) p_lut->circShift[3];
const uint32_t (*lut_startAddrBnProcBuf_CNG6) [lut_numCnInCnGroups[3]] = (uint32_t(*)[lut_numCnInCnGroups[3]]) p_lut->startAddrBnProcBuf[3];
const uint8_t (*lut_bnPosBnProcBuf_CNG6) [lut_numCnInCnGroups[3]] = (uint8_t(*)[lut_numCnInCnGroups[3]]) p_lut->bnPosBnProcBuf[3];
int8_t* cnProcBufRes = p_procBuf->cnProcBufRes;
int8_t* bnProcBuf = p_procBuf->bnProcBuf;
int8_t* p_cnProcBufRes;
uint32_t bitOffsetInGroup;
uint32_t i;
uint32_t j;
uint32_t idxBn = 0;
// =====================================================================
// CN group with 6 BNs
bitOffsetInGroup = lut_numCnInCnGroups_BG2_R15[3]*NR_LDPC_ZMAX;
for (j=0; j<6; j++)
{
p_cnProcBufRes = &cnProcBufRes[lut_startAddrCnGroups[3] + j*bitOffsetInGroup];
for (i=0; i<lut_numCnInCnGroups[3]; i++)
{
idxBn = lut_startAddrBnProcBuf_CNG6[j][i] + lut_bnPosBnProcBuf_CNG6[j][i]*Z;
nrLDPC_inv_circ_memcpy(&bnProcBuf[idxBn],p_cnProcBufRes,Z,lut_circShift_CNG6[j][i]);
p_cnProcBufRes += Z;
}
}
}
/**
\brief Copies the values in the CN processing results buffer to their corresponding place in the BN processing buffer for BG2.
\param p_lut Pointer to decoder LUTs
\param p_procBuf Pointer to the processing buffers
\param Z Lifting size
*/
static inline void nrLDPC_cn2bnProcBuf_BG2_CNG8(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBuf, uint16_t Z)
{
const uint8_t* lut_numCnInCnGroups = p_lut->numCnInCnGroups;
const uint32_t* lut_startAddrCnGroups = p_lut->startAddrCnGroups;
const uint16_t (*lut_circShift_CNG8) [lut_numCnInCnGroups_BG2_R15[4]] = (uint16_t(*)[lut_numCnInCnGroups_BG2_R15[4]]) p_lut->circShift[4];
const uint32_t (*lut_startAddrBnProcBuf_CNG8) [lut_numCnInCnGroups[4]] = (uint32_t(*)[lut_numCnInCnGroups[4]]) p_lut->startAddrBnProcBuf[4];
const uint8_t (*lut_bnPosBnProcBuf_CNG8) [lut_numCnInCnGroups[4]] = (uint8_t(*)[lut_numCnInCnGroups[4]]) p_lut->bnPosBnProcBuf[4];
int8_t* cnProcBufRes = p_procBuf->cnProcBufRes;
int8_t* bnProcBuf = p_procBuf->bnProcBuf;
int8_t* p_cnProcBufRes;
uint32_t bitOffsetInGroup;
uint32_t i;
uint32_t j;
uint32_t idxBn = 0;
// =====================================================================
// CN group with 8 BNs
bitOffsetInGroup = lut_numCnInCnGroups_BG2_R15[4]*NR_LDPC_ZMAX;
for (j=0; j<8; j++)
{
p_cnProcBufRes = &cnProcBufRes[lut_startAddrCnGroups[4] + j*bitOffsetInGroup];
for (i=0; i<lut_numCnInCnGroups[4]; i++)
{
idxBn = lut_startAddrBnProcBuf_CNG8[j][i] + lut_bnPosBnProcBuf_CNG8[j][i]*Z;
nrLDPC_inv_circ_memcpy(&bnProcBuf[idxBn],p_cnProcBufRes,Z,lut_circShift_CNG8[j][i]);
p_cnProcBufRes += Z;
}
}
}
/**
\brief Copies the values in the CN processing results buffer to their corresponding place in the BN processing buffer for BG2.
\param p_lut Pointer to decoder LUTs
\param p_procBuf Pointer to the processing buffers
\param Z Lifting size
*/
static inline void nrLDPC_cn2bnProcBuf_BG2_CNG10(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBuf, uint16_t Z)
{
const uint8_t* lut_numCnInCnGroups = p_lut->numCnInCnGroups;
const uint32_t* lut_startAddrCnGroups = p_lut->startAddrCnGroups;
const uint16_t (*lut_circShift_CNG10) [lut_numCnInCnGroups_BG2_R15[5]] = (uint16_t(*)[lut_numCnInCnGroups_BG2_R15[5]]) p_lut->circShift[5];
const uint32_t (*lut_startAddrBnProcBuf_CNG10) [lut_numCnInCnGroups[5]] = (uint32_t(*)[lut_numCnInCnGroups[5]]) p_lut->startAddrBnProcBuf[5];
const uint8_t (*lut_bnPosBnProcBuf_CNG10) [lut_numCnInCnGroups[5]] = (uint8_t(*)[lut_numCnInCnGroups[5]]) p_lut->bnPosBnProcBuf[5];
int8_t* cnProcBufRes = p_procBuf->cnProcBufRes;
int8_t* bnProcBuf = p_procBuf->bnProcBuf;
int8_t* p_cnProcBufRes;
uint32_t bitOffsetInGroup;
uint32_t i;
uint32_t j;
uint32_t idxBn = 0;
// =====================================================================
// CN group with 10 BNs
bitOffsetInGroup = lut_numCnInCnGroups_BG2_R15[5]*NR_LDPC_ZMAX;
for (j=0; j<10; j++)
{
p_cnProcBufRes = &cnProcBufRes[lut_startAddrCnGroups[5] + j*bitOffsetInGroup];
for (i=0; i<lut_numCnInCnGroups[5]; i++)
{
idxBn = lut_startAddrBnProcBuf_CNG10[j][i] + lut_bnPosBnProcBuf_CNG10[j][i]*Z;
nrLDPC_inv_circ_memcpy(&bnProcBuf[idxBn],p_cnProcBufRes,Z,lut_circShift_CNG10[j][i]);
p_cnProcBufRes += Z;
}
}
}
/** /**
\brief Copies the values in the CN processing results buffer to their corresponding place in the BN processing buffer for BG2. \brief Copies the values in the CN processing results buffer to their corresponding place in the BN processing buffer for BG2.
\param p_lut Pointer to decoder LUTs \param p_lut Pointer to decoder LUTs
...@@ -815,6 +1067,261 @@ static inline void nrLDPC_cn2bnProcBuf_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf ...@@ -815,6 +1067,261 @@ static inline void nrLDPC_cn2bnProcBuf_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf
} }
/**
\brief Copies the values in the BN processing results buffer to their corresponding place in the CN processing buffer for BG2.
\param p_lut Pointer to decoder LUTs
\param p_procBuf Pointer to the processing buffers
\param Z Lifting size
*/
static inline void nrLDPC_bn2cnProcBuf_BG2_CNG3(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBuf, uint16_t Z)
{
const uint8_t* lut_numCnInCnGroups = p_lut->numCnInCnGroups;
const uint32_t* lut_startAddrCnGroups = p_lut->startAddrCnGroups;
const uint16_t (*lut_circShift_CNG3) [lut_numCnInCnGroups_BG2_R15[0]] = (uint16_t(*)[lut_numCnInCnGroups_BG2_R15[0]]) p_lut->circShift[0];
const uint32_t (*lut_startAddrBnProcBuf_CNG3) [lut_numCnInCnGroups[0]] = (uint32_t(*)[lut_numCnInCnGroups[0]]) p_lut->startAddrBnProcBuf[0];
const uint8_t (*lut_bnPosBnProcBuf_CNG3) [lut_numCnInCnGroups[0]] = (uint8_t(*)[lut_numCnInCnGroups[0]]) p_lut->bnPosBnProcBuf[0];
int8_t* cnProcBuf = p_procBuf->cnProcBuf;
int8_t* bnProcBufRes = p_procBuf->bnProcBufRes;
int8_t* p_cnProcBuf;
uint32_t bitOffsetInGroup;
uint32_t i;
uint32_t j;
uint32_t idxBn = 0;
// For CN groups 3 to 6 no need to send the last BN back since it's single edge
// and BN processing does not change the value already in the CN proc buf
// =====================================================================
// CN group with 3 BNs
bitOffsetInGroup = lut_numCnInCnGroups_BG2_R15[0]*NR_LDPC_ZMAX;
for (j=0; j<2; j++)
{
p_cnProcBuf = &cnProcBuf[lut_startAddrCnGroups[0] + j*bitOffsetInGroup];
for (i=0; i<lut_numCnInCnGroups[0]; i++)
{
idxBn = lut_startAddrBnProcBuf_CNG3[j][i] + lut_bnPosBnProcBuf_CNG3[j][i]*Z;
nrLDPC_circ_memcpy(p_cnProcBuf, &bnProcBufRes[idxBn], Z, lut_circShift_CNG3[j][i]);
p_cnProcBuf += Z;
}
}
}
/**
\brief Copies the values in the BN processing results buffer to their corresponding place in the CN processing buffer for BG2.
\param p_lut Pointer to decoder LUTs
\param p_procBuf Pointer to the processing buffers
\param Z Lifting size
*/
static inline void nrLDPC_bn2cnProcBuf_BG2_CNG4(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBuf, uint16_t Z)
{
const uint8_t* lut_numCnInCnGroups = p_lut->numCnInCnGroups;
const uint32_t* lut_startAddrCnGroups = p_lut->startAddrCnGroups;
const uint16_t (*lut_circShift_CNG4) [lut_numCnInCnGroups_BG2_R15[1]] = (uint16_t(*)[lut_numCnInCnGroups_BG2_R15[1]]) p_lut->circShift[1];
const uint32_t (*lut_startAddrBnProcBuf_CNG4) [lut_numCnInCnGroups[1]] = (uint32_t(*)[lut_numCnInCnGroups[1]]) p_lut->startAddrBnProcBuf[1];
const uint8_t (*lut_bnPosBnProcBuf_CNG4) [lut_numCnInCnGroups[1]] = (uint8_t(*)[lut_numCnInCnGroups[1]]) p_lut->bnPosBnProcBuf[1];
int8_t* cnProcBuf = p_procBuf->cnProcBuf;
int8_t* bnProcBufRes = p_procBuf->bnProcBufRes;
int8_t* p_cnProcBuf;
uint32_t bitOffsetInGroup;
uint32_t i;
uint32_t j;
uint32_t idxBn = 0;
// =====================================================================
// CN group with 4 BNs
bitOffsetInGroup = lut_numCnInCnGroups_BG2_R15[1]*NR_LDPC_ZMAX;
for (j=0; j<3; j++)
{
p_cnProcBuf = &cnProcBuf[lut_startAddrCnGroups[1] + j*bitOffsetInGroup];
for (i=0; i<lut_numCnInCnGroups[1]; i++)
{
idxBn = lut_startAddrBnProcBuf_CNG4[j][i] + lut_bnPosBnProcBuf_CNG4[j][i]*Z;
nrLDPC_circ_memcpy(p_cnProcBuf, &bnProcBufRes[idxBn], Z, lut_circShift_CNG4[j][i]);
p_cnProcBuf += Z;
}
}
}
/**
\brief Copies the values in the BN processing results buffer to their corresponding place in the CN processing buffer for BG2.
\param p_lut Pointer to decoder LUTs
\param p_procBuf Pointer to the processing buffers
\param Z Lifting size
*/
static inline void nrLDPC_bn2cnProcBuf_BG2_CNG5(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBuf, uint16_t Z)
{
const uint8_t* lut_numCnInCnGroups = p_lut->numCnInCnGroups;
const uint32_t* lut_startAddrCnGroups = p_lut->startAddrCnGroups;
const uint16_t (*lut_circShift_CNG5) [lut_numCnInCnGroups_BG2_R15[2]] = (uint16_t(*)[lut_numCnInCnGroups_BG2_R15[2]]) p_lut->circShift[2];
const uint32_t (*lut_startAddrBnProcBuf_CNG5) [lut_numCnInCnGroups[2]] = (uint32_t(*)[lut_numCnInCnGroups[2]]) p_lut->startAddrBnProcBuf[2];
const uint8_t (*lut_bnPosBnProcBuf_CNG5) [lut_numCnInCnGroups[2]] = (uint8_t(*)[lut_numCnInCnGroups[2]]) p_lut->bnPosBnProcBuf[2];
int8_t* cnProcBuf = p_procBuf->cnProcBuf;
int8_t* bnProcBufRes = p_procBuf->bnProcBufRes;
int8_t* p_cnProcBuf;
uint32_t bitOffsetInGroup;
uint32_t i;
uint32_t j;
uint32_t idxBn = 0;
// =====================================================================
// CN group with 5 BNs
bitOffsetInGroup = lut_numCnInCnGroups_BG2_R15[2]*NR_LDPC_ZMAX;
for (j=0; j<4; j++)
{
p_cnProcBuf = &cnProcBuf[lut_startAddrCnGroups[2] + j*bitOffsetInGroup];
for (i=0; i<lut_numCnInCnGroups[2]; i++)
{
idxBn = lut_startAddrBnProcBuf_CNG5[j][i] + lut_bnPosBnProcBuf_CNG5[j][i]*Z;
nrLDPC_circ_memcpy(p_cnProcBuf, &bnProcBufRes[idxBn], Z, lut_circShift_CNG5[j][i]);
p_cnProcBuf += Z;
}
}
}
/**
\brief Copies the values in the BN processing results buffer to their corresponding place in the CN processing buffer for BG2.
\param p_lut Pointer to decoder LUTs
\param p_procBuf Pointer to the processing buffers
\param Z Lifting size
*/
static inline void nrLDPC_bn2cnProcBuf_BG2_CNG6(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBuf, uint16_t Z)
{
const uint8_t* lut_numCnInCnGroups = p_lut->numCnInCnGroups;
const uint32_t* lut_startAddrCnGroups = p_lut->startAddrCnGroups;
const uint16_t (*lut_circShift_CNG6) [lut_numCnInCnGroups_BG2_R15[3]] = (uint16_t(*)[lut_numCnInCnGroups_BG2_R15[3]]) p_lut->circShift[3];
const uint32_t (*lut_startAddrBnProcBuf_CNG6) [lut_numCnInCnGroups[3]] = (uint32_t(*)[lut_numCnInCnGroups[3]]) p_lut->startAddrBnProcBuf[3];
const uint8_t (*lut_bnPosBnProcBuf_CNG6) [lut_numCnInCnGroups[3]] = (uint8_t(*)[lut_numCnInCnGroups[3]]) p_lut->bnPosBnProcBuf[3];
int8_t* cnProcBuf = p_procBuf->cnProcBuf;
int8_t* bnProcBufRes = p_procBuf->bnProcBufRes;
int8_t* p_cnProcBuf;
uint32_t bitOffsetInGroup;
uint32_t i;
uint32_t j;
uint32_t idxBn = 0;
// =====================================================================
// CN group with 6 BNs
bitOffsetInGroup = lut_numCnInCnGroups_BG2_R15[3]*NR_LDPC_ZMAX;
for (j=0; j<5; j++)
{
p_cnProcBuf = &cnProcBuf[lut_startAddrCnGroups[3] + j*bitOffsetInGroup];
for (i=0; i<lut_numCnInCnGroups[3]; i++)
{
idxBn = lut_startAddrBnProcBuf_CNG6[j][i] + lut_bnPosBnProcBuf_CNG6[j][i]*Z;
nrLDPC_circ_memcpy(p_cnProcBuf, &bnProcBufRes[idxBn], Z, lut_circShift_CNG6[j][i]);
p_cnProcBuf += Z;
}
}
}
/**
\brief Copies the values in the BN processing results buffer to their corresponding place in the CN processing buffer for BG2.
\param p_lut Pointer to decoder LUTs
\param p_procBuf Pointer to the processing buffers
\param Z Lifting size
*/
static inline void nrLDPC_bn2cnProcBuf_BG2_CNG8(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBuf, uint16_t Z)
{
const uint8_t* lut_numCnInCnGroups = p_lut->numCnInCnGroups;
const uint32_t* lut_startAddrCnGroups = p_lut->startAddrCnGroups;
const uint16_t (*lut_circShift_CNG8) [lut_numCnInCnGroups_BG2_R15[4]] = (uint16_t(*)[lut_numCnInCnGroups_BG2_R15[4]]) p_lut->circShift[4];
const uint32_t (*lut_startAddrBnProcBuf_CNG8) [lut_numCnInCnGroups[4]] = (uint32_t(*)[lut_numCnInCnGroups[4]]) p_lut->startAddrBnProcBuf[4];
const uint8_t (*lut_bnPosBnProcBuf_CNG8) [lut_numCnInCnGroups[4]] = (uint8_t(*)[lut_numCnInCnGroups[4]]) p_lut->bnPosBnProcBuf[4];
int8_t* cnProcBuf = p_procBuf->cnProcBuf;
int8_t* bnProcBufRes = p_procBuf->bnProcBufRes;
int8_t* p_cnProcBuf;
uint32_t bitOffsetInGroup;
uint32_t i;
uint32_t j;
uint32_t idxBn = 0;
// =====================================================================
// CN group with 8 BNs
bitOffsetInGroup = lut_numCnInCnGroups_BG2_R15[4]*NR_LDPC_ZMAX;
for (j=0; j<8; j++)
{
p_cnProcBuf = &cnProcBuf[lut_startAddrCnGroups[4] + j*bitOffsetInGroup];
for (i=0; i<lut_numCnInCnGroups[4]; i++)
{
idxBn = lut_startAddrBnProcBuf_CNG8[j][i] + lut_bnPosBnProcBuf_CNG8[j][i]*Z;
nrLDPC_circ_memcpy(p_cnProcBuf, &bnProcBufRes[idxBn], Z, lut_circShift_CNG8[j][i]);
p_cnProcBuf += Z;
}
}
}
/**
\brief Copies the values in the BN processing results buffer to their corresponding place in the CN processing buffer for BG2.
\param p_lut Pointer to decoder LUTs
\param p_procBuf Pointer to the processing buffers
\param Z Lifting size
*/
static inline void nrLDPC_bn2cnProcBuf_BG2_CNG10(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBuf, uint16_t Z)
{
const uint8_t* lut_numCnInCnGroups = p_lut->numCnInCnGroups;
const uint32_t* lut_startAddrCnGroups = p_lut->startAddrCnGroups;
const uint16_t (*lut_circShift_CNG10) [lut_numCnInCnGroups_BG2_R15[5]] = (uint16_t(*)[lut_numCnInCnGroups_BG2_R15[5]]) p_lut->circShift[5];
const uint32_t (*lut_startAddrBnProcBuf_CNG10) [lut_numCnInCnGroups[5]] = (uint32_t(*)[lut_numCnInCnGroups[5]]) p_lut->startAddrBnProcBuf[5];
const uint8_t (*lut_bnPosBnProcBuf_CNG10) [lut_numCnInCnGroups[5]] = (uint8_t(*)[lut_numCnInCnGroups[5]]) p_lut->bnPosBnProcBuf[5];
int8_t* cnProcBuf = p_procBuf->cnProcBuf;
int8_t* bnProcBufRes = p_procBuf->bnProcBufRes;
int8_t* p_cnProcBuf;
uint32_t bitOffsetInGroup;
uint32_t i;
uint32_t j;
uint32_t idxBn = 0;
// =====================================================================
// CN group with 10 BNs
bitOffsetInGroup = lut_numCnInCnGroups_BG2_R15[5]*NR_LDPC_ZMAX;
for (j=0; j<10; j++)
{
p_cnProcBuf = &cnProcBuf[lut_startAddrCnGroups[5] + j*bitOffsetInGroup];
for (i=0; i<lut_numCnInCnGroups[5]; i++)
{
idxBn = lut_startAddrBnProcBuf_CNG10[j][i] + lut_bnPosBnProcBuf_CNG10[j][i]*Z;
nrLDPC_circ_memcpy(p_cnProcBuf, &bnProcBufRes[idxBn], Z, lut_circShift_CNG10[j][i]);
p_cnProcBuf += Z;
}
}
}
/** /**
\brief Copies the values in the BN processing results buffer to their corresponding place in the CN processing buffer for BG2. \brief Copies the values in the BN processing results buffer to their corresponding place in the CN processing buffer for BG2.
\param p_lut Pointer to decoder LUTs \param p_lut Pointer to decoder LUTs
......
...@@ -103,4 +103,12 @@ typedef struct nrLDPC_procBuf { ...@@ -103,4 +103,12 @@ typedef struct nrLDPC_procBuf {
int8_t* llrProcBuf; /**< LLR processing buffer */ int8_t* llrProcBuf; /**< LLR processing buffer */
} t_nrLDPC_procBuf; } t_nrLDPC_procBuf;
/**
Structure for CN processing of one CN group
*/
typedef void (*t_nrLDPC_cnProcGroup)(t_nrLDPC_lut*, t_nrLDPC_procBuf*, uint16_t);
typedef void (*t_nrLDPC_cn2bnProcBufGroup)(t_nrLDPC_lut*, t_nrLDPC_procBuf*, uint16_t);
typedef void (*t_nrLDPC_bn2cnProcBufGroup)(t_nrLDPC_lut*, t_nrLDPC_procBuf*, uint16_t);
#endif #endif
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment