Commit 0f71ebd2 authored by sebastian's avatar sebastian

Experimental changes for layered processing BG2

parent 6548ae15
......@@ -31,6 +31,570 @@
#ifndef __NR_LDPC_CNPROC__H__
#define __NR_LDPC_CNPROC__H__
/**
\brief Performs CN processing for BG2 on the CN processing buffer and stores the results in the CN processing results buffer.
\param p_lut Pointer to decoder LUTs
\param p_procBuf Pointer to processing buffers
\param Z Lifting size
*/
/*
static inline void nrLDPC_cnProcCore(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBuf, t_nrLDPC_cnProcGroup* p_cnProcGroup, uint16_t Z)
{
__m256i* p_cnProcBuf = (__m256i*) p_cnProcGroup->cnProcBufGroup;
__m256i* p_cnProcBufRes = (__m256i*) p_cnProcGroup->cnProcBufResGroup;
const uint8_t numConnectedBn = p_cnProcGroup->numConnectedBn;
const uint8_t numCnInCnGroup = p_cnProcGroup->numCnInCnGroup;
const uint8_t numCnInCnGroupMax = p_cnProcGroup->numCnInCnGroupMax;
const uint16_t (*idxCnProcGroup)[numConnectedBn-1] = (uint16_t(*)[numConnectedBn-1]) p_cnProcGroup->idxCnProcGroup;
// Number of CNs in Groups
uint32_t M;
uint32_t i;
uint32_t j;
uint32_t k;
// Offset to each bit within a group in terms of 32 Byte
uint32_t bitOffsetInGroup;
__m256i ymm0, min, sgn;
__m256i* p_cnProcBufResBit;
const __m256i* p_ones = (__m256i*) ones256_epi8;
const __m256i* p_maxLLR = (__m256i*) maxLLR256_epi8;
// =====================================================================
// Process group
if (numCnInCnGroup > 0)
{
// Number of groups of 32 CNs for parallel processing
// Ceil for values not divisible by 32
M = (numCnInCnGroup*Z + 31)>>5;
// Set the offset to each bit within a group in terms of 32 Byte
bitOffsetInGroup = (numCnInCnGroupMax*NR_LDPC_ZMAX)>>5;
// Loop over every BN
for (j=0; j<numConnectedBn; j++)
{
// Set of results pointer to correct BN address
p_cnProcBufResBit = p_cnProcBufRes + (j*bitOffsetInGroup);
// Loop over CNs
for (i=0; i<M; i++)
{
// Abs and sign of 32 CNs (first BN)
ymm0 = p_cnProcBuf[idxCnProcGroup[j][0] + i];
sgn = _mm256_sign_epi8(*p_ones, ymm0);
min = _mm256_abs_epi8(ymm0);
//mexPrintf("idxCnProcGroup[%d][0] = %d\n",j,idxCnProcGroup[j][0]);
// Loop over BNs
for (k=1; k<(numConnectedBn-1); k++)
{
//mexPrintf("idxCnProcGroup[%d][%d] = %d\n",j,k,idxCnProcGroup[j][k]);
ymm0 = p_cnProcBuf[idxCnProcGroup[j][k] + i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
}
// Store result
min = _mm256_min_epu8(min, *p_maxLLR); // 128 in epi8 is -127
*p_cnProcBufResBit = _mm256_sign_epi8(min, sgn);
p_cnProcBufResBit++;
}
}
}
}
*/
/**
\brief Performs CN processing for BG2 on the CN processing buffer and stores the results in the CN processing results buffer.
\param p_lut Pointer to decoder LUTs
\param p_procBuf Pointer to processing buffers
\param Z Lifting size
*/
static inline void nrLDPC_cnProc_BG2_CNG3(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBuf, uint16_t Z)
{
const uint8_t* lut_numCnInCnGroups = p_lut->numCnInCnGroups;
const uint32_t* lut_startAddrCnGroups = p_lut->startAddrCnGroups;
int8_t* cnProcBuf = p_procBuf->cnProcBuf;
int8_t* cnProcBufRes = p_procBuf->cnProcBufRes;
__m256i* p_cnProcBuf;
__m256i* p_cnProcBufRes;
// Number of CNs in Groups
uint32_t M;
uint32_t i;
uint32_t j;
// Offset to each bit within a group in terms of 32 Byte
uint32_t bitOffsetInGroup;
__m256i ymm0, min, sgn;
__m256i* p_cnProcBufResBit;
const __m256i* p_ones = (__m256i*) ones256_epi8;
const __m256i* p_maxLLR = (__m256i*) maxLLR256_epi8;
// LUT with offsets for bits that need to be processed
// 1. bit proc requires LLRs of 2. and 3. bit, 2.bits of 1. and 3. etc.
// Offsets are in units of bitOffsetInGroup
const uint8_t lut_idxCnProcG3[3][2] = {{72,144}, {0,144}, {0,72}};
if (lut_numCnInCnGroups[0] > 0)
{
// Number of groups of 32 CNs for parallel processing
// Ceil for values not divisible by 32
M = (lut_numCnInCnGroups[0]*Z + 31)>>5;
// Set the offset to each bit within a group in terms of 32 Byte
bitOffsetInGroup = (lut_numCnInCnGroups_BG2_R15[0]*NR_LDPC_ZMAX)>>5;
// Set pointers to start of group 3
p_cnProcBuf = (__m256i*) &cnProcBuf [lut_startAddrCnGroups[0]];
p_cnProcBufRes = (__m256i*) &cnProcBufRes[lut_startAddrCnGroups[0]];
// Loop over every BN
for (j=0; j<3; j++)
{
// Set of results pointer to correct BN address
p_cnProcBufResBit = p_cnProcBufRes + (j*bitOffsetInGroup);
// Loop over CNs
for (i=0; i<M; i++)
{
// Abs and sign of 32 CNs (first BN)
ymm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
sgn = _mm256_sign_epi8(*p_ones, ymm0);
min = _mm256_abs_epi8(ymm0);
// 32 CNs of second BN
ymm0 = p_cnProcBuf[lut_idxCnProcG3[j][1] + i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
// Store result
min = _mm256_min_epu8(min, *p_maxLLR); // 128 in epi8 is -127
*p_cnProcBufResBit = _mm256_sign_epi8(min, sgn);
p_cnProcBufResBit++;
}
}
}
}
/**
\brief Performs CN processing for BG2 on the CN processing buffer and stores the results in the CN processing results buffer.
\param p_lut Pointer to decoder LUTs
\param p_procBuf Pointer to processing buffers
\param Z Lifting size
*/
static inline void nrLDPC_cnProc_BG2_CNG4(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBuf, uint16_t Z)
{
const uint8_t* lut_numCnInCnGroups = p_lut->numCnInCnGroups;
const uint32_t* lut_startAddrCnGroups = p_lut->startAddrCnGroups;
int8_t* cnProcBuf = p_procBuf->cnProcBuf;
int8_t* cnProcBufRes = p_procBuf->cnProcBufRes;
__m256i* p_cnProcBuf;
__m256i* p_cnProcBufRes;
// Number of CNs in Groups
uint32_t M;
uint32_t i;
uint32_t j;
uint32_t k;
// Offset to each bit within a group in terms of 32 Byte
uint32_t bitOffsetInGroup;
__m256i ymm0, min, sgn;
__m256i* p_cnProcBufResBit;
const __m256i* p_ones = (__m256i*) ones256_epi8;
const __m256i* p_maxLLR = (__m256i*) maxLLR256_epi8;
// =====================================================================
// Process group with 4 BNs
// Offset is 20*384/32 = 240
const uint16_t lut_idxCnProcG4[4][3] = {{240,480,720}, {0,480,720}, {0,240,720}, {0,240,480}};
if (lut_numCnInCnGroups[1] > 0)
{
// Number of groups of 32 CNs for parallel processing
// Ceil for values not divisible by 32
M = (lut_numCnInCnGroups[1]*Z + 31)>>5;
// Set the offset to each bit within a group in terms of 32 Byte
bitOffsetInGroup = (lut_numCnInCnGroups_BG2_R15[1]*NR_LDPC_ZMAX)>>5;
// Set pointers to start of group 4
p_cnProcBuf = (__m256i*) &cnProcBuf [lut_startAddrCnGroups[1]];
p_cnProcBufRes = (__m256i*) &cnProcBufRes[lut_startAddrCnGroups[1]];
// Loop over every BN
for (j=0; j<4; j++)
{
// Set of results pointer to correct BN address
p_cnProcBufResBit = p_cnProcBufRes + (j*bitOffsetInGroup);
// Loop over CNs
for (i=0; i<M; i++)
{
// Abs and sign of 32 CNs (first BN)
ymm0 = p_cnProcBuf[lut_idxCnProcG4[j][0] + i];
sgn = _mm256_sign_epi8(*p_ones, ymm0);
min = _mm256_abs_epi8(ymm0);
// Loop over BNs
for (k=1; k<3; k++)
{
ymm0 = p_cnProcBuf[lut_idxCnProcG4[j][k] + i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
}
// Store result
min = _mm256_min_epu8(min, *p_maxLLR); // 128 in epi8 is -127
*p_cnProcBufResBit = _mm256_sign_epi8(min, sgn);
p_cnProcBufResBit++;
}
}
}
}
/**
\brief Performs CN processing for BG2 on the CN processing buffer and stores the results in the CN processing results buffer.
\param p_lut Pointer to decoder LUTs
\param p_procBuf Pointer to processing buffers
\param Z Lifting size
*/
static inline void nrLDPC_cnProc_BG2_CNG5(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBuf, uint16_t Z)
{
const uint8_t* lut_numCnInCnGroups = p_lut->numCnInCnGroups;
const uint32_t* lut_startAddrCnGroups = p_lut->startAddrCnGroups;
int8_t* cnProcBuf = p_procBuf->cnProcBuf;
int8_t* cnProcBufRes = p_procBuf->cnProcBufRes;
__m256i* p_cnProcBuf;
__m256i* p_cnProcBufRes;
// Number of CNs in Groups
uint32_t M;
uint32_t i;
uint32_t j;
uint32_t k;
// Offset to each bit within a group in terms of 32 Byte
uint32_t bitOffsetInGroup;
__m256i ymm0, min, sgn;
__m256i* p_cnProcBufResBit;
const __m256i* p_ones = (__m256i*) ones256_epi8;
const __m256i* p_maxLLR = (__m256i*) maxLLR256_epi8;
// =====================================================================
// Process group with 5 BNs
// Offset is 9*384/32 = 108
const uint16_t lut_idxCnProcG5[5][4] = {{108,216,324,432}, {0,216,324,432},
{0,108,324,432}, {0,108,216,432}, {0,108,216,324}};
if (lut_numCnInCnGroups[2] > 0)
{
// Number of groups of 32 CNs for parallel processing
// Ceil for values not divisible by 32
M = (lut_numCnInCnGroups[2]*Z + 31)>>5;
// Set the offset to each bit within a group in terms of 32 Byte
bitOffsetInGroup = (lut_numCnInCnGroups_BG2_R15[2]*NR_LDPC_ZMAX)>>5;
// Set pointers to start of group 5
p_cnProcBuf = (__m256i*) &cnProcBuf [lut_startAddrCnGroups[2]];
p_cnProcBufRes = (__m256i*) &cnProcBufRes[lut_startAddrCnGroups[2]];
// Loop over every BN
for (j=0; j<5; j++)
{
// Set of results pointer to correct BN address
p_cnProcBufResBit = p_cnProcBufRes + (j*bitOffsetInGroup);
// Loop over CNs
for (i=0; i<M; i++)
{
// Abs and sign of 32 CNs (first BN)
ymm0 = p_cnProcBuf[lut_idxCnProcG5[j][0] + i];
sgn = _mm256_sign_epi8(*p_ones, ymm0);
min = _mm256_abs_epi8(ymm0);
// Loop over BNs
for (k=1; k<4; k++)
{
ymm0 = p_cnProcBuf[lut_idxCnProcG5[j][k] + i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
}
// Store result
min = _mm256_min_epu8(min, *p_maxLLR); // 128 in epi8 is -127
*p_cnProcBufResBit = _mm256_sign_epi8(min, sgn);
p_cnProcBufResBit++;
}
}
}
}
/**
\brief Performs CN processing for BG2 on the CN processing buffer and stores the results in the CN processing results buffer.
\param p_lut Pointer to decoder LUTs
\param p_procBuf Pointer to processing buffers
\param Z Lifting size
*/
static inline void nrLDPC_cnProc_BG2_CNG6(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBuf, uint16_t Z)
{
const uint8_t* lut_numCnInCnGroups = p_lut->numCnInCnGroups;
const uint32_t* lut_startAddrCnGroups = p_lut->startAddrCnGroups;
int8_t* cnProcBuf = p_procBuf->cnProcBuf;
int8_t* cnProcBufRes = p_procBuf->cnProcBufRes;
__m256i* p_cnProcBuf;
__m256i* p_cnProcBufRes;
// Number of CNs in Groups
uint32_t M;
uint32_t i;
uint32_t j;
uint32_t k;
// Offset to each bit within a group in terms of 32 Byte
uint32_t bitOffsetInGroup;
__m256i ymm0, min, sgn;
__m256i* p_cnProcBufResBit;
const __m256i* p_ones = (__m256i*) ones256_epi8;
const __m256i* p_maxLLR = (__m256i*) maxLLR256_epi8;
// =====================================================================
// Process group with 6 BNs
// Offset is 3*384/32 = 36
const uint16_t lut_idxCnProcG6[6][5] = {{36,72,108,144,180}, {0,72,108,144,180},
{0,36,108,144,180}, {0,36,72,144,180},
{0,36,72,108,180}, {0,36,72,108,144}};
if (lut_numCnInCnGroups[3] > 0)
{
// Number of groups of 32 CNs for parallel processing
// Ceil for values not divisible by 32
M = (lut_numCnInCnGroups[3]*Z + 31)>>5;
// Set the offset to each bit within a group in terms of 32 Byte
bitOffsetInGroup = (lut_numCnInCnGroups_BG2_R15[3]*NR_LDPC_ZMAX)>>5;
// Set pointers to start of group 6
p_cnProcBuf = (__m256i*) &cnProcBuf [lut_startAddrCnGroups[3]];
p_cnProcBufRes = (__m256i*) &cnProcBufRes[lut_startAddrCnGroups[3]];
// Loop over every BN
for (j=0; j<6; j++)
{
// Set of results pointer to correct BN address
p_cnProcBufResBit = p_cnProcBufRes + (j*bitOffsetInGroup);
// Loop over CNs
for (i=0; i<M; i++)
{
// Abs and sign of 32 CNs (first BN)
ymm0 = p_cnProcBuf[lut_idxCnProcG6[j][0] + i];
sgn = _mm256_sign_epi8(*p_ones, ymm0);
min = _mm256_abs_epi8(ymm0);
// Loop over BNs
for (k=1; k<5; k++)
{
ymm0 = p_cnProcBuf[lut_idxCnProcG6[j][k] + i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
}
// Store result
min = _mm256_min_epu8(min, *p_maxLLR); // 128 in epi8 is -127
*p_cnProcBufResBit = _mm256_sign_epi8(min, sgn);
p_cnProcBufResBit++;
}
}
}
}
/**
\brief Performs CN processing for BG2 on the CN processing buffer and stores the results in the CN processing results buffer.
\param p_lut Pointer to decoder LUTs
\param p_procBuf Pointer to processing buffers
\param Z Lifting size
*/
static inline void nrLDPC_cnProc_BG2_CNG8(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBuf, uint16_t Z)
{
const uint8_t* lut_numCnInCnGroups = p_lut->numCnInCnGroups;
const uint32_t* lut_startAddrCnGroups = p_lut->startAddrCnGroups;
int8_t* cnProcBuf = p_procBuf->cnProcBuf;
int8_t* cnProcBufRes = p_procBuf->cnProcBufRes;
__m256i* p_cnProcBuf;
__m256i* p_cnProcBufRes;
// Number of CNs in Groups
uint32_t M;
uint32_t i;
uint32_t j;
uint32_t k;
// Offset to each bit within a group in terms of 32 Byte
uint32_t bitOffsetInGroup;
__m256i ymm0, min, sgn;
__m256i* p_cnProcBufResBit;
const __m256i* p_ones = (__m256i*) ones256_epi8;
const __m256i* p_maxLLR = (__m256i*) maxLLR256_epi8;
// =====================================================================
// Process group with 8 BNs
// Offset is 2*384/32 = 24
const uint8_t lut_idxCnProcG8[8][7] = {{24,48,72,96,120,144,168}, {0,48,72,96,120,144,168},
{0,24,72,96,120,144,168}, {0,24,48,96,120,144,168},
{0,24,48,72,120,144,168}, {0,24,48,72,96,144,168},
{0,24,48,72,96,120,168}, {0,24,48,72,96,120,144}};
if (lut_numCnInCnGroups[4] > 0)
{
// Number of groups of 32 CNs for parallel processing
// Ceil for values not divisible by 32
M = (lut_numCnInCnGroups[4]*Z + 31)>>5;
// Set the offset to each bit within a group in terms of 32 Byte
bitOffsetInGroup = (lut_numCnInCnGroups_BG2_R15[4]*NR_LDPC_ZMAX)>>5;
// Set pointers to start of group 8
p_cnProcBuf = (__m256i*) &cnProcBuf [lut_startAddrCnGroups[4]];
p_cnProcBufRes = (__m256i*) &cnProcBufRes[lut_startAddrCnGroups[4]];
// Loop over every BN
for (j=0; j<8; j++)
{
// Set of results pointer to correct BN address
p_cnProcBufResBit = p_cnProcBufRes + (j*bitOffsetInGroup);
// Loop over CNs
for (i=0; i<M; i++)
{
// Abs and sign of 32 CNs (first BN)
ymm0 = p_cnProcBuf[lut_idxCnProcG8[j][0] + i];
sgn = _mm256_sign_epi8(*p_ones, ymm0);
min = _mm256_abs_epi8(ymm0);
// Loop over BNs
for (k=1; k<7; k++)
{
ymm0 = p_cnProcBuf[lut_idxCnProcG8[j][k] + i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
}
// Store result
min = _mm256_min_epu8(min, *p_maxLLR); // 128 in epi8 is -127
*p_cnProcBufResBit = _mm256_sign_epi8(min, sgn);
p_cnProcBufResBit++;
}
}
}
}
/**
\brief Performs CN processing for BG2 on the CN processing buffer and stores the results in the CN processing results buffer.
\param p_lut Pointer to decoder LUTs
\param p_procBuf Pointer to processing buffers
\param Z Lifting size
*/
static inline void nrLDPC_cnProc_BG2_CNG10(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBuf, uint16_t Z)
{
const uint8_t* lut_numCnInCnGroups = p_lut->numCnInCnGroups;
const uint32_t* lut_startAddrCnGroups = p_lut->startAddrCnGroups;
int8_t* cnProcBuf = p_procBuf->cnProcBuf;
int8_t* cnProcBufRes = p_procBuf->cnProcBufRes;
__m256i* p_cnProcBuf;
__m256i* p_cnProcBufRes;
// Number of CNs in Groups
uint32_t M;
uint32_t i;
uint32_t j;
uint32_t k;
// Offset to each bit within a group in terms of 32 Byte
uint32_t bitOffsetInGroup;
__m256i ymm0, min, sgn;
__m256i* p_cnProcBufResBit;
const __m256i* p_ones = (__m256i*) ones256_epi8;
const __m256i* p_maxLLR = (__m256i*) maxLLR256_epi8;
// =====================================================================
// Process group with 10 BNs
// Offset is 2*384/32 = 24
const uint8_t lut_idxCnProcG10[10][9] = {{24,48,72,96,120,144,168,192,216}, {0,48,72,96,120,144,168,192,216},
{0,24,72,96,120,144,168,192,216}, {0,24,48,96,120,144,168,192,216},
{0,24,48,72,120,144,168,192,216}, {0,24,48,72,96,144,168,192,216},
{0,24,48,72,96,120,168,192,216}, {0,24,48,72,96,120,144,192,216},
{0,24,48,72,96,120,144,168,216}, {0,24,48,72,96,120,144,168,192}};
if (lut_numCnInCnGroups[5] > 0)
{
// Number of groups of 32 CNs for parallel processing
// Ceil for values not divisible by 32
M = (lut_numCnInCnGroups[5]*Z + 31)>>5;
// Set the offset to each bit within a group in terms of 32 Byte
bitOffsetInGroup = (lut_numCnInCnGroups_BG2_R15[5]*NR_LDPC_ZMAX)>>5;
// Set pointers to start of group 10
p_cnProcBuf = (__m256i*) &cnProcBuf [lut_startAddrCnGroups[5]];
p_cnProcBufRes = (__m256i*) &cnProcBufRes[lut_startAddrCnGroups[5]];
// Loop over every BN
for (j=0; j<10; j++)
{
// Set of results pointer to correct BN address
p_cnProcBufResBit = p_cnProcBufRes + (j*bitOffsetInGroup);
// Loop over CNs
for (i=0; i<M; i++)
{
// Abs and sign of 32 CNs (first BN)
ymm0 = p_cnProcBuf[lut_idxCnProcG10[j][0] + i];
sgn = _mm256_sign_epi8(*p_ones, ymm0);
min = _mm256_abs_epi8(ymm0);
// Loop over BNs
for (k=1; k<9; k++)
{
ymm0 = p_cnProcBuf[lut_idxCnProcG10[j][k] + i];
min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
sgn = _mm256_sign_epi8(sgn, ymm0);
}
// Store result
min = _mm256_min_epu8(min, *p_maxLLR); // 128 in epi8 is -127
*p_cnProcBufResBit = _mm256_sign_epi8(min, sgn);
p_cnProcBufResBit++;
}
}
}
}
/**
\brief Performs CN processing for BG2 on the CN processing buffer and stores the results in the CN processing results buffer.
\param p_lut Pointer to decoder LUTs
......@@ -44,7 +608,7 @@ static inline void nrLDPC_cnProc_BG2(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_pr
int8_t* cnProcBuf = p_procBuf->cnProcBuf;
int8_t* cnProcBufRes = p_procBuf->cnProcBufRes;
__m256i* p_cnProcBuf;
__m256i* p_cnProcBufRes;
......@@ -373,7 +937,7 @@ static inline void nrLDPC_cnProc_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_pr
int8_t* cnProcBuf = p_procBuf->cnProcBuf;
int8_t* cnProcBufRes = p_procBuf->cnProcBufRes;
__m256i* p_cnProcBuf;
__m256i* p_cnProcBufRes;
......@@ -872,7 +1436,7 @@ static inline uint32_t nrLDPC_cnProcPc_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf
int8_t* cnProcBuf = p_procBuf->cnProcBuf;
int8_t* cnProcBufRes = p_procBuf->cnProcBufRes;
__m256i* p_cnProcBuf;
__m256i* p_cnProcBufRes;
......@@ -1507,7 +2071,7 @@ static inline uint32_t nrLDPC_cnProcPc_BG2(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf
int8_t* cnProcBuf = p_procBuf->cnProcBuf;
int8_t* cnProcBufRes = p_procBuf->cnProcBufRes;
__m256i* p_cnProcBuf;
__m256i* p_cnProcBufRes;
......
......@@ -38,7 +38,7 @@
#include "nrLDPC_cnProc.h"
#include "nrLDPC_bnProc.h"
#define NR_LDPC_ENABLE_PARITY_CHECK
//#define NR_LDPC_ENABLE_PARITY_CHECK
//#define NR_LDPC_PROFILER_DETAIL
#ifdef NR_LDPC_DEBUG_MODE
......@@ -83,11 +83,13 @@ static inline uint32_t nrLDPC_decoder_core(int8_t* p_llr, int8_t* p_out, t_nrLDP
// Minimum number of iterations is 1
// 0 iterations means hard-decision on input LLRs
uint32_t i = 1;
uint32_t i = 0;
// Initialize with parity check fail != 0
int32_t pcRes = 1;
int8_t* p_llrOut;
uint32_t l;
if (outMode == nrLDPC_outMode_LLRINT8)
{
p_llrOut = p_out;
......@@ -100,6 +102,10 @@ static inline uint32_t nrLDPC_decoder_core(int8_t* p_llr, int8_t* p_out, t_nrLDP
memset(p_llrOut,0, NR_LDPC_MAX_NUM_LLR*sizeof(int8_t));
}
// CN processing layers for BG2
static const t_nrLDPC_cnProcGroup cnProcLayers_BG2[NR_LDPC_NUM_CN_GROUPS_BG2] = {nrLDPC_cnProc_BG2_CNG3, nrLDPC_cnProc_BG2_CNG4, nrLDPC_cnProc_BG2_CNG5, nrLDPC_cnProc_BG2_CNG6, nrLDPC_cnProc_BG2_CNG8, nrLDPC_cnProc_BG2_CNG10};
static const t_nrLDPC_cn2bnProcBufGroup cn2bnProcBufLayers_BG2[NR_LDPC_NUM_CN_GROUPS_BG2] = {nrLDPC_cn2bnProcBuf_BG2_CNG3, nrLDPC_cn2bnProcBuf_BG2_CNG4, nrLDPC_cn2bnProcBuf_BG2_CNG5, nrLDPC_cn2bnProcBuf_BG2_CNG6, nrLDPC_cn2bnProcBuf_BG2_CNG8, nrLDPC_cn2bnProcBuf_BG2_CNG10};
static const t_nrLDPC_bn2cnProcBufGroup bn2cnProcBufLayers_BG2[NR_LDPC_NUM_CN_GROUPS_BG2] = {nrLDPC_bn2cnProcBuf_BG2_CNG3, nrLDPC_bn2cnProcBuf_BG2_CNG4, nrLDPC_bn2cnProcBuf_BG2_CNG5, nrLDPC_bn2cnProcBuf_BG2_CNG6, nrLDPC_bn2cnProcBuf_BG2_CNG8, nrLDPC_bn2cnProcBuf_BG2_CNG10};
// Initialization
#ifdef NR_LDPC_PROFILER_DETAIL
......@@ -132,15 +138,208 @@ static inline uint32_t nrLDPC_decoder_core(int8_t* p_llr, int8_t* p_out, t_nrLDP
#ifdef NR_LDPC_DEBUG_MODE
nrLDPC_debug_initBuffer2File(nrLDPC_buffers_CN_PROC);
nrLDPC_debug_writeBuffer2File(nrLDPC_buffers_CN_PROC, p_procBuf);
nrLDPC_debug_initBuffer2File(nrLDPC_buffers_CN_PROC_RES);
nrLDPC_debug_initBuffer2File(nrLDPC_buffers_BN_PROC);
nrLDPC_debug_initBuffer2File(nrLDPC_buffers_LLR_RES);
nrLDPC_debug_initBuffer2File(nrLDPC_buffers_BN_PROC_RES);
#endif
#ifdef NR_LDPC_ENABLE_LAYERED_DECODING
// Buffers have to be set to zero otherwise previous call will impact decoding with current implementation
memset(p_procBuf->cnProcBuf,0, NR_LDPC_SIZE_CN_PROC_BUF*sizeof(int8_t));
memset(p_procBuf->cnProcBufRes,0, NR_LDPC_SIZE_CN_PROC_BUF*sizeof(int8_t));
memset(p_procBuf->bnProcBuf,0, NR_LDPC_SIZE_BN_PROC_BUF*sizeof(int8_t));
memset(p_procBuf->bnProcBufRes,0, NR_LDPC_SIZE_BN_PROC_BUF*sizeof(int8_t));
#endif
// Iterations
while ( (i < numMaxIter) && (pcRes != 0) )
{
//mexPrintf("i = %d, pcRes = %d\n", i, pcRes);
#ifdef NR_LDPC_ENABLE_LAYERED_DECODING
// Processing layers
for (l=0; l<6; l++)
{
// CN Processing
#ifdef NR_LDPC_DEBUG_MODE
nrLDPC_debug_writeBuffer2File(nrLDPC_buffers_CN_PROC, p_procBuf);
#endif
#ifdef NR_LDPC_PROFILER_DETAIL
start_meas(&p_profiler->cnProc);
#endif
cnProcLayers_BG2[l](p_lut, p_procBuf, Z);
#ifdef NR_LDPC_PROFILER_DETAIL
stop_meas(&p_profiler->cnProc);
#endif
#ifdef NR_LDPC_DEBUG_MODE
nrLDPC_debug_writeBuffer2File(nrLDPC_buffers_CN_PROC_RES, p_procBuf);
#endif
// CN -> BN
#ifdef NR_LDPC_PROFILER_DETAIL
start_meas(&p_profiler->cn2bnProcBuf);
#endif
cn2bnProcBufLayers_BG2[l](p_lut, p_procBuf, Z);
#ifdef NR_LDPC_PROFILER_DETAIL
stop_meas(&p_profiler->cn2bnProcBuf);
#endif
#ifdef NR_LDPC_DEBUG_MODE
nrLDPC_debug_writeBuffer2File(nrLDPC_buffers_BN_PROC, p_procBuf);
#endif
// BN Processing
#ifdef NR_LDPC_PROFILER_DETAIL
start_meas(&p_profiler->bnProcPc);
#endif
nrLDPC_bnProcPc(p_lut, p_procBuf, Z);
#ifdef NR_LDPC_PROFILER_DETAIL
stop_meas(&p_profiler->bnProcPc);
#endif
#ifdef NR_LDPC_DEBUG_MODE
nrLDPC_debug_writeBuffer2File(nrLDPC_buffers_LLR_RES, p_procBuf);
#endif
#ifdef NR_LDPC_PROFILER_DETAIL
start_meas(&p_profiler->bnProc);
#endif
nrLDPC_bnProc(p_lut, p_procBuf, Z);
#ifdef NR_LDPC_PROFILER_DETAIL
stop_meas(&p_profiler->bnProc);
#endif
#ifdef NR_LDPC_DEBUG_MODE
nrLDPC_debug_writeBuffer2File(nrLDPC_buffers_BN_PROC_RES, p_procBuf);
#endif
// BN -> CN
#ifdef NR_LDPC_PROFILER_DETAIL
start_meas(&p_profiler->bn2cnProcBuf);
#endif
bn2cnProcBufLayers_BG2[l](p_lut, p_procBuf, Z);
#ifdef NR_LDPC_PROFILER_DETAIL
stop_meas(&p_profiler->bn2cnProcBuf);
#endif
}
#else
// No layers
// CN Processing
#ifdef NR_LDPC_PROFILER_DETAIL
start_meas(&p_profiler->cnProc);
#endif
for (l=0; l<6; l++)
{
cnProcLayers_BG2[l](p_lut, p_procBuf, Z);
}
#ifdef NR_LDPC_PROFILER_DETAIL
stop_meas(&p_profiler->cnProc);
#endif
// CN -> BN
#ifdef NR_LDPC_PROFILER_DETAIL
start_meas(&p_profiler->cn2bnProcBuf);
#endif
for (l=0; l<6; l++)
{
cn2bnProcBufLayers_BG2[l](p_lut, p_procBuf, Z);
}
#ifdef NR_LDPC_PROFILER_DETAIL
stop_meas(&p_profiler->cn2bnProcBuf);
#endif
// BN Processing
#ifdef NR_LDPC_PROFILER_DETAIL
start_meas(&p_profiler->bnProcPc);
#endif
nrLDPC_bnProcPc(p_lut, p_procBuf, Z);
#ifdef NR_LDPC_PROFILER_DETAIL
stop_meas(&p_profiler->bnProcPc);
#endif
#ifdef NR_LDPC_PROFILER_DETAIL
start_meas(&p_profiler->bnProc);
#endif
nrLDPC_bnProc(p_lut, p_procBuf, Z);
#ifdef NR_LDPC_PROFILER_DETAIL
stop_meas(&p_profiler->bnProc);
#endif
// BN -> CN
#ifdef NR_LDPC_PROFILER_DETAIL
start_meas(&p_profiler->bn2cnProcBuf);
#endif
for (l=0; l<6; l++)
{
bn2cnProcBufLayers_BG2[l](p_lut, p_procBuf, Z);
}
#ifdef NR_LDPC_PROFILER_DETAIL
stop_meas(&p_profiler->bn2cnProcBuf);
#endif
// END NR_LDPC_ENABLE_LAYERED_DECODING
#endif
// Parity Check
#ifdef NR_LDPC_ENABLE_PARITY_CHECK
#ifdef NR_LDPC_PROFILER_DETAIL
start_meas(&p_profiler->cnProcPc);
#endif
if (BG == 1)
{
pcRes = nrLDPC_cnProcPc_BG1(p_lut, p_procBuf, Z);
}
else
{
pcRes = nrLDPC_cnProcPc_BG2(p_lut, p_procBuf, Z);
}
#ifdef NR_LDPC_PROFILER_DETAIL
stop_meas(&p_profiler->cnProcPc);
#endif
#endif
// Increase iteration counter
i++;
//mexPrintf("pcRes = %d\n", pcRes);
}
// If maximum number of iterations reached an PC still fails increase number of iterations
// Thus, i > numMaxIter indicates that PC has failed
#ifdef NR_LDPC_ENABLE_PARITY_CHECK
if (pcRes != 0)
{
i++;
}
#endif
// Assign results from processing buffer to output
#ifdef NR_LDPC_PROFILER_DETAIL
start_meas(&p_profiler->llrRes2llrOut);
#endif
nrLDPC_llrRes2llrOut(p_lut, p_llrOut, p_procBuf, Z, BG);
#ifdef NR_LDPC_PROFILER_DETAIL
stop_meas(&p_profiler->llrRes2llrOut);
#endif
// Hard-decision
#ifdef NR_LDPC_PROFILER_DETAIL
start_meas(&p_profiler->llr2bit);
#endif
if (outMode == nrLDPC_outMode_BIT)
{
nrLDPC_llr2bitPacked(p_out, p_llrOut, numLLR);
}
else if (outMode == nrLDPC_outMode_BITINT8)
{
nrLDPC_llr2bit(p_out, p_llrOut, numLLR);
}
#ifdef NR_LDPC_PROFILER_DETAIL
stop_meas(&p_profiler->llr2bit);
#endif
return i;
}
/*
// First iteration
// CN processing
#ifdef NR_LDPC_PROFILER_DETAIL
start_meas(&p_profiler->cnProc);
#endif
if (BG == 1)
{
nrLDPC_cnProc_BG1(p_lut, p_procBuf, Z);
......@@ -149,6 +348,7 @@ static inline uint32_t nrLDPC_decoder_core(int8_t* p_llr, int8_t* p_out, t_nrLDP
{
nrLDPC_cnProc_BG2(p_lut, p_procBuf, Z);
}
#ifdef NR_LDPC_PROFILER_DETAIL
stop_meas(&p_profiler->cnProc);
#endif
......@@ -248,6 +448,12 @@ static inline uint32_t nrLDPC_decoder_core(int8_t* p_llr, int8_t* p_out, t_nrLDP
{
nrLDPC_cnProc_BG2(p_lut, p_procBuf, Z);
}
for (k = 0; k < 6; k++)
{
cnProcLayers_BG2[k](p_lut, p_procBuf, Z);
}
#ifdef NR_LDPC_PROFILER_DETAIL
stop_meas(&p_profiler->cnProc);
#endif
......@@ -350,6 +556,7 @@ static inline uint32_t nrLDPC_decoder_core(int8_t* p_llr, int8_t* p_out, t_nrLDP
#ifdef NR_LDPC_PROFILER_DETAIL
start_meas(&p_profiler->cnProc);
#endif
if (BG == 1)
{
nrLDPC_cnProc_BG1(p_lut, p_procBuf, Z);
......@@ -358,6 +565,7 @@ static inline uint32_t nrLDPC_decoder_core(int8_t* p_llr, int8_t* p_out, t_nrLDP
{
nrLDPC_cnProc_BG2(p_lut, p_procBuf, Z);
}
#ifdef NR_LDPC_PROFILER_DETAIL
stop_meas(&p_profiler->cnProc);
#endif
......@@ -490,3 +698,4 @@ static inline uint32_t nrLDPC_decoder_core(int8_t* p_llr, int8_t* p_out, t_nrLDP
return i;
}
*/
......@@ -472,6 +472,258 @@ static inline void nrLDPC_llr2CnProcBuf_BG2(t_nrLDPC_lut* p_lut, int8_t* llr, t_
}
}
/**
\brief Copies the values in the CN processing results buffer to their corresponding place in the BN processing buffer for BG2.
\param p_lut Pointer to decoder LUTs
\param p_procBuf Pointer to the processing buffers
\param Z Lifting size
*/
static inline void nrLDPC_cn2bnProcBuf_BG2_CNG3(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBuf, uint16_t Z)
{
const uint8_t* lut_numCnInCnGroups = p_lut->numCnInCnGroups;
const uint32_t* lut_startAddrCnGroups = p_lut->startAddrCnGroups;
const uint16_t (*lut_circShift_CNG3) [lut_numCnInCnGroups_BG2_R15[0]] = (uint16_t(*)[lut_numCnInCnGroups_BG2_R15[0]]) p_lut->circShift[0];
const uint32_t (*lut_startAddrBnProcBuf_CNG3) [lut_numCnInCnGroups[0]] = (uint32_t(*)[lut_numCnInCnGroups[0]]) p_lut->startAddrBnProcBuf[0];
const uint8_t (*lut_bnPosBnProcBuf_CNG3) [lut_numCnInCnGroups[0]] = (uint8_t(*)[lut_numCnInCnGroups[0]]) p_lut->bnPosBnProcBuf[0];
int8_t* cnProcBufRes = p_procBuf->cnProcBufRes;
int8_t* bnProcBuf = p_procBuf->bnProcBuf;
int8_t* p_cnProcBufRes;
uint32_t bitOffsetInGroup;
uint32_t i;
uint32_t j;
uint32_t idxBn = 0;
// =====================================================================
// CN group with 3 BNs
bitOffsetInGroup = lut_numCnInCnGroups_BG2_R15[0]*NR_LDPC_ZMAX;
for (j=0; j<3; j++)
{
p_cnProcBufRes = &cnProcBufRes[lut_startAddrCnGroups[0] + j*bitOffsetInGroup];
for (i=0; i<lut_numCnInCnGroups[0]; i++)
{
idxBn = lut_startAddrBnProcBuf_CNG3[j][i] + lut_bnPosBnProcBuf_CNG3[j][i]*Z;
nrLDPC_inv_circ_memcpy(&bnProcBuf[idxBn],p_cnProcBufRes,Z,lut_circShift_CNG3[j][i]);
p_cnProcBufRes += Z;
}
}
}
/**
\brief Copies the values in the CN processing results buffer to their corresponding place in the BN processing buffer for BG2.
\param p_lut Pointer to decoder LUTs
\param p_procBuf Pointer to the processing buffers
\param Z Lifting size
*/
static inline void nrLDPC_cn2bnProcBuf_BG2_CNG4(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBuf, uint16_t Z)
{
const uint8_t* lut_numCnInCnGroups = p_lut->numCnInCnGroups;
const uint32_t* lut_startAddrCnGroups = p_lut->startAddrCnGroups;
const uint16_t (*lut_circShift_CNG4) [lut_numCnInCnGroups_BG2_R15[1]] = (uint16_t(*)[lut_numCnInCnGroups_BG2_R15[1]]) p_lut->circShift[1];
const uint32_t (*lut_startAddrBnProcBuf_CNG4) [lut_numCnInCnGroups[1]] = (uint32_t(*)[lut_numCnInCnGroups[1]]) p_lut->startAddrBnProcBuf[1];
const uint8_t (*lut_bnPosBnProcBuf_CNG4) [lut_numCnInCnGroups[1]] = (uint8_t(*)[lut_numCnInCnGroups[1]]) p_lut->bnPosBnProcBuf[1];
int8_t* cnProcBufRes = p_procBuf->cnProcBufRes;
int8_t* bnProcBuf = p_procBuf->bnProcBuf;
int8_t* p_cnProcBufRes;
uint32_t bitOffsetInGroup;
uint32_t i;
uint32_t j;
uint32_t idxBn = 0;
// =====================================================================
// CN group with 4 BNs
bitOffsetInGroup = lut_numCnInCnGroups_BG2_R15[1]*NR_LDPC_ZMAX;
for (j=0; j<4; j++)
{
p_cnProcBufRes = &cnProcBufRes[lut_startAddrCnGroups[1] + j*bitOffsetInGroup];
for (i=0; i<lut_numCnInCnGroups[1]; i++)
{
idxBn = lut_startAddrBnProcBuf_CNG4[j][i] + lut_bnPosBnProcBuf_CNG4[j][i]*Z;
nrLDPC_inv_circ_memcpy(&bnProcBuf[idxBn],p_cnProcBufRes,Z,lut_circShift_CNG4[j][i]);
p_cnProcBufRes += Z;
}
}
}
/**
\brief Copies the values in the CN processing results buffer to their corresponding place in the BN processing buffer for BG2.
\param p_lut Pointer to decoder LUTs
\param p_procBuf Pointer to the processing buffers
\param Z Lifting size
*/
static inline void nrLDPC_cn2bnProcBuf_BG2_CNG5(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBuf, uint16_t Z)
{
const uint8_t* lut_numCnInCnGroups = p_lut->numCnInCnGroups;
const uint32_t* lut_startAddrCnGroups = p_lut->startAddrCnGroups;
const uint16_t (*lut_circShift_CNG5) [lut_numCnInCnGroups_BG2_R15[2]] = (uint16_t(*)[lut_numCnInCnGroups_BG2_R15[2]]) p_lut->circShift[2];
const uint32_t (*lut_startAddrBnProcBuf_CNG5) [lut_numCnInCnGroups[2]] = (uint32_t(*)[lut_numCnInCnGroups[2]]) p_lut->startAddrBnProcBuf[2];
const uint8_t (*lut_bnPosBnProcBuf_CNG5) [lut_numCnInCnGroups[2]] = (uint8_t(*)[lut_numCnInCnGroups[2]]) p_lut->bnPosBnProcBuf[2];
int8_t* cnProcBufRes = p_procBuf->cnProcBufRes;
int8_t* bnProcBuf = p_procBuf->bnProcBuf;
int8_t* p_cnProcBufRes;
uint32_t bitOffsetInGroup;
uint32_t i;
uint32_t j;
uint32_t idxBn = 0;
// =====================================================================
// CN group with 5 BNs
bitOffsetInGroup = lut_numCnInCnGroups_BG2_R15[2]*NR_LDPC_ZMAX;
for (j=0; j<5; j++)
{
p_cnProcBufRes = &cnProcBufRes[lut_startAddrCnGroups[2] + j*bitOffsetInGroup];
for (i=0; i<lut_numCnInCnGroups[2]; i++)
{
idxBn = lut_startAddrBnProcBuf_CNG5[j][i] + lut_bnPosBnProcBuf_CNG5[j][i]*Z;
nrLDPC_inv_circ_memcpy(&bnProcBuf[idxBn],p_cnProcBufRes,Z,lut_circShift_CNG5[j][i]);
p_cnProcBufRes += Z;
}
}
}
/**
\brief Copies the values in the CN processing results buffer to their corresponding place in the BN processing buffer for BG2.
\param p_lut Pointer to decoder LUTs
\param p_procBuf Pointer to the processing buffers
\param Z Lifting size
*/
static inline void nrLDPC_cn2bnProcBuf_BG2_CNG6(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBuf, uint16_t Z)
{
const uint8_t* lut_numCnInCnGroups = p_lut->numCnInCnGroups;
const uint32_t* lut_startAddrCnGroups = p_lut->startAddrCnGroups;
const uint16_t (*lut_circShift_CNG6) [lut_numCnInCnGroups_BG2_R15[3]] = (uint16_t(*)[lut_numCnInCnGroups_BG2_R15[3]]) p_lut->circShift[3];
const uint32_t (*lut_startAddrBnProcBuf_CNG6) [lut_numCnInCnGroups[3]] = (uint32_t(*)[lut_numCnInCnGroups[3]]) p_lut->startAddrBnProcBuf[3];
const uint8_t (*lut_bnPosBnProcBuf_CNG6) [lut_numCnInCnGroups[3]] = (uint8_t(*)[lut_numCnInCnGroups[3]]) p_lut->bnPosBnProcBuf[3];
int8_t* cnProcBufRes = p_procBuf->cnProcBufRes;
int8_t* bnProcBuf = p_procBuf->bnProcBuf;
int8_t* p_cnProcBufRes;
uint32_t bitOffsetInGroup;
uint32_t i;
uint32_t j;
uint32_t idxBn = 0;
// =====================================================================
// CN group with 6 BNs
bitOffsetInGroup = lut_numCnInCnGroups_BG2_R15[3]*NR_LDPC_ZMAX;
for (j=0; j<6; j++)
{
p_cnProcBufRes = &cnProcBufRes[lut_startAddrCnGroups[3] + j*bitOffsetInGroup];
for (i=0; i<lut_numCnInCnGroups[3]; i++)
{
idxBn = lut_startAddrBnProcBuf_CNG6[j][i] + lut_bnPosBnProcBuf_CNG6[j][i]*Z;
nrLDPC_inv_circ_memcpy(&bnProcBuf[idxBn],p_cnProcBufRes,Z,lut_circShift_CNG6[j][i]);
p_cnProcBufRes += Z;
}
}
}
/**
\brief Copies the values in the CN processing results buffer to their corresponding place in the BN processing buffer for BG2.
\param p_lut Pointer to decoder LUTs
\param p_procBuf Pointer to the processing buffers
\param Z Lifting size
*/
static inline void nrLDPC_cn2bnProcBuf_BG2_CNG8(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBuf, uint16_t Z)
{
const uint8_t* lut_numCnInCnGroups = p_lut->numCnInCnGroups;
const uint32_t* lut_startAddrCnGroups = p_lut->startAddrCnGroups;
const uint16_t (*lut_circShift_CNG8) [lut_numCnInCnGroups_BG2_R15[4]] = (uint16_t(*)[lut_numCnInCnGroups_BG2_R15[4]]) p_lut->circShift[4];
const uint32_t (*lut_startAddrBnProcBuf_CNG8) [lut_numCnInCnGroups[4]] = (uint32_t(*)[lut_numCnInCnGroups[4]]) p_lut->startAddrBnProcBuf[4];
const uint8_t (*lut_bnPosBnProcBuf_CNG8) [lut_numCnInCnGroups[4]] = (uint8_t(*)[lut_numCnInCnGroups[4]]) p_lut->bnPosBnProcBuf[4];
int8_t* cnProcBufRes = p_procBuf->cnProcBufRes;
int8_t* bnProcBuf = p_procBuf->bnProcBuf;
int8_t* p_cnProcBufRes;
uint32_t bitOffsetInGroup;
uint32_t i;
uint32_t j;
uint32_t idxBn = 0;
// =====================================================================
// CN group with 8 BNs
bitOffsetInGroup = lut_numCnInCnGroups_BG2_R15[4]*NR_LDPC_ZMAX;
for (j=0; j<8; j++)
{
p_cnProcBufRes = &cnProcBufRes[lut_startAddrCnGroups[4] + j*bitOffsetInGroup];
for (i=0; i<lut_numCnInCnGroups[4]; i++)
{
idxBn = lut_startAddrBnProcBuf_CNG8[j][i] + lut_bnPosBnProcBuf_CNG8[j][i]*Z;
nrLDPC_inv_circ_memcpy(&bnProcBuf[idxBn],p_cnProcBufRes,Z,lut_circShift_CNG8[j][i]);
p_cnProcBufRes += Z;
}
}
}
/**
\brief Copies the values in the CN processing results buffer to their corresponding place in the BN processing buffer for BG2.
\param p_lut Pointer to decoder LUTs
\param p_procBuf Pointer to the processing buffers
\param Z Lifting size
*/
static inline void nrLDPC_cn2bnProcBuf_BG2_CNG10(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBuf, uint16_t Z)
{
const uint8_t* lut_numCnInCnGroups = p_lut->numCnInCnGroups;
const uint32_t* lut_startAddrCnGroups = p_lut->startAddrCnGroups;
const uint16_t (*lut_circShift_CNG10) [lut_numCnInCnGroups_BG2_R15[5]] = (uint16_t(*)[lut_numCnInCnGroups_BG2_R15[5]]) p_lut->circShift[5];
const uint32_t (*lut_startAddrBnProcBuf_CNG10) [lut_numCnInCnGroups[5]] = (uint32_t(*)[lut_numCnInCnGroups[5]]) p_lut->startAddrBnProcBuf[5];
const uint8_t (*lut_bnPosBnProcBuf_CNG10) [lut_numCnInCnGroups[5]] = (uint8_t(*)[lut_numCnInCnGroups[5]]) p_lut->bnPosBnProcBuf[5];
int8_t* cnProcBufRes = p_procBuf->cnProcBufRes;
int8_t* bnProcBuf = p_procBuf->bnProcBuf;
int8_t* p_cnProcBufRes;
uint32_t bitOffsetInGroup;
uint32_t i;
uint32_t j;
uint32_t idxBn = 0;
// =====================================================================
// CN group with 10 BNs
bitOffsetInGroup = lut_numCnInCnGroups_BG2_R15[5]*NR_LDPC_ZMAX;
for (j=0; j<10; j++)
{
p_cnProcBufRes = &cnProcBufRes[lut_startAddrCnGroups[5] + j*bitOffsetInGroup];
for (i=0; i<lut_numCnInCnGroups[5]; i++)
{
idxBn = lut_startAddrBnProcBuf_CNG10[j][i] + lut_bnPosBnProcBuf_CNG10[j][i]*Z;
nrLDPC_inv_circ_memcpy(&bnProcBuf[idxBn],p_cnProcBufRes,Z,lut_circShift_CNG10[j][i]);
p_cnProcBufRes += Z;
}
}
}
/**
\brief Copies the values in the CN processing results buffer to their corresponding place in the BN processing buffer for BG2.
\param p_lut Pointer to decoder LUTs
......@@ -815,6 +1067,261 @@ static inline void nrLDPC_cn2bnProcBuf_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf
}
/**
\brief Copies the values in the BN processing results buffer to their corresponding place in the CN processing buffer for BG2.
\param p_lut Pointer to decoder LUTs
\param p_procBuf Pointer to the processing buffers
\param Z Lifting size
*/
static inline void nrLDPC_bn2cnProcBuf_BG2_CNG3(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBuf, uint16_t Z)
{
const uint8_t* lut_numCnInCnGroups = p_lut->numCnInCnGroups;
const uint32_t* lut_startAddrCnGroups = p_lut->startAddrCnGroups;
const uint16_t (*lut_circShift_CNG3) [lut_numCnInCnGroups_BG2_R15[0]] = (uint16_t(*)[lut_numCnInCnGroups_BG2_R15[0]]) p_lut->circShift[0];
const uint32_t (*lut_startAddrBnProcBuf_CNG3) [lut_numCnInCnGroups[0]] = (uint32_t(*)[lut_numCnInCnGroups[0]]) p_lut->startAddrBnProcBuf[0];
const uint8_t (*lut_bnPosBnProcBuf_CNG3) [lut_numCnInCnGroups[0]] = (uint8_t(*)[lut_numCnInCnGroups[0]]) p_lut->bnPosBnProcBuf[0];
int8_t* cnProcBuf = p_procBuf->cnProcBuf;
int8_t* bnProcBufRes = p_procBuf->bnProcBufRes;
int8_t* p_cnProcBuf;
uint32_t bitOffsetInGroup;
uint32_t i;
uint32_t j;
uint32_t idxBn = 0;
// For CN groups 3 to 6 no need to send the last BN back since it's single edge
// and BN processing does not change the value already in the CN proc buf
// =====================================================================
// CN group with 3 BNs
bitOffsetInGroup = lut_numCnInCnGroups_BG2_R15[0]*NR_LDPC_ZMAX;
for (j=0; j<2; j++)
{
p_cnProcBuf = &cnProcBuf[lut_startAddrCnGroups[0] + j*bitOffsetInGroup];
for (i=0; i<lut_numCnInCnGroups[0]; i++)
{
idxBn = lut_startAddrBnProcBuf_CNG3[j][i] + lut_bnPosBnProcBuf_CNG3[j][i]*Z;
nrLDPC_circ_memcpy(p_cnProcBuf, &bnProcBufRes[idxBn], Z, lut_circShift_CNG3[j][i]);
p_cnProcBuf += Z;
}
}
}
/**
\brief Copies the values in the BN processing results buffer to their corresponding place in the CN processing buffer for BG2.
\param p_lut Pointer to decoder LUTs
\param p_procBuf Pointer to the processing buffers
\param Z Lifting size
*/
static inline void nrLDPC_bn2cnProcBuf_BG2_CNG4(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBuf, uint16_t Z)
{
const uint8_t* lut_numCnInCnGroups = p_lut->numCnInCnGroups;
const uint32_t* lut_startAddrCnGroups = p_lut->startAddrCnGroups;
const uint16_t (*lut_circShift_CNG4) [lut_numCnInCnGroups_BG2_R15[1]] = (uint16_t(*)[lut_numCnInCnGroups_BG2_R15[1]]) p_lut->circShift[1];
const uint32_t (*lut_startAddrBnProcBuf_CNG4) [lut_numCnInCnGroups[1]] = (uint32_t(*)[lut_numCnInCnGroups[1]]) p_lut->startAddrBnProcBuf[1];
const uint8_t (*lut_bnPosBnProcBuf_CNG4) [lut_numCnInCnGroups[1]] = (uint8_t(*)[lut_numCnInCnGroups[1]]) p_lut->bnPosBnProcBuf[1];
int8_t* cnProcBuf = p_procBuf->cnProcBuf;
int8_t* bnProcBufRes = p_procBuf->bnProcBufRes;
int8_t* p_cnProcBuf;
uint32_t bitOffsetInGroup;
uint32_t i;
uint32_t j;
uint32_t idxBn = 0;
// =====================================================================
// CN group with 4 BNs
bitOffsetInGroup = lut_numCnInCnGroups_BG2_R15[1]*NR_LDPC_ZMAX;
for (j=0; j<3; j++)
{
p_cnProcBuf = &cnProcBuf[lut_startAddrCnGroups[1] + j*bitOffsetInGroup];
for (i=0; i<lut_numCnInCnGroups[1]; i++)
{
idxBn = lut_startAddrBnProcBuf_CNG4[j][i] + lut_bnPosBnProcBuf_CNG4[j][i]*Z;
nrLDPC_circ_memcpy(p_cnProcBuf, &bnProcBufRes[idxBn], Z, lut_circShift_CNG4[j][i]);
p_cnProcBuf += Z;
}
}
}
/**
\brief Copies the values in the BN processing results buffer to their corresponding place in the CN processing buffer for BG2.
\param p_lut Pointer to decoder LUTs
\param p_procBuf Pointer to the processing buffers
\param Z Lifting size
*/
static inline void nrLDPC_bn2cnProcBuf_BG2_CNG5(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBuf, uint16_t Z)
{
const uint8_t* lut_numCnInCnGroups = p_lut->numCnInCnGroups;
const uint32_t* lut_startAddrCnGroups = p_lut->startAddrCnGroups;
const uint16_t (*lut_circShift_CNG5) [lut_numCnInCnGroups_BG2_R15[2]] = (uint16_t(*)[lut_numCnInCnGroups_BG2_R15[2]]) p_lut->circShift[2];
const uint32_t (*lut_startAddrBnProcBuf_CNG5) [lut_numCnInCnGroups[2]] = (uint32_t(*)[lut_numCnInCnGroups[2]]) p_lut->startAddrBnProcBuf[2];
const uint8_t (*lut_bnPosBnProcBuf_CNG5) [lut_numCnInCnGroups[2]] = (uint8_t(*)[lut_numCnInCnGroups[2]]) p_lut->bnPosBnProcBuf[2];
int8_t* cnProcBuf = p_procBuf->cnProcBuf;
int8_t* bnProcBufRes = p_procBuf->bnProcBufRes;
int8_t* p_cnProcBuf;
uint32_t bitOffsetInGroup;
uint32_t i;
uint32_t j;
uint32_t idxBn = 0;
// =====================================================================
// CN group with 5 BNs
bitOffsetInGroup = lut_numCnInCnGroups_BG2_R15[2]*NR_LDPC_ZMAX;
for (j=0; j<4; j++)
{
p_cnProcBuf = &cnProcBuf[lut_startAddrCnGroups[2] + j*bitOffsetInGroup];
for (i=0; i<lut_numCnInCnGroups[2]; i++)
{
idxBn = lut_startAddrBnProcBuf_CNG5[j][i] + lut_bnPosBnProcBuf_CNG5[j][i]*Z;
nrLDPC_circ_memcpy(p_cnProcBuf, &bnProcBufRes[idxBn], Z, lut_circShift_CNG5[j][i]);
p_cnProcBuf += Z;
}
}
}
/**
\brief Copies the values in the BN processing results buffer to their corresponding place in the CN processing buffer for BG2.
\param p_lut Pointer to decoder LUTs
\param p_procBuf Pointer to the processing buffers
\param Z Lifting size
*/
static inline void nrLDPC_bn2cnProcBuf_BG2_CNG6(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBuf, uint16_t Z)
{
const uint8_t* lut_numCnInCnGroups = p_lut->numCnInCnGroups;
const uint32_t* lut_startAddrCnGroups = p_lut->startAddrCnGroups;
const uint16_t (*lut_circShift_CNG6) [lut_numCnInCnGroups_BG2_R15[3]] = (uint16_t(*)[lut_numCnInCnGroups_BG2_R15[3]]) p_lut->circShift[3];
const uint32_t (*lut_startAddrBnProcBuf_CNG6) [lut_numCnInCnGroups[3]] = (uint32_t(*)[lut_numCnInCnGroups[3]]) p_lut->startAddrBnProcBuf[3];
const uint8_t (*lut_bnPosBnProcBuf_CNG6) [lut_numCnInCnGroups[3]] = (uint8_t(*)[lut_numCnInCnGroups[3]]) p_lut->bnPosBnProcBuf[3];
int8_t* cnProcBuf = p_procBuf->cnProcBuf;
int8_t* bnProcBufRes = p_procBuf->bnProcBufRes;
int8_t* p_cnProcBuf;
uint32_t bitOffsetInGroup;
uint32_t i;
uint32_t j;
uint32_t idxBn = 0;
// =====================================================================
// CN group with 6 BNs
bitOffsetInGroup = lut_numCnInCnGroups_BG2_R15[3]*NR_LDPC_ZMAX;
for (j=0; j<5; j++)
{
p_cnProcBuf = &cnProcBuf[lut_startAddrCnGroups[3] + j*bitOffsetInGroup];
for (i=0; i<lut_numCnInCnGroups[3]; i++)
{
idxBn = lut_startAddrBnProcBuf_CNG6[j][i] + lut_bnPosBnProcBuf_CNG6[j][i]*Z;
nrLDPC_circ_memcpy(p_cnProcBuf, &bnProcBufRes[idxBn], Z, lut_circShift_CNG6[j][i]);
p_cnProcBuf += Z;
}
}
}
/**
\brief Copies the values in the BN processing results buffer to their corresponding place in the CN processing buffer for BG2.
\param p_lut Pointer to decoder LUTs
\param p_procBuf Pointer to the processing buffers
\param Z Lifting size
*/
static inline void nrLDPC_bn2cnProcBuf_BG2_CNG8(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBuf, uint16_t Z)
{
const uint8_t* lut_numCnInCnGroups = p_lut->numCnInCnGroups;
const uint32_t* lut_startAddrCnGroups = p_lut->startAddrCnGroups;
const uint16_t (*lut_circShift_CNG8) [lut_numCnInCnGroups_BG2_R15[4]] = (uint16_t(*)[lut_numCnInCnGroups_BG2_R15[4]]) p_lut->circShift[4];
const uint32_t (*lut_startAddrBnProcBuf_CNG8) [lut_numCnInCnGroups[4]] = (uint32_t(*)[lut_numCnInCnGroups[4]]) p_lut->startAddrBnProcBuf[4];
const uint8_t (*lut_bnPosBnProcBuf_CNG8) [lut_numCnInCnGroups[4]] = (uint8_t(*)[lut_numCnInCnGroups[4]]) p_lut->bnPosBnProcBuf[4];
int8_t* cnProcBuf = p_procBuf->cnProcBuf;
int8_t* bnProcBufRes = p_procBuf->bnProcBufRes;
int8_t* p_cnProcBuf;
uint32_t bitOffsetInGroup;
uint32_t i;
uint32_t j;
uint32_t idxBn = 0;
// =====================================================================
// CN group with 8 BNs
bitOffsetInGroup = lut_numCnInCnGroups_BG2_R15[4]*NR_LDPC_ZMAX;
for (j=0; j<8; j++)
{
p_cnProcBuf = &cnProcBuf[lut_startAddrCnGroups[4] + j*bitOffsetInGroup];
for (i=0; i<lut_numCnInCnGroups[4]; i++)
{
idxBn = lut_startAddrBnProcBuf_CNG8[j][i] + lut_bnPosBnProcBuf_CNG8[j][i]*Z;
nrLDPC_circ_memcpy(p_cnProcBuf, &bnProcBufRes[idxBn], Z, lut_circShift_CNG8[j][i]);
p_cnProcBuf += Z;
}
}
}
/**
\brief Copies the values in the BN processing results buffer to their corresponding place in the CN processing buffer for BG2.
\param p_lut Pointer to decoder LUTs
\param p_procBuf Pointer to the processing buffers
\param Z Lifting size
*/
static inline void nrLDPC_bn2cnProcBuf_BG2_CNG10(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBuf, uint16_t Z)
{
const uint8_t* lut_numCnInCnGroups = p_lut->numCnInCnGroups;
const uint32_t* lut_startAddrCnGroups = p_lut->startAddrCnGroups;
const uint16_t (*lut_circShift_CNG10) [lut_numCnInCnGroups_BG2_R15[5]] = (uint16_t(*)[lut_numCnInCnGroups_BG2_R15[5]]) p_lut->circShift[5];
const uint32_t (*lut_startAddrBnProcBuf_CNG10) [lut_numCnInCnGroups[5]] = (uint32_t(*)[lut_numCnInCnGroups[5]]) p_lut->startAddrBnProcBuf[5];
const uint8_t (*lut_bnPosBnProcBuf_CNG10) [lut_numCnInCnGroups[5]] = (uint8_t(*)[lut_numCnInCnGroups[5]]) p_lut->bnPosBnProcBuf[5];
int8_t* cnProcBuf = p_procBuf->cnProcBuf;
int8_t* bnProcBufRes = p_procBuf->bnProcBufRes;
int8_t* p_cnProcBuf;
uint32_t bitOffsetInGroup;
uint32_t i;
uint32_t j;
uint32_t idxBn = 0;
// =====================================================================
// CN group with 10 BNs
bitOffsetInGroup = lut_numCnInCnGroups_BG2_R15[5]*NR_LDPC_ZMAX;
for (j=0; j<10; j++)
{
p_cnProcBuf = &cnProcBuf[lut_startAddrCnGroups[5] + j*bitOffsetInGroup];
for (i=0; i<lut_numCnInCnGroups[5]; i++)
{
idxBn = lut_startAddrBnProcBuf_CNG10[j][i] + lut_bnPosBnProcBuf_CNG10[j][i]*Z;
nrLDPC_circ_memcpy(p_cnProcBuf, &bnProcBufRes[idxBn], Z, lut_circShift_CNG10[j][i]);
p_cnProcBuf += Z;
}
}
}
/**
\brief Copies the values in the BN processing results buffer to their corresponding place in the CN processing buffer for BG2.
\param p_lut Pointer to decoder LUTs
......
......@@ -103,4 +103,12 @@ typedef struct nrLDPC_procBuf {
int8_t* llrProcBuf; /**< LLR processing buffer */
} t_nrLDPC_procBuf;
/**
Structure for CN processing of one CN group
*/
typedef void (*t_nrLDPC_cnProcGroup)(t_nrLDPC_lut*, t_nrLDPC_procBuf*, uint16_t);
typedef void (*t_nrLDPC_cn2bnProcBufGroup)(t_nrLDPC_lut*, t_nrLDPC_procBuf*, uint16_t);
typedef void (*t_nrLDPC_bn2cnProcBufGroup)(t_nrLDPC_lut*, t_nrLDPC_procBuf*, uint16_t);
#endif
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment