Commit 119be5d4 authored by Sy's avatar Sy

Re-implementation of memcpy--> more faster | llr2cnProcBuf, cn2bnProcbuf,...

Re-implementation of memcpy--> more faster | llr2cnProcBuf, cn2bnProcbuf, bn2cnProcBuf are  optimized
parent cb8760a4
/* /*
* Licensed to the OpenAirInterface (OAI) Software Alliance under one or more * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more
* contributor license agreements. See the NOTICE file distributed with * contributor license agreements. See the NOTICE file distributed with
...@@ -21,13 +22,8 @@ ...@@ -21,13 +22,8 @@
/*!\file nrLDPC_mPass.h /*!\file nrLDPC_mPass.h
* \brief Defines the functions for message passing * \brief Defines the functions for message passing
* \author Sebastian Wagner (TCL Communications) Email: <mailto:sebastian.wagner@tcl.com> *
* \date 30-09-2019 */
* \version 2.0
* \note
* \warning
*/
#ifndef __NR_LDPC_MPASS__H__ #ifndef __NR_LDPC_MPASS__H__
#define __NR_LDPC_MPASS__H__ #define __NR_LDPC_MPASS__H__
...@@ -35,7 +31,7 @@ ...@@ -35,7 +31,7 @@
#include "nrLDPCdecoder_defs.h" #include "nrLDPCdecoder_defs.h"
#include <omp.h> #include <omp.h>
/** /**
\brief Circular memcpy \brief Circular memcpy1
|<- rem->|<- circular shift ->| |<- rem->|<- circular shift ->|
(src) str2 = |--------xxxxxxxxxxxxxxxxxxxxx| (src) str2 = |--------xxxxxxxxxxxxxxxxxxxxx|
\_______________ \_______________
...@@ -46,17 +42,27 @@ ...@@ -46,17 +42,27 @@
\param Z Lifting size \param Z Lifting size
\param cshift Circular shift \param cshift Circular shift
*/ */
static inline void *nrLDPC_inv_circ_memcpy(int8_t *str1, const int8_t *str2, uint16_t Z, uint16_t cshift) //more faster memcpy by using "rep movsb", which on modern processors is highly optimized
void *memcpy1(void *dst, const void *src, size_t n)
{
void *ret = dst;
asm volatile("rep movsb" : "+D" (dst) : "c"(n), "S"(src) : "cc", "memory");
return ret;
}
static inline void *nrLDPC_inv_circ_memcpy1(int8_t *str1, const int8_t *str2, uint16_t Z, uint16_t cshift)
{ {
uint16_t rem = Z - cshift; uint16_t rem = Z - cshift;
memcpy(str1+cshift, str2 , rem); memcpy1(str1+cshift, str2 , rem);
memcpy(str1 , str2+rem, cshift); memcpy1(str1 , str2+rem, cshift);
return(str1); return(str1);
} }
/** /**
\brief Inverse circular memcpy \brief Inverse circular memcpy1
|<- circular shift ->|<- rem->| |<- circular shift ->|<- rem->|
(src) str2 = |xxxxxxxxxxxxxxxxxxxx\--------| (src) str2 = |xxxxxxxxxxxxxxxxxxxx\--------|
\ \
...@@ -66,11 +72,11 @@ static inline void *nrLDPC_inv_circ_memcpy(int8_t *str1, const int8_t *str2, uin ...@@ -66,11 +72,11 @@ static inline void *nrLDPC_inv_circ_memcpy(int8_t *str1, const int8_t *str2, uin
\param Z Lifting size \param Z Lifting size
\param cshift Circular shift \param cshift Circular shift
*/ */
static inline void *nrLDPC_circ_memcpy(int8_t *str1, const int8_t *str2, uint16_t Z, uint16_t cshift) static inline void *nrLDPC_circ_memcpy1(int8_t *str1, const int8_t *str2, uint16_t Z, uint16_t cshift)
{ {
uint16_t rem = Z - cshift; uint16_t rem = Z - cshift;
memcpy(str1 , str2+cshift, rem); memcpy1(str1 , str2+cshift, rem);
memcpy(str1+rem , str2 , cshift); memcpy1(str1+rem , str2 , cshift);
return(str1); return(str1);
} }
...@@ -114,14 +120,14 @@ static inline void nrLDPC_llr2llrProcBuf(t_nrLDPC_lut* p_lut, int8_t* llr, t_nrL ...@@ -114,14 +120,14 @@ static inline void nrLDPC_llr2llrProcBuf(t_nrLDPC_lut* p_lut, int8_t* llr, t_nrL
// Copy LLRs connected to 1 CN // Copy LLRs connected to 1 CN
if (numBn2CnG1 > 0) if (numBn2CnG1 > 0)
{ {
memcpy(&llrProcBuf[0], &llr[colG1], numBn2CnG1*Z); memcpy1(&llrProcBuf[0], &llr[colG1], numBn2CnG1*Z);
} }
// First 2 columns might be set to zero directly if it's true they always belong to the groups with highest number of connected CNs... // First 2 columns might be set to zero directly if it's true they always belong to the groups with highest number of connected CNs...
for (i=0; i<startColParity; i++) for (i=0; i<startColParity; i++)
{ {
idxBn = lut_llr2llrProcBufAddr[i] + lut_llr2llrProcBufBnPos[i]*Z; idxBn = lut_llr2llrProcBufAddr[i] + lut_llr2llrProcBufBnPos[i]*Z;
memcpy(&llrProcBuf[idxBn], llr, Z); memcpy1(&llrProcBuf[idxBn], llr, Z);
llr += Z; llr += Z;
} }
} }
...@@ -171,8 +177,8 @@ static inline void nrLDPC_llr2CnProcBuf_BG1(t_nrLDPC_lut* p_lut, int8_t* llr, t_ ...@@ -171,8 +177,8 @@ static inline void nrLDPC_llr2CnProcBuf_BG1(t_nrLDPC_lut* p_lut, int8_t* llr, t_
bitOffsetInGroup = lut_numCnInCnGroups_BG1_R13[0]*NR_LDPC_ZMAX; bitOffsetInGroup = lut_numCnInCnGroups_BG1_R13[0]*NR_LDPC_ZMAX;
// #pragma omp simd // #pragma omp simd
// #pragma omp parallel for schedule(dynamic) // #pragma omp parallel for schedule(dynamic)
for (j=0; j<3; j++) for (j=0; j<3; j++)
{ {
...@@ -180,7 +186,7 @@ static inline void nrLDPC_llr2CnProcBuf_BG1(t_nrLDPC_lut* p_lut, int8_t* llr, t_ ...@@ -180,7 +186,7 @@ static inline void nrLDPC_llr2CnProcBuf_BG1(t_nrLDPC_lut* p_lut, int8_t* llr, t_
idxBn = lut_posBnInCnProcBuf_CNG3[j][0]*Z; idxBn = lut_posBnInCnProcBuf_CNG3[j][0]*Z;
nrLDPC_circ_memcpy(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG3[j][0]); nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG3[j][0]);
} }
// ===================================================================== // =====================================================================
...@@ -196,7 +202,7 @@ static inline void nrLDPC_llr2CnProcBuf_BG1(t_nrLDPC_lut* p_lut, int8_t* llr, t_ ...@@ -196,7 +202,7 @@ static inline void nrLDPC_llr2CnProcBuf_BG1(t_nrLDPC_lut* p_lut, int8_t* llr, t_
{ {
idxBn = lut_posBnInCnProcBuf_CNG4[j][i]*Z; idxBn = lut_posBnInCnProcBuf_CNG4[j][i]*Z;
nrLDPC_circ_memcpy(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG4[j][i]); nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG4[j][i]);
p_cnProcBuf += Z; p_cnProcBuf += Z;
} }
...@@ -216,7 +222,7 @@ static inline void nrLDPC_llr2CnProcBuf_BG1(t_nrLDPC_lut* p_lut, int8_t* llr, t_ ...@@ -216,7 +222,7 @@ static inline void nrLDPC_llr2CnProcBuf_BG1(t_nrLDPC_lut* p_lut, int8_t* llr, t_
for (i=0; i<lut_numCnInCnGroups[2]; i++) for (i=0; i<lut_numCnInCnGroups[2]; i++)
{ {
idxBn = lut_posBnInCnProcBuf_CNG5[j][i]*Z; idxBn = lut_posBnInCnProcBuf_CNG5[j][i]*Z;
nrLDPC_circ_memcpy(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[j][i]); nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[j][i]);
p_cnProcBuf += Z; p_cnProcBuf += Z;
} }
} }
...@@ -233,7 +239,7 @@ static inline void nrLDPC_llr2CnProcBuf_BG1(t_nrLDPC_lut* p_lut, int8_t* llr, t_ ...@@ -233,7 +239,7 @@ static inline void nrLDPC_llr2CnProcBuf_BG1(t_nrLDPC_lut* p_lut, int8_t* llr, t_
for (i=0; i<lut_numCnInCnGroups[3]; i++) for (i=0; i<lut_numCnInCnGroups[3]; i++)
{ {
idxBn = lut_posBnInCnProcBuf_CNG6[j][i]*Z; idxBn = lut_posBnInCnProcBuf_CNG6[j][i]*Z;
nrLDPC_circ_memcpy(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[j][i]); nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[j][i]);
p_cnProcBuf += Z; p_cnProcBuf += Z;
} }
} }
...@@ -252,7 +258,7 @@ static inline void nrLDPC_llr2CnProcBuf_BG1(t_nrLDPC_lut* p_lut, int8_t* llr, t_ ...@@ -252,7 +258,7 @@ static inline void nrLDPC_llr2CnProcBuf_BG1(t_nrLDPC_lut* p_lut, int8_t* llr, t_
for (i=0; i<lut_numCnInCnGroups[4]; i++) for (i=0; i<lut_numCnInCnGroups[4]; i++)
{ {
idxBn = lut_posBnInCnProcBuf_CNG7[j][i]*Z; idxBn = lut_posBnInCnProcBuf_CNG7[j][i]*Z;
nrLDPC_circ_memcpy(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG7[j][i]); nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG7[j][i]);
p_cnProcBuf += Z; p_cnProcBuf += Z;
} }
} }
...@@ -270,7 +276,7 @@ static inline void nrLDPC_llr2CnProcBuf_BG1(t_nrLDPC_lut* p_lut, int8_t* llr, t_ ...@@ -270,7 +276,7 @@ static inline void nrLDPC_llr2CnProcBuf_BG1(t_nrLDPC_lut* p_lut, int8_t* llr, t_
for (i=0; i<lut_numCnInCnGroups[5]; i++) for (i=0; i<lut_numCnInCnGroups[5]; i++)
{ {
idxBn = lut_posBnInCnProcBuf_CNG8[j][i]*Z; idxBn = lut_posBnInCnProcBuf_CNG8[j][i]*Z;
nrLDPC_circ_memcpy(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG8[j][i]); nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG8[j][i]);
p_cnProcBuf += Z; p_cnProcBuf += Z;
} }
} }
...@@ -287,7 +293,7 @@ static inline void nrLDPC_llr2CnProcBuf_BG1(t_nrLDPC_lut* p_lut, int8_t* llr, t_ ...@@ -287,7 +293,7 @@ static inline void nrLDPC_llr2CnProcBuf_BG1(t_nrLDPC_lut* p_lut, int8_t* llr, t_
for (i=0; i<lut_numCnInCnGroups[6]; i++) for (i=0; i<lut_numCnInCnGroups[6]; i++)
{ {
idxBn = lut_posBnInCnProcBuf_CNG9[j][i]*Z; idxBn = lut_posBnInCnProcBuf_CNG9[j][i]*Z;
nrLDPC_circ_memcpy(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG9[j][i]); nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG9[j][i]);
p_cnProcBuf += Z; p_cnProcBuf += Z;
} }
} }
...@@ -304,7 +310,7 @@ static inline void nrLDPC_llr2CnProcBuf_BG1(t_nrLDPC_lut* p_lut, int8_t* llr, t_ ...@@ -304,7 +310,7 @@ static inline void nrLDPC_llr2CnProcBuf_BG1(t_nrLDPC_lut* p_lut, int8_t* llr, t_
for (i=0; i<lut_numCnInCnGroups[7]; i++) for (i=0; i<lut_numCnInCnGroups[7]; i++)
{ {
idxBn = lut_posBnInCnProcBuf_CNG10[j][i]*Z; idxBn = lut_posBnInCnProcBuf_CNG10[j][i]*Z;
nrLDPC_circ_memcpy(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG10[j][i]); nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG10[j][i]);
p_cnProcBuf += Z; p_cnProcBuf += Z;
} }
} }
...@@ -322,7 +328,7 @@ static inline void nrLDPC_llr2CnProcBuf_BG1(t_nrLDPC_lut* p_lut, int8_t* llr, t_ ...@@ -322,7 +328,7 @@ static inline void nrLDPC_llr2CnProcBuf_BG1(t_nrLDPC_lut* p_lut, int8_t* llr, t_
for (i=0; i<lut_numCnInCnGroups[8]; i++) for (i=0; i<lut_numCnInCnGroups[8]; i++)
{ {
idxBn = lut_posBnInCnProcBuf_CNG19[j][i]*Z; idxBn = lut_posBnInCnProcBuf_CNG19[j][i]*Z;
nrLDPC_circ_memcpy(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[j][i]); nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[j][i]);
p_cnProcBuf += Z; p_cnProcBuf += Z;
} }
} }
...@@ -390,7 +396,7 @@ static inline void nrLDPC_llr2CnProcBuf_BG2(t_nrLDPC_lut* p_lut, int8_t* llr, t_ ...@@ -390,7 +396,7 @@ static inline void nrLDPC_llr2CnProcBuf_BG2(t_nrLDPC_lut* p_lut, int8_t* llr, t_
for (i=0; i<lut_numCnInCnGroups[0]; i++) for (i=0; i<lut_numCnInCnGroups[0]; i++)
{ {
idxBn = lut_posBnInCnProcBuf_CNG3[j][i]*Z; idxBn = lut_posBnInCnProcBuf_CNG3[j][i]*Z;
nrLDPC_circ_memcpy(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG3[j][i]); nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG3[j][i]);
p_cnProcBuf += Z; p_cnProcBuf += Z;
} }
} }
...@@ -407,7 +413,7 @@ static inline void nrLDPC_llr2CnProcBuf_BG2(t_nrLDPC_lut* p_lut, int8_t* llr, t_ ...@@ -407,7 +413,7 @@ static inline void nrLDPC_llr2CnProcBuf_BG2(t_nrLDPC_lut* p_lut, int8_t* llr, t_
for (i=0; i<lut_numCnInCnGroups[1]; i++) for (i=0; i<lut_numCnInCnGroups[1]; i++)
{ {
idxBn = lut_posBnInCnProcBuf_CNG4[j][i]*Z; idxBn = lut_posBnInCnProcBuf_CNG4[j][i]*Z;
nrLDPC_circ_memcpy(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG4[j][i]); nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG4[j][i]);
p_cnProcBuf += Z; p_cnProcBuf += Z;
} }
} }
...@@ -424,7 +430,7 @@ static inline void nrLDPC_llr2CnProcBuf_BG2(t_nrLDPC_lut* p_lut, int8_t* llr, t_ ...@@ -424,7 +430,7 @@ static inline void nrLDPC_llr2CnProcBuf_BG2(t_nrLDPC_lut* p_lut, int8_t* llr, t_
for (i=0; i<lut_numCnInCnGroups[2]; i++) for (i=0; i<lut_numCnInCnGroups[2]; i++)
{ {
idxBn = lut_posBnInCnProcBuf_CNG5[j][i]*Z; idxBn = lut_posBnInCnProcBuf_CNG5[j][i]*Z;
nrLDPC_circ_memcpy(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[j][i]); nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[j][i]);
p_cnProcBuf += Z; p_cnProcBuf += Z;
} }
} }
...@@ -441,7 +447,7 @@ static inline void nrLDPC_llr2CnProcBuf_BG2(t_nrLDPC_lut* p_lut, int8_t* llr, t_ ...@@ -441,7 +447,7 @@ static inline void nrLDPC_llr2CnProcBuf_BG2(t_nrLDPC_lut* p_lut, int8_t* llr, t_
for (i=0; i<lut_numCnInCnGroups[3]; i++) for (i=0; i<lut_numCnInCnGroups[3]; i++)
{ {
idxBn = lut_posBnInCnProcBuf_CNG6[j][i]*Z; idxBn = lut_posBnInCnProcBuf_CNG6[j][i]*Z;
nrLDPC_circ_memcpy(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[j][i]); nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[j][i]);
p_cnProcBuf += Z; p_cnProcBuf += Z;
} }
} }
...@@ -458,7 +464,7 @@ static inline void nrLDPC_llr2CnProcBuf_BG2(t_nrLDPC_lut* p_lut, int8_t* llr, t_ ...@@ -458,7 +464,7 @@ static inline void nrLDPC_llr2CnProcBuf_BG2(t_nrLDPC_lut* p_lut, int8_t* llr, t_
for (i=0; i<lut_numCnInCnGroups[4]; i++) for (i=0; i<lut_numCnInCnGroups[4]; i++)
{ {
idxBn = lut_posBnInCnProcBuf_CNG8[j][i]*Z; idxBn = lut_posBnInCnProcBuf_CNG8[j][i]*Z;
nrLDPC_circ_memcpy(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG8[j][i]); nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG8[j][i]);
p_cnProcBuf += Z; p_cnProcBuf += Z;
} }
} }
...@@ -475,7 +481,7 @@ static inline void nrLDPC_llr2CnProcBuf_BG2(t_nrLDPC_lut* p_lut, int8_t* llr, t_ ...@@ -475,7 +481,7 @@ static inline void nrLDPC_llr2CnProcBuf_BG2(t_nrLDPC_lut* p_lut, int8_t* llr, t_
for (i=0; i<lut_numCnInCnGroups[5]; i++) for (i=0; i<lut_numCnInCnGroups[5]; i++)
{ {
idxBn = lut_posBnInCnProcBuf_CNG10[j][i]*Z; idxBn = lut_posBnInCnProcBuf_CNG10[j][i]*Z;
nrLDPC_circ_memcpy(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG10[j][i]); nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG10[j][i]);
p_cnProcBuf += Z; p_cnProcBuf += Z;
} }
} }
...@@ -533,7 +539,7 @@ static inline void nrLDPC_cn2bnProcBuf_BG2(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf ...@@ -533,7 +539,7 @@ static inline void nrLDPC_cn2bnProcBuf_BG2(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf
for (i=0; i<lut_numCnInCnGroups[0]; i++) for (i=0; i<lut_numCnInCnGroups[0]; i++)
{ {
idxBn = lut_startAddrBnProcBuf_CNG3[j][i] + lut_bnPosBnProcBuf_CNG3[j][i]*Z; idxBn = lut_startAddrBnProcBuf_CNG3[j][i] + lut_bnPosBnProcBuf_CNG3[j][i]*Z;
nrLDPC_inv_circ_memcpy(&bnProcBuf[idxBn],p_cnProcBufRes,Z,lut_circShift_CNG3[j][i]); nrLDPC_inv_circ_memcpy1(&bnProcBuf[idxBn],p_cnProcBufRes,Z,lut_circShift_CNG3[j][i]);
p_cnProcBufRes += Z; p_cnProcBufRes += Z;
} }
} }
...@@ -550,7 +556,7 @@ static inline void nrLDPC_cn2bnProcBuf_BG2(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf ...@@ -550,7 +556,7 @@ static inline void nrLDPC_cn2bnProcBuf_BG2(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf
for (i=0; i<lut_numCnInCnGroups[1]; i++) for (i=0; i<lut_numCnInCnGroups[1]; i++)
{ {
idxBn = lut_startAddrBnProcBuf_CNG4[j][i] + lut_bnPosBnProcBuf_CNG4[j][i]*Z; idxBn = lut_startAddrBnProcBuf_CNG4[j][i] + lut_bnPosBnProcBuf_CNG4[j][i]*Z;
nrLDPC_inv_circ_memcpy(&bnProcBuf[idxBn],p_cnProcBufRes,Z,lut_circShift_CNG4[j][i]); nrLDPC_inv_circ_memcpy1(&bnProcBuf[idxBn],p_cnProcBufRes,Z,lut_circShift_CNG4[j][i]);
p_cnProcBufRes += Z; p_cnProcBufRes += Z;
} }
} }
...@@ -567,7 +573,7 @@ static inline void nrLDPC_cn2bnProcBuf_BG2(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf ...@@ -567,7 +573,7 @@ static inline void nrLDPC_cn2bnProcBuf_BG2(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf
for (i=0; i<lut_numCnInCnGroups[2]; i++) for (i=0; i<lut_numCnInCnGroups[2]; i++)
{ {
idxBn = lut_startAddrBnProcBuf_CNG5[j][i] + lut_bnPosBnProcBuf_CNG5[j][i]*Z; idxBn = lut_startAddrBnProcBuf_CNG5[j][i] + lut_bnPosBnProcBuf_CNG5[j][i]*Z;
nrLDPC_inv_circ_memcpy(&bnProcBuf[idxBn],p_cnProcBufRes,Z,lut_circShift_CNG5[j][i]); nrLDPC_inv_circ_memcpy1(&bnProcBuf[idxBn],p_cnProcBufRes,Z,lut_circShift_CNG5[j][i]);
p_cnProcBufRes += Z; p_cnProcBufRes += Z;
} }
} }
...@@ -584,7 +590,7 @@ static inline void nrLDPC_cn2bnProcBuf_BG2(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf ...@@ -584,7 +590,7 @@ static inline void nrLDPC_cn2bnProcBuf_BG2(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf
for (i=0; i<lut_numCnInCnGroups[3]; i++) for (i=0; i<lut_numCnInCnGroups[3]; i++)
{ {
idxBn = lut_startAddrBnProcBuf_CNG6[j][i] + lut_bnPosBnProcBuf_CNG6[j][i]*Z; idxBn = lut_startAddrBnProcBuf_CNG6[j][i] + lut_bnPosBnProcBuf_CNG6[j][i]*Z;
nrLDPC_inv_circ_memcpy(&bnProcBuf[idxBn],p_cnProcBufRes,Z,lut_circShift_CNG6[j][i]); nrLDPC_inv_circ_memcpy1(&bnProcBuf[idxBn],p_cnProcBufRes,Z,lut_circShift_CNG6[j][i]);
p_cnProcBufRes += Z; p_cnProcBufRes += Z;
} }
} }
...@@ -601,7 +607,7 @@ static inline void nrLDPC_cn2bnProcBuf_BG2(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf ...@@ -601,7 +607,7 @@ static inline void nrLDPC_cn2bnProcBuf_BG2(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf
for (i=0; i<lut_numCnInCnGroups[4]; i++) for (i=0; i<lut_numCnInCnGroups[4]; i++)
{ {
idxBn = lut_startAddrBnProcBuf_CNG8[j][i] + lut_bnPosBnProcBuf_CNG8[j][i]*Z; idxBn = lut_startAddrBnProcBuf_CNG8[j][i] + lut_bnPosBnProcBuf_CNG8[j][i]*Z;
nrLDPC_inv_circ_memcpy(&bnProcBuf[idxBn],p_cnProcBufRes,Z,lut_circShift_CNG8[j][i]); nrLDPC_inv_circ_memcpy1(&bnProcBuf[idxBn],p_cnProcBufRes,Z,lut_circShift_CNG8[j][i]);
p_cnProcBufRes += Z; p_cnProcBufRes += Z;
} }
} }
...@@ -618,7 +624,7 @@ static inline void nrLDPC_cn2bnProcBuf_BG2(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf ...@@ -618,7 +624,7 @@ static inline void nrLDPC_cn2bnProcBuf_BG2(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf
for (i=0; i<lut_numCnInCnGroups[5]; i++) for (i=0; i<lut_numCnInCnGroups[5]; i++)
{ {
idxBn = lut_startAddrBnProcBuf_CNG10[j][i] + lut_bnPosBnProcBuf_CNG10[j][i]*Z; idxBn = lut_startAddrBnProcBuf_CNG10[j][i] + lut_bnPosBnProcBuf_CNG10[j][i]*Z;
nrLDPC_inv_circ_memcpy(&bnProcBuf[idxBn],p_cnProcBufRes,Z,lut_circShift_CNG10[j][i]); nrLDPC_inv_circ_memcpy1(&bnProcBuf[idxBn],p_cnProcBufRes,Z,lut_circShift_CNG10[j][i]);
p_cnProcBufRes += Z; p_cnProcBufRes += Z;
} }
} }
...@@ -681,7 +687,7 @@ static inline void nrLDPC_cn2bnProcBuf_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf ...@@ -681,7 +687,7 @@ static inline void nrLDPC_cn2bnProcBuf_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf
for (j=0; j<3; j++) for (j=0; j<3; j++)
{ {
p_cnProcBufRes = &cnProcBufRes[lut_startAddrCnGroups[0] + j*bitOffsetInGroup]; p_cnProcBufRes = &cnProcBufRes[lut_startAddrCnGroups[0] + j*bitOffsetInGroup];
nrLDPC_inv_circ_memcpy(&bnProcBuf[lut_startAddrBnProcBuf_CNG3[j][0]],p_cnProcBufRes,Z,lut_circShift_CNG3[j][0]); nrLDPC_inv_circ_memcpy1(&bnProcBuf[lut_startAddrBnProcBuf_CNG3[j][0]],p_cnProcBufRes,Z,lut_circShift_CNG3[j][0]);
} }
// ===================================================================== // =====================================================================
...@@ -696,7 +702,7 @@ static inline void nrLDPC_cn2bnProcBuf_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf ...@@ -696,7 +702,7 @@ static inline void nrLDPC_cn2bnProcBuf_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf
for (i=0; i<lut_numCnInCnGroups[1]; i++) for (i=0; i<lut_numCnInCnGroups[1]; i++)
{ {
idxBn = lut_startAddrBnProcBuf_CNG4[j][i] + lut_bnPosBnProcBuf_CNG4[j][i]*Z; idxBn = lut_startAddrBnProcBuf_CNG4[j][i] + lut_bnPosBnProcBuf_CNG4[j][i]*Z;
nrLDPC_inv_circ_memcpy(&bnProcBuf[idxBn],p_cnProcBufRes,Z,lut_circShift_CNG4[j][i]); nrLDPC_inv_circ_memcpy1(&bnProcBuf[idxBn],p_cnProcBufRes,Z,lut_circShift_CNG4[j][i]);
p_cnProcBufRes += Z; p_cnProcBufRes += Z;
} }
} }
...@@ -713,7 +719,7 @@ static inline void nrLDPC_cn2bnProcBuf_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf ...@@ -713,7 +719,7 @@ static inline void nrLDPC_cn2bnProcBuf_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf
for (i=0; i<lut_numCnInCnGroups[2]; i++) for (i=0; i<lut_numCnInCnGroups[2]; i++)
{ {
idxBn = lut_startAddrBnProcBuf_CNG5[j][i] + lut_bnPosBnProcBuf_CNG5[j][i]*Z; idxBn = lut_startAddrBnProcBuf_CNG5[j][i] + lut_bnPosBnProcBuf_CNG5[j][i]*Z;
nrLDPC_inv_circ_memcpy(&bnProcBuf[idxBn],p_cnProcBufRes,Z,lut_circShift_CNG5[j][i]); nrLDPC_inv_circ_memcpy1(&bnProcBuf[idxBn],p_cnProcBufRes,Z,lut_circShift_CNG5[j][i]);
p_cnProcBufRes += Z; p_cnProcBufRes += Z;
} }
} }
...@@ -730,7 +736,7 @@ static inline void nrLDPC_cn2bnProcBuf_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf ...@@ -730,7 +736,7 @@ static inline void nrLDPC_cn2bnProcBuf_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf
for (i=0; i<lut_numCnInCnGroups[3]; i++) for (i=0; i<lut_numCnInCnGroups[3]; i++)
{ {
idxBn = lut_startAddrBnProcBuf_CNG6[j][i] + lut_bnPosBnProcBuf_CNG6[j][i]*Z; idxBn = lut_startAddrBnProcBuf_CNG6[j][i] + lut_bnPosBnProcBuf_CNG6[j][i]*Z;
nrLDPC_inv_circ_memcpy(&bnProcBuf[idxBn],p_cnProcBufRes,Z,lut_circShift_CNG6[j][i]); nrLDPC_inv_circ_memcpy1(&bnProcBuf[idxBn],p_cnProcBufRes,Z,lut_circShift_CNG6[j][i]);
p_cnProcBufRes += Z; p_cnProcBufRes += Z;
} }
} }
...@@ -747,7 +753,7 @@ static inline void nrLDPC_cn2bnProcBuf_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf ...@@ -747,7 +753,7 @@ static inline void nrLDPC_cn2bnProcBuf_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf
for (i=0; i<lut_numCnInCnGroups[4]; i++) for (i=0; i<lut_numCnInCnGroups[4]; i++)
{ {
idxBn = lut_startAddrBnProcBuf_CNG7[j][i] + lut_bnPosBnProcBuf_CNG7[j][i]*Z; idxBn = lut_startAddrBnProcBuf_CNG7[j][i] + lut_bnPosBnProcBuf_CNG7[j][i]*Z;
nrLDPC_inv_circ_memcpy(&bnProcBuf[idxBn],p_cnProcBufRes,Z,lut_circShift_CNG7[j][i]); nrLDPC_inv_circ_memcpy1(&bnProcBuf[idxBn],p_cnProcBufRes,Z,lut_circShift_CNG7[j][i]);
p_cnProcBufRes += Z; p_cnProcBufRes += Z;
} }
} }
...@@ -764,7 +770,7 @@ static inline void nrLDPC_cn2bnProcBuf_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf ...@@ -764,7 +770,7 @@ static inline void nrLDPC_cn2bnProcBuf_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf
for (i=0; i<lut_numCnInCnGroups[5]; i++) for (i=0; i<lut_numCnInCnGroups[5]; i++)
{ {
idxBn = lut_startAddrBnProcBuf_CNG8[j][i] + lut_bnPosBnProcBuf_CNG8[j][i]*Z; idxBn = lut_startAddrBnProcBuf_CNG8[j][i] + lut_bnPosBnProcBuf_CNG8[j][i]*Z;
nrLDPC_inv_circ_memcpy(&bnProcBuf[idxBn],p_cnProcBufRes,Z,lut_circShift_CNG8[j][i]); nrLDPC_inv_circ_memcpy1(&bnProcBuf[idxBn],p_cnProcBufRes,Z,lut_circShift_CNG8[j][i]);
p_cnProcBufRes += Z; p_cnProcBufRes += Z;
} }
} }
...@@ -781,7 +787,7 @@ static inline void nrLDPC_cn2bnProcBuf_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf ...@@ -781,7 +787,7 @@ static inline void nrLDPC_cn2bnProcBuf_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf
for (i=0; i<lut_numCnInCnGroups[6]; i++) for (i=0; i<lut_numCnInCnGroups[6]; i++)
{ {
idxBn = lut_startAddrBnProcBuf_CNG9[j][i] + lut_bnPosBnProcBuf_CNG9[j][i]*Z; idxBn = lut_startAddrBnProcBuf_CNG9[j][i] + lut_bnPosBnProcBuf_CNG9[j][i]*Z;
nrLDPC_inv_circ_memcpy(&bnProcBuf[idxBn],p_cnProcBufRes,Z,lut_circShift_CNG9[j][i]); nrLDPC_inv_circ_memcpy1(&bnProcBuf[idxBn],p_cnProcBufRes,Z,lut_circShift_CNG9[j][i]);
p_cnProcBufRes += Z; p_cnProcBufRes += Z;
} }
} }
...@@ -798,7 +804,7 @@ static inline void nrLDPC_cn2bnProcBuf_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf ...@@ -798,7 +804,7 @@ static inline void nrLDPC_cn2bnProcBuf_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf
for (i=0; i<lut_numCnInCnGroups[7]; i++) for (i=0; i<lut_numCnInCnGroups[7]; i++)
{ {
idxBn = lut_startAddrBnProcBuf_CNG10[j][i] + lut_bnPosBnProcBuf_CNG10[j][i]*Z; idxBn = lut_startAddrBnProcBuf_CNG10[j][i] + lut_bnPosBnProcBuf_CNG10[j][i]*Z;
nrLDPC_inv_circ_memcpy(&bnProcBuf[idxBn],p_cnProcBufRes,Z,lut_circShift_CNG10[j][i]); nrLDPC_inv_circ_memcpy1(&bnProcBuf[idxBn],p_cnProcBufRes,Z,lut_circShift_CNG10[j][i]);
p_cnProcBufRes += Z; p_cnProcBufRes += Z;
} }
} }
...@@ -815,7 +821,7 @@ static inline void nrLDPC_cn2bnProcBuf_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf ...@@ -815,7 +821,7 @@ static inline void nrLDPC_cn2bnProcBuf_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf
for (i=0; i<lut_numCnInCnGroups[8]; i++) for (i=0; i<lut_numCnInCnGroups[8]; i++)
{ {
idxBn = lut_startAddrBnProcBuf_CNG19[j][i] + lut_bnPosBnProcBuf_CNG19[j][i]*Z; idxBn = lut_startAddrBnProcBuf_CNG19[j][i] + lut_bnPosBnProcBuf_CNG19[j][i]*Z;
nrLDPC_inv_circ_memcpy(&bnProcBuf[idxBn],p_cnProcBufRes,Z,lut_circShift_CNG19[j][i]); nrLDPC_inv_circ_memcpy1(&bnProcBuf[idxBn],p_cnProcBufRes,Z,lut_circShift_CNG19[j][i]);
p_cnProcBufRes += Z; p_cnProcBufRes += Z;
} }
} }
...@@ -877,7 +883,7 @@ static inline void nrLDPC_bn2cnProcBuf_BG2(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf ...@@ -877,7 +883,7 @@ static inline void nrLDPC_bn2cnProcBuf_BG2(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf
for (i=0; i<lut_numCnInCnGroups[0]; i++) for (i=0; i<lut_numCnInCnGroups[0]; i++)
{ {
idxBn = lut_startAddrBnProcBuf_CNG3[j][i] + lut_bnPosBnProcBuf_CNG3[j][i]*Z; idxBn = lut_startAddrBnProcBuf_CNG3[j][i] + lut_bnPosBnProcBuf_CNG3[j][i]*Z;
nrLDPC_circ_memcpy(p_cnProcBuf, &bnProcBufRes[idxBn], Z, lut_circShift_CNG3[j][i]); nrLDPC_circ_memcpy1(p_cnProcBuf, &bnProcBufRes[idxBn], Z, lut_circShift_CNG3[j][i]);
p_cnProcBuf += Z; p_cnProcBuf += Z;
} }
} }
...@@ -893,7 +899,7 @@ static inline void nrLDPC_bn2cnProcBuf_BG2(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf ...@@ -893,7 +899,7 @@ static inline void nrLDPC_bn2cnProcBuf_BG2(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf
for (i=0; i<lut_numCnInCnGroups[1]; i++) for (i=0; i<lut_numCnInCnGroups[1]; i++)
{ {
idxBn = lut_startAddrBnProcBuf_CNG4[j][i] + lut_bnPosBnProcBuf_CNG4[j][i]*Z; idxBn = lut_startAddrBnProcBuf_CNG4[j][i] + lut_bnPosBnProcBuf_CNG4[j][i]*Z;
nrLDPC_circ_memcpy(p_cnProcBuf, &bnProcBufRes[idxBn], Z, lut_circShift_CNG4[j][i]); nrLDPC_circ_memcpy1(p_cnProcBuf, &bnProcBufRes[idxBn], Z, lut_circShift_CNG4[j][i]);
p_cnProcBuf += Z; p_cnProcBuf += Z;
} }
} }
...@@ -909,7 +915,7 @@ static inline void nrLDPC_bn2cnProcBuf_BG2(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf ...@@ -909,7 +915,7 @@ static inline void nrLDPC_bn2cnProcBuf_BG2(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf
for (i=0; i<lut_numCnInCnGroups[2]; i++) for (i=0; i<lut_numCnInCnGroups[2]; i++)
{ {
idxBn = lut_startAddrBnProcBuf_CNG5[j][i] + lut_bnPosBnProcBuf_CNG5[j][i]*Z; idxBn = lut_startAddrBnProcBuf_CNG5[j][i] + lut_bnPosBnProcBuf_CNG5[j][i]*Z;
nrLDPC_circ_memcpy(p_cnProcBuf, &bnProcBufRes[idxBn], Z, lut_circShift_CNG5[j][i]); nrLDPC_circ_memcpy1(p_cnProcBuf, &bnProcBufRes[idxBn], Z, lut_circShift_CNG5[j][i]);
p_cnProcBuf += Z; p_cnProcBuf += Z;
} }
} }
...@@ -925,7 +931,7 @@ static inline void nrLDPC_bn2cnProcBuf_BG2(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf ...@@ -925,7 +931,7 @@ static inline void nrLDPC_bn2cnProcBuf_BG2(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf
for (i=0; i<lut_numCnInCnGroups[3]; i++) for (i=0; i<lut_numCnInCnGroups[3]; i++)
{ {
idxBn = lut_startAddrBnProcBuf_CNG6[j][i] + lut_bnPosBnProcBuf_CNG6[j][i]*Z; idxBn = lut_startAddrBnProcBuf_CNG6[j][i] + lut_bnPosBnProcBuf_CNG6[j][i]*Z;
nrLDPC_circ_memcpy(p_cnProcBuf, &bnProcBufRes[idxBn], Z, lut_circShift_CNG6[j][i]); nrLDPC_circ_memcpy1(p_cnProcBuf, &bnProcBufRes[idxBn], Z, lut_circShift_CNG6[j][i]);
p_cnProcBuf += Z; p_cnProcBuf += Z;
} }
} }
...@@ -941,7 +947,7 @@ static inline void nrLDPC_bn2cnProcBuf_BG2(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf ...@@ -941,7 +947,7 @@ static inline void nrLDPC_bn2cnProcBuf_BG2(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf
for (i=0; i<lut_numCnInCnGroups[4]; i++) for (i=0; i<lut_numCnInCnGroups[4]; i++)
{ {
idxBn = lut_startAddrBnProcBuf_CNG8[j][i] + lut_bnPosBnProcBuf_CNG8[j][i]*Z; idxBn = lut_startAddrBnProcBuf_CNG8[j][i] + lut_bnPosBnProcBuf_CNG8[j][i]*Z;
nrLDPC_circ_memcpy(p_cnProcBuf, &bnProcBufRes[idxBn], Z, lut_circShift_CNG8[j][i]); nrLDPC_circ_memcpy1(p_cnProcBuf, &bnProcBufRes[idxBn], Z, lut_circShift_CNG8[j][i]);
p_cnProcBuf += Z; p_cnProcBuf += Z;
} }
} }
...@@ -957,7 +963,7 @@ static inline void nrLDPC_bn2cnProcBuf_BG2(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf ...@@ -957,7 +963,7 @@ static inline void nrLDPC_bn2cnProcBuf_BG2(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf
for (i=0; i<lut_numCnInCnGroups[5]; i++) for (i=0; i<lut_numCnInCnGroups[5]; i++)
{ {
idxBn = lut_startAddrBnProcBuf_CNG10[j][i] + lut_bnPosBnProcBuf_CNG10[j][i]*Z; idxBn = lut_startAddrBnProcBuf_CNG10[j][i] + lut_bnPosBnProcBuf_CNG10[j][i]*Z;
nrLDPC_circ_memcpy(p_cnProcBuf, &bnProcBufRes[idxBn], Z, lut_circShift_CNG10[j][i]); nrLDPC_circ_memcpy1(p_cnProcBuf, &bnProcBufRes[idxBn], Z, lut_circShift_CNG10[j][i]);
p_cnProcBuf += Z; p_cnProcBuf += Z;
} }
} }
...@@ -1018,44 +1024,44 @@ static inline void nrLDPC_bn2cnProcBuf_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf ...@@ -1018,44 +1024,44 @@ static inline void nrLDPC_bn2cnProcBuf_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf
// ===================================================================== // =====================================================================
// CN group with 3 BNs // CN group with 3 BNs
bitOffsetInGroup = lut_numCnInCnGroups_BG1_R13[0]*NR_LDPC_ZMAX; bitOffsetInGroup = lut_numCnInCnGroups_BG1_R13[0]*NR_LDPC_ZMAX;
#pragma omp simd
for (j=0;j<2; j++) for (j=0;j<2; j++)
{ {
p_cnProcBuf = &cnProcBuf[lut_startAddrCnGroups[0] + j*bitOffsetInGroup]; p_cnProcBuf = &cnProcBuf[lut_startAddrCnGroups[0] + j*bitOffsetInGroup];
nrLDPC_circ_memcpy(p_cnProcBuf, &bnProcBufRes[lut_startAddrBnProcBuf_CNG3[j][0]], Z, lut_circShift_CNG3[j][0]); nrLDPC_circ_memcpy1(p_cnProcBuf, &bnProcBufRes[lut_startAddrBnProcBuf_CNG3[j][0]], Z, lut_circShift_CNG3[j][0]);
} }
// ===================================================================== // =====================================================================
// CN group with 4 BNs // CN group with 4 BNs
bitOffsetInGroup = lut_numCnInCnGroups_BG1_R13[1]*NR_LDPC_ZMAX; bitOffsetInGroup = lut_numCnInCnGroups_BG1_R13[1]*NR_LDPC_ZMAX;
// #pragma omp simd
for (j=0; j<3; j++) for (j=0; j<3; j++)
{ {
p_cnProcBuf = &cnProcBuf[lut_startAddrCnGroups[1] + j*bitOffsetInGroup]; p_cnProcBuf = &cnProcBuf[lut_startAddrCnGroups[1] + j*bitOffsetInGroup];
#pragma omp simd
for (i=0; i<lut_numCnInCnGroups[1]; i++) for (i=0; i<lut_numCnInCnGroups[1]; i++)
{ {
idxBn = lut_startAddrBnProcBuf_CNG4[j][i] + lut_bnPosBnProcBuf_CNG4[j][i]*Z; idxBn = lut_startAddrBnProcBuf_CNG4[j][i] + lut_bnPosBnProcBuf_CNG4[j][i]*Z;
nrLDPC_circ_memcpy(p_cnProcBuf, &bnProcBufRes[idxBn], Z, lut_circShift_CNG4[j][i]); nrLDPC_circ_memcpy1(p_cnProcBuf, &bnProcBufRes[idxBn], Z, lut_circShift_CNG4[j][i]);
p_cnProcBuf += Z; p_cnProcBuf += Z;
} }
} }
// ===================================================================== // =====================================================================
// CN group with 5 BNs // CN group with 5 BNs
bitOffsetInGroup = lut_numCnInCnGroups_BG1_R13[2]*NR_LDPC_ZMAX; bitOffsetInGroup = lut_numCnInCnGroups_BG1_R13[2]*NR_LDPC_ZMAX;
// #pragma omp simd
for (j=0; j<4; j++) for (j=0; j<4; j++)
{ {
p_cnProcBuf = &cnProcBuf[lut_startAddrCnGroups[2] + j*bitOffsetInGroup]; p_cnProcBuf = &cnProcBuf[lut_startAddrCnGroups[2] + j*bitOffsetInGroup];
#pragma omp simd
for (i=0; i<lut_numCnInCnGroups[2]; i++) for (i=0; i<lut_numCnInCnGroups[2]; i++)
{ {
idxBn = lut_startAddrBnProcBuf_CNG5[j][i] + lut_bnPosBnProcBuf_CNG5[j][i]*Z; idxBn = lut_startAddrBnProcBuf_CNG5[j][i] + lut_bnPosBnProcBuf_CNG5[j][i]*Z;
nrLDPC_circ_memcpy(p_cnProcBuf, &bnProcBufRes[idxBn], Z, lut_circShift_CNG5[j][i]); nrLDPC_circ_memcpy1(p_cnProcBuf, &bnProcBufRes[idxBn], Z, lut_circShift_CNG5[j][i]);
p_cnProcBuf += Z; p_cnProcBuf += Z;
} }
} }
...@@ -1064,15 +1070,15 @@ static inline void nrLDPC_bn2cnProcBuf_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf ...@@ -1064,15 +1070,15 @@ static inline void nrLDPC_bn2cnProcBuf_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf
// CN group with 6 BNs // CN group with 6 BNs
bitOffsetInGroup = lut_numCnInCnGroups_BG1_R13[3]*NR_LDPC_ZMAX; bitOffsetInGroup = lut_numCnInCnGroups_BG1_R13[3]*NR_LDPC_ZMAX;
//#pragma omp simd
for (j=0; j<5; j++) for (j=0; j<5; j++)
{ {
p_cnProcBuf = &cnProcBuf[lut_startAddrCnGroups[3] + j*bitOffsetInGroup]; p_cnProcBuf = &cnProcBuf[lut_startAddrCnGroups[3] + j*bitOffsetInGroup];
#pragma omp simd
for (i=0; i<lut_numCnInCnGroups[3]; i++) for (i=0; i<lut_numCnInCnGroups[3]; i++)
{ {
idxBn = lut_startAddrBnProcBuf_CNG6[j][i] + lut_bnPosBnProcBuf_CNG6[j][i]*Z; idxBn = lut_startAddrBnProcBuf_CNG6[j][i] + lut_bnPosBnProcBuf_CNG6[j][i]*Z;
nrLDPC_circ_memcpy(p_cnProcBuf, &bnProcBufRes[idxBn], Z, lut_circShift_CNG6[j][i]); nrLDPC_circ_memcpy1(p_cnProcBuf, &bnProcBufRes[idxBn], Z, lut_circShift_CNG6[j][i]);
p_cnProcBuf += Z; p_cnProcBuf += Z;
} }
} }
...@@ -1081,16 +1087,15 @@ static inline void nrLDPC_bn2cnProcBuf_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf ...@@ -1081,16 +1087,15 @@ static inline void nrLDPC_bn2cnProcBuf_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf
// CN group with 7 BNs // CN group with 7 BNs
bitOffsetInGroup = lut_numCnInCnGroups_BG1_R13[4]*NR_LDPC_ZMAX; bitOffsetInGroup = lut_numCnInCnGroups_BG1_R13[4]*NR_LDPC_ZMAX;
//#pragma omp simd
for (j=0; j<6; j++) for (j=0; j<6; j++)
{ {
p_cnProcBuf = &cnProcBuf[lut_startAddrCnGroups[4] + j*bitOffsetInGroup]; p_cnProcBuf = &cnProcBuf[lut_startAddrCnGroups[4] + j*bitOffsetInGroup];
#pragma omp simd for (i=0; i<lut_numCnInCnGroups[4]; i++)
for (i=0; i<lut_numCnInCnGroups[4]; i++)
{ {
idxBn = lut_startAddrBnProcBuf_CNG7[j][i] + lut_bnPosBnProcBuf_CNG7[j][i]*Z; idxBn = lut_startAddrBnProcBuf_CNG7[j][i] + lut_bnPosBnProcBuf_CNG7[j][i]*Z;
nrLDPC_circ_memcpy(p_cnProcBuf, &bnProcBufRes[idxBn], Z, lut_circShift_CNG7[j][i]); nrLDPC_circ_memcpy1(p_cnProcBuf, &bnProcBufRes[idxBn], Z, lut_circShift_CNG7[j][i]);
p_cnProcBuf += Z; p_cnProcBuf += Z;
} }
} }
...@@ -1099,14 +1104,13 @@ static inline void nrLDPC_bn2cnProcBuf_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf ...@@ -1099,14 +1104,13 @@ static inline void nrLDPC_bn2cnProcBuf_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf
// CN group with 8 BNs // CN group with 8 BNs
bitOffsetInGroup = lut_numCnInCnGroups_BG1_R13[5]*NR_LDPC_ZMAX; bitOffsetInGroup = lut_numCnInCnGroups_BG1_R13[5]*NR_LDPC_ZMAX;
#pragma omp simd
for (j=0; j<7; j++) for (j=0; j<7; j++)
{ {
p_cnProcBuf = &cnProcBuf[lut_startAddrCnGroups[5] + j*bitOffsetInGroup]; p_cnProcBuf = &cnProcBuf[lut_startAddrCnGroups[5] + j*bitOffsetInGroup];
for (i=0; i<lut_numCnInCnGroups[5]; i++) for (i=0; i<lut_numCnInCnGroups[5]; i++)
{ {
idxBn = lut_startAddrBnProcBuf_CNG8[j][i] + lut_bnPosBnProcBuf_CNG8[j][i]*Z; idxBn = lut_startAddrBnProcBuf_CNG8[j][i] + lut_bnPosBnProcBuf_CNG8[j][i]*Z;
nrLDPC_circ_memcpy(p_cnProcBuf, &bnProcBufRes[idxBn], Z, lut_circShift_CNG8[j][i]); nrLDPC_circ_memcpy1(p_cnProcBuf, &bnProcBufRes[idxBn], Z, lut_circShift_CNG8[j][i]);
p_cnProcBuf += Z; p_cnProcBuf += Z;
} }
} }
...@@ -1115,15 +1119,14 @@ static inline void nrLDPC_bn2cnProcBuf_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf ...@@ -1115,15 +1119,14 @@ static inline void nrLDPC_bn2cnProcBuf_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf
// CN group with 9 BNs // CN group with 9 BNs
bitOffsetInGroup = lut_numCnInCnGroups_BG1_R13[6]*NR_LDPC_ZMAX; bitOffsetInGroup = lut_numCnInCnGroups_BG1_R13[6]*NR_LDPC_ZMAX;
// #pragma omp simd
for (j=0; j<8; j++) for (j=0; j<8; j++)
{ {
p_cnProcBuf = &cnProcBuf[lut_startAddrCnGroups[6] + j*bitOffsetInGroup]; p_cnProcBuf = &cnProcBuf[lut_startAddrCnGroups[6] + j*bitOffsetInGroup];
#pragma omp simd for (i=0; i<lut_numCnInCnGroups[6]; i++)
for (i=0; i<lut_numCnInCnGroups[6]; i++)
{ {
idxBn = lut_startAddrBnProcBuf_CNG9[j][i] + lut_bnPosBnProcBuf_CNG9[j][i]*Z; idxBn = lut_startAddrBnProcBuf_CNG9[j][i] + lut_bnPosBnProcBuf_CNG9[j][i]*Z;
nrLDPC_circ_memcpy(p_cnProcBuf, &bnProcBufRes[idxBn], Z, lut_circShift_CNG9[j][i]); nrLDPC_circ_memcpy1(p_cnProcBuf, &bnProcBufRes[idxBn], Z, lut_circShift_CNG9[j][i]);
p_cnProcBuf += Z; p_cnProcBuf += Z;
} }
} }
...@@ -1132,15 +1135,14 @@ static inline void nrLDPC_bn2cnProcBuf_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf ...@@ -1132,15 +1135,14 @@ static inline void nrLDPC_bn2cnProcBuf_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf
// CN group with 10 BNs // CN group with 10 BNs
bitOffsetInGroup = lut_numCnInCnGroups_BG1_R13[7]*NR_LDPC_ZMAX; bitOffsetInGroup = lut_numCnInCnGroups_BG1_R13[7]*NR_LDPC_ZMAX;
//#pragma omp simd
for (j=0; j<9; j++) for (j=0; j<9; j++)
{ {
p_cnProcBuf = &cnProcBuf[lut_startAddrCnGroups[7] + j*bitOffsetInGroup]; p_cnProcBuf = &cnProcBuf[lut_startAddrCnGroups[7] + j*bitOffsetInGroup];
#pragma omp simd for (i=0; i<lut_numCnInCnGroups[7]; i++)
for (i=0; i<lut_numCnInCnGroups[7]; i++)
{ {
idxBn = lut_startAddrBnProcBuf_CNG10[j][i] + lut_bnPosBnProcBuf_CNG10[j][i]*Z; idxBn = lut_startAddrBnProcBuf_CNG10[j][i] + lut_bnPosBnProcBuf_CNG10[j][i]*Z;
nrLDPC_circ_memcpy(p_cnProcBuf, &bnProcBufRes[idxBn], Z, lut_circShift_CNG10[j][i]); nrLDPC_circ_memcpy1(p_cnProcBuf, &bnProcBufRes[idxBn], Z, lut_circShift_CNG10[j][i]);
p_cnProcBuf += Z; p_cnProcBuf += Z;
} }
} }
...@@ -1149,15 +1151,14 @@ static inline void nrLDPC_bn2cnProcBuf_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf ...@@ -1149,15 +1151,14 @@ static inline void nrLDPC_bn2cnProcBuf_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf
// CN group with 19 BNs // CN group with 19 BNs
bitOffsetInGroup = lut_numCnInCnGroups_BG1_R13[8]*NR_LDPC_ZMAX; bitOffsetInGroup = lut_numCnInCnGroups_BG1_R13[8]*NR_LDPC_ZMAX;
//#pragma omp simd
for (j=0; j<19; j++) for (j=0; j<19; j++)
{ {
p_cnProcBuf = &cnProcBuf[lut_startAddrCnGroups[8] + j*bitOffsetInGroup]; p_cnProcBuf = &cnProcBuf[lut_startAddrCnGroups[8] + j*bitOffsetInGroup];
#pragma omp simd
for (i=0; i<lut_numCnInCnGroups[8]; i++) for (i=0; i<lut_numCnInCnGroups[8]; i++)
{ {
idxBn = lut_startAddrBnProcBuf_CNG19[j][i] + lut_bnPosBnProcBuf_CNG19[j][i]*Z; idxBn = lut_startAddrBnProcBuf_CNG19[j][i] + lut_bnPosBnProcBuf_CNG19[j][i]*Z;
nrLDPC_circ_memcpy(p_cnProcBuf, &bnProcBufRes[idxBn], Z, lut_circShift_CNG19[j][i]); nrLDPC_circ_memcpy1(p_cnProcBuf, &bnProcBufRes[idxBn], Z, lut_circShift_CNG19[j][i]);
p_cnProcBuf += Z; p_cnProcBuf += Z;
} }
} }
...@@ -1190,16 +1191,18 @@ static inline void nrLDPC_llrRes2llrOut(t_nrLDPC_lut* p_lut, int8_t* llrOut, t_n ...@@ -1190,16 +1191,18 @@ static inline void nrLDPC_llrRes2llrOut(t_nrLDPC_lut* p_lut, int8_t* llrOut, t_n
// Copy LLRs connected to 1 CN // Copy LLRs connected to 1 CN
if (numBn2CnG1 > 0) if (numBn2CnG1 > 0)
{ {
memcpy(&llrOut[colG1], llrRes, numBn2CnG1*Z); memcpy1(&llrOut[colG1], llrRes, numBn2CnG1*Z);
} }
for (i=0; i<startColParity; i++) for (i=0; i<startColParity; i++)
{ {
idxBn = lut_llr2llrProcBufAddr[i] + lut_llr2llrProcBufBnPos[i]*Z; idxBn = lut_llr2llrProcBufAddr[i] + lut_llr2llrProcBufBnPos[i]*Z;
memcpy(p_llrOut, &llrRes[idxBn], Z); memcpy1(p_llrOut, &llrRes[idxBn], Z);
p_llrOut += Z; p_llrOut += Z;
} }
} }
#endif #endif
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment