Commit 119be5d4 authored by Sy's avatar Sy

Re-implementation of memcpy--> more faster | llr2cnProcBuf, cn2bnProcbuf,...

Re-implementation of memcpy--> more faster | llr2cnProcBuf, cn2bnProcbuf, bn2cnProcBuf are  optimized
parent cb8760a4
/*
* Licensed to the OpenAirInterface (OAI) Software Alliance under one or more
* contributor license agreements. See the NOTICE file distributed with
......@@ -21,13 +22,8 @@
/*!\file nrLDPC_mPass.h
* \brief Defines the functions for message passing
* \author Sebastian Wagner (TCL Communications) Email: <mailto:sebastian.wagner@tcl.com>
* \date 30-09-2019
* \version 2.0
* \note
* \warning
*/
*
*/
#ifndef __NR_LDPC_MPASS__H__
#define __NR_LDPC_MPASS__H__
......@@ -35,7 +31,7 @@
#include "nrLDPCdecoder_defs.h"
#include <omp.h>
/**
\brief Circular memcpy
\brief Circular memcpy1
|<- rem->|<- circular shift ->|
(src) str2 = |--------xxxxxxxxxxxxxxxxxxxxx|
\_______________
......@@ -46,17 +42,27 @@
\param Z Lifting size
\param cshift Circular shift
*/
static inline void *nrLDPC_inv_circ_memcpy(int8_t *str1, const int8_t *str2, uint16_t Z, uint16_t cshift)
//more faster memcpy by using "rep movsb", which on modern processors is highly optimized
void *memcpy1(void *dst, const void *src, size_t n)
{
void *ret = dst;
asm volatile("rep movsb" : "+D" (dst) : "c"(n), "S"(src) : "cc", "memory");
return ret;
}
static inline void *nrLDPC_inv_circ_memcpy1(int8_t *str1, const int8_t *str2, uint16_t Z, uint16_t cshift)
{
uint16_t rem = Z - cshift;
memcpy(str1+cshift, str2 , rem);
memcpy(str1 , str2+rem, cshift);
memcpy1(str1+cshift, str2 , rem);
memcpy1(str1 , str2+rem, cshift);
return(str1);
}
/**
\brief Inverse circular memcpy
\brief Inverse circular memcpy1
|<- circular shift ->|<- rem->|
(src) str2 = |xxxxxxxxxxxxxxxxxxxx\--------|
\
......@@ -66,11 +72,11 @@ static inline void *nrLDPC_inv_circ_memcpy(int8_t *str1, const int8_t *str2, uin
\param Z Lifting size
\param cshift Circular shift
*/
static inline void *nrLDPC_circ_memcpy(int8_t *str1, const int8_t *str2, uint16_t Z, uint16_t cshift)
static inline void *nrLDPC_circ_memcpy1(int8_t *str1, const int8_t *str2, uint16_t Z, uint16_t cshift)
{
uint16_t rem = Z - cshift;
memcpy(str1 , str2+cshift, rem);
memcpy(str1+rem , str2 , cshift);
memcpy1(str1 , str2+cshift, rem);
memcpy1(str1+rem , str2 , cshift);
return(str1);
}
......@@ -114,14 +120,14 @@ static inline void nrLDPC_llr2llrProcBuf(t_nrLDPC_lut* p_lut, int8_t* llr, t_nrL
// Copy LLRs connected to 1 CN
if (numBn2CnG1 > 0)
{
memcpy(&llrProcBuf[0], &llr[colG1], numBn2CnG1*Z);
memcpy1(&llrProcBuf[0], &llr[colG1], numBn2CnG1*Z);
}
// First 2 columns might be set to zero directly if it's true they always belong to the groups with highest number of connected CNs...
for (i=0; i<startColParity; i++)
{
idxBn = lut_llr2llrProcBufAddr[i] + lut_llr2llrProcBufBnPos[i]*Z;
memcpy(&llrProcBuf[idxBn], llr, Z);
memcpy1(&llrProcBuf[idxBn], llr, Z);
llr += Z;
}
}
......@@ -171,8 +177,8 @@ static inline void nrLDPC_llr2CnProcBuf_BG1(t_nrLDPC_lut* p_lut, int8_t* llr, t_
bitOffsetInGroup = lut_numCnInCnGroups_BG1_R13[0]*NR_LDPC_ZMAX;
// #pragma omp simd
// #pragma omp parallel for schedule(dynamic)
// #pragma omp simd
// #pragma omp parallel for schedule(dynamic)
for (j=0; j<3; j++)
{
......@@ -180,7 +186,7 @@ static inline void nrLDPC_llr2CnProcBuf_BG1(t_nrLDPC_lut* p_lut, int8_t* llr, t_
idxBn = lut_posBnInCnProcBuf_CNG3[j][0]*Z;
nrLDPC_circ_memcpy(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG3[j][0]);
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG3[j][0]);
}
// =====================================================================
......@@ -196,7 +202,7 @@ static inline void nrLDPC_llr2CnProcBuf_BG1(t_nrLDPC_lut* p_lut, int8_t* llr, t_
{
idxBn = lut_posBnInCnProcBuf_CNG4[j][i]*Z;
nrLDPC_circ_memcpy(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG4[j][i]);
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG4[j][i]);
p_cnProcBuf += Z;
}
......@@ -216,7 +222,7 @@ static inline void nrLDPC_llr2CnProcBuf_BG1(t_nrLDPC_lut* p_lut, int8_t* llr, t_
for (i=0; i<lut_numCnInCnGroups[2]; i++)
{
idxBn = lut_posBnInCnProcBuf_CNG5[j][i]*Z;
nrLDPC_circ_memcpy(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[j][i]);
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[j][i]);
p_cnProcBuf += Z;
}
}
......@@ -233,7 +239,7 @@ static inline void nrLDPC_llr2CnProcBuf_BG1(t_nrLDPC_lut* p_lut, int8_t* llr, t_
for (i=0; i<lut_numCnInCnGroups[3]; i++)
{
idxBn = lut_posBnInCnProcBuf_CNG6[j][i]*Z;
nrLDPC_circ_memcpy(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[j][i]);
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[j][i]);
p_cnProcBuf += Z;
}
}
......@@ -252,7 +258,7 @@ static inline void nrLDPC_llr2CnProcBuf_BG1(t_nrLDPC_lut* p_lut, int8_t* llr, t_
for (i=0; i<lut_numCnInCnGroups[4]; i++)
{
idxBn = lut_posBnInCnProcBuf_CNG7[j][i]*Z;
nrLDPC_circ_memcpy(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG7[j][i]);
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG7[j][i]);
p_cnProcBuf += Z;
}
}
......@@ -270,7 +276,7 @@ static inline void nrLDPC_llr2CnProcBuf_BG1(t_nrLDPC_lut* p_lut, int8_t* llr, t_
for (i=0; i<lut_numCnInCnGroups[5]; i++)
{
idxBn = lut_posBnInCnProcBuf_CNG8[j][i]*Z;
nrLDPC_circ_memcpy(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG8[j][i]);
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG8[j][i]);
p_cnProcBuf += Z;
}
}
......@@ -287,7 +293,7 @@ static inline void nrLDPC_llr2CnProcBuf_BG1(t_nrLDPC_lut* p_lut, int8_t* llr, t_
for (i=0; i<lut_numCnInCnGroups[6]; i++)
{
idxBn = lut_posBnInCnProcBuf_CNG9[j][i]*Z;
nrLDPC_circ_memcpy(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG9[j][i]);
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG9[j][i]);
p_cnProcBuf += Z;
}
}
......@@ -304,7 +310,7 @@ static inline void nrLDPC_llr2CnProcBuf_BG1(t_nrLDPC_lut* p_lut, int8_t* llr, t_
for (i=0; i<lut_numCnInCnGroups[7]; i++)
{
idxBn = lut_posBnInCnProcBuf_CNG10[j][i]*Z;
nrLDPC_circ_memcpy(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG10[j][i]);
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG10[j][i]);
p_cnProcBuf += Z;
}
}
......@@ -322,7 +328,7 @@ static inline void nrLDPC_llr2CnProcBuf_BG1(t_nrLDPC_lut* p_lut, int8_t* llr, t_
for (i=0; i<lut_numCnInCnGroups[8]; i++)
{
idxBn = lut_posBnInCnProcBuf_CNG19[j][i]*Z;
nrLDPC_circ_memcpy(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[j][i]);
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[j][i]);
p_cnProcBuf += Z;
}
}
......@@ -390,7 +396,7 @@ static inline void nrLDPC_llr2CnProcBuf_BG2(t_nrLDPC_lut* p_lut, int8_t* llr, t_
for (i=0; i<lut_numCnInCnGroups[0]; i++)
{
idxBn = lut_posBnInCnProcBuf_CNG3[j][i]*Z;
nrLDPC_circ_memcpy(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG3[j][i]);
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG3[j][i]);
p_cnProcBuf += Z;
}
}
......@@ -407,7 +413,7 @@ static inline void nrLDPC_llr2CnProcBuf_BG2(t_nrLDPC_lut* p_lut, int8_t* llr, t_
for (i=0; i<lut_numCnInCnGroups[1]; i++)
{
idxBn = lut_posBnInCnProcBuf_CNG4[j][i]*Z;
nrLDPC_circ_memcpy(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG4[j][i]);
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG4[j][i]);
p_cnProcBuf += Z;
}
}
......@@ -424,7 +430,7 @@ static inline void nrLDPC_llr2CnProcBuf_BG2(t_nrLDPC_lut* p_lut, int8_t* llr, t_
for (i=0; i<lut_numCnInCnGroups[2]; i++)
{
idxBn = lut_posBnInCnProcBuf_CNG5[j][i]*Z;
nrLDPC_circ_memcpy(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[j][i]);
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[j][i]);
p_cnProcBuf += Z;
}
}
......@@ -441,7 +447,7 @@ static inline void nrLDPC_llr2CnProcBuf_BG2(t_nrLDPC_lut* p_lut, int8_t* llr, t_
for (i=0; i<lut_numCnInCnGroups[3]; i++)
{
idxBn = lut_posBnInCnProcBuf_CNG6[j][i]*Z;
nrLDPC_circ_memcpy(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[j][i]);
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[j][i]);
p_cnProcBuf += Z;
}
}
......@@ -458,7 +464,7 @@ static inline void nrLDPC_llr2CnProcBuf_BG2(t_nrLDPC_lut* p_lut, int8_t* llr, t_
for (i=0; i<lut_numCnInCnGroups[4]; i++)
{
idxBn = lut_posBnInCnProcBuf_CNG8[j][i]*Z;
nrLDPC_circ_memcpy(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG8[j][i]);
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG8[j][i]);
p_cnProcBuf += Z;
}
}
......@@ -475,7 +481,7 @@ static inline void nrLDPC_llr2CnProcBuf_BG2(t_nrLDPC_lut* p_lut, int8_t* llr, t_
for (i=0; i<lut_numCnInCnGroups[5]; i++)
{
idxBn = lut_posBnInCnProcBuf_CNG10[j][i]*Z;
nrLDPC_circ_memcpy(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG10[j][i]);
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG10[j][i]);
p_cnProcBuf += Z;
}
}
......@@ -533,7 +539,7 @@ static inline void nrLDPC_cn2bnProcBuf_BG2(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf
for (i=0; i<lut_numCnInCnGroups[0]; i++)
{
idxBn = lut_startAddrBnProcBuf_CNG3[j][i] + lut_bnPosBnProcBuf_CNG3[j][i]*Z;
nrLDPC_inv_circ_memcpy(&bnProcBuf[idxBn],p_cnProcBufRes,Z,lut_circShift_CNG3[j][i]);
nrLDPC_inv_circ_memcpy1(&bnProcBuf[idxBn],p_cnProcBufRes,Z,lut_circShift_CNG3[j][i]);
p_cnProcBufRes += Z;
}
}
......@@ -550,7 +556,7 @@ static inline void nrLDPC_cn2bnProcBuf_BG2(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf
for (i=0; i<lut_numCnInCnGroups[1]; i++)
{
idxBn = lut_startAddrBnProcBuf_CNG4[j][i] + lut_bnPosBnProcBuf_CNG4[j][i]*Z;
nrLDPC_inv_circ_memcpy(&bnProcBuf[idxBn],p_cnProcBufRes,Z,lut_circShift_CNG4[j][i]);
nrLDPC_inv_circ_memcpy1(&bnProcBuf[idxBn],p_cnProcBufRes,Z,lut_circShift_CNG4[j][i]);
p_cnProcBufRes += Z;
}
}
......@@ -567,7 +573,7 @@ static inline void nrLDPC_cn2bnProcBuf_BG2(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf
for (i=0; i<lut_numCnInCnGroups[2]; i++)
{
idxBn = lut_startAddrBnProcBuf_CNG5[j][i] + lut_bnPosBnProcBuf_CNG5[j][i]*Z;
nrLDPC_inv_circ_memcpy(&bnProcBuf[idxBn],p_cnProcBufRes,Z,lut_circShift_CNG5[j][i]);
nrLDPC_inv_circ_memcpy1(&bnProcBuf[idxBn],p_cnProcBufRes,Z,lut_circShift_CNG5[j][i]);
p_cnProcBufRes += Z;
}
}
......@@ -584,7 +590,7 @@ static inline void nrLDPC_cn2bnProcBuf_BG2(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf
for (i=0; i<lut_numCnInCnGroups[3]; i++)
{
idxBn = lut_startAddrBnProcBuf_CNG6[j][i] + lut_bnPosBnProcBuf_CNG6[j][i]*Z;
nrLDPC_inv_circ_memcpy(&bnProcBuf[idxBn],p_cnProcBufRes,Z,lut_circShift_CNG6[j][i]);
nrLDPC_inv_circ_memcpy1(&bnProcBuf[idxBn],p_cnProcBufRes,Z,lut_circShift_CNG6[j][i]);
p_cnProcBufRes += Z;
}
}
......@@ -601,7 +607,7 @@ static inline void nrLDPC_cn2bnProcBuf_BG2(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf
for (i=0; i<lut_numCnInCnGroups[4]; i++)
{
idxBn = lut_startAddrBnProcBuf_CNG8[j][i] + lut_bnPosBnProcBuf_CNG8[j][i]*Z;
nrLDPC_inv_circ_memcpy(&bnProcBuf[idxBn],p_cnProcBufRes,Z,lut_circShift_CNG8[j][i]);
nrLDPC_inv_circ_memcpy1(&bnProcBuf[idxBn],p_cnProcBufRes,Z,lut_circShift_CNG8[j][i]);
p_cnProcBufRes += Z;
}
}
......@@ -618,7 +624,7 @@ static inline void nrLDPC_cn2bnProcBuf_BG2(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf
for (i=0; i<lut_numCnInCnGroups[5]; i++)
{
idxBn = lut_startAddrBnProcBuf_CNG10[j][i] + lut_bnPosBnProcBuf_CNG10[j][i]*Z;
nrLDPC_inv_circ_memcpy(&bnProcBuf[idxBn],p_cnProcBufRes,Z,lut_circShift_CNG10[j][i]);
nrLDPC_inv_circ_memcpy1(&bnProcBuf[idxBn],p_cnProcBufRes,Z,lut_circShift_CNG10[j][i]);
p_cnProcBufRes += Z;
}
}
......@@ -681,7 +687,7 @@ static inline void nrLDPC_cn2bnProcBuf_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf
for (j=0; j<3; j++)
{
p_cnProcBufRes = &cnProcBufRes[lut_startAddrCnGroups[0] + j*bitOffsetInGroup];
nrLDPC_inv_circ_memcpy(&bnProcBuf[lut_startAddrBnProcBuf_CNG3[j][0]],p_cnProcBufRes,Z,lut_circShift_CNG3[j][0]);
nrLDPC_inv_circ_memcpy1(&bnProcBuf[lut_startAddrBnProcBuf_CNG3[j][0]],p_cnProcBufRes,Z,lut_circShift_CNG3[j][0]);
}
// =====================================================================
......@@ -696,7 +702,7 @@ static inline void nrLDPC_cn2bnProcBuf_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf
for (i=0; i<lut_numCnInCnGroups[1]; i++)
{
idxBn = lut_startAddrBnProcBuf_CNG4[j][i] + lut_bnPosBnProcBuf_CNG4[j][i]*Z;
nrLDPC_inv_circ_memcpy(&bnProcBuf[idxBn],p_cnProcBufRes,Z,lut_circShift_CNG4[j][i]);
nrLDPC_inv_circ_memcpy1(&bnProcBuf[idxBn],p_cnProcBufRes,Z,lut_circShift_CNG4[j][i]);
p_cnProcBufRes += Z;
}
}
......@@ -713,7 +719,7 @@ static inline void nrLDPC_cn2bnProcBuf_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf
for (i=0; i<lut_numCnInCnGroups[2]; i++)
{
idxBn = lut_startAddrBnProcBuf_CNG5[j][i] + lut_bnPosBnProcBuf_CNG5[j][i]*Z;
nrLDPC_inv_circ_memcpy(&bnProcBuf[idxBn],p_cnProcBufRes,Z,lut_circShift_CNG5[j][i]);
nrLDPC_inv_circ_memcpy1(&bnProcBuf[idxBn],p_cnProcBufRes,Z,lut_circShift_CNG5[j][i]);
p_cnProcBufRes += Z;
}
}
......@@ -730,7 +736,7 @@ static inline void nrLDPC_cn2bnProcBuf_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf
for (i=0; i<lut_numCnInCnGroups[3]; i++)
{
idxBn = lut_startAddrBnProcBuf_CNG6[j][i] + lut_bnPosBnProcBuf_CNG6[j][i]*Z;
nrLDPC_inv_circ_memcpy(&bnProcBuf[idxBn],p_cnProcBufRes,Z,lut_circShift_CNG6[j][i]);
nrLDPC_inv_circ_memcpy1(&bnProcBuf[idxBn],p_cnProcBufRes,Z,lut_circShift_CNG6[j][i]);
p_cnProcBufRes += Z;
}
}
......@@ -747,7 +753,7 @@ static inline void nrLDPC_cn2bnProcBuf_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf
for (i=0; i<lut_numCnInCnGroups[4]; i++)
{
idxBn = lut_startAddrBnProcBuf_CNG7[j][i] + lut_bnPosBnProcBuf_CNG7[j][i]*Z;
nrLDPC_inv_circ_memcpy(&bnProcBuf[idxBn],p_cnProcBufRes,Z,lut_circShift_CNG7[j][i]);
nrLDPC_inv_circ_memcpy1(&bnProcBuf[idxBn],p_cnProcBufRes,Z,lut_circShift_CNG7[j][i]);
p_cnProcBufRes += Z;
}
}
......@@ -764,7 +770,7 @@ static inline void nrLDPC_cn2bnProcBuf_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf
for (i=0; i<lut_numCnInCnGroups[5]; i++)
{
idxBn = lut_startAddrBnProcBuf_CNG8[j][i] + lut_bnPosBnProcBuf_CNG8[j][i]*Z;
nrLDPC_inv_circ_memcpy(&bnProcBuf[idxBn],p_cnProcBufRes,Z,lut_circShift_CNG8[j][i]);
nrLDPC_inv_circ_memcpy1(&bnProcBuf[idxBn],p_cnProcBufRes,Z,lut_circShift_CNG8[j][i]);
p_cnProcBufRes += Z;
}
}
......@@ -781,7 +787,7 @@ static inline void nrLDPC_cn2bnProcBuf_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf
for (i=0; i<lut_numCnInCnGroups[6]; i++)
{
idxBn = lut_startAddrBnProcBuf_CNG9[j][i] + lut_bnPosBnProcBuf_CNG9[j][i]*Z;
nrLDPC_inv_circ_memcpy(&bnProcBuf[idxBn],p_cnProcBufRes,Z,lut_circShift_CNG9[j][i]);
nrLDPC_inv_circ_memcpy1(&bnProcBuf[idxBn],p_cnProcBufRes,Z,lut_circShift_CNG9[j][i]);
p_cnProcBufRes += Z;
}
}
......@@ -798,7 +804,7 @@ static inline void nrLDPC_cn2bnProcBuf_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf
for (i=0; i<lut_numCnInCnGroups[7]; i++)
{
idxBn = lut_startAddrBnProcBuf_CNG10[j][i] + lut_bnPosBnProcBuf_CNG10[j][i]*Z;
nrLDPC_inv_circ_memcpy(&bnProcBuf[idxBn],p_cnProcBufRes,Z,lut_circShift_CNG10[j][i]);
nrLDPC_inv_circ_memcpy1(&bnProcBuf[idxBn],p_cnProcBufRes,Z,lut_circShift_CNG10[j][i]);
p_cnProcBufRes += Z;
}
}
......@@ -815,7 +821,7 @@ static inline void nrLDPC_cn2bnProcBuf_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf
for (i=0; i<lut_numCnInCnGroups[8]; i++)
{
idxBn = lut_startAddrBnProcBuf_CNG19[j][i] + lut_bnPosBnProcBuf_CNG19[j][i]*Z;
nrLDPC_inv_circ_memcpy(&bnProcBuf[idxBn],p_cnProcBufRes,Z,lut_circShift_CNG19[j][i]);
nrLDPC_inv_circ_memcpy1(&bnProcBuf[idxBn],p_cnProcBufRes,Z,lut_circShift_CNG19[j][i]);
p_cnProcBufRes += Z;
}
}
......@@ -877,7 +883,7 @@ static inline void nrLDPC_bn2cnProcBuf_BG2(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf
for (i=0; i<lut_numCnInCnGroups[0]; i++)
{
idxBn = lut_startAddrBnProcBuf_CNG3[j][i] + lut_bnPosBnProcBuf_CNG3[j][i]*Z;
nrLDPC_circ_memcpy(p_cnProcBuf, &bnProcBufRes[idxBn], Z, lut_circShift_CNG3[j][i]);
nrLDPC_circ_memcpy1(p_cnProcBuf, &bnProcBufRes[idxBn], Z, lut_circShift_CNG3[j][i]);
p_cnProcBuf += Z;
}
}
......@@ -893,7 +899,7 @@ static inline void nrLDPC_bn2cnProcBuf_BG2(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf
for (i=0; i<lut_numCnInCnGroups[1]; i++)
{
idxBn = lut_startAddrBnProcBuf_CNG4[j][i] + lut_bnPosBnProcBuf_CNG4[j][i]*Z;
nrLDPC_circ_memcpy(p_cnProcBuf, &bnProcBufRes[idxBn], Z, lut_circShift_CNG4[j][i]);
nrLDPC_circ_memcpy1(p_cnProcBuf, &bnProcBufRes[idxBn], Z, lut_circShift_CNG4[j][i]);
p_cnProcBuf += Z;
}
}
......@@ -909,7 +915,7 @@ static inline void nrLDPC_bn2cnProcBuf_BG2(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf
for (i=0; i<lut_numCnInCnGroups[2]; i++)
{
idxBn = lut_startAddrBnProcBuf_CNG5[j][i] + lut_bnPosBnProcBuf_CNG5[j][i]*Z;
nrLDPC_circ_memcpy(p_cnProcBuf, &bnProcBufRes[idxBn], Z, lut_circShift_CNG5[j][i]);
nrLDPC_circ_memcpy1(p_cnProcBuf, &bnProcBufRes[idxBn], Z, lut_circShift_CNG5[j][i]);
p_cnProcBuf += Z;
}
}
......@@ -925,7 +931,7 @@ static inline void nrLDPC_bn2cnProcBuf_BG2(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf
for (i=0; i<lut_numCnInCnGroups[3]; i++)
{
idxBn = lut_startAddrBnProcBuf_CNG6[j][i] + lut_bnPosBnProcBuf_CNG6[j][i]*Z;
nrLDPC_circ_memcpy(p_cnProcBuf, &bnProcBufRes[idxBn], Z, lut_circShift_CNG6[j][i]);
nrLDPC_circ_memcpy1(p_cnProcBuf, &bnProcBufRes[idxBn], Z, lut_circShift_CNG6[j][i]);
p_cnProcBuf += Z;
}
}
......@@ -941,7 +947,7 @@ static inline void nrLDPC_bn2cnProcBuf_BG2(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf
for (i=0; i<lut_numCnInCnGroups[4]; i++)
{
idxBn = lut_startAddrBnProcBuf_CNG8[j][i] + lut_bnPosBnProcBuf_CNG8[j][i]*Z;
nrLDPC_circ_memcpy(p_cnProcBuf, &bnProcBufRes[idxBn], Z, lut_circShift_CNG8[j][i]);
nrLDPC_circ_memcpy1(p_cnProcBuf, &bnProcBufRes[idxBn], Z, lut_circShift_CNG8[j][i]);
p_cnProcBuf += Z;
}
}
......@@ -957,7 +963,7 @@ static inline void nrLDPC_bn2cnProcBuf_BG2(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf
for (i=0; i<lut_numCnInCnGroups[5]; i++)
{
idxBn = lut_startAddrBnProcBuf_CNG10[j][i] + lut_bnPosBnProcBuf_CNG10[j][i]*Z;
nrLDPC_circ_memcpy(p_cnProcBuf, &bnProcBufRes[idxBn], Z, lut_circShift_CNG10[j][i]);
nrLDPC_circ_memcpy1(p_cnProcBuf, &bnProcBufRes[idxBn], Z, lut_circShift_CNG10[j][i]);
p_cnProcBuf += Z;
}
}
......@@ -1018,44 +1024,44 @@ static inline void nrLDPC_bn2cnProcBuf_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf
// =====================================================================
// CN group with 3 BNs
bitOffsetInGroup = lut_numCnInCnGroups_BG1_R13[0]*NR_LDPC_ZMAX;
#pragma omp simd
for (j=0;j<2; j++)
{
p_cnProcBuf = &cnProcBuf[lut_startAddrCnGroups[0] + j*bitOffsetInGroup];
nrLDPC_circ_memcpy(p_cnProcBuf, &bnProcBufRes[lut_startAddrBnProcBuf_CNG3[j][0]], Z, lut_circShift_CNG3[j][0]);
nrLDPC_circ_memcpy1(p_cnProcBuf, &bnProcBufRes[lut_startAddrBnProcBuf_CNG3[j][0]], Z, lut_circShift_CNG3[j][0]);
}
// =====================================================================
// CN group with 4 BNs
bitOffsetInGroup = lut_numCnInCnGroups_BG1_R13[1]*NR_LDPC_ZMAX;
// #pragma omp simd
for (j=0; j<3; j++)
{
p_cnProcBuf = &cnProcBuf[lut_startAddrCnGroups[1] + j*bitOffsetInGroup];
#pragma omp simd
for (i=0; i<lut_numCnInCnGroups[1]; i++)
for (i=0; i<lut_numCnInCnGroups[1]; i++)
{
idxBn = lut_startAddrBnProcBuf_CNG4[j][i] + lut_bnPosBnProcBuf_CNG4[j][i]*Z;
nrLDPC_circ_memcpy(p_cnProcBuf, &bnProcBufRes[idxBn], Z, lut_circShift_CNG4[j][i]);
nrLDPC_circ_memcpy1(p_cnProcBuf, &bnProcBufRes[idxBn], Z, lut_circShift_CNG4[j][i]);
p_cnProcBuf += Z;
}
}
}
// =====================================================================
// CN group with 5 BNs
bitOffsetInGroup = lut_numCnInCnGroups_BG1_R13[2]*NR_LDPC_ZMAX;
// #pragma omp simd
for (j=0; j<4; j++)
{
p_cnProcBuf = &cnProcBuf[lut_startAddrCnGroups[2] + j*bitOffsetInGroup];
#pragma omp simd
for (i=0; i<lut_numCnInCnGroups[2]; i++)
for (i=0; i<lut_numCnInCnGroups[2]; i++)
{
idxBn = lut_startAddrBnProcBuf_CNG5[j][i] + lut_bnPosBnProcBuf_CNG5[j][i]*Z;
nrLDPC_circ_memcpy(p_cnProcBuf, &bnProcBufRes[idxBn], Z, lut_circShift_CNG5[j][i]);
nrLDPC_circ_memcpy1(p_cnProcBuf, &bnProcBufRes[idxBn], Z, lut_circShift_CNG5[j][i]);
p_cnProcBuf += Z;
}
}
......@@ -1064,15 +1070,15 @@ static inline void nrLDPC_bn2cnProcBuf_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf
// CN group with 6 BNs
bitOffsetInGroup = lut_numCnInCnGroups_BG1_R13[3]*NR_LDPC_ZMAX;
//#pragma omp simd
for (j=0; j<5; j++)
{
p_cnProcBuf = &cnProcBuf[lut_startAddrCnGroups[3] + j*bitOffsetInGroup];
#pragma omp simd
for (i=0; i<lut_numCnInCnGroups[3]; i++)
for (i=0; i<lut_numCnInCnGroups[3]; i++)
{
idxBn = lut_startAddrBnProcBuf_CNG6[j][i] + lut_bnPosBnProcBuf_CNG6[j][i]*Z;
nrLDPC_circ_memcpy(p_cnProcBuf, &bnProcBufRes[idxBn], Z, lut_circShift_CNG6[j][i]);
nrLDPC_circ_memcpy1(p_cnProcBuf, &bnProcBufRes[idxBn], Z, lut_circShift_CNG6[j][i]);
p_cnProcBuf += Z;
}
}
......@@ -1081,16 +1087,15 @@ static inline void nrLDPC_bn2cnProcBuf_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf
// CN group with 7 BNs
bitOffsetInGroup = lut_numCnInCnGroups_BG1_R13[4]*NR_LDPC_ZMAX;
//#pragma omp simd
for (j=0; j<6; j++)
{
p_cnProcBuf = &cnProcBuf[lut_startAddrCnGroups[4] + j*bitOffsetInGroup];
#pragma omp simd
for (i=0; i<lut_numCnInCnGroups[4]; i++)
for (i=0; i<lut_numCnInCnGroups[4]; i++)
{
idxBn = lut_startAddrBnProcBuf_CNG7[j][i] + lut_bnPosBnProcBuf_CNG7[j][i]*Z;
nrLDPC_circ_memcpy(p_cnProcBuf, &bnProcBufRes[idxBn], Z, lut_circShift_CNG7[j][i]);
nrLDPC_circ_memcpy1(p_cnProcBuf, &bnProcBufRes[idxBn], Z, lut_circShift_CNG7[j][i]);
p_cnProcBuf += Z;
}
}
......@@ -1099,14 +1104,13 @@ static inline void nrLDPC_bn2cnProcBuf_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf
// CN group with 8 BNs
bitOffsetInGroup = lut_numCnInCnGroups_BG1_R13[5]*NR_LDPC_ZMAX;
#pragma omp simd
for (j=0; j<7; j++)
{
p_cnProcBuf = &cnProcBuf[lut_startAddrCnGroups[5] + j*bitOffsetInGroup];
for (i=0; i<lut_numCnInCnGroups[5]; i++)
{
idxBn = lut_startAddrBnProcBuf_CNG8[j][i] + lut_bnPosBnProcBuf_CNG8[j][i]*Z;
nrLDPC_circ_memcpy(p_cnProcBuf, &bnProcBufRes[idxBn], Z, lut_circShift_CNG8[j][i]);
nrLDPC_circ_memcpy1(p_cnProcBuf, &bnProcBufRes[idxBn], Z, lut_circShift_CNG8[j][i]);
p_cnProcBuf += Z;
}
}
......@@ -1115,15 +1119,14 @@ static inline void nrLDPC_bn2cnProcBuf_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf
// CN group with 9 BNs
bitOffsetInGroup = lut_numCnInCnGroups_BG1_R13[6]*NR_LDPC_ZMAX;
// #pragma omp simd
for (j=0; j<8; j++)
{
p_cnProcBuf = &cnProcBuf[lut_startAddrCnGroups[6] + j*bitOffsetInGroup];
#pragma omp simd
for (i=0; i<lut_numCnInCnGroups[6]; i++)
for (i=0; i<lut_numCnInCnGroups[6]; i++)
{
idxBn = lut_startAddrBnProcBuf_CNG9[j][i] + lut_bnPosBnProcBuf_CNG9[j][i]*Z;
nrLDPC_circ_memcpy(p_cnProcBuf, &bnProcBufRes[idxBn], Z, lut_circShift_CNG9[j][i]);
nrLDPC_circ_memcpy1(p_cnProcBuf, &bnProcBufRes[idxBn], Z, lut_circShift_CNG9[j][i]);
p_cnProcBuf += Z;
}
}
......@@ -1132,15 +1135,14 @@ static inline void nrLDPC_bn2cnProcBuf_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf
// CN group with 10 BNs
bitOffsetInGroup = lut_numCnInCnGroups_BG1_R13[7]*NR_LDPC_ZMAX;
//#pragma omp simd
for (j=0; j<9; j++)
{
p_cnProcBuf = &cnProcBuf[lut_startAddrCnGroups[7] + j*bitOffsetInGroup];
#pragma omp simd
for (i=0; i<lut_numCnInCnGroups[7]; i++)
for (i=0; i<lut_numCnInCnGroups[7]; i++)
{
idxBn = lut_startAddrBnProcBuf_CNG10[j][i] + lut_bnPosBnProcBuf_CNG10[j][i]*Z;
nrLDPC_circ_memcpy(p_cnProcBuf, &bnProcBufRes[idxBn], Z, lut_circShift_CNG10[j][i]);
nrLDPC_circ_memcpy1(p_cnProcBuf, &bnProcBufRes[idxBn], Z, lut_circShift_CNG10[j][i]);
p_cnProcBuf += Z;
}
}
......@@ -1149,15 +1151,14 @@ static inline void nrLDPC_bn2cnProcBuf_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf
// CN group with 19 BNs
bitOffsetInGroup = lut_numCnInCnGroups_BG1_R13[8]*NR_LDPC_ZMAX;
//#pragma omp simd
for (j=0; j<19; j++)
{
p_cnProcBuf = &cnProcBuf[lut_startAddrCnGroups[8] + j*bitOffsetInGroup];
#pragma omp simd
for (i=0; i<lut_numCnInCnGroups[8]; i++)
for (i=0; i<lut_numCnInCnGroups[8]; i++)
{
idxBn = lut_startAddrBnProcBuf_CNG19[j][i] + lut_bnPosBnProcBuf_CNG19[j][i]*Z;
nrLDPC_circ_memcpy(p_cnProcBuf, &bnProcBufRes[idxBn], Z, lut_circShift_CNG19[j][i]);
nrLDPC_circ_memcpy1(p_cnProcBuf, &bnProcBufRes[idxBn], Z, lut_circShift_CNG19[j][i]);
p_cnProcBuf += Z;
}
}
......@@ -1190,16 +1191,18 @@ static inline void nrLDPC_llrRes2llrOut(t_nrLDPC_lut* p_lut, int8_t* llrOut, t_n
// Copy LLRs connected to 1 CN
if (numBn2CnG1 > 0)
{
memcpy(&llrOut[colG1], llrRes, numBn2CnG1*Z);
memcpy1(&llrOut[colG1], llrRes, numBn2CnG1*Z);
}
for (i=0; i<startColParity; i++)
{
idxBn = lut_llr2llrProcBufAddr[i] + lut_llr2llrProcBufBnPos[i]*Z;
memcpy(p_llrOut, &llrRes[idxBn], Z);
memcpy1(p_llrOut, &llrRes[idxBn], Z);
p_llrOut += Z;
}
}
#endif
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment