From 79de8953aa290bb0d88e5ba4bb0d5033e293c5f0 Mon Sep 17 00:00:00 2001 From: sebastian <you@example.com> Date: Wed, 18 Sep 2019 17:37:16 +0200 Subject: [PATCH] Optimization of bn2cn shuffling --- .../PHY/CODING/nrLDPC_decoder/nrLDPC_mPass.h | 157 +++++++++++++++++- 1 file changed, 155 insertions(+), 2 deletions(-) diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_mPass.h b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_mPass.h index f3431eba24..3c1d9e3605 100644 --- a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_mPass.h +++ b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_mPass.h @@ -34,6 +34,26 @@ #include <string.h> #include "nrLDPC_defs.h" +/** + \brief Circular memcpy + (src) str2 = |xxxxxxxxxxxxxxxxxxxx\--------| + \ + (dst) str1 = |--------xxxxxxxxxxxxxxxxxxxxx| + \param str1 Pointer to the start of the destination buffer + \param str2 Pointer to the source buffer + \param Z Lifting size + \param cshift Cyclic shift +*/ +static inline void *nrLDPC_circ_memcpy(int8_t *str1, const int8_t *str2, uint16_t Z, uint16_t cshift) +{ + uint16_t rem = Z - cshift; + memcpy(str1+cshift, str2 , rem); + memcpy(str1 , str2+rem, cshift); + + //mexPrintf("memcpy(%p,%p,%d) | memcpy(%p,%p,%d) | rem = %d, cshift = %d\n", str1+cshift, str2, rem, str1,str2+rem,cshift,rem,cshift); + return(str1); +} + /** \brief Copies the input LLRs to their corresponding place in the LLR processing buffer. \param p_lut Pointer to decoder LUTs @@ -275,14 +295,21 @@ static inline void nrLDPC_cn2bnProcBuf_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf M = lut_numCnInCnGroups[0]*Z; bitOffsetInGroup = lut_numCnInCnGroups_BG1_R13[0]*NR_LDPC_ZMAX; + const uint32_t startAddrBnProcBuf_CNG3[3] = {111360, 100224, 0}; + const uint16_t cyclicShiftValues_Z384_CNG3[3] = {332, 181, 0}; + for (j=0; j<3; j++) { p_cnProcBufRes = &cnProcBufRes[lut_startAddrCnGroups[0] + j*bitOffsetInGroup]; + /* for (i=0; i<M; i++) { bnProcBuf[p_lut_cn2bn[j*M + i]] = p_cnProcBufRes[i]; } + */ + + nrLDPC_circ_memcpy(&bnProcBuf[startAddrBnProcBuf_CNG3[j]],p_cnProcBufRes,Z,cyclicShiftValues_Z384_CNG3[j]); } // ===================================================================== @@ -292,14 +319,28 @@ static inline void nrLDPC_cn2bnProcBuf_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf M = lut_numCnInCnGroups[1]*Z; bitOffsetInGroup = lut_numCnInCnGroups_BG1_R13[1]*NR_LDPC_ZMAX; + const uint32_t startAddrBnProcBuf_CNG4[4][5] = {{105984, 107904, 120192, 120576, 109440}, {40704, 74880, 43392, 47232, 43008}, {42240, 19200, 62592, 23424, 92928}, {8832, 12672, 13824, 14592, 15744}}; + + const uint16_t cyclicShiftValues_Z384_CNG4[4][5] = {{194, 269, 175, 113, 135}, {194, 82, 37, 14, 149}, {101, 115, 312, 218, 15}, {0, 0, 0, 0, 0}}; + //const uint16_t cyclicShiftValues_Z384_CNG4[4][5] = {{175, 113, 194, 269, 135}, {14, 194, 149, 101, 37}, {15, 82, 312, 115, 218}, {0, 0, 0, 0, 0}}; + for (j=0; j<4; j++) { p_cnProcBufRes = &cnProcBufRes[lut_startAddrCnGroups[1] + j*bitOffsetInGroup]; + /* for (i=0; i<M; i++) { bnProcBuf[p_lut_cn2bn[j*M + i]] = p_cnProcBufRes[i]; } + */ + + for (i=0; i<lut_numCnInCnGroups_BG1_R13[1]; i++) + { + nrLDPC_circ_memcpy(&bnProcBuf[startAddrBnProcBuf_CNG4[j][i]],p_cnProcBufRes,Z,cyclicShiftValues_Z384_CNG4[j][i]); + p_cnProcBufRes+=NR_LDPC_ZMAX; + } + } // ===================================================================== @@ -309,14 +350,29 @@ static inline void nrLDPC_cn2bnProcBuf_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf M = lut_numCnInCnGroups[2]*Z; bitOffsetInGroup = lut_numCnInCnGroups_BG1_R13[2]*NR_LDPC_ZMAX; + const uint32_t startAddrBnProcBuf_CNG5[5][18] = {{116736, 105216, 105600, 117504, 117888, 106368, 118272, 106752, 118656, 107136, 119040, 107520, 119424, 119808, 108288, 108672, 109056, 120960}, {88704, 30336, 39552, 31872, 46848, 58752, 89856, 87936, 90240, 33408, 89472, 41856, 61824, 30720, 73344, 74496, 62208, 92544}, {72576, 88320, 86400, 46464, 33024, 97536, 73728, 90624, 60288, 61440, 32640, 91776, 34176, 91392, 91008, 32256, 98688, 33792}, {59520, 97152, 57216, 31104, 74112, 22272, 21888, 23040, 22656, 75264, 61056, 92160, 97920, 93312, 34560, 98304, 23808, 93696}, {6912, 7296, 8064, 8448, 9216, 9600, 9984, 10368, 10752, 11136, 11520, 11904, 12288, 13056, 13440, 14208, 14976, 15360}}; + + const uint16_t cyclicShiftValues_Z384_CNG5[5][18] = {{30, 24, 72, 71, 222, 252, 159, 100, 102, 323, 230, 320, 210, 185, 258, 52, 113, 80}, {11, 89, 17, 81, 19, 5, 229, 215, 201, 8, 148, 335, 313, 177, 93, 314, 132, 78}, {233, 61, 383, 76, 244, 147, 260, 258, 175, 361, 202, 2, 297, 289, 346, 139, 114, 163}, {22, 27, 312, 136, 274, 78, 90, 256, 287, 105, 312, 266, 21, 214, 297, 288, 168, 274}, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}; + for (j=0; j<5; j++) { p_cnProcBufRes = &cnProcBufRes[lut_startAddrCnGroups[2] + j*bitOffsetInGroup]; + /* for (i=0; i<M; i++) { bnProcBuf[p_lut_cn2bn[j*M + i]] = p_cnProcBufRes[i]; + //mexPrintf("bnProcBuf = %p, p_cnProcBufRes = %p\n", &bnProcBuf[p_lut_cn2bn[j*M + i]],&p_cnProcBufRes[i]); } + */ + + for (i=0; i<lut_numCnInCnGroups_BG1_R13[2]; i++) + { + nrLDPC_circ_memcpy(&bnProcBuf[startAddrBnProcBuf_CNG5[j][i]],p_cnProcBufRes,Z,cyclicShiftValues_Z384_CNG5[j][i]); + //mexPrintf("bnProcBuf = %p, p_cnProcBufRes = %p | p_lut_cn2bn = %d\n", &bnProcBuf[p_lut_cn2bn[j*M + i*Z]],&p_cnProcBufRes[i*Z],p_lut_cn2bn[j*M + i*Z]); + p_cnProcBufRes+=NR_LDPC_ZMAX; + } + } // ===================================================================== @@ -326,14 +382,28 @@ static inline void nrLDPC_cn2bnProcBuf_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf M = lut_numCnInCnGroups[3]*Z; bitOffsetInGroup = lut_numCnInCnGroups_BG1_R13[3]*NR_LDPC_ZMAX; + const uint32_t startAddrBnProcBuf_CNG6[6][8] = {{114432, 103680, 115584, 104064, 115968, 116352, 104832, 117120}, {68736, 69888, 55680, 87168, 104448, 71040, 17280, 72192}, {83328, 56832, 59136, 71424, 84864, 29184, 60672, 46080}, {41472, 42624, 57984, 96768, 41088, 58368, 43776, 59904}, {18816, 86016, 71808, 31488, 86784, 87552, 72960, 89088}, {3456, 4608, 4992, 5376, 5760, 6144, 6528, 7680}}; + + + const uint16_t cyclicShiftValues_Z384_CNG6[6][8] = {{313, 13, 260, 130, 145, 187, 205, 298}, {177, 338, 303, 163, 213, 206, 102, 158}, {266, 57, 81, 280, 344, 264, 328, 235}, {115, 289, 358, 132, 242, 341, 213, 339}, {370, 57, 375, 4, 197, 59, 97, 234}, {0, 0, 0, 0, 0, 0, 0, 0}}; + + for (j=0; j<6; j++) { p_cnProcBufRes = &cnProcBufRes[lut_startAddrCnGroups[3] + j*bitOffsetInGroup]; + /* for (i=0; i<M; i++) { bnProcBuf[p_lut_cn2bn[j*M + i]] = p_cnProcBufRes[i]; } + */ + + for (i=0; i<lut_numCnInCnGroups_BG1_R13[3]; i++) + { + nrLDPC_circ_memcpy(&bnProcBuf[startAddrBnProcBuf_CNG6[j][i]],p_cnProcBufRes,Z,cyclicShiftValues_Z384_CNG6[j][i]); + p_cnProcBufRes+=NR_LDPC_ZMAX; + } } // ===================================================================== @@ -343,14 +413,29 @@ static inline void nrLDPC_cn2bnProcBuf_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf M = lut_numCnInCnGroups[4]*Z; bitOffsetInGroup = lut_numCnInCnGroups_BG1_R13[4]*NR_LDPC_ZMAX; + const uint32_t startAddrBnProcBuf_CNG7[7][5] = {{112512, 102144, 114048, 114816, 115200}, {100992, 28800, 102912, 85632, 103296}, {45312, 45696, 83712, 29568, 85248}, {80256, 81792, 55296, 57600, 70272}, {38784, 39936, 69120, 56448, 96384}, {52608, 54144, 96000, 70656, 21504}, {1152, 2304, 3072, 3840, 4224}}; + + + + const uint16_t cyclicShiftValues_Z384_CNG7[7][5] = {{9, 101, 77, 142, 241}, {62, 339, 186, 248, 2}, {316, 274, 174, 137, 210}, {333, 111, 232, 89, 318}, {290, 383, 50, 347, 55}, {114, 354, 74, 12, 269}, {0, 0, 0, 0, 0}}; + + for (j=0; j<7; j++) { p_cnProcBufRes = &cnProcBufRes[lut_startAddrCnGroups[4] + j*bitOffsetInGroup]; + /* for (i=0; i<M; i++) { bnProcBuf[p_lut_cn2bn[j*M + i]] = p_cnProcBufRes[i]; } + */ + + for (i=0; i<lut_numCnInCnGroups_BG1_R13[4]; i++) + { + nrLDPC_circ_memcpy(&bnProcBuf[startAddrBnProcBuf_CNG7[j][i]],p_cnProcBufRes,Z,cyclicShiftValues_Z384_CNG7[j][i]); + p_cnProcBufRes+=NR_LDPC_ZMAX; + } } // ===================================================================== @@ -360,14 +445,30 @@ static inline void nrLDPC_cn2bnProcBuf_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf M = lut_numCnInCnGroups[5]*Z; bitOffsetInGroup = lut_numCnInCnGroups_BG1_R13[5]*NR_LDPC_ZMAX; + const uint32_t startAddrBnProcBuf_CNG8[8][2] = {{111744, 113664}, {100608, 102528}, {66432, 84096}, {81024, 56064}, {52992, 69504}, {67200, 84480}, {81408, 18432}, {384, 2688}}; + + + const uint16_t cyclicShiftValues_Z384_CNG8[8][2] = {{195, 48}, {14, 102}, {115, 8}, {166, 47}, {241, 188}, {51, 334}, {157, 115}, {0, 0}}; + + + + for (j=0; j<8; j++) { p_cnProcBufRes = &cnProcBufRes[lut_startAddrCnGroups[5] + j*bitOffsetInGroup]; + /* for (i=0; i<M; i++) { bnProcBuf[p_lut_cn2bn[j*M + i]] = p_cnProcBufRes[i]; } + */ + + for (i=0; i<lut_numCnInCnGroups_BG1_R13[5]; i++) + { + nrLDPC_circ_memcpy(&bnProcBuf[startAddrBnProcBuf_CNG8[j][i]],p_cnProcBufRes,Z,cyclicShiftValues_Z384_CNG8[j][i]); + p_cnProcBufRes+=NR_LDPC_ZMAX; + } } // ===================================================================== @@ -377,14 +478,25 @@ static inline void nrLDPC_cn2bnProcBuf_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf M = lut_numCnInCnGroups[6]*Z; bitOffsetInGroup = lut_numCnInCnGroups_BG1_R13[6]*NR_LDPC_ZMAX; + const uint32_t startAddrBnProcBuf_CNG9[9][2] = {{112128, 113280}, {38400, 101760}, {80640, 82176}, {52224, 53760}, {66816, 67968}, {53376, 54912}, {95232, 95616}, {39168, 40320}, {768, 1920}}; + const uint16_t cyclicShiftValues_Z384_CNG9[9][2] = {{278, 366}, {257, 232}, {1, 321}, {351, 133}, {92, 57}, {253, 303}, {18, 63}, {225, 82}, {0, 0,}}; + for (j=0; j<9; j++) { p_cnProcBufRes = &cnProcBufRes[lut_startAddrCnGroups[6] + j*bitOffsetInGroup]; + /* for (i=0; i<M; i++) { bnProcBuf[p_lut_cn2bn[j*M + i]] = p_cnProcBufRes[i]; } + */ + + for (i=0; i<lut_numCnInCnGroups_BG1_R13[6]; i++) + { + nrLDPC_circ_memcpy(&bnProcBuf[startAddrBnProcBuf_CNG9[j][i]],p_cnProcBufRes,Z,cyclicShiftValues_Z384_CNG9[j][i]); + p_cnProcBufRes+=NR_LDPC_ZMAX; + } } // ===================================================================== @@ -394,14 +506,25 @@ static inline void nrLDPC_cn2bnProcBuf_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf M = lut_numCnInCnGroups[7]*Z; bitOffsetInGroup = lut_numCnInCnGroups_BG1_R13[7]*NR_LDPC_ZMAX; + const uint32_t startAddrBnProcBuf_CNG10[10][1] = {{112896}, {101376}, {67584}, {82560}, {54528}, {29952}, {68352}, {82944}, {21120}, {1536}}; + const uint16_t cyclicShiftValues_Z384_CNG10[10][1] = {{307}, {179}, {165}, {18}, {39}, {224}, {368}, {67}, {170}, {0}}; + for (j=0; j<10; j++) { p_cnProcBufRes = &cnProcBufRes[lut_startAddrCnGroups[7] + j*bitOffsetInGroup]; + /* for (i=0; i<M; i++) { bnProcBuf[p_lut_cn2bn[j*M + i]] = p_cnProcBufRes[i]; } + */ + + for (i=0; i<lut_numCnInCnGroups_BG1_R13[7]; i++) + { + nrLDPC_circ_memcpy(&bnProcBuf[startAddrBnProcBuf_CNG10[j][i]],p_cnProcBufRes,Z,cyclicShiftValues_Z384_CNG10[j][i]); + p_cnProcBufRes+=NR_LDPC_ZMAX; + } } // ===================================================================== @@ -411,14 +534,27 @@ static inline void nrLDPC_cn2bnProcBuf_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf M = lut_numCnInCnGroups[8]*Z; bitOffsetInGroup = lut_numCnInCnGroups_BG1_R13[8]*NR_LDPC_ZMAX; + const uint32_t startAddrBnProcBuf_CNG19[19][4] = {{109824, 110208, 110592, 110976}, {99072, 25728, 99456, 99840}, {24192, 64128, 27264, 65280}, {62976, 44160, 44544, 44928}, {16128, 16512, 16896, 37248}, {34944, 75648, 36096, 78720}, {24576, 35328, 77184, 37632}, {76032, 26112, 36480, 79104}, {47616, 49152, 27648, 50688}, {76416, 77952, 77568, 79488}, {63360, 48000, 64512, 65664}, {24960, 26496, 49536, 51072}, {48384, 49920, 28032, 51456}, {94080, 48768, 50304, 51840}, {25344, 26880, 94464, 94848}, {35712, 64896, 28416, 38016}, {63744, 78336, 36864, 66048}, {76800, 18048, 20352, 79872}, {17664, 19584, 19968, 20736}}; + + const uint16_t cyclicShiftValues_Z384_CNG19[19][4] = {{307, 76, 205, 276}, {19, 76, 250, 87}, {50, 73, 328, 0}, {369, 288, 332, 275}, {181, 144, 256, 199}, {216, 331, 161, 153}, {317, 331, 267, 56}, {288, 178, 160, 132}, {109, 295, 63, 305}, {17, 342, 129, 231}, {357, 217, 200, 341}, {215, 99, 88, 212}, {106, 354, 53, 304}, {242, 114, 131, 300}, {180, 331, 240, 271}, {330, 112, 205, 39}, {346, 0, 13, 357}, {1, 0, 0, 1}, {0, 0, 0, 0}}; + + for (j=0; j<19; j++) { p_cnProcBufRes = &cnProcBufRes[lut_startAddrCnGroups[8] + j*bitOffsetInGroup]; + /* for (i=0; i<M; i++) { bnProcBuf[p_lut_cn2bn[j*M + i]] = p_cnProcBufRes[i]; } + */ + + for (i=0; i<lut_numCnInCnGroups_BG1_R13[8]; i++) + { + nrLDPC_circ_memcpy(&bnProcBuf[startAddrBnProcBuf_CNG19[j][i]],p_cnProcBufRes,Z,cyclicShiftValues_Z384_CNG19[j][i]); + p_cnProcBufRes+=NR_LDPC_ZMAX; + } } } @@ -616,14 +752,31 @@ static inline void nrLDPC_bn2cnProcBuf_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf M = lut_numCnInCnGroups[2]*Z; bitOffsetInGroup = lut_numCnInCnGroups_BG1_R13[2]*NR_LDPC_ZMAX; + const uint32_t startAddrBnProcBuf_CNG5[5][18] = {{116736, 105216, 105600, 117504, 117888, 106368, 118272, 106752, 118656, 107136, 119040, 107520, 119424, 119808, 108288, 108672, 109056, 120960}, {88704, 30336, 39552, 31872, 46848, 58752, 89856, 87936, 90240, 33408, 89472, 41856, 61824, 30720, 73344, 74496, 62208, 92544}, {72576, 88320, 86400, 46464, 33024, 97536, 73728, 90624, 60288, 61440, 32640, 91776, 34176, 91392, 91008, 32256, 98688, 33792}, {59520, 97152, 57216, 31104, 74112, 22272, 21888, 23040, 22656, 75264, 61056, 92160, 97920, 93312, 34560, 98304, 23808, 93696}, {6912, 7296, 8064, 8448, 9216, 9600, 9984, 10368, 10752, 11136, 11520, 11904, 12288, 13056, 13440, 14208, 14976, 15360}}; + + const uint16_t cyclicShiftValues_Z384_CNG5[5][18] = {{30, 71, 222, 159, 102, 230, 210, 185, 80, 24, 72, 252, 100, 323, 320, 258, 52, 113}, {89, 81, 8, 93, 314, 76, 19, 17, 335, 383, 215, 148, 346, 78, 177, 139, 163, 61}, {229, 289, 361, 11, 201, 2, 214, 233, 260, 312, 5, 175, 313, 136, 202, 297, 132, 22}, {312, 27, 147, 21, 288, 114, 244, 297, 274, 105, 258, 266, 274, 90, 287, 78, 256, 168}, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}; + for (j=0; j<4; j++) { p_cnProcBuf = &cnProcBuf[lut_startAddrCnGroups[2] + j*bitOffsetInGroup]; - for (i=0; i<M; i++) + if (1) { - p_cnProcBuf[i] = bnProcBufRes[p_lut_cn2bn[j*M + i]]; + for (i=0; i<M; i++) + { + p_cnProcBuf[i] = bnProcBufRes[p_lut_cn2bn[j*M + i]]; + } + } + /* + else + { + for (i=0; i<lut_numCnInCnGroups_BG1_R13[2]; i++) + { + nrLDPC_circ_memcpy(p_cnProcBuf, &bnProcBufRes[startAddrBnProcBuf_CNG5[j][i]], Z, cyclicShiftValues_Z384_CNG5[j][i]); + p_cnProcBuf+=Z; + } } + */ } // ===================================================================== -- 2.26.2