Commit 3ed622af authored by Sy

use of avx512 at CN processing level

parent a7e61374
...@@ -2745,8 +2745,8 @@ static inline void nrLDPC_llr2bit(int8_t* out, int8_t* llrOut, uint16_t numLLR) ...@@ -2745,8 +2745,8 @@ static inline void nrLDPC_llr2bit(int8_t* out, int8_t* llrOut, uint16_t numLLR)
uint32_t M = numLLR>>5; uint32_t M = numLLR>>5;
uint32_t Mr = numLLR&31; uint32_t Mr = numLLR&31;
const __m256i* p_zeros = (__m256i*) zeros256_epi8; const __m256i* p_zeros = (__m256i*) zeros512_epi8;
const __m256i* p_ones = (__m256i*) ones256_epi8; const __m256i* p_ones = (__m256i*) ones512_epi8;
for (i=0; i<M; i++) for (i=0; i<M; i++)
{ {
......
...@@ -34,15 +34,15 @@ ...@@ -34,15 +34,15 @@
#include <stdlib.h> #include <stdlib.h>
#include "nrLDPC_types.h" #include "nrLDPC_types.h"
#ifndef malloc32_clear #ifndef malloc64_clear
/** /**
\brief Allocates 32 byte aligned memory and initializes to zero \brief Allocates 64 byte aligned memory and initializes to zero
\param size Input size in bytes \param size Input size in bytes
\return Pointer to memory \return Pointer to memory
*/ */
static inline void* malloc32_clear(size_t size) static inline void* malloc64_clear(size_t size)
{ {
void* ptr = (void*) memalign(32, size+32); void* ptr = (void*) memalign(64, size+64);
memset(ptr, 0, size); memset(ptr, 0, size);
return ptr; return ptr;
} }
...@@ -56,16 +56,16 @@ static inline void* malloc32_clear(size_t size) ...@@ -56,16 +56,16 @@ static inline void* malloc32_clear(size_t size)
*/ */
static inline t_nrLDPC_procBuf* nrLDPC_init_mem(void) static inline t_nrLDPC_procBuf* nrLDPC_init_mem(void)
{ {
t_nrLDPC_procBuf* p_procBuf = (t_nrLDPC_procBuf*) malloc32_clear(sizeof(t_nrLDPC_procBuf)); t_nrLDPC_procBuf* p_procBuf = (t_nrLDPC_procBuf*) malloc64_clear(sizeof(t_nrLDPC_procBuf));
if (p_procBuf) if (p_procBuf)
{ {
p_procBuf->cnProcBuf = (int8_t*) malloc32_clear(NR_LDPC_SIZE_CN_PROC_BUF*sizeof(int8_t)); p_procBuf->cnProcBuf = (int8_t*) malloc64_clear(NR_LDPC_SIZE_CN_PROC_BUF*sizeof(int8_t));
p_procBuf->cnProcBufRes = (int8_t*) malloc32_clear(NR_LDPC_SIZE_CN_PROC_BUF*sizeof(int8_t)); p_procBuf->cnProcBufRes = (int8_t*) malloc64_clear(NR_LDPC_SIZE_CN_PROC_BUF*sizeof(int8_t));
p_procBuf->bnProcBuf = (int8_t*) malloc32_clear(NR_LDPC_SIZE_BN_PROC_BUF*sizeof(int8_t)); p_procBuf->bnProcBuf = (int8_t*) malloc64_clear(NR_LDPC_SIZE_BN_PROC_BUF*sizeof(int8_t));
p_procBuf->bnProcBufRes = (int8_t*) malloc32_clear(NR_LDPC_SIZE_BN_PROC_BUF*sizeof(int8_t)); p_procBuf->bnProcBufRes = (int8_t*) malloc64_clear(NR_LDPC_SIZE_BN_PROC_BUF*sizeof(int8_t));
p_procBuf->llrRes = (int8_t*) malloc32_clear(NR_LDPC_MAX_NUM_LLR *sizeof(int8_t)); p_procBuf->llrRes = (int8_t*) malloc64_clear(NR_LDPC_MAX_NUM_LLR *sizeof(int8_t));
p_procBuf->llrProcBuf = (int8_t*) malloc32_clear(NR_LDPC_MAX_NUM_LLR *sizeof(int8_t)); p_procBuf->llrProcBuf = (int8_t*) malloc64_clear(NR_LDPC_MAX_NUM_LLR *sizeof(int8_t));
} }
return(p_procBuf); return(p_procBuf);
......
This source diff could not be displayed because it is too large. You can view the blob instead.
...@@ -191,11 +191,11 @@ static const uint16_t lut_startAddrBnGroupsLlr_BG2_R13[NR_LDPC_NUM_BN_GROUPS_BG2 ...@@ -191,11 +191,11 @@ static const uint16_t lut_startAddrBnGroupsLlr_BG2_R13[NR_LDPC_NUM_BN_GROUPS_BG2
/** Start address for every BN group within the LLR processing buffer for BG2 rate = 2/3 */ /** Start address for every BN group within the LLR processing buffer for BG2 rate = 2/3 */
static const uint16_t lut_startAddrBnGroupsLlr_BG2_R23[NR_LDPC_NUM_BN_GROUPS_BG2_R23] = {0, 1152, 2304, 4224, 5376, 6144}; static const uint16_t lut_startAddrBnGroupsLlr_BG2_R23[NR_LDPC_NUM_BN_GROUPS_BG2_R23] = {0, 1152, 2304, 4224, 5376, 6144};
/** Vector of 32 '1' in int8 for application with AVX2 */ /** Vector of 64 '1' in int8 for application with AVX512 */
static const int8_t ones256_epi8[32] __attribute__ ((aligned(32))) = {1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}; static const int8_t ones512_epi8[64] __attribute__ ((aligned(64))) = {1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1};
/** Vector of 32 '0' in int8 for application with AVX2 */ /** Vector of 64 '0' in int8 for application with AVX512 */
static const int8_t zeros256_epi8[32] __attribute__ ((aligned(32))) = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; static const int8_t zeros512_epi8[64] __attribute__ ((aligned(64))) = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
/** Vector of 32 '127' in int8 for application with AVX2 */ /** Vector of 64 '127' in int8 for application with AVX512 */
static const int8_t maxLLR256_epi8[32] __attribute__ ((aligned(32))) = {127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127}; static const int8_t maxLLR512_epi8[64] __attribute__ ((aligned(64))) = {127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127};
#endif #endif
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment