Commit 3ed622af authored by Sy's avatar Sy

use of avx512 at CN processing level

parent a7e61374
......@@ -2745,8 +2745,8 @@ static inline void nrLDPC_llr2bit(int8_t* out, int8_t* llrOut, uint16_t numLLR)
uint32_t M = numLLR>>5;
uint32_t Mr = numLLR&31;
const __m256i* p_zeros = (__m256i*) zeros256_epi8;
const __m256i* p_ones = (__m256i*) ones256_epi8;
const __m256i* p_zeros = (__m256i*) zeros512_epi8;
const __m256i* p_ones = (__m256i*) ones512_epi8;
for (i=0; i<M; i++)
{
......
......@@ -34,15 +34,15 @@
#include <stdlib.h>
#include "nrLDPC_types.h"
#ifndef malloc32_clear
#ifndef malloc64_clear
/**
\brief Allocates 32 byte aligned memory and initializes to zero
\brief Allocates 64 byte aligned memory and initializes to zero
\param size Input size in bytes
\return Pointer to memory
*/
static inline void* malloc32_clear(size_t size)
static inline void* malloc64_clear(size_t size)
{
void* ptr = (void*) memalign(32, size+32);
void* ptr = (void*) memalign(64, size+64);
memset(ptr, 0, size);
return ptr;
}
......@@ -56,16 +56,16 @@ static inline void* malloc32_clear(size_t size)
*/
static inline t_nrLDPC_procBuf* nrLDPC_init_mem(void)
{
t_nrLDPC_procBuf* p_procBuf = (t_nrLDPC_procBuf*) malloc32_clear(sizeof(t_nrLDPC_procBuf));
t_nrLDPC_procBuf* p_procBuf = (t_nrLDPC_procBuf*) malloc64_clear(sizeof(t_nrLDPC_procBuf));
if (p_procBuf)
{
p_procBuf->cnProcBuf = (int8_t*) malloc32_clear(NR_LDPC_SIZE_CN_PROC_BUF*sizeof(int8_t));
p_procBuf->cnProcBufRes = (int8_t*) malloc32_clear(NR_LDPC_SIZE_CN_PROC_BUF*sizeof(int8_t));
p_procBuf->bnProcBuf = (int8_t*) malloc32_clear(NR_LDPC_SIZE_BN_PROC_BUF*sizeof(int8_t));
p_procBuf->bnProcBufRes = (int8_t*) malloc32_clear(NR_LDPC_SIZE_BN_PROC_BUF*sizeof(int8_t));
p_procBuf->llrRes = (int8_t*) malloc32_clear(NR_LDPC_MAX_NUM_LLR *sizeof(int8_t));
p_procBuf->llrProcBuf = (int8_t*) malloc32_clear(NR_LDPC_MAX_NUM_LLR *sizeof(int8_t));
p_procBuf->cnProcBuf = (int8_t*) malloc64_clear(NR_LDPC_SIZE_CN_PROC_BUF*sizeof(int8_t));
p_procBuf->cnProcBufRes = (int8_t*) malloc64_clear(NR_LDPC_SIZE_CN_PROC_BUF*sizeof(int8_t));
p_procBuf->bnProcBuf = (int8_t*) malloc64_clear(NR_LDPC_SIZE_BN_PROC_BUF*sizeof(int8_t));
p_procBuf->bnProcBufRes = (int8_t*) malloc64_clear(NR_LDPC_SIZE_BN_PROC_BUF*sizeof(int8_t));
p_procBuf->llrRes = (int8_t*) malloc64_clear(NR_LDPC_MAX_NUM_LLR *sizeof(int8_t));
p_procBuf->llrProcBuf = (int8_t*) malloc64_clear(NR_LDPC_MAX_NUM_LLR *sizeof(int8_t));
}
return(p_procBuf);
......
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -191,11 +191,11 @@ static const uint16_t lut_startAddrBnGroupsLlr_BG2_R13[NR_LDPC_NUM_BN_GROUPS_BG2
/** Start address for every BN group within the LLR processing buffer for BG2 rate = 2/3 */
static const uint16_t lut_startAddrBnGroupsLlr_BG2_R23[NR_LDPC_NUM_BN_GROUPS_BG2_R23] = {0, 1152, 2304, 4224, 5376, 6144};
/** Vector of 32 '1' in int8 for application with AVX2 */
static const int8_t ones256_epi8[32] __attribute__ ((aligned(32))) = {1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1};
/** Vector of 32 '0' in int8 for application with AVX2 */
static const int8_t zeros256_epi8[32] __attribute__ ((aligned(32))) = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
/** Vector of 32 '127' in int8 for application with AVX2 */
static const int8_t maxLLR256_epi8[32] __attribute__ ((aligned(32))) = {127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127};
/** Vector of 64 '1' in int8 for application with AVX512 */
static const int8_t ones512_epi8[64] __attribute__ ((aligned(64))) = {1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1};
/** Vector of 64 '0' in int8 for application with AVX512 */
static const int8_t zeros512_epi8[64] __attribute__ ((aligned(64))) = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
/** Vector of 64 '127' in int8 for application with AVX512 */
static const int8_t maxLLR512_epi8[64] __attribute__ ((aligned(64))) = {127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127};
#endif
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment