Commit cd2b80e6 authored by Sy's avatar Sy

some cleanup

parent 5521e343
......@@ -724,7 +724,7 @@ if (BG==1)
#ifdef __AVX512BW__
nrLDPC_bnProc_BG2_R13_AVX512(p_procBuf->bnProcBuf, p_procBuf->bnProcBufRes,p_procBuf->llrRes, Z);
#else
nrLDPC_bnProc_BG2_R13_AVX2(p_procBuf->bnProcBuf, p_procBuf->bnProcBufRes,+p_procBuf->llrRes, Z);
nrLDPC_bnProc_BG2_R13_AVX2(p_procBuf->bnProcBuf, p_procBuf->bnProcBufRes,p_procBuf->llrRes, Z);
#endif
break;
}
......
This diff is collapsed.
This diff is collapsed.
void nrLDPC_bnProc_BG2_R23_AVX2(int8_t* bnProcBuf,int8_t* bnProcBufRes, int8_t* llrRes, uint16_t Z ) {
__m256i* p_bnProcBuf;
__m256i* p_bnProcBufRes;
__m256i* p_llrRes;
__m256i* p_res;
uint32_t M, i;
// Process group with 2 CNs
M = (3*Z + 31)>>5;
for (i=0;i<M;i++) {
((__m256i*)bnProcBufRes)[36 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[36 + i ], ((__m256i*) bnProcBuf)[36 + i]);
}
for (i=0;i<M;i++) {
((__m256i*)bnProcBufRes)[72 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[36 + i ], ((__m256i*) bnProcBuf)[72 + i]);
}
// Process group with 3 CNs
M = (5*Z + 31)>>5;
for (i=0;i<M;i++) {
((__m256i*)bnProcBufRes)[108 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[72 + i ], ((__m256i*) bnProcBuf)[108 + i]);
}
for (i=0;i<M;i++) {
((__m256i*)bnProcBufRes)[168 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[72 + i ], ((__m256i*) bnProcBuf)[168 + i]);
}
for (i=0;i<M;i++) {
((__m256i*)bnProcBufRes)[228 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[72 + i ], ((__m256i*) bnProcBuf)[228 + i]);
}
// Process group with 4 CNs
M = (3*Z + 31)>>5;
for (i=0;i<M;i++) {
((__m256i*)bnProcBufRes)[288 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[132 + i ], ((__m256i*) bnProcBuf)[288 + i]);
}
for (i=0;i<M;i++) {
((__m256i*)bnProcBufRes)[324 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[132 + i ], ((__m256i*) bnProcBuf)[324 + i]);
}
for (i=0;i<M;i++) {
((__m256i*)bnProcBufRes)[360 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[132 + i ], ((__m256i*) bnProcBuf)[360 + i]);
}
for (i=0;i<M;i++) {
((__m256i*)bnProcBufRes)[396 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[132 + i ], ((__m256i*) bnProcBuf)[396 + i]);
}
// Process group with 5 CNs
M = (2*Z + 31)>>5;
for (i=0;i<M;i++) {
((__m256i*)bnProcBufRes)[432 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[168 + i ], ((__m256i*) bnProcBuf)[432 + i]);
}
for (i=0;i<M;i++) {
((__m256i*)bnProcBufRes)[456 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[168 + i ], ((__m256i*) bnProcBuf)[456 + i]);
}
for (i=0;i<M;i++) {
((__m256i*)bnProcBufRes)[480 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[168 + i ], ((__m256i*) bnProcBuf)[480 + i]);
}
for (i=0;i<M;i++) {
((__m256i*)bnProcBufRes)[504 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[168 + i ], ((__m256i*) bnProcBuf)[504 + i]);
}
for (i=0;i<M;i++) {
((__m256i*)bnProcBufRes)[528 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[168 + i ], ((__m256i*) bnProcBuf)[528 + i]);
}
// Process group with 6 CNs
M = (1*Z + 31)>>5;
for (i=0;i<M;i++) {
((__m256i*)bnProcBufRes)[552 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[192 + i ], ((__m256i*) bnProcBuf)[552 + i]);
}
for (i=0;i<M;i++) {
((__m256i*)bnProcBufRes)[564 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[192 + i ], ((__m256i*) bnProcBuf)[564 + i]);
}
for (i=0;i<M;i++) {
((__m256i*)bnProcBufRes)[576 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[192 + i ], ((__m256i*) bnProcBuf)[576 + i]);
}
for (i=0;i<M;i++) {
((__m256i*)bnProcBufRes)[588 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[192 + i ], ((__m256i*) bnProcBuf)[588 + i]);
}
for (i=0;i<M;i++) {
((__m256i*)bnProcBufRes)[600 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[192 + i ], ((__m256i*) bnProcBuf)[600 + i]);
}
for (i=0;i<M;i++) {
((__m256i*)bnProcBufRes)[612 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[192 + i ], ((__m256i*) bnProcBuf)[612 + i]);
}
// Process group with 7 CNs
// Process group with 8 CNs
// Process group with 9 CNs
// Process group with 10 CNs
// Process group with 11 CNs
// Process group with 12 CNs
// Process group with 13 CNs
// Process group with 14 CNs
// Process group with 15 CNs
// Process group with 16 CNs
// Process group with 17 CNs
// Process group with 18 CNs
// Process group with 19 CNs
// Process group with 20 CNs
// Process group with 21 CNs
// Process group with 22 CNs
// Process group with <23 CNs
// Process group with 24 CNs
// Process group with 25 CNs
// Process group with 26 CNs
// Process group with 27 CNs
// Process group with 28 CNs
// Process group with 29 CNs
// Process group with 30 CNs
}
/*
* Licensed to the OpenAirInterface (OAI) Software Alliance under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The OpenAirInterface Software Alliance licenses this file to You under
* the OAI Public License, Version 1.1 (the "License"); you may not use this file
* except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.openairinterface.org/?page_id=698
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*-------------------------------------------------------------------------------
* For more information about the OpenAirInterface (OAI) Software Alliance:
* contact@openairinterface.org
*/
/*!\file nrLDPC_init_mem.h
* \brief Defines the function to initialize the LDPC decoder and sets correct LUTs.
* \author Sebastian Wagner (TCL Communications) Email: <mailto:sebastian.wagner@tcl.com>
* \date 07-12-2018
* \version 1.0
* \note
* \warning
*/
#ifndef __NR_LDPC_INIT_MEM__H__
#define __NR_LDPC_INIT_MEM__H__
#include <stdlib.h>
#include "nrLDPC_types.h"
#ifndef malloc32_clear
/**
\brief Allocates 32 byte aligned memory and initializes to zero
\param size Input size in bytes
\return Pointer to memory
*/
static inline void* malloc32_clear(size_t size)
{
void* ptr = (void*) memalign(64, size+64);
memset(ptr, 0, size);
return ptr;
}
#endif
/**
\brief Allocates and initializes the internal decoder processing buffers
\param p_decParams Pointer to decoder parameters
\param p_lut Pointer to decoder LUTs
\return Number of LLR values
*/
static inline t_nrLDPC_procBuf* nrLDPC_init_mem(void)
{
t_nrLDPC_procBuf* p_procBuf = (t_nrLDPC_procBuf*) malloc32_clear(sizeof(t_nrLDPC_procBuf));
if (p_procBuf)
{
p_procBuf->cnProcBuf = (int8_t*) malloc32_clear(NR_LDPC_SIZE_CN_PROC_BUF*sizeof(int8_t));
p_procBuf->cnProcBufRes = (int8_t*) malloc32_clear(NR_LDPC_SIZE_CN_PROC_BUF*sizeof(int8_t));
p_procBuf->bnProcBuf = (int8_t*) malloc32_clear(NR_LDPC_SIZE_BN_PROC_BUF*sizeof(int8_t));
p_procBuf->bnProcBufRes = (int8_t*) malloc32_clear(NR_LDPC_SIZE_BN_PROC_BUF*sizeof(int8_t));
p_procBuf->llrRes = (int8_t*) malloc32_clear(NR_LDPC_MAX_NUM_LLR *sizeof(int8_t));
p_procBuf->llrProcBuf = (int8_t*) malloc32_clear(NR_LDPC_MAX_NUM_LLR *sizeof(int8_t));
}
return(p_procBuf);
}
static inline void nrLDPC_free_mem(t_nrLDPC_procBuf* p_procBuf)
{
free(p_procBuf->cnProcBuf);
free(p_procBuf->cnProcBufRes);
free(p_procBuf->bnProcBuf);
free(p_procBuf->bnProcBufRes);
free(p_procBuf->llrRes);
free(p_procBuf->llrProcBuf);
free(p_procBuf);
}
#endif
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment