Commit a62641f9 authored by Laurent THOMAS's avatar Laurent THOMAS Committed by Robert Schmidt

remove race condition on norm128 global

parent a7bc2823
......@@ -1322,16 +1322,13 @@ static inline void nrLDPC_llr2bit(int8_t* out, int8_t* llrOut, uint16_t numLLR)
{
simde__m256i* p_llrOut = (simde__m256i*) llrOut;
simde__m256i* p_out = (simde__m256i*) out;
int8_t* p_llrOut8;
int8_t* p_out8;
uint32_t i;
uint32_t M = numLLR>>5;
uint32_t Mr = numLLR&31;
const uint32_t M = numLLR>>5;
const uint32_t Mr = numLLR&31;
const simde__m256i* p_zeros = (simde__m256i*) zeros256_epi8;
const simde__m256i* p_ones = (simde__m256i*) ones256_epi8;
for (int i = 0; i < M; i++) {
for (uint32_t i = 0; i < M; i++) {
*p_out++ = simde_mm256_and_si256(*p_ones, simde_mm256_cmpgt_epi8(*p_zeros, *p_llrOut));
p_llrOut++;
}
......@@ -1340,7 +1337,7 @@ static inline void nrLDPC_llr2bit(int8_t* out, int8_t* llrOut, uint16_t numLLR)
int8_t* p_llrOut8 = (int8_t*)p_llrOut;
int8_t* p_out8 = (int8_t*)p_out;
for (int i = 0; i < Mr; i++)
for (uint32_t i = 0; i < Mr; i++)
p_out8[i] = p_llrOut8[i] < 0;
}
......@@ -1357,7 +1354,7 @@ static inline void nrLDPC_llr2bitPacked(int8_t* out, int8_t* llrOut, uint16_t nu
{
/** Vector of indices for shuffling input */
const uint8_t constShuffle_256_epi8[32] __attribute__ ((aligned(32))) = {7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8};
const __m256i* p_shuffle = (__m256i*)constShuffle_256_epi8;
const simde__m256i* p_shuffle = (simde__m256i*) constShuffle_256_epi8;
simde__m256i* p_llrOut = (simde__m256i*) llrOut;
uint32_t* p_bits = (uint32_t*) out;
......@@ -1366,7 +1363,7 @@ static inline void nrLDPC_llr2bitPacked(int8_t* out, int8_t* llrOut, uint16_t nu
for (uint32_t i = 0; i < M; i++) {
// Move LSB to MSB on 8 bits
const __m256i inPerm = simde_mm256_shuffle_epi8(*p_llrOut, *p_shuffle);
const simde__m256i inPerm = simde_mm256_shuffle_epi8(*p_llrOut, *p_shuffle);
// Hard decision
*p_bits++ = simde_mm256_movemask_epi8(inPerm);
p_llrOut++;
......
......@@ -31,7 +31,7 @@
#ifndef __NR_LDPC_CNPROC__H__
#define __NR_LDPC_CNPROC__H__
#include <simde/x86/avx512.h>
#define conditional_negate(a, b, z) simde_mm512_mask_sub_epi8(a, simde_mm512_movepi8_mask(b), z, a)
static inline void nrLDPC_cnProc_BG2_AVX512(t_nrLDPC_lut* p_lut, int8_t* cnProcBuf, int8_t* cnProcBufRes, uint16_t Z)
{
......
......@@ -24,8 +24,8 @@
#include <stdint.h>
#include "../../nrLDPCdecoder_defs.h"
#define AVOID_MM256_SIGN 1
#define DROP_MAXLLR 1
//#define AVOID_MM256_SIGN 1
//#define DROP_MAXLLR 1
void nrLDPC_cnProc_BG1_generator_AVX2(const char* dir, int R)
{
const char *ratestr[3]={"13","23","89"};
......@@ -214,7 +214,7 @@ void nrLDPC_cnProc_BG1_generator_AVX2(const char* dir, int R)
// sgn = simde_mm256_sign_epi8(sgn, ymm0);
#ifndef AVOID_MM256_SIGN
fprintf(fd," sgn = simde_mm256_sign_epi8(sgn, ymm0);\n");
fprintf(fd," sgn = simde_mm256_sign_epi8(sgn, ymm0);\n");
#else
fprintf(fd," sgn = simde_mm256_xor_si256(sgn, ymm0);\n");
#endif
......@@ -289,7 +289,7 @@ void nrLDPC_cnProc_BG1_generator_AVX2(const char* dir, int R)
// sgn = simde_mm256_sign_epi8(sgn, ymm0);
#ifndef AVOID_MM256_SIGN
fprintf(fd," sgn = simde_mm256_sign_epi8(sgn, ymm0);\n");
fprintf(fd," sgn = simde_mm256_sign_epi8(sgn, ymm0);\n");
#else
fprintf(fd," sgn = simde_mm256_xor_si256(sgn, ymm0);\n");
#endif
......@@ -364,7 +364,7 @@ void nrLDPC_cnProc_BG1_generator_AVX2(const char* dir, int R)
// sgn = simde_mm256_sign_epi8(sgn, ymm0);
#ifndef AVOID_MM256_SIGN
fprintf(fd," sgn = simde_mm256_sign_epi8(sgn, ymm0);\n");
fprintf(fd," sgn = simde_mm256_sign_epi8(sgn, ymm0);\n");
#else
fprintf(fd," sgn = simde_mm256_xor_si256(sgn, ymm0);\n");
#endif
......@@ -442,7 +442,7 @@ void nrLDPC_cnProc_BG1_generator_AVX2(const char* dir, int R)
// sgn = simde_mm256_sign_epi8(sgn, ymm0);
#ifndef AVOID_MM256_SIGN
fprintf(fd," sgn = simde_mm256_sign_epi8(sgn, ymm0);\n");
fprintf(fd," sgn = simde_mm256_sign_epi8(sgn, ymm0);\n");
#else
fprintf(fd," sgn = simde_mm256_xor_si256(sgn, ymm0);\n");
#endif
......@@ -521,7 +521,7 @@ void nrLDPC_cnProc_BG1_generator_AVX2(const char* dir, int R)
// sgn = simde_mm256_sign_epi8(sgn, ymm0);
#ifndef AVOID_MM256_SIGN
fprintf(fd," sgn = simde_mm256_sign_epi8(sgn, ymm0);\n");
fprintf(fd," sgn = simde_mm256_sign_epi8(sgn, ymm0);\n");
#else
fprintf(fd," sgn = simde_mm256_xor_si256(sgn, ymm0);\n");
#endif
......@@ -600,7 +600,7 @@ void nrLDPC_cnProc_BG1_generator_AVX2(const char* dir, int R)
// sgn = simde_mm256_sign_epi8(sgn, ymm0);
#ifndef AVOID_MM256_SIGN
fprintf(fd," sgn = simde_mm256_sign_epi8(sgn, ymm0);\n");
fprintf(fd," sgn = simde_mm256_sign_epi8(sgn, ymm0);\n");
#else
fprintf(fd," sgn = simde_mm256_xor_si256(sgn, ymm0);\n");
#endif
......@@ -680,7 +680,7 @@ void nrLDPC_cnProc_BG1_generator_AVX2(const char* dir, int R)
// sgn = simde_mm256_sign_epi8(sgn, ymm0);
#ifndef AVOID_MM256_SIGN
fprintf(fd," sgn = simde_mm256_sign_epi8(sgn, ymm0);\n");
fprintf(fd," sgn = simde_mm256_sign_epi8(sgn, ymm0);\n");
#else
fprintf(fd," sgn = simde_mm256_xor_si256(sgn, ymm0);\n");
#endif
......@@ -714,7 +714,7 @@ void nrLDPC_cnProc_BG1_generator_AVX2(const char* dir, int R)
{0,48,96,144,192,240,288,336,384,432,480,528,576,624,672,720,816,864}, {0,48,96,144,192,240,288,336,384,432,480,528,576,624,672,720,768,864},
{0,48,96,144,192,240,288,336,384,432,480,528,576,624,672,720,768,816}};
if (lut_numCnInCnGroups[8] > 0)
{
// Number of groups of 32 CNs for parallel processing
......@@ -763,7 +763,7 @@ void nrLDPC_cnProc_BG1_generator_AVX2(const char* dir, int R)
// sgn = simde_mm256_sign_epi8(sgn, ymm0);
#ifndef AVOID_MM256_SIGN
fprintf(fd," sgn = simde_mm256_sign_epi8(sgn, ymm0);\n");
fprintf(fd," sgn = simde_mm256_sign_epi8(sgn, ymm0);\n");
#else
fprintf(fd," sgn = simde_mm256_xor_si256(sgn, ymm0);\n");
#endif
......
......@@ -43,7 +43,7 @@ void nrLDPC_cnProc_BG2_generator_AVX512(const char *dir, int R)
abort();
}
fprintf(fd, "#define conditional_negate(a,b,z) simde_mm512_mask_sub_epi8(a,_mm512_movepi8_mask(b),z,a)\n");
//fprintf(fd, "#define conditional_negate(a,b,z) simde_mm512_mask_sub_epi8(a,_mm512_movepi8_mask(b),z,a)\n");
fprintf(fd, "static inline void nrLDPC_cnProc_BG2_R%s_AVX512(int8_t* cnProcBuf, int8_t* cnProcBufRes, uint16_t Z) {\n", ratestr[R]);
const uint8_t *lut_numCnInCnGroups;
......
......@@ -33,6 +33,12 @@
#include "PHY/CODING/nrSmallBlock/nr_small_block_defs.h"
#include "assertions.h"
#include "PHY/sse_intrin.h"
#if defined(__AVX512F__)
#include <simde/x86/avx512.h>
// simde current version missed this instruction
#define simde_mm512_reduce_add_epi32 _mm512_reduce_add_epi32
#define simde_mm512_cvtepi8_epi32 _mm512_cvtepi8_epi32
#endif
//#define DEBUG_DECODESMALLBLOCK
......
......@@ -267,7 +267,7 @@ static void nr_processDLSegment(void *arg)
//Saturate coded bits before decoding into 8 bits values
simde__m128i *pv = (simde__m128i*)&z;
simde__m128i *pl = (simde__m128i*)&l;
for (i=0, j=0; j < ((kc*harq_process->Z)>>4)+1; i+=2, j++) {
for (int i=0, j=0; j < ((kc*harq_process->Z)>>4)+1; i+=2, j++) {
pl[j] = simde_mm_packs_epi16(pv[i],pv[i+1]);
}
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment