Commit a62641f9 authored by Laurent THOMAS's avatar Laurent THOMAS Committed by Robert Schmidt

remove race condition on norm128 global

parent a7bc2823
...@@ -1322,16 +1322,13 @@ static inline void nrLDPC_llr2bit(int8_t* out, int8_t* llrOut, uint16_t numLLR) ...@@ -1322,16 +1322,13 @@ static inline void nrLDPC_llr2bit(int8_t* out, int8_t* llrOut, uint16_t numLLR)
{ {
simde__m256i* p_llrOut = (simde__m256i*) llrOut; simde__m256i* p_llrOut = (simde__m256i*) llrOut;
simde__m256i* p_out = (simde__m256i*) out; simde__m256i* p_out = (simde__m256i*) out;
int8_t* p_llrOut8; const uint32_t M = numLLR>>5;
int8_t* p_out8; const uint32_t Mr = numLLR&31;
uint32_t i;
uint32_t M = numLLR>>5;
uint32_t Mr = numLLR&31;
const simde__m256i* p_zeros = (simde__m256i*) zeros256_epi8; const simde__m256i* p_zeros = (simde__m256i*) zeros256_epi8;
const simde__m256i* p_ones = (simde__m256i*) ones256_epi8; const simde__m256i* p_ones = (simde__m256i*) ones256_epi8;
for (int i = 0; i < M; i++) { for (uint32_t i = 0; i < M; i++) {
*p_out++ = simde_mm256_and_si256(*p_ones, simde_mm256_cmpgt_epi8(*p_zeros, *p_llrOut)); *p_out++ = simde_mm256_and_si256(*p_ones, simde_mm256_cmpgt_epi8(*p_zeros, *p_llrOut));
p_llrOut++; p_llrOut++;
} }
...@@ -1340,7 +1337,7 @@ static inline void nrLDPC_llr2bit(int8_t* out, int8_t* llrOut, uint16_t numLLR) ...@@ -1340,7 +1337,7 @@ static inline void nrLDPC_llr2bit(int8_t* out, int8_t* llrOut, uint16_t numLLR)
int8_t* p_llrOut8 = (int8_t*)p_llrOut; int8_t* p_llrOut8 = (int8_t*)p_llrOut;
int8_t* p_out8 = (int8_t*)p_out; int8_t* p_out8 = (int8_t*)p_out;
for (int i = 0; i < Mr; i++) for (uint32_t i = 0; i < Mr; i++)
p_out8[i] = p_llrOut8[i] < 0; p_out8[i] = p_llrOut8[i] < 0;
} }
...@@ -1357,7 +1354,7 @@ static inline void nrLDPC_llr2bitPacked(int8_t* out, int8_t* llrOut, uint16_t nu ...@@ -1357,7 +1354,7 @@ static inline void nrLDPC_llr2bitPacked(int8_t* out, int8_t* llrOut, uint16_t nu
{ {
/** Vector of indices for shuffling input */ /** Vector of indices for shuffling input */
const uint8_t constShuffle_256_epi8[32] __attribute__ ((aligned(32))) = {7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8}; const uint8_t constShuffle_256_epi8[32] __attribute__ ((aligned(32))) = {7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8};
const __m256i* p_shuffle = (__m256i*)constShuffle_256_epi8; const simde__m256i* p_shuffle = (simde__m256i*) constShuffle_256_epi8;
simde__m256i* p_llrOut = (simde__m256i*) llrOut; simde__m256i* p_llrOut = (simde__m256i*) llrOut;
uint32_t* p_bits = (uint32_t*) out; uint32_t* p_bits = (uint32_t*) out;
...@@ -1366,7 +1363,7 @@ static inline void nrLDPC_llr2bitPacked(int8_t* out, int8_t* llrOut, uint16_t nu ...@@ -1366,7 +1363,7 @@ static inline void nrLDPC_llr2bitPacked(int8_t* out, int8_t* llrOut, uint16_t nu
for (uint32_t i = 0; i < M; i++) { for (uint32_t i = 0; i < M; i++) {
// Move LSB to MSB on 8 bits // Move LSB to MSB on 8 bits
const __m256i inPerm = simde_mm256_shuffle_epi8(*p_llrOut, *p_shuffle); const simde__m256i inPerm = simde_mm256_shuffle_epi8(*p_llrOut, *p_shuffle);
// Hard decision // Hard decision
*p_bits++ = simde_mm256_movemask_epi8(inPerm); *p_bits++ = simde_mm256_movemask_epi8(inPerm);
p_llrOut++; p_llrOut++;
......
...@@ -31,7 +31,7 @@ ...@@ -31,7 +31,7 @@
#ifndef __NR_LDPC_CNPROC__H__ #ifndef __NR_LDPC_CNPROC__H__
#define __NR_LDPC_CNPROC__H__ #define __NR_LDPC_CNPROC__H__
#include <simde/x86/avx512.h>
#define conditional_negate(a, b, z) simde_mm512_mask_sub_epi8(a, simde_mm512_movepi8_mask(b), z, a) #define conditional_negate(a, b, z) simde_mm512_mask_sub_epi8(a, simde_mm512_movepi8_mask(b), z, a)
static inline void nrLDPC_cnProc_BG2_AVX512(t_nrLDPC_lut* p_lut, int8_t* cnProcBuf, int8_t* cnProcBufRes, uint16_t Z) static inline void nrLDPC_cnProc_BG2_AVX512(t_nrLDPC_lut* p_lut, int8_t* cnProcBuf, int8_t* cnProcBufRes, uint16_t Z)
{ {
......
...@@ -24,8 +24,8 @@ ...@@ -24,8 +24,8 @@
#include <stdint.h> #include <stdint.h>
#include "../../nrLDPCdecoder_defs.h" #include "../../nrLDPCdecoder_defs.h"
#define AVOID_MM256_SIGN 1 //#define AVOID_MM256_SIGN 1
#define DROP_MAXLLR 1 //#define DROP_MAXLLR 1
void nrLDPC_cnProc_BG1_generator_AVX2(const char* dir, int R) void nrLDPC_cnProc_BG1_generator_AVX2(const char* dir, int R)
{ {
const char *ratestr[3]={"13","23","89"}; const char *ratestr[3]={"13","23","89"};
...@@ -214,7 +214,7 @@ void nrLDPC_cnProc_BG1_generator_AVX2(const char* dir, int R) ...@@ -214,7 +214,7 @@ void nrLDPC_cnProc_BG1_generator_AVX2(const char* dir, int R)
// sgn = simde_mm256_sign_epi8(sgn, ymm0); // sgn = simde_mm256_sign_epi8(sgn, ymm0);
#ifndef AVOID_MM256_SIGN #ifndef AVOID_MM256_SIGN
fprintf(fd," sgn = simde_mm256_sign_epi8(sgn, ymm0);\n"); fprintf(fd," sgn = simde_mm256_sign_epi8(sgn, ymm0);\n");
#else #else
fprintf(fd," sgn = simde_mm256_xor_si256(sgn, ymm0);\n"); fprintf(fd," sgn = simde_mm256_xor_si256(sgn, ymm0);\n");
#endif #endif
...@@ -289,7 +289,7 @@ void nrLDPC_cnProc_BG1_generator_AVX2(const char* dir, int R) ...@@ -289,7 +289,7 @@ void nrLDPC_cnProc_BG1_generator_AVX2(const char* dir, int R)
// sgn = simde_mm256_sign_epi8(sgn, ymm0); // sgn = simde_mm256_sign_epi8(sgn, ymm0);
#ifndef AVOID_MM256_SIGN #ifndef AVOID_MM256_SIGN
fprintf(fd," sgn = simde_mm256_sign_epi8(sgn, ymm0);\n"); fprintf(fd," sgn = simde_mm256_sign_epi8(sgn, ymm0);\n");
#else #else
fprintf(fd," sgn = simde_mm256_xor_si256(sgn, ymm0);\n"); fprintf(fd," sgn = simde_mm256_xor_si256(sgn, ymm0);\n");
#endif #endif
...@@ -364,7 +364,7 @@ void nrLDPC_cnProc_BG1_generator_AVX2(const char* dir, int R) ...@@ -364,7 +364,7 @@ void nrLDPC_cnProc_BG1_generator_AVX2(const char* dir, int R)
// sgn = simde_mm256_sign_epi8(sgn, ymm0); // sgn = simde_mm256_sign_epi8(sgn, ymm0);
#ifndef AVOID_MM256_SIGN #ifndef AVOID_MM256_SIGN
fprintf(fd," sgn = simde_mm256_sign_epi8(sgn, ymm0);\n"); fprintf(fd," sgn = simde_mm256_sign_epi8(sgn, ymm0);\n");
#else #else
fprintf(fd," sgn = simde_mm256_xor_si256(sgn, ymm0);\n"); fprintf(fd," sgn = simde_mm256_xor_si256(sgn, ymm0);\n");
#endif #endif
...@@ -442,7 +442,7 @@ void nrLDPC_cnProc_BG1_generator_AVX2(const char* dir, int R) ...@@ -442,7 +442,7 @@ void nrLDPC_cnProc_BG1_generator_AVX2(const char* dir, int R)
// sgn = simde_mm256_sign_epi8(sgn, ymm0); // sgn = simde_mm256_sign_epi8(sgn, ymm0);
#ifndef AVOID_MM256_SIGN #ifndef AVOID_MM256_SIGN
fprintf(fd," sgn = simde_mm256_sign_epi8(sgn, ymm0);\n"); fprintf(fd," sgn = simde_mm256_sign_epi8(sgn, ymm0);\n");
#else #else
fprintf(fd," sgn = simde_mm256_xor_si256(sgn, ymm0);\n"); fprintf(fd," sgn = simde_mm256_xor_si256(sgn, ymm0);\n");
#endif #endif
...@@ -521,7 +521,7 @@ void nrLDPC_cnProc_BG1_generator_AVX2(const char* dir, int R) ...@@ -521,7 +521,7 @@ void nrLDPC_cnProc_BG1_generator_AVX2(const char* dir, int R)
// sgn = simde_mm256_sign_epi8(sgn, ymm0); // sgn = simde_mm256_sign_epi8(sgn, ymm0);
#ifndef AVOID_MM256_SIGN #ifndef AVOID_MM256_SIGN
fprintf(fd," sgn = simde_mm256_sign_epi8(sgn, ymm0);\n"); fprintf(fd," sgn = simde_mm256_sign_epi8(sgn, ymm0);\n");
#else #else
fprintf(fd," sgn = simde_mm256_xor_si256(sgn, ymm0);\n"); fprintf(fd," sgn = simde_mm256_xor_si256(sgn, ymm0);\n");
#endif #endif
...@@ -600,7 +600,7 @@ void nrLDPC_cnProc_BG1_generator_AVX2(const char* dir, int R) ...@@ -600,7 +600,7 @@ void nrLDPC_cnProc_BG1_generator_AVX2(const char* dir, int R)
// sgn = simde_mm256_sign_epi8(sgn, ymm0); // sgn = simde_mm256_sign_epi8(sgn, ymm0);
#ifndef AVOID_MM256_SIGN #ifndef AVOID_MM256_SIGN
fprintf(fd," sgn = simde_mm256_sign_epi8(sgn, ymm0);\n"); fprintf(fd," sgn = simde_mm256_sign_epi8(sgn, ymm0);\n");
#else #else
fprintf(fd," sgn = simde_mm256_xor_si256(sgn, ymm0);\n"); fprintf(fd," sgn = simde_mm256_xor_si256(sgn, ymm0);\n");
#endif #endif
...@@ -680,7 +680,7 @@ void nrLDPC_cnProc_BG1_generator_AVX2(const char* dir, int R) ...@@ -680,7 +680,7 @@ void nrLDPC_cnProc_BG1_generator_AVX2(const char* dir, int R)
// sgn = simde_mm256_sign_epi8(sgn, ymm0); // sgn = simde_mm256_sign_epi8(sgn, ymm0);
#ifndef AVOID_MM256_SIGN #ifndef AVOID_MM256_SIGN
fprintf(fd," sgn = simde_mm256_sign_epi8(sgn, ymm0);\n"); fprintf(fd," sgn = simde_mm256_sign_epi8(sgn, ymm0);\n");
#else #else
fprintf(fd," sgn = simde_mm256_xor_si256(sgn, ymm0);\n"); fprintf(fd," sgn = simde_mm256_xor_si256(sgn, ymm0);\n");
#endif #endif
...@@ -714,7 +714,7 @@ void nrLDPC_cnProc_BG1_generator_AVX2(const char* dir, int R) ...@@ -714,7 +714,7 @@ void nrLDPC_cnProc_BG1_generator_AVX2(const char* dir, int R)
{0,48,96,144,192,240,288,336,384,432,480,528,576,624,672,720,816,864}, {0,48,96,144,192,240,288,336,384,432,480,528,576,624,672,720,768,864}, {0,48,96,144,192,240,288,336,384,432,480,528,576,624,672,720,816,864}, {0,48,96,144,192,240,288,336,384,432,480,528,576,624,672,720,768,864},
{0,48,96,144,192,240,288,336,384,432,480,528,576,624,672,720,768,816}}; {0,48,96,144,192,240,288,336,384,432,480,528,576,624,672,720,768,816}};
if (lut_numCnInCnGroups[8] > 0) if (lut_numCnInCnGroups[8] > 0)
{ {
// Number of groups of 32 CNs for parallel processing // Number of groups of 32 CNs for parallel processing
...@@ -763,7 +763,7 @@ void nrLDPC_cnProc_BG1_generator_AVX2(const char* dir, int R) ...@@ -763,7 +763,7 @@ void nrLDPC_cnProc_BG1_generator_AVX2(const char* dir, int R)
// sgn = simde_mm256_sign_epi8(sgn, ymm0); // sgn = simde_mm256_sign_epi8(sgn, ymm0);
#ifndef AVOID_MM256_SIGN #ifndef AVOID_MM256_SIGN
fprintf(fd," sgn = simde_mm256_sign_epi8(sgn, ymm0);\n"); fprintf(fd," sgn = simde_mm256_sign_epi8(sgn, ymm0);\n");
#else #else
fprintf(fd," sgn = simde_mm256_xor_si256(sgn, ymm0);\n"); fprintf(fd," sgn = simde_mm256_xor_si256(sgn, ymm0);\n");
#endif #endif
......
...@@ -43,7 +43,7 @@ void nrLDPC_cnProc_BG2_generator_AVX512(const char *dir, int R) ...@@ -43,7 +43,7 @@ void nrLDPC_cnProc_BG2_generator_AVX512(const char *dir, int R)
abort(); abort();
} }
fprintf(fd, "#define conditional_negate(a,b,z) simde_mm512_mask_sub_epi8(a,_mm512_movepi8_mask(b),z,a)\n"); //fprintf(fd, "#define conditional_negate(a,b,z) simde_mm512_mask_sub_epi8(a,_mm512_movepi8_mask(b),z,a)\n");
fprintf(fd, "static inline void nrLDPC_cnProc_BG2_R%s_AVX512(int8_t* cnProcBuf, int8_t* cnProcBufRes, uint16_t Z) {\n", ratestr[R]); fprintf(fd, "static inline void nrLDPC_cnProc_BG2_R%s_AVX512(int8_t* cnProcBuf, int8_t* cnProcBufRes, uint16_t Z) {\n", ratestr[R]);
const uint8_t *lut_numCnInCnGroups; const uint8_t *lut_numCnInCnGroups;
......
...@@ -33,6 +33,12 @@ ...@@ -33,6 +33,12 @@
#include "PHY/CODING/nrSmallBlock/nr_small_block_defs.h" #include "PHY/CODING/nrSmallBlock/nr_small_block_defs.h"
#include "assertions.h" #include "assertions.h"
#include "PHY/sse_intrin.h" #include "PHY/sse_intrin.h"
#if defined(__AVX512F__)
#include <simde/x86/avx512.h>
// simde current version missed this instruction
#define simde_mm512_reduce_add_epi32 _mm512_reduce_add_epi32
#define simde_mm512_cvtepi8_epi32 _mm512_cvtepi8_epi32
#endif
//#define DEBUG_DECODESMALLBLOCK //#define DEBUG_DECODESMALLBLOCK
......
...@@ -267,7 +267,7 @@ static void nr_processDLSegment(void *arg) ...@@ -267,7 +267,7 @@ static void nr_processDLSegment(void *arg)
//Saturate coded bits before decoding into 8 bits values //Saturate coded bits before decoding into 8 bits values
simde__m128i *pv = (simde__m128i*)&z; simde__m128i *pv = (simde__m128i*)&z;
simde__m128i *pl = (simde__m128i*)&l; simde__m128i *pl = (simde__m128i*)&l;
for (i=0, j=0; j < ((kc*harq_process->Z)>>4)+1; i+=2, j++) { for (int i=0, j=0; j < ((kc*harq_process->Z)>>4)+1; i+=2, j++) {
pl[j] = simde_mm_packs_epi16(pv[i],pv[i+1]); pl[j] = simde_mm_packs_epi16(pv[i],pv[i+1]);
} }
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment