remove race condition on norm128 global

a62641f9 · Laurent THOMAS · Robert Schmidt · a7bc2823 · a62641f9 · a62641f9
Commit a62641f9 authored Sep 13, 2023 by Laurent THOMAS Committed by Robert Schmidt Sep 13, 2023
8 changed files
--- a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_bnProc.h
+++ b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_bnProc.h
@@ -1322,16 +1322,13 @@ static inline void nrLDPC_llr2bit(int8_t* out, int8_t* llrOut, uint16_t numLLR)
 {
    simde__m256i* p_llrOut = (simde__m256i*) llrOut;
    simde__m256i* p_out    = (simde__m256i*) out;
-    int8_t* p_llrOut8;
+    const uint32_t M  = numLLR>>5;
-    int8_t* p_out8;
+    const uint32_t Mr = numLLR&31;
-    uint32_t i;
-    uint32_t M  = numLLR>>5;
-    uint32_t Mr = numLLR&31;
    const simde__m256i* p_zeros = (simde__m256i*) zeros256_epi8;
    const simde__m256i* p_ones  = (simde__m256i*) ones256_epi8;
-  for (int i = 0; i < M; i++) {
+  for (uint32_t i = 0; i < M; i++) {
    *p_out++ = simde_mm256_and_si256(*p_ones, simde_mm256_cmpgt_epi8(*p_zeros, *p_llrOut));
    p_llrOut++;
  }
@@ -1340,7 +1337,7 @@ static inline void nrLDPC_llr2bit(int8_t* out, int8_t* llrOut, uint16_t numLLR)
  int8_t* p_llrOut8 = (int8_t*)p_llrOut;
  int8_t* p_out8 = (int8_t*)p_out;
-  for (int i = 0; i < Mr; i++)
+  for (uint32_t i = 0; i < Mr; i++)
    p_out8[i] = p_llrOut8[i] < 0;
 }
@@ -1357,7 +1354,7 @@ static inline void nrLDPC_llr2bitPacked(int8_t* out, int8_t* llrOut, uint16_t nu
 {
    /** Vector of indices for shuffling input */
    const uint8_t constShuffle_256_epi8[32] __attribute__ ((aligned(32))) = {7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8};
-    const __m256i* p_shuffle = (__m256i*)constShuffle_256_epi8;
+    const simde__m256i* p_shuffle = (simde__m256i*) constShuffle_256_epi8;
    simde__m256i*  p_llrOut = (simde__m256i*)  llrOut;
    uint32_t* p_bits   = (uint32_t*) out;
@@ -1366,7 +1363,7 @@ static inline void nrLDPC_llr2bitPacked(int8_t* out, int8_t* llrOut, uint16_t nu
    for (uint32_t i = 0; i < M; i++) {
      // Move LSB to MSB on 8 bits
-      const __m256i inPerm = simde_mm256_shuffle_epi8(*p_llrOut, *p_shuffle);
+      const simde__m256i inPerm = simde_mm256_shuffle_epi8(*p_llrOut, *p_shuffle);
      // Hard decision
      *p_bits++ = simde_mm256_movemask_epi8(inPerm);
      p_llrOut++;

--- a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_cnProc_avx512.h
+++ b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_cnProc_avx512.h
@@ -31,7 +31,7 @@
 #ifndef __NR_LDPC_CNPROC__H__
 #define __NR_LDPC_CNPROC__H__
+#include <simde/x86/avx512.h>
 #define conditional_negate(a, b, z) simde_mm512_mask_sub_epi8(a, simde_mm512_movepi8_mask(b), z, a)
 static inline void nrLDPC_cnProc_BG2_AVX512(t_nrLDPC_lut* p_lut, int8_t* cnProcBuf, int8_t* cnProcBufRes, uint16_t Z)
 {

--- a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/bnProcPc_gen_BG1_avx2.c
+++ b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/bnProcPc_gen_BG1_avx2.c
--- a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_cnProc/cnProc_gen_BG1_avx2.c
+++ b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_cnProc/cnProc_gen_BG1_avx2.c
@@ -24,8 +24,8 @@
 #include <stdint.h>
 #include "../../nrLDPCdecoder_defs.h"
-#define AVOID_MM256_SIGN 1
+//#define AVOID_MM256_SIGN 1
-#define DROP_MAXLLR 1
+//#define DROP_MAXLLR 1
 void nrLDPC_cnProc_BG1_generator_AVX2(const char* dir, int R)
 {
  const char *ratestr[3]={"13","23","89"};
@@ -214,7 +214,7 @@ void nrLDPC_cnProc_BG1_generator_AVX2(const char* dir, int R)
 	      //                sgn  = simde_mm256_sign_epi8(sgn, ymm0);
 #ifndef AVOID_MM256_SIGN
-  	      fprintf(fd,"                sgn  = simde_mm256_sign_epi8(sgn, ymm0);\n");
+	      fprintf(fd,"                sgn  = simde_mm256_sign_epi8(sgn, ymm0);\n");
 #else
 	      fprintf(fd,"                sgn  = simde_mm256_xor_si256(sgn, ymm0);\n");
 #endif
@@ -289,7 +289,7 @@ void nrLDPC_cnProc_BG1_generator_AVX2(const char* dir, int R)
 	      //                sgn  = simde_mm256_sign_epi8(sgn, ymm0);
 #ifndef AVOID_MM256_SIGN
-  	      fprintf(fd,"                sgn  = simde_mm256_sign_epi8(sgn, ymm0);\n");
+	      fprintf(fd,"                sgn  = simde_mm256_sign_epi8(sgn, ymm0);\n");
 #else
 	      fprintf(fd,"                sgn  = simde_mm256_xor_si256(sgn, ymm0);\n");
 #endif
@@ -364,7 +364,7 @@ void nrLDPC_cnProc_BG1_generator_AVX2(const char* dir, int R)
 	      //                sgn  = simde_mm256_sign_epi8(sgn, ymm0);
 #ifndef AVOID_MM256_SIGN
-  	      fprintf(fd,"                sgn  = simde_mm256_sign_epi8(sgn, ymm0);\n");
+	      fprintf(fd,"                sgn  = simde_mm256_sign_epi8(sgn, ymm0);\n");
 #else
 	      fprintf(fd,"                sgn  = simde_mm256_xor_si256(sgn, ymm0);\n");
 #endif
@@ -442,7 +442,7 @@ void nrLDPC_cnProc_BG1_generator_AVX2(const char* dir, int R)
 	      //                sgn  = simde_mm256_sign_epi8(sgn, ymm0);
 #ifndef AVOID_MM256_SIGN
-  	      fprintf(fd,"                sgn  = simde_mm256_sign_epi8(sgn, ymm0);\n");
+	      fprintf(fd,"                sgn  = simde_mm256_sign_epi8(sgn, ymm0);\n");
 #else
 	      fprintf(fd,"                sgn  = simde_mm256_xor_si256(sgn, ymm0);\n");
 #endif
@@ -521,7 +521,7 @@ void nrLDPC_cnProc_BG1_generator_AVX2(const char* dir, int R)
 	      //                sgn  = simde_mm256_sign_epi8(sgn, ymm0);
 #ifndef AVOID_MM256_SIGN
-  	      fprintf(fd,"                sgn  = simde_mm256_sign_epi8(sgn, ymm0);\n");
+	      fprintf(fd,"                sgn  = simde_mm256_sign_epi8(sgn, ymm0);\n");
 #else
 	      fprintf(fd,"                sgn  = simde_mm256_xor_si256(sgn, ymm0);\n");
 #endif
@@ -600,7 +600,7 @@ void nrLDPC_cnProc_BG1_generator_AVX2(const char* dir, int R)
 	      //                sgn  = simde_mm256_sign_epi8(sgn, ymm0);
 #ifndef AVOID_MM256_SIGN
-  	      fprintf(fd,"                sgn  = simde_mm256_sign_epi8(sgn, ymm0);\n");
+	      fprintf(fd,"                sgn  = simde_mm256_sign_epi8(sgn, ymm0);\n");
 #else
 	      fprintf(fd,"                sgn  = simde_mm256_xor_si256(sgn, ymm0);\n");
 #endif
@@ -680,7 +680,7 @@ void nrLDPC_cnProc_BG1_generator_AVX2(const char* dir, int R)
 	      //                sgn  = simde_mm256_sign_epi8(sgn, ymm0);
 #ifndef AVOID_MM256_SIGN
-  	      fprintf(fd,"                sgn  = simde_mm256_sign_epi8(sgn, ymm0);\n");
+	      fprintf(fd,"                sgn  = simde_mm256_sign_epi8(sgn, ymm0);\n");
 #else
 	      fprintf(fd,"                sgn  = simde_mm256_xor_si256(sgn, ymm0);\n");
 #endif
@@ -714,7 +714,7 @@ void nrLDPC_cnProc_BG1_generator_AVX2(const char* dir, int R)
 					     {0,48,96,144,192,240,288,336,384,432,480,528,576,624,672,720,816,864}, {0,48,96,144,192,240,288,336,384,432,480,528,576,624,672,720,768,864},
 					     {0,48,96,144,192,240,288,336,384,432,480,528,576,624,672,720,768,816}};
  if (lut_numCnInCnGroups[8] > 0)
    {
      // Number of groups of 32 CNs for parallel processing
@@ -763,7 +763,7 @@ void nrLDPC_cnProc_BG1_generator_AVX2(const char* dir, int R)
 	      //                sgn  = simde_mm256_sign_epi8(sgn, ymm0);
 #ifndef AVOID_MM256_SIGN
-  	      fprintf(fd,"                sgn  = simde_mm256_sign_epi8(sgn, ymm0);\n");
+	      fprintf(fd,"                sgn  = simde_mm256_sign_epi8(sgn, ymm0);\n");
 #else
 	      fprintf(fd,"                sgn  = simde_mm256_xor_si256(sgn, ymm0);\n");
 #endif

--- a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_cnProc_avx512/cnProc_gen_BG2_avx512.c
+++ b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_cnProc_avx512/cnProc_gen_BG2_avx512.c
@@ -43,7 +43,7 @@ void nrLDPC_cnProc_BG2_generator_AVX512(const char *dir, int R)
    abort();
  }
-  fprintf(fd, "#define conditional_negate(a,b,z) simde_mm512_mask_sub_epi8(a,_mm512_movepi8_mask(b),z,a)\n");
+  //fprintf(fd, "#define conditional_negate(a,b,z) simde_mm512_mask_sub_epi8(a,_mm512_movepi8_mask(b),z,a)\n");
  fprintf(fd, "static inline void nrLDPC_cnProc_BG2_R%s_AVX512(int8_t* cnProcBuf, int8_t* cnProcBufRes, uint16_t Z) {\n", ratestr[R]);
  const uint8_t *lut_numCnInCnGroups;

--- a/openair1/PHY/CODING/nrSmallBlock/decodeSmallBlock.c
+++ b/openair1/PHY/CODING/nrSmallBlock/decodeSmallBlock.c
@@ -33,6 +33,12 @@
 #include "PHY/CODING/nrSmallBlock/nr_small_block_defs.h"
 #include "assertions.h"
 #include "PHY/sse_intrin.h"
+#if defined(__AVX512F__)
+#include <simde/x86/avx512.h>
+// simde current version missed this instruction
+#define simde_mm512_reduce_add_epi32 _mm512_reduce_add_epi32
+#define simde_mm512_cvtepi8_epi32 _mm512_cvtepi8_epi32
+#endif
 //#define DEBUG_DECODESMALLBLOCK

--- a/openair1/PHY/NR_UE_TRANSPORT/nr_dlsch_decoding.c
+++ b/openair1/PHY/NR_UE_TRANSPORT/nr_dlsch_decoding.c
@@ -267,7 +267,7 @@ static void nr_processDLSegment(void *arg)
    //Saturate coded bits before decoding into 8 bits values
    simde__m128i *pv = (simde__m128i*)&z;
    simde__m128i *pl = (simde__m128i*)&l;
-    for (i=0, j=0; j < ((kc*harq_process->Z)>>4)+1;  i+=2, j++) {
+    for (int i=0, j=0; j < ((kc*harq_process->Z)>>4)+1;  i+=2, j++) {
      pl[j] = simde_mm_packs_epi16(pv[i],pv[i+1]);
    }

--- a/openair1/PHY/TOOLS/oai_dfts.c
+++ b/openair1/PHY/TOOLS/oai_dfts.c