Merge remote-tracking branch 'origin/error-simd-intrinsic-name' into integration_2025_w06 (!3239)

Remove nonexistent SIMD intrinsics: the develop branch doesn't compile with AVX2 only. For instance, _mm_loadu_epi32() doesn't exist in SSE, only in the AVX512 family, despite operating on a 128-bit vector. See also: https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#_mm_loadi_epi32() This was masked by using SIMDE.

Merge remote-tracking branch 'origin/error-simd-intrinsic-name' into integration_2025_w06 (!3239)
Remove nonexistent SIMD intrinsics: the develop branch doesn't compile with AVX2 only. For instance, _mm_loadu_epi32() doesn't exist in SSE, only in the AVX512 family, despite operating on a 128-bit vector. See also: https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#_mm_loadi_epi32() This was masked by using SIMDE.
976f2101 · Robert Schmidt · 8c398256 · 43b98124 · 976f2101 · 976f2101
Commit 976f2101 authored Feb 07, 2025 by Robert Schmidt
Show whitespace changes
Inline Side-by-side

Showing with 6 additions and 6 deletions

openair1/PHY/MODULATION/nr_modulation.c openair1/PHY/MODULATION/nr_modulation.c +2 -2

openair1/PHY/TOOLS/tools_defs.h openair1/PHY/TOOLS/tools_defs.h +4 -4

No files found.
--- a/openair1/PHY/MODULATION/nr_modulation.c
+++ b/openair1/PHY/MODULATION/nr_modulation.c
@@ -517,7 +517,7 @@ void nr_layer_precoder_simd(const int n_layers,
      prec_weight.r = pmi_pdu->weights[nl][ant].precoder_weight_Re;
      prec_weight.i = pmi_pdu->weights[nl][ant].precoder_weight_Im;

-      const simde__m256i x = simde_mm256_loadu_epi32(&txdataF_res_mapped[nl][symbol][sc]);
+      const simde__m256i x = simde_mm256_loadu_si256(&txdataF_res_mapped[nl][symbol][sc]);

      // Rearrange precoding matrix weight to match complex multiplication and broadcast it to match SIMD size
      const simde__m256i w_c   = simde_mm256_set1_epi32(c16toI32(c16conj(prec_weight)));   // broadcast conjugate of w
@@ -561,7 +561,7 @@ void nr_layer_precoder_simd(const int n_layers,
      prec_weight.r = pmi_pdu->weights[nl][ant].precoder_weight_Re;
      prec_weight.i = pmi_pdu->weights[nl][ant].precoder_weight_Im;

-      const simde__m128i x = simde_mm_loadu_epi32(&txdataF_res_mapped[nl][symbol][sc]);
+      const simde__m128i x = simde_mm_loadu_si128(&txdataF_res_mapped[nl][symbol][sc]);

      // Rearrange precoding matrix weight to match complex multiplication and broadcast it to match SIMD size
      const simde__m128i w_c   = simde_mm_set1_epi32(c16toI32(c16conj(prec_weight)));   // broadcast conjugate of w

--- a/openair1/PHY/TOOLS/tools_defs.h
+++ b/openair1/PHY/TOOLS/tools_defs.h
@@ -401,8 +401,8 @@ static __attribute__((always_inline)) inline void mult_complex_vectors(const c16
  int i;
  // do 8 multiplications at a time
  for (i = 0; i < size - 7; i += 8) {
-    const simde__m256i i1 = simde_mm256_loadu_epi32((simde__m256i *)(in1 + i));
-    const simde__m256i i2 = simde_mm256_loadu_epi32((simde__m256i *)(in2 + i));
+    const simde__m256i i1 = simde_mm256_loadu_si256((simde__m256i *)(in1 + i));
+    const simde__m256i i2 = simde_mm256_loadu_si256((simde__m256i *)(in2 + i));
    const simde__m256i i2swap = simde_mm256_shuffle_epi8(i2, complex_shuffle256);
    const simde__m256i i2conj = simde_mm256_sign_epi16(i2, conj256);
    const simde__m256i re = simde_mm256_madd_epi16(i1, i2conj);
@@ -412,8 +412,8 @@ static __attribute__((always_inline)) inline void mult_complex_vectors(const c16
        simde_mm256_blend_epi16(simde_mm256_srai_epi32(re, shift), simde_mm256_slli_epi32(im, 16 - shift), 0xAA));
  }
  if (size - i > 4) {
-    const simde__m128i i1 = simde_mm_loadu_epi32((simde__m128i *)(in1 + i));
-    const simde__m128i i2 = simde_mm_loadu_epi32((simde__m128i *)(in2 + i));
+    const simde__m128i i1 = simde_mm_loadu_si128((simde__m128i *)(in1 + i));
+    const simde__m128i i2 = simde_mm_loadu_si128((simde__m128i *)(in2 + i));
    const simde__m128i i2swap = simde_mm_shuffle_epi8(i2, *(simde__m128i *)&complex_shuffle256);
    const simde__m128i i2conj = simde_mm_sign_epi16(i2, *(simde__m128i *)&conj256);
    const simde__m128i re = simde_mm_madd_epi16(i1, i2conj);