Commit 976f2101 authored by Robert Schmidt

Merge remote-tracking branch 'origin/error-simd-intrinsic-name' into integration_2025_w06 (!3239)

Remove nonexistent SIMD instruction

The develop branch doesn't compile when only AVX2 is available. For instance,
_mm_loadu_epi32() doesn't exist in SSE, only in the AVX512 family,
despite being a 128-bit vector instruction.

See also:
https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#_mm_loadi_epi32()

This was masked by using SIMDE.
parents 8c398256 43b98124
......@@ -517,7 +517,7 @@ void nr_layer_precoder_simd(const int n_layers,
prec_weight.r = pmi_pdu->weights[nl][ant].precoder_weight_Re;
prec_weight.i = pmi_pdu->weights[nl][ant].precoder_weight_Im;
const simde__m256i x = simde_mm256_loadu_epi32(&txdataF_res_mapped[nl][symbol][sc]);
const simde__m256i x = simde_mm256_loadu_si256(&txdataF_res_mapped[nl][symbol][sc]);
// Rearrange precoding matrix weight to match complex multiplication and broadcast it to match SIMD size
const simde__m256i w_c = simde_mm256_set1_epi32(c16toI32(c16conj(prec_weight))); // broadcast conjugate of w
......@@ -561,7 +561,7 @@ void nr_layer_precoder_simd(const int n_layers,
prec_weight.r = pmi_pdu->weights[nl][ant].precoder_weight_Re;
prec_weight.i = pmi_pdu->weights[nl][ant].precoder_weight_Im;
const simde__m128i x = simde_mm_loadu_epi32(&txdataF_res_mapped[nl][symbol][sc]);
const simde__m128i x = simde_mm_loadu_si128(&txdataF_res_mapped[nl][symbol][sc]);
// Rearrange precoding matrix weight to match complex multiplication and broadcast it to match SIMD size
const simde__m128i w_c = simde_mm_set1_epi32(c16toI32(c16conj(prec_weight))); // broadcast conjugate of w
......
......@@ -401,8 +401,8 @@ static __attribute__((always_inline)) inline void mult_complex_vectors(const c16
int i;
// do 8 multiplications at a time
for (i = 0; i < size - 7; i += 8) {
const simde__m256i i1 = simde_mm256_loadu_epi32((simde__m256i *)(in1 + i));
const simde__m256i i2 = simde_mm256_loadu_epi32((simde__m256i *)(in2 + i));
const simde__m256i i1 = simde_mm256_loadu_si256((simde__m256i *)(in1 + i));
const simde__m256i i2 = simde_mm256_loadu_si256((simde__m256i *)(in2 + i));
const simde__m256i i2swap = simde_mm256_shuffle_epi8(i2, complex_shuffle256);
const simde__m256i i2conj = simde_mm256_sign_epi16(i2, conj256);
const simde__m256i re = simde_mm256_madd_epi16(i1, i2conj);
......@@ -412,8 +412,8 @@ static __attribute__((always_inline)) inline void mult_complex_vectors(const c16
simde_mm256_blend_epi16(simde_mm256_srai_epi32(re, shift), simde_mm256_slli_epi32(im, 16 - shift), 0xAA));
}
if (size - i > 4) {
const simde__m128i i1 = simde_mm_loadu_epi32((simde__m128i *)(in1 + i));
const simde__m128i i2 = simde_mm_loadu_epi32((simde__m128i *)(in2 + i));
const simde__m128i i1 = simde_mm_loadu_si128((simde__m128i *)(in1 + i));
const simde__m128i i2 = simde_mm_loadu_si128((simde__m128i *)(in2 + i));
const simde__m128i i2swap = simde_mm_shuffle_epi8(i2, *(simde__m128i *)&complex_shuffle256);
const simde__m128i i2conj = simde_mm_sign_epi16(i2, *(simde__m128i *)&conj256);
const simde__m128i re = simde_mm_madd_epi16(i1, i2conj);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment