Commit 26b3fcca authored by Sakthivel Velumani, committed by francescomani

Remove SIMD operation for DMRS amplitude scaling

There was concern that someone in the future might call the function
with the wrong inputs. So, based on the discussion on the MR below
https://gitlab.eurecom.fr/oai/openairinterface5g/-/merge_requests/2916#note_132506
the SIMD code for multiplying the DMRS vector with the amplitude is
removed, and the multiplication is now done in plain C (no SIMD).

The execution time for the function dmrs_amp_mult() in this commit is
10x higher than in the previous commit.
parent 393363e2
......@@ -386,18 +386,17 @@ static void dmrs_amp_mult(const uint32_t dmrs_port,
const uint32_t n_dmrs,
const pusch_dmrs_type_t dmrs_type)
{
/*
A short array that holds amplitude values used for
multiplying with the modulated DMRS vector in 128bit SIMD.
*/
int16_t alpha_dmrs[8] __attribute((aligned(16)));
for (int_fast8_t i = 0; i < sizeofArray(alpha_dmrs) / 2; i++) {
const int16_t a = Wf[i % 2] * Wt * AMP;
alpha_dmrs[2 * i] = a; // multiplier for real part of DMRS symbol
alpha_dmrs[2 * i + 1] = a; // multiplier for img part of DMRS symbol
/* short array that hold amplitude for k_prime = 0 and k_prime = 1 */
int32_t alpha_dmrs[2] __attribute((aligned(16)));
for (int_fast8_t i = 0; i < sizeofArray(alpha_dmrs); i++) {
const int32_t a = Wf[i] * Wt * AMP;
alpha_dmrs[i] = a;
}
/* multiply amplitude with complex DMRS vector */
for (int_fast16_t i = 0; i < n_dmrs; i++) {
mod_dmrs_out[i] = c16mulRealShift(mod_dmrs[i], alpha_dmrs[i % 2], 15);
}
/* multiply mod_dmrs with alpha_dmrs in 4 symbol patches */
mult_real_vector_single_vector(mod_dmrs, alpha_dmrs, mod_dmrs_out, n_dmrs);
}
/*
......
......@@ -24,18 +24,6 @@
#include <simde/simde-common.h>
#include <simde/x86/sse.h>
/*
 * Scale the vector x by the amplitude pattern in alpha and store the result in y.
 *
 * x     : input vector of N c16_t elements, accessed as simd_q15_t chunks
 * alpha : amplitude pattern, read as ONE simd_q15_t and reused for every chunk
 * y     : output vector, written as simd_q15_t chunks
 * N     : number of c16_t elements to process
 *
 * The work is done on ceil(N / 4) whole chunks. When N is not a multiple of 4
 * the last chunk is still read from x and written to y in full, so the caller
 * has to provide buffers that cover that rounded-up length.
 * NOTE(review): all three pointers are dereferenced through simd_q15_t, so they
 * presumably need to satisfy that type's size and alignment - confirm with callers.
 * The per-chunk arithmetic is whatever mulhi_int16() (defined elsewhere) performs.
 */
void mult_real_vector_single_vector(const c16_t *x, const int16_t *alpha, c16_t *y, const unsigned int N)
{
  const simd_q15_t *src = (const simd_q15_t *)x;
  simd_q15_t *dst = (simd_q15_t *)y;
  const simd_q15_t *scale = (const simd_q15_t *)alpha;

  /* number of chunks, rounded up: ceil(N / 4) */
  const unsigned int chunks = (N + 3) / 4;

  uint_fast32_t k = 0;
  while (k < chunks) {
    dst[k] = mulhi_int16(src[k], *scale);
    k++;
  }
}
void multadd_complex_vector_real_scalar(int16_t *x,
int16_t alpha,
int16_t *y,
......
......@@ -824,8 +824,6 @@ double interp(double x, double *xs, double *ys, int count);
void simde_mm128_separate_real_imag_parts(simde__m128i *out_re, simde__m128i *out_im, simde__m128i in0, simde__m128i in1);
void simde_mm256_separate_real_imag_parts(simde__m256i *out_re, simde__m256i *out_im, simde__m256i in0, simde__m256i in1);
void mult_real_vector_single_vector(const c16_t *x, const int16_t *alpha, c16_t *y, const unsigned int N);
#ifdef __cplusplus
}
#endif
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment