Commit b4647988 authored by Quency's avatar Quency Committed by Quency Lin

misc / Reorder inlines and add architecture guard

parent 3ce1f651
...@@ -574,10 +574,10 @@ void nr_ulsch_qpsk_qpsk(c16_t *stream0_in, c16_t *stream1_in, c16_t *stream0_out ...@@ -574,10 +574,10 @@ void nr_ulsch_qpsk_qpsk(c16_t *stream0_in, c16_t *stream1_in, c16_t *stream0_out
} }
static const int16_t ones[8] __attribute__((aligned(16))) = {0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff};
static const int16_t ones256[16] __attribute__((aligned(32))) = {0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, #ifdef USE_128BIT
0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff};
static const int16_t ones[8] __attribute__((aligned(16))) = {0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff};
// calculate interference magnitude // calculate interference magnitude
// tmp_result = ones in shorts corr. to interval 2<=x<=4, tmp_result2 interval < 2, tmp_result3 interval 4<x<6 and tmp_result4 // tmp_result = ones in shorts corr. to interval 2<=x<=4, tmp_result2 interval < 2, tmp_result3 interval 4<x<6 and tmp_result4
...@@ -606,6 +606,77 @@ static inline simde__m128i interference_abs_64qam_epi16(simde__m128i psi, ...@@ -606,6 +606,77 @@ static inline simde__m128i interference_abs_64qam_epi16(simde__m128i psi,
return simde_mm_or_si128(tmp_result, tmp_result3); return simde_mm_or_si128(tmp_result, tmp_result3);
} }
// Calculates psi_a = psi_r * a_r + psi_i * a_i
static inline simde__m128i prodsum_psi_a_epi16(simde__m128i psi_r, simde__m128i a_r, simde__m128i psi_i, simde__m128i a_i)
{
simde__m128i tmp_result = simde_mm_mulhi_epi16(psi_r, a_r);
tmp_result = simde_mm_slli_epi16(tmp_result, 1);
simde__m128i tmp_result2 = simde_mm_mulhi_epi16(psi_i, a_i);
tmp_result2 = simde_mm_slli_epi16(tmp_result2, 1);
return simde_mm_adds_epi16(tmp_result, tmp_result2);
}
// Calculate interference magnitude
static inline simde__m128i interference_abs_epi16(simde__m128i psi, simde__m128i int_ch_mag, simde__m128i c1, simde__m128i c2)
{
simde__m128i tmp_result = simde_mm_cmplt_epi16(psi, int_ch_mag);
simde__m128i tmp_result2 = simde_mm_xor_si128(tmp_result, (*(simde__m128i *)&ones[0]));
tmp_result = simde_mm_and_si128(tmp_result, c1);
tmp_result2 = simde_mm_and_si128(tmp_result2, c2);
return simde_mm_or_si128(tmp_result, tmp_result2);
}
// Calculates a_sq = int_ch_mag * (a_r^2 + a_i^2) * scale_factor
static inline simde__m128i square_a_epi16(simde__m128i a_r, simde__m128i a_i, simde__m128i int_ch_mag, simde__m128i scale_factor)
{
simde__m128i tmp_result = simde_mm_mulhi_epi16(a_r, a_r);
tmp_result = simde_mm_slli_epi16(tmp_result, 1);
tmp_result = simde_mm_mulhi_epi16(tmp_result, scale_factor);
tmp_result = simde_mm_slli_epi16(tmp_result, 1);
tmp_result = simde_mm_mulhi_epi16(tmp_result, int_ch_mag);
tmp_result = simde_mm_slli_epi16(tmp_result, 1);
simde__m128i tmp_result2 = simde_mm_mulhi_epi16(a_i, a_i);
tmp_result2 = simde_mm_slli_epi16(tmp_result2, 1);
tmp_result2 = simde_mm_mulhi_epi16(tmp_result2, scale_factor);
tmp_result2 = simde_mm_slli_epi16(tmp_result2, 1);
tmp_result2 = simde_mm_mulhi_epi16(tmp_result2, int_ch_mag);
tmp_result2 = simde_mm_slli_epi16(tmp_result2, 1);
return simde_mm_adds_epi16(tmp_result, tmp_result2);
}
// calculates a_sq = int_ch_mag*(a_r^2 + a_i^2)*scale_factor for 64-QAM
static inline simde__m128i square_a_64qam_epi16(simde__m128i a_r, simde__m128i a_i, simde__m128i int_ch_mag, simde__m128i scale_factor)
{
simde__m128i tmp_result = simde_mm_mulhi_epi16(a_r, a_r);
tmp_result = simde_mm_slli_epi16(tmp_result, 1);
tmp_result = simde_mm_mulhi_epi16(tmp_result, scale_factor);
tmp_result = simde_mm_slli_epi16(tmp_result, 3);
tmp_result = simde_mm_mulhi_epi16(tmp_result, int_ch_mag);
tmp_result = simde_mm_slli_epi16(tmp_result, 1);
simde__m128i tmp_result2 = simde_mm_mulhi_epi16(a_i, a_i);
tmp_result2 = simde_mm_slli_epi16(tmp_result2, 1);
tmp_result2 = simde_mm_mulhi_epi16(tmp_result2, scale_factor);
tmp_result2 = simde_mm_slli_epi16(tmp_result2, 3);
tmp_result2 = simde_mm_mulhi_epi16(tmp_result2, int_ch_mag);
tmp_result2 = simde_mm_slli_epi16(tmp_result2, 1);
return simde_mm_adds_epi16(tmp_result, tmp_result2);
}
simde__m128i max_epi16(simde__m128i m0, simde__m128i m1, simde__m128i m2, simde__m128i m3, simde__m128i m4, simde__m128i m5, simde__m128i m6, simde__m128i m7)
{
simde__m128i a0 = simde_mm_max_epi16(m0, m1);
simde__m128i a1 = simde_mm_max_epi16(m2, m3);
simde__m128i a2 = simde_mm_max_epi16(m4, m5);
simde__m128i a3 = simde_mm_max_epi16(m6, m7);
simde__m128i b0 = simde_mm_max_epi16(a0, a1);
simde__m128i b1 = simde_mm_max_epi16(a2, a3);
return simde_mm_max_epi16(b0, b1);
}
#else
static const int16_t ones256[16] __attribute__((aligned(32))) = {0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff};
// calculate interference magnitude // calculate interference magnitude
// tmp_result = ones in shorts corr. to interval 2<=x<=4, tmp_result2 interval < 2, tmp_result3 interval 4<x<6 and tmp_result4 // tmp_result = ones in shorts corr. to interval 2<=x<=4, tmp_result2 interval < 2, tmp_result3 interval 4<x<6 and tmp_result4
...@@ -634,16 +705,6 @@ static inline simde__m256i interference_abs_64qam_epi16_256(simde__m256i psi, ...@@ -634,16 +705,6 @@ static inline simde__m256i interference_abs_64qam_epi16_256(simde__m256i psi,
return simde_mm256_or_si256(tmp_result, tmp_result3); return simde_mm256_or_si256(tmp_result, tmp_result3);
} }
// Calculates psi_a = psi_r * a_r + psi_i * a_i
static inline simde__m128i prodsum_psi_a_epi16(simde__m128i psi_r, simde__m128i a_r, simde__m128i psi_i, simde__m128i a_i)
{
simde__m128i tmp_result = simde_mm_mulhi_epi16(psi_r, a_r);
tmp_result = simde_mm_slli_epi16(tmp_result, 1);
simde__m128i tmp_result2 = simde_mm_mulhi_epi16(psi_i, a_i);
tmp_result2 = simde_mm_slli_epi16(tmp_result2, 1);
return simde_mm_adds_epi16(tmp_result, tmp_result2);
}
// calculates psi_a = psi_r*a_r + psi_i*a_i // calculates psi_a = psi_r*a_r + psi_i*a_i
static inline simde__m256i prodsum_psi_a_epi16_256(simde__m256i psi_r, simde__m256i a_r, simde__m256i psi_i, simde__m256i a_i) static inline simde__m256i prodsum_psi_a_epi16_256(simde__m256i psi_r, simde__m256i a_r, simde__m256i psi_i, simde__m256i a_i)
{ {
...@@ -654,16 +715,6 @@ static inline simde__m256i prodsum_psi_a_epi16_256(simde__m256i psi_r, simde__m2 ...@@ -654,16 +715,6 @@ static inline simde__m256i prodsum_psi_a_epi16_256(simde__m256i psi_r, simde__m2
return simde_mm256_adds_epi16(tmp_result, tmp_result2); return simde_mm256_adds_epi16(tmp_result, tmp_result2);
} }
// Calculate interference magnitude
static inline simde__m128i interference_abs_epi16(simde__m128i psi, simde__m128i int_ch_mag, simde__m128i c1, simde__m128i c2)
{
simde__m128i tmp_result = simde_mm_cmplt_epi16(psi, int_ch_mag);
simde__m128i tmp_result2 = simde_mm_xor_si128(tmp_result, (*(simde__m128i *)&ones[0]));
tmp_result = simde_mm_and_si128(tmp_result, c1);
tmp_result2 = simde_mm_and_si128(tmp_result2, c2);
return simde_mm_or_si128(tmp_result, tmp_result2);
}
// Calculate interference magnitude // Calculate interference magnitude
static inline simde__m256i interference_abs_epi16_256(simde__m256i psi, simde__m256i int_ch_mag, simde__m256i c1, simde__m256i c2) static inline simde__m256i interference_abs_epi16_256(simde__m256i psi, simde__m256i int_ch_mag, simde__m256i c1, simde__m256i c2)
{ {
...@@ -674,24 +725,6 @@ static inline simde__m256i interference_abs_epi16_256(simde__m256i psi, simde__m ...@@ -674,24 +725,6 @@ static inline simde__m256i interference_abs_epi16_256(simde__m256i psi, simde__m
return simde_mm256_or_si256(tmp_result, tmp_result2); return simde_mm256_or_si256(tmp_result, tmp_result2);
} }
// Calculates a_sq = int_ch_mag * (a_r^2 + a_i^2) * scale_factor
static inline simde__m128i square_a_epi16(simde__m128i a_r, simde__m128i a_i, simde__m128i int_ch_mag, simde__m128i scale_factor)
{
simde__m128i tmp_result = simde_mm_mulhi_epi16(a_r, a_r);
tmp_result = simde_mm_slli_epi16(tmp_result, 1);
tmp_result = simde_mm_mulhi_epi16(tmp_result, scale_factor);
tmp_result = simde_mm_slli_epi16(tmp_result, 1);
tmp_result = simde_mm_mulhi_epi16(tmp_result, int_ch_mag);
tmp_result = simde_mm_slli_epi16(tmp_result, 1);
simde__m128i tmp_result2 = simde_mm_mulhi_epi16(a_i, a_i);
tmp_result2 = simde_mm_slli_epi16(tmp_result2, 1);
tmp_result2 = simde_mm_mulhi_epi16(tmp_result2, scale_factor);
tmp_result2 = simde_mm_slli_epi16(tmp_result2, 1);
tmp_result2 = simde_mm_mulhi_epi16(tmp_result2, int_ch_mag);
tmp_result2 = simde_mm_slli_epi16(tmp_result2, 1);
return simde_mm_adds_epi16(tmp_result, tmp_result2);
}
// Calculates a_sq = int_ch_mag * (a_r^2 + a_i^2) * scale_factor // Calculates a_sq = int_ch_mag * (a_r^2 + a_i^2) * scale_factor
static inline simde__m256i square_a_epi16_256(simde__m256i a_r, simde__m256i a_i, simde__m256i int_ch_mag, simde__m256i scale_factor) static inline simde__m256i square_a_epi16_256(simde__m256i a_r, simde__m256i a_i, simde__m256i int_ch_mag, simde__m256i scale_factor)
{ {
...@@ -710,25 +743,6 @@ static inline simde__m256i square_a_epi16_256(simde__m256i a_r, simde__m256i a_i ...@@ -710,25 +743,6 @@ static inline simde__m256i square_a_epi16_256(simde__m256i a_r, simde__m256i a_i
return simde_mm256_adds_epi16(tmp_result, tmp_result2); return simde_mm256_adds_epi16(tmp_result, tmp_result2);
} }
// calculates a_sq = int_ch_mag*(a_r^2 + a_i^2)*scale_factor for 64-QAM
static inline simde__m128i square_a_64qam_epi16(simde__m128i a_r, simde__m128i a_i, simde__m128i int_ch_mag, simde__m128i scale_factor)
{
simde__m128i tmp_result = simde_mm_mulhi_epi16(a_r, a_r);
tmp_result = simde_mm_slli_epi16(tmp_result, 1);
tmp_result = simde_mm_mulhi_epi16(tmp_result, scale_factor);
tmp_result = simde_mm_slli_epi16(tmp_result, 3);
tmp_result = simde_mm_mulhi_epi16(tmp_result, int_ch_mag);
tmp_result = simde_mm_slli_epi16(tmp_result, 1);
simde__m128i tmp_result2 = simde_mm_mulhi_epi16(a_i, a_i);
tmp_result2 = simde_mm_slli_epi16(tmp_result2, 1);
tmp_result2 = simde_mm_mulhi_epi16(tmp_result2, scale_factor);
tmp_result2 = simde_mm_slli_epi16(tmp_result2, 3);
tmp_result2 = simde_mm_mulhi_epi16(tmp_result2, int_ch_mag);
tmp_result2 = simde_mm_slli_epi16(tmp_result2, 1);
return simde_mm_adds_epi16(tmp_result, tmp_result2);
}
// calculates a_sq = int_ch_mag*(a_r^2 + a_i^2)*scale_factor for 64-QAM // calculates a_sq = int_ch_mag*(a_r^2 + a_i^2)*scale_factor for 64-QAM
static inline simde__m256i square_a_64qam_epi16_256(simde__m256i a_r, simde__m256i a_i, simde__m256i int_ch_mag, simde__m256i scale_factor) static inline simde__m256i square_a_64qam_epi16_256(simde__m256i a_r, simde__m256i a_i, simde__m256i int_ch_mag, simde__m256i scale_factor)
{ {
...@@ -747,17 +761,6 @@ static inline simde__m256i square_a_64qam_epi16_256(simde__m256i a_r, simde__m25 ...@@ -747,17 +761,6 @@ static inline simde__m256i square_a_64qam_epi16_256(simde__m256i a_r, simde__m25
return simde_mm256_adds_epi16(tmp_result, tmp_result2); return simde_mm256_adds_epi16(tmp_result, tmp_result2);
} }
simde__m128i max_epi16(simde__m128i m0, simde__m128i m1, simde__m128i m2, simde__m128i m3, simde__m128i m4, simde__m128i m5, simde__m128i m6, simde__m128i m7)
{
simde__m128i a0 = simde_mm_max_epi16(m0, m1);
simde__m128i a1 = simde_mm_max_epi16(m2, m3);
simde__m128i a2 = simde_mm_max_epi16(m4, m5);
simde__m128i a3 = simde_mm_max_epi16(m6, m7);
simde__m128i b0 = simde_mm_max_epi16(a0, a1);
simde__m128i b1 = simde_mm_max_epi16(a2, a3);
return simde_mm_max_epi16(b0, b1);
}
simde__m256i max_epi16_256(simde__m256i m0, simde__m256i m1, simde__m256i m2, simde__m256i m3, simde__m256i m4, simde__m256i m5, simde__m256i m6, simde__m256i m7) simde__m256i max_epi16_256(simde__m256i m0, simde__m256i m1, simde__m256i m2, simde__m256i m3, simde__m256i m4, simde__m256i m5, simde__m256i m6, simde__m256i m7)
{ {
simde__m256i a0 = simde_mm256_max_epi16(m0, m1); simde__m256i a0 = simde_mm256_max_epi16(m0, m1);
...@@ -769,6 +772,8 @@ simde__m256i max_epi16_256(simde__m256i m0, simde__m256i m1, simde__m256i m2, si ...@@ -769,6 +772,8 @@ simde__m256i max_epi16_256(simde__m256i m0, simde__m256i m1, simde__m256i m2, si
return simde_mm256_max_epi16(b0, b1); return simde_mm256_max_epi16(b0, b1);
} }
#endif
/* /*
* This function computes the LLRs of stream 0 (s_0) in presence of the interfering stream 1 (s_1) assuming that both symbols are * This function computes the LLRs of stream 0 (s_0) in presence of the interfering stream 1 (s_1) assuming that both symbols are
* 16QAM. It can be used for both MU-MIMO interference-aware receiver or for SU-MIMO receivers. * 16QAM. It can be used for both MU-MIMO interference-aware receiver or for SU-MIMO receivers.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment