Commit 0da2c77a authored by Raymond Knopp's avatar Raymond Knopp

Merge branch 'enhancement-10-harmony' of...

Merge branch 'enhancement-10-harmony' of https://gitlab.eurecom.fr/oai/openairinterface5g into enhancement-10-harmony
parents e729036b 8fffd732
...@@ -58,10 +58,10 @@ ...@@ -58,10 +58,10 @@
#define print_ints(s,x) printf("%s %d %d %d %d\n",s,(x)[0],(x)[1],(x)[2],(x)[3]) #define print_ints(s,x) printf("%s %d %d %d %d\n",s,(x)[0],(x)[1],(x)[2],(x)[3])
static int16_t conjugatedft[32] __attribute__((aligned(32))) = {-1,1,-1,1,-1,1,-1,1,-1,1,-1,1,-1,1,-1,1,-1,1}; const static int16_t conjugatedft[32] __attribute__((aligned(32))) = {-1,1,-1,1,-1,1,-1,1,-1,1,-1,1,-1,1,-1,1,-1,1};
static int16_t reflip[32] __attribute__((aligned(32))) = {1,-1,1,-1,1,-1,1,-1,1,-1,1,-1,1,-1,1,-1}; const static int16_t reflip[32] __attribute__((aligned(32))) = {1,-1,1,-1,1,-1,1,-1,1,-1,1,-1,1,-1,1,-1};
#if defined(__x86_64__) || defined(__i386__) #if defined(__x86_64__) || defined(__i386__)
static inline void cmac(__m128i a,__m128i b, __m128i *re32, __m128i *im32) __attribute__((always_inline)); static inline void cmac(__m128i a,__m128i b, __m128i *re32, __m128i *im32) __attribute__((always_inline));
...@@ -474,33 +474,33 @@ static inline int16x8_t packed_cmult2(int16x8_t a,int16x8_t b, int16x8_t b2) ...@@ -474,33 +474,33 @@ static inline int16x8_t packed_cmult2(int16x8_t a,int16x8_t b, int16x8_t b2)
#endif #endif
static int16_t W0s[16]__attribute__((aligned(32))) = {32767,0,32767,0,32767,0,32767,0,32767,0,32767,0,32767,0,32767,0}; const static int16_t W0s[16]__attribute__((aligned(32))) = {32767,0,32767,0,32767,0,32767,0,32767,0,32767,0,32767,0,32767,0};
static int16_t W13s[16]__attribute__((aligned(32))) = {-16384,-28378,-16384,-28378,-16384,-28378,-16384,-28378,-16384,-28378,-16384,-28378,-16384,-28378,-16384,-28378}; const static int16_t W13s[16]__attribute__((aligned(32))) = {-16384,-28378,-16384,-28378,-16384,-28378,-16384,-28378,-16384,-28378,-16384,-28378,-16384,-28378,-16384,-28378};
static int16_t W23s[16]__attribute__((aligned(32))) = {-16384,28378,-16384,28378,-16384,28378,-16384,28378,-16384,28378,-16384,28378,-16384,28378,-16384,28378}; const static int16_t W23s[16]__attribute__((aligned(32))) = {-16384,28378,-16384,28378,-16384,28378,-16384,28378,-16384,28378,-16384,28378,-16384,28378,-16384,28378};
static int16_t W15s[16]__attribute__((aligned(32))) = {10126,-31163,10126,-31163,10126,-31163,10126,-31163,10126,-31163,10126,-31163,10126,-31163,10126,-31163}; const static int16_t W15s[16]__attribute__((aligned(32))) = {10126,-31163,10126,-31163,10126,-31163,10126,-31163,10126,-31163,10126,-31163,10126,-31163,10126,-31163};
static int16_t W25s[16]__attribute__((aligned(32))) = {-26509,-19260,-26509,-19260,-26509,-19260,-26509,-19260,-26509,-19260,-26509,-19260,-26509,-19260,-26509,-19260}; const static int16_t W25s[16]__attribute__((aligned(32))) = {-26509,-19260,-26509,-19260,-26509,-19260,-26509,-19260,-26509,-19260,-26509,-19260,-26509,-19260,-26509,-19260};
static int16_t W35s[16]__attribute__((aligned(32))) = {-26510,19260,-26510,19260,-26510,19260,-26510,19260,-26510,19260,-26510,19260,-26510,19260,-26510,19260}; const static int16_t W35s[16]__attribute__((aligned(32))) = {-26510,19260,-26510,19260,-26510,19260,-26510,19260,-26510,19260,-26510,19260,-26510,19260,-26510,19260};
static int16_t W45s[16]__attribute__((aligned(32))) = {10126,31163,10126,31163,10126,31163,10126,31163,10126,31163,10126,31163,10126,31163,10126,31163}; const static int16_t W45s[16]__attribute__((aligned(32))) = {10126,31163,10126,31163,10126,31163,10126,31163,10126,31163,10126,31163,10126,31163,10126,31163};
#if defined(__x86_64__) || defined(__i386__) #if defined(__x86_64__) || defined(__i386__)
__m128i *W0 = (__m128i *)W0s; const __m128i *W0 = (__m128i *)W0s;
__m128i *W13 = (__m128i *)W13s; const __m128i *W13 = (__m128i *)W13s;
__m128i *W23 = (__m128i *)W23s; const __m128i *W23 = (__m128i *)W23s;
__m128i *W15 = (__m128i *)W15s; const __m128i *W15 = (__m128i *)W15s;
__m128i *W25 = (__m128i *)W25s; const __m128i *W25 = (__m128i *)W25s;
__m128i *W35 = (__m128i *)W35s; const __m128i *W35 = (__m128i *)W35s;
__m128i *W45 = (__m128i *)W45s; const __m128i *W45 = (__m128i *)W45s;
#ifdef __AVX2__ #ifdef __AVX2__
__m256i *W0_256 = (__m256i *)W0s; const __m256i *W0_256 = (__m256i *)W0s;
__m256i *W13_256 = (__m256i *)W13s; const __m256i *W13_256 = (__m256i *)W13s;
__m256i *W23_256 = (__m256i *)W23s; const __m256i *W23_256 = (__m256i *)W23s;
__m256i *W15_256 = (__m256i *)W15s; const __m256i *W15_256 = (__m256i *)W15s;
__m256i *W25_256 = (__m256i *)W25s; const __m256i *W25_256 = (__m256i *)W25s;
__m256i *W35_256 = (__m256i *)W35s; const __m256i *W35_256 = (__m256i *)W35s;
__m256i *W45_256 = (__m256i *)W45s; const __m256i *W45_256 = (__m256i *)W45s;
#endif #endif
#elif defined(__arm__) #elif defined(__arm__)
...@@ -512,7 +512,7 @@ int16x8_t *W25 = (int16x8_t *)W25s; ...@@ -512,7 +512,7 @@ int16x8_t *W25 = (int16x8_t *)W25s;
int16x8_t *W35 = (int16x8_t *)W35s; int16x8_t *W35 = (int16x8_t *)W35s;
int16x8_t *W45 = (int16x8_t *)W45s; int16x8_t *W45 = (int16x8_t *)W45s;
#endif #endif
static int16_t dft_norm_table[16] = {9459, //12 const static int16_t dft_norm_table[16] = {9459, //12
6689,//24 6689,//24
5461,//36 5461,//36
4729,//482 4729,//482
...@@ -528,7 +528,7 @@ static int16_t dft_norm_table[16] = {9459, //12 ...@@ -528,7 +528,7 @@ static int16_t dft_norm_table[16] = {9459, //12
16384,//2, //240 16384,//2, //240
18918,//sqrt(3), // 288 18918,//sqrt(3), // 288
14654 14654
}; //sqrt(5) //300 }; //sqrt(5) //300
#if defined(__x86_64__) || defined(__i386__) #if defined(__x86_64__) || defined(__i386__)
...@@ -2036,42 +2036,42 @@ static inline void transpose4_ooff(int16x4_t *x,int16x4_t *y,int off) ...@@ -2036,42 +2036,42 @@ static inline void transpose4_ooff(int16x4_t *x,int16x4_t *y,int off)
// 16-point optimized DFT kernel // 16-point optimized DFT kernel
int16_t tw16[24] __attribute__((aligned(32))) = { 32767,0,30272,-12540,23169 ,-23170,12539 ,-30273, const static int16_t tw16[24] __attribute__((aligned(32))) = { 32767,0,30272,-12540,23169 ,-23170,12539 ,-30273,
32767,0,23169,-23170,0 ,-32767,-23170,-23170, 32767,0,23169,-23170,0 ,-32767,-23170,-23170,
32767,0,12539,-30273,-23170,-23170,-30273,12539 32767,0,12539,-30273,-23170,-23170,-30273,12539
}; };
int16_t tw16a[24] __attribute__((aligned(32))) = {32767,0,30272,12540,23169 ,23170,12539 ,30273, const static int16_t tw16a[24] __attribute__((aligned(32))) = {32767,0,30272,12540,23169 ,23170,12539 ,30273,
32767,0,23169,23170,0 ,32767,-23170,23170, 32767,0,23169,23170,0 ,32767,-23170,23170,
32767,0,12539,30273,-23170,23170,-30273,-12539 32767,0,12539,30273,-23170,23170,-30273,-12539
}; };
int16_t tw16b[24] __attribute__((aligned(32))) = { 0,32767,-12540,30272,-23170,23169 ,-30273,12539, const static int16_t tw16b[24] __attribute__((aligned(32))) = { 0,32767,-12540,30272,-23170,23169 ,-30273,12539,
0,32767,-23170,23169,-32767,0 ,-23170,-23170, 0,32767,-23170,23169,-32767,0 ,-23170,-23170,
0,32767,-30273,12539,-23170,-23170,12539 ,-30273 0,32767,-30273,12539,-23170,-23170,12539 ,-30273
}; };
int16_t tw16c[24] __attribute__((aligned(32))) = { 0,32767,12540,30272,23170,23169 ,30273 ,12539, const static int16_t tw16c[24] __attribute__((aligned(32))) = { 0,32767,12540,30272,23170,23169 ,30273 ,12539,
0,32767,23170,23169,32767,0 ,23170 ,-23170, 0,32767,23170,23169,32767,0 ,23170 ,-23170,
0,32767,30273,12539,23170,-23170,-12539,-30273 0,32767,30273,12539,23170,-23170,-12539,-30273
}; };
int16_t tw16rep[48] __attribute__((aligned(32))) = { 32767,0,30272,-12540,23169 ,-23170,12539 ,-30273,32767,0,30272,-12540,23169 ,-23170,12539 ,-30273, const static int16_t tw16rep[48] __attribute__((aligned(32))) = { 32767,0,30272,-12540,23169 ,-23170,12539 ,-30273,32767,0,30272,-12540,23169 ,-23170,12539 ,-30273,
32767,0,23169,-23170,0 ,-32767,-23170,-23170,32767,0,23169,-23170,0 ,-32767,-23170,-23170, 32767,0,23169,-23170,0 ,-32767,-23170,-23170,32767,0,23169,-23170,0 ,-32767,-23170,-23170,
32767,0,12539,-30273,-23170,-23170,-30273,12539,32767,0,12539,-30273,-23170,-23170,-30273,12539 32767,0,12539,-30273,-23170,-23170,-30273,12539,32767,0,12539,-30273,-23170,-23170,-30273,12539
}; };
int16_t tw16arep[48] __attribute__((aligned(32))) = {32767,0,30272,12540,23169 ,23170,12539 ,30273,32767,0,30272,12540,23169 ,23170,12539 ,30273, const static int16_t tw16arep[48] __attribute__((aligned(32))) = {32767,0,30272,12540,23169 ,23170,12539 ,30273,32767,0,30272,12540,23169 ,23170,12539 ,30273,
32767,0,23169,23170,0 ,32767,-23170,23170,32767,0,23169,23170,0 ,32767,-23170,23170, 32767,0,23169,23170,0 ,32767,-23170,23170,32767,0,23169,23170,0 ,32767,-23170,23170,
32767,0,12539,30273,-23170,23170,-30273,-12539,32767,0,12539,30273,-23170,23170,-30273,-12539 32767,0,12539,30273,-23170,23170,-30273,-12539,32767,0,12539,30273,-23170,23170,-30273,-12539
}; };
int16_t tw16brep[48] __attribute__((aligned(32))) = { 0,32767,-12540,30272,-23170,23169 ,-30273,12539,0,32767,-12540,30272,-23170,23169 ,-30273,12539, const static int16_t tw16brep[48] __attribute__((aligned(32))) = { 0,32767,-12540,30272,-23170,23169 ,-30273,12539,0,32767,-12540,30272,-23170,23169 ,-30273,12539,
0,32767,-23170,23169,-32767,0 ,-23170,-23170,0,32767,-23170,23169,-32767,0 ,-23170,-23170, 0,32767,-23170,23169,-32767,0 ,-23170,-23170,0,32767,-23170,23169,-32767,0 ,-23170,-23170,
0,32767,-30273,12539,-23170,-23170,12539 ,-30273,0,32767,-30273,12539,-23170,-23170,12539 ,-30273 0,32767,-30273,12539,-23170,-23170,12539 ,-30273,0,32767,-30273,12539,-23170,-23170,12539 ,-30273
}; };
int16_t tw16crep[48] __attribute__((aligned(32))) = { 0,32767,12540,30272,23170,23169 ,30273 ,12539,0,32767,12540,30272,23170,23169 ,30273 ,12539, const static int16_t tw16crep[48] __attribute__((aligned(32))) = { 0,32767,12540,30272,23170,23169 ,30273 ,12539,0,32767,12540,30272,23170,23169 ,30273 ,12539,
0,32767,23170,23169,32767,0 ,23170 ,-23170,0,32767,23170,23169,32767,0 ,23170 ,-23170, 0,32767,23170,23169,32767,0 ,23170 ,-23170,0,32767,23170,23169,32767,0 ,23170 ,-23170,
0,32767,30273,12539,23170,-23170,-12539,-30273,0,32767,30273,12539,23170,-23170,-12539,-30273 0,32767,30273,12539,23170,-23170,-12539,-30273,0,32767,30273,12539,23170,-23170,-12539,-30273
}; };
...@@ -2502,7 +2502,7 @@ static inline void idft16_simd256(int16_t *x,int16_t *y) ...@@ -2502,7 +2502,7 @@ static inline void idft16_simd256(int16_t *x,int16_t *y)
// 64-point optimized DFT // 64-point optimized DFT
int16_t tw64[96] __attribute__((aligned(32))) = { const static int16_t tw64[96] __attribute__((aligned(32))) = {
32767,0,32609,-3212,32137,-6393,31356,-9512, 32767,0,32609,-3212,32137,-6393,31356,-9512,
30272,-12540,28897,-15447,27244,-18205,25329,-20788, 30272,-12540,28897,-15447,27244,-18205,25329,-20788,
23169,-23170,20787,-25330,18204,-27245,15446,-28898, 23169,-23170,20787,-25330,18204,-27245,15446,-28898,
...@@ -2517,7 +2517,7 @@ int16_t tw64[96] __attribute__((aligned(32))) = { ...@@ -2517,7 +2517,7 @@ int16_t tw64[96] __attribute__((aligned(32))) = {
-30273,12539,-25330,20787,-18205,27244,-9512,31356 -30273,12539,-25330,20787,-18205,27244,-9512,31356
}; };
int16_t tw64rep[192] __attribute__((aligned(32))) = { const static int16_t tw64rep[192] __attribute__((aligned(32))) = {
32767,0,32609,-3212,32137,-6393,31356,-9512,32767,0,32609,-3212,32137,-6393,31356,-9512, 32767,0,32609,-3212,32137,-6393,31356,-9512,32767,0,32609,-3212,32137,-6393,31356,-9512,
30272,-12540,28897,-15447,27244,-18205,25329,-20788,30272,-12540,28897,-15447,27244,-18205,25329,-20788, 30272,-12540,28897,-15447,27244,-18205,25329,-20788,30272,-12540,28897,-15447,27244,-18205,25329,-20788,
23169,-23170,20787,-25330,18204,-27245,15446,-28898,23169,-23170,20787,-25330,18204,-27245,15446,-28898, 23169,-23170,20787,-25330,18204,-27245,15446,-28898,23169,-23170,20787,-25330,18204,-27245,15446,-28898,
...@@ -2532,7 +2532,7 @@ int16_t tw64rep[192] __attribute__((aligned(32))) = { ...@@ -2532,7 +2532,7 @@ int16_t tw64rep[192] __attribute__((aligned(32))) = {
-30273,12539,-25330,20787,-18205,27244,-9512,31356,-30273,12539,-25330,20787,-18205,27244,-9512,31356 -30273,12539,-25330,20787,-18205,27244,-9512,31356,-30273,12539,-25330,20787,-18205,27244,-9512,31356
}; };
int16_t tw64a[96] __attribute__((aligned(32))) = { const static int16_t tw64a[96] __attribute__((aligned(32))) = {
32767,0,32609,3212,32137,6393,31356,9512, 32767,0,32609,3212,32137,6393,31356,9512,
30272,12540,28897,15447,27244,18205,25329,20788, 30272,12540,28897,15447,27244,18205,25329,20788,
23169,23170,20787,25330,18204,27245,15446,28898, 23169,23170,20787,25330,18204,27245,15446,28898,
...@@ -2546,7 +2546,7 @@ int16_t tw64a[96] __attribute__((aligned(32))) = { ...@@ -2546,7 +2546,7 @@ int16_t tw64a[96] __attribute__((aligned(32))) = {
-23170,23170,-28898,15447,-32138,6393,-32610,-3211, -23170,23170,-28898,15447,-32138,6393,-32610,-3211,
-30273,-12539,-25330,-20787,-18205,-27244,-9512,-31356 -30273,-12539,-25330,-20787,-18205,-27244,-9512,-31356
}; };
int16_t tw64arep[192] __attribute__((aligned(32))) = { const static int16_t tw64arep[192] __attribute__((aligned(32))) = {
32767,0,32609,3212,32137,6393,31356,9512,32767,0,32609,3212,32137,6393,31356,9512, 32767,0,32609,3212,32137,6393,31356,9512,32767,0,32609,3212,32137,6393,31356,9512,
30272,12540,28897,15447,27244,18205,25329,20788,30272,12540,28897,15447,27244,18205,25329,20788, 30272,12540,28897,15447,27244,18205,25329,20788,30272,12540,28897,15447,27244,18205,25329,20788,
23169,23170,20787,25330,18204,27245,15446,28898,23169,23170,20787,25330,18204,27245,15446,28898, 23169,23170,20787,25330,18204,27245,15446,28898,23169,23170,20787,25330,18204,27245,15446,28898,
...@@ -2561,7 +2561,7 @@ int16_t tw64arep[192] __attribute__((aligned(32))) = { ...@@ -2561,7 +2561,7 @@ int16_t tw64arep[192] __attribute__((aligned(32))) = {
-30273,-12539,-25330,-20787,-18205,-27244,-9512,-31356,-30273,-12539,-25330,-20787,-18205,-27244,-9512,-31356 -30273,-12539,-25330,-20787,-18205,-27244,-9512,-31356,-30273,-12539,-25330,-20787,-18205,-27244,-9512,-31356
}; };
int16_t tw64b[96] __attribute__((aligned(32))) = { const static int16_t tw64b[96] __attribute__((aligned(32))) = {
0,32767,-3212,32609,-6393,32137,-9512,31356, 0,32767,-3212,32609,-6393,32137,-9512,31356,
-12540,30272,-15447,28897,-18205,27244,-20788,25329, -12540,30272,-15447,28897,-18205,27244,-20788,25329,
-23170,23169,-25330,20787,-27245,18204,-28898,15446, -23170,23169,-25330,20787,-27245,18204,-28898,15446,
...@@ -2576,7 +2576,7 @@ int16_t tw64b[96] __attribute__((aligned(32))) = { ...@@ -2576,7 +2576,7 @@ int16_t tw64b[96] __attribute__((aligned(32))) = {
12539,-30273,20787,-25330,27244,-18205,31356,-9512 12539,-30273,20787,-25330,27244,-18205,31356,-9512
}; };
int16_t tw64brep[192] __attribute__((aligned(32))) = { const static int16_t tw64brep[192] __attribute__((aligned(32))) = {
0,32767,-3212,32609,-6393,32137,-9512,31356,0,32767,-3212,32609,-6393,32137,-9512,31356, 0,32767,-3212,32609,-6393,32137,-9512,31356,0,32767,-3212,32609,-6393,32137,-9512,31356,
-12540,30272,-15447,28897,-18205,27244,-20788,25329,-12540,30272,-15447,28897,-18205,27244,-20788,25329, -12540,30272,-15447,28897,-18205,27244,-20788,25329,-12540,30272,-15447,28897,-18205,27244,-20788,25329,
-23170,23169,-25330,20787,-27245,18204,-28898,15446,-23170,23169,-25330,20787,-27245,18204,-28898,15446, -23170,23169,-25330,20787,-27245,18204,-28898,15446,-23170,23169,-25330,20787,-27245,18204,-28898,15446,
...@@ -2591,7 +2591,7 @@ int16_t tw64brep[192] __attribute__((aligned(32))) = { ...@@ -2591,7 +2591,7 @@ int16_t tw64brep[192] __attribute__((aligned(32))) = {
12539,-30273,20787,-25330,27244,-18205,31356,-9512,12539,-30273,20787,-25330,27244,-18205,31356,-9512 12539,-30273,20787,-25330,27244,-18205,31356,-9512,12539,-30273,20787,-25330,27244,-18205,31356,-9512
}; };
int16_t tw64c[96] __attribute__((aligned(32))) = { const static int16_t tw64c[96] __attribute__((aligned(32))) = {
0,32767,3212,32609,6393,32137,9512,31356, 0,32767,3212,32609,6393,32137,9512,31356,
12540,30272,15447,28897,18205,27244,20788,25329, 12540,30272,15447,28897,18205,27244,20788,25329,
23170,23169,25330,20787,27245,18204,28898,15446, 23170,23169,25330,20787,27245,18204,28898,15446,
...@@ -2606,7 +2606,7 @@ int16_t tw64c[96] __attribute__((aligned(32))) = { ...@@ -2606,7 +2606,7 @@ int16_t tw64c[96] __attribute__((aligned(32))) = {
-12539,-30273,-20787,-25330,-27244,-18205,-31356,-9512 -12539,-30273,-20787,-25330,-27244,-18205,-31356,-9512
}; };
int16_t tw64crep[192] __attribute__((aligned(32))) = { const static int16_t tw64crep[192] __attribute__((aligned(32))) = {
0,32767,3212,32609,6393,32137,9512,31356,0,32767,3212,32609,6393,32137,9512,31356, 0,32767,3212,32609,6393,32137,9512,31356,0,32767,3212,32609,6393,32137,9512,31356,
12540,30272,15447,28897,18205,27244,20788,25329,12540,30272,15447,28897,18205,27244,20788,25329, 12540,30272,15447,28897,18205,27244,20788,25329,12540,30272,15447,28897,18205,27244,20788,25329,
23170,23169,25330,20787,27245,18204,28898,15446,23170,23169,25330,20787,27245,18204,28898,15446, 23170,23169,25330,20787,27245,18204,28898,15446,23170,23169,25330,20787,27245,18204,28898,15446,
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment