Commit 2c666a8e authored by Hongzhi Wang, committed by laurent

testing on x86

parent 8e833302
...@@ -39,7 +39,6 @@ ...@@ -39,7 +39,6 @@
#ifndef __CRC_H__ #ifndef __CRC_H__
#define __CRC_H__ #define __CRC_H__
#include "PHY/sse_intrin.h"
#include "crcext.h" #include "crcext.h"
#include "types.h" #include "types.h"
#include "PHY/sse_intrin.h" #include "PHY/sse_intrin.h"
...@@ -307,10 +306,17 @@ simde__m128i crc32_folding_round(const simde__m128i data_block, ...@@ -307,10 +306,17 @@ simde__m128i crc32_folding_round(const simde__m128i data_block,
const simde__m128i k1_k2, const simde__m128i k1_k2,
const simde__m128i fold) const simde__m128i fold)
{ {
#ifdef __x86_64__
__m128i tmp = _mm_clmulepi64_si128(fold, k1_k2, 0x11);
return _mm_xor_si128(_mm_clmulepi64_si128(fold, k1_k2, 0x00),
_mm_xor_si128(data_block, tmp));
#else
simde__m128i tmp = simde_mm_clmulepi64_si128(fold, k1_k2, 0x11); simde__m128i tmp = simde_mm_clmulepi64_si128(fold, k1_k2, 0x11);
return simde_mm_xor_si128(simde_mm_clmulepi64_si128(fold, k1_k2, 0x00), return simde_mm_xor_si128(simde_mm_clmulepi64_si128(fold, k1_k2, 0x00),
simde_mm_xor_si128(data_block, tmp)); simde_mm_xor_si128(data_block, tmp));
#endif
} }
/** /**
...@@ -326,12 +332,20 @@ simde__m128i crc32_reduce_128_to_64(simde__m128i data128, const simde__m128i k3_ ...@@ -326,12 +332,20 @@ simde__m128i crc32_reduce_128_to_64(simde__m128i data128, const simde__m128i k3_
{ {
simde__m128i tmp; simde__m128i tmp;
tmp = simde_mm_xor_si128(simde_mm_clmulepi64_si128(data128, k3_q, 0x01 /* k3 */), #ifdef __x86_64__
tmp = simde_mm_xor_si128(_mm_clmulepi64_si128(data128, k3_q, 0x01 /* k3 */),
data128); data128);
data128 = simde_mm_xor_si128(simde_mm_clmulepi64_si128(tmp, k3_q, 0x01 /* k3 */), data128 = simde_mm_xor_si128(_mm_clmulepi64_si128(tmp, k3_q, 0x01 /* k3 */),
data128); data128);
#else
tmp = _mm_xor_si128(_mm_clmulepi64_si128(data128, k3_q, 0x01 /* k3 */),
data128);
data128 = _mm_xor_si128(_mm_clmulepi64_si128(tmp, k3_q, 0x01 /* k3 */),
data128);
#endif
return simde_mm_srli_si128(simde_mm_slli_si128(data128, 8), 8); return simde_mm_srli_si128(simde_mm_slli_si128(data128, 8), 8);
} }
...@@ -349,11 +363,18 @@ uint32_t ...@@ -349,11 +363,18 @@ uint32_t
crc32_reduce_64_to_32(simde__m128i fold, const simde__m128i k3_q, const simde__m128i p_res) crc32_reduce_64_to_32(simde__m128i fold, const simde__m128i k3_q, const simde__m128i p_res)
{ {
simde__m128i temp; simde__m128i temp;
#ifdef __x86_64__
temp = _mm_clmulepi64_si128(simde_mm_srli_si128(fold, 4),
k3_q, 0x10 /* Q */);
temp = simde_mm_srli_si128(simde_mm_xor_si128(temp, fold), 4);
temp = _mm_clmulepi64_si128(temp, p_res, 0 /* P */);
#else
temp = simde_mm_clmulepi64_si128(simde_mm_srli_si128(fold, 4), temp = simde_mm_clmulepi64_si128(simde_mm_srli_si128(fold, 4),
k3_q, 0x10 /* Q */); k3_q, 0x10 /* Q */);
temp = simde_mm_srli_si128(simde_mm_xor_si128(temp, fold), 4); temp = simde_mm_srli_si128(simde_mm_xor_si128(temp, fold), 4);
temp = simde_mm_clmulepi64_si128(temp, p_res, 0 /* P */); temp = simde_mm_clmulepi64_si128(temp, p_res, 0 /* P */);
#endif
return simde_mm_extract_epi32(simde_mm_xor_si128(temp, fold), 0); return simde_mm_extract_epi32(simde_mm_xor_si128(temp, fold), 0);
} }
......
...@@ -35,7 +35,7 @@ ...@@ -35,7 +35,7 @@
#include "nrLDPC_bnProc.h" #include "nrLDPC_bnProc.h"
#define UNROLL_CN_PROC 1 #define UNROLL_CN_PROC 1
#define UNROLL_BN_PROC 1 #define UNROLL_BN_PROC 1
#define UNROLL_BN_PROC_PC 1 //#define UNROLL_BN_PROC_PC 1
#define UNROLL_BN2CN_PROC 1 #define UNROLL_BN2CN_PROC 1
/*---------------------------------------------------------------------- /*----------------------------------------------------------------------
| cn processing files -->AVX512 | cn processing files -->AVX512
......
#ifndef __AVX2__
#include "PHY/sse_intrin.h" #include "PHY/sse_intrin.h"
// generated code for Zc=128, byte encoding // generated code for Zc=128, byte encoding
static inline void ldpc_BG2_Zc128_byte(uint8_t *c,uint8_t *d) { static inline void ldpc_BG2_Zc128_byte(uint8_t *c,uint8_t *d) {
...@@ -137,3 +138,4 @@ static inline void ldpc_BG2_Zc128_byte(uint8_t *c,uint8_t *d) { ...@@ -137,3 +138,4 @@ static inline void ldpc_BG2_Zc128_byte(uint8_t *c,uint8_t *d) {
d2[328]=simde_mm_xor_si128(c2[2407],simde_mm_xor_si128(c2[2247],simde_mm_xor_si128(c2[1921],simde_mm_xor_si128(c2[964],simde_mm_xor_si128(c2[1782],simde_mm_xor_si128(c2[1622],simde_mm_xor_si128(c2[1142],simde_mm_xor_si128(c2[2103],simde_mm_xor_si128(c2[176],simde_mm_xor_si128(c2[356],simde_mm_xor_si128(c2[196],simde_mm_xor_si128(c2[2434],simde_mm_xor_si128(c2[49],simde_mm_xor_si128(c2[2448],simde_mm_xor_si128(c2[1809],simde_mm_xor_si128(c2[210],simde_mm_xor_si128(c2[391],simde_mm_xor_si128(c2[1506],simde_mm_xor_si128(c2[545],simde_mm_xor_si128(c2[407],simde_mm_xor_si128(c2[2165],simde_mm_xor_si128(c2[886],simde_mm_xor_si128(c2[579],simde_mm_xor_si128(c2[419],simde_mm_xor_si128(c2[1221],simde_mm_xor_si128(c2[1060],simde_mm_xor_si128(c2[273],simde_mm_xor_si128(c2[1874],simde_mm_xor_si128(c2[933],simde_mm_xor_si128(c2[934],simde_mm_xor_si128(c2[1248],simde_mm_xor_si128(c2[628],simde_mm_xor_si128(c2[468],simde_mm_xor_si128(c2[311],c2[951])))))))))))))))))))))))))))))))))); d2[328]=simde_mm_xor_si128(c2[2407],simde_mm_xor_si128(c2[2247],simde_mm_xor_si128(c2[1921],simde_mm_xor_si128(c2[964],simde_mm_xor_si128(c2[1782],simde_mm_xor_si128(c2[1622],simde_mm_xor_si128(c2[1142],simde_mm_xor_si128(c2[2103],simde_mm_xor_si128(c2[176],simde_mm_xor_si128(c2[356],simde_mm_xor_si128(c2[196],simde_mm_xor_si128(c2[2434],simde_mm_xor_si128(c2[49],simde_mm_xor_si128(c2[2448],simde_mm_xor_si128(c2[1809],simde_mm_xor_si128(c2[210],simde_mm_xor_si128(c2[391],simde_mm_xor_si128(c2[1506],simde_mm_xor_si128(c2[545],simde_mm_xor_si128(c2[407],simde_mm_xor_si128(c2[2165],simde_mm_xor_si128(c2[886],simde_mm_xor_si128(c2[579],simde_mm_xor_si128(c2[419],simde_mm_xor_si128(c2[1221],simde_mm_xor_si128(c2[1060],simde_mm_xor_si128(c2[273],simde_mm_xor_si128(c2[1874],simde_mm_xor_si128(c2[933],simde_mm_xor_si128(c2[934],simde_mm_xor_si128(c2[1248],simde_mm_xor_si128(c2[628],simde_mm_xor_si128(c2[468],simde_mm_xor_si128(c2[311],c2[951]))))))))))))))))))))))))))))))))));
} }
} }
#endif
#ifndef __AVX2__
#include "PHY/sse_intrin.h" #include "PHY/sse_intrin.h"
// generated code for Zc=160, byte encoding // generated code for Zc=160, byte encoding
static inline void ldpc_BG2_Zc160_byte(uint8_t *c,uint8_t *d) { static inline void ldpc_BG2_Zc160_byte(uint8_t *c,uint8_t *d) {
...@@ -137,3 +138,4 @@ static inline void ldpc_BG2_Zc160_byte(uint8_t *c,uint8_t *d) { ...@@ -137,3 +138,4 @@ static inline void ldpc_BG2_Zc160_byte(uint8_t *c,uint8_t *d) {
d2[410]=simde_mm_xor_si128(c2[1403],simde_mm_xor_si128(c2[1203],simde_mm_xor_si128(c2[3001],simde_mm_xor_si128(c2[2004],simde_mm_xor_si128(c2[1423],simde_mm_xor_si128(c2[1223],simde_mm_xor_si128(c2[829],simde_mm_xor_si128(c2[2425],simde_mm_xor_si128(c2[20],simde_mm_xor_si128(c2[1443],simde_mm_xor_si128(c2[1243],simde_mm_xor_si128(c2[1044],simde_mm_xor_si128(c2[1463],simde_mm_xor_si128(c2[1263],simde_mm_xor_si128(c2[461],simde_mm_xor_si128(c2[1869],simde_mm_xor_si128(c2[1283],simde_mm_xor_si128(c2[3083],simde_mm_xor_si128(c2[2489],simde_mm_xor_si128(c2[1303],simde_mm_xor_si128(c2[1702],simde_mm_xor_si128(c2[500],simde_mm_xor_si128(c2[1523],simde_mm_xor_si128(c2[1323],simde_mm_xor_si128(c2[2928],simde_mm_xor_si128(c2[524],simde_mm_xor_si128(c2[1343],simde_mm_xor_si128(c2[3146],simde_mm_xor_si128(c2[1363],simde_mm_xor_si128(c2[560],simde_mm_xor_si128(c2[2366],simde_mm_xor_si128(c2[1583],simde_mm_xor_si128(c2[1383],simde_mm_xor_si128(c2[2786],c2[986])))))))))))))))))))))))))))))))))); d2[410]=simde_mm_xor_si128(c2[1403],simde_mm_xor_si128(c2[1203],simde_mm_xor_si128(c2[3001],simde_mm_xor_si128(c2[2004],simde_mm_xor_si128(c2[1423],simde_mm_xor_si128(c2[1223],simde_mm_xor_si128(c2[829],simde_mm_xor_si128(c2[2425],simde_mm_xor_si128(c2[20],simde_mm_xor_si128(c2[1443],simde_mm_xor_si128(c2[1243],simde_mm_xor_si128(c2[1044],simde_mm_xor_si128(c2[1463],simde_mm_xor_si128(c2[1263],simde_mm_xor_si128(c2[461],simde_mm_xor_si128(c2[1869],simde_mm_xor_si128(c2[1283],simde_mm_xor_si128(c2[3083],simde_mm_xor_si128(c2[2489],simde_mm_xor_si128(c2[1303],simde_mm_xor_si128(c2[1702],simde_mm_xor_si128(c2[500],simde_mm_xor_si128(c2[1523],simde_mm_xor_si128(c2[1323],simde_mm_xor_si128(c2[2928],simde_mm_xor_si128(c2[524],simde_mm_xor_si128(c2[1343],simde_mm_xor_si128(c2[3146],simde_mm_xor_si128(c2[1363],simde_mm_xor_si128(c2[560],simde_mm_xor_si128(c2[2366],simde_mm_xor_si128(c2[1583],simde_mm_xor_si128(c2[1383],simde_mm_xor_si128(c2[2786],c2[986]))))))))))))))))))))))))))))))))));
} }
} }
#endif
#ifndef __AVX2__
#include "PHY/sse_intrin.h" #include "PHY/sse_intrin.h"
// generated code for Zc=192, byte encoding // generated code for Zc=192, byte encoding
static inline void ldpc_BG2_Zc192_byte(uint8_t *c,uint8_t *d) { static inline void ldpc_BG2_Zc192_byte(uint8_t *c,uint8_t *d) {
...@@ -137,3 +138,4 @@ static inline void ldpc_BG2_Zc192_byte(uint8_t *c,uint8_t *d) { ...@@ -137,3 +138,4 @@ static inline void ldpc_BG2_Zc192_byte(uint8_t *c,uint8_t *d) {
d2[492]=simde_mm_xor_si128(c2[2402],simde_mm_xor_si128(c2[2162],simde_mm_xor_si128(c2[1445],simde_mm_xor_si128(c2[965],simde_mm_xor_si128(c2[3153],simde_mm_xor_si128(c2[2913],simde_mm_xor_si128(c2[3154],simde_mm_xor_si128(c2[744],simde_mm_xor_si128(c2[753],simde_mm_xor_si128(c2[530],simde_mm_xor_si128(c2[290],simde_mm_xor_si128(c2[2450],simde_mm_xor_si128(c2[3439],simde_mm_xor_si128(c2[3199],simde_mm_xor_si128(c2[3677],simde_mm_xor_si128(c2[83],simde_mm_xor_si128(c2[2742],simde_mm_xor_si128(c2[2266],simde_mm_xor_si128(c2[2986],simde_mm_xor_si128(c2[1809],simde_mm_xor_si128(c2[848],simde_mm_xor_si128(c2[1800],simde_mm_xor_si128(c2[872],simde_mm_xor_si128(c2[632],simde_mm_xor_si128(c2[2549],simde_mm_xor_si128(c2[1595],simde_mm_xor_si128(c2[1611],simde_mm_xor_si128(c2[419],simde_mm_xor_si128(c2[1155],simde_mm_xor_si128(c2[3322],simde_mm_xor_si128(c2[1877],simde_mm_xor_si128(c2[2138],simde_mm_xor_si128(c2[1898],simde_mm_xor_si128(c2[3579],c2[1419])))))))))))))))))))))))))))))))))); d2[492]=simde_mm_xor_si128(c2[2402],simde_mm_xor_si128(c2[2162],simde_mm_xor_si128(c2[1445],simde_mm_xor_si128(c2[965],simde_mm_xor_si128(c2[3153],simde_mm_xor_si128(c2[2913],simde_mm_xor_si128(c2[3154],simde_mm_xor_si128(c2[744],simde_mm_xor_si128(c2[753],simde_mm_xor_si128(c2[530],simde_mm_xor_si128(c2[290],simde_mm_xor_si128(c2[2450],simde_mm_xor_si128(c2[3439],simde_mm_xor_si128(c2[3199],simde_mm_xor_si128(c2[3677],simde_mm_xor_si128(c2[83],simde_mm_xor_si128(c2[2742],simde_mm_xor_si128(c2[2266],simde_mm_xor_si128(c2[2986],simde_mm_xor_si128(c2[1809],simde_mm_xor_si128(c2[848],simde_mm_xor_si128(c2[1800],simde_mm_xor_si128(c2[872],simde_mm_xor_si128(c2[632],simde_mm_xor_si128(c2[2549],simde_mm_xor_si128(c2[1595],simde_mm_xor_si128(c2[1611],simde_mm_xor_si128(c2[419],simde_mm_xor_si128(c2[1155],simde_mm_xor_si128(c2[3322],simde_mm_xor_si128(c2[1877],simde_mm_xor_si128(c2[2138],simde_mm_xor_si128(c2[1898],simde_mm_xor_si128(c2[3579],c2[1419]))))))))))))))))))))))))))))))))));
} }
} }
#endif
#ifndef __AVX2__
#include "PHY/sse_intrin.h" #include "PHY/sse_intrin.h"
// generated code for Zc=224, byte encoding // generated code for Zc=224, byte encoding
static inline void ldpc_BG2_Zc224_byte(uint8_t *c,uint8_t *d) { static inline void ldpc_BG2_Zc224_byte(uint8_t *c,uint8_t *d) {
...@@ -137,3 +138,4 @@ static inline void ldpc_BG2_Zc224_byte(uint8_t *c,uint8_t *d) { ...@@ -137,3 +138,4 @@ static inline void ldpc_BG2_Zc224_byte(uint8_t *c,uint8_t *d) {
d2[574]=simde_mm_xor_si128(c2[2529],simde_mm_xor_si128(c2[2809],simde_mm_xor_si128(c2[1968],simde_mm_xor_si128(c2[2800],simde_mm_xor_si128(c2[4239],simde_mm_xor_si128(c2[40],simde_mm_xor_si128(c2[1441],simde_mm_xor_si128(c2[3110],simde_mm_xor_si128(c2[2275],simde_mm_xor_si128(c2[2302],simde_mm_xor_si128(c2[2582],simde_mm_xor_si128(c2[2301],simde_mm_xor_si128(c2[1766],simde_mm_xor_si128(c2[2046],simde_mm_xor_si128(c2[4004],simde_mm_xor_si128(c2[92],simde_mm_xor_si128(c2[1524],simde_mm_xor_si128(c2[395],simde_mm_xor_si128(c2[1799],simde_mm_xor_si128(c2[4060],simde_mm_xor_si128(c2[4072],simde_mm_xor_si128(c2[1546],simde_mm_xor_si128(c2[179],simde_mm_xor_si128(c2[459],simde_mm_xor_si128(c2[1580],simde_mm_xor_si128(c2[4100],simde_mm_xor_si128(c2[2999],simde_mm_xor_si128(c2[208],simde_mm_xor_si128(c2[4430],simde_mm_xor_si128(c2[3874],simde_mm_xor_si128(c2[4144],simde_mm_xor_si128(c2[2777],simde_mm_xor_si128(c2[3057],simde_mm_xor_si128(c2[539],c2[2225])))))))))))))))))))))))))))))))))); d2[574]=simde_mm_xor_si128(c2[2529],simde_mm_xor_si128(c2[2809],simde_mm_xor_si128(c2[1968],simde_mm_xor_si128(c2[2800],simde_mm_xor_si128(c2[4239],simde_mm_xor_si128(c2[40],simde_mm_xor_si128(c2[1441],simde_mm_xor_si128(c2[3110],simde_mm_xor_si128(c2[2275],simde_mm_xor_si128(c2[2302],simde_mm_xor_si128(c2[2582],simde_mm_xor_si128(c2[2301],simde_mm_xor_si128(c2[1766],simde_mm_xor_si128(c2[2046],simde_mm_xor_si128(c2[4004],simde_mm_xor_si128(c2[92],simde_mm_xor_si128(c2[1524],simde_mm_xor_si128(c2[395],simde_mm_xor_si128(c2[1799],simde_mm_xor_si128(c2[4060],simde_mm_xor_si128(c2[4072],simde_mm_xor_si128(c2[1546],simde_mm_xor_si128(c2[179],simde_mm_xor_si128(c2[459],simde_mm_xor_si128(c2[1580],simde_mm_xor_si128(c2[4100],simde_mm_xor_si128(c2[2999],simde_mm_xor_si128(c2[208],simde_mm_xor_si128(c2[4430],simde_mm_xor_si128(c2[3874],simde_mm_xor_si128(c2[4144],simde_mm_xor_si128(c2[2777],simde_mm_xor_si128(c2[3057],simde_mm_xor_si128(c2[539],c2[2225]))))))))))))))))))))))))))))))))));
} }
} }
#endif
#ifndef __AVX2__
#include "PHY/sse_intrin.h" #include "PHY/sse_intrin.h"
// generated code for Zc=256, byte encoding // generated code for Zc=256, byte encoding
static inline void ldpc_BG2_Zc256_byte(uint8_t *c,uint8_t *d) { static inline void ldpc_BG2_Zc256_byte(uint8_t *c,uint8_t *d) {
...@@ -137,3 +138,4 @@ static inline void ldpc_BG2_Zc256_byte(uint8_t *c,uint8_t *d) { ...@@ -137,3 +138,4 @@ static inline void ldpc_BG2_Zc256_byte(uint8_t *c,uint8_t *d) {
d2[656]=simde_mm_xor_si128(c2[4807],simde_mm_xor_si128(c2[4487],simde_mm_xor_si128(c2[3841],simde_mm_xor_si128(c2[1932],simde_mm_xor_si128(c2[3566],simde_mm_xor_si128(c2[3246],simde_mm_xor_si128(c2[2286],simde_mm_xor_si128(c2[4199],simde_mm_xor_si128(c2[360],simde_mm_xor_si128(c2[708],simde_mm_xor_si128(c2[388],simde_mm_xor_si128(c2[4874],simde_mm_xor_si128(c2[105],simde_mm_xor_si128(c2[4904],simde_mm_xor_si128(c2[3617],simde_mm_xor_si128(c2[426],simde_mm_xor_si128(c2[775],simde_mm_xor_si128(c2[3018],simde_mm_xor_si128(c2[1089],simde_mm_xor_si128(c2[815],simde_mm_xor_si128(c2[4333],simde_mm_xor_si128(c2[1774],simde_mm_xor_si128(c2[1155],simde_mm_xor_si128(c2[835],simde_mm_xor_si128(c2[2437],simde_mm_xor_si128(c2[2116],simde_mm_xor_si128(c2[545],simde_mm_xor_si128(c2[3754],simde_mm_xor_si128(c2[1861],simde_mm_xor_si128(c2[1862],simde_mm_xor_si128(c2[2504],simde_mm_xor_si128(c2[1252],simde_mm_xor_si128(c2[932],simde_mm_xor_si128(c2[615],c2[1903])))))))))))))))))))))))))))))))))); d2[656]=simde_mm_xor_si128(c2[4807],simde_mm_xor_si128(c2[4487],simde_mm_xor_si128(c2[3841],simde_mm_xor_si128(c2[1932],simde_mm_xor_si128(c2[3566],simde_mm_xor_si128(c2[3246],simde_mm_xor_si128(c2[2286],simde_mm_xor_si128(c2[4199],simde_mm_xor_si128(c2[360],simde_mm_xor_si128(c2[708],simde_mm_xor_si128(c2[388],simde_mm_xor_si128(c2[4874],simde_mm_xor_si128(c2[105],simde_mm_xor_si128(c2[4904],simde_mm_xor_si128(c2[3617],simde_mm_xor_si128(c2[426],simde_mm_xor_si128(c2[775],simde_mm_xor_si128(c2[3018],simde_mm_xor_si128(c2[1089],simde_mm_xor_si128(c2[815],simde_mm_xor_si128(c2[4333],simde_mm_xor_si128(c2[1774],simde_mm_xor_si128(c2[1155],simde_mm_xor_si128(c2[835],simde_mm_xor_si128(c2[2437],simde_mm_xor_si128(c2[2116],simde_mm_xor_si128(c2[545],simde_mm_xor_si128(c2[3754],simde_mm_xor_si128(c2[1861],simde_mm_xor_si128(c2[1862],simde_mm_xor_si128(c2[2504],simde_mm_xor_si128(c2[1252],simde_mm_xor_si128(c2[932],simde_mm_xor_si128(c2[615],c2[1903]))))))))))))))))))))))))))))))))));
} }
} }
#endif
#ifndef __AVX2__
#include "PHY/sse_intrin.h" #include "PHY/sse_intrin.h"
// generated code for Zc=288, byte encoding // generated code for Zc=288, byte encoding
static inline void ldpc_BG2_Zc288_byte(uint8_t *c,uint8_t *d) { static inline void ldpc_BG2_Zc288_byte(uint8_t *c,uint8_t *d) {
...@@ -137,3 +138,4 @@ static inline void ldpc_BG2_Zc288_byte(uint8_t *c,uint8_t *d) { ...@@ -137,3 +138,4 @@ static inline void ldpc_BG2_Zc288_byte(uint8_t *c,uint8_t *d) {
d2[738]=simde_mm_xor_si128(c2[2161],simde_mm_xor_si128(c2[1801],simde_mm_xor_si128(c2[1802],simde_mm_xor_si128(c2[367],simde_mm_xor_si128(c2[4718],simde_mm_xor_si128(c2[4358],simde_mm_xor_si128(c2[4363],simde_mm_xor_si128(c2[3644],simde_mm_xor_si128(c2[39],simde_mm_xor_si128(c2[2956],simde_mm_xor_si128(c2[2596],simde_mm_xor_si128(c2[4040],simde_mm_xor_si128(c2[2271],simde_mm_xor_si128(c2[1911],simde_mm_xor_si128(c2[115],simde_mm_xor_si128(c2[5154],simde_mm_xor_si128(c2[3751],simde_mm_xor_si128(c2[152],simde_mm_xor_si128(c2[3026],simde_mm_xor_si128(c2[1625],simde_mm_xor_si128(c2[2341],simde_mm_xor_si128(c2[5582],simde_mm_xor_si128(c2[2384],simde_mm_xor_si128(c2[2024],simde_mm_xor_si128(c2[2382],simde_mm_xor_si128(c2[4181],simde_mm_xor_si128(c2[1699],simde_mm_xor_si128(c2[1336],simde_mm_xor_si128(c2[2813],simde_mm_xor_si128(c2[656],simde_mm_xor_si128(c2[1017],simde_mm_xor_si128(c2[1053],simde_mm_xor_si128(c2[693],simde_mm_xor_si128(c2[1768],c2[690])))))))))))))))))))))))))))))))))); d2[738]=simde_mm_xor_si128(c2[2161],simde_mm_xor_si128(c2[1801],simde_mm_xor_si128(c2[1802],simde_mm_xor_si128(c2[367],simde_mm_xor_si128(c2[4718],simde_mm_xor_si128(c2[4358],simde_mm_xor_si128(c2[4363],simde_mm_xor_si128(c2[3644],simde_mm_xor_si128(c2[39],simde_mm_xor_si128(c2[2956],simde_mm_xor_si128(c2[2596],simde_mm_xor_si128(c2[4040],simde_mm_xor_si128(c2[2271],simde_mm_xor_si128(c2[1911],simde_mm_xor_si128(c2[115],simde_mm_xor_si128(c2[5154],simde_mm_xor_si128(c2[3751],simde_mm_xor_si128(c2[152],simde_mm_xor_si128(c2[3026],simde_mm_xor_si128(c2[1625],simde_mm_xor_si128(c2[2341],simde_mm_xor_si128(c2[5582],simde_mm_xor_si128(c2[2384],simde_mm_xor_si128(c2[2024],simde_mm_xor_si128(c2[2382],simde_mm_xor_si128(c2[4181],simde_mm_xor_si128(c2[1699],simde_mm_xor_si128(c2[1336],simde_mm_xor_si128(c2[2813],simde_mm_xor_si128(c2[656],simde_mm_xor_si128(c2[1017],simde_mm_xor_si128(c2[1053],simde_mm_xor_si128(c2[693],simde_mm_xor_si128(c2[1768],c2[690]))))))))))))))))))))))))))))))))));
} }
} }
#endif
#ifndef __AVX2__
#include "PHY/sse_intrin.h" #include "PHY/sse_intrin.h"
// generated code for Zc=320, byte encoding // generated code for Zc=320, byte encoding
static inline void ldpc_BG2_Zc320_byte(uint8_t *c,uint8_t *d) { static inline void ldpc_BG2_Zc320_byte(uint8_t *c,uint8_t *d) {
...@@ -137,3 +138,4 @@ static inline void ldpc_BG2_Zc320_byte(uint8_t *c,uint8_t *d) { ...@@ -137,3 +138,4 @@ static inline void ldpc_BG2_Zc320_byte(uint8_t *c,uint8_t *d) {
d2[820]=simde_mm_xor_si128(c2[2803],simde_mm_xor_si128(c2[2403],simde_mm_xor_si128(c2[6011],simde_mm_xor_si128(c2[4004],simde_mm_xor_si128(c2[2843],simde_mm_xor_si128(c2[2443],simde_mm_xor_si128(c2[1649],simde_mm_xor_si128(c2[4845],simde_mm_xor_si128(c2[40],simde_mm_xor_si128(c2[2883],simde_mm_xor_si128(c2[2483],simde_mm_xor_si128(c2[2084],simde_mm_xor_si128(c2[2923],simde_mm_xor_si128(c2[2523],simde_mm_xor_si128(c2[931],simde_mm_xor_si128(c2[3729],simde_mm_xor_si128(c2[2563],simde_mm_xor_si128(c2[6163],simde_mm_xor_si128(c2[4969],simde_mm_xor_si128(c2[2603],simde_mm_xor_si128(c2[3412],simde_mm_xor_si128(c2[1000],simde_mm_xor_si128(c2[3043],simde_mm_xor_si128(c2[2643],simde_mm_xor_si128(c2[5848],simde_mm_xor_si128(c2[1044],simde_mm_xor_si128(c2[2683],simde_mm_xor_si128(c2[6286],simde_mm_xor_si128(c2[2723],simde_mm_xor_si128(c2[1130],simde_mm_xor_si128(c2[4726],simde_mm_xor_si128(c2[3163],simde_mm_xor_si128(c2[2763],simde_mm_xor_si128(c2[5566],c2[1966])))))))))))))))))))))))))))))))))); d2[820]=simde_mm_xor_si128(c2[2803],simde_mm_xor_si128(c2[2403],simde_mm_xor_si128(c2[6011],simde_mm_xor_si128(c2[4004],simde_mm_xor_si128(c2[2843],simde_mm_xor_si128(c2[2443],simde_mm_xor_si128(c2[1649],simde_mm_xor_si128(c2[4845],simde_mm_xor_si128(c2[40],simde_mm_xor_si128(c2[2883],simde_mm_xor_si128(c2[2483],simde_mm_xor_si128(c2[2084],simde_mm_xor_si128(c2[2923],simde_mm_xor_si128(c2[2523],simde_mm_xor_si128(c2[931],simde_mm_xor_si128(c2[3729],simde_mm_xor_si128(c2[2563],simde_mm_xor_si128(c2[6163],simde_mm_xor_si128(c2[4969],simde_mm_xor_si128(c2[2603],simde_mm_xor_si128(c2[3412],simde_mm_xor_si128(c2[1000],simde_mm_xor_si128(c2[3043],simde_mm_xor_si128(c2[2643],simde_mm_xor_si128(c2[5848],simde_mm_xor_si128(c2[1044],simde_mm_xor_si128(c2[2683],simde_mm_xor_si128(c2[6286],simde_mm_xor_si128(c2[2723],simde_mm_xor_si128(c2[1130],simde_mm_xor_si128(c2[4726],simde_mm_xor_si128(c2[3163],simde_mm_xor_si128(c2[2763],simde_mm_xor_si128(c2[5566],c2[1966]))))))))))))))))))))))))))))))))))
;
} }
} }
#endif
#ifndef __AVX2__
#include "PHY/sse_intrin.h" #include "PHY/sse_intrin.h"
// generated code for Zc=352, byte encoding // generated code for Zc=352, byte encoding
static inline void ldpc_BG2_Zc352_byte(uint8_t *c,uint8_t *d) { static inline void ldpc_BG2_Zc352_byte(uint8_t *c,uint8_t *d) {
...@@ -137,3 +138,4 @@ static inline void ldpc_BG2_Zc352_byte(uint8_t *c,uint8_t *d) { ...@@ -137,3 +138,4 @@ static inline void ldpc_BG2_Zc352_byte(uint8_t *c,uint8_t *d) {
d2[902]=simde_mm_xor_si128(c2[1770],simde_mm_xor_si128(c2[1330],simde_mm_xor_si128(c2[3521],simde_mm_xor_si128(c2[3966],simde_mm_xor_si128(c2[3133],simde_mm_xor_si128(c2[2693],simde_mm_xor_si128(c2[6654],simde_mm_xor_si128(c2[5327],simde_mm_xor_si128(c2[1812],simde_mm_xor_si128(c2[2729],simde_mm_xor_si128(c2[2289],simde_mm_xor_si128(c2[2299],simde_mm_xor_si128(c2[4972],simde_mm_xor_si128(c2[4532],simde_mm_xor_si128(c2[3656],simde_mm_xor_si128(c2[1019],simde_mm_xor_si128(c2[2820],simde_mm_xor_si128(c2[4141],simde_mm_xor_si128(c2[2387],simde_mm_xor_si128(c2[3740],simde_mm_xor_si128(c2[2868],simde_mm_xor_si128(c2[2860],simde_mm_xor_si128(c2[267],simde_mm_xor_si128(c2[6866],simde_mm_xor_si128(c2[6427],simde_mm_xor_si128(c2[3785],simde_mm_xor_si128(c2[5590],simde_mm_xor_si128(c2[314],simde_mm_xor_si128(c2[1243],simde_mm_xor_si128(c2[2561],simde_mm_xor_si128(c2[3873],simde_mm_xor_si128(c2[1724],simde_mm_xor_si128(c2[1284],simde_mm_xor_si128(c2[5684],c2[403])))))))))))))))))))))))))))))))))); d2[902]=simde_mm_xor_si128(c2[1770],simde_mm_xor_si128(c2[1330],simde_mm_xor_si128(c2[3521],simde_mm_xor_si128(c2[3966],simde_mm_xor_si128(c2[3133],simde_mm_xor_si128(c2[2693],simde_mm_xor_si128(c2[6654],simde_mm_xor_si128(c2[5327],simde_mm_xor_si128(c2[1812],simde_mm_xor_si128(c2[2729],simde_mm_xor_si128(c2[2289],simde_mm_xor_si128(c2[2299],simde_mm_xor_si128(c2[4972],simde_mm_xor_si128(c2[4532],simde_mm_xor_si128(c2[3656],simde_mm_xor_si128(c2[1019],simde_mm_xor_si128(c2[2820],simde_mm_xor_si128(c2[4141],simde_mm_xor_si128(c2[2387],simde_mm_xor_si128(c2[3740],simde_mm_xor_si128(c2[2868],simde_mm_xor_si128(c2[2860],simde_mm_xor_si128(c2[267],simde_mm_xor_si128(c2[6866],simde_mm_xor_si128(c2[6427],simde_mm_xor_si128(c2[3785],simde_mm_xor_si128(c2[5590],simde_mm_xor_si128(c2[314],simde_mm_xor_si128(c2[1243],simde_mm_xor_si128(c2[2561],simde_mm_xor_si128(c2[3873],simde_mm_xor_si128(c2[1724],simde_mm_xor_si128(c2[1284],simde_mm_xor_si128(c2[5684],c2[403]))))))))))))))))))))))))))))))))))
;
} }
} }
#endif
#ifndef __AVX2__
#include "PHY/sse_intrin.h" #include "PHY/sse_intrin.h"
// generated code for Zc=384, byte encoding // generated code for Zc=384, byte encoding
static inline void ldpc_BG2_Zc384_byte(uint8_t *c,uint8_t *d) { static inline void ldpc_BG2_Zc384_byte(uint8_t *c,uint8_t *d) {
...@@ -137,3 +138,4 @@ static inline void ldpc_BG2_Zc384_byte(uint8_t *c,uint8_t *d) { ...@@ -137,3 +138,4 @@ static inline void ldpc_BG2_Zc384_byte(uint8_t *c,uint8_t *d) {
d2[984]=simde_mm_xor_si128(c2[4814],simde_mm_xor_si128(c2[4334],simde_mm_xor_si128(c2[2885],simde_mm_xor_si128(c2[1925],simde_mm_xor_si128(c2[6297],simde_mm_xor_si128(c2[5817],simde_mm_xor_si128(c2[6298],simde_mm_xor_si128(c2[1500],simde_mm_xor_si128(c2[1497],simde_mm_xor_si128(c2[1070],simde_mm_xor_si128(c2[590],simde_mm_xor_si128(c2[4910],simde_mm_xor_si128(c2[6871],simde_mm_xor_si128(c2[6391],simde_mm_xor_si128(c2[7349],simde_mm_xor_si128(c2[155],simde_mm_xor_si128(c2[5478],simde_mm_xor_si128(c2[4522],simde_mm_xor_si128(c2[5962],simde_mm_xor_si128(c2[3609],simde_mm_xor_si128(c2[1688],simde_mm_xor_si128(c2[3600],simde_mm_xor_si128(c2[1736],simde_mm_xor_si128(c2[1256],simde_mm_xor_si128(c2[5093],simde_mm_xor_si128(c2[3179],simde_mm_xor_si128(c2[3231],simde_mm_xor_si128(c2[827],simde_mm_xor_si128(c2[2319],simde_mm_xor_si128(c2[6634],simde_mm_xor_si128(c2[3749],simde_mm_xor_si128(c2[4286],simde_mm_xor_si128(c2[3806],simde_mm_xor_si128(c2[7155],c2[2847])))))))))))))))))))))))))))))))))); d2[984]=simde_mm_xor_si128(c2[4814],simde_mm_xor_si128(c2[4334],simde_mm_xor_si128(c2[2885],simde_mm_xor_si128(c2[1925],simde_mm_xor_si128(c2[6297],simde_mm_xor_si128(c2[5817],simde_mm_xor_si128(c2[6298],simde_mm_xor_si128(c2[1500],simde_mm_xor_si128(c2[1497],simde_mm_xor_si128(c2[1070],simde_mm_xor_si128(c2[590],simde_mm_xor_si128(c2[4910],simde_mm_xor_si128(c2[6871],simde_mm_xor_si128(c2[6391],simde_mm_xor_si128(c2[7349],simde_mm_xor_si128(c2[155],simde_mm_xor_si128(c2[5478],simde_mm_xor_si128(c2[4522],simde_mm_xor_si128(c2[5962],simde_mm_xor_si128(c2[3609],simde_mm_xor_si128(c2[1688],simde_mm_xor_si128(c2[3600],simde_mm_xor_si128(c2[1736],simde_mm_xor_si128(c2[1256],simde_mm_xor_si128(c2[5093],simde_mm_xor_si128(c2[3179],simde_mm_xor_si128(c2[3231],simde_mm_xor_si128(c2[827],simde_mm_xor_si128(c2[2319],simde_mm_xor_si128(c2[6634],simde_mm_xor_si128(c2[3749],simde_mm_xor_si128(c2[4286],simde_mm_xor_si128(c2[3806],simde_mm_xor_si128(c2[7155],c2[2847]))))))))))))))))))))))))))))))))))
;
} }
} }
#endif
#ifndef __AVX2__
#include "PHY/sse_intrin.h" #include "PHY/sse_intrin.h"
// generated code for Zc=96, byte encoding // generated code for Zc=96, byte encoding
static inline void ldpc_BG2_Zc96_byte(uint8_t *c,uint8_t *d) { static inline void ldpc_BG2_Zc96_byte(uint8_t *c,uint8_t *d) {
...@@ -137,3 +138,4 @@ static inline void ldpc_BG2_Zc96_byte(uint8_t *c,uint8_t *d) { ...@@ -137,3 +138,4 @@ static inline void ldpc_BG2_Zc96_byte(uint8_t *c,uint8_t *d) {
d2[246]=simde_mm_xor_si128(c2[1202],simde_mm_xor_si128(c2[1082],simde_mm_xor_si128(c2[725],simde_mm_xor_si128(c2[485],simde_mm_xor_si128(c2[1575],simde_mm_xor_si128(c2[1455],simde_mm_xor_si128(c2[1576],simde_mm_xor_si128(c2[372],simde_mm_xor_si128(c2[375],simde_mm_xor_si128(c2[266],simde_mm_xor_si128(c2[146],simde_mm_xor_si128(c2[1226],simde_mm_xor_si128(c2[1717],simde_mm_xor_si128(c2[1597],simde_mm_xor_si128(c2[1841],simde_mm_xor_si128(c2[41],simde_mm_xor_si128(c2[1368],simde_mm_xor_si128(c2[1132],simde_mm_xor_si128(c2[1492],simde_mm_xor_si128(c2[903],simde_mm_xor_si128(c2[422],simde_mm_xor_si128(c2[900],simde_mm_xor_si128(c2[434],simde_mm_xor_si128(c2[314],simde_mm_xor_si128(c2[1277],simde_mm_xor_si128(c2[797],simde_mm_xor_si128(c2[807],simde_mm_xor_si128(c2[209],simde_mm_xor_si128(c2[579],simde_mm_xor_si128(c2[1660],simde_mm_xor_si128(c2[941],simde_mm_xor_si128(c2[1070],simde_mm_xor_si128(c2[950],simde_mm_xor_si128(c2[1791],c2[711])))))))))))))))))))))))))))))))))); d2[246]=simde_mm_xor_si128(c2[1202],simde_mm_xor_si128(c2[1082],simde_mm_xor_si128(c2[725],simde_mm_xor_si128(c2[485],simde_mm_xor_si128(c2[1575],simde_mm_xor_si128(c2[1455],simde_mm_xor_si128(c2[1576],simde_mm_xor_si128(c2[372],simde_mm_xor_si128(c2[375],simde_mm_xor_si128(c2[266],simde_mm_xor_si128(c2[146],simde_mm_xor_si128(c2[1226],simde_mm_xor_si128(c2[1717],simde_mm_xor_si128(c2[1597],simde_mm_xor_si128(c2[1841],simde_mm_xor_si128(c2[41],simde_mm_xor_si128(c2[1368],simde_mm_xor_si128(c2[1132],simde_mm_xor_si128(c2[1492],simde_mm_xor_si128(c2[903],simde_mm_xor_si128(c2[422],simde_mm_xor_si128(c2[900],simde_mm_xor_si128(c2[434],simde_mm_xor_si128(c2[314],simde_mm_xor_si128(c2[1277],simde_mm_xor_si128(c2[797],simde_mm_xor_si128(c2[807],simde_mm_xor_si128(c2[209],simde_mm_xor_si128(c2[579],simde_mm_xor_si128(c2[1660],simde_mm_xor_si128(c2[941],simde_mm_xor_si128(c2[1070],simde_mm_xor_si128(c2[950],simde_mm_xor_si128(c2[1791],c2[711]))))))))))))))))))))))))))))))))));
} }
} }
#endif
...@@ -49,10 +49,6 @@ ...@@ -49,10 +49,6 @@
#define SSE_INTRIN_H #define SSE_INTRIN_H
#if defined(__x86_64) || defined(__i386__)
/* x86 processors */
#include <simde/x86/mmx.h> #include <simde/x86/mmx.h>
#include <simde/x86/sse.h> #include <simde/x86/sse.h>
#include <simde/x86/sse2.h> #include <simde/x86/sse2.h>
...@@ -62,24 +58,20 @@ ...@@ -62,24 +58,20 @@
#include <simde/x86/sse4.2.h> #include <simde/x86/sse4.2.h>
#include <simde/x86/avx2.h> #include <simde/x86/avx2.h>
#include <simde/x86/fma.h> #include <simde/x86/fma.h>
#include <simde/x86/clmul.h> #if defined(__x86_64) || defined(__i386__)
/* x86 processors */
#if defined(__AVX512BW__) || defined(__AVX512F__) #if defined(__AVX512BW__) || defined(__AVX512F__)
#include <immintrin.h> #include <immintrin.h>
#endif #endif
#elif defined(__arm__) || defined(__aarch64__) #elif defined(__arm__) || defined(__aarch64__)
/* ARM processors */ /* ARM processors */
// note this fails on some x86 machines, with an error like:
#include <simde/x86/mmx.h> // /usr/lib/gcc/x86_64-redhat-linux/8/include/gfniintrin.h:57:1: error: inlining failed in call to always_inline ‘_mm_gf2p8affine_epi64_epi8’: target specific option mismatch
#include <simde/x86/sse.h>
#include <simde/x86/sse2.h>
#include <simde/x86/sse3.h>
#include <simde/x86/ssse3.h>
#include <simde/x86/sse4.1.h>
#include <simde/x86/sse4.2.h>
#include <simde/x86/avx2.h>
#include <simde/x86/fma.h>
#include <simde/x86/clmul.h> #include <simde/x86/clmul.h>
#include <simde/arm/neon.h> #include <simde/arm/neon.h>
#include <stdbool.h> #include <stdbool.h>
#define _MM_SHUFFLE(z, y, x, w) (((z) << 6) | ((y) << 4) | ((x) << 2) | (w)) #define _MM_SHUFFLE(z, y, x, w) (((z) << 6) | ((y) << 4) | ((x) << 2) | (w))
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment