Commit d902d76d authored by laurent

fix performance issue in simde package

parent bad1c258
......@@ -205,12 +205,12 @@ if(EXISTS "/proc/cpuinfo")
# Report the AVX512 / AVX2 switches at configure time.
message("AVX512 is ${AVX512}")
message("AVX2 is ${AVX2}")
# Select processor flags depending on whether AVX512 is the literal string "False".
if ("${AVX512}" STREQUAL "False")
# NOTE(review): the two set() calls below both append -mno-avx512f. This looks
# like the pre-commit line followed by its replacement (diff markers are not
# visible here); if both really execute, the flag is appended twice -- confirm
# against the repository file.
set(C_FLAGS_PROCESSOR "${C_FLAGS_PROCESSOR} -mno-avx512f")
# Appends -march=native plus a long list of SIMDE_X86_*_NATIVE defines.
# NOTE(review): this line passes -mno-avx512f and also defines
# SIMDE_X86_AVX512BW_NATIVE / SIMDE_X86_AVX512F_NATIVE / SIMDE_X86_AVX512VL_NATIVE;
# presumably those macros ask SIMDe to use native AVX512 intrinsics, which would
# conflict with disabling AVX512F -- verify this is intended.
# NOTE(review): -DSIMDE_X86_AVX2_NATIVE and -DSIMDE_X86_AVX_NATIVE each appear
# twice in this string.
set(C_FLAGS_PROCESSOR "${C_FLAGS_PROCESSOR} -mno-avx512f -march=native -DSIMDE_X86_AVX2_NATIVE -DSIMDE_X86_AVX2_NATIVE -DSIMDE_X86_AVX512BW_NATIVE -DSIMDE_X86_AVX512F_NATIVE -DSIMDE_X86_AVX512VL_NATIVE -DSIMDE_X86_AVX_NATIVE -DSIMDE_X86_AVX_NATIVE -DSIMDE_X86_F16C_NATIVE -DSIMDE_X86_FMA_NATIVE -DSIMDE_X86_GFNI_NATIVE -DSIMDE_X86_MMX_NATIVE -DSIMDE_X86_PCLMUL_NATIVE -DSIMDE_X86_SSE2_NATIVE -DSIMDE_X86_SSE3_NATIVE -DSIMDE_X86_SSE4_1_NATIVE -DSIMDE_X86_SSE4_2_NATIVE -DSIMDE_X86_SSE_NATIVE -DSIMDE_X86_SSSE3_NATIVE -DSIMDE_X86_VPCLMULQDQ_NATIVE -DSIMDE_X86_XOP_HAVE_COM_ -DSIMDE_X86_XOP_NATIVE")
else()
# Any other AVX512 value: enable AVX512BW and target/tune for skylake-avx512.
set(C_FLAGS_PROCESSOR "${C_FLAGS_PROCESSOR} -mavx512bw -march=skylake-avx512 -mtune=skylake-avx512")
endif()
# Extra defines when CPUINFO contains "avx2" and AVX2 is the literal string "True".
if (CPUINFO MATCHES "avx2" AND "${AVX2}" STREQUAL "True")
# NOTE(review): as above, these two set() calls look like the old and new
# versions of one line; if both execute, -DSIMDE_X86_AVX2_NATIVE is appended
# twice -- confirm against the repository file.
set(C_FLAGS_PROCESSOR "${C_FLAGS_PROCESSOR} -DSIMDE_X86_AVX2_NATIVE")
set(C_FLAGS_PROCESSOR "${C_FLAGS_PROCESSOR} -DSIMDE_X86_AVX2_NATIVE -DSIMDE_X86_VPCLMULQDQ_NATIVE")
endif()
if (CPUINFO MATCHES "sse4_1")
set(C_FLAGS_PROCESSOR "${C_FLAGS_PROCESSOR} -DSIMDE_X86_SSE4_1_NATIVE")
......
......@@ -927,11 +927,33 @@ install_asn1c_from_source(){
# Installs the SIMDe headers system-wide:
#   1. removes any previous /tmp/simde and clones simde-no-tests there,
#   2. copies that checkout into /usr/include (as /usr/include/simde),
#   3. patches /usr/include/simde/x86/avx.h so simde_mm256_insertf128_si256()
#      returns _mm256_insertf128_si256(a, b, imm8) when SIMDE_X86_AVX_NATIVE
#      is defined, keeping the existing portable code as the #else branch.
# Uses $SUDO for the privileged steps; takes no arguments.
# NOTE(review): no step checks its exit status here, so a failed clone is not
# detected before the copy/patch steps -- confirm whether the caller handles this.
install_simde_from_source(){
echo_info "\nInstalling SIMDE from source without test cases (header files only)"
cd /tmp
# Start from a clean checkout directory.
$SUDO rm -rf /tmp/simde
git clone https://github.com/simd-everywhere/simde-no-tests.git /tmp/simde
cd /tmp/simde
# brute force copy into /usr/include
# The leading backslash on cp bypasses any shell alias named cp.
$SUDO \cp -rv ../simde /usr/include
# Apply the inline unified diff below (read from stdin) to the installed avx.h.
# NOTE(review): the heredoc delimiter FIN is unquoted, so the shell would expand
# variables/backticks in the patch text; the text currently contains neither.
# NOTE(review): the hunk header declares 12 old / 16 new lines, but only
# 10 context + 4 added lines are visible here and the context lines carry no
# leading space -- possibly whitespace/blank lines lost in rendering; verify
# against the repository file before relying on this text.
$SUDO patch /usr/include/simde/x86/avx.h << FIN
--- /usr/include/simde/x86/avx.h.old 2022-10-03 19:17:39.828223432 +0200
+++ /usr/include/simde/x86/avx.h 2022-10-05 16:19:55.086019445 +0200
@@ -3636,12 +3636,16 @@
SIMDE_FUNCTION_ATTRIBUTES
simde__m256i simde_mm256_insertf128_si256(simde__m256i a, simde__m128i b, int imm8)
SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) {
+#if defined(SIMDE_X86_AVX_NATIVE)
+ return _mm256_insertf128_si256(a, b, imm8);
+#else
simde__m256i_private a_ = simde__m256i_to_private(a);
simde__m128i_private b_ = simde__m128i_to_private(b);
a_.m128i_private[imm8] = b_;
return simde__m256i_from_private(a_);
+#endif
}
#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)
#undef _mm256_insertf128_si256
FIN
}
#################################################
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment