diff --git a/configure.ac b/configure.ac index 3e8cf18..3beea03 100644 --- a/configure.ac +++ b/configure.ac @@ -66,5 +66,14 @@ AC_ARG_ENABLE([valgrind], [enable_valgrind=no]) AM_CONDITIONAL(ENABLE_VALGRIND, test "x$enable_valgrind" != xno) +AC_ARG_ENABLE([avx], AS_HELP_STRING([--enable-avx], [Build with AVX optimizations])) +AX_CHECK_COMPILE_FLAG(-mavx, [ax_cv_support_avx=yes], []) + +AS_IF([test "x$enable_avx" = "xyes"], + [AS_IF([test "x$ax_cv_support_avx" = "xno"], + [AC_MSG_ERROR([AVX requested but compiler does not support -mavx])], + [SIMD_FLAGS="$SIMD_FLAGS -mavx"]) + ]) + AC_CONFIG_FILES([Makefile src/Makefile tools/Makefile test/Makefile examples/Makefile]) AC_OUTPUT diff --git a/m4/ax_ext.m4 b/m4/ax_ext.m4 index c03ccef..95c4dbe 100644 --- a/m4/ax_ext.m4 +++ b/m4/ax_ext.m4 @@ -1,40 +1,7 @@ # -# Updated by KMG to support -DINTEL_SSE for GF-Complete +# This macro is based on http://www.gnu.org/software/autoconf-archive/ax_ext.html +# but simplified to do compile time SIMD checks only # -# =========================================================================== -# http://www.gnu.org/software/autoconf-archive/ax_ext.html -# =========================================================================== -# -# SYNOPSIS -# -# AX_EXT -# -# DESCRIPTION -# -# Find supported SIMD extensions by requesting cpuid. When an SIMD -# extension is found, the -m"simdextensionname" is added to SIMD_FLAGS if -# compiler supports it. For example, if "sse2" is available, then "-msse2" -# is added to SIMD_FLAGS. -# -# This macro calls: -# -# AC_SUBST(SIMD_FLAGS) -# -# And defines: -# -# HAVE_MMX / HAVE_SSE / HAVE_SSE2 / HAVE_SSE3 / HAVE_SSSE3 / HAVE_SSE4.1 / HAVE_SSE4.2 / HAVE_AVX -# -# LICENSE -# -# Copyright (c) 2007 Christophe Tournayre -# Copyright (c) 2013 Michael Petch -# -# Copying and distribution of this file, with or without modification, are -# permitted in any medium without royalty provided the copyright notice -# and this notice are preserved. This file is offered as-is, without any -# warranty. - -#serial 12 AC_DEFUN([AX_EXT], [ @@ -45,263 +12,63 @@ AC_DEFUN([AX_EXT], AC_DEFINE(HAVE_ARCH_AARCH64,,[targeting AArch64]) SIMD_FLAGS="$SIMD_FLAGS -DARCH_AARCH64" - AC_CACHE_CHECK([whether NEON is supported], [ax_cv_have_neon_ext], - [ - # TODO: detect / cross-compile - ax_cv_have_neon_ext=yes - ]) - AC_CACHE_CHECK([whether cryptographic extension is supported], [ax_cv_have_arm_crypt_ext], - [ - # TODO: detect / cross-compile - ax_cv_have_arm_crypt_ext=yes - ]) - - if test "$ax_cv_have_arm_crypt_ext" = yes; then - AC_DEFINE(HAVE_ARM_CRYPT_EXT,,[Support ARM cryptographic extension]) - fi - + AC_CACHE_CHECK([whether NEON is enabled], [ax_cv_have_neon_ext], [ax_cv_have_neon_ext=yes]) if test "$ax_cv_have_neon_ext" = yes; then - AC_DEFINE(HAVE_NEON,,[Support NEON instructions]) + AX_CHECK_COMPILE_FLAG(-march=armv8-a+simd, [SIMD_FLAGS="$SIMD_FLAGS -march=armv8-a+simd -DARM_NEON"], [ax_cv_have_neon_ext=no]) fi - - if test "$ax_cv_have_arm_crypt_ext" = yes && test "$ax_cv_have_neon_ext" = yes; then - AX_CHECK_COMPILE_FLAG(-march=armv8-a+simd+crypto, - SIMD_FLAGS="$SIMD_FLAGS -march=armv8-a+simd+crypto -DARM_CRYPT -DARM_NEON", []) - elif test "$ax_cv_have_arm_crypt_ext" = yes; then - AX_CHECK_COMPILE_FLAG(-march=armv8-a+crypto, - SIMD_FLAGS="$SIMD_FLAGS -march=armv8-a+crypto -DARM_CRYPT", []) - elif test "$ax_cv_have_neon_ext" = yes; then - AX_CHECK_COMPILE_FLAG(-march=armv8-a+simd, - SIMD_FLAGS="$SIMD_FLAGS -march=armv8-a+simd -DARM_NEON", []) - fi - ;; + ;; arm*) - AC_CACHE_CHECK([whether NEON is supported], [ax_cv_have_neon_ext], - [ - # TODO: detect / cross-compile - ax_cv_have_neon_ext=yes - ]) - + AC_CACHE_CHECK([whether NEON is enabled], [ax_cv_have_neon_ext], [ax_cv_have_neon_ext=yes]) if test "$ax_cv_have_neon_ext" = yes; then - AC_DEFINE(HAVE_NEON,,[Support NEON instructions]) - AX_CHECK_COMPILE_FLAG(-mfpu=neon, - SIMD_FLAGS="$SIMD_FLAGS -mfpu=neon -DARM_NEON", []) + AX_CHECK_COMPILE_FLAG(-mfpu=neon, [SIMD_FLAGS="$SIMD_FLAGS -mfpu=neon -DARM_NEON"], [ax_cv_have_neon_ext=no]) fi - ;; + ;; powerpc*) - AC_CACHE_CHECK([whether altivec is supported], [ax_cv_have_altivec_ext], - [ - if test `/usr/sbin/sysctl -a 2>/dev/null| grep -c hw.optional.altivec` != 0; then - if test `/usr/sbin/sysctl -n hw.optional.altivec` = 1; then - ax_cv_have_altivec_ext=yes - fi - fi - ]) - - if test "$ax_cv_have_altivec_ext" = yes; then - AC_DEFINE(HAVE_ALTIVEC,,[Support Altivec instructions]) - AX_CHECK_COMPILE_FLAG(-faltivec, SIMD_FLAGS="$SIMD_FLAGS -faltivec", []) - fi - ;; - + AC_CACHE_CHECK([whether altivec is enabled], [ax_cv_have_altivec_ext], [ax_cv_have_altivec_ext=yes]) + if test "$ax_cv_have_altivec_ext" = yes; then + AX_CHECK_COMPILE_FLAG(-faltivec, [SIMD_FLAGS="$SIMD_FLAGS -faltivec"], [ax_cv_have_altivec_ext=no]) + fi + ;; i[[3456]]86*|x86_64*|amd64*) - AC_REQUIRE([AX_GCC_X86_CPUID]) - AC_REQUIRE([AX_GCC_X86_AVX_XGETBV]) - - AX_GCC_X86_CPUID(0x00000001) - ecx=`echo $ax_cv_gcc_x86_cpuid_0x00000001 | cut -d ":" -f 3` - edx=`echo $ax_cv_gcc_x86_cpuid_0x00000001 | cut -d ":" -f 4` - - AC_CACHE_CHECK([whether mmx is supported], [ax_cv_have_mmx_ext], - [ - ax_cv_have_mmx_ext=no - if test "$((0x$edx>>23&0x01))" = 1; then - ax_cv_have_mmx_ext=yes - fi - ]) - - AC_CACHE_CHECK([whether sse is supported], [ax_cv_have_sse_ext], - [ - ax_cv_have_sse_ext=no - if test "$((0x$edx>>25&0x01))" = 1; then - ax_cv_have_sse_ext=yes - fi - ]) - - AC_CACHE_CHECK([whether sse2 is supported], [ax_cv_have_sse2_ext], - [ - ax_cv_have_sse2_ext=no - if test "$((0x$edx>>26&0x01))" = 1; then - ax_cv_have_sse2_ext=yes - fi - ]) - - AC_CACHE_CHECK([whether sse3 is supported], [ax_cv_have_sse3_ext], - [ - ax_cv_have_sse3_ext=no - if test "$((0x$ecx&0x01))" = 1; then - ax_cv_have_sse3_ext=yes - fi - ]) - - AC_CACHE_CHECK([whether pclmuldq is supported], [ax_cv_have_pclmuldq_ext], - [ - ax_cv_have_pclmuldq_ext=no - if test "$((0x$ecx>>1&0x01))" = 1; then - ax_cv_have_pclmuldq_ext=yes - fi - ]) - - AC_CACHE_CHECK([whether ssse3 is supported], [ax_cv_have_ssse3_ext], - [ - ax_cv_have_ssse3_ext=no - if test "$((0x$ecx>>9&0x01))" = 1; then - ax_cv_have_ssse3_ext=yes - fi - ]) - - AC_CACHE_CHECK([whether sse4.1 is supported], [ax_cv_have_sse41_ext], - [ - ax_cv_have_sse41_ext=no - if test "$((0x$ecx>>19&0x01))" = 1; then - ax_cv_have_sse41_ext=yes - fi - ]) - - AC_CACHE_CHECK([whether sse4.2 is supported], [ax_cv_have_sse42_ext], - [ - ax_cv_have_sse42_ext=no - if test "$((0x$ecx>>20&0x01))" = 1; then - ax_cv_have_sse42_ext=yes - fi - ]) - - AC_CACHE_CHECK([whether avx is supported by processor], [ax_cv_have_avx_cpu_ext], - [ - ax_cv_have_avx_cpu_ext=no - if test "$((0x$ecx>>28&0x01))" = 1; then - ax_cv_have_avx_cpu_ext=yes - fi - ]) - - if test x"$ax_cv_have_avx_cpu_ext" = x"yes"; then - AX_GCC_X86_AVX_XGETBV(0x00000000) - - xgetbv_eax="0" - if test x"$ax_cv_gcc_x86_avx_xgetbv_0x00000000" != x"unknown"; then - xgetbv_eax=`echo $ax_cv_gcc_x86_avx_xgetbv_0x00000000 | cut -d ":" -f 1` - fi - - AC_CACHE_CHECK([whether avx is supported by operating system], [ax_cv_have_avx_ext], - [ - ax_cv_have_avx_ext=no - - if test "$((0x$ecx>>27&0x01))" = 1; then - if test "$((0x$xgetbv_eax&0x6))" = 6; then - ax_cv_have_avx_ext=yes - fi - fi - ]) - if test x"$ax_cv_have_avx_ext" = x"no"; then - AC_MSG_WARN([Your processor supports AVX, but your operating system doesn't]) - fi - fi - - if test "$ax_cv_have_mmx_ext" = yes; then - AX_CHECK_COMPILE_FLAG(-mmmx, ax_cv_support_mmx_ext=yes, []) - if test x"$ax_cv_support_mmx_ext" = x"yes"; then - SIMD_FLAGS="$SIMD_FLAGS -mmmx" - AC_DEFINE(HAVE_MMX,,[Support mmx instructions]) - else - AC_MSG_WARN([Your processor supports mmx instructions but not your compiler, can you try another compiler?]) - fi - fi - + AC_CACHE_CHECK([whether sse is enabled], [ax_cv_have_sse_ext], [ax_cv_have_sse_ext=yes]) if test "$ax_cv_have_sse_ext" = yes; then - AX_CHECK_COMPILE_FLAG(-msse, ax_cv_support_sse_ext=yes, []) - if test x"$ax_cv_support_sse_ext" = x"yes"; then - SIMD_FLAGS="$SIMD_FLAGS -msse -DINTEL_SSE" - AC_DEFINE(HAVE_SSE,,[Support SSE (Streaming SIMD Extensions) instructions]) - else - AC_MSG_WARN([Your processor supports sse instructions but not your compiler, can you try another compiler?]) - fi + AX_CHECK_COMPILE_FLAG(-msse, [SIMD_FLAGS="$SIMD_FLAGS -msse -DINTEL_SSE"], [ax_cv_have_sse_ext=no]) fi + AC_CACHE_CHECK([whether sse2 is enabled], [ax_cv_have_sse2_ext], [ax_cv_have_sse2_ext=yes]) if test "$ax_cv_have_sse2_ext" = yes; then - AX_CHECK_COMPILE_FLAG(-msse2, ax_cv_support_sse2_ext=yes, []) - if test x"$ax_cv_support_sse2_ext" = x"yes"; then - SIMD_FLAGS="$SIMD_FLAGS -msse2 -DINTEL_SSE2" - AC_DEFINE(HAVE_SSE2,,[Support SSE2 (Streaming SIMD Extensions 2) instructions]) - else - AC_MSG_WARN([Your processor supports sse2 instructions but not your compiler, can you try another compiler?]) - fi + AX_CHECK_COMPILE_FLAG(-msse2, [SIMD_FLAGS="$SIMD_FLAGS -msse2 -DINTEL_SSE2"], [ax_cv_have_sse2_ext=no]) fi + AC_CACHE_CHECK([whether sse3 is enabled], [ax_cv_have_sse3_ext], [ax_cv_have_sse3_ext=yes]) if test "$ax_cv_have_sse3_ext" = yes; then - AX_CHECK_COMPILE_FLAG(-msse3, ax_cv_support_sse3_ext=yes, []) - if test x"$ax_cv_support_sse3_ext" = x"yes"; then - SIMD_FLAGS="$SIMD_FLAGS -msse3 -DINTEL_SSE3" - AC_DEFINE(HAVE_SSE3,,[Support SSE3 (Streaming SIMD Extensions 3) instructions]) - else - AC_MSG_WARN([Your processor supports sse3 instructions but not your compiler, can you try another compiler?]) - fi - fi - - if test "$ax_cv_have_pclmuldq_ext" = yes; then - AX_CHECK_COMPILE_FLAG(-mpclmul, ax_cv_support_pclmuldq_ext=yes, []) - if test x"$ax_cv_support_pclmuldq_ext" = x"yes"; then - SIMD_FLAGS="$SIMD_FLAGS -mpclmul -DINTEL_SSE4_PCLMUL" - AC_DEFINE(HAVE_PCLMULDQ,,[Support (PCLMULDQ) Carry-Free Muliplication]) - else - AC_MSG_WARN([Your processor supports pclmuldq instructions but not your compiler, can you try another compiler?]) - fi + AX_CHECK_COMPILE_FLAG(-msse3, [SIMD_FLAGS="$SIMD_FLAGS -msse3 -DINTEL_SSE3"], [ax_cv_have_sse3_ext=no]) fi + AC_CACHE_CHECK([whether ssse3 is enabled], [ax_cv_have_ssse3_ext], [ax_cv_have_ssse3_ext=yes]) if test "$ax_cv_have_ssse3_ext" = yes; then - AX_CHECK_COMPILE_FLAG(-mssse3, ax_cv_support_ssse3_ext=yes, []) - if test x"$ax_cv_support_ssse3_ext" = x"yes"; then - SIMD_FLAGS="$SIMD_FLAGS -mssse3 -DINTEL_SSSE3" - AC_DEFINE(HAVE_SSSE3,,[Support SSSE3 (Supplemental Streaming SIMD Extensions 3) instructions]) - else - AC_MSG_WARN([Your processor supports ssse3 instructions but not your compiler, can you try another compiler?]) - fi + AX_CHECK_COMPILE_FLAG(-mssse3, [SIMD_FLAGS="$SIMD_FLAGS -mssse3 -DINTEL_SSSE3"], [ax_cv_have_ssse3_ext=no]) fi + AC_CACHE_CHECK([whether pclmuldq is enabled], [ax_cv_have_pclmuldq_ext], [ax_cv_have_pclmuldq_ext=yes]) + if test "$ax_cv_have_pclmuldq_ext" = yes; then + AX_CHECK_COMPILE_FLAG(-mpclmul, [SIMD_FLAGS="$SIMD_FLAGS -mpclmul -DINTEL_SSE4_PCLMUL"], [ax_cv_have_pclmuldq_ext=no]) + fi + + AC_CACHE_CHECK([whether sse4.1 is enabled], [ax_cv_have_sse41_ext], [ax_cv_have_sse41_ext=yes]) if test "$ax_cv_have_sse41_ext" = yes; then - AX_CHECK_COMPILE_FLAG(-msse4.1, ax_cv_support_sse41_ext=yes, []) - if test x"$ax_cv_support_sse41_ext" = x"yes"; then - SIMD_FLAGS="$SIMD_FLAGS -msse4.1 -DINTEL_SSE4" - AC_DEFINE(HAVE_SSE4_1,,[Support SSSE4.1 (Streaming SIMD Extensions 4.1) instructions]) - else - AC_MSG_WARN([Your processor supports sse4.1 instructions but not your compiler, can you try another compiler?]) - fi + AX_CHECK_COMPILE_FLAG(-msse4.1, [SIMD_FLAGS="$SIMD_FLAGS -msse4.1 -DINTEL_SSE4"], [ax_cv_have_sse41_ext=no]) fi + AC_CACHE_CHECK([whether sse4.2 is enabled], [ax_cv_have_sse42_ext], [ax_cv_have_sse42_ext=yes]) if test "$ax_cv_have_sse42_ext" = yes; then - AX_CHECK_COMPILE_FLAG(-msse4.2, ax_cv_support_sse42_ext=yes, []) - if test x"$ax_cv_support_sse42_ext" = x"yes"; then - SIMD_FLAGS="$SIMD_FLAGS -msse4.2 -DINTEL_SSE4" - AC_DEFINE(HAVE_SSE4_2,,[Support SSSE4.2 (Streaming SIMD Extensions 4.2) instructions]) - else - AC_MSG_WARN([Your processor supports sse4.2 instructions but not your compiler, can you try another compiler?]) - fi + AX_CHECK_COMPILE_FLAG(-msse4.2, [SIMD_FLAGS="$SIMD_FLAGS -msse4.2 -DINTEL_SSE4"], [ax_cv_have_sse42_ext=no]) fi - - if test "$ax_cv_have_avx_ext" = yes; then - AX_CHECK_COMPILE_FLAG(-mavx, ax_cv_support_avx_ext=yes, []) - if test x"$ax_cv_support_avx_ext" = x"yes"; then - SIMD_FLAGS="$SIMD_FLAGS -mavx" - AC_DEFINE(HAVE_AVX,,[Support AVX (Advanced Vector Extensions) instructions]) - else - AC_MSG_WARN([Your processor supports avx instructions but not your compiler, can you try another compiler?]) - fi - fi - - ;; + ;; esac AC_SUBST(SIMD_FLAGS) diff --git a/m4/ax_gcc_x86_avx_xgetbv.m4 b/m4/ax_gcc_x86_avx_xgetbv.m4 deleted file mode 100644 index 0624eeb..0000000 --- a/m4/ax_gcc_x86_avx_xgetbv.m4 +++ /dev/null @@ -1,79 +0,0 @@ -# =========================================================================== -# http://www.gnu.org/software/autoconf-archive/ax_gcc_x86_avx_xgetbv.html -# =========================================================================== -# -# SYNOPSIS -# -# AX_GCC_X86_AVX_XGETBV -# -# DESCRIPTION -# -# On later x86 processors with AVX SIMD support, with gcc or a compiler -# that has a compatible syntax for inline assembly instructions, run a -# small program that executes the xgetbv instruction with input OP. This -# can be used to detect if the OS supports AVX instruction usage. -# -# On output, the values of the eax and edx registers are stored as -# hexadecimal strings as "eax:edx" in the cache variable -# ax_cv_gcc_x86_avx_xgetbv. -# -# If the xgetbv instruction fails (because you are running a -# cross-compiler, or because you are not using gcc, or because you are on -# a processor that doesn't have this instruction), -# ax_cv_gcc_x86_avx_xgetbv_OP is set to the string "unknown". -# -# This macro mainly exists to be used in AX_EXT. -# -# LICENSE -# -# Copyright (c) 2013 Michael Petch -# -# This program is free software: you can redistribute it and/or modify it -# under the terms of the GNU General Public License as published by the -# Free Software Foundation, either version 3 of the License, or (at your -# option) any later version. -# -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General -# Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program. If not, see . -# -# As a special exception, the respective Autoconf Macro's copyright owner -# gives unlimited permission to copy, distribute and modify the configure -# scripts that are the output of Autoconf when processing the Macro. You -# need not follow the terms of the GNU General Public License when using -# or distributing such scripts, even though portions of the text of the -# Macro appear in them. The GNU General Public License (GPL) does govern -# all other use of the material that constitutes the Autoconf Macro. -# -# This special exception to the GPL applies to versions of the Autoconf -# Macro released by the Autoconf Archive. When you make and distribute a -# modified version of the Autoconf Macro, you may extend this special -# exception to the GPL to apply to your modified version as well. - -#serial 1 - -AC_DEFUN([AX_GCC_X86_AVX_XGETBV], -[AC_REQUIRE([AC_PROG_CC]) -AC_LANG_PUSH([C]) -AC_CACHE_CHECK(for x86-AVX xgetbv $1 output, ax_cv_gcc_x86_avx_xgetbv_$1, - [AC_RUN_IFELSE([AC_LANG_PROGRAM([#include ], [ - int op = $1, eax, edx; - FILE *f; - /* Opcodes for xgetbv */ - __asm__(".byte 0x0f, 0x01, 0xd0" - : "=a" (eax), "=d" (edx) - : "c" (op)); - f = fopen("conftest_xgetbv", "w"); if (!f) return 1; - fprintf(f, "%x:%x\n", eax, edx); - fclose(f); - return 0; -])], - [ax_cv_gcc_x86_avx_xgetbv_$1=`cat conftest_xgetbv`; rm -f conftest_xgetbv], - [ax_cv_gcc_x86_avx_xgetbv_$1=unknown; rm -f conftest_xgetbv], - [ax_cv_gcc_x86_avx_xgetbv_$1=unknown])]) -AC_LANG_POP([C]) -]) diff --git a/m4/ax_gcc_x86_cpuid.m4 b/m4/ax_gcc_x86_cpuid.m4 deleted file mode 100644 index 7d46fee..0000000 --- a/m4/ax_gcc_x86_cpuid.m4 +++ /dev/null @@ -1,79 +0,0 @@ -# =========================================================================== -# http://www.gnu.org/software/autoconf-archive/ax_gcc_x86_cpuid.html -# =========================================================================== -# -# SYNOPSIS -# -# AX_GCC_X86_CPUID(OP) -# -# DESCRIPTION -# -# On Pentium and later x86 processors, with gcc or a compiler that has a -# compatible syntax for inline assembly instructions, run a small program -# that executes the cpuid instruction with input OP. This can be used to -# detect the CPU type. -# -# On output, the values of the eax, ebx, ecx, and edx registers are stored -# as hexadecimal strings as "eax:ebx:ecx:edx" in the cache variable -# ax_cv_gcc_x86_cpuid_OP. -# -# If the cpuid instruction fails (because you are running a -# cross-compiler, or because you are not using gcc, or because you are on -# a processor that doesn't have this instruction), ax_cv_gcc_x86_cpuid_OP -# is set to the string "unknown". -# -# This macro mainly exists to be used in AX_GCC_ARCHFLAG. -# -# LICENSE -# -# Copyright (c) 2008 Steven G. Johnson -# Copyright (c) 2008 Matteo Frigo -# -# This program is free software: you can redistribute it and/or modify it -# under the terms of the GNU General Public License as published by the -# Free Software Foundation, either version 3 of the License, or (at your -# option) any later version. -# -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General -# Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program. If not, see . -# -# As a special exception, the respective Autoconf Macro's copyright owner -# gives unlimited permission to copy, distribute and modify the configure -# scripts that are the output of Autoconf when processing the Macro. You -# need not follow the terms of the GNU General Public License when using -# or distributing such scripts, even though portions of the text of the -# Macro appear in them. The GNU General Public License (GPL) does govern -# all other use of the material that constitutes the Autoconf Macro. -# -# This special exception to the GPL applies to versions of the Autoconf -# Macro released by the Autoconf Archive. When you make and distribute a -# modified version of the Autoconf Macro, you may extend this special -# exception to the GPL to apply to your modified version as well. - -#serial 7 - -AC_DEFUN([AX_GCC_X86_CPUID], -[AC_REQUIRE([AC_PROG_CC]) -AC_LANG_PUSH([C]) -AC_CACHE_CHECK(for x86 cpuid $1 output, ax_cv_gcc_x86_cpuid_$1, - [AC_RUN_IFELSE([AC_LANG_PROGRAM([#include ], [ - int op = $1, eax, ebx, ecx, edx; - FILE *f; - __asm__("cpuid" - : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) - : "a" (op)); - f = fopen("conftest_cpuid", "w"); if (!f) return 1; - fprintf(f, "%x:%x:%x:%x\n", eax, ebx, ecx, edx); - fclose(f); - return 0; -])], - [ax_cv_gcc_x86_cpuid_$1=`cat conftest_cpuid`; rm -f conftest_cpuid], - [ax_cv_gcc_x86_cpuid_$1=unknown; rm -f conftest_cpuid], - [ax_cv_gcc_x86_cpuid_$1=unknown])]) -AC_LANG_POP([C]) -]) diff --git a/src/gf_cpu.c b/src/gf_cpu.c index ee2f847..fae2cd5 100644 --- a/src/gf_cpu.c +++ b/src/gf_cpu.c @@ -22,20 +22,35 @@ int gf_cpu_supports_arm_neon = 0; #if defined(__x86_64__) +#if defined(_MSC_VER) + +#define cpuid(info, x) __cpuidex(info, x, 0) + +#elif defined(__GNUC__) + +#include +void cpuid(int info[4], int InfoType){ + __cpuid_count(InfoType, 0, info[0], info[1], info[2], info[3]); +} + +#else + +#error please add a way to detect CPU SIMD support at runtime + +#endif + void gf_cpu_identify(void) { if (gf_cpu_identified) { return; } - int op = 1, eax, ebx, ecx, edx; + int reg[4]; - __asm__("cpuid" - : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) - : "a" (op)); + cpuid(reg, 1); #if defined(INTEL_SSE4_PCLMUL) - if ((ecx & 1) != 0 && !getenv("GF_COMPLETE_DISABLE_SSE4_PCLMUL")) { + if ((reg[2] & 1) != 0 && !getenv("GF_COMPLETE_DISABLE_SSE4_PCLMUL")) { gf_cpu_supports_intel_pclmul = 1; #ifdef DEBUG_CPU_DETECTION printf("#gf_cpu_supports_intel_pclmul\n"); @@ -44,7 +59,7 @@ void gf_cpu_identify(void) #endif #if defined(INTEL_SSE4) - if (((ecx & (1<<20)) != 0 || (ecx & (1<<19)) != 0) && !getenv("GF_COMPLETE_DISABLE_SSE4")) { + if (((reg[2] & (1<<20)) != 0 || (reg[2] & (1<<19)) != 0) && !getenv("GF_COMPLETE_DISABLE_SSE4")) { gf_cpu_supports_intel_sse4 = 1; #ifdef DEBUG_CPU_DETECTION printf("#gf_cpu_supports_intel_sse4\n"); @@ -53,7 +68,7 @@ void gf_cpu_identify(void) #endif #if defined(INTEL_SSSE3) - if ((ecx & (1<<9)) != 0 && !getenv("GF_COMPLETE_DISABLE_SSSE3")) { + if ((reg[2] & (1<<9)) != 0 && !getenv("GF_COMPLETE_DISABLE_SSSE3")) { gf_cpu_supports_intel_ssse3 = 1; #ifdef DEBUG_CPU_DETECTION printf("#gf_cpu_supports_intel_ssse3\n"); @@ -62,7 +77,7 @@ void gf_cpu_identify(void) #endif #if defined(INTEL_SSE3) - if ((ecx & 1) != 0 && !getenv("GF_COMPLETE_DISABLE_SSE3")) { + if ((reg[2] & 1) != 0 && !getenv("GF_COMPLETE_DISABLE_SSE3")) { gf_cpu_supports_intel_sse3 = 1; #ifdef DEBUG_CPU_DETECTION printf("#gf_cpu_supports_intel_sse3\n"); @@ -71,7 +86,7 @@ void gf_cpu_identify(void) #endif #if defined(INTEL_SSE2) - if ((edx & (1<<26)) != 0 && !getenv("GF_COMPLETE_DISABLE_SSE2")) { + if ((reg[3] & (1<<26)) != 0 && !getenv("GF_COMPLETE_DISABLE_SSE2")) { gf_cpu_supports_intel_sse2 = 1; #ifdef DEBUG_CPU_DETECTION printf("#gf_cpu_supports_intel_sse2\n"); diff --git a/test/Makefile.am b/test/Makefile.am index 2791528..f590ecc 100644 --- a/test/Makefile.am +++ b/test/Makefile.am @@ -1,7 +1,7 @@ # GF-Complete 'test' AM file AM_CPPFLAGS = -I$(top_builddir)/include -I$(top_srcdir)/include -AM_CFLAGS = -O3 $(SIMD_FLAGS) -fPIC +AM_CFLAGS = -O3 -fPIC bin_PROGRAMS = gf_unit diff --git a/tools/Makefile.am b/tools/Makefile.am index a9dd8b9..4ca9131 100644 --- a/tools/Makefile.am +++ b/tools/Makefile.am @@ -1,7 +1,7 @@ # GF-Complete 'tools' AM file AM_CPPFLAGS = -I$(top_builddir)/include -I$(top_srcdir)/include -AM_CFLAGS = -O3 $(SIMD_FLAGS) -fPIC +AM_CFLAGS = -O3 -fPIC bin_PROGRAMS = gf_mult gf_div gf_add gf_time gf_methods gf_poly gf_inline_time diff --git a/tools/test_simd.sh b/tools/test_simd.sh index 1b0e319..6401590 100755 --- a/tools/test_simd.sh +++ b/tools/test_simd.sh @@ -27,6 +27,16 @@ test_functions() { return ${failed} } +# build with DEBUG_CPU_FUNCTIONS and print out CPU detection +test_detection() { + failed=0 + + { ./configure && make clean && make CFLAGS="-DDEBUG_CPU_DETECTION"; } || { echo "Compile FAILED" >> ${results}; return 1; } + { ${script_dir}/gf_methods 32 -ACD -L | grep '#' >> ${results}; } || { echo "gf_methods $i FAILED" >> ${results}; ((++failed)); } + + return ${failed} +} + compile_arm() { failed=0 @@ -167,7 +177,7 @@ runtime_intel_flags() { { ${script_dir}/gf_methods $i -ACD -X >> ${1}; } || { echo "gf_methods $i FAILED" >> ${1}; ((++failed)); } done - echo "====SSE2 support..." >> ${1} + echo "====SSE2 support..." >> ${1} export ax_cv_have_sse_ext=no export ax_cv_have_sse2_ext=yes export ax_cv_have_sse3_ext=no diff --git a/tools/test_simd_qemu.sh b/tools/test_simd_qemu.sh index 7b2cb1c..5771874 100755 --- a/tools/test_simd_qemu.sh +++ b/tools/test_simd_qemu.sh @@ -224,6 +224,8 @@ run_test_simd_basic() { { run_test $arch $cpu "unit" && echo "SUCCESS"; } || { echo "FAILED"; ((++failed)); } echo "=====running functions test" { run_test $arch $cpu "functions" && echo "SUCCESS"; } || { echo "FAILED"; ((++failed)); } + echo "=====running detection test" + { run_test $arch $cpu "detection" && echo "SUCCESS"; } || { echo "FAILED"; ((++failed)); } echo "=====running runtime test" { run_test $arch $cpu "runtime" && echo "SUCCESS"; } || { echo "FAILED"; ((++failed)); } stop_qemu