util/bufferiszero: Use __attribute__((target)) for avx2/avx512

Use the attribute, which is supported by clang, instead of
the #pragma, which is not supported and, for some reason,
also not detected by the meson probe, so we fail by -Werror.

Include only <immintrin.h> as that is the outermost "official"
header for these intrinsics -- emmintrin.h and smmintrin -- are
older SSE2 and SSE4 specific headers, while the immintrin.h
includes all of the Intel intrinsics.

Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
master
Richard Henderson 2022-12-03 19:31:12 -06:00
parent 5584e2dbe8
commit 701ea5870d
2 changed files with 8 additions and 41 deletions

View File

@ -2338,11 +2338,9 @@ config_host_data.set('CONFIG_CPUID_H', have_cpuid_h)
config_host_data.set('CONFIG_AVX2_OPT', get_option('avx2') \
.require(have_cpuid_h, error_message: 'cpuid.h not available, cannot enable AVX2') \
.require(cc.links('''
#pragma GCC push_options
#pragma GCC target("avx2")
#include <cpuid.h>
#include <immintrin.h>
static int bar(void *a) {
static int __attribute__((target("avx2"))) bar(void *a) {
__m256i x = *(__m256i *)a;
return _mm256_testz_si256(x, x);
}
@ -2352,11 +2350,9 @@ config_host_data.set('CONFIG_AVX2_OPT', get_option('avx2') \
config_host_data.set('CONFIG_AVX512F_OPT', get_option('avx512f') \
.require(have_cpuid_h, error_message: 'cpuid.h not available, cannot enable AVX512F') \
.require(cc.links('''
#pragma GCC push_options
#pragma GCC target("avx512f")
#include <cpuid.h>
#include <immintrin.h>
static int bar(void *a) {
static int __attribute__((target("avx512f"))) bar(void *a) {
__m512i x = *(__m512i *)a;
return _mm512_test_epi64_mask(x, x);
}

View File

@ -64,18 +64,11 @@ buffer_zero_int(const void *buf, size_t len)
}
#if defined(CONFIG_AVX512F_OPT) || defined(CONFIG_AVX2_OPT) || defined(__SSE2__)
/* Do not use push_options pragmas unnecessarily, because clang
* does not support them.
*/
#if defined(CONFIG_AVX512F_OPT) || defined(CONFIG_AVX2_OPT)
#pragma GCC push_options
#pragma GCC target("sse2")
#endif
#include <emmintrin.h>
#include <immintrin.h>
/* Note that each of these vectorized functions require len >= 64. */
static bool
static bool __attribute__((target("sse2")))
buffer_zero_sse2(const void *buf, size_t len)
{
__m128i t = _mm_loadu_si128(buf);
@ -104,20 +97,9 @@ buffer_zero_sse2(const void *buf, size_t len)
return _mm_movemask_epi8(_mm_cmpeq_epi8(t, zero)) == 0xFFFF;
}
#if defined(CONFIG_AVX512F_OPT) || defined(CONFIG_AVX2_OPT)
#pragma GCC pop_options
#endif
#ifdef CONFIG_AVX2_OPT
/* Note that due to restrictions/bugs wrt __builtin functions in gcc <= 4.8,
* the includes have to be within the corresponding push_options region, and
* therefore the regions themselves have to be ordered with increasing ISA.
*/
#pragma GCC push_options
#pragma GCC target("sse4")
#include <smmintrin.h>
static bool
static bool __attribute__((target("sse4")))
buffer_zero_sse4(const void *buf, size_t len)
{
__m128i t = _mm_loadu_si128(buf);
@ -145,12 +127,7 @@ buffer_zero_sse4(const void *buf, size_t len)
return _mm_testz_si128(t, t);
}
#pragma GCC pop_options
#pragma GCC push_options
#pragma GCC target("avx2")
#include <immintrin.h>
static bool
static bool __attribute__((target("avx2")))
buffer_zero_avx2(const void *buf, size_t len)
{
/* Begin with an unaligned head of 32 bytes. */
@ -176,15 +153,10 @@ buffer_zero_avx2(const void *buf, size_t len)
return _mm256_testz_si256(t, t);
}
#pragma GCC pop_options
#endif /* CONFIG_AVX2_OPT */
#ifdef CONFIG_AVX512F_OPT
#pragma GCC push_options
#pragma GCC target("avx512f")
#include <immintrin.h>
static bool
static bool __attribute__((target("avx512f")))
buffer_zero_avx512(const void *buf, size_t len)
{
/* Begin with an unaligned head of 64 bytes. */
@ -210,8 +182,7 @@ buffer_zero_avx512(const void *buf, size_t len)
return !_mm512_test_epi64_mask(t, t);
}
#pragma GCC pop_options
#endif
#endif /* CONFIG_AVX512F_OPT */
/* Note that for test_buffer_is_zero_next_accel, the most preferred