Merge tag 'for-upstream' of https://gitlab.com/bonzini/qemu into staging

* whpx fixes in preparation for GDB support (Ivan)
* VSS header fixes (Marc-André)
* 5-level EPT support (Vitaly)
* AMX support (Jing Liu & Yang Zhong)
* Bundle changes to MSI routes (Longpeng)
* More precise emulation of #SS (Gareth)
* Disable ASAN testing

# gpg: Signature made Tue 15 Mar 2022 10:51:00 GMT
# gpg:                using RSA key F13338574B662389866C7682BFFBD25F78C7AE83
# gpg:                issuer "pbonzini@redhat.com"
# gpg: Good signature from "Paolo Bonzini <bonzini@gnu.org>" [full]
# gpg:                 aka "Paolo Bonzini <pbonzini@redhat.com>" [full]
# Primary key fingerprint: 46F5 9FBD 57D6 12E7 BFD4  E2F7 7E15 100C CD36 69B1
#      Subkey fingerprint: F133 3857 4B66 2389 866C  7682 BFFB D25F 78C7 AE83

* tag 'for-upstream' of https://gitlab.com/bonzini/qemu: (22 commits)
  gitlab-ci: do not run tests with address sanitizer
  KVM: SVM: always set MSR_AMD64_TSC_RATIO to default value
  i386: Add Icelake-Server-v6 CPU model with 5-level EPT support
  x86: Support XFD and AMX xsave data migration
  x86: add support for KVM_CAP_XSAVE2 and AMX state migration
  x86: Add AMX CPUIDs enumeration
  x86: Add XFD faulting bit for state components
  x86: Grant AMX permission for guest
  x86: Add AMX XTILECFG and XTILEDATA components
  x86: Fix the 64-byte boundary enumeration for extended state
  linux-headers: include missing changes from 5.17
  target/i386: Throw a #SS when loading a non-canonical IST
  target/i386: only include bits in pg_mode if they are not ignored
  kvm/msi: do explicit commit when adding msi routes
  kvm-irqchip: introduce new API to support route change
  update meson-buildoptions.sh
  qga/vss: update informative message about MinGW
  qga/vss-win32: check old VSS SDK headers
  meson: fix generic location of vss headers
  vmxcap: Add 5-level EPT bit
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
commit dee3a86d54 (master)
Peter Maydell, 2022-03-15 14:41:16 +00:00
26 changed files with 451 additions and 85 deletions


@@ -515,8 +515,6 @@ build-oss-fuzz:
echo Testing ${fuzzer} ... ;
"${fuzzer}" -runs=1 -seed=1 || exit 1 ;
done
# Unrelated to fuzzer: run some tests with -fsanitize=address
- cd build-oss-fuzz && make check-qtest-i386 check-unit
build-tci:
extends: .native_build_job_template


@@ -1961,10 +1961,11 @@ int kvm_irqchip_send_msi(KVMState *s, MSIMessage msg)
return kvm_set_irq(s, route->kroute.gsi, 1);
}
int kvm_irqchip_add_msi_route(KVMState *s, int vector, PCIDevice *dev)
int kvm_irqchip_add_msi_route(KVMRouteChange *c, int vector, PCIDevice *dev)
{
struct kvm_irq_routing_entry kroute = {};
int virq;
KVMState *s = c->s;
MSIMessage msg = {0, 0};
if (pci_available && dev) {
@@ -2004,7 +2005,7 @@ int kvm_irqchip_add_msi_route(KVMState *s, int vector, PCIDevice *dev)
kvm_add_routing_entry(s, &kroute);
kvm_arch_add_msi_route_post(&kroute, vector, dev);
kvm_irqchip_commit_routes(s);
c->changes++;
return virq;
}
@@ -2162,7 +2163,7 @@ int kvm_irqchip_send_msi(KVMState *s, MSIMessage msg)
abort();
}
int kvm_irqchip_add_msi_route(KVMState *s, int vector, PCIDevice *dev)
int kvm_irqchip_add_msi_route(KVMRouteChange *c, int vector, PCIDevice *dev)
{
return -ENOSYS;
}


@@ -77,7 +77,7 @@ int kvm_on_sigbus(int code, void *addr)
return 1;
}
int kvm_irqchip_add_msi_route(KVMState *s, int vector, PCIDevice *dev)
int kvm_irqchip_add_msi_route(KVMRouteChange *c, int vector, PCIDevice *dev)
{
return -ENOSYS;
}


@@ -424,16 +424,19 @@ static void ivshmem_add_kvm_msi_virq(IVShmemState *s, int vector,
Error **errp)
{
PCIDevice *pdev = PCI_DEVICE(s);
KVMRouteChange c;
int ret;
IVSHMEM_DPRINTF("ivshmem_add_kvm_msi_virq vector:%d\n", vector);
assert(!s->msi_vectors[vector].pdev);
ret = kvm_irqchip_add_msi_route(kvm_state, vector, pdev);
c = kvm_irqchip_begin_route_changes(kvm_state);
ret = kvm_irqchip_add_msi_route(&c, vector, pdev);
if (ret < 0) {
error_setg(errp, "kvm_irqchip_add_msi_route failed");
return;
}
kvm_irqchip_commit_route_changes(&c);
s->msi_vectors[vector].virq = ret;
s->msi_vectors[vector].pdev = pdev;


@@ -412,6 +412,7 @@ static int vfio_enable_vectors(VFIOPCIDevice *vdev, bool msix)
static void vfio_add_kvm_msi_virq(VFIOPCIDevice *vdev, VFIOMSIVector *vector,
int vector_n, bool msix)
{
KVMRouteChange c;
int virq;
if ((msix && vdev->no_kvm_msix) || (!msix && vdev->no_kvm_msi)) {
@@ -422,11 +423,13 @@ static void vfio_add_kvm_msi_virq(VFIOPCIDevice *vdev, VFIOMSIVector *vector,
return;
}
virq = kvm_irqchip_add_msi_route(kvm_state, vector_n, &vdev->pdev);
c = kvm_irqchip_begin_route_changes(kvm_state);
virq = kvm_irqchip_add_msi_route(&c, vector_n, &vdev->pdev);
if (virq < 0) {
event_notifier_cleanup(&vector->kvm_interrupt);
return;
}
kvm_irqchip_commit_route_changes(&c);
if (kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, &vector->kvm_interrupt,
NULL, virq) < 0) {


@@ -683,10 +683,12 @@ static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy *proxy,
int ret;
if (irqfd->users == 0) {
ret = kvm_irqchip_add_msi_route(kvm_state, vector, &proxy->pci_dev);
KVMRouteChange c = kvm_irqchip_begin_route_changes(kvm_state);
ret = kvm_irqchip_add_msi_route(&c, vector, &proxy->pci_dev);
if (ret < 0) {
return ret;
}
kvm_irqchip_commit_route_changes(&c);
irqfd->virq = ret;
}
irqfd->users++;


@@ -224,6 +224,11 @@ DECLARE_INSTANCE_CHECKER(KVMState, KVM_STATE,
extern KVMState *kvm_state;
typedef struct Notifier Notifier;
typedef struct KVMRouteChange {
KVMState *s;
int changes;
} KVMRouteChange;
/* external API */
bool kvm_has_free_slot(MachineState *ms);
@@ -481,7 +486,7 @@ void kvm_init_cpu_signals(CPUState *cpu);
/**
* kvm_irqchip_add_msi_route - Add MSI route for specific vector
* @s: KVM state
* @c: KVMRouteChange instance.
* @vector: which vector to add. This can be either MSI/MSIX
* vector. The function will automatically detect whether
* MSI/MSIX is enabled, and fetch corresponding MSI
@@ -490,10 +495,24 @@ void kvm_init_cpu_signals(CPUState *cpu);
* as @NULL, an empty MSI message will be inited.
* @return: virq (>=0) when success, errno (<0) when failed.
*/
int kvm_irqchip_add_msi_route(KVMState *s, int vector, PCIDevice *dev);
int kvm_irqchip_add_msi_route(KVMRouteChange *c, int vector, PCIDevice *dev);
int kvm_irqchip_update_msi_route(KVMState *s, int virq, MSIMessage msg,
PCIDevice *dev);
void kvm_irqchip_commit_routes(KVMState *s);
static inline KVMRouteChange kvm_irqchip_begin_route_changes(KVMState *s)
{
return (KVMRouteChange) { .s = s, .changes = 0 };
}
static inline void kvm_irqchip_commit_route_changes(KVMRouteChange *c)
{
if (c->changes) {
kvm_irqchip_commit_routes(c->s);
c->changes = 0;
}
}
void kvm_irqchip_release_virq(KVMState *s, int virq);
int kvm_irqchip_add_adapter_route(KVMState *s, AdapterInfo *adapter);
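The two inline helpers above replace the old implicit-commit pattern: callers now open a KVMRouteChange, add any number of MSI routes, and flush the routing table once. A minimal usage sketch of the new API (pdev stands in for the caller's PCIDevice; not a literal excerpt from this series):

    KVMRouteChange c = kvm_irqchip_begin_route_changes(kvm_state);
    int virq = kvm_irqchip_add_msi_route(&c, 0, pdev);
    if (virq < 0) {
        return virq;   /* nothing was committed to KVM on failure */
    }
    kvm_irqchip_commit_route_changes(&c);   /* one flush for all queued changes */

The ivshmem, vfio and virtio-pci hunks above follow exactly this shape.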


@@ -452,6 +452,9 @@ struct kvm_sync_regs {
#define KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE 0x00000001
/* attributes for system fd (group 0) */
#define KVM_X86_XCOMP_GUEST_SUPP 0
struct kvm_vmx_nested_state_data {
__u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
__u8 shadow_vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];


@@ -1133,6 +1133,7 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM 206
#define KVM_CAP_VM_GPA_BITS 207
#define KVM_CAP_XSAVE2 208
#define KVM_CAP_SYS_ATTRIBUTES 209
#ifdef KVM_CAP_IRQ_ROUTING
@@ -2047,4 +2048,7 @@ struct kvm_stats_desc {
#define KVM_GET_STATS_FD _IO(KVMIO, 0xce)
/* Available with KVM_CAP_XSAVE2 */
#define KVM_GET_XSAVE2 _IOR(KVMIO, 0xcf, struct kvm_xsave)
#endif /* __LINUX_KVM_H */


@@ -1954,12 +1954,15 @@ config_host_data.set('CONFIG_AF_VSOCK', cc.compiles(gnu_source_prefix + '''
}'''))
have_vss = false
have_vss_sdk = false # old xp/2003 SDK
if targetos == 'windows' and link_language == 'cpp'
have_vss = cxx.compiles('''
#define __MIDL_user_allocate_free_DEFINED__
#include <inc/win2003/vss.h>
#include <vss.h>
int main(void) { return VSS_CTX_BACKUP; }''')
have_vss_sdk = cxx.has_header('vscoordint.h')
endif
config_host_data.set('HAVE_VSS_SDK', have_vss_sdk)
have_ntddscsi = false
if targetos == 'windows'


@@ -15,7 +15,7 @@ have_qga_vss = get_option('qga_vss') \
If your Visual Studio installation doesn't have the VSS headers,
Please download and install Microsoft VSS SDK:
http://www.microsoft.com/en-us/download/details.aspx?id=23490
On POSIX-systems, MinGW doesn't yet provide working headers.
On POSIX-systems, MinGW should provide headers in >=10.0 releases.
you can extract the SDK headers by:
$ scripts/extract-vsssdk-headers setup.exe
The headers are extracted in the directory 'inc/win2003'.


@@ -13,7 +13,11 @@
#include "qemu/osdep.h"
#include "vss-common.h"
#ifdef HAVE_VSS_SDK
#include <vscoordint.h>
#else
#include <vsadmin.h>
#endif
#include "install.h"
#include <wbemidl.h>
#include <comdef.h>


@@ -12,7 +12,11 @@
#include "qemu/osdep.h"
#include "vss-common.h"
#ifdef HAVE_VSS_SDK
#include <vscoordint.h>
#else
#include <vsadmin.h>
#endif
#include <vsprov.h>
#define VSS_TIMEOUT_MSEC (60*1000)


@@ -64,12 +64,13 @@ const CLSID CLSID_QGAVSSProvider = { 0x6e6a3492, 0x8d4d, 0x440c,
const TCHAR g_szClsid[] = TEXT("{6E6A3492-8D4D-440C-9619-5E5D0CC31CA8}");
const TCHAR g_szProgid[] = TEXT("QGAVSSProvider");
#ifdef HAVE_VSS_SDK
/* Enums undefined in VSS SDK 7.2 but defined in newer Windows SDK */
enum __VSS_VOLUME_SNAPSHOT_ATTRIBUTES {
VSS_VOLSNAP_ATTR_NO_AUTORECOVERY = 0x00000002,
VSS_VOLSNAP_ATTR_TXF_RECOVERY = 0x02000000
};
#endif
/* COM pointer utility; call ->Release() when it goes out of scope */
template <class T>


@@ -249,6 +249,7 @@ controls = [
bits = {
0: 'Execute-only EPT translations',
6: 'Page-walk length 4',
7: 'Page-walk length 5',
8: 'Paging-structure memory type UC',
14: 'Paging-structure memory type WB',
16: '2MB EPT pages',


@@ -20,7 +20,6 @@ meson_options_help() {
printf "%s\n" ' --enable-malloc=CHOICE choose memory allocator to use [system] (choices:'
printf "%s\n" ' jemalloc/system/tcmalloc)'
printf "%s\n" ' --enable-profiler profiler support'
printf "%s\n" ' --enable-qga-vss build QGA VSS support'
printf "%s\n" ' --enable-qom-cast-debug cast debugging support'
printf "%s\n" ' --enable-rng-none dummy RNG, avoid using /dev/(u)random and'
printf "%s\n" ' getrandom()'
@@ -97,6 +96,7 @@ meson_options_help() {
printf "%s\n" ' parallels parallels image format support'
printf "%s\n" ' qcow1 qcow1 image format support'
printf "%s\n" ' qed qed image format support'
printf "%s\n" ' qga-vss build QGA VSS support (broken with MinGW)'
printf "%s\n" ' rbd Ceph block device driver'
printf "%s\n" ' replication replication support'
printf "%s\n" ' sdl SDL user interface'


@@ -575,6 +575,18 @@ static CPUCacheInfo legacy_l3_cache = {
#define INTEL_PT_CYCLE_BITMAP 0x1fff /* Support 0,2^(0~11) */
#define INTEL_PT_PSB_BITMAP (0x003f << 16) /* Support 2K,4K,8K,16K,32K,64K */
/* CPUID Leaf 0x1D constants: */
#define INTEL_AMX_TILE_MAX_SUBLEAF 0x1
#define INTEL_AMX_TOTAL_TILE_BYTES 0x2000
#define INTEL_AMX_BYTES_PER_TILE 0x400
#define INTEL_AMX_BYTES_PER_ROW 0x40
#define INTEL_AMX_TILE_MAX_NAMES 0x8
#define INTEL_AMX_TILE_MAX_ROWS 0x10
/* CPUID Leaf 0x1E constants: */
#define INTEL_AMX_TMUL_MAX_K 0x10
#define INTEL_AMX_TMUL_MAX_N 0x40
void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1,
uint32_t vendor2, uint32_t vendor3)
{
@@ -844,8 +856,8 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = {
"avx512-vp2intersect", NULL, "md-clear", NULL,
NULL, NULL, "serialize", NULL,
"tsx-ldtrk", NULL, NULL /* pconfig */, NULL,
NULL, NULL, NULL, "avx512-fp16",
NULL, NULL, "spec-ctrl", "stibp",
NULL, NULL, "amx-bf16", "avx512-fp16",
"amx-tile", "amx-int8", "spec-ctrl", "stibp",
NULL, "arch-capabilities", "core-capability", "ssbd",
},
.cpuid = {
@@ -910,7 +922,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = {
.type = CPUID_FEATURE_WORD,
.feat_names = {
"xsaveopt", "xsavec", "xgetbv1", "xsaves",
NULL, NULL, NULL, NULL,
"xfd", NULL, NULL, NULL,
NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL,
@@ -1402,6 +1414,14 @@ ExtSaveArea x86_ext_save_areas[XSAVE_STATE_AREA_COUNT] = {
[XSTATE_PKRU_BIT] =
{ .feature = FEAT_7_0_ECX, .bits = CPUID_7_0_ECX_PKU,
.size = sizeof(XSavePKRU) },
[XSTATE_XTILE_CFG_BIT] = {
.feature = FEAT_7_0_EDX, .bits = CPUID_7_0_EDX_AMX_TILE,
.size = sizeof(XSaveXTILECFG),
},
[XSTATE_XTILE_DATA_BIT] = {
.feature = FEAT_7_0_EDX, .bits = CPUID_7_0_EDX_AMX_TILE,
.size = sizeof(XSaveXTILEDATA)
},
};
static uint32_t xsave_area_size(uint64_t mask)
@@ -3506,6 +3526,14 @@ static const X86CPUDefinition builtin_x86_defs[] = {
{ /* end of list */ }
},
},
{
.version = 6,
.note = "5-level EPT",
.props = (PropValue[]) {
{ "vmx-page-walk-5", "on" },
{ /* end of list */ }
},
},
{ /* end of list */ }
}
},
@@ -5488,6 +5516,8 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
const ExtSaveArea *esa = &x86_ext_save_areas[count];
*eax = esa->size;
*ebx = esa->offset;
*ecx = esa->ecx &
(ESA_FEATURE_ALIGN64_MASK | ESA_FEATURE_XFD_MASK);
}
}
break;
@@ -5576,6 +5606,43 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
}
break;
}
case 0x1D: {
/* AMX TILE */
*eax = 0;
*ebx = 0;
*ecx = 0;
*edx = 0;
if (!(env->features[FEAT_7_0_EDX] & CPUID_7_0_EDX_AMX_TILE)) {
break;
}
if (count == 0) {
/* Highest numbered palette subleaf */
*eax = INTEL_AMX_TILE_MAX_SUBLEAF;
} else if (count == 1) {
*eax = INTEL_AMX_TOTAL_TILE_BYTES |
(INTEL_AMX_BYTES_PER_TILE << 16);
*ebx = INTEL_AMX_BYTES_PER_ROW | (INTEL_AMX_TILE_MAX_NAMES << 16);
*ecx = INTEL_AMX_TILE_MAX_ROWS;
}
break;
}
case 0x1E: {
/* AMX TMUL */
*eax = 0;
*ebx = 0;
*ecx = 0;
*edx = 0;
if (!(env->features[FEAT_7_0_EDX] & CPUID_7_0_EDX_AMX_TILE)) {
break;
}
if (count == 0) {
/* Highest numbered palette subleaf */
*ebx = INTEL_AMX_TMUL_MAX_K | (INTEL_AMX_TMUL_MAX_N << 8);
}
break;
}
case 0x40000000:
/*
* CPUID code in kvm_arch_init_vcpu() ignores stuff
@@ -5930,9 +5997,7 @@ static void x86_cpu_reset(DeviceState *dev)
x86_cpu_set_sgxlepubkeyhash(env);
if (env->features[FEAT_SVM] & CPUID_SVM_TSCSCALE) {
env->amd_tsc_scale_msr = MSR_AMD64_TSC_RATIO_DEFAULT;
}
env->amd_tsc_scale_msr = MSR_AMD64_TSC_RATIO_DEFAULT;
#endif
}
@@ -5998,6 +6063,7 @@ static void x86_cpu_enable_xsave_components(X86CPU *cpu)
CPUX86State *env = &cpu->env;
int i;
uint64_t mask;
static bool request_perm;
if (!(env->features[FEAT_1_ECX] & CPUID_EXT_XSAVE)) {
env->features[FEAT_XSAVE_COMP_LO] = 0;
@@ -6013,6 +6079,12 @@ static void x86_cpu_enable_xsave_components(X86CPU *cpu)
}
}
/* Only request permission for first vcpu */
if (kvm_enabled() && !request_perm) {
kvm_request_xsave_components(cpu, mask);
request_perm = true;
}
env->features[FEAT_XSAVE_COMP_LO] = mask;
env->features[FEAT_XSAVE_COMP_HI] = mask >> 32;
}
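As a cross-check of the leaf 0x1D/0x1E constants and the cpu_x86_cpuid() cases above, this is what a guest sees for the single supported palette (values computed from the #defines; the breakdown is explanatory, not part of the diff):

    /* CPUID.0x1D, subleaf 1 (palette 1):
     *   EAX = 0x04002000  total tile bytes = 0x2000 (8 KiB, the whole
     *                     XSaveXTILEDATA area), bytes per tile = 0x400
     *   EBX = 0x00080040  bytes per row = 0x40, tile registers = 8
     *   ECX = 0x00000010  max rows = 16 (16 rows * 64 bytes = 1 KiB/tile)
     * CPUID.0x1E, subleaf 0:
     *   EBX = 0x00004010  TMUL max K = 0x10, TMUL max N = 0x40
     */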


@@ -507,6 +507,9 @@ typedef enum X86Seg {
#define MSR_VM_HSAVE_PA 0xc0010117
#define MSR_IA32_XFD 0x000001c4
#define MSR_IA32_XFD_ERR 0x000001c5
#define MSR_IA32_BNDCFGS 0x00000d90
#define MSR_IA32_XSS 0x00000da0
#define MSR_IA32_UMWAIT_CONTROL 0xe1
@@ -539,6 +542,8 @@ typedef enum X86Seg {
#define XSTATE_ZMM_Hi256_BIT 6
#define XSTATE_Hi16_ZMM_BIT 7
#define XSTATE_PKRU_BIT 9
#define XSTATE_XTILE_CFG_BIT 17
#define XSTATE_XTILE_DATA_BIT 18
#define XSTATE_FP_MASK (1ULL << XSTATE_FP_BIT)
#define XSTATE_SSE_MASK (1ULL << XSTATE_SSE_BIT)
@@ -549,6 +554,17 @@ typedef enum X86Seg {
#define XSTATE_ZMM_Hi256_MASK (1ULL << XSTATE_ZMM_Hi256_BIT)
#define XSTATE_Hi16_ZMM_MASK (1ULL << XSTATE_Hi16_ZMM_BIT)
#define XSTATE_PKRU_MASK (1ULL << XSTATE_PKRU_BIT)
#define XSTATE_XTILE_CFG_MASK (1ULL << XSTATE_XTILE_CFG_BIT)
#define XSTATE_XTILE_DATA_MASK (1ULL << XSTATE_XTILE_DATA_BIT)
#define XSTATE_DYNAMIC_MASK (XSTATE_XTILE_DATA_MASK)
#define ESA_FEATURE_ALIGN64_BIT 1
#define ESA_FEATURE_XFD_BIT 2
#define ESA_FEATURE_ALIGN64_MASK (1U << ESA_FEATURE_ALIGN64_BIT)
#define ESA_FEATURE_XFD_MASK (1U << ESA_FEATURE_XFD_BIT)
/* CPUID feature words */
typedef enum FeatureWord {
@@ -842,6 +858,8 @@ typedef uint64_t FeatureWordArray[FEATURE_WORDS];
#define CPUID_7_0_EDX_TSX_LDTRK (1U << 16)
/* AVX512_FP16 instruction */
#define CPUID_7_0_EDX_AVX512_FP16 (1U << 23)
/* AMX tile (two-dimensional register) */
#define CPUID_7_0_EDX_AMX_TILE (1U << 24)
/* Speculation Control */
#define CPUID_7_0_EDX_SPEC_CTRL (1U << 26)
/* Single Thread Indirect Branch Predictors */
@@ -857,6 +875,8 @@ typedef uint64_t FeatureWordArray[FEATURE_WORDS];
#define CPUID_7_1_EAX_AVX_VNNI (1U << 4)
/* AVX512 BFloat16 Instruction */
#define CPUID_7_1_EAX_AVX512_BF16 (1U << 5)
/* XFD Extend Feature Disabled */
#define CPUID_D_1_EAX_XFD (1U << 4)
/* Packets which contain IP payload have LIP values */
#define CPUID_14_0_ECX_LIP (1U << 31)
@@ -1345,6 +1365,16 @@ typedef struct XSavePKRU {
uint32_t padding;
} XSavePKRU;
/* Ext. save area 17: AMX XTILECFG state */
typedef struct XSaveXTILECFG {
uint8_t xtilecfg[64];
} XSaveXTILECFG;
/* Ext. save area 18: AMX XTILEDATA state */
typedef struct XSaveXTILEDATA {
uint8_t xtiledata[8][1024];
} XSaveXTILEDATA;
QEMU_BUILD_BUG_ON(sizeof(XSaveAVX) != 0x100);
QEMU_BUILD_BUG_ON(sizeof(XSaveBNDREG) != 0x40);
QEMU_BUILD_BUG_ON(sizeof(XSaveBNDCSR) != 0x40);
@@ -1352,13 +1382,16 @@ QEMU_BUILD_BUG_ON(sizeof(XSaveOpmask) != 0x40);
QEMU_BUILD_BUG_ON(sizeof(XSaveZMM_Hi256) != 0x200);
QEMU_BUILD_BUG_ON(sizeof(XSaveHi16_ZMM) != 0x400);
QEMU_BUILD_BUG_ON(sizeof(XSavePKRU) != 0x8);
QEMU_BUILD_BUG_ON(sizeof(XSaveXTILECFG) != 0x40);
QEMU_BUILD_BUG_ON(sizeof(XSaveXTILEDATA) != 0x2000);
typedef struct ExtSaveArea {
uint32_t feature, bits;
uint32_t offset, size;
uint32_t ecx;
} ExtSaveArea;
#define XSAVE_STATE_AREA_COUNT (XSTATE_PKRU_BIT + 1)
#define XSAVE_STATE_AREA_COUNT (XSTATE_XTILE_DATA_BIT + 1)
extern ExtSaveArea x86_ext_save_areas[XSAVE_STATE_AREA_COUNT];
@@ -1499,6 +1532,10 @@ typedef struct CPUArchState {
uint64_t opmask_regs[NB_OPMASK_REGS];
YMMReg zmmh_regs[CPU_NB_REGS];
ZMMReg hi16_zmm_regs[CPU_NB_REGS];
#ifdef TARGET_X86_64
uint8_t xtilecfg[64];
uint8_t xtiledata[8192];
#endif
/* sysenter registers */
uint32_t sysenter_cs;
@@ -1584,6 +1621,10 @@ typedef struct CPUArchState {
uint64_t msr_rtit_cr3_match;
uint64_t msr_rtit_addrs[MAX_RTIT_ADDRS];
/* Per-VCPU XFD MSRs */
uint64_t msr_xfd;
uint64_t msr_xfd_err;
/* exception/interrupt handling */
int error_code;
int exception_is_int;


@@ -84,7 +84,7 @@ static void kvm_cpu_max_instance_init(X86CPU *cpu)
static void kvm_cpu_xsave_init(void)
{
static bool first = true;
KVMState *s = kvm_state;
uint32_t eax, ebx, ecx, edx;
int i;
if (!first) {
@@ -100,10 +100,11 @@ static void kvm_cpu_xsave_init(void)
ExtSaveArea *esa = &x86_ext_save_areas[i];
if (esa->size) {
int sz = kvm_arch_get_supported_cpuid(s, 0xd, i, R_EAX);
if (sz != 0) {
assert(esa->size == sz);
esa->offset = kvm_arch_get_supported_cpuid(s, 0xd, i, R_EBX);
host_cpuid(0xd, i, &eax, &ebx, &ecx, &edx);
if (eax != 0) {
assert(esa->size == eax);
esa->offset = ebx;
esa->ecx = ecx;
}
}
}
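kvm_cpu_xsave_init() now probes leaf 0xD directly with host_cpuid() and records ECX, where each component's properties live; the stored esa->ecx is later masked in cpu_x86_cpuid() with the ESA_FEATURE_* bits added in cpu.h. A short sketch of how those bits read (interpretation follows the masks defined in this series):

    if (esa->ecx & ESA_FEATURE_ALIGN64_MASK) {
        /* bit 1: component starts on a 64-byte boundary in the
         * compacted XSAVE image */
    }
    if (esa->ecx & ESA_FEATURE_XFD_MASK) {
        /* bit 2: component can be fault-disabled via IA32_XFD */
    }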


@@ -17,6 +17,7 @@
#include "qapi/error.h"
#include <sys/ioctl.h>
#include <sys/utsname.h>
#include <sys/syscall.h>
#include <linux/kvm.h>
#include "standard-headers/asm-x86/kvm_para.h"
@@ -123,6 +124,7 @@ static uint32_t num_architectural_pmu_gp_counters;
static uint32_t num_architectural_pmu_fixed_counters;
static int has_xsave;
static int has_xsave2;
static int has_xcrs;
static int has_pit_state2;
static int has_sregs2;
@@ -349,6 +351,7 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function,
struct kvm_cpuid2 *cpuid;
uint32_t ret = 0;
uint32_t cpuid_1_edx;
uint64_t bitmask;
cpuid = get_supported_cpuid(s);
@@ -406,6 +409,25 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function,
if (!has_msr_arch_capabs) {
ret &= ~CPUID_7_0_EDX_ARCH_CAPABILITIES;
}
} else if (function == 0xd && index == 0 &&
(reg == R_EAX || reg == R_EDX)) {
struct kvm_device_attr attr = {
.group = 0,
.attr = KVM_X86_XCOMP_GUEST_SUPP,
.addr = (unsigned long) &bitmask
};
bool sys_attr = kvm_check_extension(s, KVM_CAP_SYS_ATTRIBUTES);
if (!sys_attr) {
warn_report("cannot get sys attribute capabilities %d", sys_attr);
}
int rc = kvm_ioctl(s, KVM_GET_DEVICE_ATTR, &attr);
if (rc == -1 && (errno == ENXIO || errno == EINVAL)) {
warn_report("KVM_GET_DEVICE_ATTR(0, KVM_X86_XCOMP_GUEST_SUPP) "
"error: %d", rc);
}
ret = (reg == R_EAX) ? bitmask : bitmask >> 32;
} else if (function == 0x80000001 && reg == R_ECX) {
/*
* It's safe to enable TOPOEXT even if it's not returned by
@@ -1566,6 +1588,26 @@ static Error *invtsc_mig_blocker;
#define KVM_MAX_CPUID_ENTRIES 100
static void kvm_init_xsave(CPUX86State *env)
{
if (has_xsave2) {
env->xsave_buf_len = QEMU_ALIGN_UP(has_xsave2, 4096);
} else if (has_xsave) {
env->xsave_buf_len = sizeof(struct kvm_xsave);
} else {
return;
}
env->xsave_buf = qemu_memalign(4096, env->xsave_buf_len);
memset(env->xsave_buf, 0, env->xsave_buf_len);
/*
* The allocated storage must be large enough for all of the
* possible XSAVE state components.
*/
assert(kvm_arch_get_supported_cpuid(kvm_state, 0xd, 0, R_ECX) <=
env->xsave_buf_len);
}
int kvm_arch_init_vcpu(CPUState *cs)
{
struct {
@@ -1595,6 +1637,8 @@ int kvm_arch_init_vcpu(CPUState *cs)
cpuid_i = 0;
has_xsave2 = kvm_check_extension(cs->kvm_state, KVM_CAP_XSAVE2);
r = kvm_arch_set_tsc_khz(cs);
if (r < 0) {
return r;
@@ -1760,7 +1804,9 @@ int kvm_arch_init_vcpu(CPUState *cs)
c = &cpuid_data.entries[cpuid_i++];
}
break;
case 0x14: {
case 0x14:
case 0x1d:
case 0x1e: {
uint32_t times;
c->function = i;
@@ -1982,19 +2028,7 @@ int kvm_arch_init_vcpu(CPUState *cs)
if (r) {
goto fail;
}
if (has_xsave) {
env->xsave_buf_len = sizeof(struct kvm_xsave);
env->xsave_buf = qemu_memalign(4096, env->xsave_buf_len);
memset(env->xsave_buf, 0, env->xsave_buf_len);
/*
* The allocated storage must be large enough for all of the
* possible XSAVE state components.
*/
assert(kvm_arch_get_supported_cpuid(kvm_state, 0xd, 0, R_ECX)
<= env->xsave_buf_len);
}
kvm_init_xsave(env);
max_nested_state_len = kvm_max_nested_state_length();
if (max_nested_state_len > 0) {
@@ -3243,6 +3277,13 @@ static int kvm_put_msrs(X86CPU *cpu, int level)
env->msr_ia32_sgxlepubkeyhash[3]);
}
if (env->features[FEAT_XSAVE] & CPUID_D_1_EAX_XFD) {
kvm_msr_entry_add(cpu, MSR_IA32_XFD,
env->msr_xfd);
kvm_msr_entry_add(cpu, MSR_IA32_XFD_ERR,
env->msr_xfd_err);
}
/* Note: MSR_IA32_FEATURE_CONTROL is written separately, see
* kvm_put_msr_feature_control. */
}
@@ -3298,13 +3339,14 @@ static int kvm_get_xsave(X86CPU *cpu)
{
CPUX86State *env = &cpu->env;
void *xsave = env->xsave_buf;
int ret;
int type, ret;
if (!has_xsave) {
return kvm_get_fpu(cpu);
}
ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_XSAVE, xsave);
type = has_xsave2 ? KVM_GET_XSAVE2 : KVM_GET_XSAVE;
ret = kvm_vcpu_ioctl(CPU(cpu), type, xsave);
if (ret < 0) {
return ret;
}
@@ -3634,6 +3676,11 @@ static int kvm_get_msrs(X86CPU *cpu)
kvm_msr_entry_add(cpu, MSR_IA32_SGXLEPUBKEYHASH3, 0);
}
if (env->features[FEAT_XSAVE] & CPUID_D_1_EAX_XFD) {
kvm_msr_entry_add(cpu, MSR_IA32_XFD, 0);
kvm_msr_entry_add(cpu, MSR_IA32_XFD_ERR, 0);
}
ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MSRS, cpu->kvm_msr_buf);
if (ret < 0) {
return ret;
@@ -3930,6 +3977,12 @@ static int kvm_get_msrs(X86CPU *cpu)
env->msr_ia32_sgxlepubkeyhash[index - MSR_IA32_SGXLEPUBKEYHASH0] =
msrs[i].data;
break;
case MSR_IA32_XFD:
env->msr_xfd = msrs[i].data;
break;
case MSR_IA32_XFD_ERR:
env->msr_xfd_err = msrs[i].data;
break;
}
}
@@ -4940,16 +4993,18 @@ void kvm_arch_init_irq_routing(KVMState *s)
kvm_gsi_routing_allowed = true;
if (kvm_irqchip_is_split()) {
KVMRouteChange c = kvm_irqchip_begin_route_changes(s);
int i;
/* If the ioapic is in QEMU and the lapics are in KVM, reserve
MSI routes for signaling interrupts to the local apics. */
for (i = 0; i < IOAPIC_NUM_PINS; i++) {
if (kvm_irqchip_add_msi_route(s, 0, NULL) < 0) {
if (kvm_irqchip_add_msi_route(&c, 0, NULL) < 0) {
error_report("Could not enable split IRQ mode.");
exit(1);
}
}
kvm_irqchip_commit_route_changes(&c);
}
}
@@ -5149,3 +5204,39 @@ bool kvm_arch_cpu_check_are_resettable(void)
{
return !sev_es_enabled();
}
#define ARCH_REQ_XCOMP_GUEST_PERM 0x1025
void kvm_request_xsave_components(X86CPU *cpu, uint64_t mask)
{
KVMState *s = kvm_state;
uint64_t supported;
mask &= XSTATE_DYNAMIC_MASK;
if (!mask) {
return;
}
/*
* Just ignore bits that are not in CPUID[EAX=0xD,ECX=0].
* ARCH_REQ_XCOMP_GUEST_PERM would fail, and QEMU has warned
* about them already because they are not supported features.
*/
supported = kvm_arch_get_supported_cpuid(s, 0xd, 0, R_EAX);
supported |= (uint64_t)kvm_arch_get_supported_cpuid(s, 0xd, 0, R_EDX) << 32;
mask &= supported;
while (mask) {
int bit = ctz64(mask);
int rc = syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM, bit);
if (rc) {
/*
* Older kernel version (<5.17) do not support
* ARCH_REQ_XCOMP_GUEST_PERM, but also do not return
* any dynamic feature from kvm_arch_get_supported_cpuid.
*/
warn_report("prctl(ARCH_REQ_XCOMP_GUEST_PERM) failure "
"for feature bit %d", bit);
}
mask &= ~BIT_ULL(bit);
}
}
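ARCH_REQ_XCOMP_GUEST_PERM is a per-process opt-in and must happen before the first vCPU exists, which is why x86_cpu_enable_xsave_components() latches request_perm after the first call. A standalone sketch of the same handshake outside QEMU, assuming an x86-64 Linux 5.17+ host (illustrative, not part of the diff):

    #include <sys/syscall.h>
    #include <unistd.h>

    #define ARCH_REQ_XCOMP_GUEST_PERM 0x1025   /* as defined above */
    #define XSTATE_XTILE_DATA_BIT     18       /* from target/i386/cpu.h */

    /* Request AMX tile-data permission for guests, once per process,
     * before any KVM_CREATE_VCPU. */
    static int request_amx_guest_perm(void)
    {
        return syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM,
                       XSTATE_XTILE_DATA_BIT);
    }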


@@ -52,5 +52,6 @@ bool kvm_hyperv_expand_features(X86CPU *cpu, Error **errp);
uint64_t kvm_swizzle_msi_ext_dest_id(uint64_t address);
bool kvm_enable_sgx_provisioning(KVMState *s);
void kvm_request_xsave_components(X86CPU *cpu, uint64_t mask);
#endif


@@ -1483,6 +1483,48 @@ static const VMStateDescription vmstate_pdptrs = {
}
};
static bool xfd_msrs_needed(void *opaque)
{
X86CPU *cpu = opaque;
CPUX86State *env = &cpu->env;
return !!(env->features[FEAT_XSAVE] & CPUID_D_1_EAX_XFD);
}
static const VMStateDescription vmstate_msr_xfd = {
.name = "cpu/msr_xfd",
.version_id = 1,
.minimum_version_id = 1,
.needed = xfd_msrs_needed,
.fields = (VMStateField[]) {
VMSTATE_UINT64(env.msr_xfd, X86CPU),
VMSTATE_UINT64(env.msr_xfd_err, X86CPU),
VMSTATE_END_OF_LIST()
}
};
#ifdef TARGET_X86_64
static bool amx_xtile_needed(void *opaque)
{
X86CPU *cpu = opaque;
CPUX86State *env = &cpu->env;
return !!(env->features[FEAT_7_0_EDX] & CPUID_7_0_EDX_AMX_TILE);
}
static const VMStateDescription vmstate_amx_xtile = {
.name = "cpu/intel_amx_xtile",
.version_id = 1,
.minimum_version_id = 1,
.needed = amx_xtile_needed,
.fields = (VMStateField[]) {
VMSTATE_UINT8_ARRAY(env.xtilecfg, X86CPU, 64),
VMSTATE_UINT8_ARRAY(env.xtiledata, X86CPU, 8192),
VMSTATE_END_OF_LIST()
}
};
#endif
const VMStateDescription vmstate_x86_cpu = {
.name = "cpu",
.version_id = 12,
@@ -1622,6 +1664,10 @@ const VMStateDescription vmstate_x86_cpu = {
&vmstate_msr_tsx_ctrl,
&vmstate_msr_intel_sgx,
&vmstate_pdptrs,
&vmstate_msr_xfd,
#ifdef TARGET_X86_64
&vmstate_amx_xtile,
#endif
NULL
}
};


@@ -28,6 +28,42 @@
#include "helper-tcg.h"
#include "seg_helper.h"
int get_pg_mode(CPUX86State *env)
{
int pg_mode = 0;
if (!(env->cr[0] & CR0_PG_MASK)) {
return 0;
}
if (env->cr[0] & CR0_WP_MASK) {
pg_mode |= PG_MODE_WP;
}
if (env->cr[4] & CR4_PAE_MASK) {
pg_mode |= PG_MODE_PAE;
if (env->efer & MSR_EFER_NXE) {
pg_mode |= PG_MODE_NXE;
}
}
if (env->cr[4] & CR4_PSE_MASK) {
pg_mode |= PG_MODE_PSE;
}
if (env->cr[4] & CR4_SMEP_MASK) {
pg_mode |= PG_MODE_SMEP;
}
if (env->hflags & HF_LMA_MASK) {
pg_mode |= PG_MODE_LMA;
if (env->cr[4] & CR4_PKE_MASK) {
pg_mode |= PG_MODE_PKE;
}
if (env->cr[4] & CR4_PKS_MASK) {
pg_mode |= PG_MODE_PKS;
}
if (env->cr[4] & CR4_LA57_MASK) {
pg_mode |= PG_MODE_LA57;
}
}
return pg_mode;
}
/* return non zero if error */
static inline int load_segment_ra(CPUX86State *env, uint32_t *e1_ptr,
uint32_t *e2_ptr, int selector,
@@ -794,7 +830,9 @@ static void do_interrupt_protected(CPUX86State *env, int intno, int is_int,
static inline target_ulong get_rsp_from_tss(CPUX86State *env, int level)
{
X86CPU *cpu = env_archcpu(env);
int index;
int index, pg_mode;
target_ulong rsp;
int32_t sext;
#if 0
printf("TR: base=" TARGET_FMT_lx " limit=%x\n",
@@ -808,7 +846,17 @@ static inline target_ulong get_rsp_from_tss(CPUX86State *env, int level)
if ((index + 7) > env->tr.limit) {
raise_exception_err(env, EXCP0A_TSS, env->tr.selector & 0xfffc);
}
return cpu_ldq_kernel(env, env->tr.base + index);
rsp = cpu_ldq_kernel(env, env->tr.base + index);
/* test virtual address sign extension */
pg_mode = get_pg_mode(env);
sext = (int64_t)rsp >> (pg_mode & PG_MODE_LA57 ? 56 : 47);
if (sext != 0 && sext != -1) {
raise_exception_err(env, EXCP0C_STACK, 0);
}
return rsp;
}
/* 64 bit interrupt */
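The new test enforces the canonical-address rule on the stack pointer loaded from the TSS: every bit above the implemented virtual-address width must be a copy of the top implemented bit, so the arithmetic shift must yield 0 or -1. Worked values for the 4-level case (shift by 47; illustrative):

    /* rsp = 0x00007fffffffe000 -> rsp >> 47 ==  0  canonical, OK           */
    /* rsp = 0xffff800000000000 -> rsp >> 47 == -1  canonical, OK           */
    /* rsp = 0x0000800000000000 -> rsp >> 47 ==  1  non-canonical, #SS(0)   */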


@@ -22,39 +22,6 @@
#include "exec/exec-all.h"
#include "tcg/helper-tcg.h"
int get_pg_mode(CPUX86State *env)
{
int pg_mode = 0;
if (env->cr[0] & CR0_WP_MASK) {
pg_mode |= PG_MODE_WP;
}
if (env->cr[4] & CR4_PAE_MASK) {
pg_mode |= PG_MODE_PAE;
}
if (env->cr[4] & CR4_PSE_MASK) {
pg_mode |= PG_MODE_PSE;
}
if (env->cr[4] & CR4_PKE_MASK) {
pg_mode |= PG_MODE_PKE;
}
if (env->cr[4] & CR4_PKS_MASK) {
pg_mode |= PG_MODE_PKS;
}
if (env->cr[4] & CR4_SMEP_MASK) {
pg_mode |= PG_MODE_SMEP;
}
if (env->cr[4] & CR4_LA57_MASK) {
pg_mode |= PG_MODE_LA57;
}
if (env->hflags & HF_LMA_MASK) {
pg_mode |= PG_MODE_LMA;
}
if (env->efer & MSR_EFER_NXE) {
pg_mode |= PG_MODE_NXE;
}
return pg_mode;
}
#define PG_ERROR_OK (-1)
typedef hwaddr (*MMUTranslateFunc)(CPUState *cs, hwaddr gphys, MMUAccessType access_type,
@@ -279,9 +246,7 @@ do_check_protect_pse36:
*prot |= PAGE_EXEC;
}
if (!(pg_mode & PG_MODE_LMA)) {
pkr = 0;
} else if (ptep & PG_USER_MASK) {
if (ptep & PG_USER_MASK) {
pkr = pg_mode & PG_MODE_PKE ? env->pkru : 0;
} else {
pkr = pg_mode & PG_MODE_PKS ? env->pkrs : 0;
@@ -344,8 +309,7 @@ do_check_protect_pse36:
if (is_user)
error_code |= PG_ERROR_U_MASK;
if (is_write1 == 2 &&
(((pg_mode & PG_MODE_NXE) && (pg_mode & PG_MODE_PAE)) ||
(pg_mode & PG_MODE_SMEP)))
((pg_mode & PG_MODE_NXE) || (pg_mode & PG_MODE_SMEP)))
error_code |= PG_ERROR_I_D_MASK;
return error_code;
}
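After the move, get_pg_mode() reports a bit only when the current mode makes it architecturally meaningful (NXE only under PAE, PKE/PKS/LA57 only under LMA), which is what lets the fault-code logic above test PG_MODE_NXE and the protection-key bits without re-qualifying them. For example, for an assumed plain 64-bit guest (CR0.PG=WP=1, CR4.PAE=PSE=1, EFER.NXE=1, long mode, protection keys and LA57 off) it returns:

    PG_MODE_WP | PG_MODE_PAE | PG_MODE_NXE | PG_MODE_PSE | PG_MODE_LMA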


@@ -256,6 +256,21 @@ static int whpx_set_tsc(CPUState *cpu)
return 0;
}
/*
* The CR8 register in the CPU is mapped to the TPR register of the APIC,
* however, they use a slightly different encoding. Specifically:
*
* APIC.TPR[bits 7:4] = CR8[bits 3:0]
*
* This mechanism is described in section 10.8.6.1 of Volume 3 of Intel 64
* and IA-32 Architectures Software Developer's Manual.
*/
static uint64_t whpx_apic_tpr_to_cr8(uint64_t tpr)
{
return tpr >> 4;
}
static void whpx_set_registers(CPUState *cpu, int level)
{
struct whpx_state *whpx = &whpx_global;
@@ -284,7 +299,7 @@ static void whpx_set_registers(CPUState *cpu, int level)
v86 = (env->eflags & VM_MASK);
r86 = !(env->cr[0] & CR0_PE_MASK);
vcpu->tpr = cpu_get_apic_tpr(x86_cpu->apic_state);
vcpu->tpr = whpx_apic_tpr_to_cr8(cpu_get_apic_tpr(x86_cpu->apic_state));
vcpu->apic_base = cpu_get_apic_base(x86_cpu->apic_state);
idx = 0;
@@ -475,6 +490,17 @@ static void whpx_get_registers(CPUState *cpu)
hr);
}
if (whpx_apic_in_platform()) {
/*
* Fetch the TPR value from the emulated APIC. It may get overwritten
* below with the value from CR8 returned by
* WHvGetVirtualProcessorRegisters().
*/
whpx_apic_get(x86_cpu->apic_state);
vcpu->tpr = whpx_apic_tpr_to_cr8(
cpu_get_apic_tpr(x86_cpu->apic_state));
}
idx = 0;
/* Indexes for first 16 registers match between HV and QEMU definitions */
@@ -604,6 +630,8 @@ static void whpx_get_registers(CPUState *cpu)
whpx_apic_get(x86_cpu->apic_state);
}
x86_update_hflags(env);
return;
}
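Writing a CR8 value back into the APIC would use the inverse shift. The helper below is a sketch of that other direction of the mapping, an assumption for illustration rather than part of this diff:

    /* Hypothetical inverse of whpx_apic_tpr_to_cr8() above:
     * CR8[3:0] -> APIC.TPR[7:4]. */
    static uint64_t whpx_cr8_to_apic_tpr(uint64_t cr8)
    {
        return cr8 << 4;
    }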


@@ -126,6 +126,20 @@ void x86_cpu_xsave_all_areas(X86CPU *cpu, void *buf, uint32_t buflen)
memcpy(pkru, &env->pkru, sizeof(env->pkru));
}
e = &x86_ext_save_areas[XSTATE_XTILE_CFG_BIT];
if (e->size && e->offset) {
XSaveXTILECFG *tilecfg = buf + e->offset;
memcpy(tilecfg, &env->xtilecfg, sizeof(env->xtilecfg));
}
e = &x86_ext_save_areas[XSTATE_XTILE_DATA_BIT];
if (e->size && e->offset && buflen >= e->size + e->offset) {
XSaveXTILEDATA *tiledata = buf + e->offset;
memcpy(tiledata, &env->xtiledata, sizeof(env->xtiledata));
}
#endif
}
@@ -247,5 +261,19 @@ void x86_cpu_xrstor_all_areas(X86CPU *cpu, const void *buf, uint32_t buflen)
pkru = buf + e->offset;
memcpy(&env->pkru, pkru, sizeof(env->pkru));
}
e = &x86_ext_save_areas[XSTATE_XTILE_CFG_BIT];
if (e->size && e->offset) {
const XSaveXTILECFG *tilecfg = buf + e->offset;
memcpy(&env->xtilecfg, tilecfg, sizeof(env->xtilecfg));
}
e = &x86_ext_save_areas[XSTATE_XTILE_DATA_BIT];
if (e->size && e->offset && buflen >= e->size + e->offset) {
const XSaveXTILEDATA *tiledata = buf + e->offset;
memcpy(&env->xtiledata, tiledata, sizeof(env->xtiledata));
}
#endif
}
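The asymmetric guards are deliberate: XTILECFG (64 bytes) always fits, but XTILEDATA is 0x2000 bytes, and the extra buflen check covers the case where the buffer is still a legacy 4 KiB struct kvm_xsave because KVM_CAP_XSAVE2 was unavailable (see kvm_init_xsave() in the kvm.c hunk). A sketch of the size reasoning, with an illustrative offset:

    /* sizeof(struct kvm_xsave) == 4096, so for any XTILEDATA offset
     * (e.g. 0x2a00, illustrative only):
     *   e->offset + e->size == 0x2a00 + 0x2000 > 4096
     * and the copy is skipped unless the buffer was sized from
     * KVM_CAP_XSAVE2 / KVM_GET_XSAVE2. */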