diff --git a/hw/i386/pc.c b/hw/i386/pc.c index fc65049e1d..73d688f842 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -112,6 +112,16 @@ struct hpet_fw_config hpet_cfg = {.count = UINT8_MAX}; GlobalProperty pc_compat_3_1[] = { { "intel-iommu", "dma-drain", "off" }, + { "Opteron_G3" "-" TYPE_X86_CPU, "rdtscp", "off" }, + { "Opteron_G4" "-" TYPE_X86_CPU, "rdtscp", "off" }, + { "Opteron_G5" "-" TYPE_X86_CPU, "rdtscp", "off" }, + { "Skylake-Client" "-" TYPE_X86_CPU, "mpx", "on" }, + { "Skylake-Client-IBRS" "-" TYPE_X86_CPU, "mpx", "on" }, + { "Skylake-Server" "-" TYPE_X86_CPU, "mpx", "on" }, + { "Skylake-Server-IBRS" "-" TYPE_X86_CPU, "mpx", "on" }, + { "Cascadelake-Server" "-" TYPE_X86_CPU, "mpx", "on" }, + { "Icelake-Client" "-" TYPE_X86_CPU, "mpx", "on" }, + { "Icelake-Server" "-" TYPE_X86_CPU, "mpx", "on" }, }; const size_t pc_compat_3_1_len = G_N_ELEMENTS(pc_compat_3_1); diff --git a/qemu-doc.texi b/qemu-doc.texi index f7ad1dfe4b..16b955cbf9 100644 --- a/qemu-doc.texi +++ b/qemu-doc.texi @@ -37,6 +37,7 @@ * QEMU System emulator for non PC targets:: * QEMU Guest Agent:: * QEMU User space emulator:: +* System requirements:: * Implementation notes:: * Deprecated features:: * Supported build platforms:: @@ -2813,6 +2814,18 @@ Act as if the host page size was 'pagesize' bytes Run the emulation in single step mode. @end table +@node System requirements +@chapter System requirements + +@section KVM kernel module + +On x86_64 hosts, the default set of CPU features enabled by the KVM accelerator +require the host to be running Linux v4.5 or newer. + +The OpteronG[345] CPU models require KVM support for RDTSCP, which was +added with Linux 4.5 which is supported by the major distros. And even +if RHEL7 has kernel 3.10, KVM there has the required functionality there +to make it close to a 4.5 or newer kernel. @include qemu-tech.texi diff --git a/target/i386/cpu.c b/target/i386/cpu.c index fa37203d89..2f5412592d 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -929,6 +929,13 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { */ .no_autoenable_flags = ~0U, }, + /* + * .feat_names are commented out for Hyper-V enlightenments because we + * don't want to have two different ways for enabling them on QEMU command + * line. Some features (e.g. "hyperv_time", "hyperv_vapic", ...) require + * enabling several feature bits simultaneously, exposing these bits + * individually may just confuse guests. + */ [FEAT_HYPERV_EAX] = { .type = CPUID_FEATURE_WORD, .feat_names = { @@ -980,6 +987,36 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { }, .cpuid = { .eax = 0x40000003, .reg = R_EDX, }, }, + [FEAT_HV_RECOMM_EAX] = { + .type = CPUID_FEATURE_WORD, + .feat_names = { + NULL /* hv_recommend_pv_as_switch */, + NULL /* hv_recommend_pv_tlbflush_local */, + NULL /* hv_recommend_pv_tlbflush_remote */, + NULL /* hv_recommend_msr_apic_access */, + NULL /* hv_recommend_msr_reset */, + NULL /* hv_recommend_relaxed_timing */, + NULL /* hv_recommend_dma_remapping */, + NULL /* hv_recommend_int_remapping */, + NULL /* hv_recommend_x2apic_msrs */, + NULL /* hv_recommend_autoeoi_deprecation */, + NULL /* hv_recommend_pv_ipi */, + NULL /* hv_recommend_ex_hypercalls */, + NULL /* hv_hypervisor_is_nested */, + NULL /* hv_recommend_int_mbec */, + NULL /* hv_recommend_evmcs */, + NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + }, + .cpuid = { .eax = 0x40000004, .reg = R_EAX, }, + }, + [FEAT_HV_NESTED_EAX] = { + .type = CPUID_FEATURE_WORD, + .cpuid = { .eax = 0x4000000A, .reg = R_EAX, }, + }, [FEAT_SVM] = { .type = CPUID_FEATURE_WORD, .feat_names = { @@ -2296,7 +2333,7 @@ static X86CPUDefinition builtin_x86_defs[] = { CPUID_7_0_EBX_HLE | CPUID_7_0_EBX_AVX2 | CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | CPUID_7_0_EBX_INVPCID | CPUID_7_0_EBX_RTM | CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | - CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_MPX, + CPUID_7_0_EBX_SMAP, /* Missing: XSAVES (not supported by some Linux versions, * including v4.1 to v4.12). * KVM doesn't yet expose any XSAVES state save component, @@ -2343,7 +2380,7 @@ static X86CPUDefinition builtin_x86_defs[] = { CPUID_7_0_EBX_HLE | CPUID_7_0_EBX_AVX2 | CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | CPUID_7_0_EBX_INVPCID | CPUID_7_0_EBX_RTM | CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | - CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_MPX, + CPUID_7_0_EBX_SMAP, /* Missing: XSAVES (not supported by some Linux versions, * including v4.1 to v4.12). * KVM doesn't yet expose any XSAVES state save component, @@ -2388,7 +2425,7 @@ static X86CPUDefinition builtin_x86_defs[] = { CPUID_7_0_EBX_HLE | CPUID_7_0_EBX_AVX2 | CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | CPUID_7_0_EBX_INVPCID | CPUID_7_0_EBX_RTM | CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | - CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_MPX | CPUID_7_0_EBX_CLWB | + CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_CLWB | CPUID_7_0_EBX_AVX512F | CPUID_7_0_EBX_AVX512DQ | CPUID_7_0_EBX_AVX512BW | CPUID_7_0_EBX_AVX512CD | CPUID_7_0_EBX_AVX512VL | CPUID_7_0_EBX_CLFLUSHOPT, @@ -2440,7 +2477,7 @@ static X86CPUDefinition builtin_x86_defs[] = { CPUID_7_0_EBX_HLE | CPUID_7_0_EBX_AVX2 | CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | CPUID_7_0_EBX_INVPCID | CPUID_7_0_EBX_RTM | CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | - CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_MPX | CPUID_7_0_EBX_CLWB | + CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_CLWB | CPUID_7_0_EBX_AVX512F | CPUID_7_0_EBX_AVX512DQ | CPUID_7_0_EBX_AVX512BW | CPUID_7_0_EBX_AVX512CD | CPUID_7_0_EBX_AVX512VL, @@ -2490,7 +2527,7 @@ static X86CPUDefinition builtin_x86_defs[] = { CPUID_7_0_EBX_HLE | CPUID_7_0_EBX_AVX2 | CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | CPUID_7_0_EBX_INVPCID | CPUID_7_0_EBX_RTM | CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | - CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_MPX | CPUID_7_0_EBX_CLWB | + CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_CLWB | CPUID_7_0_EBX_AVX512F | CPUID_7_0_EBX_AVX512DQ | CPUID_7_0_EBX_AVX512BW | CPUID_7_0_EBX_AVX512CD | CPUID_7_0_EBX_AVX512VL | CPUID_7_0_EBX_CLFLUSHOPT | @@ -2546,7 +2583,7 @@ static X86CPUDefinition builtin_x86_defs[] = { CPUID_7_0_EBX_HLE | CPUID_7_0_EBX_AVX2 | CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | CPUID_7_0_EBX_INVPCID | CPUID_7_0_EBX_RTM | CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | - CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_MPX | CPUID_7_0_EBX_INTEL_PT, + CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_INTEL_PT, .features[FEAT_7_0_ECX] = CPUID_7_0_ECX_VBMI | CPUID_7_0_ECX_UMIP | CPUID_7_0_ECX_PKU | CPUID_7_0_ECX_OSPKE | CPUID_7_0_ECX_VBMI2 | CPUID_7_0_ECX_GFNI | @@ -2601,7 +2638,7 @@ static X86CPUDefinition builtin_x86_defs[] = { CPUID_7_0_EBX_HLE | CPUID_7_0_EBX_AVX2 | CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | CPUID_7_0_EBX_INVPCID | CPUID_7_0_EBX_RTM | CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | - CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_MPX | CPUID_7_0_EBX_CLWB | + CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_CLWB | CPUID_7_0_EBX_AVX512F | CPUID_7_0_EBX_AVX512DQ | CPUID_7_0_EBX_AVX512BW | CPUID_7_0_EBX_AVX512CD | CPUID_7_0_EBX_AVX512VL | CPUID_7_0_EBX_CLFLUSHOPT | @@ -2706,7 +2743,6 @@ static X86CPUDefinition builtin_x86_defs[] = { CPUID_DE | CPUID_FP87, .features[FEAT_1_ECX] = CPUID_EXT_CX16 | CPUID_EXT_SSE3, - /* Missing: CPUID_EXT2_RDTSCP */ .features[FEAT_8000_0001_EDX] = CPUID_EXT2_LM | CPUID_EXT2_NX | CPUID_EXT2_SYSCALL, .features[FEAT_8000_0001_ECX] = @@ -2730,9 +2766,9 @@ static X86CPUDefinition builtin_x86_defs[] = { .features[FEAT_1_ECX] = CPUID_EXT_POPCNT | CPUID_EXT_CX16 | CPUID_EXT_MONITOR | CPUID_EXT_SSE3, - /* Missing: CPUID_EXT2_RDTSCP */ .features[FEAT_8000_0001_EDX] = - CPUID_EXT2_LM | CPUID_EXT2_NX | CPUID_EXT2_SYSCALL, + CPUID_EXT2_LM | CPUID_EXT2_NX | CPUID_EXT2_SYSCALL | + CPUID_EXT2_RDTSCP, .features[FEAT_8000_0001_ECX] = CPUID_EXT3_MISALIGNSSE | CPUID_EXT3_SSE4A | CPUID_EXT3_ABM | CPUID_EXT3_SVM | CPUID_EXT3_LAHF_LM, @@ -2757,10 +2793,9 @@ static X86CPUDefinition builtin_x86_defs[] = { CPUID_EXT_POPCNT | CPUID_EXT_SSE42 | CPUID_EXT_SSE41 | CPUID_EXT_CX16 | CPUID_EXT_SSSE3 | CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSE3, - /* Missing: CPUID_EXT2_RDTSCP */ .features[FEAT_8000_0001_EDX] = CPUID_EXT2_LM | CPUID_EXT2_PDPE1GB | CPUID_EXT2_NX | - CPUID_EXT2_SYSCALL, + CPUID_EXT2_SYSCALL | CPUID_EXT2_RDTSCP, .features[FEAT_8000_0001_ECX] = CPUID_EXT3_FMA4 | CPUID_EXT3_XOP | CPUID_EXT3_3DNOWPREFETCH | CPUID_EXT3_MISALIGNSSE | @@ -2788,10 +2823,9 @@ static X86CPUDefinition builtin_x86_defs[] = { CPUID_EXT_AES | CPUID_EXT_POPCNT | CPUID_EXT_SSE42 | CPUID_EXT_SSE41 | CPUID_EXT_CX16 | CPUID_EXT_FMA | CPUID_EXT_SSSE3 | CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSE3, - /* Missing: CPUID_EXT2_RDTSCP */ .features[FEAT_8000_0001_EDX] = CPUID_EXT2_LM | CPUID_EXT2_PDPE1GB | CPUID_EXT2_NX | - CPUID_EXT2_SYSCALL, + CPUID_EXT2_SYSCALL | CPUID_EXT2_RDTSCP, .features[FEAT_8000_0001_ECX] = CPUID_EXT3_TBM | CPUID_EXT3_FMA4 | CPUID_EXT3_XOP | CPUID_EXT3_3DNOWPREFETCH | CPUID_EXT3_MISALIGNSSE | @@ -5151,6 +5185,10 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) if (cpu->host_phys_bits) { /* The user asked for us to use the host physical bits */ cpu->phys_bits = host_phys_bits; + if (cpu->host_phys_bits_limit && + cpu->phys_bits > cpu->host_phys_bits_limit) { + cpu->phys_bits = cpu->host_phys_bits_limit; + } } /* Print a warning if the user set it to a value that's not the @@ -5738,6 +5776,7 @@ static Property x86_cpu_properties[] = { DEFINE_PROP_BOOL("kvm", X86CPU, expose_kvm, true), DEFINE_PROP_UINT32("phys-bits", X86CPU, phys_bits, 0), DEFINE_PROP_BOOL("host-phys-bits", X86CPU, host_phys_bits, false), + DEFINE_PROP_UINT8("host-phys-bits-limit", X86CPU, host_phys_bits_limit, 0), DEFINE_PROP_BOOL("fill-mtrr-mask", X86CPU, fill_mtrr_mask, true), DEFINE_PROP_UINT32("level", X86CPU, env.cpuid_level, UINT32_MAX), DEFINE_PROP_UINT32("xlevel", X86CPU, env.cpuid_xlevel, UINT32_MAX), diff --git a/target/i386/cpu.h b/target/i386/cpu.h index ef41a033c5..59656a70e6 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -497,6 +497,8 @@ typedef enum FeatureWord { FEAT_HYPERV_EAX, /* CPUID[4000_0003].EAX */ FEAT_HYPERV_EBX, /* CPUID[4000_0003].EBX */ FEAT_HYPERV_EDX, /* CPUID[4000_0003].EDX */ + FEAT_HV_RECOMM_EAX, /* CPUID[4000_0004].EAX */ + FEAT_HV_NESTED_EAX, /* CPUID[4000_000A].EAX */ FEAT_SVM, /* CPUID[8000_000A].EDX */ FEAT_XSAVE, /* CPUID[EAX=0xd,ECX=1].EAX */ FEAT_6_EAX, /* CPUID[6].EAX */ @@ -1459,6 +1461,9 @@ struct X86CPU { /* if true override the phys_bits value with a value read from the host */ bool host_phys_bits; + /* if set, limit maximum value for phys_bits when host_phys_bits is true */ + uint8_t host_phys_bits_limit; + /* Stop SMI delivery for migration compatibility with old machines */ bool kvm_no_smi_migration; diff --git a/target/i386/kvm.c b/target/i386/kvm.c index 739cf8c8ea..9af4542fb8 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -798,6 +798,48 @@ static int hyperv_handle_properties(CPUState *cs) } env->features[FEAT_HYPERV_EAX] |= HV_SYNTIMERS_AVAILABLE; } + if (cpu->hyperv_relaxed_timing) { + env->features[FEAT_HV_RECOMM_EAX] |= HV_RELAXED_TIMING_RECOMMENDED; + } + if (cpu->hyperv_vapic) { + env->features[FEAT_HV_RECOMM_EAX] |= HV_APIC_ACCESS_RECOMMENDED; + } + if (cpu->hyperv_tlbflush) { + if (kvm_check_extension(cs->kvm_state, + KVM_CAP_HYPERV_TLBFLUSH) <= 0) { + fprintf(stderr, "Hyper-V TLB flush support " + "(requested by 'hv-tlbflush' cpu flag) " + " is not supported by kernel\n"); + return -ENOSYS; + } + env->features[FEAT_HV_RECOMM_EAX] |= HV_REMOTE_TLB_FLUSH_RECOMMENDED; + env->features[FEAT_HV_RECOMM_EAX] |= HV_EX_PROCESSOR_MASKS_RECOMMENDED; + } + if (cpu->hyperv_ipi) { + if (kvm_check_extension(cs->kvm_state, + KVM_CAP_HYPERV_SEND_IPI) <= 0) { + fprintf(stderr, "Hyper-V IPI send support " + "(requested by 'hv-ipi' cpu flag) " + " is not supported by kernel\n"); + return -ENOSYS; + } + env->features[FEAT_HV_RECOMM_EAX] |= HV_CLUSTER_IPI_RECOMMENDED; + env->features[FEAT_HV_RECOMM_EAX] |= HV_EX_PROCESSOR_MASKS_RECOMMENDED; + } + if (cpu->hyperv_evmcs) { + uint16_t evmcs_version; + + if (kvm_vcpu_enable_cap(cs, KVM_CAP_HYPERV_ENLIGHTENED_VMCS, 0, + (uintptr_t)&evmcs_version)) { + fprintf(stderr, "Hyper-V Enlightened VMCS " + "(requested by 'hv-evmcs' cpu flag) " + "is not supported by kernel\n"); + return -ENOSYS; + } + env->features[FEAT_HV_RECOMM_EAX] |= HV_ENLIGHTENED_VMCS_RECOMMENDED; + env->features[FEAT_HV_NESTED_EAX] = evmcs_version; + } + return 0; } @@ -879,7 +921,6 @@ int kvm_arch_init_vcpu(CPUState *cs) uint32_t unused; struct kvm_cpuid_entry2 *c; uint32_t signature[3]; - uint16_t evmcs_version; int kvm_base = KVM_CPUID_SIGNATURE; int r; Error *local_err = NULL; @@ -954,44 +995,8 @@ int kvm_arch_init_vcpu(CPUState *cs) c = &cpuid_data.entries[cpuid_i++]; c->function = HV_CPUID_ENLIGHTMENT_INFO; - if (cpu->hyperv_relaxed_timing) { - c->eax |= HV_RELAXED_TIMING_RECOMMENDED; - } - if (cpu->hyperv_vapic) { - c->eax |= HV_APIC_ACCESS_RECOMMENDED; - } - if (cpu->hyperv_tlbflush) { - if (kvm_check_extension(cs->kvm_state, - KVM_CAP_HYPERV_TLBFLUSH) <= 0) { - fprintf(stderr, "Hyper-V TLB flush support " - "(requested by 'hv-tlbflush' cpu flag) " - " is not supported by kernel\n"); - return -ENOSYS; - } - c->eax |= HV_REMOTE_TLB_FLUSH_RECOMMENDED; - c->eax |= HV_EX_PROCESSOR_MASKS_RECOMMENDED; - } - if (cpu->hyperv_ipi) { - if (kvm_check_extension(cs->kvm_state, - KVM_CAP_HYPERV_SEND_IPI) <= 0) { - fprintf(stderr, "Hyper-V IPI send support " - "(requested by 'hv-ipi' cpu flag) " - " is not supported by kernel\n"); - return -ENOSYS; - } - c->eax |= HV_CLUSTER_IPI_RECOMMENDED; - c->eax |= HV_EX_PROCESSOR_MASKS_RECOMMENDED; - } - if (cpu->hyperv_evmcs) { - if (kvm_vcpu_enable_cap(cs, KVM_CAP_HYPERV_ENLIGHTENED_VMCS, 0, - (uintptr_t)&evmcs_version)) { - fprintf(stderr, "Hyper-V Enlightened VMCS " - "(requested by 'hv-evmcs' cpu flag) " - "is not supported by kernel\n"); - return -ENOSYS; - } - c->eax |= HV_ENLIGHTENED_VMCS_RECOMMENDED; - } + + c->eax = env->features[FEAT_HV_RECOMM_EAX]; c->ebx = cpu->hyperv_spinlock_attempts; c = &cpuid_data.entries[cpuid_i++]; @@ -1015,7 +1020,7 @@ int kvm_arch_init_vcpu(CPUState *cs) c = &cpuid_data.entries[cpuid_i++]; c->function = HV_CPUID_NESTED_FEATURES; - c->eax = evmcs_version; + c->eax = env->features[FEAT_HV_NESTED_EAX]; } }