From a33ba1bf0dc6082227d1ddf964632e07b53b8971 Mon Sep 17 00:00:00 2001
From: Wenyao Hai
Date: Thu, 11 May 2023 16:33:44 -0700
Subject: KVM: VMX: Open code writing vCPU's PAT in VMX's MSR handler

Open code setting "vcpu->arch.pat" in vmx_set_msr() instead of bouncing
through kvm_set_msr_common() to get to the same code in kvm_mtrr_set_msr().
This aligns VMX with SVM, avoids hiding a very simple operation behind a
relatively complicated function call (finding the PAT MSR case in
kvm_set_msr_common() is non-trivial), and most importantly, makes it clear
that not unwinding the VMCS updates if kvm_set_msr_common() fails isn't a
bug (because kvm_set_msr_common() can never fail for PAT).

Opportunistically set vcpu->arch.pat before updating the VMCS info so that
a future patch can move the common bits (back) into kvm_set_msr_common()
without a functional change.

Note, MSR_IA32_CR_PAT is 0x277, and is very subtly handled by

        case 0x200 ... MSR_IA32_MC0_CTL2 - 1:

in kvm_set_msr_common().

Cc: Kai Huang
Signed-off-by: Wenyao Hai
[sean: massage changelog, hoist setting vcpu->arch.pat up]
Reviewed-by: Kai Huang
Link: https://lore.kernel.org/r/20230511233351.635053-2-seanjc@google.com
Signed-off-by: Sean Christopherson
---
 arch/x86/kvm/vmx/vmx.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 44fb619803b8..33b8625d3541 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -2290,16 +2290,14 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		if (!kvm_pat_valid(data))
 			return 1;
 
+		vcpu->arch.pat = data;
+
 		if (is_guest_mode(vcpu) &&
 		    get_vmcs12(vcpu)->vm_exit_controls & VM_EXIT_SAVE_IA32_PAT)
 			get_vmcs12(vcpu)->guest_ia32_pat = data;
 
-		if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
+		if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT)
 			vmcs_write64(GUEST_IA32_PAT, data);
-			vcpu->arch.pat = data;
-			break;
-		}
-		ret = kvm_set_msr_common(vcpu, msr_info);
 		break;
 	case MSR_IA32_MCG_EXT_CTL:
 		if ((!msr_info->host_initiated &&
--
cgit v1.2.3
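
For context, the kvm_pat_valid() helper referenced throughout this series
enforces that each of the eight PAT entries is a supported memory type
(0, 1, 4, 5, 6 or 7).  The sketch below approximates the helper in
arch/x86/kvm/x86.h rather than quoting it verbatim, so treat the exact bit
tricks as illustrative:

/* Approximation of kvm_pat_valid(), for reference only. */
static inline bool kvm_pat_valid(u64 data)
{
	/* Bits 7:3 of every PAT entry must be zero. */
	if (data & 0xF8F8F8F8F8F8F8F8ull)
		return false;

	/* 0, 1, 4, 5, 6 and 7 are valid memory types; 2 and 3 are reserved. */
	return (data | ((data & 0x0202020202020202ull) << 1)) == data;
}
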
From 7aeae027611ff27f13a32e19736c8fd06e41786c Mon Sep 17 00:00:00 2001
From: Ke Guo
Date: Thu, 11 May 2023 16:33:45 -0700
Subject: KVM: SVM: Use kvm_pat_valid() directly instead of kvm_mtrr_valid()

Use kvm_pat_valid() directly instead of bouncing through kvm_mtrr_valid().
The PAT is not an MTRR, and kvm_mtrr_valid() just redirects to
kvm_pat_valid(), i.e. PAT is exempt from KVM's "zap SPTEs" logic that's
needed to honor guest MTRRs when the VM has a passthrough device with
non-coherent DMA (KVM does NOT set "ignore guest PAT" in this case, and so
enables hardware virtualization of the guest's PAT, i.e. doesn't need to
manually emulate the PAT memtype).

Signed-off-by: Ke Guo
[sean: massage changelog]
Reviewed-by: Kai Huang
Link: https://lore.kernel.org/r/20230511233351.635053-3-seanjc@google.com
Signed-off-by: Sean Christopherson
---
 arch/x86/kvm/svm/svm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index ca32389f3c36..e1e17c7dc7d5 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -2939,7 +2939,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
 		break;
 	case MSR_IA32_CR_PAT:
-		if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
+		if (!kvm_pat_valid(data))
 			return 1;
 		vcpu->arch.pat = data;
 		svm->vmcb01.ptr->save.g_pat = data;
--
cgit v1.2.3

From ebda79e5057778be1ad8ed072e4229894dfc66b7 Mon Sep 17 00:00:00 2001
From: Sean Christopherson
Date: Thu, 11 May 2023 16:33:46 -0700
Subject: KVM: x86: Add helper to query if variable MTRR MSR is base (versus mask)

Add a helper to query whether a variable MTRR MSR is a base versus a mask
MSR.  Replace the unnecessarily complex math with a simple check on bit 0;
base MSRs are even, mask MSRs are odd.

Link: https://lore.kernel.org/r/20230511233351.635053-4-seanjc@google.com
Signed-off-by: Sean Christopherson
---
 arch/x86/kvm/mtrr.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c
index 9fac1ec03463..f65ce4b3980f 100644
--- a/arch/x86/kvm/mtrr.c
+++ b/arch/x86/kvm/mtrr.c
@@ -25,6 +25,12 @@
 #define IA32_MTRR_DEF_TYPE_FE		(1ULL << 10)
 #define IA32_MTRR_DEF_TYPE_TYPE_MASK	(0xff)
 
+static bool is_mtrr_base_msr(unsigned int msr)
+{
+	/* MTRR base MSRs use even numbers, masks use odd numbers. */
+	return !(msr & 0x1);
+}
+
 static bool msr_mtrr_valid(unsigned msr)
 {
 	switch (msr) {
@@ -342,10 +348,9 @@ static void set_var_mtrr_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 {
 	struct kvm_mtrr *mtrr_state = &vcpu->arch.mtrr_state;
 	struct kvm_mtrr_range *tmp, *cur;
-	int index, is_mtrr_mask;
+	int index;
 
 	index = (msr - 0x200) / 2;
-	is_mtrr_mask = msr - 0x200 - 2 * index;
 	cur = &mtrr_state->var_ranges[index];
 
 	/* remove the entry if it's in the list. */
@@ -356,7 +361,7 @@ static void set_var_mtrr_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 	 * Set all illegal GPA bits in the mask, since those bits must
 	 * implicitly be 0.  The bits are then cleared when reading them.
 	 */
-	if (!is_mtrr_mask)
+	if (is_mtrr_base_msr(msr))
 		cur->base = data;
 	else
 		cur->mask = data | kvm_vcpu_reserved_gpa_bits_raw(vcpu);
@@ -418,11 +423,8 @@ int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 	else if (msr == MSR_IA32_CR_PAT)
 		*pdata = vcpu->arch.pat;
 	else {	/* Variable MTRRs */
-		int is_mtrr_mask;
-
 		index = (msr - 0x200) / 2;
-		is_mtrr_mask = msr - 0x200 - 2 * index;
-		if (!is_mtrr_mask)
+		if (is_mtrr_base_msr(msr))
 			*pdata = vcpu->arch.mtrr_state.var_ranges[index].base;
 		else
 			*pdata = vcpu->arch.mtrr_state.var_ranges[index].mask;
--
cgit v1.2.3
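
A quick worked example of the old index math versus the new bit-0 check,
using an illustrative MSR value rather than one taken from the patch:

/*
 * Example: MSR 0x205 is the mask MSR of variable range 2.
 *
 *   old:  index        = (0x205 - 0x200) / 2        = 2
 *         is_mtrr_mask = 0x205 - 0x200 - 2 * index  = 1   -> mask MSR
 *
 *   new:  0x205 & 0x1 = 1  -> odd,  mask MSR
 *         0x204 & 0x1 = 0  -> even, base MSR
 */
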
From 9ae38b4fb13597ce1821376d23958bbe4976c759 Mon Sep 17 00:00:00 2001
From: Sean Christopherson
Date: Thu, 11 May 2023 16:33:47 -0700
Subject: KVM: x86: Add helper to get variable MTRR range from MSR index

Add a helper to dedup the logic for retrieving a variable MTRR range
structure given a variable MTRR MSR index.

No functional change intended.

Reviewed-by: Kai Huang
Link: https://lore.kernel.org/r/20230511233351.635053-5-seanjc@google.com
Signed-off-by: Sean Christopherson
---
 arch/x86/kvm/mtrr.c | 23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c
index f65ce4b3980f..59851dbebfea 100644
--- a/arch/x86/kvm/mtrr.c
+++ b/arch/x86/kvm/mtrr.c
@@ -31,6 +31,14 @@ static bool is_mtrr_base_msr(unsigned int msr)
 	return !(msr & 0x1);
 }
 
+static struct kvm_mtrr_range *var_mtrr_msr_to_range(struct kvm_vcpu *vcpu,
+						    unsigned int msr)
+{
+	int index = (msr - 0x200) / 2;
+
+	return &vcpu->arch.mtrr_state.var_ranges[index];
+}
+
 static bool msr_mtrr_valid(unsigned msr)
 {
 	switch (msr) {
@@ -314,7 +322,6 @@ static void update_mtrr(struct kvm_vcpu *vcpu, u32 msr)
 {
 	struct kvm_mtrr *mtrr_state = &vcpu->arch.mtrr_state;
 	gfn_t start, end;
-	int index;
 
 	if (msr == MSR_IA32_CR_PAT || !tdp_enabled ||
 	    !kvm_arch_has_noncoherent_dma(vcpu->kvm))
@@ -332,8 +339,7 @@ static void update_mtrr(struct kvm_vcpu *vcpu, u32 msr)
 		end = ~0ULL;
 	} else {
 		/* variable range MTRRs. */
-		index = (msr - 0x200) / 2;
-		var_mtrr_range(&mtrr_state->var_ranges[index], &start, &end);
+		var_mtrr_range(var_mtrr_msr_to_range(vcpu, msr), &start, &end);
 	}
 
 	kvm_zap_gfn_range(vcpu->kvm, gpa_to_gfn(start), gpa_to_gfn(end));
@@ -348,14 +354,12 @@ static void set_var_mtrr_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 {
 	struct kvm_mtrr *mtrr_state = &vcpu->arch.mtrr_state;
 	struct kvm_mtrr_range *tmp, *cur;
-	int index;
 
-	index = (msr - 0x200) / 2;
-	cur = &mtrr_state->var_ranges[index];
+	cur = var_mtrr_msr_to_range(vcpu, msr);
 
 	/* remove the entry if it's in the list. */
 	if (var_mtrr_range_is_valid(cur))
-		list_del(&mtrr_state->var_ranges[index].node);
+		list_del(&cur->node);
 
 	/*
 	 * Set all illegal GPA bits in the mask, since those bits must
@@ -423,11 +427,10 @@ int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 	else if (msr == MSR_IA32_CR_PAT)
 		*pdata = vcpu->arch.pat;
 	else {	/* Variable MTRRs */
-		index = (msr - 0x200) / 2;
 		if (is_mtrr_base_msr(msr))
-			*pdata = vcpu->arch.mtrr_state.var_ranges[index].base;
+			*pdata = var_mtrr_msr_to_range(vcpu, msr)->base;
 		else
-			*pdata = vcpu->arch.mtrr_state.var_ranges[index].mask;
+			*pdata = var_mtrr_msr_to_range(vcpu, msr)->mask;
 
 		*pdata &= ~kvm_vcpu_reserved_gpa_bits_raw(vcpu);
 	}
--
cgit v1.2.3
From 34a83deac31cd9fdecef331578422095af2db4b0 Mon Sep 17 00:00:00 2001
From: Sean Christopherson
Date: Thu, 11 May 2023 16:33:48 -0700
Subject: KVM: x86: Use MTRR macros to define possible MTRR MSR ranges

Use the MTRR macros to identify the ranges of possible MTRR MSRs instead
of bounding the ranges with a mishmash of open coded values and unrelated
MSR indices.  Carving out the gap for the machine check MSRs in particular
is confusing, as it's easy to incorrectly think the case statement handles
MCE MSRs instead of skipping them.

Drop the range-based funneling of MSRs between the end of the MCE MSRs
and MTRR_DEF_TYPE, i.e. 0x2A0-0x2FF, and instead handle MTRR_DEF_TYPE as
the one-off case that it is.

Extract PAT (0x277) as well in anticipation of dropping PAT "handling"
from the MTRR code.

Keep the range-based handling for the variable+fixed MTRRs even though
capturing unknown MSRs 0x214-0x24F is arguably "wrong".  There is a gap in
the fixed MTRRs, 0x260-0x267, i.e. the MTRR code needs to filter out
unknown MSRs anyways, and using a single range generates marginally better
code for the big switch statement.

Reviewed-by: Kai Huang
Link: https://lore.kernel.org/r/20230511233351.635053-6-seanjc@google.com
Signed-off-by: Sean Christopherson
---
 arch/x86/kvm/mtrr.c |  7 ++++---
 arch/x86/kvm/x86.c  | 10 ++++++----
 2 files changed, 10 insertions(+), 7 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c
index 59851dbebfea..dc213b940141 100644
--- a/arch/x86/kvm/mtrr.c
+++ b/arch/x86/kvm/mtrr.c
@@ -34,7 +34,7 @@ static bool is_mtrr_base_msr(unsigned int msr)
 static struct kvm_mtrr_range *var_mtrr_msr_to_range(struct kvm_vcpu *vcpu,
 						    unsigned int msr)
 {
-	int index = (msr - 0x200) / 2;
+	int index = (msr - MTRRphysBase_MSR(0)) / 2;
 
 	return &vcpu->arch.mtrr_state.var_ranges[index];
 }
@@ -42,7 +42,7 @@ static struct kvm_mtrr_range *var_mtrr_msr_to_range(struct kvm_vcpu *vcpu,
 static bool msr_mtrr_valid(unsigned msr)
 {
 	switch (msr) {
-	case 0x200 ... 0x200 + 2 * KVM_NR_VAR_MTRR - 1:
+	case MTRRphysBase_MSR(0) ... MTRRphysMask_MSR(KVM_NR_VAR_MTRR - 1):
 	case MSR_MTRRfix64K_00000:
 	case MSR_MTRRfix16K_80000:
 	case MSR_MTRRfix16K_A0000:
@@ -88,7 +88,8 @@ bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 	}
 
 	/* variable MTRRs */
-	WARN_ON(!(msr >= 0x200 && msr < 0x200 + 2 * KVM_NR_VAR_MTRR));
+	WARN_ON(!(msr >= MTRRphysBase_MSR(0) &&
+		  msr <= MTRRphysMask_MSR(KVM_NR_VAR_MTRR - 1)));
 
 	mask = kvm_vcpu_reserved_gpa_bits_raw(vcpu);
 	if ((msr & 1) == 0) {
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c0778ca39650..8017e0eb1e67 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3702,8 +3702,9 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 			return 1;
 		}
 		break;
-	case 0x200 ... MSR_IA32_MC0_CTL2 - 1:
-	case MSR_IA32_MCx_CTL2(KVM_MAX_MCE_BANKS) ... 0x2ff:
+	case MSR_IA32_CR_PAT:
+	case MTRRphysBase_MSR(0) ... MSR_MTRRfix4K_F8000:
+	case MSR_MTRRdefType:
 		return kvm_mtrr_set_msr(vcpu, msr, data);
 	case MSR_IA32_APICBASE:
 		return kvm_set_apic_base(vcpu, msr_info);
@@ -4110,9 +4111,10 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		msr_info->data = kvm_scale_tsc(rdtsc(), ratio) + offset;
 		break;
 	}
+	case MSR_IA32_CR_PAT:
 	case MSR_MTRRcap:
-	case 0x200 ... MSR_IA32_MC0_CTL2 - 1:
-	case MSR_IA32_MCx_CTL2(KVM_MAX_MCE_BANKS) ... 0x2ff:
+	case MTRRphysBase_MSR(0) ... MSR_MTRRfix4K_F8000:
+	case MSR_MTRRdefType:
 		return kvm_mtrr_get_msr(vcpu, msr_info->index, &msr_info->data);
 	case 0xcd: /* fsb frequency */
 		msr_info->data = 3;
--
cgit v1.2.3
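
For context, the MTRR MSR macros used by the new case ranges expand to the
architectural MSR indices.  The sketch below approximates their definitions
in the x86 MTRR headers; the exact header location and the KVM_NR_VAR_MTRR
value of 8 should be verified against the tree:

/* Sketch of the MTRR MSR layout the new case ranges rely on. */
#define MTRRphysBase_MSR(reg)	(0x200 + 2 * (reg))		/* even: 0x200, 0x202, ... */
#define MTRRphysMask_MSR(reg)	(0x200 + 2 * (reg) + 1)		/* odd:  0x201, 0x203, ... */

/*
 * With KVM_NR_VAR_MTRR == 8, the variable ranges span 0x200-0x20F, the
 * fixed MTRRs sit at 0x250, 0x258, 0x259 and 0x268-0x26F (MSR_MTRRfix4K_F8000
 * is 0x26F), while PAT (0x277) and MSR_MTRRdefType (0x2FF) are handled as
 * standalone cases.
 */
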
From bc7fe2f0b7511ef6bb2765b6e1f923da449e00ef Mon Sep 17 00:00:00 2001
From: Sean Christopherson
Date: Thu, 11 May 2023 16:33:49 -0700
Subject: KVM: x86: Move PAT MSR handling out of mtrr.c

Drop handling of MSR_IA32_CR_PAT from mtrr.c now that SVM and VMX handle
writes without bouncing through kvm_set_msr_common().  PAT isn't truly an
MTRR even though it affects memory types, and more importantly KVM enables
hardware virtualization of guest PAT (by NOT setting "ignore guest PAT")
when a guest has non-coherent DMA, i.e. KVM doesn't need to zap SPTEs when
the guest PAT changes.

The read path is and always has been trivial, i.e. burying it in the MTRR
code does more harm than good.

WARN and continue for the PAT case in kvm_set_msr_common(), as that code
is _currently_ reached if and only if KVM is buggy.  Defer cleaning up the
lack of symmetry between the read and write paths to a future patch.

Reviewed-by: Kai Huang
Link: https://lore.kernel.org/r/20230511233351.635053-7-seanjc@google.com
Signed-off-by: Sean Christopherson
---
 arch/x86/kvm/mtrr.c | 19 ++++++-------------
 arch/x86/kvm/x86.c  | 13 +++++++++++++
 2 files changed, 19 insertions(+), 13 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c
index dc213b940141..cdbbb511f940 100644
--- a/arch/x86/kvm/mtrr.c
+++ b/arch/x86/kvm/mtrr.c
@@ -55,7 +55,6 @@ static bool msr_mtrr_valid(unsigned msr)
 	case MSR_MTRRfix4K_F0000:
 	case MSR_MTRRfix4K_F8000:
 	case MSR_MTRRdefType:
-	case MSR_IA32_CR_PAT:
 		return true;
 	}
 	return false;
@@ -74,9 +73,7 @@ bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 	if (!msr_mtrr_valid(msr))
 		return false;
 
-	if (msr == MSR_IA32_CR_PAT) {
-		return kvm_pat_valid(data);
-	} else if (msr == MSR_MTRRdefType) {
+	if (msr == MSR_MTRRdefType) {
 		if (data & ~0xcff)
 			return false;
 		return valid_mtrr_type(data & 0xff);
@@ -324,8 +321,7 @@ static void update_mtrr(struct kvm_vcpu *vcpu, u32 msr)
 	struct kvm_mtrr *mtrr_state = &vcpu->arch.mtrr_state;
 	gfn_t start, end;
 
-	if (msr == MSR_IA32_CR_PAT || !tdp_enabled ||
-	    !kvm_arch_has_noncoherent_dma(vcpu->kvm))
+	if (!tdp_enabled || !kvm_arch_has_noncoherent_dma(vcpu->kvm))
 		return;
 
 	if (!mtrr_is_enabled(mtrr_state) && msr != MSR_MTRRdefType)
@@ -392,8 +388,6 @@ int kvm_mtrr_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 		*(u64 *)&vcpu->arch.mtrr_state.fixed_ranges[index] = data;
 	else if (msr == MSR_MTRRdefType)
 		vcpu->arch.mtrr_state.deftype = data;
-	else if (msr == MSR_IA32_CR_PAT)
-		vcpu->arch.pat = data;
 	else
 		set_var_mtrr_msr(vcpu, msr, data);
 
@@ -421,13 +415,12 @@ int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 		return 1;
 
 	index = fixed_msr_to_range_index(msr);
-	if (index >= 0)
+	if (index >= 0) {
 		*pdata = *(u64 *)&vcpu->arch.mtrr_state.fixed_ranges[index];
-	else if (msr == MSR_MTRRdefType)
+	} else if (msr == MSR_MTRRdefType) {
 		*pdata = vcpu->arch.mtrr_state.deftype;
-	else if (msr == MSR_IA32_CR_PAT)
-		*pdata = vcpu->arch.pat;
-	else {	/* Variable MTRRs */
+	} else {
+		/* Variable MTRRs */
 		if (is_mtrr_base_msr(msr))
 			*pdata = var_mtrr_msr_to_range(vcpu, msr)->base;
 		else
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 8017e0eb1e67..902c9935979f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3703,6 +3703,17 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		}
 		break;
 	case MSR_IA32_CR_PAT:
+		/*
+		 * Writes to PAT should be handled by vendor code as both SVM
+		 * and VMX track the guest's PAT in the VMCB/VMCS.
+		 */
+		WARN_ON_ONCE(1);
+
+		if (!kvm_pat_valid(data))
+			return 1;
+
+		vcpu->arch.pat = data;
+		break;
 	case MTRRphysBase_MSR(0) ... MSR_MTRRfix4K_F8000:
 	case MSR_MTRRdefType:
 		return kvm_mtrr_set_msr(vcpu, msr, data);
@@ -4112,6 +4123,8 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		break;
 	}
 	case MSR_IA32_CR_PAT:
+		msr_info->data = vcpu->arch.pat;
+		break;
 	case MSR_MTRRcap:
 	case MTRRphysBase_MSR(0) ... MSR_MTRRfix4K_F8000:
 	case MSR_MTRRdefType:
--
cgit v1.2.3
From 3a5f49078eb5106f9b918066908508c3044b5ada Mon Sep 17 00:00:00 2001
From: Sean Christopherson
Date: Thu, 11 May 2023 16:33:50 -0700
Subject: KVM: x86: Make kvm_mtrr_valid() static now that there are no external users

Make kvm_mtrr_valid() local to mtrr.c now that it's not used to check the
validity of a PAT MSR value.

Reviewed-by: Kai Huang
Link: https://lore.kernel.org/r/20230511233351.635053-8-seanjc@google.com
Signed-off-by: Sean Christopherson
---
 arch/x86/kvm/mtrr.c | 3 +--
 arch/x86/kvm/x86.h  | 1 -
 2 files changed, 1 insertion(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c
index cdbbb511f940..3eb6e7f47e96 100644
--- a/arch/x86/kvm/mtrr.c
+++ b/arch/x86/kvm/mtrr.c
@@ -65,7 +65,7 @@ static bool valid_mtrr_type(unsigned t)
 	return t < 8 && (1 << t) & 0x73; /* 0, 1, 4, 5, 6 */
 }
 
-bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data)
+static bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 {
 	int i;
 	u64 mask;
@@ -100,7 +100,6 @@ bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 
 	return (data & mask) == 0;
 }
-EXPORT_SYMBOL_GPL(kvm_mtrr_valid);
 
 static bool mtrr_is_enabled(struct kvm_mtrr *mtrr_state)
 {
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index c544602d07a3..82e3dafc5453 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -309,7 +309,6 @@ void kvm_deliver_exception_payload(struct kvm_vcpu *vcpu,
 
 void kvm_vcpu_mtrr_init(struct kvm_vcpu *vcpu);
 u8 kvm_mtrr_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn);
-bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data);
 int kvm_mtrr_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data);
 int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata);
 bool kvm_mtrr_check_gfn_range_consistency(struct kvm_vcpu *vcpu, gfn_t gfn,
--
cgit v1.2.3
From dee321977a230802a5065af4ad4f4f5e8558a738 Mon Sep 17 00:00:00 2001
From: Sean Christopherson
Date: Thu, 11 May 2023 16:33:51 -0700
Subject: KVM: x86: Move common handling of PAT MSR writes to kvm_set_msr_common()

Move the common check-and-set handling of PAT MSR writes out of vendor
code and into kvm_set_msr_common().  This aligns writes with reads, which
are already handled in common code, i.e. makes the handling of reads and
writes symmetrical in common code.

Alternatively, the common handling in kvm_get_msr_common() could be moved
to vendor code, but duplicating code is generally undesirable (even though
the duplicated code is trivial in this case), and guest writes to PAT
should be rare, i.e. the overhead of the extra function call is a
non-issue in practice.

Suggested-by: Kai Huang
Reviewed-by: Kai Huang
Link: https://lore.kernel.org/r/20230511233351.635053-9-seanjc@google.com
Signed-off-by: Sean Christopherson
---
 arch/x86/kvm/svm/svm.c | 7 ++++---
 arch/x86/kvm/vmx/vmx.c | 7 +++----
 arch/x86/kvm/x86.c     | 6 ------
 3 files changed, 7 insertions(+), 13 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index e1e17c7dc7d5..924609d63f8a 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -2939,9 +2939,10 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
 		break;
 	case MSR_IA32_CR_PAT:
-		if (!kvm_pat_valid(data))
-			return 1;
-		vcpu->arch.pat = data;
+		ret = kvm_set_msr_common(vcpu, msr);
+		if (ret)
+			break;
+
 		svm->vmcb01.ptr->save.g_pat = data;
 		if (is_guest_mode(vcpu))
 			nested_vmcb02_compute_g_pat(svm);
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 33b8625d3541..2d9d155691a7 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -2287,10 +2287,9 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 			return 1;
 		goto find_uret_msr;
 	case MSR_IA32_CR_PAT:
-		if (!kvm_pat_valid(data))
-			return 1;
-
-		vcpu->arch.pat = data;
+		ret = kvm_set_msr_common(vcpu, msr_info);
+		if (ret)
+			break;
 
 		if (is_guest_mode(vcpu) &&
 		    get_vmcs12(vcpu)->vm_exit_controls & VM_EXIT_SAVE_IA32_PAT)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 902c9935979f..5ad55ef71433 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3703,12 +3703,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		}
 		break;
 	case MSR_IA32_CR_PAT:
-		/*
-		 * Writes to PAT should be handled by vendor code as both SVM
-		 * and VMX track the guest's PAT in the VMCB/VMCS.
-		 */
-		WARN_ON_ONCE(1);
-
 		if (!kvm_pat_valid(data))
 			return 1;
 
 		vcpu->arch.pat = data;
--
cgit v1.2.3

From 0d42522bdee70b9197be63dd76c9f6297cd1e832 Mon Sep 17 00:00:00 2001
From: Jinliang Zheng
Date: Wed, 19 Apr 2023 10:19:25 +0800
Subject: KVM: x86: Fix poll command

According to the hardware manual, when the Poll command is issued, the
byte returned by the I/O read has Bit 7 set to 1 when there is an
interrupt, and carries the highest-priority interrupt's binary code in
Bits 2:0.

The current PIC simulation code does not implement the above behavior
strictly.  Fix the implementation of pic_poll_read(): set Bit 7 when there
is an interrupt.

Signed-off-by: Jinliang Zheng
Link: https://lore.kernel.org/r/20230419021924.1342184-1-alexjlzheng@tencent.com
Signed-off-by: Sean Christopherson
---
 arch/x86/kvm/i8259.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index 4756bcb5724f..8dec646e764b 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -411,7 +411,10 @@ static u32 pic_poll_read(struct kvm_kpic_state *s, u32 addr1)
 		pic_clear_isr(s, ret);
 		if (addr1 >> 7 || ret != 2)
 			pic_update_irq(s->pics_state);
+		/* Bit 7 is 1, means there's an interrupt */
+		ret |= 0x80;
 	} else {
+		/* Bit 7 is 0, means there's no interrupt */
 		ret = 0x07;
 		pic_update_irq(s->pics_state);
 	}
--
cgit v1.2.3
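
To restate the byte format the fix above implements, here is a hypothetical
consumer-side decode of an OCW3 poll read; the helper names are invented
for illustration and do not exist in KVM:

/* Hypothetical helpers decoding the poll byte described above. */
static inline bool poll_irq_pending(u8 poll)
{
	return poll & 0x80;	/* Bit 7 set: an interrupt is pending */
}

static inline u8 poll_irq_level(u8 poll)
{
	return poll & 0x07;	/* Bits 2:0: highest-priority pending IRQ */
}
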
From ab322c43cce97ff6d05439c9b72bf1513e3e1020 Mon Sep 17 00:00:00 2001
From: Sean Christopherson
Date: Fri, 26 May 2023 14:03:39 -0700
Subject: KVM: x86: Update number of entries for KVM_GET_CPUID2 on success, not failure

Update cpuid->nent if and only if kvm_vcpu_ioctl_get_cpuid2() succeeds.
The sole caller copies @cpuid to userspace only on success, i.e. the
existing code effectively does nothing.

Arguably, KVM should report the number of entries when returning -E2BIG so
that userspace doesn't have to guess the size, but all other similar KVM
ioctls() don't report the size either, i.e. userspace is conditioned to
guess.

Suggested-by: Takahiro Itazuri
Link: https://lore.kernel.org/all/20230410141820.57328-1-itazur@amazon.com
Link: https://lore.kernel.org/r/20230526210340.2799158-2-seanjc@google.com
Signed-off-by: Sean Christopherson
---
 arch/x86/kvm/cpuid.c | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 0c9660a07b23..241f554f1764 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -501,20 +501,15 @@ int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
 			      struct kvm_cpuid2 *cpuid,
 			      struct kvm_cpuid_entry2 __user *entries)
 {
-	int r;
-
-	r = -E2BIG;
 	if (cpuid->nent < vcpu->arch.cpuid_nent)
-		goto out;
-	r = -EFAULT;
+		return -E2BIG;
+
 	if (copy_to_user(entries, vcpu->arch.cpuid_entries,
 			 vcpu->arch.cpuid_nent * sizeof(struct kvm_cpuid_entry2)))
-		goto out;
-	return 0;
+		return -EFAULT;
 
-out:
 	cpuid->nent = vcpu->arch.cpuid_nent;
-	return r;
+	return 0;
 }
 
 /* Mask kvm_cpu_caps for @leaf with the raw CPUID capabilities of this CPU. */
--
cgit v1.2.3
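
Since the changelog notes that userspace is left to guess the entry count
on -E2BIG, here is a rough sketch of the usual userspace retry pattern for
KVM_GET_CPUID2 (the starting buffer size is illustrative, error handling
is trimmed):

#include <errno.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Rough sketch: grow the buffer until KVM_GET_CPUID2 stops returning E2BIG. */
static struct kvm_cpuid2 *get_vcpu_cpuid(int vcpu_fd)
{
	int nent = 32;		/* illustrative starting guess */

	for (;;) {
		struct kvm_cpuid2 *cpuid;

		cpuid = calloc(1, sizeof(*cpuid) +
				  nent * sizeof(struct kvm_cpuid_entry2));
		if (!cpuid)
			return NULL;

		cpuid->nent = nent;
		if (!ioctl(vcpu_fd, KVM_GET_CPUID2, cpuid))
			return cpuid;	/* success, cpuid->nent is now valid */

		free(cpuid);
		if (errno != E2BIG)
			return NULL;	/* real error, give up */

		nent *= 2;		/* guess a bigger size and retry */
	}
}
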
From 02f1b0b736606f9870595b3089d9c124f9da8be9 Mon Sep 17 00:00:00 2001
From: Chao Gao
Date: Wed, 24 May 2023 14:16:32 +0800
Subject: KVM: x86: Correct the name for skipping VMENTER l1d flush

There is no VMENTER_L1D_FLUSH_NESTED_VM.  It should be
ARCH_CAP_SKIP_VMENTRY_L1DFLUSH.

Signed-off-by: Chao Gao
Reviewed-by: Xiaoyao Li
Link: https://lore.kernel.org/r/20230524061634.54141-3-chao.gao@intel.com
Signed-off-by: Sean Christopherson
---
 arch/x86/kvm/x86.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 5ad55ef71433..7c7be4815eaa 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1631,7 +1631,7 @@ static u64 kvm_get_arch_capabilities(void)
 	 * If we're doing cache flushes (either "always" or "cond")
 	 * we will do one whenever the guest does a vmlaunch/vmresume.
 	 * If an outer hypervisor is doing the cache flush for us
-	 * (VMENTER_L1D_FLUSH_NESTED_VM), we can safely pass that
+	 * (ARCH_CAP_SKIP_VMENTRY_L1DFLUSH), we can safely pass that
 	 * capability to the guest too, and if EPT is disabled we're not
 	 * vulnerable.  Overall, only VMENTER_L1D_FLUSH_NEVER will
 	 * require a nested hypervisor to do a flush of its own.
--
cgit v1.2.3

From 056b9919a16aedc560e6756a235ef5a62a68db05 Mon Sep 17 00:00:00 2001
From: Sean Christopherson
Date: Thu, 1 Jun 2023 18:05:50 -0700
Subject: KVM: x86: Use cpu_feature_enabled() for PKU instead of #ifdef

Replace an #ifdef on CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS with a
cpu_feature_enabled() check on X86_FEATURE_PKU.  The macro magic of
DISABLED_MASK_BIT_SET() means that cpu_feature_enabled() provides the same
end result (no code generated) when PKU is disabled by Kconfig.

No functional change intended.

Cc: Jon Kohler
Reviewed-by: Jon Kohler
Link: https://lore.kernel.org/r/20230602010550.785722-1-seanjc@google.com
Signed-off-by: Sean Christopherson
---
 arch/x86/kvm/x86.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7c7be4815eaa..6f461afed800 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1017,13 +1017,11 @@ void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu)
 			wrmsrl(MSR_IA32_XSS, vcpu->arch.ia32_xss);
 	}
 
-#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
-	if (static_cpu_has(X86_FEATURE_PKU) &&
+	if (cpu_feature_enabled(X86_FEATURE_PKU) &&
 	    vcpu->arch.pkru != vcpu->arch.host_pkru &&
 	    ((vcpu->arch.xcr0 & XFEATURE_MASK_PKRU) ||
 	     kvm_is_cr4_bit_set(vcpu, X86_CR4_PKE)))
 		write_pkru(vcpu->arch.pkru);
-#endif /* CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS */
 }
 EXPORT_SYMBOL_GPL(kvm_load_guest_xsave_state);
 
@@ -1032,15 +1030,13 @@ void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu)
 	if (vcpu->arch.guest_state_protected)
 		return;
 
-#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
-	if (static_cpu_has(X86_FEATURE_PKU) &&
+	if (cpu_feature_enabled(X86_FEATURE_PKU) &&
 	    ((vcpu->arch.xcr0 & XFEATURE_MASK_PKRU) ||
 	     kvm_is_cr4_bit_set(vcpu, X86_CR4_PKE))) {
 		vcpu->arch.pkru = rdpkru();
 		if (vcpu->arch.pkru != vcpu->arch.host_pkru)
 			write_pkru(vcpu->arch.host_pkru);
 	}
-#endif /* CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS */
 
 	if (kvm_is_cr4_bit_set(vcpu, X86_CR4_OSXSAVE)) {
--
cgit v1.2.3

From e12fa4b92a07f4274ffa02e581d059a7869dd50e Mon Sep 17 00:00:00 2001
From: Michal Luczaj
Date: Mon, 5 Jun 2023 22:01:21 +0200
Subject: KVM: x86: Clean up: remove redundant bool conversions

As test_bit() returns bool, explicitly converting the result to bool is
unnecessary.  Get rid of '!!'.

No functional change intended.

Suggested-by: Sean Christopherson
Signed-off-by: Michal Luczaj
Link: https://lore.kernel.org/r/20230605200158.118109-1-mhal@rbox.co
Signed-off-by: Sean Christopherson
---
 arch/x86/kvm/svm/svm.c | 2 +-
 arch/x86/kvm/x86.c     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 924609d63f8a..02183ebc0e16 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -752,7 +752,7 @@ static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr)
 
 	BUG_ON(offset == MSR_INVALID);
 
-	return !!test_bit(bit_write, &tmp);
+	return test_bit(bit_write, &tmp);
 }
 
 static void set_msr_interception_bitmap(struct kvm_vcpu *vcpu, u32 *msrpm,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6f461afed800..355f3df8dbad 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1805,7 +1805,7 @@ bool kvm_msr_allowed(struct kvm_vcpu *vcpu, u32 index, u32 type)
 		unsigned long *bitmap = ranges[i].bitmap;
 
 		if ((index >= start) && (index < end) && (flags & type)) {
-			allowed = !!test_bit(index - start, bitmap);
+			allowed = test_bit(index - start, bitmap);
 			break;
 		}
 	}
--
cgit v1.2.3
From a30642570855faa1992f333ce5cb60351b0a37da Mon Sep 17 00:00:00 2001
From: Sean Christopherson
Date: Tue, 6 Jun 2023 17:46:36 -0700
Subject: KVM: x86: Update comments about MSR lists exposed to userspace

Refresh comments about msrs_to_save, emulated_msrs, and msr_based_features
to remove stale references left behind by commit 2374b7310b66 ("KVM:
x86/pmu: Use separate array for defining "PMU MSRs to save""), and to
better reflect the current reality, e.g. emulated_msrs is no longer just
for MSRs that are "kvm-specific".

Reported-by: Binbin Wu
Link: https://lore.kernel.org/r/20230607004636.1421424-1-seanjc@google.com
Signed-off-by: Sean Christopherson
---
 arch/x86/kvm/x86.c | 27 +++++++++++++--------------
 1 file changed, 13 insertions(+), 14 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 355f3df8dbad..07b0f960de07 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1423,15 +1423,14 @@ int kvm_emulate_rdpmc(struct kvm_vcpu *vcpu)
 EXPORT_SYMBOL_GPL(kvm_emulate_rdpmc);
 
 /*
- * List of msr numbers which we expose to userspace through KVM_GET_MSRS
- * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST.
- *
- * The three MSR lists(msrs_to_save, emulated_msrs, msr_based_features)
- * extract the supported MSRs from the related const lists.
- * msrs_to_save is selected from the msrs_to_save_all to reflect the
- * capabilities of the host cpu. This capabilities test skips MSRs that are
- * kvm-specific. Those are put in emulated_msrs_all; filtering of emulated_msrs
- * may depend on host virtualization features rather than host cpu features.
+ * The three MSR lists(msrs_to_save, emulated_msrs, msr_based_features) track
+ * the set of MSRs that KVM exposes to userspace through KVM_GET_MSRS,
+ * KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST.  msrs_to_save holds MSRs that
+ * require host support, i.e. should be probed via RDMSR.  emulated_msrs holds
+ * MSRs that KVM emulates without strictly requiring host support.
+ * msr_based_features holds MSRs that enumerate features, i.e. are effectively
+ * CPUID leafs.  Note, msr_based_features isn't mutually exclusive with
+ * msrs_to_save and emulated_msrs.
  */
 
 static const u32 msrs_to_save_base[] = {
@@ -1527,11 +1526,11 @@ static const u32 emulated_msrs_all[] = {
 	MSR_IA32_UCODE_REV,
 
 	/*
-	 * The following list leaves out MSRs whose values are determined
-	 * by arch/x86/kvm/vmx/nested.c based on CPUID or other MSRs.
-	 * We always support the "true" VMX control MSRs, even if the host
-	 * processor does not, so I am putting these registers here rather
-	 * than in msrs_to_save_all.
+	 * KVM always supports the "true" VMX control MSRs, even if the host
+	 * does not.  The VMX MSRs as a whole are considered "emulated" as KVM
+	 * doesn't strictly require them to exist in the host (ignoring that
+	 * KVM would refuse to load in the first place if the core set of MSRs
+	 * aren't supported).
 	 */
 	MSR_IA32_VMX_BASIC,
 	MSR_IA32_VMX_TRUE_PINBASED_CTLS,
--
cgit v1.2.3

From fb1273635f8c38df18483ffcd038a5b792dfcc94 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko
Date: Fri, 16 Jun 2023 18:02:33 +0300
Subject: KVM: x86: Remove PRIx* definitions as they are solely for user space

The Linux kernel does not support the PRI.64 format specifiers, and the
local definitions are not used here anyway.  Drop them.

Signed-off-by: Andy Shevchenko
Link: https://lore.kernel.org/r/20230616150233.83813-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Sean Christopherson
---
 arch/x86/kvm/lapic.c | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index e542cf285b51..58ee570db395 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -51,11 +51,6 @@
 #define mod_64(x, y) ((x) % (y))
 #endif
 
-#define PRId64 "d"
-#define PRIx64 "llx"
-#define PRIu64 "u"
-#define PRIo64 "o"
-
 /* 14 is the version for Xeon and Pentium 8.4.8*/
 #define APIC_VERSION 0x14UL
 #define LAPIC_MMIO_LENGTH (1 << 12)
--
cgit v1.2.3