From 474b99ed703b7e4031f3925adacf19e7c8af2075 Mon Sep 17 00:00:00 2001 From: Mingwei Zhang Date: Fri, 2 Feb 2024 16:23:40 -0800 Subject: KVM: x86/mmu: Don't acquire mmu_lock when using indirect_shadow_pages as a heuristic Drop KVM's completely pointless acquisition of mmu_lock when deciding whether or not to unprotect any shadow pages residing at the gfn before resuming the guest to let it retry an instruction that KVM failed to emulated. In this case, indirect_shadow_pages is used as a coarse-grained heuristic to check if there is any chance of there being a relevant shadow page to unprotected. But acquiring mmu_lock largely defeats any benefit to the heuristic, as taking mmu_lock for write is likely far more costly to the VM as a whole than unnecessarily walking mmu_page_hash. Furthermore, the current code is already prone to false negatives and false positives, as it drops mmu_lock before checking the flag and unprotecting shadow pages. And as evidenced by the lack of bug reports, neither false positives nor false negatives are problematic. A false positive simply means that KVM will try to unprotect shadow pages that have already been zapped. And a false negative means that KVM will resume the guest without unprotecting the gfn, i.e. if a shadow page was _just_ created, the vCPU will hit the same page fault and do the whole dance all over again, and detect and unprotect the shadow page the second time around (or not, if something else zaps it first). Reported-by: Jim Mattson Signed-off-by: Mingwei Zhang [sean: drop READ_ONCE() and comment change, rewrite changelog] Link: https://lore.kernel.org/r/20240203002343.383056-2-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'arch/x86/kvm/x86.c') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 363b1c080205..7015f8786397 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8789,13 +8789,7 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, /* The instructions are well-emulated on direct mmu. */ if (vcpu->arch.mmu->root_role.direct) { - unsigned int indirect_shadow_pages; - - write_lock(&vcpu->kvm->mmu_lock); - indirect_shadow_pages = vcpu->kvm->arch.indirect_shadow_pages; - write_unlock(&vcpu->kvm->mmu_lock); - - if (indirect_shadow_pages) + if (vcpu->kvm->arch.indirect_shadow_pages) kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa)); return true; -- cgit v1.2.3 From 515c18a64e704bc932c5a64e25aaeb712252cf0b Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 2 Feb 2024 16:23:41 -0800 Subject: KVM: x86: Drop dedicated logic for direct MMUs in reexecute_instruction() Now that KVM doesn't pointlessly acquire mmu_lock for direct MMUs, drop the dedicated path entirely and always query indirect_shadow_pages when deciding whether or not to try unprotecting the gfn. For indirect, a.k.a. shadow MMUs, checking indirect_shadow_pages is harmless; unless *every* shadow page was somehow zapped while KVM was attempting to emulate the instruction, indirect_shadow_pages is guaranteed to be non-zero. Well, unless the instruction used a direct hugepage with 2-level paging for its code page, but in that case, there's obviously nothing to unprotect. And in the extremely unlikely case all shadow pages were zapped, there's again obviously nothing to unprotect. Link: https://lore.kernel.org/r/20240203002343.383056-3-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) (limited to 'arch/x86/kvm/x86.c') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 7015f8786397..ac3ea5829df6 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8787,27 +8787,27 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, kvm_release_pfn_clean(pfn); - /* The instructions are well-emulated on direct mmu. */ - if (vcpu->arch.mmu->root_role.direct) { - if (vcpu->kvm->arch.indirect_shadow_pages) - kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa)); - - return true; - } - /* - * if emulation was due to access to shadowed page table - * and it failed try to unshadow page and re-enter the - * guest to let CPU execute the instruction. + * If emulation may have been triggered by a write to a shadowed page + * table, unprotect the gfn (zap any relevant SPTEs) and re-enter the + * guest to let the CPU re-execute the instruction in the hope that the + * CPU can cleanly execute the instruction that KVM failed to emulate. */ - kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa)); + if (vcpu->kvm->arch.indirect_shadow_pages) + kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa)); /* - * If the access faults on its page table, it can not - * be fixed by unprotecting shadow page and it should - * be reported to userspace. + * If the failed instruction faulted on an access to page tables that + * are used to translate any part of the instruction, KVM can't resolve + * the issue by unprotecting the gfn, as zapping the shadow page will + * result in the instruction taking a !PRESENT page fault and thus put + * the vCPU into an infinite loop of page faults. E.g. KVM will create + * a SPTE and write-protect the gfn to resolve the !PRESENT fault, and + * then zap the SPTE to unprotect the gfn, and then do it all over + * again. Report the error to userspace. */ - return !(emulation_type & EMULTYPE_WRITE_PF_TO_SP); + return vcpu->arch.mmu->root_role.direct || + !(emulation_type & EMULTYPE_WRITE_PF_TO_SP); } static bool retry_instruction(struct x86_emulate_ctxt *ctxt, -- cgit v1.2.3 From dfeef3d3f310ee464493e848383c4e9fe879089a Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 2 Feb 2024 16:23:42 -0800 Subject: KVM: x86: Drop superfluous check on direct MMU vs. WRITE_PF_TO_SP flag Remove reexecute_instruction()'s final check on the MMU being direct, as EMULTYPE_WRITE_PF_TO_SP is only ever set if the MMU is indirect, i.e. is a shadow MMU. Prior to commit 93c05d3ef252 ("KVM: x86: improve reexecute_instruction"), the flag simply didn't exist (and KVM actually returned "true" unconditionally for both types of MMUs). I.e. the explicit check for a direct MMU is simply leftover artifact from old code. Link: https://lore.kernel.org/r/20240203002343.383056-4-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86/kvm/x86.c') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ac3ea5829df6..48ec889452e2 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8806,8 +8806,7 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, * then zap the SPTE to unprotect the gfn, and then do it all over * again. Report the error to userspace. */ - return vcpu->arch.mmu->root_role.direct || - !(emulation_type & EMULTYPE_WRITE_PF_TO_SP); + return !(emulation_type & EMULTYPE_WRITE_PF_TO_SP); } static bool retry_instruction(struct x86_emulate_ctxt *ctxt, -- cgit v1.2.3