aboutsummaryrefslogtreecommitdiff
path: root/arch/x86/kvm/x86.c
diff options
context:
space:
mode:
authorGravatar Paolo Bonzini <pbonzini@redhat.com> 2024-03-11 10:29:22 -0400
committerGravatar Paolo Bonzini <pbonzini@redhat.com> 2024-03-11 10:29:22 -0400
commit41ebae2ecd1496aaf72596af9f37529d62a160ab (patch)
tree622acefe722f98cebca79afdd72f1c68149b3f54 /arch/x86/kvm/x86.c
parentMerge tag 'kvm-x86-misc-6.9' of https://github.com/kvm-x86/linux into HEAD (diff)
parentkvm/x86: allocate the write-tracking metadata on-demand (diff)
downloadlinux-41ebae2ecd1496aaf72596af9f37529d62a160ab.tar.gz
linux-41ebae2ecd1496aaf72596af9f37529d62a160ab.tar.bz2
linux-41ebae2ecd1496aaf72596af9f37529d62a160ab.zip
Merge tag 'kvm-x86-mmu-6.9' of https://github.com/kvm-x86/linux into HEAD
KVM x86 MMU changes for 6.9: - Clean up code related to unprotecting shadow pages when retrying a guest instruction after failed #PF-induced emulation. - Zap TDP MMU roots at 4KiB granularity to minimize the delay in yielding if a reschedule is needed, e.g. if a high priority task needs to run. Because KVM doesn't support yielding in the middle of processing a zapped non-leaf SPTE, zapping at 1GiB granularity can result in multi-millisecond lag when attempting to schedule in a high priority. - Rework TDP MMU root unload, free, and alloc to run with mmu_lock held for read, e.g. to avoid serializing vCPUs when userspace deletes a memslot. - Allocate write-tracking metadata on-demand to avoid the memory overhead when running kernels built with KVMGT support (external write-tracking enabled), but for workloads that don't use nested virtualization (shadow paging) or KVMGT.
Diffstat (limited to 'arch/x86/kvm/x86.c')
-rw-r--r--arch/x86/kvm/x86.c39
1 files changed, 16 insertions, 23 deletions
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e6b1b85dca8a..064862d87b9e 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -8792,31 +8792,24 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
kvm_release_pfn_clean(pfn);
- /* The instructions are well-emulated on direct mmu. */
- if (vcpu->arch.mmu->root_role.direct) {
- unsigned int indirect_shadow_pages;
-
- write_lock(&vcpu->kvm->mmu_lock);
- indirect_shadow_pages = vcpu->kvm->arch.indirect_shadow_pages;
- write_unlock(&vcpu->kvm->mmu_lock);
-
- if (indirect_shadow_pages)
- kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
-
- return true;
- }
-
/*
- * if emulation was due to access to shadowed page table
- * and it failed try to unshadow page and re-enter the
- * guest to let CPU execute the instruction.
+ * If emulation may have been triggered by a write to a shadowed page
+ * table, unprotect the gfn (zap any relevant SPTEs) and re-enter the
+ * guest to let the CPU re-execute the instruction in the hope that the
+ * CPU can cleanly execute the instruction that KVM failed to emulate.
*/
- kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
-
- /*
- * If the access faults on its page table, it can not
- * be fixed by unprotecting shadow page and it should
- * be reported to userspace.
+ if (vcpu->kvm->arch.indirect_shadow_pages)
+ kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
+
+ /*
+ * If the failed instruction faulted on an access to page tables that
+ * are used to translate any part of the instruction, KVM can't resolve
+ * the issue by unprotecting the gfn, as zapping the shadow page will
+ * result in the instruction taking a !PRESENT page fault and thus put
+ * the vCPU into an infinite loop of page faults. E.g. KVM will create
+ * a SPTE and write-protect the gfn to resolve the !PRESENT fault, and
+ * then zap the SPTE to unprotect the gfn, and then do it all over
+ * again. Report the error to userspace.
*/
return !(emulation_type & EMULTYPE_WRITE_PF_TO_SP);
}