aboutsummaryrefslogtreecommitdiff
path: root/arch/arm64/include
diff options
context:
space:
mode:
authorGravatar Marc Zyngier <maz@kernel.org> 2022-12-05 14:22:55 +0000
committerGravatar Marc Zyngier <maz@kernel.org> 2022-12-05 14:22:55 +0000
commitfe8e3f44c58760f0779813938ed465b3297db1ac (patch)
treef0d529b2e7bf99b662134ac7b1dc180a21d0a3b8 /arch/arm64/include
parentMerge branch kvm-arm64/dirty-ring into kvmarm-master/next (diff)
parentKVM: arm64: Reject shared table walks in the hyp code (diff)
downloadlinux-fe8e3f44c58760f0779813938ed465b3297db1ac.tar.gz
linux-fe8e3f44c58760f0779813938ed465b3297db1ac.tar.bz2
linux-fe8e3f44c58760f0779813938ed465b3297db1ac.zip
Merge branch kvm-arm64/parallel-faults into kvmarm-master/next
* kvm-arm64/parallel-faults: : . : Parallel stage-2 fault handling, courtesy of Oliver Upton. : From the cover letter: : : "Presently KVM only takes a read lock for stage 2 faults if it believes : the fault can be fixed by relaxing permissions on a PTE (write unprotect : for dirty logging). Otherwise, stage 2 faults grab the write lock, which : predictably can pile up all the vCPUs in a sufficiently large VM. : : Like the TDP MMU for x86, this series loosens the locking around : manipulations of the stage 2 page tables to allow parallel faults. RCU : and atomics are exploited to safely build/destroy the stage 2 page : tables in light of multiple software observers." : . KVM: arm64: Reject shared table walks in the hyp code KVM: arm64: Don't acquire RCU read lock for exclusive table walks KVM: arm64: Take a pointer to walker data in kvm_dereference_pteref() KVM: arm64: Handle stage-2 faults in parallel KVM: arm64: Make table->block changes parallel-aware KVM: arm64: Make leaf->leaf PTE changes parallel-aware KVM: arm64: Make block->table PTE changes parallel-aware KVM: arm64: Split init and set for table PTE KVM: arm64: Atomically update stage 2 leaf attributes in parallel walks KVM: arm64: Protect stage-2 traversal with RCU KVM: arm64: Tear down unlinked stage-2 subtree after break-before-make KVM: arm64: Use an opaque type for pteps KVM: arm64: Add a helper to tear down unlinked stage-2 subtrees KVM: arm64: Don't pass kvm_pgtable through kvm_pgtable_walk_data KVM: arm64: Pass mm_ops through the visitor context KVM: arm64: Stash observed pte value in visitor context KVM: arm64: Combine visitor arguments into a context structure Signed-off-by: Marc Zyngier <maz@kernel.org>
Diffstat (limited to 'arch/arm64/include')
-rw-r--r--arch/arm64/include/asm/kvm_pgtable.h153
1 files changed, 125 insertions, 28 deletions
diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
index 3252eb50ecfe..d5cb01f8dc06 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -85,6 +85,8 @@ static inline bool kvm_level_supports_block_mapping(u32 level)
* allocation is physically contiguous.
* @free_pages_exact: Free an exact number of memory pages previously
* allocated by zalloc_pages_exact.
+ * @free_removed_table: Free a removed paging structure by unlinking and
+ * dropping references.
* @get_page: Increment the refcount on a page.
* @put_page: Decrement the refcount on a page. When the
* refcount reaches 0 the page is automatically
@@ -103,6 +105,7 @@ struct kvm_pgtable_mm_ops {
void* (*zalloc_page)(void *arg);
void* (*zalloc_pages_exact)(size_t size);
void (*free_pages_exact)(void *addr, size_t size);
+ void (*free_removed_table)(void *addr, u32 level);
void (*get_page)(void *addr);
void (*put_page)(void *addr);
int (*page_count)(void *addr);
@@ -162,29 +165,6 @@ typedef bool (*kvm_pgtable_force_pte_cb_t)(u64 addr, u64 end,
enum kvm_pgtable_prot prot);
/**
- * struct kvm_pgtable - KVM page-table.
- * @ia_bits: Maximum input address size, in bits.
- * @start_level: Level at which the page-table walk starts.
- * @pgd: Pointer to the first top-level entry of the page-table.
- * @mm_ops: Memory management callbacks.
- * @mmu: Stage-2 KVM MMU struct. Unused for stage-1 page-tables.
- * @flags: Stage-2 page-table flags.
- * @force_pte_cb: Function that returns true if page level mappings must
- * be used instead of block mappings.
- */
-struct kvm_pgtable {
- u32 ia_bits;
- u32 start_level;
- kvm_pte_t *pgd;
- struct kvm_pgtable_mm_ops *mm_ops;
-
- /* Stage-2 only */
- struct kvm_s2_mmu *mmu;
- enum kvm_pgtable_stage2_flags flags;
- kvm_pgtable_force_pte_cb_t force_pte_cb;
-};
-
-/**
* enum kvm_pgtable_walk_flags - Flags to control a depth-first page-table walk.
* @KVM_PGTABLE_WALK_LEAF: Visit leaf entries, including invalid
* entries.
@@ -192,17 +172,34 @@ struct kvm_pgtable {
* children.
* @KVM_PGTABLE_WALK_TABLE_POST: Visit table entries after their
* children.
+ * @KVM_PGTABLE_WALK_SHARED: Indicates the page-tables may be shared
+ * with other software walkers.
*/
enum kvm_pgtable_walk_flags {
KVM_PGTABLE_WALK_LEAF = BIT(0),
KVM_PGTABLE_WALK_TABLE_PRE = BIT(1),
KVM_PGTABLE_WALK_TABLE_POST = BIT(2),
+ KVM_PGTABLE_WALK_SHARED = BIT(3),
};
-typedef int (*kvm_pgtable_visitor_fn_t)(u64 addr, u64 end, u32 level,
- kvm_pte_t *ptep,
- enum kvm_pgtable_walk_flags flag,
- void * const arg);
+struct kvm_pgtable_visit_ctx {
+ kvm_pte_t *ptep;
+ kvm_pte_t old;
+ void *arg;
+ struct kvm_pgtable_mm_ops *mm_ops;
+ u64 addr;
+ u64 end;
+ u32 level;
+ enum kvm_pgtable_walk_flags flags;
+};
+
+typedef int (*kvm_pgtable_visitor_fn_t)(const struct kvm_pgtable_visit_ctx *ctx,
+ enum kvm_pgtable_walk_flags visit);
+
+static inline bool kvm_pgtable_walk_shared(const struct kvm_pgtable_visit_ctx *ctx)
+{
+ return ctx->flags & KVM_PGTABLE_WALK_SHARED;
+}
/**
* struct kvm_pgtable_walker - Hook into a page-table walk.
@@ -217,6 +214,94 @@ struct kvm_pgtable_walker {
const enum kvm_pgtable_walk_flags flags;
};
+/*
+ * RCU cannot be used in a non-kernel context such as the hyp. As such, page
+ * table walkers used in hyp do not call into RCU and instead use other
+ * synchronization mechanisms (such as a spinlock).
+ */
+#if defined(__KVM_NVHE_HYPERVISOR__) || defined(__KVM_VHE_HYPERVISOR__)
+
+typedef kvm_pte_t *kvm_pteref_t;
+
+static inline kvm_pte_t *kvm_dereference_pteref(struct kvm_pgtable_walker *walker,
+ kvm_pteref_t pteref)
+{
+ return pteref;
+}
+
+static inline int kvm_pgtable_walk_begin(struct kvm_pgtable_walker *walker)
+{
+ /*
+ * Due to the lack of RCU (or a similar protection scheme), only
+ * non-shared table walkers are allowed in the hypervisor.
+ */
+ if (walker->flags & KVM_PGTABLE_WALK_SHARED)
+ return -EPERM;
+
+ return 0;
+}
+
+static inline void kvm_pgtable_walk_end(struct kvm_pgtable_walker *walker) {}
+
+static inline bool kvm_pgtable_walk_lock_held(void)
+{
+ return true;
+}
+
+#else
+
+typedef kvm_pte_t __rcu *kvm_pteref_t;
+
+static inline kvm_pte_t *kvm_dereference_pteref(struct kvm_pgtable_walker *walker,
+ kvm_pteref_t pteref)
+{
+ return rcu_dereference_check(pteref, !(walker->flags & KVM_PGTABLE_WALK_SHARED));
+}
+
+static inline int kvm_pgtable_walk_begin(struct kvm_pgtable_walker *walker)
+{
+ if (walker->flags & KVM_PGTABLE_WALK_SHARED)
+ rcu_read_lock();
+
+ return 0;
+}
+
+static inline void kvm_pgtable_walk_end(struct kvm_pgtable_walker *walker)
+{
+ if (walker->flags & KVM_PGTABLE_WALK_SHARED)
+ rcu_read_unlock();
+}
+
+static inline bool kvm_pgtable_walk_lock_held(void)
+{
+ return rcu_read_lock_held();
+}
+
+#endif
+
+/**
+ * struct kvm_pgtable - KVM page-table.
+ * @ia_bits: Maximum input address size, in bits.
+ * @start_level: Level at which the page-table walk starts.
+ * @pgd: Pointer to the first top-level entry of the page-table.
+ * @mm_ops: Memory management callbacks.
+ * @mmu: Stage-2 KVM MMU struct. Unused for stage-1 page-tables.
+ * @flags: Stage-2 page-table flags.
+ * @force_pte_cb: Function that returns true if page level mappings must
+ * be used instead of block mappings.
+ */
+struct kvm_pgtable {
+ u32 ia_bits;
+ u32 start_level;
+ kvm_pteref_t pgd;
+ struct kvm_pgtable_mm_ops *mm_ops;
+
+ /* Stage-2 only */
+ struct kvm_s2_mmu *mmu;
+ enum kvm_pgtable_stage2_flags flags;
+ kvm_pgtable_force_pte_cb_t force_pte_cb;
+};
+
/**
* kvm_pgtable_hyp_init() - Initialise a hypervisor stage-1 page-table.
* @pgt: Uninitialised page-table structure to initialise.
@@ -325,6 +410,17 @@ int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt);
/**
+ * kvm_pgtable_stage2_free_removed() - Free a removed stage-2 paging structure.
+ * @mm_ops: Memory management callbacks.
+ * @pgtable: Unlinked stage-2 paging structure to be freed.
+ * @level: Level of the stage-2 paging structure to be freed.
+ *
+ * The page-table is assumed to be unreachable by any hardware walkers prior to
+ * freeing and therefore no TLB invalidation is performed.
+ */
+void kvm_pgtable_stage2_free_removed(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, u32 level);
+
+/**
* kvm_pgtable_stage2_map() - Install a mapping in a guest stage-2 page-table.
* @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
* @addr: Intermediate physical address at which to place the mapping.
@@ -333,6 +429,7 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt);
* @prot: Permissions and attributes for the mapping.
* @mc: Cache of pre-allocated and zeroed memory from which to allocate
* page-table pages.
+ * @flags: Flags to control the page-table walk (ex. a shared walk)
*
* The offset of @addr within a page is ignored, @size is rounded-up to
* the next page boundary and @phys is rounded-down to the previous page
@@ -354,7 +451,7 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt);
*/
int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
u64 phys, enum kvm_pgtable_prot prot,
- void *mc);
+ void *mc, enum kvm_pgtable_walk_flags flags);
/**
* kvm_pgtable_stage2_set_owner() - Unmap and annotate pages in the IPA space to