aboutsummaryrefslogtreecommitdiff
path: root/arch/arm64
diff options
context:
space:
mode:
authorGravatar Paolo Bonzini <pbonzini@redhat.com> 2024-05-12 03:15:53 -0400
committerGravatar Paolo Bonzini <pbonzini@redhat.com> 2024-05-12 03:15:53 -0400
commite5f62e27b16601f08b6b04dc964691d48d0a6a91 (patch)
tree526674e4dec329b1030bcf106842388bc90704be /arch/arm64
parentMerge tag 'loongarch-kvm-6.10' of git://git.kernel.org/pub/scm/linux/kernel/g... (diff)
parentMerge branch kvm-arm64/mpidr-reset into kvmarm-master/next (diff)
downloadlinux-e5f62e27b16601f08b6b04dc964691d48d0a6a91.tar.gz
linux-e5f62e27b16601f08b6b04dc964691d48d0a6a91.tar.bz2
linux-e5f62e27b16601f08b6b04dc964691d48d0a6a91.zip
Merge tag 'kvmarm-6.10-1' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into HEAD
KVM/arm64 updates for Linux 6.10 - Move a lot of state that was previously stored on a per vcpu basis into a per-CPU area, because it is only pertinent to the host while the vcpu is loaded. This results in better state tracking, and a smaller vcpu structure. - Add full handling of the ERET/ERETAA/ERETAB instructions in nested virtualisation. The last two instructions also require emulating part of the pointer authentication extension. As a result, the trap handling of pointer authentication has been greattly simplified. - Turn the global (and not very scalable) LPI translation cache into a per-ITS, scalable cache, making non directly injected LPIs much cheaper to make visible to the vcpu. - A batch of pKVM patches, mostly fixes and cleanups, as the upstreaming process seems to be resuming. Fingers crossed! - Allocate PPIs and SGIs outside of the vcpu structure, allowing for smaller EL2 mapping and some flexibility in implementing more or less than 32 private IRQs. - Purge stale mpidr_data if a vcpu is created after the MPIDR map has been created. - Preserve vcpu-specific ID registers across a vcpu reset. - Various minor cleanups and improvements.
Diffstat (limited to 'arch/arm64')
-rw-r--r--arch/arm64/include/asm/esr.h12
-rw-r--r--arch/arm64/include/asm/kvm_asm.h8
-rw-r--r--arch/arm64/include/asm/kvm_emulate.h16
-rw-r--r--arch/arm64/include/asm/kvm_host.h156
-rw-r--r--arch/arm64/include/asm/kvm_hyp.h4
-rw-r--r--arch/arm64/include/asm/kvm_nested.h13
-rw-r--r--arch/arm64/include/asm/kvm_ptrauth.h21
-rw-r--r--arch/arm64/include/asm/pgtable-hwdef.h1
-rw-r--r--arch/arm64/include/asm/virt.h12
-rw-r--r--arch/arm64/kernel/pi/idreg-override.c4
-rw-r--r--arch/arm64/kvm/Makefile1
-rw-r--r--arch/arm64/kvm/arm.c209
-rw-r--r--arch/arm64/kvm/emulate-nested.c66
-rw-r--r--arch/arm64/kvm/fpsimd.c69
-rw-r--r--arch/arm64/kvm/handle_exit.c36
-rw-r--r--arch/arm64/kvm/hyp/include/hyp/debug-sr.h8
-rw-r--r--arch/arm64/kvm/hyp/include/hyp/switch.h86
-rw-r--r--arch/arm64/kvm/hyp/include/nvhe/pkvm.h6
-rw-r--r--arch/arm64/kvm/hyp/nvhe/debug-sr.c8
-rw-r--r--arch/arm64/kvm/hyp/nvhe/ffa.c1
-rw-r--r--arch/arm64/kvm/hyp/nvhe/hyp-main.c27
-rw-r--r--arch/arm64/kvm/hyp/nvhe/mem_protect.c8
-rw-r--r--arch/arm64/kvm/hyp/nvhe/pkvm.c14
-rw-r--r--arch/arm64/kvm/hyp/nvhe/psci-relay.c2
-rw-r--r--arch/arm64/kvm/hyp/nvhe/setup.c4
-rw-r--r--arch/arm64/kvm/hyp/nvhe/switch.c18
-rw-r--r--arch/arm64/kvm/hyp/nvhe/tlb.c115
-rw-r--r--arch/arm64/kvm/hyp/pgtable.c21
-rw-r--r--arch/arm64/kvm/hyp/vgic-v3-sr.c27
-rw-r--r--arch/arm64/kvm/hyp/vhe/switch.c109
-rw-r--r--arch/arm64/kvm/hyp/vhe/sysreg-sr.c4
-rw-r--r--arch/arm64/kvm/hyp/vhe/tlb.c26
-rw-r--r--arch/arm64/kvm/mmio.c12
-rw-r--r--arch/arm64/kvm/mmu.c8
-rw-r--r--arch/arm64/kvm/nested.c8
-rw-r--r--arch/arm64/kvm/pauth.c206
-rw-r--r--arch/arm64/kvm/pkvm.c2
-rw-r--r--arch/arm64/kvm/pmu.c2
-rw-r--r--arch/arm64/kvm/reset.c1
-rw-r--r--arch/arm64/kvm/sys_regs.c69
-rw-r--r--arch/arm64/kvm/vgic/vgic-debug.c82
-rw-r--r--arch/arm64/kvm/vgic/vgic-init.c90
-rw-r--r--arch/arm64/kvm/vgic/vgic-its.c352
-rw-r--r--arch/arm64/kvm/vgic/vgic-mmio-v3.c2
-rw-r--r--arch/arm64/kvm/vgic/vgic-v2.c9
-rw-r--r--arch/arm64/kvm/vgic/vgic-v3.c23
-rw-r--r--arch/arm64/kvm/vgic/vgic.c17
-rw-r--r--arch/arm64/kvm/vgic/vgic.h8
48 files changed, 1249 insertions, 754 deletions
diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h
index 81606bf7d5ac..7abf09df7033 100644
--- a/arch/arm64/include/asm/esr.h
+++ b/arch/arm64/include/asm/esr.h
@@ -404,6 +404,18 @@ static inline bool esr_fsc_is_access_flag_fault(unsigned long esr)
return (esr & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_ACCESS;
}
+/* Indicate whether ESR.EC==0x1A is for an ERETAx instruction */
+static inline bool esr_iss_is_eretax(unsigned long esr)
+{
+ return esr & ESR_ELx_ERET_ISS_ERET;
+}
+
+/* Indicate which key is used for ERETAx (false: A-Key, true: B-Key) */
+static inline bool esr_iss_is_eretab(unsigned long esr)
+{
+ return esr & ESR_ELx_ERET_ISS_ERETA;
+}
+
const char *esr_get_class_string(unsigned long esr);
#endif /* __ASSEMBLY */
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 24b5e6b23417..a6330460d9e5 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -73,10 +73,8 @@ enum __kvm_host_smccc_func {
__KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_range,
__KVM_HOST_SMCCC_FUNC___kvm_flush_cpu_context,
__KVM_HOST_SMCCC_FUNC___kvm_timer_set_cntvoff,
- __KVM_HOST_SMCCC_FUNC___vgic_v3_read_vmcr,
- __KVM_HOST_SMCCC_FUNC___vgic_v3_write_vmcr,
- __KVM_HOST_SMCCC_FUNC___vgic_v3_save_aprs,
- __KVM_HOST_SMCCC_FUNC___vgic_v3_restore_aprs,
+ __KVM_HOST_SMCCC_FUNC___vgic_v3_save_vmcr_aprs,
+ __KVM_HOST_SMCCC_FUNC___vgic_v3_restore_vmcr_aprs,
__KVM_HOST_SMCCC_FUNC___pkvm_vcpu_init_traps,
__KVM_HOST_SMCCC_FUNC___pkvm_init_vm,
__KVM_HOST_SMCCC_FUNC___pkvm_init_vcpu,
@@ -241,8 +239,6 @@ extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
extern void __kvm_adjust_pc(struct kvm_vcpu *vcpu);
extern u64 __vgic_v3_get_gic_config(void);
-extern u64 __vgic_v3_read_vmcr(void);
-extern void __vgic_v3_write_vmcr(u32 vmcr);
extern void __vgic_v3_init_lrs(void);
extern u64 __kvm_get_mdcr_el2(void);
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 975af30af31f..501e3e019c93 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -125,16 +125,6 @@ static inline void vcpu_set_wfx_traps(struct kvm_vcpu *vcpu)
vcpu->arch.hcr_el2 |= HCR_TWI;
}
-static inline void vcpu_ptrauth_enable(struct kvm_vcpu *vcpu)
-{
- vcpu->arch.hcr_el2 |= (HCR_API | HCR_APK);
-}
-
-static inline void vcpu_ptrauth_disable(struct kvm_vcpu *vcpu)
-{
- vcpu->arch.hcr_el2 &= ~(HCR_API | HCR_APK);
-}
-
static inline unsigned long vcpu_get_vsesr(struct kvm_vcpu *vcpu)
{
return vcpu->arch.vsesr_el2;
@@ -587,16 +577,14 @@ static __always_inline u64 kvm_get_reset_cptr_el2(struct kvm_vcpu *vcpu)
} else if (has_hvhe()) {
val = (CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN);
- if (!vcpu_has_sve(vcpu) ||
- (vcpu->arch.fp_state != FP_STATE_GUEST_OWNED))
+ if (!vcpu_has_sve(vcpu) || !guest_owns_fp_regs())
val |= CPACR_EL1_ZEN_EL1EN | CPACR_EL1_ZEN_EL0EN;
if (cpus_have_final_cap(ARM64_SME))
val |= CPACR_EL1_SMEN_EL1EN | CPACR_EL1_SMEN_EL0EN;
} else {
val = CPTR_NVHE_EL2_RES1;
- if (vcpu_has_sve(vcpu) &&
- (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED))
+ if (vcpu_has_sve(vcpu) && guest_owns_fp_regs())
val |= CPTR_EL2_TZ;
if (cpus_have_final_cap(ARM64_SME))
val &= ~CPTR_EL2_TSM;
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 9e8a496fb284..8170c04fde91 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -211,6 +211,7 @@ typedef unsigned int pkvm_handle_t;
struct kvm_protected_vm {
pkvm_handle_t handle;
struct kvm_hyp_memcache teardown_mc;
+ bool enabled;
};
struct kvm_mpidr_data {
@@ -220,20 +221,10 @@ struct kvm_mpidr_data {
static inline u16 kvm_mpidr_index(struct kvm_mpidr_data *data, u64 mpidr)
{
- unsigned long mask = data->mpidr_mask;
- u64 aff = mpidr & MPIDR_HWID_BITMASK;
- int nbits, bit, bit_idx = 0;
- u16 index = 0;
+ unsigned long index = 0, mask = data->mpidr_mask;
+ unsigned long aff = mpidr & MPIDR_HWID_BITMASK;
- /*
- * If this looks like RISC-V's BEXT or x86's PEXT
- * instructions, it isn't by accident.
- */
- nbits = fls(mask);
- for_each_set_bit(bit, &mask, nbits) {
- index |= (aff & BIT(bit)) >> (bit - bit_idx);
- bit_idx++;
- }
+ bitmap_gather(&index, &aff, &mask, fls(mask));
return index;
}
@@ -530,8 +521,42 @@ struct kvm_cpu_context {
u64 *vncr_array;
};
+/*
+ * This structure is instantiated on a per-CPU basis, and contains
+ * data that is:
+ *
+ * - tied to a single physical CPU, and
+ * - either have a lifetime that does not extend past vcpu_put()
+ * - or is an invariant for the lifetime of the system
+ *
+ * Use host_data_ptr(field) as a way to access a pointer to such a
+ * field.
+ */
struct kvm_host_data {
struct kvm_cpu_context host_ctxt;
+ struct user_fpsimd_state *fpsimd_state; /* hyp VA */
+
+ /* Ownership of the FP regs */
+ enum {
+ FP_STATE_FREE,
+ FP_STATE_HOST_OWNED,
+ FP_STATE_GUEST_OWNED,
+ } fp_owner;
+
+ /*
+ * host_debug_state contains the host registers which are
+ * saved and restored during world switches.
+ */
+ struct {
+ /* {Break,watch}point registers */
+ struct kvm_guest_debug_arch regs;
+ /* Statistical profiling extension */
+ u64 pmscr_el1;
+ /* Self-hosted trace */
+ u64 trfcr_el1;
+ /* Values of trap registers for the host before guest entry. */
+ u64 mdcr_el2;
+ } host_debug_state;
};
struct kvm_host_psci_config {
@@ -592,19 +617,9 @@ struct kvm_vcpu_arch {
u64 mdcr_el2;
u64 cptr_el2;
- /* Values of trap registers for the host before guest entry. */
- u64 mdcr_el2_host;
-
/* Exception Information */
struct kvm_vcpu_fault_info fault;
- /* Ownership of the FP regs */
- enum {
- FP_STATE_FREE,
- FP_STATE_HOST_OWNED,
- FP_STATE_GUEST_OWNED,
- } fp_state;
-
/* Configuration flags, set once and for all before the vcpu can run */
u8 cflags;
@@ -627,11 +642,10 @@ struct kvm_vcpu_arch {
* We maintain more than a single set of debug registers to support
* debugging the guest from the host and to maintain separate host and
* guest state during world switches. vcpu_debug_state are the debug
- * registers of the vcpu as the guest sees them. host_debug_state are
- * the host registers which are saved and restored during
- * world switches. external_debug_state contains the debug
- * values we want to debug the guest. This is set via the
- * KVM_SET_GUEST_DEBUG ioctl.
+ * registers of the vcpu as the guest sees them.
+ *
+ * external_debug_state contains the debug values we want to debug the
+ * guest. This is set via the KVM_SET_GUEST_DEBUG ioctl.
*
* debug_ptr points to the set of debug registers that should be loaded
* onto the hardware when running the guest.
@@ -640,18 +654,6 @@ struct kvm_vcpu_arch {
struct kvm_guest_debug_arch vcpu_debug_state;
struct kvm_guest_debug_arch external_debug_state;
- struct user_fpsimd_state *host_fpsimd_state; /* hyp VA */
- struct task_struct *parent_task;
-
- struct {
- /* {Break,watch}point registers */
- struct kvm_guest_debug_arch regs;
- /* Statistical profiling extension */
- u64 pmscr_el1;
- /* Self-hosted trace */
- u64 trfcr_el1;
- } host_debug_state;
-
/* VGIC state */
struct vgic_cpu vgic_cpu;
struct arch_timer_cpu timer_cpu;
@@ -817,8 +819,6 @@ struct kvm_vcpu_arch {
#define DEBUG_STATE_SAVE_SPE __vcpu_single_flag(iflags, BIT(5))
/* Save TRBE context if active */
#define DEBUG_STATE_SAVE_TRBE __vcpu_single_flag(iflags, BIT(6))
-/* vcpu running in HYP context */
-#define VCPU_HYP_CONTEXT __vcpu_single_flag(iflags, BIT(7))
/* SVE enabled for host EL0 */
#define HOST_SVE_ENABLED __vcpu_single_flag(sflags, BIT(0))
@@ -896,7 +896,7 @@ struct kvm_vcpu_arch {
* Don't bother with VNCR-based accesses in the nVHE code, it has no
* business dealing with NV.
*/
-static inline u64 *__ctxt_sys_reg(const struct kvm_cpu_context *ctxt, int r)
+static inline u64 *___ctxt_sys_reg(const struct kvm_cpu_context *ctxt, int r)
{
#if !defined (__KVM_NVHE_HYPERVISOR__)
if (unlikely(cpus_have_final_cap(ARM64_HAS_NESTED_VIRT) &&
@@ -906,6 +906,13 @@ static inline u64 *__ctxt_sys_reg(const struct kvm_cpu_context *ctxt, int r)
return (u64 *)&ctxt->sys_regs[r];
}
+#define __ctxt_sys_reg(c,r) \
+ ({ \
+ BUILD_BUG_ON(__builtin_constant_p(r) && \
+ (r) >= NR_SYS_REGS); \
+ ___ctxt_sys_reg(c, r); \
+ })
+
#define ctxt_sys_reg(c,r) (*__ctxt_sys_reg(c,r))
u64 kvm_vcpu_sanitise_vncr_reg(const struct kvm_vcpu *, enum vcpu_sysreg);
@@ -1168,6 +1175,44 @@ struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
DECLARE_KVM_HYP_PER_CPU(struct kvm_host_data, kvm_host_data);
+/*
+ * How we access per-CPU host data depends on the where we access it from,
+ * and the mode we're in:
+ *
+ * - VHE and nVHE hypervisor bits use their locally defined instance
+ *
+ * - the rest of the kernel use either the VHE or nVHE one, depending on
+ * the mode we're running in.
+ *
+ * Unless we're in protected mode, fully deprivileged, and the nVHE
+ * per-CPU stuff is exclusively accessible to the protected EL2 code.
+ * In this case, the EL1 code uses the *VHE* data as its private state
+ * (which makes sense in a way as there shouldn't be any shared state
+ * between the host and the hypervisor).
+ *
+ * Yes, this is all totally trivial. Shoot me now.
+ */
+#if defined(__KVM_NVHE_HYPERVISOR__) || defined(__KVM_VHE_HYPERVISOR__)
+#define host_data_ptr(f) (&this_cpu_ptr(&kvm_host_data)->f)
+#else
+#define host_data_ptr(f) \
+ (static_branch_unlikely(&kvm_protected_mode_initialized) ? \
+ &this_cpu_ptr(&kvm_host_data)->f : \
+ &this_cpu_ptr_hyp_sym(kvm_host_data)->f)
+#endif
+
+/* Check whether the FP regs are owned by the guest */
+static inline bool guest_owns_fp_regs(void)
+{
+ return *host_data_ptr(fp_owner) == FP_STATE_GUEST_OWNED;
+}
+
+/* Check whether the FP regs are owned by the host */
+static inline bool host_owns_fp_regs(void)
+{
+ return *host_data_ptr(fp_owner) == FP_STATE_HOST_OWNED;
+}
+
static inline void kvm_init_host_cpu_context(struct kvm_cpu_context *cpu_ctxt)
{
/* The host's MPIDR is immutable, so let's set it up at boot time */
@@ -1211,7 +1256,6 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_ctxflush_fp(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu);
-void kvm_vcpu_unshare_task_fp(struct kvm_vcpu *vcpu);
static inline bool kvm_pmu_counter_deferred(struct perf_event_attr *attr)
{
@@ -1247,10 +1291,9 @@ struct kvm *kvm_arch_alloc_vm(void);
#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE
-static inline bool kvm_vm_is_protected(struct kvm *kvm)
-{
- return false;
-}
+#define kvm_vm_is_protected(kvm) (is_protected_kvm_enabled() && (kvm)->arch.pkvm.enabled)
+
+#define vcpu_is_protected(vcpu) kvm_vm_is_protected((vcpu)->kvm)
int kvm_arm_vcpu_finalize(struct kvm_vcpu *vcpu, int feature);
bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu);
@@ -1275,6 +1318,8 @@ static inline bool __vcpu_has_feature(const struct kvm_arch *ka, int feature)
#define vcpu_has_feature(v, f) __vcpu_has_feature(&(v)->kvm->arch, (f))
+#define kvm_vcpu_initialized(v) vcpu_get_flag(vcpu, VCPU_INITIALIZED)
+
int kvm_trng_call(struct kvm_vcpu *vcpu);
#ifdef CONFIG_KVM
extern phys_addr_t hyp_mem_base;
@@ -1331,4 +1376,19 @@ bool kvm_arm_vcpu_stopped(struct kvm_vcpu *vcpu);
(get_idreg_field((kvm), id, fld) >= expand_field_sign(id, fld, min) && \
get_idreg_field((kvm), id, fld) <= expand_field_sign(id, fld, max))
+/* Check for a given level of PAuth support */
+#define kvm_has_pauth(k, l) \
+ ({ \
+ bool pa, pi, pa3; \
+ \
+ pa = kvm_has_feat((k), ID_AA64ISAR1_EL1, APA, l); \
+ pa &= kvm_has_feat((k), ID_AA64ISAR1_EL1, GPA, IMP); \
+ pi = kvm_has_feat((k), ID_AA64ISAR1_EL1, API, l); \
+ pi &= kvm_has_feat((k), ID_AA64ISAR1_EL1, GPI, IMP); \
+ pa3 = kvm_has_feat((k), ID_AA64ISAR2_EL1, APA3, l); \
+ pa3 &= kvm_has_feat((k), ID_AA64ISAR2_EL1, GPA3, IMP); \
+ \
+ (pa + pi + pa3) == 1; \
+ })
+
#endif /* __ARM64_KVM_HOST_H__ */
diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index 3e2a1ac0c9bb..3e80464f8953 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -80,8 +80,8 @@ void __vgic_v3_save_state(struct vgic_v3_cpu_if *cpu_if);
void __vgic_v3_restore_state(struct vgic_v3_cpu_if *cpu_if);
void __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if);
void __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if);
-void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if);
-void __vgic_v3_restore_aprs(struct vgic_v3_cpu_if *cpu_if);
+void __vgic_v3_save_vmcr_aprs(struct vgic_v3_cpu_if *cpu_if);
+void __vgic_v3_restore_vmcr_aprs(struct vgic_v3_cpu_if *cpu_if);
int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu);
#ifdef __KVM_NVHE_HYPERVISOR__
diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h
index c77d795556e1..5e0ab0596246 100644
--- a/arch/arm64/include/asm/kvm_nested.h
+++ b/arch/arm64/include/asm/kvm_nested.h
@@ -60,7 +60,20 @@ static inline u64 translate_ttbr0_el2_to_ttbr0_el1(u64 ttbr0)
return ttbr0 & ~GENMASK_ULL(63, 48);
}
+extern bool forward_smc_trap(struct kvm_vcpu *vcpu);
int kvm_init_nv_sysregs(struct kvm *kvm);
+#ifdef CONFIG_ARM64_PTR_AUTH
+bool kvm_auth_eretax(struct kvm_vcpu *vcpu, u64 *elr);
+#else
+static inline bool kvm_auth_eretax(struct kvm_vcpu *vcpu, u64 *elr)
+{
+ /* We really should never execute this... */
+ WARN_ON_ONCE(1);
+ *elr = 0xbad9acc0debadbad;
+ return false;
+}
+#endif
+
#endif /* __ARM64_KVM_NESTED_H */
diff --git a/arch/arm64/include/asm/kvm_ptrauth.h b/arch/arm64/include/asm/kvm_ptrauth.h
index 0cd0965255d2..d81bac256abc 100644
--- a/arch/arm64/include/asm/kvm_ptrauth.h
+++ b/arch/arm64/include/asm/kvm_ptrauth.h
@@ -99,5 +99,26 @@ alternative_else_nop_endif
.macro ptrauth_switch_to_hyp g_ctxt, h_ctxt, reg1, reg2, reg3
.endm
#endif /* CONFIG_ARM64_PTR_AUTH */
+
+#else /* !__ASSEMBLY */
+
+#define __ptrauth_save_key(ctxt, key) \
+ do { \
+ u64 __val; \
+ __val = read_sysreg_s(SYS_ ## key ## KEYLO_EL1); \
+ ctxt_sys_reg(ctxt, key ## KEYLO_EL1) = __val; \
+ __val = read_sysreg_s(SYS_ ## key ## KEYHI_EL1); \
+ ctxt_sys_reg(ctxt, key ## KEYHI_EL1) = __val; \
+ } while(0)
+
+#define ptrauth_save_keys(ctxt) \
+ do { \
+ __ptrauth_save_key(ctxt, APIA); \
+ __ptrauth_save_key(ctxt, APIB); \
+ __ptrauth_save_key(ctxt, APDA); \
+ __ptrauth_save_key(ctxt, APDB); \
+ __ptrauth_save_key(ctxt, APGA); \
+ } while(0)
+
#endif /* __ASSEMBLY__ */
#endif /* __ASM_KVM_PTRAUTH_H */
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index ef207a0d4f0d..9943ff0af4c9 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -297,6 +297,7 @@
#define TCR_TBI1 (UL(1) << 38)
#define TCR_HA (UL(1) << 39)
#define TCR_HD (UL(1) << 40)
+#define TCR_TBID0 (UL(1) << 51)
#define TCR_TBID1 (UL(1) << 52)
#define TCR_NFD0 (UL(1) << 53)
#define TCR_NFD1 (UL(1) << 54)
diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h
index 261d6e9df2e1..ebf4a9f943ed 100644
--- a/arch/arm64/include/asm/virt.h
+++ b/arch/arm64/include/asm/virt.h
@@ -82,6 +82,12 @@ bool is_kvm_arm_initialised(void);
DECLARE_STATIC_KEY_FALSE(kvm_protected_mode_initialized);
+static inline bool is_pkvm_initialized(void)
+{
+ return IS_ENABLED(CONFIG_KVM) &&
+ static_branch_likely(&kvm_protected_mode_initialized);
+}
+
/* Reports the availability of HYP mode */
static inline bool is_hyp_mode_available(void)
{
@@ -89,8 +95,7 @@ static inline bool is_hyp_mode_available(void)
* If KVM protected mode is initialized, all CPUs must have been booted
* in EL2. Avoid checking __boot_cpu_mode as CPUs now come up in EL1.
*/
- if (IS_ENABLED(CONFIG_KVM) &&
- static_branch_likely(&kvm_protected_mode_initialized))
+ if (is_pkvm_initialized())
return true;
return (__boot_cpu_mode[0] == BOOT_CPU_MODE_EL2 &&
@@ -104,8 +109,7 @@ static inline bool is_hyp_mode_mismatched(void)
* If KVM protected mode is initialized, all CPUs must have been booted
* in EL2. Avoid checking __boot_cpu_mode as CPUs now come up in EL1.
*/
- if (IS_ENABLED(CONFIG_KVM) &&
- static_branch_likely(&kvm_protected_mode_initialized))
+ if (is_pkvm_initialized())
return false;
return __boot_cpu_mode[0] != __boot_cpu_mode[1];
diff --git a/arch/arm64/kernel/pi/idreg-override.c b/arch/arm64/kernel/pi/idreg-override.c
index aad399796e81..c20549e43a77 100644
--- a/arch/arm64/kernel/pi/idreg-override.c
+++ b/arch/arm64/kernel/pi/idreg-override.c
@@ -209,8 +209,8 @@ static const struct {
char alias[FTR_ALIAS_NAME_LEN];
char feature[FTR_ALIAS_OPTION_LEN];
} aliases[] __initconst = {
- { "kvm_arm.mode=nvhe", "id_aa64mmfr1.vh=0" },
- { "kvm_arm.mode=protected", "id_aa64mmfr1.vh=0" },
+ { "kvm_arm.mode=nvhe", "arm64_sw.hvhe=0 id_aa64mmfr1.vh=0" },
+ { "kvm_arm.mode=protected", "arm64_sw.hvhe=1" },
{ "arm64.nosve", "id_aa64pfr0.sve=0" },
{ "arm64.nosme", "id_aa64pfr1.sme=0" },
{ "arm64.nobti", "id_aa64pfr1.bt=0" },
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index c0c050e53157..04882b577575 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -23,6 +23,7 @@ kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \
vgic/vgic-its.o vgic/vgic-debug.o
kvm-$(CONFIG_HW_PERF_EVENTS) += pmu-emul.o pmu.o
+kvm-$(CONFIG_ARM64_PTR_AUTH) += pauth.o
always-y := hyp_constants.h hyp-constants.s
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index c4a0a35e02c7..9996a989b52e 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -35,10 +35,11 @@
#include <asm/virt.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_asm.h>
+#include <asm/kvm_emulate.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_nested.h>
#include <asm/kvm_pkvm.h>
-#include <asm/kvm_emulate.h>
+#include <asm/kvm_ptrauth.h>
#include <asm/sections.h>
#include <kvm/arm_hypercalls.h>
@@ -69,15 +70,42 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
}
+/*
+ * This functions as an allow-list of protected VM capabilities.
+ * Features not explicitly allowed by this function are denied.
+ */
+static bool pkvm_ext_allowed(struct kvm *kvm, long ext)
+{
+ switch (ext) {
+ case KVM_CAP_IRQCHIP:
+ case KVM_CAP_ARM_PSCI:
+ case KVM_CAP_ARM_PSCI_0_2:
+ case KVM_CAP_NR_VCPUS:
+ case KVM_CAP_MAX_VCPUS:
+ case KVM_CAP_MAX_VCPU_ID:
+ case KVM_CAP_MSI_DEVID:
+ case KVM_CAP_ARM_VM_IPA_SIZE:
+ case KVM_CAP_ARM_PMU_V3:
+ case KVM_CAP_ARM_SVE:
+ case KVM_CAP_ARM_PTRAUTH_ADDRESS:
+ case KVM_CAP_ARM_PTRAUTH_GENERIC:
+ return true;
+ default:
+ return false;
+ }
+}
+
int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
struct kvm_enable_cap *cap)
{
- int r;
- u64 new_cap;
+ int r = -EINVAL;
if (cap->flags)
return -EINVAL;
+ if (kvm_vm_is_protected(kvm) && !pkvm_ext_allowed(kvm, cap->cap))
+ return -EINVAL;
+
switch (cap->cap) {
case KVM_CAP_ARM_NISV_TO_USER:
r = 0;
@@ -86,9 +114,7 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
break;
case KVM_CAP_ARM_MTE:
mutex_lock(&kvm->lock);
- if (!system_supports_mte() || kvm->created_vcpus) {
- r = -EINVAL;
- } else {
+ if (system_supports_mte() && !kvm->created_vcpus) {
r = 0;
set_bit(KVM_ARCH_FLAG_MTE_ENABLED, &kvm->arch.flags);
}
@@ -99,25 +125,22 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
set_bit(KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED, &kvm->arch.flags);
break;
case KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE:
- new_cap = cap->args[0];
-
mutex_lock(&kvm->slots_lock);
/*
* To keep things simple, allow changing the chunk
* size only when no memory slots have been created.
*/
- if (!kvm_are_all_memslots_empty(kvm)) {
- r = -EINVAL;
- } else if (new_cap && !kvm_is_block_size_supported(new_cap)) {
- r = -EINVAL;
- } else {
- r = 0;
- kvm->arch.mmu.split_page_chunk_size = new_cap;
+ if (kvm_are_all_memslots_empty(kvm)) {
+ u64 new_cap = cap->args[0];
+
+ if (!new_cap || kvm_is_block_size_supported(new_cap)) {
+ r = 0;
+ kvm->arch.mmu.split_page_chunk_size = new_cap;
+ }
}
mutex_unlock(&kvm->slots_lock);
break;
default:
- r = -EINVAL;
break;
}
@@ -195,6 +218,23 @@ void kvm_arch_create_vm_debugfs(struct kvm *kvm)
kvm_sys_regs_create_debugfs(kvm);
}
+static void kvm_destroy_mpidr_data(struct kvm *kvm)
+{
+ struct kvm_mpidr_data *data;
+
+ mutex_lock(&kvm->arch.config_lock);
+
+ data = rcu_dereference_protected(kvm->arch.mpidr_data,
+ lockdep_is_held(&kvm->arch.config_lock));
+ if (data) {
+ rcu_assign_pointer(kvm->arch.mpidr_data, NULL);
+ synchronize_rcu();
+ kfree(data);
+ }
+
+ mutex_unlock(&kvm->arch.config_lock);
+}
+
/**
* kvm_arch_destroy_vm - destroy the VM data structure
* @kvm: pointer to the KVM struct
@@ -209,7 +249,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
if (is_protected_kvm_enabled())
pkvm_destroy_hyp_vm(kvm);
- kfree(kvm->arch.mpidr_data);
+ kvm_destroy_mpidr_data(kvm);
+
kfree(kvm->arch.sysreg_masks);
kvm_destroy_vcpus(kvm);
@@ -218,9 +259,47 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
kvm_arm_teardown_hypercalls(kvm);
}
+static bool kvm_has_full_ptr_auth(void)
+{
+ bool apa, gpa, api, gpi, apa3, gpa3;
+ u64 isar1, isar2, val;
+
+ /*
+ * Check that:
+ *
+ * - both Address and Generic auth are implemented for a given
+ * algorithm (Q5, IMPDEF or Q3)
+ * - only a single algorithm is implemented.
+ */
+ if (!system_has_full_ptr_auth())
+ return false;
+
+ isar1 = read_sanitised_ftr_reg(SYS_ID_AA64ISAR1_EL1);
+ isar2 = read_sanitised_ftr_reg(SYS_ID_AA64ISAR2_EL1);
+
+ apa = !!FIELD_GET(ID_AA64ISAR1_EL1_APA_MASK, isar1);
+ val = FIELD_GET(ID_AA64ISAR1_EL1_GPA_MASK, isar1);
+ gpa = (val == ID_AA64ISAR1_EL1_GPA_IMP);
+
+ api = !!FIELD_GET(ID_AA64ISAR1_EL1_API_MASK, isar1);
+ val = FIELD_GET(ID_AA64ISAR1_EL1_GPI_MASK, isar1);
+ gpi = (val == ID_AA64ISAR1_EL1_GPI_IMP);
+
+ apa3 = !!FIELD_GET(ID_AA64ISAR2_EL1_APA3_MASK, isar2);
+ val = FIELD_GET(ID_AA64ISAR2_EL1_GPA3_MASK, isar2);
+ gpa3 = (val == ID_AA64ISAR2_EL1_GPA3_IMP);
+
+ return (apa == gpa && api == gpi && apa3 == gpa3 &&
+ (apa + api + apa3) == 1);
+}
+
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
int r;
+
+ if (kvm && kvm_vm_is_protected(kvm) && !pkvm_ext_allowed(kvm, ext))
+ return 0;
+
switch (ext) {
case KVM_CAP_IRQCHIP:
r = vgic_present;
@@ -311,7 +390,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
break;
case KVM_CAP_ARM_PTRAUTH_ADDRESS:
case KVM_CAP_ARM_PTRAUTH_GENERIC:
- r = system_has_full_ptr_auth();
+ r = kvm_has_full_ptr_auth();
break;
case KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE:
if (kvm)
@@ -378,12 +457,6 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
vcpu->arch.mmu_page_cache.gfp_zero = __GFP_ZERO;
- /*
- * Default value for the FP state, will be overloaded at load
- * time if we support FP (pretty likely)
- */
- vcpu->arch.fp_state = FP_STATE_FREE;
-
/* Set up the timer */
kvm_timer_vcpu_init(vcpu);
@@ -395,6 +468,13 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
vcpu->arch.hw_mmu = &vcpu->kvm->arch.mmu;
+ /*
+ * This vCPU may have been created after mpidr_data was initialized.
+ * Throw out the pre-computed mappings if that is the case which forces
+ * KVM to fall back to iteratively searching the vCPUs.
+ */
+ kvm_destroy_mpidr_data(vcpu->kvm);
+
err = kvm_vgic_vcpu_init(vcpu);
if (err)
return err;
@@ -428,6 +508,44 @@ void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
}
+static void vcpu_set_pauth_traps(struct kvm_vcpu *vcpu)
+{
+ if (vcpu_has_ptrauth(vcpu)) {
+ /*
+ * Either we're running running an L2 guest, and the API/APK
+ * bits come from L1's HCR_EL2, or API/APK are both set.
+ */
+ if (unlikely(vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu))) {
+ u64 val;
+
+ val = __vcpu_sys_reg(vcpu, HCR_EL2);
+ val &= (HCR_API | HCR_APK);
+ vcpu->arch.hcr_el2 &= ~(HCR_API | HCR_APK);
+ vcpu->arch.hcr_el2 |= val;
+ } else {
+ vcpu->arch.hcr_el2 |= (HCR_API | HCR_APK);
+ }
+
+ /*
+ * Save the host keys if there is any chance for the guest
+ * to use pauth, as the entry code will reload the guest
+ * keys in that case.
+ * Protected mode is the exception to that rule, as the
+ * entry into the EL2 code eagerly switch back and forth
+ * between host and hyp keys (and kvm_hyp_ctxt is out of
+ * reach anyway).
+ */
+ if (is_protected_kvm_enabled())
+ return;
+
+ if (vcpu->arch.hcr_el2 & (HCR_API | HCR_APK)) {
+ struct kvm_cpu_context *ctxt;
+ ctxt = this_cpu_ptr_hyp_sym(kvm_hyp_ctxt);
+ ptrauth_save_keys(ctxt);
+ }
+ }
+}
+
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
struct kvm_s2_mmu *mmu;
@@ -466,8 +584,8 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
else
vcpu_set_wfx_traps(vcpu);
- if (vcpu_has_ptrauth(vcpu))
- vcpu_ptrauth_disable(vcpu);
+ vcpu_set_pauth_traps(vcpu);
+
kvm_arch_vcpu_load_debug_state_flags(vcpu);
if (!cpumask_test_cpu(cpu, vcpu->kvm->arch.supported_cpus))
@@ -580,11 +698,6 @@ unsigned long kvm_arch_vcpu_get_ip(struct kvm_vcpu *vcpu)
}
#endif
-static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu)
-{
- return vcpu_get_flag(vcpu, VCPU_INITIALIZED);
-}
-
static void kvm_init_mpidr_data(struct kvm *kvm)
{
struct kvm_mpidr_data *data = NULL;
@@ -594,7 +707,8 @@ static void kvm_init_mpidr_data(struct kvm *kvm)
mutex_lock(&kvm->arch.config_lock);
- if (kvm->arch.mpidr_data || atomic_read(&kvm->online_vcpus) == 1)
+ if (rcu_access_pointer(kvm->arch.mpidr_data) ||
+ atomic_read(&kvm->online_vcpus) == 1)
goto out;
kvm_for_each_vcpu(c, vcpu, kvm) {
@@ -631,7 +745,7 @@ static void kvm_init_mpidr_data(struct kvm *kvm)
data->cmpidr_to_idx[index] = c;
}
- kvm->arch.mpidr_data = data;
+ rcu_assign_pointer(kvm->arch.mpidr_data, data);
out:
mutex_unlock(&kvm->arch.config_lock);
}
@@ -790,9 +904,8 @@ void kvm_vcpu_wfi(struct kvm_vcpu *vcpu)
* doorbells to be signalled, should an interrupt become pending.
*/
preempt_disable();
- kvm_vgic_vmcr_sync(vcpu);
vcpu_set_flag(vcpu, IN_WFI);
- vgic_v4_put(vcpu);
+ kvm_vgic_put(vcpu);
preempt_enable();
kvm_vcpu_halt(vcpu);
@@ -800,7 +913,7 @@ void kvm_vcpu_wfi(struct kvm_vcpu *vcpu)
preempt_disable();
vcpu_clear_flag(vcpu, IN_WFI);
- vgic_v4_load(vcpu);
+ kvm_vgic_load(vcpu);
preempt_enable();
}
@@ -980,7 +1093,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
if (run->exit_reason == KVM_EXIT_MMIO) {
ret = kvm_handle_mmio_return(vcpu);
- if (ret)
+ if (ret <= 0)
return ret;
}
@@ -1270,7 +1383,7 @@ static unsigned long system_supported_vcpu_features(void)
if (!system_supports_sve())
clear_bit(KVM_ARM_VCPU_SVE, &features);
- if (!system_has_full_ptr_auth()) {
+ if (!kvm_has_full_ptr_auth()) {
clear_bit(KVM_ARM_VCPU_PTRAUTH_ADDRESS, &features);
clear_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, &features);
}
@@ -1971,7 +2084,7 @@ static void cpu_set_hyp_vector(void)
static void cpu_hyp_init_context(void)
{
- kvm_init_host_cpu_context(&this_cpu_ptr_hyp_sym(kvm_host_data)->host_ctxt);
+ kvm_init_host_cpu_context(host_data_ptr(host_ctxt));
if (!is_kernel_in_hyp_mode())
cpu_init_hyp_mode();
@@ -2470,21 +2583,27 @@ out_err:
struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr)
{
- struct kvm_vcpu *vcpu;
+ struct kvm_vcpu *vcpu = NULL;
+ struct kvm_mpidr_data *data;
unsigned long i;
mpidr &= MPIDR_HWID_BITMASK;
- if (kvm->arch.mpidr_data) {
- u16 idx = kvm_mpidr_index(kvm->arch.mpidr_data, mpidr);
+ rcu_read_lock();
+ data = rcu_dereference(kvm->arch.mpidr_data);
+
+ if (data) {
+ u16 idx = kvm_mpidr_index(data, mpidr);
- vcpu = kvm_get_vcpu(kvm,
- kvm->arch.mpidr_data->cmpidr_to_idx[idx]);
+ vcpu = kvm_get_vcpu(kvm, data->cmpidr_to_idx[idx]);
if (mpidr != kvm_vcpu_get_mpidr_aff(vcpu))
vcpu = NULL;
+ }
+ rcu_read_unlock();
+
+ if (vcpu)
return vcpu;
- }
kvm_for_each_vcpu(i, vcpu, kvm) {
if (mpidr == kvm_vcpu_get_mpidr_aff(vcpu))
diff --git a/arch/arm64/kvm/emulate-nested.c b/arch/arm64/kvm/emulate-nested.c
index 4697ba41b3a9..72d733c74a38 100644
--- a/arch/arm64/kvm/emulate-nested.c
+++ b/arch/arm64/kvm/emulate-nested.c
@@ -2117,6 +2117,26 @@ inject:
return true;
}
+static bool forward_traps(struct kvm_vcpu *vcpu, u64 control_bit)
+{
+ bool control_bit_set;
+
+ if (!vcpu_has_nv(vcpu))
+ return false;
+
+ control_bit_set = __vcpu_sys_reg(vcpu, HCR_EL2) & control_bit;
+ if (!is_hyp_ctxt(vcpu) && control_bit_set) {
+ kvm_inject_nested_sync(vcpu, kvm_vcpu_get_esr(vcpu));
+ return true;
+ }
+ return false;
+}
+
+bool forward_smc_trap(struct kvm_vcpu *vcpu)
+{
+ return forward_traps(vcpu, HCR_TSC);
+}
+
static u64 kvm_check_illegal_exception_return(struct kvm_vcpu *vcpu, u64 spsr)
{
u64 mode = spsr & PSR_MODE_MASK;
@@ -2152,37 +2172,39 @@ static u64 kvm_check_illegal_exception_return(struct kvm_vcpu *vcpu, u64 spsr)
void kvm_emulate_nested_eret(struct kvm_vcpu *vcpu)
{
- u64 spsr, elr, mode;
- bool direct_eret;
+ u64 spsr, elr, esr;
/*
- * Going through the whole put/load motions is a waste of time
- * if this is a VHE guest hypervisor returning to its own
- * userspace, or the hypervisor performing a local exception
- * return. No need to save/restore registers, no need to
- * switch S2 MMU. Just do the canonical ERET.
+ * Forward this trap to the virtual EL2 if the virtual
+ * HCR_EL2.NV bit is set and this is coming from !EL2.
*/
- spsr = vcpu_read_sys_reg(vcpu, SPSR_EL2);
- spsr = kvm_check_illegal_exception_return(vcpu, spsr);
-
- mode = spsr & (PSR_MODE_MASK | PSR_MODE32_BIT);
-
- direct_eret = (mode == PSR_MODE_EL0t &&
- vcpu_el2_e2h_is_set(vcpu) &&
- vcpu_el2_tge_is_set(vcpu));
- direct_eret |= (mode == PSR_MODE_EL2h || mode == PSR_MODE_EL2t);
-
- if (direct_eret) {
- *vcpu_pc(vcpu) = vcpu_read_sys_reg(vcpu, ELR_EL2);
- *vcpu_cpsr(vcpu) = spsr;
- trace_kvm_nested_eret(vcpu, *vcpu_pc(vcpu), spsr);
+ if (forward_traps(vcpu, HCR_NV))
return;
+
+ /* Check for an ERETAx */
+ esr = kvm_vcpu_get_esr(vcpu);
+ if (esr_iss_is_eretax(esr) && !kvm_auth_eretax(vcpu, &elr)) {
+ /*
+ * Oh no, ERETAx failed to authenticate. If we have
+ * FPACCOMBINE, deliver an exception right away. If we
+ * don't, then let the mangled ELR value trickle down the
+ * ERET handling, and the guest will have a little surprise.
+ */
+ if (kvm_has_pauth(vcpu->kvm, FPACCOMBINE)) {
+ esr &= ESR_ELx_ERET_ISS_ERETA;
+ esr |= FIELD_PREP(ESR_ELx_EC_MASK, ESR_ELx_EC_FPAC);
+ kvm_inject_nested_sync(vcpu, esr);
+ return;
+ }
}
preempt_disable();
kvm_arch_vcpu_put(vcpu);
- elr = __vcpu_sys_reg(vcpu, ELR_EL2);
+ spsr = __vcpu_sys_reg(vcpu, SPSR_EL2);
+ spsr = kvm_check_illegal_exception_return(vcpu, spsr);
+ if (!esr_iss_is_eretax(esr))
+ elr = __vcpu_sys_reg(vcpu, ELR_EL2);
trace_kvm_nested_eret(vcpu, elr, spsr);
diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
index 826307e19e3a..1807d3a79a8a 100644
--- a/arch/arm64/kvm/fpsimd.c
+++ b/arch/arm64/kvm/fpsimd.c
@@ -14,19 +14,6 @@
#include <asm/kvm_mmu.h>
#include <asm/sysreg.h>
-void kvm_vcpu_unshare_task_fp(struct kvm_vcpu *vcpu)
-{
- struct task_struct *p = vcpu->arch.parent_task;
- struct user_fpsimd_state *fpsimd;
-
- if (!is_protected_kvm_enabled() || !p)
- return;
-
- fpsimd = &p->thread.uw.fpsimd_state;
- kvm_unshare_hyp(fpsimd, fpsimd + 1);
- put_task_struct(p);
-}
-
/*
* Called on entry to KVM_RUN unless this vcpu previously ran at least
* once and the most recent prior KVM_RUN for this vcpu was called from
@@ -38,30 +25,18 @@ void kvm_vcpu_unshare_task_fp(struct kvm_vcpu *vcpu)
*/
int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu)
{
- int ret;
-
struct user_fpsimd_state *fpsimd = &current->thread.uw.fpsimd_state;
+ int ret;
- kvm_vcpu_unshare_task_fp(vcpu);
+ /* pKVM has its own tracking of the host fpsimd state. */
+ if (is_protected_kvm_enabled())
+ return 0;
/* Make sure the host task fpsimd state is visible to hyp: */
ret = kvm_share_hyp(fpsimd, fpsimd + 1);
if (ret)
return ret;
- vcpu->arch.host_fpsimd_state = kern_hyp_va(fpsimd);
-
- /*
- * We need to keep current's task_struct pinned until its data has been
- * unshared with the hypervisor to make sure it is not re-used by the
- * kernel and donated to someone else while already shared -- see
- * kvm_vcpu_unshare_task_fp() for the matching put_task_struct().
- */
- if (is_protected_kvm_enabled()) {
- get_task_struct(current);
- vcpu->arch.parent_task = current;
- }
-
return 0;
}
@@ -86,7 +61,8 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
* guest in kvm_arch_vcpu_ctxflush_fp() and override this to
* FP_STATE_FREE if the flag set.
*/
- vcpu->arch.fp_state = FP_STATE_HOST_OWNED;
+ *host_data_ptr(fp_owner) = FP_STATE_HOST_OWNED;
+ *host_data_ptr(fpsimd_state) = kern_hyp_va(&current->thread.uw.fpsimd_state);
vcpu_clear_flag(vcpu, HOST_SVE_ENABLED);
if (read_sysreg(cpacr_el1) & CPACR_EL1_ZEN_EL0EN)
@@ -110,7 +86,7 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
* been saved, this is very unlikely to happen.
*/
if (read_sysreg_s(SYS_SVCR) & (SVCR_SM_MASK | SVCR_ZA_MASK)) {
- vcpu->arch.fp_state = FP_STATE_FREE;
+ *host_data_ptr(fp_owner) = FP_STATE_FREE;
fpsimd_save_and_flush_cpu_state();
}
}
@@ -126,7 +102,7 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
void kvm_arch_vcpu_ctxflush_fp(struct kvm_vcpu *vcpu)
{
if (test_thread_flag(TIF_FOREIGN_FPSTATE))
- vcpu->arch.fp_state = FP_STATE_FREE;
+ *host_data_ptr(fp_owner) = FP_STATE_FREE;
}
/*
@@ -142,8 +118,7 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu)
WARN_ON_ONCE(!irqs_disabled());
- if (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED) {
-
+ if (guest_owns_fp_regs()) {
/*
* Currently we do not support SME guests so SVCR is
* always 0 and we just need a variable to point to.
@@ -196,16 +171,38 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
isb();
}
- if (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED) {
+ if (guest_owns_fp_regs()) {
if (vcpu_has_sve(vcpu)) {
__vcpu_sys_reg(vcpu, ZCR_EL1) = read_sysreg_el1(SYS_ZCR);
- /* Restore the VL that was saved when bound to the CPU */
+ /*
+ * Restore the VL that was saved when bound to the CPU,
+ * which is the maximum VL for the guest. Because the
+ * layout of the data when saving the sve state depends
+ * on the VL, we need to use a consistent (i.e., the
+ * maximum) VL.
+ * Note that this means that at guest exit ZCR_EL1 is
+ * not necessarily the same as on guest entry.
+ *
+ * Restoring the VL isn't needed in VHE mode since
+ * ZCR_EL2 (accessed via ZCR_EL1) would fulfill the same
+ * role when doing the save from EL2.
+ */
if (!has_vhe())
sve_cond_update_zcr_vq(vcpu_sve_max_vq(vcpu) - 1,
SYS_ZCR_EL1);
}
+ /*
+ * Flush (save and invalidate) the fpsimd/sve state so that if
+ * the host tries to use fpsimd/sve, it's not using stale data
+ * from the guest.
+ *
+ * Flushing the state sets the TIF_FOREIGN_FPSTATE bit for the
+ * context unconditionally, in both nVHE and VHE. This allows
+ * the kernel to restore the fpsimd/sve state, including ZCR_EL1
+ * when needed.
+ */
fpsimd_save_and_flush_cpu_state();
} else if (has_vhe() && system_supports_sve()) {
/*
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index 617ae6dea5d5..b037f0a0e27e 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -56,6 +56,13 @@ static int handle_hvc(struct kvm_vcpu *vcpu)
static int handle_smc(struct kvm_vcpu *vcpu)
{
/*
+ * Forward this trapped smc instruction to the virtual EL2 if
+ * the guest has asked for it.
+ */
+ if (forward_smc_trap(vcpu))
+ return 1;
+
+ /*
* "If an SMC instruction executed at Non-secure EL1 is
* trapped to EL2 because HCR_EL2.TSC is 1, the exception is a
* Trap exception, not a Secure Monitor Call exception [...]"
@@ -207,19 +214,40 @@ static int handle_sve(struct kvm_vcpu *vcpu)
}
/*
- * Guest usage of a ptrauth instruction (which the guest EL1 did not turn into
- * a NOP). If we get here, it is that we didn't fixup ptrauth on exit, and all
- * that we can do is give the guest an UNDEF.
+ * Two possibilities to handle a trapping ptrauth instruction:
+ *
+ * - Guest usage of a ptrauth instruction (which the guest EL1 did not
+ * turn into a NOP). If we get here, it is because we didn't enable
+ * ptrauth for the guest. This results in an UNDEF, as it isn't
+ * supposed to use ptrauth without being told it could.
+ *
+ * - Running an L2 NV guest while L1 has left HCR_EL2.API==0, and for
+ * which we reinject the exception into L1.
+ *
+ * Anything else is an emulation bug (hence the WARN_ON + UNDEF).
*/
static int kvm_handle_ptrauth(struct kvm_vcpu *vcpu)
{
+ if (!vcpu_has_ptrauth(vcpu)) {
+ kvm_inject_undefined(vcpu);
+ return 1;
+ }
+
+ if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) {
+ kvm_inject_nested_sync(vcpu, kvm_vcpu_get_esr(vcpu));
+ return 1;
+ }
+
+ /* Really shouldn't be here! */
+ WARN_ON_ONCE(1);
kvm_inject_undefined(vcpu);
return 1;
}
static int kvm_handle_eret(struct kvm_vcpu *vcpu)
{
- if (kvm_vcpu_get_esr(vcpu) & ESR_ELx_ERET_ISS_ERET)
+ if (esr_iss_is_eretax(kvm_vcpu_get_esr(vcpu)) &&
+ !vcpu_has_ptrauth(vcpu))
return kvm_handle_ptrauth(vcpu);
/*
diff --git a/arch/arm64/kvm/hyp/include/hyp/debug-sr.h b/arch/arm64/kvm/hyp/include/hyp/debug-sr.h
index 961bbef104a6..d00093699aaf 100644
--- a/arch/arm64/kvm/hyp/include/hyp/debug-sr.h
+++ b/arch/arm64/kvm/hyp/include/hyp/debug-sr.h
@@ -135,9 +135,9 @@ static inline void __debug_switch_to_guest_common(struct kvm_vcpu *vcpu)
if (!vcpu_get_flag(vcpu, DEBUG_DIRTY))
return;
- host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+ host_ctxt = host_data_ptr(host_ctxt);
guest_ctxt = &vcpu->arch.ctxt;
- host_dbg = &vcpu->arch.host_debug_state.regs;
+ host_dbg = host_data_ptr(host_debug_state.regs);
guest_dbg = kern_hyp_va(vcpu->arch.debug_ptr);
__debug_save_state(host_dbg, host_ctxt);
@@ -154,9 +154,9 @@ static inline void __debug_switch_to_host_common(struct kvm_vcpu *vcpu)
if (!vcpu_get_flag(vcpu, DEBUG_DIRTY))
return;
- host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+ host_ctxt = host_data_ptr(host_ctxt);
guest_ctxt = &vcpu->arch.ctxt;
- host_dbg = &vcpu->arch.host_debug_state.regs;
+ host_dbg = host_data_ptr(host_debug_state.regs);
guest_dbg = kern_hyp_va(vcpu->arch.debug_ptr);
__debug_save_state(guest_dbg, guest_ctxt);
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index e3fcf8c4d5b4..a92566f36022 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -27,6 +27,7 @@
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_nested.h>
+#include <asm/kvm_ptrauth.h>
#include <asm/fpsimd.h>
#include <asm/debug-monitors.h>
#include <asm/processor.h>
@@ -39,12 +40,6 @@ struct kvm_exception_table_entry {
extern struct kvm_exception_table_entry __start___kvm_ex_table;
extern struct kvm_exception_table_entry __stop___kvm_ex_table;
-/* Check whether the FP regs are owned by the guest */
-static inline bool guest_owns_fp_regs(struct kvm_vcpu *vcpu)
-{
- return vcpu->arch.fp_state == FP_STATE_GUEST_OWNED;
-}
-
/* Save the 32-bit only FPSIMD system register state */
static inline void __fpsimd_save_fpexc32(struct kvm_vcpu *vcpu)
{
@@ -155,7 +150,7 @@ static inline bool cpu_has_amu(void)
static inline void __activate_traps_hfgxtr(struct kvm_vcpu *vcpu)
{
- struct kvm_cpu_context *hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+ struct kvm_cpu_context *hctxt = host_data_ptr(host_ctxt);
struct kvm *kvm = kern_hyp_va(vcpu->kvm);
CHECK_FGT_MASKS(HFGRTR_EL2);
@@ -191,7 +186,7 @@ static inline void __activate_traps_hfgxtr(struct kvm_vcpu *vcpu)
static inline void __deactivate_traps_hfgxtr(struct kvm_vcpu *vcpu)
{
- struct kvm_cpu_context *hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+ struct kvm_cpu_context *hctxt = host_data_ptr(host_ctxt);
struct kvm *kvm = kern_hyp_va(vcpu->kvm);
if (!cpus_have_final_cap(ARM64_HAS_FGT))
@@ -226,13 +221,13 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu)
write_sysreg(0, pmselr_el0);
- hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+ hctxt = host_data_ptr(host_ctxt);
ctxt_sys_reg(hctxt, PMUSERENR_EL0) = read_sysreg(pmuserenr_el0);
write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0);
vcpu_set_flag(vcpu, PMUSERENR_ON_CPU);
}
- vcpu->arch.mdcr_el2_host = read_sysreg(mdcr_el2);
+ *host_data_ptr(host_debug_state.mdcr_el2) = read_sysreg(mdcr_el2);
write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2);
if (cpus_have_final_cap(ARM64_HAS_HCX)) {
@@ -254,13 +249,13 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu)
static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu)
{
- write_sysreg(vcpu->arch.mdcr_el2_host, mdcr_el2);
+ write_sysreg(*host_data_ptr(host_debug_state.mdcr_el2), mdcr_el2);
write_sysreg(0, hstr_el2);
if (kvm_arm_support_pmu_v3()) {
struct kvm_cpu_context *hctxt;
- hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+ hctxt = host_data_ptr(host_ctxt);
write_sysreg(ctxt_sys_reg(hctxt, PMUSERENR_EL0), pmuserenr_el0);
vcpu_clear_flag(vcpu, PMUSERENR_ON_CPU);
}
@@ -271,10 +266,8 @@ static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu)
__deactivate_traps_hfgxtr(vcpu);
}
-static inline void ___activate_traps(struct kvm_vcpu *vcpu)
+static inline void ___activate_traps(struct kvm_vcpu *vcpu, u64 hcr)
{
- u64 hcr = vcpu->arch.hcr_el2;
-
if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM))
hcr |= HCR_TVM;
@@ -376,8 +369,8 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
isb();
/* Write out the host state if it's in the registers */
- if (vcpu->arch.fp_state == FP_STATE_HOST_OWNED)
- __fpsimd_save_state(vcpu->arch.host_fpsimd_state);
+ if (host_owns_fp_regs())
+ __fpsimd_save_state(*host_data_ptr(fpsimd_state));
/* Restore the guest state */
if (sve_guest)
@@ -389,7 +382,7 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
if (!(read_sysreg(hcr_el2) & HCR_RW))
write_sysreg(__vcpu_sys_reg(vcpu, FPEXC32_EL2), fpexc32_el2);
- vcpu->arch.fp_state = FP_STATE_GUEST_OWNED;
+ *host_data_ptr(fp_owner) = FP_STATE_GUEST_OWNED;
return true;
}
@@ -449,60 +442,6 @@ static inline bool handle_tx2_tvm(struct kvm_vcpu *vcpu)
return true;
}
-static inline bool esr_is_ptrauth_trap(u64 esr)
-{
- switch (esr_sys64_to_sysreg(esr)) {
- case SYS_APIAKEYLO_EL1:
- case SYS_APIAKEYHI_EL1:
- case SYS_APIBKEYLO_EL1:
- case SYS_APIBKEYHI_EL1:
- case SYS_APDAKEYLO_EL1:
- case SYS_APDAKEYHI_EL1:
- case SYS_APDBKEYLO_EL1:
- case SYS_APDBKEYHI_EL1:
- case SYS_APGAKEYLO_EL1:
- case SYS_APGAKEYHI_EL1:
- return true;
- }
-
- return false;
-}
-
-#define __ptrauth_save_key(ctxt, key) \
- do { \
- u64 __val; \
- __val = read_sysreg_s(SYS_ ## key ## KEYLO_EL1); \
- ctxt_sys_reg(ctxt, key ## KEYLO_EL1) = __val; \
- __val = read_sysreg_s(SYS_ ## key ## KEYHI_EL1); \
- ctxt_sys_reg(ctxt, key ## KEYHI_EL1) = __val; \
-} while(0)
-
-DECLARE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt);
-
-static bool kvm_hyp_handle_ptrauth(struct kvm_vcpu *vcpu, u64 *exit_code)
-{
- struct kvm_cpu_context *ctxt;
- u64 val;
-
- if (!vcpu_has_ptrauth(vcpu))
- return false;
-
- ctxt = this_cpu_ptr(&kvm_hyp_ctxt);
- __ptrauth_save_key(ctxt, APIA);
- __ptrauth_save_key(ctxt, APIB);
- __ptrauth_save_key(ctxt, APDA);
- __ptrauth_save_key(ctxt, APDB);
- __ptrauth_save_key(ctxt, APGA);
-
- vcpu_ptrauth_enable(vcpu);
-
- val = read_sysreg(hcr_el2);
- val |= (HCR_API | HCR_APK);
- write_sysreg(val, hcr_el2);
-
- return true;
-}
-
static bool kvm_hyp_handle_cntpct(struct kvm_vcpu *vcpu)
{
struct arch_timer_context *ctxt;
@@ -590,9 +529,6 @@ static bool kvm_hyp_handle_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code)
__vgic_v3_perform_cpuif_access(vcpu) == 1)
return true;
- if (esr_is_ptrauth_trap(kvm_vcpu_get_esr(vcpu)))
- return kvm_hyp_handle_ptrauth(vcpu, exit_code);
-
if (kvm_hyp_handle_cntpct(vcpu))
return true;
diff --git a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
index 82b3d62538a6..22f374e9f532 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
@@ -53,7 +53,13 @@ pkvm_hyp_vcpu_to_hyp_vm(struct pkvm_hyp_vcpu *hyp_vcpu)
return container_of(hyp_vcpu->vcpu.kvm, struct pkvm_hyp_vm, kvm);
}
+static inline bool pkvm_hyp_vcpu_is_protected(struct pkvm_hyp_vcpu *hyp_vcpu)
+{
+ return vcpu_is_protected(&hyp_vcpu->vcpu);
+}
+
void pkvm_hyp_vm_table_init(void *tbl);
+void pkvm_host_fpsimd_state_init(void);
int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva,
unsigned long pgd_hva);
diff --git a/arch/arm64/kvm/hyp/nvhe/debug-sr.c b/arch/arm64/kvm/hyp/nvhe/debug-sr.c
index 7746ea507b6f..53efda0235cf 100644
--- a/arch/arm64/kvm/hyp/nvhe/debug-sr.c
+++ b/arch/arm64/kvm/hyp/nvhe/debug-sr.c
@@ -83,10 +83,10 @@ void __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu)
{
/* Disable and flush SPE data generation */
if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_SPE))
- __debug_save_spe(&vcpu->arch.host_debug_state.pmscr_el1);
+ __debug_save_spe(host_data_ptr(host_debug_state.pmscr_el1));
/* Disable and flush Self-Hosted Trace generation */
if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_TRBE))
- __debug_save_trace(&vcpu->arch.host_debug_state.trfcr_el1);
+ __debug_save_trace(host_data_ptr(host_debug_state.trfcr_el1));
}
void __debug_switch_to_guest(struct kvm_vcpu *vcpu)
@@ -97,9 +97,9 @@ void __debug_switch_to_guest(struct kvm_vcpu *vcpu)
void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu)
{
if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_SPE))
- __debug_restore_spe(vcpu->arch.host_debug_state.pmscr_el1);
+ __debug_restore_spe(*host_data_ptr(host_debug_state.pmscr_el1));
if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_TRBE))
- __debug_restore_trace(vcpu->arch.host_debug_state.trfcr_el1);
+ __debug_restore_trace(*host_data_ptr(host_debug_state.trfcr_el1));
}
void __debug_switch_to_host(struct kvm_vcpu *vcpu)
diff --git a/arch/arm64/kvm/hyp/nvhe/ffa.c b/arch/arm64/kvm/hyp/nvhe/ffa.c
index 320f2eaa14a9..02746f9d0980 100644
--- a/arch/arm64/kvm/hyp/nvhe/ffa.c
+++ b/arch/arm64/kvm/hyp/nvhe/ffa.c
@@ -600,7 +600,6 @@ static bool ffa_call_supported(u64 func_id)
case FFA_MSG_POLL:
case FFA_MSG_WAIT:
/* 32-bit variants of 64-bit calls */
- case FFA_MSG_SEND_DIRECT_REQ:
case FFA_MSG_SEND_DIRECT_RESP:
case FFA_RXTX_MAP:
case FFA_MEM_DONATE:
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index 2385fd03ed87..d5c48dc98f67 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -39,10 +39,8 @@ static void flush_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
hyp_vcpu->vcpu.arch.cptr_el2 = host_vcpu->arch.cptr_el2;
hyp_vcpu->vcpu.arch.iflags = host_vcpu->arch.iflags;
- hyp_vcpu->vcpu.arch.fp_state = host_vcpu->arch.fp_state;
hyp_vcpu->vcpu.arch.debug_ptr = kern_hyp_va(host_vcpu->arch.debug_ptr);
- hyp_vcpu->vcpu.arch.host_fpsimd_state = host_vcpu->arch.host_fpsimd_state;
hyp_vcpu->vcpu.arch.vsesr_el2 = host_vcpu->arch.vsesr_el2;
@@ -64,7 +62,6 @@ static void sync_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
host_vcpu->arch.fault = hyp_vcpu->vcpu.arch.fault;
host_vcpu->arch.iflags = hyp_vcpu->vcpu.arch.iflags;
- host_vcpu->arch.fp_state = hyp_vcpu->vcpu.arch.fp_state;
host_cpu_if->vgic_hcr = hyp_cpu_if->vgic_hcr;
for (i = 0; i < hyp_cpu_if->used_lrs; ++i)
@@ -178,16 +175,6 @@ static void handle___vgic_v3_get_gic_config(struct kvm_cpu_context *host_ctxt)
cpu_reg(host_ctxt, 1) = __vgic_v3_get_gic_config();
}
-static void handle___vgic_v3_read_vmcr(struct kvm_cpu_context *host_ctxt)
-{
- cpu_reg(host_ctxt, 1) = __vgic_v3_read_vmcr();
-}
-
-static void handle___vgic_v3_write_vmcr(struct kvm_cpu_context *host_ctxt)
-{
- __vgic_v3_write_vmcr(cpu_reg(host_ctxt, 1));
-}
-
static void handle___vgic_v3_init_lrs(struct kvm_cpu_context *host_ctxt)
{
__vgic_v3_init_lrs();
@@ -198,18 +185,18 @@ static void handle___kvm_get_mdcr_el2(struct kvm_cpu_context *host_ctxt)
cpu_reg(host_ctxt, 1) = __kvm_get_mdcr_el2();
}
-static void handle___vgic_v3_save_aprs(struct kvm_cpu_context *host_ctxt)
+static void handle___vgic_v3_save_vmcr_aprs(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(struct vgic_v3_cpu_if *, cpu_if, host_ctxt, 1);
- __vgic_v3_save_aprs(kern_hyp_va(cpu_if));
+ __vgic_v3_save_vmcr_aprs(kern_hyp_va(cpu_if));
}
-static void handle___vgic_v3_restore_aprs(struct kvm_cpu_context *host_ctxt)
+static void handle___vgic_v3_restore_vmcr_aprs(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(struct vgic_v3_cpu_if *, cpu_if, host_ctxt, 1);
- __vgic_v3_restore_aprs(kern_hyp_va(cpu_if));
+ __vgic_v3_restore_vmcr_aprs(kern_hyp_va(cpu_if));
}
static void handle___pkvm_init(struct kvm_cpu_context *host_ctxt)
@@ -340,10 +327,8 @@ static const hcall_t host_hcall[] = {
HANDLE_FUNC(__kvm_tlb_flush_vmid_range),
HANDLE_FUNC(__kvm_flush_cpu_context),
HANDLE_FUNC(__kvm_timer_set_cntvoff),
- HANDLE_FUNC(__vgic_v3_read_vmcr),
- HANDLE_FUNC(__vgic_v3_write_vmcr),
- HANDLE_FUNC(__vgic_v3_save_aprs),
- HANDLE_FUNC(__vgic_v3_restore_aprs),
+ HANDLE_FUNC(__vgic_v3_save_vmcr_aprs),
+ HANDLE_FUNC(__vgic_v3_restore_vmcr_aprs),
HANDLE_FUNC(__pkvm_vcpu_init_traps),
HANDLE_FUNC(__pkvm_init_vm),
HANDLE_FUNC(__pkvm_init_vcpu),
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index 861c76021a25..caba3e4bd09e 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -533,7 +533,13 @@ void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt)
int ret = 0;
esr = read_sysreg_el2(SYS_ESR);
- BUG_ON(!__get_fault_info(esr, &fault));
+ if (!__get_fault_info(esr, &fault)) {
+ /*
+ * We've presumably raced with a page-table change which caused
+ * AT to fail, try again.
+ */
+ return;
+ }
addr = (fault.hpfar_el2 & HPFAR_MASK) << 8;
ret = host_stage2_idmap(addr);
diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c
index 26dd9a20ad6e..16aa4875ddb8 100644
--- a/arch/arm64/kvm/hyp/nvhe/pkvm.c
+++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
@@ -200,7 +200,7 @@ static void pvm_init_trap_regs(struct kvm_vcpu *vcpu)
}
/*
- * Initialize trap register values for protected VMs.
+ * Initialize trap register values in protected mode.
*/
void __pkvm_vcpu_init_traps(struct kvm_vcpu *vcpu)
{
@@ -247,6 +247,17 @@ void pkvm_hyp_vm_table_init(void *tbl)
vm_table = tbl;
}
+void pkvm_host_fpsimd_state_init(void)
+{
+ unsigned long i;
+
+ for (i = 0; i < hyp_nr_cpus; i++) {
+ struct kvm_host_data *host_data = per_cpu_ptr(&kvm_host_data, i);
+
+ host_data->fpsimd_state = &host_data->host_ctxt.fp_regs;
+ }
+}
+
/*
* Return the hyp vm structure corresponding to the handle.
*/
@@ -430,6 +441,7 @@ static void *map_donated_memory(unsigned long host_va, size_t size)
static void __unmap_donated_memory(void *va, size_t size)
{
+ kvm_flush_dcache_to_poc(va, size);
WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(va),
PAGE_ALIGN(size) >> PAGE_SHIFT));
}
diff --git a/arch/arm64/kvm/hyp/nvhe/psci-relay.c b/arch/arm64/kvm/hyp/nvhe/psci-relay.c
index d57bcb6ab94d..dfe8fe0f7eaf 100644
--- a/arch/arm64/kvm/hyp/nvhe/psci-relay.c
+++ b/arch/arm64/kvm/hyp/nvhe/psci-relay.c
@@ -205,7 +205,7 @@ asmlinkage void __noreturn __kvm_host_psci_cpu_entry(bool is_cpu_on)
struct psci_boot_args *boot_args;
struct kvm_cpu_context *host_ctxt;
- host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+ host_ctxt = host_data_ptr(host_ctxt);
if (is_cpu_on)
boot_args = this_cpu_ptr(&cpu_on_args);
diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
index bc58d1b515af..859f22f754d3 100644
--- a/arch/arm64/kvm/hyp/nvhe/setup.c
+++ b/arch/arm64/kvm/hyp/nvhe/setup.c
@@ -257,8 +257,7 @@ static int fix_hyp_pgtable_refcnt(void)
void __noreturn __pkvm_init_finalise(void)
{
- struct kvm_host_data *host_data = this_cpu_ptr(&kvm_host_data);
- struct kvm_cpu_context *host_ctxt = &host_data->host_ctxt;
+ struct kvm_cpu_context *host_ctxt = host_data_ptr(host_ctxt);
unsigned long nr_pages, reserved_pages, pfn;
int ret;
@@ -301,6 +300,7 @@ void __noreturn __pkvm_init_finalise(void)
goto out;
pkvm_hyp_vm_table_init(vm_table_base);
+ pkvm_host_fpsimd_state_init();
out:
/*
* We tail-called to here from handle___pkvm_init() and will not return,
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index c50f8459e4fc..6758cd905570 100644
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -40,7 +40,7 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
{
u64 val;
- ___activate_traps(vcpu);
+ ___activate_traps(vcpu, vcpu->arch.hcr_el2);
__activate_traps_common(vcpu);
val = vcpu->arch.cptr_el2;
@@ -53,7 +53,7 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
val |= CPTR_EL2_TSM;
}
- if (!guest_owns_fp_regs(vcpu)) {
+ if (!guest_owns_fp_regs()) {
if (has_hvhe())
val &= ~(CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN |
CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN);
@@ -191,7 +191,6 @@ static const exit_handler_fn hyp_exit_handlers[] = {
[ESR_ELx_EC_IABT_LOW] = kvm_hyp_handle_iabt_low,
[ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low,
[ESR_ELx_EC_WATCHPT_LOW] = kvm_hyp_handle_watchpt_low,
- [ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth,
[ESR_ELx_EC_MOPS] = kvm_hyp_handle_mops,
};
@@ -203,13 +202,12 @@ static const exit_handler_fn pvm_exit_handlers[] = {
[ESR_ELx_EC_IABT_LOW] = kvm_hyp_handle_iabt_low,
[ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low,
[ESR_ELx_EC_WATCHPT_LOW] = kvm_hyp_handle_watchpt_low,
- [ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth,
[ESR_ELx_EC_MOPS] = kvm_hyp_handle_mops,
};
static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu)
{
- if (unlikely(kvm_vm_is_protected(kern_hyp_va(vcpu->kvm))))
+ if (unlikely(vcpu_is_protected(vcpu)))
return pvm_exit_handlers;
return hyp_exit_handlers;
@@ -228,9 +226,7 @@ static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu)
*/
static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code)
{
- struct kvm *kvm = kern_hyp_va(vcpu->kvm);
-
- if (kvm_vm_is_protected(kvm) && vcpu_mode_is_32bit(vcpu)) {
+ if (unlikely(vcpu_is_protected(vcpu) && vcpu_mode_is_32bit(vcpu))) {
/*
* As we have caught the guest red-handed, decide that it isn't
* fit for purpose anymore by making the vcpu invalid. The VMM
@@ -264,7 +260,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
pmr_sync();
}
- host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+ host_ctxt = host_data_ptr(host_ctxt);
host_ctxt->__hyp_running_vcpu = vcpu;
guest_ctxt = &vcpu->arch.ctxt;
@@ -337,7 +333,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
__sysreg_restore_state_nvhe(host_ctxt);
- if (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED)
+ if (guest_owns_fp_regs())
__fpsimd_save_fpexc32(vcpu);
__debug_switch_to_host(vcpu);
@@ -367,7 +363,7 @@ asmlinkage void __noreturn hyp_panic(void)
struct kvm_cpu_context *host_ctxt;
struct kvm_vcpu *vcpu;
- host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+ host_ctxt = host_data_ptr(host_ctxt);
vcpu = host_ctxt->__hyp_running_vcpu;
if (vcpu) {
diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c
index 2fc68da4036d..ca3c09df8d7c 100644
--- a/arch/arm64/kvm/hyp/nvhe/tlb.c
+++ b/arch/arm64/kvm/hyp/nvhe/tlb.c
@@ -11,13 +11,23 @@
#include <nvhe/mem_protect.h>
struct tlb_inv_context {
- u64 tcr;
+ struct kvm_s2_mmu *mmu;
+ u64 tcr;
+ u64 sctlr;
};
-static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu,
- struct tlb_inv_context *cxt,
- bool nsh)
+static void enter_vmid_context(struct kvm_s2_mmu *mmu,
+ struct tlb_inv_context *cxt,
+ bool nsh)
{
+ struct kvm_s2_mmu *host_s2_mmu = &host_mmu.arch.mmu;
+ struct kvm_cpu_context *host_ctxt;
+ struct kvm_vcpu *vcpu;
+
+ host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+ vcpu = host_ctxt->__hyp_running_vcpu;
+ cxt->mmu = NULL;
+
/*
* We have two requirements:
*
@@ -40,20 +50,55 @@ static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu,
else
dsb(ish);
+ /*
+ * If we're already in the desired context, then there's nothing to do.
+ */
+ if (vcpu) {
+ /*
+ * We're in guest context. However, for this to work, this needs
+ * to be called from within __kvm_vcpu_run(), which ensures that
+ * __hyp_running_vcpu is set to the current guest vcpu.
+ */
+ if (mmu == vcpu->arch.hw_mmu || WARN_ON(mmu != host_s2_mmu))
+ return;
+
+ cxt->mmu = vcpu->arch.hw_mmu;
+ } else {
+ /* We're in host context. */
+ if (mmu == host_s2_mmu)
+ return;
+
+ cxt->mmu = host_s2_mmu;
+ }
+
if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
u64 val;
/*
* For CPUs that are affected by ARM 1319367, we need to
- * avoid a host Stage-1 walk while we have the guest's
- * VMID set in the VTTBR in order to invalidate TLBs.
- * We're guaranteed that the S1 MMU is enabled, so we can
- * simply set the EPD bits to avoid any further TLB fill.
+ * avoid a Stage-1 walk with the old VMID while we have
+ * the new VMID set in the VTTBR in order to invalidate TLBs.
+ * We're guaranteed that the host S1 MMU is enabled, so
+ * we can simply set the EPD bits to avoid any further
+ * TLB fill. For guests, we ensure that the S1 MMU is
+ * temporarily enabled in the next context.
*/
val = cxt->tcr = read_sysreg_el1(SYS_TCR);
val |= TCR_EPD1_MASK | TCR_EPD0_MASK;
write_sysreg_el1(val, SYS_TCR);
isb();
+
+ if (vcpu) {
+ val = cxt->sctlr = read_sysreg_el1(SYS_SCTLR);
+ if (!(val & SCTLR_ELx_M)) {
+ val |= SCTLR_ELx_M;
+ write_sysreg_el1(val, SYS_SCTLR);
+ isb();
+ }
+ } else {
+ /* The host S1 MMU is always enabled. */
+ cxt->sctlr = SCTLR_ELx_M;
+ }
}
/*
@@ -62,18 +107,40 @@ static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu,
* ensuring that we always have an ISB, but not two ISBs back
* to back.
*/
- __load_stage2(mmu, kern_hyp_va(mmu->arch));
+ if (vcpu)
+ __load_host_stage2();
+ else
+ __load_stage2(mmu, kern_hyp_va(mmu->arch));
+
asm(ALTERNATIVE("isb", "nop", ARM64_WORKAROUND_SPECULATIVE_AT));
}
-static void __tlb_switch_to_host(struct tlb_inv_context *cxt)
+static void exit_vmid_context(struct tlb_inv_context *cxt)
{
- __load_host_stage2();
+ struct kvm_s2_mmu *mmu = cxt->mmu;
+ struct kvm_cpu_context *host_ctxt;
+ struct kvm_vcpu *vcpu;
+
+ host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+ vcpu = host_ctxt->__hyp_running_vcpu;
+
+ if (!mmu)
+ return;
+
+ if (vcpu)
+ __load_stage2(mmu, kern_hyp_va(mmu->arch));
+ else
+ __load_host_stage2();
if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
- /* Ensure write of the host VMID */
+ /* Ensure write of the old VMID */
isb();
- /* Restore the host's TCR_EL1 */
+
+ if (!(cxt->sctlr & SCTLR_ELx_M)) {
+ write_sysreg_el1(cxt->sctlr, SYS_SCTLR);
+ isb();
+ }
+
write_sysreg_el1(cxt->tcr, SYS_TCR);
}
}
@@ -84,7 +151,7 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu,
struct tlb_inv_context cxt;
/* Switch to requested VMID */
- __tlb_switch_to_guest(mmu, &cxt, false);
+ enter_vmid_context(mmu, &cxt, false);
/*
* We could do so much better if we had the VA as well.
@@ -105,7 +172,7 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu,
dsb(ish);
isb();
- __tlb_switch_to_host(&cxt);
+ exit_vmid_context(&cxt);
}
void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
@@ -114,7 +181,7 @@ void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
struct tlb_inv_context cxt;
/* Switch to requested VMID */
- __tlb_switch_to_guest(mmu, &cxt, true);
+ enter_vmid_context(mmu, &cxt, true);
/*
* We could do so much better if we had the VA as well.
@@ -135,7 +202,7 @@ void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
dsb(nsh);
isb();
- __tlb_switch_to_host(&cxt);
+ exit_vmid_context(&cxt);
}
void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
@@ -152,7 +219,7 @@ void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
start = round_down(start, stride);
/* Switch to requested VMID */
- __tlb_switch_to_guest(mmu, &cxt, false);
+ enter_vmid_context(mmu, &cxt, false);
__flush_s2_tlb_range_op(ipas2e1is, start, pages, stride,
TLBI_TTL_UNKNOWN);
@@ -162,7 +229,7 @@ void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
dsb(ish);
isb();
- __tlb_switch_to_host(&cxt);
+ exit_vmid_context(&cxt);
}
void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
@@ -170,13 +237,13 @@ void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
struct tlb_inv_context cxt;
/* Switch to requested VMID */
- __tlb_switch_to_guest(mmu, &cxt, false);
+ enter_vmid_context(mmu, &cxt, false);
__tlbi(vmalls12e1is);
dsb(ish);
isb();
- __tlb_switch_to_host(&cxt);
+ exit_vmid_context(&cxt);
}
void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu)
@@ -184,19 +251,19 @@ void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu)
struct tlb_inv_context cxt;
/* Switch to requested VMID */
- __tlb_switch_to_guest(mmu, &cxt, false);
+ enter_vmid_context(mmu, &cxt, false);
__tlbi(vmalle1);
asm volatile("ic iallu");
dsb(nsh);
isb();
- __tlb_switch_to_host(&cxt);
+ exit_vmid_context(&cxt);
}
void __kvm_flush_vm_context(void)
{
- /* Same remark as in __tlb_switch_to_guest() */
+ /* Same remark as in enter_vmid_context() */
dsb(ish);
__tlbi(alle1is);
dsb(ish);
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 5a59ef88b646..9e2bbee77491 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -914,12 +914,12 @@ static void stage2_unmap_put_pte(const struct kvm_pgtable_visit_ctx *ctx,
static bool stage2_pte_cacheable(struct kvm_pgtable *pgt, kvm_pte_t pte)
{
u64 memattr = pte & KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR;
- return memattr == KVM_S2_MEMATTR(pgt, NORMAL);
+ return kvm_pte_valid(pte) && memattr == KVM_S2_MEMATTR(pgt, NORMAL);
}
static bool stage2_pte_executable(kvm_pte_t pte)
{
- return !(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN);
+ return kvm_pte_valid(pte) && !(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN);
}
static u64 stage2_map_walker_phys_addr(const struct kvm_pgtable_visit_ctx *ctx,
@@ -979,6 +979,21 @@ static int stage2_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx,
if (!stage2_pte_needs_update(ctx->old, new))
return -EAGAIN;
+ /* If we're only changing software bits, then store them and go! */
+ if (!kvm_pgtable_walk_shared(ctx) &&
+ !((ctx->old ^ new) & ~KVM_PTE_LEAF_ATTR_HI_SW)) {
+ bool old_is_counted = stage2_pte_is_counted(ctx->old);
+
+ if (old_is_counted != stage2_pte_is_counted(new)) {
+ if (old_is_counted)
+ mm_ops->put_page(ctx->ptep);
+ else
+ mm_ops->get_page(ctx->ptep);
+ }
+ WARN_ON_ONCE(!stage2_try_set_pte(ctx, new));
+ return 0;
+ }
+
if (!stage2_try_break_pte(ctx, data->mmu))
return -EAGAIN;
@@ -1370,7 +1385,7 @@ static int stage2_flush_walker(const struct kvm_pgtable_visit_ctx *ctx,
struct kvm_pgtable *pgt = ctx->arg;
struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops;
- if (!kvm_pte_valid(ctx->old) || !stage2_pte_cacheable(pgt, ctx->old))
+ if (!stage2_pte_cacheable(pgt, ctx->old))
return 0;
if (mm_ops->dcache_clean_inval_poc)
diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c
index 6cb638b184b1..7b397fad26f2 100644
--- a/arch/arm64/kvm/hyp/vgic-v3-sr.c
+++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c
@@ -330,7 +330,7 @@ void __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if)
write_gicreg(0, ICH_HCR_EL2);
}
-void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if)
+static void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if)
{
u64 val;
u32 nr_pre_bits;
@@ -363,7 +363,7 @@ void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if)
}
}
-void __vgic_v3_restore_aprs(struct vgic_v3_cpu_if *cpu_if)
+static void __vgic_v3_restore_aprs(struct vgic_v3_cpu_if *cpu_if)
{
u64 val;
u32 nr_pre_bits;
@@ -455,16 +455,35 @@ u64 __vgic_v3_get_gic_config(void)
return val;
}
-u64 __vgic_v3_read_vmcr(void)
+static u64 __vgic_v3_read_vmcr(void)
{
return read_gicreg(ICH_VMCR_EL2);
}
-void __vgic_v3_write_vmcr(u32 vmcr)
+static void __vgic_v3_write_vmcr(u32 vmcr)
{
write_gicreg(vmcr, ICH_VMCR_EL2);
}
+void __vgic_v3_save_vmcr_aprs(struct vgic_v3_cpu_if *cpu_if)
+{
+ __vgic_v3_save_aprs(cpu_if);
+ if (cpu_if->vgic_sre)
+ cpu_if->vgic_vmcr = __vgic_v3_read_vmcr();
+}
+
+void __vgic_v3_restore_vmcr_aprs(struct vgic_v3_cpu_if *cpu_if)
+{
+ /*
+ * If dealing with a GICv2 emulation on GICv3, VMCR_EL2.VFIQen
+ * is dependent on ICC_SRE_EL1.SRE, and we have to perform the
+ * VMCR_EL2 save/restore in the world switch.
+ */
+ if (cpu_if->vgic_sre)
+ __vgic_v3_write_vmcr(cpu_if->vgic_vmcr);
+ __vgic_v3_restore_aprs(cpu_if);
+}
+
static int __vgic_v3_bpr_min(void)
{
/* See Pseudocode for VPriorityGroup */
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
index 1581df6aec87..d7af5f46f22a 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -33,11 +33,43 @@ DEFINE_PER_CPU(struct kvm_host_data, kvm_host_data);
DEFINE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt);
DEFINE_PER_CPU(unsigned long, kvm_hyp_vector);
+/*
+ * HCR_EL2 bits that the NV guest can freely change (no RES0/RES1
+ * semantics, irrespective of the configuration), but that cannot be
+ * applied to the actual HW as things would otherwise break badly.
+ *
+ * - TGE: we want the guest to use EL1, which is incompatible with
+ * this bit being set
+ *
+ * - API/APK: they are already accounted for by vcpu_load(), and can
+ * only take effect across a load/put cycle (such as ERET)
+ */
+#define NV_HCR_GUEST_EXCLUDE (HCR_TGE | HCR_API | HCR_APK)
+
+static u64 __compute_hcr(struct kvm_vcpu *vcpu)
+{
+ u64 hcr = vcpu->arch.hcr_el2;
+
+ if (!vcpu_has_nv(vcpu))
+ return hcr;
+
+ if (is_hyp_ctxt(vcpu)) {
+ hcr |= HCR_NV | HCR_NV2 | HCR_AT | HCR_TTLB;
+
+ if (!vcpu_el2_e2h_is_set(vcpu))
+ hcr |= HCR_NV1;
+
+ write_sysreg_s(vcpu->arch.ctxt.vncr_array, SYS_VNCR_EL2);
+ }
+
+ return hcr | (__vcpu_sys_reg(vcpu, HCR_EL2) & ~NV_HCR_GUEST_EXCLUDE);
+}
+
static void __activate_traps(struct kvm_vcpu *vcpu)
{
u64 val;
- ___activate_traps(vcpu);
+ ___activate_traps(vcpu, __compute_hcr(vcpu));
if (has_cntpoff()) {
struct timer_map map;
@@ -75,7 +107,7 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
val |= CPTR_EL2_TAM;
- if (guest_owns_fp_regs(vcpu)) {
+ if (guest_owns_fp_regs()) {
if (vcpu_has_sve(vcpu))
val |= CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN;
} else {
@@ -162,6 +194,8 @@ static void __vcpu_put_deactivate_traps(struct kvm_vcpu *vcpu)
void kvm_vcpu_load_vhe(struct kvm_vcpu *vcpu)
{
+ host_data_ptr(host_ctxt)->__hyp_running_vcpu = vcpu;
+
__vcpu_load_switch_sysregs(vcpu);
__vcpu_load_activate_traps(vcpu);
__load_stage2(vcpu->arch.hw_mmu, vcpu->arch.hw_mmu->arch);
@@ -171,6 +205,61 @@ void kvm_vcpu_put_vhe(struct kvm_vcpu *vcpu)
{
__vcpu_put_deactivate_traps(vcpu);
__vcpu_put_switch_sysregs(vcpu);
+
+ host_data_ptr(host_ctxt)->__hyp_running_vcpu = NULL;
+}
+
+static bool kvm_hyp_handle_eret(struct kvm_vcpu *vcpu, u64 *exit_code)
+{
+ u64 esr = kvm_vcpu_get_esr(vcpu);
+ u64 spsr, elr, mode;
+
+ /*
+ * Going through the whole put/load motions is a waste of time
+ * if this is a VHE guest hypervisor returning to its own
+ * userspace, or the hypervisor performing a local exception
+ * return. No need to save/restore registers, no need to
+ * switch S2 MMU. Just do the canonical ERET.
+ *
+ * Unless the trap has to be forwarded further down the line,
+ * of course...
+ */
+ if ((__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_NV) ||
+ (__vcpu_sys_reg(vcpu, HFGITR_EL2) & HFGITR_EL2_ERET))
+ return false;
+
+ spsr = read_sysreg_el1(SYS_SPSR);
+ mode = spsr & (PSR_MODE_MASK | PSR_MODE32_BIT);
+
+ switch (mode) {
+ case PSR_MODE_EL0t:
+ if (!(vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu)))
+ return false;
+ break;
+ case PSR_MODE_EL2t:
+ mode = PSR_MODE_EL1t;
+ break;
+ case PSR_MODE_EL2h:
+ mode = PSR_MODE_EL1h;
+ break;
+ default:
+ return false;
+ }
+
+ /* If ERETAx fails, take the slow path */
+ if (esr_iss_is_eretax(esr)) {
+ if (!(vcpu_has_ptrauth(vcpu) && kvm_auth_eretax(vcpu, &elr)))
+ return false;
+ } else {
+ elr = read_sysreg_el1(SYS_ELR);
+ }
+
+ spsr = (spsr & ~(PSR_MODE_MASK | PSR_MODE32_BIT)) | mode;
+
+ write_sysreg_el2(spsr, SYS_SPSR);
+ write_sysreg_el2(elr, SYS_ELR);
+
+ return true;
}
static const exit_handler_fn hyp_exit_handlers[] = {
@@ -182,7 +271,7 @@ static const exit_handler_fn hyp_exit_handlers[] = {
[ESR_ELx_EC_IABT_LOW] = kvm_hyp_handle_iabt_low,
[ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low,
[ESR_ELx_EC_WATCHPT_LOW] = kvm_hyp_handle_watchpt_low,
- [ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth,
+ [ESR_ELx_EC_ERET] = kvm_hyp_handle_eret,
[ESR_ELx_EC_MOPS] = kvm_hyp_handle_mops,
};
@@ -197,7 +286,7 @@ static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code)
* If we were in HYP context on entry, adjust the PSTATE view
* so that the usual helpers work correctly.
*/
- if (unlikely(vcpu_get_flag(vcpu, VCPU_HYP_CONTEXT))) {
+ if (vcpu_has_nv(vcpu) && (read_sysreg(hcr_el2) & HCR_NV)) {
u64 mode = *vcpu_cpsr(vcpu) & (PSR_MODE_MASK | PSR_MODE32_BIT);
switch (mode) {
@@ -221,8 +310,7 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
struct kvm_cpu_context *guest_ctxt;
u64 exit_code;
- host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
- host_ctxt->__hyp_running_vcpu = vcpu;
+ host_ctxt = host_data_ptr(host_ctxt);
guest_ctxt = &vcpu->arch.ctxt;
sysreg_save_host_state_vhe(host_ctxt);
@@ -240,11 +328,6 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
sysreg_restore_guest_state_vhe(guest_ctxt);
__debug_switch_to_guest(vcpu);
- if (is_hyp_ctxt(vcpu))
- vcpu_set_flag(vcpu, VCPU_HYP_CONTEXT);
- else
- vcpu_clear_flag(vcpu, VCPU_HYP_CONTEXT);
-
do {
/* Jump in the fire! */
exit_code = __guest_enter(vcpu);
@@ -258,7 +341,7 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
sysreg_restore_host_state_vhe(host_ctxt);
- if (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED)
+ if (guest_owns_fp_regs())
__fpsimd_save_fpexc32(vcpu);
__debug_switch_to_host(vcpu);
@@ -306,7 +389,7 @@ static void __hyp_call_panic(u64 spsr, u64 elr, u64 par)
struct kvm_cpu_context *host_ctxt;
struct kvm_vcpu *vcpu;
- host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+ host_ctxt = host_data_ptr(host_ctxt);
vcpu = host_ctxt->__hyp_running_vcpu;
__deactivate_traps(vcpu);
diff --git a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
index a8b9ea496706..e12bd7d6d2dc 100644
--- a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
+++ b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
@@ -67,7 +67,7 @@ void __vcpu_load_switch_sysregs(struct kvm_vcpu *vcpu)
struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt;
struct kvm_cpu_context *host_ctxt;
- host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+ host_ctxt = host_data_ptr(host_ctxt);
__sysreg_save_user_state(host_ctxt);
/*
@@ -110,7 +110,7 @@ void __vcpu_put_switch_sysregs(struct kvm_vcpu *vcpu)
struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt;
struct kvm_cpu_context *host_ctxt;
- host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+ host_ctxt = host_data_ptr(host_ctxt);
__sysreg_save_el1_state(guest_ctxt);
__sysreg_save_user_state(guest_ctxt);
diff --git a/arch/arm64/kvm/hyp/vhe/tlb.c b/arch/arm64/kvm/hyp/vhe/tlb.c
index 1a60b95381e8..5fa0359f3a87 100644
--- a/arch/arm64/kvm/hyp/vhe/tlb.c
+++ b/arch/arm64/kvm/hyp/vhe/tlb.c
@@ -17,8 +17,8 @@ struct tlb_inv_context {
u64 sctlr;
};
-static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu,
- struct tlb_inv_context *cxt)
+static void enter_vmid_context(struct kvm_s2_mmu *mmu,
+ struct tlb_inv_context *cxt)
{
struct kvm_vcpu *vcpu = kvm_get_running_vcpu();
u64 val;
@@ -67,7 +67,7 @@ static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu,
isb();
}
-static void __tlb_switch_to_host(struct tlb_inv_context *cxt)
+static void exit_vmid_context(struct tlb_inv_context *cxt)
{
/*
* We're done with the TLB operation, let's restore the host's
@@ -97,7 +97,7 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu,
dsb(ishst);
/* Switch to requested VMID */
- __tlb_switch_to_guest(mmu, &cxt);
+ enter_vmid_context(mmu, &cxt);
/*
* We could do so much better if we had the VA as well.
@@ -118,7 +118,7 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu,
dsb(ish);
isb();
- __tlb_switch_to_host(&cxt);
+ exit_vmid_context(&cxt);
}
void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
@@ -129,7 +129,7 @@ void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
dsb(nshst);
/* Switch to requested VMID */
- __tlb_switch_to_guest(mmu, &cxt);
+ enter_vmid_context(mmu, &cxt);
/*
* We could do so much better if we had the VA as well.
@@ -150,7 +150,7 @@ void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
dsb(nsh);
isb();
- __tlb_switch_to_host(&cxt);
+ exit_vmid_context(&cxt);
}
void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
@@ -169,7 +169,7 @@ void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
dsb(ishst);
/* Switch to requested VMID */
- __tlb_switch_to_guest(mmu, &cxt);
+ enter_vmid_context(mmu, &cxt);
__flush_s2_tlb_range_op(ipas2e1is, start, pages, stride,
TLBI_TTL_UNKNOWN);
@@ -179,7 +179,7 @@ void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
dsb(ish);
isb();
- __tlb_switch_to_host(&cxt);
+ exit_vmid_context(&cxt);
}
void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
@@ -189,13 +189,13 @@ void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
dsb(ishst);
/* Switch to requested VMID */
- __tlb_switch_to_guest(mmu, &cxt);
+ enter_vmid_context(mmu, &cxt);
__tlbi(vmalls12e1is);
dsb(ish);
isb();
- __tlb_switch_to_host(&cxt);
+ exit_vmid_context(&cxt);
}
void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu)
@@ -203,14 +203,14 @@ void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu)
struct tlb_inv_context cxt;
/* Switch to requested VMID */
- __tlb_switch_to_guest(mmu, &cxt);
+ enter_vmid_context(mmu, &cxt);
__tlbi(vmalle1);
asm volatile("ic iallu");
dsb(nsh);
isb();
- __tlb_switch_to_host(&cxt);
+ exit_vmid_context(&cxt);
}
void __kvm_flush_vm_context(void)
diff --git a/arch/arm64/kvm/mmio.c b/arch/arm64/kvm/mmio.c
index 200c8019a82a..cd6b7b83e2c3 100644
--- a/arch/arm64/kvm/mmio.c
+++ b/arch/arm64/kvm/mmio.c
@@ -86,7 +86,7 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu)
/* Detect an already handled MMIO return */
if (unlikely(!vcpu->mmio_needed))
- return 0;
+ return 1;
vcpu->mmio_needed = 0;
@@ -117,7 +117,7 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu)
*/
kvm_incr_pc(vcpu);
- return 0;
+ return 1;
}
int io_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
@@ -133,11 +133,19 @@ int io_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
/*
* No valid syndrome? Ask userspace for help if it has
* volunteered to do so, and bail out otherwise.
+ *
+ * In the protected VM case, there isn't much userspace can do
+ * though, so directly deliver an exception to the guest.
*/
if (!kvm_vcpu_dabt_isvalid(vcpu)) {
trace_kvm_mmio_nisv(*vcpu_pc(vcpu), kvm_vcpu_get_esr(vcpu),
kvm_vcpu_get_hfar(vcpu), fault_ipa);
+ if (vcpu_is_protected(vcpu)) {
+ kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu));
+ return 1;
+ }
+
if (test_bit(KVM_ARCH_FLAG_RETURN_NISV_IO_ABORT_TO_USER,
&vcpu->kvm->arch.flags)) {
run->exit_reason = KVM_EXIT_ARM_NISV;
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index ff17849be9f4..8bcab0cc3fe9 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -1522,8 +1522,10 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
read_lock(&kvm->mmu_lock);
pgt = vcpu->arch.hw_mmu->pgt;
- if (mmu_invalidate_retry(kvm, mmu_seq))
+ if (mmu_invalidate_retry(kvm, mmu_seq)) {
+ ret = -EAGAIN;
goto out_unlock;
+ }
/*
* If we are not forced to use page mapping, check if we are
@@ -1581,6 +1583,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
memcache,
KVM_PGTABLE_WALK_HANDLE_FAULT |
KVM_PGTABLE_WALK_SHARED);
+out_unlock:
+ read_unlock(&kvm->mmu_lock);
/* Mark the page dirty only if the fault is handled successfully */
if (writable && !ret) {
@@ -1588,8 +1592,6 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
mark_page_dirty_in_slot(kvm, memslot, gfn);
}
-out_unlock:
- read_unlock(&kvm->mmu_lock);
kvm_release_pfn_clean(pfn);
return ret != -EAGAIN ? ret : 0;
}
diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
index ced30c90521a..6813c7c7f00a 100644
--- a/arch/arm64/kvm/nested.c
+++ b/arch/arm64/kvm/nested.c
@@ -35,13 +35,9 @@ static u64 limit_nv_id_reg(u32 id, u64 val)
break;
case SYS_ID_AA64ISAR1_EL1:
- /* Support everything but PtrAuth and Spec Invalidation */
+ /* Support everything but Spec Invalidation */
val &= ~(GENMASK_ULL(63, 56) |
- NV_FTR(ISAR1, SPECRES) |
- NV_FTR(ISAR1, GPI) |
- NV_FTR(ISAR1, GPA) |
- NV_FTR(ISAR1, API) |
- NV_FTR(ISAR1, APA));
+ NV_FTR(ISAR1, SPECRES));
break;
case SYS_ID_AA64PFR0_EL1:
diff --git a/arch/arm64/kvm/pauth.c b/arch/arm64/kvm/pauth.c
new file mode 100644
index 000000000000..d5eb3ae876be
--- /dev/null
+++ b/arch/arm64/kvm/pauth.c
@@ -0,0 +1,206 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2024 - Google LLC
+ * Author: Marc Zyngier <maz@kernel.org>
+ *
+ * Primitive PAuth emulation for ERETAA/ERETAB.
+ *
+ * This code assumes that is is run from EL2, and that it is part of
+ * the emulation of ERETAx for a guest hypervisor. That's a lot of
+ * baked-in assumptions and shortcuts.
+ *
+ * Do no reuse for anything else!
+ */
+
+#include <linux/kvm_host.h>
+
+#include <asm/gpr-num.h>
+#include <asm/kvm_emulate.h>
+#include <asm/pointer_auth.h>
+
+/* PACGA Xd, Xn, Xm */
+#define PACGA(d,n,m) \
+ asm volatile(__DEFINE_ASM_GPR_NUMS \
+ ".inst 0x9AC03000 |" \
+ "(.L__gpr_num_%[Rd] << 0) |" \
+ "(.L__gpr_num_%[Rn] << 5) |" \
+ "(.L__gpr_num_%[Rm] << 16)\n" \
+ : [Rd] "=r" ((d)) \
+ : [Rn] "r" ((n)), [Rm] "r" ((m)))
+
+static u64 compute_pac(struct kvm_vcpu *vcpu, u64 ptr,
+ struct ptrauth_key ikey)
+{
+ struct ptrauth_key gkey;
+ u64 mod, pac = 0;
+
+ preempt_disable();
+
+ if (!vcpu_get_flag(vcpu, SYSREGS_ON_CPU))
+ mod = __vcpu_sys_reg(vcpu, SP_EL2);
+ else
+ mod = read_sysreg(sp_el1);
+
+ gkey.lo = read_sysreg_s(SYS_APGAKEYLO_EL1);
+ gkey.hi = read_sysreg_s(SYS_APGAKEYHI_EL1);
+
+ __ptrauth_key_install_nosync(APGA, ikey);
+ isb();
+
+ PACGA(pac, ptr, mod);
+ isb();
+
+ __ptrauth_key_install_nosync(APGA, gkey);
+
+ preempt_enable();
+
+ /* PAC in the top 32bits */
+ return pac;
+}
+
+static bool effective_tbi(struct kvm_vcpu *vcpu, bool bit55)
+{
+ u64 tcr = vcpu_read_sys_reg(vcpu, TCR_EL2);
+ bool tbi, tbid;
+
+ /*
+ * Since we are authenticating an instruction address, we have
+ * to take TBID into account. If E2H==0, ignore VA[55], as
+ * TCR_EL2 only has a single TBI/TBID. If VA[55] was set in
+ * this case, this is likely a guest bug...
+ */
+ if (!vcpu_el2_e2h_is_set(vcpu)) {
+ tbi = tcr & BIT(20);
+ tbid = tcr & BIT(29);
+ } else if (bit55) {
+ tbi = tcr & TCR_TBI1;
+ tbid = tcr & TCR_TBID1;
+ } else {
+ tbi = tcr & TCR_TBI0;
+ tbid = tcr & TCR_TBID0;
+ }
+
+ return tbi && !tbid;
+}
+
+static int compute_bottom_pac(struct kvm_vcpu *vcpu, bool bit55)
+{
+ static const int maxtxsz = 39; // Revisit these two values once
+ static const int mintxsz = 16; // (if) we support TTST/LVA/LVA2
+ u64 tcr = vcpu_read_sys_reg(vcpu, TCR_EL2);
+ int txsz;
+
+ if (!vcpu_el2_e2h_is_set(vcpu) || !bit55)
+ txsz = FIELD_GET(TCR_T0SZ_MASK, tcr);
+ else
+ txsz = FIELD_GET(TCR_T1SZ_MASK, tcr);
+
+ return 64 - clamp(txsz, mintxsz, maxtxsz);
+}
+
+static u64 compute_pac_mask(struct kvm_vcpu *vcpu, bool bit55)
+{
+ int bottom_pac;
+ u64 mask;
+
+ bottom_pac = compute_bottom_pac(vcpu, bit55);
+
+ mask = GENMASK(54, bottom_pac);
+ if (!effective_tbi(vcpu, bit55))
+ mask |= GENMASK(63, 56);
+
+ return mask;
+}
+
+static u64 to_canonical_addr(struct kvm_vcpu *vcpu, u64 ptr, u64 mask)
+{
+ bool bit55 = !!(ptr & BIT(55));
+
+ if (bit55)
+ return ptr | mask;
+
+ return ptr & ~mask;
+}
+
+static u64 corrupt_addr(struct kvm_vcpu *vcpu, u64 ptr)
+{
+ bool bit55 = !!(ptr & BIT(55));
+ u64 mask, error_code;
+ int shift;
+
+ if (effective_tbi(vcpu, bit55)) {
+ mask = GENMASK(54, 53);
+ shift = 53;
+ } else {
+ mask = GENMASK(62, 61);
+ shift = 61;
+ }
+
+ if (esr_iss_is_eretab(kvm_vcpu_get_esr(vcpu)))
+ error_code = 2 << shift;
+ else
+ error_code = 1 << shift;
+
+ ptr &= ~mask;
+ ptr |= error_code;
+
+ return ptr;
+}
+
+/*
+ * Authenticate an ERETAA/ERETAB instruction, returning true if the
+ * authentication succeeded and false otherwise. In all cases, *elr
+ * contains the VA to ERET to. Potential exception injection is left
+ * to the caller.
+ */
+bool kvm_auth_eretax(struct kvm_vcpu *vcpu, u64 *elr)
+{
+ u64 sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL2);
+ u64 esr = kvm_vcpu_get_esr(vcpu);
+ u64 ptr, cptr, pac, mask;
+ struct ptrauth_key ikey;
+
+ *elr = ptr = vcpu_read_sys_reg(vcpu, ELR_EL2);
+
+ /* We assume we're already in the context of an ERETAx */
+ if (esr_iss_is_eretab(esr)) {
+ if (!(sctlr & SCTLR_EL1_EnIB))
+ return true;
+
+ ikey.lo = __vcpu_sys_reg(vcpu, APIBKEYLO_EL1);
+ ikey.hi = __vcpu_sys_reg(vcpu, APIBKEYHI_EL1);
+ } else {
+ if (!(sctlr & SCTLR_EL1_EnIA))
+ return true;
+
+ ikey.lo = __vcpu_sys_reg(vcpu, APIAKEYLO_EL1);
+ ikey.hi = __vcpu_sys_reg(vcpu, APIAKEYHI_EL1);
+ }
+
+ mask = compute_pac_mask(vcpu, !!(ptr & BIT(55)));
+ cptr = to_canonical_addr(vcpu, ptr, mask);
+
+ pac = compute_pac(vcpu, cptr, ikey);
+
+ /*
+ * Slightly deviate from the pseudocode: if we have a PAC
+ * match with the signed pointer, then it must be good.
+ * Anything after this point is pure error handling.
+ */
+ if ((pac & mask) == (ptr & mask)) {
+ *elr = cptr;
+ return true;
+ }
+
+ /*
+ * Authentication failed, corrupt the canonical address if
+ * PAuth2 isn't implemented, or some XORing if it is.
+ */
+ if (!kvm_has_pauth(vcpu->kvm, PAuth2))
+ cptr = corrupt_addr(vcpu, cptr);
+ else
+ cptr = ptr ^ (pac & mask);
+
+ *elr = cptr;
+ return false;
+}
diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c
index b7be96a53597..85117ea8f351 100644
--- a/arch/arm64/kvm/pkvm.c
+++ b/arch/arm64/kvm/pkvm.c
@@ -222,7 +222,6 @@ void pkvm_destroy_hyp_vm(struct kvm *host_kvm)
int pkvm_init_host_vm(struct kvm *host_kvm)
{
- mutex_init(&host_kvm->lock);
return 0;
}
@@ -259,6 +258,7 @@ static int __init finalize_pkvm(void)
* at, which would end badly once inaccessible.
*/
kmemleak_free_part(__hyp_bss_start, __hyp_bss_end - __hyp_bss_start);
+ kmemleak_free_part(__hyp_rodata_start, __hyp_rodata_end - __hyp_rodata_start);
kmemleak_free_part_phys(hyp_mem_base, hyp_mem_size);
ret = pkvm_drop_host_privileges();
diff --git a/arch/arm64/kvm/pmu.c b/arch/arm64/kvm/pmu.c
index a243934c5568..329819806096 100644
--- a/arch/arm64/kvm/pmu.c
+++ b/arch/arm64/kvm/pmu.c
@@ -232,7 +232,7 @@ bool kvm_set_pmuserenr(u64 val)
if (!vcpu || !vcpu_get_flag(vcpu, PMUSERENR_ON_CPU))
return false;
- hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+ hctxt = host_data_ptr(host_ctxt);
ctxt_sys_reg(hctxt, PMUSERENR_EL0) = val;
return true;
}
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index 68d1d05672bd..1b7b58cb121f 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -151,7 +151,6 @@ void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu)
{
void *sve_state = vcpu->arch.sve_state;
- kvm_vcpu_unshare_task_fp(vcpu);
kvm_unshare_hyp(vcpu, vcpu + 1);
if (sve_state)
kvm_unshare_hyp(sve_state, sve_state + vcpu_sve_state_size(vcpu));
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index c9f4f387155f..22b45a15d068 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -1568,17 +1568,31 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu, const struct sys_reg_desc *r
return IDREG(vcpu->kvm, reg_to_encoding(r));
}
+static bool is_feature_id_reg(u32 encoding)
+{
+ return (sys_reg_Op0(encoding) == 3 &&
+ (sys_reg_Op1(encoding) < 2 || sys_reg_Op1(encoding) == 3) &&
+ sys_reg_CRn(encoding) == 0 &&
+ sys_reg_CRm(encoding) <= 7);
+}
+
/*
* Return true if the register's (Op0, Op1, CRn, CRm, Op2) is
- * (3, 0, 0, crm, op2), where 1<=crm<8, 0<=op2<8.
+ * (3, 0, 0, crm, op2), where 1<=crm<8, 0<=op2<8, which is the range of ID
+ * registers KVM maintains on a per-VM basis.
*/
-static inline bool is_id_reg(u32 id)
+static inline bool is_vm_ftr_id_reg(u32 id)
{
return (sys_reg_Op0(id) == 3 && sys_reg_Op1(id) == 0 &&
sys_reg_CRn(id) == 0 && sys_reg_CRm(id) >= 1 &&
sys_reg_CRm(id) < 8);
}
+static inline bool is_vcpu_ftr_id_reg(u32 id)
+{
+ return is_feature_id_reg(id) && !is_vm_ftr_id_reg(id);
+}
+
static inline bool is_aa32_id_reg(u32 id)
{
return (sys_reg_Op0(id) == 3 && sys_reg_Op1(id) == 0 &&
@@ -2338,7 +2352,6 @@ static const struct sys_reg_desc sys_reg_descs[] = {
ID_AA64MMFR0_EL1_TGRAN16_2)),
ID_WRITABLE(ID_AA64MMFR1_EL1, ~(ID_AA64MMFR1_EL1_RES0 |
ID_AA64MMFR1_EL1_HCX |
- ID_AA64MMFR1_EL1_XNX |
ID_AA64MMFR1_EL1_TWED |
ID_AA64MMFR1_EL1_XNX |
ID_AA64MMFR1_EL1_VH |
@@ -3069,12 +3082,14 @@ static bool check_sysreg_table(const struct sys_reg_desc *table, unsigned int n,
for (i = 0; i < n; i++) {
if (!is_32 && table[i].reg && !table[i].reset) {
- kvm_err("sys_reg table %pS entry %d lacks reset\n", &table[i], i);
+ kvm_err("sys_reg table %pS entry %d (%s) lacks reset\n",
+ &table[i], i, table[i].name);
return false;
}
if (i && cmp_sys_reg(&table[i-1], &table[i]) >= 0) {
- kvm_err("sys_reg table %pS entry %d out of order\n", &table[i - 1], i - 1);
+ kvm_err("sys_reg table %pS entry %d (%s -> %s) out of order\n",
+ &table[i], i, table[i - 1].name, table[i].name);
return false;
}
}
@@ -3509,26 +3524,25 @@ void kvm_sys_regs_create_debugfs(struct kvm *kvm)
&idregs_debug_fops);
}
-static void kvm_reset_id_regs(struct kvm_vcpu *vcpu)
+static void reset_vm_ftr_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *reg)
{
- const struct sys_reg_desc *idreg = first_idreg;
- u32 id = reg_to_encoding(idreg);
+ u32 id = reg_to_encoding(reg);
struct kvm *kvm = vcpu->kvm;
if (test_bit(KVM_ARCH_FLAG_ID_REGS_INITIALIZED, &kvm->arch.flags))
return;
lockdep_assert_held(&kvm->arch.config_lock);
+ IDREG(kvm, id) = reg->reset(vcpu, reg);
+}
- /* Initialize all idregs */
- while (is_id_reg(id)) {
- IDREG(kvm, id) = idreg->reset(vcpu, idreg);
-
- idreg++;
- id = reg_to_encoding(idreg);
- }
+static void reset_vcpu_ftr_id_reg(struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *reg)
+{
+ if (kvm_vcpu_initialized(vcpu))
+ return;
- set_bit(KVM_ARCH_FLAG_ID_REGS_INITIALIZED, &kvm->arch.flags);
+ reg->reset(vcpu, reg);
}
/**
@@ -3540,19 +3554,24 @@ static void kvm_reset_id_regs(struct kvm_vcpu *vcpu)
*/
void kvm_reset_sys_regs(struct kvm_vcpu *vcpu)
{
+ struct kvm *kvm = vcpu->kvm;
unsigned long i;
- kvm_reset_id_regs(vcpu);
-
for (i = 0; i < ARRAY_SIZE(sys_reg_descs); i++) {
const struct sys_reg_desc *r = &sys_reg_descs[i];
- if (is_id_reg(reg_to_encoding(r)))
+ if (!r->reset)
continue;
- if (r->reset)
+ if (is_vm_ftr_id_reg(reg_to_encoding(r)))
+ reset_vm_ftr_id_reg(vcpu, r);
+ else if (is_vcpu_ftr_id_reg(reg_to_encoding(r)))
+ reset_vcpu_ftr_id_reg(vcpu, r);
+ else
r->reset(vcpu, r);
}
+
+ set_bit(KVM_ARCH_FLAG_ID_REGS_INITIALIZED, &kvm->arch.flags);
}
/**
@@ -3978,14 +3997,6 @@ int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
sys_reg_CRm(r), \
sys_reg_Op2(r))
-static bool is_feature_id_reg(u32 encoding)
-{
- return (sys_reg_Op0(encoding) == 3 &&
- (sys_reg_Op1(encoding) < 2 || sys_reg_Op1(encoding) == 3) &&
- sys_reg_CRn(encoding) == 0 &&
- sys_reg_CRm(encoding) <= 7);
-}
-
int kvm_vm_ioctl_get_reg_writable_masks(struct kvm *kvm, struct reg_mask_range *range)
{
const void *zero_page = page_to_virt(ZERO_PAGE(0));
@@ -4014,7 +4025,7 @@ int kvm_vm_ioctl_get_reg_writable_masks(struct kvm *kvm, struct reg_mask_range *
* compliant with a given revision of the architecture, but the
* RES0/RES1 definitions allow us to do that.
*/
- if (is_id_reg(encoding)) {
+ if (is_vm_ftr_id_reg(encoding)) {
if (!reg->val ||
(is_aa32_id_reg(encoding) && !kvm_supports_32bit_el0()))
continue;
diff --git a/arch/arm64/kvm/vgic/vgic-debug.c b/arch/arm64/kvm/vgic/vgic-debug.c
index 389025ce7749..bcbc8c986b1d 100644
--- a/arch/arm64/kvm/vgic/vgic-debug.c
+++ b/arch/arm64/kvm/vgic/vgic-debug.c
@@ -28,27 +28,65 @@ struct vgic_state_iter {
int nr_lpis;
int dist_id;
int vcpu_id;
- int intid;
+ unsigned long intid;
int lpi_idx;
- u32 *lpi_array;
};
-static void iter_next(struct vgic_state_iter *iter)
+static void iter_next(struct kvm *kvm, struct vgic_state_iter *iter)
{
+ struct vgic_dist *dist = &kvm->arch.vgic;
+
if (iter->dist_id == 0) {
iter->dist_id++;
return;
}
+ /*
+ * Let the xarray drive the iterator after the last SPI, as the iterator
+ * has exhausted the sequentially-allocated INTID space.
+ */
+ if (iter->intid >= (iter->nr_spis + VGIC_NR_PRIVATE_IRQS - 1)) {
+ if (iter->lpi_idx < iter->nr_lpis)
+ xa_find_after(&dist->lpi_xa, &iter->intid,
+ VGIC_LPI_MAX_INTID,
+ LPI_XA_MARK_DEBUG_ITER);
+ iter->lpi_idx++;
+ return;
+ }
+
iter->intid++;
if (iter->intid == VGIC_NR_PRIVATE_IRQS &&
++iter->vcpu_id < iter->nr_cpus)
iter->intid = 0;
+}
- if (iter->intid >= (iter->nr_spis + VGIC_NR_PRIVATE_IRQS)) {
- if (iter->lpi_idx < iter->nr_lpis)
- iter->intid = iter->lpi_array[iter->lpi_idx];
- iter->lpi_idx++;
+static int iter_mark_lpis(struct kvm *kvm)
+{
+ struct vgic_dist *dist = &kvm->arch.vgic;
+ struct vgic_irq *irq;
+ unsigned long intid;
+ int nr_lpis = 0;
+
+ xa_for_each(&dist->lpi_xa, intid, irq) {
+ if (!vgic_try_get_irq_kref(irq))
+ continue;
+
+ xa_set_mark(&dist->lpi_xa, intid, LPI_XA_MARK_DEBUG_ITER);
+ nr_lpis++;
+ }
+
+ return nr_lpis;
+}
+
+static void iter_unmark_lpis(struct kvm *kvm)
+{
+ struct vgic_dist *dist = &kvm->arch.vgic;
+ struct vgic_irq *irq;
+ unsigned long intid;
+
+ xa_for_each(&dist->lpi_xa, intid, irq) {
+ xa_clear_mark(&dist->lpi_xa, intid, LPI_XA_MARK_DEBUG_ITER);
+ vgic_put_irq(kvm, irq);
}
}
@@ -61,15 +99,12 @@ static void iter_init(struct kvm *kvm, struct vgic_state_iter *iter,
iter->nr_cpus = nr_cpus;
iter->nr_spis = kvm->arch.vgic.nr_spis;
- if (kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) {
- iter->nr_lpis = vgic_copy_lpi_list(kvm, NULL, &iter->lpi_array);
- if (iter->nr_lpis < 0)
- iter->nr_lpis = 0;
- }
+ if (kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3)
+ iter->nr_lpis = iter_mark_lpis(kvm);
/* Fast forward to the right position if needed */
while (pos--)
- iter_next(iter);
+ iter_next(kvm, iter);
}
static bool end_of_vgic(struct vgic_state_iter *iter)
@@ -114,7 +149,7 @@ static void *vgic_debug_next(struct seq_file *s, void *v, loff_t *pos)
struct vgic_state_iter *iter = kvm->arch.vgic.iter;
++*pos;
- iter_next(iter);
+ iter_next(kvm, iter);
if (end_of_vgic(iter))
iter = NULL;
return iter;
@@ -134,13 +169,14 @@ static void vgic_debug_stop(struct seq_file *s, void *v)
mutex_lock(&kvm->arch.config_lock);
iter = kvm->arch.vgic.iter;
- kfree(iter->lpi_array);
+ iter_unmark_lpis(kvm);
kfree(iter);
kvm->arch.vgic.iter = NULL;
mutex_unlock(&kvm->arch.config_lock);
}
-static void print_dist_state(struct seq_file *s, struct vgic_dist *dist)
+static void print_dist_state(struct seq_file *s, struct vgic_dist *dist,
+ struct vgic_state_iter *iter)
{
bool v3 = dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3;
@@ -149,7 +185,7 @@ static void print_dist_state(struct seq_file *s, struct vgic_dist *dist)
seq_printf(s, "vgic_model:\t%s\n", v3 ? "GICv3" : "GICv2");
seq_printf(s, "nr_spis:\t%d\n", dist->nr_spis);
if (v3)
- seq_printf(s, "nr_lpis:\t%d\n", atomic_read(&dist->lpi_count));
+ seq_printf(s, "nr_lpis:\t%d\n", iter->nr_lpis);
seq_printf(s, "enabled:\t%d\n", dist->enabled);
seq_printf(s, "\n");
@@ -236,7 +272,7 @@ static int vgic_debug_show(struct seq_file *s, void *v)
unsigned long flags;
if (iter->dist_id == 0) {
- print_dist_state(s, &kvm->arch.vgic);
+ print_dist_state(s, &kvm->arch.vgic, iter);
return 0;
}
@@ -246,11 +282,13 @@ static int vgic_debug_show(struct seq_file *s, void *v)
if (iter->vcpu_id < iter->nr_cpus)
vcpu = kvm_get_vcpu(kvm, iter->vcpu_id);
+ /*
+ * Expect this to succeed, as iter_mark_lpis() takes a reference on
+ * every LPI to be visited.
+ */
irq = vgic_get_irq(kvm, vcpu, iter->intid);
- if (!irq) {
- seq_printf(s, " LPI %4d freed\n", iter->intid);
- return 0;
- }
+ if (WARN_ON_ONCE(!irq))
+ return -EINVAL;
raw_spin_lock_irqsave(&irq->irq_lock, flags);
print_irq_state(s, irq, vcpu);
diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c
index f20941f83a07..8f5b7a3e7009 100644
--- a/arch/arm64/kvm/vgic/vgic-init.c
+++ b/arch/arm64/kvm/vgic/vgic-init.c
@@ -53,8 +53,6 @@ void kvm_vgic_early_init(struct kvm *kvm)
{
struct vgic_dist *dist = &kvm->arch.vgic;
- INIT_LIST_HEAD(&dist->lpi_translation_cache);
- raw_spin_lock_init(&dist->lpi_list_lock);
xa_init_flags(&dist->lpi_xa, XA_FLAGS_LOCK_IRQ);
}
@@ -182,27 +180,22 @@ static int kvm_vgic_dist_init(struct kvm *kvm, unsigned int nr_spis)
return 0;
}
-/**
- * kvm_vgic_vcpu_init() - Initialize static VGIC VCPU data
- * structures and register VCPU-specific KVM iodevs
- *
- * @vcpu: pointer to the VCPU being created and initialized
- *
- * Only do initialization, but do not actually enable the
- * VGIC CPU interface
- */
-int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
+static int vgic_allocate_private_irqs_locked(struct kvm_vcpu *vcpu)
{
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
- struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
- int ret = 0;
int i;
- vgic_cpu->rd_iodev.base_addr = VGIC_ADDR_UNDEF;
+ lockdep_assert_held(&vcpu->kvm->arch.config_lock);
- INIT_LIST_HEAD(&vgic_cpu->ap_list_head);
- raw_spin_lock_init(&vgic_cpu->ap_list_lock);
- atomic_set(&vgic_cpu->vgic_v3.its_vpe.vlpi_count, 0);
+ if (vgic_cpu->private_irqs)
+ return 0;
+
+ vgic_cpu->private_irqs = kcalloc(VGIC_NR_PRIVATE_IRQS,
+ sizeof(struct vgic_irq),
+ GFP_KERNEL_ACCOUNT);
+
+ if (!vgic_cpu->private_irqs)
+ return -ENOMEM;
/*
* Enable and configure all SGIs to be edge-triggered and
@@ -227,9 +220,48 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
}
}
+ return 0;
+}
+
+static int vgic_allocate_private_irqs(struct kvm_vcpu *vcpu)
+{
+ int ret;
+
+ mutex_lock(&vcpu->kvm->arch.config_lock);
+ ret = vgic_allocate_private_irqs_locked(vcpu);
+ mutex_unlock(&vcpu->kvm->arch.config_lock);
+
+ return ret;
+}
+
+/**
+ * kvm_vgic_vcpu_init() - Initialize static VGIC VCPU data
+ * structures and register VCPU-specific KVM iodevs
+ *
+ * @vcpu: pointer to the VCPU being created and initialized
+ *
+ * Only do initialization, but do not actually enable the
+ * VGIC CPU interface
+ */
+int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
+{
+ struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+ int ret = 0;
+
+ vgic_cpu->rd_iodev.base_addr = VGIC_ADDR_UNDEF;
+
+ INIT_LIST_HEAD(&vgic_cpu->ap_list_head);
+ raw_spin_lock_init(&vgic_cpu->ap_list_lock);
+ atomic_set(&vgic_cpu->vgic_v3.its_vpe.vlpi_count, 0);
+
if (!irqchip_in_kernel(vcpu->kvm))
return 0;
+ ret = vgic_allocate_private_irqs(vcpu);
+ if (ret)
+ return ret;
+
/*
* If we are creating a VCPU with a GICv3 we must also register the
* KVM io device for the redistributor that belongs to this VCPU.
@@ -285,10 +317,13 @@ int vgic_init(struct kvm *kvm)
/* Initialize groups on CPUs created before the VGIC type was known */
kvm_for_each_vcpu(idx, vcpu, kvm) {
- struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+ ret = vgic_allocate_private_irqs_locked(vcpu);
+ if (ret)
+ goto out;
for (i = 0; i < VGIC_NR_PRIVATE_IRQS; i++) {
- struct vgic_irq *irq = &vgic_cpu->private_irqs[i];
+ struct vgic_irq *irq = vgic_get_irq(kvm, vcpu, i);
+
switch (dist->vgic_model) {
case KVM_DEV_TYPE_ARM_VGIC_V3:
irq->group = 1;
@@ -300,14 +335,15 @@ int vgic_init(struct kvm *kvm)
break;
default:
ret = -EINVAL;
- goto out;
}
+
+ vgic_put_irq(kvm, irq);
+
+ if (ret)
+ goto out;
}
}
- if (vgic_has_its(kvm))
- vgic_lpi_translation_cache_init(kvm);
-
/*
* If we have GICv4.1 enabled, unconditionally request enable the
* v4 support so that we get HW-accelerated vSGIs. Otherwise, only
@@ -361,9 +397,6 @@ static void kvm_vgic_dist_destroy(struct kvm *kvm)
dist->vgic_cpu_base = VGIC_ADDR_UNDEF;
}
- if (vgic_has_its(kvm))
- vgic_lpi_translation_cache_destroy(kvm);
-
if (vgic_supports_direct_msis(kvm))
vgic_v4_teardown(kvm);
@@ -381,6 +414,9 @@ static void __kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
vgic_flush_pending_lpis(vcpu);
INIT_LIST_HEAD(&vgic_cpu->ap_list_head);
+ kfree(vgic_cpu->private_irqs);
+ vgic_cpu->private_irqs = NULL;
+
if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) {
vgic_unregister_redist_iodev(vcpu);
vgic_cpu->rd_iodev.base_addr = VGIC_ADDR_UNDEF;
diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c
index e85a495ada9c..40bb43f20bf3 100644
--- a/arch/arm64/kvm/vgic/vgic-its.c
+++ b/arch/arm64/kvm/vgic/vgic-its.c
@@ -23,6 +23,8 @@
#include "vgic.h"
#include "vgic-mmio.h"
+static struct kvm_device_ops kvm_arm_vgic_its_ops;
+
static int vgic_its_save_tables_v0(struct vgic_its *its);
static int vgic_its_restore_tables_v0(struct vgic_its *its);
static int vgic_its_commit_v0(struct vgic_its *its);
@@ -67,7 +69,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid,
irq->target_vcpu = vcpu;
irq->group = 1;
- raw_spin_lock_irqsave(&dist->lpi_list_lock, flags);
+ xa_lock_irqsave(&dist->lpi_xa, flags);
/*
* There could be a race with another vgic_add_lpi(), so we need to
@@ -82,17 +84,14 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid,
goto out_unlock;
}
- ret = xa_err(xa_store(&dist->lpi_xa, intid, irq, 0));
+ ret = xa_err(__xa_store(&dist->lpi_xa, intid, irq, 0));
if (ret) {
xa_release(&dist->lpi_xa, intid);
kfree(irq);
- goto out_unlock;
}
- atomic_inc(&dist->lpi_count);
-
out_unlock:
- raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
+ xa_unlock_irqrestore(&dist->lpi_xa, flags);
if (ret)
return ERR_PTR(ret);
@@ -150,14 +149,6 @@ struct its_ite {
u32 event_id;
};
-struct vgic_translation_cache_entry {
- struct list_head entry;
- phys_addr_t db;
- u32 devid;
- u32 eventid;
- struct vgic_irq *irq;
-};
-
/**
* struct vgic_its_abi - ITS abi ops and settings
* @cte_esz: collection table entry size
@@ -252,8 +243,10 @@ static struct its_ite *find_ite(struct vgic_its *its, u32 device_id,
#define GIC_LPI_OFFSET 8192
-#define VITS_TYPER_IDBITS 16
-#define VITS_TYPER_DEVBITS 16
+#define VITS_TYPER_IDBITS 16
+#define VITS_MAX_EVENTID (BIT(VITS_TYPER_IDBITS) - 1)
+#define VITS_TYPER_DEVBITS 16
+#define VITS_MAX_DEVID (BIT(VITS_TYPER_DEVBITS) - 1)
#define VITS_DTE_MAX_DEVID_OFFSET (BIT(14) - 1)
#define VITS_ITE_MAX_EVENTID_OFFSET (BIT(16) - 1)
@@ -316,53 +309,6 @@ static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq,
return 0;
}
-#define GIC_LPI_MAX_INTID ((1 << INTERRUPT_ID_BITS_ITS) - 1)
-
-/*
- * Create a snapshot of the current LPIs targeting @vcpu, so that we can
- * enumerate those LPIs without holding any lock.
- * Returns their number and puts the kmalloc'ed array into intid_ptr.
- */
-int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 **intid_ptr)
-{
- struct vgic_dist *dist = &kvm->arch.vgic;
- XA_STATE(xas, &dist->lpi_xa, GIC_LPI_OFFSET);
- struct vgic_irq *irq;
- unsigned long flags;
- u32 *intids;
- int irq_count, i = 0;
-
- /*
- * There is an obvious race between allocating the array and LPIs
- * being mapped/unmapped. If we ended up here as a result of a
- * command, we're safe (locks are held, preventing another
- * command). If coming from another path (such as enabling LPIs),
- * we must be careful not to overrun the array.
- */
- irq_count = atomic_read(&dist->lpi_count);
- intids = kmalloc_array(irq_count, sizeof(intids[0]), GFP_KERNEL_ACCOUNT);
- if (!intids)
- return -ENOMEM;
-
- raw_spin_lock_irqsave(&dist->lpi_list_lock, flags);
- rcu_read_lock();
-
- xas_for_each(&xas, irq, GIC_LPI_MAX_INTID) {
- if (i == irq_count)
- break;
- /* We don't need to "get" the IRQ, as we hold the list lock. */
- if (vcpu && irq->target_vcpu != vcpu)
- continue;
- intids[i++] = irq->intid;
- }
-
- rcu_read_unlock();
- raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
-
- *intid_ptr = intids;
- return i;
-}
-
static int update_affinity(struct vgic_irq *irq, struct kvm_vcpu *vcpu)
{
int ret = 0;
@@ -446,23 +392,18 @@ static u32 max_lpis_propbaser(u64 propbaser)
static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu)
{
gpa_t pendbase = GICR_PENDBASER_ADDRESS(vcpu->arch.vgic_cpu.pendbaser);
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+ unsigned long intid, flags;
struct vgic_irq *irq;
int last_byte_offset = -1;
int ret = 0;
- u32 *intids;
- int nr_irqs, i;
- unsigned long flags;
u8 pendmask;
- nr_irqs = vgic_copy_lpi_list(vcpu->kvm, vcpu, &intids);
- if (nr_irqs < 0)
- return nr_irqs;
-
- for (i = 0; i < nr_irqs; i++) {
+ xa_for_each(&dist->lpi_xa, intid, irq) {
int byte_offset, bit_nr;
- byte_offset = intids[i] / BITS_PER_BYTE;
- bit_nr = intids[i] % BITS_PER_BYTE;
+ byte_offset = intid / BITS_PER_BYTE;
+ bit_nr = intid % BITS_PER_BYTE;
/*
* For contiguously allocated LPIs chances are we just read
@@ -472,25 +413,23 @@ static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu)
ret = kvm_read_guest_lock(vcpu->kvm,
pendbase + byte_offset,
&pendmask, 1);
- if (ret) {
- kfree(intids);
+ if (ret)
return ret;
- }
+
last_byte_offset = byte_offset;
}
- irq = vgic_get_irq(vcpu->kvm, NULL, intids[i]);
+ irq = vgic_get_irq(vcpu->kvm, NULL, intid);
if (!irq)
continue;
raw_spin_lock_irqsave(&irq->irq_lock, flags);
- irq->pending_latch = pendmask & (1U << bit_nr);
+ if (irq->target_vcpu == vcpu)
+ irq->pending_latch = pendmask & (1U << bit_nr);
vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
vgic_put_irq(vcpu->kvm, irq);
}
- kfree(intids);
-
return ret;
}
@@ -566,51 +505,52 @@ static unsigned long vgic_mmio_read_its_idregs(struct kvm *kvm,
return 0;
}
-static struct vgic_irq *__vgic_its_check_cache(struct vgic_dist *dist,
- phys_addr_t db,
- u32 devid, u32 eventid)
+static struct vgic_its *__vgic_doorbell_to_its(struct kvm *kvm, gpa_t db)
{
- struct vgic_translation_cache_entry *cte;
+ struct kvm_io_device *kvm_io_dev;
+ struct vgic_io_device *iodev;
- list_for_each_entry(cte, &dist->lpi_translation_cache, entry) {
- /*
- * If we hit a NULL entry, there is nothing after this
- * point.
- */
- if (!cte->irq)
- break;
+ kvm_io_dev = kvm_io_bus_get_dev(kvm, KVM_MMIO_BUS, db);
+ if (!kvm_io_dev)
+ return ERR_PTR(-EINVAL);
- if (cte->db != db || cte->devid != devid ||
- cte->eventid != eventid)
- continue;
+ if (kvm_io_dev->ops != &kvm_io_gic_ops)
+ return ERR_PTR(-EINVAL);
- /*
- * Move this entry to the head, as it is the most
- * recently used.
- */
- if (!list_is_first(&cte->entry, &dist->lpi_translation_cache))
- list_move(&cte->entry, &dist->lpi_translation_cache);
+ iodev = container_of(kvm_io_dev, struct vgic_io_device, dev);
+ if (iodev->iodev_type != IODEV_ITS)
+ return ERR_PTR(-EINVAL);
- return cte->irq;
- }
+ return iodev->its;
+}
+
+static unsigned long vgic_its_cache_key(u32 devid, u32 eventid)
+{
+ return (((unsigned long)devid) << VITS_TYPER_IDBITS) | eventid;
- return NULL;
}
static struct vgic_irq *vgic_its_check_cache(struct kvm *kvm, phys_addr_t db,
u32 devid, u32 eventid)
{
- struct vgic_dist *dist = &kvm->arch.vgic;
+ unsigned long cache_key = vgic_its_cache_key(devid, eventid);
+ struct vgic_its *its;
struct vgic_irq *irq;
- unsigned long flags;
- raw_spin_lock_irqsave(&dist->lpi_list_lock, flags);
+ if (devid > VITS_MAX_DEVID || eventid > VITS_MAX_EVENTID)
+ return NULL;
- irq = __vgic_its_check_cache(dist, db, devid, eventid);
+ its = __vgic_doorbell_to_its(kvm, db);
+ if (IS_ERR(its))
+ return NULL;
+
+ rcu_read_lock();
+
+ irq = xa_load(&its->translation_cache, cache_key);
if (!vgic_try_get_irq_kref(irq))
irq = NULL;
- raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
+ rcu_read_unlock();
return irq;
}
@@ -619,41 +559,13 @@ static void vgic_its_cache_translation(struct kvm *kvm, struct vgic_its *its,
u32 devid, u32 eventid,
struct vgic_irq *irq)
{
- struct vgic_dist *dist = &kvm->arch.vgic;
- struct vgic_translation_cache_entry *cte;
- unsigned long flags;
- phys_addr_t db;
+ unsigned long cache_key = vgic_its_cache_key(devid, eventid);
+ struct vgic_irq *old;
/* Do not cache a directly injected interrupt */
if (irq->hw)
return;
- raw_spin_lock_irqsave(&dist->lpi_list_lock, flags);
-
- if (unlikely(list_empty(&dist->lpi_translation_cache)))
- goto out;
-
- /*
- * We could have raced with another CPU caching the same
- * translation behind our back, so let's check it is not in
- * already
- */
- db = its->vgic_its_base + GITS_TRANSLATER;
- if (__vgic_its_check_cache(dist, db, devid, eventid))
- goto out;
-
- /* Always reuse the last entry (LRU policy) */
- cte = list_last_entry(&dist->lpi_translation_cache,
- typeof(*cte), entry);
-
- /*
- * Caching the translation implies having an extra reference
- * to the interrupt, so drop the potential reference on what
- * was in the cache, and increment it on the new interrupt.
- */
- if (cte->irq)
- vgic_put_irq(kvm, cte->irq);
-
/*
* The irq refcount is guaranteed to be nonzero while holding the
* its_lock, as the ITE (and the reference it holds) cannot be freed.
@@ -661,39 +573,44 @@ static void vgic_its_cache_translation(struct kvm *kvm, struct vgic_its *its,
lockdep_assert_held(&its->its_lock);
vgic_get_irq_kref(irq);
- cte->db = db;
- cte->devid = devid;
- cte->eventid = eventid;
- cte->irq = irq;
+ /*
+ * We could have raced with another CPU caching the same
+ * translation behind our back, ensure we don't leak a
+ * reference if that is the case.
+ */
+ old = xa_store(&its->translation_cache, cache_key, irq, GFP_KERNEL_ACCOUNT);
+ if (old)
+ vgic_put_irq(kvm, old);
+}
- /* Move the new translation to the head of the list */
- list_move(&cte->entry, &dist->lpi_translation_cache);
+static void vgic_its_invalidate_cache(struct vgic_its *its)
+{
+ struct kvm *kvm = its->dev->kvm;
+ struct vgic_irq *irq;
+ unsigned long idx;
-out:
- raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
+ xa_for_each(&its->translation_cache, idx, irq) {
+ xa_erase(&its->translation_cache, idx);
+ vgic_put_irq(kvm, irq);
+ }
}
-void vgic_its_invalidate_cache(struct kvm *kvm)
+void vgic_its_invalidate_all_caches(struct kvm *kvm)
{
- struct vgic_dist *dist = &kvm->arch.vgic;
- struct vgic_translation_cache_entry *cte;
- unsigned long flags;
+ struct kvm_device *dev;
+ struct vgic_its *its;
- raw_spin_lock_irqsave(&dist->lpi_list_lock, flags);
+ rcu_read_lock();
- list_for_each_entry(cte, &dist->lpi_translation_cache, entry) {
- /*
- * If we hit a NULL entry, there is nothing after this
- * point.
- */
- if (!cte->irq)
- break;
+ list_for_each_entry_rcu(dev, &kvm->devices, vm_node) {
+ if (dev->ops != &kvm_arm_vgic_its_ops)
+ continue;
- vgic_put_irq(kvm, cte->irq);
- cte->irq = NULL;
+ its = dev->private;
+ vgic_its_invalidate_cache(its);
}
- raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
+ rcu_read_unlock();
}
int vgic_its_resolve_lpi(struct kvm *kvm, struct vgic_its *its,
@@ -725,8 +642,6 @@ int vgic_its_resolve_lpi(struct kvm *kvm, struct vgic_its *its,
struct vgic_its *vgic_msi_to_its(struct kvm *kvm, struct kvm_msi *msi)
{
u64 address;
- struct kvm_io_device *kvm_io_dev;
- struct vgic_io_device *iodev;
if (!vgic_has_its(kvm))
return ERR_PTR(-ENODEV);
@@ -736,18 +651,7 @@ struct vgic_its *vgic_msi_to_its(struct kvm *kvm, struct kvm_msi *msi)
address = (u64)msi->address_hi << 32 | msi->address_lo;
- kvm_io_dev = kvm_io_bus_get_dev(kvm, KVM_MMIO_BUS, address);
- if (!kvm_io_dev)
- return ERR_PTR(-EINVAL);
-
- if (kvm_io_dev->ops != &kvm_io_gic_ops)
- return ERR_PTR(-EINVAL);
-
- iodev = container_of(kvm_io_dev, struct vgic_io_device, dev);
- if (iodev->iodev_type != IODEV_ITS)
- return ERR_PTR(-EINVAL);
-
- return iodev->its;
+ return __vgic_doorbell_to_its(kvm, address);
}
/*
@@ -883,7 +787,7 @@ static int vgic_its_cmd_handle_discard(struct kvm *kvm, struct vgic_its *its,
* don't bother here since we clear the ITTE anyway and the
* pending state is a property of the ITTE struct.
*/
- vgic_its_invalidate_cache(kvm);
+ vgic_its_invalidate_cache(its);
its_free_ite(kvm, ite);
return 0;
@@ -920,7 +824,7 @@ static int vgic_its_cmd_handle_movi(struct kvm *kvm, struct vgic_its *its,
ite->collection = collection;
vcpu = collection_to_vcpu(kvm, collection);
- vgic_its_invalidate_cache(kvm);
+ vgic_its_invalidate_cache(its);
return update_affinity(ite->irq, vcpu);
}
@@ -955,7 +859,7 @@ static bool vgic_its_check_id(struct vgic_its *its, u64 baser, u32 id,
switch (type) {
case GITS_BASER_TYPE_DEVICE:
- if (id >= BIT_ULL(VITS_TYPER_DEVBITS))
+ if (id > VITS_MAX_DEVID)
return false;
break;
case GITS_BASER_TYPE_COLLECTION:
@@ -1167,7 +1071,8 @@ static int vgic_its_cmd_handle_mapi(struct kvm *kvm, struct vgic_its *its,
}
/* Requires the its_lock to be held. */
-static void vgic_its_free_device(struct kvm *kvm, struct its_device *device)
+static void vgic_its_free_device(struct kvm *kvm, struct vgic_its *its,
+ struct its_device *device)
{
struct its_ite *ite, *temp;
@@ -1179,7 +1084,7 @@ static void vgic_its_free_device(struct kvm *kvm, struct its_device *device)
list_for_each_entry_safe(ite, temp, &device->itt_head, ite_list)
its_free_ite(kvm, ite);
- vgic_its_invalidate_cache(kvm);
+ vgic_its_invalidate_cache(its);
list_del(&device->dev_list);
kfree(device);
@@ -1191,7 +1096,7 @@ static void vgic_its_free_device_list(struct kvm *kvm, struct vgic_its *its)
struct its_device *cur, *temp;
list_for_each_entry_safe(cur, temp, &its->device_list, dev_list)
- vgic_its_free_device(kvm, cur);
+ vgic_its_free_device(kvm, its, cur);
}
/* its lock must be held */
@@ -1250,7 +1155,7 @@ static int vgic_its_cmd_handle_mapd(struct kvm *kvm, struct vgic_its *its,
* by removing the mapping and re-establishing it.
*/
if (device)
- vgic_its_free_device(kvm, device);
+ vgic_its_free_device(kvm, its, device);
/*
* The spec does not say whether unmapping a not-mapped device
@@ -1281,7 +1186,7 @@ static int vgic_its_cmd_handle_mapc(struct kvm *kvm, struct vgic_its *its,
if (!valid) {
vgic_its_free_collection(its, coll_id);
- vgic_its_invalidate_cache(kvm);
+ vgic_its_invalidate_cache(its);
} else {
struct kvm_vcpu *vcpu;
@@ -1372,23 +1277,19 @@ static int vgic_its_cmd_handle_inv(struct kvm *kvm, struct vgic_its *its,
int vgic_its_invall(struct kvm_vcpu *vcpu)
{
struct kvm *kvm = vcpu->kvm;
- int irq_count, i = 0;
- u32 *intids;
-
- irq_count = vgic_copy_lpi_list(kvm, vcpu, &intids);
- if (irq_count < 0)
- return irq_count;
+ struct vgic_dist *dist = &kvm->arch.vgic;
+ struct vgic_irq *irq;
+ unsigned long intid;
- for (i = 0; i < irq_count; i++) {
- struct vgic_irq *irq = vgic_get_irq(kvm, NULL, intids[i]);
+ xa_for_each(&dist->lpi_xa, intid, irq) {
+ irq = vgic_get_irq(kvm, NULL, intid);
if (!irq)
continue;
+
update_lpi_config(kvm, irq, vcpu, false);
vgic_put_irq(kvm, irq);
}
- kfree(intids);
-
if (vcpu->arch.vgic_cpu.vgic_v3.its_vpe.its_vm)
its_invall_vpe(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe);
@@ -1431,10 +1332,10 @@ static int vgic_its_cmd_handle_invall(struct kvm *kvm, struct vgic_its *its,
static int vgic_its_cmd_handle_movall(struct kvm *kvm, struct vgic_its *its,
u64 *its_cmd)
{
+ struct vgic_dist *dist = &kvm->arch.vgic;
struct kvm_vcpu *vcpu1, *vcpu2;
struct vgic_irq *irq;
- u32 *intids;
- int irq_count, i;
+ unsigned long intid;
/* We advertise GITS_TYPER.PTA==0, making the address the vcpu ID */
vcpu1 = kvm_get_vcpu_by_id(kvm, its_cmd_get_target_addr(its_cmd));
@@ -1446,12 +1347,8 @@ static int vgic_its_cmd_handle_movall(struct kvm *kvm, struct vgic_its *its,
if (vcpu1 == vcpu2)
return 0;
- irq_count = vgic_copy_lpi_list(kvm, vcpu1, &intids);
- if (irq_count < 0)
- return irq_count;
-
- for (i = 0; i < irq_count; i++) {
- irq = vgic_get_irq(kvm, NULL, intids[i]);
+ xa_for_each(&dist->lpi_xa, intid, irq) {
+ irq = vgic_get_irq(kvm, NULL, intid);
if (!irq)
continue;
@@ -1460,9 +1357,8 @@ static int vgic_its_cmd_handle_movall(struct kvm *kvm, struct vgic_its *its,
vgic_put_irq(kvm, irq);
}
- vgic_its_invalidate_cache(kvm);
+ vgic_its_invalidate_cache(its);
- kfree(intids);
return 0;
}
@@ -1813,7 +1709,7 @@ static void vgic_mmio_write_its_ctlr(struct kvm *kvm, struct vgic_its *its,
its->enabled = !!(val & GITS_CTLR_ENABLE);
if (!its->enabled)
- vgic_its_invalidate_cache(kvm);
+ vgic_its_invalidate_cache(its);
/*
* Try to process any pending commands. This function bails out early
@@ -1914,47 +1810,6 @@ out:
return ret;
}
-/* Default is 16 cached LPIs per vcpu */
-#define LPI_DEFAULT_PCPU_CACHE_SIZE 16
-
-void vgic_lpi_translation_cache_init(struct kvm *kvm)
-{
- struct vgic_dist *dist = &kvm->arch.vgic;
- unsigned int sz;
- int i;
-
- if (!list_empty(&dist->lpi_translation_cache))
- return;
-
- sz = atomic_read(&kvm->online_vcpus) * LPI_DEFAULT_PCPU_CACHE_SIZE;
-
- for (i = 0; i < sz; i++) {
- struct vgic_translation_cache_entry *cte;
-
- /* An allocation failure is not fatal */
- cte = kzalloc(sizeof(*cte), GFP_KERNEL_ACCOUNT);
- if (WARN_ON(!cte))
- break;
-
- INIT_LIST_HEAD(&cte->entry);
- list_add(&cte->entry, &dist->lpi_translation_cache);
- }
-}
-
-void vgic_lpi_translation_cache_destroy(struct kvm *kvm)
-{
- struct vgic_dist *dist = &kvm->arch.vgic;
- struct vgic_translation_cache_entry *cte, *tmp;
-
- vgic_its_invalidate_cache(kvm);
-
- list_for_each_entry_safe(cte, tmp,
- &dist->lpi_translation_cache, entry) {
- list_del(&cte->entry);
- kfree(cte);
- }
-}
-
#define INITIAL_BASER_VALUE \
(GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWb) | \
GIC_BASER_CACHEABILITY(GITS_BASER, OUTER, SameAsInner) | \
@@ -1987,8 +1842,6 @@ static int vgic_its_create(struct kvm_device *dev, u32 type)
kfree(its);
return ret;
}
-
- vgic_lpi_translation_cache_init(dev->kvm);
}
mutex_init(&its->its_lock);
@@ -2006,6 +1859,7 @@ static int vgic_its_create(struct kvm_device *dev, u32 type)
INIT_LIST_HEAD(&its->device_list);
INIT_LIST_HEAD(&its->collection_list);
+ xa_init(&its->translation_cache);
dev->kvm->arch.vgic.msis_require_devid = true;
dev->kvm->arch.vgic.has_its = true;
@@ -2036,6 +1890,8 @@ static void vgic_its_destroy(struct kvm_device *kvm_dev)
vgic_its_free_device_list(kvm, its);
vgic_its_free_collection_list(kvm, its);
+ vgic_its_invalidate_cache(its);
+ xa_destroy(&its->translation_cache);
mutex_unlock(&its->its_lock);
kfree(its);
@@ -2438,7 +2294,7 @@ static int vgic_its_restore_dte(struct vgic_its *its, u32 id,
ret = vgic_its_restore_itt(its, dev);
if (ret) {
- vgic_its_free_device(its->dev->kvm, dev);
+ vgic_its_free_device(its->dev->kvm, its, dev);
return ret;
}
diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
index c15ee1df036a..a3983a631b5a 100644
--- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c
+++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
@@ -277,7 +277,7 @@ static void vgic_mmio_write_v3r_ctlr(struct kvm_vcpu *vcpu,
return;
vgic_flush_pending_lpis(vcpu);
- vgic_its_invalidate_cache(vcpu->kvm);
+ vgic_its_invalidate_all_caches(vcpu->kvm);
atomic_set_release(&vgic_cpu->ctlr, 0);
} else {
ctlr = atomic_cmpxchg_acquire(&vgic_cpu->ctlr, 0,
diff --git a/arch/arm64/kvm/vgic/vgic-v2.c b/arch/arm64/kvm/vgic/vgic-v2.c
index 7e9cdb78f7ce..ae5a44d5702d 100644
--- a/arch/arm64/kvm/vgic/vgic-v2.c
+++ b/arch/arm64/kvm/vgic/vgic-v2.c
@@ -464,17 +464,10 @@ void vgic_v2_load(struct kvm_vcpu *vcpu)
kvm_vgic_global_state.vctrl_base + GICH_APR);
}
-void vgic_v2_vmcr_sync(struct kvm_vcpu *vcpu)
-{
- struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
-
- cpu_if->vgic_vmcr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_VMCR);
-}
-
void vgic_v2_put(struct kvm_vcpu *vcpu)
{
struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
- vgic_v2_vmcr_sync(vcpu);
+ cpu_if->vgic_vmcr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_VMCR);
cpu_if->vgic_apr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_APR);
}
diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c
index 4ea3340786b9..ed6e412cd74b 100644
--- a/arch/arm64/kvm/vgic/vgic-v3.c
+++ b/arch/arm64/kvm/vgic/vgic-v3.c
@@ -722,15 +722,7 @@ void vgic_v3_load(struct kvm_vcpu *vcpu)
{
struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
- /*
- * If dealing with a GICv2 emulation on GICv3, VMCR_EL2.VFIQen
- * is dependent on ICC_SRE_EL1.SRE, and we have to perform the
- * VMCR_EL2 save/restore in the world switch.
- */
- if (likely(cpu_if->vgic_sre))
- kvm_call_hyp(__vgic_v3_write_vmcr, cpu_if->vgic_vmcr);
-
- kvm_call_hyp(__vgic_v3_restore_aprs, cpu_if);
+ kvm_call_hyp(__vgic_v3_restore_vmcr_aprs, cpu_if);
if (has_vhe())
__vgic_v3_activate_traps(cpu_if);
@@ -738,24 +730,13 @@ void vgic_v3_load(struct kvm_vcpu *vcpu)
WARN_ON(vgic_v4_load(vcpu));
}
-void vgic_v3_vmcr_sync(struct kvm_vcpu *vcpu)
-{
- struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
-
- if (likely(cpu_if->vgic_sre))
- cpu_if->vgic_vmcr = kvm_call_hyp_ret(__vgic_v3_read_vmcr);
-}
-
void vgic_v3_put(struct kvm_vcpu *vcpu)
{
struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
+ kvm_call_hyp(__vgic_v3_save_vmcr_aprs, cpu_if);
WARN_ON(vgic_v4_put(vcpu));
- vgic_v3_vmcr_sync(vcpu);
-
- kvm_call_hyp(__vgic_v3_save_aprs, cpu_if);
-
if (has_vhe())
__vgic_v3_deactivate_traps(cpu_if);
}
diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c
index 4ec93587c8cd..f07b3ddff7d4 100644
--- a/arch/arm64/kvm/vgic/vgic.c
+++ b/arch/arm64/kvm/vgic/vgic.c
@@ -29,9 +29,8 @@ struct vgic_global kvm_vgic_global_state __ro_after_init = {
* its->cmd_lock (mutex)
* its->its_lock (mutex)
* vgic_cpu->ap_list_lock must be taken with IRQs disabled
- * kvm->lpi_list_lock must be taken with IRQs disabled
- * vgic_dist->lpi_xa.xa_lock must be taken with IRQs disabled
- * vgic_irq->irq_lock must be taken with IRQs disabled
+ * vgic_dist->lpi_xa.xa_lock must be taken with IRQs disabled
+ * vgic_irq->irq_lock must be taken with IRQs disabled
*
* As the ap_list_lock might be taken from the timer interrupt handler,
* we have to disable IRQs before taking this lock and everything lower
@@ -126,7 +125,6 @@ void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq)
__xa_erase(&dist->lpi_xa, irq->intid);
xa_unlock_irqrestore(&dist->lpi_xa, flags);
- atomic_dec(&dist->lpi_count);
kfree_rcu(irq, rcu);
}
@@ -939,17 +937,6 @@ void kvm_vgic_put(struct kvm_vcpu *vcpu)
vgic_v3_put(vcpu);
}
-void kvm_vgic_vmcr_sync(struct kvm_vcpu *vcpu)
-{
- if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
- return;
-
- if (kvm_vgic_global_state.type == VGIC_V2)
- vgic_v2_vmcr_sync(vcpu);
- else
- vgic_v3_vmcr_sync(vcpu);
-}
-
int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
{
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h
index 0c2b82de8fa3..6106ebd5ba42 100644
--- a/arch/arm64/kvm/vgic/vgic.h
+++ b/arch/arm64/kvm/vgic/vgic.h
@@ -16,6 +16,7 @@
#define INTERRUPT_ID_BITS_SPIS 10
#define INTERRUPT_ID_BITS_ITS 16
+#define VGIC_LPI_MAX_INTID ((1 << INTERRUPT_ID_BITS_ITS) - 1)
#define VGIC_PRI_BITS 5
#define vgic_irq_is_sgi(intid) ((intid) < VGIC_NR_SGIS)
@@ -214,7 +215,6 @@ int vgic_register_dist_iodev(struct kvm *kvm, gpa_t dist_base_address,
void vgic_v2_init_lrs(void);
void vgic_v2_load(struct kvm_vcpu *vcpu);
void vgic_v2_put(struct kvm_vcpu *vcpu);
-void vgic_v2_vmcr_sync(struct kvm_vcpu *vcpu);
void vgic_v2_save_state(struct kvm_vcpu *vcpu);
void vgic_v2_restore_state(struct kvm_vcpu *vcpu);
@@ -253,7 +253,6 @@ bool vgic_v3_check_base(struct kvm *kvm);
void vgic_v3_load(struct kvm_vcpu *vcpu);
void vgic_v3_put(struct kvm_vcpu *vcpu);
-void vgic_v3_vmcr_sync(struct kvm_vcpu *vcpu);
bool vgic_has_its(struct kvm *kvm);
int kvm_vgic_register_its_device(void);
@@ -330,14 +329,11 @@ static inline bool vgic_dist_overlap(struct kvm *kvm, gpa_t base, size_t size)
}
bool vgic_lpis_enabled(struct kvm_vcpu *vcpu);
-int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 **intid_ptr);
int vgic_its_resolve_lpi(struct kvm *kvm, struct vgic_its *its,
u32 devid, u32 eventid, struct vgic_irq **irq);
struct vgic_its *vgic_msi_to_its(struct kvm *kvm, struct kvm_msi *msi);
int vgic_its_inject_cached_translation(struct kvm *kvm, struct kvm_msi *msi);
-void vgic_lpi_translation_cache_init(struct kvm *kvm);
-void vgic_lpi_translation_cache_destroy(struct kvm *kvm);
-void vgic_its_invalidate_cache(struct kvm *kvm);
+void vgic_its_invalidate_all_caches(struct kvm *kvm);
/* GICv4.1 MMIO interface */
int vgic_its_inv_lpi(struct kvm *kvm, struct vgic_irq *irq);