aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--arch/x86/coco/tdx/tdcall.S13
-rw-r--r--arch/x86/coco/tdx/tdx.c93
-rw-r--r--arch/x86/include/asm/tdx.h4
-rw-r--r--arch/x86/kernel/process.c4
4 files changed, 112 insertions, 2 deletions
diff --git a/arch/x86/coco/tdx/tdcall.S b/arch/x86/coco/tdx/tdcall.S
index 662479ccf630..245888290bb6 100644
--- a/arch/x86/coco/tdx/tdcall.S
+++ b/arch/x86/coco/tdx/tdcall.S
@@ -139,6 +139,19 @@ SYM_FUNC_START(__tdx_hypercall)
movl $TDVMCALL_EXPOSE_REGS_MASK, %ecx
+ /*
+ * For the idle loop STI needs to be called directly before the TDCALL
+ * that enters idle (EXIT_REASON_HLT case). STI instruction enables
+ * interrupts only one instruction later. If there is a window between
+ * STI and the instruction that emulates the HALT state, there is a
+ * chance for interrupts to happen in this window, which can delay the
+ * HLT operation indefinitely. Since this is the not the desired
+ * result, conditionally call STI before TDCALL.
+ */
+ testq $TDX_HCALL_ISSUE_STI, %rsi
+ jz .Lskip_sti
+ sti
+.Lskip_sti:
tdcall
/*
diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
index 60a3f2ff5b95..ed7302581cc7 100644
--- a/arch/x86/coco/tdx/tdx.c
+++ b/arch/x86/coco/tdx/tdx.c
@@ -7,6 +7,7 @@
#include <linux/cpufeature.h>
#include <asm/coco.h>
#include <asm/tdx.h>
+#include <asm/vmx.h>
/* TDX module Call Leaf IDs */
#define TDX_GET_INFO 1
@@ -37,6 +38,17 @@ void __tdx_hypercall_failed(void)
}
/*
+ * The TDG.VP.VMCALL-Instruction-execution sub-functions are defined
+ * independently from but are currently matched 1:1 with VMX EXIT_REASONs.
+ * Reusing the KVM EXIT_REASON macros makes it easier to connect the host and
+ * guest sides of these calls.
+ */
+static u64 hcall_func(u64 exit_reason)
+{
+ return exit_reason;
+}
+
+/*
* Used for TDX guests to make calls directly to the TD module. This
* should only be used for calls that have no legitimate reason to fail
* or where the kernel can not survive the call failing.
@@ -74,6 +86,62 @@ static u64 get_cc_mask(void)
return BIT_ULL(gpa_width - 1);
}
+static u64 __cpuidle __halt(const bool irq_disabled, const bool do_sti)
+{
+ struct tdx_hypercall_args args = {
+ .r10 = TDX_HYPERCALL_STANDARD,
+ .r11 = hcall_func(EXIT_REASON_HLT),
+ .r12 = irq_disabled,
+ };
+
+ /*
+ * Emulate HLT operation via hypercall. More info about ABI
+ * can be found in TDX Guest-Host-Communication Interface
+ * (GHCI), section 3.8 TDG.VP.VMCALL<Instruction.HLT>.
+ *
+ * The VMM uses the "IRQ disabled" param to understand IRQ
+ * enabled status (RFLAGS.IF) of the TD guest and to determine
+ * whether or not it should schedule the halted vCPU if an
+ * IRQ becomes pending. E.g. if IRQs are disabled, the VMM
+ * can keep the vCPU in virtual HLT, even if an IRQ is
+ * pending, without hanging/breaking the guest.
+ */
+ return __tdx_hypercall(&args, do_sti ? TDX_HCALL_ISSUE_STI : 0);
+}
+
+static bool handle_halt(void)
+{
+ /*
+ * Since non safe halt is mainly used in CPU offlining
+ * and the guest will always stay in the halt state, don't
+ * call the STI instruction (set do_sti as false).
+ */
+ const bool irq_disabled = irqs_disabled();
+ const bool do_sti = false;
+
+ if (__halt(irq_disabled, do_sti))
+ return false;
+
+ return true;
+}
+
+void __cpuidle tdx_safe_halt(void)
+{
+ /*
+ * For do_sti=true case, __tdx_hypercall() function enables
+ * interrupts using the STI instruction before the TDCALL. So
+ * set irq_disabled as false.
+ */
+ const bool irq_disabled = false;
+ const bool do_sti = true;
+
+ /*
+ * Use WARN_ONCE() to report the failure.
+ */
+ if (__halt(irq_disabled, do_sti))
+ WARN_ONCE(1, "HLT instruction emulation failed\n");
+}
+
void tdx_get_ve_info(struct ve_info *ve)
{
struct tdx_module_output out;
@@ -104,11 +172,32 @@ void tdx_get_ve_info(struct ve_info *ve)
ve->instr_info = upper_32_bits(out.r10);
}
+/* Handle the kernel #VE */
+static bool virt_exception_kernel(struct pt_regs *regs, struct ve_info *ve)
+{
+ switch (ve->exit_reason) {
+ case EXIT_REASON_HLT:
+ return handle_halt();
+ default:
+ pr_warn("Unexpected #VE: %lld\n", ve->exit_reason);
+ return false;
+ }
+}
+
bool tdx_handle_virt_exception(struct pt_regs *regs, struct ve_info *ve)
{
- pr_warn("Unexpected #VE: %lld\n", ve->exit_reason);
+ bool ret;
+
+ if (user_mode(regs))
+ ret = false;
+ else
+ ret = virt_exception_kernel(regs, ve);
+
+ /* After successful #VE handling, move the IP */
+ if (ret)
+ regs->ip += ve->instr_len;
- return false;
+ return ret;
}
void __init tdx_early_init(void)
diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h
index c4142e7b004c..cbd61e142f4e 100644
--- a/arch/x86/include/asm/tdx.h
+++ b/arch/x86/include/asm/tdx.h
@@ -14,6 +14,7 @@
#define TDX_HYPERCALL_STANDARD 0
#define TDX_HCALL_HAS_OUTPUT BIT(0)
+#define TDX_HCALL_ISSUE_STI BIT(1)
/*
* SW-defined error codes.
@@ -91,9 +92,12 @@ void tdx_get_ve_info(struct ve_info *ve);
bool tdx_handle_virt_exception(struct pt_regs *regs, struct ve_info *ve);
+void tdx_safe_halt(void);
+
#else
static inline void tdx_early_init(void) { };
+static inline void tdx_safe_halt(void) { };
#endif /* CONFIG_INTEL_TDX_GUEST */
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index b370767f5b19..dbaf12c43fe1 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -46,6 +46,7 @@
#include <asm/proto.h>
#include <asm/frame.h>
#include <asm/unwind.h>
+#include <asm/tdx.h>
#include "process.h"
@@ -873,6 +874,9 @@ void select_idle_routine(const struct cpuinfo_x86 *c)
} else if (prefer_mwait_c1_over_halt(c)) {
pr_info("using mwait in idle threads\n");
x86_idle = mwait_idle;
+ } else if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) {
+ pr_info("using TDX aware idle routine\n");
+ x86_idle = tdx_safe_halt;
} else
x86_idle = default_idle;
}