aboutsummaryrefslogtreecommitdiff
path: root/arch/x86/include/asm/pgtable.h
diff options
context:
space:
mode:
authorGravatar Rick Edgecombe <rick.p.edgecombe@intel.com> 2023-06-12 17:10:36 -0700
committerGravatar Rick Edgecombe <rick.p.edgecombe@intel.com> 2023-07-11 14:12:19 -0700
commitfca4d413c5f707b759d1031b170cbb8884f0999d (patch)
tree8a437cc63b1cda5a6efdbf99a3d790a1f9747dbc /arch/x86/include/asm/pgtable.h
parentx86/mm: Move pmd_write(), pud_write() up in the file (diff)
downloadlinux-fca4d413c5f707b759d1031b170cbb8884f0999d.tar.gz
linux-fca4d413c5f707b759d1031b170cbb8884f0999d.tar.bz2
linux-fca4d413c5f707b759d1031b170cbb8884f0999d.zip
x86/mm: Introduce _PAGE_SAVED_DIRTY
Some OSes have a greater dependence on software available bits in PTEs than Linux. That left the hardware architects looking for a way to represent a new memory type (shadow stack) within the existing bits. They chose to repurpose a lightly-used state: Write=0,Dirty=1. So in order to support shadow stack memory, Linux should avoid creating memory with this PTE bit combination unless it intends for it to be shadow stack. The reason it's lightly used is that Dirty=1 is normally set by HW _before_ a write. A write with a Write=0 PTE would typically only generate a fault, not set Dirty=1. Hardware can (rarely) both set Dirty=1 *and* generate the fault, resulting in a Write=0,Dirty=1 PTE. Hardware which supports shadow stacks will no longer exhibit this oddity. So that leaves Write=0,Dirty=1 PTEs created in software. To avoid inadvertently created shadow stack memory, in places where Linux normally creates Write=0,Dirty=1, it can use the software-defined _PAGE_SAVED_DIRTY in place of the hardware _PAGE_DIRTY. In other words, whenever Linux needs to create Write=0,Dirty=1, it instead creates Write=0,SavedDirty=1 except for shadow stack, which is Write=0,Dirty=1. There are six bits left available to software in the 64-bit PTE after consuming a bit for _PAGE_SAVED_DIRTY. For 32 bit, the same bit as _PAGE_BIT_UFFD_WP is used, since user fault fd is not supported on 32 bit. This leaves one unused software bit on 32 bit (_PAGE_BIT_SOFT_DIRTY, as this is also not supported on 32 bit). Implement only the infrastructure for _PAGE_SAVED_DIRTY. Changes to actually begin creating _PAGE_SAVED_DIRTY PTEs will follow once other pieces are in place. Since this SavedDirty shifting is done for all x86 CPUs, this leaves the possibility for the hardware oddity to still create Write=0,Dirty=1 PTEs in rare cases. Since these CPUs also don't support shadow stack, this will be harmless as it was before the introduction of SavedDirty. Implement the shifting logic to be branchless. Embed the logic of whether to do the shifting (including checking the Write bits) so that it can be called by future callers that would otherwise need additional branching logic. This efficiency allows the logic of when to do the shifting to be centralized, making the code easier to reason about. Co-developed-by: Yu-cheng Yu <yu-cheng.yu@intel.com> Signed-off-by: Yu-cheng Yu <yu-cheng.yu@intel.com> Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com> Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com> Tested-by: Pengfei Xu <pengfei.xu@intel.com> Tested-by: John Allen <john.allen@amd.com> Tested-by: Kees Cook <keescook@chromium.org> Link: https://lore.kernel.org/all/20230613001108.3040476-11-rick.p.edgecombe%40intel.com
Diffstat (limited to 'arch/x86/include/asm/pgtable.h')
-rw-r--r--arch/x86/include/asm/pgtable.h83
1 files changed, 83 insertions, 0 deletions
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 5f6e4ac1aad8..0c0747cbad3e 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -302,6 +302,53 @@ static inline pte_t pte_clear_flags(pte_t pte, pteval_t clear)
return native_make_pte(v & ~clear);
}
+/*
+ * Write protection operations can result in Dirty=1,Write=0 PTEs. But in the
+ * case of X86_FEATURE_USER_SHSTK, these PTEs denote shadow stack memory. So
+ * when creating dirty, write-protected memory, a software bit is used:
+ * _PAGE_BIT_SAVED_DIRTY. The following functions take a PTE and transition the
+ * Dirty bit to SavedDirty, and vice-vesra.
+ *
+ * This shifting is only done if needed. In the case of shifting
+ * Dirty->SavedDirty, the condition is if the PTE is Write=0. In the case of
+ * shifting SavedDirty->Dirty, the condition is Write=1.
+ */
+static inline pgprotval_t mksaveddirty_shift(pgprotval_t v)
+{
+ pgprotval_t cond = (~v >> _PAGE_BIT_RW) & 1;
+
+ v |= ((v >> _PAGE_BIT_DIRTY) & cond) << _PAGE_BIT_SAVED_DIRTY;
+ v &= ~(cond << _PAGE_BIT_DIRTY);
+
+ return v;
+}
+
+static inline pgprotval_t clear_saveddirty_shift(pgprotval_t v)
+{
+ pgprotval_t cond = (v >> _PAGE_BIT_RW) & 1;
+
+ v |= ((v >> _PAGE_BIT_SAVED_DIRTY) & cond) << _PAGE_BIT_DIRTY;
+ v &= ~(cond << _PAGE_BIT_SAVED_DIRTY);
+
+ return v;
+}
+
+static inline pte_t pte_mksaveddirty(pte_t pte)
+{
+ pteval_t v = native_pte_val(pte);
+
+ v = mksaveddirty_shift(v);
+ return native_make_pte(v);
+}
+
+static inline pte_t pte_clear_saveddirty(pte_t pte)
+{
+ pteval_t v = native_pte_val(pte);
+
+ v = clear_saveddirty_shift(v);
+ return native_make_pte(v);
+}
+
static inline pte_t pte_wrprotect(pte_t pte)
{
return pte_clear_flags(pte, _PAGE_RW);
@@ -414,6 +461,24 @@ static inline pmd_t pmd_clear_flags(pmd_t pmd, pmdval_t clear)
return native_make_pmd(v & ~clear);
}
+/* See comments above mksaveddirty_shift() */
+static inline pmd_t pmd_mksaveddirty(pmd_t pmd)
+{
+ pmdval_t v = native_pmd_val(pmd);
+
+ v = mksaveddirty_shift(v);
+ return native_make_pmd(v);
+}
+
+/* See comments above mksaveddirty_shift() */
+static inline pmd_t pmd_clear_saveddirty(pmd_t pmd)
+{
+ pmdval_t v = native_pmd_val(pmd);
+
+ v = clear_saveddirty_shift(v);
+ return native_make_pmd(v);
+}
+
static inline pmd_t pmd_wrprotect(pmd_t pmd)
{
return pmd_clear_flags(pmd, _PAGE_RW);
@@ -485,6 +550,24 @@ static inline pud_t pud_clear_flags(pud_t pud, pudval_t clear)
return native_make_pud(v & ~clear);
}
+/* See comments above mksaveddirty_shift() */
+static inline pud_t pud_mksaveddirty(pud_t pud)
+{
+ pudval_t v = native_pud_val(pud);
+
+ v = mksaveddirty_shift(v);
+ return native_make_pud(v);
+}
+
+/* See comments above mksaveddirty_shift() */
+static inline pud_t pud_clear_saveddirty(pud_t pud)
+{
+ pudval_t v = native_pud_val(pud);
+
+ v = clear_saveddirty_shift(v);
+ return native_make_pud(v);
+}
+
static inline pud_t pud_mkold(pud_t pud)
{
return pud_clear_flags(pud, _PAGE_ACCESSED);