From 2a4a06da8a4b93dd189171eed7a99fffd38f42f3 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 13 Nov 2020 11:41:40 +0100 Subject: mm/gup: Provide gup_get_pte() more generic In order to write another lockless page-table walker, we need gup_get_pte() exposed. While doing that, rename it to ptep_get_lockless() to match the existing ptep_get() naming. Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20201126121121.036370527@infradead.org --- mm/gup.c | 58 +--------------------------------------------------------- 1 file changed, 1 insertion(+), 57 deletions(-) (limited to 'mm/gup.c') diff --git a/mm/gup.c b/mm/gup.c index 98eb8e6d2609..44b0c6b89602 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -2085,62 +2085,6 @@ static void put_compound_head(struct page *page, int refs, unsigned int flags) put_page(page); } -#ifdef CONFIG_GUP_GET_PTE_LOW_HIGH - -/* - * WARNING: only to be used in the get_user_pages_fast() implementation. - * - * With get_user_pages_fast(), we walk down the pagetables without taking any - * locks. For this we would like to load the pointers atomically, but sometimes - * that is not possible (e.g. without expensive cmpxchg8b on x86_32 PAE). What - * we do have is the guarantee that a PTE will only either go from not present - * to present, or present to not present or both -- it will not switch to a - * completely different present page without a TLB flush in between; something - * that we are blocking by holding interrupts off. - * - * Setting ptes from not present to present goes: - * - * ptep->pte_high = h; - * smp_wmb(); - * ptep->pte_low = l; - * - * And present to not present goes: - * - * ptep->pte_low = 0; - * smp_wmb(); - * ptep->pte_high = 0; - * - * We must ensure here that the load of pte_low sees 'l' IFF pte_high sees 'h'. - * We load pte_high *after* loading pte_low, which ensures we don't see an older - * value of pte_high. *Then* we recheck pte_low, which ensures that we haven't - * picked up a changed pte high. We might have gotten rubbish values from - * pte_low and pte_high, but we are guaranteed that pte_low will not have the - * present bit set *unless* it is 'l'. Because get_user_pages_fast() only - * operates on present ptes we're safe. - */ -static inline pte_t gup_get_pte(pte_t *ptep) -{ - pte_t pte; - - do { - pte.pte_low = ptep->pte_low; - smp_rmb(); - pte.pte_high = ptep->pte_high; - smp_rmb(); - } while (unlikely(pte.pte_low != ptep->pte_low)); - - return pte; -} -#else /* CONFIG_GUP_GET_PTE_LOW_HIGH */ -/* - * We require that the PTE can be read atomically. - */ -static inline pte_t gup_get_pte(pte_t *ptep) -{ - return ptep_get(ptep); -} -#endif /* CONFIG_GUP_GET_PTE_LOW_HIGH */ - static void __maybe_unused undo_dev_pagemap(int *nr, int nr_start, unsigned int flags, struct page **pages) @@ -2166,7 +2110,7 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end, ptem = ptep = pte_offset_map(&pmd, addr); do { - pte_t pte = gup_get_pte(ptep); + pte_t pte = ptep_get_lockless(ptep); struct page *head, *page; /* -- cgit v1.2.3