aboutsummaryrefslogtreecommitdiff
path: root/fs/proc
diff options
context:
space:
mode:
Diffstat (limited to 'fs/proc')
-rw-r--r--fs/proc/base.c8
-rw-r--r--fs/proc/inode.c2
-rw-r--r--fs/proc/page.c1
-rw-r--r--fs/proc/task_mmu.c49
-rw-r--r--fs/proc/vmcore.c43
5 files changed, 65 insertions, 38 deletions
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 01fb37ecc89f..c1031843cc6a 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1763,25 +1763,25 @@ out:
static int do_proc_readlink(struct path *path, char __user *buffer, int buflen)
{
- char *tmp = (char *)__get_free_page(GFP_KERNEL);
+ char *tmp = kmalloc(PATH_MAX, GFP_KERNEL);
char *pathname;
int len;
if (!tmp)
return -ENOMEM;
- pathname = d_path(path, tmp, PAGE_SIZE);
+ pathname = d_path(path, tmp, PATH_MAX);
len = PTR_ERR(pathname);
if (IS_ERR(pathname))
goto out;
- len = tmp + PAGE_SIZE - 1 - pathname;
+ len = tmp + PATH_MAX - 1 - pathname;
if (len > buflen)
len = buflen;
if (copy_to_user(buffer, pathname, len))
len = -EFAULT;
out:
- free_page((unsigned long)tmp);
+ kfree(tmp);
return len;
}
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index f84355c5a36d..73aeb4e6d32e 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -66,7 +66,7 @@ static struct inode *proc_alloc_inode(struct super_block *sb)
{
struct proc_inode *ei;
- ei = kmem_cache_alloc(proc_inode_cachep, GFP_KERNEL);
+ ei = alloc_inode_sb(sb, proc_inode_cachep, GFP_KERNEL);
if (!ei)
return NULL;
ei->pid = NULL;
diff --git a/fs/proc/page.c b/fs/proc/page.c
index 9f1077d94cde..a2873a617ae8 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -10,6 +10,7 @@
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/hugetlb.h>
+#include <linux/memremap.h>
#include <linux/memcontrol.h>
#include <linux/mmu_notifier.h>
#include <linux/page_idle.h>
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 18f8c3acbb85..f46060eb91b5 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -309,7 +309,7 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
name = arch_vma_name(vma);
if (!name) {
- const char *anon_name;
+ struct anon_vma_name *anon_name;
if (!mm) {
name = "[vdso]";
@@ -327,10 +327,10 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
goto done;
}
- anon_name = vma_anon_name(vma);
+ anon_name = anon_vma_name(vma);
if (anon_name) {
seq_pad(m, ' ');
- seq_printf(m, "[anon:%s]", anon_name);
+ seq_printf(m, "[anon:%s]", anon_name->name);
}
}
@@ -440,7 +440,8 @@ static void smaps_page_accumulate(struct mem_size_stats *mss,
}
static void smaps_account(struct mem_size_stats *mss, struct page *page,
- bool compound, bool young, bool dirty, bool locked)
+ bool compound, bool young, bool dirty, bool locked,
+ bool migration)
{
int i, nr = compound ? compound_nr(page) : 1;
unsigned long size = nr * PAGE_SIZE;
@@ -467,8 +468,15 @@ static void smaps_account(struct mem_size_stats *mss, struct page *page,
* page_count(page) == 1 guarantees the page is mapped exactly once.
* If any subpage of the compound page mapped with PTE it would elevate
* page_count().
+ *
+ * The page_mapcount() is called to get a snapshot of the mapcount.
+ * Without holding the page lock this snapshot can be slightly wrong as
+ * we cannot always read the mapcount atomically. It is not safe to
+ * call page_mapcount() even with PTL held if the page is not mapped,
+ * especially for migration entries. Treat regular migration entries
+ * as mapcount == 1.
*/
- if (page_count(page) == 1) {
+ if ((page_count(page) == 1) || migration) {
smaps_page_accumulate(mss, page, size, size << PSS_SHIFT, dirty,
locked, true);
return;
@@ -517,6 +525,7 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
struct vm_area_struct *vma = walk->vma;
bool locked = !!(vma->vm_flags & VM_LOCKED);
struct page *page = NULL;
+ bool migration = false;
if (pte_present(*pte)) {
page = vm_normal_page(vma, addr, *pte);
@@ -536,8 +545,11 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
} else {
mss->swap_pss += (u64)PAGE_SIZE << PSS_SHIFT;
}
- } else if (is_pfn_swap_entry(swpent))
+ } else if (is_pfn_swap_entry(swpent)) {
+ if (is_migration_entry(swpent))
+ migration = true;
page = pfn_swap_entry_to_page(swpent);
+ }
} else {
smaps_pte_hole_lookup(addr, walk);
return;
@@ -546,7 +558,8 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
if (!page)
return;
- smaps_account(mss, page, false, pte_young(*pte), pte_dirty(*pte), locked);
+ smaps_account(mss, page, false, pte_young(*pte), pte_dirty(*pte),
+ locked, migration);
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -557,6 +570,7 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
struct vm_area_struct *vma = walk->vma;
bool locked = !!(vma->vm_flags & VM_LOCKED);
struct page *page = NULL;
+ bool migration = false;
if (pmd_present(*pmd)) {
/* FOLL_DUMP will return -EFAULT on huge zero page */
@@ -564,8 +578,10 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
} else if (unlikely(thp_migration_supported() && is_swap_pmd(*pmd))) {
swp_entry_t entry = pmd_to_swp_entry(*pmd);
- if (is_migration_entry(entry))
+ if (is_migration_entry(entry)) {
+ migration = true;
page = pfn_swap_entry_to_page(entry);
+ }
}
if (IS_ERR_OR_NULL(page))
return;
@@ -577,7 +593,9 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
/* pass */;
else
mss->file_thp += HPAGE_PMD_SIZE;
- smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd), locked);
+
+ smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd),
+ locked, migration);
}
#else
static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
@@ -1378,6 +1396,7 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
{
u64 frame = 0, flags = 0;
struct page *page = NULL;
+ bool migration = false;
if (pte_present(pte)) {
if (pm->show_pfn)
@@ -1399,13 +1418,14 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
frame = swp_type(entry) |
(swp_offset(entry) << MAX_SWAPFILES_SHIFT);
flags |= PM_SWAP;
+ migration = is_migration_entry(entry);
if (is_pfn_swap_entry(entry))
page = pfn_swap_entry_to_page(entry);
}
if (page && !PageAnon(page))
flags |= PM_FILE;
- if (page && page_mapcount(page) == 1)
+ if (page && !migration && page_mapcount(page) == 1)
flags |= PM_MMAP_EXCLUSIVE;
if (vma->vm_flags & VM_SOFTDIRTY)
flags |= PM_SOFT_DIRTY;
@@ -1421,8 +1441,9 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
spinlock_t *ptl;
pte_t *pte, *orig_pte;
int err = 0;
-
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ bool migration = false;
+
ptl = pmd_trans_huge_lock(pmdp, vma);
if (ptl) {
u64 flags = 0, frame = 0;
@@ -1461,11 +1482,12 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
if (pmd_swp_uffd_wp(pmd))
flags |= PM_UFFD_WP;
VM_BUG_ON(!is_pmd_migration_entry(pmd));
+ migration = is_migration_entry(entry);
page = pfn_swap_entry_to_page(entry);
}
#endif
- if (page && page_mapcount(page) == 1)
+ if (page && !migration && page_mapcount(page) == 1)
flags |= PM_MMAP_EXCLUSIVE;
for (; addr != end; addr += PAGE_SIZE) {
@@ -1575,7 +1597,8 @@ static const struct mm_walk_ops pagemap_ops = {
* Bits 5-54 swap offset if swapped
* Bit 55 pte is soft-dirty (see Documentation/admin-guide/mm/soft-dirty.rst)
* Bit 56 page exclusively mapped
- * Bits 57-60 zero
+ * Bit 57 pte is uffd-wp write-protected
+ * Bits 58-60 zero
* Bit 61 page is file-page or shared-anon
* Bit 62 page swapped
* Bit 63 page present
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 702754dd1daf..6f1b8ddc6f7a 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -62,7 +62,8 @@ core_param(novmcoredd, vmcoredd_disabled, bool, 0);
/* Device Dump Size */
static size_t vmcoredd_orig_sz;
-static DECLARE_RWSEM(vmcore_cb_rwsem);
+static DEFINE_SPINLOCK(vmcore_cb_lock);
+DEFINE_STATIC_SRCU(vmcore_cb_srcu);
/* List of registered vmcore callbacks. */
static LIST_HEAD(vmcore_cb_list);
/* Whether the vmcore has been opened once. */
@@ -70,8 +71,8 @@ static bool vmcore_opened;
void register_vmcore_cb(struct vmcore_cb *cb)
{
- down_write(&vmcore_cb_rwsem);
INIT_LIST_HEAD(&cb->next);
+ spin_lock(&vmcore_cb_lock);
list_add_tail(&cb->next, &vmcore_cb_list);
/*
* Registering a vmcore callback after the vmcore was opened is
@@ -79,14 +80,14 @@ void register_vmcore_cb(struct vmcore_cb *cb)
*/
if (vmcore_opened)
pr_warn_once("Unexpected vmcore callback registration\n");
- up_write(&vmcore_cb_rwsem);
+ spin_unlock(&vmcore_cb_lock);
}
EXPORT_SYMBOL_GPL(register_vmcore_cb);
void unregister_vmcore_cb(struct vmcore_cb *cb)
{
- down_write(&vmcore_cb_rwsem);
- list_del(&cb->next);
+ spin_lock(&vmcore_cb_lock);
+ list_del_rcu(&cb->next);
/*
* Unregistering a vmcore callback after the vmcore was opened is
* very unusual (e.g., forced driver removal), but we cannot stop
@@ -94,7 +95,9 @@ void unregister_vmcore_cb(struct vmcore_cb *cb)
*/
if (vmcore_opened)
pr_warn_once("Unexpected vmcore callback unregistration\n");
- up_write(&vmcore_cb_rwsem);
+ spin_unlock(&vmcore_cb_lock);
+
+ synchronize_srcu(&vmcore_cb_srcu);
}
EXPORT_SYMBOL_GPL(unregister_vmcore_cb);
@@ -103,9 +106,8 @@ static bool pfn_is_ram(unsigned long pfn)
struct vmcore_cb *cb;
bool ret = true;
- lockdep_assert_held_read(&vmcore_cb_rwsem);
-
- list_for_each_entry(cb, &vmcore_cb_list, next) {
+ list_for_each_entry_srcu(cb, &vmcore_cb_list, next,
+ srcu_read_lock_held(&vmcore_cb_srcu)) {
if (unlikely(!cb->pfn_is_ram))
continue;
ret = cb->pfn_is_ram(cb, pfn);
@@ -118,9 +120,9 @@ static bool pfn_is_ram(unsigned long pfn)
static int open_vmcore(struct inode *inode, struct file *file)
{
- down_read(&vmcore_cb_rwsem);
+ spin_lock(&vmcore_cb_lock);
vmcore_opened = true;
- up_read(&vmcore_cb_rwsem);
+ spin_unlock(&vmcore_cb_lock);
return 0;
}
@@ -133,6 +135,7 @@ ssize_t read_from_oldmem(char *buf, size_t count,
unsigned long pfn, offset;
size_t nr_bytes;
ssize_t read = 0, tmp;
+ int idx;
if (!count)
return 0;
@@ -140,7 +143,7 @@ ssize_t read_from_oldmem(char *buf, size_t count,
offset = (unsigned long)(*ppos % PAGE_SIZE);
pfn = (unsigned long)(*ppos / PAGE_SIZE);
- down_read(&vmcore_cb_rwsem);
+ idx = srcu_read_lock(&vmcore_cb_srcu);
do {
if (count > (PAGE_SIZE - offset))
nr_bytes = PAGE_SIZE - offset;
@@ -165,7 +168,7 @@ ssize_t read_from_oldmem(char *buf, size_t count,
offset, userbuf);
}
if (tmp < 0) {
- up_read(&vmcore_cb_rwsem);
+ srcu_read_unlock(&vmcore_cb_srcu, idx);
return tmp;
}
@@ -176,8 +179,8 @@ ssize_t read_from_oldmem(char *buf, size_t count,
++pfn;
offset = 0;
} while (count);
+ srcu_read_unlock(&vmcore_cb_srcu, idx);
- up_read(&vmcore_cb_rwsem);
return read;
}
@@ -477,7 +480,7 @@ static const struct vm_operations_struct vmcore_mmap_ops = {
/**
* vmcore_alloc_buf - allocate buffer in vmalloc memory
- * @sizez: size of buffer
+ * @size: size of buffer
*
* If CONFIG_MMU is defined, use vmalloc_user() to allow users to mmap
* the buffer to user-space by means of remap_vmalloc_range().
@@ -568,18 +571,18 @@ static int vmcore_remap_oldmem_pfn(struct vm_area_struct *vma,
unsigned long from, unsigned long pfn,
unsigned long size, pgprot_t prot)
{
- int ret;
+ int ret, idx;
/*
- * Check if oldmem_pfn_is_ram was registered to avoid
- * looping over all pages without a reason.
+ * Check if a callback was registered to avoid looping over all
+ * pages without a reason.
*/
- down_read(&vmcore_cb_rwsem);
+ idx = srcu_read_lock(&vmcore_cb_srcu);
if (!list_empty(&vmcore_cb_list))
ret = remap_oldmem_pfn_checked(vma, from, pfn, size, prot);
else
ret = remap_oldmem_pfn_range(vma, from, pfn, size, prot);
- up_read(&vmcore_cb_rwsem);
+ srcu_read_unlock(&vmcore_cb_srcu, idx);
return ret;
}