aboutsummaryrefslogtreecommitdiff
path: root/include/linux
diff options
context:
space:
mode:
authorGravatar Linus Torvalds <torvalds@linux-foundation.org> 2024-03-15 09:00:09 -0700
committerGravatar Linus Torvalds <torvalds@linux-foundation.org> 2024-03-15 09:00:09 -0700
commit32a50540c3d26341698505998dfca5b0e8fb4fd4 (patch)
treed50aae41b90ae2d24c7790af7f2e27e7ec05939e /include/linux
parentMerge tag 'mm-nonmm-stable-2024-03-14-09-36' of git://git.kernel.org/pub/scm/... (diff)
parentbcachefs: time_stats: shrink time_stat_buffer for better alignment (diff)
downloadlinux-32a50540c3d26341698505998dfca5b0e8fb4fd4.tar.gz
linux-32a50540c3d26341698505998dfca5b0e8fb4fd4.tar.bz2
linux-32a50540c3d26341698505998dfca5b0e8fb4fd4.zip
Merge tag 'bcachefs-2024-03-13' of https://evilpiepirate.org/git/bcachefs
Pull bcachefs updates from Kent Overstreet: - Subvolume children btree; this is needed for providing a userspace interface for walking subvolumes, which will come later - Lots of improvements to directory structure checking - Improved journal pipelining, significantly improving performance on high iodepth write workloads - Discard path improvements: the discard path is more efficient, and no longer flushes the journal unnecessarily - Buffered write path can now avoid taking the inode lock - new mm helper: memalloc_flags_{save|restore} - mempool now does kvmalloc mempools * tag 'bcachefs-2024-03-13' of https://evilpiepirate.org/git/bcachefs: (128 commits) bcachefs: time_stats: shrink time_stat_buffer for better alignment bcachefs: time_stats: split stats-with-quantiles into a separate structure bcachefs: mean_and_variance: put struct mean_and_variance_weighted on a diet bcachefs: time_stats: add larger units bcachefs: pull out time_stats.[ch] bcachefs: reconstruct_alloc cleanup bcachefs: fix bch_folio_sector padding bcachefs: Fix btree key cache coherency during replay bcachefs: Always flush write buffer in delete_dead_inodes() bcachefs: Fix order of gc_done passes bcachefs: fix deletion of indirect extents in btree_gc bcachefs: Prefer struct_size over open coded arithmetic bcachefs: Kill unused flags argument to btree_split() bcachefs: Check for writing superblocks with nonsense member seq fields bcachefs: fix bch2_journal_buf_to_text() lib/generic-radix-tree.c: Make nodes more reasonably sized bcachefs: copy_(to|from)_user_errcode() bcachefs: Split out bkey_types.h bcachefs: fix lost journal buf wakeup due to improved pipelining bcachefs: intercept mountoption value for bool type ...
Diffstat (limited to 'include/linux')
-rw-r--r--include/linux/fs.h1
-rw-r--r--include/linux/generic-radix-tree.h29
-rw-r--r--include/linux/mempool.h13
-rw-r--r--include/linux/sched.h4
-rw-r--r--include/linux/sched/mm.h60
5 files changed, 71 insertions, 36 deletions
diff --git a/include/linux/fs.h b/include/linux/fs.h
index d5d5a4ee24f0..00fc429b0af0 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3074,6 +3074,7 @@ extern struct inode *new_inode_pseudo(struct super_block *sb);
extern struct inode *new_inode(struct super_block *sb);
extern void free_inode_nonrcu(struct inode *inode);
extern int setattr_should_drop_suidgid(struct mnt_idmap *, struct inode *);
+extern int file_remove_privs_flags(struct file *file, unsigned int flags);
extern int file_remove_privs(struct file *);
int setattr_should_drop_sgid(struct mnt_idmap *idmap,
const struct inode *inode);
diff --git a/include/linux/generic-radix-tree.h b/include/linux/generic-radix-tree.h
index 847413164738..f3512fddf3d7 100644
--- a/include/linux/generic-radix-tree.h
+++ b/include/linux/generic-radix-tree.h
@@ -5,7 +5,7 @@
* DOC: Generic radix trees/sparse arrays
*
* Very simple and minimalistic, supporting arbitrary size entries up to
- * PAGE_SIZE.
+ * GENRADIX_NODE_SIZE.
*
* A genradix is defined with the type it will store, like so:
*
@@ -45,12 +45,15 @@
struct genradix_root;
+#define GENRADIX_NODE_SHIFT 9
+#define GENRADIX_NODE_SIZE (1U << GENRADIX_NODE_SHIFT)
+
struct __genradix {
struct genradix_root *root;
};
/*
- * NOTE: currently, sizeof(_type) must not be larger than PAGE_SIZE:
+ * NOTE: currently, sizeof(_type) must not be larger than GENRADIX_NODE_SIZE:
*/
#define __GENRADIX_INITIALIZER \
@@ -101,14 +104,14 @@ void __genradix_free(struct __genradix *);
static inline size_t __idx_to_offset(size_t idx, size_t obj_size)
{
if (__builtin_constant_p(obj_size))
- BUILD_BUG_ON(obj_size > PAGE_SIZE);
+ BUILD_BUG_ON(obj_size > GENRADIX_NODE_SIZE);
else
- BUG_ON(obj_size > PAGE_SIZE);
+ BUG_ON(obj_size > GENRADIX_NODE_SIZE);
if (!is_power_of_2(obj_size)) {
- size_t objs_per_page = PAGE_SIZE / obj_size;
+ size_t objs_per_page = GENRADIX_NODE_SIZE / obj_size;
- return (idx / objs_per_page) * PAGE_SIZE +
+ return (idx / objs_per_page) * GENRADIX_NODE_SIZE +
(idx % objs_per_page) * obj_size;
} else {
return idx * obj_size;
@@ -118,9 +121,9 @@ static inline size_t __idx_to_offset(size_t idx, size_t obj_size)
#define __genradix_cast(_radix) (typeof((_radix)->type[0]) *)
#define __genradix_obj_size(_radix) sizeof((_radix)->type[0])
#define __genradix_objs_per_page(_radix) \
- (PAGE_SIZE / sizeof((_radix)->type[0]))
+ (GENRADIX_NODE_SIZE / sizeof((_radix)->type[0]))
#define __genradix_page_remainder(_radix) \
- (PAGE_SIZE % sizeof((_radix)->type[0]))
+ (GENRADIX_NODE_SIZE % sizeof((_radix)->type[0]))
#define __genradix_idx_to_offset(_radix, _idx) \
__idx_to_offset(_idx, __genradix_obj_size(_radix))
@@ -217,8 +220,8 @@ static inline void __genradix_iter_advance(struct genradix_iter *iter,
iter->offset += obj_size;
if (!is_power_of_2(obj_size) &&
- (iter->offset & (PAGE_SIZE - 1)) + obj_size > PAGE_SIZE)
- iter->offset = round_up(iter->offset, PAGE_SIZE);
+ (iter->offset & (GENRADIX_NODE_SIZE - 1)) + obj_size > GENRADIX_NODE_SIZE)
+ iter->offset = round_up(iter->offset, GENRADIX_NODE_SIZE);
iter->pos++;
}
@@ -235,8 +238,8 @@ static inline void __genradix_iter_rewind(struct genradix_iter *iter,
return;
}
- if ((iter->offset & (PAGE_SIZE - 1)) == 0)
- iter->offset -= PAGE_SIZE % obj_size;
+ if ((iter->offset & (GENRADIX_NODE_SIZE - 1)) == 0)
+ iter->offset -= GENRADIX_NODE_SIZE % obj_size;
iter->offset -= obj_size;
iter->pos--;
@@ -263,7 +266,7 @@ static inline void __genradix_iter_rewind(struct genradix_iter *iter,
genradix_for_each_from(_radix, _iter, _p, 0)
#define genradix_last_pos(_radix) \
- (SIZE_MAX / PAGE_SIZE * __genradix_objs_per_page(_radix) - 1)
+ (SIZE_MAX / GENRADIX_NODE_SIZE * __genradix_objs_per_page(_radix) - 1)
/**
* genradix_for_each_reverse - iterate over entry in a genradix, reverse order
diff --git a/include/linux/mempool.h b/include/linux/mempool.h
index 7be1e32e6d42..16c5cc807ff6 100644
--- a/include/linux/mempool.h
+++ b/include/linux/mempool.h
@@ -95,6 +95,19 @@ static inline mempool_t *mempool_create_kmalloc_pool(int min_nr, size_t size)
(void *) size);
}
+void *mempool_kvmalloc(gfp_t gfp_mask, void *pool_data);
+void mempool_kvfree(void *element, void *pool_data);
+
+static inline int mempool_init_kvmalloc_pool(mempool_t *pool, int min_nr, size_t size)
+{
+ return mempool_init(pool, min_nr, mempool_kvmalloc, mempool_kvfree, (void *) size);
+}
+
+static inline mempool_t *mempool_create_kvmalloc_pool(int min_nr, size_t size)
+{
+ return mempool_create(min_nr, mempool_kvmalloc, mempool_kvfree, (void *) size);
+}
+
/*
* A mempool_alloc_t and mempool_free_t for a simple page allocator that
* allocates pages of the order specified by pool_data
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 94f0e618865b..3c2abbc587b4 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1639,8 +1639,8 @@ extern struct pid *cad_pid;
* I am cleaning dirty pages from some other bdi. */
#define PF_KTHREAD 0x00200000 /* I am a kernel thread */
#define PF_RANDOMIZE 0x00400000 /* Randomize virtual address space */
-#define PF__HOLE__00800000 0x00800000
-#define PF__HOLE__01000000 0x01000000
+#define PF_MEMALLOC_NORECLAIM 0x00800000 /* All allocation requests will clear __GFP_DIRECT_RECLAIM */
+#define PF_MEMALLOC_NOWARN 0x01000000 /* All allocation requests will inherit __GFP_NOWARN */
#define PF__HOLE__02000000 0x02000000
#define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_mask */
#define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index 7a4066d22883..b6543f9d78d6 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -236,16 +236,25 @@ static inline gfp_t current_gfp_context(gfp_t flags)
{
unsigned int pflags = READ_ONCE(current->flags);
- if (unlikely(pflags & (PF_MEMALLOC_NOIO | PF_MEMALLOC_NOFS | PF_MEMALLOC_PIN))) {
+ if (unlikely(pflags & (PF_MEMALLOC_NOIO |
+ PF_MEMALLOC_NOFS |
+ PF_MEMALLOC_NORECLAIM |
+ PF_MEMALLOC_NOWARN |
+ PF_MEMALLOC_PIN))) {
/*
- * NOIO implies both NOIO and NOFS and it is a weaker context
- * so always make sure it makes precedence
+ * Stronger flags before weaker flags:
+ * NORECLAIM implies NOIO, which in turn implies NOFS
*/
- if (pflags & PF_MEMALLOC_NOIO)
+ if (pflags & PF_MEMALLOC_NORECLAIM)
+ flags &= ~__GFP_DIRECT_RECLAIM;
+ else if (pflags & PF_MEMALLOC_NOIO)
flags &= ~(__GFP_IO | __GFP_FS);
else if (pflags & PF_MEMALLOC_NOFS)
flags &= ~__GFP_FS;
+ if (pflags & PF_MEMALLOC_NOWARN)
+ flags |= __GFP_NOWARN;
+
if (pflags & PF_MEMALLOC_PIN)
flags &= ~__GFP_MOVABLE;
}
@@ -307,6 +316,24 @@ static inline void might_alloc(gfp_t gfp_mask)
}
/**
+ * memalloc_flags_save - Add a PF_* flag to current->flags, save old value
+ *
+ * This allows PF_* flags to be conveniently added, irrespective of current
+ * value, and then the old version restored with memalloc_flags_restore().
+ */
+static inline unsigned memalloc_flags_save(unsigned flags)
+{
+ unsigned oldflags = ~current->flags & flags;
+ current->flags |= flags;
+ return oldflags;
+}
+
+static inline void memalloc_flags_restore(unsigned flags)
+{
+ current->flags &= ~flags;
+}
+
+/**
* memalloc_noio_save - Marks implicit GFP_NOIO allocation scope.
*
* This functions marks the beginning of the GFP_NOIO allocation scope.
@@ -320,9 +347,7 @@ static inline void might_alloc(gfp_t gfp_mask)
*/
static inline unsigned int memalloc_noio_save(void)
{
- unsigned int flags = current->flags & PF_MEMALLOC_NOIO;
- current->flags |= PF_MEMALLOC_NOIO;
- return flags;
+ return memalloc_flags_save(PF_MEMALLOC_NOIO);
}
/**
@@ -335,7 +360,7 @@ static inline unsigned int memalloc_noio_save(void)
*/
static inline void memalloc_noio_restore(unsigned int flags)
{
- current->flags = (current->flags & ~PF_MEMALLOC_NOIO) | flags;
+ memalloc_flags_restore(flags);
}
/**
@@ -352,9 +377,7 @@ static inline void memalloc_noio_restore(unsigned int flags)
*/
static inline unsigned int memalloc_nofs_save(void)
{
- unsigned int flags = current->flags & PF_MEMALLOC_NOFS;
- current->flags |= PF_MEMALLOC_NOFS;
- return flags;
+ return memalloc_flags_save(PF_MEMALLOC_NOFS);
}
/**
@@ -367,7 +390,7 @@ static inline unsigned int memalloc_nofs_save(void)
*/
static inline void memalloc_nofs_restore(unsigned int flags)
{
- current->flags = (current->flags & ~PF_MEMALLOC_NOFS) | flags;
+ memalloc_flags_restore(flags);
}
/**
@@ -395,9 +418,7 @@ static inline void memalloc_nofs_restore(unsigned int flags)
*/
static inline unsigned int memalloc_noreclaim_save(void)
{
- unsigned int flags = current->flags & PF_MEMALLOC;
- current->flags |= PF_MEMALLOC;
- return flags;
+ return memalloc_flags_save(PF_MEMALLOC);
}
/**
@@ -410,7 +431,7 @@ static inline unsigned int memalloc_noreclaim_save(void)
*/
static inline void memalloc_noreclaim_restore(unsigned int flags)
{
- current->flags = (current->flags & ~PF_MEMALLOC) | flags;
+ memalloc_flags_restore(flags);
}
/**
@@ -425,10 +446,7 @@ static inline void memalloc_noreclaim_restore(unsigned int flags)
*/
static inline unsigned int memalloc_pin_save(void)
{
- unsigned int flags = current->flags & PF_MEMALLOC_PIN;
-
- current->flags |= PF_MEMALLOC_PIN;
- return flags;
+ return memalloc_flags_save(PF_MEMALLOC_PIN);
}
/**
@@ -441,7 +459,7 @@ static inline unsigned int memalloc_pin_save(void)
*/
static inline void memalloc_pin_restore(unsigned int flags)
{
- current->flags = (current->flags & ~PF_MEMALLOC_PIN) | flags;
+ memalloc_flags_restore(flags);
}
#ifdef CONFIG_MEMCG