aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Pavel Begunkov <asml.silence@gmail.com> 2022-06-25 11:55:38 +0100
committerGravatar Jens Axboe <axboe@kernel.dk> 2022-07-24 18:39:16 -0600
commit6e73dffbb93cb8797cd4e42e98d837edf0f1a967 (patch)
treea1ea7810d2221f23c3bc0f673e6cc6a23262b5c7
parentio_uring: add support for passing fixed file descriptors (diff)
downloadlinux-6e73dffbb93cb8797cd4e42e98d837edf0f1a967.tar.gz
linux-6e73dffbb93cb8797cd4e42e98d837edf0f1a967.tar.bz2
linux-6e73dffbb93cb8797cd4e42e98d837edf0f1a967.zip
io_uring: let to set a range for file slot allocation
io_uring recently gained an option to allocate a file index for operations registering fixed files. However, it's utterly unusable with mixed approaches, where for some of the files userspace knows better where to place them: the allocation may race, and users don't have any sane way to pick a slot other than hoping it will not be taken. Let userspace register a range of fixed file slots in which the auto-allocation happens. The use case is splitting the fixed table into two parts, where one of them is used for auto-allocation and the other for slot-specified operations. Signed-off-by: Pavel Begunkov <asml.silence@gmail.com> Link: https://lore.kernel.org/r/66ab0394e436f38437cf7c44676e1920d09687ad.1656154403.git.asml.silence@gmail.com Signed-off-by: Jens Axboe <axboe@kernel.dk>
-rw-r--r--include/linux/io_uring_types.h3
-rw-r--r--include/uapi/linux/io_uring.h13
-rw-r--r--io_uring/filetable.c24
-rw-r--r--io_uring/filetable.h20
-rw-r--r--io_uring/io_uring.c6
-rw-r--r--io_uring/rsrc.c2
6 files changed, 61 insertions, 7 deletions
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 3ca8f363f504..26ef11e978d4 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -233,6 +233,9 @@ struct io_ring_ctx {
unsigned long check_cq;
+ unsigned int file_alloc_start;
+ unsigned int file_alloc_end;
+
struct {
/*
* We cache a range of free CQEs we can use, once exhausted it
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index f378eabbff21..cf95354198a3 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -449,6 +449,9 @@ enum {
/* sync cancelation API */
IORING_REGISTER_SYNC_CANCEL = 24,
+ /* register a range of fixed file slots for automatic slot allocation */
+ IORING_REGISTER_FILE_ALLOC_RANGE = 25,
+
/* this goes last */
IORING_REGISTER_LAST
};
@@ -595,4 +598,14 @@ struct io_uring_sync_cancel_reg {
__u64 pad[4];
};
+/*
+ * Argument for IORING_REGISTER_FILE_ALLOC_RANGE
+ * The range is specified as [off, off + len)
+ */
+struct io_uring_file_index_range {
+ __u32 off;
+ __u32 len;
+ __u64 resv;
+};
+
#endif
diff --git a/io_uring/filetable.c b/io_uring/filetable.c
index abaa5ba7f655..7b473259f3f4 100644
--- a/io_uring/filetable.c
+++ b/io_uring/filetable.c
@@ -16,7 +16,7 @@
static int io_file_bitmap_get(struct io_ring_ctx *ctx)
{
struct io_file_table *table = &ctx->file_table;
- unsigned long nr = ctx->nr_user_files;
+ unsigned long nr = ctx->file_alloc_end;
int ret;
do {
@@ -24,11 +24,10 @@ static int io_file_bitmap_get(struct io_ring_ctx *ctx)
if (ret != nr)
return ret;
- if (!table->alloc_hint)
+ if (table->alloc_hint == ctx->file_alloc_start)
break;
-
nr = table->alloc_hint;
- table->alloc_hint = 0;
+ table->alloc_hint = ctx->file_alloc_start;
} while (1);
return -ENFILE;
@@ -175,3 +174,20 @@ int io_fixed_fd_remove(struct io_ring_ctx *ctx, unsigned int offset)
io_rsrc_node_switch(ctx, ctx->file_data);
return 0;
}
+
+int io_register_file_alloc_range(struct io_ring_ctx *ctx,
+ struct io_uring_file_index_range __user *arg)
+{
+ struct io_uring_file_index_range range;
+ u32 end;
+
+ if (copy_from_user(&range, arg, sizeof(range)))
+ return -EFAULT;
+ if (check_add_overflow(range.off, range.len, &end))
+ return -EOVERFLOW;
+ if (range.resv || end > ctx->nr_user_files)
+ return -EINVAL;
+
+ io_file_table_set_alloc_range(ctx, range.off, range.len);
+ return 0;
+}
diff --git a/io_uring/filetable.h b/io_uring/filetable.h
index 79eb50c1980e..ff3a712e11bf 100644
--- a/io_uring/filetable.h
+++ b/io_uring/filetable.h
@@ -3,9 +3,7 @@
#define IOU_FILE_TABLE_H
#include <linux/file.h>
-
-struct io_ring_ctx;
-struct io_kiocb;
+#include <linux/io_uring_types.h>
/*
* FFS_SCM is only available on 64-bit archs, for 32-bit we just define it as 0
@@ -33,6 +31,9 @@ int __io_fixed_fd_install(struct io_ring_ctx *ctx, struct file *file,
unsigned int file_slot);
int io_fixed_fd_remove(struct io_ring_ctx *ctx, unsigned int offset);
+int io_register_file_alloc_range(struct io_ring_ctx *ctx,
+ struct io_uring_file_index_range __user *arg);
+
unsigned int io_file_get_flags(struct file *file);
static inline void io_file_bitmap_clear(struct io_file_table *table, int bit)
@@ -71,4 +72,17 @@ static inline void io_fixed_file_set(struct io_fixed_file *file_slot,
file_slot->file_ptr = file_ptr;
}
+static inline void io_reset_alloc_hint(struct io_ring_ctx *ctx)
+{
+ ctx->file_table.alloc_hint = ctx->file_alloc_start;
+}
+
+static inline void io_file_table_set_alloc_range(struct io_ring_ctx *ctx,
+ unsigned off, unsigned len)
+{
+ ctx->file_alloc_start = off;
+ ctx->file_alloc_end = off + len;
+ io_reset_alloc_hint(ctx);
+}
+
#endif
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 070ee9ec9ee7..745264938a48 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -3866,6 +3866,12 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
break;
ret = io_sync_cancel(ctx, arg);
break;
+ case IORING_REGISTER_FILE_ALLOC_RANGE:
+ ret = -EINVAL;
+ if (!arg || nr_args)
+ break;
+ ret = io_register_file_alloc_range(ctx, arg);
+ break;
default:
ret = -EINVAL;
break;
diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
index 706fa020505b..d2e589c703d0 100644
--- a/io_uring/rsrc.c
+++ b/io_uring/rsrc.c
@@ -1012,6 +1012,8 @@ int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
io_file_bitmap_set(&ctx->file_table, i);
}
+ /* default it to the whole table */
+ io_file_table_set_alloc_range(ctx, 0, ctx->nr_user_files);
io_rsrc_node_switch(ctx, NULL);
return 0;
fail: