[SRU][Mantic][PATCH 2/2] io_uring/kbuf: defer release of mapped buffer rings
Yuxuan Luo
yuxuan.luo at canonical.com
Tue Jan 30 02:35:19 UTC 2024
From: Jens Axboe <axboe at kernel.dk>
If a provided buffer ring is setup with IOU_PBUF_RING_MMAP, then the
kernel allocates the memory for it and the application is expected to
mmap(2) this memory. However, io_uring uses remap_pfn_range() for this
operation, so we cannot rely on normal munmap/release on freeing them
for us.
Stash an io_buf_free entry away for each of these, if any, and provide
a helper to free them post ->release().
Cc: stable at vger.kernel.org
Fixes: c56e022c0a27 ("io_uring: add support for user mapped provided buffer ring")
Reported-by: Jann Horn <jannh at google.com>
Signed-off-by: Jens Axboe <axboe at kernel.dk>
(cherry picked from commit c392cbecd8eca4c53f2bf508731257d9d0a21c2d)
CVE-2024-0582
Signed-off-by: Yuxuan Luo <yuxuan.luo at canonical.com>
---
include/linux/io_uring_types.h | 3 +++
io_uring/io_uring.c | 2 ++
io_uring/kbuf.c | 44 ++++++++++++++++++++++++++++++----
io_uring/kbuf.h | 2 ++
4 files changed, 46 insertions(+), 5 deletions(-)
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 13d19b9be9f4a..5fd664fb71c86 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -327,6 +327,9 @@ struct io_ring_ctx {
struct list_head io_buffers_cache;
+ /* deferred free list, protected by ->uring_lock */
+ struct hlist_head io_buf_list;
+
/* Keep this last, we don't need it for the fast path */
struct wait_queue_head poll_wq;
struct io_restriction restrictions;
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 1b0a27fe41eef..4c21b068b7578 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -323,6 +323,7 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
INIT_LIST_HEAD(&ctx->sqd_list);
INIT_LIST_HEAD(&ctx->cq_overflow_list);
INIT_LIST_HEAD(&ctx->io_buffers_cache);
+ INIT_HLIST_HEAD(&ctx->io_buf_list);
io_alloc_cache_init(&ctx->rsrc_node_cache, IO_NODE_ALLOC_CACHE_MAX,
sizeof(struct io_rsrc_node));
io_alloc_cache_init(&ctx->apoll_cache, IO_ALLOC_CACHE_MAX,
@@ -2942,6 +2943,7 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
ctx->mm_account = NULL;
}
io_rings_free(ctx);
+ io_kbuf_mmap_list_free(ctx);
percpu_ref_exit(&ctx->refs);
free_uid(ctx->user);
diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c
index 9123138aa9f48..79cf9131182b4 100644
--- a/io_uring/kbuf.c
+++ b/io_uring/kbuf.c
@@ -28,6 +28,11 @@ struct io_provide_buf {
__u16 bid;
};
+struct io_buf_free {
+ struct hlist_node list;
+ void *mem;
+};
+
static inline struct io_buffer_list *io_buffer_get_list(struct io_ring_ctx *ctx,
unsigned int bgid)
{
@@ -218,7 +223,10 @@ static int __io_remove_buffers(struct io_ring_ctx *ctx,
if (bl->is_mapped) {
i = bl->buf_ring->tail - bl->head;
if (bl->is_mmap) {
- folio_put(virt_to_folio(bl->buf_ring));
+ /*
+ * io_kbuf_list_free() will free the page(s) at
+ * ->release() time.
+ */
bl->buf_ring = NULL;
bl->is_mmap = 0;
} else if (bl->buf_nr_pages) {
@@ -523,18 +531,28 @@ static int io_pin_pbuf_ring(struct io_uring_buf_reg *reg,
return -EINVAL;
}
-static int io_alloc_pbuf_ring(struct io_uring_buf_reg *reg,
+static int io_alloc_pbuf_ring(struct io_ring_ctx *ctx,
+ struct io_uring_buf_reg *reg,
struct io_buffer_list *bl)
{
- gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO | __GFP_NOWARN | __GFP_COMP;
+ struct io_buf_free *ibf;
size_t ring_size;
void *ptr;
ring_size = reg->ring_entries * sizeof(struct io_uring_buf_ring);
- ptr = (void *) __get_free_pages(gfp, get_order(ring_size));
+ ptr = io_mem_alloc(ring_size);
if (!ptr)
return -ENOMEM;
+ /* Allocate and store deferred free entry */
+ ibf = kmalloc(sizeof(*ibf), GFP_KERNEL_ACCOUNT);
+ if (!ibf) {
+ io_mem_free(ptr);
+ return -ENOMEM;
+ }
+ ibf->mem = ptr;
+ hlist_add_head(&ibf->list, &ctx->io_buf_list);
+
bl->buf_ring = ptr;
bl->is_mapped = 1;
bl->is_mmap = 1;
@@ -591,7 +609,7 @@ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
if (!(reg.flags & IOU_PBUF_RING_MMAP))
ret = io_pin_pbuf_ring(®, bl);
else
- ret = io_alloc_pbuf_ring(®, bl);
+ ret = io_alloc_pbuf_ring(ctx, ®, bl);
if (!ret) {
bl->nr_entries = reg.ring_entries;
@@ -641,3 +659,19 @@ void *io_pbuf_get_address(struct io_ring_ctx *ctx, unsigned long bgid)
return bl->buf_ring;
}
+
+/*
+ * Called at or after ->release(), free the mmap'ed buffers that we used
+ * for memory mapped provided buffer rings.
+ */
+void io_kbuf_mmap_list_free(struct io_ring_ctx *ctx)
+{
+ struct io_buf_free *ibf;
+ struct hlist_node *tmp;
+
+ hlist_for_each_entry_safe(ibf, tmp, &ctx->io_buf_list, list) {
+ hlist_del(&ibf->list);
+ io_mem_free(ibf->mem);
+ kfree(ibf);
+ }
+}
diff --git a/io_uring/kbuf.h b/io_uring/kbuf.h
index d14345ef61fc8..ecb5f955cd42e 100644
--- a/io_uring/kbuf.h
+++ b/io_uring/kbuf.h
@@ -51,6 +51,8 @@ int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags);
int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg);
int io_unregister_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg);
+void io_kbuf_mmap_list_free(struct io_ring_ctx *ctx);
+
unsigned int __io_put_kbuf(struct io_kiocb *req, unsigned issue_flags);
void io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags);
--
2.34.1
More information about the kernel-team
mailing list