UBUNTU: SAUCE: x86: introduce /dev/mem restrictions with a config option

Kees Cook kees at ubuntu.com
Thu Feb 14 23:01:15 UTC 2008


OriginalAuthor: Arjan van de Ven <arjan at linux.intel.com>
OriginalLocation: http://lkml.org/lkml/2008/1/30/473

Backported from linux-2.6-x86 efa3098e4fa842405c91cb0d739fc649333e6d2f,
which contained additional init_32/64 ioremap_32/64 mergings and EFI clean-ups.
Upstream BIOS-sanity fixes (950f9d95bed1a366434d3597ea75f5b9d772d74f) were
included, with the page_is_ram() and devmem_is_allowed() functions duplicated
between init_32.c and init_64.c.

Original commit message:

This patch introduces a restriction on /dev/mem: Only non-memory can be
read or written unless the newly introduced config option is set.

The X server needs access to /dev/mem for the PCI space, but it doesn't need
access to memory; both the file permissions and SELinux permissions of /dev/mem
just make X effectively super-super powerful. With the exception of the
BIOS area, there's just no valid app that uses /dev/mem on actual memory.
Other popular users of /dev/mem are rootkits and the like.
(note: mmap access of memory via /dev/mem was already not allowed since
a really long time)

People who want to use /dev/mem for kernel debugging can enable the config
option.

The restrictions of this patch have been in the Fedora and RHEL kernels for
at least 4 years without any problems.

Signed-off-by: Arjan van de Ven <arjan at linux.intel.com>
Signed-off-by: Ingo Molnar <mingo at elte.hu>
Signed-off-by: Thomas Gleixner <tglx at linutronix.de>
Signed-off-by: Kees Cook <kees at ubuntu.com>
---
 b/arch/x86/Kconfig.debug    |   12 ++++++++
 b/arch/x86/mm/init_32.c     |   42 ++++++++++++++++++++++------
 b/arch/x86/mm/init_64.c     |   66 ++++++++++++++++++++++++++++++++++++++++++++
 b/drivers/char/mem.c        |   28 ++++++++++++++++++
 b/include/asm-x86/e820.h    |    3 ++
 b/include/asm-x86/page_32.h |    1 
 b/include/asm-x86/page_64.h |    3 ++
 7 files changed, 147 insertions(+), 8 deletions(-)
---
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index c569745..46350bd 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -5,6 +5,18 @@ config TRACE_IRQFLAGS_SUPPORT
 
 source "lib/Kconfig.debug"
 
+config NONPROMISC_DEVMEM
+	bool "Disable promiscuous /dev/mem"
+	default y
+	help
+	  The /dev/mem file by default only allows userspace access to PCI
+	  space and the BIOS code and data regions. This is sufficient for
+	  dosemu and X and all common users of /dev/mem. If this option is
+	  disabled, you allow userspace access to all of memory, including
+	  kernel and userspace memory. Accidental access to this is
+	  obviously disastrous, but specific access can be used by people
+	  debugging the kernel.
+
 config EARLY_PRINTK
 	bool "Early printk" if EMBEDDED && DEBUG_KERNEL && X86_32
 	default y
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 3c76d19..05180bb 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -223,22 +223,48 @@ int page_is_ram(unsigned long pagenr)
 	}
 
 	for (i = 0; i < e820.nr_map; i++) {
-
-		if (e820.map[i].type != E820_RAM)	/* not usable memory */
+		/*
+		 * Not usable memory:
+		 */
+		if (e820.map[i].type != E820_RAM)
 			continue;
+		addr = (e820.map[i].addr + PAGE_SIZE-1) >> PAGE_SHIFT;
+		end = (e820.map[i].addr + e820.map[i].size) >> PAGE_SHIFT;
+
 		/*
-		 *	!!!FIXME!!! Some BIOSen report areas as RAM that
-		 *	are not. Notably the 640->1Mb area. We need a sanity
-		 *	check here.
+		 * Sanity check: Some BIOSen report areas as RAM that
+		 * are not. Notably the 640->1Mb area, which is the
+		 * PCI BIOS area.
 		 */
-		addr = (e820.map[i].addr+PAGE_SIZE-1) >> PAGE_SHIFT;
-		end = (e820.map[i].addr+e820.map[i].size) >> PAGE_SHIFT;
-		if  ((pagenr >= addr) && (pagenr < end))
+		if (addr >= (BIOS_BEGIN >> PAGE_SHIFT) &&
+		    end < (BIOS_END >> PAGE_SHIFT))
+			continue;
+
+		if ((pagenr >= addr) && (pagenr < end))
 			return 1;
 	}
 	return 0;
 }
 
+/*
+ * devmem_is_allowed() checks to see if /dev/mem access to a certain address
+ * is valid. The argument is a physical page number. Returns 1 when access
+ * is allowed and 0 when it is not.
+ *
+ * On x86, access has to be given to the first megabyte of ram (page frames
+ * 0-255 with 4K pages) because that area contains bios code and data regions
+ * used by X and dosemu and similar apps. Access has to be given to non-ram
+ * areas as well; these hold PCI mmio resources and possibly bios/acpi data.
+ */
+int devmem_is_allowed(unsigned long pagenr)
+{
+	if (pagenr < 256)
+		return 1;
+	if (!page_is_ram(pagenr))
+		return 1;
+	return 0;
+}
+
 #ifdef CONFIG_HIGHMEM
 pte_t *kmap_pte;
 pgprot_t kmap_prot;
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 0f9c8c8..7ff01a7 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -26,6 +26,7 @@
 #include <linux/poison.h>
 #include <linux/dma-mapping.h>
 #include <linux/module.h>
+#include <linux/efi.h>
 #include <linux/memory_hotplug.h>
 #include <linux/nmi.h>
 
@@ -512,6 +513,71 @@ int __add_pages(struct zone *z, unsigned long start_pfn, unsigned long nr_pages)
 }
 #endif
 
+/*
+ * Returns 1 if pagenr lies in RAM per the EFI memory map or e820 map, else 0.
+ */
+int page_is_ram(unsigned long pagenr)
+{
+	int i;
+	unsigned long addr, end;
+
+	if (efi_enabled) {
+		efi_memory_desc_t *md;
+		void *p;
+
+		for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
+			md = p;
+			if (!is_available_memory(md))
+				continue;
+			addr = (md->phys_addr+PAGE_SIZE-1) >> PAGE_SHIFT;
+			end = (md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)) >> PAGE_SHIFT;
+
+			if ((pagenr >= addr) && (pagenr < end))
+				return 1;
+		}
+		return 0;
+	}
+
+	for (i = 0; i < e820.nr_map; i++) {
+		/* Skip e820 entries that are not usable memory. */
+		if (e820.map[i].type != E820_RAM)
+			continue;
+		addr = (e820.map[i].addr + PAGE_SIZE-1) >> PAGE_SHIFT;
+		end = (e820.map[i].addr + e820.map[i].size) >> PAGE_SHIFT;
+
+		/*
+		 * Sanity check: some BIOSes report areas as RAM that are
+		 * not, notably the 640K->1MB area (the PCI BIOS area).
+		 */
+		if (addr >= (BIOS_BEGIN >> PAGE_SHIFT) &&
+		    end < (BIOS_END >> PAGE_SHIFT))
+			continue;
+
+		if ((pagenr >= addr) && (pagenr < end))
+			return 1;
+	}
+	return 0;
+}
+
+/*
+ * devmem_is_allowed() checks to see if /dev/mem access to a certain address
+ * is valid. The argument is a physical page number. Returns 1 when access
+ * is allowed and 0 when it is not.
+ *
+ * On x86, access has to be given to the first megabyte of ram (page frames
+ * 0-255 with 4K pages) because that area contains bios code and data regions
+ * used by X and dosemu and similar apps. Access has to be given to non-ram
+ * areas as well; these hold PCI mmio resources and possibly bios/acpi data.
+ */
+int devmem_is_allowed(unsigned long pagenr)
+{
+	if (pagenr < 256)
+		return 1;
+	if (!page_is_ram(pagenr))
+		return 1;
+	return 0;
+}
+
 static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules,
 			 kcore_vsyscall;
 
diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index 08f0b0e..21dfb0d 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -108,6 +108,30 @@ static inline int valid_mmap_phys_addr_range(unsigned long pfn, size_t size)
 }
 #endif
 
+#ifdef CONFIG_NONPROMISC_DEVMEM
+/*
+ * Returns 1 if every page in [from, to) passes devmem_is_allowed(), else 0.
+ */
+static inline int range_is_allowed(unsigned long from, unsigned long to)
+{
+	unsigned long cursor = from >> PAGE_SHIFT;
+
+	for (; (cursor << PAGE_SHIFT) < to; cursor++) {
+		if (!devmem_is_allowed(cursor)) {
+			printk(KERN_INFO "Program %s tried to access /dev/mem "
+				"between %lx->%lx.\n", current->comm, from, to);
+			return 0;
+		}
+	}
+	return 1;
+}
+#else
+static inline int range_is_allowed(unsigned long from, unsigned long to)
+{
+	return 1;
+}
+#endif
+
 /*
  * This funcion reads the *physical* memory. The f_pos points directly to the 
  * memory location. 
@@ -157,6 +181,8 @@ static ssize_t read_mem(struct file * file, char __user * buf,
 		 */
 		ptr = xlate_dev_mem_ptr(p);
 
+		if (!range_is_allowed(p, p+count))
+			return -EPERM;
 		if (copy_to_user(buf, ptr, sz))
 			return -EFAULT;
 		buf += sz;
@@ -214,6 +240,8 @@ static ssize_t write_mem(struct file * file, const char __user * buf,
 		 */
 		ptr = xlate_dev_mem_ptr(p);
 
+		if (!range_is_allowed(p, p+sz))
+			return -EPERM;
 		copied = copy_from_user(ptr, buf, sz);
 		if (copied) {
 			written += sz - copied;
diff --git a/include/asm-x86/e820.h b/include/asm-x86/e820.h
index 3e214f3..a040091 100644
--- a/include/asm-x86/e820.h
+++ b/include/asm-x86/e820.h
@@ -22,6 +22,9 @@ struct e820map {
 };
 #endif /* __ASSEMBLY__ */
 
+#define BIOS_BEGIN             0x000a0000
+#define BIOS_END               0x00100000
+
 #ifdef __KERNEL__
 #ifdef CONFIG_X86_32
 # include "e820_32.h"
diff --git a/include/asm-x86/page_32.h b/include/asm-x86/page_32.h
index 80ecc66..3db44f7 100644
--- a/include/asm-x86/page_32.h
+++ b/include/asm-x86/page_32.h
@@ -166,6 +166,7 @@ extern unsigned int __VMALLOC_RESERVE;
 extern int sysctl_legacy_va_layout;
 
 extern int page_is_ram(unsigned long pagenr);
+extern int devmem_is_allowed(unsigned long pagenr);
 
 #endif /* __ASSEMBLY__ */
 
diff --git a/include/asm-x86/page_64.h b/include/asm-x86/page_64.h
index c3b52bc..1b0ab39 100644
--- a/include/asm-x86/page_64.h
+++ b/include/asm-x86/page_64.h
@@ -45,6 +45,9 @@ extern unsigned long end_pfn;
 void clear_page(void *);
 void copy_page(void *, void *);
 
+extern int page_is_ram(unsigned long pagenr);
+extern int devmem_is_allowed(unsigned long pagenr);
+
 #define clear_user_page(page, vaddr, pg)	clear_page(page)
 #define copy_user_page(to, from, vaddr, pg)	copy_page(to, from)
 

-- 
Kees Cook
Ubuntu Security Team




More information about the kernel-team mailing list