[daniel.lezcano at free.fr: [PATCH 1/1][V5] Add reboot_pid_ns to handle the reboot syscall]

Serge Hallyn serge.hallyn at canonical.com
Wed Jan 25 18:02:25 UTC 2012


Quoting Stefan Bader (stefan.bader at canonical.com):
> Finding some discussion between Stephane on #ubuntu-server this morning, it
> sounds like the patch currently is on the review stack of Andrew Morton who has
> other things to complete before looking at that. So I think we should go and get
> the patch into alpha2 without waiting for upstream.
> 
> I backported the patch to apply to current precise. It needed some small
> conflict resolution due to

Thanks, this looks good.

-serge

> commit 0499680a42141d86417a8fbaa8c8db806bea1201
> Author: Vasiliy Kulikov <segooon at gmail.com>
> Date:   Tue Jan 10 15:11:31 2012 -0800
> 
>     procfs: add hidepid= and gid= mount options
> 
> (which is in v3.3-rc1) being not in Precise. Stephane, Serge, just to be sure:
> There is not anything else you really need in the kernel but have not yet told
> us? ;)
> 
> Anyway, attached the updated patch against Precise master-next.
> 
> -Stefan
> 

> From ad93badd269dd583f67429f1204312f43ae5638a Mon Sep 17 00:00:00 2001
> From: Daniel Lezcano <daniel.lezcano at free.fr>
> Date: Thu, 5 Jan 2012 10:06:50 +0100
> Subject: [PATCH] UBUNTU: SAUCE: Add reboot_pid_ns to handle the reboot
>  syscall
> 
> In the case of a child pid namespace, rebooting the system does not
> really make sense. When the pid namespace is used in conjunction
> with the other namespaces in order to create a linux container, the
> reboot syscall leads to some problems.
> 
> A container can reboot the host. That can be fixed by dropping
> the sys_reboot capability but we are unable to correctly poweroff/
> halt/reboot a container and the container stays stuck at the shutdown
> time with the container's init process waiting indefinitely.
> 
> After several attempts, no solution from userspace was found to reliably
> handle the shutdown from a container.
> 
> This patch proposes to make the init process of the child pid namespace
> exit with a signal status set to : SIGINT if the child pid namespace called
> "halt/poweroff" and SIGHUP if the child pid namespace called "reboot".
> When the reboot syscall is called and we are not in the initial
> pid namespace, we kill the pid namespace for "HALT", "POWEROFF", "RESTART",
> and "RESTART2". Otherwise we return EINVAL.
> 
> Returning EINVAL is also an easy way to check if this feature is supported
> by the kernel when invoking another 'reboot' option like CAD.
> 
> This way the parent process of the child pid namespace knows if
> it rebooted or not and can take the right decision.
> 
> Signed-off-by: Daniel Lezcano <daniel.lezcano at free.fr>
> Acked-by: Serge Hallyn <serge.hallyn at canonical.com>
> Reviewed-by: Oleg Nesterov <oleg at redhat.com>
> 
> BugLink: http://bugs.launchpad.net/bugs/914676
> 
> (backported from https://lkml.org/lkml/2012/1/5/58)
> Signed-off-by: Stefan Bader <stefan.bader at canonical.com>
> ---
>  include/linux/pid_namespace.h |    8 +++++++-
>  kernel/pid_namespace.c        |   33 +++++++++++++++++++++++++++++++++
>  kernel/sys.c                  |    8 ++++++++
>  3 files changed, 48 insertions(+), 1 deletions(-)
> 
> diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
> index 38d1032..5f761d6 100644
> --- a/include/linux/pid_namespace.h
> +++ b/include/linux/pid_namespace.h
> @@ -30,6 +30,7 @@ struct pid_namespace {
>  #ifdef CONFIG_BSD_PROCESS_ACCT
>  	struct bsd_acct_struct *bacct;
>  #endif
> +	int reboot;
>  };
>  
>  extern struct pid_namespace init_pid_ns;
> @@ -45,6 +46,7 @@ static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns)
>  extern struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *ns);
>  extern void free_pid_ns(struct kref *kref);
>  extern void zap_pid_ns_processes(struct pid_namespace *pid_ns);
> +extern int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd);
>  
>  static inline void put_pid_ns(struct pid_namespace *ns)
>  {
> @@ -72,11 +74,15 @@ static inline void put_pid_ns(struct pid_namespace *ns)
>  {
>  }
>  
> -
>  static inline void zap_pid_ns_processes(struct pid_namespace *ns)
>  {
>  	BUG();
>  }
> +
> +static inline int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd)
> +{
> +	return 0;
> +}
>  #endif /* CONFIG_PID_NS */
>  
>  extern struct pid_namespace *task_active_pid_ns(struct task_struct *tsk);
> diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
> index e9c9adc..ac84fd3 100644
> --- a/kernel/pid_namespace.c
> +++ b/kernel/pid_namespace.c
> @@ -15,6 +15,7 @@
>  #include <linux/acct.h>
>  #include <linux/slab.h>
>  #include <linux/proc_fs.h>
> +#include <linux/reboot.h>
>  
>  #define BITS_PER_PAGE		(PAGE_SIZE*8)
>  
> @@ -187,10 +188,42 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
>  		rc = sys_wait4(-1, NULL, __WALL, NULL);
>  	} while (rc != -ECHILD);
>  
> +	if (pid_ns->reboot)
> +		current->signal->group_exit_code = pid_ns->reboot;
> +
>  	acct_exit_ns(pid_ns);
>  	return;
>  }
>  
> +int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd)
> +{
> +	if (pid_ns == &init_pid_ns)
> +		return 0;
> +
> +	switch(cmd) {
> +	case LINUX_REBOOT_CMD_RESTART2:
> +	case LINUX_REBOOT_CMD_RESTART:
> +		pid_ns->reboot = SIGHUP;
> +		break;
> +
> +	case LINUX_REBOOT_CMD_POWER_OFF:
> +	case LINUX_REBOOT_CMD_HALT:
> +		pid_ns->reboot = SIGINT;
> +		break;
> +	default:
> +		return -EINVAL;
> +	}
> +
> +	read_lock(&tasklist_lock);
> +	force_sig(SIGKILL, pid_ns->child_reaper);
> +	read_unlock(&tasklist_lock);
> +
> +	do_exit(0);
> +
> +	/* Not reached */
> +	return 0;
> +}
> +
>  static __init int pid_namespaces_init(void)
>  {
>  	pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC);
> diff --git a/kernel/sys.c b/kernel/sys.c
> index e7aa654..3d2b2d0 100644
> --- a/kernel/sys.c
> +++ b/kernel/sys.c
> @@ -444,6 +444,14 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd,
>  	                magic2 != LINUX_REBOOT_MAGIC2C))
>  		return -EINVAL;
>  
> +	/* In case the pid namespaces are enabled, the current task is in a
> +	 * child pid_namespace and the command is handled by 'reboot_pid_ns',
> +	 * this one will invoke 'do_exit'.
> +	 */
> +	ret = reboot_pid_ns(task_active_pid_ns(current), cmd);
> +	if (ret)
> +		return ret;
> +
>  	/* Instead of trying to make the power_off code look like
>  	 * halt when pm_power_off is not set do it the easy way.
>  	 */
> -- 
> 1.7.5.4
> 





More information about the kernel-team mailing list