[PATCH RFC 2/2] seccomp: implement SECCOMP_FILTER_FLAG_NO_INHERITANCE

Dmitry V. Levin ldv at altlinux.org
Mon Nov 11 20:44:50 UTC 2019


On Mon, Nov 11, 2019 at 04:20:20PM +0100, Paul Chaignon wrote:
> Userspace ptracer can use seccomp's SECCOMP_RET_TRACE action to stop the
> tracee only at syscalls of interest.  Strace v5.3, for example, supports
> this with the --seccomp-bpf option.  However, since seccomp filters are
> inherited by children tasks, this behavior forces the userspace ptracer to
> trace children tasks as well.
> 
> This patch adds a new seccomp syscall flag to SECCOMP_SET_MODE_FILTER to
> prevent children tasks of the process from inheriting the filter.  It
> allows ptracers to use seccomp's SECCOMP_RET_TRACE even when not following
> children tasks.
> 
> Signed-off-by: Paul Chaignon <paul.chaignon at gmail.com>
> ---
>  include/linux/seccomp.h                       |  8 +++-
>  include/uapi/linux/seccomp.h                  |  1 +
>  kernel/fork.c                                 |  5 +++
>  kernel/seccomp.c                              | 14 ++++++
>  tools/testing/selftests/seccomp/seccomp_bpf.c | 44 ++++++++++++++++++-
>  5 files changed, 70 insertions(+), 2 deletions(-)
> 
> diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h
> index 84868d37b35d..85e2f92a7221 100644
> --- a/include/linux/seccomp.h
> +++ b/include/linux/seccomp.h
> @@ -7,7 +7,8 @@
>  #define SECCOMP_FILTER_FLAG_MASK	(SECCOMP_FILTER_FLAG_TSYNC | \
>  					 SECCOMP_FILTER_FLAG_LOG | \
>  					 SECCOMP_FILTER_FLAG_SPEC_ALLOW | \
> -					 SECCOMP_FILTER_FLAG_NEW_LISTENER)
> +					 SECCOMP_FILTER_FLAG_NEW_LISTENER | \
> +					 SECCOMP_FILTER_FLAG_NO_INHERIT)
>  
>  #ifdef CONFIG_SECCOMP
>  
> @@ -83,6 +84,7 @@ static inline int seccomp_mode(struct seccomp *s)
>  #ifdef CONFIG_SECCOMP_FILTER
>  extern void put_seccomp_filter(struct task_struct *tsk);
>  extern void get_seccomp_filter(struct task_struct *tsk);
> +extern bool inherited_seccomp_filter(struct task_struct *tsk);
>  #else  /* CONFIG_SECCOMP_FILTER */
>  static inline void put_seccomp_filter(struct task_struct *tsk)
>  {
> @@ -92,6 +94,10 @@ static inline void get_seccomp_filter(struct task_struct *tsk)
>  {
>  	return;
>  }
> +static inline bool inherited_seccomp_filter(struct task_struct *tsk)
> +{
> +	return true;
> +}
>  #endif /* CONFIG_SECCOMP_FILTER */
>  
>  #if defined(CONFIG_SECCOMP_FILTER) && defined(CONFIG_CHECKPOINT_RESTORE)
> diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h
> index 90734aa5aa36..e7f715fb7b34 100644
> --- a/include/uapi/linux/seccomp.h
> +++ b/include/uapi/linux/seccomp.h
> @@ -22,6 +22,7 @@
>  #define SECCOMP_FILTER_FLAG_LOG			(1UL << 1)
>  #define SECCOMP_FILTER_FLAG_SPEC_ALLOW		(1UL << 2)
>  #define SECCOMP_FILTER_FLAG_NEW_LISTENER	(1UL << 3)
> +#define SECCOMP_FILTER_FLAG_NO_INHERIT		(1UL << 4)

NO_INHERIT is too generic, it doesn't clearly say that we want to skip
fork inheritance.

>  /*
>   * All BPF programs must return a 32-bit value.
> diff --git a/kernel/fork.c b/kernel/fork.c
> index 55af6931c6ec..1df5b058b067 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -1605,6 +1605,11 @@ static void copy_seccomp(struct task_struct *p)
>  	 */
>  	assert_spin_locked(&current->sighand->siglock);
>  
> +	if (!inherited_seccomp_filter(current)) {
> +		clear_tsk_thread_flag(p, TIF_SECCOMP);
> +		return;
> +	}

Since the task can have many seccomp filters (chained by seccomp_filter.prev)
and SECCOMP_FILTER_FLAG_NO_INHERIT flag affects only those filters that were
created using this flag, this code should copy only those filters that are
excluded from the fork inheritance.

> +
>  	/* Ref-count the new filter user, and assign it. */
>  	get_seccomp_filter(current);
>  	p->seccomp = current->seccomp;
> diff --git a/kernel/seccomp.c b/kernel/seccomp.c
> index dba52a7db5e8..c79b3d8ecc34 100644
> --- a/kernel/seccomp.c
> +++ b/kernel/seccomp.c
> @@ -110,6 +110,7 @@ struct notification {
>   *         outside of a lifetime-guarded section.  In general, this
>   *         is only needed for handling filters shared across tasks.
>   * @log: true if all actions except for SECCOMP_RET_ALLOW should be logged
> + * @dont_inherit: true if children tasks shouldn't inherit filters.
>   * @prev: points to a previously installed, or inherited, filter
>   * @prog: the BPF program to evaluate
>   * @notif: the struct that holds all notification related information
> @@ -128,6 +129,7 @@ struct notification {
>  struct seccomp_filter {
>  	refcount_t usage;
>  	bool log;
> +	bool dont_inherit;
>  	struct seccomp_filter *prev;
>  	struct bpf_prog *prog;
>  	struct notification *notif;
> @@ -535,6 +537,10 @@ static long seccomp_attach_filter(unsigned int flags,
>  	if (flags & SECCOMP_FILTER_FLAG_LOG)
>  		filter->log = true;
>  
> +	/* Set inherited flag, if present. */
> +	if (flags & SECCOMP_FILTER_FLAG_NO_INHERIT)
> +		filter->dont_inherit = true;
> +
>  	/*
>  	 * If there is an existing filter, make it the prev and don't drop its
>  	 * task reference.
> @@ -587,6 +593,14 @@ void put_seccomp_filter(struct task_struct *tsk)
>  	__put_seccomp_filter(tsk->seccomp.filter);
>  }
>  
> +bool inherited_seccomp_filter(struct task_struct *tsk)
> +{
> +	struct seccomp_filter *orig = tsk->seccomp.filter;
> +	if (!orig)
> +		return false;
> +	return !orig->dont_inherit;
> +}

You cannot say that all seccomp filters of the task are inherited or not
because some of them can be excluded from the fork inheritance.

> +
>  static void seccomp_init_siginfo(kernel_siginfo_t *info, int syscall, int reason)
>  {
>  	clear_siginfo(info);
> diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
> index 7f8b5c8982e3..0b82437c2c0b 100644
> --- a/tools/testing/selftests/seccomp/seccomp_bpf.c
> +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
> @@ -199,6 +199,10 @@ struct seccomp_notif_sizes {
>  };
>  #endif
>  
> +#ifndef SECCOMP_FILTER_FLAG_NO_INHERIT
> +#define SECCOMP_FILTER_FLAG_NO_INHERIT (1UL << 4)
> +#endif
> +
>  #ifndef PTRACE_EVENTMSG_SYSCALL_ENTRY
>  #define PTRACE_EVENTMSG_SYSCALL_ENTRY	1
>  #define PTRACE_EVENTMSG_SYSCALL_EXIT	2
> @@ -778,6 +782,43 @@ TEST(KILL_process)
>  	ASSERT_EQ(SIGSYS, WTERMSIG(status));
>  }
>  
> +TEST(no_inheritance)
> +{
> +	pid_t pid;
> +	int status;
> +	struct sock_filter filter[] = {
> +		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
> +			offsetof(struct seccomp_data, nr)),
> +		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
> +		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | E2BIG),
> +		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
> +	};
> +	struct sock_fprog prog = {
> +		.len = (unsigned short)ARRAY_SIZE(filter),
> +		.filter = filter,
> +	};
> +
> +	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
> +		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
> +	}
> +
> +	ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER,
> +			     SECCOMP_FILTER_FLAG_NO_INHERIT, &prog));
> +
> +	pid = fork();
> +	ASSERT_GE(pid, 0);
> +	if (pid == 0) {
> +		_exit(getpid() < 0);
> +	}
> +
> +	EXPECT_EQ(waitpid(pid, &status, 0), pid);
> +	EXPECT_EQ(true, WIFEXITED(status));
> +	EXPECT_EQ(0, WEXITSTATUS(status));
> +
> +	pid = getpid();
> +	EXPECT_EQ(-E2BIG, pid);
> +}

The test should check whether a mix of inherited and excluded filters
is inherited properly.


-- 
ldv
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 801 bytes
Desc: not available
URL: <http://lists.strace.io/pipermail/strace-devel/attachments/20191111/c451d6a0/attachment.bin>


More information about the Strace-devel mailing list