[PATCH RFC 2/2] seccomp: implement SECCOMP_FILTER_FLAG_NO_INHERITANCE
Dmitry V. Levin
ldv at altlinux.org
Mon Nov 11 20:44:50 UTC 2019
On Mon, Nov 11, 2019 at 04:20:20PM +0100, Paul Chaignon wrote:
> Userspace ptracer can use seccomp's SECCOMP_RET_TRACE action to stop the
> tracee only at syscalls of interest. Strace v5.3, for example, supports
> this with the --seccomp-bpf option. However, since seccomp filters are
> inherited by children tasks, this behavior forces the userspace ptracer to
> trace children tasks as well.
>
> This patch adds a new seccomp syscall flag to SECCOMP_SET_MODE_FILTER to
> prevent children tasks of the process from inheriting the filter. It
> allows ptracers to use seccomp's SECCOMP_RET_TRACE even when not following
> children tasks.
>
> Signed-off-by: Paul Chaignon <paul.chaignon at gmail.com>
> ---
> include/linux/seccomp.h | 8 +++-
> include/uapi/linux/seccomp.h | 1 +
> kernel/fork.c | 5 +++
> kernel/seccomp.c | 14 ++++++
> tools/testing/selftests/seccomp/seccomp_bpf.c | 44 ++++++++++++++++++-
> 5 files changed, 70 insertions(+), 2 deletions(-)
>
> diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h
> index 84868d37b35d..85e2f92a7221 100644
> --- a/include/linux/seccomp.h
> +++ b/include/linux/seccomp.h
> @@ -7,7 +7,8 @@
> #define SECCOMP_FILTER_FLAG_MASK (SECCOMP_FILTER_FLAG_TSYNC | \
> SECCOMP_FILTER_FLAG_LOG | \
> SECCOMP_FILTER_FLAG_SPEC_ALLOW | \
> - SECCOMP_FILTER_FLAG_NEW_LISTENER)
> + SECCOMP_FILTER_FLAG_NEW_LISTENER | \
> + SECCOMP_FILTER_FLAG_NO_INHERIT)
>
> #ifdef CONFIG_SECCOMP
>
> @@ -83,6 +84,7 @@ static inline int seccomp_mode(struct seccomp *s)
> #ifdef CONFIG_SECCOMP_FILTER
> extern void put_seccomp_filter(struct task_struct *tsk);
> extern void get_seccomp_filter(struct task_struct *tsk);
> +extern bool inherited_seccomp_filter(struct task_struct *tsk);
> #else /* CONFIG_SECCOMP_FILTER */
> static inline void put_seccomp_filter(struct task_struct *tsk)
> {
> @@ -92,6 +94,10 @@ static inline void get_seccomp_filter(struct task_struct *tsk)
> {
> return;
> }
> +static inline bool inherited_seccomp_filter(struct task_struct *tsk)
> +{
> + return true;
> +}
> #endif /* CONFIG_SECCOMP_FILTER */
>
> #if defined(CONFIG_SECCOMP_FILTER) && defined(CONFIG_CHECKPOINT_RESTORE)
> diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h
> index 90734aa5aa36..e7f715fb7b34 100644
> --- a/include/uapi/linux/seccomp.h
> +++ b/include/uapi/linux/seccomp.h
> @@ -22,6 +22,7 @@
> #define SECCOMP_FILTER_FLAG_LOG (1UL << 1)
> #define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2)
> #define SECCOMP_FILTER_FLAG_NEW_LISTENER (1UL << 3)
> +#define SECCOMP_FILTER_FLAG_NO_INHERIT (1UL << 4)
NO_INHERIT is too generic, it doesn't clearly say that we want to skip
fork inheritance.
> /*
> * All BPF programs must return a 32-bit value.
> diff --git a/kernel/fork.c b/kernel/fork.c
> index 55af6931c6ec..1df5b058b067 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -1605,6 +1605,11 @@ static void copy_seccomp(struct task_struct *p)
> */
> assert_spin_locked(¤t->sighand->siglock);
>
> + if (!inherited_seccomp_filter(current)) {
> + clear_tsk_thread_flag(p, TIF_SECCOMP);
> + return;
> + }
Since the task can have many seccomp filters (chained by seccomp_filter.prev)
and SECCOMP_FILTER_FLAG_NO_INHERIT flag affects only those filters that were
created using this flag, this code should copy only those filters that are
excluded from the fork inheritance.
> +
> /* Ref-count the new filter user, and assign it. */
> get_seccomp_filter(current);
> p->seccomp = current->seccomp;
> diff --git a/kernel/seccomp.c b/kernel/seccomp.c
> index dba52a7db5e8..c79b3d8ecc34 100644
> --- a/kernel/seccomp.c
> +++ b/kernel/seccomp.c
> @@ -110,6 +110,7 @@ struct notification {
> * outside of a lifetime-guarded section. In general, this
> * is only needed for handling filters shared across tasks.
> * @log: true if all actions except for SECCOMP_RET_ALLOW should be logged
> + * @dont_inherit: true if children tasks shouldn't inherit filters.
> * @prev: points to a previously installed, or inherited, filter
> * @prog: the BPF program to evaluate
> * @notif: the struct that holds all notification related information
> @@ -128,6 +129,7 @@ struct notification {
> struct seccomp_filter {
> refcount_t usage;
> bool log;
> + bool dont_inherit;
> struct seccomp_filter *prev;
> struct bpf_prog *prog;
> struct notification *notif;
> @@ -535,6 +537,10 @@ static long seccomp_attach_filter(unsigned int flags,
> if (flags & SECCOMP_FILTER_FLAG_LOG)
> filter->log = true;
>
> + /* Set inherited flag, if present. */
> + if (flags & SECCOMP_FILTER_FLAG_NO_INHERIT)
> + filter->dont_inherit = true;
> +
> /*
> * If there is an existing filter, make it the prev and don't drop its
> * task reference.
> @@ -587,6 +593,14 @@ void put_seccomp_filter(struct task_struct *tsk)
> __put_seccomp_filter(tsk->seccomp.filter);
> }
>
> +bool inherited_seccomp_filter(struct task_struct *tsk)
> +{
> + struct seccomp_filter *orig = tsk->seccomp.filter;
> + if (!orig)
> + return false;
> + return !orig->dont_inherit;
> +}
You cannot say that all seccomp filters of the task are inherited or not
because some of them can be excluded from the fork inheritance.
> +
> static void seccomp_init_siginfo(kernel_siginfo_t *info, int syscall, int reason)
> {
> clear_siginfo(info);
> diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
> index 7f8b5c8982e3..0b82437c2c0b 100644
> --- a/tools/testing/selftests/seccomp/seccomp_bpf.c
> +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
> @@ -199,6 +199,10 @@ struct seccomp_notif_sizes {
> };
> #endif
>
> +#ifndef SECCOMP_FILTER_FLAG_NO_INHERIT
> +#define SECCOMP_FILTER_FLAG_NO_INHERIT (1UL << 4)
> +#endif
> +
> #ifndef PTRACE_EVENTMSG_SYSCALL_ENTRY
> #define PTRACE_EVENTMSG_SYSCALL_ENTRY 1
> #define PTRACE_EVENTMSG_SYSCALL_EXIT 2
> @@ -778,6 +782,43 @@ TEST(KILL_process)
> ASSERT_EQ(SIGSYS, WTERMSIG(status));
> }
>
> +TEST(no_inheritance)
> +{
> + pid_t pid;
> + int status;
> + struct sock_filter filter[] = {
> + BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
> + offsetof(struct seccomp_data, nr)),
> + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
> + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | E2BIG),
> + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
> + };
> + struct sock_fprog prog = {
> + .len = (unsigned short)ARRAY_SIZE(filter),
> + .filter = filter,
> + };
> +
> + ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
> + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
> + }
> +
> + ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER,
> + SECCOMP_FILTER_FLAG_NO_INHERIT, &prog));
> +
> + pid = fork();
> + ASSERT_GE(pid, 0);
> + if (pid == 0) {
> + _exit(getpid() < 0);
> + }
> +
> + EXPECT_EQ(waitpid(pid, &status, 0), pid);
> + EXPECT_EQ(true, WIFEXITED(status));
> + EXPECT_EQ(0, WEXITSTATUS(status));
> +
> + pid = getpid();
> + EXPECT_EQ(-E2BIG, pid);
> +}
The test should check whether a mix of inherited and excluded filters
is inherited properly.
--
ldv
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 801 bytes
Desc: not available
URL: <http://lists.strace.io/pipermail/strace-devel/attachments/20191111/c451d6a0/attachment.bin>
More information about the Strace-devel
mailing list