[PATCH v2 6/7] ptrace: introduce PTRACE_SET_SYSCALL_INFO request

Charlie Jenkins charlie at rivosinc.com
Thu Jan 16 01:55:31 UTC 2025


On Mon, Jan 13, 2025 at 07:12:08PM +0200, Dmitry V. Levin wrote:
> PTRACE_SET_SYSCALL_INFO is a generic ptrace API that complements
> PTRACE_GET_SYSCALL_INFO by letting the ptracer modify details of
> system calls the tracee is blocked in.
> 
> This API allows ptracers to obtain and modify system call details
> in a straightforward and architecture-agnostic way.
> 
> Current implementation supports changing only those bits of system call
> information that are used by strace, namely, syscall number, syscall
> arguments, and syscall return value.
> 
> Support of changing additional details returned by PTRACE_GET_SYSCALL_INFO,
> such as instruction pointer and stack pointer, could be added later
> if needed, by using struct ptrace_syscall_info.flags to specify
> the additional details that should be set.  Currently, flags and reserved
> fields of struct ptrace_syscall_info must be initialized with zeroes;
> arch, instruction_pointer, and stack_pointer fields are ignored.
> 
> PTRACE_SET_SYSCALL_INFO currently supports only PTRACE_SYSCALL_INFO_ENTRY,
> PTRACE_SYSCALL_INFO_EXIT, and PTRACE_SYSCALL_INFO_SECCOMP operations.
> Other operations could be added later if needed.
> 
> Ideally, PTRACE_SET_SYSCALL_INFO should have been introduced along with
> PTRACE_GET_SYSCALL_INFO, but it didn't happen.  The last straw that
> convinced me to implement PTRACE_SET_SYSCALL_INFO was apparent failure
> to provide an API of changing the first system call argument on riscv
> architecture.
> 
> ptrace(2) man page:
> 
> long ptrace(enum __ptrace_request request, pid_t pid, void *addr, void *data);
> ...
> PTRACE_SET_SYSCALL_INFO
>        Modify information about the system call that caused the stop.
>        The "data" argument is a pointer to struct ptrace_syscall_info
>        that specifies the system call information to be set.
>        The "addr" argument should be set to sizeof(struct ptrace_syscall_info)).
> 
> Link: https://lore.kernel.org/all/59505464-c84a-403d-972f-d4b2055eeaac@gmail.com/
> Signed-off-by: Dmitry V. Levin <ldv at strace.io>
> ---
>  include/linux/ptrace.h      |  3 ++
>  include/uapi/linux/ptrace.h |  4 +-
>  kernel/ptrace.c             | 95 +++++++++++++++++++++++++++++++++++++
>  3 files changed, 101 insertions(+), 1 deletion(-)
> 
> diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
> index 90507d4afcd6..c8dbf1e498bf 100644
> --- a/include/linux/ptrace.h
> +++ b/include/linux/ptrace.h
> @@ -17,6 +17,9 @@ struct syscall_info {
>  	struct seccomp_data	data;
>  };
>  
> +/* sizeof() the first published struct ptrace_syscall_info */
> +#define PTRACE_SYSCALL_INFO_SIZE_VER0	84
> +
>  extern int ptrace_access_vm(struct task_struct *tsk, unsigned long addr,
>  			    void *buf, int len, unsigned int gup_flags);
>  
> diff --git a/include/uapi/linux/ptrace.h b/include/uapi/linux/ptrace.h
> index 72c038fc71d0..ca75b3ab5d22 100644
> --- a/include/uapi/linux/ptrace.h
> +++ b/include/uapi/linux/ptrace.h
> @@ -74,6 +74,7 @@ struct seccomp_metadata {
>  };
>  
>  #define PTRACE_GET_SYSCALL_INFO		0x420e
> +#define PTRACE_SET_SYSCALL_INFO		0x4212
>  #define PTRACE_SYSCALL_INFO_NONE	0
>  #define PTRACE_SYSCALL_INFO_ENTRY	1
>  #define PTRACE_SYSCALL_INFO_EXIT	2
> @@ -81,7 +82,8 @@ struct seccomp_metadata {
>  
>  struct ptrace_syscall_info {
>  	__u8 op;	/* PTRACE_SYSCALL_INFO_* */
> -	__u8 pad[3];
> +	__u8 reserved;
> +	__u16 flags;
>  	__u32 arch;
>  	__u64 instruction_pointer;
>  	__u64 stack_pointer;
> diff --git a/kernel/ptrace.c b/kernel/ptrace.c
> index 22e7d74cf4cd..41d37cb8f74a 100644
> --- a/kernel/ptrace.c
> +++ b/kernel/ptrace.c
> @@ -1016,6 +1016,97 @@ ptrace_get_syscall_info(struct task_struct *child, unsigned long user_size,
>  	write_size = min(actual_size, user_size);
>  	return copy_to_user(datavp, &info, write_size) ? -EFAULT : actual_size;
>  }
> +
> +static unsigned long
> +ptrace_set_syscall_info_entry(struct task_struct *child, struct pt_regs *regs,
> +			      struct ptrace_syscall_info *info)
> +{
> +	unsigned long args[ARRAY_SIZE(info->entry.args)];
> +	int nr = info->entry.nr;
> +	int i;
> +
> +	if (nr != info->entry.nr)
> +		return -ERANGE;
> +
> +	for (i = 0; i < ARRAY_SIZE(args); i++) {
> +		args[i] = info->entry.args[i];
> +		if (args[i] != info->entry.args[i])
> +			return -ERANGE;
> +	}
> +
> +	syscall_set_nr(child, regs, nr);
> +	/*
> +	 * If the syscall number is set to -1, setting syscall arguments is not
> +	 * just pointless, it would also clobber the syscall return value on
> +	 * those architectures that share the same register both for the first
> +	 * argument of syscall and its return value.
> +	 */
> +	if (nr != -1)
> +		syscall_set_arguments(child, regs, args);
> +
> +	return 0;
> +}
> +
> +static unsigned long
> +ptrace_set_syscall_info_seccomp(struct task_struct *child, struct pt_regs *regs,
> +				struct ptrace_syscall_info *info)
> +{
> +	/*
> +	 * info->entry is currently a subset of info->seccomp,
> +	 * info->seccomp.ret_data is currently ignored.
> +	 */
> +	return ptrace_set_syscall_info_entry(child, regs, info);
> +}
> +
> +static unsigned long
> +ptrace_set_syscall_info_exit(struct task_struct *child, struct pt_regs *regs,
> +			     struct ptrace_syscall_info *info)
> +{
> +	if (info->exit.is_error)
> +		syscall_set_return_value(child, regs, info->exit.rval, 0);
> +	else
> +		syscall_set_return_value(child, regs, 0, info->exit.rval);
> +
> +	return 0;
> +}
> +
> +static int
> +ptrace_set_syscall_info(struct task_struct *child, unsigned long user_size,
> +			void __user *datavp)
> +{
> +	struct pt_regs *regs = task_pt_regs(child);
> +	struct ptrace_syscall_info info;
> +	int error;
> +
> +	BUILD_BUG_ON(sizeof(struct ptrace_syscall_info) < PTRACE_SYSCALL_INFO_SIZE_VER0);
> +
> +	if (user_size < PTRACE_SYSCALL_INFO_SIZE_VER0 || user_size > PAGE_SIZE)
> +		return -EINVAL;
> +
> +	error = copy_struct_from_user(&info, sizeof(info), datavp, user_size);
> +	if (error)
> +		return error;
> +
> +	/* Reserved for future use. */
> +	if (info.flags || info.reserved)
> +		return -EINVAL;
> +
> +	/* Changing the type of the system call stop is not supported. */
> +	if (ptrace_get_syscall_info_op(child) != info.op)

Since this isn't supported anyway, would it make sense to set the
info.op to ptrace_get_syscall_info_op(child) like is done for
get_syscall_info? The usecase I see for this is simplifying when the
user doesn't call PTRACE_GET_SYSCALL_INFO before calling
PTRACE_SET_SYSCALL_INFO.

- Charlie

> +		return -EINVAL;
> +
> +	switch (info.op) {
> +	case PTRACE_SYSCALL_INFO_ENTRY:
> +		return ptrace_set_syscall_info_entry(child, regs, &info);
> +	case PTRACE_SYSCALL_INFO_EXIT:
> +		return ptrace_set_syscall_info_exit(child, regs, &info);
> +	case PTRACE_SYSCALL_INFO_SECCOMP:
> +		return ptrace_set_syscall_info_seccomp(child, regs, &info);
> +	default:
> +		/* Other types of system call stops are not supported. */
> +		return -EINVAL;
> +	}
> +}
>  #endif /* CONFIG_HAVE_ARCH_TRACEHOOK */
>  
>  int ptrace_request(struct task_struct *child, long request,
> @@ -1234,6 +1325,10 @@ int ptrace_request(struct task_struct *child, long request,
>  	case PTRACE_GET_SYSCALL_INFO:
>  		ret = ptrace_get_syscall_info(child, addr, datavp);
>  		break;
> +
> +	case PTRACE_SET_SYSCALL_INFO:
> +		ret = ptrace_set_syscall_info(child, addr, datavp);
> +		break;
>  #endif
>  
>  	case PTRACE_SECCOMP_GET_FILTER:
> -- 
> ldv


More information about the Strace-devel mailing list