[PATCH v3 5/6] ptrace: introduce PTRACE_SET_SYSCALL_INFO request
Dmitry V. Levin
ldv at strace.io
Tue Jan 28 09:16:50 UTC 2025
PTRACE_SET_SYSCALL_INFO is a generic ptrace API that complements
PTRACE_GET_SYSCALL_INFO by letting the ptracer modify details of
system calls the tracee is blocked in.
This API allows ptracers to obtain and modify system call details
in a straightforward and architecture-agnostic way.
Current implementation supports changing only those bits of system call
information that are used by strace, namely, syscall number, syscall
arguments, and syscall return value.
Support of changing additional details returned by PTRACE_GET_SYSCALL_INFO,
such as instruction pointer and stack pointer, could be added later if
needed, by using struct ptrace_syscall_info.flags to specify the additional
details that should be set. Currently, "flags", "reserved", and
"seccomp.reserved2" fields of struct ptrace_syscall_info must be
initialized with zeroes; "arch", "instruction_pointer", and "stack_pointer"
fields are ignored.
PTRACE_SET_SYSCALL_INFO currently supports only PTRACE_SYSCALL_INFO_ENTRY,
PTRACE_SYSCALL_INFO_EXIT, and PTRACE_SYSCALL_INFO_SECCOMP operations.
Other operations could be added later if needed.
Ideally, PTRACE_SET_SYSCALL_INFO should have been introduced along with
PTRACE_GET_SYSCALL_INFO, but it didn't happen. The last straw that
convinced me to implement PTRACE_SET_SYSCALL_INFO was apparent failure
to provide an API of changing the first system call argument on riscv
architecture.
ptrace(2) man page:
long ptrace(enum __ptrace_request request, pid_t pid, void *addr, void *data);
...
PTRACE_SET_SYSCALL_INFO
Modify information about the system call that caused the stop.
The "data" argument is a pointer to struct ptrace_syscall_info
that specifies the system call information to be set.
The "addr" argument should be set to sizeof(struct ptrace_syscall_info)).
Link: https://lore.kernel.org/all/59505464-c84a-403d-972f-d4b2055eeaac@gmail.com/
Signed-off-by: Dmitry V. Levin <ldv at strace.io>
Reviewed-by: Alexey Gladkov <legion at kernel.org>
Reviewed-by: Charlie Jenkins <charlie at rivosinc.com>
Tested-by: Charlie Jenkins <charlie at rivosinc.com>
---
include/uapi/linux/ptrace.h | 7 ++-
kernel/ptrace.c | 121 +++++++++++++++++++++++++++++++++++-
2 files changed, 126 insertions(+), 2 deletions(-)
diff --git a/include/uapi/linux/ptrace.h b/include/uapi/linux/ptrace.h
index 72c038fc71d0..5f8ef6156752 100644
--- a/include/uapi/linux/ptrace.h
+++ b/include/uapi/linux/ptrace.h
@@ -74,6 +74,7 @@ struct seccomp_metadata {
};
#define PTRACE_GET_SYSCALL_INFO 0x420e
+#define PTRACE_SET_SYSCALL_INFO 0x4212
#define PTRACE_SYSCALL_INFO_NONE 0
#define PTRACE_SYSCALL_INFO_ENTRY 1
#define PTRACE_SYSCALL_INFO_EXIT 2
@@ -81,7 +82,8 @@ struct seccomp_metadata {
struct ptrace_syscall_info {
__u8 op; /* PTRACE_SYSCALL_INFO_* */
- __u8 pad[3];
+ __u8 reserved;
+ __u16 flags;
__u32 arch;
__u64 instruction_pointer;
__u64 stack_pointer;
@@ -98,6 +100,7 @@ struct ptrace_syscall_info {
__u64 nr;
__u64 args[6];
__u32 ret_data;
+ __u32 reserved2;
} seccomp;
};
};
@@ -142,6 +145,8 @@ struct ptrace_sud_config {
__u64 len;
};
+/* 0x4212 is PTRACE_SET_SYSCALL_INFO */
+
/*
* These values are stored in task->ptrace_message
* by ptrace_stop to describe the current syscall-stop.
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 22e7d74cf4cd..b9c1949186bf 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -944,7 +944,10 @@ ptrace_get_syscall_info_seccomp(struct task_struct *child, struct pt_regs *regs,
ptrace_get_syscall_info_entry(child, regs, info);
info->seccomp.ret_data = child->ptrace_message;
- /* ret_data is the last field in struct ptrace_syscall_info.seccomp */
+ /*
+ * ret_data is the last non-reserved field
+ * in struct ptrace_syscall_info.seccomp
+ */
return offsetofend(struct ptrace_syscall_info, seccomp.ret_data);
}
@@ -1016,6 +1019,118 @@ ptrace_get_syscall_info(struct task_struct *child, unsigned long user_size,
write_size = min(actual_size, user_size);
return copy_to_user(datavp, &info, write_size) ? -EFAULT : actual_size;
}
+
+static int
+ptrace_set_syscall_info_entry(struct task_struct *child, struct pt_regs *regs,
+ struct ptrace_syscall_info *info)
+{
+ unsigned long args[ARRAY_SIZE(info->entry.args)];
+ int nr = info->entry.nr;
+ int i;
+
+ /*
+ * Check that the syscall number specified in info->entry.nr
+ * is either a value of type "int" or a sign-extended value
+ * of type "int".
+ */
+ if (nr != info->entry.nr)
+ return -ERANGE;
+
+ for (i = 0; i < ARRAY_SIZE(args); i++) {
+ args[i] = info->entry.args[i];
+ /*
+ * Check that the syscall argument specified in
+ * info->entry.args[i] is either a value of type
+ * "unsigned long" or a sign-extended value of type "long".
+ */
+ if (args[i] != info->entry.args[i])
+ return -ERANGE;
+ }
+
+ syscall_set_nr(child, regs, nr);
+ /*
+ * If the syscall number is set to -1, setting syscall arguments is not
+ * just pointless, it would also clobber the syscall return value on
+ * those architectures that share the same register both for the first
+ * argument of syscall and its return value.
+ */
+ if (nr != -1)
+ syscall_set_arguments(child, regs, args);
+
+ return 0;
+}
+
+static int
+ptrace_set_syscall_info_seccomp(struct task_struct *child, struct pt_regs *regs,
+ struct ptrace_syscall_info *info)
+{
+ /*
+ * info->entry is currently a subset of info->seccomp,
+ * info->seccomp.ret_data is currently ignored.
+ */
+ return ptrace_set_syscall_info_entry(child, regs, info);
+}
+
+static int
+ptrace_set_syscall_info_exit(struct task_struct *child, struct pt_regs *regs,
+ struct ptrace_syscall_info *info)
+{
+ long rval = info->exit.rval;
+
+ /*
+ * Check that the return value specified in info->exit.rval
+ * is either a value of type "long" or a sign-extended value
+ * of type "long".
+ */
+ if (rval != info->exit.rval)
+ return -ERANGE;
+
+ if (info->exit.is_error)
+ syscall_set_return_value(child, regs, rval, 0);
+ else
+ syscall_set_return_value(child, regs, 0, rval);
+
+ return 0;
+}
+
+static int
+ptrace_set_syscall_info(struct task_struct *child, unsigned long user_size,
+ const void __user *datavp)
+{
+ struct pt_regs *regs = task_pt_regs(child);
+ struct ptrace_syscall_info info;
+
+ if (user_size < sizeof(info))
+ return -EINVAL;
+
+ /*
+ * The compatibility is tracked by info.op and info.flags: if user-space
+ * does not instruct us to use unknown extra bits from future versions
+ * of ptrace_syscall_info, we are not going to read them either.
+ */
+ if (copy_from_user(&info, datavp, sizeof(info)))
+ return -EFAULT;
+
+ /* Reserved for future use. */
+ if (info.flags || info.reserved || info.seccomp.reserved2)
+ return -EINVAL;
+
+ /* Changing the type of the system call stop is not supported yet. */
+ if (ptrace_get_syscall_info_op(child) != info.op)
+ return -EINVAL;
+
+ switch (info.op) {
+ case PTRACE_SYSCALL_INFO_ENTRY:
+ return ptrace_set_syscall_info_entry(child, regs, &info);
+ case PTRACE_SYSCALL_INFO_EXIT:
+ return ptrace_set_syscall_info_exit(child, regs, &info);
+ case PTRACE_SYSCALL_INFO_SECCOMP:
+ return ptrace_set_syscall_info_seccomp(child, regs, &info);
+ default:
+ /* Other types of system call stops are not supported yet. */
+ return -EINVAL;
+ }
+}
#endif /* CONFIG_HAVE_ARCH_TRACEHOOK */
int ptrace_request(struct task_struct *child, long request,
@@ -1234,6 +1349,10 @@ int ptrace_request(struct task_struct *child, long request,
case PTRACE_GET_SYSCALL_INFO:
ret = ptrace_get_syscall_info(child, addr, datavp);
break;
+
+ case PTRACE_SET_SYSCALL_INFO:
+ ret = ptrace_set_syscall_info(child, addr, datavp);
+ break;
#endif
case PTRACE_SECCOMP_GET_FILTER:
--
ldv
More information about the Strace-devel
mailing list