[PATCH 1/5] Duplicate get_scno into identical get_scno_on_sysenter, get_scno_on_sysexit
Denys Vlasenko
dvlasenk at redhat.com
Mon Aug 22 10:24:04 UTC 2011
On Mon, 2011-08-22 at 12:21 +0200, Denys Vlasenko wrote:
> 1. Duplicate get_scno into identical get_scno_on_sysenter,
> get_scno_on_sysexit functions. Call them in syscall enter and syscall
> exit, correspondingly.
diff -d -urpN strace.0/defs.h strace.1/defs.h
--- strace.0/defs.h 2011-08-21 18:10:51.000000000 +0200
+++ strace.1/defs.h 2011-08-22 03:18:10.723934620 +0200
@@ -586,7 +586,7 @@ extern void droptcb(struct tcb *);
extern void set_sortby(const char *);
extern void set_overhead(int);
extern void qualify(const char *);
-extern int get_scno(struct tcb *);
+extern int get_scno_on_sysenter(struct tcb *);
extern long known_scno(struct tcb *);
extern long do_ptrace(int request, struct tcb *tcp, void *addr, void *data);
extern int ptrace_restart(int request, struct tcb *tcp, int sig);
diff -d -urpN strace.0/strace.c strace.1/strace.c
--- strace.0/strace.c 2011-08-22 01:57:07.000000000 +0200
+++ strace.1/strace.c 2011-08-22 03:17:36.822952537 +0200
@@ -1488,7 +1488,7 @@ proc_open(struct tcb *tcp, int attaching
}
if (tcp->status.PR_WHY == PR_SYSENTRY) {
tcp->flags &= ~TCB_INSYSCALL;
- get_scno(tcp);
+ get_scno_on_sysenter(tcp);
if (known_scno(tcp) == SYS_execve)
break;
}
diff -d -urpN strace.0/syscall.c strace.1/syscall.c
--- strace.0/syscall.c 2011-08-22 11:56:27.982265607 +0200
+++ strace.1/syscall.c 2011-08-22 03:37:20.155534144 +0200
@@ -750,7 +750,7 @@ struct reg regs; /* TODO: make static? *
* ("????" etc) and bail out.
*/
int
-get_scno(struct tcb *tcp)
+get_scno_on_sysenter(struct tcb *tcp)
{
long scno = 0;
@@ -1357,6 +1357,619 @@ get_scno(struct tcb *tcp)
return 1;
}
+/* Returns:
+ * 0: "ignore this ptrace stop", bail out of trace_syscall() silently.
+ * 1: ok, continue in trace_syscall().
+ * other: error, trace_syscall() should print error indicator
+ * ("????" etc) and bail out.
+ */
+static int
+get_scno_on_sysexit(struct tcb *tcp)
+{
+ long scno = 0;
+
+#ifdef LINUX
+# if defined(S390) || defined(S390X)
+ if (tcp->flags & TCB_WAITEXECVE) {
+ /*
+ * When the execve system call completes successfully, the
+ * new process still has -ENOSYS (old style) or __NR_execve
+ * (new style) in gpr2. We cannot recover the scno again
+ * by disassembly, because the image that executed the
+ * syscall is gone now. Fortunately, we don't want it. We
+ * leave the flag set so that syscall_fixup can fake the
+ * result.
+ */
+ if (exiting(tcp))
+ return 1;
+ /*
+ * This is the post-execve SIGTRAP. We cannot try to read
+ * the system call here either.
+ */
+ tcp->flags &= ~TCB_WAITEXECVE;
+ return 0;
+ }
+
+ if (upeek(tcp, PT_GPR2, &syscall_mode) < 0)
+ return -1;
+
+ if (syscall_mode != -ENOSYS) {
+ /*
+ * Since kernel version 2.5.44 the scno gets passed in gpr2.
+ */
+ scno = syscall_mode;
+ } else {
+ /*
+ * Old style of "passing" the scno via the SVC instruction.
+ */
+
+ long opcode, offset_reg, tmp;
+ void * svc_addr;
+ static const int gpr_offset[16] = {
+ PT_GPR0, PT_GPR1, PT_ORIGGPR2, PT_GPR3,
+ PT_GPR4, PT_GPR5, PT_GPR6, PT_GPR7,
+ PT_GPR8, PT_GPR9, PT_GPR10, PT_GPR11,
+ PT_GPR12, PT_GPR13, PT_GPR14, PT_GPR15
+ };
+
+ if (upeek(tcp, PT_PSWADDR, &pc) < 0)
+ return -1;
+ errno = 0;
+ opcode = ptrace(PTRACE_PEEKTEXT, tcp->pid, (char *)(pc-sizeof(long)), 0);
+ if (errno) {
+ perror("peektext(pc-oneword)");
+ return -1;
+ }
+
+ /*
+ * We have to check if the SVC got executed directly or via an
+ * EXECUTE instruction. In case of EXECUTE it is necessary to do
+ * instruction decoding to derive the system call number.
+ * Unfortunately the opcode sizes of EXECUTE and SVC are different,
+ * so that this doesn't work if a SVC opcode is part of an EXECUTE
+ * opcode. Since there is no way to find out the opcode size this
+ * is the best we can do...
+ */
+
+ if ((opcode & 0xff00) == 0x0a00) {
+ /* SVC opcode */
+ scno = opcode & 0xff;
+ }
+ else {
+ /* SVC got executed by EXECUTE instruction */
+
+ /*
+ * Do instruction decoding of EXECUTE. If you really want to
+ * understand this, read the Principles of Operations.
+ */
+ svc_addr = (void *) (opcode & 0xfff);
+
+ tmp = 0;
+ offset_reg = (opcode & 0x000f0000) >> 16;
+ if (offset_reg && (upeek(tcp, gpr_offset[offset_reg], &tmp) < 0))
+ return -1;
+ svc_addr += tmp;
+
+ tmp = 0;
+ offset_reg = (opcode & 0x0000f000) >> 12;
+ if (offset_reg && (upeek(tcp, gpr_offset[offset_reg], &tmp) < 0))
+ return -1;
+ svc_addr += tmp;
+
+ scno = ptrace(PTRACE_PEEKTEXT, tcp->pid, svc_addr, 0);
+ if (errno)
+ return -1;
+# if defined(S390X)
+ scno >>= 48;
+# else
+ scno >>= 16;
+# endif
+ tmp = 0;
+ offset_reg = (opcode & 0x00f00000) >> 20;
+ if (offset_reg && (upeek(tcp, gpr_offset[offset_reg], &tmp) < 0))
+ return -1;
+
+ scno = (scno | tmp) & 0xff;
+ }
+ }
+# elif defined (POWERPC)
+ if (upeek(tcp, sizeof(unsigned long)*PT_R0, &scno) < 0)
+ return -1;
+ if (entering(tcp)) {
+ /* Check if this is the post-execve SIGTRAP. */
+ if (scno == 0 && (tcp->flags & TCB_WAITEXECVE)) {
+ tcp->flags &= ~TCB_WAITEXECVE;
+ return 0;
+ }
+ }
+
+# ifdef POWERPC64
+ if (entering(tcp)) {
+ /* TODO: speed up strace by not doing this at every syscall.
+ * We only need to do it after execve.
+ */
+ int currpers;
+ long val;
+ int pid = tcp->pid;
+
+ /* Check for 64/32 bit mode. */
+ if (upeek(tcp, sizeof(unsigned long)*PT_MSR, &val) < 0)
+ return -1;
+ /* SF is bit 0 of MSR */
+ if (val < 0)
+ currpers = 0;
+ else
+ currpers = 1;
+ if (currpers != current_personality) {
+ static const char *const names[] = {"64 bit", "32 bit"};
+ set_personality(currpers);
+ fprintf(stderr, "[ Process PID=%d runs in %s mode. ]\n",
+ pid, names[current_personality]);
+ }
+ }
+# endif
+# elif defined(AVR32)
+ /*
+ * Read complete register set in one go.
+ */
+ if (ptrace(PTRACE_GETREGS, tcp->pid, NULL, &regs) < 0)
+ return -1;
+
+ /*
+ * We only need to grab the syscall number on syscall entry.
+ */
+ if (entering(tcp)) {
+ scno = regs.r8;
+
+ /* Check if this is the post-execve SIGTRAP. */
+ if (tcp->flags & TCB_WAITEXECVE) {
+ tcp->flags &= ~TCB_WAITEXECVE;
+ return 0;
+ }
+ }
+# elif defined(BFIN)
+ if (upeek(tcp, PT_ORIG_P0, &scno))
+ return -1;
+# elif defined (I386)
+ if (upeek(tcp, 4*ORIG_EAX, &scno) < 0)
+ return -1;
+# elif defined (X86_64)
+ if (upeek(tcp, 8*ORIG_RAX, &scno) < 0)
+ return -1;
+
+ if (entering(tcp)) {
+ /* TODO: speed up strace by not doing this at every syscall.
+ * We only need to do it after execve.
+ */
+ int currpers;
+ long val;
+ int pid = tcp->pid;
+
+ /* Check CS register value. On x86-64 linux it is:
+ * 0x33 for long mode (64 bit)
+ * 0x23 for compatibility mode (32 bit)
+ * It takes only one ptrace and thus doesn't need
+ * to be cached.
+ */
+ if (upeek(tcp, 8*CS, &val) < 0)
+ return -1;
+ switch (val) {
+ case 0x23: currpers = 1; break;
+ case 0x33: currpers = 0; break;
+ default:
+ fprintf(stderr, "Unknown value CS=0x%02X while "
+ "detecting personality of process "
+ "PID=%d\n", (int)val, pid);
+ currpers = current_personality;
+ break;
+ }
+# if 0
+ /* This version analyzes the opcode of a syscall instruction.
+ * (int 0x80 on i386 vs. syscall on x86-64)
+ * It works, but is too complicated.
+ */
+ unsigned long val, rip, i;
+
+ if (upeek(tcp, 8*RIP, &rip) < 0)
+ perror("upeek(RIP)");
+
+ /* sizeof(syscall) == sizeof(int 0x80) == 2 */
+ rip -= 2;
+ errno = 0;
+
+ call = ptrace(PTRACE_PEEKTEXT, pid, (char *)rip, (char *)0);
+ if (errno)
+ fprintf(stderr, "ptrace_peektext failed: %s\n",
+ strerror(errno));
+ switch (call & 0xffff) {
+ /* x86-64: syscall = 0x0f 0x05 */
+ case 0x050f: currpers = 0; break;
+ /* i386: int 0x80 = 0xcd 0x80 */
+ case 0x80cd: currpers = 1; break;
+ default:
+ currpers = current_personality;
+ fprintf(stderr,
+ "Unknown syscall opcode (0x%04X) while "
+ "detecting personality of process "
+ "PID=%d\n", (int)call, pid);
+ break;
+ }
+# endif
+ if (currpers != current_personality) {
+ static const char *const names[] = {"64 bit", "32 bit"};
+ set_personality(currpers);
+ fprintf(stderr, "[ Process PID=%d runs in %s mode. ]\n",
+ pid, names[current_personality]);
+ }
+ }
+# elif defined(IA64)
+# define IA64_PSR_IS ((long)1 << 34)
+ if (upeek(tcp, PT_CR_IPSR, &psr) >= 0)
+ ia32 = (psr & IA64_PSR_IS) != 0;
+ if (entering(tcp)) {
+ if (ia32) {
+ if (upeek(tcp, PT_R1, &scno) < 0) /* orig eax */
+ return -1;
+ } else {
+ if (upeek(tcp, PT_R15, &scno) < 0)
+ return -1;
+ }
+ /* Check if this is the post-execve SIGTRAP. */
+ if (tcp->flags & TCB_WAITEXECVE) {
+ tcp->flags &= ~TCB_WAITEXECVE;
+ return 0;
+ }
+ } else {
+ /* Syscall exit */
+ if (upeek(tcp, PT_R8, &r8) < 0)
+ return -1;
+ if (upeek(tcp, PT_R10, &r10) < 0)
+ return -1;
+ }
+# elif defined (ARM)
+ /*
+ * Read complete register set in one go.
+ */
+ if (ptrace(PTRACE_GETREGS, tcp->pid, NULL, (void *)&regs) == -1)
+ return -1;
+
+ /*
+ * We only need to grab the syscall number on syscall entry.
+ */
+ if (regs.ARM_ip == 0) {
+ if (entering(tcp)) {
+ /* Check if this is the post-execve SIGTRAP. */
+ if (tcp->flags & TCB_WAITEXECVE) {
+ tcp->flags &= ~TCB_WAITEXECVE;
+ return 0;
+ }
+ }
+
+ /*
+ * Note: we deal with only 32-bit CPUs here.
+ */
+ if (regs.ARM_cpsr & 0x20) {
+ /*
+ * Get the Thumb-mode system call number
+ */
+ scno = regs.ARM_r7;
+ } else {
+ /*
+ * Get the ARM-mode system call number
+ */
+ errno = 0;
+ scno = ptrace(PTRACE_PEEKTEXT, tcp->pid, (void *)(regs.ARM_pc - 4), NULL);
+ if (errno)
+ return -1;
+
+ /* FIXME: bogus check? it is already done on entering before,
+ * so we never can see it here?
+ */
+ if (scno == 0 && (tcp->flags & TCB_WAITEXECVE)) {
+ tcp->flags &= ~TCB_WAITEXECVE;
+ return 0;
+ }
+
+ /* Handle the EABI syscall convention. We do not
+ bother converting structures between the two
+ ABIs, but basic functionality should work even
+ if strace and the traced program have different
+ ABIs. */
+ if (scno == 0xef000000) {
+ scno = regs.ARM_r7;
+ } else {
+ if ((scno & 0x0ff00000) != 0x0f900000) {
+ fprintf(stderr, "syscall: unknown syscall trap 0x%08lx\n",
+ scno);
+ return -1;
+ }
+
+ /*
+ * Fixup the syscall number
+ */
+ scno &= 0x000fffff;
+ }
+ }
+ if (scno & 0x0f0000) {
+ /*
+ * Handle ARM specific syscall
+ */
+ set_personality(1);
+ scno &= 0x0000ffff;
+ } else
+ set_personality(0);
+
+ if (exiting(tcp)) {
+ fprintf(stderr, "pid %d stray syscall exit\n", tcp->pid);
+ tcp->flags &= ~TCB_INSYSCALL;
+ }
+ } else {
+ if (entering(tcp)) {
+ fprintf(stderr, "pid %d stray syscall entry\n", tcp->pid);
+ tcp->flags |= TCB_INSYSCALL;
+ }
+ }
+# elif defined (M68K)
+ if (upeek(tcp, 4*PT_ORIG_D0, &scno) < 0)
+ return -1;
+# elif defined (LINUX_MIPSN32)
+ unsigned long long regs[38];
+
+ if (ptrace(PTRACE_GETREGS, tcp->pid, NULL, (long) &regs) < 0)
+ return -1;
+ a3 = regs[REG_A3];
+ r2 = regs[REG_V0];
+
+ if (entering(tcp)) {
+ scno = r2;
+
+ /* Check if this is the post-execve SIGTRAP. */
+ if (scno == 0 && tcp->flags & TCB_WAITEXECVE) {
+ tcp->flags &= ~TCB_WAITEXECVE;
+ return 0;
+ }
+
+ if (scno < 0 || scno > nsyscalls) {
+ if (a3 == 0 || a3 == -1) {
+ if (debug)
+ fprintf(stderr, "stray syscall exit: v0 = %ld\n", scno);
+ return 0;
+ }
+ }
+ }
+# elif defined (MIPS)
+ if (upeek(tcp, REG_A3, &a3) < 0)
+ return -1;
+ if (entering(tcp)) {
+ if (upeek(tcp, REG_V0, &scno) < 0)
+ return -1;
+
+ /* Check if this is the post-execve SIGTRAP. */
+ if (scno == 0 && tcp->flags & TCB_WAITEXECVE) {
+ tcp->flags &= ~TCB_WAITEXECVE;
+ return 0;
+ }
+
+ if (scno < 0 || scno > nsyscalls) {
+ if (a3 == 0 || a3 == -1) {
+ if (debug)
+ fprintf(stderr, "stray syscall exit: v0 = %ld\n", scno);
+ return 0;
+ }
+ }
+ } else {
+ if (upeek(tcp, REG_V0, &r2) < 0)
+ return -1;
+ }
+# elif defined (ALPHA)
+ if (upeek(tcp, REG_A3, &a3) < 0)
+ return -1;
+
+ if (entering(tcp)) {
+ if (upeek(tcp, REG_R0, &scno) < 0)
+ return -1;
+
+ /* Check if this is the post-execve SIGTRAP. */
+ if (scno == 0 && tcp->flags & TCB_WAITEXECVE) {
+ tcp->flags &= ~TCB_WAITEXECVE;
+ return 0;
+ }
+
+ /*
+ * Do some sanity checks to figure out if it's
+ * really a syscall entry
+ */
+ if (scno < 0 || scno > nsyscalls) {
+ if (a3 == 0 || a3 == -1) {
+ if (debug)
+ fprintf(stderr, "stray syscall exit: r0 = %ld\n", scno);
+ return 0;
+ }
+ }
+ }
+ else {
+ if (upeek(tcp, REG_R0, &r0) < 0)
+ return -1;
+ }
+# elif defined (SPARC) || defined (SPARC64)
+ /* Everything we need is in the current register set. */
+ if (ptrace(PTRACE_GETREGS, tcp->pid, (char *)&regs, 0) < 0)
+ return -1;
+
+ /* If we are entering, then disassemble the syscall trap. */
+ if (entering(tcp)) {
+ /* Retrieve the syscall trap instruction. */
+ errno = 0;
+# if defined(SPARC64)
+ trap = ptrace(PTRACE_PEEKTEXT, tcp->pid, (char *)regs.tpc, 0);
+ trap >>= 32;
+# else
+ trap = ptrace(PTRACE_PEEKTEXT, tcp->pid, (char *)regs.pc, 0);
+# endif
+ if (errno)
+ return -1;
+
+ /* Disassemble the trap to see what personality to use. */
+ switch (trap) {
+ case 0x91d02010:
+ /* Linux/SPARC syscall trap. */
+ set_personality(0);
+ break;
+ case 0x91d0206d:
+ /* Linux/SPARC64 syscall trap. */
+ set_personality(2);
+ break;
+ case 0x91d02000:
+ /* SunOS syscall trap. (pers 1) */
+ fprintf(stderr, "syscall: SunOS no support\n");
+ return -1;
+ case 0x91d02008:
+ /* Solaris 2.x syscall trap. (per 2) */
+ set_personality(1);
+ break;
+ case 0x91d02009:
+ /* NetBSD/FreeBSD syscall trap. */
+ fprintf(stderr, "syscall: NetBSD/FreeBSD not supported\n");
+ return -1;
+ case 0x91d02027:
+ /* Solaris 2.x gettimeofday */
+ set_personality(1);
+ break;
+ default:
+ /* Check if this is the post-execve SIGTRAP. */
+ if (tcp->flags & TCB_WAITEXECVE) {
+ tcp->flags &= ~TCB_WAITEXECVE;
+ return 0;
+ }
+# if defined (SPARC64)
+ fprintf(stderr, "syscall: unknown syscall trap %08lx %016lx\n", trap, regs.tpc);
+# else
+ fprintf(stderr, "syscall: unknown syscall trap %08lx %08lx\n", trap, regs.pc);
+# endif
+ return -1;
+ }
+
+ /* Extract the system call number from the registers. */
+ if (trap == 0x91d02027)
+ scno = 156;
+ else
+ scno = regs.u_regs[U_REG_G1];
+ if (scno == 0) {
+ scno = regs.u_regs[U_REG_O0];
+ memmove(&regs.u_regs[U_REG_O0], &regs.u_regs[U_REG_O1], 7*sizeof(regs.u_regs[0]));
+ }
+ }
+# elif defined(HPPA)
+ if (upeek(tcp, PT_GR20, &scno) < 0)
+ return -1;
+ if (entering(tcp)) {
+ /* Check if this is the post-execve SIGTRAP. */
+ if (tcp->flags & TCB_WAITEXECVE) {
+ tcp->flags &= ~TCB_WAITEXECVE;
+ return 0;
+ }
+ }
+# elif defined(SH)
+ /*
+ * In the new syscall ABI, the system call number is in R3.
+ */
+ if (upeek(tcp, 4*(REG_REG0+3), &scno) < 0)
+ return -1;
+
+ if (scno < 0) {
+ /* Odd as it may seem, a glibc bug has been known to cause
+ glibc to issue bogus negative syscall numbers. So for
+ our purposes, make strace print what it *should* have been */
+ long correct_scno = (scno & 0xff);
+ if (debug)
+ fprintf(stderr,
+ "Detected glibc bug: bogus system call"
+ " number = %ld, correcting to %ld\n",
+ scno,
+ correct_scno);
+ scno = correct_scno;
+ }
+
+ if (entering(tcp)) {
+ /* Check if this is the post-execve SIGTRAP. */
+ if (scno == 0 && tcp->flags & TCB_WAITEXECVE) {
+ tcp->flags &= ~TCB_WAITEXECVE;
+ return 0;
+ }
+ }
+# elif defined(SH64)
+ if (upeek(tcp, REG_SYSCALL, &scno) < 0)
+ return -1;
+ scno &= 0xFFFF;
+
+ if (entering(tcp)) {
+ /* Check if this is the post-execve SIGTRAP. */
+ if (tcp->flags & TCB_WAITEXECVE) {
+ tcp->flags &= ~TCB_WAITEXECVE;
+ return 0;
+ }
+ }
+# elif defined(CRISV10) || defined(CRISV32)
+ if (upeek(tcp, 4*PT_R9, &scno) < 0)
+ return -1;
+# elif defined(TILE)
+ if (upeek(tcp, PTREGS_OFFSET_REG(10), &scno) < 0)
+ return -1;
+
+ if (entering(tcp)) {
+ /* Check if this is the post-execve SIGTRAP. */
+ if (tcp->flags & TCB_WAITEXECVE) {
+ tcp->flags &= ~TCB_WAITEXECVE;
+ return 0;
+ }
+ }
+# elif defined(MICROBLAZE)
+ if (upeek(tcp, 0, &scno) < 0)
+ return -1;
+# endif
+#endif /* LINUX */
+
+#ifdef SUNOS4
+ if (upeek(tcp, uoff(u_arg[7]), &scno) < 0)
+ return -1;
+#elif defined(SH)
+ /* new syscall ABI returns result in R0 */
+ if (upeek(tcp, 4*REG_REG0, (long *)&r0) < 0)
+ return -1;
+#elif defined(SH64)
+ /* ABI defines result returned in r9 */
+ if (upeek(tcp, REG_GENERAL(9), (long *)&r9) < 0)
+ return -1;
+#endif
+
+#ifdef USE_PROCFS
+# ifdef HAVE_PR_SYSCALL
+ scno = tcp->status.PR_SYSCALL;
+# else
+# ifndef FREEBSD
+ scno = tcp->status.PR_WHAT;
+# else
+ if (pread(tcp->pfd_reg, &regs, sizeof(regs), 0) < 0) {
+ perror("pread");
+ return -1;
+ }
+ switch (regs.r_eax) {
+ case SYS_syscall:
+ case SYS___syscall:
+ pread(tcp->pfd, &scno, sizeof(scno), regs.r_esp + sizeof(int));
+ break;
+ default:
+ scno = regs.r_eax;
+ break;
+ }
+# endif /* FREEBSD */
+# endif /* !HAVE_PR_SYSCALL */
+#endif /* USE_PROCFS */
+
+ if (entering(tcp))
+ tcp->scno = scno;
+ return 1;
+}
long
known_scno(struct tcb *tcp)
@@ -2388,7 +3001,7 @@ trace_syscall_entering(struct tcb *tcp)
{
int res, scno_good;
- scno_good = res = get_scno(tcp);
+ scno_good = res = get_scno_on_sysenter(tcp);
if (res == 0)
return res;
if (res == 1)
@@ -2560,7 +3173,7 @@ trace_syscall_exiting(struct tcb *tcp)
/* BTW, why we don't just memorize syscall no. on entry
* in tcp->something?
*/
- scno_good = res = get_scno(tcp);
+ scno_good = res = get_scno_on_sysexit(tcp);
if (res == 0)
return res;
if (res == 1)
More information about the Strace-devel
mailing list