[PATCH 1/5] Duplicate get_scno into identical get_scno_on_sysenter, get_scno_on_sysexit

Denys Vlasenko dvlasenk at redhat.com
Mon Aug 22 10:24:04 UTC 2011


On Mon, 2011-08-22 at 12:21 +0200, Denys Vlasenko wrote:
> 1. Duplicate get_scno into two identical functions, get_scno_on_sysenter
> and get_scno_on_sysexit. Call them on syscall entry and syscall exit,
> respectively.

diff -d -urpN strace.0/defs.h strace.1/defs.h
--- strace.0/defs.h	2011-08-21 18:10:51.000000000 +0200
+++ strace.1/defs.h	2011-08-22 03:18:10.723934620 +0200
@@ -586,7 +586,7 @@ extern void droptcb(struct tcb *);
 extern void set_sortby(const char *);
 extern void set_overhead(int);
 extern void qualify(const char *);
-extern int get_scno(struct tcb *);
+extern int get_scno_on_sysenter(struct tcb *);
 extern long known_scno(struct tcb *);
 extern long do_ptrace(int request, struct tcb *tcp, void *addr, void *data);
 extern int ptrace_restart(int request, struct tcb *tcp, int sig);
diff -d -urpN strace.0/strace.c strace.1/strace.c
--- strace.0/strace.c	2011-08-22 01:57:07.000000000 +0200
+++ strace.1/strace.c	2011-08-22 03:17:36.822952537 +0200
@@ -1488,7 +1488,7 @@ proc_open(struct tcb *tcp, int attaching
 			}
 			if (tcp->status.PR_WHY == PR_SYSENTRY) {
 				tcp->flags &= ~TCB_INSYSCALL;
-				get_scno(tcp);
+				get_scno_on_sysenter(tcp);
 				if (known_scno(tcp) == SYS_execve)
 					break;
 			}
diff -d -urpN strace.0/syscall.c strace.1/syscall.c
--- strace.0/syscall.c	2011-08-22 11:56:27.982265607 +0200
+++ strace.1/syscall.c	2011-08-22 03:37:20.155534144 +0200
@@ -750,7 +750,7 @@ struct reg regs; /* TODO: make static? *
  *    ("????" etc) and bail out.
  */
 int
-get_scno(struct tcb *tcp)
+get_scno_on_sysenter(struct tcb *tcp)
 {
 	long scno = 0;
 
@@ -1357,6 +1357,619 @@ get_scno(struct tcb *tcp)
 	return 1;
 }
 
+/* Syscall-exit duplicate of get_scno_on_sysenter(); called from
+ * trace_syscall_exiting().  Returns 0: "ignore this ptrace stop", bail out
+ * of trace_syscall() silently.  Returns 1: ok, continue in trace_syscall().
+ * Returns other: error, trace_syscall() should print error indicator
+ *    ("????" etc) and bail out.
+ */
+static int
+get_scno_on_sysexit(struct tcb *tcp)
+{
+	long scno = 0;
+
+#ifdef LINUX
+# if defined(S390) || defined(S390X)
+	if (tcp->flags & TCB_WAITEXECVE) {
+		/*
+		 * When the execve system call completes successfully, the
+		 * new process still has -ENOSYS (old style) or __NR_execve
+		 * (new style) in gpr2.  We cannot recover the scno again
+		 * by disassembly, because the image that executed the
+		 * syscall is gone now.  Fortunately, we don't want it.  We
+		 * leave the flag set so that syscall_fixup can fake the
+		 * result.
+		 */
+		if (exiting(tcp))
+			return 1;
+		/*
+		 * This is the post-execve SIGTRAP.  We cannot try to read
+		 * the system call here either.
+		 */
+		tcp->flags &= ~TCB_WAITEXECVE;
+		return 0;
+	}
+
+	if (upeek(tcp, PT_GPR2, &syscall_mode) < 0)
+		return -1;
+
+	if (syscall_mode != -ENOSYS) {
+		/*
+		 * Since kernel version 2.5.44 the scno gets passed in gpr2.
+		 */
+		scno = syscall_mode;
+	} else {
+		/*
+		 * Old style of "passing" the scno via the SVC instruction.
+		 */
+
+		long opcode, offset_reg, tmp;
+		void * svc_addr;
+		static const int gpr_offset[16] = {
+				PT_GPR0,  PT_GPR1,  PT_ORIGGPR2, PT_GPR3,
+				PT_GPR4,  PT_GPR5,  PT_GPR6,     PT_GPR7,
+				PT_GPR8,  PT_GPR9,  PT_GPR10,    PT_GPR11,
+				PT_GPR12, PT_GPR13, PT_GPR14,    PT_GPR15
+		};
+
+		if (upeek(tcp, PT_PSWADDR, &pc) < 0)
+			return -1;
+		errno = 0;
+		opcode = ptrace(PTRACE_PEEKTEXT, tcp->pid, (char *)(pc-sizeof(long)), 0);
+		if (errno) {
+			perror("peektext(pc-oneword)");
+			return -1;
+		}
+
+		/*
+		 *  We have to check if the SVC got executed directly or via an
+		 *  EXECUTE instruction. In case of EXECUTE it is necessary to do
+		 *  instruction decoding to derive the system call number.
+		 *  Unfortunately the opcode sizes of EXECUTE and SVC are different,
+		 *  so that this doesn't work if a SVC opcode is part of an EXECUTE
+		 *  opcode. Since there is no way to find out the opcode size this
+		 *  is the best we can do...
+		 */
+
+		if ((opcode & 0xff00) == 0x0a00) {
+			/* SVC opcode */
+			scno = opcode & 0xff;
+		}
+		else {
+			/* SVC got executed by EXECUTE instruction */
+
+			/*
+			 *  Do instruction decoding of EXECUTE. If you really want to
+			 *  understand this, read the Principles of Operation.
+			 */
+			svc_addr = (void *) (opcode & 0xfff);
+
+			tmp = 0;
+			offset_reg = (opcode & 0x000f0000) >> 16;
+			if (offset_reg && (upeek(tcp, gpr_offset[offset_reg], &tmp) < 0))
+				return -1;
+			svc_addr += tmp;
+
+			tmp = 0;
+			offset_reg = (opcode & 0x0000f000) >> 12;
+			if (offset_reg && (upeek(tcp, gpr_offset[offset_reg], &tmp) < 0))
+				return -1;
+			svc_addr += tmp;
+
+			scno = ptrace(PTRACE_PEEKTEXT, tcp->pid, svc_addr, 0);
+			if (errno)
+				return -1;
+#  if defined(S390X)
+			scno >>= 48;
+#  else
+			scno >>= 16;
+#  endif
+			tmp = 0;
+			offset_reg = (opcode & 0x00f00000) >> 20;
+			if (offset_reg && (upeek(tcp, gpr_offset[offset_reg], &tmp) < 0))
+				return -1;
+
+			scno = (scno | tmp) & 0xff;
+		}
+	}
+# elif defined (POWERPC)
+	if (upeek(tcp, sizeof(unsigned long)*PT_R0, &scno) < 0)
+		return -1;
+	if (entering(tcp)) {
+		/* Check if this is the post-execve SIGTRAP. */
+		if (scno == 0 && (tcp->flags & TCB_WAITEXECVE)) {
+			tcp->flags &= ~TCB_WAITEXECVE;
+			return 0;
+		}
+	}
+
+#  ifdef POWERPC64
+	if (entering(tcp)) {
+		/* TODO: speed up strace by not doing this at every syscall.
+		 * We only need to do it after execve.
+		 */
+		int currpers;
+		long val;
+		int pid = tcp->pid;
+
+		/* Check for 64/32 bit mode. */
+		if (upeek(tcp, sizeof(unsigned long)*PT_MSR, &val) < 0)
+			return -1;
+		/* SF is bit 0 of MSR */
+		if (val < 0)
+			currpers = 0;
+		else
+			currpers = 1;
+		if (currpers != current_personality) {
+			static const char *const names[] = {"64 bit", "32 bit"};
+			set_personality(currpers);
+			fprintf(stderr, "[ Process PID=%d runs in %s mode. ]\n",
+					pid, names[current_personality]);
+		}
+	}
+#  endif
+# elif defined(AVR32)
+	/*
+	 * Read complete register set in one go.
+	 */
+	if (ptrace(PTRACE_GETREGS, tcp->pid, NULL, &regs) < 0)
+		return -1;
+
+	/*
+	 * We only need to grab the syscall number on syscall entry.
+	 */
+	if (entering(tcp)) {
+		scno = regs.r8;
+
+		/* Check if this is the post-execve SIGTRAP. */
+		if (tcp->flags & TCB_WAITEXECVE) {
+			tcp->flags &= ~TCB_WAITEXECVE;
+			return 0;
+		}
+	}
+# elif defined(BFIN)
+	if (upeek(tcp, PT_ORIG_P0, &scno))
+		return -1;
+# elif defined (I386)
+	if (upeek(tcp, 4*ORIG_EAX, &scno) < 0)
+		return -1;
+# elif defined (X86_64)
+	if (upeek(tcp, 8*ORIG_RAX, &scno) < 0)
+		return -1;
+
+	if (entering(tcp)) {
+		/* TODO: speed up strace by not doing this at every syscall.
+		 * We only need to do it after execve.
+		 */
+		int currpers;
+		long val;
+		int pid = tcp->pid;
+
+		/* Check CS register value. On x86-64 linux it is:
+		 *	0x33	for long mode (64 bit)
+		 *	0x23	for compatibility mode (32 bit)
+		 * It takes only one ptrace and thus doesn't need
+		 * to be cached.
+		 */
+		if (upeek(tcp, 8*CS, &val) < 0)
+			return -1;
+		switch (val) {
+			case 0x23: currpers = 1; break;
+			case 0x33: currpers = 0; break;
+			default:
+				fprintf(stderr, "Unknown value CS=0x%02X while "
+					 "detecting personality of process "
+					 "PID=%d\n", (int)val, pid);
+				currpers = current_personality;
+				break;
+		}
+#  if 0
+		/* This version analyzes the opcode of a syscall instruction.
+		 * (int 0x80 on i386 vs. syscall on x86-64)
+		 * It works, but is too complicated.
+		 */
+		unsigned long val, rip, i;
+
+		if (upeek(tcp, 8*RIP, &rip) < 0)
+			perror("upeek(RIP)");
+
+		/* sizeof(syscall) == sizeof(int 0x80) == 2 */
+		rip -= 2;
+		errno = 0;
+
+		call = ptrace(PTRACE_PEEKTEXT, pid, (char *)rip, (char *)0);
+		if (errno)
+			fprintf(stderr, "ptrace_peektext failed: %s\n",
+					strerror(errno));
+		switch (call & 0xffff) {
+			/* x86-64: syscall = 0x0f 0x05 */
+			case 0x050f: currpers = 0; break;
+			/* i386: int 0x80 = 0xcd 0x80 */
+			case 0x80cd: currpers = 1; break;
+			default:
+				currpers = current_personality;
+				fprintf(stderr,
+					"Unknown syscall opcode (0x%04X) while "
+					"detecting personality of process "
+					"PID=%d\n", (int)call, pid);
+				break;
+		}
+#  endif
+		if (currpers != current_personality) {
+			static const char *const names[] = {"64 bit", "32 bit"};
+			set_personality(currpers);
+			fprintf(stderr, "[ Process PID=%d runs in %s mode. ]\n",
+					pid, names[current_personality]);
+		}
+	}
+# elif defined(IA64)
+#	define IA64_PSR_IS	((long)1 << 34)
+	if (upeek(tcp, PT_CR_IPSR, &psr) >= 0)
+		ia32 = (psr & IA64_PSR_IS) != 0;
+	if (entering(tcp)) {
+		if (ia32) {
+			if (upeek(tcp, PT_R1, &scno) < 0)	/* orig eax */
+				return -1;
+		} else {
+			if (upeek(tcp, PT_R15, &scno) < 0)
+				return -1;
+		}
+		/* Check if this is the post-execve SIGTRAP. */
+		if (tcp->flags & TCB_WAITEXECVE) {
+			tcp->flags &= ~TCB_WAITEXECVE;
+			return 0;
+		}
+	} else {
+		/* Syscall exit */
+		if (upeek(tcp, PT_R8, &r8) < 0)
+			return -1;
+		if (upeek(tcp, PT_R10, &r10) < 0)
+			return -1;
+	}
+# elif defined (ARM)
+	/*
+	 * Read complete register set in one go.
+	 */
+	if (ptrace(PTRACE_GETREGS, tcp->pid, NULL, (void *)&regs) == -1)
+		return -1;
+
+	/*
+	 * We only need to grab the syscall number on syscall entry.
+	 */
+	if (regs.ARM_ip == 0) {
+		if (entering(tcp)) {
+			/* Check if this is the post-execve SIGTRAP. */
+			if (tcp->flags & TCB_WAITEXECVE) {
+				tcp->flags &= ~TCB_WAITEXECVE;
+				return 0;
+			}
+		}
+
+		/*
+		 * Note: we deal with only 32-bit CPUs here.
+		 */
+		if (regs.ARM_cpsr & 0x20) {
+			/*
+			 * Get the Thumb-mode system call number
+			 */
+			scno = regs.ARM_r7;
+		} else {
+			/*
+			 * Get the ARM-mode system call number
+			 */
+			errno = 0;
+			scno = ptrace(PTRACE_PEEKTEXT, tcp->pid, (void *)(regs.ARM_pc - 4), NULL);
+			if (errno)
+				return -1;
+
+			/* FIXME: bogus check? it is already done on entering before,
+			 * so we never can see it here?
+			 */
+			if (scno == 0 && (tcp->flags & TCB_WAITEXECVE)) {
+				tcp->flags &= ~TCB_WAITEXECVE;
+				return 0;
+			}
+
+			/* Handle the EABI syscall convention.  We do not
+			   bother converting structures between the two
+			   ABIs, but basic functionality should work even
+			   if strace and the traced program have different
+			   ABIs.  */
+			if (scno == 0xef000000) {
+				scno = regs.ARM_r7;
+			} else {
+				if ((scno & 0x0ff00000) != 0x0f900000) {
+					fprintf(stderr, "syscall: unknown syscall trap 0x%08lx\n",
+						scno);
+					return -1;
+				}
+
+				/*
+				 * Fixup the syscall number
+				 */
+				scno &= 0x000fffff;
+			}
+		}
+		if (scno & 0x0f0000) {
+			/*
+			 * Handle ARM specific syscall
+			 */
+			set_personality(1);
+			scno &= 0x0000ffff;
+		} else
+			set_personality(0);
+
+		if (exiting(tcp)) {
+			fprintf(stderr, "pid %d stray syscall exit\n", tcp->pid);
+			tcp->flags &= ~TCB_INSYSCALL;
+		}
+	} else {
+		if (entering(tcp)) {
+			fprintf(stderr, "pid %d stray syscall entry\n", tcp->pid);
+			tcp->flags |= TCB_INSYSCALL;
+		}
+	}
+# elif defined (M68K)
+	if (upeek(tcp, 4*PT_ORIG_D0, &scno) < 0)
+		return -1;
+# elif defined (LINUX_MIPSN32)
+	unsigned long long regs[38];
+
+	if (ptrace(PTRACE_GETREGS, tcp->pid, NULL, (long) &regs) < 0)
+		return -1;
+	a3 = regs[REG_A3];
+	r2 = regs[REG_V0];
+
+	if (entering(tcp)) {
+		scno = r2;
+
+		/* Check if this is the post-execve SIGTRAP. */
+		if (scno == 0 && tcp->flags & TCB_WAITEXECVE) {
+			tcp->flags &= ~TCB_WAITEXECVE;
+			return 0;
+		}
+
+		if (scno < 0 || scno > nsyscalls) {
+			if (a3 == 0 || a3 == -1) {
+				if (debug)
+					fprintf(stderr, "stray syscall exit: v0 = %ld\n", scno);
+				return 0;
+			}
+		}
+	}
+# elif defined (MIPS)
+	if (upeek(tcp, REG_A3, &a3) < 0)
+		return -1;
+	if (entering(tcp)) {
+		if (upeek(tcp, REG_V0, &scno) < 0)
+			return -1;
+
+		/* Check if this is the post-execve SIGTRAP. */
+		if (scno == 0 && tcp->flags & TCB_WAITEXECVE) {
+			tcp->flags &= ~TCB_WAITEXECVE;
+			return 0;
+		}
+
+		if (scno < 0 || scno > nsyscalls) {
+			if (a3 == 0 || a3 == -1) {
+				if (debug)
+					fprintf(stderr, "stray syscall exit: v0 = %ld\n", scno);
+				return 0;
+			}
+		}
+	} else {
+		if (upeek(tcp, REG_V0, &r2) < 0)
+			return -1;
+	}
+# elif defined (ALPHA)
+	if (upeek(tcp, REG_A3, &a3) < 0)
+		return -1;
+
+	if (entering(tcp)) {
+		if (upeek(tcp, REG_R0, &scno) < 0)
+			return -1;
+
+		/* Check if this is the post-execve SIGTRAP. */
+		if (scno == 0 && tcp->flags & TCB_WAITEXECVE) {
+			tcp->flags &= ~TCB_WAITEXECVE;
+			return 0;
+		}
+
+		/*
+		 * Do some sanity checks to figure out if it's
+		 * really a syscall entry
+		 */
+		if (scno < 0 || scno > nsyscalls) {
+			if (a3 == 0 || a3 == -1) {
+				if (debug)
+					fprintf(stderr, "stray syscall exit: r0 = %ld\n", scno);
+				return 0;
+			}
+		}
+	}
+	else {
+		if (upeek(tcp, REG_R0, &r0) < 0)
+			return -1;
+	}
+# elif defined (SPARC) || defined (SPARC64)
+	/* Everything we need is in the current register set. */
+	if (ptrace(PTRACE_GETREGS, tcp->pid, (char *)&regs, 0) < 0)
+		return -1;
+
+	/* If we are entering, then disassemble the syscall trap. */
+	if (entering(tcp)) {
+		/* Retrieve the syscall trap instruction. */
+		errno = 0;
+#  if defined(SPARC64)
+		trap = ptrace(PTRACE_PEEKTEXT, tcp->pid, (char *)regs.tpc, 0);
+		trap >>= 32;
+#  else
+		trap = ptrace(PTRACE_PEEKTEXT, tcp->pid, (char *)regs.pc, 0);
+#  endif
+		if (errno)
+			return -1;
+
+		/* Disassemble the trap to see what personality to use. */
+		switch (trap) {
+		case 0x91d02010:
+			/* Linux/SPARC syscall trap. */
+			set_personality(0);
+			break;
+		case 0x91d0206d:
+			/* Linux/SPARC64 syscall trap. */
+			set_personality(2);
+			break;
+		case 0x91d02000:
+			/* SunOS syscall trap. (pers 1) */
+			fprintf(stderr, "syscall: SunOS no support\n");
+			return -1;
+		case 0x91d02008:
+			/* Solaris 2.x syscall trap. (per 2) */
+			set_personality(1);
+			break;
+		case 0x91d02009:
+			/* NetBSD/FreeBSD syscall trap. */
+			fprintf(stderr, "syscall: NetBSD/FreeBSD not supported\n");
+			return -1;
+		case 0x91d02027:
+			/* Solaris 2.x gettimeofday */
+			set_personality(1);
+			break;
+		default:
+			/* Check if this is the post-execve SIGTRAP. */
+			if (tcp->flags & TCB_WAITEXECVE) {
+				tcp->flags &= ~TCB_WAITEXECVE;
+				return 0;
+			}
+#  if defined (SPARC64)
+			fprintf(stderr, "syscall: unknown syscall trap %08lx %016lx\n", trap, regs.tpc);
+#  else
+			fprintf(stderr, "syscall: unknown syscall trap %08lx %08lx\n", trap, regs.pc);
+#  endif
+			return -1;
+		}
+
+		/* Extract the system call number from the registers. */
+		if (trap == 0x91d02027)
+			scno = 156;
+		else
+			scno = regs.u_regs[U_REG_G1];
+		if (scno == 0) {
+			scno = regs.u_regs[U_REG_O0];
+			memmove(&regs.u_regs[U_REG_O0], &regs.u_regs[U_REG_O1], 7*sizeof(regs.u_regs[0]));
+		}
+	}
+# elif defined(HPPA)
+	if (upeek(tcp, PT_GR20, &scno) < 0)
+		return -1;
+	if (entering(tcp)) {
+		/* Check if this is the post-execve SIGTRAP. */
+		if (tcp->flags & TCB_WAITEXECVE) {
+			tcp->flags &= ~TCB_WAITEXECVE;
+			return 0;
+		}
+	}
+# elif defined(SH)
+	/*
+	 * In the new syscall ABI, the system call number is in R3.
+	 */
+	if (upeek(tcp, 4*(REG_REG0+3), &scno) < 0)
+		return -1;
+
+	if (scno < 0) {
+		/* Odd as it may seem, a glibc bug has been known to cause
+		   glibc to issue bogus negative syscall numbers.  So for
+		   our purposes, make strace print what it *should* have been */
+		long correct_scno = (scno & 0xff);
+		if (debug)
+			fprintf(stderr,
+				"Detected glibc bug: bogus system call"
+				" number = %ld, correcting to %ld\n",
+				scno,
+				correct_scno);
+		scno = correct_scno;
+	}
+
+	if (entering(tcp)) {
+		/* Check if this is the post-execve SIGTRAP. */
+		if (scno == 0 && tcp->flags & TCB_WAITEXECVE) {
+			tcp->flags &= ~TCB_WAITEXECVE;
+			return 0;
+		}
+	}
+# elif defined(SH64)
+	if (upeek(tcp, REG_SYSCALL, &scno) < 0)
+		return -1;
+	scno &= 0xFFFF;
+
+	if (entering(tcp)) {
+		/* Check if this is the post-execve SIGTRAP. */
+		if (tcp->flags & TCB_WAITEXECVE) {
+			tcp->flags &= ~TCB_WAITEXECVE;
+			return 0;
+		}
+	}
+# elif defined(CRISV10) || defined(CRISV32)
+	if (upeek(tcp, 4*PT_R9, &scno) < 0)
+		return -1;
+# elif defined(TILE)
+	if (upeek(tcp, PTREGS_OFFSET_REG(10), &scno) < 0)
+		return -1;
+
+	if (entering(tcp)) {
+		/* Check if this is the post-execve SIGTRAP. */
+		if (tcp->flags & TCB_WAITEXECVE) {
+			tcp->flags &= ~TCB_WAITEXECVE;
+			return 0;
+		}
+	}
+# elif defined(MICROBLAZE)
+	if (upeek(tcp, 0, &scno) < 0)
+		return -1;
+# endif
+#endif /* LINUX */
+
+#ifdef SUNOS4
+	if (upeek(tcp, uoff(u_arg[7]), &scno) < 0)
+		return -1;
+#elif defined(SH)
+	/* new syscall ABI returns result in R0 */
+	if (upeek(tcp, 4*REG_REG0, (long *)&r0) < 0)
+		return -1;
+#elif defined(SH64)
+	/* ABI defines result returned in r9 */
+	if (upeek(tcp, REG_GENERAL(9), (long *)&r9) < 0)
+		return -1;
+#endif
+
+#ifdef USE_PROCFS
+# ifdef HAVE_PR_SYSCALL
+	scno = tcp->status.PR_SYSCALL;
+# else
+#  ifndef FREEBSD
+	scno = tcp->status.PR_WHAT;
+#  else
+	if (pread(tcp->pfd_reg, &regs, sizeof(regs), 0) < 0) {
+		perror("pread");
+		return -1;
+	}
+	switch (regs.r_eax) {
+	case SYS_syscall:
+	case SYS___syscall:
+		pread(tcp->pfd, &scno, sizeof(scno), regs.r_esp + sizeof(int));
+		break;
+	default:
+		scno = regs.r_eax;
+		break;
+	}
+#  endif /* FREEBSD */
+# endif /* !HAVE_PR_SYSCALL */
+#endif /* USE_PROCFS */
+
+	if (entering(tcp))
+		tcp->scno = scno;
+	return 1;
+}
 
 long
 known_scno(struct tcb *tcp)
@@ -2388,7 +3001,7 @@ trace_syscall_entering(struct tcb *tcp)
 {
 	int res, scno_good;
 
-	scno_good = res = get_scno(tcp);
+	scno_good = res = get_scno_on_sysenter(tcp);
 	if (res == 0)
 		return res;
 	if (res == 1)
@@ -2560,7 +3173,7 @@ trace_syscall_exiting(struct tcb *tcp)
 	/* BTW, why we don't just memorize syscall no. on entry
 	 * in tcp->something?
 	 */
-	scno_good = res = get_scno(tcp);
+	scno_good = res = get_scno_on_sysexit(tcp);
 	if (res == 0)
 		return res;
 	if (res == 1)






More information about the Strace-devel mailing list