[PATCH] new x86 personality detection

Denys Vlasenko dvlasenk at redhat.com
Mon Feb 11 11:45:48 UTC 2013


Hi,

The recent addition of AArch64 support uses a novel way of detecting
whether a process is 32-bit or 64-bit: it looks at the size returned
by ptrace(PTRACE_GETREGSET, NT_PRSTATUS).

I played with it on x86 and it appears to be working there too.

Our current code looks at the cs and ds registers, which is in fact
*wrong*. See, for example, https://lkml.org/lkml/2012/1/18/320.
The following program confuses strace:

  /*
   * Reproducer: calls pause() twice from one process, first through the
   * 32-bit int $0x80 entry and then through the 64-bit syscall entry.
   * Each pause() returns once a SIGWINCH has been handled.
   */
  #define _GNU_SOURCE	/* must precede every #include to take effect */
  #include <errno.h>
  #include <stdio.h>
  #include <stdlib.h>
  #include <signal.h>
  #include <unistd.h>
  #include <sys/syscall.h>
  void handler(int sig) {
	(void)sig;
	/* printf is not async-signal-safe; tolerated in this tiny demo */
	printf("SIGWINCH\n");
  }
  int main(int argc, char **argv) {
	int ret;
	(void)argc;
	(void)argv;
	signal(SIGWINCH, handler);
	/* sys_pause - 32-bit; eax also receives the result, so it is an output */
	__asm__ volatile ("int $0x80" : "=a" (ret) : "0" (29) : "memory");
	(void)ret;
	syscall(34);	/* sys_pause - 64-bit */
	return 0;
  }

because strace thinks that both syscalls are 64-bit, although the
first one enters the kernel through the 32-bit int $0x80 interface.

This patch implements a (hopefully) correct way to check for
syscall bitness on x86.

I have tested that it works when stracing normal 32-bit binaries;
I can't test the above example until this evening.
But it should work too (famous last words?).

Please review.

-- 
vda

diff -d -urpN strace.4/syscall.c strace.5/syscall.c
--- strace.4/syscall.c	2013-02-11 11:24:31.536160334 +0100
+++ strace.5/syscall.c	2013-02-11 11:25:32.910007767 +0100
@@ -65,6 +65,13 @@
 # include <asm/rse.h>
 #endif

+#if defined(X86_64) || defined(X32)
+# include <linux/ptrace.h>
+# include <asm/ptrace.h>
+# include <sys/uio.h>
+# include <elf.h>
+#endif
+
 #if defined(AARCH64)
 # include <asm/ptrace.h>
 # include <sys/uio.h>
@@ -657,12 +664,39 @@ is_restart_error(struct tcb *tcp)
 struct pt_regs i386_regs;
 #elif defined(X86_64) || defined(X32)
 /*
- * On 32 bits, pt_regs and user_regs_struct are the same,
- * but on 64 bits, user_regs_struct has six more fields:
+ * On i386, pt_regs and user_regs_struct are the same,
+ * but on 64 bit x86, user_regs_struct has six more fields:
  * fs_base, gs_base, ds, es, fs, gs.
  * PTRACE_GETREGS fills them too, so struct pt_regs would overflow.
  */
-static struct user_regs_struct x86_64_regs;
+struct i386_user_regs_struct {
+	uint32_t ebx;
+	uint32_t ecx;
+	uint32_t edx;
+	uint32_t esi;
+	uint32_t edi;
+	uint32_t ebp;
+	uint32_t eax;
+	uint32_t xds;
+	uint32_t xes;
+	uint32_t xfs;
+	uint32_t xgs;
+	uint32_t orig_eax;
+	uint32_t eip;
+	uint32_t xcs;
+	uint32_t eflags;
+	uint32_t esp;
+	uint32_t xss;
+};
+static union {
+	struct user_regs_struct      x86_64_r;
+	struct i386_user_regs_struct i386_r;
+} x86_regs_union;
+# define x86_64_regs x86_regs_union.x86_64_r
+# define i386_regs   x86_regs_union.i386_r
+static struct iovec x86_io = {
+	.iov_base = &x86_regs_union
+};
 #elif defined(IA64)
 long ia32 = 0; /* not static */
 static long ia64_r8, ia64_r10;
@@ -738,7 +772,12 @@ printcall(struct tcb *tcp)
 	tprintf("[%016lx] ", psw);
 # endif
 #elif defined(X86_64) || defined(X32)
-	tprintf("[%016lx] ", (unsigned long) x86_64_regs.rip);
+	if (x86_io.iov_len == sizeof(i386_regs)) {
+		tprintf("[%08x] ", (unsigned) i386_regs.eip);
+	} else {
+/* FIXME: isn't rip a "long long" in X32 arch? */
+		tprintf("[%016lx] ", (unsigned long) x86_64_regs.rip);
+	}
 #elif defined(IA64)
 	long ip;

@@ -859,7 +898,9 @@ void get_regs(pid_t pid)
 # elif defined(I386)
 	get_regs_error = ptrace(PTRACE_GETREGS, pid, NULL, (long) &i386_regs);
 # elif defined(X86_64) || defined(X32)
-	get_regs_error = ptrace(PTRACE_GETREGS, pid, NULL, (long) &x86_64_regs);
+	/*x86_io.iov_base = &x86_regs_union; - already is */
+	x86_io.iov_len = sizeof(x86_regs_union);
+	get_regs_error = ptrace(PTRACE_GETREGSET, pid, NT_PRSTATUS, (long) &x86_io);
 # elif defined(ARM)
 	get_regs_error = ptrace(PTRACE_GETREGS, pid, NULL, (void *)&arm_regs);
 # elif defined(AARCH64)
@@ -1015,14 +1056,34 @@ get_scno(struct tcb *tcp)
 #  define __X32_SYSCALL_BIT	0x40000000
 # endif
 	int currpers;
-	scno = x86_64_regs.orig_rax;
-
-	/* Check CS register value. On x86-64 linux it is:
-	 *	0x33	for long mode (64 bit)
-	 *	0x23	for compatibility mode (32 bit)
-	 * Check DS register value. On x86-64 linux it is:
-	 *	0x2b	for x32 mode (x86-64 in 32 bit)
+# if 1
+	/* GETREGSET of NT_PRSTATUS tells us regset size,
+	 * which unambiguously detects i386.
+	 *
+	 * Linux kernel distinguishes x86-64 and x32 processes
+	 * solely by looking at __X32_SYSCALL_BIT:
+	 * arch/x86/include/asm/compat.h::is_x32_task():
+	 * if (task_pt_regs(current)->orig_ax & __X32_SYSCALL_BIT)
+	 *         return true;
+	 */
+	if (x86_io.iov_len == sizeof(i386_regs)) {
+		scno = i386_regs.orig_eax;
+		currpers = 1;
+	} else {
+		scno = x86_64_regs.orig_rax;
+		currpers = 0;
+		if (scno & __X32_SYSCALL_BIT) {
+			scno -= __X32_SYSCALL_BIT;
+			currpers = 2;
+		}
+	}
+# elif 0
+	/* On x86-64 linux:
+	 * cs = 0x33 for long mode (64 bit)
+	 * cs = 0x23 for compatibility mode (32 bit)
+	 * ds = 0x2b for x32 mode (x86-64 in 32 bit)
 	 */
+	scno = x86_64_regs.orig_rax;
 	switch (x86_64_regs.cs) {
 		case 0x23: currpers = 1; break;
 		case 0x33:
@@ -1039,7 +1100,7 @@ get_scno(struct tcb *tcp)
 			currpers = current_personality;
 			break;
 	}
-# if 0
+# elif 0
 	/* This version analyzes the opcode of a syscall instruction.
 	 * (int 0x80 on i386 vs. syscall on x86-64)
 	 * It works, but is too complicated.
@@ -1363,9 +1424,12 @@ syscall_fixup_on_sysenter(struct tcb *tc
 	}
 #elif defined(X86_64) || defined(X32)
 	{
-		long rax = x86_64_regs.rax;
-		if (current_personality == 1)
-			rax = (int)rax; /* sign extend from 32 bits */
+		long rax;
+		if (x86_io.iov_len == sizeof(i386_regs)) {
+			rax = (int)i386_regs.eax; /* sign extend from 32 bits */
+		} else {
+			rax = x86_64_regs.rax;
+		}
 		if (rax != -ENOSYS) {
 			if (debug_flag)
 				fprintf(stderr, "not a syscall entry (rax = %ld)\n", rax);
@@ -1659,7 +1723,8 @@ get_syscall_args(struct tcb *tcp)
 #elif defined(X86_64) || defined(X32)
 	(void)i;
 	(void)nargs;
-	if (current_personality != 1) { /* x86-64 or x32 ABI */
+	if (x86_io.iov_len != sizeof(i386_regs)) {
+		/* x86-64 or x32 ABI */
 		tcp->u_arg[0] = x86_64_regs.rdi;
 		tcp->u_arg[1] = x86_64_regs.rsi;
 		tcp->u_arg[2] = x86_64_regs.rdx;
@@ -1674,14 +1739,15 @@ get_syscall_args(struct tcb *tcp)
 		tcp->ext_arg[4] = x86_64_regs.r8;
 		tcp->ext_arg[5] = x86_64_regs.r9;
 #  endif
-	} else { /* i386 ABI */
+	} else {
+		/* i386 ABI */
 		/* Sign-extend lower 32 bits */
-		tcp->u_arg[0] = (long)(int)x86_64_regs.rbx;
-		tcp->u_arg[1] = (long)(int)x86_64_regs.rcx;
-		tcp->u_arg[2] = (long)(int)x86_64_regs.rdx;
-		tcp->u_arg[3] = (long)(int)x86_64_regs.rsi;
-		tcp->u_arg[4] = (long)(int)x86_64_regs.rdi;
-		tcp->u_arg[5] = (long)(int)x86_64_regs.rbp;
+		tcp->u_arg[0] = (long)(int)i386_regs.ebx;
+		tcp->u_arg[1] = (long)(int)i386_regs.ecx;
+		tcp->u_arg[2] = (long)(int)i386_regs.edx;
+		tcp->u_arg[3] = (long)(int)i386_regs.esi;
+		tcp->u_arg[4] = (long)(int)i386_regs.edi;
+		tcp->u_arg[5] = (long)(int)i386_regs.ebp;
 	}
 #elif defined(MICROBLAZE)
 	for (i = 0; i < nargs; ++i)
@@ -1977,14 +2043,20 @@ get_error(struct tcb *tcp)
 		tcp->u_rval = i386_regs.eax;
 	}
 #elif defined(X86_64) || defined(X32)
-	if (check_errno && is_negated_errno(x86_64_regs.rax)) {
+	long rax;
+	if (x86_io.iov_len == sizeof(i386_regs)) {
+		rax = (int)i386_regs.eax; /* sign extend from 32 bits */
+	} else {
+		rax = x86_64_regs.rax;
+	}
+	if (check_errno && is_negated_errno(rax)) {
 		tcp->u_rval = -1;
-		u_error = -x86_64_regs.rax;
+		u_error = -rax;
 	}
 	else {
-		tcp->u_rval = x86_64_regs.rax;
+		tcp->u_rval = rax;
 # if defined(X32)
-		tcp->u_lrval = x86_64_regs.rax;
+		tcp->u_lrval = rax;
 # endif
 	}
 #elif defined(IA64)




More information about the Strace-devel mailing list