[PATCH] x86_64 strace personality switching

Michael A Fetterman mafetter at alum.mit.edu
Fri Apr 29 04:13:14 UTC 2011


strace on x86_64 sometimes gets confused by interrupted system calls.
This happens routinely when tracing a large tree of processes.

You get things like this:

1405  vfork( <unfinished ...>
<various output from other processes omitted>
1405  <... vfork resumed> )             = 1406
1405  close(6)                          = 0
1405  read(5,  <unfinished ...>
<various output from other processes omitted>
1405  <... close resumed> )             = 38

Note the strange "unfinished read", which is "resumed" as a close() syscall!

What's going on?
Here's a big hint:
In 32-bit mode, the "read" syscall is syscall #3.
In 64-bit mode, the "close" syscall is syscall #3.

Process 1405 was a 32-bit process.
In between the "unfinished read" and the "resumed close" there was other output,
some of which was the handling of a 64-bit process's syscall.
That caused the syscall "personality" in strace to switch to 64-bit, and nothing
ever switched it back.

The fix is below...

# HG changeset patch
# User Michael A Fetterman <mafetter at nvidia.com>
# Date 1304049559 14400
# Node ID be916b3ff18e37fab58f1ecef1dae435a626d324
# Parent  0b241c7fcff465a1067ea2f85be7a46921e9a3e6
Remember current 32/64-bit personality of each process when initiating
a system call, so that if/when we need to handle the return of that
syscall, we can switch back to the correct personality, if/as needed.

Signed-off: Michael A Fetterman <mafetter at nvidia.com>

 defs.h    |   1 +
 syscall.c |  53 +++++++++--------------------------------------------
 2 files changed, 10 insertions(+), 44 deletions(-)

diff --git a/defs.h b/defs.h
--- a/defs.h
+++ b/defs.h
@@ -344,6 +344,7 @@
       short flags;            /* See below for TCB_ values */
       int pid;                /* Process Id of this entry */
       long scno;              /* System call number */
+       int currpers;           /* personality when scno was polled */
       int u_nargs;            /* System call arguments */
       long u_arg[MAX_ARGS];   /* System call arguments */
 #if defined (LINUX_MIPSN32)
diff --git a/syscall.c b/syscall.c
--- a/syscall.c
+++ b/syscall.c
@@ -897,9 +897,7 @@
               return -1;

       if (!(tcp->flags & TCB_INSYSCALL)) {
-               static int currpers = -1;
               long val;
-               int pid = tcp->pid;

               /* Check CS register value. On x86-64 linux it is:
                *      0x33    for long mode (64 bit)
@@ -910,53 +908,20 @@
               if (upeek(tcp, 8*CS, &val) < 0)
                       return -1;
               switch (val) {
-                       case 0x23: currpers = 1; break;
-                       case 0x33: currpers = 0; break;
+                       case 0x23: tcp->currpers = 1; break;
+                       case 0x33: tcp->currpers = 0; break;
                       default:
                               fprintf(stderr, "Unknown value CS=0x%02X while "
                                        "detecting personality of process "
-                                        "PID=%d\n", (int)val, pid);
-                               currpers = current_personality;
+                                        "PID=%d\n", (int)val, tcp->pid);
                               break;
               }
-#  if 0
-               /* This version analyzes the opcode of a syscall instruction.
-                * (int 0x80 on i386 vs. syscall on x86-64)
-                * It works, but is too complicated.
-                */
-               unsigned long val, rip, i;
-
-               if (upeek(tcp, 8*RIP, &rip) < 0)
-                       perror("upeek(RIP)");
-
-               /* sizeof(syscall) == sizeof(int 0x80) == 2 */
-               rip -= 2;
-               errno = 0;
-
-               call = ptrace(PTRACE_PEEKTEXT, pid, (char *)rip, (char *)0);
-               if (errno)
-                       fprintf(stderr, "ptrace_peektext failed: %s\n",
-                                       strerror(errno));
-               switch (call & 0xffff) {
-                       /* x86-64: syscall = 0x0f 0x05 */
-                       case 0x050f: currpers = 0; break;
-                       /* i386: int 0x80 = 0xcd 0x80 */
-                       case 0x80cd: currpers = 1; break;
-                       default:
-                               currpers = current_personality;
-                               fprintf(stderr,
-                                       "Unknown syscall opcode (0x%04X) while "
-                                       "detecting personality of process "
-                                       "PID=%d\n", (int)call, pid);
-                               break;
-               }
-#  endif
-               if (currpers != current_personality) {
-                       static const char *const names[] = {"64 bit", "32 bit"};
-                       set_personality(currpers);
-                       fprintf(stderr, "[ Process PID=%d runs in %s mode. ]\n",
-                                       pid, names[current_personality]);
-               }
+       }
+       if (tcp->currpers != current_personality) {
+               static const char *const names[] = {"64 bit", "32 bit"};
+               set_personality(tcp->currpers);
+               fprintf(stderr, "[ Process PID=%d runs in %s mode. ]\n",
+                               tcp->pid, names[current_personality]);
       }
 # elif defined(IA64)
 #      define IA64_PSR_IS      ((long)1 << 34)




More information about the Strace-devel mailing list