[PATCH] RFC: do not detach when we think tracee is going to die
Denys Vlasenko
dvlasenk at redhat.com
Sat Jun 25 09:34:23 UTC 2011
Hi,
This is a big change. I am not entirely sure it is correct,
so this patch is meant more like a RFC rather than a proposal
to apply it right away.
Current code plays some ungodly tricks, trying to not detach
thread group leader until all threads exit.
Also, it detaches from a tracee when signal delivery is detected
which will cause tracee to exit.
This operation is racy (not to mention the determination
whether signal is set to SIG_DFL is horrible hack):
after we determined that this signal is indeed fatal
but before we detach and let process die,
*other thread* may set a handler to this signal, and
we will leak the process, falsely displaying it as killed!
I need to look in the past to figure out why we even do it.
First guess is that it's a workaround for old kernel bugs.
For one, test/childthread.c allegedly demonstrates
why these hacks are needed.
The patch deletes the hacks. We no longer need tcp->nclone_threads,
TCB_EXITING and TCB_GROUP_EXITING. We also lose a few rather
ugly functions.
I also added a new message: "+++ exited with EXITCODE +++"
which shows exact moment strace got exit notification.
It is analogous to existing "+++ killed by SIG +++" message.
Examples:
(1) test/childthread.c still works correctly:
gcc -o test/childthread test/childthread.c -Wall -pthread; ./strace -f -oLOG test/childthread
Before:
6253 execve("test/childthread", ["test/childthread"], [/* 52 vars */]) = 0
...
6253 clone(child_stack=0, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0xb7817728) = 6254
6253 rt_sigprocmask(SIG_BLOCK, [CHLD], [], 8) = 0
6254 mmap2(NULL, 8392704, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_STACK, -1, 0 <unfinished ...>
6253 rt_sigaction(SIGCHLD, NULL, <unfinished ...>
6254 <... mmap2 resumed> ) = 0xb7016000
6253 <... rt_sigaction resumed> {SIG_DFL, [], 0}, 8) = 0
6254 brk(0 <unfinished ...>
6253 rt_sigprocmask(SIG_SETMASK, [], <unfinished ...>
6254 <... brk resumed> ) = 0x8394000
6253 <... rt_sigprocmask resumed> NULL, 8) = 0
6254 brk(0x83b5000 <unfinished ...>
6253 nanosleep({2, 0}, <unfinished ...>
6254 <... brk resumed> ) = 0x83b5000
6254 brk(0) = 0x83b5000
6254 mprotect(0xb7016000, 4096, PROT_NONE) = 0
6254 clone(child_stack=0xb7816494, flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID, parent_tidptr=0xb7816bd8, {entry_number:6, base_addr:0xb7816b70, limit:1048575, seg_32bit:1, contents:0, read_exec_only:0, limit_in_pages:1, seg_not_present:0, useable:1}, child_tidptr=0xb7816bd8) = 6255
6255 set_robust_list(0xb7816be0, 0xc <unfinished ...>
6254 rt_sigprocmask(SIG_BLOCK, [CHLD], <unfinished ...>
6255 <... set_robust_list resumed> ) = 0
6254 <... rt_sigprocmask resumed> [], 8) = 0
6255 pause( <unfinished ...>
6254 rt_sigaction(SIGCHLD, NULL, {SIG_DFL, [], 0}, 8) = 0
6254 rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
6254 nanosleep({1, 0}, 0xbfe5d144) = 0
6254 exit_group(42) = ?
6253 <... nanosleep resumed> {0, 999110335}) = ? ERESTART_RESTARTBLOCK (To be restarted)
6253 --- {si_signo=SIGCHLD, si_code=CLD_EXITED, si_pid=6254, si_status=42, si_utime=0, si_stime=0} (Child exited) ---
6253 restart_syscall(<... resuming interrupted call ...>) = 0
6253 waitpid(-1, [{WIFEXITED(s) && WEXITSTATUS(s) == 42}], 0) = 6254
6253 fstat64(1, {st_mode=S_IFCHR|0620, st_rdev=makedev(136, 0), ...}) = 0
6253 mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xb782d000
6253 write(1, "OK\n", 3) = 3
6253 exit_group(0) = ?
After:
6279 execve("test/childthread", ["test/childthread"], [/* 52 vars */]) = 0
...
6280 clone(child_stack=0xb7789494, flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID, parent_tidptr=0xb7789bd8, {entry_number:6, base_addr:0xb7789b70, limit:1048575, seg_32bit:1, contents:0, read_exec_only:0, limit_in_pages:1, seg_not_present:0, useable:1}, child_tidptr=0xb7789bd8) = 6281
6281 set_robust_list(0xb7789be0, 0xc <unfinished ...>
6280 rt_sigprocmask(SIG_BLOCK, [CHLD], <unfinished ...>
6281 <... set_robust_list resumed> ) = 0
6280 <... rt_sigprocmask resumed> [], 8) = 0
6281 pause( <unfinished ...>
6280 rt_sigaction(SIGCHLD, NULL, {SIG_DFL, [], 0}, 8) = 0
6280 rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
6280 nanosleep({1, 0}, 0xbff76d24) = 0
6280 exit_group(42) = ?
6281 +++ exited with 42 +++
6280 +++ exited with 42 +++
6279 <... nanosleep resumed> {0, 999057987}) = ? ERESTART_RESTARTBLOCK (To be restarted)
6279 --- {si_signo=SIGCHLD, si_code=CLD_EXITED, si_pid=6280, si_status=42, si_utime=0, si_stime=0} (Child exited) ---
6279 restart_syscall(<... resuming interrupted call ...>) = 0
6279 waitpid(-1, [{WIFEXITED(s) && WEXITSTATUS(s) == 42}], 0) = 6280
6279 fstat64(1, {st_mode=S_IFCHR|0620, st_rdev=makedev(136, 0), ...}) = 0
6279 mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xb77a0000
6279 write(1, "OK\n", 3) = 3
6279 exit_group(0) = ?
6279 +++ exited with 0 +++
Note that new "+++ exited with 42 +++" message is a useful addition:
it shows that exit_group(42) in thread 6280 killed _two_ threads.
With old code it's not obvious.
(2) Death from signal still works and is shown correctly:
./strace sh -c 'kill -HUP $$'
Before:
execve("/bin/sh", ["sh", "-c", "kill -HUP $$"], [/* 52 vars */]) = 0
...
getpid() = 6306
kill(6306, SIGHUP) = 0
--- {si_signo=SIGHUP, si_code=SI_USER, si_pid=6306, si_uid=0} (Hangup) ---
+++ killed by SIGHUP +++
After:
execve("/bin/sh", ["sh", "-c", "kill -HUP $$"], [/* 52 vars */]) = 0
...
getpid() = 6310
kill(6310, SIGHUP) = 0
--- {si_signo=SIGHUP, si_code=SI_USER, si_pid=6310, si_uid=0} (Hangup) ---
+++ killed by SIGHUP +++
No change
Patch follows.
Please review.
--
vda
diff -d -urpN strace.5/defs.h strace.6/defs.h
--- strace.5/defs.h 2011-06-24 22:44:31.178006039 +0200
+++ strace.6/defs.h 2011-06-24 22:44:51.642954549 +0200
@@ -367,10 +367,6 @@ struct tcb {
struct timeval etime; /* Syscall entry time */
/* Support for tracing forked processes */
struct tcb *parent; /* Parent of this process */
-#ifdef LINUX
- int nclone_threads; /* # of children with CLONE_THREAD */
-#endif
- /* (1st arg of wait4()) */
long baddr; /* `Breakpoint' address */
long inst[2]; /* Instructions on above */
int pfd; /* proc file descriptor */
@@ -396,7 +392,6 @@ struct tcb {
#define TCB_INUSE 00002 /* This table entry is in use */
#define TCB_INSYSCALL 00004 /* A system call is in progress */
#define TCB_ATTACHED 00010 /* Process is not our own child */
-#define TCB_EXITING 00020 /* As far as we know, this process is exiting */
#define TCB_SUSPENDED 00040 /* Process can not be allowed to resume just now */
#define TCB_BPTSET 00100 /* "Breakpoint" set after fork(2) */
#define TCB_SIGTRAPPED 00200 /* Process wanted to block SIGTRAP */
@@ -414,7 +409,6 @@ struct tcb {
# define TCB_WAITEXECVE 04000 /* ignore SIGTRAP after exceve */
# endif
# define TCB_CLONE_THREAD 010000 /* CLONE_THREAD set in creating syscall */
-# define TCB_GROUP_EXITING 020000 /* TCB_EXITING was exit_group, not _exit */
# include <sys/syscall.h>
# ifndef __NR_exit_group
# /* Hack: Most headers around are too old to have __NR_exit_group. */
@@ -571,7 +565,6 @@ extern int clearbpt(struct tcb *);
* On newer kernels, we use PTRACE_O_TRACECLONE/TRACE[V]FORK instead.
*/
extern int setbpt(struct tcb *);
-extern int sigishandled(struct tcb *, int);
extern void printcall(struct tcb *);
extern const char *signame(int);
extern void print_sigset(struct tcb *, long, int);
@@ -591,7 +584,6 @@ extern int pathtrace_match(struct tcb *)
extern int change_syscall(struct tcb *, int);
extern int internal_fork(struct tcb *);
extern int internal_exec(struct tcb *);
-extern int internal_exit(struct tcb *);
#ifdef LINUX
extern int handle_new_child(struct tcb *, int, int);
#endif
diff -d -urpN strace.5/process.c strace.6/process.c
--- strace.5/process.c 2011-06-23 22:55:46.000000000 +0200
+++ strace.6/process.c 2011-06-24 00:05:03.614985099 +0200
@@ -435,19 +435,6 @@ sys_exit(struct tcb *tcp)
return 0;
}
-int
-internal_exit(struct tcb *tcp)
-{
- if (entering(tcp)) {
- tcp->flags |= TCB_EXITING;
-#ifdef __NR_exit_group
- if (known_scno(tcp) == __NR_exit_group)
- tcp->flags |= TCB_GROUP_EXITING;
-#endif
- }
- return 0;
-}
-
#ifdef USE_PROCFS
int
@@ -850,7 +837,6 @@ Process %u resumed (parent %d ready)\n",
}
if (call_flags & CLONE_THREAD) {
tcpchild->flags |= TCB_CLONE_THREAD;
- ++tcp->nclone_threads;
}
if ((call_flags & CLONE_PARENT) &&
!(call_flags & CLONE_THREAD)) {
diff -d -urpN strace.5/signal.c strace.6/signal.c
--- strace.5/signal.c 2011-06-23 22:55:46.000000000 +0200
+++ strace.6/signal.c 2011-06-24 00:10:27.497944974 +0200
@@ -798,137 +798,6 @@ printsiginfo(siginfo_t *sip, int verbose
#endif /* SVR4 || LINUX */
-#ifdef LINUX
-
-static void
-parse_sigset_t(const char *str, sigset_t *set)
-{
- const char *p;
- unsigned int digit;
- int i;
-
- sigemptyset(set);
-
- p = strchr(str, '\n');
- if (p == NULL)
- p = strchr(str, '\0');
- for (i = 0; p-- > str; i += 4) {
- if (*p >= '0' && *p <= '9')
- digit = *p - '0';
- else if (*p >= 'a' && *p <= 'f')
- digit = *p - 'a' + 10;
- else if (*p >= 'A' && *p <= 'F')
- digit = *p - 'A' + 10;
- else
- break;
- if (digit & 1)
- sigaddset(set, i + 1);
- if (digit & 2)
- sigaddset(set, i + 2);
- if (digit & 4)
- sigaddset(set, i + 3);
- if (digit & 8)
- sigaddset(set, i + 4);
- }
-}
-
-#endif
-
-/*
- * Check process TCP for the disposition of signal SIG.
- * Return 1 if the process would somehow manage to survive signal SIG,
- * else return 0. This routine will never be called with SIGKILL.
- */
-int
-sigishandled(struct tcb *tcp, int sig)
-{
-#ifdef LINUX
- int sfd;
- char sname[32];
- char buf[2048];
- const char *s;
- int i;
- sigset_t ignored, caught;
-#endif
-#ifdef SVR4
- /*
- * Since procfs doesn't interfere with wait I think it is safe
- * to punt on this question. If not, the information is there.
- */
- return 1;
-#else /* !SVR4 */
- switch (sig) {
- case SIGCONT:
- case SIGSTOP:
- case SIGTSTP:
- case SIGTTIN:
- case SIGTTOU:
- case SIGCHLD:
- case SIGIO:
-#if defined(SIGURG) && SIGURG != SIGIO
- case SIGURG:
-#endif
- case SIGWINCH:
- /* Gloria Gaynor says ... */
- return 1;
- default:
- break;
- }
-#endif /* !SVR4 */
-#ifdef LINUX
-
- /* This is incredibly costly but it's worth it. */
- /* NOTE: LinuxThreads internally uses SIGRTMIN, SIGRTMIN + 1 and
- SIGRTMIN + 2, so we can't use the obsolete /proc/%d/stat which
- doesn't handle real-time signals). */
- sprintf(sname, "/proc/%d/status", tcp->pid);
- if ((sfd = open(sname, O_RDONLY)) == -1) {
- perror(sname);
- return 1;
- }
- i = read(sfd, buf, sizeof(buf));
- buf[i] = '\0';
- close(sfd);
- /*
- * Skip the extraneous fields. We need to skip
- * command name has any spaces in it. So be it.
- */
- s = strstr(buf, "SigIgn:\t");
- if (!s) {
- fprintf(stderr, "/proc/pid/status format error\n");
- return 1;
- }
- parse_sigset_t(s + 8, &ignored);
-
- s = strstr(buf, "SigCgt:\t");
- if (!s) {
- fprintf(stderr, "/proc/pid/status format error\n");
- return 1;
- }
- parse_sigset_t(s + 8, &caught);
-
-#ifdef DEBUG
- fprintf(stderr, "sigs: %016qx %016qx (sig=%d)\n",
- *(long long *) &ignored, *(long long *) &caught, sig);
-#endif
- if (sigismember(&ignored, sig) || sigismember(&caught, sig))
- return 1;
-#endif /* LINUX */
-
-#ifdef SUNOS4
- void (*u_signal)();
-
- if (upeek(tcp, uoff(u_signal[0]) + sig*sizeof(u_signal),
- (long *) &u_signal) < 0) {
- return 0;
- }
- if (u_signal != SIG_DFL)
- return 1;
-#endif /* SUNOS4 */
-
- return 0;
-}
-
#if defined(SUNOS4) || defined(FREEBSD)
int
diff -d -urpN strace.5/strace.c strace.6/strace.c
--- strace.5/strace.c 2011-06-24 22:44:31.179006034 +0200
+++ strace.6/strace.c 2011-06-24 22:44:51.643954446 +0200
@@ -482,7 +482,6 @@ startup_attach(void)
if (tid != tcbtab[tcbi]->pid) {
tcp = alloctcb(tid);
tcp->flags |= TCB_ATTACHED|TCB_CLONE_THREAD;
- tcbtab[tcbi]->nclone_threads++;
tcp->parent = tcbtab[tcbi];
}
}
@@ -1553,40 +1552,11 @@ droptcb(struct tcb *tcp)
{
if (tcp->pid == 0)
return;
-#ifdef TCB_CLONE_THREAD
- if (tcp->nclone_threads > 0) {
- /* There are other threads left in this process, but this
- is the one whose PID represents the whole process.
- We need to keep this record around as a zombie until
- all the threads die. */
- tcp->flags |= TCB_EXITING;
- return;
- }
-#endif
+
nprocs--;
if (debug)
fprintf(stderr, "dropped tcb for pid %d, %d remain\n", tcp->pid, nprocs);
- tcp->pid = 0;
-
- if (tcp->parent != NULL) {
-#ifdef TCB_CLONE_THREAD
- if (tcp->flags & TCB_CLONE_THREAD)
- tcp->parent->nclone_threads--;
-#endif
-#ifdef LINUX
- /* Update fields like NCLONE_DETACHED, only
- for zombie group leader that has already reported
- and been short-circuited at the top of this
- function. The same condition as at the top of DETACH. */
- if ((tcp->flags & TCB_CLONE_THREAD) &&
- tcp->parent->nclone_threads == 0 &&
- (tcp->parent->flags & TCB_EXITING))
- droptcb(tcp->parent);
-#endif
- tcp->parent = NULL;
- }
- tcp->flags = 0;
if (tcp->pfd != -1) {
close(tcp->pfd);
tcp->pfd = -1;
@@ -1601,14 +1571,15 @@ droptcb(struct tcb *tcp)
}
#endif /* !FREEBSD */
#ifdef USE_PROCFS
- rebuild_pollv(); /* Note, flags needs to be cleared by now. */
+ tcp->flags = 0; /* rebuild_pollv needs it */
+ rebuild_pollv();
#endif
}
if (outfname && followfork > 1 && tcp->outf)
fclose(tcp->outf);
- tcp->outf = NULL;
+ memset(tcp, 0, sizeof(*tcp));
}
/* detach traced process; continue with sig
@@ -1622,14 +1593,6 @@ detach(struct tcb *tcp, int sig)
int error = 0;
#ifdef LINUX
int status, catch_sigstop;
- struct tcb *zombie = NULL;
-
- /* If the group leader is lingering only because of this other
- thread now dying, then detach the leader as well. */
- if ((tcp->flags & TCB_CLONE_THREAD) &&
- tcp->parent->nclone_threads == 1 &&
- (tcp->parent->flags & TCB_EXITING))
- zombie = tcp->parent;
#endif
if (tcp->flags & TCB_BPTSET)
@@ -1732,13 +1695,6 @@ detach(struct tcb *tcp, int sig)
droptcb(tcp);
-#ifdef LINUX
- if (zombie != NULL) {
- /* TCP no longer exists therefore you must not detach() it. */
- droptcb(zombie);
- }
-#endif
-
return error;
}
@@ -2260,62 +2216,6 @@ trace(void)
#else /* !USE_PROCFS */
-#ifdef TCB_GROUP_EXITING
-/* Handle an exit detach or death signal that is taking all the
- related clone threads with it. This is called in three circumstances:
- SIG == -1 TCP has already died (TCB_ATTACHED is clear, strace is parent).
- SIG == 0 Continuing TCP will perform an exit_group syscall.
- SIG == other Continuing TCP with SIG will kill the process.
-*/
-static int
-handle_group_exit(struct tcb *tcp, int sig)
-{
- /* We need to locate our records of all the clone threads
- related to TCP, either its children or siblings. */
- struct tcb *leader = NULL;
-
- if (tcp->flags & TCB_CLONE_THREAD)
- leader = tcp->parent;
-
- if (sig < 0) {
- if (leader != NULL && leader != tcp
- && !(leader->flags & TCB_GROUP_EXITING)
- && !(tcp->flags & TCB_STARTUP)
- ) {
- fprintf(stderr,
- "PANIC: handle_group_exit: %d leader %d\n",
- tcp->pid, leader ? leader->pid : -1);
- }
- /* TCP no longer exists therefore you must not detach() it. */
- droptcb(tcp); /* Already died. */
- }
- else {
- /* Mark that we are taking the process down. */
- tcp->flags |= TCB_EXITING | TCB_GROUP_EXITING;
- if (tcp->flags & TCB_ATTACHED) {
- detach(tcp, sig);
- if (leader != NULL && leader != tcp)
- leader->flags |= TCB_GROUP_EXITING;
- } else {
- if (ptrace_restart(PTRACE_CONT, tcp, sig) < 0) {
- cleanup();
- return -1;
- }
- if (leader != NULL) {
- leader->flags |= TCB_GROUP_EXITING;
- if (leader != tcp)
- droptcb(tcp);
- }
- /* The leader will report to us as parent now,
- and then we'll get to the SIG==-1 case. */
- return 0;
- }
- }
-
- return 0;
-}
-#endif
-
#ifdef LINUX
static int
handle_ptrace_event(int status, struct tcb *tcp)
@@ -2512,37 +2412,24 @@ Process %d attached (waiting for parent)
#endif
printtrailer();
}
-#ifdef TCB_GROUP_EXITING
- handle_group_exit(tcp, -1);
-#else
droptcb(tcp);
-#endif
continue;
}
if (WIFEXITED(status)) {
if (pid == strace_child)
exit_code = WEXITSTATUS(status);
- if ((tcp->flags & (TCB_ATTACHED|TCB_STARTUP)) == TCB_ATTACHED
-#ifdef TCB_GROUP_EXITING
- && !(tcp->parent && (tcp->parent->flags & TCB_GROUP_EXITING))
- && !(tcp->flags & TCB_GROUP_EXITING)
-#endif
- ) {
- fprintf(stderr,
- "PANIC: attached pid %u exited with %d\n",
- pid, WEXITSTATUS(status));
- }
if (tcp == tcp_last) {
if ((tcp->flags & (TCB_INSYSCALL|TCB_REPRINT)) == TCB_INSYSCALL)
tprintf(" <unfinished ... exit status %d>\n",
WEXITSTATUS(status));
tcp_last = NULL;
}
-#ifdef TCB_GROUP_EXITING
- handle_group_exit(tcp, -1);
-#else
+ if (!cflag /* && (qual_flags[WTERMSIG(status)] & QUAL_SIGNAL) */ ) {
+ printleader(tcp);
+ tprintf("+++ exited with %d +++", WEXITSTATUS(status));
+ printtrailer();
+ }
droptcb(tcp);
-#endif
continue;
}
if (!WIFSTOPPED(status)) {
@@ -2646,16 +2533,6 @@ Process %d attached (waiting for parent)
PC_FORMAT_ARG);
printtrailer();
}
- if (((tcp->flags & TCB_ATTACHED) ||
- tcp->nclone_threads > 0) &&
- !sigishandled(tcp, WSTOPSIG(status))) {
-#ifdef TCB_GROUP_EXITING
- handle_group_exit(tcp, WSTOPSIG(status));
-#else
- detach(tcp, WSTOPSIG(status));
-#endif
- continue;
- }
if (ptrace_restart(PTRACE_SYSCALL, tcp, WSTOPSIG(status)) < 0) {
cleanup();
return -1;
@@ -2691,22 +2568,6 @@ Process %d attached (waiting for parent)
}
continue;
}
- if (tcp->flags & TCB_EXITING) {
-#ifdef TCB_GROUP_EXITING
- if (tcp->flags & TCB_GROUP_EXITING) {
- if (handle_group_exit(tcp, 0) < 0)
- return -1;
- continue;
- }
-#endif
- if (tcp->flags & TCB_ATTACHED)
- detach(tcp, 0);
- else if (ptrace_restart(PTRACE_CONT, tcp, 0) < 0) {
- cleanup();
- return -1;
- }
- continue;
- }
if (tcp->flags & TCB_SUSPENDED) {
if (!qflag)
fprintf(stderr, "Process %u suspended\n", pid);
diff -d -urpN strace.5/syscall.c strace.6/syscall.c
--- strace.5/syscall.c 2011-06-23 22:55:46.000000000 +0200
+++ strace.6/syscall.c 2011-06-23 23:58:29.402526811 +0200
@@ -627,9 +627,6 @@ internal_syscall(struct tcb *tcp)
func = sysent[tcp->scno].sys_func;
- if (sys_exit == func)
- return internal_exit(tcp);
-
if ( sys_fork == func
#if defined(FREEBSD) || defined(LINUX) || defined(SUNOS4)
|| sys_vfork == func
More information about the Strace-devel
mailing list