[patch] Multithreaded process left Stopped (T) on CTRL-C of strace
Jan Kratochvil
jan.kratochvil at redhat.com
Thu May 24 17:02:51 UTC 2007
On Thu, 24 May 2007 07:08:42 +0200, Roland McGrath wrote:
...
> Calling detach on something with TCB_EXITING set is wrong because that
> means it has already been through detach, or else is a natural child
> (without -p) that was continued and has had or soon will have a !WIFSTOPPED
> death report seen by wait4. All the process diddling has already been done
> and only droptcb is needed to clean up the data structure.
Probably, I miss there an assertion, stderr print could be used.
> The call you changed in handle_group_exit is also a case where it's already
> detached or dead, so detach does not make sense.
It looked so.
> I think the (leader->flags & TCB_ATTACHED) case there may also be wrong if
> leader->flags has TCB_EXITING. It should skip the PTRACE_KILL and detach
> when leader is already TCB_EXITING.
That PTRACE_KILL should not harm even if the leader is already dead.
But it definitely should not try to `detach(leader)' as either if it was
already TCB_EXITING or if it was alive `detach(leader)' would get locked up.
Testcase for the latter case is attached.
While I believe your intention was a changed code condition I cannot find
a counterexample failing the patched code. In the worst case there should be
double (triple...) PTRACE_KILL.
Thanks,
Jan
Debug dumps about the PTRACE_KILL change:
Hang of the unpatched strace:
-----------------------------
strace of strace:
wait4(4294967295, [{WIFSTOPPED(s) && WSTOPSIG(s) == SIGALRM}], __WALL, NULL) = 17211
rt_sigprocmask(SIG_BLOCK, [HUP INT QUIT PIPE TERM], NULL, 8) = 0
write(2, "[pid 17211] --- SIGALRM (Alarm c"..., 50) = 50
open("/proc/17211/status", O_RDONLY) = 3
read(3, "Name:\tselftkill\nState:\tT (tracin"..., 2048) = 720
close(3) = 0
ptrace(PTRACE_KILL, 17203, 0, 0) = 0
--- SIGCHLD (Child exited) @ 0 (0) ---
ptrace(PTRACE_DETACH, 17203, 0x1, SIGALRM) = -1 ESRCH (No such process)
kill(17203, SIG_0) = 0
kill(17203, SIGSTOP) = 0
wait4(17203,
strace itself:
[pid 17211] tgkill(17203, 17211, SIGALRM) = 0
[pid 17211] --- SIGALRM (Alarm clock) @ 0 (0) ---
HANG
10177 pts/15 S 0:22 -bash
17203 pts/15 Zl+ 0:00 \_ [selftkill] <defunct>
UID PID PPID LWP C NLWP STIME TTY TIME CMD
jkratoch 17203 10177 17203 0 2 17:37 pts/15 00:00:00 [selftkill] <defunct>
jkratoch 17203 10177 17211 0 2 17:37 pts/15 00:00:00 [selftkill] <defunct>
Pass of the patched strace:
---------------------------
strace of strace:
wait4(4294967295, [{WIFSTOPPED(s) && WSTOPSIG(s) == SIGALRM}], __WALL, NULL) = 17498
rt_sigprocmask(SIG_BLOCK, [HUP INT QUIT PIPE TERM], NULL, 8) = 0
write(2, "[pid 17498] --- SIGALRM (Alarm c"..., 50) = 50
open("/proc/17498/status", O_RDONLY) = 3
read(3, "Name:\tselftkill\nState:\tT (tracin"..., 2048) = 720
close(3) = 0
ptrace(PTRACE_KILL, 17490, 0, 0) = 0
--- SIGCHLD (Child exited) @ 0 (0) ---
ptrace(PTRACE_DETACH, 17498, 0x1, SIGALRM) = -1 ESRCH (No such process)
kill(17498, SIG_0) = 0
kill(17498, SIGSTOP) = 0
wait4(17498, [{WIFSIGNALED(s) && WTERMSIG(s) == SIGKILL}], __WALL, NULL) = 17498
--- SIGCHLD (Child exited) @ 0 (0) ---
write(2, "Process 17498 detached\n", 23) = 23
exit_group(0) = ?
strace itself:
[pid 17498] tgkill(17490, 17498, SIGALRM) = 0
[pid 17498] --- SIGALRM (Alarm clock) @ 0 (0) ---
Process 17498 detached
inferior:
$ ./selftkill
17490
17498
Killed
$ _
-------------- next part --------------
2007-05-24 Jan Kratochvil <jan.kratochvil at redhat.com>
Roland McGrath <roland at redhat.com>
* strace.c (detach): New prototype.
[LINUX] (detach): Extended function comment.
Call droptcb() instead of the wrong parametrized detach() call.
(handle_group_exit): Call droptcb() instead of the wrong parametrized
detach() call and instead of the inappropriate detach() call
diff -u -rup strace-4.5.15-orig/strace.c strace-4.5.15-detach-zombie/strace.c
--- strace-4.5.15-orig/strace.c 2006-12-13 22:55:39.000000000 +0100
+++ strace-4.5.15-detach-zombie/strace.c 2007-05-24 18:11:05.000000000 +0200
@@ -83,6 +83,7 @@ unsigned int nprocs, tcbtabsize;
char *progname;
extern char **environ;
+static int detach P((struct tcb *tcp, int sig));
static int trace P((void));
static void cleanup P((void));
static void interrupt P((int sig));
@@ -1282,7 +1283,10 @@ struct tcb *tcp;
#endif /* !USE_PROCFS */
-/* detach traced process; continue with sig */
+/* detach traced process; continue with sig
+ Never call DETACH twice on the same process as both unattached and
+ attached-unstopped processes give the same ESRCH. For unattached process we
+ would SIGSTOP it and wait for its SIGSTOP notification forever. */
static int
detach(tcp, sig)
@@ -1460,8 +1464,10 @@ int sig;
droptcb(tcp);
#ifdef LINUX
- if (zombie != NULL)
- error = detach(zombie) || error;
+ if (zombie != NULL) {
+ /* TCP no longer exists therefore you must not detach () it. */
+ droptcb(zombie);
+ }
#endif
return error;
@@ -2035,7 +2041,8 @@ handle_group_exit(struct tcb *tcp, int s
fprintf(stderr,
"PANIC: handle_group_exit: %d leader %d\n",
tcp->pid, leader ? leader->pid : -1);
- detach(tcp); /* Already died. */
+ /* TCP no longer exists therefore you must not detach () it. */
+ droptcb(tcp); /* Already died. */
}
else {
/* Mark that we are taking the process down. */
@@ -2060,7 +2067,9 @@ handle_group_exit(struct tcb *tcp, int s
fprintf(stderr,
" [%d exit %d kills %d]\n",
tcp->pid, sig, leader->pid);
- detach(leader, sig);
+ /* LEADER no longer exists therefore you
+ must not detach () it. */
+ droptcb(leader);
}
else
leader->flags |= TCB_GROUP_EXITING;
-------------- next part --------------
#include <pthread.h>
#include <assert.h>
#include <stdio.h>
#include <signal.h>
#include <stdlib.h>
#include <asm/unistd.h>
#include <unistd.h>
#define tgkill(pid, tid, sig) syscall (__NR_tgkill, (pid), (tid), (sig))
#define gettid() syscall (__NR_gettid)
static pid_t parent;
void *start (void *arg)
{
assert (getpid () == parent);
assert (getpid () != gettid ());
printf ("%d\n", (int) gettid ());
assert (signal (SIGALRM, SIG_DFL) == SIG_DFL);
tgkill (getpid (), gettid (), SIGALRM);
/* NOTREACHED */
abort ();
}
int main (void)
{
pthread_t thread_id;
int i;
sleep (5);
parent = getpid ();
assert (getpid () == gettid ());
printf("%d\n", (int) getpid ());
i = pthread_create (&thread_id, NULL, start, NULL);
assert (i == 0);
i = pthread_join (thread_id, NULL);
/* NOTREACHED */
abort ();
assert (i == 0);
return EXIT_SUCCESS;
}
More information about the Strace-devel
mailing list