[patch] Multithreaded process left Stopped (T) on CTRL-C of strace

Jan Kratochvil jan.kratochvil at redhat.com
Sun Jun 10 15:58:33 UTC 2007


On Sat, 02 Jun 2007 02:18:27 +0200, Roland McGrath wrote:
...
> I think a test case for this needs to have the initial thread call
> pthread_exit while other threads are still alive for a while longer.
> Then the leader will be TCB_EXITING and dead when the other thread's
> exit call leads to handle_group_exit.

Thank's this was a good idea, included the testcase and fixed the code
appropriately.  Neither PTRACE_KILL nor SIGKILL work but SIGSTOP looks fine for
the leader task.  I only hope there is no race.

Tested both on linux-2.6.20.4.x86_64 and kernel-2.6.20-1.2952.fc6.x86_64 .


> Can you try to reproduce that, and then amend your patch to check
> TCB_EXITING in the attached-leader case?  The PTRACE_KILL is indeed
> harmless (will just fail), but should be skipped too.

Implemented.


Thanks,
Jan
-------------- next part --------------
2007-06-10  Jan Kratochvil  <jan.kratochvil at redhat.com>

	* strace.c (detach): New prototype.  Extended the function comment.
	[LINUX] (detach): Call droptcb() instead of the wrongly parametrized
	detach() call.
	(handle_group_exit): Call droptcb() instead of the wrongly parametrized
	detach() call.  Fixes inferior's exit code by stopping the leader task
	instead of killing it.
	Code advisory: Roland McGrath
	Fixes RH#240961.
	* test/leaderkill.c: New file.
	* test/.cvsignore, test/Makefile: Added `test/leaderkill.c'.

Index: strace.c
===================================================================
RCS file: /cvsroot/strace/strace/strace.c,v
retrieving revision 1.76
diff -u -p -r1.76 strace.c
--- strace.c	2 Jun 2007 00:07:33 -0000	1.76
+++ strace.c	10 Jun 2007 15:51:31 -0000
@@ -98,6 +98,7 @@ unsigned int nprocs, tcbtabsize;
 char *progname;
 extern char **environ;
 
+static int detach P((struct tcb *tcp, int sig));
 static int trace P((void));
 static void cleanup P((void));
 static void interrupt P((int sig));
@@ -1297,7 +1298,10 @@ struct tcb *tcp;
 
 #endif /* !USE_PROCFS */
 
-/* detach traced process; continue with sig */
+/* detach traced process; continue with sig
+   Never call DETACH twice on the same process as both unattached and
+   attached-unstopped processes give the same ESRCH.  For unattached process we
+   would SIGSTOP it and wait for its SIGSTOP notification forever.  */
 
 static int
 detach(tcp, sig)
@@ -1479,8 +1483,10 @@ int sig;
 	droptcb(tcp);
 
 #ifdef LINUX
-	if (zombie != NULL)
-		error = detach(zombie) || error;
+	if (zombie != NULL) {
+		/* TCP no longer exists therefore you must not detach () it.  */
+		droptcb(zombie);
+	}
 #endif
 
 	return error;
@@ -2054,7 +2060,8 @@ handle_group_exit(struct tcb *tcp, int s
 			fprintf(stderr,
 				"PANIC: handle_group_exit: %d leader %d\n",
 				tcp->pid, leader ? leader->pid : -1);
-		detach(tcp);	/* Already died.  */
+		/* TCP no longer exists therefore you must not detach () it.  */
+		droptcb(tcp);	/* Already died.  */
 	}
 	else {
 		/* Mark that we are taking the process down.  */
@@ -2065,21 +2072,18 @@ handle_group_exit(struct tcb *tcp, int s
 					/* We need to detach the leader so
 					   that the process death will be
 					   reported to its real parent.
-					   But we kill it first to prevent
-					   it doing anything before we kill
-					   the whole process in a moment.
-					   We can use PTRACE_KILL on a
-					   thread that's not already
-					   stopped.  Then the value we pass
-					   in PTRACE_DETACH just sets the
-					   death signal reported to the
-					   real parent.  */
-					ptrace(PTRACE_KILL, leader->pid, 0, 0);
-					if (debug)
-						fprintf(stderr,
-							" [%d exit %d kills %d]\n",
-							tcp->pid, sig, leader->pid);
-					detach(leader, sig);
+					   We must not kill it as the parent
+					   would get WTERMSIG (SIGKILL).
+					   SIGSTOP freezes it appropriately.  */
+					if (!(leader->flags & TCB_EXITING)) {
+						if (debug)
+							fprintf(stderr,
+								" [%d exit %d"
+								" kills %d]\n",
+								tcp->pid, sig,
+								leader->pid);
+						detach(leader, SIGSTOP);
+					}
 				}
 				else
 					leader->flags |= TCB_GROUP_EXITING;
Index: test/.cvsignore
===================================================================
RCS file: /cvsroot/strace/strace/test/.cvsignore,v
retrieving revision 1.2
diff -u -p -r1.2 .cvsignore
--- test/.cvsignore	3 Sep 2000 23:38:17 -0000	1.2
+++ test/.cvsignore	10 Jun 2007 15:51:32 -0000
@@ -3,3 +3,4 @@ sig
 skodic
 vfork
 clone
+leaderkill
Index: test/Makefile
===================================================================
RCS file: /cvsroot/strace/strace/test/Makefile,v
retrieving revision 1.4
diff -u -p -r1.4 Makefile
--- test/Makefile	10 Apr 2000 22:22:33 -0000	1.4
+++ test/Makefile	10 Jun 2007 15:51:32 -0000
@@ -2,8 +2,10 @@
 # $Id: Makefile,v 1.4 2000/04/10 22:22:33 wakkerma Exp $
 #
 
-all: vfork fork sig skodic clone
+all: vfork fork sig skodic clone leaderkill
+
+leaderkill: LDFLAGS += -pthread
 
 clean distclean:
-	rm -f clone vfork fork sig *.o core
+	rm -f clone vfork fork sig leaderkill *.o core
 
Index: test/leaderkill.c
===================================================================
RCS file: test/leaderkill.c
diff -N test/leaderkill.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ test/leaderkill.c	10 Jun 2007 15:51:32 -0000
@@ -0,0 +1,59 @@
+/* Test handle_group_exit () handling of a thread leader still alive with its
+ * thread child calling exit_group () and proper passing of the process exit
+ * code to the process parent of this whole thread group.
+ * 
+ * gcc -o test/leaderkill test/leaderkill.c -Wall -ggdb2 -pthread;./test/leaderkill & pid=$!;sleep 1;strace -o x -q ./strace -f -p $pid
+ * It must print: write(1, "OK\n", ...
+ */
+
+#include <pthread.h>
+#include <assert.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/wait.h>
+
+static void *start (void *arg)
+{
+  sleep (1);
+
+  exit (42);
+}
+
+int main (void)
+{
+  pthread_t thread1;
+  int i;
+  pid_t child, got_pid;
+  int status;
+
+  sleep (2);
+
+  child = fork ();
+  switch (child)
+    {
+      case -1:
+	abort ();
+      case 0:
+	i = pthread_create (&thread1, NULL, start, NULL);
+	assert (i == 0);
+/* Two possible testcases; the second one passed even in the older versions.  */
+#if 1
+	pause ();
+#else
+	pthread_exit (NULL);
+#endif
+	/* NOTREACHED */
+	abort ();
+	break;
+      default:
+	got_pid = waitpid (child, &status, 0);
+	assert (got_pid == child);
+	assert (WIFEXITED (status));
+	assert (WEXITSTATUS (status) == 42);
+	puts ("OK");
+	exit (0);
+    }
+  /* NOTREACHED */
+  abort ();
+}


More information about the Strace-devel mailing list