[PATCH] Collect processes in batches

Andreas Schwab schwab at redhat.com
Mon May 31 13:30:20 UTC 2010


* defs.h (struct tcb): Add wait_status and next_need_service
fields.
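* strace.c (remembered_pid, remembered_status): New variables.
(collect_stopped_tcbs, handle_stopped_tcbs): New functions.
(trace): Use them: collect a batch of stopped tracees, then
process the batch in order.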
---
 defs.h   |    3 +
 strace.c |  127 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++------
 2 files changed, 118 insertions(+), 12 deletions(-)

diff --git a/defs.h b/defs.h
index 7975df8..af930b4 100644
--- a/defs.h
+++ b/defs.h
@@ -321,6 +321,9 @@ extern int mp_ioctl (int f, int c, void *a, int s);
 struct tcb {
 	short flags;		/* See below for TCB_ values */
 	int pid;		/* Process Id of this entry */
+	int wait_status;	/* Status from last wait() */
+	struct tcb *next_need_service;
+				/* Linked list of tracees found by wait()s */
 	long scno;		/* System call number */
 	int u_nargs;		/* System call arguments */
 	long u_arg[MAX_ARGS];	/* System call arguments */
diff --git a/strace.c b/strace.c
index 5125718..10d8e5c 100644
--- a/strace.c
+++ b/strace.c
@@ -2336,20 +2336,43 @@ handle_group_exit(struct tcb *tcp, int sig)
 }
 #endif
 
-static int
-trace()
+#ifdef LINUX
+static int remembered_pid;
+static int remembered_status;
+#endif
+
+static struct tcb *
+collect_stopped_tcbs(void)
 {
 	int pid;
 	int wait_errno;
 	int status;
 	struct tcb *tcp;
+	struct tcb *found_tcps;
 #ifdef LINUX
+	struct tcb **nextp;
 	struct rusage ru;
+	int wnohang = 0;
 #ifdef __WALL
 	static int wait4_options = __WALL;
 #endif
+
+	if (remembered_pid) {
+		pid = remembered_pid;
+		remembered_pid = 0;
+		if (debug)
+			fprintf(stderr, " [remembered wait(%#x) = %u]\n",
+						remembered_status, pid);
+		tcp = pid2tcb(pid); /* can't be NULL */
+		tcp->wait_status = remembered_status;
+		tcp->next_need_service = NULL;
+		return tcp;
+	}
+
+	nextp = &found_tcps;
 #endif /* LINUX */
 
+	found_tcps = NULL;
 	while (nprocs != 0) {
 		if (interrupted)
 			return 0;
@@ -2357,25 +2380,25 @@ trace()
 			sigprocmask(SIG_SETMASK, &empty_set, NULL);
 #ifdef LINUX
 #ifdef __WALL
-		pid = wait4(-1, &status, wait4_options, cflag ? &ru : NULL);
+		pid = wait4(-1, &status, wait4_options | wnohang, cflag ? &ru : NULL);
 		if (pid < 0 && (wait4_options & __WALL) && errno == EINVAL) {
 			/* this kernel does not support __WALL */
 			wait4_options &= ~__WALL;
 			errno = 0;
-			pid = wait4(-1, &status, wait4_options,
+			pid = wait4(-1, &status, wait4_options | wnohang,
 					cflag ? &ru : NULL);
 		}
 		if (pid < 0 && !(wait4_options & __WALL) && errno == ECHILD) {
 			/* most likely a "cloned" process */
-			pid = wait4(-1, &status, __WCLONE,
+			pid = wait4(-1, &status, __WCLONE | wnohang,
 					cflag ? &ru : NULL);
-			if (pid == -1) {
+			if (pid == -1 && errno != ECHILD) {
 				fprintf(stderr, "strace: clone wait4 "
 						"failed: %s\n", strerror(errno));
 			}
 		}
 #else
-		pid = wait4(-1, &status, 0, cflag ? &ru : NULL);
+		pid = wait4(-1, &status, wnohang, cflag ? &ru : NULL);
 #endif /* __WALL */
 #endif /* LINUX */
 #ifdef SUNOS4
@@ -2385,6 +2408,15 @@ trace()
 		if (interactive)
 			sigprocmask(SIG_BLOCK, &blocked_set, NULL);
 
+		if (pid == 0 && wnohang) {
+			/* At least one earlier wait() in this
+			 * batch succeeded, so this call used
+			 * WNOHANG and found nothing more pending.
+			 * Stop collecting tracees and
+			 * process what we already have.
+			 */
+			break;
+		}
 		if (pid == -1) {
 			switch (wait_errno) {
 			case EINTR:
@@ -2396,11 +2428,11 @@ trace()
 				 * version of SunOS sometimes reports
 				 * ECHILD before sending us SIGCHILD.
 				 */
-				return 0;
+				return found_tcps;
 			default:
 				errno = wait_errno;
 				perror("strace: wait");
-				return -1;
+				return (struct tcb *) -1;
 			}
 		}
 		if (pid == popen_pid) {
@@ -2442,9 +2474,6 @@ Process %d attached (waiting for parent)\n",
 				exit(1);
 			}
 		}
-		/* set current output file */
-		outf = tcp->outf;
-		curcol = tcp->curcol;
 		if (cflag) {
 #ifdef LINUX
 			tv_sub(&tcp->dtime, &ru.ru_stime, &tcp->stime);
@@ -2463,6 +2492,58 @@ Process %d attached (waiting for parent)\n",
 			 */
 			continue;
 		}
+#ifdef LINUX
+		/* If we waited and got a stopped task notification,
+		 * a subsequent wait may return the same pid again, for example,
+		 * with a SIGKILL notification: SIGKILL kills even stopped tasks.
+		 * We must not add it to the list a second time (one task
+		 * can't be linked twice), so remember it for the next call.
+		 */
+		{
+			struct tcb *f = found_tcps;
+			while (f) {
+				if (f == tcp) {
+					remembered_pid = pid;
+					remembered_status = status;
+					return found_tcps;
+				}
+				f = f->next_need_service;
+			}
+		}
+
+		/* It is important not to invert the order of tasks
+		 * to process. For one, alloc_tcb() above picks newly forked
+		 * threads in some order; they and their parent must be
+		 * processed in that same order, otherwise bad things happen
+		 * (misinterpreted SIGSTOPs and such).
+		 */
+		tcp->wait_status = status;
+		*nextp = tcp;
+		nextp = &tcp->next_need_service;
+		*nextp = NULL;
+		wnohang = WNOHANG;
+#endif
+#ifdef SUNOS4
+		tcp->wait_status = status;
+		tcp->next_need_service = NULL;
+		return tcp;
+#endif
+	}
+	return found_tcps;
+}
+
+static int
+handle_stopped_tcbs(struct tcb *tcp)
+{
+	for (; tcp; tcp = tcp->next_need_service) {
+		int pid;
+		int status;
+
+		outf = tcp->outf;
+		curcol = tcp->curcol;
+		status = tcp->wait_status;
+		pid = tcp->pid;
+
 		if (WIFSIGNALED(status)) {
 			if (pid == strace_child)
 				exit_code = 0x100 | WTERMSIG(status);
@@ -2678,6 +2759,28 @@ Process %d attached (waiting for parent)\n",
 			return -1;
 		}
 	}
+
+	return 0;
+}
+
+static int
+trace()
+{
+	int rc;
+	struct tcb *tcbs;
+
+	while (nprocs != 0) {
+		if (interrupted)
+			return 0;
+		tcbs = collect_stopped_tcbs();
+		if (!tcbs)
+			break;
+		if (tcbs == (struct tcb *) -1)
+			return -1;
+		rc = handle_stopped_tcbs(tcbs);
+		if (rc)
+			return rc;
+	}
 	return 0;
 }
 
-- 
1.7.1

-- 
Andreas Schwab, schwab at redhat.com
GPG Key fingerprint = D4E8 DBE3 3813 BB5D FA84  5EC7 45C6 250E 6F00 984E
"And now for something completely different."




More information about the Strace-devel mailing list