[PATCH] Collect processes in batches
Andreas Schwab
schwab at redhat.com
Mon May 31 13:30:20 UTC 2010
* defs.h (struct tcb): Add wait_status and next_need_service
fields.
* strace.c (collect_stopped_tcbs, handle_stopped_tcbs): New
functions.
(trace): Use them.
---
defs.h | 3 +
strace.c | 127 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++------
2 files changed, 118 insertions(+), 12 deletions(-)
diff --git a/defs.h b/defs.h
index 7975df8..af930b4 100644
--- a/defs.h
+++ b/defs.h
@@ -321,6 +321,9 @@ extern int mp_ioctl (int f, int c, void *a, int s);
struct tcb {
short flags; /* See below for TCB_ values */
int pid; /* Process Id of this entry */
+ int wait_status; /* Status from last wait() */
+ struct tcb *next_need_service;
+ /* Linked list of tracees found by wait()s */
long scno; /* System call number */
int u_nargs; /* System call arguments */
long u_arg[MAX_ARGS]; /* System call arguments */
diff --git a/strace.c b/strace.c
index 5125718..10d8e5c 100644
--- a/strace.c
+++ b/strace.c
@@ -2336,20 +2336,43 @@ handle_group_exit(struct tcb *tcp, int sig)
}
#endif
-static int
-trace()
+#ifdef LINUX
+static int remembered_pid;
+static int remembered_status;
+#endif
+
+static struct tcb *
+collect_stopped_tcbs(void)
{
int pid;
int wait_errno;
int status;
struct tcb *tcp;
+ struct tcb *found_tcps;
#ifdef LINUX
+ struct tcb **nextp;
struct rusage ru;
+ int wnohang = 0;
#ifdef __WALL
static int wait4_options = __WALL;
#endif
+
+ if (remembered_pid) {
+ pid = remembered_pid;
+ remembered_pid = 0;
+ if (debug)
+ fprintf(stderr, " [remembered wait(%#x) = %u]\n",
+ remembered_status, pid);
+ tcp = pid2tcb(pid); /* can't be NULL */
+ tcp->wait_status = remembered_status;
+ tcp->next_need_service = NULL;
+ return tcp;
+ }
+
+ nextp = &found_tcps;
#endif /* LINUX */
+ found_tcps = NULL;
while (nprocs != 0) {
if (interrupted)
return 0;
@@ -2357,25 +2380,25 @@ trace()
sigprocmask(SIG_SETMASK, &empty_set, NULL);
#ifdef LINUX
#ifdef __WALL
- pid = wait4(-1, &status, wait4_options, cflag ? &ru : NULL);
+ pid = wait4(-1, &status, wait4_options | wnohang, cflag ? &ru : NULL);
if (pid < 0 && (wait4_options & __WALL) && errno == EINVAL) {
/* this kernel does not support __WALL */
wait4_options &= ~__WALL;
errno = 0;
- pid = wait4(-1, &status, wait4_options,
+ pid = wait4(-1, &status, wait4_options | wnohang,
cflag ? &ru : NULL);
}
if (pid < 0 && !(wait4_options & __WALL) && errno == ECHILD) {
/* most likely a "cloned" process */
- pid = wait4(-1, &status, __WCLONE,
+ pid = wait4(-1, &status, __WCLONE | wnohang,
cflag ? &ru : NULL);
- if (pid == -1) {
+ if (pid == -1 && errno != ECHILD) {
fprintf(stderr, "strace: clone wait4 "
"failed: %s\n", strerror(errno));
}
}
#else
- pid = wait4(-1, &status, 0, cflag ? &ru : NULL);
+ pid = wait4(-1, &status, wnohang, cflag ? &ru : NULL);
#endif /* __WALL */
#endif /* LINUX */
#ifdef SUNOS4
@@ -2385,6 +2408,15 @@ trace()
if (interactive)
sigprocmask(SIG_BLOCK, &blocked_set, NULL);
+ if (pid == 0 && wnohang) {
+ /* We had at least one successful
+ * wait() before, so this time we
+ * waited with WNOHANG and got nothing.
+ * Stop collecting more tracees and
+ * process what we already have.
+ */
+ break;
+ }
if (pid == -1) {
switch (wait_errno) {
case EINTR:
@@ -2396,11 +2428,11 @@ trace()
* version of SunOS sometimes reports
* ECHILD before sending us SIGCHILD.
*/
- return 0;
+ return found_tcps;
default:
errno = wait_errno;
perror("strace: wait");
- return -1;
+ return (struct tcb *) -1;
}
}
if (pid == popen_pid) {
@@ -2442,9 +2474,6 @@ Process %d attached (waiting for parent)\n",
exit(1);
}
}
- /* set current output file */
- outf = tcp->outf;
- curcol = tcp->curcol;
if (cflag) {
#ifdef LINUX
tv_sub(&tcp->dtime, &ru.ru_stime, &tcp->stime);
@@ -2463,6 +2492,58 @@ Process %d attached (waiting for parent)\n",
*/
continue;
}
+#ifdef LINUX
+ /* If we waited and got a stopped task notification,
+ * subsequent wait may return the same pid again, for example,
+ * with SIGKILL notification. SIGKILL kills even stopped tasks.
+ * We must not add it to the list
+ * (one task can't be inserted twice in the list).
+ */
+ {
+ struct tcb *f = found_tcps;
+ while (f) {
+ if (f == tcp) {
+ remembered_pid = pid;
+ remembered_status = status;
+ return found_tcps;
+ }
+ f = f->next_need_service;
+ }
+ }
+
+ /* It is important not to invert the order of tasks
+ * to process. For one, alloc_tcb() above picks newly forked
+ * threads in some order; processing of them and their parent
+ * should be in the same order, otherwise bad things happen
+ * (misinterpreted SIGSTOPs and such).
+ */
+ tcp->wait_status = status;
+ *nextp = tcp;
+ nextp = &tcp->next_need_service;
+ *nextp = NULL;
+ wnohang = WNOHANG;
+#endif
+#ifdef SUNOS4
+ tcp->wait_status = status;
+ tcp->next_need_service = NULL;
+ return tcp;
+#endif
+ }
+ return found_tcps;
+}
+
+static int
+handle_stopped_tcbs(struct tcb *tcp)
+{
+ for (; tcp; tcp = tcp->next_need_service) {
+ int pid;
+ int status;
+
+ outf = tcp->outf;
+ curcol = tcp->curcol;
+ status = tcp->wait_status;
+ pid = tcp->pid;
+
if (WIFSIGNALED(status)) {
if (pid == strace_child)
exit_code = 0x100 | WTERMSIG(status);
@@ -2678,6 +2759,28 @@ Process %d attached (waiting for parent)\n",
return -1;
}
}
+
+ return 0;
+}
+
+static int
+trace()
+{
+ int rc;
+ struct tcb *tcbs;
+
+ while (nprocs != 0) {
+ if (interrupted)
+ return 0;
+ tcbs = collect_stopped_tcbs();
+ if (!tcbs)
+ break;
+ if (tcbs == (struct tcb *) -1)
+ return -1;
+ rc = handle_stopped_tcbs(tcbs);
+ if (rc)
+ return rc;
+ }
return 0;
}
--
1.7.1
--
Andreas Schwab, schwab at redhat.com
GPG Key fingerprint = D4E8 DBE3 3813 BB5D FA84 5EC7 45C6 250E 6F00 984E
"And now for something completely different."
More information about the Strace-devel
mailing list