[PATCH] Remove "many wait calls before resuming" logic

Denys Vlasenko dvlasenk at redhat.com
Mon May 4 16:03:19 UTC 2009


On Fri, 2009-04-24 at 01:38 -0700, Roland McGrath wrote:
> All the drastic changes to logic and syscall use need to come out.  
> These will not go in the 4.5.19 release.  These include:
> * anything sampling kernel version number
> * anything using ptrace features not used in 4.5.18
> * the "many wait calls before resuming" logic

Ok.

Please approve the following patch, which removes
many wait calls before resuming" logic.

Or let me know what you don't like about it.
--
vda



diff -d -urpN strace.5/defs.h strace.6/defs.h
--- strace.5/defs.h	2009-03-10 21:41:58.000000000 +0100
+++ strace.6/defs.h	2009-05-04 17:49:49.000000000 +0200
@@ -300,9 +300,6 @@ extern int mp_ioctl (int f, int c, void 
 struct tcb {
 	int flags;		/* See below for TCB_ values */
 	int pid;		/* Process Id of this entry */
-	int wait_status;	/* Status from last wait() */
-	struct tcb *next_need_service;
-				/* Linked list of tracees found by wait()s */
 	long scno;		/* System call number */
 	int u_nargs;		/* System call arguments */
 	long u_arg[MAX_ARGS];	/* System call arguments */
diff -d -urpN strace.5/strace.c strace.6/strace.c
--- strace.5/strace.c	2009-04-23 15:32:45.000000000 +0200
+++ strace.6/strace.c	2009-05-04 17:51:53.000000000 +0200
@@ -2249,69 +2249,44 @@ handle_group_exit(struct tcb *tcp, int s
 }
 #endif
 
-#ifdef LINUX
-static int remembered_pid;
-static int remembered_status;
-#endif
 
-static struct tcb *
-collect_stopped_tcbs(void)
+static int
+trace()
 {
 	int pid;
 	int wait_errno;
 	int status;
 	struct tcb *tcp;
-	struct tcb *found_tcps;
 #ifdef LINUX
-	struct tcb **nextp;
 	struct rusage ru;
 	struct rusage* ru_ptr = cflag ? &ru : NULL;
-	int wnohang = 0;
 #ifdef __WALL
 	int wait4_options = __WALL;
 #endif
+#endif
 
-	if (remembered_pid > 0) {
-		pid = remembered_pid;
-		remembered_pid = 0;
-		if (debug)
-			fprintf(stderr, " [remembered wait(%#x) = %u]\n",
-						remembered_status, pid);
-		tcp = pid2tcb(pid); /* can't be NULL */
-		tcp->wait_status = remembered_status;
-		tcp->next_need_service = NULL;
-		return tcp;
-	}
-	nextp = &found_tcps;
-#endif /* LINUX */
-
-	/* Make it possible to ^C strace while we wait */
-	if (interactive)
-		sigprocmask(SIG_SETMASK, &empty_set, NULL);
-
-	found_tcps = NULL;
-	while (1) {
+	while (nprocs != 0) {
 		if (interrupted)
-			break;
+			return 0;
 #ifdef LINUX
 #ifdef __WALL
-		pid = wait4(-1, &status, wait4_options | wnohang, ru_ptr);
+		pid = wait4(-1, &status, wait4_options, ru_ptr);
 		if (pid < 0 && (wait4_options & __WALL) && errno == EINVAL) {
 			/* this kernel does not support __WALL */
 			wait4_options &= ~__WALL;
 			errno = 0;
-			pid = wait4(-1, &status, wait4_options | wnohang, ru_ptr);
+			pid = wait4(-1, &status, wait4_options, ru_ptr);
 		}
 		if (pid < 0 && !(wait4_options & __WALL) && errno == ECHILD) {
 			/* most likely a "cloned" process */
-			pid = wait4(-1, &status, __WCLONE | wnohang, ru_ptr);
+			pid = wait4(-1, &status, __WCLONE, ru_ptr);
 			if (pid < 0 && errno != ECHILD) {
 				fprintf(stderr, "strace: wait4(WCLONE) "
 						"failed: %s\n", strerror(errno));
 			}
 		}
 #else /* !__WALL */
-		pid = wait4(-1, &status, wnohang, ru_ptr);
+		pid = wait4(-1, &status, 0, ru_ptr);
 #endif
 #endif /* LINUX */
 #ifdef SUNOS4
@@ -2319,15 +2294,6 @@ collect_stopped_tcbs(void)
 #endif /* SUNOS4 */
 		wait_errno = errno;
 
-		if (pid == 0 && wnohang) {
-			/* We had at least one successful
-			 * wait() before. We waited
-			 * with WNOHANG second time.
-			 * Stop collecting more tracees,
-			 * process what we already have.
-			 */
-			break;
-		}
 		if (pid == -1) {
 			if (wait_errno == EINTR)
 				continue;
@@ -2421,66 +2387,7 @@ Process %d attached (waiting for parent)
 			continue;
 		}
 
-#ifdef LINUX
-		/* So far observed only on RHEL5 ia64, but I imagine this
-		 * can legitimately happen elsewhere.
-		 * If we waited and got a stopped task notification,
-		 * subsequent wait may return the same pid again, for example,
-		 * with SIGKILL notification. SIGKILL kills even stopped tasks.
-		 * We must not add it to the list
-		 * (one task can't be inserted twice in the list).
-		 */
-		{
-			struct tcb *f = found_tcps;
-			while (f) {
-				if (f == tcp) {
-					remembered_pid = pid;
-					remembered_status = status;
-					goto ret;
-				}
-				f = f->next_need_service;
-			}
-		}
-		/* It is important to not invert the order of tasks
-		 * to process. For one, alloc_tcb() above picks newly forked
-		 * threads in some order, processing of them and their parent
-		 * should be in the same order, otherwise bad things happen
-		 * (misinterpreted SIGSTOPs and such).
-		 */
-		tcp->wait_status = status;
-		*nextp = tcp;
-		nextp = &tcp->next_need_service;
-		*nextp = NULL;
-		wnohang = WNOHANG;
-#endif
-#ifdef SUNOS4
-		/* Probably need to replace wait with waitpid
-		 * and loop on Sun too, but I can't test it. Volunteers?
-		 */
-		tcp->wait_status = status;
-		tcp->next_need_service = NULL;
-		found_tcps = tcp;
-		break;
-#endif
-	} /* while (1) - collecting all stopped/exited tracees */
- ret:
-	/* Disable ^C etc */
-	if (interactive)
-		sigprocmask(SIG_BLOCK, &blocked_set, NULL);
-
-	return found_tcps;
-}
-
-static int
-handle_stopped_tcbs(struct tcb *tcp)
-{
-	for (; tcp; tcp = tcp->next_need_service) {
-		int pid;
-		int status;
-
 		outf = tcp->outf;
-		status = tcp->wait_status;
-		pid = tcp->pid;
 
 		if (WIFSIGNALED(status)) {
 			if (pid == strace_child)
@@ -2771,39 +2678,6 @@ handle_stopped_tcbs(struct tcb *tcp)
 			cleanup();
 			return -1;
 		}
-	} /* for each tcp */
-
-	return 0;
-}
-
-static int
-trace()
-{
-	int rc;
-	struct tcb *tcbs;
-
-	while (nprocs != 0) {
-		/* The loop of "wait for one tracee, serve it, repeat"
-		 * may leave some tracees never served.
-		 * Kernel provides no guarantees of fairness when you have
-		 * many waitable tasks.
-		 * Try strace -f with test/many_looping_threads.c example.
-		 * To fix it, we collect *all* waitable tasks, then handle
-		 * them all, then repeat.
-		 */
-		/* If remembered_pid exists, there is one extra tracee
-		 * "collected" earlier. Do not exit in this case, we need
-		 * to handle it first.
-		 */
-		if (remembered_pid == 0 && interrupted) {
-			return 0;
-		}
-		tcbs = collect_stopped_tcbs();
-		if (!tcbs)
-			break;
-		rc = handle_stopped_tcbs(tcbs);
-		if (rc)
-			return rc;
 	}
 	return 0;
 }






More information about the Strace-devel mailing list