[PATCH 1/3] stop treating waitpid syscall specially

Denys Vlasenko dvlasenk at redhat.com
Mon Feb 9 19:16:52 UTC 2009


Hi folks,

I was talking with Oleg Nesterov and he mentioned an upcoming problem
with PID namespaces. Currently strace tracks and uses PIDs returned
by syscalls like fork, clone et al. in order to track who is
a child of whom. With PID namespaces, this is likely to fall apart,
because what these syscalls return will be different at different
levels of PID container hierarchy.

The idea to fix it is to try to stop tracking pids.

I asked myself "why do we need to know which process is a child
of which *at all*?"

>From code review it looks like strace tries hard to not allow
waitpid syscalls to execute until they have someone waitable for them.
Comments say that otherwise wait will erroneously return ECHILD,
because, allegedly, traced children are not considered to be really
children of their real parent, but they are children of strace.

I think this is not true, at least on Linux, at least on 2.6.
Mabe it was true in dim past (2.2?) and/or on other OSes, but
not on contemporary Linux.
Oleg also said he doesn't think it's true.

So I just tried to (partially) delete this special code
and try what will happen. Well, it seems to be working.
At least "strace -f -oFILE firefox" works.

These patches are not intended for inclusion, at least not
right now. I seek your comments and patch review, and also
am interested in hearing whether this is a viable idea at all.

This is the first patch. It removes special handling of waitXXX
syscalls.
--
vda


diff -d -urpN strace.5/defs.h strace.6/defs.h
--- strace.5/defs.h	2009-02-09 18:02:30.000000000 +0100
+++ strace.6/defs.h	2009-02-09 18:04:25.000000000 +0100
@@ -317,6 +317,7 @@ struct tcb {
 	struct timeval dtime;	/* Delta for system time usage */
 	struct timeval etime;	/* Syscall entry time */
 				/* Support for tracing forked processes */
+//TODO: try to get rid of:
 	struct tcb *parent;	/* Parent of this process */
 	int nchildren;		/* # of traced children */
 	int waitpid;		/* pid(s) this process is waiting for */
@@ -537,7 +538,6 @@ extern int internal_clone P((struct tcb 
 #endif
 extern int internal_fork P((struct tcb *));
 extern int internal_exec P((struct tcb *));
-extern int internal_wait P((struct tcb *, int));
 extern int internal_exit P((struct tcb *));
 
 extern const struct ioctlent *ioctl_lookup P((long));
diff -d -urpN strace.5/process.c strace.6/process.c
--- strace.5/process.c	2009-01-28 20:00:54.000000000 +0100
+++ strace.6/process.c	2009-02-09 16:27:14.000000000 +0100
@@ -2039,95 +2039,6 @@ printwaitn(struct tcb *tcp, int n, int b
 	return 0;
 }
 
-int
-internal_wait(tcp, flagarg)
-struct tcb *tcp;
-int flagarg;
-{
-	int got_kids;
-
-#ifdef TCB_CLONE_THREAD
-	if (tcp->flags & TCB_CLONE_THREAD)
-		/* The children we wait for are our parent's children.  */
-		got_kids = (tcp->parent->nchildren
-			    > tcp->parent->nclone_detached);
-	else
-		got_kids = (tcp->nchildren > tcp->nclone_detached);
-#else
-	got_kids = tcp->nchildren > 0;
-#endif
-
-	if (entering(tcp) && got_kids) {
-		/* There are children that this parent should block for.
-		   But ptrace made us the parent of the traced children
-		   and the real parent will get ECHILD from the wait call.
-
-		   XXX If we attached with strace -f -p PID, then there
-		   may be untraced dead children the parent could be reaping
-		   now, but we make him block.  */
-
-		/* ??? WTA: fix bug with hanging children */
-
-		if (!(tcp->u_arg[flagarg] & WNOHANG)) {
-			/*
-			 * There are traced children.  We'll make the parent
-			 * block to avoid a false ECHILD error due to our
-			 * ptrace having stolen the children.  However,
-			 * we shouldn't block if there are zombies to reap.
-			 * XXX doesn't handle pgrp matches (u_arg[0]==0,<-1)
-			 */
-			struct tcb *child = NULL;
-			if (tcp->nzombies > 0 &&
-			    (tcp->u_arg[0] == -1 ||
-			     (child = pid2tcb(tcp->u_arg[0])) == NULL))
-				return 0;
-			if (tcp->u_arg[0] > 0) {
-				/*
-				 * If the parent waits for a specified child
-				 * PID, then it must get ECHILD right away
-				 * if that PID is not one of its children.
-				 * Make sure that the requested PID matches
-				 * one of the parent's children that we are
-				 * tracing, and don't suspend it otherwise.
-				 */
-				if (child == NULL)
-					child = pid2tcb(tcp->u_arg[0]);
-				if (child == NULL || child->parent != (
-#ifdef TCB_CLONE_THREAD
-					    (tcp->flags & TCB_CLONE_THREAD)
-					    ? tcp->parent :
-#endif
-					    tcp) ||
-				    (child->flags & TCB_EXITING))
-					return 0;
-			}
-			tcp->flags |= TCB_SUSPENDED;
-			tcp->waitpid = tcp->u_arg[0];
-#ifdef TCB_CLONE_THREAD
-			if (tcp->flags & TCB_CLONE_THREAD)
-				tcp->parent->nclone_waiting++;
-#endif
-		}
-	}
-	if (exiting(tcp) && tcp->u_error == ECHILD && got_kids) {
-		if (tcp->u_arg[flagarg] & WNOHANG) {
-			/* We must force a fake result of 0 instead of
-			   the ECHILD error.  */
-			extern int force_result();
-			return force_result(tcp, 0, 0);
-		}
-	}
-	else if (exiting(tcp) && tcp->u_error == 0 && tcp->u_rval > 0 &&
-		 tcp->nzombies > 0 && pid2tcb(tcp->u_rval) == NULL) {
-		/*
-		 * We just reaped a child we don't know about,
-		 * presumably a zombie we already droptcb'd.
-		 */
-		tcp->nzombies--;
-	}
-	return 0;
-}
-
 #ifdef SVR4
 
 int
diff -d -urpN strace.5/syscall.c strace.6/syscall.c
--- strace.5/syscall.c	2009-01-23 17:30:26.000000000 +0100
+++ strace.6/syscall.c	2009-02-09 16:27:27.000000000 +0100
@@ -715,22 +715,6 @@ internal_syscall(struct tcb *tcp)
 	   )
 		return internal_exec(tcp);
 
-	if (   sys_waitpid == func
-	    || sys_wait4 == func
-#if defined(SVR4) || defined(FREEBSD) || defined(SUNOS4)
-	    || sys_wait == func
-#endif
-#ifdef ALPHA
-	    || sys_osf_wait4 == func
-#endif
-	   )
-		return internal_wait(tcp, 2);
-
-#if defined(LINUX) || defined(SVR4)
-	if (sys_waitid == func)
-		return internal_wait(tcp, 3);
-#endif
-
 	return 0;
 }
 






More information about the Strace-devel mailing list