[PATCH] RFC: remove parent/child handling and special handling of exit and wait syscalls

Denys Vlasenko dvlasenk at redhat.com
Fri Mar 6 00:50:13 UTC 2009


This patch has been posted before;
this is just a rediffed version.

I want it to remain in archives.

At the very earliest, it may go into cvs after
the next release, *if* I am able to convince
Roland it's a good idea at all.

Is anyone willing/able to test it on the non-Linux OSes
we are supposed to be supporting?
--
vda


diff -urpN strace.2/defs.h strace.9/defs.h
--- strace.2/defs.h	2009-03-06 01:33:29.000000000 +0100
+++ strace.9/defs.h	2009-03-06 01:00:49.000000000 +0100
@@ -317,19 +317,11 @@ struct tcb {
 	struct timeval dtime;	/* Delta for system time usage */
 	struct timeval etime;	/* Syscall entry time */
 				/* Support for tracing forked processes */
-	struct tcb *parent;	/* Parent of this process */
-	int nchildren;		/* # of traced children */
-	int waitpid;		/* pid(s) this process is waiting for */
-	int nzombies;		/* # of formerly traced children now dead */
-#ifdef LINUX
-	int nclone_threads;	/* # of nchildren with CLONE_THREAD */
-	int nclone_detached;	/* # of nchildren with CLONE_DETACHED */
-	int nclone_waiting;	/* clone threads in wait4 (TCB_SUSPENDED) */
-				/* (1st arg of wait4()) */
-#endif
 	long baddr;		/* `Breakpoint' address */
 	long inst[2];		/* Instructions on above */
+#ifdef USE_PROCFS
 	int pfd;		/* proc file descriptor */
+#endif
 #ifdef SVR4
 #ifdef HAVE_MP_PROCFS
 	int pfd_stat;
@@ -352,8 +344,6 @@ struct tcb {
 #define TCB_INUSE	00002	/* This table entry is in use */
 #define TCB_INSYSCALL	00004	/* A system call is in progress */
 #define TCB_ATTACHED	00010	/* Process is not our own child */
-#define TCB_EXITING	00020	/* As far as we know, this process is exiting */
-#define TCB_SUSPENDED	00040	/* Process can not be allowed to resume just now */
 #define TCB_BPTSET	00100	/* "Breakpoint" set after fork(2) */
 #define TCB_SIGTRAPPED	00200	/* Process wanted to block SIGTRAP */
 #define TCB_REPRINT	01000	/* We should reprint this syscall on exit */
@@ -376,9 +366,6 @@ struct tcb {
   || defined(ARM) || defined(MIPS) || defined(BFIN)
 #  define TCB_WAITEXECVE 02000	/* ignore SIGTRAP after exceve */
 # endif
-# define TCB_CLONE_DETACHED 04000 /* CLONE_DETACHED set in creating syscall */
-# define TCB_CLONE_THREAD  010000 /* CLONE_THREAD set in creating syscall */
-# define TCB_GROUP_EXITING 020000 /* TCB_EXITING was exit_group, not _exit */
 # include <sys/syscall.h>
 # ifndef __NR_exit_group
 # /* Hack: Most headers around are too old to have __NR_exit_group.  */
@@ -540,8 +527,6 @@ extern int internal_clone P((struct tcb 
 #endif
 extern int internal_fork P((struct tcb *));
 extern int internal_exec P((struct tcb *));
-extern int internal_wait P((struct tcb *, int));
-extern int internal_exit P((struct tcb *));
 
 extern const struct ioctlent *ioctl_lookup P((long));
 extern const struct ioctlent *ioctl_next_match P((const struct ioctlent *));
diff -urpN strace.2/process.c strace.9/process.c
--- strace.2/process.c	2009-03-06 01:25:17.000000000 +0100
+++ strace.9/process.c	2009-03-06 01:25:24.000000000 +0100
@@ -464,26 +464,6 @@ struct tcb *tcp;
 	return 0;
 }
 
-int
-internal_exit(tcp)
-struct tcb *tcp;
-{
-	if (entering(tcp)) {
-		tcp->flags |= TCB_EXITING;
-#ifdef __NR_exit_group
-# ifdef IA64
-		if (ia32) {
-			if (tcp->scno == 252)
-				tcp->flags |= TCB_GROUP_EXITING;
-		} else
-# endif
-		if (known_scno(tcp) == __NR_exit_group)
-			tcp->flags |= TCB_GROUP_EXITING;
-#endif
-	}
-	return 0;
-}
-
 #ifdef USE_PROCFS
 
 int
@@ -866,22 +846,30 @@ setarg(tcp, argnum)
 int
 internal_clone(struct tcb *tcp)
 {
-	struct tcb *tcpchild;
-	int pid, bpt;
+	int bpt;
 
 	if (!followfork)
 		return 0;
+
 	if (entering(tcp)) {
+		/* On Linux, this changes syscall to clone(),
+		 * and/or inserts CLONE_PTRACE bit to clone flags:
+		 */
 		setbpt(tcp);
 		return 0;
-	} else {
-		bpt = tcp->flags & TCB_BPTSET;
+	}
 
-		if (syserror(tcp)) {
-			if (bpt)
-				clearbpt(tcp);
-			return 0;
-		}
+	bpt = tcp->flags & TCB_BPTSET;
+	if (syserror(tcp)) {
+		if (bpt)
+			clearbpt(tcp);
+		return 0;
+	}
+
+#ifndef CLONE_PTRACE
+	{
+		struct tcb *tcpchild;
+		int pid;
 
 		pid = tcp->u_rval;
 		/* Should not happen, but bugs often cause bogus value here. */
@@ -894,107 +882,34 @@ internal_clone(struct tcb *tcp)
 			return 0;
 		}
 
-#ifdef CLONE_PTRACE		/* See new setbpt code.  */
-		tcpchild = pid2tcb(pid);
-		if (tcpchild != NULL) {
-			/* The child already reported its startup trap
-			   before the parent reported its syscall return.  */
-			if ((tcpchild->flags
-			     & (TCB_STARTUP|TCB_ATTACHED|TCB_SUSPENDED))
-			    != (TCB_STARTUP|TCB_ATTACHED|TCB_SUSPENDED))
-				fprintf(stderr, "\
-[preattached child %d of %d in weird state!]\n",
-					pid, tcp->pid);
-		}
-		else
-#endif
-		{
-			tcpchild = alloctcb(pid);
-		}
-
-#ifndef CLONE_PTRACE
 		/* Attach to the new child */
+		tcpchild = alloctcb(pid);
 		if (ptrace(PTRACE_ATTACH, pid, (char *) 1, 0) < 0) {
 			if (bpt)
 				clearbpt(tcp);
 			perror("PTRACE_ATTACH");
-			fprintf(stderr, "Too late?\n");
 			droptcb(tcpchild);
 			return 0;
 		}
-#endif
-
-		if (bpt)
-			clearbpt(tcp);
-
+//waitpid() for pid here so that parent can't send it signals and whatnot
+//between PTRACE_ATTACH and SIGSTOP?
 		tcpchild->flags |= TCB_ATTACHED;
-		/* Child has BPT too, must be removed on first occasion.  */
+		/* Child has BPT too, need to know how to remove it later. */
 		if (bpt) {
 			tcpchild->flags |= TCB_BPTSET;
 			tcpchild->baddr = tcp->baddr;
 			memcpy(tcpchild->inst, tcp->inst,
 				sizeof tcpchild->inst);
 		}
-		tcpchild->parent = tcp;
-		tcp->nchildren++;
-		if (tcpchild->flags & TCB_SUSPENDED) {
-			/* The child was born suspended, due to our having
-			   forced CLONE_PTRACE.  */
-			if (bpt)
-				clearbpt(tcpchild);
-
-			tcpchild->flags &= ~(TCB_SUSPENDED|TCB_STARTUP);
-			/* TCB_SUSPENDED tasks are not collected by waitpid
-			 * loop, and left stopped. Restart it:
-			 */
-			if (ptrace_restart(PTRACE_SYSCALL, tcpchild, 0) < 0)
-				return -1;
-
-			if (!qflag)
-				fprintf(stderr, "\
-Process %u resumed (parent %d ready)\n",
-					pid, tcp->pid);
-		}
-		else {
-			if (!qflag)
-				fprintf(stderr, "Process %d attached\n", pid);
-		}
 
-#ifdef TCB_CLONE_THREAD
-		{
-			/*
-			 * Save the flags used in this call,
-			 * in case we point TCP to our parent below.
-			 */
-			int call_flags = tcp->u_arg[ARG_FLAGS];
-			if ((tcp->flags & TCB_CLONE_THREAD) &&
-			    tcp->parent != NULL) {
-				/* The parent in this clone is itself a
-				   thread belonging to another process.
-				   There is no meaning to the parentage
-				   relationship of the new child with the
-				   thread, only with the process.  We
-				   associate the new thread with our
-				   parent.  Since this is done for every
-				   new thread, there will never be a
-				   TCB_CLONE_THREAD process that has
-				   children.  */
-				--tcp->nchildren;
-				tcp = tcp->parent;
-				tcpchild->parent = tcp;
-				++tcp->nchildren;
-			}
-			if (call_flags & CLONE_THREAD) {
-				tcpchild->flags |= TCB_CLONE_THREAD;
-				++tcp->nclone_threads;
-			}
-			if (call_flags & CLONE_DETACHED) {
-				tcpchild->flags |= TCB_CLONE_DETACHED;
-				++tcp->nclone_detached;
-			}
-		}
-#endif
+		if (!qflag)
+			fprintf(stderr, "Process %d attached\n", pid);
 	}
+#endif /* !CLONE_PTRACE */
+
+	/* Remove breakpoint in the parent. */
+	if (bpt)
+		clearbpt(tcp);
 	return 0;
 }
 #endif
@@ -1007,9 +922,8 @@ struct tcb *tcp;
 	/* We do special magic with clone for any clone or fork.  */
 	return internal_clone(tcp);
 #else
-
 	struct tcb *tcpchild;
-	int pid;
+	int pid, i, bpt;
 	int follow = 1;
 
 #ifdef SYS_vfork
@@ -1023,92 +937,41 @@ struct tcb *tcp;
 		return 0;
 
 	if (entering(tcp)) {
-		if (setbpt(tcp) < 0)
-			return 0;
+		setbpt(tcp);
+		return 0;
 	}
-	else {
-		int bpt = tcp->flags & TCB_BPTSET;
 
-		if (bpt)
-			clearbpt(tcp);
+	bpt = tcp->flags & TCB_BPTSET;
+	if (bpt)
+		clearbpt(tcp);
 
-		if (syserror(tcp))
-			return 0;
+	if (syserror(tcp))
+		return 0;
 
-		pid = tcp->u_rval;
-		tcpchild = alloctcb(pid);
-#ifdef LINUX
-#ifdef HPPA
-		/* The child must have run before it can be attached. */
-		/* This must be a bug in the parisc kernel, but I havn't
-		 * identified it yet.  Seems to be an issue associated
-		 * with attaching to a process (which sends it a signal)
-		 * before that process has ever been scheduled.  When
-		 * debugging, I started seeing crashes in
-		 * arch/parisc/kernel/signal.c:do_signal(), apparently
-		 * caused by r8 getting corrupt over the dequeue_signal()
-		 * call.  Didn't make much sense though...
-		 */
-		{
-			struct timeval tv;
-			tv.tv_sec = 0;
-			tv.tv_usec = 10000;
-			select(0, NULL, NULL, NULL, &tv);
-		}
-#endif
-		if (ptrace(PTRACE_ATTACH, pid, (char *) 1, 0) < 0) {
-			perror("PTRACE_ATTACH");
-			fprintf(stderr, "Too late?\n");
-			droptcb(tcpchild);
-			return 0;
-		}
-#endif /* LINUX */
-#ifdef SUNOS4
-#ifdef oldway
-		/* The child must have run before it can be attached. */
-		{
-			struct timeval tv;
-			tv.tv_sec = 0;
-			tv.tv_usec = 10000;
-			select(0, NULL, NULL, NULL, &tv);
-		}
-		if (ptrace(PTRACE_ATTACH, pid, (char *)1, 0) < 0) {
-			perror("PTRACE_ATTACH");
-			fprintf(stderr, "Too late?\n");
-			droptcb(tcpchild);
-			return 0;
-		}
-#else /* !oldway */
-		/* Try to catch the new process as soon as possible. */
-		{
-			int i;
-			for (i = 0; i < 1024; i++)
-				if (ptrace(PTRACE_ATTACH, pid, (char *) 1, 0) >= 0)
-					break;
-			if (i == 1024) {
-				perror("PTRACE_ATTACH");
-				fprintf(stderr, "Too late?\n");
-				droptcb(tcpchild);
-				return 0;
-			}
-		}
-#endif /* !oldway */
-#endif /* SUNOS4 */
-		tcpchild->flags |= TCB_ATTACHED;
-		/* Child has BPT too, must be removed on first occasion */
-		if (bpt) {
-			tcpchild->flags |= TCB_BPTSET;
-			tcpchild->baddr = tcp->baddr;
-			memcpy(tcpchild->inst, tcp->inst,
-				sizeof tcpchild->inst);
-		}
-		tcpchild->parent = tcp;
-		tcp->nchildren++;
-		if (!qflag)
-			fprintf(stderr, "Process %d attached\n", pid);
+	pid = tcp->u_rval;
+	tcpchild = alloctcb(pid);
+	/* Try to catch the new process as soon as possible. */
+	for (i = 0; i < 1024; i++)
+		if (ptrace(PTRACE_ATTACH, pid, (char *) 1, 0) >= 0)
+			break;
+	if (i == 1024) {
+		perror("PTRACE_ATTACH");
+		fprintf(stderr, "Too late?\n");
+		droptcb(tcpchild);
+		return 0;
 	}
+	tcpchild->flags |= TCB_ATTACHED;
+	/* Child has BPT too, must be removed on first occasion */
+	if (bpt) {
+		tcpchild->flags |= TCB_BPTSET;
+		tcpchild->baddr = tcp->baddr;
+		memcpy(tcpchild->inst, tcp->inst,
+			sizeof tcpchild->inst);
+	}
+	if (!qflag)
+		fprintf(stderr, "Process %d attached\n", pid);
 	return 0;
-#endif
+#endif /* !LINUX */
 }
 
 #endif /* !USE_PROCFS */
@@ -2018,95 +1881,6 @@ printwaitn(struct tcb *tcp, int n, int b
 	return 0;
 }
 
-int
-internal_wait(tcp, flagarg)
-struct tcb *tcp;
-int flagarg;
-{
-	int got_kids;
-
-#ifdef TCB_CLONE_THREAD
-	if (tcp->flags & TCB_CLONE_THREAD)
-		/* The children we wait for are our parent's children.  */
-		got_kids = (tcp->parent->nchildren
-			    > tcp->parent->nclone_detached);
-	else
-		got_kids = (tcp->nchildren > tcp->nclone_detached);
-#else
-	got_kids = tcp->nchildren > 0;
-#endif
-
-	if (entering(tcp) && got_kids) {
-		/* There are children that this parent should block for.
-		   But ptrace made us the parent of the traced children
-		   and the real parent will get ECHILD from the wait call.
-
-		   XXX If we attached with strace -f -p PID, then there
-		   may be untraced dead children the parent could be reaping
-		   now, but we make him block.  */
-
-		/* ??? WTA: fix bug with hanging children */
-
-		if (!(tcp->u_arg[flagarg] & WNOHANG)) {
-			/*
-			 * There are traced children.  We'll make the parent
-			 * block to avoid a false ECHILD error due to our
-			 * ptrace having stolen the children.  However,
-			 * we shouldn't block if there are zombies to reap.
-			 * XXX doesn't handle pgrp matches (u_arg[0]==0,<-1)
-			 */
-			struct tcb *child = NULL;
-			if (tcp->nzombies > 0 &&
-			    (tcp->u_arg[0] == -1 ||
-			     (child = pid2tcb(tcp->u_arg[0])) == NULL))
-				return 0;
-			if (tcp->u_arg[0] > 0) {
-				/*
-				 * If the parent waits for a specified child
-				 * PID, then it must get ECHILD right away
-				 * if that PID is not one of its children.
-				 * Make sure that the requested PID matches
-				 * one of the parent's children that we are
-				 * tracing, and don't suspend it otherwise.
-				 */
-				if (child == NULL)
-					child = pid2tcb(tcp->u_arg[0]);
-				if (child == NULL || child->parent != (
-#ifdef TCB_CLONE_THREAD
-					    (tcp->flags & TCB_CLONE_THREAD)
-					    ? tcp->parent :
-#endif
-					    tcp) ||
-				    (child->flags & TCB_EXITING))
-					return 0;
-			}
-			tcp->flags |= TCB_SUSPENDED;
-			tcp->waitpid = tcp->u_arg[0];
-#ifdef TCB_CLONE_THREAD
-			if (tcp->flags & TCB_CLONE_THREAD)
-				tcp->parent->nclone_waiting++;
-#endif
-		}
-	}
-	if (exiting(tcp) && tcp->u_error == ECHILD && got_kids) {
-		if (tcp->u_arg[flagarg] & WNOHANG) {
-			/* We must force a fake result of 0 instead of
-			   the ECHILD error.  */
-			extern int force_result();
-			return force_result(tcp, 0, 0);
-		}
-	}
-	else if (exiting(tcp) && tcp->u_error == 0 && tcp->u_rval > 0 &&
-		 tcp->nzombies > 0 && pid2tcb(tcp->u_rval) == NULL) {
-		/*
-		 * We just reaped a child we don't know about,
-		 * presumably a zombie we already droptcb'd.
-		 */
-		tcp->nzombies--;
-	}
-	return 0;
-}
-
 #ifdef SVR4
 
 int
diff -urpN strace.2/strace.c strace.9/strace.c
--- strace.2/strace.c	2009-03-06 01:18:52.000000000 +0100
+++ strace.9/strace.c	2009-03-06 01:15:12.000000000 +0100
@@ -49,16 +49,16 @@
 
 #ifdef LINUX
 # include <asm/unistd.h>
-# if defined __NR_tgkill
-#  define my_tgkill(pid, tid, sig) syscall (__NR_tgkill, (pid), (tid), (sig))
-# elif defined __NR_tkill
-#  define my_tgkill(pid, tid, sig) syscall (__NR_tkill, (tid), (sig))
+# if defined __NR_tkill
+#  define my_tkill(tid, sig) syscall (__NR_tkill, (tid), (sig))
+# elif defined __NR_tgkill
+#  define my_tkill(tid, sig) syscall (__NR_tgkill, -1, (tid), (sig))
 # else
    /* kill() may choose arbitrarily the target task of the process group
       while we later wait on a that specific TID.  PID process waits become
       TID task specific waits for a process under ptrace(2).  */
 #  warning "Neither tkill(2) nor tgkill(2) available, risk of strace hangs!"
-#  define my_tgkill(pid, tid, sig) kill ((tid), (sig))
+#  define my_tkill(tid, sig) kill ((tid), (sig))
 # endif
 #endif
 
@@ -366,6 +366,22 @@ newoutf(struct tcb *tcp)
 	return 0;
 }
 
+struct tcb *
+pid2tcb(int pid)
+{
+	int i;
+	struct tcb *tcp;
+
+	for (i = 0; i < tcbtabsize; i++) {
+		tcp = tcbtab[i];
+		if (pid && tcp->pid != pid)
+			continue;
+		if (tcp->flags & TCB_INUSE)
+			return tcp;
+	}
+	return NULL;
+}
+
 static void
 startup_attach(void)
 {
@@ -398,14 +414,21 @@ startup_attach(void)
 		}
 	}
 
+	/* Possible states in flags at this point:
+	 * 0: free
+	 * TCB_INUSE: tcb for PROG in "strace PROG" run
+	 * TCB_INUSE+TCB_ATTACHED: tcb for "-p PID1 [-p PID2]..."
+	 * TCB_INUSE+TCB_ATTACHED+TCB_INSYSCALL:
+	 *		other threads of each -p PIDn.
+	 * NB: abusing TCB_INSYSCALL here, don't want to have
+	 *     yet another bit just for this loop.
+	 * NB2: I think TCB_STARTUP is set too in all nonfree entries.
+	 */
 	for (tcbi = 0; tcbi < tcbtabsize; tcbi++) {
 		tcp = tcbtab[tcbi];
-		if (!(tcp->flags & TCB_INUSE) || !(tcp->flags & TCB_ATTACHED))
+		if ((tcp->flags & (TCB_INUSE|TCB_ATTACHED|TCB_INSYSCALL)) != (TCB_INUSE|TCB_ATTACHED))
 			continue;
-#ifdef LINUX
-		if (tcp->flags & TCB_CLONE_THREAD)
-			continue;
-#endif
+
 		/* Reinitialize the output since it may have changed. */
 		tcp->outf = outf;
 		if (newoutf(tcp) < 0)
@@ -426,25 +449,29 @@ startup_attach(void)
 			sprintf(procdir, "/proc/%d/task", tcp->pid);
 			dir = opendir(procdir);
 			if (dir != NULL) {
-				unsigned int ntid = 0, nerr = 0;
 				struct dirent *de;
+				unsigned ntid = 0;
+				unsigned master_pid = tcbtab[tcbi]->pid;
 				int tid;
+
 				while ((de = readdir(dir)) != NULL) {
 					if (de->d_fileno == 0)
 						continue;
 					tid = atoi(de->d_name);
 					if (tid <= 0)
 						continue;
-					++ntid;
-					if (ptrace(PTRACE_ATTACH, tid, (char *) 1, 0) < 0)
-						++nerr;
-					else if (tid != tcbtab[tcbi]->pid) {
-						tcp = alloctcb(tid);
-						tcp->flags |= TCB_ATTACHED|TCB_CLONE_THREAD|TCB_CLONE_DETACHED;
-						tcbtab[tcbi]->nchildren++;
-						tcbtab[tcbi]->nclone_threads++;
-						tcbtab[tcbi]->nclone_detached++;
-						tcp->parent = tcbtab[tcbi];
+					tcp = pid2tcb(tid); /* may be NULL */
+					if (ptrace(PTRACE_ATTACH, tid, (char *) 1, 0) < 0) {
+						if (tcp) {
+							/* -p PID itself can't be attached */
+							droptcb(tcp);
+						}
+					} else {
+						++ntid;
+						if (tcp == NULL) {
+							tcp = alloctcb(tid);
+							tcp->flags |= TCB_ATTACHED|TCB_INSYSCALL;
+						} /* else: we attached to -p PID */
 					}
 					if (interactive) {
 						sigprocmask(SIG_SETMASK, &empty_set, NULL);
@@ -454,17 +481,15 @@ startup_attach(void)
 					}
 				}
 				closedir(dir);
-				ntid -= nerr;
 				if (ntid == 0) {
 					perror("attach: ptrace(PTRACE_ATTACH, ...)");
-					droptcb(tcp);
 					continue;
 				}
 				if (!qflag) {
 					fprintf(stderr, ntid > 1
 ? "Process %u attached with %u threads - interrupt to quit\n"
 : "Process %u attached - interrupt to quit\n",
-						tcbtab[tcbi]->pid, ntid);
+						master_pid, ntid);
 				}
 				continue;
 			} /* if (opendir worked) */
@@ -496,6 +521,9 @@ startup_attach(void)
 				"Process %u attached - interrupt to quit\n",
 				tcp->pid);
 	}
+	/* Clear our cheat flags :) */
+	for (tcbi = 0; tcbi < tcbtabsize; tcbi++)
+		tcbtab[tcbi]->flags &= ~TCB_INSYSCALL;
 
 	if (interactive)
 		sigprocmask(SIG_SETMASK, &empty_set, NULL);
@@ -796,6 +824,8 @@ main(int argc, char *argv[])
 				fprintf(stderr, "%s: I'm sorry, I can't let you do that, Dave.\n", progname);
 				break;
 			}
+			if (pid2tcb(pid)) /* duplicate */
+				break;
 			tcp = alloc_tcb(pid, 0);
 			tcp->flags |= TCB_ATTACHED;
 			pflag_seen++;
@@ -1005,7 +1035,9 @@ alloc_tcb(int pid, int command_options_p
 			tcp->pid = pid;
 			tcp->flags = TCB_INUSE | TCB_STARTUP;
 			tcp->outf = outf; /* Initialise to current out file */
+#ifdef USE_PROCFS
 			tcp->pfd = -1;
+#endif
 			nprocs++;
 			if (command_options_parsed)
 				newoutf(tcp);
@@ -1303,27 +1335,6 @@ proc_open(struct tcb *tcp, int attaching
 	return 0;
 }
 
-#endif /* USE_PROCFS */
-
-struct tcb *
-pid2tcb(pid)
-int pid;
-{
-	int i;
-	struct tcb *tcp;
-
-	for (i = 0; i < tcbtabsize; i++) {
-		tcp = tcbtab[i];
-		if (pid && tcp->pid != pid)
-			continue;
-		if (tcp->flags & TCB_INUSE)
-			return tcp;
-	}
-	return NULL;
-}
-
-#ifdef USE_PROCFS
-
 static struct tcb *
 pfd2tcb(pfd)
 int pfd;
@@ -1348,45 +1359,11 @@ struct tcb *tcp;
 {
 	if (tcp->pid == 0)
 		return;
-#ifdef TCB_CLONE_THREAD
-	if (tcp->nclone_threads > 0) {
-		/* There are other threads left in this process, but this
-		   is the one whose PID represents the whole process.
-		   We need to keep this record around as a zombie until
-		   all the threads die.  */
-		tcp->flags |= TCB_EXITING;
-		return;
-	}
-#endif
+
 	nprocs--;
 	tcp->pid = 0;
-
-	if (tcp->parent != NULL) {
-		tcp->parent->nchildren--;
-#ifdef TCB_CLONE_THREAD
-		if (tcp->flags & TCB_CLONE_DETACHED)
-			tcp->parent->nclone_detached--;
-		if (tcp->flags & TCB_CLONE_THREAD)
-			tcp->parent->nclone_threads--;
-#endif
-#ifdef TCB_CLONE_DETACHED
-		if (!(tcp->flags & TCB_CLONE_DETACHED))
-#endif
-			tcp->parent->nzombies++;
-#ifdef LINUX
-		/* Update `tcp->parent->parent->nchildren' and the other fields
-		   like NCLONE_DETACHED, only for zombie group leader that has
-		   already reported and been short-circuited at the top of this
-		   function.  The same condition as at the top of DETACH.  */
-		if ((tcp->flags & TCB_CLONE_THREAD) &&
-		    tcp->parent->nclone_threads == 0 &&
-		    (tcp->parent->flags & TCB_EXITING))
-			droptcb(tcp->parent);
-#endif
-		tcp->parent = NULL;
-	}
-
 	tcp->flags = 0;
+#ifdef USE_PROCFS
 	if (tcp->pfd != -1) {
 		close(tcp->pfd);
 		tcp->pfd = -1;
@@ -1400,10 +1377,9 @@ struct tcb *tcp;
 			tcp->pfd_status = -1;
 		}
 #endif /* !FREEBSD */
-#ifdef USE_PROCFS
 		rebuild_pollv(); /* Note, flags needs to be cleared by now.  */
-#endif
 	}
+#endif
 
 	if (outfname && followfork > 1 && tcp->outf)
 		fclose(tcp->outf);
@@ -1411,100 +1387,6 @@ struct tcb *tcp;
 	tcp->outf = 0;
 }
 
-#ifndef USE_PROCFS
-
-static int
-resume(tcp)
-struct tcb *tcp;
-{
-	if (tcp == NULL)
-		return -1;
-
-	if (!(tcp->flags & TCB_SUSPENDED)) {
-		fprintf(stderr, "PANIC: pid %u not suspended\n", tcp->pid);
-		return -1;
-	}
-	tcp->flags &= ~TCB_SUSPENDED;
-#ifdef TCB_CLONE_THREAD
-	if (tcp->flags & TCB_CLONE_THREAD)
-		tcp->parent->nclone_waiting--;
-#endif
-
-	if (ptrace_restart(PTRACE_SYSCALL, tcp, 0) < 0)
-		return -1;
-
-	if (!qflag)
-		fprintf(stderr, "Process %u resumed\n", tcp->pid);
-	return 0;
-}
-
-static int
-resume_from_tcp (struct tcb *tcp)
-{
-	int error = 0;
-	int resumed = 0;
-
-	/* XXX This won't always be quite right (but it never was).
-	   A waiter with argument 0 or < -1 is waiting for any pid in
-	   a particular pgrp, which this child might or might not be
-	   in.  The waiter will only wake up if it's argument is -1
-	   or if it's waiting for tcp->pid's pgrp.  It makes a
-	   difference to wake up a waiter when there might be more
-	   traced children, because it could get a false ECHILD
-	   error.  OTOH, if this was the last child in the pgrp, then
-	   it ought to wake up and get ECHILD.  We would have to
-	   search the system for all pid's in the pgrp to be sure.
-
-	     && (t->waitpid == -1 ||
-		 (t->waitpid == 0 && getpgid (tcp->pid) == getpgid (t->pid))
-		 || (t->waitpid < 0 && t->waitpid == -getpid (t->pid)))
-	*/
-
-	if (tcp->parent &&
-	    (tcp->parent->flags & TCB_SUSPENDED) &&
-	    (tcp->parent->waitpid <= 0 || tcp->parent->waitpid == tcp->pid)) {
-		error = resume(tcp->parent);
-		++resumed;
-	}
-#ifdef TCB_CLONE_THREAD
-	if (tcp->parent && tcp->parent->nclone_waiting > 0) {
-		/* Some other threads of our parent are waiting too.  */
-		unsigned int i;
-
-		/* Resume all the threads that were waiting for this PID.  */
-		for (i = 0; i < tcbtabsize; i++) {
-			struct tcb *t = tcbtab[i];
-			if (t->parent == tcp->parent && t != tcp
-			    && ((t->flags & (TCB_CLONE_THREAD|TCB_SUSPENDED))
-				== (TCB_CLONE_THREAD|TCB_SUSPENDED))
-			    && t->waitpid == tcp->pid) {
-				error |= resume (t);
-				++resumed;
-			}
-		}
-		if (resumed == 0)
-			/* Noone was waiting for this PID in particular,
-			   so now we might need to resume some wildcarders.  */
-			for (i = 0; i < tcbtabsize; i++) {
-				struct tcb *t = tcbtab[i];
-				if (t->parent == tcp->parent && t != tcp
-				    && ((t->flags
-					 & (TCB_CLONE_THREAD|TCB_SUSPENDED))
-					== (TCB_CLONE_THREAD|TCB_SUSPENDED))
-				    && t->waitpid <= 0
-					) {
-					error |= resume (t);
-					break;
-				}
-			}
-	}
-#endif
-
-	return error;
-}
-
-#endif /* !USE_PROCFS */
-
 /* detach traced process; continue with sig
    Never call DETACH twice on the same process as both unattached and
    attached-unstopped processes give the same ESRCH.  For unattached process we
@@ -1518,14 +1400,6 @@ int sig;
 	int error = 0;
 #ifdef LINUX
 	int status, catch_sigstop;
-	struct tcb *zombie = NULL;
-
-	/* If the group leader is lingering only because of this other
-	   thread now dying, then detach the leader as well.  */
-	if ((tcp->flags & TCB_CLONE_THREAD) &&
-	    tcp->parent->nclone_threads == 1 &&
-	    (tcp->parent->flags & TCB_EXITING))
-		zombie = tcp->parent;
 #endif
 
 	if (tcp->flags & TCB_BPTSET)
@@ -1554,15 +1428,11 @@ int sig;
 		/* Shouldn't happen. */
 		perror("detach: ptrace(PTRACE_DETACH, ...)");
 	}
-	else if (my_tgkill((tcp->flags & TCB_CLONE_THREAD ? tcp->parent->pid
-							  : tcp->pid),
-			   tcp->pid, 0) < 0) {
+	else if (my_tkill(tcp->pid, 0) < 0) {
 		if (errno != ESRCH)
 			perror("detach: checking sanity");
 	}
-	else if (!catch_sigstop && my_tgkill((tcp->flags & TCB_CLONE_THREAD
-					      ? tcp->parent->pid : tcp->pid),
-					     tcp->pid, SIGSTOP) < 0) {
+	else if (!catch_sigstop && my_tkill(tcp->pid, SIGSTOP) < 0) {
 		if (errno != ESRCH)
 			perror("detach: stopping child");
 	}
@@ -1623,22 +1493,11 @@ int sig;
 	error = ptrace_restart(PTRACE_DETACH, tcp, sig);
 #endif /* SUNOS4 */
 
-#ifndef USE_PROCFS
-	error |= resume_from_tcp (tcp);
-#endif
-
 	if (!qflag)
 		fprintf(stderr, "Process %u detached\n", tcp->pid);
 
 	droptcb(tcp);
 
-#ifdef LINUX
-	if (zombie != NULL) {
-		/* TCP no longer exists therefore you must not detach () it.  */
-		droptcb(zombie);
-	}
-#endif
-
 	return error;
 }
 
@@ -2187,67 +2046,6 @@ trace()
 
 #else /* !USE_PROCFS */
 
-#ifdef TCB_GROUP_EXITING
-/* Handle an exit detach or death signal that is taking all the
-   related clone threads with it.  This is called in three circumstances:
-   SIG == -1	TCP has already died (TCB_ATTACHED is clear, strace is parent).
-   SIG == 0	Continuing TCP will perform an exit_group syscall.
-   SIG == other	Continuing TCP with SIG will kill the process.
-*/
-static int
-handle_group_exit(struct tcb *tcp, int sig)
-{
-	/* We need to locate our records of all the clone threads
-	   related to TCP, either its children or siblings.  */
-	struct tcb *leader = NULL;
-
-	if (tcp->flags & TCB_CLONE_THREAD)
-		leader = tcp->parent;
-	else if (tcp->nclone_detached > 0)
-		leader = tcp;
-
-	if (sig < 0) {
-		if (leader != NULL && leader != tcp
-		 && !(leader->flags & TCB_GROUP_EXITING)
-		 && !(tcp->flags & TCB_STARTUP)
-		) {
-			fprintf(stderr,
-				"PANIC: handle_group_exit: %d leader %d\n",
-				tcp->pid, leader ? leader->pid : -1);
-		}
-		/* TCP no longer exists therefore you must not detach() it.  */
-#ifndef USE_PROCFS
-		resume_from_tcp(tcp);
-#endif
-		droptcb(tcp);	/* Already died.  */
-	}
-	else {
-		/* Mark that we are taking the process down.  */
-		tcp->flags |= TCB_EXITING | TCB_GROUP_EXITING;
-		if (tcp->flags & TCB_ATTACHED) {
-			detach(tcp, sig);
-			if (leader != NULL && leader != tcp)
-				leader->flags |= TCB_GROUP_EXITING;
-		} else {
-			if (ptrace_restart(PTRACE_CONT, tcp, sig) < 0) {
-				cleanup();
-				return -1;
-			}
-			if (leader != NULL) {
-				leader->flags |= TCB_GROUP_EXITING;
-				if (leader != tcp)
-					droptcb(tcp);
-			}
-			/* The leader will report to us as parent now,
-			   and then we'll get to the SIG==-1 case.  */
-			return 0;
-		}
-	}
-
-	return 0;
-}
-#endif
-
 static struct tcb *
 collect_stopped_tcbs(void)
 {
@@ -2364,6 +2162,16 @@ collect_stopped_tcbs(void)
 		if ((tcp = pid2tcb(pid)) == NULL) {
 #ifdef LINUX
 			if (followfork) {
+				int sig;
+
+				if (WIFEXITED(status)) {
+					fprintf(stderr, "unexpected PID %u: exited %d\n", pid, WEXITSTATUS(status));
+					continue;
+				}
+				if (!WIFSTOPPED(status) && !WIFSIGNALED(status)) {
+					fprintf(stderr, "unexpected PID %u: strange status %x\n", pid, status);
+					continue;
+				}
 				/* This is needed to go with the CLONE_PTRACE
 				   changes in process.c/util.c: we might see
 				   the child's initial trap before we see the
@@ -2374,48 +2182,59 @@ collect_stopped_tcbs(void)
 				   child so that we know how to do clearbpt
 				   in the child.  */
 				tcp = alloctcb(pid);
-				tcp->flags |= TCB_ATTACHED | TCB_SUSPENDED;
+				tcp->flags |= TCB_ATTACHED;
+				if (WIFSTOPPED(status)) {
+					if (!qflag)
+						fprintf(stderr, "Process %d attached\n", pid);
+					printleader(tcp);
+					sig = WSTOPSIG(status);
+					if (sig == SIGSTOP) {
+						tprintf("+++ new process attached +++");
+						sig = 0;
+					} else {
+//NB: if CLONE_PTRACE works as if we did PTRACE_ATTACH to the new process,
+//it may be subject to the same idiosyncrasy:
+//if process gets signaled before tracer (we) wait'ed for it,
+//we may get that other signal _before_ SIGSTOP!
+//Since now we do not block parent at clone() exit,
+//even parent might manage to send a signal to new child...
+//Does it actually happen? Do we need to handle it here?
+						tprintf("+++ new process attached (%s) +++", signame(WSTOPSIG(status)));
+					}
+					printtrailer();
+					if (ptrace_restart(PTRACE_SYSCALL, tcp, sig) < 0)
+						fprintf(stderr, "cannot trace PID %u: %s\n", pid, strerror(errno));
+					continue;
+				}
+				/* WIFSIGNALED.
+				 * Happens when new CLONE_PTRACEd process
+				 * was killed even before SIGSTOP.
+				 * See test/sigkill_rain.c
+				 */
 				if (!qflag)
-					fprintf(stderr, "\
-Process %d attached (waiting for parent)\n",
-						pid);
-			}
-			else
-				/* This can happen if a clone call used
-				   CLONE_PTRACE itself.  */
-#endif /* LINUX */
-			{
-				fprintf(stderr, "unknown pid: %u\n", pid);
-				if (WIFSTOPPED(status))
-					ptrace(PTRACE_CONT, pid, (char *) 1, 0);
-				exit(1);
-			}
+					fprintf(stderr, "Process %d dead on arrival\n", pid);
+				/* Need to show "killed by", so save tcp. */
+				goto remember;
+			}
+			/* No -f, but we see an unknown pid?!
+			 * This can happen if clone call used CLONE_PTRACE
+			 * explicitly (not injected by strace).
+			 */
+//TODO: hole! processes can detect stracing this way.
+//remove CLONE_PTRACE bit on every clone() if no -f was given.
+#endif
+			fprintf(stderr, "unknown pid: %u\n", pid);
+			if (WIFSTOPPED(status))
+				ptrace(PTRACE_CONT, pid, (char *) 1, 0);
+			_exit(1);
 		}
 
 #ifdef LINUX
+ remember:
 		if (cflag) {
 			tv_sub(&tcp->dtime, &ru.ru_stime, &tcp->stime);
 			tcp->stime = ru.ru_stime;
 		}
-#endif
-		if (tcp->flags & TCB_SUSPENDED) {
-			/*
-			 * Apparently, doing any ptrace() call on a stopped
-			 * process, provokes the kernel to report the process
-			 * status again on a subsequent wait(), even if the
-			 * process has not been actually restarted.
-			 * Since we have inspected the arguments of suspended
-			 * processes we end up here testing for this case.
-			 *
-			 * We also end up here when we catch new pid of
-			 * CLONE_PTRACEd process. Do not process/restart it
-			 * until we see corresponding clone() syscall exit
-			 * in its parent.
-			 */
-			continue;
-		}
-
-#ifdef LINUX
 		/* So far observed only on RHEL5 ia64, but I imagine this
 		 * can legitimately happen elsewhere.
 		 * If we waited and got a stopped task notification,
@@ -2486,39 +2305,24 @@ handle_stopped_tcbs(struct tcb *tcp)
 					"");
 				printtrailer();
 			}
-#ifdef TCB_GROUP_EXITING
-			handle_group_exit(tcp, -1);
-#else
 			droptcb(tcp);
-#endif
 			continue;
 		}
 		if (WIFEXITED(status)) {
 			if (pid == strace_child)
 				exit_code = WEXITSTATUS(status);
-			if (debug)
-				fprintf(stderr, "pid %u exited with %d\n", pid, WEXITSTATUS(status));
-			if ((tcp->flags & (TCB_ATTACHED|TCB_STARTUP)) == TCB_ATTACHED
-#ifdef TCB_GROUP_EXITING
-			    && !(tcp->parent && (tcp->parent->flags & TCB_GROUP_EXITING))
-			    && !(tcp->flags & TCB_GROUP_EXITING)
-#endif
-			) {
-				fprintf(stderr,
-					"PANIC: attached pid %u exited with %d\n",
-					pid, WEXITSTATUS(status));
-			}
 			if (tcp == tcp_last) {
 				if ((tcp->flags & (TCB_INSYSCALL|TCB_REPRINT)) == TCB_INSYSCALL)
 					tprintf(" <unfinished ... exit status %d>\n",
 						WEXITSTATUS(status));
 				tcp_last = NULL;
+			} else if (!cflag /* && (qual_flags[WTERMSIG(status)] & QUAL_SIGNAL) */ ) {
+				printleader(tcp);
+				tprintf("+++ exited with %d +++",
+					WEXITSTATUS(status));
+				printtrailer();
 			}
-#ifdef TCB_GROUP_EXITING
-			handle_group_exit(tcp, -1);
-#else
 			droptcb(tcp);
-#endif
 			continue;
 		}
 		if (!WIFSTOPPED(status)) {
@@ -2692,21 +2496,10 @@ handle_stopped_tcbs(struct tcb *tcp)
 					strsignal(WSTOPSIG(status)), pc, addr);
 				printtrailer();
 			}
-			if (((tcp->flags & TCB_ATTACHED) ||
-			     tcp->nclone_threads > 0) &&
-				!sigishandled(tcp, WSTOPSIG(status))) {
-#ifdef TCB_GROUP_EXITING
-				handle_group_exit(tcp, WSTOPSIG(status));
-#else
-				detach(tcp, WSTOPSIG(status));
-#endif
-				continue;
-			}
 			if (ptrace_restart(PTRACE_SYSCALL, tcp, WSTOPSIG(status)) < 0) {
 				cleanup();
 				return -1;
 			}
-			tcp->flags &= ~TCB_SUSPENDED;
 			continue;
 		}
 		/* we handled the STATUS, we are permitted to interrupt now. */
@@ -2738,27 +2531,6 @@ handle_stopped_tcbs(struct tcb *tcp)
 			}
 			continue;
 		}
-		if (tcp->flags & TCB_EXITING) {
-#ifdef TCB_GROUP_EXITING
-			if (tcp->flags & TCB_GROUP_EXITING) {
-				if (handle_group_exit(tcp, 0) < 0)
-					return -1;
-				continue;
-			}
-#endif
-			if (tcp->flags & TCB_ATTACHED)
-				detach(tcp, 0);
-			else if (ptrace_restart(PTRACE_CONT, tcp, 0) < 0) {
-				cleanup();
-				return -1;
-			}
-			continue;
-		}
-		if (tcp->flags & TCB_SUSPENDED) {
-			if (!qflag)
-				fprintf(stderr, "Process %u suspended\n", pid);
-			continue;
-		}
 	tracing:
 		if (ptrace_restart(PTRACE_SYSCALL, tcp, 0) < 0) {
 			cleanup();
diff -urpN strace.2/syscall.c strace.9/syscall.c
--- strace.2/syscall.c	2009-02-24 14:02:51.000000000 +0100
+++ strace.9/syscall.c	2009-02-24 14:02:26.000000000 +0100
@@ -688,9 +688,6 @@ internal_syscall(struct tcb *tcp)
 
 	func = sysent[tcp->scno].sys_func;
 
-	if (sys_exit == func)
-		return internal_exit(tcp);
-
 	if (   sys_fork == func
 #if defined(FREEBSD) || defined(LINUX) || defined(SUNOS4)
 	    || sys_vfork == func
@@ -716,22 +713,6 @@ internal_syscall(struct tcb *tcp)
 	   )
 		return internal_exec(tcp);
 
-	if (   sys_waitpid == func
-	    || sys_wait4 == func
-#if defined(SVR4) || defined(FREEBSD) || defined(SUNOS4)
-	    || sys_wait == func
-#endif
-#ifdef ALPHA
-	    || sys_osf_wait4 == func
-#endif
-	   )
-		return internal_wait(tcp, 2);
-
-#if defined(LINUX) || defined(SVR4)
-	if (sys_waitid == func)
-		return internal_wait(tcp, 3);
-#endif
-
 	return 0;
 }
 






More information about the Strace-devel mailing list