[PATCH] check process death if ptrace() fails, v2

Denys Vlasenko dvlasenk at redhat.com
Thu Dec 11 17:49:32 UTC 2008


On Wed, 2008-12-10 at 04:33 -0800, Roland McGrath wrote:
> > +extern long secure_ptrace P((int request, struct tcb *tcp, void *addr, void *data));
> > +extern long secure_ptrace_SYSCALL P((struct tcb *tcp, int sig));
> > +extern long secure_ptrace_CONT P((struct tcb *tcp, int sig));
> > +extern long secure_ptrace_DETACH P((struct tcb *tcp, int sig));
> 
> These are not good names, and there is no reason to have several different
> calls.

> Just have one do_ptrace(), pass it the ptrace args and also a
> string to use in case of error messages.  (This might not actually get
> used, or maybe it gets stored for later.  But make a clean interface for
> the function that gives one in case we want it.)

It will have five parameters then. ptrace() as-is with four params
looks unreadable. I would like to try and see whether we can avoid
passing that string just yet.

I propose to have

1. do_ptrace() as generic ptrace wrapper
(it will be used so far only in upeek), and

2. ptrace_restart(PTRACE_xxx, tcp, sig) for ptrace actions which
restart tracee (CONT/SYSCALL/DETACH) - this allows us
to have less params, remove ugly casts of sig,
and it's also good to hint that we restart (unpause) tracee
by calling it.

Nice thing we may do is to not return error if ptrace failed
with ESRCH - this simplifies code in callers which generally
should ignore ESRCH, but not other errors.
Interested callers may still check errno if they want to see ESRCH.

It also prints error message on !ESRCH errors.
Callers need not do it.

> > --- strace.1/process.c	2008-12-04 19:16:29.000000000 +0100
> > +++ strace.2/process.c	2008-12-09 19:00:00.000000000 +0100
> > @@ -964,7 +964,8 @@ struct tcb *tcp;
> >  
> >  			tcpchild->flags &= ~(TCB_SUSPENDED|TCB_STARTUP);
> >  			if (ptrace(PTRACE_SYSCALL, pid, (char *) 1, 0) < 0) {
> > -				perror("resume: ptrace(PTRACE_SYSCALL, ...)");
> > +				if (errno != ESRCH) /* if not killed meanwhile */
> > +					perror("resume: ptrace(PTRACE_SYSCALL, ...)");
> >  				return -1;
> >  			}
> 
> This doesn't need a special case.  Just use do_ptrace on tcpchild.

Ok, replaced with ptrace_restart(PTRACE_SYSCALL).


> > @@ -2175,8 +2168,7 @@ handle_group_exit(struct tcb *tcp, int s
> >  		  	if (leader != NULL && leader != tcp)
> >  				leader->flags |= TCB_GROUP_EXITING;
> >  		}
> > -		else if (ptrace(PTRACE_CONT, tcp->pid, (char *) 1, sig) < 0) {
> > -			perror("strace: ptrace(PTRACE_CONT, ...)");
> > +		else if (secure_ptrace_CONT(tcp, sig) < 0) {
> >  			cleanup();
> >  			return -1;
> >  		}
> 
> This shouldn't call cleanup() any more.  The purpose of storing of ptrace
> errors is that we can deal with a failure of just this PID without bailing
> out entirely.  The same goes for other places you touched similarly.

Well, it shouldn't call cleanup() _on ESRC_, but we do not want
to hide other errors (at least not in this patch, which only wants
to make SIGKILL detection better). Since I am making ptrace_restart()
not return error on ESRCH, it's easy.

Actually, the code around this place is somewhat hard to read:

                if (tcp->flags & TCB_ATTACHED) {
...
                }
                else if (ptrace(PTRACE_CONT, tcp->pid, (char *) 1, sig) < 0) {
                        perror("strace: ptrace(PTRACE_CONT, ...)");
                        cleanup();
                        return -1;
                }
                else {
                        if (leader != NULL)
                                leader->flags |= TCB_GROUP_EXITING;
                        if (leader != NULL && leader != tcp)
                                droptcb(tcp);
                        return 0;
                }

I am rewriting it this way (the logic is the same,
it is just less confusing now):

                if (tcp->flags & TCB_ATTACHED) {
...
                } else {
                        if (ptrace_restart(PTRACE_CONT, tcp, sig) < 0) {
                                cleanup();
                                return -1;
                        }
                        if (leader != NULL) {
                                leader->flags |= TCB_GROUP_EXITING;
                                if (leader != tcp)
                                        droptcb(tcp);
                        }
                        return 0;
                }


> >  	tracing:
> > -		if (ptrace(PTRACE_SYSCALL, pid, (char *) 1, 0) < 0) {
> > -			perror("trace: ptrace(PTRACE_SYSCALL, ...)");
> > +		/* ESRCH: task is dead or not stopped, we will handle it
> > +		 * at waitpid. Otherwise we are confused and bail out. */
> > +		if (secure_ptrace_SYSCALL(tcp, 0) < 0 && errno != ESRCH) {
> >  			cleanup();
> >  			return -1;
> 
> The ESRCH checks scattered around are not a good idea.

Dealt with in ptrace_restart() now.


> > @@ -2575,6 +2563,11 @@ struct tcb *tcp;
> >  	if (tcp_last && (!outfname || followfork < 2 || tcp_last == tcp)) {
> >  		tcp_last->flags |= TCB_REPRINT;
> >  		tprintf(" <unfinished ...>\n");
> > +	} else if (tcp_last->ptrace_errno) {
> > +		if (tcp_last->flags & TCB_INSYSCALL)
> > +			tprintf(" <unavailable>) =");
> > +		tprintf(" ? <unavailable>\n");
> > +		tcp_last->ptrace_errno = 0;
> 
> Printing a needed ")" must follow the example in trace_syscall:
> 
> 		tprintf(") ");
> 		tabto(acolumn);
> 
> before the "= ..." part.
> 
> But anyway, this is not quite the logic needed.  As you can see in
> trace_syscall, there is never a ")" printed that's not followed by a "= ...".

I don't do it either.

> So it can't be right to conditionalize the ") =" and not the "...".
> That just doesn't make sense.
> 
> Let's think through all the cases where there might have been a stored
> ptrace error.
> 
> 1. Error inside get_scno/syscall_fixup/syscall_enter.  That is, PID1 had a
>    syscall-entry stop, then ptrace failed to read the registers to get the
>    syscall number and args.
> 
>    We should fix trace_syscall so that it sets TCB_INSYSCALL for this case,
>    making it consistent that it's always set after the entry stop was seen
>    no matter what.  Otherwise recovering from this case would leave the
>    state confused.

>    Since we might not even have got the syscall number, we don't have any
>    info to print. 


I implemented this in the new patch below. "strace true" worked on my machine, will test on ia64 with SIGKILL now.

Please review the patch.
--
vda




diff -d -urpN strace.1/defs.h strace.2/defs.h
--- strace.1/defs.h	2008-12-11 15:32:06.000000000 +0100
+++ strace.2/defs.h	2008-12-11 16:04:09.000000000 +0100
@@ -336,6 +336,7 @@ struct tcb {
 	prstatus_t status;	/* procfs status structure */
 #endif
 #endif
+	int ptrace_errno;
 #ifdef FREEBSD
 	struct procfs_status status;
 	int pfd_reg;
@@ -466,6 +467,8 @@ extern void set_overhead P((int));
 extern void qualify P((char *));
 extern int get_scno P((struct tcb *));
 extern long known_scno P((struct tcb *));
+extern long do_ptrace P((int request, struct tcb *tcp, void *addr, void *data));
+extern int ptrace_restart P((int request, struct tcb *tcp, int sig));
 extern int trace_syscall P((struct tcb *));
 extern int count_syscall P((struct tcb *, struct timeval *));
 extern void printxval P((const struct xlat *, int, const char *));
diff -d -urpN strace.1/process.c strace.2/process.c
--- strace.1/process.c	2008-12-11 15:32:06.000000000 +0100
+++ strace.2/process.c	2008-12-11 15:40:41.000000000 +0100
@@ -918,10 +918,8 @@ struct tcb *tcp;
 				clearbpt(tcpchild);
 
 			tcpchild->flags &= ~(TCB_SUSPENDED|TCB_STARTUP);
-			if (ptrace(PTRACE_SYSCALL, pid, (char *) 1, 0) < 0) {
-				perror("resume: ptrace(PTRACE_SYSCALL, ...)");
+			if (ptrace_restart(PTRACE_SYSCALL, tcpchild, 0) < 0)
 				return -1;
-			}
 
 			if (!qflag)
 				fprintf(stderr, "\
diff -d -urpN strace.1/strace.c strace.2/strace.c
--- strace.1/strace.c	2008-12-11 15:32:06.000000000 +0100
+++ strace.2/strace.c	2008-12-11 17:55:01.000000000 +0100
@@ -1358,10 +1358,8 @@ struct tcb *tcp;
 		tcp->parent->nclone_waiting--;
 #endif
 
-	if (ptrace(PTRACE_SYSCALL, tcp->pid, (char *) 1, 0) < 0) {
-		perror("resume: ptrace(PTRACE_SYSCALL, ...)");
+	if (ptrace_restart(PTRACE_SYSCALL, tcp, 0) < 0)
 		return -1;
-	}
 
 	if (!qflag)
 		fprintf(stderr, "Process %u resumed\n", tcp->pid);
@@ -1533,21 +1531,14 @@ int sig;
 				break;
 			}
 			if (WSTOPSIG(status) == SIGSTOP) {
-				if ((error = ptrace(PTRACE_DETACH,
-				    tcp->pid, (char *) 1, sig)) < 0) {
-					if (errno != ESRCH)
-						perror("detach: ptrace(PTRACE_DETACH, ...)");
-					/* I died trying. */
-				}
+				ptrace_restart(PTRACE_DETACH, tcp, sig);
 				break;
 			}
-			if ((error = ptrace(PTRACE_CONT, tcp->pid, (char *) 1,
-			    WSTOPSIG(status) == SIGTRAP ?
-			    0 : WSTOPSIG(status))) < 0) {
-				if (errno != ESRCH)
-					perror("detach: ptrace(PTRACE_CONT, ...)");
+			error = ptrace_restart(PTRACE_CONT, tcp,
+					WSTOPSIG(status) == SIGTRAP ? 0
+					: WSTOPSIG(status));
+			if (error < 0)
 				break;
-			}
 		}
 #endif /* LINUX */
 
@@ -1556,8 +1547,7 @@ int sig;
 	if (sig && kill(tcp->pid, sig) < 0)
 		perror("detach: kill");
 	sig = 0;
-	if ((error = ptrace(PTRACE_DETACH, tcp->pid, (char *) 1, sig)) < 0)
-		perror("detach: ptrace(PTRACE_DETACH, ...)");
+	error = ptrace_restart(PTRACE_DETACH, tcp, sig);
 #endif /* SUNOS4 */
 
 #ifndef USE_PROCFS
@@ -2160,17 +2150,16 @@ handle_group_exit(struct tcb *tcp, int s
 			detach(tcp, sig);
 		  	if (leader != NULL && leader != tcp)
 				leader->flags |= TCB_GROUP_EXITING;
-		}
-		else if (ptrace(PTRACE_CONT, tcp->pid, (char *) 1, sig) < 0) {
-			perror("strace: ptrace(PTRACE_CONT, ...)");
-			cleanup();
-			return -1;
-		}
-		else {
-			if (leader != NULL)
+		} else {
+			if (ptrace_restart(PTRACE_CONT, tcp, sig) < 0) {
+				cleanup();
+				return -1;
+			}
+			if (leader != NULL) {
 				leader->flags |= TCB_GROUP_EXITING;
-			if (leader != NULL && leader != tcp)
-				droptcb(tcp);
+				if (leader != tcp)
+					droptcb(tcp);
+			}
 			/* The leader will report to us as parent now,
 			   and then we'll get to the SIG==-1 case.  */
 			return 0;
@@ -2411,9 +2400,7 @@ Process %d attached (waiting for parent)
 				 * Hope we are back in control now.
 				 */
 				tcp->flags &= ~(TCB_INSYSCALL | TCB_SIGTRAPPED);
-				if (ptrace(PTRACE_SYSCALL,
-						pid, (char *) 1, 0) < 0) {
-					perror("trace: ptrace(PTRACE_SYSCALL, ...)");
+				if (ptrace_restart(PTRACE_SYSCALL, tcp, 0) < 0) {
 					cleanup();
 					return -1;
 				}
@@ -2460,9 +2447,7 @@ Process %d attached (waiting for parent)
 #endif
 				continue;
 			}
-			if (ptrace(PTRACE_SYSCALL, pid, (char *) 1,
-				   WSTOPSIG(status)) < 0) {
-				perror("trace: ptrace(PTRACE_SYSCALL, ...)");
+			if (ptrace_restart(PTRACE_SYSCALL, tcp, WSTOPSIG(status)) < 0) {
 				cleanup();
 				return -1;
 			}
@@ -2472,7 +2457,7 @@ Process %d attached (waiting for parent)
 		/* we handled the STATUS, we are permitted to interrupt now. */
 		if (interrupted)
 			return 0;
-		if (trace_syscall(tcp) < 0) {
+		if (trace_syscall(tcp) < 0 && !tcp->ptrace_errno) {
 			if (tcp->flags & TCB_ATTACHED)
 				detach(tcp, 0);
 			else {
@@ -2492,8 +2477,7 @@ Process %d attached (waiting for parent)
 #endif
 			if (tcp->flags & TCB_ATTACHED)
 				detach(tcp, 0);
-			else if (ptrace(PTRACE_CONT, pid, (char *) 1, 0) < 0) {
-				perror("strace: ptrace(PTRACE_CONT, ...)");
+			else if (ptrace_restart(PTRACE_CONT, tcp, 0) < 0) {
 				cleanup();
 				return -1;
 			}
@@ -2505,8 +2489,7 @@ Process %d attached (waiting for parent)
 			continue;
 		}
 	tracing:
-		if (ptrace(PTRACE_SYSCALL, pid, (char *) 1, 0) < 0) {
-			perror("trace: ptrace(PTRACE_SYSCALL, ...)");
+		if (ptrace_restart(PTRACE_SYSCALL, tcp, 0) < 0) {
 			cleanup();
 			return -1;
 		}
@@ -2554,9 +2537,18 @@ void
 printleader(tcp)
 struct tcb *tcp;
 {
-	if (tcp_last && (!outfname || followfork < 2 || tcp_last == tcp)) {
-		tcp_last->flags |= TCB_REPRINT;
-		tprintf(" <unfinished ...>\n");
+	if (tcp_last) {
+		if (tcp_last->ptrace_errno) {
+			if (tcp_last->flags & TCB_INSYSCALL) {
+				tprintf(" <unavailable>)");
+				tabto(acolumn);
+			}
+			tprintf("= ? <unavailable>\n");
+			tcp_last->ptrace_errno = 0;
+		} else if (!outfname || followfork < 2 || tcp_last == tcp) {
+			tcp_last->flags |= TCB_REPRINT;
+			tprintf(" <unfinished ...>\n");
+		}
 	}
 	curcol = 0;
 	if ((followfork == 1 || pflag_seen > 1) && outfname)
diff -d -urpN strace.1/syscall.c strace.2/syscall.c
--- strace.1/syscall.c	2008-12-11 15:32:06.000000000 +0100
+++ strace.2/syscall.c	2008-12-11 18:41:27.000000000 +0100
@@ -2278,28 +2278,27 @@ trace_syscall(struct tcb *tcp)
 {
 	int sys_res;
 	struct timeval tv;
-	int res;
-
-	/* Measure the exit time as early as possible to avoid errors. */
-	if (dtime && (tcp->flags & TCB_INSYSCALL))
-		gettimeofday(&tv, NULL);
-
-	res = get_scno(tcp);
-	if (res != 1)
-		return res;
-
-	res = syscall_fixup(tcp);
-	if (res != 1)
-		return res;
+	int res, scno_good;
 
 	if (tcp->flags & TCB_INSYSCALL) {
 		long u_error;
-		res = get_error(tcp);
-		if (res != 1)
-			return res;
 
-		internal_syscall(tcp);
-		if (tcp->scno >= 0 && tcp->scno < nsyscalls &&
+		/* Measure the exit time as early as possible to avoid errors. */
+		if (dtime)
+			gettimeofday(&tv, NULL);
+
+		scno_good = res = get_scno(tcp);
+		if (res == 1) {
+			res = syscall_fixup(tcp);
+			if (res != 1)
+				return res;
+		}
+		if (res == 1)
+			res = get_error(tcp);
+		if (res == 1)
+			internal_syscall(tcp);
+
+		if (res == 1 && tcp->scno >= 0 && tcp->scno < nsyscalls &&
 		    !(qual_flags[tcp->scno] & QUAL_TRACE)) {
 			tcp->flags &= ~TCB_INSYSCALL;
 			return 0;
@@ -2308,7 +2307,9 @@ trace_syscall(struct tcb *tcp)
 		if (tcp->flags & TCB_REPRINT) {
 			printleader(tcp);
 			tprintf("<... ");
-			if (tcp->scno >= nsyscalls || tcp->scno < 0)
+			if (scno_good != 1)
+				tprintf("????" "(");
+			else if (tcp->scno >= nsyscalls || tcp->scno < 0)
 				tprintf("syscall_%lu", tcp->scno);
 			else
 				tprintf("%s", sysent[tcp->scno].sys_name);
@@ -2318,6 +2319,12 @@ trace_syscall(struct tcb *tcp)
 		if (cflag)
 			return count_syscall(tcp, &tv);
 
+		if (res != 1) {
+			tprintf(") ");
+			tabto(acolumn);
+			return res;
+		}
+
 		if (tcp->scno >= nsyscalls || tcp->scno < 0
 		    || (qual_flags[tcp->scno] & QUAL_RAW))
 			sys_res = printargs(tcp);
@@ -2420,9 +2427,32 @@ trace_syscall(struct tcb *tcp)
 	}
 
 	/* Entering system call */
-	res = syscall_enter(tcp);
-	if (res != 1)
+	scno_good = res = get_scno(tcp);
+	if (res == 1) {
+		res = syscall_fixup(tcp);
+		if (res != 1)
+			return res;
+	}
+	if (res == 1)
+		res = syscall_enter(tcp);
+
+	if (res != 1) {
+		printleader(tcp);
+		tcp->flags &= ~TCB_REPRINT;
+		tcp_last = tcp;
+		if (scno_good != 1)
+			tprintf("????" "(");
+		else if (tcp->scno >= nsyscalls || tcp->scno < 0)
+			tprintf("syscall_%lu(", tcp->scno);
+		else
+			tprintf("%s(", sysent[tcp->scno].sys_name);
+		/*
+		 * " <unavailable>" will be added later by the code which
+		 * detects ptrace errors.
+		 */
+		tcp->flags |= TCB_INSYSCALL;
 		return res;
+	}
 
 	switch (known_scno(tcp)) {
 #ifdef SYS_socket_subcall
diff -d -urpN strace.1/util.c strace.2/util.c
--- strace.1/util.c	2008-12-11 15:32:06.000000000 +0100
+++ strace.2/util.c	2008-12-11 18:45:34.000000000 +0100
@@ -241,6 +241,69 @@ xlookup(const struct xlat *xlat, int val
 }
 
 /*
+ * Generic ptrace wrapper which tracks ESRCH errors
+ * by setting tcp->ptrace_errno to it.
+ *
+ * We assume that ESRCH indicates likely process death (SIGKILL?),
+ * modulo bugs where process somehow ended up not stopped.
+ * Unfortunately kernel uses ESRCH for that case too. Oh well.
+ *
+ * Currently used by upeek() only.
+ * TODO: use this in all other ptrace() calls while decoding.
+ */
+long
+do_ptrace(int request, struct tcb *tcp, void *addr, void *data)
+{
+	long l;
+
+//	if (tcp->ptrace_errno) {
+//		errno = tcp->ptrace_errno;
+//		return -1;
+//	}
+//
+	errno = 0;
+	l = ptrace(request, tcp->pid, addr, data);
+	/* Non-ESRCH errors might be our invalid reg/mem accesses,
+	 * we do not trip on them */
+	if (errno == ESRCH)
+		tcp->ptrace_errno = ESRCH;
+	return l;
+}
+
+/*
+ * Used when we want to unblock stopped traced process.
+ * Should be only used with PTRACE_CONT, PTRACE_DETACH and PTRACE_SYSCALL.
+ * Returns 0 on success or if error was ESRCH
+ * (presumably process was killed while we talk to it).
+ * Otherwise prints error message and returns -1.
+ */
+int
+ptrace_restart(int op, struct tcb *tcp, int sig)
+{
+	long l;
+
+	if (tcp->ptrace_errno) {
+		errno = tcp->ptrace_errno;
+		return -1;
+	}
+
+	errno = 0;
+	l = ptrace(op, tcp->pid, (void *) 1, (void *) (long) sig);
+	tcp->ptrace_errno = errno;
+	/* ESRCH: probably SIGKILLed, don't report. Otherwise complain.  */
+	if (tcp->ptrace_errno && tcp->ptrace_errno != ESRCH) {
+		const char *msg = "SYSCALL";
+		if (op == PTRACE_CONT)
+			msg = "CONT";
+		if (op == PTRACE_DETACH)
+			msg = "DETACH";
+		fprintf(stderr, "strace: ptrace(PTRACE_%s,1,%d): %s\n",
+				msg, sig, strerror(tcp->ptrace_errno));
+	}
+	return -((tcp->ptrace_errno != ESRCH) && (l != 0));
+}
+
+/*
  * Print entry in struct xlat table, if there.
  */
 void
@@ -1035,11 +1098,13 @@ long *res;
 	}
 #endif /* SUNOS4_KERNEL_ARCH_KLUDGE */
 	errno = 0;
-	val = ptrace(PTRACE_PEEKUSER, tcp->pid, (char *) off, 0);
+	val = do_ptrace(PTRACE_PEEKUSER, tcp, (char *) off, 0);
 	if (val == -1 && errno) {
-		char buf[60];
-		sprintf(buf,"upeek: ptrace(PTRACE_PEEKUSER,%d,%lu,0)", tcp->pid, off);
-		perror(buf);
+		if (errno != ESRCH) {
+			char buf[60];
+			sprintf(buf,"upeek: ptrace(PTRACE_PEEKUSER,%d,%lu,0)", tcp->pid, off);
+			perror(buf);
+		}
 		return -1;
 	}
 	*res = val;






More information about the Strace-devel mailing list