[PATCH] strace.c: split trace() into next_event()/dispatch_event()

Wed May 31 15:58:07 UTC 2017

On Mon, May 15, 2017 at 01:42:40AM +0300, Victor Krapivensky wrote:
> This is a preparation needed to implement a pull-style API for LuaJIT.

I'm very much in favour of this change, it makes the control flow easier
to understand.  Technically, this patch looks correct, although I've spend
a lot of my reviewing time to get to this conclusion.  Next time when you
do refactoring, try making the change more obvious. :)

> -/* Returns true iff the main trace loop has to continue. */
> -static bool
> -trace(void)
> +enum trace_event {
> +	/* Break the main loop. */
> +	TE_BREAK,
> +
> +	/* Call next_event() again. */
> +	TE_AGAIN,

I suggest renaming TE_AGAIN to TE_NEXT.

> +	/* Restart the tracee with signal 0 and call next_event() again. */
> +	TE_AGAIN_RESTART,

I suggest renaming TE_AGAIN_RESTART to TE_RESTART.

> +	/*
> +	 * For all the events below, current_tcp is set to current tracee's tcb.
> +	 * All the suggested actions imply that you want to continue the tracing of the current
> +	 * tracee; alternatively, you can detach it.
> +	 */
> +
> +	/*
> +	 * Tracee is going to perform execve().
> +	 * Restart the tracee with signal 0.
> +	 */
> +	TE_STOP_BEFORE_EXECVE,
> +
> +	/*
> +	 * Tracee is going to terminate.
> +	 * Restart the tracee with signal 0.
> +	 */
> +	TE_STOP_BEFORE_EXIT,
> +
> +	/*
> +	 * Tracee was killed by a signal with number WTERMSIG(*pstatus).
> +	 */
> +	TE_SIGNALLED,
> +
> +	/*
> +	 * Tracee exited with status WEXITSTATUS(*pstatus).
> +	 */
> +	TE_EXITED,
> +
> +	/*
> +	 * Tracee was stopped by a signal with number WSTOPSIG(*pstatus).
> +	 * Restart the tracee with that signal number.
> +	 */
> +	TE_GROUP_STOP,
> +
> +	/*
> +	 * Tracee received signal with number WSTOPSIG(*pstatus); signal info is written to *si.
> +	 * Restart the tracee (with that signal number if you want to deliver it).
> +	 */
> +	TE_SIGNAL_DELIVERY_STOP,
> +
> +	/*
> +	 * Syscall entry or exit.
> +	 * Restart the tracee with signal 0, or with an injected signal number.
> +	 */
> +	TE_SYSCALL_STOP,

I suggest reordering these states so that the more frequent ones
are listed before the more rare ones.

Likewise, in the switch statement in dispatch_event.

> +};
> +
> +static enum trace_event
> +next_event(int *pstatus, siginfo_t *si)
>  {
>  	int pid;
>  	int wait_errno;
> @@ -2260,11 +2316,11 @@ trace(void)
>  	bool stopped;
>  	unsigned int sig;
>  	unsigned int event;
> -	struct tcb *tcp;
>  	struct rusage ru;
> +	struct tcb *tcp;
>  
>  	if (interrupted)
> -		return false;
> +		return TE_BREAK;
>  
>  	/*
>  	 * Used to exit simply when nprocs hits zero, but in this testcase:
> @@ -2284,21 +2340,21 @@ trace(void)
>  		 * on exit. Oh well...
>  		 */
>  		if (nprocs == 0)
> -			return false;
> +			return TE_BREAK;
>  	}
>  
>  	if (interactive)
>  		sigprocmask(SIG_SETMASK, &empty_set, NULL);
> -	pid = wait4(-1, &status, __WALL, (cflag ? &ru : NULL));
> +	pid = wait4(-1, pstatus, __WALL, (cflag ? &ru : NULL));
>  	wait_errno = errno;
>  	if (interactive)
> -		sigprocmask(SIG_BLOCK, &blocked_set, NULL);
> +		sigprocmask(SIG_SETMASK, &blocked_set, NULL);
>  
>  	if (pid < 0) {
>  		if (wait_errno == EINTR)
> -			return true;
> +			return TE_AGAIN;
>  		if (nprocs == 0 && wait_errno == ECHILD)
> -			return false;
> +			return TE_BREAK;
>  		/*
>  		 * If nprocs > 0, ECHILD is not expected,
>  		 * treat it as any other error here:
> @@ -2307,10 +2363,12 @@ trace(void)
>  		perror_msg_and_die("wait4(__WALL)");
>  	}
>  
> +	status = *pstatus;

Do we need status variable along with *pstatus?

> +
>  	if (pid == popen_pid) {
>  		if (!WIFSTOPPED(status))
>  			popen_pid = 0;
> -		return true;
> +		return TE_AGAIN;
>  	}
>  
>  	if (debug_flag)
> @@ -2322,61 +2380,28 @@ trace(void)
>  	if (!tcp) {
>  		tcp = maybe_allocate_tcb(pid, status);
>  		if (!tcp)
> -			return true;
> +			return TE_AGAIN;
>  	}
>  
>  	clear_regs();
>  
>  	event = (unsigned int) status >> 16;
>  
> -	if (event == PTRACE_EVENT_EXEC) {
> -		/*
> -		 * Under Linux, execve changes pid to thread leader's pid,
> -		 * and we see this changed pid on EVENT_EXEC and later,
> -		 * execve sysexit. Leader "disappears" without exit
> -		 * notification. Let user know that, drop leader's tcb,
> -		 * and fix up pid in execve thread's tcb.
> -		 * Effectively, execve thread's tcb replaces leader's tcb.
> -		 *
> -		 * BTW, leader is 'stuck undead' (doesn't report WIFEXITED
> -		 * on exit syscall) in multithreaded programs exactly
> -		 * in order to handle this case.
> -		 *
> -		 * PTRACE_GETEVENTMSG returns old pid starting from Linux 3.0.
> -		 * On 2.6 and earlier, it can return garbage.
> -		 */
> -		if (os_release >= KERNEL_VERSION(3,0,0))
> -			tcp = maybe_switch_tcbs(tcp, pid);
> -
> -		if (detach_on_execve) {
> -			if (tcp->flags & TCB_SKIP_DETACH_ON_FIRST_EXEC) {
> -				tcp->flags &= ~TCB_SKIP_DETACH_ON_FIRST_EXEC;
> -			} else {
> -				detach(tcp); /* do "-b execve" thingy */
> -				return true;
> -			}
> -		}
> -	}
> -
> -	/* Set current output file */

Please keep this comment.

>  	current_tcp = tcp;
>  
> +	if (event == PTRACE_EVENT_EXEC)
> +		return TE_STOP_BEFORE_EXECVE;
> +
>  	if (cflag) {
>  		tv_sub(&tcp->dtime, &ru.ru_stime, &tcp->stime);
>  		tcp->stime = ru.ru_stime;
>  	}
>  
> -	if (WIFSIGNALED(status)) {
> -		print_signalled(tcp, pid, status);
> -		droptcb(tcp);
> -		return true;
> -	}
> +	if (WIFSIGNALED(status))
> +		return TE_SIGNALLED;
>  
> -	if (WIFEXITED(status)) {
> -		print_exited(tcp, pid, status);
> -		droptcb(tcp);
> -		return true;
> -	}
> +	if (WIFEXITED(status))
> +		return TE_EXITED;
>  
>  	if (!WIFSTOPPED(status)) {
>  		/*
> @@ -2385,22 +2410,43 @@ trace(void)
>  		 */
>  		error_msg("pid %u not stopped!", pid);
>  		droptcb(tcp);
> -		return true;
> +		return TE_AGAIN;
>  	}
>  
>  	/* Is this the very first time we see this tracee stopped? */
> -	if (tcp->flags & TCB_STARTUP) {
> +	if (tcp->flags & TCB_STARTUP)
>  		startup_tcb(tcp);
> -	}
>  
>  	sig = WSTOPSIG(status);
>  
>  	switch (event) {
>  		case 0:
> +			/*
> +			 * Is this post-attach SIGSTOP?
> +			 * Interestingly, the process may stop
> +			 * with STOPSIG equal to some other signal
> +			 * than SIGSTOP if we happend to attach
> +			 * just before the process takes a signal.
> +			 */
> +			if (sig == SIGSTOP && (tcp->flags & TCB_IGNORE_ONE_SIGSTOP)) {
> +				if (debug_flag)
> +					error_msg("ignored SIGSTOP on pid %d", tcp->pid);
> +				tcp->flags &= ~TCB_IGNORE_ONE_SIGSTOP;
> +				return TE_AGAIN_RESTART;
> +			} else if (sig == syscall_trap_sig) {
> +				return TE_SYSCALL_STOP;
> +			} else {
> +				*si = (siginfo_t) {};

I'm not sure this is portable enough, let's use traditional memset.

> +				/*
> +				 * True if tracee is stopped by signal
> +				 * (as opposed to "tracee received signal").
> +				 * TODO: shouldn't we check for errno == EINVAL too?
> +				 * We can get ESRCH instead, you know...
> +				 */
> +				stopped = ptrace(PTRACE_GETSIGINFO, pid, 0, si) < 0;
> +				return stopped ? TE_GROUP_STOP : TE_SIGNAL_DELIVERY_STOP;

As this is the only place where "stopped" variable is used,
let's move its declaration here.

> +			}
>  			break;
> -		case PTRACE_EVENT_EXIT:
> -			print_event_exit(tcp);
> -			goto restart_tracee_with_sig_0;
>  #if USE_SEIZE
>  		case PTRACE_EVENT_STOP:
>  			/*
> @@ -2412,101 +2458,113 @@ trace(void)
>  				case SIGTSTP:
>  				case SIGTTIN:
>  				case SIGTTOU:
> -					stopped = true;
> -					goto show_stopsig;
> -			}
> -			/* fall through */
> +					return TE_GROUP_STOP;
> +				}
> +			return TE_AGAIN_RESTART;
>  #endif
> +		case PTRACE_EVENT_EXIT:
> +			return TE_STOP_BEFORE_EXIT;
>  		default:
> -			goto restart_tracee_with_sig_0;
> +			return TE_AGAIN_RESTART;
>  	}
> +}
>  
> -	/*
> -	 * Is this post-attach SIGSTOP?
> -	 * Interestingly, the process may stop
> -	 * with STOPSIG equal to some other signal
> -	 * than SIGSTOP if we happend to attach
> -	 * just before the process takes a signal.
> -	 */
> -	if (sig == SIGSTOP && (tcp->flags & TCB_IGNORE_ONE_SIGSTOP)) {
> -		if (debug_flag)
> -			error_msg("ignored SIGSTOP on pid %d", tcp->pid);
> -		tcp->flags &= ~TCB_IGNORE_ONE_SIGSTOP;
> -		goto restart_tracee_with_sig_0;
> -	}
> -
> -	if (sig != syscall_trap_sig) {
> -		siginfo_t si = {};
> -
> -		/*
> -		 * True if tracee is stopped by signal
> -		 * (as opposed to "tracee received signal").
> -		 * TODO: shouldn't we check for errno == EINVAL too?
> -		 * We can get ESRCH instead, you know...
> -		 */
> -		stopped = ptrace(PTRACE_GETSIGINFO, pid, 0, &si) < 0;
> -#if USE_SEIZE
> -show_stopsig:
> -#endif
> -		print_stopped(tcp, stopped ? NULL : &si, sig);
> -
> -		if (!stopped)
> -			/* It's signal-delivery-stop. Inject the signal */
> -			goto restart_tracee;
> -
> -		/* It's group-stop */
> -		if (use_seize) {
> +/* Returns true iff the main trace loop has to continue. */
> +static bool
> +dispatch_event(enum trace_event ret, int *pstatus, siginfo_t *si)
> +{
> +	unsigned int restart_op = PTRACE_SYSCALL;
> +	unsigned int restart_sig = 0;
> +	switch (ret) {
> +		case TE_BREAK:
> +			return false;
> +		case TE_AGAIN:
> +			return true;

These case labels are overindented which leads to longer lines, see below.
Please add an empty line between case blocks, it makes the code easier
to read.

> +		case TE_AGAIN_RESTART:
> +			goto restart;

Let's just use "break" instead of "goto restart".

> +		case TE_STOP_BEFORE_EXECVE:
>  			/*
> -			 * This ends ptrace-stop, but does *not* end group-stop.
> -			 * This makes stopping signals work properly on straced process
> -			 * (that is, process really stops. It used to continue to run).
> +			 * Under Linux, execve changes pid to thread leader's pid,
> +			 * and we see this changed pid on EVENT_EXEC and later,
> +			 * execve sysexit. Leader "disappears" without exit
> +			 * notification. Let user know that, drop leader's tcb,
> +			 * and fix up pid in execve thread's tcb.
> +			 * Effectively, execve thread's tcb replaces leader's tcb.
> +			 *
> +			 * BTW, leader is 'stuck undead' (doesn't report WIFEXITED
> +			 * on exit syscall) in multithreaded programs exactly
> +			 * in order to handle this case.
> +			 *
> +			 * PTRACE_GETEVENTMSG returns old pid starting from Linux 3.0.
> +			 * On 2.6 and earlier, it can return garbage.
>  			 */
> -			if (ptrace_restart(PTRACE_LISTEN, tcp, 0) < 0) {
> -				/* Note: ptrace_restart emitted error message */
> -				exit_code = 1;
> -				return false;
> +			if (os_release >= KERNEL_VERSION(3,0,0))
> +				current_tcp = maybe_switch_tcbs(current_tcp, current_tcp->pid);
> +
> +			if (detach_on_execve) {
> +				if (current_tcp->flags & TCB_SKIP_DETACH_ON_FIRST_EXEC) {
> +					current_tcp->flags &= ~TCB_SKIP_DETACH_ON_FIRST_EXEC;
> +				} else {
> +					detach(current_tcp); /* do "-b execve" thingy */
> +					return true;
> +				}
>  			}
> +			goto restart;
> +		case TE_SIGNALLED:
> +			print_signalled(current_tcp, current_tcp->pid, *pstatus);
> +			droptcb(current_tcp);
>  			return true;
> -		}
> -		/* We don't have PTRACE_LISTEN support... */
> -		goto restart_tracee;
> +		case TE_EXITED:
> +			print_exited(current_tcp, current_tcp->pid, *pstatus);
> +			droptcb(current_tcp);
> +			return true;
> +		case TE_GROUP_STOP:
> +			restart_sig = WSTOPSIG(*pstatus);
> +			print_stopped(current_tcp, NULL, restart_sig);
> +			if (use_seize) {
> +				/*
> +				 * This ends ptrace-stop, but does *not* end group-stop.
> +				 * This makes stopping signals work properly on straced
> +				 * process (that is, process really stops. It used to
> +				 * continue to run).
> +				 */
> +				restart_op = PTRACE_LISTEN;
> +				restart_sig = 0;
> +			}
> +			goto restart;
> +		case TE_SIGNAL_DELIVERY_STOP:
> +			restart_sig = WSTOPSIG(*pstatus);
> +			print_stopped(current_tcp, si, restart_sig);
> +			goto restart;
> +		case TE_SYSCALL_STOP:
> +			if (trace_syscall(current_tcp, &restart_sig) < 0) {
> +				/*
> +				 * ptrace() failed in trace_syscall().
> +				 * Likely a result of process disappearing mid-flight.
> +				 * Observed case: exit_group() or SIGKILL terminating
> +				 * all processes in thread group.
> +				 * We assume that ptrace error was caused by process death.
> +				 * We used to detach(current_tcp) here, but since we no longer
> +				 * implement "detach before death" policy/hack,
> +				 * we can let this process to report its death to us
> +				 * normally, via WIFEXITED or WIFSIGNALED wait status.
> +				 */
> +				return true;
> +			}
> +			goto restart;
> +		case TE_STOP_BEFORE_EXIT:
> +			print_event_exit(current_tcp);
> +			goto restart;
>  	}
> -
> -	/* We handled quick cases, we are permitted to interrupt now. */

Please keep this comment.

> +restart:
>  	if (interrupted)
>  		return false;

If all "goto restart" statements are replaced, the label is no longer
needed.

-- 
ldv
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 801 bytes
Desc: not available
URL: <http://lists.strace.io/pipermail/strace-devel/attachments/20170531/4a6df4ad/attachment.bin>