[PATCH] strace.c: split trace() into next_event()/dispatch_event()
Dmitry V. Levin
ldv at altlinux.org
Wed May 31 15:58:07 UTC 2017
On Mon, May 15, 2017 at 01:42:40AM +0300, Victor Krapivensky wrote:
> This is a preparation needed to implement a pull-style API for LuaJIT.
I'm very much in favour of this change, it makes the control flow easier
to understand. Technically, this patch looks correct, although I've spend
a lot of my reviewing time to get to this conclusion. Next time when you
do refactoring, try making the change more obvious. :)
> -/* Returns true iff the main trace loop has to continue. */
> -static bool
> -trace(void)
> +enum trace_event {
> + /* Break the main loop. */
> + TE_BREAK,
> +
> + /* Call next_event() again. */
> + TE_AGAIN,
I suggest renaming TE_AGAIN to TE_NEXT.
> + /* Restart the tracee with signal 0 and call next_event() again. */
> + TE_AGAIN_RESTART,
I suggest renaming TE_AGAIN_RESTART to TE_RESTART.
> + /*
> + * For all the events below, current_tcp is set to current tracee's tcb.
> + * All the suggested actions imply that you want to continue the tracing of the current
> + * tracee; alternatively, you can detach it.
> + */
> +
> + /*
> + * Tracee is going to perform execve().
> + * Restart the tracee with signal 0.
> + */
> + TE_STOP_BEFORE_EXECVE,
> +
> + /*
> + * Tracee is going to terminate.
> + * Restart the tracee with signal 0.
> + */
> + TE_STOP_BEFORE_EXIT,
> +
> + /*
> + * Tracee was killed by a signal with number WTERMSIG(*pstatus).
> + */
> + TE_SIGNALLED,
> +
> + /*
> + * Tracee exited with status WEXITSTATUS(*pstatus).
> + */
> + TE_EXITED,
> +
> + /*
> + * Tracee was stopped by a signal with number WSTOPSIG(*pstatus).
> + * Restart the tracee with that signal number.
> + */
> + TE_GROUP_STOP,
> +
> + /*
> + * Tracee received signal with number WSTOPSIG(*pstatus); signal info is written to *si.
> + * Restart the tracee (with that signal number if you want to deliver it).
> + */
> + TE_SIGNAL_DELIVERY_STOP,
> +
> + /*
> + * Syscall entry or exit.
> + * Restart the tracee with signal 0, or with an injected signal number.
> + */
> + TE_SYSCALL_STOP,
I suggest reordering these states so that the more frequent ones
are listed before the more rare ones.
Likewise, in the switch statement in dispatch_event.
> +};
> +
> +static enum trace_event
> +next_event(int *pstatus, siginfo_t *si)
> {
> int pid;
> int wait_errno;
> @@ -2260,11 +2316,11 @@ trace(void)
> bool stopped;
> unsigned int sig;
> unsigned int event;
> - struct tcb *tcp;
> struct rusage ru;
> + struct tcb *tcp;
>
> if (interrupted)
> - return false;
> + return TE_BREAK;
>
> /*
> * Used to exit simply when nprocs hits zero, but in this testcase:
> @@ -2284,21 +2340,21 @@ trace(void)
> * on exit. Oh well...
> */
> if (nprocs == 0)
> - return false;
> + return TE_BREAK;
> }
>
> if (interactive)
> sigprocmask(SIG_SETMASK, &empty_set, NULL);
> - pid = wait4(-1, &status, __WALL, (cflag ? &ru : NULL));
> + pid = wait4(-1, pstatus, __WALL, (cflag ? &ru : NULL));
> wait_errno = errno;
> if (interactive)
> - sigprocmask(SIG_BLOCK, &blocked_set, NULL);
> + sigprocmask(SIG_SETMASK, &blocked_set, NULL);
>
> if (pid < 0) {
> if (wait_errno == EINTR)
> - return true;
> + return TE_AGAIN;
> if (nprocs == 0 && wait_errno == ECHILD)
> - return false;
> + return TE_BREAK;
> /*
> * If nprocs > 0, ECHILD is not expected,
> * treat it as any other error here:
> @@ -2307,10 +2363,12 @@ trace(void)
> perror_msg_and_die("wait4(__WALL)");
> }
>
> + status = *pstatus;
Do we need status variable along with *pstatus?
> +
> if (pid == popen_pid) {
> if (!WIFSTOPPED(status))
> popen_pid = 0;
> - return true;
> + return TE_AGAIN;
> }
>
> if (debug_flag)
> @@ -2322,61 +2380,28 @@ trace(void)
> if (!tcp) {
> tcp = maybe_allocate_tcb(pid, status);
> if (!tcp)
> - return true;
> + return TE_AGAIN;
> }
>
> clear_regs();
>
> event = (unsigned int) status >> 16;
>
> - if (event == PTRACE_EVENT_EXEC) {
> - /*
> - * Under Linux, execve changes pid to thread leader's pid,
> - * and we see this changed pid on EVENT_EXEC and later,
> - * execve sysexit. Leader "disappears" without exit
> - * notification. Let user know that, drop leader's tcb,
> - * and fix up pid in execve thread's tcb.
> - * Effectively, execve thread's tcb replaces leader's tcb.
> - *
> - * BTW, leader is 'stuck undead' (doesn't report WIFEXITED
> - * on exit syscall) in multithreaded programs exactly
> - * in order to handle this case.
> - *
> - * PTRACE_GETEVENTMSG returns old pid starting from Linux 3.0.
> - * On 2.6 and earlier, it can return garbage.
> - */
> - if (os_release >= KERNEL_VERSION(3,0,0))
> - tcp = maybe_switch_tcbs(tcp, pid);
> -
> - if (detach_on_execve) {
> - if (tcp->flags & TCB_SKIP_DETACH_ON_FIRST_EXEC) {
> - tcp->flags &= ~TCB_SKIP_DETACH_ON_FIRST_EXEC;
> - } else {
> - detach(tcp); /* do "-b execve" thingy */
> - return true;
> - }
> - }
> - }
> -
> - /* Set current output file */
Please keep this comment.
> current_tcp = tcp;
>
> + if (event == PTRACE_EVENT_EXEC)
> + return TE_STOP_BEFORE_EXECVE;
> +
> if (cflag) {
> tv_sub(&tcp->dtime, &ru.ru_stime, &tcp->stime);
> tcp->stime = ru.ru_stime;
> }
>
> - if (WIFSIGNALED(status)) {
> - print_signalled(tcp, pid, status);
> - droptcb(tcp);
> - return true;
> - }
> + if (WIFSIGNALED(status))
> + return TE_SIGNALLED;
>
> - if (WIFEXITED(status)) {
> - print_exited(tcp, pid, status);
> - droptcb(tcp);
> - return true;
> - }
> + if (WIFEXITED(status))
> + return TE_EXITED;
>
> if (!WIFSTOPPED(status)) {
> /*
> @@ -2385,22 +2410,43 @@ trace(void)
> */
> error_msg("pid %u not stopped!", pid);
> droptcb(tcp);
> - return true;
> + return TE_AGAIN;
> }
>
> /* Is this the very first time we see this tracee stopped? */
> - if (tcp->flags & TCB_STARTUP) {
> + if (tcp->flags & TCB_STARTUP)
> startup_tcb(tcp);
> - }
>
> sig = WSTOPSIG(status);
>
> switch (event) {
> case 0:
> + /*
> + * Is this post-attach SIGSTOP?
> + * Interestingly, the process may stop
> + * with STOPSIG equal to some other signal
> + * than SIGSTOP if we happend to attach
> + * just before the process takes a signal.
> + */
> + if (sig == SIGSTOP && (tcp->flags & TCB_IGNORE_ONE_SIGSTOP)) {
> + if (debug_flag)
> + error_msg("ignored SIGSTOP on pid %d", tcp->pid);
> + tcp->flags &= ~TCB_IGNORE_ONE_SIGSTOP;
> + return TE_AGAIN_RESTART;
> + } else if (sig == syscall_trap_sig) {
> + return TE_SYSCALL_STOP;
> + } else {
> + *si = (siginfo_t) {};
I'm not sure this is portable enough, let's use traditional memset.
> + /*
> + * True if tracee is stopped by signal
> + * (as opposed to "tracee received signal").
> + * TODO: shouldn't we check for errno == EINVAL too?
> + * We can get ESRCH instead, you know...
> + */
> + stopped = ptrace(PTRACE_GETSIGINFO, pid, 0, si) < 0;
> + return stopped ? TE_GROUP_STOP : TE_SIGNAL_DELIVERY_STOP;
As this is the only place where "stopped" variable is used,
let's move its declaration here.
> + }
> break;
> - case PTRACE_EVENT_EXIT:
> - print_event_exit(tcp);
> - goto restart_tracee_with_sig_0;
> #if USE_SEIZE
> case PTRACE_EVENT_STOP:
> /*
> @@ -2412,101 +2458,113 @@ trace(void)
> case SIGTSTP:
> case SIGTTIN:
> case SIGTTOU:
> - stopped = true;
> - goto show_stopsig;
> - }
> - /* fall through */
> + return TE_GROUP_STOP;
> + }
> + return TE_AGAIN_RESTART;
> #endif
> + case PTRACE_EVENT_EXIT:
> + return TE_STOP_BEFORE_EXIT;
> default:
> - goto restart_tracee_with_sig_0;
> + return TE_AGAIN_RESTART;
> }
> +}
>
> - /*
> - * Is this post-attach SIGSTOP?
> - * Interestingly, the process may stop
> - * with STOPSIG equal to some other signal
> - * than SIGSTOP if we happend to attach
> - * just before the process takes a signal.
> - */
> - if (sig == SIGSTOP && (tcp->flags & TCB_IGNORE_ONE_SIGSTOP)) {
> - if (debug_flag)
> - error_msg("ignored SIGSTOP on pid %d", tcp->pid);
> - tcp->flags &= ~TCB_IGNORE_ONE_SIGSTOP;
> - goto restart_tracee_with_sig_0;
> - }
> -
> - if (sig != syscall_trap_sig) {
> - siginfo_t si = {};
> -
> - /*
> - * True if tracee is stopped by signal
> - * (as opposed to "tracee received signal").
> - * TODO: shouldn't we check for errno == EINVAL too?
> - * We can get ESRCH instead, you know...
> - */
> - stopped = ptrace(PTRACE_GETSIGINFO, pid, 0, &si) < 0;
> -#if USE_SEIZE
> -show_stopsig:
> -#endif
> - print_stopped(tcp, stopped ? NULL : &si, sig);
> -
> - if (!stopped)
> - /* It's signal-delivery-stop. Inject the signal */
> - goto restart_tracee;
> -
> - /* It's group-stop */
> - if (use_seize) {
> +/* Returns true iff the main trace loop has to continue. */
> +static bool
> +dispatch_event(enum trace_event ret, int *pstatus, siginfo_t *si)
> +{
> + unsigned int restart_op = PTRACE_SYSCALL;
> + unsigned int restart_sig = 0;
> + switch (ret) {
> + case TE_BREAK:
> + return false;
> + case TE_AGAIN:
> + return true;
These case labels are overindented which leads to longer lines, see below.
Please add an empty line between case blocks, it makes the code easier
to read.
> + case TE_AGAIN_RESTART:
> + goto restart;
Let's just use "break" instead of "goto restart".
> + case TE_STOP_BEFORE_EXECVE:
> /*
> - * This ends ptrace-stop, but does *not* end group-stop.
> - * This makes stopping signals work properly on straced process
> - * (that is, process really stops. It used to continue to run).
> + * Under Linux, execve changes pid to thread leader's pid,
> + * and we see this changed pid on EVENT_EXEC and later,
> + * execve sysexit. Leader "disappears" without exit
> + * notification. Let user know that, drop leader's tcb,
> + * and fix up pid in execve thread's tcb.
> + * Effectively, execve thread's tcb replaces leader's tcb.
> + *
> + * BTW, leader is 'stuck undead' (doesn't report WIFEXITED
> + * on exit syscall) in multithreaded programs exactly
> + * in order to handle this case.
> + *
> + * PTRACE_GETEVENTMSG returns old pid starting from Linux 3.0.
> + * On 2.6 and earlier, it can return garbage.
> */
> - if (ptrace_restart(PTRACE_LISTEN, tcp, 0) < 0) {
> - /* Note: ptrace_restart emitted error message */
> - exit_code = 1;
> - return false;
> + if (os_release >= KERNEL_VERSION(3,0,0))
> + current_tcp = maybe_switch_tcbs(current_tcp, current_tcp->pid);
> +
> + if (detach_on_execve) {
> + if (current_tcp->flags & TCB_SKIP_DETACH_ON_FIRST_EXEC) {
> + current_tcp->flags &= ~TCB_SKIP_DETACH_ON_FIRST_EXEC;
> + } else {
> + detach(current_tcp); /* do "-b execve" thingy */
> + return true;
> + }
> }
> + goto restart;
> + case TE_SIGNALLED:
> + print_signalled(current_tcp, current_tcp->pid, *pstatus);
> + droptcb(current_tcp);
> return true;
> - }
> - /* We don't have PTRACE_LISTEN support... */
> - goto restart_tracee;
> + case TE_EXITED:
> + print_exited(current_tcp, current_tcp->pid, *pstatus);
> + droptcb(current_tcp);
> + return true;
> + case TE_GROUP_STOP:
> + restart_sig = WSTOPSIG(*pstatus);
> + print_stopped(current_tcp, NULL, restart_sig);
> + if (use_seize) {
> + /*
> + * This ends ptrace-stop, but does *not* end group-stop.
> + * This makes stopping signals work properly on straced
> + * process (that is, process really stops. It used to
> + * continue to run).
> + */
> + restart_op = PTRACE_LISTEN;
> + restart_sig = 0;
> + }
> + goto restart;
> + case TE_SIGNAL_DELIVERY_STOP:
> + restart_sig = WSTOPSIG(*pstatus);
> + print_stopped(current_tcp, si, restart_sig);
> + goto restart;
> + case TE_SYSCALL_STOP:
> + if (trace_syscall(current_tcp, &restart_sig) < 0) {
> + /*
> + * ptrace() failed in trace_syscall().
> + * Likely a result of process disappearing mid-flight.
> + * Observed case: exit_group() or SIGKILL terminating
> + * all processes in thread group.
> + * We assume that ptrace error was caused by process death.
> + * We used to detach(current_tcp) here, but since we no longer
> + * implement "detach before death" policy/hack,
> + * we can let this process to report its death to us
> + * normally, via WIFEXITED or WIFSIGNALED wait status.
> + */
> + return true;
> + }
> + goto restart;
> + case TE_STOP_BEFORE_EXIT:
> + print_event_exit(current_tcp);
> + goto restart;
> }
> -
> - /* We handled quick cases, we are permitted to interrupt now. */
Please keep this comment.
> +restart:
> if (interrupted)
> return false;
If all "goto restart" statements are replaced, the label is no longer
needed.
--
ldv
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 801 bytes
Desc: not available
URL: <http://lists.strace.io/pipermail/strace-devel/attachments/20170531/4a6df4ad/attachment.bin>
More information about the Strace-devel
mailing list