[PATCH v3 2/3] The -z and -Z options print only successful and failing syscalls respectively. However, failure of syscall is only known after syscall return. Thus, we end up with something like this on, e.g., ENOENT:

Paul Chaignon paul.chaignon at gmail.com
Sun May 12 18:20:43 UTC 2019


On Thu, Apr 25, 2019 at 04:22PM, burkhard.kohl at intel.com wrote:
> From: Burkhard Kohl <burkhard.kohl at intel.com>
> 
>   open("does_not_exist", O_RDONLY <unfinished ...>
> 
> whereas the intended result is that the open(...) line is not shown at
> all.
> 
> This patch fixes this issue using open_memstream.  When either the -z
> or the -Z option is used, the output is staged in memory (using
> open_memstream) until we know whether the syscall was successful. If
> the open_memstream function is not available, strace retains the old
> behavior.
> 
> Combining both -z and -Z options may be useful in some cases, e.g.
> when tracing signals.
> 
> Note: Syscalls returning void (e.g. exit_group) will be missed by
> both options.
> 
> Signed-off-by: Burkhard Kohl <burkhard.kohl at intel.com>
> Co-Authored-by: Paul Chaignon <paul.chaignon at gmail.com>
> ---
>  Makefile.am    |  1 +
>  configure.ac   |  1 +
>  defs.h         | 10 ++++++++
>  stage_output.c | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  strace.1.in    |  7 +++++
>  strace.c       | 12 ++++++++-
>  syscall.c      | 21 +++++++++++++--
>  7 files changed, 118 insertions(+), 3 deletions(-)
>  create mode 100644 stage_output.c
> 
> diff --git a/Makefile.am b/Makefile.am
> index b5946914..4e7ef4f4 100644
> --- a/Makefile.am
> +++ b/Makefile.am
> @@ -298,6 +298,7 @@ strace_SOURCES =	\
>  	sock.c		\
>  	sockaddr.c	\
>  	socketutils.c	\
> +	stage_output.c	\
>  	sparc.c		\
>  	sram_alloc.c	\
>  	stat.c		\
> diff --git a/configure.ac b/configure.ac
> index 86d3626b..523a6496 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -262,6 +262,7 @@ AC_CHECK_FUNCS(m4_normalize([
>  	iconv_open
>  	if_indextoname
>  	open64
> +	open_memstream
>  	prctl
>  	preadv
>  	process_vm_readv
> diff --git a/defs.h b/defs.h
> index b5ebb343..ee5da8b3 100644
> --- a/defs.h
> +++ b/defs.h
> @@ -204,6 +204,11 @@ struct tcb {
>  	int sys_func_rval;	/* Syscall entry parser's return value */
>  	int curcol;		/* Output column for this process */
>  	FILE *outf;		/* Output file for this process */
> +#if HAVE_OPEN_MEMSTREAM
> +	FILE *real_outf;	/* backup for real outf while staging */
> +	char *memfptr;
> +	size_t memfloc;
> +#endif
>  	const char *auxstr;	/* Auxiliary info from syscall (see RVAL_STR) */
>  	void *_priv_data;	/* Private data for syscall decoding functions */
>  	void (*_free_priv_data)(void *); /* Callback for freeing priv_data */
> @@ -1165,6 +1170,11 @@ extern void tprints(const char *str);
>  extern void tprintf_comment(const char *fmt, ...) ATTRIBUTE_FORMAT((printf, 1, 2));
>  extern void tprints_comment(const char *str);
> 
> +/* for staging output */
> +#ifdef HAVE_OPEN_MEMSTREAM
> +extern FILE *strace_open_memstream(struct tcb *tcp);
> +extern void strace_close_memstream(struct tcb *tcp, bool);
> +#endif
>  static inline void
>  printaddr_comment(const kernel_ulong_t addr)
>  {
> diff --git a/stage_output.c b/stage_output.c
> new file mode 100644
> index 00000000..c353e265
> --- /dev/null
> +++ b/stage_output.c
> @@ -0,0 +1,69 @@
> +/*
> + * Copyright (c) 2017/2019 Intel Corporation
> + * Copyright (c) 2019 Paul Chaignon <paul.chaignon at gmail.com>
> + * All rights reserved.
> + *
> + * SPDX-License-Identifier: LGPL-2.1-or-later
> + */
> +
> +/*
> + *  Output staging is based on the "open_memstream()" function, see:
> + *  http://man7.org/linux/man-pages/man3/open_memstream.3.html
> + *  Works with glibc version 2.7 or higher
> + *
> + *  open_memstream returns a FILE stream that allows writing to a
> + *  dynamically growing buffer, which then will be either copied
> + *  to tcp->outf (syscall successful) or dropped (syscall failed)
> + *  or vice versa.
> + */
> +
> +#include "defs.h"
> +#if HAVE_OPEN_MEMSTREAM
> +
> +FILE *
> +strace_open_memstream(struct tcb *tcp)
> +{
> +	FILE *fp = NULL;
> +
> +	tcp->memfptr = NULL;
> +	fp = open_memstream(&tcp->memfptr, &tcp->memfloc);
> +	if (!fp)
> +		perror_msg_and_die("open_memstream");
> +	/* Call to fflush required to update tcp->memfptr, see open_memstream
> +	 * man page. */
> +	fflush(fp);
> +
> +	/* store the physical (open) FILE pointer for later restauration */
> +	tcp->real_outf = tcp->outf;
> +	tcp->outf = fp;
> +	return fp;
> +}
> +
> +void
> +strace_close_memstream(struct tcb *tcp, bool drop)
> +{
> +	if (!tcp->real_outf) {

If you want to detect such errors, it would be preferable to set real_outf
to NULL at the end of this function.

> +		if (debug_flag)
> +			error_msg("Strange, strace_closememstream called although FILE pointers not swapped!");

These two lines can be replaced with a call to debug_msg().

The error message could also be shortened.  Maybe just "real_outf is NULL".

> +		return;
> +	}
> +
> +	if (fclose(tcp->outf))
> +		perror_msg("fclose(tcp->outf)");
> +
> +	/* Restore the pyhisical FILE pointer */

Typo on "physical".

> +	tcp->outf   = tcp->real_outf;
> +	if (tcp->memfptr) {
> +		if (drop) {
> +			if (debug_flag)
> +				error_msg("staged syscall output dropped: %s", tcp->memfptr);

Ditto for debug_msg().

> +		} else {
> +			if (debug_flag)
> +				error_msg("staged syscall output printed: %s", tcp->memfptr);

Ditto.

> +			fprintf(tcp->outf, "%s", tcp->memfptr);

This can be replaced with a call to fputs_unlocked().

> +		}
> +		free(tcp->memfptr);
> +		tcp->memfptr = NULL;
> +	}
> +}
> +#endif
> diff --git a/strace.1.in b/strace.1.in
> index 76a74119..6ab95836 100644
> --- a/strace.1.in
> +++ b/strace.1.in
> @@ -771,6 +771,13 @@ Print unabbreviated versions of environment, stat, termios, etc.
>  calls.  These structures are very common in calls and so the default
>  behavior displays a reasonable subset of structure members.  Use
>  this option to get all of the gory details.
> +.TP
> +.B \-z
> +Print successful syscalls only.

It might be better to be precise here regarding what we mean by
"successful" exactly.  Maybe "Print only syscalls that did not return an
error code"?

> +.TP
> +.BI "\-Z "
> +Print failed syscalls only. (-z/-Z options may be combined).
> +Can be useful e.g. to catch only signals.

I'm not sure this is a good idea.  There's already -e trace=none to catch
only signals.  Unless we have a strong use case for this, we should
probably avoid exposing new features; once users depend on it, we can't go
back and prohibit it.  I think we should explicitly prohibit it for now
and allow it in the future if there's ever a need.

>  .SS Tracing
>  .TP 12
>  .BI "\-b " syscall
> diff --git a/strace.c b/strace.c
> index 42cd1419..2b408bcb 100644
> --- a/strace.c
> +++ b/strace.c
> @@ -275,7 +275,13 @@ Filtering:\n\
>    -e expr        a qualifying expression: option=[!]all or option=[!]val1[,val2]...\n\
>       options:    trace, abbrev, verbose, raw, signal, read, write, fault, inject, kvm\n\
>    -P path        trace accesses to path\n\
> -\n\
> +"
> +#if HAVE_OPEN_MEMSTREAM
> +"  -z             print only succeeding syscalls\n\
> +  -Z             print only failing syscalls\n\
> +"
> +#endif
> +"\n\
>  Tracing:\n\
>    -b execve      detach on execve syscall\n\
>    -D             run tracer process as a detached grandchild, not as parent\n\
> @@ -1587,7 +1593,11 @@ init(int argc, char *argv[])
>  #ifdef ENABLE_STACKTRACE
>  	    "k"
>  #endif
> +#ifdef HAVE_OPEN_MEMSTREAM
>  	    "a:Ab:cCdDe:E:fFhiI:o:O:p:P:qrs:S:tTu:vVwxX:yzZ")) != EOF) {
> +#else
> +       "a:Ab:cCdDe:E:fFhiI:o:O:p:P:qrs:S:tTu:vVwxX:yz")) != EOF) {
> +#endif
>  		switch (c) {
>  		case 'a':
>  			acolumn = string_to_uint(optarg);
> diff --git a/syscall.c b/syscall.c
> index 949a848b..61057796 100644
> --- a/syscall.c
> +++ b/syscall.c
> @@ -652,6 +652,10 @@ syscall_entering_trace(struct tcb *tcp, unsigned int *sig)
>  	}
>  #endif
> 
> +#if HAVE_OPEN_MEMSTREAM
> +	if (not_failing_only || failing_only)
> +		strace_open_memstream(tcp);
> +#endif
>  	printleader(tcp);
>  	tprintf("%s(", tcp_sysent(tcp)->sys_name);
>  	int res = raw(tcp) ? printargs(tcp) : tcp_sysent(tcp)->sys_func(tcp);
> @@ -753,6 +757,15 @@ syscall_exiting_trace(struct tcb *tcp, struct timespec *ts, int res)
>  	if (raw(tcp)) {
>  		/* sys_res = printargs(tcp); - but it's nop on sysexit */
>  	} else {
> +#if HAVE_OPEN_MEMSTREAM
> +		if ((not_failing_only && syserror(tcp)) ||
> +			(failing_only && !syserror(tcp))) {
> +			line_ended();
> +			strace_close_memstream(tcp, true);
> +			return 0;   /* ignore failed/successful
> +						 * syscalls */
> +		}
> +#else

The below comment can now be removed.

>  	/* FIXME: not_failing_only (IOW, option -z) is broken:
>  	 * failure of syscall is known only after syscall return.
>  	 * Thus we end up with something like this on, say, ENOENT:
> @@ -763,8 +776,8 @@ syscall_exiting_trace(struct tcb *tcp, struct timespec *ts, int res)
>  	 */
>  		if ((not_failing_only && syserror(tcp)) ||
>  			(failing_only     && !syserror(tcp)))
> -			return 0;	/* ignore failed/successful
> -						 * syscalls */
> +			return 0;	/* ignore failed syscalls*/

Not sure I understand the change of comment here.

> +#endif
>  		if (tcp->sys_func_rval & RVAL_DECODED)
>  			sys_res = tcp->sys_func_rval;
>  		else
> @@ -900,6 +913,10 @@ syscall_exiting_trace(struct tcb *tcp, struct timespec *ts, int res)
>  	}
>  	tprints("\n");
>  	dumpio(tcp);
> +#ifdef HAVE_OPEN_MEMSTREAM
> +	if (not_failing_only || failing_only)
> +		strace_close_memstream(tcp, false);
> +#endif
>  	line_ended();
> 
>  #ifdef ENABLE_STACKTRACE
> --
> 2.17.1
> 
> Intel Deutschland GmbH
> Registered Address: Am Campeon 10-12, 85579 Neubiberg, Germany
> Tel: +49 89 99 8853-0, www.intel.de
> Managing Directors: Christin Eisenschmid, Gary Kershaw
> Chairperson of the Supervisory Board: Nicole Lau
> Registered Office: Munich
> Commercial Register: Amtsgericht Muenchen HRB 186928
> 
> -- 
> Strace-devel mailing list
> Strace-devel at lists.strace.io
> https://lists.strace.io/mailman/listinfo/strace-devel


More information about the Strace-devel mailing list