[PATCH v2 2/3] Stage output for -z and -Z options

Fri Apr 5 14:16:17 UTC 2019

> -----Original Message-----
> From: Dmitry V. Levin [mailto:ldv at altlinux.org]
> Sent: Friday, April 5, 2019 03:18
> To: Paul Chaignon <paul.chaignon at gmail.com>
> Cc: strace-devel at lists.strace.io; Kohl, Burkhard <burkhard.kohl at intel.com>
> Subject: Re: [PATCH v2 2/3] Stage output for -z and -Z options
> 
> On Tue, Apr 02, 2019 at 09:57:04PM +0200, Paul Chaignon wrote:
> > The -z and -Z options print only successful and failing syscalls
> > respectively.  However, failure of syscall is only known after syscall
> > return.  Thus, we end up with something like this on, e.g., ENOENT:
> >
> >   open("does_not_exist", O_RDONLY <unfinished ...>
> >
> > whereas the intended result is that the open(...) line is not shown at
> > all.
> >
> > This patch fixes this issue using open_memstream.  When either the -z
> > or the -Z option is used, the output is staged in memory (using
> > open_memstream) until we know whether the syscall was successful.  If
> > the open_memstream function is not available, strace retains the old behavior.
> >
> > Signed-off-by: Paul Chaignon <paul.chaignon at gmail.com>
> > Co-Authored-by: Burkhard Kohl <burkhard.kohl at intel.com>
> > ---
> >  Makefile.am    |  1 +
> >  configure.ac   |  1 +
> >  defs.h         | 15 ++++++++++
> >  stage_output.c | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++
> >  strace.c       | 62 ++++++++++++++++++++++++++++++++++++------
> >  syscall.c      | 19 +++++++------
> 
> I think both options need to be documented in strace.1.in as well.
> 
> >  6 files changed, 155 insertions(+), 17 deletions(-)  create mode
> > 100644 stage_output.c
> >
> > diff --git a/Makefile.am b/Makefile.am index 56987ae0..dd44b04c 100644
> > --- a/Makefile.am
> > +++ b/Makefile.am
> > @@ -298,6 +298,7 @@ strace_SOURCES =	\
> >  	sock.c		\
> >  	sockaddr.c	\
> >  	socketutils.c	\
> > +	stage_output.c	\
> >  	sparc.c		\
> >  	sram_alloc.c	\
> >  	stat.c		\
> > diff --git a/configure.ac b/configure.ac index c95fd229..91f3152d
> > 100644
> > --- a/configure.ac
> > +++ b/configure.ac
> > @@ -258,6 +258,7 @@ AC_CHECK_FUNCS(m4_normalize([
> >  	iconv_open
> >  	if_indextoname
> >  	open64
> > +	open_memstream
> >  	prctl
> >  	preadv
> >  	process_vm_readv
> > diff --git a/defs.h b/defs.h
> > index b5ebb343..8600dec2 100644
> > --- a/defs.h
> > +++ b/defs.h
> > @@ -204,6 +204,12 @@ struct tcb {
> >  	int sys_func_rval;	/* Syscall entry parser's return value */
> >  	int curcol;		/* Output column for this process */
> >  	FILE *outf;		/* Output file for this process */
> > +#if HAVE_OPEN_MEMSTREAM
> > +	FILE *memf;		/* Staged output file for this process */
> > +	char *memfptr;
> > +	size_t memfloc;
> > +#endif
> > +
> >  	const char *auxstr;	/* Auxiliary info from syscall (see RVAL_STR) */
> >  	void *_priv_data;	/* Private data for syscall decoding functions */
> >  	void (*_free_priv_data)(void *); /* Callback for freeing priv_data
> > */ @@ -418,6 +424,8 @@ extern bool count_wallclock;  extern unsigned
> > int qflag;  extern bool not_failing_only;  extern bool failing_only;
> > +/* allows dropping of failed/successful syscalls output */ extern
> > +bool stage_output;
> 
> I don't think we need stage_output variable, see below.
> 
> >  extern unsigned int show_fd_path;
> >  /* are we filtering traces based on paths? */  extern struct path_set
> > { @@ -1165,6 +1173,13 @@ extern void tprints(const char *str);  extern
> > void tprintf_comment(const char *fmt, ...) ATTRIBUTE_FORMAT((printf,
> > 1, 2));  extern void tprints_comment(const char *str);
> >
> > +/*
> > + * Staging output for -z/-Z (not_failing_only/failing_only).
> > + */
> > +extern FILE *strace_openmemstream(struct tcb *tcp); extern void
> > +drop_staged_out(struct tcb *tcp); extern void
> > +publish_staged_out(struct tcb *tcp);
> 
> Implementations of drop_staged_out and publish_staged_out share a lot, maybe there should be just a
> single function with a boolean argument.
> 
> > +
> >  static inline void
> >  printaddr_comment(const kernel_ulong_t addr)  { diff --git
> > a/stage_output.c b/stage_output.c new file mode 100644 index
> > 00000000..81b29b31
> > --- /dev/null
> > +++ b/stage_output.c
> > @@ -0,0 +1,74 @@
> > +/*
> > + * Copyright (c) 2017 Burkhard Kohl <burkhard.kohl at intel.com>
> > + * Copyright (c) 2019 Paul Chaignon <paul.chaignon at gmail.com>
> > + * All rights reserved.
> > + *
> > + * SPDX-License-Identifier: LGPL-2.1-or-later  */
> > +
> > +/*
> > + *  Output staging is based on the "open_memstream()" function, see:
> > + *  http://man7.org/linux/man-pages/man3/open_memstream.3.html
> > + *  Requires glibc version 2.7 mininal
> 
> Not really, open_memstream(3) says that
> "open_memstream() was already available in glibc 1.0.x.".
> 

That's right, but in versions prior to 2.7, seeking past the end of a stream 
does not enlarge the buffer, so fseek() will fail with -1.

> > + *
> > + *  open_memstream returns a FILE stream that allows writing to a
> > + *  dynamically growing buffer, that can be either copied to
> > + *  tcp->outf (syscall successful) or dropped (syscall failed) */
> > +
> > +#include "defs.h"
> > +#if HAVE_OPEN_MEMSTREAM
> > +
> > +FILE *
> > +strace_openmemstream(struct tcb *tcp) {
> > +	FILE *fp = NULL;
> > +
> > +	tcp->memfptr = NULL;
> > +	fp = open_memstream(&tcp->memfptr, &tcp->memfloc);
> > +	if (!fp)
> > +		perror_msg_and_die("open_memstream");
> > +	/* Call to fflush required to update tcp->memfptr, see open_memstream
> > +	 * man page. */
> > +	fflush(fp);
> > +	tcp->memf = fp;
> > +	return fp;
> > +}
> > +
> > +void
> > +drop_staged_out(struct tcb *tcp)
> > +{
> > +	if (!tcp->memf)
> > +		return;
> > +
> > +	if (fclose(tcp->memf))
> > +		perror_msg("fclose(tcp->memf)");
> > +	if (tcp->memfptr) {
> > +		if (debug_flag)
> > +			error_msg("syscall output dropped: %s", tcp->memfptr);
> 
> I think we have debug_msg() for that.
> 
> > +		free(tcp->memfptr);
> > +		tcp->memfptr = NULL;
> > +	}
> > +
> > +	/* reopen tcp->memf for subsequent use */
> > +	strace_openmemstream(tcp);
> 
> I don't think we should reopen tcp->memf here, see below.
> 
> > +}
> > +
> > +void
> > +publish_staged_out(struct tcb *tcp)
> > +{
> > +	if (!tcp->memf)
> > +		return;
> > +
> > +	if (fclose(tcp->memf))
> > +		perror_msg("flose(tcp->memf)");
> > +	if (tcp->memfptr) {
> > +		fprintf(tcp->outf, "%s", tcp->memfptr);
> 
> I think we can use fputs_unlocked() here.
> 
> > +		free(tcp->memfptr);
> > +		tcp->memfptr = NULL;
> > +	}
> > +
> > +	/* reopen tcp->memf for subsequent use */
> > +	strace_openmemstream(tcp);
> 
> I don't think we should reopen tcp->memf here, see below.
> 
> > +}
> > +#endif
> > diff --git a/strace.c b/strace.c
> > index bd2dd992..8c5eeb30 100644
> > --- a/strace.c
> > +++ b/strace.c
> > @@ -112,6 +112,7 @@ static int post_attach_sigstop =
> > TCB_IGNORE_ONE_SIGSTOP;
> >  /* Sometimes we want to print succeeding/failing syscalls only. */
> > bool not_failing_only;  bool failing_only;
> > +bool stage_output;
> >
> >  /* Show path associated with fd arguments */  unsigned int
> > show_fd_path; @@ -275,7 +276,13 @@ Filtering:\n\
> >    -e expr        a qualifying expression: option=[!]all or option=[!]val1[,val2]...\n\
> >       options:    trace, abbrev, verbose, raw, signal, read, write, fault, inject, kvm\n\
> >    -P path        trace accesses to path\n\
> > -\n\
> > +"
> > +#if HAVE_OPEN_MEMSTREAM
> > +"  -z             print only succeeding syscalls\n\
> > +  -Z             print only failing syscalls\n\
> > +"
> > +#endif
> > +"\n\
> >  Tracing:\n\
> >    -b execve      detach on execve syscall\n\
> >    -D             run tracer process as a detached grandchild, not as parent\n\
> > @@ -303,10 +310,6 @@ Miscellaneous:\n\
> >  /* ancient, no one should use it
> >  -F -- attempt to follow vforks (deprecated, use -f)\n\
> >   */
> > -/* this is broken, so don't document it --z -- print only succeeding
> > syscalls\n\ --Z -- print only failing syscalls\n\
> > - */
> >  , DEFAULT_ACOLUMN, DEFAULT_STRLEN, DEFAULT_SORTBY);
> >  	exit(0);
> >  }
> > @@ -512,8 +515,17 @@ ATTRIBUTE_FORMAT((printf, 1, 0))  static void
> > tvprintf(const char *const fmt, va_list args)  {
> > +	int n = 0;
> >  	if (current_tcp) {
> > -		int n = vfprintf(current_tcp->outf, fmt, args);
> > +		/* If open_memstream is not supported, stage_output will always
> > +		 * be false. */
> > +		if (stage_output) {
> > +			if (!current_tcp->memf)
> > +				error_msg_and_die("No file opened for current_tcp->memf\n");
> > +			n = vfprintf(current_tcp->memf, fmt, args);
> > +		} else
> > +			n = vfprintf(current_tcp->outf, fmt, args);
> > +
> 
> Not really.  If open_memstream is not supported, then current_tcp has no member called "memf" and the
> code doesn't compile.
> 
> I'd really prefer a single vfprintf invocation here.
> 
> I don't think we should have and check for stage_output here, see below.
> 
> >  		if (n < 0) {
> >  			/* very unlikely due to vfprintf buffering */
> >  			outf_perror(current_tcp);
> > @@ -539,7 +551,18 @@ void
> >  tprints(const char *str)
> >  {
> >  	if (current_tcp) {
> > -		int n = fputs_unlocked(str, current_tcp->outf);
> > +		int n;
> > +
> > +		/* If open_memstream is not supported, stage_output will always
> > +		 * be false. */
> > +		if (stage_output) {
> > +			if (!current_tcp->memf)
> > +				error_msg_and_die("No file opened for current_tcp->memf\n");
> > +			n = fputs_unlocked(str, current_tcp->memf);
> > +		} else {
> > +			n = fputs_unlocked(str, current_tcp->outf);
> > +		}
> > +
> 
> Likewise.
> 
> >  		if (n >= 0) {
> >  			current_tcp->curcol += strlen(str);
> >  			return;
> > @@ -700,6 +723,11 @@ after_successful_attach(struct tcb *tcp, const unsigned int flags)
> >  		tcp->outf = strace_fopen(name);
> >  	}
> >
> > +#if HAVE_OPEN_MEMSTREAM
> > +	if (not_failing_only || failing_only)
> > +		strace_openmemstream(tcp);
> > +#endif
> 
> I don't think we should call strace_openmemstream here, see below.
> 
> > +
> >  #ifdef ENABLE_STACKTRACE
> >  	if (stack_trace_enabled)
> >  		unwind_tcb_init(tcp);
> > @@ -1563,6 +1591,9 @@ init(int argc, char *argv[])
> >  	int c, i;
> >  	int optF = 0;
> >
> > +	/* during init don't print to internal memf stream */
> > +	stage_output = false;
> > +
> >  	if (!program_invocation_name || !*program_invocation_name) {
> >  		static char name[] = "strace";
> >  		program_invocation_name =
> > @@ -1782,6 +1813,10 @@ init(int argc, char *argv[])
> >  		unwind_init();
> >  #endif
> >
> > +#ifdef HAVE_OPEN_MEMSTREAM
> > +	stage_output = not_failing_only || failing_only; #endif
> > +
> >  	/* See if they want to run as another user. */
> >  	if (username != NULL) {
> >  		struct passwd *pent;
> > @@ -2108,6 +2143,10 @@ print_signalled(struct tcb *tcp, const int pid,
> > int status)  static void  print_exited(struct tcb *tcp, const int pid,
> > int status)  {
> > +	bool saved_stage_output_flag = stage_output;
> > +
> > +	stage_output = false;
> > +
> >  	if (pid == strace_child) {
> >  		exit_code = WEXITSTATUS(status);
> >  		strace_child = 0;
> > @@ -2119,11 +2158,16 @@ print_exited(struct tcb *tcp, const int pid, int status)
> >  		tprintf("+++ exited with %d +++\n", WEXITSTATUS(status));
> >  		line_ended();
> >  	}
> > +
> > +	stage_output = saved_stage_output_flag;
> >  }
> 
> print_exited() and other similar functions demonstrate why I think stage_output variable is not the right
> thing to do.
> 
> I suggest to call strace_openmemstream() on entering syscall instead.
> 
> >  static void
> >  print_stopped(struct tcb *tcp, const siginfo_t *si, const unsigned
> > int sig)  {
> > +	bool saved_stage_output_flag = stage_output;
> > +	stage_output = false;
> > +
> >  	if (cflag != CFLAG_ONLY_STATS
> >  	    && !hide_log(tcp)
> >  	    && is_number_in_set(sig, signal_set)) { @@ -2141,6 +2185,8 @@
> > print_stopped(struct tcb *tcp, const siginfo_t *si, const unsigned int sig)
> >  			unwind_tcb_print(tcp);
> >  #endif
> >  	}
> > +
> > +	stage_output = saved_stage_output_flag;
> >  }
> >
> >  static void
> > @@ -2793,7 +2839,7 @@ static void ATTRIBUTE_NORETURN
> >  terminate(void)
> >  {
> >  	int sig = interrupted;
> > -
> > +	stage_output = false;
> >  	cleanup(sig);
> >  	if (cflag)
> >  		call_summary(shared_log);
> > diff --git a/syscall.c b/syscall.c
> > index 7ea45292..3b4d3b44 100644
> > --- a/syscall.c
> > +++ b/syscall.c
> > @@ -753,18 +753,17 @@ syscall_exiting_trace(struct tcb *tcp, struct timespec *ts, int res)
> >  	if (raw(tcp)) {
> >  		/* sys_res = printargs(tcp); - but it's nop on sysexit */
> >  	} else {
> > -	/* FIXME: not_failing_only (IOW, option -z) is broken:
> > -	 * failure of syscall is known only after syscall return.
> > -	 * Thus we end up with something like this on, say, ENOENT:
> > -	 *     open("does_not_exist", O_RDONLY <unfinished ...>
> > -	 *     {next syscall decode}
> > -	 * whereas the intended result is that open(...) line
> > -	 * is not shown at all.
> > -	 */
> >  		if ((not_failing_only && syserror(tcp)) ||
> > -		    (failing_only && !syserror(tcp)))
> > +		    (failing_only && !syserror(tcp))) {
> > +			/* If open_memstream is not supported, stage_output
> > +			 * will always be false. */
> > +			if (stage_output) {
> > +				drop_staged_out(tcp);
> > +				line_ended();
> > +			}
> >  			return 0;	/* ignore failed/successful
> >  					 * syscalls */
> > +		}
> >  		if (tcp->sys_func_rval & RVAL_DECODED)
> >  			sys_res = tcp->sys_func_rval;
> >  		else
> > @@ -900,6 +899,8 @@ syscall_exiting_trace(struct tcb *tcp, struct timespec *ts, int res)
> >  	}
> >  	tprints("\n");
> >  	dumpio(tcp);
> > +	if (stage_output)
> > +		publish_staged_out(tcp);
> >  	line_ended();
> >
> >  #ifdef ENABLE_STACKTRACE
> 
> Something should be done in maybe_switch_tcbs() and droptcb() as well.
> 
> 
> --
> ldv

Regards,
Burkhard
Intel Deutschland GmbH
Registered Address: Am Campeon 10-12, 85579 Neubiberg, Germany
Tel: +49 89 99 8853-0, www.intel.de
Managing Directors: Christin Eisenschmid, Gary Kershaw
Chairperson of the Supervisory Board: Nicole Lau
Registered Office: Munich
Commercial Register: Amtsgericht Muenchen HRB 186928