[PATCH v9 1/3] Initial support for LuaJIT scripting

Eugene Syromiatnikov esyr at redhat.com
Mon Jul 31 01:18:21 UTC 2017


On Tue, Jul 25, 2017 at 03:59:50PM +0300, Victor Krapivensky wrote:
> * .gitignore: Add luajit_lib.h.
> * Makefile.am: Build with LuaJIT if configured so.
> (strace_SOURCES): Add defs_shared.h, ffi.h.
> (luajit_lib.h): Auto-generate from luajit_lib.lua.
> * configure.ac: Add new --with-luajit configure option.
> * defs.h (TCB_AD_HOC_INJECT): new TCB flag.
> (QUAL_HOOK_ENTRY, QUAL_HOOK_EXIT): new qual flags.
> (struct tcb): If built with LuaJIT support, include currpers field even
> if SUPPORTED_PERSONALITIES is 0.
"even if SUPPORTED_PERSONALITIES is 1", maybe?

> If built with LuaJIT support, include new ad_hoc_inject_opts field.
> Move definitions that need to be fed to LuaJIT's FFI to...
> * defs_shared.h: ...new file.
> * ffi.h: New file.
> * luajit.h: Likewise.
> * luajit_lib.lua: Likewise.
> * qualify.c (syscall_classes): move syscall classes list to the global
> scope, terminate it with a null entry.
> (hook_entry_set, hook_exit_set): New sets (if built with LuaJIT
> support).
> (lookup_class): Use global, null entry-terminated list of syscall
> classes.
> (qual_flags): If built with LuaJIT support, return QUAL_HOOK_ENTRY and
> QUAL_HOOK_EXIT flags.
> (set_hook_qual): New function.
> * strace.1 (LUAJIT SCRIPTING): New section.
> * strace.c (alloctcb): update the condition of presence of currpers
> field.
> (droptcb): If built with LuaJIT support, free ad_hoc_inject_opts.
> (init): New -l option (if built with LuaJIT support).
> (main): run Lua script, if built with LuaJIT support and a script
> was provided.
> * syscall.c (errnoent_vec, nerrnoent_vec, signalent_vec,
> nsignalent_vec, ioctlent_vec, nioctlent_vec): New global variables.
> (tcb_inject_opts): introduce a second argument indicating whether tcp's
> inject_vec should be copied from the global inject_vec if needed.
> If built with LuaJIT support and TCB_AD_HOC_INJECT flag is set, return
> tcp's ad_hoc_inject_opts.
> (tamper_with_syscall_entering): Don't copy inject_vec here; instead,
> pass true as a second argument to tcb_inject_opts.
> (tamper_with_syscall_exiting): Pass false as a second argument to
> tcb_inject_opts.
> (syscall_ad_hoc_inject): New function.
> (syscall_entering_trace): perform ad hoc injection even if the syscall
> is not traced.
> (syscall_exiting_decode): don't return 0 ("bail out") if exiting hook is
> set up for this syscall, or if an ad hoc injection was performed.
> Call tamper_with_syscall_exiting on success.
> (syscall_exiting_trace): Don't call tamper_with_syscall_exiting, check
> if the syscall is not traced again.
> (syscall_exiting_finish): Clear TCB_AD_HOC_INJECT bit.
> * sysent.h: Modify to support inclusion with FFI_CDEF.

As this is quite a user-visible change, a relevant NEWS update would also
be appropriate here.

> ---
>  .gitignore     |   1 +
>  Makefile.am    |  17 +++
>  configure.ac   |  36 ++++++
>  defs.h         |  53 +++-----
>  defs_shared.h  |  66 ++++++++++
>  ffi.h          |  19 +++
>  luajit.h       | 350 ++++++++++++++++++++++++++++++++++++++++++++++++++++
>  luajit_lib.lua | 372 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  qualify.c      |  84 ++++++++-----
>  strace.1       | 382 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  strace.c       |  36 +++++-
>  syscall.c      | 117 +++++++++++++++---
>  sysent.h       |  24 +++-
>  13 files changed, 1465 insertions(+), 92 deletions(-)
>  create mode 100644 defs_shared.h
>  create mode 100644 ffi.h
>  create mode 100644 luajit.h
>  create mode 100644 luajit_lib.lua
> 
> diff --git a/.gitignore b/.gitignore
> index 0885bcb5..a5d0c361 100644
> --- a/.gitignore
> +++ b/.gitignore
> @@ -32,6 +32,7 @@
>  /libmpers-m32.a
>  /libmpers-mx32.a
>  /libstrace.a
> +/luajit_lib.h
>  /m32_funcs.h
>  /m32_printer_decls.h
>  /m32_printer_defs.h
> diff --git a/Makefile.am b/Makefile.am
> index 5d365e24..b73dacca 100644
> --- a/Makefile.am
> +++ b/Makefile.am
> @@ -106,6 +106,7 @@ strace_SOURCES =	\
>  	copy_file_range.c \
>  	count.c		\
>  	defs.h		\
> +	defs_shared.h	\
>  	desc.c		\
>  	dirent.c	\
>  	dirent64.c	\
> @@ -128,6 +129,7 @@ strace_SOURCES =	\
>  	fetch_struct_stat.c \
>  	fetch_struct_stat64.c \
>  	fetch_struct_statfs.c \
> +	ffi.h		\
>  	file_handle.c	\
>  	file_ioctl.c	\
>  	flock.c		\
> @@ -302,6 +304,12 @@ strace_LDFLAGS += $(libunwind_LDFLAGS)
>  strace_LDADD += $(libunwind_LIBS)
>  endif
>  
> +if USE_LUAJIT
> +strace_SOURCES += luajit.h
> +strace_CPPFLAGS += $(LUAJIT_CFLAGS)
> +strace_LDADD += $(LUAJIT_LIBS)
> +endif
> +
>  @CODE_COVERAGE_RULES@
>  CODE_COVERAGE_BRANCH_COVERAGE = 1
>  CODE_COVERAGE_GENHTML_OPTIONS = $(CODE_COVERAGE_GENHTML_OPTIONS_DEFAULT) \
> @@ -830,6 +838,7 @@ EXTRA_DIST =				\
>  	linux/xtensa/set_scno.c		\
>  	linux/xtensa/syscallent.h	\
>  	linux/xtensa/userent.h		\
> +	luajit_lib.lua			\
>  	mpers.awk			\
>  	mpers.sh			\
>  	mpers_test.sh			\
> @@ -856,6 +865,9 @@ $(srcdir)/.version:
>  strace_SOURCES_c = \
>  	$(filter %.c,$(strace_SOURCES)) $(filter %.c,$(libstrace_a_SOURCES))
>  
> +luajit_lib.h: luajit_lib.lua
> +	sed 's/["\\]/\\\0/g;s/.*/"\0\\n"/' $< > $@
> +
>  sys_func.h: $(patsubst %,$(srcdir)/%,$(strace_SOURCES_c))
>  	for f in $^; do \
>  		sed -n 's/^SYS_FUNC(.*/extern &;/p' $$f; \
> @@ -934,6 +946,11 @@ CLEANFILES    = $(ioctl_redefs_h) $(ioctlent_h) $(mpers_preproc_files) \
>  		native_printer_decls.h native_printer_defs.h printers.h sen.h sys_func.h
>  DISTCLEANFILES = gnu/stubs-32.h gnu/stubs-x32.h
>  
> +if USE_LUAJIT
> +BUILT_SOURCES += luajit_lib.h
> +CLEANFILES += luajit_lib.h
> +endif
> +
>  include scno.am
>  
>  $(strace_OBJECTS): scno.h
> diff --git a/configure.ac b/configure.ac
> index 0d407aff..a4d661a3 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -734,6 +734,42 @@ AC_SUBST(dl_LIBS)
>  
>  AC_PATH_PROG([PERL], [perl])
>  
> +dnl LuaJIT scripting support
Btw, you can use #-style comments here. This may be useful for those who
will look at the generated configure script.

> +use_luajit=no
> +force_luajit=no
> +luajit_lib=luajit
> +LUAJIT_LIBS=
> +LUAJIT_CFLAGS=
> +AC_ARG_WITH([luajit],
> +            [AS_HELP_STRING([--with-luajit],
> +                            [build with LuaJIT scripting support])],
I think the fact that one can provide a library name as the option value
is worth mentioning here; it saves looking into configure.ac in order
to check whether it is supported or not.

> +            [case "${withval}" in
> +             yes)   force_luajit=yes ;;
> +             check) ;;
> +             *)     force_luajit=yes; luajit_lib="${withval}" ;;
> +             esac],
> +            [:]
> +)
> +AS_IF([test "x$luajit_lib" != xno],
> +      [PKG_CHECK_MODULES([LUAJIT],
> +                         [$luajit_lib],
> +                         [use_luajit=yes],
> +                         [AS_IF([test "x$force_luajit" = xyes],
> +                                [AC_MSG_ERROR([cannot find luajit library: $luajit_lib])]
> +                               )]
> +                        )]
> +)
> +
> +dnl enable LuaJIT
> +AC_MSG_CHECKING([whether to enable Lua scripting])
> +if test "x$use_luajit" = xyes; then
You can use AS_IF everywhere in autoconf files, not only where it
otherwise breaks other m4 scripts.

> +	AC_DEFINE([USE_LUAJIT], 1, [Enable Lua scripting support])
> +	AC_SUBST(LUAJIT_LIBS)
> +	AC_SUBST(LUAJIT_CFLAGS)
> +fi
> +AM_CONDITIONAL([USE_LUAJIT], [test "x$use_luajit" = xyes])
> +AC_MSG_RESULT([$use_luajit])
> +
>  dnl stack trace with libunwind
>  libunwind_CPPFLAGS=
>  libunwind_LDFLAGS=
> diff --git a/defs.h b/defs.h
> index 8aca8083..0de6cacc 100644
> --- a/defs.h
> +++ b/defs.h
> @@ -192,11 +192,6 @@ extern char *stpcpy(char *dst, const char *src);
>  # define PERSONALITY2_INCLUDE_FUNCS "empty.h"
>  #endif
>  
> -typedef struct ioctlent {
> -	const char *symbol;
> -	unsigned int code;
> -} struct_ioctlent;
> -
>  struct inject_opts {
>  	uint16_t first;
>  	uint16_t step;
> @@ -207,39 +202,7 @@ struct inject_opts {
>  #define MAX_ERRNO_VALUE			4095
>  #define INJECT_OPTS_RVAL_DEFAULT	(-(MAX_ERRNO_VALUE + 1))
>  
> -/* Trace Control Block */
> -struct tcb {
> -	int flags;		/* See below for TCB_ values */
> -	int pid;		/* If 0, this tcb is free */
> -	int qual_flg;		/* qual_flags[scno] or DEFAULT_QUAL_FLAGS + RAW */
> -	unsigned long u_error;	/* Error code */
> -	kernel_ulong_t scno;	/* System call number */
> -	kernel_ulong_t u_arg[MAX_ARGS];	/* System call arguments */
> -	kernel_long_t u_rval;	/* Return value */
> -#if SUPPORTED_PERSONALITIES > 1
> -	unsigned int currpers;	/* Personality at the time of scno update */
> -#endif
> -	int sys_func_rval;	/* Syscall entry parser's return value */
> -	int curcol;		/* Output column for this process */
> -	FILE *outf;		/* Output file for this process */
> -	const char *auxstr;	/* Auxiliary info from syscall (see RVAL_STR) */
> -	void *_priv_data;	/* Private data for syscall decoding functions */
> -	void (*_free_priv_data)(void *); /* Callback for freeing priv_data */
> -	const struct_sysent *s_ent; /* sysent[scno] or dummy struct for bad scno */
> -	const struct_sysent *s_prev_ent; /* for "resuming interrupted SYSCALL" msg */
> -	struct inject_opts *inject_vec[SUPPORTED_PERSONALITIES];
> -	struct timeval stime;	/* System time usage as of last process wait */
> -	struct timeval dtime;	/* Delta for system time usage */
> -	struct timeval etime;	/* Syscall entry time */
> -
> -#ifdef USE_LIBUNWIND
> -	struct UPT_info *libunwind_ui;
> -	struct mmap_cache_t *mmap_cache;
> -	unsigned int mmap_cache_size;
> -	unsigned int mmap_cache_generation;
> -	struct queue_t *queue;
> -#endif
> -};
> +#include "defs_shared.h"
>  
>  /* TCB flags */
>  /* We have attached to this process, but did not see it stopping yet */
> @@ -263,6 +226,7 @@ struct tcb {
>  #define TCB_TAMPERED	0x40	/* A syscall has been tampered with */
>  #define TCB_HIDE_LOG	0x80	/* We should hide everything (until execve) */
>  #define TCB_SKIP_DETACH_ON_FIRST_EXEC	0x100	/* -b execve should skip detach on first execve */
> +#define TCB_AD_HOC_INJECT	0x200	/* an ad hoc injection was performed by Lua script */
>  
>  /* qualifier flags */
>  #define QUAL_TRACE	0x001	/* this system call should be traced */
> @@ -273,6 +237,8 @@ struct tcb {
>  #define QUAL_SIGNAL	0x100	/* report events with this signal */
>  #define QUAL_READ	0x200	/* dump data read from this file descriptor */
>  #define QUAL_WRITE	0x400	/* dump data written to this file descriptor */
> +#define QUAL_HOOK_ENTRY	0x800	/* return this syscall on entry from next_sc() */
> +#define QUAL_HOOK_EXIT	0x1000	/* return this syscall on exit from next_sc() */
>  
>  #define DEFAULT_QUAL_FLAGS (QUAL_TRACE | QUAL_ABBREV | QUAL_VERBOSE)
>  
> @@ -363,6 +329,7 @@ typedef enum {
>  	CFLAG_ONLY_STATS,
>  	CFLAG_BOTH
>  } cflag_t;
> +extern const struct syscall_class syscall_classes[];
>  extern cflag_t cflag;
>  extern bool debug_flag;
>  extern bool Tflag;
> @@ -686,6 +653,9 @@ extern struct number_set signal_set;
>  extern bool is_number_in_set(unsigned int number, const struct number_set *);
>  extern void qualify(const char *);
>  extern unsigned int qual_flags(const unsigned int);
> +#ifdef USE_LUAJIT
> +extern void set_hook_qual(unsigned int scno, unsigned int pers, bool entry_hook, bool exit_hook);
Overly long line.

> +#endif
>  
>  #define DECL_IOCTL(name)						\
>  extern int								\
> @@ -968,6 +938,13 @@ extern const char *const errnoent0[];
>  extern const char *const signalent0[];
>  extern const struct_ioctlent ioctlent0[];
>  
> +extern const char *const *errnoent_vec[SUPPORTED_PERSONALITIES];
> +extern const char *const *signalent_vec[SUPPORTED_PERSONALITIES];
> +extern const struct_ioctlent *const ioctlent_vec[SUPPORTED_PERSONALITIES];
> +extern const unsigned int nerrnoent_vec[SUPPORTED_PERSONALITIES];
> +extern const unsigned int nsignalent_vec[SUPPORTED_PERSONALITIES];
> +extern const unsigned int nioctlent_vec[SUPPORTED_PERSONALITIES];
> +
>  #if SUPPORTED_PERSONALITIES > 1
>  extern const struct_sysent *sysent;
>  extern const char *const *errnoent;
> diff --git a/defs_shared.h b/defs_shared.h
> new file mode 100644
> index 00000000..72ed2213
> --- /dev/null
> +++ b/defs_shared.h
> @@ -0,0 +1,66 @@
> +/*
> + * Should only be included without FFI_CDEF from defs.h, so no include guards.
> + */
> +
> +#include "ffi.h"
> +
> +FFI_CONTENT(
> +struct syscall_class {
> +	const char *name;
> +	unsigned int value;
> +};
> +)
> +
> +FFI_CONTENT(
> +typedef struct ioctlent {
> +	const char *symbol;
> +	unsigned int code;
> +} struct_ioctlent;
> +)
> +
> +/* Trace Control Block */
> +FFI_CONTENT(
> +struct tcb {
> +	int flags;		/* See below for TCB_ values */
> +	int pid;		/* If 0, this tcb is free */
> +	int qual_flg;		/* qual_flags[scno] or DEFAULT_QUAL_FLAGS + RAW */
> +	unsigned long u_error;	/* Error code */
> +	kernel_ulong_t scno;	/* System call number */
> +	kernel_ulong_t u_arg[MAX_ARGS];	/* System call arguments */
> +	kernel_long_t u_rval;	/* Return value */
> +)
> +
> +#if defined(USE_LUAJIT) || SUPPORTED_PERSONALITIES > 1
> +FFI_CONTENT(
> +	unsigned int currpers;	/* Personality at the time of scno update */
> +)
> +#endif
> +
> +#ifndef FFI_CDEF
> +	int sys_func_rval;	/* Syscall entry parser's return value */
> +	int curcol;		/* Output column for this process */
> +	FILE *outf;		/* Output file for this process */
> +	const char *auxstr;	/* Auxiliary info from syscall (see RVAL_STR) */
> +	void *_priv_data;	/* Private data for syscall decoding functions */
> +	void (*_free_priv_data)(void *); /* Callback for freeing priv_data */
> +	const struct_sysent *s_ent; /* sysent[scno] or dummy struct for bad scno */
> +	const struct_sysent *s_prev_ent; /* for "resuming interrupted SYSCALL" msg */
> +# ifdef USE_LUAJIT
> +	struct inject_opts *ad_hoc_inject_opts;
> +# endif
> +	struct inject_opts *inject_vec[SUPPORTED_PERSONALITIES];
> +	struct timeval stime;	/* System time usage as of last process wait */
> +	struct timeval dtime;	/* Delta for system time usage */
> +	struct timeval etime;	/* Syscall entry time */
> +# ifdef USE_LIBUNWIND
> +	struct UPT_info *libunwind_ui;
> +	struct mmap_cache_t *mmap_cache;
> +	unsigned int mmap_cache_size;
> +	unsigned int mmap_cache_generation;
> +	struct queue_t *queue;
> +# endif
> +#endif /* !FFI_CDEF */
> +
> +FFI_CONTENT(
> +};
> +)
> diff --git a/ffi.h b/ffi.h
> new file mode 100644
> index 00000000..7b89e7a4
> --- /dev/null
> +++ b/ffi.h
> @@ -0,0 +1,19 @@
> +#ifndef STRACE_FFI_H
> +#define STRACE_FFI_H
> +
> +#include "macros.h"
> +
> +#define FFI_CONCAT(a, b) a ## b
> +#define FFI_CONCAT2(a, b) FFI_CONCAT(a, b)
> +
> +/*
> + * FFI_CONTENT expands to FFI_CONTENT_ (which stringifies its arguments) when
> + * FFI_CDEF is defined, and to FFI_CONTENT_FFI_CDEF (which simply expands to its
> + * arguments) when it is not.
> + */
> +#define FFI_CONTENT FFI_CONCAT2(FFI_CONTENT_, FFI_CDEF)
> +
> +#define FFI_CONTENT_(...)         STRINGIFY(__VA_ARGS__)
> +#define FFI_CONTENT_FFI_CDEF(...) __VA_ARGS__
> +
> +#endif /* !STRACE_FFI_H */
> diff --git a/luajit.h b/luajit.h
> new file mode 100644
> index 00000000..154caf6c
> --- /dev/null
> +++ b/luajit.h
> @@ -0,0 +1,350 @@
> +/*
> + * Should only be included from strace.c, so no include guards.
> + */
> +
> +#include <lualib.h>
> +#include <lauxlib.h>
> +
> +#define L script_L
> +
> +static struct tcb *
> +func_next_sc(void)
> +{
> +	static struct timeval tv = {};
> +	static bool first = true;
> +
> +#define MAYBE_RESTART(res, sig)									\
> +	do {											\
> +		if ((res) >= 0 && ptrace_restart(PTRACE_SYSCALL, current_tcp, sig) < 0) {	\
> +			/* Note: ptrace_restart emitted error message */			\
> +			exit_code = 1;								\
> +			goto term;								\
> +		}										\
Overly long lines.

> +	} while (0)
> +
> +	if (!first) {
> +		if (!current_tcp)
> +			return NULL;
> +
> +		unsigned int sig = 0;
> +		int res;
> +		if (entering(current_tcp)) {
> +			res = syscall_entering_trace(current_tcp, &sig);
> +			syscall_entering_finish(current_tcp, res);
> +		} else {
> +			res = syscall_exiting_trace(current_tcp, tv, 1);
> +			syscall_exiting_finish(current_tcp);
> +		}
> +		MAYBE_RESTART(res, sig);
> +	}
> +	first = false;
> +
> +	while (1) {
> +		int status;
> +		siginfo_t si;
> +		enum trace_event ret = next_event(&status, &si);
> +		if (ret == TE_SYSCALL_STOP) {
> +			unsigned int sig = 0;
> +			int res;
> +			if (entering(current_tcp)) {
> +				res = syscall_entering_decode(current_tcp);
> +				switch (res) {
> +				case 0:
> +					break;
> +				case 1:
> +					if (current_tcp->qual_flg & QUAL_HOOK_ENTRY)
> +						return current_tcp;
> +					res = syscall_entering_trace(current_tcp, &sig);
> +					/* fall through */
> +				default:
> +					syscall_entering_finish(current_tcp, res);
> +				}
> +			} else {
> +				res = syscall_exiting_decode(current_tcp, &tv);
> +				switch (res) {
> +				case 0:
> +					break;
> +				case 1:
> +					if (current_tcp->qual_flg & QUAL_HOOK_EXIT)
> +						return current_tcp;
> +					/* fall through */
> +				default:
> +					res = syscall_exiting_trace(current_tcp, tv, res);
> +				}
> +				syscall_exiting_finish(current_tcp);
> +			}
> +			MAYBE_RESTART(res, sig);
> +		} else {
> +			if (!dispatch_event(ret, &status, &si))
> +				goto term;
> +		}
> +	}
> +#undef MAYBE_RESTART
> +term:
> +	current_tcp = NULL;
> +	return NULL;
> +}
Looks like I forgot to answer the question about why this should be
re-implemented, my bad. The reason is that this is preferred in order to
avoid code duplication.

So, we may try to do something like this:

diff --git i/defs.h w/defs.h
index 618943b..78b9e79 100644
--- i/defs.h
+++ w/defs.h
@@ -229,6 +229,7 @@ struct inject_opts {
 #define TCB_HIDE_LOG	0x80	/* We should hide everything (until execve) */
 #define TCB_SKIP_DETACH_ON_FIRST_EXEC	0x100	/* -b execve should skip detach on first execve */
 #define TCB_AD_HOC_INJECT	0x200	/* an ad hoc injection was performed by Lua script */
+#define TCB_HOOK	0x400
 
 /* qualifier flags */
 #define QUAL_TRACE	0x001	/* this system call should be traced */
@@ -286,6 +287,7 @@ extern const struct xlat whence_codes[];
 #define RVAL_NONE	040	/* Print nothing */
 
 #define RVAL_DECODED	0100	/* syscall decoding finished */
+#define RVAL_HOOKED	0200
 
 #define IOCTL_NUMBER_UNKNOWN 0
 #define IOCTL_NUMBER_HANDLED 1
diff --git i/luajit.h w/luajit.h
index c486e75..735ff66 100644
--- i/luajit.h
+++ w/luajit.h
@@ -10,32 +10,16 @@
 static struct tcb *
 func_next_sc(void)
 {
-	static struct timeval tv = {};
 	static bool first = true;
 
-#define MAYBE_RESTART(res, sig)									\
-	do {											\
-		if ((res) >= 0 && ptrace_restart(PTRACE_SYSCALL, current_tcp, sig) < 0) {	\
-			/* Note: ptrace_restart emitted error message */			\
-			exit_code = 1;								\
-			goto term;								\
-		}										\
-	} while (0)
-
+	/* Finishing previous hook-interrupted TE_SYSCALL_STOP call */
 	if (!first) {
 		if (!current_tcp)
 			return NULL;
 
-		unsigned int sig = 0;
-		int res;
-		if (entering(current_tcp)) {
-			res = syscall_entering_trace(current_tcp, &sig);
-			syscall_entering_finish(current_tcp, res);
-		} else {
-			res = syscall_exiting_trace(current_tcp, tv, 1);
-			syscall_exiting_finish(current_tcp);
-		}
-		MAYBE_RESTART(res, sig);
+		if (!dispatch_event(TE_SYSCALL_STOP_HOOK_EXIT, NULL, NULL,
+		    true))
+			goto term;
 	}
 	first = false;
 
@@ -43,43 +27,14 @@ func_next_sc(void)
 		int status;
 		siginfo_t si;
 		enum trace_event ret = next_event(&status, &si);
-		if (ret == TE_SYSCALL_STOP) {
-			unsigned int sig = 0;
-			int res;
-			if (entering(current_tcp)) {
-				res = syscall_entering_decode(current_tcp);
-				switch (res) {
-				case 0:
-					break;
-				case 1:
-					if (current_tcp->qual_flg & QUAL_HOOK_ENTRY)
-						return current_tcp;
-					res = syscall_entering_trace(current_tcp, &sig);
-					/* fall through */
-				default:
-					syscall_entering_finish(current_tcp, res);
-				}
-			} else {
-				res = syscall_exiting_decode(current_tcp, &tv);
-				switch (res) {
-				case 0:
-					break;
-				case 1:
-					if (current_tcp->qual_flg & QUAL_HOOK_EXIT)
-						return current_tcp;
-					/* fall through */
-				default:
-					res = syscall_exiting_trace(current_tcp, tv, res);
-				}
-				syscall_exiting_finish(current_tcp);
-			}
-			MAYBE_RESTART(res, sig);
-		} else {
-			if (!dispatch_event(ret, &status, &si))
-				goto term;
+
+		if (!dispatch_event(ret, &status, &si, true))
+			break;
+		if ((ret == TE_SYSCALL_STOP) && (current_tcp->flags & TCB_HOOK)) {
+			current_tcp->flags &= ~TCB_HOOK;
+			return current_tcp;
 		}
 	}
-#undef MAYBE_RESTART
 term:
 	current_tcp = NULL;
 	return NULL;
diff --git i/strace.c w/strace.c
index eadadc7..e10af7e 100644
--- i/strace.c
+++ w/strace.c
@@ -2219,6 +2219,9 @@ enum trace_event {
 	 */
 	TE_SYSCALL_STOP,
 
+	/* Syscall entry or exit, after hook. */
+	TE_SYSCALL_STOP_HOOK_EXIT,
+
 	/*
 	 * Tracee received signal with number WSTOPSIG(*pstatus); signal info
 	 * is written to *si.  Restart the tracee (with that signal number
@@ -2409,25 +2412,57 @@ next_event(int *pstatus, siginfo_t *si)
 	}
 }
 
+enum hook_state {
+	HOOK_ENTER,
+	HOOK_EXIT,
+	HOOK_IGNORE,
+};
+
 static int
-trace_syscall(struct tcb *tcp, unsigned int *sig)
+trace_syscall(struct tcb *tcp, unsigned int *sig, enum hook_state state)
 {
 	if (entering(tcp)) {
-		int res = syscall_entering_decode(tcp);
-		switch (res) {
-		case 0:
-			return 0;
-		case 1:
-			res = syscall_entering_trace(tcp, sig);
+		int res = 1;
+
+		switch (state) {
+		case HOOK_ENTER:
+		case HOOK_IGNORE:
+			res = syscall_entering_decode(tcp);
+			if (!res)
+				return 0;
+			if (res == 1) {
+				if ((state == HOOK_ENTER) &&
+				    (tcp->qual_flg & QUAL_HOOK_ENTRY))
+					return RVAL_HOOKED;
+
+			/* Fall through */
+		case HOOK_EXIT:
+				res = syscall_entering_trace(tcp, sig);
+			}
 		}
+
 		syscall_entering_finish(tcp, res);
 		return res;
 	} else {
-		struct timeval tv = {};
-		int res = syscall_exiting_decode(tcp, &tv);
-		if (res != 0) {
-			res = syscall_exiting_trace(tcp, tv, res);
+		static struct timeval tv = {};
+
+		int res = 1;
+
+		switch (state) {
+		case HOOK_ENTER:
+		case HOOK_IGNORE:
+			res = syscall_exiting_decode(tcp, &tv);
+
+			if ((res == 1) && (state == HOOK_ENTER) &&
+			    (tcp->qual_flg & QUAL_HOOK_EXIT))
+					return RVAL_HOOKED;
+
+			/* Fall through */
+		case HOOK_EXIT:
+			if (res != 0)
+				res = syscall_exiting_trace(tcp, tv, res);
 		}
+
 		syscall_exiting_finish(tcp);
 		return res;
 	}
@@ -2435,10 +2470,11 @@ trace_syscall(struct tcb *tcp, unsigned int *sig)
 
 /* Returns true iff the main trace loop has to continue. */
 static bool
-dispatch_event(enum trace_event ret, int *pstatus, siginfo_t *si)
+dispatch_event(enum trace_event ret, int *pstatus, siginfo_t *si, bool hooked)
 {
 	unsigned int restart_op = PTRACE_SYSCALL;
 	unsigned int restart_sig = 0;
+	int res;
 
 	switch (ret) {
 	case TE_BREAK:
@@ -2451,7 +2487,17 @@ dispatch_event(enum trace_event ret, int *pstatus, siginfo_t *si)
 		break;
 
 	case TE_SYSCALL_STOP:
-		if (trace_syscall(current_tcp, &restart_sig) < 0) {
+	case TE_SYSCALL_STOP_HOOK_EXIT:
+		res = trace_syscall(current_tcp, &restart_sig,
+			hooked ? (ret == TE_SYSCALL_STOP ? HOOK_ENTER : HOOK_EXIT) :
+			HOOK_IGNORE);
+
+		if (res == RVAL_HOOKED) {
+			current_tcp->flags |= TCB_HOOK;
+			return true;
+		}
+
+		if (res < 0) {
 			/*
 			 * ptrace() failed in trace_syscall().
 			 * Likely a result of process disappearing mid-flight.
@@ -2465,6 +2511,7 @@ dispatch_event(enum trace_event ret, int *pstatus, siginfo_t *si)
 			 */
 			return true;
 		}
+
 		break;
 
 	case TE_SIGNAL_DELIVERY_STOP:
@@ -2605,7 +2652,7 @@ main(int argc, char *argv[])
 
 	int status;
 	siginfo_t si;
-	while (dispatch_event(next_event(&status, &si), &status, &si))
+	while (dispatch_event(next_event(&status, &si), &status, &si, false))
 		;
 	terminate();
 }

(I do not particularly like this implementation, especially for employing
pigeon's device, but I think it gives the idea)

> +
> +static bool
> +func_monitor(unsigned int scno, unsigned int pers, bool entry_hook, bool exit_hook)
Overly long line.

> +{
> +	if (pers >= SUPPORTED_PERSONALITIES || scno >= nsyscall_vec[pers])
> +		return false;
> +	set_hook_qual(scno, pers, entry_hook, exit_hook);
> +	return true;
> +}
> +
> +static void
> +prepare_ad_hoc_inject(void)
> +{
> +	struct inject_opts *opts = current_tcp->ad_hoc_inject_opts;
> +	if (!opts) {
> +		opts = current_tcp->ad_hoc_inject_opts = xmalloc(sizeof(*opts));
> +		opts->first = 1;
> +		opts->step = 1;
> +	}
> +	if (!(current_tcp->flags & TCB_AD_HOC_INJECT)) {
> +		opts->signo = 0;
> +		opts->rval = INJECT_OPTS_RVAL_DEFAULT;
> +		current_tcp->qual_flg |= QUAL_INJECT;
> +		current_tcp->flags |= TCB_AD_HOC_INJECT;
> +	}
> +}
> +
> +static bool
> +func_inject_signo(int signo)
> +{
> +	if (!current_tcp || exiting(current_tcp))
> +		/* Too late! */
> +		return false;
> +	if (signo <= 0 || signo > SIGRTMAX)
> +		return false;
> +	prepare_ad_hoc_inject();
> +	current_tcp->ad_hoc_inject_opts->signo = signo;
> +	return true;
> +}
> +
> +static bool
> +func_inject_retval(int retval)
> +{
> +	if (!current_tcp || exiting(current_tcp))
> +		/* Too late! */
> +		return false;
> +	if (retval < -MAX_ERRNO_VALUE)
> +		return false;
> +	prepare_ad_hoc_inject();
> +	current_tcp->ad_hoc_inject_opts->rval = retval;
> +	return true;
> +}
> +
> +static int
> +func_umove(kernel_ulong_t addr, size_t len, void *laddr)
> +{
> +	return current_tcp ? umoven(current_tcp, addr, len, laddr) : -1;
> +}
> +
> +static int
> +func_umove_str(kernel_ulong_t addr, size_t len, char *laddr)
> +{
> +	return current_tcp ? umovestr(current_tcp, addr, len, laddr) : -1;
> +}
> +
> +static bool
> +func_path_match_arr(const char **set, size_t nset)
> +{
> +	if (!current_tcp)
> +		return false;
> +	struct path_set s = {set, nset};
> +	return pathtrace_match_set(current_tcp, &s);
> +}
> +
> +static const char *
> +get_lua_msg(void)
> +{
> +	const char *msg = lua_tostring(L, -1);
> +	return msg ? msg : "(error object can't be converted to string)";
> +}
> +
> +static void
> +assert_lua_impl(int ret, const char *expr, const char *file, int line)
> +{
> +	if (ret == 0)
> +		return;
> +	error_msg_and_die("assert_lua(%s) failed at %s:%d: %s", expr, file,
> +		line, get_lua_msg());
> +}
> +
> +#define assert_lua(expr) assert_lua_impl(expr, #expr, __FILE__, __LINE__)
> +
> +static void
> +check_lua(int ret)
> +{
> +	if (ret == 0)
> +		return;
> +	error_msg_and_die("lua: %s", get_lua_msg());
> +}
> +
> +#ifdef LUA_FFILIBNAME
> +# define FFILIBNAME LUA_FFILIBNAME
> +#else
> +/* non-LuaJIT */
> +# define FFILIBNAME "ffi"
> +#endif
> +
> +#ifdef LUA_BITLIBNAME
> +# define BITLIBNAME LUA_BITLIBNAME
> +#else
> +/* Lua <= 5.1 (non-LuaJIT) */
> +# define BITLIBNAME "bit"
> +#endif
> +
> +static void
> +init_luajit(const char *scriptfile)
> +{
> +	if (L)
> +		/* already initialized? */
> +		error_msg_and_help("multiple -l arguments");
> +
> +	if (!(L = luaL_newstate()))
> +		error_msg_and_die("luaL_newstate failed (out of memory?)");
> +
> +	luaL_openlibs(L);
> +
> +	lua_getglobal(L, "require"); /* L: require */
> +	lua_pushstring(L, FFILIBNAME); /* L: require str */
> +	assert_lua(lua_pcall(L, 1, 1, 0)); /* L: ffi */
> +	lua_getfield(L, -1, "cdef"); /* L: ffi cdef */
> +	luaL_Buffer b;
> +	luaL_buffinit(L, &b); /* L: ffi cdef ? */
> +	{
> +		char buf[128];
> +		snprintf(buf, sizeof(buf),
> +			"typedef int%d_t kernel_long_t;"
> +			"typedef uint%d_t kernel_ulong_t;",
> +			(int) sizeof(kernel_long_t) * 8,
> +			(int) sizeof(kernel_ulong_t) * 8);
> +		luaL_addstring(&b, buf); /* L: ffi cdef ? */
> +	}
> +	const char *defs =
> +#define FFI_CDEF
> +#include "sysent.h"
> +#include "defs_shared.h"
> +#undef FFI_CDEF
> +	;
> +	luaL_addstring(&b, defs); /* L: ffi cdef ? */
> +	luaL_pushresult(&b); /* L: ffi cdef str */
> +	assert_lua(lua_pcall(L, 1, 0, 0)); /* L: ffi */
> +
> +	lua_newtable(L); /* L: ffi table */
> +
> +	lua_getfield(L, -2, "cast"); /* L: ffi table cast */
> +	lua_remove(L, -3); /* L: table cast */
> +

> +#define EXPOSE_FUNC(rettype, ptr, name, ...)					\
> +	do {									\
> +		rettype (*fptr_)(__VA_ARGS__) = ptr;				\
> +		lua_pushvalue(L, -1); /* L: table cast cast */			\
> +		lua_pushstring(L, #rettype " (*)(" #__VA_ARGS__ ")");		\
> +		/* L: table cast cast str */					\
> +		lua_pushlightuserdata(L, * (void **) (&fptr_));			\
> +		/* L: table cast cast str ptr */				\
> +		assert_lua(lua_pcall(L, 2, 1, 0)); /* L: table cast value */	\
> +		lua_setfield(L, -3, name); /* L: table cast */			\
> +	} while (0)
Overly long lines.

> +
> +	EXPOSE_FUNC(bool, func_monitor, "monitor",
> +		unsigned int, unsigned int, bool, bool);
> +	EXPOSE_FUNC(struct tcb *, func_next_sc, "next_sc",
> +		void);
> +	EXPOSE_FUNC(bool, func_inject_signo, "inject_signo",
> +		int);
> +	EXPOSE_FUNC(bool, func_inject_retval, "inject_retval",
> +		int);
> +	EXPOSE_FUNC(int, func_umove, "umove",
> +		kernel_ulong_t, size_t, void *);
> +	EXPOSE_FUNC(int, func_umove_str, "umove_str",
> +		kernel_ulong_t, size_t, char *);
> +	EXPOSE_FUNC(bool, func_path_match_arr, "path_match_arr",
> +		const char **, size_t);
> +
> +#undef EXPOSE_FUNC
Regarding the question "And what if want to expose not a func_ wrapper,
but an actual function", I'd assume that functions which are exposed to
Lua should be wrapped in some way anyway, so this could be a safe
assumption.

> +
> +#define EXPOSE(type, ptr, name)									\
> +	do {											\
> +		/* Get a compilation error/warning on type mismatch */				\
> +		type tmp_ = ptr;								\
> +		(void) tmp_;									\
> +		lua_pushvalue(L, -1); /* L: table cast cast */					\
> +		lua_pushstring(L, #type); /* L: table cast cast str */				\
> +		lua_pushlightuserdata(L, (void *) ptr); /* L: table cast cast str ptr */	\
> +		assert_lua(lua_pcall(L, 2, 1, 0)); /* L: table cast value */			\
> +		lua_setfield(L, -3, name); /* L: table cast */					\
Overly long lines.

> +	} while (0)
> +
> +	EXPOSE(const struct_sysent *const *, sysent_vec, "sysent_vec");
> +	EXPOSE(const char *const **, errnoent_vec, "errnoent_vec");
> +	EXPOSE(const char *const **, signalent_vec, "signalent_vec");
> +	EXPOSE(const struct_ioctlent *const *, ioctlent_vec, "ioctlent_vec");
> +
> +	EXPOSE(const unsigned int *, nsyscall_vec, /*(!)*/ "nsysent_vec");
> +	EXPOSE(const unsigned int *, nerrnoent_vec, "nerrnoent_vec");
> +	EXPOSE(const unsigned int *, nsignalent_vec, "nsignalent_vec");
> +	EXPOSE(const unsigned int *, nioctlent_vec, "nioctlent_vec");
> +
> +	EXPOSE(const struct syscall_class *, syscall_classes, "syscall_classes");
> +
> +#undef EXPOSE
> +
> +	lua_pop(L, 1); /* L: table */
> +
> +	lua_pushinteger(L, SUPPORTED_PERSONALITIES); /* L: table int */
> +	lua_setfield(L, -2, "npersonalities"); /* L: table */
> +
> +	lua_pushinteger(L, MAX_ARGS); /* L: table int */
> +	lua_setfield(L, -2, "max_args"); /* L: table */
> +
> +	lua_pushinteger(L, PATH_MAX); /* L: table int */
> +	lua_setfield(L, -2, "path_max"); /* L: table */
> +
> +	lua_setglobal(L, "strace"); /* L: - */
> +
> +	const char *code =
> +#include "luajit_lib.h"
> +	;
> +	assert_lua(luaL_loadstring(L, code)); /* L: chunk */
> +
> +	lua_newtable(L); /* L: chunk table */
> +
> +	lua_pushstring(L, FFILIBNAME); /* L: chunk table str */
> +	lua_setfield(L, -2, "ffilibname"); /* L: chunk table */
> +	lua_pushstring(L, BITLIBNAME); /* L: chunk table str */
> +	lua_setfield(L, -2, "bitlibname"); /* L: chunk table */
> +	lua_pushinteger(L, TCB_INSYSCALL); /* L: chunk table int */
> +	lua_setfield(L, -2, "tcb_insyscall"); /* L: chunk table */
> +	lua_pushinteger(L, QUAL_TRACE); /* L: chunk table int */
> +	lua_setfield(L, -2, "qual_trace"); /* L: chunk table */
> +	lua_pushinteger(L, QUAL_ABBREV); /* L: chunk table int */
> +	lua_setfield(L, -2, "qual_abbrev"); /* L: chunk table */
> +	lua_pushinteger(L, QUAL_VERBOSE); /* L: chunk table int */
> +	lua_setfield(L, -2, "qual_verbose"); /* L: chunk table */
> +	lua_pushinteger(L, QUAL_RAW); /* L: chunk table int */
> +	lua_setfield(L, -2, "qual_raw"); /* L: chunk table */
> +
> +	assert_lua(lua_pcall(L, 1, 1, 0)); /* L: func */
> +
> +	check_lua(luaL_loadfile(L, scriptfile)); /* L: func chunk */
> +}
> +
> +static void ATTRIBUTE_NORETURN
> +run_luajit(void)
> +{
> +	/* L: func chunk */
> +	check_lua(lua_pcall(L, 0, 0, 0)); /* L: func */
> +	check_lua(lua_pcall(L, 0, 0, 0)); /* L: - */
> +	terminate();
> +}
> +
> +#undef FFILIBNAME
> +#undef BITLIBNAME
> +#undef assert_lua
> +#undef L
> diff --git a/luajit_lib.lua b/luajit_lib.lua
> new file mode 100644
> index 00000000..9b303d96
> --- /dev/null
> +++ b/luajit_lib.lua
> @@ -0,0 +1,372 @@
> +-- This "chunk" of code is loaded and run before the script is.
> +--
> +-- To quote https://www.lua.org/manual/5.1/manual.html#2.4.1,
> +-- "Lua handles a chunk as the body of an anonymous function with a variable
> +--  number of arguments (see §2.5.9). As such, chunks can define local
> +--  variables, receive arguments, and return values."
> +--
> +-- Thanks to Lua's support for closures, all the local variables defined here
> +-- will not leak to another chunks (i.e., the script), but all the functions
> +-- defined here can still access them.
> +--
> +-- strace calls this chunk with a single argument: a table with data that should
> +-- not be exposed to the script, but is needed for some API functions defined
> +-- here.
> +--
> +-- strace expects this chunk to return another function that will be run after
> +-- the script returns.
> +--
> +-- Arguments passed to this chunk are accessible through the "..." vararg
> +-- expression. The following line uses Lua's "adjust" assignment semantics to
> +-- assign the first argument to a local variable "priv".
> +local priv = ...
> +
> +local ffi = require(priv.ffilibname)
> +ffi.cdef[[
> +int strcmp(const char *, const char *);
> +char *realpath(const char *, char *);
> +]]
> +local bit = require(priv.bitlibname)
> +
> +local entry_cbs, exit_cbs, at_exit_cb = {}, {}, nil
> +for p = 0, strace.npersonalities - 1 do
> +	entry_cbs[p] = {}
> +	exit_cbs[p] = {}
> +end
> +
> +local function chain(f, g)
> +	if not f then
> +		return g
> +	end
> +	return function(...)
> +		f(...)
> +		g(...)
> +	end
> +end
> +
> +local function register_hook(scno, pers, on_entry, on_exit, cb)
> +	assert(not not strace.monitor(scno, pers, on_entry, on_exit))
> +	scno, pers = tonumber(scno), tonumber(pers)
> +	if on_entry then
> +		entry_cbs[pers][scno] = chain(entry_cbs[pers][scno], cb)
> +	end
> +	if on_exit then
> +		exit_cbs[pers][scno] = chain(exit_cbs[pers][scno], cb)
> +	end
> +end
> +
> +-- Convert a cdata C string or a Lua string to a Lua string.
> +local function mkstring(s)
> +	return type(s) == 'string' and s or ffi.string(s)
> +end
> +
> +local function parse_pers_spec(pers_spec)
> +	return tonumber(pers_spec) or tonumber(pers_spec.currpers)
> +end
> +
> +function strace.entering(tcp)
> +	return bit.band(tcp.flags, priv.tcb_insyscall) == 0
> +end
> +
> +function strace.exiting(tcp)
> +	return bit.band(tcp.flags, priv.tcb_insyscall) ~= 0
> +end
> +
> +local function alter_trace_opt(flagbit, tcp, ...)
> +	assert(strace.entering(tcp), 'altering tracing options must be done on syscall entry')
Overly long line.

> +	-- i.e., if ... is empty, or the first element of ... is true
> +	if select('#', ...) == 0 or select(1, ...) then
> +		tcp.qual_flg = bit.bor(tcp.qual_flg, flagbit)
> +	else
> +		tcp.qual_flg = bit.band(tcp.qual_flg, bit.bnot(flagbit))
> +	end
> +end
> +function strace.trace  (tcp, ...) alter_trace_opt(priv.qual_trace,   tcp, ...) end
> +function strace.abbrev (tcp, ...) alter_trace_opt(priv.qual_abbrev,  tcp, ...) end
> +function strace.verbose(tcp, ...) alter_trace_opt(priv.qual_verbose, tcp, ...) end
> +function strace.raw    (tcp, ...) alter_trace_opt(priv.qual_raw,     tcp, ...) end
> +
> +function strace.ptr_to_kulong(ptr)
> +	return ffi.cast('kernel_ulong_t', ffi.cast('unsigned long', ptr))
> +end
> +
> +function strace.at_exit(f)
> +	at_exit_cb = chain(at_exit_cb, f)
> +end
> +

> +function strace.get_err_name(err, pers_spec)
> +	local pers = parse_pers_spec(pers_spec)
> +	if err < 0 or err > strace.nerrnoent_vec[pers] then
> +		return nil
> +	end
> +	local s = strace.errnoent_vec[pers][err]
> +	return s ~= nil and ffi.string(s) or nil
> +end
> +
> +function strace.get_sc_name(scno, pers_spec)
> +	local pers = parse_pers_spec(pers_spec)
> +	if scno < 0 or scno >= strace.nsysent_vec[pers] then
> +		return nil
> +	end
> +	local s = strace.sysent_vec[pers][scno].sys_name
> +	return s ~= nil and ffi.string(s) or nil
> +end
> +
> +function strace.get_ioctl_name(code, pers_spec)
> +	local pers = parse_pers_spec(pers_spec)
> +	-- we could have provided a definition for stdlib's bsearch() and used
> +	-- it, but LuaJIT's FFI manual says generated callbacks are a limited
> +	-- resource and also slow. So implement binary search ourselves.
> +	local lb, rb = ffi.cast('unsigned int', 0), strace.nioctlent_vec[pers]
> +	if rb == 0 then
> +		return nil
> +	end
> +	local arr = strace.ioctlent_vec[pers]
> +	while rb - lb > 1 do
> +		local mid = lb + (rb - lb) / 2
> +		if arr[mid].code <= code then
> +			lb = mid
> +		else
> +			rb = mid
> +		end
> +	end
> +	return arr[lb].code == code and ffi.string(arr[lb].symbol) or nil
> +end
> +
> +function strace.get_scno(scname, pers_spec)
> +	local pers = parse_pers_spec(pers_spec)
> +	local cstr = ffi.cast('const char *', scname)
> +	for i = 0, tonumber(strace.nsysent_vec[pers]) - 1 do
> +		local s = strace.sysent_vec[pers][i].sys_name
> +		if s ~= nil and ffi.C.strcmp(s, cstr) == 0 then
> +			return i
> +		end
> +	end
> +	return nil
> +end
> +
> +function strace.get_signo(signame, pers_spec)
> +	local pers = parse_pers_spec(pers_spec)
> +	local cstr = ffi.cast('const char *', signame)
> +	for i = 0, tonumber(strace.nsignalent_vec[pers]) - 1 do
> +		local s = strace.signalent_vec[pers][i]
> +		if s ~= nil and ffi.C.strcmp(s, cstr) == 0 then
> +			return i
> +		end
> +	end
> +	return nil
> +end
> +
> +function strace.get_errno(errname, pers_spec)
> +	local pers = parse_pers_spec(pers_spec)
> +	local cstr = ffi.cast('const char *', errname)
> +	for i = 0, tonumber(strace.nerrnoent_vec[pers]) - 1 do
> +		local s = strace.errnoent_vec[pers][i]
> +		if s ~= nil and ffi.C.strcmp(s, cstr) == 0 then
> +			return i
> +		end
> +	end
> +	return nil
> +end
The problem here is that these functions implement the same non-trivial
logic as their C counterparts; thus, any change in the logic has to be
made in two places, which is indeed error-prone.

> +
> +function strace.inject_signal(tcp, sig)
> +	local signo = sig
> +	if type(sig) == 'string' or type(sig) == 'cdata' then
> +		signo = assert(strace.get_signo(sig, tcp.currpers),
> +			       'signal not found: ' .. mkstring(sig))
> +	end
> +	assert(not not strace.inject_signo(signo), 'cannot inject signal')
> +end
> +
> +function strace.inject_error(tcp, err)
> +	local errno = err
> +	if type(err) == 'string' or type(err) == 'cdata' then
> +		errno = assert(strace.get_errno(err, tcp.currpers),
> +			       'error not found: ' .. mkstring(err))
> +	end
> +	assert(errno > 0, 'err must be positive')
> +	assert(not not strace.inject_retval(-errno), 'cannot inject error')
> +end
> +
> +local ptr_size = ffi.sizeof('void *')
> +
> +function strace.read_obj(addr, ct, ...)
> +	local obj = ffi.new(ct, ...)
> +	local n = ffi.sizeof(obj)
> +	-- work around FFI pointer semantics
> +	if n == ptr_size then
> +		-- it may be a pointer, and it is cheap to create another copy
> +		local t = ffi.typeof(obj)
> +		local arr = ffi.typeof('$ [1]', t)()
> +		return strace.umove(addr, n, arr) == 0 and t(arr[0]) or nil
> +	else
> +		return strace.umove(addr, n, obj) == 0 and obj       or nil
> +	end
> +end
> +
> +function strace.read_str(addr, maxsz, bufsz)
> +	-- convert it to Lua number to prevent underflows
> +	maxsz = tonumber(maxsz or 4 * 1024 * 1024)
> +	bufsz = bufsz or 1024
> +	local t = {}
> +	local buf = ffi.new('char [?]', bufsz)
> +	while true do
> +		local r = strace.umove_str(addr, bufsz, buf)
> +		if r < 0 then
> +			return nil, 'readerr'
> +		elseif r == 0 then
> +			maxsz = maxsz - bufsz
> +			if maxsz < 0 then
> +				return nil, 'toolong'
> +			end
> +			t[#t + 1] = ffi.string(buf, bufsz)
> +			addr = addr + bufsz
> +		else
> +			local s = ffi.string(buf)
> +			if #s > maxsz then
> +				return nil, 'toolong'
> +			end
> +			return table.concat(t) .. s
> +		end
> +	end
> +end
> +
> +function strace.read_path(addr)
> +	return strace.read_str(addr, strace.path_max, strace.path_max + 1)
> +end
> +
> +local function parse_when(when)
> +	if type(when) == 'table' then
> +		return unpack(when)
> +	elseif when == 'entering' then
> +		return true, false
> +	elseif when == 'exiting' then
> +		return false, true
> +	elseif when == 'both' then
> +		return true, true
> +	else
> +		error('unknown "when" value')
> +	end
> +end
> +
> +function strace.monitor_all(when)
> +	local on_entry, on_exit = parse_when(when)
> +	for p = 0, strace.npersonalities - 1 do
> +		for i = 0, tonumber(strace.nsysent_vec[p]) - 1 do
> +			strace.monitor(i, p, on_entry, on_exit)
> +		end
> +	end
> +end
> +
> +function strace.monitor_name(scname, when)
> +	local on_entry, on_exit = parse_when(when)
> +	if type(scname) == 'table' then
> +		for _, elem in ipairs(scname) do
> +			strace.monitor_name(elem, {on_entry, on_exit})
> +		end
> +		return
> +	end
> +	local found = false
> +	for p = 0, strace.npersonalities - 1 do
> +		local scno = strace.get_scno(scname, p)
> +		if scno then
> +			assert(not not strace.monitor(scno, p, on_entry, on_exit))
> +			found = true
> +		end
> +	end
> +	assert(found, 'syscall not found: ' .. mkstring(scname))
> +end
> +
> +function strace.hook(scname, when, cb)
> +	local on_entry, on_exit = parse_when(when)
> +	if type(scname) == 'table' then
> +		for _, elem in ipairs(scname) do
> +			strace.hook(elem, {on_entry, on_exit}, cb)
> +		end
> +		return
> +	end
> +	local found = false
> +	for p = 0, strace.npersonalities - 1 do
> +		local scno = strace.get_scno(scname, p)
> +		if scno then
> +			register_hook(scno, p, on_entry, on_exit, cb)
> +			found = true
> +		end
> +	end
> +	assert(found, 'syscall not found: ' .. mkstring(scname))
> +end
> +
> +function strace.hook_class(clsname, when, cb)
> +	local on_entry, on_exit = parse_when(when)
> +	if type(clsname) == 'table' then
> +		for _, elem in ipairs(clsname) do
> +			strace.hook_class(elem, {on_entry, on_exit}, cb)
> +		end
> +		return
> +	end
> +	local cstr = ffi.cast('const char *', clsname)
> +	local flag = nil
> +	local ptr = strace.syscall_classes
> +	while ptr.name ~= nil do
> +		if ffi.C.strcmp(ptr.name, cstr) == 0 then
> +			flag = ptr.value
> +			break
> +		end
> +		ptr = ptr + 1
> +	end
> +	assert(flag ~= nil, 'syscall class not found: ' .. mkstring(clsname))
> +	for p = 0, strace.npersonalities - 1 do
> +		for i = 0, tonumber(strace.nsysent_vec[p]) - 1 do
> +			if bit.band(strace.sysent_vec[p][i].sys_flags, flag) ~= 0 then
> +				register_hook(i, p, on_entry, on_exit, cb)
> +			end
> +		end
> +	end
> +end
> +
> +function strace.hook_scno(scno, when, cb, pers_spec)
> +	local on_entry, on_exit = parse_when(when)
> +	local pers = parse_pers_spec(pers_spec)
> +	if type(scno) == 'table' then
> +		for _, elem in ipairs(scno) do
> +			strace.hook_scno(elem, {on_entry, on_exit}, cb, pers)
> +		end
> +		return
> +	end
> +	register_hook(scno, pers, on_entry, on_exit, cb)
> +end
> +
> +function strace.path_match(set)
> +	if type(set) ~= 'table' then
> +		set = {set}
> +	end
> +	local nset = #set
> +	return not not strace.path_match_arr(
> +		ffi.new('const char *[?]', nset, set), nset)
> +end
> +
> +function print(...)
> +	local sep = ''
> +	for i = 1, select('#', ...) do
> +		io.stderr:write(sep .. tostring(select(i, ...)))
> +		sep = '\t'
> +	end
> +	io.stderr:write('\n')
> +end
> +
> +return function()
> +	while true do
> +		local tcp = strace.next_sc()
> +		if tcp == nil then
> +			break
> +		end
> +		local cb = (strace.entering(tcp) and entry_cbs or exit_cbs)
> +			[tonumber(tcp.currpers)][tonumber(tcp.scno)]
> +		if cb then
> +			cb(tcp)
> +		end
> +	end
> +	if at_exit_cb then
> +		at_exit_cb()
> +	end
> +end
> diff --git a/qualify.c b/qualify.c
> index 3df4805a..15f8d40e 100644
> --- a/qualify.c
> +++ b/qualify.c
> @@ -30,6 +30,31 @@
>  #include "nsig.h"
>  #include <regex.h>
>  
> +const struct syscall_class syscall_classes[] = {
> +	{ "desc",	TRACE_DESC	},
> +	{ "file",	TRACE_FILE	},
> +	{ "memory",	TRACE_MEMORY	},
> +	{ "process",	TRACE_PROCESS	},
> +	{ "signal",	TRACE_SIGNAL	},
> +	{ "ipc",	TRACE_IPC	},
> +	{ "network",	TRACE_NETWORK	},
> +	{ "%desc",	TRACE_DESC	},
> +	{ "%file",	TRACE_FILE	},
> +	{ "%memory",	TRACE_MEMORY	},
> +	{ "%process",	TRACE_PROCESS	},
> +	{ "%signal",	TRACE_SIGNAL	},
> +	{ "%ipc",	TRACE_IPC	},
> +	{ "%network",	TRACE_NETWORK	},
> +	{ "%stat",	TRACE_STAT	},
> +	{ "%lstat",	TRACE_LSTAT	},
> +	{ "%fstat",	TRACE_FSTAT	},
> +	{ "%%stat",	TRACE_STAT_LIKE	},
> +	{ "%statfs",	TRACE_STATFS	},
> +	{ "%fstatfs",	TRACE_FSTATFS	},
> +	{ "%%statfs",	TRACE_STATFS_LIKE	},
> +	{}
> +};
> +
>  typedef unsigned int number_slot_t;
>  #define BITS_PER_SLOT (sizeof(number_slot_t) * 8)
>  
> @@ -48,6 +73,10 @@ static struct number_set inject_set[SUPPORTED_PERSONALITIES];
>  static struct number_set raw_set[SUPPORTED_PERSONALITIES];
>  static struct number_set trace_set[SUPPORTED_PERSONALITIES];
>  static struct number_set verbose_set[SUPPORTED_PERSONALITIES];
> +#ifdef USE_LUAJIT
> +static struct number_set hook_entry_set[SUPPORTED_PERSONALITIES];
> +static struct number_set hook_exit_set[SUPPORTED_PERSONALITIES];
> +#endif
>  
>  static void
>  number_setbit(const unsigned int i, number_slot_t *const vec)
> @@ -245,37 +274,10 @@ qualify_syscall_regex(const char *s, struct number_set *set)
>  static unsigned int
>  lookup_class(const char *s)
>  {
> -	static const struct {
> -		const char *name;
> -		unsigned int value;
> -	} syscall_class[] = {
> -		{ "desc",	TRACE_DESC	},
> -		{ "file",	TRACE_FILE	},
> -		{ "memory",	TRACE_MEMORY	},
> -		{ "process",	TRACE_PROCESS	},
> -		{ "signal",	TRACE_SIGNAL	},
> -		{ "ipc",	TRACE_IPC	},
> -		{ "network",	TRACE_NETWORK	},
> -		{ "%desc",	TRACE_DESC	},
> -		{ "%file",	TRACE_FILE	},
> -		{ "%memory",	TRACE_MEMORY	},
> -		{ "%process",	TRACE_PROCESS	},
> -		{ "%signal",	TRACE_SIGNAL	},
> -		{ "%ipc",	TRACE_IPC	},
> -		{ "%network",	TRACE_NETWORK	},
> -		{ "%stat",	TRACE_STAT	},
> -		{ "%lstat",	TRACE_LSTAT	},
> -		{ "%fstat",	TRACE_FSTAT	},
> -		{ "%%stat",	TRACE_STAT_LIKE	},
> -		{ "%statfs",	TRACE_STATFS	},
> -		{ "%fstatfs",	TRACE_FSTATFS	},
> -		{ "%%statfs",	TRACE_STATFS_LIKE	},
> -	};
> -
> -	unsigned int i;
> -	for (i = 0; i < ARRAY_SIZE(syscall_class); ++i) {
> -		if (strcmp(s, syscall_class[i].name) == 0) {
> -			return syscall_class[i].value;
> +	const struct syscall_class *c;
> +	for (c = syscall_classes; c->name; ++c) {
> +		if (strcmp(s, c->name) == 0) {
> +			return c->value;
>  		}
>  	}
>  
> @@ -693,5 +695,23 @@ qual_flags(const unsigned int scno)
>  		| (is_number_in_set(scno, &raw_set[current_personality])
>  		   ? QUAL_RAW : 0)
>  		| (is_number_in_set(scno, &inject_set[current_personality])
> -		   ? QUAL_INJECT : 0);
> +		   ? QUAL_INJECT : 0)
> +#ifdef USE_LUAJIT
> +		| (is_number_in_set(scno, &hook_entry_set[current_personality])
> +		   ? QUAL_HOOK_ENTRY : 0)
> +		| (is_number_in_set(scno, &hook_exit_set[current_personality])
> +		   ? QUAL_HOOK_EXIT : 0)
> +#endif
> +		;
> +}
> +
> +#ifdef USE_LUAJIT
> +void
> +set_hook_qual(unsigned int scno, unsigned int pers, bool entry_hook, bool exit_hook)
> +{
> +	if (entry_hook)
> +		add_number_to_set(scno, &hook_entry_set[pers]);
> +	if (exit_hook)
> +		add_number_to_set(scno, &hook_exit_set[pers]);
>  }
> +#endif
> diff --git a/strace.1 b/strace.1
> index af9704f5..798dffcd 100644
> --- a/strace.1
> +++ b/strace.1
> @@ -751,6 +751,14 @@ Unless this option is used setuid and setgid programs are executed
>  without effective privileges.
>  .SS Miscellaneous
>  .TP 12
> +.BI "\-l " filename
> +Load and run LuaJIT script from
> +.I filename
> +(experimental).
> +This option is available only if
> +.B strace
> +is built with LuaJIT scripting support.
> +.TP
>  .B \-d
>  Show some debugging output of
>  .B strace
> @@ -766,6 +774,380 @@ Print the help summary.
>  .B \-V
>  Print the version number of
>  .BR strace .
> +.SH LUA SCRIPTING
> +If built with LuaJIT support, \fBstrace\fR can execute LuaJIT scripts.
> +A script file is passed to the \fB\-l\fR option.
> +.PP
> +\fBstrace\fR provides the built-in module \fBstrace\fR, which contains various
> +functions and constants.
> +.PP
> +Before any tracing takes place, the script is run.
> +At this stage, it can either:
> +.IP \(bu 3
> +implement its own tracing loop by selecting syscalls it wants to be notified
> +about with
> +.BR strace.monitor / strace.monitor_name / strace.monitor_all
> +and calling \fBstrace.next_sc\fR in a loop until it returns \fBNULL\fR (or
> +return earlier; in this case, the installed hooks for the remaining syscalls are
> +run).
> +Note that \fBstrace\fR performs tracing/tampering of a syscall on the next
> +\fBstrace.next_sc\fR call;
> +.IP \(bu
> +install syscall and at-exit hooks with \fBstrace.hook\fR,
"or install". However, having an additional sentence in the previous
item breaks the whole "either" structure.  Maybe rephrase it as "it can do
one of two things:" and format the items as proper sentences (start with a
capital letter, end with a dot).

> +\fBstrace.hook_class\fR, \fBstrace.hook_scno\fR and \fBstrace.at_exit\fR.
> +.PP
> +Then, \fBstrace\fR enters its own tracing loop, and all the installed hooks are
> +run.
> +.SS Example
> +The following script counts the number of processes (including threads) spawned
> +by the tracee.
> +Note that you would probably want to launch \fBstrace\fR with \fB\-f\fR option,
> +so that children also be traced.
> +.CW
> +n = 0
> +strace.hook({'clone', 'fork', 'vfork'}, 'exiting', function(tcp)
> +    if tcp.u_rval ~= -1 then
> +        n = n + 1
> +    end
> +end)
> +strace.at_exit(function() print('Processes spawned:', n) end)
> +.CE
> +.SS FFI definitions
> +.CW
> +typedef /* implementation-defined signed integer type   */ kernel_long_t;
> +typedef /* implementation-defined unsigned integer type */ kernel_ulong_t;
> +
> +typedef struct sysent {
> +    unsigned nargs;          /* Number of arguments */
> +    int sys_flags;           /* Flags.  Currently, only meaningful in the
> +                              * context of struct syscall_class::value field:
> +                              * a syscall belongs to a class iff
> +                              * (class.value & syscall.sys_flags) != 0. */
> +    const char *sys_name;    /* Name */
> +} struct_sysent;
> +
> +struct syscall_class {
> +    const char *name;        /* Name */
> +    unsigned int value;      /* Flag bit, see the comment on struct
> +                              * sysent::sys_flags field. */
> +};
> +
> +typedef struct ioctlent {
> +    const char *symbol;
> +    unsigned int code;
> +} struct_ioctlent;
> +
> +/* Trace control block */
> +struct tcb {
> +    int pid;                              /* Tracee's PID */
> +    unsigned long u_error;                /* Error code */
> +    kernel_ulong_t scno;                  /* System call number */
> +    kernel_ulong_t u_arg[/* MAX_ARGS */]; /* System call arguments */
> +    kernel_ulong_t u_rval;                /* Return value */
> +    unsigned int currpers;                /* Current personality */
> +};
> +.CE
> +.SS strace module: C function pointers
> +Note: be careful with boxed boolean values and use \fBnot not \fIboxed_bool\fR
> +when in doubt.
> +In particular, an \fBassert\fR on a boxed boolean will never raise an error.
> +.TP
> +\fIstatus\fR = \fBstrace.monitor\fR(\fIscno\fR, \fIpers\fR, \fIon_entry\fR, \fIon_exit\fR)
> +C type:
> +.B bool (*)(unsigned int, unsigned int, bool, bool)
> +.IP
> +Marks the syscall with number \fIscno\fR on personality \fIpers\fR as to be
> +returned from \fBstrace.next_sc\fR.

Regarding personality names — you're free to move the PERSONALITY_NAMES
definition into an array similar to the *ent ones.

And, again, the concept of personality is not currently mentioned in the
man page; maybe it's a good time to add some information about it.

> +If \fIon_entry\fR is \fBtrue\fR, it is marked as to be returned on syscall
> +entry, and if \fIon_exit\fR is \fBtrue\fR, it is marked as to be returned on
> +syscall exit.
> +.IP
> +Note that this "marking" is a one-way process, and specifying \fBfalse\fR as any
> +of the flags does not undo any previous calls to \fBstrace.monitor\fR.
> +.TP
> +\fItcp\fR = \fBstrace.next_sc\fR()
> +C type:
> +.B struct tcb * (*)(void)
> +.IP
> +If this is not the first call to \fBstrace.next_sc\fR, performs tracing and
> +tampering of the previous syscall.
> +.IP
> +Waits for the next monitored syscall to happen, and returns a pointer to its
> +trace control block.
> +.IP
> +If \fBstrace\fR needs to be terminated (e.g. last tracee has been terminated, or
> +\fBstrace\fR has been interrupted), returns \fBNULL\fR.
> +Once it returned \fBNULL\fR, all subsequent calls to it will also return
> +\fBNULL\fR.
> +.TP
> +\fIstatus\fR = \fBstrace.inject_signo\fR(\fIsigno\fR)
> +C type:
> +.B bool (*)(int)
> +.IP
> +Deliver a signal with number \fIsigno\fR to the current tracee.
> +.IP
> +Note that this must be done on syscall entry.
> +.TP
> +\fIstatus\fR = \fBstrace.inject_retval\fR(\fIval\fR)
> +C type:
> +.B bool (*)(int)
> +.IP
> +Injects a return value to the current syscall invocation.
> +.IP
> +Note that this must be done on syscall entry.
> +.TP
> +\fIret\fR = \fBstrace.umove\fR(\fIaddr\fR, \fIlen\fR, \fIladdr\fR)
> +C type:
> +.B int (*)(kernel_ulong_t, size_t, void *)
> +.IP
> +Copies ("moves") \fIlen\fR bytes of data from the current tracee process at
> +address \fIaddr\fR to a local address \fIladdr\fR.
> +Returns 0 on success and \-1 on failure.
> +.TP
> +\fIret\fR = \fBstrace.umove_str\fR(\fIaddr\fR, \fIlen\fR, \fIladdr\fR)
> +C type:
> +.B int (*)(kernel_ulong_t, size_t, char *)
> +.IP
> +Like \fBstrace.umove\fR, but makes the additional effort of looking for a
> +terminating zero byte.
> +Returns a negative value on failure, a positive value if a NUL was seen, and 0
> +if \fIlen\fR bytes were read but no NUL seen.
> +.IP
> +Note: there is no guarantee it won't overwrite some bytes in \fIladdr\fR after
> +terminating NUL (but, of course, it never writes past \fIladdr[len-1]\fR).

> +.TP
> +\fIret\fR = \fBstrace.upoke\fR(\fIaddr\fR, \fIlen\fR, \fIladdr\fR)
> +C type:
> +.B int (*)(kernel_ulong_t, size_t, const void *)
> +.IP
> +Copies ("pokes") \fIlen\fR bytes of data from the local address \fIladdr\fR to
> +the address \fIaddr\fR of the current tracee process' address space.
> +.IP
> +Returns 0 on success and \-1 on failure.
This one is actually introduced only in the next patch; it's weird that
its description is already in this patch.

> +.TP
> +\fIstatus\fR = \fBstrace.path_match_arr\fR(\fIset\fR, \fInset\fR)
> +C type:
> +.B bool (*)(const char **, size_t)
> +.IP
> +Returns \fBtrue\fR if the current syscall accesses one of the paths from a given
> +set of paths, and \fBfalse\fR otherwise.
> +.IP
> +Note: for string path arguments, the path is compared against the set; for file
> +descriptor arguments, the absolute path to the file behind the file descriptor is
> +compared against the set.
> +.SS strace module: proper Lua functions
> +Glossary:
> +.IP \(bu 3
> +an \fIinteger\fR means either an integer Lua number or a cdata integer type;
> +.IP \(bu
> +a \fIboolean\fR means either a Lua boolean or a cdata \fBbool\fR;
> +.IP \(bu
> +a \fIstring\fR means either a Lua string or a cdata C string;
> +.IP \(bu
> +the \fIcurrent tracing control pointer\fR is either the return value of the last
> +invocation of \fBstrace.next_sc\fR (but not a null pointer), or the argument
> +passed to a now-running hook callback function.
It's probably worth noting that it is the pointer to the trace control
block; otherwise, these two entities may look unrelated.

> +.PP
> +General conventions:
> +.IP \(bu 3
> +a \fItcp\fR argument is the \fIcurrent tracing control pointer\fR;
> +.IP \(bu
> +a \fIpers_spec\fR argument is either an \fIinteger\fR specifying personality
> +number, or the \fIcurrent tracing control pointer\fR from which a personality
> +number is copied;
> +.IP \(bu
> +an \fIaddr\fR argument is a cdata \fIkernel_ulong_t\fR.
> +.TP
> +\fIstatus\fR = \fBstrace.entering\fR(\fItcp\fR)
> +Returns \fBtrue\fR if this is a syscall entry, and \fBfalse\fR otherwise.
> +.TP
> +\fIstatus\fR = \fBstrace.exiting\fR(\fItcp\fR)
> +Returns \fBtrue\fR if this is a syscall exit, and \fBfalse\fR otherwise.
> +.TP
> +\fBstrace.trace\fR(\fItcp\fR[, \fIflag\fR])
> +.TP
> +\fBstrace.abbrev\fR(\fItcp\fR[, \fIflag\fR])
> +.TP
> +\fBstrace.verbose\fR(\fItcp\fR[, \fIflag\fR])
> +.TP
> +\fBstrace.raw\fR(\fItcp\fR[, \fIflag\fR])
> +These functions alter corresponding trace options.
> +\fIflag\fR is a \fIboolean\fR, defaults to \fBtrue\fR.
> +.TP
> +\fIaddr\fR = \fBstrace.ptr_to_kulong\fR(\fIptr\fR)
> +Converts a cdata pointer to a \fBkernel_ulong_t\fR.
> +.TP
> +\fBstrace.at_exit\fR(\fIfunc\fR)
> +Registers a function \fIfunc\fR to be run when \fBstrace\fR needs to
> +be terminated.
> +.TP
> +\fIname\fR = \fBstrace.get_err_name\fR(\fIerrno\fR, \fIpers_spec\fR)
> +Returns error name (e.g. \fB"ENOENT"\fR) as Lua string by its error number
> +\fIerrno\fR for personality specified by \fIpers_spec\fR, or \fBnil\fR if
> +\fIerrno\fR is invalid.
> +.IP
> +\fIerrno\fR is an \fIinteger\fR.
> +.TP
> +\fIname\fR = \fBstrace.get_sc_name\fR(\fIscno\fR, \fIpers_spec\fR)
> +Returns syscall name as Lua string by its number for personality specified by
> +\fIpers_spec\fR, or \fBnil\fR if \fIscno\fR is invalid.
> +.IP
> +\fIscno\fR is an \fIinteger\fR.

> +.TP
> +\fIname\fR = \fBstrace.get_ioctl_name\fR(\fIreqcode\fR, \fIpers_spec\fR)
> +Returns ioctl symbol name (e.g. \fB"TIOCGWINSZ"\fR) as Lua string by its request
> +code \fIreqcode\fR for personality specified by \fIpers_spec\fR, or \fBnil\fR if
> +\fIreqcode\fR is invalid.
> +.IP
> +\fIreqcode\fR is an \fIinteger\fR.
The reverse conversion may also be useful.

> +.TP
> +\fIscno\fR = \fBstrace.get_scno\fR(\fIscname\fR, \fIpers_spec\fR)
> +Returns syscall number by its name for personality specified by \fIpers_spec\fR,
> +or \fBnil\fR if no such syscall was found.
> +.IP
> +\fIscname\fR is a \fIstring\fR.

> +.TP
> +\fIsigno\fR = \fBstrace.get_signo\fR(\fIsigname\fR, \fIpers_spec\fR)
> +Returns signal number by its name (e.g. \fB"SIGSEGV"\fR) for personality
> +specified by \fIpers_spec\fR, or \fBnil\fR if no such signal was found.
> +.IP
> +\fIsigname\fR is \fIstring\fR.
The reverse conversion may also be useful.

> +.TP
> +\fIerrno\fR = \fBstrace.get_errno\fR(\fIerrname\fR, \fIpers_spec\fR)
> +Returns error number by its name (e.g. \fB"ENOENT"\fR) for personality specified
> +by \fIpers_spec\fR, or \fBnil\fR if no such error was found.
> +.IP
> +\fIsigname\fR is a \fIstring\fR.
"errname"

> +.TP
> +\fBstrace.inject_signal\fR(\fItcp\fR, \fIsig\fR)
> +Delivers a signal to the tracee.
> +\fIsig\fR is either signal number (an \fIinteger\fR) or name (a \fIstring\fR).
> +.IP
> +Note that this must be done on syscall entry.
> +.IP
> +Raises an error on failure.
> +.TP
> +\fBstrace.inject_error\fR(\fItcp\fR, \fIerr\fR)
> +Injects an error into a current syscall invocation.
> +\fIerr\fR is either error number (an \fIinteger\fR) or error name (a
> +\fIstring\fR).
> +.IP
> +Note that this must be done on syscall entry.
> +.IP
> +Raises an error on failure.
> +.TP
> +\fIobj\fR = \fBstrace.read_obj\fR(\fIaddr\fR, \fIct\fR[, \fInelem\fR])
> +Reads an object of type \fIct\fR from the current tracee process at address
> +\fIaddr\fR.
> +\fIct\fR is either a \fIcdecl\fR (a Lua string), a \fIcdata\fR serving as a
> +template type, or a \fIctype\fR (special kind of \fIcdata\fR returned by
> +\fBffi.typeof\fR).
> +.IP
> +VLA/VLS types require the \fInelem\fR argument (an \fIinteger\fR).
> +.IP
> +Returns an object on success and \fBnil\fR on failure.

Again, this will break if the tracee's personality is different from
strace's.  This should be noted, at least.  Or fixed, even better.
I see no easy fix, however, at this point.

> +.TP
> +\fIstr\fR[, \fIerr_msg\fR] = \fBstrace.read_str\fR(\fIaddr\fR[, \fImaxsz\fR[, \fIbufsz\fR]])
> +Reads a C string from the current tracee process at address \fIaddr\fR using an
> +intermediate buffer of size \fIbufsz\fR and stopping at \fImaxsz\fR bytes.
> +.IP
> +\fImaxsz\fR and \fIbufsz\fR are \fIintegers\fR.
> +\fImaxsz\fR defaults to 4 Mb, \fIbufsz\fR to 1 Kb.
> +.IP
> +Returns a Lua string on success, \fBnil, "readerr"\fR on read error, and
> +\fBnil, "toolong"\fR if the \fImaxsz\fR limit was exceeded.
> +.TP
> +\fIstr\fR[, \fIerr_msg\fR] = \fBstrace.read_path\fR(\fIaddr\fR)
> +Reads a path C string from the current tracee process at address \fIaddr\fR.
> +.IP
> +Returns a Lua string on success, \fBnil, "readerr"\fR on read error, and
> +\fBnil, "toolong"\fR if the \fBPATH_MAX\fR limit was exceeded.
> +.TP
> +\fBstrace.monitor_name\fR(\fIscname\fR, \fIwhen\fR)
> +Marks syscall with name \fIscname\fR as to be returned from
> +\fBstrace.next_sc()\fR.
> +\fIscname\fR is a \fIstring\fR.
> +.TP
> +\fBstrace.monitor_all\fR(\fIwhen\fR)
> +Marks all syscalls on all personalities as to be returned from
> +\fBstrace.next_sc\fR.
> +.TP
> +\fBstrace.hook\fR(\fIscname\fR, \fIwhen\fR, \fIcb\fR)
> +.TP
> +\fBstrace.hook_class\fR(\fIclsname\fR, \fIwhen\fR, \fIcb\fR)
> +.TP
> +\fBstrace.hook_scno\fR(\fIscno\fR, \fIwhen\fR, \fIcb\fR, \fIpers_spec\fR)
> +These functions register a function \fIcb\fR to be run when a syscall with the
> +given name (or with a name from a given set thereof), belonging to a class with
> +the given name (or with a name from a given set thereof), or with the given
> +number (or with a number from a given set thereof) on personality specified by
> +\fIpers_spec\fR, happens.
> +.IP
> +It will be run on syscall entry if \fIwhen\fR is \fB"entering"\fR, syscall exit
> +if \fIwhen\fR is \fB"exiting"\fR, or both if \fIwhen\fR is \fB"both"\fR
> +(\fIwhen\fR is a Lua string).
> +.IP
> +A pointer to the trace control block is passed as the only argument to \fIcb\fR.
> +.IP
> +\fIscname\fR and \fIclsname\fR are either \fIstrings\fR or tables thereof.
> +\fIscno\fR is either an \fIinteger\fR or a table thereof.
> +.IP
> +Raise an error on failure.
> +.TP
> +\fIstatus\fR = \fBstrace.path_match\fR(\fIset\fR)
> +Returns \fBtrue\fR if the current syscall accesses a given path, or one of the
> +paths from the given set of paths; and \fBfalse\fR otherwise (see the note for
> +\fBstrace.path_match_arr\fR).
> +.IP
> +\fIset\fR is either a \fIstring\fR or a table thereof.
> +.SS strace module: constants
> +.TP
> +.B strace.npersonalities
> +Number of supported personalities (an integer Lua number).
> +.TP
> +.B strace.max_args
> +Size of \fBstruct tcb::u_arg\fR array (an integer Lua number).
> +.TP
> +.B strace.path_max
> +Value of \fBPATH_MAX\fR constant (an integer Lua number).
> +.TP
> +.B strace.sysent_vec
> +Array of syscall tables for each of the supported personalities.
> +\fBstrace.sysent_vec\fR[\fIpers\fR][\fIscno\fR] is a \fBstruct_sysent\fR for
> +syscall number \fIscno\fR on personality \fIpers\fR.
> +May contain null entries (which have a NULL \fBsys_name\fR field).
> +.TP
> +.B strace.errnoent_vec
> +Array of error name tables for each of the supported personalities.
> +\fBstrace.errnoent_vec\fR[\fIpers\fR][\fIerrno\fR] is either a null pointer or a
> +C string with the name of error \fIerrno\fR on personality \fIpers\fR.
> +.TP
> +.B strace.signalent_vec
> +Array of signal name tables for each of the supported personalities.
> +\fBstrace.signalent_vec\fR[\fIpers\fR][\fIsigno\fR] is either a null pointer or
> +a C string with the name of signal \fIsigno\fR on personality \fIpers\fR.
> +.TP
> +.B strace.ioctlent_vec
> +Arrays of known ioctl symbols, sorted by code, for each of the supported
> +personalities.
> +\fBstrace.ioctlent_vec\fR[\fIpers\fR][\fIi\fR] is the \fIi\fR-th, ranked by
> +code, \fBstruct_ioctlent\fR for personality \fIpers\fR.
> +.TP
> +.B strace.nsysent_vec
> +.TP
> +.B strace.nerrnoent_vec
> +.TP
> +.B strace.nsignalent_vec
> +.TP
> +.B strace.nioctlent_vec
> +These are \fBstrace.npersonalities\fR-sized arrays containing sizes of subarrays
> +of
> +.BR strace.sysent_vec ", " strace.errnoent_vec ", " strace.signalent_vec ", and " strace.ioctlent_vec
> +respectively.
> +.TP
> +.B strace.syscall_classes
> +Array of \fBstruct syscall_class\fR, with a terminating null entry (which has a
> +NULL \fBname\fR field).
>  .SH DIAGNOSTICS
>  When
>  .I command
> diff --git a/strace.c b/strace.c
> index c503c3d1..e89a90d2 100644
> --- a/strace.c
> +++ b/strace.c
> @@ -45,6 +45,9 @@
>  # include <sys/prctl.h>
>  #endif
>  #include <asm/unistd.h>
> +#ifdef USE_LUAJIT
> +# include <lua.h>
> +#endif
>  
>  #include "scno.h"
>  #include "ptrace.h"
> @@ -169,6 +172,11 @@ static volatile sig_atomic_t interrupted;
>  static volatile int interrupted;
>  #endif
>  
> +#ifdef USE_LUAJIT
> +static lua_State *script_L = NULL;
> +static void init_luajit(const char *scriptfile);
> +#endif
> +
>  #ifndef HAVE_STRERROR
>  
>  #if !HAVE_DECL_SYS_ERRLIST
> @@ -219,6 +227,11 @@ Output format:\n\
>    -k             obtain stack trace between each syscall (experimental)\n\
>  "
>  #endif
> +#ifdef USE_LUAJIT
> +"\
> +  -l file        run a Lua script from FILE\n\
> +"
> +#endif
>  "\
>    -o file        send trace output to FILE instead of stderr\n\
>    -q             suppress messages about attaching, detaching, etc.\n\
> @@ -772,7 +785,7 @@ alloctcb(int pid)
>  		if (!tcp->pid) {
>  			memset(tcp, 0, sizeof(*tcp));
>  			tcp->pid = pid;
> -#if SUPPORTED_PERSONALITIES > 1
> +#if defined(USE_LUAJIT) || SUPPORTED_PERSONALITIES > 1
>  			tcp->currpers = current_personality;
>  #endif
>  
> @@ -828,6 +841,10 @@ droptcb(struct tcb *tcp)
>  	if (tcp->pid == 0)
>  		return;
>  
> +#ifdef USE_LUAJIT
> +	free(tcp->ad_hoc_inject_opts);
> +#endif
> +
>  	int p;
>  	for (p = 0; p < SUPPORTED_PERSONALITIES; ++p)
>  		free(tcp->inject_vec[p]);
> @@ -1648,6 +1665,9 @@ init(int argc, char *argv[])
>  #ifdef USE_LIBUNWIND
>  		"k"
>  #endif
> +#ifdef USE_LUAJIT
> +		"l:"
> +#endif
>  		"D"
>  		"a:e:o:O:p:s:S:u:E:P:I:")) != EOF) {
>  		switch (c) {
> @@ -1758,6 +1778,11 @@ init(int argc, char *argv[])
>  			stack_trace_enabled = true;
>  			break;
>  #endif
> +#ifdef USE_LUAJIT
> +		case 'l':
> +			init_luajit(optarg);
> +			break;
> +#endif
>  		case 'E':
>  			if (putenv(optarg) < 0)
>  				perror_msg_and_die("putenv");
> @@ -2639,6 +2664,10 @@ terminate(void)
>  	exit(exit_code);
>  }
>  
> +#ifdef USE_LUAJIT
> +# include "luajit.h"
> +#endif
> +
>  int
>  main(int argc, char *argv[])
>  {
> @@ -2646,6 +2675,11 @@ main(int argc, char *argv[])
>  
>  	exit_code = !nprocs;
>  
> +#ifdef USE_LUAJIT
> +	if (script_L)
> +		run_luajit();
> +#endif
> +
>  	int status;
>  	siginfo_t si;
>  	while (dispatch_event(next_event(&status, &si), &status, &si))
> diff --git a/syscall.c b/syscall.c
> index f21a1467..58aaa248 100644
> --- a/syscall.c
> +++ b/syscall.c
> @@ -196,6 +196,16 @@ enum {
>  #endif
>  };
>  
> +const char *const *errnoent_vec[SUPPORTED_PERSONALITIES] = {
> +	errnoent0,
> +#if SUPPORTED_PERSONALITIES > 1
> +	errnoent1,
> +# if SUPPORTED_PERSONALITIES > 2
> +	errnoent2,
> +# endif
> +#endif
> +};
> +
>  enum {
>  	nerrnos0 = ARRAY_SIZE(errnoent0)
>  #if SUPPORTED_PERSONALITIES > 1
> @@ -206,6 +216,16 @@ enum {
>  #endif
>  };
>  
> +const unsigned int nerrnoent_vec[] = {
> +	nerrnos0,
> +#if SUPPORTED_PERSONALITIES > 1
> +	nerrnos1,
> +# if SUPPORTED_PERSONALITIES > 2
> +	nerrnos2,
> +# endif
> +#endif
> +};
> +
>  enum {
>  	nsignals0 = ARRAY_SIZE(signalent0)
>  #if SUPPORTED_PERSONALITIES > 1
> @@ -216,6 +236,26 @@ enum {
>  #endif
>  };
>  
> +const char *const *signalent_vec[SUPPORTED_PERSONALITIES] = {
> +	signalent0,
> +#if SUPPORTED_PERSONALITIES > 1
> +	signalent1,
> +# if SUPPORTED_PERSONALITIES > 2
> +	signalent2,
> +# endif
> +#endif
> +};
> +
> +const unsigned int nsignalent_vec[] = {
> +	nsignals0,
> +#if SUPPORTED_PERSONALITIES > 1
> +	nsignals1,
> +# if SUPPORTED_PERSONALITIES > 2
> +	nsignals2,
> +# endif
> +#endif
> +};
> +
>  enum {
>  	nioctlents0 = ARRAY_SIZE(ioctlent0)
>  #if SUPPORTED_PERSONALITIES > 1
> @@ -226,6 +266,26 @@ enum {
>  #endif
>  };
>  
> +const unsigned int nioctlent_vec[] = {
> +	nioctlents0,
> +#if SUPPORTED_PERSONALITIES > 1
> +	nioctlents1,
> +# if SUPPORTED_PERSONALITIES > 2
> +	nioctlents2,
> +# endif
> +#endif
> +};
> +
> +const struct_ioctlent *const ioctlent_vec[SUPPORTED_PERSONALITIES] = {
> +	ioctlent0,
> +#if SUPPORTED_PERSONALITIES > 1
> +	ioctlent1,
> +# if SUPPORTED_PERSONALITIES > 2
> +	ioctlent2,
> +# endif
> +#endif
> +};
> +
>  #if SUPPORTED_PERSONALITIES > 1
>  const struct_sysent *sysent = sysent0;
>  const char *const *errnoent = errnoent0;
> @@ -580,8 +640,19 @@ static int arch_set_success(struct tcb *);
>  struct inject_opts *inject_vec[SUPPORTED_PERSONALITIES];
>  
>  static struct inject_opts *
> -tcb_inject_opts(struct tcb *tcp)
> +tcb_inject_opts(struct tcb *tcp, bool copy_if_needed)
>  {
> +#ifdef USE_LUAJIT
> +	if (tcp->flags & TCB_AD_HOC_INJECT)
> +		return tcp->ad_hoc_inject_opts;
> +#endif
> +	if (copy_if_needed && !tcp->inject_vec[current_personality]) {
> +		tcp->inject_vec[current_personality] =
> +			xcalloc(nsyscalls, sizeof(**inject_vec));
> +		memcpy(tcp->inject_vec[current_personality],
> +		       inject_vec[current_personality],
> +		       nsyscalls * sizeof(**inject_vec));
> +	}
>  	return (scno_in_range(tcp->scno) && tcp->inject_vec[current_personality])
>  	       ? &tcp->inject_vec[current_personality][tcp->scno] : NULL;
>  }
> @@ -590,15 +661,7 @@ tcb_inject_opts(struct tcb *tcp)
>  static long
>  tamper_with_syscall_entering(struct tcb *tcp, unsigned int *signo)
>  {
> -	if (!tcp->inject_vec[current_personality]) {
> -		tcp->inject_vec[current_personality] =
> -			xcalloc(nsyscalls, sizeof(**inject_vec));
> -		memcpy(tcp->inject_vec[current_personality],
> -		       inject_vec[current_personality],
> -		       nsyscalls * sizeof(**inject_vec));
> -	}
> -
> -	struct inject_opts *opts = tcb_inject_opts(tcp);
> +	struct inject_opts *opts = tcb_inject_opts(tcp, true);
>  
>  	if (!opts || opts->first == 0)
>  		return 0;
> @@ -621,7 +684,7 @@ tamper_with_syscall_entering(struct tcb *tcp, unsigned int *signo)
>  static long
>  tamper_with_syscall_exiting(struct tcb *tcp)
>  {
> -	struct inject_opts *opts = tcb_inject_opts(tcp);
> +	struct inject_opts *opts = tcb_inject_opts(tcp, false);
>  
>  	if (!opts)
>  		return 0;
> @@ -699,6 +762,12 @@ syscall_entering_decode(struct tcb *tcp)
>  	return 1;
>  }
>  
> +static bool
> +syscall_ad_hoc_injected(struct tcb *tcp)
> +{
> +	return (tcp->qual_flg & QUAL_INJECT) && (tcp->flags & TCB_AD_HOC_INJECT);
> +}
> +
>  int
>  syscall_entering_trace(struct tcb *tcp, unsigned int *sig)
>  {
> @@ -721,13 +790,13 @@ syscall_entering_trace(struct tcb *tcp, unsigned int *sig)
>  	 || (tracing_paths && !pathtrace_match(tcp))
>  	) {
>  		tcp->flags |= TCB_FILTERED;
> -		return 0;
> +		goto maybe_ad_hoc_tamper;
>  	}
>  
>  	tcp->flags &= ~TCB_FILTERED;
>  
>  	if (hide_log(tcp)) {
> -		return 0;
> +		goto maybe_ad_hoc_tamper;
>  	}
>  
>  	if (tcp->qual_flg & QUAL_INJECT)
> @@ -750,6 +819,11 @@ syscall_entering_trace(struct tcb *tcp, unsigned int *sig)
>  		? printargs(tcp) : tcp->s_ent->sys_func(tcp);
>  	fflush(tcp->outf);
>  	return res;
> +
> +maybe_ad_hoc_tamper:
> +	if (syscall_ad_hoc_injected(tcp))
> +		tamper_with_syscall_entering(tcp, sig);
> +	return 0;
>  }
>  
>  void
> @@ -790,21 +864,28 @@ syscall_exiting_decode(struct tcb *tcp, struct timeval *ptv)
>  	}
>  #endif
>  
> -	if (filtered(tcp) || hide_log(tcp))
> +	if ((filtered(tcp) || hide_log(tcp))
> +	 && !(tcp->qual_flg & QUAL_HOOK_EXIT) && !syscall_ad_hoc_injected(tcp))
>  		return 0;
>  
>  	get_regs(tcp->pid);
>  #if SUPPORTED_PERSONALITIES > 1
>  	update_personality(tcp, tcp->currpers);
>  #endif
> -	return get_regs_error ? -1 : get_syscall_result(tcp);
> +	if (get_regs_error || get_syscall_result(tcp) == -1)
> +		return -1;
> +
> +	if (syserror(tcp) && syscall_tampered(tcp))
> +		tamper_with_syscall_exiting(tcp);
> +
> +	return 1;
>  }
>  
>  int
>  syscall_exiting_trace(struct tcb *tcp, struct timeval tv, int res)
>  {
> -	if (syserror(tcp) && syscall_tampered(tcp))
> -		tamper_with_syscall_exiting(tcp);
> +	if (filtered(tcp) || hide_log(tcp))
> +		return 0;
>  
>  	if (cflag) {
>  		count_syscall(tcp, &tv);
> @@ -1013,7 +1094,7 @@ syscall_exiting_trace(struct tcb *tcp, struct timeval tv, int res)
>  void
>  syscall_exiting_finish(struct tcb *tcp)
>  {
> -	tcp->flags &= ~(TCB_INSYSCALL | TCB_TAMPERED);
> +	tcp->flags &= ~(TCB_INSYSCALL | TCB_TAMPERED | TCB_AD_HOC_INJECT);
>  	tcp->sys_func_rval = 0;
>  	free_tcb_priv_data(tcp);
>  }
> diff --git a/sysent.h b/sysent.h
> index 92de7468..15b83693 100644
> --- a/sysent.h
> +++ b/sysent.h
> @@ -1,13 +1,31 @@
> -#ifndef STRACE_SYSENT_H
> -#define STRACE_SYSENT_H
> +#if !defined(STRACE_SYSENT_H) || defined(FFI_CDEF)
> +#ifndef FFI_CDEF
> +# define STRACE_SYSENT_H
> +#endif
>  
> +#include "ffi.h"
> +
> +FFI_CONTENT(
>  typedef struct sysent {
>  	unsigned nargs;
>  	int	sys_flags;
> +)
> +/* We don't want to expose sen and sys_func to LuaJIT */
> +#ifdef FFI_CDEF
> +FFI_CONTENT(
> +	int priv1;
> +	void *priv2;
> +)
> +#else
> +FFI_CONTENT(
>  	int	sen;
>  	int	(*sys_func)();
> +)
> +#endif
> +FFI_CONTENT(
>  	const char *sys_name;
>  } struct_sysent;
> +)
>  
>  #define TRACE_FILE			00000001	/* Trace file-related syscalls. */
>  #define TRACE_IPC			00000002	/* Trace IPC-related syscalls. */
> @@ -29,4 +47,4 @@ typedef struct sysent {
>  #define TRACE_FSTAT			00400000	/* Trace *fstat{,at}{,64} syscalls. */
>  #define TRACE_STAT_LIKE			01000000	/* Trace *{,l,f}stat{,x,at}{,64} syscalls. */
>  
> -#endif /* !STRACE_SYSENT_H */
> +#endif /* !defined(STRACE_SYSENT_H) || defined(FFI_CDEF) */
> -- 
> 2.11.0

Again, the conflicts with Nikolay's patches are still unresolved. I'd
suggest resolving them before one of the patches is merged; it would make
everyone's life easier.




More information about the Strace-devel mailing list