[PATCH 1/2] Decode UFFDIO_* ioctls

Dr. David Alan Gilbert dgilbert at redhat.com
Tue May 10 09:54:00 UTC 2016


* Dmitry V. Levin (ldv at altlinux.org) wrote:
> On Fri, May 06, 2016 at 12:08:40PM +0100, Dr. David Alan Gilbert (git) wrote:
> > From: "Dr. David Alan Gilbert" <dgilbert at redhat.com>
> > 
> > Decode the ioctls associated with the userfaultfd fd.
> > Note that they tend to read from and also return result in it's data
> > structure.
> 
> Mostly OK but some minor changes are needed, see my comments below.

Done.

> > * configure.ac: Add test for userfaultfd.h.
> 
> The current tradition for cases like this is to write
> * configure.ac (AC_CHECK_HEADERS): Add linux/userfaultfd.h.
> 
> > * userfaultfd.c: Add ioctl decoder.
> 
> * userfaultfd.c [HAVE_LINUX_USERFAULTFD_H]: Add ioctl decoder.
> 
> > * defs.h: declare that decoder.
> 
> * defs.h (uffdio_ioctl): New prototype.
> 
> > * ioctl.c: Wire in the new decoder.
> 
> * ioctl.c (ioctl_decode) [HAVE_LINUX_USERFAULTFD_H]: Wire in uffdio_ioctl.
> 
> > * xlat/uffd_*.in: Create flag xlat for all the IOCTLs
> > ---
> >  configure.ac                      |   1 +
> >  defs.h                            |   1 +
> >  ioctl.c                           |   4 ++
> >  userfaultfd.c                     | 126 ++++++++++++++++++++++++++++++++++++++
> >  xlat/uffd_api_flags.in            |   4 ++
> >  xlat/uffd_copy_flags.in           |   2 +
> >  xlat/uffd_register_ioctl_flags.in |   4 ++
> >  xlat/uffd_register_mode_flags.in  |   3 +
> >  xlat/uffd_zeropage_flags.in       |   2 +
> >  9 files changed, 147 insertions(+)
> >  create mode 100644 xlat/uffd_api_flags.in
> >  create mode 100644 xlat/uffd_copy_flags.in
> >  create mode 100644 xlat/uffd_register_ioctl_flags.in
> >  create mode 100644 xlat/uffd_register_mode_flags.in
> >  create mode 100644 xlat/uffd_zeropage_flags.in
> > 
> > diff --git a/configure.ac b/configure.ac
> > index 8a776d2..522a666 100644
> > --- a/configure.ac
> > +++ b/configure.ac
> > @@ -375,6 +375,7 @@ AC_CHECK_HEADERS(m4_normalize([
> >  	linux/securebits.h
> >  	linux/sem.h
> >  	linux/shm.h
> > +	linux/userfaultfd.h
> >  	linux/utsname.h
> >  	mqueue.h
> >  	netinet/sctp.h
> > diff --git a/defs.h b/defs.h
> > index fe2e46b..ebe8fdc 100644
> > --- a/defs.h
> > +++ b/defs.h
> > @@ -654,6 +654,7 @@ extern int scsi_ioctl(struct tcb *, const unsigned int, long);
> >  extern int sock_ioctl(struct tcb *, const unsigned int, long);
> >  extern int term_ioctl(struct tcb *, const unsigned int, long);
> >  extern int ubi_ioctl(struct tcb *, const unsigned int, long);
> > +extern int uffdio_ioctl(struct tcb *, const unsigned int, long);
> >  extern int v4l2_ioctl(struct tcb *, const unsigned int, long);
> >  
> >  extern int tv_nz(const struct timeval *);
> > diff --git a/ioctl.c b/ioctl.c
> > index f70dc44..1b69e97 100644
> > --- a/ioctl.c
> > +++ b/ioctl.c
> > @@ -263,6 +263,10 @@ ioctl_decode(struct tcb *tcp)
> >  	case 'E':
> >  		return evdev_ioctl(tcp, code, arg);
> >  #endif
> > +#ifdef HAVE_LINUX_USERFAULTFD_H
> > +	case 0xaa:
> > +		return uffdio_ioctl(tcp, code, arg);
> > +#endif
> >  	default:
> >  		break;
> >  	}
> > diff --git a/userfaultfd.c b/userfaultfd.c
> > index 15f825a..71d92dd 100644
> > --- a/userfaultfd.c
> > +++ b/userfaultfd.c
> > @@ -30,6 +30,132 @@
> >  
> >  #include "xlat/uffd_flags.h"
> >  
> > +#ifdef HAVE_LINUX_USERFAULTFD_H
> 
> Let's append all new code to the end of this file.

Done.

> 
> > +#include <linux/ioctl.h>
> > +#include <linux/userfaultfd.h>
> > +
> > +#include "xlat/uffd_api_flags.h"
> > +#include "xlat/uffd_copy_flags.h"
> > +#include "xlat/uffd_register_ioctl_flags.h"
> > +#include "xlat/uffd_register_mode_flags.h"
> > +#include "xlat/uffd_zeropage_flags.h"
> 
> We indent preprocessor directives.

Done.

> 
> > +
> > +static void
> > +tprintf_uffdio_range(const struct uffdio_range *range)
> > +{
> > +	tprintf("{start=%#" PRI__x64 ", len=%#" PRI__x64 "}",
> > +		range->start, range->len);
> > +}
> > +
> > +int
> > +uffdio_ioctl(struct tcb *tcp, const unsigned int code, const long arg)
> > +{
> > +	switch (code) {
> > +	case UFFDIO_API: {
> > +		struct uffdio_api ua;
> > +		if (entering(tcp)) {
> > +			tprints(", ");
> > +			if (umove_or_printaddr(tcp, arg, &ua))
> > +				return RVAL_DECODED | 1;
> > +			/* Features is intended to contain some flags, but
> > +			 * there aren't any defined yet.
> > +			 */
> > +			tprintf("{api=%#" PRI__x64
> > +				", features=%#" PRI__x64,
> > +				ua.api, ua.features);
> > +			return 1;
> > +		} else {
> > +			if (!umove(tcp, arg, &ua)) {
> 
> In this and all other cases where umove is invoked on exiting syscall,
> you are probably not interested in seeing garbage in case of syscall error.
> umove_or_printaddr does this check, but umove doesn't, so this has to be
> changed to
> 			if (!syserror(tcp) && !umove(tcp, arg, &ua)) {
> and the test updated accordingly.

Done.

> 
> > +				tprintf(", features.out=%#" PRI__x64
> > +					", ioctls=", ua.features);
> > +				printflags64(uffd_api_flags, ua.ioctls,
> > +					"_UFFDIO_???");
> 
> We usually align wrapped arguments under the first one, e.g.
> 				printflags64(uffd_api_flags, ua.ioctls,
> 					     "_UFFDIO_???");

Done; I'm not used to tabs :-)

> 
> > +			}
> > +			tprintf("}");
> > +			return 1;
> > +		}
> 
> In this and other similar cases, these two "return 1;" statements could be
> moved out of the if/else statement.

Done.

> > +	}
> > +
> > +	case UFFDIO_COPY: {
> > +		struct uffdio_copy uc;
> > +		if (entering(tcp)) {
> > +			tprints(", ");
> > +			if (umove_or_printaddr(tcp, arg, &uc))
> > +				return RVAL_DECODED | 1;
> > +			tprintf("{dst=%#" PRI__x64 ", src=%#" PRI__x64
> > +				", len=%#" PRI__x64 ", mode=",
> > +				uc.dst, uc.src, uc.len);
> > +			printflags64(uffd_copy_flags, uc.mode,
> > +				"UFFDIO_COPY_???");
> > +			return 1;
> > +		} else {
> > +			if (!umove(tcp, arg, &uc))
> > +				tprintf(", copy=%#" PRI__x64, uc.copy);
> > +			tprints("}");
> > +			return 1;
> > +		}
> > +	}
> 
> All comments for UFFDIO_API case also apply here.

Done.

> 
> > +	case UFFDIO_REGISTER: {
> > +		struct uffdio_register ur;
> > +		if (entering(tcp)) {
> > +			tprints(", ");
> > +			if (umove_or_printaddr(tcp, arg, &ur))
> > +				return RVAL_DECODED | 1;
> > +			tprintf("{range=");
> > +			tprintf_uffdio_range(&ur.range);
> > +			tprintf(", mode=");
> > +			printflags64(uffd_register_mode_flags, ur.mode,
> > +					"UFFDIO_REGISTER_MODE_???");
> > +			return 1;
> > +		} else {
> > +			if (!umove(tcp, arg, &ur)) {
> > +				tprintf(", ioctls=");
> > +				printflags64(uffd_register_ioctl_flags, ur.ioctls,
> > +					"UFFDIO_???");
> > +			}
> > +			tprints("}");
> > +			return 1;
> > +		}
> > +	}
> 
> All comments for UFFDIO_API case also apply here.

Done.
> 
> > +	case UFFDIO_UNREGISTER:
> > +	case UFFDIO_WAKE: {
> > +		struct uffdio_range ura;
> > +		if (entering(tcp)) {
> > +			tprints(", ");
> > +			if (umove_or_printaddr(tcp, arg, &ura))
> > +				return RVAL_DECODED | 1;
> > +			tprintf_uffdio_range(&ura);
> > +			return RVAL_DECODED | 1;
> > +		}
> > +	}
> 
> As this case is fully decoded on entering, the check itself could be
> omitted, e.g.
> 		struct uffdio_range ura;
> 		tprints(", ");
> 		if (!umove_or_printaddr(tcp, arg, &ura))
> 			tprintf_uffdio_range(&ura);
> 		return RVAL_DECODED | 1;

Done.

> BTW, whoever marked these two ioctls with _IOR clearly missed the point:
> an ioctl that passes data from userspace to the kernel is a write ioctl
> and should be marked with _IOW.  Now it's too late to change the ABI and
> we'll have to live with two write-only _IOR ioctls.

Thanks; I mentioned this to Andrea; he'll post a patch to comment it in the
userfaultfd.h header and also add a comment to the kernel's ioctl header
making clear which way IOR/IOW means.

> > +	case UFFDIO_ZEROPAGE: {
> > +		struct uffdio_zeropage uz;
> > +		if (entering(tcp)) {
> > +			tprints(", ");
> > +			if (umove_or_printaddr(tcp, arg, &uz))
> > +				return RVAL_DECODED | 1;
> > +			tprintf("{range=");
> > +			tprintf_uffdio_range(&uz.range);
> > +			tprintf(", mode=");
> > +			printflags64(uffd_zeropage_flags, uz.mode,
> > +				"UFFDIO_ZEROPAGE_???");
> > +			return 1;
> > +		} else {
> > +			if (!umove(tcp, arg, &uz))
> > +				tprintf(", zeropage=%#" PRI__x64, uz.zeropage);
> > +			tprints("}");
> > +			return 1;
> > +		}
> > +	}
> 
> All comments for UFFDIO_API case also apply here.

Done.

Thanks,

Dave

> > +
> > +	default:
> > +		return RVAL_DECODED;
> > +	}
> > +}
> > +#endif
> > +
> >  SYS_FUNC(userfaultfd)
> >  {
> >  	printflags(uffd_flags, tcp->u_arg[0], "UFFD_???");
> > diff --git a/xlat/uffd_api_flags.in b/xlat/uffd_api_flags.in
> > new file mode 100644
> > index 0000000..fd21087
> > --- /dev/null
> > +++ b/xlat/uffd_api_flags.in
> > @@ -0,0 +1,4 @@
> > +#val_type uint64_t
> > +1<<_UFFDIO_REGISTER
> > +1<<_UFFDIO_UNREGISTER
> > +1<<_UFFDIO_API
> > diff --git a/xlat/uffd_copy_flags.in b/xlat/uffd_copy_flags.in
> > new file mode 100644
> > index 0000000..02d6b19
> > --- /dev/null
> > +++ b/xlat/uffd_copy_flags.in
> > @@ -0,0 +1,2 @@
> > +#val_type uint64_t
> > +UFFDIO_COPY_MODE_DONTWAKE
> > diff --git a/xlat/uffd_register_ioctl_flags.in b/xlat/uffd_register_ioctl_flags.in
> > new file mode 100644
> > index 0000000..f4e3b94
> > --- /dev/null
> > +++ b/xlat/uffd_register_ioctl_flags.in
> > @@ -0,0 +1,4 @@
> > +#val_type uint64_t
> > +1<<_UFFDIO_WAKE
> > +1<<_UFFDIO_COPY
> > +1<<_UFFDIO_ZEROPAGE
> > diff --git a/xlat/uffd_register_mode_flags.in b/xlat/uffd_register_mode_flags.in
> > new file mode 100644
> > index 0000000..996b1f3
> > --- /dev/null
> > +++ b/xlat/uffd_register_mode_flags.in
> > @@ -0,0 +1,3 @@
> > +#val_type uint64_t
> > +UFFDIO_REGISTER_MODE_MISSING
> > +UFFDIO_REGISTER_MODE_WP
> > diff --git a/xlat/uffd_zeropage_flags.in b/xlat/uffd_zeropage_flags.in
> > new file mode 100644
> > index 0000000..6d48a04
> > --- /dev/null
> > +++ b/xlat/uffd_zeropage_flags.in
> > @@ -0,0 +1,2 @@
> > +#val_type uint64_t
> > +UFFDIO_ZEROPAGE_MODE_DONTWAKE
> > -- 
> > 2.5.5
> 
> 
> -- 
> ldv



> ------------------------------------------------------------------------------
> Find and fix application performance issues faster with Applications Manager
> Applications Manager provides deep performance insights into multiple tiers of
> your business applications. It resolves application problems quickly and
> reduces your MTTR. Get your free trial!
> https://ad.doubleclick.net/ddm/clk/302982198;130105516;z

> _______________________________________________
> Strace-devel mailing list
> Strace-devel at lists.sourceforge.net
> https://lists.sourceforge.net/lists/listinfo/strace-devel

--
Dr. David Alan Gilbert / dgilbert at redhat.com / Manchester, UK




More information about the Strace-devel mailing list