[PATCH 1/2] Decode UFFDIO_* ioctls

Dmitry V. Levin ldv at altlinux.org
Sun May 8 22:50:38 UTC 2016


On Fri, May 06, 2016 at 12:08:40PM +0100, Dr. David Alan Gilbert (git) wrote:
> From: "Dr. David Alan Gilbert" <dgilbert at redhat.com>
> 
> Decode the ioctls associated with the userfaultfd fd.
> Note that they tend to read from and also return result in it's data
> structure.

Mostly OK but some minor changes are needed, see my comments below.

> * configure.ac: Add test for userfaultfd.h.

The current tradition for cases like this is to write
* configure.ac (AC_CHECK_HEADERS): Add linux/userfaultfd.h.

> * userfaultfd.c: Add ioctl decoder.

* userfaultfd.c [HAVE_LINUX_USERFAULTFD_H]: Add ioctl decoder.

> * defs.h: declare that decoder.

* defs.h (uffdio_ioctl): New prototype.

> * ioctl.c: Wire in the new decoder.

* ioctl.c (ioctl_decode) [HAVE_LINUX_USERFAULTFD_H]: Wire in uffdio_ioctl.

> * xlat/uffd_*.in: Create flag xlat for all the IOCTLs
> ---
>  configure.ac                      |   1 +
>  defs.h                            |   1 +
>  ioctl.c                           |   4 ++
>  userfaultfd.c                     | 126 ++++++++++++++++++++++++++++++++++++++
>  xlat/uffd_api_flags.in            |   4 ++
>  xlat/uffd_copy_flags.in           |   2 +
>  xlat/uffd_register_ioctl_flags.in |   4 ++
>  xlat/uffd_register_mode_flags.in  |   3 +
>  xlat/uffd_zeropage_flags.in       |   2 +
>  9 files changed, 147 insertions(+)
>  create mode 100644 xlat/uffd_api_flags.in
>  create mode 100644 xlat/uffd_copy_flags.in
>  create mode 100644 xlat/uffd_register_ioctl_flags.in
>  create mode 100644 xlat/uffd_register_mode_flags.in
>  create mode 100644 xlat/uffd_zeropage_flags.in
> 
> diff --git a/configure.ac b/configure.ac
> index 8a776d2..522a666 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -375,6 +375,7 @@ AC_CHECK_HEADERS(m4_normalize([
>  	linux/securebits.h
>  	linux/sem.h
>  	linux/shm.h
> +	linux/userfaultfd.h
>  	linux/utsname.h
>  	mqueue.h
>  	netinet/sctp.h
> diff --git a/defs.h b/defs.h
> index fe2e46b..ebe8fdc 100644
> --- a/defs.h
> +++ b/defs.h
> @@ -654,6 +654,7 @@ extern int scsi_ioctl(struct tcb *, const unsigned int, long);
>  extern int sock_ioctl(struct tcb *, const unsigned int, long);
>  extern int term_ioctl(struct tcb *, const unsigned int, long);
>  extern int ubi_ioctl(struct tcb *, const unsigned int, long);
> +extern int uffdio_ioctl(struct tcb *, const unsigned int, long);
>  extern int v4l2_ioctl(struct tcb *, const unsigned int, long);
>  
>  extern int tv_nz(const struct timeval *);
> diff --git a/ioctl.c b/ioctl.c
> index f70dc44..1b69e97 100644
> --- a/ioctl.c
> +++ b/ioctl.c
> @@ -263,6 +263,10 @@ ioctl_decode(struct tcb *tcp)
>  	case 'E':
>  		return evdev_ioctl(tcp, code, arg);
>  #endif
> +#ifdef HAVE_LINUX_USERFAULTFD_H
> +	case 0xaa:
> +		return uffdio_ioctl(tcp, code, arg);
> +#endif
>  	default:
>  		break;
>  	}
> diff --git a/userfaultfd.c b/userfaultfd.c
> index 15f825a..71d92dd 100644
> --- a/userfaultfd.c
> +++ b/userfaultfd.c
> @@ -30,6 +30,132 @@
>  
>  #include "xlat/uffd_flags.h"
>  
> +#ifdef HAVE_LINUX_USERFAULTFD_H

Let's append all new code to the end of this file.

> +#include <linux/ioctl.h>
> +#include <linux/userfaultfd.h>
> +
> +#include "xlat/uffd_api_flags.h"
> +#include "xlat/uffd_copy_flags.h"
> +#include "xlat/uffd_register_ioctl_flags.h"
> +#include "xlat/uffd_register_mode_flags.h"
> +#include "xlat/uffd_zeropage_flags.h"

We indent preprocessor directives.

> +
> +static void
> +tprintf_uffdio_range(const struct uffdio_range *range)
> +{
> +	tprintf("{start=%#" PRI__x64 ", len=%#" PRI__x64 "}",
> +		range->start, range->len);
> +}
> +
> +int
> +uffdio_ioctl(struct tcb *tcp, const unsigned int code, const long arg)
> +{
> +	switch (code) {
> +	case UFFDIO_API: {
> +		struct uffdio_api ua;
> +		if (entering(tcp)) {
> +			tprints(", ");
> +			if (umove_or_printaddr(tcp, arg, &ua))
> +				return RVAL_DECODED | 1;
> +			/* Features is intended to contain some flags, but
> +			 * there aren't any defined yet.
> +			 */
> +			tprintf("{api=%#" PRI__x64
> +				", features=%#" PRI__x64,
> +				ua.api, ua.features);
> +			return 1;
> +		} else {
> +			if (!umove(tcp, arg, &ua)) {

In this and all other cases where umove is invoked on exiting syscall,
you are probably not interested in seeing garbage in case of syscall error.
umove_or_printaddr does this check, but umove doesn't, so this has to be
changed to
			if (!syserror(tcp) && !umove(tcp, arg, &ua)) {
and the test updated accordingly.

> +				tprintf(", features.out=%#" PRI__x64
> +					", ioctls=", ua.features);
> +				printflags64(uffd_api_flags, ua.ioctls,
> +					"_UFFDIO_???");

We usually align wrapped arguments under the first one, e.g.
				printflags64(uffd_api_flags, ua.ioctls,
					     "_UFFDIO_???");

> +			}
> +			tprintf("}");
> +			return 1;
> +		}

In this and other similar cases, these two "return 1;" statements could be
moved out of the if/else statement.

> +	}
> +
> +	case UFFDIO_COPY: {
> +		struct uffdio_copy uc;
> +		if (entering(tcp)) {
> +			tprints(", ");
> +			if (umove_or_printaddr(tcp, arg, &uc))
> +				return RVAL_DECODED | 1;
> +			tprintf("{dst=%#" PRI__x64 ", src=%#" PRI__x64
> +				", len=%#" PRI__x64 ", mode=",
> +				uc.dst, uc.src, uc.len);
> +			printflags64(uffd_copy_flags, uc.mode,
> +				"UFFDIO_COPY_???");
> +			return 1;
> +		} else {
> +			if (!umove(tcp, arg, &uc))
> +				tprintf(", copy=%#" PRI__x64, uc.copy);
> +			tprints("}");
> +			return 1;
> +		}
> +	}

All comments for UFFDIO_API case also apply here.

> +	case UFFDIO_REGISTER: {
> +		struct uffdio_register ur;
> +		if (entering(tcp)) {
> +			tprints(", ");
> +			if (umove_or_printaddr(tcp, arg, &ur))
> +				return RVAL_DECODED | 1;
> +			tprintf("{range=");
> +			tprintf_uffdio_range(&ur.range);
> +			tprintf(", mode=");
> +			printflags64(uffd_register_mode_flags, ur.mode,
> +					"UFFDIO_REGISTER_MODE_???");
> +			return 1;
> +		} else {
> +			if (!umove(tcp, arg, &ur)) {
> +				tprintf(", ioctls=");
> +				printflags64(uffd_register_ioctl_flags, ur.ioctls,
> +					"UFFDIO_???");
> +			}
> +			tprints("}");
> +			return 1;
> +		}
> +	}

All comments for UFFDIO_API case also apply here.

> +	case UFFDIO_UNREGISTER:
> +	case UFFDIO_WAKE: {
> +		struct uffdio_range ura;
> +		if (entering(tcp)) {
> +			tprints(", ");
> +			if (umove_or_printaddr(tcp, arg, &ura))
> +				return RVAL_DECODED | 1;
> +			tprintf_uffdio_range(&ura);
> +			return RVAL_DECODED | 1;
> +		}
> +	}

As this case is fully decoded on entering, the check itself could be
omitted, e.g.
		struct uffdio_range ura;
		tprints(", ");
		if (!umove_or_printaddr(tcp, arg, &ura))
			tprintf_uffdio_range(&ura);
		return RVAL_DECODED | 1;

BTW, whoever marked these two ioctls with _IOR clearly missed the point:
an ioctl that passes data from userspace to the kernel is a write ioctl
and should be marked with _IOW.  Now it's too late to change the ABI and
we'll have to live with two write-only _IOR ioctls.

> +	case UFFDIO_ZEROPAGE: {
> +		struct uffdio_zeropage uz;
> +		if (entering(tcp)) {
> +			tprints(", ");
> +			if (umove_or_printaddr(tcp, arg, &uz))
> +				return RVAL_DECODED | 1;
> +			tprintf("{range=");
> +			tprintf_uffdio_range(&uz.range);
> +			tprintf(", mode=");
> +			printflags64(uffd_zeropage_flags, uz.mode,
> +				"UFFDIO_ZEROPAGE_???");
> +			return 1;
> +		} else {
> +			if (!umove(tcp, arg, &uz))
> +				tprintf(", zeropage=%#" PRI__x64, uz.zeropage);
> +			tprints("}");
> +			return 1;
> +		}
> +	}

All comments for UFFDIO_API case also apply here.

> +
> +	default:
> +		return RVAL_DECODED;
> +	}
> +}
> +#endif
> +
>  SYS_FUNC(userfaultfd)
>  {
>  	printflags(uffd_flags, tcp->u_arg[0], "UFFD_???");
> diff --git a/xlat/uffd_api_flags.in b/xlat/uffd_api_flags.in
> new file mode 100644
> index 0000000..fd21087
> --- /dev/null
> +++ b/xlat/uffd_api_flags.in
> @@ -0,0 +1,4 @@
> +#val_type uint64_t
> +1<<_UFFDIO_REGISTER
> +1<<_UFFDIO_UNREGISTER
> +1<<_UFFDIO_API
> diff --git a/xlat/uffd_copy_flags.in b/xlat/uffd_copy_flags.in
> new file mode 100644
> index 0000000..02d6b19
> --- /dev/null
> +++ b/xlat/uffd_copy_flags.in
> @@ -0,0 +1,2 @@
> +#val_type uint64_t
> +UFFDIO_COPY_MODE_DONTWAKE
> diff --git a/xlat/uffd_register_ioctl_flags.in b/xlat/uffd_register_ioctl_flags.in
> new file mode 100644
> index 0000000..f4e3b94
> --- /dev/null
> +++ b/xlat/uffd_register_ioctl_flags.in
> @@ -0,0 +1,4 @@
> +#val_type uint64_t
> +1<<_UFFDIO_WAKE
> +1<<_UFFDIO_COPY
> +1<<_UFFDIO_ZEROPAGE
> diff --git a/xlat/uffd_register_mode_flags.in b/xlat/uffd_register_mode_flags.in
> new file mode 100644
> index 0000000..996b1f3
> --- /dev/null
> +++ b/xlat/uffd_register_mode_flags.in
> @@ -0,0 +1,3 @@
> +#val_type uint64_t
> +UFFDIO_REGISTER_MODE_MISSING
> +UFFDIO_REGISTER_MODE_WP
> diff --git a/xlat/uffd_zeropage_flags.in b/xlat/uffd_zeropage_flags.in
> new file mode 100644
> index 0000000..6d48a04
> --- /dev/null
> +++ b/xlat/uffd_zeropage_flags.in
> @@ -0,0 +1,2 @@
> +#val_type uint64_t
> +UFFDIO_ZEROPAGE_MODE_DONTWAKE
> -- 
> 2.5.5


-- 
ldv
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 819 bytes
Desc: not available
URL: <http://lists.strace.io/pipermail/strace-devel/attachments/20160509/8c8d5e55/attachment.bin>


More information about the Strace-devel mailing list