[RFC PATCH] ioctl: decode VFIO_IOMMU_MAP_DMA

Alyssa Ross hi at alyssa.is
Thu May 6 17:07:43 UTC 2021


---
The argument decoding itself is pretty solid, I think (apart from a
couple of small things I'll get to in a bit).

But the main reason I'm submitting this as an RFC is that I'm not sure
how to handle the 'request' argument of the ioctl() call
(i.e. VFIO_IOMMU_MAP_DMA).  Some of the VFIO ioctl request numbers are
reused.  VFIO_IOMMU_MAP_DMA == VFIO_DEVICE_GET_PCI_HOT_RESET_INFO.  To
tell them apart, we have to check the device type of the file the
ioctl was sent to.  It's easy enough to do that in vfio_ioctl(), as
I've done here, but that means that the request argument will still
show up in strace as "VFIO_DEVICE_PCI_HOT_RESET or VFIO_IOMMU_MAP_DMA".
Ideally, we'd do this check earlier, before printing the request
argument, so we could figure out which of the two candidate names to
print.  But this doesn't seem to be a problem strace has solved before,
and it's not at all obvious to me how ioctl.c could be modified to
accomodate this, in a way where the disambiguated name could be passed
through ioctl_decode() to vfio_ioctl().  I'd appreciate any direction I
could be offered on this.

Another, easier question: should I use a bundled header for this?  I
did that in my initial implementation, just because it felt easiest to
me at the time.  (I've ommitted the content of the header in this
patch, since that should make reviewing easier, and I don't expect
this patch to be applied as-is anyway.)  I don't really understand
when it's preferable to let xlat detect values, when fallbacks should
be given, and when bundled headers should be used.

Finally, a note about the test: we have to open /dev/vfio/vfio for
real to test this, since the decoding code will check which device the
ioctl is sent to (so we can't just do ioctl(-1, ...)).  On most Linux
systems today, only root will be able to do that, but this will change
in the next version of systemd[1], so going forward any user will be
able to run this test without it being skipped due to failing to open
/dev/vfio/vfio.

[1]: systemd commit f8eb410 ("udev: make /dev/vfio/vfio 0666")

 bundled/Makefile.am                       |    1 +
 * bundled/linux/include/uapi/linux/vfio.h | 1362 +++++++++++++++++++++++
 src/Makefile.am                           |    1 +
 src/defs.h                                |    6 +
 src/ioctl.c                               |    2 +
 src/mmap_cache.c                          |    1 -
 src/pidns.c                               |    1 -
 src/strace.c                              |    1 -
 src/util.c                                |    9 +-
 src/vfio.c                                |   93 ++
 src/xlat/vfio_iommu_map_flags.in          |    4 +
 tests/.gitignore                          |    1 +
 tests/Makefile.am                         |    1 +
 tests/gen_tests.in                        |    1 +
 tests/ioctl_vfio.c                        |   66 ++
 15 files changed, 1546 insertions(+), 4 deletions(-)
 create mode 100644 bundled/linux/include/uapi/linux/vfio.h
 create mode 100644 src/vfio.c
 create mode 100644 src/xlat/vfio_iommu_map_flags.in
 create mode 100644 tests/ioctl_vfio.c

 * content ommitted from this patch; see above.

diff --git a/bundled/Makefile.am b/bundled/Makefile.am
index fe2b1ce4..398ab85b 100644
--- a/bundled/Makefile.am
+++ b/bundled/Makefile.am
@@ -99,6 +99,7 @@ EXTRA_DIST = \
 	linux/include/uapi/linux/v4l2-common.h \
 	linux/include/uapi/linux/v4l2-controls.h \
 	linux/include/uapi/linux/videodev2.h \
+	linux/include/uapi/linux/vfio.h \
 	linux/include/uapi/mtd/mtd-abi.h \
 	linux/include/uapi/mtd/ubi-user.h \
 	linux/include/uapi/rdma/ib_user_verbs.h \
diff --git a/src/Makefile.am b/src/Makefile.am
index de19b8bf..6e149534 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -368,6 +368,7 @@ libstrace_a_SOURCES =	\
 	utime.c		\
 	utimes.c	\
 	v4l2.c		\
+	vfio.c		\
 	wait.c		\
 	wait.h		\
 	watchdog_ioctl.c \
diff --git a/src/defs.h b/src/defs.h
index 5c369ece..b913b4f2 100644
--- a/src/defs.h
+++ b/src/defs.h
@@ -37,6 +37,7 @@
 # include "error_prints.h"
 # include "gcc_compat.h"
 # include "kernel_types.h"
+# include "largefile_wrappers.h"
 # include "list.h"
 # include "macros.h"
 # include "mpers_type.h"
@@ -1077,6 +1078,10 @@ extern int get_proc_pid(struct tcb *);
 extern int translate_pid(struct tcb *, int dest_id, enum pid_type type,
 		    int *proc_pid_ptr);
 
+strace_stat_t;
+extern int
+stat_fd_in_proc(pid_t pid, int fd, strace_stat_t *statbuf);
+
 extern void
 dumpiov_in_msghdr(struct tcb *, kernel_ulong_t addr, kernel_ulong_t data_size);
 
@@ -1274,6 +1279,7 @@ DECL_IOCTL(tee);
 DECL_IOCTL(term);
 DECL_IOCTL(ubi);
 DECL_IOCTL(uffdio);
+DECL_IOCTL(vfio);
 DECL_IOCTL(watchdog);
 # undef DECL_IOCTL
 
diff --git a/src/ioctl.c b/src/ioctl.c
index 12029028..7d257148 100644
--- a/src/ioctl.c
+++ b/src/ioctl.c
@@ -352,6 +352,8 @@ ioctl_decode(struct tcb *tcp)
 		return scsi_ioctl(tcp, code, arg);
 	case '$': /* 0x24 */
 		return perf_ioctl(tcp, code, arg);
+	case ';': /* 0x3b */
+		return vfio_ioctl(tcp, code, arg);
 	case '=': /* 0x3d */
 		return ptp_ioctl(tcp, code, arg);
 	case 'E':
diff --git a/src/mmap_cache.c b/src/mmap_cache.c
index f113f1e2..53e81c1e 100644
--- a/src/mmap_cache.c
+++ b/src/mmap_cache.c
@@ -8,7 +8,6 @@
 #include "defs.h"
 #include <limits.h>
 
-#include "largefile_wrappers.h"
 #include "mmap_cache.h"
 #include "mmap_notify.h"
 #include "xstring.h"
diff --git a/src/pidns.c b/src/pidns.c
index 17cb7793..05ab5842 100644
--- a/src/pidns.c
+++ b/src/pidns.c
@@ -22,7 +22,6 @@
 #include <sys/stat.h>
 
 #include <linux/nsfs.h>
-#include "largefile_wrappers.h"
 #include "trie.h"
 #include "xmalloc.h"
 #include "xstring.h"
diff --git a/src/strace.c b/src/strace.c
index 61a598cb..da7a57f6 100644
--- a/src/strace.c
+++ b/src/strace.c
@@ -30,7 +30,6 @@
 
 #include "kill_save_errno.h"
 #include "filter_seccomp.h"
-#include "largefile_wrappers.h"
 #include "mmap_cache.h"
 #include "number_set.h"
 #include "ptrace_syscall_info.h"
diff --git a/src/util.c b/src/util.c
index e87ac94b..f611128f 100644
--- a/src/util.c
+++ b/src/util.c
@@ -23,7 +23,6 @@
 #endif
 #include <sys/uio.h>
 
-#include "largefile_wrappers.h"
 #include "number_set.h"
 #include "print_utils.h"
 #include "secontext.h"
@@ -684,6 +683,14 @@ printfd_pid_tracee_ns(struct tcb *tcp, pid_t pid, int fd)
 	printfd_pid(tcp, strace_pid, fd);
 }
 
+int
+stat_fd_in_proc(pid_t pid, int fd, strace_stat_t *statbuf)
+{
+	char path[33];
+	xsprintf(path, "/proc/%d/fd/%d", pid, fd);
+	return stat_file(path, statbuf);
+}
+
 /*
  * Quote string `instr' of length `size'
  * Write up to (3 + `size' * 4) bytes to `outstr' buffer.
diff --git a/src/vfio.c b/src/vfio.c
new file mode 100644
index 00000000..b4b96b6a
--- /dev/null
+++ b/src/vfio.c
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2021 Alyssa Ross <hi at alyssa.is>
+ * Copyright (c) 2021 The strace developers.
+ * All rights reserved.
+ *
+ * SPDX-License-Identifier: LGPL-2.1-or-later
+ */
+
+#include "defs.h"
+#include <sys/stat.h>
+#include <sys/sysmacros.h>
+#include <linux/vfio.h>
+#include "xlat/vfio_iommu_map_flags.h"
+
+static int
+vfio_device_ioctl(struct tcb *const tcp, const unsigned int code,
+		  const kernel_ulong_t arg)
+{
+	return RVAL_DECODED;
+}
+
+static int
+vfio_iommu_ioctl(struct tcb *const tcp, const unsigned int code,
+		 const kernel_ulong_t arg)
+{
+	switch (code) {
+	case VFIO_IOMMU_MAP_DMA: {
+		struct vfio_iommu_type1_dma_map map;
+		size_t minsz = offsetofend(typeof(map), size);
+
+		tprint_arg_next();
+		if (umove_or_printaddr(tcp, arg, &map))
+			break;
+
+		if (map.argsz < minsz) {
+			printaddr(arg);
+			break;
+		}
+
+		tprint_struct_begin();
+
+		PRINT_FIELD_U(map, argsz);
+
+		tprint_struct_next();
+		PRINT_FIELD_FLAGS(map, flags, vfio_iommu_map_flags,
+				  "VFIO_IOMMU_MAP_FLAG_???");
+
+		tprint_struct_next();
+		PRINT_FIELD_PTR(map, vaddr);
+
+		tprint_struct_next();
+		PRINT_FIELD_PTR(map, iova);
+
+		tprint_struct_next();
+		PRINT_FIELD_U64(map, size);
+
+		if (map.argsz > sizeof map)
+			print_nonzero_bytes(tcp, tprint_struct_next,
+					    arg, sizeof map,
+					    MIN(map.argsz, get_pagesize()),
+					    QUOTE_FORCE_HEX);
+
+		tprint_struct_end();
+		break;
+	}
+
+	default:
+		return RVAL_DECODED;
+	}
+
+	return RVAL_IOCTL_DECODED;
+}
+
+int
+vfio_ioctl(struct tcb *const tcp, const unsigned int code,
+	   const kernel_ulong_t arg)
+{
+	if (!verbose(tcp))
+		return RVAL_DECODED;
+
+	strace_stat_t stat;
+	if (code >= VFIO_IOMMU_GET_INFO) {
+		if (stat_fd_in_proc(tcp->pid, tcp->u_arg[0], &stat) == -1)
+			return RVAL_DECODED;
+
+		if (stat.st_rdev == makedev(0xa, 0xc4))
+			return vfio_iommu_ioctl(tcp, code, arg);
+
+		return vfio_device_ioctl(tcp, code, arg);
+	}
+
+	return RVAL_IOCTL_DECODED;
+}
diff --git a/src/xlat/vfio_iommu_map_flags.in b/src/xlat/vfio_iommu_map_flags.in
new file mode 100644
index 00000000..ac575d19
--- /dev/null
+++ b/src/xlat/vfio_iommu_map_flags.in
@@ -0,0 +1,4 @@
+#unconditional
+VFIO_DMA_MAP_FLAG_READ
+VFIO_DMA_MAP_FLAG_WRITE
+VFIO_DMA_MAP_FLAG_VADDR
diff --git a/tests/.gitignore b/tests/.gitignore
index 8f2472b6..32380ee8 100644
--- a/tests/.gitignore
+++ b/tests/.gitignore
@@ -337,6 +337,7 @@ ioctl_v4l2-v
 ioctl_v4l2-v-Xabbrev
 ioctl_v4l2-v-Xraw
 ioctl_v4l2-v-Xverbose
+ioctl_vfio
 ioctl_watchdog
 ioperm
 iopl
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 126e8436..42c8bdf9 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -199,6 +199,7 @@ check_PROGRAMS = $(PURE_EXECUTABLES) \
 	ioctl_v4l2-success-v-Xabbrev \
 	ioctl_v4l2-success-v-Xraw \
 	ioctl_v4l2-success-v-Xverbose \
+	ioctl_vfio \
 	ioprio--pidns-translation \
 	is_linux_mips_n64 \
 	kcmp-y--pidns-translation \
diff --git a/tests/gen_tests.in b/tests/gen_tests.in
index 97b47b98..ae9ae2fe 100644
--- a/tests/gen_tests.in
+++ b/tests/gen_tests.in
@@ -328,6 +328,7 @@ ioctl_v4l2-v	+ioctl.test -v
 ioctl_v4l2-v-Xabbrev	+ioctl.test -v -Xabbrev
 ioctl_v4l2-v-Xraw	+ioctl.test -v -Xraw
 ioctl_v4l2-v-Xverbose	+ioctl.test -v -Xverbose
+ioctl_vfio	+ioctl.test
 ioctl_watchdog	+ioctl.test
 ioperm	-a27
 iopl	-a8
diff --git a/tests/ioctl_vfio.c b/tests/ioctl_vfio.c
new file mode 100644
index 00000000..ca8c504b
--- /dev/null
+++ b/tests/ioctl_vfio.c
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2021 Alyssa Ross <hi at alyssa.is>
+ * Copyright (c) 2021 The strace developers.
+ * All rights reserved.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "tests.h"
+
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/ioctl.h>
+#include <linux/vfio.h>
+
+void test_iommu_map_dma(int fd)
+{
+	struct vfio_iommu_type1_dma_map *map = calloc(1, sizeof *map + 1);
+	if (!map)
+		perror_msg_and_fail("calloc");
+
+	map->argsz = offsetofend(struct vfio_iommu_type1_dma_map, size) - 1;
+	ioctl(fd, VFIO_IOMMU_MAP_DMA, map);
+	printf("ioctl(%d, VFIO_DEVICE_PCI_HOT_RESET or VFIO_IOMMU_MAP_DMA, %p) "
+	       "= -1 EINVAL (%m)\n", fd, map);
+
+	map->argsz = offsetofend(struct vfio_iommu_type1_dma_map, size);
+	ioctl(fd, VFIO_IOMMU_MAP_DMA, map);
+	printf("ioctl(%d, VFIO_DEVICE_PCI_HOT_RESET or VFIO_IOMMU_MAP_DMA, "
+	       "{argsz=32, flags=0, vaddr=NULL, iova=NULL, size=0}) = -1 EINVAL (%m)\n",
+	       fd);
+
+	map->argsz = offsetofend(struct vfio_iommu_type1_dma_map, size) + 1;
+	((unsigned char *)map)[sizeof *map] = 0xff;
+	ioctl(fd, VFIO_IOMMU_MAP_DMA, map);
+	printf("ioctl(%d, VFIO_DEVICE_PCI_HOT_RESET or VFIO_IOMMU_MAP_DMA, "
+	       "{argsz=33, flags=0, vaddr=NULL, iova=NULL, size=0, "
+	       "/* bytes 32..32 */ \"\\xff\"}) = -1 EINVAL (%m)\n", fd);
+
+	map->argsz = offsetofend(struct vfio_iommu_type1_dma_map, size);
+	map->flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE
+		| VFIO_DMA_MAP_FLAG_VADDR;
+	map->vaddr = 0xfedcba9876543210;
+	map->iova = 0x0123456789abcdef;
+	map->size = -1U;
+	ioctl(fd, VFIO_IOMMU_MAP_DMA, map);
+	printf("ioctl(%d, VFIO_DEVICE_PCI_HOT_RESET or VFIO_IOMMU_MAP_DMA, "
+	       "{argsz=32, flags=VFIO_DMA_MAP_FLAG_READ|VFIO_DMA_MAP_FLAG_WRITE"
+	       "|VFIO_DMA_MAP_FLAG_VADDR, vaddr=0xfedcba9876543210, "
+	       "iova=0x123456789abcdef, size=%u}) = -1 EINVAL (%m)\n", fd, -1U);
+
+	free(map);
+}
+
+int
+main(void)
+{
+	int fd = open("/dev/vfio/vfio", O_RDONLY);
+	if (fd == -1)
+		return 77;
+	
+	test_iommu_map_dma(fd);
+
+	printf("+++ exited with 0 +++\n");
+}
-- 
2.31.0



More information about the Strace-devel mailing list