[PATCH 2/4] kvm: attach the exit reason of vcpu as auxstr to KVM_RUN output
Dmitry V. Levin
ldv at altlinux.org
Mon May 14 12:38:39 UTC 2018
On Mon, May 07, 2018 at 08:26:02PM +0900, Masatake YAMATO wrote:
> In KVM, a virtual machine implementation like Qemu can access a vcpu
> via ioctl. KVM_RUN is an ioctl command to enter vcpu. The command
> returns the control for some reasons: Needs of device emulation or
> consuming time slices are typical ones. The vmi takes a different
> action for the reason.
>
> We, strace users, want to know the reason to understand kvm. This
> change prints the reason as auxstr if -K option is given, and if
> strace runs on Linux 4.16.0 or higher, which includes
> e46b469278a59781f9b25ff608af84892963821b, "kvm: embed vcpu id to
> dentry of vcpu anon inode."
>
> The way to get the reason is a bit complicated because the ioctl
> doesn't return it to the userspace directly. Instead, the vmi and kvm
> communicate via an area of the process virtual memory where the fd of
> vcpu is mmap'ed. strace must peek the area to know the reason.
>
> The change does three things: (1) recording the area for given vcpu
> when the target calls VCPU_CREATE to vcpu_info_list per tcb data
> field, (2) verifying the data recorded in vcpu_info_list before
> doing (3), and (3) decoding the exit reason field of the area.
>
> The change is not so simple because there is a case that strace
> doesn't have a chance to do (1) if -p option is used. In the case,
> vcpu_info data created in the step (2).
>
> The area has more fields than "exit reason." Dumping them is future
> work.
>
> * defs.h [HAVE_LINUX_KVM_H]: (kvm_run_structure_decoder_init,
> kvm_vcpu_info_free): New declarations.
> (struct tcb): Add new field vcpu_info_list.
> * strace.c (usage): Describe -K option.
> (init): Introduce -K option. Call
> kvm_run_structure_decoder_init to enable for attaching the exit reason
> to auxstr.
> (droptcb): Call kvm_vcpu_info_free.
> * xlat/kvm_exit_reason.in: The list for decoding the exit reason.
> * kvm.c: Include xmalloc.h, mmap_cache.h, and sys/mman.h.
> (dump_kvm_run_structure_level): New file static variable.
> (kvm_run_structure_decoder_init): New function.
> (vcpu_info): New struct definition representing the 3-tuple: vcpu file
> descriptor, id of the vcpu, and mmap'ed entry.
> (vcpu_find, vcpu_alloc, vcpu_register, vcpu_getinfo,
> kvm_vcpu_info_free): New functions to access tcb's vcpu_info_list
> field and vcpu_info data type.
> (is_map_for_file, extrac_cpuid, map_len): New helper functions.
> (kvm_ioclt_run_attach_auxstr, kvm_ioctl_decode_run): New functions
> decoding vcpu exit reason and attaching the decoded data to auxstr
> field of tcb.
> (kvm_ioctl_create_vcpu): Call vcpu_register to make an entry mapping a
> file descriptor and the vcpu id associated with the fd.
> (kvm_ioctl): Call kvm_ioctl_decode_run.
>
> Signed-off-by: Masatake YAMATO <yamato at redhat.com>
> ---
> defs.h | 9 ++
> kvm.c | 235 ++++++++++++++++++++++++++++++++++++++++
> strace.c | 30 +++++
> xlat/kvm_exit_reason.in | 29 +++++
> 4 files changed, 303 insertions(+)
> create mode 100644 xlat/kvm_exit_reason.in
>
> diff --git a/defs.h b/defs.h
> index 5116fc21..407e3a02 100644
> --- a/defs.h
> +++ b/defs.h
> @@ -220,6 +220,10 @@ struct tcb {
>
> struct mmap_cache_t *mmap_cache;
>
> +#ifdef HAVE_LINUX_KVM_H
> + struct vcpu_info *vcpu_info_list;
> +#endif
> +
> #ifdef ENABLE_STACKTRACE
> void *unwind_ctx;
> struct unwind_queue_t *unwind_queue;
> @@ -845,6 +849,11 @@ extern void unwind_tcb_print(struct tcb *);
> extern void unwind_tcb_capture(struct tcb *);
> #endif
>
> +#ifdef HAVE_LINUX_KVM_H
> +extern void kvm_run_structure_decoder_init(void);
> +extern void kvm_vcpu_info_free(struct tcb *);
> +#endif
> +
> static inline int
> printstrn(struct tcb *tcp, kernel_ulong_t addr, kernel_ulong_t len)
> {
> diff --git a/kvm.c b/kvm.c
> index 86fd9e50..ab9aeb65 100644
> --- a/kvm.c
> +++ b/kvm.c
> @@ -34,13 +34,189 @@
> # include <linux/kvm.h>
> # include "print_fields.h"
> # include "arch_kvm.c"
> +# include "xmalloc.h"
> +# include "mmap_cache.h"
> +# include <sys/mman.h>
Do you need <sys/mman.h>?
> +
> +struct vcpu_info {
> + int fd;
> + int cpuid;
> + long mmap_addr;
> + unsigned long mmap_len;
> + bool resolved;
> + struct vcpu_info *next;
> +};
> +
> +static int dump_kvm_run_structure_level;
Is it expected to have other values than 0 or 1?
> +static struct vcpu_info *
> +vcpu_find(struct tcb *const tcp, int fd)
> +{
> + struct vcpu_info *head = tcp->vcpu_info_list;
> + for (struct vcpu_info *vcpu_info = head;
for (struct vcpu_info *vcpu_info = tcp->vcpu_info_list;
> + vcpu_info;
> + vcpu_info = vcpu_info->next)
> + if (vcpu_info->fd == fd)
> + return vcpu_info;
> +
> + return NULL;
> +}
> +
> +static struct vcpu_info *
> +vcpu_alloc(struct tcb *const tcp, int fd, int cpuid)
> +{
> + struct vcpu_info *vcpu_info = xmalloc(sizeof(struct vcpu_info));
I'd just use xcalloc:
struct vcpu_info *vcpu_info = xcalloc(1, sizeof(*vcpu_info));
> +
> + vcpu_info->fd = fd;
> +
> + vcpu_info->cpuid = cpuid;
> + vcpu_info->resolved = false;
> + vcpu_info->mmap_addr = 0L;
> + vcpu_info->mmap_len = 0L;
> +
> + vcpu_info->next = tcp->vcpu_info_list;
> + tcp->vcpu_info_list = vcpu_info;
> +
> + return vcpu_info;
> +}
> +
> +void
> +kvm_vcpu_info_free(struct tcb *tcp)
> +{
> + struct vcpu_info *head = tcp->vcpu_info_list;
> + struct vcpu_info *next;
> +
> + while (head) {
> + next = head->next;
> + free (head);
We don't use space before function name and parenthesis.
> + head = next;
> + }
for (head = tcp->vcpu_info_list; head; head = next)
> +
> + tcp->vcpu_info_list = NULL;
> +}
> +
> +static void
> +vcpu_register(struct tcb *const tcp, int fd, int cpuid)
> +{
> + struct vcpu_info *vcpu_info = vcpu_find(tcp, fd);
> +
> + if (!vcpu_info)
> + vcpu_info = vcpu_alloc(tcp, fd, cpuid);
> + else if (vcpu_info->cpuid != cpuid)
> + {
> + vcpu_info->cpuid = cpuid;
> + vcpu_info->resolved = false;
> + }
> +}
> +
> +static bool
> +is_map_for_file(struct mmap_cache_entry_t *map_info, void *data)
> +{
> + /* major version for anon inode may be given in get_anon_bdev()
> + * in linux kernel.
> + *
> + * *p = MKDEV(0, dev & MINORMASK);
> + *-----------------^
> + */
> + if (map_info->binary_filename &&
> + map_info->major == 0 &&
> + strcmp(map_info->binary_filename, (const char *)data) == 0)
> + return true;
> + return false;
return map_info->binary_filename &&
map_info->major == 0 &&
strcmp(map_info->binary_filename, (const char *) data) == 0;
> +}
> +
> +static unsigned long
> +map_len(struct mmap_cache_entry_t *map_info)
> +{
> + if (map_info->start_addr >= map_info->end_addr)
> + return 0;
> + else
> + return map_info->end_addr - map_info->start_addr;
return map_info->end_addr > map_info->start_addr ?
map_info->end_addr - map_info->start_addr : 0;
> +}
> +
> +static int
> +extract_cpuid(const char *num)
> +{
> + unsigned long cpuid;
> +
> + errno = 0;
> + cpuid = strtoul(num, NULL, 10);
> + if (errno)
> + return -1;
> + else
> + return (int)cpuid;
> +}
Wouldn't string_to_uint() be OK?
> +
> +static struct vcpu_info*
> +vcpu_get_info(struct tcb *const tcp, int fd)
> +{
> + struct vcpu_info *vcpu_info = vcpu_find(tcp, fd);
> + struct mmap_cache_entry_t *map_info;
> + enum mmap_cache_rebuild_result mc_stat;
> +
> + mc_stat = mmap_cache_rebuild_if_invalid(tcp, __func__);
> + if (mc_stat == MMAP_CACHE_REBUILD_NOCACHE)
> + return NULL;
> +
> + if (vcpu_info && vcpu_info->resolved) {
> + if (mc_stat == MMAP_CACHE_REBUILD_READY)
> + return vcpu_info;
> + else {
> + map_info = mmap_cache_search(tcp, vcpu_info->mmap_addr);
> + if ((map_info
> + && strncmp(map_info->binary_filename,
> + "anon_inode:kvm-vcpu:", 20) == 0)) {
> + int cpuid = extract_cpuid (map_info->binary_filename);
Did you mean map_info->binary_filename + 20?
Please don't hardcode 20, use STR_STRIP_PREFIX instead.
> + if (cpuid < 0)
> + return NULL;
> + else if (vcpu_info->cpuid == cpuid)
> + return vcpu_info;
> + else
> + vcpu_info->resolved = false;
> + } else
> + /* The vcpu vma may be mremap'ed. */
> + vcpu_info->resolved = false;
> + }
> + }
> +
> + /* Slow path */
> + char path[PATH_MAX + 1];
> + if (getfdpath(tcp, fd, path, sizeof(path)) >= 0
> + && strncmp(path, "anon_inode:kvm-vcpu:", 20) == 0) {
> + map_info = mmap_cache_search_custom(tcp, is_map_for_file, path);
> + if (map_info) {
> + int cpuid = extract_cpuid(path + 20);
> + if (cpuid < 0)
> + return NULL;
> + else if (!vcpu_info)
> + vcpu_info = vcpu_alloc(tcp, fd, cpuid);
> + else if (vcpu_info->cpuid != cpuid)
> + vcpu_info->cpuid = cpuid;
> + vcpu_info->mmap_addr = map_info->start_addr;
> + vcpu_info->mmap_len = map_len(map_info);
> + vcpu_info->resolved = true;
> + } else
> + vcpu_info->resolved = false;
> + }
> +
> + if (!vcpu_info->resolved)
> + vcpu_info = NULL;
> + return vcpu_info;
> +}
>
> static int
> kvm_ioctl_create_vcpu(struct tcb *const tcp, const kernel_ulong_t arg)
> {
> uint32_t cpuid = arg;
>
> + if (entering(tcp))
> + return 0;
> +
> tprintf(", %u", cpuid);
Let's print cpuid on entering syscall.
> +
> + if (dump_kvm_run_structure_level && tcp->u_rval >= 0)
> + vcpu_register(tcp, tcp->u_rval, (int)cpuid);
> +
> return RVAL_IOCTL_DECODED | RVAL_FD;
> }
>
> @@ -103,6 +279,55 @@ kvm_ioctl_decode_sregs(struct tcb *const tcp, const unsigned int code,
> }
> # endif /* HAVE_STRUCT_KVM_SREGS */
>
> +# include "xlat/kvm_exit_reason.h"
> +static void
> +kvm_ioctl_run_attach_auxstr(struct tcb *const tcp,
> + struct vcpu_info *info)
> +
> +{
> + static struct kvm_run vcpu_run_struct;
> +
> + if (info->mmap_len < sizeof(vcpu_run_struct))
> + return;
> +
> + if (umoven(tcp, info->mmap_addr,
> + sizeof(vcpu_run_struct), &vcpu_run_struct) < 0)
> + return;
This looks like a use case for umove().
> +
> + tcp->auxstr = xlookup(kvm_exit_reason, vcpu_run_struct.exit_reason);
> + if (!tcp->auxstr)
> + tcp->auxstr = "KVM_EXIT_???";
This looks like a use case for sprintxval().
> +}
> +
> +static int
> +kvm_ioctl_decode_run(struct tcb *const tcp)
> +{
> + int fd;
> + struct vcpu_info *info;
> + int r;
> +
> + if (entering(tcp))
> + return 0;
> +
> + r = RVAL_DECODED;
> +
> + if (tcp->u_rval < 0)
> + return r;
Did you mean syserror(tcp)?
> + if (dump_kvm_run_structure_level) {
> + tcp->auxstr = NULL;
> + fd = tcp->u_arg[0];
> + info = vcpu_get_info(tcp, fd);
> +
> + if (info) {
> + kvm_ioctl_run_attach_auxstr(tcp, info);
> + if (tcp->auxstr)
> + r |= RVAL_STR;
> + }
> + }
> +
> + return r;
> +}
> +
> int
> kvm_ioctl(struct tcb *const tcp, const unsigned int code, const kernel_ulong_t arg)
> {
> @@ -129,7 +354,10 @@ kvm_ioctl(struct tcb *const tcp, const unsigned int code, const kernel_ulong_t a
>
> case KVM_CREATE_VM:
> return RVAL_DECODED | RVAL_FD;
> +
> case KVM_RUN:
> + return kvm_ioctl_decode_run(tcp);
> +
> case KVM_GET_VCPU_MMAP_SIZE:
> case KVM_GET_API_VERSION:
> default:
> @@ -137,4 +365,11 @@ kvm_ioctl(struct tcb *const tcp, const unsigned int code, const kernel_ulong_t a
> }
> }
>
> +void
> +kvm_run_structure_decoder_init(void)
> +{
> + dump_kvm_run_structure_level = 1;
> + mmap_cache_enable();
> +}
> +
> #endif /* HAVE_LINUX_KVM_H */
> diff --git a/strace.c b/strace.c
> index 4b374857..db07b4f1 100644
> --- a/strace.c
> +++ b/strace.c
> @@ -256,6 +256,13 @@ Output format:\n\
> -k obtain stack trace between each syscall\n\
> "
> #endif
> +"\
> +"
> +#ifdef HAVE_LINUX_KVM_H
> +"\
> + -K show the exit reason of KVM run\n\
> +"
> +#endif
> "\
> -o file send trace output to FILE instead of stderr\n\
> -q suppress messages about attaching, detaching, etc.\n\
> @@ -819,6 +826,10 @@ droptcb(struct tcb *tcp)
> unwind_tcb_fin(tcp);
> #endif
>
> +#ifdef HAVE_LINUX_KVM_H
> + kvm_vcpu_info_free(tcp);
> +#endif
> +
> if (tcp->mmap_cache)
> tcp->mmap_cache->free_fn(tcp, __func__);
>
> @@ -1572,6 +1583,9 @@ init(int argc, char *argv[])
> {
> int c, i;
> int optF = 0;
> +#ifdef HAVE_LINUX_KVM_H
> + int kvm_vcpu = 0;
> +#endif
Is it expected to have other values than 0 or 1?
--
ldv
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 801 bytes
Desc: not available
URL: <http://lists.strace.io/pipermail/strace-devel/attachments/20180514/eb9797d8/attachment.bin>
More information about the Strace-devel
mailing list