[PATCH 2/4] kvm: attach the exit reason of vcpu as auxstr to KVM_RUN output
Masatake YAMATO
yamato at redhat.com
Wed May 16 21:10:35 UTC 2018
Thank you for reviewing.
I will revise and submit version 2 patch set.
Masatake YAMATO
On Mon, 14 May 2018 15:38:39 +0300, "Dmitry V. Levin" <ldv at altlinux.org> wrote:
> On Mon, May 07, 2018 at 08:26:02PM +0900, Masatake YAMATO wrote:
>> In KVM, a virtual machine implementation like Qemu can access a vcpu
>> via ioctl. KVM_RUN is an ioctl command to enter vcpu. The command
>> returns the control for some reasons: Needs of device emulation or
>> consuming time slices are typical ones. The vmi takes a different
>> action for the reason.
>>
>> We, strace users, want to know the reason to understand kvm. This
>> change prints the reason as auxstr if -K option is given, and if
>> strace runs on Linux 4.16.0 or higher, which includes
>> e46b469278a59781f9b25ff608af84892963821b, "kvm: embed vcpu id to
>> dentry of vcpu anon inode."
>>
>> The way to get the reason is a bit complicated because the ioctl
>> doesn't return it to the userspace directly. Instead, the vmi and kvm
>> communicate via an area of the process virtual memory where the fd of
>> vcpu is mmap'ed. strace must peek the area to know the reason.
>>
>> The change does three things: (1) recording the area for given vcpu
>> when the target calls VCPU_CREATE to vcpu_info_list per tcb data
>> field, (2) verifying the data recorded in vcpu_info_list before
>> doing (3), and (3) decoding the exit reason field of the area.
>>
>> The change is not so simple because there is a case that strace
>> doesn't have a chance to do (1) if -p option is used. In the case,
>> vcpu_info data created in the step (2).
>>
>> The area has more fields than "exit reason." Dumping them is future
>> work.
>>
>> * defs.h [HAVE_LINUX_KVM_H]: (kvm_run_structure_decoder_init,
>> kvm_vcpu_info_free): New declarations.
>> (struct tcb): Add new field vcpu_info_list.
>> * strace.c (usage): Describe -K option.
>> (init): Introduce -K option. Call
>> kvm_run_structure_decoder_init to enable for attaching the exit reason
>> to auxstr.
>> (droptcb): Call kvm_vcpu_info_free.
>> * xlat/kvm_exit_reason.in: The list for decoding the exit reason.
>> * kvm.c: Include xmalloc.h, mmap_cache.h, and sys/mman.h.
>> (dump_kvm_run_structure_level): New file static variable.
>> (kvm_run_structure_decoder_init): New function.
>> (vcpu_info): New struct definition representing the 3-tuple: vcpu file
>> descriptor, id of the vcpu, and mmap'ed entry.
>> (vcpu_find, vcpu_alloc, vcpu_register, vcpu_getinfo,
>> kvm_vcpu_info_free): New functions to access tcb's vcpu_info_list
>> field and vcpu_info data type.
>> (is_map_for_file, extrac_cpuid, map_len): New helper functions.
>> (kvm_ioclt_run_attach_auxstr, kvm_ioctl_decode_run): New functions
>> decoding vcpu exit reason and attaching the decoded data to auxstr
>> field of tcb.
>> (kvm_ioctl_create_vcpu): Call vcpu_register to make an entry mapping a
>> file descriptor and the vcpu id associated with the fd.
>> (kvm_ioctl): Call kvm_ioctl_decode_run.
>>
>> Signed-off-by: Masatake YAMATO <yamato at redhat.com>
>> ---
>> defs.h | 9 ++
>> kvm.c | 235 ++++++++++++++++++++++++++++++++++++++++
>> strace.c | 30 +++++
>> xlat/kvm_exit_reason.in | 29 +++++
>> 4 files changed, 303 insertions(+)
>> create mode 100644 xlat/kvm_exit_reason.in
>>
>> diff --git a/defs.h b/defs.h
>> index 5116fc21..407e3a02 100644
>> --- a/defs.h
>> +++ b/defs.h
>> @@ -220,6 +220,10 @@ struct tcb {
>>
>> struct mmap_cache_t *mmap_cache;
>>
>> +#ifdef HAVE_LINUX_KVM_H
>> + struct vcpu_info *vcpu_info_list;
>> +#endif
>> +
>> #ifdef ENABLE_STACKTRACE
>> void *unwind_ctx;
>> struct unwind_queue_t *unwind_queue;
>> @@ -845,6 +849,11 @@ extern void unwind_tcb_print(struct tcb *);
>> extern void unwind_tcb_capture(struct tcb *);
>> #endif
>>
>> +#ifdef HAVE_LINUX_KVM_H
>> +extern void kvm_run_structure_decoder_init(void);
>> +extern void kvm_vcpu_info_free(struct tcb *);
>> +#endif
>> +
>> static inline int
>> printstrn(struct tcb *tcp, kernel_ulong_t addr, kernel_ulong_t len)
>> {
>> diff --git a/kvm.c b/kvm.c
>> index 86fd9e50..ab9aeb65 100644
>> --- a/kvm.c
>> +++ b/kvm.c
>> @@ -34,13 +34,189 @@
>> # include <linux/kvm.h>
>> # include "print_fields.h"
>> # include "arch_kvm.c"
>> +# include "xmalloc.h"
>> +# include "mmap_cache.h"
>> +# include <sys/mman.h>
>
> Do you need <sys/mman.h>?
>
>> +
>> +struct vcpu_info {
>> + int fd;
>> + int cpuid;
>> + long mmap_addr;
>> + unsigned long mmap_len;
>> + bool resolved;
>> + struct vcpu_info *next;
>> +};
>> +
>> +static int dump_kvm_run_structure_level;
>
> Is it expected to have other values than 0 or 1?
>
>> +static struct vcpu_info *
>> +vcpu_find(struct tcb *const tcp, int fd)
>> +{
>> + struct vcpu_info *head = tcp->vcpu_info_list;
>> + for (struct vcpu_info *vcpu_info = head;
>
> for (struct vcpu_info *vcpu_info = tcp->vcpu_info_list;
>
>> + vcpu_info;
>> + vcpu_info = vcpu_info->next)
>> + if (vcpu_info->fd == fd)
>> + return vcpu_info;
>> +
>> + return NULL;
>> +}
>> +
>> +static struct vcpu_info *
>> +vcpu_alloc(struct tcb *const tcp, int fd, int cpuid)
>> +{
>> + struct vcpu_info *vcpu_info = xmalloc(sizeof(struct vcpu_info));
>
> I'd just use xcalloc:
>
> struct vcpu_info *vcpu_info = xcalloc(1, sizeof(*vcpu_info));
>> +
>> + vcpu_info->fd = fd;
>> +
>> + vcpu_info->cpuid = cpuid;
>> + vcpu_info->resolved = false;
>> + vcpu_info->mmap_addr = 0L;
>> + vcpu_info->mmap_len = 0L;
>> +
>> + vcpu_info->next = tcp->vcpu_info_list;
>> + tcp->vcpu_info_list = vcpu_info;
>> +
>> + return vcpu_info;
>> +}
>> +
>> +void
>> +kvm_vcpu_info_free(struct tcb *tcp)
>> +{
>> + struct vcpu_info *head = tcp->vcpu_info_list;
>> + struct vcpu_info *next;
>> +
>> + while (head) {
>> + next = head->next;
>> + free (head);
>
> We don't use space before function name and parenthesis.
>
>> + head = next;
>> + }
>
> for (head = tcp->vcpu_info_list; head; head = next)
>
>> +
>> + tcp->vcpu_info_list = NULL;
>> +}
>> +
>> +static void
>> +vcpu_register(struct tcb *const tcp, int fd, int cpuid)
>> +{
>> + struct vcpu_info *vcpu_info = vcpu_find(tcp, fd);
>> +
>> + if (!vcpu_info)
>> + vcpu_info = vcpu_alloc(tcp, fd, cpuid);
>> + else if (vcpu_info->cpuid != cpuid)
>> + {
>> + vcpu_info->cpuid = cpuid;
>> + vcpu_info->resolved = false;
>> + }
>> +}
>> +
>> +static bool
>> +is_map_for_file(struct mmap_cache_entry_t *map_info, void *data)
>> +{
>> + /* major version for anon inode may be given in get_anon_bdev()
>> + * in linux kernel.
>> + *
>> + * *p = MKDEV(0, dev & MINORMASK);
>> + *-----------------^
>> + */
>> + if (map_info->binary_filename &&
>> + map_info->major == 0 &&
>> + strcmp(map_info->binary_filename, (const char *)data) == 0)
>> + return true;
>> + return false;
>
> return map_info->binary_filename &&
> map_info->major == 0 &&
> strcmp(map_info->binary_filename, (const char *) data) == 0;
>
>> +}
>> +
>> +static unsigned long
>> +map_len(struct mmap_cache_entry_t *map_info)
>> +{
>> + if (map_info->start_addr >= map_info->end_addr)
>> + return 0;
>> + else
>> + return map_info->end_addr - map_info->start_addr;
>
> return map_info->end_addr > map_info->start_addr ?
> map_info->end_addr - map_info->start_addr : 0;
>
>> +}
>> +
>> +static int
>> +extract_cpuid(const char *num)
>> +{
>> + unsigned long cpuid;
>> +
>> + errno = 0;
>> + cpuid = strtoul(num, NULL, 10);
>> + if (errno)
>> + return -1;
>> + else
>> + return (int)cpuid;
>> +}
>
> Wouldn't string_to_uint() be OK?
>
>> +
>> +static struct vcpu_info*
>> +vcpu_get_info(struct tcb *const tcp, int fd)
>> +{
>> + struct vcpu_info *vcpu_info = vcpu_find(tcp, fd);
>> + struct mmap_cache_entry_t *map_info;
>> + enum mmap_cache_rebuild_result mc_stat;
>> +
>> + mc_stat = mmap_cache_rebuild_if_invalid(tcp, __func__);
>> + if (mc_stat == MMAP_CACHE_REBUILD_NOCACHE)
>> + return NULL;
>> +
>> + if (vcpu_info && vcpu_info->resolved) {
>> + if (mc_stat == MMAP_CACHE_REBUILD_READY)
>> + return vcpu_info;
>> + else {
>> + map_info = mmap_cache_search(tcp, vcpu_info->mmap_addr);
>> + if ((map_info
>> + && strncmp(map_info->binary_filename,
>> + "anon_inode:kvm-vcpu:", 20) == 0)) {
>> + int cpuid = extract_cpuid (map_info->binary_filename);
>
> Did you mean map_info->binary_filename + 20?
> Please don't hardcode 20, use STR_STRIP_PREFIX instead.
>
>> + if (cpuid < 0)
>> + return NULL;
>> + else if (vcpu_info->cpuid == cpuid)
>> + return vcpu_info;
>> + else
>> + vcpu_info->resolved = false;
>> + } else
>> + /* The vcpu vma may be mremap'ed. */
>> + vcpu_info->resolved = false;
>> + }
>> + }
>> +
>> + /* Slow path */
>> + char path[PATH_MAX + 1];
>> + if (getfdpath(tcp, fd, path, sizeof(path)) >= 0
>> + && strncmp(path, "anon_inode:kvm-vcpu:", 20) == 0) {
>> + map_info = mmap_cache_search_custom(tcp, is_map_for_file, path);
>> + if (map_info) {
>> + int cpuid = extract_cpuid(path + 20);
>> + if (cpuid < 0)
>> + return NULL;
>> + else if (!vcpu_info)
>> + vcpu_info = vcpu_alloc(tcp, fd, cpuid);
>> + else if (vcpu_info->cpuid != cpuid)
>> + vcpu_info->cpuid = cpuid;
>> + vcpu_info->mmap_addr = map_info->start_addr;
>> + vcpu_info->mmap_len = map_len(map_info);
>> + vcpu_info->resolved = true;
>> + } else
>> + vcpu_info->resolved = false;
>> + }
>> +
>> + if (!vcpu_info->resolved)
>> + vcpu_info = NULL;
>> + return vcpu_info;
>> +}
>>
>> static int
>> kvm_ioctl_create_vcpu(struct tcb *const tcp, const kernel_ulong_t arg)
>> {
>> uint32_t cpuid = arg;
>>
>> + if (entering(tcp))
>> + return 0;
>> +
>> tprintf(", %u", cpuid);
>
> Let's print cpuid on entering syscall.
>
>> +
>> + if (dump_kvm_run_structure_level && tcp->u_rval >= 0)
>> + vcpu_register(tcp, tcp->u_rval, (int)cpuid);
>> +
>> return RVAL_IOCTL_DECODED | RVAL_FD;
>> }
>>
>> @@ -103,6 +279,55 @@ kvm_ioctl_decode_sregs(struct tcb *const tcp, const unsigned int code,
>> }
>> # endif /* HAVE_STRUCT_KVM_SREGS */
>>
>> +# include "xlat/kvm_exit_reason.h"
>> +static void
>> +kvm_ioctl_run_attach_auxstr(struct tcb *const tcp,
>> + struct vcpu_info *info)
>> +
>> +{
>> + static struct kvm_run vcpu_run_struct;
>> +
>> + if (info->mmap_len < sizeof(vcpu_run_struct))
>> + return;
>> +
>> + if (umoven(tcp, info->mmap_addr,
>> + sizeof(vcpu_run_struct), &vcpu_run_struct) < 0)
>> + return;
>
> This looks like a use case for umove().
>
>> +
>> + tcp->auxstr = xlookup(kvm_exit_reason, vcpu_run_struct.exit_reason);
>> + if (!tcp->auxstr)
>> + tcp->auxstr = "KVM_EXIT_???";
>
> This looks like a use case for sprintxval().
>
>> +}
>> +
>> +static int
>> +kvm_ioctl_decode_run(struct tcb *const tcp)
>> +{
>> + int fd;
>> + struct vcpu_info *info;
>> + int r;
>> +
>> + if (entering(tcp))
>> + return 0;
>> +
>> + r = RVAL_DECODED;
>> +
>> + if (tcp->u_rval < 0)
>> + return r;
>
> Did you mean syserror(tcp)?
>
>> + if (dump_kvm_run_structure_level) {
>> + tcp->auxstr = NULL;
>> + fd = tcp->u_arg[0];
>> + info = vcpu_get_info(tcp, fd);
>> +
>> + if (info) {
>> + kvm_ioctl_run_attach_auxstr(tcp, info);
>> + if (tcp->auxstr)
>> + r |= RVAL_STR;
>> + }
>> + }
>> +
>> + return r;
>> +}
>> +
>> int
>> kvm_ioctl(struct tcb *const tcp, const unsigned int code, const kernel_ulong_t arg)
>> {
>> @@ -129,7 +354,10 @@ kvm_ioctl(struct tcb *const tcp, const unsigned int code, const kernel_ulong_t a
>>
>> case KVM_CREATE_VM:
>> return RVAL_DECODED | RVAL_FD;
>> +
>> case KVM_RUN:
>> + return kvm_ioctl_decode_run(tcp);
>> +
>> case KVM_GET_VCPU_MMAP_SIZE:
>> case KVM_GET_API_VERSION:
>> default:
>> @@ -137,4 +365,11 @@ kvm_ioctl(struct tcb *const tcp, const unsigned int code, const kernel_ulong_t a
>> }
>> }
>>
>> +void
>> +kvm_run_structure_decoder_init(void)
>> +{
>> + dump_kvm_run_structure_level = 1;
>> + mmap_cache_enable();
>> +}
>> +
>> #endif /* HAVE_LINUX_KVM_H */
>> diff --git a/strace.c b/strace.c
>> index 4b374857..db07b4f1 100644
>> --- a/strace.c
>> +++ b/strace.c
>> @@ -256,6 +256,13 @@ Output format:\n\
>> -k obtain stack trace between each syscall\n\
>> "
>> #endif
>> +"\
>> +"
>> +#ifdef HAVE_LINUX_KVM_H
>> +"\
>> + -K show the exit reason of KVM run\n\
>> +"
>> +#endif
>> "\
>> -o file send trace output to FILE instead of stderr\n\
>> -q suppress messages about attaching, detaching, etc.\n\
>> @@ -819,6 +826,10 @@ droptcb(struct tcb *tcp)
>> unwind_tcb_fin(tcp);
>> #endif
>>
>> +#ifdef HAVE_LINUX_KVM_H
>> + kvm_vcpu_info_free(tcp);
>> +#endif
>> +
>> if (tcp->mmap_cache)
>> tcp->mmap_cache->free_fn(tcp, __func__);
>>
>> @@ -1572,6 +1583,9 @@ init(int argc, char *argv[])
>> {
>> int c, i;
>> int optF = 0;
>> +#ifdef HAVE_LINUX_KVM_H
>> + int kvm_vcpu = 0;
>> +#endif
>
> Is it expected to have other values than 0 or 1?
>
>
> --
> ldv
More information about the Strace-devel
mailing list