[PATCH 2/4] kvm: attach the exit reason of vcpu as auxstr to KVM_RUN output

Masatake YAMATO yamato at redhat.com
Wed May 16 21:10:35 UTC 2018


Thank you for reviewing.
I will revise and submit version 2 patch set.

Masatake YAMATO

On Mon, 14 May 2018 15:38:39 +0300, "Dmitry V. Levin" <ldv at altlinux.org> wrote:
> On Mon, May 07, 2018 at 08:26:02PM +0900, Masatake YAMATO wrote:
>> In KVM, a virtual machine implementation like Qemu can access a vcpu
>> via ioctl. KVM_RUN is an ioctl command to enter vcpu. The command
>> returns the control for some reasons: Needs of device emulation or
>> consuming time slices are typical ones. The vmi takes a different
>> action for the reason.
>> 
>> We, strace users, want to know the reason to understand kvm.  This
>> change prints the reason as auxstr if -K option is given, and if
>> strace runs on Linux 4.16.0 or higher, which includes
>> e46b469278a59781f9b25ff608af84892963821b, "kvm: embed vcpu id to
>> dentry of vcpu anon inode."
>> 
>> The way to get the reason is a bit complicated because the ioctl
>> doesn't return it to the userspace directly. Instead, the vmi and kvm
>> communicate via an area of the process virtual memory where the fd of
>> vcpu is mmap'ed. strace must peek the area to know the reason.
>> 
>> The change does three things: (1) recording the area for given vcpu
>> when the target calls VCPU_CREATE to vcpu_info_list per tcb data
>> field, (2) verifying the data recorded in vcpu_info_list before
>> doing (3), and (3) decoding the exit reason field of the area.
>> 
>> The change is not so simple because there is a case that strace
>> doesn't have a chance to do (1) if -p option is used. In the case,
>> vcpu_info data created in the step (2).
>> 
>> The area has more fields than "exit reason." Dumping them is future
>> work.
>> 
>> * defs.h [HAVE_LINUX_KVM_H]: (kvm_run_structure_decoder_init,
>> kvm_vcpu_info_free): New declarations.
>> (struct tcb): Add new field vcpu_info_list.
>> * strace.c (usage): Describe -K option.
>> (init): Introduce -K option. Call
>> kvm_run_structure_decoder_init to enable for attaching the exit reason
>> to auxstr.
>> (droptcb): Call kvm_vcpu_info_free.
>> * xlat/kvm_exit_reason.in: The list for decoding the exit reason.
>> * kvm.c: Include xmalloc.h, mmap_cache.h, and sys/mman.h.
>> (dump_kvm_run_structure_level): New file static variable.
>> (kvm_run_structure_decoder_init): New function.
>> (vcpu_info): New struct definition representing the 3-tuple: vcpu file
>> descriptor, id of the vcpu, and mmap'ed entry.
>> (vcpu_find, vcpu_alloc, vcpu_register, vcpu_getinfo,
>> kvm_vcpu_info_free): New functions to access tcb's vcpu_info_list
>> field and vcpu_info data type.
>> (is_map_for_file, extrac_cpuid, map_len): New helper functions.
>> (kvm_ioclt_run_attach_auxstr, kvm_ioctl_decode_run): New functions
>> decoding vcpu exit reason and attaching the decoded data to auxstr
>> field of tcb.
>> (kvm_ioctl_create_vcpu): Call vcpu_register to make an entry mapping a
>> file descriptor and the vcpu id associated with the fd.
>> (kvm_ioctl): Call kvm_ioctl_decode_run.
>> 
>> Signed-off-by: Masatake YAMATO <yamato at redhat.com>
>> ---
>>  defs.h                  |   9 ++
>>  kvm.c                   | 235 ++++++++++++++++++++++++++++++++++++++++
>>  strace.c                |  30 +++++
>>  xlat/kvm_exit_reason.in |  29 +++++
>>  4 files changed, 303 insertions(+)
>>  create mode 100644 xlat/kvm_exit_reason.in
>> 
>> diff --git a/defs.h b/defs.h
>> index 5116fc21..407e3a02 100644
>> --- a/defs.h
>> +++ b/defs.h
>> @@ -220,6 +220,10 @@ struct tcb {
>>  
>>  	struct mmap_cache_t *mmap_cache;
>>  
>> +#ifdef HAVE_LINUX_KVM_H
>> +	struct vcpu_info *vcpu_info_list;
>> +#endif
>> +
>>  #ifdef ENABLE_STACKTRACE
>>  	void *unwind_ctx;
>>  	struct unwind_queue_t *unwind_queue;
>> @@ -845,6 +849,11 @@ extern void unwind_tcb_print(struct tcb *);
>>  extern void unwind_tcb_capture(struct tcb *);
>>  #endif
>>  
>> +#ifdef HAVE_LINUX_KVM_H
>> +extern void kvm_run_structure_decoder_init(void);
>> +extern void kvm_vcpu_info_free(struct tcb *);
>> +#endif
>> +
>>  static inline int
>>  printstrn(struct tcb *tcp, kernel_ulong_t addr, kernel_ulong_t len)
>>  {
>> diff --git a/kvm.c b/kvm.c
>> index 86fd9e50..ab9aeb65 100644
>> --- a/kvm.c
>> +++ b/kvm.c
>> @@ -34,13 +34,189 @@
>>  # include <linux/kvm.h>
>>  # include "print_fields.h"
>>  # include "arch_kvm.c"
>> +# include "xmalloc.h"
>> +# include "mmap_cache.h"
>> +# include <sys/mman.h>
> 
> Do you need <sys/mman.h>?
> 
>> +
>> +struct vcpu_info {
>> +	int fd;
>> +	int cpuid;
>> +	long mmap_addr;
>> +	unsigned long mmap_len;
>> +	bool resolved;
>> +	struct vcpu_info *next;
>> +};
>> +
>> +static int dump_kvm_run_structure_level;
> 
> Is it expected to have other values than 0 or 1?
> 
>> +static struct vcpu_info *
>> +vcpu_find(struct tcb *const tcp, int fd)
>> +{
>> +	struct vcpu_info *head = tcp->vcpu_info_list;
>> +	for (struct vcpu_info *vcpu_info = head;
> 
> 	for (struct vcpu_info *vcpu_info = tcp->vcpu_info_list;
> 
>> +	     vcpu_info;
>> +	     vcpu_info = vcpu_info->next)
>> +		if (vcpu_info->fd == fd)
>> +			return vcpu_info;
>> +
>> +	return NULL;
>> +}
>> +
>> +static struct vcpu_info *
>> +vcpu_alloc(struct tcb *const tcp, int fd, int cpuid)
>> +{
>> +	struct vcpu_info *vcpu_info = xmalloc(sizeof(struct vcpu_info));
> 
> I'd just use xcalloc:
> 
> 	struct vcpu_info *vcpu_info = xcalloc(1, sizeof(*vcpu_info));
>> +
>> +	vcpu_info->fd = fd;
>> +
>> +	vcpu_info->cpuid = cpuid;
>> +	vcpu_info->resolved = false;
>> +	vcpu_info->mmap_addr = 0L;
>> +	vcpu_info->mmap_len  = 0L;
>> +
>> +	vcpu_info->next = tcp->vcpu_info_list;
>> +	tcp->vcpu_info_list = vcpu_info;
>> +
>> +	return vcpu_info;
>> +}
>> +
>> +void
>> +kvm_vcpu_info_free(struct tcb *tcp)
>> +{
>> +	struct vcpu_info *head = tcp->vcpu_info_list;
>> +	struct vcpu_info *next;
>> +
>> +	while (head) {
>> +		next = head->next;
>> +		free (head);
> 
> We don't use space before function name and parenthesis.
> 
>> +		head = next;
>> +	}
> 
> 	for (head = tcp->vcpu_info_list; head; head = next)
> 
>> +
>> +	tcp->vcpu_info_list = NULL;
>> +}
>> +
>> +static void
>> +vcpu_register(struct tcb *const tcp, int fd, int cpuid)
>> +{
>> +	struct vcpu_info *vcpu_info = vcpu_find(tcp, fd);
>> +
>> +	if (!vcpu_info)
>> +		vcpu_info = vcpu_alloc(tcp, fd, cpuid);
>> +	else if (vcpu_info->cpuid != cpuid)
>> +	{
>> +		vcpu_info->cpuid = cpuid;
>> +		vcpu_info->resolved = false;
>> +	}
>> +}
>> +
>> +static bool
>> +is_map_for_file(struct mmap_cache_entry_t *map_info, void *data)
>> +{
>> +	/* major version for anon inode may be given in get_anon_bdev()
>> +	 * in linux kernel.
>> +	 *
>> +	 * 	*p = MKDEV(0, dev & MINORMASK);
>> +	 *-----------------^
>> +	 */
>> +	if (map_info->binary_filename &&
>> +	    map_info->major == 0 &&
>> +	    strcmp(map_info->binary_filename, (const char *)data) == 0)
>> +		return true;
>> +	return false;
> 
> 	return map_info->binary_filename &&
> 	       map_info->major == 0 &&
> 	       strcmp(map_info->binary_filename, (const char *) data) == 0;
> 
>> +}
>> +
>> +static unsigned long
>> +map_len(struct mmap_cache_entry_t *map_info)
>> +{
>> +	if (map_info->start_addr >= map_info->end_addr)
>> +		return 0;
>> +	else
>> +		return map_info->end_addr - map_info->start_addr;
> 
> 	return map_info->end_addr > map_info->start_addr ?
> 	       map_info->end_addr - map_info->start_addr : 0;
> 
>> +}
>> +
>> +static int
>> +extract_cpuid(const char *num)
>> +{
>> +	unsigned long cpuid;
>> +
>> +	errno = 0;
>> +	cpuid = strtoul(num, NULL, 10);
>> +	if (errno)
>> +		return -1;
>> +	else
>> +		return (int)cpuid;
>> +}
> 
> Wouldn't string_to_uint() be OK?
> 
>> +
>> +static struct vcpu_info*
>> +vcpu_get_info(struct tcb *const tcp, int fd)
>> +{
>> +	struct vcpu_info *vcpu_info = vcpu_find(tcp, fd);
>> +	struct mmap_cache_entry_t *map_info;
>> +	enum mmap_cache_rebuild_result mc_stat;
>> +
>> +	mc_stat = mmap_cache_rebuild_if_invalid(tcp, __func__);
>> +	if (mc_stat == MMAP_CACHE_REBUILD_NOCACHE)
>> +		return NULL;
>> +
>> +	if (vcpu_info && vcpu_info->resolved) {
>> +		if (mc_stat == MMAP_CACHE_REBUILD_READY)
>> +			return vcpu_info;
>> +		else {
>> +			map_info = mmap_cache_search(tcp, vcpu_info->mmap_addr);
>> +			if ((map_info
>> +			     && strncmp(map_info->binary_filename,
>> +					"anon_inode:kvm-vcpu:", 20) == 0)) {
>> +				int cpuid = extract_cpuid (map_info->binary_filename);
> 
> Did you mean map_info->binary_filename + 20?
> Please don't hardcode 20, use STR_STRIP_PREFIX instead.
> 
>> +				if (cpuid < 0)
>> +					return NULL;
>> +				else if (vcpu_info->cpuid == cpuid)
>> +					return vcpu_info;
>> +				else
>> +					vcpu_info->resolved = false;
>> +			} else
>> +				/* The vcpu vma may be mremap'ed. */
>> +				vcpu_info->resolved = false;
>> +		}
>> +	}
>> +
>> +	/* Slow path */
>> +	char path[PATH_MAX + 1];
>> +	if (getfdpath(tcp, fd, path, sizeof(path)) >= 0
>> +	    && strncmp(path, "anon_inode:kvm-vcpu:", 20) == 0) {
>> +		map_info = mmap_cache_search_custom(tcp, is_map_for_file, path);
>> +		if (map_info) {
>> +			int cpuid = extract_cpuid(path + 20);
>> +			if (cpuid < 0)
>> +				return NULL;
>> +			else if (!vcpu_info)
>> +				vcpu_info = vcpu_alloc(tcp, fd, cpuid);
>> +			else if (vcpu_info->cpuid != cpuid)
>> +				vcpu_info->cpuid = cpuid;
>> +			vcpu_info->mmap_addr = map_info->start_addr;
>> +			vcpu_info->mmap_len  = map_len(map_info);
>> +			vcpu_info->resolved  = true;
>> +		} else
>> +			vcpu_info->resolved = false;
>> +	}
>> +
>> +	if (!vcpu_info->resolved)
>> +		vcpu_info = NULL;
>> +	return vcpu_info;
>> +}
>>  
>>  static int
>>  kvm_ioctl_create_vcpu(struct tcb *const tcp, const kernel_ulong_t arg)
>>  {
>>  	uint32_t cpuid = arg;
>>  
>> +	if (entering(tcp))
>> +		return 0;
>> +
>>  	tprintf(", %u", cpuid);
> 
> Let's print cpuid on entering syscall.
> 
>> +
>> +	if (dump_kvm_run_structure_level && tcp->u_rval >= 0)
>> +		vcpu_register(tcp, tcp->u_rval, (int)cpuid);
>> +
>>  	return RVAL_IOCTL_DECODED | RVAL_FD;
>>  }
>>  
>> @@ -103,6 +279,55 @@ kvm_ioctl_decode_sregs(struct tcb *const tcp, const unsigned int code,
>>  }
>>  # endif /* HAVE_STRUCT_KVM_SREGS */
>>  
>> +# include "xlat/kvm_exit_reason.h"
>> +static void
>> +kvm_ioctl_run_attach_auxstr(struct tcb *const tcp,
>> +			    struct vcpu_info *info)
>> +
>> +{
>> +	static struct kvm_run vcpu_run_struct;
>> +
>> +	if (info->mmap_len < sizeof(vcpu_run_struct))
>> +		return;
>> +
>> +	if (umoven(tcp, info->mmap_addr,
>> +		   sizeof(vcpu_run_struct), &vcpu_run_struct) < 0)
>> +		return;
> 
> This looks like a use case for umove().
> 
>> +
>> +	tcp->auxstr = xlookup(kvm_exit_reason, vcpu_run_struct.exit_reason);
>> +	if (!tcp->auxstr)
>> +		tcp->auxstr = "KVM_EXIT_???";
> 
> This looks like a use case for sprintxval().
> 
>> +}
>> +
>> +static int
>> +kvm_ioctl_decode_run(struct tcb *const tcp)
>> +{
>> +	int fd;
>> +	struct vcpu_info *info;
>> +	int r;
>> +
>> +	if (entering(tcp))
>> +		return 0;
>> +
>> +	r = RVAL_DECODED;
>> +
>> +	if (tcp->u_rval < 0)
>> +		return r;
> 
> Did you mean syserror(tcp)?
> 
>> +	if (dump_kvm_run_structure_level) {
>> +		tcp->auxstr = NULL;
>> +		fd = tcp->u_arg[0];
>> +		info = vcpu_get_info(tcp, fd);
>> +
>> +		if (info) {
>> +			kvm_ioctl_run_attach_auxstr(tcp, info);
>> +			if (tcp->auxstr)
>> +				r |= RVAL_STR;
>> +		}
>> +	}
>> +
>> +	return r;
>> +}
>> +
>>  int
>>  kvm_ioctl(struct tcb *const tcp, const unsigned int code, const kernel_ulong_t arg)
>>  {
>> @@ -129,7 +354,10 @@ kvm_ioctl(struct tcb *const tcp, const unsigned int code, const kernel_ulong_t a
>>  
>>  	case KVM_CREATE_VM:
>>  		return RVAL_DECODED | RVAL_FD;
>> +
>>  	case KVM_RUN:
>> +		return kvm_ioctl_decode_run(tcp);
>> +
>>  	case KVM_GET_VCPU_MMAP_SIZE:
>>  	case KVM_GET_API_VERSION:
>>  	default:
>> @@ -137,4 +365,11 @@ kvm_ioctl(struct tcb *const tcp, const unsigned int code, const kernel_ulong_t a
>>  	}
>>  }
>>  
>> +void
>> +kvm_run_structure_decoder_init(void)
>> +{
>> +	dump_kvm_run_structure_level = 1;
>> +	mmap_cache_enable();
>> +}
>> +
>>  #endif /* HAVE_LINUX_KVM_H */
>> diff --git a/strace.c b/strace.c
>> index 4b374857..db07b4f1 100644
>> --- a/strace.c
>> +++ b/strace.c
>> @@ -256,6 +256,13 @@ Output format:\n\
>>    -k             obtain stack trace between each syscall\n\
>>  "
>>  #endif
>> +"\
>> +"
>> +#ifdef HAVE_LINUX_KVM_H
>> +"\
>> +  -K             show the exit reason of KVM run\n\
>> +"
>> +#endif
>>  "\
>>    -o file        send trace output to FILE instead of stderr\n\
>>    -q             suppress messages about attaching, detaching, etc.\n\
>> @@ -819,6 +826,10 @@ droptcb(struct tcb *tcp)
>>  		unwind_tcb_fin(tcp);
>>  #endif
>>  
>> +#ifdef HAVE_LINUX_KVM_H
>> +	kvm_vcpu_info_free(tcp);
>> +#endif
>> +
>>  	if (tcp->mmap_cache)
>>  		tcp->mmap_cache->free_fn(tcp, __func__);
>>  
>> @@ -1572,6 +1583,9 @@ init(int argc, char *argv[])
>>  {
>>  	int c, i;
>>  	int optF = 0;
>> +#ifdef HAVE_LINUX_KVM_H
>> +	int kvm_vcpu = 0;
>> +#endif
> 
> Is it expected to have other values than 0 or 1?
> 
> 
> -- 
> ldv


More information about the Strace-devel mailing list