[PATCH v6 1/5] PID namespace translation support

Dmitry V. Levin ldv at altlinux.org
Fri Aug 7 18:23:18 UTC 2020


On Thu, Aug 06, 2020 at 09:02:07PM +0200, Ákos Uzonyi wrote:
[...]
> diff --git a/NEWS b/NEWS
> index b83f9e80..9086fde9 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -14,6 +14,7 @@ Noteworthy changes in release ?.? (????-??-??)
>      KEYCTL_*, KVM_*, LOOP_*, NDA_*, RTC_*, TCA_*, STATX_*, and *_MAGIC
>      constants.
>    * Updated lists of ioctl commands from Linux 5.8.
> +  * Added --pidns-translation opition for PID namespace translation.

Typo?

> @@ -983,6 +1005,27 @@ print_local_array_ex(struct tcb *tcp,
>  extern kernel_ulong_t *
>  fetch_indirect_syscall_args(struct tcb *, kernel_ulong_t addr, unsigned int n_args);
>  
> +extern void pidns_init(void);
> +
> +/**
> + * Returns the pid of the tracee as present in /proc (can be different from
> + * tcp->pid if /proc and the tracee process are in different PID namespaces).
> + */
> +extern int get_proc_pid(struct tcb *);

I'm still puzzled what /proc are you talking about, the tracer's /proc or
the tracee's /proc.

> @@ -1058,6 +1101,14 @@ printfd(struct tcb *tcp, int fd)
>   * of the tracee the descriptor tcp).  This is a stub.
>   */
>  extern void printfd_pid_tracee_ns(struct tcb *tcp, pid_t pid, int fd);
> +
> +extern void printpid(struct tcb *, int pid, enum pid_type type);

Although this might look obvious, I suggest adding a short comment for
printpid, too, to make it clear that printpid prints the pid given
in the tracee's pid namespace.

> +/**
> + * Key:   Proc PID
> + * Value: stuct proc_data

Typo?

> +/**
> + * Helper function for creating a trie.
> + *
> + * For node_key_bits and data_block_key_bits 4 is used (so trie height is 32 / 4
> + * = 8, and node sizes are 8 byte * 2^4 = 128 bytes), which seems to be a good
> + * tradeoff between between memory usage and lookup time. It should not be too

Typo?

> + * large, since there can be large holes between PIDs, and it would be just a
> + * waste of memory having large nodes with lot of NULL pointers in them.
> + */
> +static struct trie *
> +create_trie_4(uint8_t key_size, uint8_t item_size_lg, uint64_t empty_value)
> +{
> +	struct trie *t = trie_create(key_size, item_size_lg, 4, 4, empty_value);
> +	if (!t)
> +		error_msg_and_die("creating trie failed");
> +
> +	return t;
> +}

I believe empty_value is always 0, so why bother with the argument?

> +	while (p) {
> +		errno = 0;
> +		long id = strtol(p, NULL, 10);
> +
> +		if (errno || (id < 1) || (id > INT_MAX)) {

I don't think these extra brackets increase readability.

> +			perror_func_msg("converting pid to int");
> +			break;
> +		}

I suggest also printing the value of p in this unlikely case.

> +/**
> + * Returns whether the /proc filesystem's PID namespace is same as strace's.

Grammar: is the same.

> diff --git a/strace.1.in b/strace.1.in
> index 3b21caec..adca194c 100644
> --- a/strace.1.in
> +++ b/strace.1.in
> @@ -1075,6 +1075,10 @@ Print all available information associated with file descritors:
>  protocol-specific information associated with socket file descriptors,
>  block/character device number associated with device file descriptors,
>  and PIDs asociated with pidfd file descriptors.
> +.TP
> +.B \-\-pidns\-translation
> +If strace and tracee are in different PID namespaces, print PIDs in
> +strace's namespace also.

Changing "print PIDs in strace's namespace also" to either
"print PIDs also in strace's namespace" or
"print PIDs in strace's namespace, too" would likely result
to a more grammatically correct sentence.

> diff --git a/trie.c b/trie.c
> new file mode 100644
> index 00000000..76cecc25
> --- /dev/null
> +++ b/trie.c
> @@ -0,0 +1,250 @@
> +/*
> + * Simple trie implementation for key-value mapping storage
> + *
> + * Copyright (c) 2020 Ákos Uzonyi <uzonyi.akos at gmail.com>
> + * All rights reserved.
> + *
> + * SPDX-License-Identifier: LGPL-2.1-or-later
> + */
> +
> +#include "defs.h"
> +
> +#include <assert.h>

Does this code use assert?

> +#include <stdbool.h>
> +#include <stdint.h>
> +#include <stdlib.h>
> +
> +#include "trie.h"
> +
> +static const uint8_t ptr_sz_lg = (sizeof(uint64_t *) == 8 ? 6 : 5);

Hmm, pidns.c uses a slightly different (although essentially the same)
definition of ptr_sz_lg.

> +
> +/**
> + * Returns lg2 of node size for the specific level of the trie. If max_depth
> + * provided is less than zero, it is calculated via trie_get_depth call.
> + */
> +static uint8_t
> +trie_get_node_size(struct trie *t, uint8_t depth)

What do you mean by "If max_depth provided"?

> +struct trie *
> +trie_create(uint8_t key_size, uint8_t item_size_lg, uint8_t node_key_bits,
> +            uint8_t data_block_key_bits, uint64_t empty_value)
> +{
> +	if (item_size_lg > 6)
> +		return NULL;
> +	if (key_size > 64)
> +		return NULL;
> +	if (node_key_bits < 1)
> +		return NULL;
> +	if (data_block_key_bits < 1 || data_block_key_bits > key_size)
> +		return NULL;

Should there be a check that node_key_bits isn't too big, for consistency?

> +static uint64_t
> +trie_iterate_keys_node(struct trie *t,
> +                       trie_iterate_fn fn, void *fn_data,
> +                       void *node, uint64_t start, uint64_t end,
> +                       uint8_t depth)
> +{
> +	if (start > end || !node)
> +		return 0;
> +
> +	if (depth == t->max_depth) {
> +		for (uint64_t i = start; i <= end; i++)
> +			fn(fn_data, i, trie_data_block_get(t,
> +				(uint64_t *) node, i));
> +
> +		return end - start + 1; //TODO: overflow

Overflow?

> +	/**
> +	 * Number of bits in key that makes a symbol for a node.
> +	 * (equals to log2 of the child count of the node)
> +	 */
> +	uint8_t node_key_bits;
> +
> +	/**
> +	 * Number of bits in key that make a symbol for the data block (leaf).

Typo?

> +	 * (equals to log2 of the value count stored in a data block)
> +	 */
> +	uint8_t data_block_key_bits;
> +
> +	/** The depth of the data block. Caluclated from the values above */

Typo?


-- 
ldv


More information about the Strace-devel mailing list