[PATCH v6 1/5] PID namespace translation support
Dmitry V. Levin
ldv at altlinux.org
Fri Aug 7 18:23:18 UTC 2020
On Thu, Aug 06, 2020 at 09:02:07PM +0200, Ákos Uzonyi wrote:
[...]
> diff --git a/NEWS b/NEWS
> index b83f9e80..9086fde9 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -14,6 +14,7 @@ Noteworthy changes in release ?.? (????-??-??)
> KEYCTL_*, KVM_*, LOOP_*, NDA_*, RTC_*, TCA_*, STATX_*, and *_MAGIC
> constants.
> * Updated lists of ioctl commands from Linux 5.8.
> + * Added --pidns-translation opition for PID namespace translation.
Typo?
> @@ -983,6 +1005,27 @@ print_local_array_ex(struct tcb *tcp,
> extern kernel_ulong_t *
> fetch_indirect_syscall_args(struct tcb *, kernel_ulong_t addr, unsigned int n_args);
>
> +extern void pidns_init(void);
> +
> +/**
> + * Returns the pid of the tracee as present in /proc (can be different from
> + * tcp->pid if /proc and the tracee process are in different PID namespaces).
> + */
> +extern int get_proc_pid(struct tcb *);
I'm still puzzled what /proc are you talking about, the tracer's /proc or
the tracee's /proc.
> @@ -1058,6 +1101,14 @@ printfd(struct tcb *tcp, int fd)
> * of the tracee the descriptor tcp). This is a stub.
> */
> extern void printfd_pid_tracee_ns(struct tcb *tcp, pid_t pid, int fd);
> +
> +extern void printpid(struct tcb *, int pid, enum pid_type type);
Although this might look obvious, I suggest adding a short comment for
printpid, too, to make it clear that printpid prints the pid given
in the tracee's pid namespace.
> +/**
> + * Key: Proc PID
> + * Value: stuct proc_data
Typo?
> +/**
> + * Helper function for creating a trie.
> + *
> + * For node_key_bits and data_block_key_bits 4 is used (so trie height is 32 / 4
> + * = 8, and node sizes are 8 byte * 2^4 = 128 bytes), which seems to be a good
> + * tradeoff between between memory usage and lookup time. It should not be too
Typo?
> + * large, since there can be large holes between PIDs, and it would be just a
> + * waste of memory having large nodes with lot of NULL pointers in them.
> + */
> +static struct trie *
> +create_trie_4(uint8_t key_size, uint8_t item_size_lg, uint64_t empty_value)
> +{
> + struct trie *t = trie_create(key_size, item_size_lg, 4, 4, empty_value);
> + if (!t)
> + error_msg_and_die("creating trie failed");
> +
> + return t;
> +}
I believe empty_value is always 0, so why bother with the argument?
> + while (p) {
> + errno = 0;
> + long id = strtol(p, NULL, 10);
> +
> + if (errno || (id < 1) || (id > INT_MAX)) {
I don't think these extra brackets increase readability.
> + perror_func_msg("converting pid to int");
> + break;
> + }
I suggest also printing the value of p in this unlikely case.
> +/**
> + * Returns whether the /proc filesystem's PID namespace is same as strace's.
Grammar: is the same.
> diff --git a/strace.1.in b/strace.1.in
> index 3b21caec..adca194c 100644
> --- a/strace.1.in
> +++ b/strace.1.in
> @@ -1075,6 +1075,10 @@ Print all available information associated with file descritors:
> protocol-specific information associated with socket file descriptors,
> block/character device number associated with device file descriptors,
> and PIDs asociated with pidfd file descriptors.
> +.TP
> +.B \-\-pidns\-translation
> +If strace and tracee are in different PID namespaces, print PIDs in
> +strace's namespace also.
Changing "print PIDs in strace's namespace also" to either
"print PIDs also in strace's namespace" or
"print PIDs in strace's namespace, too" would likely result
to a more grammatically correct sentence.
> diff --git a/trie.c b/trie.c
> new file mode 100644
> index 00000000..76cecc25
> --- /dev/null
> +++ b/trie.c
> @@ -0,0 +1,250 @@
> +/*
> + * Simple trie implementation for key-value mapping storage
> + *
> + * Copyright (c) 2020 Ákos Uzonyi <uzonyi.akos at gmail.com>
> + * All rights reserved.
> + *
> + * SPDX-License-Identifier: LGPL-2.1-or-later
> + */
> +
> +#include "defs.h"
> +
> +#include <assert.h>
Does this code use assert?
> +#include <stdbool.h>
> +#include <stdint.h>
> +#include <stdlib.h>
> +
> +#include "trie.h"
> +
> +static const uint8_t ptr_sz_lg = (sizeof(uint64_t *) == 8 ? 6 : 5);
Hmm, pidns.c uses a slightly different (although essentially the same)
definition of ptr_sz_lg.
> +
> +/**
> + * Returns lg2 of node size for the specific level of the trie. If max_depth
> + * provided is less than zero, it is calculated via trie_get_depth call.
> + */
> +static uint8_t
> +trie_get_node_size(struct trie *t, uint8_t depth)
What do you mean by "If max_depth provided"?
> +struct trie *
> +trie_create(uint8_t key_size, uint8_t item_size_lg, uint8_t node_key_bits,
> + uint8_t data_block_key_bits, uint64_t empty_value)
> +{
> + if (item_size_lg > 6)
> + return NULL;
> + if (key_size > 64)
> + return NULL;
> + if (node_key_bits < 1)
> + return NULL;
> + if (data_block_key_bits < 1 || data_block_key_bits > key_size)
> + return NULL;
Should there be a check that node_key_bits isn't too big, for consistency?
> +static uint64_t
> +trie_iterate_keys_node(struct trie *t,
> + trie_iterate_fn fn, void *fn_data,
> + void *node, uint64_t start, uint64_t end,
> + uint8_t depth)
> +{
> + if (start > end || !node)
> + return 0;
> +
> + if (depth == t->max_depth) {
> + for (uint64_t i = start; i <= end; i++)
> + fn(fn_data, i, trie_data_block_get(t,
> + (uint64_t *) node, i));
> +
> + return end - start + 1; //TODO: overflow
Overflow?
> + /**
> + * Number of bits in key that makes a symbol for a node.
> + * (equals to log2 of the child count of the node)
> + */
> + uint8_t node_key_bits;
> +
> + /**
> + * Number of bits in key that make a symbol for the data block (leaf).
Typo?
> + * (equals to log2 of the value count stored in a data block)
> + */
> + uint8_t data_block_key_bits;
> +
> + /** The depth of the data block. Caluclated from the values above */
Typo?
--
ldv
More information about the Strace-devel
mailing list