[PATCH v4 5/6] Implement new filtering language parsing

Nikolay Marchuk marchuk.nikolay.a at gmail.com
Fri Jul 14 14:55:34 UTC 2017


* filter.h (parse_filter_action, parse_filter_expression): Add new
 declarations.
* filter_action.c (parse_filter_action): Add new parsing function.
* filter_expression.c (parse_filter_expression): Implement parsing of filter
 expression.
(add_variable_token, add_operator_token, parse_operator, unescape_argument): Add
 helper functions.
* filter_parse.c: New file.
* filter_qualify.c (parse_read, parse_write, qualify_signals, parse_trace,
 parse_abbrev, parse_verbose, parse_raw, parse_inject_common, parse_fault,
 parse_inject): Use new parsing api.
* strace.c (init): Use filtering_parse instead of parse_qualify_filter.
* Makefile.am (strace_SOURCES): Add filter_parse.c.
* defs.h (filtering_parse): Use common parsing function.
---
 Makefile.am         |   1 +
 defs.h              |   2 +-
 filter.h            |   4 +
 filter_action.c     |   9 +++
 filter_expression.c | 229 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 filter_parse.c      | 147 +++++++++++++++++++++++++++++++++
 filter_qualify.c    |  90 ++++++++++-----------
 strace.c            |   6 +-
 8 files changed, 437 insertions(+), 51 deletions(-)
 create mode 100644 filter_parse.c

diff --git a/Makefile.am b/Makefile.am
index ddc1fed..b2f11ca 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -134,6 +134,7 @@ strace_SOURCES =	\
 	file_ioctl.c	\
 	filter_action.c	\
 	filter_expression.c \
+	filter_parse.c	\
 	filter_qualify.c \
 	filter.c	\
 	filter.h	\
diff --git a/defs.h b/defs.h
index b03693d..c9876cc 100644
--- a/defs.h
+++ b/defs.h
@@ -684,7 +684,7 @@ extern struct number_set signal_set;
 extern bool is_number_in_set(unsigned int number, const struct number_set *);
 extern void filtering_parsing_finish(void);
 extern void filter_syscall(struct tcb *);
-extern void parse_qualify_filter(const char *);
+extern void filtering_parse(const char *);
 
 #define DECL_IOCTL(name)						\
 extern int								\
diff --git a/filter.h b/filter.h
index 0017d6c..8b42208 100644
--- a/filter.h
+++ b/filter.h
@@ -57,6 +57,7 @@ void set_filters_qualify_mode(struct filter **, unsigned int *nfilters);
 /* filter action api */
 struct filter *create_filter(struct filter_action *, const char *name);
 struct filter_action *find_or_add_action(const char *);
+void parse_filter_action(const char *, const char *, const char *);
 void *get_filter_action_priv_data(struct filter_action *);
 void set_filter_action_priv_data(struct filter_action *, void *);
 void set_qualify_mode(struct filter_action *);
@@ -65,5 +66,8 @@ void set_qualify_mode(struct filter_action *);
 struct bool_expression *create_expression();
 bool run_expression(struct bool_expression *, unsigned int, bool *);
 void set_expression_qualify_mode(struct bool_expression *);
+void parse_filter_expression(struct bool_expression *, const char *,
+			     struct filter_action *, unsigned int);
 
+void parse_qualify_action(const char *, const char *, const char *);
 #endif
diff --git a/filter_action.c b/filter_action.c
index 4c944fc..a489436 100644
--- a/filter_action.c
+++ b/filter_action.c
@@ -185,6 +185,15 @@ find_or_add_action(const char *name)
 	return add_action(type);
 }
 
+void
+parse_filter_action(const char *action_name, const char *expr,
+		       const char *args)
+{
+	struct filter_action *action = find_or_add_action(action_name);
+	parse_filter_expression(action->expr, expr, action, action->nfilters);
+	action->_priv_data = action->type->parse_args(args);
+}
+
 static void
 run_filter_action(struct tcb *tcp, struct filter_action *action)
 {
diff --git a/filter_expression.c b/filter_expression.c
index 8a41783..afb2642 100644
--- a/filter_expression.c
+++ b/filter_expression.c
@@ -25,6 +25,10 @@
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 #include "defs.h"
+#include "filter.h"
+
+extern bool is_allowed_in_name(char);
+extern bool is_space_ascii(char);
 
 struct expression_token {
 	enum token_type {
@@ -67,6 +71,34 @@ reallocate_expression(struct bool_expression *const expr,
 	expr->ntokens = new_ntokens;
 }
 
+static void
+add_variable_token(struct bool_expression *expr, unsigned int id)
+{
+	struct expression_token token;
+	token.type = TOK_VARIABLE;
+	token.data.variable_id = id;
+	reallocate_expression(expr, expr->ntokens + 1);
+	expr->tokens[expr->ntokens - 1] = token;
+}
+
+static void
+add_operator_token(struct bool_expression *expr, char c) {
+	struct expression_token token;
+	token.type = TOK_OPERATOR;
+	switch (c){
+	case '!':
+		token.data.operator_id = OP_NOT;
+		break;
+	case '&':
+		token.data.operator_id = OP_AND;
+		break;
+	case '|':
+		token.data.operator_id = OP_OR;
+	}
+	reallocate_expression(expr, expr->ntokens + 1);
+	expr->tokens[expr->ntokens - 1] = token;
+}
+
 #define STACK_SIZE 32
 static bool stack[STACK_SIZE];
 
@@ -130,3 +162,200 @@ run_expression(struct bool_expression *expr,
 		error_msg_and_die("corrupted filter expression");
 	return stack[0];
 }
+
+/* Parse operator and move str to the end of operator.
+ * Return \0 if no operator found.
+ */
+static char
+parse_operator(char **str)
+{
+	char *p = *str;
+	switch (*p) {
+	case '!':
+		return '!';
+	case '&':
+		if (p[1] == '&') {
+			*str = p + 1;
+			return '&';
+		}
+		break;
+	case '|':
+		if (p[1] == '|') {
+			*str = p + 1;
+			return '|';
+		}
+		break;
+	case 'n':
+		if (p[1] == 'o' && p[2] == 't' && !is_allowed_in_name(p[3])) {
+			*str = p + 2;
+			return '!';
+		}
+		break;
+	case 'a':
+		if (p[1] == 'n' && p[2] == 'd' && !is_allowed_in_name(p[3])) {
+			*str = p + 2;
+			return '&';
+		}
+		break;
+	case 'o':
+		if (p[1] == 'r' && !is_allowed_in_name(p[2])) {
+			*str = p + 1;
+			return '|';
+		}
+	}
+	return '\0';
+}
+
+static char *
+unescape_argument(char **str)
+{
+	char *p;
+	char *p_new;
+	bool escaped = false;
+	unsigned int size = 1;
+	char *new_str = xcalloc(strlen(*str) + 1, sizeof(char));
+	for (p = *str, p_new = new_str; *p; ++p) {
+		if (!escaped) {
+			if (*p == '\\') {
+				escaped = true;
+				continue;
+			} else if (is_space_ascii(*p) || *p == ')' || *p == '|'
+			           || *p == '&') {
+				break;
+			}
+		}
+		escaped = false;
+		*(p_new++) = *p;
+		size++;
+	}
+	*str = p - 1;
+	xreallocarray(new_str, size, sizeof(char));
+	return new_str;
+}
+
+void
+parse_filter_expression(struct bool_expression *expr, const char *str,
+		        struct filter_action *action, unsigned int start_id)
+{
+	enum {
+		WAIT_FILTER,
+		FILTER_NAME,
+		FILTER_ARG,
+		WAIT_OPERATOR
+	} state = WAIT_FILTER;
+	unsigned int variable_id = start_id;
+	/* Current stack index */
+	unsigned int index = 0;
+	char op_stack[STACK_SIZE];
+	char *buf = xstrdup(str);
+	struct filter *cur_filter = NULL;
+	char *filter_name = NULL;
+	char *filter_arg = NULL;
+	char *p;
+	for (p = buf; *p; ++p) {
+		switch (state) {
+		case WAIT_FILTER:
+			/* Ignore leading spaces */
+			if (is_space_ascii(*p))
+				continue;
+			switch (*p) {
+			case '(':
+				if (index == STACK_SIZE)
+					error_msg_and_die("stack overflow");
+				op_stack[index++] = '(';
+				continue;
+			/* Try to parse not operator. */
+			case '!':
+			case 'n':
+			{
+				char op = parse_operator(&p);
+				if (op == '!') {
+					if (index == STACK_SIZE)
+						error_msg_and_die("stack"
+								  "overflow");
+					op_stack[index++] = '!';
+					continue;
+				} else if (op)
+					error_msg_and_die("invalid filter "
+							"expression '%s'", str);
+			}
+			default:
+				if (is_allowed_in_name(*p)) {
+					filter_name = p;
+					state = FILTER_NAME;
+					continue;
+				}
+			}
+		case FILTER_NAME:
+			if (is_allowed_in_name(*p)) {
+				continue;
+			} else if (is_space_ascii(*p)) {
+				*p = '\0';
+				cur_filter = create_filter(action, filter_name);
+				filter_arg = NULL;
+				state = FILTER_ARG;
+				continue;
+			} else {
+				error_msg_and_die("invalid filter "
+						  "expression '%s'", str);
+			}
+		case FILTER_ARG:
+			if (!filter_arg && is_space_ascii(*p))
+				continue;
+			filter_arg = unescape_argument(&p);
+			error_msg("%s", filter_arg);
+			parse_filter(cur_filter, filter_arg);
+			free(filter_arg);
+			add_variable_token(expr, variable_id++);
+			state = WAIT_OPERATOR;
+			continue;
+		case WAIT_OPERATOR:
+			if (is_space_ascii(*p))
+				continue;
+			if (*p == ')') {
+				while ((index > 0)
+				       && (op_stack[index - 1] != '('))
+					add_operator_token(expr,
+							   op_stack[--index]);
+				if (index == 0)
+					error_msg_and_die("invalid filter "
+							"expression '%s'", str);
+				--index;
+				continue;
+			}
+			{
+				char op = parse_operator(&p);
+				if (!op || op == '!')
+					error_msg_and_die("invalid filter "
+							"expression '%s'", str);
+				/* Pop operators with higher priority. */
+				while ((index > 0)
+				       && (op_stack[index - 1] == '!'))
+					add_operator_token(expr,
+							   op_stack[--index]);
+				if (op == '|')
+					while ((index > 0)
+					       && (op_stack[index - 1] == '&'))
+						add_operator_token(expr,
+							op_stack[--index]);
+				op_stack[index++] = op;
+				state = WAIT_FILTER;
+				continue;
+			}
+		}
+	}
+	free(buf);
+	while (index > 0)
+		add_operator_token(expr, op_stack[--index]);
+	if (start_id > 0)
+		add_operator_token(expr, '&');
+	struct expression_token *tok = expr->tokens;
+	struct expression_token *const tok_end = expr->tokens + expr->ntokens;
+	for (; tok != tok_end; ++tok) {
+		if (tok->type == TOK_VARIABLE) {
+			error_msg("variable: %u", tok->data.variable_id);
+		} else {
+			error_msg("operator: %u", tok->data.operator_id);
+		}
+	}
+}
diff --git a/filter_parse.c b/filter_parse.c
new file mode 100644
index 0000000..93a37d8
--- /dev/null
+++ b/filter_parse.c
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 2017 Nikolay Marchuk <marchuk.nikolay.a at gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include "defs.h"
+#include "filter.h"
+
+bool
+is_allowed_in_name(char c)
+{
+	return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')
+	       || (c >= '0' && c <= '9') || (c == '_');
+}
+
+bool
+is_space_ascii(char c)
+{
+	return (c == ' ') || (c == '\t') || (c == '\n') || (c == '\r') ||
+	       (c == '\v') || (c == '\f');
+}
+
+void
+filtering_parse(const char *str)
+{
+	enum parsing_states {
+		F_BEGIN,
+		F_QUAL_SET,
+		F_FILT_EXPR,
+		F_QUAL_ARGS,
+		F_FILT_ARGS
+	} state = F_BEGIN;
+	const char *action_name = NULL;
+	const char *main_part = NULL;
+	const char *args = NULL;
+	unsigned int str_len = strlen(str);
+	char *buf = xstrdup(str);
+	char *p;
+	for (p = buf; *p; ++p) {
+		switch (state) {
+		case F_BEGIN:
+			switch (*p) {
+			/* action(...) */
+			case '(':
+				if (p != buf) {
+					if (buf[str_len - 1] != ')')
+						error_msg_and_die("missing ')'"
+							" in '%s'", str);
+					*p = '\0';
+					buf[str_len - 1] = '\0';
+					action_name = buf;
+					main_part = p + 1;
+					state = F_FILT_EXPR;
+				} else {
+					action_name = "trace";
+					main_part = buf;
+					state = F_FILT_EXPR;
+				}
+				continue;
+			/* action=... */
+			case '=':
+				if (p == buf) {
+					error_msg_and_die("invalid expression "
+							  "'%s'", str);
+				}
+				*p = '\0';
+				action_name = buf;
+				main_part = p + 1;
+				state = F_QUAL_SET;
+				continue;
+			/* qualify set without action. */
+			case ',':
+			case '?':
+			case '!':
+			case '/':
+			case '%':
+			case '-':
+				action_name = "trace";
+				main_part = buf;
+				state = F_QUAL_SET;
+				continue;
+			default:
+				/* new expression without action. */
+				if (is_space_ascii(*p)) {
+					action_name = "trace";
+					main_part = buf;
+					state = F_FILT_EXPR;
+					continue;
+				}
+				if (!is_allowed_in_name(*p))
+					error_msg_and_die("invalid expression "
+							  "'%s'", str);
+			}
+		case F_QUAL_SET:
+			if (*p == ':') {
+				*p = '\0';
+				args = p + 1;
+				state = F_QUAL_ARGS;
+				continue;
+			}
+		case F_FILT_EXPR:
+			if (*p == ';') {
+				*p = '\0';
+				args = p + 1;
+				state = F_FILT_ARGS;
+				continue;
+			}
+		case F_QUAL_ARGS:
+		case F_FILT_ARGS:
+			continue;
+		}
+	}
+	switch (state) {
+	case F_BEGIN:
+		action_name = "trace";
+		main_part = buf;
+	case F_QUAL_SET:
+	case F_QUAL_ARGS:
+		parse_qualify_action(action_name, main_part, args);
+		break;
+	case F_FILT_EXPR:
+	case F_FILT_ARGS:
+		parse_filter_action(action_name, main_part, args);
+		break;
+	}
+}
diff --git a/filter_qualify.c b/filter_qualify.c
index 7d65963..c3a476d 100644
--- a/filter_qualify.c
+++ b/filter_qualify.c
@@ -178,86 +178,87 @@ parse_inject_common_args(char *str, struct inject_opts *const opts,
 }
 
 static void
-parse_read(const char *const str)
+parse_read(const char *const main_part, const char *const args)
 {
 	struct filter_action *action = find_or_add_action("read");
 	struct filter *filter = create_filter(action, "fd");
-	parse_filter(filter, str);
+	parse_filter(filter, main_part);
 	set_qualify_mode(action);
 }
 
 static void
-parse_write(const char *const str)
+parse_write(const char *const main_part, const char *const args)
 {
 	struct filter_action *action = find_or_add_action("write");
 	struct filter *filter = create_filter(action, "fd");
-	parse_filter(filter, str);
+	parse_filter(filter, main_part);
 	set_qualify_mode(action);
 }
 
 static void
-qualify_signals(const char *const str)
+qualify_signals(const char *const main_part, const char *const args)
 {
-	parse_set(str, &signal_set, sigstr_to_uint, "signal");
+	parse_set(main_part, &signal_set, sigstr_to_uint, "signal");
 }
 
 static void
-parse_trace(const char *const str)
+parse_trace(const char *const main_part, const char *const args)
 {
+	if (args && *args)
+		error_msg_and_die("invalid filter action arguments '%s'", args);
 	struct filter_action *action = find_or_add_action("trace");
 	struct filter *filter = create_filter(action, "syscall");
-	parse_filter(filter, str);
+	parse_filter(filter, main_part);
 	set_qualify_mode(action);
 }
 
 static void
-parse_abbrev(const char *const str)
+parse_abbrev(const char *const main_part, const char *const args)
 {
+	if (args && *args)
+		error_msg_and_die("invalid filter action arguments '%s'", args);
 	struct filter_action *action = find_or_add_action("abbrev");
 	struct filter *filter = create_filter(action, "syscall");
-	parse_filter(filter, str);
+	parse_filter(filter, main_part);
 	set_qualify_mode(action);
 }
 
 static void
-parse_verbose(const char *const str)
+parse_verbose(const char *const main_part, const char *const args)
 {
+	if (args && *args)
+		error_msg_and_die("invalid filter action arguments '%s'", args);
 	struct filter_action *action = find_or_add_action("verbose");
 	struct filter *filter = create_filter(action, "syscall");
-	parse_filter(filter, str);
+	parse_filter(filter, main_part);
 	set_qualify_mode(action);
 }
 
 static void
-parse_raw(const char *const str)
+parse_raw(const char *const main_part, const char *const args)
 {
+	if (args && *args)
+		error_msg_and_die("invalid filter action arguments '%s'", args);
 	struct filter_action *action = find_or_add_action("raw");
 	struct filter *filter = create_filter(action, "syscall");
-	parse_filter(filter, str);
+	parse_filter(filter, main_part);
 	set_qualify_mode(action);
 }
 
 static void
-parse_inject_common(const char *const str,
-		      const bool fault_tokens_only,
-		      const char *const description)
+parse_inject_common(const char *const main_part, const char *const args,
+		    const bool fault_tokens_only,
+		    const char *const description)
 {
 	struct inject_opts *opts = xmalloc(sizeof(struct inject_opts));
-	char *buf = xstrdup(str);
-	char *name = buf;
-	char *args = strchr(buf, ':');
-	if (!args) {
-		args = NULL;
-	} else {
-		*(args++) = '\0';
-	}
+	char *buf = args ? xstrdup(args) : NULL;
 
 	struct filter_action *action = find_or_add_action("inject");
 	struct filter *filter = create_filter(action, "syscall");
-	parse_filter(filter, name);
+	parse_filter(filter, main_part);
 	set_qualify_mode(action);
 
-	parse_inject_common_args(args, opts, ":", fault_tokens_only);
+	parse_inject_common_args(buf, opts, ":", fault_tokens_only);
 	if (!opts->init) {
 		error_msg_and_die("invalid %s '%s'", description, args);
 	}
@@ -266,20 +267,20 @@ parse_inject_common(const char *const str,
 }
 
 static void
-parse_fault(const char *const str)
+parse_fault(const char *const main_part, const char *const args)
 {
-	parse_inject_common(str, true, "fault argument");
+	parse_inject_common(main_part, args, true, "fault argument");
 }
 
 static void
-parse_inject(const char *const str)
+parse_inject(const char *const main_part, const char *const args)
 {
-	parse_inject_common(str, false, "inject argument");
+	parse_inject_common(main_part, args, false, "inject argument");
 }
 
 static const struct qual_options {
 	const char *name;
-	void (*qualify)(const char *);
+	void (*qualify)(const char *, const char *);
 } qual_options[] = {
 	{ "trace",	parse_trace	},
 	{ "t",		parse_trace	},
@@ -302,23 +303,18 @@ static const struct qual_options {
 	{ "inject",	parse_inject	},
 };
 
-void
-parse_qualify_filter(const char *str)
+void parse_qualify_action(const char *action_name, const char *main_part,
+			  const char *args)
 {
-	const struct qual_options *opt = qual_options;
+	const struct qual_options *opt = NULL;
 	unsigned int i;
-
 	for (i = 0; i < ARRAY_SIZE(qual_options); ++i) {
-		const char *name = qual_options[i].name;
-		const size_t len = strlen(name);
-		const char *val = str_strip_prefix_len(str, name, len);
-
-		if (val == str || *val != '=')
-			continue;
-		str = val + 1;
-		opt = &qual_options[i];
-		break;
+		if (!strcmp(action_name, qual_options[i].name)) {
+			opt = &qual_options[i];
+			break;
+		}
 	}
-
-	opt->qualify(str);
+	if (!opt)
+		error_msg_and_die("invalid filter action '%s'", action_name);
+	opt->qualify(main_part, args);
 }
diff --git a/strace.c b/strace.c
index b8c027a..6f8022a 100644
--- a/strace.c
+++ b/strace.c
@@ -1640,7 +1640,7 @@ init(int argc, char *argv[])
 #if DEFAULT_QUAL_FLAGS != (QUAL_TRACE | QUAL_ABBREV | QUAL_VERBOSE)
 # error Bug in DEFAULT_QUAL_FLAGS
 #endif
-	parse_qualify_filter("signal=all");
+	filtering_parse("signal=all");
 	while ((c = getopt(argc, argv,
 		"+b:cCdfFhiqrtTvVwxyz"
 #ifdef USE_LIBUNWIND
@@ -1707,7 +1707,7 @@ init(int argc, char *argv[])
 			show_fd_path++;
 			break;
 		case 'v':
-			parse_qualify_filter("abbrev=none");
+			filtering_parse("abbrev=none");
 			break;
 		case 'V':
 			print_version();
@@ -1722,7 +1722,7 @@ init(int argc, char *argv[])
 				error_opt_arg(c, optarg);
 			break;
 		case 'e':
-			parse_qualify_filter(optarg);
+			filtering_parse(optarg);
 			break;
 		case 'o':
 			outfname = xstrdup(optarg);
-- 
2.1.4





More information about the Strace-devel mailing list