[PATCH 1/3] maint: add a definition-based syscall decoder generator
Srikavin Ramkumar
srikavinramkumar at gmail.com
Sat Aug 21 13:51:56 UTC 2021
Implement a code generation tool capable of parsing system call definitions
and generating system call decoders.
* maint/gen/.gitignore: New file.
* maint/gen/Makefile: Likewise.
* maint/gen/README.md: Likewise.
* maint/gen/ast.c: Likewise.
* maint/gen/ast.h: Likewise.
* maint/gen/codegen.c: Likewise.
* maint/gen/deflang.h: Likewise.
* maint/gen/defs/common.syzlang: Likewise.
* maint/gen/grammar.txt: Likewise.
* maint/gen/lex.l: Likewise.
* maint/gen/parse.y: Likewise.
* maint/gen/preprocess.c: Likewise.
* maint/gen/preprocess.h: Likewise.
* maint/gen/symbols.c: Likewise.
* maint/gen/symbols.h: Likewise.
---
maint/gen/.gitignore | 6 +
maint/gen/Makefile | 16 +
maint/gen/README.md | 115 ++++
maint/gen/ast.c | 340 ++++++++++++
maint/gen/ast.h | 226 ++++++++
maint/gen/codegen.c | 958 ++++++++++++++++++++++++++++++++++
maint/gen/deflang.h | 29 +
maint/gen/defs/common.syzlang | 49 ++
maint/gen/grammar.txt | 41 ++
maint/gen/lex.l | 262 ++++++++++
maint/gen/parse.y | 383 ++++++++++++++
maint/gen/preprocess.c | 284 ++++++++++
maint/gen/preprocess.h | 110 ++++
maint/gen/symbols.c | 150 ++++++
maint/gen/symbols.h | 39 ++
15 files changed, 3008 insertions(+)
create mode 100644 maint/gen/.gitignore
create mode 100644 maint/gen/Makefile
create mode 100644 maint/gen/README.md
create mode 100644 maint/gen/ast.c
create mode 100644 maint/gen/ast.h
create mode 100644 maint/gen/codegen.c
create mode 100644 maint/gen/deflang.h
create mode 100644 maint/gen/defs/common.syzlang
create mode 100644 maint/gen/grammar.txt
create mode 100644 maint/gen/lex.l
create mode 100644 maint/gen/parse.y
create mode 100644 maint/gen/preprocess.c
create mode 100644 maint/gen/preprocess.h
create mode 100644 maint/gen/symbols.c
create mode 100644 maint/gen/symbols.h
diff --git a/maint/gen/.gitignore b/maint/gen/.gitignore
new file mode 100644
index 000000000..d690f4c72
--- /dev/null
+++ b/maint/gen/.gitignore
@@ -0,0 +1,6 @@
+lex.yy.c
+parse.tab.c
+parse.tab.h
+parse
+parse.output
+/gen
diff --git a/maint/gen/Makefile b/maint/gen/Makefile
new file mode 100644
index 000000000..85d35d20d
--- /dev/null
+++ b/maint/gen/Makefile
@@ -0,0 +1,16 @@
+CFLAGS += -ggdb -std=gnu99 -Wall -Wextra
+OBJS = parse.tab.o lex.yy.o ast.o codegen.o symbols.o preprocess.o
+
+.PHONY: all clean
+all: gen
+
+gen: $(OBJS)
+	$(CC) $(CFLAGS) $(OBJS) -o ./gen
+# ast.c includes the generated parse.tab.h, so create it before compiling any object
+$(OBJS): parse.tab.h
+lex.yy.c: lex.l
+	flex lex.l
+parse.tab.c parse.tab.h: parse.y
+	bison -d parse.y
+clean:
+	rm -f gen $(OBJS) parse.tab.c parse.tab.h lex.yy.c lex.yy.h
diff --git a/maint/gen/README.md b/maint/gen/README.md
new file mode 100644
index 000000000..5f33db864
--- /dev/null
+++ b/maint/gen/README.md
@@ -0,0 +1,115 @@
+Syscall Definitions
+====
+
+This syscall definition language is based on the [syzkaller description language](https://github.com/google/syzkaller/blob/master/docs/syscall_descriptions.md).
+
+All non-syscall statements maintain their relative ordering and are placed
+before syscall statements in the generated C code.
+
+## Syntax
+
+### Types
+
+Types have the following format `type_name[type_option]`.
+The `type_name` can include alphanumeric characters and `$_`.
+The `type_option` can be another type or a number.
+
+Numbers can be specified as a decimal number (`65`), as a hex number (`0x41`), or as a character constant (`'A'`).
+
+The default types are the following:
+ * standard C types: `void`, `int`, `char`, `long`, `uint`, `ulong`, `longlong`, `ulonglong`, `double`, `float`
+ * `stddef.h` types: `size_t`, `ssize_t`, ...
+ * `stdint.h` types: `uint8_t`, `int8_t`, `uint64_t`, `int64_t`, ...
+ * kernel types: `kernel_long_t`, `kernel_ulong_t`, ...
+ * `fd`: A file descriptor
+ * `tid`: A thread id
+ * `string`: A null terminated char buffer
+ * `path`: A null terminated path string
+ * `stringnoz[n]`: A non-null terminated char buffer of length `n`
+ * `const[x]`: A constant of value `x` that inherits its parent type
+ * `const[x:y]`: A constant with a value between `x` and `y` (inclusive) that inherits its parent type
+ * `ptr[dir, typ]`: A pointer to object of type `typ`; direction can be `in`, `out`, `inout`
+ * `ref[argname]`: A reference to the value of another parameter with name `argname` or `@ret`
+ * `xor_flags[xlat_name, ???, underlying_typ]`: An integer type (`underlying_typ`)
+ containing mutually exclusive flags with xlat symbol name `xlat_name`
+ * `or_flags[xlat_name, ???, underlying_typ]`: An integer type (`underlying_typ`)
+ containing flags that are ORed together with xlat symbol name `xlat_name`
+
+Constants (`const`) can only be used within variant syscalls.
+
+### Syscalls
+Syscall definitions have the format
+```
+syscall_name (arg_name1 arg_type1, arg_name2 arg_type2, ...) return_type
+```
+
+The `return_type` is optional if no special printing mode is needed.
+
+Some system calls have various modes of operations. Consider the `fcntl` syscall.
+Its second parameter determines the types of the remaining arguments. To
+handle this, a variant syscall definition can be used:
+```
+fcntl(filedes fd, cmd xor_flags[fcntl_cmds, F_???, kernel_ulong_t], arg kernel_ulong_t) kernel_ulong_t
+fcntl$F_DUPFD(filedes fd, cmd const[F_DUPFD], arg kernel_ulong_t) fd
+fcntl$F_DUPFD_CLOEXEC(filedes fd, cmd const[F_DUPFD_CLOEXEC], arg kernel_ulong_t) fd
+...
+```
+
+The `$` character is used to indicate that a syscall is a variant of another one.
+The `const` parameters of a variant syscall will be used to determine which
+variant to use. If no variant syscalls match, the base syscall will be used.
+
+### Custom Decoders
+
+Custom decoders have the format
+```
+:type[argname, arg2[$3], $1] %{
+ do_something(tcp, $$, $1);
+%}
+```
+
+The type following the `:` indicates which type this decoder should apply to.
+Template variables (`$` followed by one or more digits) can be used to reference
+the value of a type option. These variables can be used within the body of the
+custom decoder and will be substituted with the resolved value.
+
+The special `$$` variable refers to the root argument.
+
+For example, the syscall `example(arg1 type[test, type2[5], 1])` would have the
+following decoder for the arg1 parameter:
+```
+do_something(tcp, tcp->u_arg[0], 1);
+```
+
+### #import
+
+Import statements have the format
+```
+#import "filename.def"
+```
+
+The contents of `filename.def` are treated as if they were placed in the current file.
+
+### #ifdef/#ifndef
+
+`#ifdef` and `#ifndef` statements have the format
+```
+#ifdef condition
+#ifndef condition
+#endif
+#endif
+```
+
+Ifdef, ifndef, and define statements will be included as-is in the generated output.
+Unlike C, these cannot be placed in the middle of another statement.
+
+### define/include
+
+Include and define statements have the format
+```
+define DEBUG 1
+include "filename.h"
+include <filename.h>
+```
+
+The contents of include and define statements will be included as-is in the generated output.
diff --git a/maint/gen/ast.c b/maint/gen/ast.c
new file mode 100644
index 000000000..ebab08538
--- /dev/null
+++ b/maint/gen/ast.c
@@ -0,0 +1,340 @@
+#include "ast.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "deflang.h"
+#include "symbols.h"
+#include "parse.tab.h"
+
+void *
+xmalloc(size_t n)
+{
+ void *ret = malloc(n);
+
+ if (!ret) {
+ fprintf(stderr, "allocation failed\n");
+ exit(EXIT_FAILURE);
+ }
+
+ return ret;
+}
+
+void *
+xcalloc(size_t n)
+{
+ void *ret = calloc(1, n);
+
+ if (!ret) {
+ fprintf(stderr, "allocation failed\n");
+ exit(EXIT_FAILURE);
+ }
+
+ return ret;
+}
+
+struct ast_node *
+create_ast_node(enum ast_node_type type, void *loc)
+{
+ struct ast_node *node = xmalloc(sizeof *node);
+ *node = (struct ast_node) {
+ .type = type,
+ .loc = {
+ .lineno = ((YYLTYPE *) loc)->first_line,
+ .colno = ((YYLTYPE *) loc)->first_column,
+ .file = strdup(cur_filename)
+ },
+ .next = NULL
+ };
+ return node;
+}
+
+struct ast_type_option_list *
+create_ast_type_option_list(struct ast_type_option *cur, struct ast_type_option_list *next)
+{
+ struct ast_type_option_list *list = xmalloc(sizeof *list);
+ *list = (struct ast_type_option_list) {
+ .next = next,
+ .option = cur
+ };
+ return list;
+}
+
+struct ast_syscall_arg *
+create_ast_syscall_arg(char *name, struct ast_type *type, struct ast_syscall_arg *next)
+{
+ struct ast_syscall_arg *arg = xmalloc(sizeof *arg);
+ *arg = (struct ast_syscall_arg) {
+ .name = name,
+ .type = type,
+ .next = next
+ };
+ return arg;
+}
+
+struct ast_flag_values *
+create_ast_flag_values(char *name, struct ast_flag_values *next)
+{
+ struct ast_flag_values *arg = xmalloc(sizeof *arg);
+ *arg = (struct ast_flag_values) {
+ .name = name,
+ .next = next
+ };
+ return arg;
+}
+
+struct ast_struct_element *
+create_ast_struct_element(char *name, struct ast_type *type, struct ast_struct_element *next)
+{
+ struct ast_struct_element *struct_element = xmalloc(sizeof *struct_element);
+ *struct_element = (struct ast_struct_element) {
+ .name = name,
+ .type = type,
+ .next = next
+ };
+ return struct_element;
+}
+
+struct known_type {
+ struct ast_type type;
+ struct known_type *next;
+};
+
+static struct known_type *known_types = NULL;
+
+struct known_type_option {
+ struct ast_type_option type_option;
+ struct known_type_option *next;
+};
+
+static struct known_type_option *known_type_options = NULL;
+
+static bool
+compare_type_option_list(struct ast_type_option_list *a, struct ast_type_option_list *b,
+			 bool match_templates);
+
+/* Compares two single type options; ranges are compared by both bounds. */
+static bool
+compare_type_option(struct ast_type_option *a, struct ast_type_option *b, bool match_templates)
+{
+	if (a == NULL || b == NULL) {
+		return a == b;
+	}
+	if (a->child_type == AST_TYPE_CHILD_TEMPLATE_ID ||
+	    b->child_type == AST_TYPE_CHILD_TEMPLATE_ID) {
+		// templates are able to match all other type options
+		return match_templates;
+	}
+	if (a->child_type != b->child_type) {
+		return false;
+	}
+	switch (a->child_type) {
+	case AST_TYPE_CHILD_NUMBER:
+		return a->number.val == b->number.val;
+	case AST_TYPE_CHILD_TYPE:
+		return strcmp(a->type->name, b->type->name) == 0 &&
+		       compare_type_option_list(a->type->options, b->type->options,
+						match_templates);
+	case AST_TYPE_CHILD_RANGE:
+		return compare_type_option(a->range.min, b->range.min, match_templates) &&
+		       compare_type_option(a->range.max, b->range.max, match_templates);
+	default:
+		return false;
+	}
+}
+
+/*
+ * Returns true if both option lists have the same length and pairwise-equal options.
+ * If match_templates is set, a template id option matches any option.
+ */
+static bool
+compare_type_option_list(struct ast_type_option_list *a, struct ast_type_option_list *b,
+			 bool match_templates)
+{
+	for (; a != NULL && b != NULL; a = a->next, b = b->next) {
+		if (!compare_type_option(a->option, b->option, match_templates)) {
+			return false;
+		}
+	}
+
+	return a == NULL && b == NULL;
+}
+
+bool
+ast_type_matching(struct ast_type *a, struct ast_type *b)
+{
+ return strcmp(a->name, b->name) == 0 && compare_type_option_list(a->options, b->options, true);
+}
+
+struct ast_type *
+create_or_get_type(char **error, char *name, struct ast_type_option_list *options)
+{
+ // check if we've seen this type before
+ for (struct known_type *cur = known_types; cur != NULL; cur = cur->next) {
+ if (strcmp(cur->type.name, name) == 0 &&
+ compare_type_option_list(cur->type.options, options, false)) {
+ return &cur->type;
+ }
+ }
+
+ // allocate a new type
+ struct known_type *type = xmalloc(sizeof *type);
+
+ char *status = resolve_type(&type->type, name, options);
+ type->next = known_types;
+
+ if (error) {
+ *error = status;
+ }
+
+ if (status != NULL) {
+ free(type);
+ return NULL;
+ }
+
+ known_types = type;
+
+ return &type->type;
+}
+
+struct ast_type_option *
+create_or_get_type_option_number(struct ast_number number)
+{
+ // check if we've seen this type option before
+ for (struct known_type_option *cur = known_type_options; cur != NULL; cur = cur->next) {
+ if (cur->type_option.child_type == AST_TYPE_CHILD_NUMBER &&
+ cur->type_option.number.val == number.val) {
+ return &cur->type_option;
+ }
+ }
+
+ // allocate a new type option
+ struct known_type_option *option = xmalloc(sizeof *option);
+ *option = (struct known_type_option) {
+ .type_option = {
+ .child_type = AST_TYPE_CHILD_NUMBER,
+ .number = number
+ },
+ .next = known_type_options
+ };
+
+ known_type_options = option;
+
+ return &option->type_option;
+}
+
+struct ast_type_option *
+create_type_template_identifier(struct ast_number number)
+{
+ struct ast_type_option *option = xmalloc(sizeof *option);
+ *option = (struct ast_type_option) {
+ .child_type = AST_TYPE_CHILD_TEMPLATE_ID,
+ .template = {
+ .id = number.val
+ }
+ };
+
+ return option;
+}
+
+struct ast_type_option *
+create_or_get_type_option_nested(struct ast_type *child)
+{
+ // check if we've seen this type option before
+ for (struct known_type_option *cur = known_type_options; cur != NULL; cur = cur->next) {
+ // since all types are allocated by create_or_get_type,
+ // types that are equal have the same address
+ if (cur->type_option.child_type == AST_TYPE_CHILD_TYPE && cur->type_option.type == child) {
+ return &cur->type_option;
+ }
+ }
+
+ // allocate a new type option
+ struct known_type_option *option = xmalloc(sizeof *option);
+ *option = (struct known_type_option) {
+ .type_option = {
+ .child_type = AST_TYPE_CHILD_TYPE,
+ .type = child
+ },
+ .next = known_type_options
+ };
+
+ known_type_options = option;
+
+ return &option->type_option;
+}
+
+struct ast_type_option *
+create_type_option_range(struct ast_type_option *min, struct ast_type_option *max)
+{
+ struct ast_type_option *ret = xmalloc(sizeof *ret);
+ *ret = (struct ast_type_option) {
+ .child_type = AST_TYPE_CHILD_RANGE,
+ .range = {
+ .min = min,
+ .max = max
+ }
+ };
+ return ret;
+}
+
+
+void
+free_ast_tree(struct ast_node *root)
+{
+ switch (root->type) {
+ case AST_IFDEF:
+ free(root->ifdef.value);
+ break;
+ case AST_DEFINE:
+ free(root->define.value);
+ break;
+ case AST_INCLUDE:
+ free(root->include.value);
+ break;
+ case AST_STRUCT: {
+ struct ast_struct_element *cur = root->ast_struct.elements;
+ while (cur != NULL) {
+ struct ast_struct_element *tmp = cur;
+ cur = tmp->next;
+ free(tmp->name);
+ free(tmp);
+ }
+ break;
+ }
+ case AST_COMPOUND: {
+ struct ast_node *cur = root->compound.children;
+ while (cur != NULL) {
+ struct ast_node *tmp = cur;
+ cur = tmp->next;
+ free_ast_tree(tmp);
+ }
+ break;
+ }
+ case AST_SYSCALL: {
+ struct ast_syscall_arg *cur = root->syscall.args;
+ while (cur != NULL) {
+ struct ast_syscall_arg *tmp = cur;
+ cur = tmp->next;
+ free(tmp->name);
+ free(tmp);
+ }
+ break;
+ }
+ case AST_FLAGS: {
+ struct ast_flag_values *cur = root->flags.values;
+ while (cur != NULL) {
+ struct ast_flag_values *tmp = cur;
+ cur = tmp->next;
+ free(tmp->name);
+ free(tmp);
+ }
+ break;
+ }
+ default:
+ break;
+ }
+
+ free(root);
+}
diff --git a/maint/gen/ast.h b/maint/gen/ast.h
new file mode 100644
index 000000000..2a5ee9805
--- /dev/null
+++ b/maint/gen/ast.h
@@ -0,0 +1,226 @@
+#ifndef AST_H
+#define AST_H
+
+#include <stdbool.h>
+#include <stdint.h>
+
+struct ast_number {
+ char *raw;
+ intmax_t val;
+};
+
+enum ast_node_type {
+ AST_IFDEF,
+ AST_SYSCALL,
+ AST_DEFINE,
+ AST_INCLUDE,
+ AST_COMPOUND,
+ AST_STRUCT,
+ AST_DECODER,
+ AST_FLAGS
+};
+
+struct ast_struct {
+ char *name;
+ struct ast_struct_element *elements;
+};
+
+struct ast_struct_element {
+ char *name;
+ struct ast_type *type;
+ struct ast_struct_element *next;
+};
+
+struct ast_syscall {
+ char *name;
+ struct ast_syscall_arg *args;
+ struct ast_type *return_type;
+};
+
+struct ast_syscall_arg {
+ char *name;
+ struct ast_type *type;
+ struct ast_syscall_arg *next;
+};
+
+enum standard_types {
+ // non-special type
+ TYPE_BASIC,
+ // const[x] or const[x:y]
+ TYPE_CONST,
+ // ptr[dir, typ]
+ TYPE_PTR,
+ // ref[argname]
+ TYPE_REF,
+ // xor_flags[xlat_name, dflt, underlying_typ]
+ TYPE_XORFLAGS,
+ // or_flags[xlat_name, dflt, underlying_typ]
+ TYPE_ORFLAGS
+};
+
+#define IS_IN_PTR(x) ((x)->type == TYPE_PTR && \
+((x)->ptr.dir == PTR_DIR_INOUT || (x)->ptr.dir == PTR_DIR_IN))
+
+#define IS_OUT_PTR(x) ((x)->type == TYPE_PTR && \
+((x)->ptr.dir == PTR_DIR_INOUT || (x)->ptr.dir == PTR_DIR_OUT))
+
+#define IS_INOUT_PTR(x) ((x)->type == TYPE_PTR && (x)->ptr.dir == PTR_DIR_INOUT)
+
+enum ptr_dir {
+ PTR_DIR_IN,
+ PTR_DIR_OUT,
+ PTR_DIR_INOUT,
+};
+
+struct ast_type {
+ enum standard_types type;
+ char *name;
+ struct ast_type_option_list *options;
+ union {
+ struct {
+ struct ast_type_option *len;
+ } stringnoz;
+ struct {
+ struct ast_type_option *value;
+ struct ast_type *real_type;
+ } constt;
+ struct {
+ enum ptr_dir dir;
+ struct ast_type *type;
+ } ptr;
+ struct {
+ struct ast_type_option *type;
+ struct ast_type_option *len;
+ } array;
+ struct {
+ bool return_value;
+ // only set if return_value is false
+ char *argname;
+ } ref;
+ struct {
+ struct ast_type_option *flag_type;
+ char *dflt;
+ struct ast_type *underlying;
+ } xorflags;
+ struct {
+ struct ast_type_option *flag_type;
+ char *dflt;
+ struct ast_type *underlying;
+ } orflags;
+ };
+};
+
+struct ast_type_option_list {
+ struct ast_type_option *option;
+ struct ast_type_option_list *next;
+};
+
+enum ast_type_option_child {
+ AST_TYPE_CHILD_RANGE,
+ AST_TYPE_CHILD_NUMBER,
+ AST_TYPE_CHILD_TYPE,
+ AST_TYPE_CHILD_TEMPLATE_ID
+};
+
+struct ast_type_option {
+ enum ast_type_option_child child_type;
+ union {
+ struct ast_type *type;
+ struct ast_number number;
+ struct {
+ struct ast_type_option *min;
+ struct ast_type_option *max;
+ } range;
+ struct {
+ intmax_t id;
+ } template;
+ };
+};
+
+struct ast_flag_values {
+ char *name;
+ struct ast_flag_values *next;
+};
+
+struct ast_loc {
+ char *file;
+ int lineno;
+ int colno;
+};
+
+struct ast_node {
+ enum ast_node_type type;
+ struct ast_loc loc;
+
+ // used when this node's parent is AST_COMPOUND
+ struct ast_node *next;
+
+ union {
+ struct ast_syscall syscall;
+ struct ast_struct ast_struct;
+ struct {
+ char *value;
+ bool invert;
+ struct ast_node *child;
+ } ifdef;
+ struct {
+ char *value;
+ } include;
+ struct {
+ char *value;
+ } define;
+ struct {
+ struct ast_node *children;
+ } compound;
+ struct {
+ char *name;
+ struct ast_flag_values *values;
+ } flags;
+ struct {
+ struct ast_type *type;
+ char *decoder;
+ } decoder;
+ };
+};
+
+struct ast_node *
+create_ast_node(enum ast_node_type type, void *location);
+
+struct ast_type_option_list *
+create_ast_type_option_list(struct ast_type_option *cur, struct ast_type_option_list *next);
+
+struct ast_struct_element *
+create_ast_struct_element(char *name, struct ast_type *type, struct ast_struct_element *next);
+
+struct ast_syscall_arg *
+create_ast_syscall_arg(char *name, struct ast_type *type, struct ast_syscall_arg *next);
+
+struct ast_flag_values *
+create_ast_flag_values(char *name, struct ast_flag_values *next);
+
+// returns true if both types have the same name and matching options (template ids match any option)
+bool
+ast_type_matching(struct ast_type *a, struct ast_type *b);
+
+/*
+ * On error, returns NULL and stores an error string in *error (if error is non-NULL).
+ */
+struct ast_type *
+create_or_get_type(char **error, char *name, struct ast_type_option_list *options);
+
+struct ast_type_option *
+create_or_get_type_option_number(struct ast_number number);
+
+struct ast_type_option *
+create_or_get_type_option_nested(struct ast_type *child);
+
+struct ast_type_option *
+create_type_option_range(struct ast_type_option *min, struct ast_type_option *max);
+
+struct ast_type_option *
+create_type_template_identifier(struct ast_number number);
+
+void
+free_ast_tree(struct ast_node *root);
+
+#endif
diff --git a/maint/gen/codegen.c b/maint/gen/codegen.c
new file mode 100644
index 000000000..0e3fcc4f3
--- /dev/null
+++ b/maint/gen/codegen.c
@@ -0,0 +1,958 @@
+#include <assert.h>
+#include <ctype.h>
+#include <inttypes.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "ast.h"
+#include "deflang.h"
+#include "symbols.h"
+
+struct {
+ char *name;
+ char *ctype;
+} basic_types[] = {
+ {"uchar", "unsigned char"},
+ {"ushort", "unsigned short"},
+ {"uint", "unsigned int"},
+ {"ulong", "unsigned long"},
+ {"longlong", "long long"},
+ {"ulonglong", "unsigned long long"},
+ {"longdouble", "long double"},
+ {"string", "char"},
+ {"path", "char"},
+ {"size", "kernel_size_t"},
+ {"size_t", "kernel_size_t"},
+ {"gid", "gid_t"}
+};
+
+char *signed_int_types[] = {
+ "char",
+ "short",
+ "int",
+ "long",
+ "longlong",
+ "kernel_long_t",
+ "ssize_t"
+};
+
+char *unsigned_int_types[] = {
+ "uchar",
+ "ushort",
+ "uint",
+ "ulong",
+ "ulonglong",
+ "kernel_ulong_t",
+ "size_t",
+ "size"
+};
+
+static struct decoder_list *decoders = NULL;
+
+#define VARIANT_FUNC_NAME_LEN 64
+#define SYSCALL_RET_FLAG_LEN 64
+#define SYSCALL_ARG_STR_LEN 16
+#define DECODER_PROTOTYPE_LEN 128
+
+#define ARRAY_LEN(x) (sizeof(x) / sizeof((x)[0]))
+
+/* convenience macros */
+
+#define OUTFI(...) outf_indent(indent_level, out, __VA_ARGS__)
+
+#define OUTF(...) outf(out, __VA_ARGS__)
+
+#define OUTC(c) outc(out, c)
+
+#define OUTSI(s) outs_indent(indent_level, out, s)
+
+#define OUTS(s) outs(out, s)
+
+static void
+outf_indent(int indent_level, FILE *out, const char *fmt,
+ ...) __attribute__((format(printf, 3, 4)));
+
+static void
+outf(FILE *out, const char *fmt, ...) __attribute__((format(printf, 2, 3)));
+
+static void
+outc(FILE *out, int c)
+{
+ fputc(c, out);
+}
+
+static void
+outs(FILE *out, const char *s)
+{
+ fputs(s, out);
+}
+
+static void
+indent(FILE *out, int indent)
+{
+ for (int i = 0; i < indent; ++i) {
+ outc(out, '\t');
+ }
+}
+
+static void
+outs_indent(int indent_level, FILE *out, const char *s)
+{
+ indent(out, indent_level);
+ fprintf(out, "%s", s);
+}
+
+static void
+outf(FILE *out, const char *fmt, ...)
+{
+ va_list args;
+ va_start(args, fmt);
+
+ vfprintf(out, fmt, args);
+
+ va_end(args);
+}
+
+static void
+outf_indent(int indent_level, FILE *out, const char *fmt, ...)
+{
+ va_list args;
+ va_start(args, fmt);
+
+ indent(out, indent_level);
+ vfprintf(out, fmt, args);
+
+ va_end(args);
+}
+
+static void
+log_warning(char *fmt, struct ast_loc node, ...)
+{
+ va_list args;
+ va_start(args, node);
+
+ fprintf(stderr, "Codegen Warning: ");
+ fprintf(stderr, "line %d, col %d: ", node.lineno, node.colno);
+
+ vfprintf(stderr, fmt, args);
+
+ fprintf(stderr, "\n");
+
+ va_end(args);
+}
+
+static bool
+is_signed_integer_typename(const char *name)
+{
+ for (size_t i = 0; i < ARRAY_LEN(signed_int_types); ++i) {
+ if (strcmp(signed_int_types[i], name) == 0) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static bool
+is_unsigned_integer_typename(const char *name)
+{
+ for (size_t i = 0; i < ARRAY_LEN(unsigned_int_types); ++i) {
+ if (strcmp(unsigned_int_types[i], name) == 0) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/*
+ * Stores a string referring to the i-th argument in the current syscall.
+ */
+static void
+get_syscall_arg_value(char out[static SYSCALL_ARG_STR_LEN], struct syscall *syscall, size_t i)
+{
+ if (syscall->is_ioctl) {
+ if (i >= 1 && i <= 2) {
+ const char *ioctl_args[3] = {"", "code", "arg"};
+ snprintf(out, SYSCALL_ARG_STR_LEN, "%s", ioctl_args[i]);
+ return;
+ }
+
+ log_warning("ioctl decoder referenced OOB argument %zu", syscall->loc, i);
+ }
+ snprintf(out, SYSCALL_ARG_STR_LEN, "tcp->u_arg[%zu]", i);
+}
+
+/*
+ * Stores a string referring to the return value of the current syscall.
+ */
+static void
+get_syscall_ret_value(char out[static SYSCALL_ARG_STR_LEN])
+{
+ snprintf(out, SYSCALL_ARG_STR_LEN, "tcp->u_rval");
+}
+
+/*
+ * Returns a string containing the C equivalent of a given type.
+ */
+static char *
+type_to_ctype(struct ast_type *type)
+{
+ if (type->type == TYPE_BASIC) {
+ for (size_t i = 0; i < ARRAY_LEN(basic_types); ++i) {
+ if (strcmp(type->name, basic_types[i].name) == 0) {
+ return basic_types[i].ctype;
+ }
+ }
+
+ struct ast_node *def = symbol_get(type->name);
+ if (def != NULL && def->type == AST_STRUCT) {
+ size_t len = sizeof("struct ") + strlen(type->name);
+ char *ret = xmalloc(len);
+ snprintf(ret, len, "struct %s", type->name);
+ return ret;
+ }
+
+ return type->name;
+ }
+
+ if (type->type == TYPE_PTR) {
+ char *underlying = type_to_ctype(type->ptr.type);
+
+ size_t len = strlen(underlying) + sizeof(" *");
+ char *ret = xmalloc(len);
+ snprintf(ret, len, "%s *", underlying);
+ return ret;
+ }
+
+ if (type->type == TYPE_XORFLAGS) {
+ return type_to_ctype(type->xorflags.underlying);
+ }
+
+ if (type->type == TYPE_ORFLAGS) {
+ return type_to_ctype(type->orflags.underlying);
+ }
+
+ return type->name;
+}
+
+/*
+ * Get flags to return from a SYS_FUNC.
+ */
+static void
+get_sys_func_return_flags(char out[static SYSCALL_RET_FLAG_LEN], struct ast_type *type,
+ bool is_ioctl)
+{
+ struct {
+ char *type;
+ char *flag;
+ } flags[] = {
+ {"fd", "RVAL_FD"},
+ {"tid", "RVAL_TID"},
+ {"sid", "RVAL_SID"},
+ {"tgid", "RVAL_TGID"},
+ {"pgid", "RVAL_PGID"}
+ };
+
+ char *base = "RVAL_DECODED";
+ if (is_ioctl) {
+ base = "RVAL_IOCTL_DECODED";
+ }
+
+ char *following = NULL;
+ for (size_t i = 0; i < ARRAY_LEN(flags); ++i) {
+ if (strcmp(flags[i].type, type->name) == 0) {
+ following = flags[i].flag;
+ break;
+ }
+ }
+
+ if (following) {
+ snprintf(out, SYSCALL_RET_FLAG_LEN, "%s | %s", base, following);
+ } else {
+ snprintf(out, SYSCALL_RET_FLAG_LEN, "%s", base);
+ }
+}
+
+/*
+ * Resolves a type option to a concrete value.
+ *
+ * For example, const[PATH_MAX] is resolved to PATH_MAX
+ * and const[ref[argname]] is resolved to tcp->u_arg[2]
+ * (where argname is the name of the 3rd syscall argument).
+ *
+ * The specified type option MUST NOT be a range or a template id.
+ */
+static char *
+resolve_type_option_to_value(struct syscall *syscall, struct ast_type_option *option)
+{
+ assert(option->child_type != AST_TYPE_CHILD_RANGE &&
+ option->child_type != AST_TYPE_CHILD_TEMPLATE_ID);
+
+ if (option->child_type == AST_TYPE_CHILD_NUMBER) {
+ // return the number exactly as specified in the source file
+ return option->number.raw;
+ } else if (option->child_type == AST_TYPE_CHILD_TYPE) {
+ if (option->type->type == TYPE_REF) {
+ // identify which argument is being referred to
+
+ // syscall return value
+ if (option->type->ref.return_value) {
+ char *ret = xmalloc(SYSCALL_ARG_STR_LEN);
+ get_syscall_ret_value(ret);
+ return ret;
+ }
+
+ // find syscall argument by name
+ bool found = false;
+ size_t index = 0;
+
+ for (; index < syscall->arg_count; ++index) {
+ if (strcmp(option->type->ref.argname, syscall->args[index].name) == 0) {
+ found = true;
+ break;
+ }
+ }
+
+ if (found) {
+ char *ret = xmalloc(SYSCALL_ARG_STR_LEN);
+ get_syscall_arg_value(ret, syscall, index);
+ return ret;
+ }
+
+ log_warning("Failed to resolve 'ref' type with value \"%s\" to argument",
+ syscall->loc, option->type->ref.argname);
+ return "#error FAILED TO RESOLVE REF TYPE TO VALUE";
+ } else {
+ // assume the given value is a constant or from a #define
+ return option->type->name;
+ }
+ }
+
+ assert(false);
+}
+
+/*
+ * Emits code that saves a heap copy of the value arg points to via set_tcb_priv_data.
+ */
+static void
+store_single_value(FILE *out, struct ast_type *type, char *arg, int indent_level)
+{
+	const char *ctype = type_to_ctype(type->ptr.type);
+
+	OUTFI("{\n");
+	indent_level++;
+	OUTFI("%s %s;\n", ctype, "tmp_var");
+	OUTFI("if (!umove_or_printaddr(tcp, %s, &tmp_var)) {\n", arg);
+	indent_level++;
+
+	OUTFI("void *tmp_buffer = xmalloc(sizeof(%s));\n", ctype);
+	// tmp_var is an object, not a pointer, so memcpy needs its address
+	OUTFI("memcpy(tmp_buffer, &tmp_var, sizeof(%s));\n", ctype);
+	OUTFI("set_tcb_priv_data(tcp, tmp_buffer, free);\n");
+	indent_level--;
+	OUTFI("}\n");
+	indent_level--;
+	OUTFI("}\n");
+}
+
+static void
+generate_printer(FILE *out, struct syscall *syscall, const char *argname,
+ const char *arg, bool entering,
+ struct ast_type *type, int indent_level);
+
+/* Emits code that fetches what arg points to and prints it as the pointee type. */
+static void
+generate_printer_ptr(FILE *out, struct syscall *syscall, const char *argname,
+		     const char *arg, bool entering,
+		     struct ast_type *type, int indent_level)
+{
+	struct ast_type *underlying = type->ptr.type;
+
+	// copy from target memory and use decoder for resulting value
+	char var_name[32];
+	snprintf(var_name, sizeof(var_name), "tmpvar_%s", argname);
+
+	if ((IS_IN_PTR(type) && entering) || (IS_OUT_PTR(type) && !entering)) {
+		OUTFI("%s %s;\n", type_to_ctype(underlying), var_name);
+		OUTFI("if (!umove_or_printaddr(tcp, %s, &%s)) {\n", arg, var_name);
+		indent_level++;
+
+		OUTFI("tprint_indirect_begin();\n");
+		generate_printer(out, syscall, argname, var_name, entering,
+				 underlying, indent_level);
+		OUTFI("tprint_indirect_end();\n");
+
+		indent_level--;
+		OUTSI("}\n");
+	}
+}
+
+/*
+ * Outputs a custom decoder body, substituting "$$" with the argument expression
+ * and each "$N" with the value of the type option bound to template id N.
+ */
+static void
+generate_templated_printer(FILE *out, struct syscall *syscall,
+			   const char *arg, struct ast_type *arg_type,
+			   struct decoder templated_decoder)
+{
+	struct {
+		char *value;
+		intmax_t template_id;
+	} substitutions[256];
+	size_t subs_pos = 0;
+
+	// Do a DFS over the template type to find substitution markers
+	struct dfs_stack_entry {
+		struct ast_type *template;
+		struct ast_type *actual;
+	};
+
+	struct dfs_stack_entry dfs_stack[128] = {0};
+	size_t stack_ptr = 0;
+
+	dfs_stack[stack_ptr++] = (struct dfs_stack_entry) {
+		.template = templated_decoder.matching_type,
+		.actual = arg_type
+	};
+
+	while (stack_ptr != 0) {
+		struct dfs_stack_entry entry = dfs_stack[--stack_ptr];
+
+		if (entry.actual == NULL || entry.template == NULL) {
+			continue;
+		}
+
+		if (strcmp(entry.actual->name, entry.template->name) != 0) {
+			continue;
+		}
+
+		struct ast_type_option_list *template_option = entry.template->options;
+		struct ast_type_option_list *actual_option = entry.actual->options;
+		for (; actual_option != NULL && template_option != NULL;
+		     actual_option = actual_option->next, template_option = template_option->next) {
+			if (template_option->option->child_type == AST_TYPE_CHILD_TEMPLATE_ID) {
+				// never write past the fixed-size substitution table
+				if (subs_pos == ARRAY_LEN(substitutions)) {
+					log_warning("Too many template variables in decoder", syscall->loc);
+					break;
+				}
+				substitutions[subs_pos].value = resolve_type_option_to_value(syscall,
+					actual_option->option);
+				substitutions[subs_pos].template_id = template_option->option->template.id;
+				subs_pos++;
+				continue;
+			}
+
+			if (actual_option->option->child_type != template_option->option->child_type) {
+				break;
+			}
+
+			if (template_option->option->child_type != AST_TYPE_CHILD_TYPE) {
+				continue;
+			}
+
+			// never push past the fixed-size DFS stack
+			if (stack_ptr == ARRAY_LEN(dfs_stack)) {
+				log_warning("Decoder type is nested too deeply", syscall->loc);
+				break;
+			}
+			dfs_stack[stack_ptr++] = (struct dfs_stack_entry) {
+				.template = template_option->option->type,
+				.actual = actual_option->option->type
+			};
+		}
+	}
+
+	// Output the template string and replace substitution markers with real values
+	const char *template = templated_decoder.fmt_string;
+	size_t template_len = strlen(template);
+
+	for (size_t i = 0; i < template_len; ++i) {
+		// template[i + 1] is at worst the terminating NUL, so the lookahead is safe
+		unsigned char next = (unsigned char) template[i + 1];
+
+		if (template[i] != '$' || (next != '$' && !isdigit(next))) {
+			OUTC(template[i]);
+			continue;
+		}
+
+		if (next == '$') {
+			OUTF("(%s)", arg);
+			i++;
+			continue;
+		}
+
+		// consume every digit of the template id; i stops on the last digit so
+		// the character following the id is handled by the next iteration
+		intmax_t cur = 0;
+		while (isdigit((unsigned char) template[i + 1])) {
+			i++;
+			cur = cur * 10 + (template[i] - '0');
+		}
+
+		// find matching substitution
+		size_t found = 0;
+		while (found < subs_pos && substitutions[found].template_id != cur) {
+			found++;
+		}
+		if (found == subs_pos) {
+			log_warning("Template variable $%" PRIdMAX " could not be resolved!",
+				    syscall->loc, cur);
+			continue;
+		}
+
+		OUTF("(%s)", substitutions[found].value);
+	}
+}
+
+/*
+ * Emits the C statement(s) that print the expression `arg` as a value of
+ * the given type.
+ *
+ * A registered decoder whose pattern matches `type` always wins.  Otherwise
+ * a built-in printer is picked from the kind of the type.  Types without a
+ * known printer are reported with a warning; basic and unknown types also
+ * leave an #error line in the generated file.
+ *
+ * argname is used in diagnostics and, together with entering, is handed on
+ * to generate_printer_ptr and to the recursive call for const types.
+ */
+static void
+generate_printer(FILE *out, struct syscall *syscall,
+		 const char *argname, const char *arg, bool entering,
+		 struct ast_type *type, int indent_level)
+{
+	/* look for the first custom decoder that accepts this type */
+	struct decoder_list *match = decoders;
+	while (match != NULL && !ast_type_matching(match->decoder.matching_type, type)) {
+		match = match->next;
+	}
+	if (match != NULL) {
+		OUTFI("/* using decoder from %s:%d:%d */\n", match->decoder.loc.file,
+		      match->decoder.loc.lineno, match->decoder.loc.colno);
+		generate_templated_printer(out, syscall, arg, type, match->decoder);
+		OUTC('\n');
+		return;
+	}
+
+	if (type->type == TYPE_BASIC) {
+		if (is_signed_integer_typename(type->name)) {
+			OUTFI("PRINT_VAL_D((%s) %s);\n", type_to_ctype(type), arg);
+		} else if (is_unsigned_integer_typename(type->name)) {
+			OUTFI("PRINT_VAL_U((%s) %s);\n", type_to_ctype(type), arg);
+		} else {
+			log_warning("No known printer for basic type %s", syscall->loc, type->name);
+			outf_indent(indent_level, out, "#error UNHANDLED BASIC TYPE: %s\n", type->name);
+		}
+		return;
+	}
+
+	if (type->type == TYPE_PTR) {
+		generate_printer_ptr(out, syscall, argname, arg, entering, type, indent_level);
+		return;
+	}
+
+	if (type->type == TYPE_ORFLAGS) {
+		OUTFI("printflags(%s, %s, \"%s\");\n", type->orflags.flag_type->type->name, arg,
+		      type->orflags.dflt);
+		return;
+	}
+
+	if (type->type == TYPE_XORFLAGS) {
+		OUTFI("printxval(%s, %s, \"%s\");\n", type->xorflags.flag_type->type->name, arg,
+		      type->xorflags.dflt);
+		return;
+	}
+
+	/* bare strings carry no direction, so nothing can be printed for them */
+	if (strcmp(type->name, "stringnoz") == 0 || strcmp(type->name, "string") == 0) {
+		log_warning("Type '%s' should be wrapped in a ptr type to indicate direction",
+			    syscall->loc, type->name);
+		return;
+	}
+
+	if (type->type == TYPE_CONST) {
+		struct ast_type *real = type->constt.real_type;
+		if (!real) {
+			log_warning("Const type (%s) has no matching parent syscall argument.", syscall->loc,
+				    argname);
+			return;
+		}
+		/* a const argument is printed like the parent's argument at that position */
+		OUTFI("/* inherited parent type (%s) */\n", type_to_ctype(real));
+		generate_printer(out, syscall, argname, arg, entering, real, indent_level);
+		return;
+	}
+
+	log_warning("Type '%s' is currently unhandled", syscall->loc, type->name);
+	outf_indent(indent_level, out, "#error UNHANDLED TYPE: %s\n", type->name);
+}
+
+/*
+ * Emits the return statement of a generated decoder.
+ *
+ * Flag-typed return values additionally get tcp->auxstr set to a textual
+ * form of tcp->u_rval and return RVAL_STR; every other type returns the
+ * flags computed by get_sys_func_return_flags.
+ */
+static void
+generate_return_flags(FILE *out, struct syscall *syscall, int indent_level)
+{
+	struct ast_type ret = syscall->ret;
+
+	if (ret.type == TYPE_ORFLAGS) {
+		OUTFI("tcp->auxstr = sprintflags(\"%s\", %s, (kernel_ulong_t) tcp->u_rval);\n",
+		      ret.orflags.dflt, ret.orflags.flag_type->type->name);
+		OUTFI("return RVAL_STR;\n");
+		return;
+	}
+
+	if (ret.type == TYPE_XORFLAGS) {
+		OUTFI("tcp->auxstr = xlookup(%s, (kernel_ulong_t) tcp->u_rval);\n",
+		      ret.xorflags.flag_type->type->name);
+		OUTFI("return RVAL_STR;\n");
+		return;
+	}
+
+	char rval_flags[SYSCALL_RET_FLAG_LEN];
+	get_sys_func_return_flags(rval_flags, &ret, syscall->is_ioctl);
+	OUTFI("return %s;\n", rval_flags);
+}
+
+/*
+ * Builds the C function name used for a variant syscall: the variant name
+ * is prefixed with "var_" (or "var_leaf_" when is_leaf is set) and every
+ * '$' is replaced by '_', e.g. fcntl$F_DUPFD becomes var_leaf_fcntl_F_DUPFD
+ * for a leaf and var_fcntl_F_DUPFD otherwise.
+ *
+ * is_leaf should be set if the corresponding syscall is a leaf node,
+ * i.e. has no sub syscalls.
+ */
+static void
+get_variant_function_name(char out[static VARIANT_FUNC_NAME_LEN], char *variant_name, bool is_leaf)
+{
+	const char *leaf_tag = is_leaf ? "leaf_" : "";
+
+	snprintf(out, VARIANT_FUNC_NAME_LEN, "var_%s%s", leaf_tag, variant_name);
+
+	/* snprintf always terminates the buffer, so a plain string walk is enough */
+	for (char *dollar = strchr(out, '$'); dollar != NULL; dollar = strchr(dollar + 1, '$')) {
+		*dollar = '_';
+	}
+}
+
+/*
+ * Writes the opening line of every preprocessor condition, one per line,
+ * in the order they are stored.  A NULL condition writes nothing.
+ *
+ * For example:
+ * #ifdef linux
+ */
+void
+out_statement_condition_start(FILE *out, struct statement_condition *condition)
+{
+	const size_t total = (condition != NULL) ? condition->count : 0;
+
+	for (size_t n = 0; n < total; n++) {
+		OUTF("%s\n", condition->values[n]);
+	}
+}
+
+/*
+ * Writes one closing "#endif" (followed by an empty line) for every
+ * preprocessor condition.  A NULL condition writes nothing.
+ *
+ * For example:
+ * #endif
+ */
+void
+out_statement_condition_end(FILE *out, struct statement_condition *condition)
+{
+	size_t remaining = (condition != NULL) ? condition->count : 0;
+
+	while (remaining > 0) {
+		OUTS("#endif\n\n");
+		remaining--;
+	}
+}
+
+/*
+ * Formats the first two lines of a decoder definition into out:
+ * the return type (with "static " in front when internal is set) and the
+ * function name with its parameter list.  ioctl decoders take the extra
+ * "code" and "arg" parameters.
+ */
+static void
+get_decoder_prototype(char out[static DECODER_PROTOTYPE_LEN], bool internal,
+		      struct syscall *syscall, char *func_name)
+{
+	const char *linkage = "";
+	const char *extra_params = "";
+
+	if (internal) {
+		linkage = "static ";
+	}
+	if (syscall->is_ioctl) {
+		extra_params = ", unsigned int code, kernel_ulong_t arg";
+	}
+
+	snprintf(out, DECODER_PROTOTYPE_LEN, "%sint\n%s(struct tcb *tcp%s)\n",
+		 linkage, func_name, extra_params);
+}
+
+/*
+ * Prints out a decoder for the given system call.
+ *
+ * When is_variant is set the decoder is emitted as a static var_leaf_*
+ * function, otherwise as a SYS_FUNC() definition.  For an ioctl syscall
+ * with ioctl_fallback set only a stub returning RVAL_DECODED is emitted.
+ *
+ * The body depends on the number of OUT pointer arguments:
+ *   0  - all arguments are printed by one straight-line sequence;
+ *   1  - arguments before the OUT pointer are printed on entering, the OUT
+ *        pointer and everything after it on exiting;
+ *   >1 - not implemented yet, an #error line is generated.
+ *
+ * The syscall's preprocessor conditions are opened before the function and
+ * closed after it on every path, including the ioctl fallback stub.
+ */
+static void
+generate_decoder(FILE *out, struct syscall *syscall, bool is_variant, bool ioctl_fallback)
+{
+	int indent_level = 0;
+
+	out_statement_condition_start(out, syscall->conditions);
+
+	int arg_offset = 0;
+	int arg_index = 0;
+
+	if (syscall->is_ioctl) {
+		// no need to decode code, or arg for ioctl variant syscalls
+		arg_offset = 2;
+		arg_index = 2;
+	}
+
+	// determine which strategy to use depending on how many OUT ptrs there are
+	size_t out_ptrs = 0;
+	for (size_t i = arg_offset; i < syscall->arg_count; i++) {
+		if (IS_OUT_PTR(syscall->args[i].type)) {
+			out_ptrs++;
+		}
+	}
+
+	// output function declaration
+	if (is_variant) {
+		char func_name[VARIANT_FUNC_NAME_LEN];
+		get_variant_function_name(func_name, syscall->name, true);
+
+		char decoder_prototype[DECODER_PROTOTYPE_LEN];
+		get_decoder_prototype(decoder_prototype, true, syscall, func_name);
+
+		OUTSI(decoder_prototype);
+	} else {
+		OUTFI("SYS_FUNC(%s)\n", syscall->name);
+	}
+	OUTSI("{\n");
+	indent_level++;
+
+	if (syscall->is_ioctl && ioctl_fallback) {
+		OUTSI("return RVAL_DECODED;\n");
+		indent_level--;
+		OUTSI("}\n");
+		// the conditions were opened above; close them on this path too,
+		// otherwise the generated file ends up with an unterminated #ifdef
+		out_statement_condition_end(out, syscall->conditions);
+		return;
+	}
+
+	char arg_val[SYSCALL_ARG_STR_LEN];
+
+	if (out_ptrs == 0) {
+		if (syscall->is_ioctl) {
+			OUTFI("tprint_arg_next();\n");
+		}
+
+		// 0 out ptrs: print all args in sysenter
+		for (size_t i = arg_offset; i < syscall->arg_count; i++) {
+			struct syscall_argument arg = syscall->args[i];
+			OUTFI("/* arg: %s (%s) */\n", arg.name, type_to_ctype(arg.type));
+			get_syscall_arg_value(arg_val, syscall, arg_index++);
+
+			generate_printer(out, syscall, arg.name, arg_val, true, arg.type,
+					 indent_level);
+
+			if (i < syscall->arg_count - 1) {
+				OUTSI("tprint_arg_next();\n");
+			}
+			OUTC('\n');
+		}
+	} else if (out_ptrs == 1) {
+		// == 1 out ptrs: print args until the out ptr in sysenter, rest in sysexit
+		size_t cur = arg_offset;
+
+		OUTSI("if (entering(tcp)) {\n");
+		indent_level++;
+
+		if (syscall->is_ioctl) {
+			OUTFI("tprint_arg_next();\n");
+		}
+
+		for (; cur < syscall->arg_count; ++cur) {
+			struct syscall_argument arg = syscall->args[cur];
+			if (IS_OUT_PTR(arg.type)) {
+				break;
+			}
+
+			OUTFI("/* arg: %s (%s) */\n", arg.name, type_to_ctype(arg.type));
+			get_syscall_arg_value(arg_val, syscall, arg_index++);
+
+			generate_printer(out, syscall, arg.name, arg_val, true, arg.type,
+					 indent_level);
+
+			if (cur < syscall->arg_count - 1) {
+				OUTSI("tprint_arg_next();\n\n");
+			}
+		}
+
+		if (cur < syscall->arg_count && IS_INOUT_PTR(syscall->args[cur].type)) {
+			// arg_val still holds the previous argument (or nothing at all
+			// when the in/out pointer comes first), so resolve the pointer
+			// argument itself.  arg_index is not advanced here because the
+			// exiting path below decodes this argument again.
+			get_syscall_arg_value(arg_val, syscall, arg_index);
+			store_single_value(out, syscall->args[cur].type, arg_val, indent_level);
+		}
+
+		OUTSI("return 0;\n");
+		indent_level--;
+		OUTSI("}\n");
+
+		// TODO: for an in/out pointer, compare the current value with the
+		// previous value and print only if changed
+
+		for (; cur < syscall->arg_count; ++cur) {
+			struct syscall_argument arg = syscall->args[cur];
+			OUTFI("/* arg: %s (%s) */\n", arg.name, type_to_ctype(arg.type));
+			get_syscall_arg_value(arg_val, syscall, arg_index++);
+
+			generate_printer(out, syscall, arg.name, arg_val, false, arg.type,
+					 indent_level);
+
+			if (cur < syscall->arg_count - 1) {
+				OUTSI("tprint_arg_next();\n");
+			}
+			OUTC('\n');
+		}
+	} else {
+		// TODO: > 1 out ptrs; store necessary ptr values using set_tcb_priv_data
+		OUTSI("#error TODO\n");
+	}
+
+	generate_return_flags(out, syscall, indent_level);
+
+	indent_level--;
+	OUTSI("}\n");
+
+	out_statement_condition_end(out, syscall->conditions);
+}
+
+/*
+ * Writes every statement of the list as a preprocessor line ("#" followed
+ * by the stored text), each one wrapped in the conditions it was declared
+ * under.
+ */
+void
+output_defines(FILE *out, struct preprocessor_statement_list *defines)
+{
+	for (struct preprocessor_statement_list *item = defines; item != NULL; item = item->next) {
+		out_statement_condition_start(out, item->stmt.conditions);
+		OUTF("#%s\n", item->stmt.value);
+		out_statement_condition_end(out, item->stmt.conditions);
+	}
+}
+
+/*
+ * Outputs a function which delegates to the child syscalls based on the
+ * values of the child's const-typed arguments.
+ *
+ * The generated body is an if / else-if chain with one branch per child.
+ * A branch is taken when all const arguments of that child match (equality,
+ * or an inclusive range check for range constants).  The final else calls
+ * the var_leaf_* decoder of the group's base syscall.
+ *
+ * A child declared under preprocessor conditions gets its branch wrapped in
+ * the same conditions; the directives are put on lines of their own so the
+ * chain stays valid C whether or not the condition holds.
+ *
+ * The is_variant flag indicates whether the group's base syscall is a child of
+ * a variant syscall itself.
+ */
+void
+output_variant_syscall_group(FILE *out, struct syscall_group *group, bool is_variant)
+{
+	int indent_level = 0;
+	if (is_variant) {
+		// variant system call
+		char func_name[VARIANT_FUNC_NAME_LEN];
+		get_variant_function_name(func_name, group->base->name, false);
+
+		char decoder_prototype[DECODER_PROTOTYPE_LEN];
+		get_decoder_prototype(decoder_prototype, false, group->base, func_name);
+
+		OUTSI(decoder_prototype);
+	} else {
+		// base system call; the opening brace is written below for both
+		// cases, so it must not be part of this line
+		OUTFI("SYS_FUNC(%s)\n", group->base->name);
+	}
+	OUTSI("{\n");
+	indent_level++;
+
+	OUTSI("");
+	for (size_t child = 0; child < group->child_count; child++) {
+		struct syscall_group *cur_child_grp = &group->children[child];
+		struct syscall *cur_child = cur_child_grp->base;
+		bool conditional = cur_child->conditions != NULL;
+
+		if (conditional) {
+			// a preprocessor directive has to start on its own line
+			OUTC('\n');
+			out_statement_condition_start(out, cur_child->conditions);
+			OUTSI("");
+		}
+
+		OUTS("if (");
+
+		bool first = true;
+		for (size_t arg_idx = 0; arg_idx < cur_child->arg_count; ++arg_idx) {
+			struct syscall_argument arg = cur_child->args[arg_idx];
+
+			if (arg.type->type != TYPE_CONST) {
+				continue;
+			}
+
+			if (first) {
+				first = false;
+			} else {
+				OUTS(" && ");
+			}
+
+			char arg_str[SYSCALL_ARG_STR_LEN];
+			get_syscall_arg_value(arg_str, cur_child, arg_idx);
+
+			if (arg.type->constt.value->child_type == AST_TYPE_CHILD_RANGE) {
+				OUTF("((%s) <= (%s) && (%s) <= (%s))", arg_str,
+				     resolve_type_option_to_value(cur_child, arg.type->constt.value->range.min),
+				     arg_str,
+				     resolve_type_option_to_value(cur_child, arg.type->constt.value->range.max)
+				);
+			} else {
+				OUTF("(%s) == (%s)",
+				     arg_str,
+				     resolve_type_option_to_value(cur_child, arg.type->constt.value));
+			}
+		}
+		OUTS(") {\n");
+
+		indent_level++;
+
+		char func_name[VARIANT_FUNC_NAME_LEN];
+		get_variant_function_name(func_name, cur_child->name, cur_child_grp->child_count == 0);
+		OUTFI("return %s(tcp%s);\n", func_name, cur_child->is_ioctl ? ", code, arg" : "");
+
+		indent_level--;
+		OUTSI("} else ");
+
+		if (conditional) {
+			// close what was opened for this branch; the dangling "else"
+			// is completed by the next branch or by the final block
+			OUTC('\n');
+			out_statement_condition_end(out, cur_child->conditions);
+			OUTSI("");
+		}
+	}
+
+	OUTS("{\n");
+	indent_level++;
+
+	char func_name[VARIANT_FUNC_NAME_LEN];
+	get_variant_function_name(func_name, group->base->name, true);
+	OUTFI("return %s(tcp%s);\n", func_name, group->base->is_ioctl ? ", code, arg" : "");
+
+	indent_level--;
+	OUTSI("}\n");
+
+	indent_level--;
+	OUTSI("}\n");
+}
+
+/*
+ * Emits the decoders for an array of syscall groups, recursing into child
+ * groups first.
+ *
+ * Before a group is emitted, each of its const-typed arguments is given a
+ * "real type" taken from the parent's argument at the same position (or
+ * from the parent's own real type when that argument is const as well).
+ *
+ * A group without children becomes a single decoder.  A group with
+ * children gets a fallback leaf decoder plus a dispatching function, except
+ * for the group whose base syscall is named "ioctl", for which only the
+ * children are emitted.
+ */
+void
+output_syscall_groups(FILE *out, struct syscall_group *groups,
+		      size_t group_count, struct syscall_group *parent)
+{
+	const bool has_parent = (parent != NULL);
+
+	for (size_t g = 0; g < group_count; ++g) {
+		struct syscall_group *grp = &groups[g];
+
+		if (has_parent) {
+			/* only positions present in both argument lists can inherit */
+			size_t shared = grp->base->arg_count;
+			if (parent->base->arg_count < shared) {
+				shared = parent->base->arg_count;
+			}
+
+			for (size_t pos = 0; pos < shared; ++pos) {
+				struct syscall_argument *own = &grp->base->args[pos];
+				struct syscall_argument *inherited = &parent->base->args[pos];
+
+				if (own->type->type != TYPE_CONST) {
+					continue;
+				}
+				own->type->constt.real_type =
+					(inherited->type->type == TYPE_CONST)
+					? inherited->type->constt.real_type
+					: inherited->type;
+			}
+		}
+
+		if (grp->child_count == 0) {
+			generate_decoder(out, grp->base, has_parent, false);
+			continue;
+		}
+
+		output_syscall_groups(out, grp->children, grp->child_count, grp);
+
+		if (strcmp(grp->base->name, "ioctl") == 0) {
+			continue;
+		}
+
+		generate_decoder(out, grp->base, true, true);
+		output_variant_syscall_group(out, grp, has_parent);
+	}
+}
+
+/*
+ * Writes the generated decoder source for the processed AST to out_filename.
+ *
+ * in_filename is only mentioned in the "generated from" banner.
+ *
+ * Returns false if the output file cannot be opened or if closing it fails
+ * (buffered data may only hit the disk on close, so a failed close means the
+ * output may be incomplete); true otherwise.
+ */
+bool
+generate_code(const char *in_filename, const char *out_filename, struct processed_ast *ast)
+{
+	FILE *out = fopen(out_filename, "w");
+
+	if (out == NULL) {
+		return false;
+	}
+
+	outf(out, "/* AUTOMATICALLY GENERATED FROM %s - DO NOT EDIT */\n\n", in_filename);
+	outf(out, "%s",
+	     "#include <stddef.h>\n"
+	     "#include \"generated.h\"\n\n"
+	     "typedef kernel_ulong_t kernel_size_t;\n\n"
+	);
+
+	// generate_printer consults this file-level list for custom decoders
+	decoders = ast->decoders;
+
+	output_defines(out, ast->preprocessor_stmts);
+	output_syscall_groups(out, ast->syscall_groups, ast->syscall_group_count, NULL);
+
+	return fclose(out) == 0;
+}
diff --git a/maint/gen/deflang.h b/maint/gen/deflang.h
new file mode 100644
index 000000000..260b58260
--- /dev/null
+++ b/maint/gen/deflang.h
@@ -0,0 +1,29 @@
+/*
+ * Declarations shared between the lexer, the parser and the code generator.
+ *
+ * The include guard matters: lex.l includes this header directly and a
+ * second time through parse.tab.h.
+ */
+#ifndef STRACE_MAINT_GEN_DEFLANG_H
+#define STRACE_MAINT_GEN_DEFLANG_H
+
+#include <stdbool.h>
+#include <stdio.h>
+
+#include "ast.h"
+#include "preprocess.h"
+
+/* provided by the flex-generated scanner */
+extern int yylineno;
+extern FILE *yyin;
+
+/* defined in lex.l; used by yyerror to locate and print the offending line */
+extern int last_line_location;
+extern char *cur_filename;
+
+void *
+xmalloc(size_t n);
+
+void *
+xcalloc(size_t n);
+
+extern int
+yylex_destroy(void);
+
+/* Opens filename as the scanner input; returns false if it cannot be opened. */
+bool
+lexer_init_newfile(char *filename);
+
+/* printf-style error reporting used by both the scanner and the parser */
+void
+yyerror(const char *s, ...) __attribute__ ((format (printf, 1, 2)));
+
+/* Writes the generated decoders to out_filename; returns false on failure. */
+bool
+generate_code(const char *in_filename, const char *out_filename, struct processed_ast *ast);
+
+#endif /* !STRACE_MAINT_GEN_DEFLANG_H */
\ No newline at end of file
diff --git a/maint/gen/defs/common.syzlang b/maint/gen/defs/common.syzlang
new file mode 100644
index 000000000..24322e9c1
--- /dev/null
+++ b/maint/gen/defs/common.syzlang
@@ -0,0 +1,49 @@
+/*
+ * Custom decoders.  Each entry has the form
+ *     :<type pattern> %{ <C code> %}
+ * and supplies the C code that is emitted to print an argument whose type
+ * matches the pattern.  Inside the C code, $$ is replaced by the
+ * (parenthesized) argument expression and $N by the value bound to the
+ * template variable $N of the pattern.
+ */
+:fd %{ printfd(tcp, $$); %}
+:uid %{ printuid($$); %}
+:gid %{ printuid($$); %}
+
+:ptr[$9, stringnoz[$1]] %{
+ if (entering(tcp)) {
+ printstrn(tcp, $$, $1);
+ } else if (syserror(tcp)) {
+ printaddr($$);
+ } else {
+ printstrn(tcp, $$, $1);
+ }
+%}
+
+:ptr[$9, string] %{
+ if (entering(tcp)) {
+ printstr(tcp, $$);
+ } else if (syserror(tcp)) {
+ printaddr($$);
+ } else {
+ printstr(tcp, $$);
+ }
+%}
+
+:path %{
+ if (entering(tcp)) {
+ printpath(tcp, $$);
+ } else if (syserror(tcp)) {
+ printaddr($$);
+ } else {
+ printpath(tcp, $$);
+ }
+%}
+
+/* ptr[in, array[uint32_t, $length]] */
+:ptr[in, array[uint32_t, $1]] %{
+ {
+ uint32_t int_buffer;
+ print_array(tcp, $$, $1, &int_buffer, sizeof(int_buffer), tfetch_mem, print_uint32_array_member, 0);
+ }
+%}
+
+:ptr[out, ulong] %{
+ if (exiting(tcp)) {
+ printnum_ulong(tcp, $$);
+ }
+%}
+
+/* base ioctl syscall */
+ioctl(fd fd, code kernel_ulong_t, arg kernel_ulong_t)
diff --git a/maint/gen/grammar.txt b/maint/gen/grammar.txt
new file mode 100644
index 000000000..b4a6efdbd
--- /dev/null
+++ b/maint/gen/grammar.txt
@@ -0,0 +1,41 @@
+syscall ::= <identifier>(<arglist>) <type> (type)
+arglist ::= <arg> | <arg>, <arglist>
+arg ::= <identifier> <type>
+type ::= <identifier> | <identifier>[<type_options>]
+type_options ::= <type_option> | <type_option>, <type_options>
+type_option ::= <type> | <number>
+identifier ::= /[A-Za-z_][A-Za-z0-9_\$]*/
+number ::= /0x[0-9A-Fa-f]+/ | /[0-9]+/
+
+define ::= define name definition
+ifdef ::= #ifdef condition statement #endif
+ifndef ::= #ifndef condition statement #endif
+include ::= include file
+import ::= #import "filename"
+compound ::= <statement> <compound> | <statement>
+
+statement ::= <compound> | <define> | <ifdef> | <ifndef> | <syscall> | <struct> | <flags>
+
+struct ::= <identifier> { <struct_elems> } <struct_attr>
+struct_elems ::= <struct_elem> <struct_elems> '\n' | <struct_elem>
+struct_elem ::= <identifier> <type>
+struct_attr ::= [<type>] | <empty>
+
+flags ::= <identifier> = <flag_elements>
+flag_elements ::= <identifier>, <flag_elements> | <identifier>
+
+Default Types:
+ * void (default if type is excluded)
+ * int
+ * long
+ * size_t
+ * uint8_t
+ * uint16_t
+ * uint64_t
+ * kernel_ulong_t
+ * kernel_long_t
+ * string: zero terminated buffer
+ * stringnoz[n]: buffer of length n
+ * const[typ, x]: constant of value x with type typ
+ * ptr[direction, typ]: pointer to object of type typ, with direction in, out, inout
+ * array[typ, n]: array of n objects with type typ
diff --git a/maint/gen/lex.l b/maint/gen/lex.l
new file mode 100644
index 000000000..3e25a6145
--- /dev/null
+++ b/maint/gen/lex.l
@@ -0,0 +1,262 @@
+%option noyywrap yylineno nodefault warn
+/* %option debug */
+
+%{
+#include <stdio.h>
+#include <stdint.h>
+#include "deflang.h"
+#include "ast.h"
+#include "parse.tab.h"
+
+/*
+ * yylval and yylloc are not defined here: the (non-pure) bison parser
+ * defines both and parse.tab.h declares them.  A second definition in the
+ * scanner is a duplicate symbol for toolchains that default to -fno-common.
+ */
+
+static void
+update_yylloc();
+
+/* keep yylloc and the byte offsets below in sync with every matched token */
+#define YY_USER_ACTION update_yylloc();
+
+#define MAX_IMPORT_LEVEL 10
+
+/* scanner position of the importing file, restored when the import ends */
+struct saved_import_state {
+	YYLTYPE location;
+	char *filename;
+	int cur_location;
+	int last_line_location;
+};
+
+// a stack to store state before an import
+static struct saved_import_state import_states[MAX_IMPORT_LEVEL];
+// the current index into import_states
+static int import_level = 0;
+
+// name of the file currently being scanned
+char *cur_filename;
+
+// number of characters consumed from the current file so far
+static int cur_location;
+
+// value of cur_location just after the most recent newline
+int last_line_location;
+%}
+
+%x COMMENT_MULTI
+%x COMMENT_LINE
+%x IMPORT
+
+%%
+	/* single-character punctuation tokens */
+"," return T_COMMA;
+"(" return T_LPAREN;
+")" return T_RPAREN;
+"[" return T_LBRACKET;
+"]" return T_RBRACKET;
+"{" return T_LCURLY;
+"}" return T_RCURLY;
+"=" return T_EQUALS;
+":" return T_COLON;
+
+	/*
+	 * Integer literals: hexadecimal, decimal and 0b-prefixed binary, each
+	 * with an optional leading '-'.  number.raw keeps a copy of the text
+	 * as written, number.val the converted value.
+	 */
+(-)?"0x"[0-9A-Fa-f]+ {
+ yylval.number.raw = strdup(yytext);
+ yylval.number.val = strtol(yytext, NULL, 16);
+ return T_NUMBER;
+}
+
+(-)?[0-9]+ {
+ yylval.number.raw = strdup(yytext);
+ yylval.number.val = strtol(yytext, NULL, 10);
+ return T_NUMBER;
+}
+
+(-)?"0b"[01]+ {
+ int sign = (yytext[0] == '-') ? -1 : +1;
+ /* number of characters to skip before the binary digits start */
+ int offset = ((sign == -1) ? sizeof("-0b") : sizeof("0b")) - 1;
+
+ // binary literals are supported in C by GNU extension
+ yylval.number.raw = strdup(yytext);
+ yylval.number.val = sign * strtol(yytext + offset, NULL, 2);
+ return T_NUMBER;
+}
+
+	/* character literal: the value is the single character between the quotes */
+\'.\' {
+ yylval.number.raw = strdup(yytext);
+ yylval.number.val = yytext[1];
+ return T_NUMBER;
+}
+
+	/* template variable $N: the value is N, there is no raw text */
+\$[0-9]+ {
+ yylval.number.val = strtol(yytext + 1, NULL, 10);
+ yylval.number.raw = NULL;
+ return T_TEMPLATE_IDENTIFIER;
+}
+
+	/* identifiers; a leading '@' is only accepted for the exact name "@ret" */
+[A-Za-z_@\?][A-Za-z0-9_\?\$]* {
+ if (yytext[0] == '@' && strcmp(yytext, "@ret") != 0) {
+ yyerror("@ can only be used in @ret");
+ yyterminate();
+ }
+ yylval.str = strdup(yytext);
+ return T_IDENTIFIER;
+}
+
+	/* decoder body: everything between "%{" and "%}", delimiters removed */
+(?x: "%{" ( [^%] | %+ [^}] )* %* "%}" ) {
+ yylval.str = strdup(yytext + 2);
+ yylval.str[strlen(yylval.str) - 2] ='\0';
+
+ return T_DECODER_SOURCE;
+}
+
+	/*
+	 * Preprocessor-like statements.  The rest of the line is part of the
+	 * match and the whole matched text becomes the token's string value.
+	 * Note that "define" and "include" are written without a leading '#';
+	 * "#endif" carries no value.
+	 */
+"define".+ {
+ yylval.str = strdup(yytext);
+ return T_DEFINE;
+}
+"#ifdef".+ {
+ yylval.str = strdup(yytext);
+ return T_IFDEF;
+}
+"#ifndef".+ {
+ yylval.str = strdup(yytext);
+ return T_IFNDEF;
+}
+"include".+ {
+ yylval.str = strdup(yytext);
+ return T_INCLUDE;
+}
+"#endif".* {
+ return T_ENDIF;
+}
+
+	/*
+	 * #import "file": the opening part switches to the IMPORT state, the
+	 * rule below matches the file name and continues scanning from that
+	 * file.  The importing file's position is pushed on import_states and
+	 * restored by the <<EOF>> rule.
+	 */
+"#import \"" {
+ BEGIN(IMPORT);
+}
+<IMPORT>[^\n\"]+ {
+ if (import_level >= MAX_IMPORT_LEVEL) {
+ fprintf(stderr, "imports are nested more than %d levels\n", MAX_IMPORT_LEVEL);
+ yyterminate();
+ }
+
+ // eat characters until newline
+ int c = input();
+ cur_location++;
+ while(c && c != '\n'){
+ cur_location++;
+ c = input();
+ }
+ // update current location
+ // (input() bypasses YY_USER_ACTION, so the skipped newline is accounted for by hand)
+ yylloc.last_line++;
+ yylloc.last_column = 1;
+ last_line_location = cur_location;
+
+ // save current state
+ import_states[import_level++] = (struct saved_import_state) {
+ .filename = cur_filename,
+ .location = yylloc,
+ .cur_location = cur_location,
+ .last_line_location = last_line_location
+ };
+
+ // the imported file gets its own heap-allocated name, released at its EOF
+ cur_filename = strdup(yytext);
+
+ // positions in the imported file start over
+ yylloc = (struct YYLTYPE) {1, 1, 1, 1};
+ cur_location = 0;
+ last_line_location = 0;
+
+ yyin = fopen(yytext, "r");
+
+ if (yyin == NULL) {
+ fprintf(stderr, "failed to import file '%s' on line %d\n", yytext, yylineno);
+ yyterminate();
+ }
+
+ yypush_buffer_state(yy_create_buffer(yyin, YY_BUF_SIZE));
+ BEGIN(INITIAL);
+}
+
+	/*
+	 * End of a file.  The first time this is reached for a file a
+	 * synthetic newline is returned; the second time the file is left:
+	 * an import is closed and the importing file's state restored, and
+	 * scanning stops once no buffer remains.
+	 */
+<<EOF>> {
+	// emit a newline at the end of a file before EOF
+	// to ensure the last statement in the file is terminated
+	static int emitted_newline;
+
+	if (!emitted_newline) {
+		emitted_newline = 1;
+		return T_NEWLINE;
+	}
+
+	emitted_newline = 0;
+
+	if (import_level > 0) {
+		// the stream was opened by the #import rule and nobody else
+		// owns it; popping the buffer below does not close it
+		if (yyin != NULL) {
+			fclose(yyin);
+		}
+
+		free(cur_filename);
+
+		struct saved_import_state saved = import_states[--import_level];
+
+		cur_filename = saved.filename;
+		cur_location = saved.cur_location;
+		last_line_location = saved.last_line_location;
+		yylloc = saved.location;
+	}
+	// switches back to the importing file's buffer (and its yyin), if any
+	yypop_buffer_state();
+	if (!YY_CURRENT_BUFFER) {
+		yyterminate();
+	}
+}
+
+	/* block comments: skipped up to the closing marker */
+"/*" {
+ BEGIN(COMMENT_MULTI);
+}
+<COMMENT_MULTI>"*/" {
+ BEGIN(INITIAL);
+}
+
+	/*
+	 * line comments start with "//" or "#" and run to the end of the line;
+	 * the terminating newline is consumed with the comment
+	 */
+"//"|"#" {
+ BEGIN(COMMENT_LINE);
+}
+<COMMENT_LINE>\n {
+ BEGIN(INITIAL);
+}
+
+	/* comment contents are discarded */
+<COMMENT_LINE,COMMENT_MULTI>.|\n {}
+
+	/* blanks between tokens are ignored, newlines are tokens */
+[ \t\r] {}
+
+\n {
+ return T_NEWLINE;
+}
+
+	/* anything else is an error and stops the scanner */
+. {
+ yyerror("unexpected character: %s", yytext);
+ yyterminate();
+}
+
+%%
+
+/*
+ * Advances yylloc over the token that was just matched (yytext): the
+ * token starts where the previous one ended.  Also counts consumed
+ * characters in cur_location and remembers in last_line_location where
+ * the current line began.
+ */
+static void
+update_yylloc()
+{
+	yylloc.first_line = yylloc.last_line;
+	yylloc.first_column = yylloc.last_column;
+
+	for (const char *ch = yytext; *ch != '\0'; ch++) {
+		cur_location++;
+
+		if (*ch != '\n') {
+			yylloc.last_column++;
+			continue;
+		}
+
+		yylloc.last_line++;
+		yylloc.last_column = 1;
+		last_line_location = cur_location;
+	}
+}
+
+/*
+ * Resets the scanner and makes filename its input.
+ *
+ * The filename pointer itself is stored in cur_filename (no copy is made).
+ * Returns false, leaving cur_filename untouched, if the file cannot be
+ * opened.
+ */
+bool
+lexer_init_newfile(char *filename)
+{
+	// clean up internal state managed by flex
+	yylex_destroy();
+
+	FILE *input_file = fopen(filename, "r");
+	yyin = input_file;
+
+	if (input_file != NULL) {
+		cur_filename = filename;
+	}
+
+	return input_file != NULL;
+}
diff --git a/maint/gen/parse.y b/maint/gen/parse.y
new file mode 100644
index 000000000..23b773153
--- /dev/null
+++ b/maint/gen/parse.y
@@ -0,0 +1,383 @@
+/* token names get a T_ prefix in the generated header (NEWLINE -> T_NEWLINE) */
+%define api.token.prefix {T_}
+%define parse.lac full
+%define parse.error detailed
+
+%locations
+
+%code requires {
+#include "deflang.h"
+#include "ast.h"
+}
+
+%{
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "symbols.h"
+
+/* result of a successful parse, set by the start rule */
+static struct ast_node *root;
+
+static void error_prev_decl(char *identifier, struct ast_node *prev);
+%}
+
+/* semantic value types of tokens and nonterminals */
+%union {
+ char* str;
+ struct ast_number number;
+
+ struct ast_node *node;
+ struct ast_type *type;
+ struct ast_type_option *type_option;
+ struct ast_type_option_list *type_option_list;
+ struct ast_syscall_arg *syscall_arg;
+ struct ast_struct_element *struct_element;
+ struct ast_flag_values *flag_values;
+}
+
+/* the quoted strings are aliases usable in rules and shown in error messages */
+%token NEWLINE
+%token LPAREN "("
+%token RPAREN ")"
+%token LBRACKET "["
+%token RBRACKET "]"
+%token LCURLY "{"
+%token RCURLY "}"
+%token COMMA ","
+%token EQUALS "="
+%token COLON ":"
+%token <str> DEFINE "#define"
+%token <str> IFDEF "#ifdef"
+%token ENDIF "#endif"
+%token <str> IFNDEF "#ifndef"
+%token <str> INCLUDE "include"
+%token <str> IDENTIFIER
+%token <number> TEMPLATE_IDENTIFIER
+%token <number> NUMBER
+%token <str> DECODER_SOURCE
+
+%type <node> compound compound_stmt statement decoder define ifdef ifndef include syscall struct flags
+%type <type> type syscall_return_type
+%type <type_option_list> type_options
+%type <type_option> type_option type_option_range
+%type <syscall_arg> syscall_arglist syscall_arg
+%type <struct_element> struct_element struct_elements
+%type <flag_values> flag_elements
+
+/* cleanup for semantic values the parser discards on its own */
+%destructor { free($$); } <str>
+%destructor { free($$.raw); } <number>
+%destructor { free_ast_tree($$); } <node>
+
+%start start
+
+%%
+
+/* a file is a list of statements, optionally preceded by empty lines */
+start: opt_linebreak compound_stmt
+ {
+ root = $2;
+ }
+
+opt_linebreak: linebreaks | %empty
+
+linebreaks: NEWLINE linebreaks
+ | NEWLINE
+
+/* statement list that must be preceded by at least one line break */
+compound: linebreaks compound_stmt
+ {
+ $$ = $2;
+ }
+
+/*
+ * The list is built from its end: the last statement creates the compound
+ * node and every earlier statement is pushed onto the front of its children.
+ * A statement that fails to parse is dropped (third alternative).
+ */
+compound_stmt: statement linebreaks compound_stmt
+ {
+ $1->next = $3->compound.children;
+ $3->compound.children = $1;
+ $$ = $3;
+ }
+ | statement linebreaks
+ {
+ $$ = create_ast_node(AST_COMPOUND, &@$);
+ $$->compound.children = $1;
+ }
+ | error linebreaks compound_stmt
+ {
+ $$ = $3;
+ }
+
+/* every alternative passes its node through unchanged (default action) */
+statement: define
+ | ifdef
+ | ifndef
+ | include
+ | syscall
+ | struct
+ | flags
+ | decoder
+
+/* custom decoder: ":" type pattern, then the C source between %{ and %} */
+decoder: ":" type DECODER_SOURCE
+ {
+ $$ = create_ast_node(AST_DECODER, &@$);
+ $$->decoder.type = $2;
+ $$->decoder.decoder = $3;
+ }
+
+/*
+ * Syscall definition, with or without arguments.  The name is entered into
+ * the symbol table; if it is already taken the earlier declaration is
+ * reported and the rule fails.
+ */
+syscall: IDENTIFIER "(" syscall_arglist ")" syscall_return_type syscall_attribute
+ {
+ $$ = create_ast_node(AST_SYSCALL, &@$);
+ $$->syscall = (struct ast_syscall) {
+ .name = $1,
+ .args = $3,
+ .return_type = $5
+ };
+
+ struct ast_node *prev_decl = symbol_add($1, $$);
+ if (prev_decl) {
+ error_prev_decl($1, prev_decl);
+ YYERROR;
+ }
+ }
+ | IDENTIFIER "(" ")" syscall_return_type syscall_attribute
+ {
+ $$ = create_ast_node(AST_SYSCALL, &@$);
+ $$->syscall = (struct ast_syscall) {
+ .name = $1,
+ .args = NULL,
+ .return_type = $4
+ };
+
+ struct ast_node *prev_decl = symbol_add($1, $$);
+ if (prev_decl) {
+ error_prev_decl($1, prev_decl);
+ YYERROR;
+ }
+ }
+
+/* an omitted return type means "void" */
+syscall_return_type: type
+ {
+ $$ = $1;
+ }
+ | %empty
+ {
+ $$ = create_or_get_type(NULL, "void", NULL);
+ }
+
+/* attributes are accepted by the grammar but have no action attached */
+syscall_attribute: "(" type_options ")"
+ | %empty
+
+/* comma-separated arguments, linked in source order through ->next */
+syscall_arglist: syscall_arg
+ {
+ $$ = $1;
+ }
+ | syscall_arg "," syscall_arglist
+ {
+ $$ = $1;
+ $1->next = $3;
+ }
+
+/* an argument is written as: name type */
+syscall_arg: IDENTIFIER type
+ {
+ $$ = create_ast_syscall_arg($1, $2, NULL);;
+ }
+
+/*
+ * A type is a name, optionally followed by bracketed options.  When
+ * create_or_get_type sets an error message it is reported and the rule
+ * fails.
+ */
+type: IDENTIFIER
+ {
+ char *error = NULL;
+ $$ = create_or_get_type(&error, $1, NULL);
+ if (error) {
+ yyerror("%s", error);
+ YYERROR;
+ }
+ }
+ | IDENTIFIER "[" type_options "]"
+ {
+ char *error = NULL;
+ $$ = create_or_get_type(&error, $1, $3);
+ if (error) {
+ yyerror("%s", error);
+ YYERROR;
+ }
+ }
+
+/* comma-separated option list, kept in source order */
+type_options: type_option_range "," type_options
+ {
+ $$ = create_ast_type_option_list($1, $3);
+ }
+ | type_option_range
+ {
+ $$ = create_ast_type_option_list($1, NULL);
+ }
+
+/* either a single option or a range written as first:second */
+type_option_range: type_option ":" type_option
+ {
+ $$ = create_type_option_range($1, $3);
+ }
+ | type_option
+ {
+ $$ = $1;
+ }
+
+/* an option is a nested type, a number or a template variable ($N) */
+type_option: type
+ {
+ $$ = create_or_get_type_option_nested($1);
+ }
+ | NUMBER
+ {
+ $$ = create_or_get_type_option_number($1);
+ }
+ | TEMPLATE_IDENTIFIER
+ {
+ $$ = create_type_template_identifier($1);
+ }
+
+/* the token value is the complete source line of the statement */
+define: DEFINE
+ {
+ $$ = create_ast_node(AST_DEFINE, &@$);
+ $$->define.value = $1;
+ }
+
+/* conditional block: the enclosed statements become the node's child */
+ifdef: IFDEF compound ENDIF
+ {
+ $$ = create_ast_node(AST_IFDEF, &@$);
+ $$->ifdef.value = $1;
+ $$->ifdef.invert = false;
+ $$->ifdef.child = $2;
+ }
+
+/* same node type as ifdef, marked with invert = true */
+ifndef: IFNDEF compound ENDIF
+ {
+ $$ = create_ast_node(AST_IFDEF, &@$);
+ $$->ifdef.value = $1;
+ $$->ifdef.invert = true;
+ $$->ifdef.child = $2;
+ }
+
+include: INCLUDE
+ {
+ $$ = create_ast_node(AST_INCLUDE, &@$);
+ $$->include.value = $1;
+ }
+
+/*
+ * Struct definition.  The name is entered into the symbol table and a
+ * duplicate is an error, as is a struct without any members (second
+ * alternative).
+ */
+struct: IDENTIFIER "{" linebreaks struct_elements "}" struct_attr
+ {
+ $$ = create_ast_node(AST_STRUCT, &@$);
+ $$->ast_struct.name = $1;
+ $$->ast_struct.elements = $4;
+
+ struct ast_node *prev_decl = symbol_add($1, $$);
+ if (prev_decl) {
+ error_prev_decl($1, prev_decl);
+ YYERROR;
+ }
+ }
+ | IDENTIFIER "{" linebreaks "}" struct_attr
+ {
+ yyerror("struct '%s' has no members", $1);
+ $$ = NULL;
+ YYERROR;
+ }
+
+/* members are linked in source order through ->next */
+struct_elements: struct_element struct_elements
+ {
+ $$ = $1;
+ $$->next = $2;
+ }
+ | struct_element
+ {
+ $$ = $1;
+ }
+
+/* a member is written as: name type, followed by at least one line break */
+struct_element: IDENTIFIER type linebreaks
+ {
+ $$ = create_ast_struct_element($1, $2, NULL);
+ }
+
+/* the attribute is accepted by the grammar but has no action attached */
+struct_attr: "[" type "]"
+ | %empty
+
+/* flag set: name = value, value, ...; the name must be unused so far */
+flags: IDENTIFIER "=" flag_elements
+ {
+ $$ = create_ast_node(AST_FLAGS, &@$);
+ $$->flags.name = $1;
+ $$->flags.values = $3;
+
+ struct ast_node *prev_decl = symbol_add($1, $$);
+ if (prev_decl) {
+ error_prev_decl($1, prev_decl);
+ YYERROR;
+ }
+ }
+
+flag_elements: IDENTIFIER "," flag_elements
+ {
+ $$ = create_ast_flag_values($1, $3);
+ }
+ | IDENTIFIER
+ {
+ $$ = create_ast_flag_values($1, NULL);
+ }
+
+%%
+
+/*
+ * Reports that identifier has already been declared, pointing at the line
+ * and column of the earlier declaration.
+ */
+static void error_prev_decl(char *identifier, struct ast_node *prev)
+{
+	const int first_line = prev->loc.lineno;
+	const int first_col = prev->loc.colno;
+
+	yyerror("Previous declaration of %s at line %d col %d", identifier,
+		first_line, first_col);
+}
+
+/*
+ * Prints an error for the current token location (yylloc) to stderr.
+ *
+ * The output is a header line with the error count, file name, line and
+ * column, followed by the offending source line and a caret line carrying
+ * the printf-style message.  The source line is re-read from yyin; when
+ * that is not possible (no input stream, or the stream cannot be
+ * repositioned) the message is still printed, just without the excerpt.
+ */
+void
+yyerror (const char* fmt, ...)
+{
+	char buffer[257] = {0};
+
+	if (yyin != NULL) {
+		// remember the read position so the scanner is not disturbed
+		long int saved = ftell(yyin);
+		if (saved >= 0 && fseek(yyin, last_line_location, SEEK_SET) == 0) {
+			if (fgets(buffer, 256, yyin) == NULL) {
+				buffer[0] = '\0';
+			}
+			fseek(yyin, saved, SEEK_SET);
+		}
+	}
+
+	// add a new line if necessary
+	// (fgets stores at most 255 characters, so both writes stay in bounds)
+	size_t len = strlen(buffer);
+	if (len > 0 && buffer[len - 1] != '\n') {
+		buffer[len] = '\n';
+		buffer[len + 1] = '\0';
+	}
+
+	va_list args;
+	va_start(args, fmt);
+
+	fprintf(stderr, "error %d: %s: line %d column %d\n", yynerrs, cur_filename,
+		yylloc.first_line, yylloc.first_column);
+	if (len > 0) {
+		fprintf(stderr, "\t%s", buffer);
+		fprintf(stderr, "\t%*s ", yylloc.first_column, "^");
+	}
+	vfprintf(stderr, fmt, args);
+	fprintf(stderr, "\n");
+
+	va_end(args);
+}
+
+/*
+ * Usage: <program> <input file> <output file>
+ *
+ * Parses the input definitions and writes the generated decoders to the
+ * output file.  Exits with EXIT_FAILURE on a usage error, when the input
+ * cannot be opened, when parsing reported any error, or when the output
+ * cannot be written.
+ */
+int
+main(int argc, char **argv)
+{
+	if (argc < 3) {
+		fprintf(stderr, "Usage: %s [input file] [output file]\n", argv[0]);
+		return EXIT_FAILURE;
+	}
+
+	if (!lexer_init_newfile(argv[1])) {
+		fprintf(stderr, "Failed to open file %s\n", argv[1]);
+		return EXIT_FAILURE;
+	}
+
+	if (yyparse() != 0) {
+		return EXIT_FAILURE;
+	}
+
+	// yyparse() also returns 0 when it recovered from syntax errors through
+	// the "error" rule; the statements in question are then missing from
+	// the tree, so do not generate (incomplete) code from it
+	if (yynerrs > 0) {
+		free_ast_tree(root);
+		return EXIT_FAILURE;
+	}
+
+	if (!generate_code(argv[1], argv[2], preprocess(root))) {
+		free_ast_tree(root);
+		return EXIT_FAILURE;
+	}
+
+	free_ast_tree(root);
+
+	return EXIT_SUCCESS;
+}
diff --git a/maint/gen/preprocess.c b/maint/gen/preprocess.c
new file mode 100644
index 000000000..333b56293
--- /dev/null
+++ b/maint/gen/preprocess.c
@@ -0,0 +1,284 @@
+#include <assert.h>
+#include <ctype.h>
+#include <memory.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "ast.h"
+#include "deflang.h"
+#include "symbols.h"
+#include "printf.h"
+
+#define MAX_PREPROCESSOR_NEST 16
+#define MAX_SYSCALL_COUNT 4096
+
+
+/*
+ * Stack of the condition strings of the AST_IFDEF nodes that enclose the
+ * node currently visited by preprocess_rec().
+ */
+struct condition_stack {
+ // number of entries in use; also the slot the next push writes to
+ size_t idx;
+ // condition strings, outermost first; at most MAX_PREPROCESSOR_NEST deep
+ char *stack[MAX_PREPROCESSOR_NEST];
+};
+
+/*
+ * Takes a snapshot of the condition stack as a newly allocated
+ * statement_condition. Only the string pointers are copied; the strings
+ * themselves stay shared with the stack.
+ *
+ * Returns NULL when the stack is empty (i.e. no condition applies).
+ */
+static struct statement_condition *
+create_statement_condition(struct condition_stack *stack)
+{
+    const size_t depth = stack->idx;
+    if (depth == 0) {
+        return NULL;
+    }
+
+    // header plus one pointer slot per active condition
+    const size_t values_size = depth * sizeof(char *);
+    struct statement_condition *snapshot = xmalloc(sizeof(*snapshot) + values_size);
+
+    snapshot->count = depth;
+    memcpy(snapshot->values, stack->stack, values_size);
+    return snapshot;
+}
+
+/*
+ * Removes trailing whitespace from 'str' in place and returns 'str'.
+ * Leading whitespace is left untouched. A string that consists only of
+ * whitespace becomes the empty string.
+ */
+static char *
+strip_whitespace(char *str)
+{
+    // 'end' points one past the last character that is kept
+    char *end = str + strlen(str);
+
+    // the cast is required: isspace() is undefined for negative char values
+    while (end > str && isspace((unsigned char) end[-1])) {
+        end--;
+    }
+
+    *end = '\0';
+
+    return str;
+}
+
+
+/*
+ * Accumulator filled by preprocess_rec() while it walks the AST.
+ */
+struct processing_state {
+ // define/include statements in the order they were visited
+ // (new entries are appended at the tail)
+ struct preprocessor_statement_list *preprocessor_head;
+ struct preprocessor_statement_list *preprocessor_tail;
+ // decoders; new entries are prepended, so the list is in reverse visit order
+ struct decoder_list *decoder_head;
+ // not written by preprocess_rec(); stays as initialised by preprocess()
+ struct struct_def *struct_stmts;
+ // array of collected syscalls and the number of entries stored in it
+ struct syscall **syscall_buffer;
+ size_t syscall_index;
+};
+
+/*
+ * Walks the AST rooted at 'root' and sorts its nodes into 'state':
+ *  - AST_IFDEF pushes its condition onto 'cur' while its child is visited;
+ *  - AST_DECODER nodes are prepended to the decoder list (the decoder text
+ *    has its trailing whitespace stripped in place);
+ *  - AST_DEFINE / AST_INCLUDE nodes are appended to the preprocessor
+ *    statement list together with the conditions active at that point;
+ *  - AST_COMPOUND nodes are descended into, child by child;
+ *  - AST_SYSCALL nodes are converted into a struct syscall and stored in
+ *    state->syscall_buffer.
+ * Nodes of any other type are ignored.
+ *
+ * Exits with an error message when more than MAX_SYSCALL_COUNT syscalls
+ * are defined, because syscall_buffer has room for exactly that many.
+ */
+static void
+preprocess_rec(struct ast_node *root, struct condition_stack *cur,
+               struct processing_state *state)
+{
+    if (root->type == AST_IFDEF) {
+        assert(cur->idx < MAX_PREPROCESSOR_NEST);
+        cur->stack[cur->idx] = root->ifdef.value;
+        cur->idx++;
+        preprocess_rec(root->ifdef.child, cur, state);
+        cur->idx--;
+        cur->stack[cur->idx] = NULL;
+    } else if (root->type == AST_DECODER) {
+        struct decoder_list *decoder = xmalloc(sizeof *decoder);
+        *decoder = (struct decoder_list) {
+            .decoder = {
+                .loc = root->loc,
+                .matching_type = root->decoder.type,
+                .fmt_string = strip_whitespace(root->decoder.decoder)
+            },
+            .next = state->decoder_head
+        };
+        state->decoder_head = decoder;
+    } else if (root->type == AST_DEFINE || root->type == AST_INCLUDE) {
+        struct statement_condition *conditions = create_statement_condition(cur);
+        struct preprocessor_statement_list *new = xmalloc(sizeof *new);
+
+        new->next = NULL;
+        new->stmt.conditions = conditions;
+        new->stmt.loc = root->loc;
+        if (root->type == AST_DEFINE) {
+            new->stmt.value = root->define.value;
+        } else {
+            new->stmt.value = root->include.value;
+        }
+        // append, so statements keep their source order
+        if (state->preprocessor_tail) {
+            state->preprocessor_tail->next = new;
+            state->preprocessor_tail = new;
+        } else {
+            state->preprocessor_head = new;
+            state->preprocessor_tail = new;
+        }
+    } else if (root->type == AST_COMPOUND) {
+        for (struct ast_node *node = root->compound.children; node != NULL; node = node->next) {
+            preprocess_rec(node, cur, state);
+        }
+    } else if (root->type == AST_SYSCALL) {
+        // syscall_buffer holds MAX_SYSCALL_COUNT entries; refuse to write
+        // past its end
+        if (state->syscall_index >= MAX_SYSCALL_COUNT) {
+            fprintf(stderr, "too many syscall definitions (maximum is %d)\n",
+                    MAX_SYSCALL_COUNT);
+            exit(1);
+        }
+
+        size_t arg_count = 0;
+        for (struct ast_syscall_arg *arg = root->syscall.args; arg != NULL; arg = arg->next) {
+            arg_count++;
+        }
+
+        // the arguments live in the flexible array member at the end
+        struct syscall *new = xmalloc(sizeof(*new) + sizeof(struct syscall_argument) * arg_count);
+        *new = (struct syscall) {
+            .name = root->syscall.name,
+            .conditions = create_statement_condition(cur),
+            .ret = *root->syscall.return_type,
+            .arg_count = arg_count,
+            .loc = root->loc,
+            .is_ioctl = strncmp(root->syscall.name, "ioctl$", 6) == 0
+        };
+
+        size_t cur_count = 0;
+        for (struct ast_syscall_arg *arg = root->syscall.args; arg != NULL; arg = arg->next) {
+            new->args[cur_count] = (struct syscall_argument) {
+                .name = arg->name,
+                .type = arg->type
+            };
+            cur_count++;
+        }
+
+        state->syscall_buffer[state->syscall_index] = new;
+        state->syscall_index++;
+    }
+}
+
+/*
+ * Returns non-zero when 'name' belongs to the variant family of 'base',
+ * i.e. when it is 'base' immediately followed by '$'.
+ */
+static int
+is_variant_name(const char *name, const char *base, size_t base_len)
+{
+    return strncmp(name, base, base_len) == 0 && name[base_len] == '$';
+}
+
+/*
+ * Creates a group of variant syscalls from a name-sorted list of syscalls.
+ *
+ * syscall_buffer[0] is the base syscall. The entries that directly follow
+ * it and are named "<base>$..." form its variant family; direct variants
+ * (no further '$' after the one following the base name) become children
+ * of the group, each built recursively. The resulting group is stored in
+ * out[0].
+ *
+ * Returns the number of entries of syscall_buffer that were consumed.
+ *
+ * For example, ["prctl", "prctl$GET_FP_MODE", "prctl$PR_CAP_AMBIENT",
+ * "read"] stores a group for prctl with two children in out[0] and
+ * returns 3.
+ *
+ * A name that merely shares a prefix with the base ("readlink" after
+ * "read") is not part of the family: it ends the group and is left for the
+ * caller. A family member whose own direct base is missing (for instance
+ * "a$b$c" without "a$b") is reported on stderr and skipped.
+ */
+static size_t
+find_matching(struct syscall **syscall_buffer, size_t syscall_count,
+              struct syscall_group *out)
+{
+    struct syscall *base = syscall_buffer[0];
+    assert(base != NULL);
+
+    size_t base_name_len = strlen(base->name);
+
+    // first pass: count the direct variants so the children array can be
+    // allocated with its final size
+    size_t matching = 0;
+    for (size_t i = 1; i < syscall_count; i++) {
+        struct syscall *cur = syscall_buffer[i];
+        // the family ends at the first name that is not "<base>$..."
+        if (!is_variant_name(cur->name, base->name, base_name_len)) {
+            break;
+        }
+        // direct variants have their last '$' immediately after the base name
+        char *last_dollar = strrchr(cur->name, '$');
+        if (last_dollar == cur->name + base_name_len) {
+            matching++;
+        }
+    }
+
+    if (matching == 0) {
+        out[0] = (struct syscall_group) {
+            .base = base,
+            .child_count = 0,
+            .children = NULL,
+        };
+        return 1;
+    }
+
+    struct syscall_group *children = xmalloc(sizeof(struct syscall_group) * matching);
+    size_t children_idx = 0;
+
+    // second pass: build one child group per direct variant; the recursive
+    // call also consumes that variant's own sub-variants
+    size_t i = 1;
+    while (i < syscall_count) {
+        struct syscall *cur = syscall_buffer[i];
+        if (!is_variant_name(cur->name, base->name, base_name_len)) {
+            break;
+        }
+        char *last_dollar = strrchr(cur->name, '$');
+        if (last_dollar != cur->name + base_name_len) {
+            // not a direct subvariant
+            fprintf(stderr, "not subvariant %s -> %s \n", base->name, cur->name);
+            i += 1;
+            continue;
+        }
+        i += find_matching(syscall_buffer + i, syscall_count - i,
+                           children + children_idx);
+        children_idx++;
+    }
+
+    assert(children_idx == matching);
+
+    out[0] = (struct syscall_group) {
+        .base = base,
+        .child_count = children_idx,
+        .children = children
+    };
+    return i;
+}
+
+/*
+ * qsort() comparator for an array of 'struct syscall *': orders the
+ * entries by syscall name using strcmp().
+ */
+static int
+syscall_comparator(const void *a, const void *b)
+{
+    const struct syscall *const *lhs = a;
+    const struct syscall *const *rhs = b;
+
+    return strcmp((*lhs)->name, (*rhs)->name);
+}
+
+/*
+ * Sorts the collected syscalls by name and groups every base syscall with
+ * its variants.
+ *
+ * Returns a heap-allocated array of top-level groups and stores their
+ * number in *out_count. The array has one extra, zeroed entry after the
+ * last group. Exits if the array cannot be shrunk to its final size.
+ */
+static struct syscall_group *
+group_syscall_variants(struct processing_state *state, size_t *out_count)
+{
+    // The idea is to sort the syscalls by name:
+    //  "prctl" "prctl$GET_FP_MODE"
+    //  "prctl$PR_CAP_AMBIENT" "prctl$PR_CAP_AMBIENT$PR_CAP_AMBIENT_LOWER"
+    // This way, every variant will immediately follow the base syscall and will
+    // be grouped into a syscall_group by 'find_matching'.
+
+    qsort(state->syscall_buffer, state->syscall_index,
+          sizeof(struct syscall *), syscall_comparator);
+
+    // in the worst case (no variants), there can be MAX_SYSCALL_COUNT syscall groups
+    struct syscall_group *scratch = xcalloc(sizeof(*scratch) * MAX_SYSCALL_COUNT);
+
+    size_t groups = 0;
+    size_t i = 0;
+    while (i < state->syscall_index) {
+        i += find_matching(state->syscall_buffer + i, state->syscall_index - i, scratch + groups);
+        groups++;
+    }
+
+    // resize to the groups actually used plus one trailing entry
+    size_t final_size = sizeof(*scratch) * (groups + 1);
+    struct syscall_group *ret = realloc(scratch, final_size);
+
+    if (ret == NULL) {
+        // report the size that was actually requested
+        fprintf(stderr, "realloc failed for %zu bytes\n", final_size);
+        exit(1);
+    }
+
+    // when groups == MAX_SYSCALL_COUNT the trailing entry lies beyond the
+    // zeroed scratch area, so clear it explicitly
+    ret[groups] = (struct syscall_group) {
+        .base = NULL,
+        .child_count = 0,
+        .children = NULL
+    };
+
+    *out_count = groups;
+    return ret;
+}
+
+/*
+ * Converts the parsed AST into a processed_ast: the list of define/include
+ * statements, the list of decoders and the syscalls grouped by variant.
+ *
+ * The returned structure is heap-allocated. It keeps pointers into the AST
+ * (names, types, condition strings), so the AST must outlive it.
+ */
+struct processed_ast *
+preprocess(struct ast_node *root)
+{
+    struct processed_ast *ret = xmalloc(sizeof *ret);
+
+    struct processing_state state = (struct processing_state) {
+        .syscall_buffer = xcalloc(sizeof(struct syscall *) * MAX_SYSCALL_COUNT),
+        .syscall_index = 0,
+        .struct_stmts = NULL,
+        .preprocessor_head = NULL,
+        .preprocessor_tail = NULL,
+        .decoder_head = NULL
+    };
+
+    struct condition_stack conditions = {.idx = 0, .stack = {0}};
+    preprocess_rec(root, &conditions, &state);
+
+    ret->preprocessor_stmts = state.preprocessor_head;
+    ret->struct_stmts = state.struct_stmts;
+    ret->syscall_groups = group_syscall_variants(&state, &ret->syscall_group_count);
+    ret->decoders = state.decoder_head;
+
+    // the groups store the syscall pointers themselves, so the temporary
+    // pointer array is no longer needed
+    free(state.syscall_buffer);
+
+    return ret;
+}
diff --git a/maint/gen/preprocess.h b/maint/gen/preprocess.h
new file mode 100644
index 000000000..31bda8658
--- /dev/null
+++ b/maint/gen/preprocess.h
@@ -0,0 +1,110 @@
+#ifndef PREPROCESS_H
+#define PREPROCESS_H
+
+/*
+ * This header uses size_t, bool, struct ast_loc and struct ast_type by
+ * value; include what it needs so it can be included on its own.
+ */
+#include <stdbool.h>
+#include <stddef.h>
+
+#include "ast.h"
+
+/*
+ * Stores nested #ifdef/#ifndef statements sequentially (as a stack),
+ * outermost condition first.
+ *
+ * #ifdef test1
+ * #ifdef test2 && test3
+ * #endif
+ * #endif
+ *
+ * is stored as count = 2, ["#ifdef test1", "#ifdef test2 && test3"]
+ */
+struct statement_condition {
+    // number of entries in 'values'
+    size_t count;
+    char *values[];
+};
+
+/*
+ * Stores define and include statements
+ */
+struct preprocessor_statement {
+    struct ast_loc loc;
+
+    // can be NULL (the statement is unconditional)
+    struct statement_condition *conditions;
+
+    char *value;
+};
+
+/*
+ * Singly linked list of preprocessor statements.
+ */
+struct preprocessor_statement_list {
+    struct preprocessor_statement stmt;
+    struct preprocessor_statement_list *next;
+};
+
+struct struct_def {
+    struct ast_loc loc;
+
+    char *name;
+    struct statement_condition *conditions;
+    // TODO
+};
+
+/*
+ * A single named argument of a syscall.
+ */
+struct syscall_argument {
+    char *name;
+    struct ast_type *type;
+};
+
+struct decoder {
+    struct ast_loc loc;
+
+    // the type this decoder handles
+    struct ast_type *matching_type;
+
+    // a format string containing C source code of a decoder capable of handling
+    // arguments/return values of type 'matching_type'.
+    // the first printf arg is a variable containing the value of the argument.
+    // the second printf arg is the index of the argument.
+    char *fmt_string;
+};
+
+/*
+ * Singly linked list of decoders.
+ */
+struct decoder_list {
+    struct decoder decoder;
+    struct decoder_list *next;
+};
+
+struct syscall {
+    struct ast_loc loc;
+
+    // can be NULL
+    struct statement_condition *conditions;
+
+    // name of the syscall
+    char *name;
+
+    // true when the name starts with "ioctl$"
+    bool is_ioctl;
+
+    // the return value of the syscall
+    struct ast_type ret;
+
+    // the defined arguments
+    size_t arg_count;
+    struct syscall_argument args[];
+};
+
+/*
+ * A group of syscall variants.
+ *
+ * The child syscall_groups will be output first, then the base syscall
+ * will be generated.
+ */
+struct syscall_group {
+    struct syscall *base;
+
+    // number of entries in 'children'; 'children' is NULL when it is 0
+    size_t child_count;
+    struct syscall_group *children;
+};
+
+/*
+ * The result of preprocess(): the AST split into preprocessor statements,
+ * decoders, struct definitions and grouped syscalls.
+ */
+struct processed_ast {
+    struct preprocessor_statement_list *preprocessor_stmts;
+    struct decoder_list *decoders;
+    struct struct_def *struct_stmts;
+    // number of entries in 'syscall_groups'
+    size_t syscall_group_count;
+    struct syscall_group *syscall_groups;
+};
+
+struct processed_ast *
+preprocess(struct ast_node *root);
+
+#endif
diff --git a/maint/gen/symbols.c b/maint/gen/symbols.c
new file mode 100644
index 000000000..bf4e1d3a0
--- /dev/null
+++ b/maint/gen/symbols.c
@@ -0,0 +1,150 @@
+/*
+ * Copyright (c) 2021 Srikavin Ramkumar <srikavinramkumar at gmail.com>
+ * Copyright (c) 2021 The strace developers.
+ * All rights reserved.
+ *
+ * SPDX-License-Identifier: LGPL-2.1-or-later
+ */
+
+#include <string.h>
+
+#include "deflang.h"
+#include "symbols.h"
+
+#define ARRAY_LEN(x) (sizeof(x) / sizeof((x)[0]))
+
+/*
+ * One entry of the symbol table: a name and the AST node that declared it.
+ * Entries form a singly linked list.
+ */
+struct symbol_entry {
+ // the pointer is stored as given to symbol_add(); the string is not copied
+ char *name;
+ struct ast_node *source;
+ struct symbol_entry *next;
+};
+
+// head of the symbol list; symbol_add() inserts new entries at the front
+struct symbol_entry *symbol_table;
+
+/*
+ * Looks 'name' up in the symbol table by linear search.
+ * Returns the node stored for the first matching entry, or NULL when the
+ * name is not present.
+ */
+struct ast_node *
+symbol_get(char *name)
+{
+    struct symbol_entry *entry = symbol_table;
+
+    while (entry != NULL && strcmp(entry->name, name) != 0) {
+        entry = entry->next;
+    }
+
+    return entry != NULL ? entry->source : NULL;
+}
+
+/*
+ * Registers 'name' as declared by 'source' unless the name is already
+ * present. The name pointer is stored as is; the string is not copied.
+ *
+ * Returns NULL when the symbol was added, otherwise the node of the
+ * previous definition (the table is left unchanged in that case).
+ */
+struct ast_node *
+symbol_add(char *name, struct ast_node *source)
+{
+    struct ast_node *existing = symbol_get(name);
+
+    if (existing == NULL) {
+        // new symbol: link it in at the front of the list
+        struct symbol_entry *entry = xmalloc(sizeof(*entry));
+
+        entry->name = name;
+        entry->source = source;
+        entry->next = symbol_table;
+        symbol_table = entry;
+    }
+
+    return existing;
+}
+
+
+/*
+ * Fills in 'out' for the type called 'name' with the given type options.
+ *
+ * 'out' always receives the name and options and starts as TYPE_BASIC.
+ * The built-in types const, ptr, ref, xor_flags and or_flags additionally
+ * get their option count and option kinds checked and their specific
+ * fields filled in.
+ *
+ * If any option is a template id, the type is left as TYPE_BASIC and NULL
+ * is returned without further checks.
+ *
+ * Returns NULL on success, otherwise an error message. The option-count
+ * message is heap-allocated while all other messages are string literals,
+ * so the caller cannot free the result unconditionally.
+ */
+char *
+resolve_type(struct ast_type *out, char *name, struct ast_type_option_list *options)
+{
+    out->name = name;
+    out->options = options;
+    out->type = TYPE_BASIC;
+
+    // number of type options each built-in type requires
+    static const struct {
+        const char *name;
+        size_t expected_args;
+    } expected_options_len[] = {
+        {"const", 1},
+        {"ptr", 2},
+        {"ref", 1},
+        {"xor_flags", 3},
+        {"or_flags", 3},
+    };
+
+    size_t options_len = 0;
+    for (struct ast_type_option_list *cur = options; cur != NULL; cur = cur->next) {
+        if (cur->option->child_type == AST_TYPE_CHILD_TEMPLATE_ID) {
+            return NULL;
+        }
+
+        options_len++;
+    }
+
+    // After this check, the branches below may walk exactly as many list
+    // entries as the table promises.
+    for (size_t i = 0; i < ARRAY_LEN(expected_options_len); ++i) {
+        if (strcmp(name, expected_options_len[i].name) != 0) {
+            continue;
+        }
+        if (options_len != expected_options_len[i].expected_args) {
+            char *error = xmalloc(128);
+            snprintf(error, 128, "type '%s' expects %zu type options; got %zu",
+                     name, expected_options_len[i].expected_args, options_len);
+            return error;
+        }
+        break;
+    }
+
+    if (strcmp(name, "const") == 0) {
+        out->type = TYPE_CONST;
+        out->constt.value = options->option;
+        out->constt.real_type = NULL;
+    } else if (strcmp(name, "ptr") == 0) {
+        out->type = TYPE_PTR;
+        if (options->option->child_type != AST_TYPE_CHILD_TYPE) {
+            return "first type option for ptr must be 'in', 'out' or 'inout'";
+        }
+        if (strcmp(options->option->type->name, "in") == 0) {
+            out->ptr.dir = PTR_DIR_IN;
+        } else if (strcmp(options->option->type->name, "out") == 0) {
+            out->ptr.dir = PTR_DIR_OUT;
+        } else if (strcmp(options->option->type->name, "inout") == 0) {
+            out->ptr.dir = PTR_DIR_INOUT;
+        } else {
+            return "first type option for ptr must be 'in', 'out' or 'inout'";
+        }
+        // the pointee is read through ->type, so it has to be a type option
+        if (options->next->option->child_type != AST_TYPE_CHILD_TYPE) {
+            return "second type option for ptr must be a type";
+        }
+        out->ptr.type = options->next->option->type;
+    } else if (strcmp(name, "ref") == 0) {
+        out->type = TYPE_REF;
+        if (options->option->child_type != AST_TYPE_CHILD_TYPE) {
+            return "first type option for ref must be the name of another argument or @ret";
+        }
+        if (strcmp(options->option->type->name, "@ret") == 0) {
+            out->ref.return_value = true;
+        } else {
+            out->ref.return_value = false;
+            out->ref.argname = options->option->type->name;
+        }
+    } else if (strcmp(name, "xor_flags") == 0) {
+        out->type = TYPE_XORFLAGS;
+        out->xorflags.flag_type = options->option;
+        if (options->option->child_type != AST_TYPE_CHILD_TYPE) {
+            return "first type option for xor_flags must be a string";
+        }
+        // the default is read through ->type->name, so check the kind first
+        if (options->next->option->child_type != AST_TYPE_CHILD_TYPE) {
+            return "second type option for xor_flags must be a name";
+        }
+        out->xorflags.dflt = options->next->option->type->name;
+        if (options->next->next->option->child_type != AST_TYPE_CHILD_TYPE) {
+            return "third type option for xor_flags must be the underlying flag type";
+        }
+        out->xorflags.underlying = options->next->next->option->type;
+    } else if (strcmp(name, "or_flags") == 0) {
+        out->type = TYPE_ORFLAGS;
+        out->orflags.flag_type = options->option;
+        if (options->option->child_type != AST_TYPE_CHILD_TYPE) {
+            return "first type option for or_flags must be a string";
+        }
+        // the default is read through ->type->name, so check the kind first
+        if (options->next->option->child_type != AST_TYPE_CHILD_TYPE) {
+            return "second type option for or_flags must be a name";
+        }
+        out->orflags.dflt = options->next->option->type->name;
+        if (options->next->next->option->child_type != AST_TYPE_CHILD_TYPE) {
+            return "third type option for or_flags must be the underlying flag type";
+        }
+        out->orflags.underlying = options->next->next->option->type;
+    }
+
+    return NULL;
+}
diff --git a/maint/gen/symbols.h b/maint/gen/symbols.h
new file mode 100644
index 000000000..1737ddccf
--- /dev/null
+++ b/maint/gen/symbols.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Srikavin Ramkumar <srikavinramkumar at gmail.com>
+ * Copyright (c) 2021 The strace developers.
+ * All rights reserved.
+ *
+ * SPDX-License-Identifier: LGPL-2.1-or-later
+ */
+
+#ifndef SYMBOLS_H
+#define SYMBOLS_H
+
+#include <stdbool.h>
+
+#include "ast.h"
+
+/*
+ * Fills in 'out' for the type 'name' with the given type options.
+ *
+ * Returns an error string if the given type is
+ * invalid. Otherwise, returns NULL if the type is
+ * valid.
+ */
+char *
+resolve_type(struct ast_type *out, char *name, struct ast_type_option_list *options);
+
+/*
+ * Associates 'name' with the AST node 'source' that declares it.
+ *
+ * Returns NULL if the symbol was added successfully.
+ * If the symbol is already defined, nothing is added and
+ * the source node of the previous definition is returned.
+ */
+struct ast_node *
+symbol_add(char *name, struct ast_node *source);
+
+/*
+ * Gets the source node of a previously added symbol.
+ * Returns NULL if no symbol called 'name' is stored.
+ */
+struct ast_node *
+symbol_get(char *name);
+
+#endif //SYMBOLS_H
--
2.25.1
More information about the Strace-devel
mailing list