[PATCH v2 1/3] maint: add a definition-based syscall decoder generator

Srikavin Ramkumar srikavinramkumar at gmail.com
Wed Sep 8 15:36:52 UTC 2021


Implement a code generation tool capable of parsing system call definitions
and generating system call decoders.

* maint/gen/.gitignore: New file.
* maint/gen/Makefile: Likewise.
* maint/gen/README.md: Likewise.
* maint/gen/ast.c: Likewise.
* maint/gen/ast.h: Likewise.
* maint/gen/codegen.c: Likewise.
* maint/gen/deflang.h: Likewise.
* maint/gen/defs/common.def: Likewise.
* maint/gen/lex.l: Likewise.
* maint/gen/parse.y: Likewise.
* maint/gen/preprocess.c: Likewise.
* maint/gen/preprocess.h: Likewise.
* maint/gen/symbols.c: Likewise.
* maint/gen/symbols.h: Likewise.
---

Changes since V1:
    * maint/gen/grammar.txt: Removed (and updated commit message).
    * maint/gen/codegen.c: Changed warning generated and modified generation of
                           variable declarations.
    * maint/gen/defs/common.syzlang: Renamed to common.def for consistency.


 maint/gen/.gitignore      |   6 +
 maint/gen/Makefile        |  16 +
 maint/gen/README.md       | 115 +++++
 maint/gen/ast.c           | 340 +++++++++++++
 maint/gen/ast.h           | 226 +++++++++
 maint/gen/codegen.c       | 992 ++++++++++++++++++++++++++++++++++++++
 maint/gen/deflang.h       |  29 ++
 maint/gen/defs/common.def |  49 ++
 maint/gen/lex.l           | 262 ++++++++++
 maint/gen/parse.y         | 383 +++++++++++++++
 maint/gen/preprocess.c    | 284 +++++++++++
 maint/gen/preprocess.h    | 110 +++++
 maint/gen/symbols.c       | 150 ++++++
 maint/gen/symbols.h       |  39 ++
 14 files changed, 3001 insertions(+)
 create mode 100644 maint/gen/.gitignore
 create mode 100644 maint/gen/Makefile
 create mode 100644 maint/gen/README.md
 create mode 100644 maint/gen/ast.c
 create mode 100644 maint/gen/ast.h
 create mode 100644 maint/gen/codegen.c
 create mode 100644 maint/gen/deflang.h
 create mode 100644 maint/gen/defs/common.def
 create mode 100644 maint/gen/lex.l
 create mode 100644 maint/gen/parse.y
 create mode 100644 maint/gen/preprocess.c
 create mode 100644 maint/gen/preprocess.h
 create mode 100644 maint/gen/symbols.c
 create mode 100644 maint/gen/symbols.h

diff --git a/maint/gen/.gitignore b/maint/gen/.gitignore
new file mode 100644
index 000000000..d690f4c72
--- /dev/null
+++ b/maint/gen/.gitignore
@@ -0,0 +1,6 @@
+lex.yy.c
+parse.tab.c
+parse.tab.h
+parse
+parse.output
+/gen
diff --git a/maint/gen/Makefile b/maint/gen/Makefile
new file mode 100644
index 000000000..85d35d20d
--- /dev/null
+++ b/maint/gen/Makefile
@@ -0,0 +1,16 @@
+CFLAGS += -ggdb -std=gnu99 -Wall -Wextra
+
+all: gen
+
+gen: parse.tab.o lex.yy.o ast.o codegen.o symbols.o parse.tab.h lex.yy.c preprocess.o
+	$(CC) $(CFLAGS) parse.tab.o lex.yy.o ast.o codegen.o symbols.o preprocess.o -o ./gen
+
+lex.yy.c: lex.l parse.tab.h
+	flex lex.l
+
+parse.tab.c parse.tab.h: parse.y
+	bison -d parse.y
+
+clean:
+	rm -f lex.yy.o ast.o parse.tab.o codegen.o preprocess.o symbols.o
+	rm -f gen parse.tab.c parse.tab.h lex.yy.c lex.yy.h
diff --git a/maint/gen/README.md b/maint/gen/README.md
new file mode 100644
index 000000000..5f33db864
--- /dev/null
+++ b/maint/gen/README.md
@@ -0,0 +1,115 @@
+Syscall Definitions
+====
+
+This syscall definition language is based on the [syzkaller description language](https://github.com/google/syzkaller/blob/master/docs/syscall_descriptions.md).
+
+All non-syscall statements maintain their relative ordering and are placed
+before syscall statements in the generated C code.
+
+## Syntax
+
+### Types
+
+Types have the following format `type_name[type_option]`.
+The `type_name` can include alphanumeric characters and `$_`.
+The `type_option` can be another type or a number.
+
+Numbers can be specified as a decimal number (`65`), as a hex number (`0x41`), or as a character constant (`'A'`).
+
+The default types are the following:
+ * standard C types: `void`, `int`, `char`, `long`, `uint`, `ulong`, `longlong`, `ulonglong`, `double`, `float`
+ * `stddef.h` types: `size_t`, `ssize_t`, ...
+ * `stdint.h` types: `uint8_t`, `int8_t`, `uint64_t`, `int64_t`, ...
+ * kernel types: `kernel_long_t`, `kernel_ulong_t`, ...
+ * `fd`: A file descriptor
+ * `tid`: A thread id
+ * `string`: A null terminated char buffer
+ * `path` A null terminated path string
+ * `stringnoz[n]`: A non-null terminated char buffer of length `n`
+ * `const[x]`: A constant of value `x` that inherits its parent type
+ * `const[x:y]`: A constant with a value between `x` and `y` (inclusive) that inherits its parent type
+ * `ptr[dir, typ]`: A pointer to object of type `typ`; direction can be `in`, `out`, `inout`
+ * `ref[argname]`: A reference to the value of another parameter with name `argname` or `@ret`
+ * `xor_flags[xlat_name, ???, underlying_typ]`: A integer type (`underlying_typ`)
+    containing mutually exclusive flags with xlat symbol name `xlat_name`
+ * `or_flags[xlat_name, ???, underlying_typ]`: A integer type (`underlying_typ`)
+    containing flags that are ORed together with xlat symbol name `xlat_name`
+
+Constants (`const`) can only be used within variant syscalls.
+
+### Syscalls
+Syscall definitions have the format
+```
+syscall_name (arg_type1 arg_name1, arg_type2 arg_name2, ...) return_type
+```
+
+The `return_type` is optional if no special printing mode is needed.
+
+Some system calls have various modes of operations. Consider the `fcntl` syscall.
+Its second parameter determines the types of the remaining arguments. To
+handle this, a variant syscall definition can be used:
+```
+fcntl(filedes fd, cmd xor_flags[fcntl_cmds, F_???, kernel_ulong_t], arg kernel_ulong_t) kernel_ulong_t
+fcntl$F_DUPFD(filedes fd, cmd const[F_DUPFD], arg kernel_ulong_t) fd
+fcntl$F_DUPFD_CLOEXEC(filedes fd, cmd const[F_DUPFD_CLOEXEC], arg kernel_ulong_t) fd
+...
+```
+
+The `$` character is used to indicate that a syscall is a variant of another one.
+The `const` parameters of a variant syscall will be used to determine which
+variant to use. If no variant syscalls match, the base syscall will be used.
+
+### Custom Decoders
+
+Custom decoders have the format
+```
+:type[argname, arg2[$3], $1] %{
+    do_something(tcp, $$, $1);
+%}
+```
+
+The type following the `:` indicates which type this decoder should apply to.
+Template variables (`$` followed by 1 or more numbers) can be used to reference
+the value of a type option. These variables can be used within the body of the
+custom decoder and will be substituted with the resolved value.
+
+The special `$$` variable refers to the root argument.
+
+For example, the syscall `example(arg1 type[test, type2[5], 1]` would have the
+following decoder for the arg1 parameter:
+```
+do_something(tcp, tcp->u_arg[1], 1);
+```
+
+### #import
+
+Import statements have the format
+```
+#import "filename.def"
+```
+
+The contents of the `filename.def` will be treated as if they were placed in the current file.
+
+### #ifdef/#ifndef
+
+Ifdef, ifndef statements have the format
+```
+#ifdef condition
+#ifndef condition
+#endif
+#endif
+```
+
+Ifdef, ifndef, and define statements will be included as-is in the generated output.
+Unlike C, these cannot be placed in the middle of another statement.
+
+### define/include
+
+Include and define statements have the format
+```
+define DEBUG 1
+include "filename.h"
+include <filename.h>
+```
+
+The contents of include and define statements will be included as-is in the generated output.
diff --git a/maint/gen/ast.c b/maint/gen/ast.c
new file mode 100644
index 000000000..ebab08538
--- /dev/null
+++ b/maint/gen/ast.c
@@ -0,0 +1,340 @@
+#include "ast.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "deflang.h"
+#include "symbols.h"
+#include "parse.tab.h"
+
+void *
+xmalloc(size_t n)
+{
+	void *ret = malloc(n);
+
+	if (!ret) {
+		fprintf(stderr, "allocation failed\n");
+		exit(EXIT_FAILURE);
+	}
+
+	return ret;
+}
+
+void *
+xcalloc(size_t n)
+{
+	void *ret = calloc(1, n);
+
+	if (!ret) {
+		fprintf(stderr, "allocation failed\n");
+		exit(EXIT_FAILURE);
+	}
+
+	return ret;
+}
+
+struct ast_node *
+create_ast_node(enum ast_node_type type, void *loc)
+{
+	struct ast_node *node = xmalloc(sizeof *node);
+	*node = (struct ast_node) {
+		.type = type,
+		.loc = {
+			.lineno = ((YYLTYPE *) loc)->first_line,
+			.colno = ((YYLTYPE *) loc)->first_column,
+			.file = strdup(cur_filename)
+		},
+		.next = NULL
+	};
+	return node;
+}
+
+struct ast_type_option_list *
+create_ast_type_option_list(struct ast_type_option *cur, struct ast_type_option_list *next)
+{
+	struct ast_type_option_list *list = xmalloc(sizeof *list);
+	*list = (struct ast_type_option_list) {
+		.next = next,
+		.option = cur
+	};
+	return list;
+}
+
+struct ast_syscall_arg *
+create_ast_syscall_arg(char *name, struct ast_type *type, struct ast_syscall_arg *next)
+{
+	struct ast_syscall_arg *arg = xmalloc(sizeof *arg);
+	*arg = (struct ast_syscall_arg) {
+		.name = name,
+		.type = type,
+		.next = next
+	};
+	return arg;
+}
+
+struct ast_flag_values *
+create_ast_flag_values(char *name, struct ast_flag_values *next)
+{
+	struct ast_flag_values *arg = xmalloc(sizeof *arg);
+	*arg = (struct ast_flag_values) {
+		.name = name,
+		.next = next
+	};
+	return arg;
+}
+
+struct ast_struct_element *
+create_ast_struct_element(char *name, struct ast_type *type, struct ast_struct_element *next)
+{
+	struct ast_struct_element *struct_element = xmalloc(sizeof *struct_element);
+	*struct_element = (struct ast_struct_element) {
+		.name = name,
+		.type = type,
+		.next = next
+	};
+	return struct_element;
+}
+
+struct known_type {
+	struct ast_type type;
+	struct known_type *next;
+};
+
+static struct known_type *known_types = NULL;
+
+struct known_type_option {
+	struct ast_type_option type_option;
+	struct known_type_option *next;
+};
+
+static struct known_type_option *known_type_options = NULL;
+
+static bool
+compare_type_option_list(struct ast_type_option_list *a, struct ast_type_option_list *b,
+						 bool match_templates)
+{
+	struct ast_type_option_list *cur_a = a;
+	struct ast_type_option_list *cur_b = b;
+
+	while (cur_a != NULL) {
+		if (cur_b == NULL) {
+			return false;
+		}
+
+		if (cur_a->option->child_type == AST_TYPE_CHILD_TEMPLATE_ID ||
+			cur_b->option->child_type == AST_TYPE_CHILD_TEMPLATE_ID) {
+			if (match_templates) {
+				// templates are able to match all other type options
+				cur_a = cur_a->next;
+				cur_b = cur_b->next;
+				continue;
+			}
+			return false;
+		}
+
+		if (cur_a->option->child_type != cur_b->option->child_type) {
+			return false;
+		}
+
+		if (cur_a->option->child_type == AST_TYPE_CHILD_NUMBER &&
+			(cur_a->option->number.val != cur_b->option->number.val)) {
+			return false;
+		}
+
+		if (cur_a->option->child_type == AST_TYPE_CHILD_TYPE) {
+			if (!(strcmp(cur_a->option->type->name, cur_b->option->type->name) == 0 &&
+				  compare_type_option_list(cur_a->option->type->options,
+										   cur_b->option->type->options, match_templates))) {
+				return false;
+			}
+		}
+
+		cur_a = cur_a->next;
+		cur_b = cur_b->next;
+	}
+
+	if (cur_b != NULL) {
+		return false;
+	}
+
+	return true;
+}
+
+bool
+ast_type_matching(struct ast_type *a, struct ast_type *b)
+{
+	return strcmp(a->name, b->name) == 0 && compare_type_option_list(a->options, b->options, true);
+}
+
+struct ast_type *
+create_or_get_type(char **error, char *name, struct ast_type_option_list *options)
+{
+	// check if we've seen this type before
+	for (struct known_type *cur = known_types; cur != NULL; cur = cur->next) {
+		if (strcmp(cur->type.name, name) == 0 &&
+			compare_type_option_list(cur->type.options, options, false)) {
+			return &cur->type;
+		}
+	}
+
+	// allocate a new type
+	struct known_type *type = xmalloc(sizeof *type);
+
+	char *status = resolve_type(&type->type, name, options);
+	type->next = known_types;
+
+	if (error) {
+		*error = status;
+	}
+
+	if (status != NULL) {
+		free(type);
+		return NULL;
+	}
+
+	known_types = type;
+
+	return &type->type;
+}
+
+struct ast_type_option *
+create_or_get_type_option_number(struct ast_number number)
+{
+	// check if we've seen this type option before
+	for (struct known_type_option *cur = known_type_options; cur != NULL; cur = cur->next) {
+		if (cur->type_option.child_type == AST_TYPE_CHILD_NUMBER &&
+			cur->type_option.number.val == number.val) {
+			return &cur->type_option;
+		}
+	}
+
+	// allocate a new type option
+	struct known_type_option *option = xmalloc(sizeof *option);
+	*option = (struct known_type_option) {
+		.type_option = {
+			.child_type = AST_TYPE_CHILD_NUMBER,
+			.number = number
+		},
+		.next = known_type_options
+	};
+
+	known_type_options = option;
+
+	return &option->type_option;
+}
+
+struct ast_type_option *
+create_type_template_identifier(struct ast_number number)
+{
+	struct ast_type_option *option = xmalloc(sizeof *option);
+	*option = (struct ast_type_option) {
+		.child_type = AST_TYPE_CHILD_TEMPLATE_ID,
+		.template = {
+			.id = number.val
+		}
+	};
+
+	return option;
+}
+
+struct ast_type_option *
+create_or_get_type_option_nested(struct ast_type *child)
+{
+	// check if we've seen this type option before
+	for (struct known_type_option *cur = known_type_options; cur != NULL; cur = cur->next) {
+		// since all types are allocated by create_or_get_type,
+		// types that are equal have the same address
+		if (cur->type_option.child_type == AST_TYPE_CHILD_TYPE && cur->type_option.type == child) {
+			return &cur->type_option;
+		}
+	}
+
+	// allocate a new type option
+	struct known_type_option *option = xmalloc(sizeof *option);
+	*option = (struct known_type_option) {
+		.type_option = {
+			.child_type = AST_TYPE_CHILD_TYPE,
+			.type = child
+		},
+		.next = known_type_options
+	};
+
+	known_type_options = option;
+
+	return &option->type_option;
+}
+
+struct ast_type_option *
+create_type_option_range(struct ast_type_option *min, struct ast_type_option *max)
+{
+	struct ast_type_option *ret = xmalloc(sizeof *ret);
+	*ret = (struct ast_type_option) {
+		.child_type = AST_TYPE_CHILD_RANGE,
+		.range = {
+			.min = min,
+			.max = max
+		}
+	};
+	return ret;
+}
+
+
+void
+free_ast_tree(struct ast_node *root)
+{
+	switch (root->type) {
+		case AST_IFDEF:
+			free(root->ifdef.value);
+			break;
+		case AST_DEFINE:
+			free(root->define.value);
+			break;
+		case AST_INCLUDE:
+			free(root->include.value);
+			break;
+		case AST_STRUCT: {
+			struct ast_struct_element *cur = root->ast_struct.elements;
+			while (cur != NULL) {
+				struct ast_struct_element *tmp = cur;
+				cur = tmp->next;
+				free(tmp->name);
+				free(tmp);
+			}
+			break;
+		}
+		case AST_COMPOUND: {
+			struct ast_node *cur = root->compound.children;
+			while (cur != NULL) {
+				struct ast_node *tmp = cur;
+				cur = tmp->next;
+				free_ast_tree(tmp);
+			}
+			break;
+		}
+		case AST_SYSCALL: {
+			struct ast_syscall_arg *cur = root->syscall.args;
+			while (cur != NULL) {
+				struct ast_syscall_arg *tmp = cur;
+				cur = tmp->next;
+				free(tmp->name);
+				free(tmp);
+			}
+			break;
+		}
+		case AST_FLAGS: {
+			struct ast_flag_values *cur = root->flags.values;
+			while (cur != NULL) {
+				struct ast_flag_values *tmp = cur;
+				cur = tmp->next;
+				free(tmp->name);
+				free(tmp);
+			}
+			break;
+		}
+		default:
+			break;
+	}
+
+	free(root);
+}
diff --git a/maint/gen/ast.h b/maint/gen/ast.h
new file mode 100644
index 000000000..2a5ee9805
--- /dev/null
+++ b/maint/gen/ast.h
@@ -0,0 +1,226 @@
+#ifndef AST_H
+#define AST_H
+
+#include <stdbool.h>
+#include <stdint.h>
+
+struct ast_number {
+	char *raw;
+	intmax_t val;
+};
+
+enum ast_node_type {
+	AST_IFDEF,
+	AST_SYSCALL,
+	AST_DEFINE,
+	AST_INCLUDE,
+	AST_COMPOUND,
+	AST_STRUCT,
+	AST_DECODER,
+	AST_FLAGS
+};
+
+struct ast_struct {
+	char *name;
+	struct ast_struct_element *elements;
+};
+
+struct ast_struct_element {
+	char *name;
+	struct ast_type *type;
+	struct ast_struct_element *next;
+};
+
+struct ast_syscall {
+	char *name;
+	struct ast_syscall_arg *args;
+	struct ast_type *return_type;
+};
+
+struct ast_syscall_arg {
+	char *name;
+	struct ast_type *type;
+	struct ast_syscall_arg *next;
+};
+
+enum standard_types {
+	// non-special type
+	TYPE_BASIC,
+	// const[typ, val]
+	TYPE_CONST,
+	// ptr[dir, typ]
+	TYPE_PTR,
+	// ref[argname]
+	TYPE_REF,
+	// xorflags[flag_typ]
+	TYPE_XORFLAGS,
+	// orflags[flag_typ]
+	TYPE_ORFLAGS
+};
+
+#define IS_IN_PTR(x) ((x)->type == TYPE_PTR && \
+((x)->ptr.dir == PTR_DIR_INOUT || (x)->ptr.dir == PTR_DIR_IN))
+
+#define IS_OUT_PTR(x) ((x)->type == TYPE_PTR && \
+((x)->ptr.dir == PTR_DIR_INOUT || (x)->ptr.dir == PTR_DIR_OUT))
+
+#define IS_INOUT_PTR(x) ((x)->type == TYPE_PTR && (x)->ptr.dir == PTR_DIR_INOUT)
+
+enum ptr_dir {
+	PTR_DIR_IN,
+	PTR_DIR_OUT,
+	PTR_DIR_INOUT,
+};
+
+struct ast_type {
+	enum standard_types type;
+	char *name;
+	struct ast_type_option_list *options;
+	union {
+		struct {
+			struct ast_type_option *len;
+		} stringnoz;
+		struct {
+			struct ast_type_option *value;
+			struct ast_type *real_type;
+		} constt;
+		struct {
+			enum ptr_dir dir;
+			struct ast_type *type;
+		} ptr;
+		struct {
+			struct ast_type_option *type;
+			struct ast_type_option *len;
+		} array;
+		struct {
+			bool return_value;
+			// only set if return_value is false
+			char *argname;
+		} ref;
+		struct {
+			struct ast_type_option *flag_type;
+			char *dflt;
+			struct ast_type *underlying;
+		} xorflags;
+		struct {
+			struct ast_type_option *flag_type;
+			char *dflt;
+			struct ast_type *underlying;
+		} orflags;
+	};
+};
+
+struct ast_type_option_list {
+	struct ast_type_option *option;
+	struct ast_type_option_list *next;
+};
+
+enum ast_type_option_child {
+	AST_TYPE_CHILD_RANGE,
+	AST_TYPE_CHILD_NUMBER,
+	AST_TYPE_CHILD_TYPE,
+	AST_TYPE_CHILD_TEMPLATE_ID
+};
+
+struct ast_type_option {
+	enum ast_type_option_child child_type;
+	union {
+		struct ast_type *type;
+		struct ast_number number;
+		struct {
+			struct ast_type_option *min;
+			struct ast_type_option *max;
+		} range;
+		struct {
+			intmax_t id;
+		} template;
+	};
+};
+
+struct ast_flag_values {
+	char *name;
+	struct ast_flag_values *next;
+};
+
+struct ast_loc {
+	char *file;
+	int lineno;
+	int colno;
+};
+
+struct ast_node {
+	enum ast_node_type type;
+	struct ast_loc loc;
+
+	// used when this node's parent is AST_COMPOUND
+	struct ast_node *next;
+
+	union {
+		struct ast_syscall syscall;
+		struct ast_struct ast_struct;
+		struct {
+			char *value;
+			bool invert;
+			struct ast_node *child;
+		} ifdef;
+		struct {
+			char *value;
+		} include;
+		struct {
+			char *value;
+		} define;
+		struct {
+			struct ast_node *children;
+		} compound;
+		struct {
+			char *name;
+			struct ast_flag_values *values;
+		} flags;
+		struct {
+			struct ast_type *type;
+			char *decoder;
+		} decoder;
+	};
+};
+
+struct ast_node *
+create_ast_node(enum ast_node_type type, void *location);
+
+struct ast_type_option_list *
+create_ast_type_option_list(struct ast_type_option *cur, struct ast_type_option_list *next);
+
+struct ast_struct_element *
+create_ast_struct_element(char *name, struct ast_type *type, struct ast_struct_element *next);
+
+struct ast_syscall_arg *
+create_ast_syscall_arg(char *name, struct ast_type *type, struct ast_syscall_arg *next);
+
+struct ast_flag_values *
+create_ast_flag_values(char *name, struct ast_flag_values *next);
+
+// returns true if two types are equal; false otherwise
+bool
+ast_type_matching(struct ast_type *a, struct ast_type *b);
+
+/*
+ * On error, returns NULL and sets an error string to error.
+ */
+struct ast_type *
+create_or_get_type(char **error, char *name, struct ast_type_option_list *options);
+
+struct ast_type_option *
+create_or_get_type_option_number(struct ast_number number);
+
+struct ast_type_option *
+create_or_get_type_option_nested(struct ast_type *child);
+
+struct ast_type_option *
+create_type_option_range(struct ast_type_option *min, struct ast_type_option *max);
+
+struct ast_type_option *
+create_type_template_identifier(struct ast_number number);
+
+void
+free_ast_tree(struct ast_node *root);
+
+#endif
diff --git a/maint/gen/codegen.c b/maint/gen/codegen.c
new file mode 100644
index 000000000..05dfb08a5
--- /dev/null
+++ b/maint/gen/codegen.c
@@ -0,0 +1,992 @@
+#include <assert.h>
+#include <ctype.h>
+#include <inttypes.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "ast.h"
+#include "deflang.h"
+#include "symbols.h"
+
+struct {
+	char *name;
+	char *ctype;
+} basic_types[] = {
+	{"uchar", "unsigned char"},
+	{"ushort", "unsigned short"},
+	{"uint", "unsigned int"},
+	{"ulong", "unsigned long"},
+	{"longlong", "long long"},
+	{"ulonglong", "unsigned long long"},
+	{"longdouble", "long double"},
+	{"string", "char"},
+	{"path", "char"},
+	{"size", "kernel_size_t"},
+	{"size_t", "kernel_size_t"},
+	{"gid", "gid_t"}
+};
+
+char *signed_int_types[] = {
+	"char",
+	"short",
+	"int",
+	"long",
+	"longlong",
+	"kernel_long_t",
+	"ssize_t"
+};
+
+char *unsigned_int_types[] = {
+	"uchar",
+	"ushort",
+	"uint",
+	"ulong",
+	"ulonglong",
+	"kernel_ulong_t",
+	"size_t",
+	"size"
+};
+
+static struct decoder_list *decoders = NULL;
+
+#define VARIANT_FUNC_NAME_LEN 64
+#define SYSCALL_RET_FLAG_LEN 64
+#define SYSCALL_ARG_STR_LEN 16
+#define DECODER_PROTOTYPE_LEN 128
+
+#define ARRAY_LEN(x) (sizeof(x) / sizeof((x)[0]))
+
+/* convenience macros */
+
+#define OUTFI(...) outf_indent(indent_level, out, __VA_ARGS__)
+
+#define OUTF(...) outf(out, __VA_ARGS__)
+
+#define OUTC(c) outc(out, c)
+
+#define OUTSI(s) outs_indent(indent_level, out, s)
+
+#define OUTS(s) outs(out, s)
+
+static void
+outf_indent(int indent_level, FILE *out, const char *fmt,
+			...) __attribute__((format(printf, 3, 4)));
+
+static void
+outf(FILE *out, const char *fmt, ...) __attribute__((format(printf, 2, 3)));
+
+static void
+outc(FILE *out, int c)
+{
+	fputc(c, out);
+}
+
+static void
+outs(FILE *out, const char *s)
+{
+	fputs(s, out);
+}
+
+static void
+indent(FILE *out, int indent)
+{
+	for (int i = 0; i < indent; ++i) {
+		outc(out, '\t');
+	}
+}
+
+static void
+outs_indent(int indent_level, FILE *out, const char *s)
+{
+	indent(out, indent_level);
+	fprintf(out, "%s", s);
+}
+
+static void
+outf(FILE *out, const char *fmt, ...)
+{
+	va_list args;
+	va_start(args, fmt);
+
+	vfprintf(out, fmt, args);
+
+	va_end(args);
+}
+
+static void
+outf_indent(int indent_level, FILE *out, const char *fmt, ...)
+{
+	va_list args;
+	va_start(args, fmt);
+
+	indent(out, indent_level);
+	vfprintf(out, fmt, args);
+
+	va_end(args);
+}
+
+static void
+log_warning(char *fmt, struct ast_loc node, ...)
+{
+	va_list args;
+	va_start(args, node);
+
+	fprintf(stderr, "Codegen Warning: ");
+	fprintf(stderr, "line %d, col %d: ", node.lineno, node.colno);
+
+	vfprintf(stderr, fmt, args);
+
+	fprintf(stderr, "\n");
+
+	va_end(args);
+}
+
+static bool
+is_signed_integer_typename(const char *name)
+{
+	for (size_t i = 0; i < ARRAY_LEN(signed_int_types); ++i) {
+		if (strcmp(signed_int_types[i], name) == 0) {
+			return true;
+		}
+	}
+
+	return false;
+}
+
+static bool
+is_unsigned_integer_typename(const char *name)
+{
+	for (size_t i = 0; i < ARRAY_LEN(unsigned_int_types); ++i) {
+		if (strcmp(unsigned_int_types[i], name) == 0) {
+			return true;
+		}
+	}
+
+	return false;
+}
+
+/*
+ * Stores a string referring to the i-th argument in the current syscall.
+ */
+static void
+get_syscall_arg_value(char out[static SYSCALL_ARG_STR_LEN], struct syscall *syscall, size_t i)
+{
+	if (syscall->is_ioctl) {
+		if (i >= 1 && i <= 2) {
+			const char *ioctl_args[3] = {"", "code", "arg"};
+			snprintf(out, SYSCALL_ARG_STR_LEN, "%s", ioctl_args[i]);
+			return;
+		}
+
+		log_warning("ioctl decoder referenced OOB argument %zu", syscall->loc, i);
+	}
+	snprintf(out, SYSCALL_ARG_STR_LEN, "tcp->u_arg[%zu]", i);
+}
+
+/*
+ * Stores a string referring to the return value of the current syscall.
+ */
+static void
+get_syscall_ret_value(char out[static SYSCALL_ARG_STR_LEN])
+{
+	snprintf(out, SYSCALL_ARG_STR_LEN, "tcp->u_rval");
+}
+
+/*
+ * Converts a string containing the C equivalent of a given type.
+ */
+static char *
+type_to_ctype(const struct ast_type *type)
+{
+	if (type->type == TYPE_BASIC) {
+		for (size_t i = 0; i < ARRAY_LEN(basic_types); ++i) {
+			if (strcmp(type->name, basic_types[i].name) == 0) {
+				return basic_types[i].ctype;
+			}
+		}
+
+		struct ast_node *def = symbol_get(type->name);
+		if (def != NULL && def->type == AST_STRUCT) {
+			size_t len = sizeof("struct ") + strlen(type->name);
+			char *ret = xmalloc(len);
+			snprintf(ret, len, "struct %s", type->name);
+			return ret;
+		}
+
+		return type->name;
+	}
+
+	if (type->type == TYPE_PTR) {
+		char *underlying = type_to_ctype(type->ptr.type);
+
+		size_t len = strlen(underlying) + sizeof(" *");
+		char *ret = xmalloc(len);
+		snprintf(ret, len, "%s *", underlying);
+		return ret;
+	}
+
+	if (type->type == TYPE_XORFLAGS) {
+		return type_to_ctype(type->xorflags.underlying);
+	}
+
+	if (type->type == TYPE_ORFLAGS) {
+		return type_to_ctype(type->orflags.underlying);
+	}
+
+	return type->name;
+}
+
+static char *
+type_variable_declaration(const struct ast_type *type, const char *var_name) {
+	if (type->type == TYPE_BASIC && strcmp(type->name, "array") == 0) {
+		if (type->options != NULL && type->options->next != NULL) {
+			struct ast_type_option *underlying_type = type->options->option;
+			struct ast_type_option *member_count_type = type->options->next->option;
+			if (underlying_type->child_type == AST_TYPE_CHILD_TYPE &&
+				member_count_type->child_type == AST_TYPE_CHILD_NUMBER) {
+				char *underlying = type_to_ctype(underlying_type->type);
+
+				size_t len = strlen(underlying) + sizeof("() [];") + strlen(var_name) + strlen(member_count_type->number.raw);
+				char *ret = xmalloc(len);
+				snprintf(ret, len, "%s %s[%s];", underlying, var_name, member_count_type->number.raw);
+
+				return ret;
+			}
+		}
+	}
+
+	char *underlying = type_to_ctype(type);
+
+	size_t len = strlen(underlying) + strlen(var_name) + sizeof(" ;");
+	char *ret = xmalloc(len);
+	snprintf(ret, len, "%s %s;", underlying, var_name);
+
+	return ret;
+
+}
+/*
+ * Get flags to return from a SYS_FUNC.
+ */
+static void
+get_sys_func_return_flags(char out[static SYSCALL_RET_FLAG_LEN], struct ast_type *type,
+						  bool is_ioctl)
+{
+	struct {
+		char *type;
+		char *flag;
+	} flags[] = {
+		{"fd", "RVAL_FD"},
+		{"tid", "RVAL_TID"},
+		{"sid", "RVAL_SID"},
+		{"tgid", "RVAL_TGID"},
+		{"pgid", "RVAL_PGID"}
+	};
+
+	char *base = "RVAL_DECODED";
+	if (is_ioctl) {
+		base = "RVAL_IOCTL_DECODED";
+	}
+
+	char *following = NULL;
+	for (size_t i = 0; i < ARRAY_LEN(flags); ++i) {
+		if (strcmp(flags[i].type, type->name) == 0) {
+			following = flags[i].flag;
+			break;
+		}
+	}
+
+	if (following) {
+		snprintf(out, SYSCALL_RET_FLAG_LEN, "%s | %s", base, following);
+	} else {
+		snprintf(out, SYSCALL_RET_FLAG_LEN, "%s", base);
+	}
+}
+
+/*
+ * Resolves a type option to a concrete value.
+ *
+ * For example, const[PATH_MAX] is resolved to PATH_MAX
+ * and const[ref[argname]] is resolved to tcp->u_arg[2]
+ * (where argname is the name of the 3rd syscall argument).
+ *
+ * The specified type option MUST NOT be a range or a template id.
+ */
+static char *
+resolve_type_option_to_value(struct syscall *syscall, struct ast_type_option *option)
+{
+	assert(option->child_type != AST_TYPE_CHILD_RANGE &&
+		   option->child_type != AST_TYPE_CHILD_TEMPLATE_ID);
+
+	if (option->child_type == AST_TYPE_CHILD_NUMBER) {
+		// return the number exactly as specified in the source file
+		return option->number.raw;
+	} else if (option->child_type == AST_TYPE_CHILD_TYPE) {
+		if (option->type->type == TYPE_REF) {
+			// identify which argument is being referred to
+
+			// syscall return value
+			if (option->type->ref.return_value) {
+				char *ret = xmalloc(SYSCALL_ARG_STR_LEN);
+				get_syscall_ret_value(ret);
+				return ret;
+			}
+
+			// find syscall argument by name
+			bool found = false;
+			size_t index = 0;
+
+			for (; index < syscall->arg_count; ++index) {
+				if (strcmp(option->type->ref.argname, syscall->args[index].name) == 0) {
+					found = true;
+					break;
+				}
+			}
+
+			if (found) {
+				char *ret = xmalloc(SYSCALL_ARG_STR_LEN);
+				get_syscall_arg_value(ret, syscall, index);
+				return ret;
+			}
+
+			log_warning("Failed to resolve 'ref' type with value \"%s\" to argument",
+						syscall->loc, option->type->ref.argname);
+			return "#error FAILED TO RESOLVE REF TYPE TO VALUE";
+		} else {
+			// assume the given value is a constant or from a #define
+			return option->type->name;
+		}
+	}
+
+	assert(false);
+}
+
+/*
+ * Stores the value of a given variable using set_tcb_priv_data.
+ */
+static void
+store_single_value(FILE *out, struct ast_type *type, char *arg, int indent_level)
+{
+	OUTFI("{\n");
+	indent_level++;
+
+	OUTFI("%s\n", type_variable_declaration(type->ptr.type, "tmp_var"));
+	OUTFI("if (!umove_or_printaddr(tcp, %s, &tmp_var)) {\n", arg);
+	indent_level++;
+
+	OUTFI("void *tmp_buffer = xmalloc(sizeof(%s));\n", "tmp_var");
+	OUTFI("memcpy(tmp_buffer, tmp_var, sizeof(%s));\n", "tmp_var");
+	OUTFI("set_tcb_priv_data(tcp, tmp_buffer, free);\n");
+
+	indent_level--;
+	OUTFI("}\n");
+
+	indent_level--;
+	OUTFI("}\n");
+}
+
+static void
+generate_printer(FILE *out, struct syscall *syscall, const char *argname,
+				 const char *arg, bool entering,
+				 struct ast_type *type, int indent_level);
+
+static void
+generate_printer_ptr(FILE *out, struct syscall *syscall, const char *argname,
+					 const char *arg, bool entering,
+					 struct ast_type *type, int indent_level)
+{
+	struct ast_type *underlying = type->ptr.type;
+
+	// copy from target memory and use decoder for resulting value
+	char var_name[32];
+	snprintf(var_name, 32, "tmpvar_%s", argname);
+
+	if ((IS_IN_PTR(type) && entering) || (IS_OUT_PTR(type) && !entering)) {
+		OUTFI("%s\n", type_variable_declaration(type->ptr.type, var_name));
+		OUTFI("if (!umove_or_printaddr(tcp, %s, &%s)) {\n",
+			  arg, var_name);
+		indent_level++;
+
+		OUTFI("tprint_indirect_begin();\n");
+		generate_printer(out, syscall, argname, var_name, entering,
+						 type->ptr.type, indent_level);
+		OUTFI("tprint_indirect_end();\n");
+
+		indent_level--;
+		OUTSI("}\n");
+	}
+}
+
+static void
+generate_templated_printer(FILE *out, struct syscall *syscall,
+						   const char *arg, struct ast_type *arg_type,
+						   struct decoder templated_decoder)
+{
+	struct {
+		char *value;
+		intmax_t template_id;
+	} substitutions[256];
+	int subs_pos = 0;
+
+	// Do a DFS over the template type to find substitution markers
+	struct dfs_stack_entry {
+		struct ast_type *template;
+		struct ast_type *actual;
+	};
+
+	struct dfs_stack_entry dfs_stack[128] = {0};
+	int stack_ptr = 0;
+
+	dfs_stack[stack_ptr] = (struct dfs_stack_entry) {
+		.template = templated_decoder.matching_type,
+		.actual = arg_type
+	};
+	stack_ptr++;
+
+	while (stack_ptr != 0) {
+		stack_ptr--;
+		struct dfs_stack_entry entry = dfs_stack[stack_ptr];
+
+		if (entry.actual == NULL || entry.template == NULL) {
+			continue;
+		}
+
+		if (strcmp(entry.actual->name, entry.template->name) != 0) {
+			continue;
+		}
+
+		struct ast_type_option_list *template_option = entry.template->options;
+		struct ast_type_option_list *actual_option = entry.actual->options;
+		for (; actual_option != NULL && template_option != NULL;
+			   actual_option = actual_option->next, template_option = template_option->next) {
+			if (template_option->option->child_type == AST_TYPE_CHILD_TEMPLATE_ID) {
+				substitutions[subs_pos].value = resolve_type_option_to_value(syscall,
+																			 actual_option->option);
+				substitutions[subs_pos].template_id = template_option->option->template.id;
+				subs_pos++;
+				continue;
+			}
+
+			if (actual_option->option->child_type != template_option->option->child_type) {
+				break;
+			}
+
+			if (template_option->option->child_type == AST_TYPE_CHILD_TYPE) {
+				dfs_stack[stack_ptr] = (struct dfs_stack_entry) {
+					.template = template_option->option->type,
+					.actual = actual_option->option->type
+				};
+				stack_ptr++;
+			}
+		}
+	}
+
+	// Output the template string and replace substitution markers with real values
+	const char *template = templated_decoder.fmt_string;
+	size_t template_len = strlen(template);
+
+	intmax_t cur = 0;
+	bool in_template_number = false;
+	for (size_t i = 0; i < template_len; ++i) {
+		if (template[i] == '$' && (template[i + 1] == '$')) {
+			OUTF("(%s)", arg);
+			i++;
+			continue;
+		}
+
+		if (template[i] == '$' && (isdigit(template[i + 1]))) {
+			cur = 0;
+			in_template_number = true;
+			continue;
+		}
+
+		if (!in_template_number) {
+			OUTC(template[i]);
+			continue;
+		}
+
+		if (isdigit(template[i])) {
+			cur = cur * 10 + (template[i] - '0');
+		}
+
+		if (!isdigit(template[i]) || i == template_len - 1) {
+			in_template_number = false;
+
+			int found = -1;
+			// find matching substitution
+			for (int j = 0; j < subs_pos; ++j) {
+				if (substitutions[j].template_id == cur) {
+					found = j;
+					break;
+				}
+			}
+
+			if (found == -1) {
+				log_warning("Template variable $%" PRIdMAX " could not be resolved!",
+							syscall->loc, cur);
+				continue;
+			}
+
+			OUTF("(%s)", substitutions[found].value);
+
+			if (i != template_len - 1) {
+				OUTC(template[i]);
+			}
+		}
+	}
+}
+
+/*
+ * Outputs a call to a function/macro to print out arg with the given type.
+ */
+static void
+generate_printer(FILE *out, struct syscall *syscall,
+				 const char *argname, const char *arg, bool entering,
+				 struct ast_type *type, int indent_level)
+{
+	for (struct decoder_list *cur = decoders; cur != NULL; cur = cur->next) {
+		if (ast_type_matching(cur->decoder.matching_type, type)) {
+			OUTFI("/* using decoder from %s:%d:%d */\n", cur->decoder.loc.file,
+				  cur->decoder.loc.lineno, cur->decoder.loc.colno);
+			generate_templated_printer(out, syscall, arg, type, cur->decoder);
+			OUTC('\n');
+			return;
+		}
+	}
+
+	if (type->type == TYPE_BASIC) {
+		if (is_signed_integer_typename(type->name)) {
+			OUTFI("PRINT_VAL_D((%s) %s);\n", type_to_ctype(type), arg);
+			return;
+		} else if (is_unsigned_integer_typename(type->name)) {
+			OUTFI("PRINT_VAL_U((%s) %s);\n", type_to_ctype(type), arg);
+			return;
+		}
+
+		log_warning("No known printer for basic type %s", syscall->loc, type->name);
+		outf_indent(indent_level, out, "#error UNHANDLED BASIC TYPE: %s\n", type->name);
+	} else if (type->type == TYPE_PTR) {
+		generate_printer_ptr(out, syscall, argname, arg, entering, type, indent_level);
+	} else if (type->type == TYPE_ORFLAGS) {
+		OUTFI("printflags(%s, %s, \"%s\");\n", type->orflags.flag_type->type->name, arg,
+			  type->orflags.dflt);
+	} else if (type->type == TYPE_XORFLAGS) {
+		OUTFI("printxval(%s, %s, \"%s\");\n", type->xorflags.flag_type->type->name, arg,
+			  type->xorflags.dflt);
+	} else if (strcmp(type->name, "stringnoz") == 0 || strcmp(type->name, "string") == 0) {
+		log_warning("Type '%s' should be wrapped in a ptr type to indicate direction",
+					syscall->loc, type->name);
+	} else if (type->type == TYPE_CONST) {
+		if (!type->constt.real_type) {
+			log_warning("Const type (%s) has no matching parent syscall argument.", syscall->loc,
+						argname);
+			return;
+		}
+		OUTFI("/* inherited parent type (%s) */\n", type_to_ctype(type->constt.real_type));
+		generate_printer(out, syscall, argname, arg, entering,
+						 type->constt.real_type, indent_level);
+	} else {
+		log_warning("Type '%s' is currently unhandled", syscall->loc, type->name);
+		outf_indent(indent_level, out, "#error UNHANDLED TYPE: %s\n", type->name);
+	}
+}
+
+static void
+generate_return_flags(FILE *out, struct syscall *syscall, int indent_level)
+{
+	struct ast_type ret = syscall->ret;
+	if (ret.type == TYPE_ORFLAGS) {
+		OUTFI("tcp->auxstr = sprintflags(\"%s\", %s, (kernel_ulong_t) tcp->u_rval);\n",
+			  ret.orflags.dflt, ret.orflags.flag_type->type->name);
+		OUTFI("return RVAL_STR;\n");
+	} else if (ret.type == TYPE_XORFLAGS) {
+		OUTFI("tcp->auxstr = xlookup(%s, (kernel_ulong_t) tcp->u_rval);\n",
+			  ret.xorflags.flag_type->type->name);
+		OUTFI("return RVAL_STR;\n");
+	} else {
+		char flags[SYSCALL_RET_FLAG_LEN];
+		get_sys_func_return_flags(flags, &ret, syscall->is_ioctl);
+		OUTFI("return %s;\n", flags);
+	}
+}
+
+/*
+ * Transforms a variant syscall name (like fcntl$F_DUPFD) to a valid C function
+ * name (like var_fcntl_F_DUPFD).
+ *
+ * The is_leaf parameter should be set if corresponding syscall is a leaf node,
+ * i.e. has no sub syscalls.
+ */
+static void
+get_variant_function_name(char out[static VARIANT_FUNC_NAME_LEN], char *variant_name, bool is_leaf)
+{
+	snprintf(out, VARIANT_FUNC_NAME_LEN, "var_%s%s", is_leaf ? "leaf_" : "", variant_name);
+	for (int i = 0; i < VARIANT_FUNC_NAME_LEN; ++i) {
+		if (out[i] == '\0') {
+			break;
+		}
+		if (out[i] == '$') {
+			out[i] = '_';
+		}
+	}
+}
+
+/*
+ * Output the start of any preprocessor conditions.
+ *
+ * For example:
+ * #ifdef linux
+ */
+void
+out_statement_condition_start(FILE *out, struct statement_condition *condition)
+{
+	if (condition == NULL) {
+		return;
+	}
+	for (size_t i = 0; i < condition->count; ++i) {
+		OUTF("%s\n", condition->values[i]);
+	}
+}
+
+/*
+ * Output the end of the specified preprocessor conditions.
+ *
+ * For example:
+ * #endif
+ */
+void
+out_statement_condition_end(FILE *out, struct statement_condition *condition)
+{
+	if (condition == NULL) {
+		return;
+	}
+	for (size_t i = 0; i < condition->count; ++i) {
+		OUTS("#endif\n\n");
+	}
+}
+
+static void
+get_decoder_prototype(char out[static DECODER_PROTOTYPE_LEN], bool internal,
+					  struct syscall *syscall, char *func_name)
+{
+	snprintf(out, DECODER_PROTOTYPE_LEN, "%sint\n"
+										 "%s(struct tcb *tcp%s)\n",
+			 internal ? "static " : "",
+			 func_name,
+			 syscall->is_ioctl ? ", unsigned int code, kernel_ulong_t arg" : "");
+}
+
+/*
+ * Prints out a decoder for the given system call.
+ */
+static void
+generate_decoder(FILE *out, struct syscall *syscall, bool is_variant, bool ioctl_fallback)
+{
+	int indent_level = 0;
+
+	out_statement_condition_start(out, syscall->conditions);
+
+	int arg_offset = 0;
+	int arg_index = 0;
+
+	if (syscall->is_ioctl) {
+		// no need to decode code, or arg for ioctl variant syscalls
+		arg_offset = 2;
+		arg_index = 2;
+	}
+
+	// determine which strategy to use depending on how many OUT ptrs there are
+	size_t out_ptrs = 0;
+	for (size_t i = arg_offset; i < syscall->arg_count; i++) {
+		if (IS_OUT_PTR(syscall->args[i].type)) {
+			out_ptrs++;
+		}
+	}
+
+	// output function declaration
+	if (is_variant) {
+		char func_name[VARIANT_FUNC_NAME_LEN];
+		get_variant_function_name(func_name, syscall->name, true);
+
+		char decoder_prototype[DECODER_PROTOTYPE_LEN];
+		get_decoder_prototype(decoder_prototype, true, syscall, func_name);
+
+		OUTSI(decoder_prototype);
+	} else {
+		OUTFI("SYS_FUNC(%s)\n", syscall->name);
+	}
+	OUTSI("{\n");
+	indent_level++;
+
+	if (syscall->is_ioctl && ioctl_fallback) {
+		OUTSI("return RVAL_DECODED;\n");
+		indent_level--;
+		OUTSI("}\n");
+		return;
+	}
+
+	char arg_val[SYSCALL_ARG_STR_LEN];
+
+	if (out_ptrs == 0) {
+		if (syscall->is_ioctl) {
+			OUTFI("tprint_arg_next();\n");
+		}
+
+		// 0 out ptrs: print all args in sysenter
+		for (size_t i = arg_offset; i < syscall->arg_count; i++) {
+			struct syscall_argument arg = syscall->args[i];
+			OUTFI("/* arg: %s (%s) */\n", arg.name, type_to_ctype(arg.type));
+			get_syscall_arg_value(arg_val, syscall, arg_index++);
+
+			generate_printer(out, syscall, arg.name, arg_val, true, arg.type,
+							 indent_level);
+
+			if (i < syscall->arg_count - 1) {
+				OUTSI("tprint_arg_next();\n");
+			}
+			OUTC('\n');
+		}
+	} else if (out_ptrs == 1) {
+		// == 1 out ptrs: print args until the out ptr in sysenter, rest in sysexit
+		size_t cur = arg_offset;
+
+		OUTSI("if (entering(tcp)) {\n");
+		indent_level++;
+
+		if (syscall->is_ioctl) {
+			OUTFI("tprint_arg_next();\n");
+		}
+
+		for (; cur < syscall->arg_count; ++cur) {
+			struct syscall_argument arg = syscall->args[cur];
+			if (IS_OUT_PTR(arg.type)) {
+				break;
+			}
+
+			OUTFI("/* arg: %s (%s) */\n", arg.name, type_to_ctype(arg.type));
+			get_syscall_arg_value(arg_val, syscall, arg_index++);
+
+			generate_printer(out, syscall, arg.name, arg_val, true, arg.type,
+							 indent_level);
+
+			if (cur < syscall->arg_count - 1) {
+				OUTSI("tprint_arg_next();\n\n");
+			}
+		}
+
+		if (cur < syscall->arg_count && IS_INOUT_PTR(syscall->args[cur].type)) {
+			get_syscall_arg_value(arg_val, syscall, cur);
+			store_single_value(out, syscall->args[cur].type, arg_val, indent_level);
+		}
+
+		OUTSI("return 0;\n");
+		indent_level--;
+		OUTSI("}\n");
+
+		if (cur < syscall->arg_count && IS_INOUT_PTR(syscall->args[cur].type)) {
+			// TODO: compare the current value with the previous value
+			//		 and print only if changed
+		}
+
+		for (; cur < syscall->arg_count; ++cur) {
+			struct syscall_argument arg = syscall->args[cur];
+			OUTFI("/* arg: %s (%s) */\n", arg.name, type_to_ctype(arg.type));
+			get_syscall_arg_value(arg_val, syscall, arg_index++);
+
+			if (IS_INOUT_PTR(syscall->args[cur].type)) {
+				generate_printer(out, syscall, arg.name, "get_tcb_priv_data(tcp)", false, arg.type,
+								 indent_level);
+			}
+
+			generate_printer(out, syscall, arg.name, arg_val, false, arg.type,
+							 indent_level);
+
+			if (cur < syscall->arg_count - 1) {
+				OUTSI("tprint_arg_next();\n");
+			}
+			OUTC('\n');
+		}
+	} else {
+		// TODO: > 1 out ptrs; store necessary ptr values using set_tcb_priv_data
+		OUTSI("#error TODO\n");
+	}
+
+	generate_return_flags(out, syscall, indent_level);
+
+	indent_level--;
+	OUTSI("}\n");
+
+	out_statement_condition_end(out, syscall->conditions);
+}
+
+/*
+ * Write out the specified #define statements.
+ */
+void
+output_defines(FILE *out, struct preprocessor_statement_list *defines)
+{
+	struct preprocessor_statement_list *cur = defines;
+	while (cur != NULL) {
+		out_statement_condition_start(out, cur->stmt.conditions);
+		OUTF("#%s\n", cur->stmt.value);
+		out_statement_condition_end(out, cur->stmt.conditions);
+		cur = cur->next;
+	}
+}
+
+/*
+ * Outputs a function which delegates to the child syscalls based on the
+ * values of the child's const-typed arguments.
+ *
+ * The is_variant flag indicates whether the group's base syscall is a child of
+ * a variant syscall itself.
+ */
+void
+output_variant_syscall_group(FILE *out, struct syscall_group *group, bool is_variant)
+{
+	int indent_level = 0;
+	if (is_variant) {
+		// variant system call
+		char func_name[VARIANT_FUNC_NAME_LEN];
+		get_variant_function_name(func_name, group->base->name, false);
+
+		char decoder_prototype[DECODER_PROTOTYPE_LEN];
+		get_decoder_prototype(decoder_prototype, false, group->base, func_name);
+
+		OUTSI(decoder_prototype);
+	} else {
+		// base system call
+		OUTFI("SYS_FUNC(%s) {\n", group->base->name);
+	}
+	OUTSI("{\n");
+	indent_level++;
+
+	OUTSI("");
+	for (size_t child = 0; child < group->child_count; child++) {
+		struct syscall_group *cur_child_grp = &group->children[child];
+		struct syscall *cur_child = cur_child_grp->base;
+
+		out_statement_condition_start(out, cur_child->conditions);
+
+		OUTS("if (");
+
+		bool first = true;
+		for (size_t arg_idx = 0; arg_idx < cur_child->arg_count; ++arg_idx) {
+			struct syscall_argument arg = cur_child->args[arg_idx];
+
+			if (arg.type->type != TYPE_CONST) {
+				continue;
+			}
+
+			if (first) {
+				first = false;
+			} else {
+				OUTS(" && ");
+			}
+
+			char arg_str[SYSCALL_ARG_STR_LEN];
+			get_syscall_arg_value(arg_str, cur_child, arg_idx);
+
+			if (arg.type->constt.value->child_type == AST_TYPE_CHILD_RANGE) {
+				OUTF("((%s) <= (%s) && (%s) <= (%s))", arg_str,
+					 resolve_type_option_to_value(cur_child, arg.type->constt.value->range.min),
+					 arg_str,
+					 resolve_type_option_to_value(cur_child, arg.type->constt.value->range.max)
+				);
+			} else {
+				OUTF("(%s) == (%s)",
+					 arg_str,
+					 resolve_type_option_to_value(cur_child, arg.type->constt.value));
+			}
+		}
+		OUTS(") {\n");
+
+		indent_level++;
+
+		char func_name[VARIANT_FUNC_NAME_LEN];
+		get_variant_function_name(func_name, cur_child->name, cur_child_grp->child_count == 0);
+		OUTFI("return %s(tcp%s);\n", func_name, cur_child->is_ioctl ? ", code, arg" : "");
+
+		indent_level--;
+		OUTSI("} else ");
+	}
+
+	OUTS("{\n");
+	indent_level++;
+
+	char func_name[VARIANT_FUNC_NAME_LEN];
+	get_variant_function_name(func_name, group->base->name, true);
+	OUTFI("return %s(tcp%s);\n", func_name, group->base->is_ioctl ? ", code, arg" : "");
+
+	indent_level--;
+	OUTSI("}\n");
+
+	indent_level--;
+	OUTSI("}\n");
+}
+
+/*
+ * Outputs a syscall group and syscall variants.
+ */
+void
+output_syscall_groups(FILE *out, struct syscall_group *groups,
+					  size_t group_count, struct syscall_group *parent)
+{
+	for (size_t i = 0; i < group_count; ++i) {
+		struct syscall_group *cur = &groups[i];
+
+		if (parent) {
+			// store the real type of const parameters based on their parent
+			for (size_t j = 0; j < cur->base->arg_count && j < parent->base->arg_count; ++j) {
+				struct syscall_argument *cur_arg = &cur->base->args[j];
+				struct syscall_argument *parent_arg = &parent->base->args[j];
+				if (cur_arg->type->type == TYPE_CONST) {
+					if (parent_arg->type->type == TYPE_CONST) {
+						cur_arg->type->constt.real_type = parent_arg->type->constt.real_type;
+					} else {
+						cur_arg->type->constt.real_type = parent_arg->type;
+					}
+				}
+			}
+		}
+
+		if (groups[i].child_count == 0) {
+			generate_decoder(out, groups[i].base, parent != NULL, false);
+			continue;
+		}
+
+		output_syscall_groups(out, groups[i].children, groups[i].child_count, &groups[i]);
+
+		if (strcmp(groups[i].base->name, "ioctl") != 0) {
+			generate_decoder(out, groups[i].base, true, true);
+
+			output_variant_syscall_group(out, &groups[i], parent != NULL);
+		}
+	}
+}
+
+bool
+generate_code(const char *in_filename, const char *out_filename, struct processed_ast *ast)
+{
+	FILE *out = fopen(out_filename, "w");
+
+	if (out == NULL) {
+		return false;
+	}
+
+	outf(out, "/* Generated by ./maint/gen/generate.sh from ./maint/gen/%s; do not edit. */\n\n", in_filename);
+	outf(out, "%s",
+		 "#include <stddef.h>\n"
+		 "#include \"generated.h\"\n\n"
+		 "typedef kernel_ulong_t kernel_size_t;\n\n"
+	);
+
+	decoders = ast->decoders;
+
+	output_defines(out, ast->preprocessor_stmts);
+	output_syscall_groups(out, ast->syscall_groups, ast->syscall_group_count, NULL);
+
+	fclose(out);
+
+	return true;
+}
diff --git a/maint/gen/deflang.h b/maint/gen/deflang.h
new file mode 100644
index 000000000..260b58260
--- /dev/null
+++ b/maint/gen/deflang.h
@@ -0,0 +1,29 @@
+#include <stdbool.h>
+#include <stdio.h>
+
+#include "ast.h"
+#include "preprocess.h"
+
+extern int yylineno;
+extern FILE *yyin;
+
+extern int last_line_location;
+extern char *cur_filename;
+
+void *
+xmalloc(size_t n);
+
+void *
+xcalloc(size_t n);
+
+extern int
+yylex_destroy(void);
+
+bool
+lexer_init_newfile(char *filename);
+
+void
+yyerror(const char *s, ...) __attribute__ ((format (printf, 1, 2)));
+
+bool
+generate_code(const char *in_filename, const char *out_filename, struct processed_ast *ast);
\ No newline at end of file
diff --git a/maint/gen/defs/common.def b/maint/gen/defs/common.def
new file mode 100644
index 000000000..24322e9c1
--- /dev/null
+++ b/maint/gen/defs/common.def
@@ -0,0 +1,49 @@
+:fd %{ printfd(tcp, $$); %}
+:uid %{ printuid($$); %}
+:gid %{ printuid($$); %}
+
+:ptr[$9, stringnoz[$1]] %{
+	if (entering(tcp)) {
+		printstrn(tcp, $$, $1);
+	} else if (syserror(tcp)) {
+		printaddr($$);
+	} else {
+		printstrn(tcp, $$, $1);
+	}
+%}
+
+:ptr[$9, string] %{
+	if (entering(tcp)) {
+		printstr(tcp, $$);
+	} else if (syserror(tcp)) {
+		printaddr($$);
+	} else {
+		printstr(tcp, $$);
+	}
+%}
+
+:path %{
+	if (entering(tcp)) {
+		printpath(tcp, $$);
+	} else if (syserror(tcp)) {
+		printaddr($$);
+	} else {
+		printpath(tcp, $$);
+	}
+%}
+
+/* ptr[in, array[uint32_t, $length]] */
+:ptr[in, array[uint32_t, $1]] %{
+	{
+		uint32_t int_buffer;
+		print_array(tcp, $$, $1, &int_buffer, sizeof(int_buffer), tfetch_mem, print_uint32_array_member, 0);
+	}
+%}
+
+:ptr[out, ulong] %{
+	if (exiting(tcp)) {
+		printnum_ulong(tcp, $$);
+	}
+%}
+
+ioctl(fd fd, code kernel_ulong_t, arg kernel_ulong_t)
diff --git a/maint/gen/lex.l b/maint/gen/lex.l
new file mode 100644
index 000000000..3e25a6145
--- /dev/null
+++ b/maint/gen/lex.l
@@ -0,0 +1,262 @@
+%option noyywrap yylineno nodefault warn
+/* %option debug */
+
+%{
+#include <stdio.h>
+#include <stdint.h>
+#include "deflang.h"
+#include "ast.h"
+#include "parse.tab.h"
+
+YYSTYPE yylval;
+YYLTYPE yylloc;
+
+static void
+update_yylloc();
+
+#define YY_USER_ACTION update_yylloc();
+
+#define MAX_IMPORT_LEVEL 10
+
+struct saved_import_state {
+	YYLTYPE location;
+	char *filename;
+	int cur_location;
+	int last_line_location;
+};
+
+// a stack to store state before an import
+static struct saved_import_state import_states[MAX_IMPORT_LEVEL];
+// the current index into import_states
+static int import_level = 0;
+
+char *cur_filename;
+
+static int cur_location;
+
+int last_line_location;
+%}
+
+%x COMMENT_MULTI
+%x COMMENT_LINE
+%x IMPORT
+
+%%
+"," return T_COMMA;
+"(" return T_LPAREN;
+")" return T_RPAREN;
+"[" return T_LBRACKET;
+"]" return T_RBRACKET;
+"{" return T_LCURLY;
+"}" return T_RCURLY;
+"=" return T_EQUALS;
+":" return T_COLON;
+
+(-)?"0x"[0-9A-Fa-f]+ {
+	yylval.number.raw = strdup(yytext);
+	yylval.number.val = strtol(yytext, NULL, 16);
+	return T_NUMBER;
+}
+
+(-)?[0-9]+ {
+	yylval.number.raw = strdup(yytext);
+	yylval.number.val = strtol(yytext, NULL, 10);
+	return T_NUMBER;
+}
+
+(-)?"0b"[01]+ {
+	int sign = (yytext[0] == '-') ? -1 : +1;
+	int offset = ((sign == -1) ? sizeof("-0b") : sizeof("0b")) - 1;
+
+	// binary literals are supported in C by GNU extension
+	yylval.number.raw = strdup(yytext);
+	yylval.number.val = sign * strtol(yytext + offset, NULL, 2);
+	return T_NUMBER;
+}
+
+\'.\' {
+	yylval.number.raw = strdup(yytext);
+	yylval.number.val = yytext[1];
+	return T_NUMBER;
+}
+
+\$[0-9]+ {
+	yylval.number.val = strtol(yytext + 1, NULL, 10);
+	yylval.number.raw = NULL;
+	return T_TEMPLATE_IDENTIFIER;
+}
+
+[A-Za-z_@\?][A-Za-z0-9_\?\$]* {
+	if (yytext[0] == '@' && strcmp(yytext, "@ret") != 0) {
+		yyerror("@ can only be used in @ret");
+		yyterminate();
+	}
+	yylval.str = strdup(yytext);
+	return T_IDENTIFIER;
+}
+
+(?x: "%{" ( [^%] | %+ [^}] )* %* "%}" ) {
+	yylval.str = strdup(yytext + 2);
+	yylval.str[strlen(yylval.str) - 2] ='\0';
+
+	return T_DECODER_SOURCE;
+}
+
+"define".+ {
+	yylval.str = strdup(yytext);
+	return T_DEFINE;
+}
+"#ifdef".+ {
+	yylval.str = strdup(yytext);
+	return T_IFDEF;
+}
+"#ifndef".+ {
+	yylval.str = strdup(yytext);
+	return T_IFNDEF;
+}
+"include".+ {
+	yylval.str = strdup(yytext);
+	return T_INCLUDE;
+}
+"#endif".* {
+	return T_ENDIF;
+}
+
+"#import \"" {
+	BEGIN(IMPORT);
+}
+<IMPORT>[^\n\"]+ {
+	if (import_level >= MAX_IMPORT_LEVEL) {
+		fprintf(stderr, "imports are nested more than %d levels\n", MAX_IMPORT_LEVEL);
+		yyterminate();
+	}
+
+	// eat characters until newline
+	int c = input();
+	cur_location++;
+	while(c && c != '\n'){
+		cur_location++;
+		c = input();
+	}
+	// update current location
+	yylloc.last_line++;
+	yylloc.last_column = 1;
+	last_line_location = cur_location;
+
+	// save current state
+	import_states[import_level++] = (struct saved_import_state) {
+		.filename = cur_filename,
+		.location = yylloc,
+		.cur_location = cur_location,
+		.last_line_location = last_line_location
+	};
+
+	cur_filename = strdup(yytext);
+
+	yylloc = (struct YYLTYPE) {1, 1, 1, 1};
+	cur_location = 0;
+	last_line_location = 0;
+
+	yyin = fopen(yytext, "r");
+
+	if (yyin == NULL) {
+		fprintf(stderr, "failed to import file '%s' on line %d\n", yytext, yylineno);
+		yyterminate();
+	}
+
+	yypush_buffer_state(yy_create_buffer(yyin, YY_BUF_SIZE));
+	BEGIN(INITIAL);
+}
+
+<<EOF>> {
+	// emit a newline at the end of a file before EOF
+	// to ensure the last statement in the file is terminated
+	static int emitted_newline;
+
+	if (!emitted_newline) {
+		emitted_newline = 1;
+		return T_NEWLINE;
+	}
+
+	emitted_newline = 0;
+
+	if (import_level > 0) {
+		free(cur_filename);
+
+		struct saved_import_state saved = import_states[--import_level];
+
+		cur_filename = saved.filename;
+		cur_location = saved.cur_location;
+		last_line_location = saved.last_line_location;
+		yylloc = saved.location;
+	}
+	yypop_buffer_state();
+	if (!YY_CURRENT_BUFFER) {
+		yyterminate();
+	}
+}
+
+"/*" {
+	BEGIN(COMMENT_MULTI);
+}
+<COMMENT_MULTI>"*/" {
+	BEGIN(INITIAL);
+}
+
+"//"|"#" {
+	BEGIN(COMMENT_LINE);
+}
+<COMMENT_LINE>\n {
+	BEGIN(INITIAL);
+}
+
+<COMMENT_LINE,COMMENT_MULTI>.|\n {}
+
+[ \t\r] {}
+
+\n {
+	return T_NEWLINE;
+}
+
+. {
+	yyerror("unexpected character: %s", yytext);
+	yyterminate();
+}
+
+%%
+
+static void
+update_yylloc()
+{
+	yylloc.first_line = yylloc.last_line;
+	yylloc.first_column = yylloc.last_column;
+
+	int i = 0;
+	while (yytext[i] != '\0') {
+		cur_location++;
+		if (yytext[i] == '\n') {
+			yylloc.last_line++;
+			yylloc.last_column = 1;
+			last_line_location = cur_location;
+		} else {
+			yylloc.last_column++;
+		}
+		i++;
+	}
+}
+
+bool
+lexer_init_newfile(char *filename)
+{
+	// clean up internal state managed by flex
+	yylex_destroy();
+
+	yyin = fopen(filename, "r");
+	if (yyin == NULL) {
+		return false;
+	}
+
+	cur_filename = filename;
+
+	return true;
+}
diff --git a/maint/gen/parse.y b/maint/gen/parse.y
new file mode 100644
index 000000000..23b773153
--- /dev/null
+++ b/maint/gen/parse.y
@@ -0,0 +1,383 @@
+%define api.token.prefix {T_}
+%define parse.lac full
+%define parse.error detailed
+
+%locations
+
+%code requires {
+#include "deflang.h"
+#include "ast.h"
+}
+
+%{
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "symbols.h"
+
+static struct ast_node *root;
+
+static void error_prev_decl(char *identifier, struct ast_node *prev);
+%}
+
+%union {
+	char* str;
+	struct ast_number number;
+
+	struct ast_node *node;
+	struct ast_type *type;
+	struct ast_type_option *type_option;
+	struct ast_type_option_list *type_option_list;
+	struct ast_syscall_arg *syscall_arg;
+	struct ast_struct_element *struct_element;
+	struct ast_flag_values *flag_values;
+}
+
+%token NEWLINE
+%token LPAREN "("
+%token RPAREN ")"
+%token LBRACKET "["
+%token RBRACKET "]"
+%token LCURLY "{"
+%token RCURLY "}"
+%token COMMA ","
+%token EQUALS "="
+%token COLON ":"
+%token <str> DEFINE "#define"
+%token <str> IFDEF "#ifdef"
+%token ENDIF "#endif"
+%token <str> IFNDEF "#ifndef"
+%token <str> INCLUDE "include"
+%token <str> IDENTIFIER
+%token <number> TEMPLATE_IDENTIFIER
+%token <number> NUMBER
+%token <str> DECODER_SOURCE
+
+%type <node> compound compound_stmt statement decoder define ifdef ifndef include syscall struct flags
+%type <type> type syscall_return_type
+%type <type_option_list> type_options
+%type <type_option> type_option type_option_range
+%type <syscall_arg> syscall_arglist syscall_arg
+%type <struct_element> struct_element struct_elements
+%type <flag_values> flag_elements
+
+%destructor { free($$); } <str>
+%destructor { free($$.raw); } <number>
+%destructor { free_ast_tree($$); } <node>
+
+%start start
+
+%%
+
+start: opt_linebreak compound_stmt
+		{
+			root = $2;
+		}
+
+opt_linebreak: linebreaks | %empty
+
+linebreaks: NEWLINE linebreaks
+	| NEWLINE
+
+compound: linebreaks compound_stmt
+		{
+			$$ = $2;
+		}
+
+compound_stmt: statement linebreaks compound_stmt
+		{
+			$1->next = $3->compound.children;
+			$3->compound.children = $1;
+			$$ = $3;
+		}
+	| statement linebreaks
+		{
+			$$ = create_ast_node(AST_COMPOUND, &@$);
+			$$->compound.children = $1;
+		}
+	| error linebreaks compound_stmt
+		{
+			$$ = $3;
+		}
+
+statement: define
+	| ifdef
+	| ifndef
+	| include
+	| syscall
+	| struct
+	| flags
+	| decoder
+
+decoder: ":" type DECODER_SOURCE
+		{
+			$$ = create_ast_node(AST_DECODER, &@$);
+			$$->decoder.type = $2;
+			$$->decoder.decoder = $3;
+		}
+
+syscall: IDENTIFIER "(" syscall_arglist ")" syscall_return_type syscall_attribute
+		{
+			$$ = create_ast_node(AST_SYSCALL, &@$);
+			$$->syscall = (struct ast_syscall) {
+				.name = $1,
+				.args = $3,
+				.return_type = $5
+			};
+
+			struct ast_node *prev_decl = symbol_add($1, $$);
+			if (prev_decl) {
+				error_prev_decl($1, prev_decl);
+				YYERROR;
+			}
+		}
+	| IDENTIFIER "(" ")" syscall_return_type syscall_attribute
+			{
+				$$ = create_ast_node(AST_SYSCALL, &@$);
+				$$->syscall = (struct ast_syscall) {
+					.name = $1,
+					.args = NULL,
+					.return_type = $4
+				};
+
+				struct ast_node *prev_decl = symbol_add($1, $$);
+				if (prev_decl) {
+					error_prev_decl($1, prev_decl);
+					YYERROR;
+				}
+			}
+
+syscall_return_type: type
+		{
+			$$ = $1;
+		}
+	| %empty
+		{
+			$$ = create_or_get_type(NULL, "void", NULL);
+		}
+
+syscall_attribute: "(" type_options ")"
+	| %empty
+
+syscall_arglist: syscall_arg
+		{
+			$$ = $1;
+		}
+	| syscall_arg "," syscall_arglist
+		{
+			$$ = $1;
+			$1->next = $3;
+		}
+
+syscall_arg: IDENTIFIER type
+		{
+			$$ = create_ast_syscall_arg($1, $2, NULL);;
+		}
+
+type: IDENTIFIER
+		{
+			char *error = NULL;
+			$$ = create_or_get_type(&error, $1, NULL);
+			if (error) {
+				yyerror("%s", error);
+				YYERROR;
+			}
+		}
+	| IDENTIFIER "[" type_options "]"
+		{
+			char *error = NULL;
+			$$ = create_or_get_type(&error, $1, $3);
+			if (error) {
+				yyerror("%s", error);
+				YYERROR;
+			}
+		}
+
+type_options: type_option_range "," type_options
+		{
+			$$ = create_ast_type_option_list($1, $3);
+		}
+	| type_option_range
+		{
+			$$ = create_ast_type_option_list($1, NULL);
+		}
+
+type_option_range: type_option ":" type_option
+		{
+			$$ = create_type_option_range($1, $3);
+		}
+	| type_option
+		{
+			$$ = $1;
+		}
+
+type_option: type
+		{
+			$$ = create_or_get_type_option_nested($1);
+		}
+	| NUMBER
+		{
+			$$ = create_or_get_type_option_number($1);
+		}
+	| TEMPLATE_IDENTIFIER
+		{
+			$$ = create_type_template_identifier($1);
+		}
+
+define: DEFINE
+		{
+		   $$ = create_ast_node(AST_DEFINE, &@$);
+		   $$->define.value = $1;
+		}
+
+ifdef: IFDEF compound ENDIF
+		{
+			$$ = create_ast_node(AST_IFDEF, &@$);
+			$$->ifdef.value = $1;
+			$$->ifdef.invert = false;
+			$$->ifdef.child = $2;
+		}
+
+ifndef: IFNDEF compound ENDIF
+		{
+			$$ = create_ast_node(AST_IFDEF, &@$);
+			$$->ifdef.value = $1;
+			$$->ifdef.invert = true;
+			$$->ifdef.child = $2;
+		}
+
+include: INCLUDE
+		{
+			$$ = create_ast_node(AST_INCLUDE, &@$);
+			$$->include.value = $1;
+		}
+
+struct: IDENTIFIER "{" linebreaks struct_elements "}" struct_attr
+		{
+			$$ = create_ast_node(AST_STRUCT, &@$);
+			$$->ast_struct.name = $1;
+			$$->ast_struct.elements = $4;
+
+			struct ast_node *prev_decl = symbol_add($1, $$);
+			if (prev_decl) {
+				error_prev_decl($1, prev_decl);
+				YYERROR;
+			}
+		}
+	| IDENTIFIER "{" linebreaks "}" struct_attr
+		{
+			yyerror("struct '%s' has no members", $1);
+			$$ = NULL;
+			YYERROR;
+		}
+
+struct_elements: struct_element struct_elements
+		{
+			$$ = $1;
+			$$->next = $2;
+		}
+	| struct_element
+		{
+			$$ = $1;
+		}
+
+struct_element: IDENTIFIER type linebreaks
+		{
+			$$ = create_ast_struct_element($1, $2, NULL);
+		}
+
+struct_attr: "[" type "]"
+	| %empty
+
+flags: IDENTIFIER "=" flag_elements
+		{
+			$$ = create_ast_node(AST_FLAGS, &@$);
+			$$->flags.name = $1;
+			$$->flags.values = $3;
+
+			struct ast_node *prev_decl = symbol_add($1, $$);
+			if (prev_decl) {
+				error_prev_decl($1, prev_decl);
+				YYERROR;
+			}
+		}
+
+flag_elements: IDENTIFIER "," flag_elements
+		{
+			$$ = create_ast_flag_values($1, $3);
+		}
+	| IDENTIFIER
+		{
+			$$ = create_ast_flag_values($1, NULL);
+		}
+
+%%
+
+static void error_prev_decl(char *identifier, struct ast_node *prev)
+{
+	yyerror("Previous declaration of %s at line %d col %d", identifier,
+			prev->loc.lineno, prev->loc.colno);
+}
+
+void
+yyerror (const char* fmt, ...)
+{
+	char buffer[257] = {0};
+
+	if (yyin == NULL) {
+		return;
+	}
+
+	long int saved = ftell(yyin);
+	fseek(yyin, last_line_location, SEEK_SET);
+	fgets(buffer, 256, yyin);
+	fseek(yyin, saved, SEEK_SET);
+
+	// add a new line if necessary
+	size_t len = strlen(buffer);
+	if (len > 0 && buffer[len - 1] != '\n') {
+		buffer[len] = '\n';
+		buffer[len + 1] = '\0';
+	}
+
+	va_list args;
+	va_start(args, fmt);
+
+	fprintf(stderr, "error %d: %s: line %d column %d\n", yynerrs, cur_filename,
+			yylloc.first_line, yylloc.first_column);
+	fprintf(stderr, "\t%s", buffer);
+	fprintf(stderr, "\t%*s ", yylloc.first_column, "^");
+	vfprintf(stderr, fmt, args);
+	fprintf(stderr, "\n");
+
+	va_end(args);
+}
+
+int
+main(int argc, char **argv)
+{
+	if (argc < 3) {
+		fprintf(stderr, "Usage: %s [input file] [output file]\n", argv[0]);
+		return EXIT_FAILURE;
+	}
+
+	if (!lexer_init_newfile(argv[1])) {
+		fprintf(stderr, "Failed to open file %s\n", argv[1]);
+		return EXIT_FAILURE;
+	}
+
+	if (yyparse() != 0) {
+		return EXIT_FAILURE;
+	}
+
+	if (!generate_code(argv[1], argv[2], preprocess(root))) {
+		free_ast_tree(root);
+		return EXIT_FAILURE;
+	}
+
+	free_ast_tree(root);
+
+	return EXIT_SUCCESS;
+}
diff --git a/maint/gen/preprocess.c b/maint/gen/preprocess.c
new file mode 100644
index 000000000..333b56293
--- /dev/null
+++ b/maint/gen/preprocess.c
@@ -0,0 +1,284 @@
+#include <assert.h>
+#include <ctype.h>
+#include <memory.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "ast.h"
+#include "deflang.h"
+#include "symbols.h"
+#include "printf.h"
+
+#define MAX_PREPROCESSOR_NEST 16
+#define MAX_SYSCALL_COUNT 4096
+
+
+struct condition_stack {
+	size_t idx;
+	char *stack[MAX_PREPROCESSOR_NEST];
+};
+
+/*
+ * Copies (pointers to) the strings stored in the condition stack into a
+ * statement_condition struct.
+ *
+ * Returns NULL if the stack is empty (i.e. there are no conditions)
+ */
+static struct statement_condition *
+create_statement_condition(struct condition_stack *stack)
+{
+	if (stack->idx == 0) {
+		return NULL;
+	}
+	struct statement_condition *ret = xmalloc((sizeof *ret) + stack->idx * (sizeof(char *)));
+	ret->count = stack->idx;
+	memcpy(ret->values, stack->stack, stack->idx * (sizeof(char *)));
+	return ret;
+}
+
+static char *
+strip_whitespace(char *str)
+{
+	if (*str == '\0') {
+		return str;
+	}
+
+	char *end = str + strlen(str) - 1;
+
+	while (end > str && isspace(*end)) {
+		end--;
+	}
+
+	end[1] = '\0';
+
+	return str;
+}
+
+
+struct processing_state {
+	struct preprocessor_statement_list *preprocessor_head;
+	struct preprocessor_statement_list *preprocessor_tail;
+	struct decoder_list *decoder_head;
+	struct struct_def *struct_stmts;
+	struct syscall **syscall_buffer;
+	size_t syscall_index;
+};
+
+/*
+ * Splits the AST into preprocessor definitions, struct definitions, and
+ * syscall definitions while maintaining the necessary information about
+ * ifdef/ifndef conditions.
+ */
+static void
+preprocess_rec(struct ast_node *root, struct condition_stack *cur,
+			   struct processing_state *state)
+{
+	if (root->type == AST_IFDEF) {
+		assert(cur->idx < MAX_PREPROCESSOR_NEST);
+		cur->stack[cur->idx] = root->ifdef.value;
+		cur->idx++;
+		preprocess_rec(root->ifdef.child, cur, state);
+		cur->idx--;
+		cur->stack[cur->idx] = NULL;
+	} else if (root->type == AST_DECODER) {
+		struct decoder_list *decoder = xmalloc(sizeof *decoder);
+		*decoder = (struct decoder_list) {
+			.decoder = {
+				.loc = root->loc,
+				.matching_type = root->decoder.type,
+				.fmt_string = strip_whitespace(root->decoder.decoder)
+			},
+			.next = state->decoder_head
+		};
+		state->decoder_head = decoder;
+	} else if (root->type == AST_DEFINE || root->type == AST_INCLUDE) {
+		struct statement_condition *conditions = create_statement_condition(cur);
+		struct preprocessor_statement_list *new = xmalloc(sizeof *new);
+
+		new->next = NULL;
+		new->stmt.conditions = conditions;
+		new->stmt.loc = root->loc;
+		if (root->type == AST_DEFINE) {
+			new->stmt.value = root->define.value;
+		} else {
+			new->stmt.value = root->include.value;
+		}
+		if (state->preprocessor_tail) {
+			state->preprocessor_tail->next = new;
+			state->preprocessor_tail = new;
+		} else {
+			state->preprocessor_head = new;
+			state->preprocessor_tail = new;
+		}
+	} else if (root->type == AST_COMPOUND) {
+		for (struct ast_node *node = root->compound.children; node != NULL; node = node->next) {
+			preprocess_rec(node, cur, state);
+		}
+	} else if (root->type == AST_SYSCALL) {
+		size_t arg_count = 0;
+		for (struct ast_syscall_arg *arg = root->syscall.args; arg != NULL; arg = arg->next) {
+			arg_count++;
+		}
+
+		struct syscall *new = xmalloc(sizeof(*new) + sizeof(struct syscall_argument) * arg_count);
+		*new = (struct syscall) {
+			.name = root->syscall.name,
+			.conditions = create_statement_condition(cur),
+			.ret = *root->syscall.return_type,
+			.arg_count = arg_count,
+			.loc = root->loc,
+			.is_ioctl = strncmp(root->syscall.name, "ioctl$", 6) == 0
+		};
+
+		size_t cur_count = 0;
+		for (struct ast_syscall_arg *arg = root->syscall.args; arg != NULL; arg = arg->next) {
+			new->args[cur_count] = (struct syscall_argument) {
+				.name = arg->name,
+				.type = arg->type
+			};
+			cur_count++;
+		}
+
+		state->syscall_buffer[state->syscall_index] = new;
+		state->syscall_index++;
+	}
+}
+
+/*
+ * Create a group of variant syscalls from a name-sorted list of syscalls.
+
+ * Returns the number of syscalls processed.
+ *
+ * For example, ["prctl" "prctl$PR_CAP_AMBIENT", "prctl$GET_FP_MODE", "ioctl"]
+ * would group together the prctl variants, store a syscall_group in out[out_idx]
+ * and returns 3.
+ */
+static size_t
+find_matching(struct syscall **syscall_buffer, size_t syscall_count,
+			  struct syscall_group *out)
+{
+	struct syscall *base = syscall_buffer[0];
+	assert(base != NULL);
+
+	size_t base_name_len = strlen(base->name);
+	size_t matching = 0;
+	for (size_t i = 1; i < syscall_count; i++) {
+		struct syscall *cur = syscall_buffer[i];
+		// all variants start with the same name as the base
+		if (strncmp(cur->name, base->name, base_name_len) != 0) {
+			break;
+		}
+		// and their last '$' is immediately after the base name
+		char *last_dollar = strrchr(cur->name, '$');
+		if (last_dollar == cur->name + base_name_len) {
+			matching++;
+		}
+	}
+
+	if (matching == 0) {
+		out[0] = (struct syscall_group) {
+			.base = base,
+			.child_count = 0,
+			.children = NULL,
+		};
+		return 1;
+	}
+
+	struct syscall_group *children = xmalloc(sizeof(struct syscall_group) * matching);
+	size_t children_idx = 0;
+
+	size_t i = 1;
+	while (i < syscall_count) {
+		struct syscall *cur = syscall_buffer[i];
+		if (strncmp(cur->name, base->name, base_name_len) != 0) {
+			break;
+		}
+		char *last_dollar = strrchr(cur->name, '$');
+		if (last_dollar != cur->name + base_name_len) {
+			// not a direct subvariant
+			fprintf(stderr, "not subvariant %s -> %s \n", base->name, cur->name);
+			i += 1;
+			continue;
+		}
+		i += find_matching(syscall_buffer + i, syscall_count - i,
+						   children + children_idx);
+		children_idx++;
+	}
+
+	assert(children_idx == matching);
+
+	out[0] = (struct syscall_group) {
+		.base = base,
+		.child_count = children_idx,
+		.children = children
+	};
+	return i;
+}
+
+static int
+syscall_comparator(const void *a, const void *b)
+{
+	const struct syscall *syscall_a = *(const struct syscall **) a;
+	const struct syscall *syscall_b = *(const struct syscall **) b;
+
+	return strcmp(syscall_a->name, syscall_b->name);
+}
+
+static struct syscall_group *
+group_syscall_variants(struct processing_state *state, size_t *out_count)
+{
+	// The idea is to sort the syscalls by name:
+	// "prctl" "prctl$GET_FP_MODE"
+	//         "prctl$PR_CAP_AMBIENT" "prctl$PR_CAP_AMBIENT$PR_CAP_AMBIENT_LOWER"
+	// This way, every variant will immediately follow the base syscall and will
+	// be grouped into a syscall_group 'find_matching'.
+
+	qsort(state->syscall_buffer, state->syscall_index,
+		  sizeof(struct syscall *), syscall_comparator);
+
+	// in the worst case (no variants), there can be MAX_SYSCALL_COUNT syscall groups
+	struct syscall_group *scratch = xcalloc(sizeof(*scratch) * MAX_SYSCALL_COUNT);
+
+	size_t groups = 0;
+	size_t i = 0;
+	while (i < state->syscall_index) {
+		i += find_matching(state->syscall_buffer + i, state->syscall_index - i, scratch + groups);
+		groups++;
+	}
+
+	struct syscall_group *ret = realloc(scratch, sizeof(*scratch) * (groups + 1));
+
+	if (ret == NULL) {
+		fprintf(stderr, "realloc failed for %zu bytes\n", sizeof(*scratch) * groups);
+		exit(1);
+	}
+
+	*out_count = groups;
+	return ret;
+}
+
+struct processed_ast *
+preprocess(struct ast_node *root)
+{
+	struct processed_ast *ret = xmalloc(sizeof *ret);
+
+	struct processing_state state = (struct processing_state) {
+		.syscall_buffer = xcalloc(sizeof(struct syscall *) * MAX_SYSCALL_COUNT),
+		.syscall_index = 0,
+		.struct_stmts = NULL,
+		.preprocessor_head = NULL,
+		.preprocessor_tail = NULL,
+		.decoder_head = NULL
+	};
+
+	struct condition_stack conditions = {.idx = 0, .stack = {0}};
+	preprocess_rec(root, &conditions, &state);
+
+	ret->preprocessor_stmts = state.preprocessor_head;
+	ret->struct_stmts = state.struct_stmts;
+	ret->syscall_groups = group_syscall_variants(&state, &ret->syscall_group_count);
+	ret->decoders = state.decoder_head;
+
+	return ret;
+}
diff --git a/maint/gen/preprocess.h b/maint/gen/preprocess.h
new file mode 100644
index 000000000..31bda8658
--- /dev/null
+++ b/maint/gen/preprocess.h
@@ -0,0 +1,110 @@
+#ifndef PREPROCESS_H
+#define PREPROCESS_H
+
+/*
+ * Stores nested #ifdef/#ifndef statements sequentially (as a stack)
+ *
+ * #ifdef test1
+ * #ifdef test2 && test3
+ * #endif
+ * #endif
+ *
+ * is stored as count = 2, ["#ifdef test1", "#ifdef test2 && test3"]
+ */
+struct statement_condition {
+	size_t count;
+	char *values[];
+};
+
+/*
+ * Stores define and include statements
+ */
+struct preprocessor_statement {
+	struct ast_loc loc;
+
+	// can be NULL
+	struct statement_condition *conditions;
+
+	char *value;
+};
+
+struct preprocessor_statement_list {
+	struct preprocessor_statement stmt;
+	struct preprocessor_statement_list *next;
+};
+
+struct struct_def {
+	struct ast_loc loc;
+
+	char *name;
+	struct statement_condition *conditions;
+	// TODO
+};
+
+struct syscall_argument {
+	char *name;
+	struct ast_type *type;
+};
+
+struct decoder {
+	struct ast_loc loc;
+
+	// the type this decoder handles
+	struct ast_type *matching_type;
+
+	// a format string containing C source code of a decoder capable of handling
+	// arguments/return values of type 'matching_type'.
+	// the first printf arg is a variable containing the value of the argument.
+	// the second printf arg is the index of the argument.
+	char *fmt_string;
+};
+
+struct decoder_list {
+	struct decoder decoder;
+	struct decoder_list *next;
+};
+
+struct syscall {
+	struct ast_loc loc;
+
+	// can be NULL
+	struct statement_condition *conditions;
+
+	// name of the syscall
+	char *name;
+
+	bool is_ioctl;
+
+	// the return value of the syscall
+	struct ast_type ret;
+
+	// the defined arguments
+	size_t arg_count;
+	struct syscall_argument args[];
+};
+
+/*
+ * A group of syscall variants.
+ *
+ * The child syscall_groups will be output first, then the base syscall
+ * will be generated.
+ */
+struct syscall_group {
+	struct syscall *base;
+
+	size_t child_count;
+	struct syscall_group *children;
+};
+
+struct processed_ast {
+	struct preprocessor_statement_list *preprocessor_stmts;
+	struct decoder_list *decoders;
+	struct struct_def *struct_stmts;
+	size_t syscall_group_count;
+	struct syscall_group *syscall_groups;
+};
+
+struct processed_ast *
+preprocess(struct ast_node *root);
+
+#endif
diff --git a/maint/gen/symbols.c b/maint/gen/symbols.c
new file mode 100644
index 000000000..bf4e1d3a0
--- /dev/null
+++ b/maint/gen/symbols.c
@@ -0,0 +1,150 @@
+/*
+ * Copyright (c) 2021 Srikavin Ramkumar <srikavinramkumar at gmail.com>
+ * Copyright (c) 2021 The strace developers.
+ * All rights reserved.
+ *
+ * SPDX-License-Identifier: LGPL-2.1-or-later
+ */
+
+#include <string.h>
+
+#include "deflang.h"
+#include "symbols.h"
+
+#define ARRAY_LEN(x) (sizeof(x) / sizeof((x)[0]))
+
+struct symbol_entry {
+	char *name;
+	struct ast_node *source;
+	struct symbol_entry *next;
+};
+
+struct symbol_entry *symbol_table;
+
+struct ast_node *
+symbol_get(char *name)
+{
+	for (struct symbol_entry *cur = symbol_table; cur != NULL; cur = cur->next) {
+		if (strcmp(cur->name, name) == 0) {
+			return cur->source;
+		}
+	}
+
+	return NULL;
+}
+
+struct ast_node *
+symbol_add(char *name, struct ast_node *source)
+{
+	struct ast_node *previous_def = symbol_get(name);
+	if (previous_def != NULL) {
+		return previous_def;
+	}
+
+	struct symbol_entry *entry = xmalloc(sizeof *entry);
+	*entry = (struct symbol_entry) {
+		.name = name,
+		.source = source,
+		.next = symbol_table
+	};
+
+	symbol_table = entry;
+
+	return NULL;
+}
+
+
+char *
+resolve_type(struct ast_type *out, char *name, struct ast_type_option_list *options)
+{
+	out->name = name;
+	out->options = options;
+	out->type = TYPE_BASIC;
+
+	struct {
+		char *name;
+		size_t expected_args;
+	} expected_options_len[] = {
+		{"const", 1},
+		{"ptr", 2},
+		{"ref", 1},
+		{"xor_flags", 3},
+		{"or_flags", 3},
+	};
+
+	size_t options_len = 0;
+	for (struct ast_type_option_list *cur = options; cur != NULL; cur = cur->next) {
+		if (cur->option->child_type == AST_TYPE_CHILD_TEMPLATE_ID) {
+			return NULL;
+		}
+
+		options_len++;
+	}
+
+	for (size_t i = 0; i < ARRAY_LEN(expected_options_len); ++i) {
+		if (strcmp(name, expected_options_len[i].name) == 0) {
+			if (options_len != expected_options_len[i].expected_args) {
+				char *error = xmalloc(128);
+				snprintf(error, 128, "type '%s' expects %zu type options; got %zu",
+						 name, expected_options_len[i].expected_args, options_len);
+				return error;
+			}
+		}
+	}
+
+	if (strcmp(name, "const") == 0) {
+		out->type = TYPE_CONST;
+		out->constt.value = options->option;
+		out->constt.real_type = NULL;
+	} else if (strcmp(name, "ptr") == 0) {
+		out->type = TYPE_PTR;
+		if (options->option->child_type != AST_TYPE_CHILD_TYPE) {
+			return "first type option for ptr must be 'in', 'out' or 'inout'";
+		}
+		if (strcmp(options->option->type->name, "in") == 0) {
+			out->ptr.dir = PTR_DIR_IN;
+		} else if (strcmp(options->option->type->name, "out") == 0) {
+			out->ptr.dir = PTR_DIR_OUT;
+		} else if (strcmp(options->option->type->name, "inout") == 0) {
+			out->ptr.dir = PTR_DIR_INOUT;
+		} else {
+			return "first type option for ptr must be 'in', 'out' or 'inout'";
+		}
+		out->ptr.type = options->next->option->type;
+	} else if (strcmp(name, "ref") == 0) {
+		out->type = TYPE_REF;
+		if (options->option->child_type != AST_TYPE_CHILD_TYPE) {
+			return "first type option for len must be the name of another argument or $ret";
+		}
+		if (strcmp(options->option->type->name, "@ret") == 0) {
+			out->ref.return_value = true;
+		} else {
+			out->ref.return_value = false;
+			out->ref.argname = options->option->type->name;
+		}
+	} else if (strcmp(name, "xor_flags") == 0) {
+		out->type = TYPE_XORFLAGS;
+		out->xorflags.flag_type = options->option;
+		if (options->option->child_type != AST_TYPE_CHILD_TYPE) {
+			return "first type option for ptr must be a string";
+		}
+		out->xorflags.dflt = options->next->option->type->name;
+		if (options->next->next->option->child_type != AST_TYPE_CHILD_TYPE) {
+			return "third type option for xor_flags must be the underlying flag type";
+		}
+		out->xorflags.underlying = options->next->next->option->type;
+	} else if (strcmp(name, "or_flags") == 0) {
+		out->type = TYPE_ORFLAGS;
+		out->orflags.flag_type = options->option;
+		if (options->option->child_type != AST_TYPE_CHILD_TYPE) {
+			return "first type option for ptr must be a string";
+		}
+		out->orflags.dflt = options->next->option->type->name;
+		if (options->next->next->option->child_type != AST_TYPE_CHILD_TYPE) {
+			return "third type option for or_flags must be the underlying flag type";
+		}
+		out->orflags.underlying = options->next->next->option->type;
+	}
+
+	return NULL;
+}
diff --git a/maint/gen/symbols.h b/maint/gen/symbols.h
new file mode 100644
index 000000000..1737ddccf
--- /dev/null
+++ b/maint/gen/symbols.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Srikavin Ramkumar <srikavinramkumar at gmail.com>
+ * Copyright (c) 2021 The strace developers.
+ * All rights reserved.
+ *
+ * SPDX-License-Identifier: LGPL-2.1-or-later
+ */
+
+#ifndef SYMBOLS_H
+#define SYMBOLS_H
+
+#include <stdbool.h>
+
+#include "ast.h"
+
+/*
+ * Returns a error string if the given type is
+ * invalid. Otherwise, returns NULL if the type is
+ * valid.
+ */
+char *
+resolve_type(struct ast_type *out, char *name, struct ast_type_option_list *options);
+
+/*
+ * Returns NULL if successfully added a symbol.
+ * If the symbol is already defined, returns the
+ * source node for the previous definition.
+ */
+struct ast_node *
+symbol_add(char *name, struct ast_node *source);
+
+/*
+ * Gets the definition of a previously added symbol.
+ * Returns NULL if symbol is not stored.
+ */
+struct ast_node *
+symbol_get(char *name);
+
+#endif //SYMBOLS_H
-- 
2.25.1



More information about the Strace-devel mailing list