[PATCH] unwind-libdw: add caching
Jann Horn
jannh at google.com
Sun Mar 22 19:40:22 UTC 2020
libdw's address-to-symbol logic is quite slow. Since it is normal to see
the same call sites over and over again when stracing a process, let's
address that by adding a straightforward N-way associative cache in front
of libdw.
The cache parameters could probably use some fine-tuning, but ought to be
good enough for now.
The resulting performance improvement:
$ time ./strace-orig -o /dev/null -k bash -c 'for i in {0..10000}; do echo; done' >/dev/null
real 0m12,014s
user 0m11,512s
sys 0m0,575s
$ time ./strace-cache -o /dev/null -k bash -c 'for i in {0..10000}; do echo; done' >/dev/null
real 0m0,765s
user 0m0,451s
sys 0m0,371s
---
unwind-libdw.c | 101 +++++++++++++++++++++++++++++++++++++++----------
1 file changed, 82 insertions(+), 19 deletions(-)
diff --git a/unwind-libdw.c b/unwind-libdw.c
index 871f74a3..8f821265 100644
--- a/unwind-libdw.c
+++ b/unwind-libdw.c
@@ -19,12 +19,32 @@
#include "mmap_notify.h"
#include <elfutils/libdwfl.h>
+#define STRACE_UW_CACHE_SIZE 2048
+#define STRACE_UW_CACHE_ASSOC 32
+
+struct cache_entry {
+ /* key */
+ Dwarf_Addr pc;
+ unsigned long long generation;
+
+ /* value */
+ const char *modname;
+ const char *symname;
+ GElf_Off off;
+ Dwarf_Addr true_offset;
+
+ /* replacement */
+ unsigned long long last_use;
+};
+
struct ctx {
Dwfl *dwfl;
- unsigned int last_proc_updating;
+ unsigned long long last_proc_updating;
+ struct cache_entry cache[STRACE_UW_CACHE_SIZE];
};
-static unsigned int mapping_generation;
+static unsigned long long mapping_generation = 1;
+static unsigned long long uwcache_clock;
static void
update_mapping_generation(struct tcb *tcp, void *unused)
@@ -70,6 +90,7 @@ tcb_init(struct tcb *tcp)
struct ctx *ctx = xmalloc(sizeof(*ctx));
ctx->dwfl = dwfl;
ctx->last_proc_updating = mapping_generation - 1;
+ memset(ctx->cache, 0, sizeof(ctx->cache));
return ctx;
}
@@ -113,8 +134,33 @@ struct frame_user_data {
unwind_error_action_fn error_action;
void *data;
int stack_depth;
+ struct ctx *ctx;
};
+static bool find_bucket(struct ctx *ctx, Dwarf_Addr pc, struct cache_entry **res) {
+ unsigned int idx = pc & ((STRACE_UW_CACHE_SIZE-1) &
+ ~(STRACE_UW_CACHE_ASSOC-1));
+ struct cache_entry *unused = NULL;
+ struct cache_entry *lru = ctx->cache + idx;
+ for (int i=0; i<STRACE_UW_CACHE_ASSOC; i++) {
+ struct cache_entry *ce = ctx->cache + (idx + i);
+ if (ce->generation == mapping_generation && ce->pc == pc) {
+ ce->last_use = uwcache_clock++;
+ *res = ce;
+ return true;
+ }
+ if (ce->generation != mapping_generation) {
+ unused = ce;
+ continue;
+ }
+ if (ce->last_use < lru->last_use)
+ lru = ce;
+ }
+ *res = unused ? unused : lru;
+
+ return false;
+}
+
static int
frame_callback(Dwfl_Frame *state, void *arg)
{
@@ -130,24 +176,40 @@ frame_callback(Dwfl_Frame *state, void *arg)
if (!isactivation)
pc--;
- Dwfl *dwfl = dwfl_thread_dwfl(dwfl_frame_thread(state));
- Dwfl_Module *mod = dwfl_addrmodule(dwfl, pc);
- GElf_Off off = 0;
-
- if (mod != NULL) {
- const char *modname = NULL;
- const char *symname = NULL;
- GElf_Sym sym;
- Dwarf_Addr true_offset = pc;
-
- modname = dwfl_module_info(mod, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL);
- symname = dwfl_module_addrinfo(mod, pc, &off, &sym,
- NULL, NULL, NULL);
- dwfl_module_relocate_address(mod, &true_offset);
- user_data->call_action(user_data->data, modname, symname,
- off, true_offset);
+ struct cache_entry *ce;
+ if (find_bucket(user_data->ctx, pc, &ce)) {
+ user_data->call_action(user_data->data,
+ ce->modname, ce->symname,
+ ce->off, ce->true_offset);
+ } else {
+ Dwfl *dwfl = dwfl_thread_dwfl(dwfl_frame_thread(state));
+ Dwfl_Module *mod = dwfl_addrmodule(dwfl, pc);
+ GElf_Off off = 0;
+
+ if (mod != NULL) {
+ const char *modname = NULL;
+ const char *symname = NULL;
+ GElf_Sym sym;
+ Dwarf_Addr true_offset = pc;
+
+ modname = dwfl_module_info(mod, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL);
+ symname = dwfl_module_addrinfo(mod, pc, &off, &sym,
+ NULL, NULL, NULL);
+ dwfl_module_relocate_address(mod, &true_offset);
+ user_data->call_action(user_data->data, modname, symname,
+ off, true_offset);
+
+ ce->generation = mapping_generation;
+ ce->pc = pc;
+ ce->modname = modname;
+ ce->symname = symname;
+ ce->off = off;
+ ce->true_offset = true_offset;
+ ce->last_use = uwcache_clock++;
+ }
}
+
/* Max number of frames to print reached? */
if (user_data->stack_depth-- == 0)
return DWARF_CB_ABORT;
@@ -170,6 +232,7 @@ tcb_walk(struct tcb *tcp,
.error_action = error_action,
.data = data,
.stack_depth = 256,
+ .ctx = ctx,
};
flush_cache_maybe(tcp);
--
2.25.1
More information about the Strace-devel
mailing list