[PATCH] RFC: clean up mmap decoding

Denys Vlasenko dvlasenk at redhat.com
Mon Feb 18 20:27:19 UTC 2013


mmap handling is a mess. We try to merge too many similar,
but different ways of decoding it. For example, sys_old_mmap
is "params in memory" API... except SH[64], where it is
"params in regs", i.e. what *sys_mmap* ("new mmap")
does on other arches!

It's much simpler when mmap handlers are cleanly split
by API (whether params are in regs or in memory,
and whether offset is byte or page).

Then we just insert correct function pointers into
arch syscall tables.

It turns out there are only three common mmap APIs over
all architectures which exist in Linux kernel,
and two outliers (for SH and S390).

A number of mmap decoders were plain wrong in arch tables.
For example, BFIN has no old_mmap. It returns ENOSYS.
I checked kernel sources.

There was dead code for x86_64 for old_mmap:
x86_64 has no old_mmap.

I will polish this change a bit more, but it is largely complete.
However, it is not that simple. Dmitry, can you look over this patch?
-- 
vda


diff --git a/linux/arm/syscallent.h b/linux/arm/syscallent.h
index 123b910..a331e8f 100644
--- a/linux/arm/syscallent.h
+++ b/linux/arm/syscallent.h
@@ -219,7 +219,7 @@
 	{ 5,	0,	sys_putpmsg,		"putpmsg"	}, /* 189 */
 	{ 0,	TP,	sys_vfork,		"vfork"		}, /* 190 */
 	{ 2,	0,	sys_getrlimit,		"getrlimit"	}, /* 191 */
-	{ 6,	TD|TM,	sys_mmap,		"mmap2"		}, /* 192 */
+	{ 6,	TD|TM,	sys_mmap_pgoff,		"mmap2"		}, /* 192 */
 	{ 4,	TF,	sys_truncate64,		"truncate64"	}, /* 193 */
 	{ 4,	TF,	sys_ftruncate64,	"ftruncate64"	}, /* 194 */
 	{ 2,	TF,	sys_stat64,		"stat64"	}, /* 195 */
diff --git a/linux/bfin/syscallent.h b/linux/bfin/syscallent.h
index 05194fb..be8f532 100644
--- a/linux/bfin/syscallent.h
+++ b/linux/bfin/syscallent.h
@@ -116,7 +116,7 @@
 	{ 2,	TF,	sys_swapon,		"swapon"	}, /* 87 */
 	{ 4,	0,	sys_reboot,		"reboot"	}, /* 88 */
 	{ 3,	TD,	sys_readdir,		"readdir"	}, /* 89 */
-	{ 6,	TD|TM,	sys_old_mmap,		"old_mmap"	}, /* 90 */
+	{ 6,	TD|TM,	printargs,		"old_mmap"	}, /* 90: not implemented in kernel */
 	{ 2,	TM,	sys_munmap,		"munmap"	}, /* 91 */
 	{ 2,	TF,	sys_truncate,		"truncate"	}, /* 92 */
 	{ 2,	TD,	sys_ftruncate,		"ftruncate"	}, /* 93 */
@@ -218,7 +218,7 @@
 	{ 5,	0,	sys_putpmsg,		"putpmsg"	}, /* 189 */
 	{ 0,	TP,	sys_vfork,		"vfork"		}, /* 190 */
 	{ 2,	0,	sys_getrlimit,		"getrlimit"	}, /* 191 */
-	{ 6,	TD|TM,	sys_mmap,		"mmap2"		}, /* 192 */
+	{ 6,	TD|TM,	sys_mmap_pgoff,		"mmap2"		}, /* 192 */
 	{ 3,	TF,	sys_truncate64,		"truncate64"	}, /* 193 */
 	{ 3,	TD,	sys_ftruncate64,	"ftruncate64"	}, /* 194 */
 	{ 2,	TF,	sys_stat64,		"stat64"	}, /* 195 */
diff --git a/linux/i386/syscallent.h b/linux/i386/syscallent.h
index ee61933..3d1e738 100644
--- a/linux/i386/syscallent.h
+++ b/linux/i386/syscallent.h
@@ -219,7 +219,7 @@
 	{ 5,	0,	sys_putpmsg,		"putpmsg"	}, /* 189 */
 	{ 0,	TP,	sys_vfork,		"vfork"		}, /* 190 */
 	{ 2,	0,	sys_getrlimit,		"getrlimit"	}, /* 191 */
-	{ 6,	TD|TM,	sys_mmap,		"mmap2"		}, /* 192 */
+	{ 6,	TD|TM,	sys_mmap_pgoff,		"mmap2"		}, /* 192 */
 	{ 3,	TF,	sys_truncate64,		"truncate64"	}, /* 193 */
 	{ 3,	TD,	sys_ftruncate64,	"ftruncate64"	}, /* 194 */
 	{ 2,	TF,	sys_stat64,		"stat64"	}, /* 195 */
diff --git a/linux/m68k/syscallent.h b/linux/m68k/syscallent.h
index 184f01c..5922788 100644
--- a/linux/m68k/syscallent.h
+++ b/linux/m68k/syscallent.h
@@ -218,7 +218,7 @@
 	{ 5,	0,	sys_putpmsg,		"putpmsg"	}, /* 189 */
 	{ 0,	TP,	sys_vfork,		"vfork"		}, /* 190 */
 	{ 2,	0,	sys_getrlimit,		"getrlimit"	}, /* 191 */
-	{ 6,	TD|TM,	sys_mmap,		"mmap2"		}, /* 192 */
+	{ 6,	TD|TM,	sys_mmap_pgoff,		"mmap2"		}, /* 192 */
 	{ 3,	TF,	sys_truncate64,		"truncate64"	}, /* 193 */
 	{ 3,	TF,	sys_ftruncate64,	"ftruncate64"	}, /* 194 */
 	{ 2,	TF,	sys_stat64,		"stat64"	}, /* 195 */
diff --git a/linux/microblaze/syscallent.h b/linux/microblaze/syscallent.h
index c5a52fa..0c8fbbd 100644
--- a/linux/microblaze/syscallent.h
+++ b/linux/microblaze/syscallent.h
@@ -116,7 +116,7 @@
 	{ 2,	TF,	sys_swapon,		"swapon"	}, /* 87 */
 	{ 4,	0,	sys_reboot,		"reboot"	}, /* 88 */
 	{ 3,	0,	sys_readdir,		"readdir"	}, /* 89 */
-	{ 6,	TD|TM,	sys_old_mmap,		"old_mmap"	}, /* 90 */
+	{ 6,	TD|TM,	sys_mmap,		"old_mmap"	}, /* 90 */
 	{ 2,	TM,	sys_munmap,		"munmap"	}, /* 91 */
 	{ 2,	TF,	sys_truncate,		"truncate"	}, /* 92 */
 	{ 2,	0,	sys_ftruncate,		"ftruncate"	}, /* 93 */
@@ -218,7 +218,7 @@
 	{ 5,	0,	sys_putpmsg,		"putpmsg"	}, /* 189 */
 	{ 0,	TP,	sys_vfork,		"vfork"		}, /* 190 */
 	{ 2,	0,	sys_getrlimit,		"getrlimit"	}, /* 191 */
-	{ 6,	TD|TM,	sys_mmap,		"mmap2"		}, /* 192 */
+	{ 6,	TD|TM,	sys_mmap_pgoff,		"mmap2"		}, /* 192 */
 	{ 3,	TF,	sys_truncate64,		"truncate64"	}, /* 193 */
 	{ 3,	TF,	sys_ftruncate64,	"ftruncate64"	}, /* 194 */
 	{ 2,	TF,	sys_stat64,		"stat64"	}, /* 195 */
diff --git a/linux/s390/syscallent.h b/linux/s390/syscallent.h
index 94f4d25..a336e02 100644
--- a/linux/s390/syscallent.h
+++ b/linux/s390/syscallent.h
@@ -220,7 +220,7 @@
 	{ 5,	0,	sys_putpmsg,		"putpmsg"	}, /* 189 */
 	{ 0,	TP,	sys_vfork,		"vfork"		}, /* 190 */
 	{ 2,	0,	sys_getrlimit,		"getrlimit"	}, /* 191 */
-	{ 6,	TD|TM,	sys_mmap,		"mmap2"		}, /* 192 */
+	{ 6,	TD|TM,	sys_old_mmap_pgoff,	"mmap2"		}, /* 192 */
 	{ 2,	TF,	sys_truncate64,		"truncate64"	}, /* 193 */
 	{ 2,	TD,	sys_ftruncate64,	"ftruncate64"	}, /* 194 */
 	{ 2,	TF,	sys_stat64,		"stat64"	}, /* 195 */
diff --git a/linux/sh/syscallent.h b/linux/sh/syscallent.h
index 1546ff6..b647046 100644
--- a/linux/sh/syscallent.h
+++ b/linux/sh/syscallent.h
@@ -118,7 +118,7 @@
 	{ 2,	TF,	sys_swapon,		"swapon"	}, /* 87 */
 	{ 4,	0,	sys_reboot,		"reboot"	}, /* 88 */
 	{ 3,	TD,	sys_readdir,		"readdir"	}, /* 89 */
-	{ 6,	TD|TM,	sys_old_mmap,		"old_mmap"	}, /* 90 */
+	{ 6,	TD|TM,	sys_mmap,		"old_mmap"	}, /* 90 */
 	{ 2,	TM,	sys_munmap,		"munmap"	}, /* 91 */
 	{ 2,	TF,	sys_truncate,		"truncate"	}, /* 92 */
 	{ 2,	TD,	sys_ftruncate,		"ftruncate"	}, /* 93 */
@@ -221,7 +221,7 @@
 	{ 5,	0,	NULL,			NULL		}, /* 189 */
 	{ 0,	TP,	sys_vfork,		"vfork"		}, /* 190 */
 	{ 5,	0,	printargs,		"getrlimit"	}, /* 191 */
-	{ 6,	TD|TM,	sys_mmap,		"mmap2"		}, /* 192 */
+	{ 6,	TD|TM,	sys_mmap_4koff,		"mmap2"		}, /* 192 */
 	{ 5,	0,	sys_truncate64,		"truncate64"	}, /* 193 */
 	{ 5,	TD,	sys_ftruncate64,	"ftruncate64"	}, /* 194 */
 	{ 2,	TF,	sys_stat64,		"stat64"	}, /* 195 */
diff --git a/linux/sh64/syscallent.h b/linux/sh64/syscallent.h
index 4e20c47..0591f07 100644
--- a/linux/sh64/syscallent.h
+++ b/linux/sh64/syscallent.h
@@ -116,7 +116,7 @@
 	{ 2,	TF,	sys_swapon,		"swapon"	}, /* 87 */
 	{ 4,	0,	sys_reboot,		"reboot"	}, /* 88 */
 	{ 3,	TD,	sys_readdir,		"readdir"	}, /* 89 */
-	{ 6,	TD|TM,	sys_old_mmap,		"old_mmap"	}, /* 90 */
+	{ 6,	TD|TM,	sys_mmap,		"old_mmap"	}, /* 90 */
 	{ 2,	TM,	sys_munmap,		"munmap"	}, /* 91 */
 	{ 2,	TF,	sys_truncate,		"truncate"	}, /* 92 */
 	{ 2,	TD,	sys_ftruncate,		"ftruncate"	}, /* 93 */
@@ -218,7 +218,7 @@
 	{ 5,	0,	NULL,			NULL		}, /* 189 */
 	{ 0,	TP,	sys_vfork,		"vfork"		}, /* 190 */
 	{ 2,	0,	printargs,		"getrlimit"	}, /* 191 */
-	{ 6,	TD|TM,	sys_mmap,		"mmap2"		}, /* 192 */
+	{ 6,	TD|TM,	sys_mmap_4koff,		"mmap2"		}, /* 192 */
 	{ 2,	TF,	sys_truncate64,		"truncate64"	}, /* 193 */
 	{ 2,	TD,	sys_ftruncate64,	"ftruncate64"	}, /* 194 */
 	{ 2,	TF,	sys_stat64,		"stat64"	}, /* 195 */
diff --git a/linux/syscall.h b/linux/syscall.h
index 0107564..39c0ff2 100644
--- a/linux/syscall.h
+++ b/linux/syscall.h
@@ -138,6 +138,8 @@ int sys_mknod();
 int sys_mknodat();
 int sys_mlockall();
 int sys_mmap();
+int sys_mmap_pgoff();
+int sys_mmap_4koff();
 int sys_modify_ldt();
 int sys_mount();
 int sys_move_pages();
@@ -157,6 +159,7 @@ int sys_munmap();
 int sys_nanosleep();
 int sys_newfstatat();
 int sys_old_mmap();
+int sys_old_mmap_pgoff();
 int sys_oldfstat();
 int sys_oldlstat();
 int sys_oldselect();
diff --git a/mem.c b/mem.c
index b67a1b6..bbc738b 100644
--- a/mem.c
+++ b/mem.c
@@ -39,8 +39,13 @@
 #  define modify_ldt_ldt_s user_desc
 # endif
 #endif
-#if defined(SH64)
-# include <asm/page.h>	    /* for PAGE_SHIFT */
+#include <sys/user.h>		/* for PAGE_SHIFT */
+//#if defined(SH64)
+//# include <asm/page.h>	/* for PAGE_SHIFT */
+//#endif
+#if !defined(PAGE_SHIFT)
+# warning Failed to get PAGE_SHIFT, assuming 12
+# define PAGE_SHIFT 12
 #endif

 int
@@ -236,8 +241,19 @@ print_mmap(struct tcb *tcp, long *u_arg, unsigned long long offset)
 	return RVAL_HEX;
 }

-int sys_old_mmap(struct tcb *tcp)
+/* Syscall name<->function correspondence is messed up on many arches.
+ * For example:
+ * i386 has __NR_mmap == 90, and it is "old mmap", and
+ * also it has __NR_mmap2 == 192, which is a "new mmap with page offsets".
+ * But x86_64 has just one __NR_mmap == 9, a "new mmap with byte offsets".
+ * Confused? Me too!
+ */
+
+/* Params are pointed to by u_arg[0], offset is in bytes */
+int
+sys_old_mmap(struct tcb *tcp)
 {
+	long u_arg[6];
 #if defined(IA64)
 	/*
 	 * IA64 processes never call this routine, they only use the
@@ -246,17 +262,12 @@ int sys_old_mmap(struct tcb *tcp)
 	 * that they pushed onto the stack, into longs.
 	 */
 	int i;
-	long u_arg[6];
 	unsigned narrow_arg[6];
 	if (umoven(tcp, tcp->u_arg[0], sizeof(narrow_arg), (char *) narrow_arg) == -1)
 		return 0;
 	for (i = 0; i < 6; i++)
 		u_arg[i] = narrow_arg[i];
-#elif defined(SH) || defined(SH64)
-	/* SH has always passed the args in registers */
-	long *u_arg = tcp->u_arg;
 #elif defined(X86_64)
-	long u_arg[6];
 	if (current_personality == 1) {
 		int i;
 		unsigned narrow_arg[6];
@@ -265,41 +276,79 @@ int sys_old_mmap(struct tcb *tcp)
 		for (i = 0; i < 6; ++i)
 			u_arg[i] = narrow_arg[i];
 	} else {
-		if (umoven(tcp, tcp->u_arg[0], sizeof(u_arg), (char *) u_arg) == -1)
-			return 0;
+		fprintf(stderr, "BUG: x86_64/x32 syscall table must not contain sys_old_mmap\n");
+		return 0;
 	}
 #else
-	long u_arg[6];
 	if (umoven(tcp, tcp->u_arg[0], sizeof(u_arg), (char *) u_arg) == -1)
 		return 0;
 #endif
 	return print_mmap(tcp, u_arg, (unsigned long)u_arg[5]);
 }

+#if defined(S390) || defined(S390X)
+/* Params are pointed to by u_arg[0], offset is in pages */
+int
+sys_old_mmap_pgoff(struct tcb *tcp)
+{
+	long u_arg[5];
+	int i;
+	unsigned narrow_arg[6];
+	unsigned long long offset;
+	if (umoven(tcp, tcp->u_arg[0], sizeof(narrow_arg), (char *) narrow_arg) == -1)
+		return 0;
+	for (i = 0; i < 5; i++)
+		u_arg[i] = narrow_arg[i];
+	offset = narrow_arg[5];
+	offset <<= PAGE_SHIFT;
+	return print_mmap(tcp, u_arg, offset);
+}
+#endif
+
+/* Params are passed directly, offset is in bytes */
 int
 sys_mmap(struct tcb *tcp)
 {
 	unsigned long long offset = (unsigned long) tcp->u_arg[5];

-#if defined(SH64)
-	/*
-	 * Old mmap differs from new mmap in specifying the
-	 * offset in units of bytes rather than pages.  We
-	 * pretend it's in byte units so the user only ever
-	 * sees bytes in the printout.
-	 */
-	offset <<= PAGE_SHIFT;
-#elif defined(I386)
-	/* Try test/mmap_offset_decode.c */
-	offset <<= 12; /* 4096 byte pages */
-#elif defined(LINUX_MIPSN32) || defined(X32)
+#if defined(LINUX_MIPSN32) || defined(X32)
 	/* Try test/x32_mmap.c */
 	/* At least for X32 it definitely should not be page-shifted! */
 	offset = tcp->ext_arg[5];
+#elif defined(X86_64)
+	/* Should not be page-shifted -
+	 * arch/x86/kernel/sys_x86_64.c::SYSCALL_DEFINE6(mmap, ...) does
+	 * error = sys_mmap_pgoff(..., off >> PAGE_SHIFT); i.e. off is bytes,
+	 * it converts them to pages (and back, inside sys_mmap_pgoff).
+	 */
 #endif
 	return print_mmap(tcp, tcp->u_arg, offset);
 }

+/* Params are passed directly, offset is in pages */
+int
+sys_mmap_pgoff(struct tcb *tcp)
+{
+	/* Try test/mmap_offset_decode.c */
+	unsigned long long offset;
+	offset = (unsigned long) tcp->u_arg[5];
+	offset <<= PAGE_SHIFT;
+	return print_mmap(tcp, tcp->u_arg, offset);
+}
+
+#if defined(SH) || defined(SH64)
+/* Params are passed directly, offset is in 4k units */
+/* (SH can have 4, 8, 16 or 64k pages) */
+int
+sys_mmap_4koff(struct tcb *tcp)
+{
+	unsigned long long offset;
+	offset = (unsigned long) tcp->u_arg[5];
+	offset <<= 12;
+	return print_mmap(tcp, tcp->u_arg, offset);
+}
+#endif
+
 int
 sys_munmap(struct tcb *tcp)
 {




More information about the Strace-devel mailing list