A: sched_xetattr.test fails consistently on aarch64

Mike Frysinger vapier at gentoo.org
Tue Mar 14 04:57:05 UTC 2017


On 14 Mar 2017 00:13, Dmitry V. Levin wrote:
> sched_xetattr.test introduced by commit v4.16-10-gf31755f
> (tests: check decoding of sched_[gs]etattr corner cases)
> consistently fails on aarch64.
> 
> At the same time the test passes on all other architectures
> where strace is being tested regularly, both 64-bit
> (alpha, ia64, mips64, ppc64, s390x, sparc64, x86_64)
> and 32-bit (arm, hppa, mips, ppc, s390, sparc, x86).
> 
> The test failure is visible on debian, fedora, and obs farms:
> 
> http://www.einval.com/debian/strace/build-logs/arm64/2017-03-13-040306-log-asachi-TESTFAIL.txt
> https://kojipkgs.fedoraproject.org//work/tasks/5517/18365517/build.log
> https://build.opensuse.org/public/build/home:ldv_alt/openSUSE_Factory_ARM/aarch64/strace/_log
> 
> The part of test that fails on aarch64 is this harmless syscall:
> 
>         sys_sched_getattr(F8ILL_KULONG_MASK, (unsigned long) attr,
> 	                  F8ILL_KULONG_MASK | sizeof(*attr), F8ILL_KULONG_MASK);
> 
> I wouldn't be very much surprised if the test has hit some subtle kernel
> bug on aarch64.

it's failing on my system.  here's my setup:
kernel 3.18 / gcc 4.9 / binutils 2.25:
	Linux koi 3.18.0-13745-gf83aa11 #1 SMP PREEMPT Thu Feb 16 16:56:32 PST 2017 aarch64 GNU/Linux
	aarch64-cros-linux-gnu-gcc (4.9.2_cos_gg_4.9.2-r150-41f3e25635616c067b9ee272304e6f86ac8ee9db_4.9.2-r150) 4.9.x 20150123 (prerelease)
	GNU ld (binutils-2.25.51-r63-082ed0f10cf59b53381cefda2f90247e2a81015b_cos_gg) 2.25.51.20141117
userland is built with slightly newer tools:
	gcc (Gentoo 4.9.4 p1.0, pie-0.6.4) 4.9.4
	GNU ld (Gentoo 2.25.1 p1.1) 2.25.1
i haven't had a chance yet to build+boot a newer kernel.  hopefully next week.

seems like it's the F8ILL_KULONG_MASK in the size field.
if i drop that, the test passes.  which doesn't make sense ...

sched_getattr in arm64 is wired up to the common code directly:
arch/arm64/kernel/sys.c:
	#define __SYSCALL(nr, sym)  [nr] = sym,
	void * const sys_call_table[__NR_syscalls] __aligned(4096) = {
	    [0 ... __NR_syscalls - 1] = sys_ni_syscall,
	#include <asm/unistd.h>
	};

and then down the rabbit hole we go until we hit asm-generic:
arch/arm64/include/asm/unistd.h:
	#include <uapi/asm/unistd.h>
arch/arm64/include/uapi/asm/unistd.h:
	#include <asm-generic/unistd.h>
include/asm-generic/unistd.h:
	#include <uapi/asm-generic/unistd.h>
include/uapi/asm-generic/unistd.h:
	#define __NR_sched_getattr 275
	__SYSCALL(__NR_sched_getattr, sys_sched_getattr)

which means __NR_sched_getattr should take us directly to the C entry
point of sys_sched_getattr which is here:
kernel/sched/core.c:
	SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr,
	        unsigned int, size, unsigned int, flags)

so it wants unsigned int (32-bit), but strace is giving it unsigned long
(64-bit) with the high 32-bits filled.

glibc's syscall wrapper naturally passes along the full registers.
with aarch64, the assembly notion is that x regs are the full 64-bit
while w regs are the lower 32-bits.  so here we're moving 64-bits.
/usr/include/unistd.h:
	extern long int syscall (long int __sysno, ...) __THROW;
sysdeps/unix/sysv/linux/aarch64/syscall.S:
ENTRY (syscall)
        uxtw    x8, w0
        mov     x0, x1
        mov     x1, x2
        mov     x2, x3
        mov     x3, x4
        mov     x4, x5
        mov     x5, x6
        mov     x6, x7
        svc     0x0
        cmn     x0, #4095
        b.cs    1f
        RET
1:
        b       SYSCALL_ERROR
PSEUDO_END (syscall)

let's look at the kernel source first:
kernel/sched/core.c:
SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr,
        unsigned int, size, unsigned int, flags)
{
    struct sched_attr attr = {
        .size = sizeof(struct sched_attr),
    };
    struct task_struct *p;
    int retval;

    if (!uattr || pid < 0 || size > PAGE_SIZE ||
        size < SCHED_ATTR_SIZE_VER0 || flags)
        return -EINVAL;

    rcu_read_lock();
    p = find_process_by_pid(pid);   
    retval = -ESRCH;
    if (!p)
        goto out_unlock;

    retval = security_task_getscheduler(p);
    if (retval)
        goto out_unlock;

    attr.sched_policy = p->policy;
    if (p->sched_reset_on_fork)
        attr.sched_flags |= SCHED_FLAG_RESET_ON_FORK;
    if (task_has_dl_policy(p))
        __getparam_dl(p, &attr);
    else if (task_has_rt_policy(p))
        attr.sched_priority = p->rt_priority;
    else
        attr.sched_nice = task_nice(p);

    rcu_read_unlock();

    retval = sched_read_attr(uattr, &attr, size);
    return retval;

out_unlock:
    rcu_read_unlock();
    return retval;
}

static int sched_read_attr(struct sched_attr __user *uattr,
               struct sched_attr *attr,
               unsigned int usize)
{   
    int ret;

    if (!access_ok(VERIFY_WRITE, uattr, usize))
        return -EFAULT;

    /*
     * If we're handed a smaller struct than we know of,
     * ensure all the unknown bits are 0 - i.e. old
     * user-space does not get uncomplete information.
     */
    if (usize < sizeof(*attr)) {
        unsigned char *addr;
        unsigned char *end;

        addr = (void *)attr + usize;
        end  = (void *)attr + sizeof(*attr);

        for (; addr < end; addr++) {
            if (*addr)
                return -EFBIG;
        }

        attr->size = usize;
    }

    ret = copy_to_user(uattr, attr, attr->size);
    if (ret)
        return -EFAULT;

    return 0;
}

let's flip over to the disassembly.  here's from that kernel.
$ aarch64-cros-linux-gnu-objdump -dr kernel/sched/core.o
...
00000000000085d4 <SyS_sched_getattr>:
// some stack setup
    85d4:   a9b77bfd    stp x29, x30, [sp,#-144]!
    85d8:   910003fd    mov x29, sp
    85dc:   a90153f3    stp x19, x20, [sp,#16]
    85e0:   a9025bf5    stp x21, x22, [sp,#32]
    85e4:   f9001bf7    str x23, [sp,#48]
    85e8:   90000015    adrp    x21, 0 <__stack_chk_guard>
            85e8: R_AARCH64_ADR_PREL_PG_HI21    __stack_chk_guard
    85ec:   910163b4    add x20, x29, #0x58
// load arg0 (pid) into x19
    85f0:   aa0003f3    mov x19, x0
    85f4:   aa1e03e0    mov x0, x30
// load arg1 (uaddr) into x22
    85f8:   aa0103f6    mov x22, x1
// load arg2 (size) into x23
    85fc:   aa0203f7    mov x23, x2
    8600:   f90027a3    str x3, [x29,#72]
    8604:   94000000    bl  0 <_mcount>
            8604: R_AARCH64_CALL26  _mcount
    8608:   f94002a0    ldr x0, [x21]
            8608: R_AARCH64_LDST64_ABS_LO12_NC  __stack_chk_guard
    860c:   a9007e9f    stp xzr, xzr, [x20]
    8610:   f90047a0    str x0, [x29,#136]
    8614:   52800600    mov w0, #0x30                   // #48
    8618:   a9017e9f    stp xzr, xzr, [x20,#16]
    861c:   a9027e9f    stp xzr, xzr, [x20,#32]
    8620:   b9005ba0    str w0, [x29,#88]
// branch to EINVAL error path
    8624:   b4000936    cbz x22, 8748 <SyS_sched_getattr+0x174>
    8628:   37f80913    tbnz    w19, #31, 8748 <SyS_sched_getattr+0x174>
    862c:   f94027a3    ldr x3, [x29,#72]
// branch to EINVAL error path
    8630:   350008c3    cbnz    w3, 8748 <SyS_sched_getattr+0x174>
// compare w23 (low 32-bits) against 0x30 (SCHED_ATTR_SIZE_VER0)
// size < SCHED_ATTR_SIZE_VER0
    8634:   5100c2e0    sub w0, w23, #0x30
// compare the result against PAGE_SIZE (optimized)
// size > PAGE_SIZE
    8638:   713f401f    cmp w0, #0xfd0
// branch if condition is "hi" to EINVAL error path
    863c:   54000868    b.hi    8748 <SyS_sched_getattr+0x174>
    8640:   94000000    bl  0 <__rcu_read_lock>
            8640: R_AARCH64_CALL26  __rcu_read_lock
    8644:   2a1303e0    mov w0, w19
    8648:   97ffe938    bl  2b28 <find_process_by_pid>
    864c:   aa0003f3    mov x19, x0
// branch to ESRCH error path
    8650:   b4000700    cbz x0, 8730 <SyS_sched_getattr+0x15c>
    8654:   94000000    bl  0 <security_task_getscheduler>
            8654: R_AARCH64_CALL26  security_task_getscheduler
    8658:   350006e0    cbnz    w0, 8734 <SyS_sched_getattr+0x160>
    865c:   b942fe60    ldr w0, [x19,#764]
    8660:   39500261    ldrb    w1, [x19,#1024]
    8664:   b9000680    str w0, [x20,#4]
    8668:   36100081    tbz w1, #2, 8678 <SyS_sched_getattr+0xa4>
    866c:   f9400681    ldr x1, [x20,#8]
    8670:   b2400021    orr x1, x1, #0x1
    8674:   f9000681    str x1, [x20,#8]
    8678:   7100181f    cmp w0, #0x6
    867c:   54000181    b.ne    86ac <SyS_sched_getattr+0xd8>
    8680:   b9405660    ldr w0, [x19,#84]
    8684:   b9006fa0    str w0, [x29,#108]
    8688:   f9412a60    ldr x0, [x19,#592]
    868c:   f9003ba0    str x0, [x29,#112]
    8690:   f9412e60    ldr x0, [x19,#600]
    8694:   f9003fa0    str x0, [x29,#120]
    8698:   f9413260    ldr x0, [x19,#608]
    869c:   f90043a0    str x0, [x29,#128]
    86a0:   b9428260    ldr w0, [x19,#640]
    86a4:   f90033a0    str x0, [x29,#96]
    86a8:   1400000a    b   86d0 <SyS_sched_getattr+0xfc>
    86ac:   51000400    sub w0, w0, #0x1
    86b0:   7100041f    cmp w0, #0x1
    86b4:   54000088    b.hi    86c4 <SyS_sched_getattr+0xf0>
    86b8:   b9405660    ldr w0, [x19,#84]
    86bc:   b9006fa0    str w0, [x29,#108]
    86c0:   14000004    b   86d0 <SyS_sched_getattr+0xfc>
    86c4:   b9404e60    ldr w0, [x19,#76]
    86c8:   5101e000    sub w0, w0, #0x78
    86cc:   b9006ba0    str w0, [x29,#104]
    86d0:   94000000    bl  0 <__rcu_read_unlock>
            86d0: R_AARCH64_CALL26  __rcu_read_unlock
    86d4:   910003e0    mov x0, sp
    86d8:   9272c400    and x0, x0, #0xffffffffffffc000
    86dc:   aa1603e1    mov x1, x22
    86e0:   f9400400    ldr x0, [x0,#8]
// start of access_ok macro
// note that x23 here is still the full 64-bits of arg2 (size) afaict.
// looks like the bug is here.
    86e4:   ab170021    adds    x1, x1, x23
    86e8:   fa403022    ccmp    x1, x0, #0x2, cc
    86ec:   9a9f87e2    cset    x2, ls
// if check failed, fall through, else jump over
    86f0:   b5000062    cbnz    x2, 86fc <SyS_sched_getattr+0x128>
// error code path (14 == EFAULT)
    86f4:   928001a0    mov x0, #0xfffffffffffffff2     // #-14
    86f8:   14000015    b   874c <SyS_sched_getattr+0x178>
    86fc:   b9405ba2    ldr w2, [x29,#88]
    8700:   aa1603e1    mov x1, x22
    8704:   ab020021    adds    x1, x1, x2
    8708:   fa403022    ccmp    x1, x0, #0x2, cc
    870c:   9a9f87e3    cset    x3, ls
    8710:   b40000a3    cbz x3, 8724 <SyS_sched_getattr+0x150>
    8714:   aa1603e0    mov x0, x22
    8718:   aa1403e1    mov x1, x20
    871c:   94000000    bl  0 <__copy_to_user>
            871c: R_AARCH64_CALL26  __copy_to_user
    8720:   aa0003e2    mov x2, x0
    8724:   d2800000    mov x0, #0x0                    // #0
    8728:   34000122    cbz w2, 874c <SyS_sched_getattr+0x178>
// branch to EFAULT error path
    872c:   17fffff2    b   86f4 <SyS_sched_getattr+0x120>
// error code path (3 == ESRCH)
    8730:   12800040    mov w0, #0xfffffffd             // #-3
    8734:   f90027a0    str x0, [x29,#72]
    8738:   94000000    bl  0 <__rcu_read_unlock>
            8738: R_AARCH64_CALL26  __rcu_read_unlock
    873c:   f94027a0    ldr x0, [x29,#72]
    8740:   93407c00    sxtw    x0, w0
    8744:   14000002    b   874c <SyS_sched_getattr+0x178>
// error code path (22 == -EINVAL)
    8748:   928002a0    mov x0, #0xffffffffffffffea     // #-22
    874c:   f94047a2    ldr x2, [x29,#136]
    8750:   f94002a1    ldr x1, [x21]
            8750: R_AARCH64_LDST64_ABS_LO12_NC  __stack_chk_guard
    8754:   eb01005f    cmp x2, x1
    8758:   54000040    b.eq    8760 <SyS_sched_getattr+0x18c>
    875c:   94000000    bl  0 <__stack_chk_fail>
            875c: R_AARCH64_CALL26  __stack_chk_fail
    8760:   a94153f3    ldp x19, x20, [sp,#16]
    8764:   a9425bf5    ldp x21, x22, [sp,#32]
    8768:   f9401bf7    ldr x23, [sp,#48]
    876c:   a8c97bfd    ldp x29, x30, [sp],#144
    8770:   d65f03c0    ret

so the generated code from the C code looks OK at a glance (the initial
size checks around EINVAL).  but the inline asm w/access_ok looks broken.

let's try a little reduced code.  dropping static from sched_read_attr
to force it to uninline:
int sched_read_attr(struct sched_attr __user *uattr,
               struct sched_attr *attr,
               unsigned int usize)
{   
...
0000000000004334 <sched_read_attr>:
    4334:       a9bf7bfd        stp     x29, x30, [sp,#-16]!
    4338:       d5384103        mrs     x3, sp_el0
    433c:       910003fd        mov     x29, sp
    4340:       f9400464        ldr     x4, [x3,#8]
    4344:       aa0003e3        mov     x3, x0
// start of access_ok macro
// we still see that the full 64-bits are checked from x2
    4348:       ab020063        adds    x3, x3, x2
    434c:       fa443062        ccmp    x3, x4, #0x2, cc
    4350:       9a9f87e5        cset    x5, ls
    4354:       b5000065        cbnz    x5, 4360 <sched_read_attr+0x2c>
    4358:       128001a0        mov     w0, #0xfffffff2                 // #-14
    435c:       1400001a        b       43c4 <sched_read_attr+0x90>
...
    43c4:       a8c17bfd        ldp     x29, x30, [sp],#16
    43c8:       d65f03c0        ret

so there's a disagreement in the overall system somewhere.
i'd assume either the kernel's implementation of access_ok,
or gcc's handling of the inline assembly.

once i have access to a local system and reduce a bit more,
i'll file some upstream bugs.
-mike
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 833 bytes
Desc: Digital signature
URL: <http://lists.strace.io/pipermail/strace-devel/attachments/20170314/de5c3bd1/attachment.bin>


More information about the Strace-devel mailing list