[alsa-devel] sound: use-after-free in snd_timer_interrupt
Hello,
The following program triggers use-after-free in snd_timer_interrupt:
// autogenerated by syzkaller (http://github.com/google/syzkaller) #include <unistd.h> #include <sys/syscall.h> #include <string.h> #include <stdint.h> #include <pthread.h>
long r[84];
void *thr(void *arg) { switch ((long)arg) { case 0: r[0] = syscall(SYS_mmap, 0x20000000ul, 0xe000ul, 0x3ul, 0x32ul, 0xfffffffffffffffful, 0x0ul); break; case 1: memcpy((void*)0x20000990, "\x2f\x64\x65\x76\x2f\x73\x6e\x64\x2f\x74\x69\x6d\x65\x72", 14); r[2] = syscall(SYS_open, 0x20000990ul, 0x1ul, 0x0ul, 0, 0, 0); break; case 2: *(uint32_t*)0x20000000 = (uint32_t)0x1; *(uint32_t*)0x20000004 = (uint32_t)0x7; *(uint32_t*)0x20000008 = (uint32_t)0x3; *(uint32_t*)0x2000000c = (uint32_t)0x0; *(uint32_t*)0x20000010 = (uint32_t)0x0; *(uint8_t*)0x20000014 = (uint8_t)0x0; *(uint8_t*)0x20000015 = (uint8_t)0x0; *(uint8_t*)0x20000016 = (uint8_t)0x0; *(uint8_t*)0x20000017 = (uint8_t)0x0; *(uint8_t*)0x20000018 = (uint8_t)0x0; *(uint8_t*)0x20000019 = (uint8_t)0x0; *(uint8_t*)0x2000001a = (uint8_t)0x0; *(uint8_t*)0x2000001b = (uint8_t)0x0; *(uint8_t*)0x2000001c = (uint8_t)0x0; *(uint8_t*)0x2000001d = (uint8_t)0x0; *(uint8_t*)0x2000001e = (uint8_t)0x0; *(uint8_t*)0x2000001f = (uint8_t)0x0; *(uint8_t*)0x20000020 = (uint8_t)0x0; *(uint8_t*)0x20000021 = (uint8_t)0x0; *(uint8_t*)0x20000022 = (uint8_t)0x0; *(uint8_t*)0x20000023 = (uint8_t)0x0; *(uint8_t*)0x20000024 = (uint8_t)0x0; *(uint8_t*)0x20000025 = (uint8_t)0x0; *(uint8_t*)0x20000026 = (uint8_t)0x0; *(uint8_t*)0x20000027 = (uint8_t)0x0; *(uint8_t*)0x20000028 = (uint8_t)0x0; *(uint8_t*)0x20000029 = (uint8_t)0x0; *(uint8_t*)0x2000002a = (uint8_t)0x0; *(uint8_t*)0x2000002b = (uint8_t)0x0; *(uint8_t*)0x2000002c = (uint8_t)0x0; *(uint8_t*)0x2000002d = (uint8_t)0x0; *(uint8_t*)0x2000002e = (uint8_t)0x0; *(uint8_t*)0x2000002f = (uint8_t)0x0; *(uint8_t*)0x20000030 = (uint8_t)0x0; *(uint8_t*)0x20000031 = (uint8_t)0x0; *(uint8_t*)0x20000032 = (uint8_t)0x0; *(uint8_t*)0x20000033 = (uint8_t)0x0; r[40] = syscall(SYS_ioctl, r[2], 0x40345410ul, 0x20000000ul, 0, 0, 0); break; case 3: r[41] = syscall(SYS_ioctl, r[2], 0x54a2ul, 0, 0, 0, 0); break; case 4: r[42] = syscall(SYS_mmap, 0x2000e000ul, 0x1000ul, 0x3ul, 0x32ul, 0xfffffffffffffffful, 0x0ul); break; case 5: *(uint32_t*)0x2000efcc = (uint32_t)0x7; *(uint32_t*)0x2000efd0 = (uint32_t)0x9; *(uint32_t*)0x2000efd4 = (uint32_t)0x4513; *(uint32_t*)0x2000efd8 = (uint32_t)0x9; *(uint32_t*)0x2000efdc = (uint32_t)0x5; *(uint8_t*)0x2000efe0 = (uint8_t)0x0; *(uint8_t*)0x2000efe1 = (uint8_t)0x0; *(uint8_t*)0x2000efe2 = (uint8_t)0x0; *(uint8_t*)0x2000efe3 = (uint8_t)0x0; *(uint8_t*)0x2000efe4 = (uint8_t)0x0; *(uint8_t*)0x2000efe5 = (uint8_t)0x0; *(uint8_t*)0x2000efe6 = (uint8_t)0x0; *(uint8_t*)0x2000efe7 = (uint8_t)0x0; *(uint8_t*)0x2000efe8 = (uint8_t)0x0; *(uint8_t*)0x2000efe9 = (uint8_t)0x0; *(uint8_t*)0x2000efea = (uint8_t)0x0; *(uint8_t*)0x2000efeb = (uint8_t)0x0; *(uint8_t*)0x2000efec = (uint8_t)0x0; *(uint8_t*)0x2000efed = (uint8_t)0x0; *(uint8_t*)0x2000efee = (uint8_t)0x0; *(uint8_t*)0x2000efef = (uint8_t)0x0; *(uint8_t*)0x2000eff0 = (uint8_t)0x0; *(uint8_t*)0x2000eff1 = (uint8_t)0x0; *(uint8_t*)0x2000eff2 = (uint8_t)0x0; *(uint8_t*)0x2000eff3 = (uint8_t)0x0; *(uint8_t*)0x2000eff4 = (uint8_t)0x0; *(uint8_t*)0x2000eff5 = (uint8_t)0x0; *(uint8_t*)0x2000eff6 = (uint8_t)0x0; *(uint8_t*)0x2000eff7 = (uint8_t)0x0; *(uint8_t*)0x2000eff8 = (uint8_t)0x0; *(uint8_t*)0x2000eff9 = (uint8_t)0x0; *(uint8_t*)0x2000effa = (uint8_t)0x0; *(uint8_t*)0x2000effb = (uint8_t)0x0; *(uint8_t*)0x2000effc = (uint8_t)0x0; *(uint8_t*)0x2000effd = (uint8_t)0x0; *(uint8_t*)0x2000effe = (uint8_t)0x0; *(uint8_t*)0x2000efff = (uint8_t)0x0; r[80] = syscall(SYS_ioctl, r[2], 0x40345410ul, 0x2000efccul, 0, 0, 0); break; case 6: r[81] = syscall(SYS_ioctl, r[2], 0x54a0ul, 0, 0, 0, 0); break; case 7: r[82] = syscall(SYS_mmap, 0x2000f000ul, 0x1000ul, 0x3ul, 0x32ul, 0xfffffffffffffffful, 0x0ul); break; case 8: r[83] = syscall(SYS_ioctl, r[2], 0x80e85411ul, 0x2000ffd4ul, 0, 0, 0); break; } return 0; }
int main() { long i; pthread_t th[9];
memset(r, -1, sizeof(r)); for (i = 0; i < 9; i++) { pthread_create(&th[i], 0, thr, (void*)i); usleep(10000); } for (i = 0; i < 9; i++) { pthread_create(&th[i], 0, thr, (void*)i); if (i%2==0) usleep(10000); } usleep(100000); return 0; }
================================================================== BUG: KASAN: use-after-free in snd_timer_interrupt+0xb11/0xbf0 at addr ffff88002fd230d8 Read of size 8 by task syz-executor/8301 ============================================================================= BUG kmalloc-256 (Not tainted): kasan: bad access detected -----------------------------------------------------------------------------
INFO: Allocated in snd_timer_instance_new+0x52/0x3a0 age=2 cpu=1 pid=8283 [< none >] ___slab_alloc+0x486/0x4e0 mm/slub.c:2468 [< none >] __slab_alloc+0x66/0xc0 mm/slub.c:2497 [< inline >] slab_alloc_node mm/slub.c:2560 [< inline >] slab_alloc mm/slub.c:2602 [< none >] kmem_cache_alloc_trace+0x284/0x310 mm/slub.c:2619 [< inline >] kmalloc include/linux/slab.h:458 [< inline >] kzalloc include/linux/slab.h:602 [< none >] snd_timer_instance_new+0x52/0x3a0 sound/core/timer.c:105 [< none >] snd_timer_open+0x522/0xc90 sound/core/timer.c:286 [< inline >] snd_timer_user_tselect sound/core/timer.c:1527 [< none >] snd_timer_user_ioctl+0x89f/0x2540 sound/core/timer.c:1809 [< inline >] vfs_ioctl fs/ioctl.c:43 [< none >] do_vfs_ioctl+0x18c/0xfa0 fs/ioctl.c:674 [< inline >] SYSC_ioctl fs/ioctl.c:689 [< none >] SyS_ioctl+0x8f/0xc0 fs/ioctl.c:680 [< none >] entry_SYSCALL_64_fastpath+0x16/0x7a arch/x86/entry/entry_64.S:185
INFO: Freed in snd_timer_close+0x354/0x5f0 age=1 cpu=1 pid=8283 [< none >] __slab_free+0x1fc/0x320 mm/slub.c:2678 [< inline >] slab_free mm/slub.c:2833 [< none >] kfree+0x2a8/0x2d0 mm/slub.c:3662 [< none >] snd_timer_close+0x354/0x5f0 sound/core/timer.c:364 [< inline >] snd_timer_user_tselect sound/core/timer.c:1517 [< none >] snd_timer_user_ioctl+0x784/0x2540 sound/core/timer.c:1809 [< inline >] vfs_ioctl fs/ioctl.c:43 [< none >] do_vfs_ioctl+0x18c/0xfa0 fs/ioctl.c:674 [< inline >] SYSC_ioctl fs/ioctl.c:689 [< none >] SyS_ioctl+0x8f/0xc0 fs/ioctl.c:680 [< none >] entry_SYSCALL_64_fastpath+0x16/0x7a arch/x86/entry/entry_64.S:185
INFO: Slab 0xffffea0000bf4800 objects=22 used=2 fp=0xffff88002fd23058 flags=0x1fffc0000004080 INFO: Object 0xffff88002fd23058 @offset=12376 fp=0xffff88002fd227d0 CPU: 2 PID: 8301 Comm: syz-executor Tainted: G B 4.4.0+ #240 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 00000000ffffffff ffff88006d607b08 ffffffff82926eed ffff88003e807000 ffff88002fd23058 ffff88002fd20000 ffff88006d607b38 ffffffff81740ca4 ffff88003e807000 ffffea0000bf4800 ffff88002fd23058 ffff88002fd230d8
Call Trace: [<ffffffff8174a1fe>] __asan_report_load8_noabort+0x3e/0x40 mm/kasan/report.c:295 [<ffffffff84ebe841>] snd_timer_interrupt+0xb11/0xbf0 sound/core/timer.c:680 [<ffffffff84ebe9dd>] snd_timer_s_function+0xbd/0x130 sound/core/timer.c:963 [<ffffffff814b8c56>] call_timer_fn+0x176/0x550 kernel/time/timer.c:1178 [< inline >] __run_timers kernel/time/timer.c:1254 [<ffffffff814ba175>] run_timer_softirq+0x5c5/0x9f0 kernel/time/timer.c:1437 [<ffffffff813606a8>] __do_softirq+0x268/0x920 kernel/softirq.c:273 [< inline >] invoke_softirq kernel/softirq.c:350 [<ffffffff813610ef>] irq_exit+0x18f/0x1d0 kernel/softirq.c:391 [< inline >] exiting_irq ./arch/x86/include/asm/apic.h:659 [<ffffffff8125157b>] smp_apic_timer_interrupt+0x7b/0xa0 arch/x86/kernel/apic/apic.c:932 [<ffffffff86273dec>] apic_timer_interrupt+0x8c/0xa0 arch/x86/entry/entry_64.S:520 <EOI> [<ffffffff813ac59d>] ? alloc_pid+0x5d/0xc90 kernel/pid.c:306 [< inline >] slab_alloc_node mm/slub.c:2560 [< inline >] slab_alloc mm/slub.c:2602 [<ffffffff817446e1>] kmem_cache_alloc+0x261/0x2e0 mm/slub.c:2607 [<ffffffff813ac59d>] alloc_pid+0x5d/0xc90 kernel/pid.c:306 [<ffffffff8134ca2e>] copy_process.part.35+0x374e/0x5770 kernel/fork.c:1463 [< inline >] copy_process kernel/fork.c:1275 [<ffffffff8134ed7c>] _do_fork+0x1bc/0xcb0 kernel/fork.c:1724 [< inline >] SYSC_clone kernel/fork.c:1833 [<ffffffff8134f947>] SyS_clone+0x37/0x50 kernel/fork.c:1827 [<ffffffff86272ff6>] entry_SYSCALL_64_fastpath+0x16/0x7a arch/x86/entry/entry_64.S:185 ==================================================================
On commit 67990608c8b95d2b8ccc29932376ae73d5818727 (Jan 12).
On Wed, 13 Jan 2016 16:00:09 +0100, Dmitry Vyukov wrote:
Hello,
The following program triggers use-after-free in snd_timer_interrupt:
// autogenerated by syzkaller (http://github.com/google/syzkaller) #include <unistd.h> #include <sys/syscall.h> #include <string.h> #include <stdint.h> #include <pthread.h>
long r[84];
void *thr(void *arg) { switch ((long)arg) { case 0: r[0] = syscall(SYS_mmap, 0x20000000ul, 0xe000ul, 0x3ul, 0x32ul, 0xfffffffffffffffful, 0x0ul); break; case 1: memcpy((void*)0x20000990, "\x2f\x64\x65\x76\x2f\x73\x6e\x64\x2f\x74\x69\x6d\x65\x72", 14); r[2] = syscall(SYS_open, 0x20000990ul, 0x1ul, 0x0ul, 0, 0, 0); break; case 2: *(uint32_t*)0x20000000 = (uint32_t)0x1; *(uint32_t*)0x20000004 = (uint32_t)0x7; *(uint32_t*)0x20000008 = (uint32_t)0x3; *(uint32_t*)0x2000000c = (uint32_t)0x0; *(uint32_t*)0x20000010 = (uint32_t)0x0; *(uint8_t*)0x20000014 = (uint8_t)0x0; *(uint8_t*)0x20000015 = (uint8_t)0x0; *(uint8_t*)0x20000016 = (uint8_t)0x0; *(uint8_t*)0x20000017 = (uint8_t)0x0; *(uint8_t*)0x20000018 = (uint8_t)0x0; *(uint8_t*)0x20000019 = (uint8_t)0x0; *(uint8_t*)0x2000001a = (uint8_t)0x0; *(uint8_t*)0x2000001b = (uint8_t)0x0; *(uint8_t*)0x2000001c = (uint8_t)0x0; *(uint8_t*)0x2000001d = (uint8_t)0x0; *(uint8_t*)0x2000001e = (uint8_t)0x0; *(uint8_t*)0x2000001f = (uint8_t)0x0; *(uint8_t*)0x20000020 = (uint8_t)0x0; *(uint8_t*)0x20000021 = (uint8_t)0x0; *(uint8_t*)0x20000022 = (uint8_t)0x0; *(uint8_t*)0x20000023 = (uint8_t)0x0; *(uint8_t*)0x20000024 = (uint8_t)0x0; *(uint8_t*)0x20000025 = (uint8_t)0x0; *(uint8_t*)0x20000026 = (uint8_t)0x0; *(uint8_t*)0x20000027 = (uint8_t)0x0; *(uint8_t*)0x20000028 = (uint8_t)0x0; *(uint8_t*)0x20000029 = (uint8_t)0x0; *(uint8_t*)0x2000002a = (uint8_t)0x0; *(uint8_t*)0x2000002b = (uint8_t)0x0; *(uint8_t*)0x2000002c = (uint8_t)0x0; *(uint8_t*)0x2000002d = (uint8_t)0x0; *(uint8_t*)0x2000002e = (uint8_t)0x0; *(uint8_t*)0x2000002f = (uint8_t)0x0; *(uint8_t*)0x20000030 = (uint8_t)0x0; *(uint8_t*)0x20000031 = (uint8_t)0x0; *(uint8_t*)0x20000032 = (uint8_t)0x0; *(uint8_t*)0x20000033 = (uint8_t)0x0; r[40] = syscall(SYS_ioctl, r[2], 0x40345410ul, 0x20000000ul, 0, 0, 0); break; case 3: r[41] = syscall(SYS_ioctl, r[2], 0x54a2ul, 0, 0, 0, 0); break; case 4: r[42] = syscall(SYS_mmap, 0x2000e000ul, 0x1000ul, 0x3ul, 0x32ul, 0xfffffffffffffffful, 0x0ul); break; case 5: *(uint32_t*)0x2000efcc = (uint32_t)0x7; *(uint32_t*)0x2000efd0 = (uint32_t)0x9; *(uint32_t*)0x2000efd4 = (uint32_t)0x4513; *(uint32_t*)0x2000efd8 = (uint32_t)0x9; *(uint32_t*)0x2000efdc = (uint32_t)0x5; *(uint8_t*)0x2000efe0 = (uint8_t)0x0; *(uint8_t*)0x2000efe1 = (uint8_t)0x0; *(uint8_t*)0x2000efe2 = (uint8_t)0x0; *(uint8_t*)0x2000efe3 = (uint8_t)0x0; *(uint8_t*)0x2000efe4 = (uint8_t)0x0; *(uint8_t*)0x2000efe5 = (uint8_t)0x0; *(uint8_t*)0x2000efe6 = (uint8_t)0x0; *(uint8_t*)0x2000efe7 = (uint8_t)0x0; *(uint8_t*)0x2000efe8 = (uint8_t)0x0; *(uint8_t*)0x2000efe9 = (uint8_t)0x0; *(uint8_t*)0x2000efea = (uint8_t)0x0; *(uint8_t*)0x2000efeb = (uint8_t)0x0; *(uint8_t*)0x2000efec = (uint8_t)0x0; *(uint8_t*)0x2000efed = (uint8_t)0x0; *(uint8_t*)0x2000efee = (uint8_t)0x0; *(uint8_t*)0x2000efef = (uint8_t)0x0; *(uint8_t*)0x2000eff0 = (uint8_t)0x0; *(uint8_t*)0x2000eff1 = (uint8_t)0x0; *(uint8_t*)0x2000eff2 = (uint8_t)0x0; *(uint8_t*)0x2000eff3 = (uint8_t)0x0; *(uint8_t*)0x2000eff4 = (uint8_t)0x0; *(uint8_t*)0x2000eff5 = (uint8_t)0x0; *(uint8_t*)0x2000eff6 = (uint8_t)0x0; *(uint8_t*)0x2000eff7 = (uint8_t)0x0; *(uint8_t*)0x2000eff8 = (uint8_t)0x0; *(uint8_t*)0x2000eff9 = (uint8_t)0x0; *(uint8_t*)0x2000effa = (uint8_t)0x0; *(uint8_t*)0x2000effb = (uint8_t)0x0; *(uint8_t*)0x2000effc = (uint8_t)0x0; *(uint8_t*)0x2000effd = (uint8_t)0x0; *(uint8_t*)0x2000effe = (uint8_t)0x0; *(uint8_t*)0x2000efff = (uint8_t)0x0; r[80] = syscall(SYS_ioctl, r[2], 0x40345410ul, 0x2000efccul, 0, 0, 0); break; case 6: r[81] = syscall(SYS_ioctl, r[2], 0x54a0ul, 0, 0, 0, 0); break; case 7: r[82] = syscall(SYS_mmap, 0x2000f000ul, 0x1000ul, 0x3ul, 0x32ul, 0xfffffffffffffffful, 0x0ul); break; case 8: r[83] = syscall(SYS_ioctl, r[2], 0x80e85411ul, 0x2000ffd4ul, 0, 0, 0); break; } return 0; }
int main() { long i; pthread_t th[9];
memset(r, -1, sizeof(r)); for (i = 0; i < 9; i++) { pthread_create(&th[i], 0, thr, (void*)i); usleep(10000); } for (i = 0; i < 9; i++) { pthread_create(&th[i], 0, thr, (void*)i); if (i%2==0) usleep(10000); } usleep(100000); return 0;
}
================================================================== BUG: KASAN: use-after-free in snd_timer_interrupt+0xb11/0xbf0 at addr ffff88002fd230d8 Read of size 8 by task syz-executor/8301 ============================================================================= BUG kmalloc-256 (Not tainted): kasan: bad access detected
INFO: Allocated in snd_timer_instance_new+0x52/0x3a0 age=2 cpu=1 pid=8283 [< none >] ___slab_alloc+0x486/0x4e0 mm/slub.c:2468 [< none >] __slab_alloc+0x66/0xc0 mm/slub.c:2497 [< inline >] slab_alloc_node mm/slub.c:2560 [< inline >] slab_alloc mm/slub.c:2602 [< none >] kmem_cache_alloc_trace+0x284/0x310 mm/slub.c:2619 [< inline >] kmalloc include/linux/slab.h:458 [< inline >] kzalloc include/linux/slab.h:602 [< none >] snd_timer_instance_new+0x52/0x3a0 sound/core/timer.c:105 [< none >] snd_timer_open+0x522/0xc90 sound/core/timer.c:286 [< inline >] snd_timer_user_tselect sound/core/timer.c:1527 [< none >] snd_timer_user_ioctl+0x89f/0x2540 sound/core/timer.c:1809 [< inline >] vfs_ioctl fs/ioctl.c:43 [< none >] do_vfs_ioctl+0x18c/0xfa0 fs/ioctl.c:674 [< inline >] SYSC_ioctl fs/ioctl.c:689 [< none >] SyS_ioctl+0x8f/0xc0 fs/ioctl.c:680 [< none >] entry_SYSCALL_64_fastpath+0x16/0x7a arch/x86/entry/entry_64.S:185
INFO: Freed in snd_timer_close+0x354/0x5f0 age=1 cpu=1 pid=8283 [< none >] __slab_free+0x1fc/0x320 mm/slub.c:2678 [< inline >] slab_free mm/slub.c:2833 [< none >] kfree+0x2a8/0x2d0 mm/slub.c:3662 [< none >] snd_timer_close+0x354/0x5f0 sound/core/timer.c:364 [< inline >] snd_timer_user_tselect sound/core/timer.c:1517 [< none >] snd_timer_user_ioctl+0x784/0x2540 sound/core/timer.c:1809 [< inline >] vfs_ioctl fs/ioctl.c:43 [< none >] do_vfs_ioctl+0x18c/0xfa0 fs/ioctl.c:674 [< inline >] SYSC_ioctl fs/ioctl.c:689 [< none >] SyS_ioctl+0x8f/0xc0 fs/ioctl.c:680 [< none >] entry_SYSCALL_64_fastpath+0x16/0x7a arch/x86/entry/entry_64.S:185
INFO: Slab 0xffffea0000bf4800 objects=22 used=2 fp=0xffff88002fd23058 flags=0x1fffc0000004080 INFO: Object 0xffff88002fd23058 @offset=12376 fp=0xffff88002fd227d0 CPU: 2 PID: 8301 Comm: syz-executor Tainted: G B 4.4.0+ #240 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 00000000ffffffff ffff88006d607b08 ffffffff82926eed ffff88003e807000 ffff88002fd23058 ffff88002fd20000 ffff88006d607b38 ffffffff81740ca4 ffff88003e807000 ffffea0000bf4800 ffff88002fd23058 ffff88002fd230d8
Call Trace: [<ffffffff8174a1fe>] __asan_report_load8_noabort+0x3e/0x40 mm/kasan/report.c:295 [<ffffffff84ebe841>] snd_timer_interrupt+0xb11/0xbf0 sound/core/timer.c:680 [<ffffffff84ebe9dd>] snd_timer_s_function+0xbd/0x130 sound/core/timer.c:963 [<ffffffff814b8c56>] call_timer_fn+0x176/0x550 kernel/time/timer.c:1178 [< inline >] __run_timers kernel/time/timer.c:1254 [<ffffffff814ba175>] run_timer_softirq+0x5c5/0x9f0 kernel/time/timer.c:1437 [<ffffffff813606a8>] __do_softirq+0x268/0x920 kernel/softirq.c:273 [< inline >] invoke_softirq kernel/softirq.c:350 [<ffffffff813610ef>] irq_exit+0x18f/0x1d0 kernel/softirq.c:391 [< inline >] exiting_irq ./arch/x86/include/asm/apic.h:659 [<ffffffff8125157b>] smp_apic_timer_interrupt+0x7b/0xa0 arch/x86/kernel/apic/apic.c:932 [<ffffffff86273dec>] apic_timer_interrupt+0x8c/0xa0 arch/x86/entry/entry_64.S:520 <EOI> [<ffffffff813ac59d>] ? alloc_pid+0x5d/0xc90 kernel/pid.c:306 [< inline >] slab_alloc_node mm/slub.c:2560 [< inline >] slab_alloc mm/slub.c:2602 [<ffffffff817446e1>] kmem_cache_alloc+0x261/0x2e0 mm/slub.c:2607 [<ffffffff813ac59d>] alloc_pid+0x5d/0xc90 kernel/pid.c:306 [<ffffffff8134ca2e>] copy_process.part.35+0x374e/0x5770 kernel/fork.c:1463 [< inline >] copy_process kernel/fork.c:1275 [<ffffffff8134ed7c>] _do_fork+0x1bc/0xcb0 kernel/fork.c:1724 [< inline >] SYSC_clone kernel/fork.c:1833 [<ffffffff8134f947>] SyS_clone+0x37/0x50 kernel/fork.c:1827 [<ffffffff86272ff6>] entry_SYSCALL_64_fastpath+0x16/0x7a arch/x86/entry/entry_64.S:185 ==================================================================
On commit 67990608c8b95d2b8ccc29932376ae73d5818727 (Jan 12).
This and your other relevant reports seem pointing the race of timer ioctls. Although snd_timer_close() itself calls snd_timer_stop(), there is no other protection against the concurrent execution.
If my guess is correct, a simplistic fix like below should work. It basically serializes the timer ioctl by using a new mutex (and replacing the old tread_sem mutex). They are no longtime blocking calls, so this shouldn't be a big problem. But certainly there can be a less intrusive way to paper over this if this really matters.
In this case for timer.c, I'd leave the final decision rather to Jaroslav. Jaroslav, what do you think?
thanks,
Takashi
--- From: Takashi Iwai tiwai@suse.de Subject: [PATCH] ALSA: timer: Fix race among timer ioctls
ALSA timer ioctls have an open race and this may lead to a use-after-free of timer instance object. A simplistic fix is to make each ioctl exclusive. We have already tread_sem for controlling the tread, and extend this as a global mutex to be applied to each ioctl.
The downside is, of course, the worse concurrency. But these ioctls aren't to be parallel accessible, in anyway, so it should be fine to serialize there.
Reported-by: Dmitry Vyukov dvyukov@google.com Cc: stable@vger.kernel.org Signed-off-by: Takashi Iwai tiwai@suse.de --- sound/core/timer.c | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-)
diff --git a/sound/core/timer.c b/sound/core/timer.c index 31f40f03e5b7..b03a9e489286 100644 --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -73,7 +73,7 @@ struct snd_timer_user { struct timespec tstamp; /* trigger tstamp */ wait_queue_head_t qchange_sleep; struct fasync_struct *fasync; - struct mutex tread_sem; + struct mutex ioctl_lock; };
/* list of timers */ @@ -1253,7 +1253,7 @@ static int snd_timer_user_open(struct inode *inode, struct file *file) return -ENOMEM; spin_lock_init(&tu->qlock); init_waitqueue_head(&tu->qchange_sleep); - mutex_init(&tu->tread_sem); + mutex_init(&tu->ioctl_lock); tu->ticks = 1; tu->queue_size = 128; tu->queue = kmalloc(tu->queue_size * sizeof(struct snd_timer_read), @@ -1273,8 +1273,10 @@ static int snd_timer_user_release(struct inode *inode, struct file *file) if (file->private_data) { tu = file->private_data; file->private_data = NULL; + mutex_lock(&tu->ioctl_lock); if (tu->timeri) snd_timer_close(tu->timeri); + mutex_unlock(&tu->ioctl_lock); kfree(tu->queue); kfree(tu->tqueue); kfree(tu); @@ -1512,7 +1514,6 @@ static int snd_timer_user_tselect(struct file *file, int err = 0;
tu = file->private_data; - mutex_lock(&tu->tread_sem); if (tu->timeri) { snd_timer_close(tu->timeri); tu->timeri = NULL; @@ -1556,7 +1557,6 @@ static int snd_timer_user_tselect(struct file *file, }
__err: - mutex_unlock(&tu->tread_sem); return err; }
@@ -1769,7 +1769,7 @@ enum { SNDRV_TIMER_IOCTL_PAUSE_OLD = _IO('T', 0x23), };
-static long snd_timer_user_ioctl(struct file *file, unsigned int cmd, +static long __snd_timer_user_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct snd_timer_user *tu; @@ -1786,17 +1786,11 @@ static long snd_timer_user_ioctl(struct file *file, unsigned int cmd, { int xarg;
- mutex_lock(&tu->tread_sem); - if (tu->timeri) { /* too late */ - mutex_unlock(&tu->tread_sem); + if (tu->timeri) /* too late */ return -EBUSY; - } - if (get_user(xarg, p)) { - mutex_unlock(&tu->tread_sem); + if (get_user(xarg, p)) return -EFAULT; - } tu->tread = xarg ? 1 : 0; - mutex_unlock(&tu->tread_sem); return 0; } case SNDRV_TIMER_IOCTL_GINFO: @@ -1829,6 +1823,18 @@ static long snd_timer_user_ioctl(struct file *file, unsigned int cmd, return -ENOTTY; }
+static long snd_timer_user_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + struct snd_timer_user *tu = file->private_data; + long ret; + + mutex_lock(&tu->ioctl_lock); + ret = __snd_timer_user_ioctl(file, cmd, arg); + mutex_unlock(&tu->ioctl_lock); + return ret; +} + static int snd_timer_user_fasync(int fd, struct file * file, int on) { struct snd_timer_user *tu;
On Wed, Jan 13, 2016 at 5:53 PM, Takashi Iwai tiwai@suse.de wrote:
This and your other relevant reports seem pointing the race of timer ioctls. Although snd_timer_close() itself calls snd_timer_stop(), there is no other protection against the concurrent execution.
If my guess is correct, a simplistic fix like below should work. It basically serializes the timer ioctl by using a new mutex (and replacing the old tread_sem mutex). They are no longtime blocking calls, so this shouldn't be a big problem. But certainly there can be a less intrusive way to paper over this if this really matters.
In this case for timer.c, I'd leave the final decision rather to Jaroslav. Jaroslav, what do you think?
After applying this patch I still see the following WARNINGS:
------------[ cut here ]------------ WARNING: CPU: 2 PID: 30398 at lib/list_debug.c:53 __list_del_entry+0x10b/0x1e0() list_del corruption, ffff880032d933b0->next is LIST_POISON1 (dead000000000100) Modules linked in: CPU: 2 PID: 30398 Comm: syz-executor Not tainted 4.4.0+ #241 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 00000000ffffffff ffff8800627778d8 ffffffff82926eed ffff880062777948 ffff880061c2af80 ffffffff8660b640 ffff880062777918 ffffffff81350c89 ffffffff8298e77b ffffed000c4eef25 ffffffff8660b640 0000000000000035 Call Trace: [< inline >] __dump_stack lib/dump_stack.c:15 [<ffffffff82926eed>] dump_stack+0x6f/0xa2 lib/dump_stack.c:50 [<ffffffff81350c89>] warn_slowpath_common+0xd9/0x140 kernel/panic.c:483 [<ffffffff81350d99>] warn_slowpath_fmt+0xa9/0xd0 kernel/panic.c:495 [<ffffffff8298e77b>] __list_del_entry+0x10b/0x1e0 lib/list_debug.c:51 [< inline >] list_del_init include/linux/list.h:145 [<ffffffff84ebd199>] _snd_timer_stop+0x119/0x450 sound/core/timer.c:501 [<ffffffff84ebd4f4>] snd_timer_stop+0x24/0x140 sound/core/timer.c:535 [<ffffffff84ebd648>] snd_timer_close+0x38/0x5f0 sound/core/timer.c:317 [< inline >] snd_timer_user_tselect sound/core/timer.c:1518 [< inline >] __snd_timer_user_ioctl sound/core/timer.c:1803 [<ffffffff84ec4362>] snd_timer_user_ioctl+0x7b2/0x25c0 sound/core/timer.c:1833 [< inline >] vfs_ioctl fs/ioctl.c:43 [<ffffffff817cbd3c>] do_vfs_ioctl+0x18c/0xfa0 fs/ioctl.c:674 [< inline >] SYSC_ioctl fs/ioctl.c:689 [<ffffffff817ccbdf>] SyS_ioctl+0x8f/0xc0 fs/ioctl.c:680 [<ffffffff86273076>] entry_SYSCALL_64_fastpath+0x16/0x7a arch/x86/entry/entry_64.S:185 ---[ end trace bfebf27b922184a1 ]---
On Wed, 13 Jan 2016 19:34:36 +0100, Dmitry Vyukov wrote:
On Wed, Jan 13, 2016 at 5:53 PM, Takashi Iwai tiwai@suse.de wrote:
This and your other relevant reports seem pointing the race of timer ioctls. Although snd_timer_close() itself calls snd_timer_stop(), there is no other protection against the concurrent execution.
If my guess is correct, a simplistic fix like below should work. It basically serializes the timer ioctl by using a new mutex (and replacing the old tread_sem mutex). They are no longtime blocking calls, so this shouldn't be a big problem. But certainly there can be a less intrusive way to paper over this if this really matters.
In this case for timer.c, I'd leave the final decision rather to Jaroslav. Jaroslav, what do you think?
After applying this patch I still see the following WARNINGS:
------------[ cut here ]------------ WARNING: CPU: 2 PID: 30398 at lib/list_debug.c:53 __list_del_entry+0x10b/0x1e0() list_del corruption, ffff880032d933b0->next is LIST_POISON1 (dead000000000100) Modules linked in: CPU: 2 PID: 30398 Comm: syz-executor Not tainted 4.4.0+ #241 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 00000000ffffffff ffff8800627778d8 ffffffff82926eed ffff880062777948 ffff880061c2af80 ffffffff8660b640 ffff880062777918 ffffffff81350c89 ffffffff8298e77b ffffed000c4eef25 ffffffff8660b640 0000000000000035 Call Trace: [< inline >] __dump_stack lib/dump_stack.c:15 [<ffffffff82926eed>] dump_stack+0x6f/0xa2 lib/dump_stack.c:50 [<ffffffff81350c89>] warn_slowpath_common+0xd9/0x140 kernel/panic.c:483 [<ffffffff81350d99>] warn_slowpath_fmt+0xa9/0xd0 kernel/panic.c:495 [<ffffffff8298e77b>] __list_del_entry+0x10b/0x1e0 lib/list_debug.c:51 [< inline >] list_del_init include/linux/list.h:145 [<ffffffff84ebd199>] _snd_timer_stop+0x119/0x450 sound/core/timer.c:501
This is
list_del_init(&timeri->active_list);
right? Possibly the following oneliner covers it?
thanks,
Takashi
--- diff --git a/sound/core/timer.c b/sound/core/timer.c index b03a9e489286..3810ee8f1205 100644 --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -694,7 +694,7 @@ void snd_timer_interrupt(struct snd_timer * timer, unsigned long ticks_left) } else { ti->flags &= ~SNDRV_TIMER_IFLG_RUNNING; if (--timer->running) - list_del(&ti->active_list); + list_del_init(&ti->active_list); } if ((timer->hw.flags & SNDRV_TIMER_HW_TASKLET) || (ti->flags & SNDRV_TIMER_IFLG_FAST))
On Wed, Jan 13, 2016 at 8:05 PM, Takashi Iwai tiwai@suse.de wrote:
On Wed, 13 Jan 2016 19:34:36 +0100, Dmitry Vyukov wrote:
On Wed, Jan 13, 2016 at 5:53 PM, Takashi Iwai tiwai@suse.de wrote:
This and your other relevant reports seem pointing the race of timer ioctls. Although snd_timer_close() itself calls snd_timer_stop(), there is no other protection against the concurrent execution.
If my guess is correct, a simplistic fix like below should work. It basically serializes the timer ioctl by using a new mutex (and replacing the old tread_sem mutex). They are no longtime blocking calls, so this shouldn't be a big problem. But certainly there can be a less intrusive way to paper over this if this really matters.
In this case for timer.c, I'd leave the final decision rather to Jaroslav. Jaroslav, what do you think?
After applying this patch I still see the following WARNINGS:
------------[ cut here ]------------ WARNING: CPU: 2 PID: 30398 at lib/list_debug.c:53 __list_del_entry+0x10b/0x1e0() list_del corruption, ffff880032d933b0->next is LIST_POISON1 (dead000000000100) Modules linked in: CPU: 2 PID: 30398 Comm: syz-executor Not tainted 4.4.0+ #241 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 00000000ffffffff ffff8800627778d8 ffffffff82926eed ffff880062777948 ffff880061c2af80 ffffffff8660b640 ffff880062777918 ffffffff81350c89 ffffffff8298e77b ffffed000c4eef25 ffffffff8660b640 0000000000000035 Call Trace: [< inline >] __dump_stack lib/dump_stack.c:15 [<ffffffff82926eed>] dump_stack+0x6f/0xa2 lib/dump_stack.c:50 [<ffffffff81350c89>] warn_slowpath_common+0xd9/0x140 kernel/panic.c:483 [<ffffffff81350d99>] warn_slowpath_fmt+0xa9/0xd0 kernel/panic.c:495 [<ffffffff8298e77b>] __list_del_entry+0x10b/0x1e0 lib/list_debug.c:51 [< inline >] list_del_init include/linux/list.h:145 [<ffffffff84ebd199>] _snd_timer_stop+0x119/0x450 sound/core/timer.c:501
This is
list_del_init(&timeri->active_list);
right? Possibly the following oneliner covers it?
Yes, that is this line. Yes, these two patches fix use-after-frees and GPFs.
Tested-by: Dmitry Vyukov dvyukov@google.com
The only one that still happens is "sound: spinlock lockup in sound/core/timer.c".
thanks,
Takashi
diff --git a/sound/core/timer.c b/sound/core/timer.c index b03a9e489286..3810ee8f1205 100644 --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -694,7 +694,7 @@ void snd_timer_interrupt(struct snd_timer * timer, unsigned long ticks_left) } else { ti->flags &= ~SNDRV_TIMER_IFLG_RUNNING; if (--timer->running)
list_del(&ti->active_list);
list_del_init(&ti->active_list); } if ((timer->hw.flags & SNDRV_TIMER_HW_TASKLET) || (ti->flags & SNDRV_TIMER_IFLG_FAST))
On Wed, Jan 13, 2016 at 8:30 PM, Dmitry Vyukov dvyukov@google.com wrote:
On Wed, Jan 13, 2016 at 8:05 PM, Takashi Iwai tiwai@suse.de wrote:
On Wed, 13 Jan 2016 19:34:36 +0100, Dmitry Vyukov wrote:
On Wed, Jan 13, 2016 at 5:53 PM, Takashi Iwai tiwai@suse.de wrote:
This and your other relevant reports seem pointing the race of timer ioctls. Although snd_timer_close() itself calls snd_timer_stop(), there is no other protection against the concurrent execution.
If my guess is correct, a simplistic fix like below should work. It basically serializes the timer ioctl by using a new mutex (and replacing the old tread_sem mutex). They are no longtime blocking calls, so this shouldn't be a big problem. But certainly there can be a less intrusive way to paper over this if this really matters.
In this case for timer.c, I'd leave the final decision rather to Jaroslav. Jaroslav, what do you think?
After applying this patch I still see the following WARNINGS:
------------[ cut here ]------------ WARNING: CPU: 2 PID: 30398 at lib/list_debug.c:53 __list_del_entry+0x10b/0x1e0() list_del corruption, ffff880032d933b0->next is LIST_POISON1 (dead000000000100) Modules linked in: CPU: 2 PID: 30398 Comm: syz-executor Not tainted 4.4.0+ #241 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 00000000ffffffff ffff8800627778d8 ffffffff82926eed ffff880062777948 ffff880061c2af80 ffffffff8660b640 ffff880062777918 ffffffff81350c89 ffffffff8298e77b ffffed000c4eef25 ffffffff8660b640 0000000000000035 Call Trace: [< inline >] __dump_stack lib/dump_stack.c:15 [<ffffffff82926eed>] dump_stack+0x6f/0xa2 lib/dump_stack.c:50 [<ffffffff81350c89>] warn_slowpath_common+0xd9/0x140 kernel/panic.c:483 [<ffffffff81350d99>] warn_slowpath_fmt+0xa9/0xd0 kernel/panic.c:495 [<ffffffff8298e77b>] __list_del_entry+0x10b/0x1e0 lib/list_debug.c:51 [< inline >] list_del_init include/linux/list.h:145 [<ffffffff84ebd199>] _snd_timer_stop+0x119/0x450 sound/core/timer.c:501
This is
list_del_init(&timeri->active_list);
right? Possibly the following oneliner covers it?
Yes, that is this line. Yes, these two patches fix use-after-frees and GPFs.
Tested-by: Dmitry Vyukov dvyukov@google.com
I've re-tested the programs that I reported. But when I started the fuzzer again I hit a similar use-after-free in snd_timer_interrupt:
BUG: KASAN: use-after-free in snd_timer_interrupt+0xaea/0xc40 at addr ffff8800644df960 Read of size 8 by task kworker/u10:3/561 ============================================================================= BUG kmalloc-256 (Not tainted): kasan: bad access detected -----------------------------------------------------------------------------
INFO: Allocated in snd_timer_instance_new+0x52/0x3a0 age=13 cpu=2 pid=18656 [< none >] ___slab_alloc+0x486/0x4e0 mm/slub.c:2468 [< none >] __slab_alloc+0x66/0xc0 mm/slub.c:2497 [< inline >] slab_alloc_node mm/slub.c:2560 [< inline >] slab_alloc mm/slub.c:2602 [< none >] kmem_cache_alloc_trace+0x284/0x310 mm/slub.c:2619 [< inline >] kmalloc include/linux/slab.h:458 [< inline >] kzalloc include/linux/slab.h:602 [< none >] snd_timer_instance_new+0x52/0x3a0 sound/core/timer.c:105 [< none >] snd_timer_open+0x522/0xc90 sound/core/timer.c:286 [< none >] snd_seq_timer_open+0x223/0x540 sound/core/seq/seq_timer.c:279 [< none >] snd_seq_queue_use+0x147/0x230 sound/core/seq/seq_queue.c:528 [< none >] snd_seq_queue_alloc+0x36a/0x4d0 sound/core/seq/seq_queue.c:199 [< none >] snd_seq_ioctl_create_queue+0xdb/0x2b0 sound/core/seq/seq_clientmgr.c:1536 [< none >] snd_seq_do_ioctl+0x19a/0x1c0 sound/core/seq/seq_clientmgr.c:2209 [< none >] snd_seq_ioctl+0x5d/0x80 sound/core/seq/seq_clientmgr.c:2224 [< inline >] vfs_ioctl fs/ioctl.c:43 [< none >] do_vfs_ioctl+0x18c/0xfa0 fs/ioctl.c:674 [< inline >] SYSC_ioctl fs/ioctl.c:689 [< none >] SyS_ioctl+0x8f/0xc0 fs/ioctl.c:680 [< none >] entry_SYSCALL_64_fastpath+0x16/0x7a arch/x86/entry/entry_64.S:185
INFO: Freed in snd_timer_close+0x354/0x5f0 age=13 cpu=3 pid=18658 [< none >] __slab_free+0x1fc/0x320 mm/slub.c:2678 [< inline >] slab_free mm/slub.c:2833 [< none >] kfree+0x2a8/0x2d0 mm/slub.c:3662 [< none >] snd_timer_close+0x354/0x5f0 sound/core/timer.c:364 [< none >] snd_seq_timer_close+0x9e/0x100 sound/core/seq/seq_timer.c:312 snd_seq_queue_timer [< none >] snd_seq_ioctl_set_queue_timer+0x159/0x300 sound/core/seq/seq_clientmgr.c:1809 [< none >] snd_seq_do_ioctl+0x19a/0x1c0 sound/core/seq/seq_clientmgr.c:2209 [< none >] snd_seq_ioctl+0x5d/0x80 sound/core/seq/seq_clientmgr.c:2224 [< inline >] vfs_ioctl fs/ioctl.c:43 [< none >] do_vfs_ioctl+0x18c/0xfa0 fs/ioctl.c:674 [< inline >] SYSC_ioctl fs/ioctl.c:689 [< none >] SyS_ioctl+0x8f/0xc0 fs/ioctl.c:680 [< none >] entry_SYSCALL_64_fastpath+0x16/0x7a arch/x86/entry/entry_64.S:185
INFO: Slab 0xffffea0001913700 objects=22 used=10 fp=0xffff8800644df8e0 flags=0x5fffc0000004080 INFO: Object 0xffff8800644df8e0 @offset=14560 fp=0xffff8800644ddf48 CPU: 2 PID: 561 Comm: kworker/u10:3 Tainted: G B 4.4.0+ #243 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Workqueue: events_unbound call_usermodehelper_exec_work 00000000ffffffff ffff88006d607be0 ffffffff82926eed ffff88003e807000 ffff8800644df8e0 ffff8800644dc000 ffff88006d607c10 ffffffff81740ca4 ffff88003e807000 ffffea0001913700 ffff8800644df8e0 ffff8800644df960
Call Trace: [< inline >] kasan_report mm/kasan/report.c:274 [<ffffffff8174a1fe>] __asan_report_load8_noabort+0x3e/0x40 mm/kasan/report.c:295 [<ffffffff84ebe84a>] snd_timer_interrupt+0xaea/0xc40 sound/core/timer.c:680 [<ffffffff84ec6c16>] snd_hrtimer_callback+0x166/0x230 sound/core/hrtimer.c:54 [< inline >] __run_hrtimer kernel/time/hrtimer.c:1229 [<ffffffff814c3723>] __hrtimer_run_queues+0x363/0xc10 kernel/time/hrtimer.c:1293 [<ffffffff814c5732>] hrtimer_interrupt+0x182/0x430 kernel/time/hrtimer.c:1327 [<ffffffff8124e10f>] local_apic_timer_interrupt+0x6f/0xe0 arch/x86/kernel/apic/apic.c:907 [<ffffffff81251576>] smp_apic_timer_interrupt+0x76/0xa0 arch/x86/kernel/apic/apic.c:931 [<ffffffff86273eec>] apic_timer_interrupt+0x8c/0xa0 arch/x86/entry/entry_64.S:520 <EOI> [< inline >] alloc_task_struct_node kernel/fork.c:142 <EOI> [< inline >] dup_task_struct kernel/fork.c:342 <EOI> [<ffffffff8134950e>] copy_process.part.35+0x22e/0x5770 kernel/fork.c:1304 [< inline >] slab_alloc_node mm/slub.c:2560 [<ffffffff817447f3>] kmem_cache_alloc_node+0x93/0x300 mm/slub.c:2630 [< inline >] alloc_task_struct_node kernel/fork.c:142 [< inline >] dup_task_struct kernel/fork.c:342 [<ffffffff8134950e>] copy_process.part.35+0x22e/0x5770 kernel/fork.c:1304 [< inline >] copy_process kernel/fork.c:1275 [<ffffffff8134ed7c>] _do_fork+0x1bc/0xcb0 kernel/fork.c:1724 [<ffffffff8134f8a4>] kernel_thread+0x34/0x40 kernel/fork.c:1785 [< inline >] call_usermodehelper_exec_sync kernel/kmod.c:275 [<ffffffff81391874>] call_usermodehelper_exec_work+0xf4/0x230 kernel/kmod.c:327 [<ffffffff8139e824>] process_one_work+0x794/0x1440 kernel/workqueue.c:2036 [<ffffffff8139f5ab>] worker_thread+0xdb/0xfc0 kernel/workqueue.c:2170 [<ffffffff813b2cef>] kthread+0x23f/0x2d0 drivers/block/aoe/aoecmd.c:1303 [<ffffffff862734af>] ret_from_fork+0x3f/0x70 arch/x86/entry/entry_64.S:468 ==================================================================
On Wed, 13 Jan 2016 20:41:30 +0100, Dmitry Vyukov wrote:
On Wed, Jan 13, 2016 at 8:30 PM, Dmitry Vyukov dvyukov@google.com wrote:
On Wed, Jan 13, 2016 at 8:05 PM, Takashi Iwai tiwai@suse.de wrote:
On Wed, 13 Jan 2016 19:34:36 +0100, Dmitry Vyukov wrote:
On Wed, Jan 13, 2016 at 5:53 PM, Takashi Iwai tiwai@suse.de wrote:
This and your other relevant reports seem pointing the race of timer ioctls. Although snd_timer_close() itself calls snd_timer_stop(), there is no other protection against the concurrent execution.
If my guess is correct, a simplistic fix like below should work. It basically serializes the timer ioctl by using a new mutex (and replacing the old tread_sem mutex). They are no longtime blocking calls, so this shouldn't be a big problem. But certainly there can be a less intrusive way to paper over this if this really matters.
In this case for timer.c, I'd leave the final decision rather to Jaroslav. Jaroslav, what do you think?
After applying this patch I still see the following WARNINGS:
------------[ cut here ]------------ WARNING: CPU: 2 PID: 30398 at lib/list_debug.c:53 __list_del_entry+0x10b/0x1e0() list_del corruption, ffff880032d933b0->next is LIST_POISON1 (dead000000000100) Modules linked in: CPU: 2 PID: 30398 Comm: syz-executor Not tainted 4.4.0+ #241 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 00000000ffffffff ffff8800627778d8 ffffffff82926eed ffff880062777948 ffff880061c2af80 ffffffff8660b640 ffff880062777918 ffffffff81350c89 ffffffff8298e77b ffffed000c4eef25 ffffffff8660b640 0000000000000035 Call Trace: [< inline >] __dump_stack lib/dump_stack.c:15 [<ffffffff82926eed>] dump_stack+0x6f/0xa2 lib/dump_stack.c:50 [<ffffffff81350c89>] warn_slowpath_common+0xd9/0x140 kernel/panic.c:483 [<ffffffff81350d99>] warn_slowpath_fmt+0xa9/0xd0 kernel/panic.c:495 [<ffffffff8298e77b>] __list_del_entry+0x10b/0x1e0 lib/list_debug.c:51 [< inline >] list_del_init include/linux/list.h:145 [<ffffffff84ebd199>] _snd_timer_stop+0x119/0x450 sound/core/timer.c:501
This is
list_del_init(&timeri->active_list);
right? Possibly the following oneliner covers it?
Yes, that is this line. Yes, these two patches fix use-after-frees and GPFs.
Tested-by: Dmitry Vyukov dvyukov@google.com
I've re-tested the programs that I reported. But when I started the fuzzer again I hit a similar use-after-free in snd_timer_interrupt:
Is it the result with all patches, i.e. four patches (two for sequencer and two for timer)?
Takashi
BUG: KASAN: use-after-free in snd_timer_interrupt+0xaea/0xc40 at addr ffff8800644df960 Read of size 8 by task kworker/u10:3/561 ============================================================================= BUG kmalloc-256 (Not tainted): kasan: bad access detected
INFO: Allocated in snd_timer_instance_new+0x52/0x3a0 age=13 cpu=2 pid=18656 [< none >] ___slab_alloc+0x486/0x4e0 mm/slub.c:2468 [< none >] __slab_alloc+0x66/0xc0 mm/slub.c:2497 [< inline >] slab_alloc_node mm/slub.c:2560 [< inline >] slab_alloc mm/slub.c:2602 [< none >] kmem_cache_alloc_trace+0x284/0x310 mm/slub.c:2619 [< inline >] kmalloc include/linux/slab.h:458 [< inline >] kzalloc include/linux/slab.h:602 [< none >] snd_timer_instance_new+0x52/0x3a0 sound/core/timer.c:105 [< none >] snd_timer_open+0x522/0xc90 sound/core/timer.c:286 [< none >] snd_seq_timer_open+0x223/0x540 sound/core/seq/seq_timer.c:279 [< none >] snd_seq_queue_use+0x147/0x230 sound/core/seq/seq_queue.c:528 [< none >] snd_seq_queue_alloc+0x36a/0x4d0 sound/core/seq/seq_queue.c:199 [< none >] snd_seq_ioctl_create_queue+0xdb/0x2b0 sound/core/seq/seq_clientmgr.c:1536 [< none >] snd_seq_do_ioctl+0x19a/0x1c0 sound/core/seq/seq_clientmgr.c:2209 [< none >] snd_seq_ioctl+0x5d/0x80 sound/core/seq/seq_clientmgr.c:2224 [< inline >] vfs_ioctl fs/ioctl.c:43 [< none >] do_vfs_ioctl+0x18c/0xfa0 fs/ioctl.c:674 [< inline >] SYSC_ioctl fs/ioctl.c:689 [< none >] SyS_ioctl+0x8f/0xc0 fs/ioctl.c:680 [< none >] entry_SYSCALL_64_fastpath+0x16/0x7a arch/x86/entry/entry_64.S:185
INFO: Freed in snd_timer_close+0x354/0x5f0 age=13 cpu=3 pid=18658 [< none >] __slab_free+0x1fc/0x320 mm/slub.c:2678 [< inline >] slab_free mm/slub.c:2833 [< none >] kfree+0x2a8/0x2d0 mm/slub.c:3662 [< none >] snd_timer_close+0x354/0x5f0 sound/core/timer.c:364 [< none >] snd_seq_timer_close+0x9e/0x100 sound/core/seq/seq_timer.c:312 snd_seq_queue_timer [< none >] snd_seq_ioctl_set_queue_timer+0x159/0x300 sound/core/seq/seq_clientmgr.c:1809 [< none >] snd_seq_do_ioctl+0x19a/0x1c0 sound/core/seq/seq_clientmgr.c:2209 [< none >] snd_seq_ioctl+0x5d/0x80 sound/core/seq/seq_clientmgr.c:2224 [< inline >] vfs_ioctl fs/ioctl.c:43 [< none >] do_vfs_ioctl+0x18c/0xfa0 fs/ioctl.c:674 [< inline >] SYSC_ioctl fs/ioctl.c:689 [< none >] SyS_ioctl+0x8f/0xc0 fs/ioctl.c:680 [< none >] entry_SYSCALL_64_fastpath+0x16/0x7a arch/x86/entry/entry_64.S:185
INFO: Slab 0xffffea0001913700 objects=22 used=10 fp=0xffff8800644df8e0 flags=0x5fffc0000004080 INFO: Object 0xffff8800644df8e0 @offset=14560 fp=0xffff8800644ddf48 CPU: 2 PID: 561 Comm: kworker/u10:3 Tainted: G B 4.4.0+ #243 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Workqueue: events_unbound call_usermodehelper_exec_work 00000000ffffffff ffff88006d607be0 ffffffff82926eed ffff88003e807000 ffff8800644df8e0 ffff8800644dc000 ffff88006d607c10 ffffffff81740ca4 ffff88003e807000 ffffea0001913700 ffff8800644df8e0 ffff8800644df960
Call Trace: [< inline >] kasan_report mm/kasan/report.c:274 [<ffffffff8174a1fe>] __asan_report_load8_noabort+0x3e/0x40 mm/kasan/report.c:295 [<ffffffff84ebe84a>] snd_timer_interrupt+0xaea/0xc40 sound/core/timer.c:680 [<ffffffff84ec6c16>] snd_hrtimer_callback+0x166/0x230 sound/core/hrtimer.c:54 [< inline >] __run_hrtimer kernel/time/hrtimer.c:1229 [<ffffffff814c3723>] __hrtimer_run_queues+0x363/0xc10 kernel/time/hrtimer.c:1293 [<ffffffff814c5732>] hrtimer_interrupt+0x182/0x430 kernel/time/hrtimer.c:1327 [<ffffffff8124e10f>] local_apic_timer_interrupt+0x6f/0xe0 arch/x86/kernel/apic/apic.c:907 [<ffffffff81251576>] smp_apic_timer_interrupt+0x76/0xa0 arch/x86/kernel/apic/apic.c:931 [<ffffffff86273eec>] apic_timer_interrupt+0x8c/0xa0 arch/x86/entry/entry_64.S:520 <EOI> [< inline >] alloc_task_struct_node kernel/fork.c:142 <EOI> [< inline >] dup_task_struct kernel/fork.c:342 <EOI> [<ffffffff8134950e>] copy_process.part.35+0x22e/0x5770 kernel/fork.c:1304 [< inline >] slab_alloc_node mm/slub.c:2560 [<ffffffff817447f3>] kmem_cache_alloc_node+0x93/0x300 mm/slub.c:2630 [< inline >] alloc_task_struct_node kernel/fork.c:142 [< inline >] dup_task_struct kernel/fork.c:342 [<ffffffff8134950e>] copy_process.part.35+0x22e/0x5770 kernel/fork.c:1304 [< inline >] copy_process kernel/fork.c:1275 [<ffffffff8134ed7c>] _do_fork+0x1bc/0xcb0 kernel/fork.c:1724 [<ffffffff8134f8a4>] kernel_thread+0x34/0x40 kernel/fork.c:1785 [< inline >] call_usermodehelper_exec_sync kernel/kmod.c:275 [<ffffffff81391874>] call_usermodehelper_exec_work+0xf4/0x230 kernel/kmod.c:327 [<ffffffff8139e824>] process_one_work+0x794/0x1440 kernel/workqueue.c:2036 [<ffffffff8139f5ab>] worker_thread+0xdb/0xfc0 kernel/workqueue.c:2170 [<ffffffff813b2cef>] kthread+0x23f/0x2d0 drivers/block/aoe/aoecmd.c:1303 [<ffffffff862734af>] ret_from_fork+0x3f/0x70 arch/x86/entry/entry_64.S:468 ==================================================================
On Wed, Jan 13, 2016 at 9:30 PM, Takashi Iwai tiwai@suse.de wrote:
On Wed, 13 Jan 2016 20:41:30 +0100, Dmitry Vyukov wrote:
On Wed, Jan 13, 2016 at 8:30 PM, Dmitry Vyukov dvyukov@google.com wrote:
On Wed, Jan 13, 2016 at 8:05 PM, Takashi Iwai tiwai@suse.de wrote:
On Wed, 13 Jan 2016 19:34:36 +0100, Dmitry Vyukov wrote:
On Wed, Jan 13, 2016 at 5:53 PM, Takashi Iwai tiwai@suse.de wrote:
This and your other relevant reports seem pointing the race of timer ioctls. Although snd_timer_close() itself calls snd_timer_stop(), there is no other protection against the concurrent execution.
If my guess is correct, a simplistic fix like below should work. It basically serializes the timer ioctl by using a new mutex (and replacing the old tread_sem mutex). They are no longtime blocking calls, so this shouldn't be a big problem. But certainly there can be a less intrusive way to paper over this if this really matters.
In this case for timer.c, I'd leave the final decision rather to Jaroslav. Jaroslav, what do you think?
After applying this patch I still see the following WARNINGS:
------------[ cut here ]------------ WARNING: CPU: 2 PID: 30398 at lib/list_debug.c:53 __list_del_entry+0x10b/0x1e0() list_del corruption, ffff880032d933b0->next is LIST_POISON1 (dead000000000100) Modules linked in: CPU: 2 PID: 30398 Comm: syz-executor Not tainted 4.4.0+ #241 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 00000000ffffffff ffff8800627778d8 ffffffff82926eed ffff880062777948 ffff880061c2af80 ffffffff8660b640 ffff880062777918 ffffffff81350c89 ffffffff8298e77b ffffed000c4eef25 ffffffff8660b640 0000000000000035 Call Trace: [< inline >] __dump_stack lib/dump_stack.c:15 [<ffffffff82926eed>] dump_stack+0x6f/0xa2 lib/dump_stack.c:50 [<ffffffff81350c89>] warn_slowpath_common+0xd9/0x140 kernel/panic.c:483 [<ffffffff81350d99>] warn_slowpath_fmt+0xa9/0xd0 kernel/panic.c:495 [<ffffffff8298e77b>] __list_del_entry+0x10b/0x1e0 lib/list_debug.c:51 [< inline >] list_del_init include/linux/list.h:145 [<ffffffff84ebd199>] _snd_timer_stop+0x119/0x450 sound/core/timer.c:501
This is
list_del_init(&timeri->active_list);
right? Possibly the following oneliner covers it?
Yes, that is this line. Yes, these two patches fix use-after-frees and GPFs.
Tested-by: Dmitry Vyukov dvyukov@google.com
I've re-tested the programs that I reported. But when I started the fuzzer again I hit a similar use-after-free in snd_timer_interrupt:
Is it the result with all patches, i.e. four patches (two for sequencer and two for timer)?
Yes, with 4 recent patches.
On Wed, 13 Jan 2016 21:48:47 +0100, Dmitry Vyukov wrote:
On Wed, Jan 13, 2016 at 9:30 PM, Takashi Iwai tiwai@suse.de wrote:
On Wed, 13 Jan 2016 20:41:30 +0100, Dmitry Vyukov wrote:
On Wed, Jan 13, 2016 at 8:30 PM, Dmitry Vyukov dvyukov@google.com wrote:
On Wed, Jan 13, 2016 at 8:05 PM, Takashi Iwai tiwai@suse.de wrote:
On Wed, 13 Jan 2016 19:34:36 +0100, Dmitry Vyukov wrote:
On Wed, Jan 13, 2016 at 5:53 PM, Takashi Iwai tiwai@suse.de wrote: > This and your other relevant reports seem pointing the race of timer > ioctls. Although snd_timer_close() itself calls snd_timer_stop(), > there is no other protection against the concurrent execution. > > If my guess is correct, a simplistic fix like below should work. It > basically serializes the timer ioctl by using a new mutex (and > replacing the old tread_sem mutex). They are no longtime blocking > calls, so this shouldn't be a big problem. But certainly there can be > a less intrusive way to paper over this if this really matters. > > In this case for timer.c, I'd leave the final decision rather to > Jaroslav. Jaroslav, what do you think?
After applying this patch I still see the following WARNINGS:
------------[ cut here ]------------ WARNING: CPU: 2 PID: 30398 at lib/list_debug.c:53 __list_del_entry+0x10b/0x1e0() list_del corruption, ffff880032d933b0->next is LIST_POISON1 (dead000000000100) Modules linked in: CPU: 2 PID: 30398 Comm: syz-executor Not tainted 4.4.0+ #241 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 00000000ffffffff ffff8800627778d8 ffffffff82926eed ffff880062777948 ffff880061c2af80 ffffffff8660b640 ffff880062777918 ffffffff81350c89 ffffffff8298e77b ffffed000c4eef25 ffffffff8660b640 0000000000000035 Call Trace: [< inline >] __dump_stack lib/dump_stack.c:15 [<ffffffff82926eed>] dump_stack+0x6f/0xa2 lib/dump_stack.c:50 [<ffffffff81350c89>] warn_slowpath_common+0xd9/0x140 kernel/panic.c:483 [<ffffffff81350d99>] warn_slowpath_fmt+0xa9/0xd0 kernel/panic.c:495 [<ffffffff8298e77b>] __list_del_entry+0x10b/0x1e0 lib/list_debug.c:51 [< inline >] list_del_init include/linux/list.h:145 [<ffffffff84ebd199>] _snd_timer_stop+0x119/0x450 sound/core/timer.c:501
This is
list_del_init(&timeri->active_list);
right? Possibly the following oneliner covers it?
Yes, that is this line. Yes, these two patches fix use-after-frees and GPFs.
Tested-by: Dmitry Vyukov dvyukov@google.com
I've re-tested the programs that I reported. But when I started the fuzzer again I hit a similar use-after-free in snd_timer_interrupt:
Is it the result with all patches, i.e. four patches (two for sequencer and two for timer)?
Yes, with 4 recent patches.
OK, then this might be a possible race at the current snd_timer_stop() implementation. There is no sync action there, so the ISR might be still alive after snd_timer_close() call. Or might be another race. This pattern looks a bit different, as it's involved with hrtimer.
I'll take a look at it tomorrow.
thanks,
Takashi
On Wed, 13 Jan 2016 21:54:10 +0100, Takashi Iwai wrote:
OK, then this might be a possible race at the current snd_timer_stop() implementation. There is no sync action there, so the ISR might be still alive after snd_timer_close() call. Or might be another race. This pattern looks a bit different, as it's involved with hrtimer.
I'll take a look at it tomorrow.
I've audited the code today, but the open window doesn't look like what I expected. I found only some possible cases with slave timer instances.
In anyway, below is a test fix patch. Since I couldn't reproduce the issue on my local machines, it's hard to say whether this covers the holes you fell. Let's see...
thanks,
Takashi
-- 8< -- From: Takashi Iwai tiwai@suse.de Subject: [PATCH] ALSA: timer: Harden slave timer list handling
A slave timer instance might be still accessible in a racy way while operating the master instance as it lacks of locking. Since the master operation is mostly protected with timer->lock, we should cope with it while changing the slave instance, too. Also, some linked lists (active_list and ack_list) of slave instances aren't unlinked immediately at stopping or closing, and this may lead to unexpected accesses.
This patch tries to address these issues. It adds spin lock of timer->lock (either from master or slave, which is equivalent) in a few places. For avoiding a deadlock, we ensure that the global slave_active_lock is always locked at first before each timer lock.
Also, ack and active_list of slave instances are properly unlinked at snd_timer_stop() and snd_timer_close().
Last but not least, remove the superfluous call of _snd_timer_stop() at removing slave links. This is a noop, and calling it may confuse readers wrt locking. Further cleanup will follow in a later patch.
Actually we've got reports of use-after-free by syzkaller fuzzer, and this hopefully fixes these issues.
Reported-by: Dmitry Vyukov dvyukov@google.com Cc: stable@vger.kernel.org Signed-off-by: Takashi Iwai tiwai@suse.de --- sound/core/timer.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-)
diff --git a/sound/core/timer.c b/sound/core/timer.c index 3810ee8f1205..4e8d7bfffff6 100644 --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -215,11 +215,13 @@ static void snd_timer_check_master(struct snd_timer_instance *master) slave->slave_id == master->slave_id) { list_move_tail(&slave->open_list, &master->slave_list_head); spin_lock_irq(&slave_active_lock); + spin_lock(&master->timer->lock); slave->master = master; slave->timer = master->timer; if (slave->flags & SNDRV_TIMER_IFLG_RUNNING) list_add_tail(&slave->active_list, &master->slave_active_head); + spin_unlock(&master->timer->lock); spin_unlock_irq(&slave_active_lock); } } @@ -346,15 +348,18 @@ int snd_timer_close(struct snd_timer_instance *timeri) timer->hw.close) timer->hw.close(timer); /* remove slave links */ + spin_lock_irq(&slave_active_lock); + spin_lock(&timer->lock); list_for_each_entry_safe(slave, tmp, &timeri->slave_list_head, open_list) { - spin_lock_irq(&slave_active_lock); - _snd_timer_stop(slave, 1, SNDRV_TIMER_EVENT_RESOLUTION); list_move_tail(&slave->open_list, &snd_timer_slave_list); slave->master = NULL; slave->timer = NULL; - spin_unlock_irq(&slave_active_lock); + list_del_init(&slave->ack_list); + list_del_init(&slave->active_list); } + spin_unlock(&timer->lock); + spin_unlock_irq(&slave_active_lock); mutex_unlock(®ister_mutex); } out: @@ -441,9 +446,12 @@ static int snd_timer_start_slave(struct snd_timer_instance *timeri)
spin_lock_irqsave(&slave_active_lock, flags); timeri->flags |= SNDRV_TIMER_IFLG_RUNNING; - if (timeri->master) + if (timeri->master && timeri->timer) { + spin_lock(&timeri->timer->lock); list_add_tail(&timeri->active_list, &timeri->master->slave_active_head); + spin_unlock(&timeri->timer->lock); + } spin_unlock_irqrestore(&slave_active_lock, flags); return 1; /* delayed start */ } @@ -489,6 +497,8 @@ static int _snd_timer_stop(struct snd_timer_instance * timeri, if (!keep_flag) { spin_lock_irqsave(&slave_active_lock, flags); timeri->flags &= ~SNDRV_TIMER_IFLG_RUNNING; + list_del_init(&timeri->ack_list); + list_del_init(&timeri->active_list); spin_unlock_irqrestore(&slave_active_lock, flags); } goto __end;
On Thu, Jan 14, 2016 at 5:09 PM, Takashi Iwai tiwai@suse.de wrote:
On Wed, 13 Jan 2016 21:54:10 +0100, Takashi Iwai wrote:
OK, then this might be a possible race at the current snd_timer_stop() implementation. There is no sync action there, so the ISR might be still alive after snd_timer_close() call. Or might be another race. This pattern looks a bit different, as it's involved with hrtimer.
I'll take a look at it tomorrow.
I've audited the code today, but the open window doesn't look like what I expected. I found only some possible cases with slave timer instances.
In anyway, below is a test fix patch. Since I couldn't reproduce the issue on my local machines, it's hard to say whether this covers the holes you fell. Let's see...
Hi Takashi,
I would be interested to understand why other people can't reproduce issues that I hit pretty reliably. I suspect that it can be due to .config. Please try with the following config values.
I also start qemu with "-soundhw all" arg.
CONFIG_SOUND=y CONFIG_SOUND_OSS_CORE=y CONFIG_SOUND_OSS_CORE_PRECLAIM=y CONFIG_SND=y CONFIG_SND_TIMER=y CONFIG_SND_PCM=y CONFIG_SND_HWDEP=y CONFIG_SND_RAWMIDI=y CONFIG_SND_JACK=y CONFIG_SND_SEQUENCER=y CONFIG_SND_SEQ_DUMMY=y CONFIG_SND_OSSEMUL=y CONFIG_SND_MIXER_OSS=y CONFIG_SND_PCM_OSS=y CONFIG_SND_PCM_OSS_PLUGINS=y CONFIG_SND_PCM_TIMER=y CONFIG_SND_SEQUENCER_OSS=y CONFIG_SND_HRTIMER=y CONFIG_SND_SEQ_HRTIMER_DEFAULT=y CONFIG_SND_SUPPORT_OLD_API=y CONFIG_SND_PROC_FS=y CONFIG_SND_VERBOSE_PROCFS=y CONFIG_SND_VMASTER=y CONFIG_SND_DMA_SGBUF=y CONFIG_SND_RAWMIDI_SEQ=y CONFIG_SND_OPL3_LIB_SEQ=y CONFIG_SND_MPU401_UART=y CONFIG_SND_OPL3_LIB=y CONFIG_SND_AC97_CODEC=y CONFIG_SND_DRIVERS=y CONFIG_SND_AC97_POWER_SAVE=y CONFIG_SND_AC97_POWER_SAVE_DEFAULT=0 CONFIG_SND_SB_COMMON=y CONFIG_SND_PCI=y CONFIG_SND_AD1889=y CONFIG_SND_ALS300=y CONFIG_SND_ALS4000=y CONFIG_SND_ALI5451=y CONFIG_SND_OXYGEN_LIB=y CONFIG_SND_VIRTUOSO=y CONFIG_SND_HDA=y CONFIG_SND_HDA_INTEL=y CONFIG_SND_HDA_HWDEP=y CONFIG_SND_HDA_RECONFIG=y CONFIG_SND_HDA_INPUT_BEEP=y CONFIG_SND_HDA_INPUT_BEEP_MODE=1 CONFIG_SND_HDA_PATCH_LOADER=y CONFIG_SND_HDA_CODEC_REALTEK=y CONFIG_SND_HDA_CODEC_ANALOG=y CONFIG_SND_HDA_CODEC_SIGMATEL=y CONFIG_SND_HDA_CODEC_VIA=y CONFIG_SND_HDA_CODEC_HDMI=y CONFIG_SND_HDA_GENERIC=y CONFIG_SND_HDA_POWER_SAVE_DEFAULT=0 CONFIG_SND_HDA_CORE=y CONFIG_SND_HDA_I915=y CONFIG_SND_HDA_PREALLOC_SIZE=64 CONFIG_SND_USB=y CONFIG_SND_USB_AUDIO=y CONFIG_SND_FIREWIRE=y CONFIG_SND_FIREWIRE_LIB=y CONFIG_SND_DICE=y CONFIG_SND_OXFW=y CONFIG_SND_ISIGHT=y CONFIG_SND_SCS1X=y CONFIG_SND_FIREWORKS=y CONFIG_SND_BEBOB=y CONFIG_SND_FIREWIRE_DIGI00X=y CONFIG_SND_FIREWIRE_TASCAM=y CONFIG_SND_PCMCIA=y
Takashi
-- 8< -- From: Takashi Iwai tiwai@suse.de Subject: [PATCH] ALSA: timer: Harden slave timer list handling
A slave timer instance might be still accessible in a racy way while operating the master instance as it lacks of locking. Since the master operation is mostly protected with timer->lock, we should cope with it while changing the slave instance, too. Also, some linked lists (active_list and ack_list) of slave instances aren't unlinked immediately at stopping or closing, and this may lead to unexpected accesses.
This patch tries to address these issues. It adds spin lock of timer->lock (either from master or slave, which is equivalent) in a few places. For avoiding a deadlock, we ensure that the global slave_active_lock is always locked at first before each timer lock.
Also, ack and active_list of slave instances are properly unlinked at snd_timer_stop() and snd_timer_close().
Last but not least, remove the superfluous call of _snd_timer_stop() at removing slave links. This is a noop, and calling it may confuse readers wrt locking. Further cleanup will follow in a later patch.
Actually we've got reports of use-after-free by syzkaller fuzzer, and this hopefully fixes these issues.
Reported-by: Dmitry Vyukov dvyukov@google.com Cc: stable@vger.kernel.org Signed-off-by: Takashi Iwai tiwai@suse.de
sound/core/timer.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-)
diff --git a/sound/core/timer.c b/sound/core/timer.c index 3810ee8f1205..4e8d7bfffff6 100644 --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -215,11 +215,13 @@ static void snd_timer_check_master(struct snd_timer_instance *master) slave->slave_id == master->slave_id) { list_move_tail(&slave->open_list, &master->slave_list_head); spin_lock_irq(&slave_active_lock);
spin_lock(&master->timer->lock); slave->master = master; slave->timer = master->timer; if (slave->flags & SNDRV_TIMER_IFLG_RUNNING) list_add_tail(&slave->active_list, &master->slave_active_head);
spin_unlock(&master->timer->lock); spin_unlock_irq(&slave_active_lock); } }
@@ -346,15 +348,18 @@ int snd_timer_close(struct snd_timer_instance *timeri) timer->hw.close) timer->hw.close(timer); /* remove slave links */
spin_lock_irq(&slave_active_lock);
spin_lock(&timer->lock); list_for_each_entry_safe(slave, tmp, &timeri->slave_list_head, open_list) {
spin_lock_irq(&slave_active_lock);
_snd_timer_stop(slave, 1, SNDRV_TIMER_EVENT_RESOLUTION); list_move_tail(&slave->open_list, &snd_timer_slave_list); slave->master = NULL; slave->timer = NULL;
spin_unlock_irq(&slave_active_lock);
list_del_init(&slave->ack_list);
list_del_init(&slave->active_list); }
spin_unlock(&timer->lock);
out:spin_unlock_irq(&slave_active_lock); mutex_unlock(®ister_mutex); }
@@ -441,9 +446,12 @@ static int snd_timer_start_slave(struct snd_timer_instance *timeri)
spin_lock_irqsave(&slave_active_lock, flags); timeri->flags |= SNDRV_TIMER_IFLG_RUNNING;
if (timeri->master)
if (timeri->master && timeri->timer) {
spin_lock(&timeri->timer->lock); list_add_tail(&timeri->active_list, &timeri->master->slave_active_head);
spin_unlock(&timeri->timer->lock);
} spin_unlock_irqrestore(&slave_active_lock, flags); return 1; /* delayed start */
} @@ -489,6 +497,8 @@ static int _snd_timer_stop(struct snd_timer_instance * timeri, if (!keep_flag) { spin_lock_irqsave(&slave_active_lock, flags); timeri->flags &= ~SNDRV_TIMER_IFLG_RUNNING;
list_del_init(&timeri->ack_list);
list_del_init(&timeri->active_list); spin_unlock_irqrestore(&slave_active_lock, flags); } goto __end;
-- 2.7.0
On Fri, 15 Jan 2016 09:06:10 +0100, Dmitry Vyukov wrote:
On Thu, Jan 14, 2016 at 5:09 PM, Takashi Iwai tiwai@suse.de wrote:
On Wed, 13 Jan 2016 21:54:10 +0100, Takashi Iwai wrote:
OK, then this might be a possible race at the current snd_timer_stop() implementation. There is no sync action there, so the ISR might be still alive after snd_timer_close() call. Or might be another race. This pattern looks a bit different, as it's involved with hrtimer.
I'll take a look at it tomorrow.
I've audited the code today, but the open window doesn't look like what I expected. I found only some possible cases with slave timer instances.
In anyway, below is a test fix patch. Since I couldn't reproduce the issue on my local machines, it's hard to say whether this covers the holes you fell. Let's see...
Hi Takashi,
I would be interested to understand why other people can't reproduce issues that I hit pretty reliably. I suspect that it can be due to .config. Please try with the following config values.
I guess rather other config, e.g. the kernel debug options. I suppose you enabled KASAN and DEBUG_LIST. What else?
I also start qemu with "-soundhw all" arg.
OK, so you're testing with VM? This makes easier to recheck.
Takashi
On Fri, Jan 15, 2016 at 12:00 PM, Takashi Iwai tiwai@suse.de wrote:
On Fri, 15 Jan 2016 09:06:10 +0100, Dmitry Vyukov wrote:
On Thu, Jan 14, 2016 at 5:09 PM, Takashi Iwai tiwai@suse.de wrote:
On Wed, 13 Jan 2016 21:54:10 +0100, Takashi Iwai wrote:
OK, then this might be a possible race at the current snd_timer_stop() implementation. There is no sync action there, so the ISR might be still alive after snd_timer_close() call. Or might be another race. This pattern looks a bit different, as it's involved with hrtimer.
I'll take a look at it tomorrow.
I've audited the code today, but the open window doesn't look like what I expected. I found only some possible cases with slave timer instances.
In anyway, below is a test fix patch. Since I couldn't reproduce the issue on my local machines, it's hard to say whether this covers the holes you fell. Let's see...
Hi Takashi,
I would be interested to understand why other people can't reproduce issues that I hit pretty reliably. I suspect that it can be due to .config. Please try with the following config values.
I guess rather other config, e.g. the kernel debug options. I suppose you enabled KASAN and DEBUG_LIST. What else?
I've attached my config (you will need to disable CONFIG_KCOV, it is not upstreamed).
I also start qemu with "-soundhw all" arg.
OK, so you're testing with VM? This makes easier to recheck.
Yes, I start qemu as:
qemu-system-x86_64 -hda wheezy.img -net user,host=10.0.2.10,hostfwd=tcp::10022-:22 -net nic -nographic -kernel arch/x86/boot/bzImage -append "console=ttyS0 root=/dev/sda debug earlyprintk=serial slub_debug=UZ" -enable-kvm -m 2G -numa node,nodeid=0,cpus=0-1 -numa node,nodeid=1,cpus=2-3 -smp sockets=2,cores=2,threads=1 -usb -usbdevice mouse -usbdevice tablet -soundhw all
On Fri, 15 Jan 2016 12:03:17 +0100, Dmitry Vyukov wrote:
On Fri, Jan 15, 2016 at 12:00 PM, Takashi Iwai tiwai@suse.de wrote:
On Fri, 15 Jan 2016 09:06:10 +0100, Dmitry Vyukov wrote:
On Thu, Jan 14, 2016 at 5:09 PM, Takashi Iwai tiwai@suse.de wrote:
On Wed, 13 Jan 2016 21:54:10 +0100, Takashi Iwai wrote:
OK, then this might be a possible race at the current snd_timer_stop() implementation. There is no sync action there, so the ISR might be still alive after snd_timer_close() call. Or might be another race. This pattern looks a bit different, as it's involved with hrtimer.
I'll take a look at it tomorrow.
I've audited the code today, but the open window doesn't look like what I expected. I found only some possible cases with slave timer instances.
In anyway, below is a test fix patch. Since I couldn't reproduce the issue on my local machines, it's hard to say whether this covers the holes you fell. Let's see...
Hi Takashi,
I would be interested to understand why other people can't reproduce issues that I hit pretty reliably. I suspect that it can be due to .config. Please try with the following config values.
I guess rather other config, e.g. the kernel debug options. I suppose you enabled KASAN and DEBUG_LIST. What else?
I've attached my config (you will need to disable CONFIG_KCOV, it is not upstreamed).
Hm, that has lots of other drivers built-in...
I also start qemu with "-soundhw all" arg.
OK, so you're testing with VM? This makes easier to recheck.
Yes, I start qemu as:
qemu-system-x86_64 -hda wheezy.img -net user,host=10.0.2.10,hostfwd=tcp::10022-:22 -net nic -nographic -kernel arch/x86/boot/bzImage -append "console=ttyS0 root=/dev/sda debug earlyprintk=serial slub_debug=UZ" -enable-kvm -m 2G -numa node,nodeid=0,cpus=0-1 -numa node,nodeid=1,cpus=2-3 -smp sockets=2,cores=2,threads=1 -usb -usbdevice mouse -usbdevice tablet -soundhw all
And which test did trigger use-after-free, even with all previous patches?
Takashi
On Fri, Jan 15, 2016 at 2:51 PM, Takashi Iwai tiwai@suse.de wrote:
On Fri, 15 Jan 2016 12:03:17 +0100, Dmitry Vyukov wrote:
On Fri, Jan 15, 2016 at 12:00 PM, Takashi Iwai tiwai@suse.de wrote:
On Fri, 15 Jan 2016 09:06:10 +0100, Dmitry Vyukov wrote:
On Thu, Jan 14, 2016 at 5:09 PM, Takashi Iwai tiwai@suse.de wrote:
On Wed, 13 Jan 2016 21:54:10 +0100, Takashi Iwai wrote:
OK, then this might be a possible race at the current snd_timer_stop() implementation. There is no sync action there, so the ISR might be still alive after snd_timer_close() call. Or might be another race. This pattern looks a bit different, as it's involved with hrtimer.
I'll take a look at it tomorrow.
I've audited the code today, but the open window doesn't look like what I expected. I found only some possible cases with slave timer instances.
In anyway, below is a test fix patch. Since I couldn't reproduce the issue on my local machines, it's hard to say whether this covers the holes you fell. Let's see...
Hi Takashi,
I would be interested to understand why other people can't reproduce issues that I hit pretty reliably. I suspect that it can be due to .config. Please try with the following config values.
I guess rather other config, e.g. the kernel debug options. I suppose you enabled KASAN and DEBUG_LIST. What else?
I've attached my config (you will need to disable CONFIG_KCOV, it is not upstreamed).
Hm, that has lots of other drivers built-in...
I also start qemu with "-soundhw all" arg.
OK, so you're testing with VM? This makes easier to recheck.
Yes, I start qemu as:
qemu-system-x86_64 -hda wheezy.img -net user,host=10.0.2.10,hostfwd=tcp::10022-:22 -net nic -nographic -kernel arch/x86/boot/bzImage -append "console=ttyS0 root=/dev/sda debug earlyprintk=serial slub_debug=UZ" -enable-kvm -m 2G -numa node,nodeid=0,cpus=0-1 -numa node,nodeid=1,cpus=2-3 -smp sockets=2,cores=2,threads=1 -usb -usbdevice mouse -usbdevice tablet -soundhw all
And which test did trigger use-after-free, even with all previous patches?
I will try to extract a new reproducer now. Meanwhile, can you try to reproduce this one: https://groups.google.com/forum/#!msg/syzkaller/bbtG9_h1ONU/CPLblMC6FAAJ ? I run the program in a tight parallel loop.
On Fri, 15 Jan 2016 15:38:58 +0100, Dmitry Vyukov wrote:
On Fri, Jan 15, 2016 at 2:51 PM, Takashi Iwai tiwai@suse.de wrote:
On Fri, 15 Jan 2016 12:03:17 +0100, Dmitry Vyukov wrote:
On Fri, Jan 15, 2016 at 12:00 PM, Takashi Iwai tiwai@suse.de wrote:
On Fri, 15 Jan 2016 09:06:10 +0100, Dmitry Vyukov wrote:
On Thu, Jan 14, 2016 at 5:09 PM, Takashi Iwai tiwai@suse.de wrote:
On Wed, 13 Jan 2016 21:54:10 +0100, Takashi Iwai wrote: > > OK, then this might be a possible race at the current snd_timer_stop() > implementation. There is no sync action there, so the ISR might be > still alive after snd_timer_close() call. Or might be another race. > This pattern looks a bit different, as it's involved with hrtimer. > > I'll take a look at it tomorrow.
I've audited the code today, but the open window doesn't look like what I expected. I found only some possible cases with slave timer instances.
In anyway, below is a test fix patch. Since I couldn't reproduce the issue on my local machines, it's hard to say whether this covers the holes you fell. Let's see...
Hi Takashi,
I would be interested to understand why other people can't reproduce issues that I hit pretty reliably. I suspect that it can be due to .config. Please try with the following config values.
I guess rather other config, e.g. the kernel debug options. I suppose you enabled KASAN and DEBUG_LIST. What else?
I've attached my config (you will need to disable CONFIG_KCOV, it is not upstreamed).
Hm, that has lots of other drivers built-in...
I also start qemu with "-soundhw all" arg.
OK, so you're testing with VM? This makes easier to recheck.
Yes, I start qemu as:
qemu-system-x86_64 -hda wheezy.img -net user,host=10.0.2.10,hostfwd=tcp::10022-:22 -net nic -nographic -kernel arch/x86/boot/bzImage -append "console=ttyS0 root=/dev/sda debug earlyprintk=serial slub_debug=UZ" -enable-kvm -m 2G -numa node,nodeid=0,cpus=0-1 -numa node,nodeid=1,cpus=2-3 -smp sockets=2,cores=2,threads=1 -usb -usbdevice mouse -usbdevice tablet -soundhw all
And which test did trigger use-after-free, even with all previous patches?
I will try to extract a new reproducer now. Meanwhile, can you try to reproduce this one: https://groups.google.com/forum/#!msg/syzkaller/bbtG9_h1ONU/CPLblMC6FAAJ ? I run the program in a tight parallel loop.
So you're running this in parallel? Or a tight sequential loop? I did the latter, and I tried even this on a bare metal, but couldn't trigger the Oops, so far.
Meanwhile, I pushed the tree including all fixes at for-linus branch: git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound.git for-linus
It'd be appreciated if you can test this one.
thanks,
Takashi
On Fri, Jan 15, 2016 at 4:21 PM, Takashi Iwai tiwai@suse.de wrote:
So you're running this in parallel? Or a tight sequential loop? I did the latter, and I tried even this on a bare metal, but couldn't trigger the Oops, so far.
Yes, I run it in parallel using:
$ go get golang.org/x/tools/cmd/stress $ ./stress -p 8 ./a.out
But it just keeps 8 parallel processes running.
Meanwhile, I pushed the tree including all fixes at for-linus branch: git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound.git for-linus
It'd be appreciated if you can test this one.
Is it different from the patches you mailed? I keep several dozens fixes for bugs that are not yet merged into Linus tree + own kcov patch. It is not easy to rebase... Here is what I now have for sound/ https://gist.githubusercontent.com/dvyukov/dc29dbfd320126285fd8/raw/e2ca7b59...
On Fri, 15 Jan 2016 16:28:33 +0100, Dmitry Vyukov wrote:
On Fri, Jan 15, 2016 at 4:21 PM, Takashi Iwai tiwai@suse.de wrote:
So you're running this in parallel? Or a tight sequential loop? I did the latter, and I tried even this on a bare metal, but couldn't trigger the Oops, so far.
Yes, I run it in parallel using:
$ go get golang.org/x/tools/cmd/stress $ ./stress -p 8 ./a.out
But it just keeps 8 parallel processes running.
OK, then a bit different than I tested. Will check.
Meanwhile, I pushed the tree including all fixes at for-linus branch: git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound.git for-linus
It'd be appreciated if you can test this one.
Is it different from the patches you mailed?
No, they should be basically same, but just to make sure that we're on the same ground.
I keep several dozens fixes for bugs that are not yet merged into Linus tree + own kcov patch. It is not easy to rebase...
The branch should be pullable onto 4.4-final cleanly.
Here is what I now have for sound/ https://gist.githubusercontent.com/dvyukov/dc29dbfd320126285fd8/raw/e2ca7b59...
Takashi
On Fri, Jan 15, 2016 at 3:38 PM, Dmitry Vyukov dvyukov@google.com wrote:
On Fri, Jan 15, 2016 at 2:51 PM, Takashi Iwai tiwai@suse.de wrote:
On Fri, 15 Jan 2016 12:03:17 +0100, Dmitry Vyukov wrote:
On Fri, Jan 15, 2016 at 12:00 PM, Takashi Iwai tiwai@suse.de wrote:
On Fri, 15 Jan 2016 09:06:10 +0100, Dmitry Vyukov wrote:
On Thu, Jan 14, 2016 at 5:09 PM, Takashi Iwai tiwai@suse.de wrote:
On Wed, 13 Jan 2016 21:54:10 +0100, Takashi Iwai wrote: > > OK, then this might be a possible race at the current snd_timer_stop() > implementation. There is no sync action there, so the ISR might be > still alive after snd_timer_close() call. Or might be another race. > This pattern looks a bit different, as it's involved with hrtimer. > > I'll take a look at it tomorrow.
I've audited the code today, but the open window doesn't look like what I expected. I found only some possible cases with slave timer instances.
In anyway, below is a test fix patch. Since I couldn't reproduce the issue on my local machines, it's hard to say whether this covers the holes you fell. Let's see...
Hi Takashi,
I would be interested to understand why other people can't reproduce issues that I hit pretty reliably. I suspect that it can be due to .config. Please try with the following config values.
I guess rather other config, e.g. the kernel debug options. I suppose you enabled KASAN and DEBUG_LIST. What else?
I've attached my config (you will need to disable CONFIG_KCOV, it is not upstreamed).
Hm, that has lots of other drivers built-in...
I also start qemu with "-soundhw all" arg.
OK, so you're testing with VM? This makes easier to recheck.
Yes, I start qemu as:
qemu-system-x86_64 -hda wheezy.img -net user,host=10.0.2.10,hostfwd=tcp::10022-:22 -net nic -nographic -kernel arch/x86/boot/bzImage -append "console=ttyS0 root=/dev/sda debug earlyprintk=serial slub_debug=UZ" -enable-kvm -m 2G -numa node,nodeid=0,cpus=0-1 -numa node,nodeid=1,cpus=2-3 -smp sockets=2,cores=2,threads=1 -usb -usbdevice mouse -usbdevice tablet -soundhw all
And which test did trigger use-after-free, even with all previous patches?
I will try to extract a new reproducer now.
Ok, I does not seem to see any crashes except the timer hangs below. Let's consider all other bugs as fixed. I will report anything new that I see separately.
Meanwhile, can you try to reproduce this one: https://groups.google.com/forum/#!msg/syzkaller/bbtG9_h1ONU/CPLblMC6FAAJ ? I run the program in a tight parallel loop.
On Fri, 15 Jan 2016 20:13:11 +0100, Dmitry Vyukov wrote:
On Fri, Jan 15, 2016 at 3:38 PM, Dmitry Vyukov dvyukov@google.com wrote:
On Fri, Jan 15, 2016 at 2:51 PM, Takashi Iwai tiwai@suse.de wrote:
On Fri, 15 Jan 2016 12:03:17 +0100, Dmitry Vyukov wrote:
On Fri, Jan 15, 2016 at 12:00 PM, Takashi Iwai tiwai@suse.de wrote:
On Fri, 15 Jan 2016 09:06:10 +0100, Dmitry Vyukov wrote:
On Thu, Jan 14, 2016 at 5:09 PM, Takashi Iwai tiwai@suse.de wrote: > On Wed, 13 Jan 2016 21:54:10 +0100, > Takashi Iwai wrote: >> >> OK, then this might be a possible race at the current snd_timer_stop() >> implementation. There is no sync action there, so the ISR might be >> still alive after snd_timer_close() call. Or might be another race. >> This pattern looks a bit different, as it's involved with hrtimer. >> >> I'll take a look at it tomorrow. > > I've audited the code today, but the open window doesn't look like > what I expected. I found only some possible cases with slave timer > instances. > > In anyway, below is a test fix patch. Since I couldn't reproduce the > issue on my local machines, it's hard to say whether this covers the > holes you fell. Let's see...
Hi Takashi,
I would be interested to understand why other people can't reproduce issues that I hit pretty reliably. I suspect that it can be due to .config. Please try with the following config values.
I guess rather other config, e.g. the kernel debug options. I suppose you enabled KASAN and DEBUG_LIST. What else?
I've attached my config (you will need to disable CONFIG_KCOV, it is not upstreamed).
Hm, that has lots of other drivers built-in...
I also start qemu with "-soundhw all" arg.
OK, so you're testing with VM? This makes easier to recheck.
Yes, I start qemu as:
qemu-system-x86_64 -hda wheezy.img -net user,host=10.0.2.10,hostfwd=tcp::10022-:22 -net nic -nographic -kernel arch/x86/boot/bzImage -append "console=ttyS0 root=/dev/sda debug earlyprintk=serial slub_debug=UZ" -enable-kvm -m 2G -numa node,nodeid=0,cpus=0-1 -numa node,nodeid=1,cpus=2-3 -smp sockets=2,cores=2,threads=1 -usb -usbdevice mouse -usbdevice tablet -soundhw all
And which test did trigger use-after-free, even with all previous patches?
I will try to extract a new reproducer now.
Ok, I does not seem to see any crashes except the timer hangs below. Let's consider all other bugs as fixed. I will report anything new that I see separately.
OK, good to hear.
Meanwhile, can you try to reproduce this one: https://groups.google.com/forum/#!msg/syzkaller/bbtG9_h1ONU/CPLblMC6FAAJ ? I run the program in a tight parallel loop.
I could reproduce this after your suggestion with parallel runs.
This seems specific to hrtimer. Possibly it's not about the snd-timer core itself. Could you check whether this doesn't happen when CONFIG_SND_HRTIMER isn't set?
thanks,
Takashi
On Fri, Jan 15, 2016 at 8:18 PM, Takashi Iwai tiwai@suse.de wrote:
On Fri, 15 Jan 2016 20:13:11 +0100, Dmitry Vyukov wrote:
On Fri, Jan 15, 2016 at 3:38 PM, Dmitry Vyukov dvyukov@google.com wrote:
On Fri, Jan 15, 2016 at 2:51 PM, Takashi Iwai tiwai@suse.de wrote:
On Fri, 15 Jan 2016 12:03:17 +0100, Dmitry Vyukov wrote:
On Fri, Jan 15, 2016 at 12:00 PM, Takashi Iwai tiwai@suse.de wrote:
On Fri, 15 Jan 2016 09:06:10 +0100, Dmitry Vyukov wrote: > > On Thu, Jan 14, 2016 at 5:09 PM, Takashi Iwai tiwai@suse.de wrote: > > On Wed, 13 Jan 2016 21:54:10 +0100, > > Takashi Iwai wrote: > >> > >> OK, then this might be a possible race at the current snd_timer_stop() > >> implementation. There is no sync action there, so the ISR might be > >> still alive after snd_timer_close() call. Or might be another race. > >> This pattern looks a bit different, as it's involved with hrtimer. > >> > >> I'll take a look at it tomorrow. > > > > I've audited the code today, but the open window doesn't look like > > what I expected. I found only some possible cases with slave timer > > instances. > > > > In anyway, below is a test fix patch. Since I couldn't reproduce the > > issue on my local machines, it's hard to say whether this covers the > > holes you fell. Let's see... > > > Hi Takashi, > > I would be interested to understand why other people can't reproduce > issues that I hit pretty reliably. > I suspect that it can be due to .config. Please try with the following > config values.
I guess rather other config, e.g. the kernel debug options. I suppose you enabled KASAN and DEBUG_LIST. What else?
I've attached my config (you will need to disable CONFIG_KCOV, it is not upstreamed).
Hm, that has lots of other drivers built-in...
> I also start qemu with "-soundhw all" arg.
OK, so you're testing with VM? This makes easier to recheck.
Yes, I start qemu as:
qemu-system-x86_64 -hda wheezy.img -net user,host=10.0.2.10,hostfwd=tcp::10022-:22 -net nic -nographic -kernel arch/x86/boot/bzImage -append "console=ttyS0 root=/dev/sda debug earlyprintk=serial slub_debug=UZ" -enable-kvm -m 2G -numa node,nodeid=0,cpus=0-1 -numa node,nodeid=1,cpus=2-3 -smp sockets=2,cores=2,threads=1 -usb -usbdevice mouse -usbdevice tablet -soundhw all
And which test did trigger use-after-free, even with all previous patches?
I will try to extract a new reproducer now.
Ok, I does not seem to see any crashes except the timer hangs below. Let's consider all other bugs as fixed. I will report anything new that I see separately.
OK, good to hear.
Meanwhile, can you try to reproduce this one: https://groups.google.com/forum/#!msg/syzkaller/bbtG9_h1ONU/CPLblMC6FAAJ ? I run the program in a tight parallel loop.
I could reproduce this after your suggestion with parallel runs.
This seems specific to hrtimer. Possibly it's not about the snd-timer core itself. Could you check whether this doesn't happen when CONFIG_SND_HRTIMER isn't set?
Does not happen without CONFIG_SND_HRTIMER. Do you mean that this is hrtimer bug?
On Fri, 15 Jan 2016 20:47:05 +0100, Dmitry Vyukov wrote:
On Fri, Jan 15, 2016 at 8:18 PM, Takashi Iwai tiwai@suse.de wrote:
On Fri, 15 Jan 2016 20:13:11 +0100, Dmitry Vyukov wrote:
On Fri, Jan 15, 2016 at 3:38 PM, Dmitry Vyukov dvyukov@google.com wrote:
On Fri, Jan 15, 2016 at 2:51 PM, Takashi Iwai tiwai@suse.de wrote:
On Fri, 15 Jan 2016 12:03:17 +0100, Dmitry Vyukov wrote:
On Fri, Jan 15, 2016 at 12:00 PM, Takashi Iwai tiwai@suse.de wrote: > On Fri, 15 Jan 2016 09:06:10 +0100, > Dmitry Vyukov wrote: >> >> On Thu, Jan 14, 2016 at 5:09 PM, Takashi Iwai tiwai@suse.de wrote: >> > On Wed, 13 Jan 2016 21:54:10 +0100, >> > Takashi Iwai wrote: >> >> >> >> OK, then this might be a possible race at the current snd_timer_stop() >> >> implementation. There is no sync action there, so the ISR might be >> >> still alive after snd_timer_close() call. Or might be another race. >> >> This pattern looks a bit different, as it's involved with hrtimer. >> >> >> >> I'll take a look at it tomorrow. >> > >> > I've audited the code today, but the open window doesn't look like >> > what I expected. I found only some possible cases with slave timer >> > instances. >> > >> > In anyway, below is a test fix patch. Since I couldn't reproduce the >> > issue on my local machines, it's hard to say whether this covers the >> > holes you fell. Let's see... >> >> >> Hi Takashi, >> >> I would be interested to understand why other people can't reproduce >> issues that I hit pretty reliably. >> I suspect that it can be due to .config. Please try with the following >> config values. > > I guess rather other config, e.g. the kernel debug options. > I suppose you enabled KASAN and DEBUG_LIST. What else?
I've attached my config (you will need to disable CONFIG_KCOV, it is not upstreamed).
Hm, that has lots of other drivers built-in...
>> I also start qemu with "-soundhw all" arg. > > OK, so you're testing with VM? This makes easier to recheck.
Yes, I start qemu as:
qemu-system-x86_64 -hda wheezy.img -net user,host=10.0.2.10,hostfwd=tcp::10022-:22 -net nic -nographic -kernel arch/x86/boot/bzImage -append "console=ttyS0 root=/dev/sda debug earlyprintk=serial slub_debug=UZ" -enable-kvm -m 2G -numa node,nodeid=0,cpus=0-1 -numa node,nodeid=1,cpus=2-3 -smp sockets=2,cores=2,threads=1 -usb -usbdevice mouse -usbdevice tablet -soundhw all
And which test did trigger use-after-free, even with all previous patches?
I will try to extract a new reproducer now.
Ok, I does not seem to see any crashes except the timer hangs below. Let's consider all other bugs as fixed. I will report anything new that I see separately.
OK, good to hear.
Meanwhile, can you try to reproduce this one: https://groups.google.com/forum/#!msg/syzkaller/bbtG9_h1ONU/CPLblMC6FAAJ ? I run the program in a tight parallel loop.
I could reproduce this after your suggestion with parallel runs.
This seems specific to hrtimer. Possibly it's not about the snd-timer core itself. Could you check whether this doesn't happen when CONFIG_SND_HRTIMER isn't set?
Does not happen without CONFIG_SND_HRTIMER. Do you mean that this is hrtimer bug?
I guess rather it's a bug in snd-hrtimer driver. Will check it later.
Takashi
On Fri, 15 Jan 2016 22:22:46 +0100, Takashi Iwai wrote:
On Fri, 15 Jan 2016 20:47:05 +0100, Dmitry Vyukov wrote:
On Fri, Jan 15, 2016 at 8:18 PM, Takashi Iwai tiwai@suse.de wrote:
On Fri, 15 Jan 2016 20:13:11 +0100, Dmitry Vyukov wrote:
On Fri, Jan 15, 2016 at 3:38 PM, Dmitry Vyukov dvyukov@google.com wrote:
On Fri, Jan 15, 2016 at 2:51 PM, Takashi Iwai tiwai@suse.de wrote:
On Fri, 15 Jan 2016 12:03:17 +0100, Dmitry Vyukov wrote: > > On Fri, Jan 15, 2016 at 12:00 PM, Takashi Iwai tiwai@suse.de wrote: > > On Fri, 15 Jan 2016 09:06:10 +0100, > > Dmitry Vyukov wrote: > >> > >> On Thu, Jan 14, 2016 at 5:09 PM, Takashi Iwai tiwai@suse.de wrote: > >> > On Wed, 13 Jan 2016 21:54:10 +0100, > >> > Takashi Iwai wrote: > >> >> > >> >> OK, then this might be a possible race at the current snd_timer_stop() > >> >> implementation. There is no sync action there, so the ISR might be > >> >> still alive after snd_timer_close() call. Or might be another race. > >> >> This pattern looks a bit different, as it's involved with hrtimer. > >> >> > >> >> I'll take a look at it tomorrow. > >> > > >> > I've audited the code today, but the open window doesn't look like > >> > what I expected. I found only some possible cases with slave timer > >> > instances. > >> > > >> > In anyway, below is a test fix patch. Since I couldn't reproduce the > >> > issue on my local machines, it's hard to say whether this covers the > >> > holes you fell. Let's see... > >> > >> > >> Hi Takashi, > >> > >> I would be interested to understand why other people can't reproduce > >> issues that I hit pretty reliably. > >> I suspect that it can be due to .config. Please try with the following > >> config values. > > > > I guess rather other config, e.g. the kernel debug options. > > I suppose you enabled KASAN and DEBUG_LIST. What else? > > I've attached my config (you will need to disable CONFIG_KCOV, it is > not upstreamed).
Hm, that has lots of other drivers built-in...
> >> I also start qemu with "-soundhw all" arg. > > > > OK, so you're testing with VM? This makes easier to recheck. > > Yes, I start qemu as: > > qemu-system-x86_64 -hda wheezy.img -net > user,host=10.0.2.10,hostfwd=tcp::10022-:22 -net nic -nographic -kernel > arch/x86/boot/bzImage -append "console=ttyS0 root=/dev/sda debug > earlyprintk=serial slub_debug=UZ" -enable-kvm -m 2G -numa > node,nodeid=0,cpus=0-1 -numa node,nodeid=1,cpus=2-3 -smp > sockets=2,cores=2,threads=1 -usb -usbdevice mouse -usbdevice tablet > -soundhw all
And which test did trigger use-after-free, even with all previous patches?
I will try to extract a new reproducer now.
Ok, I does not seem to see any crashes except the timer hangs below. Let's consider all other bugs as fixed. I will report anything new that I see separately.
OK, good to hear.
Meanwhile, can you try to reproduce this one: https://groups.google.com/forum/#!msg/syzkaller/bbtG9_h1ONU/CPLblMC6FAAJ ? I run the program in a tight parallel loop.
I could reproduce this after your suggestion with parallel runs.
This seems specific to hrtimer. Possibly it's not about the snd-timer core itself. Could you check whether this doesn't happen when CONFIG_SND_HRTIMER isn't set?
Does not happen without CONFIG_SND_HRTIMER. Do you mean that this is hrtimer bug?
I guess rather it's a bug in snd-hrtimer driver. Will check it later.
The patch below *might* fix the issue. There was a deadlock problem and the current code has a weird workaround for it. I suspect it being the cause.
If this works, I'll happily apply it before submitting the next pull request for 4.5. If not, I'll take a closer look at it in the next week :)
thanks,
Takashi
--- diff --git a/sound/core/hrtimer.c b/sound/core/hrtimer.c index f845ecf7e172..a8027350520d 100644 --- a/sound/core/hrtimer.c +++ b/sound/core/hrtimer.c @@ -39,6 +39,7 @@ struct snd_hrtimer { struct snd_timer *timer; struct hrtimer hrt; atomic_t running; + bool cancel_pending; };
static enum hrtimer_restart snd_hrtimer_callback(struct hrtimer *hrt) @@ -62,7 +63,7 @@ static int snd_hrtimer_open(struct snd_timer *t) { struct snd_hrtimer *stime;
- stime = kmalloc(sizeof(*stime), GFP_KERNEL); + stime = kzalloc(sizeof(*stime), GFP_KERNEL); if (!stime) return -ENOMEM; hrtimer_init(&stime->hrt, CLOCK_MONOTONIC, HRTIMER_MODE_REL); @@ -90,7 +91,10 @@ static int snd_hrtimer_start(struct snd_timer *t) struct snd_hrtimer *stime = t->private_data;
atomic_set(&stime->running, 0); - hrtimer_cancel(&stime->hrt); + if (stime->cancel_pending) { + hrtimer_cancel(&stime->hrt); + stime->cancel_pending = false; + } hrtimer_start(&stime->hrt, ns_to_ktime(t->sticks * resolution), HRTIMER_MODE_REL); atomic_set(&stime->running, 1); @@ -101,6 +105,8 @@ static int snd_hrtimer_stop(struct snd_timer *t) { struct snd_hrtimer *stime = t->private_data; atomic_set(&stime->running, 0); + if (hrtimer_try_to_cancel(&stime->hrt) < 0) + stime->cancel_pending = true; return 0; }
On Fri, Jan 15, 2016 at 10:44 PM, Takashi Iwai tiwai@suse.de wrote:
On Fri, 15 Jan 2016 22:22:46 +0100, Takashi Iwai wrote:
On Fri, 15 Jan 2016 20:47:05 +0100, Dmitry Vyukov wrote:
On Fri, Jan 15, 2016 at 8:18 PM, Takashi Iwai tiwai@suse.de wrote:
On Fri, 15 Jan 2016 20:13:11 +0100, Dmitry Vyukov wrote:
On Fri, Jan 15, 2016 at 3:38 PM, Dmitry Vyukov dvyukov@google.com wrote:
On Fri, Jan 15, 2016 at 2:51 PM, Takashi Iwai tiwai@suse.de wrote: > On Fri, 15 Jan 2016 12:03:17 +0100, > Dmitry Vyukov wrote: >> >> On Fri, Jan 15, 2016 at 12:00 PM, Takashi Iwai tiwai@suse.de wrote: >> > On Fri, 15 Jan 2016 09:06:10 +0100, >> > Dmitry Vyukov wrote: >> >> >> >> On Thu, Jan 14, 2016 at 5:09 PM, Takashi Iwai tiwai@suse.de wrote: >> >> > On Wed, 13 Jan 2016 21:54:10 +0100, >> >> > Takashi Iwai wrote: >> >> >> >> >> >> OK, then this might be a possible race at the current snd_timer_stop() >> >> >> implementation. There is no sync action there, so the ISR might be >> >> >> still alive after snd_timer_close() call. Or might be another race. >> >> >> This pattern looks a bit different, as it's involved with hrtimer. >> >> >> >> >> >> I'll take a look at it tomorrow. >> >> > >> >> > I've audited the code today, but the open window doesn't look like >> >> > what I expected. I found only some possible cases with slave timer >> >> > instances. >> >> > >> >> > In anyway, below is a test fix patch. Since I couldn't reproduce the >> >> > issue on my local machines, it's hard to say whether this covers the >> >> > holes you fell. Let's see... >> >> >> >> >> >> Hi Takashi, >> >> >> >> I would be interested to understand why other people can't reproduce >> >> issues that I hit pretty reliably. >> >> I suspect that it can be due to .config. Please try with the following >> >> config values. >> > >> > I guess rather other config, e.g. the kernel debug options. >> > I suppose you enabled KASAN and DEBUG_LIST. What else? >> >> I've attached my config (you will need to disable CONFIG_KCOV, it is >> not upstreamed). > > Hm, that has lots of other drivers built-in... > >> >> I also start qemu with "-soundhw all" arg. >> > >> > OK, so you're testing with VM? This makes easier to recheck. >> >> Yes, I start qemu as: >> >> qemu-system-x86_64 -hda wheezy.img -net >> user,host=10.0.2.10,hostfwd=tcp::10022-:22 -net nic -nographic -kernel >> arch/x86/boot/bzImage -append "console=ttyS0 root=/dev/sda debug >> earlyprintk=serial slub_debug=UZ" -enable-kvm -m 2G -numa >> node,nodeid=0,cpus=0-1 -numa node,nodeid=1,cpus=2-3 -smp >> sockets=2,cores=2,threads=1 -usb -usbdevice mouse -usbdevice tablet >> -soundhw all > > And which test did trigger use-after-free, even with all previous > patches?
I will try to extract a new reproducer now.
Ok, I does not seem to see any crashes except the timer hangs below. Let's consider all other bugs as fixed. I will report anything new that I see separately.
OK, good to hear.
Meanwhile, can you try to reproduce this one: https://groups.google.com/forum/#!msg/syzkaller/bbtG9_h1ONU/CPLblMC6FAAJ ? I run the program in a tight parallel loop.
I could reproduce this after your suggestion with parallel runs.
This seems specific to hrtimer. Possibly it's not about the snd-timer core itself. Could you check whether this doesn't happen when CONFIG_SND_HRTIMER isn't set?
Does not happen without CONFIG_SND_HRTIMER. Do you mean that this is hrtimer bug?
I guess rather it's a bug in snd-hrtimer driver. Will check it later.
The patch below *might* fix the issue. There was a deadlock problem and the current code has a weird workaround for it. I suspect it being the cause.
If this works, I'll happily apply it before submitting the next pull request for 4.5. If not, I'll take a closer look at it in the next week :)
No, unfortunately the hang still happens with the patch:
INFO: rcu_sched detected stalls on CPUs/tasks: (detected by 3, t=26002 jiffies, g=7699, c=7698, q=270) All QSes seen, last rcu_sched kthread activity 26002 (4294752045-4294726043), jiffies_till_next_fqs=3, root ->qsmask 0x0 a.out R running task 30184 10406 6864 0x0000000a ffff8800636ef610 ffff88006d707ca8 ffffffff813e7989 00000000fffc519b 00000000fffc519b ffff88006d720fc0 00000000fffcb72d dffffc0000000000 0000000000000000 ffff88006d707d80 ffffffff814b33aa 0000000000000000 Call Trace: <IRQ> [<ffffffff813e7989>] sched_show_task+0x269/0x3b0 kernel/sched/core.c:5036 [< inline >] print_other_cpu_stall kernel/rcu/tree.c:1318 [< inline >] check_cpu_stall kernel/rcu/tree.c:1424 [< inline >] __rcu_pending kernel/rcu/tree.c:3906 [< inline >] rcu_pending kernel/rcu/tree.c:3970 [<ffffffff814b33aa>] rcu_check_callbacks+0x1dfa/0x1e10 kernel/rcu/tree.c:2795 [<ffffffff814c2b0a>] update_process_times+0x3a/0x70 kernel/time/timer.c:1420 [<ffffffff814ec04f>] tick_sched_handle.isra.20+0xaf/0xe0 kernel/time/tick-sched.c:151 [<ffffffff814ec775>] tick_sched_timer+0x75/0x100 kernel/time/tick-sched.c:1086 [< inline >] __run_hrtimer kernel/time/hrtimer.c:1229 [<ffffffff814c48d3>] __hrtimer_run_queues+0x363/0xc10 kernel/time/hrtimer.c:1293 [<ffffffff814c68d2>] hrtimer_interrupt+0x182/0x430 kernel/time/hrtimer.c:1327 [<ffffffff8124f062>] local_apic_timer_interrupt+0x72/0xe0 arch/x86/kernel/apic/apic.c:907 [<ffffffff81252589>] smp_apic_timer_interrupt+0x79/0xa0 arch/x86/kernel/apic/apic.c:931 [<ffffffff86326e6c>] apic_timer_interrupt+0x8c/0xa0 arch/x86/entry/entry_64.S:520 [<ffffffff814fab0c>] smp_call_function_many+0x59c/0x720 kernel/smp.c:435 [<ffffffff812931e6>] native_flush_tlb_others+0xd6/0x370 arch/x86/mm/tlb.c:154 [< inline >] flush_tlb_others ./arch/x86/include/asm/paravirt.h:329 [<ffffffff8129394f>] flush_tlb_mm_range+0x10f/0x560 arch/x86/mm/tlb.c:235 [<ffffffff816df2d2>] tlb_flush_mmu_tlbonly+0x1e2/0x3f0 mm/memory.c:243 [< inline >] tlb_flush_mmu mm/memory.c:264 [<ffffffff816e2afb>] tlb_finish_mmu+0x1b/0xa0 mm/memory.c:276 [<ffffffff816f8631>] unmap_region+0x261/0x350 mm/mmap.c:2408 [<ffffffff816ff5cb>] do_munmap+0x70b/0xf30 mm/mmap.c:2602 [<ffffffff81702fa2>] mmap_region+0x152/0x1010 mm/mmap.c:1558 [<ffffffff817045b4>] do_mmap+0x754/0x990 mm/mmap.c:1396 [< inline >] do_mmap_pgoff include/linux/mm.h:1982 [<ffffffff816b46bf>] vm_mmap_pgoff+0x15f/0x1b0 mm/util.c:328 [< inline >] SYSC_mmap_pgoff mm/mmap.c:1446 [<ffffffff816fd868>] SyS_mmap_pgoff+0x208/0x580 mm/mmap.c:1404 [< inline >] SYSC_mmap arch/x86/kernel/sys_x86_64.c:95 [<ffffffff811b0a86>] SyS_mmap+0x16/0x20 arch/x86/kernel/sys_x86_64.c:86 [<ffffffff86326076>] entry_SYSCALL_64_fastpath+0x16/0x7a arch/x86/entry/entry_64.S:185 rcu_sched kthread starved for 26002 jiffies! g7699 c7698 f0x2 RCU_GP_WAIT_FQS(3) ->state=0x100 rcu_sched W ffff88003dfb7a98 29272 8 2 0x00000000 ffff88003dfb7a98 ffff88003ec16d40 ffff88003df81fe0 00ffed0007bf6f6f ffff88003ec20af0 ffff88003ec20ac8 ffff88003ec20158 ffff88003df817c8 ffff88003ec20140 ffffffff875adc40 ffff88003df817c0 ffff88003dfb0000 Call Trace: [<ffffffff86317517>] schedule+0x97/0x1c0 kernel/sched/core.c:3311 [<ffffffff863235db>] schedule_timeout+0x36b/0x920 kernel/time/timer.c:1531 [<ffffffff814af882>] rcu_gp_kthread+0xad2/0x1b60 kernel/rcu/tree.c:2125 [<ffffffff813b423f>] kthread+0x23f/0x2d0 drivers/block/aoe/aoecmd.c:1303 [<ffffffff8632642f>] ret_from_fork+0x3f/0x70 arch/x86/entry/entry_64.S:468 BUG: spinlock lockup suspected on CPU#0, a.out/10400 lock: 0xffff880065e348e0, .magic: dead4ead, .owner: a.out/10401, .owner_cpu: 2 CPU: 0 PID: 10400 Comm: a.out Not tainted 4.4.0+ #257 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 00000000ffffffff ffff88003ec07c60 ffffffff8298aced ffff880065e348e0 ffff8800356917c0 ffff8800634cc740 ffff88003ec07c98 ffffffff814643fd ffffffff84f30457 ffffffff00000000 ffff880065e348e0 ffff880065e348f0 Call Trace: <IRQ> [< inline >] __dump_stack lib/dump_stack.c:15 <IRQ> [<ffffffff8298aced>] dump_stack+0x6f/0xa2 lib/dump_stack.c:50 [<ffffffff814643fd>] spin_dump+0x14d/0x280 kernel/locking/spinlock_debug.c:67 [< inline >] __spin_lock_debug kernel/locking/spinlock_debug.c:117 [<ffffffff8146471d>] do_raw_spin_lock+0x15d/0x2b0 kernel/locking/spinlock_debug.c:137 [< inline >] __raw_spin_lock include/linux/spinlock_api_smp.h:145 [<ffffffff86324f9b>] _raw_spin_lock+0x3b/0x50 kernel/locking/spinlock.c:151 [< inline >] spin_lock include/linux/spinlock.h:302 [<ffffffff84f30457>] snd_timer_interrupt+0x677/0xc40 sound/core/timer.c:749 [<ffffffff84f38d59>] snd_hrtimer_callback+0x169/0x230 sound/core/hrtimer.c:55 [< inline >] __run_hrtimer kernel/time/hrtimer.c:1229 [<ffffffff814c48d3>] __hrtimer_run_queues+0x363/0xc10 kernel/time/hrtimer.c:1293 [<ffffffff814c68d2>] hrtimer_interrupt+0x182/0x430 kernel/time/hrtimer.c:1327 [<ffffffff8124f062>] local_apic_timer_interrupt+0x72/0xe0 arch/x86/kernel/apic/apic.c:907 [<ffffffff81252589>] smp_apic_timer_interrupt+0x79/0xa0 arch/x86/kernel/apic/apic.c:931 [<ffffffff86326e6c>] apic_timer_interrupt+0x8c/0xa0 arch/x86/entry/entry_64.S:520 [< inline >] spin_unlock_irqrestore include/linux/spinlock.h:362 [<ffffffff84f32cbf>] snd_timer_start+0x12f/0x1d0 sound/core/timer.c:481 [< inline >] snd_timer_user_start sound/core/timer.c:1732 [< inline >] __snd_timer_user_ioctl sound/core/timer.c:1816 [<ffffffff84f363d1>] snd_timer_user_ioctl+0x761/0x25c0 sound/core/timer.c:1837 [< inline >] vfs_ioctl fs/ioctl.c:43 [<ffffffff817ebfac>] do_vfs_ioctl+0x18c/0xfb0 fs/ioctl.c:674 [< inline >] SYSC_ioctl fs/ioctl.c:689 [<ffffffff817ece5f>] SyS_ioctl+0x8f/0xc0 fs/ioctl.c:680 [<ffffffff86326076>] entry_SYSCALL_64_fastpath+0x16/0x7a arch/x86/entry/entry_64.S:185 Sending NMI to all CPUs: NMI backtrace for cpu 0 CPU: 0 PID: 10400 Comm: a.out Not tainted 4.4.0+ #257 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 task: ffff8800634cc740 ti: ffff880062ad8000 task.ti: ffff880062ad8000 RIP: 0010:[<ffffffff81262cc6>] [<ffffffff81262cc6>] flat_send_IPI_mask+0x156/0x290 RSP: 0018:ffff88003ec07be0 EFLAGS: 00000046 RAX: 0000000000000000 RBX: 0000000000000c00 RCX: 0000000000000000 RDX: 0000000000000c00 RSI: 0000000000000000 RDI: ffffffffff5fc300 RBP: ffff88003ec07c08 R08: 0000000000000001 R09: 0000000000000000 R10: fffffbfff10ffef2 R11: 1ffffffff1293eb5 R12: 0000000000000086 R13: 000000000f000000 R14: ffffffff87631d60 R15: 0000000000000002 FS: 00007fd28b9a2700(0000) GS:ffff88003ec00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 00007efdbab09e78 CR3: 0000000062f3f000 CR4: 00000000000006f0 Stack: ffffffff87631d60 ffffffff882bc380 0000000000000040 fffffbfff1057278 ffff88006d71a4e0 ffff88003ec07c28 ffffffff8125874e ffffffff866bf480 dffffc0000000000 ffff88003ec07c88 ffffffff829961da ffff88003ec07c60 Call Trace: <IRQ> d [<ffffffff8125874e>] nmi_raise_cpu_backtrace+0x5e/0x80 arch/x86/kernel/apic/hw_nmi.c:33 [<ffffffff829961da>] nmi_trigger_all_cpu_backtrace+0x49a/0x530 lib/nmi_backtrace.c:85 [<ffffffff812587a4>] arch_trigger_all_cpu_backtrace+0x14/0x20 arch/x86/kernel/apic/hw_nmi.c:38 [< inline >] trigger_all_cpu_backtrace include/linux/nmi.h:41 [< inline >] __spin_lock_debug kernel/locking/spinlock_debug.c:119 [<ffffffff81464727>] do_raw_spin_lock+0x167/0x2b0 kernel/locking/spinlock_debug.c:137 [< inline >] __raw_spin_lock include/linux/spinlock_api_smp.h:145 [<ffffffff86324f9b>] _raw_spin_lock+0x3b/0x50 kernel/locking/spinlock.c:151 [< inline >] spin_lock include/linux/spinlock.h:302 [<ffffffff84f30457>] snd_timer_interrupt+0x677/0xc40 sound/core/timer.c:749 [<ffffffff84f38d59>] snd_hrtimer_callback+0x169/0x230 sound/core/hrtimer.c:55 [< inline >] __run_hrtimer kernel/time/hrtimer.c:1229 [<ffffffff814c48d3>] __hrtimer_run_queues+0x363/0xc10 kernel/time/hrtimer.c:1293 [<ffffffff814c68d2>] hrtimer_interrupt+0x182/0x430 kernel/time/hrtimer.c:1327 [<ffffffff8124f062>] local_apic_timer_interrupt+0x72/0xe0 arch/x86/kernel/apic/apic.c:907 [<ffffffff81252589>] smp_apic_timer_interrupt+0x79/0xa0 arch/x86/kernel/apic/apic.c:931 [<ffffffff86326e6c>] apic_timer_interrupt+0x8c/0xa0 arch/x86/entry/entry_64.S:520 [< inline >] spin_unlock_irqrestore include/linux/spinlock.h:362 [<ffffffff84f32cbf>] snd_timer_start+0x12f/0x1d0 sound/core/timer.c:481 [< inline >] snd_timer_user_start sound/core/timer.c:1732 [< inline >] __snd_timer_user_ioctl sound/core/timer.c:1816 [<ffffffff84f363d1>] snd_timer_user_ioctl+0x761/0x25c0 sound/core/timer.c:1837 [< inline >] vfs_ioctl fs/ioctl.c:43 [<ffffffff817ebfac>] do_vfs_ioctl+0x18c/0xfb0 fs/ioctl.c:674 [< inline >] SYSC_ioctl fs/ioctl.c:689 [<ffffffff817ece5f>] SyS_ioctl+0x8f/0xc0 fs/ioctl.c:680 [<ffffffff86326076>] entry_SYSCALL_64_fastpath+0x16/0x7a arch/x86/entry/entry_64.S:185 Code: 00 c3 5f ff 80 e6 10 75 e1 41 c1 e5 18 44 89 2c 25 10 c3 5f ff 44 89 fa 09 da 80 cf 04 41 83 ff 02 0f 44 d3 89 14 25 00 c3 5f ff <41> f7 c4 00 02 00 00 74 4a e8 fc 01 1f 00 48 c7 c7 68 58 63 87 NMI backtrace for cpu 1 BUG: spinlock lockup suspected on CPU#1, a.out/10267 lock: 0xffff880065e348e0, .magic: dead4ead, .owner: a.out/10401, .owner_cpu: 2 CPU: 1 PID: 10267 Comm: a.out Not tainted 4.4.0+ #257 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 00000000ffffffff ffff8800330dfb10 ffffffff8298aced ffff880065e348e0 ffff8800356917c0 ffff880032b08000 ffff8800330dfb48 ffffffff814643fd ffffffff84f2ef29 ffff880000000000 ffff880065e348e0 ffff880065e348f0 Call Trace: [< inline >] __dump_stack lib/dump_stack.c:15 [<ffffffff8298aced>] dump_stack+0x6f/0xa2 lib/dump_stack.c:50 [<ffffffff814643fd>] spin_dump+0x14d/0x280 kernel/locking/spinlock_debug.c:67 [< inline >] __spin_lock_debug kernel/locking/spinlock_debug.c:117 [<ffffffff8146471d>] do_raw_spin_lock+0x15d/0x2b0 kernel/locking/spinlock_debug.c:137 [< inline >] __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:119 [<ffffffff86325a37>] _raw_spin_lock_irqsave+0xa7/0xd0 kernel/locking/spinlock.c:159 [<ffffffff84f2ef29>] _snd_timer_stop+0x99/0x5c0 sound/core/timer.c:505 [<ffffffff84f2f472>] snd_timer_stop+0x22/0x140 sound/core/timer.c:539 [<ffffffff84f2f5c3>] snd_timer_close+0x33/0x700 sound/core/timer.c:318 [<ffffffff84f2fd27>] snd_timer_user_release+0x97/0x130 sound/core/timer.c:1282 [<ffffffff817b36e6>] __fput+0x236/0x780 fs/file_table.c:208 [<ffffffff817b3cb5>] ____fput+0x15/0x20 fs/file_table.c:244 [<ffffffff813af2b0>] task_work_run+0x170/0x210 kernel/task_work.c:115 [< inline >] exit_task_work include/linux/task_work.h:21 [<ffffffff8135b275>] do_exit+0x8b5/0x2c60 kernel/exit.c:750 [<ffffffff8135d798>] do_group_exit+0x108/0x330 kernel/exit.c:880 [< inline >] SYSC_exit_group kernel/exit.c:891 [<ffffffff8135d9dd>] SyS_exit_group+0x1d/0x20 kernel/exit.c:889 [<ffffffff86326076>] entry_SYSCALL_64_fastpath+0x16/0x7a arch/x86/entry/entry_64.S:185 CPU: 1 PID: 10267 Comm: a.out Not tainted 4.4.0+ #257 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 task: ffff880032b08000 ti: ffff8800330d8000 task.ti: ffff8800330d8000 RIP: 0010:[<ffffffff829b7e77>] [<ffffffff829b7e77>] delay_tsc+0x27/0x70 RSP: 0018:ffff8800330dfb38 EFLAGS: 00000002 RAX: 000000009ff37355 RBX: ffff880065e348e0 RCX: 0000000000000029 RDX: 0000000000000053 RSI: 000000539ff37337 RDI: 0000000000000001 RBP: ffff8800330dfb38 R08: 0000000000000001 R09: 0000000000000001 R10: ffff880032b08000 R11: 0000000000000000 R12: ffff880065e348f0 R13: 000000009a9d2d40 R14: ffff880065e348e8 R15: 000000009a8b4de6 FS: 0000000000000000(0000) GS:ffff88003ed00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020001000 CR3: 00000000075a3000 CR4: 00000000000006e0 Stack: ffff8800330dfb48 ffffffff829b7dba ffff8800330dfb80 ffffffff81464709 ffff880065e348e0 0000000000000286 ffff8800339656d0 ffff880065e347b0 ffff880065e348e0 ffff8800330dfba8 ffffffff86325a37 ffffffff84f2ef29 Call Trace: [<ffffffff829b7dba>] __delay+0xa/0x10 arch/x86/lib/delay.c:153 [< inline >] __spin_lock_debug kernel/locking/spinlock_debug.c:114 [<ffffffff81464709>] do_raw_spin_lock+0x149/0x2b0 kernel/locking/spinlock_debug.c:137 [< inline >] __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:119 [<ffffffff86325a37>] _raw_spin_lock_irqsave+0xa7/0xd0 kernel/locking/spinlock.c:159 [<ffffffff84f2ef29>] _snd_timer_stop+0x99/0x5c0 sound/core/timer.c:505 [<ffffffff84f2f472>] snd_timer_stop+0x22/0x140 sound/core/timer.c:539 [<ffffffff84f2f5c3>] snd_timer_close+0x33/0x700 sound/core/timer.c:318 [<ffffffff84f2fd27>] snd_timer_user_release+0x97/0x130 sound/core/timer.c:1282 [<ffffffff817b36e6>] __fput+0x236/0x780 fs/file_table.c:208 [<ffffffff817b3cb5>] ____fput+0x15/0x20 fs/file_table.c:244 [<ffffffff813af2b0>] task_work_run+0x170/0x210 kernel/task_work.c:115 [< inline >] exit_task_work include/linux/task_work.h:21 [<ffffffff8135b275>] do_exit+0x8b5/0x2c60 kernel/exit.c:750 [<ffffffff8135d798>] do_group_exit+0x108/0x330 kernel/exit.c:880 [< inline >] SYSC_exit_group kernel/exit.c:891 [<ffffffff8135d9dd>] SyS_exit_group+0x1d/0x20 kernel/exit.c:889 [<ffffffff86326076>] entry_SYSCALL_64_fastpath+0x16/0x7a arch/x86/entry/entry_64.S:185 Code: 00 00 00 00 55 65 ff 05 f8 cf 65 7d 48 89 e5 65 44 8b 05 bd b2 65 7d 0f ae e8 0f 31 48 c1 e2 20 48 89 d6 48 09 c6 0f ae e8 0f 31 <48> c1 e2 20 48 09 d0 48 89 c1 48 29 f1 48 39 f9 73 27 65 ff 0d NMI backtrace for cpu 2 CPU: 2 PID: 10401 Comm: a.out Not tainted 4.4.0+ #257 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 task: ffff8800356917c0 ti: ffff880033f48000 task.ti: ffff880033f48000 RIP: 0010:[<ffffffff81455fe8>] [<ffffffff81455fe8>] __lock_acquire+0x28a8/0x4700 RSP: 0018:ffff880033f4f6f0 EFLAGS: 00000046 RAX: 0000000000000086 RBX: ffffffff888ef368 RCX: ffff880035691fe8 RDX: 1ffffffff0ec6b0c RSI: ffff880035692038 RDI: ffffffff87635860 RBP: ffff880033f4f870 R08: ffffffff88ef3310 R09: 0000000000000001 R10: ffff8800356917c0 R11: 0000000000000000 R12: ffff880035692038 R13: ffffffff888ef360 R14: 000000125492c000 R15: ffffffff894e0ae0 FS: 00007efdbab0a700(0000) GS:ffff88006d600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 000000c820346c00 CR3: 00000000331e5000 CR4: 00000000000006e0 Stack: ffff8800356917c0 ffffffff888eeac8 000000000092a02d ffffffff888eeac0 dffffc0000000000 ffffffff894e0ae0 ffff880035691fe0 ffff8800356917c0 ffffffff88ef3310 ffffffff88f431a0 ffff880000000000 ffff880035692050 Call Trace: [<ffffffff8145a28c>] lock_acquire+0x1dc/0x430 kernel/locking/lockdep.c:3585 [< inline >] __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:112 [<ffffffff86325a2f>] _raw_spin_lock_irqsave+0x9f/0xd0 kernel/locking/spinlock.c:159 [<ffffffff814c3ae5>] lock_hrtimer_base.isra.19+0x75/0x130 kernel/time/hrtimer.c:147 [<ffffffff814c4008>] hrtimer_try_to_cancel+0xb8/0x4a0 kernel/time/hrtimer.c:1042 [<ffffffff814c4412>] hrtimer_cancel+0x22/0x40 kernel/time/hrtimer.c:1065 [<ffffffff84f38ae0>] snd_hrtimer_start+0x140/0x1b0 sound/core/hrtimer.c:95 [<ffffffff84f32615>] snd_timer_start1+0x215/0x2c0 sound/core/timer.c:434 [<ffffffff84f32cb1>] snd_timer_start+0x121/0x1d0 sound/core/timer.c:480 [< inline >] snd_timer_user_start sound/core/timer.c:1732 [< inline >] __snd_timer_user_ioctl sound/core/timer.c:1816 [<ffffffff84f363d1>] snd_timer_user_ioctl+0x761/0x25c0 sound/core/timer.c:1837 [< inline >] vfs_ioctl fs/ioctl.c:43 [<ffffffff817ebfac>] do_vfs_ioctl+0x18c/0xfb0 fs/ioctl.c:674 [< inline >] SYSC_ioctl fs/ioctl.c:689 [<ffffffff817ece5f>] SyS_ioctl+0x8f/0xc0 fs/ioctl.c:680 [<ffffffff86326076>] entry_SYSCALL_64_fastpath+0x16/0x7a arch/x86/entry/entry_64.S:185 Code: 4c 8b 54 24 68 e9 80 ea ff ff 4c 8b 54 24 40 4c 89 54 24 78 e8 ca 4f ff ff 4c 8b 54 24 78 4c 89 54 24 78 e8 4b 4d ff ff f6 c4 02 <4c> 8b 54 24 78 0f 85 97 04 00 00 65 ff 05 86 0c bc 7e e9 43 e5 NMI backtrace for cpu 3 CPU: 3 PID: 10406 Comm: a.out Not tainted 4.4.0+ #257 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 task: ffff880062924740 ti: ffff8800636e8000 task.ti: ffff8800636e8000 RIP: 0010:[<ffffffff8157508e>] [<ffffffff8157508e>] __sanitizer_cov_trace_pc+0xe/0x50 RSP: 0018:ffff8800636ef878 EFLAGS: 00000282 RAX: ffff880062924740 RBX: ffff8800636ef8f8 RCX: ffff880062924f68 RDX: 0000000000000000 RSI: ffff880062924f68 RDI: 0000000000000286 RBP: ffff8800636ef920 R08: 0000000000000007 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 R13: 00000000ffffffff R14: 1ffff1000c6ddf13 R15: dffffc0000000000 FS: 00007fd28b1a1700(0000) GS:ffff88006d700000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 00007fd28b1a0e78 CR3: 0000000062f3f000 CR4: 00000000000006e0 Stack: ffffffff814fa075 ffff8800636ef9e8 ffffffff81292a70 0000000000000040 0000000041b58ab3 ffffffff8740aa52 ffffffff814f9e50 ffffffff882b8278 0000000000000000 ffffffff81292a70 ffff8800636ef9e8 0000000000000003 Call Trace: [<ffffffff814fab0c>] smp_call_function_many+0x59c/0x720 kernel/smp.c:435 [<ffffffff812931e6>] native_flush_tlb_others+0xd6/0x370 arch/x86/mm/tlb.c:154 [< inline >] flush_tlb_others ./arch/x86/include/asm/paravirt.h:329 [<ffffffff8129394f>] flush_tlb_mm_range+0x10f/0x560 arch/x86/mm/tlb.c:235 [<ffffffff816df2d2>] tlb_flush_mmu_tlbonly+0x1e2/0x3f0 mm/memory.c:243 [< inline >] tlb_flush_mmu mm/memory.c:264 [<ffffffff816e2afb>] tlb_finish_mmu+0x1b/0xa0 mm/memory.c:276 [<ffffffff816f8631>] unmap_region+0x261/0x350 mm/mmap.c:2408 [<ffffffff816ff5cb>] do_munmap+0x70b/0xf30 mm/mmap.c:2602 [<ffffffff81702fa2>] mmap_region+0x152/0x1010 mm/mmap.c:1558 [<ffffffff817045b4>] do_mmap+0x754/0x990 mm/mmap.c:1396 [< inline >] do_mmap_pgoff include/linux/mm.h:1982 [<ffffffff816b46bf>] vm_mmap_pgoff+0x15f/0x1b0 mm/util.c:328 [< inline >] SYSC_mmap_pgoff mm/mmap.c:1446 [<ffffffff816fd868>] SyS_mmap_pgoff+0x208/0x580 mm/mmap.c:1404 [< inline >] SYSC_mmap arch/x86/kernel/sys_x86_64.c:95 [<ffffffff811b0a86>] SyS_mmap+0x16/0x20 arch/x86/kernel/sys_x86_64.c:86 [<ffffffff86326076>] entry_SYSCALL_64_fastpath+0x16/0x7a arch/x86/entry/entry_64.S:185 Code: a0 00 00 00 55 48 89 e5 48 8b 80 88 01 00 00 f0 ff 00 5d c3 66 0f 1f 84 00 00 00 00 00 65 48 8b 04 25 c0 4e 01 00 48 85 c0 74 1a <65> 8b 15 bb fd a9 7e 81 e2 00 ff 1f 00 75 0b 8b 90 68 12 00 00 INFO: rcu_sched detected stalls on CPUs/tasks: (detected by 3, t=104007 jiffies, g=7699, c=7698, q=270) All QSes seen, last rcu_sched kthread activity 104007 (4294830050-4294726043), jiffies_till_next_fqs=3, root ->qsmask 0x0 a.out R running task 30184 10406 6864 0x0000000a ffff8800636ef610 ffff88006d707ca8 ffffffff813e7989 00000000fffc519b 00000000fffc519b ffff88006d720fc0 00000000fffde7e2 dffffc0000000000 0000000000000000 ffff88006d707d80 ffffffff814b33aa 0000000000000000 Call Trace: <IRQ> [<ffffffff813e7989>] sched_show_task+0x269/0x3b0 kernel/sched/core.c:5036 [< inline >] print_other_cpu_stall kernel/rcu/tree.c:1318 [< inline >] check_cpu_stall kernel/rcu/tree.c:1424 [< inline >] __rcu_pending kernel/rcu/tree.c:3906 [< inline >] rcu_pending kernel/rcu/tree.c:3970 [<ffffffff814b33aa>] rcu_check_callbacks+0x1dfa/0x1e10 kernel/rcu/tree.c:2795 [<ffffffff814c2b0a>] update_process_times+0x3a/0x70 kernel/time/timer.c:1420 [<ffffffff814ec04f>] tick_sched_handle.isra.20+0xaf/0xe0 kernel/time/tick-sched.c:151 [<ffffffff814ec775>] tick_sched_timer+0x75/0x100 kernel/time/tick-sched.c:1086 [< inline >] __run_hrtimer kernel/time/hrtimer.c:1229 [<ffffffff814c48d3>] __hrtimer_run_queues+0x363/0xc10 kernel/time/hrtimer.c:1293 [<ffffffff814c68d2>] hrtimer_interrupt+0x182/0x430 kernel/time/hrtimer.c:1327 [<ffffffff8124f062>] local_apic_timer_interrupt+0x72/0xe0 arch/x86/kernel/apic/apic.c:907 [<ffffffff81252589>] smp_apic_timer_interrupt+0x79/0xa0 arch/x86/kernel/apic/apic.c:931 [<ffffffff86326e6c>] apic_timer_interrupt+0x8c/0xa0 arch/x86/entry/entry_64.S:520 [<ffffffff814fab0c>] smp_call_function_many+0x59c/0x720 kernel/smp.c:435 [<ffffffff812931e6>] native_flush_tlb_others+0xd6/0x370 arch/x86/mm/tlb.c:154 [< inline >] flush_tlb_others ./arch/x86/include/asm/paravirt.h:329 [<ffffffff8129394f>] flush_tlb_mm_range+0x10f/0x560 arch/x86/mm/tlb.c:235 [<ffffffff816df2d2>] tlb_flush_mmu_tlbonly+0x1e2/0x3f0 mm/memory.c:243 [< inline >] tlb_flush_mmu mm/memory.c:264 [<ffffffff816e2afb>] tlb_finish_mmu+0x1b/0xa0 mm/memory.c:276 [<ffffffff816f8631>] unmap_region+0x261/0x350 mm/mmap.c:2408 [<ffffffff816ff5cb>] do_munmap+0x70b/0xf30 mm/mmap.c:2602 [<ffffffff81702fa2>] mmap_region+0x152/0x1010 mm/mmap.c:1558 [<ffffffff817045b4>] do_mmap+0x754/0x990 mm/mmap.c:1396 [< inline >] do_mmap_pgoff include/linux/mm.h:1982 [<ffffffff816b46bf>] vm_mmap_pgoff+0x15f/0x1b0 mm/util.c:328 [< inline >] SYSC_mmap_pgoff mm/mmap.c:1446 [<ffffffff816fd868>] SyS_mmap_pgoff+0x208/0x580 mm/mmap.c:1404 [< inline >] SYSC_mmap arch/x86/kernel/sys_x86_64.c:95 [<ffffffff811b0a86>] SyS_mmap+0x16/0x20 arch/x86/kernel/sys_x86_64.c:86 [<ffffffff86326076>] entry_SYSCALL_64_fastpath+0x16/0x7a arch/x86/entry/entry_64.S:185 rcu_sched kthread starved for 104007 jiffies! g7699 c7698 f0x2 RCU_GP_WAIT_FQS(3) ->state=0x100 rcu_sched W ffff88003dfb7a98 29272 8 2 0x00000000 ffff88003dfb7a98 ffff88003ec16d40 ffff88003df81fe0 00ffed0007bf6f6f ffff88003ec20af0 ffff88003ec20ac8 ffff88003ec20158 ffff88003df817c8 ffff88003ec20140 ffffffff875adc40 ffff88003df817c0 ffff88003dfb0000 Call Trace: [<ffffffff86317517>] schedule+0x97/0x1c0 kernel/sched/core.c:3311 [<ffffffff863235db>] schedule_timeout+0x36b/0x920 kernel/time/timer.c:1531 [<ffffffff814af882>] rcu_gp_kthread+0xad2/0x1b60 kernel/rcu/tree.c:2125 [<ffffffff813b423f>] kthread+0x23f/0x2d0 drivers/block/aoe/aoecmd.c:1303 [<ffffffff8632642f>] ret_from_fork+0x3f/0x70 arch/x86/entry/entry_64.S:468 INFO: rcu_sched detected stalls on CPUs/tasks: (detected by 3, t=182012 jiffies, g=7699, c=7698, q=270) All QSes seen, last rcu_sched kthread activity 182012 (4294908055-4294726043), jiffies_till_next_fqs=3, root ->qsmask 0x0 a.out R running task 30184 10406 6864 0x0000000a ffff8800636ef610 ffff88006d707ca8 ffffffff813e7989 00000000fffc519b 00000000fffc519b ffff88006d720fc0 00000000ffff1897 dffffc0000000000 0000000000000000 ffff88006d707d80 ffffffff814b33aa 0000000000000000 Call Trace: <IRQ> [<ffffffff813e7989>] sched_show_task+0x269/0x3b0 kernel/sched/core.c:5036 [< inline >] print_other_cpu_stall kernel/rcu/tree.c:1318 [< inline >] check_cpu_stall kernel/rcu/tree.c:1424 [< inline >] __rcu_pending kernel/rcu/tree.c:3906 [< inline >] rcu_pending kernel/rcu/tree.c:3970 [<ffffffff814b33aa>] rcu_check_callbacks+0x1dfa/0x1e10 kernel/rcu/tree.c:2795 [<ffffffff814c2b0a>] update_process_times+0x3a/0x70 kernel/time/timer.c:1420 [<ffffffff814ec04f>] tick_sched_handle.isra.20+0xaf/0xe0 kernel/time/tick-sched.c:151 [<ffffffff814ec775>] tick_sched_timer+0x75/0x100 kernel/time/tick-sched.c:1086 [< inline >] __run_hrtimer kernel/time/hrtimer.c:1229 [<ffffffff814c48d3>] __hrtimer_run_queues+0x363/0xc10 kernel/time/hrtimer.c:1293 [<ffffffff814c68d2>] hrtimer_interrupt+0x182/0x430 kernel/time/hrtimer.c:1327 [<ffffffff8124f062>] local_apic_timer_interrupt+0x72/0xe0 arch/x86/kernel/apic/apic.c:907 [<ffffffff81252589>] smp_apic_timer_interrupt+0x79/0xa0 arch/x86/kernel/apic/apic.c:931 [<ffffffff86326e6c>] apic_timer_interrupt+0x8c/0xa0 arch/x86/entry/entry_64.S:520 [<ffffffff814fab0c>] smp_call_function_many+0x59c/0x720 kernel/smp.c:435 [<ffffffff812931e6>] native_flush_tlb_others+0xd6/0x370 arch/x86/mm/tlb.c:154 [< inline >] flush_tlb_others ./arch/x86/include/asm/paravirt.h:329 [<ffffffff8129394f>] flush_tlb_mm_range+0x10f/0x560 arch/x86/mm/tlb.c:235 [<ffffffff816df2d2>] tlb_flush_mmu_tlbonly+0x1e2/0x3f0 mm/memory.c:243 [< inline >] tlb_flush_mmu mm/memory.c:264 [<ffffffff816e2afb>] tlb_finish_mmu+0x1b/0xa0 mm/memory.c:276 [<ffffffff816f8631>] unmap_region+0x261/0x350 mm/mmap.c:2408 [<ffffffff816ff5cb>] do_munmap+0x70b/0xf30 mm/mmap.c:2602 [<ffffffff81702fa2>] mmap_region+0x152/0x1010 mm/mmap.c:1558 [<ffffffff817045b4>] do_mmap+0x754/0x990 mm/mmap.c:1396 [< inline >] do_mmap_pgoff include/linux/mm.h:1982 [<ffffffff816b46bf>] vm_mmap_pgoff+0x15f/0x1b0 mm/util.c:328 [< inline >] SYSC_mmap_pgoff mm/mmap.c:1446 [<ffffffff816fd868>] SyS_mmap_pgoff+0x208/0x580 mm/mmap.c:1404 [< inline >] SYSC_mmap arch/x86/kernel/sys_x86_64.c:95 [<ffffffff811b0a86>] SyS_mmap+0x16/0x20 arch/x86/kernel/sys_x86_64.c:86 [<ffffffff86326076>] entry_SYSCALL_64_fastpath+0x16/0x7a arch/x86/entry/entry_64.S:185 rcu_sched kthread starved for 182012 jiffies! g7699 c7698 f0x2 RCU_GP_WAIT_FQS(3) ->state=0x100 rcu_sched W ffff88003dfb7a98 29272 8 2 0x00000000 ffff88003dfb7a98 ffff88003ec16d40 ffff88003df81fe0 00ffed0007bf6f6f ffff88003ec20af0 ffff88003ec20ac8 ffff88003ec20158 ffff88003df817c8 ffff88003ec20140 ffffffff875adc40 ffff88003df817c0 ffff88003dfb0000 Call Trace: [<ffffffff86317517>] schedule+0x97/0x1c0 kernel/sched/core.c:3311 [<ffffffff863235db>] schedule_timeout+0x36b/0x920 kernel/time/timer.c:1531 [<ffffffff814af882>] rcu_gp_kthread+0xad2/0x1b60 kernel/rcu/tree.c:2125 [<ffffffff813b423f>] kthread+0x23f/0x2d0 drivers/block/aoe/aoecmd.c:1303 [<ffffffff8632642f>] ret_from_fork+0x3f/0x70 arch/x86/entry/entry_64.S:468 INFO: rcu_sched detected stalls on CPUs/tasks: (detected by 3, t=260017 jiffies, g=7699, c=7698, q=270) All QSes seen, last rcu_sched kthread activity 260017 (4294986060-4294726043), jiffies_till_next_fqs=3, root ->qsmask 0x0 a.out R running task 30184 10406 6864 0x0000000a ffff8800636ef610 ffff88006d707ca8 ffffffff813e7989 00000000fffc519b 00000000fffc519b ffff88006d720fc0 000000010000494c dffffc0000000000 0000000000000000 ffff88006d707d80 ffffffff814b33aa 0000000000000000 Call Trace: <IRQ> [<ffffffff813e7989>] sched_show_task+0x269/0x3b0 kernel/sched/core.c:5036 [< inline >] print_other_cpu_stall kernel/rcu/tree.c:1318 [< inline >] check_cpu_stall kernel/rcu/tree.c:1424 [< inline >] __rcu_pending kernel/rcu/tree.c:3906 [< inline >] rcu_pending kernel/rcu/tree.c:3970 [<ffffffff814b33aa>] rcu_check_callbacks+0x1dfa/0x1e10 kernel/rcu/tree.c:2795 [<ffffffff814c2b0a>] update_process_times+0x3a/0x70 kernel/time/timer.c:1420 [<ffffffff814ec04f>] tick_sched_handle.isra.20+0xaf/0xe0 kernel/time/tick-sched.c:151 [<ffffffff814ec775>] tick_sched_timer+0x75/0x100 kernel/time/tick-sched.c:1086 [< inline >] __run_hrtimer kernel/time/hrtimer.c:1229 [<ffffffff814c48d3>] __hrtimer_run_queues+0x363/0xc10 kernel/time/hrtimer.c:1293 [<ffffffff814c68d2>] hrtimer_interrupt+0x182/0x430 kernel/time/hrtimer.c:1327 [<ffffffff8124f062>] local_apic_timer_interrupt+0x72/0xe0 arch/x86/kernel/apic/apic.c:907 [<ffffffff81252589>] smp_apic_timer_interrupt+0x79/0xa0 arch/x86/kernel/apic/apic.c:931 [<ffffffff86326e6c>] apic_timer_interrupt+0x8c/0xa0 arch/x86/entry/entry_64.S:520 [<ffffffff814fab0c>] smp_call_function_many+0x59c/0x720 kernel/smp.c:435 [<ffffffff812931e6>] native_flush_tlb_others+0xd6/0x370 arch/x86/mm/tlb.c:154 [< inline >] flush_tlb_others ./arch/x86/include/asm/paravirt.h:329 [<ffffffff8129394f>] flush_tlb_mm_range+0x10f/0x560 arch/x86/mm/tlb.c:235 [<ffffffff816df2d2>] tlb_flush_mmu_tlbonly+0x1e2/0x3f0 mm/memory.c:243 [< inline >] tlb_flush_mmu mm/memory.c:264 [<ffffffff816e2afb>] tlb_finish_mmu+0x1b/0xa0 mm/memory.c:276 [<ffffffff816f8631>] unmap_region+0x261/0x350 mm/mmap.c:2408 [<ffffffff816ff5cb>] do_munmap+0x70b/0xf30 mm/mmap.c:2602 [<ffffffff81702fa2>] mmap_region+0x152/0x1010 mm/mmap.c:1558 [<ffffffff817045b4>] do_mmap+0x754/0x990 mm/mmap.c:1396 [< inline >] do_mmap_pgoff include/linux/mm.h:1982 [<ffffffff816b46bf>] vm_mmap_pgoff+0x15f/0x1b0 mm/util.c:328 [< inline >] SYSC_mmap_pgoff mm/mmap.c:1446 [<ffffffff816fd868>] SyS_mmap_pgoff+0x208/0x580 mm/mmap.c:1404 [< inline >] SYSC_mmap arch/x86/kernel/sys_x86_64.c:95 [<ffffffff811b0a86>] SyS_mmap+0x16/0x20 arch/x86/kernel/sys_x86_64.c:86 [<ffffffff86326076>] entry_SYSCALL_64_fastpath+0x16/0x7a arch/x86/entry/entry_64.S:185 rcu_sched kthread starved for 260017 jiffies! g7699 c7698 f0x2 RCU_GP_WAIT_FQS(3) ->state=0x100 rcu_sched W ffff88003dfb7a98 29272 8 2 0x00000000 ffff88003dfb7a98 ffff88003ec16d40 ffff88003df81fe0 00ffed0007bf6f6f ffff88003ec20af0 ffff88003ec20ac8 ffff88003ec20158 ffff88003df817c8 ffff88003ec20140 ffffffff875adc40 ffff88003df817c0 ffff88003dfb0000 Call Trace: [<ffffffff86317517>] schedule+0x97/0x1c0 kernel/sched/core.c:3311 [<ffffffff863235db>] schedule_timeout+0x36b/0x920 kernel/time/timer.c:1531 [<ffffffff814af882>] rcu_gp_kthread+0xad2/0x1b60 kernel/rcu/tree.c:2125 [<ffffffff813b423f>] kthread+0x23f/0x2d0 drivers/block/aoe/aoecmd.c:1303 [<ffffffff8632642f>] ret_from_fork+0x3f/0x70 arch/x86/entry/entry_64.S:468
diff --git a/sound/core/hrtimer.c b/sound/core/hrtimer.c index f845ecf7e172..a8027350520d 100644 --- a/sound/core/hrtimer.c +++ b/sound/core/hrtimer.c @@ -39,6 +39,7 @@ struct snd_hrtimer { struct snd_timer *timer; struct hrtimer hrt; atomic_t running;
bool cancel_pending;
};
static enum hrtimer_restart snd_hrtimer_callback(struct hrtimer *hrt) @@ -62,7 +63,7 @@ static int snd_hrtimer_open(struct snd_timer *t) { struct snd_hrtimer *stime;
stime = kmalloc(sizeof(*stime), GFP_KERNEL);
stime = kzalloc(sizeof(*stime), GFP_KERNEL); if (!stime) return -ENOMEM; hrtimer_init(&stime->hrt, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
@@ -90,7 +91,10 @@ static int snd_hrtimer_start(struct snd_timer *t) struct snd_hrtimer *stime = t->private_data;
atomic_set(&stime->running, 0);
hrtimer_cancel(&stime->hrt);
if (stime->cancel_pending) {
hrtimer_cancel(&stime->hrt);
stime->cancel_pending = false;
} hrtimer_start(&stime->hrt, ns_to_ktime(t->sticks * resolution), HRTIMER_MODE_REL); atomic_set(&stime->running, 1);
@@ -101,6 +105,8 @@ static int snd_hrtimer_stop(struct snd_timer *t) { struct snd_hrtimer *stime = t->private_data; atomic_set(&stime->running, 0);
if (hrtimer_try_to_cancel(&stime->hrt) < 0)
stime->cancel_pending = true; return 0;
}
On Mon, 18 Jan 2016 11:53:00 +0100, Dmitry Vyukov wrote:
On Fri, Jan 15, 2016 at 10:44 PM, Takashi Iwai tiwai@suse.de wrote:
On Fri, 15 Jan 2016 22:22:46 +0100, Takashi Iwai wrote:
On Fri, 15 Jan 2016 20:47:05 +0100, Dmitry Vyukov wrote:
On Fri, Jan 15, 2016 at 8:18 PM, Takashi Iwai tiwai@suse.de wrote:
On Fri, 15 Jan 2016 20:13:11 +0100, Dmitry Vyukov wrote:
On Fri, Jan 15, 2016 at 3:38 PM, Dmitry Vyukov dvyukov@google.com wrote: > On Fri, Jan 15, 2016 at 2:51 PM, Takashi Iwai tiwai@suse.de wrote: >> On Fri, 15 Jan 2016 12:03:17 +0100, >> Dmitry Vyukov wrote: >>> >>> On Fri, Jan 15, 2016 at 12:00 PM, Takashi Iwai tiwai@suse.de wrote: >>> > On Fri, 15 Jan 2016 09:06:10 +0100, >>> > Dmitry Vyukov wrote: >>> >> >>> >> On Thu, Jan 14, 2016 at 5:09 PM, Takashi Iwai tiwai@suse.de wrote: >>> >> > On Wed, 13 Jan 2016 21:54:10 +0100, >>> >> > Takashi Iwai wrote: >>> >> >> >>> >> >> OK, then this might be a possible race at the current snd_timer_stop() >>> >> >> implementation. There is no sync action there, so the ISR might be >>> >> >> still alive after snd_timer_close() call. Or might be another race. >>> >> >> This pattern looks a bit different, as it's involved with hrtimer. >>> >> >> >>> >> >> I'll take a look at it tomorrow. >>> >> > >>> >> > I've audited the code today, but the open window doesn't look like >>> >> > what I expected. I found only some possible cases with slave timer >>> >> > instances. >>> >> > >>> >> > In anyway, below is a test fix patch. Since I couldn't reproduce the >>> >> > issue on my local machines, it's hard to say whether this covers the >>> >> > holes you fell. Let's see... >>> >> >>> >> >>> >> Hi Takashi, >>> >> >>> >> I would be interested to understand why other people can't reproduce >>> >> issues that I hit pretty reliably. >>> >> I suspect that it can be due to .config. Please try with the following >>> >> config values. >>> > >>> > I guess rather other config, e.g. the kernel debug options. >>> > I suppose you enabled KASAN and DEBUG_LIST. What else? >>> >>> I've attached my config (you will need to disable CONFIG_KCOV, it is >>> not upstreamed). >> >> Hm, that has lots of other drivers built-in... >> >>> >> I also start qemu with "-soundhw all" arg. >>> > >>> > OK, so you're testing with VM? This makes easier to recheck. >>> >>> Yes, I start qemu as: >>> >>> qemu-system-x86_64 -hda wheezy.img -net >>> user,host=10.0.2.10,hostfwd=tcp::10022-:22 -net nic -nographic -kernel >>> arch/x86/boot/bzImage -append "console=ttyS0 root=/dev/sda debug >>> earlyprintk=serial slub_debug=UZ" -enable-kvm -m 2G -numa >>> node,nodeid=0,cpus=0-1 -numa node,nodeid=1,cpus=2-3 -smp >>> sockets=2,cores=2,threads=1 -usb -usbdevice mouse -usbdevice tablet >>> -soundhw all >> >> And which test did trigger use-after-free, even with all previous >> patches? > > I will try to extract a new reproducer now.
Ok, I does not seem to see any crashes except the timer hangs below. Let's consider all other bugs as fixed. I will report anything new that I see separately.
OK, good to hear.
> Meanwhile, can you try to reproduce this one: > https://groups.google.com/forum/#!msg/syzkaller/bbtG9_h1ONU/CPLblMC6FAAJ > ? I run the program in a tight parallel loop.
I could reproduce this after your suggestion with parallel runs.
This seems specific to hrtimer. Possibly it's not about the snd-timer core itself. Could you check whether this doesn't happen when CONFIG_SND_HRTIMER isn't set?
Does not happen without CONFIG_SND_HRTIMER. Do you mean that this is hrtimer bug?
I guess rather it's a bug in snd-hrtimer driver. Will check it later.
The patch below *might* fix the issue. There was a deadlock problem and the current code has a weird workaround for it. I suspect it being the cause.
If this works, I'll happily apply it before submitting the next pull request for 4.5. If not, I'll take a closer look at it in the next week :)
No, unfortunately the hang still happens with the patch:
Thanks for testing. I think I understood the problem. We faced a similar issue and moved hrtimer_cancel() in the past. But this wasn't enough, as the start function may be called also in interrupt, too.
How about the one below instead?
Takashi
--- From: Takashi Iwai tiwai@suse.de Subject: [PATCH] ALSA: hrtimer: Fix stall by hrtimer_cancel()
hrtimer_cancel() waits for the completion from the callback, thus it must not be called inside the callback itself. This was already a problem, and the early commit [fcfdebe70759: ALSA: hrtimer - Fix lock-up] tried to address it.
However, the previous fix is still insufficient: it may still cause a lockup when the ALSA timer instance reprograms itself at its callback. Then it invokes the start function even in snd_timer_interrupt() that is called in hrtimer callback itself, results in a CPU stall. It's not a hypothetical problem, as actually triggered by syzkaller fuzzer.
This patch tries to fix the issue again. Now we call hrtimer_try_to_cancel() at both start and stop functions so that it won't fall into a deadlock, yet giving some chance to cancel the queue if the functions have been called outside the callback. The proper hrtimer_cancel() is called in anyway at closing, so this should be enough.
Reported-by: Dmitry Vyukov dvyukov@google.com Cc: stable@vger.kernel.org Signed-off-by: Takashi Iwai tiwai@suse.de --- sound/core/hrtimer.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/sound/core/hrtimer.c b/sound/core/hrtimer.c index f845ecf7e172..656d9a9032dc 100644 --- a/sound/core/hrtimer.c +++ b/sound/core/hrtimer.c @@ -90,7 +90,7 @@ static int snd_hrtimer_start(struct snd_timer *t) struct snd_hrtimer *stime = t->private_data;
atomic_set(&stime->running, 0); - hrtimer_cancel(&stime->hrt); + hrtimer_try_to_cancel(&stime->hrt); hrtimer_start(&stime->hrt, ns_to_ktime(t->sticks * resolution), HRTIMER_MODE_REL); atomic_set(&stime->running, 1); @@ -101,6 +101,7 @@ static int snd_hrtimer_stop(struct snd_timer *t) { struct snd_hrtimer *stime = t->private_data; atomic_set(&stime->running, 0); + hrtimer_try_to_cancel(&stime->hrt); return 0; }
On Mon, Jan 18, 2016 at 2:06 PM, Takashi Iwai tiwai@suse.de wrote:
No, unfortunately the hang still happens with the patch:
Thanks for testing. I think I understood the problem. We faced a similar issue and moved hrtimer_cancel() in the past. But this wasn't enough, as the start function may be called also in interrupt, too.
How about the one below instead?
Yes, this fixes the hang. Thanks!
Tested-by: Dmitry Vyukov dvyukov@google.com
Takashi
From: Takashi Iwai tiwai@suse.de Subject: [PATCH] ALSA: hrtimer: Fix stall by hrtimer_cancel()
hrtimer_cancel() waits for the completion from the callback, thus it must not be called inside the callback itself. This was already a problem, and the early commit [fcfdebe70759: ALSA: hrtimer - Fix lock-up] tried to address it.
However, the previous fix is still insufficient: it may still cause a lockup when the ALSA timer instance reprograms itself at its callback. Then it invokes the start function even in snd_timer_interrupt() that is called in hrtimer callback itself, results in a CPU stall. It's not a hypothetical problem, as actually triggered by syzkaller fuzzer.
This patch tries to fix the issue again. Now we call hrtimer_try_to_cancel() at both start and stop functions so that it won't fall into a deadlock, yet giving some chance to cancel the queue if the functions have been called outside the callback. The proper hrtimer_cancel() is called in anyway at closing, so this should be enough.
Reported-by: Dmitry Vyukov dvyukov@google.com Cc: stable@vger.kernel.org Signed-off-by: Takashi Iwai tiwai@suse.de
sound/core/hrtimer.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/sound/core/hrtimer.c b/sound/core/hrtimer.c index f845ecf7e172..656d9a9032dc 100644 --- a/sound/core/hrtimer.c +++ b/sound/core/hrtimer.c @@ -90,7 +90,7 @@ static int snd_hrtimer_start(struct snd_timer *t) struct snd_hrtimer *stime = t->private_data;
atomic_set(&stime->running, 0);
hrtimer_cancel(&stime->hrt);
hrtimer_try_to_cancel(&stime->hrt); hrtimer_start(&stime->hrt, ns_to_ktime(t->sticks * resolution), HRTIMER_MODE_REL); atomic_set(&stime->running, 1);
@@ -101,6 +101,7 @@ static int snd_hrtimer_stop(struct snd_timer *t) { struct snd_hrtimer *stime = t->private_data; atomic_set(&stime->running, 0);
hrtimer_try_to_cancel(&stime->hrt); return 0;
}
-- 2.7.0
On Mon, 18 Jan 2016 14:30:13 +0100, Dmitry Vyukov wrote:
On Mon, Jan 18, 2016 at 2:06 PM, Takashi Iwai tiwai@suse.de wrote:
No, unfortunately the hang still happens with the patch:
Thanks for testing. I think I understood the problem. We faced a similar issue and moved hrtimer_cancel() in the past. But this wasn't enough, as the start function may be called also in interrupt, too.
How about the one below instead?
Yes, this fixes the hang. Thanks!
Tested-by: Dmitry Vyukov dvyukov@google.com
Great, I'll queue the fix. Thanks for patient testing!
Takashi
On Wed, 13 Jan 2016 20:30:01 +0100, Dmitry Vyukov wrote:
On Wed, Jan 13, 2016 at 8:05 PM, Takashi Iwai tiwai@suse.de wrote:
On Wed, 13 Jan 2016 19:34:36 +0100, Dmitry Vyukov wrote:
On Wed, Jan 13, 2016 at 5:53 PM, Takashi Iwai tiwai@suse.de wrote:
This and your other relevant reports seem pointing the race of timer ioctls. Although snd_timer_close() itself calls snd_timer_stop(), there is no other protection against the concurrent execution.
If my guess is correct, a simplistic fix like below should work. It basically serializes the timer ioctl by using a new mutex (and replacing the old tread_sem mutex). They are no longtime blocking calls, so this shouldn't be a big problem. But certainly there can be a less intrusive way to paper over this if this really matters.
In this case for timer.c, I'd leave the final decision rather to Jaroslav. Jaroslav, what do you think?
After applying this patch I still see the following WARNINGS:
------------[ cut here ]------------ WARNING: CPU: 2 PID: 30398 at lib/list_debug.c:53 __list_del_entry+0x10b/0x1e0() list_del corruption, ffff880032d933b0->next is LIST_POISON1 (dead000000000100) Modules linked in: CPU: 2 PID: 30398 Comm: syz-executor Not tainted 4.4.0+ #241 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 00000000ffffffff ffff8800627778d8 ffffffff82926eed ffff880062777948 ffff880061c2af80 ffffffff8660b640 ffff880062777918 ffffffff81350c89 ffffffff8298e77b ffffed000c4eef25 ffffffff8660b640 0000000000000035 Call Trace: [< inline >] __dump_stack lib/dump_stack.c:15 [<ffffffff82926eed>] dump_stack+0x6f/0xa2 lib/dump_stack.c:50 [<ffffffff81350c89>] warn_slowpath_common+0xd9/0x140 kernel/panic.c:483 [<ffffffff81350d99>] warn_slowpath_fmt+0xa9/0xd0 kernel/panic.c:495 [<ffffffff8298e77b>] __list_del_entry+0x10b/0x1e0 lib/list_debug.c:51 [< inline >] list_del_init include/linux/list.h:145 [<ffffffff84ebd199>] _snd_timer_stop+0x119/0x450 sound/core/timer.c:501
This is
list_del_init(&timeri->active_list);
right? Possibly the following oneliner covers it?
Yes, that is this line. Yes, these two patches fix use-after-frees and GPFs.
Tested-by: Dmitry Vyukov dvyukov@google.com
OK, I'm going to queue the list fix now, at least. Below is the patch with a proper description.
If there comes no objection, I'll queue the former ioctl mutex fix, too.
The only one that still happens is "sound: spinlock lockup in sound/core/timer.c".
Oh well, there is another :-<
thanks,
Takashi
-- 8< -- From: Takashi Iwai tiwai@suse.de Subject: [PATCH] ALSA: timer: Fix double unlink of active_list
ALSA timer instance object has a couple of linked lists and they are unlinked unconditionally at snd_timer_stop(). Meanwhile snd_timer_interrupt() unlinks it, but it calls list_del() which leaves the element list itself unchanged. This ends up with unlinking twice, and it was caught by syzkaller fuzzer.
The fix is to use list_del_init() variant properly there, too.
Reported-by: Dmitry Vyukov dvyukov@google.com Tested-by: Dmitry Vyukov dvyukov@google.com Cc: stable@vger.kernel.org Signed-off-by: Takashi Iwai tiwai@suse.de --- sound/core/timer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/sound/core/timer.c b/sound/core/timer.c index 31f40f03e5b7..9241784dfe7d 100644 --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -694,7 +694,7 @@ void snd_timer_interrupt(struct snd_timer * timer, unsigned long ticks_left) } else { ti->flags &= ~SNDRV_TIMER_IFLG_RUNNING; if (--timer->running) - list_del(&ti->active_list); + list_del_init(&ti->active_list); } if ((timer->hw.flags & SNDRV_TIMER_HW_TASKLET) || (ti->flags & SNDRV_TIMER_IFLG_FAST))
participants (2)
-
Dmitry Vyukov
-
Takashi Iwai