[syzbot] possible deadlock in __snd_pcm_lib_xfer
Hello,
syzbot found the following issue on:
HEAD commit: 8515d05bf6bc Add linux-next specific files for 20220328 git tree: linux-next console output: https://syzkaller.appspot.com/x/log.txt?x=15c0acc7700000 kernel config: https://syzkaller.appspot.com/x/.config?x=530c68bef4e2b8a8 dashboard link: https://syzkaller.appspot.com/bug?extid=6e5c88838328e99c7e1c compiler: gcc (Debian 10.2.1-6) 10.2.1 20210110, GNU ld (GNU Binutils for Debian) 2.35.2 syz repro: https://syzkaller.appspot.com/x/repro.syz?x=14433ca5700000 C reproducer: https://syzkaller.appspot.com/x/repro.c?x=163bb77d700000
IMPORTANT: if you fix the issue, please add the following tag to the commit: Reported-by: syzbot+6e5c88838328e99c7e1c@syzkaller.appspotmail.com
====================================================== WARNING: possible circular locking dependency detected 5.17.0-next-20220328-syzkaller #0 Not tainted ------------------------------------------------------ syz-executor329/3588 is trying to acquire lock: ffff8880243c1d28 (&mm->mmap_lock#2){++++}-{3:3}, at: __might_fault+0xa1/0x170 mm/memory.c:5300
but task is already holding lock: ffff88801afef230 (&runtime->buffer_mutex){+.+.}-{3:3}, at: wait_for_avail sound/core/pcm_lib.c:1913 [inline] ffff88801afef230 (&runtime->buffer_mutex){+.+.}-{3:3}, at: __snd_pcm_lib_xfer+0xbca/0x1e20 sound/core/pcm_lib.c:2263
which lock already depends on the new lock.
the existing dependency chain (in reverse order) is:
-> #1 (&runtime->buffer_mutex){+.+.}-{3:3}: __mutex_lock_common kernel/locking/mutex.c:600 [inline] __mutex_lock+0x12f/0x12f0 kernel/locking/mutex.c:733 snd_pcm_hw_params+0xc9/0x18a0 sound/core/pcm_native.c:705 snd_pcm_kernel_ioctl+0x164/0x310 sound/core/pcm_native.c:3410 snd_pcm_oss_change_params_locked+0x14e2/0x3a70 sound/core/oss/pcm_oss.c:976 snd_pcm_oss_change_params sound/core/oss/pcm_oss.c:1116 [inline] snd_pcm_oss_mmap+0x442/0x550 sound/core/oss/pcm_oss.c:2929 call_mmap include/linux/fs.h:2085 [inline] mmap_region+0xba5/0x14a0 mm/mmap.c:1791 do_mmap+0x863/0xfa0 mm/mmap.c:1582 vm_mmap_pgoff+0x1b7/0x290 mm/util.c:519 ksys_mmap_pgoff+0x40d/0x5a0 mm/mmap.c:1628 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0x80 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae
-> #0 (&mm->mmap_lock#2){++++}-{3:3}: check_prev_add kernel/locking/lockdep.c:3096 [inline] check_prevs_add kernel/locking/lockdep.c:3219 [inline] validate_chain kernel/locking/lockdep.c:3834 [inline] __lock_acquire+0x2ac6/0x56c0 kernel/locking/lockdep.c:5060 lock_acquire kernel/locking/lockdep.c:5672 [inline] lock_acquire+0x1ab/0x510 kernel/locking/lockdep.c:5637 __might_fault mm/memory.c:5301 [inline] __might_fault+0x104/0x170 mm/memory.c:5294 _copy_to_user+0x25/0x140 lib/usercopy.c:28 copy_to_user include/linux/uaccess.h:160 [inline] default_read_copy+0x10f/0x180 sound/core/pcm_lib.c:2013 __snd_pcm_lib_xfer+0x1517/0x1e20 sound/core/pcm_lib.c:2282 snd_pcm_oss_read3+0x1c4/0x400 sound/core/oss/pcm_oss.c:1292 snd_pcm_oss_read2+0x300/0x3f0 sound/core/oss/pcm_oss.c:1503 snd_pcm_oss_read1 sound/core/oss/pcm_oss.c:1550 [inline] snd_pcm_oss_read+0x620/0x7a0 sound/core/oss/pcm_oss.c:2788 vfs_read+0x1ef/0x5d0 fs/read_write.c:480 ksys_read+0x127/0x250 fs/read_write.c:620 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0x80 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae
other info that might help us debug this:
Possible unsafe locking scenario:
CPU0 CPU1 ---- ---- lock(&runtime->buffer_mutex); lock(&mm->mmap_lock#2); lock(&runtime->buffer_mutex); lock(&mm->mmap_lock#2);
*** DEADLOCK ***
1 lock held by syz-executor329/3588: #0: ffff88801afef230 (&runtime->buffer_mutex){+.+.}-{3:3}, at: wait_for_avail sound/core/pcm_lib.c:1913 [inline] #0: ffff88801afef230 (&runtime->buffer_mutex){+.+.}-{3:3}, at: __snd_pcm_lib_xfer+0xbca/0x1e20 sound/core/pcm_lib.c:2263
stack backtrace: CPU: 0 PID: 3588 Comm: syz-executor329 Not tainted 5.17.0-next-20220328-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: <TASK> __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106 check_noncircular+0x25f/0x2e0 kernel/locking/lockdep.c:2176 check_prev_add kernel/locking/lockdep.c:3096 [inline] check_prevs_add kernel/locking/lockdep.c:3219 [inline] validate_chain kernel/locking/lockdep.c:3834 [inline] __lock_acquire+0x2ac6/0x56c0 kernel/locking/lockdep.c:5060 lock_acquire kernel/locking/lockdep.c:5672 [inline] lock_acquire+0x1ab/0x510 kernel/locking/lockdep.c:5637 __might_fault mm/memory.c:5301 [inline] __might_fault+0x104/0x170 mm/memory.c:5294 _copy_to_user+0x25/0x140 lib/usercopy.c:28 copy_to_user include/linux/uaccess.h:160 [inline] default_read_copy+0x10f/0x180 sound/core/pcm_lib.c:2013 __snd_pcm_lib_xfer+0x1517/0x1e20 sound/core/pcm_lib.c:2282 snd_pcm_oss_read3+0x1c4/0x400 sound/core/oss/pcm_oss.c:1292 snd_pcm_oss_read2+0x300/0x3f0 sound/core/oss/pcm_oss.c:1503 snd_pcm_oss_read1 sound/core/oss/pcm_oss.c:1550 [inline] snd_pcm_oss_read+0x620/0x7a0 sound/core/oss/pcm_oss.c:2788 vfs_read+0x1ef/0x5d0 fs/read_write.c:480 ksys_read+0x127/0x250 fs/read_write.c:620 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0x80 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x7f72068ad0f9 Code: 28 c3 e8 2a 14 00 00 66 2e 0f 1f 84 00 00 00 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 c0 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007fff51e1f1c8 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f72068ad0f9 RDX: 0000000000000ff2 RSI: 0000000020000780 RDI: 0000000000000004 RBP: 00007f72068710e0 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00007f7206871170 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 </TASK>
--- This report is generated by a bot. It may contain errors. See https://goo.gl/tpsmEJ for more information about syzbot. syzbot engineers can be reached at syzkaller@googlegroups.com.
syzbot will keep track of this issue. See: https://goo.gl/tpsmEJ#status for how to communicate with syzbot. syzbot can test patches for this issue, for details see: https://goo.gl/tpsmEJ#testing-patches
On Tue, 29 Mar 2022 23:32:25 +0200, syzbot wrote:
Hello,
syzbot found the following issue on:
HEAD commit: 8515d05bf6bc Add linux-next specific files for 20220328 git tree: linux-next console output: https://syzkaller.appspot.com/x/log.txt?x=15c0acc7700000 kernel config: https://syzkaller.appspot.com/x/.config?x=530c68bef4e2b8a8 dashboard link: https://syzkaller.appspot.com/bug?extid=6e5c88838328e99c7e1c compiler: gcc (Debian 10.2.1-6) 10.2.1 20210110, GNU ld (GNU Binutils for Debian) 2.35.2 syz repro: https://syzkaller.appspot.com/x/repro.syz?x=14433ca5700000 C reproducer: https://syzkaller.appspot.com/x/repro.c?x=163bb77d700000
IMPORTANT: if you fix the issue, please add the following tag to the commit: Reported-by: syzbot+6e5c88838328e99c7e1c@syzkaller.appspotmail.com
====================================================== WARNING: possible circular locking dependency detected 5.17.0-next-20220328-syzkaller #0 Not tainted
syz-executor329/3588 is trying to acquire lock: ffff8880243c1d28 (&mm->mmap_lock#2){++++}-{3:3}, at: __might_fault+0xa1/0x170 mm/memory.c:5300
but task is already holding lock: ffff88801afef230 (&runtime->buffer_mutex){+.+.}-{3:3}, at: wait_for_avail sound/core/pcm_lib.c:1913 [inline] ffff88801afef230 (&runtime->buffer_mutex){+.+.}-{3:3}, at: __snd_pcm_lib_xfer+0xbca/0x1e20 sound/core/pcm_lib.c:2263
OK, that's a similar issue as we've hit in the past for OSS. Now it appears as we're taking a big lock at the whole read/write operations.
The fix patch below.
Takashi
-- 8< -- From: Takashi Iwai tiwai@suse.de Subject: [PATCH] ALSA: pcm: Fix potential AB/BA lock with buffer_mutex and mmap_lock
syzbot caught a potential deadlock between the PCM runtime->buffer_mutex and the mm->mmap_lock. It was brought by the recent fix to cover the racy read/write and other ioctls, and in that commit, I overlooked a (hopefully only) corner case that may take the revert lock, namely, the OSS mmap. The OSS mmap operation exceptionally allows to re-configure the parameters inside the OSS syscall, where mm->mmap_mutex is already held. Meanwhile, the copy_from/to_user calls at read/write operation also takes the mm->mmap_lock internally, hence it may read to a AB/BA deadlock.
A similar problem was seen in the past and we fixed it with a refcount (in commit b248371628aa) -- although this covered only the call paths with OSS read/write and OSS ioctls. Now we need to cover the concurrent access via both ALSA and OSS APIs.
This patch addresses the problem above, by replacing the lock in the read/write operations with a refcount similar as we've used for OSS. The new field, runtime->buffer_accessing, keeps the concurrent read/write operations. Unlike the former buffer_mutex protection, this protects only around the copy_from/to_user() calls; the other codes are basically protected by the PCM stream lock. When it's already blocked (a negative value), it aborts. In the ioctl side, they check the buffer_accessing, and set to a negative value unless it's already being accessed.
Reported-by: syzbot+6e5c88838328e99c7e1c@syzkaller.appspotmail.com Fixes: dca947d4d26d ("ALSA: pcm: Fix races among concurrent read/write and buffer changes") Cc: stable@vger.kernel.org Signed-off-by: Takashi Iwai tiwai@suse.de --- include/sound/pcm.h | 1 + sound/core/pcm.c | 1 + sound/core/pcm_lib.c | 9 +++++---- sound/core/pcm_native.c | 39 ++++++++++++++++++++++++++++++++------- 4 files changed, 39 insertions(+), 11 deletions(-)
diff --git a/include/sound/pcm.h b/include/sound/pcm.h index 314f2779cab5..6b99310b5b88 100644 --- a/include/sound/pcm.h +++ b/include/sound/pcm.h @@ -402,6 +402,7 @@ struct snd_pcm_runtime { struct fasync_struct *fasync; bool stop_operating; /* sync_stop will be called */ struct mutex buffer_mutex; /* protect for buffer changes */ + atomic_t buffer_accessing; /* >0: in r/w operation, <0: blocked */
/* -- private section -- */ void *private_data; diff --git a/sound/core/pcm.c b/sound/core/pcm.c index edd9849210f2..977d54320a5c 100644 --- a/sound/core/pcm.c +++ b/sound/core/pcm.c @@ -970,6 +970,7 @@ int snd_pcm_attach_substream(struct snd_pcm *pcm, int stream,
runtime->status->state = SNDRV_PCM_STATE_OPEN; mutex_init(&runtime->buffer_mutex); + atomic_set(&runtime->buffer_accessing, 0);
substream->runtime = runtime; substream->private_data = pcm->private_data; diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c index a40a35e51fad..1fc7c50ffa62 100644 --- a/sound/core/pcm_lib.c +++ b/sound/core/pcm_lib.c @@ -1906,11 +1906,9 @@ static int wait_for_avail(struct snd_pcm_substream *substream, if (avail >= runtime->twake) break; snd_pcm_stream_unlock_irq(substream); - mutex_unlock(&runtime->buffer_mutex);
tout = schedule_timeout(wait_time);
- mutex_lock(&runtime->buffer_mutex); snd_pcm_stream_lock_irq(substream); set_current_state(TASK_INTERRUPTIBLE); switch (runtime->status->state) { @@ -2221,7 +2219,6 @@ snd_pcm_sframes_t __snd_pcm_lib_xfer(struct snd_pcm_substream *substream,
nonblock = !!(substream->f_flags & O_NONBLOCK);
- mutex_lock(&runtime->buffer_mutex); snd_pcm_stream_lock_irq(substream); err = pcm_accessible_state(runtime); if (err < 0) @@ -2276,6 +2273,10 @@ snd_pcm_sframes_t __snd_pcm_lib_xfer(struct snd_pcm_substream *substream, err = -EINVAL; goto _end_unlock; } + if (!atomic_inc_unless_negative(&runtime->buffer_accessing)) { + err = -EBUSY; + goto _end_unlock; + } snd_pcm_stream_unlock_irq(substream); if (!is_playback) snd_pcm_dma_buffer_sync(substream, SNDRV_DMA_SYNC_CPU); @@ -2284,6 +2285,7 @@ snd_pcm_sframes_t __snd_pcm_lib_xfer(struct snd_pcm_substream *substream, if (is_playback) snd_pcm_dma_buffer_sync(substream, SNDRV_DMA_SYNC_DEVICE); snd_pcm_stream_lock_irq(substream); + atomic_dec(&runtime->buffer_accessing); if (err < 0) goto _end_unlock; err = pcm_accessible_state(runtime); @@ -2313,7 +2315,6 @@ snd_pcm_sframes_t __snd_pcm_lib_xfer(struct snd_pcm_substream *substream, if (xfer > 0 && err >= 0) snd_pcm_update_state(substream, runtime); snd_pcm_stream_unlock_irq(substream); - mutex_unlock(&runtime->buffer_mutex); return xfer > 0 ? (snd_pcm_sframes_t)xfer : err; } EXPORT_SYMBOL(__snd_pcm_lib_xfer); diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index 704fdc9ebf91..2b630b2b64be 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -685,6 +685,24 @@ static int snd_pcm_hw_params_choose(struct snd_pcm_substream *pcm, return 0; }
+/* acquire buffer_mutex; if it's in r/w operation, return -EBUSY, otherwise + * block the further r/w operations + */ +static int snd_pcm_buffer_access_lock(struct snd_pcm_runtime *runtime) +{ + if (!atomic_dec_unless_positive(&runtime->buffer_accessing)) + return -EBUSY; + mutex_lock(&runtime->buffer_mutex); + return 0; /* keep buffer_mutex */ +} + +/* clear r/w access flag and release the buffer_mutex */ +static void snd_pcm_buffer_access_unlock(struct snd_pcm_runtime *runtime) +{ + atomic_inc(&runtime->buffer_accessing); + mutex_unlock(&runtime->buffer_mutex); +} + #if IS_ENABLED(CONFIG_SND_PCM_OSS) #define is_oss_stream(substream) ((substream)->oss.oss) #else @@ -695,14 +713,16 @@ static int snd_pcm_hw_params(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *params) { struct snd_pcm_runtime *runtime; - int err = 0, usecs; + int err, usecs; unsigned int bits; snd_pcm_uframes_t frames;
if (PCM_RUNTIME_CHECK(substream)) return -ENXIO; runtime = substream->runtime; - mutex_lock(&runtime->buffer_mutex); + err = snd_pcm_buffer_access_lock(runtime); + if (err < 0) + return err; snd_pcm_stream_lock_irq(substream); switch (runtime->status->state) { case SNDRV_PCM_STATE_OPEN: @@ -820,7 +840,7 @@ static int snd_pcm_hw_params(struct snd_pcm_substream *substream, snd_pcm_lib_free_pages(substream); } unlock: - mutex_unlock(&runtime->buffer_mutex); + snd_pcm_buffer_access_unlock(runtime); return err; }
@@ -865,7 +885,9 @@ static int snd_pcm_hw_free(struct snd_pcm_substream *substream) if (PCM_RUNTIME_CHECK(substream)) return -ENXIO; runtime = substream->runtime; - mutex_lock(&runtime->buffer_mutex); + result = snd_pcm_buffer_access_lock(runtime); + if (result < 0) + return result; snd_pcm_stream_lock_irq(substream); switch (runtime->status->state) { case SNDRV_PCM_STATE_SETUP: @@ -884,7 +906,7 @@ static int snd_pcm_hw_free(struct snd_pcm_substream *substream) snd_pcm_set_state(substream, SNDRV_PCM_STATE_OPEN); cpu_latency_qos_remove_request(&substream->latency_pm_qos_req); unlock: - mutex_unlock(&runtime->buffer_mutex); + snd_pcm_buffer_access_unlock(runtime); return result; }
@@ -1369,12 +1391,15 @@ static int snd_pcm_action_nonatomic(const struct action_ops *ops,
/* Guarantee the group members won't change during non-atomic action */ down_read(&snd_pcm_link_rwsem); - mutex_lock(&substream->runtime->buffer_mutex); + res = snd_pcm_buffer_access_lock(substream->runtime); + if (res < 0) + goto unlock; if (snd_pcm_stream_linked(substream)) res = snd_pcm_action_group(ops, substream, state, false); else res = snd_pcm_action_single(ops, substream, state); - mutex_unlock(&substream->runtime->buffer_mutex); + snd_pcm_buffer_access_unlock(substream->runtime); + unlock: up_read(&snd_pcm_link_rwsem); return res; }
participants (2)
-
syzbot
-
Takashi Iwai