[PATCH v2 0/4] ALSA: Defer async signal handling
Hi,
this is a revised patch series for another attempt to address the potential deadlocks via kill_fasync() calls that have been reported by syzbot for long time. Only a missing linux/fs.h inclusion was added from v1 series[1].
Instead of the previous series to drop the async handler[2], this tries to defer the kill_fasync() call. A few new common helpers are introduced at first, then the actual usages are replaced in the following patches.
The patches passed the quick tests with alsa-lib test cases.
Takashi
[1] https://lore.kernel.org/r/20220726153420.3403-1-tiwai@suse.de [2] https://lore.kernel.org/r/20220717070549.5993-1-tiwai@suse.de
===
Takashi Iwai (4): ALSA: core: Add async signal helpers ALSA: timer: Use deferred fasync helper ALSA: pcm: Use deferred fasync helper ALSA: control: Use deferred fasync helper
include/sound/control.h | 2 +- include/sound/core.h | 8 ++++ include/sound/pcm.h | 2 +- sound/core/control.c | 7 +-- sound/core/misc.c | 94 +++++++++++++++++++++++++++++++++++++++++ sound/core/pcm.c | 1 + sound/core/pcm_lib.c | 2 +- sound/core/pcm_native.c | 2 +- sound/core/timer.c | 11 ++--- 9 files changed, 117 insertions(+), 12 deletions(-)
Currently the call of kill_fasync() from an interrupt handler might lead to potential spin deadlocks, as spotted by syzkaller. Unfortunately, it's not so trivial to fix this lock chain as it's involved with the tasklist_lock that is touched in allover places.
As a temporary workaround, this patch provides the way to defer the async signal notification in a work. The new helper functions, snd_fasync_helper() and snd_kill_faync() are replacements for fasync_helper() and kill_fasync(), respectively. In addition, snd_fasync_free() needs to be called at the destructor of the relevant file object.
Signed-off-by: Takashi Iwai tiwai@suse.de --- include/sound/core.h | 8 ++++ sound/core/misc.c | 94 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 102 insertions(+)
diff --git a/include/sound/core.h b/include/sound/core.h index dd28de2343b8..4365c35d038b 100644 --- a/include/sound/core.h +++ b/include/sound/core.h @@ -507,4 +507,12 @@ snd_pci_quirk_lookup_id(u16 vendor, u16 device, } #endif
+/* async signal helpers */ +struct snd_fasync; + +int snd_fasync_helper(int fd, struct file *file, int on, + struct snd_fasync **fasyncp); +void snd_kill_fasync(struct snd_fasync *fasync, int signal, int poll); +void snd_fasync_free(struct snd_fasync *fasync); + #endif /* __SOUND_CORE_H */ diff --git a/sound/core/misc.c b/sound/core/misc.c index 50e4aaa6270d..d32a19976a2b 100644 --- a/sound/core/misc.c +++ b/sound/core/misc.c @@ -10,6 +10,7 @@ #include <linux/time.h> #include <linux/slab.h> #include <linux/ioport.h> +#include <linux/fs.h> #include <sound/core.h>
#ifdef CONFIG_SND_DEBUG @@ -145,3 +146,96 @@ snd_pci_quirk_lookup(struct pci_dev *pci, const struct snd_pci_quirk *list) } EXPORT_SYMBOL(snd_pci_quirk_lookup); #endif + +/* + * Deferred async signal helpers + * + * Below are a few helper functions to wrap the async signal handling + * in the deferred work. The main purpose is to avoid the messy deadlock + * around tasklist_lock and co at the kill_fasync() invocation. + * fasync_helper() and kill_fasync() are replaced with snd_fasync_helper() + * and snd_kill_fasync(), respectively. In addition, snd_fasync_free() has + * to be called at releasing the relevant file object. + */ +struct snd_fasync { + struct fasync_struct *fasync; + int signal; + int poll; + int on; + struct list_head list; +}; + +static DEFINE_SPINLOCK(snd_fasync_lock); +static LIST_HEAD(snd_fasync_list); + +static void snd_fasync_work_fn(struct work_struct *work) +{ + struct snd_fasync *fasync; + + spin_lock_irq(&snd_fasync_lock); + while (!list_empty(&snd_fasync_list)) { + fasync = list_first_entry(&snd_fasync_list, struct snd_fasync, list); + list_del_init(&fasync->list); + spin_unlock_irq(&snd_fasync_lock); + if (fasync->on) + kill_fasync(&fasync->fasync, fasync->signal, fasync->poll); + spin_lock_irq(&snd_fasync_lock); + } + spin_unlock_irq(&snd_fasync_lock); +} + +static DECLARE_WORK(snd_fasync_work, snd_fasync_work_fn); + +int snd_fasync_helper(int fd, struct file *file, int on, + struct snd_fasync **fasyncp) +{ + struct snd_fasync *fasync = NULL; + + if (on) { + fasync = kzalloc(sizeof(*fasync), GFP_KERNEL); + if (!fasync) + return -ENOMEM; + INIT_LIST_HEAD(&fasync->list); + } + + spin_lock_irq(&snd_fasync_lock); + if (*fasyncp) { + kfree(fasync); + fasync = *fasyncp; + } else { + if (!fasync) { + spin_unlock_irq(&snd_fasync_lock); + return 0; + } + *fasyncp = fasync; + } + fasync->on = on; + spin_unlock_irq(&snd_fasync_lock); + return fasync_helper(fd, file, on, &fasync->fasync); +} +EXPORT_SYMBOL_GPL(snd_fasync_helper); + +void snd_kill_fasync(struct snd_fasync *fasync, int signal, int poll) +{ + unsigned long flags; + + if (!fasync || !fasync->on) + return; + spin_lock_irqsave(&snd_fasync_lock, flags); + fasync->signal = signal; + fasync->poll = poll; + list_move(&fasync->list, &snd_fasync_list); + schedule_work(&snd_fasync_work); + spin_unlock_irqrestore(&snd_fasync_lock, flags); +} +EXPORT_SYMBOL_GPL(snd_kill_fasync); + +void snd_fasync_free(struct snd_fasync *fasync) +{ + if (!fasync) + return; + fasync->on = 0; + flush_work(&snd_fasync_work); + kfree(fasync); +} +EXPORT_SYMBOL_GPL(snd_fasync_free);
On 28. 07. 22 14:59, Takashi Iwai wrote:
Currently the call of kill_fasync() from an interrupt handler might lead to potential spin deadlocks, as spotted by syzkaller. Unfortunately, it's not so trivial to fix this lock chain as it's involved with the tasklist_lock that is touched in allover places.
As a temporary workaround, this patch provides the way to defer the async signal notification in a work. The new helper functions, snd_fasync_helper() and snd_kill_faync() are replacements for fasync_helper() and kill_fasync(), respectively. In addition, snd_fasync_free() needs to be called at the destructor of the relevant file object.
Signed-off-by: Takashi Iwai tiwai@suse.de
...
+void snd_kill_fasync(struct snd_fasync *fasync, int signal, int poll) +{
- unsigned long flags;
- if (!fasync || !fasync->on)
return;
- spin_lock_irqsave(&snd_fasync_lock, flags);
- fasync->signal = signal;
- fasync->poll = poll;
- list_move(&fasync->list, &snd_fasync_list);
- schedule_work(&snd_fasync_work);
- spin_unlock_irqrestore(&snd_fasync_lock, flags);
+}
The schedule_work() may be called outside the spinlock - it calls queue_work_on() / __queue_work() which has already own protection for the concurrent execution.
Jaroslav
On Mon, 01 Aug 2022 10:05:59 +0200, Jaroslav Kysela wrote:
On 28. 07. 22 14:59, Takashi Iwai wrote:
Currently the call of kill_fasync() from an interrupt handler might lead to potential spin deadlocks, as spotted by syzkaller. Unfortunately, it's not so trivial to fix this lock chain as it's involved with the tasklist_lock that is touched in allover places.
As a temporary workaround, this patch provides the way to defer the async signal notification in a work. The new helper functions, snd_fasync_helper() and snd_kill_faync() are replacements for fasync_helper() and kill_fasync(), respectively. In addition, snd_fasync_free() needs to be called at the destructor of the relevant file object.
Signed-off-by: Takashi Iwai tiwai@suse.de
...
+void snd_kill_fasync(struct snd_fasync *fasync, int signal, int poll) +{
- unsigned long flags;
- if (!fasync || !fasync->on)
return;
- spin_lock_irqsave(&snd_fasync_lock, flags);
- fasync->signal = signal;
- fasync->poll = poll;
- list_move(&fasync->list, &snd_fasync_list);
- schedule_work(&snd_fasync_work);
- spin_unlock_irqrestore(&snd_fasync_lock, flags);
+}
The schedule_work() may be called outside the spinlock - it calls queue_work_on() / __queue_work() which has already own protection for the concurrent execution.
It can be outside, too, but scheduling earlier reduces the possible unnecessary scheduling. Suppose that a list is added while the work is already running in another CPU. If we call schedule_work() outside this lock, it might be already the time after the work has processed the queued item, and hence it can be a superfluous scheduling call.
thanks,
Takashi
On 01. 08. 22 12:13, Takashi Iwai wrote:
On Mon, 01 Aug 2022 10:05:59 +0200, Jaroslav Kysela wrote:
On 28. 07. 22 14:59, Takashi Iwai wrote:
Currently the call of kill_fasync() from an interrupt handler might lead to potential spin deadlocks, as spotted by syzkaller. Unfortunately, it's not so trivial to fix this lock chain as it's involved with the tasklist_lock that is touched in allover places.
As a temporary workaround, this patch provides the way to defer the async signal notification in a work. The new helper functions, snd_fasync_helper() and snd_kill_faync() are replacements for fasync_helper() and kill_fasync(), respectively. In addition, snd_fasync_free() needs to be called at the destructor of the relevant file object.
Signed-off-by: Takashi Iwai tiwai@suse.de
...
+void snd_kill_fasync(struct snd_fasync *fasync, int signal, int poll) +{
- unsigned long flags;
- if (!fasync || !fasync->on)
return;
- spin_lock_irqsave(&snd_fasync_lock, flags);
- fasync->signal = signal;
- fasync->poll = poll;
- list_move(&fasync->list, &snd_fasync_list);
- schedule_work(&snd_fasync_work);
- spin_unlock_irqrestore(&snd_fasync_lock, flags);
+}
The schedule_work() may be called outside the spinlock - it calls queue_work_on() / __queue_work() which has already own protection for the concurrent execution.
It can be outside, too, but scheduling earlier reduces the possible unnecessary scheduling. Suppose that a list is added while the work is already running in another CPU. If we call schedule_work() outside this lock, it might be already the time after the work has processed the queued item, and hence it can be a superfluous scheduling call.
It's really a negligible optimization. It would be better to not block other CPUs here to allow insertion of the new event. Also the __queue_work() is a bit complex code, so the call outside the spin lock may be better.
But either code is acceptable for me.
Jaroslav
On Mon, 01 Aug 2022 12:43:36 +0200, Jaroslav Kysela wrote:
On 01. 08. 22 12:13, Takashi Iwai wrote:
On Mon, 01 Aug 2022 10:05:59 +0200, Jaroslav Kysela wrote:
On 28. 07. 22 14:59, Takashi Iwai wrote:
Currently the call of kill_fasync() from an interrupt handler might lead to potential spin deadlocks, as spotted by syzkaller. Unfortunately, it's not so trivial to fix this lock chain as it's involved with the tasklist_lock that is touched in allover places.
As a temporary workaround, this patch provides the way to defer the async signal notification in a work. The new helper functions, snd_fasync_helper() and snd_kill_faync() are replacements for fasync_helper() and kill_fasync(), respectively. In addition, snd_fasync_free() needs to be called at the destructor of the relevant file object.
Signed-off-by: Takashi Iwai tiwai@suse.de
...
+void snd_kill_fasync(struct snd_fasync *fasync, int signal, int poll) +{
- unsigned long flags;
- if (!fasync || !fasync->on)
return;
- spin_lock_irqsave(&snd_fasync_lock, flags);
- fasync->signal = signal;
- fasync->poll = poll;
- list_move(&fasync->list, &snd_fasync_list);
- schedule_work(&snd_fasync_work);
- spin_unlock_irqrestore(&snd_fasync_lock, flags);
+}
The schedule_work() may be called outside the spinlock - it calls queue_work_on() / __queue_work() which has already own protection for the concurrent execution.
It can be outside, too, but scheduling earlier reduces the possible unnecessary scheduling. Suppose that a list is added while the work is already running in another CPU. If we call schedule_work() outside this lock, it might be already the time after the work has processed the queued item, and hence it can be a superfluous scheduling call.
It's really a negligible optimization. It would be better to not block other CPUs here to allow insertion of the new event. Also the __queue_work() is a bit complex code, so the call outside the spin lock may be better.
It depends on how often this code path is used. Supposing the rare use case of this, we don't need to care too much, IMO. And, if we really want better concurrency, it should be replaced with RCU :)
But either code is acceptable for me.
As I've already queued the patches in the original form in the last week, let's keep it as is.
thanks,
Takashi
For avoiding the potential deadlock via kill_fasync() call, use the new fasync helpers to defer the invocation from PCI API. Note that it's merely a workaround.
Reported-by: syzbot+1ee0910eca9c94f71f25@syzkaller.appspotmail.com Reported-by: syzbot+49b10793b867871ee26f@syzkaller.appspotmail.com Reported-by: syzbot+8285e973a41b5aa68902@syzkaller.appspotmail.com Signed-off-by: Takashi Iwai tiwai@suse.de --- sound/core/timer.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-)
diff --git a/sound/core/timer.c b/sound/core/timer.c index b3214baa8919..e08a37c23add 100644 --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -83,7 +83,7 @@ struct snd_timer_user { unsigned int filter; struct timespec64 tstamp; /* trigger tstamp */ wait_queue_head_t qchange_sleep; - struct fasync_struct *fasync; + struct snd_fasync *fasync; struct mutex ioctl_lock; };
@@ -1345,7 +1345,7 @@ static void snd_timer_user_interrupt(struct snd_timer_instance *timeri, } __wake: spin_unlock(&tu->qlock); - kill_fasync(&tu->fasync, SIGIO, POLL_IN); + snd_kill_fasync(tu->fasync, SIGIO, POLL_IN); wake_up(&tu->qchange_sleep); }
@@ -1383,7 +1383,7 @@ static void snd_timer_user_ccallback(struct snd_timer_instance *timeri, spin_lock_irqsave(&tu->qlock, flags); snd_timer_user_append_to_tqueue(tu, &r1); spin_unlock_irqrestore(&tu->qlock, flags); - kill_fasync(&tu->fasync, SIGIO, POLL_IN); + snd_kill_fasync(tu->fasync, SIGIO, POLL_IN); wake_up(&tu->qchange_sleep); }
@@ -1453,7 +1453,7 @@ static void snd_timer_user_tinterrupt(struct snd_timer_instance *timeri, spin_unlock(&tu->qlock); if (append == 0) return; - kill_fasync(&tu->fasync, SIGIO, POLL_IN); + snd_kill_fasync(tu->fasync, SIGIO, POLL_IN); wake_up(&tu->qchange_sleep); }
@@ -1521,6 +1521,7 @@ static int snd_timer_user_release(struct inode *inode, struct file *file) snd_timer_instance_free(tu->timeri); } mutex_unlock(&tu->ioctl_lock); + snd_fasync_free(tu->fasync); kfree(tu->queue); kfree(tu->tqueue); kfree(tu); @@ -2135,7 +2136,7 @@ static int snd_timer_user_fasync(int fd, struct file * file, int on) struct snd_timer_user *tu;
tu = file->private_data; - return fasync_helper(fd, file, on, &tu->fasync); + return snd_fasync_helper(fd, file, on, &tu->fasync); }
static ssize_t snd_timer_user_read(struct file *file, char __user *buffer,
For avoiding the potential deadlock via kill_fasync() call, use the new fasync helpers to defer the invocation from timer API. Note that it's merely a workaround.
Reported-by: syzbot+8285e973a41b5aa68902@syzkaller.appspotmail.com Reported-by: syzbot+669c9abf11a6a011dd09@syzkaller.appspotmail.com Signed-off-by: Takashi Iwai tiwai@suse.de --- include/sound/pcm.h | 2 +- sound/core/pcm.c | 1 + sound/core/pcm_lib.c | 2 +- sound/core/pcm_native.c | 2 +- 4 files changed, 4 insertions(+), 3 deletions(-)
diff --git a/include/sound/pcm.h b/include/sound/pcm.h index 2d03c10f6a36..8c48a5bce88c 100644 --- a/include/sound/pcm.h +++ b/include/sound/pcm.h @@ -399,7 +399,7 @@ struct snd_pcm_runtime { snd_pcm_uframes_t twake; /* do transfer (!poll) wakeup if non-zero */ wait_queue_head_t sleep; /* poll sleep */ wait_queue_head_t tsleep; /* transfer sleep */ - struct fasync_struct *fasync; + struct snd_fasync *fasync; bool stop_operating; /* sync_stop will be called */ struct mutex buffer_mutex; /* protect for buffer changes */ atomic_t buffer_accessing; /* >0: in r/w operation, <0: blocked */ diff --git a/sound/core/pcm.c b/sound/core/pcm.c index 03fc5fa5813e..2ac742035310 100644 --- a/sound/core/pcm.c +++ b/sound/core/pcm.c @@ -1007,6 +1007,7 @@ void snd_pcm_detach_substream(struct snd_pcm_substream *substream) substream->runtime = NULL; } mutex_destroy(&runtime->buffer_mutex); + snd_fasync_free(runtime->fasync); kfree(runtime); put_pid(substream->pid); substream->pid = NULL; diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c index 1fc7c50ffa62..40751e5aff09 100644 --- a/sound/core/pcm_lib.c +++ b/sound/core/pcm_lib.c @@ -1822,7 +1822,7 @@ void snd_pcm_period_elapsed_under_stream_lock(struct snd_pcm_substream *substrea snd_timer_interrupt(substream->timer, 1); #endif _end: - kill_fasync(&runtime->fasync, SIGIO, POLL_IN); + snd_kill_fasync(runtime->fasync, SIGIO, POLL_IN); } EXPORT_SYMBOL(snd_pcm_period_elapsed_under_stream_lock);
diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index aa0453e51595..ad0541e9e888 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -3951,7 +3951,7 @@ static int snd_pcm_fasync(int fd, struct file * file, int on) runtime = substream->runtime; if (runtime->status->state == SNDRV_PCM_STATE_DISCONNECTED) return -EBADFD; - return fasync_helper(fd, file, on, &runtime->fasync); + return snd_fasync_helper(fd, file, on, &runtime->fasync); }
/*
For avoiding the potential deadlock via kill_fasync() call, use the new fasync helpers to defer the invocation from the control API. Note that it's merely a workaround.
Another note: although we haven't received reports about the deadlock with the control API, the deadlock is still potentially possible, and it's better to align the behavior with other core APIs (PCM and timer); so let's move altogether.
Signed-off-by: Takashi Iwai tiwai@suse.de --- include/sound/control.h | 2 +- sound/core/control.c | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-)
diff --git a/include/sound/control.h b/include/sound/control.h index fcd3cce673ec..eae443ba79ba 100644 --- a/include/sound/control.h +++ b/include/sound/control.h @@ -109,7 +109,7 @@ struct snd_ctl_file { int preferred_subdevice[SND_CTL_SUBDEV_ITEMS]; wait_queue_head_t change_sleep; spinlock_t read_lock; - struct fasync_struct *fasync; + struct snd_fasync *fasync; int subscribed; /* read interface is activated */ struct list_head events; /* waiting events for read */ }; diff --git a/sound/core/control.c b/sound/core/control.c index 4dba3a342458..f3e893715369 100644 --- a/sound/core/control.c +++ b/sound/core/control.c @@ -127,6 +127,7 @@ static int snd_ctl_release(struct inode *inode, struct file *file) if (control->vd[idx].owner == ctl) control->vd[idx].owner = NULL; up_write(&card->controls_rwsem); + snd_fasync_free(ctl->fasync); snd_ctl_empty_read_queue(ctl); put_pid(ctl->pid); kfree(ctl); @@ -181,7 +182,7 @@ void snd_ctl_notify(struct snd_card *card, unsigned int mask, _found: wake_up(&ctl->change_sleep); spin_unlock(&ctl->read_lock); - kill_fasync(&ctl->fasync, SIGIO, POLL_IN); + snd_kill_fasync(ctl->fasync, SIGIO, POLL_IN); } read_unlock_irqrestore(&card->ctl_files_rwlock, flags); } @@ -2134,7 +2135,7 @@ static int snd_ctl_fasync(int fd, struct file * file, int on) struct snd_ctl_file *ctl;
ctl = file->private_data; - return fasync_helper(fd, file, on, &ctl->fasync); + return snd_fasync_helper(fd, file, on, &ctl->fasync); }
/* return the preferred subdevice number if already assigned; @@ -2302,7 +2303,7 @@ static int snd_ctl_dev_disconnect(struct snd_device *device) read_lock_irqsave(&card->ctl_files_rwlock, flags); list_for_each_entry(ctl, &card->ctl_files, list) { wake_up(&ctl->change_sleep); - kill_fasync(&ctl->fasync, SIGIO, POLL_ERR); + snd_kill_fasync(ctl->fasync, SIGIO, POLL_ERR); } read_unlock_irqrestore(&card->ctl_files_rwlock, flags);
On 28. 07. 22 14:59, Takashi Iwai wrote:
Hi,
this is a revised patch series for another attempt to address the potential deadlocks via kill_fasync() calls that have been reported by syzbot for long time. Only a missing linux/fs.h inclusion was added from v1 series[1].
Instead of the previous series to drop the async handler[2], this tries to defer the kill_fasync() call. A few new common helpers are introduced at first, then the actual usages are replaced in the following patches.
The patches passed the quick tests with alsa-lib test cases.
Thank you for your work. The code looks good (I would only move schedule_work() call outside the spin lock context - commented in the patch thread).
Reviewed-by: Jaroslav Kysela perex@perex.cz
participants (2)
-
Jaroslav Kysela
-
Takashi Iwai