[PATCH] ASoC: dpcm: fix race condition to dpcm links in dpcm_be_dai_trigger
If routing change and underrun stop is run at the same time, data abort can be occurred by the following sequence.
CPU0: Processing underrun CPU1: Processing routing change dpcm_be_dai_trigger(): dpcm_be_disconnect():
for_each_dpcm_be(fe, stream, dpcm) {
spin_lock_irqsave(&fe->card->dpcm_lock, flags); list_del(&dpcm->list_be); list_del(&dpcm->list_fe); spin_unlock_irqrestore(&fe->card->dpcm_lock, flags); kfree(dpcm);
struct snd_soc_pcm_runtime *be = dpcm->be; <-- Accessing freed memory
To prevent this situation, dpcm_lock is needed during accessing the lists for dpcm links.
Signed-off-by: Gyeongtaek Lee gt82.lee@samsung.com Cc: stable@vger.kernel.org --- sound/soc/soc-pcm.c | 53 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 50 insertions(+), 3 deletions(-)
diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index 48f71bb81a2f..df2cd4c0dabe 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -1993,17 +1993,63 @@ static int dpcm_fe_dai_hw_params(struct snd_pcm_substream *substream, return ret; }
+struct dpcm_be_list { + unsigned int num; + struct snd_soc_pcm_runtime *be[]; +}; + +static int dpcm_create_be_list(struct snd_soc_pcm_runtime *fe, int stream, + struct dpcm_be_list **be_list) +{ + struct snd_soc_dpcm *dpcm; + struct dpcm_be_list *be; + int size = 0; + int ret = 0; + unsigned long flags; + + spin_lock_irqsave(&fe->card->dpcm_lock, flags); + + for_each_dpcm_be(fe, stream, dpcm) + size++; + + be = kzalloc(struct_size(be, be, size), GFP_ATOMIC); + if (!be) { + ret = -ENOMEM; + } else { + unsigned int i = 0; + + for_each_dpcm_be(fe, stream, dpcm) + be->be[i++] = dpcm->be; + + *be_list = be; + } + + spin_unlock_irqrestore(&fe->card->dpcm_lock, flags); + + return ret; +} + +static void dpcm_free_be_list(struct dpcm_be_list *be_list) +{ + kfree(be_list); +} + int dpcm_be_dai_trigger(struct snd_soc_pcm_runtime *fe, int stream, int cmd) { struct snd_soc_pcm_runtime *be; - struct snd_soc_dpcm *dpcm; + struct dpcm_be_list *be_list; int ret = 0; + int i;
- for_each_dpcm_be(fe, stream, dpcm) { + ret = dpcm_create_be_list(fe, stream, &be_list); + if (ret < 0) + return ret; + + for(i = 0; i < be_list->num; i++) { struct snd_pcm_substream *be_substream;
- be = dpcm->be; + be = be_list->be[i]; be_substream = snd_soc_dpcm_get_substream(be, stream);
/* is this op for this BE ? */ @@ -2092,6 +2138,7 @@ int dpcm_be_dai_trigger(struct snd_soc_pcm_runtime *fe, int stream, if (ret < 0) dev_err(fe->dev, "ASoC: %s() failed at %s (%d)\n", __func__, be->dai_link->name, ret); + dpcm_free_be_list(be_list); return ret; } EXPORT_SYMBOL_GPL(dpcm_be_dai_trigger);
base-commit: 4ac6d90867a4de2e12117e755dbd76e08d88697f
On 9/29/21 12:49 AM, Gyeongtaek Lee wrote:
If routing change and underrun stop is run at the same time, data abort can be occurred by the following sequence.
CPU0: Processing underrun CPU1: Processing routing change dpcm_be_dai_trigger(): dpcm_be_disconnect():
for_each_dpcm_be(fe, stream, dpcm) {
spin_lock_irqsave(&fe->card->dpcm_lock, flags); list_del(&dpcm->list_be); list_del(&dpcm->list_fe); spin_unlock_irqrestore(&fe->card->dpcm_lock, flags); kfree(dpcm);
struct snd_soc_pcm_runtime *be = dpcm->be; <-- Accessing freed memory
To prevent this situation, dpcm_lock is needed during accessing the lists for dpcm links.
Isn't there still a possible inconsistency here introduced by the duplication of the BE list?
You protect the list creation, but before you use it in dpcm_be_dai_trigger(), there's a time window where the function could be pre-empted and a disconnect event might have happened. As a result you could trigger a BE that's no longer connected.
What you identified as a race is likely valid, but how to fix it isn't clear to me - the DPCM code isn't self-explanatory at all with its use in various places of the dpcm_lock spinlock, the pcm mutex, the card mutex.
Ideally we would need to find a way to prevent changes in connections while we are doing the triggers, but triggers can take a bit of time if they involve any sort of communication over a bus. I really wonder if this dpcm_lock should be a mutex and if the model for DPCM really involves interrupt contexts as the irqsave/irqrestore mentions hint at.
Signed-off-by: Gyeongtaek Lee gt82.lee@samsung.com Cc: stable@vger.kernel.org
sound/soc/soc-pcm.c | 53 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 50 insertions(+), 3 deletions(-)
diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index 48f71bb81a2f..df2cd4c0dabe 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -1993,17 +1993,63 @@ static int dpcm_fe_dai_hw_params(struct snd_pcm_substream *substream, return ret; }
+struct dpcm_be_list {
- unsigned int num;
- struct snd_soc_pcm_runtime *be[];
+};
+static int dpcm_create_be_list(struct snd_soc_pcm_runtime *fe, int stream,
struct dpcm_be_list **be_list)
+{
- struct snd_soc_dpcm *dpcm;
- struct dpcm_be_list *be;
- int size = 0;
- int ret = 0;
- unsigned long flags;
- spin_lock_irqsave(&fe->card->dpcm_lock, flags);
- for_each_dpcm_be(fe, stream, dpcm)
size++;
- be = kzalloc(struct_size(be, be, size), GFP_ATOMIC);
- if (!be) {
ret = -ENOMEM;
- } else {
unsigned int i = 0;
for_each_dpcm_be(fe, stream, dpcm)
be->be[i++] = dpcm->be;
*be_list = be;
- }
- spin_unlock_irqrestore(&fe->card->dpcm_lock, flags);
- return ret;
+}
+static void dpcm_free_be_list(struct dpcm_be_list *be_list) +{
- kfree(be_list);
+}
int dpcm_be_dai_trigger(struct snd_soc_pcm_runtime *fe, int stream, int cmd) { struct snd_soc_pcm_runtime *be;
- struct snd_soc_dpcm *dpcm;
- struct dpcm_be_list *be_list; int ret = 0;
- int i;
- for_each_dpcm_be(fe, stream, dpcm) {
- ret = dpcm_create_be_list(fe, stream, &be_list);
- if (ret < 0)
return ret;
- for(i = 0; i < be_list->num; i++) { struct snd_pcm_substream *be_substream;
be = dpcm->be;
be = be_list->be[i];
be_substream = snd_soc_dpcm_get_substream(be, stream);
/* is this op for this BE ? */
@@ -2092,6 +2138,7 @@ int dpcm_be_dai_trigger(struct snd_soc_pcm_runtime *fe, int stream, if (ret < 0) dev_err(fe->dev, "ASoC: %s() failed at %s (%d)\n", __func__, be->dai_link->name, ret);
- dpcm_free_be_list(be_list); return ret;
} EXPORT_SYMBOL_GPL(dpcm_be_dai_trigger);
base-commit: 4ac6d90867a4de2e12117e755dbd76e08d88697f
If routing change and underrun stop is run at the same time, data abort can be occurred by the following sequence.
CPU0: Processing underrun CPU1: Processing routing change dpcm_be_dai_trigger(): dpcm_be_disconnect():
for_each_dpcm_be(fe, stream, dpcm) {
spin_lock_irqsave(&fe->card->dpcm_lock, flags); list_del(&dpcm->list_be); list_del(&dpcm->list_fe); spin_unlock_irqrestore(&fe->card->dpcm_lock, flags); kfree(dpcm);
struct snd_soc_pcm_runtime *be = dpcm->be; <-- Accessing freed memory
To prevent this situation, dpcm_lock is needed during accessing the lists for dpcm links.
Isn't there still a possible inconsistency here introduced by the duplication of the BE list?
You protect the list creation, but before you use it in dpcm_be_dai_trigger(), there's a time window where the function could be pre-empted and a disconnect event might have happened. As a result you could trigger a BE that's no longer connected.
What you identified as a race is likely valid, but how to fix it isn't clear to me - the DPCM code isn't self-explanatory at all with its use in various places of the dpcm_lock spinlock, the pcm mutex, the card mutex.
Ideally we would need to find a way to prevent changes in connections while we are doing the triggers, but triggers can take a bit of time if they involve any sort of communication over a bus. I really wonder if this dpcm_lock should be a mutex and if the model for DPCM really involves interrupt contexts as the irqsave/irqrestore mentions hint at.
To follow-up on this, I started experimenting with a replacement of the 'dpcm_lock' spinlock with a 'dpcm_mutex', see https://github.com/thesofproject/linux/pull/3186
If we combine both of our results, the 'right' solution might be to take this mutex before every use of for_each_dpcm_be(), and unlock it at the end of the loop, which additional changes to avoid re-taking the same mutex in helper functions.
there's still a part in DPCM that I can't figure out, there is an elaborate trick with an explicit comment
/* if FE's runtime_update is already set, we're in race; * process this trigger later at exit */
Which looks like a missing mutex somewhere, or an overkill solution that might never be needed.
If routing change and underrun stop is run at the same time, data abort can be occurred by the following sequence.
CPU0: Processing underrun CPU1: Processing routing change dpcm_be_dai_trigger(): dpcm_be_disconnect():
for_each_dpcm_be(fe, stream, dpcm) {
spin_lock_irqsave(&fe->card->dpcm_lock, flags); list_del(&dpcm->list_be); list_del(&dpcm->list_fe); spin_unlock_irqrestore(&fe->card->dpcm_lock, flags); kfree(dpcm);
struct snd_soc_pcm_runtime *be = dpcm->be; <-- Accessing freed memory
To prevent this situation, dpcm_lock is needed during accessing the lists for dpcm links.
Isn't there still a possible inconsistency here introduced by the duplication of the BE list?
You protect the list creation, but before you use it in dpcm_be_dai_trigger(), there's a time window where the function could be pre-empted and a disconnect event might have happened. As a result you could trigger a BE that's no longer connected.
What you identified as a race is likely valid, but how to fix it isn't clear to me - the DPCM code isn't self-explanatory at all with its use in various places of the dpcm_lock spinlock, the pcm mutex, the card mutex.
Ideally we would need to find a way to prevent changes in connections while we are doing the triggers, but triggers can take a bit of time if they involve any sort of communication over a bus. I really wonder if this dpcm_lock should be a mutex and if the model for DPCM really involves interrupt contexts as the irqsave/irqrestore mentions hint at.
To follow-up on this, I started experimenting with a replacement of the 'dpcm_lock' spinlock with a 'dpcm_mutex', see https://protect2.fireeye.com/v1/url?k=bdfd74d3-e2664dcc-bdfcff9c-000babdfecb...
If we combine both of our results, the 'right' solution might be to take this mutex before every use of for_each_dpcm_be(), and unlock it at the end of the loop, which additional changes to avoid re-taking the same mutex in helper functions.
there's still a part in DPCM that I can't figure out, there is an elaborate trick with an explicit comment
/* if FE's runtime_update is already set, we're in race; * process this trigger later at exit */
Which looks like a missing mutex somewhere, or an overkill solution that might never be needed.
You are right. This patch can't resolve inconsistency problem completely. I thought that even part of the problem can be resolved by this patch and it could help some other developers and me also. And I also thought that invalid trigger on disconnected BE DAI can be protected by the state check in the trigger function like the below.
int dpcm_be_dai_trigger(struct snd_soc_pcm_runtime *fe, int stream, int cmd) { struct snd_soc_dpcm *dpcm; int ret = 0;
for_each_dpcm_be(fe, stream, dpcm) { ....... switch (cmd) { case SNDRV_PCM_TRIGGER_START: /* Following if statement protect invalid control. */ if ((be->dpcm[stream].state != SND_SOC_DPCM_STATE_PREPARE) && (be->dpcm[stream].state != SND_SOC_DPCM_STATE_STOP) && (be->dpcm[stream].state != SND_SOC_DPCM_STATE_PAUSED)) continue;
ret = dpcm_do_trigger(dpcm, be_substream, cmd);
I really appreciate that there is a project about this problem already. But if the project needs more time to be merged into the mainline, I think that this patch can be used until the project is merged. If you don't mind, would you reconsider this patch one more time?
Thank you, Gyeongtaek Lee.
participants (2)
-
Gyeongtaek Lee
-
Pierre-Louis Bossart