[PATCH 06/14] ASoC: Intel: avs: Coredump and recovery flow
Pierre-Louis Bossart
pierre-louis.bossart at linux.intel.com
Tue Apr 26 23:53:37 CEST 2022
On 4/26/22 12:23, Cezary Rojewski wrote:
> In rare occassions, under stress conditions or hardware malfunction, DSP
Typo: "occasions".
> firmware may fail. Software is notified about such situation with
> EXCEPTION_CAUGHT notification. IPC timeout is also counted as critical
> device failure. More often than not, driver can recover from such
> situations by performing full reset: killing and restarting ADSP.
>
> Signed-off-by: Amadeusz Sławiński <amadeuszx.slawinski at linux.intel.com>
> Signed-off-by: Cezary Rojewski <cezary.rojewski at intel.com>
> ---
> sound/soc/intel/Kconfig | 1 +
> sound/soc/intel/avs/avs.h | 4 ++
> sound/soc/intel/avs/ipc.c | 95 +++++++++++++++++++++++++++++++++-
> sound/soc/intel/avs/messages.h | 5 ++
> 4 files changed, 103 insertions(+), 2 deletions(-)
>
> diff --git a/sound/soc/intel/Kconfig b/sound/soc/intel/Kconfig
> index c364ddf22267..05ad6bdecfc5 100644
> --- a/sound/soc/intel/Kconfig
> +++ b/sound/soc/intel/Kconfig
> @@ -218,6 +218,7 @@ config SND_SOC_INTEL_AVS
> select SND_HDA_EXT_CORE
> select SND_HDA_DSP_LOADER
> select SND_INTEL_NHLT
> + select WANT_DEV_COREDUMP
> help
> Enable support for Intel(R) cAVS 1.5 platforms with DSP
> capabilities. This includes Skylake, Kabylake, Amberlake and
> diff --git a/sound/soc/intel/avs/avs.h b/sound/soc/intel/avs/avs.h
> index e628f78d1864..02c2aa1bcd5c 100644
> --- a/sound/soc/intel/avs/avs.h
> +++ b/sound/soc/intel/avs/avs.h
> @@ -42,6 +42,7 @@ struct avs_dsp_ops {
> int (* const load_basefw)(struct avs_dev *, struct firmware *);
> int (* const load_lib)(struct avs_dev *, struct firmware *, u32);
> int (* const transfer_mods)(struct avs_dev *, bool, struct avs_module_entry *, u32);
> + int (* const coredump)(struct avs_dev *, union avs_notify_msg *);
> };
>
> #define avs_dsp_op(adev, op, ...) \
> @@ -164,12 +165,15 @@ struct avs_ipc {
> struct avs_ipc_msg rx;
> u32 default_timeout_ms;
> bool ready;
> + bool recovering;
>
> bool rx_completed;
> spinlock_t rx_lock;
> struct mutex msg_mutex;
> struct completion done_completion;
> struct completion busy_completion;
> +
> + struct work_struct recovery_work;
> };
>
> #define AVS_EIPC EREMOTEIO
> diff --git a/sound/soc/intel/avs/ipc.c b/sound/soc/intel/avs/ipc.c
> index 68aaf01edbf2..84cb411c82fa 100644
> --- a/sound/soc/intel/avs/ipc.c
> +++ b/sound/soc/intel/avs/ipc.c
> @@ -14,6 +14,87 @@
>
> #define AVS_IPC_TIMEOUT_MS 300
>
> +static void avs_dsp_recovery(struct avs_dev *adev)
> +{
> + struct avs_soc_component *acomp;
> + unsigned int core_mask;
> + int ret;
> +
> + if (adev->ipc->recovering)
> + return;
> + adev->ipc->recovering = true;
Don't you need some sort of lock to test and clear this flag?
> +
> + mutex_lock(&adev->comp_list_mutex);
> + /* disconnect all running streams */
> + list_for_each_entry(acomp, &adev->comp_list, node) {
> + struct snd_soc_pcm_runtime *rtd;
> + struct snd_soc_card *card;
> +
> + card = acomp->base.card;
> + if (!card)
> + continue;
> +
> + for_each_card_rtds(card, rtd) {
> + struct snd_pcm *pcm;
> + int dir;
> +
> + pcm = rtd->pcm;
> + if (!pcm || rtd->dai_link->no_pcm)
> + continue;
> +
> + for_each_pcm_streams(dir) {
> + struct snd_pcm_substream *substream;
> +
> + substream = pcm->streams[dir].substream;
> + if (!substream || !substream->runtime)
> + continue;
> +
> + snd_pcm_stop(substream, SNDRV_PCM_STATE_DISCONNECTED);
> + }
> + }
> + }
> + mutex_unlock(&adev->comp_list_mutex);
> +
> + /* forcibly shutdown all cores */
> + core_mask = GENMASK(adev->hw_cfg.dsp_cores - 1, 0);
> + avs_dsp_core_disable(adev, core_mask);
> +
> + /* attempt dsp reboot */
> + ret = avs_dsp_boot_firmware(adev, true);
> + if (ret < 0)
> + dev_err(adev->dev, "dsp reboot failed: %d\n", ret);
> +
> + pm_runtime_mark_last_busy(adev->dev);
> + pm_runtime_enable(adev->dev);
> + pm_request_autosuspend(adev->dev);
There are zero users of pm_request_autosuspend() in the entire sound/ tree. Can you clarify why it is needed here, or what you are trying to do?
> +
> + adev->ipc->recovering = false;
> +}
More information about the Alsa-devel
mailing list