[alsa-devel] [3.6-rc7] switcheroo race with Intel HDA...

Takashi Iwai tiwai at suse.de
Wed Oct 10 14:34:30 CEST 2012


At Tue, 9 Oct 2012 22:26:40 +0800,
Daniel J Blueman wrote:
> 
> On 9 October 2012 21:04, Takashi Iwai <tiwai at suse.de> wrote:
> > At Tue, 9 Oct 2012 19:23:56 +0800,
> > Daniel J Blueman wrote:
> >> On 9 October 2012 18:07, Takashi Iwai <tiwai at suse.de> wrote:
> >> > At Tue, 09 Oct 2012 12:04:08 +0200,
> >> > Takashi Iwai wrote:
> >> >> At Tue, 9 Oct 2012 00:34:09 +0800,
> >> >> Daniel J Blueman wrote:
> >> >> > On 8 October 2012 20:58, Takashi Iwai <tiwai at suse.de> wrote:
> >> >> > > At Tue, 25 Sep 2012 13:20:05 +0800,
> >> >> > > Daniel J Blueman wrote:
> >> >> > >> On my Macbook with a discrete Nvidia GPU, there is a race between
> >> >> > >> selecting the integrated GPU and putting the discrete GPU into D3 [1],
> >> >> > >> reliably causing a kernel oops [2].
> >> >> > >>
> >> >> > >> Introducing a delay of ~1s between the calls prevents this. When the
> >> >> > >> second 'OFF' write path executes, it looks like struct azx at
> >> >> > >> card->private_data hasn't yet been allocated yet [3], so there is
> >> >> > >> likely some locking missing.
> >> >> > >
> >> >> > > It's rather pci_get_drvdata() returning NULL (i.e. card is NULL, thus
> >> >> > > card->private_data causes Oops).  Could you check the patch like below
> >> >> > > and see whether you get a kernel warning (but no Oops) or the problem
> >> >> > > gets fixed by shifting the assignment of pci drvdata?
> >> >> > [...]
> >> >> >
> >> >> > Good patching. Calling pci_set_drvdata later prevents the oops in HDA,
> >> >> > though we see unexpected 0x0 responses in the response ring buffer
> >> >> > [1], which we don't see when there's a >~1.5s delay between IGD and
> >> >> > OFF.
> >> >>
> >> >> If the previous patch fixed, it means that the switching occurred
> >> >> during the device was being probed.  Maybe a better approach to
> >> >> register the VGA switcheroo after the proper initialization.
> >> >>
> >> >> The patch below is a revised one.  Please give it a try.
> >> >
> >> > Also, it's not clear which card spews the spurious response.
> >> > Apply the patch below in addition.
> >> [...]
> >>
> >> hda-intel: 0000:01:00.1: spurious response 0x0:0x0, last cmd=0x1f0004
> >> $ lspci -s :1:0.1
> >> 01:00.1 Audio device: NVIDIA Corporation Device 0e1b (rev ff)
> >>
> >> It's the NVIDIA device which presumably hasn't completed it's
> >> transition to D3 at the time the OFF is executed.
> >
> > OK, then could you try the patch below on the top of previous two
> > patches?
> 
> The first IGD switcheroo command fails to switch to the integrated GPU:
> 
> # cat /sys/kernel/debug/vgaswitcheroo/switch
> 0:DIS:+:Pwr:0000:01:00.0
> 1:IGD: :Pwr:0000:00:02.0
> 2:DIS-Audio: :Pwr:0000:01:00.1
> # echo IGD >/sys/kernel/debug/vgaswitcheroo/switch
> vga_switcheroo: client 1 refused switch
> 
> I also instrumented snd_hda_lock_devices, but none of the failure
> paths are being taken, which would leave inconsistent state, as the
> return value isn't checked.

Hm, right, the return value of snd_hda_lock_devices() isn't checked,
but I don't understand how this results like above.
Basically switching is protected by mutex in vga_switcheroo.c, so the
whole operation in the client side should be serialized.

In anyway, try the patch below cleanly, and see the spurious message
error coming up at which timing.


thanks,

Takashi

---
diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index 6833835..4518b05 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -501,6 +501,7 @@ struct azx {
 
 	/* VGA-switcheroo setup */
 	unsigned int use_vga_switcheroo:1;
+	unsigned int vga_switcheroo_registered:1;
 	unsigned int init_failed:1; /* delayed init failed */
 	unsigned int disabled:1; /* disabled by VGA-switcher */
 
@@ -1597,6 +1598,8 @@ static int DELAYED_INIT_MARK azx_codec_create(struct azx *chip, const char *mode
 	int c, codecs, err;
 	int max_slots;
 
+	printk(KERN_DEBUG "XXX %s: azx_codec_create entered\n",
+	       pci_name(chip->pci));
 	memset(&bus_temp, 0, sizeof(bus_temp));
 	bus_temp.private_data = chip;
 	bus_temp.modelname = model;
@@ -1673,6 +1676,8 @@ static int DELAYED_INIT_MARK azx_codec_create(struct azx *chip, const char *mode
 		snd_printk(KERN_ERR SFX "no codecs initialized\n");
 		return -ENXIO;
 	}
+	printk(KERN_DEBUG "XXX %s: azx_codec_create done\n",
+	       pci_name(chip->pci));
 	return 0;
 }
 
@@ -2615,6 +2620,8 @@ static void azx_vs_set_state(struct pci_dev *pci,
 		return;
 
 	disabled = (state == VGA_SWITCHEROO_OFF);
+	printk(KERN_DEBUG "XXX %s: chip->disabled=%d, disabled=%d\n",
+	       pci_name(chip->pci), chip->disabled, disabled);
 	if (chip->disabled == disabled)
 		return;
 
@@ -2638,14 +2645,26 @@ static void azx_vs_set_state(struct pci_dev *pci,
 			   disabled ? "Disabling" : "Enabling",
 			   pci_name(chip->pci));
 		if (disabled) {
+			printk(KERN_DEBUG "XXX %s: azx_suspend...\n",
+			       pci_name(chip->pci));
 			azx_suspend(&pci->dev);
+			printk(KERN_DEBUG "XXX %s: azx_suspend done...\n",
+			       pci_name(chip->pci));
 			chip->disabled = true;
-			snd_hda_lock_devices(chip->bus);
+			if (snd_hda_lock_devices(chip->bus))
+				snd_printk(KERN_WARNING SFX
+					   "Cannot lock devices!\n");
 		} else {
 			snd_hda_unlock_devices(chip->bus);
 			chip->disabled = false;
+			printk(KERN_DEBUG "XXX %s: azx_resume...\n",
+			       pci_name(chip->pci));
 			azx_resume(&pci->dev);
+			printk(KERN_DEBUG "XXX %s: azx_resume done...\n",
+			       pci_name(chip->pci));
 		}
+		printk(KERN_DEBUG "XXX %s: switching finished: disabled=%d\n",
+		       pci_name(chip->pci), chip->disabled);
 	}
 }
 
@@ -2683,14 +2702,20 @@ static const struct vga_switcheroo_client_ops azx_vs_ops = {
 
 static int __devinit register_vga_switcheroo(struct azx *chip)
 {
+	int err;
+
 	if (!chip->use_vga_switcheroo)
 		return 0;
 	/* FIXME: currently only handling DIS controller
 	 * is there any machine with two switchable HDMI audio controllers?
 	 */
-	return vga_switcheroo_register_audio_client(chip->pci, &azx_vs_ops,
+	err = vga_switcheroo_register_audio_client(chip->pci, &azx_vs_ops,
 						    VGA_SWITCHEROO_DIS,
 						    chip->bus != NULL);
+	if (err < 0)
+		return err;
+	chip->vga_switcheroo_registered = 1;
+	return 0;
 }
 #else
 #define init_vga_switcheroo(chip)		/* NOP */
@@ -2712,7 +2737,8 @@ static int azx_free(struct azx *chip)
 	if (use_vga_switcheroo(chip)) {
 		if (chip->disabled && chip->bus)
 			snd_hda_unlock_devices(chip->bus);
-		vga_switcheroo_unregister_client(chip->pci);
+		if (chip->vga_switcheroo_registered)
+			vga_switcheroo_unregister_client(chip->pci);
 	}
 
 	if (chip->initialized) {
@@ -3062,14 +3088,6 @@ static int __devinit azx_create(struct snd_card *card, struct pci_dev *pci,
 	}
 
  ok:
-	err = register_vga_switcheroo(chip);
-	if (err < 0) {
-		snd_printk(KERN_ERR SFX
-			   "Error registering VGA-switcheroo client\n");
-		azx_free(chip);
-		return err;
-	}
-
 	err = snd_device_new(card, SNDRV_DEV_LOWLEVEL, chip, &ops);
 	if (err < 0) {
 		snd_printk(KERN_ERR SFX "Error creating device [card]!\n");
@@ -3340,6 +3358,13 @@ static int __devinit azx_probe(struct pci_dev *pci,
 	if (pci_dev_run_wake(pci))
 		pm_runtime_put_noidle(&pci->dev);
 
+	err = register_vga_switcheroo(chip);
+	if (err < 0) {
+		snd_printk(KERN_ERR SFX
+			   "Error registering VGA-switcheroo client\n");
+		goto out_free;
+	}
+
 	dev++;
 	return 0;
 


More information about the Alsa-devel mailing list