[PATCH] ALSA: memalloc: Workaround for Xen PV
We change recently the memalloc helper to use dma_alloc_noncontiguous() and the fallback to get_pages(). Although lots of issues with IOMMU (or non-IOMMU) have been addressed, but there seems still a regression on Xen PV. Interestingly, the only proper way to work is use dma_alloc_coherent(). The use of dma_alloc_coherent() for SG buffer was dropped as it's problematic on IOMMU systems. OTOH, Xen PV has a different way, and it's fine to use the dma_alloc_coherent().
This patch is a workaround for Xen PV. It consists of the following changes: - For Xen PV, use only the fallback allocation without dma_alloc_noncontiguous() - In the fallback allocation, use dma_alloc_coherent(); the DMA address from dma_alloc_coherent() is returned in get_addr ops - The DMA addresses are stored in an array; the first entry stores the number of allocated pages in lower bits, which are referred at releasing pages again
Reported-by: Marek Marczykowski-Górecki marmarek@invisiblethingslab.com Fixes: a8d302a0b770 ("ALSA: memalloc: Revive x86-specific WC page allocations again") Fixes: 9736a325137b ("ALSA: memalloc: Don't fall back for SG-buffer with IOMMU") Link: https://lore.kernel.org/r/87tu256lqs.wl-tiwai@suse.de Signed-off-by: Takashi Iwai tiwai@suse.de ---
Marek, this is another respin of the fix. Please check this one and report back. Thanks!
sound/core/memalloc.c | 87 ++++++++++++++++++++++++++++++++++--------- 1 file changed, 69 insertions(+), 18 deletions(-)
diff --git a/sound/core/memalloc.c b/sound/core/memalloc.c index 81025f50a542..f901504b5afc 100644 --- a/sound/core/memalloc.c +++ b/sound/core/memalloc.c @@ -541,16 +541,15 @@ static void *snd_dma_noncontig_alloc(struct snd_dma_buffer *dmab, size_t size) struct sg_table *sgt; void *p;
+#ifdef CONFIG_SND_DMA_SGBUF + if (cpu_feature_enabled(X86_FEATURE_XENPV)) + return snd_dma_sg_fallback_alloc(dmab, size); +#endif sgt = dma_alloc_noncontiguous(dmab->dev.dev, size, dmab->dev.dir, DEFAULT_GFP, 0); #ifdef CONFIG_SND_DMA_SGBUF - if (!sgt && !get_dma_ops(dmab->dev.dev)) { - if (dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC_SG) - dmab->dev.type = SNDRV_DMA_TYPE_DEV_WC_SG_FALLBACK; - else - dmab->dev.type = SNDRV_DMA_TYPE_DEV_SG_FALLBACK; + if (!sgt && !get_dma_ops(dmab->dev.dev)) return snd_dma_sg_fallback_alloc(dmab, size); - } #endif if (!sgt) return NULL; @@ -717,19 +716,38 @@ static const struct snd_malloc_ops snd_dma_sg_wc_ops = {
/* Fallback SG-buffer allocations for x86 */ struct snd_dma_sg_fallback { + bool use_dma_alloc_coherent; size_t count; struct page **pages; + /* DMA address array; the first page contains #pages in ~PAGE_MASK */ + dma_addr_t *addrs; };
static void __snd_dma_sg_fallback_free(struct snd_dma_buffer *dmab, struct snd_dma_sg_fallback *sgbuf) { - bool wc = dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC_SG_FALLBACK; - size_t i; - - for (i = 0; i < sgbuf->count && sgbuf->pages[i]; i++) - do_free_pages(page_address(sgbuf->pages[i]), PAGE_SIZE, wc); + size_t i, size; + + if (sgbuf->pages && sgbuf->addrs) { + i = 0; + while (i < sgbuf->count) { + if (!sgbuf->pages[i] || !sgbuf->addrs[i]) + break; + size = sgbuf->addrs[i] & ~PAGE_MASK; + if (WARN_ON(!size)) + break; + if (sgbuf->use_dma_alloc_coherent) + dma_free_coherent(dmab->dev.dev, size << PAGE_SHIFT, + page_address(sgbuf->pages[i]), + sgbuf->addrs[i] & PAGE_MASK); + else + do_free_pages(page_address(sgbuf->pages[i]), + size << PAGE_SHIFT, false); + i += size; + } + } kvfree(sgbuf->pages); + kvfree(sgbuf->addrs); kfree(sgbuf); }
@@ -738,24 +756,36 @@ static void *snd_dma_sg_fallback_alloc(struct snd_dma_buffer *dmab, size_t size) struct snd_dma_sg_fallback *sgbuf; struct page **pagep, *curp; size_t chunk, npages; + dma_addr_t *addrp; dma_addr_t addr; void *p; - bool wc = dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC_SG_FALLBACK; + + /* correct the type */ + if (dmab->dev.type == SNDRV_DMA_TYPE_DEV_SG) + dmab->dev.type = SNDRV_DMA_TYPE_DEV_SG_FALLBACK; + else if (dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC_SG) + dmab->dev.type = SNDRV_DMA_TYPE_DEV_WC_SG_FALLBACK;
sgbuf = kzalloc(sizeof(*sgbuf), GFP_KERNEL); if (!sgbuf) return NULL; + sgbuf->use_dma_alloc_coherent = cpu_feature_enabled(X86_FEATURE_XENPV); size = PAGE_ALIGN(size); sgbuf->count = size >> PAGE_SHIFT; sgbuf->pages = kvcalloc(sgbuf->count, sizeof(*sgbuf->pages), GFP_KERNEL); - if (!sgbuf->pages) + sgbuf->addrs = kvcalloc(sgbuf->count, sizeof(*sgbuf->addrs), GFP_KERNEL); + if (!sgbuf->pages || !sgbuf->addrs) goto error;
pagep = sgbuf->pages; - chunk = size; + addrp = sgbuf->addrs; + chunk = (PAGE_SIZE - 1) << PAGE_SHIFT; /* to fit in low bits in addrs */ while (size > 0) { chunk = min(size, chunk); - p = do_alloc_pages(dmab->dev.dev, chunk, &addr, wc); + if (sgbuf->use_dma_alloc_coherent) + p = dma_alloc_coherent(dmab->dev.dev, chunk, &addr, DEFAULT_GFP); + else + p = do_alloc_pages(dmab->dev.dev, chunk, &addr, false); if (!p) { if (chunk <= PAGE_SIZE) goto error; @@ -767,17 +797,25 @@ static void *snd_dma_sg_fallback_alloc(struct snd_dma_buffer *dmab, size_t size) size -= chunk; /* fill pages */ npages = chunk >> PAGE_SHIFT; + *addrp = npages; /* store in lower bits */ curp = virt_to_page(p); - while (npages--) + while (npages--) { *pagep++ = curp++; + *addrp++ |= addr; + addr += PAGE_SIZE; + } }
p = vmap(sgbuf->pages, sgbuf->count, VM_MAP, PAGE_KERNEL); if (!p) goto error; + + if (dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC_SG_FALLBACK) + set_pages_array_wc(sgbuf->pages, sgbuf->count); + dmab->private_data = sgbuf; /* store the first page address for convenience */ - dmab->addr = snd_sgbuf_get_addr(dmab, 0); + dmab->addr = sgbuf->addrs[0] & PAGE_MASK; return p;
error: @@ -787,10 +825,23 @@ static void *snd_dma_sg_fallback_alloc(struct snd_dma_buffer *dmab, size_t size)
static void snd_dma_sg_fallback_free(struct snd_dma_buffer *dmab) { + struct snd_dma_sg_fallback *sgbuf = dmab->private_data; + + if (dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC_SG_FALLBACK) + set_pages_array_wb(sgbuf->pages, sgbuf->count); vunmap(dmab->area); __snd_dma_sg_fallback_free(dmab, dmab->private_data); }
+static dma_addr_t snd_dma_sg_fallback_get_addr(struct snd_dma_buffer *dmab, + size_t offset) +{ + struct snd_dma_sg_fallback *sgbuf = dmab->private_data; + size_t index = offset >> PAGE_SHIFT; + + return (sgbuf->addrs[index] & PAGE_MASK) | (offset & ~PAGE_MASK); +} + static int snd_dma_sg_fallback_mmap(struct snd_dma_buffer *dmab, struct vm_area_struct *area) { @@ -805,8 +856,8 @@ static const struct snd_malloc_ops snd_dma_sg_fallback_ops = { .alloc = snd_dma_sg_fallback_alloc, .free = snd_dma_sg_fallback_free, .mmap = snd_dma_sg_fallback_mmap, + .get_addr = snd_dma_sg_fallback_get_addr, /* reuse vmalloc helpers */ - .get_addr = snd_dma_vmalloc_get_addr, .get_page = snd_dma_vmalloc_get_page, .get_chunk_size = snd_dma_vmalloc_get_chunk_size, };
On Wed, Jan 25, 2023 at 04:31:04PM +0100, Takashi Iwai wrote:
We change recently the memalloc helper to use dma_alloc_noncontiguous() and the fallback to get_pages(). Although lots of issues with IOMMU (or non-IOMMU) have been addressed, but there seems still a regression on Xen PV. Interestingly, the only proper way to work is use dma_alloc_coherent(). The use of dma_alloc_coherent() for SG buffer was dropped as it's problematic on IOMMU systems. OTOH, Xen PV has a different way, and it's fine to use the dma_alloc_coherent().
This patch is a workaround for Xen PV. It consists of the following changes:
- For Xen PV, use only the fallback allocation without dma_alloc_noncontiguous()
- In the fallback allocation, use dma_alloc_coherent(); the DMA address from dma_alloc_coherent() is returned in get_addr ops
- The DMA addresses are stored in an array; the first entry stores the number of allocated pages in lower bits, which are referred at releasing pages again
Reported-by: Marek Marczykowski-Górecki marmarek@invisiblethingslab.com Fixes: a8d302a0b770 ("ALSA: memalloc: Revive x86-specific WC page allocations again") Fixes: 9736a325137b ("ALSA: memalloc: Don't fall back for SG-buffer with IOMMU") Link: https://lore.kernel.org/r/87tu256lqs.wl-tiwai@suse.de Signed-off-by: Takashi Iwai tiwai@suse.de
Uptime ~20h and it still works, so Tested-by: Marek Marczykowski-Górecki marmarek@invisiblethingslab.com
Marek, this is another respin of the fix. Please check this one and report back. Thanks!
sound/core/memalloc.c | 87 ++++++++++++++++++++++++++++++++++--------- 1 file changed, 69 insertions(+), 18 deletions(-)
diff --git a/sound/core/memalloc.c b/sound/core/memalloc.c index 81025f50a542..f901504b5afc 100644 --- a/sound/core/memalloc.c +++ b/sound/core/memalloc.c @@ -541,16 +541,15 @@ static void *snd_dma_noncontig_alloc(struct snd_dma_buffer *dmab, size_t size) struct sg_table *sgt; void *p;
+#ifdef CONFIG_SND_DMA_SGBUF
- if (cpu_feature_enabled(X86_FEATURE_XENPV))
return snd_dma_sg_fallback_alloc(dmab, size);
+#endif sgt = dma_alloc_noncontiguous(dmab->dev.dev, size, dmab->dev.dir, DEFAULT_GFP, 0); #ifdef CONFIG_SND_DMA_SGBUF
- if (!sgt && !get_dma_ops(dmab->dev.dev)) {
if (dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC_SG)
dmab->dev.type = SNDRV_DMA_TYPE_DEV_WC_SG_FALLBACK;
else
dmab->dev.type = SNDRV_DMA_TYPE_DEV_SG_FALLBACK;
- if (!sgt && !get_dma_ops(dmab->dev.dev)) return snd_dma_sg_fallback_alloc(dmab, size);
- }
#endif if (!sgt) return NULL; @@ -717,19 +716,38 @@ static const struct snd_malloc_ops snd_dma_sg_wc_ops = {
/* Fallback SG-buffer allocations for x86 */ struct snd_dma_sg_fallback {
- bool use_dma_alloc_coherent; size_t count; struct page **pages;
- /* DMA address array; the first page contains #pages in ~PAGE_MASK */
- dma_addr_t *addrs;
};
static void __snd_dma_sg_fallback_free(struct snd_dma_buffer *dmab, struct snd_dma_sg_fallback *sgbuf) {
- bool wc = dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC_SG_FALLBACK;
- size_t i;
- for (i = 0; i < sgbuf->count && sgbuf->pages[i]; i++)
do_free_pages(page_address(sgbuf->pages[i]), PAGE_SIZE, wc);
- size_t i, size;
- if (sgbuf->pages && sgbuf->addrs) {
i = 0;
while (i < sgbuf->count) {
if (!sgbuf->pages[i] || !sgbuf->addrs[i])
break;
size = sgbuf->addrs[i] & ~PAGE_MASK;
if (WARN_ON(!size))
break;
if (sgbuf->use_dma_alloc_coherent)
dma_free_coherent(dmab->dev.dev, size << PAGE_SHIFT,
page_address(sgbuf->pages[i]),
sgbuf->addrs[i] & PAGE_MASK);
else
do_free_pages(page_address(sgbuf->pages[i]),
size << PAGE_SHIFT, false);
i += size;
}
- } kvfree(sgbuf->pages);
- kvfree(sgbuf->addrs); kfree(sgbuf);
}
@@ -738,24 +756,36 @@ static void *snd_dma_sg_fallback_alloc(struct snd_dma_buffer *dmab, size_t size) struct snd_dma_sg_fallback *sgbuf; struct page **pagep, *curp; size_t chunk, npages;
- dma_addr_t *addrp; dma_addr_t addr; void *p;
- bool wc = dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC_SG_FALLBACK;
/* correct the type */
if (dmab->dev.type == SNDRV_DMA_TYPE_DEV_SG)
dmab->dev.type = SNDRV_DMA_TYPE_DEV_SG_FALLBACK;
else if (dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC_SG)
dmab->dev.type = SNDRV_DMA_TYPE_DEV_WC_SG_FALLBACK;
sgbuf = kzalloc(sizeof(*sgbuf), GFP_KERNEL); if (!sgbuf) return NULL;
sgbuf->use_dma_alloc_coherent = cpu_feature_enabled(X86_FEATURE_XENPV); size = PAGE_ALIGN(size); sgbuf->count = size >> PAGE_SHIFT; sgbuf->pages = kvcalloc(sgbuf->count, sizeof(*sgbuf->pages), GFP_KERNEL);
- if (!sgbuf->pages)
sgbuf->addrs = kvcalloc(sgbuf->count, sizeof(*sgbuf->addrs), GFP_KERNEL);
if (!sgbuf->pages || !sgbuf->addrs) goto error;
pagep = sgbuf->pages;
- chunk = size;
- addrp = sgbuf->addrs;
- chunk = (PAGE_SIZE - 1) << PAGE_SHIFT; /* to fit in low bits in addrs */ while (size > 0) { chunk = min(size, chunk);
p = do_alloc_pages(dmab->dev.dev, chunk, &addr, wc);
if (sgbuf->use_dma_alloc_coherent)
p = dma_alloc_coherent(dmab->dev.dev, chunk, &addr, DEFAULT_GFP);
else
if (!p) { if (chunk <= PAGE_SIZE) goto error;p = do_alloc_pages(dmab->dev.dev, chunk, &addr, false);
@@ -767,17 +797,25 @@ static void *snd_dma_sg_fallback_alloc(struct snd_dma_buffer *dmab, size_t size) size -= chunk; /* fill pages */ npages = chunk >> PAGE_SHIFT;
curp = virt_to_page(p);*addrp = npages; /* store in lower bits */
while (npages--)
while (npages--) { *pagep++ = curp++;
*addrp++ |= addr;
addr += PAGE_SIZE;
}
}
p = vmap(sgbuf->pages, sgbuf->count, VM_MAP, PAGE_KERNEL); if (!p) goto error;
if (dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC_SG_FALLBACK)
set_pages_array_wc(sgbuf->pages, sgbuf->count);
dmab->private_data = sgbuf; /* store the first page address for convenience */
- dmab->addr = snd_sgbuf_get_addr(dmab, 0);
dmab->addr = sgbuf->addrs[0] & PAGE_MASK; return p;
error:
@@ -787,10 +825,23 @@ static void *snd_dma_sg_fallback_alloc(struct snd_dma_buffer *dmab, size_t size)
static void snd_dma_sg_fallback_free(struct snd_dma_buffer *dmab) {
- struct snd_dma_sg_fallback *sgbuf = dmab->private_data;
- if (dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC_SG_FALLBACK)
vunmap(dmab->area); __snd_dma_sg_fallback_free(dmab, dmab->private_data);set_pages_array_wb(sgbuf->pages, sgbuf->count);
}
+static dma_addr_t snd_dma_sg_fallback_get_addr(struct snd_dma_buffer *dmab,
size_t offset)
+{
- struct snd_dma_sg_fallback *sgbuf = dmab->private_data;
- size_t index = offset >> PAGE_SHIFT;
- return (sgbuf->addrs[index] & PAGE_MASK) | (offset & ~PAGE_MASK);
+}
static int snd_dma_sg_fallback_mmap(struct snd_dma_buffer *dmab, struct vm_area_struct *area) { @@ -805,8 +856,8 @@ static const struct snd_malloc_ops snd_dma_sg_fallback_ops = { .alloc = snd_dma_sg_fallback_alloc, .free = snd_dma_sg_fallback_free, .mmap = snd_dma_sg_fallback_mmap,
- .get_addr = snd_dma_sg_fallback_get_addr, /* reuse vmalloc helpers */
- .get_addr = snd_dma_vmalloc_get_addr, .get_page = snd_dma_vmalloc_get_page, .get_chunk_size = snd_dma_vmalloc_get_chunk_size,
};
2.35.3
participants (2)
-
Marek Marczykowski-Górecki
-
Takashi Iwai