Hi Takashi,
I resubmitted the patch (V2) with some modifications: - I split the changes so you will not have to apply all of them; - the 4 first patches are the ones for safety, and barely produce the same binary output some I am highly confident in the fact they do not need testing; - the 2 last patches are small changes that can help the compiler producing a better/smaller code. They have not been tested yet. Have you any ready to go benchmark to test with? - the patches work both on 32- and 64bit version.
To compare the binary output, I used objdump and diff on libpcm.a. I compared the master with each patch with both 32 and 64 versions.
Hope it will help. Regards, Frédéric
De: "tiwai" tiwai@suse.de À: "frederic recoules" frederic.recoules@univ-grenoble-alpes.fr Cc: "alsa-devel" alsa-devel@alsa-project.org, "frederic recoules" frederic.recoules@orange.fr Envoyé: Mercredi 29 Avril 2020 10:19:11 Objet: Re: [PATCH] [inline assembly] fix pcm_dmix_i386.h assembly chunk interfaces
On Mon, 27 Apr 2020 18:57:07 +0200, frederic.recoules@univ-grenoble-alpes.fr wrote:
From: Frédéric Recoules frederic.recoules@orange.fr
Enabler change:
- set HAVE_MMX in configure if the compiler is aware of MMX technology
Main changes are:
- move 'size' and 'old_ebx' to the output list since they are clobbered
- add the "memory" keyword since input pointers are dereferenced
- add mmx registers in the clobber list and add an initialization for mm1
- add ebx in clobbers via a set of macro when GCC is newer than 5.0
(it will work for other compilers or non-PIC mode too)
Minor changes are:
- keep consistent the token numbering in the template
- remove the manual save/restore ebx when it is in the clobber list
- allows 'dst_step', 'src_step' and 'sum_step' to be given by immediates
- allows 'size' to be given by register (e.g. ebp)
- add "cc" keyword since the eflag register is clobbered
Signed-off-by: Frédéric Recoules frederic.recoules@orange.fr
Thanks. Now I confirmed that the build test passed.
But before merging the patch: have you actually tested the code on your machine and confirmed that it keeps working? If you've tested, it'd be helpful to mention about the test coverage, it's a good measure for judgment. Since it handles different compile flavors (with and without MMX support), I'd like to know whether it's actually tested.
BTW, when you resubmit a patch with the correction, please put "v2" or whatever the revision in the subject to tell from the earlier patches. Also, it'd be appreciated to list up summaries of changes between v1 and v2 in the patch description or after the separator line ("---") if you don't need to include that in the git commit log.
thanks,
Takashi
configure.ac | 7 ++ src/pcm/pcm_dmix_i386.h | 174 +++++++++++++++++++++++----------------- 2 files changed, 106 insertions(+), 75 deletions(-)
diff --git a/configure.ac b/configure.ac index 4b5ab662..1838e50b 100644 --- a/configure.ac +++ b/configure.ac @@ -516,6 +516,13 @@ if test -z "$gcc_have_atomics"; then fi AC_MSG_RESULT($gcc_have_atomics)
+dnl check mmx register for pcm_dmix_i386
+AC_TRY_LINK([],
- [__asm__ volatile ("" : : : "mm0");],
- [AC_DEFINE([HAVE_MMX], "1", [MMX technology is enabled])],
- [])
PCM_PLUGIN_LIST="copy linear route mulaw alaw adpcm rate plug multi shm file null empty share meter hooks lfloat ladspa dmix dshare dsnoop asym iec958 softvol extplug ioplug mmap_emul"
build_pcm_plugin="no" diff --git a/src/pcm/pcm_dmix_i386.h b/src/pcm/pcm_dmix_i386.h index 2778cb1d..aa1717f6 100644 --- a/src/pcm/pcm_dmix_i386.h +++ b/src/pcm/pcm_dmix_i386.h @@ -26,6 +26,13 @@
*/
+#define COMMA , +#if __GNUC__ < 5 && defined(__PIC__) +# define GCC_PIC_SWITCH(before,after) before +#else +# define GCC_PIC_SWITCH(before,after) after +#endif
/*
- for plain i386
*/ @@ -47,13 +54,14 @@ static void MIX_AREAS_16(unsigned int size, __asm__ __volatile__ ( "\n"
- "\tmovl %%ebx, %7\n" /* ebx is GOT pointer (-fPIC) */
- /* ebx is GOT pointer (-fPIC) */
- GCC_PIC_SWITCH("\tmovl %%ebx, %1\n",)
/*
- initialization, load ESI, EDI, EBX registers
*/
- "\tmovl %1, %%edi\n"
- "\tmovl %2, %%esi\n"
- "\tmovl %3, %%ebx\n"
- "\tmovl %2, %%edi\n"
- "\tmovl %3, %%esi\n"
- "\tmovl %4, %%ebx\n"
"\tcmpl $0, %0\n" "\tjnz 2f\n" "\tjmp 7f\n" @@ -64,9 +72,9 @@ static void MIX_AREAS_16(unsigned int size, */ "\t.p2align 4,,15\n" "1:"
- "\tadd %4, %%edi\n"
- "\tadd %5, %%esi\n"
- "\tadd %6, %%ebx\n"
- "\tadd %5, %%edi\n"
- "\tadd %6, %%esi\n"
- "\tadd %7, %%ebx\n"
/*
- sample = *src;
@@ -138,15 +146,16 @@ static void MIX_AREAS_16(unsigned int size, "\tjnz 4b\n" "\tdecl %0\n" "\tjnz 1b\n"
"7:"
"\tmovl %7, %%ebx\n" /* ebx is GOT pointer (-fPIC) */
: /* no output regs */
: "m" (size), "m" (dst), "m" (src),
"m" (sum), "m" (dst_step), "m" (src_step),
"m" (sum_step), "m" (old_ebx)
: "esi", "edi", "edx", "ecx", "eax"
- "7:"
- /* ebx is GOT pointer (-fPIC) */
- GCC_PIC_SWITCH("\tmovl %1, %%ebx\n",)
- : "+&rm" (size), GCC_PIC_SWITCH("=m","=X") (old_ebx)
- : "m" (dst), "m" (src), "m" (sum),
- "im" (dst_step), "im" (src_step), "im" (sum_step)
- : "esi", "edi", "edx", "ecx", GCC_PIC_SWITCH(,"ebx"COMMA) "eax",
- "memory", "cc"
); }
@@ -171,22 +180,24 @@ static void MIX_AREAS_16_MMX(unsigned int size, __asm__ __volatile__ ( "\n"
- "\tmovl %%ebx, %7\n" /* ebx is GOT pointer (-fPIC) */
- /* ebx is GOT pointer (-fPIC) */
- GCC_PIC_SWITCH("\tmovl %%ebx, %1\n",)
/*
- initialization, load ESI, EDI, EBX registers
- initialization, load ESI, EDI, EBX registers, clear MM1
*/
- "\tmovl %1, %%edi\n"
- "\tmovl %2, %%esi\n"
- "\tmovl %3, %%ebx\n"
- "\tpxor %%mm1, %%mm1\n"
- "\tmovl %2, %%edi\n"
- "\tmovl %3, %%esi\n"
- "\tmovl %4, %%ebx\n"
"\tcmpl $0, %0\n" "\tjnz 2f\n" "\tjmp 5f\n"
"\t.p2align 4,,15\n" "1:"
- "\tadd %4, %%edi\n"
- "\tadd %5, %%esi\n"
- "\tadd %6, %%ebx\n"
- "\tadd %5, %%edi\n"
- "\tadd %6, %%esi\n"
- "\tadd %7, %%ebx\n"
"2:" /* @@ -230,13 +241,20 @@ static void MIX_AREAS_16_MMX(unsigned int size, "\tjnz 1b\n" "\temms\n" "5:"
- "\tmovl %7, %%ebx\n" /* ebx is GOT pointer (-fPIC) */
- : /* no output regs */
- : "m" (size), "m" (dst), "m" (src),
- "m" (sum), "m" (dst_step), "m" (src_step),
- "m" (sum_step), "m" (old_ebx)
- : "esi", "edi", "edx", "ecx", "eax"
- /* ebx is GOT pointer (-fPIC) */
- GCC_PIC_SWITCH("\tmovl %1, %%ebx\n",)
- : "+&rm" (size), GCC_PIC_SWITCH("=m","=X") (old_ebx)
- : "m" (dst), "m" (src), "m" (sum),
- "im" (dst_step), "im" (src_step), "im" (sum_step)
- : "esi", "edi", "edx", "ecx", GCC_PIC_SWITCH(,"ebx"COMMA) "eax",
- "memory", "cc",
+# ifdef HAVE_MMX
- "mm0", "mm1"
+# else
- "st", "st(1)", "st(2)", "st(3)",
- "st(4)", "st(5)", "st(6)", "st(7)"
+# endif ); }
@@ -261,13 +279,14 @@ static void MIX_AREAS_32(unsigned int size, __asm__ __volatile__ ( "\n"
- "\tmovl %%ebx, %7\n" /* ebx is GOT pointer (-fPIC) */
- /* ebx is GOT pointer (-fPIC) */
- GCC_PIC_SWITCH("\tmovl %%ebx, %1\n",)
/*
- initialization, load ESI, EDI, EBX registers
*/
- "\tmovl %1, %%edi\n"
- "\tmovl %2, %%esi\n"
- "\tmovl %3, %%ebx\n"
- "\tmovl %2, %%edi\n"
- "\tmovl %3, %%esi\n"
- "\tmovl %4, %%ebx\n"
"\tcmpl $0, %0\n" "\tjnz 1f\n" "\tjmp 6f\n" @@ -337,19 +356,20 @@ static void MIX_AREAS_32(unsigned int size, */ "\tdecl %0\n" "\tjz 6f\n"
- "\tadd %4, %%edi\n"
- "\tadd %5, %%esi\n"
- "\tadd %6, %%ebx\n"
- "\tadd %5, %%edi\n"
- "\tadd %6, %%esi\n"
- "\tadd %7, %%ebx\n"
"\tjmp 1b\n"
"6:"
"\tmovl %7, %%ebx\n" /* ebx is GOT pointer (-fPIC) */
: /* no output regs */
: "m" (size), "m" (dst), "m" (src),
"m" (sum), "m" (dst_step), "m" (src_step),
"m" (sum_step), "m" (old_ebx)
: "esi", "edi", "edx", "ecx", "eax"
- "6:"
- /* ebx is GOT pointer (-fPIC) */
- GCC_PIC_SWITCH("\tmovl %1, %%ebx\n",)
- : "+&rm" (size), GCC_PIC_SWITCH("=m","=X") (old_ebx)
- : "m" (dst), "m" (src), "m" (sum),
- "im" (dst_step), "im" (src_step), "im" (sum_step)
- : "esi", "edi", "edx", "ecx", GCC_PIC_SWITCH(,"ebx"COMMA) "eax",
- "memory", "cc"
); }
@@ -374,13 +394,14 @@ static void MIX_AREAS_24(unsigned int size, __asm__ __volatile__ ( "\n"
- "\tmovl %%ebx, %7\n" /* ebx is GOT pointer (-fPIC) */
- /* ebx is GOT pointer (-fPIC) */
- GCC_PIC_SWITCH("\tmovl %%ebx, %1\n",)
/*
- initialization, load ESI, EDI, EBX registers
*/
- "\tmovl %1, %%edi\n"
- "\tmovl %2, %%esi\n"
- "\tmovl %3, %%ebx\n"
- "\tmovl %2, %%edi\n"
- "\tmovl %3, %%esi\n"
- "\tmovl %4, %%ebx\n"
"\tcmpl $0, %0\n" "\tjnz 1f\n" "\tjmp 6f\n" @@ -443,19 +464,20 @@ static void MIX_AREAS_24(unsigned int size, */ "\tdecl %0\n" "\tjz 6f\n"
- "\tadd %4, %%edi\n"
- "\tadd %5, %%esi\n"
- "\tadd %6, %%ebx\n"
- "\tadd %5, %%edi\n"
- "\tadd %6, %%esi\n"
- "\tadd %7, %%ebx\n"
"\tjmp 1b\n"
"6:"
"\tmovl %7, %%ebx\n" /* ebx is GOT pointer (-fPIC) */
: /* no output regs */
: "m" (size), "m" (dst), "m" (src),
"m" (sum), "m" (dst_step), "m" (src_step),
"m" (sum_step), "m" (old_ebx)
: "esi", "edi", "edx", "ecx", "eax"
- "6:"
- /* ebx is GOT pointer (-fPIC) */
- GCC_PIC_SWITCH("\tmovl %1, %%ebx\n",)
- : "+&rm" (size), GCC_PIC_SWITCH("=m","=X") (old_ebx)
- : "m" (dst), "m" (src), "m" (sum),
- "im" (dst_step), "im" (src_step), "im" (sum_step)
- : "esi", "edi", "edx", "ecx", GCC_PIC_SWITCH(,"ebx"COMMA) "eax",
- "memory", "cc"
); }
@@ -480,13 +502,14 @@ static void MIX_AREAS_24_CMOV(unsigned int size, __asm__ __volatile__ ( "\n"
- "\tmovl %%ebx, %7\n" /* ebx is GOT pointer (-fPIC) */
- /* ebx is GOT pointer (-fPIC) */
- GCC_PIC_SWITCH("\tmovl %%ebx, %1\n",)
/*
- initialization, load ESI, EDI, EBX registers
*/
- "\tmovl %1, %%edi\n"
- "\tmovl %2, %%esi\n"
- "\tmovl %3, %%ebx\n"
- "\tmovl %2, %%edi\n"
- "\tmovl %3, %%esi\n"
- "\tmovl %4, %%ebx\n"
"\tcmpl $0, %0\n" "\tjz 6f\n"
@@ -541,19 +564,20 @@ static void MIX_AREAS_24_CMOV(unsigned int size, /*
- while (size-- > 0)
*/
- "\tadd %4, %%edi\n"
- "\tadd %5, %%esi\n"
- "\tadd %6, %%ebx\n"
- "\tadd %5, %%edi\n"
- "\tadd %6, %%esi\n"
- "\tadd %7, %%ebx\n"
"\tdecl %0\n" "\tjnz 1b\n"
"6:"
"\tmovl %7, %%ebx\n" /* ebx is GOT pointer (-fPIC) */
: /* no output regs */
: "m" (size), "m" (dst), "m" (src),
"m" (sum), "m" (dst_step), "m" (src_step),
"m" (sum_step), "m" (old_ebx)
: "esi", "edi", "edx", "ecx", "eax"
- "6:"
- /* ebx is GOT pointer (-fPIC) */
- GCC_PIC_SWITCH("\tmovl %1, %%ebx\n",)
- : "+&rm" (size), GCC_PIC_SWITCH("=m","=X") (old_ebx)
- : "m" (dst), "m" (src), "m" (sum),
- "im" (dst_step), "im" (src_step), "im" (sum_step)
- : "esi", "edi", "edx", "ecx", GCC_PIC_SWITCH(,"ebx"COMMA) "eax",
- "memory", "cc"
); } -- 2.17.1