From 56b5c0911b9f87e6308e92632fd58df7ad2719f6 Mon Sep 17 00:00:00 2001 From: Tobias Doerffel Date: Thu, 4 Dec 2008 19:59:23 +0000 Subject: [PATCH] BasicOps/X86: regenerated with latest GCC 4.4 - brings in a few more optimizations as well as bugfixes git-svn-id: https://lmms.svn.sf.net/svnroot/lmms/trunk/lmms@1869 0778d3d1-df1d-0410-868b-ea421aaaa00d --- ChangeLog | 8 ++ src/core/basic_ops_x86_64_sse.s | 8 +- src/core/basic_ops_x86_64_sse2.s | 14 ++-- src/core/basic_ops_x86_mmx.s | 2 +- src/core/basic_ops_x86_sse.s | 137 +++++++++++++++---------------- src/core/basic_ops_x86_sse2.s | 2 +- 6 files changed, 87 insertions(+), 84 deletions(-) diff --git a/ChangeLog b/ChangeLog index 9331915c5..d052a7bb3 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,13 @@ 2008-12-04 Tobias Doerffel + * src/core/basic_ops_x86_mmx.s: + * src/core/basic_ops_x86_sse.s: + * src/core/basic_ops_x86_sse2.s: + * src/core/basic_ops_x86_64_sse.s: + * src/core/basic_ops_x86_64_sse2.s: + regenerated with latest GCC 4.4 - brings in a few more optimizations + as well as bugfixes + * plugins/vst_base/remote_vst_plugin.cpp: explicitely declare updateSampleRate() and updateBufferSize() as virtual diff --git a/src/core/basic_ops_x86_64_sse.s b/src/core/basic_ops_x86_64_sse.s index 6c42a8f45..663d92fe5 100644 --- a/src/core/basic_ops_x86_64_sse.s +++ b/src/core/basic_ops_x86_64_sse.s @@ -9,11 +9,9 @@ alignedMemCpySSE: shrq $6, %rdx testl %edx, %edx jle .L4 - leal -1(%rdx), %r9d + subl $1, %edx xorl %eax, %eax - mov %r9d, %r8d - leaq 1(%r8), %rcx - movq %rcx, %rdx + addq $1, %rdx salq $6, %rdx .align 16 .L3: @@ -560,4 +558,4 @@ unalignedBufMixLRCoeffSSE: .byte 0x0 .align 8 .LEFDE15: - .ident "GCC: (GNU) 4.4.0 20081110 (experimental)" + .ident "GCC: (GNU) 4.4.0 20081204 (experimental)" diff --git a/src/core/basic_ops_x86_64_sse2.s b/src/core/basic_ops_x86_64_sse2.s index 94fc2de6d..fa688a4ce 100644 --- a/src/core/basic_ops_x86_64_sse2.s +++ b/src/core/basic_ops_x86_64_sse2.s @@ -9,11 +9,9 @@ alignedMemCpySSE2: shrq $6, %rdx testl %edx, %edx jle .L4 - leal -1(%rdx), %r9d + subl $1, %edx xorl %eax, %eax - mov %r9d, %r8d - leaq 1(%r8), %rcx - movq %rcx, %rdx + addq $1, %rdx salq $6, %rdx .align 16 .L3: @@ -149,7 +147,8 @@ alignedConvertToS16SSE2: cmovg %esi, %ebx movzbl %bh, %ebp sall $8, %ebx - orl %ebp, %ebx + movl %ebp, %r9d + orl %r9d, %ebx movw %bx, (%rdx) cvttss2si %xmm1, %ebx cmpl $-32768, %ebx @@ -160,7 +159,8 @@ alignedConvertToS16SSE2: addq $8, %rcx movzbl %bh, %ebp sall $8, %ebx - orl %ebp, %ebx + movl %ebp, %r9d + orl %r9d, %ebx movw %bx, 2(%rdx) addq $4, %rdx cmpw %r8w, %ax @@ -392,4 +392,4 @@ alignedConvertToS16SSE2: .byte 0x2 .align 8 .LEFDE5: - .ident "GCC: (GNU) 4.4.0 20081110 (experimental)" + .ident "GCC: (GNU) 4.4.0 20081204 (experimental)" diff --git a/src/core/basic_ops_x86_mmx.s b/src/core/basic_ops_x86_mmx.s index a1bdc3240..71f677407 100644 --- a/src/core/basic_ops_x86_mmx.s +++ b/src/core/basic_ops_x86_mmx.s @@ -103,5 +103,5 @@ movq %mm0, 56(%edx) emms ret .size alignedMemClearMMX, .-alignedMemClearMMX - .ident "GCC: (GNU) 4.4.0 20081110 (experimental)" + .ident "GCC: (GNU) 4.4.0 20081204 (experimental)" .section .note.GNU-stack,"",@progbits diff --git a/src/core/basic_ops_x86_sse.s b/src/core/basic_ops_x86_sse.s index ab5d006e7..b2810e0f5 100644 --- a/src/core/basic_ops_x86_sse.s +++ b/src/core/basic_ops_x86_sse.s @@ -230,25 +230,25 @@ alignedBufWetDryMixSplittedSSE: pushl %edi pushl %esi pushl %ebx - subl $140, %esp - movl 180(%esp), %eax - movl 160(%esp), %edx - movl 164(%esp), %esi - movl 168(%esp), %ecx + subl $124, %esp + movl 164(%esp), %eax + movl 144(%esp), %edx + movl 148(%esp), %esi + movl 152(%esp), %ecx testl %eax, %eax - movss 172(%esp), %xmm4 - movss 176(%esp), %xmm5 + movss 156(%esp), %xmm4 + movss 160(%esp), %xmm5 jle .L39 - movl 180(%esp), %eax + movl 164(%esp), %eax subl $1, %eax shrl %eax addl $1, %eax movl %eax, %ebp - movl %eax, 112(%esp) + movl %eax, 96(%esp) shrl $2, %ebp - cmpl $3, 112(%esp) + cmpl $3, 96(%esp) leal 0(,%ebp,4), %eax - movl %eax, 116(%esp) + movl %eax, 100(%esp) jbe .L40 testl %eax, %eax jne .L34 @@ -264,11 +264,11 @@ alignedBufWetDryMixSplittedSSE: movaps %xmm5, %xmm1 movl %esi, %ebx shufps $0, %xmm1, %xmm1 - movaps %xmm2, 32(%esp) + movaps %xmm2, (%esp) xorl %eax, %eax xorl %edi, %edi - movss %xmm5, 124(%esp) - movss %xmm4, 120(%esp) + movss %xmm5, 108(%esp) + movss %xmm4, 104(%esp) movaps %xmm1, %xmm4 .p2align 4,,7 .p2align 3 @@ -276,87 +276,84 @@ alignedBufWetDryMixSplittedSSE: movaps 16(%edx,%eax,2), %xmm3 addl $1, %edi movaps (%edx,%eax,2), %xmm2 - movaps 48(%edx,%eax,2), %xmm0 - movaps %xmm2, %xmm5 - shufps $221, %xmm3, %xmm2 movaps 32(%edx,%eax,2), %xmm1 - shufps $136, %xmm3, %xmm5 - movaps %xmm2, 96(%esp) - movaps %xmm1, %xmm7 + movaps %xmm2, %xmm7 + shufps $221, %xmm3, %xmm2 + movaps 48(%edx,%eax,2), %xmm0 + shufps $136, %xmm3, %xmm7 + movaps %xmm2, 64(%esp) + movaps %xmm1, %xmm2 shufps $221, %xmm0, %xmm1 - shufps $136, %xmm0, %xmm7 - movaps %xmm1, 64(%esp) + shufps $136, %xmm0, %xmm2 movaps %xmm6, %xmm3 - movaps %xmm5, (%esp) - shufps $136, %xmm7, %xmm5 + movaps %xmm2, 48(%esp) movlps (%ebx), %xmm3 - movaps %xmm6, %xmm2 movhps 8(%ebx), %xmm3 - movaps %xmm7, 80(%esp) - movlps 16(%ebx), %xmm2 - movhps 24(%ebx), %xmm2 - movaps 96(%esp), %xmm7 - addl $32, %ebx + movaps %xmm7, %xmm5 movaps %xmm3, %xmm0 - shufps $221, %xmm2, %xmm3 + movaps %xmm6, %xmm2 + movlps 16(%ebx), %xmm2 + shufps $136, 48(%esp), %xmm5 + movhps 24(%ebx), %xmm2 shufps $136, %xmm2, %xmm0 - shufps $136, 64(%esp), %xmm7 - mulps 32(%esp), %xmm0 + addl $32, %ebx + mulps %xmm4, %xmm5 + shufps $221, %xmm2, %xmm3 + movaps %xmm1, 32(%esp) + mulps (%esp), %xmm0 movaps %xmm6, %xmm1 + shufps $221, 48(%esp), %xmm7 movlps (%ecx,%eax), %xmm1 movhps 8(%ecx,%eax), %xmm1 - movaps 96(%esp), %xmm2 + movaps 64(%esp), %xmm2 mulps %xmm4, %xmm7 - shufps $221, 64(%esp), %xmm2 - mulps %xmm4, %xmm5 - mulps 32(%esp), %xmm3 - movaps %xmm7, 16(%esp) - movaps %xmm1, %xmm7 addps %xmm0, %xmm5 movaps %xmm6, %xmm0 movlps 16(%ecx,%eax), %xmm0 movhps 24(%ecx,%eax), %xmm0 - shufps $136, %xmm0, %xmm7 - shufps $221, %xmm0, %xmm1 - mulps 32(%esp), %xmm7 - mulps 32(%esp), %xmm1 + shufps $221, 32(%esp), %xmm2 + movaps %xmm5, 16(%esp) + movaps 64(%esp), %xmm5 mulps %xmm4, %xmm2 - movaps %xmm7, 48(%esp) - movaps 16(%esp), %xmm7 - addps 48(%esp), %xmm7 - addps %xmm1, %xmm2 - movaps %xmm7, 16(%esp) - movaps (%esp), %xmm7 - shufps $221, 80(%esp), %xmm7 - movaps 16(%esp), %xmm1 - mulps %xmm4, %xmm7 - movaps 16(%esp), %xmm0 - unpckhps %xmm2, %xmm1 - unpcklps %xmm2, %xmm0 - movaps %xmm1, %xmm2 + shufps $136, 32(%esp), %xmm5 + mulps (%esp), %xmm3 + mulps %xmm4, %xmm5 addps %xmm3, %xmm7 - movaps %xmm5, %xmm3 + movaps 16(%esp), %xmm3 + movaps %xmm5, 80(%esp) + movaps %xmm1, %xmm5 + shufps $221, %xmm0, %xmm1 + shufps $136, %xmm0, %xmm5 + mulps (%esp), %xmm1 unpcklps %xmm7, %xmm3 - unpckhps %xmm7, %xmm5 + mulps (%esp), %xmm5 + movaps 16(%esp), %xmm0 + addps %xmm1, %xmm2 movaps %xmm3, %xmm1 - unpckhps %xmm0, %xmm3 - unpcklps %xmm0, %xmm1 + addps 80(%esp), %xmm5 + unpckhps %xmm7, %xmm0 + movaps %xmm0, %xmm7 movaps %xmm5, %xmm0 - unpckhps %xmm2, %xmm5 unpcklps %xmm2, %xmm0 + unpckhps %xmm2, %xmm5 + unpcklps %xmm0, %xmm1 + unpckhps %xmm0, %xmm3 + movaps %xmm7, %xmm0 + unpckhps %xmm5, %xmm7 + unpcklps %xmm5, %xmm0 movaps %xmm1, (%edx,%eax,2) movaps %xmm3, 16(%edx,%eax,2) movaps %xmm0, 32(%edx,%eax,2) - movaps %xmm5, 48(%edx,%eax,2) + movaps %xmm7, 48(%edx,%eax,2) addl $32, %eax cmpl %edi, %ebp ja .L37 - movl 116(%esp), %edi - movl 112(%esp), %eax - movss 120(%esp), %xmm4 - movss 124(%esp), %xmm5 + movl 100(%esp), %edi + movl 96(%esp), %eax + movss 104(%esp), %xmm4 + movss 108(%esp), %xmm5 addl %edi, %edi - cmpl %eax, 116(%esp) + cmpl %eax, 100(%esp) je .L39 .L36: leal (%edx,%edi,8), %ebx @@ -394,10 +391,10 @@ alignedBufWetDryMixSplittedSSE: addss %xmm1, %xmm0 movss %xmm0, 4(%edx) addl $16, %edx - cmpl %eax, 180(%esp) + cmpl %eax, 164(%esp) jg .L38 .L39: - addl $140, %esp + addl $124, %esp popl %ebx popl %esi popl %edi @@ -501,5 +498,5 @@ unalignedBufMixLRCoeffSSE: addl $8, %eax jmp .L44 .size unalignedBufMixLRCoeffSSE, .-unalignedBufMixLRCoeffSSE - .ident "GCC: (GNU) 4.4.0 20081110 (experimental)" + .ident "GCC: (GNU) 4.4.0 20081204 (experimental)" .section .note.GNU-stack,"",@progbits diff --git a/src/core/basic_ops_x86_sse2.s b/src/core/basic_ops_x86_sse2.s index c575a150f..f44e65a92 100644 --- a/src/core/basic_ops_x86_sse2.s +++ b/src/core/basic_ops_x86_sse2.s @@ -345,5 +345,5 @@ alignedConvertToS16SSE2: .long 65280 .long 65280 .long 65280 - .ident "GCC: (GNU) 4.4.0 20081110 (experimental)" + .ident "GCC: (GNU) 4.4.0 20081204 (experimental)" .section .note.GNU-stack,"",@progbits