diff --git a/plugins/midi_import/midi_import.cpp b/plugins/midi_import/midi_import.cpp index 964503403..e5ad69c6a 100644 --- a/plugins/midi_import/midi_import.cpp +++ b/plugins/midi_import/midi_import.cpp @@ -1,7 +1,7 @@ /* * midi_import.cpp - support for importing MIDI-files * - * Copyright (c) 2005-2008 Tobias Doerffel + * Copyright (c) 2005-2009 Tobias Doerffel * * This file is part of Linux MultiMedia Studio - http://lmms.sourceforge.net * @@ -26,6 +26,7 @@ #include #include #include +#include #include #include "midi_import.h" @@ -92,6 +93,18 @@ bool midiImport::tryImport( trackContainer * _tc ) { return( FALSE ); } + if( engine::hasGUI() && + configManager::inst()->defaultSoundfont().isEmpty() ) + { + QMessageBox::information( engine::getMainWindow(), + tr( "Setup incomplete" ), + tr( "You do not have set up a default soundfont in " + "the settings dialog (Edit->Settings). " + "Therefore no sound will be played back after " + "importing this MIDI file. You should download " + "a General MIDI soundfont, specify it in " + "settings dialog and try again." ) ); + } switch( readID() ) { @@ -201,7 +214,7 @@ public: { isSF2 = true; it_inst->loadFile( configManager::inst()->defaultSoundfont() ); - it_inst->getChildModel( "bank" )->setValue( 128 ); + it_inst->getChildModel( "bank" )->setValue( 0 ); it_inst->getChildModel( "patch" )->setValue( 0 ); } else @@ -471,51 +484,6 @@ bool midiImport::readSMF( trackContainer * _tc ) } return true; - - - /* - // the curren position is immediately after the "MThd" id - int header_len = readInt( 4 ); - if( header_len < 6 ) - { -invalid_format: - printf( "midiImport::readSMF(): invalid file format\n" ); - return( FALSE ); - } - - int type = readInt( 2 ); - if( type != 0 && type != 1 ) - { - printf( "midiImport::readSMF(): type %d format is not " - "supported\n", type ); - return( FALSE ); - } - - int num_tracks = readInt( 2 ); - if( num_tracks < 1 || num_tracks > 1000 ) - { - printf( "midiImport::readSMF(): invalid number of tracks (%d)\n", - num_tracks ); - num_tracks = 0; - return( FALSE ); - } - -#ifdef LMMS_DEBUG - printf( "tracks: %d\n", num_tracks ); -#endif - - m_timingDivision = readInt( 2 ); - if( m_timingDivision < 0 ) - { - goto invalid_format; - } -#ifdef LMMS_DEBUG - printf( "time-division: %d\n", m_timingDivision ); -#endif - - - */ - } diff --git a/src/core/basic_ops_x86_64_sse.s b/src/core/basic_ops_x86_64_sse.s index 663d92fe5..0cd43e3e3 100644 --- a/src/core/basic_ops_x86_64_sse.s +++ b/src/core/basic_ops_x86_64_sse.s @@ -36,14 +36,14 @@ alignedMemCpySSE: .type alignedMemClearSSE, @function alignedMemClearSSE: .LFB510: - movslq %esi,%rax - shrq $6, %rax - testl %eax, %eax + movslq %esi,%rsi + shrq $6, %rsi + testl %esi, %esi jle .L10 - subl $1, %eax + subl $1, %esi xorps %xmm0, %xmm0 - salq $6, %rax - leaq 64(%rax,%rdi), %rax + salq $6, %rsi + leaq 64(%rdi,%rsi), %rax .align 16 .L9: movaps %xmm0, (%rdi) @@ -65,23 +65,23 @@ alignedBufApplyGainSSE: .LFB511: testl %esi, %esi jle .L15 - subl $1, %esi + leal -1(%rsi), %edx shufps $0, %xmm0, %xmm0 - shrl $3, %esi xorl %eax, %eax - leal 1(%rsi), %edx + shrl $3, %edx + addl $1, %edx .align 16 .L14: - movaps %xmm0, %xmm3 + movaps 16(%rdi), %xmm3 addl $1, %eax - movaps %xmm0, %xmm2 - movaps %xmm0, %xmm1 - movaps %xmm0, %xmm4 - mulps 16(%rdi), %xmm3 - mulps 32(%rdi), %xmm2 - mulps 48(%rdi), %xmm1 - mulps (%rdi), %xmm4 + movaps 32(%rdi), %xmm2 + mulps %xmm0, %xmm3 + movaps 48(%rdi), %xmm1 + mulps %xmm0, %xmm2 + movaps (%rdi), %xmm4 + mulps %xmm0, %xmm1 movaps %xmm3, 16(%rdi) + mulps %xmm0, %xmm4 movaps %xmm2, 32(%rdi) movaps %xmm1, 48(%rdi) movaps %xmm4, (%rdi) @@ -100,11 +100,11 @@ alignedBufMixSSE: .LFB512: testl %edx, %edx jle .L20 - subl $1, %edx + leal -1(%rdx), %ecx xorl %eax, %eax - shrl $3, %edx - leal 1(%rdx), %ecx xorl %edx, %edx + shrl $3, %ecx + addl $1, %ecx .align 16 .L19: movaps 16(%rdi,%rax), %xmm2 @@ -136,23 +136,23 @@ alignedBufMixLRCoeffSSE: testl %edx, %edx jle .L25 unpcklps %xmm1, %xmm0 - subl $1, %edx - shrl $2, %edx + leal -1(%rdx), %ecx xorl %eax, %eax - leal 1(%rdx), %ecx xorl %edx, %edx + shrl $2, %ecx movlhps %xmm0, %xmm0 + addl $1, %ecx .align 16 .L24: - movaps %xmm0, %xmm1 + movaps 16(%rsi,%rax), %xmm2 addl $1, %edx - movaps %xmm0, %xmm2 - mulps 16(%rsi,%rax), %xmm1 - mulps (%rsi,%rax), %xmm2 - addps 16(%rdi,%rax), %xmm1 - addps (%rdi,%rax), %xmm2 - movaps %xmm1, 16(%rdi,%rax) - movaps %xmm2, (%rdi,%rax) + movaps (%rsi,%rax), %xmm3 + mulps %xmm0, %xmm2 + mulps %xmm0, %xmm3 + addps 16(%rdi,%rax), %xmm2 + addps (%rdi,%rax), %xmm3 + movaps %xmm2, 16(%rdi,%rax) + movaps %xmm3, (%rdi,%rax) addq $32, %rax cmpl %edx, %ecx ja .L24 @@ -168,25 +168,25 @@ alignedBufWetDryMixSSE: .LFB515: testl %edx, %edx jle .L30 - subl $1, %edx + leal -1(%rdx), %ecx shufps $0, %xmm1, %xmm1 shufps $0, %xmm0, %xmm0 - shrl $2, %edx - leal 1(%rdx), %ecx xorl %eax, %eax + shrl $2, %ecx xorl %edx, %edx + addl $1, %ecx .align 16 .L29: - movaps %xmm1, %xmm3 + movaps 16(%rsi,%rax), %xmm3 addl $1, %edx - movaps %xmm0, %xmm2 - mulps 16(%rdi,%rax), %xmm3 - movaps %xmm1, %xmm4 - mulps 16(%rsi,%rax), %xmm2 - mulps (%rdi,%rax), %xmm4 + movaps 16(%rdi,%rax), %xmm2 + mulps %xmm0, %xmm3 + movaps (%rsi,%rax), %xmm4 + mulps %xmm1, %xmm2 + mulps %xmm0, %xmm4 addps %xmm3, %xmm2 - movaps %xmm0, %xmm3 - mulps (%rsi,%rax), %xmm3 + movaps (%rdi,%rax), %xmm3 + mulps %xmm1, %xmm3 movaps %xmm2, 16(%rdi,%rax) addps %xmm4, %xmm3 movaps %xmm3, (%rdi,%rax) @@ -226,84 +226,80 @@ alignedBufWetDryMixSplittedSSE: .L34: movaps %xmm1, %xmm2 movq %rdi, %rax - xorps %xmm6, %xmm6 + xorps %xmm10, %xmm10 movq %rsi, %r9 shufps $0, %xmm2, %xmm2 movq %rdx, %r8 xorl %r10d, %r10d - movaps %xmm2, %xmm8 + movaps %xmm2, %xmm12 movaps %xmm0, %xmm2 shufps $0, %xmm2, %xmm2 - movaps %xmm2, %xmm7 + movaps %xmm2, %xmm11 .align 16 .L37: - movaps (%rax), %xmm12 + movaps (%rax), %xmm2 addl $1, %r10d - movaps %xmm6, %xmm3 + movaps %xmm10, %xmm9 movaps 16(%rax), %xmm5 - movaps %xmm12, %xmm14 - movlps (%r8), %xmm3 - movaps 32(%rax), %xmm9 - shufps $136, %xmm5, %xmm14 - shufps $221, %xmm5, %xmm12 - movhps 8(%r8), %xmm3 - movaps 48(%rax), %xmm4 - movaps %xmm9, %xmm13 - movaps %xmm6, %xmm5 - shufps $221, %xmm4, %xmm9 - movlps (%r9), %xmm5 - shufps $136, %xmm4, %xmm13 - movaps %xmm6, %xmm4 - movhps 8(%r9), %xmm5 - movaps %xmm14, %xmm11 - movlps 16(%r9), %xmm4 - movaps %xmm12, %xmm15 - movaps %xmm5, %xmm2 - movhps 24(%r9), %xmm4 - shufps $136, %xmm13, %xmm11 - movaps %xmm3, %xmm10 + movaps %xmm2, %xmm4 + movlps (%r9), %xmm9 + movaps %xmm10, %xmm8 + movaps 32(%rax), %xmm14 + shufps $136, %xmm5, %xmm4 + movhps 8(%r9), %xmm9 + movaps 48(%rax), %xmm3 + movaps %xmm14, %xmm15 + movlps 16(%r9), %xmm8 + shufps $221, %xmm5, %xmm2 + shufps $136, %xmm3, %xmm15 + movhps 24(%r9), %xmm8 + shufps $221, %xmm3, %xmm14 + movaps %xmm4, %xmm5 addq $32, %r9 - shufps $136, %xmm4, %xmm2 - mulps %xmm8, %xmm11 - mulps %xmm7, %xmm2 - shufps $221, %xmm13, %xmm14 - shufps $136, %xmm9, %xmm15 - shufps $221, %xmm4, %xmm5 - addps %xmm2, %xmm11 - movaps %xmm6, %xmm2 - shufps $221, %xmm9, %xmm12 - movlps 16(%r8), %xmm2 - mulps %xmm8, %xmm14 - movhps 24(%r8), %xmm2 - mulps %xmm7, %xmm5 - movaps %xmm11, %xmm9 + movaps %xmm9, %xmm3 + shufps $136, %xmm15, %xmm5 + movaps %xmm10, %xmm7 + shufps $136, %xmm8, %xmm3 + movlps (%r8), %xmm7 + movaps %xmm10, %xmm6 + mulps %xmm12, %xmm5 + movhps 8(%r8), %xmm7 + mulps %xmm11, %xmm3 + movlps 16(%r8), %xmm6 + movaps %xmm7, %xmm13 + movhps 24(%r8), %xmm6 + shufps $221, %xmm15, %xmm4 + shufps $221, %xmm8, %xmm9 addq $32, %r8 - shufps $136, %xmm2, %xmm10 - shufps $221, %xmm2, %xmm3 - movaps %xmm14, %xmm4 - mulps %xmm8, %xmm15 - addps %xmm5, %xmm4 - mulps %xmm7, %xmm10 - movaps %xmm11, %xmm5 - mulps %xmm8, %xmm12 - mulps %xmm7, %xmm3 - addps %xmm15, %xmm10 - unpcklps %xmm4, %xmm9 - movaps %xmm12, %xmm2 + shufps $136, %xmm6, %xmm13 + addps %xmm3, %xmm5 + movaps %xmm2, %xmm3 + shufps $221, %xmm6, %xmm7 + shufps $136, %xmm14, %xmm3 + shufps $221, %xmm14, %xmm2 + mulps %xmm11, %xmm13 + movaps %xmm5, %xmm6 + mulps %xmm12, %xmm3 + mulps %xmm12, %xmm4 + mulps %xmm11, %xmm9 + addps %xmm13, %xmm3 + mulps %xmm12, %xmm2 + mulps %xmm11, %xmm7 + addps %xmm9, %xmm4 + addps %xmm7, %xmm2 + unpcklps %xmm4, %xmm6 unpckhps %xmm4, %xmm5 - addps %xmm3, %xmm2 - movaps %xmm10, %xmm4 - movaps %xmm10, %xmm3 + movaps %xmm3, %xmm4 unpcklps %xmm2, %xmm4 unpckhps %xmm2, %xmm3 - movaps %xmm9, %xmm2 + movaps %xmm6, %xmm2 unpcklps %xmm4, %xmm2 - unpckhps %xmm4, %xmm9 + unpckhps %xmm4, %xmm6 movaps %xmm2, (%rax) movaps %xmm5, %xmm2 unpckhps %xmm3, %xmm5 unpcklps %xmm3, %xmm2 - movaps %xmm9, 16(%rax) + movaps %xmm6, 16(%rax) movaps %xmm2, 32(%rax) movaps %xmm5, 48(%rax) addq $64, %rax @@ -325,34 +321,34 @@ alignedBufWetDryMixSplittedSSE: addq %rbx, %rdx .align 16 .L38: - movaps %xmm1, %xmm3 + movss (%r11), %xmm3 addl $2, %r9d - movaps %xmm0, %xmm2 - mulss (%r8), %xmm3 - mulss (%r11), %xmm2 + movss (%r8), %xmm2 + mulss %xmm0, %xmm3 + mulss %xmm1, %xmm2 addq $8, %r11 addss %xmm3, %xmm2 - movaps %xmm1, %xmm3 - mulss 4(%r8), %xmm3 movss %xmm2, (%r8) - movaps %xmm0, %xmm2 - mulss (%r10), %xmm2 + movss 4(%r8), %xmm2 + movss (%r10), %xmm3 + mulss %xmm1, %xmm2 addq $8, %r10 + mulss %xmm0, %xmm3 addss %xmm3, %xmm2 - movaps %xmm1, %xmm3 movss %xmm2, 4(%r8) - movaps %xmm0, %xmm2 addq $16, %r8 - mulss (%rax), %xmm3 - mulss (%rsi), %xmm2 + movss (%rsi), %xmm3 addq $8, %rsi + movss (%rax), %xmm2 + mulss %xmm0, %xmm3 + mulss %xmm1, %xmm2 addss %xmm3, %xmm2 - movaps %xmm1, %xmm3 - mulss 4(%rax), %xmm3 movss %xmm2, (%rax) - movaps %xmm0, %xmm2 - mulss (%rdx), %xmm2 + movss 4(%rax), %xmm2 + movss (%rdx), %xmm3 + mulss %xmm1, %xmm2 addq $8, %rdx + mulss %xmm0, %xmm3 addss %xmm3, %xmm2 movss %xmm2, 4(%rax) addq $16, %rax @@ -369,11 +365,11 @@ alignedBufWetDryMixSplittedSSE: .type unalignedBufMixLRCoeffSSE, @function unalignedBufMixLRCoeffSSE: .LFB514: - movl %edx, %eax - shrl $31, %eax - leal (%rdx,%rax), %ecx - andl $1, %ecx - cmpl %eax, %ecx + movl %edx, %ecx + shrl $31, %ecx + leal (%rdx,%rcx), %eax + andl $1, %eax + cmpl %ecx, %eax jne .L52 .L44: testl %edx, %edx @@ -413,20 +409,20 @@ unalignedBufMixLRCoeffSSE: salq $4, %rdx .align 16 .L48: - movaps %xmm0, %xmm2 - mulss (%rsi,%rax), %xmm2 + movss (%rsi,%rax), %xmm2 + mulss %xmm0, %xmm2 addss (%rdi,%rax), %xmm2 movss %xmm2, (%rdi,%rax) - movaps %xmm1, %xmm2 - mulss 4(%rsi,%rax), %xmm2 + movss 4(%rsi,%rax), %xmm2 + mulss %xmm1, %xmm2 addss 4(%rdi,%rax), %xmm2 movss %xmm2, 4(%rdi,%rax) - movaps %xmm0, %xmm2 - mulss 8(%rsi,%rax), %xmm2 + movss 8(%rsi,%rax), %xmm2 + mulss %xmm0, %xmm2 addss 8(%rdi,%rax), %xmm2 movss %xmm2, 8(%rdi,%rax) - movaps %xmm1, %xmm2 - mulss 12(%rsi,%rax), %xmm2 + movss 12(%rsi,%rax), %xmm2 + mulss %xmm1, %xmm2 addss 12(%rdi,%rax), %xmm2 movss %xmm2, 12(%rdi,%rax) addq $16, %rax @@ -436,17 +432,15 @@ unalignedBufMixLRCoeffSSE: rep ret .L52: - movaps %xmm0, %xmm2 + movss (%rsi), %xmm2 subl $1, %edx - movss (%rdi), %xmm3 - mulss (%rsi), %xmm2 - addss %xmm3, %xmm2 - movss 4(%rdi), %xmm3 + mulss %xmm0, %xmm2 + addss (%rdi), %xmm2 movss %xmm2, (%rdi) - movaps %xmm1, %xmm2 - mulss 4(%rsi), %xmm2 + movss 4(%rsi), %xmm2 addq $8, %rsi - addss %xmm3, %xmm2 + mulss %xmm1, %xmm2 + addss 4(%rdi), %xmm2 movss %xmm2, 4(%rdi) addq $8, %rdi jmp .L44 @@ -558,4 +552,4 @@ unalignedBufMixLRCoeffSSE: .byte 0x0 .align 8 .LEFDE15: - .ident "GCC: (GNU) 4.4.0 20081204 (experimental)" + .ident "GCC: (GNU) 4.4.0 20090304 (experimental)" diff --git a/src/core/basic_ops_x86_64_sse2.s b/src/core/basic_ops_x86_64_sse2.s index fa688a4ce..78ac365f1 100644 --- a/src/core/basic_ops_x86_64_sse2.s +++ b/src/core/basic_ops_x86_64_sse2.s @@ -36,14 +36,14 @@ alignedMemCpySSE2: .type alignedMemClearSSE2, @function alignedMemClearSSE2: .LFB510: - movslq %esi,%rax - shrq $6, %rax - testl %eax, %eax + movslq %esi,%rsi + shrq $6, %rsi + testl %esi, %esi jle .L10 - subl $1, %eax + subl $1, %esi pxor %xmm0, %xmm0 - salq $6, %rax - leaq 64(%rax,%rdi), %rax + salq $6, %rsi + leaq 64(%rdi,%rsi), %rax .align 16 .L9: movdqa %xmm0, (%rdi) @@ -89,11 +89,11 @@ alignedConvertToS16SSE2: movl $-32768, %edi .align 16 .L25: - movaps %xmm0, %xmm1 - mulss (%rcx), %xmm1 + movss (%rcx), %xmm1 + mulss %xmm0, %xmm1 cvttss2si %xmm1, %esi - movaps %xmm0, %xmm1 - mulss 4(%rcx), %xmm1 + movss 4(%rcx), %xmm1 + mulss %xmm0, %xmm1 cmpl $-32768, %esi cmovl %edi, %esi cmpl $32767, %esi @@ -136,11 +136,11 @@ alignedConvertToS16SSE2: movl $32767, %esi .align 16 .L20: - movaps %xmm0, %xmm1 - mulss (%rcx), %xmm1 + movss (%rcx), %xmm1 + mulss %xmm0, %xmm1 cvttss2si %xmm1, %ebx - movaps %xmm0, %xmm1 - mulss 4(%rcx), %xmm1 + movss 4(%rcx), %xmm1 + mulss %xmm0, %xmm1 cmpl $-32768, %ebx cmovl %edi, %ebx cmpl $32767, %ebx @@ -176,71 +176,71 @@ alignedConvertToS16SSE2: je .L27 movaps %xmm0, %xmm1 movq %rdi, %rcx - movdqa .LC1(%rip), %xmm2 + movdqa .LC1(%rip), %xmm4 movq %rsi, %r10 shufps $0, %xmm1, %xmm1 xorl %r9d, %r9d - movdqa .LC3(%rip), %xmm8 + movdqa .LC2(%rip), %xmm3 movaps %xmm1, %xmm9 - movdqa .LC2(%rip), %xmm1 + movdqa .LC3(%rip), %xmm8 .align 16 .L19: - movaps %xmm9, %xmm4 + movaps (%rcx), %xmm1 addl $1, %r9d - movaps %xmm9, %xmm3 - mulps (%rcx), %xmm4 - movdqa %xmm1, %xmm6 - mulps 16(%rcx), %xmm3 + movdqa %xmm3, %xmm5 + mulps %xmm9, %xmm1 + movaps 16(%rcx), %xmm6 + movdqa %xmm3, %xmm7 addq $32, %rcx - cvttps2dq %xmm4, %xmm4 - movdqa %xmm4, %xmm5 - pcmpgtd %xmm2, %xmm5 - cvttps2dq %xmm3, %xmm3 - pand %xmm5, %xmm4 - pandn %xmm2, %xmm5 - por %xmm5, %xmm4 - movdqa %xmm4, %xmm5 - pcmpgtd %xmm1, %xmm5 - pand %xmm5, %xmm6 - pandn %xmm4, %xmm5 - movdqa %xmm5, %xmm4 - movdqa %xmm3, %xmm5 - por %xmm6, %xmm4 - movdqa %xmm1, %xmm6 - pcmpgtd %xmm2, %xmm5 - pand %xmm5, %xmm3 - pandn %xmm2, %xmm5 - movdqa %xmm4, %xmm7 - pslld $8, %xmm4 - pand %xmm8, %xmm7 - por %xmm5, %xmm3 - psrad $8, %xmm7 - movdqa %xmm3, %xmm5 - pcmpgtd %xmm1, %xmm5 - pand %xmm5, %xmm6 - pandn %xmm3, %xmm5 - movdqa %xmm5, %xmm3 - por %xmm6, %xmm3 - movdqa %xmm7, %xmm6 - movdqa %xmm3, %xmm5 - pslld $8, %xmm3 + mulps %xmm9, %xmm6 + cvttps2dq %xmm1, %xmm1 + movdqa %xmm1, %xmm2 + pcmpgtd %xmm4, %xmm2 + cvttps2dq %xmm6, %xmm6 + pand %xmm2, %xmm1 + pandn %xmm4, %xmm2 + por %xmm1, %xmm2 + movdqa %xmm2, %xmm1 + pcmpgtd %xmm3, %xmm1 + pand %xmm1, %xmm5 + pandn %xmm2, %xmm1 + movdqa %xmm1, %xmm2 + movdqa %xmm6, %xmm1 + por %xmm5, %xmm2 + pcmpgtd %xmm4, %xmm1 + pand %xmm1, %xmm6 + pandn %xmm4, %xmm1 + movdqa %xmm2, %xmm5 + pslld $8, %xmm2 pand %xmm8, %xmm5 + por %xmm6, %xmm1 psrad $8, %xmm5 - punpcklwd %xmm5, %xmm7 - punpckhwd %xmm5, %xmm6 - movdqa %xmm4, %xmm5 - punpcklwd %xmm3, %xmm4 - movdqa %xmm7, %xmm10 - punpckhwd %xmm3, %xmm5 - punpcklwd %xmm6, %xmm7 - punpckhwd %xmm6, %xmm10 - punpcklwd %xmm10, %xmm7 - movdqa %xmm4, %xmm10 - punpcklwd %xmm5, %xmm4 - punpckhwd %xmm5, %xmm10 - punpcklwd %xmm10, %xmm4 - por %xmm7, %xmm4 - movdqa %xmm4, (%r10) + movdqa %xmm1, %xmm6 + pcmpgtd %xmm3, %xmm6 + pand %xmm6, %xmm7 + pandn %xmm1, %xmm6 + movdqa %xmm6, %xmm1 + por %xmm7, %xmm1 + movdqa %xmm5, %xmm7 + movdqa %xmm1, %xmm6 + pslld $8, %xmm1 + pand %xmm8, %xmm6 + psrad $8, %xmm6 + punpcklwd %xmm6, %xmm5 + punpckhwd %xmm6, %xmm7 + movdqa %xmm5, %xmm6 + punpcklwd %xmm7, %xmm5 + punpckhwd %xmm7, %xmm6 + punpcklwd %xmm6, %xmm5 + movdqa %xmm2, %xmm6 + punpcklwd %xmm1, %xmm2 + punpckhwd %xmm1, %xmm6 + movdqa %xmm2, %xmm1 + punpcklwd %xmm6, %xmm2 + punpckhwd %xmm6, %xmm1 + punpcklwd %xmm1, %xmm2 + por %xmm2, %xmm5 + movdqa %xmm5, (%r10) addq $16, %r10 cmpw %r9w, %bx ja .L19 @@ -253,54 +253,54 @@ alignedConvertToS16SSE2: je .L28 movaps %xmm0, %xmm1 movq %rdi, %rcx - movdqa .LC1(%rip), %xmm2 + movdqa .LC1(%rip), %xmm4 movq %rsi, %r10 shufps $0, %xmm1, %xmm1 xorl %r9d, %r9d + movdqa .LC2(%rip), %xmm3 movaps %xmm1, %xmm6 - movdqa .LC2(%rip), %xmm1 .align 16 .L24: - movaps %xmm6, %xmm4 + movaps (%rcx), %xmm1 addl $1, %r9d - movaps %xmm6, %xmm3 - mulps (%rcx), %xmm4 - movdqa %xmm1, %xmm7 - mulps 16(%rcx), %xmm3 + movdqa %xmm3, %xmm7 + mulps %xmm6, %xmm1 + movaps 16(%rcx), %xmm5 addq $32, %rcx - cvttps2dq %xmm4, %xmm4 - movdqa %xmm4, %xmm5 - pcmpgtd %xmm2, %xmm5 - cvttps2dq %xmm3, %xmm3 - pand %xmm5, %xmm4 - pandn %xmm2, %xmm5 - por %xmm5, %xmm4 - movdqa %xmm4, %xmm5 - pcmpgtd %xmm1, %xmm5 + mulps %xmm6, %xmm5 + cvttps2dq %xmm1, %xmm1 + movdqa %xmm1, %xmm2 + pcmpgtd %xmm4, %xmm2 + cvttps2dq %xmm5, %xmm5 + pand %xmm2, %xmm1 + pandn %xmm4, %xmm2 + por %xmm1, %xmm2 + movdqa %xmm2, %xmm1 + pcmpgtd %xmm3, %xmm1 + pand %xmm1, %xmm7 + pandn %xmm2, %xmm1 + movdqa %xmm1, %xmm2 + movdqa %xmm5, %xmm1 + por %xmm7, %xmm2 + movdqa %xmm3, %xmm7 + pcmpgtd %xmm4, %xmm1 + pand %xmm1, %xmm5 + pandn %xmm4, %xmm1 + por %xmm5, %xmm1 + movdqa %xmm1, %xmm5 + pcmpgtd %xmm3, %xmm5 pand %xmm5, %xmm7 - pandn %xmm4, %xmm5 - movdqa %xmm5, %xmm4 - movdqa %xmm3, %xmm5 - por %xmm7, %xmm4 - movdqa %xmm1, %xmm7 - pcmpgtd %xmm2, %xmm5 - pand %xmm5, %xmm3 - pandn %xmm2, %xmm5 - por %xmm5, %xmm3 - movdqa %xmm3, %xmm5 - pcmpgtd %xmm1, %xmm5 - pand %xmm5, %xmm7 - pandn %xmm3, %xmm5 - movdqa %xmm5, %xmm3 - movdqa %xmm4, %xmm5 - por %xmm7, %xmm3 - punpcklwd %xmm3, %xmm4 - punpckhwd %xmm3, %xmm5 - movdqa %xmm4, %xmm7 - punpcklwd %xmm5, %xmm4 - punpckhwd %xmm5, %xmm7 - punpcklwd %xmm7, %xmm4 - movdqa %xmm4, (%r10) + pandn %xmm1, %xmm5 + movdqa %xmm5, %xmm1 + movdqa %xmm2, %xmm5 + por %xmm7, %xmm1 + punpcklwd %xmm1, %xmm2 + punpckhwd %xmm1, %xmm5 + movdqa %xmm2, %xmm1 + punpcklwd %xmm5, %xmm2 + punpckhwd %xmm5, %xmm1 + punpcklwd %xmm1, %xmm2 + movdqa %xmm2, (%r10) addq $16, %r10 cmpw %r9w, %bx ja .L24 @@ -392,4 +392,4 @@ alignedConvertToS16SSE2: .byte 0x2 .align 8 .LEFDE5: - .ident "GCC: (GNU) 4.4.0 20081204 (experimental)" + .ident "GCC: (GNU) 4.4.0 20090304 (experimental)" diff --git a/src/core/basic_ops_x86_mmx.s b/src/core/basic_ops_x86_mmx.s index 71f677407..0ea75c74a 100644 --- a/src/core/basic_ops_x86_mmx.s +++ b/src/core/basic_ops_x86_mmx.s @@ -10,11 +10,11 @@ alignedMemCpyMMX: movl 124(%esp), %eax shrl $6, %ebx #APP -# 42 "/home/toby/development/svn/lmms-trunk/src/core/basic_ops_x86.c" 1 +# 42 "/home/toby/development/git/lmms/src/core/basic_ops_x86.c" 1 fsave 4(%esp); fwait # 0 "" 2 -# 44 "/home/toby/development/svn/lmms-trunk/src/core/basic_ops_x86.c" 1 +# 44 "/home/toby/development/git/lmms/src/core/basic_ops_x86.c" 1 1: prefetchnta (%eax) prefetchnta 64(%eax) prefetchnta 128(%eax) @@ -31,7 +31,7 @@ alignedMemCpyMMX: .p2align 3 .L3: #APP -# 53 "/home/toby/development/svn/lmms-trunk/src/core/basic_ops_x86.c" 1 +# 53 "/home/toby/development/git/lmms/src/core/basic_ops_x86.c" 1 1: prefetchnta 320(%eax) 2: movq (%eax), %mm0 movq 8(%eax), %mm1 @@ -59,7 +59,7 @@ alignedMemCpyMMX: jne .L3 .L2: #APP -# 75 "/home/toby/development/svn/lmms-trunk/src/core/basic_ops_x86.c" 1 +# 75 "/home/toby/development/git/lmms/src/core/basic_ops_x86.c" 1 fsave 4(%esp); fwait # 0 "" 2 @@ -83,7 +83,7 @@ alignedMemClearMMX: .p2align 3 .L9: #APP -# 90 "/home/toby/development/svn/lmms-trunk/src/core/basic_ops_x86.c" 1 +# 90 "/home/toby/development/git/lmms/src/core/basic_ops_x86.c" 1 movq %mm0, (%edx) movq %mm0, 8(%edx) movq %mm0, 16(%edx) @@ -103,5 +103,5 @@ movq %mm0, 56(%edx) emms ret .size alignedMemClearMMX, .-alignedMemClearMMX - .ident "GCC: (GNU) 4.4.0 20081204 (experimental)" + .ident "GCC: (GNU) 4.4.0 20090304 (experimental)" .section .note.GNU-stack,"",@progbits diff --git a/src/core/basic_ops_x86_sse.s b/src/core/basic_ops_x86_sse.s index b2810e0f5..16cc8a239 100644 --- a/src/core/basic_ops_x86_sse.s +++ b/src/core/basic_ops_x86_sse.s @@ -77,16 +77,16 @@ alignedBufApplyGainSSE: .p2align 4,,7 .p2align 3 .L14: - movaps %xmm0, %xmm3 + movaps 16(%eax), %xmm3 addl $1, %edx - movaps %xmm0, %xmm2 - movaps %xmm0, %xmm1 - movaps %xmm0, %xmm4 - mulps 16(%eax), %xmm3 - mulps 32(%eax), %xmm2 - mulps 48(%eax), %xmm1 + movaps 32(%eax), %xmm2 + mulps %xmm0, %xmm3 + movaps 48(%eax), %xmm1 + mulps %xmm0, %xmm2 + movaps (%eax), %xmm4 + mulps %xmm0, %xmm1 movaps %xmm3, 16(%eax) - mulps (%eax), %xmm4 + mulps %xmm0, %xmm4 movaps %xmm2, 32(%eax) movaps %xmm1, 48(%eax) movaps %xmm4, (%eax) @@ -148,28 +148,28 @@ alignedBufMixLRCoeffSSE: movl 16(%esp), %ebx testl %esi, %esi jle .L25 - movss 24(%esp), %xmm0 + movss 24(%esp), %xmm2 subl $1, %esi - movss 20(%esp), %xmm1 + movss 20(%esp), %xmm0 xorl %eax, %eax shrl $2, %esi xorl %ecx, %ecx addl $1, %esi - unpcklps %xmm0, %xmm1 - movaps %xmm1, %xmm0 - movlhps %xmm1, %xmm0 + unpcklps %xmm2, %xmm0 + movaps %xmm0, %xmm2 + movlhps %xmm0, %xmm2 .p2align 4,,7 .p2align 3 .L24: - movaps %xmm0, %xmm1 + movaps 16(%ebx,%eax), %xmm0 addl $1, %ecx - movaps %xmm0, %xmm2 - mulps 16(%ebx,%eax), %xmm1 - mulps (%ebx,%eax), %xmm2 - addps 16(%edx,%eax), %xmm1 - addps (%edx,%eax), %xmm2 - movaps %xmm1, 16(%edx,%eax) - movaps %xmm2, (%edx,%eax) + movaps (%ebx,%eax), %xmm1 + mulps %xmm2, %xmm0 + mulps %xmm2, %xmm1 + addps 16(%edx,%eax), %xmm0 + addps (%edx,%eax), %xmm1 + movaps %xmm0, 16(%edx,%eax) + movaps %xmm1, (%edx,%eax) addl $32, %eax cmpl %ecx, %esi ja .L24 @@ -189,31 +189,31 @@ alignedBufWetDryMixSSE: movl 16(%esp), %ebx testl %esi, %esi jle .L30 - movss 24(%esp), %xmm1 + movss 24(%esp), %xmm3 subl $1, %esi - movss 20(%esp), %xmm0 + movss 20(%esp), %xmm2 xorl %eax, %eax shrl $2, %esi xorl %ecx, %ecx - shufps $0, %xmm1, %xmm1 + shufps $0, %xmm3, %xmm3 addl $1, %esi - shufps $0, %xmm0, %xmm0 + shufps $0, %xmm2, %xmm2 .p2align 4,,7 .p2align 3 .L29: - movaps %xmm1, %xmm3 + movaps 16(%ebx,%eax), %xmm1 addl $1, %ecx - movaps %xmm0, %xmm2 - movaps %xmm1, %xmm4 - mulps 16(%edx,%eax), %xmm3 - mulps 16(%ebx,%eax), %xmm2 - mulps (%edx,%eax), %xmm4 - addps %xmm3, %xmm2 - movaps %xmm0, %xmm3 - mulps (%ebx,%eax), %xmm3 - movaps %xmm2, 16(%edx,%eax) - addps %xmm4, %xmm3 - movaps %xmm3, (%edx,%eax) + movaps 16(%edx,%eax), %xmm0 + mulps %xmm2, %xmm1 + movaps (%ebx,%eax), %xmm4 + mulps %xmm3, %xmm0 + mulps %xmm2, %xmm4 + addps %xmm1, %xmm0 + movaps (%edx,%eax), %xmm1 + mulps %xmm3, %xmm1 + movaps %xmm0, 16(%edx,%eax) + addps %xmm4, %xmm1 + movaps %xmm1, (%edx,%eax) addl $32, %eax cmpl %ecx, %esi ja .L29 @@ -230,131 +230,129 @@ alignedBufWetDryMixSplittedSSE: pushl %edi pushl %esi pushl %ebx - subl $124, %esp - movl 164(%esp), %eax - movl 144(%esp), %edx - movl 148(%esp), %esi - movl 152(%esp), %ecx + subl $140, %esp + movl 180(%esp), %eax + flds 172(%esp) + movl 160(%esp), %edx + movl 164(%esp), %esi testl %eax, %eax - movss 156(%esp), %xmm4 - movss 160(%esp), %xmm5 - jle .L39 - movl 164(%esp), %eax + movl 168(%esp), %ecx + flds 176(%esp) + jle .L43 + movl 180(%esp), %eax subl $1, %eax shrl %eax addl $1, %eax movl %eax, %ebp - movl %eax, 96(%esp) + movl %eax, 120(%esp) shrl $2, %ebp - cmpl $3, 96(%esp) + cmpl $3, 120(%esp) leal 0(,%ebp,4), %eax - movl %eax, 100(%esp) + movl %eax, 124(%esp) jbe .L40 testl %eax, %eax jne .L34 .L40: + fxch %st(1) xorl %edi, %edi jmp .L36 .p2align 4,,7 .p2align 3 .L34: - movaps %xmm4, %xmm2 - xorps %xmm6, %xmm6 - shufps $0, %xmm2, %xmm2 - movaps %xmm5, %xmm1 + fsts 12(%esp) + fxch %st(1) + xorps %xmm7, %xmm7 + movss 12(%esp), %xmm0 movl %esi, %ebx - shufps $0, %xmm1, %xmm1 - movaps %xmm2, (%esp) + fsts 12(%esp) xorl %eax, %eax xorl %edi, %edi - movss %xmm5, 108(%esp) - movss %xmm4, 104(%esp) - movaps %xmm1, %xmm4 + shufps $0, %xmm0, %xmm0 + movaps %xmm0, 32(%esp) + movss 12(%esp), %xmm0 + shufps $0, %xmm0, %xmm0 + movaps %xmm0, 16(%esp) .p2align 4,,7 .p2align 3 .L37: - movaps 16(%edx,%eax,2), %xmm3 + movaps (%edx,%eax,2), %xmm5 addl $1, %edi - movaps (%edx,%eax,2), %xmm2 - movaps 32(%edx,%eax,2), %xmm1 - movaps %xmm2, %xmm7 - shufps $221, %xmm3, %xmm2 - movaps 48(%edx,%eax,2), %xmm0 - shufps $136, %xmm3, %xmm7 - movaps %xmm2, 64(%esp) - movaps %xmm1, %xmm2 - shufps $221, %xmm0, %xmm1 - shufps $136, %xmm0, %xmm2 - movaps %xmm6, %xmm3 - movaps %xmm2, 48(%esp) - movlps (%ebx), %xmm3 - movhps 8(%ebx), %xmm3 - movaps %xmm7, %xmm5 - movaps %xmm3, %xmm0 - movaps %xmm6, %xmm2 - movlps 16(%ebx), %xmm2 - shufps $136, 48(%esp), %xmm5 - movhps 24(%ebx), %xmm2 - shufps $136, %xmm2, %xmm0 - addl $32, %ebx - mulps %xmm4, %xmm5 - shufps $221, %xmm2, %xmm3 - movaps %xmm1, 32(%esp) - mulps (%esp), %xmm0 - movaps %xmm6, %xmm1 - shufps $221, 48(%esp), %xmm7 - movlps (%ecx,%eax), %xmm1 - movhps 8(%ecx,%eax), %xmm1 - movaps 64(%esp), %xmm2 - mulps %xmm4, %xmm7 - addps %xmm0, %xmm5 - movaps %xmm6, %xmm0 - movlps 16(%ecx,%eax), %xmm0 - movhps 24(%ecx,%eax), %xmm0 - shufps $221, 32(%esp), %xmm2 - movaps %xmm5, 16(%esp) - movaps 64(%esp), %xmm5 - mulps %xmm4, %xmm2 - shufps $136, 32(%esp), %xmm5 - mulps (%esp), %xmm3 - mulps %xmm4, %xmm5 - addps %xmm3, %xmm7 - movaps 16(%esp), %xmm3 - movaps %xmm5, 80(%esp) - movaps %xmm1, %xmm5 - shufps $221, %xmm0, %xmm1 - shufps $136, %xmm0, %xmm5 - mulps (%esp), %xmm1 - unpcklps %xmm7, %xmm3 - mulps (%esp), %xmm5 - movaps 16(%esp), %xmm0 - addps %xmm1, %xmm2 - movaps %xmm3, %xmm1 - addps 80(%esp), %xmm5 - unpckhps %xmm7, %xmm0 - movaps %xmm0, %xmm7 + movaps 16(%edx,%eax,2), %xmm6 movaps %xmm5, %xmm0 - unpcklps %xmm2, %xmm0 - unpckhps %xmm2, %xmm5 - unpcklps %xmm0, %xmm1 - unpckhps %xmm0, %xmm3 - movaps %xmm7, %xmm0 - unpckhps %xmm5, %xmm7 - unpcklps %xmm5, %xmm0 + shufps $136, %xmm6, %xmm0 + movaps 32(%edx,%eax,2), %xmm4 + shufps $221, %xmm6, %xmm5 + movaps %xmm0, 96(%esp) + movaps 48(%edx,%eax,2), %xmm3 + movaps %xmm4, %xmm0 + shufps $136, %xmm3, %xmm0 + movaps 96(%esp), %xmm2 + shufps $221, %xmm3, %xmm4 + movaps %xmm7, %xmm6 + movlps (%ebx), %xmm6 + movaps %xmm5, 80(%esp) + movhps 8(%ebx), %xmm6 + shufps $136, %xmm0, %xmm2 + movaps %xmm0, 64(%esp) + movaps %xmm7, %xmm5 + movaps %xmm6, %xmm0 + movlps 16(%ebx), %xmm5 + movhps 24(%ebx), %xmm5 + shufps $136, %xmm5, %xmm0 + mulps 32(%esp), %xmm2 + shufps $221, %xmm5, %xmm6 + movaps %xmm4, 48(%esp) + addl $32, %ebx + mulps 16(%esp), %xmm0 + movaps %xmm7, %xmm4 + movlps (%eax,%ecx), %xmm4 + movaps %xmm7, %xmm3 + movhps 8(%eax,%ecx), %xmm4 + movaps %xmm4, %xmm1 + movlps 16(%ecx,%eax), %xmm3 + movhps 24(%ecx,%eax), %xmm3 + shufps $136, %xmm3, %xmm1 + addps %xmm0, %xmm2 + movaps 80(%esp), %xmm0 + shufps $221, %xmm3, %xmm4 + shufps $136, 48(%esp), %xmm0 + mulps 16(%esp), %xmm1 + movaps %xmm2, %xmm3 + movaps 80(%esp), %xmm5 + mulps 32(%esp), %xmm0 + shufps $221, 48(%esp), %xmm5 + mulps 16(%esp), %xmm6 + addps %xmm1, %xmm0 + movaps 96(%esp), %xmm1 + shufps $221, 64(%esp), %xmm1 + mulps 16(%esp), %xmm4 + mulps 32(%esp), %xmm1 + mulps 32(%esp), %xmm5 + addps %xmm6, %xmm1 + addps %xmm4, %xmm5 + movaps %xmm0, %xmm4 + unpcklps %xmm1, %xmm3 + unpcklps %xmm5, %xmm4 + unpckhps %xmm1, %xmm2 + movaps %xmm3, %xmm1 + unpckhps %xmm5, %xmm0 + unpcklps %xmm4, %xmm1 + unpckhps %xmm4, %xmm3 movaps %xmm1, (%edx,%eax,2) + movaps %xmm2, %xmm1 + unpckhps %xmm0, %xmm2 + unpcklps %xmm0, %xmm1 movaps %xmm3, 16(%edx,%eax,2) - movaps %xmm0, 32(%edx,%eax,2) - movaps %xmm7, 48(%edx,%eax,2) + movaps %xmm1, 32(%edx,%eax,2) + movaps %xmm2, 48(%edx,%eax,2) addl $32, %eax cmpl %edi, %ebp ja .L37 - movl 100(%esp), %edi - movl 96(%esp), %eax - movss 104(%esp), %xmm4 - movss 108(%esp), %xmm5 + movl 124(%esp), %edi + movl 120(%esp), %eax addl %edi, %edi - cmpl %eax, 100(%esp) - je .L39 + cmpl %eax, 124(%esp) + je .L44 .L36: leal (%edx,%edi,8), %ebx xorl %ebp, %ebp @@ -363,38 +361,50 @@ alignedBufWetDryMixSplittedSSE: .p2align 4,,7 .p2align 3 .L38: - movaps %xmm5, %xmm1 + flds (%ebx) addl $2, %ebp - movaps %xmm4, %xmm0 - mulss (%ebx), %xmm1 - mulss (%esi,%eax,4), %xmm0 - addss %xmm1, %xmm0 - movaps %xmm5, %xmm1 - movss %xmm0, (%ebx) - movaps %xmm4, %xmm0 - mulss 4(%ebx), %xmm1 - mulss (%ecx,%eax,4), %xmm0 - addss %xmm1, %xmm0 - movaps %xmm5, %xmm1 - movss %xmm0, 4(%ebx) + fmul %st(2), %st + flds (%esi,%eax,4) + fmul %st(2), %st + faddp %st, %st(1) + fstps (%ebx) + flds 4(%ebx) + fmul %st(2), %st + flds (%ecx,%eax,4) + fmul %st(2), %st + faddp %st, %st(1) + fstps 4(%ebx) addl $16, %ebx - movaps %xmm4, %xmm0 - mulss (%edx), %xmm1 - mulss 4(%esi,%eax,4), %xmm0 - addss %xmm1, %xmm0 - movaps %xmm5, %xmm1 - movss %xmm0, (%edx) - movaps %xmm4, %xmm0 - mulss 4(%edx), %xmm1 - mulss 4(%ecx,%eax,4), %xmm0 + flds (%edx) + fmul %st(2), %st + flds 4(%esi,%eax,4) + fmul %st(2), %st + faddp %st, %st(1) + fstps (%edx) + flds 4(%edx) + fmul %st(2), %st + flds 4(%ecx,%eax,4) leal (%edi,%ebp), %eax - addss %xmm1, %xmm0 - movss %xmm0, 4(%edx) + fmul %st(2), %st + faddp %st, %st(1) + fstps 4(%edx) addl $16, %edx - cmpl %eax, 164(%esp) + cmpl %eax, 180(%esp) jg .L38 + fstp %st(0) + fstp %st(0) + jmp .L39 +.L43: + fstp %st(0) + fstp %st(0) + jmp .L39 +.L44: + fstp %st(0) + fstp %st(0) + .p2align 4,,7 + .p2align 3 .L39: - addl $124, %esp + addl $140, %esp popl %ebx popl %esi popl %edi @@ -407,34 +417,39 @@ alignedBufWetDryMixSplittedSSE: unalignedBufMixLRCoeffSSE: pushl %esi pushl %ebx - movl 28(%esp), %esi - movl 12(%esp), %eax - movl 16(%esp), %edx - movss 20(%esp), %xmm0 - movl %esi, %ecx - shrl $31, %ecx - leal (%esi,%ecx), %ebx - andl $1, %ebx - cmpl %ecx, %ebx - movss 24(%esp), %xmm3 - jne .L52 -.L44: + subl $4, %esp + movl 32(%esp), %esi + flds 24(%esp) + movl 16(%esp), %eax + movl 20(%esp), %edx + movl %esi, %ebx + flds 28(%esp) + shrl $31, %ebx + leal (%esi,%ebx), %ecx + andl $1, %ecx + cmpl %ebx, %ecx + jne .L54 +.L46: testl %esi, %esi - jle .L49 + jle .L55 leal -1(%esi), %ebx shrl %ebx testb $15, %al - jne .L46 - movaps %xmm0, %xmm1 + jne .L48 + fxch %st(1) + fstps (%esp) xorps %xmm2, %xmm2 - unpcklps %xmm3, %xmm1 + movss (%esp), %xmm0 addl $1, %ebx + fstps (%esp) xorl %ecx, %ecx - movaps %xmm1, %xmm3 - movlhps %xmm1, %xmm3 + movss (%esp), %xmm1 + unpcklps %xmm1, %xmm0 + movaps %xmm0, %xmm3 + movlhps %xmm0, %xmm3 .p2align 4,,7 .p2align 3 -.L47: +.L49: movaps %xmm2, %xmm1 addl $1, %ecx movlps (%edx), %xmm1 @@ -448,55 +463,65 @@ unalignedBufMixLRCoeffSSE: movaps %xmm0, (%eax) addl $16, %eax cmpl %ebx, %ecx - jb .L47 -.L49: + jb .L49 + jmp .L51 + .p2align 4,,7 + .p2align 3 +.L55: + fstp %st(0) + fstp %st(0) + .p2align 4,,7 + .p2align 3 +.L51: + addl $4, %esp popl %ebx popl %esi ret .p2align 4,,7 .p2align 3 -.L46: - xorl %ecx, %ecx - .p2align 4,,7 - .p2align 3 .L48: - movaps %xmm0, %xmm1 - mulss (%edx,%ecx,8), %xmm1 - addss (%eax,%ecx,8), %xmm1 - movss %xmm1, (%eax,%ecx,8) - movaps %xmm3, %xmm1 - mulss 4(%edx,%ecx,8), %xmm1 - addss 4(%eax,%ecx,8), %xmm1 - movss %xmm1, 4(%eax,%ecx,8) - movaps %xmm0, %xmm1 - mulss 8(%edx,%ecx,8), %xmm1 - addss 8(%eax,%ecx,8), %xmm1 - movss %xmm1, 8(%eax,%ecx,8) - movaps %xmm3, %xmm1 - mulss 12(%edx,%ecx,8), %xmm1 - addss 12(%eax,%ecx,8), %xmm1 - movss %xmm1, 12(%eax,%ecx,8) + xorl %ecx, %ecx + .p2align 4,,7 + .p2align 3 +.L50: + flds (%edx,%ecx,8) + fmul %st(2), %st + fadds (%eax,%ecx,8) + fstps (%eax,%ecx,8) + flds 4(%edx,%ecx,8) + fmul %st(1), %st + fadds 4(%eax,%ecx,8) + fstps 4(%eax,%ecx,8) + flds 8(%edx,%ecx,8) + fmul %st(2), %st + fadds 8(%eax,%ecx,8) + fstps 8(%eax,%ecx,8) + flds 12(%edx,%ecx,8) + fmul %st(1), %st + fadds 12(%eax,%ecx,8) + fstps 12(%eax,%ecx,8) addl $2, %ecx cmpl %ecx, %esi - jg .L48 + jg .L50 + fstp %st(0) + fstp %st(0) + addl $4, %esp popl %ebx popl %esi ret -.L52: - movaps %xmm0, %xmm1 +.L54: + flds (%edx) subl $1, %esi - movss (%eax), %xmm2 - mulss (%edx), %xmm1 - addss %xmm2, %xmm1 - movss 4(%eax), %xmm2 - movss %xmm1, (%eax) - movaps %xmm3, %xmm1 - mulss 4(%edx), %xmm1 + fmul %st(2), %st + fadds (%eax) + fstps (%eax) + flds 4(%edx) addl $8, %edx - addss %xmm2, %xmm1 - movss %xmm1, 4(%eax) + fmul %st(1), %st + fadds 4(%eax) + fstps 4(%eax) addl $8, %eax - jmp .L44 + jmp .L46 .size unalignedBufMixLRCoeffSSE, .-unalignedBufMixLRCoeffSSE - .ident "GCC: (GNU) 4.4.0 20081204 (experimental)" + .ident "GCC: (GNU) 4.4.0 20090304 (experimental)" .section .note.GNU-stack,"",@progbits diff --git a/src/core/basic_ops_x86_sse2.s b/src/core/basic_ops_x86_sse2.s index f44e65a92..fb33cc85d 100644 --- a/src/core/basic_ops_x86_sse2.s +++ b/src/core/basic_ops_x86_sse2.s @@ -70,19 +70,19 @@ alignedConvertToS16SSE2: pushl %ebx subl $8, %esp movl 36(%esp), %eax - movss .LC0, %xmm4 cmpb $0, 44(%esp) + flds .LC0 movl 28(%esp), %edx movl 32(%esp), %ebx movl %eax, %esi - mulss 40(%esp), %xmm4 + fmuls 40(%esp) jne .L13 testw %ax, %ax - jle .L15 + jle .L35 movl %eax, %edi shrw $2, %di cmpw $3, %ax - movw %ax, 2(%esp) + movw %ax, 4(%esp) leal 0(,%edi,4), %ebp ja .L33 .L28: @@ -98,17 +98,19 @@ alignedConvertToS16SSE2: .p2align 4,,7 .p2align 3 .L25: - movaps %xmm4, %xmm0 - mulss (%edx), %xmm0 - cvttss2si %xmm0, %ecx - movaps %xmm4, %xmm0 - mulss 4(%edx), %xmm0 + flds (%edx) + fmul %st(1), %st + fstps 4(%esp) + cvttss2si 4(%esp), %ecx + flds 4(%edx) + fmul %st(1), %st cmpl $-32768, %ecx cmovl %edi, %ecx cmpl $32767, %ecx cmovg %ebx, %ecx + fstps 4(%esp) movw %cx, (%eax) - cvttss2si %xmm0, %ecx + cvttss2si 4(%esp), %ecx cmpl $-32768, %ecx cmovl %edi, %ecx cmpl $32767, %ecx @@ -119,6 +121,15 @@ alignedConvertToS16SSE2: addl $4, %eax cmpw %bp, %si jg .L25 + fstp %st(0) + jmp .L15 +.L35: + fstp %st(0) + jmp .L15 +.L36: + fstp %st(0) + .p2align 4,,7 + .p2align 3 .L15: movswl %si,%esi addl $8, %esp @@ -132,11 +143,11 @@ alignedConvertToS16SSE2: .p2align 3 .L13: testw %ax, %ax - jle .L15 + jle .L36 movl %eax, %ebp shrw $2, %bp cmpw $3, %si - movw %ax, 2(%esp) + movw %ax, 4(%esp) leal 0(,%ebp,4), %eax ja .L34 .L27: @@ -151,12 +162,13 @@ alignedConvertToS16SSE2: .p2align 4,,7 .p2align 3 .L20: - movaps %xmm4, %xmm0 + flds (%ecx) movl $32767, %ebp - mulss (%ecx), %xmm0 - cvttss2si %xmm0, %ebx - movaps %xmm4, %xmm0 - mulss 4(%ecx), %xmm0 + fmul %st(1), %st + fstps 4(%esp) + cvttss2si 4(%esp), %ebx + flds 4(%ecx) + fmul %st(1), %st cmpl $-32768, %ebx cmovl %edi, %ebx cmpl $32767, %ebx @@ -165,8 +177,9 @@ alignedConvertToS16SSE2: sall $8, %ebx orl %ebp, %ebx movl $32767, %ebp + fstps 4(%esp) movw %bx, (%edx) - cvttss2si %xmm0, %ebx + cvttss2si 4(%esp), %ebx cmpl $-32768, %ebx cmovl %edi, %ebx cmpl $32767, %ebx @@ -180,146 +193,149 @@ alignedConvertToS16SSE2: addl $4, %edx cmpw %ax, %si jg .L20 + fstp %st(0) jmp .L15 .p2align 4,,7 .p2align 3 .L34: testw %ax, %ax je .L27 - movaps %xmm4, %xmm0 + fsts (%esp) xorl %ecx, %ecx - movdqa .LC1, %xmm1 - movss %xmm4, 4(%esp) - shufps $0, %xmm0, %xmm0 + movdqa .LC1, %xmm3 + movss (%esp), %xmm0 xorl %edi, %edi + movdqa .LC2, %xmm2 + shufps $0, %xmm0, %xmm0 movaps %xmm0, %xmm7 - movdqa .LC2, %xmm0 .p2align 4,,7 .p2align 3 .L19: - movaps %xmm7, %xmm3 - movdqa %xmm0, %xmm5 - movdqa %xmm0, %xmm6 - movaps %xmm7, %xmm2 + movaps (%edx,%ecx,2), %xmm0 + movdqa %xmm2, %xmm4 + movdqa %xmm2, %xmm6 addl $1, %edi - mulps (%edx,%ecx,2), %xmm3 - mulps 16(%edx,%ecx,2), %xmm2 - cvttps2dq %xmm3, %xmm3 - movdqa %xmm3, %xmm4 - pcmpgtd %xmm1, %xmm4 - pand %xmm4, %xmm3 - pandn %xmm1, %xmm4 - por %xmm4, %xmm3 - cvttps2dq %xmm2, %xmm2 - movdqa %xmm3, %xmm4 - pcmpgtd %xmm0, %xmm4 - pand %xmm4, %xmm5 - pandn %xmm3, %xmm4 - movdqa %xmm4, %xmm3 - movdqa %xmm2, %xmm4 - por %xmm5, %xmm3 - pcmpgtd %xmm1, %xmm4 + movaps 16(%edx,%ecx,2), %xmm5 + mulps %xmm7, %xmm0 + mulps %xmm7, %xmm5 + cvttps2dq %xmm0, %xmm0 + movdqa %xmm0, %xmm1 + pcmpgtd %xmm3, %xmm1 + pand %xmm1, %xmm0 + pandn %xmm3, %xmm1 + por %xmm0, %xmm1 + cvttps2dq %xmm5, %xmm5 + movdqa %xmm1, %xmm0 + pcmpgtd %xmm2, %xmm0 + pand %xmm0, %xmm4 + pandn %xmm1, %xmm0 + movdqa %xmm0, %xmm1 + movdqa %xmm5, %xmm0 + por %xmm4, %xmm1 + pcmpgtd %xmm3, %xmm0 + movdqa .LC3, %xmm4 + pand %xmm0, %xmm5 + pand %xmm1, %xmm4 + pandn %xmm3, %xmm0 + psrad $8, %xmm4 + por %xmm5, %xmm0 + pslld $8, %xmm1 + movdqa %xmm0, %xmm5 + pcmpgtd %xmm2, %xmm5 + pand %xmm5, %xmm6 + pandn %xmm0, %xmm5 + movdqa %xmm5, %xmm0 movdqa .LC3, %xmm5 - pand %xmm4, %xmm2 - pand %xmm3, %xmm5 - pandn %xmm1, %xmm4 + por %xmm6, %xmm0 + pand %xmm0, %xmm5 + pslld $8, %xmm0 psrad $8, %xmm5 - por %xmm4, %xmm2 - pslld $8, %xmm3 - movdqa %xmm2, %xmm4 - pcmpgtd %xmm0, %xmm4 - pand %xmm4, %xmm6 - pandn %xmm2, %xmm4 - movdqa %xmm4, %xmm2 - por %xmm6, %xmm2 - movdqa .LC3, %xmm6 - pand %xmm2, %xmm6 - pslld $8, %xmm2 - psrad $8, %xmm6 - movdqa %xmm5, %xmm4 - punpcklwd %xmm6, %xmm5 - punpckhwd %xmm6, %xmm4 - movdqa %xmm5, %xmm6 - punpcklwd %xmm4, %xmm5 - punpckhwd %xmm4, %xmm6 - movdqa %xmm3, %xmm4 - punpcklwd %xmm6, %xmm5 - punpckhwd %xmm2, %xmm4 - punpcklwd %xmm2, %xmm3 - movdqa %xmm3, %xmm6 - punpcklwd %xmm4, %xmm3 - punpckhwd %xmm4, %xmm6 - punpcklwd %xmm6, %xmm3 - por %xmm3, %xmm5 - movdqa %xmm5, (%ebx,%ecx) + movdqa %xmm4, %xmm6 + punpcklwd %xmm5, %xmm4 + punpckhwd %xmm5, %xmm6 + movdqa %xmm4, %xmm5 + punpcklwd %xmm6, %xmm4 + punpckhwd %xmm6, %xmm5 + punpcklwd %xmm5, %xmm4 + movdqa %xmm1, %xmm5 + punpcklwd %xmm0, %xmm1 + punpckhwd %xmm0, %xmm5 + movdqa %xmm1, %xmm0 + punpcklwd %xmm5, %xmm1 + punpckhwd %xmm5, %xmm0 + punpcklwd %xmm0, %xmm1 + por %xmm1, %xmm4 + movdqa %xmm4, (%ebx,%ecx) addl $16, %ecx cmpw %di, %bp ja .L19 - cmpw 2(%esp), %ax - movss 4(%esp), %xmm4 + cmpw 4(%esp), %ax jne .L18 + fstp %st(0) jmp .L15 .p2align 4,,7 .p2align 3 .L33: testw %bp, %bp - .p2align 4,,3 + .p2align 4,,4 .p2align 3 je .L28 - movaps %xmm4, %xmm0 + fsts (%esp) xorl %eax, %eax - movdqa .LC1, %xmm1 - shufps $0, %xmm0, %xmm0 + movdqa .LC1, %xmm3 + movss (%esp), %xmm0 xorl %ecx, %ecx - movaps %xmm0, %xmm6 - movdqa .LC2, %xmm0 + movdqa .LC2, %xmm2 + shufps $0, %xmm0, %xmm0 + movaps %xmm0, %xmm5 .p2align 4,,7 .p2align 3 .L24: - movaps %xmm6, %xmm3 + movaps (%edx,%eax,2), %xmm0 addl $1, %ecx - movdqa %xmm0, %xmm7 - movaps %xmm6, %xmm2 - mulps (%edx,%eax,2), %xmm3 - mulps 16(%edx,%eax,2), %xmm2 - cvttps2dq %xmm3, %xmm3 - movdqa %xmm3, %xmm5 - pcmpgtd %xmm1, %xmm5 - pand %xmm5, %xmm3 - pandn %xmm1, %xmm5 - por %xmm5, %xmm3 - cvttps2dq %xmm2, %xmm2 - movdqa %xmm3, %xmm5 - pcmpgtd %xmm0, %xmm5 - pand %xmm5, %xmm7 - pandn %xmm3, %xmm5 - movdqa %xmm5, %xmm3 - movdqa %xmm2, %xmm5 - por %xmm7, %xmm3 - pcmpgtd %xmm1, %xmm5 - movdqa %xmm0, %xmm7 - pand %xmm5, %xmm2 - pandn %xmm1, %xmm5 - por %xmm5, %xmm2 - movdqa %xmm2, %xmm5 - pcmpgtd %xmm0, %xmm5 - pand %xmm5, %xmm7 - pandn %xmm2, %xmm5 - movdqa %xmm5, %xmm2 - movdqa %xmm3, %xmm5 - por %xmm7, %xmm2 - punpckhwd %xmm2, %xmm5 - punpcklwd %xmm2, %xmm3 - movdqa %xmm3, %xmm7 - punpcklwd %xmm5, %xmm3 - punpckhwd %xmm5, %xmm7 - punpcklwd %xmm7, %xmm3 - movdqa %xmm3, (%ebx,%eax) + movdqa %xmm2, %xmm6 + movaps 16(%edx,%eax,2), %xmm4 + mulps %xmm5, %xmm0 + mulps %xmm5, %xmm4 + cvttps2dq %xmm0, %xmm0 + movdqa %xmm0, %xmm1 + pcmpgtd %xmm3, %xmm1 + pand %xmm1, %xmm0 + pandn %xmm3, %xmm1 + por %xmm0, %xmm1 + cvttps2dq %xmm4, %xmm4 + movdqa %xmm1, %xmm0 + pcmpgtd %xmm2, %xmm0 + pand %xmm0, %xmm6 + pandn %xmm1, %xmm0 + movdqa %xmm0, %xmm1 + movdqa %xmm4, %xmm0 + por %xmm6, %xmm1 + pcmpgtd %xmm3, %xmm0 + movdqa %xmm2, %xmm6 + pand %xmm0, %xmm4 + pandn %xmm3, %xmm0 + por %xmm4, %xmm0 + movdqa %xmm0, %xmm4 + pcmpgtd %xmm2, %xmm4 + pand %xmm4, %xmm6 + pandn %xmm0, %xmm4 + movdqa %xmm4, %xmm0 + movdqa %xmm1, %xmm4 + por %xmm6, %xmm0 + punpckhwd %xmm0, %xmm4 + punpcklwd %xmm0, %xmm1 + movdqa %xmm1, %xmm0 + punpcklwd %xmm4, %xmm1 + punpckhwd %xmm4, %xmm0 + punpcklwd %xmm0, %xmm1 + movdqa %xmm1, (%ebx,%eax) addl $16, %eax cmpw %cx, %di ja .L24 - cmpw %bp, 2(%esp) + cmpw %bp, 4(%esp) jne .L23 + fstp %st(0) jmp .L15 .size alignedConvertToS16SSE2, .-alignedConvertToS16SSE2 .section .rodata.cst4,"aM",@progbits,4 @@ -345,5 +361,5 @@ alignedConvertToS16SSE2: .long 65280 .long 65280 .long 65280 - .ident "GCC: (GNU) 4.4.0 20081204 (experimental)" + .ident "GCC: (GNU) 4.4.0 20090304 (experimental)" .section .note.GNU-stack,"",@progbits diff --git a/src/tracks/pattern.cpp b/src/tracks/pattern.cpp index c3eb52033..441cf5a0a 100644 --- a/src/tracks/pattern.cpp +++ b/src/tracks/pattern.cpp @@ -1086,6 +1086,10 @@ void patternView::wheelEvent( QWheelEvent * _we ) { n->setVolume( vol + 5 ); } + else + { + n->setVolume( 100 ); + } } else {