CPU: new framework for optimized CPU-specific routines

The new CPU framework replaces the old BasicOps framework. It is more
flexible, and the build process is no longer such a mess (no more
pre-compiled assembler files etc.). It will hopefully see some
improvements and extensions soon.

Signed-off-by: Tobias Doerffel <tobias.doerffel@gmail.com>
This commit is contained in:
Tobias Doerffel
2009-08-03 14:47:28 +02:00
parent 27d9c17e3f
commit 89fa5c99e9
23 changed files with 247 additions and 2172 deletions

View File

@@ -454,47 +454,42 @@ SET(LMMS_ER_H ${CMAKE_CURRENT_BINARY_DIR}/embedded_resources.h)
ADD_FILE_DEPENDENCIES(${CMAKE_BINARY_DIR}/lmmsconfig.h ${lmms_MOC_out})
ADD_CUSTOM_COMMAND(OUTPUT ${LMMS_ER_H} COMMAND ${BIN2RES} ARGS ${lmms_EMBEDDED_RESOURCES} > ${LMMS_ER_H} DEPENDS ${BIN2RES})
SET(BASIC_OPS_X86_C "${CMAKE_SOURCE_DIR}/src/core/basic_ops_x86.c")
# build CPU specific optimized modules
IF(LMMS_HOST_X86 OR LMMS_HOST_X86_64)
ADD_CUSTOM_TARGET(regen-basic-ops)
IF(LMMS_HOST_X86)
SET(opt_targets mmx sse sse2)
SET(host_arch x86)
ELSE(LMMS_HOST_X86)
SET(opt_targets sse sse2)
SET(host_arch x86_64)
ENDIF(LMMS_HOST_X86)
IF(LMMS_HOST_X86)
SET(opt_targets mmx sse sse2)
ELSE(LMMS_HOST_X86)
SET(opt_targets sse sse2)
ENDIF(LMMS_HOST_X86)
FOREACH(opt_target ${opt_targets})
STRING(TOUPPER ${opt_target} OPT_TARGET)
SET(BASIC_OPS_X86_TARGET_S "${CMAKE_SOURCE_DIR}/src/core/basic_ops_${host_arch}_${opt_target}.s")
SET(BASIC_OPS_X86_TARGET_O "${CMAKE_BINARY_DIR}/basic_ops_${host_arch}_${opt_target}.o")
SET(BASIC_OPS_X86_TARGET_S "")
SET(CPU_X86_C "${CMAKE_SOURCE_DIR}/src/core/CpuX86.c")
SET(CPU_X86_TARGET_O "${CMAKE_BINARY_DIR}/CpuX86_${opt_target}.o")
SET(FPMATH_FLAGS "")
IF(NOT "${OPT_TARGET}" STREQUAL "MMX")
SET(FPMATH_FLAGS "-mfpmath=sse")
ENDIF(NOT "${OPT_TARGET}" STREQUAL "MMX")
IF(EXISTS "$ENV{SVN_C_COMPILER}")
SET(C_COMPILER $ENV{SVN_C_COMPILER})
ELSE(EXISTS "$ENV{SVN_C_COMPILER}")
SET(C_COMPILER ${CMAKE_C_COMPILER})
ENDIF(EXISTS "$ENV{SVN_C_COMPILER}")
IF("${CMAKE_C_COMPILER_ARG1}" STREQUAL " gcc")
SET(CMAKE_C_COMPILER_ARG1 gcc)
ENDIF("${CMAKE_C_COMPILER_ARG1}" STREQUAL " gcc")
ADD_CUSTOM_TARGET(regen-basic-ops-${opt_target} COMMAND ${C_COMPILER} -O2 -fno-stack-protector -ftree-vectorize -ftree-vectorizer-verbose=2 -fomit-frame-pointer -c -S -I${CMAKE_SOURCE_DIR}/include -I${CMAKE_BINARY_DIR} -g0 -DBUILD_${OPT_TARGET} -m${opt_target} ${FPMATH_FLAGS} -o ${BASIC_OPS_X86_TARGET_S} ${BASIC_OPS_X86_C} DEPENDS ${BASIC_OPS_X86_C})
ADD_CUSTOM_COMMAND(OUTPUT ${BASIC_OPS_X86_TARGET_O} COMMAND ${CMAKE_C_COMPILER} ARGS ${CMAKE_C_COMPILER_ARG1} ${BASIC_OPS_X86_TARGET_S} -c -o ${BASIC_OPS_X86_TARGET_O} DEPENDS ${BASIC_OPS_X86_TARGET_S})
ADD_DEPENDENCIES(regen-basic-ops regen-basic-ops-${opt_target})
SET(opt_target_objects ${opt_target_objects} ${BASIC_OPS_X86_TARGET_O})
SET(COMPILE_CMD ${CMAKE_C_COMPILER} ${CPU_X86_C} -O2 -fno-stack-protector -ftree-vectorize -fomit-frame-pointer -c -I${CMAKE_SOURCE_DIR}/include -I${CMAKE_BINARY_DIR} -g0 -DBUILD_${OPT_TARGET} -m${opt_target} ${FPMATH_FLAGS})
ADD_CUSTOM_COMMAND(OUTPUT ${CPU_X86_TARGET_O} COMMAND ${COMPILE_CMD} -o ${CPU_X86_TARGET_O} DEPENDS ${CPU_X86_C})
ADD_CUSTOM_TARGET(debug-${opt_target} COMMAND ${COMPILE_CMD} -o ${CPU_X86_TARGET_O}.s -S -ftree-vectorizer-verbose=2)
SET(cpu_objects ${cpu_objects} ${CPU_X86_TARGET_O})
ENDFOREACH(opt_target ${opt_targets})
SET(lmms_SOURCES ${lmms_SOURCES} ${opt_target_objects})
# to be used by maintainer with special ultra-optimizing super duper GCC
SET(lmms_SOURCES ${lmms_SOURCES} ${cpu_objects})
ENDIF(LMMS_HOST_X86 OR LMMS_HOST_X86_64)
IF(WIN32)
SET(WINRC "${CMAKE_BINARY_DIR}/lmmsrc.obj")
IF(LMMS_HOST_X86_64)

View File

@@ -1,8 +1,8 @@
/*
* basic_ops.h - basic memory operations
* Cpu.h - CPU specific accelerated operations
*
* Copyright (c) 2008-2009 Tobias Doerffel <tobydox/at/users.sourceforge.net>
*
* Copyright (c) 2008 Tobias Doerffel <tobydox/at/users.sourceforge.net>
*
* This file is part of Linux MultiMedia Studio - http://lmms.sourceforge.net
*
* This program is free software; you can redistribute it and/or
@@ -22,9 +22,8 @@
*
*/
#ifndef _BASIC_OPS_H
#define _BASIC_OPS_H
#ifndef _CPU_H
#define _CPU_H
#include "lmms_basics.h"
@@ -32,56 +31,64 @@
#include <stdbool.h>
#endif
void initBasicOps( void );
#ifdef __cplusplus
namespace CPU
{
#endif
void * alignedMalloc( int _bytes );
void alignedFree( void * _buf );
void init();
sampleFrameA * alignedAllocFrames( int _frames );
void alignedFreeFrames( sampleFrameA * _buf );
void * memAlloc( int _bytes );
void memFree( void * _buf );
sampleFrameA * allocFrames( int _frames );
void freeFrames( sampleFrameA * _buf );
// all aligned* functions assume data to be 16 byte aligned and size to be
// multiples of 64
typedef void (*alignedMemCpyFunc)( void * RP _dst, const void * RP _src,
// all functions assume data to be 16-byte aligned and size to be
// a multiple of 64 (except for unaligned*())
typedef void (*MemCpyFunc)( void * RP _dst, const void * RP _src,
int _size );
typedef void (*alignedMemClearFunc)( void * RP _dst, int _size );
typedef void (*alignedBufApplyGainFunc)( sampleFrameA * RP _dst,
typedef void (*MemClearFunc)( void * RP _dst, int _size );
typedef void (*BufApplyGainFunc)( sampleFrameA * RP _dst,
float _gain, int _frames );
typedef void (*alignedBufMixFunc)( sampleFrameA * RP _dst,
typedef void (*BufMixFunc)( sampleFrameA * RP _dst,
const sampleFrameA * RP _src,
int _frames );
typedef void (*alignedBufMixLRCoeffFunc)( sampleFrameA * RP _dst,
typedef void (*BufMixLRCoeffFunc)( sampleFrameA * RP _dst,
const sampleFrameA * RP _src,
float _left, float _right,
int _frames );
typedef void (*unalignedBufMixLRCoeffFunc)( sampleFrame * RP _dst,
typedef void (*UnalignedBufMixLRCoeffFunc)( sampleFrame * RP _dst,
const sampleFrame * RP _src,
float _left, float _right,
int _frames );
typedef void (*alignedBufWetDryMixFunc)( sampleFrameA * RP _dst,
typedef void (*BufWetDryMixFunc)( sampleFrameA * RP _dst,
const sampleFrameA * RP _src,
float _wet, float _dry, int _frames );
typedef void (*alignedBufWetDryMixSplittedFunc)( sampleFrameA * RP _dst,
typedef void (*BufWetDryMixSplittedFunc)( sampleFrameA * RP _dst,
const float * RP _left,
const float * RP _right,
float _wet, float _dry, int _frames );
typedef int (*alignedConvertToS16Func)( const sampleFrameA * RP _src,
typedef int (*ConvertToS16Func)( const sampleFrameA * RP _src,
intSampleFrameA * RP _dst,
const fpp_t _frames,
const float _master_gain,
const bool _convert_endian );
extern alignedMemCpyFunc alignedMemCpy;
extern alignedMemClearFunc alignedMemClear;
extern alignedBufApplyGainFunc alignedBufApplyGain;
extern alignedBufMixFunc alignedBufMix;
extern alignedBufMixLRCoeffFunc alignedBufMixLRCoeff;
extern unalignedBufMixLRCoeffFunc unalignedBufMixLRCoeff;
extern alignedBufWetDryMixFunc alignedBufWetDryMix;
extern alignedBufWetDryMixSplittedFunc alignedBufWetDryMixSplitted;
extern alignedConvertToS16Func alignedConvertToS16;
extern MemCpyFunc memCpy;
extern MemClearFunc memClear;
extern BufApplyGainFunc bufApplyGain;
extern BufMixFunc bufMix;
extern BufMixLRCoeffFunc bufMixLRCoeff;
extern UnalignedBufMixLRCoeffFunc unalignedBufMixLRCoeff;
extern BufWetDryMixFunc bufWetDryMix;
extern BufWetDryMixSplittedFunc bufWetDryMixSplitted;
extern ConvertToS16Func convertToS16;
#ifdef __cplusplus
}
#endif
#ifdef LMMS_HOST_X86
#define X86_OPTIMIZATIONS

View File

@@ -1,8 +1,8 @@
/*
* audio_dummy.h - dummy-audio-device
*
* Copyright (c) 2004-2008 Tobias Doerffel <tobydox/at/users.sourceforge.net>
*
* Copyright (c) 2004-2009 Tobias Doerffel <tobydox/at/users.sourceforge.net>
*
* This file is part of Linux MultiMedia Studio - http://lmms.sourceforge.net
*
* This program is free software; you can redistribute it and/or
@@ -22,12 +22,11 @@
*
*/
#ifndef _AUDIO_DUMMY_H
#define _AUDIO_DUMMY_H
#include "audio_device.h"
#include "basic_ops.h"
#include "Cpu.h"
#include "micro_timer.h"
@@ -45,7 +44,7 @@ public:
stopProcessing();
}
inline static QString name( void )
inline static QString name()
{
return( QT_TRANSLATE_NOOP( "setupWidget",
"Dummy (no sound output)" ) );
@@ -64,11 +63,11 @@ public:
{
}
virtual void saveSettings( void )
virtual void saveSettings()
{
}
virtual void show( void )
virtual void show()
{
parentWidget()->hide();
QWidget::show();
@@ -78,12 +77,12 @@ public:
private:
virtual void startProcessing( void )
virtual void startProcessing()
{
start();
}
virtual void stopProcessing( void )
virtual void stopProcessing()
{
if( isRunning() )
{
@@ -92,7 +91,7 @@ private:
}
}
virtual void run( void )
virtual void run()
{
microTimer timer;
while( true )
@@ -104,7 +103,7 @@ private:
{
break;
}
alignedFreeFrames( b );
CPU::freeFrames( b );
const Sint32 microseconds = static_cast<Sint32>(
getMixer()->framesPerPeriod() *

View File

@@ -3,7 +3,7 @@
*
* Copyright (c) 2006-2008 Danny McRae <khjklujn/at/users.sourceforge.net>
* Copyright (c) 2009 Tobias Doerffel <tobydox/at/users.sourceforge.net>
*
*
* This file is part of Linux MultiMedia Studio - http://lmms.sourceforge.net
*
* This program is free software; you can redistribute it and/or
@@ -23,7 +23,6 @@
*
*/
#include <QtGui/QMessageBox>
#include "ladspa_effect.h"
@@ -35,7 +34,7 @@
#include "ladspa_subplugin_features.h"
#include "mixer.h"
#include "effect_chain.h"
#include "basic_ops.h"
#include "Cpu.h"
#include "automation_pattern.h"
#include "controller_connection.h"
@@ -82,7 +81,7 @@ ladspaEffect::ladspaEffect( model * _parent,
arg( m_key.second ),
QMessageBox::Ok, QMessageBox::NoButton );
}
setOkay( FALSE );
setOkay( false );
return;
}
@@ -105,7 +104,7 @@ ladspaEffect::~ladspaEffect()
void ladspaEffect::changeSampleRate( void )
void ladspaEffect::changeSampleRate()
{
multimediaProject mmp( multimediaProject::EffectSettings );
m_controls->saveState( mmp, mmp.content() );
@@ -141,7 +140,7 @@ bool ladspaEffect::processAudioBuffer( sampleFrame * _buf,
if( !isOkay() || dontRun() || !isRunning() || !isEnabled() )
{
m_pluginMutex.unlock();
return( FALSE );
return false;
}
int frames = _frames;
@@ -150,7 +149,7 @@ bool ladspaEffect::processAudioBuffer( sampleFrame * _buf,
if( m_maxSampleRate < engine::getMixer()->processingSampleRate() )
{
o_buf = _buf;
_buf = alignedAllocFrames( _frames );
_buf = CPU::allocFrames( _frames );
sampleDown( o_buf, _buf, m_maxSampleRate );
frames = _frames * m_maxSampleRate /
engine::getMixer()->processingSampleRate();
@@ -258,7 +257,7 @@ bool ladspaEffect::processAudioBuffer( sampleFrame * _buf,
}
if( channel >= 1 && channel <= DEFAULT_CHANNELS )
{
alignedBufWetDryMixSplitted( _buf, buffers[0], buffers[1],
CPU::bufWetDryMixSplitted( _buf, buffers[0], buffers[1],
getWetLevel(), getDryLevel(), frames );
}
@@ -272,7 +271,7 @@ bool ladspaEffect::processAudioBuffer( sampleFrame * _buf,
if( o_buf != NULL )
{
sampleBack( _buf, o_buf, m_maxSampleRate );
alignedFreeFrames( _buf );
CPU::freeFrames( _buf );
}
checkGate( out_sum / frames );
@@ -280,7 +279,7 @@ bool ladspaEffect::processAudioBuffer( sampleFrame * _buf,
bool is_running = isRunning();
m_pluginMutex.unlock();
return( is_running );
return is_running;
}
@@ -298,7 +297,7 @@ void ladspaEffect::setControl( int _control, LADSPA_Data _value )
void ladspaEffect::pluginInstantiation( void )
void ladspaEffect::pluginInstantiation()
{
m_maxSampleRate = maxSamplerate( displayName() );
@@ -469,7 +468,7 @@ void ladspaEffect::pluginInstantiation( void )
QMessageBox::warning( 0, "Effect",
"Can't get LADSPA descriptor function: " + m_key.second,
QMessageBox::Ok, QMessageBox::NoButton );
setOkay( FALSE );
setOkay( false );
return;
}
if( m_descriptor->run == NULL )
@@ -477,7 +476,7 @@ void ladspaEffect::pluginInstantiation( void )
QMessageBox::warning( 0, "Effect",
"Plugin has no processor: " + m_key.second,
QMessageBox::Ok, QMessageBox::NoButton );
setDontRun( TRUE );
setDontRun( true );
}
for( ch_cnt_t proc = 0; proc < getProcessorCount(); proc++ )
{
@@ -488,7 +487,7 @@ void ladspaEffect::pluginInstantiation( void )
QMessageBox::warning( 0, "Effect",
"Can't get LADSPA instance: " + m_key.second,
QMessageBox::Ok, QMessageBox::NoButton );
setOkay( FALSE );
setOkay( false );
return;
}
m_handles.append( effect );
@@ -508,7 +507,7 @@ void ladspaEffect::pluginInstantiation( void )
QMessageBox::warning( 0, "Effect",
"Failed to connect port: " + m_key.second,
QMessageBox::Ok, QMessageBox::NoButton );
setDontRun( TRUE );
setDontRun( true );
return;
}
}
@@ -525,7 +524,7 @@ void ladspaEffect::pluginInstantiation( void )
void ladspaEffect::pluginDestruction( void )
void ladspaEffect::pluginDestruction()
{
if( !isOkay() )
{
@@ -571,9 +570,9 @@ sample_rate_t ladspaEffect::maxSamplerate( const QString & _name )
}
if( __buggy_plugins.contains( _name ) )
{
return( __buggy_plugins[_name] );
return __buggy_plugins[_name];
}
return( engine::getMixer()->processingSampleRate() );
return engine::getMixer()->processingSampleRate();
}
@@ -585,9 +584,9 @@ extern "C"
// necessary for getting instance out of shared lib
plugin * PLUGIN_EXPORT lmms_plugin_main( model * _parent, void * _data )
{
return( new ladspaEffect( _parent,
return new ladspaEffect( _parent,
static_cast<const plugin::descriptor::subPluginFeatures::key *>(
_data ) ) );
_data ) );
}
}

View File

@@ -1,8 +1,8 @@
/*
* basic_ops.cpp - basic memory operations
* Cpu.cpp - CPU specific accelerated operations
*
* Copyright (c) 2008-2009 Tobias Doerffel <tobydox/at/users.sourceforge.net>
*
* Copyright (c) 2008 Tobias Doerffel <tobydox/at/users.sourceforge.net>
*
* This file is part of Linux MultiMedia Studio - http://lmms.sourceforge.net
*
* This program is free software; you can redistribute it and/or
@@ -23,17 +23,19 @@
*/
#include "basic_ops.h"
#include "Cpu.h"
#include <cstdlib>
#include <cstdio>
#include <memory.h>
void * alignedMalloc( int _bytes )
namespace CPU
{
char *ptr,*ptr2,*aligned_ptr;
void * memAlloc( int _bytes )
{
char *ptr,*ptr2,*_ptr;
int align_mask = ALIGN_SIZE- 1;
ptr =(char *) malloc( _bytes + ALIGN_SIZE + sizeof(int) );
if( ptr == NULL )
@@ -42,17 +44,19 @@ void * alignedMalloc( int _bytes )
}
ptr2 = ptr + sizeof(int);
aligned_ptr = ptr2 + ( ALIGN_SIZE- ( (size_t) ptr2 & align_mask ) );
_ptr = ptr2 + ( ALIGN_SIZE- ( (size_t) ptr2 & align_mask ) );
ptr2 = aligned_ptr - sizeof(int);
*((int *) ptr2) = (int)( aligned_ptr - ptr );
ptr2 = _ptr - sizeof(int);
*((int *) ptr2) = (int)( _ptr - ptr );
return aligned_ptr;
return _ptr;
}
void alignedFree( void * _buf )
void memFree( void * _buf )
{
if( _buf )
{
@@ -66,22 +70,26 @@ void alignedFree( void * _buf )
}
sampleFrameA * alignedAllocFrames( int _n )
sampleFrameA * allocFrames( int _n )
{
return (sampleFrameA *) alignedMalloc( _n * sizeof( sampleFrameA ) );
return (sampleFrameA *) memAlloc( _n * sizeof( sampleFrameA ) );
}
void alignedFreeFrames( sampleFrame * _buf )
void freeFrames( sampleFrame * _buf )
{
alignedFree( _buf );
memFree( _buf );
}
// slow fallback
void alignedMemCpyNoOpt( void * RP _dst, const void * RP _src, int _size )
void memCpyNoOpt( void * RP _dst, const void * RP _src, int _size )
{
const int s = _size / sizeof( int );
const int * RP src = (const int *) _src;
@@ -110,7 +118,7 @@ void alignedMemCpyNoOpt( void * RP _dst, const void * RP _src, int _size )
// slow fallback
void alignedMemClearNoOpt( void * _dst, int _size )
void memClearNoOpt( void * _dst, int _size )
{
const int s = _size / ( sizeof( int ) * 4 );
int * dst = (int *) _dst;
@@ -126,7 +134,7 @@ void alignedMemClearNoOpt( void * _dst, int _size )
void alignedBufApplyGainNoOpt( sampleFrameA * RP _dst, float _gain,
void bufApplyGainNoOpt( sampleFrameA * RP _dst, float _gain,
int _frames )
{
for( int i = 0; i < _frames; )
@@ -152,7 +160,7 @@ void alignedBufApplyGainNoOpt( sampleFrameA * RP _dst, float _gain,
}
void alignedBufMixNoOpt( sampleFrameA * RP _dst, const sampleFrameA * RP _src,
void bufMixNoOpt( sampleFrameA * RP _dst, const sampleFrameA * RP _src,
int _frames )
{
for( int i = 0; i < _frames; )
@@ -171,7 +179,7 @@ void alignedBufMixNoOpt( sampleFrameA * RP _dst, const sampleFrameA * RP _src,
void alignedBufMixLRCoeffNoOpt( sampleFrameA * RP _dst,
void bufMixLRCoeffNoOpt( sampleFrameA * RP _dst,
const sampleFrameA * RP _src,
float _left, float _right, int _frames )
{
@@ -217,7 +225,7 @@ void unalignedBufMixLRCoeffNoOpt( sampleFrame * RP _dst,
void alignedBufWetDryMixNoOpt( sampleFrameA * RP _dst,
void bufWetDryMixNoOpt( sampleFrameA * RP _dst,
const sampleFrameA * RP _src,
float _wet, float _dry, int _frames )
{
@@ -231,7 +239,7 @@ void alignedBufWetDryMixNoOpt( sampleFrameA * RP _dst,
void alignedBufWetDryMixSplittedNoOpt( sampleFrameA * RP _dst,
void bufWetDryMixSplittedNoOpt( sampleFrameA * RP _dst,
const float * RP _left,
const float * RP _right,
float _wet, float _dry, int _frames )
@@ -248,7 +256,7 @@ void alignedBufWetDryMixSplittedNoOpt( sampleFrameA * RP _dst,
int alignedConvertToS16NoOpt( const sampleFrameA * RP _src,
int convertToS16NoOpt( const sampleFrameA * RP _src,
intSampleFrameA * RP _dst,
const fpp_t _frames,
const float _master_gain,
@@ -294,15 +302,15 @@ int alignedConvertToS16NoOpt( const sampleFrameA * RP _src,
}
alignedMemCpyFunc alignedMemCpy = alignedMemCpyNoOpt;
alignedMemClearFunc alignedMemClear = alignedMemClearNoOpt;
alignedBufApplyGainFunc alignedBufApplyGain = alignedBufApplyGainNoOpt;
alignedBufMixFunc alignedBufMix = alignedBufMixNoOpt;
alignedBufMixLRCoeffFunc alignedBufMixLRCoeff = alignedBufMixLRCoeffNoOpt;
unalignedBufMixLRCoeffFunc unalignedBufMixLRCoeff = unalignedBufMixLRCoeffNoOpt;
alignedBufWetDryMixFunc alignedBufWetDryMix = alignedBufWetDryMixNoOpt;
alignedBufWetDryMixSplittedFunc alignedBufWetDryMixSplitted = alignedBufWetDryMixSplittedNoOpt;
alignedConvertToS16Func alignedConvertToS16 = alignedConvertToS16NoOpt;
MemCpyFunc memCpy = memCpyNoOpt;
MemClearFunc memClear = memClearNoOpt;
BufApplyGainFunc bufApplyGain = bufApplyGainNoOpt;
BufMixFunc bufMix = bufMixNoOpt;
BufMixLRCoeffFunc bufMixLRCoeff = bufMixLRCoeffNoOpt;
UnalignedBufMixLRCoeffFunc unalignedBufMixLRCoeff = unalignedBufMixLRCoeffNoOpt;
BufWetDryMixFunc bufWetDryMix = bufWetDryMixNoOpt;
BufWetDryMixSplittedFunc bufWetDryMixSplitted = bufWetDryMixSplittedNoOpt;
ConvertToS16Func convertToS16 = convertToS16NoOpt;
#ifdef X86_OPTIMIZATIONS
@@ -322,28 +330,28 @@ enum CPUFeatures
extern "C"
{
#ifdef LMMS_HOST_X86
void alignedMemCpyMMX( void * RP _dst, const void * RP _src, int _size );
void alignedMemClearMMX( void * RP _dst, int _size );
void memCpyMMX( void * RP _dst, const void * RP _src, int _size );
void memClearMMX( void * RP _dst, int _size );
#endif
void alignedMemCpySSE( void * RP _dst, const void * RP _src, int _size );
void alignedMemClearSSE( void * RP _dst, int _size );
void alignedBufApplyGainSSE( sampleFrameA * RP _dst, float _gain, int _frames );
void alignedBufMixSSE( sampleFrameA * RP _dst, const sampleFrameA * RP _src, int _frames );
void alignedBufMixLRCoeffSSE( sampleFrameA * RP _dst, const sampleFrameA * RP _src, float _left, float _right, int _frames );
void memCpySSE( void * RP _dst, const void * RP _src, int _size );
void memClearSSE( void * RP _dst, int _size );
void bufApplyGainSSE( sampleFrameA * RP _dst, float _gain, int _frames );
void bufMixSSE( sampleFrameA * RP _dst, const sampleFrameA * RP _src, int _frames );
void bufMixLRCoeffSSE( sampleFrameA * RP _dst, const sampleFrameA * RP _src, float _left, float _right, int _frames );
void unalignedBufMixLRCoeffSSE( sampleFrame * RP _dst, const sampleFrame * RP _src, const float _left, const float _right, int _frames );
void alignedBufWetDryMixSSE( sampleFrameA * RP _dst, const sampleFrameA * RP _src, float _wet, float _dry, int _frames );
void alignedBufWetDryMixSplittedSSE( sampleFrameA * RP _dst, const float * RP _left, const float * RP _right, float _wet, float _dry, int _frames );
void bufWetDryMixSSE( sampleFrameA * RP _dst, const sampleFrameA * RP _src, float _wet, float _dry, int _frames );
void bufWetDryMixSplittedSSE( sampleFrameA * RP _dst, const float * RP _left, const float * RP _right, float _wet, float _dry, int _frames );
#ifdef X86_OPTIMIZATIONS
void alignedMemCpySSE2( void * RP _dst, const void * RP _src, int _size );
void alignedMemClearSSE2( void * RP _dst, int _size );
int alignedConvertToS16SSE2( const sampleFrameA * RP _src, intSampleFrameA * RP _dst, const fpp_t _frames, const float _master_gain, const bool _convert_endian );
void memCpySSE2( void * RP _dst, const void * RP _src, int _size );
void memClearSSE2( void * RP _dst, int _size );
int convertToS16SSE2( const sampleFrameA * RP _src, intSampleFrameA * RP _dst, const fpp_t _frames, const float _master_gain, const bool _convert_endian );
#endif
} ;
#endif
void initBasicOps( void )
void init()
{
#ifdef X86_OPTIMIZATIONS
static bool extensions_checked = false;
@@ -428,29 +436,29 @@ void initBasicOps( void )
#ifdef LMMS_HOST_X86
if( features & MMX )
{
alignedMemCpy = alignedMemCpyMMX;
alignedMemClear = alignedMemClearMMX;
memCpy = memCpyMMX;
memClear = memClearMMX;
}
#endif
if( features & SSE )
{
fprintf( stderr, "Using SSE optimized routines\n" );
alignedMemCpy = alignedMemCpySSE;
alignedMemClear = alignedMemClearSSE;
alignedBufApplyGain = alignedBufApplyGainSSE;
alignedBufMix = alignedBufMixSSE;
alignedBufMixLRCoeff = alignedBufMixLRCoeffSSE;
memCpy = memCpySSE;
memClear = memClearSSE;
bufApplyGain = bufApplyGainSSE;
bufMix = bufMixSSE;
bufMixLRCoeff = bufMixLRCoeffSSE;
unalignedBufMixLRCoeff = unalignedBufMixLRCoeffSSE;
alignedBufWetDryMix = alignedBufWetDryMixSSE;
alignedBufWetDryMixSplitted =
alignedBufWetDryMixSplittedSSE;
bufWetDryMix = bufWetDryMixSSE;
bufWetDryMixSplitted =
bufWetDryMixSplittedSSE;
}
if( features & SSE2 )
{
fprintf( stderr, "Using SSE2 optimized routines\n" );
alignedMemCpy = alignedMemCpySSE2;
alignedMemClear = alignedMemClearSSE2;
alignedConvertToS16 = alignedConvertToS16SSE2;
memCpy = memCpySSE2;
memClear = memClearSSE2;
convertToS16 = convertToS16SSE2;
}
extensions_checked = true;
}
@@ -458,4 +466,5 @@ void initBasicOps( void )
}
}

View File

@@ -1,8 +1,8 @@
/*
* basic_ops_x86.c - x86 specific optimized operations
* CpuX86.c - x86 specific optimized operations
*
* Copyright (c) 2008-2009 Tobias Doerffel <tobydox/at/users.sourceforge.net>
*
* Copyright (c) 2008 Tobias Doerffel <tobydox/at/users.sourceforge.net>
*
* This file is part of Linux MultiMedia Studio - http://lmms.sourceforge.net
*
* This program is free software; you can redistribute it and/or
@@ -22,9 +22,7 @@
*
*/
#include "basic_ops.h"
#include "Cpu.h"
#ifdef X86_OPTIMIZATIONS
@@ -32,7 +30,7 @@
#include <mmintrin.h>
void alignedMemCpyMMX( void * RP _dst, const void * RP _src, int _size )
void memCpyMMX( void * RP _dst, const void * RP _src, int _size )
{
const int s = _size / ( sizeof( __m64 ) * 8 );
int i;
@@ -79,7 +77,7 @@ void alignedMemCpyMMX( void * RP _dst, const void * RP _src, int _size )
void alignedMemClearMMX( void * RP _dst, int _size )
void memClearMMX( void * RP _dst, int _size )
{
__m64 * dst = (__m64 *) _dst;
const int s = _size / ( sizeof( *dst ) * 8 );
@@ -109,7 +107,7 @@ void alignedMemClearMMX( void * RP _dst, int _size )
#include <xmmintrin.h>
void alignedMemCpySSE( void * RP _dst, const void * RP _src, int _size )
void memCpySSE( void * RP _dst, const void * RP _src, int _size )
{
__m128 * dst = (__m128 *) _dst;
__m128 * src = (__m128 *) _src;
@@ -133,7 +131,7 @@ void alignedMemCpySSE( void * RP _dst, const void * RP _src, int _size )
void alignedMemClearSSE( void * RP _dst, int _size )
void memClearSSE( void * RP _dst, int _size )
{
__m128 * dst = (__m128 *) _dst;
const int s = _size / ( sizeof( *dst ) * 4 );
@@ -152,7 +150,7 @@ void alignedMemClearSSE( void * RP _dst, int _size )
void alignedBufApplyGainSSE( sampleFrameA * RP _dst, float _gain, int _frames )
void bufApplyGainSSE( sampleFrameA * RP _dst, float _gain, int _frames )
{
int i;
for( i = 0; i < _frames; )
@@ -180,7 +178,7 @@ void alignedBufApplyGainSSE( sampleFrameA * RP _dst, float _gain, int _frames )
void alignedBufMixSSE( sampleFrameA * RP _dst, const sampleFrameA * RP _src,
void bufMixSSE( sampleFrameA * RP _dst, const sampleFrameA * RP _src,
int _frames )
{
int i;
@@ -209,7 +207,7 @@ void alignedBufMixSSE( sampleFrameA * RP _dst, const sampleFrameA * RP _src,
void alignedBufMixLRCoeffSSE( sampleFrameA * RP _dst,
void bufMixLRCoeffSSE( sampleFrameA * RP _dst,
const sampleFrameA * RP _src,
float _left, float _right, int _frames )
{
@@ -257,7 +255,7 @@ void unalignedBufMixLRCoeffSSE( sampleFrame * RP _dst, const sampleFrame * RP _s
void alignedBufWetDryMixSSE( sampleFrameA * RP _dst,
void bufWetDryMixSSE( sampleFrameA * RP _dst,
const sampleFrameA * RP _src,
float _wet, float _dry, int _frames )
{
@@ -279,7 +277,7 @@ void alignedBufWetDryMixSSE( sampleFrameA * RP _dst,
void alignedBufWetDryMixSplittedSSE( sampleFrameA * RP _dst,
void bufWetDryMixSplittedSSE( sampleFrameA * RP _dst,
const float * RP _left,
const float * RP _right,
float _wet, float _dry, int _frames )
@@ -304,7 +302,7 @@ void alignedBufWetDryMixSplittedSSE( sampleFrameA * RP _dst,
#include <emmintrin.h>
void alignedMemCpySSE2( void * RP _dst, const void * RP _src, int _size )
void memCpySSE2( void * RP _dst, const void * RP _src, int _size )
{
__m128i * dst = (__m128i *) _dst;
__m128i * src = (__m128i *) _src;
@@ -324,7 +322,7 @@ void alignedMemCpySSE2( void * RP _dst, const void * RP _src, int _size )
void alignedMemClearSSE2( void * RP _dst, int _size )
void memClearSSE2( void * RP _dst, int _size )
{
__m128i * dst = (__m128i *) _dst;
const int s = _size / ( sizeof( *dst ) * 4 );
@@ -342,7 +340,7 @@ void alignedMemClearSSE2( void * RP _dst, int _size )
int alignedConvertToS16SSE2( const sampleFrameA * RP _src,
int convertToS16SSE2( const sampleFrameA * RP _src,
intSampleFrameA * RP _dst,
const fpp_t _frames,
const float _master_gain,

View File

@@ -1,10 +1,8 @@
#ifndef SINGLE_SOURCE_COMPILE
/*
* audio_alsa.cpp - device-class which implements ALSA-PCM-output
*
* Copyright (c) 2004-2009 Tobias Doerffel <tobydox/at/users.sourceforge.net>
*
*
* This file is part of Linux MultiMedia Studio - http://lmms.sourceforge.net
*
* This program is free software; you can redistribute it and/or
@@ -25,7 +23,6 @@
*/
#include <QtGui/QLineEdit>
#include <QtGui/QLabel>
@@ -39,7 +36,7 @@
#include "lcd_spinbox.h"
#include "gui_templates.h"
#include "templates.h"
#include "basic_ops.h"
#include "Cpu.h"
@@ -230,10 +227,10 @@ void audioALSA::applyQualitySettings( void )
void audioALSA::run( void )
{
sampleFrameA * temp = alignedAllocFrames(
sampleFrameA * temp = CPU::allocFrames(
getMixer()->framesPerPeriod() );
intSampleFrameA * outbuf = (intSampleFrameA *)
alignedMalloc( sizeof( intSampleFrameA ) * channels() /
CPU::memAlloc( sizeof( intSampleFrameA ) * channels() /
DEFAULT_CHANNELS * getMixer()->framesPerPeriod() );
int_sample_t * pcmbuf = new int_sample_t[m_periodSize * channels()];
@@ -261,7 +258,7 @@ void audioALSA::run( void )
}
outbuf_size = frames * channels();
alignedConvertToS16( temp, outbuf, frames,
CPU::convertToS16( temp, outbuf, frames,
getMixer()->masterGain(),
m_convertEndian );
}
@@ -300,8 +297,8 @@ void audioALSA::run( void )
}
}
alignedFreeFrames( temp );
alignedFree( outbuf );
CPU::freeFrames( temp );
CPU::memFree( outbuf );
delete[] pcmbuf;
}
@@ -526,5 +523,3 @@ void audioALSA::setupWidget::saveSettings( void )
#endif
#endif

View File

@@ -1,10 +1,8 @@
#ifndef SINGLE_SOURCE_COMPILE
/*
* audio_device.cpp - base-class for audio-devices used by LMMS-mixer
*
* Copyright (c) 2004-2008 Tobias Doerffel <tobydox/at/users.sourceforge.net>
*
* Copyright (c) 2004-2009 Tobias Doerffel <tobydox/at/users.sourceforge.net>
*
* This file is part of Linux MultiMedia Studio - http://lmms.sourceforge.net
*
* This program is free software; you can redistribute it and/or
@@ -25,13 +23,10 @@
*/
#include <cstring>
#include "audio_device.h"
#include "config_mgr.h"
#include "debug.h"
#include "basic_ops.h"
#include "Cpu.h"
@@ -40,7 +35,7 @@ audioDevice::audioDevice( const ch_cnt_t _channels, mixer * _mixer ) :
m_sampleRate( _mixer->processingSampleRate() ),
m_channels( _channels ),
m_mixer( _mixer ),
m_buffer( alignedAllocFrames( getMixer()->framesPerPeriod() ) )
m_buffer( CPU::allocFrames( getMixer()->framesPerPeriod() ) )
{
int error;
if( ( m_srcState = src_new(
@@ -57,7 +52,7 @@ audioDevice::audioDevice( const ch_cnt_t _channels, mixer * _mixer ) :
audioDevice::~audioDevice()
{
src_delete( m_srcState );
alignedFreeFrames( m_buffer );
CPU::freeFrames( m_buffer );
m_devMutex.tryLock();
unlock();
@@ -104,7 +99,7 @@ fpp_t audioDevice::getNextBuffer( sampleFrameA * _ab )
}
else
{
alignedMemCpy( _ab, b, frames * sizeof( surroundSampleFrame ) );
CPU::memCpy( _ab, b, frames * sizeof( surroundSampleFrame ) );
}
// release lock
@@ -112,7 +107,7 @@ fpp_t audioDevice::getNextBuffer( sampleFrameA * _ab )
if( getMixer()->hasFifoWriter() )
{
alignedFreeFrames( b );
CPU::freeFrames( b );
}
return frames;
@@ -200,7 +195,7 @@ void audioDevice::resample( const sampleFrame * _src, const fpp_t _frames,
void audioDevice::clearS16Buffer( intSampleFrameA * _outbuf, const fpp_t _frames )
{
alignedMemClear( _outbuf, _frames * sizeof( *_outbuf ) );
CPU::memClear( _outbuf, _frames * sizeof( *_outbuf ) );
// memset( _outbuf, 0, _frames * channels() * BYTES_PER_INT_SAMPLE );
}
@@ -213,5 +208,3 @@ bool audioDevice::hqAudio( void ) const
}
#endif

View File

@@ -1,11 +1,9 @@
#ifndef SINGLE_SOURCE_COMPILE
/*
* audio_file_wave.cpp - audio-device which encodes wave-stream and writes it
* into a WAVE-file. This is used for song-export.
*
* Copyright (c) 2004-2009 Tobias Doerffel <tobydox/at/users.sourceforge.net>
*
*
* This file is part of Linux MultiMedia Studio - http://lmms.sourceforge.net
*
* This program is free software; you can redistribute it and/or
@@ -26,10 +24,9 @@
*/
#include "audio_file_wave.h"
#include "endian_handling.h"
#include "basic_ops.h"
#include "Cpu.h"
#include <cstring>
@@ -89,14 +86,14 @@ void audioFileWave::writeBuffer( const surroundSampleFrame * _ab,
{
if( depth() == 16 )
{
intSampleFrameA * buf = (intSampleFrameA *) alignedMalloc(
intSampleFrameA * buf = (intSampleFrameA *) CPU::memAlloc(
sizeof( intSampleFrameA ) * _frames );
alignedConvertToS16( _ab, buf, _frames, _master_gain,
CPU::convertToS16( _ab, buf, _frames, _master_gain,
!isLittleEndian() );
sf_writef_short( m_sf, (int_sample_t *) buf, _frames );
alignedFree( buf );
CPU::memFree( buf );
}
else
{
@@ -123,4 +120,3 @@ void audioFileWave::finishEncoding( void )
}
#endif

View File

@@ -44,7 +44,7 @@
#include "lcd_spinbox.h"
#include "audio_port.h"
#include "main_window.h"
#include "basic_ops.h"
#include "Cpu.h"
@@ -57,7 +57,7 @@ audioJACK::audioJACK( bool & _success_ful, mixer * _mixer ) :
m_client( NULL ),
m_active( false ),
m_stopSemaphore( 1 ),
m_outBuf( alignedAllocFrames( getMixer()->framesPerPeriod() ) ),
m_outBuf( CPU::allocFrames( getMixer()->framesPerPeriod() ) ),
m_framesDoneInCurBuf( 0 ),
m_framesToDoInCurBuf( 0 )
{
@@ -93,7 +93,7 @@ audioJACK::~audioJACK()
jack_client_close( m_client );
}
alignedFreeFrames( m_outBuf );
CPU::freeFrames( m_outBuf );
}

View File

@@ -1,10 +1,8 @@
#ifndef SINGLE_SOURCE_COMPILE
/*
* audio_oss.cpp - device-class that implements OSS-PCM-output
*
* Copyright (c) 2004-2009 Tobias Doerffel <tobydox/at/users.sourceforge.net>
*
*
* This file is part of Linux MultiMedia Studio - http://lmms.sourceforge.net
*
* This program is free software; you can redistribute it and/or
@@ -39,7 +37,7 @@
#include "engine.h"
#include "gui_templates.h"
#include "templates.h"
#include "basic_ops.h"
#include "Cpu.h"
#ifdef LMMS_HAVE_UNISTD_H
#include <unistd.h>
@@ -299,10 +297,10 @@ void audioOSS::applyQualitySettings( void )
void audioOSS::run( void )
{
sampleFrameA * temp = alignedAllocFrames(
sampleFrameA * temp = CPU::allocFrames(
getMixer()->framesPerPeriod() );
intSampleFrameA * outbuf = (intSampleFrameA *)
alignedMalloc( sizeof( intSampleFrameA ) *
CPU::memAlloc( sizeof( intSampleFrameA ) *
getMixer()->framesPerPeriod() );
while( 1 )
@@ -313,7 +311,7 @@ void audioOSS::run( void )
break;
}
int bytes = alignedConvertToS16( temp, outbuf, frames,
int bytes = CPU::convertToS16( temp, outbuf, frames,
getMixer()->masterGain(),
m_convertEndian );
if( write( m_audioFD, outbuf, bytes ) != bytes )
@@ -322,8 +320,8 @@ void audioOSS::run( void )
}
}
alignedFreeFrames( temp );
alignedFree( outbuf );
CPU::freeFrames( temp );
CPU::memFree( outbuf );
}
@@ -374,5 +372,3 @@ void audioOSS::setupWidget::saveSettings( void )
#endif
#endif

View File

@@ -26,14 +26,14 @@
#include "audio_device.h"
#include "effect_chain.h"
#include "engine.h"
#include "basic_ops.h"
#include "Cpu.h"
audioPort::audioPort( const QString & _name, bool _has_effect_chain ) :
m_bufferUsage( NoUsage ),
m_firstBuffer( alignedAllocFrames(
m_firstBuffer( CPU::allocFrames(
engine::getMixer()->framesPerPeriod() ) ),
m_secondBuffer( alignedAllocFrames(
m_secondBuffer( CPU::allocFrames(
engine::getMixer()->framesPerPeriod() ) ),
m_extOutputEnabled( false ),
m_nextFxChannel( 0 ),
@@ -55,8 +55,8 @@ audioPort::~audioPort()
{
setExtOutputEnabled( false );
engine::getMixer()->removeAudioPort( this );
alignedFreeFrames( m_firstBuffer );
alignedFreeFrames( m_secondBuffer );
CPU::freeFrames( m_firstBuffer );
CPU::freeFrames( m_secondBuffer );
delete m_effects;
}

View File

@@ -60,7 +60,7 @@ audioPortAudio::audioPortAudio( bool & _success_ful, mixer * _mixer ) :
DEFAULT_CHANNELS, SURROUND_CHANNELS ),
_mixer ),
m_wasPAInitError( false ),
m_outBuf( alignedAllocFrames( getMixer()->framesPerPeriod() ) ),
m_outBuf( CPU::allocFrames( getMixer()->framesPerPeriod() ) ),
m_outBufPos( 0 ),
m_stopSemaphore( 1 )
{
@@ -206,7 +206,7 @@ audioPortAudio::~audioPortAudio()
{
Pa_Terminate();
}
alignedFreeFrames( m_outBuf );
CPU::freeFrames( m_outBuf );
}

View File

@@ -1,10 +1,8 @@
#ifndef SINGLE_SOURCE_COMPILE
/*
* audio_pulseaudio.cpp - device-class which implements PulseAudio-output
*
* Copyright (c) 2008 Tobias Doerffel <tobydox/at/users.sourceforge.net>
*
* Copyright (c) 2008-2009 Tobias Doerffel <tobydox/at/users.sourceforge.net>
*
* This file is part of Linux MultiMedia Studio - http://lmms.sourceforge.net
*
* This program is free software; you can redistribute it and/or
@@ -25,7 +23,6 @@
*/
#include <QtGui/QLineEdit>
#include <QtGui/QLabel>
@@ -40,7 +37,7 @@
#include "lcd_spinbox.h"
#include "gui_templates.h"
#include "templates.h"
#include "basic_ops.h"
#include "Cpu.h"
static void stream_write_callback(pa_stream *s, size_t length, void *userdata)
@@ -231,7 +228,7 @@ void audioPulseAudio::run( void )
void audioPulseAudio::streamWriteCallback(pa_stream *s, size_t length)
{
const fpp_t fpp = getMixer()->framesPerPeriod();
sampleFrameA * temp = alignedAllocFrames( fpp );
sampleFrameA * temp = CPU::allocFrames( fpp );
Sint16 * pcmbuf = (Sint16*)pa_xmalloc( fpp * channels() *
sizeof(Sint16) );
@@ -243,7 +240,7 @@ void audioPulseAudio::streamWriteCallback(pa_stream *s, size_t length)
{
return;
}
int bytes = alignedConvertToS16( temp,
int bytes = CPU::convertToS16( temp,
(intSampleFrameA *) pcmbuf,
frames,
getMixer()->masterGain(),
@@ -257,7 +254,7 @@ void audioPulseAudio::streamWriteCallback(pa_stream *s, size_t length)
}
pa_xfree( pcmbuf );
alignedFreeFrames( temp );
CPU::freeFrames( temp );
}
@@ -308,5 +305,3 @@ void audioPulseAudio::setupWidget::saveSettings( void )
#endif
#endif

View File

@@ -1,10 +1,8 @@
#ifndef SINGLE_SOURCE_COMPILE
/*
* audio_sdl.cpp - device-class that performs PCM-output via SDL
*
* Copyright (c) 2004-2008 Tobias Doerffel <tobydox/at/users.sourceforge.net>
*
*
* This file is part of Linux MultiMedia Studio - http://lmms.sourceforge.net
*
* This program is free software; you can redistribute it and/or
@@ -25,7 +23,6 @@
*/
#include "audio_sdl.h"
#ifdef LMMS_HAVE_SDL
@@ -38,13 +35,13 @@
#include "config_mgr.h"
#include "gui_templates.h"
#include "templates.h"
#include "basic_ops.h"
#include "Cpu.h"
audioSDL::audioSDL( bool & _success_ful, mixer * _mixer ) :
audioDevice( DEFAULT_CHANNELS, _mixer ),
m_outBuf( alignedAllocFrames( getMixer()->framesPerPeriod() ) ),
m_outBuf( CPU::allocFrames( getMixer()->framesPerPeriod() ) ),
m_convertedBufPos( 0 ),
m_convertEndian( false ),
m_stopSemaphore( 1 )
@@ -53,7 +50,7 @@ audioSDL::audioSDL( bool & _success_ful, mixer * _mixer ) :
m_convertedBufSize = getMixer()->framesPerPeriod() *
sizeof( intSampleFrameA );
m_convertedBuf = (intSampleFrameA *) alignedMalloc( m_convertedBufSize );
m_convertedBuf = (intSampleFrameA *) CPU::memAlloc( m_convertedBufSize );
if( SDL_Init( SDL_INIT_AUDIO | SDL_INIT_NOPARACHUTE ) < 0 )
@@ -97,8 +94,8 @@ audioSDL::~audioSDL()
SDL_CloseAudio();
SDL_Quit();
alignedFree( m_convertedBuf );
alignedFreeFrames( m_outBuf );
CPU::memFree( m_convertedBuf );
CPU::freeFrames( m_outBuf );
}
@@ -192,7 +189,7 @@ void audioSDL::sdlAudioCallback( Uint8 * _buf, int _len )
}
m_convertedBufSize = frames * sizeof( intSampleFrameA );
alignedConvertToS16( m_outBuf,
CPU::convertToS16( m_outBuf,
m_convertedBuf,
frames,
getMixer()->masterGain(),
@@ -243,4 +240,3 @@ void audioSDL::setupWidget::saveSettings( void )
#endif
#endif

View File

@@ -1,555 +0,0 @@
.file "basic_ops_x86.c"
.text
.align 16
# alignedMemCpySSE (x86-64 build): block copy with 16-byte SSE moves.
# Registers as used below: %rdi = address written to, %rsi = address read
# from, %edx = byte count.
# The count is shifted right by 6, so only whole 64-byte chunks are copied;
# a trailing remainder of fewer than 64 bytes is left untouched.
# movaps faults on addresses that are not 16-byte aligned, so both pointers
# must be aligned.
.globl alignedMemCpySSE
.type alignedMemCpySSE, @function
alignedMemCpySSE:
.LFB509:
movslq %edx,%rdx
shrq $6, %rdx
testl %edx, %edx
jle .L4
# Turn the chunk count into the end offset in bytes (chunks * 64).
subl $1, %edx
xorl %eax, %eax
addq $1, %rdx
salq $6, %rdx
.align 16
# Main loop: %rax is the running byte offset, 64 bytes moved per pass.
.L3:
movaps (%rsi,%rax), %xmm0
movaps %xmm0, (%rdi,%rax)
movaps 16(%rsi,%rax), %xmm0
movaps %xmm0, 16(%rdi,%rax)
movaps 32(%rsi,%rax), %xmm0
movaps %xmm0, 32(%rdi,%rax)
movaps 48(%rsi,%rax), %xmm0
movaps %xmm0, 48(%rdi,%rax)
addq $64, %rax
cmpq %rdx, %rax
jne .L3
.L4:
rep
ret
.LFE509:
.size alignedMemCpySSE, .-alignedMemCpySSE
.align 16
# alignedMemClearSSE (x86-64 build): zero-fill with 16-byte SSE stores.
# Registers as used below: %rdi = address written to, %esi = byte count.
# Only whole 64-byte chunks are cleared (count >> 6); a remainder of fewer
# than 64 bytes is left untouched. movaps needs a 16-byte aligned address.
.globl alignedMemClearSSE
.type alignedMemClearSSE, @function
alignedMemClearSSE:
.LFB510:
movslq %esi,%rsi
shrq $6, %rsi
testl %esi, %esi
jle .L10
subl $1, %esi
# %xmm0 = all zero bits, the value stored by the loop.
xorps %xmm0, %xmm0
salq $6, %rsi
# %rax = end address (start + chunks * 64).
leaq 64(%rdi,%rsi), %rax
.align 16
# Main loop: %rdi is advanced 64 bytes per pass until it reaches %rax.
.L9:
movaps %xmm0, (%rdi)
movaps %xmm0, 16(%rdi)
movaps %xmm0, 32(%rdi)
movaps %xmm0, 48(%rdi)
addq $64, %rdi
cmpq %rax, %rdi
jne .L9
.L10:
rep
ret
.LFE510:
.size alignedMemClearSSE, .-alignedMemClearSSE
.align 16
# alignedBufApplyGainSSE (x86-64 build): multiply a float buffer in place
# by a scalar.
# Registers as used below: %rdi = buffer (read and written), %xmm0 = scalar
# factor, %esi = element count.
# The loop runs ((count - 1) >> 3) + 1 times and scales 64 bytes (16 floats)
# per pass, so one count unit corresponds to 8 bytes and the work is rounded
# up to a multiple of 8 units.
# NOTE(review): presumably a unit is one stereo float frame - confirm against
# the C source.
.globl alignedBufApplyGainSSE
.type alignedBufApplyGainSSE, @function
alignedBufApplyGainSSE:
.LFB511:
testl %esi, %esi
jle .L15
leal -1(%rsi), %edx
# Broadcast the scalar in lane 0 of %xmm0 to all four lanes.
shufps $0, %xmm0, %xmm0
xorl %eax, %eax
shrl $3, %edx
addl $1, %edx
.align 16
# Main loop: %eax counts passes, %edx holds the number of passes to run.
.L14:
movaps 16(%rdi), %xmm3
addl $1, %eax
movaps 32(%rdi), %xmm2
mulps %xmm0, %xmm3
movaps 48(%rdi), %xmm1
mulps %xmm0, %xmm2
movaps (%rdi), %xmm4
mulps %xmm0, %xmm1
movaps %xmm3, 16(%rdi)
mulps %xmm0, %xmm4
movaps %xmm2, 32(%rdi)
movaps %xmm1, 48(%rdi)
movaps %xmm4, (%rdi)
addq $64, %rdi
cmpl %eax, %edx
ja .L14
.L15:
rep
ret
.LFE511:
.size alignedBufApplyGainSSE, .-alignedBufApplyGainSSE
.align 16
# alignedBufMixSSE (x86-64 build): add one float buffer onto another.
# Registers as used below: %rdi = buffer that is read, summed and written
# back, %rsi = buffer that is only read, %edx = element count.
# The loop runs ((count - 1) >> 3) + 1 times and processes 64 bytes
# (16 floats) of each buffer per pass.
# movaps and the memory operands of addps need 16-byte aligned addresses.
.globl alignedBufMixSSE
.type alignedBufMixSSE, @function
alignedBufMixSSE:
.LFB512:
testl %edx, %edx
jle .L20
leal -1(%rdx), %ecx
xorl %eax, %eax
xorl %edx, %edx
shrl $3, %ecx
addl $1, %ecx
.align 16
# Main loop: %rax = byte offset, %edx = pass counter, %ecx = pass limit.
.L19:
movaps 16(%rdi,%rax), %xmm2
addl $1, %edx
movaps 32(%rdi,%rax), %xmm1
addps 16(%rsi,%rax), %xmm2
movaps 48(%rdi,%rax), %xmm0
addps 32(%rsi,%rax), %xmm1
movaps (%rdi,%rax), %xmm3
addps 48(%rsi,%rax), %xmm0
addps (%rsi,%rax), %xmm3
movaps %xmm2, 16(%rdi,%rax)
movaps %xmm1, 32(%rdi,%rax)
movaps %xmm0, 48(%rdi,%rax)
movaps %xmm3, (%rdi,%rax)
addq $64, %rax
cmpl %edx, %ecx
ja .L19
.L20:
rep
ret
.LFE512:
.size alignedBufMixSSE, .-alignedBufMixSSE
.align 16
# alignedBufMixLRCoeffSSE (x86-64 build): scaled accumulate with two
# alternating coefficients.
# Registers as used below: %rdi = buffer that is read, summed and written
# back, %rsi = buffer that is only read, %xmm0 / %xmm1 = the two scalar
# coefficients, %edx = element count.
# Per float: rdi[i] += rsi[i] * c, where c is %xmm0 for even i and %xmm1
# for odd i.
# The loop runs ((count - 1) >> 2) + 1 times, 32 bytes (8 floats) per pass.
.globl alignedBufMixLRCoeffSSE
.type alignedBufMixLRCoeffSSE, @function
alignedBufMixLRCoeffSSE:
.LFB513:
testl %edx, %edx
jle .L25
# Build the coefficient vector { xmm0, xmm1, xmm0, xmm1 } in %xmm0:
# unpcklps interleaves the two scalars, movlhps copies the low half up.
unpcklps %xmm1, %xmm0
leal -1(%rdx), %ecx
xorl %eax, %eax
xorl %edx, %edx
shrl $2, %ecx
movlhps %xmm0, %xmm0
addl $1, %ecx
.align 16
# Main loop: %rax = byte offset, %edx = pass counter, %ecx = pass limit.
.L24:
movaps 16(%rsi,%rax), %xmm2
addl $1, %edx
movaps (%rsi,%rax), %xmm3
mulps %xmm0, %xmm2
mulps %xmm0, %xmm3
addps 16(%rdi,%rax), %xmm2
addps (%rdi,%rax), %xmm3
movaps %xmm2, 16(%rdi,%rax)
movaps %xmm3, (%rdi,%rax)
addq $32, %rax
cmpl %edx, %ecx
ja .L24
.L25:
rep
ret
.LFE513:
.size alignedBufMixLRCoeffSSE, .-alignedBufMixLRCoeffSSE
.align 16
# alignedBufWetDryMixSSE (x86-64 build): weighted blend of two float buffers.
# Registers as used below: %rdi = buffer that is read and written back,
# %rsi = buffer that is only read, %xmm0 = factor applied to the %rsi data,
# %xmm1 = factor applied to the %rdi data, %edx = element count.
# Per float: rdi[i] = rdi[i] * xmm1 + rsi[i] * xmm0.
# The loop runs ((count - 1) >> 2) + 1 times, 32 bytes (8 floats) per pass.
.globl alignedBufWetDryMixSSE
.type alignedBufWetDryMixSSE, @function
alignedBufWetDryMixSSE:
.LFB515:
testl %edx, %edx
jle .L30
leal -1(%rdx), %ecx
# Broadcast both scalar factors across all four lanes.
shufps $0, %xmm1, %xmm1
shufps $0, %xmm0, %xmm0
xorl %eax, %eax
shrl $2, %ecx
xorl %edx, %edx
addl $1, %ecx
.align 16
# Main loop: %rax = byte offset, %edx = pass counter, %ecx = pass limit.
.L29:
movaps 16(%rsi,%rax), %xmm3
addl $1, %edx
movaps 16(%rdi,%rax), %xmm2
mulps %xmm0, %xmm3
movaps (%rsi,%rax), %xmm4
mulps %xmm1, %xmm2
mulps %xmm0, %xmm4
addps %xmm3, %xmm2
movaps (%rdi,%rax), %xmm3
mulps %xmm1, %xmm3
movaps %xmm2, 16(%rdi,%rax)
addps %xmm4, %xmm3
movaps %xmm3, (%rdi,%rax)
addq $32, %rax
cmpl %edx, %ecx
ja .L29
.L30:
rep
ret
.LFE515:
.size alignedBufWetDryMixSSE, .-alignedBufWetDryMixSSE
.align 16
# alignedBufWetDryMixSplittedSSE (x86-64 build): weighted blend of an
# interleaved two-float-per-element buffer with two separate float arrays.
# Registers as used below: %rdi = interleaved buffer (read and written back),
# %rsi = array blended into the first float of each element, %rdx = array
# blended into the second float of each element, %xmm0 = factor applied to
# the %rsi / %rdx data, %xmm1 = factor applied to the %rdi data,
# %ecx = element count.
# Per element i (as the scalar loop at .L38 shows):
#   rdi[2i]   = rdi[2i]   * xmm1 + rsi[i] * xmm0
#   rdi[2i+1] = rdi[2i+1] * xmm1 + rdx[i] * xmm0
# %rbp and %rbx are saved on entry and restored at .L39.
.globl alignedBufWetDryMixSplittedSSE
.type alignedBufWetDryMixSplittedSSE, @function
alignedBufWetDryMixSplittedSSE:
.LFB516:
pushq %rbp
.LCFI0:
testl %ecx, %ecx
pushq %rbx
.LCFI1:
jle .L39
# %ebx = number of element pairs = ((count - 1) >> 1) + 1.
# %r11d = pairs / 4 = number of vector passes; %ebp = pairs covered by them.
leal -1(%rcx), %ebx
shrl %ebx
addl $1, %ebx
movl %ebx, %r11d
shrl $2, %r11d
cmpl $3, %ebx
leal 0(,%r11,4), %ebp
jbe .L40
testl %ebp, %ebp
jne .L34
# Too few pairs for the vector loop: run everything through the scalar loop.
.L40:
xorl %r9d, %r9d
jmp .L36
.align 16
# Vector setup: %xmm12 = broadcast of %xmm1, %xmm11 = broadcast of %xmm0,
# %xmm10 = zero (used to clear registers before the movlps/movhps loads).
.L34:
movaps %xmm1, %xmm2
movq %rdi, %rax
xorps %xmm10, %xmm10
movq %rsi, %r9
shufps $0, %xmm2, %xmm2
movq %rdx, %r8
xorl %r10d, %r10d
movaps %xmm2, %xmm12
movaps %xmm0, %xmm2
shufps $0, %xmm2, %xmm2
movaps %xmm2, %xmm11
.align 16
# Vector loop: each pass handles 64 bytes at %rax (the interleaved buffer)
# and 32 bytes each at %r9 and %r8 (the two separate arrays). The interleaved
# data is loaded with movaps (aligned); the separate arrays are loaded in
# 8-byte halves with movlps/movhps.
.L37:
movaps (%rax), %xmm2
addl $1, %r10d
movaps %xmm10, %xmm9
movaps 16(%rax), %xmm5
movaps %xmm2, %xmm4
movlps (%r9), %xmm9
movaps %xmm10, %xmm8
movaps 32(%rax), %xmm14
shufps $136, %xmm5, %xmm4
movhps 8(%r9), %xmm9
movaps 48(%rax), %xmm3
movaps %xmm14, %xmm15
movlps 16(%r9), %xmm8
shufps $221, %xmm5, %xmm2
shufps $136, %xmm3, %xmm15
movhps 24(%r9), %xmm8
shufps $221, %xmm3, %xmm14
movaps %xmm4, %xmm5
addq $32, %r9
movaps %xmm9, %xmm3
shufps $136, %xmm15, %xmm5
movaps %xmm10, %xmm7
shufps $136, %xmm8, %xmm3
movlps (%r8), %xmm7
movaps %xmm10, %xmm6
mulps %xmm12, %xmm5
movhps 8(%r8), %xmm7
mulps %xmm11, %xmm3
movlps 16(%r8), %xmm6
movaps %xmm7, %xmm13
movhps 24(%r8), %xmm6
shufps $221, %xmm15, %xmm4
shufps $221, %xmm8, %xmm9
addq $32, %r8
shufps $136, %xmm6, %xmm13
addps %xmm3, %xmm5
movaps %xmm2, %xmm3
shufps $221, %xmm6, %xmm7
shufps $136, %xmm14, %xmm3
shufps $221, %xmm14, %xmm2
mulps %xmm11, %xmm13
movaps %xmm5, %xmm6
mulps %xmm12, %xmm3
mulps %xmm12, %xmm4
mulps %xmm11, %xmm9
addps %xmm13, %xmm3
mulps %xmm12, %xmm2
mulps %xmm11, %xmm7
addps %xmm9, %xmm4
addps %xmm7, %xmm2
unpcklps %xmm4, %xmm6
unpckhps %xmm4, %xmm5
movaps %xmm3, %xmm4
unpcklps %xmm2, %xmm4
unpckhps %xmm2, %xmm3
movaps %xmm6, %xmm2
unpcklps %xmm4, %xmm2
unpckhps %xmm4, %xmm6
movaps %xmm2, (%rax)
movaps %xmm5, %xmm2
unpckhps %xmm3, %xmm5
unpcklps %xmm3, %xmm2
movaps %xmm6, 16(%rax)
movaps %xmm2, 32(%rax)
movaps %xmm5, 48(%rax)
addq $64, %rax
cmpl %r10d, %r11d
ja .L37
# If the vector passes covered every pair we are done; otherwise continue
# in the scalar loop at element index 2 * %ebp.
cmpl %ebx, %ebp
leal (%rbp,%rbp), %r9d
je .L39
# Scalar setup: %r9d = first element index still to process.
# %r8 / %rax point at that element and the next one in the interleaved
# buffer; %r11 / %r10 and %rsi / %rdx point at the matching entries of the
# two separate arrays.
.L36:
movslq %r9d,%rax
leaq 1(%rax), %rbx
leaq 0(,%rax,4), %r10
leaq (%rdi,%rax,8), %r8
leaq (%rdi,%rbx,8), %rax
salq $2, %rbx
leaq (%rsi,%r10), %r11
leaq (%rdx,%r10), %r10
addq %rbx, %rsi
addq %rbx, %rdx
.align 16
# Scalar loop: two elements per pass (%r9d advances by 2) until it reaches
# the element count in %ecx.
.L38:
movss (%r11), %xmm3
addl $2, %r9d
movss (%r8), %xmm2
mulss %xmm0, %xmm3
mulss %xmm1, %xmm2
addq $8, %r11
addss %xmm3, %xmm2
movss %xmm2, (%r8)
movss 4(%r8), %xmm2
movss (%r10), %xmm3
mulss %xmm1, %xmm2
addq $8, %r10
mulss %xmm0, %xmm3
addss %xmm3, %xmm2
movss %xmm2, 4(%r8)
addq $16, %r8
movss (%rsi), %xmm3
addq $8, %rsi
movss (%rax), %xmm2
mulss %xmm0, %xmm3
mulss %xmm1, %xmm2
addss %xmm3, %xmm2
movss %xmm2, (%rax)
movss 4(%rax), %xmm2
movss (%rdx), %xmm3
mulss %xmm1, %xmm2
addq $8, %rdx
mulss %xmm0, %xmm3
addss %xmm3, %xmm2
movss %xmm2, 4(%rax)
addq $16, %rax
cmpl %r9d, %ecx
jg .L38
# Common exit: restore the callee-saved registers pushed on entry.
.L39:
popq %rbx
popq %rbp
ret
.LFE516:
.size alignedBufWetDryMixSplittedSSE, .-alignedBufWetDryMixSplittedSSE
.align 16
# unalignedBufMixLRCoeffSSE (x86-64 build): scaled accumulate with two
# alternating coefficients, without assuming 16-byte aligned pointers.
# Registers as used below: %rdi = buffer that is read, summed and written
# back, %rsi = buffer that is only read, %xmm0 / %xmm1 = coefficients for the
# first / second float of each 8-byte element, %edx = element count.
# Per element: rdi[2i] += rsi[2i] * xmm0 ; rdi[2i+1] += rsi[2i+1] * xmm1.
.globl unalignedBufMixLRCoeffSSE
.type unalignedBufMixLRCoeffSSE, @function
unalignedBufMixLRCoeffSSE:
.LFB514:
# Signed "count % 2 != 0" test: an odd count is first reduced by one element
# at .L52, which then jumps back to .L44.
movl %edx, %ecx
shrl $31, %ecx
leal (%rdx,%rcx), %eax
andl $1, %eax
cmpl %ecx, %eax
jne .L52
.L44:
testl %edx, %edx
jle .L49
# %edx = (count - 1) >> 1, i.e. number of element pairs minus one.
subl $1, %edx
shrl %edx
# The vector path stores with movaps, so it is only taken when the written
# buffer is 16-byte aligned; otherwise fall back to the scalar loop at .L46.
testb $15, %dil
jne .L46
# Build the coefficient vector { xmm0, xmm1, xmm0, xmm1 } in %xmm0.
unpcklps %xmm1, %xmm0
addl $1, %edx
xorps %xmm3, %xmm3
xorl %eax, %eax
movlhps %xmm0, %xmm0
.align 16
# Vector loop: 16 bytes (two elements) per pass. Both inputs are loaded in
# 8-byte halves with movlps/movhps; the result is stored with movaps.
.L47:
movaps %xmm3, %xmm2
addl $1, %eax
movaps %xmm3, %xmm1
movlps (%rsi), %xmm2
movlps (%rdi), %xmm1
movhps 8(%rsi), %xmm2
addq $16, %rsi
movhps 8(%rdi), %xmm1
mulps %xmm0, %xmm2
addps %xmm2, %xmm1
movaps %xmm1, (%rdi)
addq $16, %rdi
cmpl %edx, %eax
jb .L47
rep
ret
.align 16
# Scalar path setup: %rdx = end offset in bytes = (pairs) * 16.
.L46:
mov %edx, %edx
xorl %eax, %eax
addq $1, %rdx
salq $4, %rdx
.align 16
# Scalar loop: 16 bytes (two elements) per pass, one float at a time.
.L48:
movss (%rsi,%rax), %xmm2
mulss %xmm0, %xmm2
addss (%rdi,%rax), %xmm2
movss %xmm2, (%rdi,%rax)
movss 4(%rsi,%rax), %xmm2
mulss %xmm1, %xmm2
addss 4(%rdi,%rax), %xmm2
movss %xmm2, 4(%rdi,%rax)
movss 8(%rsi,%rax), %xmm2
mulss %xmm0, %xmm2
addss 8(%rdi,%rax), %xmm2
movss %xmm2, 8(%rdi,%rax)
movss 12(%rsi,%rax), %xmm2
mulss %xmm1, %xmm2
addss 12(%rdi,%rax), %xmm2
movss %xmm2, 12(%rdi,%rax)
addq $16, %rax
cmpq %rdx, %rax
jne .L48
.L49:
rep
ret
# Odd count: process one leading element with scalar code, advance both
# pointers by 8 bytes, decrement the count and rejoin the main path.
.L52:
movss (%rsi), %xmm2
subl $1, %edx
mulss %xmm0, %xmm2
addss (%rdi), %xmm2
movss %xmm2, (%rdi)
movss 4(%rsi), %xmm2
addq $8, %rsi
mulss %xmm1, %xmm2
addss 4(%rdi), %xmm2
movss %xmm2, 4(%rdi)
addq $8, %rdi
jmp .L44
.LFE514:
.size unalignedBufMixLRCoeffSSE, .-unalignedBufMixLRCoeffSSE
.section .eh_frame,"aw",@progbits
.Lframe1:
.long .LECIE1-.LSCIE1
.LSCIE1:
.long 0x0
.byte 0x1
.string "zR"
.byte 0x1
.byte 0x78
.byte 0x10
.byte 0x1
.byte 0x3
.byte 0xc
.byte 0x7
.byte 0x8
.byte 0x11
.byte 0x10
.byte 0x1
.align 8
.LECIE1:
.LSFDE1:
.long .LEFDE1-.LASFDE1
.LASFDE1:
.long .LASFDE1-.Lframe1
.long .LFB509
.long .LFE509-.LFB509
.byte 0x0
.align 8
.LEFDE1:
.LSFDE3:
.long .LEFDE3-.LASFDE3
.LASFDE3:
.long .LASFDE3-.Lframe1
.long .LFB510
.long .LFE510-.LFB510
.byte 0x0
.align 8
.LEFDE3:
.LSFDE5:
.long .LEFDE5-.LASFDE5
.LASFDE5:
.long .LASFDE5-.Lframe1
.long .LFB511
.long .LFE511-.LFB511
.byte 0x0
.align 8
.LEFDE5:
.LSFDE7:
.long .LEFDE7-.LASFDE7
.LASFDE7:
.long .LASFDE7-.Lframe1
.long .LFB512
.long .LFE512-.LFB512
.byte 0x0
.align 8
.LEFDE7:
.LSFDE9:
.long .LEFDE9-.LASFDE9
.LASFDE9:
.long .LASFDE9-.Lframe1
.long .LFB513
.long .LFE513-.LFB513
.byte 0x0
.align 8
.LEFDE9:
.LSFDE11:
.long .LEFDE11-.LASFDE11
.LASFDE11:
.long .LASFDE11-.Lframe1
.long .LFB515
.long .LFE515-.LFB515
.byte 0x0
.align 8
.LEFDE11:
.LSFDE13:
.long .LEFDE13-.LASFDE13
.LASFDE13:
.long .LASFDE13-.Lframe1
.long .LFB516
.long .LFE516-.LFB516
.byte 0x0
.byte 0x4
.long .LCFI0-.LFB516
.byte 0xe
.byte 0x10
.byte 0x4
.long .LCFI1-.LCFI0
.byte 0xe
.byte 0x18
.byte 0x11
.byte 0x3
.byte 0x3
.byte 0x11
.byte 0x6
.byte 0x2
.align 8
.LEFDE13:
.LSFDE15:
.long .LEFDE15-.LASFDE15
.LASFDE15:
.long .LASFDE15-.Lframe1
.long .LFB514
.long .LFE514-.LFB514
.byte 0x0
.align 8
.LEFDE15:
.ident "GCC: (GNU) 4.4.0 20090304 (experimental)"

View File

@@ -1,395 +0,0 @@
.file "basic_ops_x86.c"
.text
.align 16
# alignedMemCpySSE2 (x86-64 build): block copy with 16-byte SSE2 integer
# moves (movdqa).
# Registers as used below: %rdi = address written to, %rsi = address read
# from, %edx = byte count.
# Only whole 64-byte chunks are copied (count >> 6); a remainder of fewer
# than 64 bytes is left untouched. movdqa faults on addresses that are not
# 16-byte aligned.
.globl alignedMemCpySSE2
.type alignedMemCpySSE2, @function
alignedMemCpySSE2:
.LFB509:
movslq %edx,%rdx
shrq $6, %rdx
testl %edx, %edx
jle .L4
# Turn the chunk count into the end offset in bytes (chunks * 64).
subl $1, %edx
xorl %eax, %eax
addq $1, %rdx
salq $6, %rdx
.align 16
# Main loop: %rax is the running byte offset, 64 bytes moved per pass.
.L3:
movdqa (%rsi,%rax), %xmm0
movdqa %xmm0, (%rdi,%rax)
movdqa 16(%rsi,%rax), %xmm0
movdqa %xmm0, 16(%rdi,%rax)
movdqa 32(%rsi,%rax), %xmm0
movdqa %xmm0, 32(%rdi,%rax)
movdqa 48(%rsi,%rax), %xmm0
movdqa %xmm0, 48(%rdi,%rax)
addq $64, %rax
cmpq %rdx, %rax
jne .L3
.L4:
rep
ret
.LFE509:
.size alignedMemCpySSE2, .-alignedMemCpySSE2
.align 16
# alignedMemClearSSE2 (x86-64 build): zero-fill with 16-byte SSE2 stores.
# Registers as used below: %rdi = address written to, %esi = byte count.
# Only whole 64-byte chunks are cleared (count >> 6); a remainder of fewer
# than 64 bytes is left untouched. movdqa needs a 16-byte aligned address.
.globl alignedMemClearSSE2
.type alignedMemClearSSE2, @function
alignedMemClearSSE2:
.LFB510:
movslq %esi,%rsi
shrq $6, %rsi
testl %esi, %esi
jle .L10
subl $1, %esi
# %xmm0 = all zero bits, the value stored by the loop.
pxor %xmm0, %xmm0
salq $6, %rsi
# %rax = end address (start + chunks * 64).
leaq 64(%rdi,%rsi), %rax
.align 16
# Main loop: %rdi is advanced 64 bytes per pass until it reaches %rax.
.L9:
movdqa %xmm0, (%rdi)
movdqa %xmm0, 16(%rdi)
movdqa %xmm0, 32(%rdi)
movdqa %xmm0, 48(%rdi)
addq $64, %rdi
cmpq %rax, %rdi
jne .L9
.L10:
rep
ret
.LFE510:
.size alignedMemClearSSE2, .-alignedMemClearSSE2
.align 16
# alignedConvertToS16SSE2 (x86-64 build): convert a float buffer to clamped
# signed 16-bit samples, optionally byte-swapping each sample.
# Registers as used below: %rdi = float input (8 bytes per element),
# %rsi = 16-bit output (4 bytes per element), %edx = element count (only the
# low 16 bits are used), %xmm0 = scalar factor, %cl = non-zero selects the
# byte-swapping variants.
# Every float is multiplied by (factor * the constant at .LC0), truncated to
# an integer and clamped to the range -32768 .. 32767.
# Returns in %eax the sign-extended 16-bit count multiplied by 4.
# %rbp and %rbx are saved on entry and restored on every return path.
.globl alignedConvertToS16SSE2
.type alignedConvertToS16SSE2, @function
alignedConvertToS16SSE2:
.LFB511:
pushq %rbp
.LCFI0:
testb %cl, %cl
movl %edx, %eax
# Fold the constant at .LC0 into the scalar factor once, up front.
mulss .LC0(%rip), %xmm0
pushq %rbx
.LCFI1:
jne .L13
# ---- Variant without byte swapping ----
testw %dx, %dx
jle .L15
# %bx = count / 4 = number of vector passes; %r8d = elements they cover.
movl %edx, %ebx
shrw $2, %bx
cmpw $3, %dx
leal 0(,%rbx,4), %r8d
ja .L33
.L28:
xorl %r8d, %r8d
.align 16
# Scalar setup (no swap): start at element index %r8w.
# %ebx / %edi hold the upper / lower clamp limits for cmov.
.L23:
movswq %r8w,%rdx
movl $32767, %ebx
leaq (%rdi,%rdx,8), %rcx
leaq (%rsi,%rdx,4), %rdx
movl $-32768, %edi
.align 16
# Scalar loop (no swap): one element (two floats -> two 16-bit values)
# per pass.
.L25:
movss (%rcx), %xmm1
mulss %xmm0, %xmm1
cvttss2si %xmm1, %esi
movss 4(%rcx), %xmm1
mulss %xmm0, %xmm1
cmpl $-32768, %esi
cmovl %edi, %esi
cmpl $32767, %esi
cmovg %ebx, %esi
movw %si, (%rdx)
cvttss2si %xmm1, %esi
cmpl $-32768, %esi
cmovl %edi, %esi
cmpl $32767, %esi
cmovg %ebx, %esi
addl $1, %r8d
addq $8, %rcx
movw %si, 2(%rdx)
addq $4, %rdx
cmpw %r8w, %ax
jg .L25
# Common return: %eax = (sign-extended 16-bit count) * 4.
.L15:
cwtl
popq %rbx
sall $2, %eax
popq %rbp
ret
.align 16
# ---- Variant with byte swapping ----
.L13:
testw %dx, %dx
jle .L15
movl %edx, %ebx
shrw $2, %bx
cmpw $3, %dx
leal 0(,%rbx,4), %r8d
ja .L34
.L27:
xorl %r8d, %r8d
.align 16
# Scalar setup (swap): start at element index %r8w.
# %esi / %edi hold the upper / lower clamp limits for cmov.
.L18:
movswq %r8w,%rdx
leaq (%rdi,%rdx,8), %rcx
leaq (%rsi,%rdx,4), %rdx
movl $-32768, %edi
movl $32767, %esi
.align 16
# Scalar loop (swap): as .L25, but the two bytes of each clamped value are
# exchanged (high byte via %bh, low byte shifted up by 8) before the store.
.L20:
movss (%rcx), %xmm1
mulss %xmm0, %xmm1
cvttss2si %xmm1, %ebx
movss 4(%rcx), %xmm1
mulss %xmm0, %xmm1
cmpl $-32768, %ebx
cmovl %edi, %ebx
cmpl $32767, %ebx
cmovg %esi, %ebx
movzbl %bh, %ebp
sall $8, %ebx
movl %ebp, %r9d
orl %r9d, %ebx
movw %bx, (%rdx)
cvttss2si %xmm1, %ebx
cmpl $-32768, %ebx
cmovl %edi, %ebx
cmpl $32767, %ebx
cmovg %esi, %ebx
addl $1, %r8d
addq $8, %rcx
movzbl %bh, %ebp
sall $8, %ebx
movl %ebp, %r9d
orl %r9d, %ebx
movw %bx, 2(%rdx)
addq $4, %rdx
cmpw %r8w, %ax
jg .L20
cwtl
popq %rbx
sall $2, %eax
popq %rbp
ret
.align 16
# Vector setup (swap): %xmm9 = broadcast factor, %xmm4 / %xmm3 = clamp
# constants loaded from .LC1 / .LC2, %xmm8 = byte mask loaded from .LC3.
.L34:
testw %r8w, %r8w
je .L27
movaps %xmm0, %xmm1
movq %rdi, %rcx
movdqa .LC1(%rip), %xmm4
movq %rsi, %r10
shufps $0, %xmm1, %xmm1
xorl %r9d, %r9d
movdqa .LC2(%rip), %xmm3
movaps %xmm1, %xmm9
movdqa .LC3(%rip), %xmm8
.align 16
# Vector loop (swap): 32 bytes in (four elements), 16 bytes out per pass.
# Each lane is scaled, truncated (cvttps2dq), clamped with
# pcmpgtd/pand/pandn/por, byte-swapped with the mask and shifts, and then
# packed to 16-bit with the punpck*wd sequence.
.L19:
movaps (%rcx), %xmm1
addl $1, %r9d
movdqa %xmm3, %xmm5
mulps %xmm9, %xmm1
movaps 16(%rcx), %xmm6
movdqa %xmm3, %xmm7
addq $32, %rcx
mulps %xmm9, %xmm6
cvttps2dq %xmm1, %xmm1
movdqa %xmm1, %xmm2
pcmpgtd %xmm4, %xmm2
cvttps2dq %xmm6, %xmm6
pand %xmm2, %xmm1
pandn %xmm4, %xmm2
por %xmm1, %xmm2
movdqa %xmm2, %xmm1
pcmpgtd %xmm3, %xmm1
pand %xmm1, %xmm5
pandn %xmm2, %xmm1
movdqa %xmm1, %xmm2
movdqa %xmm6, %xmm1
por %xmm5, %xmm2
pcmpgtd %xmm4, %xmm1
pand %xmm1, %xmm6
pandn %xmm4, %xmm1
movdqa %xmm2, %xmm5
pslld $8, %xmm2
pand %xmm8, %xmm5
por %xmm6, %xmm1
psrad $8, %xmm5
movdqa %xmm1, %xmm6
pcmpgtd %xmm3, %xmm6
pand %xmm6, %xmm7
pandn %xmm1, %xmm6
movdqa %xmm6, %xmm1
por %xmm7, %xmm1
movdqa %xmm5, %xmm7
movdqa %xmm1, %xmm6
pslld $8, %xmm1
pand %xmm8, %xmm6
psrad $8, %xmm6
punpcklwd %xmm6, %xmm5
punpckhwd %xmm6, %xmm7
movdqa %xmm5, %xmm6
punpcklwd %xmm7, %xmm5
punpckhwd %xmm7, %xmm6
punpcklwd %xmm6, %xmm5
movdqa %xmm2, %xmm6
punpcklwd %xmm1, %xmm2
punpckhwd %xmm1, %xmm6
movdqa %xmm2, %xmm1
punpcklwd %xmm6, %xmm2
punpckhwd %xmm6, %xmm1
punpcklwd %xmm1, %xmm2
por %xmm2, %xmm5
movdqa %xmm5, (%r10)
addq $16, %r10
cmpw %r9w, %bx
ja .L19
# Elements not covered by the vector passes go through the scalar loop.
cmpw %dx, %r8w
jne .L18
jmp .L15
.align 16
# Vector setup (no swap): %xmm6 = broadcast factor, %xmm4 / %xmm3 = clamp
# constants loaded from .LC1 / .LC2.
.L33:
testw %r8w, %r8w
je .L28
movaps %xmm0, %xmm1
movq %rdi, %rcx
movdqa .LC1(%rip), %xmm4
movq %rsi, %r10
shufps $0, %xmm1, %xmm1
xorl %r9d, %r9d
movdqa .LC2(%rip), %xmm3
movaps %xmm1, %xmm6
.align 16
# Vector loop (no swap): 32 bytes in (four elements), 16 bytes out per pass.
.L24:
movaps (%rcx), %xmm1
addl $1, %r9d
movdqa %xmm3, %xmm7
mulps %xmm6, %xmm1
movaps 16(%rcx), %xmm5
addq $32, %rcx
mulps %xmm6, %xmm5
cvttps2dq %xmm1, %xmm1
movdqa %xmm1, %xmm2
pcmpgtd %xmm4, %xmm2
cvttps2dq %xmm5, %xmm5
pand %xmm2, %xmm1
pandn %xmm4, %xmm2
por %xmm1, %xmm2
movdqa %xmm2, %xmm1
pcmpgtd %xmm3, %xmm1
pand %xmm1, %xmm7
pandn %xmm2, %xmm1
movdqa %xmm1, %xmm2
movdqa %xmm5, %xmm1
por %xmm7, %xmm2
movdqa %xmm3, %xmm7
pcmpgtd %xmm4, %xmm1
pand %xmm1, %xmm5
pandn %xmm4, %xmm1
por %xmm5, %xmm1
movdqa %xmm1, %xmm5
pcmpgtd %xmm3, %xmm5
pand %xmm5, %xmm7
pandn %xmm1, %xmm5
movdqa %xmm5, %xmm1
movdqa %xmm2, %xmm5
por %xmm7, %xmm1
punpcklwd %xmm1, %xmm2
punpckhwd %xmm1, %xmm5
movdqa %xmm2, %xmm1
punpcklwd %xmm5, %xmm2
punpckhwd %xmm5, %xmm1
punpcklwd %xmm1, %xmm2
movdqa %xmm2, (%r10)
addq $16, %r10
cmpw %r9w, %bx
ja .L24
# Elements not covered by the vector passes go through the scalar loop.
cmpw %r8w, %dx
jne .L23
jmp .L15
.LFE511:
.size alignedConvertToS16SSE2, .-alignedConvertToS16SSE2
# Read-only constants referenced by alignedConvertToS16SSE2.
.section .rodata
.align 4
# .LC0: single-precision float stored as its bit pattern
# (0x46FFFE00, i.e. 32767.0); multiplied into the scalar factor.
.LC0:
.long 1191181824
.align 16
# .LC1: four 32-bit lanes holding the lower clamp limit.
.LC1:
.long -32768
.long -32768
.long -32768
.long -32768
.align 16
# .LC2: four 32-bit lanes holding the upper clamp limit.
.LC2:
.long 32767
.long 32767
.long 32767
.long 32767
.align 16
# .LC3: four 32-bit lanes of 0xFF00, the byte mask used by the
# byte-swapping vector loop.
.LC3:
.long 65280
.long 65280
.long 65280
.long 65280
.section .eh_frame,"aw",@progbits
.Lframe1:
.long .LECIE1-.LSCIE1
.LSCIE1:
.long 0x0
.byte 0x1
.string "zR"
.byte 0x1
.byte 0x78
.byte 0x10
.byte 0x1
.byte 0x3
.byte 0xc
.byte 0x7
.byte 0x8
.byte 0x11
.byte 0x10
.byte 0x1
.align 8
.LECIE1:
.LSFDE1:
.long .LEFDE1-.LASFDE1
.LASFDE1:
.long .LASFDE1-.Lframe1
.long .LFB509
.long .LFE509-.LFB509
.byte 0x0
.align 8
.LEFDE1:
.LSFDE3:
.long .LEFDE3-.LASFDE3
.LASFDE3:
.long .LASFDE3-.Lframe1
.long .LFB510
.long .LFE510-.LFB510
.byte 0x0
.align 8
.LEFDE3:
.LSFDE5:
.long .LEFDE5-.LASFDE5
.LASFDE5:
.long .LASFDE5-.Lframe1
.long .LFB511
.long .LFE511-.LFB511
.byte 0x0
.byte 0x4
.long .LCFI0-.LFB511
.byte 0xe
.byte 0x10
.byte 0x4
.long .LCFI1-.LCFI0
.byte 0xe
.byte 0x18
.byte 0x11
.byte 0x3
.byte 0x3
.byte 0x11
.byte 0x6
.byte 0x2
.align 8
.LEFDE5:
.ident "GCC: (GNU) 4.4.0 20090304 (experimental)"

View File

@@ -1,107 +0,0 @@
.file "basic_ops_x86.c"
.text
.p2align 4,,15
# alignedMemCpyMMX (32-bit x86 build): block copy through the MMX registers.
# Stack arguments as read below (after the push and the 112-byte frame):
# 120(%esp) = address written to, 124(%esp) = address read from,
# 128(%esp) = byte count.
# Only whole 64-byte chunks are copied (count >> 6); a remainder of fewer
# than 64 bytes is left untouched.
# The MMX registers alias the x87 register stack, so the FPU state is saved
# into a buffer at 4(%esp) before the copy and restored from it afterwards.
.globl alignedMemCpyMMX
.type alignedMemCpyMMX, @function
alignedMemCpyMMX:
pushl %ebx
subl $112, %esp
movl 128(%esp), %ebx
movl 124(%esp), %eax
shrl $6, %ebx
#APP
# 42 "/home/toby/development/git/lmms/src/core/basic_ops_x86.c" 1
fsave 4(%esp); fwait
# 0 "" 2
# 44 "/home/toby/development/git/lmms/src/core/basic_ops_x86.c" 1
1: prefetchnta (%eax)
prefetchnta 64(%eax)
prefetchnta 128(%eax)
prefetchnta 192(%eax)
prefetchnta 256(%eax)
# 0 "" 2
#NO_APP
testl %ebx, %ebx
je .L2
movl 120(%esp), %ecx
xorl %edx, %edx
.p2align 4,,7
.p2align 3
# Main loop: %eax = read pointer, %ecx = write pointer, %edx = pass counter;
# 64 bytes are moved per pass in two groups of four 8-byte MMX registers.
.L3:
#APP
# 53 "/home/toby/development/git/lmms/src/core/basic_ops_x86.c" 1
1: prefetchnta 320(%eax)
2: movq (%eax), %mm0
movq 8(%eax), %mm1
movq 16(%eax), %mm2
movq 24(%eax), %mm3
movq %mm0, (%ecx)
movq %mm1, 8(%ecx)
movq %mm2, 16(%ecx)
movq %mm3, 24(%ecx)
movq 32(%eax), %mm0
movq 40(%eax), %mm1
movq 48(%eax), %mm2
movq 56(%eax), %mm3
movq %mm0, 32(%ecx)
movq %mm1, 40(%ecx)
movq %mm2, 48(%ecx)
movq %mm3, 56(%ecx)
# 0 "" 2
#NO_APP
addl $1, %edx
addl $64, %eax
addl $64, %ecx
cmpl %edx, %ebx
jne .L3
# Exit path: reload the FPU state saved on entry. The previous code issued a
# second fsave here, which overwrote the saved image with the MMX-modified
# state and left the FPU re-initialised instead of restoring the caller's
# state (control word included).
.L2:
#APP
# 75 "/home/toby/development/git/lmms/src/core/basic_ops_x86.c" 1
frstor 4(%esp); fwait
# 0 "" 2
#NO_APP
addl $112, %esp
popl %ebx
ret
.size alignedMemCpyMMX, .-alignedMemCpyMMX
.p2align 4,,15
# alignedMemClearMMX (32-bit x86 build): zero-fill through an MMX register.
# Stack arguments as read below: 4(%esp) = address written to,
# 8(%esp) = byte count.
# Only whole 64-byte chunks are cleared (count >> 6); a remainder of fewer
# than 64 bytes is left untouched.
# emms is executed before returning so the x87 unit is usable again.
.globl alignedMemClearMMX
.type alignedMemClearMMX, @function
alignedMemClearMMX:
movl 8(%esp), %ecx
shrl $6, %ecx
testl %ecx, %ecx
je .L8
movl 4(%esp), %edx
xorl %eax, %eax
# %mm0 = all zero bits, the value stored by the loop.
pxor %mm0, %mm0
.p2align 4,,7
.p2align 3
# Main loop: %edx = write pointer, %eax = pass counter, 64 bytes per pass.
.L9:
#APP
# 90 "/home/toby/development/git/lmms/src/core/basic_ops_x86.c" 1
movq %mm0, (%edx)
movq %mm0, 8(%edx)
movq %mm0, 16(%edx)
movq %mm0, 24(%edx)
movq %mm0, 32(%edx)
movq %mm0, 40(%edx)
movq %mm0, 48(%edx)
movq %mm0, 56(%edx)
# 0 "" 2
#NO_APP
addl $1, %eax
addl $64, %edx
cmpl %eax, %ecx
jne .L9
.L8:
emms
ret
.size alignedMemClearMMX, .-alignedMemClearMMX
.ident "GCC: (Ubuntu 4.4.0-0ubuntu2) 4.4.0"
.section .note.GNU-stack,"",@progbits

View File

@@ -1,494 +0,0 @@
.file "basic_ops_x86.c"
.text
.p2align 4,,15
# alignedMemCpySSE (32-bit x86 build): block copy with 16-byte SSE moves.
# Stack arguments as read below (after the two pushes): 12(%esp) = address
# written to, 16(%esp) = address read from, 20(%esp) = byte count.
# Only whole 64-byte chunks are copied (count >> 6); a remainder of fewer
# than 64 bytes is left untouched. movaps faults on addresses that are not
# 16-byte aligned.
# %esi and %ebx are saved on entry and restored at .L4.
.globl alignedMemCpySSE
.type alignedMemCpySSE, @function
alignedMemCpySSE:
pushl %esi
pushl %ebx
movl 20(%esp), %esi
movl 12(%esp), %edx
movl 16(%esp), %ecx
shrl $6, %esi
testl %esi, %esi
je .L4
xorl %eax, %eax
xorl %ebx, %ebx
.p2align 4,,7
.p2align 3
# Main loop: %eax = byte offset, %ebx = pass counter, %esi = chunk count.
.L3:
movaps (%ecx,%eax), %xmm0
addl $1, %ebx
movaps %xmm0, (%edx,%eax)
movaps 16(%ecx,%eax), %xmm0
movaps %xmm0, 16(%edx,%eax)
movaps 32(%ecx,%eax), %xmm0
movaps %xmm0, 32(%edx,%eax)
movaps 48(%ecx,%eax), %xmm0
movaps %xmm0, 48(%edx,%eax)
addl $64, %eax
cmpl %ebx, %esi
jne .L3
.L4:
popl %ebx
popl %esi
ret
.size alignedMemCpySSE, .-alignedMemCpySSE
.p2align 4,,15
# alignedMemClearSSE (32-bit x86 build): zero-fill with 16-byte SSE stores.
# Stack arguments as read below: 4(%esp) = address written to,
# 8(%esp) = byte count.
# Only whole 64-byte chunks are cleared (count >> 6); a remainder of fewer
# than 64 bytes is left untouched. movaps needs a 16-byte aligned address.
.globl alignedMemClearSSE
.type alignedMemClearSSE, @function
alignedMemClearSSE:
movl 8(%esp), %ecx
shrl $6, %ecx
testl %ecx, %ecx
je .L10
movl 4(%esp), %eax
# %xmm0 = all zero bits, the value stored by the loop.
xorps %xmm0, %xmm0
xorl %edx, %edx
.p2align 4,,7
.p2align 3
# Main loop: %eax = write pointer, %edx = pass counter, 64 bytes per pass.
.L9:
addl $1, %edx
movaps %xmm0, (%eax)
movaps %xmm0, 16(%eax)
movaps %xmm0, 32(%eax)
movaps %xmm0, 48(%eax)
addl $64, %eax
cmpl %edx, %ecx
jne .L9
.L10:
rep
ret
.size alignedMemClearSSE, .-alignedMemClearSSE
.p2align 4,,15
# alignedBufApplyGainSSE (32-bit x86 build): multiply a float buffer in
# place by a scalar.
# Stack arguments as read below: 4(%esp) = buffer (read and written),
# 8(%esp) = scalar float factor, 12(%esp) = element count.
# The loop runs ((count - 1) >> 3) + 1 times and scales 64 bytes (16 floats)
# per pass, so the work is rounded up to a multiple of 8 count units.
.globl alignedBufApplyGainSSE
.type alignedBufApplyGainSSE, @function
alignedBufApplyGainSSE:
movl 12(%esp), %ecx
testl %ecx, %ecx
jle .L15
movss 8(%esp), %xmm0
subl $1, %ecx
movl 4(%esp), %eax
shrl $3, %ecx
xorl %edx, %edx
addl $1, %ecx
# Broadcast the scalar in lane 0 of %xmm0 to all four lanes.
shufps $0, %xmm0, %xmm0
.p2align 4,,7
.p2align 3
# Main loop: %eax = buffer pointer, %edx = pass counter, %ecx = pass limit.
.L14:
movaps 16(%eax), %xmm3
addl $1, %edx
movaps 32(%eax), %xmm2
mulps %xmm0, %xmm3
movaps 48(%eax), %xmm1
mulps %xmm0, %xmm2
movaps (%eax), %xmm4
mulps %xmm0, %xmm1
movaps %xmm3, 16(%eax)
mulps %xmm0, %xmm4
movaps %xmm2, 32(%eax)
movaps %xmm1, 48(%eax)
movaps %xmm4, (%eax)
addl $64, %eax
cmpl %edx, %ecx
ja .L14
.L15:
rep
ret
.size alignedBufApplyGainSSE, .-alignedBufApplyGainSSE
.p2align 4,,15
# alignedBufMixSSE (32-bit x86 build): add one float buffer onto another.
# Stack arguments as read below (after the two pushes): 12(%esp) = buffer
# that is read, summed and written back, 16(%esp) = buffer that is only
# read, 20(%esp) = element count.
# The loop runs ((count - 1) >> 3) + 1 times and processes 64 bytes
# (16 floats) of each buffer per pass.
# %esi and %ebx are saved on entry and restored at .L20.
.globl alignedBufMixSSE
.type alignedBufMixSSE, @function
alignedBufMixSSE:
pushl %esi
pushl %ebx
movl 20(%esp), %esi
movl 12(%esp), %edx
movl 16(%esp), %ecx
testl %esi, %esi
jle .L20
subl $1, %esi
xorl %eax, %eax
shrl $3, %esi
xorl %ebx, %ebx
addl $1, %esi
.p2align 4,,7
.p2align 3
# Main loop: %eax = byte offset, %ebx = pass counter, %esi = pass limit.
.L19:
movaps 16(%edx,%eax), %xmm2
addl $1, %ebx
movaps 32(%edx,%eax), %xmm1
movaps 48(%edx,%eax), %xmm0
movaps (%edx,%eax), %xmm3
addps 16(%ecx,%eax), %xmm2
addps 32(%ecx,%eax), %xmm1
addps 48(%ecx,%eax), %xmm0
addps (%ecx,%eax), %xmm3
movaps %xmm2, 16(%edx,%eax)
movaps %xmm3, (%edx,%eax)
movaps %xmm1, 32(%edx,%eax)
movaps %xmm0, 48(%edx,%eax)
addl $64, %eax
cmpl %ebx, %esi
ja .L19
.L20:
popl %ebx
popl %esi
ret
.size alignedBufMixSSE, .-alignedBufMixSSE
.p2align 4,,15
# alignedBufMixLRCoeffSSE (32-bit x86 build): scaled accumulate with two
# alternating coefficients.
# Stack arguments as read below (after the two pushes): 12(%esp) = buffer
# that is read, summed and written back, 16(%esp) = buffer that is only
# read, 20(%esp) / 24(%esp) = the two float coefficients,
# 28(%esp) = element count.
# Per float: dst[i] += src[i] * c, where c is the first coefficient for
# even i and the second for odd i.
# The loop runs ((count - 1) >> 2) + 1 times, 32 bytes (8 floats) per pass.
# %esi and %ebx are saved on entry and restored at .L25.
.globl alignedBufMixLRCoeffSSE
.type alignedBufMixLRCoeffSSE, @function
alignedBufMixLRCoeffSSE:
pushl %esi
pushl %ebx
movl 28(%esp), %esi
movl 12(%esp), %edx
movl 16(%esp), %ebx
testl %esi, %esi
jle .L25
movss 24(%esp), %xmm2
subl $1, %esi
movss 20(%esp), %xmm0
xorl %eax, %eax
shrl $2, %esi
xorl %ecx, %ecx
addl $1, %esi
# Build the coefficient vector { first, second, first, second } in %xmm2.
unpcklps %xmm2, %xmm0
movaps %xmm0, %xmm2
movlhps %xmm0, %xmm2
.p2align 4,,7
.p2align 3
# Main loop: %eax = byte offset, %ecx = pass counter, %esi = pass limit.
.L24:
movaps 16(%ebx,%eax), %xmm0
addl $1, %ecx
movaps (%ebx,%eax), %xmm1
mulps %xmm2, %xmm0
mulps %xmm2, %xmm1
addps 16(%edx,%eax), %xmm0
addps (%edx,%eax), %xmm1
movaps %xmm0, 16(%edx,%eax)
movaps %xmm1, (%edx,%eax)
addl $32, %eax
cmpl %ecx, %esi
ja .L24
.L25:
popl %ebx
popl %esi
ret
.size alignedBufMixLRCoeffSSE, .-alignedBufMixLRCoeffSSE
.p2align 4,,15
# alignedBufWetDryMixSSE (32-bit x86 build): weighted blend of two float
# buffers.
# Stack arguments as read below (after the two pushes): 12(%esp) = buffer
# that is read and written back, 16(%esp) = buffer that is only read,
# 20(%esp) = float factor applied to the read-only buffer,
# 24(%esp) = float factor applied to the written buffer,
# 28(%esp) = element count.
# Per float: dst[i] = dst[i] * factor24 + src[i] * factor20.
# The loop runs ((count - 1) >> 2) + 1 times, 32 bytes (8 floats) per pass.
# %esi and %ebx are saved on entry and restored at .L30.
.globl alignedBufWetDryMixSSE
.type alignedBufWetDryMixSSE, @function
alignedBufWetDryMixSSE:
pushl %esi
pushl %ebx
movl 28(%esp), %esi
movl 12(%esp), %edx
movl 16(%esp), %ebx
testl %esi, %esi
jle .L30
movss 24(%esp), %xmm3
subl $1, %esi
movss 20(%esp), %xmm2
xorl %eax, %eax
shrl $2, %esi
xorl %ecx, %ecx
# Broadcast both scalar factors across all four lanes.
shufps $0, %xmm3, %xmm3
addl $1, %esi
shufps $0, %xmm2, %xmm2
.p2align 4,,7
.p2align 3
# Main loop: %eax = byte offset, %ecx = pass counter, %esi = pass limit.
.L29:
movaps 16(%ebx,%eax), %xmm1
addl $1, %ecx
movaps 16(%edx,%eax), %xmm0
mulps %xmm2, %xmm1
movaps (%ebx,%eax), %xmm4
mulps %xmm3, %xmm0
mulps %xmm2, %xmm4
addps %xmm1, %xmm0
movaps (%edx,%eax), %xmm1
mulps %xmm3, %xmm1
movaps %xmm0, 16(%edx,%eax)
addps %xmm4, %xmm1
movaps %xmm1, (%edx,%eax)
addl $32, %eax
cmpl %ecx, %esi
ja .L29
.L30:
popl %ebx
popl %esi
ret
.size alignedBufWetDryMixSSE, .-alignedBufWetDryMixSSE
.p2align 4,,15
# alignedBufWetDryMixSplittedSSE (32-bit x86 build): weighted blend of an
# interleaved two-float-per-element buffer with two separate float arrays.
# Stack arguments as read below (after four pushes and the 124-byte frame):
# 144(%esp) = interleaved buffer (read and written back),
# 148(%esp) = array blended into the first float of each element,
# 152(%esp) = array blended into the second float of each element,
# 156(%esp) = float factor applied to the two separate arrays,
# 160(%esp) = float factor applied to the interleaved buffer,
# 164(%esp) = element count.
# Per element i (as the scalar loop at .L38 shows):
#   buf[2i]   = buf[2i]   * factor160 + first[i]  * factor156
#   buf[2i+1] = buf[2i+1] * factor160 + second[i] * factor156
# %ebp, %edi, %esi and %ebx are saved on entry and restored at .L39.
.globl alignedBufWetDryMixSplittedSSE
.type alignedBufWetDryMixSplittedSSE, @function
alignedBufWetDryMixSplittedSSE:
pushl %ebp
pushl %edi
pushl %esi
pushl %ebx
subl $124, %esp
movl 164(%esp), %eax
movl 144(%esp), %edx
movl 148(%esp), %esi
movl 152(%esp), %ecx
testl %eax, %eax
jle .L39
# 104(%esp) = number of element pairs = ((count - 1) >> 1) + 1.
# %ebp = pairs / 4 = vector passes; 108(%esp) = pairs covered by them.
movl 164(%esp), %eax
subl $1, %eax
shrl %eax
addl $1, %eax
movl %eax, %ebp
movl %eax, 104(%esp)
shrl $2, %ebp
cmpl $3, 104(%esp)
leal 0(,%ebp,4), %eax
movl %eax, 108(%esp)
jbe .L40
testl %eax, %eax
jne .L34
# Too few pairs for the vector loop: run everything through the scalar loop.
.L40:
xorl %edi, %edi
jmp .L36
.p2align 4,,7
.p2align 3
# Vector setup: the broadcast of the factor at 160(%esp) is kept in the
# stack slot at 16(%esp), the broadcast of the factor at 156(%esp) in the
# slot at (%esp); %xmm7 = zero.
.L34:
movss 160(%esp), %xmm0
xorps %xmm7, %xmm7
movl %esi, %ebx
xorl %eax, %eax
xorl %edi, %edi
shufps $0, %xmm0, %xmm0
movaps %xmm0, 16(%esp)
movss 156(%esp), %xmm0
shufps $0, %xmm0, %xmm0
movaps %xmm0, (%esp)
.p2align 4,,7
.p2align 3
# Vector loop: each pass handles 64 bytes of the interleaved buffer
# (addressed as %edx + 2 * %eax) and 32 bytes of each separate array.
# Intermediate vectors are spilled to the stack slots at 32..80(%esp).
.L37:
movaps (%edx,%eax,2), %xmm5
addl $1, %edi
movaps 16(%edx,%eax,2), %xmm6
movaps %xmm5, %xmm0
shufps $136, %xmm6, %xmm0
movaps 32(%edx,%eax,2), %xmm4
shufps $221, %xmm6, %xmm5
movaps %xmm0, 80(%esp)
movaps 48(%edx,%eax,2), %xmm3
movaps %xmm4, %xmm0
shufps $136, %xmm3, %xmm0
movaps 80(%esp), %xmm2
shufps $221, %xmm3, %xmm4
movaps %xmm7, %xmm6
movlps (%ebx), %xmm6
movaps %xmm5, 64(%esp)
movhps 8(%ebx), %xmm6
shufps $136, %xmm0, %xmm2
movaps %xmm0, 48(%esp)
movaps %xmm7, %xmm5
movaps %xmm6, %xmm0
movlps 16(%ebx), %xmm5
movhps 24(%ebx), %xmm5
shufps $136, %xmm5, %xmm0
mulps 16(%esp), %xmm2
shufps $221, %xmm5, %xmm6
movaps %xmm4, 32(%esp)
addl $32, %ebx
mulps (%esp), %xmm0
movaps %xmm7, %xmm4
movlps (%eax,%ecx), %xmm4
movaps %xmm7, %xmm3
movhps 8(%eax,%ecx), %xmm4
movaps %xmm4, %xmm1
movlps 16(%ecx,%eax), %xmm3
movhps 24(%ecx,%eax), %xmm3
shufps $136, %xmm3, %xmm1
addps %xmm0, %xmm2
movaps 64(%esp), %xmm0
shufps $221, %xmm3, %xmm4
shufps $136, 32(%esp), %xmm0
mulps (%esp), %xmm1
movaps %xmm2, %xmm3
movaps 64(%esp), %xmm5
mulps 16(%esp), %xmm0
shufps $221, 32(%esp), %xmm5
mulps (%esp), %xmm6
addps %xmm1, %xmm0
movaps 80(%esp), %xmm1
shufps $221, 48(%esp), %xmm1
mulps (%esp), %xmm4
mulps 16(%esp), %xmm1
mulps 16(%esp), %xmm5
addps %xmm6, %xmm1
addps %xmm4, %xmm5
movaps %xmm0, %xmm4
unpcklps %xmm1, %xmm3
unpcklps %xmm5, %xmm4
unpckhps %xmm1, %xmm2
movaps %xmm3, %xmm1
unpckhps %xmm5, %xmm0
unpcklps %xmm4, %xmm1
unpckhps %xmm4, %xmm3
movaps %xmm1, (%edx,%eax,2)
movaps %xmm2, %xmm1
unpckhps %xmm0, %xmm2
unpcklps %xmm0, %xmm1
movaps %xmm3, 16(%edx,%eax,2)
movaps %xmm1, 32(%edx,%eax,2)
movaps %xmm2, 48(%edx,%eax,2)
addl $32, %eax
cmpl %edi, %ebp
ja .L37
# If the vector passes covered every pair we are done; otherwise continue
# in the scalar loop at element index 2 * 108(%esp).
movl 108(%esp), %edi
movl 104(%esp), %eax
addl %edi, %edi
cmpl %eax, 108(%esp)
je .L39
# Scalar setup: %edi = first element index still to process; %ebx / %edx
# point at that element and the next one in the interleaved buffer.
.L36:
movss 156(%esp), %xmm0
xorl %ebp, %ebp
movss 160(%esp), %xmm1
movl %edi, %eax
leal (%ebx,%edi,8), %ebx
leal 8(%edx,%edi,8), %edx
.p2align 4,,7
.p2align 3
# Scalar loop: two elements per pass (%ebp advances by 2) until
# %edi + %ebp reaches the element count at 164(%esp).
.L38:
movss (%esi,%eax,4), %xmm3
addl $2, %ebp
movss (%ebx), %xmm2
mulss %xmm0, %xmm3
mulss %xmm1, %xmm2
addss %xmm3, %xmm2
movss %xmm2, (%ebx)
movss 4(%ebx), %xmm2
movss (%ecx,%eax,4), %xmm3
mulss %xmm1, %xmm2
mulss %xmm0, %xmm3
addss %xmm3, %xmm2
movss %xmm2, 4(%ebx)
addl $16, %ebx
movss 4(%esi,%eax,4), %xmm3
movss (%edx), %xmm2
mulss %xmm0, %xmm3
mulss %xmm1, %xmm2
addss %xmm3, %xmm2
movss %xmm2, (%edx)
movss 4(%edx), %xmm2
movss 4(%ecx,%eax,4), %xmm3
mulss %xmm1, %xmm2
leal (%edi,%ebp), %eax
mulss %xmm0, %xmm3
addss %xmm3, %xmm2
movss %xmm2, 4(%edx)
addl $16, %edx
cmpl %eax, 164(%esp)
jg .L38
# Common exit: release the frame and restore the saved registers.
.L39:
addl $124, %esp
popl %ebx
popl %esi
popl %edi
popl %ebp
ret
.size alignedBufWetDryMixSplittedSSE, .-alignedBufWetDryMixSplittedSSE
.p2align 4,,15
# unalignedBufMixLRCoeffSSE (32-bit x86 build): scaled accumulate with two
# alternating coefficients, without assuming 16-byte aligned pointers.
# Stack arguments as read below (after the two pushes): 12(%esp) = buffer
# that is read, summed and written back, 16(%esp) = buffer that is only
# read, 20(%esp) / 24(%esp) = float coefficients for the first / second
# float of each 8-byte element, 28(%esp) = element count.
# Per element: dst[2i] += src[2i] * first ; dst[2i+1] += src[2i+1] * second.
# %esi and %ebx are saved on entry and restored on every return path.
.globl unalignedBufMixLRCoeffSSE
.type unalignedBufMixLRCoeffSSE, @function
unalignedBufMixLRCoeffSSE:
pushl %esi
pushl %ebx
movl 28(%esp), %ebx
movl 12(%esp), %eax
movl 16(%esp), %edx
movss 20(%esp), %xmm1
# Signed "count % 2 != 0" test: an odd count is first reduced by one element
# at .L52, which then jumps back to .L44.
movl %ebx, %esi
shrl $31, %esi
leal (%ebx,%esi), %ecx
andl $1, %ecx
cmpl %esi, %ecx
movss 24(%esp), %xmm3
jne .L52
.L44:
testl %ebx, %ebx
jle .L49
# The vector path stores with movaps, so it is only taken when the written
# buffer is 16-byte aligned; otherwise fall back to the scalar loop at .L46.
testb $15, %al
jne .L46
# Build the coefficient vector { first, second, first, second } in %xmm3;
# %ebx becomes the number of element pairs, ((count - 1) >> 1) + 1.
movaps %xmm1, %xmm0
subl $1, %ebx
unpcklps %xmm3, %xmm0
shrl %ebx
xorps %xmm2, %xmm2
movaps %xmm0, %xmm3
addl $1, %ebx
movlhps %xmm0, %xmm3
xorl %ecx, %ecx
.p2align 4,,7
.p2align 3
# Vector loop: 16 bytes (two elements) per pass. Both inputs are loaded in
# 8-byte halves with movlps/movhps; the result is stored with movaps.
.L47:
movaps %xmm2, %xmm1
addl $1, %ecx
movlps (%edx), %xmm1
movhps 8(%edx), %xmm1
movaps %xmm2, %xmm0
movlps (%eax), %xmm0
movhps 8(%eax), %xmm0
addl $16, %edx
mulps %xmm3, %xmm1
addps %xmm1, %xmm0
movaps %xmm0, (%eax)
addl $16, %eax
cmpl %ebx, %ecx
jb .L47
.L49:
popl %ebx
popl %esi
ret
.p2align 4,,7
.p2align 3
.L46:
xorl %ecx, %ecx
.p2align 4,,7
.p2align 3
# Scalar loop: %ecx is the element index and advances by 2 per pass
# (16 bytes), one float at a time, until it reaches the count in %ebx.
.L48:
movss (%edx,%ecx,8), %xmm0
mulss %xmm1, %xmm0
addss (%eax,%ecx,8), %xmm0
movss %xmm0, (%eax,%ecx,8)
movss 4(%edx,%ecx,8), %xmm0
mulss %xmm3, %xmm0
addss 4(%eax,%ecx,8), %xmm0
movss %xmm0, 4(%eax,%ecx,8)
movss 8(%edx,%ecx,8), %xmm0
mulss %xmm1, %xmm0
addss 8(%eax,%ecx,8), %xmm0
movss %xmm0, 8(%eax,%ecx,8)
movss 12(%edx,%ecx,8), %xmm0
mulss %xmm3, %xmm0
addss 12(%eax,%ecx,8), %xmm0
movss %xmm0, 12(%eax,%ecx,8)
addl $2, %ecx
cmpl %ecx, %ebx
jg .L48
popl %ebx
popl %esi
ret
# Odd count: process one leading element with scalar code, advance both
# pointers by 8 bytes, decrement the count and rejoin the main path.
.L52:
movss (%edx), %xmm0
subl $1, %ebx
mulss %xmm1, %xmm0
addss (%eax), %xmm0
movss %xmm0, (%eax)
movss 4(%edx), %xmm0
addl $8, %edx
mulss %xmm3, %xmm0
addss 4(%eax), %xmm0
movss %xmm0, 4(%eax)
addl $8, %eax
jmp .L44
.size unalignedBufMixLRCoeffSSE, .-unalignedBufMixLRCoeffSSE
.ident "GCC: (Ubuntu 4.4.0-0ubuntu2) 4.4.0"
.section .note.GNU-stack,"",@progbits

View File

@@ -1,349 +0,0 @@
.file "basic_ops_x86.c"
.text
# alignedMemCpySSE2 -- GCC-generated 32-bit x86 SSE2 block copy.
# Stack arguments in order: destination pointer, source pointer, size in bytes.
# Copies (size >> 6) whole chunks of 64 bytes each; any remainder of
# size modulo 64 is NOT copied by this routine.
# Every load and store is a movdqa, which faults unless its address is
# 16-byte aligned, so both pointers must be 16-byte aligned.
.p2align 4,,15
.globl alignedMemCpySSE2
.type alignedMemCpySSE2, @function
alignedMemCpySSE2:
pushl %esi
pushl %ebx
# After the two pushes the first argument lives at 12(%esp).
# esi = size, edx = destination, ecx = source.
movl 20(%esp), %esi
movl 12(%esp), %edx
movl 16(%esp), %ecx
# esi = number of 64-byte chunks; return immediately when there are none.
shrl $6, %esi
testl %esi, %esi
je .L4
# eax = running byte offset, ebx = chunks copied so far.
xorl %eax, %eax
xorl %ebx, %ebx
.p2align 4,,7
.p2align 3
.L3:
# One chunk per iteration: four 16-byte aligned load/store pairs through xmm0.
addl $1, %ebx
movdqa (%ecx,%eax), %xmm0
movdqa %xmm0, (%edx,%eax)
movdqa 16(%ecx,%eax), %xmm0
movdqa %xmm0, 16(%edx,%eax)
movdqa 32(%ecx,%eax), %xmm0
movdqa %xmm0, 32(%edx,%eax)
movdqa 48(%ecx,%eax), %xmm0
movdqa %xmm0, 48(%edx,%eax)
addl $64, %eax
cmpl %ebx, %esi
jne .L3
.L4:
popl %ebx
popl %esi
ret
.size alignedMemCpySSE2, .-alignedMemCpySSE2
# alignedMemClearSSE2 -- GCC-generated 32-bit x86 SSE2 block clear.
# Stack arguments in order: buffer pointer, size in bytes.
# Zeroes (size >> 6) whole chunks of 64 bytes each; any remainder of
# size modulo 64 is left untouched.
# All stores are movdqa, which faults unless the address is 16-byte aligned,
# so the buffer must be 16-byte aligned.
.p2align 4,,15
.globl alignedMemClearSSE2
.type alignedMemClearSSE2, @function
alignedMemClearSSE2:
# No registers are pushed, so the arguments sit at 4(%esp) and 8(%esp).
# ecx = number of 64-byte chunks; skip everything when it is zero.
movl 8(%esp), %ecx
shrl $6, %ecx
testl %ecx, %ecx
je .L10
# eax = write pointer, edx = chunks cleared so far, xmm0 = all-zero register.
movl 4(%esp), %eax
xorl %edx, %edx
pxor %xmm0, %xmm0
.p2align 4,,7
.p2align 3
.L9:
# One chunk per iteration: four aligned 16-byte stores of zero.
addl $1, %edx
movdqa %xmm0, (%eax)
movdqa %xmm0, 16(%eax)
movdqa %xmm0, 32(%eax)
movdqa %xmm0, 48(%eax)
addl $64, %eax
cmpl %edx, %ecx
jne .L9
.L10:
# "rep; ret" is a two-byte return sequence emitted by the compiler here, where
# the ret is a branch target. NOTE(review): commonly described as a workaround
# for an AMD branch-predictor penalty -- not verifiable from this file.
rep
ret
.size alignedMemClearSSE2, .-alignedMemClearSSE2
# alignedConvertToS16SSE2 -- GCC-generated 32-bit x86 SSE2 code.
# Converts float pairs (frames) to clamped signed 16-bit integers:
#     out = clamp( (int) trunc( in * gain * .LC0 ), -32768, 32767 )
# where .LC0 is the bit pattern of 32767.0f. When the byte flag is non-zero the
# two bytes of every 16-bit result are swapped before being stored.
# Stack arguments in order: source float pointer, destination 16-bit pointer,
# frame count (only its low 16 bits are used, treated as signed in the
# scalar loops), float gain, byte-sized swap flag.
# Returns in eax: sign-extended 16-bit frame count times 4 (bytes per output frame).
# Structure: for counts above 3 a vector loop handles four frames per pass
# using movaps loads and movdqa stores (both need 16-byte aligned addresses);
# leftover frames, or all frames for small counts, go through a scalar loop.
# NOTE(review): the swap flag presumably requests endianness conversion --
# confirm against the C source.
.p2align 4,,15
.globl alignedConvertToS16SSE2
.type alignedConvertToS16SSE2, @function
alignedConvertToS16SSE2:
pushl %ebp
pushl %edi
pushl %esi
pushl %ebx
subl $8, %esp
# Four pushes plus 8 bytes of locals: the first argument lives at 28(%esp).
# Locals: 2(%esp) = saved 16-bit frame count, 4(%esp) = spilled scale factor.
movl 36(%esp), %eax
movss .LC0, %xmm6
# Test the swap flag now; mulss and movl below leave EFLAGS untouched, so the
# jne further down still acts on this comparison.
cmpb $0, 44(%esp)
movl 28(%esp), %edx
movl 32(%esp), %ebx
movl %eax, %esi
# xmm6 = .LC0 * gain = per-sample scale factor.
mulss 40(%esp), %xmm6
jne .L13
# ---- Path without byte swapping ----
testw %ax, %ax
jle .L15
# di = frames / 4 (vector iterations), ebp = di * 4 (frames covered by them).
# leal does not modify flags, so ja tests "frame count > 3" (unsigned).
movl %eax, %edi
shrw $2, %di
cmpw $3, %ax
movw %ax, 2(%esp)
leal 0(,%edi,4), %ebp
ja .L33
.L28:
# Scalar processing starts at frame 0.
xorl %ebp, %ebp
.p2align 4,,7
.p2align 3
.L23:
# Scalar loop setup: start at frame index bp.
# edx = source + index * 8, eax = destination + index * 4,
# edi / ebx hold the clamp limits -32768 / 32767 for the cmov instructions.
movswl %bp,%eax
movl $-32768, %edi
leal (%edx,%eax,8), %edx
leal (%ebx,%eax,4), %eax
movl $32767, %ebx
.p2align 4,,7
.p2align 3
.L25:
# One frame per pass: scale, truncate to int, clamp, store both 16-bit values.
movss (%edx), %xmm0
mulss %xmm6, %xmm0
cvttss2si %xmm0, %ecx
movss 4(%edx), %xmm0
cmpl $-32768, %ecx
mulss %xmm6, %xmm0
cmovl %edi, %ecx
cmpl $32767, %ecx
cmovg %ebx, %ecx
movw %cx, (%eax)
cvttss2si %xmm0, %ecx
cmpl $-32768, %ecx
cmovl %edi, %ecx
cmpl $32767, %ecx
cmovg %ebx, %ecx
addl $1, %ebp
movw %cx, 2(%eax)
addl $8, %edx
addl $4, %eax
cmpw %bp, %si
jg .L25
.L15:
# Common exit: return value = sign-extended frame count * 4, then restore the
# callee-saved registers pushed in the prologue.
movswl %si,%esi
addl $8, %esp
leal 0(,%esi,4), %eax
popl %ebx
popl %esi
popl %edi
popl %ebp
ret
.p2align 4,,7
.p2align 3
.L13:
# ---- Path with byte swapping (flag != 0) ----
testw %ax, %ax
jle .L15
# bp = frames / 4 (vector iterations), eax = bp * 4 (frames covered by them).
# leal does not modify flags, so ja tests "frame count > 3" (unsigned).
movl %eax, %ebp
shrw $2, %bp
cmpw $3, %si
movw %ax, 2(%esp)
leal 0(,%ebp,4), %eax
ja .L34
.L27:
# Scalar processing starts at frame 0.
xorl %eax, %eax
.p2align 4,,7
.p2align 3
.L18:
# Scalar loop setup: start at frame index ax.
# ecx = source + index * 8, edx = destination + index * 4, edi = lower clamp.
movswl %ax,%edi
leal (%edx,%edi,8), %ecx
leal (%ebx,%edi,4), %edx
movl $-32768, %edi
.p2align 4,,7
.p2align 3
.L20:
# One frame per pass. ebp is used both as the upper clamp (32767) and as a
# scratch register for the swap, hence it is reloaded with 32767 each time.
# Swap: ebp = bits 8..15 of the value (bh), ebx <<= 8, OR them; the low
# 16 bits stored by movw are then the byte-swapped result.
movss (%ecx), %xmm0
movl $32767, %ebp
mulss %xmm6, %xmm0
cvttss2si %xmm0, %ebx
movss 4(%ecx), %xmm0
cmpl $-32768, %ebx
cmovl %edi, %ebx
cmpl $32767, %ebx
mulss %xmm6, %xmm0
cmovg %ebp, %ebx
movzbl %bh, %ebp
sall $8, %ebx
orl %ebp, %ebx
movl $32767, %ebp
movw %bx, (%edx)
cvttss2si %xmm0, %ebx
cmpl $-32768, %ebx
cmovl %edi, %ebx
cmpl $32767, %ebx
cmovg %ebp, %ebx
addl $1, %eax
movzbl %bh, %ebp
addl $8, %ecx
sall $8, %ebx
orl %ebp, %ebx
movw %bx, 2(%edx)
addl $4, %edx
cmpw %ax, %si
jg .L20
jmp .L15
.p2align 4,,7
.p2align 3
.L34:
# Vector setup for the byte-swapping path. If the vectorizable frame count is
# zero, fall back to the scalar loop from frame 0.
testw %ax, %ax
je .L27
# xmm7 = scale factor broadcast to all four lanes,
# xmm3 = four copies of -32768 (.LC1), xmm2 = four copies of 32767 (.LC2).
# xmm6 is reused as a temporary inside the loop, so the scalar scale factor
# is spilled to 4(%esp) and reloaded afterwards.
movaps %xmm6, %xmm0
xorl %ecx, %ecx
movdqa .LC1, %xmm3
shufps $0, %xmm0, %xmm0
movdqa .LC2, %xmm2
movss %xmm6, 4(%esp)
xorl %edi, %edi
movaps %xmm0, %xmm7
.p2align 4,,7
.p2align 3
.L19:
# Four frames (eight floats) per pass; ecx is the destination byte offset and
# the source is addressed with twice that offset.
# Steps: scale, truncate (cvttps2dq), clamp to [-32768, 32767] with
# pcmpgtd/pand/pandn/por, then build two variants of every value:
#   (v & 0xFF00) >> 8  -- .LC3 mask and psrad: high byte moved to the low byte
#   v << 8             -- pslld: low byte moved to the high byte
# Each variant is narrowed to 16 bits with the punpck sequences (keeping the
# low word of every dword) and the two are ORed into byte-swapped results.
movaps (%edx,%ecx,2), %xmm0
movdqa %xmm2, %xmm5
movdqa %xmm2, %xmm6
addl $1, %edi
movaps 16(%edx,%ecx,2), %xmm4
mulps %xmm7, %xmm0
mulps %xmm7, %xmm4
cvttps2dq %xmm0, %xmm0
movdqa %xmm0, %xmm1
pcmpgtd %xmm3, %xmm1
pand %xmm1, %xmm0
pandn %xmm3, %xmm1
por %xmm0, %xmm1
cvttps2dq %xmm4, %xmm4
movdqa %xmm1, %xmm0
pcmpgtd %xmm2, %xmm0
pand %xmm0, %xmm5
pandn %xmm1, %xmm0
movdqa %xmm0, %xmm1
movdqa %xmm4, %xmm0
por %xmm5, %xmm1
pcmpgtd %xmm3, %xmm0
movdqa .LC3, %xmm5
pand %xmm0, %xmm4
pand %xmm1, %xmm5
pandn %xmm3, %xmm0
psrad $8, %xmm5
por %xmm4, %xmm0
pslld $8, %xmm1
movdqa %xmm0, %xmm4
pcmpgtd %xmm2, %xmm4
pand %xmm4, %xmm6
pandn %xmm0, %xmm4
movdqa %xmm4, %xmm0
movdqa .LC3, %xmm4
por %xmm6, %xmm0
pand %xmm0, %xmm4
pslld $8, %xmm0
psrad $8, %xmm4
movdqa %xmm5, %xmm6
punpcklwd %xmm4, %xmm5
punpckhwd %xmm4, %xmm6
movdqa %xmm5, %xmm4
punpcklwd %xmm6, %xmm5
punpckhwd %xmm6, %xmm4
punpcklwd %xmm4, %xmm5
movdqa %xmm1, %xmm4
punpcklwd %xmm0, %xmm1
punpckhwd %xmm0, %xmm4
movdqa %xmm1, %xmm6
punpcklwd %xmm4, %xmm1
punpckhwd %xmm4, %xmm6
punpcklwd %xmm6, %xmm1
por %xmm1, %xmm5
movdqa %xmm5, (%ebx,%ecx)
addl $16, %ecx
cmpw %di, %bp
ja .L19
# Vector part finished: compare frames covered (ax) with the saved total.
# movss does not touch EFLAGS; it restores the scalar scale factor so the
# scalar tail at .L18 can use xmm6 again.
cmpw 2(%esp), %ax
movss 4(%esp), %xmm6
jne .L18
jmp .L15
.p2align 4,,7
.p2align 3
.L33:
# Vector setup for the path without byte swapping. If the vectorizable frame
# count is zero, fall back to the scalar loop from frame 0.
testw %bp, %bp
.p2align 4,,3
.p2align 3
je .L28
# xmm5 = scale factor broadcast to all four lanes,
# xmm3 = four copies of -32768 (.LC1), xmm2 = four copies of 32767 (.LC2).
movaps %xmm6, %xmm0
xorl %eax, %eax
movdqa .LC1, %xmm3
shufps $0, %xmm0, %xmm0
movdqa .LC2, %xmm2
xorl %ecx, %ecx
movaps %xmm0, %xmm5
.p2align 4,,7
.p2align 3
.L24:
# Four frames (eight floats) per pass; eax is the destination byte offset and
# the source is addressed with twice that offset.
# Steps: scale, truncate (cvttps2dq), clamp to [-32768, 32767] with
# pcmpgtd/pand/pandn/por, narrow the eight dwords to eight words with the
# punpck sequence (keeping the low word of every dword), store 16 bytes.
movaps (%edx,%eax,2), %xmm0
addl $1, %ecx
movdqa %xmm2, %xmm7
movaps 16(%edx,%eax,2), %xmm4
mulps %xmm5, %xmm0
mulps %xmm5, %xmm4
cvttps2dq %xmm0, %xmm0
movdqa %xmm0, %xmm1
pcmpgtd %xmm3, %xmm1
pand %xmm1, %xmm0
pandn %xmm3, %xmm1
por %xmm0, %xmm1
cvttps2dq %xmm4, %xmm4
movdqa %xmm1, %xmm0
pcmpgtd %xmm2, %xmm0
pand %xmm0, %xmm7
pandn %xmm1, %xmm0
movdqa %xmm0, %xmm1
movdqa %xmm4, %xmm0
por %xmm7, %xmm1
pcmpgtd %xmm3, %xmm0
movdqa %xmm2, %xmm7
pand %xmm0, %xmm4
pandn %xmm3, %xmm0
por %xmm4, %xmm0
movdqa %xmm0, %xmm4
pcmpgtd %xmm2, %xmm4
pand %xmm4, %xmm7
pandn %xmm0, %xmm4
movdqa %xmm4, %xmm0
movdqa %xmm1, %xmm4
por %xmm7, %xmm0
punpckhwd %xmm0, %xmm4
punpcklwd %xmm0, %xmm1
movdqa %xmm1, %xmm0
punpcklwd %xmm4, %xmm1
punpckhwd %xmm4, %xmm0
punpcklwd %xmm0, %xmm1
movdqa %xmm1, (%ebx,%eax)
addl $16, %eax
cmpw %cx, %di
ja .L24
# Vector part finished: if the saved total differs from the frames covered
# (bp), run the scalar tail at .L23 starting from frame bp.
cmpw %bp, 2(%esp)
jne .L23
jmp .L15
.size alignedConvertToS16SSE2, .-alignedConvertToS16SSE2
# Read-only constants referenced by alignedConvertToS16SSE2.
.section .rodata.cst4,"aM",@progbits,4
.align 4
.LC0:
# Bit pattern 0x46FFFE00, i.e. the single-precision float 32767.0
# (scale from unit-range float samples to the signed 16-bit range).
.long 1191181824
.section .rodata.cst16,"aM",@progbits,16
.align 16
.LC1:
# Four lanes of -32768: lower clamp bound for the packed conversion.
.long -32768
.long -32768
.long -32768
.long -32768
.align 16
.LC2:
# Four lanes of 32767: upper clamp bound for the packed conversion.
.long 32767
.long 32767
.long 32767
.long 32767
.align 16
.LC3:
# Four lanes of 65280 (0xFF00): mask selecting bits 8..15 of each lane, used
# by the byte-swapping vector loop.
.long 65280
.long 65280
.long 65280
.long 65280
.ident "GCC: (Ubuntu 4.4.0-0ubuntu2) 4.4.0"
.section .note.GNU-stack,"",@progbits

View File

@@ -1,10 +1,8 @@
#ifndef SINGLE_SOURCE_COMPILE
/*
* fx_mixer.cpp - effect-mixer for LMMS
*
* Copyright (c) 2008 Tobias Doerffel <tobydox/at/users.sourceforge.net>
*
* Copyright (c) 2008-2009 Tobias Doerffel <tobydox/at/users.sourceforge.net>
*
* This file is part of Linux MultiMedia Studio - http://lmms.sourceforge.net
*
* This program is free software; you can redistribute it and/or
@@ -28,7 +26,7 @@
#include <QtXml/QDomElement>
#include "fx_mixer.h"
#include "basic_ops.h"
#include "Cpu.h"
#include "effect.h"
#include "song.h"
@@ -39,7 +37,7 @@ fxChannel::fxChannel( model * _parent ) :
m_stillRunning( false ),
m_peakLeft( 0.0f ),
m_peakRight( 0.0f ),
m_buffer( alignedAllocFrames( engine::getMixer()->framesPerPeriod() ) ),
m_buffer( CPU::allocFrames( engine::getMixer()->framesPerPeriod() ) ),
m_muteModel( false, _parent ),
m_volumeModel( 1.0, 0.0, 2.0, 0.01, _parent ),
m_name(),
@@ -54,7 +52,7 @@ fxChannel::fxChannel( model * _parent ) :
fxChannel::~fxChannel()
{
alignedFreeFrames( m_buffer );
CPU::freeFrames( m_buffer );
}
@@ -93,7 +91,8 @@ void fxMixer::mixToChannel( const sampleFrame * _buf, fx_ch_t _ch )
if( m_fxChannels[_ch]->m_muteModel.value() == false )
{
m_fxChannels[_ch]->m_lock.lock();
alignedBufMix( m_fxChannels[_ch]->m_buffer, _buf, engine::getMixer()->framesPerPeriod() );
CPU::bufMix( m_fxChannels[_ch]->m_buffer, _buf,
engine::getMixer()->framesPerPeriod() );
m_fxChannels[_ch]->m_used = true;
m_fxChannels[_ch]->m_lock.unlock();
}
@@ -248,4 +247,3 @@ void fxMixer::loadSettings( const QDomElement & _this )
}
#endif

View File

@@ -62,7 +62,7 @@
#include "main_window.h"
#include "project_renderer.h"
#include "song.h"
#include "basic_ops.h"
#include "Cpu.h"
// TODO Make a factory class for this (or hide it behind engine)
#include "lmms_style.h"
@@ -96,8 +96,8 @@ int main( int argc, char * * argv )
// initialize RNG
srand( getpid() + time( 0 ) );
// init CPU specific optimized basic ops
initBasicOps();
// init CPU specific optimized operations
CPU::init();
bool core_only = false;
bool fullscreen = true;

View File

@@ -2,7 +2,7 @@
* mixer.cpp - audio-device-independent mixer for LMMS
*
* Copyright (c) 2004-2009 Tobias Doerffel <tobydox/at/users.sourceforge.net>
*
*
* This file is part of Linux MultiMedia Studio - http://lmms.sourceforge.net
*
* This program is free software; you can redistribute it and/or
@@ -22,7 +22,6 @@
*
*/
#include <math.h>
#include "mixer.h"
@@ -41,7 +40,7 @@
#include "sample_play_handle.h"
#include "piano_roll.h"
#include "micro_timer.h"
#include "basic_ops.h"
#include "Cpu.h"
#include "audio_device.h"
#include "midi_client.h"
@@ -125,7 +124,7 @@ public:
MixerWorkerThread( int _worker_num, mixer * _mixer ) :
QThread( _mixer ),
m_workingBuf( alignedAllocFrames( _mixer->framesPerPeriod() ) ),
m_workingBuf( CPU::allocFrames( _mixer->framesPerPeriod() ) ),
m_workerNum( _worker_num ),
m_quit( false ),
m_mixer( _mixer ),
@@ -135,7 +134,7 @@ public:
virtual ~MixerWorkerThread()
{
alignedFreeFrames( m_workingBuf );
CPU::freeFrames( m_workingBuf );
}
virtual void quit( void )
@@ -295,7 +294,7 @@ mixer::mixer( void ) :
{
m_inputBufferFrames[i] = 0;
m_inputBufferSize[i] = DEFAULT_BUFFER_SIZE * 100;
m_inputBuffer[i] = alignedAllocFrames(
m_inputBuffer[i] = CPU::allocFrames(
DEFAULT_BUFFER_SIZE * 100 );
clearAudioBuffer( m_inputBuffer[i], m_inputBufferSize[i] );
}
@@ -337,10 +336,10 @@ mixer::mixer( void ) :
m_fifo = new fifo( 1 );
}
m_workingBuf = alignedAllocFrames( m_framesPerPeriod );
m_workingBuf = CPU::allocFrames( m_framesPerPeriod );
for( Uint8 i = 0; i < 3; i++ )
{
m_readBuf = alignedAllocFrames( m_framesPerPeriod );
m_readBuf = CPU::allocFrames( m_framesPerPeriod );
clearAudioBuffer( m_readBuf, m_framesPerPeriod );
m_bufferPool.push_back( m_readBuf );
}
@@ -389,10 +388,10 @@ mixer::~mixer()
for( Uint8 i = 0; i < 3; i++ )
{
alignedFreeFrames( m_bufferPool[i] );
CPU::freeFrames( m_bufferPool[i] );
}
alignedFreeFrames( m_workingBuf );
CPU::freeFrames( m_workingBuf );
}
@@ -504,9 +503,9 @@ void mixer::pushInputFrames( sampleFrame * _ab, const f_cnt_t _frames )
if( frames + _frames > size )
{
size = qMax( size * 2, frames + _frames );
sampleFrame * ab = alignedAllocFrames( size );
alignedMemCpy( ab, buf, frames * sizeof( sampleFrame ) );
alignedFreeFrames( buf );
sampleFrame * ab = CPU::allocFrames( size );
CPU::memCpy( ab, buf, frames * sizeof( sampleFrame ) );
CPU::freeFrames( buf );
m_inputBufferSize[ m_inputBufferWrite ] = size;
m_inputBuffer[ m_inputBufferWrite ] = ab;
@@ -514,7 +513,7 @@ void mixer::pushInputFrames( sampleFrame * _ab, const f_cnt_t _frames )
buf = ab;
}
alignedMemCpy( &buf[ frames ], _ab, _frames * sizeof( sampleFrame ) );
CPU::memCpy( &buf[ frames ], _ab, _frames * sizeof( sampleFrame ) );
m_inputBufferFrames[ m_inputBufferWrite ] += _frames;
unlockInputFrames();
@@ -686,7 +685,7 @@ void mixer::bufferToPort( const sampleFrame * _buf,
const int loop1_frame = qMin<int>( end_frame, m_framesPerPeriod );
_port->lockFirstBuffer();
unalignedBufMixLRCoeff( _port->firstBuffer() + start_frame,
CPU::unalignedBufMixLRCoeff( _port->firstBuffer() + start_frame,
_buf, _vv.vol[0], _vv.vol[1],
loop1_frame - start_frame );
_port->unlockFirstBuffer();
@@ -697,7 +696,7 @@ void mixer::bufferToPort( const sampleFrame * _buf,
const int frames_done = m_framesPerPeriod - start_frame;
end_frame -= m_framesPerPeriod;
end_frame = qMin<int>( end_frame, m_framesPerPeriod );
unalignedBufMixLRCoeff( _port->secondBuffer(),
CPU::unalignedBufMixLRCoeff( _port->secondBuffer(),
_buf+frames_done,
_vv.vol[0], _vv.vol[1],
end_frame );
@@ -720,7 +719,7 @@ void mixer::clearAudioBuffer( sampleFrame * _ab, const f_cnt_t _frames,
{
if( likely( (size_t)( _ab+_offset ) % 16 == 0 && _frames % 8 == 0 ) )
{
alignedMemClear( _ab+_offset, sizeof( *_ab ) * _frames );
CPU::memClear( _ab+_offset, sizeof( *_ab ) * _frames );
}
else
{
@@ -1157,9 +1156,9 @@ void mixer::fifoWriter::run( void )
const fpp_t frames = m_mixer->framesPerPeriod();
while( m_writing )
{
sampleFrameA * buffer = alignedAllocFrames( frames );
sampleFrameA * buffer = CPU::allocFrames( frames );
const sampleFrameA * b = m_mixer->renderNextBuffer();
alignedMemCpy( buffer, b, frames * sizeof( sampleFrameA ) );
CPU::memCpy( buffer, b, frames * sizeof( sampleFrameA ) );
m_fifo->write( buffer );
}