diff --git a/CMakeLists.txt b/CMakeLists.txt index fe46bb2ae..7a33bb09b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -454,47 +454,42 @@ SET(LMMS_ER_H ${CMAKE_CURRENT_BINARY_DIR}/embedded_resources.h) ADD_FILE_DEPENDENCIES(${CMAKE_BINARY_DIR}/lmmsconfig.h ${lmms_MOC_out}) ADD_CUSTOM_COMMAND(OUTPUT ${LMMS_ER_H} COMMAND ${BIN2RES} ARGS ${lmms_EMBEDDED_RESOURCES} > ${LMMS_ER_H} DEPENDS ${BIN2RES}) -SET(BASIC_OPS_X86_C "${CMAKE_SOURCE_DIR}/src/core/basic_ops_x86.c") +# build CPU specific optimized modules IF(LMMS_HOST_X86 OR LMMS_HOST_X86_64) -ADD_CUSTOM_TARGET(regen-basic-ops) - -IF(LMMS_HOST_X86) -SET(opt_targets mmx sse sse2) -SET(host_arch x86) -ELSE(LMMS_HOST_X86) -SET(opt_targets sse sse2) -SET(host_arch x86_64) -ENDIF(LMMS_HOST_X86) + IF(LMMS_HOST_X86) + SET(opt_targets mmx sse sse2) + ELSE(LMMS_HOST_X86) + SET(opt_targets sse sse2) + ENDIF(LMMS_HOST_X86) FOREACH(opt_target ${opt_targets}) STRING(TOUPPER ${opt_target} OPT_TARGET) - SET(BASIC_OPS_X86_TARGET_S "${CMAKE_SOURCE_DIR}/src/core/basic_ops_${host_arch}_${opt_target}.s") - SET(BASIC_OPS_X86_TARGET_O "${CMAKE_BINARY_DIR}/basic_ops_${host_arch}_${opt_target}.o") + SET(BASIC_OPS_X86_TARGET_S "") + SET(CPU_X86_C "${CMAKE_SOURCE_DIR}/src/core/CpuX86.c") + SET(CPU_X86_TARGET_O "${CMAKE_BINARY_DIR}/CpuX86_${opt_target}.o") + SET(FPMATH_FLAGS "") IF(NOT "${OPT_TARGET}" STREQUAL "MMX") SET(FPMATH_FLAGS "-mfpmath=sse") ENDIF(NOT "${OPT_TARGET}" STREQUAL "MMX") - IF(EXISTS "$ENV{SVN_C_COMPILER}") - SET(C_COMPILER $ENV{SVN_C_COMPILER}) - ELSE(EXISTS "$ENV{SVN_C_COMPILER}") - SET(C_COMPILER ${CMAKE_C_COMPILER}) - ENDIF(EXISTS "$ENV{SVN_C_COMPILER}") IF("${CMAKE_C_COMPILER_ARG1}" STREQUAL " gcc") SET(CMAKE_C_COMPILER_ARG1 gcc) ENDIF("${CMAKE_C_COMPILER_ARG1}" STREQUAL " gcc") - ADD_CUSTOM_TARGET(regen-basic-ops-${opt_target} COMMAND ${C_COMPILER} -O2 -fno-stack-protector -ftree-vectorize -ftree-vectorizer-verbose=2 -fomit-frame-pointer -c -S -I${CMAKE_SOURCE_DIR}/include -I${CMAKE_BINARY_DIR} -g0 
-DBUILD_${OPT_TARGET} -m${opt_target} ${FPMATH_FLAGS} -o ${BASIC_OPS_X86_TARGET_S} ${BASIC_OPS_X86_C} DEPENDS ${BASIC_OPS_X86_C}) - ADD_CUSTOM_COMMAND(OUTPUT ${BASIC_OPS_X86_TARGET_O} COMMAND ${CMAKE_C_COMPILER} ARGS ${CMAKE_C_COMPILER_ARG1} ${BASIC_OPS_X86_TARGET_S} -c -o ${BASIC_OPS_X86_TARGET_O} DEPENDS ${BASIC_OPS_X86_TARGET_S}) - ADD_DEPENDENCIES(regen-basic-ops regen-basic-ops-${opt_target}) - SET(opt_target_objects ${opt_target_objects} ${BASIC_OPS_X86_TARGET_O}) + SET(COMPILE_CMD ${CMAKE_C_COMPILER} ${CPU_X86_C} -O2 -fno-stack-protector -ftree-vectorize -fomit-frame-pointer -c -I${CMAKE_SOURCE_DIR}/include -I${CMAKE_BINARY_DIR} -g0 -DBUILD_${OPT_TARGET} -m${opt_target} ${FPMATH_FLAGS}) + ADD_CUSTOM_COMMAND(OUTPUT ${CPU_X86_TARGET_O} COMMAND ${COMPILE_CMD} -o ${CPU_X86_TARGET_O} DEPENDS ${CPU_X86_C}) + ADD_CUSTOM_TARGET(debug-${opt_target} COMMAND ${COMPILE_CMD} -o ${CPU_X86_TARGET_O}.s -S -ftree-vectorizer-verbose=2) + SET(cpu_objects ${cpu_objects} ${CPU_X86_TARGET_O}) ENDFOREACH(opt_target ${opt_targets}) -SET(lmms_SOURCES ${lmms_SOURCES} ${opt_target_objects}) -# to be used by maintainer with special ultra-optimizing super duper GCC + + SET(lmms_SOURCES ${lmms_SOURCES} ${cpu_objects}) + ENDIF(LMMS_HOST_X86 OR LMMS_HOST_X86_64) + IF(WIN32) SET(WINRC "${CMAKE_BINARY_DIR}/lmmsrc.obj") IF(LMMS_HOST_X86_64) diff --git a/include/basic_ops.h b/include/Cpu.h similarity index 52% rename from include/basic_ops.h rename to include/Cpu.h index 5163b333e..871edc075 100644 --- a/include/basic_ops.h +++ b/include/Cpu.h @@ -1,8 +1,8 @@ /* - * basic_ops.h - basic memory operations + * Cpu.h - CPU specific accelerated operations + * + * Copyright (c) 2008-2009 Tobias Doerffel * - * Copyright (c) 2008 Tobias Doerffel - * * This file is part of Linux MultiMedia Studio - http://lmms.sourceforge.net * * This program is free software; you can redistribute it and/or @@ -22,9 +22,8 @@ * */ - -#ifndef _BASIC_OPS_H -#define _BASIC_OPS_H +#ifndef _CPU_H +#define _CPU_H #include
"lmms_basics.h" @@ -32,56 +31,64 @@ #include #endif -void initBasicOps( void ); +#ifdef __cplusplus +namespace CPU +{ +#endif -void * alignedMalloc( int _bytes ); -void alignedFree( void * _buf ); +void init(); -sampleFrameA * alignedAllocFrames( int _frames ); -void alignedFreeFrames( sampleFrameA * _buf ); +void * memAlloc( int _bytes ); +void memFree( void * _buf ); + +sampleFrameA * allocFrames( int _frames ); +void freeFrames( sampleFrameA * _buf ); -// all aligned* functions assume data to be 16 byte aligned and size to be -// multiples of 64 -typedef void (*alignedMemCpyFunc)( void * RP _dst, const void * RP _src, +// all functions assume data to be 16 byte aligned and size to be +// multiples of 64 (except for unaligned*()) +typedef void (*MemCpyFunc)( void * RP _dst, const void * RP _src, int _size ); -typedef void (*alignedMemClearFunc)( void * RP _dst, int _size ); -typedef void (*alignedBufApplyGainFunc)( sampleFrameA * RP _dst, +typedef void (*MemClearFunc)( void * RP _dst, int _size ); +typedef void (*BufApplyGainFunc)( sampleFrameA * RP _dst, float _gain, int _frames ); -typedef void (*alignedBufMixFunc)( sampleFrameA * RP _dst, +typedef void (*BufMixFunc)( sampleFrameA * RP _dst, const sampleFrameA * RP _src, int _frames ); -typedef void (*alignedBufMixLRCoeffFunc)( sampleFrameA * RP _dst, +typedef void (*BufMixLRCoeffFunc)( sampleFrameA * RP _dst, const sampleFrameA * RP _src, float _left, float _right, int _frames ); -typedef void (*unalignedBufMixLRCoeffFunc)( sampleFrame * RP _dst, +typedef void (*UnalignedBufMixLRCoeffFunc)( sampleFrame * RP _dst, const sampleFrame * RP _src, float _left, float _right, int _frames ); -typedef void (*alignedBufWetDryMixFunc)( sampleFrameA * RP _dst, +typedef void (*BufWetDryMixFunc)( sampleFrameA * RP _dst, const sampleFrameA * RP _src, float _wet, float _dry, int _frames ); -typedef void (*alignedBufWetDryMixSplittedFunc)( sampleFrameA * RP _dst, +typedef void (*BufWetDryMixSplittedFunc)( sampleFrameA * RP _dst, const
float * RP _left, const float * RP _right, float _wet, float _dry, int _frames ); -typedef int (*alignedConvertToS16Func)( const sampleFrameA * RP _src, +typedef int (*ConvertToS16Func)( const sampleFrameA * RP _src, intSampleFrameA * RP _dst, const fpp_t _frames, const float _master_gain, const bool _convert_endian ); -extern alignedMemCpyFunc alignedMemCpy; -extern alignedMemClearFunc alignedMemClear; -extern alignedBufApplyGainFunc alignedBufApplyGain; -extern alignedBufMixFunc alignedBufMix; -extern alignedBufMixLRCoeffFunc alignedBufMixLRCoeff; -extern unalignedBufMixLRCoeffFunc unalignedBufMixLRCoeff; -extern alignedBufWetDryMixFunc alignedBufWetDryMix; -extern alignedBufWetDryMixSplittedFunc alignedBufWetDryMixSplitted; -extern alignedConvertToS16Func alignedConvertToS16; +extern MemCpyFunc memCpy; +extern MemClearFunc memClear; +extern BufApplyGainFunc bufApplyGain; +extern BufMixFunc bufMix; +extern BufMixLRCoeffFunc bufMixLRCoeff; +extern UnalignedBufMixLRCoeffFunc unalignedBufMixLRCoeff; +extern BufWetDryMixFunc bufWetDryMix; +extern BufWetDryMixSplittedFunc bufWetDryMixSplitted; +extern ConvertToS16Func convertToS16; +#ifdef __cplusplus +} +#endif #ifdef LMMS_HOST_X86 #define X86_OPTIMIZATIONS diff --git a/include/audio_dummy.h b/include/audio_dummy.h index f64fbb8f5..61670ad25 100644 --- a/include/audio_dummy.h +++ b/include/audio_dummy.h @@ -1,8 +1,8 @@ /* * audio_dummy.h - dummy-audio-device * - * Copyright (c) 2004-2008 Tobias Doerffel - * + * Copyright (c) 2004-2009 Tobias Doerffel + * * This file is part of Linux MultiMedia Studio - http://lmms.sourceforge.net * * This program is free software; you can redistribute it and/or @@ -22,12 +22,11 @@ * */ - #ifndef _AUDIO_DUMMY_H #define _AUDIO_DUMMY_H #include "audio_device.h" -#include "basic_ops.h" +#include "Cpu.h" #include "micro_timer.h" @@ -45,7 +44,7 @@ public: stopProcessing(); } - inline static QString name( void ) + inline static QString name() { return( QT_TRANSLATE_NOOP( "setupWidget", 
"Dummy (no sound output)" ) ); @@ -64,11 +63,11 @@ public: { } - virtual void saveSettings( void ) + virtual void saveSettings() { } - virtual void show( void ) + virtual void show() { parentWidget()->hide(); QWidget::show(); @@ -78,12 +77,12 @@ public: private: - virtual void startProcessing( void ) + virtual void startProcessing() { start(); } - virtual void stopProcessing( void ) + virtual void stopProcessing() { if( isRunning() ) { @@ -92,7 +91,7 @@ private: } } - virtual void run( void ) + virtual void run() { microTimer timer; while( true ) @@ -104,7 +103,7 @@ private: { break; } - alignedFreeFrames( b ); + CPU::freeFrames( b ); const Sint32 microseconds = static_cast( getMixer()->framesPerPeriod() * diff --git a/plugins/ladspa_effect/ladspa_effect.cpp b/plugins/ladspa_effect/ladspa_effect.cpp index 6303abed7..3a1ddfe37 100644 --- a/plugins/ladspa_effect/ladspa_effect.cpp +++ b/plugins/ladspa_effect/ladspa_effect.cpp @@ -3,7 +3,7 @@ * * Copyright (c) 2006-2008 Danny McRae * Copyright (c) 2009 Tobias Doerffel - * + * * This file is part of Linux MultiMedia Studio - http://lmms.sourceforge.net * * This program is free software; you can redistribute it and/or @@ -23,7 +23,6 @@ * */ - #include #include "ladspa_effect.h" @@ -35,7 +34,7 @@ #include "ladspa_subplugin_features.h" #include "mixer.h" #include "effect_chain.h" -#include "basic_ops.h" +#include "Cpu.h" #include "automation_pattern.h" #include "controller_connection.h" @@ -82,7 +81,7 @@ ladspaEffect::ladspaEffect( model * _parent, arg( m_key.second ), QMessageBox::Ok, QMessageBox::NoButton ); } - setOkay( FALSE ); + setOkay( false ); return; } @@ -105,7 +104,7 @@ ladspaEffect::~ladspaEffect() -void ladspaEffect::changeSampleRate( void ) +void ladspaEffect::changeSampleRate() { multimediaProject mmp( multimediaProject::EffectSettings ); m_controls->saveState( mmp, mmp.content() ); @@ -141,7 +140,7 @@ bool ladspaEffect::processAudioBuffer( sampleFrame * _buf, if( !isOkay() || dontRun() || !isRunning() || 
!isEnabled() ) { m_pluginMutex.unlock(); - return( FALSE ); + return false; } int frames = _frames; @@ -150,7 +149,7 @@ bool ladspaEffect::processAudioBuffer( sampleFrame * _buf, if( m_maxSampleRate < engine::getMixer()->processingSampleRate() ) { o_buf = _buf; - _buf = alignedAllocFrames( _frames ); + _buf = CPU::allocFrames( _frames ); sampleDown( o_buf, _buf, m_maxSampleRate ); frames = _frames * m_maxSampleRate / engine::getMixer()->processingSampleRate(); @@ -258,7 +257,7 @@ bool ladspaEffect::processAudioBuffer( sampleFrame * _buf, } if( channel >= 1 && channel <= DEFAULT_CHANNELS ) { - alignedBufWetDryMixSplitted( _buf, buffers[0], buffers[1], + CPU::bufWetDryMixSplitted( _buf, buffers[0], buffers[1], getWetLevel(), getDryLevel(), frames ); } @@ -272,7 +271,7 @@ bool ladspaEffect::processAudioBuffer( sampleFrame * _buf, if( o_buf != NULL ) { sampleBack( _buf, o_buf, m_maxSampleRate ); - alignedFreeFrames( _buf ); + CPU::freeFrames( _buf ); } checkGate( out_sum / frames ); @@ -280,7 +279,7 @@ bool ladspaEffect::processAudioBuffer( sampleFrame * _buf, bool is_running = isRunning(); m_pluginMutex.unlock(); - return( is_running ); + return is_running; } @@ -298,7 +297,7 @@ void ladspaEffect::setControl( int _control, LADSPA_Data _value ) -void ladspaEffect::pluginInstantiation( void ) +void ladspaEffect::pluginInstantiation() { m_maxSampleRate = maxSamplerate( displayName() ); @@ -469,7 +468,7 @@ void ladspaEffect::pluginInstantiation( void ) QMessageBox::warning( 0, "Effect", "Can't get LADSPA descriptor function: " + m_key.second, QMessageBox::Ok, QMessageBox::NoButton ); - setOkay( FALSE ); + setOkay( false ); return; } if( m_descriptor->run == NULL ) @@ -477,7 +476,7 @@ void ladspaEffect::pluginInstantiation( void ) QMessageBox::warning( 0, "Effect", "Plugin has no processor: " + m_key.second, QMessageBox::Ok, QMessageBox::NoButton ); - setDontRun( TRUE ); + setDontRun( true ); } for( ch_cnt_t proc = 0; proc < getProcessorCount(); proc++ ) { @@ -488,7 +487,7 
@@ void ladspaEffect::pluginInstantiation( void ) QMessageBox::warning( 0, "Effect", "Can't get LADSPA instance: " + m_key.second, QMessageBox::Ok, QMessageBox::NoButton ); - setOkay( FALSE ); + setOkay( false ); return; } m_handles.append( effect ); @@ -508,7 +507,7 @@ void ladspaEffect::pluginInstantiation( void ) QMessageBox::warning( 0, "Effect", "Failed to connect port: " + m_key.second, QMessageBox::Ok, QMessageBox::NoButton ); - setDontRun( TRUE ); + setDontRun( true ); return; } } @@ -525,7 +524,7 @@ void ladspaEffect::pluginInstantiation( void ) -void ladspaEffect::pluginDestruction( void ) +void ladspaEffect::pluginDestruction() { if( !isOkay() ) { @@ -571,9 +570,9 @@ sample_rate_t ladspaEffect::maxSamplerate( const QString & _name ) } if( __buggy_plugins.contains( _name ) ) { - return( __buggy_plugins[_name] ); + return __buggy_plugins[_name]; } - return( engine::getMixer()->processingSampleRate() ); + return engine::getMixer()->processingSampleRate(); } @@ -585,9 +584,9 @@ extern "C" // neccessary for getting instance out of shared lib plugin * PLUGIN_EXPORT lmms_plugin_main( model * _parent, void * _data ) { - return( new ladspaEffect( _parent, + return new ladspaEffect( _parent, static_cast( - _data ) ) ); + _data ) ); } } diff --git a/src/core/basic_ops.cpp b/src/core/Cpu.cpp similarity index 69% rename from src/core/basic_ops.cpp rename to src/core/Cpu.cpp index 5f54667e5..7e72e5a8f 100644 --- a/src/core/basic_ops.cpp +++ b/src/core/Cpu.cpp @@ -1,8 +1,8 @@ /* - * basic_ops.cpp - basic memory operations + * Cpu.cpp - CPU specific accelerated operations + * + * Copyright (c) 2008-2009 Tobias Doerffel * - * Copyright (c) 2008 Tobias Doerffel - * * This file is part of Linux MultiMedia Studio - http://lmms.sourceforge.net * * This program is free software; you can redistribute it and/or @@ -23,17 +23,19 @@ */ -#include "basic_ops.h" +#include "Cpu.h" #include #include #include - -void * alignedMalloc( int _bytes ) +namespace CPU { - char
*ptr,*ptr2,*aligned_ptr; + +void * memAlloc( int _bytes ) +{ + char *ptr,*ptr2,*_ptr; int align_mask = ALIGN_SIZE- 1; ptr =(char *) malloc( _bytes + ALIGN_SIZE + sizeof(int) ); if( ptr == NULL ) @@ -42,17 +44,19 @@ void * alignedMalloc( int _bytes ) } ptr2 = ptr + sizeof(int); - aligned_ptr = ptr2 + ( ALIGN_SIZE- ( (size_t) ptr2 & align_mask ) ); + _ptr = ptr2 + ( ALIGN_SIZE- ( (size_t) ptr2 & align_mask ) ); - ptr2 = aligned_ptr - sizeof(int); - *((int *) ptr2) = (int)( aligned_ptr - ptr ); + ptr2 = _ptr - sizeof(int); + *((int *) ptr2) = (int)( _ptr - ptr ); - return aligned_ptr; + return _ptr; } -void alignedFree( void * _buf ) + + +void memFree( void * _buf ) { if( _buf ) { @@ -66,22 +70,26 @@ void alignedFree( void * _buf ) } -sampleFrameA * alignedAllocFrames( int _n ) + + +sampleFrameA * allocFrames( int _n ) { - return (sampleFrameA *) alignedMalloc( _n * sizeof( sampleFrameA ) ); + return (sampleFrameA *) memAlloc( _n * sizeof( sampleFrameA ) ); } -void alignedFreeFrames( sampleFrame * _buf ) + + +void freeFrames( sampleFrame * _buf ) { - alignedFree( _buf ); + memFree( _buf ); } // slow fallback -void alignedMemCpyNoOpt( void * RP _dst, const void * RP _src, int _size ) +void memCpyNoOpt( void * RP _dst, const void * RP _src, int _size ) { const int s = _size / sizeof( int ); const int * RP src = (const int *) _src; @@ -110,7 +118,7 @@ void alignedMemCpyNoOpt( void * RP _dst, const void * RP _src, int _size ) // slow fallback -void alignedMemClearNoOpt( void * _dst, int _size ) +void memClearNoOpt( void * _dst, int _size ) { const int s = _size / ( sizeof( int ) * 4 ); int * dst = (int *) _dst; @@ -126,7 +134,7 @@ void alignedMemClearNoOpt( void * _dst, int _size ) -void alignedBufApplyGainNoOpt( sampleFrameA * RP _dst, float _gain, +void bufApplyGainNoOpt( sampleFrameA * RP _dst, float _gain, int _frames ) { for( int i = 0; i < _frames; ) @@ -152,7 +160,7 @@ void alignedBufApplyGainNoOpt( sampleFrameA * RP _dst, float _gain, } -void alignedBufMixNoOpt( 
sampleFrameA * RP _dst, const sampleFrameA * RP _src, +void bufMixNoOpt( sampleFrameA * RP _dst, const sampleFrameA * RP _src, int _frames ) { for( int i = 0; i < _frames; ) @@ -171,7 +179,7 @@ void alignedBufMixNoOpt( sampleFrameA * RP _dst, const sampleFrameA * RP _src, -void alignedBufMixLRCoeffNoOpt( sampleFrameA * RP _dst, +void bufMixLRCoeffNoOpt( sampleFrameA * RP _dst, const sampleFrameA * RP _src, float _left, float _right, int _frames ) { @@ -217,7 +225,7 @@ void unalignedBufMixLRCoeffNoOpt( sampleFrame * RP _dst, -void alignedBufWetDryMixNoOpt( sampleFrameA * RP _dst, +void bufWetDryMixNoOpt( sampleFrameA * RP _dst, const sampleFrameA * RP _src, float _wet, float _dry, int _frames ) { @@ -231,7 +239,7 @@ void alignedBufWetDryMixNoOpt( sampleFrameA * RP _dst, -void alignedBufWetDryMixSplittedNoOpt( sampleFrameA * RP _dst, +void bufWetDryMixSplittedNoOpt( sampleFrameA * RP _dst, const float * RP _left, const float * RP _right, float _wet, float _dry, int _frames ) @@ -248,7 +256,7 @@ void alignedBufWetDryMixSplittedNoOpt( sampleFrameA * RP _dst, -int alignedConvertToS16NoOpt( const sampleFrameA * RP _src, +int convertToS16NoOpt( const sampleFrameA * RP _src, intSampleFrameA * RP _dst, const fpp_t _frames, const float _master_gain, @@ -294,15 +302,15 @@ int alignedConvertToS16NoOpt( const sampleFrameA * RP _src, } -alignedMemCpyFunc alignedMemCpy = alignedMemCpyNoOpt; -alignedMemClearFunc alignedMemClear = alignedMemClearNoOpt; -alignedBufApplyGainFunc alignedBufApplyGain = alignedBufApplyGainNoOpt; -alignedBufMixFunc alignedBufMix = alignedBufMixNoOpt; -alignedBufMixLRCoeffFunc alignedBufMixLRCoeff = alignedBufMixLRCoeffNoOpt; -unalignedBufMixLRCoeffFunc unalignedBufMixLRCoeff = unalignedBufMixLRCoeffNoOpt; -alignedBufWetDryMixFunc alignedBufWetDryMix = alignedBufWetDryMixNoOpt; -alignedBufWetDryMixSplittedFunc alignedBufWetDryMixSplitted = alignedBufWetDryMixSplittedNoOpt; -alignedConvertToS16Func alignedConvertToS16 = alignedConvertToS16NoOpt; 
+MemCpyFunc memCpy = memCpyNoOpt; +MemClearFunc memClear = memClearNoOpt; +BufApplyGainFunc bufApplyGain = bufApplyGainNoOpt; +BufMixFunc bufMix = bufMixNoOpt; +BufMixLRCoeffFunc bufMixLRCoeff = bufMixLRCoeffNoOpt; +UnalignedBufMixLRCoeffFunc unalignedBufMixLRCoeff = unalignedBufMixLRCoeffNoOpt; +BufWetDryMixFunc bufWetDryMix = bufWetDryMixNoOpt; +BufWetDryMixSplittedFunc bufWetDryMixSplitted = bufWetDryMixSplittedNoOpt; +ConvertToS16Func convertToS16 = convertToS16NoOpt; #ifdef X86_OPTIMIZATIONS @@ -322,28 +330,28 @@ enum CPUFeatures extern "C" { #ifdef LMMS_HOST_X86 -void alignedMemCpyMMX( void * RP _dst, const void * RP _src, int _size ); -void alignedMemClearMMX( void * RP _dst, int _size ); +void memCpyMMX( void * RP _dst, const void * RP _src, int _size ); +void memClearMMX( void * RP _dst, int _size ); #endif -void alignedMemCpySSE( void * RP _dst, const void * RP _src, int _size ); -void alignedMemClearSSE( void * RP _dst, int _size ); -void alignedBufApplyGainSSE( sampleFrameA * RP _dst, float _gain, int _frames ); -void alignedBufMixSSE( sampleFrameA * RP _dst, const sampleFrameA * RP _src, int _frames ); -void alignedBufMixLRCoeffSSE( sampleFrameA * RP _dst, const sampleFrameA * RP _src, float _left, float _right, int _frames ); +void memCpySSE( void * RP _dst, const void * RP _src, int _size ); +void memClearSSE( void * RP _dst, int _size ); +void bufApplyGainSSE( sampleFrameA * RP _dst, float _gain, int _frames ); +void bufMixSSE( sampleFrameA * RP _dst, const sampleFrameA * RP _src, int _frames ); +void bufMixLRCoeffSSE( sampleFrameA * RP _dst, const sampleFrameA * RP _src, float _left, float _right, int _frames ); void unalignedBufMixLRCoeffSSE( sampleFrame * RP _dst, const sampleFrame * RP _src, const float _left, const float _right, int _frames ); -void alignedBufWetDryMixSSE( sampleFrameA * RP _dst, const sampleFrameA * RP _src, float _wet, float _dry, int _frames ); -void alignedBufWetDryMixSplittedSSE( sampleFrameA * RP _dst, const float * RP 
_left, const float * RP _right, float _wet, float _dry, int _frames ); +void bufWetDryMixSSE( sampleFrameA * RP _dst, const sampleFrameA * RP _src, float _wet, float _dry, int _frames ); +void bufWetDryMixSplittedSSE( sampleFrameA * RP _dst, const float * RP _left, const float * RP _right, float _wet, float _dry, int _frames ); #ifdef X86_OPTIMIZATIONS -void alignedMemCpySSE2( void * RP _dst, const void * RP _src, int _size ); -void alignedMemClearSSE2( void * RP _dst, int _size ); -int alignedConvertToS16SSE2( const sampleFrameA * RP _src, intSampleFrameA * RP _dst, const fpp_t _frames, const float _master_gain, const bool _convert_endian ); +void memCpySSE2( void * RP _dst, const void * RP _src, int _size ); +void memClearSSE2( void * RP _dst, int _size ); +int convertToS16SSE2( const sampleFrameA * RP _src, intSampleFrameA * RP _dst, const fpp_t _frames, const float _master_gain, const bool _convert_endian ); #endif } ; #endif -void initBasicOps( void ) +void init() { #ifdef X86_OPTIMIZATIONS static bool extensions_checked = false; @@ -428,29 +436,29 @@ void initBasicOps( void ) #ifdef LMMS_HOST_X86 if( features & MMX ) { - alignedMemCpy = alignedMemCpyMMX; - alignedMemClear = alignedMemClearMMX; + memCpy = memCpyMMX; + memClear = memClearMMX; } #endif if( features & SSE ) { fprintf( stderr, "Using SSE optimized routines\n" ); - alignedMemCpy = alignedMemCpySSE; - alignedMemClear = alignedMemClearSSE; - alignedBufApplyGain = alignedBufApplyGainSSE; - alignedBufMix = alignedBufMixSSE; - alignedBufMixLRCoeff = alignedBufMixLRCoeffSSE; + memCpy = memCpySSE; + memClear = memClearSSE; + bufApplyGain = bufApplyGainSSE; + bufMix = bufMixSSE; + bufMixLRCoeff = bufMixLRCoeffSSE; unalignedBufMixLRCoeff = unalignedBufMixLRCoeffSSE; - alignedBufWetDryMix = alignedBufWetDryMixSSE; - alignedBufWetDryMixSplitted = - alignedBufWetDryMixSplittedSSE; + bufWetDryMix = bufWetDryMixSSE; + bufWetDryMixSplitted = + bufWetDryMixSplittedSSE; } if( features & SSE2 ) { fprintf( stderr,
"Using SSE2 optimized routines\n" ); - alignedMemCpy = alignedMemCpySSE2; - alignedMemClear = alignedMemClearSSE2; - alignedConvertToS16 = alignedConvertToS16SSE2; + memCpy = memCpySSE2; + memClear = memClearSSE2; + convertToS16 = convertToS16SSE2; } extensions_checked = true; } @@ -458,4 +466,5 @@ void initBasicOps( void ) } +} diff --git a/src/core/basic_ops_x86.c b/src/core/CpuX86.c similarity index 89% rename from src/core/basic_ops_x86.c rename to src/core/CpuX86.c index 8b8535095..3deb01c90 100644 --- a/src/core/basic_ops_x86.c +++ b/src/core/CpuX86.c @@ -1,8 +1,8 @@ /* - * basic_ops_x86.c - x86 specific optimized operations + * CpuX86.c - x86 specific optimized operations + * + * Copyright (c) 2008-2009 Tobias Doerffel * - * Copyright (c) 2008 Tobias Doerffel - * * This file is part of Linux MultiMedia Studio - http://lmms.sourceforge.net * * This program is free software; you can redistribute it and/or @@ -22,9 +22,7 @@ * */ - - -#include "basic_ops.h" +#include "Cpu.h" #ifdef X86_OPTIMIZATIONS @@ -32,7 +30,7 @@ #include -void alignedMemCpyMMX( void * RP _dst, const void * RP _src, int _size ) +void memCpyMMX( void * RP _dst, const void * RP _src, int _size ) { const int s = _size / ( sizeof( __m64 ) * 8 ); int i; @@ -79,7 +77,7 @@ void alignedMemCpyMMX( void * RP _dst, const void * RP _src, int _size ) -void alignedMemClearMMX( void * RP _dst, int _size ) +void memClearMMX( void * RP _dst, int _size ) { __m64 * dst = (__m64 *) _dst; const int s = _size / ( sizeof( *dst ) * 8 ); @@ -109,7 +107,7 @@ void alignedMemClearMMX( void * RP _dst, int _size ) #include -void alignedMemCpySSE( void * RP _dst, const void * RP _src, int _size ) +void memCpySSE( void * RP _dst, const void * RP _src, int _size ) { __m128 * dst = (__m128 *) _dst; __m128 * src = (__m128 *) _src; @@ -133,7 +131,7 @@ void alignedMemCpySSE( void * RP _dst, const void * RP _src, int _size ) -void alignedMemClearSSE( void * RP _dst, int _size ) +void memClearSSE( void * RP _dst, int _size ) {
__m128 * dst = (__m128 *) _dst; const int s = _size / ( sizeof( *dst ) * 4 ); @@ -152,7 +150,7 @@ void alignedMemClearSSE( void * RP _dst, int _size ) -void alignedBufApplyGainSSE( sampleFrameA * RP _dst, float _gain, int _frames ) +void bufApplyGainSSE( sampleFrameA * RP _dst, float _gain, int _frames ) { int i; for( i = 0; i < _frames; ) @@ -180,7 +178,7 @@ void alignedBufApplyGainSSE( sampleFrameA * RP _dst, float _gain, int _frames ) -void alignedBufMixSSE( sampleFrameA * RP _dst, const sampleFrameA * RP _src, +void bufMixSSE( sampleFrameA * RP _dst, const sampleFrameA * RP _src, int _frames ) { int i; @@ -209,7 +207,7 @@ void alignedBufMixSSE( sampleFrameA * RP _dst, const sampleFrameA * RP _src, -void alignedBufMixLRCoeffSSE( sampleFrameA * RP _dst, +void bufMixLRCoeffSSE( sampleFrameA * RP _dst, const sampleFrameA * RP _src, float _left, float _right, int _frames ) { @@ -257,7 +255,7 @@ void unalignedBufMixLRCoeffSSE( sampleFrame * RP _dst, const sampleFrame * RP _s -void alignedBufWetDryMixSSE( sampleFrameA * RP _dst, +void bufWetDryMixSSE( sampleFrameA * RP _dst, const sampleFrameA * RP _src, float _wet, float _dry, int _frames ) { @@ -279,7 +277,7 @@ void alignedBufWetDryMixSSE( sampleFrameA * RP _dst, -void alignedBufWetDryMixSplittedSSE( sampleFrameA * RP _dst, +void bufWetDryMixSplittedSSE( sampleFrameA * RP _dst, const float * RP _left, const float * RP _right, float _wet, float _dry, int _frames ) @@ -304,7 +302,7 @@ void alignedBufWetDryMixSplittedSSE( sampleFrameA * RP _dst, #include -void alignedMemCpySSE2( void * RP _dst, const void * RP _src, int _size ) +void memCpySSE2( void * RP _dst, const void * RP _src, int _size ) { __m128i * dst = (__m128i *) _dst; __m128i * src = (__m128i *) _src; @@ -324,7 +322,7 @@ void alignedMemCpySSE2( void * RP _dst, const void * RP _src, int _size ) -void alignedMemClearSSE2( void * RP _dst, int _size ) +void memClearSSE2( void * RP _dst, int _size ) { __m128i * dst = (__m128i *) _dst; const int s = _size / ( 
sizeof( *dst ) * 4 ); @@ -342,7 +340,7 @@ void alignedMemClearSSE2( void * RP _dst, int _size ) -int alignedConvertToS16SSE2( const sampleFrameA * RP _src, +int convertToS16SSE2( const sampleFrameA * RP _src, intSampleFrameA * RP _dst, const fpp_t _frames, const float _master_gain, diff --git a/src/core/audio/audio_alsa.cpp b/src/core/audio/audio_alsa.cpp index 20a4d1a93..9892aaf6f 100644 --- a/src/core/audio/audio_alsa.cpp +++ b/src/core/audio/audio_alsa.cpp @@ -1,10 +1,8 @@ -#ifndef SINGLE_SOURCE_COMPILE - /* * audio_alsa.cpp - device-class which implements ALSA-PCM-output * * Copyright (c) 2004-2009 Tobias Doerffel - * + * * This file is part of Linux MultiMedia Studio - http://lmms.sourceforge.net * * This program is free software; you can redistribute it and/or @@ -25,7 +23,6 @@ */ - #include #include @@ -39,7 +36,7 @@ #include "lcd_spinbox.h" #include "gui_templates.h" #include "templates.h" -#include "basic_ops.h" +#include "Cpu.h" @@ -230,10 +227,10 @@ void audioALSA::applyQualitySettings( void ) void audioALSA::run( void ) { - sampleFrameA * temp = alignedAllocFrames( + sampleFrameA * temp = CPU::allocFrames( getMixer()->framesPerPeriod() ); intSampleFrameA * outbuf = (intSampleFrameA *) - alignedMalloc( sizeof( intSampleFrameA ) * channels() / + CPU::memAlloc( sizeof( intSampleFrameA ) * channels() / DEFAULT_CHANNELS * getMixer()->framesPerPeriod() ); int_sample_t * pcmbuf = new int_sample_t[m_periodSize * channels()]; @@ -261,7 +258,7 @@ void audioALSA::run( void ) } outbuf_size = frames * channels(); - alignedConvertToS16( temp, outbuf, frames, + CPU::convertToS16( temp, outbuf, frames, getMixer()->masterGain(), m_convertEndian ); } @@ -300,8 +297,8 @@ void audioALSA::run( void ) } } - alignedFreeFrames( temp ); - alignedFree( outbuf ); + CPU::freeFrames( temp ); + CPU::memFree( outbuf ); delete[] pcmbuf; } @@ -526,5 +523,3 @@ void audioALSA::setupWidget::saveSettings( void ) #endif - -#endif diff --git a/src/core/audio/audio_device.cpp 
b/src/core/audio/audio_device.cpp index 0e1e0af91..0f31d850c 100644 --- a/src/core/audio/audio_device.cpp +++ b/src/core/audio/audio_device.cpp @@ -1,10 +1,8 @@ -#ifndef SINGLE_SOURCE_COMPILE - /* * audio_device.cpp - base-class for audio-devices used by LMMS-mixer * - * Copyright (c) 2004-2008 Tobias Doerffel - * + * Copyright (c) 2004-2009 Tobias Doerffel + * * This file is part of Linux MultiMedia Studio - http://lmms.sourceforge.net * * This program is free software; you can redistribute it and/or @@ -25,13 +23,10 @@ */ -#include - - #include "audio_device.h" #include "config_mgr.h" #include "debug.h" -#include "basic_ops.h" +#include "Cpu.h" @@ -40,7 +35,7 @@ audioDevice::audioDevice( const ch_cnt_t _channels, mixer * _mixer ) : m_sampleRate( _mixer->processingSampleRate() ), m_channels( _channels ), m_mixer( _mixer ), - m_buffer( alignedAllocFrames( getMixer()->framesPerPeriod() ) ) + m_buffer( CPU::allocFrames( getMixer()->framesPerPeriod() ) ) { int error; if( ( m_srcState = src_new( @@ -57,7 +52,7 @@ audioDevice::audioDevice( const ch_cnt_t _channels, mixer * _mixer ) : audioDevice::~audioDevice() { src_delete( m_srcState ); - alignedFreeFrames( m_buffer ); + CPU::freeFrames( m_buffer ); m_devMutex.tryLock(); unlock(); @@ -104,7 +99,7 @@ fpp_t audioDevice::getNextBuffer( sampleFrameA * _ab ) } else { - alignedMemCpy( _ab, b, frames * sizeof( surroundSampleFrame ) ); + CPU::memCpy( _ab, b, frames * sizeof( surroundSampleFrame ) ); } // release lock @@ -112,7 +107,7 @@ fpp_t audioDevice::getNextBuffer( sampleFrameA * _ab ) if( getMixer()->hasFifoWriter() ) { - alignedFreeFrames( b ); + CPU::freeFrames( b ); } return frames; @@ -200,7 +195,7 @@ void audioDevice::resample( const sampleFrame * _src, const fpp_t _frames, void audioDevice::clearS16Buffer( intSampleFrameA * _outbuf, const fpp_t _frames ) { - alignedMemClear( _outbuf, _frames * sizeof( *_outbuf ) ); + CPU::memClear( _outbuf, _frames * sizeof( *_outbuf ) ); // memset( _outbuf, 0, _frames * 
channels() * BYTES_PER_INT_SAMPLE ); } @@ -213,5 +208,3 @@ bool audioDevice::hqAudio( void ) const } - -#endif diff --git a/src/core/audio/audio_file_wave.cpp b/src/core/audio/audio_file_wave.cpp index 84c25c3ac..ffead91e4 100644 --- a/src/core/audio/audio_file_wave.cpp +++ b/src/core/audio/audio_file_wave.cpp @@ -1,11 +1,9 @@ -#ifndef SINGLE_SOURCE_COMPILE - /* * audio_file_wave.cpp - audio-device which encodes wave-stream and writes it * into a WAVE-file. This is used for song-export. * * Copyright (c) 2004-2009 Tobias Doerffel - * + * * This file is part of Linux MultiMedia Studio - http://lmms.sourceforge.net * * This program is free software; you can redistribute it and/or @@ -26,10 +24,9 @@ */ - #include "audio_file_wave.h" #include "endian_handling.h" -#include "basic_ops.h" +#include "Cpu.h" #include @@ -89,14 +86,14 @@ void audioFileWave::writeBuffer( const surroundSampleFrame * _ab, { if( depth() == 16 ) { - intSampleFrameA * buf = (intSampleFrameA *) alignedMalloc( + intSampleFrameA * buf = (intSampleFrameA *) CPU::memAlloc( sizeof( intSampleFrameA ) * _frames ); - alignedConvertToS16( _ab, buf, _frames, _master_gain, + CPU::convertToS16( _ab, buf, _frames, _master_gain, !isLittleEndian() ); sf_writef_short( m_sf, (int_sample_t *) buf, _frames ); - alignedFree( buf ); + CPU::memFree( buf ); } else { @@ -123,4 +120,3 @@ void audioFileWave::finishEncoding( void ) } -#endif diff --git a/src/core/audio/audio_jack.cpp b/src/core/audio/audio_jack.cpp index af90838dd..27117a307 100644 --- a/src/core/audio/audio_jack.cpp +++ b/src/core/audio/audio_jack.cpp @@ -44,7 +44,7 @@ #include "lcd_spinbox.h" #include "audio_port.h" #include "main_window.h" -#include "basic_ops.h" +#include "Cpu.h" @@ -57,7 +57,7 @@ audioJACK::audioJACK( bool & _success_ful, mixer * _mixer ) : m_client( NULL ), m_active( false ), m_stopSemaphore( 1 ), - m_outBuf( alignedAllocFrames( getMixer()->framesPerPeriod() ) ), + m_outBuf( CPU::allocFrames( getMixer()->framesPerPeriod() ) ), 
m_framesDoneInCurBuf( 0 ), m_framesToDoInCurBuf( 0 ) { @@ -93,7 +93,7 @@ audioJACK::~audioJACK() jack_client_close( m_client ); } - alignedFreeFrames( m_outBuf ); + CPU::freeFrames( m_outBuf ); } diff --git a/src/core/audio/audio_oss.cpp b/src/core/audio/audio_oss.cpp index c8711d559..214f947ab 100644 --- a/src/core/audio/audio_oss.cpp +++ b/src/core/audio/audio_oss.cpp @@ -1,10 +1,8 @@ -#ifndef SINGLE_SOURCE_COMPILE - /* * audio_oss.cpp - device-class that implements OSS-PCM-output * * Copyright (c) 2004-2009 Tobias Doerffel - * + * * This file is part of Linux MultiMedia Studio - http://lmms.sourceforge.net * * This program is free software; you can redistribute it and/or @@ -39,7 +37,7 @@ #include "engine.h" #include "gui_templates.h" #include "templates.h" -#include "basic_ops.h" +#include "Cpu.h" #ifdef LMMS_HAVE_UNISTD_H #include @@ -299,10 +297,10 @@ void audioOSS::applyQualitySettings( void ) void audioOSS::run( void ) { - sampleFrameA * temp = alignedAllocFrames( + sampleFrameA * temp = CPU::allocFrames( getMixer()->framesPerPeriod() ); intSampleFrameA * outbuf = (intSampleFrameA *) - alignedMalloc( sizeof( intSampleFrameA ) * + CPU::memAlloc( sizeof( intSampleFrameA ) * getMixer()->framesPerPeriod() ); while( 1 ) @@ -313,7 +311,7 @@ void audioOSS::run( void ) break; } - int bytes = alignedConvertToS16( temp, outbuf, frames, + int bytes = CPU::convertToS16( temp, outbuf, frames, getMixer()->masterGain(), m_convertEndian ); if( write( m_audioFD, outbuf, bytes ) != bytes ) @@ -322,8 +320,8 @@ void audioOSS::run( void ) } } - alignedFreeFrames( temp ); - alignedFree( outbuf ); + CPU::freeFrames( temp ); + CPU::memFree( outbuf ); } @@ -374,5 +372,3 @@ void audioOSS::setupWidget::saveSettings( void ) #endif - -#endif diff --git a/src/core/audio/audio_port.cpp b/src/core/audio/audio_port.cpp index 119013339..a44ae821c 100644 --- a/src/core/audio/audio_port.cpp +++ b/src/core/audio/audio_port.cpp @@ -26,14 +26,14 @@ #include "audio_device.h" #include 
"effect_chain.h" #include "engine.h" -#include "basic_ops.h" +#include "Cpu.h" audioPort::audioPort( const QString & _name, bool _has_effect_chain ) : m_bufferUsage( NoUsage ), - m_firstBuffer( alignedAllocFrames( + m_firstBuffer( CPU::allocFrames( engine::getMixer()->framesPerPeriod() ) ), - m_secondBuffer( alignedAllocFrames( + m_secondBuffer( CPU::allocFrames( engine::getMixer()->framesPerPeriod() ) ), m_extOutputEnabled( false ), m_nextFxChannel( 0 ), @@ -55,8 +55,8 @@ audioPort::~audioPort() { setExtOutputEnabled( false ); engine::getMixer()->removeAudioPort( this ); - alignedFreeFrames( m_firstBuffer ); - alignedFreeFrames( m_secondBuffer ); + CPU::freeFrames( m_firstBuffer ); + CPU::freeFrames( m_secondBuffer ); delete m_effects; } diff --git a/src/core/audio/audio_portaudio.cpp b/src/core/audio/audio_portaudio.cpp index c99d92ddc..fe2b13437 100644 --- a/src/core/audio/audio_portaudio.cpp +++ b/src/core/audio/audio_portaudio.cpp @@ -60,7 +60,7 @@ audioPortAudio::audioPortAudio( bool & _success_ful, mixer * _mixer ) : DEFAULT_CHANNELS, SURROUND_CHANNELS ), _mixer ), m_wasPAInitError( false ), - m_outBuf( alignedAllocFrames( getMixer()->framesPerPeriod() ) ), + m_outBuf( CPU::allocFrames( getMixer()->framesPerPeriod() ) ), m_outBufPos( 0 ), m_stopSemaphore( 1 ) { @@ -206,7 +206,7 @@ audioPortAudio::~audioPortAudio() { Pa_Terminate(); } - alignedFreeFrames( m_outBuf ); + CPU::freeFrames( m_outBuf ); } diff --git a/src/core/audio/audio_pulseaudio.cpp b/src/core/audio/audio_pulseaudio.cpp index 09c9d6217..e9dbdaaaa 100644 --- a/src/core/audio/audio_pulseaudio.cpp +++ b/src/core/audio/audio_pulseaudio.cpp @@ -1,10 +1,8 @@ -#ifndef SINGLE_SOURCE_COMPILE - /* * audio_pulseaudio.cpp - device-class which implements PulseAudio-output * - * Copyright (c) 2008 Tobias Doerffel - * + * Copyright (c) 2008-2009 Tobias Doerffel + * * This file is part of Linux MultiMedia Studio - http://lmms.sourceforge.net * * This program is free software; you can redistribute it and/or @@ 
-25,7 +23,6 @@ */ - #include #include @@ -40,7 +37,7 @@ #include "lcd_spinbox.h" #include "gui_templates.h" #include "templates.h" -#include "basic_ops.h" +#include "Cpu.h" static void stream_write_callback(pa_stream *s, size_t length, void *userdata) @@ -231,7 +228,7 @@ void audioPulseAudio::run( void ) void audioPulseAudio::streamWriteCallback(pa_stream *s, size_t length) { const fpp_t fpp = getMixer()->framesPerPeriod(); - sampleFrameA * temp = alignedAllocFrames( fpp ); + sampleFrameA * temp = CPU::allocFrames( fpp ); Sint16 * pcmbuf = (Sint16*)pa_xmalloc( fpp * channels() * sizeof(Sint16) ); @@ -243,7 +240,7 @@ void audioPulseAudio::streamWriteCallback(pa_stream *s, size_t length) { return; } - int bytes = alignedConvertToS16( temp, + int bytes = CPU::convertToS16( temp, (intSampleFrameA *) pcmbuf, frames, getMixer()->masterGain(), @@ -257,7 +254,7 @@ void audioPulseAudio::streamWriteCallback(pa_stream *s, size_t length) } pa_xfree( pcmbuf ); - alignedFreeFrames( temp ); + CPU::freeFrames( temp ); } @@ -308,5 +305,3 @@ void audioPulseAudio::setupWidget::saveSettings( void ) #endif -#endif - diff --git a/src/core/audio/audio_sdl.cpp b/src/core/audio/audio_sdl.cpp index e4066a9b8..8bf077b05 100644 --- a/src/core/audio/audio_sdl.cpp +++ b/src/core/audio/audio_sdl.cpp @@ -1,10 +1,8 @@ -#ifndef SINGLE_SOURCE_COMPILE - /* * audio_sdl.cpp - device-class that performs PCM-output via SDL * * Copyright (c) 2004-2008 Tobias Doerffel - * + * * This file is part of Linux MultiMedia Studio - http://lmms.sourceforge.net * * This program is free software; you can redistribute it and/or @@ -25,7 +23,6 @@ */ - #include "audio_sdl.h" #ifdef LMMS_HAVE_SDL @@ -38,13 +35,13 @@ #include "config_mgr.h" #include "gui_templates.h" #include "templates.h" -#include "basic_ops.h" +#include "Cpu.h" audioSDL::audioSDL( bool & _success_ful, mixer * _mixer ) : audioDevice( DEFAULT_CHANNELS, _mixer ), - m_outBuf( alignedAllocFrames( getMixer()->framesPerPeriod() ) ), + m_outBuf( 
CPU::allocFrames( getMixer()->framesPerPeriod() ) ), m_convertedBufPos( 0 ), m_convertEndian( false ), m_stopSemaphore( 1 ) @@ -53,7 +50,7 @@ audioSDL::audioSDL( bool & _success_ful, mixer * _mixer ) : m_convertedBufSize = getMixer()->framesPerPeriod() * sizeof( intSampleFrameA ); - m_convertedBuf = (intSampleFrameA *) alignedMalloc( m_convertedBufSize ); + m_convertedBuf = (intSampleFrameA *) CPU::memAlloc( m_convertedBufSize ); if( SDL_Init( SDL_INIT_AUDIO | SDL_INIT_NOPARACHUTE ) < 0 ) @@ -97,8 +94,8 @@ audioSDL::~audioSDL() SDL_CloseAudio(); SDL_Quit(); - alignedFree( m_convertedBuf ); - alignedFreeFrames( m_outBuf ); + CPU::memFree( m_convertedBuf ); + CPU::freeFrames( m_outBuf ); } @@ -192,7 +189,7 @@ void audioSDL::sdlAudioCallback( Uint8 * _buf, int _len ) } m_convertedBufSize = frames * sizeof( intSampleFrameA ); - alignedConvertToS16( m_outBuf, + CPU::convertToS16( m_outBuf, m_convertedBuf, frames, getMixer()->masterGain(), @@ -243,4 +240,3 @@ void audioSDL::setupWidget::saveSettings( void ) #endif -#endif diff --git a/src/core/basic_ops_x86_64_sse.s b/src/core/basic_ops_x86_64_sse.s deleted file mode 100644 index 0cd43e3e3..000000000 --- a/src/core/basic_ops_x86_64_sse.s +++ /dev/null @@ -1,555 +0,0 @@ - .file "basic_ops_x86.c" - .text - .align 16 -.globl alignedMemCpySSE - .type alignedMemCpySSE, @function -alignedMemCpySSE: -.LFB509: - movslq %edx,%rdx - shrq $6, %rdx - testl %edx, %edx - jle .L4 - subl $1, %edx - xorl %eax, %eax - addq $1, %rdx - salq $6, %rdx - .align 16 -.L3: - movaps (%rsi,%rax), %xmm0 - movaps %xmm0, (%rdi,%rax) - movaps 16(%rsi,%rax), %xmm0 - movaps %xmm0, 16(%rdi,%rax) - movaps 32(%rsi,%rax), %xmm0 - movaps %xmm0, 32(%rdi,%rax) - movaps 48(%rsi,%rax), %xmm0 - movaps %xmm0, 48(%rdi,%rax) - addq $64, %rax - cmpq %rdx, %rax - jne .L3 -.L4: - rep - ret -.LFE509: - .size alignedMemCpySSE, .-alignedMemCpySSE - .align 16 -.globl alignedMemClearSSE - .type alignedMemClearSSE, @function -alignedMemClearSSE: -.LFB510: - movslq %esi,%rsi - 
shrq $6, %rsi - testl %esi, %esi - jle .L10 - subl $1, %esi - xorps %xmm0, %xmm0 - salq $6, %rsi - leaq 64(%rdi,%rsi), %rax - .align 16 -.L9: - movaps %xmm0, (%rdi) - movaps %xmm0, 16(%rdi) - movaps %xmm0, 32(%rdi) - movaps %xmm0, 48(%rdi) - addq $64, %rdi - cmpq %rax, %rdi - jne .L9 -.L10: - rep - ret -.LFE510: - .size alignedMemClearSSE, .-alignedMemClearSSE - .align 16 -.globl alignedBufApplyGainSSE - .type alignedBufApplyGainSSE, @function -alignedBufApplyGainSSE: -.LFB511: - testl %esi, %esi - jle .L15 - leal -1(%rsi), %edx - shufps $0, %xmm0, %xmm0 - xorl %eax, %eax - shrl $3, %edx - addl $1, %edx - .align 16 -.L14: - movaps 16(%rdi), %xmm3 - addl $1, %eax - movaps 32(%rdi), %xmm2 - mulps %xmm0, %xmm3 - movaps 48(%rdi), %xmm1 - mulps %xmm0, %xmm2 - movaps (%rdi), %xmm4 - mulps %xmm0, %xmm1 - movaps %xmm3, 16(%rdi) - mulps %xmm0, %xmm4 - movaps %xmm2, 32(%rdi) - movaps %xmm1, 48(%rdi) - movaps %xmm4, (%rdi) - addq $64, %rdi - cmpl %eax, %edx - ja .L14 -.L15: - rep - ret -.LFE511: - .size alignedBufApplyGainSSE, .-alignedBufApplyGainSSE - .align 16 -.globl alignedBufMixSSE - .type alignedBufMixSSE, @function -alignedBufMixSSE: -.LFB512: - testl %edx, %edx - jle .L20 - leal -1(%rdx), %ecx - xorl %eax, %eax - xorl %edx, %edx - shrl $3, %ecx - addl $1, %ecx - .align 16 -.L19: - movaps 16(%rdi,%rax), %xmm2 - addl $1, %edx - movaps 32(%rdi,%rax), %xmm1 - addps 16(%rsi,%rax), %xmm2 - movaps 48(%rdi,%rax), %xmm0 - addps 32(%rsi,%rax), %xmm1 - movaps (%rdi,%rax), %xmm3 - addps 48(%rsi,%rax), %xmm0 - addps (%rsi,%rax), %xmm3 - movaps %xmm2, 16(%rdi,%rax) - movaps %xmm1, 32(%rdi,%rax) - movaps %xmm0, 48(%rdi,%rax) - movaps %xmm3, (%rdi,%rax) - addq $64, %rax - cmpl %edx, %ecx - ja .L19 -.L20: - rep - ret -.LFE512: - .size alignedBufMixSSE, .-alignedBufMixSSE - .align 16 -.globl alignedBufMixLRCoeffSSE - .type alignedBufMixLRCoeffSSE, @function -alignedBufMixLRCoeffSSE: -.LFB513: - testl %edx, %edx - jle .L25 - unpcklps %xmm1, %xmm0 - leal -1(%rdx), %ecx - xorl %eax, %eax 
- xorl %edx, %edx - shrl $2, %ecx - movlhps %xmm0, %xmm0 - addl $1, %ecx - .align 16 -.L24: - movaps 16(%rsi,%rax), %xmm2 - addl $1, %edx - movaps (%rsi,%rax), %xmm3 - mulps %xmm0, %xmm2 - mulps %xmm0, %xmm3 - addps 16(%rdi,%rax), %xmm2 - addps (%rdi,%rax), %xmm3 - movaps %xmm2, 16(%rdi,%rax) - movaps %xmm3, (%rdi,%rax) - addq $32, %rax - cmpl %edx, %ecx - ja .L24 -.L25: - rep - ret -.LFE513: - .size alignedBufMixLRCoeffSSE, .-alignedBufMixLRCoeffSSE - .align 16 -.globl alignedBufWetDryMixSSE - .type alignedBufWetDryMixSSE, @function -alignedBufWetDryMixSSE: -.LFB515: - testl %edx, %edx - jle .L30 - leal -1(%rdx), %ecx - shufps $0, %xmm1, %xmm1 - shufps $0, %xmm0, %xmm0 - xorl %eax, %eax - shrl $2, %ecx - xorl %edx, %edx - addl $1, %ecx - .align 16 -.L29: - movaps 16(%rsi,%rax), %xmm3 - addl $1, %edx - movaps 16(%rdi,%rax), %xmm2 - mulps %xmm0, %xmm3 - movaps (%rsi,%rax), %xmm4 - mulps %xmm1, %xmm2 - mulps %xmm0, %xmm4 - addps %xmm3, %xmm2 - movaps (%rdi,%rax), %xmm3 - mulps %xmm1, %xmm3 - movaps %xmm2, 16(%rdi,%rax) - addps %xmm4, %xmm3 - movaps %xmm3, (%rdi,%rax) - addq $32, %rax - cmpl %edx, %ecx - ja .L29 -.L30: - rep - ret -.LFE515: - .size alignedBufWetDryMixSSE, .-alignedBufWetDryMixSSE - .align 16 -.globl alignedBufWetDryMixSplittedSSE - .type alignedBufWetDryMixSplittedSSE, @function -alignedBufWetDryMixSplittedSSE: -.LFB516: - pushq %rbp -.LCFI0: - testl %ecx, %ecx - pushq %rbx -.LCFI1: - jle .L39 - leal -1(%rcx), %ebx - shrl %ebx - addl $1, %ebx - movl %ebx, %r11d - shrl $2, %r11d - cmpl $3, %ebx - leal 0(,%r11,4), %ebp - jbe .L40 - testl %ebp, %ebp - jne .L34 -.L40: - xorl %r9d, %r9d - jmp .L36 - .align 16 -.L34: - movaps %xmm1, %xmm2 - movq %rdi, %rax - xorps %xmm10, %xmm10 - movq %rsi, %r9 - shufps $0, %xmm2, %xmm2 - movq %rdx, %r8 - xorl %r10d, %r10d - movaps %xmm2, %xmm12 - movaps %xmm0, %xmm2 - shufps $0, %xmm2, %xmm2 - movaps %xmm2, %xmm11 - .align 16 -.L37: - movaps (%rax), %xmm2 - addl $1, %r10d - movaps %xmm10, %xmm9 - movaps 16(%rax), %xmm5 - 
movaps %xmm2, %xmm4 - movlps (%r9), %xmm9 - movaps %xmm10, %xmm8 - movaps 32(%rax), %xmm14 - shufps $136, %xmm5, %xmm4 - movhps 8(%r9), %xmm9 - movaps 48(%rax), %xmm3 - movaps %xmm14, %xmm15 - movlps 16(%r9), %xmm8 - shufps $221, %xmm5, %xmm2 - shufps $136, %xmm3, %xmm15 - movhps 24(%r9), %xmm8 - shufps $221, %xmm3, %xmm14 - movaps %xmm4, %xmm5 - addq $32, %r9 - movaps %xmm9, %xmm3 - shufps $136, %xmm15, %xmm5 - movaps %xmm10, %xmm7 - shufps $136, %xmm8, %xmm3 - movlps (%r8), %xmm7 - movaps %xmm10, %xmm6 - mulps %xmm12, %xmm5 - movhps 8(%r8), %xmm7 - mulps %xmm11, %xmm3 - movlps 16(%r8), %xmm6 - movaps %xmm7, %xmm13 - movhps 24(%r8), %xmm6 - shufps $221, %xmm15, %xmm4 - shufps $221, %xmm8, %xmm9 - addq $32, %r8 - shufps $136, %xmm6, %xmm13 - addps %xmm3, %xmm5 - movaps %xmm2, %xmm3 - shufps $221, %xmm6, %xmm7 - shufps $136, %xmm14, %xmm3 - shufps $221, %xmm14, %xmm2 - mulps %xmm11, %xmm13 - movaps %xmm5, %xmm6 - mulps %xmm12, %xmm3 - mulps %xmm12, %xmm4 - mulps %xmm11, %xmm9 - addps %xmm13, %xmm3 - mulps %xmm12, %xmm2 - mulps %xmm11, %xmm7 - addps %xmm9, %xmm4 - addps %xmm7, %xmm2 - unpcklps %xmm4, %xmm6 - unpckhps %xmm4, %xmm5 - movaps %xmm3, %xmm4 - unpcklps %xmm2, %xmm4 - unpckhps %xmm2, %xmm3 - movaps %xmm6, %xmm2 - unpcklps %xmm4, %xmm2 - unpckhps %xmm4, %xmm6 - movaps %xmm2, (%rax) - movaps %xmm5, %xmm2 - unpckhps %xmm3, %xmm5 - unpcklps %xmm3, %xmm2 - movaps %xmm6, 16(%rax) - movaps %xmm2, 32(%rax) - movaps %xmm5, 48(%rax) - addq $64, %rax - cmpl %r10d, %r11d - ja .L37 - cmpl %ebx, %ebp - leal (%rbp,%rbp), %r9d - je .L39 -.L36: - movslq %r9d,%rax - leaq 1(%rax), %rbx - leaq 0(,%rax,4), %r10 - leaq (%rdi,%rax,8), %r8 - leaq (%rdi,%rbx,8), %rax - salq $2, %rbx - leaq (%rsi,%r10), %r11 - leaq (%rdx,%r10), %r10 - addq %rbx, %rsi - addq %rbx, %rdx - .align 16 -.L38: - movss (%r11), %xmm3 - addl $2, %r9d - movss (%r8), %xmm2 - mulss %xmm0, %xmm3 - mulss %xmm1, %xmm2 - addq $8, %r11 - addss %xmm3, %xmm2 - movss %xmm2, (%r8) - movss 4(%r8), %xmm2 - movss (%r10), 
%xmm3 - mulss %xmm1, %xmm2 - addq $8, %r10 - mulss %xmm0, %xmm3 - addss %xmm3, %xmm2 - movss %xmm2, 4(%r8) - addq $16, %r8 - movss (%rsi), %xmm3 - addq $8, %rsi - movss (%rax), %xmm2 - mulss %xmm0, %xmm3 - mulss %xmm1, %xmm2 - addss %xmm3, %xmm2 - movss %xmm2, (%rax) - movss 4(%rax), %xmm2 - movss (%rdx), %xmm3 - mulss %xmm1, %xmm2 - addq $8, %rdx - mulss %xmm0, %xmm3 - addss %xmm3, %xmm2 - movss %xmm2, 4(%rax) - addq $16, %rax - cmpl %r9d, %ecx - jg .L38 -.L39: - popq %rbx - popq %rbp - ret -.LFE516: - .size alignedBufWetDryMixSplittedSSE, .-alignedBufWetDryMixSplittedSSE - .align 16 -.globl unalignedBufMixLRCoeffSSE - .type unalignedBufMixLRCoeffSSE, @function -unalignedBufMixLRCoeffSSE: -.LFB514: - movl %edx, %ecx - shrl $31, %ecx - leal (%rdx,%rcx), %eax - andl $1, %eax - cmpl %ecx, %eax - jne .L52 -.L44: - testl %edx, %edx - jle .L49 - subl $1, %edx - shrl %edx - testb $15, %dil - jne .L46 - unpcklps %xmm1, %xmm0 - addl $1, %edx - xorps %xmm3, %xmm3 - xorl %eax, %eax - movlhps %xmm0, %xmm0 - .align 16 -.L47: - movaps %xmm3, %xmm2 - addl $1, %eax - movaps %xmm3, %xmm1 - movlps (%rsi), %xmm2 - movlps (%rdi), %xmm1 - movhps 8(%rsi), %xmm2 - addq $16, %rsi - movhps 8(%rdi), %xmm1 - mulps %xmm0, %xmm2 - addps %xmm2, %xmm1 - movaps %xmm1, (%rdi) - addq $16, %rdi - cmpl %edx, %eax - jb .L47 - rep - ret - .align 16 -.L46: - mov %edx, %edx - xorl %eax, %eax - addq $1, %rdx - salq $4, %rdx - .align 16 -.L48: - movss (%rsi,%rax), %xmm2 - mulss %xmm0, %xmm2 - addss (%rdi,%rax), %xmm2 - movss %xmm2, (%rdi,%rax) - movss 4(%rsi,%rax), %xmm2 - mulss %xmm1, %xmm2 - addss 4(%rdi,%rax), %xmm2 - movss %xmm2, 4(%rdi,%rax) - movss 8(%rsi,%rax), %xmm2 - mulss %xmm0, %xmm2 - addss 8(%rdi,%rax), %xmm2 - movss %xmm2, 8(%rdi,%rax) - movss 12(%rsi,%rax), %xmm2 - mulss %xmm1, %xmm2 - addss 12(%rdi,%rax), %xmm2 - movss %xmm2, 12(%rdi,%rax) - addq $16, %rax - cmpq %rdx, %rax - jne .L48 -.L49: - rep - ret -.L52: - movss (%rsi), %xmm2 - subl $1, %edx - mulss %xmm0, %xmm2 - addss (%rdi), %xmm2 
- movss %xmm2, (%rdi) - movss 4(%rsi), %xmm2 - addq $8, %rsi - mulss %xmm1, %xmm2 - addss 4(%rdi), %xmm2 - movss %xmm2, 4(%rdi) - addq $8, %rdi - jmp .L44 -.LFE514: - .size unalignedBufMixLRCoeffSSE, .-unalignedBufMixLRCoeffSSE - .section .eh_frame,"aw",@progbits -.Lframe1: - .long .LECIE1-.LSCIE1 -.LSCIE1: - .long 0x0 - .byte 0x1 - .string "zR" - .byte 0x1 - .byte 0x78 - .byte 0x10 - .byte 0x1 - .byte 0x3 - .byte 0xc - .byte 0x7 - .byte 0x8 - .byte 0x11 - .byte 0x10 - .byte 0x1 - .align 8 -.LECIE1: -.LSFDE1: - .long .LEFDE1-.LASFDE1 -.LASFDE1: - .long .LASFDE1-.Lframe1 - .long .LFB509 - .long .LFE509-.LFB509 - .byte 0x0 - .align 8 -.LEFDE1: -.LSFDE3: - .long .LEFDE3-.LASFDE3 -.LASFDE3: - .long .LASFDE3-.Lframe1 - .long .LFB510 - .long .LFE510-.LFB510 - .byte 0x0 - .align 8 -.LEFDE3: -.LSFDE5: - .long .LEFDE5-.LASFDE5 -.LASFDE5: - .long .LASFDE5-.Lframe1 - .long .LFB511 - .long .LFE511-.LFB511 - .byte 0x0 - .align 8 -.LEFDE5: -.LSFDE7: - .long .LEFDE7-.LASFDE7 -.LASFDE7: - .long .LASFDE7-.Lframe1 - .long .LFB512 - .long .LFE512-.LFB512 - .byte 0x0 - .align 8 -.LEFDE7: -.LSFDE9: - .long .LEFDE9-.LASFDE9 -.LASFDE9: - .long .LASFDE9-.Lframe1 - .long .LFB513 - .long .LFE513-.LFB513 - .byte 0x0 - .align 8 -.LEFDE9: -.LSFDE11: - .long .LEFDE11-.LASFDE11 -.LASFDE11: - .long .LASFDE11-.Lframe1 - .long .LFB515 - .long .LFE515-.LFB515 - .byte 0x0 - .align 8 -.LEFDE11: -.LSFDE13: - .long .LEFDE13-.LASFDE13 -.LASFDE13: - .long .LASFDE13-.Lframe1 - .long .LFB516 - .long .LFE516-.LFB516 - .byte 0x0 - .byte 0x4 - .long .LCFI0-.LFB516 - .byte 0xe - .byte 0x10 - .byte 0x4 - .long .LCFI1-.LCFI0 - .byte 0xe - .byte 0x18 - .byte 0x11 - .byte 0x3 - .byte 0x3 - .byte 0x11 - .byte 0x6 - .byte 0x2 - .align 8 -.LEFDE13: -.LSFDE15: - .long .LEFDE15-.LASFDE15 -.LASFDE15: - .long .LASFDE15-.Lframe1 - .long .LFB514 - .long .LFE514-.LFB514 - .byte 0x0 - .align 8 -.LEFDE15: - .ident "GCC: (GNU) 4.4.0 20090304 (experimental)" diff --git a/src/core/basic_ops_x86_64_sse2.s 
b/src/core/basic_ops_x86_64_sse2.s deleted file mode 100644 index 78ac365f1..000000000 --- a/src/core/basic_ops_x86_64_sse2.s +++ /dev/null @@ -1,395 +0,0 @@ - .file "basic_ops_x86.c" - .text - .align 16 -.globl alignedMemCpySSE2 - .type alignedMemCpySSE2, @function -alignedMemCpySSE2: -.LFB509: - movslq %edx,%rdx - shrq $6, %rdx - testl %edx, %edx - jle .L4 - subl $1, %edx - xorl %eax, %eax - addq $1, %rdx - salq $6, %rdx - .align 16 -.L3: - movdqa (%rsi,%rax), %xmm0 - movdqa %xmm0, (%rdi,%rax) - movdqa 16(%rsi,%rax), %xmm0 - movdqa %xmm0, 16(%rdi,%rax) - movdqa 32(%rsi,%rax), %xmm0 - movdqa %xmm0, 32(%rdi,%rax) - movdqa 48(%rsi,%rax), %xmm0 - movdqa %xmm0, 48(%rdi,%rax) - addq $64, %rax - cmpq %rdx, %rax - jne .L3 -.L4: - rep - ret -.LFE509: - .size alignedMemCpySSE2, .-alignedMemCpySSE2 - .align 16 -.globl alignedMemClearSSE2 - .type alignedMemClearSSE2, @function -alignedMemClearSSE2: -.LFB510: - movslq %esi,%rsi - shrq $6, %rsi - testl %esi, %esi - jle .L10 - subl $1, %esi - pxor %xmm0, %xmm0 - salq $6, %rsi - leaq 64(%rdi,%rsi), %rax - .align 16 -.L9: - movdqa %xmm0, (%rdi) - movdqa %xmm0, 16(%rdi) - movdqa %xmm0, 32(%rdi) - movdqa %xmm0, 48(%rdi) - addq $64, %rdi - cmpq %rax, %rdi - jne .L9 -.L10: - rep - ret -.LFE510: - .size alignedMemClearSSE2, .-alignedMemClearSSE2 - .align 16 -.globl alignedConvertToS16SSE2 - .type alignedConvertToS16SSE2, @function -alignedConvertToS16SSE2: -.LFB511: - pushq %rbp -.LCFI0: - testb %cl, %cl - movl %edx, %eax - mulss .LC0(%rip), %xmm0 - pushq %rbx -.LCFI1: - jne .L13 - testw %dx, %dx - jle .L15 - movl %edx, %ebx - shrw $2, %bx - cmpw $3, %dx - leal 0(,%rbx,4), %r8d - ja .L33 -.L28: - xorl %r8d, %r8d - .align 16 -.L23: - movswq %r8w,%rdx - movl $32767, %ebx - leaq (%rdi,%rdx,8), %rcx - leaq (%rsi,%rdx,4), %rdx - movl $-32768, %edi - .align 16 -.L25: - movss (%rcx), %xmm1 - mulss %xmm0, %xmm1 - cvttss2si %xmm1, %esi - movss 4(%rcx), %xmm1 - mulss %xmm0, %xmm1 - cmpl $-32768, %esi - cmovl %edi, %esi - cmpl $32767, %esi - 
cmovg %ebx, %esi - movw %si, (%rdx) - cvttss2si %xmm1, %esi - cmpl $-32768, %esi - cmovl %edi, %esi - cmpl $32767, %esi - cmovg %ebx, %esi - addl $1, %r8d - addq $8, %rcx - movw %si, 2(%rdx) - addq $4, %rdx - cmpw %r8w, %ax - jg .L25 -.L15: - cwtl - popq %rbx - sall $2, %eax - popq %rbp - ret - .align 16 -.L13: - testw %dx, %dx - jle .L15 - movl %edx, %ebx - shrw $2, %bx - cmpw $3, %dx - leal 0(,%rbx,4), %r8d - ja .L34 -.L27: - xorl %r8d, %r8d - .align 16 -.L18: - movswq %r8w,%rdx - leaq (%rdi,%rdx,8), %rcx - leaq (%rsi,%rdx,4), %rdx - movl $-32768, %edi - movl $32767, %esi - .align 16 -.L20: - movss (%rcx), %xmm1 - mulss %xmm0, %xmm1 - cvttss2si %xmm1, %ebx - movss 4(%rcx), %xmm1 - mulss %xmm0, %xmm1 - cmpl $-32768, %ebx - cmovl %edi, %ebx - cmpl $32767, %ebx - cmovg %esi, %ebx - movzbl %bh, %ebp - sall $8, %ebx - movl %ebp, %r9d - orl %r9d, %ebx - movw %bx, (%rdx) - cvttss2si %xmm1, %ebx - cmpl $-32768, %ebx - cmovl %edi, %ebx - cmpl $32767, %ebx - cmovg %esi, %ebx - addl $1, %r8d - addq $8, %rcx - movzbl %bh, %ebp - sall $8, %ebx - movl %ebp, %r9d - orl %r9d, %ebx - movw %bx, 2(%rdx) - addq $4, %rdx - cmpw %r8w, %ax - jg .L20 - cwtl - popq %rbx - sall $2, %eax - popq %rbp - ret - .align 16 -.L34: - testw %r8w, %r8w - je .L27 - movaps %xmm0, %xmm1 - movq %rdi, %rcx - movdqa .LC1(%rip), %xmm4 - movq %rsi, %r10 - shufps $0, %xmm1, %xmm1 - xorl %r9d, %r9d - movdqa .LC2(%rip), %xmm3 - movaps %xmm1, %xmm9 - movdqa .LC3(%rip), %xmm8 - .align 16 -.L19: - movaps (%rcx), %xmm1 - addl $1, %r9d - movdqa %xmm3, %xmm5 - mulps %xmm9, %xmm1 - movaps 16(%rcx), %xmm6 - movdqa %xmm3, %xmm7 - addq $32, %rcx - mulps %xmm9, %xmm6 - cvttps2dq %xmm1, %xmm1 - movdqa %xmm1, %xmm2 - pcmpgtd %xmm4, %xmm2 - cvttps2dq %xmm6, %xmm6 - pand %xmm2, %xmm1 - pandn %xmm4, %xmm2 - por %xmm1, %xmm2 - movdqa %xmm2, %xmm1 - pcmpgtd %xmm3, %xmm1 - pand %xmm1, %xmm5 - pandn %xmm2, %xmm1 - movdqa %xmm1, %xmm2 - movdqa %xmm6, %xmm1 - por %xmm5, %xmm2 - pcmpgtd %xmm4, %xmm1 - pand %xmm1, %xmm6 - pandn 
%xmm4, %xmm1 - movdqa %xmm2, %xmm5 - pslld $8, %xmm2 - pand %xmm8, %xmm5 - por %xmm6, %xmm1 - psrad $8, %xmm5 - movdqa %xmm1, %xmm6 - pcmpgtd %xmm3, %xmm6 - pand %xmm6, %xmm7 - pandn %xmm1, %xmm6 - movdqa %xmm6, %xmm1 - por %xmm7, %xmm1 - movdqa %xmm5, %xmm7 - movdqa %xmm1, %xmm6 - pslld $8, %xmm1 - pand %xmm8, %xmm6 - psrad $8, %xmm6 - punpcklwd %xmm6, %xmm5 - punpckhwd %xmm6, %xmm7 - movdqa %xmm5, %xmm6 - punpcklwd %xmm7, %xmm5 - punpckhwd %xmm7, %xmm6 - punpcklwd %xmm6, %xmm5 - movdqa %xmm2, %xmm6 - punpcklwd %xmm1, %xmm2 - punpckhwd %xmm1, %xmm6 - movdqa %xmm2, %xmm1 - punpcklwd %xmm6, %xmm2 - punpckhwd %xmm6, %xmm1 - punpcklwd %xmm1, %xmm2 - por %xmm2, %xmm5 - movdqa %xmm5, (%r10) - addq $16, %r10 - cmpw %r9w, %bx - ja .L19 - cmpw %dx, %r8w - jne .L18 - jmp .L15 - .align 16 -.L33: - testw %r8w, %r8w - je .L28 - movaps %xmm0, %xmm1 - movq %rdi, %rcx - movdqa .LC1(%rip), %xmm4 - movq %rsi, %r10 - shufps $0, %xmm1, %xmm1 - xorl %r9d, %r9d - movdqa .LC2(%rip), %xmm3 - movaps %xmm1, %xmm6 - .align 16 -.L24: - movaps (%rcx), %xmm1 - addl $1, %r9d - movdqa %xmm3, %xmm7 - mulps %xmm6, %xmm1 - movaps 16(%rcx), %xmm5 - addq $32, %rcx - mulps %xmm6, %xmm5 - cvttps2dq %xmm1, %xmm1 - movdqa %xmm1, %xmm2 - pcmpgtd %xmm4, %xmm2 - cvttps2dq %xmm5, %xmm5 - pand %xmm2, %xmm1 - pandn %xmm4, %xmm2 - por %xmm1, %xmm2 - movdqa %xmm2, %xmm1 - pcmpgtd %xmm3, %xmm1 - pand %xmm1, %xmm7 - pandn %xmm2, %xmm1 - movdqa %xmm1, %xmm2 - movdqa %xmm5, %xmm1 - por %xmm7, %xmm2 - movdqa %xmm3, %xmm7 - pcmpgtd %xmm4, %xmm1 - pand %xmm1, %xmm5 - pandn %xmm4, %xmm1 - por %xmm5, %xmm1 - movdqa %xmm1, %xmm5 - pcmpgtd %xmm3, %xmm5 - pand %xmm5, %xmm7 - pandn %xmm1, %xmm5 - movdqa %xmm5, %xmm1 - movdqa %xmm2, %xmm5 - por %xmm7, %xmm1 - punpcklwd %xmm1, %xmm2 - punpckhwd %xmm1, %xmm5 - movdqa %xmm2, %xmm1 - punpcklwd %xmm5, %xmm2 - punpckhwd %xmm5, %xmm1 - punpcklwd %xmm1, %xmm2 - movdqa %xmm2, (%r10) - addq $16, %r10 - cmpw %r9w, %bx - ja .L24 - cmpw %r8w, %dx - jne .L23 - jmp .L15 -.LFE511: - .size 
alignedConvertToS16SSE2, .-alignedConvertToS16SSE2 - .section .rodata - .align 4 -.LC0: - .long 1191181824 - .align 16 -.LC1: - .long -32768 - .long -32768 - .long -32768 - .long -32768 - .align 16 -.LC2: - .long 32767 - .long 32767 - .long 32767 - .long 32767 - .align 16 -.LC3: - .long 65280 - .long 65280 - .long 65280 - .long 65280 - .section .eh_frame,"aw",@progbits -.Lframe1: - .long .LECIE1-.LSCIE1 -.LSCIE1: - .long 0x0 - .byte 0x1 - .string "zR" - .byte 0x1 - .byte 0x78 - .byte 0x10 - .byte 0x1 - .byte 0x3 - .byte 0xc - .byte 0x7 - .byte 0x8 - .byte 0x11 - .byte 0x10 - .byte 0x1 - .align 8 -.LECIE1: -.LSFDE1: - .long .LEFDE1-.LASFDE1 -.LASFDE1: - .long .LASFDE1-.Lframe1 - .long .LFB509 - .long .LFE509-.LFB509 - .byte 0x0 - .align 8 -.LEFDE1: -.LSFDE3: - .long .LEFDE3-.LASFDE3 -.LASFDE3: - .long .LASFDE3-.Lframe1 - .long .LFB510 - .long .LFE510-.LFB510 - .byte 0x0 - .align 8 -.LEFDE3: -.LSFDE5: - .long .LEFDE5-.LASFDE5 -.LASFDE5: - .long .LASFDE5-.Lframe1 - .long .LFB511 - .long .LFE511-.LFB511 - .byte 0x0 - .byte 0x4 - .long .LCFI0-.LFB511 - .byte 0xe - .byte 0x10 - .byte 0x4 - .long .LCFI1-.LCFI0 - .byte 0xe - .byte 0x18 - .byte 0x11 - .byte 0x3 - .byte 0x3 - .byte 0x11 - .byte 0x6 - .byte 0x2 - .align 8 -.LEFDE5: - .ident "GCC: (GNU) 4.4.0 20090304 (experimental)" diff --git a/src/core/basic_ops_x86_mmx.s b/src/core/basic_ops_x86_mmx.s deleted file mode 100644 index 0f21c9a92..000000000 --- a/src/core/basic_ops_x86_mmx.s +++ /dev/null @@ -1,107 +0,0 @@ - .file "basic_ops_x86.c" - .text - .p2align 4,,15 -.globl alignedMemCpyMMX - .type alignedMemCpyMMX, @function -alignedMemCpyMMX: - pushl %ebx - subl $112, %esp - movl 128(%esp), %ebx - movl 124(%esp), %eax - shrl $6, %ebx -#APP -# 42 "/home/toby/development/git/lmms/src/core/basic_ops_x86.c" 1 - fsave 4(%esp); fwait - -# 0 "" 2 -# 44 "/home/toby/development/git/lmms/src/core/basic_ops_x86.c" 1 - 1: prefetchnta (%eax) - prefetchnta 64(%eax) - prefetchnta 128(%eax) - prefetchnta 192(%eax) - prefetchnta 
256(%eax) - -# 0 "" 2 -#NO_APP - testl %ebx, %ebx - je .L2 - movl 120(%esp), %ecx - xorl %edx, %edx - .p2align 4,,7 - .p2align 3 -.L3: -#APP -# 53 "/home/toby/development/git/lmms/src/core/basic_ops_x86.c" 1 - 1: prefetchnta 320(%eax) -2: movq (%eax), %mm0 - movq 8(%eax), %mm1 - movq 16(%eax), %mm2 - movq 24(%eax), %mm3 - movq %mm0, (%ecx) - movq %mm1, 8(%ecx) - movq %mm2, 16(%ecx) - movq %mm3, 24(%ecx) - movq 32(%eax), %mm0 - movq 40(%eax), %mm1 - movq 48(%eax), %mm2 - movq 56(%eax), %mm3 - movq %mm0, 32(%ecx) - movq %mm1, 40(%ecx) - movq %mm2, 48(%ecx) - movq %mm3, 56(%ecx) - -# 0 "" 2 -#NO_APP - addl $1, %edx - addl $64, %eax - addl $64, %ecx - cmpl %edx, %ebx - jne .L3 -.L2: -#APP -# 75 "/home/toby/development/git/lmms/src/core/basic_ops_x86.c" 1 - fsave 4(%esp); fwait - -# 0 "" 2 -#NO_APP - addl $112, %esp - popl %ebx - ret - .size alignedMemCpyMMX, .-alignedMemCpyMMX - .p2align 4,,15 -.globl alignedMemClearMMX - .type alignedMemClearMMX, @function -alignedMemClearMMX: - movl 8(%esp), %ecx - shrl $6, %ecx - testl %ecx, %ecx - je .L8 - movl 4(%esp), %edx - xorl %eax, %eax - pxor %mm0, %mm0 - .p2align 4,,7 - .p2align 3 -.L9: -#APP -# 90 "/home/toby/development/git/lmms/src/core/basic_ops_x86.c" 1 - movq %mm0, (%edx) -movq %mm0, 8(%edx) -movq %mm0, 16(%edx) -movq %mm0, 24(%edx) -movq %mm0, 32(%edx) -movq %mm0, 40(%edx) -movq %mm0, 48(%edx) -movq %mm0, 56(%edx) - -# 0 "" 2 -#NO_APP - addl $1, %eax - addl $64, %edx - cmpl %eax, %ecx - jne .L9 -.L8: - emms - ret - .size alignedMemClearMMX, .-alignedMemClearMMX - .ident "GCC: (Ubuntu 4.4.0-0ubuntu2) 4.4.0" - .section .note.GNU-stack,"",@progbits diff --git a/src/core/basic_ops_x86_sse.s b/src/core/basic_ops_x86_sse.s deleted file mode 100644 index 3f72a9ccd..000000000 --- a/src/core/basic_ops_x86_sse.s +++ /dev/null @@ -1,494 +0,0 @@ - .file "basic_ops_x86.c" - .text - .p2align 4,,15 -.globl alignedMemCpySSE - .type alignedMemCpySSE, @function -alignedMemCpySSE: - pushl %esi - pushl %ebx - movl 20(%esp), %esi - movl 
12(%esp), %edx - movl 16(%esp), %ecx - shrl $6, %esi - testl %esi, %esi - je .L4 - xorl %eax, %eax - xorl %ebx, %ebx - .p2align 4,,7 - .p2align 3 -.L3: - movaps (%ecx,%eax), %xmm0 - addl $1, %ebx - movaps %xmm0, (%edx,%eax) - movaps 16(%ecx,%eax), %xmm0 - movaps %xmm0, 16(%edx,%eax) - movaps 32(%ecx,%eax), %xmm0 - movaps %xmm0, 32(%edx,%eax) - movaps 48(%ecx,%eax), %xmm0 - movaps %xmm0, 48(%edx,%eax) - addl $64, %eax - cmpl %ebx, %esi - jne .L3 -.L4: - popl %ebx - popl %esi - ret - .size alignedMemCpySSE, .-alignedMemCpySSE - .p2align 4,,15 -.globl alignedMemClearSSE - .type alignedMemClearSSE, @function -alignedMemClearSSE: - movl 8(%esp), %ecx - shrl $6, %ecx - testl %ecx, %ecx - je .L10 - movl 4(%esp), %eax - xorps %xmm0, %xmm0 - xorl %edx, %edx - .p2align 4,,7 - .p2align 3 -.L9: - addl $1, %edx - movaps %xmm0, (%eax) - movaps %xmm0, 16(%eax) - movaps %xmm0, 32(%eax) - movaps %xmm0, 48(%eax) - addl $64, %eax - cmpl %edx, %ecx - jne .L9 -.L10: - rep - ret - .size alignedMemClearSSE, .-alignedMemClearSSE - .p2align 4,,15 -.globl alignedBufApplyGainSSE - .type alignedBufApplyGainSSE, @function -alignedBufApplyGainSSE: - movl 12(%esp), %ecx - testl %ecx, %ecx - jle .L15 - movss 8(%esp), %xmm0 - subl $1, %ecx - movl 4(%esp), %eax - shrl $3, %ecx - xorl %edx, %edx - addl $1, %ecx - shufps $0, %xmm0, %xmm0 - .p2align 4,,7 - .p2align 3 -.L14: - movaps 16(%eax), %xmm3 - addl $1, %edx - movaps 32(%eax), %xmm2 - mulps %xmm0, %xmm3 - movaps 48(%eax), %xmm1 - mulps %xmm0, %xmm2 - movaps (%eax), %xmm4 - mulps %xmm0, %xmm1 - movaps %xmm3, 16(%eax) - mulps %xmm0, %xmm4 - movaps %xmm2, 32(%eax) - movaps %xmm1, 48(%eax) - movaps %xmm4, (%eax) - addl $64, %eax - cmpl %edx, %ecx - ja .L14 -.L15: - rep - ret - .size alignedBufApplyGainSSE, .-alignedBufApplyGainSSE - .p2align 4,,15 -.globl alignedBufMixSSE - .type alignedBufMixSSE, @function -alignedBufMixSSE: - pushl %esi - pushl %ebx - movl 20(%esp), %esi - movl 12(%esp), %edx - movl 16(%esp), %ecx - testl %esi, %esi - jle .L20 - 
subl $1, %esi - xorl %eax, %eax - shrl $3, %esi - xorl %ebx, %ebx - addl $1, %esi - .p2align 4,,7 - .p2align 3 -.L19: - movaps 16(%edx,%eax), %xmm2 - addl $1, %ebx - movaps 32(%edx,%eax), %xmm1 - movaps 48(%edx,%eax), %xmm0 - movaps (%edx,%eax), %xmm3 - addps 16(%ecx,%eax), %xmm2 - addps 32(%ecx,%eax), %xmm1 - addps 48(%ecx,%eax), %xmm0 - addps (%ecx,%eax), %xmm3 - movaps %xmm2, 16(%edx,%eax) - movaps %xmm3, (%edx,%eax) - movaps %xmm1, 32(%edx,%eax) - movaps %xmm0, 48(%edx,%eax) - addl $64, %eax - cmpl %ebx, %esi - ja .L19 -.L20: - popl %ebx - popl %esi - ret - .size alignedBufMixSSE, .-alignedBufMixSSE - .p2align 4,,15 -.globl alignedBufMixLRCoeffSSE - .type alignedBufMixLRCoeffSSE, @function -alignedBufMixLRCoeffSSE: - pushl %esi - pushl %ebx - movl 28(%esp), %esi - movl 12(%esp), %edx - movl 16(%esp), %ebx - testl %esi, %esi - jle .L25 - movss 24(%esp), %xmm2 - subl $1, %esi - movss 20(%esp), %xmm0 - xorl %eax, %eax - shrl $2, %esi - xorl %ecx, %ecx - addl $1, %esi - unpcklps %xmm2, %xmm0 - movaps %xmm0, %xmm2 - movlhps %xmm0, %xmm2 - .p2align 4,,7 - .p2align 3 -.L24: - movaps 16(%ebx,%eax), %xmm0 - addl $1, %ecx - movaps (%ebx,%eax), %xmm1 - mulps %xmm2, %xmm0 - mulps %xmm2, %xmm1 - addps 16(%edx,%eax), %xmm0 - addps (%edx,%eax), %xmm1 - movaps %xmm0, 16(%edx,%eax) - movaps %xmm1, (%edx,%eax) - addl $32, %eax - cmpl %ecx, %esi - ja .L24 -.L25: - popl %ebx - popl %esi - ret - .size alignedBufMixLRCoeffSSE, .-alignedBufMixLRCoeffSSE - .p2align 4,,15 -.globl alignedBufWetDryMixSSE - .type alignedBufWetDryMixSSE, @function -alignedBufWetDryMixSSE: - pushl %esi - pushl %ebx - movl 28(%esp), %esi - movl 12(%esp), %edx - movl 16(%esp), %ebx - testl %esi, %esi - jle .L30 - movss 24(%esp), %xmm3 - subl $1, %esi - movss 20(%esp), %xmm2 - xorl %eax, %eax - shrl $2, %esi - xorl %ecx, %ecx - shufps $0, %xmm3, %xmm3 - addl $1, %esi - shufps $0, %xmm2, %xmm2 - .p2align 4,,7 - .p2align 3 -.L29: - movaps 16(%ebx,%eax), %xmm1 - addl $1, %ecx - movaps 16(%edx,%eax), %xmm0 - mulps 
%xmm2, %xmm1 - movaps (%ebx,%eax), %xmm4 - mulps %xmm3, %xmm0 - mulps %xmm2, %xmm4 - addps %xmm1, %xmm0 - movaps (%edx,%eax), %xmm1 - mulps %xmm3, %xmm1 - movaps %xmm0, 16(%edx,%eax) - addps %xmm4, %xmm1 - movaps %xmm1, (%edx,%eax) - addl $32, %eax - cmpl %ecx, %esi - ja .L29 -.L30: - popl %ebx - popl %esi - ret - .size alignedBufWetDryMixSSE, .-alignedBufWetDryMixSSE - .p2align 4,,15 -.globl alignedBufWetDryMixSplittedSSE - .type alignedBufWetDryMixSplittedSSE, @function -alignedBufWetDryMixSplittedSSE: - pushl %ebp - pushl %edi - pushl %esi - pushl %ebx - subl $124, %esp - movl 164(%esp), %eax - movl 144(%esp), %edx - movl 148(%esp), %esi - movl 152(%esp), %ecx - testl %eax, %eax - jle .L39 - movl 164(%esp), %eax - subl $1, %eax - shrl %eax - addl $1, %eax - movl %eax, %ebp - movl %eax, 104(%esp) - shrl $2, %ebp - cmpl $3, 104(%esp) - leal 0(,%ebp,4), %eax - movl %eax, 108(%esp) - jbe .L40 - testl %eax, %eax - jne .L34 -.L40: - xorl %edi, %edi - jmp .L36 - .p2align 4,,7 - .p2align 3 -.L34: - movss 160(%esp), %xmm0 - xorps %xmm7, %xmm7 - movl %esi, %ebx - xorl %eax, %eax - xorl %edi, %edi - shufps $0, %xmm0, %xmm0 - movaps %xmm0, 16(%esp) - movss 156(%esp), %xmm0 - shufps $0, %xmm0, %xmm0 - movaps %xmm0, (%esp) - .p2align 4,,7 - .p2align 3 -.L37: - movaps (%edx,%eax,2), %xmm5 - addl $1, %edi - movaps 16(%edx,%eax,2), %xmm6 - movaps %xmm5, %xmm0 - shufps $136, %xmm6, %xmm0 - movaps 32(%edx,%eax,2), %xmm4 - shufps $221, %xmm6, %xmm5 - movaps %xmm0, 80(%esp) - movaps 48(%edx,%eax,2), %xmm3 - movaps %xmm4, %xmm0 - shufps $136, %xmm3, %xmm0 - movaps 80(%esp), %xmm2 - shufps $221, %xmm3, %xmm4 - movaps %xmm7, %xmm6 - movlps (%ebx), %xmm6 - movaps %xmm5, 64(%esp) - movhps 8(%ebx), %xmm6 - shufps $136, %xmm0, %xmm2 - movaps %xmm0, 48(%esp) - movaps %xmm7, %xmm5 - movaps %xmm6, %xmm0 - movlps 16(%ebx), %xmm5 - movhps 24(%ebx), %xmm5 - shufps $136, %xmm5, %xmm0 - mulps 16(%esp), %xmm2 - shufps $221, %xmm5, %xmm6 - movaps %xmm4, 32(%esp) - addl $32, %ebx - mulps (%esp), 
%xmm0 - movaps %xmm7, %xmm4 - movlps (%eax,%ecx), %xmm4 - movaps %xmm7, %xmm3 - movhps 8(%eax,%ecx), %xmm4 - movaps %xmm4, %xmm1 - movlps 16(%ecx,%eax), %xmm3 - movhps 24(%ecx,%eax), %xmm3 - shufps $136, %xmm3, %xmm1 - addps %xmm0, %xmm2 - movaps 64(%esp), %xmm0 - shufps $221, %xmm3, %xmm4 - shufps $136, 32(%esp), %xmm0 - mulps (%esp), %xmm1 - movaps %xmm2, %xmm3 - movaps 64(%esp), %xmm5 - mulps 16(%esp), %xmm0 - shufps $221, 32(%esp), %xmm5 - mulps (%esp), %xmm6 - addps %xmm1, %xmm0 - movaps 80(%esp), %xmm1 - shufps $221, 48(%esp), %xmm1 - mulps (%esp), %xmm4 - mulps 16(%esp), %xmm1 - mulps 16(%esp), %xmm5 - addps %xmm6, %xmm1 - addps %xmm4, %xmm5 - movaps %xmm0, %xmm4 - unpcklps %xmm1, %xmm3 - unpcklps %xmm5, %xmm4 - unpckhps %xmm1, %xmm2 - movaps %xmm3, %xmm1 - unpckhps %xmm5, %xmm0 - unpcklps %xmm4, %xmm1 - unpckhps %xmm4, %xmm3 - movaps %xmm1, (%edx,%eax,2) - movaps %xmm2, %xmm1 - unpckhps %xmm0, %xmm2 - unpcklps %xmm0, %xmm1 - movaps %xmm3, 16(%edx,%eax,2) - movaps %xmm1, 32(%edx,%eax,2) - movaps %xmm2, 48(%edx,%eax,2) - addl $32, %eax - cmpl %edi, %ebp - ja .L37 - movl 108(%esp), %edi - movl 104(%esp), %eax - addl %edi, %edi - cmpl %eax, 108(%esp) - je .L39 -.L36: - movss 156(%esp), %xmm0 - xorl %ebp, %ebp - movss 160(%esp), %xmm1 - movl %edi, %eax - leal (%edx,%edi,8), %ebx - leal 8(%edx,%edi,8), %edx - .p2align 4,,7 - .p2align 3 -.L38: - movss (%esi,%eax,4), %xmm3 - addl $2, %ebp - movss (%ebx), %xmm2 - mulss %xmm0, %xmm3 - mulss %xmm1, %xmm2 - addss %xmm3, %xmm2 - movss %xmm2, (%ebx) - movss 4(%ebx), %xmm2 - movss (%ecx,%eax,4), %xmm3 - mulss %xmm1, %xmm2 - mulss %xmm0, %xmm3 - addss %xmm3, %xmm2 - movss %xmm2, 4(%ebx) - addl $16, %ebx - movss 4(%esi,%eax,4), %xmm3 - movss (%edx), %xmm2 - mulss %xmm0, %xmm3 - mulss %xmm1, %xmm2 - addss %xmm3, %xmm2 - movss %xmm2, (%edx) - movss 4(%edx), %xmm2 - movss 4(%ecx,%eax,4), %xmm3 - mulss %xmm1, %xmm2 - leal (%edi,%ebp), %eax - mulss %xmm0, %xmm3 - addss %xmm3, %xmm2 - movss %xmm2, 4(%edx) - addl $16, %edx - cmpl 
%eax, 164(%esp) - jg .L38 -.L39: - addl $124, %esp - popl %ebx - popl %esi - popl %edi - popl %ebp - ret - .size alignedBufWetDryMixSplittedSSE, .-alignedBufWetDryMixSplittedSSE - .p2align 4,,15 -.globl unalignedBufMixLRCoeffSSE - .type unalignedBufMixLRCoeffSSE, @function -unalignedBufMixLRCoeffSSE: - pushl %esi - pushl %ebx - movl 28(%esp), %ebx - movl 12(%esp), %eax - movl 16(%esp), %edx - movss 20(%esp), %xmm1 - movl %ebx, %esi - shrl $31, %esi - leal (%ebx,%esi), %ecx - andl $1, %ecx - cmpl %esi, %ecx - movss 24(%esp), %xmm3 - jne .L52 -.L44: - testl %ebx, %ebx - jle .L49 - testb $15, %al - jne .L46 - movaps %xmm1, %xmm0 - subl $1, %ebx - unpcklps %xmm3, %xmm0 - shrl %ebx - xorps %xmm2, %xmm2 - movaps %xmm0, %xmm3 - addl $1, %ebx - movlhps %xmm0, %xmm3 - xorl %ecx, %ecx - .p2align 4,,7 - .p2align 3 -.L47: - movaps %xmm2, %xmm1 - addl $1, %ecx - movlps (%edx), %xmm1 - movhps 8(%edx), %xmm1 - movaps %xmm2, %xmm0 - movlps (%eax), %xmm0 - movhps 8(%eax), %xmm0 - addl $16, %edx - mulps %xmm3, %xmm1 - addps %xmm1, %xmm0 - movaps %xmm0, (%eax) - addl $16, %eax - cmpl %ebx, %ecx - jb .L47 -.L49: - popl %ebx - popl %esi - ret - .p2align 4,,7 - .p2align 3 -.L46: - xorl %ecx, %ecx - .p2align 4,,7 - .p2align 3 -.L48: - movss (%edx,%ecx,8), %xmm0 - mulss %xmm1, %xmm0 - addss (%eax,%ecx,8), %xmm0 - movss %xmm0, (%eax,%ecx,8) - movss 4(%edx,%ecx,8), %xmm0 - mulss %xmm3, %xmm0 - addss 4(%eax,%ecx,8), %xmm0 - movss %xmm0, 4(%eax,%ecx,8) - movss 8(%edx,%ecx,8), %xmm0 - mulss %xmm1, %xmm0 - addss 8(%eax,%ecx,8), %xmm0 - movss %xmm0, 8(%eax,%ecx,8) - movss 12(%edx,%ecx,8), %xmm0 - mulss %xmm3, %xmm0 - addss 12(%eax,%ecx,8), %xmm0 - movss %xmm0, 12(%eax,%ecx,8) - addl $2, %ecx - cmpl %ecx, %ebx - jg .L48 - popl %ebx - popl %esi - ret -.L52: - movss (%edx), %xmm0 - subl $1, %ebx - mulss %xmm1, %xmm0 - addss (%eax), %xmm0 - movss %xmm0, (%eax) - movss 4(%edx), %xmm0 - addl $8, %edx - mulss %xmm3, %xmm0 - addss 4(%eax), %xmm0 - movss %xmm0, 4(%eax) - addl $8, %eax - jmp .L44 - .size 
unalignedBufMixLRCoeffSSE, .-unalignedBufMixLRCoeffSSE - .ident "GCC: (Ubuntu 4.4.0-0ubuntu2) 4.4.0" - .section .note.GNU-stack,"",@progbits diff --git a/src/core/basic_ops_x86_sse2.s b/src/core/basic_ops_x86_sse2.s deleted file mode 100644 index aee60c234..000000000 --- a/src/core/basic_ops_x86_sse2.s +++ /dev/null @@ -1,349 +0,0 @@ - .file "basic_ops_x86.c" - .text - .p2align 4,,15 -.globl alignedMemCpySSE2 - .type alignedMemCpySSE2, @function -alignedMemCpySSE2: - pushl %esi - pushl %ebx - movl 20(%esp), %esi - movl 12(%esp), %edx - movl 16(%esp), %ecx - shrl $6, %esi - testl %esi, %esi - je .L4 - xorl %eax, %eax - xorl %ebx, %ebx - .p2align 4,,7 - .p2align 3 -.L3: - addl $1, %ebx - movdqa (%ecx,%eax), %xmm0 - movdqa %xmm0, (%edx,%eax) - movdqa 16(%ecx,%eax), %xmm0 - movdqa %xmm0, 16(%edx,%eax) - movdqa 32(%ecx,%eax), %xmm0 - movdqa %xmm0, 32(%edx,%eax) - movdqa 48(%ecx,%eax), %xmm0 - movdqa %xmm0, 48(%edx,%eax) - addl $64, %eax - cmpl %ebx, %esi - jne .L3 -.L4: - popl %ebx - popl %esi - ret - .size alignedMemCpySSE2, .-alignedMemCpySSE2 - .p2align 4,,15 -.globl alignedMemClearSSE2 - .type alignedMemClearSSE2, @function -alignedMemClearSSE2: - movl 8(%esp), %ecx - shrl $6, %ecx - testl %ecx, %ecx - je .L10 - movl 4(%esp), %eax - xorl %edx, %edx - pxor %xmm0, %xmm0 - .p2align 4,,7 - .p2align 3 -.L9: - addl $1, %edx - movdqa %xmm0, (%eax) - movdqa %xmm0, 16(%eax) - movdqa %xmm0, 32(%eax) - movdqa %xmm0, 48(%eax) - addl $64, %eax - cmpl %edx, %ecx - jne .L9 -.L10: - rep - ret - .size alignedMemClearSSE2, .-alignedMemClearSSE2 - .p2align 4,,15 -.globl alignedConvertToS16SSE2 - .type alignedConvertToS16SSE2, @function -alignedConvertToS16SSE2: - pushl %ebp - pushl %edi - pushl %esi - pushl %ebx - subl $8, %esp - movl 36(%esp), %eax - movss .LC0, %xmm6 - cmpb $0, 44(%esp) - movl 28(%esp), %edx - movl 32(%esp), %ebx - movl %eax, %esi - mulss 40(%esp), %xmm6 - jne .L13 - testw %ax, %ax - jle .L15 - movl %eax, %edi - shrw $2, %di - cmpw $3, %ax - movw %ax, 2(%esp) - leal 
0(,%edi,4), %ebp - ja .L33 -.L28: - xorl %ebp, %ebp - .p2align 4,,7 - .p2align 3 -.L23: - movswl %bp,%eax - movl $-32768, %edi - leal (%edx,%eax,8), %edx - leal (%ebx,%eax,4), %eax - movl $32767, %ebx - .p2align 4,,7 - .p2align 3 -.L25: - movss (%edx), %xmm0 - mulss %xmm6, %xmm0 - cvttss2si %xmm0, %ecx - movss 4(%edx), %xmm0 - cmpl $-32768, %ecx - mulss %xmm6, %xmm0 - cmovl %edi, %ecx - cmpl $32767, %ecx - cmovg %ebx, %ecx - movw %cx, (%eax) - cvttss2si %xmm0, %ecx - cmpl $-32768, %ecx - cmovl %edi, %ecx - cmpl $32767, %ecx - cmovg %ebx, %ecx - addl $1, %ebp - movw %cx, 2(%eax) - addl $8, %edx - addl $4, %eax - cmpw %bp, %si - jg .L25 -.L15: - movswl %si,%esi - addl $8, %esp - leal 0(,%esi,4), %eax - popl %ebx - popl %esi - popl %edi - popl %ebp - ret - .p2align 4,,7 - .p2align 3 -.L13: - testw %ax, %ax - jle .L15 - movl %eax, %ebp - shrw $2, %bp - cmpw $3, %si - movw %ax, 2(%esp) - leal 0(,%ebp,4), %eax - ja .L34 -.L27: - xorl %eax, %eax - .p2align 4,,7 - .p2align 3 -.L18: - movswl %ax,%edi - leal (%edx,%edi,8), %ecx - leal (%ebx,%edi,4), %edx - movl $-32768, %edi - .p2align 4,,7 - .p2align 3 -.L20: - movss (%ecx), %xmm0 - movl $32767, %ebp - mulss %xmm6, %xmm0 - cvttss2si %xmm0, %ebx - movss 4(%ecx), %xmm0 - cmpl $-32768, %ebx - cmovl %edi, %ebx - cmpl $32767, %ebx - mulss %xmm6, %xmm0 - cmovg %ebp, %ebx - movzbl %bh, %ebp - sall $8, %ebx - orl %ebp, %ebx - movl $32767, %ebp - movw %bx, (%edx) - cvttss2si %xmm0, %ebx - cmpl $-32768, %ebx - cmovl %edi, %ebx - cmpl $32767, %ebx - cmovg %ebp, %ebx - addl $1, %eax - movzbl %bh, %ebp - addl $8, %ecx - sall $8, %ebx - orl %ebp, %ebx - movw %bx, 2(%edx) - addl $4, %edx - cmpw %ax, %si - jg .L20 - jmp .L15 - .p2align 4,,7 - .p2align 3 -.L34: - testw %ax, %ax - je .L27 - movaps %xmm6, %xmm0 - xorl %ecx, %ecx - movdqa .LC1, %xmm3 - shufps $0, %xmm0, %xmm0 - movdqa .LC2, %xmm2 - movss %xmm6, 4(%esp) - xorl %edi, %edi - movaps %xmm0, %xmm7 - .p2align 4,,7 - .p2align 3 -.L19: - movaps (%edx,%ecx,2), %xmm0 - movdqa %xmm2, 
%xmm5 - movdqa %xmm2, %xmm6 - addl $1, %edi - movaps 16(%edx,%ecx,2), %xmm4 - mulps %xmm7, %xmm0 - mulps %xmm7, %xmm4 - cvttps2dq %xmm0, %xmm0 - movdqa %xmm0, %xmm1 - pcmpgtd %xmm3, %xmm1 - pand %xmm1, %xmm0 - pandn %xmm3, %xmm1 - por %xmm0, %xmm1 - cvttps2dq %xmm4, %xmm4 - movdqa %xmm1, %xmm0 - pcmpgtd %xmm2, %xmm0 - pand %xmm0, %xmm5 - pandn %xmm1, %xmm0 - movdqa %xmm0, %xmm1 - movdqa %xmm4, %xmm0 - por %xmm5, %xmm1 - pcmpgtd %xmm3, %xmm0 - movdqa .LC3, %xmm5 - pand %xmm0, %xmm4 - pand %xmm1, %xmm5 - pandn %xmm3, %xmm0 - psrad $8, %xmm5 - por %xmm4, %xmm0 - pslld $8, %xmm1 - movdqa %xmm0, %xmm4 - pcmpgtd %xmm2, %xmm4 - pand %xmm4, %xmm6 - pandn %xmm0, %xmm4 - movdqa %xmm4, %xmm0 - movdqa .LC3, %xmm4 - por %xmm6, %xmm0 - pand %xmm0, %xmm4 - pslld $8, %xmm0 - psrad $8, %xmm4 - movdqa %xmm5, %xmm6 - punpcklwd %xmm4, %xmm5 - punpckhwd %xmm4, %xmm6 - movdqa %xmm5, %xmm4 - punpcklwd %xmm6, %xmm5 - punpckhwd %xmm6, %xmm4 - punpcklwd %xmm4, %xmm5 - movdqa %xmm1, %xmm4 - punpcklwd %xmm0, %xmm1 - punpckhwd %xmm0, %xmm4 - movdqa %xmm1, %xmm6 - punpcklwd %xmm4, %xmm1 - punpckhwd %xmm4, %xmm6 - punpcklwd %xmm6, %xmm1 - por %xmm1, %xmm5 - movdqa %xmm5, (%ebx,%ecx) - addl $16, %ecx - cmpw %di, %bp - ja .L19 - cmpw 2(%esp), %ax - movss 4(%esp), %xmm6 - jne .L18 - jmp .L15 - .p2align 4,,7 - .p2align 3 -.L33: - testw %bp, %bp - .p2align 4,,3 - .p2align 3 - je .L28 - movaps %xmm6, %xmm0 - xorl %eax, %eax - movdqa .LC1, %xmm3 - shufps $0, %xmm0, %xmm0 - movdqa .LC2, %xmm2 - xorl %ecx, %ecx - movaps %xmm0, %xmm5 - .p2align 4,,7 - .p2align 3 -.L24: - movaps (%edx,%eax,2), %xmm0 - addl $1, %ecx - movdqa %xmm2, %xmm7 - movaps 16(%edx,%eax,2), %xmm4 - mulps %xmm5, %xmm0 - mulps %xmm5, %xmm4 - cvttps2dq %xmm0, %xmm0 - movdqa %xmm0, %xmm1 - pcmpgtd %xmm3, %xmm1 - pand %xmm1, %xmm0 - pandn %xmm3, %xmm1 - por %xmm0, %xmm1 - cvttps2dq %xmm4, %xmm4 - movdqa %xmm1, %xmm0 - pcmpgtd %xmm2, %xmm0 - pand %xmm0, %xmm7 - pandn %xmm1, %xmm0 - movdqa %xmm0, %xmm1 - movdqa %xmm4, %xmm0 - por %xmm7, 
%xmm1 - pcmpgtd %xmm3, %xmm0 - movdqa %xmm2, %xmm7 - pand %xmm0, %xmm4 - pandn %xmm3, %xmm0 - por %xmm4, %xmm0 - movdqa %xmm0, %xmm4 - pcmpgtd %xmm2, %xmm4 - pand %xmm4, %xmm7 - pandn %xmm0, %xmm4 - movdqa %xmm4, %xmm0 - movdqa %xmm1, %xmm4 - por %xmm7, %xmm0 - punpckhwd %xmm0, %xmm4 - punpcklwd %xmm0, %xmm1 - movdqa %xmm1, %xmm0 - punpcklwd %xmm4, %xmm1 - punpckhwd %xmm4, %xmm0 - punpcklwd %xmm0, %xmm1 - movdqa %xmm1, (%ebx,%eax) - addl $16, %eax - cmpw %cx, %di - ja .L24 - cmpw %bp, 2(%esp) - jne .L23 - jmp .L15 - .size alignedConvertToS16SSE2, .-alignedConvertToS16SSE2 - .section .rodata.cst4,"aM",@progbits,4 - .align 4 -.LC0: - .long 1191181824 - .section .rodata.cst16,"aM",@progbits,16 - .align 16 -.LC1: - .long -32768 - .long -32768 - .long -32768 - .long -32768 - .align 16 -.LC2: - .long 32767 - .long 32767 - .long 32767 - .long 32767 - .align 16 -.LC3: - .long 65280 - .long 65280 - .long 65280 - .long 65280 - .ident "GCC: (Ubuntu 4.4.0-0ubuntu2) 4.4.0" - .section .note.GNU-stack,"",@progbits diff --git a/src/core/fx_mixer.cpp b/src/core/fx_mixer.cpp index cb69b6ac8..01b4661df 100644 --- a/src/core/fx_mixer.cpp +++ b/src/core/fx_mixer.cpp @@ -1,10 +1,8 @@ -#ifndef SINGLE_SOURCE_COMPILE - /* * fx_mixer.cpp - effect-mixer for LMMS * - * Copyright (c) 2008 Tobias Doerffel - * + * Copyright (c) 2008-2009 Tobias Doerffel + * * This file is part of Linux MultiMedia Studio - http://lmms.sourceforge.net * * This program is free software; you can redistribute it and/or @@ -28,7 +26,7 @@ #include #include "fx_mixer.h" -#include "basic_ops.h" +#include "Cpu.h" #include "effect.h" #include "song.h" @@ -39,7 +37,7 @@ fxChannel::fxChannel( model * _parent ) : m_stillRunning( false ), m_peakLeft( 0.0f ), m_peakRight( 0.0f ), - m_buffer( alignedAllocFrames( engine::getMixer()->framesPerPeriod() ) ), + m_buffer( CPU::allocFrames( engine::getMixer()->framesPerPeriod() ) ), m_muteModel( false, _parent ), m_volumeModel( 1.0, 0.0, 2.0, 0.01, _parent ), m_name(), @@ -54,7 +52,7 
@@ fxChannel::fxChannel( model * _parent ) : fxChannel::~fxChannel() { - alignedFreeFrames( m_buffer ); + CPU::freeFrames( m_buffer ); } @@ -93,7 +91,8 @@ void fxMixer::mixToChannel( const sampleFrame * _buf, fx_ch_t _ch ) if( m_fxChannels[_ch]->m_muteModel.value() == false ) { m_fxChannels[_ch]->m_lock.lock(); - alignedBufMix( m_fxChannels[_ch]->m_buffer, _buf, engine::getMixer()->framesPerPeriod() ); + CPU::bufMix( m_fxChannels[_ch]->m_buffer, _buf, + engine::getMixer()->framesPerPeriod() ); m_fxChannels[_ch]->m_used = true; m_fxChannels[_ch]->m_lock.unlock(); } @@ -248,4 +247,3 @@ void fxMixer::loadSettings( const QDomElement & _this ) } -#endif diff --git a/src/core/main.cpp b/src/core/main.cpp index 85512c4c2..7bded2737 100644 --- a/src/core/main.cpp +++ b/src/core/main.cpp @@ -62,7 +62,7 @@ #include "main_window.h" #include "project_renderer.h" #include "song.h" -#include "basic_ops.h" +#include "Cpu.h" // TODO Make a factory class for this (or hide it behind engine) #include "lmms_style.h" @@ -96,8 +96,8 @@ int main( int argc, char * * argv ) // intialize RNG srand( getpid() + time( 0 ) ); - // init CPU specific optimized basic ops - initBasicOps(); + // init CPU specific optimized operations + CPU::init(); bool core_only = false; bool fullscreen = true; diff --git a/src/core/mixer.cpp b/src/core/mixer.cpp index 42ef1e76c..2351366f2 100644 --- a/src/core/mixer.cpp +++ b/src/core/mixer.cpp @@ -2,7 +2,7 @@ * mixer.cpp - audio-device-independent mixer for LMMS * * Copyright (c) 2004-2009 Tobias Doerffel - * + * * This file is part of Linux MultiMedia Studio - http://lmms.sourceforge.net * * This program is free software; you can redistribute it and/or @@ -22,7 +22,6 @@ * */ - #include #include "mixer.h" @@ -41,7 +40,7 @@ #include "sample_play_handle.h" #include "piano_roll.h" #include "micro_timer.h" -#include "basic_ops.h" +#include "Cpu.h" #include "audio_device.h" #include "midi_client.h" @@ -125,7 +124,7 @@ public: MixerWorkerThread( int _worker_num, mixer 
* _mixer ) : QThread( _mixer ), - m_workingBuf( alignedAllocFrames( _mixer->framesPerPeriod() ) ), + m_workingBuf( CPU::allocFrames( _mixer->framesPerPeriod() ) ), m_workerNum( _worker_num ), m_quit( false ), m_mixer( _mixer ), @@ -135,7 +134,7 @@ public: virtual ~MixerWorkerThread() { - alignedFreeFrames( m_workingBuf ); + CPU::freeFrames( m_workingBuf ); } virtual void quit( void ) @@ -295,7 +294,7 @@ mixer::mixer( void ) : { m_inputBufferFrames[i] = 0; m_inputBufferSize[i] = DEFAULT_BUFFER_SIZE * 100; - m_inputBuffer[i] = alignedAllocFrames( + m_inputBuffer[i] = CPU::allocFrames( DEFAULT_BUFFER_SIZE * 100 ); clearAudioBuffer( m_inputBuffer[i], m_inputBufferSize[i] ); } @@ -337,10 +336,10 @@ mixer::mixer( void ) : m_fifo = new fifo( 1 ); } - m_workingBuf = alignedAllocFrames( m_framesPerPeriod ); + m_workingBuf = CPU::allocFrames( m_framesPerPeriod ); for( Uint8 i = 0; i < 3; i++ ) { - m_readBuf = alignedAllocFrames( m_framesPerPeriod ); + m_readBuf = CPU::allocFrames( m_framesPerPeriod ); clearAudioBuffer( m_readBuf, m_framesPerPeriod ); m_bufferPool.push_back( m_readBuf ); } @@ -389,10 +388,10 @@ mixer::~mixer() for( Uint8 i = 0; i < 3; i++ ) { - alignedFreeFrames( m_bufferPool[i] ); + CPU::freeFrames( m_bufferPool[i] ); } - alignedFreeFrames( m_workingBuf ); + CPU::freeFrames( m_workingBuf ); } @@ -504,9 +503,9 @@ void mixer::pushInputFrames( sampleFrame * _ab, const f_cnt_t _frames ) if( frames + _frames > size ) { size = qMax( size * 2, frames + _frames ); - sampleFrame * ab = alignedAllocFrames( size ); - alignedMemCpy( ab, buf, frames * sizeof( sampleFrame ) ); - alignedFreeFrames( buf ); + sampleFrame * ab = CPU::allocFrames( size ); + CPU::memCpy( ab, buf, frames * sizeof( sampleFrame ) ); + CPU::freeFrames( buf ); m_inputBufferSize[ m_inputBufferWrite ] = size; m_inputBuffer[ m_inputBufferWrite ] = ab; @@ -514,7 +513,7 @@ void mixer::pushInputFrames( sampleFrame * _ab, const f_cnt_t _frames ) buf = ab; } - alignedMemCpy( &buf[ frames ], _ab, _frames * 
sizeof( sampleFrame ) ); + CPU::memCpy( &buf[ frames ], _ab, _frames * sizeof( sampleFrame ) ); m_inputBufferFrames[ m_inputBufferWrite ] += _frames; unlockInputFrames(); @@ -686,7 +685,7 @@ void mixer::bufferToPort( const sampleFrame * _buf, const int loop1_frame = qMin( end_frame, m_framesPerPeriod ); _port->lockFirstBuffer(); - unalignedBufMixLRCoeff( _port->firstBuffer() + start_frame, + CPU::unalignedBufMixLRCoeff( _port->firstBuffer() + start_frame, _buf, _vv.vol[0], _vv.vol[1], loop1_frame - start_frame ); _port->unlockFirstBuffer(); @@ -697,7 +696,7 @@ void mixer::bufferToPort( const sampleFrame * _buf, const int frames_done = m_framesPerPeriod - start_frame; end_frame -= m_framesPerPeriod; end_frame = qMin( end_frame, m_framesPerPeriod ); - unalignedBufMixLRCoeff( _port->secondBuffer(), + CPU::unalignedBufMixLRCoeff( _port->secondBuffer(), _buf+frames_done, _vv.vol[0], _vv.vol[1], end_frame ); @@ -720,7 +719,7 @@ void mixer::clearAudioBuffer( sampleFrame * _ab, const f_cnt_t _frames, { if( likely( (size_t)( _ab+_offset ) % 16 == 0 && _frames % 8 == 0 ) ) { - alignedMemClear( _ab+_offset, sizeof( *_ab ) * _frames ); + CPU::memClear( _ab+_offset, sizeof( *_ab ) * _frames ); } else { @@ -1157,9 +1156,9 @@ void mixer::fifoWriter::run( void ) const fpp_t frames = m_mixer->framesPerPeriod(); while( m_writing ) { - sampleFrameA * buffer = alignedAllocFrames( frames ); + sampleFrameA * buffer = CPU::allocFrames( frames ); const sampleFrameA * b = m_mixer->renderNextBuffer(); - alignedMemCpy( buffer, b, frames * sizeof( sampleFrameA ) ); + CPU::memCpy( buffer, b, frames * sizeof( sampleFrameA ) ); m_fifo->write( buffer ); }