CPU: new framework for optimized CPU-specific routines

The new CPU framework replaces the old BasicOps framework. It is more flexible, and the build process is much cleaner now that the old mess (pre-compiled assembler files etc.) is gone. It will hopefully see further improvements and extensions soon. Signed-off-by: Tobias Doerffel <tobias.doerffel@gmail.com>
2009-08-03 14:47:28 +02:00
parent 27d9c17e3f
commit 89fa5c99e9
23 changed files with 247 additions and 2172 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -454,47 +454,42 @@ SET(LMMS_ER_H ${CMAKE_CURRENT_BINARY_DIR}/embedded_resources.h)
 ADD_FILE_DEPENDENCIES(${CMAKE_BINARY_DIR}/lmmsconfig.h ${lmms_MOC_out})

 ADD_CUSTOM_COMMAND(OUTPUT ${LMMS_ER_H} COMMAND ${BIN2RES} ARGS ${lmms_EMBEDDED_RESOURCES} > ${LMMS_ER_H} DEPENDS ${BIN2RES})
-SET(BASIC_OPS_X86_C "${CMAKE_SOURCE_DIR}/src/core/basic_ops_x86.c")

+# build CPU specific optimized modules
 IF(LMMS_HOST_X86 OR LMMS_HOST_X86_64)

-ADD_CUSTOM_TARGET(regen-basic-ops)
-
-IF(LMMS_HOST_X86)
-SET(opt_targets mmx sse sse2)
-SET(host_arch x86)
-ELSE(LMMS_HOST_X86)
-SET(opt_targets sse sse2)
-SET(host_arch x86_64)
-ENDIF(LMMS_HOST_X86)
+	IF(LMMS_HOST_X86)
+		SET(opt_targets mmx sse sse2)
+	ELSE(LMMS_HOST_X86)
+		SET(opt_targets sse sse2)
+	ENDIF(LMMS_HOST_X86)

 FOREACH(opt_target ${opt_targets})

 	STRING(TOUPPER ${opt_target} OPT_TARGET)

-	SET(BASIC_OPS_X86_TARGET_S "${CMAKE_SOURCE_DIR}/src/core/basic_ops_${host_arch}_${opt_target}.s")
-	SET(BASIC_OPS_X86_TARGET_O "${CMAKE_BINARY_DIR}/basic_ops_${host_arch}_${opt_target}.o")
+	SET(BASIC_OPS_X86_TARGET_S "")
+	SET(CPU_X86_C "${CMAKE_SOURCE_DIR}/src/core/CpuX86.c")
+	SET(CPU_X86_TARGET_O "${CMAKE_BINARY_DIR}/CpuX86_${opt_target}.o")
+	SET(FPMATH_FLAGS "")
 	IF(NOT "${OPT_TARGET}" STREQUAL "MMX")
 		SET(FPMATH_FLAGS "-mfpmath=sse")
 	ENDIF(NOT "${OPT_TARGET}" STREQUAL "MMX")
-	IF(EXISTS "$ENV{SVN_C_COMPILER}")
-		SET(C_COMPILER $ENV{SVN_C_COMPILER})
-	ELSE(EXISTS "$ENV{SVN_C_COMPILER}")
-		SET(C_COMPILER ${CMAKE_C_COMPILER})
-	ENDIF(EXISTS "$ENV{SVN_C_COMPILER}")
 	IF("${CMAKE_C_COMPILER_ARG1}" STREQUAL " gcc")
 		SET(CMAKE_C_COMPILER_ARG1 gcc)
 	ENDIF("${CMAKE_C_COMPILER_ARG1}" STREQUAL " gcc")
-	ADD_CUSTOM_TARGET(regen-basic-ops-${opt_target} COMMAND ${C_COMPILER} -O2 -fno-stack-protector -ftree-vectorize -ftree-vectorizer-verbose=2 -fomit-frame-pointer -c -S -I${CMAKE_SOURCE_DIR}/include -I${CMAKE_BINARY_DIR} -g0 -DBUILD_${OPT_TARGET} -m${opt_target} ${FPMATH_FLAGS} -o ${BASIC_OPS_X86_TARGET_S} ${BASIC_OPS_X86_C} DEPENDS ${BASIC_OPS_X86_C})
-	ADD_CUSTOM_COMMAND(OUTPUT ${BASIC_OPS_X86_TARGET_O} COMMAND ${CMAKE_C_COMPILER} ARGS ${CMAKE_C_COMPILER_ARG1} ${BASIC_OPS_X86_TARGET_S} -c -o ${BASIC_OPS_X86_TARGET_O} DEPENDS ${BASIC_OPS_X86_TARGET_S})
-	ADD_DEPENDENCIES(regen-basic-ops regen-basic-ops-${opt_target})
-	SET(opt_target_objects ${opt_target_objects} ${BASIC_OPS_X86_TARGET_O})
+	SET(COMPILE_CMD ${CMAKE_C_COMPILER} ${CPU_X86_C} -O2 -fno-stack-protector -ftree-vectorize -fomit-frame-pointer -c -I${CMAKE_SOURCE_DIR}/include -I${CMAKE_BINARY_DIR} -g0 -DBUILD_${OPT_TARGET} -m${opt_target} ${FPMATH_FLAGS})
+	ADD_CUSTOM_COMMAND(OUTPUT ${CPU_X86_TARGET_O} COMMAND ${COMPILE_CMD} -o ${CPU_X86_TARGET_O} DEPENDS ${CPU_X86_C})
+	ADD_CUSTOM_TARGET(debug-${opt_target} COMMAND ${COMPILE_CMD} -o ${CPU_X86_TARGET_O}.s -S -ftree-vectorizer-verbose=2)
+	SET(cpu_objects ${cpu_objects} ${CPU_X86_TARGET_O})

 ENDFOREACH(opt_target ${opt_targets})
-SET(lmms_SOURCES ${lmms_SOURCES} ${opt_target_objects})
-# to be used by maintainer with special ultra-optimizing super duper GCC
+
+	SET(lmms_SOURCES ${lmms_SOURCES} ${cpu_objects})
+
 ENDIF(LMMS_HOST_X86 OR LMMS_HOST_X86_64)

+
 IF(WIN32)
 	SET(WINRC "${CMAKE_BINARY_DIR}/lmmsrc.obj")
 	IF(LMMS_HOST_X86_64)
--- a/include/basic_ops.h
+++ b/include/basic_ops.h
@@ -1,8 +1,8 @@
 /*
- * basic_ops.h - basic memory operations
+ * Cpu.h - CPU specific accelerated operations
+ *
+ * Copyright (c) 2008-2009 Tobias Doerffel <tobydox/at/users.sourceforge.net>
 *
- * Copyright (c) 2008 Tobias Doerffel <tobydox/at/users.sourceforge.net>
- * 
 * This file is part of Linux MultiMedia Studio - http://lmms.sourceforge.net
 *
 * This program is free software; you can redistribute it and/or
@@ -22,9 +22,8 @@
 *
 */

-
-#ifndef _BASIC_OPS_H
-#define _BASIC_OPS_H
+#ifndef _CPU_H
+#define _CPU_H

 #include "lmms_basics.h"

@@ -32,56 +31,64 @@
 #include <stdbool.h>
 #endif

-void initBasicOps( void );
+#ifdef __cplusplus
+namespace CPU
+{
+#endif

-void * alignedMalloc( int _bytes );
-void alignedFree( void * _buf );
+void init();

-sampleFrameA * alignedAllocFrames( int _frames );
-void alignedFreeFrames( sampleFrameA * _buf );
+void * memAlloc( int _bytes );
+void memFree( void * _buf );
+
+sampleFrameA * allocFrames( int _frames );
+void freeFrames( sampleFrameA * _buf );


-// all aligned* functions assume data to be 16 byte aligned and size to be
-// multiples of 64
-typedef void (*alignedMemCpyFunc)( void * RP _dst, const void * RP _src,
+// all functions assume data to be 16 byte aligned and size to be
+// multiples of 64 (except for unaligned*())
+typedef void (*MemCpyFunc)( void * RP _dst, const void * RP _src,
 								int _size );
-typedef void (*alignedMemClearFunc)( void * RP _dst, int _size );
-typedef void (*alignedBufApplyGainFunc)( sampleFrameA * RP _dst,
+typedef void (*MemClearFunc)( void * RP _dst, int _size );
+typedef void (*BufApplyGainFunc)( sampleFrameA * RP _dst,
 						float _gain, int _frames );
-typedef void (*alignedBufMixFunc)( sampleFrameA * RP _dst,
+typedef void (*BufMixFunc)( sampleFrameA * RP _dst,
 						const sampleFrameA * RP _src,
 								int _frames );
-typedef void (*alignedBufMixLRCoeffFunc)( sampleFrameA * RP _dst,
+typedef void (*BufMixLRCoeffFunc)( sampleFrameA * RP _dst,
 						const sampleFrameA * RP _src,
 						float _left, float _right,
 								int _frames );
-typedef void (*unalignedBufMixLRCoeffFunc)( sampleFrame * RP _dst,
+typedef void (*UnalignedBufMixLRCoeffFunc)( sampleFrame * RP _dst,
 						const sampleFrame * RP _src,
 						float _left, float _right,
 								int _frames );
-typedef void (*alignedBufWetDryMixFunc)( sampleFrameA * RP _dst,
+typedef void (*BufWetDryMixFunc)( sampleFrameA * RP _dst,
 					const sampleFrameA * RP _src,
 					float _wet, float _dry, int _frames );
-typedef void (*alignedBufWetDryMixSplittedFunc)( sampleFrameA * RP _dst,
+typedef void (*BufWetDryMixSplittedFunc)( sampleFrameA * RP _dst,
 					const float * RP _left,
 					const float * RP _right,
 					float _wet, float _dry, int _frames );
-typedef int (*alignedConvertToS16Func)( const sampleFrameA * RP _src,
+typedef int (*ConvertToS16Func)( const sampleFrameA * RP _src,
 					intSampleFrameA * RP _dst,
 					const fpp_t _frames,
 					const float _master_gain,
 					const bool _convert_endian );

-extern alignedMemCpyFunc alignedMemCpy;
-extern alignedMemClearFunc alignedMemClear;
-extern alignedBufApplyGainFunc alignedBufApplyGain;
-extern alignedBufMixFunc alignedBufMix;
-extern alignedBufMixLRCoeffFunc alignedBufMixLRCoeff;
-extern unalignedBufMixLRCoeffFunc unalignedBufMixLRCoeff;
-extern alignedBufWetDryMixFunc alignedBufWetDryMix;
-extern alignedBufWetDryMixSplittedFunc alignedBufWetDryMixSplitted;
-extern alignedConvertToS16Func alignedConvertToS16;
+extern MemCpyFunc memCpy;
+extern MemClearFunc memClear;
+extern BufApplyGainFunc bufApplyGain;
+extern BufMixFunc bufMix;
+extern BufMixLRCoeffFunc bufMixLRCoeff;
+extern UnalignedBufMixLRCoeffFunc unalignedBufMixLRCoeff;
+extern BufWetDryMixFunc bufWetDryMix;
+extern BufWetDryMixSplittedFunc bufWetDryMixSplitted;
+extern ConvertToS16Func convertToS16;

+#ifdef __cplusplus
+}
+#endif

 #ifdef LMMS_HOST_X86
 #define X86_OPTIMIZATIONS
--- a/include/audio_dummy.h
+++ b/include/audio_dummy.h
@@ -1,8 +1,8 @@
 /*
 * audio_dummy.h - dummy-audio-device
 *
- * Copyright (c) 2004-2008 Tobias Doerffel <tobydox/at/users.sourceforge.net>
- * 
+ * Copyright (c) 2004-2009 Tobias Doerffel <tobydox/at/users.sourceforge.net>
+ *
 * This file is part of Linux MultiMedia Studio - http://lmms.sourceforge.net
 *
 * This program is free software; you can redistribute it and/or
@@ -22,12 +22,11 @@
 *
 */

-
 #ifndef _AUDIO_DUMMY_H
 #define _AUDIO_DUMMY_H

 #include "audio_device.h"
-#include "basic_ops.h"
+#include "Cpu.h"
 #include "micro_timer.h"


@@ -45,7 +44,7 @@ public:
 		stopProcessing();
 	}

-	inline static QString name( void )
+	inline static QString name()
 	{
 		return( QT_TRANSLATE_NOOP( "setupWidget",
 						"Dummy (no sound output)" ) );
@@ -64,11 +63,11 @@ public:
 		{
 		}

-		virtual void saveSettings( void )
+		virtual void saveSettings()
 		{
 		}

-		virtual void show( void )
+		virtual void show()
 		{
 			parentWidget()->hide();
 			QWidget::show();
@@ -78,12 +77,12 @@ public:


 private:
-	virtual void startProcessing( void )
+	virtual void startProcessing()
 	{
 		start();
 	}

-	virtual void stopProcessing( void )
+	virtual void stopProcessing()
 	{
 		if( isRunning() )
 		{
@@ -92,7 +91,7 @@ private:
 		}
 	}

-	virtual void run( void )
+	virtual void run()
 	{
 		microTimer timer;
 		while( true )
@@ -104,7 +103,7 @@ private:
 			{
 				break;
 			}
-			alignedFreeFrames( b );
+			CPU::freeFrames( b );

 			const Sint32 microseconds = static_cast<Sint32>(
 					getMixer()->framesPerPeriod() *
--- a/plugins/ladspa_effect/ladspa_effect.cpp
+++ b/plugins/ladspa_effect/ladspa_effect.cpp
@@ -3,7 +3,7 @@
 *
 * Copyright (c) 2006-2008 Danny McRae <khjklujn/at/users.sourceforge.net>
 * Copyright (c) 2009 Tobias Doerffel <tobydox/at/users.sourceforge.net>
- * 
+ *
 * This file is part of Linux MultiMedia Studio - http://lmms.sourceforge.net
 *
 * This program is free software; you can redistribute it and/or
@@ -23,7 +23,6 @@
 *
 */

-
 #include <QtGui/QMessageBox>

 #include "ladspa_effect.h"
@@ -35,7 +34,7 @@
 #include "ladspa_subplugin_features.h"
 #include "mixer.h"
 #include "effect_chain.h"
-#include "basic_ops.h"
+#include "Cpu.h"
 #include "automation_pattern.h"
 #include "controller_connection.h"

@@ -82,7 +81,7 @@ ladspaEffect::ladspaEffect( model * _parent,
 							arg( m_key.second ),
 				QMessageBox::Ok, QMessageBox::NoButton );
 		}
-		setOkay( FALSE );
+		setOkay( false );
 		return;
 	}

@@ -105,7 +104,7 @@ ladspaEffect::~ladspaEffect()



-void ladspaEffect::changeSampleRate( void )
+void ladspaEffect::changeSampleRate()
 {
 	multimediaProject mmp( multimediaProject::EffectSettings );
 	m_controls->saveState( mmp, mmp.content() );
@@ -141,7 +140,7 @@ bool ladspaEffect::processAudioBuffer( sampleFrame * _buf,
 	if( !isOkay() || dontRun() || !isRunning() || !isEnabled() )
 	{
 		m_pluginMutex.unlock();
-		return( FALSE );
+		return false;
 	}

 	int frames = _frames;
@@ -150,7 +149,7 @@ bool ladspaEffect::processAudioBuffer( sampleFrame * _buf,
 	if( m_maxSampleRate < engine::getMixer()->processingSampleRate() )
 	{
 		o_buf = _buf;
-		_buf = alignedAllocFrames( _frames );
+		_buf = CPU::allocFrames( _frames );
 		sampleDown( o_buf, _buf, m_maxSampleRate );
 		frames = _frames * m_maxSampleRate /
 				engine::getMixer()->processingSampleRate();
@@ -258,7 +257,7 @@ bool ladspaEffect::processAudioBuffer( sampleFrame * _buf,
 	}
 	if( channel >= 1 && channel <= DEFAULT_CHANNELS )
 	{
-		alignedBufWetDryMixSplitted( _buf, buffers[0], buffers[1],
+		CPU::bufWetDryMixSplitted( _buf, buffers[0], buffers[1],
 					getWetLevel(), getDryLevel(), frames );
 	}

@@ -272,7 +271,7 @@ bool ladspaEffect::processAudioBuffer( sampleFrame * _buf,
 	if( o_buf != NULL )
 	{
 		sampleBack( _buf, o_buf, m_maxSampleRate );
-		alignedFreeFrames( _buf );
+		CPU::freeFrames( _buf );
 	}

 	checkGate( out_sum / frames );
@@ -280,7 +279,7 @@ bool ladspaEffect::processAudioBuffer( sampleFrame * _buf,

 	bool is_running = isRunning();
 	m_pluginMutex.unlock();
-	return( is_running );
+	return is_running;
 }


@@ -298,7 +297,7 @@ void ladspaEffect::setControl( int _control, LADSPA_Data _value )



-void ladspaEffect::pluginInstantiation( void )
+void ladspaEffect::pluginInstantiation()
 {
 	m_maxSampleRate = maxSamplerate( displayName() );

@@ -469,7 +468,7 @@ void ladspaEffect::pluginInstantiation( void )
 		QMessageBox::warning( 0, "Effect", 
 			"Can't get LADSPA descriptor function: " + m_key.second,
 			QMessageBox::Ok, QMessageBox::NoButton );
-		setOkay( FALSE );
+		setOkay( false );
 		return;
 	}
 	if( m_descriptor->run == NULL )
@@ -477,7 +476,7 @@ void ladspaEffect::pluginInstantiation( void )
 		QMessageBox::warning( 0, "Effect",
 			"Plugin has no processor: " + m_key.second,
 			QMessageBox::Ok, QMessageBox::NoButton );
-		setDontRun( TRUE );
+		setDontRun( true );
 	}
 	for( ch_cnt_t proc = 0; proc < getProcessorCount(); proc++ )
 	{
@@ -488,7 +487,7 @@ void ladspaEffect::pluginInstantiation( void )
 			QMessageBox::warning( 0, "Effect",
 				"Can't get LADSPA instance: " + m_key.second,
 				QMessageBox::Ok, QMessageBox::NoButton );
-			setOkay( FALSE );
+			setOkay( false );
 			return;
 		}
 		m_handles.append( effect );
@@ -508,7 +507,7 @@ void ladspaEffect::pluginInstantiation( void )
 				QMessageBox::warning( 0, "Effect", 
 				"Failed to connect port: " + m_key.second, 
 				QMessageBox::Ok, QMessageBox::NoButton );
-				setDontRun( TRUE );
+				setDontRun( true );
 				return;
 			}
 		}
@@ -525,7 +524,7 @@ void ladspaEffect::pluginInstantiation( void )



-void ladspaEffect::pluginDestruction( void )
+void ladspaEffect::pluginDestruction()
 {
 	if( !isOkay() )
 	{
@@ -571,9 +570,9 @@ sample_rate_t ladspaEffect::maxSamplerate( const QString & _name )
 	}
 	if( __buggy_plugins.contains( _name ) )
 	{
-		return( __buggy_plugins[_name] );
+		return __buggy_plugins[_name];
 	}
-	return( engine::getMixer()->processingSampleRate() );
+	return engine::getMixer()->processingSampleRate();
 }


@@ -585,9 +584,9 @@ extern "C"
 // neccessary for getting instance out of shared lib
 plugin * PLUGIN_EXPORT lmms_plugin_main( model * _parent, void * _data )
 {
-	return( new ladspaEffect( _parent,
+	return new ladspaEffect( _parent,
 		static_cast<const plugin::descriptor::subPluginFeatures::key *>(
-								_data ) ) );
+								_data ) );
 }

 }
--- a/src/core/basic_ops.cpp
+++ b/src/core/basic_ops.cpp
@@ -1,8 +1,8 @@
 /*
- * basic_ops.cpp - basic memory operations
+ * Cpu.cpp - CPU specific accelerated operations
+ *
+ * Copyright (c) 2008-2009 Tobias Doerffel <tobydox/at/users.sourceforge.net>
 *
- * Copyright (c) 2008 Tobias Doerffel <tobydox/at/users.sourceforge.net>
- * 
 * This file is part of Linux MultiMedia Studio - http://lmms.sourceforge.net
 *
 * This program is free software; you can redistribute it and/or
@@ -23,17 +23,19 @@
 */


-#include "basic_ops.h"
+#include "Cpu.h"

 #include <cstdlib>
 #include <cstdio>
 #include <memory.h>


-
-void * alignedMalloc( int _bytes )
+namespace CPU
 {
-	char *ptr,*ptr2,*aligned_ptr;
+
+void * memAlloc( int _bytes )
+{
+	char *ptr,*ptr2,*_ptr;
 	int align_mask = ALIGN_SIZE- 1;
 	ptr =(char *) malloc( _bytes + ALIGN_SIZE + sizeof(int) );
 	if( ptr == NULL )
@@ -42,17 +44,19 @@ void * alignedMalloc( int _bytes )
 	}

 	ptr2 = ptr + sizeof(int);
-	aligned_ptr = ptr2 + ( ALIGN_SIZE- ( (size_t) ptr2 & align_mask ) );
+	_ptr = ptr2 + ( ALIGN_SIZE- ( (size_t) ptr2 & align_mask ) );


-	ptr2 = aligned_ptr - sizeof(int);
-	*((int *) ptr2) = (int)( aligned_ptr - ptr );
+	ptr2 = _ptr - sizeof(int);
+	*((int *) ptr2) = (int)( _ptr - ptr );

-	return aligned_ptr;
+	return _ptr;
 }


-void alignedFree( void * _buf )
+
+
+void memFree( void * _buf )
 {
 	if( _buf )
 	{
@@ -66,22 +70,26 @@ void alignedFree( void * _buf )
 }


-sampleFrameA * alignedAllocFrames( int _n )
+
+
+sampleFrameA * allocFrames( int _n )
 {
-	return (sampleFrameA *) alignedMalloc( _n * sizeof( sampleFrameA ) );
+	return (sampleFrameA *) memAlloc( _n * sizeof( sampleFrameA ) );
 }


-void alignedFreeFrames( sampleFrame * _buf )
+
+
+void freeFrames( sampleFrame * _buf )
 {
-	alignedFree( _buf );
+	memFree( _buf );
 }




 // slow fallback
-void alignedMemCpyNoOpt( void * RP _dst, const void * RP _src, int _size )
+void memCpyNoOpt( void * RP _dst, const void * RP _src, int _size )
 {
 	const int s = _size / sizeof( int );
 	const int * RP src = (const int *) _src;
@@ -110,7 +118,7 @@ void alignedMemCpyNoOpt( void * RP _dst, const void * RP _src, int _size )


 // slow fallback
-void alignedMemClearNoOpt( void * _dst, int _size )
+void memClearNoOpt( void * _dst, int _size )
 {
 	const int s = _size / ( sizeof( int ) * 4 );
 	int * dst = (int *) _dst;
@@ -126,7 +134,7 @@ void alignedMemClearNoOpt( void * _dst, int _size )



-void alignedBufApplyGainNoOpt( sampleFrameA * RP _dst, float _gain,
+void bufApplyGainNoOpt( sampleFrameA * RP _dst, float _gain,
 								int _frames )
 {
 	for( int i = 0; i < _frames; )
@@ -152,7 +160,7 @@ void alignedBufApplyGainNoOpt( sampleFrameA * RP _dst, float _gain,
 }


-void alignedBufMixNoOpt( sampleFrameA * RP _dst, const sampleFrameA * RP _src,
+void bufMixNoOpt( sampleFrameA * RP _dst, const sampleFrameA * RP _src,
 								int _frames )
 {
 	for( int i = 0; i < _frames; )
@@ -171,7 +179,7 @@ void alignedBufMixNoOpt( sampleFrameA * RP _dst, const sampleFrameA * RP _src,



-void alignedBufMixLRCoeffNoOpt( sampleFrameA * RP _dst,
+void bufMixLRCoeffNoOpt( sampleFrameA * RP _dst,
 					const sampleFrameA * RP _src,
 					float _left, float _right, int _frames )
 {
@@ -217,7 +225,7 @@ void unalignedBufMixLRCoeffNoOpt( sampleFrame * RP _dst,



-void alignedBufWetDryMixNoOpt( sampleFrameA * RP _dst,
+void bufWetDryMixNoOpt( sampleFrameA * RP _dst,
 					const sampleFrameA * RP _src,
 					float _wet, float _dry, int _frames )
 {
@@ -231,7 +239,7 @@ void alignedBufWetDryMixNoOpt( sampleFrameA * RP _dst,



-void alignedBufWetDryMixSplittedNoOpt( sampleFrameA * RP _dst,
+void bufWetDryMixSplittedNoOpt( sampleFrameA * RP _dst,
 					const float * RP _left,
 					const float * RP _right,
 					float _wet, float _dry, int _frames )
@@ -248,7 +256,7 @@ void alignedBufWetDryMixSplittedNoOpt( sampleFrameA * RP _dst,



-int alignedConvertToS16NoOpt( const sampleFrameA * RP _src,
+int convertToS16NoOpt( const sampleFrameA * RP _src,
 					intSampleFrameA * RP _dst,
 					const fpp_t _frames,
 					const float _master_gain,
@@ -294,15 +302,15 @@ int alignedConvertToS16NoOpt( const sampleFrameA * RP _src,
 }


-alignedMemCpyFunc alignedMemCpy = alignedMemCpyNoOpt;
-alignedMemClearFunc alignedMemClear = alignedMemClearNoOpt;
-alignedBufApplyGainFunc alignedBufApplyGain = alignedBufApplyGainNoOpt;
-alignedBufMixFunc alignedBufMix = alignedBufMixNoOpt;
-alignedBufMixLRCoeffFunc alignedBufMixLRCoeff = alignedBufMixLRCoeffNoOpt;
-unalignedBufMixLRCoeffFunc unalignedBufMixLRCoeff = unalignedBufMixLRCoeffNoOpt;
-alignedBufWetDryMixFunc alignedBufWetDryMix = alignedBufWetDryMixNoOpt;
-alignedBufWetDryMixSplittedFunc alignedBufWetDryMixSplitted = alignedBufWetDryMixSplittedNoOpt;
-alignedConvertToS16Func alignedConvertToS16 = alignedConvertToS16NoOpt;
+MemCpyFunc memCpy = memCpyNoOpt;
+MemClearFunc memClear = memClearNoOpt;
+BufApplyGainFunc bufApplyGain = bufApplyGainNoOpt;
+BufMixFunc bufMix = bufMixNoOpt;
+BufMixLRCoeffFunc bufMixLRCoeff = bufMixLRCoeffNoOpt;
+UnalignedBufMixLRCoeffFunc unalignedBufMixLRCoeff = unalignedBufMixLRCoeffNoOpt;
+BufWetDryMixFunc bufWetDryMix = bufWetDryMixNoOpt;
+BufWetDryMixSplittedFunc bufWetDryMixSplitted = bufWetDryMixSplittedNoOpt;
+ConvertToS16Func convertToS16 = convertToS16NoOpt;


 #ifdef X86_OPTIMIZATIONS
@@ -322,28 +330,28 @@ enum CPUFeatures
 extern "C"
 {
 #ifdef LMMS_HOST_X86
-void alignedMemCpyMMX( void * RP _dst, const void * RP _src, int _size );
-void alignedMemClearMMX( void * RP _dst, int _size );
+void memCpyMMX( void * RP _dst, const void * RP _src, int _size );
+void memClearMMX( void * RP _dst, int _size );
 #endif
-void alignedMemCpySSE( void * RP _dst, const void * RP _src, int _size );
-void alignedMemClearSSE( void * RP _dst, int _size );
-void alignedBufApplyGainSSE( sampleFrameA * RP _dst, float _gain, int _frames );
-void alignedBufMixSSE( sampleFrameA * RP _dst, const sampleFrameA * RP _src, int _frames );
-void alignedBufMixLRCoeffSSE( sampleFrameA * RP _dst, const sampleFrameA * RP _src, float _left, float _right, int _frames );
+void memCpySSE( void * RP _dst, const void * RP _src, int _size );
+void memClearSSE( void * RP _dst, int _size );
+void bufApplyGainSSE( sampleFrameA * RP _dst, float _gain, int _frames );
+void bufMixSSE( sampleFrameA * RP _dst, const sampleFrameA * RP _src, int _frames );
+void bufMixLRCoeffSSE( sampleFrameA * RP _dst, const sampleFrameA * RP _src, float _left, float _right, int _frames );
 void unalignedBufMixLRCoeffSSE( sampleFrame * RP _dst, const sampleFrame * RP _src, const float _left, const float _right, int _frames );
-void alignedBufWetDryMixSSE( sampleFrameA * RP _dst, const sampleFrameA * RP _src, float _wet, float _dry, int _frames );
-void alignedBufWetDryMixSplittedSSE( sampleFrameA * RP _dst, const float * RP _left, const float * RP _right, float _wet, float _dry, int _frames );
+void bufWetDryMixSSE( sampleFrameA * RP _dst, const sampleFrameA * RP _src, float _wet, float _dry, int _frames );
+void bufWetDryMixSplittedSSE( sampleFrameA * RP _dst, const float * RP _left, const float * RP _right, float _wet, float _dry, int _frames );
 #ifdef X86_OPTIMIZATIONS
-void alignedMemCpySSE2( void * RP _dst, const void * RP _src, int _size );
-void alignedMemClearSSE2( void * RP _dst, int _size );
-int alignedConvertToS16SSE2( const sampleFrameA * RP _src, intSampleFrameA * RP _dst, const fpp_t _frames, const float _master_gain, const bool _convert_endian );
+void memCpySSE2( void * RP _dst, const void * RP _src, int _size );
+void memClearSSE2( void * RP _dst, int _size );
+int convertToS16SSE2( const sampleFrameA * RP _src, intSampleFrameA * RP _dst, const fpp_t _frames, const float _master_gain, const bool _convert_endian );
 #endif
 } ;
 #endif



-void initBasicOps( void )
+void init()
 {
 #ifdef X86_OPTIMIZATIONS
 	static bool extensions_checked = false;
@@ -428,29 +436,29 @@ void initBasicOps( void )
 #ifdef LMMS_HOST_X86
 		if( features & MMX )
 		{
-			alignedMemCpy = alignedMemCpyMMX;
-			alignedMemClear = alignedMemClearMMX;
+			memCpy = memCpyMMX;
+			memClear = memClearMMX;
 		}
 #endif
 		if( features & SSE )
 		{
 			fprintf( stderr, "Using SSE optimized routines\n" );
-			alignedMemCpy = alignedMemCpySSE;
-			alignedMemClear = alignedMemClearSSE;
-			alignedBufApplyGain = alignedBufApplyGainSSE;
-			alignedBufMix = alignedBufMixSSE;
-			alignedBufMixLRCoeff = alignedBufMixLRCoeffSSE;
+			memCpy = memCpySSE;
+			memClear = memClearSSE;
+			bufApplyGain = bufApplyGainSSE;
+			bufMix = bufMixSSE;
+			bufMixLRCoeff = bufMixLRCoeffSSE;
 			unalignedBufMixLRCoeff = unalignedBufMixLRCoeffSSE;
-			alignedBufWetDryMix = alignedBufWetDryMixSSE;
-			alignedBufWetDryMixSplitted =
-						alignedBufWetDryMixSplittedSSE;
+			bufWetDryMix = bufWetDryMixSSE;
+			bufWetDryMixSplitted =
+						bufWetDryMixSplittedSSE;
 		}
 		if( features & SSE2 )
 		{
 			fprintf( stderr, "Using SSE2 optimized routines\n" );
-			alignedMemCpy = alignedMemCpySSE2;
-			alignedMemClear = alignedMemClearSSE2;
-			alignedConvertToS16 = alignedConvertToS16SSE2;
+			memCpy = memCpySSE2;
+			memClear = memClearSSE2;
+			convertToS16 = convertToS16SSE2;
 		}
 		extensions_checked = true;
 	}
@@ -458,4 +466,5 @@ void initBasicOps( void )
 }


+}

--- a/src/core/basic_ops_x86.c
+++ b/src/core/basic_ops_x86.c
@@ -1,8 +1,8 @@
 /*
- * basic_ops_x86.c - x86 specific optimized operations
+ * CpuX86.c - x86 specific optimized operations
+ *
+ * Copyright (c) 2008-2009 Tobias Doerffel <tobydox/at/users.sourceforge.net>
 *
- * Copyright (c) 2008 Tobias Doerffel <tobydox/at/users.sourceforge.net>
- * 
 * This file is part of Linux MultiMedia Studio - http://lmms.sourceforge.net
 *
 * This program is free software; you can redistribute it and/or
@@ -22,9 +22,7 @@
 *
 */

-
-
-#include "basic_ops.h"
+#include "Cpu.h"

 #ifdef X86_OPTIMIZATIONS

@@ -32,7 +30,7 @@

 #include <mmintrin.h>

-void alignedMemCpyMMX( void * RP _dst, const void * RP _src, int _size )
+void memCpyMMX( void * RP _dst, const void * RP _src, int _size )
 {
 	const int s = _size / ( sizeof( __m64 ) * 8 );
 	int i;
@@ -79,7 +77,7 @@ void alignedMemCpyMMX( void * RP _dst, const void * RP _src, int _size )



-void alignedMemClearMMX( void * RP _dst, int _size )
+void memClearMMX( void * RP _dst, int _size )
 {
 	__m64 * dst = (__m64 *) _dst;
 	const int s = _size / ( sizeof( *dst ) * 8 );
@@ -109,7 +107,7 @@ void alignedMemClearMMX( void * RP _dst, int _size )

 #include <xmmintrin.h>

-void alignedMemCpySSE( void * RP _dst, const void * RP _src, int _size )
+void memCpySSE( void * RP _dst, const void * RP _src, int _size )
 {
 	__m128 * dst = (__m128 *) _dst;
 	__m128 * src = (__m128 *) _src;
@@ -133,7 +131,7 @@ void alignedMemCpySSE( void * RP _dst, const void * RP _src, int _size )



-void alignedMemClearSSE( void * RP _dst, int _size )
+void memClearSSE( void * RP _dst, int _size )
 {
 	__m128 * dst = (__m128 *) _dst;
 	const int s = _size / ( sizeof( *dst ) * 4 );
@@ -152,7 +150,7 @@ void alignedMemClearSSE( void * RP _dst, int _size )



-void alignedBufApplyGainSSE( sampleFrameA * RP _dst, float _gain, int _frames )
+void bufApplyGainSSE( sampleFrameA * RP _dst, float _gain, int _frames )
 {
 	int i;
 	for( i = 0; i < _frames; )
@@ -180,7 +178,7 @@ void alignedBufApplyGainSSE( sampleFrameA * RP _dst, float _gain, int _frames )



-void alignedBufMixSSE( sampleFrameA * RP _dst, const sampleFrameA * RP _src,
+void bufMixSSE( sampleFrameA * RP _dst, const sampleFrameA * RP _src,
 								int _frames )
 {
 	int i;
@@ -209,7 +207,7 @@ void alignedBufMixSSE( sampleFrameA * RP _dst, const sampleFrameA * RP _src,



-void alignedBufMixLRCoeffSSE( sampleFrameA * RP _dst,
+void bufMixLRCoeffSSE( sampleFrameA * RP _dst,
 					const sampleFrameA * RP _src,
 					float _left, float _right, int _frames )
 {
@@ -257,7 +255,7 @@ void unalignedBufMixLRCoeffSSE( sampleFrame * RP _dst, const sampleFrame * RP _s



-void alignedBufWetDryMixSSE( sampleFrameA * RP _dst,
+void bufWetDryMixSSE( sampleFrameA * RP _dst,
 					const sampleFrameA * RP _src,
 					float _wet, float _dry, int _frames )
 {
@@ -279,7 +277,7 @@ void alignedBufWetDryMixSSE( sampleFrameA * RP _dst,



-void alignedBufWetDryMixSplittedSSE( sampleFrameA * RP _dst,
+void bufWetDryMixSplittedSSE( sampleFrameA * RP _dst,
 					const float * RP _left,
 					const float * RP _right,
 					float _wet, float _dry, int _frames )
@@ -304,7 +302,7 @@ void alignedBufWetDryMixSplittedSSE( sampleFrameA * RP _dst,

 #include <emmintrin.h>

-void alignedMemCpySSE2( void * RP _dst, const void * RP _src, int _size )
+void memCpySSE2( void * RP _dst, const void * RP _src, int _size )
 {
 	__m128i * dst = (__m128i *) _dst;
 	__m128i * src = (__m128i *) _src;
@@ -324,7 +322,7 @@ void alignedMemCpySSE2( void * RP _dst, const void * RP _src, int _size )



-void alignedMemClearSSE2( void * RP _dst, int _size )
+void memClearSSE2( void * RP _dst, int _size )
 {
 	__m128i * dst = (__m128i *) _dst;
 	const int s = _size / ( sizeof( *dst ) * 4 );
@@ -342,7 +340,7 @@ void alignedMemClearSSE2( void * RP _dst, int _size )



-int alignedConvertToS16SSE2( const sampleFrameA * RP _src,
+int convertToS16SSE2( const sampleFrameA * RP _src,
 					intSampleFrameA * RP _dst,
 					const fpp_t _frames,
 					const float _master_gain,
--- a/src/core/audio/audio_alsa.cpp
+++ b/src/core/audio/audio_alsa.cpp
@@ -1,10 +1,8 @@
-#ifndef SINGLE_SOURCE_COMPILE
-
 /*
 * audio_alsa.cpp - device-class which implements ALSA-PCM-output
 *
 * Copyright (c) 2004-2009 Tobias Doerffel <tobydox/at/users.sourceforge.net>
- * 
+ *
 * This file is part of Linux MultiMedia Studio - http://lmms.sourceforge.net
 *
 * This program is free software; you can redistribute it and/or
@@ -25,7 +23,6 @@
 */


-
 #include <QtGui/QLineEdit>
 #include <QtGui/QLabel>

@@ -39,7 +36,7 @@
 #include "lcd_spinbox.h"
 #include "gui_templates.h"
 #include "templates.h"
-#include "basic_ops.h"
+#include "Cpu.h"



@@ -230,10 +227,10 @@ void audioALSA::applyQualitySettings( void )

 void audioALSA::run( void )
 {
-	sampleFrameA * temp = alignedAllocFrames(
+	sampleFrameA * temp = CPU::allocFrames(
 					getMixer()->framesPerPeriod() );
 	intSampleFrameA * outbuf = (intSampleFrameA *)
-		alignedMalloc( sizeof( intSampleFrameA ) * channels() /
+		CPU::memAlloc( sizeof( intSampleFrameA ) * channels() /
 			DEFAULT_CHANNELS * getMixer()->framesPerPeriod() );

 	int_sample_t * pcmbuf = new int_sample_t[m_periodSize * channels()];
@@ -261,7 +258,7 @@ void audioALSA::run( void )
 				}
 				outbuf_size = frames * channels();

-				alignedConvertToS16( temp, outbuf, frames,
+				CPU::convertToS16( temp, outbuf, frames,
 						getMixer()->masterGain(),
 							m_convertEndian );
 			}
@@ -300,8 +297,8 @@ void audioALSA::run( void )
 		}
 	}

-	alignedFreeFrames( temp );
-	alignedFree( outbuf );
+	CPU::freeFrames( temp );
+	CPU::memFree( outbuf );
 	delete[] pcmbuf;
 }

@@ -526,5 +523,3 @@ void audioALSA::setupWidget::saveSettings( void )

 #endif

-
-#endif
--- a/src/core/audio/audio_device.cpp
+++ b/src/core/audio/audio_device.cpp
@@ -1,10 +1,8 @@
-#ifndef SINGLE_SOURCE_COMPILE
-
 /*
 * audio_device.cpp - base-class for audio-devices used by LMMS-mixer
 *
- * Copyright (c) 2004-2008 Tobias Doerffel <tobydox/at/users.sourceforge.net>
- * 
+ * Copyright (c) 2004-2009 Tobias Doerffel <tobydox/at/users.sourceforge.net>
+ *
 * This file is part of Linux MultiMedia Studio - http://lmms.sourceforge.net
 *
 * This program is free software; you can redistribute it and/or
@@ -25,13 +23,10 @@
 */


-#include <cstring>
-
-
 #include "audio_device.h"
 #include "config_mgr.h"
 #include "debug.h"
-#include "basic_ops.h"
+#include "Cpu.h"



@@ -40,7 +35,7 @@ audioDevice::audioDevice( const ch_cnt_t _channels, mixer * _mixer ) :
 	m_sampleRate( _mixer->processingSampleRate() ),
 	m_channels( _channels ),
 	m_mixer( _mixer ),
-	m_buffer( alignedAllocFrames( getMixer()->framesPerPeriod() ) )
+	m_buffer( CPU::allocFrames( getMixer()->framesPerPeriod() ) )
 {
 	int error;
 	if( ( m_srcState = src_new(
@@ -57,7 +52,7 @@ audioDevice::audioDevice( const ch_cnt_t _channels, mixer * _mixer ) :
 audioDevice::~audioDevice()
 {
 	src_delete( m_srcState );
-	alignedFreeFrames( m_buffer );
+	CPU::freeFrames( m_buffer );

 	m_devMutex.tryLock();
 	unlock();
@@ -104,7 +99,7 @@ fpp_t audioDevice::getNextBuffer( sampleFrameA * _ab )
 	}
 	else
 	{
-		alignedMemCpy( _ab, b, frames * sizeof( surroundSampleFrame ) );
+		CPU::memCpy( _ab, b, frames * sizeof( surroundSampleFrame ) );
 	}

 	// release lock
@@ -112,7 +107,7 @@ fpp_t audioDevice::getNextBuffer( sampleFrameA * _ab )

 	if( getMixer()->hasFifoWriter() )
 	{
-		alignedFreeFrames( b );
+		CPU::freeFrames( b );
 	}

 	return frames;
@@ -200,7 +195,7 @@ void audioDevice::resample( const sampleFrame * _src, const fpp_t _frames,

 void audioDevice::clearS16Buffer( intSampleFrameA * _outbuf, const fpp_t _frames )
 {
-	alignedMemClear( _outbuf, _frames * sizeof( *_outbuf ) );
+	CPU::memClear( _outbuf, _frames * sizeof( *_outbuf ) );
 //	memset( _outbuf, 0,  _frames * channels() * BYTES_PER_INT_SAMPLE );
 }

@@ -213,5 +208,3 @@ bool audioDevice::hqAudio( void ) const
 }


-
-#endif
--- a/src/core/audio/audio_file_wave.cpp
+++ b/src/core/audio/audio_file_wave.cpp
@@ -1,11 +1,9 @@
-#ifndef SINGLE_SOURCE_COMPILE
-
 /*
 * audio_file_wave.cpp - audio-device which encodes wave-stream and writes it
 *                       into a WAVE-file. This is used for song-export.
 *
 * Copyright (c) 2004-2009 Tobias Doerffel <tobydox/at/users.sourceforge.net>
- * 
+ *
 * This file is part of Linux MultiMedia Studio - http://lmms.sourceforge.net
 *
 * This program is free software; you can redistribute it and/or
@@ -26,10 +24,9 @@
 */


-
 #include "audio_file_wave.h"
 #include "endian_handling.h"
-#include "basic_ops.h"
+#include "Cpu.h"

 #include <cstring>

@@ -89,14 +86,14 @@ void audioFileWave::writeBuffer( const surroundSampleFrame * _ab,
 {
 	if( depth() == 16 )
 	{
-		intSampleFrameA * buf = (intSampleFrameA *) alignedMalloc(
+		intSampleFrameA * buf = (intSampleFrameA *) CPU::memAlloc(
            sizeof( intSampleFrameA ) * _frames );

-		alignedConvertToS16( _ab, buf, _frames, _master_gain,
+		CPU::convertToS16( _ab, buf, _frames, _master_gain,
            !isLittleEndian() );

 		sf_writef_short( m_sf, (int_sample_t *) buf, _frames );
-		alignedFree( buf );
+		CPU::memFree( buf );
 	}
 	else
 	{
@@ -123,4 +120,3 @@ void audioFileWave::finishEncoding( void )
 }


-#endif
--- a/src/core/audio/audio_jack.cpp
+++ b/src/core/audio/audio_jack.cpp
@@ -44,7 +44,7 @@
 #include "lcd_spinbox.h"
 #include "audio_port.h"
 #include "main_window.h"
-#include "basic_ops.h"
+#include "Cpu.h"



@@ -57,7 +57,7 @@ audioJACK::audioJACK( bool & _success_ful, mixer * _mixer ) :
 	m_client( NULL ),
 	m_active( false ),
 	m_stopSemaphore( 1 ),
-	m_outBuf( alignedAllocFrames( getMixer()->framesPerPeriod() ) ),
+	m_outBuf( CPU::allocFrames( getMixer()->framesPerPeriod() ) ),
 	m_framesDoneInCurBuf( 0 ),
 	m_framesToDoInCurBuf( 0 )
 {
@@ -93,7 +93,7 @@ audioJACK::~audioJACK()
 		jack_client_close( m_client );
 	}

-	alignedFreeFrames( m_outBuf );
+	CPU::freeFrames( m_outBuf );

 }

--- a/src/core/audio/audio_oss.cpp
+++ b/src/core/audio/audio_oss.cpp
@@ -1,10 +1,8 @@
-#ifndef SINGLE_SOURCE_COMPILE
-
 /*
 * audio_oss.cpp - device-class that implements OSS-PCM-output
 *
 * Copyright (c) 2004-2009 Tobias Doerffel <tobydox/at/users.sourceforge.net>
- * 
+ *
 * This file is part of Linux MultiMedia Studio - http://lmms.sourceforge.net
 *
 * This program is free software; you can redistribute it and/or
@@ -39,7 +37,7 @@
 #include "engine.h"
 #include "gui_templates.h"
 #include "templates.h"
-#include "basic_ops.h"
+#include "Cpu.h"

 #ifdef LMMS_HAVE_UNISTD_H
 #include <unistd.h>
@@ -299,10 +297,10 @@ void audioOSS::applyQualitySettings( void )

 void audioOSS::run( void )
 {
-	sampleFrameA * temp = alignedAllocFrames(
+	sampleFrameA * temp = CPU::allocFrames(
 						getMixer()->framesPerPeriod() );
 	intSampleFrameA * outbuf = (intSampleFrameA *)
-			alignedMalloc( sizeof( intSampleFrameA ) *
+			CPU::memAlloc( sizeof( intSampleFrameA ) *
 						getMixer()->framesPerPeriod() );

 	while( 1 )
@@ -313,7 +311,7 @@ void audioOSS::run( void )
 			break;
 		}

-		int bytes = alignedConvertToS16( temp, outbuf, frames,
+		int bytes = CPU::convertToS16( temp, outbuf, frames,
 						getMixer()->masterGain(),
 							m_convertEndian );
 		if( write( m_audioFD, outbuf, bytes ) != bytes )
@@ -322,8 +320,8 @@ void audioOSS::run( void )
 		}
 	}

-	alignedFreeFrames( temp );
-	alignedFree( outbuf );
+	CPU::freeFrames( temp );
+	CPU::memFree( outbuf );
 }


@@ -374,5 +372,3 @@ void audioOSS::setupWidget::saveSettings( void )

 #endif

-
-#endif
--- a/src/core/audio/audio_port.cpp
+++ b/src/core/audio/audio_port.cpp
@@ -26,14 +26,14 @@
 #include "audio_device.h"
 #include "effect_chain.h"
 #include "engine.h"
-#include "basic_ops.h"
+#include "Cpu.h"


 audioPort::audioPort( const QString & _name, bool _has_effect_chain ) :
 	m_bufferUsage( NoUsage ),
-	m_firstBuffer( alignedAllocFrames( 
+	m_firstBuffer( CPU::allocFrames( 
 				engine::getMixer()->framesPerPeriod() ) ),
-	m_secondBuffer( alignedAllocFrames(
+	m_secondBuffer( CPU::allocFrames(
 				engine::getMixer()->framesPerPeriod() ) ),
 	m_extOutputEnabled( false ),
 	m_nextFxChannel( 0 ),
@@ -55,8 +55,8 @@ audioPort::~audioPort()
 {
 	setExtOutputEnabled( false );
 	engine::getMixer()->removeAudioPort( this );
-	alignedFreeFrames( m_firstBuffer );
-	alignedFreeFrames( m_secondBuffer );
+	CPU::freeFrames( m_firstBuffer );
+	CPU::freeFrames( m_secondBuffer );
 	delete m_effects;
 }

--- a/src/core/audio/audio_portaudio.cpp
+++ b/src/core/audio/audio_portaudio.cpp
@@ -60,7 +60,7 @@ audioPortAudio::audioPortAudio( bool & _success_ful, mixer * _mixer ) :
 					DEFAULT_CHANNELS, SURROUND_CHANNELS ),
 								_mixer ),
 	m_wasPAInitError( false ),
-	m_outBuf( alignedAllocFrames( getMixer()->framesPerPeriod() ) ),
+	m_outBuf( CPU::allocFrames( getMixer()->framesPerPeriod() ) ),
 	m_outBufPos( 0 ),
 	m_stopSemaphore( 1 )
 {
@@ -206,7 +206,7 @@ audioPortAudio::~audioPortAudio()
 	{
 		Pa_Terminate();
 	}
-	alignedFreeFrames( m_outBuf );
+	CPU::freeFrames( m_outBuf );
 }


--- a/src/core/audio/audio_pulseaudio.cpp
+++ b/src/core/audio/audio_pulseaudio.cpp
@@ -1,10 +1,8 @@
-#ifndef SINGLE_SOURCE_COMPILE
-
 /*
 * audio_pulseaudio.cpp - device-class which implements PulseAudio-output
 *
- * Copyright (c) 2008 Tobias Doerffel <tobydox/at/users.sourceforge.net>
- * 
+ * Copyright (c) 2008-2009 Tobias Doerffel <tobydox/at/users.sourceforge.net>
+ *
 * This file is part of Linux MultiMedia Studio - http://lmms.sourceforge.net
 *
 * This program is free software; you can redistribute it and/or
@@ -25,7 +23,6 @@
 */


-
 #include <QtGui/QLineEdit>
 #include <QtGui/QLabel>

@@ -40,7 +37,7 @@
 #include "lcd_spinbox.h"
 #include "gui_templates.h"
 #include "templates.h"
-#include "basic_ops.h"
+#include "Cpu.h"


 static void stream_write_callback(pa_stream *s, size_t length, void *userdata)
@@ -231,7 +228,7 @@ void audioPulseAudio::run( void )
 void audioPulseAudio::streamWriteCallback(pa_stream *s, size_t length)
 {
 	const fpp_t fpp = getMixer()->framesPerPeriod();
-	sampleFrameA * temp = alignedAllocFrames( fpp );
+	sampleFrameA * temp = CPU::allocFrames( fpp );
 	Sint16 * pcmbuf = (Sint16*)pa_xmalloc( fpp * channels() *
 							sizeof(Sint16) );

@@ -243,7 +240,7 @@ void audioPulseAudio::streamWriteCallback(pa_stream *s, size_t length)
 		{
 			return;
 		}
-		int bytes = alignedConvertToS16( temp,
+		int bytes = CPU::convertToS16( temp,
 						(intSampleFrameA *) pcmbuf,
 						frames,
 						getMixer()->masterGain(),
@@ -257,7 +254,7 @@ void audioPulseAudio::streamWriteCallback(pa_stream *s, size_t length)
 	}

 	pa_xfree( pcmbuf );
-	alignedFreeFrames( temp );
+	CPU::freeFrames( temp );
 }


@@ -308,5 +305,3 @@ void audioPulseAudio::setupWidget::saveSettings( void )

 #endif

-#endif
-
--- a/src/core/audio/audio_sdl.cpp
+++ b/src/core/audio/audio_sdl.cpp
@@ -1,10 +1,8 @@
-#ifndef SINGLE_SOURCE_COMPILE
-
 /*
 * audio_sdl.cpp - device-class that performs PCM-output via SDL
 *
 * Copyright (c) 2004-2008 Tobias Doerffel <tobydox/at/users.sourceforge.net>
- * 
+ *
 * This file is part of Linux MultiMedia Studio - http://lmms.sourceforge.net
 *
 * This program is free software; you can redistribute it and/or
@@ -25,7 +23,6 @@
 */


-
 #include "audio_sdl.h"

 #ifdef LMMS_HAVE_SDL
@@ -38,13 +35,13 @@
 #include "config_mgr.h"
 #include "gui_templates.h"
 #include "templates.h"
-#include "basic_ops.h"
+#include "Cpu.h"



 audioSDL::audioSDL( bool & _success_ful, mixer * _mixer ) :
 	audioDevice( DEFAULT_CHANNELS, _mixer ),
-	m_outBuf( alignedAllocFrames( getMixer()->framesPerPeriod() ) ),
+	m_outBuf( CPU::allocFrames( getMixer()->framesPerPeriod() ) ),
 	m_convertedBufPos( 0 ),
 	m_convertEndian( false ),
 	m_stopSemaphore( 1 )
@@ -53,7 +50,7 @@ audioSDL::audioSDL( bool & _success_ful, mixer * _mixer ) :

 	m_convertedBufSize = getMixer()->framesPerPeriod() *
 						sizeof( intSampleFrameA );
-	m_convertedBuf = (intSampleFrameA *) alignedMalloc( m_convertedBufSize );
+	m_convertedBuf = (intSampleFrameA *) CPU::memAlloc( m_convertedBufSize );


 	if( SDL_Init( SDL_INIT_AUDIO | SDL_INIT_NOPARACHUTE ) < 0 )
@@ -97,8 +94,8 @@ audioSDL::~audioSDL()

 	SDL_CloseAudio();
 	SDL_Quit();
-	alignedFree( m_convertedBuf );
-	alignedFreeFrames( m_outBuf );
+	CPU::memFree( m_convertedBuf );
+	CPU::freeFrames( m_outBuf );
 }


@@ -192,7 +189,7 @@ void audioSDL::sdlAudioCallback( Uint8 * _buf, int _len )
 			}
 			m_convertedBufSize = frames * sizeof( intSampleFrameA );

-			alignedConvertToS16( m_outBuf,
+			CPU::convertToS16( m_outBuf,
 						m_convertedBuf,
 						frames,
 						getMixer()->masterGain(),
@@ -243,4 +240,3 @@ void audioSDL::setupWidget::saveSettings( void )

 #endif

-#endif
--- a/src/core/basic_ops_x86_64_sse.s
+++ b/src/core/basic_ops_x86_64_sse.s
@@ -1,555 +0,0 @@
-	.file	"basic_ops_x86.c"
-	.text
-	.align 16
-.globl alignedMemCpySSE
-	.type	alignedMemCpySSE, @function
-alignedMemCpySSE:
-.LFB509:
-	movslq	%edx,%rdx
-	shrq	$6, %rdx
-	testl	%edx, %edx
-	jle	.L4
-	subl	$1, %edx
-	xorl	%eax, %eax
-	addq	$1, %rdx
-	salq	$6, %rdx
-	.align 16
-.L3:
-	movaps	(%rsi,%rax), %xmm0
-	movaps	%xmm0, (%rdi,%rax)
-	movaps	16(%rsi,%rax), %xmm0
-	movaps	%xmm0, 16(%rdi,%rax)
-	movaps	32(%rsi,%rax), %xmm0
-	movaps	%xmm0, 32(%rdi,%rax)
-	movaps	48(%rsi,%rax), %xmm0
-	movaps	%xmm0, 48(%rdi,%rax)
-	addq	$64, %rax
-	cmpq	%rdx, %rax
-	jne	.L3
-.L4:
-	rep
-	ret
-.LFE509:
-	.size	alignedMemCpySSE, .-alignedMemCpySSE
-	.align 16
-.globl alignedMemClearSSE
-	.type	alignedMemClearSSE, @function
-alignedMemClearSSE:
-.LFB510:
-	movslq	%esi,%rsi
-	shrq	$6, %rsi
-	testl	%esi, %esi
-	jle	.L10
-	subl	$1, %esi
-	xorps	%xmm0, %xmm0
-	salq	$6, %rsi
-	leaq	64(%rdi,%rsi), %rax
-	.align 16
-.L9:
-	movaps	%xmm0, (%rdi)
-	movaps	%xmm0, 16(%rdi)
-	movaps	%xmm0, 32(%rdi)
-	movaps	%xmm0, 48(%rdi)
-	addq	$64, %rdi
-	cmpq	%rax, %rdi
-	jne	.L9
-.L10:
-	rep
-	ret
-.LFE510:
-	.size	alignedMemClearSSE, .-alignedMemClearSSE
-	.align 16
-.globl alignedBufApplyGainSSE
-	.type	alignedBufApplyGainSSE, @function
-alignedBufApplyGainSSE:
-.LFB511:
-	testl	%esi, %esi
-	jle	.L15
-	leal	-1(%rsi), %edx
-	shufps	$0, %xmm0, %xmm0
-	xorl	%eax, %eax
-	shrl	$3, %edx
-	addl	$1, %edx
-	.align 16
-.L14:
-	movaps	16(%rdi), %xmm3
-	addl	$1, %eax
-	movaps	32(%rdi), %xmm2
-	mulps	%xmm0, %xmm3
-	movaps	48(%rdi), %xmm1
-	mulps	%xmm0, %xmm2
-	movaps	(%rdi), %xmm4
-	mulps	%xmm0, %xmm1
-	movaps	%xmm3, 16(%rdi)
-	mulps	%xmm0, %xmm4
-	movaps	%xmm2, 32(%rdi)
-	movaps	%xmm1, 48(%rdi)
-	movaps	%xmm4, (%rdi)
-	addq	$64, %rdi
-	cmpl	%eax, %edx
-	ja	.L14
-.L15:
-	rep
-	ret
-.LFE511:
-	.size	alignedBufApplyGainSSE, .-alignedBufApplyGainSSE
-	.align 16
-.globl alignedBufMixSSE
-	.type	alignedBufMixSSE, @function
-alignedBufMixSSE:
-.LFB512:
-	testl	%edx, %edx
-	jle	.L20
-	leal	-1(%rdx), %ecx
-	xorl	%eax, %eax
-	xorl	%edx, %edx
-	shrl	$3, %ecx
-	addl	$1, %ecx
-	.align 16
-.L19:
-	movaps	16(%rdi,%rax), %xmm2
-	addl	$1, %edx
-	movaps	32(%rdi,%rax), %xmm1
-	addps	16(%rsi,%rax), %xmm2
-	movaps	48(%rdi,%rax), %xmm0
-	addps	32(%rsi,%rax), %xmm1
-	movaps	(%rdi,%rax), %xmm3
-	addps	48(%rsi,%rax), %xmm0
-	addps	(%rsi,%rax), %xmm3
-	movaps	%xmm2, 16(%rdi,%rax)
-	movaps	%xmm1, 32(%rdi,%rax)
-	movaps	%xmm0, 48(%rdi,%rax)
-	movaps	%xmm3, (%rdi,%rax)
-	addq	$64, %rax
-	cmpl	%edx, %ecx
-	ja	.L19
-.L20:
-	rep
-	ret
-.LFE512:
-	.size	alignedBufMixSSE, .-alignedBufMixSSE
-	.align 16
-.globl alignedBufMixLRCoeffSSE
-	.type	alignedBufMixLRCoeffSSE, @function
-alignedBufMixLRCoeffSSE:
-.LFB513:
-	testl	%edx, %edx
-	jle	.L25
-	unpcklps	%xmm1, %xmm0
-	leal	-1(%rdx), %ecx
-	xorl	%eax, %eax
-	xorl	%edx, %edx
-	shrl	$2, %ecx
-	movlhps	%xmm0, %xmm0
-	addl	$1, %ecx
-	.align 16
-.L24:
-	movaps	16(%rsi,%rax), %xmm2
-	addl	$1, %edx
-	movaps	(%rsi,%rax), %xmm3
-	mulps	%xmm0, %xmm2
-	mulps	%xmm0, %xmm3
-	addps	16(%rdi,%rax), %xmm2
-	addps	(%rdi,%rax), %xmm3
-	movaps	%xmm2, 16(%rdi,%rax)
-	movaps	%xmm3, (%rdi,%rax)
-	addq	$32, %rax
-	cmpl	%edx, %ecx
-	ja	.L24
-.L25:
-	rep
-	ret
-.LFE513:
-	.size	alignedBufMixLRCoeffSSE, .-alignedBufMixLRCoeffSSE
-	.align 16
-.globl alignedBufWetDryMixSSE
-	.type	alignedBufWetDryMixSSE, @function
-alignedBufWetDryMixSSE:
-.LFB515:
-	testl	%edx, %edx
-	jle	.L30
-	leal	-1(%rdx), %ecx
-	shufps	$0, %xmm1, %xmm1
-	shufps	$0, %xmm0, %xmm0
-	xorl	%eax, %eax
-	shrl	$2, %ecx
-	xorl	%edx, %edx
-	addl	$1, %ecx
-	.align 16
-.L29:
-	movaps	16(%rsi,%rax), %xmm3
-	addl	$1, %edx
-	movaps	16(%rdi,%rax), %xmm2
-	mulps	%xmm0, %xmm3
-	movaps	(%rsi,%rax), %xmm4
-	mulps	%xmm1, %xmm2
-	mulps	%xmm0, %xmm4
-	addps	%xmm3, %xmm2
-	movaps	(%rdi,%rax), %xmm3
-	mulps	%xmm1, %xmm3
-	movaps	%xmm2, 16(%rdi,%rax)
-	addps	%xmm4, %xmm3
-	movaps	%xmm3, (%rdi,%rax)
-	addq	$32, %rax
-	cmpl	%edx, %ecx
-	ja	.L29
-.L30:
-	rep
-	ret
-.LFE515:
-	.size	alignedBufWetDryMixSSE, .-alignedBufWetDryMixSSE
-	.align 16
-.globl alignedBufWetDryMixSplittedSSE
-	.type	alignedBufWetDryMixSplittedSSE, @function
-alignedBufWetDryMixSplittedSSE:
-.LFB516:
-	pushq	%rbp
-.LCFI0:
-	testl	%ecx, %ecx
-	pushq	%rbx
-.LCFI1:
-	jle	.L39
-	leal	-1(%rcx), %ebx
-	shrl	%ebx
-	addl	$1, %ebx
-	movl	%ebx, %r11d
-	shrl	$2, %r11d
-	cmpl	$3, %ebx
-	leal	0(,%r11,4), %ebp
-	jbe	.L40
-	testl	%ebp, %ebp
-	jne	.L34
-.L40:
-	xorl	%r9d, %r9d
-	jmp	.L36
-	.align 16
-.L34:
-	movaps	%xmm1, %xmm2
-	movq	%rdi, %rax
-	xorps	%xmm10, %xmm10
-	movq	%rsi, %r9
-	shufps	$0, %xmm2, %xmm2
-	movq	%rdx, %r8
-	xorl	%r10d, %r10d
-	movaps	%xmm2, %xmm12
-	movaps	%xmm0, %xmm2
-	shufps	$0, %xmm2, %xmm2
-	movaps	%xmm2, %xmm11
-	.align 16
-.L37:
-	movaps	(%rax), %xmm2
-	addl	$1, %r10d
-	movaps	%xmm10, %xmm9
-	movaps	16(%rax), %xmm5
-	movaps	%xmm2, %xmm4
-	movlps	(%r9), %xmm9
-	movaps	%xmm10, %xmm8
-	movaps	32(%rax), %xmm14
-	shufps	$136, %xmm5, %xmm4
-	movhps	8(%r9), %xmm9
-	movaps	48(%rax), %xmm3
-	movaps	%xmm14, %xmm15
-	movlps	16(%r9), %xmm8
-	shufps	$221, %xmm5, %xmm2
-	shufps	$136, %xmm3, %xmm15
-	movhps	24(%r9), %xmm8
-	shufps	$221, %xmm3, %xmm14
-	movaps	%xmm4, %xmm5
-	addq	$32, %r9
-	movaps	%xmm9, %xmm3
-	shufps	$136, %xmm15, %xmm5
-	movaps	%xmm10, %xmm7
-	shufps	$136, %xmm8, %xmm3
-	movlps	(%r8), %xmm7
-	movaps	%xmm10, %xmm6
-	mulps	%xmm12, %xmm5
-	movhps	8(%r8), %xmm7
-	mulps	%xmm11, %xmm3
-	movlps	16(%r8), %xmm6
-	movaps	%xmm7, %xmm13
-	movhps	24(%r8), %xmm6
-	shufps	$221, %xmm15, %xmm4
-	shufps	$221, %xmm8, %xmm9
-	addq	$32, %r8
-	shufps	$136, %xmm6, %xmm13
-	addps	%xmm3, %xmm5
-	movaps	%xmm2, %xmm3
-	shufps	$221, %xmm6, %xmm7
-	shufps	$136, %xmm14, %xmm3
-	shufps	$221, %xmm14, %xmm2
-	mulps	%xmm11, %xmm13
-	movaps	%xmm5, %xmm6
-	mulps	%xmm12, %xmm3
-	mulps	%xmm12, %xmm4
-	mulps	%xmm11, %xmm9
-	addps	%xmm13, %xmm3
-	mulps	%xmm12, %xmm2
-	mulps	%xmm11, %xmm7
-	addps	%xmm9, %xmm4
-	addps	%xmm7, %xmm2
-	unpcklps	%xmm4, %xmm6
-	unpckhps	%xmm4, %xmm5
-	movaps	%xmm3, %xmm4
-	unpcklps	%xmm2, %xmm4
-	unpckhps	%xmm2, %xmm3
-	movaps	%xmm6, %xmm2
-	unpcklps	%xmm4, %xmm2
-	unpckhps	%xmm4, %xmm6
-	movaps	%xmm2, (%rax)
-	movaps	%xmm5, %xmm2
-	unpckhps	%xmm3, %xmm5
-	unpcklps	%xmm3, %xmm2
-	movaps	%xmm6, 16(%rax)
-	movaps	%xmm2, 32(%rax)
-	movaps	%xmm5, 48(%rax)
-	addq	$64, %rax
-	cmpl	%r10d, %r11d
-	ja	.L37
-	cmpl	%ebx, %ebp
-	leal	(%rbp,%rbp), %r9d
-	je	.L39
-.L36:
-	movslq	%r9d,%rax
-	leaq	1(%rax), %rbx
-	leaq	0(,%rax,4), %r10
-	leaq	(%rdi,%rax,8), %r8
-	leaq	(%rdi,%rbx,8), %rax
-	salq	$2, %rbx
-	leaq	(%rsi,%r10), %r11
-	leaq	(%rdx,%r10), %r10
-	addq	%rbx, %rsi
-	addq	%rbx, %rdx
-	.align 16
-.L38:
-	movss	(%r11), %xmm3
-	addl	$2, %r9d
-	movss	(%r8), %xmm2
-	mulss	%xmm0, %xmm3
-	mulss	%xmm1, %xmm2
-	addq	$8, %r11
-	addss	%xmm3, %xmm2
-	movss	%xmm2, (%r8)
-	movss	4(%r8), %xmm2
-	movss	(%r10), %xmm3
-	mulss	%xmm1, %xmm2
-	addq	$8, %r10
-	mulss	%xmm0, %xmm3
-	addss	%xmm3, %xmm2
-	movss	%xmm2, 4(%r8)
-	addq	$16, %r8
-	movss	(%rsi), %xmm3
-	addq	$8, %rsi
-	movss	(%rax), %xmm2
-	mulss	%xmm0, %xmm3
-	mulss	%xmm1, %xmm2
-	addss	%xmm3, %xmm2
-	movss	%xmm2, (%rax)
-	movss	4(%rax), %xmm2
-	movss	(%rdx), %xmm3
-	mulss	%xmm1, %xmm2
-	addq	$8, %rdx
-	mulss	%xmm0, %xmm3
-	addss	%xmm3, %xmm2
-	movss	%xmm2, 4(%rax)
-	addq	$16, %rax
-	cmpl	%r9d, %ecx
-	jg	.L38
-.L39:
-	popq	%rbx
-	popq	%rbp
-	ret
-.LFE516:
-	.size	alignedBufWetDryMixSplittedSSE, .-alignedBufWetDryMixSplittedSSE
-	.align 16
-.globl unalignedBufMixLRCoeffSSE
-	.type	unalignedBufMixLRCoeffSSE, @function
-unalignedBufMixLRCoeffSSE:
-.LFB514:
-	movl	%edx, %ecx
-	shrl	$31, %ecx
-	leal	(%rdx,%rcx), %eax
-	andl	$1, %eax
-	cmpl	%ecx, %eax
-	jne	.L52
-.L44:
-	testl	%edx, %edx
-	jle	.L49
-	subl	$1, %edx
-	shrl	%edx
-	testb	$15, %dil
-	jne	.L46
-	unpcklps	%xmm1, %xmm0
-	addl	$1, %edx
-	xorps	%xmm3, %xmm3
-	xorl	%eax, %eax
-	movlhps	%xmm0, %xmm0
-	.align 16
-.L47:
-	movaps	%xmm3, %xmm2
-	addl	$1, %eax
-	movaps	%xmm3, %xmm1
-	movlps	(%rsi), %xmm2
-	movlps	(%rdi), %xmm1
-	movhps	8(%rsi), %xmm2
-	addq	$16, %rsi
-	movhps	8(%rdi), %xmm1
-	mulps	%xmm0, %xmm2
-	addps	%xmm2, %xmm1
-	movaps	%xmm1, (%rdi)
-	addq	$16, %rdi
-	cmpl	%edx, %eax
-	jb	.L47
-	rep
-	ret
-	.align 16
-.L46:
-	mov	%edx, %edx
-	xorl	%eax, %eax
-	addq	$1, %rdx
-	salq	$4, %rdx
-	.align 16
-.L48:
-	movss	(%rsi,%rax), %xmm2
-	mulss	%xmm0, %xmm2
-	addss	(%rdi,%rax), %xmm2
-	movss	%xmm2, (%rdi,%rax)
-	movss	4(%rsi,%rax), %xmm2
-	mulss	%xmm1, %xmm2
-	addss	4(%rdi,%rax), %xmm2
-	movss	%xmm2, 4(%rdi,%rax)
-	movss	8(%rsi,%rax), %xmm2
-	mulss	%xmm0, %xmm2
-	addss	8(%rdi,%rax), %xmm2
-	movss	%xmm2, 8(%rdi,%rax)
-	movss	12(%rsi,%rax), %xmm2
-	mulss	%xmm1, %xmm2
-	addss	12(%rdi,%rax), %xmm2
-	movss	%xmm2, 12(%rdi,%rax)
-	addq	$16, %rax
-	cmpq	%rdx, %rax
-	jne	.L48
-.L49:
-	rep
-	ret
-.L52:
-	movss	(%rsi), %xmm2
-	subl	$1, %edx
-	mulss	%xmm0, %xmm2
-	addss	(%rdi), %xmm2
-	movss	%xmm2, (%rdi)
-	movss	4(%rsi), %xmm2
-	addq	$8, %rsi
-	mulss	%xmm1, %xmm2
-	addss	4(%rdi), %xmm2
-	movss	%xmm2, 4(%rdi)
-	addq	$8, %rdi
-	jmp	.L44
-.LFE514:
-	.size	unalignedBufMixLRCoeffSSE, .-unalignedBufMixLRCoeffSSE
-	.section	.eh_frame,"aw",@progbits
-.Lframe1:
-	.long	.LECIE1-.LSCIE1
-.LSCIE1:
-	.long	0x0
-	.byte	0x1
-	.string	"zR"
-	.byte	0x1
-	.byte	0x78
-	.byte	0x10
-	.byte	0x1
-	.byte	0x3
-	.byte	0xc
-	.byte	0x7
-	.byte	0x8
-	.byte	0x11
-	.byte	0x10
-	.byte	0x1
-	.align 8
-.LECIE1:
-.LSFDE1:
-	.long	.LEFDE1-.LASFDE1
-.LASFDE1:
-	.long	.LASFDE1-.Lframe1
-	.long	.LFB509
-	.long	.LFE509-.LFB509
-	.byte	0x0
-	.align 8
-.LEFDE1:
-.LSFDE3:
-	.long	.LEFDE3-.LASFDE3
-.LASFDE3:
-	.long	.LASFDE3-.Lframe1
-	.long	.LFB510
-	.long	.LFE510-.LFB510
-	.byte	0x0
-	.align 8
-.LEFDE3:
-.LSFDE5:
-	.long	.LEFDE5-.LASFDE5
-.LASFDE5:
-	.long	.LASFDE5-.Lframe1
-	.long	.LFB511
-	.long	.LFE511-.LFB511
-	.byte	0x0
-	.align 8
-.LEFDE5:
-.LSFDE7:
-	.long	.LEFDE7-.LASFDE7
-.LASFDE7:
-	.long	.LASFDE7-.Lframe1
-	.long	.LFB512
-	.long	.LFE512-.LFB512
-	.byte	0x0
-	.align 8
-.LEFDE7:
-.LSFDE9:
-	.long	.LEFDE9-.LASFDE9
-.LASFDE9:
-	.long	.LASFDE9-.Lframe1
-	.long	.LFB513
-	.long	.LFE513-.LFB513
-	.byte	0x0
-	.align 8
-.LEFDE9:
-.LSFDE11:
-	.long	.LEFDE11-.LASFDE11
-.LASFDE11:
-	.long	.LASFDE11-.Lframe1
-	.long	.LFB515
-	.long	.LFE515-.LFB515
-	.byte	0x0
-	.align 8
-.LEFDE11:
-.LSFDE13:
-	.long	.LEFDE13-.LASFDE13
-.LASFDE13:
-	.long	.LASFDE13-.Lframe1
-	.long	.LFB516
-	.long	.LFE516-.LFB516
-	.byte	0x0
-	.byte	0x4
-	.long	.LCFI0-.LFB516
-	.byte	0xe
-	.byte	0x10
-	.byte	0x4
-	.long	.LCFI1-.LCFI0
-	.byte	0xe
-	.byte	0x18
-	.byte	0x11
-	.byte	0x3
-	.byte	0x3
-	.byte	0x11
-	.byte	0x6
-	.byte	0x2
-	.align 8
-.LEFDE13:
-.LSFDE15:
-	.long	.LEFDE15-.LASFDE15
-.LASFDE15:
-	.long	.LASFDE15-.Lframe1
-	.long	.LFB514
-	.long	.LFE514-.LFB514
-	.byte	0x0
-	.align 8
-.LEFDE15:
-	.ident	"GCC: (GNU) 4.4.0 20090304 (experimental)"
--- a/src/core/basic_ops_x86_64_sse2.s
+++ b/src/core/basic_ops_x86_64_sse2.s
@@ -1,395 +0,0 @@
-	.file	"basic_ops_x86.c"
-	.text
-	.align 16
-.globl alignedMemCpySSE2
-	.type	alignedMemCpySSE2, @function
-alignedMemCpySSE2:
-.LFB509:
-	movslq	%edx,%rdx
-	shrq	$6, %rdx
-	testl	%edx, %edx
-	jle	.L4
-	subl	$1, %edx
-	xorl	%eax, %eax
-	addq	$1, %rdx
-	salq	$6, %rdx
-	.align 16
-.L3:
-	movdqa	(%rsi,%rax), %xmm0
-	movdqa	%xmm0, (%rdi,%rax)
-	movdqa	16(%rsi,%rax), %xmm0
-	movdqa	%xmm0, 16(%rdi,%rax)
-	movdqa	32(%rsi,%rax), %xmm0
-	movdqa	%xmm0, 32(%rdi,%rax)
-	movdqa	48(%rsi,%rax), %xmm0
-	movdqa	%xmm0, 48(%rdi,%rax)
-	addq	$64, %rax
-	cmpq	%rdx, %rax
-	jne	.L3
-.L4:
-	rep
-	ret
-.LFE509:
-	.size	alignedMemCpySSE2, .-alignedMemCpySSE2
-	.align 16
-.globl alignedMemClearSSE2
-	.type	alignedMemClearSSE2, @function
-alignedMemClearSSE2:
-.LFB510:
-	movslq	%esi,%rsi
-	shrq	$6, %rsi
-	testl	%esi, %esi
-	jle	.L10
-	subl	$1, %esi
-	pxor	%xmm0, %xmm0
-	salq	$6, %rsi
-	leaq	64(%rdi,%rsi), %rax
-	.align 16
-.L9:
-	movdqa	%xmm0, (%rdi)
-	movdqa	%xmm0, 16(%rdi)
-	movdqa	%xmm0, 32(%rdi)
-	movdqa	%xmm0, 48(%rdi)
-	addq	$64, %rdi
-	cmpq	%rax, %rdi
-	jne	.L9
-.L10:
-	rep
-	ret
-.LFE510:
-	.size	alignedMemClearSSE2, .-alignedMemClearSSE2
-	.align 16
-.globl alignedConvertToS16SSE2
-	.type	alignedConvertToS16SSE2, @function
-alignedConvertToS16SSE2:
-.LFB511:
-	pushq	%rbp
-.LCFI0:
-	testb	%cl, %cl
-	movl	%edx, %eax
-	mulss	.LC0(%rip), %xmm0
-	pushq	%rbx
-.LCFI1:
-	jne	.L13
-	testw	%dx, %dx
-	jle	.L15
-	movl	%edx, %ebx
-	shrw	$2, %bx
-	cmpw	$3, %dx
-	leal	0(,%rbx,4), %r8d
-	ja	.L33
-.L28:
-	xorl	%r8d, %r8d
-	.align 16
-.L23:
-	movswq	%r8w,%rdx
-	movl	$32767, %ebx
-	leaq	(%rdi,%rdx,8), %rcx
-	leaq	(%rsi,%rdx,4), %rdx
-	movl	$-32768, %edi
-	.align 16
-.L25:
-	movss	(%rcx), %xmm1
-	mulss	%xmm0, %xmm1
-	cvttss2si	%xmm1, %esi
-	movss	4(%rcx), %xmm1
-	mulss	%xmm0, %xmm1
-	cmpl	$-32768, %esi
-	cmovl	%edi, %esi
-	cmpl	$32767, %esi
-	cmovg	%ebx, %esi
-	movw	%si, (%rdx)
-	cvttss2si	%xmm1, %esi
-	cmpl	$-32768, %esi
-	cmovl	%edi, %esi
-	cmpl	$32767, %esi
-	cmovg	%ebx, %esi
-	addl	$1, %r8d
-	addq	$8, %rcx
-	movw	%si, 2(%rdx)
-	addq	$4, %rdx
-	cmpw	%r8w, %ax
-	jg	.L25
-.L15:
-	cwtl
-	popq	%rbx
-	sall	$2, %eax
-	popq	%rbp
-	ret
-	.align 16
-.L13:
-	testw	%dx, %dx
-	jle	.L15
-	movl	%edx, %ebx
-	shrw	$2, %bx
-	cmpw	$3, %dx
-	leal	0(,%rbx,4), %r8d
-	ja	.L34
-.L27:
-	xorl	%r8d, %r8d
-	.align 16
-.L18:
-	movswq	%r8w,%rdx
-	leaq	(%rdi,%rdx,8), %rcx
-	leaq	(%rsi,%rdx,4), %rdx
-	movl	$-32768, %edi
-	movl	$32767, %esi
-	.align 16
-.L20:
-	movss	(%rcx), %xmm1
-	mulss	%xmm0, %xmm1
-	cvttss2si	%xmm1, %ebx
-	movss	4(%rcx), %xmm1
-	mulss	%xmm0, %xmm1
-	cmpl	$-32768, %ebx
-	cmovl	%edi, %ebx
-	cmpl	$32767, %ebx
-	cmovg	%esi, %ebx
-	movzbl	%bh, %ebp
-	sall	$8, %ebx
-	movl	%ebp, %r9d
-	orl	%r9d, %ebx
-	movw	%bx, (%rdx)
-	cvttss2si	%xmm1, %ebx
-	cmpl	$-32768, %ebx
-	cmovl	%edi, %ebx
-	cmpl	$32767, %ebx
-	cmovg	%esi, %ebx
-	addl	$1, %r8d
-	addq	$8, %rcx
-	movzbl	%bh, %ebp
-	sall	$8, %ebx
-	movl	%ebp, %r9d
-	orl	%r9d, %ebx
-	movw	%bx, 2(%rdx)
-	addq	$4, %rdx
-	cmpw	%r8w, %ax
-	jg	.L20
-	cwtl
-	popq	%rbx
-	sall	$2, %eax
-	popq	%rbp
-	ret
-	.align 16
-.L34:
-	testw	%r8w, %r8w
-	je	.L27
-	movaps	%xmm0, %xmm1
-	movq	%rdi, %rcx
-	movdqa	.LC1(%rip), %xmm4
-	movq	%rsi, %r10
-	shufps	$0, %xmm1, %xmm1
-	xorl	%r9d, %r9d
-	movdqa	.LC2(%rip), %xmm3
-	movaps	%xmm1, %xmm9
-	movdqa	.LC3(%rip), %xmm8
-	.align 16
-.L19:
-	movaps	(%rcx), %xmm1
-	addl	$1, %r9d
-	movdqa	%xmm3, %xmm5
-	mulps	%xmm9, %xmm1
-	movaps	16(%rcx), %xmm6
-	movdqa	%xmm3, %xmm7
-	addq	$32, %rcx
-	mulps	%xmm9, %xmm6
-	cvttps2dq	%xmm1, %xmm1
-	movdqa	%xmm1, %xmm2
-	pcmpgtd	%xmm4, %xmm2
-	cvttps2dq	%xmm6, %xmm6
-	pand	%xmm2, %xmm1
-	pandn	%xmm4, %xmm2
-	por	%xmm1, %xmm2
-	movdqa	%xmm2, %xmm1
-	pcmpgtd	%xmm3, %xmm1
-	pand	%xmm1, %xmm5
-	pandn	%xmm2, %xmm1
-	movdqa	%xmm1, %xmm2
-	movdqa	%xmm6, %xmm1
-	por	%xmm5, %xmm2
-	pcmpgtd	%xmm4, %xmm1
-	pand	%xmm1, %xmm6
-	pandn	%xmm4, %xmm1
-	movdqa	%xmm2, %xmm5
-	pslld	$8, %xmm2
-	pand	%xmm8, %xmm5
-	por	%xmm6, %xmm1
-	psrad	$8, %xmm5
-	movdqa	%xmm1, %xmm6
-	pcmpgtd	%xmm3, %xmm6
-	pand	%xmm6, %xmm7
-	pandn	%xmm1, %xmm6
-	movdqa	%xmm6, %xmm1
-	por	%xmm7, %xmm1
-	movdqa	%xmm5, %xmm7
-	movdqa	%xmm1, %xmm6
-	pslld	$8, %xmm1
-	pand	%xmm8, %xmm6
-	psrad	$8, %xmm6
-	punpcklwd	%xmm6, %xmm5
-	punpckhwd	%xmm6, %xmm7
-	movdqa	%xmm5, %xmm6
-	punpcklwd	%xmm7, %xmm5
-	punpckhwd	%xmm7, %xmm6
-	punpcklwd	%xmm6, %xmm5
-	movdqa	%xmm2, %xmm6
-	punpcklwd	%xmm1, %xmm2
-	punpckhwd	%xmm1, %xmm6
-	movdqa	%xmm2, %xmm1
-	punpcklwd	%xmm6, %xmm2
-	punpckhwd	%xmm6, %xmm1
-	punpcklwd	%xmm1, %xmm2
-	por	%xmm2, %xmm5
-	movdqa	%xmm5, (%r10)
-	addq	$16, %r10
-	cmpw	%r9w, %bx
-	ja	.L19
-	cmpw	%dx, %r8w
-	jne	.L18
-	jmp	.L15
-	.align 16
-.L33:
-	testw	%r8w, %r8w
-	je	.L28
-	movaps	%xmm0, %xmm1
-	movq	%rdi, %rcx
-	movdqa	.LC1(%rip), %xmm4
-	movq	%rsi, %r10
-	shufps	$0, %xmm1, %xmm1
-	xorl	%r9d, %r9d
-	movdqa	.LC2(%rip), %xmm3
-	movaps	%xmm1, %xmm6
-	.align 16
-.L24:
-	movaps	(%rcx), %xmm1
-	addl	$1, %r9d
-	movdqa	%xmm3, %xmm7
-	mulps	%xmm6, %xmm1
-	movaps	16(%rcx), %xmm5
-	addq	$32, %rcx
-	mulps	%xmm6, %xmm5
-	cvttps2dq	%xmm1, %xmm1
-	movdqa	%xmm1, %xmm2
-	pcmpgtd	%xmm4, %xmm2
-	cvttps2dq	%xmm5, %xmm5
-	pand	%xmm2, %xmm1
-	pandn	%xmm4, %xmm2
-	por	%xmm1, %xmm2
-	movdqa	%xmm2, %xmm1
-	pcmpgtd	%xmm3, %xmm1
-	pand	%xmm1, %xmm7
-	pandn	%xmm2, %xmm1
-	movdqa	%xmm1, %xmm2
-	movdqa	%xmm5, %xmm1
-	por	%xmm7, %xmm2
-	movdqa	%xmm3, %xmm7
-	pcmpgtd	%xmm4, %xmm1
-	pand	%xmm1, %xmm5
-	pandn	%xmm4, %xmm1
-	por	%xmm5, %xmm1
-	movdqa	%xmm1, %xmm5
-	pcmpgtd	%xmm3, %xmm5
-	pand	%xmm5, %xmm7
-	pandn	%xmm1, %xmm5
-	movdqa	%xmm5, %xmm1
-	movdqa	%xmm2, %xmm5
-	por	%xmm7, %xmm1
-	punpcklwd	%xmm1, %xmm2
-	punpckhwd	%xmm1, %xmm5
-	movdqa	%xmm2, %xmm1
-	punpcklwd	%xmm5, %xmm2
-	punpckhwd	%xmm5, %xmm1
-	punpcklwd	%xmm1, %xmm2
-	movdqa	%xmm2, (%r10)
-	addq	$16, %r10
-	cmpw	%r9w, %bx
-	ja	.L24
-	cmpw	%r8w, %dx
-	jne	.L23
-	jmp	.L15
-.LFE511:
-	.size	alignedConvertToS16SSE2, .-alignedConvertToS16SSE2
-	.section	.rodata
-	.align 4
-.LC0:
-	.long	1191181824
-	.align 16
-.LC1:
-	.long	-32768
-	.long	-32768
-	.long	-32768
-	.long	-32768
-	.align 16
-.LC2:
-	.long	32767
-	.long	32767
-	.long	32767
-	.long	32767
-	.align 16
-.LC3:
-	.long	65280
-	.long	65280
-	.long	65280
-	.long	65280
-	.section	.eh_frame,"aw",@progbits
-.Lframe1:
-	.long	.LECIE1-.LSCIE1
-.LSCIE1:
-	.long	0x0
-	.byte	0x1
-	.string	"zR"
-	.byte	0x1
-	.byte	0x78
-	.byte	0x10
-	.byte	0x1
-	.byte	0x3
-	.byte	0xc
-	.byte	0x7
-	.byte	0x8
-	.byte	0x11
-	.byte	0x10
-	.byte	0x1
-	.align 8
-.LECIE1:
-.LSFDE1:
-	.long	.LEFDE1-.LASFDE1
-.LASFDE1:
-	.long	.LASFDE1-.Lframe1
-	.long	.LFB509
-	.long	.LFE509-.LFB509
-	.byte	0x0
-	.align 8
-.LEFDE1:
-.LSFDE3:
-	.long	.LEFDE3-.LASFDE3
-.LASFDE3:
-	.long	.LASFDE3-.Lframe1
-	.long	.LFB510
-	.long	.LFE510-.LFB510
-	.byte	0x0
-	.align 8
-.LEFDE3:
-.LSFDE5:
-	.long	.LEFDE5-.LASFDE5
-.LASFDE5:
-	.long	.LASFDE5-.Lframe1
-	.long	.LFB511
-	.long	.LFE511-.LFB511
-	.byte	0x0
-	.byte	0x4
-	.long	.LCFI0-.LFB511
-	.byte	0xe
-	.byte	0x10
-	.byte	0x4
-	.long	.LCFI1-.LCFI0
-	.byte	0xe
-	.byte	0x18
-	.byte	0x11
-	.byte	0x3
-	.byte	0x3
-	.byte	0x11
-	.byte	0x6
-	.byte	0x2
-	.align 8
-.LEFDE5:
-	.ident	"GCC: (GNU) 4.4.0 20090304 (experimental)"
--- a/src/core/basic_ops_x86_mmx.s
+++ b/src/core/basic_ops_x86_mmx.s
@@ -1,107 +0,0 @@
-	.file	"basic_ops_x86.c"
-	.text
-	.p2align 4,,15
-.globl alignedMemCpyMMX
-	.type	alignedMemCpyMMX, @function
-alignedMemCpyMMX:
-	pushl	%ebx
-	subl	$112, %esp
-	movl	128(%esp), %ebx
-	movl	124(%esp), %eax
-	shrl	$6, %ebx
-#APP
-# 42 "/home/toby/development/git/lmms/src/core/basic_ops_x86.c" 1
-	 fsave 4(%esp); fwait
-
-# 0 "" 2
-# 44 "/home/toby/development/git/lmms/src/core/basic_ops_x86.c" 1
-	1: prefetchnta (%eax)
-   prefetchnta 64(%eax)
-   prefetchnta 128(%eax)
-   prefetchnta 192(%eax)
-   prefetchnta 256(%eax)
-
-# 0 "" 2
-#NO_APP
-	testl	%ebx, %ebx
-	je	.L2
-	movl	120(%esp), %ecx
-	xorl	%edx, %edx
-	.p2align 4,,7
-	.p2align 3
-.L3:
-#APP
-# 53 "/home/toby/development/git/lmms/src/core/basic_ops_x86.c" 1
-	1: prefetchnta 320(%eax)
-2: movq (%eax), %mm0
-   movq 8(%eax), %mm1
-   movq 16(%eax), %mm2
-   movq 24(%eax), %mm3
-   movq %mm0, (%ecx)
-   movq %mm1, 8(%ecx)
-   movq %mm2, 16(%ecx)
-   movq %mm3, 24(%ecx)
-   movq 32(%eax), %mm0
-   movq 40(%eax), %mm1
-   movq 48(%eax), %mm2
-   movq 56(%eax), %mm3
-   movq %mm0, 32(%ecx)
-   movq %mm1, 40(%ecx)
-   movq %mm2, 48(%ecx)
-   movq %mm3, 56(%ecx)
-
-# 0 "" 2
-#NO_APP
-	addl	$1, %edx
-	addl	$64, %eax
-	addl	$64, %ecx
-	cmpl	%edx, %ebx
-	jne	.L3
-.L2:
-#APP
-# 75 "/home/toby/development/git/lmms/src/core/basic_ops_x86.c" 1
-	 fsave 4(%esp); fwait
-
-# 0 "" 2
-#NO_APP
-	addl	$112, %esp
-	popl	%ebx
-	ret
-	.size	alignedMemCpyMMX, .-alignedMemCpyMMX
-	.p2align 4,,15
-.globl alignedMemClearMMX
-	.type	alignedMemClearMMX, @function
-alignedMemClearMMX:
-	movl	8(%esp), %ecx
-	shrl	$6, %ecx
-	testl	%ecx, %ecx
-	je	.L8
-	movl	4(%esp), %edx
-	xorl	%eax, %eax
-	pxor	%mm0, %mm0
-	.p2align 4,,7
-	.p2align 3
-.L9:
-#APP
-# 90 "/home/toby/development/git/lmms/src/core/basic_ops_x86.c" 1
-	movq    %mm0, (%edx)
-movq    %mm0, 8(%edx)
-movq    %mm0, 16(%edx)
-movq    %mm0, 24(%edx)
-movq    %mm0, 32(%edx)
-movq    %mm0, 40(%edx)
-movq    %mm0, 48(%edx)
-movq    %mm0, 56(%edx)
-
-# 0 "" 2
-#NO_APP
-	addl	$1, %eax
-	addl	$64, %edx
-	cmpl	%eax, %ecx
-	jne	.L9
-.L8:
-	emms
-	ret
-	.size	alignedMemClearMMX, .-alignedMemClearMMX
-	.ident	"GCC: (Ubuntu 4.4.0-0ubuntu2) 4.4.0"
-	.section	.note.GNU-stack,"",@progbits
--- a/src/core/basic_ops_x86_sse.s
+++ b/src/core/basic_ops_x86_sse.s
@@ -1,494 +0,0 @@
-	.file	"basic_ops_x86.c"
-	.text
-	.p2align 4,,15
-.globl alignedMemCpySSE
-	.type	alignedMemCpySSE, @function
-alignedMemCpySSE:
-	pushl	%esi
-	pushl	%ebx
-	movl	20(%esp), %esi
-	movl	12(%esp), %edx
-	movl	16(%esp), %ecx
-	shrl	$6, %esi
-	testl	%esi, %esi
-	je	.L4
-	xorl	%eax, %eax
-	xorl	%ebx, %ebx
-	.p2align 4,,7
-	.p2align 3
-.L3:
-	movaps	(%ecx,%eax), %xmm0
-	addl	$1, %ebx
-	movaps	%xmm0, (%edx,%eax)
-	movaps	16(%ecx,%eax), %xmm0
-	movaps	%xmm0, 16(%edx,%eax)
-	movaps	32(%ecx,%eax), %xmm0
-	movaps	%xmm0, 32(%edx,%eax)
-	movaps	48(%ecx,%eax), %xmm0
-	movaps	%xmm0, 48(%edx,%eax)
-	addl	$64, %eax
-	cmpl	%ebx, %esi
-	jne	.L3
-.L4:
-	popl	%ebx
-	popl	%esi
-	ret
-	.size	alignedMemCpySSE, .-alignedMemCpySSE
-	.p2align 4,,15
-.globl alignedMemClearSSE
-	.type	alignedMemClearSSE, @function
-alignedMemClearSSE:
-	movl	8(%esp), %ecx
-	shrl	$6, %ecx
-	testl	%ecx, %ecx
-	je	.L10
-	movl	4(%esp), %eax
-	xorps	%xmm0, %xmm0
-	xorl	%edx, %edx
-	.p2align 4,,7
-	.p2align 3
-.L9:
-	addl	$1, %edx
-	movaps	%xmm0, (%eax)
-	movaps	%xmm0, 16(%eax)
-	movaps	%xmm0, 32(%eax)
-	movaps	%xmm0, 48(%eax)
-	addl	$64, %eax
-	cmpl	%edx, %ecx
-	jne	.L9
-.L10:
-	rep
-	ret
-	.size	alignedMemClearSSE, .-alignedMemClearSSE
-	.p2align 4,,15
-.globl alignedBufApplyGainSSE
-	.type	alignedBufApplyGainSSE, @function
-alignedBufApplyGainSSE:
-	movl	12(%esp), %ecx
-	testl	%ecx, %ecx
-	jle	.L15
-	movss	8(%esp), %xmm0
-	subl	$1, %ecx
-	movl	4(%esp), %eax
-	shrl	$3, %ecx
-	xorl	%edx, %edx
-	addl	$1, %ecx
-	shufps	$0, %xmm0, %xmm0
-	.p2align 4,,7
-	.p2align 3
-.L14:
-	movaps	16(%eax), %xmm3
-	addl	$1, %edx
-	movaps	32(%eax), %xmm2
-	mulps	%xmm0, %xmm3
-	movaps	48(%eax), %xmm1
-	mulps	%xmm0, %xmm2
-	movaps	(%eax), %xmm4
-	mulps	%xmm0, %xmm1
-	movaps	%xmm3, 16(%eax)
-	mulps	%xmm0, %xmm4
-	movaps	%xmm2, 32(%eax)
-	movaps	%xmm1, 48(%eax)
-	movaps	%xmm4, (%eax)
-	addl	$64, %eax
-	cmpl	%edx, %ecx
-	ja	.L14
-.L15:
-	rep
-	ret
-	.size	alignedBufApplyGainSSE, .-alignedBufApplyGainSSE
-	.p2align 4,,15
-.globl alignedBufMixSSE
-	.type	alignedBufMixSSE, @function
-alignedBufMixSSE:
-	pushl	%esi
-	pushl	%ebx
-	movl	20(%esp), %esi
-	movl	12(%esp), %edx
-	movl	16(%esp), %ecx
-	testl	%esi, %esi
-	jle	.L20
-	subl	$1, %esi
-	xorl	%eax, %eax
-	shrl	$3, %esi
-	xorl	%ebx, %ebx
-	addl	$1, %esi
-	.p2align 4,,7
-	.p2align 3
-.L19:
-	movaps	16(%edx,%eax), %xmm2
-	addl	$1, %ebx
-	movaps	32(%edx,%eax), %xmm1
-	movaps	48(%edx,%eax), %xmm0
-	movaps	(%edx,%eax), %xmm3
-	addps	16(%ecx,%eax), %xmm2
-	addps	32(%ecx,%eax), %xmm1
-	addps	48(%ecx,%eax), %xmm0
-	addps	(%ecx,%eax), %xmm3
-	movaps	%xmm2, 16(%edx,%eax)
-	movaps	%xmm3, (%edx,%eax)
-	movaps	%xmm1, 32(%edx,%eax)
-	movaps	%xmm0, 48(%edx,%eax)
-	addl	$64, %eax
-	cmpl	%ebx, %esi
-	ja	.L19
-.L20:
-	popl	%ebx
-	popl	%esi
-	ret
-	.size	alignedBufMixSSE, .-alignedBufMixSSE
-	.p2align 4,,15
-.globl alignedBufMixLRCoeffSSE
-	.type	alignedBufMixLRCoeffSSE, @function
-alignedBufMixLRCoeffSSE:
-	pushl	%esi
-	pushl	%ebx
-	movl	28(%esp), %esi
-	movl	12(%esp), %edx
-	movl	16(%esp), %ebx
-	testl	%esi, %esi
-	jle	.L25
-	movss	24(%esp), %xmm2
-	subl	$1, %esi
-	movss	20(%esp), %xmm0
-	xorl	%eax, %eax
-	shrl	$2, %esi
-	xorl	%ecx, %ecx
-	addl	$1, %esi
-	unpcklps	%xmm2, %xmm0
-	movaps	%xmm0, %xmm2
-	movlhps	%xmm0, %xmm2
-	.p2align 4,,7
-	.p2align 3
-.L24:
-	movaps	16(%ebx,%eax), %xmm0
-	addl	$1, %ecx
-	movaps	(%ebx,%eax), %xmm1
-	mulps	%xmm2, %xmm0
-	mulps	%xmm2, %xmm1
-	addps	16(%edx,%eax), %xmm0
-	addps	(%edx,%eax), %xmm1
-	movaps	%xmm0, 16(%edx,%eax)
-	movaps	%xmm1, (%edx,%eax)
-	addl	$32, %eax
-	cmpl	%ecx, %esi
-	ja	.L24
-.L25:
-	popl	%ebx
-	popl	%esi
-	ret
-	.size	alignedBufMixLRCoeffSSE, .-alignedBufMixLRCoeffSSE
-	.p2align 4,,15
-.globl alignedBufWetDryMixSSE
-	.type	alignedBufWetDryMixSSE, @function
-alignedBufWetDryMixSSE:
-	pushl	%esi
-	pushl	%ebx
-	movl	28(%esp), %esi
-	movl	12(%esp), %edx
-	movl	16(%esp), %ebx
-	testl	%esi, %esi
-	jle	.L30
-	movss	24(%esp), %xmm3
-	subl	$1, %esi
-	movss	20(%esp), %xmm2
-	xorl	%eax, %eax
-	shrl	$2, %esi
-	xorl	%ecx, %ecx
-	shufps	$0, %xmm3, %xmm3
-	addl	$1, %esi
-	shufps	$0, %xmm2, %xmm2
-	.p2align 4,,7
-	.p2align 3
-.L29:
-	movaps	16(%ebx,%eax), %xmm1
-	addl	$1, %ecx
-	movaps	16(%edx,%eax), %xmm0
-	mulps	%xmm2, %xmm1
-	movaps	(%ebx,%eax), %xmm4
-	mulps	%xmm3, %xmm0
-	mulps	%xmm2, %xmm4
-	addps	%xmm1, %xmm0
-	movaps	(%edx,%eax), %xmm1
-	mulps	%xmm3, %xmm1
-	movaps	%xmm0, 16(%edx,%eax)
-	addps	%xmm4, %xmm1
-	movaps	%xmm1, (%edx,%eax)
-	addl	$32, %eax
-	cmpl	%ecx, %esi
-	ja	.L29
-.L30:
-	popl	%ebx
-	popl	%esi
-	ret
-	.size	alignedBufWetDryMixSSE, .-alignedBufWetDryMixSSE
-	.p2align 4,,15
-.globl alignedBufWetDryMixSplittedSSE
-	.type	alignedBufWetDryMixSplittedSSE, @function
-alignedBufWetDryMixSplittedSSE:
-	pushl	%ebp
-	pushl	%edi
-	pushl	%esi
-	pushl	%ebx
-	subl	$124, %esp
-	movl	164(%esp), %eax
-	movl	144(%esp), %edx
-	movl	148(%esp), %esi
-	movl	152(%esp), %ecx
-	testl	%eax, %eax
-	jle	.L39
-	movl	164(%esp), %eax
-	subl	$1, %eax
-	shrl	%eax
-	addl	$1, %eax
-	movl	%eax, %ebp
-	movl	%eax, 104(%esp)
-	shrl	$2, %ebp
-	cmpl	$3, 104(%esp)
-	leal	0(,%ebp,4), %eax
-	movl	%eax, 108(%esp)
-	jbe	.L40
-	testl	%eax, %eax
-	jne	.L34
-.L40:
-	xorl	%edi, %edi
-	jmp	.L36
-	.p2align 4,,7
-	.p2align 3
-.L34:
-	movss	160(%esp), %xmm0
-	xorps	%xmm7, %xmm7
-	movl	%esi, %ebx
-	xorl	%eax, %eax
-	xorl	%edi, %edi
-	shufps	$0, %xmm0, %xmm0
-	movaps	%xmm0, 16(%esp)
-	movss	156(%esp), %xmm0
-	shufps	$0, %xmm0, %xmm0
-	movaps	%xmm0, (%esp)
-	.p2align 4,,7
-	.p2align 3
-.L37:
-	movaps	(%edx,%eax,2), %xmm5
-	addl	$1, %edi
-	movaps	16(%edx,%eax,2), %xmm6
-	movaps	%xmm5, %xmm0
-	shufps	$136, %xmm6, %xmm0
-	movaps	32(%edx,%eax,2), %xmm4
-	shufps	$221, %xmm6, %xmm5
-	movaps	%xmm0, 80(%esp)
-	movaps	48(%edx,%eax,2), %xmm3
-	movaps	%xmm4, %xmm0
-	shufps	$136, %xmm3, %xmm0
-	movaps	80(%esp), %xmm2
-	shufps	$221, %xmm3, %xmm4
-	movaps	%xmm7, %xmm6
-	movlps	(%ebx), %xmm6
-	movaps	%xmm5, 64(%esp)
-	movhps	8(%ebx), %xmm6
-	shufps	$136, %xmm0, %xmm2
-	movaps	%xmm0, 48(%esp)
-	movaps	%xmm7, %xmm5
-	movaps	%xmm6, %xmm0
-	movlps	16(%ebx), %xmm5
-	movhps	24(%ebx), %xmm5
-	shufps	$136, %xmm5, %xmm0
-	mulps	16(%esp), %xmm2
-	shufps	$221, %xmm5, %xmm6
-	movaps	%xmm4, 32(%esp)
-	addl	$32, %ebx
-	mulps	(%esp), %xmm0
-	movaps	%xmm7, %xmm4
-	movlps	(%eax,%ecx), %xmm4
-	movaps	%xmm7, %xmm3
-	movhps	8(%eax,%ecx), %xmm4
-	movaps	%xmm4, %xmm1
-	movlps	16(%ecx,%eax), %xmm3
-	movhps	24(%ecx,%eax), %xmm3
-	shufps	$136, %xmm3, %xmm1
-	addps	%xmm0, %xmm2
-	movaps	64(%esp), %xmm0
-	shufps	$221, %xmm3, %xmm4
-	shufps	$136, 32(%esp), %xmm0
-	mulps	(%esp), %xmm1
-	movaps	%xmm2, %xmm3
-	movaps	64(%esp), %xmm5
-	mulps	16(%esp), %xmm0
-	shufps	$221, 32(%esp), %xmm5
-	mulps	(%esp), %xmm6
-	addps	%xmm1, %xmm0
-	movaps	80(%esp), %xmm1
-	shufps	$221, 48(%esp), %xmm1
-	mulps	(%esp), %xmm4
-	mulps	16(%esp), %xmm1
-	mulps	16(%esp), %xmm5
-	addps	%xmm6, %xmm1
-	addps	%xmm4, %xmm5
-	movaps	%xmm0, %xmm4
-	unpcklps	%xmm1, %xmm3
-	unpcklps	%xmm5, %xmm4
-	unpckhps	%xmm1, %xmm2
-	movaps	%xmm3, %xmm1
-	unpckhps	%xmm5, %xmm0
-	unpcklps	%xmm4, %xmm1
-	unpckhps	%xmm4, %xmm3
-	movaps	%xmm1, (%edx,%eax,2)
-	movaps	%xmm2, %xmm1
-	unpckhps	%xmm0, %xmm2
-	unpcklps	%xmm0, %xmm1
-	movaps	%xmm3, 16(%edx,%eax,2)
-	movaps	%xmm1, 32(%edx,%eax,2)
-	movaps	%xmm2, 48(%edx,%eax,2)
-	addl	$32, %eax
-	cmpl	%edi, %ebp
-	ja	.L37
-	movl	108(%esp), %edi
-	movl	104(%esp), %eax
-	addl	%edi, %edi
-	cmpl	%eax, 108(%esp)
-	je	.L39
-.L36:
-	movss	156(%esp), %xmm0
-	xorl	%ebp, %ebp
-	movss	160(%esp), %xmm1
-	movl	%edi, %eax
-	leal	(%edx,%edi,8), %ebx
-	leal	8(%edx,%edi,8), %edx
-	.p2align 4,,7
-	.p2align 3
-.L38:
-	movss	(%esi,%eax,4), %xmm3
-	addl	$2, %ebp
-	movss	(%ebx), %xmm2
-	mulss	%xmm0, %xmm3
-	mulss	%xmm1, %xmm2
-	addss	%xmm3, %xmm2
-	movss	%xmm2, (%ebx)
-	movss	4(%ebx), %xmm2
-	movss	(%ecx,%eax,4), %xmm3
-	mulss	%xmm1, %xmm2
-	mulss	%xmm0, %xmm3
-	addss	%xmm3, %xmm2
-	movss	%xmm2, 4(%ebx)
-	addl	$16, %ebx
-	movss	4(%esi,%eax,4), %xmm3
-	movss	(%edx), %xmm2
-	mulss	%xmm0, %xmm3
-	mulss	%xmm1, %xmm2
-	addss	%xmm3, %xmm2
-	movss	%xmm2, (%edx)
-	movss	4(%edx), %xmm2
-	movss	4(%ecx,%eax,4), %xmm3
-	mulss	%xmm1, %xmm2
-	leal	(%edi,%ebp), %eax
-	mulss	%xmm0, %xmm3
-	addss	%xmm3, %xmm2
-	movss	%xmm2, 4(%edx)
-	addl	$16, %edx
-	cmpl	%eax, 164(%esp)
-	jg	.L38
-.L39:
-	addl	$124, %esp
-	popl	%ebx
-	popl	%esi
-	popl	%edi
-	popl	%ebp
-	ret
-	.size	alignedBufWetDryMixSplittedSSE, .-alignedBufWetDryMixSplittedSSE
-	.p2align 4,,15
-.globl unalignedBufMixLRCoeffSSE
-	.type	unalignedBufMixLRCoeffSSE, @function
-unalignedBufMixLRCoeffSSE:
-	pushl	%esi
-	pushl	%ebx
-	movl	28(%esp), %ebx
-	movl	12(%esp), %eax
-	movl	16(%esp), %edx
-	movss	20(%esp), %xmm1
-	movl	%ebx, %esi
-	shrl	$31, %esi
-	leal	(%ebx,%esi), %ecx
-	andl	$1, %ecx
-	cmpl	%esi, %ecx
-	movss	24(%esp), %xmm3
-	jne	.L52
-.L44:
-	testl	%ebx, %ebx
-	jle	.L49
-	testb	$15, %al
-	jne	.L46
-	movaps	%xmm1, %xmm0
-	subl	$1, %ebx
-	unpcklps	%xmm3, %xmm0
-	shrl	%ebx
-	xorps	%xmm2, %xmm2
-	movaps	%xmm0, %xmm3
-	addl	$1, %ebx
-	movlhps	%xmm0, %xmm3
-	xorl	%ecx, %ecx
-	.p2align 4,,7
-	.p2align 3
-.L47:
-	movaps	%xmm2, %xmm1
-	addl	$1, %ecx
-	movlps	(%edx), %xmm1
-	movhps	8(%edx), %xmm1
-	movaps	%xmm2, %xmm0
-	movlps	(%eax), %xmm0
-	movhps	8(%eax), %xmm0
-	addl	$16, %edx
-	mulps	%xmm3, %xmm1
-	addps	%xmm1, %xmm0
-	movaps	%xmm0, (%eax)
-	addl	$16, %eax
-	cmpl	%ebx, %ecx
-	jb	.L47
-.L49:
-	popl	%ebx
-	popl	%esi
-	ret
-	.p2align 4,,7
-	.p2align 3
-.L46:
-	xorl	%ecx, %ecx
-	.p2align 4,,7
-	.p2align 3
-.L48:
-	movss	(%edx,%ecx,8), %xmm0
-	mulss	%xmm1, %xmm0
-	addss	(%eax,%ecx,8), %xmm0
-	movss	%xmm0, (%eax,%ecx,8)
-	movss	4(%edx,%ecx,8), %xmm0
-	mulss	%xmm3, %xmm0
-	addss	4(%eax,%ecx,8), %xmm0
-	movss	%xmm0, 4(%eax,%ecx,8)
-	movss	8(%edx,%ecx,8), %xmm0
-	mulss	%xmm1, %xmm0
-	addss	8(%eax,%ecx,8), %xmm0
-	movss	%xmm0, 8(%eax,%ecx,8)
-	movss	12(%edx,%ecx,8), %xmm0
-	mulss	%xmm3, %xmm0
-	addss	12(%eax,%ecx,8), %xmm0
-	movss	%xmm0, 12(%eax,%ecx,8)
-	addl	$2, %ecx
-	cmpl	%ecx, %ebx
-	jg	.L48
-	popl	%ebx
-	popl	%esi
-	ret
-.L52:
-	movss	(%edx), %xmm0
-	subl	$1, %ebx
-	mulss	%xmm1, %xmm0
-	addss	(%eax), %xmm0
-	movss	%xmm0, (%eax)
-	movss	4(%edx), %xmm0
-	addl	$8, %edx
-	mulss	%xmm3, %xmm0
-	addss	4(%eax), %xmm0
-	movss	%xmm0, 4(%eax)
-	addl	$8, %eax
-	jmp	.L44
-	.size	unalignedBufMixLRCoeffSSE, .-unalignedBufMixLRCoeffSSE
-	.ident	"GCC: (Ubuntu 4.4.0-0ubuntu2) 4.4.0"
-	.section	.note.GNU-stack,"",@progbits
--- a/src/core/basic_ops_x86_sse2.s
+++ b/src/core/basic_ops_x86_sse2.s
@@ -1,349 +0,0 @@
-	.file	"basic_ops_x86.c"
-	.text
-	.p2align 4,,15
-.globl alignedMemCpySSE2
-	.type	alignedMemCpySSE2, @function
-alignedMemCpySSE2:
-	pushl	%esi
-	pushl	%ebx
-	movl	20(%esp), %esi
-	movl	12(%esp), %edx
-	movl	16(%esp), %ecx
-	shrl	$6, %esi
-	testl	%esi, %esi
-	je	.L4
-	xorl	%eax, %eax
-	xorl	%ebx, %ebx
-	.p2align 4,,7
-	.p2align 3
-.L3:
-	addl	$1, %ebx
-	movdqa	(%ecx,%eax), %xmm0
-	movdqa	%xmm0, (%edx,%eax)
-	movdqa	16(%ecx,%eax), %xmm0
-	movdqa	%xmm0, 16(%edx,%eax)
-	movdqa	32(%ecx,%eax), %xmm0
-	movdqa	%xmm0, 32(%edx,%eax)
-	movdqa	48(%ecx,%eax), %xmm0
-	movdqa	%xmm0, 48(%edx,%eax)
-	addl	$64, %eax
-	cmpl	%ebx, %esi
-	jne	.L3
-.L4:
-	popl	%ebx
-	popl	%esi
-	ret
-	.size	alignedMemCpySSE2, .-alignedMemCpySSE2
-	.p2align 4,,15
-.globl alignedMemClearSSE2
-	.type	alignedMemClearSSE2, @function
-alignedMemClearSSE2:
-	movl	8(%esp), %ecx
-	shrl	$6, %ecx
-	testl	%ecx, %ecx
-	je	.L10
-	movl	4(%esp), %eax
-	xorl	%edx, %edx
-	pxor	%xmm0, %xmm0
-	.p2align 4,,7
-	.p2align 3
-.L9:
-	addl	$1, %edx
-	movdqa	%xmm0, (%eax)
-	movdqa	%xmm0, 16(%eax)
-	movdqa	%xmm0, 32(%eax)
-	movdqa	%xmm0, 48(%eax)
-	addl	$64, %eax
-	cmpl	%edx, %ecx
-	jne	.L9
-.L10:
-	rep
-	ret
-	.size	alignedMemClearSSE2, .-alignedMemClearSSE2
-	.p2align 4,,15
-.globl alignedConvertToS16SSE2
-	.type	alignedConvertToS16SSE2, @function
-alignedConvertToS16SSE2:
-	pushl	%ebp
-	pushl	%edi
-	pushl	%esi
-	pushl	%ebx
-	subl	$8, %esp
-	movl	36(%esp), %eax
-	movss	.LC0, %xmm6
-	cmpb	$0, 44(%esp)
-	movl	28(%esp), %edx
-	movl	32(%esp), %ebx
-	movl	%eax, %esi
-	mulss	40(%esp), %xmm6
-	jne	.L13
-	testw	%ax, %ax
-	jle	.L15
-	movl	%eax, %edi
-	shrw	$2, %di
-	cmpw	$3, %ax
-	movw	%ax, 2(%esp)
-	leal	0(,%edi,4), %ebp
-	ja	.L33
-.L28:
-	xorl	%ebp, %ebp
-	.p2align 4,,7
-	.p2align 3
-.L23:
-	movswl	%bp,%eax
-	movl	$-32768, %edi
-	leal	(%edx,%eax,8), %edx
-	leal	(%ebx,%eax,4), %eax
-	movl	$32767, %ebx
-	.p2align 4,,7
-	.p2align 3
-.L25:
-	movss	(%edx), %xmm0
-	mulss	%xmm6, %xmm0
-	cvttss2si	%xmm0, %ecx
-	movss	4(%edx), %xmm0
-	cmpl	$-32768, %ecx
-	mulss	%xmm6, %xmm0
-	cmovl	%edi, %ecx
-	cmpl	$32767, %ecx
-	cmovg	%ebx, %ecx
-	movw	%cx, (%eax)
-	cvttss2si	%xmm0, %ecx
-	cmpl	$-32768, %ecx
-	cmovl	%edi, %ecx
-	cmpl	$32767, %ecx
-	cmovg	%ebx, %ecx
-	addl	$1, %ebp
-	movw	%cx, 2(%eax)
-	addl	$8, %edx
-	addl	$4, %eax
-	cmpw	%bp, %si
-	jg	.L25
-.L15:
-	movswl	%si,%esi
-	addl	$8, %esp
-	leal	0(,%esi,4), %eax
-	popl	%ebx
-	popl	%esi
-	popl	%edi
-	popl	%ebp
-	ret
-	.p2align 4,,7
-	.p2align 3
-.L13:
-	testw	%ax, %ax
-	jle	.L15
-	movl	%eax, %ebp
-	shrw	$2, %bp
-	cmpw	$3, %si
-	movw	%ax, 2(%esp)
-	leal	0(,%ebp,4), %eax
-	ja	.L34
-.L27:
-	xorl	%eax, %eax
-	.p2align 4,,7
-	.p2align 3
-.L18:
-	movswl	%ax,%edi
-	leal	(%edx,%edi,8), %ecx
-	leal	(%ebx,%edi,4), %edx
-	movl	$-32768, %edi
-	.p2align 4,,7
-	.p2align 3
-.L20:
-	movss	(%ecx), %xmm0
-	movl	$32767, %ebp
-	mulss	%xmm6, %xmm0
-	cvttss2si	%xmm0, %ebx
-	movss	4(%ecx), %xmm0
-	cmpl	$-32768, %ebx
-	cmovl	%edi, %ebx
-	cmpl	$32767, %ebx
-	mulss	%xmm6, %xmm0
-	cmovg	%ebp, %ebx
-	movzbl	%bh, %ebp
-	sall	$8, %ebx
-	orl	%ebp, %ebx
-	movl	$32767, %ebp
-	movw	%bx, (%edx)
-	cvttss2si	%xmm0, %ebx
-	cmpl	$-32768, %ebx
-	cmovl	%edi, %ebx
-	cmpl	$32767, %ebx
-	cmovg	%ebp, %ebx
-	addl	$1, %eax
-	movzbl	%bh, %ebp
-	addl	$8, %ecx
-	sall	$8, %ebx
-	orl	%ebp, %ebx
-	movw	%bx, 2(%edx)
-	addl	$4, %edx
-	cmpw	%ax, %si
-	jg	.L20
-	jmp	.L15
-	.p2align 4,,7
-	.p2align 3
-.L34:
-	testw	%ax, %ax
-	je	.L27
-	movaps	%xmm6, %xmm0
-	xorl	%ecx, %ecx
-	movdqa	.LC1, %xmm3
-	shufps	$0, %xmm0, %xmm0
-	movdqa	.LC2, %xmm2
-	movss	%xmm6, 4(%esp)
-	xorl	%edi, %edi
-	movaps	%xmm0, %xmm7
-	.p2align 4,,7
-	.p2align 3
-.L19:
-	movaps	(%edx,%ecx,2), %xmm0
-	movdqa	%xmm2, %xmm5
-	movdqa	%xmm2, %xmm6
-	addl	$1, %edi
-	movaps	16(%edx,%ecx,2), %xmm4
-	mulps	%xmm7, %xmm0
-	mulps	%xmm7, %xmm4
-	cvttps2dq	%xmm0, %xmm0
-	movdqa	%xmm0, %xmm1
-	pcmpgtd	%xmm3, %xmm1
-	pand	%xmm1, %xmm0
-	pandn	%xmm3, %xmm1
-	por	%xmm0, %xmm1
-	cvttps2dq	%xmm4, %xmm4
-	movdqa	%xmm1, %xmm0
-	pcmpgtd	%xmm2, %xmm0
-	pand	%xmm0, %xmm5
-	pandn	%xmm1, %xmm0
-	movdqa	%xmm0, %xmm1
-	movdqa	%xmm4, %xmm0
-	por	%xmm5, %xmm1
-	pcmpgtd	%xmm3, %xmm0
-	movdqa	.LC3, %xmm5
-	pand	%xmm0, %xmm4
-	pand	%xmm1, %xmm5
-	pandn	%xmm3, %xmm0
-	psrad	$8, %xmm5
-	por	%xmm4, %xmm0
-	pslld	$8, %xmm1
-	movdqa	%xmm0, %xmm4
-	pcmpgtd	%xmm2, %xmm4
-	pand	%xmm4, %xmm6
-	pandn	%xmm0, %xmm4
-	movdqa	%xmm4, %xmm0
-	movdqa	.LC3, %xmm4
-	por	%xmm6, %xmm0
-	pand	%xmm0, %xmm4
-	pslld	$8, %xmm0
-	psrad	$8, %xmm4
-	movdqa	%xmm5, %xmm6
-	punpcklwd	%xmm4, %xmm5
-	punpckhwd	%xmm4, %xmm6
-	movdqa	%xmm5, %xmm4
-	punpcklwd	%xmm6, %xmm5
-	punpckhwd	%xmm6, %xmm4
-	punpcklwd	%xmm4, %xmm5
-	movdqa	%xmm1, %xmm4
-	punpcklwd	%xmm0, %xmm1
-	punpckhwd	%xmm0, %xmm4
-	movdqa	%xmm1, %xmm6
-	punpcklwd	%xmm4, %xmm1
-	punpckhwd	%xmm4, %xmm6
-	punpcklwd	%xmm6, %xmm1
-	por	%xmm1, %xmm5
-	movdqa	%xmm5, (%ebx,%ecx)
-	addl	$16, %ecx
-	cmpw	%di, %bp
-	ja	.L19
-	cmpw	2(%esp), %ax
-	movss	4(%esp), %xmm6
-	jne	.L18
-	jmp	.L15
-	.p2align 4,,7
-	.p2align 3
-.L33:
-	testw	%bp, %bp
-	.p2align 4,,3
-	.p2align 3
-	je	.L28
-	movaps	%xmm6, %xmm0
-	xorl	%eax, %eax
-	movdqa	.LC1, %xmm3
-	shufps	$0, %xmm0, %xmm0
-	movdqa	.LC2, %xmm2
-	xorl	%ecx, %ecx
-	movaps	%xmm0, %xmm5
-	.p2align 4,,7
-	.p2align 3
-.L24:
-	movaps	(%edx,%eax,2), %xmm0
-	addl	$1, %ecx
-	movdqa	%xmm2, %xmm7
-	movaps	16(%edx,%eax,2), %xmm4
-	mulps	%xmm5, %xmm0
-	mulps	%xmm5, %xmm4
-	cvttps2dq	%xmm0, %xmm0
-	movdqa	%xmm0, %xmm1
-	pcmpgtd	%xmm3, %xmm1
-	pand	%xmm1, %xmm0
-	pandn	%xmm3, %xmm1
-	por	%xmm0, %xmm1
-	cvttps2dq	%xmm4, %xmm4
-	movdqa	%xmm1, %xmm0
-	pcmpgtd	%xmm2, %xmm0
-	pand	%xmm0, %xmm7
-	pandn	%xmm1, %xmm0
-	movdqa	%xmm0, %xmm1
-	movdqa	%xmm4, %xmm0
-	por	%xmm7, %xmm1
-	pcmpgtd	%xmm3, %xmm0
-	movdqa	%xmm2, %xmm7
-	pand	%xmm0, %xmm4
-	pandn	%xmm3, %xmm0
-	por	%xmm4, %xmm0
-	movdqa	%xmm0, %xmm4
-	pcmpgtd	%xmm2, %xmm4
-	pand	%xmm4, %xmm7
-	pandn	%xmm0, %xmm4
-	movdqa	%xmm4, %xmm0
-	movdqa	%xmm1, %xmm4
-	por	%xmm7, %xmm0
-	punpckhwd	%xmm0, %xmm4
-	punpcklwd	%xmm0, %xmm1
-	movdqa	%xmm1, %xmm0
-	punpcklwd	%xmm4, %xmm1
-	punpckhwd	%xmm4, %xmm0
-	punpcklwd	%xmm0, %xmm1
-	movdqa	%xmm1, (%ebx,%eax)
-	addl	$16, %eax
-	cmpw	%cx, %di
-	ja	.L24
-	cmpw	%bp, 2(%esp)
-	jne	.L23
-	jmp	.L15
-	.size	alignedConvertToS16SSE2, .-alignedConvertToS16SSE2
-	.section	.rodata.cst4,"aM",@progbits,4
-	.align 4
-.LC0:
-	.long	1191181824
-	.section	.rodata.cst16,"aM",@progbits,16
-	.align 16
-.LC1:
-	.long	-32768
-	.long	-32768
-	.long	-32768
-	.long	-32768
-	.align 16
-.LC2:
-	.long	32767
-	.long	32767
-	.long	32767
-	.long	32767
-	.align 16
-.LC3:
-	.long	65280
-	.long	65280
-	.long	65280
-	.long	65280
-	.ident	"GCC: (Ubuntu 4.4.0-0ubuntu2) 4.4.0"
-	.section	.note.GNU-stack,"",@progbits
--- a/src/core/fx_mixer.cpp
+++ b/src/core/fx_mixer.cpp
@@ -1,10 +1,8 @@
-#ifndef SINGLE_SOURCE_COMPILE
-
 /*
 * fx_mixer.cpp - effect-mixer for LMMS
 *
- * Copyright (c) 2008 Tobias Doerffel <tobydox/at/users.sourceforge.net>
- * 
+ * Copyright (c) 2008-2009 Tobias Doerffel <tobydox/at/users.sourceforge.net>
+ *
 * This file is part of Linux MultiMedia Studio - http://lmms.sourceforge.net
 *
 * This program is free software; you can redistribute it and/or
@@ -28,7 +26,7 @@
 #include <QtXml/QDomElement>

 #include "fx_mixer.h"
-#include "basic_ops.h"
+#include "Cpu.h"
 #include "effect.h"
 #include "song.h"

@@ -39,7 +37,7 @@ fxChannel::fxChannel( model * _parent ) :
 	m_stillRunning( false ),
 	m_peakLeft( 0.0f ),
 	m_peakRight( 0.0f ),
-	m_buffer( alignedAllocFrames( engine::getMixer()->framesPerPeriod() ) ),
+	m_buffer( CPU::allocFrames( engine::getMixer()->framesPerPeriod() ) ),
 	m_muteModel( false, _parent ),
 	m_volumeModel( 1.0, 0.0, 2.0, 0.01, _parent ),
 	m_name(),
@@ -54,7 +52,7 @@ fxChannel::fxChannel( model * _parent ) :

 fxChannel::~fxChannel()
 {
-	alignedFreeFrames( m_buffer );
+	CPU::freeFrames( m_buffer );
 }


@@ -93,7 +91,8 @@ void fxMixer::mixToChannel( const sampleFrame * _buf, fx_ch_t _ch )
 	if( m_fxChannels[_ch]->m_muteModel.value() == false )
 	{
 		m_fxChannels[_ch]->m_lock.lock();
-		alignedBufMix( m_fxChannels[_ch]->m_buffer, _buf, engine::getMixer()->framesPerPeriod() );
+		CPU::bufMix( m_fxChannels[_ch]->m_buffer, _buf,
+						engine::getMixer()->framesPerPeriod() );
 		m_fxChannels[_ch]->m_used = true;
 		m_fxChannels[_ch]->m_lock.unlock();
 	}
@@ -248,4 +247,3 @@ void fxMixer::loadSettings( const QDomElement & _this )
 }


-#endif
--- a/src/core/main.cpp
+++ b/src/core/main.cpp
@@ -62,7 +62,7 @@
 #include "main_window.h"
 #include "project_renderer.h"
 #include "song.h"
-#include "basic_ops.h"
+#include "Cpu.h"

 // TODO Make a factory class for this (or hide it behind engine)
 #include "lmms_style.h"
@@ -96,8 +96,8 @@ int main( int argc, char * * argv )
 	// intialize RNG
 	srand( getpid() + time( 0 ) );

-	// init CPU specific optimized basic ops
-	initBasicOps();
+	// init CPU specific optimized operations
+	CPU::init();

 	bool core_only = false;
 	bool fullscreen = true;
--- a/src/core/mixer.cpp
+++ b/src/core/mixer.cpp
@@ -2,7 +2,7 @@
 * mixer.cpp - audio-device-independent mixer for LMMS
 *
 * Copyright (c) 2004-2009 Tobias Doerffel <tobydox/at/users.sourceforge.net>
- * 
+ *
 * This file is part of Linux MultiMedia Studio - http://lmms.sourceforge.net
 *
 * This program is free software; you can redistribute it and/or
@@ -22,7 +22,6 @@
 *
 */

-
 #include <math.h>

 #include "mixer.h"
@@ -41,7 +40,7 @@
 #include "sample_play_handle.h"
 #include "piano_roll.h"
 #include "micro_timer.h"
-#include "basic_ops.h"
+#include "Cpu.h"

 #include "audio_device.h"
 #include "midi_client.h"
@@ -125,7 +124,7 @@ public:

 	MixerWorkerThread( int _worker_num, mixer * _mixer ) :
 		QThread( _mixer ),
-		m_workingBuf( alignedAllocFrames( _mixer->framesPerPeriod() ) ),
+		m_workingBuf( CPU::allocFrames( _mixer->framesPerPeriod() ) ),
 		m_workerNum( _worker_num ),
 		m_quit( false ),
 		m_mixer( _mixer ),
@@ -135,7 +134,7 @@ public:

 	virtual ~MixerWorkerThread()
 	{
-		alignedFreeFrames( m_workingBuf );
+		CPU::freeFrames( m_workingBuf );
 	}

 	virtual void quit( void )
@@ -295,7 +294,7 @@ mixer::mixer( void ) :
 	{
 		m_inputBufferFrames[i] = 0;
 		m_inputBufferSize[i] = DEFAULT_BUFFER_SIZE * 100;
-		m_inputBuffer[i] = alignedAllocFrames( 
+		m_inputBuffer[i] = CPU::allocFrames( 
 						DEFAULT_BUFFER_SIZE * 100 );
 		clearAudioBuffer( m_inputBuffer[i], m_inputBufferSize[i] );
 	}
@@ -337,10 +336,10 @@ mixer::mixer( void ) :
 		m_fifo = new fifo( 1 );
 	}

-	m_workingBuf = alignedAllocFrames( m_framesPerPeriod );
+	m_workingBuf = CPU::allocFrames( m_framesPerPeriod );
 	for( Uint8 i = 0; i < 3; i++ )
 	{
-		m_readBuf = alignedAllocFrames( m_framesPerPeriod );
+		m_readBuf = CPU::allocFrames( m_framesPerPeriod );
 		clearAudioBuffer( m_readBuf, m_framesPerPeriod );
 		m_bufferPool.push_back( m_readBuf );
 	}
@@ -389,10 +388,10 @@ mixer::~mixer()

 	for( Uint8 i = 0; i < 3; i++ )
 	{
-		alignedFreeFrames( m_bufferPool[i] );
+		CPU::freeFrames( m_bufferPool[i] );
 	}

-	alignedFreeFrames( m_workingBuf );
+	CPU::freeFrames( m_workingBuf );
 }


@@ -504,9 +503,9 @@ void mixer::pushInputFrames( sampleFrame * _ab, const f_cnt_t _frames )
 	if( frames + _frames > size )
 	{
 		size = qMax( size * 2, frames + _frames );
-		sampleFrame * ab = alignedAllocFrames( size );
-		alignedMemCpy( ab, buf, frames * sizeof( sampleFrame ) );
-		alignedFreeFrames( buf );
+		sampleFrame * ab = CPU::allocFrames( size );
+		CPU::memCpy( ab, buf, frames * sizeof( sampleFrame ) );
+		CPU::freeFrames( buf );

 		m_inputBufferSize[ m_inputBufferWrite ] = size;
 		m_inputBuffer[ m_inputBufferWrite ] = ab;
@@ -514,7 +513,7 @@ void mixer::pushInputFrames( sampleFrame * _ab, const f_cnt_t _frames )
 		buf = ab;
 	}
 	
-	alignedMemCpy( &buf[ frames ], _ab, _frames * sizeof( sampleFrame ) );
+	CPU::memCpy( &buf[ frames ], _ab, _frames * sizeof( sampleFrame ) );
 	m_inputBufferFrames[ m_inputBufferWrite ] += _frames;
 	
 	unlockInputFrames();
@@ -686,7 +685,7 @@ void mixer::bufferToPort( const sampleFrame * _buf,
 	const int loop1_frame = qMin<int>( end_frame, m_framesPerPeriod );

 	_port->lockFirstBuffer();
-	unalignedBufMixLRCoeff( _port->firstBuffer() + start_frame,
+	CPU::unalignedBufMixLRCoeff( _port->firstBuffer() + start_frame,
 					_buf, _vv.vol[0], _vv.vol[1],
 						loop1_frame - start_frame );
 	_port->unlockFirstBuffer();
@@ -697,7 +696,7 @@ void mixer::bufferToPort( const sampleFrame * _buf,
 		const int frames_done = m_framesPerPeriod - start_frame;
 		end_frame -= m_framesPerPeriod;
 		end_frame = qMin<int>( end_frame, m_framesPerPeriod );
-		unalignedBufMixLRCoeff( _port->secondBuffer(),
+		CPU::unalignedBufMixLRCoeff( _port->secondBuffer(),
 						_buf+frames_done,
 						_vv.vol[0], _vv.vol[1],
 						end_frame );
@@ -720,7 +719,7 @@ void mixer::clearAudioBuffer( sampleFrame * _ab, const f_cnt_t _frames,
 {
 	if( likely( (size_t)( _ab+_offset ) % 16 == 0 && _frames % 8 == 0 ) )
 	{
-		alignedMemClear( _ab+_offset, sizeof( *_ab ) * _frames );
+		CPU::memClear( _ab+_offset, sizeof( *_ab ) * _frames );
 	}
 	else
 	{
@@ -1157,9 +1156,9 @@ void mixer::fifoWriter::run( void )
 	const fpp_t frames = m_mixer->framesPerPeriod();
 	while( m_writing )
 	{
-		sampleFrameA * buffer = alignedAllocFrames( frames );
+		sampleFrameA * buffer = CPU::allocFrames( frames );
 		const sampleFrameA * b = m_mixer->renderNextBuffer();
-		alignedMemCpy( buffer, b, frames * sizeof( sampleFrameA ) );
+		CPU::memCpy( buffer, b, frames * sizeof( sampleFrameA ) );
 		m_fifo->write( buffer );
 	}