CPU: new framework for optimized CPU-specific routines
The new CPU framework replaces the old BasicOps framework. It is more flexible, and the build process is no longer such a mess (no more pre-compiled assembler files, etc.). It will hopefully see some improvements and extensions soon. Signed-off-by: Tobias Doerffel <tobias.doerffel@gmail.com>
This commit is contained in:
@@ -454,47 +454,42 @@ SET(LMMS_ER_H ${CMAKE_CURRENT_BINARY_DIR}/embedded_resources.h)
|
||||
ADD_FILE_DEPENDENCIES(${CMAKE_BINARY_DIR}/lmmsconfig.h ${lmms_MOC_out})
|
||||
|
||||
ADD_CUSTOM_COMMAND(OUTPUT ${LMMS_ER_H} COMMAND ${BIN2RES} ARGS ${lmms_EMBEDDED_RESOURCES} > ${LMMS_ER_H} DEPENDS ${BIN2RES})
|
||||
SET(BASIC_OPS_X86_C "${CMAKE_SOURCE_DIR}/src/core/basic_ops_x86.c")
|
||||
|
||||
# build CPU specific optimized modules
|
||||
IF(LMMS_HOST_X86 OR LMMS_HOST_X86_64)
|
||||
|
||||
ADD_CUSTOM_TARGET(regen-basic-ops)
|
||||
|
||||
IF(LMMS_HOST_X86)
|
||||
SET(opt_targets mmx sse sse2)
|
||||
SET(host_arch x86)
|
||||
ELSE(LMMS_HOST_X86)
|
||||
SET(opt_targets sse sse2)
|
||||
SET(host_arch x86_64)
|
||||
ENDIF(LMMS_HOST_X86)
|
||||
IF(LMMS_HOST_X86)
|
||||
SET(opt_targets mmx sse sse2)
|
||||
ELSE(LMMS_HOST_X86)
|
||||
SET(opt_targets sse sse2)
|
||||
ENDIF(LMMS_HOST_X86)
|
||||
|
||||
FOREACH(opt_target ${opt_targets})
|
||||
|
||||
STRING(TOUPPER ${opt_target} OPT_TARGET)
|
||||
|
||||
SET(BASIC_OPS_X86_TARGET_S "${CMAKE_SOURCE_DIR}/src/core/basic_ops_${host_arch}_${opt_target}.s")
|
||||
SET(BASIC_OPS_X86_TARGET_O "${CMAKE_BINARY_DIR}/basic_ops_${host_arch}_${opt_target}.o")
|
||||
SET(BASIC_OPS_X86_TARGET_S "")
|
||||
SET(CPU_X86_C "${CMAKE_SOURCE_DIR}/src/core/CpuX86.c")
|
||||
SET(CPU_X86_TARGET_O "${CMAKE_BINARY_DIR}/CpuX86_${opt_target}.o")
|
||||
SET(FPMATH_FLAGS "")
|
||||
IF(NOT "${OPT_TARGET}" STREQUAL "MMX")
|
||||
SET(FPMATH_FLAGS "-mfpmath=sse")
|
||||
ENDIF(NOT "${OPT_TARGET}" STREQUAL "MMX")
|
||||
IF(EXISTS "$ENV{SVN_C_COMPILER}")
|
||||
SET(C_COMPILER $ENV{SVN_C_COMPILER})
|
||||
ELSE(EXISTS "$ENV{SVN_C_COMPILER}")
|
||||
SET(C_COMPILER ${CMAKE_C_COMPILER})
|
||||
ENDIF(EXISTS "$ENV{SVN_C_COMPILER}")
|
||||
IF("${CMAKE_C_COMPILER_ARG1}" STREQUAL " gcc")
|
||||
SET(CMAKE_C_COMPILER_ARG1 gcc)
|
||||
ENDIF("${CMAKE_C_COMPILER_ARG1}" STREQUAL " gcc")
|
||||
ADD_CUSTOM_TARGET(regen-basic-ops-${opt_target} COMMAND ${C_COMPILER} -O2 -fno-stack-protector -ftree-vectorize -ftree-vectorizer-verbose=2 -fomit-frame-pointer -c -S -I${CMAKE_SOURCE_DIR}/include -I${CMAKE_BINARY_DIR} -g0 -DBUILD_${OPT_TARGET} -m${opt_target} ${FPMATH_FLAGS} -o ${BASIC_OPS_X86_TARGET_S} ${BASIC_OPS_X86_C} DEPENDS ${BASIC_OPS_X86_C})
|
||||
ADD_CUSTOM_COMMAND(OUTPUT ${BASIC_OPS_X86_TARGET_O} COMMAND ${CMAKE_C_COMPILER} ARGS ${CMAKE_C_COMPILER_ARG1} ${BASIC_OPS_X86_TARGET_S} -c -o ${BASIC_OPS_X86_TARGET_O} DEPENDS ${BASIC_OPS_X86_TARGET_S})
|
||||
ADD_DEPENDENCIES(regen-basic-ops regen-basic-ops-${opt_target})
|
||||
SET(opt_target_objects ${opt_target_objects} ${BASIC_OPS_X86_TARGET_O})
|
||||
SET(COMPILE_CMD ${CMAKE_C_COMPILER} ${CPU_X86_C} -O2 -fno-stack-protector -ftree-vectorize -fomit-frame-pointer -c -I${CMAKE_SOURCE_DIR}/include -I${CMAKE_BINARY_DIR} -g0 -DBUILD_${OPT_TARGET} -m${opt_target} ${FPMATH_FLAGS})
|
||||
ADD_CUSTOM_COMMAND(OUTPUT ${CPU_X86_TARGET_O} COMMAND ${COMPILE_CMD} -o ${CPU_X86_TARGET_O} DEPENDS ${CPU_X86_C})
|
||||
ADD_CUSTOM_TARGET(debug-${opt_target} COMMAND ${COMPILE_CMD} -o ${CPU_X86_TARGET_O}.s -S -ftree-vectorizer-verbose=2)
|
||||
SET(cpu_objects ${cpu_objects} ${CPU_X86_TARGET_O})
|
||||
|
||||
ENDFOREACH(opt_target ${opt_targets})
|
||||
SET(lmms_SOURCES ${lmms_SOURCES} ${opt_target_objects})
|
||||
# to be used by maintainer with special ultra-optimizing super duper GCC
|
||||
|
||||
SET(lmms_SOURCES ${lmms_SOURCES} ${cpu_objects})
|
||||
|
||||
ENDIF(LMMS_HOST_X86 OR LMMS_HOST_X86_64)
|
||||
|
||||
|
||||
IF(WIN32)
|
||||
SET(WINRC "${CMAKE_BINARY_DIR}/lmmsrc.obj")
|
||||
IF(LMMS_HOST_X86_64)
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
/*
|
||||
* basic_ops.h - basic memory operations
|
||||
* Cpu.h - CPU specific accelerated operations
|
||||
*
|
||||
* Copyright (c) 2008-2009 Tobias Doerffel <tobydox/at/users.sourceforge.net>
|
||||
*
|
||||
* Copyright (c) 2008 Tobias Doerffel <tobydox/at/users.sourceforge.net>
|
||||
*
|
||||
* This file is part of Linux MultiMedia Studio - http://lmms.sourceforge.net
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
@@ -22,9 +22,8 @@
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#ifndef _BASIC_OPS_H
|
||||
#define _BASIC_OPS_H
|
||||
#ifndef _CPU_H
|
||||
#define _CPU_H
|
||||
|
||||
#include "lmms_basics.h"
|
||||
|
||||
@@ -32,56 +31,64 @@
|
||||
#include <stdbool.h>
|
||||
#endif
|
||||
|
||||
void initBasicOps( void );
|
||||
#ifdef __cplusplus
|
||||
namespace CPU
|
||||
{
|
||||
#endif
|
||||
|
||||
void * alignedMalloc( int _bytes );
|
||||
void alignedFree( void * _buf );
|
||||
void init();
|
||||
|
||||
sampleFrameA * alignedAllocFrames( int _frames );
|
||||
void alignedFreeFrames( sampleFrameA * _buf );
|
||||
void * memAlloc( int _bytes );
|
||||
void memFree( void * _buf );
|
||||
|
||||
sampleFrameA * allocFrames( int _frames );
|
||||
void freeFrames( sampleFrameA * _buf );
|
||||
|
||||
|
||||
// all aligned* functions assume data to be 16 byte aligned and size to be
|
||||
// multiples of 64
|
||||
typedef void (*alignedMemCpyFunc)( void * RP _dst, const void * RP _src,
|
||||
// all functions assume data to be 16 byte aligned and size to be
|
||||
// multiples of 64 (except for unaligned*())
|
||||
typedef void (*MemCpyFunc)( void * RP _dst, const void * RP _src,
|
||||
int _size );
|
||||
typedef void (*alignedMemClearFunc)( void * RP _dst, int _size );
|
||||
typedef void (*alignedBufApplyGainFunc)( sampleFrameA * RP _dst,
|
||||
typedef void (*MemClearFunc)( void * RP _dst, int _size );
|
||||
typedef void (*BufApplyGainFunc)( sampleFrameA * RP _dst,
|
||||
float _gain, int _frames );
|
||||
typedef void (*alignedBufMixFunc)( sampleFrameA * RP _dst,
|
||||
typedef void (*BufMixFunc)( sampleFrameA * RP _dst,
|
||||
const sampleFrameA * RP _src,
|
||||
int _frames );
|
||||
typedef void (*alignedBufMixLRCoeffFunc)( sampleFrameA * RP _dst,
|
||||
typedef void (*BufMixLRCoeffFunc)( sampleFrameA * RP _dst,
|
||||
const sampleFrameA * RP _src,
|
||||
float _left, float _right,
|
||||
int _frames );
|
||||
typedef void (*unalignedBufMixLRCoeffFunc)( sampleFrame * RP _dst,
|
||||
typedef void (*UnalignedBufMixLRCoeffFunc)( sampleFrame * RP _dst,
|
||||
const sampleFrame * RP _src,
|
||||
float _left, float _right,
|
||||
int _frames );
|
||||
typedef void (*alignedBufWetDryMixFunc)( sampleFrameA * RP _dst,
|
||||
typedef void (*BufWetDryMixFunc)( sampleFrameA * RP _dst,
|
||||
const sampleFrameA * RP _src,
|
||||
float _wet, float _dry, int _frames );
|
||||
typedef void (*alignedBufWetDryMixSplittedFunc)( sampleFrameA * RP _dst,
|
||||
typedef void (*BufWetDryMixSplittedFunc)( sampleFrameA * RP _dst,
|
||||
const float * RP _left,
|
||||
const float * RP _right,
|
||||
float _wet, float _dry, int _frames );
|
||||
typedef int (*alignedConvertToS16Func)( const sampleFrameA * RP _src,
|
||||
typedef int (*ConvertToS16Func)( const sampleFrameA * RP _src,
|
||||
intSampleFrameA * RP _dst,
|
||||
const fpp_t _frames,
|
||||
const float _master_gain,
|
||||
const bool _convert_endian );
|
||||
|
||||
extern alignedMemCpyFunc alignedMemCpy;
|
||||
extern alignedMemClearFunc alignedMemClear;
|
||||
extern alignedBufApplyGainFunc alignedBufApplyGain;
|
||||
extern alignedBufMixFunc alignedBufMix;
|
||||
extern alignedBufMixLRCoeffFunc alignedBufMixLRCoeff;
|
||||
extern unalignedBufMixLRCoeffFunc unalignedBufMixLRCoeff;
|
||||
extern alignedBufWetDryMixFunc alignedBufWetDryMix;
|
||||
extern alignedBufWetDryMixSplittedFunc alignedBufWetDryMixSplitted;
|
||||
extern alignedConvertToS16Func alignedConvertToS16;
|
||||
extern MemCpyFunc memCpy;
|
||||
extern MemClearFunc memClear;
|
||||
extern BufApplyGainFunc bufApplyGain;
|
||||
extern BufMixFunc bufMix;
|
||||
extern BufMixLRCoeffFunc bufMixLRCoeff;
|
||||
extern UnalignedBufMixLRCoeffFunc unalignedBufMixLRCoeff;
|
||||
extern BufWetDryMixFunc bufWetDryMix;
|
||||
extern BufWetDryMixSplittedFunc bufWetDryMixSplitted;
|
||||
extern ConvertToS16Func convertToS16;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef LMMS_HOST_X86
|
||||
#define X86_OPTIMIZATIONS
|
||||
@@ -1,8 +1,8 @@
|
||||
/*
|
||||
* audio_dummy.h - dummy-audio-device
|
||||
*
|
||||
* Copyright (c) 2004-2008 Tobias Doerffel <tobydox/at/users.sourceforge.net>
|
||||
*
|
||||
* Copyright (c) 2004-2009 Tobias Doerffel <tobydox/at/users.sourceforge.net>
|
||||
*
|
||||
* This file is part of Linux MultiMedia Studio - http://lmms.sourceforge.net
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
@@ -22,12 +22,11 @@
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#ifndef _AUDIO_DUMMY_H
|
||||
#define _AUDIO_DUMMY_H
|
||||
|
||||
#include "audio_device.h"
|
||||
#include "basic_ops.h"
|
||||
#include "Cpu.h"
|
||||
#include "micro_timer.h"
|
||||
|
||||
|
||||
@@ -45,7 +44,7 @@ public:
|
||||
stopProcessing();
|
||||
}
|
||||
|
||||
inline static QString name( void )
|
||||
inline static QString name()
|
||||
{
|
||||
return( QT_TRANSLATE_NOOP( "setupWidget",
|
||||
"Dummy (no sound output)" ) );
|
||||
@@ -64,11 +63,11 @@ public:
|
||||
{
|
||||
}
|
||||
|
||||
virtual void saveSettings( void )
|
||||
virtual void saveSettings()
|
||||
{
|
||||
}
|
||||
|
||||
virtual void show( void )
|
||||
virtual void show()
|
||||
{
|
||||
parentWidget()->hide();
|
||||
QWidget::show();
|
||||
@@ -78,12 +77,12 @@ public:
|
||||
|
||||
|
||||
private:
|
||||
virtual void startProcessing( void )
|
||||
virtual void startProcessing()
|
||||
{
|
||||
start();
|
||||
}
|
||||
|
||||
virtual void stopProcessing( void )
|
||||
virtual void stopProcessing()
|
||||
{
|
||||
if( isRunning() )
|
||||
{
|
||||
@@ -92,7 +91,7 @@ private:
|
||||
}
|
||||
}
|
||||
|
||||
virtual void run( void )
|
||||
virtual void run()
|
||||
{
|
||||
microTimer timer;
|
||||
while( true )
|
||||
@@ -104,7 +103,7 @@ private:
|
||||
{
|
||||
break;
|
||||
}
|
||||
alignedFreeFrames( b );
|
||||
CPU::freeFrames( b );
|
||||
|
||||
const Sint32 microseconds = static_cast<Sint32>(
|
||||
getMixer()->framesPerPeriod() *
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
*
|
||||
* Copyright (c) 2006-2008 Danny McRae <khjklujn/at/users.sourceforge.net>
|
||||
* Copyright (c) 2009 Tobias Doerffel <tobydox/at/users.sourceforge.net>
|
||||
*
|
||||
*
|
||||
* This file is part of Linux MultiMedia Studio - http://lmms.sourceforge.net
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
@@ -23,7 +23,6 @@
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#include <QtGui/QMessageBox>
|
||||
|
||||
#include "ladspa_effect.h"
|
||||
@@ -35,7 +34,7 @@
|
||||
#include "ladspa_subplugin_features.h"
|
||||
#include "mixer.h"
|
||||
#include "effect_chain.h"
|
||||
#include "basic_ops.h"
|
||||
#include "Cpu.h"
|
||||
#include "automation_pattern.h"
|
||||
#include "controller_connection.h"
|
||||
|
||||
@@ -82,7 +81,7 @@ ladspaEffect::ladspaEffect( model * _parent,
|
||||
arg( m_key.second ),
|
||||
QMessageBox::Ok, QMessageBox::NoButton );
|
||||
}
|
||||
setOkay( FALSE );
|
||||
setOkay( false );
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -105,7 +104,7 @@ ladspaEffect::~ladspaEffect()
|
||||
|
||||
|
||||
|
||||
void ladspaEffect::changeSampleRate( void )
|
||||
void ladspaEffect::changeSampleRate()
|
||||
{
|
||||
multimediaProject mmp( multimediaProject::EffectSettings );
|
||||
m_controls->saveState( mmp, mmp.content() );
|
||||
@@ -141,7 +140,7 @@ bool ladspaEffect::processAudioBuffer( sampleFrame * _buf,
|
||||
if( !isOkay() || dontRun() || !isRunning() || !isEnabled() )
|
||||
{
|
||||
m_pluginMutex.unlock();
|
||||
return( FALSE );
|
||||
return false;
|
||||
}
|
||||
|
||||
int frames = _frames;
|
||||
@@ -150,7 +149,7 @@ bool ladspaEffect::processAudioBuffer( sampleFrame * _buf,
|
||||
if( m_maxSampleRate < engine::getMixer()->processingSampleRate() )
|
||||
{
|
||||
o_buf = _buf;
|
||||
_buf = alignedAllocFrames( _frames );
|
||||
_buf = CPU::allocFrames( _frames );
|
||||
sampleDown( o_buf, _buf, m_maxSampleRate );
|
||||
frames = _frames * m_maxSampleRate /
|
||||
engine::getMixer()->processingSampleRate();
|
||||
@@ -258,7 +257,7 @@ bool ladspaEffect::processAudioBuffer( sampleFrame * _buf,
|
||||
}
|
||||
if( channel >= 1 && channel <= DEFAULT_CHANNELS )
|
||||
{
|
||||
alignedBufWetDryMixSplitted( _buf, buffers[0], buffers[1],
|
||||
CPU::bufWetDryMixSplitted( _buf, buffers[0], buffers[1],
|
||||
getWetLevel(), getDryLevel(), frames );
|
||||
}
|
||||
|
||||
@@ -272,7 +271,7 @@ bool ladspaEffect::processAudioBuffer( sampleFrame * _buf,
|
||||
if( o_buf != NULL )
|
||||
{
|
||||
sampleBack( _buf, o_buf, m_maxSampleRate );
|
||||
alignedFreeFrames( _buf );
|
||||
CPU::freeFrames( _buf );
|
||||
}
|
||||
|
||||
checkGate( out_sum / frames );
|
||||
@@ -280,7 +279,7 @@ bool ladspaEffect::processAudioBuffer( sampleFrame * _buf,
|
||||
|
||||
bool is_running = isRunning();
|
||||
m_pluginMutex.unlock();
|
||||
return( is_running );
|
||||
return is_running;
|
||||
}
|
||||
|
||||
|
||||
@@ -298,7 +297,7 @@ void ladspaEffect::setControl( int _control, LADSPA_Data _value )
|
||||
|
||||
|
||||
|
||||
void ladspaEffect::pluginInstantiation( void )
|
||||
void ladspaEffect::pluginInstantiation()
|
||||
{
|
||||
m_maxSampleRate = maxSamplerate( displayName() );
|
||||
|
||||
@@ -469,7 +468,7 @@ void ladspaEffect::pluginInstantiation( void )
|
||||
QMessageBox::warning( 0, "Effect",
|
||||
"Can't get LADSPA descriptor function: " + m_key.second,
|
||||
QMessageBox::Ok, QMessageBox::NoButton );
|
||||
setOkay( FALSE );
|
||||
setOkay( false );
|
||||
return;
|
||||
}
|
||||
if( m_descriptor->run == NULL )
|
||||
@@ -477,7 +476,7 @@ void ladspaEffect::pluginInstantiation( void )
|
||||
QMessageBox::warning( 0, "Effect",
|
||||
"Plugin has no processor: " + m_key.second,
|
||||
QMessageBox::Ok, QMessageBox::NoButton );
|
||||
setDontRun( TRUE );
|
||||
setDontRun( true );
|
||||
}
|
||||
for( ch_cnt_t proc = 0; proc < getProcessorCount(); proc++ )
|
||||
{
|
||||
@@ -488,7 +487,7 @@ void ladspaEffect::pluginInstantiation( void )
|
||||
QMessageBox::warning( 0, "Effect",
|
||||
"Can't get LADSPA instance: " + m_key.second,
|
||||
QMessageBox::Ok, QMessageBox::NoButton );
|
||||
setOkay( FALSE );
|
||||
setOkay( false );
|
||||
return;
|
||||
}
|
||||
m_handles.append( effect );
|
||||
@@ -508,7 +507,7 @@ void ladspaEffect::pluginInstantiation( void )
|
||||
QMessageBox::warning( 0, "Effect",
|
||||
"Failed to connect port: " + m_key.second,
|
||||
QMessageBox::Ok, QMessageBox::NoButton );
|
||||
setDontRun( TRUE );
|
||||
setDontRun( true );
|
||||
return;
|
||||
}
|
||||
}
|
||||
@@ -525,7 +524,7 @@ void ladspaEffect::pluginInstantiation( void )
|
||||
|
||||
|
||||
|
||||
void ladspaEffect::pluginDestruction( void )
|
||||
void ladspaEffect::pluginDestruction()
|
||||
{
|
||||
if( !isOkay() )
|
||||
{
|
||||
@@ -571,9 +570,9 @@ sample_rate_t ladspaEffect::maxSamplerate( const QString & _name )
|
||||
}
|
||||
if( __buggy_plugins.contains( _name ) )
|
||||
{
|
||||
return( __buggy_plugins[_name] );
|
||||
return __buggy_plugins[_name];
|
||||
}
|
||||
return( engine::getMixer()->processingSampleRate() );
|
||||
return engine::getMixer()->processingSampleRate();
|
||||
}
|
||||
|
||||
|
||||
@@ -585,9 +584,9 @@ extern "C"
|
||||
// necessary for getting instance out of shared lib
|
||||
plugin * PLUGIN_EXPORT lmms_plugin_main( model * _parent, void * _data )
|
||||
{
|
||||
return( new ladspaEffect( _parent,
|
||||
return new ladspaEffect( _parent,
|
||||
static_cast<const plugin::descriptor::subPluginFeatures::key *>(
|
||||
_data ) ) );
|
||||
_data ) );
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
/*
|
||||
* basic_ops.cpp - basic memory operations
|
||||
* Cpu.cpp - CPU specific accelerated operations
|
||||
*
|
||||
* Copyright (c) 2008-2009 Tobias Doerffel <tobydox/at/users.sourceforge.net>
|
||||
*
|
||||
* Copyright (c) 2008 Tobias Doerffel <tobydox/at/users.sourceforge.net>
|
||||
*
|
||||
* This file is part of Linux MultiMedia Studio - http://lmms.sourceforge.net
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
@@ -23,17 +23,19 @@
|
||||
*/
|
||||
|
||||
|
||||
#include "basic_ops.h"
|
||||
#include "Cpu.h"
|
||||
|
||||
#include <cstdlib>
|
||||
#include <cstdio>
|
||||
#include <memory.h>
|
||||
|
||||
|
||||
|
||||
void * alignedMalloc( int _bytes )
|
||||
namespace CPU
|
||||
{
|
||||
char *ptr,*ptr2,*aligned_ptr;
|
||||
|
||||
void * memAlloc( int _bytes )
|
||||
{
|
||||
char *ptr,*ptr2,*_ptr;
|
||||
int align_mask = ALIGN_SIZE- 1;
|
||||
ptr =(char *) malloc( _bytes + ALIGN_SIZE + sizeof(int) );
|
||||
if( ptr == NULL )
|
||||
@@ -42,17 +44,19 @@ void * alignedMalloc( int _bytes )
|
||||
}
|
||||
|
||||
ptr2 = ptr + sizeof(int);
|
||||
aligned_ptr = ptr2 + ( ALIGN_SIZE- ( (size_t) ptr2 & align_mask ) );
|
||||
_ptr = ptr2 + ( ALIGN_SIZE- ( (size_t) ptr2 & align_mask ) );
|
||||
|
||||
|
||||
ptr2 = aligned_ptr - sizeof(int);
|
||||
*((int *) ptr2) = (int)( aligned_ptr - ptr );
|
||||
ptr2 = _ptr - sizeof(int);
|
||||
*((int *) ptr2) = (int)( _ptr - ptr );
|
||||
|
||||
return aligned_ptr;
|
||||
return _ptr;
|
||||
}
|
||||
|
||||
|
||||
void alignedFree( void * _buf )
|
||||
|
||||
|
||||
void memFree( void * _buf )
|
||||
{
|
||||
if( _buf )
|
||||
{
|
||||
@@ -66,22 +70,26 @@ void alignedFree( void * _buf )
|
||||
}
|
||||
|
||||
|
||||
sampleFrameA * alignedAllocFrames( int _n )
|
||||
|
||||
|
||||
sampleFrameA * allocFrames( int _n )
|
||||
{
|
||||
return (sampleFrameA *) alignedMalloc( _n * sizeof( sampleFrameA ) );
|
||||
return (sampleFrameA *) memAlloc( _n * sizeof( sampleFrameA ) );
|
||||
}
|
||||
|
||||
|
||||
void alignedFreeFrames( sampleFrame * _buf )
|
||||
|
||||
|
||||
void freeFrames( sampleFrame * _buf )
|
||||
{
|
||||
alignedFree( _buf );
|
||||
memFree( _buf );
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
// slow fallback
|
||||
void alignedMemCpyNoOpt( void * RP _dst, const void * RP _src, int _size )
|
||||
void memCpyNoOpt( void * RP _dst, const void * RP _src, int _size )
|
||||
{
|
||||
const int s = _size / sizeof( int );
|
||||
const int * RP src = (const int *) _src;
|
||||
@@ -110,7 +118,7 @@ void alignedMemCpyNoOpt( void * RP _dst, const void * RP _src, int _size )
|
||||
|
||||
|
||||
// slow fallback
|
||||
void alignedMemClearNoOpt( void * _dst, int _size )
|
||||
void memClearNoOpt( void * _dst, int _size )
|
||||
{
|
||||
const int s = _size / ( sizeof( int ) * 4 );
|
||||
int * dst = (int *) _dst;
|
||||
@@ -126,7 +134,7 @@ void alignedMemClearNoOpt( void * _dst, int _size )
|
||||
|
||||
|
||||
|
||||
void alignedBufApplyGainNoOpt( sampleFrameA * RP _dst, float _gain,
|
||||
void bufApplyGainNoOpt( sampleFrameA * RP _dst, float _gain,
|
||||
int _frames )
|
||||
{
|
||||
for( int i = 0; i < _frames; )
|
||||
@@ -152,7 +160,7 @@ void alignedBufApplyGainNoOpt( sampleFrameA * RP _dst, float _gain,
|
||||
}
|
||||
|
||||
|
||||
void alignedBufMixNoOpt( sampleFrameA * RP _dst, const sampleFrameA * RP _src,
|
||||
void bufMixNoOpt( sampleFrameA * RP _dst, const sampleFrameA * RP _src,
|
||||
int _frames )
|
||||
{
|
||||
for( int i = 0; i < _frames; )
|
||||
@@ -171,7 +179,7 @@ void alignedBufMixNoOpt( sampleFrameA * RP _dst, const sampleFrameA * RP _src,
|
||||
|
||||
|
||||
|
||||
void alignedBufMixLRCoeffNoOpt( sampleFrameA * RP _dst,
|
||||
void bufMixLRCoeffNoOpt( sampleFrameA * RP _dst,
|
||||
const sampleFrameA * RP _src,
|
||||
float _left, float _right, int _frames )
|
||||
{
|
||||
@@ -217,7 +225,7 @@ void unalignedBufMixLRCoeffNoOpt( sampleFrame * RP _dst,
|
||||
|
||||
|
||||
|
||||
void alignedBufWetDryMixNoOpt( sampleFrameA * RP _dst,
|
||||
void bufWetDryMixNoOpt( sampleFrameA * RP _dst,
|
||||
const sampleFrameA * RP _src,
|
||||
float _wet, float _dry, int _frames )
|
||||
{
|
||||
@@ -231,7 +239,7 @@ void alignedBufWetDryMixNoOpt( sampleFrameA * RP _dst,
|
||||
|
||||
|
||||
|
||||
void alignedBufWetDryMixSplittedNoOpt( sampleFrameA * RP _dst,
|
||||
void bufWetDryMixSplittedNoOpt( sampleFrameA * RP _dst,
|
||||
const float * RP _left,
|
||||
const float * RP _right,
|
||||
float _wet, float _dry, int _frames )
|
||||
@@ -248,7 +256,7 @@ void alignedBufWetDryMixSplittedNoOpt( sampleFrameA * RP _dst,
|
||||
|
||||
|
||||
|
||||
int alignedConvertToS16NoOpt( const sampleFrameA * RP _src,
|
||||
int convertToS16NoOpt( const sampleFrameA * RP _src,
|
||||
intSampleFrameA * RP _dst,
|
||||
const fpp_t _frames,
|
||||
const float _master_gain,
|
||||
@@ -294,15 +302,15 @@ int alignedConvertToS16NoOpt( const sampleFrameA * RP _src,
|
||||
}
|
||||
|
||||
|
||||
alignedMemCpyFunc alignedMemCpy = alignedMemCpyNoOpt;
|
||||
alignedMemClearFunc alignedMemClear = alignedMemClearNoOpt;
|
||||
alignedBufApplyGainFunc alignedBufApplyGain = alignedBufApplyGainNoOpt;
|
||||
alignedBufMixFunc alignedBufMix = alignedBufMixNoOpt;
|
||||
alignedBufMixLRCoeffFunc alignedBufMixLRCoeff = alignedBufMixLRCoeffNoOpt;
|
||||
unalignedBufMixLRCoeffFunc unalignedBufMixLRCoeff = unalignedBufMixLRCoeffNoOpt;
|
||||
alignedBufWetDryMixFunc alignedBufWetDryMix = alignedBufWetDryMixNoOpt;
|
||||
alignedBufWetDryMixSplittedFunc alignedBufWetDryMixSplitted = alignedBufWetDryMixSplittedNoOpt;
|
||||
alignedConvertToS16Func alignedConvertToS16 = alignedConvertToS16NoOpt;
|
||||
MemCpyFunc memCpy = memCpyNoOpt;
|
||||
MemClearFunc memClear = memClearNoOpt;
|
||||
BufApplyGainFunc bufApplyGain = bufApplyGainNoOpt;
|
||||
BufMixFunc bufMix = bufMixNoOpt;
|
||||
BufMixLRCoeffFunc bufMixLRCoeff = bufMixLRCoeffNoOpt;
|
||||
UnalignedBufMixLRCoeffFunc unalignedBufMixLRCoeff = unalignedBufMixLRCoeffNoOpt;
|
||||
BufWetDryMixFunc bufWetDryMix = bufWetDryMixNoOpt;
|
||||
BufWetDryMixSplittedFunc bufWetDryMixSplitted = bufWetDryMixSplittedNoOpt;
|
||||
ConvertToS16Func convertToS16 = convertToS16NoOpt;
|
||||
|
||||
|
||||
#ifdef X86_OPTIMIZATIONS
|
||||
@@ -322,28 +330,28 @@ enum CPUFeatures
|
||||
extern "C"
|
||||
{
|
||||
#ifdef LMMS_HOST_X86
|
||||
void alignedMemCpyMMX( void * RP _dst, const void * RP _src, int _size );
|
||||
void alignedMemClearMMX( void * RP _dst, int _size );
|
||||
void memCpyMMX( void * RP _dst, const void * RP _src, int _size );
|
||||
void memClearMMX( void * RP _dst, int _size );
|
||||
#endif
|
||||
void alignedMemCpySSE( void * RP _dst, const void * RP _src, int _size );
|
||||
void alignedMemClearSSE( void * RP _dst, int _size );
|
||||
void alignedBufApplyGainSSE( sampleFrameA * RP _dst, float _gain, int _frames );
|
||||
void alignedBufMixSSE( sampleFrameA * RP _dst, const sampleFrameA * RP _src, int _frames );
|
||||
void alignedBufMixLRCoeffSSE( sampleFrameA * RP _dst, const sampleFrameA * RP _src, float _left, float _right, int _frames );
|
||||
void memCpySSE( void * RP _dst, const void * RP _src, int _size );
|
||||
void memClearSSE( void * RP _dst, int _size );
|
||||
void bufApplyGainSSE( sampleFrameA * RP _dst, float _gain, int _frames );
|
||||
void bufMixSSE( sampleFrameA * RP _dst, const sampleFrameA * RP _src, int _frames );
|
||||
void bufMixLRCoeffSSE( sampleFrameA * RP _dst, const sampleFrameA * RP _src, float _left, float _right, int _frames );
|
||||
void unalignedBufMixLRCoeffSSE( sampleFrame * RP _dst, const sampleFrame * RP _src, const float _left, const float _right, int _frames );
|
||||
void alignedBufWetDryMixSSE( sampleFrameA * RP _dst, const sampleFrameA * RP _src, float _wet, float _dry, int _frames );
|
||||
void alignedBufWetDryMixSplittedSSE( sampleFrameA * RP _dst, const float * RP _left, const float * RP _right, float _wet, float _dry, int _frames );
|
||||
void bufWetDryMixSSE( sampleFrameA * RP _dst, const sampleFrameA * RP _src, float _wet, float _dry, int _frames );
|
||||
void bufWetDryMixSplittedSSE( sampleFrameA * RP _dst, const float * RP _left, const float * RP _right, float _wet, float _dry, int _frames );
|
||||
#ifdef X86_OPTIMIZATIONS
|
||||
void alignedMemCpySSE2( void * RP _dst, const void * RP _src, int _size );
|
||||
void alignedMemClearSSE2( void * RP _dst, int _size );
|
||||
int alignedConvertToS16SSE2( const sampleFrameA * RP _src, intSampleFrameA * RP _dst, const fpp_t _frames, const float _master_gain, const bool _convert_endian );
|
||||
void memCpySSE2( void * RP _dst, const void * RP _src, int _size );
|
||||
void memClearSSE2( void * RP _dst, int _size );
|
||||
int convertToS16SSE2( const sampleFrameA * RP _src, intSampleFrameA * RP _dst, const fpp_t _frames, const float _master_gain, const bool _convert_endian );
|
||||
#endif
|
||||
} ;
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
void initBasicOps( void )
|
||||
void init()
|
||||
{
|
||||
#ifdef X86_OPTIMIZATIONS
|
||||
static bool extensions_checked = false;
|
||||
@@ -428,29 +436,29 @@ void initBasicOps( void )
|
||||
#ifdef LMMS_HOST_X86
|
||||
if( features & MMX )
|
||||
{
|
||||
alignedMemCpy = alignedMemCpyMMX;
|
||||
alignedMemClear = alignedMemClearMMX;
|
||||
memCpy = memCpyMMX;
|
||||
memClear = memClearMMX;
|
||||
}
|
||||
#endif
|
||||
if( features & SSE )
|
||||
{
|
||||
fprintf( stderr, "Using SSE optimized routines\n" );
|
||||
alignedMemCpy = alignedMemCpySSE;
|
||||
alignedMemClear = alignedMemClearSSE;
|
||||
alignedBufApplyGain = alignedBufApplyGainSSE;
|
||||
alignedBufMix = alignedBufMixSSE;
|
||||
alignedBufMixLRCoeff = alignedBufMixLRCoeffSSE;
|
||||
memCpy = memCpySSE;
|
||||
memClear = memClearSSE;
|
||||
bufApplyGain = bufApplyGainSSE;
|
||||
bufMix = bufMixSSE;
|
||||
bufMixLRCoeff = bufMixLRCoeffSSE;
|
||||
unalignedBufMixLRCoeff = unalignedBufMixLRCoeffSSE;
|
||||
alignedBufWetDryMix = alignedBufWetDryMixSSE;
|
||||
alignedBufWetDryMixSplitted =
|
||||
alignedBufWetDryMixSplittedSSE;
|
||||
bufWetDryMix = bufWetDryMixSSE;
|
||||
bufWetDryMixSplitted =
|
||||
bufWetDryMixSplittedSSE;
|
||||
}
|
||||
if( features & SSE2 )
|
||||
{
|
||||
fprintf( stderr, "Using SSE2 optimized routines\n" );
|
||||
alignedMemCpy = alignedMemCpySSE2;
|
||||
alignedMemClear = alignedMemClearSSE2;
|
||||
alignedConvertToS16 = alignedConvertToS16SSE2;
|
||||
memCpy = memCpySSE2;
|
||||
memClear = memClearSSE2;
|
||||
convertToS16 = convertToS16SSE2;
|
||||
}
|
||||
extensions_checked = true;
|
||||
}
|
||||
@@ -458,4 +466,5 @@ void initBasicOps( void )
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
/*
|
||||
* basic_ops_x86.c - x86 specific optimized operations
|
||||
* CpuX86.c - x86 specific optimized operations
|
||||
*
|
||||
* Copyright (c) 2008-2009 Tobias Doerffel <tobydox/at/users.sourceforge.net>
|
||||
*
|
||||
* Copyright (c) 2008 Tobias Doerffel <tobydox/at/users.sourceforge.net>
|
||||
*
|
||||
* This file is part of Linux MultiMedia Studio - http://lmms.sourceforge.net
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
@@ -22,9 +22,7 @@
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
|
||||
#include "basic_ops.h"
|
||||
#include "Cpu.h"
|
||||
|
||||
#ifdef X86_OPTIMIZATIONS
|
||||
|
||||
@@ -32,7 +30,7 @@
|
||||
|
||||
#include <mmintrin.h>
|
||||
|
||||
void alignedMemCpyMMX( void * RP _dst, const void * RP _src, int _size )
|
||||
void memCpyMMX( void * RP _dst, const void * RP _src, int _size )
|
||||
{
|
||||
const int s = _size / ( sizeof( __m64 ) * 8 );
|
||||
int i;
|
||||
@@ -79,7 +77,7 @@ void alignedMemCpyMMX( void * RP _dst, const void * RP _src, int _size )
|
||||
|
||||
|
||||
|
||||
void alignedMemClearMMX( void * RP _dst, int _size )
|
||||
void memClearMMX( void * RP _dst, int _size )
|
||||
{
|
||||
__m64 * dst = (__m64 *) _dst;
|
||||
const int s = _size / ( sizeof( *dst ) * 8 );
|
||||
@@ -109,7 +107,7 @@ void alignedMemClearMMX( void * RP _dst, int _size )
|
||||
|
||||
#include <xmmintrin.h>
|
||||
|
||||
void alignedMemCpySSE( void * RP _dst, const void * RP _src, int _size )
|
||||
void memCpySSE( void * RP _dst, const void * RP _src, int _size )
|
||||
{
|
||||
__m128 * dst = (__m128 *) _dst;
|
||||
__m128 * src = (__m128 *) _src;
|
||||
@@ -133,7 +131,7 @@ void alignedMemCpySSE( void * RP _dst, const void * RP _src, int _size )
|
||||
|
||||
|
||||
|
||||
void alignedMemClearSSE( void * RP _dst, int _size )
|
||||
void memClearSSE( void * RP _dst, int _size )
|
||||
{
|
||||
__m128 * dst = (__m128 *) _dst;
|
||||
const int s = _size / ( sizeof( *dst ) * 4 );
|
||||
@@ -152,7 +150,7 @@ void alignedMemClearSSE( void * RP _dst, int _size )
|
||||
|
||||
|
||||
|
||||
void alignedBufApplyGainSSE( sampleFrameA * RP _dst, float _gain, int _frames )
|
||||
void bufApplyGainSSE( sampleFrameA * RP _dst, float _gain, int _frames )
|
||||
{
|
||||
int i;
|
||||
for( i = 0; i < _frames; )
|
||||
@@ -180,7 +178,7 @@ void alignedBufApplyGainSSE( sampleFrameA * RP _dst, float _gain, int _frames )
|
||||
|
||||
|
||||
|
||||
void alignedBufMixSSE( sampleFrameA * RP _dst, const sampleFrameA * RP _src,
|
||||
void bufMixSSE( sampleFrameA * RP _dst, const sampleFrameA * RP _src,
|
||||
int _frames )
|
||||
{
|
||||
int i;
|
||||
@@ -209,7 +207,7 @@ void alignedBufMixSSE( sampleFrameA * RP _dst, const sampleFrameA * RP _src,
|
||||
|
||||
|
||||
|
||||
void alignedBufMixLRCoeffSSE( sampleFrameA * RP _dst,
|
||||
void bufMixLRCoeffSSE( sampleFrameA * RP _dst,
|
||||
const sampleFrameA * RP _src,
|
||||
float _left, float _right, int _frames )
|
||||
{
|
||||
@@ -257,7 +255,7 @@ void unalignedBufMixLRCoeffSSE( sampleFrame * RP _dst, const sampleFrame * RP _s
|
||||
|
||||
|
||||
|
||||
void alignedBufWetDryMixSSE( sampleFrameA * RP _dst,
|
||||
void bufWetDryMixSSE( sampleFrameA * RP _dst,
|
||||
const sampleFrameA * RP _src,
|
||||
float _wet, float _dry, int _frames )
|
||||
{
|
||||
@@ -279,7 +277,7 @@ void alignedBufWetDryMixSSE( sampleFrameA * RP _dst,
|
||||
|
||||
|
||||
|
||||
void alignedBufWetDryMixSplittedSSE( sampleFrameA * RP _dst,
|
||||
void bufWetDryMixSplittedSSE( sampleFrameA * RP _dst,
|
||||
const float * RP _left,
|
||||
const float * RP _right,
|
||||
float _wet, float _dry, int _frames )
|
||||
@@ -304,7 +302,7 @@ void alignedBufWetDryMixSplittedSSE( sampleFrameA * RP _dst,
|
||||
|
||||
#include <emmintrin.h>
|
||||
|
||||
void alignedMemCpySSE2( void * RP _dst, const void * RP _src, int _size )
|
||||
void memCpySSE2( void * RP _dst, const void * RP _src, int _size )
|
||||
{
|
||||
__m128i * dst = (__m128i *) _dst;
|
||||
__m128i * src = (__m128i *) _src;
|
||||
@@ -324,7 +322,7 @@ void alignedMemCpySSE2( void * RP _dst, const void * RP _src, int _size )
|
||||
|
||||
|
||||
|
||||
void alignedMemClearSSE2( void * RP _dst, int _size )
|
||||
void memClearSSE2( void * RP _dst, int _size )
|
||||
{
|
||||
__m128i * dst = (__m128i *) _dst;
|
||||
const int s = _size / ( sizeof( *dst ) * 4 );
|
||||
@@ -342,7 +340,7 @@ void alignedMemClearSSE2( void * RP _dst, int _size )
|
||||
|
||||
|
||||
|
||||
int alignedConvertToS16SSE2( const sampleFrameA * RP _src,
|
||||
int convertToS16SSE2( const sampleFrameA * RP _src,
|
||||
intSampleFrameA * RP _dst,
|
||||
const fpp_t _frames,
|
||||
const float _master_gain,
|
||||
@@ -1,10 +1,8 @@
|
||||
#ifndef SINGLE_SOURCE_COMPILE
|
||||
|
||||
/*
|
||||
* audio_alsa.cpp - device-class which implements ALSA-PCM-output
|
||||
*
|
||||
* Copyright (c) 2004-2009 Tobias Doerffel <tobydox/at/users.sourceforge.net>
|
||||
*
|
||||
*
|
||||
* This file is part of Linux MultiMedia Studio - http://lmms.sourceforge.net
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
@@ -25,7 +23,6 @@
|
||||
*/
|
||||
|
||||
|
||||
|
||||
#include <QtGui/QLineEdit>
|
||||
#include <QtGui/QLabel>
|
||||
|
||||
@@ -39,7 +36,7 @@
|
||||
#include "lcd_spinbox.h"
|
||||
#include "gui_templates.h"
|
||||
#include "templates.h"
|
||||
#include "basic_ops.h"
|
||||
#include "Cpu.h"
|
||||
|
||||
|
||||
|
||||
@@ -230,10 +227,10 @@ void audioALSA::applyQualitySettings( void )
|
||||
|
||||
void audioALSA::run( void )
|
||||
{
|
||||
sampleFrameA * temp = alignedAllocFrames(
|
||||
sampleFrameA * temp = CPU::allocFrames(
|
||||
getMixer()->framesPerPeriod() );
|
||||
intSampleFrameA * outbuf = (intSampleFrameA *)
|
||||
alignedMalloc( sizeof( intSampleFrameA ) * channels() /
|
||||
CPU::memAlloc( sizeof( intSampleFrameA ) * channels() /
|
||||
DEFAULT_CHANNELS * getMixer()->framesPerPeriod() );
|
||||
|
||||
int_sample_t * pcmbuf = new int_sample_t[m_periodSize * channels()];
|
||||
@@ -261,7 +258,7 @@ void audioALSA::run( void )
|
||||
}
|
||||
outbuf_size = frames * channels();
|
||||
|
||||
alignedConvertToS16( temp, outbuf, frames,
|
||||
CPU::convertToS16( temp, outbuf, frames,
|
||||
getMixer()->masterGain(),
|
||||
m_convertEndian );
|
||||
}
|
||||
@@ -300,8 +297,8 @@ void audioALSA::run( void )
|
||||
}
|
||||
}
|
||||
|
||||
alignedFreeFrames( temp );
|
||||
alignedFree( outbuf );
|
||||
CPU::freeFrames( temp );
|
||||
CPU::memFree( outbuf );
|
||||
delete[] pcmbuf;
|
||||
}
|
||||
|
||||
@@ -526,5 +523,3 @@ void audioALSA::setupWidget::saveSettings( void )
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
@@ -1,10 +1,8 @@
|
||||
#ifndef SINGLE_SOURCE_COMPILE
|
||||
|
||||
/*
|
||||
* audio_device.cpp - base-class for audio-devices used by LMMS-mixer
|
||||
*
|
||||
* Copyright (c) 2004-2008 Tobias Doerffel <tobydox/at/users.sourceforge.net>
|
||||
*
|
||||
* Copyright (c) 2004-2009 Tobias Doerffel <tobydox/at/users.sourceforge.net>
|
||||
*
|
||||
* This file is part of Linux MultiMedia Studio - http://lmms.sourceforge.net
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
@@ -25,13 +23,10 @@
|
||||
*/
|
||||
|
||||
|
||||
#include <cstring>
|
||||
|
||||
|
||||
#include "audio_device.h"
|
||||
#include "config_mgr.h"
|
||||
#include "debug.h"
|
||||
#include "basic_ops.h"
|
||||
#include "Cpu.h"
|
||||
|
||||
|
||||
|
||||
@@ -40,7 +35,7 @@ audioDevice::audioDevice( const ch_cnt_t _channels, mixer * _mixer ) :
|
||||
m_sampleRate( _mixer->processingSampleRate() ),
|
||||
m_channels( _channels ),
|
||||
m_mixer( _mixer ),
|
||||
m_buffer( alignedAllocFrames( getMixer()->framesPerPeriod() ) )
|
||||
m_buffer( CPU::allocFrames( getMixer()->framesPerPeriod() ) )
|
||||
{
|
||||
int error;
|
||||
if( ( m_srcState = src_new(
|
||||
@@ -57,7 +52,7 @@ audioDevice::audioDevice( const ch_cnt_t _channels, mixer * _mixer ) :
|
||||
audioDevice::~audioDevice()
|
||||
{
|
||||
src_delete( m_srcState );
|
||||
alignedFreeFrames( m_buffer );
|
||||
CPU::freeFrames( m_buffer );
|
||||
|
||||
m_devMutex.tryLock();
|
||||
unlock();
|
||||
@@ -104,7 +99,7 @@ fpp_t audioDevice::getNextBuffer( sampleFrameA * _ab )
|
||||
}
|
||||
else
|
||||
{
|
||||
alignedMemCpy( _ab, b, frames * sizeof( surroundSampleFrame ) );
|
||||
CPU::memCpy( _ab, b, frames * sizeof( surroundSampleFrame ) );
|
||||
}
|
||||
|
||||
// release lock
|
||||
@@ -112,7 +107,7 @@ fpp_t audioDevice::getNextBuffer( sampleFrameA * _ab )
|
||||
|
||||
if( getMixer()->hasFifoWriter() )
|
||||
{
|
||||
alignedFreeFrames( b );
|
||||
CPU::freeFrames( b );
|
||||
}
|
||||
|
||||
return frames;
|
||||
@@ -200,7 +195,7 @@ void audioDevice::resample( const sampleFrame * _src, const fpp_t _frames,
|
||||
|
||||
void audioDevice::clearS16Buffer( intSampleFrameA * _outbuf, const fpp_t _frames )
|
||||
{
|
||||
alignedMemClear( _outbuf, _frames * sizeof( *_outbuf ) );
|
||||
CPU::memClear( _outbuf, _frames * sizeof( *_outbuf ) );
|
||||
// memset( _outbuf, 0, _frames * channels() * BYTES_PER_INT_SAMPLE );
|
||||
}
|
||||
|
||||
@@ -213,5 +208,3 @@ bool audioDevice::hqAudio( void ) const
|
||||
}
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
@@ -1,11 +1,9 @@
|
||||
#ifndef SINGLE_SOURCE_COMPILE
|
||||
|
||||
/*
|
||||
* audio_file_wave.cpp - audio-device which encodes wave-stream and writes it
|
||||
* into a WAVE-file. This is used for song-export.
|
||||
*
|
||||
* Copyright (c) 2004-2009 Tobias Doerffel <tobydox/at/users.sourceforge.net>
|
||||
*
|
||||
*
|
||||
* This file is part of Linux MultiMedia Studio - http://lmms.sourceforge.net
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
@@ -26,10 +24,9 @@
|
||||
*/
|
||||
|
||||
|
||||
|
||||
#include "audio_file_wave.h"
|
||||
#include "endian_handling.h"
|
||||
#include "basic_ops.h"
|
||||
#include "Cpu.h"
|
||||
|
||||
#include <cstring>
|
||||
|
||||
@@ -89,14 +86,14 @@ void audioFileWave::writeBuffer( const surroundSampleFrame * _ab,
|
||||
{
|
||||
if( depth() == 16 )
|
||||
{
|
||||
intSampleFrameA * buf = (intSampleFrameA *) alignedMalloc(
|
||||
intSampleFrameA * buf = (intSampleFrameA *) CPU::memAlloc(
|
||||
sizeof( intSampleFrameA ) * _frames );
|
||||
|
||||
alignedConvertToS16( _ab, buf, _frames, _master_gain,
|
||||
CPU::convertToS16( _ab, buf, _frames, _master_gain,
|
||||
!isLittleEndian() );
|
||||
|
||||
sf_writef_short( m_sf, (int_sample_t *) buf, _frames );
|
||||
alignedFree( buf );
|
||||
CPU::memFree( buf );
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -123,4 +120,3 @@ void audioFileWave::finishEncoding( void )
|
||||
}
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
@@ -44,7 +44,7 @@
|
||||
#include "lcd_spinbox.h"
|
||||
#include "audio_port.h"
|
||||
#include "main_window.h"
|
||||
#include "basic_ops.h"
|
||||
#include "Cpu.h"
|
||||
|
||||
|
||||
|
||||
@@ -57,7 +57,7 @@ audioJACK::audioJACK( bool & _success_ful, mixer * _mixer ) :
|
||||
m_client( NULL ),
|
||||
m_active( false ),
|
||||
m_stopSemaphore( 1 ),
|
||||
m_outBuf( alignedAllocFrames( getMixer()->framesPerPeriod() ) ),
|
||||
m_outBuf( CPU::allocFrames( getMixer()->framesPerPeriod() ) ),
|
||||
m_framesDoneInCurBuf( 0 ),
|
||||
m_framesToDoInCurBuf( 0 )
|
||||
{
|
||||
@@ -93,7 +93,7 @@ audioJACK::~audioJACK()
|
||||
jack_client_close( m_client );
|
||||
}
|
||||
|
||||
alignedFreeFrames( m_outBuf );
|
||||
CPU::freeFrames( m_outBuf );
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -1,10 +1,8 @@
|
||||
#ifndef SINGLE_SOURCE_COMPILE
|
||||
|
||||
/*
|
||||
* audio_oss.cpp - device-class that implements OSS-PCM-output
|
||||
*
|
||||
* Copyright (c) 2004-2009 Tobias Doerffel <tobydox/at/users.sourceforge.net>
|
||||
*
|
||||
*
|
||||
* This file is part of Linux MultiMedia Studio - http://lmms.sourceforge.net
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
@@ -39,7 +37,7 @@
|
||||
#include "engine.h"
|
||||
#include "gui_templates.h"
|
||||
#include "templates.h"
|
||||
#include "basic_ops.h"
|
||||
#include "Cpu.h"
|
||||
|
||||
#ifdef LMMS_HAVE_UNISTD_H
|
||||
#include <unistd.h>
|
||||
@@ -299,10 +297,10 @@ void audioOSS::applyQualitySettings( void )
|
||||
|
||||
void audioOSS::run( void )
|
||||
{
|
||||
sampleFrameA * temp = alignedAllocFrames(
|
||||
sampleFrameA * temp = CPU::allocFrames(
|
||||
getMixer()->framesPerPeriod() );
|
||||
intSampleFrameA * outbuf = (intSampleFrameA *)
|
||||
alignedMalloc( sizeof( intSampleFrameA ) *
|
||||
CPU::memAlloc( sizeof( intSampleFrameA ) *
|
||||
getMixer()->framesPerPeriod() );
|
||||
|
||||
while( 1 )
|
||||
@@ -313,7 +311,7 @@ void audioOSS::run( void )
|
||||
break;
|
||||
}
|
||||
|
||||
int bytes = alignedConvertToS16( temp, outbuf, frames,
|
||||
int bytes = CPU::convertToS16( temp, outbuf, frames,
|
||||
getMixer()->masterGain(),
|
||||
m_convertEndian );
|
||||
if( write( m_audioFD, outbuf, bytes ) != bytes )
|
||||
@@ -322,8 +320,8 @@ void audioOSS::run( void )
|
||||
}
|
||||
}
|
||||
|
||||
alignedFreeFrames( temp );
|
||||
alignedFree( outbuf );
|
||||
CPU::freeFrames( temp );
|
||||
CPU::memFree( outbuf );
|
||||
}
|
||||
|
||||
|
||||
@@ -374,5 +372,3 @@ void audioOSS::setupWidget::saveSettings( void )
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
@@ -26,14 +26,14 @@
|
||||
#include "audio_device.h"
|
||||
#include "effect_chain.h"
|
||||
#include "engine.h"
|
||||
#include "basic_ops.h"
|
||||
#include "Cpu.h"
|
||||
|
||||
|
||||
audioPort::audioPort( const QString & _name, bool _has_effect_chain ) :
|
||||
m_bufferUsage( NoUsage ),
|
||||
m_firstBuffer( alignedAllocFrames(
|
||||
m_firstBuffer( CPU::allocFrames(
|
||||
engine::getMixer()->framesPerPeriod() ) ),
|
||||
m_secondBuffer( alignedAllocFrames(
|
||||
m_secondBuffer( CPU::allocFrames(
|
||||
engine::getMixer()->framesPerPeriod() ) ),
|
||||
m_extOutputEnabled( false ),
|
||||
m_nextFxChannel( 0 ),
|
||||
@@ -55,8 +55,8 @@ audioPort::~audioPort()
|
||||
{
|
||||
setExtOutputEnabled( false );
|
||||
engine::getMixer()->removeAudioPort( this );
|
||||
alignedFreeFrames( m_firstBuffer );
|
||||
alignedFreeFrames( m_secondBuffer );
|
||||
CPU::freeFrames( m_firstBuffer );
|
||||
CPU::freeFrames( m_secondBuffer );
|
||||
delete m_effects;
|
||||
}
|
||||
|
||||
|
||||
@@ -60,7 +60,7 @@ audioPortAudio::audioPortAudio( bool & _success_ful, mixer * _mixer ) :
|
||||
DEFAULT_CHANNELS, SURROUND_CHANNELS ),
|
||||
_mixer ),
|
||||
m_wasPAInitError( false ),
|
||||
m_outBuf( alignedAllocFrames( getMixer()->framesPerPeriod() ) ),
|
||||
m_outBuf( CPU::allocFrames( getMixer()->framesPerPeriod() ) ),
|
||||
m_outBufPos( 0 ),
|
||||
m_stopSemaphore( 1 )
|
||||
{
|
||||
@@ -206,7 +206,7 @@ audioPortAudio::~audioPortAudio()
|
||||
{
|
||||
Pa_Terminate();
|
||||
}
|
||||
alignedFreeFrames( m_outBuf );
|
||||
CPU::freeFrames( m_outBuf );
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -1,10 +1,8 @@
|
||||
#ifndef SINGLE_SOURCE_COMPILE
|
||||
|
||||
/*
|
||||
* audio_pulseaudio.cpp - device-class which implements PulseAudio-output
|
||||
*
|
||||
* Copyright (c) 2008 Tobias Doerffel <tobydox/at/users.sourceforge.net>
|
||||
*
|
||||
* Copyright (c) 2008-2009 Tobias Doerffel <tobydox/at/users.sourceforge.net>
|
||||
*
|
||||
* This file is part of Linux MultiMedia Studio - http://lmms.sourceforge.net
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
@@ -25,7 +23,6 @@
|
||||
*/
|
||||
|
||||
|
||||
|
||||
#include <QtGui/QLineEdit>
|
||||
#include <QtGui/QLabel>
|
||||
|
||||
@@ -40,7 +37,7 @@
|
||||
#include "lcd_spinbox.h"
|
||||
#include "gui_templates.h"
|
||||
#include "templates.h"
|
||||
#include "basic_ops.h"
|
||||
#include "Cpu.h"
|
||||
|
||||
|
||||
static void stream_write_callback(pa_stream *s, size_t length, void *userdata)
|
||||
@@ -231,7 +228,7 @@ void audioPulseAudio::run( void )
|
||||
void audioPulseAudio::streamWriteCallback(pa_stream *s, size_t length)
|
||||
{
|
||||
const fpp_t fpp = getMixer()->framesPerPeriod();
|
||||
sampleFrameA * temp = alignedAllocFrames( fpp );
|
||||
sampleFrameA * temp = CPU::allocFrames( fpp );
|
||||
Sint16 * pcmbuf = (Sint16*)pa_xmalloc( fpp * channels() *
|
||||
sizeof(Sint16) );
|
||||
|
||||
@@ -243,7 +240,7 @@ void audioPulseAudio::streamWriteCallback(pa_stream *s, size_t length)
|
||||
{
|
||||
return;
|
||||
}
|
||||
int bytes = alignedConvertToS16( temp,
|
||||
int bytes = CPU::convertToS16( temp,
|
||||
(intSampleFrameA *) pcmbuf,
|
||||
frames,
|
||||
getMixer()->masterGain(),
|
||||
@@ -257,7 +254,7 @@ void audioPulseAudio::streamWriteCallback(pa_stream *s, size_t length)
|
||||
}
|
||||
|
||||
pa_xfree( pcmbuf );
|
||||
alignedFreeFrames( temp );
|
||||
CPU::freeFrames( temp );
|
||||
}
|
||||
|
||||
|
||||
@@ -308,5 +305,3 @@ void audioPulseAudio::setupWidget::saveSettings( void )
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
@@ -1,10 +1,8 @@
|
||||
#ifndef SINGLE_SOURCE_COMPILE
|
||||
|
||||
/*
|
||||
* audio_sdl.cpp - device-class that performs PCM-output via SDL
|
||||
*
|
||||
* Copyright (c) 2004-2008 Tobias Doerffel <tobydox/at/users.sourceforge.net>
|
||||
*
|
||||
*
|
||||
* This file is part of Linux MultiMedia Studio - http://lmms.sourceforge.net
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
@@ -25,7 +23,6 @@
|
||||
*/
|
||||
|
||||
|
||||
|
||||
#include "audio_sdl.h"
|
||||
|
||||
#ifdef LMMS_HAVE_SDL
|
||||
@@ -38,13 +35,13 @@
|
||||
#include "config_mgr.h"
|
||||
#include "gui_templates.h"
|
||||
#include "templates.h"
|
||||
#include "basic_ops.h"
|
||||
#include "Cpu.h"
|
||||
|
||||
|
||||
|
||||
audioSDL::audioSDL( bool & _success_ful, mixer * _mixer ) :
|
||||
audioDevice( DEFAULT_CHANNELS, _mixer ),
|
||||
m_outBuf( alignedAllocFrames( getMixer()->framesPerPeriod() ) ),
|
||||
m_outBuf( CPU::allocFrames( getMixer()->framesPerPeriod() ) ),
|
||||
m_convertedBufPos( 0 ),
|
||||
m_convertEndian( false ),
|
||||
m_stopSemaphore( 1 )
|
||||
@@ -53,7 +50,7 @@ audioSDL::audioSDL( bool & _success_ful, mixer * _mixer ) :
|
||||
|
||||
m_convertedBufSize = getMixer()->framesPerPeriod() *
|
||||
sizeof( intSampleFrameA );
|
||||
m_convertedBuf = (intSampleFrameA *) alignedMalloc( m_convertedBufSize );
|
||||
m_convertedBuf = (intSampleFrameA *) CPU::memAlloc( m_convertedBufSize );
|
||||
|
||||
|
||||
if( SDL_Init( SDL_INIT_AUDIO | SDL_INIT_NOPARACHUTE ) < 0 )
|
||||
@@ -97,8 +94,8 @@ audioSDL::~audioSDL()
|
||||
|
||||
SDL_CloseAudio();
|
||||
SDL_Quit();
|
||||
alignedFree( m_convertedBuf );
|
||||
alignedFreeFrames( m_outBuf );
|
||||
CPU::memFree( m_convertedBuf );
|
||||
CPU::freeFrames( m_outBuf );
|
||||
}
|
||||
|
||||
|
||||
@@ -192,7 +189,7 @@ void audioSDL::sdlAudioCallback( Uint8 * _buf, int _len )
|
||||
}
|
||||
m_convertedBufSize = frames * sizeof( intSampleFrameA );
|
||||
|
||||
alignedConvertToS16( m_outBuf,
|
||||
CPU::convertToS16( m_outBuf,
|
||||
m_convertedBuf,
|
||||
frames,
|
||||
getMixer()->masterGain(),
|
||||
@@ -243,4 +240,3 @@ void audioSDL::setupWidget::saveSettings( void )
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
@@ -1,555 +0,0 @@
|
||||
.file "basic_ops_x86.c"
|
||||
.text
|
||||
.align 16
|
||||
.globl alignedMemCpySSE
|
||||
.type alignedMemCpySSE, @function
|
||||
alignedMemCpySSE:
|
||||
.LFB509:
|
||||
movslq %edx,%rdx
|
||||
shrq $6, %rdx
|
||||
testl %edx, %edx
|
||||
jle .L4
|
||||
subl $1, %edx
|
||||
xorl %eax, %eax
|
||||
addq $1, %rdx
|
||||
salq $6, %rdx
|
||||
.align 16
|
||||
.L3:
|
||||
movaps (%rsi,%rax), %xmm0
|
||||
movaps %xmm0, (%rdi,%rax)
|
||||
movaps 16(%rsi,%rax), %xmm0
|
||||
movaps %xmm0, 16(%rdi,%rax)
|
||||
movaps 32(%rsi,%rax), %xmm0
|
||||
movaps %xmm0, 32(%rdi,%rax)
|
||||
movaps 48(%rsi,%rax), %xmm0
|
||||
movaps %xmm0, 48(%rdi,%rax)
|
||||
addq $64, %rax
|
||||
cmpq %rdx, %rax
|
||||
jne .L3
|
||||
.L4:
|
||||
rep
|
||||
ret
|
||||
.LFE509:
|
||||
.size alignedMemCpySSE, .-alignedMemCpySSE
|
||||
.align 16
|
||||
.globl alignedMemClearSSE
|
||||
.type alignedMemClearSSE, @function
|
||||
alignedMemClearSSE:
|
||||
.LFB510:
|
||||
movslq %esi,%rsi
|
||||
shrq $6, %rsi
|
||||
testl %esi, %esi
|
||||
jle .L10
|
||||
subl $1, %esi
|
||||
xorps %xmm0, %xmm0
|
||||
salq $6, %rsi
|
||||
leaq 64(%rdi,%rsi), %rax
|
||||
.align 16
|
||||
.L9:
|
||||
movaps %xmm0, (%rdi)
|
||||
movaps %xmm0, 16(%rdi)
|
||||
movaps %xmm0, 32(%rdi)
|
||||
movaps %xmm0, 48(%rdi)
|
||||
addq $64, %rdi
|
||||
cmpq %rax, %rdi
|
||||
jne .L9
|
||||
.L10:
|
||||
rep
|
||||
ret
|
||||
.LFE510:
|
||||
.size alignedMemClearSSE, .-alignedMemClearSSE
|
||||
.align 16
|
||||
.globl alignedBufApplyGainSSE
|
||||
.type alignedBufApplyGainSSE, @function
|
||||
alignedBufApplyGainSSE:
|
||||
.LFB511:
|
||||
testl %esi, %esi
|
||||
jle .L15
|
||||
leal -1(%rsi), %edx
|
||||
shufps $0, %xmm0, %xmm0
|
||||
xorl %eax, %eax
|
||||
shrl $3, %edx
|
||||
addl $1, %edx
|
||||
.align 16
|
||||
.L14:
|
||||
movaps 16(%rdi), %xmm3
|
||||
addl $1, %eax
|
||||
movaps 32(%rdi), %xmm2
|
||||
mulps %xmm0, %xmm3
|
||||
movaps 48(%rdi), %xmm1
|
||||
mulps %xmm0, %xmm2
|
||||
movaps (%rdi), %xmm4
|
||||
mulps %xmm0, %xmm1
|
||||
movaps %xmm3, 16(%rdi)
|
||||
mulps %xmm0, %xmm4
|
||||
movaps %xmm2, 32(%rdi)
|
||||
movaps %xmm1, 48(%rdi)
|
||||
movaps %xmm4, (%rdi)
|
||||
addq $64, %rdi
|
||||
cmpl %eax, %edx
|
||||
ja .L14
|
||||
.L15:
|
||||
rep
|
||||
ret
|
||||
.LFE511:
|
||||
.size alignedBufApplyGainSSE, .-alignedBufApplyGainSSE
|
||||
.align 16
|
||||
.globl alignedBufMixSSE
|
||||
.type alignedBufMixSSE, @function
|
||||
alignedBufMixSSE:
|
||||
.LFB512:
|
||||
testl %edx, %edx
|
||||
jle .L20
|
||||
leal -1(%rdx), %ecx
|
||||
xorl %eax, %eax
|
||||
xorl %edx, %edx
|
||||
shrl $3, %ecx
|
||||
addl $1, %ecx
|
||||
.align 16
|
||||
.L19:
|
||||
movaps 16(%rdi,%rax), %xmm2
|
||||
addl $1, %edx
|
||||
movaps 32(%rdi,%rax), %xmm1
|
||||
addps 16(%rsi,%rax), %xmm2
|
||||
movaps 48(%rdi,%rax), %xmm0
|
||||
addps 32(%rsi,%rax), %xmm1
|
||||
movaps (%rdi,%rax), %xmm3
|
||||
addps 48(%rsi,%rax), %xmm0
|
||||
addps (%rsi,%rax), %xmm3
|
||||
movaps %xmm2, 16(%rdi,%rax)
|
||||
movaps %xmm1, 32(%rdi,%rax)
|
||||
movaps %xmm0, 48(%rdi,%rax)
|
||||
movaps %xmm3, (%rdi,%rax)
|
||||
addq $64, %rax
|
||||
cmpl %edx, %ecx
|
||||
ja .L19
|
||||
.L20:
|
||||
rep
|
||||
ret
|
||||
.LFE512:
|
||||
.size alignedBufMixSSE, .-alignedBufMixSSE
|
||||
.align 16
|
||||
.globl alignedBufMixLRCoeffSSE
|
||||
.type alignedBufMixLRCoeffSSE, @function
|
||||
alignedBufMixLRCoeffSSE:
|
||||
.LFB513:
|
||||
testl %edx, %edx
|
||||
jle .L25
|
||||
unpcklps %xmm1, %xmm0
|
||||
leal -1(%rdx), %ecx
|
||||
xorl %eax, %eax
|
||||
xorl %edx, %edx
|
||||
shrl $2, %ecx
|
||||
movlhps %xmm0, %xmm0
|
||||
addl $1, %ecx
|
||||
.align 16
|
||||
.L24:
|
||||
movaps 16(%rsi,%rax), %xmm2
|
||||
addl $1, %edx
|
||||
movaps (%rsi,%rax), %xmm3
|
||||
mulps %xmm0, %xmm2
|
||||
mulps %xmm0, %xmm3
|
||||
addps 16(%rdi,%rax), %xmm2
|
||||
addps (%rdi,%rax), %xmm3
|
||||
movaps %xmm2, 16(%rdi,%rax)
|
||||
movaps %xmm3, (%rdi,%rax)
|
||||
addq $32, %rax
|
||||
cmpl %edx, %ecx
|
||||
ja .L24
|
||||
.L25:
|
||||
rep
|
||||
ret
|
||||
.LFE513:
|
||||
.size alignedBufMixLRCoeffSSE, .-alignedBufMixLRCoeffSSE
|
||||
.align 16
|
||||
.globl alignedBufWetDryMixSSE
|
||||
.type alignedBufWetDryMixSSE, @function
|
||||
alignedBufWetDryMixSSE:
|
||||
.LFB515:
|
||||
testl %edx, %edx
|
||||
jle .L30
|
||||
leal -1(%rdx), %ecx
|
||||
shufps $0, %xmm1, %xmm1
|
||||
shufps $0, %xmm0, %xmm0
|
||||
xorl %eax, %eax
|
||||
shrl $2, %ecx
|
||||
xorl %edx, %edx
|
||||
addl $1, %ecx
|
||||
.align 16
|
||||
.L29:
|
||||
movaps 16(%rsi,%rax), %xmm3
|
||||
addl $1, %edx
|
||||
movaps 16(%rdi,%rax), %xmm2
|
||||
mulps %xmm0, %xmm3
|
||||
movaps (%rsi,%rax), %xmm4
|
||||
mulps %xmm1, %xmm2
|
||||
mulps %xmm0, %xmm4
|
||||
addps %xmm3, %xmm2
|
||||
movaps (%rdi,%rax), %xmm3
|
||||
mulps %xmm1, %xmm3
|
||||
movaps %xmm2, 16(%rdi,%rax)
|
||||
addps %xmm4, %xmm3
|
||||
movaps %xmm3, (%rdi,%rax)
|
||||
addq $32, %rax
|
||||
cmpl %edx, %ecx
|
||||
ja .L29
|
||||
.L30:
|
||||
rep
|
||||
ret
|
||||
.LFE515:
|
||||
.size alignedBufWetDryMixSSE, .-alignedBufWetDryMixSSE
|
||||
.align 16
|
||||
.globl alignedBufWetDryMixSplittedSSE
|
||||
.type alignedBufWetDryMixSplittedSSE, @function
|
||||
alignedBufWetDryMixSplittedSSE:
|
||||
.LFB516:
|
||||
pushq %rbp
|
||||
.LCFI0:
|
||||
testl %ecx, %ecx
|
||||
pushq %rbx
|
||||
.LCFI1:
|
||||
jle .L39
|
||||
leal -1(%rcx), %ebx
|
||||
shrl %ebx
|
||||
addl $1, %ebx
|
||||
movl %ebx, %r11d
|
||||
shrl $2, %r11d
|
||||
cmpl $3, %ebx
|
||||
leal 0(,%r11,4), %ebp
|
||||
jbe .L40
|
||||
testl %ebp, %ebp
|
||||
jne .L34
|
||||
.L40:
|
||||
xorl %r9d, %r9d
|
||||
jmp .L36
|
||||
.align 16
|
||||
.L34:
|
||||
movaps %xmm1, %xmm2
|
||||
movq %rdi, %rax
|
||||
xorps %xmm10, %xmm10
|
||||
movq %rsi, %r9
|
||||
shufps $0, %xmm2, %xmm2
|
||||
movq %rdx, %r8
|
||||
xorl %r10d, %r10d
|
||||
movaps %xmm2, %xmm12
|
||||
movaps %xmm0, %xmm2
|
||||
shufps $0, %xmm2, %xmm2
|
||||
movaps %xmm2, %xmm11
|
||||
.align 16
|
||||
.L37:
|
||||
movaps (%rax), %xmm2
|
||||
addl $1, %r10d
|
||||
movaps %xmm10, %xmm9
|
||||
movaps 16(%rax), %xmm5
|
||||
movaps %xmm2, %xmm4
|
||||
movlps (%r9), %xmm9
|
||||
movaps %xmm10, %xmm8
|
||||
movaps 32(%rax), %xmm14
|
||||
shufps $136, %xmm5, %xmm4
|
||||
movhps 8(%r9), %xmm9
|
||||
movaps 48(%rax), %xmm3
|
||||
movaps %xmm14, %xmm15
|
||||
movlps 16(%r9), %xmm8
|
||||
shufps $221, %xmm5, %xmm2
|
||||
shufps $136, %xmm3, %xmm15
|
||||
movhps 24(%r9), %xmm8
|
||||
shufps $221, %xmm3, %xmm14
|
||||
movaps %xmm4, %xmm5
|
||||
addq $32, %r9
|
||||
movaps %xmm9, %xmm3
|
||||
shufps $136, %xmm15, %xmm5
|
||||
movaps %xmm10, %xmm7
|
||||
shufps $136, %xmm8, %xmm3
|
||||
movlps (%r8), %xmm7
|
||||
movaps %xmm10, %xmm6
|
||||
mulps %xmm12, %xmm5
|
||||
movhps 8(%r8), %xmm7
|
||||
mulps %xmm11, %xmm3
|
||||
movlps 16(%r8), %xmm6
|
||||
movaps %xmm7, %xmm13
|
||||
movhps 24(%r8), %xmm6
|
||||
shufps $221, %xmm15, %xmm4
|
||||
shufps $221, %xmm8, %xmm9
|
||||
addq $32, %r8
|
||||
shufps $136, %xmm6, %xmm13
|
||||
addps %xmm3, %xmm5
|
||||
movaps %xmm2, %xmm3
|
||||
shufps $221, %xmm6, %xmm7
|
||||
shufps $136, %xmm14, %xmm3
|
||||
shufps $221, %xmm14, %xmm2
|
||||
mulps %xmm11, %xmm13
|
||||
movaps %xmm5, %xmm6
|
||||
mulps %xmm12, %xmm3
|
||||
mulps %xmm12, %xmm4
|
||||
mulps %xmm11, %xmm9
|
||||
addps %xmm13, %xmm3
|
||||
mulps %xmm12, %xmm2
|
||||
mulps %xmm11, %xmm7
|
||||
addps %xmm9, %xmm4
|
||||
addps %xmm7, %xmm2
|
||||
unpcklps %xmm4, %xmm6
|
||||
unpckhps %xmm4, %xmm5
|
||||
movaps %xmm3, %xmm4
|
||||
unpcklps %xmm2, %xmm4
|
||||
unpckhps %xmm2, %xmm3
|
||||
movaps %xmm6, %xmm2
|
||||
unpcklps %xmm4, %xmm2
|
||||
unpckhps %xmm4, %xmm6
|
||||
movaps %xmm2, (%rax)
|
||||
movaps %xmm5, %xmm2
|
||||
unpckhps %xmm3, %xmm5
|
||||
unpcklps %xmm3, %xmm2
|
||||
movaps %xmm6, 16(%rax)
|
||||
movaps %xmm2, 32(%rax)
|
||||
movaps %xmm5, 48(%rax)
|
||||
addq $64, %rax
|
||||
cmpl %r10d, %r11d
|
||||
ja .L37
|
||||
cmpl %ebx, %ebp
|
||||
leal (%rbp,%rbp), %r9d
|
||||
je .L39
|
||||
.L36:
|
||||
movslq %r9d,%rax
|
||||
leaq 1(%rax), %rbx
|
||||
leaq 0(,%rax,4), %r10
|
||||
leaq (%rdi,%rax,8), %r8
|
||||
leaq (%rdi,%rbx,8), %rax
|
||||
salq $2, %rbx
|
||||
leaq (%rsi,%r10), %r11
|
||||
leaq (%rdx,%r10), %r10
|
||||
addq %rbx, %rsi
|
||||
addq %rbx, %rdx
|
||||
.align 16
|
||||
.L38:
|
||||
movss (%r11), %xmm3
|
||||
addl $2, %r9d
|
||||
movss (%r8), %xmm2
|
||||
mulss %xmm0, %xmm3
|
||||
mulss %xmm1, %xmm2
|
||||
addq $8, %r11
|
||||
addss %xmm3, %xmm2
|
||||
movss %xmm2, (%r8)
|
||||
movss 4(%r8), %xmm2
|
||||
movss (%r10), %xmm3
|
||||
mulss %xmm1, %xmm2
|
||||
addq $8, %r10
|
||||
mulss %xmm0, %xmm3
|
||||
addss %xmm3, %xmm2
|
||||
movss %xmm2, 4(%r8)
|
||||
addq $16, %r8
|
||||
movss (%rsi), %xmm3
|
||||
addq $8, %rsi
|
||||
movss (%rax), %xmm2
|
||||
mulss %xmm0, %xmm3
|
||||
mulss %xmm1, %xmm2
|
||||
addss %xmm3, %xmm2
|
||||
movss %xmm2, (%rax)
|
||||
movss 4(%rax), %xmm2
|
||||
movss (%rdx), %xmm3
|
||||
mulss %xmm1, %xmm2
|
||||
addq $8, %rdx
|
||||
mulss %xmm0, %xmm3
|
||||
addss %xmm3, %xmm2
|
||||
movss %xmm2, 4(%rax)
|
||||
addq $16, %rax
|
||||
cmpl %r9d, %ecx
|
||||
jg .L38
|
||||
.L39:
|
||||
popq %rbx
|
||||
popq %rbp
|
||||
ret
|
||||
.LFE516:
|
||||
.size alignedBufWetDryMixSplittedSSE, .-alignedBufWetDryMixSplittedSSE
|
||||
.align 16
|
||||
.globl unalignedBufMixLRCoeffSSE
|
||||
.type unalignedBufMixLRCoeffSSE, @function
|
||||
unalignedBufMixLRCoeffSSE:
|
||||
.LFB514:
|
||||
movl %edx, %ecx
|
||||
shrl $31, %ecx
|
||||
leal (%rdx,%rcx), %eax
|
||||
andl $1, %eax
|
||||
cmpl %ecx, %eax
|
||||
jne .L52
|
||||
.L44:
|
||||
testl %edx, %edx
|
||||
jle .L49
|
||||
subl $1, %edx
|
||||
shrl %edx
|
||||
testb $15, %dil
|
||||
jne .L46
|
||||
unpcklps %xmm1, %xmm0
|
||||
addl $1, %edx
|
||||
xorps %xmm3, %xmm3
|
||||
xorl %eax, %eax
|
||||
movlhps %xmm0, %xmm0
|
||||
.align 16
|
||||
.L47:
|
||||
movaps %xmm3, %xmm2
|
||||
addl $1, %eax
|
||||
movaps %xmm3, %xmm1
|
||||
movlps (%rsi), %xmm2
|
||||
movlps (%rdi), %xmm1
|
||||
movhps 8(%rsi), %xmm2
|
||||
addq $16, %rsi
|
||||
movhps 8(%rdi), %xmm1
|
||||
mulps %xmm0, %xmm2
|
||||
addps %xmm2, %xmm1
|
||||
movaps %xmm1, (%rdi)
|
||||
addq $16, %rdi
|
||||
cmpl %edx, %eax
|
||||
jb .L47
|
||||
rep
|
||||
ret
|
||||
.align 16
|
||||
.L46:
|
||||
mov %edx, %edx
|
||||
xorl %eax, %eax
|
||||
addq $1, %rdx
|
||||
salq $4, %rdx
|
||||
.align 16
|
||||
.L48:
|
||||
movss (%rsi,%rax), %xmm2
|
||||
mulss %xmm0, %xmm2
|
||||
addss (%rdi,%rax), %xmm2
|
||||
movss %xmm2, (%rdi,%rax)
|
||||
movss 4(%rsi,%rax), %xmm2
|
||||
mulss %xmm1, %xmm2
|
||||
addss 4(%rdi,%rax), %xmm2
|
||||
movss %xmm2, 4(%rdi,%rax)
|
||||
movss 8(%rsi,%rax), %xmm2
|
||||
mulss %xmm0, %xmm2
|
||||
addss 8(%rdi,%rax), %xmm2
|
||||
movss %xmm2, 8(%rdi,%rax)
|
||||
movss 12(%rsi,%rax), %xmm2
|
||||
mulss %xmm1, %xmm2
|
||||
addss 12(%rdi,%rax), %xmm2
|
||||
movss %xmm2, 12(%rdi,%rax)
|
||||
addq $16, %rax
|
||||
cmpq %rdx, %rax
|
||||
jne .L48
|
||||
.L49:
|
||||
rep
|
||||
ret
|
||||
.L52:
|
||||
movss (%rsi), %xmm2
|
||||
subl $1, %edx
|
||||
mulss %xmm0, %xmm2
|
||||
addss (%rdi), %xmm2
|
||||
movss %xmm2, (%rdi)
|
||||
movss 4(%rsi), %xmm2
|
||||
addq $8, %rsi
|
||||
mulss %xmm1, %xmm2
|
||||
addss 4(%rdi), %xmm2
|
||||
movss %xmm2, 4(%rdi)
|
||||
addq $8, %rdi
|
||||
jmp .L44
|
||||
.LFE514:
|
||||
.size unalignedBufMixLRCoeffSSE, .-unalignedBufMixLRCoeffSSE
|
||||
.section .eh_frame,"aw",@progbits
|
||||
.Lframe1:
|
||||
.long .LECIE1-.LSCIE1
|
||||
.LSCIE1:
|
||||
.long 0x0
|
||||
.byte 0x1
|
||||
.string "zR"
|
||||
.byte 0x1
|
||||
.byte 0x78
|
||||
.byte 0x10
|
||||
.byte 0x1
|
||||
.byte 0x3
|
||||
.byte 0xc
|
||||
.byte 0x7
|
||||
.byte 0x8
|
||||
.byte 0x11
|
||||
.byte 0x10
|
||||
.byte 0x1
|
||||
.align 8
|
||||
.LECIE1:
|
||||
.LSFDE1:
|
||||
.long .LEFDE1-.LASFDE1
|
||||
.LASFDE1:
|
||||
.long .LASFDE1-.Lframe1
|
||||
.long .LFB509
|
||||
.long .LFE509-.LFB509
|
||||
.byte 0x0
|
||||
.align 8
|
||||
.LEFDE1:
|
||||
.LSFDE3:
|
||||
.long .LEFDE3-.LASFDE3
|
||||
.LASFDE3:
|
||||
.long .LASFDE3-.Lframe1
|
||||
.long .LFB510
|
||||
.long .LFE510-.LFB510
|
||||
.byte 0x0
|
||||
.align 8
|
||||
.LEFDE3:
|
||||
.LSFDE5:
|
||||
.long .LEFDE5-.LASFDE5
|
||||
.LASFDE5:
|
||||
.long .LASFDE5-.Lframe1
|
||||
.long .LFB511
|
||||
.long .LFE511-.LFB511
|
||||
.byte 0x0
|
||||
.align 8
|
||||
.LEFDE5:
|
||||
.LSFDE7:
|
||||
.long .LEFDE7-.LASFDE7
|
||||
.LASFDE7:
|
||||
.long .LASFDE7-.Lframe1
|
||||
.long .LFB512
|
||||
.long .LFE512-.LFB512
|
||||
.byte 0x0
|
||||
.align 8
|
||||
.LEFDE7:
|
||||
.LSFDE9:
|
||||
.long .LEFDE9-.LASFDE9
|
||||
.LASFDE9:
|
||||
.long .LASFDE9-.Lframe1
|
||||
.long .LFB513
|
||||
.long .LFE513-.LFB513
|
||||
.byte 0x0
|
||||
.align 8
|
||||
.LEFDE9:
|
||||
.LSFDE11:
|
||||
.long .LEFDE11-.LASFDE11
|
||||
.LASFDE11:
|
||||
.long .LASFDE11-.Lframe1
|
||||
.long .LFB515
|
||||
.long .LFE515-.LFB515
|
||||
.byte 0x0
|
||||
.align 8
|
||||
.LEFDE11:
|
||||
.LSFDE13:
|
||||
.long .LEFDE13-.LASFDE13
|
||||
.LASFDE13:
|
||||
.long .LASFDE13-.Lframe1
|
||||
.long .LFB516
|
||||
.long .LFE516-.LFB516
|
||||
.byte 0x0
|
||||
.byte 0x4
|
||||
.long .LCFI0-.LFB516
|
||||
.byte 0xe
|
||||
.byte 0x10
|
||||
.byte 0x4
|
||||
.long .LCFI1-.LCFI0
|
||||
.byte 0xe
|
||||
.byte 0x18
|
||||
.byte 0x11
|
||||
.byte 0x3
|
||||
.byte 0x3
|
||||
.byte 0x11
|
||||
.byte 0x6
|
||||
.byte 0x2
|
||||
.align 8
|
||||
.LEFDE13:
|
||||
.LSFDE15:
|
||||
.long .LEFDE15-.LASFDE15
|
||||
.LASFDE15:
|
||||
.long .LASFDE15-.Lframe1
|
||||
.long .LFB514
|
||||
.long .LFE514-.LFB514
|
||||
.byte 0x0
|
||||
.align 8
|
||||
.LEFDE15:
|
||||
.ident "GCC: (GNU) 4.4.0 20090304 (experimental)"
|
||||
@@ -1,395 +0,0 @@
|
||||
.file "basic_ops_x86.c"
|
||||
.text
|
||||
.align 16
|
||||
.globl alignedMemCpySSE2
|
||||
.type alignedMemCpySSE2, @function
|
||||
alignedMemCpySSE2:
|
||||
.LFB509:
|
||||
movslq %edx,%rdx
|
||||
shrq $6, %rdx
|
||||
testl %edx, %edx
|
||||
jle .L4
|
||||
subl $1, %edx
|
||||
xorl %eax, %eax
|
||||
addq $1, %rdx
|
||||
salq $6, %rdx
|
||||
.align 16
|
||||
.L3:
|
||||
movdqa (%rsi,%rax), %xmm0
|
||||
movdqa %xmm0, (%rdi,%rax)
|
||||
movdqa 16(%rsi,%rax), %xmm0
|
||||
movdqa %xmm0, 16(%rdi,%rax)
|
||||
movdqa 32(%rsi,%rax), %xmm0
|
||||
movdqa %xmm0, 32(%rdi,%rax)
|
||||
movdqa 48(%rsi,%rax), %xmm0
|
||||
movdqa %xmm0, 48(%rdi,%rax)
|
||||
addq $64, %rax
|
||||
cmpq %rdx, %rax
|
||||
jne .L3
|
||||
.L4:
|
||||
rep
|
||||
ret
|
||||
.LFE509:
|
||||
.size alignedMemCpySSE2, .-alignedMemCpySSE2
|
||||
.align 16
|
||||
.globl alignedMemClearSSE2
|
||||
.type alignedMemClearSSE2, @function
|
||||
alignedMemClearSSE2:
|
||||
.LFB510:
|
||||
movslq %esi,%rsi
|
||||
shrq $6, %rsi
|
||||
testl %esi, %esi
|
||||
jle .L10
|
||||
subl $1, %esi
|
||||
pxor %xmm0, %xmm0
|
||||
salq $6, %rsi
|
||||
leaq 64(%rdi,%rsi), %rax
|
||||
.align 16
|
||||
.L9:
|
||||
movdqa %xmm0, (%rdi)
|
||||
movdqa %xmm0, 16(%rdi)
|
||||
movdqa %xmm0, 32(%rdi)
|
||||
movdqa %xmm0, 48(%rdi)
|
||||
addq $64, %rdi
|
||||
cmpq %rax, %rdi
|
||||
jne .L9
|
||||
.L10:
|
||||
rep
|
||||
ret
|
||||
.LFE510:
|
||||
.size alignedMemClearSSE2, .-alignedMemClearSSE2
|
||||
.align 16
|
||||
.globl alignedConvertToS16SSE2
|
||||
.type alignedConvertToS16SSE2, @function
|
||||
alignedConvertToS16SSE2:
|
||||
.LFB511:
|
||||
pushq %rbp
|
||||
.LCFI0:
|
||||
testb %cl, %cl
|
||||
movl %edx, %eax
|
||||
mulss .LC0(%rip), %xmm0
|
||||
pushq %rbx
|
||||
.LCFI1:
|
||||
jne .L13
|
||||
testw %dx, %dx
|
||||
jle .L15
|
||||
movl %edx, %ebx
|
||||
shrw $2, %bx
|
||||
cmpw $3, %dx
|
||||
leal 0(,%rbx,4), %r8d
|
||||
ja .L33
|
||||
.L28:
|
||||
xorl %r8d, %r8d
|
||||
.align 16
|
||||
.L23:
|
||||
movswq %r8w,%rdx
|
||||
movl $32767, %ebx
|
||||
leaq (%rdi,%rdx,8), %rcx
|
||||
leaq (%rsi,%rdx,4), %rdx
|
||||
movl $-32768, %edi
|
||||
.align 16
|
||||
.L25:
|
||||
movss (%rcx), %xmm1
|
||||
mulss %xmm0, %xmm1
|
||||
cvttss2si %xmm1, %esi
|
||||
movss 4(%rcx), %xmm1
|
||||
mulss %xmm0, %xmm1
|
||||
cmpl $-32768, %esi
|
||||
cmovl %edi, %esi
|
||||
cmpl $32767, %esi
|
||||
cmovg %ebx, %esi
|
||||
movw %si, (%rdx)
|
||||
cvttss2si %xmm1, %esi
|
||||
cmpl $-32768, %esi
|
||||
cmovl %edi, %esi
|
||||
cmpl $32767, %esi
|
||||
cmovg %ebx, %esi
|
||||
addl $1, %r8d
|
||||
addq $8, %rcx
|
||||
movw %si, 2(%rdx)
|
||||
addq $4, %rdx
|
||||
cmpw %r8w, %ax
|
||||
jg .L25
|
||||
.L15:
|
||||
cwtl
|
||||
popq %rbx
|
||||
sall $2, %eax
|
||||
popq %rbp
|
||||
ret
|
||||
.align 16
|
||||
.L13:
|
||||
testw %dx, %dx
|
||||
jle .L15
|
||||
movl %edx, %ebx
|
||||
shrw $2, %bx
|
||||
cmpw $3, %dx
|
||||
leal 0(,%rbx,4), %r8d
|
||||
ja .L34
|
||||
.L27:
|
||||
xorl %r8d, %r8d
|
||||
.align 16
|
||||
.L18:
|
||||
movswq %r8w,%rdx
|
||||
leaq (%rdi,%rdx,8), %rcx
|
||||
leaq (%rsi,%rdx,4), %rdx
|
||||
movl $-32768, %edi
|
||||
movl $32767, %esi
|
||||
.align 16
|
||||
.L20:
|
||||
movss (%rcx), %xmm1
|
||||
mulss %xmm0, %xmm1
|
||||
cvttss2si %xmm1, %ebx
|
||||
movss 4(%rcx), %xmm1
|
||||
mulss %xmm0, %xmm1
|
||||
cmpl $-32768, %ebx
|
||||
cmovl %edi, %ebx
|
||||
cmpl $32767, %ebx
|
||||
cmovg %esi, %ebx
|
||||
movzbl %bh, %ebp
|
||||
sall $8, %ebx
|
||||
movl %ebp, %r9d
|
||||
orl %r9d, %ebx
|
||||
movw %bx, (%rdx)
|
||||
cvttss2si %xmm1, %ebx
|
||||
cmpl $-32768, %ebx
|
||||
cmovl %edi, %ebx
|
||||
cmpl $32767, %ebx
|
||||
cmovg %esi, %ebx
|
||||
addl $1, %r8d
|
||||
addq $8, %rcx
|
||||
movzbl %bh, %ebp
|
||||
sall $8, %ebx
|
||||
movl %ebp, %r9d
|
||||
orl %r9d, %ebx
|
||||
movw %bx, 2(%rdx)
|
||||
addq $4, %rdx
|
||||
cmpw %r8w, %ax
|
||||
jg .L20
|
||||
cwtl
|
||||
popq %rbx
|
||||
sall $2, %eax
|
||||
popq %rbp
|
||||
ret
|
||||
.align 16
|
||||
.L34:
|
||||
testw %r8w, %r8w
|
||||
je .L27
|
||||
movaps %xmm0, %xmm1
|
||||
movq %rdi, %rcx
|
||||
movdqa .LC1(%rip), %xmm4
|
||||
movq %rsi, %r10
|
||||
shufps $0, %xmm1, %xmm1
|
||||
xorl %r9d, %r9d
|
||||
movdqa .LC2(%rip), %xmm3
|
||||
movaps %xmm1, %xmm9
|
||||
movdqa .LC3(%rip), %xmm8
|
||||
.align 16
|
||||
.L19:
|
||||
movaps (%rcx), %xmm1
|
||||
addl $1, %r9d
|
||||
movdqa %xmm3, %xmm5
|
||||
mulps %xmm9, %xmm1
|
||||
movaps 16(%rcx), %xmm6
|
||||
movdqa %xmm3, %xmm7
|
||||
addq $32, %rcx
|
||||
mulps %xmm9, %xmm6
|
||||
cvttps2dq %xmm1, %xmm1
|
||||
movdqa %xmm1, %xmm2
|
||||
pcmpgtd %xmm4, %xmm2
|
||||
cvttps2dq %xmm6, %xmm6
|
||||
pand %xmm2, %xmm1
|
||||
pandn %xmm4, %xmm2
|
||||
por %xmm1, %xmm2
|
||||
movdqa %xmm2, %xmm1
|
||||
pcmpgtd %xmm3, %xmm1
|
||||
pand %xmm1, %xmm5
|
||||
pandn %xmm2, %xmm1
|
||||
movdqa %xmm1, %xmm2
|
||||
movdqa %xmm6, %xmm1
|
||||
por %xmm5, %xmm2
|
||||
pcmpgtd %xmm4, %xmm1
|
||||
pand %xmm1, %xmm6
|
||||
pandn %xmm4, %xmm1
|
||||
movdqa %xmm2, %xmm5
|
||||
pslld $8, %xmm2
|
||||
pand %xmm8, %xmm5
|
||||
por %xmm6, %xmm1
|
||||
psrad $8, %xmm5
|
||||
movdqa %xmm1, %xmm6
|
||||
pcmpgtd %xmm3, %xmm6
|
||||
pand %xmm6, %xmm7
|
||||
pandn %xmm1, %xmm6
|
||||
movdqa %xmm6, %xmm1
|
||||
por %xmm7, %xmm1
|
||||
movdqa %xmm5, %xmm7
|
||||
movdqa %xmm1, %xmm6
|
||||
pslld $8, %xmm1
|
||||
pand %xmm8, %xmm6
|
||||
psrad $8, %xmm6
|
||||
punpcklwd %xmm6, %xmm5
|
||||
punpckhwd %xmm6, %xmm7
|
||||
movdqa %xmm5, %xmm6
|
||||
punpcklwd %xmm7, %xmm5
|
||||
punpckhwd %xmm7, %xmm6
|
||||
punpcklwd %xmm6, %xmm5
|
||||
movdqa %xmm2, %xmm6
|
||||
punpcklwd %xmm1, %xmm2
|
||||
punpckhwd %xmm1, %xmm6
|
||||
movdqa %xmm2, %xmm1
|
||||
punpcklwd %xmm6, %xmm2
|
||||
punpckhwd %xmm6, %xmm1
|
||||
punpcklwd %xmm1, %xmm2
|
||||
por %xmm2, %xmm5
|
||||
movdqa %xmm5, (%r10)
|
||||
addq $16, %r10
|
||||
cmpw %r9w, %bx
|
||||
ja .L19
|
||||
cmpw %dx, %r8w
|
||||
jne .L18
|
||||
jmp .L15
|
||||
.align 16
|
||||
.L33:
|
||||
testw %r8w, %r8w
|
||||
je .L28
|
||||
movaps %xmm0, %xmm1
|
||||
movq %rdi, %rcx
|
||||
movdqa .LC1(%rip), %xmm4
|
||||
movq %rsi, %r10
|
||||
shufps $0, %xmm1, %xmm1
|
||||
xorl %r9d, %r9d
|
||||
movdqa .LC2(%rip), %xmm3
|
||||
movaps %xmm1, %xmm6
|
||||
.align 16
|
||||
.L24:
|
||||
movaps (%rcx), %xmm1
|
||||
addl $1, %r9d
|
||||
movdqa %xmm3, %xmm7
|
||||
mulps %xmm6, %xmm1
|
||||
movaps 16(%rcx), %xmm5
|
||||
addq $32, %rcx
|
||||
mulps %xmm6, %xmm5
|
||||
cvttps2dq %xmm1, %xmm1
|
||||
movdqa %xmm1, %xmm2
|
||||
pcmpgtd %xmm4, %xmm2
|
||||
cvttps2dq %xmm5, %xmm5
|
||||
pand %xmm2, %xmm1
|
||||
pandn %xmm4, %xmm2
|
||||
por %xmm1, %xmm2
|
||||
movdqa %xmm2, %xmm1
|
||||
pcmpgtd %xmm3, %xmm1
|
||||
pand %xmm1, %xmm7
|
||||
pandn %xmm2, %xmm1
|
||||
movdqa %xmm1, %xmm2
|
||||
movdqa %xmm5, %xmm1
|
||||
por %xmm7, %xmm2
|
||||
movdqa %xmm3, %xmm7
|
||||
pcmpgtd %xmm4, %xmm1
|
||||
pand %xmm1, %xmm5
|
||||
pandn %xmm4, %xmm1
|
||||
por %xmm5, %xmm1
|
||||
movdqa %xmm1, %xmm5
|
||||
pcmpgtd %xmm3, %xmm5
|
||||
pand %xmm5, %xmm7
|
||||
pandn %xmm1, %xmm5
|
||||
movdqa %xmm5, %xmm1
|
||||
movdqa %xmm2, %xmm5
|
||||
por %xmm7, %xmm1
|
||||
punpcklwd %xmm1, %xmm2
|
||||
punpckhwd %xmm1, %xmm5
|
||||
movdqa %xmm2, %xmm1
|
||||
punpcklwd %xmm5, %xmm2
|
||||
punpckhwd %xmm5, %xmm1
|
||||
punpcklwd %xmm1, %xmm2
|
||||
movdqa %xmm2, (%r10)
|
||||
addq $16, %r10
|
||||
cmpw %r9w, %bx
|
||||
ja .L24
|
||||
cmpw %r8w, %dx
|
||||
jne .L23
|
||||
jmp .L15
|
||||
.LFE511:
|
||||
.size alignedConvertToS16SSE2, .-alignedConvertToS16SSE2
|
||||
.section .rodata
|
||||
.align 4
|
||||
.LC0:
|
||||
.long 1191181824
|
||||
.align 16
|
||||
.LC1:
|
||||
.long -32768
|
||||
.long -32768
|
||||
.long -32768
|
||||
.long -32768
|
||||
.align 16
|
||||
.LC2:
|
||||
.long 32767
|
||||
.long 32767
|
||||
.long 32767
|
||||
.long 32767
|
||||
.align 16
|
||||
.LC3:
|
||||
.long 65280
|
||||
.long 65280
|
||||
.long 65280
|
||||
.long 65280
|
||||
.section .eh_frame,"aw",@progbits
|
||||
.Lframe1:
|
||||
.long .LECIE1-.LSCIE1
|
||||
.LSCIE1:
|
||||
.long 0x0
|
||||
.byte 0x1
|
||||
.string "zR"
|
||||
.byte 0x1
|
||||
.byte 0x78
|
||||
.byte 0x10
|
||||
.byte 0x1
|
||||
.byte 0x3
|
||||
.byte 0xc
|
||||
.byte 0x7
|
||||
.byte 0x8
|
||||
.byte 0x11
|
||||
.byte 0x10
|
||||
.byte 0x1
|
||||
.align 8
|
||||
.LECIE1:
|
||||
.LSFDE1:
|
||||
.long .LEFDE1-.LASFDE1
|
||||
.LASFDE1:
|
||||
.long .LASFDE1-.Lframe1
|
||||
.long .LFB509
|
||||
.long .LFE509-.LFB509
|
||||
.byte 0x0
|
||||
.align 8
|
||||
.LEFDE1:
|
||||
.LSFDE3:
|
||||
.long .LEFDE3-.LASFDE3
|
||||
.LASFDE3:
|
||||
.long .LASFDE3-.Lframe1
|
||||
.long .LFB510
|
||||
.long .LFE510-.LFB510
|
||||
.byte 0x0
|
||||
.align 8
|
||||
.LEFDE3:
|
||||
.LSFDE5:
|
||||
.long .LEFDE5-.LASFDE5
|
||||
.LASFDE5:
|
||||
.long .LASFDE5-.Lframe1
|
||||
.long .LFB511
|
||||
.long .LFE511-.LFB511
|
||||
.byte 0x0
|
||||
.byte 0x4
|
||||
.long .LCFI0-.LFB511
|
||||
.byte 0xe
|
||||
.byte 0x10
|
||||
.byte 0x4
|
||||
.long .LCFI1-.LCFI0
|
||||
.byte 0xe
|
||||
.byte 0x18
|
||||
.byte 0x11
|
||||
.byte 0x3
|
||||
.byte 0x3
|
||||
.byte 0x11
|
||||
.byte 0x6
|
||||
.byte 0x2
|
||||
.align 8
|
||||
.LEFDE5:
|
||||
.ident "GCC: (GNU) 4.4.0 20090304 (experimental)"
|
||||
@@ -1,107 +0,0 @@
|
||||
.file "basic_ops_x86.c"
|
||||
.text
|
||||
.p2align 4,,15
|
||||
.globl alignedMemCpyMMX
|
||||
.type alignedMemCpyMMX, @function
|
||||
alignedMemCpyMMX:
|
||||
pushl %ebx
|
||||
subl $112, %esp
|
||||
movl 128(%esp), %ebx
|
||||
movl 124(%esp), %eax
|
||||
shrl $6, %ebx
|
||||
#APP
|
||||
# 42 "/home/toby/development/git/lmms/src/core/basic_ops_x86.c" 1
|
||||
fsave 4(%esp); fwait
|
||||
|
||||
# 0 "" 2
|
||||
# 44 "/home/toby/development/git/lmms/src/core/basic_ops_x86.c" 1
|
||||
1: prefetchnta (%eax)
|
||||
prefetchnta 64(%eax)
|
||||
prefetchnta 128(%eax)
|
||||
prefetchnta 192(%eax)
|
||||
prefetchnta 256(%eax)
|
||||
|
||||
# 0 "" 2
|
||||
#NO_APP
|
||||
testl %ebx, %ebx
|
||||
je .L2
|
||||
movl 120(%esp), %ecx
|
||||
xorl %edx, %edx
|
||||
.p2align 4,,7
|
||||
.p2align 3
|
||||
.L3:
|
||||
#APP
|
||||
# 53 "/home/toby/development/git/lmms/src/core/basic_ops_x86.c" 1
|
||||
1: prefetchnta 320(%eax)
|
||||
2: movq (%eax), %mm0
|
||||
movq 8(%eax), %mm1
|
||||
movq 16(%eax), %mm2
|
||||
movq 24(%eax), %mm3
|
||||
movq %mm0, (%ecx)
|
||||
movq %mm1, 8(%ecx)
|
||||
movq %mm2, 16(%ecx)
|
||||
movq %mm3, 24(%ecx)
|
||||
movq 32(%eax), %mm0
|
||||
movq 40(%eax), %mm1
|
||||
movq 48(%eax), %mm2
|
||||
movq 56(%eax), %mm3
|
||||
movq %mm0, 32(%ecx)
|
||||
movq %mm1, 40(%ecx)
|
||||
movq %mm2, 48(%ecx)
|
||||
movq %mm3, 56(%ecx)
|
||||
|
||||
# 0 "" 2
|
||||
#NO_APP
|
||||
addl $1, %edx
|
||||
addl $64, %eax
|
||||
addl $64, %ecx
|
||||
cmpl %edx, %ebx
|
||||
jne .L3
|
||||
.L2:
|
||||
#APP
|
||||
# 75 "/home/toby/development/git/lmms/src/core/basic_ops_x86.c" 1
|
||||
fsave 4(%esp); fwait
|
||||
|
||||
# 0 "" 2
|
||||
#NO_APP
|
||||
addl $112, %esp
|
||||
popl %ebx
|
||||
ret
|
||||
.size alignedMemCpyMMX, .-alignedMemCpyMMX
|
||||
.p2align 4,,15
|
||||
.globl alignedMemClearMMX
|
||||
.type alignedMemClearMMX, @function
|
||||
alignedMemClearMMX:
|
||||
movl 8(%esp), %ecx
|
||||
shrl $6, %ecx
|
||||
testl %ecx, %ecx
|
||||
je .L8
|
||||
movl 4(%esp), %edx
|
||||
xorl %eax, %eax
|
||||
pxor %mm0, %mm0
|
||||
.p2align 4,,7
|
||||
.p2align 3
|
||||
.L9:
|
||||
#APP
|
||||
# 90 "/home/toby/development/git/lmms/src/core/basic_ops_x86.c" 1
|
||||
movq %mm0, (%edx)
|
||||
movq %mm0, 8(%edx)
|
||||
movq %mm0, 16(%edx)
|
||||
movq %mm0, 24(%edx)
|
||||
movq %mm0, 32(%edx)
|
||||
movq %mm0, 40(%edx)
|
||||
movq %mm0, 48(%edx)
|
||||
movq %mm0, 56(%edx)
|
||||
|
||||
# 0 "" 2
|
||||
#NO_APP
|
||||
addl $1, %eax
|
||||
addl $64, %edx
|
||||
cmpl %eax, %ecx
|
||||
jne .L9
|
||||
.L8:
|
||||
emms
|
||||
ret
|
||||
.size alignedMemClearMMX, .-alignedMemClearMMX
|
||||
.ident "GCC: (Ubuntu 4.4.0-0ubuntu2) 4.4.0"
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
@@ -1,494 +0,0 @@
|
||||
.file "basic_ops_x86.c"
|
||||
.text
|
||||
.p2align 4,,15
|
||||
.globl alignedMemCpySSE
|
||||
.type alignedMemCpySSE, @function
|
||||
alignedMemCpySSE:
|
||||
pushl %esi
|
||||
pushl %ebx
|
||||
movl 20(%esp), %esi
|
||||
movl 12(%esp), %edx
|
||||
movl 16(%esp), %ecx
|
||||
shrl $6, %esi
|
||||
testl %esi, %esi
|
||||
je .L4
|
||||
xorl %eax, %eax
|
||||
xorl %ebx, %ebx
|
||||
.p2align 4,,7
|
||||
.p2align 3
|
||||
.L3:
|
||||
movaps (%ecx,%eax), %xmm0
|
||||
addl $1, %ebx
|
||||
movaps %xmm0, (%edx,%eax)
|
||||
movaps 16(%ecx,%eax), %xmm0
|
||||
movaps %xmm0, 16(%edx,%eax)
|
||||
movaps 32(%ecx,%eax), %xmm0
|
||||
movaps %xmm0, 32(%edx,%eax)
|
||||
movaps 48(%ecx,%eax), %xmm0
|
||||
movaps %xmm0, 48(%edx,%eax)
|
||||
addl $64, %eax
|
||||
cmpl %ebx, %esi
|
||||
jne .L3
|
||||
.L4:
|
||||
popl %ebx
|
||||
popl %esi
|
||||
ret
|
||||
.size alignedMemCpySSE, .-alignedMemCpySSE
|
||||
.p2align 4,,15
|
||||
.globl alignedMemClearSSE
|
||||
.type alignedMemClearSSE, @function
|
||||
alignedMemClearSSE:
|
||||
movl 8(%esp), %ecx
|
||||
shrl $6, %ecx
|
||||
testl %ecx, %ecx
|
||||
je .L10
|
||||
movl 4(%esp), %eax
|
||||
xorps %xmm0, %xmm0
|
||||
xorl %edx, %edx
|
||||
.p2align 4,,7
|
||||
.p2align 3
|
||||
.L9:
|
||||
addl $1, %edx
|
||||
movaps %xmm0, (%eax)
|
||||
movaps %xmm0, 16(%eax)
|
||||
movaps %xmm0, 32(%eax)
|
||||
movaps %xmm0, 48(%eax)
|
||||
addl $64, %eax
|
||||
cmpl %edx, %ecx
|
||||
jne .L9
|
||||
.L10:
|
||||
rep
|
||||
ret
|
||||
.size alignedMemClearSSE, .-alignedMemClearSSE
|
||||
.p2align 4,,15
|
||||
.globl alignedBufApplyGainSSE
|
||||
.type alignedBufApplyGainSSE, @function
|
||||
alignedBufApplyGainSSE:
|
||||
movl 12(%esp), %ecx
|
||||
testl %ecx, %ecx
|
||||
jle .L15
|
||||
movss 8(%esp), %xmm0
|
||||
subl $1, %ecx
|
||||
movl 4(%esp), %eax
|
||||
shrl $3, %ecx
|
||||
xorl %edx, %edx
|
||||
addl $1, %ecx
|
||||
shufps $0, %xmm0, %xmm0
|
||||
.p2align 4,,7
|
||||
.p2align 3
|
||||
.L14:
|
||||
movaps 16(%eax), %xmm3
|
||||
addl $1, %edx
|
||||
movaps 32(%eax), %xmm2
|
||||
mulps %xmm0, %xmm3
|
||||
movaps 48(%eax), %xmm1
|
||||
mulps %xmm0, %xmm2
|
||||
movaps (%eax), %xmm4
|
||||
mulps %xmm0, %xmm1
|
||||
movaps %xmm3, 16(%eax)
|
||||
mulps %xmm0, %xmm4
|
||||
movaps %xmm2, 32(%eax)
|
||||
movaps %xmm1, 48(%eax)
|
||||
movaps %xmm4, (%eax)
|
||||
addl $64, %eax
|
||||
cmpl %edx, %ecx
|
||||
ja .L14
|
||||
.L15:
|
||||
rep
|
||||
ret
|
||||
.size alignedBufApplyGainSSE, .-alignedBufApplyGainSSE
|
||||
.p2align 4,,15
|
||||
.globl alignedBufMixSSE
|
||||
.type alignedBufMixSSE, @function
|
||||
alignedBufMixSSE:
|
||||
pushl %esi
|
||||
pushl %ebx
|
||||
movl 20(%esp), %esi
|
||||
movl 12(%esp), %edx
|
||||
movl 16(%esp), %ecx
|
||||
testl %esi, %esi
|
||||
jle .L20
|
||||
subl $1, %esi
|
||||
xorl %eax, %eax
|
||||
shrl $3, %esi
|
||||
xorl %ebx, %ebx
|
||||
addl $1, %esi
|
||||
.p2align 4,,7
|
||||
.p2align 3
|
||||
.L19:
|
||||
movaps 16(%edx,%eax), %xmm2
|
||||
addl $1, %ebx
|
||||
movaps 32(%edx,%eax), %xmm1
|
||||
movaps 48(%edx,%eax), %xmm0
|
||||
movaps (%edx,%eax), %xmm3
|
||||
addps 16(%ecx,%eax), %xmm2
|
||||
addps 32(%ecx,%eax), %xmm1
|
||||
addps 48(%ecx,%eax), %xmm0
|
||||
addps (%ecx,%eax), %xmm3
|
||||
movaps %xmm2, 16(%edx,%eax)
|
||||
movaps %xmm3, (%edx,%eax)
|
||||
movaps %xmm1, 32(%edx,%eax)
|
||||
movaps %xmm0, 48(%edx,%eax)
|
||||
addl $64, %eax
|
||||
cmpl %ebx, %esi
|
||||
ja .L19
|
||||
.L20:
|
||||
popl %ebx
|
||||
popl %esi
|
||||
ret
|
||||
.size alignedBufMixSSE, .-alignedBufMixSSE
|
||||
.p2align 4,,15
|
||||
.globl alignedBufMixLRCoeffSSE
|
||||
.type alignedBufMixLRCoeffSSE, @function
|
||||
alignedBufMixLRCoeffSSE:
|
||||
pushl %esi
|
||||
pushl %ebx
|
||||
movl 28(%esp), %esi
|
||||
movl 12(%esp), %edx
|
||||
movl 16(%esp), %ebx
|
||||
testl %esi, %esi
|
||||
jle .L25
|
||||
movss 24(%esp), %xmm2
|
||||
subl $1, %esi
|
||||
movss 20(%esp), %xmm0
|
||||
xorl %eax, %eax
|
||||
shrl $2, %esi
|
||||
xorl %ecx, %ecx
|
||||
addl $1, %esi
|
||||
unpcklps %xmm2, %xmm0
|
||||
movaps %xmm0, %xmm2
|
||||
movlhps %xmm0, %xmm2
|
||||
.p2align 4,,7
|
||||
.p2align 3
|
||||
.L24:
|
||||
movaps 16(%ebx,%eax), %xmm0
|
||||
addl $1, %ecx
|
||||
movaps (%ebx,%eax), %xmm1
|
||||
mulps %xmm2, %xmm0
|
||||
mulps %xmm2, %xmm1
|
||||
addps 16(%edx,%eax), %xmm0
|
||||
addps (%edx,%eax), %xmm1
|
||||
movaps %xmm0, 16(%edx,%eax)
|
||||
movaps %xmm1, (%edx,%eax)
|
||||
addl $32, %eax
|
||||
cmpl %ecx, %esi
|
||||
ja .L24
|
||||
.L25:
|
||||
popl %ebx
|
||||
popl %esi
|
||||
ret
|
||||
.size alignedBufMixLRCoeffSSE, .-alignedBufMixLRCoeffSSE
|
||||
.p2align 4,,15
|
||||
.globl alignedBufWetDryMixSSE
|
||||
.type alignedBufWetDryMixSSE, @function
|
||||
alignedBufWetDryMixSSE:
|
||||
pushl %esi
|
||||
pushl %ebx
|
||||
movl 28(%esp), %esi
|
||||
movl 12(%esp), %edx
|
||||
movl 16(%esp), %ebx
|
||||
testl %esi, %esi
|
||||
jle .L30
|
||||
movss 24(%esp), %xmm3
|
||||
subl $1, %esi
|
||||
movss 20(%esp), %xmm2
|
||||
xorl %eax, %eax
|
||||
shrl $2, %esi
|
||||
xorl %ecx, %ecx
|
||||
shufps $0, %xmm3, %xmm3
|
||||
addl $1, %esi
|
||||
shufps $0, %xmm2, %xmm2
|
||||
.p2align 4,,7
|
||||
.p2align 3
|
||||
.L29:
|
||||
movaps 16(%ebx,%eax), %xmm1
|
||||
addl $1, %ecx
|
||||
movaps 16(%edx,%eax), %xmm0
|
||||
mulps %xmm2, %xmm1
|
||||
movaps (%ebx,%eax), %xmm4
|
||||
mulps %xmm3, %xmm0
|
||||
mulps %xmm2, %xmm4
|
||||
addps %xmm1, %xmm0
|
||||
movaps (%edx,%eax), %xmm1
|
||||
mulps %xmm3, %xmm1
|
||||
movaps %xmm0, 16(%edx,%eax)
|
||||
addps %xmm4, %xmm1
|
||||
movaps %xmm1, (%edx,%eax)
|
||||
addl $32, %eax
|
||||
cmpl %ecx, %esi
|
||||
ja .L29
|
||||
.L30:
|
||||
popl %ebx
|
||||
popl %esi
|
||||
ret
|
||||
.size alignedBufWetDryMixSSE, .-alignedBufWetDryMixSSE
|
||||
.p2align 4,,15
|
||||
.globl alignedBufWetDryMixSplittedSSE
|
||||
.type alignedBufWetDryMixSplittedSSE, @function
|
||||
alignedBufWetDryMixSplittedSSE:
|
||||
pushl %ebp
|
||||
pushl %edi
|
||||
pushl %esi
|
||||
pushl %ebx
|
||||
subl $124, %esp
|
||||
movl 164(%esp), %eax
|
||||
movl 144(%esp), %edx
|
||||
movl 148(%esp), %esi
|
||||
movl 152(%esp), %ecx
|
||||
testl %eax, %eax
|
||||
jle .L39
|
||||
movl 164(%esp), %eax
|
||||
subl $1, %eax
|
||||
shrl %eax
|
||||
addl $1, %eax
|
||||
movl %eax, %ebp
|
||||
movl %eax, 104(%esp)
|
||||
shrl $2, %ebp
|
||||
cmpl $3, 104(%esp)
|
||||
leal 0(,%ebp,4), %eax
|
||||
movl %eax, 108(%esp)
|
||||
jbe .L40
|
||||
testl %eax, %eax
|
||||
jne .L34
|
||||
.L40:
|
||||
xorl %edi, %edi
|
||||
jmp .L36
|
||||
.p2align 4,,7
|
||||
.p2align 3
|
||||
.L34:
|
||||
movss 160(%esp), %xmm0
|
||||
xorps %xmm7, %xmm7
|
||||
movl %esi, %ebx
|
||||
xorl %eax, %eax
|
||||
xorl %edi, %edi
|
||||
shufps $0, %xmm0, %xmm0
|
||||
movaps %xmm0, 16(%esp)
|
||||
movss 156(%esp), %xmm0
|
||||
shufps $0, %xmm0, %xmm0
|
||||
movaps %xmm0, (%esp)
|
||||
.p2align 4,,7
|
||||
.p2align 3
|
||||
.L37:
|
||||
movaps (%edx,%eax,2), %xmm5
|
||||
addl $1, %edi
|
||||
movaps 16(%edx,%eax,2), %xmm6
|
||||
movaps %xmm5, %xmm0
|
||||
shufps $136, %xmm6, %xmm0
|
||||
movaps 32(%edx,%eax,2), %xmm4
|
||||
shufps $221, %xmm6, %xmm5
|
||||
movaps %xmm0, 80(%esp)
|
||||
movaps 48(%edx,%eax,2), %xmm3
|
||||
movaps %xmm4, %xmm0
|
||||
shufps $136, %xmm3, %xmm0
|
||||
movaps 80(%esp), %xmm2
|
||||
shufps $221, %xmm3, %xmm4
|
||||
movaps %xmm7, %xmm6
|
||||
movlps (%ebx), %xmm6
|
||||
movaps %xmm5, 64(%esp)
|
||||
movhps 8(%ebx), %xmm6
|
||||
shufps $136, %xmm0, %xmm2
|
||||
movaps %xmm0, 48(%esp)
|
||||
movaps %xmm7, %xmm5
|
||||
movaps %xmm6, %xmm0
|
||||
movlps 16(%ebx), %xmm5
|
||||
movhps 24(%ebx), %xmm5
|
||||
shufps $136, %xmm5, %xmm0
|
||||
mulps 16(%esp), %xmm2
|
||||
shufps $221, %xmm5, %xmm6
|
||||
movaps %xmm4, 32(%esp)
|
||||
addl $32, %ebx
|
||||
mulps (%esp), %xmm0
|
||||
movaps %xmm7, %xmm4
|
||||
movlps (%eax,%ecx), %xmm4
|
||||
movaps %xmm7, %xmm3
|
||||
movhps 8(%eax,%ecx), %xmm4
|
||||
movaps %xmm4, %xmm1
|
||||
movlps 16(%ecx,%eax), %xmm3
|
||||
movhps 24(%ecx,%eax), %xmm3
|
||||
shufps $136, %xmm3, %xmm1
|
||||
addps %xmm0, %xmm2
|
||||
movaps 64(%esp), %xmm0
|
||||
shufps $221, %xmm3, %xmm4
|
||||
shufps $136, 32(%esp), %xmm0
|
||||
mulps (%esp), %xmm1
|
||||
movaps %xmm2, %xmm3
|
||||
movaps 64(%esp), %xmm5
|
||||
mulps 16(%esp), %xmm0
|
||||
shufps $221, 32(%esp), %xmm5
|
||||
mulps (%esp), %xmm6
|
||||
addps %xmm1, %xmm0
|
||||
movaps 80(%esp), %xmm1
|
||||
shufps $221, 48(%esp), %xmm1
|
||||
mulps (%esp), %xmm4
|
||||
mulps 16(%esp), %xmm1
|
||||
mulps 16(%esp), %xmm5
|
||||
addps %xmm6, %xmm1
|
||||
addps %xmm4, %xmm5
|
||||
movaps %xmm0, %xmm4
|
||||
unpcklps %xmm1, %xmm3
|
||||
unpcklps %xmm5, %xmm4
|
||||
unpckhps %xmm1, %xmm2
|
||||
movaps %xmm3, %xmm1
|
||||
unpckhps %xmm5, %xmm0
|
||||
unpcklps %xmm4, %xmm1
|
||||
unpckhps %xmm4, %xmm3
|
||||
movaps %xmm1, (%edx,%eax,2)
|
||||
movaps %xmm2, %xmm1
|
||||
unpckhps %xmm0, %xmm2
|
||||
unpcklps %xmm0, %xmm1
|
||||
movaps %xmm3, 16(%edx,%eax,2)
|
||||
movaps %xmm1, 32(%edx,%eax,2)
|
||||
movaps %xmm2, 48(%edx,%eax,2)
|
||||
addl $32, %eax
|
||||
cmpl %edi, %ebp
|
||||
ja .L37
|
||||
movl 108(%esp), %edi
|
||||
movl 104(%esp), %eax
|
||||
addl %edi, %edi
|
||||
cmpl %eax, 108(%esp)
|
||||
je .L39
|
||||
.L36:
|
||||
movss 156(%esp), %xmm0
|
||||
xorl %ebp, %ebp
|
||||
movss 160(%esp), %xmm1
|
||||
movl %edi, %eax
|
||||
leal (%edx,%edi,8), %ebx
|
||||
leal 8(%edx,%edi,8), %edx
|
||||
.p2align 4,,7
|
||||
.p2align 3
|
||||
.L38:
|
||||
movss (%esi,%eax,4), %xmm3
|
||||
addl $2, %ebp
|
||||
movss (%ebx), %xmm2
|
||||
mulss %xmm0, %xmm3
|
||||
mulss %xmm1, %xmm2
|
||||
addss %xmm3, %xmm2
|
||||
movss %xmm2, (%ebx)
|
||||
movss 4(%ebx), %xmm2
|
||||
movss (%ecx,%eax,4), %xmm3
|
||||
mulss %xmm1, %xmm2
|
||||
mulss %xmm0, %xmm3
|
||||
addss %xmm3, %xmm2
|
||||
movss %xmm2, 4(%ebx)
|
||||
addl $16, %ebx
|
||||
movss 4(%esi,%eax,4), %xmm3
|
||||
movss (%edx), %xmm2
|
||||
mulss %xmm0, %xmm3
|
||||
mulss %xmm1, %xmm2
|
||||
addss %xmm3, %xmm2
|
||||
movss %xmm2, (%edx)
|
||||
movss 4(%edx), %xmm2
|
||||
movss 4(%ecx,%eax,4), %xmm3
|
||||
mulss %xmm1, %xmm2
|
||||
leal (%edi,%ebp), %eax
|
||||
mulss %xmm0, %xmm3
|
||||
addss %xmm3, %xmm2
|
||||
movss %xmm2, 4(%edx)
|
||||
addl $16, %edx
|
||||
cmpl %eax, 164(%esp)
|
||||
jg .L38
|
||||
.L39:
|
||||
addl $124, %esp
|
||||
popl %ebx
|
||||
popl %esi
|
||||
popl %edi
|
||||
popl %ebp
|
||||
ret
|
||||
.size alignedBufWetDryMixSplittedSSE, .-alignedBufWetDryMixSplittedSSE
|
||||
.p2align 4,,15
|
||||
.globl unalignedBufMixLRCoeffSSE
|
||||
.type unalignedBufMixLRCoeffSSE, @function
|
||||
unalignedBufMixLRCoeffSSE:
|
||||
pushl %esi
|
||||
pushl %ebx
|
||||
movl 28(%esp), %ebx
|
||||
movl 12(%esp), %eax
|
||||
movl 16(%esp), %edx
|
||||
movss 20(%esp), %xmm1
|
||||
movl %ebx, %esi
|
||||
shrl $31, %esi
|
||||
leal (%ebx,%esi), %ecx
|
||||
andl $1, %ecx
|
||||
cmpl %esi, %ecx
|
||||
movss 24(%esp), %xmm3
|
||||
jne .L52
|
||||
.L44:
|
||||
testl %ebx, %ebx
|
||||
jle .L49
|
||||
testb $15, %al
|
||||
jne .L46
|
||||
movaps %xmm1, %xmm0
|
||||
subl $1, %ebx
|
||||
unpcklps %xmm3, %xmm0
|
||||
shrl %ebx
|
||||
xorps %xmm2, %xmm2
|
||||
movaps %xmm0, %xmm3
|
||||
addl $1, %ebx
|
||||
movlhps %xmm0, %xmm3
|
||||
xorl %ecx, %ecx
|
||||
.p2align 4,,7
|
||||
.p2align 3
|
||||
.L47:
|
||||
movaps %xmm2, %xmm1
|
||||
addl $1, %ecx
|
||||
movlps (%edx), %xmm1
|
||||
movhps 8(%edx), %xmm1
|
||||
movaps %xmm2, %xmm0
|
||||
movlps (%eax), %xmm0
|
||||
movhps 8(%eax), %xmm0
|
||||
addl $16, %edx
|
||||
mulps %xmm3, %xmm1
|
||||
addps %xmm1, %xmm0
|
||||
movaps %xmm0, (%eax)
|
||||
addl $16, %eax
|
||||
cmpl %ebx, %ecx
|
||||
jb .L47
|
||||
.L49:
|
||||
popl %ebx
|
||||
popl %esi
|
||||
ret
|
||||
.p2align 4,,7
|
||||
.p2align 3
|
||||
.L46:
|
||||
xorl %ecx, %ecx
|
||||
.p2align 4,,7
|
||||
.p2align 3
|
||||
.L48:
|
||||
movss (%edx,%ecx,8), %xmm0
|
||||
mulss %xmm1, %xmm0
|
||||
addss (%eax,%ecx,8), %xmm0
|
||||
movss %xmm0, (%eax,%ecx,8)
|
||||
movss 4(%edx,%ecx,8), %xmm0
|
||||
mulss %xmm3, %xmm0
|
||||
addss 4(%eax,%ecx,8), %xmm0
|
||||
movss %xmm0, 4(%eax,%ecx,8)
|
||||
movss 8(%edx,%ecx,8), %xmm0
|
||||
mulss %xmm1, %xmm0
|
||||
addss 8(%eax,%ecx,8), %xmm0
|
||||
movss %xmm0, 8(%eax,%ecx,8)
|
||||
movss 12(%edx,%ecx,8), %xmm0
|
||||
mulss %xmm3, %xmm0
|
||||
addss 12(%eax,%ecx,8), %xmm0
|
||||
movss %xmm0, 12(%eax,%ecx,8)
|
||||
addl $2, %ecx
|
||||
cmpl %ecx, %ebx
|
||||
jg .L48
|
||||
popl %ebx
|
||||
popl %esi
|
||||
ret
|
||||
.L52:
|
||||
movss (%edx), %xmm0
|
||||
subl $1, %ebx
|
||||
mulss %xmm1, %xmm0
|
||||
addss (%eax), %xmm0
|
||||
movss %xmm0, (%eax)
|
||||
movss 4(%edx), %xmm0
|
||||
addl $8, %edx
|
||||
mulss %xmm3, %xmm0
|
||||
addss 4(%eax), %xmm0
|
||||
movss %xmm0, 4(%eax)
|
||||
addl $8, %eax
|
||||
jmp .L44
|
||||
.size unalignedBufMixLRCoeffSSE, .-unalignedBufMixLRCoeffSSE
|
||||
.ident "GCC: (Ubuntu 4.4.0-0ubuntu2) 4.4.0"
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
@@ -1,349 +0,0 @@
|
||||
.file "basic_ops_x86.c"
|
||||
.text
|
||||
.p2align 4,,15
|
||||
.globl alignedMemCpySSE2
|
||||
.type alignedMemCpySSE2, @function
|
||||
alignedMemCpySSE2:
|
||||
pushl %esi
|
||||
pushl %ebx
|
||||
movl 20(%esp), %esi
|
||||
movl 12(%esp), %edx
|
||||
movl 16(%esp), %ecx
|
||||
shrl $6, %esi
|
||||
testl %esi, %esi
|
||||
je .L4
|
||||
xorl %eax, %eax
|
||||
xorl %ebx, %ebx
|
||||
.p2align 4,,7
|
||||
.p2align 3
|
||||
.L3:
|
||||
addl $1, %ebx
|
||||
movdqa (%ecx,%eax), %xmm0
|
||||
movdqa %xmm0, (%edx,%eax)
|
||||
movdqa 16(%ecx,%eax), %xmm0
|
||||
movdqa %xmm0, 16(%edx,%eax)
|
||||
movdqa 32(%ecx,%eax), %xmm0
|
||||
movdqa %xmm0, 32(%edx,%eax)
|
||||
movdqa 48(%ecx,%eax), %xmm0
|
||||
movdqa %xmm0, 48(%edx,%eax)
|
||||
addl $64, %eax
|
||||
cmpl %ebx, %esi
|
||||
jne .L3
|
||||
.L4:
|
||||
popl %ebx
|
||||
popl %esi
|
||||
ret
|
||||
.size alignedMemCpySSE2, .-alignedMemCpySSE2
|
||||
.p2align 4,,15
|
||||
.globl alignedMemClearSSE2
|
||||
.type alignedMemClearSSE2, @function
|
||||
alignedMemClearSSE2:
|
||||
movl 8(%esp), %ecx
|
||||
shrl $6, %ecx
|
||||
testl %ecx, %ecx
|
||||
je .L10
|
||||
movl 4(%esp), %eax
|
||||
xorl %edx, %edx
|
||||
pxor %xmm0, %xmm0
|
||||
.p2align 4,,7
|
||||
.p2align 3
|
||||
.L9:
|
||||
addl $1, %edx
|
||||
movdqa %xmm0, (%eax)
|
||||
movdqa %xmm0, 16(%eax)
|
||||
movdqa %xmm0, 32(%eax)
|
||||
movdqa %xmm0, 48(%eax)
|
||||
addl $64, %eax
|
||||
cmpl %edx, %ecx
|
||||
jne .L9
|
||||
.L10:
|
||||
rep
|
||||
ret
|
||||
.size alignedMemClearSSE2, .-alignedMemClearSSE2
|
||||
.p2align 4,,15
|
||||
.globl alignedConvertToS16SSE2
|
||||
.type alignedConvertToS16SSE2, @function
|
||||
alignedConvertToS16SSE2:
|
||||
pushl %ebp
|
||||
pushl %edi
|
||||
pushl %esi
|
||||
pushl %ebx
|
||||
subl $8, %esp
|
||||
movl 36(%esp), %eax
|
||||
movss .LC0, %xmm6
|
||||
cmpb $0, 44(%esp)
|
||||
movl 28(%esp), %edx
|
||||
movl 32(%esp), %ebx
|
||||
movl %eax, %esi
|
||||
mulss 40(%esp), %xmm6
|
||||
jne .L13
|
||||
testw %ax, %ax
|
||||
jle .L15
|
||||
movl %eax, %edi
|
||||
shrw $2, %di
|
||||
cmpw $3, %ax
|
||||
movw %ax, 2(%esp)
|
||||
leal 0(,%edi,4), %ebp
|
||||
ja .L33
|
||||
.L28:
|
||||
xorl %ebp, %ebp
|
||||
.p2align 4,,7
|
||||
.p2align 3
|
||||
.L23:
|
||||
movswl %bp,%eax
|
||||
movl $-32768, %edi
|
||||
leal (%edx,%eax,8), %edx
|
||||
leal (%ebx,%eax,4), %eax
|
||||
movl $32767, %ebx
|
||||
.p2align 4,,7
|
||||
.p2align 3
|
||||
.L25:
|
||||
movss (%edx), %xmm0
|
||||
mulss %xmm6, %xmm0
|
||||
cvttss2si %xmm0, %ecx
|
||||
movss 4(%edx), %xmm0
|
||||
cmpl $-32768, %ecx
|
||||
mulss %xmm6, %xmm0
|
||||
cmovl %edi, %ecx
|
||||
cmpl $32767, %ecx
|
||||
cmovg %ebx, %ecx
|
||||
movw %cx, (%eax)
|
||||
cvttss2si %xmm0, %ecx
|
||||
cmpl $-32768, %ecx
|
||||
cmovl %edi, %ecx
|
||||
cmpl $32767, %ecx
|
||||
cmovg %ebx, %ecx
|
||||
addl $1, %ebp
|
||||
movw %cx, 2(%eax)
|
||||
addl $8, %edx
|
||||
addl $4, %eax
|
||||
cmpw %bp, %si
|
||||
jg .L25
|
||||
.L15:
|
||||
movswl %si,%esi
|
||||
addl $8, %esp
|
||||
leal 0(,%esi,4), %eax
|
||||
popl %ebx
|
||||
popl %esi
|
||||
popl %edi
|
||||
popl %ebp
|
||||
ret
|
||||
.p2align 4,,7
|
||||
.p2align 3
|
||||
.L13:
|
||||
testw %ax, %ax
|
||||
jle .L15
|
||||
movl %eax, %ebp
|
||||
shrw $2, %bp
|
||||
cmpw $3, %si
|
||||
movw %ax, 2(%esp)
|
||||
leal 0(,%ebp,4), %eax
|
||||
ja .L34
|
||||
.L27:
|
||||
xorl %eax, %eax
|
||||
.p2align 4,,7
|
||||
.p2align 3
|
||||
.L18:
|
||||
movswl %ax,%edi
|
||||
leal (%edx,%edi,8), %ecx
|
||||
leal (%ebx,%edi,4), %edx
|
||||
movl $-32768, %edi
|
||||
.p2align 4,,7
|
||||
.p2align 3
|
||||
.L20:
|
||||
movss (%ecx), %xmm0
|
||||
movl $32767, %ebp
|
||||
mulss %xmm6, %xmm0
|
||||
cvttss2si %xmm0, %ebx
|
||||
movss 4(%ecx), %xmm0
|
||||
cmpl $-32768, %ebx
|
||||
cmovl %edi, %ebx
|
||||
cmpl $32767, %ebx
|
||||
mulss %xmm6, %xmm0
|
||||
cmovg %ebp, %ebx
|
||||
movzbl %bh, %ebp
|
||||
sall $8, %ebx
|
||||
orl %ebp, %ebx
|
||||
movl $32767, %ebp
|
||||
movw %bx, (%edx)
|
||||
cvttss2si %xmm0, %ebx
|
||||
cmpl $-32768, %ebx
|
||||
cmovl %edi, %ebx
|
||||
cmpl $32767, %ebx
|
||||
cmovg %ebp, %ebx
|
||||
addl $1, %eax
|
||||
movzbl %bh, %ebp
|
||||
addl $8, %ecx
|
||||
sall $8, %ebx
|
||||
orl %ebp, %ebx
|
||||
movw %bx, 2(%edx)
|
||||
addl $4, %edx
|
||||
cmpw %ax, %si
|
||||
jg .L20
|
||||
jmp .L15
|
||||
.p2align 4,,7
|
||||
.p2align 3
|
||||
.L34:
|
||||
testw %ax, %ax
|
||||
je .L27
|
||||
movaps %xmm6, %xmm0
|
||||
xorl %ecx, %ecx
|
||||
movdqa .LC1, %xmm3
|
||||
shufps $0, %xmm0, %xmm0
|
||||
movdqa .LC2, %xmm2
|
||||
movss %xmm6, 4(%esp)
|
||||
xorl %edi, %edi
|
||||
movaps %xmm0, %xmm7
|
||||
.p2align 4,,7
|
||||
.p2align 3
|
||||
.L19:
|
||||
movaps (%edx,%ecx,2), %xmm0
|
||||
movdqa %xmm2, %xmm5
|
||||
movdqa %xmm2, %xmm6
|
||||
addl $1, %edi
|
||||
movaps 16(%edx,%ecx,2), %xmm4
|
||||
mulps %xmm7, %xmm0
|
||||
mulps %xmm7, %xmm4
|
||||
cvttps2dq %xmm0, %xmm0
|
||||
movdqa %xmm0, %xmm1
|
||||
pcmpgtd %xmm3, %xmm1
|
||||
pand %xmm1, %xmm0
|
||||
pandn %xmm3, %xmm1
|
||||
por %xmm0, %xmm1
|
||||
cvttps2dq %xmm4, %xmm4
|
||||
movdqa %xmm1, %xmm0
|
||||
pcmpgtd %xmm2, %xmm0
|
||||
pand %xmm0, %xmm5
|
||||
pandn %xmm1, %xmm0
|
||||
movdqa %xmm0, %xmm1
|
||||
movdqa %xmm4, %xmm0
|
||||
por %xmm5, %xmm1
|
||||
pcmpgtd %xmm3, %xmm0
|
||||
movdqa .LC3, %xmm5
|
||||
pand %xmm0, %xmm4
|
||||
pand %xmm1, %xmm5
|
||||
pandn %xmm3, %xmm0
|
||||
psrad $8, %xmm5
|
||||
por %xmm4, %xmm0
|
||||
pslld $8, %xmm1
|
||||
movdqa %xmm0, %xmm4
|
||||
pcmpgtd %xmm2, %xmm4
|
||||
pand %xmm4, %xmm6
|
||||
pandn %xmm0, %xmm4
|
||||
movdqa %xmm4, %xmm0
|
||||
movdqa .LC3, %xmm4
|
||||
por %xmm6, %xmm0
|
||||
pand %xmm0, %xmm4
|
||||
pslld $8, %xmm0
|
||||
psrad $8, %xmm4
|
||||
movdqa %xmm5, %xmm6
|
||||
punpcklwd %xmm4, %xmm5
|
||||
punpckhwd %xmm4, %xmm6
|
||||
movdqa %xmm5, %xmm4
|
||||
punpcklwd %xmm6, %xmm5
|
||||
punpckhwd %xmm6, %xmm4
|
||||
punpcklwd %xmm4, %xmm5
|
||||
movdqa %xmm1, %xmm4
|
||||
punpcklwd %xmm0, %xmm1
|
||||
punpckhwd %xmm0, %xmm4
|
||||
movdqa %xmm1, %xmm6
|
||||
punpcklwd %xmm4, %xmm1
|
||||
punpckhwd %xmm4, %xmm6
|
||||
punpcklwd %xmm6, %xmm1
|
||||
por %xmm1, %xmm5
|
||||
movdqa %xmm5, (%ebx,%ecx)
|
||||
addl $16, %ecx
|
||||
cmpw %di, %bp
|
||||
ja .L19
|
||||
cmpw 2(%esp), %ax
|
||||
movss 4(%esp), %xmm6
|
||||
jne .L18
|
||||
jmp .L15
|
||||
.p2align 4,,7
|
||||
.p2align 3
|
||||
.L33:
|
||||
testw %bp, %bp
|
||||
.p2align 4,,3
|
||||
.p2align 3
|
||||
je .L28
|
||||
movaps %xmm6, %xmm0
|
||||
xorl %eax, %eax
|
||||
movdqa .LC1, %xmm3
|
||||
shufps $0, %xmm0, %xmm0
|
||||
movdqa .LC2, %xmm2
|
||||
xorl %ecx, %ecx
|
||||
movaps %xmm0, %xmm5
|
||||
.p2align 4,,7
|
||||
.p2align 3
|
||||
.L24:
|
||||
movaps (%edx,%eax,2), %xmm0
|
||||
addl $1, %ecx
|
||||
movdqa %xmm2, %xmm7
|
||||
movaps 16(%edx,%eax,2), %xmm4
|
||||
mulps %xmm5, %xmm0
|
||||
mulps %xmm5, %xmm4
|
||||
cvttps2dq %xmm0, %xmm0
|
||||
movdqa %xmm0, %xmm1
|
||||
pcmpgtd %xmm3, %xmm1
|
||||
pand %xmm1, %xmm0
|
||||
pandn %xmm3, %xmm1
|
||||
por %xmm0, %xmm1
|
||||
cvttps2dq %xmm4, %xmm4
|
||||
movdqa %xmm1, %xmm0
|
||||
pcmpgtd %xmm2, %xmm0
|
||||
pand %xmm0, %xmm7
|
||||
pandn %xmm1, %xmm0
|
||||
movdqa %xmm0, %xmm1
|
||||
movdqa %xmm4, %xmm0
|
||||
por %xmm7, %xmm1
|
||||
pcmpgtd %xmm3, %xmm0
|
||||
movdqa %xmm2, %xmm7
|
||||
pand %xmm0, %xmm4
|
||||
pandn %xmm3, %xmm0
|
||||
por %xmm4, %xmm0
|
||||
movdqa %xmm0, %xmm4
|
||||
pcmpgtd %xmm2, %xmm4
|
||||
pand %xmm4, %xmm7
|
||||
pandn %xmm0, %xmm4
|
||||
movdqa %xmm4, %xmm0
|
||||
movdqa %xmm1, %xmm4
|
||||
por %xmm7, %xmm0
|
||||
punpckhwd %xmm0, %xmm4
|
||||
punpcklwd %xmm0, %xmm1
|
||||
movdqa %xmm1, %xmm0
|
||||
punpcklwd %xmm4, %xmm1
|
||||
punpckhwd %xmm4, %xmm0
|
||||
punpcklwd %xmm0, %xmm1
|
||||
movdqa %xmm1, (%ebx,%eax)
|
||||
addl $16, %eax
|
||||
cmpw %cx, %di
|
||||
ja .L24
|
||||
cmpw %bp, 2(%esp)
|
||||
jne .L23
|
||||
jmp .L15
|
||||
.size alignedConvertToS16SSE2, .-alignedConvertToS16SSE2
|
||||
.section .rodata.cst4,"aM",@progbits,4
|
||||
.align 4
|
||||
.LC0:
|
||||
.long 1191181824
|
||||
.section .rodata.cst16,"aM",@progbits,16
|
||||
.align 16
|
||||
.LC1:
|
||||
.long -32768
|
||||
.long -32768
|
||||
.long -32768
|
||||
.long -32768
|
||||
.align 16
|
||||
.LC2:
|
||||
.long 32767
|
||||
.long 32767
|
||||
.long 32767
|
||||
.long 32767
|
||||
.align 16
|
||||
.LC3:
|
||||
.long 65280
|
||||
.long 65280
|
||||
.long 65280
|
||||
.long 65280
|
||||
.ident "GCC: (Ubuntu 4.4.0-0ubuntu2) 4.4.0"
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
@@ -1,10 +1,8 @@
|
||||
#ifndef SINGLE_SOURCE_COMPILE
|
||||
|
||||
/*
|
||||
* fx_mixer.cpp - effect-mixer for LMMS
|
||||
*
|
||||
* Copyright (c) 2008 Tobias Doerffel <tobydox/at/users.sourceforge.net>
|
||||
*
|
||||
* Copyright (c) 2008-2009 Tobias Doerffel <tobydox/at/users.sourceforge.net>
|
||||
*
|
||||
* This file is part of Linux MultiMedia Studio - http://lmms.sourceforge.net
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
@@ -28,7 +26,7 @@
|
||||
#include <QtXml/QDomElement>
|
||||
|
||||
#include "fx_mixer.h"
|
||||
#include "basic_ops.h"
|
||||
#include "Cpu.h"
|
||||
#include "effect.h"
|
||||
#include "song.h"
|
||||
|
||||
@@ -39,7 +37,7 @@ fxChannel::fxChannel( model * _parent ) :
|
||||
m_stillRunning( false ),
|
||||
m_peakLeft( 0.0f ),
|
||||
m_peakRight( 0.0f ),
|
||||
m_buffer( alignedAllocFrames( engine::getMixer()->framesPerPeriod() ) ),
|
||||
m_buffer( CPU::allocFrames( engine::getMixer()->framesPerPeriod() ) ),
|
||||
m_muteModel( false, _parent ),
|
||||
m_volumeModel( 1.0, 0.0, 2.0, 0.01, _parent ),
|
||||
m_name(),
|
||||
@@ -54,7 +52,7 @@ fxChannel::fxChannel( model * _parent ) :
|
||||
|
||||
fxChannel::~fxChannel()
|
||||
{
|
||||
alignedFreeFrames( m_buffer );
|
||||
CPU::freeFrames( m_buffer );
|
||||
}
|
||||
|
||||
|
||||
@@ -93,7 +91,8 @@ void fxMixer::mixToChannel( const sampleFrame * _buf, fx_ch_t _ch )
|
||||
if( m_fxChannels[_ch]->m_muteModel.value() == false )
|
||||
{
|
||||
m_fxChannels[_ch]->m_lock.lock();
|
||||
alignedBufMix( m_fxChannels[_ch]->m_buffer, _buf, engine::getMixer()->framesPerPeriod() );
|
||||
CPU::bufMix( m_fxChannels[_ch]->m_buffer, _buf,
|
||||
engine::getMixer()->framesPerPeriod() );
|
||||
m_fxChannels[_ch]->m_used = true;
|
||||
m_fxChannels[_ch]->m_lock.unlock();
|
||||
}
|
||||
@@ -248,4 +247,3 @@ void fxMixer::loadSettings( const QDomElement & _this )
|
||||
}
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
@@ -62,7 +62,7 @@
|
||||
#include "main_window.h"
|
||||
#include "project_renderer.h"
|
||||
#include "song.h"
|
||||
#include "basic_ops.h"
|
||||
#include "Cpu.h"
|
||||
|
||||
// TODO Make a factory class for this (or hide it behind engine)
|
||||
#include "lmms_style.h"
|
||||
@@ -96,8 +96,8 @@ int main( int argc, char * * argv )
|
||||
// intialize RNG
|
||||
srand( getpid() + time( 0 ) );
|
||||
|
||||
// init CPU specific optimized basic ops
|
||||
initBasicOps();
|
||||
// init CPU specific optimized operations
|
||||
CPU::init();
|
||||
|
||||
bool core_only = false;
|
||||
bool fullscreen = true;
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
* mixer.cpp - audio-device-independent mixer for LMMS
|
||||
*
|
||||
* Copyright (c) 2004-2009 Tobias Doerffel <tobydox/at/users.sourceforge.net>
|
||||
*
|
||||
*
|
||||
* This file is part of Linux MultiMedia Studio - http://lmms.sourceforge.net
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
@@ -22,7 +22,6 @@
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#include <math.h>
|
||||
|
||||
#include "mixer.h"
|
||||
@@ -41,7 +40,7 @@
|
||||
#include "sample_play_handle.h"
|
||||
#include "piano_roll.h"
|
||||
#include "micro_timer.h"
|
||||
#include "basic_ops.h"
|
||||
#include "Cpu.h"
|
||||
|
||||
#include "audio_device.h"
|
||||
#include "midi_client.h"
|
||||
@@ -125,7 +124,7 @@ public:
|
||||
|
||||
MixerWorkerThread( int _worker_num, mixer * _mixer ) :
|
||||
QThread( _mixer ),
|
||||
m_workingBuf( alignedAllocFrames( _mixer->framesPerPeriod() ) ),
|
||||
m_workingBuf( CPU::allocFrames( _mixer->framesPerPeriod() ) ),
|
||||
m_workerNum( _worker_num ),
|
||||
m_quit( false ),
|
||||
m_mixer( _mixer ),
|
||||
@@ -135,7 +134,7 @@ public:
|
||||
|
||||
virtual ~MixerWorkerThread()
|
||||
{
|
||||
alignedFreeFrames( m_workingBuf );
|
||||
CPU::freeFrames( m_workingBuf );
|
||||
}
|
||||
|
||||
virtual void quit( void )
|
||||
@@ -295,7 +294,7 @@ mixer::mixer( void ) :
|
||||
{
|
||||
m_inputBufferFrames[i] = 0;
|
||||
m_inputBufferSize[i] = DEFAULT_BUFFER_SIZE * 100;
|
||||
m_inputBuffer[i] = alignedAllocFrames(
|
||||
m_inputBuffer[i] = CPU::allocFrames(
|
||||
DEFAULT_BUFFER_SIZE * 100 );
|
||||
clearAudioBuffer( m_inputBuffer[i], m_inputBufferSize[i] );
|
||||
}
|
||||
@@ -337,10 +336,10 @@ mixer::mixer( void ) :
|
||||
m_fifo = new fifo( 1 );
|
||||
}
|
||||
|
||||
m_workingBuf = alignedAllocFrames( m_framesPerPeriod );
|
||||
m_workingBuf = CPU::allocFrames( m_framesPerPeriod );
|
||||
for( Uint8 i = 0; i < 3; i++ )
|
||||
{
|
||||
m_readBuf = alignedAllocFrames( m_framesPerPeriod );
|
||||
m_readBuf = CPU::allocFrames( m_framesPerPeriod );
|
||||
clearAudioBuffer( m_readBuf, m_framesPerPeriod );
|
||||
m_bufferPool.push_back( m_readBuf );
|
||||
}
|
||||
@@ -389,10 +388,10 @@ mixer::~mixer()
|
||||
|
||||
for( Uint8 i = 0; i < 3; i++ )
|
||||
{
|
||||
alignedFreeFrames( m_bufferPool[i] );
|
||||
CPU::freeFrames( m_bufferPool[i] );
|
||||
}
|
||||
|
||||
alignedFreeFrames( m_workingBuf );
|
||||
CPU::freeFrames( m_workingBuf );
|
||||
}
|
||||
|
||||
|
||||
@@ -504,9 +503,9 @@ void mixer::pushInputFrames( sampleFrame * _ab, const f_cnt_t _frames )
|
||||
if( frames + _frames > size )
|
||||
{
|
||||
size = qMax( size * 2, frames + _frames );
|
||||
sampleFrame * ab = alignedAllocFrames( size );
|
||||
alignedMemCpy( ab, buf, frames * sizeof( sampleFrame ) );
|
||||
alignedFreeFrames( buf );
|
||||
sampleFrame * ab = CPU::allocFrames( size );
|
||||
CPU::memCpy( ab, buf, frames * sizeof( sampleFrame ) );
|
||||
CPU::freeFrames( buf );
|
||||
|
||||
m_inputBufferSize[ m_inputBufferWrite ] = size;
|
||||
m_inputBuffer[ m_inputBufferWrite ] = ab;
|
||||
@@ -514,7 +513,7 @@ void mixer::pushInputFrames( sampleFrame * _ab, const f_cnt_t _frames )
|
||||
buf = ab;
|
||||
}
|
||||
|
||||
alignedMemCpy( &buf[ frames ], _ab, _frames * sizeof( sampleFrame ) );
|
||||
CPU::memCpy( &buf[ frames ], _ab, _frames * sizeof( sampleFrame ) );
|
||||
m_inputBufferFrames[ m_inputBufferWrite ] += _frames;
|
||||
|
||||
unlockInputFrames();
|
||||
@@ -686,7 +685,7 @@ void mixer::bufferToPort( const sampleFrame * _buf,
|
||||
const int loop1_frame = qMin<int>( end_frame, m_framesPerPeriod );
|
||||
|
||||
_port->lockFirstBuffer();
|
||||
unalignedBufMixLRCoeff( _port->firstBuffer() + start_frame,
|
||||
CPU::unalignedBufMixLRCoeff( _port->firstBuffer() + start_frame,
|
||||
_buf, _vv.vol[0], _vv.vol[1],
|
||||
loop1_frame - start_frame );
|
||||
_port->unlockFirstBuffer();
|
||||
@@ -697,7 +696,7 @@ void mixer::bufferToPort( const sampleFrame * _buf,
|
||||
const int frames_done = m_framesPerPeriod - start_frame;
|
||||
end_frame -= m_framesPerPeriod;
|
||||
end_frame = qMin<int>( end_frame, m_framesPerPeriod );
|
||||
unalignedBufMixLRCoeff( _port->secondBuffer(),
|
||||
CPU::unalignedBufMixLRCoeff( _port->secondBuffer(),
|
||||
_buf+frames_done,
|
||||
_vv.vol[0], _vv.vol[1],
|
||||
end_frame );
|
||||
@@ -720,7 +719,7 @@ void mixer::clearAudioBuffer( sampleFrame * _ab, const f_cnt_t _frames,
|
||||
{
|
||||
if( likely( (size_t)( _ab+_offset ) % 16 == 0 && _frames % 8 == 0 ) )
|
||||
{
|
||||
alignedMemClear( _ab+_offset, sizeof( *_ab ) * _frames );
|
||||
CPU::memClear( _ab+_offset, sizeof( *_ab ) * _frames );
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -1157,9 +1156,9 @@ void mixer::fifoWriter::run( void )
|
||||
const fpp_t frames = m_mixer->framesPerPeriod();
|
||||
while( m_writing )
|
||||
{
|
||||
sampleFrameA * buffer = alignedAllocFrames( frames );
|
||||
sampleFrameA * buffer = CPU::allocFrames( frames );
|
||||
const sampleFrameA * b = m_mixer->renderNextBuffer();
|
||||
alignedMemCpy( buffer, b, frames * sizeof( sampleFrameA ) );
|
||||
CPU::memCpy( buffer, b, frames * sizeof( sampleFrameA ) );
|
||||
m_fifo->write( buffer );
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user