Reintroduce fast math functions (#7495)
* Add fast fma functions * Use fast fma functions * Add fast pow function * Use fast pow function * Fix build * Remove fastFma * Avoid UB in fastPow. On GCC with -O1 or -O2 optimizations, the new fastPow implementation generates assembly identical to that of the old union-based implementation.
This commit is contained in:
@@ -69,13 +69,13 @@ inline float hermiteInterpolate( float x0, float x1, float x2, float x3,
|
||||
|
||||
//! Four-point interpolation between the samples v0..v3 at position x.
//!
//! Written with plain multiply-add expressions rather than std::fma calls.
//! For x == 0 (and finite inputs) every x-dependent term vanishes, so the
//! result is v1.
//!
//! @param v0 first sample
//! @param v1 second sample
//! @param v2 third sample
//! @param v3 fourth sample
//! @param x  interpolation position
//! @return the interpolated value
inline float cubicInterpolate( float v0, float v1, float v2, float v3, float x )
{
	float frsq = x * x;
	float frcu = frsq * v0;
	float t1 = v1 * 3.f + v3;

	return v1 + (0.5f * frcu + x) * (v2 - frcu * (1.0f / 6.0f) -
		(t1 * (1.0f / 6.0f) - v0) * (1.0f / 3.0f)) + frsq * x * (t1 *
		(1.0f / 6.0f) - 0.5f * v2) + frsq * (0.5f * v2 - v1);
}
|
||||
|
||||
|
||||
@@ -83,13 +83,13 @@ inline float cubicInterpolate( float v0, float v1, float v2, float v3, float x )
|
||||
inline float cosinusInterpolate( float v0, float v1, float x )
|
||||
{
|
||||
const float f = ( 1.0f - cosf( x * F_PI ) ) * 0.5f;
|
||||
return std::fma(f, v1 - v0, v0);
|
||||
return f * (v1 - v0) + v0;
|
||||
}
|
||||
|
||||
|
||||
//! Linear interpolation between v0 and v1.
//!
//! Computes v0 + x * (v1 - v0) with a plain multiply-add: x == 0 yields v0
//! and x == 1 yields v1 (up to floating-point rounding of v1 - v0).
//!
//! @param v0 value at x == 0
//! @param v1 value at x == 1
//! @param x  interpolation position
//! @return the interpolated value
inline float linearInterpolate( float v0, float v1, float x )
{
	return x * (v1 - v0) + v0;
}
|
||||
|
||||
|
||||
@@ -104,7 +104,7 @@ inline float optimalInterpolate( float v0, float v1, float x )
|
||||
const float c2 = even * -0.004541102062639801;
|
||||
const float c3 = odd * -1.57015627178718420;
|
||||
|
||||
return std::fma(std::fma(std::fma(c3, z, c2), z, c1), z, c0);
|
||||
return ((c3 * z + c2) * z + c1) * z + c0;
|
||||
}
|
||||
|
||||
|
||||
@@ -121,7 +121,7 @@ inline float optimal4pInterpolate( float v0, float v1, float v2, float v3, float
|
||||
const float c2 = even1 * -0.246185007019907091 + even2 * 0.24614027139700284;
|
||||
const float c3 = odd1 * -0.36030925263849456 + odd2 * 0.10174985775982505;
|
||||
|
||||
return std::fma(std::fma(std::fma(c3, z, c2), z, c1), z, c0);
|
||||
return ((c3 * z + c2) * z + c1) * z + c0;
|
||||
}
|
||||
|
||||
|
||||
@@ -132,7 +132,7 @@ inline float lagrangeInterpolate( float v0, float v1, float v2, float v3, float
|
||||
const float c1 = v2 - v0 * ( 1.0f / 3.0f ) - v1 * 0.5f - v3 * ( 1.0f / 6.0f );
|
||||
const float c2 = 0.5f * (v0 + v2) - v1;
|
||||
const float c3 = ( 1.0f/6.0f ) * ( v3 - v0 ) + 0.5f * ( v1 - v2 );
|
||||
return std::fma(std::fma(std::fma(c3, x, c2), x, c1), x, c0);
|
||||
return ((c3 * x + c2) * x + c1) * x + c0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -27,12 +27,13 @@
|
||||
|
||||
#include <QtGlobal>
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <cmath>
|
||||
#include <cstdint>
|
||||
#include <cstring>
|
||||
|
||||
#include "lmms_constants.h"
|
||||
#include "lmmsconfig.h"
|
||||
#include <cassert>
|
||||
|
||||
namespace lmms
|
||||
{
|
||||
@@ -96,6 +97,20 @@ static void roundAt(T& value, const T& where, const T& stepSize)
|
||||
}
|
||||
}
|
||||
|
||||
//! Fast, low-precision approximation of std::pow(a, b).
//!
//! Works on the bit pattern of `a`: the upper 32 bits of the double are
//! rescaled linearly by `b` around a fixed bias and the lower 32 bits are
//! cleared. For b == 1 the upper word is reproduced unchanged, so the result
//! is `a` with its low mantissa bits zeroed.
//!
//! The upper word is obtained by shifting a 64-bit integer rather than by
//! indexing a two-element array, so the result does not depend on the byte
//! order of the target. The subtraction of the bias is carried out in double
//! precision so it cannot overflow a 32-bit integer.
//!
//! NOTE(review): the rescaled word is converted to a 32-bit integer; inputs
//! for which it falls outside that range are not handled here - confirm that
//! callers only pass positive, finite `a` and moderate `b`.
//!
//! Source: http://martin.ankerl.com/2007/10/04/optimized-pow-approximation-for-java-and-c-c/
//!
//! @param a base
//! @param b exponent
//! @return an approximation of a raised to the power b
inline double fastPow(double a, double b)
{
	static_assert(sizeof(double) == sizeof(std::uint64_t), "fastPow requires a 64-bit double");

	// Bias applied to the upper 32 bits, taken from the referenced article
	constexpr std::int32_t bias = 1072632447;

	std::uint64_t bits;
	std::memcpy(&bits, &a, sizeof(bits));

	// Upper half of the double (sign, exponent and top mantissa bits)
	const auto high = static_cast<std::int32_t>(static_cast<std::uint32_t>(bits >> 32));

	// Same arithmetic as the integer form, but the difference is formed in
	// double precision to avoid signed overflow
	const auto scaled = static_cast<std::int32_t>(b * (static_cast<double>(high) - bias) + bias);

	// Reassemble with the lower 32 bits cleared
	bits = static_cast<std::uint64_t>(static_cast<std::uint32_t>(scaled)) << 32;

	double d;
	std::memcpy(&d, &bits, sizeof(d));
	return d;
}
|
||||
|
||||
|
||||
//! returns 1.0f if val >= 0.0f, -1.0 else
|
||||
inline float sign(float val)
|
||||
|
||||
@@ -64,7 +64,7 @@ public:
|
||||
{
|
||||
for( fpp_t frame = 0; frame < frames; ++frame )
|
||||
{
|
||||
const double gain = 1 - std::pow((m_counter < m_length) ? m_counter / m_length : 1, m_env);
|
||||
const double gain = 1 - fastPow((m_counter < m_length) ? m_counter / m_length : 1, m_env);
|
||||
const sample_t s = ( Oscillator::sinSample( m_phase ) * ( 1 - m_noise ) ) + ( Oscillator::noiseSample( 0 ) * gain * gain * m_noise );
|
||||
buf[frame][0] = s * gain;
|
||||
buf[frame][1] = s * gain;
|
||||
@@ -80,7 +80,7 @@ public:
|
||||
m_FX.nextSample( buf[frame][0], buf[frame][1] );
|
||||
m_phase += m_freq / sampleRate;
|
||||
|
||||
const double change = (m_counter < m_length) ? ((m_startFreq - m_endFreq) * (1 - std::pow(m_counter / m_length, m_slope))) : 0;
|
||||
const double change = (m_counter < m_length) ? ((m_startFreq - m_endFreq) * (1 - fastPow(m_counter / m_length, m_slope))) : 0;
|
||||
m_freq = m_endFreq + change;
|
||||
++m_counter;
|
||||
}
|
||||
|
||||
@@ -858,7 +858,7 @@ inline sample_t MonstroSynth::calcSlope( int slope, sample_t s )
|
||||
{
|
||||
if( m_parent->m_slope[slope] == 1.0f ) return s;
|
||||
if( s == 0.0f ) return s;
|
||||
return std::pow(s, m_parent->m_slope[slope]);
|
||||
return fastPow(s, m_parent->m_slope[slope]);
|
||||
}
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user