From ff7e4597e786706ba97c11c199e6c2bb02c12a75 Mon Sep 17 00:00:00 2001 From: Erik Hofman Date: Thu, 17 Nov 2016 11:48:16 +0100 Subject: [PATCH] Maximize simd optimization --- simgear/math/SGVec4.hxx | 7 +- simgear/math/simd.hxx | 653 ++++++++++++++++++++++++++++------------ 2 files changed, 458 insertions(+), 202 deletions(-) diff --git a/simgear/math/SGVec4.hxx b/simgear/math/SGVec4.hxx index 4339dfd9..c8faf20f 100644 --- a/simgear/math/SGVec4.hxx +++ b/simgear/math/SGVec4.hxx @@ -19,6 +19,7 @@ #define SGVec4_H #include +#include "simd.hxx" /// 4D Vector Class template @@ -92,10 +93,10 @@ public: /// Readonly raw storage interface const T (&data(void) const)[4] - { return _data; } + { return _data.ptr(); } /// Readonly raw storage interface T (&data(void))[4] - { return _data; } + { return _data.ptr(); } /// Inplace addition SGVec4& operator+=(const SGVec4& v) @@ -126,7 +127,7 @@ public: { return SGVec4(0, 0, 0, 1); } private: - T _data[4]; + simd4_t _data; }; /// Unary +, do nothing ... diff --git a/simgear/math/simd.hxx b/simgear/math/simd.hxx index d4dbdcd5..2d7b489b 100644 --- a/simgear/math/simd.hxx +++ b/simgear/math/simd.hxx @@ -18,6 +18,8 @@ #ifndef __SIMD_H__ #define __SIMD_H__ 1 +#include + # ifdef __SSE__ # include # endif @@ -50,12 +52,17 @@ public: { vec[0] = vec[1] = vec[2] = vec[3] = f; } - simd4_t(const __vec4_t& v) + simd4_t(const __vec4_t v) { - vec[0] = v[0]; - vec[1] = v[1]; - vec[2] = v[2]; - vec[3] = v[3]; + std::memcpy(vec, v, sizeof(float[4])); + } + simd4_t(const simd4_t& v) + { +# ifdef __SSE__ + v4 = v.v4; +#else + std::memcpy(vec, v.vec, sizeof(float[4])); +#endif } inline float (&ptr(void))[4] { @@ -71,23 +78,38 @@ public: vec[0] = vec[1] = vec[2] = vec[3] = f; return *this; } - - inline simd4_t& operator=(const __vec4_t& v) + inline simd4_t& operator=(const __vec4_t v) { - vec[0] = v[0]; - vec[1] = v[1]; - vec[2] = v[2]; - vec[3] = v[3]; + std::memcpy(vec, v, sizeof(float[4])); + return *this; + } + inline simd4_t& operator=(const simd4_t& v) + { +# ifdef __SSE__ + v4 = v.v4; +#else + std::memcpy(vec, v.vec, sizeof(float[4])); +#endif return *this; } - inline simd4_t& operator=(const simd4_t& v) + inline simd4_t operator+(float f) { - vec[0] = v[0]; - vec[1] = v[1]; - vec[2] = v[2]; - vec[3] = v[3]; - return *this; + simd4_t r(*this); + r += f; + return r; + } + inline simd4_t operator+(const __vec4_t v) + { + simd4_t r(*this); + r += v; + return r; + } + inline simd4_t operator+(const simd4_t& v) + { + simd4_t r(*this); + r += v; + return v; } inline simd4_t operator-() @@ -96,36 +118,64 @@ public: r -= vec; return r; } - - inline simd4_t operator+(float f) - { - simd4_t r(vec); - r += f; - return r; - } - inline simd4_t operator-(float f) { - simd4_t r(vec); + simd4_t r(*this); r -= f; return r; } + inline simd4_t operator-(const __vec4_t v) + { + simd4_t r(*this); + r -= v; + return r; + } + inline simd4_t operator-(simd4_t& v) + { + simd4_t r(*this); + r -= v; + return r; + } inline simd4_t operator*(float f) { - simd4_t r(vec); + simd4_t r(*this); r *= f; return r; } + inline simd4_t operator*(const __vec4_t v) + { + simd4_t r(*this); + r *= v; + return r; + } + inline simd4_t operator*(simd4_t& v) + { + simd4_t r(*this); + r *= v; + return r; + } inline simd4_t operator/(float f) { - simd4_t r(vec); + simd4_t r(*this); r /= f; return r; } + inline simd4_t operator/(const __vec4_t v) + { + simd4_t r(*this); + r /= v; + return r; + } + inline simd4_t operator/(simd4_t& v) + { + simd4_t r(*this); + r /= v; + return r; + } - inline simd4_t& operator +=(float f) + inline simd4_t& operator+=(float f) { # ifdef __SSE__ v4 += f; @@ -137,8 +187,26 @@ public: # endif return *this; } + inline simd4_t& operator+=(const __vec4_t v) + { + simd4_t r(v); + *this += r; + return *this; + } + inline simd4_t& operator+=(const simd4_t& v) + { +# ifdef __SSE__ + v4 += v.v4; +# else + vec[0] += v[0]; + vec[1] += v[1]; + vec[2] += v[2]; + vec[3] += v[3]; +#endif + return *this; + } - inline simd4_t& operator -=(float f) + inline simd4_t& operator-=(float f) { # ifdef __SSE__ v4 -= f; @@ -150,6 +218,24 @@ public: # endif return *this; } + inline simd4_t& operator-=(const __vec4_t v) + { + simd4_t r(v); + *this -= r; + return *this; + } + inline simd4_t& operator-=(const simd4_t& v) + { +# ifdef __SSE__ + v4 -= v.v4; +# else + vec[0] -= v[0]; + vec[1] -= v[1]; + vec[2] -= v[2]; + vec[3] -= v[3]; +#endif + return *this; + } inline simd4_t& operator *=(float f) { @@ -163,8 +249,26 @@ public: # endif return *this; } + inline simd4_t& operator*=(const __vec4_t v) + { + simd4_t r(v); + *this *= r; + return *this; + } + inline simd4_t& operator*=(const simd4_t& v) + { +# ifdef __SSE__ + v4 *= v.v4; +# else + vec[0] *= v[0]; + vec[1] *= v[1]; + vec[2] *= v[2]; + vec[3] *= v[3]; +#endif + return *this; + } - inline simd4_t& operator /=(float f) + inline simd4_t& operator/=(float f) { # ifdef __SSE__ v4 /= f; @@ -176,40 +280,22 @@ public: #endif return *this; } - - inline simd4_t& operator +=(__vec4_t v) + inline simd4_t& operator/=(const __vec4_t v) { - vec[0] += v[0]; - vec[1] += v[1]; - vec[2] += v[2]; - vec[3] += v[3]; + simd4_t r(v); + *this /= r; return *this; } - - inline simd4_t& operator -=(__vec4_t v) - { - vec[0] -= v[0]; - vec[1] -= v[1]; - vec[2] -= v[2]; - vec[3] -= v[3]; - return *this; - } - - inline simd4_t& operator *=(__vec4_t v) - { - vec[0] *= v[0]; - vec[1] *= v[1]; - vec[2] *= v[2]; - vec[3] *= v[3]; - return *this; - } - - inline simd4_t& operator /=(__vec4_t v) + inline simd4_t& operator/=(const simd4_t& v) { +# ifdef __SSE__ + v4 /= v.v4; +# else vec[0] /= v[0]; vec[1] /= v[1]; vec[2] /= v[2]; vec[3] /= v[3]; +#endif return *this; } @@ -222,6 +308,8 @@ public: } }; + + template<> class simd4_t { @@ -239,16 +327,21 @@ private: public: simd4_t() {} - simd4_t(double f) + simd4_t(double d) { - vec[0] = vec[1] = vec[2] = vec[3] = f; + vec[0] = vec[1] = vec[2] = vec[3] = d; } - simd4_t(const __vec4_t& v) + simd4_t(const __vec4_t v) { - vec[0] = v[0]; - vec[1] = v[1]; - vec[2] = v[2]; - vec[3] = v[3]; + std::memcpy(vec, v, sizeof(double[4])); + } + simd4_t(const simd4_t& v) + { +# ifdef __SSE__ + v4 = v.v4; +#else + std::memcpy(vec, v.vec, sizeof(double[4])); +#endif } inline double (&ptr(void))[4] { @@ -259,159 +352,237 @@ public: return vec; } - inline simd4_t& operator=(double f) + inline simd4_t& operator=(double d) { - vec[0] = vec[1] = vec[2] = vec[3] = f; + vec[0] = vec[1] = vec[2] = vec[3] = d; return *this; } - - inline simd4_t& operator=(const __vec4_t& v) + inline simd4_t& operator=(const __vec4_t v) { - vec[0] = v[0]; - vec[1] = v[1]; - vec[2] = v[2]; - vec[3] = v[3]; + std::memcpy(vec, v, sizeof(double[4])); return *this; } - inline simd4_t& operator=(const simd4_t& v) { - simd4_t(r); - vec[0] = v[0]; - vec[1] = v[1]; - vec[2] = v[2]; - vec[3] = v[3]; +# ifdef __SSE__ + v4 = v.v4; +#else + std::memcpy(vec, v.vec, sizeof(double[4])); +#endif return *this; } + inline simd4_t operator+(double d) + { + simd4_t r(*this); + r += d; + return r; + } + inline simd4_t operator+(const __vec4_t v) + { + simd4_t r(*this); + r += v; + return r; + } + inline simd4_t operator+(const simd4_t& v) + { + simd4_t r(*this); + r += v; + return v; + } + inline simd4_t operator-() { simd4_t r(0.0f); r -= vec; return r; } - - inline simd4_t operator+(double f) + inline simd4_t operator-(double d) { - simd4_t r(vec); - r += f; + simd4_t r(*this); + r -= d; + return r; + } + inline simd4_t operator-(const __vec4_t v) + { + simd4_t r(*this); + r -= v; + return r; + } + inline simd4_t operator-(simd4_t& v) + { + simd4_t r(*this); + r -= v; return r; } - inline simd4_t operator-(double f) + inline simd4_t operator*(double d) { - simd4_t r(vec); - r -= f; + simd4_t r(*this); + r *= d; + return r; + } + inline simd4_t operator*(const __vec4_t v) + { + simd4_t r(*this); + r *= v; + return r; + } + inline simd4_t operator*(simd4_t& v) + { + simd4_t r(*this); + r *= v; return r; } - inline simd4_t operator*(double f) + inline simd4_t operator/(double d) { - simd4_t r(vec); - r *= f; + simd4_t r(*this); + r /= d; + return r; + } + inline simd4_t operator/(const __vec4_t v) + { + simd4_t r(*this); + r /= v; + return r; + } + inline simd4_t operator/(simd4_t& v) + { + simd4_t r(*this); + r /= v; return r; } - inline simd4_t operator/(double f) - { - simd4_t r(vec); - r /= f; - return r; - } - - inline simd4_t& operator +=(double f) + inline simd4_t& operator+=(double d) { # ifdef __SSE__ - v4 += f; + v4 += d; # else - vec[0] += f; - vec[1] += f; - vec[2] += f; - vec[3] += f; + vec[0] += d; + vec[1] += d; + vec[2] += d; + vec[3] += d; # endif return *this; } - - inline simd4_t& operator -=(double f) + inline simd4_t& operator+=(const __vec4_t v) { -# ifdef __SSE__ - v4 -= f; -# else - vec[0] -= f; - vec[1] -= f; - vec[2] -= f; - vec[3] -= f; -# endif + simd4_t r(v); + *this += r; return *this; } - - inline simd4_t& operator *=(double f) + inline simd4_t& operator+=(const simd4_t& v) { # ifdef __SSE__ - v4 *= f; + v4 += v.v4; # else - vec[0] *= f; - vec[1] *= f; - vec[2] *= f; - vec[3] *= f; -# endif - return *this; - } - - inline simd4_t& operator /=(double f) - { -# ifdef __SSE__ - v4 /= f; -# else - vec[0] /= f; - vec[1] /= f; - vec[2] /= f; - vec[3] /= f; -#endif - return *this; - } - - inline simd4_t& operator +=(__vec4_t v) - { vec[0] += v[0]; vec[1] += v[1]; vec[2] += v[2]; vec[3] += v[3]; +#endif return *this; } - inline simd4_t& operator -=(__vec4_t v) + inline simd4_t& operator-=(double d) { +# ifdef __SSE__ + v4 -= d; +# else + vec[0] -= d; + vec[1] -= d; + vec[2] -= d; + vec[3] -= d; +# endif + return *this; + } + inline simd4_t& operator-=(const __vec4_t v) + { + simd4_t r(v); + *this -= r; + return *this; + } + inline simd4_t& operator-=(const simd4_t& v) + { +# ifdef __SSE__ + v4 -= v.v4; +# else vec[0] -= v[0]; vec[1] -= v[1]; vec[2] -= v[2]; vec[3] -= v[3]; +#endif return *this; } - inline simd4_t& operator *=(__vec4_t v) + inline simd4_t& operator *=(double d) { +# ifdef __SSE__ + v4 *= d; +# else + vec[0] *= d; + vec[1] *= d; + vec[2] *= d; + vec[3] *= d; +# endif + return *this; + } + inline simd4_t& operator*=(const __vec4_t v) + { + simd4_t r(v); + *this *= r; + return *this; + } + inline simd4_t& operator*=(const simd4_t& v) + { +# ifdef __SSE__ + v4 *= v.v4; +# else vec[0] *= v[0]; vec[1] *= v[1]; vec[2] *= v[2]; vec[3] *= v[3]; +#endif return *this; } - inline simd4_t& operator /=(__vec4_t v) + inline simd4_t& operator/=(double d) { +# ifdef __SSE__ + v4 /= d; +# else + vec[0] /= d; + vec[1] /= d; + vec[2] /= d; + vec[3] /= d; +#endif + return *this; + } + inline simd4_t& operator/=(const __vec4_t v) + { + simd4_t r(v); + *this /= r; + return *this; + } + inline simd4_t& operator/=(const simd4_t& v) + { +# ifdef __SSE__ + v4 /= v.v4; +# else vec[0] /= v[0]; vec[1] /= v[1]; vec[2] /= v[2]; vec[3] /= v[3]; +#endif return *this; } - inline operator const double*(void) const { + inline operator const double*() const { return vec; } - inline operator double*(void) { + inline operator double*() { return vec; } }; @@ -437,12 +608,17 @@ public: { vec[0] = vec[1] = vec[2] = vec[3] = i; } - simd4_t(const __vec4_t& v) + simd4_t(const __vec4_t v) { - vec[0] = v[0]; - vec[1] = v[1]; - vec[2] = v[2]; - vec[3] = v[3]; + std::memcpy(vec, v, sizeof(int[4])); + } + simd4_t(const simd4_t& v) + { +# ifdef __SSE__ + v4 = v.v4; +#else + std::memcpy(vec, v.vec, sizeof(int[4])); +#endif } inline int (&ptr(void))[4] { @@ -458,23 +634,38 @@ public: vec[0] = vec[1] = vec[2] = vec[3] = i; return *this; } - - inline simd4_t& operator=(const __vec4_t& v) + inline simd4_t& operator=(const __vec4_t v) { - vec[0] = v[0]; - vec[1] = v[1]; - vec[2] = v[2]; - vec[3] = v[3]; + std::memcpy(vec, v, sizeof(int[4])); + return *this; + } + inline simd4_t& operator=(const simd4_t& v) + { +# ifdef __SSE__ + v4 = v.v4; +#else + std::memcpy(vec, v.vec, sizeof(int[4])); +#endif return *this; } - inline simd4_t& operator=(const simd4_t& v) + inline simd4_t operator+(int i) { - vec[0] = v[0]; - vec[1] = v[1]; - vec[2] = v[2]; - vec[3] = v[3]; - return *this; + simd4_t r(*this); + r += i; + return r; + } + inline simd4_t operator+(const __vec4_t v) + { + simd4_t r(*this); + r += v; + return r; + } + inline simd4_t operator+(const simd4_t& v) + { + simd4_t r(*this); + r += v; + return v; } inline simd4_t operator-() @@ -483,36 +674,64 @@ public: r -= vec; return r; } - - inline simd4_t operator+(int i) - { - simd4_t r(vec); - r += i; - return r; - } - inline simd4_t operator-(int i) { - simd4_t r(vec); + simd4_t r(*this); r -= i; return r; } + inline simd4_t operator-(const __vec4_t v) + { + simd4_t r(*this); + r -= v; + return r; + } + inline simd4_t operator-(simd4_t& v) + { + simd4_t r(*this); + r -= v; + return r; + } inline simd4_t operator*(int i) { - simd4_t r(vec); + simd4_t r(*this); r *= i; return r; } + inline simd4_t operator*(const __vec4_t v) + { + simd4_t r(*this); + r *= v; + return r; + } + inline simd4_t operator*(simd4_t& v) + { + simd4_t r(*this); + r *= v; + return r; + } inline simd4_t operator/(int i) { - simd4_t r(vec); + simd4_t r(*this); r /= i; return r; } + inline simd4_t operator/(const __vec4_t v) + { + simd4_t r(*this); + r /= v; + return r; + } + inline simd4_t operator/(simd4_t& v) + { + simd4_t r(*this); + r /= v; + return r; + } - inline simd4_t& operator +=(int i) + inline simd4_t& operator+=(int i) { # ifdef __SSE__ v4 += i; @@ -524,8 +743,26 @@ public: # endif return *this; } + inline simd4_t& operator+=(const __vec4_t v) + { + simd4_t r(v); + *this += r; + return *this; + } + inline simd4_t& operator+=(const simd4_t& v) + { +# ifdef __SSE__ + v4 += v.v4; +# else + vec[0] += v[0]; + vec[1] += v[1]; + vec[2] += v[2]; + vec[3] += v[3]; +#endif + return *this; + } - inline simd4_t& operator -=(int i) + inline simd4_t& operator-=(int i) { # ifdef __SSE__ v4 -= i; @@ -537,6 +774,24 @@ public: # endif return *this; } + inline simd4_t& operator-=(const __vec4_t v) + { + simd4_t r(v); + *this -= r; + return *this; + } + inline simd4_t& operator-=(const simd4_t& v) + { +# ifdef __SSE__ + v4 -= v.v4; +# else + vec[0] -= v[0]; + vec[1] -= v[1]; + vec[2] -= v[2]; + vec[3] -= v[3]; +#endif + return *this; + } inline simd4_t& operator *=(int i) { @@ -550,8 +805,26 @@ public: # endif return *this; } + inline simd4_t& operator*=(const __vec4_t v) + { + simd4_t r(v); + *this *= r; + return *this; + } + inline simd4_t& operator*=(const simd4_t& v) + { +# ifdef __SSE__ + v4 *= v.v4; +# else + vec[0] *= v[0]; + vec[1] *= v[1]; + vec[2] *= v[2]; + vec[3] *= v[3]; +#endif + return *this; + } - inline simd4_t& operator /=(int i) + inline simd4_t& operator/=(int i) { # ifdef __SSE__ v4 /= i; @@ -563,40 +836,22 @@ public: #endif return *this; } - - inline simd4_t& operator +=(__vec4_t v) + inline simd4_t& operator/=(const __vec4_t v) { - vec[0] += v[0]; - vec[1] += v[1]; - vec[2] += v[2]; - vec[3] += v[3]; + simd4_t r(v); + *this /= r; return *this; } - - inline simd4_t& operator -=(__vec4_t v) - { - vec[0] -= v[0]; - vec[1] -= v[1]; - vec[2] -= v[2]; - vec[3] -= v[3]; - return *this; - } - - inline simd4_t& operator *=(__vec4_t v) - { - vec[0] *= v[0]; - vec[1] *= v[1]; - vec[2] *= v[2]; - vec[3] *= v[3]; - return *this; - } - - inline simd4_t& operator /=(__vec4_t v) + inline simd4_t& operator/=(const simd4_t& v) { +# ifdef __SSE__ + v4 /= v.v4; +# else vec[0] /= v[0]; vec[1] /= v[1]; vec[2] /= v[2]; vec[3] /= v[3]; +#endif return *this; }