From 3387f3d08449ae3d24b0e5bd41c8da2644960b51 Mon Sep 17 00:00:00 2001 From: Erik Hofman Date: Mon, 30 Jan 2017 16:00:29 +0100 Subject: [PATCH] Switch to c++11 alignas instead of our own hacks --- simgear/math/simd.hxx | 134 ++++++++++------------------------ simgear/math/simd4x4.hxx | 47 +++++------- simgear/math/simd4x4_neon.hxx | 12 +-- simgear/math/simd_neon.hxx | 33 +++------ 4 files changed, 76 insertions(+), 150 deletions(-) diff --git a/simgear/math/simd.hxx b/simgear/math/simd.hxx index 4ea04a4b..6adf62de 100644 --- a/simgear/math/simd.hxx +++ b/simgear/math/simd.hxx @@ -24,6 +24,14 @@ #include #include +#if defined(_MSC_VER) +# include +#elif defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__)) +# include +#elif defined(__GNUC__) && defined(__ARM_NEON__) +# include +#endif + #include #include @@ -287,70 +295,34 @@ inline simd4_t operator*(simd4_t v, T f) { } -# ifdef __MMX__ -# include -# if defined(_MSC_VER) -# define ALIGN16 __declspec(align(16)) -# define ALIGN32 __declspec(align(32)) -# define ALIGN16C -# define ALIGN32C -# elif defined(__GNUC__) -# define ALIGN16 -# define ALIGN32 -# define ALIGN16C __attribute__((aligned(16))) -# define ALIGN32C __attribute__((aligned(32))) -# endif - +# ifdef __SSE__ namespace simd4 { -static ALIGN16 const uint32_t m2a32[] ALIGN16C = { +static const uint32_t m2a32[] alignas(16) = { 0xffffffff,0xffffffff,0,0 }; -static ALIGN16 const uint32_t m3a32[] ALIGN16C = { +static const uint32_t m3a32[] alignas(16) = { 0xffffffff,0xffffffff,0xffffffff,0 }; -static ALIGN32 const uint64_t m2a64[] ALIGN32C = { +static const uint64_t m2a64[] alignas(32) = { 0xffffffffffffffff,0xffffffffffffffff,0,0 }; -static ALIGN32 const uint64_t m3a64[] ALIGN32C = { +static const uint64_t m3a64[] alignas(32) = { 0xffffffffffffffff,0xffffffffffffffff,0xffffffffffffffff,0 }; }; /* namespace simd4 */ -# endif -# ifdef __SSE__ -# include -# ifdef __SSE3__ -# include -# endif - -ALIGN16 -class simd_aligned16 -{ -public: - simd_aligned16() {} - ~simd_aligned16() {} - - static void *operator new (size_t size) throw (std::bad_alloc) { - void *p = _mm_malloc(size, 16); - if (!p) throw std::bad_alloc(); - return p; - } - static void operator delete (void *p) { - _mm_free(p); - } -} ALIGN16C; template -class simd4_t : public simd_aligned16 +class alignas(16) simd4_t { private: typedef float __vec4f_t[N]; - union ALIGN16 { + union alignas(16) { __m128 simd4; - __vec4f_t vec; - } ALIGN16C; + alignas(16) __vec4f_t vec; + }; public: simd4_t(void) {} @@ -393,8 +365,8 @@ public: } template - inline simd4_t& operator=(const simd4_t v) { - simd4 = v.v4(); + inline simd4_t& operator=(const simd4_t& v) { + simd4 = simd4_t(v).v4(); return *this; } inline simd4_t& operator=(const __m128& v) { @@ -572,35 +544,16 @@ inline simd4_tabs(simd4_t v) { # ifdef __AVX__ -# include - -ALIGN32 -class simd_aligned32 -{ -public: - simd_aligned32() {} - ~simd_aligned32() {} - - static void *operator new (size_t size) throw (std::bad_alloc) { - void *p = _mm_malloc(size, 32); - if (!p) throw std::bad_alloc(); - return p; - } - static void operator delete (void *p) { - _mm_free(p); - } -} ALIGN32C; - template -class simd4_t : public simd_aligned32 +class alignas(32) simd4_t { private: typedef double __vec4d_t[N]; - union ALIGN32 { + union alignas(32) { __m256d simd4; - __vec4d_t vec; - } ALIGN32C; + alignas(32) __vec4d_t vec; + }; public: simd4_t(void) {} @@ -643,8 +596,8 @@ public: } template - inline simd4_t& operator=(const simd4_t v) { - simd4 = v.v4(); + inline simd4_t& operator=(const simd4_t& v) { + simd4 = simd4_t(v).v4(); return *this; } inline simd4_t& operator=(const __m256d& v) { @@ -777,7 +730,6 @@ inline double dot(simd4_t v1, const simd4_t& v2) { } # ifdef __AVX2__ -# include template<> inline simd4_t cross(const simd4_t& v1, const simd4_t& v2) { @@ -815,18 +767,17 @@ inline simd4_tabs(simd4_t v) { } /* namespace simd4 */ # elif defined __SSE2__ -# include template -class simd4_t : public simd_aligned16 +class alignas(16) simd4_t { private: typedef double __vec4d_t[N]; - union ALIGN16 { + union alignas(16) { __m128d simd4[2]; - __vec4d_t vec; - } ALIGN16C; + alignas(16) __vec4d_t vec; + }; public: simd4_t(void) {} @@ -846,7 +797,7 @@ public: simd4[1] = v[1]; } - inline const __m128d (&v4(void) const)[2] { + inline const __m128d (&v4(void) const)[2] { return simd4; } inline __m128d (&v4(void))[2] { @@ -871,9 +822,10 @@ public: } template - inline simd4_t& operator=(const simd4_t v) { - simd4[0] = v.v4()[0]; - simd4[1] = v.v4()[1]; + inline simd4_t& operator=(const simd4_t& v) { + simd4_t n(v); + simd4[0] = n.v4()[0]; + simd4[1] = n.v4()[1]; return *this; } inline simd4_t& operator=(const __m128d v[2]) { @@ -1032,7 +984,6 @@ inline double dot(simd4_t v1, const simd4_t& v2) { return hsum_pd_sse(v1.v4()); } -#if 1 template<> inline simd4_t cross(const simd4_t& v1, const simd4_t& v2) { @@ -1053,7 +1004,6 @@ inline simd4_t cross(const simd4_t& v1, const simd4_t inline simd4_t min(simd4_t v1, const simd4_t& v2) { @@ -1083,21 +1033,17 @@ inline simd4_tabs(simd4_t v) { # ifdef __SSE2__ -# include -# ifdef __SSE4_1__ -# include -# endif template -class simd4_t : public simd_aligned16 +class alignas(16) simd4_t { private: typedef int __vec4i_t[N]; - union ALIGN16 { + union alignas(16) { __m128i simd4; - __vec4i_t vec; - } ALIGN16C; + alignas(16) __vec4i_t vec; + }; public: simd4_t(void) {} @@ -1142,8 +1088,8 @@ public: } template - inline simd4_t& operator=(const simd4_t v) { - simd4 = v.v4(); + inline simd4_t& operator=(const simd4_t& v) { + simd4 = simd4_t(v).v4(); return *this; } inline simd4_t& operator=(const __m128& v) { diff --git a/simgear/math/simd4x4.hxx b/simgear/math/simd4x4.hxx index b24a20e8..a6414d12 100644 --- a/simgear/math/simd4x4.hxx +++ b/simgear/math/simd4x4.hxx @@ -282,19 +282,17 @@ inline simd4x4_t operator*(const simd4x4_t& m1, const simd4x4_t& # ifdef __SSE__ -# include - template<> -class simd4x4_t : public simd_aligned16 +class alignas(16) simd4x4_t { private: typedef float __mtx4f_t[4][4]; - union ALIGN16 { + union alignas(16) { __m128 simd4x4[4]; - __mtx4f_t mtx; - float array[4*4]; - } ALIGN16C; + alignas(16) __mtx4f_t mtx; + alignas(16) float array[4*4]; + }; public: simd4x4_t(void) {} @@ -487,19 +485,17 @@ inline simd4_t transform(const simd4x4_t& m, const simd # ifdef __AVX__ -# include - template<> -class simd4x4_t : public simd_aligned32 +class alignas(32) simd4x4_t { private: typedef double __mtx4d_t[4][4]; - union ALIGN32 { + union alignas(32) { __m256d simd4x4[4]; - __mtx4d_t mtx; - double array[4*4]; - } ALIGN32C; + alignas(32) __mtx4d_t mtx; + alignas(32) double array[4*4]; + }; public: simd4x4_t(void) {} @@ -708,19 +704,18 @@ inline simd4_t transform(const simd4x4_t& m, const s } /* namespace simd4x4 */ # elif defined __SSE2__ -# include template<> -class simd4x4_t : public simd_aligned16 +class alignas(16) simd4x4_t { private: typedef double __mtx4d_t[4][4]; - union ALIGN16 { + union alignas(16) { __m128d simd4x4[4][2]; - __mtx4d_t mtx; - double array[4*4]; - } ALIGN16C; + alignas(16) __mtx4d_t mtx; + alignas(16) double array[4*4]; + }; public: simd4x4_t(void) {} @@ -975,19 +970,17 @@ inline simd4_t transform(const simd4x4_t& m, const s # ifdef __SSE2__ -# include - template<> -class simd4x4_t : public simd_aligned16 +class alignas(16) simd4x4_t { private: typedef int __mtx4i_t[4][4]; - union ALIGN16 { + union alignas(16) { __m128i simd4x4[4]; - __mtx4i_t mtx; - int array[4*4]; - } ALIGN16C; + alignas(16) __mtx4i_t mtx; + alignas(16) int array[4*4]; + }; public: simd4x4_t(void) {} diff --git a/simgear/math/simd4x4_neon.hxx b/simgear/math/simd4x4_neon.hxx index 983eac39..46e56312 100644 --- a/simgear/math/simd4x4_neon.hxx +++ b/simgear/math/simd4x4_neon.hxx @@ -27,11 +27,11 @@ class simd4x4_t private: typedef float __mtx4f_t[4][4]; - union ALIGN16 { + union alignas(16) { float32x4_t simd4x4[4]; __mtx4f_t mtx; float array[4*4]; - } ALIGN16C; + }g; public: simd4x4_t(void) {} @@ -254,11 +254,11 @@ class simd4x4_t private: typedef double __mtx4d_t[4][4]; - union ALIGN32 { + union alignas(32) { __m256d simd4x4[4]; __mtx4d_t mtx; double array[4*4]; - } ALIGN32C; + }; public: simd4x4_t(void) {} @@ -491,11 +491,11 @@ class simd4x4_t private: typedef int __mtx4i_t[4][4]; - union ALIGN16 { + union alignas(16) { int32x4_t simd4x4[4]; __mtx4i_t mtx; int array[4*4]; - } ALIGN16C; + }g; public: simd4x4_t(void) {} diff --git a/simgear/math/simd_neon.hxx b/simgear/math/simd_neon.hxx index 8d4b4063..d65f868a 100644 --- a/simgear/math/simd_neon.hxx +++ b/simgear/math/simd_neon.hxx @@ -19,22 +19,9 @@ #define __SIMD_NEON_H__ 1 #ifdef __ARM_NEON__ -# include -# if defined(_MSC_VER) -# define ALIGN16 __declspec(align(16)) -# define ALIGN32 __declspec(align(32)) -# define ALIGN16C -# define ALIGN32C -# elif defined(__GNUC__) -# define ALIGN16 -# define ALIGN32 -# define ALIGN16C __attribute__((aligned(16))) -# define ALIGN32C __attribute__((aligned(32))) -# endif - -static const uint32_t m2a32[] = { 0xffffffff,0xffffffff,0,0 }; -static const uint32_t m3a32[] = { 0xffffffff,0xffffffff,0xffffffff,0 }; +static const uint32_t m2a32[] alignas(16) = { 0xffffffff,0xffffffff,0,0 }; +static const uint32_t m3a32[] alignas(16) = { 0xffffffff,0xffffffff,0xffffffff,0 }; template class simd4_t @@ -42,11 +29,11 @@ class simd4_t private: typedef float __vec4f_t[N]; - union ALIGN16 { + union alignas(16) { float32x4_t simd4; float32x2x2_t simd2x2; __vec4f_t vec; - } ALIGN16C; + }; public: simd4_t(void) {} @@ -54,7 +41,7 @@ public: simd4_t(float x, float y) : simd4_t(x,y,0,0) {} simd4_t(float x, float y, float z) : simd4_t(x,y,z,0) {} simd4_t(float x, float y, float z, float w) { - ALIGN16 float ALIGN16C data[4] = { x, y, z, w }; + alignas(16) float data[4] = { x, y, z, w }; simd4 = vld1q_f32(data); } simd4_t(const __vec4f_t v) {} @@ -244,10 +231,10 @@ class simd4_t private: typedef double __vec4d_t[N]; - union ALIGN32 { + union alignas(32) { __m256d simd4; __vec4d_t vec; - } ALIGN32C; + }; public: simd4_t(void) {} @@ -417,10 +404,10 @@ class simd4_t private: typedef int __vec4i_t[N]; - union ALIGN16 { + union alignas(16) { int32x4_t simd4; __vec4i_t vec; - } ALIGN16C; + }; public: simd4_t(void) {} @@ -428,7 +415,7 @@ public: simd4_t(int x, int y) : simd4_t(x,y,0,0) {} simd4_t(int x, int y, int z) : simd4_t(x,y,z,0) {} simd4_t(int x, int y, int z, int w) { - ALIGN16 int32_t ALIGN16C data[4] = { x, y, z, w }; + alignas(16) int32_t data[4] = { x, y, z, w }; simd4 = vld1q_s32(data); } simd4_t(const __vec4i_t v) {}