Switch to C++11 alignas instead of our own hacks

2017-01-30 16:00:29 +01:00
parent bd421c381c
commit 3387f3d084
4 changed files with 76 additions and 150 deletions
--- a/simgear/math/simd.hxx
+++ b/simgear/math/simd.hxx
@@ -24,6 +24,14 @@
 #include <cmath>
 #include <new>

+#if defined(_MSC_VER)
+# include <intrin.h>
+#elif defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
+# include <x86intrin.h>
+#elif defined(__GNUC__) && defined(__ARM_NEON__)
+# include <arm_neon.h>
+#endif
+
 #include <simgear/math/SGLimits.hxx>
 #include <simgear/math/SGMisc.hxx>

@@ -287,70 +295,34 @@ inline simd4_t<T,N> operator*(simd4_t<T,N> v, T f) {
 }


-# ifdef __MMX__
-#  include <mmintrin.h>
-# if defined(_MSC_VER)
-#  define ALIGN16  __declspec(align(16))
-#  define ALIGN32  __declspec(align(32))
-#  define ALIGN16C
-#  define ALIGN32C
-# elif defined(__GNUC__)
-#  define ALIGN16
-#  define ALIGN32
-#  define ALIGN16C __attribute__((aligned(16)))
-#  define ALIGN32C __attribute__((aligned(32)))
-# endif
-
+# ifdef __SSE__
 namespace simd4
 {
-static ALIGN16 const uint32_t m2a32[] ALIGN16C = {
+alignas(16) static const uint32_t m2a32[] = { /* alignas leads so it applies to the object; elements 0-1 all ones */
    0xffffffff,0xffffffff,0,0
 };
-static ALIGN16 const uint32_t m3a32[] ALIGN16C = {
+alignas(16) static const uint32_t m3a32[] = { /* elements 0-2 all ones, element 3 zero */
    0xffffffff,0xffffffff,0xffffffff,0
 };
-static ALIGN32 const uint64_t m2a64[] ALIGN32C = {
+alignas(32) static const uint64_t m2a64[] = { /* 64-bit variant: elements 0-1 all ones */
    0xffffffffffffffff,0xffffffffffffffff,0,0
 };
-static ALIGN32 const uint64_t m3a64[] ALIGN32C = {
+alignas(32) static const uint64_t m3a64[] = { /* 64-bit variant: elements 0-2 all ones */
    0xffffffffffffffff,0xffffffffffffffff,0xffffffffffffffff,0
 };
 }; /* namespace simd4 */
-# endif

-# ifdef __SSE__
-#  include <xmmintrin.h>
-# ifdef __SSE3__
-#  include <pmmintrin.h>
-# endif
-
-ALIGN16
-class simd_aligned16
-{
-public:
-    simd_aligned16() {}
-    ~simd_aligned16() {}
-
-    static void *operator new (size_t size) throw (std::bad_alloc) {
-        void *p = _mm_malloc(size, 16);
-        if (!p) throw std::bad_alloc();
-        return p;
-    }
-    static void operator delete (void *p) {
-        _mm_free(p);
-    }
-} ALIGN16C;

 template<int N>
-class simd4_t<float,N> : public simd_aligned16
+class alignas(16) simd4_t<float,N>
 {
 private:
   typedef float  __vec4f_t[N];

-    union ALIGN16 {
+    union alignas(16) {
        __m128 simd4;
-        __vec4f_t vec;
-    } ALIGN16C;
+        alignas(16) __vec4f_t vec;
+    };

 public:
    simd4_t(void) {}
@@ -393,8 +365,8 @@ public:
    }

    template<int M>
-    inline simd4_t<float,N>& operator=(const simd4_t<float,M> v) {
-        simd4 = v.v4();
+    inline simd4_t<float,N>& operator=(const simd4_t<float,M>& v) {
+        simd4 = simd4_t<float,N>(v).v4();
        return *this;
    }
    inline simd4_t<float,N>& operator=(const __m128& v) {
@@ -572,35 +544,16 @@ inline simd4_t<float,N>abs(simd4_t<float,N> v) {


 # ifdef __AVX__
-#  include <immintrin.h>
-
-ALIGN32
-class simd_aligned32
-{
-public:
-    simd_aligned32() {}
-    ~simd_aligned32() {}
-
-    static void *operator new (size_t size) throw (std::bad_alloc) {
-        void *p = _mm_malloc(size, 32);
-        if (!p) throw std::bad_alloc();
-        return p;
-    }
-    static void operator delete (void *p) {
-        _mm_free(p);
-    }
-} ALIGN32C;
-
 template<int N>
-class simd4_t<double,N> : public simd_aligned32
+class alignas(32) simd4_t<double,N>
 {
 private:
   typedef double  __vec4d_t[N];

-    union ALIGN32 {
+    union alignas(32) {
        __m256d simd4;
-        __vec4d_t vec;
-    } ALIGN32C;
+        alignas(32) __vec4d_t vec;
+    };

 public:
    simd4_t(void) {}
@@ -643,8 +596,8 @@ public:
    }

    template<int M>
-    inline simd4_t<double,N>& operator=(const simd4_t<double,M> v) {
-        simd4 = v.v4();
+    inline simd4_t<double,N>& operator=(const simd4_t<double,M>& v) {
+        simd4 = simd4_t<double,N>(v).v4();
        return *this;
    }
    inline simd4_t<double,N>& operator=(const __m256d& v) {
@@ -777,7 +730,6 @@ inline double dot(simd4_t<double,4> v1, const simd4_t<double,4>& v2) {
 }

 #  ifdef __AVX2__
-#   include <pmmintrin.h>
 template<>
 inline simd4_t<double,3> cross(const simd4_t<double,3>& v1, const simd4_t<double,3>& v2)
 {
@@ -815,18 +767,17 @@ inline simd4_t<double,N>abs(simd4_t<double,N> v) {
 } /* namespace simd4 */

 # elif defined __SSE2__
-#  include <emmintrin.h>

 template<int N>
-class simd4_t<double,N> : public simd_aligned16
+class alignas(16) simd4_t<double,N>
 {
 private:
   typedef double  __vec4d_t[N];

-    union ALIGN16 {
+    union alignas(16) {
        __m128d simd4[2];
-        __vec4d_t vec;
-    } ALIGN16C;
+        alignas(16) __vec4d_t vec;
+    };

 public:
    simd4_t(void) {}
@@ -846,7 +797,7 @@ public:
        simd4[1] = v[1];
    }

-        inline const __m128d (&v4(void) const)[2] {
+    inline const __m128d (&v4(void) const)[2] {
        return simd4;
    }
    inline __m128d (&v4(void))[2] {
@@ -871,9 +822,10 @@ public:
    }

    template<int M>
-    inline simd4_t<double,N>& operator=(const simd4_t<double,M> v) {
-        simd4[0] = v.v4()[0];
-        simd4[1] = v.v4()[1];
+    inline simd4_t<double,N>& operator=(const simd4_t<double,M>& v) {
+        simd4_t<double,N> n(v);
+        simd4[0] = n.v4()[0];
+        simd4[1] = n.v4()[1];
        return *this;
    }
    inline simd4_t<double,N>& operator=(const __m128d v[2]) {
@@ -1032,7 +984,6 @@ inline double dot(simd4_t<double,4> v1, const simd4_t<double,4>& v2) {
    return hsum_pd_sse(v1.v4());
 }

-#if 1
 template<>
 inline simd4_t<double,3> cross(const simd4_t<double,3>& v1, const simd4_t<double,3>& v2)
 {
@@ -1053,7 +1004,6 @@ inline simd4_t<double,3> cross(const simd4_t<double,3>& v1, const simd4_t<double

    return r;
 }
-#endif

 template<int N>
 inline simd4_t<double,N> min(simd4_t<double,N> v1, const simd4_t<double,N>& v2) {
@@ -1083,21 +1033,17 @@ inline simd4_t<double,N>abs(simd4_t<double,N> v) {


 # ifdef __SSE2__
-#  include <emmintrin.h>
-#  ifdef __SSE4_1__
-#   include <smmintrin.h>
-#  endif

 template<int N>
-class simd4_t<int,N>  : public simd_aligned16
+class alignas(16) simd4_t<int,N>
 {
 private:
   typedef int  __vec4i_t[N];

-    union ALIGN16 {
+    union alignas(16) {
        __m128i simd4;
-        __vec4i_t vec;
-    } ALIGN16C;
+        alignas(16) __vec4i_t vec;
+    };

 public:
    simd4_t(void) {}
@@ -1142,8 +1088,8 @@ public:
    }

    template<int M>
-    inline simd4_t<int,N>& operator=(const simd4_t<int,M> v) {
-        simd4 = v.v4();
+    inline simd4_t<int,N>& operator=(const simd4_t<int,M>& v) {
+        simd4 = simd4_t<int,N>(v).v4();
        return *this;
    }
    inline simd4_t<int,N>& operator=(const __m128& v) {
--- a/simgear/math/simd4x4.hxx
+++ b/simgear/math/simd4x4.hxx
@@ -282,19 +282,17 @@ inline simd4x4_t<T,N> operator*(const simd4x4_t<T,N>& m1, const simd4x4_t<T,N>&


 # ifdef __SSE__
-#  include <xmmintrin.h>
-
 template<>
-class simd4x4_t<float,4>  : public simd_aligned16
+class alignas(16) simd4x4_t<float,4>
 {
 private:
    typedef float  __mtx4f_t[4][4];

-    union ALIGN16 {
+    union alignas(16) {
        __m128 simd4x4[4];
-        __mtx4f_t mtx;
-        float array[4*4];
-    } ALIGN16C;
+        alignas(16) __mtx4f_t mtx;
+        alignas(16) float array[4*4];
+    };

 public:
    simd4x4_t(void) {}
@@ -487,19 +485,17 @@ inline simd4_t<float,3> transform<float>(const simd4x4_t<float,4>& m, const simd


 # ifdef __AVX__
-#  include <immintrin.h>
-
 template<>
-class simd4x4_t<double,4> : public simd_aligned32
+class alignas(32) simd4x4_t<double,4>
 {
 private:
    typedef double  __mtx4d_t[4][4];

-    union ALIGN32 {
+    union alignas(32) {
        __m256d simd4x4[4];
-        __mtx4d_t mtx;
-        double array[4*4];
-    } ALIGN32C;
+        alignas(32) __mtx4d_t mtx;
+        alignas(32) double array[4*4];
+    };

 public:
    simd4x4_t(void) {}
@@ -708,19 +704,18 @@ inline simd4_t<double,3> transform<double>(const simd4x4_t<double,4>& m, const s
 } /* namespace simd4x4 */

 # elif defined __SSE2__
-#  include <emmintrin.h>

 template<>
-class simd4x4_t<double,4> : public simd_aligned16
+class alignas(16) simd4x4_t<double,4>
 {
 private:
    typedef double  __mtx4d_t[4][4];

-    union ALIGN16 {
+    union alignas(16) {
        __m128d simd4x4[4][2];
-        __mtx4d_t mtx;
-        double array[4*4];
-    } ALIGN16C;
+        alignas(16) __mtx4d_t mtx;
+        alignas(16) double array[4*4];
+    };

 public:
    simd4x4_t(void) {}
@@ -975,19 +970,17 @@ inline simd4_t<double,3> transform<double>(const simd4x4_t<double,4>& m, const s


 # ifdef __SSE2__
-#  include <xmmintrin.h>
-
 template<>
-class simd4x4_t<int,4> : public simd_aligned16
+class alignas(16) simd4x4_t<int,4>
 {
 private:
    typedef int  __mtx4i_t[4][4];

-    union ALIGN16 {
+    union alignas(16) {
        __m128i simd4x4[4];
-        __mtx4i_t mtx;
-        int array[4*4];
-    } ALIGN16C;
+        alignas(16) __mtx4i_t mtx;
+        alignas(16) int array[4*4];
+    };

 public:
    simd4x4_t(void) {}
--- a/simgear/math/simd4x4_neon.hxx
+++ b/simgear/math/simd4x4_neon.hxx
@@ -27,11 +27,11 @@ class simd4x4_t<float,4>
 private:
    typedef float  __mtx4f_t[4][4];

-    union ALIGN16 {
+    union alignas(16) {
        float32x4_t simd4x4[4];
        __mtx4f_t mtx;
        float array[4*4];
-    } ALIGN16C;
+    }; /* kept anonymous (no declarator) so simd4x4/mtx/array stay direct members */

 public:
    simd4x4_t(void) {}
@@ -254,11 +254,11 @@ class simd4x4_t<double,4>
 private:
    typedef double  __mtx4d_t[4][4];

-    union ALIGN32 {
+    union alignas(32) {
        __m256d simd4x4[4];
        __mtx4d_t mtx;
        double array[4*4];
-    } ALIGN32C;
+    };

 public:
    simd4x4_t(void) {}
@@ -491,11 +491,11 @@ class simd4x4_t<int,4>
 private:
    typedef int  __mtx4i_t[4][4];

-    union ALIGN16 {
+    union alignas(16) {
        int32x4_t simd4x4[4];
        __mtx4i_t mtx;
        int array[4*4];
-    } ALIGN16C;
+    }; /* kept anonymous (no declarator) so simd4x4/mtx/array stay direct members */

 public:
    simd4x4_t(void) {}
--- a/simgear/math/simd_neon.hxx
+++ b/simgear/math/simd_neon.hxx
@@ -19,22 +19,9 @@
 #define __SIMD_NEON_H__	1

 #ifdef __ARM_NEON__
-# include <arm_neon.h>

-# if defined(_MSC_VER)
-#  define ALIGN16  __declspec(align(16))
-#  define ALIGN32  __declspec(align(32))
-#  define ALIGN16C
-#  define ALIGN32C
-# elif defined(__GNUC__)
-#  define ALIGN16
-#  define ALIGN32
-#  define ALIGN16C __attribute__((aligned(16)))
-#  define ALIGN32C __attribute__((aligned(32)))
-# endif
-
-static const uint32_t m2a32[] = { 0xffffffff,0xffffffff,0,0 };
-static const uint32_t m3a32[] = { 0xffffffff,0xffffffff,0xffffffff,0 };
+alignas(16) static const uint32_t m2a32[] = { 0xffffffff,0xffffffff,0,0 }; /* alignas leads so it applies to the object */
+alignas(16) static const uint32_t m3a32[] = { 0xffffffff,0xffffffff,0xffffffff,0 }; /* elements 0-2 all ones */

 template<int N>
 class simd4_t<float,N>
@@ -42,11 +29,11 @@ class simd4_t<float,N>
 private:
   typedef float  __vec4f_t[N];

-    union ALIGN16 {
+    union alignas(16) {
        float32x4_t simd4;
        float32x2x2_t simd2x2;
        __vec4f_t vec;
-    } ALIGN16C;
+    };

 public:
    simd4_t(void) {}
@@ -54,7 +41,7 @@ public:
    simd4_t(float x, float y) : simd4_t(x,y,0,0) {}
    simd4_t(float x, float y, float z) : simd4_t(x,y,z,0) {}
    simd4_t(float x, float y, float z, float w) {
-        ALIGN16 float ALIGN16C data[4] = { x, y, z, w };
+        alignas(16) float data[4] = { x, y, z, w };
        simd4 = vld1q_f32(data);
    }
    simd4_t(const __vec4f_t v) {}
@@ -244,10 +231,10 @@ class simd4_t<double,N>
 private:
   typedef double  __vec4d_t[N];

-    union ALIGN32 {
+    union alignas(32) {
        __m256d simd4;
        __vec4d_t vec;
-    } ALIGN32C;
+    };

 public:
    simd4_t(void) {}
@@ -417,10 +404,10 @@ class simd4_t<int,N>
 private:
   typedef int  __vec4i_t[N];

-    union ALIGN16 {
+    union alignas(16) {
        int32x4_t simd4;
        __vec4i_t vec;
-    } ALIGN16C;
+    };

 public:
    simd4_t(void) {}
@@ -428,7 +415,7 @@ public:
    simd4_t(int x, int y) : simd4_t(x,y,0,0) {}
    simd4_t(int x, int y, int z) : simd4_t(x,y,z,0) {}
    simd4_t(int x, int y, int z, int w) {
-        ALIGN16 int32_t ALIGN16C data[4] = { x, y, z, w };
+        alignas(16) int32_t data[4] = { x, y, z, w };
        simd4 = vld1q_s32(data);
    }
    simd4_t(const __vec4i_t v) {}