From 3387f3d08449ae3d24b0e5bd41c8da2644960b51 Mon Sep 17 00:00:00 2001
From: Erik Hofman <erik@ehofman.com>
Date: Mon, 30 Jan 2017 16:00:29 +0100
Subject: [PATCH] Switch to c++11 alignas instead of our own hacks

---
 simgear/math/simd.hxx         | 134 ++++++++++------------------------
 simgear/math/simd4x4.hxx      |  47 +++++-------
 simgear/math/simd4x4_neon.hxx |  12 +--
 simgear/math/simd_neon.hxx    |  33 +++------
 4 files changed, 76 insertions(+), 150 deletions(-)
diff --git a/simgear/math/simd.hxx b/simgear/math/simd.hxx
index 4ea04a4b..6adf62de 100644
--- a/simgear/math/simd.hxx
+++ b/simgear/math/simd.hxx
@@ -24,6 +24,14 @@
 #include <cmath>
 #include <new>
 
+#if defined(_MSC_VER)
+# include <intrin.h>
+#elif defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
+# include <x86intrin.h>
+#elif defined(__GNUC__) && defined(__ARM_NEON__)
+# include <arm_neon.h>
+#endif
+
 #include <simgear/math/SGLimits.hxx>
 #include <simgear/math/SGMisc.hxx>
 
@@ -287,70 +295,34 @@ inline simd4_t<T,N> operator*(simd4_t<T,N> v, T f) {
 }
 
 
-# ifdef __MMX__
-#  include <mmintrin.h>
-# if defined(_MSC_VER)
-#  define ALIGN16  __declspec(align(16))
-#  define ALIGN32  __declspec(align(32))
-#  define ALIGN16C
-#  define ALIGN32C
-# elif defined(__GNUC__)
-#  define ALIGN16
-#  define ALIGN32
-#  define ALIGN16C __attribute__((aligned(16)))
-#  define ALIGN32C __attribute__((aligned(32)))
-# endif
-
+# ifdef __SSE__
 namespace simd4
 {
-static ALIGN16 const uint32_t m2a32[] ALIGN16C = {
+static const uint32_t m2a32[] alignas(16) = {
     0xffffffff,0xffffffff,0,0
 };
-static ALIGN16 const uint32_t m3a32[] ALIGN16C = {
+static const uint32_t m3a32[] alignas(16) = {
     0xffffffff,0xffffffff,0xffffffff,0
 };
-static ALIGN32 const uint64_t m2a64[] ALIGN32C = {
+static const uint64_t m2a64[] alignas(32) = {
     0xffffffffffffffff,0xffffffffffffffff,0,0
 };
-static ALIGN32 const uint64_t m3a64[] ALIGN32C = {
+static const uint64_t m3a64[] alignas(32) = {
     0xffffffffffffffff,0xffffffffffffffff,0xffffffffffffffff,0
 };
 }; /* namespace simd4 */
-# endif
 
-# ifdef __SSE__
-#  include <xmmintrin.h>
-# ifdef __SSE3__
-#  include <pmmintrin.h>
-# endif
-
-ALIGN16
-class simd_aligned16
-{
-public:
-    simd_aligned16() {}
-    ~simd_aligned16() {}
-
-    static void *operator new (size_t size) throw (std::bad_alloc) {
-        void *p = _mm_malloc(size, 16);
-        if (!p) throw std::bad_alloc();
-        return p;
-    }
-    static void operator delete (void *p) {
-        _mm_free(p);
-    }
-} ALIGN16C;
 
 template<int N>
-class simd4_t<float,N> : public simd_aligned16
+class alignas(16) simd4_t<float,N>
 {
 private:
    typedef float  __vec4f_t[N];
 
-    union ALIGN16 {
+    union alignas(16) {
         __m128 simd4;
-        __vec4f_t vec;
-    } ALIGN16C;
+        alignas(16) __vec4f_t vec;
+    };
 
 public:
     simd4_t(void) {}
@@ -393,8 +365,8 @@ public:
     }
 
     template<int M>
-    inline simd4_t<float,N>& operator=(const simd4_t<float,M> v) {
-        simd4 = v.v4();
+    inline simd4_t<float,N>& operator=(const simd4_t<float,M>& v) {
+        simd4 = simd4_t<float,N>(v).v4();
         return *this;
     }
     inline simd4_t<float,N>& operator=(const __m128& v) {
@@ -572,35 +544,16 @@ inline simd4_t<float,N>abs(simd4_t<float,N> v) {
 
 
 # ifdef __AVX__
-#  include <immintrin.h>
-
-ALIGN32
-class simd_aligned32
-{
-public:
-    simd_aligned32() {}
-    ~simd_aligned32() {}
-
-    static void *operator new (size_t size) throw (std::bad_alloc) {
-        void *p = _mm_malloc(size, 32);
-        if (!p) throw std::bad_alloc();
-        return p;
-    }
-    static void operator delete (void *p) {
-        _mm_free(p);
-    }
-} ALIGN32C;
-
 template<int N>
-class simd4_t<double,N> : public simd_aligned32
+class alignas(32) simd4_t<double,N>
 {
 private:
    typedef double  __vec4d_t[N];
 
-    union ALIGN32 {
+    union alignas(32) {
         __m256d simd4;
-        __vec4d_t vec;
-    } ALIGN32C;
+        alignas(32) __vec4d_t vec;
+    };
 
 public:
     simd4_t(void) {}
@@ -643,8 +596,8 @@ public:
     }
 
     template<int M>
-    inline simd4_t<double,N>& operator=(const simd4_t<double,M> v) {
-        simd4 = v.v4();
+    inline simd4_t<double,N>& operator=(const simd4_t<double,M>& v) {
+        simd4 = simd4_t<double,N>(v).v4();
         return *this;
     }
     inline simd4_t<double,N>& operator=(const __m256d& v) {
@@ -777,7 +730,6 @@ inline double dot(simd4_t<double,4> v1, const simd4_t<double,4>& v2) {
 }
 
 #  ifdef __AVX2__
-#   include <pmmintrin.h>
 template<>
 inline simd4_t<double,3> cross(const simd4_t<double,3>& v1, const simd4_t<double,3>& v2)
 {
@@ -815,18 +767,17 @@ inline simd4_t<double,N>abs(simd4_t<double,N> v) {
 } /* namespace simd4 */
 
 # elif defined __SSE2__
-#  include <emmintrin.h>
 
 template<int N>
-class simd4_t<double,N> : public simd_aligned16
+class alignas(16) simd4_t<double,N>
 {
 private:
    typedef double  __vec4d_t[N];
 
-    union ALIGN16 {
+    union alignas(16) {
         __m128d simd4[2];
-        __vec4d_t vec;
-    } ALIGN16C;
+        alignas(16) __vec4d_t vec;
+    };
 
 public:
     simd4_t(void) {}
@@ -846,7 +797,7 @@ public:
         simd4[1] = v[1];
     }
 
-        inline const __m128d (&v4(void) const)[2] {
+    inline const __m128d (&v4(void) const)[2] {
         return simd4;
     }
     inline __m128d (&v4(void))[2] {
@@ -871,9 +822,10 @@ public:
     }
 
     template<int M>
-    inline simd4_t<double,N>& operator=(const simd4_t<double,M> v) {
-        simd4[0] = v.v4()[0];
-        simd4[1] = v.v4()[1];
+    inline simd4_t<double,N>& operator=(const simd4_t<double,M>& v) {
+        simd4_t<double,N> n(v);
+        simd4[0] = n.v4()[0];
+        simd4[1] = n.v4()[1];
         return *this;
     }
     inline simd4_t<double,N>& operator=(const __m128d v[2]) {
@@ -1032,7 +984,6 @@ inline double dot(simd4_t<double,4> v1, const simd4_t<double,4>& v2) {
     return hsum_pd_sse(v1.v4());
 }
 
-#if 1
 template<>
 inline simd4_t<double,3> cross(const simd4_t<double,3>& v1, const simd4_t<double,3>& v2)
 {
@@ -1053,7 +1004,6 @@ inline simd4_t<double,3> cross(const simd4_t<double,3>& v1, const simd4_t<double
 
     return r;
 }
-#endif
 
 template<int N>
 inline simd4_t<double,N> min(simd4_t<double,N> v1, const simd4_t<double,N>& v2) {
@@ -1083,21 +1033,17 @@ inline simd4_t<double,N>abs(simd4_t<double,N> v) {
 
 
 # ifdef __SSE2__
-#  include <emmintrin.h>
-#  ifdef __SSE4_1__
-#   include <smmintrin.h>
-#  endif
 
 template<int N>
-class simd4_t<int,N>  : public simd_aligned16
+class alignas(16) simd4_t<int,N>
 {
 private:
    typedef int  __vec4i_t[N];
 
-    union ALIGN16 {
+    union alignas(16) {
         __m128i simd4;
-        __vec4i_t vec;
-    } ALIGN16C;
+        alignas(16) __vec4i_t vec;
+    };
 
 public:
     simd4_t(void) {}
@@ -1142,8 +1088,8 @@ public:
     }
 
     template<int M>
-    inline simd4_t<int,N>& operator=(const simd4_t<int,M> v) {
-        simd4 = v.v4();
+    inline simd4_t<int,N>& operator=(const simd4_t<int,M>& v) {
+        simd4 = simd4_t<int,N>(v).v4();
         return *this;
     }
     inline simd4_t<int,N>& operator=(const __m128& v) {
diff --git a/simgear/math/simd4x4.hxx b/simgear/math/simd4x4.hxx
index b24a20e8..a6414d12 100644
--- a/simgear/math/simd4x4.hxx
+++ b/simgear/math/simd4x4.hxx
@@ -282,19 +282,17 @@ inline simd4x4_t<T,N> operator*(const simd4x4_t<T,N>& m1, const simd4x4_t<T,N>&
 
 
 # ifdef __SSE__
-#  include <xmmintrin.h>
-
 template<>
-class simd4x4_t<float,4>  : public simd_aligned16
+class alignas(16) simd4x4_t<float,4>
 {
 private:
     typedef float  __mtx4f_t[4][4];
 
-    union ALIGN16 {
+    union alignas(16) {
         __m128 simd4x4[4];
-        __mtx4f_t mtx;
-        float array[4*4];
-    } ALIGN16C;
+        alignas(16) __mtx4f_t mtx;
+        alignas(16) float array[4*4];
+    };
 
 public:
     simd4x4_t(void) {}
@@ -487,19 +485,17 @@ inline simd4_t<float,3> transform<float>(const simd4x4_t<float,4>& m, const simd
 
 
 # ifdef __AVX__
-#  include <immintrin.h>
-
 template<>
-class simd4x4_t<double,4> : public simd_aligned32
+class alignas(32) simd4x4_t<double,4>
 {
 private:
     typedef double  __mtx4d_t[4][4];
 
-    union ALIGN32 {
+    union alignas(32) {
         __m256d simd4x4[4];
-        __mtx4d_t mtx;
-        double array[4*4];
-    } ALIGN32C;
+        alignas(32) __mtx4d_t mtx;
+        alignas(32) double array[4*4];
+    };
 
 public:
     simd4x4_t(void) {}
@@ -708,19 +704,18 @@ inline simd4_t<double,3> transform<double>(const simd4x4_t<double,4>& m, const s
 } /* namespace simd4x4 */
 
 # elif defined __SSE2__
-#  include <emmintrin.h>
 
 template<>
-class simd4x4_t<double,4> : public simd_aligned16
+class alignas(16) simd4x4_t<double,4>
 {
 private:
     typedef double  __mtx4d_t[4][4];
 
-    union ALIGN16 {
+    union alignas(16) {
         __m128d simd4x4[4][2];
-        __mtx4d_t mtx;
-        double array[4*4];
-    } ALIGN16C;
+        alignas(16) __mtx4d_t mtx;
+        alignas(16) double array[4*4];
+    };
 
 public:
     simd4x4_t(void) {}
@@ -975,19 +970,17 @@ inline simd4_t<double,3> transform<double>(const simd4x4_t<double,4>& m, const s
 
 
 # ifdef __SSE2__
-#  include <xmmintrin.h>
-
 template<>
-class simd4x4_t<int,4> : public simd_aligned16
+class alignas(16) simd4x4_t<int,4>
 {
 private:
     typedef int  __mtx4i_t[4][4];
 
-    union ALIGN16 {
+    union alignas(16) {
         __m128i simd4x4[4];
-        __mtx4i_t mtx;
-        int array[4*4];
-    } ALIGN16C;
+        alignas(16) __mtx4i_t mtx;
+        alignas(16) int array[4*4];
+    };
 
 public:
     simd4x4_t(void) {}
diff --git a/simgear/math/simd4x4_neon.hxx b/simgear/math/simd4x4_neon.hxx
index 983eac39..46e56312 100644
--- a/simgear/math/simd4x4_neon.hxx
+++ b/simgear/math/simd4x4_neon.hxx
@@ -27,11 +27,11 @@ class simd4x4_t<float,4>
 private:
     typedef float  __mtx4f_t[4][4];
 
-    union ALIGN16 {
+    union alignas(16) {
         float32x4_t simd4x4[4];
         __mtx4f_t mtx;
         float array[4*4];
-    } ALIGN16C;
+    };
 
 public:
     simd4x4_t(void) {}
@@ -254,11 +254,11 @@ class simd4x4_t<double,4>
 private:
     typedef double  __mtx4d_t[4][4];
 
-    union ALIGN32 {
+    union alignas(32) {
         __m256d simd4x4[4];
         __mtx4d_t mtx;
         double array[4*4];
-    } ALIGN32C;
+    };
 
 public:
     simd4x4_t(void) {}
@@ -491,11 +491,11 @@ class simd4x4_t<int,4>
 private:
     typedef int  __mtx4i_t[4][4];
 
-    union ALIGN16 {
+    union alignas(16) {
         int32x4_t simd4x4[4];
         __mtx4i_t mtx;
         int array[4*4];
-    } ALIGN16C;
+    };
 
 public:
     simd4x4_t(void) {}
diff --git a/simgear/math/simd_neon.hxx b/simgear/math/simd_neon.hxx
index 8d4b4063..d65f868a 100644
--- a/simgear/math/simd_neon.hxx
+++ b/simgear/math/simd_neon.hxx
@@ -19,22 +19,9 @@
 #define __SIMD_NEON_H__	1
 
 #ifdef __ARM_NEON__
-# include <arm_neon.h>
 
-# if defined(_MSC_VER)
-#  define ALIGN16  __declspec(align(16))
-#  define ALIGN32  __declspec(align(32))
-#  define ALIGN16C
-#  define ALIGN32C
-# elif defined(__GNUC__)
-#  define ALIGN16
-#  define ALIGN32
-#  define ALIGN16C __attribute__((aligned(16)))
-#  define ALIGN32C __attribute__((aligned(32)))
-# endif
-
-static const uint32_t m2a32[] = { 0xffffffff,0xffffffff,0,0 };
-static const uint32_t m3a32[] = { 0xffffffff,0xffffffff,0xffffffff,0 };
+static const uint32_t m2a32[] alignas(16) = { 0xffffffff,0xffffffff,0,0 };
+static const uint32_t m3a32[] alignas(16) = { 0xffffffff,0xffffffff,0xffffffff,0 };
 
 template<int N>
 class simd4_t<float,N>
@@ -42,11 +29,11 @@ class simd4_t<float,N>
 private:
    typedef float  __vec4f_t[N];
 
-    union ALIGN16 {
+    union alignas(16) {
         float32x4_t simd4;
         float32x2x2_t simd2x2;
         __vec4f_t vec;
-    } ALIGN16C;
+    };
 
 public:
     simd4_t(void) {}
@@ -54,7 +41,7 @@ public:
     simd4_t(float x, float y) : simd4_t(x,y,0,0) {}
     simd4_t(float x, float y, float z) : simd4_t(x,y,z,0) {}
     simd4_t(float x, float y, float z, float w) {
-        ALIGN16 float ALIGN16C data[4] = { x, y, z, w };
+        alignas(16) float data[4] = { x, y, z, w };
         simd4 = vld1q_f32(data);
     }
     simd4_t(const __vec4f_t v) {}
@@ -244,10 +231,10 @@ class simd4_t<double,N>
 private:
    typedef double  __vec4d_t[N];
 
-    union ALIGN32 {
+    union alignas(32) {
         __m256d simd4;
         __vec4d_t vec;
-    } ALIGN32C;
+    };
 
 public:
     simd4_t(void) {}
@@ -417,10 +404,10 @@ class simd4_t<int,N>
 private:
    typedef int  __vec4i_t[N];
 
-    union ALIGN16 {
+    union alignas(16) {
         int32x4_t simd4;
         __vec4i_t vec;
-    } ALIGN16C;
+    };
 
 public:
     simd4_t(void) {}
@@ -428,7 +415,7 @@ public:
     simd4_t(int x, int y) : simd4_t(x,y,0,0) {}
     simd4_t(int x, int y, int z) : simd4_t(x,y,z,0) {}
     simd4_t(int x, int y, int z, int w) {
-        ALIGN16 int32_t ALIGN16C data[4] = { x, y, z, w };
+        alignas(16) int32_t data[4] = { x, y, z, w };
         simd4 = vld1q_s32(data);
     }
     simd4_t(const __vec4i_t v) {}