From Mathias Froehlich, "This is a generic optimization that does not depend on any cpu or instruction set. The optimization is based on the observation that matrix matrix multiplication with a dense matrix 4x4 is 4^3 operations whereas multiplication with a transform, or scale matrix is only 4^2 operations. Which is a gain of a *FACTOR*4* for these special cases. The change implements these special cases, provides a unit test for these implementations and converts uses of the more expensive dense matrix matrix routine to the specialized versions. Depending on the transform nodes in the scenegraph this change gives a noticeable improvement. For example the osgforest code using the MatrixTransform is about 20% slower than the same codepath using the PositionAttitudeTransform instead of the MatrixTransform with this patch applied. If I remember right, the sse type optimizations did *not* provide a factor 4 improvement. Also these changes are totally independent of any cpu or instruction set architecture. So I would prefer to have this current kind of change instead of some hand coded and cpu dependent assembly stuff. If we need that hand tuned stuff, it can go on top of these changes, which must then provide hand optimized additional variants for the specialized versions to give an even better result in the end. Another change included here is a change to the rotation matrix from quaternion code. There is a sqrt call which could be optimized away. Since we divide in effect by sqrt(length)*sqrt(length) which is just length ... "
2008-09-17 16:14:28 +00:00
parent 0598ac3b69
commit 22eae68e48
21 changed files with 723 additions and 193 deletions
--- a/include/osg/AnimationPath
+++ b/include/osg/AnimationPath
@@ -91,30 +91,30 @@ class OSG_EXPORT AnimationPath : public virtual osg::Object

            inline void getMatrix(Matrixf& matrix) const
            {
-                matrix.makeScale(_scale);
-                matrix.postMult(osg::Matrixf::rotate(_rotation));
-                matrix.postMult(osg::Matrixf::translate(_position));
+                matrix.makeRotate(_rotation); // start from the rotation alone
+                matrix.preMultScale(_scale); // scale goes on the pre-multiply side of the rotation
+                matrix.postMultTranslate(_position); // translation goes on the post-multiply side, keeping the scale, rotate, translate order of the removed lines
            }

            inline void getMatrix(Matrixd& matrix) const
            {
-                matrix.makeScale(_scale);
-                matrix.postMult(osg::Matrixd::rotate(_rotation));
-                matrix.postMult(osg::Matrixd::translate(_position));
+                matrix.makeRotate(_rotation); // overwrite matrix with the rotation
+                matrix.preMultScale(_scale); // then fold the scale in on the pre-multiply side
+                matrix.postMultTranslate(_position); // finally the translation on the post-multiply side (scale, rotate, translate as in the removed lines)
            }

            inline void getInverse(Matrixf& matrix) const
            {
-                matrix.makeScale(1.0/_scale.x(),1.0/_scale.y(),1.0/_scale.z());
-                matrix.preMult(osg::Matrixf::rotate(_rotation.inverse()));
-                matrix.preMult(osg::Matrixf::translate(-_position));
+                matrix.makeRotate(_rotation.inverse()); // start from the inverse rotation
+                matrix.postMultScale(osg::Vec3d(1.0/_scale.x(),1.0/_scale.y(),1.0/_scale.z())); // reciprocal scale on the post-multiply side; no guard against a zero scale component
+                matrix.preMultTranslate(-_position); // negated translation on the pre-multiply side, i.e. the reverse order of getMatrix
            }

            inline void getInverse(Matrixd& matrix) const
            {
-                matrix.makeScale(1.0/_scale.x(),1.0/_scale.y(),1.0/_scale.z());
-                matrix.preMult(osg::Matrixd::rotate(_rotation.inverse()));
-                matrix.preMult(osg::Matrixd::translate(-_position));
+                matrix.makeRotate(_rotation.inverse()); // overwrite matrix with the inverse rotation
+                matrix.postMultScale(osg::Vec3d(1.0/_scale.x(),1.0/_scale.y(),1.0/_scale.z())); // per-axis reciprocal of _scale, applied on the post-multiply side; a zero component is not checked
+                matrix.preMultTranslate(-_position); // translation by -_position on the pre-multiply side (translate, rotate, scale as in the removed lines)
            }

        protected:
--- a/include/osg/Matrixd
+++ b/include/osg/Matrixd
@@ -347,6 +347,25 @@ class OSG_EXPORT Matrixd
        void preMult( const Matrixd& );
        void postMult( const Matrixd& );

+        /** Optimized version of preMult(translate(v)); updates only row 3 of this matrix in place. */
+        inline void preMultTranslate( const Vec3d& v );
+        inline void preMultTranslate( const Vec3f& v );
+        /** Optimized version of postMult(translate(v)); updates only columns 0..2 of this matrix in place. */
+        inline void postMultTranslate( const Vec3d& v );
+        inline void postMultTranslate( const Vec3f& v );
+
+        /** Optimized version of preMult(scale(v)); multiplies rows 0..2 by v[0], v[1], v[2] in place. */
+        inline void preMultScale( const Vec3d& v );
+        inline void preMultScale( const Vec3f& v );
+        /** Optimized version of postMult(scale(v)); multiplies columns 0..2 by v[0], v[1], v[2] in place. */
+        inline void postMultScale( const Vec3d& v );
+        inline void postMultScale( const Vec3f& v );
+
+        /** Optimized version of preMult(rotate(q)); returns without touching the matrix when q.zeroRotation() is true. */
+        inline void preMultRotate( const Quat& q );
+        /** Optimized version of postMult(rotate(q)); returns without touching the matrix when q.zeroRotation() is true. */
+        inline void postMultRotate( const Quat& q );
+
        inline void operator *= ( const Matrixd& other ) 
        {    if( this == &other ) {
                Matrixd temp(other);
@@ -647,6 +666,108 @@ inline Vec3d Matrixd::transform3x3(const Matrixd& m,const Vec3d& v)
                 (m._mat[2][0]*v.x() + m._mat[2][1]*v.y() + m._mat[2][2]*v.z()) ) ;
 }

+inline void Matrixd::preMultTranslate( const Vec3d& v ) // in-place form of preMult(translate(v)); only row 3 is written
+{
+    for (unsigned i = 0; i < 3; ++i) // one pass per component of v
+    {
+        double tmp = v[i];
+        if (tmp == 0) // a zero component would add nothing to row 3, so skip the four multiply-adds
+            continue;
+        _mat[3][0] += tmp*_mat[i][0]; // row 3 accumulates v[i] times row i
+        _mat[3][1] += tmp*_mat[i][1];
+        _mat[3][2] += tmp*_mat[i][2];
+        _mat[3][3] += tmp*_mat[i][3];
+    }
+}
+
+inline void Matrixd::preMultTranslate( const Vec3f& v ) // Vec3f overload of the in-place preMult(translate(v))
+{
+    for (unsigned i = 0; i < 3; ++i) // visit the three components of v
+    {
+        float tmp = v[i];
+        if (tmp == 0) // nothing to add for a zero component
+            continue;
+        _mat[3][0] += tmp*_mat[i][0]; // add v[i] times row i into row 3
+        _mat[3][1] += tmp*_mat[i][1];
+        _mat[3][2] += tmp*_mat[i][2];
+        _mat[3][3] += tmp*_mat[i][3];
+    }
+}
+
+inline void Matrixd::postMultTranslate( const Vec3d& v ) // in-place form of postMult(translate(v)); only columns 0..2 are written
+{
+    for (unsigned i = 0; i < 3; ++i) // one pass per component of v, i.e. per destination column
+    {
+        double tmp = v[i];
+        if (tmp == 0) // a zero component leaves column i unchanged, so skip it
+            continue;
+        _mat[0][i] += tmp*_mat[0][3]; // column i accumulates v[i] times column 3
+        _mat[1][i] += tmp*_mat[1][3];
+        _mat[2][i] += tmp*_mat[2][3];
+        _mat[3][i] += tmp*_mat[3][3];
+    }
+}
+
+inline void Matrixd::postMultTranslate( const Vec3f& v ) // Vec3f overload of the in-place postMult(translate(v))
+{
+    for (unsigned i = 0; i < 3; ++i) // i selects both the component of v and the column being updated
+    {
+        float tmp = v[i];
+        if (tmp == 0) // nothing to add for a zero component
+            continue;
+        _mat[0][i] += tmp*_mat[0][3]; // add v[i] times column 3 into column i
+        _mat[1][i] += tmp*_mat[1][3];
+        _mat[2][i] += tmp*_mat[2][3];
+        _mat[3][i] += tmp*_mat[3][3];
+    }
+}
+
+inline void Matrixd::preMultScale( const Vec3d& v ) // in-place form of preMult(scale(v)); row 3 is left untouched
+{
+    _mat[0][0] *= v[0]; _mat[0][1] *= v[0]; _mat[0][2] *= v[0]; _mat[0][3] *= v[0]; // row 0 scaled by v[0]
+    _mat[1][0] *= v[1]; _mat[1][1] *= v[1]; _mat[1][2] *= v[1]; _mat[1][3] *= v[1]; // row 1 scaled by v[1]
+    _mat[2][0] *= v[2]; _mat[2][1] *= v[2]; _mat[2][2] *= v[2]; _mat[2][3] *= v[2]; // row 2 scaled by v[2]
+}
+
+inline void Matrixd::preMultScale( const Vec3f& v ) // Vec3f overload of the in-place preMult(scale(v))
+{
+    _mat[0][0] *= v[0]; _mat[0][1] *= v[0]; _mat[0][2] *= v[0]; _mat[0][3] *= v[0]; // every element of row 0 times v[0]
+    _mat[1][0] *= v[1]; _mat[1][1] *= v[1]; _mat[1][2] *= v[1]; _mat[1][3] *= v[1]; // every element of row 1 times v[1]
+    _mat[2][0] *= v[2]; _mat[2][1] *= v[2]; _mat[2][2] *= v[2]; _mat[2][3] *= v[2]; // every element of row 2 times v[2]; row 3 is not modified
+}
+
+inline void Matrixd::postMultScale( const Vec3d& v ) // in-place form of postMult(scale(v)); column 3 is left untouched
+{
+    _mat[0][0] *= v[0]; _mat[1][0] *= v[0]; _mat[2][0] *= v[0]; _mat[3][0] *= v[0]; // column 0 scaled by v[0]
+    _mat[0][1] *= v[1]; _mat[1][1] *= v[1]; _mat[2][1] *= v[1]; _mat[3][1] *= v[1]; // column 1 scaled by v[1]
+    _mat[0][2] *= v[2]; _mat[1][2] *= v[2]; _mat[2][2] *= v[2]; _mat[3][2] *= v[2]; // column 2 scaled by v[2]
+}
+
+inline void Matrixd::postMultScale( const Vec3f& v ) // Vec3f overload of the in-place postMult(scale(v))
+{
+    _mat[0][0] *= v[0]; _mat[1][0] *= v[0]; _mat[2][0] *= v[0]; _mat[3][0] *= v[0]; // every element of column 0 times v[0]
+    _mat[0][1] *= v[1]; _mat[1][1] *= v[1]; _mat[2][1] *= v[1]; _mat[3][1] *= v[1]; // every element of column 1 times v[1]
+    _mat[0][2] *= v[2]; _mat[1][2] *= v[2]; _mat[2][2] *= v[2]; _mat[3][2] *= v[2]; // every element of column 2 times v[2]; column 3 is not modified
+}
+
+inline void Matrixd::preMultRotate( const Quat& q ) // preMult(rotate(q)) with an early exit for a zero rotation
+{
+    if (q.zeroRotation()) // nothing to apply, leave this matrix as it is
+        return;
+    Matrixd r; // temporary that will carry the rotation
+    r.setRotate(q); // fill r from the quaternion
+    preMult(r); // fall back to the general matrix-matrix preMult
+}
+
+inline void Matrixd::postMultRotate( const Quat& q ) // postMult(rotate(q)) with an early exit for a zero rotation
+{
+    if (q.zeroRotation()) // nothing to apply, leave this matrix as it is
+        return;
+    Matrixd r; // temporary that will carry the rotation
+    r.setRotate(q); // fill r from the quaternion
+    postMult(r); // fall back to the general matrix-matrix postMult
+}
+
 inline Vec3f operator* (const Vec3f& v, const Matrixd& m )
 {
    return m.preMult(v);
--- a/include/osg/Matrixf
+++ b/include/osg/Matrixf
@@ -349,6 +349,25 @@ class OSG_EXPORT Matrixf
        void preMult( const Matrixf& );
        void postMult( const Matrixf& );

+        /** Optimized version of preMult(translate(v)); updates only row 3 of this matrix in place. */
+        inline void preMultTranslate( const Vec3d& v );
+        inline void preMultTranslate( const Vec3f& v );
+        /** Optimized version of postMult(translate(v)); updates only columns 0..2 of this matrix in place. */
+        inline void postMultTranslate( const Vec3d& v );
+        inline void postMultTranslate( const Vec3f& v );
+
+        /** Optimized version of preMult(scale(v)); multiplies rows 0..2 by v[0], v[1], v[2] in place. */
+        inline void preMultScale( const Vec3d& v );
+        inline void preMultScale( const Vec3f& v );
+        /** Optimized version of postMult(scale(v)); multiplies columns 0..2 by v[0], v[1], v[2] in place. */
+        inline void postMultScale( const Vec3d& v );
+        inline void postMultScale( const Vec3f& v );
+
+        /** Optimized version of preMult(rotate(q)); returns without touching the matrix when q.zeroRotation() is true. */
+        inline void preMultRotate( const Quat& q );
+        /** Optimized version of postMult(rotate(q)); returns without touching the matrix when q.zeroRotation() is true. */
+        inline void postMultRotate( const Quat& q );
+
        inline void operator *= ( const Matrixf& other ) 
        {    if( this == &other ) {
                Matrixf temp(other);
@@ -641,6 +660,108 @@ inline Vec3d Matrixf::transform3x3(const Matrixf& m,const Vec3d& v)
                 (m._mat[2][0]*v.x() + m._mat[2][1]*v.y() + m._mat[2][2]*v.z()) ) ;
 }

+inline void Matrixf::preMultTranslate( const Vec3d& v ) // in-place form of preMult(translate(v)); only row 3 is written
+{
+    for (unsigned i = 0; i < 3; ++i) // one pass per component of v
+    {
+        double tmp = v[i];
+        if (tmp == 0) // a zero component would add nothing to row 3, so skip the four multiply-adds
+            continue;
+        _mat[3][0] += tmp*_mat[i][0]; // row 3 accumulates v[i] times row i
+        _mat[3][1] += tmp*_mat[i][1];
+        _mat[3][2] += tmp*_mat[i][2];
+        _mat[3][3] += tmp*_mat[i][3];
+    }
+}
+
+inline void Matrixf::preMultTranslate( const Vec3f& v ) // Vec3f overload of the in-place preMult(translate(v))
+{
+    for (unsigned i = 0; i < 3; ++i) // visit the three components of v
+    {
+        float tmp = v[i];
+        if (tmp == 0) // nothing to add for a zero component
+            continue;
+        _mat[3][0] += tmp*_mat[i][0]; // add v[i] times row i into row 3
+        _mat[3][1] += tmp*_mat[i][1];
+        _mat[3][2] += tmp*_mat[i][2];
+        _mat[3][3] += tmp*_mat[i][3];
+    }
+}
+
+inline void Matrixf::postMultTranslate( const Vec3d& v ) // in-place form of postMult(translate(v)); only columns 0..2 are written
+{
+    for (unsigned i = 0; i < 3; ++i) // one pass per component of v, i.e. per destination column
+    {
+        double tmp = v[i];
+        if (tmp == 0) // a zero component leaves column i unchanged, so skip it
+            continue;
+        _mat[0][i] += tmp*_mat[0][3]; // column i accumulates v[i] times column 3
+        _mat[1][i] += tmp*_mat[1][3];
+        _mat[2][i] += tmp*_mat[2][3];
+        _mat[3][i] += tmp*_mat[3][3];
+    }
+}
+
+inline void Matrixf::postMultTranslate( const Vec3f& v ) // Vec3f overload of the in-place postMult(translate(v))
+{
+    for (unsigned i = 0; i < 3; ++i) // i selects both the component of v and the column being updated
+    {
+        float tmp = v[i];
+        if (tmp == 0) // nothing to add for a zero component
+            continue;
+        _mat[0][i] += tmp*_mat[0][3]; // add v[i] times column 3 into column i
+        _mat[1][i] += tmp*_mat[1][3];
+        _mat[2][i] += tmp*_mat[2][3];
+        _mat[3][i] += tmp*_mat[3][3];
+    }
+}
+
+inline void Matrixf::preMultScale( const Vec3d& v ) // in-place form of preMult(scale(v)); row 3 is left untouched
+{
+    _mat[0][0] *= v[0]; _mat[0][1] *= v[0]; _mat[0][2] *= v[0]; _mat[0][3] *= v[0]; // row 0 scaled by v[0]
+    _mat[1][0] *= v[1]; _mat[1][1] *= v[1]; _mat[1][2] *= v[1]; _mat[1][3] *= v[1]; // row 1 scaled by v[1]
+    _mat[2][0] *= v[2]; _mat[2][1] *= v[2]; _mat[2][2] *= v[2]; _mat[2][3] *= v[2]; // row 2 scaled by v[2]
+}
+
+inline void Matrixf::preMultScale( const Vec3f& v ) // Vec3f overload of the in-place preMult(scale(v))
+{
+    _mat[0][0] *= v[0]; _mat[0][1] *= v[0]; _mat[0][2] *= v[0]; _mat[0][3] *= v[0]; // every element of row 0 times v[0]
+    _mat[1][0] *= v[1]; _mat[1][1] *= v[1]; _mat[1][2] *= v[1]; _mat[1][3] *= v[1]; // every element of row 1 times v[1]
+    _mat[2][0] *= v[2]; _mat[2][1] *= v[2]; _mat[2][2] *= v[2]; _mat[2][3] *= v[2]; // every element of row 2 times v[2]; row 3 is not modified
+}
+
+inline void Matrixf::postMultScale( const Vec3d& v ) // in-place form of postMult(scale(v)); column 3 is left untouched
+{
+    _mat[0][0] *= v[0]; _mat[1][0] *= v[0]; _mat[2][0] *= v[0]; _mat[3][0] *= v[0]; // column 0 scaled by v[0]
+    _mat[0][1] *= v[1]; _mat[1][1] *= v[1]; _mat[2][1] *= v[1]; _mat[3][1] *= v[1]; // column 1 scaled by v[1]
+    _mat[0][2] *= v[2]; _mat[1][2] *= v[2]; _mat[2][2] *= v[2]; _mat[3][2] *= v[2]; // column 2 scaled by v[2]
+}
+
+inline void Matrixf::postMultScale( const Vec3f& v ) // Vec3f overload of the in-place postMult(scale(v))
+{
+    _mat[0][0] *= v[0]; _mat[1][0] *= v[0]; _mat[2][0] *= v[0]; _mat[3][0] *= v[0]; // every element of column 0 times v[0]
+    _mat[0][1] *= v[1]; _mat[1][1] *= v[1]; _mat[2][1] *= v[1]; _mat[3][1] *= v[1]; // every element of column 1 times v[1]
+    _mat[0][2] *= v[2]; _mat[1][2] *= v[2]; _mat[2][2] *= v[2]; _mat[3][2] *= v[2]; // every element of column 2 times v[2]; column 3 is not modified
+}
+
+
+inline void Matrixf::preMultRotate( const Quat& q ) // preMult(rotate(q)) with an early exit for a zero rotation
+{
+    if (q.zeroRotation()) // nothing to apply, leave this matrix as it is
+        return;
+    Matrixf r; // temporary that will carry the rotation
+    r.setRotate(q); // fill r from the quaternion
+    preMult(r); // fall back to the general matrix-matrix preMult
+}
+
+inline void Matrixf::postMultRotate( const Quat& q ) // postMult(rotate(q)) with an early exit for a zero rotation
+{
+    if (q.zeroRotation()) // nothing to apply, leave this matrix as it is
+        return;
+    Matrixf r; // temporary that will carry the rotation
+    r.setRotate(q); // fill r from the quaternion
+    postMult(r); // fall back to the general matrix-matrix postMult
+}

 inline Vec3f operator* (const Vec3f& v, const Matrixf& m )
 {