Added Nasal garbage collection background thread

This uses an SGExclusiveThread controlled by Emesary notifications that are received from the main loop. When active, the garbage collection thread is released at the end of each frame; if it is already running this does nothing. Optionally, at the start of the main loop we can wait for the previous GC to finish. The actions of the background GC are controlled by notifications, again received from the main loop, which in turn uses properties. I initially thought that the wait at the start of the frame would be necessary; however, in 100 or so hours of flight without awaiting completion at the start of the frame, no threading problems (or any other problems) were seen, so nasal-gc-threaded-wait defaults to false, which gives a slight boost in performance. What this change does is remove the GC pause of 10-20ms every 4 seconds (tested using the F-15). It doesn't really give much extra performance per frame because normally GC is only performed when needed.
2019-06-11 13:44:52 +02:00
parent c71f287498
commit 92a3c8bbd8
5 changed files with 305 additions and 25 deletions
--- a/simgear/nasal/code.c
+++ b/simgear/nasal/code.c
@@ -157,7 +157,7 @@ static void initContext(naContext c)
    c->error[0] = 0;
    c->userData = 0;
 }
-
+#define BASE_SIZE 256000
 static void initGlobals()
 {
    int i;
@@ -168,10 +168,10 @@ static void initGlobals()
    globals->sem = naNewSem();
    globals->lock = naNewLock();

-    globals->allocCount = 256; // reasonable starting value
+    globals->allocCount = BASE_SIZE; // reasonable starting value
    for(i=0; i<NUM_NASAL_TYPES; i++)
        naGC_init(&(globals->pools[i]), i);
-    globals->deadsz = 256;
+    globals->deadsz = BASE_SIZE;
    globals->ndead = 0;
    globals->deadBlocks = naAlloc(sizeof(void*) * globals->deadsz);

@@ -833,9 +833,13 @@ naRef naGetSourceFile(naContext ctx, int frame)
 {
    naRef f;
    frame = findFrame(ctx, &ctx, frame);
-    f = ctx->fStack[frame].func;
-    f = PTR(f).func->code;
-    return PTR(f).code->srcFile;
+    if (frame >= 0) {
+        f = ctx->fStack[frame].func;
+        f = PTR(f).func->code;
+        if (!IS_NIL(f) && PTR(f).code)
+            return PTR(f).code->srcFile;
+    }
+    return naNil();
 }

 char* naGetError(naContext ctx)
--- a/simgear/nasal/cppbind/CMakeLists.txt
+++ b/simgear/nasal/cppbind/CMakeLists.txt
@@ -5,6 +5,7 @@ set(HEADERS
  Ghost.hxx
  NasalCallContext.hxx
  NasalContext.hxx
+  NasalEmesaryInterface.hxx
  NasalHash.hxx
  NasalMe.hxx
  NasalMethodHolder.hxx
--- a/simgear/nasal/cppbind/NasalEmesaryInterface.hxx
+++ b/simgear/nasal/cppbind/NasalEmesaryInterface.hxx
@@ -0,0 +1,123 @@
+#ifndef NASALEMESARYINTERFACE_INCLUDED
+#define NASALEMESARYINTERFACE_INCLUDED 1
+// Nasal Emesary recipient interface.
+//
+// Copyright (C) 2019  Richard Harrison rjh@zaretto.com
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Library General Public
+// License as published by the Free Software Foundation; either
+// version 2 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// Library General Public License for more details.
+//
+// You should have received a copy of the GNU Library General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA
+
+#include <simgear/nasal/cppbind/NasalHash.hxx>
+#include <simgear/nasal/cppbind/Ghost.hxx>
+
+#include <simgear/math/SGMath.hxx>
+#include <simgear/misc/sg_path.hxx>
+
+#include <simgear/emesary/emesary.hxx>
+#include <simgear/emesary/notifications.hxx>
+
+#include <boost/function.hpp>
+
+#include <simgear/debug/logstream.hxx>
+
+#include <simgear/threads/SGThread.hxx>
+#include <mutex>
+#include <condition_variable>
+#include <atomic>
+
+
+namespace nasal
+{
+    // C-linkage hooks shared between this C++ layer and the Nasal garbage
+    // collector (gc.c declares global_stamp/global_elapsedUSec as extern and
+    // defines GCglobalAlloc/naGarbageCollect).
+    // NOTE(review): global_timestamp and the two functions below are *defined*
+    // here, not merely declared, so including this header from more than one
+    // translation unit would presumably cause duplicate-symbol link errors -
+    // confirm it is only ever included from a single .cxx file.
+    extern"C" {
+        extern int GCglobalAlloc();
+        extern int naGarbageCollect();
+        // these are used by the detailed debug in the Nasal GC.
+        SGTimeStamp global_timestamp;
+        // Calls stamp() on global_timestamp (presumably recording "now" as the
+        // reference point for global_elapsedUSec() - confirm in SGTimeStamp).
+        void global_stamp() {
+            global_timestamp.stamp();
+        }
+        // Returns global_timestamp.elapsedUSec(), i.e. the value reported by the
+        // shared timestamp, as an int.
+        extern int global_elapsedUSec()
+        {
+            return global_timestamp.elapsedUSec();
+        }
+    }
+
+    // Thread object whose unit of work is one call to the Nasal collector.
+    // NOTE(review): process() is presumably invoked by the SGExclusiveThread
+    // base each time the thread is released - confirm against SGThread.hxx.
+    class ThreadedGarbageCollector : public SGExclusiveThread {
+    public:
+        ThreadedGarbageCollector() : SGExclusiveThread() {}
+        virtual ~ThreadedGarbageCollector() {}
+
+        // Runs naGarbageCollect() and passes its result straight through
+        // (1 when a collection was requested, 0 when only dead blocks were freed).
+        virtual int process(){
+            return naGarbageCollect();
+        }
+    };
+
+    // Emesary receiver that drives the background Nasal garbage collection
+    // thread from main loop notifications. It registers itself with the global
+    // transmitter on construction and deregisters on destruction.
+    class NasalMainLoopRecipient : public simgear::Emesary::IReceiver {
+    public:
+        // CanWait and Active start out false so that Receive() never reads an
+        // indeterminate value when a MainLoopNotification arrives before the
+        // first NasalGarbageCollectionConfigurationNotification. Until it is
+        // configured the recipient therefore neither releases nor waits for
+        // the GC thread.
+        NasalMainLoopRecipient() : receiveCount(0), CanWait(false), Active(false) {
+            simgear::Emesary::GlobalTransmitter::instance()->Register(*this);
+            SG_LOG(SG_NASAL, SG_INFO, "NasalMainLoopRecipient created");
+        }
+        virtual ~NasalMainLoopRecipient() {
+            simgear::Emesary::GlobalTransmitter::instance()->DeRegister(*this);
+        }
+
+        std::atomic<int> receiveCount;
+
+        // Handles two kinds of notification:
+        //  - MainLoopNotification: steers the GC thread (see the cases below);
+        //  - NasalGarbageCollectionConfigurationNotification: stores the
+        //    CanWait / Active settings.
+        // Returns ReceiptStatusOK for either of those and
+        // ReceiptStatusNotProcessed for anything else.
+        virtual simgear::Emesary::ReceiptStatus Receive(simgear::Emesary::INotification &n)
+        {
+
+            simgear::Notifications::MainLoopNotification *mln = dynamic_cast<simgear::Notifications::MainLoopNotification *>(&n);
+
+            if (mln) {
+                switch (mln->GetValue()) {
+                case simgear::Notifications::MainLoopNotification::Type::Begin:
+                    // Start of frame: optionally wait for the previous GC to
+                    // finish; otherwise just clear the recorded await time.
+                    if (gct.is_running()) {
+                        if (Active && CanWait)
+                            gct.awaitCompletion();
+                        else
+                            gct.clearAwaitCompletionTime();
+                    }
+                    break;
+                case simgear::Notifications::MainLoopNotification::Type::End:
+                    // End of frame: let the GC thread do one pass.
+                    if (Active) {
+                        if (gct.is_running())
+                            gct.release();
+                    }
+                    break;
+                case simgear::Notifications::MainLoopNotification::Type::Started:
+                    gct.ensure_running();
+                    break;
+                case simgear::Notifications::MainLoopNotification::Type::Stopped:
+                    gct.terminate();
+                    break;
+                }
+                return simgear::Emesary::ReceiptStatusOK;
+            }
+
+            auto *gccn = dynamic_cast<simgear::Notifications::NasalGarbageCollectionConfigurationNotification *>(&n);
+            if (gccn) {
+                CanWait = gccn->GetCanWait();
+                Active = gccn->GetActive();
+                return simgear::Emesary::ReceiptStatusOK;
+            }
+            return simgear::Emesary::ReceiptStatusNotProcessed;
+        }
+    protected:
+        bool CanWait;   // wait for a running GC at the start of the frame
+        bool Active;    // background GC enabled
+        ThreadedGarbageCollector gct;
+    };
+   
+} // namespace nasal
+#endif
--- a/simgear/nasal/cppbind/detail/to_nasal_helper.cxx
+++ b/simgear/nasal/cppbind/detail/to_nasal_helper.cxx
@@ -19,6 +19,7 @@
 #include "to_nasal_helper.hxx"
 #include <simgear/nasal/cppbind/NasalHash.hxx>
 #include <simgear/nasal/cppbind/Ghost.hxx>
+#include <simgear/nasal/cppbind/NasalEmesaryInterface.hxx>

 #include <simgear/math/SGMath.hxx>
 #include <simgear/misc/sg_path.hxx>
@@ -27,6 +28,19 @@

 namespace nasal
 {
+    // create single instance of the main loop recipient for Nasal - this will self register at the 
+    // global transmitter - and that's all that is needed to link up the background GC to the main 
+    // loop in FG that will send out the MainLoop notifications.
+    //class NasalMainLoopRecipientSingleton : public simgear::Singleton<NasalMainLoopRecipient>
+    //{
+    //public:
+    //    NasalMainLoopRecipientSingleton()
+    //    {
+    //    }
+    //    virtual ~NasalMainLoopRecipientSingleton() {}
+    //};
+  NasalMainLoopRecipient mrl;  
+
  //----------------------------------------------------------------------------
  naRef to_nasal_helper(naContext c, const std::string& str)
  {
--- a/simgear/nasal/gc.c
+++ b/simgear/nasal/gc.c
@@ -1,7 +1,6 @@
 #include "nasal.h"
 #include "data.h"
 #include "code.h"
-
 #define MIN_BLOCK_SIZE 32

 static void reap(struct naPool* p);
@@ -12,14 +11,17 @@ struct Block {
    char* block;
    struct Block* next;
 };
-
 // Must be called with the giant exclusive lock!
-static void freeDead()
+extern void global_stamp();
+extern int global_elapsedUSec();
+
+// Frees every block queued in globals->deadBlocks and resets the queue to empty.
+// Returns the number of blocks that were freed (the value of ndead on entry).
+static int freeDead()
 {
    int i;
    for(i=0; i<globals->ndead; i++)
        naFree(globals->deadBlocks[i]);
    globals->ndead = 0;
+    return i;
 }

 static void marktemps(struct Context* c)
@@ -31,50 +33,127 @@ static void marktemps(struct Context* c)
        mark(r);
    }
 }
-
+//#define GC_DETAIL_DEBUG 1 // keep the value when enabling: the guards below use #if, which needs an expression
+static int __elements_visited = 0;
+static int gc_busy=0;
 // Must be called with the big lock!
 static void garbageCollect()
 {
+    if (gc_busy)
+        return;
+    gc_busy = 1;
    int i;
    struct Context* c;
    globals->allocCount = 0;
    c = globals->allContexts;
-    while(c) {
-        for(i=0; i<NUM_NASAL_TYPES; i++)
+
+#if GC_DETAIL_DEBUG
+    int ctxc = 0;
+    __elements_visited = 0;
+    int st = global_elapsedUSec();
+    int et = 0;
+    int stel = __elements_visited;
+    int eel = 0;
+#endif
+
+    c = globals->allContexts;
+    while (c) {
+#if GC_DETAIL_DEBUG
+        ctxc++;
+#endif
+        for (i = 0; i < NUM_NASAL_TYPES; i++)
            c->nfree[i] = 0;
-        for(i=0; i < c->fTop; i++) {
+        for (i = 0; i < c->fTop; i++) {
            mark(c->fStack[i].func);
            mark(c->fStack[i].locals);
        }
-        for(i=0; i < c->opTop; i++)
+        for (i = 0; i < c->opTop; i++)
            mark(c->opStack[i]);
        mark(c->dieArg);
        marktemps(c);
        c = c->nextAll;
    }
+#if GC_DETAIL_DEBUG
+    et = global_elapsedUSec() - st;
+    st = global_elapsedUSec();
+    eel = __elements_visited - stel; stel = __elements_visited;
+    printf("--> garbageCollect(#e%-5d): %-4d ", eel, et);
+#endif

    mark(globals->save);
+#if GC_DETAIL_DEBUG
+    et = global_elapsedUSec() - st;
+    st = global_elapsedUSec();
+    eel = __elements_visited - stel; stel = __elements_visited;
+    printf("s(%5d) %-5d ", eel, et);
+#endif
+
    mark(globals->save_hash);
+#if GC_DETAIL_DEBUG
+    et = global_elapsedUSec() - st;
+    st = global_elapsedUSec();
+    eel = __elements_visited - stel; stel = __elements_visited;
+    printf("h(%5d) %-5d ", eel, et);
+#endif
+
+
    mark(globals->symbols);
+#if GC_DETAIL_DEBUG
+    et = global_elapsedUSec() - st;
+    st = global_elapsedUSec();
+    eel = __elements_visited - stel; stel = __elements_visited;
+    //printf("sy(%5d) %-4d ", eel, et);
+#endif
+
    mark(globals->meRef);
+#if GC_DETAIL_DEBUG
+    et = global_elapsedUSec() - st;
+    st = global_elapsedUSec();
+    eel = __elements_visited - stel; stel = __elements_visited;
+    //printf("me(%5d) %-5d ", eel, et);
+#endif
+
    mark(globals->argRef);
+#if GC_DETAIL_DEBUG
+    et = global_elapsedUSec() - st;
+    st = global_elapsedUSec();
+    eel = __elements_visited - stel; stel = __elements_visited;
+    //printf("ar(%5d) %-5d ", eel, et);
+#endif
+
    mark(globals->parentsRef);
-
+#if GC_DETAIL_DEBUG
+    et = global_elapsedUSec() - st;
+    st = global_elapsedUSec();
+    eel = __elements_visited - stel; stel = __elements_visited;
+#endif
+    //printf(" ev[%3d] %-5d", eel, et);
    // Finally collect all the freed objects
-    for(i=0; i<NUM_NASAL_TYPES; i++)
+    for (i = 0; i < NUM_NASAL_TYPES; i++) {
        reap(&(globals->pools[i]));
-
+    }
+#if GC_DETAIL_DEBUG
+    et = global_elapsedUSec() - st;
+    st = global_elapsedUSec();
+    printf(" >> reap %-5d", et);
+#endif
    // Make enough space for the dead blocks we need to free during
    // execution.  This works out to 1 spot for every 2 live objects,
    // which should be limit the number of bottleneck operations
    // without imposing an undue burden of extra "freeable" memory.
    if(globals->deadsz < globals->allocCount) {
        globals->deadsz = globals->allocCount;
-        if(globals->deadsz < 256) globals->deadsz = 256;
+        if(globals->deadsz < 256000) globals->deadsz = 256000;
        naFree(globals->deadBlocks);
        globals->deadBlocks = naAlloc(sizeof(void*) * globals->deadsz);
    }
    globals->needGC = 0;
+#if GC_DETAIL_DEBUG
+    et = global_elapsedUSec() - st;
+    st = global_elapsedUSec();
+    printf(">> %-5d ", et);
+#endif
+    gc_busy = 0;
 }

 void naModLock()
@@ -104,6 +183,7 @@ void naModUnlock()
 // you think about it).
 static void bottleneck()
 {
+    global_stamp();
    struct Globals* g = globals;
    g->bottleneck = 1;
    while(g->bottleneck && g->waitCount < g->nThreads - 1) {
@@ -111,12 +191,39 @@ static void bottleneck()
        UNLOCK(); naSemDown(g->sem); LOCK();
        g->waitCount--;
    }
+#if GC_DETAIL_DEBUG
+    printf("GC: wait %2d ", global_elapsedUSec());
+#endif
    if(g->waitCount >= g->nThreads - 1) {
-        freeDead();
-        if(g->needGC) garbageCollect();
+        int fd = freeDead();
+#if GC_DETAIL_DEBUG
+        printf("--> freedead (%5d) : %5d", fd, global_elapsedUSec());
+#endif
+        if(g->needGC)
+            garbageCollect();
        if(g->waitCount) naSemUp(g->sem, g->waitCount);
        g->bottleneck = 0;
    }
+#if GC_DETAIL_DEBUG
+    printf(" :: finished: %5d\n", global_elapsedUSec());
+#endif
+}
+
+// Variant of bottleneck() that frees the dead blocks but never calls
+// garbageCollect(). naGarbageCollect() calls it after LOCK(), so it runs with
+// the lock held on entry and on return.
+static void bottleneckFreeDead()
+{
+    global_stamp();
+    struct Globals* g = globals;
+    g->bottleneck = 1;
+    // While the bottleneck flag is still set and fewer than nThreads - 1
+    // waiters are counted: count ourselves as a waiter, drop the lock, block
+    // on the semaphore, then retake the lock and uncount ourselves.
+    while (g->bottleneck && g->waitCount < g->nThreads - 1) {
+        g->waitCount++;
+        UNLOCK(); naSemDown(g->sem); LOCK();
+        g->waitCount--;
+    }
+    // Enough waiters are parked: free the dead blocks, wake every waiter and
+    // clear the flag.
+    if (g->waitCount >= g->nThreads - 1) {
+        freeDead();
+         if (g->waitCount) naSemUp(g->sem, g->waitCount);
+        g->bottleneck = 0;
+    }
+}

 void naGC()
@@ -127,6 +234,29 @@ void naGC()
    UNLOCK();
    naCheckBottleneck();
 }
+// Entry point used by the background garbage collection thread.
+// Takes the lock, then either requests a full collection through bottleneck()
+// or, when none is due, only frees the dead blocks through
+// bottleneckFreeDead().
+// Returns 1 when the bottleneck() path was taken and 0 when only the dead
+// blocks were freed.
+int naGarbageCollect()
+{
+    // allocCount threshold below which a collection is started pre-emptively.
+    enum { GC_PREEMPT_ALLOC_THRESHOLD = 23000 };
+    int rv = 1;
+    LOCK();
+    //
+    // The number here is again based on observation - if this is too low then the inline GC will be used
+    // which is fine occasionally.
+    // So what we're doing by checking the global alloc is to see if GC is likely required during the next frame and if
+    // so we pre-empt this by doing it now.
+    // GC can typically take between 5ms and 50ms (F-15, FG1000 PFD & MFD, Advanced weather) - but usually it is completed
+    // prior to the start of the next frame.
+
+    // Only ever raise needGC here. Another thread may have set it and then
+    // dropped the lock while waiting inside bottleneck(); assigning the
+    // comparison result unconditionally would cancel that pending request.
+    if (nasal_globals->allocCount < GC_PREEMPT_ALLOC_THRESHOLD)
+        globals->needGC = 1;
+    if (globals->needGC)
+        bottleneck();
+    else {
+        bottleneckFreeDead();
+        rv = 0;
+    }
+    UNLOCK();
+    naCheckBottleneck();
+    return rv;
+}

 void naCheckBottleneck()
 {
@@ -207,7 +337,9 @@ static int poolsize(struct naPool* p)
    while(b) { total += b->size; b = b->next; }
    return total;
 }
-
+// Returns the current value of globals->allocCount.
+// NOTE(review): the value is read without taking the lock - confirm callers
+// can tolerate a stale reading.
+int GCglobalAlloc() {
+    return globals->allocCount;
+}
 struct naObj** naGC_get(struct naPool* p, int n, int* nout)
 {
    struct naObj** result;
@@ -215,6 +347,9 @@ struct naObj** naGC_get(struct naPool* p, int n, int* nout)
    LOCK();
    while(globals->allocCount < 0 || (p->nfree == 0 && p->freetop >= p->freesz)) {
        globals->needGC = 1;
+#if GC_DETAIL_DEBUG
+        printf("++");
+#endif
        bottleneck();
    }
    if(p->nfree == 0)
@@ -248,7 +383,7 @@ static void mark(naRef r)

    if(PTR(r).obj->mark == 1)
        return;
-
+    __elements_visited++;
    PTR(r).obj->mark = 1;
    switch(PTR(r).obj->type) {
    case T_VEC: markvec(r); break;
@@ -306,11 +441,14 @@ static void reap(struct naPool* p)

    // Allocate more if necessary (try to keep 25-50% of the objects
    // available)
-    if(p->nfree < total/4) {
+    // This was changed (2019.2) to allocate in larger blocks
+    // previously it used total/4 and used/2 now we
+    // use total/2 and used / 1
+    if (p->nfree < total / 2) {
        int used = total - p->nfree;
        int avail = total - used;
-        int need = used/2 - avail;
-        if(need > 0)
+        int need = used / 1 - avail;
+        if (need > 0)
            newBlock(p, need);
    }
 }