From 92a3c8bbd823ce2aee5cbe434cc0ea5c03c47f71 Mon Sep 17 00:00:00 2001 From: Richard Harrison Date: Tue, 11 Jun 2019 13:44:52 +0200 Subject: [PATCH] Added Nasal garbage collection background thread This uses an SGExclusiveThread controlled by Emesary notifications that are received from the main loop. When active at the end of a frame the garbage collection thread will be released; if it is already running this will do nothing. Optionally at the start of the main loop we can wait for the previous GC to finish. The actions of the background GC are controlled by notifications - again received from the main loop which in turn uses properties. I initially thought that the wait at the start of the frame would be necessary; however in 100 or so hours of flight without the wait for completion at the start of the frame no threading problems (or any other problems) were shown; so nasal-gc-threaded-wait is defaulted to false which gives a slight boost in performance. What this does is remove the GC pause of 10-20ms every 4 seconds (tested using the F-15). This change doesn't really give much extra performance per frame because normally GC is only performed when needed. 
--- simgear/nasal/code.c | 16 +- simgear/nasal/cppbind/CMakeLists.txt | 1 + .../nasal/cppbind/NasalEmesaryInterface.hxx | 123 ++++++++++++ .../nasal/cppbind/detail/to_nasal_helper.cxx | 14 ++ simgear/nasal/gc.c | 176 ++++++++++++++++-- 5 files changed, 305 insertions(+), 25 deletions(-) create mode 100644 simgear/nasal/cppbind/NasalEmesaryInterface.hxx diff --git a/simgear/nasal/code.c b/simgear/nasal/code.c index faf534ec..0783b38b 100644 --- a/simgear/nasal/code.c +++ b/simgear/nasal/code.c @@ -157,7 +157,7 @@ static void initContext(naContext c) c->error[0] = 0; c->userData = 0; } - +#define BASE_SIZE 256000 static void initGlobals() { int i; @@ -168,10 +168,10 @@ static void initGlobals() globals->sem = naNewSem(); globals->lock = naNewLock(); - globals->allocCount = 256; // reasonable starting value + globals->allocCount = BASE_SIZE; // reasonable starting value for(i=0; ipools[i]), i); - globals->deadsz = 256; + globals->deadsz = BASE_SIZE; globals->ndead = 0; globals->deadBlocks = naAlloc(sizeof(void*) * globals->deadsz); @@ -833,9 +833,13 @@ naRef naGetSourceFile(naContext ctx, int frame) { naRef f; frame = findFrame(ctx, &ctx, frame); - f = ctx->fStack[frame].func; - f = PTR(f).func->code; - return PTR(f).code->srcFile; + if (frame >= 0) { + f = ctx->fStack[frame].func; + f = PTR(f).func->code; + if (!IS_NIL(f) && PTR(f).code) + return PTR(f).code->srcFile; + } + return naNil(); } char* naGetError(naContext ctx) diff --git a/simgear/nasal/cppbind/CMakeLists.txt b/simgear/nasal/cppbind/CMakeLists.txt index 08f09937..ca2475c7 100644 --- a/simgear/nasal/cppbind/CMakeLists.txt +++ b/simgear/nasal/cppbind/CMakeLists.txt @@ -5,6 +5,7 @@ set(HEADERS Ghost.hxx NasalCallContext.hxx NasalContext.hxx + NasalEmesaryInterface.hxx NasalHash.hxx NasalMe.hxx NasalMethodHolder.hxx diff --git a/simgear/nasal/cppbind/NasalEmesaryInterface.hxx b/simgear/nasal/cppbind/NasalEmesaryInterface.hxx new file mode 100644 index 00000000..3f8d6c1f --- /dev/null +++ 
b/simgear/nasal/cppbind/NasalEmesaryInterface.hxx @@ -0,0 +1,123 @@ +#ifndef NASALEMESARYINTERFACE_INCLUDED +#define NASALEMESARYINTERFACE_INCLUDED 1 +// Nasal Emesary receipient interface. +// +// Copyright (C) 2019 Richard Harrison rjh@zaretto.com +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Library General Public +// License as published by the Free Software Foundation; either +// version 2 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Library General Public License for more details. +// +// You should have received a copy of the GNU Library General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + +#include +#include + +#include +#include + +#include +#include + +#include + +#include + +#include +#include +#include +#include + + +namespace nasal +{ + extern"C" { + extern int GCglobalAlloc(); + extern int naGarbageCollect(); + // these are used by the detailed debug in the Nasal GC. 
+ SGTimeStamp global_timestamp; + void global_stamp() { + global_timestamp.stamp(); + } + extern int global_elapsedUSec() + { + return global_timestamp.elapsedUSec(); + } + } + + class ThreadedGarbageCollector : public SGExclusiveThread { + public: + ThreadedGarbageCollector() : SGExclusiveThread() {} + virtual ~ThreadedGarbageCollector() {} + + virtual int process(){ + return naGarbageCollect(); + } + }; + + class NasalMainLoopRecipient : public simgear::Emesary::IReceiver { + public: + NasalMainLoopRecipient() : receiveCount(0) { + simgear::Emesary::GlobalTransmitter::instance()->Register(*this); + SG_LOG(SG_NASAL, SG_INFO, "NasalMainLoopRecipient created"); + } + virtual ~NasalMainLoopRecipient() { + simgear::Emesary::GlobalTransmitter::instance()->DeRegister(*this); + } + + std::atomic receiveCount; + virtual simgear::Emesary::ReceiptStatus Receive(simgear::Emesary::INotification &n) + { + + simgear::Notifications::MainLoopNotification *mln = dynamic_cast(&n); + + if (mln) { + switch (mln->GetValue()) { + case simgear::Notifications::MainLoopNotification::Type::Begin: + if (gct.is_running()) { + if (Active && CanWait) + gct.awaitCompletion(); + else + gct.clearAwaitCompletionTime(); + } + break; + case simgear::Notifications::MainLoopNotification::Type::End: + if (Active) { + if (gct.is_running()) + gct.release(); + } + break; + case simgear::Notifications::MainLoopNotification::Type::Started: + gct.ensure_running(); + break; + case simgear::Notifications::MainLoopNotification::Type::Stopped: + gct.terminate(); + break; + } + return simgear::Emesary::ReceiptStatusOK; + } + + auto *gccn = dynamic_cast(&n); + if (gccn) { + CanWait = gccn->GetCanWait(); + Active = gccn->GetActive(); + return simgear::Emesary::ReceiptStatusOK; + } + return simgear::Emesary::ReceiptStatusNotProcessed; + } + protected: + bool CanWait; + bool Active; + ThreadedGarbageCollector gct; + }; + +} // namespace nasal +#endif diff --git a/simgear/nasal/cppbind/detail/to_nasal_helper.cxx 
b/simgear/nasal/cppbind/detail/to_nasal_helper.cxx index 752a7d5a..26d3363f 100644 --- a/simgear/nasal/cppbind/detail/to_nasal_helper.cxx +++ b/simgear/nasal/cppbind/detail/to_nasal_helper.cxx @@ -19,6 +19,7 @@ #include "to_nasal_helper.hxx" #include #include +#include #include #include @@ -27,6 +28,19 @@ namespace nasal { + // create single instance of the main loop recipient for Nasal - this will self register at the + // global transmitter - and that's all that is needed to link up the background GC to the main + // loop in FG that will send out the MainLoop notifications. + //class NasalMainLoopRecipientSingleton : public simgear::Singleton + //{ + //public: + // NasalMainLoopRecipientSingleton() + // { + // } + // virtual ~NasalMainLoopRecipientSingleton() {} + //}; + NasalMainLoopRecipient mrl; + //---------------------------------------------------------------------------- naRef to_nasal_helper(naContext c, const std::string& str) { diff --git a/simgear/nasal/gc.c b/simgear/nasal/gc.c index 5ac9c43c..67a0100b 100644 --- a/simgear/nasal/gc.c +++ b/simgear/nasal/gc.c @@ -1,7 +1,6 @@ #include "nasal.h" #include "data.h" #include "code.h" - #define MIN_BLOCK_SIZE 32 static void reap(struct naPool* p); @@ -12,14 +11,17 @@ struct Block { char* block; struct Block* next; }; - // Must be called with the giant exclusive lock! -static void freeDead() +extern void global_stamp(); +extern int global_elapsedUSec(); + +static int freeDead() { int i; for(i=0; indead; i++) naFree(globals->deadBlocks[i]); globals->ndead = 0; + return i; } static void marktemps(struct Context* c) @@ -31,50 +33,127 @@ static void marktemps(struct Context* c) mark(r); } } - +//#define GC_DETAIL_DEBUG +static int __elements_visited = 0; +static int gc_busy=0; // Must be called with the big lock! 
static void garbageCollect() { + if (gc_busy) + return; + gc_busy = 1; int i; struct Context* c; globals->allocCount = 0; c = globals->allContexts; - while(c) { - for(i=0; iallContexts; + while (c) { +#if GC_DETAIL_DEBUG + ctxc++; +#endif + for (i = 0; i < NUM_NASAL_TYPES; i++) c->nfree[i] = 0; - for(i=0; i < c->fTop; i++) { + for (i = 0; i < c->fTop; i++) { mark(c->fStack[i].func); mark(c->fStack[i].locals); } - for(i=0; i < c->opTop; i++) + for (i = 0; i < c->opTop; i++) mark(c->opStack[i]); mark(c->dieArg); marktemps(c); c = c->nextAll; } +#if GC_DETAIL_DEBUG + et = global_elapsedUSec() - st; + st = global_elapsedUSec(); + eel = __elements_visited - stel; stel = __elements_visited; + printf("--> garbageCollect(#e%-5d): %-4d ", eel, et); +#endif mark(globals->save); +#if GC_DETAIL_DEBUG + et = global_elapsedUSec() - st; + st = global_elapsedUSec(); + eel = __elements_visited - stel; stel = __elements_visited; + printf("s(%5d) %-5d ", eel, et); +#endif + mark(globals->save_hash); +#if GC_DETAIL_DEBUG + et = global_elapsedUSec() - st; + st = global_elapsedUSec(); + eel = __elements_visited - stel; stel = __elements_visited; + printf("h(%5d) %-5d ", eel, et); +#endif + + mark(globals->symbols); +#if GC_DETAIL_DEBUG + et = global_elapsedUSec() - st; + st = global_elapsedUSec(); + eel = __elements_visited - stel; stel = __elements_visited; + //printf("sy(%5d) %-4d ", eel, et); +#endif + mark(globals->meRef); +#if GC_DETAIL_DEBUG + et = global_elapsedUSec() - st; + st = global_elapsedUSec(); + eel = __elements_visited - stel; stel = __elements_visited; + //printf("me(%5d) %-5d ", eel, et); +#endif + mark(globals->argRef); +#if GC_DETAIL_DEBUG + et = global_elapsedUSec() - st; + st = global_elapsedUSec(); + eel = __elements_visited - stel; stel = __elements_visited; + //printf("ar(%5d) %-5d ", eel, et); +#endif + mark(globals->parentsRef); - +#if GC_DETAIL_DEBUG + et = global_elapsedUSec() - st; + st = global_elapsedUSec(); + eel = __elements_visited - stel; stel = 
__elements_visited; +#endif + //printf(" ev[%3d] %-5d", eel, et); // Finally collect all the freed objects - for(i=0; ipools[i])); - + } +#if GC_DETAIL_DEBUG + et = global_elapsedUSec() - st; + st = global_elapsedUSec(); + printf(" >> reap %-5d", et); +#endif // Make enough space for the dead blocks we need to free during // execution. This works out to 1 spot for every 2 live objects, // which should be limit the number of bottleneck operations // without imposing an undue burden of extra "freeable" memory. if(globals->deadsz < globals->allocCount) { globals->deadsz = globals->allocCount; - if(globals->deadsz < 256) globals->deadsz = 256; + if(globals->deadsz < 256000) globals->deadsz = 256000; naFree(globals->deadBlocks); globals->deadBlocks = naAlloc(sizeof(void*) * globals->deadsz); } globals->needGC = 0; +#if GC_DETAIL_DEBUG + et = global_elapsedUSec() - st; + st = global_elapsedUSec(); + printf(">> %-5d ", et); +#endif + gc_busy = 0; } void naModLock() @@ -104,6 +183,7 @@ void naModUnlock() // you think about it). 
static void bottleneck() { + global_stamp(); struct Globals* g = globals; g->bottleneck = 1; while(g->bottleneck && g->waitCount < g->nThreads - 1) { @@ -111,12 +191,39 @@ static void bottleneck() UNLOCK(); naSemDown(g->sem); LOCK(); g->waitCount--; } +#if GC_DETAIL_DEBUG + printf("GC: wait %2d ", global_elapsedUSec()); +#endif if(g->waitCount >= g->nThreads - 1) { - freeDead(); - if(g->needGC) garbageCollect(); + int fd = freeDead(); +#if GC_DETAIL_DEBUG + printf("--> freedead (%5d) : %5d", fd, global_elapsedUSec()); +#endif + if(g->needGC) + garbageCollect(); if(g->waitCount) naSemUp(g->sem, g->waitCount); g->bottleneck = 0; } +#if GC_DETAIL_DEBUG + printf(" :: finished: %5d\n", global_elapsedUSec()); +#endif +} + +static void bottleneckFreeDead() +{ + global_stamp(); + struct Globals* g = globals; + g->bottleneck = 1; + while (g->bottleneck && g->waitCount < g->nThreads - 1) { + g->waitCount++; + UNLOCK(); naSemDown(g->sem); LOCK(); + g->waitCount--; + } + if (g->waitCount >= g->nThreads - 1) { + freeDead(); + if (g->waitCount) naSemUp(g->sem, g->waitCount); + g->bottleneck = 0; + } } void naGC() @@ -127,6 +234,29 @@ void naGC() UNLOCK(); naCheckBottleneck(); } +int naGarbageCollect() +{ + int rv = 1; + LOCK(); + // + // The number here is again based on observation - if this is too low then the inline GC will be used + // which is fine occasionally. + // So what we're doing by checking the global alloc is to see if GC is likely required during the next frame and if + // so we pre-empt this by doing it now. + // GC can typically take between 5ms and 50ms (F-15, FG1000 PFD & MFD, Advanced weather) - but usually it is completed + // prior to the start of the next frame. 
+ + globals->needGC = nasal_globals->allocCount < 23000; + if (globals->needGC) + bottleneck(); + else { + bottleneckFreeDead(); + rv = 0; + } + UNLOCK(); + naCheckBottleneck(); + return rv; +} void naCheckBottleneck() { @@ -207,7 +337,9 @@ static int poolsize(struct naPool* p) while(b) { total += b->size; b = b->next; } return total; } - +int GCglobalAlloc() { + return globals->allocCount; +} struct naObj** naGC_get(struct naPool* p, int n, int* nout) { struct naObj** result; @@ -215,6 +347,9 @@ struct naObj** naGC_get(struct naPool* p, int n, int* nout) LOCK(); while(globals->allocCount < 0 || (p->nfree == 0 && p->freetop >= p->freesz)) { globals->needGC = 1; +#if GC_DETAIL_DEBUG + printf("++"); +#endif bottleneck(); } if(p->nfree == 0) @@ -248,7 +383,7 @@ static void mark(naRef r) if(PTR(r).obj->mark == 1) return; - + __elements_visited++; PTR(r).obj->mark = 1; switch(PTR(r).obj->type) { case T_VEC: markvec(r); break; @@ -306,11 +441,14 @@ static void reap(struct naPool* p) // Allocate more if necessary (try to keep 25-50% of the objects // available) - if(p->nfree < total/4) { + // This was changed (2019.2) to allocate in larger blocks + // previously it used total/4 and used/2 now we + // use total/2 and used / 1 + if (p->nfree < total / 2) { int used = total - p->nfree; int avail = total - used; - int need = used/2 - avail; - if(need > 0) + int need = used / 1 - avail; + if (need > 0) newBlock(p, need); } }