Add support for tarballs in Terrasync

This commit is contained in:
Stuart Buchanan
2020-02-16 20:26:20 +00:00
parent a4b79d6882
commit 3fe85809b2

View File

@@ -34,9 +34,11 @@
#include "simgear/debug/logstream.hxx"
#include "simgear/misc/strutils.hxx"
#include <simgear/misc/sg_dir.hxx>
#include <simgear/io/HTTPClient.hxx>
#include <simgear/io/sg_file.hxx>
#include <simgear/io/untar.hxx>
#include <simgear/io/iostreams/sgstream.hxx>
#include <simgear/structure/exception.hxx>
#include <simgear/timing/timestamp.hxx>
@@ -176,7 +178,8 @@ class HTTPDirectory
enum Type
{
FileType,
DirectoryType
DirectoryType,
TarballType
};
ChildInfo(Type ty, const std::string & nameData, const std::string & hashData) :
@@ -186,7 +189,7 @@ class HTTPDirectory
{
}
ChildInfo(const ChildInfo& other) = default;
ChildInfo(const ChildInfo& other) = default;
void setSize(const std::string & sizeData)
{
@@ -311,59 +314,41 @@ public:
void updateChildrenBasedOnHash()
{
//SG_LOG(SG_TERRASYNC, SG_DEBUG, "updated children for:" << relativePath());
copyInstalledChildren();
copyInstalledChildren();
ChildInfoList toBeUpdated;
string_list toBeUpdated, orphans,
indexNames = indexChildren();
simgear::Dir d(absolutePath());
PathList fsChildren = d.children(0);
simgear::Dir d(absolutePath());
PathList fsChildren = d.children(0);
PathList orphans = d.children(0);
for (const auto& child : fsChildren) {
const auto& fileName = child.file();
if ((fileName == ".dirindex") || (fileName == ".hashes")) {
continue;
}
ChildInfoList::const_iterator it;
for (it=children.begin(); it != children.end(); ++it) {
// Check if the file exists
PathList::const_iterator p = std::find_if(fsChildren.begin(), fsChildren.end(), LocalFileMatcher(*it));
if (p == fsChildren.end()) {
// File or directory does not exist on local disk, so needs to be updated.
toBeUpdated.push_back(ChildInfo(*it));
} else if (hashForChild(*it) != it->hash) {
// File/directory exists, but hash doesn't match.
toBeUpdated.push_back(ChildInfo(*it));
orphans.erase(std::remove(orphans.begin(), orphans.end(), *p), orphans.end());
} else {
// File/Directory exists and hash is valid.
orphans.erase(std::remove(orphans.begin(), orphans.end(), *p), orphans.end());
ChildInfo info(child.isDir() ? ChildInfo::DirectoryType : ChildInfo::FileType,
fileName, "");
info.path = child;
std::string hash = hashForChild(info);
if (it->type == ChildInfo::DirectoryType) {
// If it's a directory,perform a recursive check.
HTTPDirectory* childDir = childDirectory(it->name);
childDir->updateChildrenBasedOnHash();
}
}
}
ChildInfoList::iterator c = findIndexChild(fileName);
if (c == children.end()) {
orphans.push_back(fileName);
} else if (c->hash != hash) {
#if 0
SG_LOG(SG_TERRASYNC, SG_DEBUG, "hash mismatch'" << fileName);
// file exists, but hash mismatch, schedule update
if (!hash.empty()) {
SG_LOG(SG_TERRASYNC, SG_DEBUG, "file exists but hash is wrong for:" << fileName);
SG_LOG(SG_TERRASYNC, SG_DEBUG, "on disk:" << hash << " vs in info:" << c->hash);
}
#endif
toBeUpdated.push_back(fileName);
} else {
// file exists and hash is valid. If it's a directory,
// perform a recursive check.
if (c->type == ChildInfo::DirectoryType) {
HTTPDirectory* childDir = childDirectory(fileName);
childDir->updateChildrenBasedOnHash();
}
}
// remove existing file system children from the index list,
// so we can detect new children
// https://en.wikibooks.org/wiki/More_C%2B%2B_Idioms/Erase-Remove
indexNames.erase(std::remove(indexNames.begin(), indexNames.end(), fileName), indexNames.end());
} // of real children iteration
// all remaining names in indexChilden are new children
toBeUpdated.insert(toBeUpdated.end(), indexNames.begin(), indexNames.end());
removeOrphans(orphans);
scheduleUpdates(toBeUpdated);
// We now have a list of entries that need to be updated, and a list
// of orphan files that should be removed.
removeOrphans(orphans);
scheduleUpdates(toBeUpdated);
}
HTTPDirectory* childDirectory(const std::string& name)
@@ -372,10 +357,12 @@ public:
return _repository->getOrCreateDirectory(childPath);
}
void removeOrphans(const string_list& orphans)
void removeOrphans(const PathList orphans)
{
string_list::const_iterator it;
PathList::const_iterator it;
for (it = orphans.begin(); it != orphans.end(); ++it) {
if (it->file() == ".dirindex") continue;
if (it->file() == ".hash") continue;
removeChild(*it);
}
}
@@ -391,21 +378,20 @@ public:
return r;
}
void scheduleUpdates(const string_list& names)
void scheduleUpdates(const ChildInfoList names)
{
string_list::const_iterator it;
ChildInfoList::const_iterator it;
for (it = names.begin(); it != names.end(); ++it) {
ChildInfoList::iterator cit = findIndexChild(*it);
if (cit == children.end()) {
SG_LOG(SG_TERRASYNC, SG_WARN, "scheduleUpdate, unknown child:" << *it);
continue;
}
if (cit->type == ChildInfo::FileType) {
_repository->updateFile(this, *it, cit->sizeInBytes);
if (it->type == ChildInfo::FileType) {
_repository->updateFile(this, it->name, it->sizeInBytes);
} else if (it->type == ChildInfo::DirectoryType){
HTTPDirectory* childDir = childDirectory(it->name);
_repository->updateDir(childDir, it->hash, it->sizeInBytes);
} else if (it->type == ChildInfo::TarballType) {
// Download a tarball just as a file.
_repository->updateFile(this, it->name, it->sizeInBytes);
} else {
HTTPDirectory* childDir = childDirectory(*it);
_repository->updateDir(childDir, cit->hash, cit->sizeInBytes);
SG_LOG(SG_TERRASYNC, SG_ALERT, "Coding error! Unknown Child type to schedule update");
}
}
}
@@ -430,12 +416,58 @@ public:
SG_LOG(SG_TERRASYNC, SG_WARN, "updated file but not found in dir:" << _relativePath << " " << file);
} else {
if (it->hash != hash) {
// we don't erase the file on a hash mismatch, becuase if we're syncing during the
SG_LOG(SG_TERRASYNC, SG_INFO, "Checksum error for " << absolutePath() << "/" << file << " " << it->hash << " " << hash);
// we don't erase the file on a hash mismatch, because if we're syncing during the
// middle of a server-side update, the downloaded file may actually become valid.
_repository->failedToUpdateChild(_relativePath, HTTPRepository::REPO_ERROR_CHECKSUM);
} else {
_repository->updatedFileContents(it->path, hash);
_repository->totalDownloaded += sz;
SGPath p = SGPath(absolutePath(), file);
if ((p.extension() == "tgz") || (p.extension() == "zip")) {
// We require that any compressed files have the same filename as the file or directory
// they expand to, so we can remove the old file/directory before extracting the new
// data.
SGPath removePath = SGPath(p.base());
bool pathAvailable = true;
if (removePath.exists()) {
if (removePath.isDir()) {
simgear::Dir pd(removePath);
pathAvailable = pd.removeChildren();
} else {
pathAvailable = removePath.remove();
}
}
if (pathAvailable) {
// If this is a tarball, then extract it.
SGBinaryFile f(p);
if (! f.open(SG_IO_IN)) SG_LOG(SG_TERRASYNC, SG_ALERT, "Unable to open " << p << " to extract");
SG_LOG(SG_TERRASYNC, SG_INFO, "Extracting " << absolutePath() << "/" << file << " to " << p.dir());
SGPath extractDir = p.dir();
ArchiveExtractor ex(extractDir);
uint8_t* buf = (uint8_t*) alloca(128);
while (!f.eof()) {
size_t bufSize = f.read((char*) buf, 128);
ex.extractBytes(buf, bufSize);
}
ex.flush();
if (! ex.isAtEndOfArchive()) {
SG_LOG(SG_TERRASYNC, SG_ALERT, "Corrupt tarball " << p);
}
if (ex.hasError()) {
SG_LOG(SG_TERRASYNC, SG_ALERT, "Error extracting " << p);
}
} else {
SG_LOG(SG_TERRASYNC, SG_ALERT, "Unable to remove old file/directory " << removePath);
} // of pathAvailable
} // of handling tgz files
} // of hash matches
} // of found in child list
}
@@ -458,6 +490,16 @@ private:
{ return info.name == name; }
};
struct LocalFileMatcher
{
LocalFileMatcher(const ChildInfo ci) : childInfo(ci) {}
ChildInfo childInfo;
bool operator()(const SGPath path) const {
return path.file() == childInfo.name;
}
};
ChildInfoList::iterator findIndexChild(const std::string& name)
{
return std::find_if(children.begin(), children.end(), ChildWithName(name));
@@ -519,8 +561,8 @@ private:
continue;
}
if (typeData != "f" && typeData != "d" ) {
SG_LOG(SG_TERRASYNC, SG_WARN, "malformed .dirindex file: invalid type in line '" << line << "', expected 'd' or 'f', (ignoring line)"
if (typeData != "f" && typeData != "d" && typeData != "t" ) {
SG_LOG(SG_TERRASYNC, SG_WARN, "malformed .dirindex file: invalid type in line '" << line << "', expected 't', 'd' or 'f', (ignoring line)"
<< "\n\tparsing:" << p.utf8Str());
continue;
}
@@ -533,8 +575,12 @@ private:
continue;
}
children.emplace_back(ChildInfo(typeData == "f" ? ChildInfo::FileType : ChildInfo::DirectoryType, tokens[1], tokens[2]));
children.back().path = absolutePath() / tokens[1];
ChildInfo ci = ChildInfo(ChildInfo::FileType, tokens[1], tokens[2]);
if (typeData == "d") ci.type = ChildInfo::DirectoryType;
if (typeData == "t") ci.type = ChildInfo::TarballType;
children.emplace_back(ci);
children.back().path = absolutePath() / tokens[1];
if (tokens.size() > 3) {
children.back().setSize(tokens[3]);
}
@@ -543,40 +589,36 @@ private:
return true;
}
void removeChild(const std::string& name)
void removeChild(SGPath path)
{
SGPath p(absolutePath());
p.append(name);
bool ok;
SG_LOG(SG_TERRASYNC, SG_WARN, "Removing:" << path);
std::string fpath = _relativePath + "/" + name;
if (p.isDir()) {
ok = _repository->deleteDirectory(fpath, p);
std::string fpath = _relativePath + "/" + path.file();
if (path.isDir()) {
ok = _repository->deleteDirectory(fpath, path);
} else {
// remove the hash cache entry
_repository->updatedFileContents(p, std::string());
ok = p.remove();
_repository->updatedFileContents(path, std::string());
ok = path.remove();
}
if (!ok) {
SG_LOG(SG_TERRASYNC, SG_WARN, "removal failed for:" << p);
throw sg_io_exception("Failed to remove existing file/dir:", p);
SG_LOG(SG_TERRASYNC, SG_WARN, "removal failed for:" << path);
throw sg_io_exception("Failed to remove existing file/dir:", path);
}
}
std::string hashForChild(const ChildInfo& child) const
{
SGPath p(child.path);
if (child.type == ChildInfo::DirectoryType) {
p.append(".dirindex");
}
SGPath p(child.path);
if (child.type == ChildInfo::DirectoryType) p.append(".dirindex");
if (child.type == ChildInfo::TarballType) p.concat(".tgz"); // For tarballs the hash is against the tarball file itself
return _repository->hashForPath(p);
}
HTTPRepoPrivate* _repository;
std::string _relativePath; // in URL and file-system space
};
HTTPRepository::HTTPRepository(const SGPath& base, HTTP::Client *cl) :