diff --git a/simgear/io/HTTPRepository.cxx b/simgear/io/HTTPRepository.cxx index bfa426a6..59fa8dc0 100644 --- a/simgear/io/HTTPRepository.cxx +++ b/simgear/io/HTTPRepository.cxx @@ -34,9 +34,11 @@ #include "simgear/debug/logstream.hxx" #include "simgear/misc/strutils.hxx" + #include #include #include +#include #include #include #include @@ -176,7 +178,8 @@ class HTTPDirectory enum Type { FileType, - DirectoryType + DirectoryType, + TarballType }; ChildInfo(Type ty, const std::string & nameData, const std::string & hashData) : @@ -186,7 +189,7 @@ class HTTPDirectory { } - ChildInfo(const ChildInfo& other) = default; + ChildInfo(const ChildInfo& other) = default; void setSize(const std::string & sizeData) { @@ -311,59 +314,41 @@ public: void updateChildrenBasedOnHash() { - //SG_LOG(SG_TERRASYNC, SG_DEBUG, "updated children for:" << relativePath()); + copyInstalledChildren(); - copyInstalledChildren(); + ChildInfoList toBeUpdated; - string_list toBeUpdated, orphans, - indexNames = indexChildren(); - simgear::Dir d(absolutePath()); - PathList fsChildren = d.children(0); + simgear::Dir d(absolutePath()); + PathList fsChildren = d.children(0); + PathList orphans = d.children(0); - for (const auto& child : fsChildren) { - const auto& fileName = child.file(); - if ((fileName == ".dirindex") || (fileName == ".hashes")) { - continue; - } + ChildInfoList::const_iterator it; + for (it=children.begin(); it != children.end(); ++it) { + // Check if the file exists + PathList::const_iterator p = std::find_if(fsChildren.begin(), fsChildren.end(), LocalFileMatcher(*it)); + if (p == fsChildren.end()) { + // File or directory does not exist on local disk, so needs to be updated. + toBeUpdated.push_back(ChildInfo(*it)); + } else if (hashForChild(*it) != it->hash) { + // File/directory exists, but hash doesn't match. + toBeUpdated.push_back(ChildInfo(*it)); + orphans.erase(std::remove(orphans.begin(), orphans.end(), *p), orphans.end()); + } else { + // File/Directory exists and hash is valid. + orphans.erase(std::remove(orphans.begin(), orphans.end(), *p), orphans.end()); - ChildInfo info(child.isDir() ? ChildInfo::DirectoryType : ChildInfo::FileType, - fileName, ""); - info.path = child; - std::string hash = hashForChild(info); + if (it->type == ChildInfo::DirectoryType) { + // If it's a directory,perform a recursive check. + HTTPDirectory* childDir = childDirectory(it->name); + childDir->updateChildrenBasedOnHash(); + } + } + } - ChildInfoList::iterator c = findIndexChild(fileName); - if (c == children.end()) { - orphans.push_back(fileName); - } else if (c->hash != hash) { -#if 0 - SG_LOG(SG_TERRASYNC, SG_DEBUG, "hash mismatch'" << fileName); - // file exists, but hash mismatch, schedule update - if (!hash.empty()) { - SG_LOG(SG_TERRASYNC, SG_DEBUG, "file exists but hash is wrong for:" << fileName); - SG_LOG(SG_TERRASYNC, SG_DEBUG, "on disk:" << hash << " vs in info:" << c->hash); - } -#endif - toBeUpdated.push_back(fileName); - } else { - // file exists and hash is valid. If it's a directory, - // perform a recursive check. - if (c->type == ChildInfo::DirectoryType) { - HTTPDirectory* childDir = childDirectory(fileName); - childDir->updateChildrenBasedOnHash(); - } - } - - // remove existing file system children from the index list, - // so we can detect new children - // https://en.wikibooks.org/wiki/More_C%2B%2B_Idioms/Erase-Remove - indexNames.erase(std::remove(indexNames.begin(), indexNames.end(), fileName), indexNames.end()); - } // of real children iteration - - // all remaining names in indexChilden are new children - toBeUpdated.insert(toBeUpdated.end(), indexNames.begin(), indexNames.end()); - - removeOrphans(orphans); - scheduleUpdates(toBeUpdated); + // We now have a list of entries that need to be updated, and a list + // of orphan files that should be removed. + removeOrphans(orphans); + scheduleUpdates(toBeUpdated); } HTTPDirectory* childDirectory(const std::string& name) @@ -372,10 +357,12 @@ public: return _repository->getOrCreateDirectory(childPath); } - void removeOrphans(const string_list& orphans) + void removeOrphans(const PathList orphans) { - string_list::const_iterator it; + PathList::const_iterator it; for (it = orphans.begin(); it != orphans.end(); ++it) { + if (it->file() == ".dirindex") continue; + if (it->file() == ".hash") continue; removeChild(*it); } } @@ -391,21 +378,20 @@ public: return r; } - void scheduleUpdates(const string_list& names) + void scheduleUpdates(const ChildInfoList names) { - string_list::const_iterator it; + ChildInfoList::const_iterator it; for (it = names.begin(); it != names.end(); ++it) { - ChildInfoList::iterator cit = findIndexChild(*it); - if (cit == children.end()) { - SG_LOG(SG_TERRASYNC, SG_WARN, "scheduleUpdate, unknown child:" << *it); - continue; - } - - if (cit->type == ChildInfo::FileType) { - _repository->updateFile(this, *it, cit->sizeInBytes); + if (it->type == ChildInfo::FileType) { + _repository->updateFile(this, it->name, it->sizeInBytes); + } else if (it->type == ChildInfo::DirectoryType){ + HTTPDirectory* childDir = childDirectory(it->name); + _repository->updateDir(childDir, it->hash, it->sizeInBytes); + } else if (it->type == ChildInfo::TarballType) { + // Download a tarball just as a file. + _repository->updateFile(this, it->name, it->sizeInBytes); } else { - HTTPDirectory* childDir = childDirectory(*it); - _repository->updateDir(childDir, cit->hash, cit->sizeInBytes); + SG_LOG(SG_TERRASYNC, SG_ALERT, "Coding error! Unknown Child type to schedule update"); } } } @@ -430,12 +416,58 @@ public: SG_LOG(SG_TERRASYNC, SG_WARN, "updated file but not found in dir:" << _relativePath << " " << file); } else { if (it->hash != hash) { - // we don't erase the file on a hash mismatch, becuase if we're syncing during the + SG_LOG(SG_TERRASYNC, SG_INFO, "Checksum error for " << absolutePath() << "/" << file << " " << it->hash << " " << hash); + // we don't erase the file on a hash mismatch, because if we're syncing during the // middle of a server-side update, the downloaded file may actually become valid. _repository->failedToUpdateChild(_relativePath, HTTPRepository::REPO_ERROR_CHECKSUM); } else { _repository->updatedFileContents(it->path, hash); _repository->totalDownloaded += sz; + SGPath p = SGPath(absolutePath(), file); + + if ((p.extension() == "tgz") || (p.extension() == "zip")) { + // We require that any compressed files have the same filename as the file or directory + // they expand to, so we can remove the old file/directory before extracting the new + // data. + SGPath removePath = SGPath(p.base()); + bool pathAvailable = true; + if (removePath.exists()) { + if (removePath.isDir()) { + simgear::Dir pd(removePath); + pathAvailable = pd.removeChildren(); + } else { + pathAvailable = removePath.remove(); + } + } + + if (pathAvailable) { + // If this is a tarball, then extract it. + SGBinaryFile f(p); + if (! f.open(SG_IO_IN)) SG_LOG(SG_TERRASYNC, SG_ALERT, "Unable to open " << p << " to extract"); + + SG_LOG(SG_TERRASYNC, SG_INFO, "Extracting " << absolutePath() << "/" << file << " to " << p.dir()); + SGPath extractDir = p.dir(); + ArchiveExtractor ex(extractDir); + + uint8_t* buf = (uint8_t*) alloca(128); + while (!f.eof()) { + size_t bufSize = f.read((char*) buf, 128); + ex.extractBytes(buf, bufSize); + } + + ex.flush(); + if (! ex.isAtEndOfArchive()) { + SG_LOG(SG_TERRASYNC, SG_ALERT, "Corrupt tarball " << p); + } + + if (ex.hasError()) { + SG_LOG(SG_TERRASYNC, SG_ALERT, "Error extracting " << p); + } + + } else { + SG_LOG(SG_TERRASYNC, SG_ALERT, "Unable to remove old file/directory " << removePath); + } // of pathAvailable + } // of handling tgz files } // of hash matches } // of found in child list } @@ -458,6 +490,16 @@ private: { return info.name == name; } }; + struct LocalFileMatcher + { + LocalFileMatcher(const ChildInfo ci) : childInfo(ci) {} + ChildInfo childInfo; + + bool operator()(const SGPath path) const { + return path.file() == childInfo.name; + } + }; + ChildInfoList::iterator findIndexChild(const std::string& name) { return std::find_if(children.begin(), children.end(), ChildWithName(name)); @@ -519,8 +561,8 @@ private: continue; } - if (typeData != "f" && typeData != "d" ) { - SG_LOG(SG_TERRASYNC, SG_WARN, "malformed .dirindex file: invalid type in line '" << line << "', expected 'd' or 'f', (ignoring line)" + if (typeData != "f" && typeData != "d" && typeData != "t" ) { + SG_LOG(SG_TERRASYNC, SG_WARN, "malformed .dirindex file: invalid type in line '" << line << "', expected 't', 'd' or 'f', (ignoring line)" << "\n\tparsing:" << p.utf8Str()); continue; } @@ -533,8 +575,12 @@ private: continue; } - children.emplace_back(ChildInfo(typeData == "f" ? ChildInfo::FileType : ChildInfo::DirectoryType, tokens[1], tokens[2])); - children.back().path = absolutePath() / tokens[1]; + ChildInfo ci = ChildInfo(ChildInfo::FileType, tokens[1], tokens[2]); + if (typeData == "d") ci.type = ChildInfo::DirectoryType; + if (typeData == "t") ci.type = ChildInfo::TarballType; + + children.emplace_back(ci); + children.back().path = absolutePath() / tokens[1]; if (tokens.size() > 3) { children.back().setSize(tokens[3]); } @@ -543,40 +589,36 @@ private: return true; } - void removeChild(const std::string& name) + void removeChild(SGPath path) { - SGPath p(absolutePath()); - p.append(name); bool ok; + SG_LOG(SG_TERRASYNC, SG_WARN, "Removing:" << path); - std::string fpath = _relativePath + "/" + name; - if (p.isDir()) { - ok = _repository->deleteDirectory(fpath, p); + std::string fpath = _relativePath + "/" + path.file(); + if (path.isDir()) { + ok = _repository->deleteDirectory(fpath, path); } else { // remove the hash cache entry - _repository->updatedFileContents(p, std::string()); - ok = p.remove(); + _repository->updatedFileContents(path, std::string()); + ok = path.remove(); } if (!ok) { - SG_LOG(SG_TERRASYNC, SG_WARN, "removal failed for:" << p); - throw sg_io_exception("Failed to remove existing file/dir:", p); + SG_LOG(SG_TERRASYNC, SG_WARN, "removal failed for:" << path); + throw sg_io_exception("Failed to remove existing file/dir:", path); } } std::string hashForChild(const ChildInfo& child) const { - SGPath p(child.path); - if (child.type == ChildInfo::DirectoryType) { - p.append(".dirindex"); - } + SGPath p(child.path); + if (child.type == ChildInfo::DirectoryType) p.append(".dirindex"); + if (child.type == ChildInfo::TarballType) p.concat(".tgz"); // For tarballs the hash is against the tarball file itself return _repository->hashForPath(p); } HTTPRepoPrivate* _repository; std::string _relativePath; // in URL and file-system space - - }; HTTPRepository::HTTPRepository(const SGPath& base, HTTP::Client *cl) :