# terrasync.py - synchronize terrascenery data to your local disk
# needs dnspython (pip install dnspython)

import hashlib
import os
import urllib
from urllib.parse import urlparse
from http.client import HTTPConnection, HTTPSConnection, HTTPException, _CS_IDLE
from os import listdir
from os.path import isfile, join


#################################################################################################################################
class HTTPGetCallback:
    """Pair a server-relative path with the function to run once it is fetched.

    Attributes:
        src: path that HTTPGetter.get() appends to the base URL's path.
        callback: callable invoked with no arguments after ``result`` is set.
        result: the response object; ``None`` until the request was made.
    """

    def __init__(self, src, callback):
        self.callback = callback
        self.src = src
        self.result = None


class HTTPGetter:
    """Issue GET requests below a base URL over one keep-alive connection."""

    def __init__(self, baseUrl, maxPending=10):
        """Prepare (but do not yet open) the connection to *baseUrl*'s host.

        ``maxPending``, ``requests`` and ``pendingRequests`` are stored but
        not used by get(), which performs each request synchronously.
        """
        self.baseUrl = baseUrl
        self.parsedBaseUrl = urlparse(baseUrl)
        self.maxPending = maxPending
        self.requests = []
        self.pendingRequests = []

        parsed = self.parsedBaseUrl
        # Use the bare host name and the port given in the URL.  No third
        # positional argument: in Python 3 that slot is ``timeout``, so the
        # former ``True`` silently meant a one second timeout.
        host = parsed.hostname or parsed.netloc
        if parsed.scheme == "https":
            self.httpConnection = HTTPSConnection(host, parsed.port or 443)
        else:
            self.httpConnection = HTTPConnection(host, parsed.port or 80)
        self.httpRequestHeaders = {
            'Host': parsed.netloc,
            'Content-Length': 0,
            'Connection': 'Keep-Alive',
        }

    def get(self, httpGetCallback):
        """Fetch ``httpGetCallback.src`` and then run its callback.

        The response is stored in ``httpGetCallback.result`` before the
        callback is invoked.  If sending the request or reading the response
        head fails, the connection is re-opened and the request is retried
        exactly once; a second failure propagates to the caller.
        """
        conn = self.httpConnection
        path = self.parsedBaseUrl.path + httpGetCallback.src
        try:
            # The request itself is inside the try block: a keep-alive
            # connection closed by the server can already fail while sending.
            conn.request("GET", path, None, self.httpRequestHeaders)
            httpGetCallback.result = conn.getresponse()
        except (HTTPException, OSError):
            # try to reconnect once
            conn.close()
            conn.connect()
            conn.request("GET", path, None, self.httpRequestHeaders)
            httpGetCallback.result = conn.getresponse()

        httpGetCallback.callback()

    def isReady(self):
        """Return True when no request is pending.

        get() is synchronous and never queues anything, so this is True
        unless a caller filled ``pendingRequests`` itself.
        """
        return not self.pendingRequests

    def update(self):
        """Do nothing; requests are already completed inside get()."""
        return None


#################################################################################################################################
class DirIndex:
    """Parsed content of a ``.dirindex`` file.

    The file is read line by line; fields are separated by ':' and the first
    field selects the record type (``version``, ``path``, ``d`` or ``f``).
    """

    def __init__(self, dirIndexFile):
        """Read and parse the file at path *dirIndexFile*."""
        self.d = []
        self.f = []
        self.version = 0
        self.path = ""

        with open(dirIndexFile) as f:
            self.readFrom(f)

    def readFrom(self, readable):
        """Parse the lines yielded by *readable* into this object.

        Blank lines, lines starting with '#', unknown record types and
        records with too few fields are skipped.  A non-numeric version still
        raises ValueError.
        """
        for line in readable:
            line = line.strip()
            if not line or line.startswith('#'):
                continue

            tokens = line.split(':')
            kind = tokens[0]
            fieldCount = len(tokens)

            if kind == "version" and fieldCount > 1:
                self.version = int(tokens[1])

            elif kind == "path" and fieldCount > 1:
                self.path = tokens[1]

            elif kind == "d" and fieldCount > 2:
                self.d.append({'name': tokens[1], 'hash': tokens[2]})

            elif kind == "f" and fieldCount > 2:
                # The size field is kept as text; it is None when absent.
                size = tokens[3] if fieldCount > 3 else None
                self.f.append({'name': tokens[1], 'hash': tokens[2], 'size': size})

    def getVersion(self):
        """Return the integer from the ``version`` record (0 if none)."""
        return self.version

    def getPath(self):
        """Return the text of the ``path`` record ("" if none)."""
        return self.path

    def getDirectories(self):
        """Return the ``d`` records as dicts with keys 'name' and 'hash'."""
        return self.d

    def getFiles(self):
        """Return the ``f`` records as dicts with keys 'name', 'hash', 'size'."""
        return self.f


#################################################################################################################################
class HTTPDownloadRequest(HTTPGetCallback):
    """Request that stores the fetched body in a local file."""

    def __init__(self, terrasync, src, dst, callback=None):
        """Create a download of server path *src* into local file *dst*.

        *callback*, if given, is called with this request after the file has
        been written.  *terrasync* is only stored on the instance.
        """
        super().__init__(src, self.callback)
        self.terrasync = terrasync
        self.dst = dst
        self.mycallback = callback

    def callback(self):
        """Write the response body to ``dst`` and notify the user callback.

        Raises:
            OSError: if the response status is not 200.  Nothing is written
                in that case, so an error page never replaces local data.
        """
        response = self.result
        # Always read the body, even on errors, so the keep-alive connection
        # can be reused for the next request.
        payload = response.read()
        if response.status != 200:
            raise OSError("HTTP {} {} while fetching {}".format(
                response.status, response.reason, self.src))

        with open(self.dst, 'wb') as f:
            f.write(payload)

        if self.mycallback is not None:
            self.mycallback(self)


#################################################################################################################################

def hash_of_file(fname):
    """Return the hex digest of file *fname*, or None if it does not exist."""
    if not os.path.exists(fname):
        return None

    # NOTE(review): the middle of this function was not visible when this
    # block was rewritten; SHA-1 over the raw file bytes is assumed here.
    # Confirm the digest algorithm against the hashes in the .dirindex files.
    digest = hashlib.sha1()
    with open(fname, "rb") as f:
        for chunk in iter(lambda: f.read(65536), b""):
            digest.update(chunk)

    return digest.hexdigest()

#################################################################################################################################
class TerraSync:
    """Mirror a scenery tree from an HTTP server into a local directory.

    Each remote directory is described by a ``.dirindex`` file listing its
    files and sub-directories together with their hashes; only entries whose
    local hash differs are downloaded.
    """

    def __init__(self, url="http://flightgear.sourceforge.net/scenery", target=".", quick=False, removeOrphan=False):
        """Store the configuration; nothing is transferred until start().

        Args:
            url: base URL of the scenery server.
            target: local directory receiving the data.
            quick: if True, a directory whose local ``.dirindex`` hash
                matches the server's is skipped without being re-checked.
            removeOrphan: if True, delete local files that the processed
                ``.dirindex`` does not list.
        """
        self.setUrl(url).setTarget(target)
        self.quick = quick
        self.removeOrphan = removeOrphan
        self.httpGetter = None

    def setUrl(self, url):
        """Set the base URL without surrounding blanks or trailing '/'.

        Returns self so that calls can be chained.
        """
        # Strip whitespace first; otherwise a trailing blank hides the slash.
        self.url = url.strip().rstrip('/')
        return self

    def setTarget(self, target):
        """Set the local target directory without trailing '/'.

        Returns self so that calls can be chained.
        """
        stripped = target.strip()
        # Keep a target made only of slashes ("/") as it is: reducing it to
        # "" would silently redirect everything to the current directory.
        self.target = stripped.rstrip('/') or stripped
        return self

    def start(self):
        """Create the HTTP getter and synchronize from the server root."""
        self.httpGetter = HTTPGetter(self.url)
        self.updateDirectory("", "", None)

    def updateFile(self, serverPath, localPath, fileHash):
        """Download *serverPath* to *localPath* unless its hash matches."""
        localFullPath = join(self.target, localPath)
        if fileHash is not None and hash_of_file(localFullPath) == fileHash:
            return

        print("downloading ", serverPath )

        request = HTTPDownloadRequest(self, serverPath, localFullPath)
        self.httpGetter.get(request)

    def updateDirectory(self, serverPath, localPath, dirIndexHash):
        """Synchronize one directory, creating it locally when missing.

        When the local ``.dirindex`` already has hash *dirIndexHash* it is
        not downloaded again; its entries are still processed unless
        ``quick`` is set.  Otherwise the ``.dirindex`` is fetched and
        processed once it has been written.
        """
        print("processing ", serverPath)

        localFullPath = join(self.target, localPath)
        os.makedirs(localFullPath, exist_ok=True)

        localDirIndex = join(localFullPath, ".dirindex")
        if dirIndexHash is not None and hash_of_file(localDirIndex) == dirIndexHash:
            if not self.quick:
                self.handleDirindexFile(localDirIndex)
        else:
            request = HTTPDownloadRequest(self, serverPath + "/.dirindex", localDirIndex, self.handleDirindexRequest)
            self.httpGetter.get(request)

    def handleDirindexRequest(self, dirindexRequest):
        """Process the ``.dirindex`` file a finished request has written."""
        self.handleDirindexFile(dirindexRequest.dst)

    @staticmethod
    def _isSafeName(name):
        """Return True if *name* is a single, real path component."""
        return name not in ("", ".", "..") and "/" not in name and "\\" not in name

    @staticmethod
    def _serverPath(basePath, name):
        """Return the server path of *name* below *basePath* ("" for root)."""
        if basePath:
            return "/" + basePath + "/" + name
        return "/" + name

    def handleDirindexFile(self, dirindexFile):
        """Update every file and sub-directory listed in *dirindexFile*.

        The index content comes from the server, so it is not trusted:
        entries whose name is not a plain file name are skipped, and an
        index whose path contains a ".." component is rejected, because
        either could address files outside the target directory.

        Raises:
            ValueError: if the path recorded in the index contains "..".
        """
        dirIndex = DirIndex(dirindexFile)
        # Surrounding slashes would yield '//' in requests, and a leading
        # one would make join() discard the target directory.
        basePath = dirIndex.getPath().strip('/')
        if ".." in basePath.split('/'):
            raise ValueError("unsafe path in " + dirindexFile + ": " + dirIndex.getPath())

        serverFiles = set()
        for entry in dirIndex.getFiles():
            name = entry['name']
            if not self._isSafeName(name):
                print("skipping unsafe entry ", name)
                continue
            self.updateFile(self._serverPath(basePath, name), join(basePath, name), entry['hash'])
            serverFiles.add(name)

        for entry in dirIndex.getDirectories():
            name = entry['name']
            if not self._isSafeName(name):
                print("skipping unsafe entry ", name)
                continue
            self.updateDirectory(self._serverPath(basePath, name), join(basePath, name), entry['hash'])

        if self.removeOrphan:
            localFullPath = join(self.target, basePath)
            for name in listdir(localFullPath):
                if name == ".dirindex" or name in serverFiles:
                    continue
                if isfile(join(localFullPath, name)):
                    os.remove(join(localFullPath, name))

    def isReady(self):
        """Return whether a started synchronization has no work pending.

        False before start().  Afterwards the answer is taken from the
        getter's ``isReady()`` when it offers one; a getter without that
        method is treated as ready.
        """
        getter = self.httpGetter
        if getter is None:
            return False
        probe = getattr(getter, "isReady", None)
        return bool(probe()) if callable(probe) else True

    def update(self):
        """Forward to the getter's ``update()`` if it has one."""
        step = getattr(self.httpGetter, "update", None)
        if callable(step):
            step()


#################################################################################################################################
import getopt
import sys

USAGE = "terrasync.py [--url=http://some.server.org/scenery] [--target=/some/path] [-q|--quick] [-r|--remove-orphan]"


def parse_command_line(argv):
    """Build a TerraSync configured from the option list *argv*.

    Raises:
        getopt.GetoptError: on an unknown option or a missing argument.
    """
    opts, args = getopt.getopt(argv, "u:t:qr", ["url=", "target=", "quick", "remove-orphan"])

    terraSync = TerraSync()
    for opt, arg in opts:
        if opt in ("-u", "--url"):
            # Use the setters so that the values are normalized the same way
            # as constructor arguments.
            terraSync.setUrl(arg)

        elif opt in ("-t", "--target"):
            terraSync.setTarget(arg)

        elif opt in ("-q", "--quick"):
            terraSync.quick = True

        elif opt in ("-r", "--remove-orphan"):
            terraSync.removeOrphan = True

    return terraSync


def main(argv=None):
    """Run a synchronization; *argv* defaults to ``sys.argv[1:]``.

    Prints the usage line and exits with status 2 on bad options.
    """
    try:
        terraSync = parse_command_line(sys.argv[1:] if argv is None else argv)
    except getopt.GetoptError:
        print(USAGE)
        sys.exit(2)

    terraSync.start()


# Only synchronize when executed as a script, so that importing this module
# neither parses the caller's command line nor starts network transfers.
if __name__ == "__main__":
    main()