Much better terrasync.py

- tortellini instead of spaghetti code (use OOP)
- reuse the HTTP connection
parent a4a3659ee9
commit 6921c98933
1 changed file with 176 additions and 129 deletions
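The refactor below replaces the module-level globals (URL, TARGET, QUICK, REMOVE_ORPHAN) and the recursive do_terrasync() function with a TerraSync class that owns its settings and a single keep-alive HTTP connection. A minimal sketch of how the new class is meant to be driven, based only on the code added in this diff (the import line assumes the script is importable as a module named terrasync, which the commit itself does not arrange; the target path is a made-up example):

    # sketch only: exercises the TerraSync class added in this commit
    from terrasync import TerraSync           # assumption, not part of the commit

    terraSync = TerraSync()                    # defaults to the SourceForge master repository and target "."
    terraSync.target = "/tmp/scenery"          # hypothetical local scenery directory
    terraSync.removeOrphan = True              # delete local files the server no longer lists
    terraSync.start()                          # opens the connection and walks the tree from the root .dirindex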
@@ -19,27 +19,113 @@
 #
 # terrasync.py - synchronize terrascenery data to your local disk
 # needs dnspython (pip install dnspython)
 #
 
-import os
-import hashlib
-import urllib.request
+import urllib, os, hashlib
+from urllib.parse import urlparse
+from http.client import HTTPConnection, _CS_IDLE
 from os import listdir
 from os.path import isfile, join
 
-dirindex = ".dirindex"
-DIRINDEXVERSION = 1
-
-URL="http://flightgear.sourceforge.net/scenery"
-# User master repository for now
-#URL="automatic"
-TARGET="."
-QUICK=False
-REMOVE_ORPHAN=False
-
-
-########################################################################
-
-def fn_hash_of_file(fname):
+#################################################################################################################################
+class HTTPGetCallback:
+    def __init__(self, src, callback):
+        self.callback = callback
+        self.src = src
+        self.result = None
+
+class HTTPGetter:
+    def __init__(self, baseUrl, maxPending=10):
+        self.baseUrl = baseUrl
+        self.parsedBaseUrl = urlparse(baseUrl)
+        self.maxPending = maxPending
+        self.requests = []
+        self.pendingRequests = []
+        self.httpConnection = HTTPConnection(self.parsedBaseUrl.netloc,80, True)
+        self.httpRequestHeaders = headers = {'Host':self.parsedBaseUrl.netloc,'Content-Length':0,'Connection':'Keep-Alive'}
+
+    def get(self, httpGetCallback):
+        #self.requests.append(httpGetCallback)
+        conn = self.httpConnection
+        request = httpGetCallback
+        conn.request("GET", self.parsedBaseUrl.path + request.src, None, self.httpRequestHeaders)
+        try:
+            httpGetCallback.result = conn.getresponse()
+        except:
+            # try to reconnect once
+            #print("reconnect")
+            conn.close()
+            conn.connect()
+            conn.request("GET", self.parsedBaseUrl.path + request.src, None, self.httpRequestHeaders)
+            httpGetCallback.result = conn.getresponse()
+
+        httpGetCallback.callback()
+        #self.requests.remove(httpGetCallback)
+
+#################################################################################################################################
+class DirIndex:
+
+    def __init__(self, dirIndexFile):
+        self.d = []
+        self.f = []
+        self.version = 0
+        self.path = ""
+
+        with open(dirIndexFile) as f:
+            self.readFrom(f)
+
+    def readFrom(self, readable):
+        for line in readable:
+            line = line.strip()
+            if line.startswith('#'):
+                continue
+
+            tokens = line.split(':')
+            if len(tokens) == 0:
+                continue
+
+            if tokens[0] == "version":
+                self.version = int(tokens[1])
+
+            elif tokens[0] == "path":
+                self.path = tokens[1]
+
+            elif tokens[0] == "d":
+                self.d.append({ 'name': tokens[1], 'hash': tokens[2] })
+
+            elif tokens[0] == "f":
+                self.f.append({ 'name': tokens[1], 'hash': tokens[2], 'size': tokens[3] })
+
+    def getVersion(self):
+        return self.version
+
+    def getPath(self):
+        return self.path
+
+    def getDirectories(self):
+        return self.d
+
+    def getFiles(self):
+        return self.f
+
+#################################################################################################################################
+class HTTPDownloadRequest(HTTPGetCallback):
+    def __init__(self, terrasync, src, dst, callback = None ):
+        super().__init__(src, self.callback)
+        self.terrasync = terrasync
+        self.dst = dst
+        self.mycallback = callback
+
+    def callback(self):
+        with open(self.dst, 'wb') as f:
+            f.write(self.result.read())
+
+        if self.mycallback != None:
+            self.mycallback(self)
+
+#################################################################################################################################
+
+def hash_of_file(fname):
     if not os.path.exists( fname ):
         return None
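The DirIndex class added above parses the colon-separated .dirindex files that terrasync servers publish: a version line, a path line, then d: entries for subdirectories and f: entries for files, each carrying a hash (and, for files, a size). A hypothetical file that this parser would accept, shown only to illustrate the token layout (the names, hashes and sizes are invented):

    version:1
    path:Models/Residential
    d:brick:2a4f0c1e9b...
    f:house.ac:9c1e77ab52...:48211
    f:house.xml:77ab9c1e24...:1024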
@@ -53,85 +139,92 @@ def fn_hash_of_file(fname):
     return hash.hexdigest()
 
-########################################################################
-def do_download_file( _url, _path, _localfile, _hash, _force ):
-    if os.path.exists( _localfile ) and not _force:
-        h = fn_hash_of_file(_localfile)
-        if h == _hash:
-            #print("hash match for ", _localfile)
-            return False
-
-    r = urllib.request.urlopen( _url + _path )
-    with open(_localfile, 'wb') as f:
-        f.write( r.read() )
-    #print("downloaded ", _localfile, " from ", _url + _path )
-    return True
-
-########################################################################
-def do_terrasync( _url, _path, _localdir, _dirIndexHash ):
-    url = _url + _path
-    print(url)
-
-    if not os.path.exists( _localdir ):
-        os.makedirs( _localdir )
-
-    # download and process .dirindex as temporary file
-    # rename to .dirindex after successful processing of directory
-    # in case of abort, .dirindex.tmp will be removed as orphan
-    myDirIndexFile = os.path.join(_localdir, ".dirindex.tmp")
-
-    try:
-        if not do_download_file( url, "/.dirindex", myDirIndexFile, _dirIndexHash, QUICK == False ):
-            # dirindex hash matches, file not downloaded, skip directory
-            return
-
-    except urllib.error.HTTPError as err:
-        if err.code == 404 and _path == "":
-            # HACK: only the master on SF provides .dirindex for root, fake it if it's missing
-            print("Using static root hack.")
-            for _sub in ("Models", "Terrain", "Objects", "Airports" ):
-                do_terrasync( _url, "/" + _sub, os.path.join(_localdir,_sub), None )
-            return
-
-        else:
-            raise
-
-    with open(myDirIndexFile, 'r') as myDirIndex:
-        serverFiles = []
-        for line in myDirIndex:
-            tokens = line.rstrip().split(':')
-            if( len(tokens) == 0 ):
-                continue
-
-            # TODO: check version number, should be equal to DIRINDEXVERSION
-            # otherwise complain and terminate
-            if( tokens[0] == "version" ):
-                continue
-
-            if( tokens[0] == "path" ):
-                continue
-
-            if( tokens[0] == "d" ):
-                do_terrasync( url, "/" + tokens[1], os.path.join(_localdir,tokens[1]), tokens[2] )
-
-            if( tokens[0] == "f" ):
-                do_download_file( url, "/" + tokens[1], os.path.join(_localdir,tokens[1]), tokens[2], False )
-                serverFiles.append( tokens[1] )
-
-    os.rename( myDirIndexFile, os.path.join(_localdir, ".dirindex" ) )
-
-    localFiles = [f for f in listdir(_localdir) if isfile(join(_localdir, f))]
-    for f in localFiles:
-        if f != ".dirindex" and not f in serverFiles:
-            if REMOVE_ORPHAN:
-                os.remove( os.path.join(_localdir,f) )
-
-    #TODO: cleanup orphan files
-
-
-########################################################################
-
-import getopt, sys, random, re
+#################################################################################################################################
+class TerraSync:
+
+    def __init__(self, url="http://flightgear.sourceforge.net/scenery", target=".", quick=False, removeOrphan=False):
+        self.setUrl(url).setTarget(target)
+        self.quick = quick
+        self.removeOrphan = removeOrphan
+        self.httpGetter = None
+
+    def setUrl(self, url):
+        self.url = url.rstrip('/').strip()
+        return self
+
+    def setTarget(self, target):
+        self.target = target.rstrip('/').strip()
+        return self
+
+    def start(self):
+        self.httpGetter = HTTPGetter(self.url)
+        self.updateDirectory("", "", None )
+
+    def updateFile(self, serverPath, localPath, fileHash ):
+        localFullPath = join(self.target, localPath)
+        if fileHash != None and hash_of_file(localFullPath) == fileHash:
+            #print("hash of file matches, not downloading")
+            return
+
+        print("downloading ", serverPath )
+
+        request = HTTPDownloadRequest(self, serverPath, localFullPath )
+        self.httpGetter.get(request)
+
+    def updateDirectory(self, serverPath, localPath, dirIndexHash):
+        print("processing ", serverPath)
+
+        localFullPath = join(self.target, localPath)
+        if not os.path.exists( localFullPath ):
+            os.makedirs( localFullPath )
+
+        localDirIndex = join(localFullPath, ".dirindex")
+        if dirIndexHash != None and hash_of_file(localDirIndex) == dirIndexHash:
+            # print("hash of dirindex matches, not downloading")
+            if not self.quick:
+                self.handleDirindexFile( localDirIndex )
+        else:
+            request = HTTPDownloadRequest(self, serverPath + "/.dirindex", localDirIndex, self.handleDirindexRequest )
+            self.httpGetter.get(request)
+
+    def handleDirindexRequest(self, dirindexRequest):
+        self.handleDirindexFile(dirindexRequest.dst)
+
+    def handleDirindexFile(self, dirindexFile):
+        dirIndex = DirIndex(dirindexFile)
+        serverFiles = []
+
+        for file in dirIndex.getFiles():
+            f = file['name']
+            h = file['hash']
+            self.updateFile( "/" + dirIndex.getPath() + "/" + f, join(dirIndex.getPath(),f), h )
+            serverFiles.append(f)
+
+        for subdir in dirIndex.getDirectories():
+            d = subdir['name']
+            h = subdir['hash']
+            self.updateDirectory( "/" + dirIndex.getPath() + "/" + d, join(dirIndex.getPath(),d), h )
+
+        if self.removeOrphan:
+            localFullPath = join(self.target, dirIndex.getPath())
+            localFiles = [f for f in listdir(localFullPath) if isfile(join(localFullPath, f))]
+            for f in localFiles:
+                if f != ".dirindex" and not f in serverFiles:
+                    #print("removing orphan", join(localFullPath,f) )
+                    os.remove( join(localFullPath,f) )
+
+    def isReady(self):
+        return self.httpGetter and self.httpGetter.isReady()
+        return False
+
+    def update(self):
+        if self.httpGetter:
+            self.httpGetter.update()
+
+#################################################################################################################################
+
+import getopt, sys
 try:
     opts, args = getopt.getopt(sys.argv[1:], "u:t:qr", [ "url=", "target=", "quick", "remove-orphan" ])
 
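The "reuse connection" half of the commit message lives in HTTPGetter: it holds one keep-alive HTTPConnection and pushes every GET through it, reconnecting once if the server has dropped the socket, while HTTPDownloadRequest writes each response body to its destination file and then fires an optional callback. A stand-alone sketch of that collaboration, using only classes from this diff (the server path and local filename are made-up examples, and passing None as the terrasync argument is an assumption that holds only because the field is stored but never used within the code shown here):

    # sketch only: fetch one file over the shared keep-alive connection
    getter = HTTPGetter("http://flightgear.sourceforge.net/scenery")

    def done(req):
        print("saved", req.dst)

    req = HTTPDownloadRequest(None, "/Models/.dirindex", "/tmp/models.dirindex", done)
    getter.get(req)    # synchronous: returns after the response is written and done() has run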
@@ -139,64 +232,18 @@ except getopt.GetoptError:
     print("terrasync.py [--url=http://some.server.org/scenery] [--target=/some/path] [-q|--quick] [-r|--remove-orphan]")
     sys.exit(2)
 
+terraSync = TerraSync()
 for opt, arg in opts:
     if opt in("-u", "--url"):
-        URL = arg
+        terraSync.url = arg
 
     elif opt in ("-t", "--target"):
-        TARGET= arg
+        terraSync.target = arg
 
     elif opt in ("-q", "--quick"):
-        QUICK = True
+        terraSync.quick = True
 
     elif opt in ("-r", "--remove-orphan"):
-        REMOVE_ORPHAN = True
+        terraSync.removeOrphan = True
 
-# automatic URL lookup from DNS NAPTR
-# - lookup terrasync.flightgear.org, type=NAPTR, service="ws20", flags="U"
-# - sort by order,preference ascending
-# - pick entries with lowest order and preference
-# - randomly pick one of those
-# - use regexp fields URL
-if URL == "automatic":
-    import dns.resolver
-    dnsResolver = dns.resolver.Resolver()
-
-    order = -1
-    preference = -1
-
-    # find lowes preference/order for service 'ws20' and flags 'U'
-    dnsAnswer = dnsResolver.query("terrasync.flightgear.org", "NAPTR" )
-    for naptr in dnsAnswer:
-        if naptr.service != b'ws20' or naptr.flags != b'U':
-            continue
-
-        if order == -1 or naptr.order < order:
-            order = naptr.order
-            preference = naptr.preference
-
-        if order == naptr.order:
-            if naptr.preference < preference:
-                preference = naptr.preference
-
-
-    # grab candidats
-    candidates = []
-    for naptr in dnsAnswer:
-        if naptr.service != b'ws20' or naptr.flags != b'U' or naptr.preference != preference or naptr.order != order:
-            continue
-
-        candidates.append( naptr.regexp.decode('utf-8') )
-
-    if not candidates:
-        print("sorry, no terrascenery URLs found. You may specify one with --url=http://some.url.org/foo")
-        sys.exit(3)
-
-    _url = random.choice(candidates)
-    _subst = _url.split(_url[0]) # split string, first character is separator <sep>regex<sep>replacement<sep>
-    URL = re.sub(_subst[1], _subst[2], "" ) # apply regex substitude on empty string
-
-print( "terrasyncing from ", URL, "to ", TARGET )
-do_terrasync( URL, "", TARGET, None )
-
-########################################################################
+terraSync.start()
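Note that the DNS-based automatic URL lookup (URL == "automatic") is dropped by this hunk rather than ported into TerraSync; after the commit, the script always uses an explicit --url or the built-in SourceForge default. For reference, the removed logic amounts to: query the NAPTR records of terrasync.flightgear.org with dnspython, keep the 'ws20'/'U' records with the lowest order and preference, pick one at random, and apply its regexp field to an empty string to obtain the URL. A compressed sketch of that idea, assuming dnspython 1.x where the record fields are bytes exactly as in the removed code:

    # sketch only: condensed version of the removed "automatic" URL lookup
    import random, re
    import dns.resolver

    answer = dns.resolver.query("terrasync.flightgear.org", "NAPTR")
    usable = [r for r in answer if r.service == b'ws20' and r.flags == b'U']
    # the original exits with an error message if 'usable' ends up empty
    best = min((r.order, r.preference) for r in usable)
    chosen = random.choice([r for r in usable if (r.order, r.preference) == best])
    regexp = chosen.regexp.decode('utf-8')
    parts = regexp.split(regexp[0])        # "<sep>pattern<sep>replacement<sep>" -> ['', pattern, replacement, '']
    url = re.sub(parts[1], parts[2], "")   # NAPTR regexp applied to an empty string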