Much better terrasync.py

- tortellini instead of spaghetti code (use OOP)
- reuse the HTTP connection
2016-05-18 12:51:29 +02:00 · 2016-05-18 12:51:29 +02:00 · 6921c98933
commit 6921c98933
parent a4a3659ee9
1 changed files with 176 additions and 129 deletions
--- a/scripts/python/terrasync.py
+++ b/scripts/python/terrasync.py
@ -19,27 +19,113 @@
 #
 # terrasync.py - synchronize terrascenery data to your local disk
 # needs dnspython (pip install dnspython)
-#

-import os 
-import hashlib
-import urllib.request
+import urllib, os, hashlib
+from urllib.parse import urlparse
+from http.client import HTTPConnection, _CS_IDLE
 from os import listdir
 from os.path import isfile, join

-dirindex = ".dirindex"
-DIRINDEXVERSION = 1
+#################################################################################################################################
+class HTTPGetCallback:
+    """Pairs a server-relative source path with a completion callback.
+
+    src      -- path that HTTPGetter.get() appends to the base URL's path
+    callback -- zero-argument callable that HTTPGetter.get() invokes once
+                the response object has been stored in ``result``
+    """
+    def __init__(self, src, callback):
+        # No response yet; HTTPGetter.get() fills this in.
+        self.result = None
+        self.src, self.callback = src, callback

-URL="http://flightgear.sourceforge.net/scenery"
-# User master repository for now
-#URL="automatic"
-TARGET="."
-QUICK=False
-REMOVE_ORPHAN=False
+class HTTPGetter:
+    """Issues blocking HTTP GET requests over one reusable connection.
+
+    The request path is the path component of ``baseUrl`` followed by the
+    ``src`` of the HTTPGetCallback passed to get().
+    """
+
+    def __init__(self, baseUrl, maxPending=10):
+        """Create the HTTPConnection for the server named in baseUrl.
+
+        baseUrl    -- root URL of the server, e.g. "http://host[:port]/scenery"
+        maxPending -- stored on the instance; get() does not consult it
+        """
+        self.baseUrl = baseUrl
+        self.parsedBaseUrl = urlparse(baseUrl)
+        self.maxPending = maxPending
+        self.requests = []
+        self.pendingRequests = []
+        # Honour a port given in the URL instead of always dialling 80, and
+        # pass the bare host name: with an explicit port argument
+        # HTTPConnection does not split "host:port" itself.
+        # No third positional argument: since Python 3.4 that slot is the
+        # socket timeout, so the former `True` acted as a one-second timeout.
+        self.httpConnection = HTTPConnection(self.parsedBaseUrl.hostname or "", self.parsedBaseUrl.port or 80)
+        self.httpRequestHeaders = {'Host':self.parsedBaseUrl.netloc,'Content-Length':0,'Connection':'Keep-Alive'}

-########################################################################
+    def get(self, httpGetCallback):

-def fn_hash_of_file(fname):
+        """Fetch httpGetCallback.src and hand the response to the callback.
+
+        The response object is stored in httpGetCallback.result and then
+        httpGetCallback.callback() is invoked. If the first attempt fails
+        with a connection or protocol error, the connection is re-opened and
+        the request is retried once; a second failure propagates.
+        """
+        from http.client import HTTPException
+
+        conn = self.httpConnection
+        path = self.parsedBaseUrl.path + httpGetCallback.src
+        try:
+            # request() belongs inside the try as well: on a kept-alive
+            # connection the server may already have hung up, which can
+            # surface while sending rather than while reading.
+            conn.request("GET", path, None, self.httpRequestHeaders)
+            httpGetCallback.result = conn.getresponse()
+        except (HTTPException, OSError):
+            # try to reconnect once
+            conn.close()
+            conn.connect()
+            conn.request("GET", path, None, self.httpRequestHeaders)
+            httpGetCallback.result = conn.getresponse()
+
+        httpGetCallback.callback()
+
+#################################################################################################################################
+class DirIndex:
+    """In-memory view of one ``.dirindex`` file.
+
+    Each non-comment line is a ':'-separated record of one of these forms:
+    ``version:<int>``, ``path:<dir>``, ``d:<name>:<hash>`` or
+    ``f:<name>:<hash>:<size>``. Blank lines, lines starting with '#' and
+    records of any other type are ignored.
+    """
+
+    # Fewest ':'-separated fields each known record type must carry.
+    _MIN_FIELDS = {"version": 2, "path": 2, "d": 3, "f": 4}
+
+    def __init__(self, dirIndexFile):
+        """Read and parse the file at path dirIndexFile."""
+        self.d = []
+        self.f = []
+        self.version = 0
+        self.path = ""
+
+        with open(dirIndexFile) as f:
+            self.readFrom(f)
+
+    def readFrom(self, readable):
+        """Parse records from an iterable of text lines into this object.
+
+        Raises ValueError if a known record has too few fields (for example
+        a truncated download) or if the version is not an integer.
+        """
+        for line in readable:
+            line = line.strip()
+            # str.split() never returns an empty list, so blank lines have
+            # to be recognised on the stripped text itself.
+            if not line or line.startswith('#'):
+                continue
+
+            tokens = line.split(':')
+            kind = tokens[0]
+            required = self._MIN_FIELDS.get(kind)
+            if required is None:
+                continue
+
+            if len(tokens) < required:
+                raise ValueError("malformed .dirindex line: %r" % line)
+
+            if kind == "version":
+                self.version = int(tokens[1])
+
+            elif kind == "path":
+                self.path = tokens[1]
+
+            elif kind == "d":
+                self.d.append({ 'name': tokens[1], 'hash': tokens[2] })
+
+            else:
+                # kind == "f"; the size is kept as the string found in the file
+                self.f.append({ 'name': tokens[1], 'hash': tokens[2], 'size': tokens[3] })
+
+    def getVersion(self):
+        """Return the parsed version number (0 if no version record was seen)."""
+        return self.version
+
+    def getPath(self):
+        """Return the value of the path record ("" if none was seen)."""
+        return self.path
+
+    def getDirectories(self):
+        """Return the list of {'name', 'hash'} dicts built from 'd' records."""
+        return self.d
+
+    def getFiles(self):
+        """Return the list of {'name', 'hash', 'size'} dicts built from 'f' records."""
+        return self.f
+
+#################################################################################################################################
+class HTTPDownloadRequest(HTTPGetCallback):
+    """GET request whose response body is saved to a local file.
+
+    terrasync -- owning TerraSync instance (stored, not used here)
+    src       -- server path to fetch
+    dst       -- local file the body is written to
+    callback  -- optional callable invoked with this request after the
+                 file has been written
+    """
+    def __init__(self, terrasync, src, dst, callback = None ):
+        # The base class stores the bound callback() method below as the
+        # completion hook; the user's callback is kept separately.
+        super().__init__(src, self.callback)
+        self.terrasync = terrasync
+        self.dst = dst
+        self.mycallback = callback
+
+    def callback(self):
+        """Write the response in self.result to self.dst, then notify.
+
+        Raises OSError, leaving self.dst untouched, when the server did not
+        answer with status 200.
+        """
+        # Read the body in every case before deciding what to do with it.
+        body = self.result.read()
+        if self.result.status != 200:
+            # Do not overwrite local data with an error or redirect page.
+            raise OSError("download of %s failed: HTTP %s %s" % (self.src, self.result.status, self.result.reason))
+
+        with open(self.dst, 'wb') as f:
+            f.write(body)
+
+        if self.mycallback is not None:
+            self.mycallback(self)
+
+#################################################################################################################################
+
+def hash_of_file(fname):
    if not os.path.exists( fname ):
      return None

@ -53,85 +139,92 @@ def fn_hash_of_file(fname):

    return hash.hexdigest()

-########################################################################
-def do_download_file( _url, _path, _localfile, _hash, _force ):
-  if os.path.exists( _localfile ) and not _force:
-    h = fn_hash_of_file(_localfile)
-    if h == _hash:
-      #print("hash match for ", _localfile)
-      return False
+#################################################################################################################################
+class TerraSync:

-  r = urllib.request.urlopen( _url + _path )
-  with open(_localfile, 'wb') as f:
-    f.write( r.read() )
-  #print("downloaded ", _localfile, " from ", _url + _path )
-  return True
+    def __init__(self, url="http://flightgear.sourceforge.net/scenery", target=".", quick=False, removeOrphan=False):
+        """Store the sync settings; no network activity happens here.
+
+        url          -- base URL of the scenery server (normalised by setUrl)
+        target       -- local directory to sync into (normalised by setTarget)
+        quick        -- when true, a directory whose .dirindex hash matches
+                        is not descended into
+        removeOrphan -- when true, local files not listed by the server
+                        are deleted
+        """
+        # Created by start().
+        self.httpGetter = None
+        self.removeOrphan = removeOrphan
+        self.quick = quick
+        self.setUrl(url)
+        self.setTarget(target)

-########################################################################
-def do_terrasync( _url, _path, _localdir, _dirIndexHash ):
-  url = _url + _path
-  print(url)
+    def setUrl(self, url):
+        """Set the server base URL, without surrounding blanks or trailing '/'.
+
+        Returns self so calls can be chained.
+        """
+        # Trim whitespace first; in the other order a '/' that is followed
+        # by blanks would survive.
+        self.url = url.strip().rstrip('/')
+        return self

-  if not os.path.exists( _localdir ):
-    os.makedirs( _localdir )
+    def setTarget(self, target):
+        """Set the local target directory, without surrounding blanks or trailing '/'.
+
+        Returns self so calls can be chained.
+        """
+        # Trim whitespace first; in the other order a '/' that is followed
+        # by blanks would survive.
+        target = target.strip()
+        trimmed = target.rstrip('/')
+        # A target consisting only of slashes keeps one '/': an empty string
+        # would make join(self.target, ...) resolve against the current
+        # directory instead.
+        self.target = trimmed if trimmed or not target else '/'
+        return self

-  # download and process .dirindex as temporary file
-  # rename to .dirindex after successful processing of directory
-  # in case of abort, .dirindex.tmp will be removed as orphan
-  myDirIndexFile = os.path.join(_localdir, ".dirindex.tmp")
+    def start(self):
+        """Create the HTTP getter for the configured URL and process the top-level directory.
+
+        The top level is addressed with empty server and local paths and no
+        known .dirindex hash.
+        """
+        getter = HTTPGetter(self.url)
+        self.httpGetter = getter
+        self.updateDirectory("", "", None)

-  try:
-    if not do_download_file( url, "/.dirindex", myDirIndexFile, _dirIndexHash, QUICK == False ):
-      # dirindex hash matches, file not downloaded, skip directory
-      return
+    def updateFile(self, serverPath, localPath, fileHash ):
+        localFullPath = join(self.target, localPath)
+        if fileHash != None and hash_of_file(localFullPath) == fileHash:
+            #print("hash of file matches, not downloading")
+            return

-  except urllib.error.HTTPError as err:
-    if err.code == 404 and _path == "":
-      # HACK: only the master on SF provides .dirindex for root, fake it if it's missing
-      print("Using static root hack.")
-      for _sub in ("Models", "Terrain", "Objects", "Airports" ):
-        do_terrasync( _url, "/" + _sub, os.path.join(_localdir,_sub), None )
-      return
+        print("downloading ", serverPath )

-    else:
-      raise
+        request = HTTPDownloadRequest(self, serverPath, localFullPath )
+        self.httpGetter.get(request)

-  with open(myDirIndexFile, 'r') as myDirIndex:
-    serverFiles = []
-    for line in myDirIndex:
-      tokens = line.rstrip().split(':')
-      if( len(tokens) == 0 ):
-        continue

-      # TODO: check version number, should be equal to DIRINDEXVERSION
-      #       otherwise complain and terminate
-      if( tokens[0] == "version" ):
-        continue
+    def updateDirectory(self, serverPath, localPath, dirIndexHash):
+        print("processing ", serverPath)

-      if( tokens[0] == "path" ):
-        continue
+        localFullPath = join(self.target, localPath)
+        if not os.path.exists( localFullPath ):
+          os.makedirs( localFullPath )

-      if( tokens[0] == "d" ):
-        do_terrasync( url,  "/" + tokens[1], os.path.join(_localdir,tokens[1]), tokens[2] )
+        localDirIndex = join(localFullPath, ".dirindex")
+        if dirIndexHash != None and  hash_of_file(localDirIndex) == dirIndexHash:
+            # print("hash of dirindex matches, not downloading")
+            if not self.quick:
+                self.handleDirindexFile( localDirIndex )
+        else:
+            request = HTTPDownloadRequest(self, serverPath + "/.dirindex", localDirIndex, self.handleDirindexRequest )
+            self.httpGetter.get(request)

-      if( tokens[0] == "f" ):
-        do_download_file( url, "/" + tokens[1], os.path.join(_localdir,tokens[1]), tokens[2], False )
-        serverFiles.append( tokens[1] )
+    def handleDirindexRequest(self, dirindexRequest):
+        self.handleDirindexFile(dirindexRequest.dst)

-  os.rename( myDirIndexFile, os.path.join(_localdir, ".dirindex" ) )
+    def handleDirindexFile(self, dirindexFile):
+        dirIndex = DirIndex(dirindexFile)
+        serverFiles = []

-  localFiles = [f for f in listdir(_localdir) if isfile(join(_localdir, f))]
-  for f in localFiles:
-    if f != ".dirindex" and not f in serverFiles:
-      if REMOVE_ORPHAN:
-        os.remove( os.path.join(_localdir,f) )
+        for file in dirIndex.getFiles():
+            f = file['name']
+            h = file['hash']
+            self.updateFile( "/" + dirIndex.getPath() + "/" + f, join(dirIndex.getPath(),f), h )
+            serverFiles.append(f)

-  #TODO: cleanup orphan files
+        for subdir in dirIndex.getDirectories():
+            d = subdir['name']
+            h = subdir['hash']
+            self.updateDirectory( "/" + dirIndex.getPath() + "/" + d, join(dirIndex.getPath(),d), h )

-########################################################################
+        if self.removeOrphan:
+            localFullPath = join(self.target, dirIndex.getPath())
+            localFiles = [f for f in listdir(localFullPath) if isfile(join(localFullPath, f))]
+            for f in localFiles:
+                if f != ".dirindex" and not f in serverFiles:
+                    #print("removing orphan", join(localFullPath,f) )
+                    os.remove( join(localFullPath,f) )

-import getopt, sys, random, re

+    def isReady(self):
+        """Return False before start() has created a getter, else the getter's isReady()."""
+        # NOTE(review): HTTPGetter as defined in this file has no isReady()
+        # method, so this raises AttributeError once start() has run.
+        # Confirm the intended getter interface.
+        if not self.httpGetter:
+            return False
+        return self.httpGetter.isReady()
+
+    def update(self):
+        """Forward to the getter's update(); do nothing before start() has created one."""
+        # NOTE(review): HTTPGetter as defined in this file has no update()
+        # method. Confirm the intended getter interface.
+        getter = self.httpGetter
+        if not getter:
+            return
+        getter.update()
+
+#################################################################################################################################
+import getopt, sys
 try:
  opts, args = getopt.getopt(sys.argv[1:], "u:t:qr", [ "url=", "target=", "quick", "remove-orphan" ])

@ -139,64 +232,18 @@ except getopt.GetoptError:
  print("terrasync.py [--url=http://some.server.org/scenery] [--target=/some/path] [-q|--quick] [-r|--remove-orphan]")
  sys.exit(2)

+terraSync = TerraSync()
 for opt, arg in opts:
-  if opt in( "-u", "--url"):
-    URL = arg
+  if opt in("-u", "--url"):
+    terraSync.url = arg

-  elif opt in ( "-t", "--target"):
-    TARGET= arg
+  elif opt in ("-t", "--target"):
+    terraSync.target = arg

  elif opt in ("-q", "--quick"):
-    QUICK = True
+    terraSync.quick = True

  elif opt in ("-r", "--remove-orphan"):
-    REMOVE_ORPHAN = True
+    terraSync.removeOrphan = True

-# automatic URL lookup from DNS NAPTR
-# - lookup terrasync.flightgear.org, type=NAPTR, service="ws20", flags="U"
-# - sort by order,preference ascending
-# - pick entries with lowest order and preference
-# - randomly pick one of those
-# - use regexp fields URL
-if URL == "automatic":
-  import dns.resolver
-  dnsResolver = dns.resolver.Resolver()
-
-  order = -1
-  preference = -1
-
-  # find lowes preference/order for service 'ws20' and flags 'U'
-  dnsAnswer = dnsResolver.query("terrasync.flightgear.org", "NAPTR" )
-  for naptr in dnsAnswer:
-    if naptr.service != b'ws20' or naptr.flags != b'U':
-      continue
-
-    if order == -1 or naptr.order < order:
-      order = naptr.order
-      preference = naptr.preference
-
-    if order == naptr.order:
-      if naptr.preference < preference:
-        preference = naptr.preference
-
-
-  # grab candidats
-  candidates = []
-  for naptr in dnsAnswer:
-    if naptr.service != b'ws20' or naptr.flags != b'U' or naptr.preference != preference or naptr.order != order:
-      continue
-
-    candidates.append( naptr.regexp.decode('utf-8') )
-
-  if not candidates:
-    print("sorry, no terrascenery URLs found. You may specify one with --url=http://some.url.org/foo")
-    sys.exit(3)
-
-  _url  = random.choice(candidates)
-  _subst = _url.split(_url[0]) # split string, first character is separator <sep>regex<sep>replacement<sep>
-  URL = re.sub(_subst[1], _subst[2], "" ) # apply regex substitude on empty string
-
-print( "terrasyncing from ", URL, "to ", TARGET )
-do_terrasync( URL, "", TARGET, None )
-
-########################################################################
+terraSync.start()