terrasync.py: prepare the terrain for --mode and --report
- Add computeHash() utility function that can work with any file-like object (e.g., a connected socket).
- Rename hash_of_file() to hashForFile() and, of course, implement it using our new computeHash().
- Add class HTTPSocketRequest, derived from HTTPGetCallback. It allows one to process data from the network without storing it to a file (it uses the file-like interface provided by http.client.HTTPResponse). The callback returns the http.client.HTTPResponse object, which can be conveniently used in a 'with' statement.
- Simplify the API of TerraSync.updateDirectory(): its 'dirIndexHash' argument must now be a hash (a string); the None object is not allowed anymore. With the soon-to-come addition of --mode=check, having to deal with this special case in updateDirectory() would make the logic too difficult to follow; the alternative would be to completely separate check-only mode from update mode, which would entail code duplication. Since TerraSync.updateDirectory() must now always have a hash to work with, compute the hash of the root '.dirindex' file from the server in TerraSync.start(), using our new HTTPSocketRequest class. That class was written for this purpose: the computation has to work in check-only mode (among other cases), where we don't want to write any file to disk.
- TerraSync.updateFile(): correctly handle the case where a directory inside the TerraSync repository is (now) a file according to the server: the directory must be recursively removed before the file can be downloaded to the place formerly occupied by the directory.
- Add stub class Report. Its methods do nothing for now, but are already called in a couple of appropriate places. The class will be completed in a future commit.
This commit is contained in:
parent
af021cc1ef
commit
6d323bbbdc
1 changed file with 87 additions and 14 deletions
|
@ -121,6 +121,20 @@ def removeDirectoryTree(base, whatToRemove):
|
||||||
shutil.rmtree(absPath)
|
shutil.rmtree(absPath)
|
||||||
|
|
||||||
|
|
||||||
|
def computeHash(fileLike):
|
||||||
|
hash = hashlib.sha1()
|
||||||
|
|
||||||
|
for chunk in iter(lambda: fileLike.read(4096), b""):
|
||||||
|
hash.update(chunk)
|
||||||
|
|
||||||
|
return hash.hexdigest()
|
||||||
|
|
||||||
|
|
||||||
|
def hashForFile(fname):
    """Return the hash computed by computeHash() for the file 'fname'.

    The file is opened in binary mode and closed again before returning.
    """
    with open(fname, "rb") as stream:
        digest = computeHash(stream)

    return digest
|
||||||
|
|
||||||
|
|
||||||
# *****************************************************************************
|
# *****************************************************************************
|
||||||
# * Network-related classes *
|
# * Network-related classes *
|
||||||
# *****************************************************************************
|
# *****************************************************************************
|
||||||
|
@ -247,16 +261,32 @@ class HTTPDownloadRequest(HTTPGetCallback):
|
||||||
if self.mycallback != None:
|
if self.mycallback != None:
|
||||||
self.mycallback(self)
|
self.mycallback(self)
|
||||||
|
|
||||||
#################################################################################################################################
|
|
||||||
|
|
||||||
def hash_of_file(fname):
|
class HTTPSocketRequest(HTTPGetCallback):
|
||||||
hash = hashlib.sha1()
|
"""HTTPGetCallback class whose callback returns a file-like object.
|
||||||
|
|
||||||
with open(fname, "rb") as f:
|
The file-like object returned by the callback, and thus by
|
||||||
for chunk in iter(lambda: f.read(4096), b""):
|
HTTPGetter.get(), is a socket or similar. This allows one to read
|
||||||
hash.update(chunk)
|
the data obtained from the network without necessarily storing it
|
||||||
|
to a file.
|
||||||
|
|
||||||
return hash.hexdigest()
|
"""
|
||||||
|
def __init__(self, src):
|
||||||
|
"""Initialize an HTTPSocketRequest object.
|
||||||
|
|
||||||
|
src -- path to the resource on the server (no protocol, no
|
||||||
|
server name, just the path starting with a '/').
|
||||||
|
|
||||||
|
"""
|
||||||
|
HTTPGetCallback.__init__(self, src, self.callback)
|
||||||
|
|
||||||
|
def callback(self, url, httpResponse):
|
||||||
|
# Same comment as for HTTPDownloadRequest.callback()
|
||||||
|
if httpResponse.status != 200:
|
||||||
|
raise NetworkError("HTTP callback got status {status} for URL {url}"
|
||||||
|
.format(status=httpResponse.status, url=url))
|
||||||
|
|
||||||
|
return httpResponse
|
||||||
|
|
||||||
#################################################################################################################################
|
#################################################################################################################################
|
||||||
|
|
||||||
|
@ -300,6 +330,17 @@ def parse_terrasync_coordinate(coordinate):
|
||||||
lat *= -1
|
lat *= -1
|
||||||
return Coordinate(lat, lon)
|
return Coordinate(lat, lon)
|
||||||
|
|
||||||
|
|
||||||
|
class Report:
    """Gather and format data about the state of a TerraSync mirror."""

    def addDirIndexWithIncorrectHash(self, localDirIndex):
        """Note a local .dirindex file whose hash is not the expected one.

        Stub: does nothing for now.
        """

    def addMissingDirIndex(self, localDirIndex):
        """Note a .dirindex file that is absent from the local mirror.

        Stub: does nothing for now.
        """
|
||||||
|
|
||||||
|
|
||||||
class TerraSync:
|
class TerraSync:
|
||||||
|
|
||||||
def __init__(self, url, target, quick, removeOrphan, downloadBoundaries):
|
def __init__(self, url, target, quick, removeOrphan, downloadBoundaries):
|
||||||
|
@ -308,6 +349,9 @@ class TerraSync:
|
||||||
self.removeOrphan = removeOrphan
|
self.removeOrphan = removeOrphan
|
||||||
self.httpGetter = None
|
self.httpGetter = None
|
||||||
self.downloadBoundaries = downloadBoundaries
|
self.downloadBoundaries = downloadBoundaries
|
||||||
|
# Status of the local repository (as compared to what the server says),
|
||||||
|
# before any update we might do to it.
|
||||||
|
self.report = Report()
|
||||||
|
|
||||||
def setUrl(self, url):
|
def setUrl(self, url):
|
||||||
self.url = url.rstrip('/').strip()
|
self.url = url.rstrip('/').strip()
|
||||||
|
@ -319,14 +363,32 @@ class TerraSync:
|
||||||
|
|
||||||
def start(self):
|
def start(self):
|
||||||
self.httpGetter = HTTPGetter(self.url)
|
self.httpGetter = HTTPGetter(self.url)
|
||||||
self.updateDirectory("", "", None )
|
|
||||||
|
# Get the hash of the root .dirindex file
|
||||||
|
try:
|
||||||
|
request = HTTPSocketRequest("/.dirindex")
|
||||||
|
with self.httpGetter.get(request) as fileLike:
|
||||||
|
rootDirIndexHash = computeHash(fileLike)
|
||||||
|
except HTTPException as exc:
|
||||||
|
raise NetworkError("for the root .dirindex file: {errMsg}"
|
||||||
|
.format(errMsg=exc)) from exc
|
||||||
|
|
||||||
|
# Process the root (TerraSync) directory
|
||||||
|
self.updateDirectory("", "", rootDirIndexHash)
|
||||||
|
|
||||||
def updateFile(self, serverPath, localPath, fileHash ):
|
def updateFile(self, serverPath, localPath, fileHash ):
|
||||||
localFullPath = join(self.target, localPath)
|
localFullPath = join(self.target, localPath)
|
||||||
if fileHash != None and hash_of_file(localFullPath) == fileHash:
|
|
||||||
|
if (os.path.isfile(localFullPath) and
|
||||||
|
hashForFile(localFullPath) == fileHash):
|
||||||
#print("hash of file matches, not downloading")
|
#print("hash of file matches, not downloading")
|
||||||
return
|
return
|
||||||
|
|
||||||
|
if os.path.isdir(localFullPath):
|
||||||
|
# 'localFullPath' is a directory (locally), but on the server it is
|
||||||
|
# a file -> remove the dir so that we can store the file.
|
||||||
|
removeDirectoryTree(self.target, localFullPath)
|
||||||
|
|
||||||
print("Downloading '{}'".format(serverPath))
|
print("Downloading '{}'".format(serverPath))
|
||||||
|
|
||||||
request = HTTPDownloadRequest(self, serverPath, localFullPath )
|
request = HTTPDownloadRequest(self, serverPath, localFullPath )
|
||||||
|
@ -343,15 +405,26 @@ class TerraSync:
|
||||||
return
|
return
|
||||||
|
|
||||||
localFullPath = join(self.target, localPath)
|
localFullPath = join(self.target, localPath)
|
||||||
if not os.path.exists( localFullPath ):
|
|
||||||
os.makedirs( localFullPath )
|
|
||||||
|
|
||||||
localDirIndex = join(localFullPath, ".dirindex")
|
localDirIndex = join(localFullPath, ".dirindex")
|
||||||
if dirIndexHash != None and hash_of_file(localDirIndex) == dirIndexHash:
|
localDirIndexPresent = localDirIndexHasCorrectHash = False
|
||||||
# print("hash of dirindex matches, not downloading")
|
|
||||||
|
if os.path.isfile(localDirIndex):
|
||||||
|
localDirIndexPresent = True
|
||||||
|
|
||||||
|
if hashForFile(localDirIndex) == dirIndexHash:
|
||||||
|
localDirIndexHasCorrectHash = True
|
||||||
|
else:
|
||||||
|
self.report.addDirIndexWithIncorrectHash(localDirIndex)
|
||||||
|
else:
|
||||||
|
self.report.addMissingDirIndex(localDirIndex)
|
||||||
|
|
||||||
|
if localDirIndexPresent and localDirIndexHasCorrectHash:
|
||||||
if not self.quick:
|
if not self.quick:
|
||||||
self.handleDirindexFile( localDirIndex )
|
self.handleDirindexFile( localDirIndex )
|
||||||
else:
|
else:
|
||||||
|
if not os.path.exists(localFullPath):
|
||||||
|
os.makedirs(localFullPath)
|
||||||
|
|
||||||
request = HTTPDownloadRequest(self,
|
request = HTTPDownloadRequest(self,
|
||||||
serverPath + "/.dirindex",
|
serverPath + "/.dirindex",
|
||||||
localDirIndex,
|
localDirIndex,
|
||||||
|
|
Loading…
Add table
Reference in a new issue