2016-05-04 23:08:22 +02:00
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
2018-01-26 23:53:57 +01:00
# terrasync.py --- Synchronize TerraScenery data to your local disk
2016-05-04 23:08:22 +02:00
# Copyright (C) 2016 Torsten Dreyer
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
2018-01-26 23:32:29 +01:00
import argparse
import enum
import hashlib
import os
import pathlib
import re
import shutil
import sys
import time
import urllib
2016-05-18 12:51:29 +02:00
from urllib.parse import urlparse
2016-12-27 20:46:59 -06:00
from http.client import HTTPConnection, _CS_IDLE, HTTPException
2016-05-11 22:59:59 +02:00
from os import listdir
2017-09-01 10:33:35 +02:00
from os.path import isfile, isdir, join
2018-01-26 23:32:29 +01:00
PROGNAME = os.path.basename(sys.argv[0])
class ExitStatus(enum.Enum):
# The program exit status is 1 when an exception isn't caught.
2016-05-04 23:08:22 +02:00
2018-01-26 13:27:51 +01:00
2018-01-26 19:00:20 +01:00
# *****************************************************************************
# * Custom exceptions *
# *****************************************************************************
2018-01-26 13:27:51 +01:00
# Generic exception class for terrasync.py, to be subclassed for each specific
# kind of exception.
class TerraSyncPyException(Exception):
    """Base class for the exceptions defined by terrasync.py.

    The string form of an instance is "<short description>: <detail>", or
    just the short description when no message was given. Subclasses
    customize the short description through 'ExceptionShortDescription'.
    """

    # Category label printed in front of the detailed message.
    ExceptionShortDescription = "terrasync.py generic exception"

    def __init__(self, message=None, *, mayCapitalizeMsg=True):
        """Initialize a TerraSyncPyException instance.

        Except in cases where 'message' starts with a proper noun or
        something like that, its first character should be given in
        lower case. Automated treatments of this exception may print the
        message with its first character changed to upper case, unless
        'mayCapitalizeMsg' is False. In other words, if the case of the
        first character of 'message' must not be changed under any
        circumstances, set 'mayCapitalizeMsg' to False.

        """
        self.message = message
        self.mayCapitalizeMsg = mayCapitalizeMsg

    def __str__(self):
        return self.completeMessage()

    def __repr__(self):
        return "{}.{}({!r})".format(__name__, type(self).__name__, self.message)

    # Typically overridden by subclasses with a custom constructor
    def detail(self):
        """Return the detailed part of the message (by default, 'message')."""
        return self.message

    def completeMessage(self):
        """Return the short description, followed by the detail if any."""
        if self.message:
            return "{shortDesc}: {detail}".format(
                shortDesc=self.ExceptionShortDescription,
                detail=self.detail())
        else:
            return self.ExceptionShortDescription
2018-01-26 19:00:20 +01:00
class UserError(TerraSyncPyException):
    """Exception raised when the program is used in an incorrect way."""
    # Label used as the prefix of the complete message.
    ExceptionShortDescription = "User error"
2018-01-26 13:27:51 +01:00
class NetworkError(TerraSyncPyException):
    """Exception raised when getting a network error even after retrying."""
    # Label used as the prefix of the complete message.
    ExceptionShortDescription = "Network error"
2018-01-26 19:00:20 +01:00
# *****************************************************************************
# * Utility functions *
# *****************************************************************************
# If a path matches this regexp, we really don't want to delete it recursively
# (“cre” stands for “compiled regexp”). After a Windows drive letter, both
# backslashes and forward slashes are accepted, so that 'C:\' is recognized
# as well as 'C:/'.
_removeDirectoryTree_dangerous_cre = re.compile(
    r"""^(/ (home (/ [^/]*)? )? /* |    # for Unix-like systems
         [a-zA-Z]: [\\/]*               # for Windows
        )$""", re.VERBOSE)


def removeDirectoryTree(base, whatToRemove):
    r"""Recursively remove directory 'whatToRemove', with safety checks.

    This function ensures that 'whatToRemove' does not resolve to a
    directory such as /, /home, /home/foobar, C:\, d:\, etc. It is also
    an error if 'whatToRemove' does not literally start with the value
    of 'base' (IOW, this function refuses to erase anything that is not
    under 'base').

    'whatToRemove' is *not* interpreted relatively to 'base' (this would
    be doable, just a different API).

    Raise AssertionError if 'base' is not a directory or if
    'whatToRemove' is not literally located under 'base'; raise
    UserError if 'whatToRemove' resolves to one of the protected paths.

    """
    # These checks guard a recursive deletion, therefore they are written as
    # explicit raises: 'assert' statements would be stripped by 'python -O'.
    if not os.path.isdir(base):
        raise AssertionError("Not a directory: {!r}".format(base))
    if not (base and
            whatToRemove.startswith(base) and
            whatToRemove[len(base):].startswith(os.sep)):
        raise AssertionError(
            "Unexpected base path for removeDirectoryTree(): {!r}".format(base))

    absPath = os.path.abspath(whatToRemove)

    if _removeDirectoryTree_dangerous_cre.match(absPath):
        raise UserError("in order to protect your data, refusing to "
                        "recursively delete '{}'".format(absPath))
    else:
        shutil.rmtree(absPath)
terrasync.py: prepare the terrain for --mode and --report
- Add computeHash() utility function that can work with any file-like
object (e.g., a connected socket).
- Rename hash_of_file() to hashForFile(), and of course implement it
using our new computeHash().
- Add class HTTPSocketRequest derived from HTTPGetCallback. It allows
one to process data from the network without storing it to a file (it
uses the file-like interface provided by http.client.HTTPResponse).
The callback returns the http.client.HTTPResponse object, which can be
conveniently used in a 'with' statement.
- Simplify the API of TerraSync.updateDirectory(): its 'dirIndexHash'
argument must now be a hash (a string); the None object is not allowed
anymore (with the soon-to-come addition of --mode=check, having to
deal with this special case in updateDirectory() would make the logic
too difficult to follow, or we would have to really completely
separate check-only mode from update mode, which would entail code
Since TerraSync.updateDirectory() must now always have a hash to work
with, compute the hash of the root '.dirindex' file from the server in
TerraSync.start(), using our new HTTPSocketRequest class---which was
written for this purpose, since that will have to work in check-only
mode (but not only), where we don't want to write any file to disk.
- TerraSync.updateFile(): correctly handle the case where a directory
inside the TerraSync repository is (now) a file according to the
server: the directory must be recursively removed before the file can
be downloaded in the place formerly occupied by the directory.
- Add stub class Report. Its methods do nothing for now, but are already
called in a couple of appropriate places. The class will be completed
in a future commit, of course.
2018-01-26 19:07:30 +01:00
def computeHash(fileLike):
    """Return the hexadecimal SHA-1 digest of the data read from 'fileLike'.

    'fileLike' must provide a read(size) method returning bytes, and b""
    once exhausted (a file opened in binary mode, an
    http.client.HTTPResponse, etc.). The data is consumed in chunks of
    4096 bytes, so it is never entirely held in memory.
    """
    digest = hashlib.sha1()

    for chunk in iter(lambda: fileLike.read(4096), b""):
        digest.update(chunk)

    return digest.hexdigest()
def hashForFile(fname):
    """Return the hexadecimal SHA-1 digest of the contents of file 'fname'."""
    # Binary mode: the digest must be computed on the raw bytes.
    with open(fname, "rb") as f:
        return computeHash(f)
2018-01-26 23:32:29 +01:00
def normalizeVirtualPath(path):
    """Normalized string representation of a virtual path.

    Virtual paths are paths inside the TerraSync repository (be it local
    or remote) using '/' as their separator. The virtual path '/' always
    corresponds to the repository root, regardless of where it is stored
    (hard drive, etc.).

    If the input path (string) doesn't start with a slash ('/'), it is
    considered relative to the root of the TerraSync repository.

    Return a string that always starts with a slash, never contains
    consecutive slashes and only ends with a slash if it is the root
    virtual path ('/').

    """
    if not path.startswith('/'):
        # / is the “virtual root” of the TerraSync repository
        path = '/' + path
    elif path.startswith('//') and not path.startswith('///'):
        # Nasty special case. As allowed (but not mandated!) by POSIX[1],
        # in pathlib.PurePosixPath('//some/path'), no collapsing happens[2].
        # This is only the case for exactly *two* *leading* slashes.
        #
        # [1] http://pubs.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap04.html#tag_04_11
        # [2] https://www.python.org/dev/peps/pep-0428/#construction
        path = path[1:]

    return pathlib.PurePosixPath(path).as_posix()
2018-01-26 19:00:20 +01:00
# *****************************************************************************
# * Network-related classes *
# *****************************************************************************
2016-05-18 12:51:29 +02:00
class HTTPGetCallback:
    """Associate a resource path with the callable handling its response."""

    def __init__(self, src, callback):
        """Initialize an HTTPGetCallback instance.

        src      -- path of the resource, appended by HTTPGetter to the
                    path component of its base URL
        callback -- callable invoked by HTTPGetter.doGet() as
                    callback(url, httpResponse)

        """
        self.callback = callback
        self.src = src
class HTTPGetter:
    """Issue HTTP GET requests to one server through a single connection."""

    def __init__(self, baseUrl, maxPending=10):
        """Initialize an HTTPGetter instance.

        baseUrl    -- URL whose network location designates the server and
                      whose path is prepended to every requested resource
        maxPending -- stored as is; not used by the methods of this class

        No network traffic happens here: http.client.HTTPConnection only
        connects when the first request is sent.

        """
        self.baseUrl = baseUrl
        self.parsedBaseUrl = urlparse(baseUrl)
        self.maxPending = maxPending
        self.requests = []
        self.pendingRequests = []
        self.httpConnection = HTTPConnection(self.parsedBaseUrl.netloc)
        self.httpRequestHeaders = {
            'Host': self.parsedBaseUrl.netloc,
            'Content-Length': 0,
            'Connection': 'Keep-Alive',
            'User-Agent': 'FlightGear terrasync.py',
        }

    def assembleUrl(self, httpGetCallback):
        """Return the request path: base URL path + httpGetCallback.src."""
        return self.parsedBaseUrl.path + httpGetCallback.src

    def doGet(self, httpGetCallback):
        """Send one GET request and return what the callback returns."""
        url = self.assembleUrl(httpGetCallback)
        self.httpConnection.request("GET", url, None, self.httpRequestHeaders)
        httpResponse = self.httpConnection.getresponse()

        # 'httpResponse' is an http.client.HTTPResponse instance
        return httpGetCallback.callback(url, httpResponse)

    def get(self, httpGetCallback):
        """Same as doGet(), but retry after an http.client.HTTPException.

        Raise NetworkError once all retries have been used up.
        """
        nbRetries = nbRetriesLeft = 5

        while True:
            try:
                return self.doGet(httpGetCallback)
            except HTTPException as exc:
                if nbRetriesLeft == 0:
                    raise NetworkError(
                        "after {nbRetries} retries for URL {url}: {errMsg}"
                        .format(nbRetries=nbRetries,
                                url=self.assembleUrl(httpGetCallback),
                                errMsg=exc)) from exc

            # Try to reconnect
            # NOTE(review): the reconnection statements were missing from the
            # damaged source; close / short pause / connect is a
            # reconstruction, to be confirmed against the upstream file.
            self.httpConnection.close()
            time.sleep(1)
            self.httpConnection.connect()
            nbRetriesLeft -= 1
2016-05-18 12:51:29 +02:00
class DirIndex:
    """In-memory representation of the contents of a .dirindex file.

    Recognized lines have colon-separated fields:

      version:<integer>
      path:<path>
      d:<name>:<hash>
      f:<name>:<hash>:<size>

    Lines starting with '#' and blank lines are skipped; lines with any
    other first field are ignored.
    """

    def __init__(self, dirIndexFile):
        """Read and parse the file whose path is 'dirIndexFile'."""
        self.d = []             # one {'name', 'hash'} dict per 'd' line
        self.f = []             # one {'name', 'hash', 'size'} dict per 'f' line
        self.version = 0
        self.path = ""

        with open(dirIndexFile) as f:
            self.readFrom(f)

    def readFrom(self, readable):
        """Parse the lines yielded by iterating over 'readable'.

        Raise IndexError if a recognized line has too few fields, and
        ValueError if the version is not an integer.
        """
        for line in readable:
            line = line.strip()
            # str.split() never returns an empty list, so blank lines have to
            # be detected on the line itself.
            if not line or line.startswith('#'):
                continue

            tokens = line.split(':')

            if tokens[0] == "version":
                self.version = int(tokens[1])
            elif tokens[0] == "path":
                self.path = tokens[1]
            elif tokens[0] == "d":
                self.d.append({ 'name': tokens[1], 'hash': tokens[2] })
            elif tokens[0] == "f":
                # The size is kept as a string, exactly as found in the file
                self.f.append({ 'name': tokens[1], 'hash': tokens[2],
                                'size': tokens[3] })

    def getVersion(self):
        return self.version

    def getPath(self):
        return self.path

    def getDirectories(self):
        return self.d

    def getFiles(self):
        return self.f
class HTTPDownloadRequest(HTTPGetCallback):
    """HTTPGetCallback whose callback stores the response body in a file."""

    def __init__(self, terrasync, src, dst, callback = None ):
        """Initialize an HTTPDownloadRequest instance.

        terrasync -- stored as self.terrasync (not used by this class)
        src       -- path of the resource on the server
        dst       -- path of the local file to write
        callback  -- optional callable invoked after the file was written

        """
        super().__init__(src, self.callback)
        self.terrasync = terrasync
        self.dst = dst
        self.mycallback = callback

    # 'httpResponse' is an http.client.HTTPResponse instance
    def callback(self, url, httpResponse):
        """Write the body of 'httpResponse' to self.dst.

        Raise NetworkError if the status is not 200, or if reading the
        response raises an http.client.HTTPException.
        """
        # I suspect this doesn't handle HTTP redirects and things like that. As
        # mentioned at <https://docs.python.org/3/library/http.client.html>,
        # http.client is a low-level interface that should normally not be used
        # directly!
        if httpResponse.status != 200:
            raise NetworkError("HTTP callback got status {status} for URL {url}"
                               .format(status=httpResponse.status, url=url))

        try:
            with open(self.dst, 'wb') as f:
                # NOTE(review): this statement was missing from the damaged
                # source; confirm against the upstream file.
                f.write(httpResponse.read())
        except HTTPException as exc:
            raise NetworkError("for URL {url}: {error}"
                               .format(url=url, error=exc)) from exc

        if self.mycallback is not None:
            # NOTE(review): the call itself was missing from the damaged
            # source; passing the request object is an assumption to verify.
            self.mycallback(self)
terrasync.py: prepare the terrain for --mode and --report
- Add computeHash() utility function that can work with any file-like
object (e.g., a connected socket).
- Rename hash_of_file() to hashForFile(), and of course implement it
using our new computeHash().
- Add class HTTPSocketRequest derived from HTTPGetCallback. It allows
one to process data from the network without storing it to a file (it
uses the file-like interface provided by http.client.HTTPResponse).
The callback returns the http.client.HTTPResponse object, which can be
conveniently used in a 'with' statement.
- Simplify the API of TerraSync.updateDirectory(): its 'dirIndexHash'
argument must now be a hash (a string); the None object is not allowed
anymore (with the soon-to-come addition of --mode=check, having to
deal with this special case in updateDirectory() would make the logic
too difficult to follow, or we would have to really completely
separate check-only mode from update mode, which would entail code
Since TerraSync.updateDirectory() must now always have a hash to work
with, compute the hash of the root '.dirindex' file from the server in
TerraSync.start(), using our new HTTPSocketRequest class---which was
written for this purpose, since that will have to work in check-only
mode (but not only), where we don't want to write any file to disk.
- TerraSync.updateFile(): correctly handle the case where a directory
inside the TerraSync repository is (now) a file according to the
server: the directory must be recursively removed before the file can
be downloaded in the place formerly occupied by the directory.
- Add stub class Report. Its methods do nothing for now, but are already
called in a couple of appropriate places. The class will be completed
in a future commit, of course.
2018-01-26 19:07:30 +01:00
class HTTPSocketRequest(HTTPGetCallback):
    """HTTPGetCallback class whose callback returns a file-like object.

    The file-like object returned by the callback, and thus by
    HTTPGetter.get(), is a socket or similar. This allows one to read
    the data obtained from the network without necessarily storing it
    to a file.

    """
    def __init__(self, src):
        """Initialize an HTTPSocketRequest object.

        src -- path to the resource on the server (no protocol, no
               server name, just the path starting with a '/').

        """
        HTTPGetCallback.__init__(self, src, self.callback)

    def callback(self, url, httpResponse):
        """Return 'httpResponse' unchanged if its status is 200.

        Raise NetworkError for any other status.
        """
        # Same comment as for HTTPDownloadRequest.callback()
        if httpResponse.status != 200:
            raise NetworkError("HTTP callback got status {status} for URL {url}"
                               .format(status=httpResponse.status, url=url))

        return httpResponse
2016-05-04 23:08:22 +02:00
2016-05-18 12:51:29 +02:00
2016-12-27 21:17:46 -06:00
class Coordinate:
    """A (latitude, longitude) pair."""

    def __init__(self, lat, lon):
        self.lat = lat
        self.lon = lon
class DownloadBoundaries:
    """Rectangle, in latitude and longitude, delimiting an area."""

    def __init__(self, top, left, bottom, right):
        """Initialize a DownloadBoundaries instance.

        top, bottom -- latitudes, with -90 <= bottom <= top <= 90
        left, right -- longitudes, with -180 <= left <= right <= 180

        Raise ValueError if these constraints are not satisfied.

        """
        if top < bottom:
            raise ValueError("top cannot be less than bottom")
        if right < left:
            raise ValueError("right cannot be less than left")

        if top > 90 or bottom < -90:
            raise ValueError("top and bottom must be a valid latitude")
        if left < -180 or right > 180:
            raise ValueError("left and right must be a valid longitude")
        self.top = top
        self.left = left
        self.bottom = bottom
        self.right = right

    def is_coordinate_inside_boundaries(self, coordinate):
        """Tell whether 'coordinate' (with 'lat' and 'lon' attributes) lies
        inside the rectangle; points on the border are considered inside."""
        if coordinate.lat < self.bottom or coordinate.lat > self.top:
            return False
        if coordinate.lon < self.left or coordinate.lon > self.right:
            return False
        return True
def parse_terrasync_coordinate(coordinate):
    """Convert a name such as 'w010n50' into a Coordinate.

    The expected form, looked for at the start of 'coordinate', is 'w' or
    'e' followed by three digits (longitude), then 'n' or 's' followed by
    two digits (latitude). West longitudes and south latitudes are
    negative.

    Return None if 'coordinate' doesn't start with such a pattern.
    """
    # Raw string: '\d' in a normal string literal is an invalid escape
    # sequence, which recent Python versions warn about.
    matches = re.match(r"(w|e)(\d{3})(n|s)(\d{2})", coordinate)
    if not matches:
        return None

    lon = int(matches.group(2))
    if matches.group(1) == "w":
        lon *= -1

    lat = int(matches.group(4))
    if matches.group(3) == "s":
        lat *= -1

    return Coordinate(lat, lon)
terrasync.py: prepare the terrain for --mode and --report
- Add computeHash() utility function that can work with any file-like
object (e.g., a connected socket).
- Rename hash_of_file() to hashForFile(), and of course implement it
using our new computeHash().
- Add class HTTPSocketRequest derived from HTTPGetCallback. It allows
one to process data from the network without storing it to a file (it
uses the file-like interface provided by http.client.HTTPResponse).
The callback returns the http.client.HTTPResponse object, which can be
conveniently used in a 'with' statement.
- Simplify the API of TerraSync.updateDirectory(): its 'dirIndexHash'
argument must now be a hash (a string); the None object is not allowed
anymore (with the soon-to-come addition of --mode=check, having to
deal with this special case in updateDirectory() would make the logic
too difficult to follow, or we would have to really completely
separate check-only mode from update mode, which would entail code
Since TerraSync.updateDirectory() must now always have a hash to work
with, compute the hash of the root '.dirindex' file from the server in
TerraSync.start(), using our new HTTPSocketRequest class---which was
written for this purpose, since that will have to work in check-only
mode (but not only), where we don't want to write any file to disk.
- TerraSync.updateFile(): correctly handle the case where a directory
inside the TerraSync repository is (now) a file according to the
server: the directory must be recursively removed before the file can
be downloaded in the place formerly occupied by the directory.
- Add stub class Report. Its methods do nothing for now, but are already
called in a couple of appropriate places. The class will be completed
in a future commit, of course.
2018-01-26 19:07:30 +01:00
class Report:
    """Gather and format data about the state of a TerraSync mirror."""

    def __init__(self, targetDir):
        self.targetDir = targetDir

        # Each set collects the paths reported for one kind of finding
        self.dirsWithMissingIndex = set()
        self.dirsWithMismatchingDirIndexHash = set()
        self.missingFiles = set()
        self.filesWithMismatchingHash = set()
        self.dirsSkippedDueToBoundaries = set()
        self.orphanFiles = set()
        self.orphanDirs = set()

    # NOTE(review): the bodies of the add*() methods were missing from the
    # damaged source. Each one is reconstructed as an insertion into the set
    # whose name matches the method; confirm against the upstream file.
    def addMissingDirIndex(self, directoryRelPath):
        self.dirsWithMissingIndex.add(directoryRelPath)

    def addDirIndexWithMismatchingHash(self, directoryRelPath):
        self.dirsWithMismatchingDirIndexHash.add(directoryRelPath)

    def addMissingFile(self, relPath):
        self.missingFiles.add(relPath)

    def addFileWithMismatchingHash(self, relPath):
        self.filesWithMismatchingHash.add(relPath)

    def addSkippedDueToBoundaries(self, relPath):
        self.dirsSkippedDueToBoundaries.add(relPath)

    def addOrphanFile(self, relPath):
        self.orphanFiles.add(relPath)

    def addOrphanDir(self, relPath):
        self.orphanDirs.add(relPath)

    def summaryString(self):
        """Return a multi-line summary with one section per kind of finding.

        A non-empty set yields its heading followed by its sorted paths;
        an empty set yields "<heading>: none".
        """
        reportElements = [
            ("Directories with missing index", self.dirsWithMissingIndex),
            ("Directories whose .dirindex file had a mismatching hash",
             self.dirsWithMismatchingDirIndexHash),
            ("Missing files", self.missingFiles),
            ("Files with a mismatching hash", self.filesWithMismatchingHash),
            ("Directories skipped because of the specified boundaries",
             self.dirsSkippedDueToBoundaries),
            ("Orphan files", self.orphanFiles),
            ("Orphan directories", self.orphanDirs)]

        l = []
        for heading, setOfFilesOrDirs in reportElements:
            if setOfFilesOrDirs:
                l.append(heading + ":\n")
                l.extend( ( " /" + f + '\n' for f in sorted(setOfFilesOrDirs)) )
            else:
                l.append(heading + ": none")

        return '\n'.join(l)

    def printReport(self):
        """Print an underlined title followed by summaryString()."""
        title = "{prg} report".format(prg=PROGNAME)
        print("\n" + title + '\n' + len(title)*"=", end="\n\n")
        print(self.summaryString())
terrasync.py: prepare the terrain for --mode and --report
- Add computeHash() utility function that can work with any file-like
object (e.g., a connected socket).
- Rename hash_of_file() to hashForFile(), and of course implement it
using our new computeHash().
- Add class HTTPSocketRequest derived from HTTPGetCallback. It allows
one to process data from the network without storing it to a file (it
uses the file-like interface provided by http.client.HTTPResponse).
The callback returns the http.client.HTTPResponse object, which can be
conveniently used in a 'with' statement.
- Simplify the API of TerraSync.updateDirectory(): its 'dirIndexHash'
argument must now be a hash (a string); the None object is not allowed
anymore (with the soon-to-come addition of --mode=check, having to
deal with this special case in updateDirectory() would make the logic
too difficult to follow, or we would have to really completely
separate check-only mode from update mode, which would entail code
Since TerraSync.updateDirectory() must now always have a hash to work
with, compute the hash of the root '.dirindex' file from the server in
TerraSync.start(), using our new HTTPSocketRequest class---which was
written for this purpose, since that will have to work in check-only
mode (but not only), where we don't want to write any file to disk.
- TerraSync.updateFile(): correctly handle the case where a directory
inside the TerraSync repository is (now) a file according to the
server: the directory must be recursively removed before the file can
be downloaded in the place formerly occupied by the directory.
- Add stub class Report. Its methods do nothing for now, but are already
called in a couple of appropriate places. The class will be completed
in a future commit, of course.
2018-01-26 19:07:30 +01:00
2018-01-26 23:32:29 +01:00
class FailedCheckReason(enum.Enum):
    """Reasons that can cause 'check' mode to report a mismatch.

    Note that network errors and things like that do *not* belong here.

    """

    missingDirIndexFile, mismatchingHashForDirIndexFile, \
        missingNormalFile, mismatchingHashForNormalFile, \
        orphanFile, orphanDirectory = range(6)

    # 'path': virtual path to a file or directory
    def explain(self, path):
        """Return a sentence describing this mismatch for 'path'."""
        if self is FailedCheckReason.missingDirIndexFile:
            res = ".dirindex file '{}' is missing locally".format(path)
        elif self is FailedCheckReason.mismatchingHashForDirIndexFile:
            res = ".dirindex file '{}' doesn't have the hash it " \
                  "should have according to the server".format(path)
        elif self is FailedCheckReason.missingNormalFile:
            res = "file '{}' is present on the server but missing locally" \
                  .format(path)
        elif self is FailedCheckReason.mismatchingHashForNormalFile:
            res = "file '{}' doesn't have the hash given in the " \
                  ".dirindex file of its containing directory".format(path)
        elif self is FailedCheckReason.orphanFile:
            res = "file '{}' was found locally but is not present on the " \
                  "server".format(path)
        elif self is FailedCheckReason.orphanDirectory:
            res = "directory '{}' was found locally but is not present " \
                  "on the server".format(path)
        else:
            assert False, "Unhandled enum value: {!r}".format(self)

        return res
terrasync.py: prepare the terrain for --mode and --report
- Add computeHash() utility function that can work with any file-like
object (e.g., a connected socket).
- Rename hash_of_file() to hashForFile(), and of course implement it
using our new computeHash().
- Add class HTTPSocketRequest derived from HTTPGetCallback. It allows
one to process data from the network without storing it to a file (it
uses the file-like interface provided by http.client.HTTPResponse).
The callback returns the http.client.HTTPResponse object, which can be
conveniently used in a 'with' statement.
- Simplify the API of TerraSync.updateDirectory(): its 'dirIndexHash'
argument must now be a hash (a string); the None object is not allowed
anymore (with the soon-to-come addition of --mode=check, having to
deal with this special case in updateDirectory() would make the logic
too difficult to follow, or we would have to really completely
separate check-only mode from update mode, which would entail code
Since TerraSync.updateDirectory() must now always have a hash to work
with, compute the hash of the root '.dirindex' file from the server in
TerraSync.start(), using our new HTTPSocketRequest class---which was
written for this purpose, since that will have to work in check-only
mode (but not only), where we don't want to write any file to disk.
- TerraSync.updateFile(): correctly handle the case where a directory
inside the TerraSync repository is (now) a file according to the
server: the directory must be recursively removed before the file can
be downloaded in the place formerly occupied by the directory.
- Add stub class Report. Its methods do nothing for now, but are already
called in a couple of appropriate places. The class will be completed
in a future commit, of course.
2018-01-26 19:07:30 +01:00
2016-05-18 12:51:29 +02:00
class TerraSync:
2016-05-11 14:57:38 +02:00
2018-01-26 23:32:29 +01:00
class Mode(enum.Enum):
    """Main modes of operation for the TerraSync class."""

    # Using lower case for the member names, because this way
    # enumMember.name is exactly the mode string passed to --mode on the
    # command line (can be useful for messages destined to users).
    check, sync = range(2)
def __init__(self, mode, doReport, url, target, quick, removeOrphan,
             downloadBoundaries):
    """Initialize a TerraSync instance.

    mode -- name of a member of the Mode enumeration
    url, target -- stored through setUrl() and setTarget()

    The other arguments are stored as attributes of the same name.

    NOTE(review): the end of the parameter list and the statement storing
    'url' and 'target' were missing from the damaged source; they are
    reconstructed from the attributes used below and must be confirmed
    against the upstream file.
    """
    self.mode = self.Mode[mode]
    self.doReport = doReport
    self.setUrl(url).setTarget(target)
    self.quick = quick
    self.removeOrphan = removeOrphan
    self.httpGetter = None
    self.downloadBoundaries = downloadBoundaries

    # Status of the local repository (as compared to what the server says),
    # before any update we might do to it.
    self.report = Report(self.target)
def inSyncMode(self):
    """Tell whether the instance operates in 'sync' mode."""
    return self.mode == self.Mode.sync
2016-05-11 14:57:38 +02:00
2016-05-18 12:51:29 +02:00
def setUrl(self, url):
    """Store 'url' without surrounding whitespace nor trailing slashes.

    Return self, so that calls can be chained.
    """
    # Strip the whitespace first: otherwise, a slash followed by trailing
    # whitespace would not be removed.
    self.url = url.strip().rstrip('/')
    return self
2016-05-11 14:57:38 +02:00
2016-05-18 12:51:29 +02:00
def setTarget(self, target):
    """Store 'target' without surrounding whitespace nor trailing slashes.

    A target consisting only of slashes is stored as '/': reducing it to
    the empty string would silently turn the root directory into the
    current directory.

    Return self, so that calls can be chained.
    """
    # Strip the whitespace first: otherwise, a slash followed by trailing
    # whitespace would not be removed.
    cleaned = target.strip()
    self.target = cleaned.rstrip('/') or cleaned[:1]
    return self
2016-05-11 14:57:38 +02:00
2016-05-18 12:51:29 +02:00
def start(self):
    """Process the whole repository, starting from its root directory.

    The root .dirindex file is fetched from the server only to compute
    its hash, without writing anything to disk; this hash is then passed
    to processDirectoryEntry() for the root directory.

    Return self.report. Raise NetworkError if reading the root .dirindex
    file raises an http.client.HTTPException.
    """
    self.httpGetter = HTTPGetter(self.url)

    # Get the hash of the root .dirindex file
    try:
        request = HTTPSocketRequest("/.dirindex")
        with self.httpGetter.get(request) as fileLike:
            rootDirIndexHash = computeHash(fileLike)
    except HTTPException as exc:
        raise NetworkError("for the root .dirindex file: {errMsg}"
                           .format(errMsg=exc)) from exc

    # Process the root directory of the repository (recursive)
    self.processDirectoryEntry("", "", rootDirIndexHash)

    return self.report
2016-05-11 14:57:38 +02:00
2018-01-26 23:32:29 +01:00
def processFileEntry(self, serverPath, localPath, fileHash):
"""Process a file entry from a .dirindex file."""
2016-05-18 12:51:29 +02:00
localFullPath = join(self.target, localPath)
2018-01-26 23:32:29 +01:00
failedCheckReason = None
if not os.path.isfile(localFullPath):
failedCheckReason = FailedCheckReason.missingNormalFile
elif hashForFile(localFullPath) != fileHash:
failedCheckReason = FailedCheckReason.mismatchingHashForNormalFile
# The file exists and has the hash mentioned in the .dirindex file
2016-05-18 12:51:29 +02:00
2018-01-26 23:32:29 +01:00
assert failedCheckReason is not None
terrasync.py: prepare the terrain for --mode and --report
- Add computeHash() utility function that can work with any file-like
object (e.g., a connected socket).
- Rename hash_of_file() to hashForFile(), and of course implement it
using our new computeHash().
- Add class HTTPSocketRequest derived from HTTPGetCallback. It allows
one to process data from the network without storing it to a file (it
uses the file-like interface provided by http.client.HTTPResponse).
The callback returns the http.client.HTTPResponse object, which can be
conveniently used in a 'with' statement.
- Simplify the API of TerraSync.updateDirectory(): its 'dirIndexHash'
argument must now be a hash (a string); the None object is not allowed
anymore (with the soon-to-come addition of --mode=check, having to
deal with this special case in updateDirectory() would make the logic
too difficult to follow, or we would have to really completely
separate check-only mode from update mode, which would entail code
duplication).
Since TerraSync.updateDirectory() must now always have a hash to work
with, compute the hash of the root '.dirindex' file from the server in
TerraSync.start(), using our new HTTPSocketRequest class---which was
written for this purpose, since that will have to work in check-only
mode (but not only), where we don't want to write any file to disk.
- TerraSync.updateFile(): correctly handle the case where a directory
inside the TerraSync repository is (now) a file according to the
server: the directory must be recursively removed before the file can
be downloaded in the place formerly occupied by the directory.
- Add stub class Report. Its methods do nothing for now, but are already
called in a couple of appropriate places. The class will be completed
in a future commit, of course.
2018-01-26 19:07:30 +01:00
2018-01-26 23:32:29 +01:00
if self.inSyncMode():
if os.path.isdir(localFullPath):
# 'localFullPath' is a directory (locally), but on the server
# it is a file -> remove the dir so that we can store the file.
removeDirectoryTree(self.target, localFullPath)
2016-05-11 22:59:59 +02:00
2018-01-26 23:32:29 +01:00
print("Downloading '{}'".format(serverPath))
request = HTTPDownloadRequest(self, serverPath, localFullPath )
virtualPath = normalizeVirtualPath(serverPath)
self.abortCheckMode(failedCheckReason, virtualPath)
2016-05-11 22:59:59 +02:00
2018-01-26 23:32:29 +01:00
def processDirectoryEntry(self, serverPath, localPath, dirIndexHash):
"""Process a directory entry from a .dirindex file."""
virtualPath = normalizeVirtualPath(serverPath)
print("Processing '{}'...".format(virtualPath))
2016-05-11 14:57:38 +02:00
2018-01-26 07:38:51 +01:00
if serverPath:
serverFolderName = os.path.basename(serverPath)
2016-12-27 21:17:46 -06:00
coordinate = parse_terrasync_coordinate(serverFolderName)
2018-01-26 23:32:29 +01:00
if (coordinate and
not self.downloadBoundaries.is_coordinate_inside_boundaries(
2016-12-27 21:17:46 -06:00
2016-05-18 12:51:29 +02:00
localFullPath = join(self.target, localPath)
localDirIndex = join(localFullPath, ".dirindex")
2018-01-26 23:32:29 +01:00
failedCheckReason = None
terrasync.py: prepare the terrain for --mode and --report
- Add computeHash() utility function that can work with any file-like
object (e.g., a connected socket).
- Rename hash_of_file() to hashForFile(), and of course implement it
using our new computeHash().
- Add class HTTPSocketRequest derived from HTTPGetCallback. It allows
one to process data from the network without storing it to a file (it
uses the file-like interface provided by http.client.HTTPResponse).
The callback returns the http.client.HTTPResponse object, which can be
conveniently used in a 'with' statement.
- Simplify the API of TerraSync.updateDirectory(): its 'dirIndexHash'
argument must now be a hash (a string); the None object is not allowed
anymore (with the soon-to-come addition of --mode=check, having to
deal with this special case in updateDirectory() would make the logic
too difficult to follow, or we would have to really completely
separate check-only mode from update mode, which would entail code
duplication).
Since TerraSync.updateDirectory() must now always have a hash to work
with, compute the hash of the root '.dirindex' file from the server in
TerraSync.start(), using our new HTTPSocketRequest class---which was
written for this purpose, since that will have to work in check-only
mode (but not only), where we don't want to write any file to disk.
- TerraSync.updateFile(): correctly handle the case where a directory
inside the TerraSync repository is (now) a file according to the
server: the directory must be recursively removed before the file can
be downloaded in the place formerly occupied by the directory.
- Add stub class Report. Its methods do nothing for now, but are already
called in a couple of appropriate places. The class will be completed
in a future commit, of course.
2018-01-26 19:07:30 +01:00
2018-01-26 23:32:29 +01:00
if not os.path.isfile(localDirIndex):
failedCheckReason = FailedCheckReason.missingDirIndexFile
elif hashForFile(localDirIndex) != dirIndexHash:
failedCheckReason = FailedCheckReason.mismatchingHashForDirIndexFile
terrasync.py: prepare the terrain for --mode and --report
- Add computeHash() utility function that can work with any file-like
object (e.g., a connected socket).
- Rename hash_of_file() to hashForFile(), and of course implement it
using our new computeHash().
- Add class HTTPSocketRequest derived from HTTPGetCallback. It allows
one to process data from the network without storing it to a file (it
uses the file-like interface provided by http.client.HTTPResponse).
The callback returns the http.client.HTTPResponse object, which can be
conveniently used in a 'with' statement.
- Simplify the API of TerraSync.updateDirectory(): its 'dirIndexHash'
argument must now be a hash (a string); the None object is not allowed
anymore (with the soon-to-come addition of --mode=check, having to
deal with this special case in updateDirectory() would make the logic
too difficult to follow, or we would have to really completely
separate check-only mode from update mode, which would entail code
duplication).
Since TerraSync.updateDirectory() must now always have a hash to work
with, compute the hash of the root '.dirindex' file from the server in
TerraSync.start(), using our new HTTPSocketRequest class---which was
written for this purpose, since that will have to work in check-only
mode (but not only), where we don't want to write any file to disk.
- TerraSync.updateFile(): correctly handle the case where a directory
inside the TerraSync repository is (now) a file according to the
server: the directory must be recursively removed before the file can
be downloaded in the place formerly occupied by the directory.
- Add stub class Report. Its methods do nothing for now, but are already
called in a couple of appropriate places. The class will be completed
in a future commit, of course.
2018-01-26 19:07:30 +01:00
2018-01-26 23:32:29 +01:00
if failedCheckReason is None:
2016-05-18 12:51:29 +02:00
if not self.quick:
2018-01-26 23:32:29 +01:00
elif self.inSyncMode():
terrasync.py: prepare the terrain for --mode and --report
- Add computeHash() utility function that can work with any file-like
object (e.g., a connected socket).
- Rename hash_of_file() to hashForFile(), and of course implement it
using our new computeHash().
- Add class HTTPSocketRequest derived from HTTPGetCallback. It allows
one to process data from the network without storing it to a file (it
uses the file-like interface provided by http.client.HTTPResponse).
The callback returns the http.client.HTTPResponse object, which can be
conveniently used in a 'with' statement.
- Simplify the API of TerraSync.updateDirectory(): its 'dirIndexHash'
argument must now be a hash (a string); the None object is not allowed
anymore (with the soon-to-come addition of --mode=check, having to
deal with this special case in updateDirectory() would make the logic
too difficult to follow, or we would have to really completely
separate check-only mode from update mode, which would entail code
duplication).
Since TerraSync.updateDirectory() must now always have a hash to work
with, compute the hash of the root '.dirindex' file from the server in
TerraSync.start(), using our new HTTPSocketRequest class---which was
written for this purpose, since that will have to work in check-only
mode (but not only), where we don't want to write any file to disk.
- TerraSync.updateFile(): correctly handle the case where a directory
inside the TerraSync repository is (now) a file according to the
server: the directory must be recursively removed before the file can
be downloaded in the place formerly occupied by the directory.
- Add stub class Report. Its methods do nothing for now, but are already
called in a couple of appropriate places. The class will be completed
in a future commit, of course.
2018-01-26 19:07:30 +01:00
if not os.path.exists(localFullPath):
2018-01-26 07:38:51 +01:00
request = HTTPDownloadRequest(self,
serverPath + "/.dirindex",
2016-05-18 12:51:29 +02:00
2018-01-26 23:32:29 +01:00
vPath = normalizeVirtualPath(virtualPath + "/.dirindex")
self.abortCheckMode(failedCheckReason, vPath)
2016-05-11 14:57:38 +02:00
2016-05-18 12:51:29 +02:00
def handleDirindexRequest(self, dirindexRequest):
2016-05-11 14:57:38 +02:00
2016-05-18 12:51:29 +02:00
def handleDirindexFile(self, dirindexFile):
dirIndex = DirIndex(dirindexFile)
2018-01-26 23:32:29 +01:00
root = "/" + dirIndex.getPath() if dirIndex.getPath() else ""
2016-05-18 12:51:29 +02:00
serverFiles = []
2017-09-01 10:33:35 +02:00
serverDirs = []
2016-05-11 14:57:38 +02:00
2016-05-18 12:51:29 +02:00
for file in dirIndex.getFiles():
f = file['name']
2018-01-26 23:32:29 +01:00
self.processFileEntry(root + "/" + f,
join(dirIndex.getPath(), f),
2016-05-18 12:51:29 +02:00
2016-05-11 14:57:38 +02:00
2016-05-18 12:51:29 +02:00
for subdir in dirIndex.getDirectories():
d = subdir['name']
2018-01-26 23:32:29 +01:00
self.processDirectoryEntry(root + "/" + d,
join(dirIndex.getPath(), d),
2017-09-01 10:33:35 +02:00
2016-05-11 14:57:38 +02:00
2018-01-26 23:32:29 +01:00
localFullPath = join(self.target, dirIndex.getPath())
localFiles = [ f for f in listdir(localFullPath)
if isfile(join(localFullPath, f)) ]
for f in localFiles:
if f != ".dirindex" and f not in serverFiles:
relPath = dirIndex.getPath() + '/' + f # has no leading '/'
if self.inSyncMode():
if self.removeOrphan:
os.remove(join(self.target, relPath))
localDirs = [ f for f in listdir(localFullPath)
if isdir(join(localFullPath, f)) ]
for d in localDirs:
if d not in serverDirs:
relPath = dirIndex.getPath() + '/' + d # has no leading '/'
if self.inSyncMode():
if self.removeOrphan:
join(self.target, relPath))
# 'reason' is a member of the FailedCheckReason enum
def abortCheckMode(self, reason, fileOrDirVirtualPath):
assert self.mode == self.Mode.check, self.mode
print("{prg}: exiting from 'check' mode because {explanation}."
if self.doReport:
2016-05-11 14:57:38 +02:00
2016-05-18 12:51:29 +02:00
2016-12-28 08:32:55 -06:00
parser = argparse.ArgumentParser()
parser.add_argument("-u", "--url", dest="url", metavar="URL",
default="http://flightgear.sourceforge.net/scenery", help="Server URL [default: %(default)s]")
parser.add_argument("-t", "--target", dest="target", metavar="DIR",
2016-12-27 22:41:59 -06:00
default=".", help="Directory to store the files [default: current directory]")
2016-12-28 08:32:55 -06:00
parser.add_argument("-q", "--quick", dest="quick", action="store_true",
2016-12-27 22:41:59 -06:00
default=False, help="Quick")
2016-12-28 08:32:55 -06:00
parser.add_argument("-r", "--remove-orphan", dest="removeOrphan", action="store_true",
2016-12-27 22:41:59 -06:00
default=False, help="Remove old scenery files")
2018-01-26 23:32:29 +01:00
parser.add_argument("--mode", default="sync", choices=("check", "sync"),
main mode of operation (default: '%(default)s'). In 'sync' mode, contents is
downloaded from the server to the target directory. On the other hand, in
'check' mode, {progname} compares the contents of the target directory with
the remote repository without writing nor deleting anything on disk."""
parser.add_argument("--report", dest="report", action="store_true",
help="before normal exit, print a report of what was found")
2016-12-28 08:32:55 -06:00
parser.add_argument("--top", dest="top", type=int,
default=90, help="Maximum latitude to include in download [default: %(default)d]")
parser.add_argument("--bottom", dest="bottom", type=int,
default=-90, help="Minimum latitude to include in download [default: %(default)d]")
parser.add_argument("--left", dest="left", type=int,
default=-180, help="Minimum longitude to include in download [default: %(default)d]")
parser.add_argument("--right", dest="right", type=int,
default=180, help="Maximum longitude to include in download [default: %(default)d]")
2016-12-27 22:41:59 -06:00
2016-12-28 08:32:55 -06:00
args = parser.parse_args()
2016-12-27 22:41:59 -06:00
2018-01-26 23:32:29 +01:00
# Consistency checks on the arguments
if args.mode == "check" and args.removeOrphan:
print("{prg}: 'check' mode is read-only and thus doesn't make sense with\n"
"option --remove-orphan (-r)".format(prg=PROGNAME), file=sys.stderr)
# Now the real work :)
terraSync = TerraSync(args.mode, args.report, args.url, args.target, args.quick,
DownloadBoundaries(args.top, args.left, args.bottom,
report = terraSync.start()
if args.report:
2016-05-04 23:08:22 +02:00
2018-01-26 23:32:29 +01:00