Terrasync container

Signed-off-by: fly <merspieler@airmail.cc>
This commit is contained in:
fly 2021-06-22 10:27:27 +02:00
parent aa7227c137
commit 5d259b9e00
27 changed files with 2008 additions and 0 deletions

4
.gitignore vendored Normal file
View file

@ -0,0 +1,4 @@
*.swp
*.swo
__pycache__
*.pyc

24
terrasync/Dockerfile Normal file
View file

@ -0,0 +1,24 @@
# Container image that runs cron in the foreground; the cron table installed
# below is what triggers the TerraSync jobs.
FROM docker.io/library/debian:buster

# NOTE(review): 'requests' is not imported by the terrasync.py sources shown
# in this commit; presumably run.sh or another script needs it -- confirm.
RUN apt-get update && apt-get -y install cron python3 python3-pip
RUN pip3 install requests

# Install the schedule as the crontab of the build user (root by default).
COPY ts-cron /etc/cron.d/ts-cron
RUN crontab /etc/cron.d/ts-cron

# Program sources and the script copied next to them in the image root.
RUN mkdir /TerraSync
COPY TerraSync /TerraSync
COPY run.sh .

# Mount points exposed as volumes.
RUN mkdir /scenery /log /config
VOLUME /scenery
VOLUME /log
VOLUME /config

# Default upstream mirror; override with `docker run -e URL=...`.
# The 'ENV key=value' form replaces the legacy whitespace-separated syntax.
ENV URL=http://ukmirror.flightgear.org/fgscenery

# Exec form: cron runs directly as the container's main process instead of
# being wrapped in '/bin/sh -c'.
CMD ["cron", "-f"]

View file

@ -0,0 +1,26 @@
#! /usr/bin/env python3
# -*- coding: utf-8 -*-
# terrasync.py --- Synchronize TerraScenery data to your local disk
# Copyright (C) 2018 Florent Rougon
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
# terrasync.py development was started by Torsten Dreyer in 2016. This file is
# just the normal entry point for users.
# All the real work lives in the 'terrasync' package; this script only
# forwards to its entry point.
from terrasync.main import main

main()

View file

View file

@ -0,0 +1,105 @@
# -*- coding: utf-8 -*-
# dirindex.py --- Class used to parse .dirindex files
#
# Copyright (C) 2016 Torsten Dreyer
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""Parser for .dirindex files."""
from .exceptions import InvalidDirIndexFile
from .virtual_path import VirtualPath
class DirIndex:
    """Parser for .dirindex files.

    After construction, the parsed data is available in these attributes:

      version      -- integer from the 'version' line (0 if absent)
      path         -- VirtualPath built from the 'path' line
      directories  -- list of {'name', 'hash'} dicts ('d' lines)
      files        -- list of {'name', 'hash', 'size'} dicts ('f' lines)
      tarballs     -- list of {'name', 'hash', 'size'} dicts ('t' lines)

    Lines starting with '#', blank lines and lines whose first field is
    not recognized are ignored.
    """

    def __init__(self, dirIndexFile):
        """Read and parse the .dirindex file located at 'dirIndexFile'.

        Raise InvalidDirIndexFile if the contents are malformed or if
        no 'path' line is present.
        """
        self.directories = []
        self.files = []
        self.tarballs = []
        self.version = 0
        self.path = None          # will be a VirtualPath instance when set

        # readFrom() stores the raw contents of the .dirindex file in this
        # attribute. This is useful for troubleshooting.
        self._rawContents = None

        with open(dirIndexFile, "r", encoding="ascii") as f:
            self.readFrom(f)

        self._sanityCheck()

    @classmethod
    def checkForBackslashOrLeadingSlash(cls, line, path):
        """Raise InvalidDirIndexFile if 'path' has a '\\' or starts with '/'.

        'line' is the complete line the field came from; it is only used
        in the error message.
        """
        if '\\' in path or path.startswith('/'):
            raise InvalidDirIndexFile(
                r"invalid '\' or leading '/' in path field from line {!r}"
                .format(line))

    @classmethod
    def checkForSlashBackslashOrDoubleColon(cls, line, name):
        """Raise InvalidDirIndexFile if 'name' is unsafe as a path component.

        A name is rejected when it contains '/' or '\\', or when it is
        exactly '..'. Despite the method name, no check for '::' is done
        here. 'line' is only used in the error message.
        """
        if '/' in name or '\\' in name:
            raise InvalidDirIndexFile(
                r"invalid '\' or '/' in name field from line {!r}"
                .format(line))

        if name == "..":
            raise InvalidDirIndexFile(
                r"invalid name field equal to '..' in line {!r}".format(line))

    def readFrom(self, readable):
        """Parse the contents of the text file object 'readable'.

        Raise InvalidDirIndexFile for any line that fails validation,
        lacks a required field or has a non-integer version or size.
        """
        self._rawContents = readable.read()

        for line in self._rawContents.split('\n'):
            line = line.strip()
            # Blank lines and comments carry no data
            if not line or line.startswith('#'):
                continue

            try:
                self._parseTokens(line, line.split(':'))
            except (IndexError, ValueError) as exc:
                # Missing field (IndexError) or non-numeric version/size
                # (ValueError): report it as invalid repository data rather
                # than letting a low-level exception escape.
                raise InvalidDirIndexFile(
                    "malformed line {!r}".format(line)) from exc

    def _parseTokens(self, line, tokens):
        """Store the data from one line already split on ':' into 'tokens'."""
        kind = tokens[0]

        if kind == "version":
            self.version = int(tokens[1])
        elif kind == "path":
            self.checkForBackslashOrLeadingSlash(line, tokens[1])
            # This is relative to the repository root
            self.path = VirtualPath(tokens[1])

            if ".." in self.path.parts:
                raise InvalidDirIndexFile(
                    "'..' component found in 'path' entry {!r}"
                    .format(self.path))
        elif kind == "d":
            self.checkForSlashBackslashOrDoubleColon(line, tokens[1])
            self.directories.append({'name': tokens[1], 'hash': tokens[2]})
        elif kind == "f":
            self.checkForSlashBackslashOrDoubleColon(line, tokens[1])
            self.files.append({'name': tokens[1],
                               'hash': tokens[2], 'size': int(tokens[3])})
        elif kind == "t":
            self.checkForSlashBackslashOrDoubleColon(line, tokens[1])
            self.tarballs.append({'name': tokens[1], 'hash': tokens[2],
                                  'size': int(tokens[3])})
        # Any other kind of line is silently ignored

    def _sanityCheck(self):
        """Raise InvalidDirIndexFile if no 'path' line was found."""
        if self.path is None:
            assert self._rawContents is not None

            # Show the beginning of the file to help with troubleshooting
            firstLines = self._rawContents.split('\n')[:5]
            raise InvalidDirIndexFile(
                "no 'path' field found; the first lines of this .dirindex file "
                "follow:\n\n" + '\n'.join(firstLines))

View file

@ -0,0 +1,79 @@
# -*- coding: utf-8 -*-
# exceptions.py --- Custom exception classes for terrasync.py
#
# Copyright (C) 2018 Florent Rougon
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
# Generic exception class for terrasync.py, to be subclassed for each specific
# kind of exception.
class TerraSyncPyException(Exception):
    """Base class of all exceptions defined by terrasync.py.

    Subclasses normally only override 'ExceptionShortDescription'; those
    with a custom constructor may also override detail().
    """

    # Short text placed before the message by completeMessage()
    ExceptionShortDescription = "terrasync.py generic exception"

    def __init__(self, message=None, *, mayCapitalizeMsg=True):
        """Initialize a TerraSyncPyException instance.

        Except in cases where 'message' starts with a proper noun or
        something like that, its first character should be given in
        lower case. Automated treatments of this exception may print the
        message with its first character changed to upper case, unless
        'mayCapitalizeMsg' is False. In other words, if the case of the
        first character of 'message' must not be changed under any
        circumstances, set 'mayCapitalizeMsg' to False.

        """
        # Chain to Exception.__init__() so that 'args' reflects the message,
        # as generic exception-handling code expects.
        if message is None:
            super().__init__()
        else:
            super().__init__(message)

        self.message = message
        self.mayCapitalizeMsg = mayCapitalizeMsg

    def __str__(self):
        return self.completeMessage()

    def __repr__(self):
        return "{}.{}({!r})".format(__name__, type(self).__name__, self.message)

    # Typically overridden by subclasses with a custom constructor
    def detail(self):
        """Return the specific part of the error message."""
        return self.message

    def completeMessage(self):
        """Return '<short description>: <detail>'.

        When the message is empty or None, only the short description
        is returned.
        """
        if self.message:
            return "{shortDesc}: {detail}".format(
                shortDesc=self.ExceptionShortDescription,
                detail=self.detail())
        else:
            return self.ExceptionShortDescription
class UserError(TerraSyncPyException):
    """Raised when the program is invoked or used incorrectly."""

    ExceptionShortDescription = "User error"
class NetworkError(TerraSyncPyException):
    """Raised when a network error persists even after retrying."""

    ExceptionShortDescription = "Network error"
class UnsupportedURLScheme(TerraSyncPyException):
    """Raised when a URL uses a scheme the program cannot handle."""

    ExceptionShortDescription = "Unsupported URL scheme"
class RepoDataError(TerraSyncPyException):
    """Raised when the TerraSync repository provides invalid data."""

    ExceptionShortDescription = "Invalid data from the TerraSync repository"
class InvalidDirIndexFile(RepoDataError):
    """Raised when a .dirindex file contains invalid data."""

    ExceptionShortDescription = "Invalid .dirindex file"

View file

@ -0,0 +1,743 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# main.py --- Main module for terrasync.py
#
# Copyright (C) 2016 Torsten Dreyer
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
import argparse
import enum
import hashlib
import os
import re
import shutil
import ssl
import sys
import time
import urllib
from urllib.parse import urlparse, urljoin
from http.client import HTTPConnection, HTTPSConnection, HTTPException
from os import listdir
from os.path import isfile, isdir, join
from base64 import b64encode
from . import dirindex
from .exceptions import UserError, NetworkError, RepoDataError, \
InvalidDirIndexFile, UnsupportedURLScheme
from .virtual_path import VirtualPath
# Base name of the script as invoked; used as a prefix or title in messages
# printed for the user.
PROGNAME = os.path.basename(sys.argv[0])
@enum.unique
class ExitStatus(enum.Enum):
    """Exit codes of the program.

    The enum is marked unique, like the other enums of this module, so
    that two statuses can never silently share the same code.
    """
    SUCCESS = 0
    # The program exit status is 1 when an exception isn't caught.
    ERROR = 1
    CHECK_MODE_FOUND_MISMATCH = 2
# *****************************************************************************
# * Utility functions *
# *****************************************************************************
# If a path matches this regexp, we really don't want to delete it recursively
# (“cre” stands for “compiled regexp”).
_removeDirectoryTree_dangerous_cre = re.compile(
    r"""^(/ (home (/ [^/]*)? )? /* |    # for Unix-like systems
         [a-zA-Z]: [\\/]*               # for Windows ('\' or '/' separators)
      )$""", re.VERBOSE)


def removeDirectoryTree(base, whatToRemove):
    r"""Recursively remove directory 'whatToRemove', with safety checks.

    This function ensures that 'whatToRemove' does not resolve to a
    directory such as /, /home, /home/foobar, C:\, d:\, etc. It is also
    an error if 'whatToRemove' does not literally start with the value
    of 'base' (IOW, this function refuses to erase anything that is not
    under 'base'). The same containment is verified again after both
    paths have been made absolute and normalized, so that '..'
    components cannot be used to escape from 'base'.

    'whatToRemove' is *not* interpreted relatively to 'base' (this would
    be doable, just a different API).

    Raise UserError if the resolved path is not under 'base', does not
    contain a .dirindex file or matches one of the protected locations.

    """
    assert os.path.isdir(base), "Not a directory: {!r}".format(base)
    assert (base and
            whatToRemove.startswith(base) and
            whatToRemove[len(base):].startswith(os.sep)), \
        "Unexpected base path for removeDirectoryTree(): {!r}".format(base)

    absBase = os.path.abspath(base)
    absPath = os.path.abspath(whatToRemove)

    # The literal prefix test above is fooled by paths such as
    # 'base/../elsewhere'; compare the normalized paths too.
    if not absPath.startswith(absBase.rstrip(os.sep) + os.sep):
        raise UserError("refusing to recursively delete '{}' because "
                        "it is not located under '{}'"
                        .format(absPath, absBase))
    elif not os.path.isfile(join(absPath, ".dirindex")):
        raise UserError("refusing to recursively delete '{}' because "
                        "it does not contain a .dirindex file".format(absPath))
    elif _removeDirectoryTree_dangerous_cre.match(absPath):
        raise UserError("in order to protect your data, refusing to "
                        "recursively delete '{}'".format(absPath))
    else:
        shutil.rmtree(absPath)
def computeHash(fileLike):
    """Return the hexadecimal SHA-1 digest of the data read from 'fileLike'.

    'fileLike' must have a read(size) method returning bytes; it is read
    in blocks of 4096 bytes until an empty bytes object is returned.
    """
    sha1 = hashlib.sha1()

    while True:
        block = fileLike.read(4096)
        if block == b"":
            break
        sha1.update(block)

    return sha1.hexdigest()
def hashForFile(fname):
    """Return the hexadecimal SHA-1 digest of the file at path 'fname'."""
    with open(fname, "rb") as stream:
        digest = computeHash(stream)

    return digest
# *****************************************************************************
# * Network-related classes *
# *****************************************************************************
class HTTPGetCallback:
    """Pairing of a server path with the callback handling its response."""

    def __init__(self, src, callback):
        """Initialize an HTTPGetCallback instance.

        src      -- a VirtualPath instance (corresponding to the path on
                    the server for which a GET request is to be issued)
        callback -- a function taking two parameters: the URL (string)
                    and an http.client.HTTPResponse instance. When
                    invoked, the callback return value will be returned
                    by HTTPGetter.get(). Pass None to leave the
                    'callback' attribute unset on the instance (e.g.,
                    when a subclass defines a callback() method).

        """
        self.src = src

        if callback is None:
            return

        self.callback = callback
class HTTPGetter:
    """Issue GET requests to one server over a single kept-alive connection."""

    def __init__(self, baseUrl, maxPending=10, auth=""):
        """Initialize an HTTPGetter instance.

        baseUrl    -- URL of the repository root, without trailing slash;
                      its scheme must be 'http' or 'https', otherwise
                      UnsupportedURLScheme is raised
        maxPending -- stored as an attribute; not used by the methods of
                      this class
        auth       -- 'user:password' string for HTTP basic
                      authentication; ignored if empty or whitespace-only

        """
        self.baseUrl = baseUrl
        self.parsedBaseUrl = urlparse(baseUrl)
        self.maxPending = maxPending
        self.requests = []
        self.pendingRequests = []

        scheme = self.parsedBaseUrl.scheme
        host = self.parsedBaseUrl.netloc

        if scheme == "http":
            self.httpConnection = HTTPConnection(host)
        elif scheme == "https":
            self.httpConnection = HTTPSConnection(
                host, context=ssl.create_default_context())
        else:
            raise UnsupportedURLScheme(scheme)

        # Headers sent with every request
        self.httpRequestHeaders = {
            'Host': host,
            'Content-Length': 0,
            'Connection': 'Keep-Alive',
            'User-Agent': 'FlightGear terrasync.py',
        }

        if auth and not auth.isspace():
            credentials = b64encode(auth.encode("utf-8")).decode("ascii")
            self.httpRequestHeaders['Authorization'] = 'Basic %s' % credentials

    def assemblePath(self, httpGetCallback):
        """Return the path-on-server for the file to download.

        Example: '/scenery/Airports/N/E/4/.dirindex'

        """
        basePath = self.parsedBaseUrl.path
        assert not basePath.endswith('/'), repr(self.parsedBaseUrl)

        return basePath + str(httpGetCallback.src)

    def assembleUrl(self, httpGetCallback):
        """Return the URL of the file to download."""
        baseUrl = self.parsedBaseUrl.geturl()
        assert not baseUrl.endswith('/'), repr(baseUrl)

        relativePath = httpGetCallback.src.asRelative()
        return urljoin(baseUrl + '/', relativePath)

    def doGet(self, httpGetCallback):
        """Perform one GET request and return what the callback returns."""
        time.sleep(1.25)        # throttle the rate

        self.httpConnection.request("GET",
                                    self.assemblePath(httpGetCallback),
                                    None,
                                    self.httpRequestHeaders)
        # 'response' is an http.client.HTTPResponse instance
        response = self.httpConnection.getresponse()
        url = self.assembleUrl(httpGetCallback)

        return httpGetCallback.callback(url, response)

    def get(self, httpGetCallback):
        """Like doGet(), but retry after an http.client.HTTPException.

        Up to 5 retries are made, reconnecting before each of them. If
        the last one fails too, NetworkError is raised.
        """
        nbRetries = 5

        for retriesLeft in range(nbRetries, -1, -1):
            try:
                return self.doGet(httpGetCallback)
            except HTTPException as exc:
                if retriesLeft == 0:
                    raise NetworkError(
                        "after {nbRetries} retries for URL {url}: {errMsg}"
                        .format(nbRetries=nbRetries,
                                url=self.assembleUrl(httpGetCallback),
                                errMsg=exc)) from exc

            # Try to reconnect
            self.httpConnection.close()
            time.sleep(1)
            self.httpConnection.connect()
class HTTPDownloadRequest(HTTPGetCallback):
    """HTTPGetCallback class whose callback saves the response to a file."""

    def __init__(self, src, dst, callback=None):
        """Initialize an HTTPDownloadRequest instance.

        src      -- a VirtualPath instance (corresponding to the path
                    on the server for which a GET request is to be
                    issued)
        dst      -- file path (or whatever open() accepts) where the
                    downloaded data is to be stored
        callback -- a function that will be called if the download is
                    successful, or None if no such callback is desired.
                    The function must take one parameter: when invoked,
                    it will be passed this HTTPDownloadRequest
                    instance. Its return value is ignored.

        """
        HTTPGetCallback.__init__(self, src, None)
        self.dst = dst
        self.mycallback = callback

    # 'httpResponse' is an http.client.HTTPResponse instance
    def callback(self, url, httpResponse):
        """Write the body of 'httpResponse' to self.dst.

        Raise NetworkError if the response status is not 200 or if
        reading the body fails with an http.client.HTTPException. On
        success, invoke the user callback given to the constructor, if
        any.
        """
        # I suspect this doesn't handle HTTP redirects and things like that. As
        # mentioned at <https://docs.python.org/3/library/http.client.html>,
        # http.client is a low-level interface that should normally not be used
        # directly!
        if httpResponse.status != 200:
            raise NetworkError("HTTP callback got status {status} for URL {url}"
                               .format(status=httpResponse.status, url=url))

        try:
            with open(self.dst, 'wb') as f:
                # Copy in chunks rather than holding the whole body in
                # memory (some of the downloaded files are tarballs).
                shutil.copyfileobj(httpResponse, f)
        except HTTPException as exc:
            raise NetworkError("for URL {url}: {error}"
                               .format(url=url, error=exc)) from exc

        if self.mycallback is not None:
            self.mycallback(self)
class HTTPSocketRequest(HTTPGetCallback):
    """HTTPGetCallback class whose callback hands back the response itself.

    The callback, and therefore HTTPGetter.get(), returns the
    http.client.HTTPResponse object, which is file-like. The caller can
    thus consume the data coming from the network without necessarily
    storing it to a file.

    """

    def __init__(self, src):
        """Initialize an HTTPSocketRequest object.

        src -- VirtualPath instance for the resource on the server
               (presumably a file)

        """
        super().__init__(src, None)

    def callback(self, url, httpResponse):
        """Return 'httpResponse' if its status is 200, else raise NetworkError."""
        # Same comment as for HTTPDownloadRequest.callback()
        if httpResponse.status == 200:
            return httpResponse

        raise NetworkError("HTTP callback got status {status} for URL {url}"
                           .format(status=httpResponse.status, url=url))
#################################################################################################################################
class Coordinate:
    """Simple holder for a latitude ('lat') and a longitude ('lon')."""

    def __init__(self, lat, lon):
        self.lat, self.lon = lat, lon
class DownloadBoundaries:
    """Latitude/longitude box restricting which scenery tiles are processed.

    The box may wrap across the antimeridian, in which case 'left' is
    non-negative and 'right' is negative.
    """

    def __init__(self, top, left, bottom, right):
        """Initialize a DownloadBoundaries instance.

        top, bottom -- maximum and minimum latitude, within [-90, 90]
        left, right -- minimum and maximum longitude, within [-180, 180)

        Raise ValueError if the values are out of range or inconsistent.
        """
        if top < bottom:
            raise ValueError("top cannot be less than bottom")

        if right < left:
            # right may be less than left when wrapping across the antimeridian
            if not (left >= 0 and right < 0):
                raise ValueError("right cannot be less than left")

        if top > 90 or bottom < -90:
            raise ValueError("top and bottom must be a valid latitude")

        # Check both ends: when the box wraps across the antimeridian,
        # left <= right doesn't hold, so bounding only one side of each
        # value would let things like left=500 through.
        if not (-180 <= left < 180 and -180 <= right < 180):
            raise ValueError("left and right must be a valid longitude")

        self.top = top
        self.left = left
        self.bottom = bottom
        self.right = right

    def is_coordinate_inside_boundaries(self, coordinate, isOuterBucket):
        """Tell whether a tile overlaps the boundaries, at least partially.

        coordinate    -- object whose 'lat' and 'lon' attributes give the
                         bottom left corner of the tile
        isOuterBucket -- if true, the tile spans 10 degrees in each
                         direction; otherwise, 1 degree

        """
        extent = 10 if isOuterBucket else 1
        bigTileBottom = coordinate.lat
        bigTileTop = bigTileBottom + extent
        bigTileLeft = coordinate.lon
        bigTileRight = bigTileLeft + extent

        # if the two regions do not overlap then we are done
        if bigTileTop <= self.bottom or bigTileBottom > self.top:
            return False

        if bigTileRight <= self.left or bigTileLeft > self.right:
            # check for spanning across the antimeridian
            if self.left >= 0 and self.right < 0:
                # determine which side we are on and check for region overlap
                if bigTileLeft >= 0:
                    if bigTileRight <= self.left:
                        return False
                elif bigTileLeft > self.right:
                    return False
            else:
                return False

        # at least a partial overlap exists, so more processing will be needed
        return True
def parse_terrasync_coordinate(coordinate):
    """Convert a tile directory name such as 'w123n45' to a Coordinate.

    The name must start with 'w' or 'e' followed by three digits, then
    'n' or 's' followed by two digits; anything after that is ignored.
    West longitudes and south latitudes are returned as negative
    numbers. Return None if 'coordinate' doesn't have this form.
    """
    # Raw string: '\d' in a normal string literal is an invalid escape
    # sequence, which recent Python versions warn about.
    matches = re.match(r"(w|e)(\d{3})(n|s)(\d{2})", coordinate)
    if not matches:
        return None

    lon = int(matches.group(2))
    if matches.group(1) == "w":
        lon *= -1

    lat = int(matches.group(4))
    if matches.group(3) == "s":
        lat *= -1

    return Coordinate(lat, lon)
class Report:
    """Gather and format data about the state of a TerraSync mirror."""

    def __init__(self, targetDir):
        """Create an empty report for the mirror stored in 'targetDir'."""
        self.targetDir = targetDir

        # One set of virtual paths per kind of finding
        self.dirsWithMissingIndex = set()
        self.dirsWithMismatchingDirIndexHash = set()
        self.missingFiles = set()
        self.filesWithMismatchingHash = set()
        self.dirsSkippedDueToBoundaries = set()
        self.orphanFiles = set()
        self.orphanDirs = set()

    def addMissingDirIndex(self, directoryVirtualPath):
        """Record a directory that has no local .dirindex file."""
        self.dirsWithMissingIndex.add(directoryVirtualPath)

    def addDirIndexWithMismatchingHash(self, directoryVirtualPath):
        """Record a directory whose .dirindex file has an unexpected hash."""
        self.dirsWithMismatchingDirIndexHash.add(directoryVirtualPath)

    def addMissingFile(self, virtualPath):
        """Record a file that is missing locally."""
        self.missingFiles.add(virtualPath)

    def addFileWithMismatchingHash(self, virtualPath):
        """Record a file whose hash is not the expected one."""
        self.filesWithMismatchingHash.add(virtualPath)

    def addSkippedDueToBoundaries(self, virtualPath):
        """Record a directory skipped because of the boundaries."""
        self.dirsSkippedDueToBoundaries.add(virtualPath)

    def addOrphanFile(self, virtualPath):
        """Record a local file that is unknown to the server."""
        self.orphanFiles.add(virtualPath)

    def addOrphanDir(self, virtualPath):
        """Record a local directory that is unknown to the server."""
        self.orphanDirs.add(virtualPath)

    def summaryString(self):
        """Return the text of the report, one section per kind of finding."""
        sections = (
            ("Directories with missing index", self.dirsWithMissingIndex),
            ("Directories whose .dirindex file had a mismatching hash",
             self.dirsWithMismatchingDirIndexHash),
            ("Missing files", self.missingFiles),
            ("Files with a mismatching hash", self.filesWithMismatchingHash),
            ("Directories skipped because of the specified boundaries",
             self.dirsSkippedDueToBoundaries),
            ("Orphan files", self.orphanFiles),
            ("Orphan directories", self.orphanDirs),
        )

        lines = []
        for heading, entries in sections:
            if not entries:
                lines.append(heading + ": none")
                continue

            lines.append(heading + ":\n")
            lines += [" " + str(entry) for entry in sorted(entries)]
            lines.append('')    # ensure a blank line follows the list

        return '\n'.join(lines)

    def printReport(self):
        """Print an underlined title followed by the summary."""
        title = "{prg} report".format(prg=PROGNAME)
        underline = "=" * len(title)

        print("\n{}\n{}".format(title, underline), end="\n\n")
        print(self.summaryString())
@enum.unique
class FailedCheckReason(enum.Enum):
    """Reasons that can cause 'check' mode to report a mismatch.

    Note that network errors and things like that do *not* belong here.

    """

    missingDirIndexFile = 0
    mismatchingHashForDirIndexFile = 1
    missingNormalFile = 2
    mismatchingHashForNormalFile = 3
    orphanFile = 4
    orphanDirectory = 5

    # 'path': VirtualPath instance for a file or directory
    def explain(self, path):
        """Return a sentence fragment telling why the check of 'path' failed."""
        reasons = FailedCheckReason
        templates = {
            reasons.missingDirIndexFile:
                ".dirindex file '{}' is missing locally",
            reasons.mismatchingHashForDirIndexFile:
                ".dirindex file '{}' doesn't have the hash it "
                "should have according to the server",
            reasons.missingNormalFile:
                "file '{}' is present on the server but missing locally",
            reasons.mismatchingHashForNormalFile:
                "file '{}' doesn't have the hash given in the "
                ".dirindex file of its containing directory",
            reasons.orphanFile:
                "file '{}' was found locally but is not present on the "
                "server",
            reasons.orphanDirectory:
                "directory '{}' was found locally but is not present "
                "on the server",
        }

        template = templates.get(self)
        assert template is not None, "Unhandled enum value: {!r}".format(self)

        return template.format(path)
class TerraSync:
    """Compare a local directory with a TerraSync repository.

    In 'sync' mode, files that are missing or differ locally are
    downloaded. In 'check' mode, the process exits on the first
    difference found.
    """

    @enum.unique
    class Mode(enum.Enum):
        """Main modes of operation for the TerraSync class."""
        # Using lower case for the member names, because this way
        # enumMember.name is exactly the mode string passed to --mode on the
        # command line (can be useful for messages destined to users).
        check, sync = range(2)

    def __init__(self, mode, doReport, url, target, quick, removeOrphan,
                 downloadBoundaries, auth):
        """Initialize a TerraSync instance.

        mode               -- name of a Mode member ('check' or 'sync')
        doReport           -- whether to print the report before exiting
                              from 'check' mode because of a mismatch
        url                -- URL of the repository root
        target             -- local directory mirroring the repository
        quick              -- if true, don't look into a directory whose
                              local .dirindex file has the expected hash
        removeOrphan       -- if true, delete local files and directories
                              that are not listed by the server ('sync'
                              mode)
        downloadBoundaries -- DownloadBoundaries instance
        auth               -- credentials passed to HTTPGetter

        """
        self.mode = self.Mode[mode]
        self.doReport = doReport
        self.setUrl(url).setTarget(target)
        self.auth = auth
        self.quick = quick
        self.removeOrphan = removeOrphan
        self.httpGetter = None
        self.downloadBoundaries = downloadBoundaries
        # Status of the local repository (as compared to what the server says),
        # before any update we might do to it.
        self.report = Report(self.target)

    def inSyncMode(self):
        """Return True in 'sync' mode, False otherwise."""
        return self.mode == self.Mode.sync

    def setUrl(self, url):
        """Set the repository URL and return self.

        Surrounding whitespace and trailing slashes are removed.
        """
        # Whitespace has to go first: otherwise, a slash followed by
        # whitespace would survive and the URL would end with '/'.
        self.url = url.strip().rstrip('/')
        return self

    def setTarget(self, target):
        """Set the local target directory and return self."""
        # Using os.path.abspath() here is safer in case the process later uses
        # os.chdir(), which would change the meaning of the "." directory.
        self.target = os.path.abspath(target)
        return self

    def start(self, virtualSubdir=VirtualPath('/')):
        """Start the 'sync' or 'check' process.

        The 'virtualSubdir' argument must be a VirtualPath instance and
        allows one to start the 'sync' or 'check' process in a chosen
        subdirectory of the TerraSync repository, instead of at its
        root.

        Return the Report instance describing what was found.

        """
        # Remove the leading '/' from 'virtualSubdir' and convert to native
        # separators ('/' or '\' depending on the platform).
        localSubdir = os.path.normpath(virtualSubdir.asRelative())
        if localSubdir == ".":  # just ugly, but it wouldn't hurt
            localSubdir = ""

        assert not os.path.isabs(localSubdir), repr(localSubdir)
        self.httpGetter = HTTPGetter(baseUrl=self.url, auth=self.auth)

        # Get the hash of the .dirindex file for 'virtualSubdir'
        try:
            request = HTTPSocketRequest(virtualSubdir / ".dirindex")
            with self.httpGetter.get(request) as fileLike:
                dirIndexHash = computeHash(fileLike)
        except HTTPException as exc:
            raise NetworkError("for the root .dirindex file: {errMsg}"
                               .format(errMsg=exc)) from exc

        # Process the chosen part of the repository (recursive)
        self.processDirectoryEntry(virtualSubdir, localSubdir, dirIndexHash)

        return self.report

    def processFileEntry(self, virtualPath, localPath, fileHash):
        """Process a file entry from a .dirindex file.

        virtualPath -- VirtualPath instance for the file
        localPath   -- path of the file relative to the target directory
        fileHash    -- hash the file should have according to .dirindex

        """
        localFullPath = join(self.target, localPath)
        failedCheckReason = None

        if not os.path.isfile(localFullPath):
            self.report.addMissingFile(virtualPath)
            failedCheckReason = FailedCheckReason.missingNormalFile
        elif hashForFile(localFullPath) != fileHash:
            self.report.addFileWithMismatchingHash(virtualPath)
            failedCheckReason = FailedCheckReason.mismatchingHashForNormalFile
        else:
            # The file exists and has the hash mentioned in the .dirindex file
            return

        assert failedCheckReason is not None

        if self.inSyncMode():
            if os.path.isdir(localFullPath):
                # 'localFullPath' is a directory (locally), but on the server
                # it is a file -> remove the dir so that we can store the file.
                removeDirectoryTree(self.target, localFullPath)

            print("Downloading '{}'".format(virtualPath))
            request = HTTPDownloadRequest(virtualPath, localFullPath)
            self.httpGetter.get(request)
        else:
            self.abortCheckMode(failedCheckReason, virtualPath)

    def processDirectoryEntry(self, virtualPath, localPath, dirIndexHash):
        """Process a directory entry from a .dirindex file.

        virtualPath  -- VirtualPath instance for the directory
        localPath    -- path of the directory relative to the target
                        directory
        dirIndexHash -- hash the .dirindex file of the directory should
                        have according to the server

        """
        print("Processing '{}'...".format(virtualPath))

        # NOTE(review): presumably, tile directories with at most 3 path
        # components are the 10-degree ones -- confirm against the
        # repository layout.
        isOuterBucket = len(virtualPath.parts) <= 3
        coord = parse_terrasync_coordinate(virtualPath.name)

        if (coord and
            not self.downloadBoundaries.is_coordinate_inside_boundaries(
                coord, isOuterBucket)):
            self.report.addSkippedDueToBoundaries(virtualPath)
            return

        localFullPath = join(self.target, localPath)
        localDirIndex = join(localFullPath, ".dirindex")
        failedCheckReason = None

        if not os.path.isfile(localDirIndex):
            failedCheckReason = FailedCheckReason.missingDirIndexFile
            self.report.addMissingDirIndex(virtualPath)
        elif hashForFile(localDirIndex) != dirIndexHash:
            failedCheckReason = FailedCheckReason.mismatchingHashForDirIndexFile
            self.report.addDirIndexWithMismatchingHash(virtualPath)

        if failedCheckReason is None:
            # The local .dirindex file is up-to-date; in quick mode, this is
            # enough not to look at the directory contents.
            if not self.quick:
                self.handleDirindexFile(localDirIndex)
        elif self.inSyncMode():
            if os.path.isfile(localFullPath):
                os.unlink(localFullPath)   # file on server became a directory

            if not os.path.exists(localFullPath):
                os.makedirs(localFullPath)

            # The directory contents is processed by the callback once the
            # new .dirindex file has been stored.
            request = HTTPDownloadRequest(virtualPath / ".dirindex",
                                          localDirIndex,
                                          self.handleDirindexRequest)
            self.httpGetter.get(request)
        else:
            self.abortCheckMode(failedCheckReason, virtualPath / ".dirindex")

    def handleDirindexRequest(self, dirindexRequest):
        """Callback processing a freshly downloaded .dirindex file."""
        self.handleDirindexFile(dirindexRequest.dst)

    def handleDirindexFile(self, dirindexFile):
        """Process every entry of the local .dirindex file 'dirindexFile'.

        Afterwards, local files and subdirectories of the corresponding
        directory that aren't listed in the .dirindex file are recorded
        as orphans. Depending on the mode and options, they are removed,
        left alone or cause the program to exit.
        """
        dirIndex = dirindex.DirIndex(dirindexFile)
        virtualBase = dirIndex.path             # VirtualPath instance
        relativeBase = virtualBase.asRelative()  # string, doesn't start with '/'

        # Only used for membership tests, hence the sets
        serverFiles = set()
        serverDirs = set()

        for file in dirIndex.files:
            f = file['name']
            self.processFileEntry(virtualBase / f,
                                  join(relativeBase, f),
                                  file['hash'])
            serverFiles.add(f)

        for subdir in dirIndex.directories:
            d = subdir['name']
            self.processDirectoryEntry(virtualBase / d,
                                       join(relativeBase, d),
                                       subdir['hash'])
            serverDirs.add(d)

        for tarball in dirIndex.tarballs:
            # Tarballs are handled the same as normal files.
            f = tarball['name']
            self.processFileEntry(virtualBase / f,
                                  join(relativeBase, f),
                                  tarball['hash'])
            serverFiles.add(f)

        localFullPath = join(self.target, relativeBase)
        localFiles = [ f for f in listdir(localFullPath)
                       if isfile(join(localFullPath, f)) ]

        for f in localFiles:
            if f != ".dirindex" and f not in serverFiles:
                virtualPath = virtualBase / f
                self.report.addOrphanFile(virtualPath)

                if self.inSyncMode():
                    if self.removeOrphan:
                        os.remove(join(self.target, virtualPath.asRelative()))
                else:
                    self.abortCheckMode(FailedCheckReason.orphanFile,
                                        virtualPath)

        localDirs = [ f for f in listdir(localFullPath)
                      if isdir(join(localFullPath, f)) ]

        for d in localDirs:
            if d not in serverDirs:
                virtualPath = virtualBase / d
                self.report.addOrphanDir(virtualPath)

                if self.inSyncMode():
                    if self.removeOrphan:
                        removeDirectoryTree(self.target,
                                            join(self.target,
                                                 virtualPath.asRelative()))
                else:
                    self.abortCheckMode(FailedCheckReason.orphanDirectory,
                                        virtualPath)

    # 'reason' is a member of the FailedCheckReason enum
    def abortCheckMode(self, reason, fileOrDirVirtualPath):
        """Explain why the check failed, then exit the program.

        The report is printed first if it was requested. The exit status
        is ExitStatus.CHECK_MODE_FOUND_MISMATCH. Only valid in 'check'
        mode.
        """
        assert self.mode == self.Mode.check, repr(self.mode)

        print("{prg}: exiting from 'check' mode because {explanation}."
              .format(prg=PROGNAME,
                      explanation=reason.explain(fileOrDirVirtualPath)))

        if self.doReport:
            self.report.printReport()

        sys.exit(ExitStatus.CHECK_MODE_FOUND_MISMATCH.value)
#################################################################################################################################
def parseCommandLine():
    """Parse and validate the command line; return the argparse namespace.

    The returned namespace has an additional 'virtualSubdir' attribute:
    the VirtualPath built from the value of --only-subdir. In case of
    inconsistent arguments, a message is printed to stderr and the
    program exits with status ExitStatus.ERROR.
    """
    def usageError(text):
        # Report a misuse of the command line and terminate the program
        print("{}: {}".format(PROGNAME, text), file=sys.stderr)
        sys.exit(ExitStatus.ERROR.value)

    argParser = argparse.ArgumentParser()
    addOption = argParser.add_argument

    addOption("-u", "--url", dest="url", metavar="URL",
              default="http://flightgear.sourceforge.net/scenery",
              help="server URL [default: %(default)s]")

    addOption("-a", "--auth", dest="auth", metavar="user:password",
              default="", help="""\
authentication credentials for basic auth [default: empty, no authentication]""")

    addOption("-t", "--target", dest="target", metavar="DIR",
              default=".", help="""\
directory where to store the files [default: the current directory]""")

    addOption("--only-subdir", dest="onlySubdir", metavar="SUBDIR",
              default="", help="""\
restrict processing to this subdirectory of the TerraSync repository. Use
a path relative to the repository root, for instance 'Models/Residential'
[default: process the whole repository]""")

    addOption("-q", "--quick", dest="quick", action="store_true",
              default=False, help="enable quick mode")

    addOption("-r", "--remove-orphan", dest="removeOrphan",
              action="store_true",
              default=False, help="remove old scenery files")

    modeHelp = """\
main mode of operation (default: '%(default)s'). In 'sync' mode, contents
is downloaded from the server to the target directory. On the other hand,
in 'check' mode, {progname} compares the contents of the target directory
with the remote repository without writing nor deleting anything on
disk.""".format(progname=PROGNAME)
    addOption("--mode", default="sync", choices=("check", "sync"),
              help=modeHelp)

    addOption("--report", dest="report", action="store_true",
              default=False,
              help="""\
before normal exit, print a report of what was found""")

    # Boundaries of the area to process
    addOption("--top", dest="top", type=int, default=90, help="""\
maximum latitude to include in download [default: %(default)d]""")

    addOption("--bottom", dest="bottom", type=int, default=-90,
              help="""\
minimum latitude to include in download [default: %(default)d]""")

    addOption("--left", dest="left", type=int, default=-180, help="""\
minimum longitude to include in download [default: %(default)d]""")

    addOption("--right", dest="right", type=int, default=179,
              help="""\
maximum longitude to include in download [default: %(default)d]""")

    args = argParser.parse_args()

    # Perform consistency checks on the arguments
    if args.removeOrphan and args.mode == "check":
        usageError("'check' mode is read-only and thus doesn't make sense "
                   "with\noption --remove-orphan (-r)")

    # Replace backslashes with forward slashes, remove leading and trailing
    # slashes, collapse consecutive slashes. Yes, this implies that we tolerate
    # leading slashes for --only-subdir (which makes sense because virtual
    # paths are printed like that by this program, therefore it is natural for
    # users to copy & paste such paths in order to use them for --only-subdir).
    withForwardSlashes = args.onlySubdir.replace('\\', '/')
    args.virtualSubdir = VirtualPath(withForwardSlashes)

    # Be nice to our user in case the path starts with '\', 'C:\', etc.
    if os.path.isabs(args.virtualSubdir.asRelative()):
        usageError("option --only-subdir expects a *relative* path, but got "
                   "'{}'".format(args.onlySubdir))

    return args
def main():
    """Entry point: run the mode chosen on the command line, then exit.

    The report is printed before exiting if --report was given. The
    exit status is ExitStatus.SUCCESS when this point is reached.
    """
    options = parseCommandLine()

    boundaries = DownloadBoundaries(options.top, options.left,
                                    options.bottom, options.right)
    syncer = TerraSync(options.mode, options.report, options.url,
                       options.target, options.quick, options.removeOrphan,
                       boundaries, options.auth)
    report = syncer.start(options.virtualSubdir)

    if options.report:
        report.printReport()

    sys.exit(ExitStatus.SUCCESS.value)

View file

@ -0,0 +1,506 @@
# -*- coding: utf-8 -*-
# virtual_path.py --- Classes used to manipulate slash-separated virtual paths
#
# Copyright (C) 2018 Florent Rougon
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""Module containing the VirtualPath and MutableVirtualPath classes."""
import pathlib
class VirtualPath:
    """Class used to represent virtual paths using the slash separator.

    This class always uses the slash ('/') as the separator between
    components. For terrasync.py, the root path '/' corresponds to the
    repository root, regardless of where it is stored (hard drive,
    remote server, etc.).

    Note: because of this, the class is not supposed to be used directly
          for filesystem accesses, since some root directory or
          protocol://server/root-dir prefix would have to be prepended
          to provide reasonably useful functionality. This is why the
          paths managed by this class are said to be virtual. This also
          implies that even in Python 3.6 or later, this class should
          *not* inherit from os.PathLike.

    Whenever a given feature exists in pathlib.PurePath, this class
    replicates the corresponding pathlib.PurePath API, but using
    mixedCaseStyle instead of underscore_style (the latter being used
    for every method of pathlib.PurePath). Of course, types are adapted:
    for instance, methods of this class often return a VirtualPath
    instance, whereas the corresponding pathlib.PurePath methods would
    return a pathlib.PurePath instance.

    """

    def __init__(self, p):
        """Initialize the instance from the string 'p'.

        'p' is normalized with normalizeStringPath(); a value that
        doesn't start with '/' is taken as relative to the root.

        """
        # Once this function exits, self._path *must not be changed* anymore
        # (doing so would violate the contract for a hashable object: the
        # hash must not change once the object has been constructed).
        self._path = self.normalizeStringPath(p)
        # This check could of course be skipped if it is found to really affect
        # performance.
        self._check()

    def __str__(self):
        """Return a string representation of the path in self.

        The return value:
          - always starts with a '/';
          - never ends with a '/' except if it is exactly '/' (i.e.,
            the root virtual path).

        """
        return self._path

    def asPosix(self):
        """Return a string representation of the path in self.

        This method returns str(self), it is only present for
        compatibility with pathlib.PurePath.

        """
        return str(self)

    def __repr__(self):
        # The module name is part of the representation, which is why the
        # doctests below show 'terrasync.virtual_path.VirtualPath(...)'.
        return "{}.{}({!r})".format(__name__, type(self).__name__, self._path)

    def __lt__(self, other):
        # Allow sorting with instances of VirtualPath, or of any subclass. Note
        # that the == operator (__eq__()) and therefore also != are stricter
        # with respect to typing.
        if isinstance(other, VirtualPath):
            return self._path < other._path
        else:
            return NotImplemented

    def __le__(self, other):
        if isinstance(other, VirtualPath):
            return self._path <= other._path
        else:
            return NotImplemented

    def __eq__(self, other):
        # The types must be the same, therefore a VirtualPath never compares
        # equal to a MutableVirtualPath with the == operator. For such
        # comparisons, use the samePath() method. If __eq__() (and thus
        # necessarily __hash__()) were more lax about typing, adding
        # VirtualPath instances and instances of hashable subclasses of
        # VirtualPath with the same _path to a set or frozenset would lead to
        # unintuitive behavior, since they would all be considered equal.
        return type(self) == type(other) and self._path == other._path

    # intentionally not implemented. Python3 provides a default implementation.
    # def __ne__(self, other):

    def __gt__(self, other):
        if isinstance(other, VirtualPath):
            return self._path > other._path
        else:
            return NotImplemented

    def __ge__(self, other):
        if isinstance(other, VirtualPath):
            return self._path >= other._path
        else:
            return NotImplemented

    def __hash__(self):
        # Be strict about typing, as for __eq__().
        return hash((type(self), self._path))

    def samePath(self, other):
        """Compare the path with another instance, possibly of a subclass.

        other -- instance of VirtualPath, or of a subclass of
                 VirtualPath

        Return True if both objects hold the same path, regardless of
        their exact types. Raise TypeError if 'other' is not an
        instance of VirtualPath or of a subclass.

        """
        if isinstance(other, VirtualPath):
            return self._path == other._path
        else:
            raise TypeError("{obj!r} is of type {klass}, which is neither "
                            "VirtualPath nor a subclass thereof"
                            .format(obj=other, klass=type(other).__name__))

    def _check(self):
        """Run consistency checks on self."""
        assert (self._path.startswith('/') and not self._path.startswith('//')
                and (self._path == '/' or not self._path.endswith('/'))), \
                repr(self._path)

    @classmethod
    def normalizeStringPath(cls, path):
        """Normalize a string representing a virtual path.

        path -- input path (string)

        Return a string that always starts with a slash, never contains
        consecutive slashes and only ends with a slash if it's the root
        virtual path ('/').

        If 'path' doesn't start with a slash ('/'), it is considered
        relative to the root. This implies that if 'path' is the empty
        string, the return value is '/'.

        """
        if not path.startswith('/'):
            # / is the “virtual root” of the TerraSync repository
            path = '/' + path
        elif path.startswith('//') and not path.startswith('///'):
            # Nasty special case. As allowed (but not mandated!) by POSIX[1],
            # in pathlib.PurePosixPath('//some/path'), no collapsing happens[2].
            # This is only the case for exactly *two* *leading* slashes.
            # [1] http://pubs.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap04.html#tag_04_11
            # [2] https://www.python.org/dev/peps/pep-0428/#construction
            path = path[1:]

        return pathlib.PurePosixPath(path).as_posix()

    def __truediv__(self, s):
        """Path concatenation with the '/' operator.

        's' must be a string representing a relative path using the '/'
        separator, for instance "dir/subdir/other-subdir".

        Return a new instance of type(self).

        """
        assert not (s.startswith('/') or s.endswith('/')), repr(s)

        if self._path == '/':
            return type(self)(self._path + s)
        else:
            return type(self)(self._path + '/' + s)

    def joinpath(self, *args):
        """Combine 'self' with each given string argument in turn.

        Each argument should be of the form "foo", "foo/bar",
        "foo/bar/baz", etc. Return the corresponding instance of
        type(self).

        >>> p = VirtualPath("/foo").joinpath("bar", "baz", "quux/zoot")
        >>> str(p)
        '/foo/bar/baz/quux/zoot'

        """
        return self / '/'.join(args)

    @property
    def name(self):
        """Return a string representing the final path component.

        For the root virtual path ('/'), this is the empty string.

        >>> p = VirtualPath("/foo/bar/baz")
        >>> p.name
        'baz'

        """
        pos = self._path.rfind('/')
        assert pos != -1, (pos, self._path)

        return self._path[pos+1:]

    @property
    def parts(self):
        """Return a tuple containing the path's components.

        >>> p = VirtualPath('/usr/bin/python3')
        >>> p.parts
        ('/', 'usr', 'bin', 'python3')

        """
        if self._path == "/":
            return ('/',)
        else:
            # Skip the leading slash before splitting
            return ('/',) + tuple(self._path[1:].split('/'))

    def generateParents(self):
        """Generator function for the parents of the path.

        See the 'parents' property for details.

        """
        if self._path == '/':
            return

        assert self._path.startswith('/'), repr(self._path)
        prevPos = len(self._path)

        # Walk the slashes from right to left; each one found at a position
        # other than 0 delimits one ancestor.
        while True:
            pos = self._path.rfind('/', 0, prevPos)

            if pos > 0:
                yield type(self)(self._path[:pos])
                prevPos = pos
            else:
                assert pos == 0, pos
                break

        # The root is always the last ancestor
        yield type(self)('/')

    @property
    def parents(self):
        """The path ancestors.

        Return an immutable sequence providing access to the logical
        ancestors of the path.

        >>> p = VirtualPath('/foo/bar/baz')
        >>> len(p.parents)
        3
        >>> p.parents[0]
        terrasync.virtual_path.VirtualPath('/foo/bar')
        >>> p.parents[1]
        terrasync.virtual_path.VirtualPath('/foo')
        >>> p.parents[2]
        terrasync.virtual_path.VirtualPath('/')

        """
        return tuple(self.generateParents())

    @property
    def parent(self):
        """The logical parent of the path.

        >>> p = VirtualPath('/foo/bar/baz')
        >>> p.parent
        terrasync.virtual_path.VirtualPath('/foo/bar')
        >>> q = VirtualPath('/')
        >>> q.parent
        terrasync.virtual_path.VirtualPath('/')

        """
        pos = self._path.rfind('/')
        assert pos >= 0, pos

        if pos == 0:
            return type(self)('/')
        else:
            return type(self)(self._path[:pos])

    @property
    def suffix(self):
        """The extension of the final component, if any.

        >>> VirtualPath('/my/library/setup.py').suffix
        '.py'
        >>> VirtualPath('/my/library.tar.gz').suffix
        '.gz'
        >>> VirtualPath('/my/library').suffix
        ''

        """
        name = self.name
        pos = name.rfind('.')
        return name[pos:] if pos != -1 else ''

    @property
    def suffixes(self):
        """A list of the path's extensions.

        >>> VirtualPath('/my/library/setup.py').suffixes
        ['.py']
        >>> VirtualPath('/my/library.tar.gz').suffixes
        ['.tar', '.gz']
        >>> VirtualPath('/my/library').suffixes
        []

        """
        name = self.name
        prevPos = len(name)
        l = []

        # Collect the extensions from right to left, inserting each one at
        # the front so that the result is in left-to-right order.
        while True:
            pos = name.rfind('.', 0, prevPos)
            if pos == -1:
                break
            else:
                l.insert(0, name[pos:prevPos])
                prevPos = pos

        return l

    @property
    def stem(self):
        """The final path component, without its suffix.

        >>> VirtualPath('/my/library.tar.gz').stem
        'library.tar'
        >>> VirtualPath('/my/library.tar').stem
        'library'
        >>> VirtualPath('/my/library').stem
        'library'
        >>> VirtualPath('/').stem
        ''

        """
        name = self.name
        pos = name.rfind('.')

        return name if pos == -1 else name[:pos]

    def asRelative(self):
        """Return the virtual path without its leading '/'.

        >>> p = VirtualPath('/usr/bin/python3')
        >>> p.asRelative()
        'usr/bin/python3'

        >>> VirtualPath('').asRelative()
        ''
        >>> VirtualPath('/').asRelative()
        ''

        """
        assert self._path.startswith('/'), repr(self._path)
        return self._path[1:]

    def relativeTo(self, other):
        """Return the portion of this path that follows 'other'.

        other -- string representing a virtual path; it is normalized
                 with normalizeStringPath() before the comparison

        The return value is a string. It is the empty string when
        'other' designates the same path as 'self'. If the operation is
        impossible (i.e., 'other' is not 'self' nor one of its
        ancestors), ValueError is raised.

        >>> VirtualPath('/etc/passwd').relativeTo('/')
        'etc/passwd'
        >>> VirtualPath('/etc/passwd').relativeTo('/etc')
        'passwd'
        >>> VirtualPath('/etc/passwd').relativeTo('/etc/passwd')
        ''

        """
        normedOther = self.normalizeStringPath(other)

        if normedOther == '/':
            return self._path[1:]

        if self._path == normedOther:
            # Same path: nothing follows 'other'. This mirrors what the
            # root case above already returns for '/' relative to '/'.
            return ''

        # Requiring the separator right after the prefix ensures that only
        # whole components match ('/foo/ba' is not an ancestor of '/foo/bar').
        if self._path.startswith(normedOther + '/'):
            return self._path[len(normedOther) + 1:]

        raise ValueError("{!r} does not start with '{}'".format(self, other))

    def withName(self, newName):
        """Return a new VirtualPath instance with the 'name' part changed.

        If the original path is '/' (which doesn't have a name in the
        sense of the 'name' property), ValueError is raised. ValueError
        is also raised if 'newName' starts or ends with a '/'.

        >>> p = VirtualPath('/foobar/downloads/pathlib.tar.gz')
        >>> p.withName('setup.py')
        terrasync.virtual_path.VirtualPath('/foobar/downloads/setup.py')

        """
        if self._path == '/':
            raise ValueError("{!r} has an empty name".format(self))
        else:
            pos = self._path.rfind('/')
            assert pos != -1, (pos, self._path)

            if newName.startswith('/'):
                raise ValueError("{!r} starts with a '/'".format(newName))
            elif newName.endswith('/'):
                raise ValueError("{!r} ends with a '/'".format(newName))
            else:
                # The result is always a plain VirtualPath, even when 'self'
                # is an instance of a subclass.
                return VirtualPath(self._path[:pos]) / newName

    def withSuffix(self, newSuffix):
        """Return a new VirtualPath instance with the suffix changed.

        If the original path doesn't have a suffix, the new suffix is
        appended:

        >>> p = VirtualPath('/foobar/downloads/pathlib.tar.gz')
        >>> p.withSuffix('.bz2')
        terrasync.virtual_path.VirtualPath('/foobar/downloads/pathlib.tar.bz2')
        >>> p = VirtualPath('/foobar/README')
        >>> p.withSuffix('.txt')
        terrasync.virtual_path.VirtualPath('/foobar/README.txt')

        If 'self' is the root virtual path ('/') or 'newSuffix' doesn't
        start with '.', ValueError is raised.

        """
        if not newSuffix.startswith('.'):
            raise ValueError("new suffix {!r} doesn't start with '.'"
                             .format(newSuffix))

        name = self.name
        if not name:
            raise ValueError("{!r} has an empty 'name' part".format(self))

        pos = name.rfind('.')

        if pos == -1:
            return self.withName(name + newSuffix)       # append suffix
        else:
            return self.withName(name[:pos] + newSuffix) # replace suffix
class MutableVirtualPath(VirtualPath):
    """Variant of VirtualPath that supports in-place modification.

    Instances can have path components appended in-place with the /=
    operator, which VirtualPath objects do not allow. In exchange,
    instances of this class are not hashable: they can't serve as
    dictionary keys nor be stored in a set or frozenset.

    """

    # Setting __hash__ to None is what makes the type unhashable
    __hash__ = None

    def _normalize(self):
        """Bring self._path back to its normalized form."""
        self._path = self.normalizeStringPath(self._path)

    def __itruediv__(self, s):
        """In-place path concatenation, used for the '/=' operator.

        's' must be a string representing a relative path using the '/'
        separator, for instance "dir/subdir/other-subdir". It must
        neither start nor end with a '/'.

        Return self, as required for in-place operators.

        """
        # Consistency check on the current state; it could be dropped if it
        # ever turned out to be a performance problem.
        self._check()
        assert not s.startswith('/') and not s.endswith('/'), repr(s)

        # The root path already ends with the separator
        separator = '' if self._path == '/' else '/'
        self._path = self._path + separator + s

        # Collapse multiple slashes, remove trailing '/' except if the whole
        # path is '/', etc.
        self._normalize()

        return self
if __name__ == "__main__":
    # Executing this module as a script runs the doctests embedded in its
    # docstrings. This works, but for full test coverage the unittest
    # framework should be used instead (it is set up to automatically run
    # all doctests from this module!).
    #
    # Hint: 'python3 -m unittest discover' from the TerraSync directory
    #       should do the trick.
    from doctest import testmod
    testmod()

View file

View file

@ -0,0 +1,3 @@
version:1
path:some/path
d:some\illegal directory name with a backslash:378b3dd58ce3058f2992b70aa5ecf8947a4d7f9e

View file

@ -0,0 +1,3 @@
version:1
path:some/path
f:some\illegal file name with a backslash:4cbf3d1746a1249bff7809e4b079dd80cfce594c:123

View file

@ -0,0 +1,3 @@
version:1
path:some/path
t:some\illegal tarball name with a backslash.tgz:b63a067d82824f158d6bde66f9e76654274277fe:1234567

View file

@ -0,0 +1,3 @@
version:1
path:some/path
d:..:378b3dd58ce3058f2992b70aa5ecf8947a4d7f9e

View file

@ -0,0 +1,2 @@
version:1
path:some/path/with/a/../component

View file

@ -0,0 +1,2 @@
version:1
path:some/path/non-ASCII chars like é, ê, €, Œ, Ÿ, etc./foo/bar

View file

@ -0,0 +1,3 @@
version:1
path:some/path
f:..:4cbf3d1746a1249bff7809e4b079dd80cfce594c:123

View file

@ -0,0 +1,2 @@
version:1
path:some/path/that/contains \ a/backslash

View file

@ -0,0 +1,2 @@
version:1
path:/some/path/that/starts/with/a/slash

View file

@ -0,0 +1,3 @@
version:1
path:some/path
d:some/illegal directory name with a slash:378b3dd58ce3058f2992b70aa5ecf8947a4d7f9e

View file

@ -0,0 +1,3 @@
version:1
path:some/path
f:some/illegal file name with a slash:4cbf3d1746a1249bff7809e4b079dd80cfce594c:123

View file

@ -0,0 +1,3 @@
version:1
path:some/path
t:some/illegal tarball name with a slash.tgz:b63a067d82824f158d6bde66f9e76654274277fe:1234567

View file

@ -0,0 +1,3 @@
version:1
path:some/path
t:..:b63a067d82824f158d6bde66f9e76654274277fe:1234567

View file

@ -0,0 +1,16 @@
# Comment line
version:1
path:some/path
time:20200926-10:38Z
d:Airports:8a93b5d8a2b04d2fb8de4ef58ad02f9e8819d314
d:Models:bee221c9d2621dc9b69cd9e0ad7dd0605f6ea928
d:Objects:10ae32c986470fa55b56b8eefbc6ed565cce0642
# Other comment line
d:Terrain:e934024dc0f959f9a433e47c646d256630052c2e
d:Buildings:19060725efc2a301fa6844991e2922d42d8de5e2
d:Pylons:378b3dd58ce3058f2992b70aa5ecf8947a4d7f9e
d:Roads:89f8f10406041948368c76c0a2e794d45ac536b7
f:some file:4cbf3d1746a1249bff7809e4b079dd80cfce594c:123
f:other file:62726252f7183eef31001c1c565e149f3c4527b9:4567
f:third file:303adcc1747d8dc438096307189881e987e9bb61:89012
t:Airports_archive.tgz:b63a067d82824f158d6bde66f9e76654274277fe:1234567

View file

@ -0,0 +1,95 @@
#! /usr/bin/env python3
# -*- coding: utf-8 -*-
# test_dirindex.py --- Test module for terrasync.dirindex
# Copyright (C) 2020 Florent Rougon
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
# In order to exercise all tests, run the following command from the parent
# directory (you may omit the 'discover' argument):
#
# python3 -m unittest discover
"""Test module for terrasync.dirindex"""
import os
import unittest
from terrasync.dirindex import DirIndex
from terrasync.exceptions import InvalidDirIndexFile
from terrasync.virtual_path import VirtualPath
baseDir = os.path.dirname(__file__)


def testData(*args):
    """Return the path of a test fixture stored below data/dirindex.

    The path components given in 'args' are appended, in order, to the
    'data/dirindex' directory located next to this test module.

    """
    components = (baseDir, "data", "dirindex") + args
    return os.path.join(*components)
# Expected contents of the "good" fixture sample_dirindex_1, in the order
# of its 'd:' lines. The test compares lists, so the order matters.
directories_in_sample_dirindex_1 = [
    {'name': name, 'hash': digest}
    for name, digest in (
        ('Airports', '8a93b5d8a2b04d2fb8de4ef58ad02f9e8819d314'),
        ('Models', 'bee221c9d2621dc9b69cd9e0ad7dd0605f6ea928'),
        ('Objects', '10ae32c986470fa55b56b8eefbc6ed565cce0642'),
        ('Terrain', 'e934024dc0f959f9a433e47c646d256630052c2e'),
        ('Buildings', '19060725efc2a301fa6844991e2922d42d8de5e2'),
        ('Pylons', '378b3dd58ce3058f2992b70aa5ecf8947a4d7f9e'),
        ('Roads', '89f8f10406041948368c76c0a2e794d45ac536b7'),
    )
]

# Expected entries for the 'f:' lines of the same fixture
files_in_sample_dirindex_1 = [
    {'name': name, 'hash': digest, 'size': size}
    for name, digest, size in (
        ('some file', '4cbf3d1746a1249bff7809e4b079dd80cfce594c', 123),
        ('other file', '62726252f7183eef31001c1c565e149f3c4527b9', 4567),
        ('third file', '303adcc1747d8dc438096307189881e987e9bb61', 89012),
    )
]

# Expected entries for the 't:' lines of the same fixture
tarballs_in_sample_dirindex_1 = [
    {'name': name, 'hash': digest, 'size': size}
    for name, digest, size in (
        ('Airports_archive.tgz',
         'b63a067d82824f158d6bde66f9e76654274277fe',
         1234567),
    )
]
class TestDirIndex(unittest.TestCase):
    """Unit tests for the DirIndex class."""

    def test_constructor(self):
        # A well-formed file must be parsed into the expected attributes
        d = DirIndex(testData("good", "sample_dirindex_1"))
        self.assertEqual(d.version, 1)
        self.assertEqual(d.path, VirtualPath("some/path"))
        self.assertEqual(d.directories, directories_in_sample_dirindex_1)
        self.assertEqual(d.files, files_in_sample_dirindex_1)
        self.assertEqual(d.tarballs, tarballs_in_sample_dirindex_1)

        # Each of these malformed files must be rejected by the constructor
        stems = ("path_starts_with_slash",
                 "path_contains_a_backslash",
                 "dotdot_in_path",
                 "slash_in_directory_name",
                 "slash_in_file_name",
                 "slash_in_tarball_name",
                 "backslash_in_directory_name",
                 "backslash_in_file_name",
                 "backslash_in_tarball_name",
                 "directory_name_is_double_colon",
                 "file_name_is_double_colon",
                 "tarball_name_is_double_colon",)
        for stem in stems:
            # subTest: every fixture is checked even if an earlier one
            # fails, and the failing stem appears in the test report.
            with self.subTest(stem=stem):
                with self.assertRaises(InvalidDirIndexFile):
                    DirIndex(testData("bad", "bad_dirindex_" + stem))

        # A file with an invalid encoding fails with UnicodeDecodeError
        with self.assertRaises(UnicodeDecodeError):
            DirIndex(testData("bad", "bad_dirindex_encoding"))

View file

@ -0,0 +1,357 @@
#! /usr/bin/env python3
# -*- coding: utf-8 -*-
# test_virtual_path.py --- Test module for terrasync.virtual_path
# Copyright (C) 2018 Florent Rougon
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
# In order to exercise all tests, run the following command from the parent
# directory (you may omit the 'discover' argument):
#
# python3 -m unittest discover
import collections
import unittest
from terrasync.virtual_path import VirtualPath, MutableVirtualPath
# Hook doctest-based tests into the unittest test discovery mechanism
import doctest
import terrasync.virtual_path
def load_tests(loader, tests, ignore):
    """Hook used by unittest to build the test suite of this module.

    The doctests found in terrasync.virtual_path are added to 'tests',
    which is then returned.

    """
    doctestSuite = doctest.DocTestSuite(terrasync.virtual_path)
    tests.addTests(doctestSuite)
    return tests
class VirtualPathCommonTests:
    """Tests shared by VirtualPath and MutableVirtualPath.

    Every test in this class exercises the class stored in the 'cls'
    class attribute (VirtualPath or MutableVirtualPath), which is set by
    the concrete TestCase subclasses. A test that can't work for both
    classes doesn't belong here!

    """

    def test_normalizeStringPath(self):
        # (input string, expected normalized string)
        expectations = (
            ("/", "/"),
            ("", "/"),
            ("/abc/Def ijk//l Mn///op/q/rst/", "/abc/Def ijk/l Mn/op/q/rst"),
            ("abc/def", "/abc/def"),
            ("/abc/def", "/abc/def"),
            ("//abc/def", "/abc/def"),
            ("///abc/def", "/abc/def"),
            ("/abc//def", "/abc/def"),
        )
        for raw, normalized in expectations:
            self.assertEqual(self.cls.normalizeStringPath(raw), normalized)

    # Unless the implementation of VirtualPath.__init__() has changed
    # meanwhile, the following function must be essentially the same as
    # test_normalizeStringPath().
    def test_constructor_and_str(self):
        # (constructor argument, expected str() of the instance)
        expectations = (
            ("/", "/"),
            ("", "/"),
            ("/abc/Def ijk//l Mn///op/q/rst/", "/abc/Def ijk/l Mn/op/q/rst"),
            ("abc/def", "/abc/def"),
            ("/abc/def", "/abc/def"),
            ("//abc/def", "/abc/def"),
            ("///abc/def", "/abc/def"),
            ("/abc//def", "/abc/def"),
        )
        for raw, asString in expectations:
            p = self.cls(raw)
            self.assertEqual(str(p), asString)

    def test_asPosix(self):
        # (constructor argument, expected asPosix() result)
        expectations = (
            ("", "/"),
            ("/", "/"),
            ("/abc//def", "/abc/def"),
            ("/abc//def/", "/abc/def"),
            ("//abc//def//", "/abc/def"),
            ("////abc//def//", "/abc/def"),
        )
        for raw, posix in expectations:
            self.assertEqual(self.cls(raw).asPosix(), posix)

    def test_samePath(self):
        # Pairs of spellings that must designate the same path
        equivalentSpellings = (
            ("", ""),
            ("", "/"),
            ("/", ""),
            ("/", "/"),
            ("/abc/def", "/abc/def"),
            ("/abc//def", "/abc/def"),
            ("/abc/def/", "/abc/def"),
        )
        for first, second in equivalentSpellings:
            self.assertTrue(self.cls(first).samePath(self.cls(second)))

    def test_comparisons(self):
        make = self.cls

        # Equality is insensitive to the spelling of the path...
        for spelling in ("/abc/def", "/abc//def", "/abc/def/"):
            self.assertEqual(make(spelling), make("/abc/def"))
        # ... but sensitive to case and to trailing whitespace.
        for different in ("/abc/dEf", "/abc/def "):
            self.assertNotEqual(make(different), make("/abc/def"))

        # Ordering operators
        self.assertLessEqual(make("/foo/bar"), make("/foo/bar"))
        self.assertLessEqual(make("/foo/bar"), make("/foo/bbr"))
        self.assertLess(make("/foo/bar"), make("/foo/bbr"))

        self.assertGreaterEqual(make("/foo/bar"), make("/foo/bar"))
        self.assertGreaterEqual(make("/foo/bbr"), make("/foo/bar"))
        self.assertGreater(make("/foo/bbr"), make("/foo/bar"))

    def test_truedivOperators(self):
        """
        Test operators used to add paths components to a VirtualPath instance."""
        make = self.cls
        expected = make("/foo/bar/baz/quux/zoot")
        tail = "baz/quux/zoot"

        self.assertEqual(expected, make("/") / "foo" / "bar" / tail)
        self.assertEqual(expected, make("/foo") / "bar" / tail)
        self.assertEqual(expected, make("/foo/bar") / tail)

    def test_joinpath(self):
        expected = self.cls("/foo/bar/baz/quux/zoot")
        joined = self.cls("/foo").joinpath("bar", "baz", "quux/zoot")
        self.assertEqual(expected, joined)

    def test_nameAttribute(self):
        # The root virtual path has an empty name
        self.assertEqual(self.cls("/").name, "")

        deepPath = self.cls("/foo/bar/baz/quux/zoot")
        self.assertEqual(deepPath.name, "zoot")

    def test_partsAttribute(self):
        self.assertEqual(self.cls("/").parts, ("/",))

        deepPath = self.cls("/foo/bar/baz/quux/zoot")
        self.assertEqual(deepPath.parts,
                         ("/", "foo", "bar", "baz", "quux", "zoot"))

    def test_parentsAttribute(self):
        # (path, string form of its expected ancestors, nearest first)
        expectations = (
            ("/", ()),                       # the root has no ancestor
            ("/foo", ("/",)),
            ("/foo/bar", ("/foo", "/")),
            ("/foo/bar/baz", ("/foo/bar", "/foo", "/")),
        )
        for raw, ancestors in expectations:
            p = self.cls(raw)
            self.assertEqual(tuple(p.parents),
                             tuple(self.cls(s) for s in ancestors))

    def test_parentAttribute(self):
        # (path, string form of its expected parent)
        expectations = (
            ("/", "/"),
            ("/foo", "/"),
            ("/foo/bar", "/foo"),
            ("/foo/bar/baz", "/foo/bar"),
        )
        for raw, parent in expectations:
            p = self.cls(raw)
            self.assertEqual(p.parent, self.cls(parent))

    def test_suffixAttribute(self):
        # (path, expected suffix)
        expectations = (
            ("/", ''),
            ("/foo/bar/baz.py", '.py'),
            ("/foo/bar/baz.py.bla", '.bla'),
            ("/foo/bar/baz", ''),
        )
        for raw, suffix in expectations:
            p = self.cls(raw)
            self.assertEqual(p.suffix, suffix)

    def test_suffixesAttribute(self):
        # (path, expected list of suffixes)
        expectations = (
            ("/", []),
            ("/foo/bar/baz.py", ['.py']),
            ("/foo/bar/baz.py.bla", ['.py', '.bla']),
            ("/foo/bar/baz", []),
        )
        for raw, suffixes in expectations:
            p = self.cls(raw)
            self.assertEqual(p.suffixes, suffixes)

    def test_stemAttribute(self):
        # (path, expected stem)
        expectations = (
            ("/", ''),
            ("/foo/bar/baz.py", 'baz'),
            ("/foo/bar/baz.py.bla", 'baz.py'),
        )
        for raw, stem in expectations:
            p = self.cls(raw)
            self.assertEqual(p.stem, stem)

    def test_asRelative(self):
        self.assertEqual(self.cls("/").asRelative(), "")

        deepPath = self.cls("/foo/bar/baz/quux/zoot")
        self.assertEqual(deepPath.asRelative(), "foo/bar/baz/quux/zoot")

    def test_relativeTo(self):
        # The root relative to the root, with every spelling of the root
        for selfSpelling, otherSpelling in (("", ""), ("", "/"),
                                            ("/", "/"), ("/", "")):
            self.assertEqual(self.cls(selfSpelling).relativeTo(otherSpelling),
                             "")

        p = self.cls("/foo/bar/baz/quux/zoot")
        # (argument of relativeTo(), expected result)
        expectations = (
            ("", "foo/bar/baz/quux/zoot"),
            ("/", "foo/bar/baz/quux/zoot"),
            ("foo", "bar/baz/quux/zoot"),
            ("foo/", "bar/baz/quux/zoot"),
            ("/foo", "bar/baz/quux/zoot"),
            ("/foo/", "bar/baz/quux/zoot"),
            ("foo/bar/baz", "quux/zoot"),
            ("foo/bar/baz/", "quux/zoot"),
            ("/foo/bar/baz", "quux/zoot"),
            ("/foo/bar/baz/", "quux/zoot"),
        )
        for other, remainder in expectations:
            self.assertEqual(p.relativeTo(other), remainder)

        # A prefix that stops in the middle of a component, or that is not
        # a prefix at all, must be rejected.
        for notAnAncestor in ("/foo/ba", "/foo/balloon"):
            with self.assertRaises(ValueError):
                p.relativeTo(notAnAncestor)

    def test_withName(self):
        p = self.cls("/foo/bar/baz/quux/zoot")
        # (new name, string form of the expected result); the results are
        # compared with VirtualPath instances, whatever self.cls is.
        expectations = (
            ("", "/foo/bar/baz/quux"),
            ("pouet", "/foo/bar/baz/quux/pouet"),
            ("pouet/zdong", "/foo/bar/baz/quux/pouet/zdong"),
        )
        for newName, result in expectations:
            self.assertEqual(p.withName(newName), VirtualPath(result))

        # The self.cls object has no 'name' (referring to the 'name' property)
        for rootSpelling in ("", "/"):
            with self.assertRaises(ValueError):
                self.cls(rootSpelling).withName("foobar")

    def test_withSuffix(self):
        # An existing suffix is replaced
        archive = self.cls("/foo/bar/baz.tar.gz")
        self.assertEqual(archive.withSuffix(".bz2"),
                         VirtualPath("/foo/bar/baz.tar.bz2"))

        # Without a suffix, the new one is appended
        bare = self.cls("/foo/bar/baz")
        self.assertEqual(bare.withSuffix(".tar.xz"),
                         VirtualPath("/foo/bar/baz.tar.xz"))

        # The new suffix must start with a dot
        with self.assertRaises(ValueError):
            self.cls("/foo/bar/baz.tar.gz").withSuffix("no-leading-dot")

        with self.assertRaises(ValueError):
            # The root virtual path ('/') can't be used for this
            self.cls("/").withSuffix(".foobar")
class TestVirtualPath(unittest.TestCase, VirtualPathCommonTests):
    """Run the common test suite against the VirtualPath class.

    All test methods come from VirtualPathCommonTests; this class only
    selects the class under test.

    """

    # Class exercised by the tests inherited from VirtualPathCommonTests
    cls = VirtualPath
class TestVirtualPathSpecific(unittest.TestCase):
    """Tests specific to the VirtualPath class."""

    def test_isHashableType(self):
        p = VirtualPath("/foo")
        # assertIsInstance reports the offending object and type on failure
        self.assertIsInstance(p, collections.abc.Hashable)

    def test_insideSet(self):
        l1 = [ VirtualPath("/foo/bar"),
               VirtualPath("/foo/baz") ]
        l2 = l1 + [ VirtualPath("/foo/bar") ] # l2 has a duplicate element

        # Sets allow one to ignore duplicate elements when comparing
        self.assertEqual(set(l1), set(l2))
        self.assertEqual(frozenset(l1), frozenset(l2))
class TestMutableVirtualPath(unittest.TestCase, VirtualPathCommonTests):
    """Run the common test suite against the MutableVirtualPath class.

    All test methods come from VirtualPathCommonTests; this class only
    selects the class under test.

    """

    # Class exercised by the tests inherited from VirtualPathCommonTests
    cls = MutableVirtualPath
class TestMutableVirtualPathSpecific(unittest.TestCase):
    """Tests specific to the MutableVirtualPath class."""

    def test_mixedComparisons(self):
        # samePath() must work across VirtualPath and MutableVirtualPath,
        # in both directions, whatever the spelling of the path.
        spellings = ("/abc/def", "/abc//def", "/abc/def/")

        for spelling in spellings:
            self.assertTrue(
                VirtualPath(spelling).samePath(MutableVirtualPath("/abc/def")))

        for spelling in spellings:
            self.assertTrue(
                MutableVirtualPath(spelling).samePath(VirtualPath("/abc/def")))

    def test_inPlacePathConcatenation(self):
        p = VirtualPath("/foo/bar/baz/quux/zoot")

        q = MutableVirtualPath("/foo")
        q /= "bar"
        q /= "baz/quux/zoot"

        self.assertTrue(p.samePath(q))

    def test_isNotHashableType(self):
        p = MutableVirtualPath("/foo")
        # assertNotIsInstance reports the offending object and type on failure
        self.assertNotIsInstance(p, collections.abc.Hashable)

15
terrasync/run.sh Executable file
View file

@ -0,0 +1,15 @@
#! /bin/bash

# Run one TerraSync update, with optional user-provided hooks before and
# after it. All output goes to a per-day log file under /log.
logfile="/log/$(date +%Y-%m-%d).log"

# Pre update hook: receives the path of the log file as its only argument
if [ -f "/config/pre.sh" ]; then
    /config/pre.sh "$logfile"
fi

# Append (>>) rather than truncate (>): the pre update hook is handed the
# same log file, and whatever it wrote there must not be wiped out.
# NOTE(review): cron usually starts jobs with a minimal environment, so
# URL may be unset here when the script is launched from the crontab --
# confirm that the variable actually reaches this script.
/TerraSync/terrasync.py --url "$URL" --remove-orphan --target=/scenery/ >> "$logfile" 2>&1

# Post update hook: receives the path of the log file as its only argument
if [ -f "/config/post.sh" ]; then
    /config/post.sh "$logfile"
fi

3
terrasync/ts-cron Executable file
View file

@ -0,0 +1,3 @@
# m h dom mon dow command
# Run /run.sh (the TerraSync update script) every day at 02:00.
0 2 * * * /run.sh
# An empty line is required at the end of this file for a valid cron file.