5cca99bbae
Add more functions and properties to VirtualPath that directly correspond to functions and properties of pathlib.PurePath, except that types are adapted of course, and that for API consistency, VirtualPath methods use mixedCaseStyle whereas those of pathlib.PurePath use underscore_style.
503 lines
16 KiB
Python
503 lines
16 KiB
Python
# -*- coding: utf-8 -*-
|
||
|
||
# virtual_path.py --- Classes used to manipulate slash-separated virtual paths
|
||
#
|
||
# Copyright (C) 2018 Florent Rougon
|
||
#
|
||
# This program is free software; you can redistribute it and/or
|
||
# modify it under the terms of the GNU General Public License as
|
||
# published by the Free Software Foundation; either version 2 of the
|
||
# License, or (at your option) any later version.
|
||
#
|
||
# This program is distributed in the hope that it will be useful, but
|
||
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||
# General Public License for more details.
|
||
#
|
||
# You should have received a copy of the GNU General Public License
|
||
# along with this program; if not, write to the Free Software
|
||
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||
|
||
"""Module containing the VirtualPath and MutableVirtualPath classes."""
|
||
|
||
import pathlib
|
||
|
||
|
||
class VirtualPath:
    """Class used to represent virtual paths using the slash separator.

    This class always uses the slash ('/') as the separator between
    components. For terrasync.py, the root path '/' corresponds to the
    repository root, regardless of where it is stored (hard drive,
    remote server, etc.).

    Note: because of this, the class is not supposed to be used directly
          for filesystem accesses, since some root directory or
          protocol://server/root-dir prefix would have to be prepended
          to provide reasonably useful functionality. This is why the
          paths managed by this class are said to be virtual. This also
          implies that even in Python 3.6 or later, this class should
          *not* inherit from os.PathLike.

    Whenever a given feature exists in pathlib.PurePath, this class
    replicates the corresponding pathlib.PurePath API, but using
    mixedCaseStyle instead of underscore_style (the latter being used
    for every method of pathlib.PurePath). Of course, types are adapted:
    for instance, methods of this class often return a VirtualPath
    instance, whereas the corresponding pathlib.PurePath methods would
    return a pathlib.PurePath instance.

    """

    def __init__(self, p):
        """Initialize the instance from the string path 'p'.

        'p' is normalized with normalizeStringPath(), therefore a path
        that doesn't start with a slash is taken relative to the root.

        """
        # Once this function exits, self._path *must not be changed* anymore
        # (doing so would violate the contract for a hashable object: the
        # hash must not change once the object has been constructed).
        self._path = self.normalizeStringPath(p)
        # This check could of course be skipped if it is found to really
        # affect performance.
        self._check()

    def __str__(self):
        """Return a string representation of the path in self.

        The return value:
          - always starts with a '/';
          - never ends with a '/' except if it is exactly '/' (i.e.,
            the root virtual path).

        """
        return self._path

    def asPosix(self):
        """Return a string representation of the path in self.

        This method returns str(self), it is only present for
        compatibility with pathlib.PurePath.

        """
        return str(self)

    def __repr__(self):
        # The module name is part of the representation; this is what the
        # repr-based doctests of this class rely on.
        return "{}.{}({!r})".format(__name__, type(self).__name__, self._path)

    def __lt__(self, other):
        # Allow sorting with instances of VirtualPath, or of any subclass. Note
        # that the == operator (__eq__()) and therefore also != are stricter
        # with respect to typing.
        if isinstance(other, VirtualPath):
            return self._path < other._path
        else:
            return NotImplemented

    def __le__(self, other):
        if isinstance(other, VirtualPath):
            return self._path <= other._path
        else:
            return NotImplemented

    def __eq__(self, other):
        # The types must be the same, therefore a VirtualPath never compares
        # equal to a MutableVirtualPath with the == operator. For such
        # comparisons, use the samePath() method. If __eq__() (and thus
        # necessarily __hash__()) were more lax about typing, adding
        # VirtualPath instances and instances of hashable subclasses of
        # VirtualPath with the same _path to a set or frozenset would lead to
        # unintuitive behavior, since they would all be considered equal.
        return type(self) is type(other) and self._path == other._path

    def __gt__(self, other):
        if isinstance(other, VirtualPath):
            return self._path > other._path
        else:
            return NotImplemented

    def __ge__(self, other):
        if isinstance(other, VirtualPath):
            return self._path >= other._path
        else:
            return NotImplemented

    def __hash__(self):
        # Be strict about typing, as for __eq__().
        return hash((type(self), self._path))

    def samePath(self, other):
        """Compare the path with another instance, possibly of a subclass.

        other -- instance of VirtualPath, or of a subclass of
                 VirtualPath

        Return True if both objects hold the same path, False otherwise.
        Raise TypeError if 'other' is not an instance of VirtualPath.

        """
        if isinstance(other, VirtualPath):
            return self._path == other._path
        else:
            raise TypeError("{obj!r} is of type {klass}, which is neither "
                            "VirtualPath nor a subclass thereof"
                            .format(obj=other, klass=type(other).__name__))

    def _check(self):
        """Run consistency checks on self."""
        assert (self._path.startswith('/') and not self._path.startswith('//')
                and (self._path == '/' or not self._path.endswith('/'))), \
                repr(self._path)

    @classmethod
    def normalizeStringPath(cls, path):
        """Normalize a string representing a virtual path.

        path -- input path (string)

        Return a string that always starts with a slash, never contains
        consecutive slashes and only ends with a slash if it's the root
        virtual path ('/').

        If 'path' doesn't start with a slash ('/'), it is considered
        relative to the root. This implies that if 'path' is the empty
        string, the return value is '/'.

        """
        if not path.startswith('/'):
            # / is the "virtual root" of the TerraSync repository
            path = '/' + path
        elif path.startswith('//') and not path.startswith('///'):
            # Nasty special case. As allowed (but not mandated!) by POSIX[1],
            # in pathlib.PurePosixPath('//some/path'), no collapsing happens[2].
            # This is only the case for exactly *two* *leading* slashes.
            # [1] http://pubs.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap04.html#tag_04_11
            # [2] https://www.python.org/dev/peps/pep-0428/#construction
            path = path[1:]

        return pathlib.PurePosixPath(path).as_posix()

    def __truediv__(self, s):
        """Path concatenation with the '/' operator.

        's' must be a string representing a relative path using the '/'
        separator, for instance "dir/subdir/other-subdir".

        Return a new instance of type(self).

        """
        assert not (s.startswith('/') or s.endswith('/')), repr(s)

        if self._path == '/':
            return type(self)(self._path + s)
        else:
            return type(self)(self._path + '/' + s)

    def joinpath(self, *args):
        """Combine 'self' with each given string argument in turn.

        Each argument should be of the form "foo", "foo/bar",
        "foo/bar/baz", etc. Return the corresponding instance of
        type(self).

        >>> p = VirtualPath("/foo").joinpath("bar", "baz", "quux/zoot")
        >>> str(p)
        '/foo/bar/baz/quux/zoot'

        """
        return self / '/'.join(args)

    @property
    def name(self):
        """Return a string representing the final path component.

        The result is the empty string for the root virtual path.

        >>> p = VirtualPath("/foo/bar/baz")
        >>> p.name
        'baz'

        """
        pos = self._path.rfind('/')
        assert pos != -1, (pos, self._path)

        return self._path[pos+1:]

    @property
    def parts(self):
        """Return a tuple containing the path's components.

        >>> p = VirtualPath('/usr/bin/python3')
        >>> p.parts
        ('/', 'usr', 'bin', 'python3')

        """
        if self._path == "/":
            return ('/',)
        else:
            # Skip the leading slash before splitting
            return ('/',) + tuple(self._path[1:].split('/'))

    def generateParents(self):
        """Generator function for the parents of the path.

        Yield instances of type(self), from the closest ancestor up to
        the root virtual path. Nothing is yielded for the root itself.
        See the 'parents' property for details.

        """
        if self._path == '/':
            return

        assert self._path.startswith('/'), repr(self._path)
        pos = self._path.rfind('/')

        # Each slash found at a strictly positive index ends one ancestor.
        while pos > 0:
            yield type(self)(self._path[:pos])
            pos = self._path.rfind('/', 0, pos)

        assert pos == 0, pos
        yield type(self)('/')

    @property
    def parents(self):
        """The path ancestors.

        Return an immutable sequence providing access to the logical
        ancestors of the path.

        >>> p = VirtualPath('/foo/bar/baz')
        >>> len(p.parents)
        3
        >>> p.parents[0]
        terrasync.virtual_path.VirtualPath('/foo/bar')
        >>> p.parents[1]
        terrasync.virtual_path.VirtualPath('/foo')
        >>> p.parents[2]
        terrasync.virtual_path.VirtualPath('/')

        """
        return tuple(self.generateParents())

    @property
    def parent(self):
        """The logical parent of the path.

        The parent of the root virtual path is the root virtual path.

        >>> p = VirtualPath('/foo/bar/baz')
        >>> p.parent
        terrasync.virtual_path.VirtualPath('/foo/bar')
        >>> q = VirtualPath('/')
        >>> q.parent
        terrasync.virtual_path.VirtualPath('/')

        """
        pos = self._path.rfind('/')
        assert pos >= 0, pos

        if pos == 0:
            return type(self)('/')
        else:
            return type(self)(self._path[:pos])

    @staticmethod
    def _suffixPos(name):
        """Return the index where the last suffix of 'name' starts, or -1.

        Leading dots are part of the name proper (think of '.bashrc'),
        they never start a suffix, hence the search begins after them.

        """
        leadingDots = len(name) - len(name.lstrip('.'))
        return name.rfind('.', leadingDots)

    @property
    def suffix(self):
        """The extension of the final component, if any.

        As with pathlib.PurePath, a leading dot in the final component
        doesn't start a suffix.

        >>> VirtualPath('/my/library/setup.py').suffix
        '.py'
        >>> VirtualPath('/my/library.tar.gz').suffix
        '.gz'
        >>> VirtualPath('/my/library').suffix
        ''
        >>> VirtualPath('/home/user/.bashrc').suffix
        ''

        """
        name = self.name
        pos = self._suffixPos(name)
        return name[pos:] if pos != -1 else ''

    @property
    def suffixes(self):
        """A list of the path's extensions.

        As with pathlib.PurePath, a leading dot in the final component
        doesn't start a suffix.

        >>> VirtualPath('/my/library/setup.py').suffixes
        ['.py']
        >>> VirtualPath('/my/library.tar.gz').suffixes
        ['.tar', '.gz']
        >>> VirtualPath('/my/library').suffixes
        []
        >>> VirtualPath('/home/user/.bashrc').suffixes
        []

        """
        # The first element of the split is the stem proper, every other
        # element is an extension that lost its dot in the split.
        return ['.' + ext for ext in self.name.lstrip('.').split('.')[1:]]

    @property
    def stem(self):
        """The final path component, without its suffix.

        >>> VirtualPath('/my/library.tar.gz').stem
        'library.tar'
        >>> VirtualPath('/my/library.tar').stem
        'library'
        >>> VirtualPath('/my/library').stem
        'library'
        >>> VirtualPath('/home/user/.bashrc').stem
        '.bashrc'
        >>> VirtualPath('/').stem
        ''

        """
        name = self.name
        pos = self._suffixPos(name)

        return name if pos == -1 else name[:pos]

    def asRelative(self):
        """Return the virtual path without its leading '/'.

        >>> p = VirtualPath('/usr/bin/python3')
        >>> p.asRelative()
        'usr/bin/python3'

        >>> VirtualPath('').asRelative()
        ''
        >>> VirtualPath('/').asRelative()
        ''

        """
        assert self._path.startswith('/'), repr(self._path)
        return self._path[1:]

    def relativeTo(self, other):
        """Return the portion of this path that follows 'other'.

        other -- string (normalized with normalizeStringPath()) or
                 instance of VirtualPath, possibly of a subclass

        The return value is a string; it is empty when both paths are
        the same. If the operation is impossible, ValueError is raised.

        >>> VirtualPath('/etc/passwd').relativeTo('/')
        'etc/passwd'
        >>> VirtualPath('/etc/passwd').relativeTo('/etc')
        'passwd'
        >>> VirtualPath('/etc').relativeTo('/etc')
        ''

        """
        if isinstance(other, VirtualPath):
            normedOther = other._path
        else:
            normedOther = self.normalizeStringPath(other)

        if normedOther == '/':
            return self._path[1:]
        elif self._path.startswith(normedOther):
            rest = self._path[len(normedOther):]

            if not rest:
                # Same path: nothing follows 'other' (this mirrors what
                # is returned for '/' relative to '/').
                return ''
            elif rest.startswith('/'):
                return rest[1:]
            # Otherwise, 'other' stops in the middle of a component, as in
            # '/etc/passwd' versus '/et'.

        raise ValueError("{!r} does not start with '{}'".format(self, other))

    def withName(self, newName):
        """Return a new VirtualPath instance with the 'name' part changed.

        The result is built with the VirtualPath constructor, therefore
        it is a plain VirtualPath even if 'self' is an instance of a
        subclass.

        If the original path is '/' (which doesn't have a name in the
        sense of the 'name' property), or if 'newName' starts or ends
        with a '/', ValueError is raised.

        >>> p = VirtualPath('/foobar/downloads/pathlib.tar.gz')
        >>> p.withName('setup.py')
        terrasync.virtual_path.VirtualPath('/foobar/downloads/setup.py')

        """
        if self._path == '/':
            raise ValueError("{!r} has an empty name".format(self))
        else:
            pos = self._path.rfind('/')
            assert pos != -1, (pos, self._path)

            if newName.startswith('/'):
                raise ValueError("{!r} starts with a '/'".format(newName))
            elif newName.endswith('/'):
                raise ValueError("{!r} ends with a '/'".format(newName))
            else:
                return VirtualPath(self._path[:pos]) / newName

    def withSuffix(self, newSuffix):
        """Return a new VirtualPath instance with the suffix changed.

        If the original path doesn't have a suffix, the new suffix is
        appended:

        >>> p = VirtualPath('/foobar/downloads/pathlib.tar.gz')
        >>> p.withSuffix('.bz2')
        terrasync.virtual_path.VirtualPath('/foobar/downloads/pathlib.tar.bz2')
        >>> p = VirtualPath('/foobar/README')
        >>> p.withSuffix('.txt')
        terrasync.virtual_path.VirtualPath('/foobar/README.txt')

        A leading dot in the final component doesn't start a suffix, so
        for instance '/home/user/.bashrc' becomes
        '/home/user/.bashrc.txt' when given the '.txt' suffix.

        If 'self' is the root virtual path ('/') or 'newSuffix' doesn't
        start with '.', ValueError is raised.

        """
        if not newSuffix.startswith('.'):
            raise ValueError("new suffix {!r} doesn't start with '.'"
                             .format(newSuffix))

        name = self.name
        if not name:
            raise ValueError("{!r} has an empty 'name' part".format(self))

        pos = self._suffixPos(name)

        if pos == -1:
            return self.withName(name + newSuffix)       # append suffix
        else:
            return self.withName(name[:pos] + newSuffix) # replace suffix
|
||
|
||
|
||
class MutableVirtualPath(VirtualPath):

    """Variant of VirtualPath that can be extended in-place.

    Instances support the /= operator, which appends path components to
    the object itself instead of creating a new one. Since the stored
    path may change after construction, the class is deliberately made
    unhashable: its instances can't serve as dictionary keys, nor as
    elements of a set or frozenset.

    """

    # Setting __hash__ to None is what makes the type unhashable.
    __hash__ = None

    def _normalize(self):
        """Replace the stored path with its normalized form."""
        normalized = self.normalizeStringPath(self._path)
        self._path = normalized

    def __itruediv__(self, s):
        """Append 's' to the path in-place (the '/=' operator).

        's' must be a string representing a relative path using the '/'
        separator, for instance "dir/subdir/other-subdir".

        Return 'self', as required for an augmented assignment.

        """
        # Consistency check of the current state; it could be dropped if
        # it ever turned out to hurt performance.
        self._check()
        assert not s.startswith('/') and not s.endswith('/'), repr(s)

        # The root path already ends with the separator.
        separator = '' if self._path == '/' else '/'
        self._path = self._path + separator + s

        # Normalization collapses repeated slashes and drops a trailing
        # '/' (unless the whole path is '/').
        self._normalize()

        return self
|
||
|
||
|
||
if __name__ == "__main__":
    # Running this file directly executes the doctests embedded in the
    # docstrings of this module. For full test coverage, prefer the unittest
    # framework, which is set up to run all doctests from this module
    # automatically.
    #
    # Hint: 'python3 -m unittest discover' from the TerraSync directory
    #       should do the trick.
    #
    # NOTE(review): the repr-based examples expect the
    # 'terrasync.virtual_path.' prefix, which __repr__() derives from
    # __name__; confirm that they pass when the file is executed directly.
    from doctest import testmod
    testmod()
|