voussoirkit/voussoirkit/pathclass.py

790 lines
23 KiB
Python

import glob
import os
import re
import shutil
_glob = glob
from voussoirkit import winglob
# The characters treated as path separators when validating a PathPart.
# On Windows ('nt') both slash styles count; elsewhere only the forward slash.
SEPS = {'\\', '/'} if os.name == 'nt' else {'/'}

# NOTE(review): judging by the names, these are the characters Windows rejects
# anywhere in a path, and inside a single basename, respectively.
WINDOWS_GLOBAL_BADCHARS = set('*?<>|"')
WINDOWS_BASENAME_BADCHARS = set('\\/:*?<>|"')

# NOTE(review): judging by the name, these are the device names that Windows
# does not allow as ordinary filenames.
WINDOWS_RESERVED_NAMES = {
    'AUX',
    'CON',
    'NUL',
    'PRN',
    *(f'COM{number}' for number in range(1, 10)),
    *(f'LPT{number}' for number in range(1, 10)),
}
class PathclassException(Exception):
    '''
    Base class for all of the exceptions defined in this module.
    '''


class Exists(PathclassException):
    '''
    Raised by Path.assert_not_exists when the path does exist.
    '''


class IsFile(PathclassException):
    '''
    Raised by Path.assert_not_file when the path is a file.
    '''


class IsDirectory(PathclassException):
    '''
    Raised by Path.assert_not_directory when the path is a directory.
    '''


class IsLink(PathclassException):
    '''
    Raised by Path.assert_not_link when the path is a link.
    '''


class NotDirectory(PathclassException):
    '''
    Raised by Path.assert_is_directory when the path is not a directory.
    '''


class NotExists(PathclassException):
    '''
    Raised by Path.assert_exists when the path does not exist.
    '''


class NotEnoughSpace(PathclassException):
    '''
    Raised by Path.assert_disk_space when less than the requested amount of
    space is free.

    free:
        The amount of free space that was found.

    reserve:
        The amount of space that was requested.

    path:
        The path whose disk was measured.
    '''
    def __init__(self, free, reserve, path):
        # The message becomes the sole element of self.args.
        super().__init__(f'Only {free} available of requested {reserve}.')
        self.path = path
        self.reserve = reserve
        self.free = free


class NotFile(PathclassException):
    '''
    Raised by Path.assert_is_file when the path is not a file.
    '''


class NotLink(PathclassException):
    '''
    Raised by Path.assert_is_link when the path is not a link.
    '''
class Drive:
    '''
    The Drive part will contain everything up to but not including the final
    slash. On Unix this will usually just be '', on Windows it will be the
    drive letter 'C:' or the UNC path '\\\\?\\host'
    '''
    def __init__(self, name):
        '''
        name:
            The drive string. Trailing os.sep characters are stripped so that
            the separator can be added back uniformly when joining parts.
        '''
        self._name = name.rstrip(os.sep)

    def __eq__(self, other):
        # Comparing against something that is not a Drive should report
        # inequality instead of crashing on the missing _name attribute.
        if not isinstance(other, Drive):
            return NotImplemented
        return self._name == other._name

    def __hash__(self):
        # Defining __eq__ disables the inherited __hash__, so provide one that
        # agrees with equality.
        return hash(self._name)

    def __repr__(self):
        return f'{self.__class__.__name__}({repr(self._name)})'
class Extension:
    '''
    A file extension stored without its leading dot and passed through
    os.path.normcase, so that comparisons follow the platform's case rules.
    '''
    def __init__(self, ext):
        '''
        ext:
            A string with or without the leading dot, or another Extension.
        '''
        if isinstance(ext, Extension):
            ext = ext.ext
        self.ext = self.prep(ext)

    @staticmethod
    def prep(ext):
        '''
        Normalize the case of the extension string and remove leading dots.
        '''
        return os.path.normcase(ext).lstrip('.')

    def __bool__(self):
        return bool(self.ext)

    def __eq__(self, other):
        if isinstance(other, Extension):
            return self.ext == other.ext
        # Strings are normalized the same way as our own value before
        # comparing. Anything that cannot be normalized (None, ints, bytes)
        # is simply not equal, rather than an error.
        try:
            other = self.prep(other)
        except TypeError:
            return NotImplemented
        return self.ext == other

    def __hash__(self):
        return hash(self.ext)

    def __repr__(self):
        return f'Extension({repr(self.ext)})'

    def __str__(self):
        return self.ext

    @property
    def no_dot(self):
        '''
        The extension without a leading dot.
        '''
        return self.ext

    @property
    def with_dot(self):
        '''
        The extension with a leading dot, or '' if there is no extension.
        '''
        if self.ext == '':
            return ''
        return '.' + self.ext
class Path:
    '''
    An absolute filesystem path, stored as a tuple of parts where the first
    element is a Drive and the rest are PathParts.

    Comparison, hashing and sorting all use the os.path.normcase form of the
    absolute path. Instances implement __fspath__, so they can be passed
    directly to os and shutil functions.
    '''
    def __init__(
            self,
            path,
            *,
            _case_correct=False,
        ):
        '''
        path:
            One of:
            - Another Path, whose parts and cached values are reused.
            - A non-empty tuple or list where the first element is the drive
              and the remaining elements are the individual path parts.
            - A string or os.PathLike, which is passed through
              os.path.expanduser and os.path.abspath.

        _case_correct:
            True or False. If True, this indicates that the path casing is
            known in advance to be correct, which means calls to correct_case
            can be skipped. This is helpful because correct_case can be a
            source of slowdown.
        '''
        self._case_correct = _case_correct
        # These two are computed lazily and cached by their properties.
        self._absolute_path = None
        self._extension = None

        if isinstance(path, Path):
            self._parts = path._parts
            self._absolute_path = path._absolute_path
            self._extension = path._extension
            return

        if isinstance(path, (tuple, list)):
            if len(path) == 0:
                raise ValueError('Empty tuple')
            drive = normalize_drive(path[0])
            parts = tuple(normalize_pathpart(part) for part in path[1:])
            self._parts = (drive, *parts)
            return

        path = os.fspath(path)

        if isinstance(path, str):
            path = os.path.expanduser(path)
            path = os.path.abspath(path)
            self._absolute_path = path
            (drive, remainder) = os.path.splitdrive(path)
            drive = normalize_drive(drive)
            remainder = remainder.lstrip(os.sep)
            # If remainder == '' then splitting it will yield [''] which we
            # don't want in our parts.
            if remainder:
                parts = (normalize_pathpart(part) for part in remainder.split(os.sep))
                self._parts = (drive, *parts)
            else:
                self._parts = (drive,)
            return

        raise TypeError(f'path must be {Path}, {tuple} or {str}, not {type(path)}.')

    def __contains__(self, other):
        '''
        Return True if `other` is a descendant of this path. A path does not
        contain itself.
        '''
        if not isinstance(other, Path):
            other = Path(other)

        # If other is a child of self, then other._parts must be at least as
        # long as self._parts plus one.
        if len(self._parts) >= len(other._parts):
            return False

        # Compare by normcase so that Windows's case-insensitive filenames
        # behave correctly.
        # It would be fitting to do this check using ._parts, but we would
        # have to normcase each part anyway so let's just do the whole string
        # at once.
        # The prefix must end with the separator, otherwise /home/user2/x
        # would look like it is inside /home/user. Bare drives already end
        # with the separator.
        prefix = self.normcase
        if not prefix.endswith(os.sep):
            prefix += os.sep
        return other.normcase.startswith(prefix)

    def __eq__(self, other):
        if not isinstance(other, (Path, str, tuple, list)):
            try:
                other = os.fspath(other)
            except TypeError:
                return False

        if not isinstance(other, Path):
            other = Path(other)

        # Compare by normcase so that Windows's case-insensitive filenames
        # behave correctly.
        return self.normcase == other.normcase

    def __fspath__(self):
        return self.absolute_path

    def __hash__(self):
        return hash(self.normcase)

    def __lt__(self, other):
        # Sort by normcase so that Windows's case-insensitive filenames sort
        # alphabetically regardless of case.
        return self.normcase < other.normcase

    def __repr__(self):
        return f'{self.__class__.__name__}({repr(self.absolute_path)})'

    @property
    def absolute_path(self):
        '''
        The full path as a string. Computed from the parts on first use and
        cached afterwards.
        '''
        if self._absolute_path is not None:
            return self._absolute_path

        # This ensures that if this Path is just the drive, it will end with
        # the sep, and all other paths do not end with the sep.
        drive = self._parts[0]
        parts = self._parts[1:]
        absolute = drive._name + os.sep + os.sep.join(part._name for part in parts)
        self._absolute_path = absolute
        return self._absolute_path

    def assert_disk_space(self, reserve):
        '''
        Raise NotEnoughSpace if shutil.disk_usage reports less than `reserve`
        free for this path. Otherwise return the amount of free space.
        '''
        free = shutil.disk_usage(self).free
        if free < reserve:
            raise NotEnoughSpace(path=self, reserve=reserve, free=free)
        return free

    # ASSERTS

    def assert_exists(self):
        if not self.exists:
            raise NotExists(self)

    def assert_not_exists(self):
        if self.exists:
            raise Exists(self)

    #

    def assert_is_file(self):
        if not self.is_file:
            raise NotFile(self)

    def assert_not_file(self):
        if self.is_file:
            raise IsFile(self)

    #

    def assert_is_directory(self):
        if not self.is_dir:
            raise NotDirectory(self)

    assert_is_dir = assert_is_directory

    def assert_not_directory(self):
        if self.is_dir:
            raise IsDirectory(self)

    assert_not_dir = assert_not_directory

    #

    def assert_is_link(self):
        if not self.is_link:
            raise NotLink(self)

    def assert_not_link(self):
        if self.is_link:
            raise IsLink(self)

    #

    def add_extension(self, extension):
        '''
        Return a new Path with the extension appended after the current
        basename. If the extension is empty, this Path itself is returned.
        '''
        extension = Extension(extension)
        if extension == '':
            return self
        return self.parent.with_child(self.basename + extension.with_dot)

    @property
    def basename(self):
        '''
        The name of the final part. For a bare drive, this is the drive name.
        '''
        return self._parts[-1]._name

    def correct_case(self):
        '''
        Reinitialize this Path in place using the casing reported by
        get_path_casing, then return self. Does nothing if the casing is
        already marked as correct.
        '''
        if self._case_correct:
            return self
        absolute_path = get_path_casing(self.absolute_path)
        self.__init__(absolute_path, _case_correct=True)
        return self

    @property
    def depth(self):
        '''
        The number of parts, including the drive.
        '''
        return len(self._parts)

    @property
    def dot_extension(self):
        return self.extension.with_dot

    @property
    def drive(self):
        '''
        A Path consisting of only this path's drive.
        '''
        return Path([self._parts[0]])

    @property
    def exists(self):
        return os.path.exists(self)

    @property
    def extension(self):
        '''
        The Extension of the basename. Cached after first use.
        '''
        if self._extension is not None:
            return self._extension

        # Let's consider bare drives to not have an extension.
        # This must still be an Extension object so that dot_extension and
        # other callers can rely on its attributes.
        if len(self._parts) == 1:
            self._extension = Extension('')
            return self._extension

        self._extension = Extension(os.path.splitext(self.basename)[1])
        return self._extension

    def glob(self, pattern):
        '''
        Return Paths that match a glob pattern within this directory.
        '''
        pattern = normalize_basename_glob(pattern)
        # If the pattern is literally the name of an existing child, return
        # that without listing the directory.
        trychild = self.with_child(pattern)
        if trychild.exists:
            return [trychild.correct_case()]

        # By sidestepping the glob function and going straight for fnmatch
        # filter, we have slightly different behavior than normal, which is
        # that glob.glob treats .* as hidden files and won't match them with
        # patterns that don't also start with .*.
        children = os.listdir(self)
        children = winglob.fnmatch_filter(children, pattern)
        items = [self.with_child(c, _case_correct=self._case_correct) for c in children]
        return items

    def glob_directories(self, pattern):
        '''
        Return directory Paths that match a glob pattern within this directory.
        '''
        pattern = normalize_basename_glob(pattern)
        trychild = self.with_child(pattern)
        if trychild.is_directory:
            return [trychild.correct_case()]

        # Instead of turning all children into Path objects and filtering by
        # the stat, let's filter by the stat from scandir first.
        children = (e.name for e in os.scandir(self) if e.is_dir())
        children = winglob.fnmatch_filter(children, pattern)
        items = [self.with_child(c, _case_correct=self._case_correct) for c in children]
        return items

    def glob_files(self, pattern):
        '''
        Return file Paths that match a glob pattern within this directory.
        '''
        pattern = normalize_basename_glob(pattern)
        trychild = self.with_child(pattern)
        if trychild.is_file:
            return [trychild.correct_case()]

        children = (e.name for e in os.scandir(self) if e.is_file())
        children = winglob.fnmatch_filter(children, pattern)
        items = [self.with_child(c, _case_correct=self._case_correct) for c in children]
        return items

    @property
    def is_directory(self):
        return os.path.isdir(self)

    # Aliases for your convenience.
    is_dir = is_directory
    is_folder = is_directory

    @property
    def is_file(self):
        return os.path.isfile(self)

    @property
    def is_junction(self):
        '''
        True if os.readlink succeeds on this path and returns a non-empty
        target, False if it raises OSError.
        '''
        try:
            return bool(os.readlink(self))
        except OSError:
            # FileNotFoundError is a subclass of OSError, so nonexistent
            # paths land here too.
            return False

    @property
    def is_link(self):
        return self.is_symlink or self.is_junction

    @property
    def is_symlink(self):
        return os.path.islink(self)

    def join(self, subpath, **spawn_kwargs):
        '''
        Use os.path.join to join this path with any other path string.
        '''
        if not isinstance(subpath, str):
            raise TypeError(f'subpath must be a {str}, not {type(subpath)}.')
        path = os.path.join(self.absolute_path, subpath)
        return Path(path, **spawn_kwargs)

    def listdir(self):
        '''
        Return the children of this directory as Paths.
        '''
        children = os.listdir(self)
        children = [self.with_child(child, _case_correct=self._case_correct) for child in children]
        return children

    def listdir_directories(self):
        '''
        Return the child directories of this directory as Paths.
        '''
        children = (e.name for e in os.scandir(self) if e.is_dir())
        items = [self.with_child(c, _case_correct=self._case_correct) for c in children]
        return items

    def listdir_files(self):
        '''
        Return the child files of this directory as Paths.
        '''
        children = (e.name for e in os.scandir(self) if e.is_file())
        items = [self.with_child(c, _case_correct=self._case_correct) for c in children]
        return items

    def makedirs(self, mode=0o777, exist_ok=False):
        return os.makedirs(self, mode=mode, exist_ok=exist_ok)

    @property
    def normcase(self):
        '''
        The absolute path passed through os.path.normcase. This is the form
        used for comparing, hashing and sorting.
        '''
        return os.path.normcase(self.absolute_path)

    def open(self, *args, **kwargs):
        return open(self, *args, **kwargs)

    @property
    def parent(self):
        '''
        The Path one level up. A bare drive is its own parent.
        '''
        if len(self._parts) == 1:
            return self
        return Path(self._parts[:-1], _case_correct=self._case_correct)

    def read(self, mode, **kwargs):
        '''
        Shortcut function for opening the file handle and reading data from it.
        '''
        with self.open(mode, **kwargs) as handle:
            return handle.read()

    def readlines(self, mode, **kwargs):
        '''
        Shortcut function for opening the file handle and reading lines from it.
        '''
        with self.open(mode, **kwargs) as handle:
            return handle.readlines()

    @property
    def relative_path(self):
        return self.relative_to(cwd())

    def relative_to(self, other, simple=False):
        '''
        Return a string that expresses this path relative to `other`.

        If the paths are equal, the result is '.'.
        If this path is inside `other`, the result is the remaining parts,
        prefixed with '.' and the separator unless `simple` is True.
        If the paths have nothing in common (common_path returns the
        fallback), the result is this path's absolute path.
        Otherwise, the result steps backwards from `other` with '..' until
        the common ancestor and then descends to this path.
        '''
        if not isinstance(other, Path):
            other = Path(other)

        if self == other:
            return '.'

        if self in other:
            sub_parts = self._parts[len(other._parts):]
            relative = os.sep.join(part._name for part in sub_parts)
            if simple:
                return relative
            else:
                return f'.{os.sep}{relative}'

        common = common_path([self, other], fallback=None)

        if common is None:
            return self.absolute_path

        backsteps = other.depth - common.depth
        backsteps = os.sep.join('..' for x in range(backsteps))
        unique = [part._name for part in self._parts[common.depth:]]
        relative_path = os.path.join(backsteps, *unique)
        return relative_path

    def replace_extension(self, extension):
        '''
        Return a new Path that has the same basename as this one, but with a
        different extension. If this Path does not have any extension, it is
        added.
        '''
        extension = Extension(extension)
        base = os.path.splitext(self.basename)[0]

        if extension == '':
            return self.parent.with_child(base)

        return self.parent.with_child(base + extension.with_dot)

    @property
    def size(self):
        '''
        For files, the size from os.path.getsize. For directories, the sum of
        the sizes of all files found by walk. Raises NotExists if the path
        does not exist. Anything that exists but is neither a file nor a
        directory yields None.
        '''
        self.assert_exists()
        if self.is_file:
            return os.path.getsize(self)
        elif self.is_dir:
            return sum(file.size for file in self.walk() if file.is_file)

    @property
    def stat(self):
        return os.stat(self)

    def touch(self):
        '''
        Update the file's mtime if it exists, or create it.
        '''
        try:
            os.utime(self)
        except FileNotFoundError:
            self.open('a').close()

    def walk(self):
        '''
        Yield files and directories from this directory and subdirectories.
        '''
        # Non-directory children are yielded first, then each directory is
        # yielded followed by its own contents.
        directories = []
        entries = os.scandir(self)
        entries = sorted(entries, key=lambda e: os.path.normcase(e.name))
        for entry in entries:
            child = self.with_child(entry.name, _case_correct=self._case_correct)
            if entry.is_dir():
                directories.append(child)
            else:
                yield child

        for directory in directories:
            yield directory
            yield from directory.walk()

    def walk_directories(self):
        '''
        Yield directories from this directory and subdirectories.
        '''
        entries = os.scandir(self)
        entries = sorted(entries, key=lambda e: os.path.normcase(e.name))
        for entry in entries:
            if entry.is_dir():
                child = self.with_child(entry.name, _case_correct=self._case_correct)
                yield child
                yield from child.walk_directories()

    def walk_files(self):
        '''
        Yield files from this directory and subdirectories.
        '''
        # It would be nice to optimize this to not create Path objects for the
        # directories since we don't yield them, but it's cheaper to do many
        # directory.with_child(file) than it is to instantiate each file from
        # the path string anyway.
        return (item for item in self.walk() if item.is_file)

    def with_child(self, basename, **spawn_kwargs):
        '''
        Return a new Path with `basename` appended as one more part. The
        basename must be a single name string; the part constructor rejects
        names that contain path separators.
        '''
        if not isinstance(basename, str):
            raise TypeError(f'basename must be {str}, not {type(basename)}.')
        parts = (*self._parts, basename)
        return Path(parts, **spawn_kwargs)

    def write(self, mode, data, **kwargs):
        '''
        Shortcut function for opening the file handle and writing data into it.
        '''
        with self.open(mode, **kwargs) as handle:
            return handle.write(data)
class PathPart:
    '''
    One component of a path after the drive. A part is a single name and so
    may not contain any path separator; the separators are supplied when the
    tuple of parts is joined back into a string.
    '''
    def __init__(self, name):
        for separator in SEPS:
            if separator in name:
                raise ValueError('A path part cannot contain path separators.')
        self._name = name
def common_path(paths, fallback):
    '''
    Find the deepest Path that is shared by every path in the collection.

    paths:
        A collection of Paths or anything accepted by the Path constructor.
        A single str or Path is rejected with TypeError, and an empty
        collection with ValueError.

    fallback:
        Returned when the paths do not even share their first part.
    '''
    if isinstance(paths, (str, Path)):
        raise TypeError('`paths` must be a collection')

    candidates = [Path(item) for item in paths]
    if len(candidates) == 0:
        raise ValueError('Empty list')

    # zip stops at the shortest path, which is as deep as a common ancestor
    # could possibly be.
    shared_depth = 0
    for level in zip(*(candidate._parts for candidate in candidates)):
        distinct_names = {os.path.normcase(part._name) for part in level}
        if len(distinct_names) > 1:
            break
        shared_depth += 1

    if shared_depth == 0:
        return fallback

    return Path(candidates[0]._parts[:shared_depth])
def cwd():
    '''
    Return the current working directory as a Path.
    '''
    working_directory = os.getcwd()
    return Path(working_directory)
def get_path_casing(path):
    '''
    Given a path whose letter casing may be wrong, return the path string with
    the casing that the filesystem actually reports.

    If only part of the path exists, the existing portion is corrected and the
    rest is attached unchanged. If no portion can be looked up, the input's
    absolute path is returned as-is.

    Thank you:
    Ethan Furman http://stackoverflow.com/a/7133137/5430534
    xvorsx http://stackoverflow.com/a/14742779/5430534
    '''
    requested = path if isinstance(path, Path) else Path(path)

    # Glob can only find things that exist, so climb upwards until we reach
    # an ancestor that does. The nonexistent tail is reattached at the end.
    existing = requested
    while not existing.exists:
        ancestor = existing.parent
        if existing == ancestor:
            # Even the root does not exist, so there is nothing to look up.
            return requested.absolute_path
        existing = ancestor

    (drive, tail) = os.path.splitdrive(existing.absolute_path)
    pieces = tail.lstrip(os.sep).split(os.sep)
    patternized = os.sep.join(glob_patternize(piece) for piece in pieces)
    pattern = drive.upper() + os.sep + patternized

    matches = _glob.glob(pattern)
    if len(matches) == 0:
        return requested.absolute_path
    found = matches[0]

    # Whatever comes after the existing portion keeps the casing it was given.
    leftover = requested.absolute_path[len(found):].lstrip(os.sep)
    result = os.path.join(found, leftover).rstrip(os.sep)
    # Stripping the separator from a bare root or drive removes the only
    # separator it had, so put it back.
    if os.sep not in result:
        result += os.sep
    return result
def glob(pattern):
    '''
    Like the standard glob, but the results are Path objects rather than
    strings. Only the final component of the pattern is treated as a glob.

    If you want to recurse, consider using spinal.walk with glob_filenames
    instead.
    '''
    if pattern == '.':
        return [cwd()]
    if pattern == '..':
        return [cwd().parent]

    # A pattern that literally names an existing path needs no globbing.
    exact = Path(pattern)
    if exact.exists:
        return [exact.correct_case()]

    (parent_text, name_pattern) = os.path.split(pattern)
    return Path(parent_text).glob(name_pattern)
def glob_directories(pattern):
    '''
    Return the directories matching the pattern as Path objects. Only the
    final component of the pattern is treated as a glob.
    '''
    # A pattern that literally names an existing directory needs no globbing.
    exact = Path(pattern)
    if exact.is_directory:
        return [exact.correct_case()]

    (parent_text, name_pattern) = os.path.split(pattern)
    return Path(parent_text).glob_directories(name_pattern)
def glob_files(pattern):
    '''
    Return the files matching the pattern as Path objects. Only the final
    component of the pattern is treated as a glob.
    '''
    # A pattern that literally names an existing file needs no globbing.
    exact = Path(pattern)
    if exact.is_file:
        return [exact.correct_case()]

    (parent_text, name_pattern) = os.path.split(pattern)
    return Path(parent_text).glob_files(name_pattern)
def glob_many(patterns):
    '''
    Generate the results of globbing each of the given patterns, one pattern
    after another, so the caller does not need a nested loop.

    If you want to recurse, consider using spinal.walk(glob_filenames=[...])
    instead. The important difference between this function and spinal.walk is
    that spinal.walk starts from a root directory and looks for descendants
    that match the glob. This function can take patterns with no common root.
    '''
    for pattern in patterns:
        for match in glob(pattern):
            yield match
def glob_many_directories(patterns):
    '''
    Generate the results of glob_directories for each of the given patterns,
    one pattern after another.
    '''
    for pattern in patterns:
        for match in glob_directories(pattern):
            yield match
def glob_many_files(patterns):
    '''
    Generate the results of glob_files for each of the given patterns, one
    pattern after another.
    '''
    for pattern in patterns:
        for match in glob_files(pattern):
            yield match
def glob_patternize(piece):
    '''
    Create a pattern like "[u]ser" from "user". This forces glob to look up the
    correct path name, while guaranteeing that the only result will be
    the correct path.

    Special cases are:
    `!`
        because in glob syntax, [!x] tells glob to look for paths that
        don't contain "x", and [!] is invalid syntax.
    `[`, `]`
        because this starts a glob capture group
    `*`, `?`
        because these are wildcards, and glob.escape already wraps them in
        brackets of their own.

    so we pick the first non-special character to put in the brackets.
    If the path consists entirely of these special characters, then the
    casing doesn't need to be corrected anyway.

    Returns the pattern string. Every glob-special character in the piece
    is escaped so that it matches only itself.
    '''
    special_characters = '*?[]!'
    # Choose the character from the unescaped piece. Scanning the escaped
    # text instead would find the `*` or `?` inside the brackets that
    # glob.escape added and bracket it a second time, breaking the pattern.
    for (index, character) in enumerate(piece):
        if character in special_characters:
            continue
        before = _glob.escape(piece[:index])
        after = _glob.escape(piece[index + 1:])
        return f'{before}[{character}]{after}'

    return _glob.escape(piece)
def natural_sorter(path):
    '''
    This function is used as the `key` argument in
    paths.sort(key=pathclass.natural_sorter).

    Used for sorting strings in 'natural' order instead of lexicographic order,
    so that you get 1 2 3 4 5 6 7 8 9 10 11 12 13 ...
    instead of 1 10 11 12 13 2 3 4 5 ...

    path:
        An object with an absolute_path string attribute.

    Returns a list that alternates between lowercased text and integers,
    always starting with text (which may be the empty string).

    Thank you Mark Byers
    http://stackoverflow.com/a/11150413
    '''
    # Splitting on a capturing group makes the digit runs land on the odd
    # indexes and the text between them on the even indexes. Deciding by
    # position instead of str.isdigit means a text chunk like '²', which
    # isdigit accepts but int rejects, is kept as text.
    chunks = re.split(r'([0-9]+)', path.absolute_path)
    return [
        int(chunk) if index % 2 == 1 else chunk.lower()
        for (index, chunk) in enumerate(chunks)
    ]
def normalize_drive(name):
    '''
    Return the argument unchanged if it is exactly a Drive, otherwise
    construct a Drive from it.
    '''
    already_a_drive = type(name) is Drive
    return name if already_a_drive else Drive(name)
def normalize_basename_glob(pattern):
    '''
    Validate and normalize a glob pattern that is meant to match basenames
    inside a single directory.

    Returns the pattern after os.path.normpath.

    Raises ValueError if the pattern is empty.
    Raises TypeError if the normalized pattern contains a path separator.
    '''
    # This has to be checked before normpath, because normpath turns the
    # empty string into '.', which would slip through as a valid pattern.
    if not pattern:
        raise ValueError('glob pattern should not be empty.')

    pattern = os.path.normpath(pattern)

    if os.sep in pattern:
        # If the user wants to glob names in a different path, they should
        # create a Pathclass for that directory first and do it normally.
        raise TypeError('glob pattern should not have path separators.')

    return pattern
def normalize_pathpart(name):
    '''
    Return the argument unchanged if it is exactly a PathPart, otherwise
    construct a PathPart from it.
    '''
    already_a_part = type(name) is PathPart
    return name if already_a_part else PathPart(name)
def normalize_sep(path) -> str:
    '''
    Convert the path separators in the string to the style used by the
    operating system.

    On Windows, forward slash / is replaced with backslash \\.

    Note: os.path.normpath also performs separator normalization, but it also
    eliminates leading ./ which you may want to keep in your string.
    '''
    if os.name != 'nt':
        # On unix, backslashes are valid filename characters so we do not
        # normalize them to forward slash.
        return path

    return path.replace('/', '\\')
def system_root():
    '''
    Return the Path made from the bare separator os.sep.
    '''
    root = os.sep
    return Path(root)