Rewrite a lot of pathclass, spinal.walk using tuple-based Path.

I was inspired by the idea of "making impossible states impossible"
and using a data model that accurately represents what we intend for
it to represent. Instead of storing the path as a string where "it's
a string but actually you're supposed to know that the parts between
os.seps are different parts and the first one is special and...", we
can use a data model that directly says that. Storing the path as a
tuple of (Drive, Part, Part) helps me focus on the semantics of the
Path as a collection of parts joined by the os.sep.

Furthermore, storing the path as a string made some operations slow.
Every time we call one of the os.path functions with a string, it
has to do a lot of normalization and edge-case handling even when we
know it wouldn't be needed. By storing the path as a tuple, we can
instantly get the drive name, parent dir name, and basename without
asking os.path to split it for us every single time. It also makes
relative path / common ancestor checks a lot easier to understand.
Fewer operations need to go into the slow functions.
This commit is contained in:
voussoir 2021-11-30 20:14:13 -08:00
parent 7a48a7b69c
commit b288cca519
No known key found for this signature in database
GPG key ID: 5F7554F8C26DACCB
3 changed files with 288 additions and 163 deletions

View file

@ -1,11 +1,15 @@
import glob import glob
import os import os
import re
_glob = glob _glob = glob
from voussoirkit import winglob from voussoirkit import winglob
if os.name == 'nt':
SEPS = {'\\', '/'}
else:
SEPS = {'/'}
WINDOWS_GLOBAL_BADCHARS = {'*', '?', '<', '>', '|', '"'} WINDOWS_GLOBAL_BADCHARS = {'*', '?', '<', '>', '|', '"'}
WINDOWS_BASENAME_BADCHARS = {'\\', '/', ':', '*', '?', '<', '>', '|', '"'} WINDOWS_BASENAME_BADCHARS = {'\\', '/', ':', '*', '?', '<', '>', '|', '"'}
WINDOWS_RESERVED_NAMES = { WINDOWS_RESERVED_NAMES = {
@ -44,6 +48,14 @@ class NotFile(PathclassException):
class NotLink(PathclassException): class NotLink(PathclassException):
pass pass
class Drive:
def __init__(self, name):
name = name.rstrip(os.sep)
self._name = name
def __eq__(self, other):
return self._name == other._name
class Extension: class Extension:
def __init__(self, ext): def __init__(self, ext):
if isinstance(ext, Extension): if isinstance(ext, Extension):
@ -88,57 +100,85 @@ class Path:
self, self,
path, path,
*, *,
force_sep=None,
_case_correct=False, _case_correct=False,
): ):
''' '''
force_sep:
Normally, the pathclass will use the default separator for your
operating system: / on unix and \\ on windows. You can use this
argument to force a particular separator.
_case_correct: _case_correct:
True or False. If True, this indicates that the path casing is True or False. If True, this indicates that the path casing is
known in advance to be correct, which means calls to correct_case known in advance to be correct, which means calls to correct_case
can be skipped. This is helpful because correct_case can be a can be skipped. This is helpful because correct_case can be a
source of slowdown. source of slowdown.
''' '''
self.force_sep = force_sep
self.sep = force_sep or os.sep
self._case_correct = _case_correct self._case_correct = _case_correct
self._absolute_path = None
self._extension = None
if isinstance(path, Path): if isinstance(path, Path):
self._absolute_path = path.absolute_path self._parts = path._parts
self._absolute_path = path._absolute_path
self._extension = path._extension
return return
if not isinstance(path, str): if isinstance(path, (tuple, list)):
raise TypeError(f'path must be {Path} or {str}, not {type(path)}.') if len(path) == 0:
raise ValueError('Empty tuple')
drive = normalize_drive(path[0])
parts = tuple(normalize_pathpart(part) for part in path[1:])
self._parts = (drive, *parts)
return
path = path.strip() path = os.fspath(path)
if re.match(r'^[A-Za-z]:$', path):
# Bare Windows drive letter. if isinstance(path, str):
path += self.sep path = os.path.abspath(path)
path = normalize_sep(path) self._absolute_path = path
path = os.path.normpath(path) (drive, remainder) = os.path.splitdrive(path)
absolute_path = os.path.abspath(path) drive = normalize_drive(drive)
self._absolute_path = normalize_sep(absolute_path, self.sep) remainder = remainder.lstrip(os.sep)
# If remainder == '' then splitting it will yield [''] which we
# don't want in our parts.
if remainder:
parts = (normalize_pathpart(part) for part in remainder.split(os.sep))
self._parts = (drive, *parts)
else:
self._parts = (drive,)
return
raise TypeError(f'path must be {Path}, {tuple} or {str}, not {type(path)}.')
def __contains__(self, other): def __contains__(self, other):
other = self.spawn(other) if not isinstance(other, Path):
other = Path(other)
self_norm = self.normcase # If other is a child of self, then other._parts must be at least as
if not self_norm.endswith(self.sep): # long as self._parts plus one.
self_norm += self.sep if len(self._parts) >= len(other._parts):
return other.normcase.startswith(self_norm) return False
# Compare by normcase so that Windows's case-insensitive filenames
# behave correctly.
# It would be fitting to do this check using ._parts, but we would
# have to normcase each part anyway so let's just do the whole string
# at once.
return other.normcase.startswith(self.normcase)
def __eq__(self, other): def __eq__(self, other):
if not hasattr(other, 'absolute_path'): if not isinstance(other, (Path, str, tuple, list)):
return False try:
other = os.fspath(other)
except TypeError:
return False
if not isinstance(other, Path):
other = Path(other)
# Compare by normcase so that Windows's case-insensitive filenames # Compare by normcase so that Windows's case-insensitive filenames
# behave correctly. # behave correctly.
return self.normcase == other.normcase return self.normcase == other.normcase
def __fspath__(self):
return self.absolute_path
def __hash__(self): def __hash__(self):
return hash(self.normcase) return hash(self.normcase)
@ -148,10 +188,19 @@ class Path:
return self.normcase < other.normcase return self.normcase < other.normcase
def __repr__(self): def __repr__(self):
return '{c}({path})'.format(c=self.__class__.__name__, path=repr(self.absolute_path)) return f'{self.__class__.__name__}({repr(self.absolute_path)})'
@property @property
def absolute_path(self): def absolute_path(self):
if self._absolute_path is not None:
return self._absolute_path
# This ensures that if this Path is just the drive, it will end with
# the sep, and all other paths do not end with the sep.
drive = self._parts[0]
parts = self._parts[1:]
absolute = drive._name + os.sep + os.sep.join(part._name for part in parts)
self._absolute_path = absolute
return self._absolute_path return self._absolute_path
def assert_exists(self): def assert_exists(self):
@ -198,19 +247,18 @@ class Path:
@property @property
def basename(self): def basename(self):
return os.path.basename(self.absolute_path) return self._parts[-1]._name
def correct_case(self): def correct_case(self):
if self._case_correct: if self._case_correct:
return self return self
absolute_path = get_path_casing(self._absolute_path) absolute_path = get_path_casing(self.absolute_path)
self._absolute_path = normalize_sep(absolute_path, self.sep) self.__init__(absolute_path, _case_correct=True)
self._case_correct = True
return self return self
@property @property
def depth(self): def depth(self):
return len(self.absolute_path.rstrip(self.sep).split(self.sep)) return len(self._parts)
@property @property
def dot_extension(self): def dot_extension(self):
@ -218,10 +266,7 @@ class Path:
@property @property
def drive(self): def drive(self):
drive = os.path.splitdrive(self.absolute_path)[0] return Path([self._parts[0]])
if not drive.endswith(self.sep):
drive += self.sep
return self.spawn(drive)
@property @property
def exists(self): def exists(self):
@ -229,17 +274,41 @@ class Path:
@property @property
def extension(self): def extension(self):
return Extension(os.path.splitext(self.absolute_path)[1]) if self._extension is not None:
return self._extension
# Let's consider bare drives to not have an extension.
if len(self._parts) == 1:
self._extension = ''
return self._extension
self._extension = Extension(os.path.splitext(self.basename)[1])
return self._extension
def glob(self, pattern): def glob(self, pattern):
if '/' in pattern or '\\' in pattern: '''
Return Paths that match a glob pattern within this directory.
'''
pattern = os.path.normpath(pattern)
if os.sep in pattern:
# If the user wants to glob names in a different path, they should # If the user wants to glob names in a different path, they should
# create a Pathclass for that directory first and do it normally. # create a Pathclass for that directory first and do it normally.
raise TypeError('glob pattern should not have path separators') raise TypeError('glob pattern should not have path separators.')
pattern = os.path.join(self.absolute_path, pattern)
children = winglob.glob(pattern) if not pattern:
children = [self.with_child(child) for child in children] raise ValueError('glob pattern should not be empty.')
return children
# I would like to rewrite this using listdir + fnmatch.filter so we can
# get straight to the basenames, but I need to learn what corner cases
# are handled by glob for us before I do so.
pattern_root = f'{self.absolute_path}{os.sep}'
cut_length = len(pattern_root)
pattern = f'{pattern_root}{pattern}'
items = winglob.glob(pattern)
basenames = (item[cut_length:] for item in items)
items = [self.with_child(item, _case_correct=self._case_correct) for item in basenames]
return items
def glob_directories(self, pattern): def glob_directories(self, pattern):
return [p for p in self.glob(pattern) if p.is_dir] return [p for p in self.glob(pattern) if p.is_dir]
@ -264,14 +333,17 @@ class Path:
return os.path.islink(self.absolute_path) return os.path.islink(self.absolute_path)
def join(self, subpath, **spawn_kwargs): def join(self, subpath, **spawn_kwargs):
'''
Use os.path.join to join this path with any other path string.
'''
if not isinstance(subpath, str): if not isinstance(subpath, str):
raise TypeError('subpath must be a string') raise TypeError(f'subpath must be a {str}, not {type(subpath)}.')
path = os.path.join(self.absolute_path, subpath) path = os.path.join(self.absolute_path, subpath)
return self.spawn(path, **spawn_kwargs) return Path(path, **spawn_kwargs)
def listdir(self): def listdir(self):
children = os.listdir(self.absolute_path) children = os.listdir(self.absolute_path)
children = [self.join(child, _case_correct=self._case_correct) for child in children] children = [self.with_child(child, _case_correct=self._case_correct) for child in children]
return children return children
def listdir_directories(self): def listdir_directories(self):
@ -285,17 +357,17 @@ class Path:
@property @property
def normcase(self): def normcase(self):
norm = os.path.normcase(self.absolute_path) return os.path.normcase(self.absolute_path)
norm = norm.replace('/', self.sep).replace('\\', self.sep)
return norm
def open(self, *args, **kwargs): def open(self, *args, **kwargs):
return open(self.absolute_path, *args, **kwargs) return open(self.absolute_path, *args, **kwargs)
@property @property
def parent(self): def parent(self):
parent = os.path.dirname(self.absolute_path) if len(self._parts) == 1:
return self.spawn(parent) return self
return Path(self._parts[:-1], _case_correct=self._case_correct)
def read(self, mode, **kwargs): def read(self, mode, **kwargs):
''' '''
@ -313,43 +385,40 @@ class Path:
@property @property
def relative_path(self): def relative_path(self):
return self.relative_to(os.getcwd()) return self.relative_to(cwd())
def relative_to(self, other, simple=False): def relative_to(self, other, simple=False):
if isinstance(other, str): if not isinstance(other, Path):
other = Path(other) other = Path(other)
if self == other: if self == other:
return '.' return '.'
self.correct_case()
other.correct_case()
if self in other: if self in other:
relative = self.absolute_path sub_parts = self._parts[len(other._parts):]
relative = relative.replace(other.absolute_path, '', 1) relative = os.sep.join(part._name for part in sub_parts)
relative = relative.lstrip(self.sep) if simple:
if not simple: return relative
relative = '.' + self.sep + relative else:
return relative return f'.{os.sep}{relative}'
common = common_path([other.absolute_path, self.absolute_path], fallback=None) common = common_path([self, other], fallback=None)
if common is None: if common is None:
return self.absolute_path return self.absolute_path
common = self.spawn(common)
backsteps = other.depth - common.depth backsteps = other.depth - common.depth
backsteps = self.sep.join('..' for x in range(backsteps)) backsteps = os.sep.join('..' for x in range(backsteps))
common = common.absolute_path unique = [part._name for part in self._parts[common.depth:]]
if not common.endswith(self.sep): relative_path = os.path.join(backsteps, *unique)
common += self.sep
unique = self.absolute_path.replace(common, '', 1)
relative_path = os.path.join(backsteps, unique)
relative_path = relative_path.replace('/', self.sep).replace('\\', self.sep)
return relative_path return relative_path
def replace_extension(self, extension): def replace_extension(self, extension):
'''
Return a new Path that has the same basename as this one, but with a
different extension. If this Path does not have any extension, it is
added.
'''
extension = Extension(extension) extension = Extension(extension)
base = os.path.splitext(self.basename)[0] base = os.path.splitext(self.basename)[0]
@ -366,23 +435,29 @@ class Path:
elif self.is_dir: elif self.is_dir:
return sum(file.size for file in self.walk() if file.is_file) return sum(file.size for file in self.walk() if file.is_file)
def spawn(self, path, **kwargs):
return self.__class__(path, force_sep=self.force_sep, **kwargs)
@property @property
def stat(self): def stat(self):
return os.stat(self.absolute_path) return os.stat(self.absolute_path)
def touch(self): def touch(self):
'''
Update the file's mtime if it exists, or create it.
'''
try: try:
os.utime(self.absolute_path) os.utime(self.absolute_path)
except FileNotFoundError: except FileNotFoundError:
self.open('a').close() self.open('a').close()
def walk(self): def walk(self):
'''
Yield files and directories from this directory and subdirectories.
'''
directories = [] directories = []
for child in self.listdir():
if child.is_dir: entries = os.scandir(self.absolute_path)
for entry in entries:
child = self.with_child(entry.name, _case_correct=self._case_correct)
if entry.is_dir():
directories.append(child) directories.append(child)
else: else:
yield child yield child
@ -391,8 +466,9 @@ class Path:
yield directory yield directory
yield from directory.walk() yield from directory.walk()
def with_child(self, basename): def with_child(self, basename, **spawn_kwargs):
return self.join(os.path.basename(basename)) parts = (*self._parts, basename)
return Path(parts, **spawn_kwargs)
def write(self, mode, data, **kwargs): def write(self, mode, data, **kwargs):
''' '''
@ -401,10 +477,15 @@ class Path:
with self.open(mode, **kwargs) as handle: with self.open(mode, **kwargs) as handle:
return handle.write(data) return handle.write(data)
class PathPart:
def __init__(self, name):
if any(sep in name for sep in SEPS):
raise ValueError('A path part cannot contain path separators.')
self._name = name
def common_path(paths, fallback): def common_path(paths, fallback):
''' '''
Given a list of file paths, determine the deepest path which all Given a list of paths, determine the deepest path which all have in common.
have in common.
''' '''
if isinstance(paths, (str, Path)): if isinstance(paths, (str, Path)):
raise TypeError('`paths` must be a collection') raise TypeError('`paths` must be a collection')
@ -414,21 +495,21 @@ def common_path(paths, fallback):
if len(paths) == 0: if len(paths) == 0:
raise ValueError('Empty list') raise ValueError('Empty list')
if hasattr(paths, 'pop'): index = 0
model = paths.pop()
else:
model = paths[0]
paths = paths[1:]
while True: while True:
if all(f in model for f in paths): try:
return model this_level = set(os.path.normcase(path._parts[index]._name) for path in paths)
parent = model.parent except IndexError:
if parent == model: break
# We just processed the root, and now we're stuck at the root. if len(this_level) > 1:
# Which means there was no common path. break
return fallback index += 1
model = parent
if index == 0:
return fallback
parts = paths[0]._parts[:index]
return Path(parts)
def cwd(): def cwd():
return Path(os.getcwd()) return Path(os.getcwd())
@ -541,12 +622,15 @@ def glob_patternize(piece):
break break
return piece return piece
def normalize_sep(path, sep=None): def normalize_drive(name):
sep = sep or os.sep if type(name) is Drive:
path = path.replace('/', sep) return name
path = path.replace('\\', sep) return Drive(name)
return path def normalize_pathpart(name):
if type(name) is PathPart:
return name
return PathPart(name)
def system_root(): def system_root():
return os.path.abspath(os.sep) return Path(os.sep)

View file

@ -2,6 +2,7 @@
This module provides functions related to walking the filesystem and This module provides functions related to walking the filesystem and
copying files and folders. copying files and folders.
''' '''
import collections
import hashlib import hashlib
import os import os
import shutil import shutil
@ -20,6 +21,8 @@ from voussoirkit import winglob
log = vlogging.getLogger(__name__) log = vlogging.getLogger(__name__)
BAIL = sentinel.Sentinel('BAIL') BAIL = sentinel.Sentinel('BAIL')
YIELD_STYLE_FLAT = sentinel.Sentinel('yield style flat')
YIELD_STYLE_NESTED = sentinel.Sentinel('yield style nested')
# Number of bytes to read and write at a time # Number of bytes to read and write at a time
CHUNK_SIZE = 2 * bytestring.MIBIBYTE CHUNK_SIZE = 2 * bytestring.MIBIBYTE
@ -123,7 +126,7 @@ def copy_directory(
Passed into each `copy_file` as `callback_progress`. Passed into each `copy_file` as `callback_progress`.
callback_permission_denied: callback_permission_denied:
Passed into each `copy_file` as `callback_permission_denied`. Passed into `walk` and each `copy_file` as `callback_permission_denied`.
callback_pre_directory: callback_pre_directory:
This function will be called before each directory and subdirectory This function will be called before each directory and subdirectory
@ -237,9 +240,11 @@ def copy_directory(
# Copy # Copy
walker = walk( walker = walk(
source, source,
callback_permission_denied=callback_permission_denied,
exclude_directories=exclude_directories, exclude_directories=exclude_directories,
exclude_filenames=exclude_filenames, exclude_filenames=exclude_filenames,
yield_style='nested', sort=True,
yield_style=YIELD_STYLE_NESTED,
) )
def denester(walker): def denester(walker):
@ -773,9 +778,10 @@ def walk(
glob_directories=None, glob_directories=None,
glob_filenames=None, glob_filenames=None,
recurse=True, recurse=True,
sort=False,
yield_directories=False, yield_directories=False,
yield_files=True, yield_files=True,
yield_style='flat', yield_style=YIELD_STYLE_FLAT,
): ):
''' '''
Yield pathclass.Path objects for files in the tree, similar to os.walk. Yield pathclass.Path objects for files in the tree, similar to os.walk.
@ -805,7 +811,16 @@ def walk(
at least one of these patterns. at least one of these patterns.
recurse: recurse:
Yield from subdirectories. If False, only immediate files are returned. If False, we will yield only the items from the starting path and then
stop. This might seem silly for a walk function, but it makes it easier
on the calling side to have a recurse/no-recurse option without having
to call a separate function with different arguments for each case,
while still taking advantage of the other filtering features here.
sort:
If True, items are sorted before they are yielded. Otherwise, they
come in whatever order the filesystem returns them, which may not
be alphabetical.
yield_directories: yield_directories:
Should the generator produce directories? True or False. Should the generator produce directories? True or False.
@ -823,10 +838,15 @@ def walk(
if not yield_directories and not yield_files: if not yield_directories and not yield_files:
raise ValueError('yield_directories and yield_files cannot both be False.') raise ValueError('yield_directories and yield_files cannot both be False.')
if yield_style not in ['flat', 'nested']: yield_style = {
'flat': YIELD_STYLE_FLAT,
'nested': YIELD_STYLE_NESTED,
}.get(yield_style, yield_style)
if yield_style not in [YIELD_STYLE_FLAT, YIELD_STYLE_NESTED]:
raise ValueError(f'yield_style should be "flat" or "nested", not {yield_style}.') raise ValueError(f'yield_style should be "flat" or "nested", not {yield_style}.')
callback_permission_denied = callback_permission_denied or do_nothing callback_permission_denied = callback_permission_denied or None
if exclude_filenames is not None: if exclude_filenames is not None:
exclude_filenames = {normalize(f) for f in exclude_filenames} exclude_filenames = {normalize(f) for f in exclude_filenames}
@ -858,8 +878,8 @@ def walk(
exclude = not any(winglob.fnmatch(basename, whitelisted) for whitelisted in whitelist) exclude = not any(winglob.fnmatch(basename, whitelisted) for whitelisted in whitelist)
if blacklist is not None and not exclude: if blacklist is not None and not exclude:
n_basename = normalize(basename) n_basename = os.path.normcase(basename)
n_abspath = normalize(abspath) n_abspath = os.path.normcase(abspath)
exclude = any( exclude = any(
n_basename == blacklisted or n_basename == blacklisted or
@ -875,68 +895,81 @@ def walk(
if handle_exclusion(None, exclude_directories, path.basename, path.absolute_path): if handle_exclusion(None, exclude_directories, path.basename, path.absolute_path):
return return
# In the following loops, I found joining the os.sep with fstrings to be # In the following loop, I found joining the os.sep with fstrings to be
# 10x faster than `os.path.join`, reducing a 6.75 second walk to 5.7. # 10x faster than `os.path.join`, reducing a 6.75 second walk to 5.7.
# Because we trust the values of current_location and the child names, # Because we trust the values of current_location and the child names,
# we don't run the risk of producing bad values this way. # we don't run the risk of producing bad values this way.
def walkstep_nested(current_location, child_dirs, child_files): queue = collections.deque()
directories = [] queue.append(path)
new_child_dirs = [] while queue:
for child_dir in child_dirs: current = queue.pop()
child_dir_abspath = f'{current_location}{os.sep}{child_dir}' log.debug('Scanning %s.', current)
if handle_exclusion(glob_directories, exclude_directories, child_dir, child_dir_abspath): current_rstrip = current.absolute_path.rstrip(os.sep)
if yield_style is YIELD_STYLE_NESTED:
child_dirs = []
child_files = []
try:
entries = list(os.scandir(current))
except (OSError, PermissionError) as exc:
if callback_permission_denied is not None:
callback_permission_denied(exc)
continue continue
else:
raise
new_child_dirs.append(child_dir) if sort:
directories.append(pathclass.Path(child_dir_abspath, _case_correct=True)) entries = sorted(entries, key=lambda e: os.path.normcase(e.name))
# This will actually affect the results of the os.walk going forward! # The problem with stack-based depth-first search is that the last item
child_dirs[:] = new_child_dirs # from the parent dir becomes the first to be walked, leading to
# reverse-alphabetical order directory traversal. But we also don't
# want to reverse the input entries because then the files come out
# backwards. So instead we keep a more_queue to which we appendleft so
# that it's backwards, and popping will make it forward again.
more_queue = collections.deque()
for entry in entries:
entry_abspath = f'{current_rstrip}{os.sep}{entry.name}'
files = [] if entry.is_dir():
for child_file in child_files: if handle_exclusion(
child_file_abspath = f'{current_location}{os.sep}{child_file}' whitelist=glob_directories,
if handle_exclusion(glob_filenames, exclude_filenames, child_file, child_file_abspath): blacklist=exclude_directories,
continue basename=entry.name,
abspath=entry_abspath,
files.append(pathclass.Path(child_file_abspath, _case_correct=True)) ):
current_location = pathclass.Path(current_location, _case_correct=True)
yield (current_location, directories, files)
def walkstep_flat(current_location, child_dirs, child_files):
new_child_dirs = []
for child_dir in child_dirs:
child_dir_abspath = f'{current_location}{os.sep}{child_dir}'
if handle_exclusion(glob_directories, exclude_directories, child_dir, child_dir_abspath):
continue
new_child_dirs.append(child_dir)
if yield_directories:
yield pathclass.Path(child_dir_abspath, _case_correct=True)
# This will actually affect the results of the os.walk going forward!
child_dirs[:] = new_child_dirs
if yield_files:
for child_file in child_files:
child_file_abspath = f'{current_location}{os.sep}{child_file}'
if handle_exclusion(glob_filenames, exclude_filenames, child_file, child_file_abspath):
continue continue
yield pathclass.Path(child_file_abspath, _case_correct=True) child = current.with_child(entry.name, _case_correct=True)
if yield_directories and yield_style is YIELD_STYLE_FLAT:
yield child
elif yield_style is YIELD_STYLE_NESTED:
child_dirs.append(child)
walker = os.walk(path.absolute_path, onerror=callback_permission_denied, followlinks=True) if recurse:
if yield_style == 'flat': more_queue.appendleft(child)
my_stepper = walkstep_flat
if yield_style == 'nested':
my_stepper = walkstep_nested
for step in walker: elif entry.is_file():
yield from my_stepper(*step) if handle_exclusion(
if not recurse: whitelist=glob_filenames,
break blacklist=exclude_filenames,
basename=entry.name,
abspath=entry_abspath,
):
continue
child = current.with_child(entry.name, _case_correct=True)
if yield_files and yield_style is YIELD_STYLE_FLAT:
yield child
elif yield_style is YIELD_STYLE_NESTED:
child_files.append(child)
queue.extend(more_queue)
if yield_style is YIELD_STYLE_NESTED:
yield (current, child_dirs, child_files)
# Backwards compatibility # Backwards compatibility
walk_generator = walk walk_generator = walk

View file

@ -4,7 +4,7 @@ However, python's glob module is written for unix-style globs in which brackets
represent character classes / ranges. represent character classes / ranges.
On Windows we should escape those brackets to get results that are consistent On Windows we should escape those brackets to get results that are consistent
with a Windows' user's expectations. But calling glob.escape would also escape with a Windows user's expectations. But calling glob.escape would also escape
asterisk which may not be desired. So this module just provides a modified asterisk which may not be desired. So this module just provides a modified
version of glob.glob which will escape only square brackets when called on version of glob.glob which will escape only square brackets when called on
Windows, and behave normally on Linux. Windows, and behave normally on Linux.
@ -14,6 +14,11 @@ import glob as python_glob
import os import os
import re import re
if os.name == 'nt':
GLOB_SYMBOLS = {'*', '?'}
else:
GLOB_SYMBOLS = {'*', '?', '['}
def fix(pattern): def fix(pattern):
if os.name == 'nt': if os.name == 'nt':
pattern = re.sub(r'(\[|\])', r'[\1]', pattern) pattern = re.sub(r'(\[|\])', r'[\1]', pattern)
@ -22,6 +27,9 @@ def fix(pattern):
def fnmatch(name, pat): def fnmatch(name, pat):
return python_fnmatch.fnmatch(name, fix(pat)) return python_fnmatch.fnmatch(name, fix(pat))
def fnmatch_filter(names, pat):
return python_fnmatch.filter(names, fix(pat))
def glob(pathname, *, recursive=False): def glob(pathname, *, recursive=False):
return python_glob.glob(fix(pathname), recursive=recursive) return python_glob.glob(fix(pathname), recursive=recursive)
@ -35,8 +43,8 @@ def glob_many(patterns, *, recursive=False):
def is_glob(pattern): def is_glob(pattern):
''' '''
Improvements can be made to consider [] ranges for unix, but properly Improvements can be made to validate [] ranges for unix, but properly
parsing the range syntax is not something I'm interested in doing right now parsing the range syntax is not something I'm interested in doing right now
and it would become the largest function in the whole module. and it would become the largest function in the whole module.
''' '''
return any(c in pattern for c in '*?') return len(set(pattern).intersection(GLOB_SYMBOLS)) > 0