Make pathclass globs faster by checking scandir for stat first.
This commit is contained in:
parent
c2dae46c99
commit
359c255817
1 changed file with 45 additions and 26 deletions
|
@ -294,32 +294,31 @@ class Path:
|
||||||
'''
|
'''
|
||||||
Return Paths that match a glob pattern within this directory.
|
Return Paths that match a glob pattern within this directory.
|
||||||
'''
|
'''
|
||||||
pattern = os.path.normpath(pattern)
|
pattern = normalize_basename_glob(pattern)
|
||||||
|
# By sidestepping the glob function and going straight for fnmatch
|
||||||
if os.sep in pattern:
|
# filter, we have slightly different behavior than normal, which is
|
||||||
# If the user wants to glob names in a different path, they should
|
# that glob.glob treats .* as hidden files and won't match them with
|
||||||
# create a Pathclass for that directory first and do it normally.
|
# patterns that don't also start with .*.
|
||||||
raise TypeError('glob pattern should not have path separators.')
|
children = os.listdir(self)
|
||||||
|
children = winglob.fnmatch_filter(children, pattern)
|
||||||
if not pattern:
|
items = [self.with_child(c, _case_correct=self._case_correct) for c in children]
|
||||||
raise ValueError('glob pattern should not be empty.')
|
|
||||||
|
|
||||||
# I would like to rewrite this using listdir + fnmatch.filter so we can
|
|
||||||
# get straight to the basenames, but I need to learn what corner cases
|
|
||||||
# are handled by glob for us before I do so.
|
|
||||||
pattern_root = f'{self.absolute_path}{os.sep}'
|
|
||||||
cut_length = len(pattern_root)
|
|
||||||
pattern = f'{pattern_root}{pattern}'
|
|
||||||
items = winglob.glob(pattern)
|
|
||||||
basenames = (item[cut_length:] for item in items)
|
|
||||||
items = [self.with_child(item, _case_correct=self._case_correct) for item in basenames]
|
|
||||||
return items
|
return items
|
||||||
|
|
||||||
def glob_directories(self, pattern):
|
def glob_directories(self, pattern):
|
||||||
return [p for p in self.glob(pattern) if p.is_dir]
|
pattern = normalize_basename_glob(pattern)
|
||||||
|
# Instead of turning all children into Path objects and filtering by
|
||||||
|
# the stat, let's filter by the stat from scandir first.
|
||||||
|
children = (e.name for e in os.scandir(self) if e.is_dir())
|
||||||
|
children = winglob.fnmatch_filter(children, pattern)
|
||||||
|
items = [self.with_child(c, _case_correct=self._case_correct) for c in children]
|
||||||
|
return items
|
||||||
|
|
||||||
def glob_files(self, pattern):
|
def glob_files(self, pattern):
|
||||||
return [p for p in self.glob(pattern) if p.is_file]
|
pattern = normalize_basename_glob(pattern)
|
||||||
|
children = (e.name for e in os.scandir(self) if e.is_file())
|
||||||
|
children = winglob.fnmatch_filter(children, pattern)
|
||||||
|
items = [self.with_child(c, _case_correct=self._case_correct) for c in children]
|
||||||
|
return items
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def is_directory(self):
|
def is_directory(self):
|
||||||
|
@ -352,10 +351,14 @@ class Path:
|
||||||
return children
|
return children
|
||||||
|
|
||||||
def listdir_directories(self):
|
def listdir_directories(self):
|
||||||
return [p for p in self.listdir() if p.is_dir]
|
children = (e.name for e in os.scandir(self) if e.is_dir())
|
||||||
|
items = [self.with_child(c, _case_correct=self._case_correct) for c in children]
|
||||||
|
return items
|
||||||
|
|
||||||
def listdir_files(self):
|
def listdir_files(self):
|
||||||
return [p for p in self.listdir() if p.is_file]
|
children = (e.name for e in os.scandir(self) if e.is_file())
|
||||||
|
items = [self.with_child(c, _case_correct=self._case_correct) for c in children]
|
||||||
|
return items
|
||||||
|
|
||||||
def makedirs(self, mode=0o777, exist_ok=False):
|
def makedirs(self, mode=0o777, exist_ok=False):
|
||||||
return os.makedirs(self, mode=mode, exist_ok=exist_ok)
|
return os.makedirs(self, mode=mode, exist_ok=exist_ok)
|
||||||
|
@ -599,13 +602,16 @@ def glob(pattern):
|
||||||
If you want to recurse, consider using spinal.walk with glob_filenames
|
If you want to recurse, consider using spinal.walk with glob_filenames
|
||||||
instead.
|
instead.
|
||||||
'''
|
'''
|
||||||
return [Path(p) for p in winglob.glob(pattern)]
|
(dirname, pattern) = os.path.split(pattern)
|
||||||
|
return Path(dirname).glob(pattern)
|
||||||
|
|
||||||
def glob_directories(pattern):
|
def glob_directories(pattern):
|
||||||
return [p for p in glob(pattern) if p.is_dir]
|
(dirname, pattern) = os.path.split(pattern)
|
||||||
|
return Path(dirname).glob_directories(pattern)
|
||||||
|
|
||||||
def glob_files(pattern):
|
def glob_files(pattern):
|
||||||
return [p for p in glob(pattern) if p.is_file]
|
(dirname, pattern) = os.path.split(pattern)
|
||||||
|
return Path(dirname).glob_files(pattern)
|
||||||
|
|
||||||
def glob_many(patterns):
|
def glob_many(patterns):
|
||||||
'''
|
'''
|
||||||
|
@ -658,6 +664,19 @@ def normalize_drive(name):
|
||||||
return name
|
return name
|
||||||
return Drive(name)
|
return Drive(name)
|
||||||
|
|
||||||
|
def normalize_basename_glob(pattern):
|
||||||
|
pattern = os.path.normpath(pattern)
|
||||||
|
|
||||||
|
if os.sep in pattern:
|
||||||
|
# If the user wants to glob names in a different path, they should
|
||||||
|
# create a Pathclass for that directory first and do it normally.
|
||||||
|
raise TypeError('glob pattern should not have path separators.')
|
||||||
|
|
||||||
|
if not pattern:
|
||||||
|
raise ValueError('glob pattern should not be empty.')
|
||||||
|
|
||||||
|
return pattern
|
||||||
|
|
||||||
def normalize_pathpart(name):
|
def normalize_pathpart(name):
|
||||||
if type(name) is PathPart:
|
if type(name) is PathPart:
|
||||||
return name
|
return name
|
||||||
|
|
Loading…
Reference in a new issue