Make pathclass globs faster by checking scandir for stat first.

master
voussoir 2021-12-16 21:14:35 -08:00
parent c2dae46c99
commit 359c255817
No known key found for this signature in database
GPG Key ID: 5F7554F8C26DACCB
1 changed files with 45 additions and 26 deletions

View File

@ -294,32 +294,31 @@ class Path:
''' '''
Return Paths that match a glob pattern within this directory. Return Paths that match a glob pattern within this directory.
''' '''
pattern = os.path.normpath(pattern) pattern = normalize_basename_glob(pattern)
# By sidestepping the glob function and going straight for fnmatch
if os.sep in pattern: # filter, we have slightly different behavior than normal, which is
# If the user wants to glob names in a different path, they should # that glob.glob treats .* as hidden files and won't match them with
# create a Pathclass for that directory first and do it normally. # patterns that don't also start with .*.
raise TypeError('glob pattern should not have path separators.') children = os.listdir(self)
children = winglob.fnmatch_filter(children, pattern)
if not pattern: items = [self.with_child(c, _case_correct=self._case_correct) for c in children]
raise ValueError('glob pattern should not be empty.')
# I would like to rewrite this using listdir + fnmatch.filter so we can
# get straight to the basenames, but I need to learn what corner cases
# are handled by glob for us before I do so.
pattern_root = f'{self.absolute_path}{os.sep}'
cut_length = len(pattern_root)
pattern = f'{pattern_root}{pattern}'
items = winglob.glob(pattern)
basenames = (item[cut_length:] for item in items)
items = [self.with_child(item, _case_correct=self._case_correct) for item in basenames]
return items return items
def glob_directories(self, pattern): def glob_directories(self, pattern):
return [p for p in self.glob(pattern) if p.is_dir] pattern = normalize_basename_glob(pattern)
# Instead of turning all children into Path objects and filtering by
# the stat, let's filter by the stat from scandir first.
children = (e.name for e in os.scandir(self) if e.is_dir())
children = winglob.fnmatch_filter(children, pattern)
items = [self.with_child(c, _case_correct=self._case_correct) for c in children]
return items
def glob_files(self, pattern): def glob_files(self, pattern):
return [p for p in self.glob(pattern) if p.is_file] pattern = normalize_basename_glob(pattern)
children = (e.name for e in os.scandir(self) if e.is_file())
children = winglob.fnmatch_filter(children, pattern)
items = [self.with_child(c, _case_correct=self._case_correct) for c in children]
return items
@property @property
def is_directory(self): def is_directory(self):
@ -352,10 +351,14 @@ class Path:
return children return children
def listdir_directories(self): def listdir_directories(self):
return [p for p in self.listdir() if p.is_dir] children = (e.name for e in os.scandir(self) if e.is_dir())
items = [self.with_child(c, _case_correct=self._case_correct) for c in children]
return items
def listdir_files(self): def listdir_files(self):
return [p for p in self.listdir() if p.is_file] children = (e.name for e in os.scandir(self) if e.is_file())
items = [self.with_child(c, _case_correct=self._case_correct) for c in children]
return items
def makedirs(self, mode=0o777, exist_ok=False): def makedirs(self, mode=0o777, exist_ok=False):
return os.makedirs(self, mode=mode, exist_ok=exist_ok) return os.makedirs(self, mode=mode, exist_ok=exist_ok)
@ -599,13 +602,16 @@ def glob(pattern):
If you want to recurse, consider using spinal.walk with glob_filenames If you want to recurse, consider using spinal.walk with glob_filenames
instead. instead.
''' '''
return [Path(p) for p in winglob.glob(pattern)] (dirname, pattern) = os.path.split(pattern)
return Path(dirname).glob(pattern)
def glob_directories(pattern): def glob_directories(pattern):
return [p for p in glob(pattern) if p.is_dir] (dirname, pattern) = os.path.split(pattern)
return Path(dirname).glob_directories(pattern)
def glob_files(pattern): def glob_files(pattern):
return [p for p in glob(pattern) if p.is_file] (dirname, pattern) = os.path.split(pattern)
return Path(dirname).glob_files(pattern)
def glob_many(patterns): def glob_many(patterns):
''' '''
@ -658,6 +664,19 @@ def normalize_drive(name):
return name return name
return Drive(name) return Drive(name)
def normalize_basename_glob(pattern):
    '''
    Validate and normalize a glob pattern that is meant to match basenames
    inside a single directory.

    pattern:
        The glob pattern, such as '*.txt'. It is passed through
        os.path.normpath, so redundant parts like a leading './' are removed.

    Returns the normalized pattern.

    Raises ValueError if the given pattern is empty.
    Raises TypeError if the normalized pattern contains os.sep.
    '''
    normalized = os.path.normpath(pattern)

    # The emptiness check must look at the caller's original value.
    # os.path.normpath turns '' into '.', so testing the normalized string
    # would never detect an empty pattern.
    if not pattern:
        raise ValueError('glob pattern should not be empty.')

    if os.sep in normalized:
        # If the user wants to glob names in a different path, they should
        # create a Pathclass for that directory first and do it normally.
        raise TypeError('glob pattern should not have path separators.')

    return normalized
def normalize_pathpart(name): def normalize_pathpart(name):
if type(name) is PathPart: if type(name) is PathPart:
return name return name