else/SpinalTap/spinal.py

714 lines
21 KiB
Python
Raw Normal View History

2016-03-07 20:25:50 +00:00
import collections
2016-09-24 00:35:58 +00:00
import hashlib
import logging
import os
import shutil
2016-02-21 07:13:50 +00:00
import sys
2016-12-02 06:37:07 +00:00
2016-12-25 03:18:23 +00:00
# pip install voussoirkit
2016-12-02 06:37:07 +00:00
from voussoirkit import bytestring
from voussoirkit import pathclass
from voussoirkit import ratelimiter
2016-08-09 08:33:36 +00:00
2016-11-08 04:48:43 +00:00
# Only CRITICAL records are shown unless the importing program reconfigures
# logging itself.
logging.basicConfig(level=logging.CRITICAL)
log = logging.getLogger(__name__)

# Number of bytes to read and write at a time
CHUNK_SIZE = bytestring.MIBIBYTE * 2

# Hash constructor used by copy_file and verify_hash when validating copies.
HASH_CLASS = hashlib.md5
2016-02-21 07:13:50 +00:00
class SpinalError(Exception):
    '''
    Base class for every exception defined by this module, so callers can
    catch all of them with a single `except SpinalError`.
    Raised directly by `copy` when the source is neither a file nor a directory.
    '''
    pass

class DestinationIsDirectory(SpinalError):
    '''
    Raised by `copy_dir` when the path a file should be copied to already
    exists as a directory.
    '''
    pass

class DestinationIsFile(SpinalError):
    '''
    Raised by `copy_dir` when the destination directory path is an
    existing file.
    '''
    pass

class RecursiveDirectory(SpinalError):
    '''
    Raised by `copy_dir` when the destination lies inside the source.
    '''
    pass

class SourceNotDirectory(SpinalError):
    '''
    Raised when a directory operation is given a source that is not
    a directory.
    '''
    pass

class SourceNotFile(SpinalError):
    '''
    Raised by `copy_file` when the source is not a file.
    '''
    pass

class ValidationError(SpinalError):
    '''
    Raised by `verify_hash` when the file size or hash does not match the
    expected value.
    '''
    pass
2016-12-02 06:37:07 +00:00
def callback_exclusion_v1(name, path_type):
    '''
    Sample exclusion callback: prints which path was excluded and whether it
    was a file or a directory.
    '''
    message = ' '.join(('Excluding', str(path_type), str(name)))
    print(message)
2016-03-07 20:25:50 +00:00
def callback_v1(fpobj, written_bytes, total_bytes):
    '''
    Sample progress callback for copy operations.

    Prints "filename written/total (percent%)" followed by a carriage return
    so that successive calls overwrite the same console line. Once written
    reaches total, a line break is emitted so the final status stays visible.
    '''
    # Non-ASCII characters are replaced so any console can display the name.
    filename = fpobj.absolute_path.encode('ascii', 'replace').decode()

    # max(..., 1) avoids dividing by zero for empty files.
    ratio = (100 * written_bytes) / max(total_bytes, 1)
    percent = '%07.3f' % ratio

    total = '{:,}'.format(total_bytes)
    # Pad the written count so the line keeps a constant width.
    written = '{:,}'.format(written_bytes).rjust(len(total), ' ')

    status = '{filename} {written}/{total} ({percent}%)\r'.format(
        filename=filename,
        written=written,
        total=total,
        percent=percent,
    )
    finished = written_bytes >= total_bytes
    print(status, end='\r\n' if finished else '')
    sys.stdout.flush()
2016-03-07 20:25:50 +00:00
def copy(source, file_args=None, file_kwargs=None, dir_args=None, dir_kwargs=None):
    '''
    Dispatch to copy_file or copy_dir depending on what `source` is.

    file_args, file_kwargs:
        Extra positional / keyword arguments forwarded to `copy_file`.

    dir_args, dir_kwargs:
        Extra positional / keyword arguments forwarded to `copy_dir`.

    Raises SpinalError if the source is neither a file nor a directory.
    '''
    source = str_to_fp(source)

    if source.is_file:
        return copy_file(source, *(file_args or tuple()), **(file_kwargs or dict()))

    if source.is_dir:
        return copy_dir(source, *(dir_args or tuple()), **(dir_kwargs or dict()))

    raise SpinalError('Neither file nor dir: %s' % source)
def copy_dir(
        source,
        destination=None,
        *,
        bytes_per_second=None,
        callback_directory=None,
        callback_exclusion=None,
        callback_file=None,
        callback_permission_denied=None,
        destination_new_root=None,
        dry_run=False,
        exclude_directories=None,
        exclude_filenames=None,
        files_per_second=None,
        overwrite_old=True,
        precalcsize=False,
        validate_hash=False,
    ):
    '''
    Copy all of the contents from source to destination,
    including subdirectories.

    source:
        The directory which will be copied.

    destination:
        The directory in which copied files are placed. Alternatively, use
        destination_new_root.

    bytes_per_second:
        Restrict file copying to this many bytes per second. Can be an integer
        or an existing Ratelimiter object.
        The BYTE, KIBIBYTE, etc constants from module 'bytestring' may help.

        Default = None

    callback_directory:
        This function will be called after each file copy with three parameters:
        name of file copied, number of bytes written to destination directory
        so far, total bytes needed (based on precalcsize).
        If `precalcsize` is False, this function will receive written bytes
        for both written and total, showing 100% always.

        Default = None

    callback_exclusion:
        Passed directly into `walk_generator`.

        Default = None

    callback_file:
        Will be passed into each individual `copy_file` operation as the
        `callback_progress` for that file.

        Default = None

    callback_permission_denied:
        Will be passed into each individual `copy_file` operation as the
        `callback_permission_denied` for that file.

        Default = None

    destination_new_root:
        Determine the destination path by calling
        `new_root(source, destination_new_root)`.
        Thus, this path acts as a root and the rest of the path is matched.

        `destination` and `destination_new_root` are mutually exclusive.

    dry_run:
        Do everything except the actual file copying.

        Default = False

    exclude_filenames:
        Passed directly into `walk_generator`.

        Default = None

    exclude_directories:
        Passed directly into `walk_generator`.

        Default = None

    files_per_second:
        Maximum number of files to be processed per second. Helps to keep CPU
        usage low.

        Default = None

    overwrite_old:
        Passed directly into each `copy_file`.

        Default = True

    precalcsize:
        If True, calculate the size of source before beginning the
        operation. This number can be used in the callback_directory function.
        Else, callback_directory will receive written bytes as total bytes
        (showing 100% always).
        This can take a long time.

        Default = False

    validate_hash:
        Passed directly into each `copy_file`.

    Returns: [destination path, number of bytes written to destination]
    (Written bytes is 0 if all files already existed.)

    Raises ValueError if both or neither of `destination` and
    `destination_new_root` are given, RecursiveDirectory if the destination
    is inside the source, SourceNotDirectory if the source is not a directory,
    DestinationIsFile if the destination is an existing file, and
    DestinationIsDirectory if a file's destination path is an existing
    directory.
    '''
    # Prepare parameters
    if not is_xor(destination, destination_new_root):
        message = 'One and only one of `destination` and '
        message += '`destination_new_root` can be passed.'
        raise ValueError(message)

    source = str_to_fp(source)

    if destination_new_root is not None:
        source.correct_case()
        destination = new_root(source, destination_new_root)
    destination = str_to_fp(destination)

    if destination in source:
        raise RecursiveDirectory(source, destination)

    if not source.is_dir:
        raise SourceNotDirectory(source)

    if destination.is_file:
        raise DestinationIsFile(destination)

    if precalcsize is True:
        total_bytes = get_dir_size(source)
    else:
        total_bytes = 0

    callback_directory = callback_directory or do_nothing
    bytes_per_second = limiter_or_none(bytes_per_second)
    files_per_second = limiter_or_none(files_per_second)

    # Copy
    written_bytes = 0
    walker = walk_generator(
        source,
        callback_exclusion=callback_exclusion,
        exclude_directories=exclude_directories,
        exclude_filenames=exclude_filenames,
    )
    for source_abspath in walker:
        # Terminology:
        # abspath: C:\folder\subfolder\filename.txt
        # location: C:\folder\subfolder
        # base_name: filename.txt
        # folder: subfolder

        # Swap only the leading source root for the destination root.
        # Without the count of 1, a file whose path happens to contain the
        # source root text a second time would have that occurrence rewritten
        # too, producing a nonsense destination.
        destination_abspath = source_abspath.absolute_path.replace(
            source.absolute_path,
            destination.absolute_path,
            1,
        )
        destination_abspath = str_to_fp(destination_abspath)

        if destination_abspath.is_dir:
            raise DestinationIsDirectory(destination_abspath)

        destination_location = os.path.split(destination_abspath.absolute_path)[0]
        if not dry_run:
            os.makedirs(destination_location, exist_ok=True)

        copied = copy_file(
            source_abspath,
            destination_abspath,
            bytes_per_second=bytes_per_second,
            callback_progress=callback_file,
            callback_permission_denied=callback_permission_denied,
            dry_run=dry_run,
            overwrite_old=overwrite_old,
            validate_hash=validate_hash,
        )

        copiedname = copied[0]
        written_bytes += copied[1]

        if precalcsize is False:
            # No precomputed total, so report the running count as the total.
            callback_directory(copiedname, written_bytes, written_bytes)
        else:
            callback_directory(copiedname, written_bytes, total_bytes)

        if files_per_second is not None:
            files_per_second.limit(1)

    return [destination, written_bytes]
2016-03-07 20:25:50 +00:00
def copy_file(
        source,
        destination=None,
        *,
        destination_new_root=None,
        bytes_per_second=None,
        callback_progress=None,
        callback_permission_denied=None,
        callback_validate_hash=None,
        dry_run=False,
        overwrite_old=True,
        validate_hash=False,
    ):
    '''
    Copy a file from one place to another.

    source:
        The file to copy.

    destination:
        The filename of the new copy. If this is an existing directory, the
        file is placed inside it under the source's basename.
        Alternatively, use destination_new_root.

    destination_new_root:
        Determine the destination path by calling
        `new_root(source, destination_new_root)`.
        Thus, this path acts as a root and the rest of the path is matched.

        `destination` and `destination_new_root` are mutually exclusive.

    bytes_per_second:
        Restrict file copying to this many bytes per second. Can be an integer
        or an existing Ratelimiter object.
        The provided BYTE, KIBIBYTE, etc constants may help.

        Default = None

    callback_permission_denied:
        If provided, this function will be called when opening the source or
        the destination raises PermissionError, with the file path and the
        exception object as parameters.
        THE OPERATION WILL RETURN NORMALLY, reporting 0 bytes written.

        If not provided, the PermissionError is raised.

        Default = None

    callback_progress:
        If provided, this function will be called after writing
        each CHUNK_SIZE bytes to destination with three parameters:
        the Path object being copied, number of bytes written so far,
        total number of bytes needed.
        Zero-length files still produce one call.

        Default = None

    callback_validate_hash:
        Passed directly into `verify_hash`

        Default = None

    dry_run:
        Do everything except the actual file copying.

        Default = False

    overwrite_old:
        If True, overwrite an existing destination file whenever its
        "last modified" timestamp differs from the source's. A destination
        with an identical timestamp is left alone.
        If False, an existing destination is never overwritten.

        Default = True

    validate_hash:
        If True, verify the file hash of the resulting file, using the
        `HASH_CLASS` global.

        Default = False

    Returns: [destination filename, number of bytes written to destination]
    (Written bytes is 0 if the file already existed.)

    Raises ValueError if both or neither of `destination` and
    `destination_new_root` are given, and SourceNotFile if the source is not
    a file.
    '''
    # Prepare parameters
    if not is_xor(destination, destination_new_root):
        message = 'One and only one of `destination` and '
        message += '`destination_new_root` can be passed'
        raise ValueError(message)

    source = str_to_fp(source)

    if not source.is_file:
        raise SourceNotFile(source)

    if destination_new_root is not None:
        source.correct_case()
        destination = new_root(source, destination_new_root)
    destination = str_to_fp(destination)

    callback_progress = callback_progress or do_nothing

    if destination.is_dir:
        destination = destination.with_child(source.basename)

    bytes_per_second = limiter_or_none(bytes_per_second)

    # Determine overwrite
    if destination.exists:
        if overwrite_old is False:
            return [destination, 0]

        source_modtime = source.stat.st_mtime
        if source_modtime == destination.stat.st_mtime:
            return [destination, 0]

    # Copy
    if dry_run:
        callback_progress(destination, 0, 0)
        return [destination, 0]

    source_bytes = source.size
    destination_location = os.path.split(destination.absolute_path)[0]
    os.makedirs(destination_location, exist_ok=True)

    def handlehelper(path, mode):
        # Open `path`, routing PermissionError to the callback if there is one.
        # Returns None when the error was handled by the callback.
        try:
            return open(path.absolute_path, mode)
        except PermissionError as exception:
            if callback_permission_denied is None:
                raise
            callback_permission_denied(path, exception)
            return None

    log.debug('Opening handles.')
    # The source must be opened successfully BEFORE the destination is touched:
    # opening the destination with 'wb' truncates it, which would destroy an
    # existing copy even though nothing can be read from the source.
    source_handle = handlehelper(source, 'rb')
    if source_handle is None:
        return [destination, 0]

    destination_handle = None
    try:
        destination_handle = handlehelper(destination, 'wb')
        if destination_handle is None:
            return [destination, 0]

        hasher = HASH_CLASS() if validate_hash else None

        written_bytes = 0
        while True:
            try:
                data_chunk = source_handle.read(CHUNK_SIZE)
            except PermissionError:
                print(source)
                raise

            data_bytes = len(data_chunk)
            if data_bytes == 0:
                break

            if hasher is not None:
                hasher.update(data_chunk)

            destination_handle.write(data_chunk)
            written_bytes += data_bytes

            if bytes_per_second is not None:
                bytes_per_second.limit(data_bytes)

            callback_progress(destination, written_bytes, source_bytes)

        if written_bytes == 0:
            # For zero-length files, we want to get at least one call in there.
            callback_progress(destination, written_bytes, source_bytes)
    finally:
        # Runs on success, early return, and error alike so that no handle
        # is ever left open.
        log.debug('Closing source handle.')
        source_handle.close()
        if destination_handle is not None:
            log.debug('Closing dest handle.')
            destination_handle.close()

    # Fin
    log.debug('Copying metadata')
    shutil.copystat(source.absolute_path, destination.absolute_path)

    if validate_hash:
        verify_hash(
            destination,
            callback=callback_validate_hash,
            known_size=source_bytes,
            known_hash=hasher.hexdigest(),
        )

    return [destination, written_bytes]
2016-05-10 08:00:29 +00:00
def do_nothing(*args):
    '''
    No-op stand-in used wherever a caller did not supply a callback.
    Accepts any positional arguments and ignores them.
    '''
    pass
2016-03-07 20:25:50 +00:00
def get_dir_size(path):
    '''
    Return the combined size in bytes of every file found under this
    directory, subdirectories included.

    Raises SourceNotDirectory if `path` is not a directory.
    '''
    path = str_to_fp(path)

    if not path.is_dir:
        raise SourceNotDirectory(path)

    return sum(filepath.size for filepath in walk_generator(path))
2016-02-21 07:13:50 +00:00
def is_subfolder(parent, child):
    '''
    Determine whether parent contains child.

    Both paths are made absolute and normalized before comparison.
    A path is considered to contain itself.
    '''
    # A trailing separator is appended so that "/folder" does not match
    # "/folder2". Existing trailing separators are stripped first because a
    # filesystem root already ends with one, and doubling it would keep a
    # root parent from ever matching its children.
    parent = normalize(str_to_fp(parent).absolute_path).rstrip(os.sep) + os.sep
    child = normalize(str_to_fp(child).absolute_path).rstrip(os.sep) + os.sep
    return child.startswith(parent)
2016-02-21 07:13:50 +00:00
def is_xor(*args):
    '''
    Return True when exactly one of the arguments is truthy, else False.
    '''
    truthy_count = sum(1 for arg in args if arg)
    return truthy_count == 1
2016-05-10 08:00:29 +00:00
def limiter_or_none(value):
    '''
    Convert `value` into a Ratelimiter, or None if no limit was requested.

    Strings are first parsed by `bytestring.parsebytes`. An existing
    Ratelimiter is returned unchanged, None stays None, and anything else is
    used as the allowance of a new one-second-period Ratelimiter.
    '''
    if isinstance(value, str):
        value = bytestring.parsebytes(value)

    if value is None:
        return None

    if isinstance(value, ratelimiter.Ratelimiter):
        return value

    return ratelimiter.Ratelimiter(allowance=value, period=1)
2016-02-21 07:13:50 +00:00
def new_root(filepath, root):
    '''
    Prepend `root` to `filepath`, drive letter included. For example:
    "C:\\folder\\subfolder\\file.txt" and "C:\\backups" becomes
    "C:\\backups\\C\\folder\\subfolder\\file.txt"

    I use this so that my G: drive can have backups from my C: and D: drives
    while preserving directory structure in G:\\D and G:\\C.

    Paths that have no drive letter and begin with a separator are nested
    under `root` the same way.

    Returns a Path object.
    '''
    filepath = str_to_fp(filepath).absolute_path
    root = str_to_fp(root).absolute_path
    # Turn the drive colon into a separator so the drive letter becomes an
    # ordinary folder name.
    filepath = filepath.replace(':', os.sep)
    filepath = os.path.normpath(filepath)
    # os.path.join throws away `root` entirely when the second part is
    # absolute, which would hand back the original path as the "destination".
    # Stripping the leading separators makes the path relative first.
    filepath = filepath.lstrip(os.sep)
    filepath = os.path.join(root, filepath)
    return str_to_fp(filepath)
2016-02-21 07:13:50 +00:00
def normalize(text):
    '''
    Return `text` after os.path.normcase followed by os.path.normpath, giving
    a canonical spelling suitable for comparing paths.
    '''
    case_folded = os.path.normcase(text)
    return os.path.normpath(case_folded)
2016-03-07 20:25:50 +00:00
def str_to_fp(path):
    '''
    Wrap a string in a pathclass.Path object. Anything that is not a string
    is handed back untouched.
    '''
    return pathclass.Path(path) if isinstance(path, str) else path
2016-09-24 00:35:58 +00:00
def verify_hash(path, known_size, known_hash, callback=None):
    '''
    Check that the file at `path` has the expected size and the expected
    `HASH_CLASS` hex digest. The file is read in CHUNK_SIZE pieces.

    known_size:
        Expected size of the file in bytes.

    known_hash:
        Expected hex digest of the file.

    callback:
        A function that takes three parameters:
        path object, bytes ingested so far, bytes total

    Raises ValidationError if either the size or the hash does not match.
    '''
    path = str_to_fp(path)
    log.debug('Validating hash for "%s" against %s', path.absolute_path, known_hash)

    # The size check is cheap, so it goes first and can skip the read entirely.
    file_size = os.path.getsize(path.absolute_path)
    if file_size != known_size:
        raise ValidationError('File size %d != known size %d' % (file_size, known_size))

    with open(path.absolute_path, 'rb') as handle:
        hasher = HASH_CLASS()
        checked_bytes = 0
        # read() returns empty bytes at end of file, which ends the iteration.
        for chunk in iter(lambda: handle.read(CHUNK_SIZE), b''):
            hasher.update(chunk)
            checked_bytes += len(chunk)
            if callback is not None:
                callback(path, checked_bytes, file_size)

    file_hash = hasher.hexdigest()
    if file_hash != known_hash:
        raise ValidationError('File hash "%s" != known hash "%s"' % (file_hash, known_hash))

    log.debug('Hash validation passed.')
2016-03-07 20:25:50 +00:00
def walk_generator(
        path='.',
        *,
        callback_exclusion=None,
        callback_permission_denied=None,
        depth_first=True,
        exclude_directories=None,
        exclude_filenames=None,
        recurse=True,
        yield_directories=False,
        yield_files=True,
        yield_style='flat',
    ):
    '''
    Walk the file tree under `path` without recursion, yielding Path objects,
    similar to os.walk.

    callback_exclusion:
        Called whenever a file or directory is skipped because of the
        exclusion sets, with two parameters: the path, and 'file' or
        'directory'.

        Default = None

    callback_permission_denied:
        Called with the directory's Path and the exception when listing a
        directory raises PermissionError. That directory is then skipped.

        Default = None

    depth_first:
        If True, the subdirectories of the current directory are visited
        before any directories that were already waiting. If False, they are
        placed at the back of the queue.

        Default = True

    exclude_filenames:
        A set of filenames that will not be yielded. Entries can be absolute
        paths to exclude that particular file, or plain names to exclude
        all matches. For example:
        {'C:\\folder\\file.txt', 'desktop.ini'}

        Default = None

    exclude_directories:
        A set of directories that will not be entered. Entries can be
        absolute paths to exclude that particular directory, or plain names
        to exclude all matches. For example:
        {'C:\\folder', 'thumbnails'}

        Default = None

    recurse:
        If False, only the starting directory is listed and the walk stops
        after it.

        Default = True

    yield_directories:
        In flat style, also yield each directory before its contents.
        Has no effect in nested yield style.

        Default = False

    yield_files:
        In flat style, yield files. Has no effect in nested yield style.

        Default = True

    yield_style:
        If 'flat', yield individual Path objects one by one.
        If 'nested', yield tuple(root, directories, files) for each directory
        like os.walk does, with Path objects for everything.

        Default = 'flat'

    Raises ValueError if both yield_directories and yield_files are False,
    or if yield_style is not one of the two accepted values.
    '''
    if not (yield_directories or yield_files):
        raise ValueError('yield_directories and yield_files cannot both be False')

    if yield_style not in ['flat', 'nested']:
        raise ValueError('Invalid yield_style %s. Either "flat" or "nested".' % repr(yield_style))
    flat = (yield_style == 'flat')

    if exclude_directories is None:
        exclude_directories = set()

    if exclude_filenames is None:
        exclude_filenames = set()

    callback_exclusion = callback_exclusion or do_nothing
    callback_permission_denied = callback_permission_denied or do_nothing

    # Normalized once up front so every later membership test is a plain
    # set lookup.
    exclude_filenames = {normalize(name) for name in exclude_filenames}
    exclude_directories = {normalize(name) for name in exclude_directories}

    path = str_to_fp(path)
    path.correct_case()

    # The starting directory itself may be excluded, either by its full path
    # or by its folder name.
    root_excluded = (
        normalize(path.absolute_path) in exclude_directories or
        normalize(path.basename) in exclude_directories
    )
    if root_excluded:
        callback_exclusion(path.absolute_path, 'directory')
        return

    # Directories waiting to be listed. A queue is used instead of recursion.
    pending = collections.deque([path])

    while pending:
        current_location = pending.popleft()
        log.debug('listdir: %s', current_location.absolute_path)
        try:
            contents = os.listdir(current_location.absolute_path)
        except PermissionError as exception:
            callback_permission_denied(current_location, exception)
            continue
        log.debug('received %d items', len(contents))

        if flat and yield_directories:
            yield current_location

        directories = []
        files = []
        for base_name in contents:
            absolute_name = os.path.join(current_location.absolute_path, base_name)

            if os.path.isdir(absolute_name):
                kind = 'directory'
                exclusions = exclude_directories
            elif flat and not yield_files:
                # Files are not wanted, so they need no exclusion check.
                continue
            else:
                kind = 'file'
                exclusions = exclude_filenames

            excluded = (
                normalize(absolute_name) in exclusions or
                normalize(base_name) in exclusions
            )
            if excluded:
                callback_exclusion(absolute_name, kind)
                continue

            entry = str_to_fp(absolute_name)
            if kind == 'directory':
                directories.append(entry)
            elif flat:
                yield entry
            else:
                files.append(entry)

        if not flat:
            yield (current_location, directories, files)

        if not recurse:
            break

        if depth_first:
            # Extendleft causes them to get reversed, so flip it first.
            directories.reverse()
            pending.extendleft(directories)
        else:
            pending.extend(directories)