This commit is contained in:
Ethan Dalool 2016-09-23 17:35:58 -07:00
parent bfaed2e416
commit aa836ce5c3
15 changed files with 228 additions and 1516 deletions

3
.gitignore vendored
View file

@ -2,6 +2,9 @@ AwfulCrateBox/
Classifieds/ Classifieds/
Toddo/toddo.db Toddo/toddo.db
Meal/meal.db Meal/meal.db
*.key
*.csr
*.crt
# Windows image file caches # Windows image file caches
Thumbs.db Thumbs.db

4
AESFile/README.md Normal file
View file

@ -0,0 +1,4 @@
AESFile
=======
[Thanks @sfbahr for providing the prebuilt pycrypto wheel](https://github.com/sfbahr/PyCrypto-Wheels)

102
AESFile/aesfile.py Normal file
View file

@ -0,0 +1,102 @@
import argparse
import hashlib
from Crypto.Cipher import AES
import sys
import os
sys.path.append('C:\\git\\else\\Bytestring'); import bytestring
# Number of bytes read from the input and handed to the cipher per iteration.
# encrypt_file pads the final chunk up to this length.
BLOCK_SIZE = 32
# `whence` argument passed to seek() in decrypt_file to jump to the end of the
# input and measure how much data remains.
SEEK_END = 2
def decrypt_file(aes, input_handle, output_handle):
    '''
    Decrypt everything between the current position of `input_handle` and its
    end, writing the result to `output_handle`.

    aes:
        Cipher object; its `decrypt` method is applied to every chunk.

    input_handle:
        Seekable binary handle positioned at the first byte to decrypt.

    output_handle:
        Binary handle that receives the decrypted data.

    The final chunk has its padding removed: every trailing byte equal to
    that chunk's last byte is dropped before the chunk is written.
    '''
    # Measure the remaining input so the final chunk can be recognised.
    start = input_handle.tell()
    input_size = input_handle.seek(0, SEEK_END) - start
    input_handle.seek(start)

    consumed = 0
    while True:
        ciphertext = input_handle.read(BLOCK_SIZE)
        if not ciphertext:
            break
        consumed += len(ciphertext)
        plaintext = aes.decrypt(ciphertext)

        if consumed == input_size:
            # Final chunk: find where the run of padding bytes begins.
            pad = plaintext[-1]
            keep = len(plaintext)
            while keep and plaintext[keep - 1] == pad:
                keep -= 1
            plaintext = plaintext[:keep]

        # Progress report every time a whole mebibyte boundary is hit.
        if consumed % bytestring.MIBIBYTE == 0:
            print(bytestring.bytestring(consumed))
        output_handle.write(plaintext)
def encrypt_file(aes, input_handle, output_handle):
    '''
    Encrypt the remainder of `input_handle` in BLOCK_SIZE chunks and write the
    ciphertext to `output_handle`.

    aes:
        Cipher object; its `encrypt` method is applied to every chunk.

    input_handle:
        Binary handle to read plaintext from.

    output_handle:
        Binary handle that receives the ciphertext.

    The last chunk is always padded up to BLOCK_SIZE. When the input length
    is an exact multiple of BLOCK_SIZE (including an empty input), a whole
    extra chunk of padding is emitted. The pad value is the last data byte
    plus one (mod 256), so it never equals the last real byte and
    decrypt_file can strip the padding by removing the trailing run of
    identical bytes.
    '''
    last_byte = 0
    bytes_read = 0
    done = False
    while not done:
        chunk = input_handle.read(BLOCK_SIZE)
        if chunk:
            last_byte = chunk[-1]

        if len(chunk) < BLOCK_SIZE:
            # Short (or empty) read means end of input: pad and finish.
            # bytes([n]) handles the full 0-255 range. The previous
            # chr(n).encode('ascii') raised UnicodeEncodeError for n >= 128.
            pad_byte = bytes([(last_byte + 1) % 256])
            chunk += pad_byte * (BLOCK_SIZE - len(chunk))
            done = True

        bytes_read += len(chunk)
        # Progress report every time a whole mebibyte boundary is hit.
        if bytes_read % bytestring.MIBIBYTE == 0:
            print(bytestring.bytestring(bytes_read))

        output_handle.write(aes.encrypt(chunk))
def prepare_handles_argparse(args):
    # NOTE(review): this looks like an unfinished stub. `args` is unused and
    # none of `aes`, `input_handle` or `output_handle` is assigned here, so
    # unless those names exist as module-level globals, calling this raises
    # NameError. No caller is visible in this file. TODO: implement or remove.
    return (aes, input_handle, output_handle)
def encrypt_argparse(args):
    '''
    Command-line handler for the `encrypt` subcommand.

    args:
        Parsed argparse namespace with `input`, `output` and `password`.

    Reads `args.input`, encrypts it with a key derived from `args.password`,
    and writes a random 16-byte initialization vector followed by the
    ciphertext to `args.output`.
    '''
    # Context managers guarantee both handles are closed (and the output
    # flushed) even if encryption fails part-way through.
    with open(args.input, 'rb') as input_handle, open(args.output, 'wb') as output_handle:
        password = hashit(args.password, 32)
        initialization_vector = os.urandom(16)
        # AES.MODE_CFB is the named constant for the mode value 3 used before.
        aes = AES.new(password, mode=AES.MODE_CFB, IV=initialization_vector)
        # The IV is stored in the clear at the start of the file so that
        # decrypt_argparse can read it back.
        output_handle.write(initialization_vector)
        encrypt_file(aes, input_handle, output_handle)
def decrypt_argparse(args):
    '''
    Command-line handler for the `decrypt` subcommand.

    args:
        Parsed argparse namespace with `input`, `output` and `password`.

    Reads the 16-byte initialization vector from the start of `args.input`,
    then decrypts the rest of the file into `args.output` using a key derived
    from `args.password`.

    Raises ValueError if the input is too short to contain the IV.
    '''
    with open(args.input, 'rb') as input_handle:
        initialization_vector = input_handle.read(16)
        # Validate before touching the output so a truncated input does not
        # create or clobber the destination file.
        if len(initialization_vector) != 16:
            raise ValueError('Input is too short to contain a 16-byte initialization vector.')

        password = hashit(args.password, 32)
        # AES.MODE_CFB is the named constant for the mode value 3 used before.
        aes = AES.new(password, mode=AES.MODE_CFB, IV=initialization_vector)

        # Context managers guarantee both handles are closed (and the output
        # flushed) even if decryption fails part-way through.
        with open(args.output, 'wb') as output_handle:
            decrypt_file(aes, input_handle, output_handle)
def hashit(text, length=None):
    '''
    Return the SHA-512 hex digest of `text` (encoded as UTF-8).

    length:
        If not None, only the first `length` characters of the hex digest
        are returned.

        Default = None
    '''
    digest = hashlib.sha512(text.encode('utf-8')).hexdigest()
    return digest if length is None else digest[:length]
def main(argv):
    '''
    Parse `argv` (the command line without the program name) and dispatch to
    the handler for the chosen subcommand.

    Both `encrypt` and `decrypt` take the same required options:
    -i/--input, -o/--output and -p/--password.

    Exits with an argparse usage error if no subcommand is given.
    '''
    parser = argparse.ArgumentParser()
    subparsers = parser.add_subparsers()

    # The two subcommands share an identical option set and differ only in
    # the handler they dispatch to.
    handlers = (
        ('encrypt', encrypt_argparse),
        ('decrypt', decrypt_argparse),
    )
    for (name, handler) in handlers:
        subparser = subparsers.add_parser(name)
        subparser.add_argument('-i', '--input', dest='input', required=True)
        subparser.add_argument('-o', '--output', dest='output', required=True)
        subparser.add_argument('-p', '--password', dest='password', required=True)
        subparser.set_defaults(func=handler)

    args = parser.parse_args(argv)
    # Subparsers are optional in Python 3, so with no subcommand `func` is
    # never set. Report a usage error instead of raising AttributeError.
    if not hasattr(args, 'func'):
        parser.error('expected a subcommand: encrypt or decrypt')
    args.func(args)


if __name__ == '__main__':
    main(sys.argv[1:])

View file

@ -76,6 +76,8 @@ def download_file(
if plan['plan_type'] == 'resume': if plan['plan_type'] == 'resume':
bytes_downloaded = plan['seek_to'] bytes_downloaded = plan['seek_to']
elif plan['plan_type'] == 'partial':
bytes_downloaded = plan['seek_to']
else: else:
bytes_downloaded = 0 bytes_downloaded = 0

View file

@ -1,114 +0,0 @@
start_time = time.time()
# Raise for cases where the minimum > maximum
for (maxkey, maxval) in maximums.items():
if maxkey not in minimums:
continue
minval = minimums[maxkey]
if minval > maxval:
raise ValueError('Impossible min-max for %s' % maxkey)
conditions = []
minmaxers = {'<=': maximums, '>=': minimums}
# Convert the min-max parameters into query strings
print('Writing minmaxers')
for (comparator, minmaxer) in minmaxers.items():
for (field, value) in minmaxer.items():
if field not in Photo.int_properties:
raise ValueError('Unknown Photo property: %s' % field)
value = str(value)
query = min_max_query_builder(field, comparator, value)
conditions.append(query)
print(conditions)
print('Writing extension rule')
if extension is not None:
if isinstance(extension, str):
extension = [extension]
# Normalize to prevent injections
extension = [normalize_tagname(e) for e in extension]
extension = ['extension == "%s"' % e for e in extension]
extension = ' OR '.join(extension)
extension = '(%s)' % extension
conditions.append(extension)
def setify(l):
if l is None:
return set()
else:
return set(self.get_tag_by_name(t) for t in l)
tag_musts = setify(tag_musts)
tag_mays = setify(tag_mays)
tag_forbids = setify(tag_forbids)
base = '''
{negator} EXISTS(
SELECT 1 FROM photo_tag_rel
WHERE photo_tag_rel.photoid == photos.id
AND photo_tag_rel.tagid {operator} {value}
)'''
print('Writing musts')
for tag in tag_musts:
# tagid == must
query = base.format(
negator='',
operator='==',
value='"%s"' % tag.id,
)
conditions.append(query)
print('Writing mays')
if len(tag_mays) > 0:
# not any(tagid not in mays)
acceptable = tag_mays.union(tag_musts)
acceptable = ['"%s"' % t.id for t in acceptable]
acceptable = ', '.join(acceptable)
query = base.format(
negator='',
operator='IN',
value='(%s)' % acceptable,
)
conditions.append(query)
print('Writing forbids')
if len(tag_forbids) > 0:
# not any(tagid in forbids)
forbids = ['"%s"' % t.id for t in tag_forbids]
forbids = ', '.join(forbids)
query = base.format(
negator='NOT',
operator='IN',
value='(%s)' % forbids
)
conditions.append(query)
if len(conditions) == 0:
raise ValueError('No search query provided')
conditions = [query for query in conditions if query is not None]
conditions = ['(%s)' % c for c in conditions]
conditions = ' AND '.join(conditions)
conditions = 'WHERE %s' % conditions
query = 'SELECT * FROM photos %s' % conditions
query = query.replace('\n', ' ')
while ' ' in query:
query = query.replace(' ', ' ')
print(query)
temp_cur = self.sql.cursor()
temp_cur.execute(query)
for fetch in fetch_generator(temp_cur):
photo = Photo(self, fetch)
yield photo
end_time = time.time()
print(end_time - start_time)

View file

@ -1 +0,0 @@
import phototagger

File diff suppressed because it is too large Load diff

Binary file not shown.

Before

Width:  |  Height:  |  Size: 467 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 900 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 321 KiB

View file

@ -1,194 +0,0 @@
import os
import phototagger
import unittest
DB_NAME = ':memory:'


class PhotoDBTest(unittest.TestCase):
    '''
    Tests for phototagger.PhotoDB.

    A new PhotoDB is constructed from DB_NAME before every test. The photo
    tests read image files from the relative `samples` directory.
    '''
    def setUp(self):
        self.p = phototagger.PhotoDB(DB_NAME)

    def tearDown(self):
        pass

    def test_add_and_remove_photo(self):
        photo1 = self.p.new_photo('samples\\train.jpg')
        self.assertEqual(len(photo1.id), self.p.id_length)

        photo2 = self.p.get_photo_by_id(photo1.id)
        self.assertEqual(photo1, photo2)

        self.p.remove_photo(photo1.id)

        photo3 = self.p.get_photo_by_id(photo1.id)
        self.assertIsNone(photo3)

    def test_add_and_remove_tag(self):
        tag1 = self.p.new_tag('trains')
        self.assertEqual(tag1.name, 'trains')
        self.assertEqual(len(tag1.id), self.p.id_length)

        tag2 = self.p.get_tag_by_id(tag1.id)
        self.assertEqual(tag1, tag2)

        self.p.remove_tag(tagid=tag1.id)

        tag3 = self.p.get_tag_by_id(tag1.id)
        self.assertIsNone(tag3)

        # Normalization
        tag = self.p.new_tag('one two!')
        self.assertEqual(tag.name, 'one_two')

    def test_add_and_remove_synonym(self):
        # Add synonym
        giraffe = self.p.new_tag('giraffe')
        self.p.new_tag_synonym('long horse', 'giraffe')
        tag = self.p.get_tag_by_name('long horse', resolve_synonyms=True)
        self.assertEqual(tag, giraffe)

        # Synonym of synonym should resolve to master
        self.p.new_tag_synonym('snake with legs', 'long horse')
        tag = self.p.get_tag_by_name('snake with legs')
        self.assertEqual(tag, giraffe)

        # Remove Tag
        self.p.remove_tag_synonym('long horse')
        horse = self.p.get_tag_by_name('long horse')
        self.assertIsNone(horse)

        # Exceptions
        self.assertRaises(phototagger.NoSuchTag, self.p.new_tag_synonym, 'blanc', 'white')
        self.assertRaises(phototagger.NoSuchSynonym, self.p.remove_tag_synonym, 'blanc')

    def test_apply_photo_tag(self):
        photo = self.p.new_photo('samples\\train.jpg')
        self.p.new_tag('vehicles')

        # Should only return True if it is a new tag.
        status = self.p.apply_photo_tag(photo.id, tagname='vehicles')
        self.assertTrue(status)

        status = self.p.apply_photo_tag(photo.id, tagname='vehicles')
        self.assertFalse(status)

    def test_convert_tag_synonym(self):
        # Install tags and a synonym
        photo = self.p.new_photo('samples\\train.jpg')
        trains = self.p.new_tag('trains')
        locomotives = self.p.new_tag('locomotives')
        self.p.new_tag_synonym('choochoos', 'locomotives')

        # The first two, as independents, return True.
        self.assertTrue(self.p.apply_photo_tag(photo.id, trains.id))
        self.assertTrue(self.p.apply_photo_tag(photo.id, locomotives.id))
        self.assertFalse(self.p.apply_photo_tag(photo.id, tagname='choochoos'))

        # Pre-conversion, they should be independent.
        trains = self.p.get_tag_by_name('trains', resolve_synonyms=False)
        locomotives = self.p.get_tag_by_name('locomotives', resolve_synonyms=False)
        self.assertNotEqual(trains, locomotives)
        trains_id = trains.id

        # Convert and make sure the second is no longer independent.
        self.p.convert_tag_to_synonym(oldtagname='locomotives', mastertagname='trains')
        trains = self.p.get_tag_by_name('trains', resolve_synonyms=False)
        locomotives = self.p.get_tag_by_name('locomotives', resolve_synonyms=False)
        self.assertIsNone(locomotives)
        self.assertEqual(trains.id, trains_id)

        # The old tag should still pass has_tag as a synonym.
        # The synonym of the old tag should have been remapped to the master.
        self.assertTrue(self.p.photo_has_tag(photo.id, tagname='trains'))
        self.assertTrue(self.p.photo_has_tag(photo.id, tagname='locomotives'))
        self.assertTrue(self.p.photo_has_tag(photo.id, tagname='choochoos'))

        # Synonym should not be included in the photo's tag list.
        tags = list(self.p.get_tags_by_photo(photo.id))
        self.assertEqual(len(tags), 1)
        self.assertEqual(tags[0].id, trains_id)

    def test_generate_id(self):
        i_photo = self.p.generate_id('photos')
        i_tag = self.p.generate_id('tags')
        self.assertRaises(ValueError, self.p.generate_id, 'other')

        self.assertEqual(len(i_photo), self.p.id_length)
        self.assertEqual(len(i_tag), self.p.id_length)

        self.assertEqual(int(i_photo), int(i_tag))
        self.assertLess(int(i_photo), int(self.p.generate_id('photos')))

    def test_get_photo_by_id(self):
        photo = self.p.new_photo('samples\\train.jpg')

        photo2 = self.p.get_photo_by_id(photo.id)
        self.assertEqual(photo, photo2)

    def test_get_photo_by_path(self):
        photo = self.p.new_photo('samples\\train.jpg')

        photo2 = self.p.get_photo_by_path(photo.filepath)
        self.assertEqual(photo, photo2)

    def test_get_photos_by_recent(self):
        paths = ['train.jpg', 'bolts.jpg', 'reddit.png']
        paths = ['samples\\' + path for path in paths]
        paths = [os.path.abspath(path) for path in paths]
        for path in paths:
            self.p.new_photo(path)

        # Most recently added photo is expected first.
        photos = list(self.p.get_photos_by_recent())
        paths.reverse()
        for (index, photo) in enumerate(photos):
            self.assertEqual(photo.filepath, paths[index])

        photos = list(self.p.get_photos_by_recent(count=2))
        self.assertEqual(len(photos), 2)

    def test_get_photos_by_search(self):
        print('NOT IMPLEMENTED')

    def test_get_tag_by_id(self):
        tag1 = self.p.new_tag('test by id')
        tag2 = self.p.get_tag_by_id(tag1.id)
        self.assertEqual(tag1, tag2)

        tag2 = self.p.get_tag(tagid=tag1.id)
        self.assertEqual(tag1, tag2)

    def test_get_tag_by_name(self):
        tag1 = self.p.new_tag('test by name')
        tag2 = self.p.get_tag_by_name(tag1.name)
        self.assertEqual(tag1, tag2)

        tag2 = self.p.get_tag(tagname=tag1.name)
        self.assertEqual(tag1, tag2)

    def test_get_tags_by_photo(self):
        photo = self.p.new_photo('samples\\train.jpg')
        self.p.new_tag('vehicles')
        self.p.apply_photo_tag(photo.id, tagname='vehicles')
        # Materialize the result before indexing, as test_convert_tag_synonym
        # does, so this works whether a list or an iterator comes back.
        tags = list(self.p.get_tags_by_photo(photo.id))
        self.assertEqual(tags[0].name, 'vehicles')

    def test_new_tag_lengths(self):
        t = 'x' * (phototagger.MAX_TAG_NAME_LENGTH)
        self.p.new_tag(t)
        self.assertRaises(phototagger.TagTooLong, self.p.new_tag, t+'x')
        self.assertRaises(phototagger.TagTooShort, self.p.new_tag, '')
        self.assertRaises(phototagger.TagTooShort, self.p.new_tag, '!!??&&*')

    def test_photo_has_tag(self):
        photo = self.p.new_photo('samples\\train.jpg')
        tag = self.p.new_tag('vehicles')
        self.p.apply_photo_tag(photo.id, tag.id)
        # The result was previously discarded, so this test could never fail.
        self.assertTrue(self.p.photo_has_tag(photo.id, tag.id))

    def test_rename_tag(self):
        print('NOT IMPLEMENTED')


if __name__ == '__main__':
    unittest.main()

View file

@ -15,7 +15,7 @@
<body> <body>
<div id="control_panel"> <div id="control_panel">
<input type="text" id="subreddit_field" placeholder="/r/learnpython"> <input type="text" id="subreddit_field" placeholder="/r/learnpython" autofocus>
<button id="start_button" onclick="start()">Start</button> <button id="start_button" onclick="start()">Start</button>
<a id="browser_link"></a> <a id="browser_link"></a>
<button id="clear_button" onclick="clear_workspace()">Clear workspace</button> <button id="clear_button" onclick="clear_workspace()">Clear workspace</button>

View file

@ -19,7 +19,7 @@ class Path:
return hash(self.absolute_path) return hash(self.absolute_path)
def __repr__(self): def __repr__(self):
return '{c}({path})'.format(c=self.__class__, path=self.absolute_path) return '{c}({path})'.format(c=self.__class__.__name__, path=repr(self.absolute_path))
@property @property
def basename(self): def basename(self):

View file

@ -11,7 +11,6 @@ import types
sys.path.append('C:\\git\\else\\Bytestring'); import bytestring sys.path.append('C:\\git\\else\\Bytestring'); import bytestring
sys.path.append('C:\\git\\else\\Pathclass'); import pathclass sys.path.append('C:\\git\\else\\Pathclass'); import pathclass
sys.path.append('C:\\git\\else\\Ratelimiter'); import ratelimiter sys.path.append('C:\\git\\else\\Ratelimiter'); import ratelimiter
sys.path.append('C:\\git\\else\\SpinalTap'); import spinal
FILE_READ_CHUNK = bytestring.MIBIBYTE FILE_READ_CHUNK = bytestring.MIBIBYTE
RATELIMITER = ratelimiter.Ratelimiter(16 * bytestring.MIBIBYTE) RATELIMITER = ratelimiter.Ratelimiter(16 * bytestring.MIBIBYTE)

View file

@ -1,6 +1,8 @@
import collections import collections
import glob import glob
import hashlib
import json import json
import logging
import os import os
import shutil import shutil
import stat import stat
@ -12,10 +14,13 @@ sys.path.append('C:\\git\\else\\Bytestring'); import bytestring
sys.path.append('C:\\git\\else\\Pathclass'); import pathclass sys.path.append('C:\\git\\else\\Pathclass'); import pathclass
sys.path.append('C:\\git\\else\\Ratelimiter'); import ratelimiter sys.path.append('C:\\git\\else\\Ratelimiter'); import ratelimiter
logging.basicConfig(level=logging.DEBUG)
log = logging.getLogger(__name__)
CHUNK_SIZE = 128 * bytestring.KIBIBYTE CHUNK_SIZE = 128 * bytestring.KIBIBYTE
# Number of bytes to read and write at a time # Number of bytes to read and write at a time
HASH_CLASS = hashlib.md5
class DestinationIsDirectory(Exception): class DestinationIsDirectory(Exception):
pass pass
@ -35,6 +40,9 @@ class SourceNotFile(Exception):
class SpinalError(Exception): class SpinalError(Exception):
pass pass
class ValidationError(Exception):
    # Raised by verify_hash when a file's size or hash differs from the
    # expected value.
    pass
def callback_exclusion(name, path_type): def callback_exclusion(name, path_type):
''' '''
Example of an exclusion callback function. Example of an exclusion callback function.
@ -86,13 +94,13 @@ def copy_dir(
callback_exclusion=None, callback_exclusion=None,
callback_file=None, callback_file=None,
callback_permission_denied=None, callback_permission_denied=None,
callback_verbose=None,
dry_run=False, dry_run=False,
exclude_directories=None, exclude_directories=None,
exclude_filenames=None, exclude_filenames=None,
files_per_second=None, files_per_second=None,
overwrite_old=True, overwrite_old=True,
precalcsize=False, precalcsize=False,
validate_hash=False,
): ):
''' '''
Copy all of the contents from source to destination, Copy all of the contents from source to destination,
@ -145,11 +153,6 @@ def copy_dir(
Default = None Default = None
callback_verbose:
If provided, this function will be called with some operation notes.
Default = None
dry_run: dry_run:
Do everything except the actual file copying. Do everything except the actual file copying.
@ -186,6 +189,9 @@ def copy_dir(
Default = False Default = False
validate_hash:
Passed directly into each `copy_file`.
Returns: [destination path, number of bytes written to destination] Returns: [destination path, number of bytes written to destination]
(Written bytes is 0 if all files already existed.) (Written bytes is 0 if all files already existed.)
''' '''
@ -217,7 +223,6 @@ def copy_dir(
total_bytes = 0 total_bytes = 0
callback_directory = callback_directory or do_nothing callback_directory = callback_directory or do_nothing
callback_verbose = callback_verbose or do_nothing
bytes_per_second = limiter_or_none(bytes_per_second) bytes_per_second = limiter_or_none(bytes_per_second)
files_per_second = limiter_or_none(files_per_second) files_per_second = limiter_or_none(files_per_second)
@ -226,7 +231,6 @@ def copy_dir(
walker = walk_generator( walker = walk_generator(
source, source,
callback_exclusion=callback_exclusion, callback_exclusion=callback_exclusion,
callback_verbose=callback_verbose,
exclude_directories=exclude_directories, exclude_directories=exclude_directories,
exclude_filenames=exclude_filenames, exclude_filenames=exclude_filenames,
) )
@ -255,9 +259,9 @@ def copy_dir(
bytes_per_second=bytes_per_second, bytes_per_second=bytes_per_second,
callback=callback_file, callback=callback_file,
callback_permission_denied=callback_permission_denied, callback_permission_denied=callback_permission_denied,
callback_verbose=callback_verbose,
dry_run=dry_run, dry_run=dry_run,
overwrite_old=overwrite_old, overwrite_old=overwrite_old,
validate_hash=validate_hash,
) )
copiedname = copied[0] copiedname = copied[0]
@ -280,9 +284,10 @@ def copy_file(
bytes_per_second=None, bytes_per_second=None,
callback=None, callback=None,
callback_permission_denied=None, callback_permission_denied=None,
callback_verbose=None, callback_validate_hash=None,
dry_run=False, dry_run=False,
overwrite_old=True, overwrite_old=True,
validate_hash=False,
): ):
''' '''
Copy a file from one place to another. Copy a file from one place to another.
@ -323,8 +328,8 @@ def copy_file(
Default = None Default = None
callback_verbose: callback_validate_hash:
If provided, this function will be called with some operation notes. Passed directly into `verify_hash`
Default = None Default = None
@ -339,6 +344,12 @@ def copy_file(
Default = True Default = True
validate_hash:
If True, verify the file hash of the resulting file, using the
`HASH_CLASS` global.
Default = False
Returns: [destination filename, number of bytes written to destination] Returns: [destination filename, number of bytes written to destination]
(Written bytes is 0 if the file already existed.) (Written bytes is 0 if the file already existed.)
''' '''
@ -359,7 +370,6 @@ def copy_file(
destination = str_to_fp(destination) destination = str_to_fp(destination)
callback = callback or do_nothing callback = callback or do_nothing
callback_verbose = callback_verbose or do_nothing
if destination.is_dir: if destination.is_dir:
raise DestinationIsDirectory(destination) raise DestinationIsDirectory(destination)
@ -387,9 +397,9 @@ def copy_file(
written_bytes = 0 written_bytes = 0
try: try:
callback_verbose('Opening handles.') log.debug('Opening handles.')
source_file = open(source.absolute_path, 'rb') source_handle = open(source.absolute_path, 'rb')
destination_file = open(destination.absolute_path, 'wb') destination_handle = open(destination.absolute_path, 'wb')
except PermissionError as exception: except PermissionError as exception:
if callback_permission_denied is not None: if callback_permission_denied is not None:
callback_permission_denied(source, exception) callback_permission_denied(source, exception)
@ -397,13 +407,19 @@ def copy_file(
else: else:
raise raise
if validate_hash:
hasher = HASH_CLASS()
while True: while True:
data_chunk = source_file.read(CHUNK_SIZE) data_chunk = source_handle.read(CHUNK_SIZE)
data_bytes = len(data_chunk) data_bytes = len(data_chunk)
if data_bytes == 0: if data_bytes == 0:
break break
destination_file.write(data_chunk) if validate_hash:
hasher.update(data_chunk)
destination_handle.write(data_chunk)
written_bytes += data_bytes written_bytes += data_bytes
if bytes_per_second is not None: if bytes_per_second is not None:
@ -412,12 +428,21 @@ def copy_file(
callback(destination, written_bytes, source_bytes) callback(destination, written_bytes, source_bytes)
# Fin # Fin
callback_verbose('Closing source handle.') log.debug('Closing source handle.')
source_file.close() source_handle.close()
callback_verbose('Closing dest handle.') log.debug('Closing dest handle.')
destination_file.close() destination_handle.close()
callback_verbose('Copying metadata') log.debug('Copying metadata')
shutil.copystat(source.absolute_path, destination.absolute_path) shutil.copystat(source.absolute_path, destination.absolute_path)
if validate_hash:
verify_hash(
destination,
callback=callback_validate_hash,
known_size=source_bytes,
known_hash=hasher.hexdigest(),
)
return [destination, written_bytes] return [destination, written_bytes]
def do_nothing(*args): def do_nothing(*args):
@ -497,12 +522,43 @@ def str_to_fp(path):
path = pathclass.Path(path) path = pathclass.Path(path)
return path return path
def verify_hash(path, known_size, known_hash, callback=None):
    '''
    Check that the file at `path` has the expected size and hash, using the
    `HASH_CLASS` global as the hash algorithm.

    path:
        String or Path object of the file to check.

    known_size:
        The expected file size in bytes. Compared first, before any hashing.

    known_hash:
        The expected hex digest.

    callback:
        A function that takes three parameters:
        path object, bytes ingested so far, bytes total

        Default = None

    Raises ValidationError if the size or the hash does not match.
    '''
    path = str_to_fp(path)
    filepath = path.absolute_path
    log.debug('Validating hash for "%s" against %s' % (filepath, known_hash))

    # Cheap check first: a size mismatch makes hashing pointless.
    file_size = os.path.getsize(filepath)
    if file_size != known_size:
        raise ValidationError('File size %d != known size %d' % (file_size, known_size))

    hasher = HASH_CLASS()
    checked_bytes = 0
    with open(filepath, 'rb') as handle:
        # Read until the handle returns an empty bytes object at EOF.
        for chunk in iter(lambda: handle.read(CHUNK_SIZE), b''):
            hasher.update(chunk)
            checked_bytes += len(chunk)
            if callback is not None:
                callback(path, checked_bytes, file_size)

    file_hash = hasher.hexdigest()
    if file_hash != known_hash:
        raise ValidationError('File hash "%s" != known hash "%s"' % (file_hash, known_hash))
    log.debug('Hash validation passed.')
def walk_generator( def walk_generator(
path='.', path='.',
callback_exclusion=None, callback_exclusion=None,
callback_verbose=None,
exclude_directories=None, exclude_directories=None,
exclude_filenames=None, exclude_filenames=None,
recurse=True,
yield_style='flat'
): ):
''' '''
Yield Path objects for files in the file tree, similar to os.walk. Yield Path objects for files in the file tree, similar to os.walk.
@ -513,11 +569,6 @@ def walk_generator(
Default = None Default = None
callback_verbose:
If provided, this function will be called with some operation notes.
Default = None
exclude_filenames: exclude_filenames:
A set of filenames that will not be copied. Entries can be absolute A set of filenames that will not be copied. Entries can be absolute
paths to exclude that particular file, or plain names to exclude paths to exclude that particular file, or plain names to exclude
@ -533,7 +584,18 @@ def walk_generator(
{'C:\\folder', 'thumbnails'} {'C:\\folder', 'thumbnails'}
Default = None Default = None
recurse:
Yield from subdirectories. If False, only immediate files are returned.
yield_style:
If 'flat', yield individual files one by one in a constant stream.
If 'nested', yield tuple(root, directories, files) like os.walk does,
except I use Path objects with absolute paths for everything.
''' '''
if yield_style not in ['flat', 'nested']:
raise ValueError('Invalid yield_style %s. Either "flat" or "nested".' % repr(yield_style))
if exclude_directories is None: if exclude_directories is None:
exclude_directories = set() exclude_directories = set()
@ -541,19 +603,20 @@ def walk_generator(
exclude_filenames = set() exclude_filenames = set()
callback_exclusion = callback_exclusion or do_nothing callback_exclusion = callback_exclusion or do_nothing
callback_verbose = callback_verbose or do_nothing
exclude_filenames = {normalize(f) for f in exclude_filenames} exclude_filenames = {normalize(f) for f in exclude_filenames}
exclude_directories = {normalize(f) for f in exclude_directories} exclude_directories = {normalize(f) for f in exclude_directories}
path = str_to_fp(path).absolute_path path = str_to_fp(path)
if normalize(path) in exclude_directories: # Considering full paths
callback_exclusion(path, 'directory') if normalize(path.absolute_path) in exclude_directories:
callback_exclusion(path.absolute_path, 'directory')
return return
if normalize(os.path.split(path)[1]) in exclude_directories: # Considering folder names
callback_exclusion(path, 'directory') if normalize(path.basename) in exclude_directories:
callback_exclusion(path.absolute_path, 'directory')
return return
directory_queue = collections.deque() directory_queue = collections.deque()
@ -563,13 +626,14 @@ def walk_generator(
# Thank you for your cooperation. # Thank you for your cooperation.
while len(directory_queue) > 0: while len(directory_queue) > 0:
current_location = directory_queue.popleft() current_location = directory_queue.popleft()
callback_verbose('listdir: %s' % current_location) log.debug('listdir: %s' % current_location.absolute_path)
contents = os.listdir(current_location) contents = os.listdir(current_location.absolute_path)
callback_verbose('received %d items' % len(contents)) log.debug('received %d items' % len(contents))
directories = [] directories = []
files = []
for base_name in contents: for base_name in contents:
absolute_name = os.path.join(current_location, base_name) absolute_name = os.path.join(current_location.absolute_path, base_name)
if os.path.isdir(absolute_name): if os.path.isdir(absolute_name):
exclude = normalize(absolute_name) in exclude_directories exclude = normalize(absolute_name) in exclude_directories
@ -578,7 +642,7 @@ def walk_generator(
callback_exclusion(absolute_name, 'directory') callback_exclusion(absolute_name, 'directory')
continue continue
directories.append(absolute_name) directories.append(str_to_fp(absolute_name))
else: else:
exclude = normalize(absolute_name) in exclude_filenames exclude = normalize(absolute_name) in exclude_filenames
@ -587,7 +651,17 @@ def walk_generator(
callback_exclusion(absolute_name, 'file') callback_exclusion(absolute_name, 'file')
continue continue
yield(str_to_fp(absolute_name)) fp = str_to_fp(absolute_name)
if yield_style == 'flat':
yield fp
else:
files.append(fp)
if yield_style == 'nested':
yield (current_location, directories, files)
if not recurse:
break
# Extendleft causes them to get reversed, so flip it first. # Extendleft causes them to get reversed, so flip it first.
directories.reverse() directories.reverse()