diff --git a/.GitImages/desert_goats.png b/.GitImages/desert_goats.png new file mode 100644 index 0000000..7cc3bf5 Binary files /dev/null and b/.GitImages/desert_goats.png differ diff --git a/.GitImages/desert_goats_blur.png b/.GitImages/desert_goats_blur.png new file mode 100644 index 0000000..fadd4bb Binary files /dev/null and b/.GitImages/desert_goats_blur.png differ diff --git a/BlurredLetterbox/README.md b/BlurredLetterbox/README.md new file mode 100644 index 0000000..ee0244b --- /dev/null +++ b/BlurredLetterbox/README.md @@ -0,0 +1,13 @@ +Blurred Letterbox +================= + +I'm not entirely sure why you'd want to do this. + + +

+[sample image: .GitImages/desert_goats.png]
+
+[sample image, blurred letterbox applied: .GitImages/desert_goats_blur.png]
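For reference, here is roughly how the fitting math in `blurredletterbox.py` (diffed below) plays out. This is a sketch with made-up dimensions, calling `blur_letterbox` directly instead of going through the command line, and it assumes a Pillow version that still provides `PIL.Image.ANTIALIAS`, which the script uses internally:

    import PIL.Image
    from blurredletterbox import blur_letterbox, fit_into_bounds, fit_over_bounds

    # A 2000x1000 image letterboxed into a 1000x1000 frame (illustrative numbers).
    print(fit_into_bounds(2000, 1000, 1000, 1000))  # (1000, 500): the foreground, leaving bars above and below
    print(fit_over_bounds(2000, 1000, 1000, 1000))  # (2000, 1000): the background, which overflows the frame

    # blur_letterbox pastes the oversized, blurred copy first, then centers the
    # fitted copy on top of it.
    image = PIL.Image.open('desert_goats.png')
    framed = blur_letterbox(image, new_width=1000, new_height=1000)
    framed.save('desert_goats_framed.png')  # output name chosen just for this example

Through the command line the equivalent would be roughly `python blurredletterbox.py desert_goats.png -w 1000 -h 1000`, which writes the result next to the input with a `_blur` suffix.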

+ diff --git a/BlurredLetterbox/blurredletterbox.py b/BlurredLetterbox/blurredletterbox.py new file mode 100644 index 0000000..3e062a2 --- /dev/null +++ b/BlurredLetterbox/blurredletterbox.py @@ -0,0 +1,102 @@ +import argparse +import os +import PIL.ImageFilter +import PIL.Image +import sys + +def blur_letterbox( + image, + new_width=None, + new_height=None, + blurring=None, + ): + + (iw, ih) = image.size + new_width = new_width or iw + new_height = new_height or ih + + if blurring is None: + blurring = (new_width * new_height) * 0.00001 + print('Using bluriness', blurring) + + background = image.resize(fit_over_bounds(iw, ih, new_width, new_height), PIL.Image.ANTIALIAS) + background = background.filter(PIL.ImageFilter.GaussianBlur(radius=blurring)) + foreground = image.resize(fit_into_bounds(iw, ih, new_width, new_height), PIL.Image.ANTIALIAS) + + background_offsets = offsets(background, new_width, new_height) + foreground_offsets = offsets(foreground, new_width, new_height) + + final = PIL.Image.new(mode=image.mode, size=(new_width, new_height)) + final.paste(background, (background_offsets)) + final.paste(foreground, (foreground_offsets)) + return final + +def fit_into_bounds(iw, ih, fw, fh): + ''' + Given the w+h of the image and the w+h of the frame, + return new w+h that fits the image into the frame + while maintaining the aspect ratio and leaving blank space + everywhere else + ''' + ratio = min(fw/iw, fh/ih) + + w = int(iw * ratio) + h = int(ih * ratio) + + return (w, h) + +def fit_over_bounds(iw, ih, fw, fh): + ''' + Given the w+h of the image and the w+h of the frame, + return new w+h that covers the entire frame + while maintaining the aspect ratio + ''' + ratio = max(fw/iw, fh/ih) + + w = int(iw * ratio) + h = int(ih * ratio) + + return (w, h) + +def listget(li, index, fallback=None): + try: + return li[index] + except IndexError: + return fallback + +def offsets(image, new_width, new_height): + ''' + Calculate the horizontal and vertical offsets + needed to center the image in the given box + ''' + horizontal = int((new_width - image.size[0]) / 2) + vertical = int((image.size[1] - new_height) / 2) * -1 + return (horizontal, vertical) + + +def main(argv): + parser = argparse.ArgumentParser(add_help=False) + parser.add_argument('filename') + parser.add_argument('-w', '--width', dest='width', default=None) + parser.add_argument('-h', '--height', dest='height', default=None) + parser.add_argument('-b', '--blurring', dest='blurring', default=None) + + args = parser.parse_args(argv) + if args.width is None and args.height is None: + print('Need a new width or height') + return + + int_or_none = lambda x: int(x) if x else x + (base, extension) = os.path.splitext(args.filename) + new_name = base + '_blur' + extension + image = PIL.Image.open(args.filename) + image = blur_letterbox( + image, + int_or_none(args.width), + int_or_none(args.height), + int_or_none(args.blurring) + ) + image.save(new_name) + +if __name__ == '__main__': + main(sys.argv[1:]) \ No newline at end of file diff --git a/OpenDirDL/README.md b/OpenDirDL/README.md index fb63aa1..efad798 100644 --- a/OpenDirDL/README.md +++ b/OpenDirDL/README.md @@ -1,6 +1,11 @@ Open Dir DL =========== +- 2016 07 19 + - Rearranged the big blocks to be in a logical order rather than alphabetical order. Walker > Downloader > other classes + - Renamed the `keep_pattern` and `remove_pattern` functions to `keep_pattern_argparse` etc to be consistent with the other functions that take argparse namespaces as their only parameter. 
Does not affect the commandline usage. + - Gave the HTML tree divs a very gentle shadow and alternating colors to help with depth perception. + - 2016 07 08 - Fixed bug in which trees wouldn't generate on server:port urls. diff --git a/OpenDirDL/opendirdl.py b/OpenDirDL/opendirdl.py index fade867..0221f99 100644 --- a/OpenDirDL/opendirdl.py +++ b/OpenDirDL/opendirdl.py @@ -11,10 +11,10 @@ The basics: > opendirdl remove_pattern "folder\.jpg" Note the percent-encoded string. 3. Download the enabled files with - > opendirdl download database.db + > opendirdl download website.com.db -Specifics: +The specifics: digest: Recursively fetch directories and build a database of file URLs. @@ -61,7 +61,7 @@ remove_pattern: > opendirdl remove_pattern website.com.db ".*" list_basenames: - List enabled URLs in order of their base filename. This makes it easier to + List Enabled URLs in order of their base filename. This makes it easier to find titles of interest in a directory that is very scattered or poorly organized. @@ -83,11 +83,11 @@ measure: When included, perform HEAD requests on all files to update their size. -n | --new_only: - When included, perform HEAD requests only on files that haven't gotten one - yet. + When included, perform HEAD requests only on files that haven't gotten + one yet. - If a file's size is not known by the time this operation completes, you will - receive a printed note. + If a file's size is not known by the time this operation completes, you + will receive a printed note. tree: Print the file / folder tree. @@ -100,8 +100,8 @@ tree: filenames contain special characters that crash Python, or are so long that the console becomes unreadable. - If the filename ends with ".html", the webpage will use collapsible - boxes rather than plain text. + If the filename ends with ".html", the created page will have + collapsible boxes rather than a plaintext diagram. ''' @@ -134,6 +134,8 @@ TERMINAL_WIDTH = shutil.get_terminal_size().columns DOWNLOAD_CHUNK = 16 * bytestring.KIBIBYTE +UNKNOWN_SIZE_STRING = '???' + # When doing a basic scan, we will not send HEAD requests to URLs that end in these strings, # because they're probably files. # This isn't meant to be a comprehensive filetype library, but it covers enough of the @@ -203,16 +205,29 @@ function collapse(id) { font-family: Consolas; } + button { display: block; } + div { padding: 10px; padding-left: 15px; margin-bottom: 10px; border: 1px solid #000; + box-shadow: 1px 1px 2px 0px rgba(0,0,0,0.3); +} + +.directory_even +{ + background-color: #fff; +} + +.directory_odd +{ + background-color: #eee; } ''' @@ -224,7 +239,7 @@ CREATE TABLE IF NOT EXISTS urls( content_length INT, content_type TEXT, do_download INT - ); +); CREATE INDEX IF NOT EXISTS urlindex on urls(url); CREATE INDEX IF NOT EXISTS baseindex on urls(basename); CREATE INDEX IF NOT EXISTS sizeindex on urls(content_length); @@ -238,166 +253,10 @@ SQL_DO_DOWNLOAD = 4 UNMEASURED_WARNING = ''' Note: %d files do not have a stored Content-Length. -Run `measure` with `-f`|`--fullscan` or `-n`|`--new_only` to HEAD request those files. +Run `measure` with `-f`|`--fullscan` or `-n`|`--new_only` to HEAD request +those files. 
'''.strip() -## DOWNLOADER ###################################################################################### -## ## -class Downloader: - def __init__(self, databasename, outputdir=None, headers=None): - self.databasename = databasename - self.sql = sqlite3.connect(databasename) - self.cur = self.sql.cursor() - - if outputdir is None or outputdir == "": - # This assumes that all URLs in the database are from the same domain. - # If they aren't, it's the user's fault. - self.cur.execute('SELECT url FROM urls LIMIT 1') - url = self.cur.fetchone()[0] - outputdir = url_to_filepath(url)['root'] - self.outputdir = outputdir - - def download(self, overwrite=False, bytespersecond=None): - overwrite = bool(overwrite) - - self.cur.execute('SELECT * FROM urls WHERE do_download == 1 ORDER BY url') - while True: - fetch = self.cur.fetchone() - if fetch is None: - break - url = fetch[SQL_URL] - - ''' Creating the permanent and temporary filenames ''' - url_filepath = url_to_filepath(url) - # Ignore this value of `root`, because we might have a custom outputdir. - root = url_filepath['root'] - folder = os.path.join(root, url_filepath['folder']) - os.makedirs(folder, exist_ok=True) - fullname = os.path.join(folder, url_filepath['filename']) - temporary_basename = hashit(url, 16) + '.oddltemporary' - temporary_fullname = os.path.join(folder, temporary_basename) - - ''' Managing overwrite ''' - if os.path.isfile(fullname): - if overwrite is True: - os.remove(fullname) - else: - safeprint('Skipping "%s". Use `--overwrite`' % fullname) - continue - - safeprint('Downloading "%s" as "%s"' % (fullname, temporary_basename)) - filehandle = open(temporary_fullname, 'wb') - try: - download_file(url, filehandle, hookfunction=hook1, bytespersecond=bytespersecond) - os.rename(temporary_fullname, fullname) - except: - filehandle.close() - raise -## ## -## DOWNLOADER ###################################################################################### - - -## GENERIC ######################################################################################### -## ## -class Generic: - def __init__(self, **kwargs): - for kwarg in kwargs: - setattr(self, kwarg, kwargs[kwarg]) - - -class TreeExistingChild(Exception): - pass - -class TreeInvalidIdentifier(Exception): - pass - -class TreeNode: - def __init__(self, identifier, data, parent=None): - assert isinstance(identifier, str) - assert '\\' not in identifier - self.identifier = identifier - self.data = data - self.parent = parent - self.children = {} - - def __getitem__(self, key): - return self.children[key] - - def __repr__(self): - return 'TreeNode %s' % self.abspath() - - def abspath(self): - node = self - nodes = [node] - while node.parent is not None: - node = node.parent - nodes.append(node) - nodes.reverse() - nodes = [node.identifier for node in nodes] - return '\\'.join(nodes) - - def add_child(self, other_node, overwrite_parent=False): - self.check_child_availability(other_node.identifier) - if other_node.parent is not None and not overwrite_parent: - raise ValueError('That node already has a parent. 
Try `overwrite_parent=True`') - - other_node.parent = self - self.children[other_node.identifier] = other_node - return other_node - - def check_child_availability(self, identifier): - if ':' in identifier: - raise TreeInvalidIdentifier('Only roots may have a colon') - if identifier in self.children: - raise TreeExistingChild('Node %s already has child %s' % (self.identifier, identifier)) - - def detach(self): - del self.parent.children[self.identifier] - self.parent = None - - def listnodes(self, customsort=None): - items = list(self.children.items()) - if customsort is None: - items.sort(key=lambda x: x[0].lower()) - else: - items.sort(key=customsort) - return [item[1] for item in items] - - def merge_other(self, othertree, otherroot=None): - newroot = None - if ':' in othertree.identifier: - if otherroot is None: - raise Exception('Must specify a new name for the other tree\'s root') - else: - newroot = otherroot - else: - newroot = othertree.identifier - othertree.identifier = newroot - othertree.parent = self - self.check_child_availability(newroot) - self.children[newroot] = othertree - - def printtree(self, customsort=None): - for node in self.walk(customsort): - print(node.abspath()) - - def sorted_children(self, customsort=None): - if customsort: - keys = sorted(self.children.keys(), key=customsort) - else: - keys = sorted(self.children.keys()) - for key in keys: - yield (key, self.children[key]) - - def walk(self, customsort=None): - yield self - for child in self.listnodes(customsort=customsort): - #print(child) - #print(child.listnodes()) - yield from child.walk(customsort=customsort) -## ## -## GENERIC ######################################################################################### - ## WALKER ########################################################################################## ## ## @@ -544,6 +403,164 @@ class Walker: ## WALKER ########################################################################################## +## DOWNLOADER ###################################################################################### +## ## +class Downloader: + def __init__(self, databasename, outputdir=None, headers=None): + self.databasename = databasename + self.sql = sqlite3.connect(databasename) + self.cur = self.sql.cursor() + + if outputdir is None or outputdir == "": + # This assumes that all URLs in the database are from the same domain. + # If they aren't, it's the user's fault. + self.cur.execute('SELECT url FROM urls LIMIT 1') + url = self.cur.fetchone()[0] + outputdir = url_to_filepath(url)['root'] + self.outputdir = outputdir + + def download(self, overwrite=False, bytespersecond=None): + overwrite = bool(overwrite) + + self.cur.execute('SELECT * FROM urls WHERE do_download == 1 ORDER BY url') + while True: + fetch = self.cur.fetchone() + if fetch is None: + break + url = fetch[SQL_URL] + + ''' Creating the permanent and temporary filenames ''' + url_filepath = url_to_filepath(url) + # Ignore this value of `root`, because we might have a custom outputdir. + root = url_filepath['root'] + folder = os.path.join(root, url_filepath['folder']) + os.makedirs(folder, exist_ok=True) + fullname = os.path.join(folder, url_filepath['filename']) + temporary_basename = hashit(url, 16) + '.oddltemporary' + temporary_fullname = os.path.join(folder, temporary_basename) + + ''' Managing overwrite ''' + if os.path.isfile(fullname): + if overwrite is True: + os.remove(fullname) + else: + safeprint('Skipping "%s". 
Use `--overwrite`' % fullname) + continue + + safeprint('Downloading "%s" as "%s"' % (fullname, temporary_basename)) + filehandle = open(temporary_fullname, 'wb') + try: + download_file(url, filehandle, hookfunction=hook1, bytespersecond=bytespersecond) + os.rename(temporary_fullname, fullname) + except: + filehandle.close() + raise +## ## +## DOWNLOADER ###################################################################################### + + +## OTHER CLASSES ################################################################################### +## ## +class Generic: + def __init__(self, **kwargs): + for kwarg in kwargs: + setattr(self, kwarg, kwargs[kwarg]) + + +class TreeExistingChild(Exception): + pass + +class TreeInvalidIdentifier(Exception): + pass + +class TreeNode: + def __init__(self, identifier, data, parent=None): + assert isinstance(identifier, str) + assert '\\' not in identifier + self.identifier = identifier + self.data = data + self.parent = parent + self.children = {} + + def __getitem__(self, key): + return self.children[key] + + def __repr__(self): + return 'TreeNode %s' % self.abspath() + + def abspath(self): + node = self + nodes = [node] + while node.parent is not None: + node = node.parent + nodes.append(node) + nodes.reverse() + nodes = [node.identifier for node in nodes] + return '\\'.join(nodes) + + def add_child(self, other_node, overwrite_parent=False): + self.check_child_availability(other_node.identifier) + if other_node.parent is not None and not overwrite_parent: + raise ValueError('That node already has a parent. Try `overwrite_parent=True`') + + other_node.parent = self + self.children[other_node.identifier] = other_node + return other_node + + def check_child_availability(self, identifier): + if ':' in identifier: + raise TreeInvalidIdentifier('Only roots may have a colon') + if identifier in self.children: + raise TreeExistingChild('Node %s already has child %s' % (self.identifier, identifier)) + + def detach(self): + del self.parent.children[self.identifier] + self.parent = None + + def listnodes(self, customsort=None): + items = list(self.children.items()) + if customsort is None: + items.sort(key=lambda x: x[0].lower()) + else: + items.sort(key=customsort) + return [item[1] for item in items] + + def merge_other(self, othertree, otherroot=None): + newroot = None + if ':' in othertree.identifier: + if otherroot is None: + raise Exception('Must specify a new name for the other tree\'s root') + else: + newroot = otherroot + else: + newroot = othertree.identifier + othertree.identifier = newroot + othertree.parent = self + self.check_child_availability(newroot) + self.children[newroot] = othertree + + def printtree(self, customsort=None): + for node in self.walk(customsort): + print(node.abspath()) + + def sorted_children(self, customsort=None): + if customsort: + keys = sorted(self.children.keys(), key=customsort) + else: + keys = sorted(self.children.keys()) + for key in keys: + yield (key, self.children[key]) + + def walk(self, customsort=None): + yield self + for child in self.listnodes(customsort=customsort): + #print(child) + #print(child.listnodes()) + yield from child.walk(customsort=customsort) +## ## +## OTHER CLASSES ################################################################################### + + ## GENERAL FUNCTIONS ############################################################################### ## ## def db_init(sql, cur): @@ -724,6 +741,12 @@ def url_to_filepath(text): 'filename': filename, } return result + +def write(line, 
file_handle=None): + if file_handle is None: + safeprint(line) + else: + file_handle.write(line + '\n') ## ## ## GENERAL FUNCTIONS ############################################################################### @@ -738,7 +761,7 @@ def digest(databasename, walkurl, fullscan=False): databasename=databasename, fullscan=fullscan, walkurl=walkurl, - ) + ) walker.walk() def digest_argparse(args): @@ -755,11 +778,11 @@ def download(databasename, outputdir=None, overwrite=False, bytespersecond=None) downloader = Downloader( databasename=databasename, outputdir=outputdir, - ) + ) downloader.download( bytespersecond=bytespersecond, overwrite=overwrite, - ) + ) def download_argparse(args): return download( @@ -777,8 +800,8 @@ def filter_pattern(databasename, regex, action='keep', *trash): When `action` is 'remove', then any URLs matching the regex will have their `do_download` flag set to False. - Actions will not act on each other's behalf. A 'keep' will NEVER disable a url, - and 'remove' will NEVER enable one. + Actions will not act on each other's behalf. Keep will NEVER disable a url, + and remove will NEVER enable one. ''' import re if isinstance(regex, str): @@ -810,55 +833,51 @@ def filter_pattern(databasename, regex, action='keep', *trash): cur.execute('UPDATE urls SET do_download = 0 WHERE url == ?', [url]) sql.commit() -def keep_pattern(args): +def keep_pattern_argparse(args): ''' See `filter_pattern`. ''' - filter_pattern( + return filter_pattern( action='keep', databasename=args.databasename, regex=args.regex, - ) + ) -def list_basenames(databasename, outputfile=None): +def list_basenames(databasename, output_filename=None): ''' - Given a database, print the entries in order of the file basenames. + Print the Enabled entries in order of the file basenames. This makes it easier to find interesting titles without worrying about what directory they're in. 
''' sql = sqlite3.connect(databasename) cur = sql.cursor() - cur.execute('SELECT basename FROM urls WHERE do_download == 1 ORDER BY LENGTH(basename) DESC LIMIT 1') - fetch = cur.fetchone() - if fetch is None: - return - longest = len(fetch[0]) - cur.execute('SELECT * FROM urls WHERE do_download == 1 ORDER BY LOWER(basename)') - form = '{bn:<%ds} : {url} : {byt}' % longest - if outputfile: - outputfile = open(outputfile, 'w', encoding='utf-8') - while True: - fetch = cur.fetchone() - if fetch is None: - break - byt = fetch[SQL_CONTENT_LENGTH] - if byt is None: - byt = '' + cur.execute('SELECT * FROM urls WHERE do_download == 1') + items = cur.fetchall() + items.sort(key=lambda x: x[SQL_BASENAME].lower()) + + form = '{basename:<%ds} : {url} : {size}' % longest + if output_filename is not None: + output_file = open(output_filename, 'w', encoding='utf-8') + for item in items: + size = item[SQL_CONTENT_LENGTH] + if size is None: + size = '' else: - byt = '{:,}'.format(byt) - line = form.format(bn=fetch[SQL_BASENAME], url=fetch[SQL_URL], byt=byt) - if outputfile: - outputfile.write(line + '\n') - else: - print(line) - if outputfile: - outputfile.close() + size = bytestring.bytestring(size) + line = form.format( + basename=item[SQL_BASENAME], + url=item[SQL_URL], + size=size, + ) + write(line) + if output_file: + output_file.close() def list_basenames_argparse(args): return list_basenames( databasename=args.databasename, - outputfile=args.outputfile, + output_filename=args.outputfile, ) def measure(databasename, fullscan=False, new_only=False): @@ -923,17 +942,25 @@ def measure_argparse(args): new_only=args.new_only, ) -def remove_pattern(args): +def remove_pattern_argparse(args): ''' See `filter_pattern`. ''' - filter_pattern( + return filter_pattern( action='remove', databasename=args.databasename, regex=args.regex, - ) + ) def tree(databasename, output_filename=None): + ''' + Print a tree diagram of the directory-file structure. + + If an .html file is given for `output_filename`, the page will have + collapsible boxes and clickable filenames. Otherwise the file will just + be a plain text drawing. 
+ ''' + sql = sqlite3.connect(databasename) cur = sql.cursor() cur.execute('SELECT * FROM urls WHERE do_download == 1') @@ -945,13 +972,13 @@ def tree(databasename, output_filename=None): path_parts = url_to_filepath(items[0][SQL_URL]) root_identifier = path_parts['root'] - print('Root', root_identifier) + #print('Root', root_identifier) root_data = {'name': root_identifier, 'item_type': 'directory'} root_identifier = root_identifier.replace(':', '') tree = TreeNode( identifier=root_identifier, data=root_data - ) + ) node_map = {} unmeasured_file_count = 0 @@ -985,7 +1012,7 @@ def tree(databasename, output_filename=None): data['size'] = item[SQL_CONTENT_LENGTH] else: unmeasured_file_count += 1 - data['size'] = 0 + data['size'] = None else: data['item_type'] = 'directory' @@ -1018,12 +1045,6 @@ def tree(databasename, output_filename=None): this_node.parent = parent_node #print(this_node.data) - def write(line, outfile=None): - if outfile is None: - safeprint(line) - else: - outfile.write(line + '\n') - def recursive_get_size(node): size = node.data.get('size', 0) if size: @@ -1031,27 +1052,40 @@ def tree(databasename, output_filename=None): return size for child in node.children.values(): - size += recursive_get_size(child) + child_size = recursive_get_size(child) + child_size = child_size or 0 + size += child_size node.data['size'] = size return size - def recursive_print_node(node, depth=0, outfile=None): + def recursive_print_node(node, depth=0, output_file=None): + size = node.data['size'] + if size is None: + size = UNKNOWN_SIZE_STRING + else: + size = bytestring.bytestring(size) + if use_html: + if depth % 2 == 0: + css_class = 'directory_even' + else: + css_class = 'directory_odd' + if node.data['item_type'] == 'directory': div_id = hashit(node.identifier, 16) line = '' - line += '', output_file) else: # This helps put some space between sibling directories - write('| ' * (depth), outfile) + write('| ' * (depth), output_file) if output_filename is not None: @@ -1084,12 +1123,12 @@ def tree(databasename, output_filename=None): if use_html: - write(HTML_TREE_HEADER, outfile=output_file) + write(HTML_TREE_HEADER, file_handle=output_file) recursive_get_size(tree) - recursive_print_node(tree, outfile=output_file) + recursive_print_node(tree, output_file=output_file) if unmeasured_file_count > 0: - write(UNMEASURED_WARNING % unmeasured_file_count, outfile=output_file) + write(UNMEASURED_WARNING % unmeasured_file_count, file_handle=output_file) if output_file is not None: output_file.close() @@ -1104,11 +1143,10 @@ def tree_argparse(args): ## ## ## COMMANDLINE FUNCTIONS ########################################################################### - -if __name__ == '__main__': - if listget(sys.argv, 1, '').lower() in ('help', '-h', '--help'): +def main(argv): + if listget(argv, 1, '').lower() in ('help', '-h', '--help', ''): print(DOCSTRING) - quit() + return parser = argparse.ArgumentParser() subparsers = parser.add_subparsers() @@ -1128,7 +1166,7 @@ if __name__ == '__main__': p_keep_pattern = subparsers.add_parser('keep_pattern') p_keep_pattern.add_argument('databasename') p_keep_pattern.add_argument('regex') - p_keep_pattern.set_defaults(func=keep_pattern) + p_keep_pattern.set_defaults(func=keep_pattern_argparse) p_list_basenames = subparsers.add_parser('list_basenames') p_list_basenames.add_argument('databasename') @@ -1144,12 +1182,15 @@ if __name__ == '__main__': p_remove_pattern = subparsers.add_parser('remove_pattern') p_remove_pattern.add_argument('databasename') 
p_remove_pattern.add_argument('regex') - p_remove_pattern.set_defaults(func=remove_pattern) + p_remove_pattern.set_defaults(func=remove_pattern_argparse) p_tree = subparsers.add_parser('tree') p_tree.add_argument('databasename') p_tree.add_argument('-o', '--outputfile', dest='outputfile', default=None) p_tree.set_defaults(func=tree_argparse) - args = parser.parse_args() + args = parser.parse_args(argv) args.func(args) + +if __name__ == '__main__': + main(sys.argv[1:]) diff --git a/ServerReference/files/Welcome to city 17.mp3 b/ServerReference/files/Welcome to city 17.mp3 new file mode 100644 index 0000000..ac0c2b5 Binary files /dev/null and b/ServerReference/files/Welcome to city 17.mp3 differ diff --git a/ServerReference/files/heyo.txt b/ServerReference/files/heyo.txt index 05a682b..2dea854 100644 --- a/ServerReference/files/heyo.txt +++ b/ServerReference/files/heyo.txt @@ -1 +1,3 @@ -Hello! \ No newline at end of file +Hello! +one +two \ No newline at end of file diff --git a/ServerReference/simpleserver.py b/ServerReference/simpleserver.py index 48827d5..5ca314b 100644 --- a/ServerReference/simpleserver.py +++ b/ServerReference/simpleserver.py @@ -2,20 +2,28 @@ import http.server import mimetypes import os import urllib.parse +import pathlib import random +import socketserver import sys +import types sys.path.append('C:\\git\\else\\Bytestring'); import bytestring sys.path.append('C:\\git\\else\\Ratelimiter'); import ratelimiter +sys.path.append('C:\\git\\else\\SpinalTap'); import spinal -f = open('favicon.png', 'rb') -FAVI = f.read() -f.close() +FILE_READ_CHUNK = bytestring.MIBIBYTE + +#f = open('favicon.png', 'rb') +#FAVI = f.read() +#f.close() CWD = os.getcwd() -# The paths which the user may access -# Attempting to access anything outside will 403 -OKAY_PATHS = set(x.lower() for x in ['/files', '/favicon.ico']) +# The paths which the user may access. +# Attempting to access anything outside will 403. +# These are convered to Path objects after that class definition. +OKAY_PATHS = set(['files', 'favicon.ico']) + OPENDIR_TEMPLATE = ''' @@ -29,27 +37,31 @@ OPENDIR_TEMPLATE = ''' ''' - -class Multipart: - def __init__(stream, boundary): - self.parts = [] - class Path: + ''' + I started to use pathlib.Path, but it was too much of a pain. + ''' def __init__(self, path): - path = path.replace('\\', '/') - if len(path) == 0 or path[0] != '/': - path = '/' + path - self.path = path + path = urllib.parse.unquote(path) + path = path.strip('/') + path = os.path.normpath(path) + path = spinal.get_path_casing(path).path + self.absolute_path = path - def __repr__(self): - return 'Path(%s)' % self.path + def __contains__(self, other): + return other.absolute_path.startswith(self.absolute_path) - def __str__(self): - return self.path + def __hash__(self): + return hash(self.absolute_path) + + @property + def allowed(self): + return any(self in okay for okay in OKAY_PATHS) def anchor(self, display_name=None): if display_name is None: display_name = self.basename + if self.is_dir: # Folder emoji icon = '\U0001F4C1' @@ -57,9 +69,9 @@ class Path: # Diamond emoji, because there's not one for files. 
icon = '\U0001F48E' - quoted_path = urllib.parse.quote(self.path) + #print('anchor', path) a = '{icon} {display}'.format( - full=quoted_path, + full=self.url_path, icon=icon, display=display_name, ) @@ -67,42 +79,45 @@ class Path: @property def basename(self): - return os.path.basename(self.path) + return os.path.basename(self.absolute_path) @property def is_dir(self): - return os.path.isdir(self.os_path) + return os.path.isdir(self.absolute_path) @property def is_file(self): - return os.path.isfile(self.os_path) - - @property - def os_path(self): - abspath = os.path.join(CWD, self.relative_path) - #print(abspath) - return abspath + return os.path.isfile(self.absolute_path) @property def parent(self): - parts = self.path.split('/')[:-1] - parts = '/'.join(parts) - return Path(parts) + parent = os.path.dirname(self.absolute_path) + parent = Path(parent) + return parent @property def relative_path(self): - return self.path.lstrip('/') + relative = self.absolute_path + relative = relative.replace(CWD, '') + relative = relative.lstrip(os.sep) + return relative @property def size(self): - if self.is_dir: - return -1 - return os.path.getsize(self.os_path) + if self.is_file: + return os.path.getsize(self.absolute_path) + else: + return None def table_row(self, display_name=None, shaded=False): - form = '{anchor}{size}' + form = '{anchor}{size}' + size = self.size + if size is None: + size = '' + else: + size = bytestring.bytestring(size) + bg = 'ddd' if shaded else 'fff'; - size = bytestring.bytestring(self.size) if self.size != -1 else '' row = form.format( bg=bg, anchor=self.anchor(display_name=display_name), @@ -110,134 +125,166 @@ class Path: ) return row + @property + def url_path(self): + url = self.relative_path + url = url.replace(os.sep, '/') + url = '/' + url + url = urllib.parse.quote(url) + return url + +OKAY_PATHS = set(Path(p) for p in OKAY_PATHS) class RequestHandler(http.server.BaseHTTPRequestHandler): - def write(self, string): - if isinstance(string, str): - string = string.encode('utf-8') - self.wfile.write(string) + def write(self, data): + if isinstance(data, str): + data = data.encode('utf-8') + if isinstance(data, types.GeneratorType): + for chunk in data: + self.wfile.write(chunk) + else: + self.wfile.write(data) - def read_filebytes(self, path): + def read_filebytes(self, path, range_min=None, range_max=None): #print(path) - if os.path.isfile(path.relative_path): - f = open(path.relative_path, 'rb') - fr = f.read() - f.close() - return fr - if os.path.isdir(path.relative_path): + if path.is_file: + if range_min is None: + range_min = 0 + + if range_max is None: + range_max = path.size + + range_span = range_max - range_min + + #print('read span', range_min, range_max, range_span) + f = open(path.absolute_path, 'rb') + f.seek(range_min) + sent_amount = 0 + while sent_amount < range_span: + chunk = f.read(FILE_READ_CHUNK) + if len(chunk) == 0: + break + + yield chunk + sent_amount += len(chunk) + + #print('I read', len(fr)) + f.close() + + elif path.is_dir: text = generate_opendir(path) text = text.encode('utf-8') - return text + yield text - self.send_error(404) - return bytes() + else: + self.send_error(404) + yield bytes() def do_GET(self): #print(dir(self)) - path = normalize_path(self.path) + path = Path(self.path) if self.send_path_validation_error(path): return - path = Path(path) + range_min = None + range_max = None - self.send_response(200) - mime = mimetypes.guess_type(path.path)[0] - if mime is not None: - #print(mime) - self.send_header('Content-type', mime) + 
status_code = 200 + headers = {} if path.is_file: - self.send_header('Content-length', path.size) + file_size = path.size + if 'range' in self.headers: + desired_range = self.headers['range'] + desired_range = desired_range.lower() + desired_range = desired_range.split('bytes=')[-1] - d = self.read_filebytes(path) + helper = lambda x: int(x) if x and x.isdigit() else None + if '-' in desired_range: + (desired_min, desired_max) = desired_range.split('-') + #print('desire', desired_min, desired_max) + range_min = helper(desired_min) + range_max = helper(desired_max) + else: + range_min = helper(desired_range) + + if range_min is None: + range_min = 0 + if range_max is None: + range_max = file_size + + # because ranges are 0 indexed + range_max = min(range_max, file_size - 1) + range_min = max(range_min, 0) + + status_code = 206 + range_header = 'bytes {min}-{max}/{outof}'.format( + min=range_min, + max=range_max, + outof=file_size, + ) + headers['Content-Range'] = range_header + headers['Accept-Ranges'] = 'bytes' + content_length = (range_max - range_min) + 1 + + else: + content_length = file_size + + headers['Content-length'] = content_length + + mime = mimetypes.guess_type(path.absolute_path)[0] + if mime is not None: + #print(mime) + headers['Content-type'] = mime + + self.send_response(status_code) + for (key, value) in headers.items(): + self.send_header(key, value) + + d = self.read_filebytes(path, range_min=range_min, range_max=range_max) #print('write') self.end_headers() self.write(d) def do_HEAD(self): - path = normalize_path(self.path) + path = Path(self.path) if self.send_path_validation_error(path): return - path = Path(path) - self.send_response(200) + status_code = 200 if path.is_dir: mime = 'text/html' else: - mime = mimetypes.guess_type(path.path)[0] + mime = mimetypes.guess_type(path.absolute_path)[0] + self.send_header('Content-length', path.size) if mime is not None: self.send_header('Content-type', mime) - if path.is_file: - self.send_header('Content-length', path.size) - + self.send_response(status_code) self.end_headers() - def path_validation(self, path): - path = path.lstrip('/') - absolute_path = os.path.join(CWD, path) - absolute_path = os.path.abspath(absolute_path) - path = absolute_path.replace(CWD, '') - path = path.lstrip('/') - path = path.replace('\\', '/') - #if '..' 
in path: - # return (403, 'I\'m not going to play games with you.') - #print(path) - print(path) - if not any(path.startswith(okay) for okay in OKAY_PATHS): - self.send_error(403, 'Stop that!') - return - def send_path_validation_error(self, path): - error = self.path_validation(path) - if error: - self.send_error(*error) + if not path.allowed: + self.send_error(403, 'Stop that!') return True return False - # def do_POST(self): - # path = self.path.lower() - # path = urllib.parse.unquote(path).rstrip('/') - # error = path_validation(path) - # if error: - # self.send_error(*error) - # return +class ThreadedServer(socketserver.ThreadingMixIn, http.server.HTTPServer): + ''' + Thanks root and twasbrillig http://stackoverflow.com/a/14089457 + ''' + pass - # path = Path(path) - # content_type = self.headers.get('Content-Type', '') - # if not any (req in content_type for req in ['multipart/form-data', 'boundary=']): - # self.send_error(400, 'Bad request') - # return - - # boundary = content_type.split('boundary=')[1] - # boundary = boundary.split(';')[0] - # boundary = boundary.strip() - # print('B:', self.headers.get_boundary()) - # print('F:', self.headers.get_filename()) - - # incoming_size = int(self.headers.get('Content-Length', 0)) - # received_bytes = 0 - # remaining_bytes = incoming_size - # while remaining_bytes > 0: - # chunk_size = min(remaining_bytes, 16*1024) - # chunk = self.rfile.read(chunk_size) - # remaining_bytes -= chunk_size - # received_bytes += chunk_size - # print(chunk) - # self.send_response(200) - # self.send_header('Content-Type', 'text/html') - # self.end_headers() - # print(dir(self.request)) - # self.write('Thanks') def generate_opendir(path): #print('Listdir:', path) - items = os.listdir(path.relative_path) - items = [os.path.join(path.relative_path, f) for f in items] + items = os.listdir(path.absolute_path) + items = [os.path.join(path.absolute_path, f) for f in items] + #print(items) # This places directories above files, each ordered alphabetically items.sort(key=str.lower) @@ -252,10 +299,14 @@ def generate_opendir(path): items = directories + files items = [Path(f) for f in items] entries = [] - if not any(okay == path.path for okay in OKAY_PATHS): - # If the user is on one of the OKAY_PATHS, then he can't step up - # because that would be outside the OKAY area. - entries.append(path.parent.table_row(display_name='up')) + + if any(path.absolute_path == okay.absolute_path for okay in OKAY_PATHS): + # This is different than a permission check, we're seeing if they're + # actually at the top, in which case they don't need an up button. 
+ pass + else: + entry = path.parent.table_row(display_name='up') + entries.append(entry) shaded = True for item in items: @@ -269,17 +320,15 @@ def generate_opendir(path): def generate_random_filename(original_filename='', length=8): import random - bits = length * 4 + bits = length * 44 bits = random.getrandbits(bits) identifier = '{:x}'.format(bits).rjust(length, '0') return identifier -def normalize_path(path): - #path = path.lower() - path = urllib.parse.unquote(path).rstrip('/') - return path +def main(): + server = ThreadedServer(('', 32768), RequestHandler) + print('server starting') + server.serve_forever() - -server = http.server.HTTPServer(('', 32768), RequestHandler) -print('server starting') -server.serve_forever() +if __name__ == '__main__': + main() diff --git a/SpinalTap/spinal.py b/SpinalTap/spinal.py index 33a5e95..8dad800 100644 --- a/SpinalTap/spinal.py +++ b/SpinalTap/spinal.py @@ -542,8 +542,9 @@ def get_path_casing(path): ''' piece = glob.escape(piece) for character in piece: - if character not in '!': + if character not in '![]': replacement = '[%s]' % character + #print(piece, character, replacement) piece = piece.replace(character, replacement, 1) break return piece @@ -551,7 +552,7 @@ def get_path_casing(path): pattern = [patternize(piece) for piece in subpath.split(os.sep)] pattern = os.sep.join(pattern) pattern = drive.upper() + os.sep + pattern - print(pattern) + #print(pattern) try: return str_to_fp(glob.glob(pattern)[0]) except IndexError:
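The Range support added to `do_GET` in simpleserver.py can be poked at with a plain urllib client. A rough sketch, assuming simpleserver.py is running from the ServerReference directory on its hard-coded port 32768 and that the requested file sits under the whitelisted `files` path:

    import urllib.request

    # Request only the first five bytes of a served file.
    request = urllib.request.Request(
        'http://localhost:32768/files/heyo.txt',
        headers={'Range': 'bytes=0-4'},
    )
    with urllib.request.urlopen(request) as response:
        print(response.status)                    # 206 when the partial-content path is taken
        print(response.headers['Content-Range'])  # 'bytes 0-4/<file size>'
        print(response.read())                    # the requested slice

The server clamps the requested range to the file size, answers with a 206 and a `Content-Range` header, and streams the body back in `FILE_READ_CHUNK`-sized pieces.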
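The spinal.py hunk above only widens the escape loop's exclusion list from `'!'` to `'![]'`, but the trick it belongs to is worth spelling out: `get_path_casing` wraps one character of each path component in brackets so the pattern contains glob "magic", forcing `glob.glob` to list the real directory entries, which come back with their true on-disk casing. A stripped-down sketch of that idea, leaving out the drive handling, `glob.escape`, and the `'![]'` special cases the real function deals with:

    import glob
    import os

    def path_casing_sketch(path):
        # Split off the drive (empty on non-Windows systems) and the path pieces.
        (drive, subpath) = os.path.splitdrive(path)
        pieces = [piece for piece in subpath.strip(os.sep).split(os.sep) if piece]

        # Bracket the first character of each piece, e.g. 'git' -> '[g]it', so the
        # pattern is a real glob and the filesystem gets consulted at each level.
        pattern_pieces = ['[%s]%s' % (piece[0], piece[1:]) for piece in pieces]
        pattern = drive.upper() + os.sep + os.sep.join(pattern_pieces)

        matches = glob.glob(pattern)
        # On a case-insensitive filesystem, the match carries the stored casing.
        return matches[0] if matches else path

    # e.g. path_casing_sketch('c:\\git\\else') -> 'C:\\git\\else' on Windows.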