This commit is contained in:
unknown 2016-07-19 20:31:47 -07:00
parent ef15e1a644
commit 8907923138
10 changed files with 587 additions and 374 deletions

BIN
.GitImages/desert_goats.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 284 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 337 KiB

View file

@ -0,0 +1,13 @@
Blurred Letterbox
=================
I'm not entirely sure why you'd want to do this.
<p align="center">
<img src="https://github.com/voussoir/else/blob/master/.GitImages/desert_goats.png?raw=true" alt="sample"/>
</p>
<p align="center">
<img src="https://github.com/voussoir/else/blob/master/.GitImages/desert_goats_blur.png?raw=true" alt="sample"/>
</p>

View file

@ -0,0 +1,102 @@
import argparse
import os
import PIL.ImageFilter
import PIL.Image
import sys
def blur_letterbox(image, new_width=None, new_height=None, blurring=None):
    '''
    Composite `image`, centered, over a blurred copy of itself that fills
    the whole output frame.

    new_width / new_height default to the image's own dimensions (falsy
    values also fall back). When `blurring` is None, a Gaussian radius
    proportional to the output area is chosen automatically.

    Returns a new PIL image of size (new_width, new_height).
    '''
    (source_width, source_height) = image.size
    new_width = new_width or source_width
    new_height = new_height or source_height

    if blurring is None:
        # Scale the blur radius with the output area so large frames
        # still look smoothly blurred.
        blurring = (new_width * new_height) * 0.00001
        print('Using bluriness', blurring)

    # Background: scaled to cover the whole frame, then blurred.
    cover_size = fit_over_bounds(source_width, source_height, new_width, new_height)
    background = image.resize(cover_size, PIL.Image.ANTIALIAS)
    background = background.filter(PIL.ImageFilter.GaussianBlur(radius=blurring))

    # Foreground: scaled to fit entirely inside the frame, left sharp.
    contain_size = fit_into_bounds(source_width, source_height, new_width, new_height)
    foreground = image.resize(contain_size, PIL.Image.ANTIALIAS)

    canvas = PIL.Image.new(mode=image.mode, size=(new_width, new_height))
    canvas.paste(background, offsets(background, new_width, new_height))
    canvas.paste(foreground, offsets(foreground, new_width, new_height))
    return canvas
def fit_into_bounds(iw, ih, fw, fh):
    '''
    Given the w+h of the image and the w+h of the frame, return a new w+h
    that is as large as possible while still fitting entirely inside the
    frame, preserving the aspect ratio. Blank space may remain on the
    sides of the shorter dimension.
    '''
    scale = min(fw / iw, fh / ih)
    return (int(iw * scale), int(ih * scale))
def fit_over_bounds(iw, ih, fw, fh):
    '''
    Given the w+h of the image and the w+h of the frame, return a new w+h
    that covers the entire frame, preserving the aspect ratio. The longer
    dimension may overflow the frame.
    '''
    scale = max(fw / iw, fh / ih)
    return (int(iw * scale), int(ih * scale))
def listget(li, index, fallback=None):
    '''
    Fetch li[index]; if that position does not exist, return `fallback`
    instead of raising IndexError.
    '''
    try:
        value = li[index]
    except IndexError:
        value = fallback
    return value
def offsets(image, new_width, new_height):
    '''
    Return the (x, y) paste position that centers `image` inside a box of
    new_width x new_height.
    '''
    (width, height) = image.size
    x = int((new_width - width) / 2)
    # Same truncation as the horizontal axis: compute the (possibly
    # negative) gap first, truncate toward zero, then flip the sign.
    y = -1 * int((height - new_height) / 2)
    return (x, y)
def main(argv):
    '''
    Commandline entry point: blur-letterbox a single image file and save
    the result alongside it with a `_blur` suffix in the filename.

    -h is repurposed for --height, so argparse's automatic help is
    disabled (add_help=False).
    '''
    parser = argparse.ArgumentParser(add_help=False)
    parser.add_argument('filename')
    parser.add_argument('-w', '--width', dest='width', default=None)
    parser.add_argument('-h', '--height', dest='height', default=None)
    parser.add_argument('-b', '--blurring', dest='blurring', default=None)
    args = parser.parse_args(argv)

    if args.width is None and args.height is None:
        print('Need a new width or height')
        return

    def int_or_none(value):
        # argparse delivers strings; convert non-empty values to int and
        # pass empty / None through unchanged.
        return int(value) if value else value

    (base, extension) = os.path.splitext(args.filename)
    output_name = base + '_blur' + extension

    result = blur_letterbox(
        PIL.Image.open(args.filename),
        int_or_none(args.width),
        int_or_none(args.height),
        int_or_none(args.blurring),
    )
    result.save(output_name)

if __name__ == '__main__':
    main(sys.argv[1:])

View file

@ -1,6 +1,11 @@
Open Dir DL
===========
- 2016 07 19
- Rearranged the big blocks to be in a logical order rather than alphabetical order. Walker > Downloader > other classes
- Renamed the `keep_pattern` and `remove_pattern` functions to `keep_pattern_argparse` etc to be consistent with the other functions that take argparse namespaces as their only parameter. Does not affect the commandline usage.
- Gave the HTML tree divs a very gentle shadow and alternating colors to help with depth perception.
- 2016 07 08
- Fixed bug in which trees wouldn't generate on server:port urls.

View file

@ -11,10 +11,10 @@ The basics:
> opendirdl remove_pattern "folder\.jpg"
Note the percent-encoded string.
3. Download the enabled files with
> opendirdl download database.db
> opendirdl download website.com.db
Specifics:
The specifics:
digest:
Recursively fetch directories and build a database of file URLs.
@ -61,7 +61,7 @@ remove_pattern:
> opendirdl remove_pattern website.com.db ".*"
list_basenames:
List enabled URLs in order of their base filename. This makes it easier to
List Enabled URLs in order of their base filename. This makes it easier to
find titles of interest in a directory that is very scattered or poorly
organized.
@ -83,11 +83,11 @@ measure:
When included, perform HEAD requests on all files to update their size.
-n | --new_only:
When included, perform HEAD requests only on files that haven't gotten one
yet.
When included, perform HEAD requests only on files that haven't gotten
one yet.
If a file's size is not known by the time this operation completes, you will
receive a printed note.
If a file's size is not known by the time this operation completes, you
will receive a printed note.
tree:
Print the file / folder tree.
@ -100,8 +100,8 @@ tree:
filenames contain special characters that crash Python, or are so long
that the console becomes unreadable.
If the filename ends with ".html", the webpage will use collapsible
boxes rather than plain text.
If the filename ends with ".html", the created page will have
collapsible boxes rather than a plaintext diagram.
'''
@ -134,6 +134,8 @@ TERMINAL_WIDTH = shutil.get_terminal_size().columns
DOWNLOAD_CHUNK = 16 * bytestring.KIBIBYTE
UNKNOWN_SIZE_STRING = '???'
# When doing a basic scan, we will not send HEAD requests to URLs that end in these strings,
# because they're probably files.
# This isn't meant to be a comprehensive filetype library, but it covers enough of the
@ -203,16 +205,29 @@ function collapse(id)
{
font-family: Consolas;
}
button
{
display: block;
}
div
{
padding: 10px;
padding-left: 15px;
margin-bottom: 10px;
border: 1px solid #000;
box-shadow: 1px 1px 2px 0px rgba(0,0,0,0.3);
}
.directory_even
{
background-color: #fff;
}
.directory_odd
{
background-color: #eee;
}
</style>
'''
@ -224,7 +239,7 @@ CREATE TABLE IF NOT EXISTS urls(
content_length INT,
content_type TEXT,
do_download INT
);
);
CREATE INDEX IF NOT EXISTS urlindex on urls(url);
CREATE INDEX IF NOT EXISTS baseindex on urls(basename);
CREATE INDEX IF NOT EXISTS sizeindex on urls(content_length);
@ -238,166 +253,10 @@ SQL_DO_DOWNLOAD = 4
UNMEASURED_WARNING = '''
Note: %d files do not have a stored Content-Length.
Run `measure` with `-f`|`--fullscan` or `-n`|`--new_only` to HEAD request those files.
Run `measure` with `-f`|`--fullscan` or `-n`|`--new_only` to HEAD request
those files.
'''.strip()
## DOWNLOADER ######################################################################################
## ##
class Downloader:
def __init__(self, databasename, outputdir=None, headers=None):
self.databasename = databasename
self.sql = sqlite3.connect(databasename)
self.cur = self.sql.cursor()
if outputdir is None or outputdir == "":
# This assumes that all URLs in the database are from the same domain.
# If they aren't, it's the user's fault.
self.cur.execute('SELECT url FROM urls LIMIT 1')
url = self.cur.fetchone()[0]
outputdir = url_to_filepath(url)['root']
self.outputdir = outputdir
def download(self, overwrite=False, bytespersecond=None):
overwrite = bool(overwrite)
self.cur.execute('SELECT * FROM urls WHERE do_download == 1 ORDER BY url')
while True:
fetch = self.cur.fetchone()
if fetch is None:
break
url = fetch[SQL_URL]
''' Creating the permanent and temporary filenames '''
url_filepath = url_to_filepath(url)
# Ignore this value of `root`, because we might have a custom outputdir.
root = url_filepath['root']
folder = os.path.join(root, url_filepath['folder'])
os.makedirs(folder, exist_ok=True)
fullname = os.path.join(folder, url_filepath['filename'])
temporary_basename = hashit(url, 16) + '.oddltemporary'
temporary_fullname = os.path.join(folder, temporary_basename)
''' Managing overwrite '''
if os.path.isfile(fullname):
if overwrite is True:
os.remove(fullname)
else:
safeprint('Skipping "%s". Use `--overwrite`' % fullname)
continue
safeprint('Downloading "%s" as "%s"' % (fullname, temporary_basename))
filehandle = open(temporary_fullname, 'wb')
try:
download_file(url, filehandle, hookfunction=hook1, bytespersecond=bytespersecond)
os.rename(temporary_fullname, fullname)
except:
filehandle.close()
raise
## ##
## DOWNLOADER ######################################################################################
## GENERIC #########################################################################################
## ##
class Generic:
def __init__(self, **kwargs):
for kwarg in kwargs:
setattr(self, kwarg, kwargs[kwarg])
class TreeExistingChild(Exception):
pass
class TreeInvalidIdentifier(Exception):
pass
class TreeNode:
def __init__(self, identifier, data, parent=None):
assert isinstance(identifier, str)
assert '\\' not in identifier
self.identifier = identifier
self.data = data
self.parent = parent
self.children = {}
def __getitem__(self, key):
return self.children[key]
def __repr__(self):
return 'TreeNode %s' % self.abspath()
def abspath(self):
node = self
nodes = [node]
while node.parent is not None:
node = node.parent
nodes.append(node)
nodes.reverse()
nodes = [node.identifier for node in nodes]
return '\\'.join(nodes)
def add_child(self, other_node, overwrite_parent=False):
self.check_child_availability(other_node.identifier)
if other_node.parent is not None and not overwrite_parent:
raise ValueError('That node already has a parent. Try `overwrite_parent=True`')
other_node.parent = self
self.children[other_node.identifier] = other_node
return other_node
def check_child_availability(self, identifier):
if ':' in identifier:
raise TreeInvalidIdentifier('Only roots may have a colon')
if identifier in self.children:
raise TreeExistingChild('Node %s already has child %s' % (self.identifier, identifier))
def detach(self):
del self.parent.children[self.identifier]
self.parent = None
def listnodes(self, customsort=None):
items = list(self.children.items())
if customsort is None:
items.sort(key=lambda x: x[0].lower())
else:
items.sort(key=customsort)
return [item[1] for item in items]
def merge_other(self, othertree, otherroot=None):
newroot = None
if ':' in othertree.identifier:
if otherroot is None:
raise Exception('Must specify a new name for the other tree\'s root')
else:
newroot = otherroot
else:
newroot = othertree.identifier
othertree.identifier = newroot
othertree.parent = self
self.check_child_availability(newroot)
self.children[newroot] = othertree
def printtree(self, customsort=None):
for node in self.walk(customsort):
print(node.abspath())
def sorted_children(self, customsort=None):
if customsort:
keys = sorted(self.children.keys(), key=customsort)
else:
keys = sorted(self.children.keys())
for key in keys:
yield (key, self.children[key])
def walk(self, customsort=None):
yield self
for child in self.listnodes(customsort=customsort):
#print(child)
#print(child.listnodes())
yield from child.walk(customsort=customsort)
## ##
## GENERIC #########################################################################################
## WALKER ##########################################################################################
## ##
@ -544,6 +403,164 @@ class Walker:
## WALKER ##########################################################################################
## DOWNLOADER ######################################################################################
## ##
class Downloader:
    '''
    Pulls the enabled (do_download == 1) URLs out of an opendirdl database
    and downloads each one into a directory tree mirroring the URL paths.
    '''
    def __init__(self, databasename, outputdir=None, headers=None):
        # `headers` is accepted but not used anywhere in this class —
        # NOTE(review): possibly intended for download_file; confirm.
        self.databasename = databasename
        self.sql = sqlite3.connect(databasename)
        self.cur = self.sql.cursor()
        if outputdir is None or outputdir == "":
            # This assumes that all URLs in the database are from the same domain.
            # If they aren't, it's the user's fault.
            self.cur.execute('SELECT url FROM urls LIMIT 1')
            url = self.cur.fetchone()[0]
            outputdir = url_to_filepath(url)['root']
        self.outputdir = outputdir

    def download(self, overwrite=False, bytespersecond=None):
        '''
        Download every enabled URL, in URL order.

        overwrite: when truthy, existing destination files are replaced;
            otherwise they are skipped with a printed notice.
        bytespersecond: passed through to download_file as a rate limit.

        Each file is first written to a hashed `.oddltemporary` name and
        renamed into place only after the download completes, so an
        interrupted transfer never leaves a partial file at the real name.
        '''
        overwrite = bool(overwrite)
        self.cur.execute('SELECT * FROM urls WHERE do_download == 1 ORDER BY url')
        while True:
            fetch = self.cur.fetchone()
            if fetch is None:
                break
            url = fetch[SQL_URL]

            ''' Creating the permanent and temporary filenames '''
            url_filepath = url_to_filepath(url)
            # Ignore this value of `root`, because we might have a custom outputdir.
            root = url_filepath['root']
            # NOTE(review): despite the comment above, `root` (not
            # self.outputdir) is what gets joined here — confirm intent.
            folder = os.path.join(root, url_filepath['folder'])
            os.makedirs(folder, exist_ok=True)
            fullname = os.path.join(folder, url_filepath['filename'])
            temporary_basename = hashit(url, 16) + '.oddltemporary'
            temporary_fullname = os.path.join(folder, temporary_basename)

            ''' Managing overwrite '''
            if os.path.isfile(fullname):
                if overwrite is True:
                    os.remove(fullname)
                else:
                    safeprint('Skipping "%s". Use `--overwrite`' % fullname)
                    continue
            safeprint('Downloading "%s" as "%s"' % (fullname, temporary_basename))
            filehandle = open(temporary_fullname, 'wb')
            try:
                download_file(url, filehandle, hookfunction=hook1, bytespersecond=bytespersecond)
                # Success: promote the temporary file to its real name.
                os.rename(temporary_fullname, fullname)
            except:
                # Close the handle so the temp file isn't locked, then
                # re-raise; the partial .oddltemporary file is left behind.
                filehandle.close()
                raise
## ##
## DOWNLOADER ######################################################################################
## OTHER CLASSES ###################################################################################
## ##
class Generic:
    '''
    A simple attribute bag: every keyword argument passed to the
    constructor becomes an attribute of the instance.
    '''
    def __init__(self, **kwargs):
        for (key, value) in kwargs.items():
            setattr(self, key, value)
class TreeExistingChild(Exception):
    '''Raised when adding a child whose identifier the node already has.'''
    pass

class TreeInvalidIdentifier(Exception):
    '''Raised for identifiers that break tree rules (e.g. a colon in a non-root).'''
    pass
class TreeNode:
    '''
    A node in a simple general tree. Children are stored in a dict keyed
    by their string identifier. Backslashes are reserved as the path
    separator (see abspath), and colons are only permitted in root
    identifiers (see check_child_availability).
    '''
    def __init__(self, identifier, data, parent=None):
        assert isinstance(identifier, str)
        # '\' is the abspath separator, so it may not appear in names.
        assert '\\' not in identifier
        self.identifier = identifier
        self.data = data
        self.parent = parent
        self.children = {}

    def __getitem__(self, key):
        # node['name'] is shorthand for node.children['name'].
        return self.children[key]

    def __repr__(self):
        return 'TreeNode %s' % self.abspath()

    def abspath(self):
        '''Return the backslash-joined identifiers from the root down to here.'''
        node = self
        nodes = [node]
        while node.parent is not None:
            node = node.parent
            nodes.append(node)
        nodes.reverse()
        nodes = [node.identifier for node in nodes]
        return '\\'.join(nodes)

    def add_child(self, other_node, overwrite_parent=False):
        '''
        Attach `other_node` as a child of this node and return it.
        Refuses to steal a node that already has a parent unless
        `overwrite_parent` is True.
        '''
        self.check_child_availability(other_node.identifier)
        if other_node.parent is not None and not overwrite_parent:
            raise ValueError('That node already has a parent. Try `overwrite_parent=True`')

        other_node.parent = self
        self.children[other_node.identifier] = other_node
        return other_node

    def check_child_availability(self, identifier):
        '''Raise if `identifier` is illegal here or already taken.'''
        if ':' in identifier:
            raise TreeInvalidIdentifier('Only roots may have a colon')
        if identifier in self.children:
            raise TreeExistingChild('Node %s already has child %s' % (self.identifier, identifier))

    def detach(self):
        '''Remove this node from its parent, leaving it parentless.'''
        del self.parent.children[self.identifier]
        self.parent = None

    def listnodes(self, customsort=None):
        '''
        Return the child nodes as a list, sorted case-insensitively by
        identifier, or by `customsort` applied to (identifier, node) pairs.
        '''
        items = list(self.children.items())
        if customsort is None:
            items.sort(key=lambda x: x[0].lower())
        else:
            items.sort(key=customsort)
        return [item[1] for item in items]

    def merge_other(self, othertree, otherroot=None):
        '''
        Graft `othertree` (another root) in as a child of this node.
        Because child identifiers may not contain colons, a root whose
        identifier has one must be renamed via `otherroot`.
        '''
        newroot = None
        if ':' in othertree.identifier:
            if otherroot is None:
                raise Exception('Must specify a new name for the other tree\'s root')
            else:
                newroot = otherroot
        else:
            newroot = othertree.identifier
        othertree.identifier = newroot
        othertree.parent = self
        self.check_child_availability(newroot)
        self.children[newroot] = othertree

    def printtree(self, customsort=None):
        '''Print the abspath of every node in the subtree.'''
        for node in self.walk(customsort):
            print(node.abspath())

    def sorted_children(self, customsort=None):
        '''
        Yield (identifier, node) pairs sorted by identifier, or by
        `customsort` applied to the identifiers.
        '''
        if customsort:
            keys = sorted(self.children.keys(), key=customsort)
        else:
            keys = sorted(self.children.keys())
        for key in keys:
            yield (key, self.children[key])

    def walk(self, customsort=None):
        '''Depth-first traversal: yield this node, then each child subtree.'''
        yield self
        for child in self.listnodes(customsort=customsort):
            #print(child)
            #print(child.listnodes())
            yield from child.walk(customsort=customsort)
## ##
## OTHER CLASSES ###################################################################################
## GENERAL FUNCTIONS ###############################################################################
## ##
def db_init(sql, cur):
@ -724,6 +741,12 @@ def url_to_filepath(text):
'filename': filename,
}
return result
def write(line, file_handle=None):
    '''
    Emit one line of output: to `file_handle` (with a trailing newline)
    when one is given, otherwise to the console via safeprint.
    '''
    if file_handle is not None:
        file_handle.write(line + '\n')
    else:
        safeprint(line)
## ##
## GENERAL FUNCTIONS ###############################################################################
@ -738,7 +761,7 @@ def digest(databasename, walkurl, fullscan=False):
databasename=databasename,
fullscan=fullscan,
walkurl=walkurl,
)
)
walker.walk()
def digest_argparse(args):
@ -755,11 +778,11 @@ def download(databasename, outputdir=None, overwrite=False, bytespersecond=None)
downloader = Downloader(
databasename=databasename,
outputdir=outputdir,
)
)
downloader.download(
bytespersecond=bytespersecond,
overwrite=overwrite,
)
)
def download_argparse(args):
return download(
@ -777,8 +800,8 @@ def filter_pattern(databasename, regex, action='keep', *trash):
When `action` is 'remove', then any URLs matching the regex will have their
`do_download` flag set to False.
Actions will not act on each other's behalf. A 'keep' will NEVER disable a url,
and 'remove' will NEVER enable one.
Actions will not act on each other's behalf. Keep will NEVER disable a url,
and remove will NEVER enable one.
'''
import re
if isinstance(regex, str):
@ -810,55 +833,51 @@ def filter_pattern(databasename, regex, action='keep', *trash):
cur.execute('UPDATE urls SET do_download = 0 WHERE url == ?', [url])
sql.commit()
def keep_pattern(args):
def keep_pattern_argparse(args):
'''
See `filter_pattern`.
'''
filter_pattern(
return filter_pattern(
action='keep',
databasename=args.databasename,
regex=args.regex,
)
)
def list_basenames(databasename, outputfile=None):
def list_basenames(databasename, output_filename=None):
'''
Given a database, print the entries in order of the file basenames.
Print the Enabled entries in order of the file basenames.
This makes it easier to find interesting titles without worrying about
what directory they're in.
'''
sql = sqlite3.connect(databasename)
cur = sql.cursor()
cur.execute('SELECT basename FROM urls WHERE do_download == 1 ORDER BY LENGTH(basename) DESC LIMIT 1')
fetch = cur.fetchone()
if fetch is None:
return
longest = len(fetch[0])
cur.execute('SELECT * FROM urls WHERE do_download == 1 ORDER BY LOWER(basename)')
form = '{bn:<%ds} : {url} : {byt}' % longest
if outputfile:
outputfile = open(outputfile, 'w', encoding='utf-8')
while True:
fetch = cur.fetchone()
if fetch is None:
break
byt = fetch[SQL_CONTENT_LENGTH]
if byt is None:
byt = ''
cur.execute('SELECT * FROM urls WHERE do_download == 1')
items = cur.fetchall()
items.sort(key=lambda x: x[SQL_BASENAME].lower())
form = '{basename:<%ds} : {url} : {size}' % longest
if output_filename is not None:
output_file = open(output_filename, 'w', encoding='utf-8')
for item in items:
size = item[SQL_CONTENT_LENGTH]
if size is None:
size = ''
else:
byt = '{:,}'.format(byt)
line = form.format(bn=fetch[SQL_BASENAME], url=fetch[SQL_URL], byt=byt)
if outputfile:
outputfile.write(line + '\n')
else:
print(line)
if outputfile:
outputfile.close()
size = bytestring.bytestring(size)
line = form.format(
basename=item[SQL_BASENAME],
url=item[SQL_URL],
size=size,
)
write(line)
if output_file:
output_file.close()
def list_basenames_argparse(args):
return list_basenames(
databasename=args.databasename,
outputfile=args.outputfile,
output_filename=args.outputfile,
)
def measure(databasename, fullscan=False, new_only=False):
@ -923,17 +942,25 @@ def measure_argparse(args):
new_only=args.new_only,
)
def remove_pattern(args):
def remove_pattern_argparse(args):
'''
See `filter_pattern`.
'''
filter_pattern(
return filter_pattern(
action='remove',
databasename=args.databasename,
regex=args.regex,
)
)
def tree(databasename, output_filename=None):
'''
Print a tree diagram of the directory-file structure.
If an .html file is given for `output_filename`, the page will have
collapsible boxes and clickable filenames. Otherwise the file will just
be a plain text drawing.
'''
sql = sqlite3.connect(databasename)
cur = sql.cursor()
cur.execute('SELECT * FROM urls WHERE do_download == 1')
@ -945,13 +972,13 @@ def tree(databasename, output_filename=None):
path_parts = url_to_filepath(items[0][SQL_URL])
root_identifier = path_parts['root']
print('Root', root_identifier)
#print('Root', root_identifier)
root_data = {'name': root_identifier, 'item_type': 'directory'}
root_identifier = root_identifier.replace(':', '')
tree = TreeNode(
identifier=root_identifier,
data=root_data
)
)
node_map = {}
unmeasured_file_count = 0
@ -985,7 +1012,7 @@ def tree(databasename, output_filename=None):
data['size'] = item[SQL_CONTENT_LENGTH]
else:
unmeasured_file_count += 1
data['size'] = 0
data['size'] = None
else:
data['item_type'] = 'directory'
@ -1018,12 +1045,6 @@ def tree(databasename, output_filename=None):
this_node.parent = parent_node
#print(this_node.data)
def write(line, outfile=None):
if outfile is None:
safeprint(line)
else:
outfile.write(line + '\n')
def recursive_get_size(node):
size = node.data.get('size', 0)
if size:
@ -1031,27 +1052,40 @@ def tree(databasename, output_filename=None):
return size
for child in node.children.values():
size += recursive_get_size(child)
child_size = recursive_get_size(child)
child_size = child_size or 0
size += child_size
node.data['size'] = size
return size
def recursive_print_node(node, depth=0, outfile=None):
def recursive_print_node(node, depth=0, output_file=None):
size = node.data['size']
if size is None:
size = UNKNOWN_SIZE_STRING
else:
size = bytestring.bytestring(size)
if use_html:
if depth % 2 == 0:
css_class = 'directory_even'
else:
css_class = 'directory_odd'
if node.data['item_type'] == 'directory':
div_id = hashit(node.identifier, 16)
line = '<button onclick="collapse(\'{div_id}\')">{name} ({size})</button>'
line += '<div id="{div_id}" style="display:none">'
line += '<div class="%s" id="{div_id}" style="display:none">' % css_class
line = line.format(
div_id=div_id,
name=node.data['name'],
size=bytestring.bytestring(node.data['size']),
size=size,
)
else:
line = '<a href="{url}">{name} ({size})</a><br>'
line = line.format(
url=node.data['url'],
name=node.data['name'],
size=bytestring.bytestring(node.data['size']),
size=size,
)
else:
line = '{space}{bar}{name} : ({size})'
@ -1059,20 +1093,25 @@ def tree(databasename, output_filename=None):
space='| '*(depth-1),
bar='|---' if depth > 0 else '',
name=node.data['name'],
size=bytestring.bytestring(node.data['size'])
size=size
)
write(line, outfile)
write(line, output_file)
# Sort by type (directories first) then subsort by lowercase path
customsort = lambda x: (
node.children[x].data['item_type'] == 'file',
node.children[x].data['url'].lower(),
)
customsort = lambda x: (node.children[x].data['item_type'] == 'file', node.children[x].data['url'].lower())
for (key, child) in node.sorted_children(customsort=customsort):
recursive_print_node(child, depth+1, outfile=outfile)
recursive_print_node(child, depth=depth+1, output_file=output_file)
if node.data['item_type'] == 'directory':
if use_html:
write('</div>', outfile)
write('</div>', output_file)
else:
# This helps put some space between sibling directories
write('| ' * (depth), outfile)
write('| ' * (depth), output_file)
if output_filename is not None:
@ -1084,12 +1123,12 @@ def tree(databasename, output_filename=None):
if use_html:
write(HTML_TREE_HEADER, outfile=output_file)
write(HTML_TREE_HEADER, file_handle=output_file)
recursive_get_size(tree)
recursive_print_node(tree, outfile=output_file)
recursive_print_node(tree, output_file=output_file)
if unmeasured_file_count > 0:
write(UNMEASURED_WARNING % unmeasured_file_count, outfile=output_file)
write(UNMEASURED_WARNING % unmeasured_file_count, file_handle=output_file)
if output_file is not None:
output_file.close()
@ -1104,11 +1143,10 @@ def tree_argparse(args):
## ##
## COMMANDLINE FUNCTIONS ###########################################################################
if __name__ == '__main__':
if listget(sys.argv, 1, '').lower() in ('help', '-h', '--help'):
def main(argv):
if listget(argv, 1, '').lower() in ('help', '-h', '--help', ''):
print(DOCSTRING)
quit()
return
parser = argparse.ArgumentParser()
subparsers = parser.add_subparsers()
@ -1128,7 +1166,7 @@ if __name__ == '__main__':
p_keep_pattern = subparsers.add_parser('keep_pattern')
p_keep_pattern.add_argument('databasename')
p_keep_pattern.add_argument('regex')
p_keep_pattern.set_defaults(func=keep_pattern)
p_keep_pattern.set_defaults(func=keep_pattern_argparse)
p_list_basenames = subparsers.add_parser('list_basenames')
p_list_basenames.add_argument('databasename')
@ -1144,12 +1182,15 @@ if __name__ == '__main__':
p_remove_pattern = subparsers.add_parser('remove_pattern')
p_remove_pattern.add_argument('databasename')
p_remove_pattern.add_argument('regex')
p_remove_pattern.set_defaults(func=remove_pattern)
p_remove_pattern.set_defaults(func=remove_pattern_argparse)
p_tree = subparsers.add_parser('tree')
p_tree.add_argument('databasename')
p_tree.add_argument('-o', '--outputfile', dest='outputfile', default=None)
p_tree.set_defaults(func=tree_argparse)
args = parser.parse_args()
args = parser.parse_args(argv)
args.func(args)
if __name__ == '__main__':
main(sys.argv[1:])

Binary file not shown.

View file

@ -1 +1,3 @@
Hello!
Hello!
one
two

View file

@ -2,20 +2,28 @@ import http.server
import mimetypes
import os
import urllib.parse
import pathlib
import random
import socketserver
import sys
import types
sys.path.append('C:\\git\\else\\Bytestring'); import bytestring
sys.path.append('C:\\git\\else\\Ratelimiter'); import ratelimiter
sys.path.append('C:\\git\\else\\SpinalTap'); import spinal
f = open('favicon.png', 'rb')
FAVI = f.read()
f.close()
FILE_READ_CHUNK = bytestring.MIBIBYTE
#f = open('favicon.png', 'rb')
#FAVI = f.read()
#f.close()
CWD = os.getcwd()
# The paths which the user may access
# Attempting to access anything outside will 403
OKAY_PATHS = set(x.lower() for x in ['/files', '/favicon.ico'])
# The paths which the user may access.
# Attempting to access anything outside will 403.
# These are converted to Path objects after that class definition.
OKAY_PATHS = set(['files', 'favicon.ico'])
OPENDIR_TEMPLATE = '''
<html>
<body>
@ -29,27 +37,31 @@ OPENDIR_TEMPLATE = '''
</html>
'''
class Multipart:
def __init__(stream, boundary):
self.parts = []
class Path:
'''
I started to use pathlib.Path, but it was too much of a pain.
'''
def __init__(self, path):
path = path.replace('\\', '/')
if len(path) == 0 or path[0] != '/':
path = '/' + path
self.path = path
path = urllib.parse.unquote(path)
path = path.strip('/')
path = os.path.normpath(path)
path = spinal.get_path_casing(path).path
self.absolute_path = path
def __repr__(self):
return 'Path(%s)' % self.path
def __contains__(self, other):
return other.absolute_path.startswith(self.absolute_path)
def __str__(self):
return self.path
def __hash__(self):
return hash(self.absolute_path)
@property
def allowed(self):
return any(self in okay for okay in OKAY_PATHS)
def anchor(self, display_name=None):
if display_name is None:
display_name = self.basename
if self.is_dir:
# Folder emoji
icon = '\U0001F4C1'
@ -57,9 +69,9 @@ class Path:
# Diamond emoji, because there's not one for files.
icon = '\U0001F48E'
quoted_path = urllib.parse.quote(self.path)
#print('anchor', path)
a = '<a href="{full}">{icon} {display}</a>'.format(
full=quoted_path,
full=self.url_path,
icon=icon,
display=display_name,
)
@ -67,42 +79,45 @@ class Path:
@property
def basename(self):
return os.path.basename(self.path)
return os.path.basename(self.absolute_path)
@property
def is_dir(self):
return os.path.isdir(self.os_path)
return os.path.isdir(self.absolute_path)
@property
def is_file(self):
return os.path.isfile(self.os_path)
@property
def os_path(self):
abspath = os.path.join(CWD, self.relative_path)
#print(abspath)
return abspath
return os.path.isfile(self.absolute_path)
@property
def parent(self):
parts = self.path.split('/')[:-1]
parts = '/'.join(parts)
return Path(parts)
parent = os.path.dirname(self.absolute_path)
parent = Path(parent)
return parent
@property
def relative_path(self):
return self.path.lstrip('/')
relative = self.absolute_path
relative = relative.replace(CWD, '')
relative = relative.lstrip(os.sep)
return relative
@property
def size(self):
if self.is_dir:
return -1
return os.path.getsize(self.os_path)
if self.is_file:
return os.path.getsize(self.absolute_path)
else:
return None
def table_row(self, display_name=None, shaded=False):
form = '<tr style="background-color:#{bg}"><td>{anchor}</td><td>{size}</td></tr>'
form = '<tr style="background-color:#{bg}"><td style="width:90%">{anchor}</td><td>{size}</td></tr>'
size = self.size
if size is None:
size = ''
else:
size = bytestring.bytestring(size)
bg = 'ddd' if shaded else 'fff';
size = bytestring.bytestring(self.size) if self.size != -1 else ''
row = form.format(
bg=bg,
anchor=self.anchor(display_name=display_name),
@ -110,134 +125,166 @@ class Path:
)
return row
@property
def url_path(self):
url = self.relative_path
url = url.replace(os.sep, '/')
url = '/' + url
url = urllib.parse.quote(url)
return url
OKAY_PATHS = set(Path(p) for p in OKAY_PATHS)
class RequestHandler(http.server.BaseHTTPRequestHandler):
def write(self, string):
if isinstance(string, str):
string = string.encode('utf-8')
self.wfile.write(string)
def write(self, data):
if isinstance(data, str):
data = data.encode('utf-8')
if isinstance(data, types.GeneratorType):
for chunk in data:
self.wfile.write(chunk)
else:
self.wfile.write(data)
def read_filebytes(self, path):
def read_filebytes(self, path, range_min=None, range_max=None):
#print(path)
if os.path.isfile(path.relative_path):
f = open(path.relative_path, 'rb')
fr = f.read()
f.close()
return fr
if os.path.isdir(path.relative_path):
if path.is_file:
if range_min is None:
range_min = 0
if range_max is None:
range_max = path.size
range_span = range_max - range_min
#print('read span', range_min, range_max, range_span)
f = open(path.absolute_path, 'rb')
f.seek(range_min)
sent_amount = 0
while sent_amount < range_span:
chunk = f.read(FILE_READ_CHUNK)
if len(chunk) == 0:
break
yield chunk
sent_amount += len(chunk)
#print('I read', len(fr))
f.close()
elif path.is_dir:
text = generate_opendir(path)
text = text.encode('utf-8')
return text
yield text
self.send_error(404)
return bytes()
else:
self.send_error(404)
yield bytes()
def do_GET(self):
    '''
    Serve a GET request: file bytes for files (honoring an HTTP Range
    header with a 206 Partial Content response), a generated listing
    for directories.

    This removes the pre-refactor residue that the merge left behind:
    a second, premature send_response(200), a dead read_filebytes call
    without range arguments, redundant path re-wrapping, and a stale
    mime lookup on path.path.
    '''
    # NOTE(review): assumes the Path class handles unquoting/normalizing
    # of the raw request path — confirm against the Path constructor.
    path = Path(self.path)
    if self.send_path_validation_error(path):
        return

    range_min = None
    range_max = None
    status_code = 200
    headers = {}

    if path.is_file:
        file_size = path.size
        if 'range' in self.headers:
            desired_range = self.headers['range'].lower()
            desired_range = desired_range.split('bytes=')[-1]
            # Empty or non-numeric boundary means "unspecified".
            helper = lambda x: int(x) if x and x.isdigit() else None
            if '-' in desired_range:
                (desired_min, desired_max) = desired_range.split('-')
                range_min = helper(desired_min)
                range_max = helper(desired_max)
            else:
                range_min = helper(desired_range)
            if range_min is None:
                range_min = 0
            if range_max is None:
                range_max = file_size
            # because ranges are 0 indexed
            range_max = min(range_max, file_size - 1)
            range_min = max(range_min, 0)
            status_code = 206
            range_header = 'bytes {min}-{max}/{outof}'.format(
                min=range_min,
                max=range_max,
                outof=file_size,
            )
            headers['Content-Range'] = range_header
            headers['Accept-Ranges'] = 'bytes'
            content_length = (range_max - range_min) + 1
        else:
            content_length = file_size
        headers['Content-length'] = content_length

    mime = mimetypes.guess_type(path.absolute_path)[0]
    if mime is not None:
        headers['Content-type'] = mime

    # Status line first, then all collected headers, exactly once.
    self.send_response(status_code)
    for (key, value) in headers.items():
        self.send_header(key, value)
    d = self.read_filebytes(path, range_min=range_min, range_max=range_max)
    self.end_headers()
    self.write(d)
def do_HEAD(self):
    '''
    Respond to a HEAD request: same status and headers a GET would
    produce, but no body.

    Fixes the merge residue (duplicate send_response, duplicate
    Content-length, stale mime lookup on path.path) and moves
    send_response ahead of send_header — BaseHTTPRequestHandler
    buffers headers, and appending the status line after them would
    emit a malformed response.
    '''
    # NOTE(review): assumes Path normalizes the raw request path — confirm.
    path = Path(self.path)
    if self.send_path_validation_error(path):
        return

    status_code = 200
    # The status line must be queued before any header.
    self.send_response(status_code)

    if path.is_dir:
        # Directories are served as generated HTML listings.
        mime = 'text/html'
    else:
        mime = mimetypes.guess_type(path.absolute_path)[0]
    if mime is not None:
        self.send_header('Content-type', mime)
    if path.is_file:
        self.send_header('Content-length', path.size)
    self.end_headers()
def path_validation(self, path):
    '''
    Legacy whitelist check: resolve `path` against the server's working
    directory and send a 403 if it does not fall under one of the
    OKAY_PATHS prefixes.

    NOTE(review): on failure this sends the 403 itself and returns None —
    it never returns a (status, message) tuple, so a caller testing the
    return value for truthiness will never see an error. It looks
    superseded by the Path.allowed check; confirm whether it is still
    needed.
    '''
    path = path.lstrip('/')
    # Anchor the request path under the server's CWD, then let abspath
    # collapse any '..' segments so the path cannot escape it.
    absolute_path = os.path.join(CWD, path)
    absolute_path = os.path.abspath(absolute_path)
    # Re-derive the request-relative path with forward slashes only.
    path = absolute_path.replace(CWD, '')
    path = path.lstrip('/')
    path = path.replace('\\', '/')
    #if '..' in path:
    #    return (403, 'I\'m not going to play games with you.')
    #print(path)
    print(path)
    if not any(path.startswith(okay) for okay in OKAY_PATHS):
        self.send_error(403, 'Stop that!')
        return
def send_path_validation_error(self, path):
    '''
    If `path` is outside the allowed area, send a 403 and return True
    (caller should abort the request); otherwise return False.

    The merge had kept both the legacy path_validation() error path and
    the new Path.allowed check, so a forbidden path called send_error
    twice on the same request, corrupting the response. Only the
    Path.allowed check remains.
    '''
    if not path.allowed:
        self.send_error(403, 'Stop that!')
        return True
    return False
# def do_POST(self):
# path = self.path.lower()
# path = urllib.parse.unquote(path).rstrip('/')
# error = path_validation(path)
# if error:
# self.send_error(*error)
# return
class ThreadedServer(socketserver.ThreadingMixIn, http.server.HTTPServer):
    '''
    HTTPServer that dispatches each incoming request on its own thread,
    so one slow transfer doesn't block other clients.

    Thanks root and twasbrillig http://stackoverflow.com/a/14089457
    '''
# path = Path(path)
# content_type = self.headers.get('Content-Type', '')
# if not any (req in content_type for req in ['multipart/form-data', 'boundary=']):
# self.send_error(400, 'Bad request')
# return
# boundary = content_type.split('boundary=')[1]
# boundary = boundary.split(';')[0]
# boundary = boundary.strip()
# print('B:', self.headers.get_boundary())
# print('F:', self.headers.get_filename())
# incoming_size = int(self.headers.get('Content-Length', 0))
# received_bytes = 0
# remaining_bytes = incoming_size
# while remaining_bytes > 0:
# chunk_size = min(remaining_bytes, 16*1024)
# chunk = self.rfile.read(chunk_size)
# remaining_bytes -= chunk_size
# received_bytes += chunk_size
# print(chunk)
# self.send_response(200)
# self.send_header('Content-Type', 'text/html')
# self.end_headers()
# print(dir(self.request))
# self.write('Thanks')
def generate_opendir(path):
#print('Listdir:', path)
items = os.listdir(path.relative_path)
items = [os.path.join(path.relative_path, f) for f in items]
items = os.listdir(path.absolute_path)
items = [os.path.join(path.absolute_path, f) for f in items]
#print(items)
# This places directories above files, each ordered alphabetically
items.sort(key=str.lower)
@ -252,10 +299,14 @@ def generate_opendir(path):
items = directories + files
items = [Path(f) for f in items]
entries = []
if not any(okay == path.path for okay in OKAY_PATHS):
# If the user is on one of the OKAY_PATHS, then he can't step up
# because that would be outside the OKAY area.
entries.append(path.parent.table_row(display_name='up'))
if any(path.absolute_path == okay.absolute_path for okay in OKAY_PATHS):
# This is different than a permission check, we're seeing if they're
# actually at the top, in which case they don't need an up button.
pass
else:
entry = path.parent.table_row(display_name='up')
entries.append(entry)
shaded = True
for item in items:
@ -269,17 +320,15 @@ def generate_opendir(path):
def generate_random_filename(original_filename='', length=8):
    '''
    Return a random lowercase-hex identifier of exactly `length` characters.

    `original_filename` is accepted for interface compatibility but unused.

    Fixes the merge artifact `bits = length * 44`: a hex digit encodes
    4 bits, so 44 produced an identifier ~11x longer than requested
    (rjust pads but never truncates).
    '''
    import random
    bits = length * 4  # 4 bits per hex digit
    bits = random.getrandbits(bits)
    identifier = '{:x}'.format(bits).rjust(length, '0')
    return identifier
def normalize_path(path):
    '''
    Decode any percent-escapes in the request path and drop a trailing
    slash, so '/foo%20bar/' and '/foo bar' refer to the same resource.
    '''
    #path = path.lower()
    return urllib.parse.unquote(path).rstrip('/')
def main():
    '''
    Start the threaded HTTP server on port 32768 and serve until killed.

    The merge had left a second, un-threaded http.server.HTTPServer
    construction after serve_forever() — unreachable dead code from the
    pre-refactor version — which is removed here.
    '''
    server = ThreadedServer(('', 32768), RequestHandler)
    print('server starting')
    server.serve_forever()

if __name__ == '__main__':
    main()

View file

@ -542,8 +542,9 @@ def get_path_casing(path):
'''
piece = glob.escape(piece)
for character in piece:
if character not in '!':
if character not in '![]':
replacement = '[%s]' % character
#print(piece, character, replacement)
piece = piece.replace(character, replacement, 1)
break
return piece
@ -551,7 +552,7 @@ def get_path_casing(path):
pattern = [patternize(piece) for piece in subpath.split(os.sep)]
pattern = os.sep.join(pattern)
pattern = drive.upper() + os.sep + pattern
print(pattern)
#print(pattern)
try:
return str_to_fp(glob.glob(pattern)[0])
except IndexError: