unknown 2016-07-19 20:31:47 -07:00
parent ef15e1a644
commit 8907923138
10 changed files with 587 additions and 374 deletions

BIN  .GitImages/desert_goats.png (new image, 284 KiB; not shown)
BIN  (second new image, 337 KiB; not shown)

View file

@ -0,0 +1,13 @@
Blurred Letterbox
=================
I'm not entirely sure why you'd want to do this.
<p align="center">
<img src="https://github.com/voussoir/else/blob/master/.GitImages/desert_goats.png?raw=true" alt="sample"/>
</p>
<p align="center">
<img src="https://github.com/voussoir/else/blob/master/.GitImages/desert_goats_blur.png?raw=true" alt="sample"/>
</p>
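
A possible invocation, as a sketch only: the script's saved filename is not shown in this diff, and the flags come from its argparse setup below.

    python blur_letterbox.py desert_goats.png -w 1920 -h 1080

The result is written next to the input as `desert_goats_blur.png` (the script appends `_blur` to the base name), and `-b` overrides the automatically chosen blur radius.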

View file

@ -0,0 +1,102 @@
import argparse
import os
import PIL.ImageFilter
import PIL.Image
import sys

def blur_letterbox(
        image,
        new_width=None,
        new_height=None,
        blurring=None,
    ):
    (iw, ih) = image.size
    new_width = new_width or iw
    new_height = new_height or ih

    if blurring is None:
        blurring = (new_width * new_height) * 0.00001
        print('Using blurriness', blurring)

    background = image.resize(fit_over_bounds(iw, ih, new_width, new_height), PIL.Image.ANTIALIAS)
    background = background.filter(PIL.ImageFilter.GaussianBlur(radius=blurring))
    foreground = image.resize(fit_into_bounds(iw, ih, new_width, new_height), PIL.Image.ANTIALIAS)

    background_offsets = offsets(background, new_width, new_height)
    foreground_offsets = offsets(foreground, new_width, new_height)

    final = PIL.Image.new(mode=image.mode, size=(new_width, new_height))
    final.paste(background, background_offsets)
    final.paste(foreground, foreground_offsets)
    return final

def fit_into_bounds(iw, ih, fw, fh):
    '''
    Given the w+h of the image and the w+h of the frame,
    return new w+h that fits the image into the frame
    while maintaining the aspect ratio and leaving blank space
    everywhere else.
    '''
    ratio = min(fw/iw, fh/ih)
    w = int(iw * ratio)
    h = int(ih * ratio)
    return (w, h)

def fit_over_bounds(iw, ih, fw, fh):
    '''
    Given the w+h of the image and the w+h of the frame,
    return new w+h that covers the entire frame
    while maintaining the aspect ratio.
    '''
    ratio = max(fw/iw, fh/ih)
    w = int(iw * ratio)
    h = int(ih * ratio)
    return (w, h)

def listget(li, index, fallback=None):
    try:
        return li[index]
    except IndexError:
        return fallback

def offsets(image, new_width, new_height):
    '''
    Calculate the horizontal and vertical offsets
    needed to center the image in the given box.
    '''
    horizontal = int((new_width - image.size[0]) / 2)
    vertical = int((image.size[1] - new_height) / 2) * -1
    return (horizontal, vertical)

def main(argv):
    parser = argparse.ArgumentParser(add_help=False)
    parser.add_argument('filename')
    parser.add_argument('-w', '--width', dest='width', default=None)
    parser.add_argument('-h', '--height', dest='height', default=None)
    parser.add_argument('-b', '--blurring', dest='blurring', default=None)
    args = parser.parse_args(argv)

    if args.width is None and args.height is None:
        print('Need a new width or height')
        return

    int_or_none = lambda x: int(x) if x else x

    (base, extension) = os.path.splitext(args.filename)
    new_name = base + '_blur' + extension

    image = PIL.Image.open(args.filename)
    image = blur_letterbox(
        image,
        int_or_none(args.width),
        int_or_none(args.height),
        int_or_none(args.blurring),
    )
    image.save(new_name)

if __name__ == '__main__':
    main(sys.argv[1:])
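
A quick worked illustration of the two fitting helpers above, with values computed from the functions as written:

    fit_into_bounds(1000, 500, 800, 800)  # min(800/1000, 800/500) = 0.8 -> (800, 400), letterboxed foreground
    fit_over_bounds(1000, 500, 800, 800)  # max(800/1000, 800/500) = 1.6 -> (1600, 800), background that covers the frame

blur_letterbox pastes the blurred, oversized background first, then centers the fitted foreground on top of it.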

View file

@ -1,6 +1,11 @@
Open Dir DL
===========

- 2016 07 19
    - Rearranged the big blocks to be in a logical order rather than alphabetical order: Walker > Downloader > other classes.
    - Renamed the `keep_pattern` and `remove_pattern` functions to `keep_pattern_argparse` etc. to be consistent with the other functions that take argparse namespaces as their only parameter. Does not affect the commandline usage.
    - Gave the HTML tree divs a very gentle shadow and alternating colors to help with depth perception.

- 2016 07 08
    - Fixed bug in which trees wouldn't generate on server:port urls.

View file

@ -11,10 +11,10 @@ The basics:
> opendirdl remove_pattern "folder\.jpg" > opendirdl remove_pattern "folder\.jpg"
Note the percent-encoded string. Note the percent-encoded string.
3. Download the enabled files with 3. Download the enabled files with
> opendirdl download database.db > opendirdl download website.com.db
Specifics:
The specifics:
digest: digest:
Recursively fetch directories and build a database of file URLs. Recursively fetch directories and build a database of file URLs.
@ -61,7 +61,7 @@ remove_pattern:
> opendirdl remove_pattern website.com.db ".*" > opendirdl remove_pattern website.com.db ".*"
list_basenames: list_basenames:
List enabled URLs in order of their base filename. This makes it easier to List Enabled URLs in order of their base filename. This makes it easier to
find titles of interest in a directory that is very scattered or poorly find titles of interest in a directory that is very scattered or poorly
organized. organized.
@ -83,11 +83,11 @@ measure:
When included, perform HEAD requests on all files to update their size. When included, perform HEAD requests on all files to update their size.
-n | --new_only: -n | --new_only:
When included, perform HEAD requests only on files that haven't gotten one When included, perform HEAD requests only on files that haven't gotten
yet. one yet.
If a file's size is not known by the time this operation completes, you will If a file's size is not known by the time this operation completes, you
receive a printed note. will receive a printed note.
tree: tree:
Print the file / folder tree. Print the file / folder tree.
@ -100,8 +100,8 @@ tree:
filenames contain special characters that crash Python, or are so long filenames contain special characters that crash Python, or are so long
that the console becomes unreadable. that the console becomes unreadable.
If the filename ends with ".html", the webpage will use collapsible If the filename ends with ".html", the created page will have
boxes rather than plain text. collapsible boxes rather than a plaintext diagram.
''' '''
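
A sketch of the workflow these sections describe, with an illustrative database name:

    > opendirdl measure website.com.db -n
    > opendirdl tree website.com.db -o website.com.html

The first command HEAD-requests only the files that have no stored size; the second renders the collapsible HTML tree described above.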
@ -134,6 +134,8 @@ TERMINAL_WIDTH = shutil.get_terminal_size().columns
DOWNLOAD_CHUNK = 16 * bytestring.KIBIBYTE DOWNLOAD_CHUNK = 16 * bytestring.KIBIBYTE
UNKNOWN_SIZE_STRING = '???'
# When doing a basic scan, we will not send HEAD requests to URLs that end in these strings, # When doing a basic scan, we will not send HEAD requests to URLs that end in these strings,
# because they're probably files. # because they're probably files.
# This isn't meant to be a comprehensive filetype library, but it covers enough of the # This isn't meant to be a comprehensive filetype library, but it covers enough of the
@ -203,16 +205,29 @@ function collapse(id)
{
    font-family: Consolas;
}
button
{
    display: block;
}
div
{
    padding: 10px;
    padding-left: 15px;
    margin-bottom: 10px;
    border: 1px solid #000;
    box-shadow: 1px 1px 2px 0px rgba(0,0,0,0.3);
}
.directory_even
{
    background-color: #fff;
}
.directory_odd
{
    background-color: #eee;
}
</style>
'''
@ -224,7 +239,7 @@ CREATE TABLE IF NOT EXISTS urls(
    content_length INT,
    content_type TEXT,
    do_download INT
);
CREATE INDEX IF NOT EXISTS urlindex on urls(url);
CREATE INDEX IF NOT EXISTS baseindex on urls(basename);
CREATE INDEX IF NOT EXISTS sizeindex on urls(content_length);
@ -238,166 +253,10 @@ SQL_DO_DOWNLOAD = 4
UNMEASURED_WARNING = ''' UNMEASURED_WARNING = '''
Note: %d files do not have a stored Content-Length. Note: %d files do not have a stored Content-Length.
Run `measure` with `-f`|`--fullscan` or `-n`|`--new_only` to HEAD request those files. Run `measure` with `-f`|`--fullscan` or `-n`|`--new_only` to HEAD request
those files.
'''.strip() '''.strip()
## DOWNLOADER ######################################################################################
## ##
class Downloader:
def __init__(self, databasename, outputdir=None, headers=None):
self.databasename = databasename
self.sql = sqlite3.connect(databasename)
self.cur = self.sql.cursor()
if outputdir is None or outputdir == "":
# This assumes that all URLs in the database are from the same domain.
# If they aren't, it's the user's fault.
self.cur.execute('SELECT url FROM urls LIMIT 1')
url = self.cur.fetchone()[0]
outputdir = url_to_filepath(url)['root']
self.outputdir = outputdir
def download(self, overwrite=False, bytespersecond=None):
overwrite = bool(overwrite)
self.cur.execute('SELECT * FROM urls WHERE do_download == 1 ORDER BY url')
while True:
fetch = self.cur.fetchone()
if fetch is None:
break
url = fetch[SQL_URL]
''' Creating the permanent and temporary filenames '''
url_filepath = url_to_filepath(url)
# Ignore this value of `root`, because we might have a custom outputdir.
root = url_filepath['root']
folder = os.path.join(root, url_filepath['folder'])
os.makedirs(folder, exist_ok=True)
fullname = os.path.join(folder, url_filepath['filename'])
temporary_basename = hashit(url, 16) + '.oddltemporary'
temporary_fullname = os.path.join(folder, temporary_basename)
''' Managing overwrite '''
if os.path.isfile(fullname):
if overwrite is True:
os.remove(fullname)
else:
safeprint('Skipping "%s". Use `--overwrite`' % fullname)
continue
safeprint('Downloading "%s" as "%s"' % (fullname, temporary_basename))
filehandle = open(temporary_fullname, 'wb')
try:
download_file(url, filehandle, hookfunction=hook1, bytespersecond=bytespersecond)
os.rename(temporary_fullname, fullname)
except:
filehandle.close()
raise
## ##
## DOWNLOADER ######################################################################################
## GENERIC #########################################################################################
## ##
class Generic:
def __init__(self, **kwargs):
for kwarg in kwargs:
setattr(self, kwarg, kwargs[kwarg])
class TreeExistingChild(Exception):
pass
class TreeInvalidIdentifier(Exception):
pass
class TreeNode:
def __init__(self, identifier, data, parent=None):
assert isinstance(identifier, str)
assert '\\' not in identifier
self.identifier = identifier
self.data = data
self.parent = parent
self.children = {}
def __getitem__(self, key):
return self.children[key]
def __repr__(self):
return 'TreeNode %s' % self.abspath()
def abspath(self):
node = self
nodes = [node]
while node.parent is not None:
node = node.parent
nodes.append(node)
nodes.reverse()
nodes = [node.identifier for node in nodes]
return '\\'.join(nodes)
def add_child(self, other_node, overwrite_parent=False):
self.check_child_availability(other_node.identifier)
if other_node.parent is not None and not overwrite_parent:
raise ValueError('That node already has a parent. Try `overwrite_parent=True`')
other_node.parent = self
self.children[other_node.identifier] = other_node
return other_node
def check_child_availability(self, identifier):
if ':' in identifier:
raise TreeInvalidIdentifier('Only roots may have a colon')
if identifier in self.children:
raise TreeExistingChild('Node %s already has child %s' % (self.identifier, identifier))
def detach(self):
del self.parent.children[self.identifier]
self.parent = None
def listnodes(self, customsort=None):
items = list(self.children.items())
if customsort is None:
items.sort(key=lambda x: x[0].lower())
else:
items.sort(key=customsort)
return [item[1] for item in items]
def merge_other(self, othertree, otherroot=None):
newroot = None
if ':' in othertree.identifier:
if otherroot is None:
raise Exception('Must specify a new name for the other tree\'s root')
else:
newroot = otherroot
else:
newroot = othertree.identifier
othertree.identifier = newroot
othertree.parent = self
self.check_child_availability(newroot)
self.children[newroot] = othertree
def printtree(self, customsort=None):
for node in self.walk(customsort):
print(node.abspath())
def sorted_children(self, customsort=None):
if customsort:
keys = sorted(self.children.keys(), key=customsort)
else:
keys = sorted(self.children.keys())
for key in keys:
yield (key, self.children[key])
def walk(self, customsort=None):
yield self
for child in self.listnodes(customsort=customsort):
#print(child)
#print(child.listnodes())
yield from child.walk(customsort=customsort)
## ##
## GENERIC #########################################################################################
## WALKER ########################################################################################## ## WALKER ##########################################################################################
## ## ## ##
@ -544,6 +403,164 @@ class Walker:
## WALKER ########################################################################################## ## WALKER ##########################################################################################
## DOWNLOADER ######################################################################################
## ##
class Downloader:
def __init__(self, databasename, outputdir=None, headers=None):
self.databasename = databasename
self.sql = sqlite3.connect(databasename)
self.cur = self.sql.cursor()
if outputdir is None or outputdir == "":
# This assumes that all URLs in the database are from the same domain.
# If they aren't, it's the user's fault.
self.cur.execute('SELECT url FROM urls LIMIT 1')
url = self.cur.fetchone()[0]
outputdir = url_to_filepath(url)['root']
self.outputdir = outputdir
def download(self, overwrite=False, bytespersecond=None):
overwrite = bool(overwrite)
self.cur.execute('SELECT * FROM urls WHERE do_download == 1 ORDER BY url')
while True:
fetch = self.cur.fetchone()
if fetch is None:
break
url = fetch[SQL_URL]
''' Creating the permanent and temporary filenames '''
url_filepath = url_to_filepath(url)
# Ignore this value of `root`, because we might have a custom outputdir.
root = url_filepath['root']
folder = os.path.join(root, url_filepath['folder'])
os.makedirs(folder, exist_ok=True)
fullname = os.path.join(folder, url_filepath['filename'])
temporary_basename = hashit(url, 16) + '.oddltemporary'
temporary_fullname = os.path.join(folder, temporary_basename)
''' Managing overwrite '''
if os.path.isfile(fullname):
if overwrite is True:
os.remove(fullname)
else:
safeprint('Skipping "%s". Use `--overwrite`' % fullname)
continue
safeprint('Downloading "%s" as "%s"' % (fullname, temporary_basename))
filehandle = open(temporary_fullname, 'wb')
try:
download_file(url, filehandle, hookfunction=hook1, bytespersecond=bytespersecond)
os.rename(temporary_fullname, fullname)
except:
filehandle.close()
raise
## ##
## DOWNLOADER ######################################################################################
## OTHER CLASSES ###################################################################################
## ##
class Generic:
def __init__(self, **kwargs):
for kwarg in kwargs:
setattr(self, kwarg, kwargs[kwarg])
class TreeExistingChild(Exception):
pass
class TreeInvalidIdentifier(Exception):
pass
class TreeNode:
def __init__(self, identifier, data, parent=None):
assert isinstance(identifier, str)
assert '\\' not in identifier
self.identifier = identifier
self.data = data
self.parent = parent
self.children = {}
def __getitem__(self, key):
return self.children[key]
def __repr__(self):
return 'TreeNode %s' % self.abspath()
def abspath(self):
node = self
nodes = [node]
while node.parent is not None:
node = node.parent
nodes.append(node)
nodes.reverse()
nodes = [node.identifier for node in nodes]
return '\\'.join(nodes)
def add_child(self, other_node, overwrite_parent=False):
self.check_child_availability(other_node.identifier)
if other_node.parent is not None and not overwrite_parent:
raise ValueError('That node already has a parent. Try `overwrite_parent=True`')
other_node.parent = self
self.children[other_node.identifier] = other_node
return other_node
def check_child_availability(self, identifier):
if ':' in identifier:
raise TreeInvalidIdentifier('Only roots may have a colon')
if identifier in self.children:
raise TreeExistingChild('Node %s already has child %s' % (self.identifier, identifier))
def detach(self):
del self.parent.children[self.identifier]
self.parent = None
def listnodes(self, customsort=None):
items = list(self.children.items())
if customsort is None:
items.sort(key=lambda x: x[0].lower())
else:
items.sort(key=customsort)
return [item[1] for item in items]
def merge_other(self, othertree, otherroot=None):
newroot = None
if ':' in othertree.identifier:
if otherroot is None:
raise Exception('Must specify a new name for the other tree\'s root')
else:
newroot = otherroot
else:
newroot = othertree.identifier
othertree.identifier = newroot
othertree.parent = self
self.check_child_availability(newroot)
self.children[newroot] = othertree
def printtree(self, customsort=None):
for node in self.walk(customsort):
print(node.abspath())
def sorted_children(self, customsort=None):
if customsort:
keys = sorted(self.children.keys(), key=customsort)
else:
keys = sorted(self.children.keys())
for key in keys:
yield (key, self.children[key])
def walk(self, customsort=None):
yield self
for child in self.listnodes(customsort=customsort):
#print(child)
#print(child.listnodes())
yield from child.walk(customsort=customsort)
## ##
## OTHER CLASSES ###################################################################################
## GENERAL FUNCTIONS ############################################################################### ## GENERAL FUNCTIONS ###############################################################################
## ## ## ##
def db_init(sql, cur): def db_init(sql, cur):
@ -724,6 +741,12 @@ def url_to_filepath(text):
'filename': filename, 'filename': filename,
} }
return result return result
def write(line, file_handle=None):
if file_handle is None:
safeprint(line)
else:
file_handle.write(line + '\n')
## ## ## ##
## GENERAL FUNCTIONS ############################################################################### ## GENERAL FUNCTIONS ###############################################################################
@ -738,7 +761,7 @@ def digest(databasename, walkurl, fullscan=False):
databasename=databasename, databasename=databasename,
fullscan=fullscan, fullscan=fullscan,
walkurl=walkurl, walkurl=walkurl,
) )
walker.walk() walker.walk()
def digest_argparse(args): def digest_argparse(args):
@ -755,11 +778,11 @@ def download(databasename, outputdir=None, overwrite=False, bytespersecond=None)
downloader = Downloader( downloader = Downloader(
databasename=databasename, databasename=databasename,
outputdir=outputdir, outputdir=outputdir,
) )
downloader.download( downloader.download(
bytespersecond=bytespersecond, bytespersecond=bytespersecond,
overwrite=overwrite, overwrite=overwrite,
) )
def download_argparse(args): def download_argparse(args):
return download( return download(
@ -777,8 +800,8 @@ def filter_pattern(databasename, regex, action='keep', *trash):
When `action` is 'remove', then any URLs matching the regex will have their When `action` is 'remove', then any URLs matching the regex will have their
`do_download` flag set to False. `do_download` flag set to False.
Actions will not act on each other's behalf. A 'keep' will NEVER disable a url, Actions will not act on each other's behalf. Keep will NEVER disable a url,
and 'remove' will NEVER enable one. and remove will NEVER enable one.
''' '''
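    # Sketch of the call pattern the docstring describes (database name and
    # patterns are hypothetical): a 'keep' pass marks matching URLs for
    # download, a 'remove' pass clears do_download on matching URLs, and
    # neither pass ever undoes the other's work.
    #     filter_pattern('website.com.db', r'\.flac$', action='keep')
    #     filter_pattern('website.com.db', r'folder\.jpg', action='remove')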
import re import re
if isinstance(regex, str): if isinstance(regex, str):
@ -810,55 +833,51 @@ def filter_pattern(databasename, regex, action='keep', *trash):
cur.execute('UPDATE urls SET do_download = 0 WHERE url == ?', [url]) cur.execute('UPDATE urls SET do_download = 0 WHERE url == ?', [url])
sql.commit() sql.commit()
def keep_pattern(args): def keep_pattern_argparse(args):
''' '''
See `filter_pattern`. See `filter_pattern`.
''' '''
filter_pattern( return filter_pattern(
action='keep', action='keep',
databasename=args.databasename, databasename=args.databasename,
regex=args.regex, regex=args.regex,
) )
def list_basenames(databasename, outputfile=None): def list_basenames(databasename, output_filename=None):
''' '''
Given a database, print the entries in order of the file basenames. Print the Enabled entries in order of the file basenames.
This makes it easier to find interesting titles without worrying about This makes it easier to find interesting titles without worrying about
what directory they're in. what directory they're in.
''' '''
sql = sqlite3.connect(databasename) sql = sqlite3.connect(databasename)
cur = sql.cursor() cur = sql.cursor()
cur.execute('SELECT basename FROM urls WHERE do_download == 1 ORDER BY LENGTH(basename) DESC LIMIT 1')
fetch = cur.fetchone() cur.execute('SELECT * FROM urls WHERE do_download == 1')
if fetch is None: items = cur.fetchall()
return items.sort(key=lambda x: x[SQL_BASENAME].lower())
longest = len(fetch[0])
cur.execute('SELECT * FROM urls WHERE do_download == 1 ORDER BY LOWER(basename)') form = '{basename:<%ds} : {url} : {size}' % longest
form = '{bn:<%ds} : {url} : {byt}' % longest if output_filename is not None:
if outputfile: output_file = open(output_filename, 'w', encoding='utf-8')
outputfile = open(outputfile, 'w', encoding='utf-8') for item in items:
while True: size = item[SQL_CONTENT_LENGTH]
fetch = cur.fetchone() if size is None:
if fetch is None: size = ''
break
byt = fetch[SQL_CONTENT_LENGTH]
if byt is None:
byt = ''
else: else:
byt = '{:,}'.format(byt) size = bytestring.bytestring(size)
line = form.format(bn=fetch[SQL_BASENAME], url=fetch[SQL_URL], byt=byt) line = form.format(
if outputfile: basename=item[SQL_BASENAME],
outputfile.write(line + '\n') url=item[SQL_URL],
else: size=size,
print(line) )
if outputfile: write(line)
outputfile.close() if output_file:
output_file.close()
def list_basenames_argparse(args): def list_basenames_argparse(args):
return list_basenames( return list_basenames(
databasename=args.databasename, databasename=args.databasename,
outputfile=args.outputfile, output_filename=args.outputfile,
) )
def measure(databasename, fullscan=False, new_only=False): def measure(databasename, fullscan=False, new_only=False):
@ -923,17 +942,25 @@ def measure_argparse(args):
new_only=args.new_only, new_only=args.new_only,
) )
def remove_pattern(args): def remove_pattern_argparse(args):
''' '''
See `filter_pattern`. See `filter_pattern`.
''' '''
filter_pattern( return filter_pattern(
action='remove', action='remove',
databasename=args.databasename, databasename=args.databasename,
regex=args.regex, regex=args.regex,
) )
def tree(databasename, output_filename=None): def tree(databasename, output_filename=None):
'''
Print a tree diagram of the directory-file structure.
If an .html file is given for `output_filename`, the page will have
collapsible boxes and clickable filenames. Otherwise the file will just
be a plain text drawing.
'''
sql = sqlite3.connect(databasename) sql = sqlite3.connect(databasename)
cur = sql.cursor() cur = sql.cursor()
cur.execute('SELECT * FROM urls WHERE do_download == 1') cur.execute('SELECT * FROM urls WHERE do_download == 1')
@ -945,13 +972,13 @@ def tree(databasename, output_filename=None):
path_parts = url_to_filepath(items[0][SQL_URL]) path_parts = url_to_filepath(items[0][SQL_URL])
root_identifier = path_parts['root'] root_identifier = path_parts['root']
print('Root', root_identifier) #print('Root', root_identifier)
root_data = {'name': root_identifier, 'item_type': 'directory'} root_data = {'name': root_identifier, 'item_type': 'directory'}
root_identifier = root_identifier.replace(':', '') root_identifier = root_identifier.replace(':', '')
tree = TreeNode( tree = TreeNode(
identifier=root_identifier, identifier=root_identifier,
data=root_data data=root_data
) )
node_map = {} node_map = {}
unmeasured_file_count = 0 unmeasured_file_count = 0
@ -985,7 +1012,7 @@ def tree(databasename, output_filename=None):
data['size'] = item[SQL_CONTENT_LENGTH] data['size'] = item[SQL_CONTENT_LENGTH]
else: else:
unmeasured_file_count += 1 unmeasured_file_count += 1
data['size'] = 0 data['size'] = None
else: else:
data['item_type'] = 'directory' data['item_type'] = 'directory'
@ -1018,12 +1045,6 @@ def tree(databasename, output_filename=None):
this_node.parent = parent_node this_node.parent = parent_node
#print(this_node.data) #print(this_node.data)
def write(line, outfile=None):
if outfile is None:
safeprint(line)
else:
outfile.write(line + '\n')
def recursive_get_size(node): def recursive_get_size(node):
size = node.data.get('size', 0) size = node.data.get('size', 0)
if size: if size:
@ -1031,27 +1052,40 @@ def tree(databasename, output_filename=None):
return size return size
for child in node.children.values(): for child in node.children.values():
size += recursive_get_size(child) child_size = recursive_get_size(child)
child_size = child_size or 0
size += child_size
node.data['size'] = size node.data['size'] = size
return size return size
def recursive_print_node(node, depth=0, outfile=None): def recursive_print_node(node, depth=0, output_file=None):
size = node.data['size']
if size is None:
size = UNKNOWN_SIZE_STRING
else:
size = bytestring.bytestring(size)
if use_html: if use_html:
if depth % 2 == 0:
css_class = 'directory_even'
else:
css_class = 'directory_odd'
if node.data['item_type'] == 'directory': if node.data['item_type'] == 'directory':
div_id = hashit(node.identifier, 16) div_id = hashit(node.identifier, 16)
line = '<button onclick="collapse(\'{div_id}\')">{name} ({size})</button>' line = '<button onclick="collapse(\'{div_id}\')">{name} ({size})</button>'
line += '<div id="{div_id}" style="display:none">' line += '<div class="%s" id="{div_id}" style="display:none">' % css_class
line = line.format( line = line.format(
div_id=div_id, div_id=div_id,
name=node.data['name'], name=node.data['name'],
size=bytestring.bytestring(node.data['size']), size=size,
) )
else: else:
line = '<a href="{url}">{name} ({size})</a><br>' line = '<a href="{url}">{name} ({size})</a><br>'
line = line.format( line = line.format(
url=node.data['url'], url=node.data['url'],
name=node.data['name'], name=node.data['name'],
size=bytestring.bytestring(node.data['size']), size=size,
) )
else: else:
line = '{space}{bar}{name} : ({size})' line = '{space}{bar}{name} : ({size})'
@ -1059,20 +1093,25 @@ def tree(databasename, output_filename=None):
space='| '*(depth-1), space='| '*(depth-1),
bar='|---' if depth > 0 else '', bar='|---' if depth > 0 else '',
name=node.data['name'], name=node.data['name'],
size=bytestring.bytestring(node.data['size']) size=size
) )
write(line, outfile) write(line, output_file)
# Sort by type (directories first) then subsort by lowercase path
customsort = lambda x: (
node.children[x].data['item_type'] == 'file',
node.children[x].data['url'].lower(),
)
customsort = lambda x: (node.children[x].data['item_type'] == 'file', node.children[x].data['url'].lower())
for (key, child) in node.sorted_children(customsort=customsort): for (key, child) in node.sorted_children(customsort=customsort):
recursive_print_node(child, depth+1, outfile=outfile) recursive_print_node(child, depth=depth+1, output_file=output_file)
if node.data['item_type'] == 'directory': if node.data['item_type'] == 'directory':
if use_html: if use_html:
write('</div>', outfile) write('</div>', output_file)
else: else:
# This helps put some space between sibling directories # This helps put some space between sibling directories
write('| ' * (depth), outfile) write('| ' * (depth), output_file)
if output_filename is not None: if output_filename is not None:
@ -1084,12 +1123,12 @@ def tree(databasename, output_filename=None):
if use_html: if use_html:
write(HTML_TREE_HEADER, outfile=output_file) write(HTML_TREE_HEADER, file_handle=output_file)
recursive_get_size(tree) recursive_get_size(tree)
recursive_print_node(tree, outfile=output_file) recursive_print_node(tree, output_file=output_file)
if unmeasured_file_count > 0: if unmeasured_file_count > 0:
write(UNMEASURED_WARNING % unmeasured_file_count, outfile=output_file) write(UNMEASURED_WARNING % unmeasured_file_count, file_handle=output_file)
if output_file is not None: if output_file is not None:
output_file.close() output_file.close()
@ -1104,11 +1143,10 @@ def tree_argparse(args):
## ## ## ##
## COMMANDLINE FUNCTIONS ########################################################################### ## COMMANDLINE FUNCTIONS ###########################################################################
def main(argv):
if __name__ == '__main__': if listget(argv, 1, '').lower() in ('help', '-h', '--help', ''):
if listget(sys.argv, 1, '').lower() in ('help', '-h', '--help'):
print(DOCSTRING) print(DOCSTRING)
quit() return
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
subparsers = parser.add_subparsers() subparsers = parser.add_subparsers()
@ -1128,7 +1166,7 @@ if __name__ == '__main__':
p_keep_pattern = subparsers.add_parser('keep_pattern') p_keep_pattern = subparsers.add_parser('keep_pattern')
p_keep_pattern.add_argument('databasename') p_keep_pattern.add_argument('databasename')
p_keep_pattern.add_argument('regex') p_keep_pattern.add_argument('regex')
p_keep_pattern.set_defaults(func=keep_pattern) p_keep_pattern.set_defaults(func=keep_pattern_argparse)
p_list_basenames = subparsers.add_parser('list_basenames') p_list_basenames = subparsers.add_parser('list_basenames')
p_list_basenames.add_argument('databasename') p_list_basenames.add_argument('databasename')
@ -1144,12 +1182,15 @@ if __name__ == '__main__':
p_remove_pattern = subparsers.add_parser('remove_pattern') p_remove_pattern = subparsers.add_parser('remove_pattern')
p_remove_pattern.add_argument('databasename') p_remove_pattern.add_argument('databasename')
p_remove_pattern.add_argument('regex') p_remove_pattern.add_argument('regex')
p_remove_pattern.set_defaults(func=remove_pattern) p_remove_pattern.set_defaults(func=remove_pattern_argparse)
p_tree = subparsers.add_parser('tree') p_tree = subparsers.add_parser('tree')
p_tree.add_argument('databasename') p_tree.add_argument('databasename')
p_tree.add_argument('-o', '--outputfile', dest='outputfile', default=None) p_tree.add_argument('-o', '--outputfile', dest='outputfile', default=None)
p_tree.set_defaults(func=tree_argparse) p_tree.set_defaults(func=tree_argparse)
args = parser.parse_args() args = parser.parse_args(argv)
args.func(args) args.func(args)
if __name__ == '__main__':
main(sys.argv[1:])

Binary file not shown.

View file

@ -1 +1,3 @@
Hello!
one
two

View file

@ -2,20 +2,28 @@ import http.server
import mimetypes import mimetypes
import os import os
import urllib.parse import urllib.parse
import pathlib
import random import random
import socketserver
import sys import sys
import types
sys.path.append('C:\\git\\else\\Bytestring'); import bytestring sys.path.append('C:\\git\\else\\Bytestring'); import bytestring
sys.path.append('C:\\git\\else\\Ratelimiter'); import ratelimiter sys.path.append('C:\\git\\else\\Ratelimiter'); import ratelimiter
sys.path.append('C:\\git\\else\\SpinalTap'); import spinal
f = open('favicon.png', 'rb') FILE_READ_CHUNK = bytestring.MIBIBYTE
FAVI = f.read()
f.close() #f = open('favicon.png', 'rb')
#FAVI = f.read()
#f.close()
CWD = os.getcwd() CWD = os.getcwd()
# The paths which the user may access # The paths which the user may access.
# Attempting to access anything outside will 403 # Attempting to access anything outside will 403.
OKAY_PATHS = set(x.lower() for x in ['/files', '/favicon.ico']) # These are convered to Path objects after that class definition.
OKAY_PATHS = set(['files', 'favicon.ico'])
OPENDIR_TEMPLATE = ''' OPENDIR_TEMPLATE = '''
<html> <html>
<body> <body>
@ -29,27 +37,31 @@ OPENDIR_TEMPLATE = '''
</html> </html>
''' '''
class Multipart:
def __init__(stream, boundary):
self.parts = []
class Path: class Path:
'''
I started to use pathlib.Path, but it was too much of a pain.
'''
def __init__(self, path): def __init__(self, path):
path = path.replace('\\', '/') path = urllib.parse.unquote(path)
if len(path) == 0 or path[0] != '/': path = path.strip('/')
path = '/' + path path = os.path.normpath(path)
self.path = path path = spinal.get_path_casing(path).path
self.absolute_path = path
def __repr__(self): def __contains__(self, other):
return 'Path(%s)' % self.path return other.absolute_path.startswith(self.absolute_path)
def __str__(self): def __hash__(self):
return self.path return hash(self.absolute_path)
@property
def allowed(self):
return any(self in okay for okay in OKAY_PATHS)
def anchor(self, display_name=None): def anchor(self, display_name=None):
if display_name is None: if display_name is None:
display_name = self.basename display_name = self.basename
if self.is_dir: if self.is_dir:
# Folder emoji # Folder emoji
icon = '\U0001F4C1' icon = '\U0001F4C1'
@ -57,9 +69,9 @@ class Path:
# Diamond emoji, because there's not one for files. # Diamond emoji, because there's not one for files.
icon = '\U0001F48E' icon = '\U0001F48E'
quoted_path = urllib.parse.quote(self.path) #print('anchor', path)
a = '<a href="{full}">{icon} {display}</a>'.format( a = '<a href="{full}">{icon} {display}</a>'.format(
full=quoted_path, full=self.url_path,
icon=icon, icon=icon,
display=display_name, display=display_name,
) )
@ -67,42 +79,45 @@ class Path:
@property @property
def basename(self): def basename(self):
return os.path.basename(self.path) return os.path.basename(self.absolute_path)
@property @property
def is_dir(self): def is_dir(self):
return os.path.isdir(self.os_path) return os.path.isdir(self.absolute_path)
@property @property
def is_file(self): def is_file(self):
return os.path.isfile(self.os_path) return os.path.isfile(self.absolute_path)
@property
def os_path(self):
abspath = os.path.join(CWD, self.relative_path)
#print(abspath)
return abspath
@property @property
def parent(self): def parent(self):
parts = self.path.split('/')[:-1] parent = os.path.dirname(self.absolute_path)
parts = '/'.join(parts) parent = Path(parent)
return Path(parts) return parent
@property @property
def relative_path(self): def relative_path(self):
return self.path.lstrip('/') relative = self.absolute_path
relative = relative.replace(CWD, '')
relative = relative.lstrip(os.sep)
return relative
@property @property
def size(self): def size(self):
if self.is_dir: if self.is_file:
return -1 return os.path.getsize(self.absolute_path)
return os.path.getsize(self.os_path) else:
return None
def table_row(self, display_name=None, shaded=False): def table_row(self, display_name=None, shaded=False):
form = '<tr style="background-color:#{bg}"><td>{anchor}</td><td>{size}</td></tr>' form = '<tr style="background-color:#{bg}"><td style="width:90%">{anchor}</td><td>{size}</td></tr>'
size = self.size
if size is None:
size = ''
else:
size = bytestring.bytestring(size)
bg = 'ddd' if shaded else 'fff'; bg = 'ddd' if shaded else 'fff';
size = bytestring.bytestring(self.size) if self.size != -1 else ''
row = form.format( row = form.format(
bg=bg, bg=bg,
anchor=self.anchor(display_name=display_name), anchor=self.anchor(display_name=display_name),
@ -110,134 +125,166 @@ class Path:
) )
return row return row
@property
def url_path(self):
url = self.relative_path
url = url.replace(os.sep, '/')
url = '/' + url
url = urllib.parse.quote(url)
return url
OKAY_PATHS = set(Path(p) for p in OKAY_PATHS)
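# Illustration of the whitelist above (request paths are hypothetical): a
# request is allowed when its resolved absolute path starts with one of the
# OKAY_PATHS roots, which is what Path.__contains__ and Path.allowed check.
#     Path('/files/photos/cat.jpg').allowed  -> True, the request is served
#     Path('/secret.txt').allowed            -> False, the handler responds 403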
class RequestHandler(http.server.BaseHTTPRequestHandler): class RequestHandler(http.server.BaseHTTPRequestHandler):
def write(self, string): def write(self, data):
if isinstance(string, str): if isinstance(data, str):
string = string.encode('utf-8') data = data.encode('utf-8')
self.wfile.write(string) if isinstance(data, types.GeneratorType):
for chunk in data:
self.wfile.write(chunk)
else:
self.wfile.write(data)
def read_filebytes(self, path): def read_filebytes(self, path, range_min=None, range_max=None):
#print(path) #print(path)
if os.path.isfile(path.relative_path):
f = open(path.relative_path, 'rb')
fr = f.read()
f.close()
return fr
if os.path.isdir(path.relative_path): if path.is_file:
if range_min is None:
range_min = 0
if range_max is None:
range_max = path.size
range_span = range_max - range_min
#print('read span', range_min, range_max, range_span)
f = open(path.absolute_path, 'rb')
f.seek(range_min)
sent_amount = 0
while sent_amount < range_span:
chunk = f.read(FILE_READ_CHUNK)
if len(chunk) == 0:
break
yield chunk
sent_amount += len(chunk)
#print('I read', len(fr))
f.close()
elif path.is_dir:
text = generate_opendir(path) text = generate_opendir(path)
text = text.encode('utf-8') text = text.encode('utf-8')
return text yield text
self.send_error(404) else:
return bytes() self.send_error(404)
yield bytes()
def do_GET(self): def do_GET(self):
#print(dir(self)) #print(dir(self))
path = normalize_path(self.path) path = Path(self.path)
if self.send_path_validation_error(path): if self.send_path_validation_error(path):
return return
path = Path(path) range_min = None
range_max = None
self.send_response(200) status_code = 200
mime = mimetypes.guess_type(path.path)[0] headers = {}
if mime is not None:
#print(mime)
self.send_header('Content-type', mime)
if path.is_file: if path.is_file:
self.send_header('Content-length', path.size) file_size = path.size
if 'range' in self.headers:
desired_range = self.headers['range']
desired_range = desired_range.lower()
desired_range = desired_range.split('bytes=')[-1]
d = self.read_filebytes(path) helper = lambda x: int(x) if x and x.isdigit() else None
if '-' in desired_range:
(desired_min, desired_max) = desired_range.split('-')
#print('desire', desired_min, desired_max)
range_min = helper(desired_min)
range_max = helper(desired_max)
else:
range_min = helper(desired_range)
if range_min is None:
range_min = 0
if range_max is None:
range_max = file_size
# because ranges are 0 indexed
range_max = min(range_max, file_size - 1)
range_min = max(range_min, 0)
status_code = 206
range_header = 'bytes {min}-{max}/{outof}'.format(
min=range_min,
max=range_max,
outof=file_size,
)
headers['Content-Range'] = range_header
headers['Accept-Ranges'] = 'bytes'
content_length = (range_max - range_min) + 1
else:
content_length = file_size
headers['Content-length'] = content_length
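            # Summary of the branch above: when the client sends
            # "Range: bytes=min-max", respond 206 Partial Content with a
            # Content-Range header, and report Content-length as the inclusive
            # span (range_max - range_min) + 1; otherwise send the whole file
            # with its full size.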
mime = mimetypes.guess_type(path.absolute_path)[0]
if mime is not None:
#print(mime)
headers['Content-type'] = mime
self.send_response(status_code)
for (key, value) in headers.items():
self.send_header(key, value)
d = self.read_filebytes(path, range_min=range_min, range_max=range_max)
#print('write') #print('write')
self.end_headers() self.end_headers()
self.write(d) self.write(d)
def do_HEAD(self): def do_HEAD(self):
path = normalize_path(self.path) path = Path(self.path)
if self.send_path_validation_error(path): if self.send_path_validation_error(path):
return return
path = Path(path) status_code = 200
self.send_response(200)
if path.is_dir: if path.is_dir:
mime = 'text/html' mime = 'text/html'
else: else:
mime = mimetypes.guess_type(path.path)[0] mime = mimetypes.guess_type(path.absolute_path)[0]
self.send_header('Content-length', path.size)
if mime is not None: if mime is not None:
self.send_header('Content-type', mime) self.send_header('Content-type', mime)
if path.is_file: self.send_response(status_code)
self.send_header('Content-length', path.size)
self.end_headers() self.end_headers()
def path_validation(self, path):
path = path.lstrip('/')
absolute_path = os.path.join(CWD, path)
absolute_path = os.path.abspath(absolute_path)
path = absolute_path.replace(CWD, '')
path = path.lstrip('/')
path = path.replace('\\', '/')
#if '..' in path:
# return (403, 'I\'m not going to play games with you.')
#print(path)
print(path)
if not any(path.startswith(okay) for okay in OKAY_PATHS):
self.send_error(403, 'Stop that!')
return
def send_path_validation_error(self, path): def send_path_validation_error(self, path):
error = self.path_validation(path) if not path.allowed:
if error: self.send_error(403, 'Stop that!')
self.send_error(*error)
return True return True
return False return False
# def do_POST(self):
# path = self.path.lower()
# path = urllib.parse.unquote(path).rstrip('/')
# error = path_validation(path) class ThreadedServer(socketserver.ThreadingMixIn, http.server.HTTPServer):
# if error: '''
# self.send_error(*error) Thanks root and twasbrillig http://stackoverflow.com/a/14089457
# return '''
pass
# path = Path(path)
# content_type = self.headers.get('Content-Type', '')
# if not any (req in content_type for req in ['multipart/form-data', 'boundary=']):
# self.send_error(400, 'Bad request')
# return
# boundary = content_type.split('boundary=')[1]
# boundary = boundary.split(';')[0]
# boundary = boundary.strip()
# print('B:', self.headers.get_boundary())
# print('F:', self.headers.get_filename())
# incoming_size = int(self.headers.get('Content-Length', 0))
# received_bytes = 0
# remaining_bytes = incoming_size
# while remaining_bytes > 0:
# chunk_size = min(remaining_bytes, 16*1024)
# chunk = self.rfile.read(chunk_size)
# remaining_bytes -= chunk_size
# received_bytes += chunk_size
# print(chunk)
# self.send_response(200)
# self.send_header('Content-Type', 'text/html')
# self.end_headers()
# print(dir(self.request))
# self.write('Thanks')
def generate_opendir(path): def generate_opendir(path):
#print('Listdir:', path) #print('Listdir:', path)
items = os.listdir(path.relative_path) items = os.listdir(path.absolute_path)
items = [os.path.join(path.relative_path, f) for f in items] items = [os.path.join(path.absolute_path, f) for f in items]
#print(items)
# This places directories above files, each ordered alphabetically # This places directories above files, each ordered alphabetically
items.sort(key=str.lower) items.sort(key=str.lower)
@ -252,10 +299,14 @@ def generate_opendir(path):
items = directories + files items = directories + files
items = [Path(f) for f in items] items = [Path(f) for f in items]
entries = [] entries = []
if not any(okay == path.path for okay in OKAY_PATHS):
# If the user is on one of the OKAY_PATHS, then he can't step up if any(path.absolute_path == okay.absolute_path for okay in OKAY_PATHS):
# because that would be outside the OKAY area. # This is different than a permission check, we're seeing if they're
entries.append(path.parent.table_row(display_name='up')) # actually at the top, in which case they don't need an up button.
pass
else:
entry = path.parent.table_row(display_name='up')
entries.append(entry)
shaded = True shaded = True
for item in items: for item in items:
@ -269,17 +320,15 @@ def generate_opendir(path):
def generate_random_filename(original_filename='', length=8): def generate_random_filename(original_filename='', length=8):
import random import random
bits = length * 4 bits = length * 44
bits = random.getrandbits(bits) bits = random.getrandbits(bits)
identifier = '{:x}'.format(bits).rjust(length, '0') identifier = '{:x}'.format(bits).rjust(length, '0')
return identifier return identifier
def normalize_path(path): def main():
#path = path.lower() server = ThreadedServer(('', 32768), RequestHandler)
path = urllib.parse.unquote(path).rstrip('/') print('server starting')
return path server.serve_forever()
if __name__ == '__main__':
server = http.server.HTTPServer(('', 32768), RequestHandler) main()
print('server starting')
server.serve_forever()

View file

@ -542,8 +542,9 @@ def get_path_casing(path):
''' '''
piece = glob.escape(piece) piece = glob.escape(piece)
for character in piece: for character in piece:
if character not in '!': if character not in '![]':
replacement = '[%s]' % character replacement = '[%s]' % character
#print(piece, character, replacement)
piece = piece.replace(character, replacement, 1) piece = piece.replace(character, replacement, 1)
break break
return piece return piece
@ -551,7 +552,7 @@ def get_path_casing(path):
pattern = [patternize(piece) for piece in subpath.split(os.sep)] pattern = [patternize(piece) for piece in subpath.split(os.sep)]
pattern = os.sep.join(pattern) pattern = os.sep.join(pattern)
pattern = drive.upper() + os.sep + pattern pattern = drive.upper() + os.sep + pattern
print(pattern) #print(pattern)
try: try:
return str_to_fp(glob.glob(pattern)[0]) return str_to_fp(glob.glob(pattern)[0])
except IndexError: except IndexError:
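# Note on the patternize trick above (inferred from this fragment, not stated
# in the diff): wrapping one character of each path piece in brackets, e.g.
# 'photos' -> '[p]hotos', turns the piece into a glob pattern with magic
# characters, so glob actually lists the directory and the match it returns
# carries the on-disk casing of that component.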