This commit is contained in:
Ethan Dalool 2016-08-12 17:26:12 -07:00
parent 53645b0123
commit c491e417f5
5 changed files with 240 additions and 93 deletions

114
Javascript/videotiles.js Normal file
View file

@ -0,0 +1,114 @@
javascript:
/* Bookmarklet: replaces the current page with a grid of looping video
 * players fed by the video links found on the page.
 * Stylesheet text injected by apply_css. Declared with `var` so we do not
 * create an implicit global — the original bare `CSS = ...` assignment
 * leaked into (and clobbered) the global `window.CSS` property. */
var CSS = ""
+ "* {margin: 0; padding: 0}"
+ "html {height: 95%}"
+ "body {height: 100%; background-color: #000}"
+ "div { display:inline-block; }"
+ "video {"
+ "    min-width: 100%;"
+ "    min-height: 100%;"
+ "    max-width: 100%;"
+ "    max-height: 100%;"
+ "    overflow: hidden;"
+ "}"
;
/* Alternation pattern matching hrefs that contain a video file extension. */
var VIDEO_TYPES = ["\\.mp4", "\\.m4v", "\\.webm", "\\.ogv"].join("|");
function apply_css()
{
    /* Inject the CSS stylesheet (the module-level CSS string) into the
     * document head.
     * textContent is used instead of innerHTML: the payload is raw CSS,
     * not markup, so there is no reason to run it through the HTML
     * parser. */
    console.log("applying CSS");
    var css = document.createElement("style");
    css.textContent = CSS;
    document.head.appendChild(css);
}
function get_media_links()
{
    /* Collect the hrefs of every anchor on the page whose URL looks like
     * a video file, as defined by the VIDEO_TYPES pattern.
     * Returns an array of href strings (possibly empty). */
    var media_links = [];
    var anchors = document.getElementsByTagName("a");
    for (var i = 0; i < anchors.length; i += 1)
    {
        var href = anchors[i].href;
        if (href.match(VIDEO_TYPES))
        {
            media_links.push(href);
        }
    }
    return media_links;
}
function clear_page()
{
    /* Remove EVERYTHING and rebuild a bare html/head/body skeleton so the
     * video grid has a clean slate. Always returns true. */
    console.log("clearing page");
    document.removeChild(document.documentElement);
    var html = document.createElement("html");
    document.appendChild(html);
    var head = document.createElement("head");
    html.appendChild(head);
    var body = document.createElement("body");
    html.appendChild(body);
    /* Note: the original code assigned `document.documentElement = html`,
     * but documentElement is a read-only accessor, so that was a silent
     * no-op. Appending the new <html> above already makes it the document
     * element; no assignment is needed. */
    return true;
}
function create_video_players(width, height)
{
    /* Build a width x height grid of <video> elements, each wrapped in a
     * sizing <div> appended to document.body.
     * Each player keeps a back-reference to its wrapper in `player.holder`.
     * Returns the array of <video> elements.
     * (Removed a leftover debug console.log of the computed width; string
     * concatenation already coerces numbers, so toString() was redundant.) */
    var css_width = (100 / width) + "%";
    var css_height = (100 / height) + "%";
    var players = [];
    for (var index = 0; index < width * height; index += 1)
    {
        var player_holder = document.createElement("div");
        var player = document.createElement("video");
        player_holder.style.width = css_width;
        player_holder.style.height = css_height;
        player.holder = player_holder;
        players.push(player);
        player_holder.appendChild(player);
        document.body.appendChild(player_holder);
    }
    return players;
}
function swap_source(player, source_list)
{
    /* Point the player at a randomly chosen entry of source_list and
     * (re)start playback. */
    var choice = source_list[Math.floor(Math.random() * source_list.length)];
    player.pause();
    player.src = choice;
    player.load();
    player.play();
}
function main()
{
    /* Entry point: harvest the video links from the current page, wipe
     * the page, and fill it with a 3x3 grid of players that each jump to
     * a new random video whenever their current one finishes. */
    var WIDTH = 3;
    var HEIGHT = 3;
    var MEDIAS = get_media_links();
    clear_page();
    apply_css();
    var PLAYERS = create_video_players(WIDTH, HEIGHT);
    function ended_callback()
    {
        /* `this` is the <video> element whose playback just ended. */
        swap_source(this, MEDIAS);
    }
    for (var index = 0; index < PLAYERS.length; index += 1)
    {
        PLAYERS[index].addEventListener("ended", ended_callback);
        swap_source(PLAYERS[index], MEDIAS);
    }
}
main();

View file

@ -7,6 +7,11 @@ Requires `pip install beautifulsoup4`
See inside opendirdl.py for usage instructions. See inside opendirdl.py for usage instructions.
- 2016 08 10
- Fixed bug in smart_insert caused by 404s being considered falsy, triggering the 'one and only one' exception.
- Fixed bug in smart_insert where 404'd URLs were not being deleted from the database.
- Added clickable links to each directory on HTML tree pages.
- 2016 08 02 - 2016 08 02
- Removed the usage of div IDs on the Tree pages by making the collapse button use `this.nextSibling`. - Removed the usage of div IDs on the Tree pages by making the collapse button use `this.nextSibling`.
- Rewrote `build_file_tree` with a way simpler algorithm. - Rewrote `build_file_tree` with a way simpler algorithm.

View file

@ -140,10 +140,10 @@ DOWNLOAD_CHUNK = 16 * bytestring.KIBIBYTE
UNKNOWN_SIZE_STRING = '???' UNKNOWN_SIZE_STRING = '???'
# When doing a basic scan, we will not send HEAD requests to URLs that end in these strings, # When doing a basic scan, we will not send HEAD requests to URLs that end in
# because they're probably files. # these strings, because they're probably files.
# This isn't meant to be a comprehensive filetype library, but it covers enough of the # This isn't meant to be a comprehensive filetype library, but it covers
# typical opendir to speed things up. # enough of the typical opendir to speed things up.
SKIPPABLE_FILETYPES = [ SKIPPABLE_FILETYPES = [
'.aac', '.aac',
'.avi', '.avi',
@ -192,7 +192,8 @@ BLACKLISTED_FILENAMES = [
] ]
# oh shit # oh shit
HTML_TREE_HEADER = ''' HTML_TREE_HEAD = '''
<head>
<meta charset="UTF-8"> <meta charset="UTF-8">
<script type="text/javascript"> <script type="text/javascript">
@ -215,12 +216,7 @@ function collapse(div)
font-family: Consolas; font-family: Consolas;
} }
button .directory_even, .directory_odd
{
display: block;
}
div
{ {
padding: 10px; padding: 10px;
padding-left: 15px; padding-left: 15px;
@ -239,8 +235,18 @@ div
background-color: #eee; background-color: #eee;
} }
</style> </style>
</head>
''' '''
HTML_FORMAT_DIRECTORY = '''
<div class="buttonbox">
<button onclick="collapse(this.parentElement.nextElementSibling)">{name} ({size})</button>
{directory_anchor}
</div>
<div class="{css}" style="display:none">
'''.replace('\n', '')
HTML_FORMAT_FILE = '<a href="{url}">{name} ({size})</a><br>'
DB_INIT = ''' DB_INIT = '''
CREATE TABLE IF NOT EXISTS urls( CREATE TABLE IF NOT EXISTS urls(
url TEXT, url TEXT,
@ -259,7 +265,6 @@ SQL_CONTENT_LENGTH = 2
SQL_CONTENT_TYPE = 3 SQL_CONTENT_TYPE = 3
SQL_DO_DOWNLOAD = 4 SQL_DO_DOWNLOAD = 4
UNMEASURED_WARNING = ''' UNMEASURED_WARNING = '''
Note: %d files do not have a stored Content-Length. Note: %d files do not have a stored Content-Length.
Run `measure` with `-f`|`--fullscan` or `-n`|`--new_only` to HEAD request Run `measure` with `-f`|`--fullscan` or `-n`|`--new_only` to HEAD request
@ -278,7 +283,7 @@ class Walker:
self.walkurl = walkurl self.walkurl = walkurl
if databasename in (None, ''): if databasename in (None, ''):
domain = url_to_filepath(self.walkurl)['root'] domain = url_split(self.walkurl)['root']
databasename = domain + '.db' databasename = domain + '.db'
databasename = databasename.replace(':', '#') databasename = databasename.replace(':', '#')
self.databasename = databasename self.databasename = databasename
@ -439,7 +444,7 @@ class TreeNode:
self.children = {} self.children = {}
def __eq__(self, other): def __eq__(self, other):
return isinstance(other, Treenode) and self.abspath() == other.abspath() return isinstance(other, TreeNode) and self.abspath() == other.abspath()
def __getitem__(self, key): def __getitem__(self, key):
return self.children[key] return self.children[key]
@ -470,8 +475,6 @@ class TreeNode:
return other_node return other_node
def check_child_availability(self, identifier): def check_child_availability(self, identifier):
if ':' in identifier:
raise TreeInvalidIdentifier('Only roots may have a colon')
if identifier in self.children: if identifier in self.children:
raise TreeExistingChild('Node %s already has child %s' % (self.identifier, identifier)) raise TreeExistingChild('Node %s already has child %s' % (self.identifier, identifier))
@ -526,7 +529,7 @@ def build_file_tree(databasename):
{ {
'url': item[SQL_URL], 'url': item[SQL_URL],
'size': item[SQL_CONTENT_LENGTH], 'size': item[SQL_CONTENT_LENGTH],
'path': path_form.format(**url_to_filepath(item[SQL_URL])).split('\\'), 'path_parts': path_form.format(**url_split(item[SQL_URL])).split('\\'),
} }
for item in all_items for item in all_items
] ]
@ -536,6 +539,7 @@ def build_file_tree(databasename):
'item_type': 'directory', 'item_type': 'directory',
'name': databasename, 'name': databasename,
} }
scheme = url_split(all_items[0]['url'])['scheme']
tree = TreeNode(databasename, data=root_data) tree = TreeNode(databasename, data=root_data)
tree.unsorted_children = all_items tree.unsorted_children = all_items
node_queue = set() node_queue = set()
@ -546,12 +550,12 @@ def build_file_tree(databasename):
# directories. Those nodes receive all subdirectories, and repeat. # directories. Those nodes receive all subdirectories, and repeat.
while len(node_queue) > 0: while len(node_queue) > 0:
node = node_queue.pop() node = node_queue.pop()
for to_sort in node.unsorted_children: for new_child_data in node.unsorted_children:
path = to_sort['path'] path_parts = new_child_data['path_parts']
# Create a new node for the directory, path[0] # Create a new node for the directory, path_parts[0]
# path[1:] is assigned to that node to be divided next. # path_parts[1:] is assigned to that node to be divided next.
child_identifier = path.pop(0) child_identifier = path_parts.pop(0)
child_identifier = child_identifier.replace(':', '#') #child_identifier = child_identifier.replace(':', '#')
child = node.children.get(child_identifier, None) child = node.children.get(child_identifier, None)
if not child: if not child:
@ -559,15 +563,21 @@ def build_file_tree(databasename):
child.unsorted_children = [] child.unsorted_children = []
node.add_child(child) node.add_child(child)
child.data['url'] = to_sort['url']
child.data['name'] = child_identifier child.data['name'] = child_identifier
if len(path) > 0: if len(path_parts) > 0:
child.data['item_type'] = 'directory' child.data['item_type'] = 'directory'
child.unsorted_children.append(to_sort) child.unsorted_children.append(new_child_data)
node_queue.add(child) node_queue.add(child)
else: else:
child.data['item_type'] = 'file' child.data['item_type'] = 'file'
child.data['size'] = to_sort['size'] child.data['size'] = new_child_data['size']
child.data['url'] = new_child_data['url']
if node.parent is None:
continue
elif node.parent == tree:
node.data['url'] = scheme + '://' + node.identifier
else:
node.data['url'] = node.parent.data['url'] + '/' + node.identifier
del node.unsorted_children del node.unsorted_children
@ -670,12 +680,21 @@ def recursive_print_node(node, depth=0, use_html=False, output_file=None):
if use_html: if use_html:
css_class = 'directory_even' if depth % 2 == 0 else 'directory_odd' css_class = 'directory_even' if depth % 2 == 0 else 'directory_odd'
if node.data['item_type'] == 'directory': if node.data['item_type'] == 'directory':
line = '<button onclick="collapse(this.nextSibling)">{name} ({size})</button>' directory_url = node.data.get('url')
line += '<div class="{css}" style="display:none">' directory_anchor = '<a href="{url}">►</a>' if directory_url else ''
line = line.format(name=node.data['name'], size=size, css=css_class) directory_anchor = directory_anchor.format(url=directory_url)
line = HTML_FORMAT_DIRECTORY.format(
css=css_class,
directory_anchor=directory_anchor,
name=node.data['name'],
size=size,
)
else: else:
line = '<a href="{url}">{name} ({size})</a><br>' line = HTML_FORMAT_FILE.format(
line = line.format(url=node.data['url'], name=node.data['name'], size=size) name=node.data['name'],
size=size,
url=node.data['url'],
)
else: else:
line = '{space}{bar}{name} : ({size})' line = '{space}{bar}{name} : ({size})'
line = line.format( line = line.format(
@ -697,6 +716,7 @@ def recursive_print_node(node, depth=0, use_html=False, output_file=None):
if node.data['item_type'] == 'directory': if node.data['item_type'] == 'directory':
if use_html: if use_html:
# Close the directory div
write('</div>', output_file) write('</div>', output_file)
else: else:
# This helps put some space between sibling directories # This helps put some space between sibling directories
@ -713,7 +733,7 @@ def smart_insert(sql, cur, url=None, head=None, commit=True):
INSERT or UPDATE the appropriate entry, or DELETE if the head INSERT or UPDATE the appropriate entry, or DELETE if the head
shows a 403 / 404. shows a 403 / 404.
''' '''
if bool(url) is bool(head): if bool(url) is bool(head) and not isinstance(head, requests.Response):
raise ValueError('One and only one of `url` or `head` is necessary.') raise ValueError('One and only one of `url` or `head` is necessary.')
if url is not None: if url is not None:
@ -722,6 +742,7 @@ def smart_insert(sql, cur, url=None, head=None, commit=True):
content_type = None content_type = None
elif head is not None: elif head is not None:
url = head.url
# When doing a full scan, we get a Response object. # When doing a full scan, we get a Response object.
if head.status_code in [403, 404]: if head.status_code in [403, 404]:
cur.execute('DELETE FROM urls WHERE url == ?', [url]) cur.execute('DELETE FROM urls WHERE url == ?', [url])
@ -735,7 +756,7 @@ def smart_insert(sql, cur, url=None, head=None, commit=True):
content_length = int(content_length) content_length = int(content_length)
content_type = head.headers.get('Content-Type', None) content_type = head.headers.get('Content-Type', None)
basename = url_to_filepath(url)['filename'] basename = url_split(url)['filename']
basename = urllib.parse.unquote(basename) basename = urllib.parse.unquote(basename)
do_download = True do_download = True
@ -759,7 +780,7 @@ def smart_insert(sql, cur, url=None, head=None, commit=True):
sql.commit() sql.commit()
return data return data
def url_to_filepath(text): def url_split(text):
text = urllib.parse.unquote(text) text = urllib.parse.unquote(text)
parts = urllib.parse.urlsplit(text) parts = urllib.parse.urlsplit(text)
if any(part == '' for part in [parts.scheme, parts.netloc]): if any(part == '' for part in [parts.scheme, parts.netloc]):
@ -852,7 +873,7 @@ def download(
# on their own. # on their own.
cur.execute('SELECT url FROM urls LIMIT 1') cur.execute('SELECT url FROM urls LIMIT 1')
url = cur.fetchone()[0] url = cur.fetchone()[0]
outputdir = url_to_filepath(url)['root'] outputdir = url_split(url)['root']
if isinstance(bytespersecond, str): if isinstance(bytespersecond, str):
bytespersecond = bytestring.parsebytes(bytespersecond) bytespersecond = bytestring.parsebytes(bytespersecond)
@ -861,7 +882,7 @@ def download(
for fetch in fetch_generator(cur): for fetch in fetch_generator(cur):
url = fetch[SQL_URL] url = fetch[SQL_URL]
url_filepath = url_to_filepath(url) url_filepath = url_split(url)
folder = os.path.join(outputdir, url_filepath['folder']) folder = os.path.join(outputdir, url_filepath['folder'])
os.makedirs(folder, exist_ok=True) os.makedirs(folder, exist_ok=True)
@ -1012,7 +1033,7 @@ def measure(databasename, fullscan=False, new_only=False):
items = cur.fetchall() items = cur.fetchall()
filecount = 0 filecount = len(items)
unmeasured_file_count = 0 unmeasured_file_count = 0
for fetch in items: for fetch in items:
@ -1023,21 +1044,27 @@ def measure(databasename, fullscan=False, new_only=False):
head = do_head(url, raise_for_status=False) head = do_head(url, raise_for_status=False)
fetch = smart_insert(sql, cur, head=head, commit=True) fetch = smart_insert(sql, cur, head=head, commit=True)
size = fetch[SQL_CONTENT_LENGTH] size = fetch[SQL_CONTENT_LENGTH]
if size is None:
write('"%s" is not revealing Content-Length' % url)
size = 0
elif size is None:
elif fetch[SQL_CONTENT_LENGTH] is None: # Unmeasured and no intention to measure.
unmeasured_file_count += 1 unmeasured_file_count += 1
size = 0 size = 0
if size is None:
# Unmeasured even though we tried the head request.
write('"%s" is not revealing Content-Length' % url)
size = 0
totalsize += size totalsize += size
filecount += 1
sql.commit() sql.commit()
short_string = bytestring.bytestring(totalsize) size_string = bytestring.bytestring(totalsize)
totalsize_string = '{} ({:,} bytes) in {:,} files'.format(short_string, totalsize, filecount) totalsize_string = '{size_short} ({size_exact:,} bytes) in {filecount:,} files'
totalsize_string = totalsize_string.format(
size_short=size_string,
size_exact=totalsize,
filecount=filecount,
)
write(totalsize_string) write(totalsize_string)
if unmeasured_file_count > 0: if unmeasured_file_count > 0:
write(UNMEASURED_WARNING % unmeasured_file_count) write(UNMEASURED_WARNING % unmeasured_file_count)
@ -1078,7 +1105,9 @@ def tree(databasename, output_filename=None):
use_html = False use_html = False
if use_html: if use_html:
write(HTML_TREE_HEADER, output_file) write('<!DOCTYPE html>\n<html>', output_file)
write(HTML_TREE_HEAD, output_file)
write('<body>', output_file)
size_details = recursive_get_size(tree) size_details = recursive_get_size(tree)
recursive_print_node(tree, use_html=use_html, output_file=output_file) recursive_print_node(tree, use_html=use_html, output_file=output_file)
@ -1086,6 +1115,8 @@ def tree(databasename, output_filename=None):
write(UNMEASURED_WARNING % size_details['unmeasured'], output_file) write(UNMEASURED_WARNING % size_details['unmeasured'], output_file)
if output_file is not None: if output_file is not None:
if use_html:
write('</body>\n</html>', output_file)
output_file.close() output_file.close()
return tree return tree

View file

@ -13,6 +13,9 @@ class Path:
def __contains__(self, other): def __contains__(self, other):
return other.absolute_path.startswith(self.absolute_path) return other.absolute_path.startswith(self.absolute_path)
def __eq__(self, other):
return hasattr(other, 'absolute_path') and self.absolute_path == other.absolute_path
def __hash__(self): def __hash__(self):
return hash(self.absolute_path) return hash(self.absolute_path)
@ -75,31 +78,31 @@ def get_path_casing(path):
(drive, subpath) = os.path.splitdrive(path) (drive, subpath) = os.path.splitdrive(path)
subpath = subpath.lstrip(os.sep) subpath = subpath.lstrip(os.sep)
def patternize(piece): pattern = [glob_patternize(piece) for piece in subpath.split(os.sep)]
'''
Create a pattern like "[u]ser" from "user", forcing glob to look up the
correct path name, and guaranteeing that the only result will be the correct path.
Special cases are:
!, because in glob syntax, [!x] tells glob to look for paths that don't contain
"x". [!] is invalid syntax, so we pick the first non-! character to put
in the brackets.
[, because this starts a capture group
'''
piece = glob.escape(piece)
for character in piece:
if character not in '![]':
replacement = '[%s]' % character
#print(piece, character, replacement)
piece = piece.replace(character, replacement, 1)
break
return piece
pattern = [patternize(piece) for piece in subpath.split(os.sep)]
pattern = os.sep.join(pattern) pattern = os.sep.join(pattern)
pattern = drive.upper() + os.sep + pattern pattern = drive.upper() + os.sep + pattern
#print(pattern) #print(pattern)
try: try:
return glob.glob(pattern)[0] return glob.glob(pattern)[0]
except IndexError: except IndexError:
return path return path
def glob_patternize(piece):
'''
Create a pattern like "[u]ser" from "user", forcing glob to look up the
correct path name, while guaranteeing that the only result will be the correct path.
Special cases are:
!, because in glob syntax, [!x] tells glob to look for paths that don't contain
"x". [!] is invalid syntax, so we pick the first non-! character to put
in the brackets.
[, because this starts a capture group
'''
piece = glob.escape(piece)
for character in piece:
if character not in '![]':
replacement = '[%s]' % character
#print(piece, character, replacement)
piece = piece.replace(character, replacement, 1)
break
return piece

View file

@ -52,7 +52,7 @@ def callback_v1(fpobj, written_bytes, total_bytes):
ends = '\n' ends = '\n'
else: else:
ends = '' ends = ''
percent = (100 * written_bytes) / total_bytes percent = (100 * written_bytes) / max(total_bytes, 1)
percent = '%07.3f' % percent percent = '%07.3f' % percent
written = '{:,}'.format(written_bytes) written = '{:,}'.format(written_bytes)
total = '{:,}'.format(total_bytes) total = '{:,}'.format(total_bytes)
@ -196,17 +196,13 @@ def copy_dir(
m += '`destination_new_root` can be passed.' m += '`destination_new_root` can be passed.'
raise ValueError(m) raise ValueError(m)
source = pathclass.get_path_casing(source)
source = str_to_fp(source) source = str_to_fp(source)
if destination_new_root is not None: if destination_new_root is not None:
destination = new_root(source, destination_new_root) destination = new_root(source, destination_new_root)
destination = str_to_fp(destination) destination = str_to_fp(destination)
callback_directory = callback_directory or do_nothing if destination in source:
callback_verbose = callback_verbose or do_nothing
if is_subfolder(source, destination):
raise RecursiveDirectory(source, destination) raise RecursiveDirectory(source, destination)
if not source.is_dir: if not source.is_dir:
@ -220,6 +216,8 @@ def copy_dir(
else: else:
total_bytes = 0 total_bytes = 0
callback_directory = callback_directory or do_nothing
callback_verbose = callback_verbose or do_nothing
bytes_per_second = limiter_or_none(bytes_per_second) bytes_per_second = limiter_or_none(bytes_per_second)
files_per_second = limiter_or_none(files_per_second) files_per_second = limiter_or_none(files_per_second)
@ -350,7 +348,6 @@ def copy_file(
m += '`destination_new_root` can be passed' m += '`destination_new_root` can be passed'
raise ValueError(m) raise ValueError(m)
source = pathclass.get_path_casing(source)
source = str_to_fp(source) source = str_to_fp(source)
if destination_new_root is not None: if destination_new_root is not None:
@ -370,13 +367,11 @@ def copy_file(
# Determine overwrite # Determine overwrite
if destination.exists: if destination.exists:
destination_modtime = destination.stat.st_mtime
if overwrite_old is False: if overwrite_old is False:
return [destination, 0] return [destination, 0]
source_modtime = source.stat.st_mtime source_modtime = source.stat.st_mtime
if source_modtime == destination_modtime: if source_modtime == destination.stat.st_mtime:
return [destination, 0] return [destination, 0]
# Copy # Copy
@ -460,6 +455,8 @@ def is_xor(*args):
return [bool(a) for a in args].count(True) == 1 return [bool(a) for a in args].count(True) == 1
def limiter_or_none(value): def limiter_or_none(value):
if isinstance(value, str):
value = bytestring.parsebytes(value)
if isinstance(value, ratelimiter.Ratelimiter): if isinstance(value, ratelimiter.Ratelimiter):
limiter = value limiter = value
elif value is not None: elif value is not None:
@ -506,7 +503,7 @@ def walk_generator(
exclude_filenames=None, exclude_filenames=None,
): ):
''' '''
Yield Path objects from the file tree similar to os.walk. Yield Path objects for files in the file tree, similar to os.walk.
callback_exclusion: callback_exclusion:
This function will be called when a file or directory is excluded with This function will be called when a file or directory is excluded with
@ -563,31 +560,28 @@ def walk_generator(
# This is a recursion-free workplace. # This is a recursion-free workplace.
# Thank you for your cooperation. # Thank you for your cooperation.
while len(directory_queue) > 0: while len(directory_queue) > 0:
location = directory_queue.popleft() current_location = directory_queue.popleft()
callback_verbose('listdir: %s' % location) callback_verbose('listdir: %s' % current_location)
contents = os.listdir(location) contents = os.listdir(current_location)
callback_verbose('received %d items' % len(contents)) callback_verbose('received %d items' % len(contents))
directories = [] directories = []
for base_name in contents: for base_name in contents:
absolute_name = os.path.join(location, base_name) absolute_name = os.path.join(current_location, base_name)
if os.path.isdir(absolute_name): if os.path.isdir(absolute_name):
if normalize(absolute_name) in exclude_directories: exclude = normalize(absolute_name) in exclude_directories
callback_exclusion(absolute_name, 'directory') exclude |= normalize(base_name) in exclude_directories
continue if exclude:
if normalize(base_name) in exclude_directories:
callback_exclusion(absolute_name, 'directory') callback_exclusion(absolute_name, 'directory')
continue continue
directories.append(absolute_name) directories.append(absolute_name)
else: else:
if normalize(base_name) in exclude_filenames: exclude = normalize(absolute_name) in exclude_filenames
callback_exclusion(absolute_name, 'file') exclude |= normalize(base_name) in exclude_filenames
continue if exclude:
if normalize(absolute_name) in exclude_filenames:
callback_exclusion(absolute_name, 'file') callback_exclusion(absolute_name, 'file')
continue continue