This commit is contained in:
Ethan Dalool 2016-08-12 17:26:12 -07:00
parent 53645b0123
commit c491e417f5
5 changed files with 240 additions and 93 deletions

114
Javascript/videotiles.js Normal file
View file

@ -0,0 +1,114 @@
// Bookmarklet prefix: in a browser URL bar "javascript:" marks the code as a
// bookmarklet; pasted into a file it parses as a (harmless) JS label.
javascript:
// Stylesheet injected by apply_css(). Assigned without `var`, so CSS becomes
// a global — typical for a one-shot bookmarklet. The grid cells are
// inline-block <div>s; the <video> inside each is clamped to fill its cell.
CSS = ""
+ "* {margin: 0; padding: 0}"
+ "html {height: 95%}"
+ "body {height: 100%; background-color: #000}"
+ "div { display:inline-block; }"
+ "video {"
+ " min-width: 100%;"
+ " min-height: 100%;"
+ " max-width: 100%;"
+ " max-height: 100%;"
+ " overflow: hidden;"
+ "}"
;
// Alternation of escaped video file extensions; passed to String.match /
// RegExp to recognize video links anywhere in an anchor's href.
var VIDEO_TYPES = ["\\.mp4", "\\.m4v", "\\.webm", "\\.ogv"].join("|");
function apply_css()
{
    // Inject the bookmarklet stylesheet (global CSS string) into <head>.
    console.log("applying CSS");
    var style_tag = document.createElement("style");
    // textContent is the correct property for a <style> element's CSS text;
    // innerHTML would route the stylesheet string through the HTML parser.
    style_tag.textContent = CSS;
    document.head.appendChild(style_tag);
}
function get_media_links()
{
    /*
    Return an array of href strings for every anchor on the page whose URL
    looks like a video file (see VIDEO_TYPES).
    Fix: the original passed the VIDEO_TYPES string straight to .match(),
    which is case-sensitive — links ending in ".MP4" etc. were missed.
    Compiling the regex once with the "i" flag also avoids rebuilding it on
    every iteration.
    */
    var pattern = new RegExp(VIDEO_TYPES, "i");
    var anchors = document.getElementsByTagName("a");
    var media_links = [];
    for (var index = 0; index < anchors.length; index += 1)
    {
        var anchor = anchors[index];
        if (pattern.test(anchor.href))
        {
            media_links.push(anchor.href);
        }
    }
    return media_links;
}
function clear_page()
{
    /*
    Remove EVERYTHING and rebuild a bare
    <html><head></head><body></body></html> skeleton.
    Always returns true.
    */
    console.log("clearing page");
    document.removeChild(document.documentElement);
    var html = document.createElement("html");
    document.appendChild(html);
    var head = document.createElement("head");
    html.appendChild(head);
    var body = document.createElement("body");
    html.appendChild(body);
    // Note: document.documentElement is a read-only accessor that already
    // reflects the newly appended <html>. The original assignment
    // `document.documentElement = html;` was a silent no-op and was removed.
    return true;
}
function create_video_players(width, height)
{
    /*
    Create a width x height grid of <video> elements, each wrapped in a
    sizing <div> that is appended to document.body. The div widths/heights
    are percentages so the grid fills the page.
    Returns the array of <video> elements.
    (Removed a leftover debug `console.log(css_width)` from the original.)
    */
    var css_width = (100 / width).toString() + "%";
    var css_height = (100 / height).toString() + "%";
    var players = [];
    for (var index = 0; index < width * height; index += 1)
    {
        var player_holder = document.createElement("div");
        var player = document.createElement("video");
        player_holder.style.width = css_width;
        player_holder.style.height = css_height;
        // Backreference so callers can reach the wrapper div from the player.
        player.holder = player_holder;
        players.push(player);
        player_holder.appendChild(player);
        document.body.appendChild(player_holder);
    }
    return players;
}
function swap_source(player, source_list)
{
    /*
    Pick a random URL from source_list and make `player` start playing it.
    `player` only needs pause/load/play methods and a writable `src`.
    */
    var pick = source_list[Math.floor(Math.random() * source_list.length)];
    player.pause();
    player.src = pick;
    player.load();
    player.play();
}
function main()
{
    /*
    Entry point: harvest video links from the current page, wipe the page,
    inject the stylesheet, build a 3x3 grid of players, and start each one
    on a random video. When a video ends, its player swaps to another.
    */
    var COLUMNS = 3;
    var ROWS = 3;
    var MEDIAS = get_media_links();
    clear_page();
    apply_css();
    var PLAYERS = create_video_players(COLUMNS, ROWS);
    // Plain function (not arrow) so `this` is the player that fired "ended".
    function ended_callback()
    {
        swap_source(this, MEDIAS);
    }
    PLAYERS.forEach(function(player)
    {
        player.addEventListener("ended", ended_callback);
        swap_source(player, MEDIAS);
    });
}
main();

View file

@ -7,6 +7,11 @@ Requires `pip install beautifulsoup4`
See inside opendirdl.py for usage instructions.
- 2016 08 10
- Fixed bug in smart_insert caused by 404's being considered falsey, triggering the 'one and only one' exception.
- Fixed bug in smart_insert where 404'd URLs were not being deleted from the database.
- Added clickable links to each directory on HTML tree pages.
- 2016 08 02
- Removed the usage of div IDs on the Tree pages by making the collapse button use `this.nextSibling`.
- Rewrote `build_file_tree` with a way simpler algorithm.

View file

@ -140,10 +140,10 @@ DOWNLOAD_CHUNK = 16 * bytestring.KIBIBYTE
UNKNOWN_SIZE_STRING = '???' UNKNOWN_SIZE_STRING = '???'
# When doing a basic scan, we will not send HEAD requests to URLs that end in these strings, # When doing a basic scan, we will not send HEAD requests to URLs that end in
# because they're probably files. # these strings, because they're probably files.
# This isn't meant to be a comprehensive filetype library, but it covers enough of the # This isn't meant to be a comprehensive filetype library, but it covers
# typical opendir to speed things up. # enough of the typical opendir to speed things up.
SKIPPABLE_FILETYPES = [ SKIPPABLE_FILETYPES = [
'.aac', '.aac',
'.avi', '.avi',
@ -192,7 +192,8 @@ BLACKLISTED_FILENAMES = [
] ]
# oh shit # oh shit
HTML_TREE_HEADER = ''' HTML_TREE_HEAD = '''
<head>
<meta charset="UTF-8"> <meta charset="UTF-8">
<script type="text/javascript"> <script type="text/javascript">
@ -215,12 +216,7 @@ function collapse(div)
font-family: Consolas; font-family: Consolas;
} }
button .directory_even, .directory_odd
{
display: block;
}
div
{ {
padding: 10px; padding: 10px;
padding-left: 15px; padding-left: 15px;
@ -239,8 +235,18 @@ div
background-color: #eee; background-color: #eee;
} }
</style> </style>
</head>
''' '''
HTML_FORMAT_DIRECTORY = '''
<div class="buttonbox">
<button onclick="collapse(this.parentElement.nextElementSibling)">{name} ({size})</button>
{directory_anchor}
</div>
<div class="{css}" style="display:none">
'''.replace('\n', '')
HTML_FORMAT_FILE = '<a href="{url}">{name} ({size})</a><br>'
DB_INIT = ''' DB_INIT = '''
CREATE TABLE IF NOT EXISTS urls( CREATE TABLE IF NOT EXISTS urls(
url TEXT, url TEXT,
@ -259,7 +265,6 @@ SQL_CONTENT_LENGTH = 2
SQL_CONTENT_TYPE = 3 SQL_CONTENT_TYPE = 3
SQL_DO_DOWNLOAD = 4 SQL_DO_DOWNLOAD = 4
UNMEASURED_WARNING = ''' UNMEASURED_WARNING = '''
Note: %d files do not have a stored Content-Length. Note: %d files do not have a stored Content-Length.
Run `measure` with `-f`|`--fullscan` or `-n`|`--new_only` to HEAD request Run `measure` with `-f`|`--fullscan` or `-n`|`--new_only` to HEAD request
@ -278,7 +283,7 @@ class Walker:
self.walkurl = walkurl self.walkurl = walkurl
if databasename in (None, ''): if databasename in (None, ''):
domain = url_to_filepath(self.walkurl)['root'] domain = url_split(self.walkurl)['root']
databasename = domain + '.db' databasename = domain + '.db'
databasename = databasename.replace(':', '#') databasename = databasename.replace(':', '#')
self.databasename = databasename self.databasename = databasename
@ -439,7 +444,7 @@ class TreeNode:
self.children = {} self.children = {}
def __eq__(self, other): def __eq__(self, other):
return isinstance(other, Treenode) and self.abspath() == other.abspath() return isinstance(other, TreeNode) and self.abspath() == other.abspath()
def __getitem__(self, key): def __getitem__(self, key):
return self.children[key] return self.children[key]
@ -470,8 +475,6 @@ class TreeNode:
return other_node return other_node
def check_child_availability(self, identifier): def check_child_availability(self, identifier):
if ':' in identifier:
raise TreeInvalidIdentifier('Only roots may have a colon')
if identifier in self.children: if identifier in self.children:
raise TreeExistingChild('Node %s already has child %s' % (self.identifier, identifier)) raise TreeExistingChild('Node %s already has child %s' % (self.identifier, identifier))
@ -526,7 +529,7 @@ def build_file_tree(databasename):
{ {
'url': item[SQL_URL], 'url': item[SQL_URL],
'size': item[SQL_CONTENT_LENGTH], 'size': item[SQL_CONTENT_LENGTH],
'path': path_form.format(**url_to_filepath(item[SQL_URL])).split('\\'), 'path_parts': path_form.format(**url_split(item[SQL_URL])).split('\\'),
} }
for item in all_items for item in all_items
] ]
@ -536,6 +539,7 @@ def build_file_tree(databasename):
'item_type': 'directory', 'item_type': 'directory',
'name': databasename, 'name': databasename,
} }
scheme = url_split(all_items[0]['url'])['scheme']
tree = TreeNode(databasename, data=root_data) tree = TreeNode(databasename, data=root_data)
tree.unsorted_children = all_items tree.unsorted_children = all_items
node_queue = set() node_queue = set()
@ -546,12 +550,12 @@ def build_file_tree(databasename):
# directories. Those nodes receive all subdirectories, and repeat. # directories. Those nodes receive all subdirectories, and repeat.
while len(node_queue) > 0: while len(node_queue) > 0:
node = node_queue.pop() node = node_queue.pop()
for to_sort in node.unsorted_children: for new_child_data in node.unsorted_children:
path = to_sort['path'] path_parts = new_child_data['path_parts']
# Create a new node for the directory, path[0] # Create a new node for the directory, path_parts[0]
# path[1:] is assigned to that node to be divided next. # path_parts[1:] is assigned to that node to be divided next.
child_identifier = path.pop(0) child_identifier = path_parts.pop(0)
child_identifier = child_identifier.replace(':', '#') #child_identifier = child_identifier.replace(':', '#')
child = node.children.get(child_identifier, None) child = node.children.get(child_identifier, None)
if not child: if not child:
@ -559,15 +563,21 @@ def build_file_tree(databasename):
child.unsorted_children = [] child.unsorted_children = []
node.add_child(child) node.add_child(child)
child.data['url'] = to_sort['url']
child.data['name'] = child_identifier child.data['name'] = child_identifier
if len(path) > 0: if len(path_parts) > 0:
child.data['item_type'] = 'directory' child.data['item_type'] = 'directory'
child.unsorted_children.append(to_sort) child.unsorted_children.append(new_child_data)
node_queue.add(child) node_queue.add(child)
else: else:
child.data['item_type'] = 'file' child.data['item_type'] = 'file'
child.data['size'] = to_sort['size'] child.data['size'] = new_child_data['size']
child.data['url'] = new_child_data['url']
if node.parent is None:
continue
elif node.parent == tree:
node.data['url'] = scheme + '://' + node.identifier
else:
node.data['url'] = node.parent.data['url'] + '/' + node.identifier
del node.unsorted_children del node.unsorted_children
@ -670,12 +680,21 @@ def recursive_print_node(node, depth=0, use_html=False, output_file=None):
if use_html: if use_html:
css_class = 'directory_even' if depth % 2 == 0 else 'directory_odd' css_class = 'directory_even' if depth % 2 == 0 else 'directory_odd'
if node.data['item_type'] == 'directory': if node.data['item_type'] == 'directory':
line = '<button onclick="collapse(this.nextSibling)">{name} ({size})</button>' directory_url = node.data.get('url')
line += '<div class="{css}" style="display:none">' directory_anchor = '<a href="{url}">►</a>' if directory_url else ''
line = line.format(name=node.data['name'], size=size, css=css_class) directory_anchor = directory_anchor.format(url=directory_url)
line = HTML_FORMAT_DIRECTORY.format(
css=css_class,
directory_anchor=directory_anchor,
name=node.data['name'],
size=size,
)
else: else:
line = '<a href="{url}">{name} ({size})</a><br>' line = HTML_FORMAT_FILE.format(
line = line.format(url=node.data['url'], name=node.data['name'], size=size) name=node.data['name'],
size=size,
url=node.data['url'],
)
else: else:
line = '{space}{bar}{name} : ({size})' line = '{space}{bar}{name} : ({size})'
line = line.format( line = line.format(
@ -697,6 +716,7 @@ def recursive_print_node(node, depth=0, use_html=False, output_file=None):
if node.data['item_type'] == 'directory': if node.data['item_type'] == 'directory':
if use_html: if use_html:
# Close the directory div
write('</div>', output_file) write('</div>', output_file)
else: else:
# This helps put some space between sibling directories # This helps put some space between sibling directories
@ -713,7 +733,7 @@ def smart_insert(sql, cur, url=None, head=None, commit=True):
INSERT or UPDATE the appropriate entry, or DELETE if the head INSERT or UPDATE the appropriate entry, or DELETE if the head
shows a 403 / 404. shows a 403 / 404.
''' '''
if bool(url) is bool(head): if bool(url) is bool(head) and not isinstance(head, requests.Response):
raise ValueError('One and only one of `url` or `head` is necessary.') raise ValueError('One and only one of `url` or `head` is necessary.')
if url is not None: if url is not None:
@ -722,6 +742,7 @@ def smart_insert(sql, cur, url=None, head=None, commit=True):
content_type = None content_type = None
elif head is not None: elif head is not None:
url = head.url
# When doing a full scan, we get a Response object. # When doing a full scan, we get a Response object.
if head.status_code in [403, 404]: if head.status_code in [403, 404]:
cur.execute('DELETE FROM urls WHERE url == ?', [url]) cur.execute('DELETE FROM urls WHERE url == ?', [url])
@ -735,7 +756,7 @@ def smart_insert(sql, cur, url=None, head=None, commit=True):
content_length = int(content_length) content_length = int(content_length)
content_type = head.headers.get('Content-Type', None) content_type = head.headers.get('Content-Type', None)
basename = url_to_filepath(url)['filename'] basename = url_split(url)['filename']
basename = urllib.parse.unquote(basename) basename = urllib.parse.unquote(basename)
do_download = True do_download = True
@ -759,7 +780,7 @@ def smart_insert(sql, cur, url=None, head=None, commit=True):
sql.commit() sql.commit()
return data return data
def url_to_filepath(text): def url_split(text):
text = urllib.parse.unquote(text) text = urllib.parse.unquote(text)
parts = urllib.parse.urlsplit(text) parts = urllib.parse.urlsplit(text)
if any(part == '' for part in [parts.scheme, parts.netloc]): if any(part == '' for part in [parts.scheme, parts.netloc]):
@ -852,7 +873,7 @@ def download(
# on their own. # on their own.
cur.execute('SELECT url FROM urls LIMIT 1') cur.execute('SELECT url FROM urls LIMIT 1')
url = cur.fetchone()[0] url = cur.fetchone()[0]
outputdir = url_to_filepath(url)['root'] outputdir = url_split(url)['root']
if isinstance(bytespersecond, str): if isinstance(bytespersecond, str):
bytespersecond = bytestring.parsebytes(bytespersecond) bytespersecond = bytestring.parsebytes(bytespersecond)
@ -861,7 +882,7 @@ def download(
for fetch in fetch_generator(cur): for fetch in fetch_generator(cur):
url = fetch[SQL_URL] url = fetch[SQL_URL]
url_filepath = url_to_filepath(url) url_filepath = url_split(url)
folder = os.path.join(outputdir, url_filepath['folder']) folder = os.path.join(outputdir, url_filepath['folder'])
os.makedirs(folder, exist_ok=True) os.makedirs(folder, exist_ok=True)
@ -1012,7 +1033,7 @@ def measure(databasename, fullscan=False, new_only=False):
items = cur.fetchall() items = cur.fetchall()
filecount = 0 filecount = len(items)
unmeasured_file_count = 0 unmeasured_file_count = 0
for fetch in items: for fetch in items:
@ -1023,21 +1044,27 @@ def measure(databasename, fullscan=False, new_only=False):
head = do_head(url, raise_for_status=False) head = do_head(url, raise_for_status=False)
fetch = smart_insert(sql, cur, head=head, commit=True) fetch = smart_insert(sql, cur, head=head, commit=True)
size = fetch[SQL_CONTENT_LENGTH] size = fetch[SQL_CONTENT_LENGTH]
if size is None:
write('"%s" is not revealing Content-Length' % url)
size = 0
elif size is None:
elif fetch[SQL_CONTENT_LENGTH] is None: # Unmeasured and no intention to measure.
unmeasured_file_count += 1 unmeasured_file_count += 1
size = 0 size = 0
if size is None:
# Unmeasured even though we tried the head request.
write('"%s" is not revealing Content-Length' % url)
size = 0
totalsize += size totalsize += size
filecount += 1
sql.commit() sql.commit()
short_string = bytestring.bytestring(totalsize) size_string = bytestring.bytestring(totalsize)
totalsize_string = '{} ({:,} bytes) in {:,} files'.format(short_string, totalsize, filecount) totalsize_string = '{size_short} ({size_exact:,} bytes) in {filecount:,} files'
totalsize_string = totalsize_string.format(
size_short=size_string,
size_exact=totalsize,
filecount=filecount,
)
write(totalsize_string) write(totalsize_string)
if unmeasured_file_count > 0: if unmeasured_file_count > 0:
write(UNMEASURED_WARNING % unmeasured_file_count) write(UNMEASURED_WARNING % unmeasured_file_count)
@ -1078,7 +1105,9 @@ def tree(databasename, output_filename=None):
use_html = False use_html = False
if use_html: if use_html:
write(HTML_TREE_HEADER, output_file) write('<!DOCTYPE html>\n<html>', output_file)
write(HTML_TREE_HEAD, output_file)
write('<body>', output_file)
size_details = recursive_get_size(tree) size_details = recursive_get_size(tree)
recursive_print_node(tree, use_html=use_html, output_file=output_file) recursive_print_node(tree, use_html=use_html, output_file=output_file)
@ -1086,6 +1115,8 @@ def tree(databasename, output_filename=None):
write(UNMEASURED_WARNING % size_details['unmeasured'], output_file) write(UNMEASURED_WARNING % size_details['unmeasured'], output_file)
if output_file is not None: if output_file is not None:
if use_html:
write('</body>\n</html>', output_file)
output_file.close() output_file.close()
return tree return tree

View file

@ -13,6 +13,9 @@ class Path:
def __contains__(self, other): def __contains__(self, other):
return other.absolute_path.startswith(self.absolute_path) return other.absolute_path.startswith(self.absolute_path)
def __eq__(self, other):
return hasattr(other, 'absolute_path') and self.absolute_path == other.absolute_path
def __hash__(self): def __hash__(self):
return hash(self.absolute_path) return hash(self.absolute_path)
@ -75,10 +78,19 @@ def get_path_casing(path):
(drive, subpath) = os.path.splitdrive(path) (drive, subpath) = os.path.splitdrive(path)
subpath = subpath.lstrip(os.sep) subpath = subpath.lstrip(os.sep)
def patternize(piece): pattern = [glob_patternize(piece) for piece in subpath.split(os.sep)]
pattern = os.sep.join(pattern)
pattern = drive.upper() + os.sep + pattern
#print(pattern)
try:
return glob.glob(pattern)[0]
except IndexError:
return path
def glob_patternize(piece):
''' '''
Create a pattern like "[u]ser" from "user", forcing glob to look up the Create a pattern like "[u]ser" from "user", forcing glob to look up the
correct path name, and guaranteeing that the only result will be the correct path. correct path name, while guaranteeing that the only result will be the correct path.
Special cases are: Special cases are:
!, because in glob syntax, [!x] tells glob to look for paths that don't contain !, because in glob syntax, [!x] tells glob to look for paths that don't contain
@ -94,12 +106,3 @@ def get_path_casing(path):
piece = piece.replace(character, replacement, 1) piece = piece.replace(character, replacement, 1)
break break
return piece return piece
pattern = [patternize(piece) for piece in subpath.split(os.sep)]
pattern = os.sep.join(pattern)
pattern = drive.upper() + os.sep + pattern
#print(pattern)
try:
return glob.glob(pattern)[0]
except IndexError:
return path

View file

@ -52,7 +52,7 @@ def callback_v1(fpobj, written_bytes, total_bytes):
ends = '\n' ends = '\n'
else: else:
ends = '' ends = ''
percent = (100 * written_bytes) / total_bytes percent = (100 * written_bytes) / max(total_bytes, 1)
percent = '%07.3f' % percent percent = '%07.3f' % percent
written = '{:,}'.format(written_bytes) written = '{:,}'.format(written_bytes)
total = '{:,}'.format(total_bytes) total = '{:,}'.format(total_bytes)
@ -196,17 +196,13 @@ def copy_dir(
m += '`destination_new_root` can be passed.' m += '`destination_new_root` can be passed.'
raise ValueError(m) raise ValueError(m)
source = pathclass.get_path_casing(source)
source = str_to_fp(source) source = str_to_fp(source)
if destination_new_root is not None: if destination_new_root is not None:
destination = new_root(source, destination_new_root) destination = new_root(source, destination_new_root)
destination = str_to_fp(destination) destination = str_to_fp(destination)
callback_directory = callback_directory or do_nothing if destination in source:
callback_verbose = callback_verbose or do_nothing
if is_subfolder(source, destination):
raise RecursiveDirectory(source, destination) raise RecursiveDirectory(source, destination)
if not source.is_dir: if not source.is_dir:
@ -220,6 +216,8 @@ def copy_dir(
else: else:
total_bytes = 0 total_bytes = 0
callback_directory = callback_directory or do_nothing
callback_verbose = callback_verbose or do_nothing
bytes_per_second = limiter_or_none(bytes_per_second) bytes_per_second = limiter_or_none(bytes_per_second)
files_per_second = limiter_or_none(files_per_second) files_per_second = limiter_or_none(files_per_second)
@ -350,7 +348,6 @@ def copy_file(
m += '`destination_new_root` can be passed' m += '`destination_new_root` can be passed'
raise ValueError(m) raise ValueError(m)
source = pathclass.get_path_casing(source)
source = str_to_fp(source) source = str_to_fp(source)
if destination_new_root is not None: if destination_new_root is not None:
@ -370,13 +367,11 @@ def copy_file(
# Determine overwrite # Determine overwrite
if destination.exists: if destination.exists:
destination_modtime = destination.stat.st_mtime
if overwrite_old is False: if overwrite_old is False:
return [destination, 0] return [destination, 0]
source_modtime = source.stat.st_mtime source_modtime = source.stat.st_mtime
if source_modtime == destination_modtime: if source_modtime == destination.stat.st_mtime:
return [destination, 0] return [destination, 0]
# Copy # Copy
@ -460,6 +455,8 @@ def is_xor(*args):
return [bool(a) for a in args].count(True) == 1 return [bool(a) for a in args].count(True) == 1
def limiter_or_none(value): def limiter_or_none(value):
if isinstance(value, str):
value = bytestring.parsebytes(value)
if isinstance(value, ratelimiter.Ratelimiter): if isinstance(value, ratelimiter.Ratelimiter):
limiter = value limiter = value
elif value is not None: elif value is not None:
@ -506,7 +503,7 @@ def walk_generator(
exclude_filenames=None, exclude_filenames=None,
): ):
''' '''
Yield Path objects from the file tree similar to os.walk. Yield Path objects for files in the file tree, similar to os.walk.
callback_exclusion: callback_exclusion:
This function will be called when a file or directory is excluded with This function will be called when a file or directory is excluded with
@ -563,31 +560,28 @@ def walk_generator(
# This is a recursion-free workplace. # This is a recursion-free workplace.
# Thank you for your cooperation. # Thank you for your cooperation.
while len(directory_queue) > 0: while len(directory_queue) > 0:
location = directory_queue.popleft() current_location = directory_queue.popleft()
callback_verbose('listdir: %s' % location) callback_verbose('listdir: %s' % current_location)
contents = os.listdir(location) contents = os.listdir(current_location)
callback_verbose('received %d items' % len(contents)) callback_verbose('received %d items' % len(contents))
directories = [] directories = []
for base_name in contents: for base_name in contents:
absolute_name = os.path.join(location, base_name) absolute_name = os.path.join(current_location, base_name)
if os.path.isdir(absolute_name): if os.path.isdir(absolute_name):
if normalize(absolute_name) in exclude_directories: exclude = normalize(absolute_name) in exclude_directories
callback_exclusion(absolute_name, 'directory') exclude |= normalize(base_name) in exclude_directories
continue if exclude:
if normalize(base_name) in exclude_directories:
callback_exclusion(absolute_name, 'directory') callback_exclusion(absolute_name, 'directory')
continue continue
directories.append(absolute_name) directories.append(absolute_name)
else: else:
if normalize(base_name) in exclude_filenames: exclude = normalize(absolute_name) in exclude_filenames
callback_exclusion(absolute_name, 'file') exclude |= normalize(base_name) in exclude_filenames
continue if exclude:
if normalize(absolute_name) in exclude_filenames:
callback_exclusion(absolute_name, 'file') callback_exclusion(absolute_name, 'file')
continue continue