else
This commit is contained in:
parent
84b0578ef3
commit
25fd827eb1
13 changed files with 1486 additions and 242 deletions
|
@ -1,3 +1,5 @@
|
||||||
|
import re
|
||||||
|
|
||||||
BYTE = 1
|
BYTE = 1
|
||||||
KIBIBYTE = 1024 * BYTE
|
KIBIBYTE = 1024 * BYTE
|
||||||
MIBIBYTE = 1024 * KIBIBYTE
|
MIBIBYTE = 1024 * KIBIBYTE
|
||||||
|
@ -19,25 +21,38 @@ UNIT_STRINGS = {
|
||||||
ZEBIBYTE: 'ZiB',
|
ZEBIBYTE: 'ZiB',
|
||||||
YOBIBYTE: 'YiB',
|
YOBIBYTE: 'YiB',
|
||||||
}
|
}
|
||||||
|
UNITS_SORTED = sorted(UNIT_STRINGS.keys(), reverse=True)
|
||||||
|
|
||||||
def bytestring(bytes):
|
def bytestring(size, force_unit=None):
|
||||||
possible_units = sorted(UNIT_STRINGS.keys(), reverse=True)
|
'''
|
||||||
|
Convert a number into a binary-standard string.
|
||||||
|
|
||||||
|
force_unit:
|
||||||
|
If None, an appropriate size unit is chosen automatically.
|
||||||
|
Otherwise, you can provide one of the size constants to force that divisor.
|
||||||
|
'''
|
||||||
|
|
||||||
# choose which magnitutde to use as the divisor
|
# choose which magnitutde to use as the divisor
|
||||||
if bytes < 1:
|
if force_unit is None:
|
||||||
appropriate_unit = 1
|
divisor = get_appropriate_divisor(size)
|
||||||
else:
|
else:
|
||||||
for unit in possible_units:
|
divisor = force_unit
|
||||||
if bytes >= unit:
|
|
||||||
appropriate_unit = unit
|
|
||||||
break
|
|
||||||
|
|
||||||
size_unit_string = UNIT_STRINGS[appropriate_unit]
|
size_unit_string = UNIT_STRINGS[divisor]
|
||||||
size_string = '%.3f %s' % ((bytes / appropriate_unit), size_unit_string)
|
size_string = '%.3f %s' % ((size / divisor), size_unit_string)
|
||||||
return size_string
|
return size_string
|
||||||
|
|
||||||
|
def get_appropriate_divisor(size):
|
||||||
|
size = abs(size)
|
||||||
|
for unit in UNITS_SORTED:
|
||||||
|
if size >= unit:
|
||||||
|
appropriate_unit = unit
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
appropriate_unit = 1
|
||||||
|
return appropriate_unit
|
||||||
|
|
||||||
def parsebytes(string):
|
def parsebytes(string):
|
||||||
import re
|
|
||||||
string = string.lower().replace(' ', '')
|
string = string.lower().replace(' ', '')
|
||||||
|
|
||||||
matches = re.findall('((\\.|\\d)+)', string)
|
matches = re.findall('((\\.|\\d)+)', string)
|
||||||
|
|
311
Downloady/downloady.py
Normal file
311
Downloady/downloady.py
Normal file
|
@ -0,0 +1,311 @@
|
||||||
|
import sys
|
||||||
|
sys.path.append('C:\\git\\else\\ratelimiter'); import ratelimiter
|
||||||
|
sys.path.append('C:\\git\\else\\bytestring'); import bytestring
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import os
|
||||||
|
import pyperclip # pip install pyperclip
|
||||||
|
import requests
|
||||||
|
import time
|
||||||
|
import urllib
|
||||||
|
import warnings
|
||||||
|
warnings.simplefilter('ignore')
|
||||||
|
|
||||||
|
HEADERS = {
|
||||||
|
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.152 Safari/537.36'
|
||||||
|
}
|
||||||
|
SLEEPINESS = 3
|
||||||
|
|
||||||
|
FILENAME_BADCHARS = '*?"<>|'
|
||||||
|
|
||||||
|
last_request = 0
|
||||||
|
CHUNKSIZE = 16 * bytestring.KIBIBYTE
|
||||||
|
STOP = False
|
||||||
|
TIMEOUT = 600
|
||||||
|
|
||||||
|
def download_file(
|
||||||
|
url,
|
||||||
|
localname=None,
|
||||||
|
auth=None,
|
||||||
|
bytespersecond=None,
|
||||||
|
callback_progress=None,
|
||||||
|
headers=None,
|
||||||
|
overwrite=None
|
||||||
|
):
|
||||||
|
if headers is None:
|
||||||
|
headers = {}
|
||||||
|
''' Determine local filename '''
|
||||||
|
url = url.replace('%3A//', '://')
|
||||||
|
if localname in [None, '']:
|
||||||
|
localname = localize(url)
|
||||||
|
|
||||||
|
localname = filepath_sanitize(localname)
|
||||||
|
|
||||||
|
directory = os.path.split(localname)[0]
|
||||||
|
if directory != '':
|
||||||
|
os.makedirs(directory, exist_ok=True)
|
||||||
|
|
||||||
|
if bytespersecond is None:
|
||||||
|
limiter = None
|
||||||
|
else:
|
||||||
|
limiter = ratelimiter.Ratelimiter(bytespersecond, period=1)
|
||||||
|
|
||||||
|
''' Prepare condition variables '''
|
||||||
|
local_exists = os.path.exists(localname)
|
||||||
|
if local_exists and overwrite is False:
|
||||||
|
print('Overwrite off. Nothing to do.')
|
||||||
|
return
|
||||||
|
|
||||||
|
user_provided_range = 'range' in headers
|
||||||
|
if user_provided_range:
|
||||||
|
user_range_min = int(headers['range'].split('bytes=')[1].split('-')[0])
|
||||||
|
user_range_max = headers['range'].split('-')[1]
|
||||||
|
if user_range_max != '':
|
||||||
|
user_range_max = int(user_range_max)
|
||||||
|
else:
|
||||||
|
# Included to determine whether the server supports this
|
||||||
|
headers['range'] = 'bytes=0-'
|
||||||
|
|
||||||
|
# I'm using a GET instead of an actual HEAD here because some servers respond
|
||||||
|
# differently, even though they're not supposed to.
|
||||||
|
head = request('get', url, stream=True, headers=headers, auth=auth)
|
||||||
|
remote_total_bytes = int(head.headers.get('content-length', 1))
|
||||||
|
server_respects_range = (head.status_code == 206 and 'content-range' in head.headers)
|
||||||
|
seek_to = 0
|
||||||
|
header_range_min = None
|
||||||
|
header_range_max = None
|
||||||
|
head.connection.close()
|
||||||
|
|
||||||
|
if not user_provided_range:
|
||||||
|
del headers['range']
|
||||||
|
|
||||||
|
touch(localname)
|
||||||
|
file_handle = open(localname, 'r+b')
|
||||||
|
file_handle.seek(0)
|
||||||
|
|
||||||
|
''' THINGS THAT CAN HAPPEN '''
|
||||||
|
if local_exists:
|
||||||
|
local_existing_bytes = os.path.getsize(localname)
|
||||||
|
if overwrite is True:
|
||||||
|
file_handle.truncate()
|
||||||
|
if user_provided_range:
|
||||||
|
header_range_min = user_range_min
|
||||||
|
header_range_max = user_range_max
|
||||||
|
seek_to = user_range_min
|
||||||
|
|
||||||
|
elif not user_provided_range:
|
||||||
|
pass
|
||||||
|
|
||||||
|
elif overwrite is None:
|
||||||
|
if local_existing_bytes == remote_total_bytes:
|
||||||
|
print('File is 100%. Nothing to do.')
|
||||||
|
return
|
||||||
|
|
||||||
|
if user_provided_range:
|
||||||
|
if server_respects_range:
|
||||||
|
seek_to = user_range_min
|
||||||
|
|
||||||
|
else:
|
||||||
|
raise Exception('The server did not respect your range header')
|
||||||
|
|
||||||
|
elif not user_provided_range:
|
||||||
|
if server_respects_range:
|
||||||
|
print('Resuming from %d' % local_existing_bytes)
|
||||||
|
header_range_min = local_existing_bytes
|
||||||
|
header_range_max = ''
|
||||||
|
seek_to = local_existing_bytes
|
||||||
|
|
||||||
|
else:
|
||||||
|
print('File exists, but server doesn\'t allow resumes. Restart from 0?')
|
||||||
|
permission = get_permission()
|
||||||
|
if permission:
|
||||||
|
file_handle.truncate()
|
||||||
|
|
||||||
|
else:
|
||||||
|
raise Exception('Couldn\'t resume')
|
||||||
|
|
||||||
|
else:
|
||||||
|
raise TypeError('Invalid value for `overwrite`. Must be True, False, or None')
|
||||||
|
|
||||||
|
elif not local_exists:
|
||||||
|
if user_provided_range:
|
||||||
|
if server_respects_range:
|
||||||
|
file_handle.seek(user_range_min)
|
||||||
|
file_handle.write(b'\0')
|
||||||
|
|
||||||
|
header_range_min = user_range_min
|
||||||
|
header_range_max = user_range_max
|
||||||
|
seek_to = user_range_min
|
||||||
|
|
||||||
|
else:
|
||||||
|
raise Exception('The server did not respect your range header')
|
||||||
|
|
||||||
|
elif not user_provided_range:
|
||||||
|
pass
|
||||||
|
|
||||||
|
if header_range_min is not None:
|
||||||
|
headers['range'] = 'bytes={0}-{1}'.format(header_range_min, header_range_max)
|
||||||
|
|
||||||
|
bytes_downloaded = seek_to
|
||||||
|
file_handle.seek(seek_to)
|
||||||
|
download_stream = request('get', url, stream=True, headers=headers, auth=auth)
|
||||||
|
|
||||||
|
''' Begin download '''
|
||||||
|
for chunk in download_stream.iter_content(chunk_size=CHUNKSIZE):
|
||||||
|
bytes_downloaded += len(chunk)
|
||||||
|
file_handle.write(chunk)
|
||||||
|
if callback_progress is not None:
|
||||||
|
callback_progress(bytes_downloaded, remote_total_bytes)
|
||||||
|
|
||||||
|
if limiter is not None and bytes_downloaded < remote_total_bytes:
|
||||||
|
limiter.limit(len(chunk))
|
||||||
|
|
||||||
|
file_handle.close()
|
||||||
|
return localname
|
||||||
|
|
||||||
|
def filepath_sanitize(text, exclusions=''):
|
||||||
|
bet = FILENAME_BADCHARS.replace(exclusions, '')
|
||||||
|
for char in bet:
|
||||||
|
text = text.replace(char, '')
|
||||||
|
return text
|
||||||
|
|
||||||
|
def get_permission(prompt='y/n\n>', affirmative=['y', 'yes']):
|
||||||
|
permission = input(prompt)
|
||||||
|
return permission.lower() in affirmative
|
||||||
|
|
||||||
|
def is_clipboard(s):
|
||||||
|
return s.lower() in ['!c', '!clip', '!clipboard']
|
||||||
|
|
||||||
|
def localize(url):
|
||||||
|
'''
|
||||||
|
Determine the local filename appropriate for a URL.
|
||||||
|
'''
|
||||||
|
localname = urllib.parse.unquote(url)
|
||||||
|
localname = localname.split('?')[0]
|
||||||
|
localname = localname.split('/')[-1]
|
||||||
|
return localname
|
||||||
|
|
||||||
|
def progress(bytes_downloaded, bytes_total, prefix=''):
|
||||||
|
divisor = bytestring.get_appropriate_divisor(bytes_total)
|
||||||
|
bytes_total_string = bytestring.bytestring(bytes_total, force_unit=divisor)
|
||||||
|
bytes_downloaded_string = bytestring.bytestring(bytes_downloaded, force_unit=divisor)
|
||||||
|
bytes_downloaded_string = bytes_downloaded_string.rjust(len(bytes_total_string), ' ')
|
||||||
|
|
||||||
|
blocks = 50
|
||||||
|
char = '█'
|
||||||
|
percent = bytes_downloaded * 100 / bytes_total
|
||||||
|
percent = int(min(100, percent))
|
||||||
|
completed_blocks = char * int(blocks * percent / 100)
|
||||||
|
incompleted_blocks = ' ' * (blocks - len(completed_blocks))
|
||||||
|
statusbar = '{char}{complete}{incomplete}{char}'.format(
|
||||||
|
char=char,
|
||||||
|
complete=completed_blocks,
|
||||||
|
incomplete=incompleted_blocks,
|
||||||
|
)
|
||||||
|
|
||||||
|
end = '\n' if percent == 100 else ''
|
||||||
|
message = '\r{prefix}{bytes_downloaded} {statusbar} {bytes_total}'
|
||||||
|
message = message.format(
|
||||||
|
prefix=prefix,
|
||||||
|
bytes_downloaded=bytes_downloaded_string,
|
||||||
|
bytes_total=bytes_total_string,
|
||||||
|
statusbar=statusbar,
|
||||||
|
)
|
||||||
|
print(message, end=end, flush=True)
|
||||||
|
|
||||||
|
def progress2(bytes_downloaded, bytes_total, prefix=''):
|
||||||
|
percent = (bytes_downloaded*100)/bytes_total
|
||||||
|
percent = min(100, percent)
|
||||||
|
percent = '%08.4f' % percent
|
||||||
|
bytes_downloaded_string = '{0:,}'.format(bytes_downloaded)
|
||||||
|
bytes_total_string = '{0:,}'.format(bytes_total)
|
||||||
|
bytes_downloaded_string = bytes_downloaded_string.rjust(len(bytes_total_string), ' ')
|
||||||
|
|
||||||
|
end = '\n' if percent == 100 else ''
|
||||||
|
message = '\r{prefix}{bytes_downloaded} / {bytes_total} / {percent}%'
|
||||||
|
message = message.format(
|
||||||
|
prefix=prefix,
|
||||||
|
bytes_downloaded=bytes_downloaded_string,
|
||||||
|
bytes_total=bytes_total_string,
|
||||||
|
percent=percent,
|
||||||
|
)
|
||||||
|
print(message, end=end, flush=True)
|
||||||
|
|
||||||
|
def request(method, url, stream=False, headers=None, timeout=TIMEOUT, **kwargs):
|
||||||
|
if headers is None:
|
||||||
|
headers = {}
|
||||||
|
for (key, value) in HEADERS.items():
|
||||||
|
headers.setdefault(key, value)
|
||||||
|
session = requests.Session()
|
||||||
|
session.max_redirects = 40
|
||||||
|
|
||||||
|
method = {
|
||||||
|
'get': session.get,
|
||||||
|
'head': session.head,
|
||||||
|
'post': session.post,
|
||||||
|
}[method]
|
||||||
|
|
||||||
|
req = method(url, stream=stream, headers=headers, timeout=timeout, **kwargs)
|
||||||
|
req.raise_for_status()
|
||||||
|
return req
|
||||||
|
|
||||||
|
def touch(filename):
|
||||||
|
f = open(filename, 'ab')
|
||||||
|
f.close()
|
||||||
|
return
|
||||||
|
|
||||||
|
|
||||||
|
def download_argparse(args):
|
||||||
|
url = args.url
|
||||||
|
if is_clipboard(url):
|
||||||
|
url = pyperclip.paste()
|
||||||
|
print(url)
|
||||||
|
|
||||||
|
overwrite = {
|
||||||
|
'y':True, 't':True,
|
||||||
|
'n':False, 'f':False,
|
||||||
|
}.get(args.overwrite.lower(), None)
|
||||||
|
|
||||||
|
callback = {
|
||||||
|
None: progress,
|
||||||
|
'1': progress,
|
||||||
|
'2': progress2,
|
||||||
|
}.get(args.callback, None)
|
||||||
|
|
||||||
|
callback = args.callback
|
||||||
|
if callback == '1':
|
||||||
|
callback = progress
|
||||||
|
if callback == '2':
|
||||||
|
callback = progress2
|
||||||
|
|
||||||
|
bytespersecond = args.bytespersecond
|
||||||
|
if bytespersecond is not None:
|
||||||
|
bytespersecond = bytestring.parsebytes(bytespersecond)
|
||||||
|
|
||||||
|
headers = {}
|
||||||
|
if args.range is not None:
|
||||||
|
headers['range'] = 'bytes=%s' % args.range
|
||||||
|
|
||||||
|
download_file(
|
||||||
|
url=url,
|
||||||
|
localname=args.localname,
|
||||||
|
bytespersecond=bytespersecond,
|
||||||
|
callback_progress=callback,
|
||||||
|
headers=headers,
|
||||||
|
overwrite=overwrite,
|
||||||
|
)
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
parser = argparse.ArgumentParser()
|
||||||
|
|
||||||
|
#p_download_file = subparsers.add_parser('download_file')
|
||||||
|
parser.add_argument('url')
|
||||||
|
parser.add_argument('localname', nargs='?', default=None)
|
||||||
|
parser.add_argument('-c', '--callback', dest='callback', default=progress)
|
||||||
|
parser.add_argument('-bps', '--bytespersecond', dest='bytespersecond', default=None)
|
||||||
|
parser.add_argument('-ow', '--overwrite', dest='overwrite', default='')
|
||||||
|
parser.add_argument('-r', '--range', dest='range', default=None)
|
||||||
|
parser.set_defaults(func=download_argparse)
|
||||||
|
|
||||||
|
args = parser.parse_args()
|
||||||
|
args.func(args)
|
45
Downloady/things that can happen.txt
Normal file
45
Downloady/things that can happen.txt
Normal file
|
@ -0,0 +1,45 @@
|
||||||
|
THINGS THAT CAN HAPPEN
|
||||||
|
├───File exists
|
||||||
|
│ ├───User disables overwrite
|
||||||
|
│ │ └───Return because there's nothing to do
|
||||||
|
│ │
|
||||||
|
│ ├───User enables overwrite
|
||||||
|
│ │ ├───User requests range
|
||||||
|
│ │ │ └───Raise exception because requesting a range and forcing overwrite are mutually exclusive
|
||||||
|
│ │ │
|
||||||
|
│ │ └───User does not request range
|
||||||
|
│ │ └───File opened, truncated, full download
|
||||||
|
│ │
|
||||||
|
│ └───User does not specify overwrite
|
||||||
|
│ ├───File is same size as content-length
|
||||||
|
│ │ └───Return because there's nothing to do.
|
||||||
|
│ │
|
||||||
|
│ ├───User requests range
|
||||||
|
│ │ ├───Server respects range
|
||||||
|
│ │ │ └───File opened, seeked to request, bytes filled in
|
||||||
|
│ │ │
|
||||||
|
│ │ └───Server does not respect range
|
||||||
|
│ │ └───Raise exception because user's request can't be fulfilled
|
||||||
|
│ │
|
||||||
|
│ └───User does not request range
|
||||||
|
│ ├───Server respects range
|
||||||
|
│ │ └───File is opened, seeked to end, download resumes
|
||||||
|
│ │
|
||||||
|
│ └───Server does not respect range
|
||||||
|
│ └───Ask for permission to overwrite from beginning
|
||||||
|
│
|
||||||
|
└───File does not exist
|
||||||
|
├───User requests range
|
||||||
|
│ ├───Server respects range
|
||||||
|
│ │ └───File created, seeked to request, bytes filled in. everything else left 0
|
||||||
|
│ └───Server does not respect range
|
||||||
|
│ └───Raise exception because user's request can't be fulfilled
|
||||||
|
│
|
||||||
|
└───User does not request range
|
||||||
|
└───File created, full download
|
||||||
|
|
||||||
|
Possible amibiguity: If the user requests a range, and the file does not exist, does he want:
|
||||||
|
1. to fill the file with zeroes, and patch the requested bytes into their correct spot; or
|
||||||
|
2. to create the file empty, and only write the requested bytes?
|
||||||
|
|
||||||
|
I will assume #1 because that plays nicely with other Things That Can Happen, such as letting the user patch the other bytes in later.
|
210
Javascript/audio_compare_test.html
Normal file
210
Javascript/audio_compare_test.html
Normal file
|
@ -0,0 +1,210 @@
|
||||||
|
<!DOCTYPE html>
|
||||||
|
<html>
|
||||||
|
<!--
|
||||||
|
This page lets you compare multiple audio tracks simultaneously,
|
||||||
|
to see whether you can spot the difference in audio quality.
|
||||||
|
|
||||||
|
The audio elements are scrambled when loaded, you can hover over the
|
||||||
|
"spoiler" tag to see the file.
|
||||||
|
-->
|
||||||
|
<head>
|
||||||
|
<title>Fear your ears</title>
|
||||||
|
<meta charset="UTF-8">
|
||||||
|
</head>
|
||||||
|
|
||||||
|
<body>
|
||||||
|
<p>Drag files onto the page</p>
|
||||||
|
<button onclick="cycle();">Cycle</button>
|
||||||
|
<button onclick="pause_all();">Pause</button>
|
||||||
|
<button onclick="play_all();">Play</button>
|
||||||
|
<p>
|
||||||
|
Please use this slider as the seekbar. Dragging the audio elements directly causes syncing problems.
|
||||||
|
<br>
|
||||||
|
<input id="seekbar" type="range">
|
||||||
|
</p>
|
||||||
|
<div id="workspace">
|
||||||
|
</div>
|
||||||
|
|
||||||
|
</body>
|
||||||
|
|
||||||
|
</html>
|
||||||
|
|
||||||
|
<style>
|
||||||
|
html, body
|
||||||
|
{
|
||||||
|
height: 100%;
|
||||||
|
}
|
||||||
|
|
||||||
|
audio
|
||||||
|
{
|
||||||
|
vertical-align: middle;
|
||||||
|
width: 600px;
|
||||||
|
}
|
||||||
|
div button
|
||||||
|
{
|
||||||
|
height: 100%;
|
||||||
|
vertical-align: middle;
|
||||||
|
}
|
||||||
|
div
|
||||||
|
{
|
||||||
|
display: block;
|
||||||
|
}
|
||||||
|
input
|
||||||
|
{
|
||||||
|
width: 600px;
|
||||||
|
}
|
||||||
|
#workspace
|
||||||
|
{
|
||||||
|
width: 100%;
|
||||||
|
height: 100%;
|
||||||
|
}
|
||||||
|
</style>
|
||||||
|
|
||||||
|
<script type="text/javascript">
|
||||||
|
var WORKSPACE = document.getElementById("workspace");
|
||||||
|
var SEEKBAR = document.getElementById("seekbar");
|
||||||
|
var audios = [];
|
||||||
|
var current = 0;
|
||||||
|
var duration = 0;
|
||||||
|
var seek_lock = false;
|
||||||
|
SEEKBAR.min = 0;
|
||||||
|
SEEKBAR.max = 0;
|
||||||
|
SEEKBAR.value = 0;
|
||||||
|
|
||||||
|
function cycle()
|
||||||
|
{
|
||||||
|
if (audios == undefined)
|
||||||
|
{
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if (audios.length == 0)
|
||||||
|
{
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
current = (current + 1) % audios.length;
|
||||||
|
switch_to(current)
|
||||||
|
}
|
||||||
|
|
||||||
|
function drop_event(e)
|
||||||
|
{
|
||||||
|
e = e || window.event;
|
||||||
|
e.preventDefault();
|
||||||
|
|
||||||
|
var files = Array.from(e.dataTransfer.files);
|
||||||
|
shuffle(files);
|
||||||
|
while (WORKSPACE.lastChild)
|
||||||
|
{
|
||||||
|
WORKSPACE.removeChild(WORKSPACE.lastChild);
|
||||||
|
}
|
||||||
|
for (var index = 0; index < files.length; index += 1)
|
||||||
|
{
|
||||||
|
var mediadiv = document.createElement("div");
|
||||||
|
var anchor = document.createElement("a");
|
||||||
|
var audio = document.createElement("audio");
|
||||||
|
var source = document.createElement("source");
|
||||||
|
var choosebutton = document.createElement("button");
|
||||||
|
|
||||||
|
audio.preload = true;
|
||||||
|
audio.controls = true;
|
||||||
|
audio.volume = 0
|
||||||
|
|
||||||
|
anchor.innerHTML = "spoiler";
|
||||||
|
anchor.title = files[index]["name"];
|
||||||
|
anchor.href = "#";
|
||||||
|
source.src = URL.createObjectURL(files[index]);
|
||||||
|
|
||||||
|
choosebutton.innerHTML = "play";
|
||||||
|
choosebutton.index = index;
|
||||||
|
choosebutton.onclick = function()
|
||||||
|
{
|
||||||
|
current = this.index;
|
||||||
|
switch_to(current);
|
||||||
|
}
|
||||||
|
audio.appendChild(source);
|
||||||
|
mediadiv.appendChild(anchor);
|
||||||
|
mediadiv.appendChild(audio);
|
||||||
|
mediadiv.appendChild(choosebutton);
|
||||||
|
WORKSPACE.appendChild(mediadiv);
|
||||||
|
audios.push(audio);
|
||||||
|
}
|
||||||
|
|
||||||
|
load();
|
||||||
|
}
|
||||||
|
|
||||||
|
function load()
|
||||||
|
{
|
||||||
|
audios = Array.from(document.getElementsByTagName("audio"));
|
||||||
|
current = -1;
|
||||||
|
if (audios.length == 0)
|
||||||
|
{
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
audios[0].oncanplay = function()
|
||||||
|
{
|
||||||
|
duration = this.duration;
|
||||||
|
SEEKBAR.max = duration;
|
||||||
|
}
|
||||||
|
SEEKBAR.value = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
function pause_all()
|
||||||
|
{
|
||||||
|
for (var index = 0; index < audios.length; index += 1)
|
||||||
|
{
|
||||||
|
audios[index].pause();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
function play_all()
|
||||||
|
{
|
||||||
|
for (var index = 0; index < audios.length; index += 1)
|
||||||
|
{
|
||||||
|
audios[index].play();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
function seek_event()
|
||||||
|
{
|
||||||
|
if (seek_lock)
|
||||||
|
{
|
||||||
|
console.log("locked");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
pause_all();
|
||||||
|
seek_lock = true;
|
||||||
|
var timestamp = this.value;
|
||||||
|
for (var index = 0; index < audios.length; index += 1)
|
||||||
|
{
|
||||||
|
audios[index].currentTime = timestamp;
|
||||||
|
}
|
||||||
|
setTimeout(function(){seek_lock = false; play_all();}, 500);
|
||||||
|
}
|
||||||
|
|
||||||
|
function shuffle(a)
|
||||||
|
{
|
||||||
|
var target_index;
|
||||||
|
var temp;
|
||||||
|
for (var index = a.length; index > 0; index -= 1)
|
||||||
|
{
|
||||||
|
target_index = Math.floor(Math.random() * index);
|
||||||
|
temp = a[index - 1];
|
||||||
|
a[index - 1] = a[target_index];
|
||||||
|
a[target_index] = temp;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
function switch_to(audio_index)
|
||||||
|
{
|
||||||
|
play_all();
|
||||||
|
for (var index = 0; index < audios.length; index += 1)
|
||||||
|
{
|
||||||
|
audios[index].volume = 0;
|
||||||
|
}
|
||||||
|
audios[audio_index].volume = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
SEEKBAR.onchange = seek_event;
|
||||||
|
document.body.addEventListener('drop', drop_event);
|
||||||
|
document.body.addEventListener('dragover', function(e){e.preventDefault();});
|
||||||
|
|
||||||
|
</script>
|
770
Javascript/opendir_image.js
Normal file
770
Javascript/opendir_image.js
Normal file
|
@ -0,0 +1,770 @@
|
||||||
|
javascript:
|
||||||
|
/*
|
||||||
|
This javascript bookmarklet takes anchors, images, and media elements from
|
||||||
|
the page, and displays them in a nice gallery. Designed for use on open
|
||||||
|
directory listings, but works in many places.
|
||||||
|
*/
|
||||||
|
var seen_urls = new Set();
|
||||||
|
var all_urls = [];
|
||||||
|
var image_height = 200;
|
||||||
|
var video_height = 300;
|
||||||
|
var audio_width = 1000;
|
||||||
|
var IMAGE_TYPES = ["\\.jpg", "\\.jpeg", "\\.jpg", "\\.bmp", "\\.tiff", "\\.tif", "\\.bmp", "\\.gif", "\\.png"].join("|");
|
||||||
|
var AUDIO_TYPES = ["\\.aac", "\\.mp3", "\\.m4a", "\\.ogg", "\\.wav"].join("|");
|
||||||
|
var VIDEO_TYPES = ["\\.mp4", "\\.m4v", "\\.webm", "\\.ogv"].join("|");
|
||||||
|
IMAGE_TYPES = new RegExp(IMAGE_TYPES, "i");
|
||||||
|
AUDIO_TYPES = new RegExp(AUDIO_TYPES, "i");
|
||||||
|
VIDEO_TYPES = new RegExp(VIDEO_TYPES, "i");
|
||||||
|
|
||||||
|
var has_started = false;
|
||||||
|
|
||||||
|
var CSS = "";
|
||||||
|
CSS += "audio, video { display: block; }";
|
||||||
|
CSS += "audio { width: $audio_width$px; }";
|
||||||
|
CSS += "video { height: $video_height$px; }";
|
||||||
|
CSS += "img { display: block; max-width: 100% }";
|
||||||
|
|
||||||
|
CSS += ".control_panel { background-color: #aaa; min-height: 10px; width: 100%; }";
|
||||||
|
CSS += ".workspace { background-color: #ddd; min-height: 10px; float: left; }";
|
||||||
|
CSS += ".arealabel { position:absolute; right: 0; bottom: 0; opacity: 0.8; background-color: #000; color: #fff; }";
|
||||||
|
CSS += ".delete_button { color: #d00; font-family: Arial; font-size: 11px; left: 0; position: absolute; top: 0; width: 25px; }";
|
||||||
|
CSS += ".load_button { position: absolute; top: 10%; width: 100%; height: 80%; word-wrap: break-word; }";
|
||||||
|
CSS += ".odi_anchor { display: block; }";
|
||||||
|
CSS += ".odi_image_div, .odi_media_div { display: inline-block; margin: 5px; float: left; position: relative; background-color: #aaa; }";
|
||||||
|
CSS += ".odi_image_div { min-width: $image_height$px; }";
|
||||||
|
|
||||||
|
function apply_css()
|
||||||
|
{
|
||||||
|
console.log("applying CSS");
|
||||||
|
var css = document.createElement("style");
|
||||||
|
css.innerHTML = format_css();
|
||||||
|
document.head.appendChild(css);
|
||||||
|
}
|
||||||
|
|
||||||
|
function array_extend(a, b)
|
||||||
|
{
|
||||||
|
/* Append all elements of b onto a */
|
||||||
|
for (var i = 0; i < b.length; i += 1)
|
||||||
|
{
|
||||||
|
a.push(b[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
function array_remove(a, item)
|
||||||
|
{
|
||||||
|
/* Thanks peter olson http://stackoverflow.com/a/5767335 */
|
||||||
|
for(var i = a.length - 1; i >= 0; i -= 1)
|
||||||
|
{
|
||||||
|
if(a[i].id === item.id)
|
||||||
|
{
|
||||||
|
a.splice(i, 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
function clear_page()
|
||||||
|
{
|
||||||
|
/* Remove EVERYTHING, insert own css */
|
||||||
|
console.log("clearing page");
|
||||||
|
document.removeChild(document.documentElement);
|
||||||
|
|
||||||
|
var html = document.createElement("html");
|
||||||
|
document.appendChild(html);
|
||||||
|
|
||||||
|
var head = document.createElement("head");
|
||||||
|
html.appendChild(head);
|
||||||
|
|
||||||
|
var body = document.createElement("body");
|
||||||
|
html.appendChild(body);
|
||||||
|
|
||||||
|
document.documentElement = html;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
function clear_workspace()
|
||||||
|
{
|
||||||
|
console.log("clearing workspace");
|
||||||
|
workspace = document.getElementById("WORKSPACE");
|
||||||
|
while (workspace.children.length > 0)
|
||||||
|
{
|
||||||
|
workspace.removeChild(workspace.children[0]);
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
function create_command_box(boxname, operation)
|
||||||
|
{
|
||||||
|
var box = document.createElement("input");
|
||||||
|
box.type = "text";
|
||||||
|
box.id = boxname;
|
||||||
|
box.onkeydown=function()
|
||||||
|
{
|
||||||
|
if (event.keyCode == 13)
|
||||||
|
{
|
||||||
|
operation(this.value);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
return box;
|
||||||
|
}
|
||||||
|
|
||||||
|
function create_command_button(label, operation)
|
||||||
|
{
|
||||||
|
var button = document.createElement("button");
|
||||||
|
button.innerHTML = label;
|
||||||
|
button.onclick = operation;
|
||||||
|
return button;
|
||||||
|
}
|
||||||
|
|
||||||
|
function create_command_box_button(boxname, label, operation)
|
||||||
|
{
|
||||||
|
var box = create_command_box(boxname, operation);
|
||||||
|
var button = create_command_button(label, function(){operation(box.value)});
|
||||||
|
var div = document.createElement("div");
|
||||||
|
div.appendChild(box);
|
||||||
|
div.appendChild(button);
|
||||||
|
return div;
|
||||||
|
}
|
||||||
|
|
||||||
|
function create_odi_divs(urls)
|
||||||
|
{
|
||||||
|
image_divs = [];
|
||||||
|
media_divs = [];
|
||||||
|
for (var index = 0; index < urls.length; index += 1)
|
||||||
|
{
|
||||||
|
url = urls[index];
|
||||||
|
if (!url)
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
/*console.log("Building for " + url);*/
|
||||||
|
var div = null;
|
||||||
|
var paramless_url = url.split("?")[0];
|
||||||
|
var basename = get_basename(url);
|
||||||
|
|
||||||
|
if (paramless_url.match(IMAGE_TYPES))
|
||||||
|
{
|
||||||
|
var div = document.createElement("div");
|
||||||
|
div.id = generate_id(32);
|
||||||
|
div.className = "odi_image_div";
|
||||||
|
div.odi_type = "image";
|
||||||
|
|
||||||
|
var a = document.createElement("a");
|
||||||
|
a.className = "odi_anchor";
|
||||||
|
a.odi_div = div;
|
||||||
|
a.href = url;
|
||||||
|
a.target = "_blank";
|
||||||
|
|
||||||
|
var img = document.createElement("img");
|
||||||
|
img.odi_div = div;
|
||||||
|
img.anchor = a;
|
||||||
|
img.border = 0;
|
||||||
|
img.height = image_height;
|
||||||
|
|
||||||
|
img.lazy_src = url;
|
||||||
|
img.src = "";
|
||||||
|
|
||||||
|
var arealabel = document.createElement("span");
|
||||||
|
arealabel.className = "arealabel";
|
||||||
|
arealabel.odi_div = div;
|
||||||
|
arealabel.innerHTML = "0x0";
|
||||||
|
img.arealabel = arealabel;
|
||||||
|
|
||||||
|
var load_button = document.createElement("button");
|
||||||
|
load_button.className = "load_button";
|
||||||
|
load_button.odi_div = div;
|
||||||
|
load_button.innerHTML = basename;
|
||||||
|
load_button.onclick = function()
|
||||||
|
{
|
||||||
|
this.parentElement.removeChild(this);
|
||||||
|
lazy_load_one(this.odi_div);
|
||||||
|
};
|
||||||
|
|
||||||
|
div.image = img;
|
||||||
|
div.anchor = a;
|
||||||
|
a.appendChild(img);
|
||||||
|
a.appendChild(arealabel);
|
||||||
|
div.appendChild(a);
|
||||||
|
div.appendChild(load_button);
|
||||||
|
image_divs.push(div);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (paramless_url.match(AUDIO_TYPES))
|
||||||
|
{
|
||||||
|
var mediatype = "audio";
|
||||||
|
}
|
||||||
|
else if (paramless_url.match(VIDEO_TYPES))
|
||||||
|
{
|
||||||
|
var mediatype = "video";
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
var div = document.createElement("div");
|
||||||
|
div.id = generate_id(32);
|
||||||
|
div.className = "odi_media_div";
|
||||||
|
div.odi_type = "media";
|
||||||
|
|
||||||
|
var center = document.createElement("center");
|
||||||
|
center.odi_div = div;
|
||||||
|
|
||||||
|
var a = document.createElement("a");
|
||||||
|
a.odi_div = div;
|
||||||
|
a.innerHTML = get_basename(url);
|
||||||
|
a.target = "_blank";
|
||||||
|
a.style.display = "block";
|
||||||
|
a.href = url;
|
||||||
|
|
||||||
|
var media = document.createElement(mediatype);
|
||||||
|
media.odi_div = div;
|
||||||
|
media.controls = true;
|
||||||
|
media.preload = "none";
|
||||||
|
|
||||||
|
sources = get_alternate_sources(url);
|
||||||
|
for (var sourceindex = 0; sourceindex < sources.length; sourceindex += 1)
|
||||||
|
{
|
||||||
|
source = document.createElement("source");
|
||||||
|
source.src = sources[sourceindex];
|
||||||
|
source.odi_div = div;
|
||||||
|
media.appendChild(source);
|
||||||
|
}
|
||||||
|
|
||||||
|
div.media = media;
|
||||||
|
div.anchor = a;
|
||||||
|
center.appendChild(a);
|
||||||
|
div.appendChild(center);
|
||||||
|
div.appendChild(media);
|
||||||
|
media_divs.push(div);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (div == null)
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.url = url;
|
||||||
|
div.basename = basename;
|
||||||
|
|
||||||
|
button = document.createElement("button");
|
||||||
|
button.className = "delete_button";
|
||||||
|
button.odi_div = div;
|
||||||
|
button.innerHTML = "X";
|
||||||
|
button.onclick = function()
|
||||||
|
{
|
||||||
|
delete_odi_div(this);
|
||||||
|
};
|
||||||
|
div.appendChild(button);
|
||||||
|
/*console.log("built " + div);*/
|
||||||
|
}
|
||||||
|
odi_divs = [];
|
||||||
|
array_extend(odi_divs, image_divs);
|
||||||
|
odi_divs.push(document.createElement("br"));
|
||||||
|
array_extend(odi_divs, media_divs);
|
||||||
|
return odi_divs;
|
||||||
|
}
|
||||||
|
|
||||||
|
function create_workspace()
|
||||||
|
{
|
||||||
|
clear_page();
|
||||||
|
apply_css();
|
||||||
|
console.log("creating workspace");
|
||||||
|
|
||||||
|
var control_panel = document.createElement("div");
|
||||||
|
var workspace = document.createElement("div");
|
||||||
|
|
||||||
|
var resizer = create_command_box_button("resizer", "resize", resize_images);
|
||||||
|
var refilter = create_command_box_button("refilter", "remove regex", function(x){filter_re(x, true)});
|
||||||
|
var rekeeper = create_command_box_button("rekeeper", "keep regex", function(x){filter_re(x, false)});
|
||||||
|
var heightfilter = create_command_box_button("heightfilter", "min height", filter_height);
|
||||||
|
var widthfilter = create_command_box_button("widthfilter", "min width", filter_width);
|
||||||
|
var sorter = create_command_button("sort size", sort_size);
|
||||||
|
var dumper = create_command_button("dump urls", dump_urls);
|
||||||
|
var start_button = create_command_button("load all", function(){start(); this.parentElement.removeChild(this);});
|
||||||
|
|
||||||
|
start_button.style.display = "block";
|
||||||
|
|
||||||
|
|
||||||
|
control_panel.id = "CONTROL_PANEL";
|
||||||
|
control_panel.className = "control_panel";
|
||||||
|
|
||||||
|
workspace.id = "WORKSPACE";
|
||||||
|
workspace.className = "workspace";
|
||||||
|
|
||||||
|
document.body.appendChild(control_panel);
|
||||||
|
control_panel.appendChild(resizer);
|
||||||
|
control_panel.appendChild(refilter);
|
||||||
|
control_panel.appendChild(rekeeper);
|
||||||
|
control_panel.appendChild(heightfilter);
|
||||||
|
control_panel.appendChild(widthfilter);
|
||||||
|
control_panel.appendChild(sorter);
|
||||||
|
control_panel.appendChild(dumper);
|
||||||
|
control_panel.appendChild(start_button);
|
||||||
|
document.body.appendChild(workspace);
|
||||||
|
}
|
||||||
|
|
||||||
|
function delete_odi_div(element)
|
||||||
|
{
|
||||||
|
if (element.odi_div != undefined)
|
||||||
|
{
|
||||||
|
element = element.odi_div;
|
||||||
|
}
|
||||||
|
if (element.media != undefined)
|
||||||
|
{
|
||||||
|
/* http://stackoverflow.com/questions/3258587/how-to-properly-unload-destroy-a-video-element */
|
||||||
|
element.media.pause();
|
||||||
|
element.media.src = "";
|
||||||
|
element.media.load();
|
||||||
|
}
|
||||||
|
var parent = element.parentElement;
|
||||||
|
parent.removeChild(element);
|
||||||
|
}
|
||||||
|
|
||||||
|
function dump_urls()
|
||||||
|
{
|
||||||
|
var divs = get_odi_divs();
|
||||||
|
var textbox = document.getElementById("url_dump_box");
|
||||||
|
if (textbox == null)
|
||||||
|
{
|
||||||
|
textbox = document.createElement("textarea");
|
||||||
|
textbox.id = "urldumpbox";
|
||||||
|
textbox.style.overflowY = "scroll";
|
||||||
|
textbox.style.height = "300px";
|
||||||
|
textbox.style.width = "90%";
|
||||||
|
workspace = document.getElementById("WORKSPACE");
|
||||||
|
workspace.appendChild(textbox);
|
||||||
|
}
|
||||||
|
textbox.innerHTML = "";
|
||||||
|
for (var index = 0; index < divs.length; index += 1)
|
||||||
|
{
|
||||||
|
textbox.innerHTML += divs[index].url + "\n";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
function fill_workspace(divs)
|
||||||
|
{
|
||||||
|
clear_workspace();
|
||||||
|
console.log("filling workspace");
|
||||||
|
|
||||||
|
workspace = document.getElementById("WORKSPACE");
|
||||||
|
for (var index = 0; index < divs.length; index += 1)
|
||||||
|
{
|
||||||
|
workspace.appendChild(divs[index]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
function filter_dimension(dimension, minimum)
|
||||||
|
{
|
||||||
|
minimum = parseInt(minimum);
|
||||||
|
images = Array.from(document.images);
|
||||||
|
for (var i = 0; i < images.length; i += 1)
|
||||||
|
{
|
||||||
|
image = images[i];
|
||||||
|
if (image[dimension] == 0)
|
||||||
|
{continue;}
|
||||||
|
if (image[dimension] < minimum)
|
||||||
|
{
|
||||||
|
delete_odi_div(image);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
function filter_height(minimum)
|
||||||
|
{
|
||||||
|
filter_dimension('naturalHeight', minimum);
|
||||||
|
}
|
||||||
|
|
||||||
|
function filter_width(minimum)
|
||||||
|
{
|
||||||
|
filter_dimension('naturalWidth', minimum);
|
||||||
|
}
|
||||||
|
|
||||||
|
function filter_re(pattern, do_delete)
|
||||||
|
{
|
||||||
|
if (!pattern)
|
||||||
|
{
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
pattern = new RegExp(pattern, "i");
|
||||||
|
do_keep = !do_delete;
|
||||||
|
console.log(pattern + " " + do_delete);
|
||||||
|
odi_divs = get_odi_divs();
|
||||||
|
for (var index = 0; index < odi_divs.length; index += 1)
|
||||||
|
{
|
||||||
|
div = odi_divs[index];
|
||||||
|
match = div.basename.match(pattern);
|
||||||
|
if ((match && do_delete) || (!match && do_keep))
|
||||||
|
{
|
||||||
|
delete_odi_div(div);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
function format_css()
|
||||||
|
{
|
||||||
|
var css = CSS;
|
||||||
|
while (true)
|
||||||
|
{
|
||||||
|
matches = css.match("\\$.+?\\$");
|
||||||
|
if (!matches)
|
||||||
|
{
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
matches = Array.from(new Set(matches));
|
||||||
|
for (var index = 0; index < matches.length; index += 1)
|
||||||
|
{
|
||||||
|
var injector = matches[index];
|
||||||
|
var injected = injector.replace(new RegExp("\\$", 'g'), "");
|
||||||
|
/*console.log(injector);*/
|
||||||
|
/*console.log(injected);*/
|
||||||
|
css = css.replace(injector, this[injected]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return css;
|
||||||
|
}
|
||||||
|
|
||||||
|
function get_all_urls()
|
||||||
|
{
|
||||||
|
var urls = [];
|
||||||
|
function include(source, attr)
|
||||||
|
{
|
||||||
|
for (var index = 0; index < source.length; index += 1)
|
||||||
|
{
|
||||||
|
url = source[index][attr];
|
||||||
|
if (url === undefined)
|
||||||
|
{continue;}
|
||||||
|
|
||||||
|
if (seen_urls.has(url))
|
||||||
|
{continue;}
|
||||||
|
console.log(url);
|
||||||
|
if (url.indexOf("thumbs.redditmedia") != -1)
|
||||||
|
{console.log("Rejecting reddit thumb"); continue;}
|
||||||
|
if (url.indexOf("pixel.reddit") != -1 || url.indexOf("reddit.com/static/pixel") != -1)
|
||||||
|
{console.log("Rejecting reddit pixel"); continue}
|
||||||
|
if (url.indexOf("/thumb/") != -1)
|
||||||
|
{console.log("Rejecting /thumb/"); continue;}
|
||||||
|
if (url.indexOf("/loaders/") != -1)
|
||||||
|
{console.log("Rejecting loader"); continue;}
|
||||||
|
if (url.indexOf("memegen") != -1)
|
||||||
|
{console.log("Rejecting retardation"); continue;}
|
||||||
|
if (url.indexOf("4cdn") != -1 && url.indexOf("s.jpg") != -1)
|
||||||
|
{console.log("Rejecting 4chan thumb"); continue;}
|
||||||
|
|
||||||
|
sub_urls = normalize_url(url);
|
||||||
|
if (sub_urls == null)
|
||||||
|
{continue;}
|
||||||
|
|
||||||
|
for (var url_index = 0; url_index < sub_urls.length; url_index += 1)
|
||||||
|
{
|
||||||
|
sub_url = sub_urls[url_index];
|
||||||
|
if (seen_urls.has(sub_url))
|
||||||
|
{continue;}
|
||||||
|
|
||||||
|
urls.push(sub_url);
|
||||||
|
seen_urls.add(sub_url);
|
||||||
|
all_urls.push(sub_url);
|
||||||
|
}
|
||||||
|
seen_urls.add(url);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
var docs = [];
|
||||||
|
docs.push(document);
|
||||||
|
while (docs.length > 0)
|
||||||
|
{
|
||||||
|
var d = docs.pop();
|
||||||
|
include(d.links, "href");
|
||||||
|
include(d.images, "src");
|
||||||
|
include(d.getElementsByTagName("audio"), "src");
|
||||||
|
include(d.getElementsByTagName("video"), "src");
|
||||||
|
include(d.getElementsByTagName("source"), "src");
|
||||||
|
}
|
||||||
|
console.log("collected " + urls.length + " urls.");
|
||||||
|
return urls;
|
||||||
|
}
|
||||||
|
|
||||||
|
function get_alternate_sources(url)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
For sites that must try multiple resource urls, that logic
|
||||||
|
may go here
|
||||||
|
*/
|
||||||
|
return [url];
|
||||||
|
}
|
||||||
|
|
||||||
|
function get_basename(url)
|
||||||
|
{
|
||||||
|
var basename = url.split("/");
|
||||||
|
basename = basename[basename.length - 1];
|
||||||
|
return basename;
|
||||||
|
}
|
||||||
|
|
||||||
|
function get_gfycat_video(id)
|
||||||
|
{
|
||||||
|
var url = "https://gfycat.com/cajax/get/" + id;
|
||||||
|
var request = new XMLHttpRequest();
|
||||||
|
request.answer = null;
|
||||||
|
request.onreadystatechange = function()
|
||||||
|
{
|
||||||
|
if (request.readyState == 4 && request.status == 200)
|
||||||
|
{
|
||||||
|
var text = request.responseText;
|
||||||
|
var details = JSON.parse(text);
|
||||||
|
request.answer = details["gfyItem"]["mp4Url"];
|
||||||
|
}
|
||||||
|
};
|
||||||
|
var asynchronous = false;
|
||||||
|
request.open("GET", url, asynchronous);
|
||||||
|
request.send(null);
|
||||||
|
return request.answer;
|
||||||
|
}
|
||||||
|
|
||||||
|
function get_lazy_divs()
|
||||||
|
{
|
||||||
|
var divs = document.getElementsByTagName("div");
|
||||||
|
var lazy_elements = [];
|
||||||
|
for (index = 0; index < divs.length; index += 1)
|
||||||
|
{
|
||||||
|
var div = divs[index];
|
||||||
|
if (div.image && div.image.lazy_src)
|
||||||
|
{
|
||||||
|
lazy_elements.push(div);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return lazy_elements;
|
||||||
|
}
|
||||||
|
|
||||||
|
function get_odi_divs()
|
||||||
|
{
|
||||||
|
var divs = document.getElementsByTagName("div");
|
||||||
|
var odi_divs = [];
|
||||||
|
for (index = 0; index < divs.length; index += 1)
|
||||||
|
{
|
||||||
|
var div = divs[index];
|
||||||
|
if (div.id.indexOf("odi_") == -1)
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
odi_divs.push(div);
|
||||||
|
}
|
||||||
|
return odi_divs;
|
||||||
|
}
|
||||||
|
|
||||||
|
function generate_id(length)
|
||||||
|
{
|
||||||
|
/* Thanks csharptest http://stackoverflow.com/a/1349426 */
|
||||||
|
var text = [];
|
||||||
|
var possible = "abcdefghijklmnopqrstuvwxyz";
|
||||||
|
|
||||||
|
for(var i = 0; i < length; i += 1)
|
||||||
|
{
|
||||||
|
c = possible.charAt(Math.floor(Math.random() * possible.length));
|
||||||
|
text.push(c);
|
||||||
|
}
|
||||||
|
return "odi_" + text.join("");
|
||||||
|
}
|
||||||
|
|
||||||
|
function lazy_load_all()
|
||||||
|
{
|
||||||
|
lazies = get_lazy_divs();
|
||||||
|
lazies.reverse();
|
||||||
|
lazy_buttons = document.getElementsByClassName("load_button");
|
||||||
|
for (var index = 0; index < lazy_buttons.length; index += 1)
|
||||||
|
{
|
||||||
|
lazy_buttons[index].parentElement.removeChild(lazy_buttons[index]);
|
||||||
|
}
|
||||||
|
while (lazies.length > 0)
|
||||||
|
{
|
||||||
|
var element = lazies.pop();
|
||||||
|
if (element.image != undefined)
|
||||||
|
{
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (element == undefined)
|
||||||
|
{
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
lazy_load_one(element, true);
|
||||||
|
return
|
||||||
|
;}
|
||||||
|
|
||||||
|
function lazy_load_one(element, comeback)
|
||||||
|
{
|
||||||
|
var image = element.image;
|
||||||
|
if (!image.lazy_src)
|
||||||
|
{
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
image.onload = function()
|
||||||
|
{
|
||||||
|
width = this.naturalWidth;
|
||||||
|
height = this.naturalHeight;
|
||||||
|
if (width == 161 && height == 81)
|
||||||
|
{delete_odi_div(this);}
|
||||||
|
this.arealabel.innerHTML = width + " x " + height;
|
||||||
|
this.odi_div.style.minWidth = "0px";
|
||||||
|
if (comeback){lazy_load_all()};
|
||||||
|
};
|
||||||
|
image.onerror = function()
|
||||||
|
{
|
||||||
|
array_remove(all_urls, this.odi_div);
|
||||||
|
delete_odi_div(this);
|
||||||
|
if (comeback){lazy_load_all()};
|
||||||
|
};
|
||||||
|
/*console.log("Lazy loading " + element.lazy_src)*/
|
||||||
|
image.src = image.lazy_src;
|
||||||
|
image.lazy_src = null;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
function normalize_url(url)
|
||||||
|
{
|
||||||
|
var protocol = window.location.protocol;
|
||||||
|
if (protocol == "file:")
|
||||||
|
{
|
||||||
|
protocol = "http:";
|
||||||
|
}
|
||||||
|
url = url.replace("http:", protocol);
|
||||||
|
url = url.replace("https:", protocol);
|
||||||
|
url = decodeURIComponent(unescape(url));
|
||||||
|
url = url.replace("imgur.com/gallery/", "imgur.com/a/");
|
||||||
|
|
||||||
|
if (url.indexOf("vidble") >= 0)
|
||||||
|
{
|
||||||
|
url = url.replace("_med", "");
|
||||||
|
url = url.replace("_sqr", "");
|
||||||
|
}
|
||||||
|
|
||||||
|
else if (url.indexOf("imgur.com/a/") != -1)
|
||||||
|
{
|
||||||
|
var urls = [];
|
||||||
|
var id = url.split("imgur.com/a/")[1];
|
||||||
|
id = id.split("#")[0].split("?")[0];
|
||||||
|
console.log("imgur album: " + id);
|
||||||
|
var url = "https://api.imgur.com/3/album/" + id;
|
||||||
|
var request = new XMLHttpRequest();
|
||||||
|
request.onreadystatechange = function()
|
||||||
|
{
|
||||||
|
if (request.readyState == 4 && request.status == 200)
|
||||||
|
{
|
||||||
|
var text = request.responseText;
|
||||||
|
var images = JSON.parse(request.responseText);
|
||||||
|
images = images['data']['images'];
|
||||||
|
for (var index = 0; index < images.length; index += 1)
|
||||||
|
{
|
||||||
|
var image = images[index];
|
||||||
|
var image_url = image["mp4"] || image["link"];
|
||||||
|
if (!image_url){continue;}
|
||||||
|
image_url = normalize_url(image_url)[0];
|
||||||
|
console.log("+" + image_url);
|
||||||
|
urls.push(image_url);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
var asynchronous = false;
|
||||||
|
request.open("GET", url, asynchronous);
|
||||||
|
request.setRequestHeader("Authorization", "Client-ID 1d8d9b36339e0e2");
|
||||||
|
request.send(null);
|
||||||
|
return urls;
|
||||||
|
}
|
||||||
|
|
||||||
|
else if (url.indexOf("imgur.com") >= 0)
|
||||||
|
{
|
||||||
|
var url_parts = url.split("/");
|
||||||
|
var image_id = url_parts[url_parts.length - 1];
|
||||||
|
var extension = ".jpg";
|
||||||
|
if (image_id.indexOf(".") != -1)
|
||||||
|
{
|
||||||
|
image_id = image_id.split(".");
|
||||||
|
extension = "." + image_id[1];
|
||||||
|
image_id = image_id[0];
|
||||||
|
}
|
||||||
|
extension = extension.replace(".gifv", ".mp4");
|
||||||
|
extension = extension.replace(".gif", ".mp4");
|
||||||
|
|
||||||
|
if (image_id.length % 2 == 0)
|
||||||
|
{
|
||||||
|
image_id = image_id.split("");
|
||||||
|
image_id[image_id.length - 1] = "";
|
||||||
|
image_id = image_id.join("");
|
||||||
|
}
|
||||||
|
url = protocol + "//i.imgur.com/" + image_id + extension;
|
||||||
|
}
|
||||||
|
|
||||||
|
else if (url.indexOf("gfycat.com") >= 0)
|
||||||
|
{
|
||||||
|
var gfy_id = url.split("/");
|
||||||
|
gfy_id = gfy_id[gfy_id.length - 1];
|
||||||
|
gfy_id = gfy_id.split(".")[0];
|
||||||
|
if (gfy_id.length > 0)
|
||||||
|
{
|
||||||
|
url = get_gfycat_video(gfy_id);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return [url];
|
||||||
|
}
|
||||||
|
|
||||||
|
function resize_images(height)
|
||||||
|
{
|
||||||
|
odi_divs = get_odi_divs();
|
||||||
|
image_height = height;
|
||||||
|
for (var index = 0; index < odi_divs.length; index += 1)
|
||||||
|
{
|
||||||
|
var div = odi_divs[index];
|
||||||
|
if (div.image == undefined)
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
div.image.height = height;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
function sort_size()
|
||||||
|
{
|
||||||
|
console.log("sorting size");
|
||||||
|
|
||||||
|
odi_divs = get_odi_divs();
|
||||||
|
odi_divs.sort(sort_size_comparator);
|
||||||
|
odi_divs.reverse();
|
||||||
|
clear_workspace();
|
||||||
|
fill_workspace(odi_divs);
|
||||||
|
}
|
||||||
|
|
||||||
|
function sort_size_comparator(div1, div2)
|
||||||
|
{
|
||||||
|
if (div1.odi_type != "image" || div1.lazy_src)
|
||||||
|
{
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (div2.odi_type != "image" || div2.lazy_src)
|
||||||
|
{
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
pixels1 = div1.image.naturalHeight * div1.image.naturalWidth;
|
||||||
|
pixels2 = div2.image.naturalHeight * div2.image.naturalWidth;
|
||||||
|
if (pixels1 < pixels2)
|
||||||
|
{return -1;}
|
||||||
|
if (pixels1 > pixels2)
|
||||||
|
{return 1;}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
function start()
|
||||||
|
{
|
||||||
|
lazy_load_all();
|
||||||
|
has_started = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
function main()
|
||||||
|
{
|
||||||
|
all_urls = get_all_urls();
|
||||||
|
var divs = create_odi_divs(all_urls);
|
||||||
|
create_workspace();
|
||||||
|
fill_workspace(divs);
|
||||||
|
}
|
||||||
|
|
||||||
|
main();
|
|
@ -1,8 +1,16 @@
|
||||||
<!DOCTYPE html>
|
<!DOCTYPE html>
|
||||||
<html>
|
<html>
|
||||||
|
<!--
|
||||||
|
This page lets you see new posts as they are made in a subreddit.
|
||||||
|
Just type a subreddit's name into the box, and press Start.
|
||||||
|
|
||||||
|
Although the default behavior is to interpret the box's contents as a subreddit
|
||||||
|
name, you can actually insert something like "/u/goldensights/m/loband" to watch
|
||||||
|
https://reddit.com/u/goldensights/m/loband/new
|
||||||
|
-->
|
||||||
<head>
|
<head>
|
||||||
<title>/new</title>
|
<title>/new</title>
|
||||||
|
<meta charset="UTF-8">
|
||||||
</head>
|
</head>
|
||||||
|
|
||||||
<body>
|
<body>
|
||||||
|
|
|
@ -1,15 +0,0 @@
|
||||||
import jobs
|
|
||||||
import time
|
|
||||||
|
|
||||||
def continuous_register():
|
|
||||||
print('w')
|
|
||||||
jobs.register(2, continuous_register)
|
|
||||||
|
|
||||||
jobs.register(5, print, args=('heyo',))
|
|
||||||
time.sleep(10)
|
|
||||||
print('x')
|
|
||||||
jobs.register(5, print, args=('heyo',))
|
|
||||||
time.sleep(2)
|
|
||||||
jobs.unregister(print, args=('heyo', ), kwargs={})
|
|
||||||
time.sleep(10)
|
|
||||||
print('y')
|
|
52
Jobs/jobs.py
52
Jobs/jobs.py
|
@ -1,52 +0,0 @@
|
||||||
import signal
|
|
||||||
import time
|
|
||||||
import threading
|
|
||||||
|
|
||||||
class JobInterruption(Exception):
|
|
||||||
pass
|
|
||||||
|
|
||||||
class JobSchedulingError(Exception):
|
|
||||||
pass
|
|
||||||
|
|
||||||
THREAD = None
|
|
||||||
JOBS = {}
|
|
||||||
def thread_manager():
|
|
||||||
while True:
|
|
||||||
now = time.time()
|
|
||||||
for (functionid, joblist) in JOBS.items():
|
|
||||||
for job in joblist:
|
|
||||||
if now < job[0]:
|
|
||||||
continue
|
|
||||||
job[1](*job[2], **job[3])
|
|
||||||
joblist.remove(job)
|
|
||||||
time.sleep(0.5)
|
|
||||||
|
|
||||||
def launch_thread():
|
|
||||||
global THREAD
|
|
||||||
if THREAD is None or THREAD.is_alive is False:
|
|
||||||
THREAD = threading.Thread(target=thread_manager)
|
|
||||||
THREAD.daemon = True
|
|
||||||
THREAD.start()
|
|
||||||
|
|
||||||
def register(seconds_from_now, function, args=[], kwargs={}):
|
|
||||||
if seconds_from_now <= 0:
|
|
||||||
raise JobSchedulingError('cannot schedule jobs for the past')
|
|
||||||
iid = id(function)
|
|
||||||
schedule = time.time() + seconds_from_now
|
|
||||||
if iid not in JOBS:
|
|
||||||
JOBS[iid] = [(schedule, function, args, kwargs)]
|
|
||||||
else:
|
|
||||||
JOBS[iid].append( (schedule, function, args, kwargs) )
|
|
||||||
launch_thread()
|
|
||||||
|
|
||||||
def unregister_all(function):
|
|
||||||
iid = id(function)
|
|
||||||
if iid in JOBS:
|
|
||||||
del JOBS[iid]
|
|
||||||
|
|
||||||
def unregister(function, args, kwargs):
|
|
||||||
joblist = JOBS[id(function)]
|
|
||||||
for job in joblist:
|
|
||||||
if job[1:] != (function, args, kwargs):
|
|
||||||
continue
|
|
||||||
joblist.remove(job)
|
|
|
@ -1,10 +1,16 @@
|
||||||
Open Dir DL
|
Open Dir DL
|
||||||
===========
|
===========
|
||||||
|
|
||||||
|
- 2016 07 25
|
||||||
|
- Removed the `Downloader` class after watching [this Jack Diederich talk](https://youtu.be/o9pEzgHorH0) about unecessary classes.
|
||||||
|
- Bytespersecond is now parsed by `bytestring.parsebytes` rather than `eval`, so you can write "100k" as opposed to "100 * 1024" etc.
|
||||||
|
|
||||||
- 2016 07 19
|
- 2016 07 19
|
||||||
- Rearranged the big blocks to be in a logical order rather than alphabetical order. Walker > Downloader > other classes
|
- Rearranged the big blocks to be in a logical order rather than alphabetical order. Walker > Downloader > other classes
|
||||||
- Renamed the `keep_pattern` and `remove_pattern` functions to `keep_pattern_argparse` etc to be consistent with the other functions that take argparse namespaces as their only parameter. Does not affect the commandline usage.
|
- Renamed the `keep_pattern` and `remove_pattern` functions to `keep_pattern_argparse` etc to be consistent with the other functions used by the argparser. *Does not affect the commandline usage!*
|
||||||
- Gave the HTML tree divs a very gentle shadow and alternating colors to help with depth perception.
|
- Gave the HTML tree divs a very gentle shadow and alternating colors to help with depth perception.
|
||||||
|
- Fixed some mismatched code vs comments
|
||||||
|
- Fixed the allowed characters parameter of `filepath_sanitize`, which was not written correctly but worked out of luck.
|
||||||
|
|
||||||
- 2016 07 08
|
- 2016 07 08
|
||||||
- Fixed bug in which trees wouldn't generate on server:port urls.
|
- Fixed bug in which trees wouldn't generate on server:port urls.
|
||||||
|
|
|
@ -181,6 +181,12 @@ SKIPPABLE_FILETYPES = [
|
||||||
]
|
]
|
||||||
SKIPPABLE_FILETYPES = set(x.lower() for x in SKIPPABLE_FILETYPES)
|
SKIPPABLE_FILETYPES = set(x.lower() for x in SKIPPABLE_FILETYPES)
|
||||||
|
|
||||||
|
# Will be ignored completely. Are case-sensitive
|
||||||
|
BLACKLISTED_FILENAMES = [
|
||||||
|
'desktop.ini',
|
||||||
|
'thumbs.db',
|
||||||
|
]
|
||||||
|
|
||||||
# oh shit
|
# oh shit
|
||||||
HTML_TREE_HEADER = '''
|
HTML_TREE_HEADER = '''
|
||||||
<meta charset="UTF-8">
|
<meta charset="UTF-8">
|
||||||
|
@ -262,12 +268,13 @@ those files.
|
||||||
## ##
|
## ##
|
||||||
class Walker:
|
class Walker:
|
||||||
def __init__(self, walkurl, databasename=None, fullscan=False):
|
def __init__(self, walkurl, databasename=None, fullscan=False):
|
||||||
if walkurl[-1] != '/':
|
if not walkurl.endswith('/'):
|
||||||
walkurl += '/'
|
walkurl += '/'
|
||||||
self.walkurl = walkurl
|
self.walkurl = walkurl
|
||||||
if databasename is None or databasename == "":
|
|
||||||
self.domain = url_to_filepath(walkurl)['root']
|
if databasename in (None, ''):
|
||||||
databasename = self.domain + '.db'
|
domain = url_to_filepath(self.walkurl)['root']
|
||||||
|
databasename = domain + '.db'
|
||||||
databasename = databasename.replace(':', '#')
|
databasename = databasename.replace(':', '#')
|
||||||
self.databasename = databasename
|
self.databasename = databasename
|
||||||
|
|
||||||
|
@ -289,27 +296,29 @@ class Walker:
|
||||||
def extract_hrefs(self, response, tag='a', attribute='href'):
|
def extract_hrefs(self, response, tag='a', attribute='href'):
|
||||||
'''
|
'''
|
||||||
Given a Response object, extract href urls.
|
Given a Response object, extract href urls.
|
||||||
External links, index sort links, and desktop.ini are discarded.
|
External links, index sort links, and blacklisted files are discarded.
|
||||||
'''
|
'''
|
||||||
import bs4
|
import bs4
|
||||||
soup = bs4.BeautifulSoup(response.text, 'html.parser')
|
soup = bs4.BeautifulSoup(response.text, 'html.parser')
|
||||||
elements = soup.findAll(tag)
|
elements = soup.find_all(tag)
|
||||||
for element in elements:
|
for element in elements:
|
||||||
try:
|
try:
|
||||||
href = element[attribute]
|
href = element[attribute]
|
||||||
except KeyError:
|
except KeyError:
|
||||||
continue
|
continue
|
||||||
href = urllib.parse.urljoin(response.url, href)
|
href = urllib.parse.urljoin(response.url, href)
|
||||||
|
|
||||||
if not href.startswith(self.walkurl):
|
if not href.startswith(self.walkurl):
|
||||||
# Don't go to other sites or parent directories.
|
# Don't go to other sites or parent directories.
|
||||||
continue
|
continue
|
||||||
#if 'C=' in href and 'O=' in href:
|
|
||||||
if any(sorter in href for sorter in ('?C=', '?O=', '?M=', '?D=', '?N=', '?S=')):
|
if any(sorter in href for sorter in ('?C=', '?O=', '?M=', '?D=', '?N=', '?S=')):
|
||||||
# Alternative sort modes for index pages.
|
# Alternative sort modes for index pages.
|
||||||
continue
|
continue
|
||||||
if href.endswith('desktop.ini'):
|
|
||||||
# I hate these things.
|
if any(href.endswith(blacklisted) for blacklisted in BLACKLISTED_FILENAMES):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
yield href
|
yield href
|
||||||
|
|
||||||
def process_url(self, url=None):
|
def process_url(self, url=None):
|
||||||
|
@ -389,8 +398,6 @@ class Walker:
|
||||||
self.queue.appendleft(url)
|
self.queue.appendleft(url)
|
||||||
try:
|
try:
|
||||||
while len(self.queue) > 0:
|
while len(self.queue) > 0:
|
||||||
# Popping from right helps keep the queue short because it handles the files
|
|
||||||
# early.
|
|
||||||
url = self.queue.popleft()
|
url = self.queue.popleft()
|
||||||
self.process_url(url)
|
self.process_url(url)
|
||||||
line = '{:,} Remaining'.format(len(self.queue))
|
line = '{:,} Remaining'.format(len(self.queue))
|
||||||
@@ -403,62 +410,6 @@ class Walker:
 ## WALKER ##########################################################################################
 
 
-## DOWNLOADER ######################################################################################
-## ##
-class Downloader:
-    def __init__(self, databasename, outputdir=None, headers=None):
-        self.databasename = databasename
-        self.sql = sqlite3.connect(databasename)
-        self.cur = self.sql.cursor()
-
-        if outputdir is None or outputdir == "":
-            # This assumes that all URLs in the database are from the same domain.
-            # If they aren't, it's the user's fault.
-            self.cur.execute('SELECT url FROM urls LIMIT 1')
-            url = self.cur.fetchone()[0]
-            outputdir = url_to_filepath(url)['root']
-        self.outputdir = outputdir
-
-    def download(self, overwrite=False, bytespersecond=None):
-        overwrite = bool(overwrite)
-
-        self.cur.execute('SELECT * FROM urls WHERE do_download == 1 ORDER BY url')
-        while True:
-            fetch = self.cur.fetchone()
-            if fetch is None:
-                break
-            url = fetch[SQL_URL]
-
-            ''' Creating the permanent and temporary filenames '''
-            url_filepath = url_to_filepath(url)
-            # Ignore this value of `root`, because we might have a custom outputdir.
-            root = url_filepath['root']
-            folder = os.path.join(root, url_filepath['folder'])
-            os.makedirs(folder, exist_ok=True)
-            fullname = os.path.join(folder, url_filepath['filename'])
-            temporary_basename = hashit(url, 16) + '.oddltemporary'
-            temporary_fullname = os.path.join(folder, temporary_basename)
-
-            ''' Managing overwrite '''
-            if os.path.isfile(fullname):
-                if overwrite is True:
-                    os.remove(fullname)
-                else:
-                    safeprint('Skipping "%s". Use `--overwrite`' % fullname)
-                    continue
-
-            safeprint('Downloading "%s" as "%s"' % (fullname, temporary_basename))
-            filehandle = open(temporary_fullname, 'wb')
-            try:
-                download_file(url, filehandle, hookfunction=hook1, bytespersecond=bytespersecond)
-                os.rename(temporary_fullname, fullname)
-            except:
-                filehandle.close()
-                raise
-## ##
-## DOWNLOADER ######################################################################################
-
-
 ## OTHER CLASSES ###################################################################################
 ## ##
 class Generic:
@@ -539,15 +490,8 @@ class TreeNode:
         self.check_child_availability(newroot)
         self.children[newroot] = othertree
 
-    def printtree(self, customsort=None):
-        for node in self.walk(customsort):
-            print(node.abspath())
-
     def sorted_children(self, customsort=None):
-        if customsort:
-            keys = sorted(self.children.keys(), key=customsort)
-        else:
-            keys = sorted(self.children.keys())
+        keys = sorted(self.children.keys(), key=customsort)
         for key in keys:
             yield (key, self.children[key])
 
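The removed if/else is redundant because sorted() accepts key=None and falls back to natural ordering. A small hedged example of supplying a customsort key; the children dict is a stand-in for TreeNode internals, which this hunk does not show:

    children = {'b.txt': 'node-b', '10.txt': 'node-10', '2.txt': 'node-2'}   # stand-in for self.children

    def customsort(name):
        # hypothetical key: numerically-named entries first, in numeric order
        stem = name.split('.')[0]
        return (0, int(stem)) if stem.isdigit() else (1, stem)

    keys = sorted(children.keys(), key=customsort)
    print(keys)   # ['2.txt', '10.txt', 'b.txt']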
@@ -570,13 +514,6 @@ def db_init(sql, cur):
     sql.commit()
     return True
 
-def dict_to_file(jdict, filename):
-    text = dict_to_string(jdict)
-    text = text.encode('utf-8')
-    filehandle = open(filename, 'wb')
-    filehandle.write(text)
-    filehandle.close()
-
 def do_get(url, raise_for_status=True):
     return do_request('GET', requests.get, url)
 
@@ -617,10 +554,17 @@ def download_file(url, filehandle, hookfunction=None, headers={}, bytespersecond
         raise Exception('Did not receive expected total size. %d / %d' % (size, totalsize))
     return True
 
+def fetch_generator(cur):
+    while True:
+        fetch = cur.fetchone()
+        if fetch is None:
+            break
+        yield fetch
+
 def filepath_sanitize(text, allowed=''):
-    bet = FILENAME_BADCHARS.replace(allowed, '')
-    for char in bet:
-        text = text.replace(char, '')
+    badchars = FILENAME_BADCHARS
+    badchars = ''.join(char for char in FILENAME_BADCHARS if char not in allowed)
+    text = ''.join(char for char in text if char not in badchars)
     return text
 
 def get_clipboard():
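A small self-contained example of the new fetch_generator helper, using a throwaway in-memory database purely for illustration (the helper is copied from the hunk above so the snippet runs on its own):

    import sqlite3

    def fetch_generator(cur):
        # yield rows one at a time until the cursor is exhausted
        while True:
            fetch = cur.fetchone()
            if fetch is None:
                break
            yield fetch

    sql = sqlite3.connect(':memory:')
    cur = sql.cursor()
    cur.execute('CREATE TABLE urls (url TEXT, do_download INT)')
    cur.executemany('INSERT INTO urls VALUES (?, ?)', [('http://example.com/a', 1), ('http://example.com/b', 0)])
    cur.execute('SELECT * FROM urls WHERE do_download == 1')
    for fetch in fetch_generator(cur):
        print(fetch[0])   # http://example.com/a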
@@ -771,18 +715,72 @@ def digest_argparse(args):
         fullscan=args.fullscan,
     )
 
-def download(databasename, outputdir=None, overwrite=False, bytespersecond=None):
-    if isinstance(bytespersecond, str):
-        bytespersecond = eval(bytespersecond)
-
-    downloader = Downloader(
-        databasename=databasename,
-        outputdir=outputdir,
-    )
-    downloader.download(
-        bytespersecond=bytespersecond,
-        overwrite=overwrite,
-    )
+def download(
+        databasename,
+        outputdir=None,
+        bytespersecond=None,
+        headers=None,
+        overwrite=False,
+    ):
+    '''
+    Download all of the Enabled files. The filepaths will match that of the
+    website, using `outputdir` as the root directory.
+
+    Parameters:
+        outputdir:
+            The directory to mirror the files into. If not provided, the domain
+            name is used.
+
+        bytespersecond:
+            The speed to ratelimit the downloads. Can be an integer, or a string like
+            '500k', according to the capabilities of `bytestring.parsebytes`
+
+            Note that this is bytes, not bits.
+
+        headers:
+            Additional headers to pass to each `download_file` call.
+
+        overwrite:
+            If True, delete local copies of existing files and rewrite them.
+            Otherwise, completed files are skipped.
+    '''
+    sql = sqlite3.connect(databasename)
+    cur = sql.cursor()
+
+    if outputdir in (None, ''):
+        # This assumes that all URLs in the database are from the same domain.
+        # If they aren't, it's the user's fault because Walkers don't leave the given site.
+        cur.execute('SELECT url FROM urls LIMIT 1')
+        url = cur.fetchone()[0]
+        outputdir = url_to_filepath(url)['root']
+
+    if isinstance(bytespersecond, str):
+        bytespersecond = bytestring.parsebytes(bytespersecond)
+
+    cur.execute('SELECT * FROM urls WHERE do_download == 1 ORDER BY url')
+    for fetch in fetch_generator(cur):
+        url = fetch[SQL_URL]
+
+        url_filepath = url_to_filepath(url)
+        folder = os.path.join(outputdir, url_filepath['folder'])
+        os.makedirs(folder, exist_ok=True)
+
+        fullname = os.path.join(folder, url_filepath['filename'])
+        temporary_basename = hashit(url, 16) + '.oddltemporary'
+        temporary_fullname = os.path.join(folder, temporary_basename)
+
+        if os.path.isfile(fullname):
+            if overwrite:
+                os.remove(fullname)
+            else:
+                safeprint('Skipping "%s". Use `--overwrite`' % fullname)
+                continue
+
+        safeprint('Downloading "%s" as "%s"' % (fullname, temporary_basename))
+        filehandle = open(temporary_fullname, 'wb')
+        with filehandle:
+            download_file(url, filehandle, hookfunction=hook1, bytespersecond=bytespersecond)
+            os.rename(temporary_fullname, fullname)
 
 def download_argparse(args):
     return download(
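A hedged sketch of calling the rewritten function, with a placeholder database name; the parameter meanings follow the docstring above:

    download(
        'example.com.db',          # placeholder database produced by a previous walk
        outputdir=None,            # default: mirror into a folder named after the domain
        bytespersecond='500k',     # parsed by bytestring.parsebytes; bytes per second, not bits
        overwrite=False,           # skip files that already exist locally
    )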
@@ -812,15 +810,12 @@ def filter_pattern(databasename, regex, action='keep', *trash):
 
     sql = sqlite3.connect(databasename)
     cur = sql.cursor()
-    cur2 = sql.cursor()
 
-    cur2.execute('SELECT * FROM urls')
-    while True:
-        fetch = cur2.fetchone()
-        if fetch is None:
-            break
-        url = fetch[SQL_URL]
-        current_do_dl = fetch[SQL_DO_DOWNLOAD]
+    cur.execute('SELECT * FROM urls')
+    items = cur.fetchall()
+    for item in items:
+        url = item[SQL_URL]
+        current_do_dl = item[SQL_DO_DOWNLOAD]
         for pattern in regex:
             contains = re.search(pattern, url) is not None
 
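Reading every row up front with fetchall() lets the single cursor be reused for writes while iterating, instead of juggling a second cursor. A minimal self-contained sketch of that pattern, assuming the loop body goes on to issue UPDATE statements on the same connection (not shown in this hunk); the table here is a throwaway stand-in for the real schema:

    import sqlite3

    sql = sqlite3.connect(':memory:')
    cur = sql.cursor()
    cur.execute('CREATE TABLE urls (url TEXT, do_download INT)')
    cur.execute("INSERT INTO urls VALUES ('http://example.com/a.jpg', 1)")

    cur.execute('SELECT * FROM urls')
    items = cur.fetchall()           # materialize the rows first ...
    for item in items:
        url = item[0]
        # ... so writes on the same cursor cannot disturb the iteration
        cur.execute('UPDATE urls SET do_download = 0 WHERE url == ?', [url])
    sql.commit()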
@@ -1145,7 +1140,6 @@ def tree_argparse(args):
         databasename=args.databasename,
         output_filename=args.outputfile,
     )
 
 ## ##
 ## COMMANDLINE FUNCTIONS ###########################################################################
-
@@ -2,3 +2,10 @@ else
 ======
 
 For anything that isn't Reddit
+
+Note: Many projects in this repository import other projects. If you see
+
+    import sys
+    sys.path.append('C:\\git\\else\\ratelimiter'); import ratelimiter
+
+Just come back to this page and download those files. Arranging them is up to you.
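The same pattern applies to the other single-file projects in this repository, for example bytestring; the path below is illustrative and should point at wherever the repository is cloned:

    import sys
    sys.path.append('C:\\git\\else\\bytestring'); import bytestring

    print(bytestring.bytestring(1024))   # '1.000 KiB'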
@@ -1,56 +0,0 @@
-import time
-
-
-class Ratelimiter:
-    def __init__(self, allowance_per_period, period, operation_cost=1, mode='sleep'):
-        '''
-        allowance_per_period:
-            The number of operations we can perform per `period` seconds.
-
-        period:
-            The number of seconds over which we can perform `allowance_per_period` operations.
-
-        operation_cost:
-            The default amount to remove from our balance after each operation.
-            Pass a `cost` parameter to `self.limit` to use a nondefault value.
-
-        mode:
-            'sleep': If we do not have the balance for an operation, sleep until we do.
-                Return True every time.
-            'reject': If we do not have the balance for an operation, return False.
-        '''
-        if mode not in ('sleep', 'reject'):
-            raise ValueError('Invalid mode %s' % repr(mode))
-        self.allowance_per_period = allowance_per_period
-        self.period = period
-        self.operation_cost = operation_cost
-        self.mode = mode
-
-        self.last_operation = time.time()
-        self.balance = 0
-        self.gain_rate = allowance_per_period / period
-
-    def limit(self, cost=None):
-        if cost is None:
-            cost = self.operation_cost
-        timediff = time.time() - self.last_operation
-        self.balance += timediff * self.gain_rate
-        self.balance = min(self.balance, self.allowance_per_period)
-        successful = False
-
-        deficit = cost - self.balance
-        if deficit > 0 and self.mode == 'sleep':
-            time_needed = (deficit / self.gain_rate)
-            #print(self.balance, deficit, 'Need to sleep %f' % time_needed)
-            time.sleep(time_needed)
-            self.balance = cost
-
-        #print(self.balance)
-        if self.balance >= cost:
-            #print('pass')
-            self.balance -= cost
-            successful = True
-
-        self.last_operation = time.time()
-
-        return successful
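For context on the module being deleted here (the other files in this commit now append its own project directory to sys.path and import it from there), a minimal usage sketch of the class as it was, assuming Ratelimiter is importable from that standalone project:

    limiter = Ratelimiter(allowance_per_period=4, period=1, mode='sleep')
    for index in range(10):
        limiter.limit()                   # sleeps as needed so at most 4 operations happen per second
        print('performed operation', index)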
@@ -2,13 +2,14 @@ import collections
 import glob
 import json
 import os
-import ratelimiter
 import shutil
 import stat
 import string
 import sys
 import time
 
+sys.path.append('C:\\git\\else\\ratelimiter'); import ratelimiter
+
 BYTE = 1
 KIBIBYTE = BYTE * 1024
 MIBIBYTE = KIBIBYTE * 1024