This commit is contained in:
unknown 2016-07-27 20:41:13 -07:00
parent 84b0578ef3
commit 25fd827eb1
13 changed files with 1486 additions and 242 deletions

View file

@ -1,3 +1,5 @@
import re
BYTE = 1
KIBIBYTE = 1024 * BYTE
MIBIBYTE = 1024 * KIBIBYTE
@ -19,25 +21,38 @@ UNIT_STRINGS = {
ZEBIBYTE: 'ZiB',
YOBIBYTE: 'YiB',
}
UNITS_SORTED = sorted(UNIT_STRINGS.keys(), reverse=True)
def bytestring(bytes):
possible_units = sorted(UNIT_STRINGS.keys(), reverse=True)
def bytestring(size, force_unit=None):
'''
Convert a number into a binary-standard string.
force_unit:
If None, an appropriate size unit is chosen automatically.
Otherwise, you can provide one of the size constants to force that divisor.
'''
# choose which magnitude to use as the divisor
if bytes < 1:
appropriate_unit = 1
if force_unit is None:
divisor = get_appropriate_divisor(size)
else:
for unit in possible_units:
if bytes >= unit:
appropriate_unit = unit
break
divisor = force_unit
size_unit_string = UNIT_STRINGS[appropriate_unit]
size_string = '%.3f %s' % ((bytes / appropriate_unit), size_unit_string)
size_unit_string = UNIT_STRINGS[divisor]
size_string = '%.3f %s' % ((size / divisor), size_unit_string)
return size_string
def get_appropriate_divisor(size):
size = abs(size)
for unit in UNITS_SORTED:
if size >= unit:
appropriate_unit = unit
break
else:
appropriate_unit = 1
return appropriate_unit
def parsebytes(string):
import re
string = string.lower().replace(' ', '')
matches = re.findall('((\\.|\\d)+)', string)
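A hedged usage sketch of the new `force_unit` parameter (the values follow from the constants above; output uses the '%.3f' format shown in `bytestring`):

    >>> bytestring(123456789)
    '117.738 MiB'
    >>> bytestring(123456789, force_unit=KIBIBYTE)
    '120563.271 KiB'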

311
Downloady/downloady.py Normal file
View file

@ -0,0 +1,311 @@
import sys
sys.path.append('C:\\git\\else\\ratelimiter'); import ratelimiter
sys.path.append('C:\\git\\else\\bytestring'); import bytestring
import argparse
import os
import pyperclip # pip install pyperclip
import requests
import time
import urllib
import warnings
warnings.simplefilter('ignore')
HEADERS = {
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.152 Safari/537.36'
}
SLEEPINESS = 3
FILENAME_BADCHARS = '*?"<>|'
last_request = 0
CHUNKSIZE = 16 * bytestring.KIBIBYTE
STOP = False
TIMEOUT = 600
def download_file(
url,
localname=None,
auth=None,
bytespersecond=None,
callback_progress=None,
headers=None,
overwrite=None
):
if headers is None:
headers = {}
''' Determine local filename '''
url = url.replace('%3A//', '://')
if localname in [None, '']:
localname = localize(url)
localname = filepath_sanitize(localname)
directory = os.path.split(localname)[0]
if directory != '':
os.makedirs(directory, exist_ok=True)
if bytespersecond is None:
limiter = None
else:
limiter = ratelimiter.Ratelimiter(bytespersecond, period=1)
''' Prepare condition variables '''
local_exists = os.path.exists(localname)
if local_exists and overwrite is False:
print('Overwrite off. Nothing to do.')
return
user_provided_range = 'range' in headers
if user_provided_range:
user_range_min = int(headers['range'].split('bytes=')[1].split('-')[0])
user_range_max = headers['range'].split('-')[1]
if user_range_max != '':
user_range_max = int(user_range_max)
else:
# Included to determine whether the server supports this
headers['range'] = 'bytes=0-'
# I'm using a GET instead of an actual HEAD here because some servers respond
# differently, even though they're not supposed to.
head = request('get', url, stream=True, headers=headers, auth=auth)
remote_total_bytes = int(head.headers.get('content-length', 1))
server_respects_range = (head.status_code == 206 and 'content-range' in head.headers)
seek_to = 0
header_range_min = None
header_range_max = None
head.connection.close()
if not user_provided_range:
del headers['range']
touch(localname)
file_handle = open(localname, 'r+b')
file_handle.seek(0)
''' THINGS THAT CAN HAPPEN '''
if local_exists:
local_existing_bytes = os.path.getsize(localname)
if overwrite is True:
file_handle.truncate()
if user_provided_range:
header_range_min = user_range_min
header_range_max = user_range_max
seek_to = user_range_min
elif not user_provided_range:
pass
elif overwrite is None:
if local_existing_bytes == remote_total_bytes:
print('File is 100%. Nothing to do.')
return
if user_provided_range:
if server_respects_range:
seek_to = user_range_min
else:
raise Exception('The server did not respect your range header')
elif not user_provided_range:
if server_respects_range:
print('Resuming from %d' % local_existing_bytes)
header_range_min = local_existing_bytes
header_range_max = ''
seek_to = local_existing_bytes
else:
print('File exists, but server doesn\'t allow resumes. Restart from 0?')
permission = get_permission()
if permission:
file_handle.truncate()
else:
raise Exception('Couldn\'t resume')
else:
raise TypeError('Invalid value for `overwrite`. Must be True, False, or None')
elif not local_exists:
if user_provided_range:
if server_respects_range:
file_handle.seek(user_range_min)
file_handle.write(b'\0')
header_range_min = user_range_min
header_range_max = user_range_max
seek_to = user_range_min
else:
raise Exception('The server did not respect your range header')
elif not user_provided_range:
pass
if header_range_min is not None:
headers['range'] = 'bytes={0}-{1}'.format(header_range_min, header_range_max)
bytes_downloaded = seek_to
file_handle.seek(seek_to)
download_stream = request('get', url, stream=True, headers=headers, auth=auth)
''' Begin download '''
for chunk in download_stream.iter_content(chunk_size=CHUNKSIZE):
bytes_downloaded += len(chunk)
file_handle.write(chunk)
if callback_progress is not None:
callback_progress(bytes_downloaded, remote_total_bytes)
if limiter is not None and bytes_downloaded < remote_total_bytes:
limiter.limit(len(chunk))
file_handle.close()
return localname
def filepath_sanitize(text, exclusions=''):
bet = FILENAME_BADCHARS.replace(exclusions, '')
for char in bet:
text = text.replace(char, '')
return text
def get_permission(prompt='y/n\n>', affirmative=['y', 'yes']):
permission = input(prompt)
return permission.lower() in affirmative
def is_clipboard(s):
return s.lower() in ['!c', '!clip', '!clipboard']
def localize(url):
'''
Determine the local filename appropriate for a URL.
'''
localname = urllib.parse.unquote(url)
localname = localname.split('?')[0]
localname = localname.split('/')[-1]
return localname
def progress(bytes_downloaded, bytes_total, prefix=''):
divisor = bytestring.get_appropriate_divisor(bytes_total)
bytes_total_string = bytestring.bytestring(bytes_total, force_unit=divisor)
bytes_downloaded_string = bytestring.bytestring(bytes_downloaded, force_unit=divisor)
bytes_downloaded_string = bytes_downloaded_string.rjust(len(bytes_total_string), ' ')
blocks = 50
char = '█'
percent = bytes_downloaded * 100 / bytes_total
percent = int(min(100, percent))
completed_blocks = char * int(blocks * percent / 100)
incompleted_blocks = ' ' * (blocks - len(completed_blocks))
statusbar = '{char}{complete}{incomplete}{char}'.format(
char=char,
complete=completed_blocks,
incomplete=incompleted_blocks,
)
end = '\n' if percent == 100 else ''
message = '\r{prefix}{bytes_downloaded} {statusbar} {bytes_total}'
message = message.format(
prefix=prefix,
bytes_downloaded=bytes_downloaded_string,
bytes_total=bytes_total_string,
statusbar=statusbar,
)
print(message, end=end, flush=True)
def progress2(bytes_downloaded, bytes_total, prefix=''):
percent = (bytes_downloaded * 100) / bytes_total
percent = min(100, percent)
# Decide the line ending while `percent` is still numeric; '%08.4f' turns it into a string.
end = '\n' if percent == 100 else ''
percent = '%08.4f' % percent
bytes_downloaded_string = '{0:,}'.format(bytes_downloaded)
bytes_total_string = '{0:,}'.format(bytes_total)
bytes_downloaded_string = bytes_downloaded_string.rjust(len(bytes_total_string), ' ')
message = '\r{prefix}{bytes_downloaded} / {bytes_total} / {percent}%'
message = message.format(
prefix=prefix,
bytes_downloaded=bytes_downloaded_string,
bytes_total=bytes_total_string,
percent=percent,
)
print(message, end=end, flush=True)
def request(method, url, stream=False, headers=None, timeout=TIMEOUT, **kwargs):
if headers is None:
headers = {}
for (key, value) in HEADERS.items():
headers.setdefault(key, value)
session = requests.Session()
session.max_redirects = 40
method = {
'get': session.get,
'head': session.head,
'post': session.post,
}[method]
req = method(url, stream=stream, headers=headers, timeout=timeout, **kwargs)
req.raise_for_status()
return req
def touch(filename):
f = open(filename, 'ab')
f.close()
return
def download_argparse(args):
url = args.url
if is_clipboard(url):
url = pyperclip.paste()
print(url)
overwrite = {
'y':True, 't':True,
'n':False, 'f':False,
}.get(args.overwrite.lower(), None)
callback = args.callback
if callback == '1':
callback = progress
elif callback == '2':
callback = progress2
bytespersecond = args.bytespersecond
if bytespersecond is not None:
bytespersecond = bytestring.parsebytes(bytespersecond)
headers = {}
if args.range is not None:
headers['range'] = 'bytes=%s' % args.range
download_file(
url=url,
localname=args.localname,
bytespersecond=bytespersecond,
callback_progress=callback,
headers=headers,
overwrite=overwrite,
)
if __name__ == '__main__':
parser = argparse.ArgumentParser()
#p_download_file = subparsers.add_parser('download_file')
parser.add_argument('url')
parser.add_argument('localname', nargs='?', default=None)
parser.add_argument('-c', '--callback', dest='callback', default=progress)
parser.add_argument('-bps', '--bytespersecond', dest='bytespersecond', default=None)
parser.add_argument('-ow', '--overwrite', dest='overwrite', default='')
parser.add_argument('-r', '--range', dest='range', default=None)
parser.set_defaults(func=download_argparse)
args = parser.parse_args()
args.func(args)
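A hedged usage sketch of `download_file` (the URL and rate here are illustrative, not from this repo):

    import downloady
    downloady.download_file(
        'http://example.com/bigfile.zip',   # hypothetical URL
        localname='bigfile.zip',
        bytespersecond=100 * 1024,          # ~100 KiB/s, enforced by ratelimiter
        callback_progress=downloady.progress,
    )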

View file

@ -0,0 +1,45 @@
THINGS THAT CAN HAPPEN
├───File exists
│ ├───User disables overwrite
│ │ └───Return because there's nothing to do
│ │
│ ├───User enables overwrite
│ │ ├───User requests range
│ │ │ └───Raise exception because requesting a range and forcing overwrite are mutually exclusive
│ │ │
│ │ └───User does not request range
│ │ └───File opened, truncated, full download
│ │
│ └───User does not specify overwrite
│ ├───File is same size as content-length
│ │ └───Return because there's nothing to do.
│ │
│ ├───User requests range
│ │ ├───Server respects range
│ │ │ └───File opened, seeked to request, bytes filled in
│ │ │
│ │ └───Server does not respect range
│ │ └───Raise exception because user's request can't be fulfilled
│ │
│ └───User does not request range
│ ├───Server respects range
│ │ └───File is opened, seeked to end, download resumes
│ │
│ └───Server does not respect range
│ └───Ask for permission to overwrite from beginning
└───File does not exist
├───User requests range
│ ├───Server respects range
│ │ │ └───File created, seeked to request, bytes filled in; everything else left 0
│ └───Server does not respect range
│ └───Raise exception because user's request can't be fulfilled
└───User does not request range
└───File created, full download
Possible ambiguity: If the user requests a range and the file does not exist, does he want:
1. to fill the file with zeroes, and patch the requested bytes into their correct spot; or
2. to create the file empty, and only write the requested bytes?
I will assume #1 because that plays nicely with other Things That Can Happen, such as letting the user patch the other bytes in later.
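A minimal sketch of interpretation #1 (hypothetical range values; seeking past the end of a fresh file leaves the earlier bytes as zeroes on typical filesystems):

    range_min, range_max = 500, 999                  # hypothetical requested range
    payload = b'x' * (range_max - range_min + 1)     # stand-in for the ranged response body
    with open('partial.bin', 'wb') as f:
        f.seek(range_min)
        f.write(payload)
    # Bytes 0-499 read back as zeroes, and a later ranged request can patch them in.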

View file

@ -0,0 +1,210 @@
<!DOCTYPE html>
<html>
<!--
This page lets you compare multiple audio tracks simultaneously,
to see whether you can spot the difference in audio quality.
The audio elements are scrambled when loaded, you can hover over the
"spoiler" tag to see the file.
-->
<head>
<title>Fear your ears</title>
<meta charset="UTF-8">
</head>
<body>
<p>Drag files onto the page</p>
<button onclick="cycle();">Cycle</button>
<button onclick="pause_all();">Pause</button>
<button onclick="play_all();">Play</button>
<p>
Please use this slider as the seekbar. Dragging the audio elements directly causes syncing problems.
<br>
<input id="seekbar" type="range">
</p>
<div id="workspace">
</div>
</body>
</html>
<style>
html, body
{
height: 100%;
}
audio
{
vertical-align: middle;
width: 600px;
}
div button
{
height: 100%;
vertical-align: middle;
}
div
{
display: block;
}
input
{
width: 600px;
}
#workspace
{
width: 100%;
height: 100%;
}
</style>
<script type="text/javascript">
var WORKSPACE = document.getElementById("workspace");
var SEEKBAR = document.getElementById("seekbar");
var audios = [];
var current = 0;
var duration = 0;
var seek_lock = false;
SEEKBAR.min = 0;
SEEKBAR.max = 0;
SEEKBAR.value = 0;
function cycle()
{
if (audios == undefined)
{
return
}
if (audios.length == 0)
{
return;
}
current = (current + 1) % audios.length;
switch_to(current)
}
function drop_event(e)
{
e = e || window.event;
e.preventDefault();
var files = Array.from(e.dataTransfer.files);
shuffle(files);
while (WORKSPACE.lastChild)
{
WORKSPACE.removeChild(WORKSPACE.lastChild);
}
for (var index = 0; index < files.length; index += 1)
{
var mediadiv = document.createElement("div");
var anchor = document.createElement("a");
var audio = document.createElement("audio");
var source = document.createElement("source");
var choosebutton = document.createElement("button");
audio.preload = "auto";
audio.controls = true;
audio.volume = 0;
anchor.innerHTML = "spoiler";
anchor.title = files[index]["name"];
anchor.href = "#";
source.src = URL.createObjectURL(files[index]);
choosebutton.innerHTML = "play";
choosebutton.index = index;
choosebutton.onclick = function()
{
current = this.index;
switch_to(current);
}
audio.appendChild(source);
mediadiv.appendChild(anchor);
mediadiv.appendChild(audio);
mediadiv.appendChild(choosebutton);
WORKSPACE.appendChild(mediadiv);
audios.push(audio);
}
load();
}
function load()
{
audios = Array.from(document.getElementsByTagName("audio"));
current = -1;
if (audios.length == 0)
{
return;
}
audios[0].oncanplay = function()
{
duration = this.duration;
SEEKBAR.max = duration;
}
SEEKBAR.value = 0;
}
function pause_all()
{
for (var index = 0; index < audios.length; index += 1)
{
audios[index].pause();
}
}
function play_all()
{
for (var index = 0; index < audios.length; index += 1)
{
audios[index].play();
}
}
function seek_event()
{
if (seek_lock)
{
console.log("locked");
return;
}
pause_all();
seek_lock = true;
var timestamp = this.value;
for (var index = 0; index < audios.length; index += 1)
{
audios[index].currentTime = timestamp;
}
setTimeout(function(){seek_lock = false; play_all();}, 500);
}
function shuffle(a)
{
var target_index;
var temp;
for (var index = a.length; index > 0; index -= 1)
{
target_index = Math.floor(Math.random() * index);
temp = a[index - 1];
a[index - 1] = a[target_index];
a[target_index] = temp;
}
}
function switch_to(audio_index)
{
play_all();
for (var index = 0; index < audios.length; index += 1)
{
audios[index].volume = 0;
}
audios[audio_index].volume = 1;
}
SEEKBAR.onchange = seek_event;
document.body.addEventListener('drop', drop_event);
document.body.addEventListener('dragover', function(e){e.preventDefault();});
</script>

770
Javascript/opendir_image.js Normal file
View file

@ -0,0 +1,770 @@
javascript:
/*
This javascript bookmarklet takes anchors, images, and media elements from
the page, and displays them in a nice gallery. Designed for use on open
directory listings, but works in many places.
*/
var seen_urls = new Set();
var all_urls = [];
var image_height = 200;
var video_height = 300;
var audio_width = 1000;
var IMAGE_TYPES = ["\\.jpg", "\\.jpeg", "\\.bmp", "\\.tiff", "\\.tif", "\\.gif", "\\.png"].join("|");
var AUDIO_TYPES = ["\\.aac", "\\.mp3", "\\.m4a", "\\.ogg", "\\.wav"].join("|");
var VIDEO_TYPES = ["\\.mp4", "\\.m4v", "\\.webm", "\\.ogv"].join("|");
IMAGE_TYPES = new RegExp(IMAGE_TYPES, "i");
AUDIO_TYPES = new RegExp(AUDIO_TYPES, "i");
VIDEO_TYPES = new RegExp(VIDEO_TYPES, "i");
var has_started = false;
var CSS = "";
CSS += "audio, video { display: block; }";
CSS += "audio { width: $audio_width$px; }";
CSS += "video { height: $video_height$px; }";
CSS += "img { display: block; max-width: 100% }";
CSS += ".control_panel { background-color: #aaa; min-height: 10px; width: 100%; }";
CSS += ".workspace { background-color: #ddd; min-height: 10px; float: left; }";
CSS += ".arealabel { position:absolute; right: 0; bottom: 0; opacity: 0.8; background-color: #000; color: #fff; }";
CSS += ".delete_button { color: #d00; font-family: Arial; font-size: 11px; left: 0; position: absolute; top: 0; width: 25px; }";
CSS += ".load_button { position: absolute; top: 10%; width: 100%; height: 80%; word-wrap: break-word; }";
CSS += ".odi_anchor { display: block; }";
CSS += ".odi_image_div, .odi_media_div { display: inline-block; margin: 5px; float: left; position: relative; background-color: #aaa; }";
CSS += ".odi_image_div { min-width: $image_height$px; }";
function apply_css()
{
console.log("applying CSS");
var css = document.createElement("style");
css.innerHTML = format_css();
document.head.appendChild(css);
}
function array_extend(a, b)
{
/* Append all elements of b onto a */
for (var i = 0; i < b.length; i += 1)
{
a.push(b[i]);
}
}
function array_remove(a, item)
{
/* Thanks peter olson http://stackoverflow.com/a/5767335 */
for(var i = a.length - 1; i >= 0; i -= 1)
{
if(a[i].id === item.id)
{
a.splice(i, 1);
}
}
}
function clear_page()
{
/* Remove EVERYTHING, insert own css */
console.log("clearing page");
document.removeChild(document.documentElement);
var html = document.createElement("html");
document.appendChild(html);
var head = document.createElement("head");
html.appendChild(head);
var body = document.createElement("body");
html.appendChild(body);
document.documentElement = html;
return true;
}
function clear_workspace()
{
console.log("clearing workspace");
workspace = document.getElementById("WORKSPACE");
while (workspace.children.length > 0)
{
workspace.removeChild(workspace.children[0]);
}
return true;
}
function create_command_box(boxname, operation)
{
var box = document.createElement("input");
box.type = "text";
box.id = boxname;
box.onkeydown=function()
{
if (event.keyCode == 13)
{
operation(this.value);
}
};
return box;
}
function create_command_button(label, operation)
{
var button = document.createElement("button");
button.innerHTML = label;
button.onclick = operation;
return button;
}
function create_command_box_button(boxname, label, operation)
{
var box = create_command_box(boxname, operation);
var button = create_command_button(label, function(){operation(box.value)});
var div = document.createElement("div");
div.appendChild(box);
div.appendChild(button);
return div;
}
function create_odi_divs(urls)
{
image_divs = [];
media_divs = [];
for (var index = 0; index < urls.length; index += 1)
{
url = urls[index];
if (!url)
{
continue;
}
/*console.log("Building for " + url);*/
var div = null;
var paramless_url = url.split("?")[0];
var basename = get_basename(url);
if (paramless_url.match(IMAGE_TYPES))
{
var div = document.createElement("div");
div.id = generate_id(32);
div.className = "odi_image_div";
div.odi_type = "image";
var a = document.createElement("a");
a.className = "odi_anchor";
a.odi_div = div;
a.href = url;
a.target = "_blank";
var img = document.createElement("img");
img.odi_div = div;
img.anchor = a;
img.border = 0;
img.height = image_height;
img.lazy_src = url;
img.src = "";
var arealabel = document.createElement("span");
arealabel.className = "arealabel";
arealabel.odi_div = div;
arealabel.innerHTML = "0x0";
img.arealabel = arealabel;
var load_button = document.createElement("button");
load_button.className = "load_button";
load_button.odi_div = div;
load_button.innerHTML = basename;
load_button.onclick = function()
{
this.parentElement.removeChild(this);
lazy_load_one(this.odi_div);
};
div.image = img;
div.anchor = a;
a.appendChild(img);
a.appendChild(arealabel);
div.appendChild(a);
div.appendChild(load_button);
image_divs.push(div);
}
else
{
if (paramless_url.match(AUDIO_TYPES))
{
var mediatype = "audio";
}
else if (paramless_url.match(VIDEO_TYPES))
{
var mediatype = "video";
}
else
{
continue;
}
var div = document.createElement("div");
div.id = generate_id(32);
div.className = "odi_media_div";
div.odi_type = "media";
var center = document.createElement("center");
center.odi_div = div;
var a = document.createElement("a");
a.odi_div = div;
a.innerHTML = get_basename(url);
a.target = "_blank";
a.style.display = "block";
a.href = url;
var media = document.createElement(mediatype);
media.odi_div = div;
media.controls = true;
media.preload = "none";
sources = get_alternate_sources(url);
for (var sourceindex = 0; sourceindex < sources.length; sourceindex += 1)
{
source = document.createElement("source");
source.src = sources[sourceindex];
source.odi_div = div;
media.appendChild(source);
}
div.media = media;
div.anchor = a;
center.appendChild(a);
div.appendChild(center);
div.appendChild(media);
media_divs.push(div);
}
if (div == null)
{
continue;
}
div.url = url;
div.basename = basename;
button = document.createElement("button");
button.className = "delete_button";
button.odi_div = div;
button.innerHTML = "X";
button.onclick = function()
{
delete_odi_div(this);
};
div.appendChild(button);
/*console.log("built " + div);*/
}
odi_divs = [];
array_extend(odi_divs, image_divs);
odi_divs.push(document.createElement("br"));
array_extend(odi_divs, media_divs);
return odi_divs;
}
function create_workspace()
{
clear_page();
apply_css();
console.log("creating workspace");
var control_panel = document.createElement("div");
var workspace = document.createElement("div");
var resizer = create_command_box_button("resizer", "resize", resize_images);
var refilter = create_command_box_button("refilter", "remove regex", function(x){filter_re(x, true)});
var rekeeper = create_command_box_button("rekeeper", "keep regex", function(x){filter_re(x, false)});
var heightfilter = create_command_box_button("heightfilter", "min height", filter_height);
var widthfilter = create_command_box_button("widthfilter", "min width", filter_width);
var sorter = create_command_button("sort size", sort_size);
var dumper = create_command_button("dump urls", dump_urls);
var start_button = create_command_button("load all", function(){start(); this.parentElement.removeChild(this);});
start_button.style.display = "block";
control_panel.id = "CONTROL_PANEL";
control_panel.className = "control_panel";
workspace.id = "WORKSPACE";
workspace.className = "workspace";
document.body.appendChild(control_panel);
control_panel.appendChild(resizer);
control_panel.appendChild(refilter);
control_panel.appendChild(rekeeper);
control_panel.appendChild(heightfilter);
control_panel.appendChild(widthfilter);
control_panel.appendChild(sorter);
control_panel.appendChild(dumper);
control_panel.appendChild(start_button);
document.body.appendChild(workspace);
}
function delete_odi_div(element)
{
if (element.odi_div != undefined)
{
element = element.odi_div;
}
if (element.media != undefined)
{
/* http://stackoverflow.com/questions/3258587/how-to-properly-unload-destroy-a-video-element */
element.media.pause();
element.media.src = "";
element.media.load();
}
var parent = element.parentElement;
parent.removeChild(element);
}
function dump_urls()
{
var divs = get_odi_divs();
var textbox = document.getElementById("url_dump_box");
if (textbox == null)
{
textbox = document.createElement("textarea");
textbox.id = "urldumpbox";
textbox.style.overflowY = "scroll";
textbox.style.height = "300px";
textbox.style.width = "90%";
workspace = document.getElementById("WORKSPACE");
workspace.appendChild(textbox);
}
textbox.innerHTML = "";
for (var index = 0; index < divs.length; index += 1)
{
textbox.innerHTML += divs[index].url + "\n";
}
}
function fill_workspace(divs)
{
clear_workspace();
console.log("filling workspace");
workspace = document.getElementById("WORKSPACE");
for (var index = 0; index < divs.length; index += 1)
{
workspace.appendChild(divs[index]);
}
}
function filter_dimension(dimension, minimum)
{
minimum = parseInt(minimum);
images = Array.from(document.images);
for (var i = 0; i < images.length; i += 1)
{
image = images[i];
if (image[dimension] == 0)
{continue;}
if (image[dimension] < minimum)
{
delete_odi_div(image);
continue;
}
}
}
function filter_height(minimum)
{
filter_dimension('naturalHeight', minimum);
}
function filter_width(minimum)
{
filter_dimension('naturalWidth', minimum);
}
function filter_re(pattern, do_delete)
{
if (!pattern)
{
return;
}
pattern = new RegExp(pattern, "i");
do_keep = !do_delete;
console.log(pattern + " " + do_delete);
odi_divs = get_odi_divs();
for (var index = 0; index < odi_divs.length; index += 1)
{
div = odi_divs[index];
match = div.basename.match(pattern);
if ((match && do_delete) || (!match && do_keep))
{
delete_odi_div(div);
}
}
}
function format_css()
{
var css = CSS;
while (true)
{
matches = css.match("\\$.+?\\$");
if (!matches)
{
break;
}
matches = Array.from(new Set(matches));
for (var index = 0; index < matches.length; index += 1)
{
var injector = matches[index];
var injected = injector.replace(new RegExp("\\$", 'g'), "");
/*console.log(injector);*/
/*console.log(injected);*/
css = css.replace(injector, this[injected]);
}
}
return css;
}
function get_all_urls()
{
var urls = [];
function include(source, attr)
{
for (var index = 0; index < source.length; index += 1)
{
url = source[index][attr];
if (url === undefined)
{continue;}
if (seen_urls.has(url))
{continue;}
console.log(url);
if (url.indexOf("thumbs.redditmedia") != -1)
{console.log("Rejecting reddit thumb"); continue;}
if (url.indexOf("pixel.reddit") != -1 || url.indexOf("reddit.com/static/pixel") != -1)
{console.log("Rejecting reddit pixel"); continue}
if (url.indexOf("/thumb/") != -1)
{console.log("Rejecting /thumb/"); continue;}
if (url.indexOf("/loaders/") != -1)
{console.log("Rejecting loader"); continue;}
if (url.indexOf("memegen") != -1)
{console.log("Rejecting retardation"); continue;}
if (url.indexOf("4cdn") != -1 && url.indexOf("s.jpg") != -1)
{console.log("Rejecting 4chan thumb"); continue;}
sub_urls = normalize_url(url);
if (sub_urls == null)
{continue;}
for (var url_index = 0; url_index < sub_urls.length; url_index += 1)
{
sub_url = sub_urls[url_index];
if (seen_urls.has(sub_url))
{continue;}
urls.push(sub_url);
seen_urls.add(sub_url);
all_urls.push(sub_url);
}
seen_urls.add(url);
}
}
var docs = [];
docs.push(document);
while (docs.length > 0)
{
var d = docs.pop();
include(d.links, "href");
include(d.images, "src");
include(d.getElementsByTagName("audio"), "src");
include(d.getElementsByTagName("video"), "src");
include(d.getElementsByTagName("source"), "src");
}
console.log("collected " + urls.length + " urls.");
return urls;
}
function get_alternate_sources(url)
{
/*
For sites that must try multiple resource urls, that logic
may go here
*/
return [url];
}
function get_basename(url)
{
var basename = url.split("/");
basename = basename[basename.length - 1];
return basename;
}
function get_gfycat_video(id)
{
var url = "https://gfycat.com/cajax/get/" + id;
var request = new XMLHttpRequest();
request.answer = null;
request.onreadystatechange = function()
{
if (request.readyState == 4 && request.status == 200)
{
var text = request.responseText;
var details = JSON.parse(text);
request.answer = details["gfyItem"]["mp4Url"];
}
};
var asynchronous = false;
request.open("GET", url, asynchronous);
request.send(null);
return request.answer;
}
function get_lazy_divs()
{
var divs = document.getElementsByTagName("div");
var lazy_elements = [];
for (index = 0; index < divs.length; index += 1)
{
var div = divs[index];
if (div.image && div.image.lazy_src)
{
lazy_elements.push(div);
}
}
return lazy_elements;
}
function get_odi_divs()
{
var divs = document.getElementsByTagName("div");
var odi_divs = [];
for (index = 0; index < divs.length; index += 1)
{
var div = divs[index];
if (div.id.indexOf("odi_") == -1)
{
continue;
}
odi_divs.push(div);
}
return odi_divs;
}
function generate_id(length)
{
/* Thanks csharptest http://stackoverflow.com/a/1349426 */
var text = [];
var possible = "abcdefghijklmnopqrstuvwxyz";
for(var i = 0; i < length; i += 1)
{
c = possible.charAt(Math.floor(Math.random() * possible.length));
text.push(c);
}
return "odi_" + text.join("");
}
function lazy_load_all()
{
lazies = get_lazy_divs();
lazies.reverse();
lazy_buttons = document.getElementsByClassName("load_button");
for (var index = 0; index < lazy_buttons.length; index += 1)
{
lazy_buttons[index].parentElement.removeChild(lazy_buttons[index]);
}
while (lazies.length > 0)
{
var element = lazies.pop();
if (element.image != undefined)
{
break;
}
}
if (element == undefined)
{
return;
}
lazy_load_one(element, true);
return;
}
function lazy_load_one(element, comeback)
{
var image = element.image;
if (!image.lazy_src)
{
return;
}
image.onload = function()
{
width = this.naturalWidth;
height = this.naturalHeight;
/* 161x81 appears to be a dead-image placeholder size, so discard it. */
if (width == 161 && height == 81)
{delete_odi_div(this);}
this.arealabel.innerHTML = width + " x " + height;
this.odi_div.style.minWidth = "0px";
if (comeback){lazy_load_all()};
};
image.onerror = function()
{
array_remove(all_urls, this.odi_div);
delete_odi_div(this);
if (comeback){lazy_load_all()};
};
/*console.log("Lazy loading " + element.lazy_src)*/
image.src = image.lazy_src;
image.lazy_src = null;
return;
}
function normalize_url(url)
{
var protocol = window.location.protocol;
if (protocol == "file:")
{
protocol = "http:";
}
url = url.replace("http:", protocol);
url = url.replace("https:", protocol);
url = decodeURIComponent(unescape(url));
url = url.replace("imgur.com/gallery/", "imgur.com/a/");
if (url.indexOf("vidble") >= 0)
{
url = url.replace("_med", "");
url = url.replace("_sqr", "");
}
else if (url.indexOf("imgur.com/a/") != -1)
{
var urls = [];
var id = url.split("imgur.com/a/")[1];
id = id.split("#")[0].split("?")[0];
console.log("imgur album: " + id);
var url = "https://api.imgur.com/3/album/" + id;
var request = new XMLHttpRequest();
request.onreadystatechange = function()
{
if (request.readyState == 4 && request.status == 200)
{
var text = request.responseText;
var images = JSON.parse(request.responseText);
images = images['data']['images'];
for (var index = 0; index < images.length; index += 1)
{
var image = images[index];
var image_url = image["mp4"] || image["link"];
if (!image_url){continue;}
image_url = normalize_url(image_url)[0];
console.log("+" + image_url);
urls.push(image_url);
}
}
};
var asynchronous = false;
request.open("GET", url, asynchronous);
request.setRequestHeader("Authorization", "Client-ID 1d8d9b36339e0e2");
request.send(null);
return urls;
}
else if (url.indexOf("imgur.com") >= 0)
{
var url_parts = url.split("/");
var image_id = url_parts[url_parts.length - 1];
var extension = ".jpg";
if (image_id.indexOf(".") != -1)
{
image_id = image_id.split(".");
extension = "." + image_id[1];
image_id = image_id[0];
}
extension = extension.replace(".gifv", ".mp4");
extension = extension.replace(".gif", ".mp4");
if (image_id.length % 2 == 0)
{
image_id = image_id.split("");
image_id[image_id.length - 1] = "";
image_id = image_id.join("");
}
url = protocol + "//i.imgur.com/" + image_id + extension;
}
else if (url.indexOf("gfycat.com") >= 0)
{
var gfy_id = url.split("/");
gfy_id = gfy_id[gfy_id.length - 1];
gfy_id = gfy_id.split(".")[0];
if (gfy_id.length > 0)
{
url = get_gfycat_video(gfy_id);
}
}
return [url];
}
function resize_images(height)
{
odi_divs = get_odi_divs();
image_height = height;
for (var index = 0; index < odi_divs.length; index += 1)
{
var div = odi_divs[index];
if (div.image == undefined)
{
continue;
}
div.image.height = height;
}
}
function sort_size()
{
console.log("sorting size");
odi_divs = get_odi_divs();
odi_divs.sort(sort_size_comparator);
odi_divs.reverse();
clear_workspace();
fill_workspace(odi_divs);
}
function sort_size_comparator(div1, div2)
{
if (div1.odi_type != "image" || div1.lazy_src)
{
return -1;
}
if (div2.odi_type != "image" || div2.lazy_src)
{
return 1;
}
pixels1 = div1.image.naturalHeight * div1.image.naturalWidth;
pixels2 = div2.image.naturalHeight * div2.image.naturalWidth;
if (pixels1 < pixels2)
{return -1;}
if (pixels1 > pixels2)
{return 1;}
return 0;
}
function start()
{
lazy_load_all();
has_started = true;
}
function main()
{
all_urls = get_all_urls();
var divs = create_odi_divs(all_urls);
create_workspace();
fill_workspace(divs);
}
main();

View file

@ -1,8 +1,16 @@
<!DOCTYPE html>
<html>
<!--
This page lets you see new posts as they are made in a subreddit.
Just type a subreddit's name into the box, and press Start.
Although the default behavior is to interpret the box's contents as a subreddit
name, you can actually insert something like "/u/goldensights/m/loband" to watch
https://reddit.com/u/goldensights/m/loband/new
-->
<head>
<title>/new</title>
<meta charset="UTF-8">
</head>
<body>

View file

@ -1,15 +0,0 @@
import jobs
import time
def continuous_register():
print('w')
jobs.register(2, continuous_register)
jobs.register(5, print, args=('heyo',))
time.sleep(10)
print('x')
jobs.register(5, print, args=('heyo',))
time.sleep(2)
jobs.unregister(print, args=('heyo', ), kwargs={})
time.sleep(10)
print('y')

View file

@ -1,52 +0,0 @@
import signal
import time
import threading
class JobInterruption(Exception):
pass
class JobSchedulingError(Exception):
pass
THREAD = None
JOBS = {}
def thread_manager():
while True:
now = time.time()
for (functionid, joblist) in JOBS.items():
for job in joblist:
if now < job[0]:
continue
job[1](*job[2], **job[3])
joblist.remove(job)
time.sleep(0.5)
def launch_thread():
global THREAD
if THREAD is None or THREAD.is_alive is False:
THREAD = threading.Thread(target=thread_manager)
THREAD.daemon = True
THREAD.start()
def register(seconds_from_now, function, args=[], kwargs={}):
if seconds_from_now <= 0:
raise JobSchedulingError('cannot schedule jobs for the past')
iid = id(function)
schedule = time.time() + seconds_from_now
if iid not in JOBS:
JOBS[iid] = [(schedule, function, args, kwargs)]
else:
JOBS[iid].append( (schedule, function, args, kwargs) )
launch_thread()
def unregister_all(function):
iid = id(function)
if iid in JOBS:
del JOBS[iid]
def unregister(function, args, kwargs):
joblist = JOBS[id(function)]
for job in joblist:
if job[1:] != (function, args, kwargs):
continue
joblist.remove(job)

View file

@ -1,10 +1,16 @@
Open Dir DL
===========
- 2016 07 25
- Removed the `Downloader` class after watching [this Jack Diederich talk](https://youtu.be/o9pEzgHorH0) about unnecessary classes.
- Bytespersecond is now parsed by `bytestring.parsebytes` rather than `eval`, so you can write "100k" as opposed to "100 * 1024" etc.
- 2016 07 19
- Rearranged the big blocks to be in a logical order rather than alphabetical order. Walker > Downloader > other classes
- Renamed the `keep_pattern` and `remove_pattern` functions to `keep_pattern_argparse` etc to be consistent with the other functions that take argparse namespaces as their only parameter. Does not affect the commandline usage.
- Renamed the `keep_pattern` and `remove_pattern` functions to `keep_pattern_argparse` etc to be consistent with the other functions used by the argparser. *Does not affect the commandline usage!*
- Gave the HTML tree divs a very gentle shadow and alternating colors to help with depth perception.
- Fixed some mismatched code vs comments
- Fixed the allowed characters parameter of `filepath_sanitize`, which was not written correctly but worked out of luck.
- 2016 07 08
- Fixed bug in which trees wouldn't generate on server:port urls.
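For example, a hedged sketch of `bytestring.parsebytes` (the unit letters and exact return values are assumptions based on the binary-standard constants in bytestring.py):

    bytestring.parsebytes('100k')   # -> 102400  (100 * 1024)
    bytestring.parsebytes('1.5m')   # -> 1572864 (1.5 * 1024 * 1024)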

View file

@ -181,6 +181,12 @@ SKIPPABLE_FILETYPES = [
]
SKIPPABLE_FILETYPES = set(x.lower() for x in SKIPPABLE_FILETYPES)
# Will be ignored completely. These are case-sensitive.
BLACKLISTED_FILENAMES = [
'desktop.ini',
'thumbs.db',
]
# oh shit
HTML_TREE_HEADER = '''
<meta charset="UTF-8">
@ -262,12 +268,13 @@ those files.
## ##
class Walker:
def __init__(self, walkurl, databasename=None, fullscan=False):
if walkurl[-1] != '/':
if not walkurl.endswith('/'):
walkurl += '/'
self.walkurl = walkurl
if databasename is None or databasename == "":
self.domain = url_to_filepath(walkurl)['root']
databasename = self.domain + '.db'
if databasename in (None, ''):
domain = url_to_filepath(self.walkurl)['root']
databasename = domain + '.db'
databasename = databasename.replace(':', '#')
self.databasename = databasename
@ -289,27 +296,29 @@ class Walker:
def extract_hrefs(self, response, tag='a', attribute='href'):
'''
Given a Response object, extract href urls.
External links, index sort links, and desktop.ini are discarded.
External links, index sort links, and blacklisted files are discarded.
'''
import bs4
soup = bs4.BeautifulSoup(response.text, 'html.parser')
elements = soup.findAll(tag)
elements = soup.find_all(tag)
for element in elements:
try:
href = element[attribute]
except KeyError:
continue
href = urllib.parse.urljoin(response.url, href)
if not href.startswith(self.walkurl):
# Don't go to other sites or parent directories.
continue
#if 'C=' in href and 'O=' in href:
if any(sorter in href for sorter in ('?C=', '?O=', '?M=', '?D=', '?N=', '?S=')):
# Alternative sort modes for index pages.
continue
if href.endswith('desktop.ini'):
# I hate these things.
if any(href.endswith(blacklisted) for blacklisted in BLACKLISTED_FILENAMES):
continue
yield href
def process_url(self, url=None):
@ -389,8 +398,6 @@ class Walker:
self.queue.appendleft(url)
try:
while len(self.queue) > 0:
# Popping from right helps keep the queue short because it handles the files
# early.
url = self.queue.popleft()
self.process_url(url)
line = '{:,} Remaining'.format(len(self.queue))
@ -403,62 +410,6 @@ class Walker:
## WALKER ##########################################################################################
## DOWNLOADER ######################################################################################
## ##
class Downloader:
def __init__(self, databasename, outputdir=None, headers=None):
self.databasename = databasename
self.sql = sqlite3.connect(databasename)
self.cur = self.sql.cursor()
if outputdir is None or outputdir == "":
# This assumes that all URLs in the database are from the same domain.
# If they aren't, it's the user's fault.
self.cur.execute('SELECT url FROM urls LIMIT 1')
url = self.cur.fetchone()[0]
outputdir = url_to_filepath(url)['root']
self.outputdir = outputdir
def download(self, overwrite=False, bytespersecond=None):
overwrite = bool(overwrite)
self.cur.execute('SELECT * FROM urls WHERE do_download == 1 ORDER BY url')
while True:
fetch = self.cur.fetchone()
if fetch is None:
break
url = fetch[SQL_URL]
''' Creating the permanent and temporary filenames '''
url_filepath = url_to_filepath(url)
# Ignore this value of `root`, because we might have a custom outputdir.
root = url_filepath['root']
folder = os.path.join(root, url_filepath['folder'])
os.makedirs(folder, exist_ok=True)
fullname = os.path.join(folder, url_filepath['filename'])
temporary_basename = hashit(url, 16) + '.oddltemporary'
temporary_fullname = os.path.join(folder, temporary_basename)
''' Managing overwrite '''
if os.path.isfile(fullname):
if overwrite is True:
os.remove(fullname)
else:
safeprint('Skipping "%s". Use `--overwrite`' % fullname)
continue
safeprint('Downloading "%s" as "%s"' % (fullname, temporary_basename))
filehandle = open(temporary_fullname, 'wb')
try:
download_file(url, filehandle, hookfunction=hook1, bytespersecond=bytespersecond)
os.rename(temporary_fullname, fullname)
except:
filehandle.close()
raise
## ##
## DOWNLOADER ######################################################################################
## OTHER CLASSES ###################################################################################
## ##
class Generic:
@ -539,15 +490,8 @@ class TreeNode:
self.check_child_availability(newroot)
self.children[newroot] = othertree
def printtree(self, customsort=None):
for node in self.walk(customsort):
print(node.abspath())
def sorted_children(self, customsort=None):
if customsort:
keys = sorted(self.children.keys(), key=customsort)
else:
keys = sorted(self.children.keys())
keys = sorted(self.children.keys(), key=customsort)
for key in keys:
yield (key, self.children[key])
@ -570,13 +514,6 @@ def db_init(sql, cur):
sql.commit()
return True
def dict_to_file(jdict, filename):
text = dict_to_string(jdict)
text = text.encode('utf-8')
filehandle = open(filename, 'wb')
filehandle.write(text)
filehandle.close()
def do_get(url, raise_for_status=True):
return do_request('GET', requests.get, url)
@ -617,10 +554,17 @@ def download_file(url, filehandle, hookfunction=None, headers={}, bytespersecond
raise Exception('Did not receive expected total size. %d / %d' % (size, totalsize))
return True
def fetch_generator(cur):
while True:
fetch = cur.fetchone()
if fetch is None:
break
yield fetch
def filepath_sanitize(text, allowed=''):
bet = FILENAME_BADCHARS.replace(allowed, '')
for char in bet:
text = text.replace(char, '')
badchars = ''.join(char for char in FILENAME_BADCHARS if char not in allowed)
text = ''.join(char for char in text if char not in badchars)
return text
def get_clipboard():
@ -771,18 +715,72 @@ def digest_argparse(args):
fullscan=args.fullscan,
)
def download(databasename, outputdir=None, overwrite=False, bytespersecond=None):
if isinstance(bytespersecond, str):
bytespersecond = eval(bytespersecond)
def download(
databasename,
outputdir=None,
bytespersecond=None,
headers=None,
overwrite=False,
):
'''
Download all of the Enabled files. The filepaths will match that of the
website, using `outputdir` as the root directory.
downloader = Downloader(
databasename=databasename,
outputdir=outputdir,
)
downloader.download(
bytespersecond=bytespersecond,
overwrite=overwrite,
)
Parameters:
outputdir:
The directory to mirror the files into. If not provided, the domain
name is used.
bytespersecond:
The speed to ratelimit the downloads. Can be an integer, or a string like
'500k', according to the capabilities of `bytestring.parsebytes`.
Note that this is bytes, not bits.
headers:
Additional headers to pass to each `download_file` call.
overwrite:
If True, delete local copies of existing files and rewrite them.
Otherwise, completed files are skipped.
'''
sql = sqlite3.connect(databasename)
cur = sql.cursor()
if outputdir in (None, ''):
# This assumes that all URLs in the database are from the same domain.
# If they aren't, it's the user's fault because Walkers don't leave the given site.
cur.execute('SELECT url FROM urls LIMIT 1')
url = cur.fetchone()[0]
outputdir = url_to_filepath(url)['root']
if isinstance(bytespersecond, str):
bytespersecond = bytestring.parsebytes(bytespersecond)
cur.execute('SELECT * FROM urls WHERE do_download == 1 ORDER BY url')
for fetch in fetch_generator(cur):
url = fetch[SQL_URL]
url_filepath = url_to_filepath(url)
folder = os.path.join(outputdir, url_filepath['folder'])
os.makedirs(folder, exist_ok=True)
fullname = os.path.join(folder, url_filepath['filename'])
temporary_basename = hashit(url, 16) + '.oddltemporary'
temporary_fullname = os.path.join(folder, temporary_basename)
if os.path.isfile(fullname):
if overwrite:
os.remove(fullname)
else:
safeprint('Skipping "%s". Use `--overwrite`' % fullname)
continue
safeprint('Downloading "%s" as "%s"' % (fullname, temporary_basename))
filehandle = open(temporary_fullname, 'wb')
with filehandle:
download_file(url, filehandle, hookfunction=hook1, bytespersecond=bytespersecond)
os.rename(temporary_fullname, fullname)
def download_argparse(args):
return download(
@ -812,15 +810,12 @@ def filter_pattern(databasename, regex, action='keep', *trash):
sql = sqlite3.connect(databasename)
cur = sql.cursor()
cur2 = sql.cursor()
cur2.execute('SELECT * FROM urls')
while True:
fetch = cur2.fetchone()
if fetch is None:
break
url = fetch[SQL_URL]
current_do_dl = fetch[SQL_DO_DOWNLOAD]
cur.execute('SELECT * FROM urls')
items = cur.fetchall()
for item in items:
url = item[SQL_URL]
current_do_dl = item[SQL_DO_DOWNLOAD]
for pattern in regex:
contains = re.search(pattern, url) is not None
@ -1145,7 +1140,6 @@ def tree_argparse(args):
databasename=args.databasename,
output_filename=args.outputfile,
)
## ##
## COMMANDLINE FUNCTIONS ###########################################################################
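A hedged usage sketch of the rewritten `download` function (the database name and rate are illustrative):

    import opendirdl
    opendirdl.download(
        'example.com.db',        # hypothetical database produced by a Walker
        outputdir='mirror',
        bytespersecond='500k',   # parsed by bytestring.parsebytes
        overwrite=False,
    )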

View file

@ -2,3 +2,10 @@ else
======
For anything that isn't Reddit
Note: Many projects in this repository import other projects. If you see something like:
import sys
sys.path.append('C:\\git\\else\\ratelimiter'); import ratelimiter
Just come back to this page and download those files. Arranging them is up to you.

View file

@ -1,56 +0,0 @@
import time
class Ratelimiter:
def __init__(self, allowance_per_period, period, operation_cost=1, mode='sleep'):
'''
allowance_per_period:
The number of operations we can perform per `period` seconds.
period:
The number of seconds over which we can perform `allowance_per_period` operations.
operation_cost:
The default amount to remove from our balance after each operation.
Pass a `cost` parameter to `self.limit` to use a nondefault value.
mode:
'sleep': If we do not have the balance for an operation, sleep until we do.
Return True every time.
'reject': If we do not have the balance for an operation, return False.
'''
if mode not in ('sleep', 'reject'):
raise ValueError('Invalid mode %s' % repr(mode))
self.allowance_per_period = allowance_per_period
self.period = period
self.operation_cost = operation_cost
self.mode = mode
self.last_operation = time.time()
self.balance = 0
self.gain_rate = allowance_per_period / period
def limit(self, cost=None):
if cost is None:
cost = self.operation_cost
timediff = time.time() - self.last_operation
self.balance += timediff * self.gain_rate
self.balance = min(self.balance, self.allowance_per_period)
successful = False
deficit = cost - self.balance
if deficit > 0 and self.mode == 'sleep':
time_needed = (deficit / self.gain_rate)
#print(self.balance, deficit, 'Need to sleep %f' % time_needed)
time.sleep(time_needed)
self.balance = cost
#print(self.balance)
if self.balance >= cost:
#print('pass')
self.balance -= cost
successful = True
self.last_operation = time.time()
return successful
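A hedged usage sketch of the class above, throttling a stream of chunks to roughly 100 KiB per second:

    limiter = Ratelimiter(allowance_per_period=100 * 1024, period=1)
    for chunk in [b'a' * 16384] * 10:    # stand-in for downloaded chunks
        limiter.limit(len(chunk))        # 'sleep' mode blocks until the balance allows it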

View file

@ -2,13 +2,14 @@ import collections
import glob
import json
import os
import ratelimiter
import shutil
import stat
import string
import sys
import time
sys.path.append('C:\\git\\else\\ratelimiter'); import ratelimiter
BYTE = 1
KIBIBYTE = BYTE * 1024
MIBIBYTE = KIBIBYTE * 1024