else

2016-08-17 18:24:38 -07:00 · 2016-08-17 18:24:38 -07:00 · 82f63a75ab
commit 82f63a75ab
parent c491e417f5
14 changed files with 501 additions and 264 deletions
--- a/Clipext/README.md
+++ b/Clipext/README.md
@ -0,0 +1,8 @@
 Clip Extension
 ==============
 This module works with pyperclip to provide some handy features for commandline utilities.
 Instead of having the user paste text into the commandline to run your script, just let them enter `script.py !c` and resolve it automatically. Pasting into the cmd on Windows is annoying and requires a mouse-click so this can be very convenient.
 Since "!i" resolves to user input, your script can accept piping with `ls | script.py !i`.
--- a/Clipext/clipext.py
+++ b/Clipext/clipext.py
@ -0,0 +1,31 @@
 import pyperclip
 CLIPBOARD_STRINGS = ['!c', '!clip', '!clipboard']
 INPUT_STRINGS = ['!i', '!in', '!input', '!stdin']
 EOF = '\x1a'
 def multi_line_input():
    '''
    Collect lines from stdin until one of them contains the EOF character,
    or until the stream itself ends.

    Returns everything that was entered before the first EOF character,
    joined by newlines and stripped of surrounding whitespace.
    '''
    lines = []
    finished = False
    while not finished:
        try:
            line = input()
        except EOFError:
            # The stream ended without an explicit EOF character,
            # e.g. the user entered nothing but ctrl-z.
            line = EOF
        lines.append(line)
        finished = EOF in line
    text = '\n'.join(lines)
    (before_eof, *_) = text.split(EOF)
    return before_eof.strip()
 def resolve(arg):
    '''
    Translate a special commandline argument into the text it stands for.

    Case-insensitively: any of CLIPBOARD_STRINGS returns the clipboard
    contents, any of INPUT_STRINGS returns multi-line user input.
    Anything else is returned unchanged.
    '''
    keyword = arg.lower()
    sources = (
        (CLIPBOARD_STRINGS, pyperclip.paste),
        (INPUT_STRINGS, multi_line_input),
    )
    for (keywords, fetch) in sources:
        if keyword in keywords:
            return fetch()
    return arg
--- a/Downloady/README.md
+++ b/Downloady/README.md
@ -0,0 +1,6 @@
 Downloady
 =========
 - 2016 08 16
    - Downloady now uses temporary files for incomplete downloads, and renames them when finished. This helps distinguish downloads that were interrupted and should be resumed from files that just happen to have the same name, which previously would have been interpreted as a resume. This improves overall ease-of-use, simplifies the behavior of the `overwrite` parameter, and will remove duplicate work from other programs.
    - Rewrote the plan creator and download function to do a better job of separating concerns and simplify the plan selector.
--- a/Downloady/downloady.py
+++ b/Downloady/downloady.py
@ -1,27 +1,28 @@
 import sys
 sys.path.append('C:\\git\\else\\ratelimiter'); import ratelimiter
 sys.path.append('C:\\git\\else\\bytestring'); import bytestring
 import argparse
 import os
 import pyperclip # pip install pyperclip
 import requests
 import sys
 import time
 import urllib
 import warnings
 sys.path.append('C:\\git\\else\\clipext'); import clipext
 sys.path.append('C:\\git\\else\\ratelimiter'); import ratelimiter
 sys.path.append('C:\\git\\else\\bytestring'); import bytestring
 warnings.simplefilter('ignore')
 HEADERS = {
 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.152 Safari/537.36'
 }
 SLEEPINESS = 3
 FILENAME_BADCHARS = '*?"<>|'
 last_request = 0
 CHUNKSIZE = 16 * bytestring.KIBIBYTE
 STOP = False
 TIMEOUT = 600
 TEMP_EXTENSION = '.downloadytemp'
 def basename_from_url(url):
    '''
@ -32,81 +33,6 @@ def basename_from_url(url):
    localname = localname.split('/')[-1]
    return localname
 def determine_seek_and_range(
        file_handle,
        localname,
        local_exists,
        overwrite,
        remote_total_bytes,
        server_respects_range,
        user_provided_range,
        user_range_min,
        user_range_max,
    ):
    '''
    THINGS THAT CAN HAPPEN

    Decide where writing should begin in the local file and which range
    values should be requested, based on whether the local file exists,
    the `overwrite` setting, whether the user asked for a range, and
    whether the server respects range requests.

    May truncate `file_handle`, or seek and write a byte into it, as part
    of preparing the file.

    Returns (seek_to, header_range_min, header_range_max). The two header
    values stay None in the branches that do not assign them.
    Returns None when the existing file is already `remote_total_bytes` long.

    Raises Exception when a needed range or resume cannot be fulfilled, and
    TypeError when the file exists and `overwrite` is neither True nor None.
    '''
    seek_to = 0
    header_range_min = None
    header_range_max = None
    if local_exists:
        local_existing_bytes = os.path.getsize(localname)
        if overwrite is True:
            # truncate() cuts the file at the handle's current position.
            # NOTE(review): presumably the caller has seeked to 0 first -- confirm.
            file_handle.truncate()
            if user_provided_range:
                header_range_min = user_range_min
                header_range_max = user_range_max
                seek_to = user_range_min
            elif not user_provided_range:
                pass
        elif overwrite is None:
            if local_existing_bytes == remote_total_bytes:
                print('File is 100%. Nothing to do.')
                return
            if user_provided_range:
                if server_respects_range:
                    # Only the seek position is set here; the header range
                    # values are left as None in this branch.
                    seek_to = user_range_min
                else:
                    raise Exception('The server did not respect your range header')
            elif not user_provided_range:
                if server_respects_range:
                    # Resume: request everything from the current local size
                    # onward (empty max) and write at the end of the file.
                    print('Resuming from byte %d' % local_existing_bytes)
                    header_range_min = local_existing_bytes
                    header_range_max = ''
                    seek_to = local_existing_bytes
                else:
                    print('File exists, but server doesn\'t allow resumes. Restart from 0?')
                    permission = get_permission()
                    if permission:
                        file_handle.truncate()
                    else:
                        raise Exception('Couldn\'t resume')
        else:
            # NOTE(review): overwrite=False also lands here when the file
            # exists, even though the message lists False as a valid value.
            raise TypeError('Invalid value for `overwrite`. Must be True, False, or None')
    elif not local_exists:
        if user_provided_range:
            if server_respects_range:
                # Write a single byte at the start of the requested range so
                # that the new file reaches that offset.
                file_handle.seek(user_range_min)
                file_handle.write(b'\0')
                header_range_min = user_range_min
                header_range_max = user_range_max
                seek_to = user_range_min
            else:
                raise Exception('The server did not respect your range header')
        elif not user_provided_range:
            pass
    return (seek_to, header_range_min, header_range_max)
 def download_file(
        url,
        localname=None,
@ -114,33 +40,103 @@ def download_file(
        bytespersecond=None,
        callback_progress=None,
        headers=None,
-        overwrite=None
+        overwrite=False,
        verbose=False,
    ):
-    if headers is None:
+    headers = headers or {}
-        headers = {}
+
-    ''' Determine local filename '''
+    url = sanitize_url(url)
    url = url.replace('%3A//', '://')
    if localname in [None, '']:
        localname = basename_from_url(url)
    localname = sanitize_filename(localname)
-    localname = filepath_sanitize(localname)
+    if verbose:
        print(url)
    plan = prepare_plan(
        url,
        localname,
        auth=auth,
        bytespersecond=bytespersecond,
        headers=headers,
        overwrite=overwrite,
    )
    #print(plan)
    if plan is None:
        return
    localname = plan['download_into']
    directory = os.path.split(localname)[0]
    if directory != '':
        os.makedirs(directory, exist_ok=True)
    touch(localname)
    file_handle = open(localname, 'r+b')
    file_handle.seek(plan['seek_to'])
    if plan['header_range_min'] is not None:
        headers['range'] = 'bytes={min}-{max}'.format(
            min=plan['header_range_min'],
            max=plan['header_range_max'],
        )
    if plan['plan_type'] == 'resume':
        bytes_downloaded = plan['seek_to']
    else:
        bytes_downloaded = 0
    download_stream = request('get', url, stream=True, headers=headers, auth=auth)
    for chunk in download_stream.iter_content(chunk_size=CHUNKSIZE):
        bytes_downloaded += len(chunk)
        file_handle.write(chunk)
        if callback_progress is not None:
            callback_progress(bytes_downloaded, plan['remote_total_bytes'])
        if plan['limiter'] is not None and bytes_downloaded < plan['remote_total_bytes']:
            plan['limiter'].limit(len(chunk))
    file_handle.close()
    if localname != plan['real_localname']:
        os.rename(localname, plan['real_localname'])
    localsize = os.path.getsize(plan['real_localname'])
    if plan['plan_type'] != 'partial' and localsize < plan['remote_total_bytes']:
        message = 'File does not contain expected number of bytes. Received {size} / {total}'
        message = message.format(size=os.path.getsize(localname), total=plan['remote_total_bytes'])
        raise Exception(message)
    return plan['real_localname']
 def prepare_plan(
        url,
        localname,
        auth,
        bytespersecond,
        headers,
        overwrite,
    ):
    # Chapter 1: File existence
    user_provided_range = 'range' in headers
    real_localname = localname
    temp_localname = localname + TEMP_EXTENSION
    real_exists = os.path.exists(real_localname)
    if real_exists and overwrite is False and not user_provided_range:
        print('File exists and overwrite is off. Nothing to do.')
        return None
    temp_exists = os.path.exists(temp_localname)
    real_localsize = int(real_exists and os.path.getsize(real_localname))
    temp_localsize = int(temp_exists and os.path.getsize(temp_localname))
    # Chapter 2: Ratelimiting
    if bytespersecond is None:
        limiter = None
    elif isinstance(bytespersecond, ratelimiter.Ratelimiter):
        limiter = bytespersecond
    else:
-        limiter = ratelimiter.Ratelimiter(bytespersecond, period=1)
+        limiter = ratelimiter.Ratelimiter(bytespersecond)
-    ''' Prepare plan variables '''
+    # Chapter 3: Extracting range
    local_exists = os.path.exists(localname)
    if local_exists and overwrite is False:
        print('Overwrite off. Nothing to do.')
        return
    user_provided_range = 'range' in headers
    if user_provided_range:
        user_range_min = int(headers['range'].split('bytes=')[1].split('-')[0])
        user_range_max = headers['range'].split('-')[1]
@ -150,71 +146,88 @@ def download_file(
        user_range_min = None
        user_range_max = None
    # Chapter 4: Server range support
    # Always include a range on the first request to figure out whether the
-    # server supports it. Use 0- so we get the right `remote_total_bytes`.
+    # server supports it. Use 0- to get correct remote_total_bytes
    temp_headers = headers
    temp_headers.update({'range': 'bytes=0-'})
    # I'm using a GET instead of an actual HEAD here because some servers respond
    # differently, even though they're not supposed to.
    head = request('get', url, stream=True, headers=temp_headers, auth=auth)
-    remote_total_bytes = int(head.headers.get('content-length', 1))
+    remote_total_bytes = int(head.headers.get('content-length', 0))
    server_respects_range = (head.status_code == 206 and 'content-range' in head.headers)
    head.connection.close()
-    touch(localname)
+    if user_provided_range and not server_respects_range:
-    file_handle = open(localname, 'r+b')
+        raise Exception('Server did not respect your range header')
    file_handle.seek(0)
-    plan = determine_seek_and_range(
+    # Chapter 5: Plan definitions
-        file_handle=file_handle,
+    plan_base = {
-        localname=localname,
+        'limiter': limiter,
-        local_exists=local_exists,
+        'real_localname': real_localname,
-        overwrite=overwrite,
+        'remote_total_bytes': remote_total_bytes,
-        remote_total_bytes=remote_total_bytes,
+    }
-        server_respects_range=server_respects_range,
+    plan_fulldownload = dict(
-        user_provided_range=user_provided_range,
+        plan_base,
-        user_range_min=user_range_min,
+        download_into=temp_localname,
-        user_range_max=user_range_max,
+        header_range_min=None,
        header_range_max=None,
        plan_type='fulldownload',
        seek_to=0,
    )
    plan_resume = dict(
        plan_base,
        download_into=temp_localname,
        header_range_min=temp_localsize,
        header_range_max='',
        plan_type='resume',
        seek_to=temp_localsize,
    )
    plan_partial = dict(
        plan_base,
        download_into=real_localname,
        header_range_min=user_range_min,
        header_range_max=user_range_max,
        plan_type='partial',
        seek_to=user_range_min,
    )
    if plan is None:
        return
-    (seek_to, header_range_min, header_range_max) = plan
+    # Chapter 6: Redeem your meal vouchers here
-    if header_range_min is not None:
+    if real_exists:
-        headers['range'] = 'bytes={0}-{1}'.format(header_range_min, header_range_max)
+        if overwrite:
            os.remove(real_localname)
-    bytes_downloaded = seek_to
+        if user_provided_range:
-    file_handle.seek(seek_to)
+            return plan_partial
    download_stream = request('get', url, stream=True, headers=headers, auth=auth)
-    ''' Begin download '''
+        return plan_fulldownload
    for chunk in download_stream.iter_content(chunk_size=CHUNKSIZE):
        bytes_downloaded += len(chunk)
        file_handle.write(chunk)
        if callback_progress is not None:
            callback_progress(bytes_downloaded, remote_total_bytes)
-        if limiter is not None and bytes_downloaded < remote_total_bytes:
+    elif temp_exists and temp_localsize > 0:
-            limiter.limit(len(chunk))
+        if overwrite:
            return plan_fulldownload
-    file_handle.close()
+        if user_provided_range:
-    return localname
+            return plan_partial
-def filepath_sanitize(text, exclusions=''):
+        if server_respects_range:
-    bet = FILENAME_BADCHARS.replace(exclusions, '')
+            print('Resume from byte %d' % plan_resume['seek_to'])
-    for char in bet:
+            return plan_resume
-        text = text.replace(char, '')
+
-    return text
+    else:
        if user_provided_range:
            return plan_partial
        return plan_fulldownload
    print('No plan was chosen?')
    return None
 def get_permission(prompt='y/n\n>', affirmative=['y', 'yes']):
    '''
    Show `prompt`, read one line of user input, and return True if the
    lowercased answer is one of the `affirmative` responses.
    '''
    answer = input(prompt).lower()
    return answer in affirmative
-def is_clipboard(s):
+def progress1(bytes_downloaded, bytes_total, prefix=''):
    return s.lower() in ['!c', '!clip', '!clipboard']
 def progress(bytes_downloaded, bytes_total, prefix=''):
    divisor = bytestring.get_appropriate_divisor(bytes_total)
    bytes_total_string = bytestring.bytestring(bytes_total, force_unit=divisor)
    bytes_downloaded_string = bytestring.bytestring(bytes_downloaded, force_unit=divisor)
@ -278,6 +291,16 @@ def request(method, url, stream=False, headers=None, timeout=TIMEOUT, **kwargs):
    req.raise_for_status()
    return req
 def sanitize_filename(text, exclusions=''):
    '''
    Return `text` with every character of FILENAME_BADCHARS removed, except
    for the characters listed in `exclusions`.

    `exclusions` is treated as a collection of individual characters.
    It used to be matched against FILENAME_BADCHARS as a single substring,
    so an exclusion like '|*' (not contiguous in that order) protected
    nothing at all.
    '''
    badchars = set(FILENAME_BADCHARS).difference(exclusions)
    return ''.join(char for char in text if char not in badchars)
 def sanitize_url(url):
    '''
    Restore the scheme separator of a url whose colon was percent-encoded,
    turning every '%3A//' into '://'.
    '''
    return url.replace('%3A//', '://')
 def touch(filename):
    '''
    Make sure `filename` exists by opening it in binary append mode and
    closing it again. Existing contents are left as they are.
    '''
    with open(filename, 'ab'):
        pass
@ -286,26 +309,14 @@ def touch(filename):
 def download_argparse(args):
    url = args.url
    if is_clipboard(url):
        url = pyperclip.paste()
        print(url)
-    overwrite = {
+    url = clipext.resolve(url)
        'y':True, 't':True,
        'n':False, 'f':False,
    }.get(args.overwrite.lower(), None)
    callback = {
-        None: progress,
+        None: progress1,
-        '1': progress,
+        '1': progress1,
        '2': progress2,
-    }.get(args.callback, None)
+    }.get(args.callback, args.callback)
    callback = args.callback
    if callback == '1':
        callback = progress
    if callback == '2':
        callback = progress2
    bytespersecond = args.bytespersecond
    if bytespersecond is not None:
@ -321,18 +332,19 @@ def download_argparse(args):
        bytespersecond=bytespersecond,
        callback_progress=callback,
        headers=headers,
-        overwrite=overwrite,
+        overwrite=args.overwrite,
        verbose=True,
    )
 if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    #p_download_file = subparsers.add_parser('download_file')
    parser.add_argument('url')
    parser.add_argument('localname', nargs='?', default=None)
-    parser.add_argument('-c', '--callback', dest='callback', default=progress)
+    parser.add_argument('-c', '--callback', dest='callback', default=progress1)
    parser.add_argument('-bps', '--bytespersecond', dest='bytespersecond', default=None)
-    parser.add_argument('-ow', '--overwrite', dest='overwrite', default='')
+    parser.add_argument('-ow', '--overwrite', dest='overwrite', action='store_true')
    parser.add_argument('-r', '--range', dest='range', default=None)
    parser.set_defaults(func=download_argparse)
--- a/Downloady/things
+++ b/Downloady/things
@ -1,45 +0,0 @@
 THINGS THAT CAN HAPPEN
 ├───File exists
 │   ├───User disables overwrite
 │   │   └───Return because there's nothing to do
 │   │
 │   ├───User enables overwrite
 │   │   ├───User requests range
 │   │   │   └───Raise exception because requesting a range and forcing overwrite are mutually exclusive
 │   │   │
 │   │   └───User does not request range
 │   │       └───File opened, truncated, full download
 │   │
 │   └───User does not specify overwrite
 │       ├───File is same size as content-length
 │       │   └───Return because there's nothing to do.
 │       │
 │       ├───User requests range
 │       │   ├───Server respects range
 │       │   │   └───File opened, seeked to request, bytes filled in
 │       │   │
 │       │   └───Server does not respect range
 │       │       └───Raise exception because user's request can't be fulfilled
 │       │
 │       └───User does not request range
 │           ├───Server respects range
 │           │   └───File is opened, seeked to end, download resumes
 │           │
 │           └───Server does not respect range
 │               └───Ask for permission to overwrite from beginning
 │
 └───File does not exist
    ├───User requests range
    │   ├───Server respects range
    │   │   └───File created, seeked to request, bytes filled in. everything else left 0
    │   └───Server does not respect range
    │       └───Raise exception because user's request can't be fulfilled
    │
    └───User does not request range
        └───File created, full download
 Possible ambiguity: If the user requests a range, and the file does not exist, does he want:
 1. to fill the file with zeroes, and patch the requested bytes into their correct spot; or
 2. to create the file empty, and only write the requested bytes?
 I will assume #1 because that plays nicely with other Things That Can Happen, such as letting the user patch the other bytes in later.
--- a/Instathief/instathief.py
+++ b/Instathief/instathief.py
@ -0,0 +1,157 @@
 import argparse
 import bs4
 import datetime
 import json
 import os
 import requests
 import sys
 sys.path.append('C:\\git\\else\\clipext'); import clipext
 sys.path.append('C:\\git\\else\\downloady'); import downloady
 ''' '''
 STRFTIME = '%Y%m%d-%H%M%S'
 # strftime used for filenames when downloading
 URL_PROFILE = 'https://www.instagram.com/{username}'
 URL_QUERY = 'https://www.instagram.com/query/'
 PAGE_QUERY_TEMPLATE = '''
 ig_user({user_id})
 {{
    media.after({end_cur}, {count})
    {{
        count,
        nodes
        {{
            code,
            date,
            display_src,
            id,
            video_url
        }},
        page_info
    }}
 }}
 '''.replace('\n', '').replace(' ', '')
 USERAGENT = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36'
 ''' '''
 last_cookie = None
 def download_media(media_list):
    '''
    Download every item of `media_list` with downloady.

    Each item is indexed by 'created' (a timestamp) and 'url'. The local
    filename is the timestamp formatted with STRFTIME, followed by the
    extension of the url's basename. Existing files are not overwritten.
    '''
    for item in media_list:
        created = datetime.datetime.utcfromtimestamp(item['created'])
        stamp = created.strftime(STRFTIME)
        remote_basename = downloady.basename_from_url(item['url'])
        (_, extension) = os.path.splitext(remote_basename)
        downloady.download_file(
            url=item['url'],
            localname=stamp + extension,
            callback_progress=downloady.progress2,
            overwrite=False,
        )
 def get_page(user_id, end_cur, count, cookies):
    '''
    POST a media query for `user_id` to URL_QUERY, asking for `count` items
    after the `end_cur` cursor, and return the decoded JSON response.

    `cookies` is sent with the request and must contain 'csrftoken', which
    is also sent as the x-csrftoken header.
    Error status codes raise through `raise_for_status`.
    '''
    response = requests.post(
        url=URL_QUERY,
        cookies=cookies,
        data={
            'q': PAGE_QUERY_TEMPLATE.format(
                count=count,
                end_cur=end_cur,
                user_id=user_id,
            ),
            'ref': 'users::show',
        },
        headers={
            'referer': 'https://www.instagram.com/',
            'user-agent': USERAGENT,
            'x-csrftoken': cookies['csrftoken'],
        },
    )
    response.raise_for_status()
    return response.json()
 def get_user_info(username):
    '''
    Fetch the profile page of `username` and read the user's details out of
    the `window._sharedData` javascript embedded in it.

    Returns a dict with 'user_id', 'end_cur' (the cursor to start paging
    from, or None when the profile reports no next page), and 'cookies'
    (the cookies of the profile response, also stored in the module-level
    `last_cookie`).

    Raises Exception if no script on the page mentions window._sharedData.
    '''
    global last_cookie
    response = requests.get(URL_PROFILE.format(username=username))
    response.raise_for_status()
    soup = bs4.BeautifulSoup(response.text, 'html.parser')

    shared_script = None
    for candidate in soup.find_all('script'):
        if 'window._sharedData' in candidate.text:
            shared_script = candidate
            break
    if shared_script is None:
        raise Exception('Did not find expected javascript')

    payload = shared_script.text.split('window._sharedData = ')[1]
    payload = payload.rstrip(';').strip()
    profile = json.loads(payload)['entry_data']['ProfilePage'][0]['user']

    user_id = profile['id']
    page_info = profile['media']['page_info']
    end_cur = None
    if page_info['has_next_page']:
        # Minus 1 because the queries use "after" parameters for pagination,
        # so taking this cursor as-is would only return the items after it.
        end_cur = int(page_info['start_cursor']) - 1

    last_cookie = response.cookies
    return {
        'user_id': user_id,
        'end_cur': end_cur,
        'cookies': response.cookies,
    }
 def get_user_media(username):
    '''
    Generate one dict per post of `username`, with the keys 'created' (the
    post's date) and 'url' (its video_url if present, else its display_src).

    Pages of 50 posts are requested one after another until a page reports
    that there is no next page.
    '''
    user_info = get_user_info(username)
    cursor = user_info.pop('end_cur')
    has_more = True
    while has_more:
        media_page = get_page(count=50, end_cur=cursor, **user_info)['media']
        for post in media_page['nodes']:
            yield {
                'created': post['date'],
                'url': post.get('video_url') or post.get('display_src'),
            }
        page_info = media_page['page_info']
        has_more = page_info['has_next_page']
        if has_more:
            cursor = page_info['end_cursor']
 def main():
    '''
    Print the media url of every post by the user named in sys.argv[1].
    '''
    username = sys.argv[1]
    # get_user_media yields dicts with 'created' and 'url' keys. Unpacking
    # such a dict as a tuple binds its key names rather than its values,
    # so the url has to be looked up by key.
    for media in get_user_media(username):
        print(media['url'])
 if __name__ == '__main__':
    main()
--- a/Javascript/opendir_image.js
+++ b/Javascript/opendir_image.js
@ -138,6 +138,7 @@ function create_odi_div(url)
    if (paramless_url.match(IMAGE_TYPES))
    {
        console.log("Creating image div for " + paramless_url);
        var div = document.createElement("div");
        div.id = generate_id(32);
        div.className = "odi_image_div";
@ -194,6 +195,7 @@ function create_odi_div(url)
        {
            return null;
        }
        console.log("Creating " + mediatype + " div for " + paramless_url);
        var div = document.createElement("div");
        div.id = generate_id(32);
@ -253,6 +255,7 @@ function create_odi_div(url)
 }
 function create_odi_divs(urls)
 {
    console.log("Creating odi divs");
    image_divs = [];
    media_divs = [];
    odi_divs = [];
@ -332,6 +335,7 @@ function create_workspace()
    control_panel.appendChild(ingest_div);
    control_panel.appendChild(start_button);
    document.body.appendChild(workspace);
    console.log("finished workspace");
 }
 function delete_odi_div(element)
@ -430,6 +434,7 @@ function filter_re(pattern, do_delete)
 function format_css()
 {
    console.log("Formatting CSS variables");
    var css = CSS;
    while (true)
    {
@ -438,22 +443,24 @@ function format_css()
        {
            break;
        }
-
+        console.log(matches);
-        matches = Array.from(new Set(matches));
+        matches = new Set(matches);
-        for (var index = 0; index < matches.length; index += 1)
+        /* Originally used Array.from(set) and did regular iteration, but I found
        that sites can override and break that conversion. */
        matches.forEach(
            function(injector)
            {
            var injector = matches[index];
                var injected = injector.replace(new RegExp("\\$", 'g'), "");
            /*console.log(injector);*/
            /*console.log(injected);*/
                css = css.replace(injector, this[injected]);
            }
        );
    }
    return css;
 }
 function get_all_urls()
 {
    console.log("Collecting urls");
    var urls = [];
    function include(source, attr)
    {
@ -529,6 +536,7 @@ function get_basename(url)
 function get_gfycat_video(id)
 {
    console.log("Resolving gfycat " + id);
    var url = "https://gfycat.com/cajax/get/" + id;
    var request = new XMLHttpRequest();
    request.answer = null;
@ -595,6 +603,7 @@ function generate_id(length)
 function ingest()
 {
    /* Take the text from the INGEST box, and make odi divs from it */
    console.log("Ingesting");
    var odi_divs = get_odi_divs();
    var ingestbox = document.getElementById("ingestbox");
    var text = ingestbox.value;
@ -622,6 +631,7 @@ function ingest()
 function lazy_load_all()
 {
    console.log("Starting lazyload");
    lazies = get_lazy_divs();
    lazies.reverse();
    lazy_buttons = document.getElementsByClassName("load_button");
--- a/Javascript/videotiles.js
+++ b/Javascript/videotiles.js
@ -91,8 +91,8 @@ function swap_source(player, source_list)
 function main()
 {
-    var WIDTH = 3;
+    var WIDTH = 2;
-    var HEIGHT = 3;
+    var HEIGHT = 2;
    var MEDIAS = get_media_links();
    clear_page();
--- a/OpenDirDL/README.md
+++ b/OpenDirDL/README.md
@ -1,49 +1,52 @@
 Open Dir DL
 ===========
-The open directory downloader
+The open directory downloader.
-Requires `pip install beautifulsoup4`
+Requires `pip install beautifulsoup4`.
 See inside opendirdl.py for usage instructions.
 - 2016 08 16
    - **[cleanup]** Now that Downloady uses temp files for incomplete downloads, that logic can be removed from opendirdl.
 - 2016 08 10
-    - Fixed bug in smart_insert caused by 404's being considered falsey, triggering the 'one and only one' exception.
+    - **[addition]** Added clickable links to each directory on HTML tree pages.
-    - Fixed bug in smart_insert where 404'd URLs were not being deleted from the database.
+    - **[bugfix]** Fixed bug in smart_insert caused by 404's being considered falsey, triggering the 'one and only one' exception.
-    - Added clickable links to each directory on HTML tree pages.
+    - **[bugfix]** Fixed bug in smart_insert where 404'd URLs were not being deleted from the database.
 - 2016 08 02
-    - Removed the usage of div IDs on the Tree pages by making the collapse button use `this.nextSibling`.
+    - **[cleanup]** Removed the need for div IDs on the Tree pages by making the collapse button use `this.nextSibling`.
-    - Rewrote `build_file_tree` with a way simpler algorithm.
+    - **[cleanup]** Rewrote `build_file_tree` with a way simpler algorithm.
-    - Removed the ability to set a Node's parent during `__init__` because it wasn't fully fleshed out and doesn't need to be used anyway.
+    - **[removal]** Removed the ability to set a Node's parent during `__init__` because it wasn't fully fleshed out and doesn't need to be used anyway.
 - 2016 08 01
-    - Made the digest work even if you forget the http://
+    - **[addition]** Made the digest work even if you forget the http://
 - 2016 07 29
-    - Moved some nested function definitions out to the top level, and made the construction of the file tree its own function. These functions really don't need to be used on their own, but they were cluttering the logic of the `tree` command.
+    - **[change]** Moved some nested function definitions out to the top level, and made the construction of the file tree its own function. These functions really don't need to be used on their own, but they were cluttering the logic of the `tree` command.
-    - Renamed `Tree.listnodes` to `Tree.list_children` and the `customsort` now expects to operate on Node objects rather than `(identifier, Node)` tuples. Nodes already have their identifier so the tuple was unecessary.
+    - **[change]** Renamed `Tree.listnodes` to `Tree.list_children`, and the `customsort` now expects to operate on Node objects rather than `(identifier, Node)` tuples. Nodes already have their identifier so the tuple was unnecessary.
-    - Removed `Tree.sorted_children` since it was basically a duplicate of `Tree.listnodes` and I don't know why I had both.
+    - **[change]** Replaced local `download_file` function with a call to `downloady.download_file`. It supports download continuation and removes duplicate work.
-    - Replaced all `safeprint` calls with `write` because it provides access to safeprint as well as file writing if needed.
+    - **[cleanup]** Replaced all `safeprint` calls with `write` because it provides access to safeprint as well as file writing if needed.
-    - Replaced local `download_file` function with a call to `downloady.download_file`. It supports download continuation and removes duplicate work.
+    - **[removal]** Removed `Tree.sorted_children` since it was basically a duplicate of `Tree.listnodes` and I don't know why I had both.
 - 2016 07 25
-    - Removed the `Downloader` class after watching [this Jack Diederich talk](https://youtu.be/o9pEzgHorH0) about unecessary classes.
+    - **[change]** Bytespersecond is now parsed by `bytestring.parsebytes` rather than `eval`, so you can write "100k" as opposed to "100 * 1024" etc.
-    - Bytespersecond is now parsed by `bytestring.parsebytes` rather than `eval`, so you can write "100k" as opposed to "100 * 1024" etc.
+    - **[removal]** Removed the `Downloader` class after watching [this Jack Diederich talk](https://youtu.be/o9pEzgHorH0) about unnecessary classes.
 - 2016 07 19
-    - Rearranged the big blocks to be in a logical order rather than alphabetical order. Walker > Downloader > other classes
+    - **[addition]** Gave the HTML tree divs a very gentle shadow and alternating colors to help with depth perception.
-    - Renamed the `keep_pattern` and `remove_pattern` functions to `keep_pattern_argparse` etc to be consistent with the other functions used by the argparser. *Does not affect the commandline usage!*
+    - **[bugfix]** Fixed the allowed characters parameter of `filepath_sanitize`, which was not written correctly but worked out of luck.
-    - Gave the HTML tree divs a very gentle shadow and alternating colors to help with depth perception.
+    - **[cleanup]** Rearranged the big blocks to be in a logical order rather than alphabetical order. Walker > Downloader > other classes
-    - Fixed some mismatched code vs comments
+    - **[cleanup]** Renamed the `keep_pattern` and `remove_pattern` functions to `keep_pattern_argparse` etc to be consistent with the other functions used by the argparser. *Does not affect the commandline usage!*
-    - Fixed the allowed characters parameter of `filepath_sanitize`, which was not written correctly but worked out of luck.
+    - **[cleanup]** Fixed some mismatched code vs comments
 - 2016 07 08
-    - Fixed bug in which trees wouldn't generate on server:port urls.
+    - **[bugfix]** Fixed bug in which trees wouldn't generate on server:port urls.
 - 2016 07 04
-    - Added new argparse command "tree"
+    - **[addition]** Added new argparse command "tree"
 - 2016 02 08
-    - Fixed bug where server:port urls did not create db files because of the colon. It's been replaced by a hash.
+    - **[bugfix]** Fixed bug where server:port urls did not create db files because of the colon. It's been replaced by a hash.
-    - Moved db commits to only happen at the end of a digest.
+    - **[change]** Moved db commits to only happen at the end of a digest.
--- a/OpenDirDL/opendirdl.py
+++ b/OpenDirDL/opendirdl.py
@ -614,7 +614,7 @@ def fetch_generator(cur):
 def filepath_sanitize(text, allowed=''):
    badchars = FILENAME_BADCHARS
-    badchars = ''.join(char for char in FILENAME_BADCHARS if char not in allowed)
+    badchars = set(char for char in FILENAME_BADCHARS if char not in allowed)
    text = ''.join(char for char in text if char not in badchars)
    return text
@ -886,32 +886,16 @@ def download(
        folder = os.path.join(outputdir, url_filepath['folder'])
        os.makedirs(folder, exist_ok=True)
-        final_fullname = os.path.join(folder, url_filepath['filename'])
+        fullname = os.path.join(folder, url_filepath['filename'])
        temporary_basename = hashit(url, 16) + '.oddltemporary'
        temporary_fullname = os.path.join(folder, temporary_basename)
-        # Because we use .oddltemporary files, the behavior of `overwrite` here
+        write('Downloading "%s"' % fullname)
        # is different than the behavior of `overwrite` in downloady.
        # The overwrite used in the following block refers to the finalized file.
        # The overwrite passed to downloady refers to the oddltemporary which
        # may be resumed.
        if os.path.isfile(final_fullname):
            if overwrite:
                os.remove(final_fullname)
            else:
                write('Skipping "%s". Use `--overwrite`' % final_fullname)
                continue
        overwrite = overwrite or None
        write('Downloading "%s" as "%s"' % (final_fullname, temporary_basename))
        downloady.download_file(
            url,
-            localname=temporary_fullname,
+            localname=fullname,
            bytespersecond=bytespersecond,
            callback_progress=downloady.progress2,
            overwrite=overwrite
        )
        os.rename(temporary_fullname, final_fullname)
 def download_argparse(args):
    return download(
--- a/Pathclass/pathclass.py
+++ b/Pathclass/pathclass.py
@ -7,7 +7,6 @@ class Path:
    def __init__(self, path):
        path = os.path.normpath(path)
        path = os.path.abspath(path)
        path = get_path_casing(path)
        self.absolute_path = path
    def __contains__(self, other):
@ -23,6 +22,10 @@ class Path:
    def basename(self):
        '''
        Return the final component of `self.absolute_path`, as produced by
        os.path.basename.
        '''
        return os.path.basename(self.absolute_path)
    def correct_case(self):
        '''
        Pass `self.absolute_path` through get_path_casing, store the result
        back onto this object, and return it.
        '''
        # NOTE(review): get_path_casing is defined elsewhere; presumably it
        # rewrites the path with the letter casing found on disk -- confirm.
        self.absolute_path = get_path_casing(self.absolute_path)
        return self.absolute_path
    @property
    def exists(self):
        '''
        Whether `self.absolute_path` currently exists, according to
        os.path.exists.
        '''
        return os.path.exists(self.absolute_path)
--- a/ServerReference/simpleserver.py
+++ b/ServerReference/simpleserver.py
@ -14,6 +14,7 @@ sys.path.append('C:\\git\\else\\Ratelimiter'); import ratelimiter
 sys.path.append('C:\\git\\else\\SpinalTap'); import spinal
 FILE_READ_CHUNK = bytestring.MIBIBYTE
 RATELIMITER = ratelimiter.Ratelimiter(16 * bytestring.MIBIBYTE)
 # The paths which the user may access.
 # Attempting to access anything outside will 403.
@ -98,6 +99,7 @@ class RequestHandler(http.server.BaseHTTPRequestHandler):
        if isinstance(data, types.GeneratorType):
            for chunk in data:
                self.wfile.write(chunk)
                RATELIMITER.limit(len(chunk))
        else:
            self.wfile.write(data)
--- a/SpinalTap/spinal.py
+++ b/SpinalTap/spinal.py
@ -350,16 +350,17 @@ def copy_file(
    source = str_to_fp(source)
    if not source.is_file:
        raise SourceNotFile(source)
    if destination_new_root is not None:
        source.correct_case()
        destination = new_root(source, destination_new_root)
    destination = str_to_fp(destination)
    callback = callback or do_nothing
    callback_verbose = callback_verbose or do_nothing
    if not source.is_file:
        raise SourceNotFile(source)
    if destination.is_dir:
        raise DestinationIsDirectory(destination)
--- a/ThreadedDL/threaded_dl.py
+++ b/ThreadedDL/threaded_dl.py
@ -0,0 +1,65 @@
 import os
 import sys
 import threading
 import time
 sys.path.append('C:\\git\\else\\clipext'); import clipext
 sys.path.append('C:\\git\\else\\downloady'); import downloady
 def remove_finished(threads):
    '''
    Return a new list containing only the threads which are still alive,
    in their original order.
    '''
    return list(filter(lambda thread: thread.is_alive(), threads))
 def download_thread(url, filename_prefix=''):
    '''
    Download one url to a local file named `filename_prefix` followed by the
    basename that downloady derives from the url.

    Urls that are blank after stripping whitespace are ignored, and nothing
    is downloaded if the target filename already exists.
    '''
    url = url.strip()
    if not url:
        return

    basename = filename_prefix + downloady.basename_from_url(url)
    if not os.path.exists(basename):
        print('Starting "%s"' % basename)
        downloady.download_file(url, basename)
        print('Finished "%s"' % basename)
    else:
        print('Skipping existing file "%s"' % basename)
 def listget(li, index, fallback):
    '''
    Return li[index], or `fallback` when that index is out of range.
    '''
    try:
        item = li[index]
    except IndexError:
        item = fallback
    return item
 def threaded_dl(urls, thread_count=4):
    '''
    Download every url, running at most `thread_count` downloads at once,
    and block until all of them have finished.

    Each url is handed to `download_thread` on its own daemon thread along
    with a zero-padded index prefix (for example "07_"), so the resulting
    filenames sort in the same order as the input.

    urls: any iterable of url strings.
    thread_count: the maximum number of simultaneous downloads. Must be at
        least 1.

    Raises ValueError if thread_count is less than 1.
    '''
    if thread_count < 1:
        # Zero would wait forever for a free slot that can never exist, and
        # a negative count would never throttle at all.
        raise ValueError('thread_count must be at least 1, not %r' % (thread_count,))

    # Materialize so that generators and other unsized iterables work with
    # the len() call below.
    urls = list(urls)
    threads = []
    prefix_digits = len(str(len(urls)))
    prefix_text = '%0{digits}d_'.format(digits=prefix_digits)
    for (index, url) in enumerate(urls):
        # Prune before checking so that we only sleep while the pool is
        # genuinely full.
        threads = remove_finished(threads)
        while len(threads) >= thread_count:
            time.sleep(0.1)
            threads = remove_finished(threads)
        prefix = prefix_text % index
        t = threading.Thread(target=download_thread, args=[url, prefix])
        t.daemon = True
        threads.append(t)
        t.start()

    # Poll rather than join so the main thread keeps waking up regularly
    # while the remaining downloads finish.
    while len(threads) > 0:
        threads = remove_finished(threads)
        time.sleep(0.1)
 def main():
    '''
    Commandline entry point.

    The first argument is either the path of an existing text file, or a
    string for clipext to resolve. Either way the resulting text is split on
    whitespace and each piece is downloaded as a url.
    '''
    source = sys.argv[1]
    if os.path.isfile(source):
        with open(source, 'r') as handle:
            text = handle.read()
    else:
        text = clipext.resolve(source)
    threaded_dl(text.split())
 # Only start downloading when this file is executed as a script, not when
 # it is imported as a module.
 if __name__ == '__main__':
    main()