else

2016-10-21 20:47:08 -07:00 · 2016-10-21 20:47:08 -07:00 · 98667e75f3
commit 98667e75f3
parent e43bd02583
12 changed files with 326 additions and 47 deletions
--- a/AESFile/pycrypto
+++ b/AESFile/pycrypto
--- a/Bytestring/bytestring.py
+++ b/Bytestring/bytestring.py
@ -1,4 +1,7 @@
 import re
+import sys
+
+__VERSION__ = '0.0.1'

 BYTE = 1
 KIBIBYTE = 1024 * BYTE
@ -10,22 +13,34 @@ EXIBYTE = 1024 * PEBIBYTE
 ZEBIBYTE = 1024 * EXIBYTE
 YOBIBYTE = 1024 * ZEBIBYTE

-UNIT_STRINGS = {
-    BYTE: 'b',
-    KIBIBYTE: 'KiB',
-    MIBIBYTE: 'MiB',
-    GIBIBYTE: 'GiB',
-    TEBIBYTE: 'TiB',
-    PEBIBYTE: 'PiB',
-    EXIBYTE: 'EiB',
-    ZEBIBYTE: 'ZiB',
-    YOBIBYTE: 'YiB',
-}
-UNITS_SORTED = sorted(UNIT_STRINGS.keys(), reverse=True)
+BYTE_STRING = 'b'
+KIBIBYTE_STRING = 'KiB'
+MIBIBYTE_STRING = 'MiB'
+GIBIBYTE_STRING = 'GiB'
+TEBIBYTE_STRING = 'TiB'
+PEBIBYTE_STRING = 'PiB'
+EXIBYTE_STRING = 'EiB'
+ZEBIBYTE_STRING = 'ZiB'
+YOBIBYTE_STRING = 'YiB'

-def bytestring(size, force_unit=None):
+UNIT_STRINGS = {
+    BYTE: BYTE_STRING,
+    KIBIBYTE: KIBIBYTE_STRING,
+    MIBIBYTE: MIBIBYTE_STRING,
+    GIBIBYTE: GIBIBYTE_STRING,
+    TEBIBYTE: TEBIBYTE_STRING,
+    PEBIBYTE: PEBIBYTE_STRING,
+    EXIBYTE: EXIBYTE_STRING,
+    ZEBIBYTE: ZEBIBYTE_STRING,
+    YOBIBYTE: YOBIBYTE_STRING,
+}
+REVERSED_UNIT_STRINGS = {value: key for (key, value) in UNIT_STRINGS.items()}
+UNIT_SIZES = sorted(UNIT_STRINGS.keys(), reverse=True)
+
+
+def bytestring(size, decimal_places=3, force_unit=None):
    '''
-    Convert a number into a binary-standard string.
+    Convert a number into a string.

    force_unit:
        If None, an appropriate size unit is chosen automatically.
@ -34,15 +49,31 @@ def bytestring(size, force_unit=None):
    if force_unit is None:
        divisor = get_appropriate_divisor(size)
    else:
+        if isinstance(force_unit, str):
+            force_unit = normalize_unit_string(force_unit)
+            force_unit = REVERSED_UNIT_STRINGS[force_unit]
        divisor = force_unit

    size_unit_string = UNIT_STRINGS[divisor]
-    size_string = '%.3f %s' % ((size / divisor), size_unit_string)
+
+    size_string = '{number:.0{decimal_places}f} {unit}'
+    size_string = size_string.format(
+        decimal_places=decimal_places,
+        number=size/divisor,
+        unit=size_unit_string,
+    )
    return size_string

 def get_appropriate_divisor(size):
+    '''
+    Return the divisor that would be appropriate for displaying this byte size.
+    For example:
+        1000 => 1 to display 1,000 b
+        1024 => 1024 to display 1 KiB
+        123456789 => 1048576 to display 117.738 MiB
+    '''
    size = abs(size)
-    for unit in UNITS_SORTED:
+    for unit in UNIT_SIZES:
        if size >= unit:
            appropriate_unit = unit
            break
@ -50,11 +81,24 @@ def get_appropriate_divisor(size):
        appropriate_unit = 1
    return appropriate_unit

+def normalize_unit_string(string):
+    '''
+    Given a string "k" or "kb" or "kib" in any case, return "KiB", etc.
+    '''
+    string = string.lower()
+    for (size, unit_string) in UNIT_STRINGS.items():
+        unit_string_l = unit_string.lower()
+        if string in (unit_string_l, unit_string_l[0], unit_string_l.replace('i', '')):
+            return unit_string
+    raise ValueError('Unrecognized unit string "%s"' % string)
+
 def parsebytes(string):
    '''
    Given a string like "100 kib", return the appropriate integer value.
+    Accepts "k", "kb", "kib" in any casing.
    '''
-    string = string.lower().strip().replace(' ', '')
+    string = string.lower().strip()
+    string = string.replace(' ', '').replace(',', '')

    matches = re.findall('((\\.|-|\\d)+)', string)
    if len(matches) == 0:
@ -73,12 +117,21 @@ def parsebytes(string):
    if string == '':
        return int(byte_value)

-    reversed_units = {value.lower():key for (key, value) in UNIT_STRINGS.items()}
-    for (unit_string, multiplier) in reversed_units.items():
-        # accept kib, k, kb
-        if string in (unit_string, unit_string[0], unit_string.replace('i', '')):
-            break
-    else:
-        raise ValueError('Could not determine byte value of %s' % string)
+    unit_string = normalize_unit_string(string)
+    multiplier = REVERSED_UNIT_STRINGS[unit_string]

-    return int(byte_value * multiplier)
+    return int(byte_value * multiplier)
+
+def main(args=None):
+    if args is None:
+        args = sys.argv[1:]
+
+    if len(args) != 1:
+        print('Usage: bytestring.py <number>')
+        return 1
+    n = int(sys.argv[1])
+    print(bytestring(n))
+    return 0
+
+if __name__ == '__main__':
+    sys.exit(main(sys.argv[1:]))
--- a/Bytestring/bytestring_test.py
+++ b/Bytestring/bytestring_test.py
@ -0,0 +1,60 @@
+import bytestring
+import unittest
+
+bytestring_pairs = {
+    100: '100.000 b',
+    2 ** 10: '1.000 KiB',
+    2 ** 20: '1.000 MiB',
+    2 ** 30: '1.000 GiB',
+    -(2 ** 30): '-1.000 GiB',
+    (2 ** 30) + (512 * (2 ** 20)): '1.500 GiB',
+}
+
+parsebytes_pairs = {
+    '100k': 102400,
+    '100 k': 102400,
+    '100 kb': 102400,
+    '100  kib': 102400,
+    '100.00KB': 102400,
+    '1.5 mb': 1572864,
+    '-1.5 mb': -1572864,
+}
+
+unit_string_cases = [
+'B', 'b',
+'KiB', 'kib', 'KB', 'K', 'k',
+'MiB', 'mib', 'MB', 'M', 'm',
+'GiB', 'gib', 'GB', 'G', 'g',
+'TiB', 'tib', 'TB', 'T', 't',
+'PiB', 'pib', 'PB', 'P', 'p',
+'EiB', 'eib', 'EB', 'E', 'e',
+'ZiB', 'zib', 'ZB', 'Z', 'z',
+'YiB', 'yib', 'YB', 'Y', 'y',
+]
+
+class BytestringTest(unittest.TestCase):
+    def test_bytestring(self):
+        for (number, text) in bytestring_pairs.items():
+            self.assertEqual(bytestring.bytestring(number), text)
+        self.assertEqual(bytestring.bytestring(1024, force_unit=1), '1024.000 b')
+        self.assertEqual(bytestring.bytestring(1024, force_unit='b'), '1024.000 b')
+
+    def test_parsebytes(self):
+        for (number, text) in bytestring_pairs.items():
+            self.assertEqual(bytestring.parsebytes(text), number)
+        for (text, number) in parsebytes_pairs.items():
+            self.assertEqual(bytestring.parsebytes(text), number)
+        self.assertRaises(ValueError, bytestring.parsebytes, 'no numbers')
+        self.assertRaises(ValueError, bytestring.parsebytes, '100 and 300')
+        self.assertRaises(ValueError, bytestring.parsebytes, 'something300')
+        self.assertRaises(ValueError, bytestring.parsebytes, '100 wrongunit')
+
+    def test_normalize_unit_string(self):
+        for case in unit_string_cases:
+            normalized = bytestring.normalize_unit_string(case)
+            self.assertTrue(normalized in bytestring.REVERSED_UNIT_STRINGS)
+        self.assertRaises(ValueError, bytestring.normalize_unit_string, 'incorrect')
+        self.assertRaises(ValueError, bytestring.normalize_unit_string, 'x')
+
+if __name__ == '__main__':
+    unittest.main()
--- a/Bytestring/setup.py
+++ b/Bytestring/setup.py
@ -0,0 +1,17 @@
+import os
+from setuptools import setup
+
+def read(filename):
+    return open(os.path.join(os.path.dirname(__file__), filename)).read()
+
+setup(
+    author='Ethan Dalool (voussoir)',
+    name='bytestring',
+    version='0.0.1',
+    description='Convert integers into IEC binary strings and back',
+    py_modules=['bytestring', 'bytestring_test'],
+    entry_points='''
+        [console_scripts]
+        bytestring=bytestring:main
+    ''',
+)
--- a/Downloady/downloady.py
+++ b/Downloady/downloady.py
@ -17,7 +17,7 @@ HEADERS = {
 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.152 Safari/537.36'
 }

-FILENAME_BADCHARS = '*?"<>|'
+FILENAME_BADCHARS = '*?"<>|\r'

 last_request = 0
 CHUNKSIZE = 4 * bytestring.KIBIBYTE
@ -37,16 +37,22 @@ def download_file(
        overwrite=False,
        raise_for_undersized=True,
        verbose=False,
+        **get_kwargs
    ):
    headers = headers or {}

    url = sanitize_url(url)
    if localname in [None, '']:
        localname = basename_from_url(url)
+    if os.path.isdir(localname):
+        localname = os.path.join(localname, basename_from_url(url))
    localname = sanitize_filename(localname)
+    if localname != os.devnull:
+        localname = os.path.abspath(localname)

    if verbose:
-        print(url)
+        safeprint(' URL:', url)
+        safeprint('File:', localname)

    plan = prepare_plan(
        url,
@ -81,7 +87,7 @@ def download_file(
    else:
        bytes_downloaded = 0

-    download_stream = request('get', url, stream=True, headers=headers, auth=auth)
+    download_stream = request('get', url, stream=True, headers=headers, auth=auth, **get_kwargs)
    if callback_progress is not None:
        callback_progress = callback_progress(plan['remote_total_bytes'])

@ -223,8 +229,7 @@ def prepare_plan(

        return plan_fulldownload

-    print('No plan was chosen?')
-    return None
+    raise Exception('No plan was chosen?')


 class Progress1:
@ -288,8 +293,6 @@ class Progress2:
        )
        print(message, end=end, flush=True)

-progress1 = Progress1
-progress2 = Progress2

 def basename_from_url(url):
    '''
@ -321,10 +324,14 @@ def request(method, url, stream=False, headers=None, timeout=TIMEOUT, **kwargs):
        'head': session.head,
        'post': session.post,
    }[method]
-    req = method(url, stream=stream, headers=headers, timeout=None, **kwargs)
+    req = method(url, stream=stream, headers=headers, timeout=timeout, **kwargs)
    req.raise_for_status()
    return req

+def safeprint(*texts, **kwargs):
+    texts = [str(text).encode('ascii', 'replace').decode() for text in texts]
+    print(*texts, **kwargs)
+
 def sanitize_filename(text, exclusions=''):
    bet = FILENAME_BADCHARS.replace(exclusions, '')
    for char in bet:
@ -375,7 +382,7 @@ if __name__ == '__main__':

    parser.add_argument('url')
    parser.add_argument('localname', nargs='?', default=None)
-    parser.add_argument('-c', '--callback', dest='callback', default=progress1)
+    parser.add_argument('-c', '--callback', dest='callback', default=Progress1)
    parser.add_argument('-bps', '--bytespersecond', dest='bytespersecond', default=None)
    parser.add_argument('-ow', '--overwrite', dest='overwrite', action='store_true')
    parser.add_argument('-r', '--range', dest='range', default=None)
--- a/Javascript/opendir_image.js
+++ b/Javascript/opendir_image.js
@ -304,7 +304,7 @@ function create_workspace()
    var dumper = create_command_button("dump urls", dump_urls);
    var ingest_box = document.createElement("textarea");
    var ingest_button = create_command_button("ingest", ingest);
-    var start_button = create_command_button("load all", function(){start(); this.parentElement.removeChild(this);});
+    var start_button = create_command_button("load all", function(){start();});

    start_button.style.display = "block";

--- a/OpenDirDL/README.html
+++ b/OpenDirDL/README.html
@ -0,0 +1,90 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head>
+  <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+  <meta http-equiv="Content-Style-Type" content="text/css" />
+  <meta name="generator" content="pandoc" />
+  <title></title>
+  <style type="text/css">code{white-space: pre;}</style>
+</head>
+<body>
+<h1 id="open-dir-dl">Open Dir DL</h1>
+<p>The open directory downloader.</p>
+<p>See inside opendirdl.py for usage instructions.</p>
+<ul>
+<li><strong>[addition]</strong> A new feature was added.</li>
+<li><strong>[bugfix]</strong> Incorrect behavior was fixed.</li>
+<li><strong>[change]</strong> An existing feature was slightly modified or parameters were renamed.</li>
+<li><strong>[cleanup]</strong> Code was improved, comments were added, or other changes with minor impact on the interface.</li>
+<li><strong>[removal]</strong> An old feature was removed.</li>
+</ul>
+<p> </p>
+<ul>
+<li>2016 10 03
+<ul>
+<li><strong>[bugfix]</strong> Fix KeyError caused by the 'root' -&gt; 'domain' rename.</li>
+</ul></li>
+<li>2016 10 01
+<ul>
+<li><strong>[bugfix]</strong> Fixed the download function so it actually passes <code>headers</code> into downloady.</li>
+<li><strong>[change]</strong> <code>url_split</code> key 'root' has been renamed to 'domain'.</li>
+<li><strong>[change]</strong> Improved some variable names, including <code>walkurl -&gt; root_url</code>.</li>
+<li><strong>[cleanup]</strong> Removed import for Ratelimiter since downloady handles all of that now.</li>
+</ul></li>
+<li>2016 08 16
+<ul>
+<li><strong>[cleanup]</strong> Now that Downloady uses temp files for incomplete downloads, that logic can be removed from opendirdl.</li>
+</ul></li>
+<li>2016 08 10
+<ul>
+<li><strong>[addition]</strong> Added clickable links to each directory on HTML tree pages.</li>
+<li><strong>[bugfix]</strong> Fixed bug in smart_insert caused by 404's being considered falsey, triggering the 'one and only one' exception.</li>
+<li><strong>[bugfix]</strong> Fixed bug in smart_insert where 404'd URLs were not being deleted from the database.</li>
+</ul></li>
+<li>2016 08 02
+<ul>
+<li><strong>[cleanup]</strong> Removed the need for div IDs on the Tree pages by making the collapse button use <code>this.nextSibling</code>.</li>
+<li><strong>[cleanup]</strong> Rewrote <code>build_file_tree</code> with a way simpler algorithm.</li>
+<li><strong>[removal]</strong> Removed the ability to set a Node's parent during <code>__init__</code> because it wasn't fully fleshed out and doesn't need to be used anyway.</li>
+</ul></li>
+<li>2016 08 01
+<ul>
+<li><strong>[addition]</strong> Made the digest work even if you forget the <a href="http://" class="uri">http://</a></li>
+</ul></li>
+<li>2016 07 29
+<ul>
+<li><strong>[change]</strong> Moved some nested function definitions out to the top level, and made the construction of the file tree its own function. These functions really don't need to be used on their own, but they were cluttering the logic of the <code>tree</code> command.</li>
+<li><strong>[change]</strong> Renamed <code>Tree.listnodes</code> to <code>Tree.list_children</code>, and the <code>customsort</code> now expects to operate on Node objects rather than <code>(identifier, Node)</code> tuples. Nodes already have their identifier so the tuple was unnecessary.</li>
+<li><strong>[change]</strong> Replaced local <code>download_file</code> function with a call to <code>downloady.download_file</code>. It supports download continuation and removes duplicate work.</li>
+<li><strong>[cleanup]</strong> Replaced all <code>safeprint</code> calls with <code>write</code> because it provides access to safeprint as well as file writing if needed.</li>
+<li><strong>[removal]</strong> Removed <code>Tree.sorted_children</code> since it was basically a duplicate of <code>Tree.listnodes</code> and I don't know why I had both.</li>
+</ul></li>
+<li>2016 07 25
+<ul>
+<li><strong>[change]</strong> Bytespersecond is now parsed by <code>bytestring.parsebytes</code> rather than <code>eval</code>, so you can write &quot;100k&quot; as opposed to &quot;100 * 1024&quot; etc.</li>
+<li><strong>[removal]</strong> Removed the <code>Downloader</code> class after watching <a href="https://youtu.be/o9pEzgHorH0">this Jack Diederich talk</a> about unnecessary classes.</li>
+</ul></li>
+<li>2016 07 19
+<ul>
+<li><strong>[addition]</strong> Gave the HTML tree divs a very gentle shadow and alternating colors to help with depth perception.</li>
+<li><strong>[bugfix]</strong> Fixed the allowed characters parameter of <code>filepath_sanitize</code>, which was not written correctly but worked out of luck.</li>
+<li><strong>[cleanup]</strong> Rearranged the big blocks to be in a logical order rather than alphabetical order. Walker &gt; Downloader &gt; other classes</li>
+<li><strong>[cleanup]</strong> Renamed the <code>keep_pattern</code> and <code>remove_pattern</code> functions to <code>keep_pattern_argparse</code> etc to be consistent with the other functions used by the argparser. <em>Does not affect the commandline usage!</em></li>
+<li><strong>[cleanup]</strong> Fixed some mismatched code vs comments</li>
+</ul></li>
+<li>2016 07 08
+<ul>
+<li><strong>[bugfix]</strong> Fixed bug in which trees wouldn't generate on server:port urls.</li>
+</ul></li>
+<li>2016 07 04
+<ul>
+<li><strong>[addition]</strong> Added new argparse command &quot;tree&quot;</li>
+</ul></li>
+<li>2016 02 08
+<ul>
+<li><strong>[bugfix]</strong> Fixed bug where server:port urls did not create db files because of the colon. It's been replaced by a hash.</li>
+<li><strong>[change]</strong> Moved db commits to only happen at the end of a digest.</li>
+</ul></li>
+</ul>
+</body>
+</html>
--- a/OpenDirDL/README.md
+++ b/OpenDirDL/README.md
@ -3,18 +3,24 @@ Open Dir DL

 The open directory downloader.

-Requires `pip install beautifulsoup4`.
-
 See inside opendirdl.py for usage instructions.

+- **[addition]** A new feature was added.
+- **[bugfix]** Incorrect behavior was fixed.
+- **[change]** An existing feature was slightly modified or parameters were renamed.
+- **[cleanup]** Code was improved, comments were added, or other changes with minor impact on the interface.
+- **[removal]** An old feature was removed.
+
+&nbsp;
+
 - 2016 10 03
    - **[bugfix]** Fix KeyError caused by the 'root' -> 'domain' rename.

 - 2016 10 01
    - **[bugfix]** Fixed the download function so it actually passes `headers` into downloady.
    - **[change]** `url_split` key 'root' has been renamed to 'domain'.
+    - **[change]** Improved some variable names, including `walkurl -> root_url`.
    - **[cleanup]** Removed import for Ratelimiter since downloady handles all of that now.
-    - **[cleanup]** Improved some variable names, including `walkurl -> root_url`.

 - 2016 08 16
    - **[cleanup]** Now that Downloady uses temp files for incomplete downloads, that logic can be removed from opendirdl.
@ -22,7 +28,7 @@ See inside opendirdl.py for usage instructions.
 - 2016 08 10
    - **[addition]** Added clickable links to each directory on HTML tree pages.
    - **[bugfix]** Fixed bug in smart_insert caused by 404's being considered falsey, triggering the 'one and only one' exception.
-    - **[bugfix]** Fixed bug in smart_insert where 404'd URLs were not being deleted from the database.
+    - **[bugfix]** Fixed bug in smart_insert where 404'd URLs were not being deleted from the database.

 - 2016 08 02
    - **[cleanup]** Removed the need for div IDs on the Tree pages by making the collapse button use `this.nextSibling`.
--- a/OpenDirDL/opendirdl.py
+++ b/OpenDirDL/opendirdl.py
@ -714,10 +714,9 @@ def safeindex(sequence, index, fallback=None):
    except IndexError:
        return fallback

-def safeprint(text, **kwargs):
-    text = str(text)
-    text = text.encode('ascii', 'replace').decode()
-    print(text, **kwargs)
+def safeprint(*texts, **kwargs):
+    texts = [str(text).encode('ascii', 'replace').decode() for text in texts]
+    print(*texts, **kwargs)

 def smart_insert(sql, cur, url=None, head=None, commit=True):
    '''
@ -887,7 +886,7 @@ def download(
            url,
            localname=fullname,
            bytespersecond=bytespersecond,
-            callback_progress=downloady.progress2,
+            callback_progress=downloady.Progress2,
            headers=headers,
            overwrite=overwrite,
        )
--- a/Ratelimiter/ratelimiter-0.0.1.zip
+++ b/Ratelimiter/ratelimiter-0.0.1.zip
--- a/SingleDist/singledist.py
+++ b/SingleDist/singledist.py
@ -0,0 +1,41 @@
+import glob
+import os
+import shutil
+import sys
+
+filename = sys.argv[1]
+package_name = filename.split('.py')[0]
+
+print('Creating setup.py')
+setup_content = '''
+import setuptools
+
+setuptools.setup(
+    author='voussoir',
+    name='{package_name}',
+    version='0.0.1',
+    description='',
+    py_modules=['{package_name}'],
+)
+'''
+
+setup_content = setup_content.format(package_name=package_name)
+
+setup_file = open('setup.py', 'w')
+setup_file.write(setup_content)
+setup_file.close()
+
+print('Executing setup.py')
+os.system('python setup.py sdist')
+
+print('Moving zip file')
+zips = glob.glob('dist\\*.zip')
+for zip_filename in zips:
+    new_zip = os.path.basename(zip_filename)
+    new_zip = os.path.abspath(new_zip)
+    shutil.move(zip_filename, new_zip)
+
+print('Deleting temp')
+shutil.rmtree('dist')
+shutil.rmtree(glob.glob('*.egg-info')[0])
+os.remove('setup.py')
--- a/ThreadedDL/threaded_dl.py
+++ b/ThreadedDL/threaded_dl.py
@ -19,7 +19,7 @@ def download_thread(url, filename):
        print('Skipping existing file "%s"' % filename)
        return
    print(' Starting "%s"' % filename)
-    downloady.download_file(url, filename)
+    downloady.download_file(url, filename, timeout=15)
    print('+Finished "%s"' % filename)

 def listget(li, index, fallback):
@ -41,7 +41,13 @@ def threaded_dl(urls, thread_count, filename_format=None):
            time.sleep(0.1)

        basename = downloady.basename_from_url(url)
-        filename = filename_format.format(now=now, index=index, basename=basename)
+        extension = os.path.splitext(basename)[1]
+        filename = filename_format.format(
+            basename=basename,
+            extension=extension,
+            index=index,
+            now=now,
+        )
        t = threading.Thread(target=download_thread, args=[url, filename])
        t.daemon = True
        threads.append(t)
@ -60,7 +66,7 @@ def main():
            urls = f.read()
    else:
        urls = clipext.resolve(filename)
-    urls = urls.split('\n')
+    urls = urls.replace('\r', '').split('\n')
    thread_count = int(listget(sys.argv, 2, 4))
    filename_format = listget(sys.argv, 3, None)
    threaded_dl(urls, thread_count=thread_count, filename_format=filename_format)