This commit is contained in:
Ethan Dalool 2016-10-21 20:47:08 -07:00
parent e43bd02583
commit 98667e75f3
12 changed files with 326 additions and 47 deletions

BIN
AESFile/pycrypto wheels.zip Normal file

Binary file not shown.

View file

@ -1,4 +1,7 @@
import re
import sys
__VERSION__ = '0.0.1'
BYTE = 1
KIBIBYTE = 1024 * BYTE
@ -10,22 +13,34 @@ EXIBYTE = 1024 * PEBIBYTE
ZEBIBYTE = 1024 * EXIBYTE
YOBIBYTE = 1024 * ZEBIBYTE
UNIT_STRINGS = {
BYTE: 'b',
KIBIBYTE: 'KiB',
MIBIBYTE: 'MiB',
GIBIBYTE: 'GiB',
TEBIBYTE: 'TiB',
PEBIBYTE: 'PiB',
EXIBYTE: 'EiB',
ZEBIBYTE: 'ZiB',
YOBIBYTE: 'YiB',
}
UNITS_SORTED = sorted(UNIT_STRINGS.keys(), reverse=True)
BYTE_STRING = 'b'
KIBIBYTE_STRING = 'KiB'
MIBIBYTE_STRING = 'MiB'
GIBIBYTE_STRING = 'GiB'
TEBIBYTE_STRING = 'TiB'
PEBIBYTE_STRING = 'PiB'
EXIBYTE_STRING = 'EiB'
ZEBIBYTE_STRING = 'ZiB'
YOBIBYTE_STRING = 'YiB'
def bytestring(size, force_unit=None):
UNIT_STRINGS = {
BYTE: BYTE_STRING,
KIBIBYTE: KIBIBYTE_STRING,
MIBIBYTE: MIBIBYTE_STRING,
GIBIBYTE: GIBIBYTE_STRING,
TEBIBYTE: TEBIBYTE_STRING,
PEBIBYTE: PEBIBYTE_STRING,
EXIBYTE: EXIBYTE_STRING,
ZEBIBYTE: ZEBIBYTE_STRING,
YOBIBYTE: YOBIBYTE_STRING,
}
REVERSED_UNIT_STRINGS = {value: key for (key, value) in UNIT_STRINGS.items()}
UNIT_SIZES = sorted(UNIT_STRINGS.keys(), reverse=True)
def bytestring(size, decimal_places=3, force_unit=None):
'''
Convert a number into a binary-standard string.
Convert a number into string.
force_unit:
If None, an appropriate size unit is chosen automatically.
@ -34,15 +49,31 @@ def bytestring(size, force_unit=None):
if force_unit is None:
divisor = get_appropriate_divisor(size)
else:
if isinstance(force_unit, str):
force_unit = normalize_unit_string(force_unit)
force_unit = REVERSED_UNIT_STRINGS[force_unit]
divisor = force_unit
size_unit_string = UNIT_STRINGS[divisor]
size_string = '%.3f %s' % ((size / divisor), size_unit_string)
size_string = '{number:.0{decimal_places}f} {unit}'
size_string = size_string.format(
decimal_places=decimal_places,
number=size/divisor,
unit=size_unit_string,
)
return size_string
def get_appropriate_divisor(size):
'''
Return the divisor that would be appropriate for displaying this byte size.
For example:
1000 => 1 to display 1,000 b
1024 => 1024 to display 1 KiB
123456789 => 1048576 to display 117.738 MiB
'''
size = abs(size)
for unit in UNITS_SORTED:
for unit in UNIT_SIZES:
if size >= unit:
appropriate_unit = unit
break
@ -50,11 +81,24 @@ def get_appropriate_divisor(size):
appropriate_unit = 1
return appropriate_unit
def normalize_unit_string(string):
    '''
    Map a unit spelling such as "k", "kb" or "kib" (in any casing) to its
    canonical form from UNIT_STRINGS, such as "KiB".

    Raises ValueError if the string matches none of the known units.
    '''
    string = string.lower()
    for canonical in UNIT_STRINGS.values():
        lowered = canonical.lower()
        # Accepted spellings: full ("kib"), initial only ("k"), and the
        # full form with the "i" dropped ("kb").
        accepted = (lowered, lowered[0], lowered.replace('i', ''))
        if string in accepted:
            return canonical
    raise ValueError('Unrecognized unit string "%s"' % string)
def parsebytes(string):
'''
Given a string like "100 kib", return the appropriate integer value.
Accepts "k", "kb", "kib" in any casing.
'''
string = string.lower().strip().replace(' ', '')
string = string.lower().strip()
string = string.replace(' ', '').replace(',', '')
matches = re.findall('((\\.|-|\\d)+)', string)
if len(matches) == 0:
@ -73,12 +117,21 @@ def parsebytes(string):
if string == '':
return int(byte_value)
reversed_units = {value.lower():key for (key, value) in UNIT_STRINGS.items()}
for (unit_string, multiplier) in reversed_units.items():
# accept kib, k, kb
if string in (unit_string, unit_string[0], unit_string.replace('i', '')):
break
else:
raise ValueError('Could not determine byte value of %s' % string)
unit_string = normalize_unit_string(string)
multiplier = REVERSED_UNIT_STRINGS[unit_string]
return int(byte_value * multiplier)
def main(args=None):
    '''
    Command-line entry point: print the bytestring form of one integer.

    args:
        List of argument strings, not including the program name.
        If None, sys.argv[1:] is used.

    Returns 0 on success. Returns 1 after printing a usage message when
    the arguments are not exactly one integer.
    '''
    if args is None:
        args = sys.argv[1:]

    if len(args) != 1:
        print('Usage: bytestring.py <number>')
        return 1

    # Parse from `args` rather than sys.argv so that arguments passed in
    # explicitly by the caller are the ones actually used.
    try:
        n = int(args[0])
    except ValueError:
        print('Usage: bytestring.py <number>')
        return 1

    print(bytestring(n))
    return 0
if __name__ == '__main__':
sys.exit(main(sys.argv[1:]))

View file

@ -0,0 +1,60 @@
import bytestring
import unittest
bytestring_pairs = {
100: '100.000 b',
2 ** 10: '1.000 KiB',
2 ** 20: '1.000 MiB',
2 ** 30: '1.000 GiB',
-(2 ** 30): '-1.000 GiB',
(2 ** 30) + (512 * (2 ** 20)): '1.500 GiB',
}
parsebytes_pairs = {
'100k': 102400,
'100 k': 102400,
'100 kb': 102400,
'100 kib': 102400,
'100.00KB': 102400,
'1.5 mb': 1572864,
'-1.5 mb': -1572864,
}
unit_string_cases = [
'B', 'b',
'KiB', 'kib', 'KB', 'K', 'k',
'MiB', 'mib', 'MB', 'M', 'm',
'GiB', 'gib', 'GB', 'G', 'g',
'TiB', 'tib', 'TB', 'T', 't',
'PiB', 'pib', 'PB', 'P', 'p',
'EiB', 'eib', 'EB', 'E', 'e',
'ZiB', 'zib', 'ZB', 'Z', 'z',
'YiB', 'yib', 'YB', 'Y', 'y',
]
class BytestringTest(unittest.TestCase):
    '''
    Checks for bytestring.bytestring, bytestring.parsebytes and
    bytestring.normalize_unit_string using the module-level fixtures.
    '''
    def test_bytestring(self):
        # Automatic unit selection for every number/text fixture pair.
        for (number, expected) in bytestring_pairs.items():
            self.assertEqual(bytestring.bytestring(number), expected)

        # force_unit is exercised with both a numeric divisor and a string.
        for unit in (1, 'b'):
            self.assertEqual(bytestring.bytestring(1024, force_unit=unit), '1024.000 b')

    def test_parsebytes(self):
        # The formatted strings must parse back to the original numbers.
        for (expected, text) in bytestring_pairs.items():
            self.assertEqual(bytestring.parsebytes(text), expected)

        for (text, expected) in parsebytes_pairs.items():
            self.assertEqual(bytestring.parsebytes(text), expected)

        malformed_inputs = (
            'no numbers',
            '100 and 300',
            'something300',
            '100 wrongunit',
        )
        for malformed in malformed_inputs:
            with self.assertRaises(ValueError):
                bytestring.parsebytes(malformed)

    def test_normalize_unit_string(self):
        # Every accepted spelling must normalize to a known canonical unit.
        for spelling in unit_string_cases:
            canonical = bytestring.normalize_unit_string(spelling)
            self.assertTrue(canonical in bytestring.REVERSED_UNIT_STRINGS)

        for unknown in ('incorrect', 'x'):
            with self.assertRaises(ValueError):
                bytestring.normalize_unit_string(unknown)
if __name__ == '__main__':
unittest.main()

17
Bytestring/setup.py Normal file
View file

@ -0,0 +1,17 @@
import os
from setuptools import setup
def read(filename):
    '''
    Return the text content of `filename`, resolved relative to the
    directory containing this setup script.
    '''
    path = os.path.join(os.path.dirname(__file__), filename)
    # Use a context manager so the handle is closed deterministically
    # instead of being left for the garbage collector.
    with open(path) as handle:
        return handle.read()
setup(
author='Ethan Dalool (voussoir)',
name='bytestring',
version='0.0.1',
description='Convert integers into IEC binary strings and back',
py_modules=['bytestring', 'bytestring_test'],
entry_points='''
[console_scripts]
bytestring=bytestring:main
''',
)

View file

@ -17,7 +17,7 @@ HEADERS = {
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.152 Safari/537.36'
}
FILENAME_BADCHARS = '*?"<>|'
FILENAME_BADCHARS = '*?"<>|\r'
last_request = 0
CHUNKSIZE = 4 * bytestring.KIBIBYTE
@ -37,16 +37,22 @@ def download_file(
overwrite=False,
raise_for_undersized=True,
verbose=False,
**get_kwargs
):
headers = headers or {}
url = sanitize_url(url)
if localname in [None, '']:
localname = basename_from_url(url)
if os.path.isdir(localname):
localname = os.path.join(localname, basename_from_url(url))
localname = sanitize_filename(localname)
if localname != os.devnull:
localname = os.path.abspath(localname)
if verbose:
print(url)
safeprint(' URL:', url)
safeprint('File:', localname)
plan = prepare_plan(
url,
@ -81,7 +87,7 @@ def download_file(
else:
bytes_downloaded = 0
download_stream = request('get', url, stream=True, headers=headers, auth=auth)
download_stream = request('get', url, stream=True, headers=headers, auth=auth, **get_kwargs)
if callback_progress is not None:
callback_progress = callback_progress(plan['remote_total_bytes'])
@ -223,8 +229,7 @@ def prepare_plan(
return plan_fulldownload
print('No plan was chosen?')
return None
raise Exception('No plan was chosen?')
class Progress1:
@ -288,8 +293,6 @@ class Progress2:
)
print(message, end=end, flush=True)
progress1 = Progress1
progress2 = Progress2
def basename_from_url(url):
'''
@ -321,10 +324,14 @@ def request(method, url, stream=False, headers=None, timeout=TIMEOUT, **kwargs):
'head': session.head,
'post': session.post,
}[method]
req = method(url, stream=stream, headers=headers, timeout=None, **kwargs)
req = method(url, stream=stream, headers=headers, timeout=timeout, **kwargs)
req.raise_for_status()
return req
def safeprint(*texts, **kwargs):
    '''
    Print the given values like print() does, but with every non-ASCII
    character replaced by "?". Keyword arguments go straight to print().
    '''
    ascii_texts = []
    for text in texts:
        encoded = str(text).encode('ascii', 'replace')
        ascii_texts.append(encoded.decode())
    print(*ascii_texts, **kwargs)
def sanitize_filename(text, exclusions=''):
bet = FILENAME_BADCHARS.replace(exclusions, '')
for char in bet:
@ -375,7 +382,7 @@ if __name__ == '__main__':
parser.add_argument('url')
parser.add_argument('localname', nargs='?', default=None)
parser.add_argument('-c', '--callback', dest='callback', default=progress1)
parser.add_argument('-c', '--callback', dest='callback', default=Progress1)
parser.add_argument('-bps', '--bytespersecond', dest='bytespersecond', default=None)
parser.add_argument('-ow', '--overwrite', dest='overwrite', action='store_true')
parser.add_argument('-r', '--range', dest='range', default=None)

View file

@ -304,7 +304,7 @@ function create_workspace()
var dumper = create_command_button("dump urls", dump_urls);
var ingest_box = document.createElement("textarea");
var ingest_button = create_command_button("ingest", ingest);
var start_button = create_command_button("load all", function(){start(); this.parentElement.removeChild(this);});
var start_button = create_command_button("load all", function(){start();});
start_button.style.display = "block";

90
OpenDirDL/README.html Normal file
View file

@ -0,0 +1,90 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta http-equiv="Content-Style-Type" content="text/css" />
<meta name="generator" content="pandoc" />
<title></title>
<style type="text/css">code{white-space: pre;}</style>
</head>
<body>
<h1 id="open-dir-dl">Open Dir DL</h1>
<p>The open directory downloader.</p>
<p>See inside opendirdl.py for usage instructions.</p>
<ul>
<li><strong>[addition]</strong> A new feature was added.</li>
<li><strong>[bugfix]</strong> Incorrect behavior was fixed.</li>
<li><strong>[change]</strong> An existing feature was slightly modified or parameters were renamed.</li>
<li><strong>[cleanup]</strong> Code was improved, comments were added, or other changes with minor impact on the interface.</li>
<li><strong>[removal]</strong> An old feature was removed.</li>
</ul>
<p> </p>
<ul>
<li>2016 10 03
<ul>
<li><strong>[bugfix]</strong> Fix KeyError caused by the 'root' -&gt; 'domain' rename.</li>
</ul></li>
<li>2016 10 01
<ul>
<li><strong>[bugfix]</strong> Fixed the download function so it actually passes <code>headers</code> into downloady.</li>
<li><strong>[change]</strong> <code>url_split</code> key 'root' has been renamed to 'domain'.</li>
<li><strong>[change]</strong> Improved some variable names, including <code>walkurl -&gt; root_url</code>.</li>
<li><strong>[cleanup]</strong> Removed import for Ratelimiter since downloady handles all of that now.</li>
</ul></li>
<li>2016 08 16
<ul>
<li><strong>[cleanup]</strong> Now that Downloady uses temp files for incomplete downloads, that logic can be removed from opendirdl.</li>
</ul></li>
<li>2016 08 10
<ul>
<li><strong>[addition]</strong> Added clickable links to each directory on HTML tree pages.</li>
<li><strong>[bugfix]</strong> Fixed bug in smart_insert caused by 404's being considered falsey, triggering the 'one and only one' exception.</li>
<li><strong>[bugfix]</strong> Fixed bug in smart_insert where 404'd URLs were not being deleted from the database.</li>
</ul></li>
<li>2016 08 02
<ul>
<li><strong>[cleanup]</strong> Removed the need for div IDs on the Tree pages by making the collapse button use <code>this.nextSibling</code>.</li>
<li><strong>[cleanup]</strong> Rewrote <code>build_file_tree</code> with a way simpler algorithm.</li>
<li><strong>[removal]</strong> Removed the ability to set a Node's parent during <code>__init__</code> because it wasn't fully fleshed out and doesn't need to be used anyway.</li>
</ul></li>
<li>2016 08 01
<ul>
<li><strong>[addition]</strong> Made the digest work even if you forget the <a href="http://" class="uri">http://</a></li>
</ul></li>
<li>2016 07 29
<ul>
<li><strong>[change]</strong> Moved some nested function definitions out to the top level, and made the construction of the file tree its own function. These functions really don't need to be used on their own, but they were cluttering the logic of the <code>tree</code> command.</li>
<li><strong>[change]</strong> Renamed <code>Tree.listnodes</code> to <code>Tree.list_children</code>, and the <code>customsort</code> now expects to operate on Node objects rather than <code>(identifier, Node)</code> tuples. Nodes already have their identifier so the tuple was unnecessary.</li>
<li><strong>[change]</strong> Replaced local <code>download_file</code> function with a call to <code>downloady.download_file</code>. It supports download continuation and removes duplicate work.</li>
<li><strong>[cleanup]</strong> Replaced all <code>safeprint</code> calls with <code>write</code> because it provides access to safeprint as well as file writing if needed.</li>
<li><strong>[removal]</strong> Removed <code>Tree.sorted_children</code> since it was basically a duplicate of <code>Tree.listnodes</code> and I don't know why I had both.</li>
</ul></li>
<li>2016 07 25
<ul>
<li><strong>[change]</strong> Bytespersecond is now parsed by <code>bytestring.parsebytes</code> rather than <code>eval</code>, so you can write &quot;100k&quot; as opposed to &quot;100 * 1024&quot; etc.</li>
<li><strong>[removal]</strong> Removed the <code>Downloader</code> class after watching <a href="https://youtu.be/o9pEzgHorH0">this Jack Diederich talk</a> about unnecessary classes.</li>
</ul></li>
<li>2016 07 19
<ul>
<li><strong>[addition]</strong> Gave the HTML tree divs a very gentle shadow and alternating colors to help with depth perception.</li>
<li><strong>[bugfix]</strong> Fixed the allowed characters parameter of <code>filepath_sanitize</code>, which was not written correctly but worked out of luck.</li>
<li><strong>[cleanup]</strong> Rearranged the big blocks to be in a logical order rather than alphabetical order. Walker &gt; Downloader &gt; other classes</li>
<li><strong>[cleanup]</strong> Renamed the <code>keep_pattern</code> and <code>remove_pattern</code> functions to <code>keep_pattern_argparse</code> etc to be consistent with the other functions used by the argparser. <em>Does not affect the commandline usage!</em></li>
<li><strong>[cleanup]</strong> Fixed some mismatched code vs comments</li>
</ul></li>
<li>2016 07 08
<ul>
<li><strong>[bugfix]</strong> Fixed bug in which trees wouldn't generate on server:port urls.</li>
</ul></li>
<li>2016 07 04
<ul>
<li><strong>[addition]</strong> Added new argparse command &quot;tree&quot;</li>
</ul></li>
<li>2016 02 08
<ul>
<li><strong>[bugfix]</strong> Fixed bug where server:port urls did not create db files because of the colon. It's been replaced by a hash.</li>
<li><strong>[change]</strong> Moved db commits to only happen at the end of a digest.</li>
</ul></li>
</ul>
</body>
</html>

View file

@ -3,18 +3,24 @@ Open Dir DL
The open directory downloader.
Requires `pip install beautifulsoup4`.
See inside opendirdl.py for usage instructions.
- **[addition]** A new feature was added.
- **[bugfix]** Incorrect behavior was fixed.
- **[change]** An existing feature was slightly modified or parameters were renamed.
- **[cleanup]** Code was improved, comments were added, or other changes with minor impact on the interface.
- **[removal]** An old feature was removed.
&nbsp;
- 2016 10 03
- **[bugfix]** Fix KeyError caused by the 'root' -> 'domain' rename.
- 2016 10 01
- **[bugfix]** Fixed the download function so it actually passes `headers` into downloady.
- **[change]** `url_split` key 'root' has been renamed to 'domain'.
- **[change]** Improved some variable names, including `walkurl -> root_url`.
- **[cleanup]** Removed import for Ratelimiter since downloady handles all of that now.
- **[cleanup]** Improved some variable names, including `walkurl -> root_url`.
- 2016 08 16
- **[cleanup]** Now that Downloady uses temp files for incomplete downloads, that logic can be removed from opendirdl.
@ -22,7 +28,7 @@ See inside opendirdl.py for usage instructions.
- 2016 08 10
- **[addition]** Added clickable links to each directory on HTML tree pages.
- **[bugfix]** Fixed bug in smart_insert caused by 404's being considered falsey, triggering the 'one and only one' exception.
- **[bugfix]** Fixed bug in smart_insert where 404'd URLs were not being deleted from the database.
- **[bugfix]** Fixed bug in smart_insert where 404'd URLs were not being deleted from the database.
- 2016 08 02
- **[cleanup]** Removed the need for div IDs on the Tree pages by making the collapse button use `this.nextSibling`.

View file

@ -714,10 +714,9 @@ def safeindex(sequence, index, fallback=None):
except IndexError:
return fallback
def safeprint(text, **kwargs):
text = str(text)
text = text.encode('ascii', 'replace').decode()
print(text, **kwargs)
def safeprint(*texts, **kwargs):
    '''
    print() wrapper: each positional value is converted with str() and any
    non-ASCII character in it is replaced by "?" before printing.
    Keyword arguments are passed to print() unchanged.
    '''
    def to_ascii(value):
        return str(value).encode('ascii', 'replace').decode()

    print(*map(to_ascii, texts), **kwargs)
def smart_insert(sql, cur, url=None, head=None, commit=True):
'''
@ -887,7 +886,7 @@ def download(
url,
localname=fullname,
bytespersecond=bytespersecond,
callback_progress=downloady.progress2,
callback_progress=downloady.Progress2,
headers=headers,
overwrite=overwrite,
)

Binary file not shown.

41
SingleDist/singledist.py Normal file
View file

@ -0,0 +1,41 @@
import glob
import os
import shutil
import subprocess
import sys

# Build a zip source distribution for a single-module Python file.
# A throwaway setup.py is generated in the current directory, `sdist` is
# run, the resulting zip is moved into the current directory, and the
# temporary build artifacts are removed afterwards.

if len(sys.argv) != 2:
    print('Usage: singledist.py <module.py>')
    sys.exit(1)

filename = sys.argv[1]
# splitext strips only the trailing extension; str.split('.py') would also
# cut a name that merely contains ".py" somewhere in the middle.
package_name = os.path.splitext(filename)[0]

print('Creating setup.py')
setup_content = '''
import setuptools
setuptools.setup(
author='voussoir',
name='{package_name}',
version='0.0.1',
description='',
py_modules=['{package_name}'],
)
'''
setup_content = setup_content.format(package_name=package_name)

try:
    with open('setup.py', 'w') as setup_file:
        setup_file.write(setup_content)

    print('Executing setup.py')
    # Run with the current interpreter rather than whatever `python` is on
    # PATH, and request zip explicitly because the default sdist format is
    # not zip on every platform / Python version.
    subprocess.check_call([sys.executable, 'setup.py', 'sdist', '--formats=zip'])

    print('Moving zip file')
    for zip_filename in glob.glob(os.path.join('dist', '*.zip')):
        new_zip = os.path.abspath(os.path.basename(zip_filename))
        shutil.move(zip_filename, new_zip)
finally:
    # Clean up even when the build fails, and tolerate artifacts that were
    # never created.
    print('Deleting temp')
    shutil.rmtree('dist', ignore_errors=True)
    for egg_info in glob.glob('*.egg-info'):
        shutil.rmtree(egg_info, ignore_errors=True)
    if os.path.exists('setup.py'):
        os.remove('setup.py')

View file

@ -19,7 +19,7 @@ def download_thread(url, filename):
print('Skipping existing file "%s"' % filename)
return
print(' Starting "%s"' % filename)
downloady.download_file(url, filename)
downloady.download_file(url, filename, timeout=15)
print('+Finished "%s"' % filename)
def listget(li, index, fallback):
@ -41,7 +41,13 @@ def threaded_dl(urls, thread_count, filename_format=None):
time.sleep(0.1)
basename = downloady.basename_from_url(url)
filename = filename_format.format(now=now, index=index, basename=basename)
extension = os.path.splitext(basename)[1]
filename = filename_format.format(
basename=basename,
extension=extension,
index=index,
now=now,
)
t = threading.Thread(target=download_thread, args=[url, filename])
t.daemon = True
threads.append(t)
@ -60,7 +66,7 @@ def main():
urls = f.read()
else:
urls = clipext.resolve(filename)
urls = urls.split('\n')
urls = urls.replace('\r', '').split('\n')
thread_count = int(listget(sys.argv, 2, 4))
filename_format = listget(sys.argv, 3, None)
threaded_dl(urls, thread_count=thread_count, filename_format=filename_format)