# else/Downloady/downloady.py
#
# 340 lines
# 10 KiB
# Python
# Raw Normal View History
#
# 2016-07-28 03:41:13 +00:00
import sys
sys.path.append('C:\\git\\else\\ratelimiter'); import ratelimiter
sys.path.append('C:\\git\\else\\bytestring'); import bytestring
import argparse
import os
import pyperclip # pip install pyperclip
import requests
import time
import urllib
import warnings
# Suppress every warning for the whole process.
warnings.simplefilter('ignore')
# Default headers; request() applies each of these to any request whose
# headers do not already carry that key.
HEADERS = {
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.152 Safari/537.36'
}
# NOTE(review): not referenced anywhere in the visible part of this file.
SLEEPINESS = 3
# Characters that filepath_sanitize() strips out of local filenames.
FILENAME_BADCHARS = '*?"<>|'
# NOTE(review): not referenced anywhere in the visible part of this file.
last_request = 0
# Chunk size handed to iter_content() by download_file().
CHUNKSIZE = 16 * bytestring.KIBIBYTE
# NOTE(review): not referenced anywhere in the visible part of this file.
STOP = False
# Default `timeout` argument of request(), passed straight to requests.
TIMEOUT = 600
# 2016-07-29 20:39:04 +00:00
def basename_from_url(url):
    '''
    Determine the local filename appropriate for a URL.

    The fragment and query string are removed while the URL is still
    percent-encoded, so an encoded `%3F` or `%23` that belongs to the
    filename is not mistaken for a delimiter. The remainder is then decoded
    and the text after the final slash is returned.

    The slash split happens after decoding on purpose: an encoded `%2F`
    must not be able to smuggle a directory separator into the result.
    '''
    # `import urllib` alone does not guarantee the `parse` submodule is loaded.
    import urllib.parse

    path = url.split('#', 1)[0]
    path = path.split('?', 1)[0]
    localname = urllib.parse.unquote(path)
    localname = localname.split('/')[-1]
    return localname
# 2016-07-28 03:41:13 +00:00
# 2016-07-29 20:39:04 +00:00
def determine_seek_and_range(
        file_handle,
        localname,
        local_exists,
        overwrite,
        remote_total_bytes,
        server_respects_range,
        user_provided_range,
        user_range_min,
        user_range_max,
    ):
    '''
    Decide where to start writing in the local file and which byte range to
    request from the server.

    file_handle: the local file, opened for binary read/write. It may be
        truncated, or padded with a single zero byte at `user_range_min`,
        as a side effect.
    localname: path of the local file; only used to read its current size.
    local_exists: whether the local file existed before this download.
    overwrite: True to discard existing content, None to resume / fill in.
        Any other value together with an existing file raises TypeError
        (download_file handles `False` itself before calling this).
    remote_total_bytes: size reported by the server.
    server_respects_range: whether the server answered a range probe
        with a partial response.
    user_provided_range, user_range_min, user_range_max: the caller's own
        range request, if any. `user_range_max` may be '' for open-ended.

    Returns (seek_to, header_range_min, header_range_max), or None when the
    existing file already has `remote_total_bytes` bytes. A
    `header_range_min` of None means no range header needs to be set.

    Raises Exception when a user range is requested but the server does not
    honour ranges, or when the user declines to restart a download that
    cannot be resumed.
    '''
    seek_to = 0
    header_range_min = None
    header_range_max = None

    if not local_exists:
        if user_provided_range:
            if not server_respects_range:
                raise Exception('The server did not respect your range header')
            # Pad the new file out to the start of the requested range.
            file_handle.seek(user_range_min)
            file_handle.write(b'\0')
            header_range_min = user_range_min
            header_range_max = user_range_max
            seek_to = user_range_min
        return (seek_to, header_range_min, header_range_max)

    local_existing_bytes = os.path.getsize(localname)

    if overwrite is True:
        # Check before truncating: if the server ignores ranges it would send
        # the whole body, which would then be written at the wrong offset.
        if user_provided_range and not server_respects_range:
            raise Exception('The server did not respect your range header')
        file_handle.truncate()
        if user_provided_range:
            header_range_min = user_range_min
            header_range_max = user_range_max
            seek_to = user_range_min

    elif overwrite is None:
        if local_existing_bytes == remote_total_bytes:
            print('File is 100%. Nothing to do.')
            return None

        if user_provided_range:
            if not server_respects_range:
                raise Exception('The server did not respect your range header')
            # Report the range in the plan as well, so the caller requests
            # exactly the bytes that will be written at `seek_to`.
            header_range_min = user_range_min
            header_range_max = user_range_max
            seek_to = user_range_min

        elif server_respects_range:
            print('Resuming from byte %d' % local_existing_bytes)
            header_range_min = local_existing_bytes
            header_range_max = ''
            seek_to = local_existing_bytes

        else:
            print('File exists, but server doesn\'t allow resumes. Restart from 0?')
            permission = get_permission()
            if not permission:
                raise Exception('Couldn\'t resume')
            file_handle.truncate()

    else:
        raise TypeError('Invalid value for `overwrite`. Must be True, False, or None')

    return (seek_to, header_range_min, header_range_max)
# 2016-07-28 03:41:13 +00:00
# 2016-07-29 20:39:04 +00:00
def download_file(
        url,
        localname=None,
        auth=None,
        bytespersecond=None,
        callback_progress=None,
        headers=None,
        overwrite=None
    ):
    '''
    Download `url` to a local file, resuming or filling a byte range when
    possible.

    url: the address to download.
    localname: destination path. When None or '', the name is derived from
        the URL. Characters in FILENAME_BADCHARS are stripped either way.
    auth: passed through to the HTTP requests.
    bytespersecond: if given, throttle the download with a Ratelimiter.
    callback_progress: if given, called after every chunk as
        callback_progress(bytes_downloaded, remote_total_bytes).
    headers: extra request headers. A lowercase 'range' entry of the form
        'bytes=MIN-MAX' (MAX may be empty) requests a partial download.
        The dict is copied and never modified.
    overwrite: True to replace an existing file, False to leave an existing
        file alone, None to resume it.

    Returns the local filename, or None when there was nothing to do.
    '''
    # Work on a private copy: the probe request and the final range header
    # must not clobber the user's range or leak into the caller's dict.
    headers = dict(headers) if headers is not None else {}

    # Determine local filename
    url = url.replace('%3A//', '://')
    if localname in [None, '']:
        localname = basename_from_url(url)
    localname = filepath_sanitize(localname)

    directory = os.path.split(localname)[0]
    if directory != '':
        os.makedirs(directory, exist_ok=True)

    if bytespersecond is None:
        limiter = None
    else:
        limiter = ratelimiter.Ratelimiter(bytespersecond, period=1)

    # Prepare plan variables
    local_exists = os.path.exists(localname)
    if local_exists and overwrite is False:
        print('Overwrite off. Nothing to do.')
        return

    user_provided_range = 'range' in headers
    if user_provided_range:
        user_range_min = int(headers['range'].split('bytes=')[1].split('-')[0])
        user_range_max = headers['range'].split('-')[1]
        if user_range_max != '':
            user_range_max = int(user_range_max)
    else:
        user_range_min = None
        user_range_max = None

    # Always include a range on the first request to figure out whether the
    # server supports it. Use 0- so we get the right `remote_total_bytes`.
    probe_headers = dict(headers)
    probe_headers['range'] = 'bytes=0-'

    # I'm using a GET instead of an actual HEAD here because some servers respond
    # differently, even though they're not supposed to.
    head = request('get', url, stream=True, headers=probe_headers, auth=auth)
    remote_total_bytes = int(head.headers.get('content-length', 1))
    server_respects_range = (head.status_code == 206 and 'content-range' in head.headers)
    head.connection.close()

    touch(localname)
    # The context manager closes the file on the early return and on errors.
    with open(localname, 'r+b') as file_handle:
        file_handle.seek(0)
        plan = determine_seek_and_range(
            file_handle=file_handle,
            localname=localname,
            local_exists=local_exists,
            overwrite=overwrite,
            remote_total_bytes=remote_total_bytes,
            server_respects_range=server_respects_range,
            user_provided_range=user_provided_range,
            user_range_min=user_range_min,
            user_range_max=user_range_max,
        )
        if plan is None:
            return

        (seek_to, header_range_min, header_range_max) = plan
        if header_range_min is not None:
            headers['range'] = 'bytes={0}-{1}'.format(header_range_min, header_range_max)

        bytes_downloaded = seek_to
        file_handle.seek(seek_to)
        download_stream = request('get', url, stream=True, headers=headers, auth=auth)

        # Begin download
        try:
            for chunk in download_stream.iter_content(chunk_size=CHUNKSIZE):
                bytes_downloaded += len(chunk)
                file_handle.write(chunk)
                if callback_progress is not None:
                    callback_progress(bytes_downloaded, remote_total_bytes)

                # No point sleeping after the final chunk.
                if limiter is not None and bytes_downloaded < remote_total_bytes:
                    limiter.limit(len(chunk))
        finally:
            download_stream.close()

    return localname
def filepath_sanitize(text, exclusions=''):
    '''
    Return `text` with every character of FILENAME_BADCHARS removed.

    exclusions: characters that should be left in place even though they
        appear in FILENAME_BADCHARS. Each character is considered on its
        own, so they may be given in any order and need not be adjacent
        in FILENAME_BADCHARS.
    '''
    removal_table = {
        ord(char): None
        for char in FILENAME_BADCHARS
        if char not in exclusions
    }
    return text.translate(removal_table)
def get_permission(prompt='y/n\n>', affirmative=('y', 'yes')):
    '''
    Ask the user a yes/no question on the console.

    prompt: text shown by input().
    affirmative: the answers that count as "yes", in lowercase.

    Returns True when the reply, ignoring case and surrounding whitespace,
    is one of `affirmative`; False otherwise.
    '''
    answer = input(prompt)
    return answer.strip().lower() in affirmative
def is_clipboard(s):
    '''
    Return True when `s`, compared case-insensitively, is one of the
    placeholder tokens '!c', '!clip' or '!clipboard'.
    '''
    clipboard_tokens = ['!c', '!clip', '!clipboard']
    lowered = s.lower()
    return lowered in clipboard_tokens
def progress(bytes_downloaded, bytes_total, prefix=''):
    '''
    Print a one-line progress bar that redraws itself in place.

    The line shows the downloaded amount, a 50-cell bar, and the total, with
    both amounts rendered by `bytestring` in the same unit. A newline is
    emitted only once the bar reaches 100%, so earlier calls overwrite each
    other via the leading carriage return.

    bytes_downloaded: bytes received so far.
    bytes_total: expected size. Zero is treated as already complete.
    prefix: text placed at the start of the line.
    '''
    divisor = bytestring.get_appropriate_divisor(bytes_total)
    bytes_total_string = bytestring.bytestring(bytes_total, force_unit=divisor)
    bytes_downloaded_string = bytestring.bytestring(bytes_downloaded, force_unit=divisor)
    bytes_downloaded_string = bytes_downloaded_string.rjust(len(bytes_total_string), ' ')

    blocks = 50
    # NOTE(review): this literal was empty in the file as found, which left
    # the bar permanently blank. A full-block glyph is assumed here; confirm
    # the intended character.
    char = '█'

    if bytes_total == 0:
        # Avoid dividing by zero; an empty download is complete.
        percent = 100
    else:
        percent = bytes_downloaded * 100 / bytes_total
    percent = int(min(100, percent))

    completed_blocks = char * int(blocks * percent / 100)
    incompleted_blocks = ' ' * (blocks - len(completed_blocks))
    statusbar = '{char}{complete}{incomplete}{char}'.format(
        char=char,
        complete=completed_blocks,
        incomplete=incompleted_blocks,
    )

    end = '\n' if percent == 100 else ''
    message = '\r{prefix}{bytes_downloaded} {statusbar} {bytes_total}'
    message = message.format(
        prefix=prefix,
        bytes_downloaded=bytes_downloaded_string,
        bytes_total=bytes_total_string,
        statusbar=statusbar,
    )
    print(message, end=end, flush=True)
def progress2(bytes_downloaded, bytes_total, prefix=''):
    '''
    Print a one-line numeric progress report that redraws itself in place.

    The line has the form `<prefix><downloaded> / <total> / <percent>%`,
    with thousands separators in both byte counts and the percentage shown
    to four decimal places. A newline is emitted only at 100%, so earlier
    calls overwrite each other via the leading carriage return.

    bytes_downloaded: bytes received so far.
    bytes_total: expected size. Zero is treated as already complete.
    prefix: text placed at the start of the line.
    '''
    if bytes_total == 0:
        # Avoid dividing by zero; an empty download is complete.
        percent = 100
    else:
        percent = (bytes_downloaded * 100) / bytes_total
    percent = min(100, percent)
    percent_string = '%08.4f' % percent

    bytes_downloaded_string = '{0:,}'.format(bytes_downloaded)
    bytes_total_string = '{0:,}'.format(bytes_total)
    bytes_downloaded_string = bytes_downloaded_string.rjust(len(bytes_total_string), ' ')

    end = '\n' if percent == 100 else ''
    message = '\r{prefix}{bytes_downloaded} / {bytes_total} / {percent}%'
    message = message.format(
        prefix=prefix,
        bytes_downloaded=bytes_downloaded_string,
        bytes_total=bytes_total_string,
        percent=percent_string,
    )
    print(message, end=end, flush=True)
def request(method, url, stream=False, headers=None, timeout=TIMEOUT, **kwargs):
    '''
    Perform an HTTP request in a fresh session and return the response.

    method: 'get', 'head' or 'post'. Anything else raises KeyError.
    url: the address to request.
    stream: passed to requests; True defers downloading the body.
    headers: request headers. Entries from HEADERS are used for any key not
        present here. The dict is read, never modified.
    timeout: passed to requests.
    kwargs: passed through to the session method.

    Raises whatever `raise_for_status` raises on an error status code.
    '''
    # Build a new dict instead of filling defaults into the caller's own.
    merged_headers = dict(HEADERS)
    if headers is not None:
        merged_headers.update(headers)

    session = requests.Session()
    session.max_redirects = 40

    method = {
        'get': session.get,
        'head': session.head,
        'post': session.post,
    }[method]

    req = method(url, stream=stream, headers=merged_headers, timeout=timeout, **kwargs)
    req.raise_for_status()
    return req
def touch(filename):
    '''
    Make sure `filename` exists by opening it in binary append mode and
    closing it again. Existing content is left as it is.
    '''
    with open(filename, 'ab'):
        pass
def download_argparse(args):
    '''
    Translate parsed command-line arguments into a download_file call.

    args.url: the address, or a clipboard token (see is_clipboard), in which
        case the URL is read from the clipboard and echoed.
    args.localname: destination path, or None to derive it from the URL.
    args.overwrite: 'y' / 't' for True, 'n' / 'f' for False (any case);
        anything else means None.
    args.callback: '1' for progress, '2' for progress2, a callable to use
        directly, or None for no progress output.
    args.bytespersecond: rate limit as a string for bytestring.parsebytes,
        or None.
    args.range: byte range such as '100-200', or None.

    Raises ValueError for an unrecognised, non-callable callback, so the
    mistake surfaces before any file is touched or request is made.
    '''
    url = args.url
    if is_clipboard(url):
        url = pyperclip.paste()
        print(url)

    overwrite = {
        'y': True, 't': True,
        'n': False, 'f': False,
    }.get(args.overwrite.lower(), None)

    callback = args.callback
    if callback == '1':
        callback = progress
    elif callback == '2':
        callback = progress2
    elif callback is not None and not callable(callback):
        raise ValueError('Invalid callback %r. Use 1 or 2.' % (callback,))

    bytespersecond = args.bytespersecond
    if bytespersecond is not None:
        bytespersecond = bytestring.parsebytes(bytespersecond)

    headers = {}
    if args.range is not None:
        headers['range'] = 'bytes=%s' % args.range

    download_file(
        url=url,
        localname=args.localname,
        bytespersecond=bytespersecond,
        callback_progress=callback,
        headers=headers,
        overwrite=overwrite,
    )
if __name__ == '__main__':
    # Command-line entry point: one required URL, an optional destination
    # name, and four optional flags, all handed to download_argparse.
    cli = argparse.ArgumentParser()

    cli.add_argument('url')
    cli.add_argument('localname', nargs='?', default=None)

    # (short flag, long flag, destination attribute, default value)
    optional_flags = [
        ('-c', '--callback', 'callback', progress),
        ('-bps', '--bytespersecond', 'bytespersecond', None),
        ('-ow', '--overwrite', 'overwrite', ''),
        ('-r', '--range', 'range', None),
    ]
    for (short_flag, long_flag, destination, default_value) in optional_flags:
        cli.add_argument(short_flag, long_flag, dest=destination, default=default_value)

    cli.set_defaults(func=download_argparse)

    parsed = cli.parse_args()
    parsed.func(parsed)