Update downloady with better use of pathclass, vlogging.

master
voussoir 2021-09-23 22:32:48 -07:00
parent ac8e83a11e
commit 6c0233c239
No known key found for this signature in database
GPG Key ID: 5F7554F8C26DACCB
1 changed file with 65 additions and 72 deletions

View File

@ -3,7 +3,6 @@ import os
import requests import requests
import sys import sys
import urllib import urllib
import warnings
from voussoirkit import bytestring from voussoirkit import bytestring
from voussoirkit import dotdict from voussoirkit import dotdict
@ -14,8 +13,6 @@ from voussoirkit import vlogging
log = vlogging.getLogger(__name__, 'downloady') log = vlogging.getLogger(__name__, 'downloady')
warnings.simplefilter('ignore')
USERAGENT = ''' USERAGENT = '''
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko)
Chrome/42.0.2311.152 Safari/537.36' Chrome/42.0.2311.152 Safari/537.36'
@ -46,6 +43,13 @@ class NotEnoughBytes(DownloadyException):
class ServerNoRange(DownloadyException): class ServerNoRange(DownloadyException):
pass pass
class SpecialPath:
    '''
    Minimal stand-in for a path object, wrapping a filename that should be
    passed to the builtin open() exactly as given (the surrounding code uses
    it for special names such as /dev/null).

    Only the two members used by callers are provided: the `absolute_path`
    attribute and the `open` method.
    '''
    def __init__(self, path):
        # Stored verbatim; no normalization or existence check is performed.
        self.absolute_path = path

    def __repr__(self):
        return f'SpecialPath({self.absolute_path!r})'

    def open(self, *args, **kwargs):
        '''
        Open the wrapped path with the builtin open(), forwarding all
        positional and keyword arguments (mode, buffering, etc.).
        '''
        return open(self.absolute_path, *args, **kwargs)
def download_file( def download_file(
url, url,
localname=None, localname=None,
@ -60,23 +64,6 @@ def download_file(
verbose=False, verbose=False,
verify_ssl=True, verify_ssl=True,
): ):
headers = headers or {}
url = sanitize_url(url)
if localname in [None, '']:
localname = basename_from_url(url)
if not is_special_file(localname):
localname = pathclass.Path(localname)
if localname.is_dir:
localname = localname.with_child(basename_from_url(url))
localname = localname.absolute_path
localname = sanitize_filename(localname)
log.debug('URL: %s', url)
log.debug('File: %s', localname)
plan = prepare_plan( plan = prepare_plan(
url, url,
localname, localname,
@ -97,23 +84,17 @@ def download_file(
return download_plan(plan) return download_plan(plan)
def download_plan(plan): def download_plan(plan):
temp_localname = plan.download_into if not isinstance(plan.download_into, SpecialPath):
real_localname = plan.real_localname plan.download_into.parent.makedirs(exist_ok=True)
directory = os.path.split(temp_localname)[0] plan.download_into.touch()
if directory != '' and not is_special_file(temp_localname):
os.makedirs(directory, exist_ok=True)
if not is_special_file(temp_localname):
touch(temp_localname)
if plan.plan_type in ['resume', 'partial']: if plan.plan_type in ['resume', 'partial']:
file_handle = open(temp_localname, 'r+b') file_handle = plan.download_into.open('r+b')
file_handle.seek(plan.seek_to) file_handle.seek(plan.seek_to)
bytes_downloaded = plan.seek_to bytes_downloaded = plan.seek_to
elif plan.plan_type == 'fulldownload': elif plan.plan_type == 'fulldownload':
file_handle = open(temp_localname, 'wb') file_handle = plan.download_into.open('wb')
bytes_downloaded = 0 bytes_downloaded = 0
if plan.header_range_min is not None: if plan.header_range_min is not None:
@ -122,6 +103,8 @@ def download_plan(plan):
max=plan.header_range_max, max=plan.header_range_max,
) )
log.info('Downloading %s into "%s"', plan.url, plan.real_localname.absolute_path)
download_stream = request( download_stream = request(
'get', 'get',
plan.url, plan.url,
@ -152,18 +135,20 @@ def download_plan(plan):
file_handle.close() file_handle.close()
# Don't try to rename /dev/null or other special names # Don't try to rename /dev/null or other special names
if not is_special_file(temp_localname) and not is_special_file(real_localname): if isinstance(plan.real_localname, SpecialPath):
localsize = os.path.getsize(temp_localname) return plan.real_localname
undersized = plan.plan_type != 'partial' and localsize < plan.remote_total_bytes
if plan.raise_for_undersized and undersized:
message = 'File does not contain expected number of bytes. Received {size} / {total}'
message = message.format(size=localsize, total=plan.remote_total_bytes)
raise NotEnoughBytes(message)
if temp_localname != real_localname: temp_localsize = plan.download_into.size
os.rename(temp_localname, real_localname) undersized = plan.plan_type != 'partial' and temp_localsize < plan.remote_total_bytes
if undersized and plan.raise_for_undersized:
message = 'File does not contain expected number of bytes. Received {size} / {total}'
message = message.format(size=temp_localsize, total=plan.remote_total_bytes)
raise NotEnoughBytes(message)
return real_localname if plan.download_into != plan.real_localname:
os.rename(plan.download_into.absolute_path, plan.real_localname.absolute_path)
return plan.real_localname
def prepare_plan( def prepare_plan(
url, url,
@ -181,20 +166,34 @@ def prepare_plan(
# Chapter 1: File existence # Chapter 1: File existence
headers = headers or {} headers = headers or {}
user_provided_range = 'range' in headers user_provided_range = 'range' in headers
real_localname = localname
url = sanitize_url(url)
if localname in [None, '']:
localname = basename_from_url(url)
if is_special_file(localname): if is_special_file(localname):
temp_localname = localname real_localname = SpecialPath(localname)
temp_localname = SpecialPath(localname)
real_exists = False
temp_exists = False
else: else:
temp_localname = localname + TEMP_EXTENSION localname = pathclass.Path(localname)
real_exists = os.path.exists(real_localname) if localname.is_dir:
localname = localname.with_child(basename_from_url(url))
localname = sanitize_filename(localname.absolute_path)
real_localname = pathclass.Path(localname)
temp_localname = real_localname.add_extension(TEMP_EXTENSION)
real_exists = real_localname.exists
temp_exists = temp_localname.exists
if real_exists and overwrite is False and not user_provided_range: if real_exists and overwrite is False and not user_provided_range:
log.debug('File exists and overwrite is off. Nothing to do.') log.debug('File exists and overwrite is off. Nothing to do.')
return None return None
temp_exists = os.path.exists(temp_localname) if isinstance(real_localname, SpecialPath):
real_localsize = int(real_exists and os.path.getsize(real_localname)) temp_localsize = 0
temp_localsize = int(temp_exists and os.path.getsize(temp_localname)) else:
temp_localsize = int(temp_exists and temp_localname.size)
# Chapter 2: Ratelimiting # Chapter 2: Ratelimiting
if bytespersecond is None: if bytespersecond is None:
@ -220,7 +219,7 @@ def prepare_plan(
if user_provided_range and not do_head: if user_provided_range and not do_head:
raise DownloadyException('Cannot determine range support without the head request') raise DownloadyException('Cannot determine range support without the head request')
temp_headers = headers temp_headers = headers.copy()
temp_headers.update({'range': 'bytes=0-'}) temp_headers.update({'range': 'bytes=0-'})
if do_head: if do_head:
@ -304,7 +303,6 @@ def prepare_plan(
raise DownloadyException('No plan was chosen?') raise DownloadyException('No plan was chosen?')
class Progress1: class Progress1:
def __init__(self, total_bytes): def __init__(self, total_bytes):
self.limiter = ratelimiter.Ratelimiter(allowance=8, mode='reject') self.limiter = ratelimiter.Ratelimiter(allowance=8, mode='reject')
@ -318,8 +316,8 @@ class Progress1:
def step(self, bytes_downloaded): def step(self, bytes_downloaded):
percent = bytes_downloaded / self.total_bytes percent = bytes_downloaded / self.total_bytes
percent = min(1, percent) percent = min(1.00, percent)
if self.limiter.limit(1) is False and percent < 1: if self.limiter.limit(1) is False and percent < 1.00:
return return
downloaded_string = bytestring.bytestring(bytes_downloaded, force_unit=self.divisor) downloaded_string = bytestring.bytestring(bytes_downloaded, force_unit=self.divisor)
@ -336,8 +334,7 @@ class Progress1:
total_bytes=self.total_format, total_bytes=self.total_format,
statusbar=statusbar, statusbar=statusbar,
) )
print(message, end=end, flush=True) pipeable.stderr(message, end=end)
class Progress2: class Progress2:
def __init__(self, total_bytes): def __init__(self, total_bytes):
@ -348,12 +345,13 @@ class Progress2:
self.bytes_downloaded_string = '{:%d,}' % len(self.total_bytes_string) self.bytes_downloaded_string = '{:%d,}' % len(self.total_bytes_string)
def step(self, bytes_downloaded): def step(self, bytes_downloaded):
percent = (bytes_downloaded * 100) / self.total_bytes percent = bytes_downloaded / self.total_bytes
percent = min(100, percent) percent = min(1.00, percent)
if self.limiter.limit(1) is False and percent < 100: if self.limiter.limit(1) is False and percent < 1.00:
return return
percent_string = '%08.4f' % percent percent *= 100
percent_string = f'{percent:08.4f}'
bytes_downloaded_string = self.bytes_downloaded_string.format(bytes_downloaded) bytes_downloaded_string = self.bytes_downloaded_string.format(bytes_downloaded)
end = '\n' if percent == 100 else '' end = '\n' if percent == 100 else ''
@ -363,8 +361,7 @@ class Progress2:
total_bytes=self.total_bytes_string, total_bytes=self.total_bytes_string,
percent=percent_string, percent=percent_string,
) )
print(message, end=end, flush=True) pipeable.stderr(message, end=end)
def basename_from_url(url): def basename_from_url(url):
''' '''
@ -373,13 +370,9 @@ def basename_from_url(url):
localname = urllib.parse.unquote(url) localname = urllib.parse.unquote(url)
localname = localname.rstrip('/') localname = localname.rstrip('/')
localname = localname.split('?')[0] localname = localname.split('?')[0]
localname = localname.split('/')[-1] localname = localname.rsplit('/', 1)[-1]
return localname return localname
def get_permission(prompt='y/n\n>', affirmative=('y', 'yes')):
    '''
    Ask the user a question on the console and report whether they agreed.

    prompt:
        Text shown to the user by input().
    affirmative:
        Collection of lowercase answers that count as "yes". An immutable
        tuple is used as the default so it cannot be mutated between calls.

    Returns True if the lowercased answer is in `affirmative`, else False.
    '''
    permission = input(prompt)
    return permission.lower() in affirmative
def is_special_file(file): def is_special_file(file):
if isinstance(file, pathclass.Path): if isinstance(file, pathclass.Path):
return False return False
@ -388,11 +381,15 @@ def is_special_file(file):
file = os.path.normcase(file) file = os.path.normcase(file)
return file in SPECIAL_FILENAMES return file in SPECIAL_FILENAMES
def request(method, url, stream=False, headers=None, timeout=TIMEOUT, verify_ssl=True, **kwargs): def request(method, url, headers=None, timeout=TIMEOUT, verify_ssl=True, **kwargs):
if headers is None: if headers is None:
headers = {} headers = {}
else:
headers = headers.copy()
for (key, value) in HEADERS.items(): for (key, value) in HEADERS.items():
headers.setdefault(key, value) headers.setdefault(key, value)
session = requests.Session() session = requests.Session()
a = requests.adapters.HTTPAdapter(max_retries=30) a = requests.adapters.HTTPAdapter(max_retries=30)
b = requests.adapters.HTTPAdapter(max_retries=30) b = requests.adapters.HTTPAdapter(max_retries=30)
@ -405,7 +402,8 @@ def request(method, url, stream=False, headers=None, timeout=TIMEOUT, verify_ssl
'head': session.head, 'head': session.head,
'post': session.post, 'post': session.post,
}[method] }[method]
req = method(url, stream=stream, headers=headers, timeout=timeout, verify=verify_ssl, **kwargs)
req = method(url, headers=headers, timeout=timeout, verify=verify_ssl, **kwargs)
req.raise_for_status() req.raise_for_status()
return req return req
@ -427,11 +425,6 @@ def sanitize_url(url):
url = url.replace('%3A//', '://') url = url.replace('%3A//', '://')
return url return url
def touch(filename):
    '''
    Make sure `filename` exists by opening it in binary append mode and
    closing it again. Existing contents are left untouched because append
    mode does not truncate.

    Returns None.
    '''
    # The context manager guarantees the handle is closed.
    with open(filename, 'ab'):
        pass
def download_argparse(args): def download_argparse(args):
url = args.url url = args.url