Update downloady with better use of pathclass, vlogging.
This commit is contained in:
parent
ac8e83a11e
commit
6c0233c239
1 changed file with 65 additions and 72 deletions
|
@ -3,7 +3,6 @@ import os
|
||||||
import requests
|
import requests
|
||||||
import sys
|
import sys
|
||||||
import urllib
|
import urllib
|
||||||
import warnings
|
|
||||||
|
|
||||||
from voussoirkit import bytestring
|
from voussoirkit import bytestring
|
||||||
from voussoirkit import dotdict
|
from voussoirkit import dotdict
|
||||||
|
@ -14,8 +13,6 @@ from voussoirkit import vlogging
|
||||||
|
|
||||||
log = vlogging.getLogger(__name__, 'downloady')
|
log = vlogging.getLogger(__name__, 'downloady')
|
||||||
|
|
||||||
warnings.simplefilter('ignore')
|
|
||||||
|
|
||||||
USERAGENT = '''
|
USERAGENT = '''
|
||||||
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko)
|
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko)
|
||||||
Chrome/42.0.2311.152 Safari/537.36'
|
Chrome/42.0.2311.152 Safari/537.36'
|
||||||
|
@ -46,6 +43,13 @@ class NotEnoughBytes(DownloadyException):
|
||||||
class ServerNoRange(DownloadyException):
|
class ServerNoRange(DownloadyException):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
class SpecialPath:
|
||||||
|
def __init__(self, path):
|
||||||
|
self.absolute_path = path
|
||||||
|
|
||||||
|
def open(self, *args, **kwargs):
|
||||||
|
return open(self.absolute_path, *args, **kwargs)
|
||||||
|
|
||||||
def download_file(
|
def download_file(
|
||||||
url,
|
url,
|
||||||
localname=None,
|
localname=None,
|
||||||
|
@ -60,23 +64,6 @@ def download_file(
|
||||||
verbose=False,
|
verbose=False,
|
||||||
verify_ssl=True,
|
verify_ssl=True,
|
||||||
):
|
):
|
||||||
headers = headers or {}
|
|
||||||
|
|
||||||
url = sanitize_url(url)
|
|
||||||
if localname in [None, '']:
|
|
||||||
localname = basename_from_url(url)
|
|
||||||
|
|
||||||
if not is_special_file(localname):
|
|
||||||
localname = pathclass.Path(localname)
|
|
||||||
if localname.is_dir:
|
|
||||||
localname = localname.with_child(basename_from_url(url))
|
|
||||||
|
|
||||||
localname = localname.absolute_path
|
|
||||||
localname = sanitize_filename(localname)
|
|
||||||
|
|
||||||
log.debug('URL: %s', url)
|
|
||||||
log.debug('File: %s', localname)
|
|
||||||
|
|
||||||
plan = prepare_plan(
|
plan = prepare_plan(
|
||||||
url,
|
url,
|
||||||
localname,
|
localname,
|
||||||
|
@ -97,23 +84,17 @@ def download_file(
|
||||||
return download_plan(plan)
|
return download_plan(plan)
|
||||||
|
|
||||||
def download_plan(plan):
|
def download_plan(plan):
|
||||||
temp_localname = plan.download_into
|
if not isinstance(plan.download_into, SpecialPath):
|
||||||
real_localname = plan.real_localname
|
plan.download_into.parent.makedirs(exist_ok=True)
|
||||||
directory = os.path.split(temp_localname)[0]
|
plan.download_into.touch()
|
||||||
|
|
||||||
if directory != '' and not is_special_file(temp_localname):
|
|
||||||
os.makedirs(directory, exist_ok=True)
|
|
||||||
|
|
||||||
if not is_special_file(temp_localname):
|
|
||||||
touch(temp_localname)
|
|
||||||
|
|
||||||
if plan.plan_type in ['resume', 'partial']:
|
if plan.plan_type in ['resume', 'partial']:
|
||||||
file_handle = open(temp_localname, 'r+b')
|
file_handle = plan.download_into.open('r+b')
|
||||||
file_handle.seek(plan.seek_to)
|
file_handle.seek(plan.seek_to)
|
||||||
bytes_downloaded = plan.seek_to
|
bytes_downloaded = plan.seek_to
|
||||||
|
|
||||||
elif plan.plan_type == 'fulldownload':
|
elif plan.plan_type == 'fulldownload':
|
||||||
file_handle = open(temp_localname, 'wb')
|
file_handle = plan.download_into.open('wb')
|
||||||
bytes_downloaded = 0
|
bytes_downloaded = 0
|
||||||
|
|
||||||
if plan.header_range_min is not None:
|
if plan.header_range_min is not None:
|
||||||
|
@ -122,6 +103,8 @@ def download_plan(plan):
|
||||||
max=plan.header_range_max,
|
max=plan.header_range_max,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
log.info('Downloading %s into "%s"', plan.url, plan.real_localname.absolute_path)
|
||||||
|
|
||||||
download_stream = request(
|
download_stream = request(
|
||||||
'get',
|
'get',
|
||||||
plan.url,
|
plan.url,
|
||||||
|
@ -152,18 +135,20 @@ def download_plan(plan):
|
||||||
file_handle.close()
|
file_handle.close()
|
||||||
|
|
||||||
# Don't try to rename /dev/null or other special names
|
# Don't try to rename /dev/null or other special names
|
||||||
if not is_special_file(temp_localname) and not is_special_file(real_localname):
|
if isinstance(plan.real_localname, SpecialPath):
|
||||||
localsize = os.path.getsize(temp_localname)
|
return plan.real_localname
|
||||||
undersized = plan.plan_type != 'partial' and localsize < plan.remote_total_bytes
|
|
||||||
if plan.raise_for_undersized and undersized:
|
temp_localsize = plan.download_into.size
|
||||||
|
undersized = plan.plan_type != 'partial' and temp_localsize < plan.remote_total_bytes
|
||||||
|
if undersized and plan.raise_for_undersized:
|
||||||
message = 'File does not contain expected number of bytes. Received {size} / {total}'
|
message = 'File does not contain expected number of bytes. Received {size} / {total}'
|
||||||
message = message.format(size=localsize, total=plan.remote_total_bytes)
|
message = message.format(size=temp_localsize, total=plan.remote_total_bytes)
|
||||||
raise NotEnoughBytes(message)
|
raise NotEnoughBytes(message)
|
||||||
|
|
||||||
if temp_localname != real_localname:
|
if plan.download_into != plan.real_localname:
|
||||||
os.rename(temp_localname, real_localname)
|
os.rename(plan.download_into.absolute_path, plan.real_localname.absolute_path)
|
||||||
|
|
||||||
return real_localname
|
return plan.real_localname
|
||||||
|
|
||||||
def prepare_plan(
|
def prepare_plan(
|
||||||
url,
|
url,
|
||||||
|
@ -181,20 +166,34 @@ def prepare_plan(
|
||||||
# Chapter 1: File existence
|
# Chapter 1: File existence
|
||||||
headers = headers or {}
|
headers = headers or {}
|
||||||
user_provided_range = 'range' in headers
|
user_provided_range = 'range' in headers
|
||||||
real_localname = localname
|
|
||||||
|
url = sanitize_url(url)
|
||||||
|
if localname in [None, '']:
|
||||||
|
localname = basename_from_url(url)
|
||||||
|
|
||||||
if is_special_file(localname):
|
if is_special_file(localname):
|
||||||
temp_localname = localname
|
real_localname = SpecialPath(localname)
|
||||||
|
temp_localname = SpecialPath(localname)
|
||||||
|
real_exists = False
|
||||||
|
temp_exists = False
|
||||||
else:
|
else:
|
||||||
temp_localname = localname + TEMP_EXTENSION
|
localname = pathclass.Path(localname)
|
||||||
real_exists = os.path.exists(real_localname)
|
if localname.is_dir:
|
||||||
|
localname = localname.with_child(basename_from_url(url))
|
||||||
|
localname = sanitize_filename(localname.absolute_path)
|
||||||
|
real_localname = pathclass.Path(localname)
|
||||||
|
temp_localname = real_localname.add_extension(TEMP_EXTENSION)
|
||||||
|
real_exists = real_localname.exists
|
||||||
|
temp_exists = temp_localname.exists
|
||||||
|
|
||||||
if real_exists and overwrite is False and not user_provided_range:
|
if real_exists and overwrite is False and not user_provided_range:
|
||||||
log.debug('File exists and overwrite is off. Nothing to do.')
|
log.debug('File exists and overwrite is off. Nothing to do.')
|
||||||
return None
|
return None
|
||||||
|
|
||||||
temp_exists = os.path.exists(temp_localname)
|
if isinstance(real_localname, SpecialPath):
|
||||||
real_localsize = int(real_exists and os.path.getsize(real_localname))
|
temp_localsize = 0
|
||||||
temp_localsize = int(temp_exists and os.path.getsize(temp_localname))
|
else:
|
||||||
|
temp_localsize = int(temp_exists and temp_localname.size)
|
||||||
|
|
||||||
# Chapter 2: Ratelimiting
|
# Chapter 2: Ratelimiting
|
||||||
if bytespersecond is None:
|
if bytespersecond is None:
|
||||||
|
@ -220,7 +219,7 @@ def prepare_plan(
|
||||||
if user_provided_range and not do_head:
|
if user_provided_range and not do_head:
|
||||||
raise DownloadyException('Cannot determine range support without the head request')
|
raise DownloadyException('Cannot determine range support without the head request')
|
||||||
|
|
||||||
temp_headers = headers
|
temp_headers = headers.copy()
|
||||||
temp_headers.update({'range': 'bytes=0-'})
|
temp_headers.update({'range': 'bytes=0-'})
|
||||||
|
|
||||||
if do_head:
|
if do_head:
|
||||||
|
@ -304,7 +303,6 @@ def prepare_plan(
|
||||||
|
|
||||||
raise DownloadyException('No plan was chosen?')
|
raise DownloadyException('No plan was chosen?')
|
||||||
|
|
||||||
|
|
||||||
class Progress1:
|
class Progress1:
|
||||||
def __init__(self, total_bytes):
|
def __init__(self, total_bytes):
|
||||||
self.limiter = ratelimiter.Ratelimiter(allowance=8, mode='reject')
|
self.limiter = ratelimiter.Ratelimiter(allowance=8, mode='reject')
|
||||||
|
@ -318,8 +316,8 @@ class Progress1:
|
||||||
|
|
||||||
def step(self, bytes_downloaded):
|
def step(self, bytes_downloaded):
|
||||||
percent = bytes_downloaded / self.total_bytes
|
percent = bytes_downloaded / self.total_bytes
|
||||||
percent = min(1, percent)
|
percent = min(1.00, percent)
|
||||||
if self.limiter.limit(1) is False and percent < 1:
|
if self.limiter.limit(1) is False and percent < 1.00:
|
||||||
return
|
return
|
||||||
|
|
||||||
downloaded_string = bytestring.bytestring(bytes_downloaded, force_unit=self.divisor)
|
downloaded_string = bytestring.bytestring(bytes_downloaded, force_unit=self.divisor)
|
||||||
|
@ -336,8 +334,7 @@ class Progress1:
|
||||||
total_bytes=self.total_format,
|
total_bytes=self.total_format,
|
||||||
statusbar=statusbar,
|
statusbar=statusbar,
|
||||||
)
|
)
|
||||||
print(message, end=end, flush=True)
|
pipeable.stderr(message, end=end)
|
||||||
|
|
||||||
|
|
||||||
class Progress2:
|
class Progress2:
|
||||||
def __init__(self, total_bytes):
|
def __init__(self, total_bytes):
|
||||||
|
@ -348,12 +345,13 @@ class Progress2:
|
||||||
self.bytes_downloaded_string = '{:%d,}' % len(self.total_bytes_string)
|
self.bytes_downloaded_string = '{:%d,}' % len(self.total_bytes_string)
|
||||||
|
|
||||||
def step(self, bytes_downloaded):
|
def step(self, bytes_downloaded):
|
||||||
percent = (bytes_downloaded * 100) / self.total_bytes
|
percent = bytes_downloaded / self.total_bytes
|
||||||
percent = min(100, percent)
|
percent = min(1.00, percent)
|
||||||
if self.limiter.limit(1) is False and percent < 100:
|
if self.limiter.limit(1) is False and percent < 1.00:
|
||||||
return
|
return
|
||||||
|
|
||||||
percent_string = '%08.4f' % percent
|
percent *= 100
|
||||||
|
percent_string = f'{percent:08.4f}'
|
||||||
bytes_downloaded_string = self.bytes_downloaded_string.format(bytes_downloaded)
|
bytes_downloaded_string = self.bytes_downloaded_string.format(bytes_downloaded)
|
||||||
|
|
||||||
end = '\n' if percent == 100 else ''
|
end = '\n' if percent == 100 else ''
|
||||||
|
@ -363,8 +361,7 @@ class Progress2:
|
||||||
total_bytes=self.total_bytes_string,
|
total_bytes=self.total_bytes_string,
|
||||||
percent=percent_string,
|
percent=percent_string,
|
||||||
)
|
)
|
||||||
print(message, end=end, flush=True)
|
pipeable.stderr(message, end=end)
|
||||||
|
|
||||||
|
|
||||||
def basename_from_url(url):
|
def basename_from_url(url):
|
||||||
'''
|
'''
|
||||||
|
@ -373,13 +370,9 @@ def basename_from_url(url):
|
||||||
localname = urllib.parse.unquote(url)
|
localname = urllib.parse.unquote(url)
|
||||||
localname = localname.rstrip('/')
|
localname = localname.rstrip('/')
|
||||||
localname = localname.split('?')[0]
|
localname = localname.split('?')[0]
|
||||||
localname = localname.split('/')[-1]
|
localname = localname.rsplit('/', 1)[-1]
|
||||||
return localname
|
return localname
|
||||||
|
|
||||||
def get_permission(prompt='y/n\n>', affirmative=['y', 'yes']):
|
|
||||||
permission = input(prompt)
|
|
||||||
return permission.lower() in affirmative
|
|
||||||
|
|
||||||
def is_special_file(file):
|
def is_special_file(file):
|
||||||
if isinstance(file, pathclass.Path):
|
if isinstance(file, pathclass.Path):
|
||||||
return False
|
return False
|
||||||
|
@ -388,11 +381,15 @@ def is_special_file(file):
|
||||||
file = os.path.normcase(file)
|
file = os.path.normcase(file)
|
||||||
return file in SPECIAL_FILENAMES
|
return file in SPECIAL_FILENAMES
|
||||||
|
|
||||||
def request(method, url, stream=False, headers=None, timeout=TIMEOUT, verify_ssl=True, **kwargs):
|
def request(method, url, headers=None, timeout=TIMEOUT, verify_ssl=True, **kwargs):
|
||||||
if headers is None:
|
if headers is None:
|
||||||
headers = {}
|
headers = {}
|
||||||
|
else:
|
||||||
|
headers = headers.copy()
|
||||||
|
|
||||||
for (key, value) in HEADERS.items():
|
for (key, value) in HEADERS.items():
|
||||||
headers.setdefault(key, value)
|
headers.setdefault(key, value)
|
||||||
|
|
||||||
session = requests.Session()
|
session = requests.Session()
|
||||||
a = requests.adapters.HTTPAdapter(max_retries=30)
|
a = requests.adapters.HTTPAdapter(max_retries=30)
|
||||||
b = requests.adapters.HTTPAdapter(max_retries=30)
|
b = requests.adapters.HTTPAdapter(max_retries=30)
|
||||||
|
@ -405,7 +402,8 @@ def request(method, url, stream=False, headers=None, timeout=TIMEOUT, verify_ssl
|
||||||
'head': session.head,
|
'head': session.head,
|
||||||
'post': session.post,
|
'post': session.post,
|
||||||
}[method]
|
}[method]
|
||||||
req = method(url, stream=stream, headers=headers, timeout=timeout, verify=verify_ssl, **kwargs)
|
|
||||||
|
req = method(url, headers=headers, timeout=timeout, verify=verify_ssl, **kwargs)
|
||||||
req.raise_for_status()
|
req.raise_for_status()
|
||||||
return req
|
return req
|
||||||
|
|
||||||
|
@ -427,11 +425,6 @@ def sanitize_url(url):
|
||||||
url = url.replace('%3A//', '://')
|
url = url.replace('%3A//', '://')
|
||||||
return url
|
return url
|
||||||
|
|
||||||
def touch(filename):
|
|
||||||
f = open(filename, 'ab')
|
|
||||||
f.close()
|
|
||||||
return
|
|
||||||
|
|
||||||
def download_argparse(args):
|
def download_argparse(args):
|
||||||
url = args.url
|
url = args.url
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue