2020-09-29 21:27:48 +00:00
|
|
|
import argparse
|
2020-09-30 20:20:07 +00:00
|
|
|
import ast
|
2020-09-29 21:27:48 +00:00
|
|
|
import os
|
|
|
|
import sys
|
|
|
|
import threading
|
|
|
|
import time
|
|
|
|
|
2020-10-03 02:54:20 +00:00
|
|
|
from voussoirkit import bytestring
|
2020-09-29 21:27:48 +00:00
|
|
|
from voussoirkit import clipext
|
|
|
|
from voussoirkit import downloady
|
|
|
|
|
|
|
|
|
|
|
|
def clean_url_list(urls):
    '''
    Yield the usable entries of `urls`, skipping blanks and comments.

    Each entry is either a URL string or a (url, filename) tuple / list.
    The URL is stripped of surrounding whitespace; entries whose URL is then
    empty or starts with '#' are skipped. An entry with a truthy filename is
    yielded as a (url, filename) tuple, any other entry as the bare URL.
    '''
    for entry in urls:
        is_pair = isinstance(entry, (tuple, list))
        (address, filename) = entry if is_pair else (entry, None)
        address = address.strip()

        # Blank lines and '#' comment lines carry nothing to download.
        if not address or address.startswith('#'):
            continue

        yield (address, filename) if filename else address
|
2020-09-29 21:27:48 +00:00
|
|
|
|
2020-10-03 02:54:20 +00:00
|
|
|
def download_thread(url, filename, *, bytespersecond=None, headers=None, timeout=None):
    '''
    Download one URL to `filename`, printing a line before and after.

    bytespersecond, headers and timeout are forwarded unchanged to
    downloady.download_file.
    '''
    options = {
        'bytespersecond': bytespersecond,
        'headers': headers,
        'timeout': timeout,
    }
    print(f' Starting "{filename}"')
    downloady.download_file(url, filename, **options)
    print(f'+Finished "{filename}"')
|
|
|
|
|
|
|
|
def remove_finished(threads):
    '''
    Return a new list holding only the threads that are still alive.
    '''
    still_running = []
    for thread in threads:
        if thread.is_alive():
            still_running.append(thread)
    return still_running
|
|
|
|
|
|
|
|
def threaded_dl(
        urls,
        thread_count,
        filename_format,
        bytespersecond=None,
        headers=None,
        timeout=None,
    ):
    '''
    Download many URLs at once using up to `thread_count` worker threads.

    urls:
        Iterable of URL strings or (url, filename) pairs. It is filtered
        through clean_url_list before use. A pair's filename is used as-is
        instead of filename_format.

    thread_count:
        Maximum number of simultaneous downloads. Must be at least 1.

    filename_format:
        str.format template for the output filenames. The available fields
        are {basename}, {ext}, {extension}, {index} and {now}. {index} is
        zero-padded to the number of digits in len(urls). If the template
        has no fields and there are several URLs, '_{index}' is appended; if
        it has neither {extension} nor {basename}, '{extension}' is
        appended. Pass os.devnull to leave the format untouched and write
        every download to the null device.

    bytespersecond:
        Overall rate limit. Each thread is given this value divided by
        thread_count. None is passed through unchanged.

    headers, timeout:
        Forwarded unchanged to every download.

    Raises ValueError if thread_count is less than 1.
    '''
    if thread_count < 1:
        # With no slots available the "wait for a free slot" loop below
        # could never exit, and the rate limit division would fail.
        raise ValueError(f'thread_count must be at least 1, not {thread_count}.')

    # Materialize so that len() works even when the caller passes a generator.
    urls = list(urls)

    now = int(time.time())
    threads = []

    bytespersecond_thread = bytespersecond
    if bytespersecond_thread is not None:
        bytespersecond_thread = int(bytespersecond_thread / thread_count)

    if filename_format != os.devnull:
        # Turn '{index}' into e.g. '{index:03d}' so filenames sort properly.
        index_digits = len(str(len(urls)))
        filename_format = filename_format.replace('{index}', '{index:0%0dd}' % index_digits)

        # A constant name would make every download collide on one file.
        if '{' not in filename_format and len(urls) > 1:
            filename_format += '_{index}'

        # {basename} already includes the extension.
        if '{extension}' not in filename_format and '{basename}' not in filename_format:
            filename_format += '{extension}'

    for (index, url) in enumerate(clean_url_list(urls)):
        # Block until one of the running downloads frees up a slot.
        while len(threads) >= thread_count:
            threads = remove_finished(threads)
            time.sleep(0.1)

        if isinstance(url, (tuple, list)):
            (url, filename) = url
        else:
            basename = downloady.basename_from_url(url)
            extension = os.path.splitext(basename)[1]
            filename = filename_format.format(
                basename=basename,
                ext=extension,
                extension=extension,
                index=index,
                now=now,
            )

        # The null device always "exists" on POSIX, so it must be exempt
        # from the skip or nothing would ever be downloaded to it.
        if filename != os.devnull and os.path.exists(filename):
            print(f'Skipping existing file "{filename}"')
            continue

        kwargs = {
            'url': url,
            'bytespersecond': bytespersecond_thread,
            'filename': filename,
            'timeout': timeout,
            'headers': headers,
        }
        t = threading.Thread(target=download_thread, kwargs=kwargs, daemon=True)
        threads.append(t)
        t.start()

    # Everything has been started; wait for the stragglers to finish.
    while len(threads) > 0:
        threads = remove_finished(threads)
        print(f'{len(threads)} threads remaining\r', end='', flush=True)
        time.sleep(0.1)
|
|
|
|
|
|
|
|
def threaded_dl_argparse(args):
    '''
    Translate parsed command-line arguments into a threaded_dl call.

    args.url_file:
        If this is the path of an existing file, the file's text is used.
        Otherwise the value is handed to clipext.resolve and the result is
        used. Each line of the text holds a URL, optionally followed by a
        space and the filename to save it as.

    args.headers:
        None, or a list of strings. A single string starting with '{' is
        parsed as a Python dict literal; otherwise the list is read as
        alternating key, value, key, value...

    args.bytespersecond:
        None, or a string parsed by bytestring.parsebytes.

    args.filename_format, args.thread_count, args.timeout:
        Forwarded unchanged to threaded_dl.

    Raises ValueError if the headers are given as keys and values but a key
    is left without a value.
    '''
    if os.path.isfile(args.url_file):
        with open(args.url_file, 'r') as handle:
            text = handle.read()
    else:
        text = clipext.resolve(args.url_file)

    urls = []
    for line in text.replace('\r', '').split('\n'):
        # Strip first, otherwise an indented line would split into an empty
        # URL plus a "filename" and be silently dropped later.
        line = line.strip()
        if ' ' in line:
            (url, filename) = line.split(' ', 1)
            # Extra spaces between the URL and the name are not part of it.
            urls.append([url, filename.strip()])
        else:
            # Blank lines are kept here; threaded_dl filters them out.
            urls.append(line)

    headers = args.headers
    if headers is not None:
        if len(headers) == 1 and headers[0].startswith('{'):
            # literal_eval only evaluates Python literals, not arbitrary
            # expressions.
            headers = ast.literal_eval(headers[0])
        else:
            if len(headers) % 2 != 0:
                raise ValueError(
                    f'Headers must be given as key value pairs, but got {len(headers)} items.'
                )
            headers = dict(zip(headers[::2], headers[1::2]))

    bytespersecond = args.bytespersecond
    if bytespersecond is not None:
        bytespersecond = bytestring.parsebytes(bytespersecond)

    threaded_dl(
        urls,
        bytespersecond=bytespersecond,
        filename_format=args.filename_format,
        headers=headers,
        thread_count=args.thread_count,
        timeout=args.timeout,
    )
|
|
|
|
|
|
|
|
def main(argv):
    '''
    Parse `argv` and run the handler chosen by the parser.

    argv:
        List of command-line arguments, without the program name.

    Returns whatever the handler returns.
    '''
    parser = argparse.ArgumentParser(description=__doc__)

    parser.add_argument('url_file')
    parser.add_argument('thread_count', type=int)
    parser.add_argument('filename_format', nargs='?', default='{now}_{index}_{basename}')
    parser.add_argument('--bytespersecond', default=None)
    # Without a type, a timeout given on the command line would arrive as a
    # string while the default is a number.
    parser.add_argument('--timeout', type=float, default=15)
    parser.add_argument('--headers', nargs='+', default=None)
    parser.set_defaults(func=threaded_dl_argparse)

    args = parser.parse_args(argv)
    return args.func(args)
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # main's return value becomes the process exit status.
    sys.exit(main(sys.argv[1:]))
|