threaded_dl improvements.

- use real argparse
- add timeout argument to command line
- apply default values for timeout and filename at argparse level
- check for existing files before creating their thread
This commit is contained in:
Ethan Dalool 2019-05-27 15:59:23 -07:00
parent f23f8ebc7d
commit 70dd4ef77a

View file

@ -1,47 +1,54 @@
import argparse
import os import os
import sys import sys
import threading import threading
import time import time
# pip install voussoirkit
from voussoirkit import clipext from voussoirkit import clipext
from voussoirkit import downloady from voussoirkit import downloady
def remove_finished(threads):
    '''
    Return a new list holding only the threads that are still running.

    threads: iterable of objects that expose is_alive(), such as
    threading.Thread instances. The iterable passed in is not modified;
    callers are expected to rebind their own variable to the result.
    '''
    threads = [t for t in threads if t.is_alive()]
    return threads
def download_thread(url, filename): def clean_url_list(urls):
for url in urls:
url = url.strip() url = url.strip()
if url == '':
return
if os.path.exists(filename): if not url:
print('Skipping existing file "%s"' % filename) continue
return
print(' Starting "%s"' % filename)
downloady.download_file(url, filename, timeout=15)
print('+Finished "%s"' % filename)
def listget(li, index, fallback): if url.startswith('#'):
try: continue
return li[index]
except IndexError:
return fallback
def threaded_dl(urls, thread_count, filename_format=None): yield url
def download_thread(url, filename, timeout=None):
    '''
    Download one URL to `filename`, printing a status line before the
    download starts and another after it returns.

    url: the address handed to downloady.download_file.
    filename: the destination handed to downloady.download_file; also the
        name shown in both status lines.
    timeout: forwarded unchanged to downloady.download_file.
    '''
    print(f' Starting "{filename}"')
    downloady.download_file(url, filename, timeout=timeout)
    print(f'+Finished "{filename}"')
def remove_finished(threads):
    '''
    Return a new list containing only the threads for which is_alive()
    is true, in their original order. The input is left untouched.
    '''
    return [t for t in threads if t.is_alive()]
def threaded_dl(
urls,
thread_count,
filename_format,
timeout=None,
):
now = int(time.time())
threads = [] threads = []
index_digits = len(str(len(urls)))
if filename_format is None:
filename_format = '{now}_{index}_{basename}'
filename_format = filename_format.replace('{index}', '{index:0%0dd}' % index_digits)
if filename_format != os.devnull: if filename_format != os.devnull:
index_digits = len(str(len(urls)))
filename_format = filename_format.replace('{index}', '{index:0%0dd}' % index_digits)
if '{' not in filename_format and len(urls) > 1: if '{' not in filename_format and len(urls) > 1:
filename_format += '_{index}' filename_format += '_{index}'
if '{extension}' not in filename_format and '{basename}' not in filename_format: if '{extension}' not in filename_format and '{basename}' not in filename_format:
filename_format += '{extension}' filename_format += '{extension}'
now = int(time.time())
for (index, url) in enumerate(urls): for (index, url) in enumerate(clean_url_list(urls)):
while len(threads) == thread_count: while len(threads) == thread_count:
threads = remove_finished(threads) threads = remove_finished(threads)
time.sleep(0.1) time.sleep(0.1)
@ -55,8 +62,12 @@ def threaded_dl(urls, thread_count, filename_format=None):
index=index, index=index,
now=now, now=now,
) )
t = threading.Thread(target=download_thread, args=[url, filename])
t.daemon = True if os.path.exists(filename):
print(f'Skipping existing file "{filename}"')
else:
t = threading.Thread(target=download_thread, args=[url, filename, timeout], daemon=True)
threads.append(t) threads.append(t)
t.start() t.start()
@ -65,18 +76,33 @@ def threaded_dl(urls, thread_count, filename_format=None):
print('%d threads remaining\r' % len(threads), end='', flush=True) print('%d threads remaining\r' % len(threads), end='', flush=True)
time.sleep(0.1) time.sleep(0.1)
def main(argv): def threaded_dl_argparse(args):
filename = argv[0] if os.path.isfile(args.url_file):
if os.path.isfile(filename): f = open(args.url_file, 'r')
f = open(filename, 'r')
with f: with f:
urls = f.read() urls = f.read()
else: else:
urls = clipext.resolve(filename) urls = clipext.resolve(args.url_file)
urls = urls.replace('\r', '').split('\n') urls = urls.replace('\r', '').split('\n')
thread_count = int(listget(argv, 1, 4))
filename_format = listget(argv, 2, None) threaded_dl(
threaded_dl(urls, thread_count=thread_count, filename_format=filename_format) urls,
thread_count=args.thread_count,
filename_format=args.filename_format,
timeout=args.timeout,
)
def main(argv):
    '''
    Parse the command-line arguments and dispatch to the handler that was
    registered with set_defaults(func=...).

    argv: list of argument strings, not including the program name.

    Returns whatever the handler returns, so that the caller can pass it
    straight to SystemExit.
    '''
    parser = argparse.ArgumentParser(description=__doc__)

    parser.add_argument('url_file')
    # argparse yields strings unless a type is given. The thread limit is
    # compared against len(threads) with ==, and an int never equals a str,
    # so without type=int a limit typed on the command line is ignored.
    # NOTE(review): the default is still None, which also never equals
    # len(threads), meaning no cap on concurrent threads when the argument
    # is omitted -- confirm that is intended.
    parser.add_argument('thread_count', nargs='?', type=int, default=None)
    parser.add_argument('filename_format', nargs='?', default='{now}_{index}_{basename}')
    # Coerce so that a user-supplied timeout is numeric, like the default.
    parser.add_argument('--timeout', dest='timeout', type=float, default=15)
    parser.set_defaults(func=threaded_dl_argparse)

    args = parser.parse_args(argv)
    return args.func(args)
if __name__ == '__main__': if __name__ == '__main__':
main(sys.argv[1:]) raise SystemExit(main(sys.argv[1:]))