threaded_dl improvements.
- Use real argparse.
- Add a timeout argument to the command line.
- Apply default values for timeout and filename at the argparse level.
- Check for existing files before creating their thread.
This commit is contained in:
parent
f23f8ebc7d
commit
70dd4ef77a
1 changed files with 67 additions and 41 deletions
|
@ -1,47 +1,54 @@
|
||||||
|
import argparse
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
import threading
|
import threading
|
||||||
import time
|
import time
|
||||||
|
|
||||||
# pip install voussoirkit
|
|
||||||
from voussoirkit import clipext
|
from voussoirkit import clipext
|
||||||
from voussoirkit import downloady
|
from voussoirkit import downloady
|
||||||
|
|
||||||
def remove_finished(threads):
|
|
||||||
threads = [t for t in threads if t.is_alive()]
|
|
||||||
return threads
|
|
||||||
|
|
||||||
def download_thread(url, filename):
|
def clean_url_list(urls):
|
||||||
|
for url in urls:
|
||||||
url = url.strip()
|
url = url.strip()
|
||||||
if url == '':
|
|
||||||
return
|
|
||||||
|
|
||||||
if os.path.exists(filename):
|
if not url:
|
||||||
print('Skipping existing file "%s"' % filename)
|
continue
|
||||||
return
|
|
||||||
print(' Starting "%s"' % filename)
|
|
||||||
downloady.download_file(url, filename, timeout=15)
|
|
||||||
print('+Finished "%s"' % filename)
|
|
||||||
|
|
||||||
def listget(li, index, fallback):
|
if url.startswith('#'):
|
||||||
try:
|
continue
|
||||||
return li[index]
|
|
||||||
except IndexError:
|
|
||||||
return fallback
|
|
||||||
|
|
||||||
def threaded_dl(urls, thread_count, filename_format=None):
|
yield url
|
||||||
|
|
||||||
|
def download_thread(url, filename, timeout=None):
    '''
    Download a single url to filename, printing a line before and after.

    url: the address handed to downloady.download_file.
    filename: the destination handed to downloady.download_file; also shown
        in the progress messages.
    timeout: forwarded unchanged to downloady.download_file.
    '''
    start_message = f' Starting "{filename}"'
    finish_message = f'+Finished "{filename}"'

    print(start_message)
    downloady.download_file(url, filename, timeout=timeout)
    # Only reached when download_file returns without raising.
    print(finish_message)
|
||||||
|
|
||||||
|
def remove_finished(threads):
    '''
    Return a new list containing only the threads that are still alive.

    The input is not modified; relative order is preserved.
    '''
    still_running = []
    for thread in threads:
        if thread.is_alive():
            still_running.append(thread)
    return still_running
|
||||||
|
|
||||||
|
def threaded_dl(
|
||||||
|
urls,
|
||||||
|
thread_count,
|
||||||
|
filename_format,
|
||||||
|
timeout=None,
|
||||||
|
):
|
||||||
|
now = int(time.time())
|
||||||
threads = []
|
threads = []
|
||||||
index_digits = len(str(len(urls)))
|
|
||||||
if filename_format is None:
|
|
||||||
filename_format = '{now}_{index}_{basename}'
|
|
||||||
filename_format = filename_format.replace('{index}', '{index:0%0dd}' % index_digits)
|
|
||||||
if filename_format != os.devnull:
|
if filename_format != os.devnull:
|
||||||
|
index_digits = len(str(len(urls)))
|
||||||
|
filename_format = filename_format.replace('{index}', '{index:0%0dd}' % index_digits)
|
||||||
|
|
||||||
if '{' not in filename_format and len(urls) > 1:
|
if '{' not in filename_format and len(urls) > 1:
|
||||||
filename_format += '_{index}'
|
filename_format += '_{index}'
|
||||||
|
|
||||||
if '{extension}' not in filename_format and '{basename}' not in filename_format:
|
if '{extension}' not in filename_format and '{basename}' not in filename_format:
|
||||||
filename_format += '{extension}'
|
filename_format += '{extension}'
|
||||||
now = int(time.time())
|
|
||||||
for (index, url) in enumerate(urls):
|
for (index, url) in enumerate(clean_url_list(urls)):
|
||||||
|
|
||||||
while len(threads) == thread_count:
|
while len(threads) == thread_count:
|
||||||
threads = remove_finished(threads)
|
threads = remove_finished(threads)
|
||||||
time.sleep(0.1)
|
time.sleep(0.1)
|
||||||
|
@ -55,8 +62,12 @@ def threaded_dl(urls, thread_count, filename_format=None):
|
||||||
index=index,
|
index=index,
|
||||||
now=now,
|
now=now,
|
||||||
)
|
)
|
||||||
t = threading.Thread(target=download_thread, args=[url, filename])
|
|
||||||
t.daemon = True
|
if os.path.exists(filename):
|
||||||
|
print(f'Skipping existing file "{filename}"')
|
||||||
|
|
||||||
|
else:
|
||||||
|
t = threading.Thread(target=download_thread, args=[url, filename, timeout], daemon=True)
|
||||||
threads.append(t)
|
threads.append(t)
|
||||||
t.start()
|
t.start()
|
||||||
|
|
||||||
|
@ -65,18 +76,33 @@ def threaded_dl(urls, thread_count, filename_format=None):
|
||||||
print('%d threads remaining\r' % len(threads), end='', flush=True)
|
print('%d threads remaining\r' % len(threads), end='', flush=True)
|
||||||
time.sleep(0.1)
|
time.sleep(0.1)
|
||||||
|
|
||||||
def main(argv):
|
def threaded_dl_argparse(args):
|
||||||
filename = argv[0]
|
if os.path.isfile(args.url_file):
|
||||||
if os.path.isfile(filename):
|
f = open(args.url_file, 'r')
|
||||||
f = open(filename, 'r')
|
|
||||||
with f:
|
with f:
|
||||||
urls = f.read()
|
urls = f.read()
|
||||||
else:
|
else:
|
||||||
urls = clipext.resolve(filename)
|
urls = clipext.resolve(args.url_file)
|
||||||
urls = urls.replace('\r', '').split('\n')
|
urls = urls.replace('\r', '').split('\n')
|
||||||
thread_count = int(listget(argv, 1, 4))
|
|
||||||
filename_format = listget(argv, 2, None)
|
threaded_dl(
|
||||||
threaded_dl(urls, thread_count=thread_count, filename_format=filename_format)
|
urls,
|
||||||
|
thread_count=args.thread_count,
|
||||||
|
filename_format=args.filename_format,
|
||||||
|
timeout=args.timeout,
|
||||||
|
)
|
||||||
|
|
||||||
|
def main(argv):
    '''
    Parse the command line and run the threaded downloader.

    argv: list of argument strings, without the program name.

    Arguments understood:
        url_file: required; handed to threaded_dl_argparse as args.url_file.
        thread_count: optional integer >= 1, default 4.
        filename_format: optional, default '{now}_{index}_{basename}'.
        --timeout: optional number, default 15.

    Invalid values (non-numeric thread_count or timeout, thread_count < 1)
    are reported through argparse, which prints a usage message and raises
    SystemExit(2).

    Returns whatever threaded_dl_argparse returns, so the result can be given
    to SystemExit by the caller.
    '''
    parser = argparse.ArgumentParser(description=__doc__)

    parser.add_argument('url_file')
    # Convert here rather than downstream: the worker loop waits while
    # len(threads) == thread_count, and that comparison can never be true for
    # a str or None, which would silently disable the concurrency limit.
    parser.add_argument('thread_count', nargs='?', type=int, default=4)
    parser.add_argument('filename_format', nargs='?', default='{now}_{index}_{basename}')
    # Without a type, a timeout given on the command line would be forwarded
    # as a string while the default is a number.
    parser.add_argument('--timeout', dest='timeout', type=float, default=15)

    args = parser.parse_args(argv)

    # With a limit of 0 the "wait for a free slot" loop would never exit, and
    # a negative limit would never be reached at all.
    if args.thread_count < 1:
        parser.error('thread_count must be at least 1')

    # There is only one command, so the handler is called directly; resolving
    # it after parsing lets argument errors surface first.
    return threaded_dl_argparse(args)
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
main(sys.argv[1:])
|
raise SystemExit(main(sys.argv[1:]))
|
||||||
|
|
Loading…
Reference in a new issue