else/ThreadedDL/threaded_dl.py

70 lines
2 KiB
Python
Raw Normal View History

2016-08-18 01:24:38 +00:00
import os
import sys
import threading
import time
sys.path.append('C:\\git\\else\\clipext'); import clipext
sys.path.append('C:\\git\\else\\downloady'); import downloady
def remove_finished(threads):
    '''
    Return a new list holding only the threads that are still running.

    The list passed in is left unmodified; callers rebind their variable
    to the returned list.
    '''
    return list(filter(lambda thread: thread.is_alive(), threads))
2016-10-04 02:20:58 +00:00
def download_thread(url, filename):
    '''
    Download one URL to `filename`, printing progress messages.

    Surrounding whitespace on the URL is ignored. Nothing happens for a
    blank URL, and a file that already exists on disk is reported and
    left alone instead of being downloaded again.
    '''
    url = url.strip()
    if not url:
        return

    already_present = os.path.exists(filename)
    if already_present:
        print('Skipping existing file "%s"' % filename)
        return

    print(' Starting "%s"' % filename)
    downloady.download_file(url, filename)
    print('+Finished "%s"' % filename)
2016-08-18 01:24:38 +00:00
def listget(li, index, fallback):
    '''
    Return li[index], or `fallback` if that index is out of range.

    Only IndexError is handled; any other exception raised by the lookup
    propagates to the caller.
    '''
    try:
        value = li[index]
    except IndexError:
        value = fallback
    return value
2016-10-04 02:20:58 +00:00
def threaded_dl(urls, thread_count, filename_format=None):
    '''
    Download every URL in `urls`, running at most `thread_count`
    downloads at the same time, and block until all of them finish.

    urls:
        A sequence of URL strings. Surrounding whitespace on each entry
        is ignored. Blank entries are skipped, but they still occupy a
        position in the {index} numbering.

    thread_count:
        Maximum number of simultaneous download threads. Must be >= 1.

    filename_format:
        A str.format template for the output filenames. The fields
        supplied are {now} (integer unix timestamp taken once, before the
        first download), {index} (position of the URL within `urls`,
        starting at 0) and {basename} (the result of
        downloady.basename_from_url for that URL).
        A bare '{index}' is zero-padded to the number of digits in
        len(urls).
        Defaults to '{now}_{index}_{basename}'.

    Raises ValueError if thread_count is less than 1.
    '''
    # A limit of zero could never be satisfied and would leave the
    # throttle loop below spinning forever, so reject it up front.
    if thread_count < 1:
        raise ValueError('thread_count must be at least 1, not %r' % (thread_count,))

    threads = []
    index_digits = len(str(len(urls)))
    if filename_format is None:
        filename_format = '{now}_{index}_{basename}'
    # Only the bare '{index}' placeholder is rewritten; an '{index:...}'
    # that already carries its own format spec is left as the caller wrote it.
    filename_format = filename_format.replace('{index}', '{index:0%0dd}' % index_digits)
    now = int(time.time())

    for (index, url) in enumerate(urls):
        # Strip before deriving the basename so that stray whitespace
        # (for example a trailing '\r' from CRLF text) cannot leak into
        # the filename, and so the filename matches what is downloaded.
        url = url.strip()
        if not url:
            continue

        # Wait for a free slot. `>=` rather than `==` so the limit holds
        # even if the list were ever to exceed thread_count.
        while len(threads) >= thread_count:
            threads = remove_finished(threads)
            time.sleep(0.1)

        basename = downloady.basename_from_url(url)
        filename = filename_format.format(now=now, index=index, basename=basename)
        t = threading.Thread(target=download_thread, args=[url, filename])
        # Daemon threads do not keep the interpreter alive by themselves;
        # the polling loop below is what waits for them to finish.
        t.daemon = True
        threads.append(t)
        t.start()

    while len(threads) > 0:
        threads = remove_finished(threads)
        print('%d threads remaining\r' % len(threads), end='', flush=True)
        time.sleep(0.1)
def main():
    '''
    Command line entry point.

    sys.argv[1]:
        If this names an existing file, the URLs are read from that file.
        Otherwise the argument is handed to clipext.resolve and the text
        it returns is used instead (see clipext for what it accepts).
        Either way the text is split on newlines, one URL per line.

    sys.argv[2]:
        Optional number of simultaneous downloads. Defaults to 4.

    sys.argv[3]:
        Optional filename format passed through to threaded_dl.
    '''
    source = sys.argv[1]
    if os.path.isfile(source):
        with open(source, 'r') as handle:
            text = handle.read()
    else:
        text = clipext.resolve(source)

    url_lines = text.split('\n')
    max_threads = int(listget(sys.argv, 2, 4))
    name_template = listget(sys.argv, 3, None)
    threaded_dl(url_lines, thread_count=max_threads, filename_format=name_template)
2016-08-18 01:24:38 +00:00
# Start the downloader only when this file is executed as a script, so
# that importing it as a module has no side effects.
if __name__ == '__main__':
    main()