else
commit 82f63a75ab
parent c491e417f5
14 changed files with 501 additions and 264 deletions
Clipext/README.md (new file, 8 lines)
@@ -0,0 +1,8 @@
Clip Extension
==============

This module works with pyperclip to provide some handy features for command-line utilities.

Instead of having the user paste text into the command line to run your script, just let them enter `script.py !c` and resolve the clipboard contents automatically. Pasting into the cmd window on Windows is annoying and requires a mouse click, so this can be very convenient.

Since `!i` resolves to user input, your script can accept piping with `ls | script.py !i`.
Clipext/clipext.py (new file, 31 lines)
@@ -0,0 +1,31 @@
import pyperclip

CLIPBOARD_STRINGS = ['!c', '!clip', '!clipboard']
INPUT_STRINGS = ['!i', '!in', '!input', '!stdin']
EOF = '\x1a'


def multi_line_input():
    userinput = []
    while True:
        try:
            additional = input()
        except EOFError:
            # If you enter nothing but ctrl-z
            additional = EOF

        userinput.append(additional)

        if EOF in additional:
            break

    userinput = '\n'.join(userinput)
    userinput = userinput.split(EOF)[0]
    return userinput.strip()


def resolve(arg):
    lowered = arg.lower()
    if lowered in CLIPBOARD_STRINGS:
        return pyperclip.paste()
    if lowered in INPUT_STRINGS:
        return multi_line_input()
    return arg
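As a usage sketch (hypothetical caller, not part of this commit), a command-line script passes its argument through resolve so that both shortcuts work:

    import sys
    import clipext

    # '!c' (or '!clip', '!clipboard') becomes the clipboard contents,
    # '!i' (or '!in', '!input', '!stdin') becomes multi-line user input,
    # and any other argument is returned unchanged.
    text = clipext.resolve(sys.argv[1])
    print(text)

This is what lets `script.py !c` and `ls | script.py !i` behave as the README describes.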
Downloady/README.md (new file, 6 lines)
@@ -0,0 +1,6 @@
Downloady
=========

- 2016 08 16
    - Downloady now uses temporary files for incomplete downloads, and renames them when finished. This helps distinguish downloads that were interrupted and should be resumed from files that just happen to have the same name, which previously would have been interpreted as a resume. This improves overall ease of use, simplifies the behavior of the `overwrite` parameter, and will remove duplicate work from other programs. A minimal sketch of the scheme appears after this list.
    - Rewrote the plan creator and download function to separate their concerns better and to simplify the plan selector.
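The temporary-file scheme described above, as a minimal sketch (illustrative names; `fetch` stands in for the real network transfer):

    import os

    TEMP_EXTENSION = '.downloadytemp'

    def download(url, localname):
        temp_localname = localname + TEMP_EXTENSION
        # A leftover temp file unambiguously marks an interrupted download,
        # so it is always safe to resume it. A file under the real name is
        # a finished download, never a resume candidate.
        fetch(url, temp_localname)  # hypothetical transfer step
        os.rename(temp_localname, localname)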
Downloady/downloady.py
@@ -1,27 +1,28 @@
-import sys
-sys.path.append('C:\\git\\else\\ratelimiter'); import ratelimiter
-sys.path.append('C:\\git\\else\\bytestring'); import bytestring
-
 import argparse
 import os
 import pyperclip # pip install pyperclip
 import requests
+import sys
 import time
 import urllib
 import warnings

+sys.path.append('C:\\git\\else\\clipext'); import clipext
+sys.path.append('C:\\git\\else\\ratelimiter'); import ratelimiter
+sys.path.append('C:\\git\\else\\bytestring'); import bytestring
+
 warnings.simplefilter('ignore')

 HEADERS = {
     'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.152 Safari/537.36'
 }
-SLEEPINESS = 3

 FILENAME_BADCHARS = '*?"<>|'

 last_request = 0
 CHUNKSIZE = 16 * bytestring.KIBIBYTE
-STOP = False
 TIMEOUT = 600
+TEMP_EXTENSION = '.downloadytemp'

 def basename_from_url(url):
     '''
@@ -32,81 +33,6 @@ def basename_from_url(url):
     localname = localname.split('/')[-1]
     return localname

-def determine_seek_and_range(
-        file_handle,
-        localname,
-        local_exists,
-        overwrite,
-        remote_total_bytes,
-        server_respects_range,
-        user_provided_range,
-        user_range_min,
-        user_range_max,
-    ):
-    ''' THINGS THAT CAN HAPPEN '''
-    seek_to = 0
-    header_range_min = None
-    header_range_max = None
-    if local_exists:
-        local_existing_bytes = os.path.getsize(localname)
-        if overwrite is True:
-            file_handle.truncate()
-            if user_provided_range:
-                header_range_min = user_range_min
-                header_range_max = user_range_max
-                seek_to = user_range_min
-
-            elif not user_provided_range:
-                pass
-
-        elif overwrite is None:
-            if local_existing_bytes == remote_total_bytes:
-                print('File is 100%. Nothing to do.')
-                return
-
-            if user_provided_range:
-                if server_respects_range:
-                    seek_to = user_range_min
-
-                else:
-                    raise Exception('The server did not respect your range header')
-
-            elif not user_provided_range:
-                if server_respects_range:
-                    print('Resuming from byte %d' % local_existing_bytes)
-                    header_range_min = local_existing_bytes
-                    header_range_max = ''
-                    seek_to = local_existing_bytes
-
-                else:
-                    print('File exists, but server doesn\'t allow resumes. Restart from 0?')
-                    permission = get_permission()
-                    if permission:
-                        file_handle.truncate()
-
-                    else:
-                        raise Exception('Couldn\'t resume')
-
-        else:
-            raise TypeError('Invalid value for `overwrite`. Must be True, False, or None')
-
-    elif not local_exists:
-        if user_provided_range:
-            if server_respects_range:
-                file_handle.seek(user_range_min)
-                file_handle.write(b'\0')
-
-                header_range_min = user_range_min
-                header_range_max = user_range_max
-                seek_to = user_range_min
-
-            else:
-                raise Exception('The server did not respect your range header')
-
-        elif not user_provided_range:
-            pass
-    return (seek_to, header_range_min, header_range_max)
-
 def download_file(
         url,
         localname=None,
@@ -114,33 +40,103 @@ def download_file(
         bytespersecond=None,
         callback_progress=None,
         headers=None,
-        overwrite=None
+        overwrite=False,
+        verbose=False,
     ):
-    if headers is None:
-        headers = {}
-    ''' Determine local filename '''
-    url = url.replace('%3A//', '://')
+    headers = headers or {}
+    url = sanitize_url(url)
     if localname in [None, '']:
         localname = basename_from_url(url)
-    localname = filepath_sanitize(localname)
+    localname = sanitize_filename(localname)
+    if verbose:
+        print(url)
+
+    plan = prepare_plan(
+        url,
+        localname,
+        auth=auth,
+        bytespersecond=bytespersecond,
+        headers=headers,
+        overwrite=overwrite,
+    )
+    #print(plan)
+    if plan is None:
+        return
+
+    localname = plan['download_into']
     directory = os.path.split(localname)[0]
     if directory != '':
         os.makedirs(directory, exist_ok=True)
+    touch(localname)
+    file_handle = open(localname, 'r+b')
+    file_handle.seek(plan['seek_to'])
+
+    if plan['header_range_min'] is not None:
+        headers['range'] = 'bytes={min}-{max}'.format(
+            min=plan['header_range_min'],
+            max=plan['header_range_max'],
+        )
+
+    if plan['plan_type'] == 'resume':
+        bytes_downloaded = plan['seek_to']
+    else:
+        bytes_downloaded = 0
+
+    download_stream = request('get', url, stream=True, headers=headers, auth=auth)
+    for chunk in download_stream.iter_content(chunk_size=CHUNKSIZE):
+        bytes_downloaded += len(chunk)
+        file_handle.write(chunk)
+        if callback_progress is not None:
+            callback_progress(bytes_downloaded, plan['remote_total_bytes'])
+
+        if plan['limiter'] is not None and bytes_downloaded < plan['remote_total_bytes']:
+            plan['limiter'].limit(len(chunk))
+
+    file_handle.close()
+
+    if localname != plan['real_localname']:
+        os.rename(localname, plan['real_localname'])
+
+    localsize = os.path.getsize(plan['real_localname'])
+    if plan['plan_type'] != 'partial' and localsize < plan['remote_total_bytes']:
+        message = 'File does not contain expected number of bytes. Received {size} / {total}'
+        message = message.format(size=os.path.getsize(localname), total=plan['remote_total_bytes'])
+        raise Exception(message)
+
+    return plan['real_localname']
+
+def prepare_plan(
+        url,
+        localname,
+        auth,
+        bytespersecond,
+        headers,
+        overwrite,
+    ):
+    # Chapter 1: File existence
+    user_provided_range = 'range' in headers
+    real_localname = localname
+    temp_localname = localname + TEMP_EXTENSION
+    real_exists = os.path.exists(real_localname)
+
+    if real_exists and overwrite is False and not user_provided_range:
+        print('File exists and overwrite is off. Nothing to do.')
+        return None
+    temp_exists = os.path.exists(temp_localname)
+    real_localsize = int(real_exists and os.path.getsize(real_localname))
+    temp_localsize = int(temp_exists and os.path.getsize(temp_localname))
+
+    # Chapter 2: Ratelimiting
     if bytespersecond is None:
         limiter = None
+    elif isinstance(bytespersecond, ratelimiter.Ratelimiter):
+        limiter = bytespersecond
     else:
-        limiter = ratelimiter.Ratelimiter(bytespersecond, period=1)
+        limiter = ratelimiter.Ratelimiter(bytespersecond)

-    ''' Prepare plan variables '''
-    local_exists = os.path.exists(localname)
-    if local_exists and overwrite is False:
-        print('Overwrite off. Nothing to do.')
-        return
-
-    user_provided_range = 'range' in headers
+    # Chapter 3: Extracting range
     if user_provided_range:
         user_range_min = int(headers['range'].split('bytes=')[1].split('-')[0])
         user_range_max = headers['range'].split('-')[1]
@@ -150,71 +146,88 @@ def download_file(
     user_range_min = None
     user_range_max = None

+    # Chapter 4: Server range support
     # Always include a range on the first request to figure out whether the
-    # server supports it. Use 0- so we get the right `remote_total_bytes`.
+    # server supports it. Use 0- to get correct remote_total_bytes
     temp_headers = headers
     temp_headers.update({'range': 'bytes=0-'})

     # I'm using a GET instead of an actual HEAD here because some servers respond
     # differently, even though they're not supposed to.
     head = request('get', url, stream=True, headers=temp_headers, auth=auth)
-    remote_total_bytes = int(head.headers.get('content-length', 1))
+    remote_total_bytes = int(head.headers.get('content-length', 0))
     server_respects_range = (head.status_code == 206 and 'content-range' in head.headers)
     head.connection.close()

-    touch(localname)
-    file_handle = open(localname, 'r+b')
-    file_handle.seek(0)
-
-    plan = determine_seek_and_range(
-        file_handle=file_handle,
-        localname=localname,
-        local_exists=local_exists,
-        overwrite=overwrite,
-        remote_total_bytes=remote_total_bytes,
-        server_respects_range=server_respects_range,
-        user_provided_range=user_provided_range,
-        user_range_min=user_range_min,
-        user_range_max=user_range_max,
-    )
-    if plan is None:
-        return
-
-    (seek_to, header_range_min, header_range_max) = plan
-    if header_range_min is not None:
-        headers['range'] = 'bytes={0}-{1}'.format(header_range_min, header_range_max)
-
-    bytes_downloaded = seek_to
-    file_handle.seek(seek_to)
-    download_stream = request('get', url, stream=True, headers=headers, auth=auth)
-
-    ''' Begin download '''
-    for chunk in download_stream.iter_content(chunk_size=CHUNKSIZE):
-        bytes_downloaded += len(chunk)
-        file_handle.write(chunk)
-        if callback_progress is not None:
-            callback_progress(bytes_downloaded, remote_total_bytes)
-
-        if limiter is not None and bytes_downloaded < remote_total_bytes:
-            limiter.limit(len(chunk))
-
-    file_handle.close()
-    return localname
-
-def filepath_sanitize(text, exclusions=''):
-    bet = FILENAME_BADCHARS.replace(exclusions, '')
-    for char in bet:
-        text = text.replace(char, '')
-    return text
+    if user_provided_range and not server_respects_range:
+        raise Exception('Server did not respect your range header')
+
+    # Chapter 5: Plan definitions
+    plan_base = {
+        'limiter': limiter,
+        'real_localname': real_localname,
+        'remote_total_bytes': remote_total_bytes,
+    }
+    plan_fulldownload = dict(
+        plan_base,
+        download_into=temp_localname,
+        header_range_min=None,
+        header_range_max=None,
+        plan_type='fulldownload',
+        seek_to=0,
+    )
+    plan_resume = dict(
+        plan_base,
+        download_into=temp_localname,
+        header_range_min=temp_localsize,
+        header_range_max='',
+        plan_type='resume',
+        seek_to=temp_localsize,
+    )
+    plan_partial = dict(
+        plan_base,
+        download_into=real_localname,
+        header_range_min=user_range_min,
+        header_range_max=user_range_max,
+        plan_type='partial',
+        seek_to=user_range_min,
+    )
+
+    # Chapter 6: Redeem your meal vouchers here
+    if real_exists:
+        if overwrite:
+            os.remove(real_localname)
+
+        if user_provided_range:
+            return plan_partial
+
+        return plan_fulldownload
+
+    elif temp_exists and temp_localsize > 0:
+        if overwrite:
+            return plan_fulldownload
+
+        if user_provided_range:
+            return plan_partial
+
+        if server_respects_range:
+            print('Resume from byte %d' % plan_resume['seek_to'])
+            return plan_resume
+
+    else:
+        if user_provided_range:
+            return plan_partial
+
+        return plan_fulldownload
+
+    print('No plan was chosen?')
+    return None

 def get_permission(prompt='y/n\n>', affirmative=['y', 'yes']):
     permission = input(prompt)
     return permission.lower() in affirmative

-def is_clipboard(s):
-    return s.lower() in ['!c', '!clip', '!clipboard']
-
-def progress(bytes_downloaded, bytes_total, prefix=''):
+def progress1(bytes_downloaded, bytes_total, prefix=''):
     divisor = bytestring.get_appropriate_divisor(bytes_total)
     bytes_total_string = bytestring.bytestring(bytes_total, force_unit=divisor)
     bytes_downloaded_string = bytestring.bytestring(bytes_downloaded, force_unit=divisor)
@@ -278,6 +291,16 @@ def request(method, url, stream=False, headers=None, timeout=TIMEOUT, **kwargs):
     req.raise_for_status()
     return req

+def sanitize_filename(text, exclusions=''):
+    bet = FILENAME_BADCHARS.replace(exclusions, '')
+    for char in bet:
+        text = text.replace(char, '')
+    return text
+
+def sanitize_url(url):
+    url = url.replace('%3A//', '://')
+    return url
+
 def touch(filename):
     f = open(filename, 'ab')
     f.close()
@@ -286,26 +309,14 @@ def touch(filename):

 def download_argparse(args):
     url = args.url
-    if is_clipboard(url):
-        url = pyperclip.paste()
-        print(url)
-
-    overwrite = {
-        'y':True, 't':True,
-        'n':False, 'f':False,
-    }.get(args.overwrite.lower(), None)
+    url = clipext.resolve(url)

     callback = {
-        None: progress,
-        '1': progress,
+        None: progress1,
+        '1': progress1,
         '2': progress2,
-    }.get(args.callback, None)
-
-    callback = args.callback
-    if callback == '1':
-        callback = progress
-    if callback == '2':
-        callback = progress2
+    }.get(args.callback, args.callback)

     bytespersecond = args.bytespersecond
     if bytespersecond is not None:
@@ -321,20 +332,21 @@ def download_argparse(args):
         bytespersecond=bytespersecond,
         callback_progress=callback,
         headers=headers,
-        overwrite=overwrite,
+        overwrite=args.overwrite,
+        verbose=True,
     )

 if __name__ == '__main__':
     parser = argparse.ArgumentParser()

-    #p_download_file = subparsers.add_parser('download_file')
     parser.add_argument('url')
     parser.add_argument('localname', nargs='?', default=None)
-    parser.add_argument('-c', '--callback', dest='callback', default=progress)
+    parser.add_argument('-c', '--callback', dest='callback', default=progress1)
     parser.add_argument('-bps', '--bytespersecond', dest='bytespersecond', default=None)
-    parser.add_argument('-ow', '--overwrite', dest='overwrite', default='')
+    parser.add_argument('-ow', '--overwrite', dest='overwrite', action='store_true')
     parser.add_argument('-r', '--range', dest='range', default=None)
     parser.set_defaults(func=download_argparse)

     args = parser.parse_args()
     args.func(args)
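To illustrate the reworked interface (a sketch with placeholder URL and filenames, not code from this commit), callers go through download_file and let prepare_plan pick the behavior:

    # Full download; a leftover .downloadytemp file is resumed automatically.
    downloady.download_file('https://example.com/big.iso', 'big.iso', verbose=True)

    # User-provided range: selects the 'partial' plan and writes the bytes
    # into the real filename at the requested offset. Raises if the server
    # ignores the range header.
    downloady.download_file(
        'https://example.com/big.iso',
        'big.iso',
        headers={'range': 'bytes=0-1023'},
    )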
(deleted file)
@@ -1,45 +0,0 @@
THINGS THAT CAN HAPPEN
├───File exists
│   ├───User disables overwrite
│   │   └───Return because there's nothing to do
│   │
│   ├───User enables overwrite
│   │   ├───User requests range
│   │   │   └───Raise exception because requesting a range and forcing overwrite are mutually exclusive
│   │   │
│   │   └───User does not request range
│   │       └───File opened, truncated, full download
│   │
│   └───User does not specify overwrite
│       ├───File is same size as content-length
│       │   └───Return because there's nothing to do.
│       │
│       ├───User requests range
│       │   ├───Server respects range
│       │   │   └───File opened, seeked to request, bytes filled in
│       │   │
│       │   └───Server does not respect range
│       │       └───Raise exception because user's request can't be fulfilled
│       │
│       └───User does not request range
│           ├───Server respects range
│           │   └───File is opened, seeked to end, download resumes
│           │
│           └───Server does not respect range
│               └───Ask for permission to overwrite from beginning
│
└───File does not exist
    ├───User requests range
    │   ├───Server respects range
    │   │   └───File created, seeked to request, bytes filled in. Everything else left 0
    │   └───Server does not respect range
    │       └───Raise exception because user's request can't be fulfilled
    │
    └───User does not request range
        └───File created, full download

Possible ambiguity: if the user requests a range, and the file does not exist, does he want:
1. to fill the file with zeroes, and patch the requested bytes into their correct spot; or
2. to create the file empty, and only write the requested bytes?

I will assume #1 because that plays nicely with other Things That Can Happen, such as letting the user patch the other bytes in later.
Instathief/instathief.py (new file, 157 lines)
@@ -0,0 +1,157 @@
import argparse
import bs4
import datetime
import json
import os
import requests
import sys

sys.path.append('C:\\git\\else\\clipext'); import clipext
sys.path.append('C:\\git\\else\\downloady'); import downloady


''' '''
STRFTIME = '%Y%m%d-%H%M%S'
# strftime used for filenames when downloading

URL_PROFILE = 'https://www.instagram.com/{username}'
URL_QUERY = 'https://www.instagram.com/query/'

PAGE_QUERY_TEMPLATE = '''
ig_user({user_id})
{{
    media.after({end_cur}, {count})
    {{
        count,
        nodes
        {{
            code,
            date,
            display_src,
            id,
            video_url
        }},
        page_info
    }}
}}
'''.replace('\n', '').replace(' ', '')

USERAGENT = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36'
''' '''

last_cookie = None

def download_media(media_list):
    for media in media_list:
        timestamp = datetime.datetime.utcfromtimestamp(media['created'])
        timestamp = timestamp.strftime(STRFTIME)
        basename = downloady.basename_from_url(media['url'])
        extension = os.path.splitext(basename)[1]

        filename = timestamp + extension
        downloady.download_file(
            url=media['url'],
            localname=filename,
            callback_progress=downloady.progress2,
            overwrite=False,
        )

def get_page(user_id, end_cur, count, cookies):
    query = PAGE_QUERY_TEMPLATE.format(
        count=count,
        end_cur=end_cur,
        user_id=user_id,
    )
    headers = {
        'referer': 'https://www.instagram.com/',
        'user-agent': USERAGENT,
        'x-csrftoken': cookies['csrftoken'],
    }
    data = {
        'q': query,
        'ref': 'users::show',
    }

    response = requests.post(
        url=URL_QUERY,
        cookies=cookies,
        data=data,
        headers=headers,
    )
    response.raise_for_status()
    page = response.json()
    return page

def get_user_info(username):
    global last_cookie
    url = URL_PROFILE.format(username=username)
    response = requests.get(url)
    response.raise_for_status()

    text = response.text
    soup = bs4.BeautifulSoup(text, 'html.parser')

    scripts = soup.find_all('script')
    for script in scripts:
        if 'window._sharedData' in script.text:
            break
    else:
        raise Exception('Did not find expected javascript')

    user_data = script.text
    user_data = user_data.split('window._sharedData = ')[1].rstrip(';').strip()
    user_data = json.loads(user_data)
    user_data = user_data['entry_data']['ProfilePage'][0]['user']

    user_id = user_data['id']
    page_info = user_data['media']['page_info']
    if page_info['has_next_page']:
        end_cur = page_info['start_cursor']
        # Minus 1 because the queries use "after" parameters for pagination, and
        # if we just take this cursor then we will only get items after it.
        end_cur = int(end_cur) - 1
    else:
        end_cur = None

    user_data = {
        'user_id': user_id,
        'end_cur': end_cur,
        'cookies': response.cookies,
    }
    last_cookie = response.cookies
    return user_data

def get_user_media(username):
    user_info = get_user_info(username)
    end_cur = user_info.pop('end_cur')

    while True:
        page = get_page(count=50, end_cur=end_cur, **user_info)
        page = page['media']

        posts = page['nodes']
        for post in posts:
            timestamp = post['date']
            media_url = post.get('video_url') or post.get('display_src')
            ret = {
                'created': timestamp,
                'url': media_url
            }
            yield ret

        page_info = page['page_info']
        if page_info['has_next_page']:
            end_cur = page_info['end_cursor']
        else:
            break


def main():
    username = sys.argv[1]
    media = get_user_media(username)
    # get_user_media yields dicts with 'created' and 'url' keys.
    for item in media:
        print(item['url'])


if __name__ == '__main__':
    main()
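A usage sketch (hypothetical account name): the generator from get_user_media can be printed, as main() does, or fed straight to download_media:

    media = get_user_media('some_username')  # hypothetical username
    download_media(media)  # saves items under STRFTIME names, e.g. 20160816-120000.jpg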
@@ -138,6 +138,7 @@ function create_odi_div(url)

     if (paramless_url.match(IMAGE_TYPES))
     {
+        console.log("Creating image div for " + paramless_url);
         var div = document.createElement("div");
         div.id = generate_id(32);
         div.className = "odi_image_div";
@@ -194,6 +195,7 @@ function create_odi_div(url)
     {
         return null;
     }
+    console.log("Creating " + mediatype + " div for " + paramless_url);

     var div = document.createElement("div");
     div.id = generate_id(32);
@@ -253,6 +255,7 @@ function create_odi_div(url)
 }
 function create_odi_divs(urls)
 {
+    console.log("Creating odi divs");
     image_divs = [];
     media_divs = [];
     odi_divs = [];
@@ -332,6 +335,7 @@ function create_workspace()
     control_panel.appendChild(ingest_div);
     control_panel.appendChild(start_button);
     document.body.appendChild(workspace);
+    console.log("finished workspace");
 }

 function delete_odi_div(element)
@@ -430,6 +434,7 @@ function filter_re(pattern, do_delete)

 function format_css()
 {
+    console.log("Formatting CSS variables");
     var css = CSS;
     while (true)
     {
@@ -438,22 +443,24 @@ function format_css()
         {
             break;
         }
-        matches = Array.from(new Set(matches));
-        for (var index = 0; index < matches.length; index += 1)
-        {
-            var injector = matches[index];
-            var injected = injector.replace(new RegExp("\\$", 'g'), "");
-            /*console.log(injector);*/
-            /*console.log(injected);*/
-            css = css.replace(injector, this[injected]);
-        }
+        console.log(matches);
+        matches = new Set(matches);
+        /* Originally used Array.from(set) and did regular iteration, but I found
+           that sites can override and break that conversion. */
+        matches.forEach(
+            function(injector)
+            {
+                var injected = injector.replace(new RegExp("\\$", 'g'), "");
+                css = css.replace(injector, this[injected]);
+            }
+        );
     }
     return css;
 }

 function get_all_urls()
 {
+    console.log("Collecting urls");
     var urls = [];
     function include(source, attr)
     {
@@ -529,6 +536,7 @@ function get_basename(url)

 function get_gfycat_video(id)
 {
+    console.log("Resolving gfycat " + id);
     var url = "https://gfycat.com/cajax/get/" + id;
     var request = new XMLHttpRequest();
     request.answer = null;
@@ -595,6 +603,7 @@ function generate_id(length)
 function ingest()
 {
     /* Take the text from the INGEST box, and make odi divs from it */
+    console.log("Ingesting");
     var odi_divs = get_odi_divs();
     var ingestbox = document.getElementById("ingestbox");
     var text = ingestbox.value;
@@ -622,6 +631,7 @@ function ingest()

 function lazy_load_all()
 {
+    console.log("Starting lazyload");
     lazies = get_lazy_divs();
     lazies.reverse();
     lazy_buttons = document.getElementsByClassName("load_button");
@@ -91,8 +91,8 @@ function swap_source(player, source_list)

 function main()
 {
-    var WIDTH = 3;
-    var HEIGHT = 3;
+    var WIDTH = 2;
+    var HEIGHT = 2;
     var MEDIAS = get_media_links();

     clear_page();
@@ -1,49 +1,52 @@
 Open Dir DL
 ===========

-The open directory downloader
+The open directory downloader.

-Requires `pip install beautifulsoup4`
+Requires `pip install beautifulsoup4`.

 See inside opendirdl.py for usage instructions.

+- 2016 08 16
+    - **[cleanup]** Now that Downloady uses temporary files for incomplete downloads, that logic can be removed from opendirdl.
+
 - 2016 08 10
-    - Fixed bug in smart_insert caused by 404's being considered falsey, triggering the 'one and only one' exception.
-    - Fixed bug in smart_insert where 404'd URLs were not being deleted from the database.
-    - Added clickable links to each directory on HTML tree pages.
+    - **[addition]** Added clickable links to each directory on HTML tree pages.
+    - **[bugfix]** Fixed bug in smart_insert caused by 404's being considered falsey, triggering the 'one and only one' exception.
+    - **[bugfix]** Fixed bug in smart_insert where 404'd URLs were not being deleted from the database.

 - 2016 08 02
-    - Removed the usage of div IDs on the Tree pages by making the collapse button use `this.nextSibling`.
-    - Rewrote `build_file_tree` with a way simpler algorithm.
-    - Removed the ability to set a Node's parent during `__init__` because it wasn't fully fleshed out and doesn't need to be used anyway.
+    - **[cleanup]** Removed the need for div IDs on the Tree pages by making the collapse button use `this.nextSibling`.
+    - **[cleanup]** Rewrote `build_file_tree` with a way simpler algorithm.
+    - **[removal]** Removed the ability to set a Node's parent during `__init__` because it wasn't fully fleshed out and doesn't need to be used anyway.

 - 2016 08 01
-    - Made the digest work even if you forget the http://
+    - **[addition]** Made the digest work even if you forget the http://

 - 2016 07 29
-    - Moved some nested function definitions out to the top level, and made the construction of the file tree its own function. These functions really don't need to be used on their own, but they were cluttering the logic of the `tree` command.
-    - Renamed `Tree.listnodes` to `Tree.list_children` and the `customsort` now expects to operate on Node objects rather than `(identifier, Node)` tuples. Nodes already have their identifier so the tuple was unecessary.
-    - Removed `Tree.sorted_children` since it was basically a duplicate of `Tree.listnodes` and I don't know why I had both.
-    - Replaced all `safeprint` calls with `write` because it provides access to safeprint as well as file writing if needed.
-    - Replaced local `download_file` function with a call to `downloady.download_file`. It supports download continuation and removes duplicate work.
+    - **[change]** Moved some nested function definitions out to the top level, and made the construction of the file tree its own function. These functions really don't need to be used on their own, but they were cluttering the logic of the `tree` command.
+    - **[change]** Renamed `Tree.listnodes` to `Tree.list_children`, and the `customsort` now expects to operate on Node objects rather than `(identifier, Node)` tuples. Nodes already have their identifier, so the tuple was unnecessary.
+    - **[change]** Replaced local `download_file` function with a call to `downloady.download_file`. It supports download continuation and removes duplicate work.
+    - **[cleanup]** Replaced all `safeprint` calls with `write` because it provides access to safeprint as well as file writing if needed.
+    - **[removal]** Removed `Tree.sorted_children` since it was basically a duplicate of `Tree.listnodes` and I don't know why I had both.

 - 2016 07 25
-    - Removed the `Downloader` class after watching [this Jack Diederich talk](https://youtu.be/o9pEzgHorH0) about unecessary classes.
-    - Bytespersecond is now parsed by `bytestring.parsebytes` rather than `eval`, so you can write "100k" as opposed to "100 * 1024" etc.
+    - **[change]** Bytespersecond is now parsed by `bytestring.parsebytes` rather than `eval`, so you can write "100k" as opposed to "100 * 1024" etc.
+    - **[removal]** Removed the `Downloader` class after watching [this Jack Diederich talk](https://youtu.be/o9pEzgHorH0) about unnecessary classes.

 - 2016 07 19
-    - Rearranged the big blocks to be in a logical order rather than alphabetical order. Walker > Downloader > other classes
-    - Renamed the `keep_pattern` and `remove_pattern` functions to `keep_pattern_argparse` etc to be consistent with the other functions used by the argparser. *Does not affect the commandline usage!*
-    - Gave the HTML tree divs a very gentle shadow and alternating colors to help with depth perception.
-    - Fixed some mismatched code vs comments
-    - Fixed the allowed characters parameter of `filepath_sanitize`, which was not written correctly but worked out of luck.
+    - **[addition]** Gave the HTML tree divs a very gentle shadow and alternating colors to help with depth perception.
+    - **[bugfix]** Fixed the allowed characters parameter of `filepath_sanitize`, which was not written correctly but worked out of luck.
+    - **[cleanup]** Rearranged the big blocks to be in a logical order rather than alphabetical order. Walker > Downloader > other classes
+    - **[cleanup]** Renamed the `keep_pattern` and `remove_pattern` functions to `keep_pattern_argparse` etc to be consistent with the other functions used by the argparser. *Does not affect the commandline usage!*
+    - **[cleanup]** Fixed some mismatched code vs comments

 - 2016 07 08
-    - Fixed bug in which trees wouldn't generate on server:port urls.
+    - **[bugfix]** Fixed bug in which trees wouldn't generate on server:port urls.

 - 2016 07 04
-    - Added new argparse command "tree"
+    - **[addition]** Added new argparse command "tree"

 - 2016 02 08
-    - Fixed bug where server:port urls did not create db files because of the colon. It's been replaced by a hash.
-    - Moved db commits to only happen at the end of a digest.
+    - **[bugfix]** Fixed bug where server:port urls did not create db files because of the colon. It's been replaced by a hash.
+    - **[change]** Moved db commits to only happen at the end of a digest.
@@ -614,7 +614,7 @@ def fetch_generator(cur):

 def filepath_sanitize(text, allowed=''):
     badchars = FILENAME_BADCHARS
-    badchars = ''.join(char for char in FILENAME_BADCHARS if char not in allowed)
+    badchars = set(char for char in FILENAME_BADCHARS if char not in allowed)
     text = ''.join(char for char in text if char not in badchars)
     return text
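Both the string and the set support the `in` membership test here; the set just makes the intent explicit. Illustratively, assuming opendirdl's FILENAME_BADCHARS matches the '*?"<>|' constant seen in downloady:

    filepath_sanitize('what?is*this.jpg')               # -> 'whatisthis.jpg'
    filepath_sanitize('what?is*this.jpg', allowed='?')  # -> 'what?isthis.jpg'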
@@ -886,32 +886,16 @@ def download(
         folder = os.path.join(outputdir, url_filepath['folder'])
         os.makedirs(folder, exist_ok=True)

-        final_fullname = os.path.join(folder, url_filepath['filename'])
-        temporary_basename = hashit(url, 16) + '.oddltemporary'
-        temporary_fullname = os.path.join(folder, temporary_basename)
-
-        # Because we use .oddltemporary files, the behavior of `overwrite` here
-        # is different than the behavior of `overwrite` in downloady.
-        # The overwrite used in the following block refers to the finalized file.
-        # The overwrite passed to downloady refers to the oddltemporary which
-        # may be resumed.
-        if os.path.isfile(final_fullname):
-            if overwrite:
-                os.remove(final_fullname)
-            else:
-                write('Skipping "%s". Use `--overwrite`' % final_fullname)
-                continue
-
-        overwrite = overwrite or None
-        write('Downloading "%s" as "%s"' % (final_fullname, temporary_basename))
+        fullname = os.path.join(folder, url_filepath['filename'])
+
+        write('Downloading "%s"' % fullname)
         downloady.download_file(
             url,
-            localname=temporary_fullname,
+            localname=fullname,
             bytespersecond=bytespersecond,
             callback_progress=downloady.progress2,
             overwrite=overwrite
         )
-        os.rename(temporary_fullname, final_fullname)

 def download_argparse(args):
     return download(
@@ -7,7 +7,6 @@ class Path:
     def __init__(self, path):
         path = os.path.normpath(path)
         path = os.path.abspath(path)
-        path = get_path_casing(path)
         self.absolute_path = path

     def __contains__(self, other):
@@ -23,6 +22,10 @@ class Path:
     def basename(self):
         return os.path.basename(self.absolute_path)

+    def correct_case(self):
+        self.absolute_path = get_path_casing(self.absolute_path)
+        return self.absolute_path
+
     @property
     def exists(self):
         return os.path.exists(self.absolute_path)
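A sketch of the new method (the exact result depends on the filesystem's actual casing):

    p = Path('c:\\windows\\system32')
    p.correct_case()
    # p.absolute_path now carries the on-disk casing, e.g. 'C:\\Windows\\System32'

Moving this out of __init__ means construction no longer pays for a disk lookup; callers that need the true casing ask for it explicitly.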
@@ -14,6 +14,7 @@ sys.path.append('C:\\git\\else\\Ratelimiter'); import ratelimiter
 sys.path.append('C:\\git\\else\\SpinalTap'); import spinal

 FILE_READ_CHUNK = bytestring.MIBIBYTE
+RATELIMITER = ratelimiter.Ratelimiter(16 * bytestring.MIBIBYTE)

 # The paths which the user may access.
 # Attempting to access anything outside will 403.
@@ -98,6 +99,7 @@ class RequestHandler(http.server.BaseHTTPRequestHandler):
         if isinstance(data, types.GeneratorType):
             for chunk in data:
                 self.wfile.write(chunk)
+                RATELIMITER.limit(len(chunk))
         else:
             self.wfile.write(data)
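The Ratelimiter usage here mirrors downloady's: construct with a bytes-per-second budget, then call limit(n) after consuming n bytes. A generic sketch of throttling any chunked write this way (the loop and the names around the limiter are illustrative):

    limiter = ratelimiter.Ratelimiter(2 * bytestring.MIBIBYTE)  # 2 MiB/s budget
    for chunk in chunks:  # any iterable of byte chunks
        output.write(chunk)
        limiter.limit(len(chunk))  # presumably stalls to keep the average rate under budget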
@@ -350,16 +350,17 @@ def copy_file(

     source = str_to_fp(source)

+    if not source.is_file:
+        raise SourceNotFile(source)
+
     if destination_new_root is not None:
+        source.correct_case()
         destination = new_root(source, destination_new_root)
     destination = str_to_fp(destination)

     callback = callback or do_nothing
     callback_verbose = callback_verbose or do_nothing

-    if not source.is_file:
-        raise SourceNotFile(source)
-
     if destination.is_dir:
         raise DestinationIsDirectory(destination)
ThreadedDL/threaded_dl.py (new file, 65 lines)
@@ -0,0 +1,65 @@
import os
import sys
import threading
import time

sys.path.append('C:\\git\\else\\clipext'); import clipext
sys.path.append('C:\\git\\else\\downloady'); import downloady

def remove_finished(threads):
    threads = [t for t in threads if t.is_alive()]
    return threads

def download_thread(url, filename_prefix=''):
    url = url.strip()
    if url == '':
        return

    basename = downloady.basename_from_url(url)
    basename = filename_prefix + basename
    if os.path.exists(basename):
        print('Skipping existing file "%s"' % basename)
        return
    print('Starting "%s"' % basename)
    downloady.download_file(url, basename)
    print('Finished "%s"' % basename)

def listget(li, index, fallback):
    try:
        return li[index]
    except IndexError:
        return fallback

def threaded_dl(urls, thread_count=4):
    threads = []
    prefix_digits = len(str(len(urls)))
    prefix_text = '%0{digits}d_'.format(digits=prefix_digits)
    for (index, url) in enumerate(urls):
        while len(threads) == thread_count:
            threads = remove_finished(threads)
            time.sleep(0.1)

        prefix = prefix_text % index
        t = threading.Thread(target=download_thread, args=[url, prefix])
        t.daemon = True
        threads.append(t)
        t.start()

    while len(threads) > 0:
        threads = remove_finished(threads)
        time.sleep(0.1)

def main():
    filename = sys.argv[1]
    if os.path.isfile(filename):
        f = open(filename, 'r')
        with f:
            urls = f.read()
            urls = urls.split()
    else:
        urls = clipext.resolve(filename)
        urls = urls.split()
    threaded_dl(urls)

if __name__ == '__main__':
    main()
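Called from another script (a hypothetical direct use; the numeric prefixes follow from prefix_text above):

    urls = ['https://example.com/a.jpg', 'https://example.com/b.jpg']
    threaded_dl(urls, thread_count=2)
    # With two urls, prefix_digits is 1, so the files arrive as 0_a.jpg and 1_b.jpg.

From the command line, main() accepts either a file of URLs or a clipext shortcut: `python threaded_dl.py urls.txt`, or `python threaded_dl.py !c` to take the list from the clipboard.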