else
This commit is contained in:
parent: cc68bcbe06
commit: 94e54c9861

3 changed files with 68 additions and 41 deletions

Binary file not shown.
@@ -1,6 +1,10 @@
 import traceback
 import sqlite3
 import totaldl
+import praw

+r = praw.Reddit('')
+r.login('', '')
+
 sql = sqlite3.connect('!!testdata.db')
 cur = sql.cursor()
@@ -25,7 +29,13 @@ while True:
     if len(title) > 35:
         title = title[:34] + '-'
     try:
-        totaldl.handle_master(url, customname=title)
+        filepath = totaldl.handle_master(url, customname=title)
+        filepath = filepath.split('/')[-1]
+        if '.mp4' in filepath:
+            filepath = 'http://syriancivilwar.pw/Videos/' + filepath
+        submission = r.get_info(thing_id=item[1])
+        submission.add_comment('Mirror: %s' % filepath)
+        print(filepath)
     except:
         traceback.print_exc()
     cur2.execute('INSERT INTO totaldl_urls VALUES(?)', [url])

@@ -10,7 +10,8 @@ DOWNLOAD_DIRECTORY = ''
 # Save files to this folder
 # If blank, it uses the local folder

-IMGUR_ALBUM_INDV = '"og:image"content="htt'
+IMGUR_ALBUM_INDV = 'Viewfullresolution<'
+IMGUR_ALBUM_INDV2 = 'linkrel="image_src"'
 # The HTML string which tells us that an image link is
 # on this line.

@@ -42,6 +43,9 @@ LIVELEAK_YOUTUBEIFRAME = 'youtube.com/embed'

 LIVELEAK_RESOLUTIONS = ['h264_base', 'h264_720p', 'h264_270p']

+YOUTUBE_DL_FORMAT = 'youtube-dl "{url}" --no-playlist --force-ipv4 -o "/{dir}/{name}.%(ext)s"'
+# The format for the youtube-dl shell command
+
 DO_GENERIC = True
 # If true, attempt to download whatever URL goes in
 # Else, only download from the explicitly supported sites
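For reference, this template is expanded with str.format later in the diff. A minimal sketch of the resulting shell command, using an invented dir and name (the URL is one of the test URLs further down; none of these values come from the commit itself):

command = YOUTUBE_DL_FORMAT.format(url='https://youtu.be/GjOBTstnW20', dir='Videos', name='example')
# -> youtube-dl "https://youtu.be/GjOBTstnW20" --no-playlist --force-ipv4 -o "/Videos/example.%(ext)s"
# str.format only fills the {url}/{dir}/{name} fields; %(ext)s is left for youtube-dl itself to expand.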
@@ -73,7 +77,7 @@ def download_file(url, localname):
     for chunk in downloading.iter_content(chunk_size=1024):
         if chunk:
             localfile.write(chunk)
-    return True
+    return localname

 def request_get(url, stream=False):
     global last_request
@@ -93,11 +97,18 @@ def request_get(url, stream=False):
 def handle_imgur_html(url):
     pagedata = request_get(url)
     pagedata = pagedata.text.replace(' ', '')
+    pagedata = pagedata.replace('src="', 'href="')
+    pagedata = pagedata.replace(IMGUR_ALBUM_INDV2, IMGUR_ALBUM_INDV)
     pagedata = pagedata.split('\n')
     pagedata = [line.strip() for line in pagedata]
     pagedata = [line for line in pagedata if IMGUR_ALBUM_INDV in line]
-    pagedata = [line.split('"')[-2] for line in pagedata]
+    pagedata = [line.split('href=')[1] for line in pagedata]
+    pagedata = [line.replace('"//', '"http://') for line in pagedata]
+    pagedata = [line.split('"')[1] for line in pagedata]
     links = []
+    first = pagedata[0].split('.')[0]
+    if [x.split('.')[0] for x in pagedata].count(first) > 1:
+        pagedata = pagedata[1:]
     for image in pagedata:
         image = image.split('?')[0]
         if image not in links:
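To make the new parsing chain concrete, here is a rough walk-through of one matched line. The HTML is an invented example of the pattern the code appears to expect, not a real imgur page, and the spaces have already been stripped by the replace(' ', '') above:

line = '<ahref="//i.imgur.com/AbCdEfG.jpg">Viewfullresolution</a>'   # hypothetical input line
line = line.split('href=')[1]             # '"//i.imgur.com/AbCdEfG.jpg">Viewfullresolution</a>'
line = line.replace('"//', '"http://')    # '"http://i.imgur.com/AbCdEfG.jpg">Viewfullresolution</a>'
line = line.split('"')[1]                 # 'http://i.imgur.com/AbCdEfG.jpg'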
@@ -110,6 +121,7 @@ def handle_imgur(url, albumid='', customname=None):
         # This link doesn't appear to have an image id
         return

+    url = url.replace('/gallery/', '/a/')
     basename = name.split('.')[0]
     if '.' in name:
         # This is a direct image link
@@ -130,7 +142,7 @@ def handle_imgur(url, albumid='', customname=None):
         else:
             localpath = name

-        download_file(url, localpath)
+        return download_file(url, localpath)

     else:
         # Not a direct image link, let's read the html.
@@ -138,14 +150,19 @@ def handle_imgur(url, albumid='', customname=None):
         if customname:
             name = customname
         print('\tFound %d images' % len(images))

+        localfiles = []
         if len(images) > 1:
             for imagei in range(len(images)):
                 image = images[imagei]
                 iname = image.split('/')[-1]
                 iname = iname.split('.')[0]
-                handle_imgur(image, albumid=name, customname='%d_%s' % (imagei, iname))
+                x = handle_imgur(image, albumid=name, customname='%d_%s' % (imagei, iname))
+                localfiles.append(x)
         else:
-            handle_imgur(images[0], customname=name)
+            x = handle_imgur(images[0], customname=name)
+            localfiles.append(x)
+        return localfiles
+

 def handle_gfycat(url, customname=None):
@@ -166,8 +183,7 @@ def handle_gfycat(url, customname=None):
     for subdomain in GFYCAT_SUBDOMAINS:
         url = 'http://%s.gfycat.com/%s' % (subdomain, name)
         try:
-            download_file(url, filename)
-            break
+            return download_file(url, filename)
         except StatusExc:
             pass

@@ -200,7 +216,7 @@ def handle_vimeo(url, customname=None):
         filename = customname + '.mp4'
     else:
         filename = name + '.mp4'
-    download_file(fileurl, filename)
+    return download_file(fileurl, filename)


 def handle_liveleak(url, customname=None):
@@ -231,23 +247,24 @@ def handle_liveleak(url, customname=None):
     for res in LIVELEAK_RESOLUTIONS:
         url = pagedata.replace('LIVELEAKRESOLUTION', res)
         try:
-            download_file(url, name)
-            return
+            return download_file(url, name)
         except StatusExc:
             pass
-    download_file(original, name)
+    return download_file(original, name)



 def handle_youtube(url, customname=None):
-    # The customname doesn't do anything on this function
-    # but handle_master works better if everything uses
-    # the same format.
     url = url.replace('&amp;', '&')
     url = url.replace('feature=player_embedded&', '')
     url = url.replace('&feature=player_embedded', '')
-    os.system('youtube-dl "{0}" --no-playlist --force-ipv4 -o "/{1}/%(title)s.%(ext)s"'.format(url, DOWNLOAD_DIRECTORY))
+    if not customname:
+        os.system(YOUTUBE_DL_FORMAT.format(url=url, dir=DOWNLOAD_DIRECTORY, name='%(title)s'))
+        return
+    os.system(YOUTUBE_DL_FORMAT.format(url=url, dir=DOWNLOAD_DIRECTORY, name=customname))
+    if DOWNLOAD_DIRECTORY:
+        return '%s/%s.mp4' % (DOWNLOAD_DIRECTORY, customname)
+    return '%s.mp4' % customname

 def handle_twitter(url, customname=None):
     pagedata = request_get(url)
@@ -260,34 +277,32 @@ def handle_twitter(url, customname=None):
         name = idnumber
         customname = idnumber
     tweetpath = '%s.html' % (DOWNLOAD_DIRECTORY + name)
-    if not os.path.exists(tweetpath):
-        psplit = '<p class="TweetTextSize'
-        tweettext = pagedata.split(psplit)[1]
-        tweettext = tweettext.split('</p>')[0]
-        tweettext = psplit + tweettext + '</p>'
-        tweettext = '<html><body>%s</body></html>' % tweettext
-        tweettext = tweettext.replace('/hashtag/', 'http://twitter.com/hashtag/')
-        tweethtml = open(tweetpath, 'w')
-        tweethtml.write(tweettext)
-        tweethtml.close()
-        print('\tSaved tweet text')
-    else:
-        print('\tTweet text already exists')
+    psplit = '<p class="TweetTextSize'
+    tweettext = pagedata.split(psplit)[1]
+    tweettext = tweettext.split('</p>')[0]
+    tweettext = psplit + tweettext + '</p>'
+    tweettext = '<html><body>%s</body></html>' % tweettext
+    tweettext = tweettext.replace('/hashtag/', 'http://twitter.com/hashtag/')
+    tweethtml = open(tweetpath, 'w', encoding='utf-8')
+    tweethtml.write(tweettext)
+    tweethtml.close()
+    print('\tSaved tweet text')
     try:
         link = pagedata.split('data-url="')[1]
         link = link.split('"')[0]
         if link != url:
             handle_master(link, customname=customname)
-        return
+        return tweetpath
     except IndexError:
         try:
             link = pagedata.split('data-expanded-url="')[1]
             link = link.split('"')[0]
             if link != url:
                 handle_master(link, customname=customname)
-            return
+            return tweetpath
         except IndexError:
             pass
+    return tweetpath
     print('\tNo media detected')


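A brief note on the open() change above, sketched with an invented tweet string rather than anything from the commit: passing encoding='utf-8' lets the saved .html hold non-ASCII tweet text, which some platform default codecs (for example cp1252 on Windows) would reject with a UnicodeEncodeError; that is presumably also why test_twitter below gains a case with Arabic characters.

tweettext = '<html><body>مرحبا</body></html>'                    # hypothetical tweet body with Arabic text
tweethtml = open('example_tweet.html', 'w', encoding='utf-8')    # hypothetical filename
tweethtml.write(tweettext)    # would fail under cp1252, succeeds with utf-8
tweethtml.close()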
@@ -298,7 +313,7 @@ def handle_generic(url, customname=None):
             name = '%s.%s' % (customname, name.split('.')[-1])
         if '.' not in name:
             name += '.html'
-        download_file(url, name)
+        return download_file(url, name)
     except:
         pass
 ##
@@ -318,10 +333,9 @@ def handle_master(url, customname=None):
     print('Handling %s' % url)
     for handlerkey in HANDLERS:
         if handlerkey.lower() in url.lower():
-            HANDLERS[handlerkey](url, customname=customname)
-            return
+            return HANDLERS[handlerkey](url, customname=customname)
     if DO_GENERIC:
-        handle_generic(url, customname=customname)
+        return handle_generic(url, customname=customname)

 def test_imgur():
     # Imgur gallery album
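The handle_master change above is what ties the commit together: each handler's return value (normally the local file name) now propagates back to the caller. A minimal usage sketch, with the URL and customname borrowed from the tests below; it assumes the matched handler returns a path, while handle_imgur can return a list and handle_youtube returns None when no customname is given:

filepath = handle_master('https://youtu.be/GjOBTstnW20', customname='youtube')
print('Downloaded to %s' % filepath)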
@@ -371,7 +385,7 @@ def test_youtube():
     handle_master('https://www.youtube.com/watch?v=bEgeh5hA5ko')

     # Youtube short link
-    handle_master('https://youtu.be/GjOBTstnW20')
+    handle_master('https://youtu.be/GjOBTstnW20', customname='youtube')

     # Youtube player embed link
     handle_master('https://www.youtube.com/watch?feature=player_embedded&v=bEgeh5hA5ko')
@@ -395,6 +409,9 @@ def test_twitter():
     # Twitter plain text
     handle_master('https://twitter.com/SyriacMFS/status/556513635913437184')

+    # Twitter with arabic characters
+    handle_master('https://twitter.com/HadiAlabdallah/status/600885154991706113')
+
 def test_generic():
     # Some link that might work
     handle_master('https://raw.githubusercontent.com/voussoir/reddit/master/SubredditBirthdays/show/statistics.txt')
@@ -412,8 +429,8 @@ if __name__ == '__main__':
     #test_imgur()
     #test_gfycat()
     #test_vimeo()
-    #test_liveleak()
-    #test_youtube()
-    test_twitter()
+    test_liveleak()
+    test_youtube()
+    #test_twitter()
     #test_generic()
     pass