else
totaldl 1
This commit is contained in:
parent
476c08d2d6
commit
f13db2ae88
4 changed files with 282 additions and 11 deletions
|
@ -5,19 +5,24 @@ Using Heroku to run a bot
|
||||||
|
|
||||||
Inside git.zip is the .git repo that I created for this. I had to zip it so that I can push it here.
|
Inside git.zip is the .git repo that I created for this. I had to zip it so that I can push it here.
|
||||||
|
|
||||||
1. Create a [Heroku account](http://heroku.com)
|
1. Create a [Heroku account](http://heroku.com).
|
||||||
2. Install [Heroku toolbelt](https://toolbelt.heroku.com/)
|
3. Install [Git](http://git-scm.com/).
|
||||||
3. Install [Git](http://git-scm.com/)
|
2. Install [Heroku toolbelt](https://toolbelt.heroku.com/).
|
||||||
4. Create a folder to keep your repo
|
4. Create a folder to keep your repo in.
|
||||||
5. cd into this folder
|
5. Open a terminal and `cd` into that folder.
|
||||||
5. Write your bot
|
5. Write your bot.
|
||||||
6. Create requirements.txt, and require a version of praw
|
6. Create requirements.txt, and require a version of praw.
|
||||||
7. Create runtime.txt, and require a version of Python
|
7. Create runtime.txt, and require a version of Python.
|
||||||
8. Create Procfile, and create a worker that will launch your bot.
|
8. Create Procfile, and create a worker that will launch your bot. Notice that this file does not have an extension.
|
||||||
9. `> heroku login`
|
9. `> heroku login`
|
||||||
|
|
||||||
|
Enter your Heroku credentials.
|
||||||
|
Email: email@email.com
|
||||||
|
Password (typing will be hidden):
|
||||||
|
Authentication successful.
|
||||||
10. `> git init`
|
10. `> git init`
|
||||||
11. `> git add .`
|
11. `> git add .`
|
||||||
12. `> git commit -m "1"`
|
12. `> git commit -m "Commit Message"`
|
||||||
13. `> heroku create`
|
13. `> heroku create`
|
||||||
|
|
||||||
Creating aqueous-plains-9797... done, stack is cedar-14
|
Creating aqueous-plains-9797... done, stack is cedar-14
|
||||||
|
@ -74,3 +79,5 @@ Inside git.zip is the .git repo that I created for this. I had to zip it so that
|
||||||
2015-05-01T00:32:45.316900+00:00 app[worker.1]: All done!
|
2015-05-01T00:32:45.316900+00:00 app[worker.1]: All done!
|
||||||
|
|
||||||
17. Celebrate
|
17. Celebrate
|
||||||
|
|
||||||
|
18. To turn your bot off at any time, simply `> heroku ps:scale worker=0`
|
Binary file not shown.
|
@ -1,16 +1,28 @@
|
||||||
import praw
import sqlite3
import time

# NOTE(review): the credentials below are hard-coded and committed to the
# repo; move them to environment variables before publishing. Left in place
# here because changing them would break the running deployment.

print('Logging in.')
r = praw.Reddit('Testing praw api usage over Heroku')
r.login('qQGusVuAHezHxhYTiYGm', 'qQGusVuAHezHxhYTiYGm')

# Open (or create) the local sqlite database used to record snapshots.
print('Loading database')
sql = sqlite3.connect('sql.db')
cur = sql.cursor()

cur.execute('CREATE TABLE IF NOT EXISTS subreddits(name TEXT, subscribers INT)')
sql.commit()

# Fetch current stats for the subreddit and print them.
print('Getting subreddit info.')
sub = r.get_subreddit('Goldtesting')
print('/r/Goldtesting')
print('\tCreated at: %d' % sub.created_utc)
print('\tSubscribers: %d' % sub.subscribers)

# Persist one (name, subscribers) row per run.
print('Saving subreddit info.')
cur.execute('INSERT INTO subreddits VALUES(?, ?)', ['Goldtesting', sub.subscribers])
sql.commit()

print('All done!')
# Keep the Heroku worker dyno alive after the one-shot work is finished.
while True:
    time.sleep(60)
|
252
TotalDL/totaldl.py
Normal file
252
TotalDL/totaldl.py
Normal file
|
@ -0,0 +1,252 @@
|
||||||
|
import json
|
||||||
|
import requests
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
|
||||||
|
# Pretend to be a desktop Chrome browser; some hosts reject the default
# python-requests user agent.
HEADERS = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.152 Safari/537.36'}

# Marker substring: any line of an imgur album page containing this holds a
# direct image link.
IMGUR_ALBUM_INDV = '"og:image"content="htt'

# True: album images are saved as <album_id>/<img_id>.jpg inside a folder
# named after the album.
# False: they are saved flat as <album_id>_<img_id>.jpg.
IMGUR_ALBUMFOLDERS = True

# True: fetch gfycat urls as .mp4; False: as .webm.
GFYCAT_MP4 = True

# Content subdomains to try, in order, when resolving a gfycat file.
GFYCAT_SUBDOMAINS = ['zippy', 'fat', 'giant']

# Minimum seconds between outgoing requests (mirrors PRAW's ratelimiting).
# Not required, but kinder to the servers.
SLEEPINESS = 2

# Delimiters around the JSON file dictionary embedded in a vimeo player page.
VIMEO_DICT_START = '"files":{"h264":'
VIMEO_DICT_END = ',"hls"'

# Preferred vimeo qualities, best first.
VIMEO_PRIORITY = ['hd', 'sd', 'mobile']

# True: fall back to a naive download for unrecognized urls.
# False: only the explicitly supported sites are handled.
DO_GENERIC = True

''' End user config '''

# Timestamp of the most recent request, used for ratelimiting.
last_request = 0
|
||||||
|
|
||||||
|
class StatusExc(Exception):
    '''Raised when an HTTP request returns a non-200 status code.'''
    pass
|
||||||
|
|
||||||
|
def download_file(url, localname):
    '''
    Stream `url` to disk as `localname`.

    If `localname` already exists the download is skipped and None is
    returned; on success returns True.
    '''
    if os.path.exists(localname):
        print('\t%s already exists!!' % localname)
        return
    print('\tDownloading %s' % localname)
    downloading = request_get(url, stream=True)
    # BUGFIX: the original opened the file without ever closing it,
    # leaking the handle (and buffered data) if the stream raised.
    # The `with` block guarantees the file is closed and flushed.
    with open(localname, 'wb') as localfile:
        for chunk in downloading.iter_content(chunk_size=1024):
            if chunk:
                localfile.write(chunk)
    return True
|
||||||
|
|
||||||
|
def request_get(url, stream=False):
    '''
    GET `url` with the shared HEADERS, sleeping first so that consecutive
    requests are at least SLEEPINESS seconds apart.

    Raises StatusExc for any non-200 response; returns the response object.
    '''
    global last_request
    elapsed = time.time() - last_request
    if elapsed < SLEEPINESS:
        time.sleep(SLEEPINESS - elapsed)
    last_request = time.time()
    response = requests.get(url, stream=stream, headers=HEADERS)
    if response.status_code != 200:
        raise StatusExc("Status code %d on url %s" % (response.status_code, url))
    return response
|
||||||
|
|
||||||
|
##############################################################################
|
||||||
|
##
|
||||||
|
def handle_imgur_html(url):
    '''
    Scrape an imgur album page and return its direct image links,
    deduplicated, in page order (query strings stripped).
    '''
    html = request_get(url).text.replace(' ', '')
    stripped = [line.strip() for line in html.split('\n')]
    candidates = [line.split('"')[-2] for line in stripped if IMGUR_ALBUM_INDV in line]
    links = []
    for link in candidates:
        link = link.split('?')[0]
        if link not in links:
            links.append(link)
    return links
|
||||||
|
|
||||||
|
def handle_imgur(url, albumid=''):
    '''
    Download an imgur link.

    Direct image links are saved to disk; album pages are scraped and
    each image is handled recursively with `albumid` set, so filenames
    can be grouped by album per the IMGUR_ALBUMFOLDERS setting.
    '''
    name = url.split('/')[-1]
    if 'imgur.com' in name:
        # This link doesn't appear to have an image id
        return

    basename = name.split('.')[0]
    if '.' in name:
        # This is a direct image link
        if IMGUR_ALBUMFOLDERS and albumid and albumid != basename:
            if not os.path.exists(albumid):
                os.makedirs(albumid)
            # BUGFIX: the original built the path as '%s\\%s', a literal
            # backslash that only works on Windows; on other platforms it
            # produced a single file named "album\image". os.path.join
            # uses the platform's separator (identical output on Windows).
            localpath = os.path.join(albumid, name)

        elif albumid and albumid != basename:
            localpath = '%s_%s' % (albumid, name)

        else:
            localpath = name

        download_file(url, localpath)

    else:
        # Not a direct image link, let's read the html.
        images = handle_imgur_html(url)
        print('\tFound %d images' % len(images))
        for image in images:
            handle_imgur(image, albumid=name)
|
||||||
|
|
||||||
|
|
||||||
|
def handle_gfycat(url):
    '''
    Download a gfycat video, trying each known content subdomain until
    one serves the file. The extension follows the GFYCAT_MP4 setting.
    '''
    extension = '.mp4' if GFYCAT_MP4 else '.webm'
    name = url.split('/')[-1].split('.')[0] + extension
    for subdomain in GFYCAT_SUBDOMAINS:
        attempt = 'http://%s.gfycat.com/%s' % (subdomain, name)
        try:
            download_file(attempt, name)
            break
        except StatusExc:
            # This subdomain doesn't host the file; try the next one.
            pass
|
||||||
|
|
||||||
|
|
||||||
|
def handle_vimeo(url):
    '''
    Download a vimeo video.

    The numeric video id is taken from the url, the player page is
    fetched, and the mp4 file url is extracted from the embedded JSON
    config, preferring qualities in VIMEO_PRIORITY order.

    Raises ValueError when the url has no numeric id, and StatusExc when
    the page contains no usable file dictionary.
    '''
    name = url.split('/')[-1]
    name = name.split('?')[0]
    try:
        int(name)
    except ValueError as e:
        print('Could not identify filename of %s' % url)
        raise e
    url = 'http://player.vimeo.com/video/%s' % name
    pagedata = request_get(url).text
    pagedata = pagedata.replace('</script>', '<script')
    for chunk in pagedata.split('<script>'):
        if VIMEO_DICT_START in chunk:
            break
    else:
        # BUGFIX: the original fell through with the LAST chunk when the
        # marker was absent and crashed with IndexError on the split
        # below; fail with a clear message instead.
        raise StatusExc('No file dictionary found on %s' % url)
    chunk = chunk.split(VIMEO_DICT_START)[1]
    chunk = chunk.split(VIMEO_DICT_END)[0]
    files = json.loads(chunk)

    for priority in VIMEO_PRIORITY:
        if priority in files:
            fileurl = files[priority]['url']
            break
    else:
        # BUGFIX: the original hit NameError on `fileurl` when no
        # priority matched; raise a meaningful error instead.
        raise StatusExc('No file of priority %s on %s' % (VIMEO_PRIORITY, url))
    filename = name + '.mp4'
    download_file(fileurl, filename)
|
||||||
|
|
||||||
|
|
||||||
|
def handle_liveleak(url):
    '''
    Download a liveleak video. The page embeds a `file: "..."` url whose
    h264 quality token is rewritten to request the 720p variant.
    '''
    filename = url.split('=')[1] + '.mp4'
    pagedata = request_get(url).text
    fileurl = pagedata.split('file: "')[1].split('",')[0]
    # Rewrite the quality segment (h264_*) of the dotted url to 720p.
    parts = fileurl.split('.')
    for index, part in enumerate(parts):
        if 'h264_' in part:
            parts[index] = 'h264_720p'
    download_file('.'.join(parts), filename)
|
||||||
|
|
||||||
|
|
||||||
|
def handle_youtube(url):
    '''
    Delegate a youtube link to the external youtube-dl tool.
    '''
    import subprocess
    # SECURITY FIX: os.system('youtube-dl %s ...' % url) interpolated an
    # untrusted url straight into a shell command line (shell injection,
    # e.g. a url containing "; rm -rf ~"). Passing an argument list with
    # no shell cannot be injected.
    subprocess.call(['youtube-dl', url, '--force-ipv4'])
|
||||||
|
|
||||||
|
|
||||||
|
def handle_generic(url):
    '''
    Best-effort download of an arbitrary url, saved under its last path
    segment. Failures are deliberately ignored.
    '''
    try:
        name = url.split('/')[-1]
        download_file(url, name)
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt and
        # SystemExit are no longer swallowed; still best-effort.
        pass
|
||||||
|
##
|
||||||
|
##############################################################################
|
||||||
|
|
||||||
|
# Maps a domain fragment to the function that downloads links from it.
HANDLERS = {
    'imgur.com': handle_imgur,
    'gfycat.com': handle_gfycat,
    'vimeo.com': handle_vimeo,
    'liveleak.com': handle_liveleak,
    'youtube.com': handle_youtube,
    'youtu.be': handle_youtube
}

def handle_master(url):
    '''
    Route `url` to the first handler whose domain fragment appears in it
    (case-insensitive). When nothing matches, fall back to the generic
    downloader if DO_GENERIC is enabled.
    '''
    print('Handling %s' % url)
    lowered = url.lower()
    for fragment, handler in HANDLERS.items():
        if fragment.lower() in lowered:
            handler(url)
            return
    if DO_GENERIC:
        handle_generic(url)
|
||||||
|
|
||||||
|
def test(imgur=True, gfycat=True, vimeo=True, liveleak=True, youtube=True, generic=True):
    '''
    Exercise every handler against one known link of each kind.
    Set a flag to False to skip that site's cases.
    '''
    print('Testing')
    if imgur:
        # Gallery album, plain album, indirect single, direct single.
        handle_master('http://imgur.com/gallery/s4WLG')
        handle_master('http://imgur.com/a/s4WLG')
        handle_master('http://imgur.com/gvJUct0')
        handle_master('http://i.imgur.com/gvJUct0.jpg')

    if gfycat:
        # Direct .gif and general page link.
        handle_master('http://giant.gfycat.com/FatherlyBruisedIberianchiffchaff.gif')
        handle_master('http://www.gfycat.com/RawWetFlatcoatretriever')

    if vimeo:
        # Vimeo standard link.
        handle_master('https://vimeo.com/109405701')

    if liveleak:
        # LiveLeak standard link.
        handle_master('http://www.liveleak.com/view?i=9d1_1429192014')

    if youtube:
        # Youtube standard and short link forms.
        handle_master('https://www.youtube.com/watch?v=bEgeh5hA5ko')
        handle_master('https://youtu.be/GjOBTstnW20')

    if generic:
        # Arbitrary urls that might work with the generic downloader.
        handle_master('https://raw.githubusercontent.com/voussoir/reddit/master/SubredditBirthdays/show/statistics.txt')
        handle_master('https://github.com/voussoir/reddit/tree/master/SubredditBirthdays/show')

test()
|
Loading…
Reference in a new issue