else
totaldl 1
This commit is contained in:
parent
476c08d2d6
commit
f13db2ae88
4 changed files with 282 additions and 11 deletions
|
@ -5,19 +5,24 @@ Using Heroku to run a bot
|
||||||
|
|
||||||
Inside git.zip is the .git repo that I created for this. I had to zip it so that I can push it here.
|
Inside git.zip is the .git repo that I created for this. I had to zip it so that I can push it here.
|
||||||
|
|
||||||
1. Create a [Heroku account](http://heroku.com)
|
1. Create a [Heroku account](http://heroku.com).
|
||||||
2. Install [Heroku toolbelt](https://toolbelt.heroku.com/)
|
3. Install [Git](http://git-scm.com/).
|
||||||
3. Install [Git](http://git-scm.com/)
|
2. Install [Heroku toolbelt](https://toolbelt.heroku.com/).
|
||||||
4. Create a folder to keep your repo
|
4. Create a folder to keep your repo in.
|
||||||
5. cd into this folder
|
5. Open a terminal and `cd` into that folder.
|
||||||
5. Write your bot
|
5. Write your bot.
|
||||||
6. Create requirements.txt, and require a version of praw
|
6. Create requirements.txt, and require a version of praw.
|
||||||
7. Create runtime.txt, and require a version of Python
|
7. Create runtime.txt, and require a version of Python.
|
||||||
8. Create Procfile, and create a worker that will launch your bot.
|
8. Create Procfile, and create a worker that will launch your bot. Notice that this file does not have an extension.
|
||||||
9. `> heroku login`
|
9. `> heroku login`
|
||||||
|
|
||||||
|
Enter your Heroku credentials.
|
||||||
|
Email: email@email.com
|
||||||
|
Password (typing will be hidden):
|
||||||
|
Authentication successful.
|
||||||
10. `> git init`
|
10. `> git init`
|
||||||
11. `> git add .`
|
11. `> git add .`
|
||||||
12. `> git commit -m "1"`
|
12. `> git commit -m "Commit Message"`
|
||||||
13. `> heroku create`
|
13. `> heroku create`
|
||||||
|
|
||||||
Creating aqueous-plains-9797... done, stack is cedar-14
|
Creating aqueous-plains-9797... done, stack is cedar-14
|
||||||
|
@ -74,3 +79,5 @@ Inside git.zip is the .git repo that I created for this. I had to zip it so that
|
||||||
2015-05-01T00:32:45.316900+00:00 app[worker.1]: All done!
|
2015-05-01T00:32:45.316900+00:00 app[worker.1]: All done!
|
||||||
|
|
||||||
17. Celebrate
|
17. Celebrate
|
||||||
|
|
||||||
|
18. To turn your bot off at any time, simply `> heroku ps:scale worker=0`
|
Binary file not shown.
|
@ -1,16 +1,28 @@
|
||||||
import praw
import sqlite3
import time

# NOTE(review): the credentials below are hard-coded and committed to the
# repo; move them to environment variables before publishing. Left in place
# here because changing them would break the running deployment.

print('Logging in.')
r = praw.Reddit('Testing praw api usage over Heroku')
r.login('qQGusVuAHezHxhYTiYGm', 'qQGusVuAHezHxhYTiYGm')

# Open (or create) the local sqlite database used to record snapshots.
print('Loading database')
sql = sqlite3.connect('sql.db')
cur = sql.cursor()

cur.execute('CREATE TABLE IF NOT EXISTS subreddits(name TEXT, subscribers INT)')
sql.commit()

# Fetch current stats for the subreddit and print them.
print('Getting subreddit info.')
sub = r.get_subreddit('Goldtesting')
print('/r/Goldtesting')
print('\tCreated at: %d' % sub.created_utc)
print('\tSubscribers: %d' % sub.subscribers)

# Persist one (name, subscribers) row per run.
print('Saving subreddit info.')
cur.execute('INSERT INTO subreddits VALUES(?, ?)', ['Goldtesting', sub.subscribers])
sql.commit()

print('All done!')
# Keep the Heroku worker dyno alive after the one-shot work is finished.
while True:
    time.sleep(60)
|
252
TotalDL/totaldl.py
Normal file
252
TotalDL/totaldl.py
Normal file
|
@ -0,0 +1,252 @@
|
||||||
|
import json
|
||||||
|
import requests
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
|
||||||
|
# Pretend to be a desktop Chrome browser; some hosts reject the default
# python-requests user agent.
HEADERS = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.152 Safari/537.36'}

# Marker substring: any line of an imgur album page containing this holds a
# direct image link.
IMGUR_ALBUM_INDV = '"og:image"content="htt'

# True: album images are saved as <album_id>/<img_id>.jpg inside a folder
# named after the album.
# False: they are saved flat as <album_id>_<img_id>.jpg.
IMGUR_ALBUMFOLDERS = True

# True: fetch gfycat urls as .mp4; False: as .webm.
GFYCAT_MP4 = True

# Content subdomains to try, in order, when resolving a gfycat file.
GFYCAT_SUBDOMAINS = ['zippy', 'fat', 'giant']

# Minimum seconds between outgoing requests (mirrors PRAW's ratelimiting).
# Not required, but kinder to the servers.
SLEEPINESS = 2

# Delimiters around the JSON file dictionary embedded in a vimeo player page.
VIMEO_DICT_START = '"files":{"h264":'
VIMEO_DICT_END = ',"hls"'

# Preferred vimeo qualities, best first.
VIMEO_PRIORITY = ['hd', 'sd', 'mobile']

# True: fall back to a naive download for unrecognized urls.
# False: only the explicitly supported sites are handled.
DO_GENERIC = True

''' End user config '''

# Timestamp of the most recent request, used for ratelimiting.
last_request = 0
|
||||||
|
|
||||||
|
class StatusExc(Exception):
    '''Raised when an HTTP request returns a non-200 status code.'''
    pass
|
||||||
|
|
||||||
|
def download_file(url, localname):
    '''
    Stream `url` to disk as `localname`.

    If `localname` already exists the download is skipped and None is
    returned; on success returns True.
    '''
    if os.path.exists(localname):
        print('\t%s already exists!!' % localname)
        return
    print('\tDownloading %s' % localname)
    downloading = request_get(url, stream=True)
    # BUGFIX: the original opened the file without ever closing it,
    # leaking the handle (and buffered data) if the stream raised.
    # The `with` block guarantees the file is closed and flushed.
    with open(localname, 'wb') as localfile:
        for chunk in downloading.iter_content(chunk_size=1024):
            if chunk:
                localfile.write(chunk)
    return True
|
||||||
|
|
||||||
|
def request_get(url, stream=False):
    '''
    GET `url` with the shared HEADERS, sleeping first so that consecutive
    requests are at least SLEEPINESS seconds apart.

    Raises StatusExc for any non-200 response; returns the response object.
    '''
    global last_request
    elapsed = time.time() - last_request
    if elapsed < SLEEPINESS:
        time.sleep(SLEEPINESS - elapsed)
    last_request = time.time()
    response = requests.get(url, stream=stream, headers=HEADERS)
    if response.status_code != 200:
        raise StatusExc("Status code %d on url %s" % (response.status_code, url))
    return response
|
||||||
|
|
||||||
|
##############################################################################
|
||||||
|
##
|
||||||
|
def handle_imgur_html(url):
    '''
    Scrape an imgur album page and return its direct image links,
    deduplicated, in page order (query strings stripped).
    '''
    html = request_get(url).text.replace(' ', '')
    stripped = [line.strip() for line in html.split('\n')]
    candidates = [line.split('"')[-2] for line in stripped if IMGUR_ALBUM_INDV in line]
    links = []
    for link in candidates:
        link = link.split('?')[0]
        if link not in links:
            links.append(link)
    return links
|
||||||
|
|
||||||
|
def handle_imgur(url, albumid=''):
    '''
    Download an imgur link.

    Direct image links are saved to disk; album pages are scraped and
    each image is handled recursively with `albumid` set, so filenames
    can be grouped by album per the IMGUR_ALBUMFOLDERS setting.
    '''
    name = url.split('/')[-1]
    if 'imgur.com' in name:
        # This link doesn't appear to have an image id
        return

    basename = name.split('.')[0]
    if '.' in name:
        # This is a direct image link
        if IMGUR_ALBUMFOLDERS and albumid and albumid != basename:
            if not os.path.exists(albumid):
                os.makedirs(albumid)
            # BUGFIX: the original built the path as '%s\\%s', a literal
            # backslash that only works on Windows; on other platforms it
            # produced a single file named "album\image". os.path.join
            # uses the platform's separator (identical output on Windows).
            localpath = os.path.join(albumid, name)

        elif albumid and albumid != basename:
            localpath = '%s_%s' % (albumid, name)

        else:
            localpath = name

        download_file(url, localpath)

    else:
        # Not a direct image link, let's read the html.
        images = handle_imgur_html(url)
        print('\tFound %d images' % len(images))
        for image in images:
            handle_imgur(image, albumid=name)
|
||||||
|
|
||||||
|
|
||||||
|
def handle_gfycat(url):
    '''
    Download a gfycat video, trying each known content subdomain until
    one serves the file. The extension follows the GFYCAT_MP4 setting.
    '''
    extension = '.mp4' if GFYCAT_MP4 else '.webm'
    name = url.split('/')[-1].split('.')[0] + extension
    for subdomain in GFYCAT_SUBDOMAINS:
        attempt = 'http://%s.gfycat.com/%s' % (subdomain, name)
        try:
            download_file(attempt, name)
            break
        except StatusExc:
            # This subdomain doesn't host the file; try the next one.
            pass
|
||||||
|
|
||||||
|
|
||||||
|
def handle_vimeo(url):
    '''
    Download a vimeo video.

    The numeric video id is taken from the url, the player page is
    fetched, and the mp4 file url is extracted from the embedded JSON
    config, preferring qualities in VIMEO_PRIORITY order.

    Raises ValueError when the url has no numeric id, and StatusExc when
    the page contains no usable file dictionary.
    '''
    name = url.split('/')[-1]
    name = name.split('?')[0]
    try:
        int(name)
    except ValueError as e:
        print('Could not identify filename of %s' % url)
        raise e
    url = 'http://player.vimeo.com/video/%s' % name
    pagedata = request_get(url).text
    pagedata = pagedata.replace('</script>', '<script')
    for chunk in pagedata.split('<script>'):
        if VIMEO_DICT_START in chunk:
            break
    else:
        # BUGFIX: the original fell through with the LAST chunk when the
        # marker was absent and crashed with IndexError on the split
        # below; fail with a clear message instead.
        raise StatusExc('No file dictionary found on %s' % url)
    chunk = chunk.split(VIMEO_DICT_START)[1]
    chunk = chunk.split(VIMEO_DICT_END)[0]
    files = json.loads(chunk)

    for priority in VIMEO_PRIORITY:
        if priority in files:
            fileurl = files[priority]['url']
            break
    else:
        # BUGFIX: the original hit NameError on `fileurl` when no
        # priority matched; raise a meaningful error instead.
        raise StatusExc('No file of priority %s on %s' % (VIMEO_PRIORITY, url))
    filename = name + '.mp4'
    download_file(fileurl, filename)
|
||||||
|
|
||||||
|
|
||||||
|
def handle_liveleak(url):
    '''
    Download a liveleak video. The page embeds a `file: "..."` url whose
    h264 quality token is rewritten to request the 720p variant.
    '''
    filename = url.split('=')[1] + '.mp4'
    pagedata = request_get(url).text
    fileurl = pagedata.split('file: "')[1].split('",')[0]
    # Rewrite the quality segment (h264_*) of the dotted url to 720p.
    parts = fileurl.split('.')
    for index, part in enumerate(parts):
        if 'h264_' in part:
            parts[index] = 'h264_720p'
    download_file('.'.join(parts), filename)
|
||||||
|
|
||||||
|
|
||||||
|
def handle_youtube(url):
    '''
    Delegate a youtube link to the external youtube-dl tool.
    '''
    import subprocess
    # SECURITY FIX: os.system('youtube-dl %s ...' % url) interpolated an
    # untrusted url straight into a shell command line (shell injection,
    # e.g. a url containing "; rm -rf ~"). Passing an argument list with
    # no shell cannot be injected.
    subprocess.call(['youtube-dl', url, '--force-ipv4'])
|
||||||
|
|
||||||
|
|
||||||
|
def handle_generic(url):
    '''
    Best-effort download of an arbitrary url, saved under its last path
    segment. Failures are deliberately ignored.
    '''
    try:
        name = url.split('/')[-1]
        download_file(url, name)
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt and
        # SystemExit are no longer swallowed; still best-effort.
        pass
|
||||||
|
##
|
||||||
|
##############################################################################
|
||||||
|
|
||||||
|
# Maps a domain fragment to the function that downloads links from it.
HANDLERS = {
    'imgur.com': handle_imgur,
    'gfycat.com': handle_gfycat,
    'vimeo.com': handle_vimeo,
    'liveleak.com': handle_liveleak,
    'youtube.com': handle_youtube,
    'youtu.be': handle_youtube
}

def handle_master(url):
    '''
    Route `url` to the first handler whose domain fragment appears in it
    (case-insensitive). When nothing matches, fall back to the generic
    downloader if DO_GENERIC is enabled.
    '''
    print('Handling %s' % url)
    lowered = url.lower()
    for fragment, handler in HANDLERS.items():
        if fragment.lower() in lowered:
            handler(url)
            return
    if DO_GENERIC:
        handle_generic(url)
|
||||||
|
|
||||||
|
def test(imgur=True, gfycat=True, vimeo=True, liveleak=True, youtube=True, generic=True):
    '''
    Exercise every handler against one known link of each kind.
    Set a flag to False to skip that site's cases.
    '''
    print('Testing')
    if imgur:
        # Gallery album, plain album, indirect single, direct single.
        handle_master('http://imgur.com/gallery/s4WLG')
        handle_master('http://imgur.com/a/s4WLG')
        handle_master('http://imgur.com/gvJUct0')
        handle_master('http://i.imgur.com/gvJUct0.jpg')

    if gfycat:
        # Direct .gif and general page link.
        handle_master('http://giant.gfycat.com/FatherlyBruisedIberianchiffchaff.gif')
        handle_master('http://www.gfycat.com/RawWetFlatcoatretriever')

    if vimeo:
        # Vimeo standard link.
        handle_master('https://vimeo.com/109405701')

    if liveleak:
        # LiveLeak standard link.
        handle_master('http://www.liveleak.com/view?i=9d1_1429192014')

    if youtube:
        # Youtube standard and short link forms.
        handle_master('https://www.youtube.com/watch?v=bEgeh5hA5ko')
        handle_master('https://youtu.be/GjOBTstnW20')

    if generic:
        # Arbitrary urls that might work with the generic downloader.
        handle_master('https://raw.githubusercontent.com/voussoir/reddit/master/SubredditBirthdays/show/statistics.txt')
        handle_master('https://github.com/voussoir/reddit/tree/master/SubredditBirthdays/show')

test()
|
Loading…
Reference in a new issue