else

totaldl 1
2015-05-14 22:40:19 -07:00 · 2015-05-14 22:40:19 -07:00 · f13db2ae88
commit f13db2ae88
parent 476c08d2d6
4 changed files with 282 additions and 11 deletions
--- a/HerokuBot/README.md
+++ b/HerokuBot/README.md
@ -5,19 +5,24 @@ Using Heroku to run a bot

 Inside git.zip is the .git repo that I created for this. I had to zip it so that I could push it here.

-1. Create a [Heroku account](http://heroku.com)
-2. Install [Heroku toolbelt](https://toolbelt.heroku.com/)
-3. Install [Git](http://git-scm.com/)
-4. Create a folder to keep your repo
-5. cd into this folder
-5. Write your bot
-6. Create requirements.txt, and require a version of praw
-7. Create runtime.txt, and require a version of Python
-8. Create Procfile, and create a worker that will launch your bot.
+1. Create a [Heroku account](http://heroku.com).
+2. Install [Git](http://git-scm.com/).
+3. Install [Heroku toolbelt](https://toolbelt.heroku.com/).
+4. Create a folder to keep your repo in.
+5. Open a terminal and `cd` into that folder.
+5. Write your bot.
+6. Create requirements.txt, and require a version of praw.
+7. Create runtime.txt, and require a version of Python.
+8. Create Procfile, and create a worker that will launch your bot. Notice that this file does not have an extension.
 9. `> heroku login`
+
+    	Enter your Heroku credentials.
+		Email: email@email.com
+		Password (typing will be hidden):
+		Authentication successful.
 10. `> git init`
 11. `> git add .`
-12. `> git commit -m "1"`
+12. `> git commit -m "Commit Message"`
 13. `> heroku create`

        Creating aqueous-plains-9797... done, stack is cedar-14
@ -73,4 +78,6 @@ Inside git.zip is the .git repo that I created for this. I had to zip it so that
 	    2015-05-01T00:32:45.316897+00:00 app[worker.1]: 	Subscribers: 17
 	    2015-05-01T00:32:45.316900+00:00 app[worker.1]: All done!

-17. Celebrate
+17. Celebrate
+
+18. To turn your bot off at any time, simply run `> heroku ps:scale worker=0`
--- a/HerokuBot/git.zip
+++ b/HerokuBot/git.zip
--- a/HerokuBot/herokubot.py
+++ b/HerokuBot/herokubot.py
@ -1,16 +1,28 @@
 import praw
 import time
+import sqlite3

+# Log in to reddit through PRAW.
 print('Logging in.')
 r = praw.Reddit('Testing praw api usage over Heroku')
+# NOTE(review): both arguments are hard-coded string literals, presumably
+# the account's username and password -- confirm, and consider reading
+# them from the environment instead of committing them.
 r.login('qQGusVuAHezHxhYTiYGm', 'qQGusVuAHezHxhYTiYGm')

+# Open (or create) the local sqlite database file and make sure the table
+# used below exists.
+print('Loading database')
+sql = sqlite3.connect('sql.db')
+cur = sql.cursor()
+
+cur.execute('CREATE TABLE IF NOT EXISTS subreddits(name TEXT, subscribers INT)')
+sql.commit()
+
+# Fetch the subreddit and print two of its attributes.
 print('Getting subreddit info.')
 sub = r.get_subreddit('Goldtesting')
 print('/r/Goldtesting')
 print('\tCreated at: %d' % sub.created_utc)
 print('\tSubscribers: %d' % sub.subscribers)

+# Record the subscriber count. A new row is inserted on every run.
+print('Saving subreddit info.')
+cur.execute('INSERT INTO subreddits VALUES(?, ?)', ['Goldtesting', sub.subscribers])
+sql.commit()
+
 print('All done!')
+# Sleep forever so that the process never exits.
 while True:
 	time.sleep(60)
--- a/TotalDL/totaldl.py
+++ b/TotalDL/totaldl.py
@ -0,0 +1,252 @@
+import json
+import requests
+import os
+import time
+
+HEADERS = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.152 Safari/537.36'}
+# Sent with every request made by request_get. The User-Agent is that
+# of a desktop Chrome browser.
+
+IMGUR_ALBUM_INDV = '"og:image"content="htt'
+# The HTML string which tells us that an image link is
+# on this line.
+# It contains no spaces because handle_imgur_html removes every space
+# from the page before searching for it.
+
+IMGUR_ALBUMFOLDERS = True
+# If True, the individual images belonging to an album will be placed
+#  into a folder named after the album, like <album_id>/<img_id>.jpg
+# Else, files will be named <album_id>_<img_id>.jpg and placed
+#  in the local folder.
+
+GFYCAT_MP4 = True
+# If True, download gfycat urls in .mp4
+# Else, .webm
+
+GFYCAT_SUBDOMAINS = ['zippy', 'fat', 'giant']
+# The gfycat subdomains that handle_gfycat tries, in this order,
+# until one of them serves the file.
+
+SLEEPINESS = 2
+# The number of seconds to wait in between making requests
+# Similar to PRAW's ratelimit handling.
+# Not required, but probably better for the environment.
+
+VIMEO_DICT_START = '"files":{"h264":'
+VIMEO_DICT_END = ',"hls"'
+# The HTML string which tells us where the mp4 file is
+# handle_vimeo parses the text between these two markers as JSON.
+
+VIMEO_PRIORITY = ['hd', 'sd', 'mobile']
+# Download files in this priority
+
+DO_GENERIC = True
+# If true, attempt to download whatever URL goes in
+# Else, only download from the explicitly supported sites
+
+''' End user config '''
+
+last_request = 0
+# The time.time() of the most recent request. request_get updates it
+# and uses it to enforce SLEEPINESS.
+
+class StatusExc(Exception):
+	'''Raised by request_get when a response's status code is not 200.'''
+
+def download_file(url, localname):
+	'''
+	Download `url` and save it to the path `localname`.
+
+	If `localname` already exists, nothing is downloaded and None is
+	returned. Otherwise the response is streamed to disk in 1024 byte
+	chunks and True is returned.
+
+	Raises StatusExc (from request_get) if the response's status code
+	is not 200.
+	'''
+	if os.path.exists(localname):
+		print('\t%s already exists!!' % localname)
+		return
+	print('\tDownloading %s' % localname)
+	downloading = request_get(url, stream=True)
+	# The context manager makes sure the file is flushed and closed,
+	# even if the transfer fails partway through.
+	with open(localname, 'wb') as localfile:
+		for chunk in downloading.iter_content(chunk_size=1024):
+			# Skip empty chunks
+			if chunk:
+				localfile.write(chunk)
+	return True
+
+def request_get(url, stream=False):
+	'''
+	GET `url` with the configured HEADERS and return the response.
+
+	Before the request is made, sleeps as long as necessary so that
+	at least SLEEPINESS seconds have passed since the previous request.
+
+	Raises StatusExc if the response's status code is not 200.
+	'''
+	global last_request
+	elapsed = time.time() - last_request
+	if elapsed < SLEEPINESS:
+		# Wait out the remainder of the cooldown.
+		time.sleep(SLEEPINESS - elapsed)
+	last_request = time.time()
+	response = requests.get(url, stream=stream, headers=HEADERS)
+	if response.status_code == 200:
+		return response
+	raise StatusExc("Status code %d on url %s" % (response.status_code, url))
+
+##############################################################################
+                                                                            ##
+def handle_imgur_html(url):
+	'''
+	Fetch the imgur page at `url` and return the image links found
+	in it, in page order and without duplicates.
+
+	A line counts as an image line when, after all spaces have been
+	removed from the page, it contains IMGUR_ALBUM_INDV.
+	'''
+	text = request_get(url).text.replace(' ', '')
+	links = []
+	for line in text.split('\n'):
+		line = line.strip()
+		if IMGUR_ALBUM_INDV not in line:
+			continue
+		# The link is the last quoted value on the line; drop any
+		# query string that follows it.
+		image = line.split('"')[-2].split('?')[0]
+		if image not in links:
+			links.append(image)
+	return links
+
+def handle_imgur(url, albumid=''):
+	'''
+	Download an imgur image, or every image found on an imgur page.
+
+	url: either a direct image link (the last path segment contains
+	 a '.'), or a page / album link, in which case the page's html is
+	 read and this function is called again for each image on it.
+	albumid: the id of the page the image was found on. It decides
+	 where the file is saved, see IMGUR_ALBUMFOLDERS. It is filled in
+	 by the recursive call.
+	'''
+	name = url.split('/')[-1]
+	if 'imgur.com' in name:
+		# This link doesn't appear to have an image id
+		return
+
+	if '.' not in name:
+		# Not a direct image link, let's read the html.
+		images = handle_imgur_html(url)
+		print('\tFound %d images' % len(images))
+		for image in images:
+			handle_imgur(image, albumid=name)
+		return
+
+	# This is a direct image link
+	basename = name.split('.')[0]
+	in_album = albumid and albumid != basename
+	if IMGUR_ALBUMFOLDERS and in_album:
+		if not os.path.exists(albumid):
+			os.makedirs(albumid)
+		# os.path.join uses the platform's own separator. A hard-coded
+		# backslash only makes a subfolder path on Windows.
+		localpath = os.path.join(albumid, name)
+	elif in_album:
+		localpath = '%s_%s' % (albumid, name)
+	else:
+		localpath = name
+
+	download_file(url, localpath)
+
+
+def handle_gfycat(url):
+	'''
+	Download a gfycat video as .mp4 or .webm, depending on GFYCAT_MP4.
+
+	The file is requested from each of GFYCAT_SUBDOMAINS in turn,
+	stopping at the first one that does not raise StatusExc.
+	'''
+	basename = url.split('/')[-1].split('.')[0]
+	filename = basename + ('.mp4' if GFYCAT_MP4 else '.webm')
+	for subdomain in GFYCAT_SUBDOMAINS:
+		candidate = 'http://%s.gfycat.com/%s' % (subdomain, filename)
+		try:
+			download_file(candidate, filename)
+		except StatusExc:
+			# Not on this subdomain, try the next one.
+			continue
+		break
+
+
+def handle_vimeo(url):
+	'''
+	Download a vimeo video as <video_id>.mp4.
+
+	The video id is the last path segment of `url`, without any query
+	string. The player page for that id is fetched, the file listing
+	between VIMEO_DICT_START and VIMEO_DICT_END is parsed as JSON, and
+	the first quality in VIMEO_PRIORITY that the listing offers is
+	downloaded.
+
+	Raises ValueError if the video id is not an integer, if the player
+	page has no file listing, or if the listing offers none of the
+	qualities in VIMEO_PRIORITY.
+	'''
+	name = url.split('/')[-1]
+	name = name.split('?')[0]
+	try:
+		int(name)
+	except ValueError:
+		print('Could not identify filename of %s' % url)
+		raise
+	url = 'http://player.vimeo.com/video/%s' % name
+	pagedata = request_get(url).text
+	# Turn closing tags into opening tags, so that splitting on the
+	# opening tag also cuts at the end of every script body.
+	pagedata = pagedata.replace('</script>', '<script>')
+	for chunk in pagedata.split('<script>'):
+		if VIMEO_DICT_START in chunk:
+			break
+	else:
+		# Without this, the last chunk of the page would be used and
+		# fail below with an unhelpful IndexError.
+		raise ValueError('Could not find the file listing of %s' % url)
+	chunk = chunk.split(VIMEO_DICT_START)[1]
+	chunk = chunk.split(VIMEO_DICT_END)[0]
+	files = json.loads(chunk)
+
+	for priority in VIMEO_PRIORITY:
+		if priority in files:
+			fileurl = files[priority]['url']
+			break
+	else:
+		# Without this, fileurl would be unbound below.
+		raise ValueError('None of %s are available for %s' % (VIMEO_PRIORITY, url))
+	filename = name + '.mp4'
+	download_file(fileurl, filename)
+
+
+def handle_liveleak(url):
+	'''
+	Download a liveleak video as <id>.mp4, where <id> is the text
+	between the first and second '=' of `url` (or everything after
+	the first '=' when there is only one).
+
+	The file url is read from the page's `file: "..."` entry. Every
+	dot-separated piece of it that contains 'h264_' is replaced by
+	'h264_720p' before downloading.
+	'''
+	filename = url.split('=')[1] + '.mp4'
+	page = request_get(url).text
+	fileurl = page.split('file: "')[1].split('",')[0]
+	pieces = [
+		'h264_720p' if 'h264_' in piece else piece
+		for piece in fileurl.split('.')
+	]
+	download_file('.'.join(pieces), filename)
+
+
+def handle_youtube(url):
+	'''
+	Hand `url` to the external youtube-dl program, which must be
+	installed and on the PATH. Returns None.
+
+	If youtube-dl can not be started, a message is printed and
+	nothing is raised.
+	'''
+	import subprocess
+	try:
+		# An argument list is passed straight to the program, without
+		# a shell in between. Characters like '&' in the url would
+		# otherwise be interpreted by the shell and cut the url short.
+		subprocess.call(['youtube-dl', url, '--force-ipv4'])
+	except OSError as exc:
+		# For example, youtube-dl is not installed. os.system only
+		# returned a nonzero status in that case, so don't crash.
+		print('\tCould not run youtube-dl: %s' % exc)
+
+
+def handle_generic(url):
+	'''
+	Best-effort download of an arbitrary url. The file is saved under
+	the last path segment of the url.
+
+	Failures are printed, never raised.
+	'''
+	try:
+		name = url.split('/')[-1]
+		download_file(url, name)
+	except Exception as exc:
+		# Still best-effort, but KeyboardInterrupt and SystemExit are
+		# no longer swallowed, and the failure is no longer silent.
+		print('\tCould not download %s: %s' % (url, exc))
+                                                                            ##
+##############################################################################
+
+# Maps a piece of a domain name to the function that downloads links
+# from that site. handle_master calls the first handler whose key
+# appears anywhere in the url, ignoring case.
+HANDLERS = {
+	'imgur.com': handle_imgur,
+	'gfycat.com': handle_gfycat,
+	'vimeo.com': handle_vimeo,
+	'liveleak.com': handle_liveleak,
+	'youtube.com': handle_youtube,
+	'youtu.be': handle_youtube
+	}
+
+def handle_master(url):
+	'''
+	Download `url` with the first handler in HANDLERS whose key is
+	contained in the url, ignoring case. If no key matches,
+	handle_generic is used, but only when DO_GENERIC is enabled.
+	'''
+	print('Handling %s' % url)
+	lowered = url.lower()
+	for (domain, handler) in HANDLERS.items():
+		if domain.lower() in lowered:
+			handler(url)
+			return
+	if DO_GENERIC:
+		handle_generic(url)
+
+def test(imgur=True, gfycat=True, vimeo=True, liveleak=True, youtube=True, generic=True):
+	'''
+	Run handle_master on a set of sample links.
+
+	Each keyword argument switches one group of links on or off.
+	Nothing is checked or returned. The links are handed to the real
+	handlers, so this makes network requests and writes the downloaded
+	files to disk.
+	'''
+	print('Testing')
+	if imgur:
+		# Imgur gallery album
+		handle_master('http://imgur.com/gallery/s4WLG')
+
+		# Imgur album
+		handle_master('http://imgur.com/a/s4WLG')
+
+		# Imgur indirect single
+		handle_master('http://imgur.com/gvJUct0')
+
+		# Imgur direct single
+		handle_master('http://i.imgur.com/gvJUct0.jpg')
+
+	if gfycat:
+		# Gfycat direct .gif
+		handle_master('http://giant.gfycat.com/FatherlyBruisedIberianchiffchaff.gif')
+
+		# Gfycat general link
+		handle_master('http://www.gfycat.com/RawWetFlatcoatretriever')
+
+	if vimeo:
+		# Vimeo standard link
+		handle_master('https://vimeo.com/109405701')
+
+	if liveleak:
+		# LiveLeak standard link
+		handle_master('http://www.liveleak.com/view?i=9d1_1429192014')
+
+	if youtube:
+		# Youtube standard link
+		handle_master('https://www.youtube.com/watch?v=bEgeh5hA5ko')
+
+		# Youtube short link
+		handle_master('https://youtu.be/GjOBTstnW20')
+
+	if generic:
+		# Some link that might work
+		handle_master('https://raw.githubusercontent.com/voussoir/reddit/master/SubredditBirthdays/show/statistics.txt')
+
+		# Some link that might work
+		handle_master('https://github.com/voussoir/reddit/tree/master/SubredditBirthdays/show')
+
+# NOTE(review): this call is not guarded by `if __name__ == '__main__'`,
+# so every sample link is also downloaded when this file is imported.
+test()