else
This commit is contained in:
		
							parent
							
								
									cc68bcbe06
								
							
						
					
					
						commit
						94e54c9861
					
				
					 3 changed files with 68 additions and 41 deletions
				
			
		
										
											Binary file not shown.
										
									
								
							|  | @ -1,6 +1,10 @@ | |||
| import traceback | ||||
| import sqlite3 | ||||
| import totaldl | ||||
| import praw | ||||
| 
 | ||||
| r = praw.Reddit('') | ||||
| r.login('', '') | ||||
| 
 | ||||
| sql = sqlite3.connect('!!testdata.db') | ||||
| cur = sql.cursor() | ||||
|  | @ -25,7 +29,13 @@ while True: | |||
| 	if len(title) > 35: | ||||
| 		title = title[:34] + '-' | ||||
| 	try: | ||||
| 		totaldl.handle_master(url, customname=title) | ||||
| 		filepath = totaldl.handle_master(url, customname=title) | ||||
| 		filepath = filepath.split('/')[-1] | ||||
| 		if '.mp4' in filepath: | ||||
| 			filepath = 'http://syriancivilwar.pw/Videos/' + filepath | ||||
| 			submission = r.get_info(thing_id=item[1]) | ||||
| 			submission.add_comment('Mirror: %s' % filepath) | ||||
| 		print(filepath) | ||||
| 	except: | ||||
| 		traceback.print_exc() | ||||
| 	cur2.execute('INSERT INTO totaldl_urls VALUES(?)', [url]) | ||||
|  |  | |||
|  | @ -10,7 +10,8 @@ DOWNLOAD_DIRECTORY = '' | |||
| # Save files to this folder | ||||
| # If blank, it uses the local folder | ||||
| 
 | ||||
| IMGUR_ALBUM_INDV = '"og:image"content="htt' | ||||
| IMGUR_ALBUM_INDV = 'Viewfullresolution<' | ||||
| IMGUR_ALBUM_INDV2 = 'linkrel="image_src"' | ||||
| # The HTML string which tells us that an image link is | ||||
| # on this line. | ||||
| 
 | ||||
|  | @ -42,6 +43,9 @@ LIVELEAK_YOUTUBEIFRAME = 'youtube.com/embed' | |||
| 
 | ||||
| LIVELEAK_RESOLUTIONS = ['h264_base', 'h264_720p', 'h264_270p'] | ||||
| 
 | ||||
| YOUTUBE_DL_FORMAT = 'youtube-dl "{url}" --no-playlist --force-ipv4 -o "/{dir}/{name}.%(ext)s"' | ||||
| # The format for the youtube-dl shell command | ||||
| 
 | ||||
| DO_GENERIC = True | ||||
| # If true, attempt to download whatever URL goes in | ||||
| # Else, only download from the explicitly supported sites | ||||
|  | @ -73,7 +77,7 @@ def download_file(url, localname): | |||
| 	for chunk in downloading.iter_content(chunk_size=1024): | ||||
| 		if chunk: | ||||
| 			localfile.write(chunk) | ||||
| 	return True | ||||
| 	return localname | ||||
| 
 | ||||
| def request_get(url, stream=False): | ||||
| 	global last_request | ||||
|  | @ -93,11 +97,18 @@ def request_get(url, stream=False): | |||
| def handle_imgur_html(url): | ||||
| 	pagedata = request_get(url) | ||||
| 	pagedata = pagedata.text.replace(' ', '') | ||||
| 	pagedata = pagedata.replace('src="', 'href="') | ||||
| 	pagedata = pagedata.replace(IMGUR_ALBUM_INDV2, IMGUR_ALBUM_INDV) | ||||
| 	pagedata = pagedata.split('\n') | ||||
| 	pagedata = [line.strip() for line in pagedata] | ||||
| 	pagedata = [line for line in pagedata if IMGUR_ALBUM_INDV in line] | ||||
| 	pagedata = [line.split('"')[-2] for line in pagedata] | ||||
| 	pagedata = [line.split('href=')[1] for line in pagedata] | ||||
| 	pagedata = [line.replace('"//', '"http://') for line in pagedata] | ||||
| 	pagedata = [line.split('"')[1] for line in pagedata] | ||||
| 	links = [] | ||||
| 	first = pagedata[0].split('.')[0] | ||||
| 	if [x.split('.')[0] for x in pagedata].count(first) > 1: | ||||
| 		pagedata = pagedata[1:] | ||||
| 	for image in pagedata: | ||||
| 		image = image.split('?')[0] | ||||
| 		if image not in links: | ||||
|  | @ -110,6 +121,7 @@ def handle_imgur(url, albumid='', customname=None): | |||
| 		# This link doesn't appear to have an image id | ||||
| 		return | ||||
| 
 | ||||
| 	url = url.replace('/gallery/', '/a/') | ||||
| 	basename = name.split('.')[0] | ||||
| 	if '.' in name: | ||||
| 		# This is a direct image link | ||||
|  | @ -130,7 +142,7 @@ def handle_imgur(url, albumid='', customname=None): | |||
| 		else: | ||||
| 			localpath = name | ||||
| 
 | ||||
| 		download_file(url, localpath) | ||||
| 		return download_file(url, localpath) | ||||
| 
 | ||||
| 	else: | ||||
| 		# Not a direct image link, let's read the html. | ||||
|  | @ -138,14 +150,19 @@ def handle_imgur(url, albumid='', customname=None): | |||
| 		if customname: | ||||
| 			name = customname | ||||
| 		print('\tFound %d images' % len(images)) | ||||
| 
 | ||||
| 		localfiles = [] | ||||
| 		if len(images) > 1: | ||||
| 			for imagei in range(len(images)): | ||||
| 				image = images[imagei] | ||||
| 				iname = image.split('/')[-1] | ||||
| 				iname = iname.split('.')[0] | ||||
| 				handle_imgur(image, albumid=name, customname='%d_%s' % (imagei, iname)) | ||||
| 				x = handle_imgur(image, albumid=name, customname='%d_%s' % (imagei, iname)) | ||||
| 				localfiles.append(x) | ||||
| 		else: | ||||
| 			handle_imgur(images[0], customname=name) | ||||
| 			x = handle_imgur(images[0], customname=name) | ||||
| 			localfiles.append(x) | ||||
| 		return localfiles | ||||
| 
 | ||||
| 
 | ||||
| def handle_gfycat(url, customname=None): | ||||
|  | @ -166,8 +183,7 @@ def handle_gfycat(url, customname=None): | |||
| 	for subdomain in GFYCAT_SUBDOMAINS: | ||||
| 		url = 'http://%s.gfycat.com/%s' % (subdomain, name) | ||||
| 		try: | ||||
| 			download_file(url, filename) | ||||
| 			break | ||||
| 			return download_file(url, filename) | ||||
| 		except StatusExc: | ||||
| 			pass | ||||
| 
 | ||||
|  | @ -200,7 +216,7 @@ def handle_vimeo(url, customname=None): | |||
| 		filename = customname + '.mp4' | ||||
| 	else: | ||||
| 		filename = name + '.mp4' | ||||
| 	download_file(fileurl, filename) | ||||
| 	return download_file(fileurl, filename) | ||||
| 
 | ||||
| 
 | ||||
| def handle_liveleak(url, customname=None): | ||||
|  | @ -231,23 +247,24 @@ def handle_liveleak(url, customname=None): | |||
| 		for res in LIVELEAK_RESOLUTIONS: | ||||
| 			url = pagedata.replace('LIVELEAKRESOLUTION', res) | ||||
| 			try: | ||||
| 				download_file(url, name) | ||||
| 				return | ||||
| 				return download_file(url, name) | ||||
| 			except StatusExc: | ||||
| 				pass | ||||
| 		download_file(original, name) | ||||
| 		return download_file(original, name) | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| def handle_youtube(url, customname=None): | ||||
| 	# The customname doesn't do anything on this function | ||||
| 	# but handle_master works better if everything uses | ||||
| 	# the same format. | ||||
| 	url = url.replace('&', '&') | ||||
| 	url = url.replace('feature=player_embedded&', '') | ||||
| 	url = url.replace('&feature=player_embedded', '') | ||||
| 	os.system('youtube-dl "{0}" --no-playlist --force-ipv4 -o "/{1}/%(title)s.%(ext)s"'.format(url, DOWNLOAD_DIRECTORY)) | ||||
| 
 | ||||
| 	if not customname: | ||||
| 		os.system(YOUTUBE_DL_FORMAT.format(url=url, dir=DOWNLOAD_DIRECTORY, name='%(title)s')) | ||||
| 		return | ||||
| 	os.system(YOUTUBE_DL_FORMAT.format(url=url, dir=DOWNLOAD_DIRECTORY, name=customname)) | ||||
| 	if DOWNLOAD_DIRECTORY: | ||||
| 		return '%s/%s.mp4' % (DOWNLOAD_DIRECTORY, customname) | ||||
| 	return '%s.mp4' % customname | ||||
| 
 | ||||
| def handle_twitter(url, customname=None): | ||||
| 	pagedata = request_get(url) | ||||
|  | @ -260,34 +277,32 @@ def handle_twitter(url, customname=None): | |||
| 		name = idnumber | ||||
| 		customname = idnumber | ||||
| 	tweetpath = '%s.html' % (DOWNLOAD_DIRECTORY + name) | ||||
| 	if not os.path.exists(tweetpath): | ||||
| 		psplit = '<p class="TweetTextSize' | ||||
| 		tweettext = pagedata.split(psplit)[1] | ||||
| 		tweettext = tweettext.split('</p>')[0] | ||||
| 		tweettext = psplit + tweettext + '</p>' | ||||
| 		tweettext = '<html><body>%s</body></html>' % tweettext | ||||
| 		tweettext = tweettext.replace('/hashtag/', 'http://twitter.com/hashtag/') | ||||
| 		tweethtml = open(tweetpath, 'w') | ||||
| 		tweethtml.write(tweettext) | ||||
| 		tweethtml.close() | ||||
| 		print('\tSaved tweet text') | ||||
| 	else: | ||||
| 		print('\tTweet text already exists') | ||||
| 	psplit = '<p class="TweetTextSize' | ||||
| 	tweettext = pagedata.split(psplit)[1] | ||||
| 	tweettext = tweettext.split('</p>')[0] | ||||
| 	tweettext = psplit + tweettext + '</p>' | ||||
| 	tweettext = '<html><body>%s</body></html>' % tweettext | ||||
| 	tweettext = tweettext.replace('/hashtag/', 'http://twitter.com/hashtag/') | ||||
| 	tweethtml = open(tweetpath, 'w', encoding='utf-8') | ||||
| 	tweethtml.write(tweettext) | ||||
| 	tweethtml.close() | ||||
| 	print('\tSaved tweet text') | ||||
| 	try: | ||||
| 		link = pagedata.split('data-url="')[1] | ||||
| 		link = link.split('"')[0] | ||||
| 		if link != url: | ||||
| 			handle_master(link, customname=customname) | ||||
| 		return | ||||
| 		return tweetpath | ||||
| 	except IndexError: | ||||
| 		try: | ||||
| 			link = pagedata.split('data-expanded-url="')[1] | ||||
| 			link = link.split('"')[0] | ||||
| 			if link != url: | ||||
| 				handle_master(link, customname=customname) | ||||
| 			return | ||||
| 			return tweetpath | ||||
| 		except IndexError: | ||||
| 			pass | ||||
| 	return tweetpath | ||||
| 	print('\tNo media detected') | ||||
| 
 | ||||
| 
 | ||||
|  | @ -298,7 +313,7 @@ def handle_generic(url, customname=None): | |||
| 			name = '%s.%s' % (customname, name.split('.')[-1]) | ||||
| 		if '.' not in name: | ||||
| 			name += '.html' | ||||
| 		download_file(url, name) | ||||
| 		return download_file(url, name) | ||||
| 	except: | ||||
| 		pass | ||||
|                                                                             ## | ||||
|  | @ -318,10 +333,9 @@ def handle_master(url, customname=None): | |||
| 	print('Handling %s' % url) | ||||
| 	for handlerkey in HANDLERS: | ||||
| 		if handlerkey.lower() in url.lower(): | ||||
| 			HANDLERS[handlerkey](url, customname=customname) | ||||
| 			return | ||||
| 			return HANDLERS[handlerkey](url, customname=customname) | ||||
| 	if DO_GENERIC: | ||||
| 		handle_generic(url, customname=customname) | ||||
| 		return handle_generic(url, customname=customname) | ||||
| 
 | ||||
| def test_imgur(): | ||||
| 	# Imgur gallery album | ||||
|  | @ -371,7 +385,7 @@ def test_youtube(): | |||
| 	handle_master('https://www.youtube.com/watch?v=bEgeh5hA5ko') | ||||
| 
 | ||||
| 	# Youtube short link | ||||
| 	handle_master('https://youtu.be/GjOBTstnW20') | ||||
| 	handle_master('https://youtu.be/GjOBTstnW20', customname='youtube') | ||||
| 
 | ||||
| 	# Youtube player embed link | ||||
| 	handle_master('https://www.youtube.com/watch?feature=player_embedded&v=bEgeh5hA5ko') | ||||
|  | @ -395,6 +409,9 @@ def test_twitter(): | |||
| 	# Twitter plain text | ||||
| 	handle_master('https://twitter.com/SyriacMFS/status/556513635913437184') | ||||
| 
 | ||||
| 	# Twitter with arabic characters | ||||
| 	handle_master('https://twitter.com/HadiAlabdallah/status/600885154991706113') | ||||
| 
 | ||||
| def test_generic(): | ||||
| 	# Some link that might work | ||||
| 	handle_master('https://raw.githubusercontent.com/voussoir/reddit/master/SubredditBirthdays/show/statistics.txt') | ||||
|  | @ -412,8 +429,8 @@ if __name__ == '__main__': | |||
| 		#test_imgur() | ||||
| 		#test_gfycat() | ||||
| 		#test_vimeo() | ||||
| 		#test_liveleak() | ||||
| 		#test_youtube() | ||||
| 		test_twitter() | ||||
| 		test_liveleak() | ||||
| 		test_youtube() | ||||
| 		#test_twitter() | ||||
| 		#test_generic() | ||||
| 		pass | ||||
		Loading…
	
		Reference in a new issue
	
	 Voussoir
						Voussoir