else
totaldl tweet text
This commit is contained in:
parent
3845147531
commit
cc68bcbe06
2 changed files with 37 additions and 9 deletions
|
@ -7,6 +7,7 @@ cur = sql.cursor()
|
||||||
cur2 = sql.cursor()
|
cur2 = sql.cursor()
|
||||||
|
|
||||||
cur.execute('CREATE TABLE IF NOT EXISTS totaldl_urls(url TEXT)')
|
cur.execute('CREATE TABLE IF NOT EXISTS totaldl_urls(url TEXT)')
|
||||||
|
cur.execute('CREATE INDEX IF NOT EXISTS urlindex ON totaldl_urls(url)')
|
||||||
sql.commit()
|
sql.commit()
|
||||||
cur.execute('SELECT * FROM posts WHERE self=0 AND url IS NOT NULL')
|
cur.execute('SELECT * FROM posts WHERE self=0 AND url IS NOT NULL')
|
||||||
while True:
|
while True:
|
||||||
|
|
|
@ -2,6 +2,7 @@ import json
|
||||||
import requests
|
import requests
|
||||||
import os
|
import os
|
||||||
import time
|
import time
|
||||||
|
import sys
|
||||||
|
|
||||||
HEADERS = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.152 Safari/537.36'}
|
HEADERS = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.152 Safari/537.36'}
|
||||||
|
|
||||||
|
@ -140,7 +141,9 @@ def handle_imgur(url, albumid='', customname=None):
|
||||||
if len(images) > 1:
|
if len(images) > 1:
|
||||||
for imagei in range(len(images)):
|
for imagei in range(len(images)):
|
||||||
image = images[imagei]
|
image = images[imagei]
|
||||||
handle_imgur(image, albumid=name, customname=str(imagei))
|
iname = image.split('/')[-1]
|
||||||
|
iname = iname.split('.')[0]
|
||||||
|
handle_imgur(image, albumid=name, customname='%d_%s' % (imagei, iname))
|
||||||
else:
|
else:
|
||||||
handle_imgur(images[0], customname=name)
|
handle_imgur(images[0], customname=name)
|
||||||
|
|
||||||
|
@ -249,6 +252,27 @@ def handle_youtube(url, customname=None):
|
||||||
def handle_twitter(url, customname=None):
|
def handle_twitter(url, customname=None):
|
||||||
pagedata = request_get(url)
|
pagedata = request_get(url)
|
||||||
pagedata = pagedata.text
|
pagedata = pagedata.text
|
||||||
|
|
||||||
|
idnumber = url.split('status/')[1].split('/')[0]
|
||||||
|
if customname:
|
||||||
|
name = customname
|
||||||
|
else:
|
||||||
|
name = idnumber
|
||||||
|
customname = idnumber
|
||||||
|
tweetpath = '%s.html' % (DOWNLOAD_DIRECTORY + name)
|
||||||
|
if not os.path.exists(tweetpath):
|
||||||
|
psplit = '<p class="TweetTextSize'
|
||||||
|
tweettext = pagedata.split(psplit)[1]
|
||||||
|
tweettext = tweettext.split('</p>')[0]
|
||||||
|
tweettext = psplit + tweettext + '</p>'
|
||||||
|
tweettext = '<html><body>%s</body></html>' % tweettext
|
||||||
|
tweettext = tweettext.replace('/hashtag/', 'http://twitter.com/hashtag/')
|
||||||
|
tweethtml = open(tweetpath, 'w')
|
||||||
|
tweethtml.write(tweettext)
|
||||||
|
tweethtml.close()
|
||||||
|
print('\tSaved tweet text')
|
||||||
|
else:
|
||||||
|
print('\tTweet text already exists')
|
||||||
try:
|
try:
|
||||||
link = pagedata.split('data-url="')[1]
|
link = pagedata.split('data-url="')[1]
|
||||||
link = link.split('"')[0]
|
link = link.split('"')[0]
|
||||||
|
@ -382,11 +406,14 @@ def test_generic():
|
||||||
handle_master('https://github.com/voussoir/reddit/tree/master/SubredditBirthdays/show')
|
handle_master('https://github.com/voussoir/reddit/tree/master/SubredditBirthdays/show')
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
#test_imgur()
|
if len(sys.argv) > 1:
|
||||||
#test_gfycat()
|
handle_master(sys.argv[1])
|
||||||
#test_vimeo()
|
else:
|
||||||
#test_liveleak()
|
#test_imgur()
|
||||||
#test_youtube()
|
#test_gfycat()
|
||||||
test_twitter()
|
#test_vimeo()
|
||||||
#test_generic()
|
#test_liveleak()
|
||||||
pass
|
#test_youtube()
|
||||||
|
test_twitter()
|
||||||
|
#test_generic()
|
||||||
|
pass
|
Loading…
Reference in a new issue