"""Look up reposts of Reddit submissions through karmadecay.com.

The script fetches the Karma Decay page for a submission, pulls the ids of
the "very similar" submissions out of the returned HTML, and prints some
details about each of them using the Reddit session ``r``.
"""
import re

import praw
import requests

import bot

# NOTE(review): presumably returns a logged-in praw session -- confirm in the
# project-local ``bot`` module. It is only used below via ``r.get_info``.
r = bot.oG()

KARMADECAY_URL = 'http://karmadecay.com/r/%s/comments/%s/'
# NOTE(review): ``.*`` is greedy and ``.`` does not cross newlines, so this
# yields at most one match per line of HTML -- confirm that the Karma Decay
# markup keeps each result link on its own line.
REPATTERN = 'href="/r/.* title='
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36'
}


def get_karmadecay(subreddit, iid):
    """Return the HTML text of the Karma Decay page for one submission.

    Args:
        subreddit: Subreddit name, substituted into KARMADECAY_URL.
        iid: Submission id (without the ``t3_`` prefix), substituted into
            KARMADECAY_URL.

    Raises:
        requests.HTTPError: If the response carries an error status code
            (via ``raise_for_status``).
    """
    url = KARMADECAY_URL % (subreddit, iid)
    page = requests.get(url, headers=HEADERS, timeout=60)
    page.raise_for_status()
    return page.text


def extract_reddit_ids(karmadecay):
    """Extract the fullnames of "very similar" submissions from page HTML.

    Args:
        karmadecay: HTML text as returned by ``get_karmadecay``.

    Returns:
        A list of ``t3_``-prefixed submission ids, in page order. The list
        is empty (and a short reason is printed) when the page reports no
        image, reports no very similar images, or lacks the expected
        "very similar images" section.
    """
    if 'Unable to find an image at' in karmadecay:
        print('unable to find any images')
        return []
    if 'No very similar images were found' in karmadecay:
        print('no very similar images')
        return []

    # Drop the "less similar" results first, then make sure the section we
    # want is still present. Checking after the cut avoids an IndexError
    # when the marker only occurs in the discarded tail of the page.
    head = karmadecay.split('Less similar images')[0]
    if 'very similar images' not in head:
        print('expected "very similar" is missing')
        return []
    very_similar = head.split('very similar images')[1]

    fullnames = []
    for anchor in re.findall(REPATTERN, very_similar):
        # Every match starts with ``href="``, so element 1 is the link target.
        href = anchor.split('"')[1]
        _, marker, tail = href.partition('/comments/')
        if not marker:
            # A /r/ link that is not a comments page carries no submission id.
            continue
        fullnames.append('t3_' + tail.split('/')[0])
    return fullnames


SAMPLES = [
    ('pics', '3i4gjy'),  # valid submission with matches
    ('goldtesting', '26wkzi'),  # valid submission with no matches
    ('goldtesting', '3cychc'),  # selfpost, should fail
]

for sample in SAMPLES:
    print(sample)
    page_text = get_karmadecay(*sample)
    fullnames = extract_reddit_ids(page_text)
    if not fullnames:
        print('Got no items')
        continue
    print(fullnames)
    submissions = r.get_info(thing_id=fullnames)
    for submission in submissions:
        print(submission.id, submission.created_utc, submission.title)
    print()