timesearch/timesearch/timesearch.py

148 lines
4.7 KiB
Python
Raw Normal View History

import time
import traceback
from . import common
from . import tsdb
# The maximum amount by which it can multiply the interval
# when not enough posts are found.
MAXIMUM_EXPANSION_MULTIPLIER = 2
def timesearch(
subreddit=None,
username=None,
lower=None,
upper=None,
interval=86400,
):
'''
Collect submissions across time.
Please see the global DOCSTRING variable.
'''
if (subreddit is None) == (username is None):
raise Exception('Enter subreddit or username but not both')
common.bot.login(common.r)
if subreddit:
database = tsdb.TSDB.for_subreddit(subreddit)
else:
# When searching, we'll take the user's submissions from anywhere.
subreddit = 'all'
database = tsdb.TSDB.for_user(username)
cur = database.sql.cursor()
if lower == 'update':
# Start from the latest submission
cur.execute('SELECT * FROM submissions ORDER BY idint DESC LIMIT 1')
f = cur.fetchone()
if f:
lower = f[tsdb.SQL_SUBMISSION['created']]
print(f[tsdb.SQL_SUBMISSION['idstr']], common.human(lower), lower)
else:
lower = None
if not isinstance(subreddit, common.praw.models.Subreddit):
subreddit = common.r.subreddit(subreddit)
if subreddit != 'all':
if isinstance(subreddit, common.praw.models.Subreddit):
creation = subreddit.created_utc
else:
subreddits = subreddit.split('+')
subreddits = [common.r.subreddit(sr) for sr in subreddits]
creation = min([sr.created_utc for sr in subreddits])
else:
if not isinstance(username, common.praw.models.Redditor):
user = common.r.redditor(username)
creation = user.created_utc
if lower is None or lower < creation:
lower = creation
maxupper = upper
if maxupper is None:
maxupper = common.get_now() + 86400
form = '{upper} - {lower} +{gain}'
submissions = subreddit.submissions(start=lower, end=maxupper)
submissions = common.generator_chunker(submissions, 100)
for chunk in submissions:
chunk.sort(key=lambda x: x.created_utc, reverse=True)
new_count = database.insert(chunk)['new_submissions']
message = form.format(
upper=common.human(chunk[0].created_utc),
lower=common.human(chunk[-1].created_utc),
gain=new_count,
)
print(message)
#upper = lower + interval
#toomany_inarow = 0
# while lower < maxupper:
# print('\nCurrent interval:', interval, 'seconds')
# print('Lower:', common.human(lower), lower)
# print('Upper:', common.human(upper), upper)
# if username:
# query = '(and author:"%s" (and timestamp:%d..%d))' % (username, lower, upper)
# else:
# query = 'timestamp:%d..%d' % (lower, upper)
# try:
# searchresults = subreddit.search(
# query,
# sort='new',
# limit=100,
# syntax='cloudsearch'
# )
# searchresults = list(searchresults)
# except Exception:
# traceback.print_exc()
# print('resuming in 5...')
# time.sleep(5)
# continue
# searchresults.sort(key=lambda x: x.created_utc)
# print([i.id for i in searchresults])
# itemsfound = len(searchresults)
# print('Found', itemsfound, 'items.')
# if itemsfound < 50:
# print('Too few results, increasing interval', end='')
# diff = (1 - (itemsfound / 75)) + 1
# diff = min(MAXIMUM_EXPANSION_MULTIPLIER, diff)
# interval = int(interval * diff)
# if itemsfound > 99:
# #Intentionally not elif
# print('Too many results, reducing interval', end='')
# interval = int(interval * (0.8 - (0.05 * toomany_inarow)))
# upper = lower + interval
# toomany_inarow += 1
# else:
# lower = upper
# upper = lower + interval
# toomany_inarow = max(0, toomany_inarow-1)
# print(database.insert(searchresults))
# print()
cur.execute('SELECT COUNT(idint) FROM submissions')
itemcount = cur.fetchone()[0]
print('Ended with %d items in %s' % (itemcount, database.filepath.basename))
def timesearch_argparse(args):
if args.lower == 'update':
lower = 'update'
else:
lower = common.int_none(args.lower)
return timesearch(
subreddit=args.subreddit,
username=args.username,
lower=lower,
upper=common.int_none(args.upper),
interval=common.int_none(args.interval),
)