2017-11-14 03:13:19 +00:00
|
|
|
import time
|
|
|
|
import traceback
|
|
|
|
|
|
|
|
from . import common
|
2017-12-13 22:35:36 +00:00
|
|
|
from . import exceptions
|
2020-10-08 20:47:02 +00:00
|
|
|
from . import pushshift
|
2017-11-14 03:13:19 +00:00
|
|
|
from . import tsdb
|
|
|
|
|
2018-04-10 02:53:53 +00:00
|
|
|
def _normalize_subreddit(subreddit):
|
|
|
|
if subreddit is None:
|
|
|
|
pass
|
|
|
|
elif isinstance(subreddit, str):
|
|
|
|
subreddit = common.r.subreddit(subreddit)
|
|
|
|
elif not isinstance(subreddit, common.praw.models.Subreddit):
|
|
|
|
raise TypeError(type(subreddit))
|
|
|
|
return subreddit
|
|
|
|
|
|
|
|
def _normalize_user(user):
|
|
|
|
if user is None:
|
|
|
|
pass
|
|
|
|
elif isinstance(user, str):
|
|
|
|
user = common.r.redditor(user)
|
|
|
|
elif not isinstance(user, common.praw.models.Redditor):
|
|
|
|
raise TypeError(type(user))
|
|
|
|
return user
|
2017-11-14 03:13:19 +00:00
|
|
|
|
2020-01-28 02:39:54 +00:00
|
|
|
def get_submissions(
    subreddit=None,
    username=None,
    lower=None,
    upper=None,
    do_supplement=True,
):
    '''
    Collect submissions across time.

    Please see the global DOCSTRING variable.

    subreddit, username:
        Exactly one of these must be provided; the pair is checked with
        common.is_xor and exceptions.NotExclusive is raised otherwise.

    lower:
        The string 'update' means "resume from the newest `created` value
        already stored in the submissions table" (falling back to 0 when
        the table is empty). None is treated as 0. Any other value is
        forwarded to the pushshift helper unchanged.

    upper:
        Forwarded to the pushshift helper unchanged.

    do_supplement:
        When truthy, the submission stream is passed through
        pushshift.supplement_reddit_data (chunk_size=100) before insertion.

    Prints one progress line per inserted chunk and a final item count.
    '''
    if not common.is_xor(subreddit, username):
        raise exceptions.NotExclusive(['subreddit', 'username'])

    common.login()

    # Open the database for whichever target was given. The factory also
    # hands back the name it settled on (fix_name=True), which replaces the
    # caller's spelling from here on.
    if subreddit:
        (database, subreddit) = tsdb.TSDB.for_subreddit(subreddit, fix_name=True)
    elif username:
        (database, username) = tsdb.TSDB.for_user(username, fix_name=True)
    cur = database.sql.cursor()

    subreddit = _normalize_subreddit(subreddit)
    # The normalized user object is not used below; the call is kept because
    # it type-checks `username` and performs the redditor lookup.
    _normalize_user(username)

    if lower == 'update':
        # Start from the latest submission
        cur.execute('SELECT created FROM submissions ORDER BY created DESC LIMIT 1')
        newest_row = cur.fetchone()
        lower = None if newest_row is None else newest_row[0]
    if lower is None:
        lower = 0

    if not username:
        stream = pushshift.get_submissions_from_subreddit(subreddit, lower=lower, upper=upper)
    else:
        stream = pushshift.get_submissions_from_user(username, lower=lower, upper=upper)

    if do_supplement:
        stream = pushshift.supplement_reddit_data(stream, chunk_size=100)
    batches = common.generator_chunker(stream, 200)

    progress_template = '{lower} ({lower_unix}) - {upper} ({upper_unix}) +{gain}'
    for batch in batches:
        # Sort in place so the first/last entries bound the batch's time span.
        batch.sort(key=lambda submission: submission.created_utc)
        insert_result = database.insert(batch)

        oldest = batch[0].created_utc
        newest = batch[-1].created_utc
        print(progress_template.format(
            lower=common.human(oldest),
            upper=common.human(newest),
            lower_unix=int(oldest),
            upper_unix=int(newest),
            gain=insert_result['new_submissions'],
        ))

    cur.execute('SELECT COUNT(idint) FROM submissions')
    count_row = cur.fetchone()
    itemcount = count_row[0]

    print('Ended with %d items in %s' % (itemcount, database.filepath.basename))
|
|
|
|
|
2020-01-28 02:39:54 +00:00
|
|
|
def get_submissions_argparse(args):
    '''
    Command-line adapter for get_submissions.

    Reads `verbose`, `lower`, `upper`, `subreddit`, `username` and
    `do_supplement` from the parsed-arguments object `args`, and returns
    whatever get_submissions returns.

    `lower` may be the literal string 'update', which is passed through
    untouched; otherwise `lower` and `upper` go through common.int_none
    (presumably int-or-None conversion -- confirm against common).
    '''
    if args.verbose:
        common.log.setLevel(common.logging.DEBUG)

    lower = 'update' if args.lower == 'update' else common.int_none(args.lower)

    # Built in the same order the keyword arguments were originally evaluated.
    options = {
        'subreddit': args.subreddit,
        'username': args.username,
        'lower': lower,
        'upper': common.int_none(args.upper),
        'do_supplement': args.do_supplement,
    }
    return get_submissions(**options)
|