Allow commentaugment to take lower and upper as well.

This commit is contained in:
Ethan Dalool 2018-04-20 23:29:42 -07:00
parent 5b7eb6870f
commit 3e9aa946b1
2 changed files with 46 additions and 26 deletions

View file

@@ -74,6 +74,16 @@ commentaugment:
-s "t3_xxxxxx" | --specific "t3_xxxxxx": -s "t3_xxxxxx" | --specific "t3_xxxxxx":
Given a submission ID, t3_xxxxxx, scan only that submission. Given a submission ID, t3_xxxxxx, scan only that submission.
-l "update" | --lower "update":
If a number - the unix timestamp to start at.
If "update" - continue from latest comment in db.
Default: update
-up 1467460221 | --upper 1467460221:
If a number - the unix timestamp to stop at.
If not provided - stop at current time.
Default: current time
--dont_supplement: --dont_supplement:
If provided, trust the pushshift data and do not fetch live copies If provided, trust the pushshift data and do not fetch live copies
from reddit. from reddit.
@@ -329,6 +339,8 @@ p_commentaugment.add_argument('-s', '--specific', dest='specific_submission', de
p_commentaugment.add_argument('-u', '--user', dest='username', default=None) p_commentaugment.add_argument('-u', '--user', dest='username', default=None)
p_commentaugment.add_argument('-v', '--verbose', dest='verbose', action='store_true') p_commentaugment.add_argument('-v', '--verbose', dest='verbose', action='store_true')
p_commentaugment.add_argument('--dont_supplement', dest='do_supplement', action='store_false') p_commentaugment.add_argument('--dont_supplement', dest='do_supplement', action='store_false')
p_commentaugment.add_argument('-l', '--lower', dest='lower', default='update')
p_commentaugment.add_argument('-up', '--upper', dest='upper', default=None)
p_commentaugment.set_defaults(func=commentaugment_gateway) p_commentaugment.set_defaults(func=commentaugment_gateway)
p_getstyles = subparsers.add_parser('getstyles') p_getstyles = subparsers.add_parser('getstyles')

View file

@@ -11,6 +11,8 @@ def commentaugment(
username=None, username=None,
specific_submission=None, specific_submission=None,
do_supplement=True, do_supplement=True,
lower=None,
upper=None,
): ):
if not specific_submission and not common.is_xor(subreddit, username): if not specific_submission and not common.is_xor(subreddit, username):
raise exceptions.NotExclusive(['subreddit', 'username']) raise exceptions.NotExclusive(['subreddit', 'username'])
@@ -33,40 +35,44 @@ def commentaugment(
if specific_submission is not None: if specific_submission is not None:
database.insert(specific_submission_obj) database.insert(specific_submission_obj)
lower = 0 if lower is None:
query_latest = 'SELECT created FROM comments ORDER BY created DESC LIMIT 1' lower = 0
if subreddit: if lower == 'update':
# Instead of blindly taking the highest timestamp currently in the db, query_latest = 'SELECT created FROM comments ORDER BY created DESC LIMIT 1'
# we must consider the case that the user has previously done a if subreddit:
# specific_submission scan and now wants to do a general scan, which # Instead of blindly taking the highest timestamp currently in the db,
# would trick the latest timestamp into missing anything before that # we must consider the case that the user has previously done a
# specific submission. # specific_submission scan and now wants to do a general scan, which
query = ''' # would trick the latest timestamp into missing anything before that
SELECT created FROM comments WHERE NOT EXISTS ( # specific submission.
SELECT 1 FROM submissions query = '''
WHERE submissions.idstr == comments.submission SELECT created FROM comments WHERE NOT EXISTS (
AND submissions.augmented_at IS NOT NULL SELECT 1 FROM submissions
) WHERE submissions.idstr == comments.submission
ORDER BY created DESC LIMIT 1 AND submissions.augmented_at IS NOT NULL
''' )
unaugmented = cur.execute(query).fetchone() ORDER BY created DESC LIMIT 1
if unaugmented: '''
lower = unaugmented[0] - 1 unaugmented = cur.execute(query).fetchone()
else: if unaugmented:
lower = unaugmented[0] - 1
else:
latest = cur.execute(query_latest).fetchone()
if latest:
lower = latest[0] - 1
if username:
latest = cur.execute(query_latest).fetchone() latest = cur.execute(query_latest).fetchone()
if latest: if latest:
lower = latest[0] - 1 lower = latest[0] - 1
if username: if lower == 'update':
latest = cur.execute(query_latest).fetchone() lower = 0
if latest:
lower = latest[0] - 1
if specific_submission: if specific_submission:
comments = pushshift.get_comments_from_submission(specific_submission_obj) comments = pushshift.get_comments_from_submission(specific_submission_obj)
elif subreddit: elif subreddit:
comments = pushshift.get_comments_from_subreddit(subreddit, lower=lower) comments = pushshift.get_comments_from_subreddit(subreddit, lower=lower, upper=upper)
elif username: elif username:
comments = pushshift.get_comments_from_user(username, lower=lower) comments = pushshift.get_comments_from_user(username, lower=lower, upper=upper)
form = '{lower} - {upper} +{gain}' form = '{lower} - {upper} +{gain}'
@@ -104,4 +110,6 @@ def commentaugment_argparse(args):
#verbose=args.verbose, #verbose=args.verbose,
specific_submission=args.specific_submission, specific_submission=args.specific_submission,
do_supplement=args.do_supplement, do_supplement=args.do_supplement,
lower=args.lower,
upper=args.upper,
) )