diff --git a/timesearch/__init__.py b/timesearch/__init__.py index 4e13eae..2891efa 100644 --- a/timesearch/__init__.py +++ b/timesearch/__init__.py @@ -74,6 +74,16 @@ commentaugment: -s "t3_xxxxxx" | --specific "t3_xxxxxx": Given a submission ID, t3_xxxxxx, scan only that submission. + -l "update" | --lower "update": + If a number - the unix timestamp to start at. + If "update" - continue from latest comment in db. + Default: update + + -up 1467460221 | --upper 1467460221: + If a number - the unix timestamp to stop at. + If not provided - stop at current time. + Default: current time + --dont_supplement: If provided, trust the pushshift data and do not fetch live copies from reddit. @@ -329,6 +339,8 @@ p_commentaugment.add_argument('-s', '--specific', dest='specific_submission', de p_commentaugment.add_argument('-u', '--user', dest='username', default=None) p_commentaugment.add_argument('-v', '--verbose', dest='verbose', action='store_true') p_commentaugment.add_argument('--dont_supplement', dest='do_supplement', action='store_false') +p_commentaugment.add_argument('-l', '--lower', dest='lower', default='update') +p_commentaugment.add_argument('-up', '--upper', dest='upper', default=None) p_commentaugment.set_defaults(func=commentaugment_gateway) p_getstyles = subparsers.add_parser('getstyles') diff --git a/timesearch/commentaugment.py b/timesearch/commentaugment.py index 6352290..26a33bf 100644 --- a/timesearch/commentaugment.py +++ b/timesearch/commentaugment.py @@ -11,6 +11,8 @@ def commentaugment( username=None, specific_submission=None, do_supplement=True, + lower=None, + upper=None, ): if not specific_submission and not common.is_xor(subreddit, username): raise exceptions.NotExclusive(['subreddit', 'username']) @@ -33,40 +35,44 @@ def commentaugment( if specific_submission is not None: database.insert(specific_submission_obj) - lower = 0 - query_latest = 'SELECT created FROM comments ORDER BY created DESC LIMIT 1' - if subreddit: - # Instead of blindly taking the highest timestamp currently in the db, - # we must consider the case that the user has previously done a - # specific_submission scan and now wants to do a general scan, which - # would trick the latest timestamp into missing anything before that - # specific submission. - query = ''' - SELECT created FROM comments WHERE NOT EXISTS ( - SELECT 1 FROM submissions - WHERE submissions.idstr == comments.submission - AND submissions.augmented_at IS NOT NULL - ) - ORDER BY created DESC LIMIT 1 - ''' - unaugmented = cur.execute(query).fetchone() - if unaugmented: - lower = unaugmented[0] - 1 - else: + if lower is None: + lower = 0 + if lower == 'update': + query_latest = 'SELECT created FROM comments ORDER BY created DESC LIMIT 1' + if subreddit: + # Instead of blindly taking the highest timestamp currently in the db, + # we must consider the case that the user has previously done a + # specific_submission scan and now wants to do a general scan, which + # would trick the latest timestamp into missing anything before that + # specific submission. + query = ''' + SELECT created FROM comments WHERE NOT EXISTS ( + SELECT 1 FROM submissions + WHERE submissions.idstr == comments.submission + AND submissions.augmented_at IS NOT NULL + ) + ORDER BY created DESC LIMIT 1 + ''' + unaugmented = cur.execute(query).fetchone() + if unaugmented: + lower = unaugmented[0] - 1 + else: + latest = cur.execute(query_latest).fetchone() + if latest: + lower = latest[0] - 1 + if username: latest = cur.execute(query_latest).fetchone() if latest: lower = latest[0] - 1 - if username: - latest = cur.execute(query_latest).fetchone() - if latest: - lower = latest[0] - 1 + if lower == 'update': + lower = 0 if specific_submission: comments = pushshift.get_comments_from_submission(specific_submission_obj) elif subreddit: - comments = pushshift.get_comments_from_subreddit(subreddit, lower=lower) + comments = pushshift.get_comments_from_subreddit(subreddit, lower=lower, upper=upper) elif username: - comments = pushshift.get_comments_from_user(username, lower=lower) + comments = pushshift.get_comments_from_user(username, lower=lower, upper=upper) form = '{lower} - {upper} +{gain}' @@ -104,4 +110,6 @@ def commentaugment_argparse(args): #verbose=args.verbose, specific_submission=args.specific_submission, do_supplement=args.do_supplement, + lower=args.lower, + upper=args.upper, )