Use new betterhelp.

voussoir 2022-02-12 19:54:34 -08:00
parent b2abb29626
commit dfca0e96b6
No known key found for this signature in database
GPG key ID: 5F7554F8C26DACCB


@@ -18,270 +18,6 @@ from timesearch_modules import exceptions
# importing those modules, which will subsequently import PRAW and a whole lot
# of other things. This made TS very slow to load which is okay when you're
# actually using it but really terrible when you're just viewing the help text.
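# A minimal sketch of that lazy-import "gateway" pattern (illustrative only;
# the real gateways appear further down, and whether each one calls a main()
# on its module is an assumption here):
#
#     def breakdown_gateway(args):
#         # The heavy import happens only when the command actually runs,
#         # so viewing the help text stays fast.
#         from timesearch_modules import breakdown
#         return breakdown.main(args)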
DOCSTRING = '''
Timesearch
The subreddit archiver
The basics:
1. Collect a subreddit's submissions
python timesearch.py get_submissions -r subredditname
2. Collect the comments for those submissions
python timesearch.py get_comments -r subredditname
3. Stay up to date
python timesearch.py livestream -r subredditname
Commands for collecting:
{get_submissions}
{get_comments}
{livestream}
{get_styles}
{get_wiki}
Commands for processing:
{breakdown}
{index}
{merge_db}
{offline_reading}
TO SEE DETAILS ON EACH COMMAND, RUN
> python timesearch.py <command> --help
'''.lstrip()
SUB_DOCSTRINGS = dict(
breakdown='''
breakdown:
Give the comment / submission counts for users in a subreddit, or
the subreddits that a user posts to.
Automatically dumps into a <database>_breakdown.json file
in the same directory as the database.
python timesearch.py breakdown -r subredditname <flags>
python timesearch.py breakdown -u username <flags>
flags:
-r "test" | --subreddit "test":
The subreddit database to break down.
-u "test" | --username "test":
The username database to break down.
--sort "name" | "submissions" | "comments" | "total_posts"
Sort the output.
'''.strip(),
get_comments='''
get_comments:
Collect comments on a subreddit or comments made by a user.
python timesearch.py get_comments -r subredditname <flags>
python timesearch.py get_comments -u username <flags>
flags:
-s "t3_xxxxxx" | --specific "t3_xxxxxx":
Given a submission ID, t3_xxxxxx, scan only that submission.
-l "update" | --lower "update":
If a number - the unix timestamp to start at.
If "update" - continue from latest comment in db.
WARNING: If at some point you collected comments for a particular
submission which was newer than the rest of your comments, using "update"
will resume from that newer submission, and you will miss everything
between it and your older data.
Default: update
-up 1467460221 | --upper 1467460221:
If a number - the unix timestamp to stop at.
If not provided - stop at current time.
Default: current time
--dont_supplement:
If provided, trust the pushshift data and do not fetch live copies
from reddit.
'''.strip(),
get_styles='''
get_styles:
Collect the stylesheet and CSS images.
python timesearch.py get_styles -r subredditname
'''.strip(),
get_submissions='''
get_submissions:
Collect submissions from the subreddit across all of history, or
collect submissions by a user (as many as possible).
python timesearch.py get_submissions -r subredditname <flags>
python timesearch.py get_submissions -u username <flags>
-r "test" | --subreddit "test":
The subreddit to scan. Mutually exclusive with username.
-u "test" | --username "test":
The user to scan. Mutually exclusive with subreddit.
-l "update" | --lower "update":
If a number - the unix timestamp to start at.
If "update" - continue from latest submission in db.
Default: update
-up 1467460221 | --upper 1467460221:
If a number - the unix timestamp to stop at.
If not provided - stop at current time.
Default: current time
--dont_supplement:
If provided, trust the pushshift data and do not fetch live copies
from reddit.
'''.strip(),
get_wiki='''
get_wiki:
Collect all available wiki pages.
python timesearch.py get_wiki -r subredditname
'''.strip(),
index='''
index:
Dump submission listings to a plaintext or HTML file.
python timesearch.py index -r subredditname <flags>
python timesearch.py index -u username <flags>
flags:
-r "test" | --subreddit "test":
The subreddit database to dump
-u "test" | --username "test":
The username database to dump
--html:
Write HTML files instead of plain text.
--offline:
The links in the index will point to the files generated by
offline_reading. That is, `../offline_reading/fullname.html` instead
of `http://redd.it/id`. This will NOT trigger offline_reading to
generate the files now, so you must run that tool separately.
-st 50 | --score_threshold 50:
Only index posts with at least this many points.
Applies to ALL indexes!
--all:
Perform all of the indexes listed below.
--date:
Perform an index sorted by date.
--title:
Perform an index sorted by title.
--score:
Perform an index sorted by score.
--author:
For subreddit databases only.
Perform an index sorted by author.
--sub:
For username databases only.
Perform an index sorted by subreddit.
--flair:
Perform an index sorted by flair.
examples:
`timesearch index -r botwatch --date`
does only the date file.
`timesearch index -r botwatch --score --title`
does both the score and title files.
`timesearch index -r botwatch --score --score_threshold 50`
only shows submissions with >= 50 points.
`timesearch index -r botwatch --all`
performs all of the different mashes.
'''.strip(),
livestream='''
livestream:
Continuously collect submissions and/or comments.
python timesearch.py livestream -r subredditname <flags>
python timesearch.py livestream -u username <flags>
flags:
-r "test" | --subreddit "test":
The subreddit to collect from.
-u "test" | --username "test":
The redditor to collect from.
-s | --submissions:
If provided, do collect submissions. Otherwise don't.
-c | --comments:
If provided, do collect comments. Otherwise don't.
If submissions and comments are BOTH left unspecified, then they will
BOTH be collected.
-w 30 | --wait 30:
The number of seconds to wait between cycles.
-1 | --once:
If provided, only do a single loop. Otherwise go forever.
'''.strip(),
merge_db='''
merge_db:
Copy all new posts from one timesearch database into another.
python timesearch.py merge_db --from redditdev1.db --to redditdev2.db
flags:
--from:
The database file containing the posts you wish to copy.
--to:
The database file to which you will copy the posts.
The database is modified in-place.
Existing posts will be ignored and not updated.
'''.strip(),
offline_reading='''
offline_reading:
Render submissions and comment threads to HTML via Markdown.
python timesearch.py offline_reading -r subredditname <flags>
python timesearch.py offline_reading -u username <flags>
flags:
-s "t3_xxxxxx" | --specific "t3_xxxxxx":
Given a submission ID, t3_xxxxxx, render only that submission.
Otherwise render every submission in the database.
'''.strip(),
)
DOCSTRING = betterhelp.add_previews(DOCSTRING, SUB_DOCSTRINGS)
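# Presumably add_previews substituted the {command} placeholders in DOCSTRING
# with shortened previews of each SUB_DOCSTRINGS entry; this commit retires
# that pattern in favor of per-subparser descriptions.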
####################################################################################################
####################################################################################################
def breakdown_gateway(args):
from timesearch_modules import breakdown
@@ -321,83 +57,492 @@ def get_submissions_gateway(args):
@vlogging.main_decorator
def main(argv):
parser = argparse.ArgumentParser(description=__doc__)
parser = argparse.ArgumentParser(
description='''
The subreddit archiver
The basics:
1. Collect a subreddit's submissions
timesearch get_submissions -r subredditname
2. Collect the comments for those submissions
timesearch get_comments -r subredditname
3. Stay up to date
timesearch livestream -r subredditname
''',
)
subparsers = parser.add_subparsers()
p_breakdown = subparsers.add_parser('breakdown')
p_breakdown.add_argument('--sort', dest='sort', default=None)
p_breakdown.add_argument('-r', '--subreddit', dest='subreddit', default=None)
p_breakdown.add_argument('-u', '--user', dest='username', default=None)
# BREAKDOWN
p_breakdown = subparsers.add_parser(
'breakdown',
description='''
Generate the comment / submission counts for users in a subreddit, or
the subreddits that a user posts to.
Automatically dumps into a <database>_breakdown.json file
in the same directory as the database.
''',
)
p_breakdown.add_argument(
'--sort',
dest='sort',
type=str,
default=None,
help='''
Sort the output by one property.
Should be one of "name", "submissions", "comments", "total_posts".
''',
)
p_breakdown.add_argument(
'-r',
'--subreddit',
dest='subreddit',
default=None,
help='''
The subreddit database to break down.
''',
)
p_breakdown.add_argument(
'-u',
'--user',
dest='username',
default=None,
help='''
The username database to break down.
''',
)
p_breakdown.set_defaults(func=breakdown_gateway)
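# set_defaults(func=...) is the standard argparse dispatch idiom: each
# subparser records its gateway, and the eventual dispatch is roughly
# equivalent to this plain-argparse sketch (betterhelp's own behavior
# may differ):
#
#     args = parser.parse_args(argv)
#     return args.func(args)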
p_get_comments = subparsers.add_parser('get_comments', aliases=['commentaugment'])
p_get_comments.add_argument('-r', '--subreddit', dest='subreddit', default=None)
p_get_comments.add_argument('-s', '--specific', dest='specific_submission', default=None)
p_get_comments.add_argument('-u', '--user', dest='username', default=None)
p_get_comments.add_argument('--dont_supplement', '--dont-supplement', dest='do_supplement', action='store_false')
p_get_comments.add_argument('-l', '--lower', dest='lower', default='update')
p_get_comments.add_argument('-up', '--upper', dest='upper', default=None)
# GET_COMMENTS
p_get_comments = subparsers.add_parser(
'get_comments',
aliases=['commentaugment'],
description='''
Collect comments on a subreddit or comments made by a user.
''',
)
p_get_comments.add_argument(
'-r',
'--subreddit',
dest='subreddit',
default=None,
)
p_get_comments.add_argument(
'-s',
'--specific',
dest='specific_submission',
default=None,
help='''
Given a submission ID like t3_xxxxxx, scan only that submission.
''',
)
p_get_comments.add_argument(
'-u',
'--user',
dest='username',
default=None,
)
p_get_comments.add_argument(
'--dont_supplement',
'--dont-supplement',
dest='do_supplement',
action='store_false',
help='''
If provided, trust the pushshift data and do not fetch live copies
from reddit.
''',
)
p_get_comments.add_argument(
'--lower',
dest='lower',
default='update',
help='''
If a number - the unix timestamp to start at.
If "update" - continue from latest comment in db.
WARNING: If at some point you collected comments for a particular
submission which was newer than the rest of your comments, using "update"
will resume from that newer submission, and you will miss everything
between it and your older data.
''',
)
p_get_comments.add_argument(
'--upper',
dest='upper',
default=None,
help='''
If a number - the unix timestamp to stop at.
If not provided - stop at current time.
''',
)
p_get_comments.set_defaults(func=get_comments_gateway)
p_get_styles = subparsers.add_parser('get_styles', aliases=['getstyles'])
p_get_styles.add_argument('-r', '--subreddit', dest='subreddit')
# GET_STYLES
p_get_styles = subparsers.add_parser(
'get_styles',
aliases=['getstyles'],
description='''
Collect the stylesheet and CSS images.
''',
)
p_get_styles.add_argument(
'-r',
'--subreddit',
dest='subreddit',
)
p_get_styles.set_defaults(func=get_styles_gateway)
p_get_wiki = subparsers.add_parser('get_wiki', aliases=['getwiki'])
p_get_wiki.add_argument('-r', '--subreddit', dest='subreddit')
# GET_WIKI
p_get_wiki = subparsers.add_parser(
'get_wiki',
aliases=['getwiki'],
description='''
Collect all available wiki pages.
''',
)
p_get_wiki.add_argument(
'-r',
'--subreddit',
dest='subreddit',
)
p_get_wiki.set_defaults(func=get_wiki_gateway)
p_livestream = subparsers.add_parser('livestream')
p_livestream.add_argument('-1', '--once', dest='once', action='store_true')
p_livestream.add_argument('-c', '--comments', dest='comments', action='store_true')
p_livestream.add_argument('-l', '--limit', dest='limit', default=None)
p_livestream.add_argument('-r', '--subreddit', dest='subreddit', default=None)
p_livestream.add_argument('-s', '--submissions', dest='submissions', action='store_true')
p_livestream.add_argument('-u', '--user', dest='username', default=None)
p_livestream.add_argument('-w', '--wait', dest='sleepy', default=30)
# LIVESTREAM
p_livestream = subparsers.add_parser(
'livestream',
description='''
Continuously collect submissions and/or comments.
''',
)
p_livestream.add_argument(
'--once',
dest='once',
action='store_true',
help='''
If provided, only do a single loop. Otherwise go forever.
''',
)
p_livestream.add_argument(
'-c',
'--comments',
dest='comments',
action='store_true',
help='''
If provided, do collect comments. Otherwise don't.
If submissions and comments are BOTH left unspecified, then they will
BOTH be collected.
''',
)
p_livestream.add_argument(
'--limit',
dest='limit',
type=int,
default=None,
help='''
Number of items to fetch per request.
''',
)
p_livestream.add_argument(
'-r',
'--subreddit',
dest='subreddit',
default=None,
help='''
The subreddit to collect from.
''',
)
p_livestream.add_argument(
'-s',
'--submissions',
dest='submissions',
action='store_true',
help='''
If provided, do collect submissions. Otherwise don't.
If submissions and comments are BOTH left unspecified, then they will
BOTH be collected.
''',
)
p_livestream.add_argument(
'-u',
'--user',
dest='username',
default=None,
help='''
The redditor to collect from.
''',
)
p_livestream.add_argument(
'-w',
'--wait',
dest='sleepy',
default=30,
help='''
The number of seconds to wait between cycles.
''',
)
p_livestream.set_defaults(func=livestream_gateway)
p_merge_db = subparsers.add_parser('merge_db', aliases=['mergedb'])
p_merge_db.add_argument('--from', dest='from_db_path', required=True)
p_merge_db.add_argument('--to', dest='to_db_path', required=True)
# MERGE_DB
p_merge_db = subparsers.add_parser(
'merge_db',
aliases=['mergedb'],
description='''
Copy all new posts from one timesearch database into another.
''',
)
p_merge_db.examples = [
'--from redditdev1.db --to redditdev2.db',
]
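# An `examples` attribute is not stock argparse; presumably betterhelp reads
# it when rendering help. It appears in two shapes in this file: a list of
# plain argument strings (here), and a list of dicts with 'args' and
# 'comment' keys (see the index parser below).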
p_merge_db.add_argument(
'--from',
dest='from_db_path',
required=True,
help='''
The database file containing the posts you wish to copy.
''',
)
p_merge_db.add_argument(
'--to',
dest='to_db_path',
required=True,
help='''
The database file to which you will copy the posts.
The database is modified in-place.
Existing posts will be ignored and not updated.
''',
)
p_merge_db.set_defaults(func=merge_db_gateway)
p_offline_reading = subparsers.add_parser('offline_reading')
p_offline_reading.add_argument('-r', '--subreddit', dest='subreddit', default=None)
p_offline_reading.add_argument('-s', '--specific', dest='specific_submission', default=None)
p_offline_reading.add_argument('-u', '--user', dest='username', default=None)
# OFFLINE_READING
p_offline_reading = subparsers.add_parser(
'offline_reading',
description='''
Render submissions and comment threads to HTML via Markdown.
''',
)
p_offline_reading.add_argument(
'-r',
'--subreddit',
dest='subreddit',
default=None,
)
p_offline_reading.add_argument(
'-s',
'--specific',
dest='specific_submission',
default=None,
type=str,
help='''
Given a submission ID like t3_xxxxxx, render only that submission.
Otherwise render every submission in the database.
''',
)
p_offline_reading.add_argument(
'-u',
'--user',
dest='username',
default=None,
)
p_offline_reading.set_defaults(func=offline_reading_gateway)
p_index = subparsers.add_parser('index', aliases=['redmash'])
p_index.add_argument('--all', dest='do_all', action='store_true')
p_index.add_argument('--author', dest='do_author', action='store_true')
p_index.add_argument('--date', dest='do_date', action='store_true')
p_index.add_argument('--flair', dest='do_flair', action='store_true')
p_index.add_argument('--html', dest='html', action='store_true')
p_index.add_argument('--score', dest='do_score', action='store_true')
p_index.add_argument('--sub', dest='do_subreddit', action='store_true')
p_index.add_argument('--title', dest='do_title', action='store_true')
p_index.add_argument('--offline', dest='offline', action='store_true')
p_index.add_argument('-r', '--subreddit', dest='subreddit', default=None)
p_index.add_argument('-st', '--score_threshold', '--score-threshold', dest='score_threshold', default=0)
p_index.add_argument('-u', '--user', dest='username', default=None)
# INDEX
p_index = subparsers.add_parser(
'index',
aliases=['redmash'],
description='''
Dump submission listings to a plaintext or HTML file.
''',
)
p_index.examples = [
{
'args': '-r botwatch --date',
'comment': 'Does only the date file.'
},
{
'args': '-r botwatch --score --title',
'comment': 'Does both the score and title files.'
},
{
'args': '-r botwatch --score --score_threshold 50',
'comment': 'Only shows submissions with >= 50 points.'
},
{
'args': '-r botwatch --all',
'comment': 'Performs all of the different mashes.'
},
]
p_index.add_argument(
'-r',
'--subreddit',
dest='subreddit',
default=None,
help='''
The subreddit database to dump.
''',
)
p_index.add_argument(
'-u',
'--user',
dest='username',
default=None,
help='''
The username database to dump.
''',
)
p_index.add_argument(
'--all',
dest='do_all',
action='store_true',
help='''
Perform all of the indexes listed below.
''',
)
p_index.add_argument(
'--author',
dest='do_author',
action='store_true',
help='''
For subreddit databases only.
Perform an index sorted by author.
''',
)
p_index.add_argument(
'--date',
dest='do_date',
action='store_true',
help='''
Perform an index sorted by date.
''',
)
p_index.add_argument(
'--flair',
dest='do_flair',
action='store_true',
help='''
Perform an index sorted by flair.
''',
)
p_index.add_argument(
'--html',
dest='html',
action='store_true',
help='''
Write HTML files instead of plain text.
''',
)
p_index.add_argument(
'--score',
dest='do_score',
action='store_true',
help='''
Perform an index sorted by score.
''',
)
p_index.add_argument(
'--sub',
dest='do_subreddit',
action='store_true',
help='''
For username databases only.
Perform an index sorted by subreddit.
''',
)
p_index.add_argument(
'--title',
dest='do_title',
action='store_true',
help='''
Perform an index sorted by title.
''',
)
p_index.add_argument(
'--offline',
dest='offline',
action='store_true',
help='''
The links in the index will point to the files generated by
offline_reading. That is, `../offline_reading/fullname.html` instead
of `http://redd.it/id`. This will NOT trigger offline_reading to
generate the files now, so you must run that tool separately.
''',
)
p_index.add_argument(
'--score_threshold',
'--score-threshold',
dest='score_threshold',
type=int,
default=0,
help='''
Only index posts with at least this many points.
Applies to ALL indexes!
''',
)
p_index.set_defaults(func=index_gateway)
p_get_submissions = subparsers.add_parser('get_submissions', aliases=['timesearch'])
p_get_submissions.add_argument('-l', '--lower', dest='lower', default='update')
p_get_submissions.add_argument('-r', '--subreddit', dest='subreddit', default=None)
p_get_submissions.add_argument('-u', '--user', dest='username', default=None)
p_get_submissions.add_argument('-up', '--upper', dest='upper', default=None)
p_get_submissions.add_argument('--dont_supplement', '--dont-supplement', dest='do_supplement', action='store_false')
# GET_SUBMISSIONS
p_get_submissions = subparsers.add_parser(
'get_submissions',
aliases=['timesearch'],
description='''
Collect submissions from the subreddit across all of history, or
collect submissions by a user (as many as possible).
''',
)
p_get_submissions.add_argument(
'--lower',
dest='lower',
default='update',
help='''
If a number - the unix timestamp to start at.
If "update" - continue from latest submission in db.
''',
)
p_get_submissions.add_argument(
'-r',
'--subreddit',
dest='subreddit',
type=str,
default=None,
help='''
The subreddit to scan. Mutually exclusive with username.
''',
)
p_get_submissions.add_argument(
'-u',
'--user',
dest='username',
type=str,
default=None,
help='''
The user to scan. Mutually exclusive with subreddit.
''',
)
p_get_submissions.add_argument(
'--upper',
dest='upper',
default=None,
help='''
If a number - the unix timestamp to stop at.
If not provided - stop at current time.
''',
)
p_get_submissions.add_argument(
'--dont_supplement',
'--dont-supplement',
dest='do_supplement',
action='store_false',
help='''
If provided, trust the pushshift data and do not fetch live copies
from reddit.
''',
)
p_get_submissions.set_defaults(func=get_submissions_gateway)
try:
return betterhelp.subparser_main(
argv,
parser,
main_docstring=DOCSTRING,
sub_docstrings=SUB_DOCSTRINGS,
)
return betterhelp.go(parser, argv)
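# betterhelp.go() presumably parses argv, prints the decorated help when
# requested, and otherwise calls the func registered via set_defaults,
# much like the plain-argparse sketch near the breakdown parser above.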
except exceptions.DatabaseNotFound as exc:
message = str(exc)
message += '\nHave you used any of the other utilities to collect data?'