Initial migratory commit from voussoir/reddit.

master
Ethan Dalool 2017-11-13 19:13:19 -08:00
commit 708c774e52
18 changed files with 2471 additions and 0 deletions

30
.gitattributes vendored Normal file

@@ -0,0 +1,30 @@
# Auto detect text files and perform LF normalization
* text=auto
# Custom for Visual Studio
*.cs diff=csharp
*.sln merge=union
*.csproj merge=union
*.vbproj merge=union
*.fsproj merge=union
*.dbproj merge=union
*.psd binary
*.zip binary
*.db binary
*.png binary
*.jpg binary
*.ico binary
*.exe binary
# Standard to msysgit
*.doc diff=astextplain
*.DOC diff=astextplain
*.docx diff=astextplain
*.DOCX diff=astextplain
*.dot diff=astextplain
*.DOT diff=astextplain
*.pdf diff=astextplain
*.PDF diff=astextplain
*.rtf diff=astextplain
*.RTF diff=astextplain

231
.gitignore vendored Normal file

@@ -0,0 +1,231 @@
databases/*
@hangman.md
hangman.py
merge_database.py
migrate_20160605.py
timesearch_backup.py
*.ignore
*.db-journal
*.pydevproject
.project
.metadata
bin/
tmp/
*.tmp
*.bak
*.swp
*~.nib
local.properties
.classpath
.settings/
.loadpath
# External tool builders
.externalToolBuilders/
# Locally stored "Eclipse launch configurations"
*.launch
# CDT-specific
.cproject
# PDT-specific
.buildpath
#################
## Visual Studio
#################
## Ignore Visual Studio temporary files, build results, and
## files generated by popular Visual Studio add-ons.
# User-specific files
*.suo
*.user
*.sln.docstates
# Build results
[Dd]ebug/
[Rr]elease/
x64/
build/
[Bb]in/
[Oo]bj/
# MSTest test Results
[Tt]est[Rr]esult*/
[Bb]uild[Ll]og.*
*_i.c
*_p.c
*.ilk
*.meta
*.obj
*.pch
*.pdb
*.pgc
*.pgd
*.rsp
*.sbr
*.tlb
*.tli
*.tlh
*.tmp
*.tmp_proj
*.log
*.vspscc
*.vssscc
.builds
*.pidb
*.log
*.scc
# Visual C++ cache files
ipch/
*.aps
*.ncb
*.opensdf
*.sdf
*.cachefile
# Visual Studio profiler
*.psess
*.vsp
*.vspx
# Guidance Automation Toolkit
*.gpState
# ReSharper is a .NET coding add-in
_ReSharper*/
*.[Rr]e[Ss]harper
# TeamCity is a build add-in
_TeamCity*
# DotCover is a Code Coverage Tool
*.dotCover
# NCrunch
*.ncrunch*
.*crunch*.local.xml
# Installshield output folder
[Ee]xpress/
# DocProject is a documentation generator add-in
DocProject/buildhelp/
DocProject/Help/*.HxT
DocProject/Help/*.HxC
DocProject/Help/*.hhc
DocProject/Help/*.hhk
DocProject/Help/*.hhp
DocProject/Help/Html2
DocProject/Help/html
# Click-Once directory
publish/
# Publish Web Output
*.Publish.xml
*.pubxml
# NuGet Packages Directory
## TODO: If you have NuGet Package Restore enabled, uncomment the next line
#packages/
# Windows Azure Build Output
csx
*.build.csdef
# Windows Store app package directory
AppPackages/
# Others
sql/
*.Cache
ClientBin/
[Ss]tyle[Cc]op.*
~$*
*~
*.dbmdl
*.[Pp]ublish.xml
*.pfx
*.publishsettings
# RIA/Silverlight projects
Generated_Code/
# Backup & report files from converting an old project file to a newer
# Visual Studio version. Backup files are not needed, because we have git ;-)
_UpgradeReport_Files/
Backup*/
UpgradeLog*.XML
UpgradeLog*.htm
# SQL Server files
App_Data/*.mdf
App_Data/*.ldf
#############
## Windows detritus
#############
# Windows image file caches
Thumbs.db
ehthumbs.db
# Folder config file
Desktop.ini
# Recycle Bin used on file shares
$RECYCLE.BIN/
# Mac crap
.DS_Store
#############
## Python
#############
*.py[co]
# Packages
*.egg
*.egg-info
dist/
build/
eggs/
parts/
var/
sdist/
develop-eggs/
.installed.cfg
# Installer logs
pip-log.txt
# Unit test / coverage reports
.coverage
.tox
#Translations
*.mo
#Mr Developer
.mr.developer.cfg
*~
*.egg
*.pyc
.coverage
*.egg-info/
_build/
build/
dist/
.DS_Store

114
README.md Normal file

@@ -0,0 +1,114 @@
timesearch
==========
I don't have a test suite. You're my test suite! Messages go to [/u/GoldenSights](https://reddit.com/u/GoldenSights).
Timesearch is a collection of utilities for archiving subreddits.
### Make sure you have:
- Installed [Python](https://www.python.org/download). I use Python 3.6.
- Installed PRAW >= 4, as well as the other modules in `requirements.txt`. Try `pip install -r requirements.txt` to get them all.
- Created an OAuth app at https://reddit.com/prefs/apps. Make it `script` type, and set the redirect URI to `http://localhost:8080`. The title and description can be anything you want, and the about URL is not required.
- Used [this PRAW script](https://praw.readthedocs.io/en/latest/tutorials/refresh_token.html) to generate a refresh token. Just save it as a .py file somewhere and run it through your terminal / command line. For simplicity's sake, I just choose `all` for the scopes.
- Downloaded a copy of [this file](https://github.com/voussoir/reddit/blob/master/bot4.py) and saved it as `bot.py`. Fill out the variables using your OAuth information, and read the instructions to see where to put it. The Useragent is a description of your API usage. Typically "/u/username's praw client" is sufficient.
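For orientation, here is a minimal sketch of the shape `bot.py` tends to take, inferred from how `timesearch/common.py` calls it (`bot.anonymous()` and `bot.login(r)`). The variable names are assumptions, not the contents of the real bot4.py, which also provides a `login()` helper that applies your refresh token.

```python
# bot.py -- hypothetical sketch only; the real bot4.py from voussoir/reddit may differ.
import praw

USERAGENT = "/u/yourusername's praw client"  # a description of your API usage
APP_ID = 'your_oauth_app_id'                 # from https://reddit.com/prefs/apps
APP_SECRET = 'your_oauth_app_secret'
APP_REFRESH = 'your_refresh_token'           # from the PRAW refresh token script

def anonymous():
    # A read-only Reddit instance, enough for fetching public data.
    return praw.Reddit(
        client_id=APP_ID,
        client_secret=APP_SECRET,
        user_agent=USERAGENT,
    )
```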
### This package consists of:
- **timesearch**: If you try to page through `/new` on a subreddit, you'll hit a limit at or before 1,000 posts. Timesearch uses the `timestamp` cloudsearch query parameter to step from the beginning of a subreddit to present time, to collect as many submissions as possible. Read more about timestamp searching [here](https://www.reddit.com/r/reddittips/comments/2ix73n/use_cloudsearch_to_search_for_posts_on_reddit/). A sketch of this kind of query appears after this list.
`> timesearch.py timesearch -r subredditname <flags>`
`> timesearch.py timesearch -u username <flags>`
- **commentaugment**: Although we can search for submissions, we cannot search for comments. After performing a timesearch, you can use commentaugment to download the comment tree for each submission.
Note: commentaugment only gets the comments attached to the submissions that you found in your timesearch scan. If you're trying to commentaugment on a user, you're going to get comments that were made on their submissions, **not** comments they made on other people's submissions. Therefore, comprehensively collecting a user's activity is not possible. You will have to use someone else's dataset like that of [/u/Stuck_in_the_Matrix](https://reddit.com/u/Stuck_in_the_Matrix) at [pushshift.io](https://pushshift.io).
`> timesearch.py commentaugment -r subredditname <flags>`
`> timesearch.py commentaugment -u username <flags>`
- **livestream**: timesearch+commentaugment is great for starting your database and getting historical posts, but it's not the best for staying up-to-date. Instead, livestream monitors `/new` and `/comments` to continuously ingest data.
`> timesearch.py livestream -r subredditname <flags>`
`> timesearch.py livestream -u username <flags>`
- **getstyles**: Downloads the stylesheet and CSS images.
`> timesearch.py getstyles -r subredditname`
- **getwiki**: Downloads the wiki pages, sidebar, etc. from /wiki/pages.
`> timesearch.py getwiki -r subredditname`
- **offline_reading**: Renders comment threads into HTML via markdown.
Note: I'm currently using the [markdown library from pypi](https://pypi.python.org/pypi/Markdown), and it doesn't do reddit's custom markdown like `/r/` or `/u/`, obviously. So far I don't think anybody really uses o_r so I haven't invested much time into improving it.
`> timesearch.py offline_reading -r subredditname <flags>`
`> timesearch.py offline_reading -u username <flags>`
- **redmash**: Generates plaintext or HTML lists of submissions, sorted by a property of your choosing. You can order by date, author, flair, etc.
`> timesearch.py redmash -r subredditname <flags>`
`> timesearch.py redmash -u username <flags>`
- **breakdown**: Produces a JSON file indicating which users make the most posts in a subreddit, or which subreddits a user posts in.
`> timesearch.py breakdown -r subredditname <flags>`
`> timesearch.py breakdown -u username <flags>`
- **mergedb**: Copy all new data from one timesearch database into another. Useful for syncing or merging two scans of the same subreddit.
`> timesearch.py mergedb --from filepath/database1.db --to filepath/database2.db`
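To make the timesearch step concrete, here is a rough sketch of the timestamp-windowed cloudsearch query it relies on. This is an illustration only, not the code from `timesearch/timesearch.py`; the credentials, subreddit name, and timestamps are placeholders.

```python
# Hypothetical sketch of one scanning window (not timesearch's actual implementation).
import praw

reddit = praw.Reddit(
    client_id='your_oauth_app_id',
    client_secret='your_oauth_app_secret',
    user_agent="/u/yourusername's praw client",
)

lower = 1467460221            # unix timestamp where this window starts
upper = lower + 86400         # one interval (a day) later
query = 'timestamp:%d..%d' % (lower, upper)

for submission in reddit.subreddit('botwatch').search(query, sort='new', syntax='cloudsearch', limit=100):
    print(submission.fullname, submission.created_utc, submission.title)
```

Timesearch slides this window forward through history, stretching or shrinking it based on how many submissions each window returns.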
### To use it
You will need both the `timesearch` package (folder) and the external `timesearch.py` file. You can click the green "Clone or Download" button in the upper right. When you run the .py file, it sends your commandline arguments into the package. You can view a summarized version of all the help text with just `timesearch.py`, or you can view a specific docstring with `timesearch.py livestream`, etc.
I recommend [sqlitebrowser](https://github.com/sqlitebrowser/sqlitebrowser/releases) if you want to inspect the database yourself.
### Changelog
- 2017 11 13
- Gave timesearch its own Github repository so that (1) it will be easier for people to download it and (2) it has a cleaner, more independent URL. [voussoir/timesearch](https://github.com/voussoir/timesearch)
- 2017 11 05
- Added a try-except inside livestream helper to prevent generator from terminating.
- 2017 11 04
- For timesearch, I switched from using my custom cloudsearch iterator to the one that comes with PRAW4+.
- 2017 10 12
- Added the `mergedb` utility for combining databases.
- 2017 06 02
- You can use `commentaugment -s abcdef` to get a particular thread even if you haven't scraped anything else from that subreddit. Previously `-s` only worked if the database already existed and you specified it via `-r`. Now it is inferred from the submission itself.
- 2017 04 28
- Complete restructure into package, started using PRAW4.
- 2016 08 10
- Started merging redmash and wrote its argparser
- 2016 07 03
- Improved docstring clarity.
- 2016 07 02
- Added `livestream` argparse
- 2016 06 07
- Offline_reading has been merged with the main timesearch file
- `get_all_posts` renamed to `timesearch`
- Timesearch parameter `usermode` renamed to `username`; `maxupper` renamed to `upper`.
- Everything now accessible via commandline arguments. Read the docstring at the top of the file.
- 2016 06 05
- NEW DATABASE SCHEME. Submissions and comments now live in different tables like they should have all along. Submission table has two new columns for a little bit of commentaugment metadata. This allows commentaugment to only scan threads that are new.
- You can use the `migrate_20160605.py` script to convert old databases into new ones.
- 2015 11 11
- created `offline_reading.py` which converts a timesearch database into a comment tree that can be rendered into HTML
- 2015 09 07
- fixed bug which allowed `livestream` to crash because `bot.refresh()` was outside of the try-catch.
- 2015 08 19
- fixed bug in which updatescores stopped iterating early if you had more than 100 comments in a row in the db
- commentaugment has been completely merged into the timesearch.py file. you can use commentaugment_prompt() to input the parameters, or use the commentaugment() function directly.
____
I want to live in a future where everyone uses UTC and agrees on daylight savings.
<p align="center">
<img src="https://github.com/voussoir/reddit/blob/master/.GitImages/timesearch_logo_256.png?raw=true" alt="Timesearch"/>
</p>

3
requirements.txt Normal file

@@ -0,0 +1,3 @@
markdown
praw
voussoirkit

5
timesearch.py Normal file

@@ -0,0 +1,5 @@
import sys
import timesearch
status_code = timesearch.main(sys.argv[1:])
raise SystemExit(status_code)

436
timesearch/__init__.py Normal file

@@ -0,0 +1,436 @@
import argparse
import sys
from . import exceptions
# NOTE: Originally I wanted the docstring for each module to be within their
# file. However, this means that composing the global helptext would require
# importing those modules, which will subsequently import PRAW and a whole lot
# of other things. This made TS very slow to load which is okay when you're
# actually using it but really terrible when you're just viewing the help text.
DOCSTRING = '''
Timesearch
The subreddit archiver
The basics:
1. Collect a subreddit's submissions
> timesearch.py timesearch -r subredditname
2. Collect the comments for those submissions
> timesearch.py commentaugment -r subredditname
3. Stay up-to-date
> timesearch.py livestream -r subredditname
Commands for collecting:
{timesearch}
{commentaugment}
{livestream}
{getstyles}
{getwiki}
Commands for processing:
{offline_reading}
{redmash}
{breakdown}
{mergedb}
TO SEE DETAILS ON EACH COMMAND, RUN
> timesearch.py <command>
'''
MODULE_DOCSTRINGS = {
'breakdown': '''
breakdown:
Give the comment / submission counts for users in a subreddit, or
the subreddits that a user posts to.
Automatically dumps into a <database>_breakdown.json file
in the same directory as the database.
> timesearch.py breakdown -r subredditname
> timesearch.py breakdown -u username
flags:
-r "test" | --subreddit "test":
The subreddit database to break down.
-u "test" | --username "test":
The username database to break down.
--sort "name" | "submissions" | "comments" | "total_posts"
Sort the output.
''',
'commentaugment': '''
commentaugment:
Collect comments for the submissions in the database.
NOTE - if you did a timesearch scan on a username, this function is
mostly useless. It collects comments that were made on OP's submissions
but it does not find OP's comments on other people's submissions which
is what you probably wanted. Unfortunately that's not possible.
> timesearch.py commentaugment -r subredditname <flags>
> timesearch.py commentaugment -u username <flags>
flags:
-l 18 | --limit 18:
The number of MoreComments objects to replace.
Default: No limit
-t 5 | --threshold 5:
The number of comments a MoreComments object must claim to have
for us to open it.
Actual number received may be lower.
Default: >= 0
-n 4 | --num_thresh 4:
The number of comments a submission must claim to have for us to
scan it at all.
Actual number received may be lower.
Default: >= 1
-s "t3_xxxxxx" | --specific "t3_xxxxxx":
Given a submission ID, t3_xxxxxx, scan only that submission.
-v | --verbose:
If provided, print more stuff while working.
''',
'getstyles': '''
getstyles:
Collect the stylesheet, and css images.
> timesearch.py getstyles -r subredditname
''',
'getwiki': '''
getwiki:
Collect all available wiki pages.
> timesearch.py getwiki -r subredditname
''',
'mergedb': '''
mergedb:
Copy all new posts from one timesearch database into another.
> timesearch mergedb --from redditdev1.db --to redditdev2.db
flags:
--from:
The database file containing the posts you wish to copy.
--to:
The database file to which you will copy the posts.
The database is modified in-place.
Existing posts will be ignored and not updated.
''',
'livestream': '''
livestream:
Continuously collect submissions and/or comments.
> timesearch.py livestream -r subredditname <flags>
> timesearch.py livestream -u username <flags>
flags:
-r "test" | --subreddit "test":
The subreddit to collect from.
-u "test" | --username "test":
The redditor to collect from.
-s | --submissions:
If provided, do collect submissions. Otherwise don't.
-c | --comments:
If provided, do collect comments. Otherwise don't.
If submissions and comments are BOTH left unspecified, then they will
BOTH be collected.
-v | --verbose:
If provided, print extra information to the screen.
-w 30 | --wait 30:
The number of seconds to wait between cycles.
-1 | --once:
If provided, only do a single loop. Otherwise go forever.
''',
'offline_reading': '''
offline_reading:
Render submissions and comment threads to HTML via Markdown.
> timesearch.py offline_reading -r subredditname <flags>
> timesearch.py offline_reading -u username <flags>
flags:
-s "t3_xxxxxx" | --specific "t3_xxxxxx":
Given a submission ID, t3_xxxxxx, render only that submission.
Otherwise render every submission in the database.
''',
'redmash': '''
redmash:
Dump submission listings to a plaintext or HTML file.
> timesearch.py redmash -r subredditname <flags>
> timesearch.py redmash -u username <flags>
flags:
-r "test" | --subreddit "test":
The subreddit database to dump
-u "test" | --username "test":
The username database to dump
--html:
Write HTML files instead of plain text.
-st 50 | --score_threshold 50:
Only mash posts with at least this many points.
Applies to ALL mashes!
--all:
Perform all of the mashes listed below.
--date:
Perform a mash sorted by date.
--title:
Perform a mash sorted by title.
--score:
Perform a mash sorted by score.
--author:
For subreddit databases only.
Perform a mash sorted by author.
--sub:
For username databases only.
Perform a mash sorted by subreddit.
--flair:
Perform a mash sorted by flair.
examples:
`timesearch redmash -r botwatch --date`
does only the date file.
`timesearch redmash -r botwatch --score --title`
does both the score and title files.
`timesearch redmash -r botwatch --score --score_threshold 50`
only shows submissions with >= 50 points.
`timesearch redmash -r botwatch --all`
performs all of the different mashes.
''',
'timesearch': '''
timesearch:
Collect submissions from the subreddit across all of history, or
Collect submissions by a user (as many as possible).
> timesearch.py timesearch -r subredditname <flags>
> timesearch.py timesearch -u username <flags>
-r "test" | --subreddit "test":
The subreddit to scan. Mutually exclusive with username.
-u "test" | --username "test":
The user to scan. Mutually exclusive with subreddit.
-l "update" | --lower "update":
If a number - the unix timestamp to start at.
If "update" - continue from latest submission in db.
Default: update
-up 1467460221 | --upper 1467460221:
If a number - the unix timestamp to stop at.
If not provided - stop at current time.
Default: current time
-i 86400 | --interval 86400:
The initial interval for the scanning window, in seconds.
This is only a starting value. The window will shrink and stretch
as necessary based on received submission counts.
Default: 86400
''',
}
def docstring_preview(text):
'''
Return the brief description at the top of the text.
User can get full text by looking at each specifically.
'''
return text.split('\n\n')[0]
def listget(li, index, fallback=None):
try:
return li[index]
except IndexError:
return fallback
def indent(text, spaces=4):
spaces = ' ' * spaces
return '\n'.join(spaces + line if line.strip() != '' else line for line in text.split('\n'))
docstring_headers = {
key: indent(docstring_preview(value))
for (key, value) in MODULE_DOCSTRINGS.items()
}
DOCSTRING = DOCSTRING.format(**docstring_headers)
####################################################################################################
####################################################################################################
def breakdown_gateway(args):
from . import breakdown
breakdown.breakdown_argparse(args)
def commentaugment_gateway(args):
from . import commentaugment
commentaugment.commentaugment_argparse(args)
def getstyles_gateway(args):
from . import getstyles
getstyles.getstyles_argparse(args)
def getwiki_gateway(args):
from . import getwiki
getwiki.getwiki_argparse(args)
def livestream_gateway(args):
from . import livestream
livestream.livestream_argparse(args)
def mergedb_gateway(args):
from . import mergedb
mergedb.mergedb_argparse(args)
def offline_reading_gateway(args):
from . import offline_reading
offline_reading.offline_reading_argparse(args)
def redmash_gateway(args):
from . import redmash
redmash.redmash_argparse(args)
def timesearch_gateway(args):
from . import timesearch
timesearch.timesearch_argparse(args)
parser = argparse.ArgumentParser()
subparsers = parser.add_subparsers()
p_breakdown = subparsers.add_parser('breakdown')
p_breakdown.add_argument('--sort', dest='sort', default=None)
p_breakdown.add_argument('-r', '--subreddit', dest='subreddit', default=None)
p_breakdown.add_argument('-u', '--user', dest='username', default=None)
p_breakdown.set_defaults(func=breakdown_gateway)
p_commentaugment = subparsers.add_parser('commentaugment')
p_commentaugment.add_argument('-l', '--limit', dest='limit', default=None)
p_commentaugment.add_argument('-n', '--num_thresh', dest='num_thresh', default=1)
p_commentaugment.add_argument('-r', '--subreddit', dest='subreddit', default=None)
p_commentaugment.add_argument('-s', '--specific', dest='specific_submission', default=None)
p_commentaugment.add_argument('-t', '--threshold', dest='threshold', default=0)
p_commentaugment.add_argument('-u', '--user', dest='username', default=None)
p_commentaugment.add_argument('-v', '--verbose', dest='verbose', action='store_true')
p_commentaugment.set_defaults(func=commentaugment_gateway)
p_getstyles = subparsers.add_parser('getstyles')
p_getstyles.add_argument('-r', '--subreddit', dest='subreddit')
p_getstyles.set_defaults(func=getstyles_gateway)
p_getwiki = subparsers.add_parser('getwiki')
p_getwiki.add_argument('-r', '--subreddit', dest='subreddit')
p_getwiki.set_defaults(func=getwiki_gateway)
p_livestream = subparsers.add_parser('livestream')
p_livestream.add_argument('-1', '--once', dest='once', action='store_true')
p_livestream.add_argument('-c', '--comments', dest='comments', action='store_true')
p_livestream.add_argument('-l', '--limit', dest='limit', default=None)
p_livestream.add_argument('-r', '--subreddit', dest='subreddit', default=None)
p_livestream.add_argument('-s', '--submissions', dest='submissions', action='store_true')
p_livestream.add_argument('-u', '--user', dest='username', default=None)
p_livestream.add_argument('-v', '--verbose', dest='verbose', action='store_true')
p_livestream.add_argument('-w', '--wait', dest='sleepy', default=30)
p_livestream.set_defaults(func=livestream_gateway)
p_mergedb = subparsers.add_parser('mergedb')
p_mergedb.add_argument('--from', dest='from_db_path', required=True)
p_mergedb.add_argument('--to', dest='to_db_path', required=True)
p_mergedb.set_defaults(func=mergedb_gateway)
p_offline_reading = subparsers.add_parser('offline_reading')
p_offline_reading.add_argument('-r', '--subreddit', dest='subreddit', default=None)
p_offline_reading.add_argument('-s', '--specific', dest='specific_submission', default=None)
p_offline_reading.add_argument('-u', '--user', dest='username', default=None)
p_offline_reading.set_defaults(func=offline_reading_gateway)
p_redmash = subparsers.add_parser('redmash')
p_redmash.add_argument('--all', dest='do_all', action='store_true')
p_redmash.add_argument('--author', dest='do_author', action='store_true')
p_redmash.add_argument('--date', dest='do_date', action='store_true')
p_redmash.add_argument('--flair', dest='do_flair', action='store_true')
p_redmash.add_argument('--html', dest='html', action='store_true')
p_redmash.add_argument('--score', dest='do_score', action='store_true')
p_redmash.add_argument('--sub', dest='do_subreddit', action='store_true')
p_redmash.add_argument('--title', dest='do_title', action='store_true')
p_redmash.add_argument('-r', '--subreddit', dest='subreddit', default=None)
p_redmash.add_argument('-st', '--score_threshold', dest='score_threshold', default=0)
p_redmash.add_argument('-u', '--user', dest='username', default=None)
p_redmash.set_defaults(func=redmash_gateway)
p_timesearch = subparsers.add_parser('timesearch')
p_timesearch.add_argument('-i', '--interval', dest='interval', default=86400)
p_timesearch.add_argument('-l', '--lower', dest='lower', default='update')
p_timesearch.add_argument('-r', '--subreddit', dest='subreddit', default=None)
p_timesearch.add_argument('-u', '--user', dest='username', default=None)
p_timesearch.add_argument('-up', '--upper', dest='upper', default=None)
p_timesearch.set_defaults(func=timesearch_gateway)
def main(argv):
helpstrings = {'', 'help', '-h', '--help'}
command = listget(argv, 0, '').lower()
# The user did not enter a command, or entered something unrecognized.
if command not in MODULE_DOCSTRINGS:
print(DOCSTRING)
if command == '':
print('You are seeing the default help text because you did not choose a command.')
elif command not in helpstrings:
print('You are seeing the default help text because "%s" was not recognized.' % command)
return 1
# The user entered a command, but no further arguments, or just help.
argument = listget(argv, 1, '').lower()
if argument in helpstrings:
print(MODULE_DOCSTRINGS[command])
return 1
args = parser.parse_args(argv)
try:
args.func(args)
except exceptions.DBNotFound as e:
message = '"%s" is not an existing database.'
message += '\nHave you used any of the other utilities to collect data?'
message = message % e.path.absolute_path
print(message)
return 1
return 0
if __name__ == '__main__':
raise SystemExit(main(sys.argv[1:]))
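Since the launcher simply forwards `sys.argv` into `main()`, the same dispatch can also be driven programmatically. A hypothetical example (subreddit name made up):

```python
# Equivalent to:  python timesearch.py breakdown -r botwatch --sort comments
import timesearch

status = timesearch.main(['breakdown', '-r', 'botwatch', '--sort', 'comments'])
raise SystemExit(status)
```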

103
timesearch/breakdown.py Normal file

@@ -0,0 +1,103 @@
import os
import json
from . import common
from . import tsdb
def breakdown_database(subreddit=None, username=None):
'''
Given a database, return a json dict breaking down the submission / comment count for
users (if a subreddit database) or subreddits (if a user database).
'''
if (subreddit is None) == (username is None):
raise Exception('Enter subreddit or username but not both')
breakdown_results = {}
def _ingest(names, subkey):
for name in names:
breakdown_results.setdefault(name, {})
breakdown_results[name].setdefault(subkey, 0)
breakdown_results[name][subkey] += 1
if subreddit:
database = tsdb.TSDB.for_subreddit(subreddit, do_create=False)
else:
database = tsdb.TSDB.for_user(username, do_create=False)
cur = database.sql.cursor()
for table in ['submissions', 'comments']:
if subreddit:
cur.execute('SELECT author FROM %s' % table)
elif username:
cur.execute('SELECT subreddit FROM %s' % table)
names = (row[0] for row in common.fetchgenerator(cur))
_ingest(names, table)
for name in breakdown_results:
breakdown_results[name].setdefault('submissions', 0)
breakdown_results[name].setdefault('comments', 0)
return breakdown_results
def breakdown_argparse(args):
if args.subreddit:
database = tsdb.TSDB.for_subreddit(args.subreddit, do_create=False)
else:
database = tsdb.TSDB.for_user(args.username, do_create=False)
breakdown_results = breakdown_database(
subreddit=args.subreddit,
username=args.username,
)
def sort_name(name):
return name.lower()
def sort_submissions(name):
invert_score = -1 * breakdown_results[name]['submissions']
return (invert_score, name.lower())
def sort_comments(name):
invert_score = -1 * breakdown_results[name]['comments']
return (invert_score, name.lower())
def sort_total_posts(name):
invert_score = breakdown_results[name]['submissions'] + breakdown_results[name]['comments']
invert_score = -1 * invert_score
return (invert_score, name.lower())
breakdown_sorters = {
'name': sort_name,
'submissions': sort_submissions,
'comments': sort_comments,
'total_posts': sort_total_posts,
}
breakdown_names = list(breakdown_results.keys())
if args.sort is not None:
try:
sorter = breakdown_sorters[args.sort.lower()]
except KeyError:
message = '{sorter} is not a sorter. Choose from {options}'
message = message.format(sorter=args.sort, options=list(breakdown_sorters.keys()))
raise KeyError(message)
breakdown_names.sort(key=sorter)
dump = ' "{name}": {{"submissions": {submissions}, "comments": {comments}}}'
dump = [dump.format(name=name, **breakdown_results[name]) for name in breakdown_names]
dump = ',\n'.join(dump)
dump = '{\n' + dump + '\n}\n'
else:
dump = json.dumps(breakdown_results)
if args.sort is None:
breakdown_basename = '%s_breakdown.json'
else:
breakdown_basename = '%%s_breakdown_%s.json' % args.sort
breakdown_basename = breakdown_basename % database.filepath.replace_extension('').basename
breakdown_filepath = database.breakdown_dir.with_child(breakdown_basename)
os.makedirs(breakdown_filepath.parent.absolute_path, exist_ok=True)
breakdown_file = open(breakdown_filepath.absolute_path, 'w')
with breakdown_file:
breakdown_file.write(dump)
print('Wrote', breakdown_filepath.relative_path)
return breakdown_results
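A hypothetical direct call (database name and counts made up), showing the shape of the returned data:

```python
from timesearch import breakdown

# Assumes a previous timesearch scan already created the botwatch database.
results = breakdown.breakdown_database(subreddit='botwatch')
# results maps each author to its counts, roughly:
# {'some_user': {'submissions': 12, 'comments': 340}, ...}
```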

179
timesearch/commentaugment.py Normal file

@@ -0,0 +1,179 @@
import traceback
from . import common
from . import tsdb
def commentaugment(
subreddit=None,
username=None,
limit=0,
num_thresh=0,
specific_submission=None,
threshold=0,
verbose=0,
):
'''
Take the IDs of collected submissions, and gather comments from those threads.
Please see the 'commentaugment' docstring in timesearch/__init__.py.
'''
common.bot.login(common.r)
if specific_submission is not None:
if not specific_submission.startswith('t3_'):
specific_submission = 't3_' + specific_submission
specific_submission_obj = common.r.submission(specific_submission[3:])
subreddit = specific_submission_obj.subreddit.display_name
if (subreddit is None) == (username is None):
raise Exception('Enter subreddit or username but not both')
if subreddit:
if specific_submission is None:
database = tsdb.TSDB.for_subreddit(subreddit, do_create=False)
else:
database = tsdb.TSDB.for_subreddit(subreddit, do_create=True)
else:
database = tsdb.TSDB.for_user(username, do_create=False)
cur = database.sql.cursor()
if limit == 0:
limit = None
if specific_submission is None:
query = '''
SELECT idstr FROM submissions
WHERE idstr IS NOT NULL
AND augmented_at IS NULL
AND num_comments >= ?
ORDER BY num_comments DESC
'''
bindings = [num_thresh]
cur.execute(query, bindings)
fetchall = [item[0] for item in cur.fetchall()]
else:
# Make sure the object we're augmenting is in the table too!
database.insert(specific_submission_obj)
fetchall = [specific_submission]
totalthreads = len(fetchall)
if verbose:
spacer = '\n\t'
else:
spacer = ' '
scannedthreads = 0
get_submission = common.nofailrequest(get_submission_immediately)
while len(fetchall) > 0:
id_batch = fetchall[:100]
fetchall = fetchall[100:]
for submission in id_batch:
submission = get_submission(submission.split('_')[-1])
message = 'Processing {fullname}{spacer}expecting {num_comments} | '
message = message.format(
fullname=submission.fullname,
spacer=spacer,
num_comments=submission.num_comments,
)
print(message, end='', flush=True)
if verbose:
print()
comments = get_comments_for_thread(submission, limit, threshold, verbose)
database.insert(comments, commit=False)
query = '''
UPDATE submissions
set augmented_at = ?,
augmented_count = ?
WHERE idstr == ?
'''
bindings = [common.get_now(), len(comments), submission.fullname]
cur.execute(query, bindings)
database.sql.commit()
scannedthreads += 1
if verbose:
print('\t', end='')
message = 'Found {count} |{spacer}{scannedthreads} / {totalthreads}'
message = message.format(
count=len(comments),
spacer=spacer,
scannedthreads=scannedthreads,
totalthreads=totalthreads,
)
print(message)
def get_comments_for_thread(submission, limit, threshold, verbose):
comments = common.nofailrequest(lambda x: x.comments)(submission)
# PRAW4 flatten is just list().
comments = manually_replace_comments(comments, limit, threshold, verbose)
return comments
def get_submission_immediately(submission_id):
submission = common.r.submission(submission_id)
# force the lazyloader
submission.title = submission.title
return submission
def manually_replace_comments(incomments, limit=None, threshold=0, verbose=False):
'''
PRAW's replace_more_comments method cannot continue
where it left off in the case of an Ow! screen.
So I'm writing my own function to get each MoreComments item individually
Furthermore, this function will maximize the number of retrieved comments by
sorting the MoreComments objects and getting the big chunks before worrying
about the tail ends.
'''
incomments = incomments.list()
comments = []
morecomments = []
while len(incomments) > 0:
item = incomments.pop()
if isinstance(item, common.praw.models.MoreComments) and item.count >= threshold:
morecomments.append(item)
elif isinstance(item, common.praw.models.Comment):
comments.append(item)
while True:
try:
if limit is not None and limit <= 0:
break
if len(morecomments) == 0:
break
morecomments.sort(key=lambda x: x.count)
mc = morecomments.pop()
additional = common.nofailrequest(mc.comments)()
additionals = 0
if limit is not None:
limit -= 1
for item in additional:
if isinstance(item, common.praw.models.MoreComments) and item.count >= threshold:
morecomments.append(item)
elif isinstance(item, common.praw.models.Comment):
comments.append(item)
additionals += 1
if verbose:
s = '\tGot %d more, %d so far.' % (additionals, len(comments))
if limit is not None:
s += ' Can perform %d more replacements' % limit
print(s)
except KeyboardInterrupt:
raise
except Exception:
traceback.print_exc()
return comments
def commentaugment_argparse(args):
return commentaugment(
subreddit=args.subreddit,
username=args.username,
limit=common.int_none(args.limit),
threshold=common.int_none(args.threshold),
num_thresh=common.int_none(args.num_thresh),
verbose=args.verbose,
specific_submission=args.specific_submission,
)
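A hypothetical direct call for the `--specific` case described in the module docstring (the submission ID is a placeholder):

```python
# Equivalent to:  python timesearch.py commentaugment -s t3_xxxxxx
from timesearch import commentaugment

commentaugment.commentaugment(specific_submission='t3_xxxxxx')
```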

104
timesearch/common.py Normal file

@@ -0,0 +1,104 @@
import datetime
import os
import time
import traceback
try:
import praw
except ImportError:
praw = None
if praw is None or praw.__version__.startswith('3.'):
import praw4
praw = praw4
try:
import bot
except ImportError:
bot = None
if bot is None or bot.praw != praw:
import bot4
bot = bot4
r = bot.anonymous()
def assert_file_exists(filepath):
if not os.path.exists(filepath):
raise FileNotFoundError(filepath)
def b36(i):
if isinstance(i, int):
return base36encode(i)
return base36decode(i)
def base36decode(number):
return int(number, 36)
def base36encode(number, alphabet='0123456789abcdefghijklmnopqrstuvwxyz'):
"""Converts an integer to a base36 string."""
if not isinstance(number, (int)):
raise TypeError('number must be an integer')
base36 = ''
sign = ''
if number < 0:
sign = '-'
number = -number
if 0 <= number < len(alphabet):
return sign + alphabet[number]
while number != 0:
number, i = divmod(number, len(alphabet))
base36 = alphabet[i] + base36
return sign + base36
def fetchgenerator(cursor):
while True:
item = cursor.fetchone()
if item is None:
break
yield item
def generator_chunker(generator, chunk_size):
chunk = []
for item in generator:
chunk.append(item)
if len(chunk) == chunk_size:
yield chunk
chunk = []
if len(chunk) != 0:
yield chunk
def get_now(stamp=True):
now = datetime.datetime.now(datetime.timezone.utc)
if stamp:
return int(now.timestamp())
return now
def human(timestamp):
x = datetime.datetime.utcfromtimestamp(timestamp)
x = datetime.datetime.strftime(x, "%b %d %Y %H:%M:%S")
return x
def int_none(x):
if x is None:
return None
return int(x)
def nofailrequest(function):
'''
Creates a function that will retry until it succeeds.
This function accepts 1 parameter, a function, and returns a modified
version of that function that will try-catch, sleep, and loop until it
finally returns.
'''
def a(*args, **kwargs):
while True:
try:
result = function(*args, **kwargs)
return result
except KeyboardInterrupt:
raise
except Exception:
traceback.print_exc()
print('Retrying in 2...')
time.sleep(2)
return a
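Two hypothetical usage sketches for the helpers above: wrapping a flaky network call with `nofailrequest`, and round-tripping a reddit-style base36 ID with `b36` (the ID is made up):

```python
import requests
from timesearch import common

# nofailrequest returns a wrapper that retries every 2 seconds until the call succeeds.
fetch = common.nofailrequest(requests.get)
response = fetch('https://www.reddit.com/r/botwatch/about.json', headers={'User-Agent': 'example'})

ident = common.b36('7vyq0g')       # base36 string -> integer
print(ident, common.b36(ident))    # integer -> back to the base36 string
```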

3
timesearch/exceptions.py Normal file

@@ -0,0 +1,3 @@
class DBNotFound(FileNotFoundError):
def __init__(self, path):
self.path = path

31
timesearch/getstyles.py Normal file

@@ -0,0 +1,31 @@
import os
import requests
from . import common
from . import tsdb
def getstyles(subreddit):
print('Getting styles for /r/%s' % subreddit)
subreddit = common.r.subreddit(subreddit)
styles = subreddit.stylesheet()
database = tsdb.TSDB.for_subreddit(subreddit.display_name)
os.makedirs(database.styles_dir.absolute_path, exist_ok=True)
stylesheet_filepath = database.styles_dir.with_child('stylesheet.css')
print('Downloading %s' % stylesheet_filepath.relative_path)
with open(stylesheet_filepath.absolute_path, 'w', encoding='utf-8') as stylesheet:
stylesheet.write(styles.stylesheet)
for image in styles.images:
image_basename = image['name'] + '.' + image['url'].split('.')[-1]
image_filepath = database.styles_dir.with_child(image_basename)
print('Downloading %s' % image_filepath.relative_path)
with open(image_filepath.absolute_path, 'wb') as image_file:
response = requests.get(image['url'])
image_file.write(response.content)
def getstyles_argparse(args):
return getstyles(args.subreddit)

23
timesearch/getwiki.py Normal file

@@ -0,0 +1,23 @@
import os
from . import common
from . import tsdb
def getwiki(subreddit):
print('Getting wiki pages for /r/%s' % subreddit)
subreddit = common.r.subreddit(subreddit)
database = tsdb.TSDB.for_subreddit(subreddit)
for wikipage in subreddit.wiki:
if wikipage.name == 'config/stylesheet':
continue
wikipage_path = database.wiki_dir.join(wikipage.name).replace_extension('md')
os.makedirs(wikipage_path.parent.absolute_path, exist_ok=True)
with open(wikipage_path.absolute_path, 'w', encoding='utf-8') as handle:
handle.write(wikipage.content_md)
print('Wrote', wikipage_path.relative_path)
def getwiki_argparse(args):
return getwiki(args.subreddit)

175
timesearch/livestream.py Normal file

@@ -0,0 +1,175 @@
import copy
import time
import traceback
from . import common
from . import tsdb
def livestream(
subreddit=None,
username=None,
verbose=False,
as_a_generator=False,
do_submissions=True,
do_comments=True,
limit=100,
only_once=False,
sleepy=30,
):
'''
Continuously get posts from this source
and insert them into the database
as_a_generator:
return a generator where every iteration does a single livestream loop.
This is good if you want to manage multiple livestreams yourself by
calling `next` on each of them, instead of getting stuck in here.
'''
if bool(subreddit) == bool(username):
raise Exception('Require either username / subreddit parameter, but not both')
if bool(do_submissions) is bool(do_comments) is False:
raise Exception('Require do_submissions and/or do_comments parameter')
common.bot.login(common.r)
if subreddit:
print('Getting subreddit %s' % subreddit)
database = tsdb.TSDB.for_subreddit(subreddit)
subreddit = common.r.subreddit(subreddit)
submissions = subreddit.new if do_submissions else None
comments = subreddit.comments if do_comments else None
else:
print('Getting redditor %s' % username)
database = tsdb.TSDB.for_user(username)
user = common.r.redditor(username)
submissions = user.submissions.new if do_submissions else None
comments = user.comments.new if do_comments else None
generator = _livestream_as_a_generator(
database,
submission_function=submissions,
comment_function=comments,
limit=limit,
params={'show': 'all'},
verbose=verbose,
)
if as_a_generator:
return generator
while True:
try:
step = next(generator)
newtext = '%ds, %dc' % (step['new_submissions'], step['new_comments'])
totalnew = step['new_submissions'] + step['new_comments']
status = '{now} +{new}'.format(now=common.human(common.get_now()), new=newtext)
print(status, end='', flush=True)
if totalnew == 0 and verbose is False:
# Since there was nothing new, allow the next line to overwrite the status
print('\r', end='')
else:
print()
if verbose:
print('Loop finished.')
if only_once:
break
time.sleep(sleepy)
except KeyboardInterrupt:
print()
return
except Exception:
traceback.print_exc()
print('Retrying in 5...')
time.sleep(5)
hangman = lambda: livestream(
username='gallowboob',
do_submissions=True,
do_comments=True,
sleepy=60,
)
def _livestream_as_a_generator(
database,
submission_function,
comment_function,
limit,
params,
verbose,
):
while True:
#common.r.handler.clear_cache()
try:
items = _livestream_helper(
submission_function=submission_function,
comment_function=comment_function,
limit=limit,
params=params,
verbose=verbose,
)
newitems = database.insert(items)
yield newitems
except Exception:
traceback.print_exc()
print('Retrying in 5...')
time.sleep(5)
def _livestream_helper(
submission_function=None,
comment_function=None,
verbose=False,
*args,
**kwargs,
):
'''
Given a submission-retrieving function and/or a comment-retrieving function,
collect submissions and comments in a list together and return that.
args and kwargs go into the collecting functions.
'''
if bool(submission_function) is bool(comment_function) is False:
raise Exception('Require submissions and/or comments parameter')
results = []
if submission_function:
if verbose:
print('Getting submissions', args, kwargs)
this_kwargs = copy.deepcopy(kwargs)
submission_batch = submission_function(*args, **this_kwargs)
results.extend(submission_batch)
if comment_function:
if verbose:
print('Getting comments', args, kwargs)
this_kwargs = copy.deepcopy(kwargs)
comment_batch = comment_function(*args, **this_kwargs)
results.extend(comment_batch)
if verbose:
print('Collected. Saving...')
return results
def livestream_argparse(args):
if args.submissions is args.comments is False:
args.submissions = True
args.comments = True
if args.limit is None:
limit = 100
else:
limit = int(args.limit)
return livestream(
subreddit=args.subreddit,
username=args.username,
do_comments=args.comments,
do_submissions=args.submissions,
limit=limit,
verbose=args.verbose,
only_once=args.once,
sleepy=common.int_none(args.sleepy),
)
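A hypothetical sketch of the `as_a_generator` usage described in the docstring, driving two streams side by side (the names are made up):

```python
import time
from timesearch import livestream

streams = [
    livestream.livestream(subreddit='botwatch', as_a_generator=True),
    livestream.livestream(username='goldensights', as_a_generator=True),
]
while True:
    for stream in streams:
        next(stream)   # one collection cycle for this stream
    time.sleep(30)
```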

35
timesearch/mergedb.py Normal file

@@ -0,0 +1,35 @@
import os
import requests
from . import common
from . import tsdb
MIGRATE_QUERY = '''
INSERT INTO {tablename}
SELECT othertable.* FROM other.{tablename} othertable
LEFT JOIN {tablename} mytable ON mytable.idint == othertable.idint
WHERE mytable.idint IS NULL;
'''
def _migrate_helper(db, tablename):
oldcount = db.cur.execute('SELECT count(*) FROM %s' % tablename).fetchone()[0]
query = MIGRATE_QUERY.format(tablename=tablename)
print(query)
db.cur.execute(query)
db.sql.commit()
newcount = db.cur.execute('SELECT count(*) FROM %s' % tablename).fetchone()[0]
print('Gained %d items.' % (newcount - oldcount))
def mergedb(from_db_path, to_db_path):
to_db = tsdb.TSDB(to_db_path)
from_db = tsdb.TSDB(from_db_path)
to_db.cur.execute('ATTACH DATABASE "%s" AS other' % from_db_path)
_migrate_helper(to_db, 'submissions')
_migrate_helper(to_db, 'comments')
def mergedb_argparse(args):
return mergedb(args.from_db_path, args.to_db_path)
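The `MIGRATE_QUERY` above is an anti-join: the LEFT JOIN with `WHERE mytable.idint IS NULL` keeps only rows whose idint is not already present in the destination table, so existing posts are skipped rather than overwritten. A hypothetical direct call (paths made up):

```python
# Equivalent to:  python timesearch.py mergedb --from botwatch_laptop.db --to botwatch.db
from timesearch import mergedb

mergedb.mergedb('botwatch_laptop.db', 'botwatch.db')
```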

340
timesearch/offline_reading.py Normal file

@@ -0,0 +1,340 @@
import os
import markdown
from . import common
from . import tsdb
class DBEntry:
def __init__(self, fetch):
if fetch[1].startswith('t3_'):
columns = tsdb.SQL_SUBMISSION_COLUMNS
self.object_type = 'submission'
else:
columns = tsdb.SQL_COMMENT_COLUMNS
self.object_type = 'comment'
self.id = None
self.idstr = None
for (index, attribute) in enumerate(columns):
setattr(self, attribute, fetch[index])
def __repr__(self):
return 'DBEntry(\'%s\')' % self.id