752 lines
22 KiB
Python
752 lines
22 KiB
Python
import argparse
|
|
import bs4
|
|
import datetime
|
|
import html
|
|
import logging
|
|
import requests
|
|
import sqlite3
|
|
import sys
|
|
import time
|
|
|
|
from voussoirkit import backoff
|
|
from voussoirkit import betterhelp
|
|
from voussoirkit import httperrors
|
|
from voussoirkit import mutables
|
|
from voussoirkit import operatornotify
|
|
from voussoirkit import pathclass
|
|
from voussoirkit import ratelimiter
|
|
from voussoirkit import sqlhelpers
|
|
from voussoirkit import threadpool
|
|
from voussoirkit import treeclass
|
|
from voussoirkit import vlogging
|
|
|
|
# Module-wide logger.
log = vlogging.getLogger(__name__, 'hnarchive')

VERSION = '1.0.0'

# Headers sent with every request made through `session`.
HEADERS = {'User-Agent': f'voussoir/hnarchive v{VERSION}.'}

# One shared HTTP session for all API calls.
session = requests.Session()
session.headers.update(HEADERS)
|
|
|
|
# SQL script that creates the `items` table and its indexes.
# init_db only executes it when the database file did not previously exist.
DB_INIT = '''
BEGIN;
PRAGMA user_version = 1;
CREATE TABLE IF NOT EXISTS items(
    id INT PRIMARY KEY NOT NULL,
    deleted INT,
    type TEXT,
    author TEXT,
    time INT,
    text TEXT,
    dead INT,
    parent TEXT,
    poll TEXT,
    url TEXT,
    score INT,
    title TEXT,
    descendants INT,
    retrieved INT
);
CREATE INDEX IF NOT EXISTS index_items_id on items(id);
CREATE INDEX IF NOT EXISTS index_items_parent on items(parent);
CREATE INDEX IF NOT EXISTS index_items_poll on items(poll) WHERE poll IS NOT NULL;
CREATE INDEX IF NOT EXISTS index_items_time on items(time);
CREATE INDEX IF NOT EXISTS index_items_type_time on items(type, time);
CREATE INDEX IF NOT EXISTS index_items_age_at_retrieval on items(retrieved - time);
COMMIT;
'''
|
|
|
|
def init_db():
    '''
    Open the database at the relative path 'hnarchive.db' and store the
    connection and a cursor in the module globals `sql` and `cur`.

    The DB_INIT script is executed only when the file did not exist before
    connecting.

    Raises RuntimeError if the path is a link but not a file.
    '''
    global sql
    global cur

    log.debug('Initializing database.')
    db_path = pathclass.Path('hnarchive.db')
    is_broken_link = db_path.is_link and not db_path.is_file
    if is_broken_link:
        raise RuntimeError(f'{db_path.absolute_path} is a broken link.')

    # Checked before connecting, while the answer still reflects whether a
    # database was already there.
    needs_setup = not db_path.is_file
    sql = sqlite3.connect(db_path.absolute_path)
    cur = sql.cursor()

    if needs_setup:
        log.debug('Running first-time database setup.')
        sqlhelpers.executescript(conn=sql, script=DB_INIT)
|
|
|
|
# HELPERS ##########################################################################################
|
|
|
|
def ctrlc_commit(function):
    '''
    Decorator for command handlers: if the wrapped function is interrupted
    by KeyboardInterrupt, commit the database and return 1 instead of
    letting the interrupt propagate.

    Any other outcome (return value or exception) passes through unchanged.
    '''
    import functools

    # wraps keeps the handler's __name__ and __doc__ on the wrapper.
    @functools.wraps(function)
    def wrapped(*args, **kwargs):
        try:
            return function(*args, **kwargs)
        except KeyboardInterrupt:
            commit()
            return 1
    return wrapped
|
|
|
|
def int_or_none(x):
    '''
    Convert x with int(), except that None is passed through as None.
    '''
    return None if x is None else int(x)
|
|
|
|
# API ##############################################################################################
|
|
|
|
def get(url, retries=1):
    '''
    GET the url through the shared session and return the response.

    url: the address to request.
    retries: the total number of attempts to make before giving up.

    HTTP 429, HTTP 5XX, connection errors, and read timeouts are retried
    with a backoff sleep between attempts. Any other error raised by the
    request or by httperrors.raise_for_status propagates immediately.

    Raises RuntimeError when all attempts have failed; the last underlying
    error is attached as its cause.
    '''
    bo = backoff.Quadratic(a=0.2, b=0, c=1, max=10)
    last_error = None
    while retries > 0:
        try:
            log.loud(url)
            response = session.get(url, timeout=2)
            httperrors.raise_for_status(response)
            return response
        except (
                httperrors.HTTP429,
                httperrors.HTTP5XX,
                requests.exceptions.ConnectionError,
                requests.exceptions.ReadTimeout,
            ) as exc:
            # Any other 4XX should raise.
            last_error = exc
            retries -= 1
            log.loud('Request failed, %d tries remain.', retries)
            # Sleeping is only useful if another attempt will follow.
            if retries > 0:
                time.sleep(bo.next())

    raise RuntimeError(f'Ran out of retries on {url}.') from last_error
|
|
|
|
def get_item(id):
    '''
    Download one item from the API, with up to 8 attempts.

    Returns the decoded JSON dict, or None when the API returns null or an
    item that has no 'time' field.
    '''
    response = get(f'https://hacker-news.firebaseio.com/v0/item/{id}.json', retries=8)
    item = response.json()

    # An item without 'time' is treated as nonexistent.
    # For example, 78692 from the api shows {"id": 78692, "type": "story"},
    # but the web says "No such item."
    # https://hacker-news.firebaseio.com/v0/item/78692.json
    # https://news.ycombinator.com/item?id=78692
    if item is None or 'time' not in item:
        return None

    return item
|
|
|
|
def get_items(ids, threads=None):
    '''
    Return a generator of items for the given ids.

    The multithreaded implementation is used when threads is greater than 1,
    otherwise the ids are fetched one at a time.
    '''
    if not threads or not threads > 1:
        return get_items_singlethreaded(ids)

    return get_items_multithreaded(ids, threads)
|
|
|
|
def get_items_multithreaded(ids, threads):
    '''
    Yield items for the given ids, fetching them with get_item on a thread
    pool of the given size.

    Ids for which get_item returned None are skipped. If a job raised an
    exception, it is re-raised here.
    '''
    def make_jobs():
        # One get_item job per id, produced lazily.
        for item_id in ids:
            yield {'function': get_item, 'kwargs': {'id': item_id}}

    pool = threadpool.ThreadPool(threads, paused=True)
    pool.add_generator(make_jobs())

    for job in pool.result_generator(buffer_size=250):
        if job.exception:
            raise job.exception
        if job.value is None:
            continue
        yield job.value
|
|
|
|
def get_items_singlethreaded(ids):
    '''
    Yield items for the given ids, fetched one at a time in order.
    Ids for which get_item returned None are skipped.
    '''
    for fetched in map(get_item, ids):
        if fetched is None:
            continue
        yield fetched
|
|
|
|
def get_latest_id():
    '''
    Ask the API's maxitem endpoint for the highest item id and return it
    as an int.
    '''
    response = get('https://hacker-news.firebaseio.com/v0/maxitem.json')
    return int(response.text)
|
|
|
|
def livestream():
    '''
    Yield items in an infinite loop, starting at the highest id in the
    database (or 1 if the database has no items) and counting upward.

    When the next id is not available, sleep with a linear backoff and try
    again.
    '''
    bo = backoff.Linear(m=2, b=5, max=60)
    current_id = select_latest_id() or 1
    # missed_loops:
    # Usually, livestream assumes that `item is None` means the requested item
    # id hasn't been published yet. But, if that item is actually just deleted,
    # we would be stuck waiting for it forever. missed_loops is used to
    # occasionally check get_latest_id to see if new items are available, so we
    # know that the current id is really just deleted.
    # Items are released in small batches of < ~10 at a time. It is important
    # that the number in `latest > id+XXX` is big enough that we are sure the
    # requested item is really dead and not just part of a fresh batch that
    # beat our check in a race condition (consider that between the last
    # iteration which triggered the check and the call to get_latest_id, the
    # item we were waiting for is published in a new batch). I chose 50 because
    # catching up with 50 items is not a big deal.
    missed_loops = 0
    while True:
        item = get_item(current_id)

        if item is not None:
            current_id += 1
            missed_loops = 0
            bo.rewind(2)
            yield item
            continue

        log.debug('%s does not exist yet.', current_id)
        missed_loops += 1
        # Only every fifth miss spends a request on get_latest_id.
        is_check_turn = (missed_loops % 5 == 0)
        if is_check_turn and get_latest_id() > (current_id + 50):
            log.debug('Skipping %s because future ids exist.', current_id)
            current_id += 1
            continue

        time.sleep(bo.next())
|
|
|
|
# DATABASE #########################################################################################
|
|
|
|
def commit():
    '''
    Log a message and commit the global database connection.
    '''
    log.info('Committing.')
    sql.commit()
|
|
|
|
def insert_item(data):
    '''
    Insert the API item dict into the database, or update the existing row
    if this id has been stored before.

    When updating, the author, text, poll, url, score, and title fall back
    to the stored values if they are absent from `data`. The other fields
    are always taken from `data`.

    Returns {'row': the dict that was written, 'is_new': bool}.
    '''
    item_id = data['id']
    retrieved = int(time.time())

    existing = select_item(item_id)
    is_new = existing is None
    # For a new item there is nothing to fall back on, so every lookup
    # into `previous` yields None.
    previous = {} if is_new else existing

    row = {
        'id': item_id,
        'deleted': bool(data.get('deleted', False)),
        'type': data['type'],
        'author': data.get('by', previous.get('author', None)),
        'time': int(data['time']),
        'text': data.get('text', previous.get('text', None)),
        'dead': bool(data.get('dead', False)),
        'parent': data.get('parent', None),
        'poll': data.get('poll', previous.get('poll', None)),
        'url': data.get('url', previous.get('url', None)),
        'score': int_or_none(data.get('score', previous.get('score', None))),
        'title': data.get('title', previous.get('title', None)),
        'descendants': int_or_none(data.get('descendants', None)),
        'retrieved': retrieved,
    }

    if is_new:
        log.info('Inserting item %s.', item_id)
        (qmarks, bindings) = sqlhelpers.insert_filler(row)
        cur.execute(f'INSERT INTO items {qmarks}', bindings)
        log.loud('Inserted item %s.', item_id)
    else:
        log.info('Updating item %s.', item_id)
        (qmarks, bindings) = sqlhelpers.update_filler(row, where_key='id')
        cur.execute(f'UPDATE items {qmarks}', bindings)
        log.loud('Updated item %s.', item_id)

    return {'row': row, 'is_new': is_new}
|
|
|
|
def insert_items(items, commit_period=200):
    '''
    Insert every item from the iterable, committing after each
    `commit_period` insertions and once more at the end.
    '''
    for (count, item) in enumerate(items, start=1):
        insert_item(item)
        if count % commit_period == 0:
            commit()
    commit()
|
|
|
|
def select_child_items(id):
    '''
    Return items whose parent is this id.

    Each item is a dict mapping column name to value. The list is empty if
    there are no children.
    '''
    cur.execute('SELECT * FROM items WHERE parent == ?', [id])
    rows = cur.fetchall()

    # Column names come from the cursor itself so the mapping always matches
    # the columns returned by SELECT *.
    columns = [column[0] for column in cur.description]
    return [dict(zip(columns, row)) for row in rows]
|
|
|
|
def select_poll_options(id):
    '''
    Return items that are pollopts under this given poll id.

    Each item is a dict mapping column name to value. The list is empty if
    no rows reference this poll.
    '''
    cur.execute('SELECT * FROM items WHERE poll == ?', [id])
    rows = cur.fetchall()

    # Column names come from the cursor itself so the mapping always matches
    # the columns returned by SELECT *.
    columns = [column[0] for column in cur.description]
    return [dict(zip(columns, row)) for row in rows]
|
|
|
|
def select_item(id):
    '''
    Return the stored item with this id as a dict mapping column name to
    value, or None if there is no such row.
    '''
    cur.execute('SELECT * FROM items WHERE id == ?', [id])
    row = cur.fetchone()

    if row is None:
        return None

    # Column names come from the cursor itself so the mapping always matches
    # the columns returned by SELECT *.
    columns = [column[0] for column in cur.description]
    return dict(zip(columns, row))
|
|
|
|
def select_latest_id():
    '''
    Return the highest id stored in the items table, or None if the table
    has no rows.
    '''
    cur.execute('SELECT id FROM items ORDER BY id DESC LIMIT 1')
    newest = cur.fetchone()
    return None if newest is None else newest[0]
|
|
|
|
# RENDERING ########################################################################################
|
|
|
|
def _fix_ptags(text):
|
|
'''
|
|
The text returned by HN only puts <p> in between paragraphs, they do
|
|
not add closing tags or put an opening <p> on the first paragraph.
|
|
|
|
If the user typed a literal <p> then it will have been stored with < and
|
|
> so it won't get messed up here.
|
|
'''
|
|
text = text.replace('<p>', '</p><p>')
|
|
text = '<p>' + text + '</p>'
|
|
return text
|
|
|
|
def build_item_tree(*, id=None, item=None):
    '''
    Build a treeclass.Tree rooted at the given item, recursively adding the
    stored child items beneath it.

    Exactly one of `id` or `item` must be given. With `id`, the item is
    looked up in the database first.

    Raises TypeError if both or neither are given, and ValueError if the id
    is not in the database.
    '''
    have_id = id is not None
    have_item = item is not None
    if have_id == have_item:
        raise TypeError('Please pass only one of id, item.')

    if have_id:
        item = select_item(id)
        if item is None:
            raise ValueError('We dont have that item in the database.')
    else:
        id = item['id']

    tree = treeclass.Tree(str(id), data=item)
    for child_item in select_child_items(id):
        subtree = build_item_tree(item=child_item)
        tree.add_child(subtree)
    return tree
|
|
|
|
def html_render_comment(*, soup, item):
    '''
    Build the <div> for a single comment: a byline paragraph (author link,
    separator, timestamp link) followed by the comment text.

    soup: the BeautifulSoup object used to create new tags.
    item: an item dict; the type, id, author, time, and text keys are read.

    A missing author or text is shown as '[deleted]'.
    '''
    div = soup.new_tag('div')
    div['class'] = item['type']
    div['id'] = item['id']

    userinfo = soup.new_tag('p')
    div.append(userinfo)

    author = item['author'] or '[deleted]'
    username = soup.new_tag('a', href=f'https://news.ycombinator.com/user?id={author}')
    username.append(author)
    userinfo.append(username)

    userinfo.append(' | ')

    # utcfromtimestamp is deprecated since Python 3.12. An aware UTC datetime
    # produces the same string with this format.
    date = datetime.datetime.fromtimestamp(item['time'], tz=datetime.timezone.utc)
    date = date.strftime('%Y %b %d %H:%M:%S')
    timestamp = soup.new_tag('a', href=f'https://news.ycombinator.com/item?id={item["id"]}')
    timestamp.append(date)
    userinfo.append(timestamp)

    text = item['text'] or '[deleted]'
    text = bs4.BeautifulSoup(_fix_ptags(text), 'html.parser')
    div.append(text)
    return div
|
|
|
|
def html_render_comment_tree(*, soup, tree):
    '''
    Render the comment held by this tree node, then recursively render its
    children, sorted by time, nested inside the same <div>.
    '''
    def by_time(node):
        return node.data['time']

    comment_div = html_render_comment(soup=soup, item=tree.data)

    for child in tree.list_children(sort=by_time):
        child_div = html_render_comment_tree(soup=soup, tree=child)
        comment_div.append(child_div)

    return comment_div
|
|
|
|
def html_render_job(*, soup, item):
    '''
    Build the <div> for a job item: the title as an <h1>, followed by the
    text if the item has any.
    '''
    container = soup.new_tag('div')
    container['class'] = item['type']
    container['id'] = item['id']

    heading = soup.new_tag('h1')
    heading.append(item['title'])
    container.append(heading)

    body_text = item['text']
    if body_text:
        container.append(bs4.BeautifulSoup(_fix_ptags(body_text), 'html.parser'))

    return container
|
|
|
|
def html_render_poll(*, soup, item):
    '''
    Render a poll the same way as a story, then append one rendered
    pollopt for each option stored under this poll's id.
    '''
    options = select_poll_options(item['id'])
    poll_div = html_render_story(soup=soup, item=item)
    for option in options:
        option_div = html_render_pollopt(soup=soup, item=option)
        poll_div.append(option_div)
    return poll_div
|
|
|
|
def html_render_pollopt(*, soup, item):
    '''
    Build the <div> for a single poll option: its text followed by a
    paragraph showing its score.
    '''
    container = soup.new_tag('div')
    container['class'] = item['type']

    option_text = bs4.BeautifulSoup(_fix_ptags(item['text']), 'html.parser')
    container.append(option_text)

    score_line = soup.new_tag('p')
    score_line.append(f'{item["score"]} points')
    container.append(score_line)

    return container
|
|
|
|
def html_render_story(*, soup, item):
    '''
    Build the <div> for a story: the title as an <h1> (linked if the item
    has a url), the text if any, and a byline paragraph with the author
    link, timestamp link, and score.

    soup: the BeautifulSoup object used to create new tags.
    item: an item dict; the type, id, url, title, text, author, time, and
        score keys are read.

    A missing title or author is shown as '[deleted]', the same placeholder
    html_render_comment uses, because None cannot be appended to a tag.
    '''
    div = soup.new_tag('div')
    div['class'] = item['type']
    div['id'] = item['id']

    title = item['title'] or '[deleted]'
    h = soup.new_tag('h1')
    div.append(h)
    if item['url']:
        a = soup.new_tag('a', href=item['url'])
        a.append(title)
        h.append(a)
    else:
        h.append(title)
    if item['text']:
        text = bs4.BeautifulSoup(_fix_ptags(item['text']), 'html.parser')
        div.append(text)

    userinfo = soup.new_tag('p')
    div.append(userinfo)

    author = item['author'] or '[deleted]'
    username = soup.new_tag('a', href=f'https://news.ycombinator.com/user?id={author}')
    username.append(author)
    userinfo.append(username)

    userinfo.append(' | ')

    # utcfromtimestamp is deprecated since Python 3.12. An aware UTC datetime
    # produces the same string with this format.
    date = datetime.datetime.fromtimestamp(item['time'], tz=datetime.timezone.utc)
    date = date.strftime('%Y %b %d %H:%M:%S')
    timestamp = soup.new_tag('a', href=f'https://news.ycombinator.com/item?id={item["id"]}')
    timestamp.append(date)
    userinfo.append(timestamp)

    userinfo.append(' | ')

    points = soup.new_tag('span')
    points.append(f'{item["score"]} points')
    userinfo.append(points)
    return div
|
|
|
|
def html_render_page(tree):
    '''
    Render the item tree into a complete HTML document and return it as a
    BeautifulSoup object.

    The document has a <head> with an inline stylesheet and a <body> whose
    content depends on the root item's type:
    - comment: the comment and its replies.
    - job: the job only.
    - poll, story: the item, followed by each child comment tree sorted by
      time.
    Any other type leaves the body empty.
    '''
    soup = bs4.BeautifulSoup()
    root = soup.new_tag('html')
    soup.append(root)

    head = soup.new_tag('head')
    root.append(head)

    style = soup.new_tag('style')
    style.append('''
    .comment,
    .job,
    .poll,
    .pollopt,
    .story
    {
        padding-left: 20px;
        margin-top: 4px;
        margin-right: 4px;
        margin-bottom: 4px;
    }
    .job, .poll, .story
    {
        border: 2px solid blue;
    }
    body > .story + .comment,
    body > .comment + .comment
    {
        margin-top: 10px;
    }
    .comment, .pollopt
    {
        border: 1px solid black;
    }
    ''')
    head.append(style)

    body = soup.new_tag('body')
    root.append(body)

    item = tree.data
    item_type = item['type']

    if item_type == 'comment':
        body.append(html_render_comment_tree(soup=soup, tree=tree))

    elif item_type == 'job':
        body.append(html_render_job(soup=soup, item=item))

    elif item_type in ('poll', 'story'):
        # Polls and stories share a layout: the item, then its comments.
        if item_type == 'poll':
            body.append(html_render_poll(soup=soup, item=item))
        else:
            body.append(html_render_story(soup=soup, item=item))
        for child in tree.list_children(sort=lambda node: node.data['time']):
            body.append(html_render_comment_tree(soup=soup, tree=child))

    return soup
|
|
|
|
# COMMAND LINE #####################################################################################
|
|
|
|
@ctrlc_commit
def get_argparse(args):
    '''
    Handler for the `get` subcommand: download and store every item from
    args.lower through args.upper inclusive. If args.upper is not given,
    the API's latest id is used.
    '''
    init_db()
    first_id = args.lower
    last_id = args.upper or get_latest_id()

    items = get_items(range(first_id, last_id+1), threads=args.threads)
    insert_items(items, commit_period=args.commit_period)
    return 0
|
|
|
|
def html_render_argparse(args):
    '''
    Handler for the `html_render` subcommand: render each id in args.ids
    to an HTML page.

    If args.output is set, "{id}" in it is formatted with the item id and
    the page is written to that file as UTF-8. Otherwise the page is
    printed.

    Returns 0, like the other subcommand handlers.
    '''
    init_db()
    for id in args.ids:
        tree = build_item_tree(id=id)
        soup = html_render_page(tree)
        rendered = str(soup)
        if args.output:
            filename = args.output.format(id=id)
            with open(filename, 'w', encoding='utf-8') as handle:
                handle.write(rendered)
        else:
            print(rendered)
    return 0
|
|
|
|
@ctrlc_commit
def livestream_argparse(args):
    '''
    Handler for the `livestream` subcommand: turn on NOTIFY_EVERY_LINE and
    store items from livestream() as they arrive.
    '''
    init_db()
    NOTIFY_EVERY_LINE.set(True)
    stream = livestream()
    insert_items(stream, commit_period=args.commit_period)
    return 0
|
|
|
|
@ctrlc_commit
def update_argparse(args):
    '''
    Handler for the `update` subcommand: repeatedly download the items
    between the highest id in the database and the API's latest id, until
    the database has caught up.

    The loop also stops when a pass fails to raise the highest stored id.
    This happens when the newest ids do not resolve to items (get_item
    returns None for them), and without this check the same range would be
    requested again and again with no delay.
    '''
    init_db()
    previous_lower = None
    while True:
        lower = select_latest_id() or 1
        upper = get_latest_id()
        if lower >= upper:
            break

        if lower == previous_lower:
            log.info('No new items could be retrieved above %d.', lower)
            break
        previous_lower = lower

        ids = range(lower, upper+1)
        items = get_items(ids, threads=args.threads)

        insert_items(items, commit_period=args.commit_period)
    return 0
|
|
|
|
@ctrlc_commit
def update_items_argparse(args):
    '''
    Handler for the `update_items` subcommand: redownload the items whose
    retrieval time is at most args.days days after their submission time.

    With args.only_mature, the selection is further limited to items
    submitted more than 14 days before now.
    '''
    init_db()
    max_age = args.days * 86400

    query = 'SELECT id FROM items WHERE retrieved - time <= ?'
    bindings = [max_age]
    if args.only_mature:
        query += ' AND time < ?'
        bindings.append(time.time() - (86400 * 14))

    cur.execute(query, bindings)
    rows = cur.fetchall()

    log.info('Updating %d items.', len(rows))

    if not rows:
        return 0

    # Each row is a 1-tuple holding the id.
    ids = [row[0] for row in rows]
    items = get_items(ids, threads=args.threads)

    insert_items(items, commit_period=args.commit_period)
    return 0
|
|
|
|
# Shared mutable flag handed to operatornotify below; livestream_argparse
# switches it on.
NOTIFY_EVERY_LINE = mutables.Boolean(False)

@operatornotify.main_decorator(subject='hnarchive.py', notify_every_line=NOTIFY_EVERY_LINE)
@vlogging.main_decorator
def main(argv):
    '''
    Build the command line parser with its subcommands (get, html_render,
    livestream, update, update_items) and hand it to betterhelp.go together
    with argv.
    '''
    parser = argparse.ArgumentParser(description='Hacker News downloader.')
    subparsers = parser.add_subparsers()

    # get ##########################################################################################

    get_parser = subparsers.add_parser(
        'get',
        description='''
        Get items between two IDs, inclusive.
        ''',
    )
    get_parser.add_argument(
        '--lower',
        type=int,
        default=1,
        help='''
        Lower bound item ID.
        ''',
    )
    get_parser.add_argument(
        '--upper',
        type=int,
        default=None,
        help='''
        Upper bound item ID.
        Default: most recent post.
        ''',
    )
    get_parser.add_argument(
        '--threads',
        type=int,
        default=1,
        help='''
        Use this many threads to download items.
        ''',
    )
    get_parser.add_argument(
        '--commit_period', '--commit-period',
        type=int,
        default=200,
        help='''
        Commit the database after every this many insertions.
        ''',
    )
    get_parser.set_defaults(func=get_argparse)

    # html_render ##################################################################################

    html_render_parser = subparsers.add_parser(
        'html_render',
        aliases=['html-render'],
        description='''
        Render items to HTML -- stories, comment trees, etc.
        ''',
    )
    html_render_parser.add_argument(
        'ids',
        nargs='+',
        type=int,
        help='''
        One or more ids to render.
        ''',
    )
    html_render_parser.add_argument(
        '--output',
        type=str,
        default=None,
        help='''
        Save the html to this file. Your filename may include "{id}" and
        the item's ID will be formatted into the string. This will be necessary
        if you are rendering multiple IDs in a single invocation.
        ''',
    )
    html_render_parser.set_defaults(func=html_render_argparse)

    # livestream ###################################################################################

    livestream_parser = subparsers.add_parser(
        'livestream',
        description='''
        Watch for new items in an infinite loop.

        Starts from the most recent id in the database.
        ''',
    )
    livestream_parser.add_argument(
        '--commit_period', '--commit-period',
        type=int,
        default=200,
        help='''
        Commit the database after every this many insertions.
        ''',
    )
    livestream_parser.set_defaults(func=livestream_argparse)

    # update #######################################################################################

    update_parser = subparsers.add_parser(
        'update',
        description='''
        Get new items, from the highest ID in the database to the present.
        ''',
    )
    update_parser.add_argument(
        '--threads',
        type=int,
        default=1,
        help='''
        Use this many threads to download items.
        ''',
    )
    update_parser.add_argument(
        '--commit_period', '--commit-period',
        type=int,
        default=200,
        help='''
        Commit the database after every this many insertions.
        ''',
    )
    update_parser.set_defaults(func=update_argparse)

    # update_items #################################################################################

    update_items_parser = subparsers.add_parser(
        'update_items',
        aliases=['update-items'],
        description='''
        Redownload items to update their scores, descendant counts, etc.
        ''',
    )
    update_items_parser.add_argument(
        '--days',
        type=float,
        required=True,
        help='''
        Update items where the retrieval date is less than X days ahead of the
        submission date.
        Stories are only open for comments for 14 days, so the `descendants`
        count of any story younger than 14 days should be considered volatile.
        It seems the upvote button does not disappear at any age, though I
        don't know whether votes on old submissions will actually count.
        Regardless, votes and comments tend to solidify within a day or two
        after submission so a small number should be sufficient.
        ''',
    )
    update_items_parser.add_argument(
        '--threads',
        type=int,
        default=1,
        help='''
        Use this many threads to download items.
        ''',
    )
    update_items_parser.add_argument(
        '--only_mature', '--only-mature',
        action='store_true',
        help='''
        If True, only update items where the submission date is more than 14
        days ago. Without this, you will be updating items which are very close
        to the present time, an effort which you may find wasteful.
        ''',
    )
    update_items_parser.add_argument(
        '--commit_period', '--commit-period',
        type=int,
        default=200,
        help='''
        Commit the database after every this many insertions.
        ''',
    )
    update_items_parser.set_defaults(func=update_items_argparse)

    return betterhelp.go(parser, argv)
|
|
|
|
if __name__ == '__main__':
    # The value returned by main becomes the process exit status.
    exit_status = main(sys.argv[1:])
    raise SystemExit(exit_status)
|