Hardcode pushshift ratelimit due to removal of /meta.

master
voussoir 2023-02-22 17:17:45 -08:00
parent 5a4ff3bc02
commit 2c0ae7291a
1 changed file with 3 additions and 17 deletions

View File

@@ -26,8 +26,8 @@ API_URL = 'https://api.pushshift.io/reddit/'
DEFAULT_PARAMS = { DEFAULT_PARAMS = {
'size': 1000, 'size': 1000,
'sort': 'asc', 'order': 'asc',
'sort_type': 'created_utc', 'sort': 'created_utc',
} }
# Pushshift does not supply attributes that are null. So we fill them back in. # Pushshift does not supply attributes that are null. So we fill them back in.
@@ -51,7 +51,7 @@ useragent = USERAGENT.format(version=common.VERSION, contact=common.bot.CONTACT_
ratelimit = None ratelimit = None
session = requests.Session() session = requests.Session()
session.headers.update({'User-Agent': useragent}) session.headers.update({'User-Agent': useragent})
ratelimit = ratelimiter.Ratelimiter(allowance=120, period=60)
class DummyObject: class DummyObject:
''' '''
@@ -140,21 +140,7 @@ def _pagination_core(url, params, dummy_type, lower=None, upper=None):
prev_batch_ids = batch_ids prev_batch_ids = batch_ids
ratelimit.limit() ratelimit.limit()
def _initialize_ratelimiter():
    '''
    Create the module-level pushshift ratelimiter the first time it is needed.

    Does nothing if `ratelimit` has already been set. Otherwise, ask
    pushshift's /meta endpoint for `server_ratelimit_per_minute` and build a
    Ratelimiter with that allowance over a 60 second period.

    If /meta cannot be reached, returns an error status, or does not contain
    the expected key, a fallback allowance of 120 requests per minute is used
    instead of raising, so that a missing /meta endpoint does not break every
    request made through this module.
    '''
    global ratelimit
    if ratelimit is not None:
        return

    log.debug('Initializing pushshift ratelimiter.')
    url = 'https://api.pushshift.io/meta'
    # Used when the server will not tell us its real limit.
    fallback_limit = 120
    try:
        # Without a timeout, an unresponsive endpoint would hang the very
        # first request forever.
        response = session.get(url, timeout=30)
        response.raise_for_status()
        limit = response.json()['server_ratelimit_per_minute']
    except (requests.exceptions.RequestException, ValueError, KeyError, TypeError) as exc:
        # ValueError covers a non-JSON body; KeyError / TypeError cover a JSON
        # body that does not have the shape we expect.
        log.warning(
            'Could not read pushshift ratelimit from %s (%r). Falling back to %d.',
            url,
            exc,
            fallback_limit,
        )
        limit = fallback_limit

    log.debug('Pushshift ratelimit is %d requests per minute.', limit)
    ratelimit = ratelimiter.Ratelimiter(allowance=limit, period=60)
def get(url, params=None): def get(url, params=None):
_initialize_ratelimiter()
if not url.startswith('https://'): if not url.startswith('https://'):
url = API_URL + url.lstrip('/') url = API_URL + url.lstrip('/')