Add a couple of retries to pushshift calls.
I was getting some 503 responses recently.
This commit is contained in:
parent
bca5addab3
commit
dba35e3c9b
1 changed file with 9 additions and 1 deletion
|
@ -10,6 +10,7 @@ as scanning all of a user's comments.
|
||||||
import html
|
import html
|
||||||
import requests
|
import requests
|
||||||
import time
|
import time
|
||||||
|
import traceback
|
||||||
|
|
||||||
from . import common
|
from . import common
|
||||||
|
|
||||||
|
@ -109,7 +110,14 @@ def _pagination_core(url, params, dummy_type, lower=None, upper=None):
|
||||||
setify = lambda items: set(item['id'] for item in items)
|
setify = lambda items: set(item['id'] for item in items)
|
||||||
prev_batch_ids = set()
|
prev_batch_ids = set()
|
||||||
while True:
|
while True:
|
||||||
|
for retry in range(5):
|
||||||
|
try:
|
||||||
batch = get(url, params)
|
batch = get(url, params)
|
||||||
|
except requests.exceptions.HTTPError as exc:
|
||||||
|
traceback.print_exc()
|
||||||
|
print('Retrying in 5...')
|
||||||
|
time.sleep(5)
|
||||||
|
|
||||||
batch_ids = setify(batch)
|
batch_ids = setify(batch)
|
||||||
if len(batch_ids) == 0 or batch_ids.issubset(prev_batch_ids):
|
if len(batch_ids) == 0 or batch_ids.issubset(prev_batch_ids):
|
||||||
break
|
break
|
||||||
|
|
Loading…
Reference in a new issue