Log the size of received batches from pushshift.
This commit is contained in:
parent
83d5a8e70e
commit
e136aeea4e
1 changed file with 2 additions and 0 deletions
|
@@ -109,6 +109,7 @@ def _pagination_core(url, params, dummy_type, lower=None, upper=None):
|
||||||
|
|
||||||
setify = lambda items: set(item['id'] for item in items)
|
setify = lambda items: set(item['id'] for item in items)
|
||||||
prev_batch_ids = set()
|
prev_batch_ids = set()
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
for retry in range(5):
|
for retry in range(5):
|
||||||
try:
|
try:
|
||||||
|
@@ -120,6 +121,7 @@ def _pagination_core(url, params, dummy_type, lower=None, upper=None):
|
||||||
else:
|
else:
|
||||||
break
|
break
|
||||||
|
|
||||||
|
common.log.debug('Got batch of %d items.', len(batch))
|
||||||
batch_ids = setify(batch)
|
batch_ids = setify(batch)
|
||||||
if len(batch_ids) == 0 or batch_ids.issubset(prev_batch_ids):
|
if len(batch_ids) == 0 or batch_ids.issubset(prev_batch_ids):
|
||||||
break
|
break
|
||||||
|
|
Loading…
Reference in a new issue