From e136aeea4ebb8f7218ffd787fc144908aa27cded Mon Sep 17 00:00:00 2001 From: Ethan Dalool Date: Sun, 6 Sep 2020 21:41:44 -0700 Subject: [PATCH] Log the size of received batches from pushshift. --- timesearch_modules/pushshift.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/timesearch_modules/pushshift.py b/timesearch_modules/pushshift.py index c01c670..7a5f6ff 100644 --- a/timesearch_modules/pushshift.py +++ b/timesearch_modules/pushshift.py @@ -109,6 +109,7 @@ def _pagination_core(url, params, dummy_type, lower=None, upper=None): setify = lambda items: set(item['id'] for item in items) prev_batch_ids = set() + while True: for retry in range(5): try: @@ -120,6 +121,7 @@ def _pagination_core(url, params, dummy_type, lower=None, upper=None): else: break + common.log.debug('Got batch of %d items.', len(batch)) batch_ids = setify(batch) if len(batch_ids) == 0 or batch_ids.issubset(prev_batch_ids): break