From 29c80b26620889009712fe514603eb0d96598510 Mon Sep 17 00:00:00 2001
From: Ethan Dalool <ethan@voussoir.net>
Date: Fri, 9 Oct 2020 13:13:40 -0700
Subject: [PATCH] Use updated threadpool, add some docstrings.

---
 droidz.py | 27 ++++++++++++++++++---------
 1 file changed, 18 insertions(+), 9 deletions(-)

diff --git a/droidz.py b/droidz.py
index b3210f2..11f7648 100644
--- a/droidz.py
+++ b/droidz.py
@@ -135,6 +135,9 @@ def request(url):
     return response
 
 def scrape_direct(id, commit=True):
+    '''
+    Return the dict of Stick data for this ID.
+    '''
     url = f'http://droidz.org/direct/{id}'
     response = request(url)
     text = response.text
@@ -186,6 +189,9 @@ def scrape_direct(id, commit=True):
     return data
 
 def scrape_directs(ids, threads=1, commit=True):
+    '''
+    Given many Stick IDs, yield the dict of Stick data for each one.
+    '''
     if threads < 1:
         raise ValueError(threads)
 
@@ -199,15 +205,17 @@ def scrape_directs(ids, threads=1, commit=True):
             {'function': scrape_direct, 'args': [id], 'name': id}
             for id in ids
         ]
-        jobs = pool.add_many(kwargss)
-        while jobs:
-            job = jobs.pop(0)
-            job.join()
+        pool.add_many(kwargss)
+        for job in pool.result_generator():
             if job.exception:
                 raise job.exception
             yield job.value
 
 def scrape_category(category):
+    '''
+    Yield Stick IDs from all pages within this category. They are listed in
+    alphabetical order by Stick name.
+    '''
     page = 1
     all_directs = set()
     while True:
@@ -225,6 +233,9 @@ def scrape_category(category):
             yield id
 
 def scrape_latest():
+    '''
+    Yield the latest Stick IDs from the /stickmain homepage, most recent first.
+    '''
     url = 'http://droidz.org/stickmain/'
     response = request(url)
     soup = bs4.BeautifulSoup(response.text, 'html.parser')
@@ -256,8 +267,7 @@ def incremental_update(threads=1):
     else:
         print('No new sticks for incremental update.')
 
-    cur = sql.cursor()
-    cur.execute('SELECT id FROM sticks WHERE retrieved IS NULL')
+    cur = sql.execute('SELECT id FROM sticks WHERE retrieved IS NULL')
     ids = [row[0] for row in cur.fetchall()]
 
     sticks = scrape_directs(ids, threads=threads)
@@ -288,8 +298,7 @@ def download_stick(id, overwrite=False, extract=False):
     if directory.exists and not overwrite:
         return directory
 
-    cur = sql.cursor()
-    cur.execute('SELECT download_link FROM sticks WHERE id == ?', [id])
+    cur = sql.execute('SELECT download_link FROM sticks WHERE id == ?', [id])
     download_link = cur.fetchone()[0]
     filename = re.search(r'file=(.+)', download_link).group(1)
     filepath = directory.with_child(filename)
@@ -302,7 +311,7 @@ def download_stick(id, overwrite=False, extract=False):
     with filepath.open('wb') as handle:
         handle.write(response.content)
 
-    if extract and filepath.extension == 'zip':
+    if extract and WINRAR is not None and filepath.extension == 'zip':
         # As much as I would like to use Python's zipfile module, I found that
         # some of the .zips on the site are actually rars.
         command = [