else
This commit is contained in:
parent
fa2c2bda76
commit
64de5c94bb
14 changed files with 351 additions and 149 deletions
|
@ -133,6 +133,7 @@ import requests
|
||||||
import shutil
|
import shutil
|
||||||
import sqlite3
|
import sqlite3
|
||||||
import sys
|
import sys
|
||||||
|
import threading
|
||||||
import time
|
import time
|
||||||
## import tkinter
|
## import tkinter
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
|
@ -143,6 +144,7 @@ from voussoirkit import downloady
|
||||||
from voussoirkit import fusker
|
from voussoirkit import fusker
|
||||||
from voussoirkit import treeclass
|
from voussoirkit import treeclass
|
||||||
from voussoirkit import pathtree
|
from voussoirkit import pathtree
|
||||||
|
sys.path.append('C:\\git\\else\\threadqueue'); import threadqueue
|
||||||
|
|
||||||
DOWNLOAD_CHUNK = 16 * bytestring.KIBIBYTE
|
DOWNLOAD_CHUNK = 16 * bytestring.KIBIBYTE
|
||||||
FILENAME_BADCHARS = '/\\:*?"<>|'
|
FILENAME_BADCHARS = '/\\:*?"<>|'
|
||||||
|
@ -184,6 +186,7 @@ SKIPPABLE_FILETYPES = [
|
||||||
'.pdf',
|
'.pdf',
|
||||||
'.png',
|
'.png',
|
||||||
'.rar',
|
'.rar',
|
||||||
|
'.sfv',
|
||||||
'.srt',
|
'.srt',
|
||||||
'.tar',
|
'.tar',
|
||||||
'.ttf',
|
'.ttf',
|
||||||
|
@ -237,7 +240,7 @@ class Walker:
|
||||||
'''
|
'''
|
||||||
This class manages the extraction and saving of URLs, given a starting root url.
|
This class manages the extraction and saving of URLs, given a starting root url.
|
||||||
'''
|
'''
|
||||||
def __init__(self, root_url, databasename=None, fullscan=False):
|
def __init__(self, root_url, databasename=None, fullscan=False, threads=1):
|
||||||
if not root_url.endswith('/'):
|
if not root_url.endswith('/'):
|
||||||
root_url += '/'
|
root_url += '/'
|
||||||
if '://' not in root_url.split('.')[0]:
|
if '://' not in root_url.split('.')[0]:
|
||||||
|
@ -255,6 +258,8 @@ class Walker:
|
||||||
self.cur = self.sql.cursor()
|
self.cur = self.sql.cursor()
|
||||||
db_init(self.sql, self.cur)
|
db_init(self.sql, self.cur)
|
||||||
|
|
||||||
|
self.thread_queue = threadqueue.ThreadQueue(threads)
|
||||||
|
self._main_thread = threading.current_thread().ident
|
||||||
self.fullscan = bool(fullscan)
|
self.fullscan = bool(fullscan)
|
||||||
self.queue = collections.deque()
|
self.queue = collections.deque()
|
||||||
self.seen_directories = set()
|
self.seen_directories = set()
|
||||||
|
@ -326,10 +331,15 @@ class Walker:
|
||||||
skippable = any(urll.endswith(ext) for ext in SKIPPABLE_FILETYPES)
|
skippable = any(urll.endswith(ext) for ext in SKIPPABLE_FILETYPES)
|
||||||
if skippable:
|
if skippable:
|
||||||
write('Skipping "%s" due to extension.' % url)
|
write('Skipping "%s" due to extension.' % url)
|
||||||
self.smart_insert(url=url, commit=False)
|
#self.smart_insert(url=url, commit=False)
|
||||||
|
#return {'url': url, 'commit': False}
|
||||||
|
self.thread_queue.behalf(self._main_thread, self.smart_insert, url=url, commit=False)
|
||||||
return
|
return
|
||||||
self.cur.execute('SELECT * FROM urls WHERE url == ?', [url])
|
skippable = lambda: self.cur.execute('SELECT * FROM urls WHERE url == ?', [url]).fetchone()
|
||||||
skippable = self.cur.fetchone() is not None
|
skippable = self.thread_queue.behalf(self._main_thread, skippable)
|
||||||
|
#print(skippable)
|
||||||
|
skippable = skippable is not None
|
||||||
|
#skippable = self.cur.fetchone() is not None
|
||||||
if skippable:
|
if skippable:
|
||||||
write('Skipping "%s" since we already have it.' % url)
|
write('Skipping "%s" since we already have it.' % url)
|
||||||
return
|
return
|
||||||
|
@ -359,28 +369,34 @@ class Walker:
|
||||||
if href in self.seen_directories:
|
if href in self.seen_directories:
|
||||||
continue
|
continue
|
||||||
else:
|
else:
|
||||||
self.queue.append(href)
|
#self.queue.append(href)
|
||||||
|
self.thread_queue.add(self.process_url, href)
|
||||||
added += 1
|
added += 1
|
||||||
write('Queued %d urls' % added)
|
write('Queued %d urls' % added)
|
||||||
else:
|
else:
|
||||||
# This is not an index page, so save it.
|
# This is not an index page, so save it.
|
||||||
self.smart_insert(head=head, commit=False)
|
#self.smart_insert(head=head, commit=False)
|
||||||
|
self.thread_queue.behalf(self._main_thread, self.smart_insert, head=head, commit=False)
|
||||||
|
#return {'head': head, 'commit': False}
|
||||||
|
|
||||||
def walk(self, url=None):
|
def walk(self, url=None):
|
||||||
'''
|
'''
|
||||||
Given a starting URL (defaults to self.root_url), continually extract
|
Given a starting URL (defaults to self.root_url), continually extract
|
||||||
links from the page and repeat.
|
links from the page and repeat.
|
||||||
'''
|
'''
|
||||||
self.queue.appendleft(url)
|
#self.queue.appendleft(url)
|
||||||
try:
|
self.thread_queue.add(self.process_url, url)
|
||||||
while len(self.queue) > 0:
|
for return_value in self.thread_queue.run(hold_open=False):
|
||||||
url = self.queue.popleft()
|
pass
|
||||||
self.process_url(url)
|
#try:
|
||||||
line = '{:,} Remaining'.format(len(self.queue))
|
# while len(self.queue) > 0:
|
||||||
write(line)
|
# url = self.queue.popleft()
|
||||||
except:
|
# self.process_url(url)
|
||||||
self.sql.commit()
|
# line = '{:,} Remaining'.format(len(self.queue))
|
||||||
raise
|
# write(line)
|
||||||
|
#except:
|
||||||
|
# self.sql.commit()
|
||||||
|
# raise
|
||||||
self.sql.commit()
|
self.sql.commit()
|
||||||
## ##
|
## ##
|
||||||
## WALKER ##########################################################################################
|
## WALKER ##########################################################################################
|
||||||
|
@ -584,7 +600,7 @@ def write(line, file_handle=None, **kwargs):
|
||||||
|
|
||||||
## COMMANDLINE FUNCTIONS ###########################################################################
|
## COMMANDLINE FUNCTIONS ###########################################################################
|
||||||
## ##
|
## ##
|
||||||
def digest(root_url, databasename=None, fullscan=False):
|
def digest(root_url, databasename=None, fullscan=False, threads=1):
|
||||||
if root_url in ('!clipboard', '!c'):
|
if root_url in ('!clipboard', '!c'):
|
||||||
root_url = get_clipboard()
|
root_url = get_clipboard()
|
||||||
write('From clipboard: %s' % root_url)
|
write('From clipboard: %s' % root_url)
|
||||||
|
@ -592,6 +608,7 @@ def digest(root_url, databasename=None, fullscan=False):
|
||||||
databasename=databasename,
|
databasename=databasename,
|
||||||
fullscan=fullscan,
|
fullscan=fullscan,
|
||||||
root_url=root_url,
|
root_url=root_url,
|
||||||
|
threads=threads,
|
||||||
)
|
)
|
||||||
walker.walk()
|
walker.walk()
|
||||||
|
|
||||||
|
@ -600,6 +617,7 @@ def digest_argparse(args):
|
||||||
databasename=args.databasename,
|
databasename=args.databasename,
|
||||||
fullscan=args.fullscan,
|
fullscan=args.fullscan,
|
||||||
root_url=args.root_url,
|
root_url=args.root_url,
|
||||||
|
threads=int(args.threads),
|
||||||
)
|
)
|
||||||
|
|
||||||
def download(
|
def download(
|
||||||
|
@ -818,8 +836,7 @@ def measure(databasename, fullscan=False, new_only=False, threads=4):
|
||||||
if threads is None:
|
if threads is None:
|
||||||
threads = 1
|
threads = 1
|
||||||
|
|
||||||
threadpool = concurrent.futures.ThreadPoolExecutor(threads)
|
thread_queue = threadqueue.ThreadQueue(threads)
|
||||||
thread_promises = []
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
for fetch in items:
|
for fetch in items:
|
||||||
|
@ -827,8 +844,7 @@ def measure(databasename, fullscan=False, new_only=False, threads=4):
|
||||||
|
|
||||||
if fullscan or new_only:
|
if fullscan or new_only:
|
||||||
url = fetch[SQL_URL]
|
url = fetch[SQL_URL]
|
||||||
promise = threadpool.submit(do_head, url, raise_for_status=False)
|
thread_queue.add(do_head, url, raise_for_status=False)
|
||||||
thread_promises.append(promise)
|
|
||||||
|
|
||||||
elif size is None:
|
elif size is None:
|
||||||
# Unmeasured and no intention to measure.
|
# Unmeasured and no intention to measure.
|
||||||
|
@ -837,16 +853,15 @@ def measure(databasename, fullscan=False, new_only=False, threads=4):
|
||||||
else:
|
else:
|
||||||
totalsize += size
|
totalsize += size
|
||||||
|
|
||||||
for head in promise_results(thread_promises):
|
for head in thread_queue.run():
|
||||||
fetch = smart_insert(sql, cur, head=head, commit=True)
|
fetch = smart_insert(sql, cur, head=head, commit=False)
|
||||||
size = fetch[SQL_CONTENT_LENGTH]
|
size = fetch[SQL_CONTENT_LENGTH]
|
||||||
if size is None:
|
if size is None:
|
||||||
write('"%s" is not revealing Content-Length' % url)
|
write('"%s" is not revealing Content-Length' % url)
|
||||||
size = 0
|
size = 0
|
||||||
totalsize += size
|
totalsize += size
|
||||||
except (Exception, KeyboardInterrupt):
|
except (Exception, KeyboardInterrupt):
|
||||||
for promise in thread_promises:
|
sql.commit()
|
||||||
promise.cancel()
|
|
||||||
raise
|
raise
|
||||||
|
|
||||||
sql.commit()
|
sql.commit()
|
||||||
|
@ -938,6 +953,7 @@ def main(argv):
|
||||||
p_digest.add_argument('root_url')
|
p_digest.add_argument('root_url')
|
||||||
p_digest.add_argument('-db', '--database', dest='databasename', default=None)
|
p_digest.add_argument('-db', '--database', dest='databasename', default=None)
|
||||||
p_digest.add_argument('-f', '--fullscan', dest='fullscan', action='store_true')
|
p_digest.add_argument('-f', '--fullscan', dest='fullscan', action='store_true')
|
||||||
|
p_digest.add_argument('-t', '--threads', dest='threads', default=1)
|
||||||
p_digest.set_defaults(func=digest_argparse)
|
p_digest.set_defaults(func=digest_argparse)
|
||||||
|
|
||||||
p_download = subparsers.add_parser('download')
|
p_download = subparsers.add_parser('download')
|
||||||
|
|
|
@ -45,6 +45,10 @@ class Path:
|
||||||
def exists(self):
|
def exists(self):
|
||||||
return os.path.exists(self.absolute_path)
|
return os.path.exists(self.absolute_path)
|
||||||
|
|
||||||
|
@property
def extension(self):
    '''
    The extension of absolute_path without its leading dot, or an empty
    string when the path has no extension.
    '''
    (_, dotted) = os.path.splitext(self.absolute_path)
    return dotted.lstrip('.')
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def is_dir(self):
|
def is_dir(self):
|
||||||
return os.path.isdir(self.absolute_path)
|
return os.path.isdir(self.absolute_path)
|
||||||
|
@ -62,6 +66,11 @@ class Path:
|
||||||
raise TypeError('subpath must be a string')
|
raise TypeError('subpath must be a string')
|
||||||
return Path(os.path.join(self.absolute_path, subpath))
|
return Path(os.path.join(self.absolute_path, subpath))
|
||||||
|
|
||||||
|
def listdir(self):
    '''
    Return a list with one entry per name in this directory, each produced
    by passing the name to self.with_child.
    '''
    return [self.with_child(name) for name in os.listdir(self.absolute_path)]
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def normcase(self):
|
def normcase(self):
|
||||||
return os.path.normcase(self.absolute_path)
|
return os.path.normcase(self.absolute_path)
|
||||||
|
@ -90,6 +99,15 @@ class Path:
|
||||||
backsteps = os.sep.join('..' for x in range(backsteps))
|
backsteps = os.sep.join('..' for x in range(backsteps))
|
||||||
return self.absolute_path.replace(common.absolute_path, backsteps)
|
return self.absolute_path.replace(common.absolute_path, backsteps)
|
||||||
|
|
||||||
|
def replace_extension(self, extension):
    '''
    Return a new Path equal to this one with its extension swapped out.

    Only the text after the last dot of `extension` is used, so 'txt' and
    '.txt' give the same result. If that text is empty, the returned Path
    has no extension at all.
    '''
    suffix = extension.rsplit('.', 1)[-1]
    (stem, _) = os.path.splitext(self.absolute_path)

    if suffix:
        return Path(stem + '.' + suffix)

    return Path(stem)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def size(self):
|
def size(self):
|
||||||
if self.is_file:
|
if self.is_file:
|
||||||
|
@ -105,6 +123,7 @@ class Path:
|
||||||
return self.join(os.path.basename(basename))
|
return self.join(os.path.basename(basename))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def common_path(paths, fallback):
|
def common_path(paths, fallback):
|
||||||
'''
|
'''
|
||||||
Given a list of file paths, determine the deepest path which all
|
Given a list of file paths, determine the deepest path which all
|
||||||
|
@ -171,9 +190,10 @@ def get_path_casing(path):
|
||||||
except IndexError:
|
except IndexError:
|
||||||
return input_path.absolute_path
|
return input_path.absolute_path
|
||||||
|
|
||||||
imaginary_portion = input_path.normcase
|
imaginary_portion = input_path.absolute_path
|
||||||
real_portion = os.path.normcase(cased)
|
imaginary_portion = imaginary_portion[len(cased):]
|
||||||
imaginary_portion = imaginary_portion.replace(real_portion, '')
|
#real_portion = os.path.normcase(cased)
|
||||||
|
#imaginary_portion = imaginary_portion.replace(real_portion, '')
|
||||||
imaginary_portion = imaginary_portion.lstrip(os.sep)
|
imaginary_portion = imaginary_portion.lstrip(os.sep)
|
||||||
cased = os.path.join(cased, imaginary_portion)
|
cased = os.path.join(cased, imaginary_portion)
|
||||||
cased = cased.rstrip(os.sep)
|
cased = cased.rstrip(os.sep)
|
||||||
|
|
10
Pathclass/test_pathclass.py
Normal file
10
Pathclass/test_pathclass.py
Normal file
|
@ -0,0 +1,10 @@
|
||||||
|
import pathclass
|
||||||
|
import unittest
|
||||||
|
|
||||||
|
class Tests(unittest.TestCase):
    '''
    Checks the casing that pathclass.get_path_casing returns.
    '''
    def test_something(self):
        # Lowercase input for a directory expected to come back as 'Users'.
        existing = pathclass.get_path_casing('C:\\users')
        self.assertEqual('C:\\Users', existing)

        # The trailing 'Nonexist' component is expected to pass through
        # unchanged while the leading part is still corrected.
        nonexistent = pathclass.get_path_casing('C:\\users\\Nonexist')
        self.assertEqual('C:\\Users\\Nonexist', nonexistent)


if __name__ == '__main__':
    unittest.main()
|
|
@ -1,9 +1,10 @@
|
||||||
Continue
|
Continue
|
||||||
========
|
========
|
||||||
|
|
||||||
Discards the current iteration, and restarts the loop using the next item.
|
Skips the rest of the current iteration, and starts the next one.
|
||||||
|
|
||||||
|
|
||||||
|
```Python
|
||||||
>>> for x in range(6):
|
>>> for x in range(6):
|
||||||
... if x == 3:
|
... if x == 3:
|
||||||
... continue
|
... continue
|
||||||
|
@ -14,38 +15,63 @@ Discards the current iteration, and restarts the loop using the next item.
|
||||||
2
|
2
|
||||||
4
|
4
|
||||||
5
|
5
|
||||||
|
```
|
||||||
|
|
||||||
|
```Python
|
||||||
|
while len(directory_queue) > 0:
|
||||||
|
directory = directory_queue.popleft()
|
||||||
|
try:
|
||||||
|
filenames = os.listdir(directory)
|
||||||
|
except PermissionError:
|
||||||
|
continue
|
||||||
|
|
||||||
|
for filename in filenames:
|
||||||
|
...
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
#### Continue is great for cleaning code with lots of conditions:
|
#### Continue is great for cleaning code with lots of conditions:
|
||||||
|
|
||||||
##### Without continue:
|
##### Without continue:
|
||||||
|
|
||||||
|
Nested:
|
||||||
|
|
||||||
|
```Python
|
||||||
for submission in submissions:
|
for submission in submissions:
|
||||||
if submission.author is not None:
|
if submission.author is not None:
|
||||||
if submission.over_18 is False:
|
if not submission.over_18:
|
||||||
if 'suggestion' in submission.title.lower():
|
if 'suggestion' in submission.title.lower():
|
||||||
print('Found:', submission.id)
|
print('Found:', submission.id)
|
||||||
|
```
|
||||||
|
|
||||||
|
or all grouped up:
|
||||||
|
|
||||||
|
```Python
|
||||||
for submission in submissions:
|
for submission in submissions:
|
||||||
if submission.author is not None and submission.over_18 is False and 'suggestion' in submission.title.lower():
|
if (
|
||||||
|
submission.author is not None
|
||||||
|
and not submission.over_18
|
||||||
|
and 'suggestion' in submission.title.lower()
|
||||||
|
):
|
||||||
print('Found:', submission.id)
|
print('Found:', submission.id)
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
##### With continue:
|
##### With continue:
|
||||||
|
|
||||||
|
```Python
|
||||||
for submission in submissions:
|
for submission in submissions:
|
||||||
if submission.author is None:
|
if submission.author is None:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if submission.over_18:
|
if submission.over_18:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if 'suggestion' not in submission.title.lower():
|
if 'suggestion' not in submission.title.lower():
|
||||||
continue
|
continue
|
||||||
|
|
||||||
print('Found:', submission.id)
|
print('Found:', submission.id)
|
||||||
|
```
|
||||||
|
|
||||||
The mentality changes from "keep only the items with the right properties" to "discard the items with the wrong properties".
|
Notice that all of the checks are the opposite of the originals. The mentality changes from "keep only the items with the right properties" to "discard the items with the wrong properties", and the result is the same.
|
||||||
|
|
|
@ -12,22 +12,27 @@ Generators are a type of iterable that create their contents on-the-fly. Unlike
|
||||||
|
|
||||||
Writing a generator looks like writing a function, but instead of `return`, you use `yield`. The object which is yielded is what you'll get when you do a loop over the generator. This one lets you count to a billion:
|
Writing a generator looks like writing a function, but instead of `return`, you use `yield`. The object which is yielded is what you'll get when you do a loop over the generator. This one lets you count to a billion:
|
||||||
|
|
||||||
|
```Python
|
||||||
def billion():
|
def billion():
|
||||||
x = 0
|
x = 0
|
||||||
while x < 1000000000:
|
while x < 1000000000:
|
||||||
yield x
|
yield x
|
||||||
x += 1
|
x += 1
|
||||||
|
```
|
||||||
|
|
||||||
|
I purposely used a `while` loop instead of `for x in range()` to show the extra work.
|
||||||
|
|
||||||
Note that, unlike a `return` statement, you can include more code after a `yield` statement. Also notice that generators keep track of their internal state -- the `billion` generator has an `x` that it increments every time you loop over it. You can imagine the code pausing after the `yield` line, and resuming when you come back for the next cycle. Try this with some extra print statements to help visualize.
|
Note that, unlike a `return` statement, you can include more code after a `yield` statement. Also notice that generators keep track of their internal state -- the `billion` generator has an `x` that it increments every time you loop over it. You can imagine the code pausing after the `yield` line, and resuming when you come back for the next cycle. Try this with some extra print statements to help visualize.
|
||||||
|
|
||||||
Generators can also take arguments. Here's a generator that counts to a custom amount:
|
Generators can also take arguments. Here's a generator that counts to a custom amount:
|
||||||
|
|
||||||
|
```Python
|
||||||
def count_to(y):
|
def count_to(y):
|
||||||
x = 0
|
x = 0
|
||||||
while x < y:
|
while x < y:
|
||||||
yield x
|
yield x
|
||||||
x += 1
|
x += 1
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -35,23 +40,31 @@ Generators can also take arguments. Here's a generator that counts to a custom a
|
||||||
|
|
||||||
Although generators look like functions when you're writing them, they feel more like objects when using them. Remember that generators don't calculate their contents until they are actually used in a loop, so simply doing:
|
Although generators look like functions when you're writing them, they feel more like objects when using them. Remember that generators don't calculate their contents until they are actually used in a loop, so simply doing:
|
||||||
|
|
||||||
|
```Python
|
||||||
numbers = count_to(100)
|
numbers = count_to(100)
|
||||||
|
```
|
||||||
|
|
||||||
does **not** create a list of 100 numbers. It creates a new instance of the generator that is ready to be iterated over, like this:
|
does **not** create a list of 100 numbers. It creates a new instance of the generator that is ready to be iterated over, like this:
|
||||||
|
|
||||||
|
```Python
|
||||||
numbers = count_to(100)
|
numbers = count_to(100)
|
||||||
for number in numbers:
|
for number in numbers:
|
||||||
print(number)
|
print(number)
|
||||||
|
```
|
||||||
|
|
||||||
or this:
|
or this:
|
||||||
|
|
||||||
|
```Python
|
||||||
for number in count_to(100):
|
for number in count_to(100):
|
||||||
print(number)
|
print(number)
|
||||||
|
```
|
||||||
|
|
||||||
This should remind you of:
|
This should remind you of:
|
||||||
|
|
||||||
|
```Python
|
||||||
for number in range(100):
|
for number in range(100):
|
||||||
print(number)
|
print(number)
|
||||||
|
```
|
||||||
|
|
||||||
because the `range` class behaves a lot like a generator ([but not exactly](http://stackoverflow.com/a/13092317)).
|
because the `range` class behaves a lot like a generator ([but not exactly](http://stackoverflow.com/a/13092317)).
|
||||||
|
|
||||||
|
@ -66,10 +79,11 @@ To get a single item from a generator without looping, use `next(generator)`.
|
||||||
|
|
||||||
# StopIteration
|
# StopIteration
|
||||||
|
|
||||||
Generators pause and resume a lot, but they still flow like normal functions. As long as there is no endless `while` loop inside, they'll come to an end at some point. When a generator is all finished, it will raise a `StopIteration` exception every time you try to do `next()`. Luckily, `for` loops will detect this automatically and stop themselves.
|
Generators pause and resume a lot, but they still flow like normal functions. As long as there is no endless `while` loop inside, they'll come to an end at some point. When a generator is all finished, it will raise a `StopIteration` exception every time you try to do `next()` on it. Luckily, `for` loops will detect this automatically and stop themselves.
|
||||||
|
|
||||||
Earlier, I said that generators use `yield` instead of `return`, but in fact you can include a return statement. If it is encountered, it will raise a `StopIteration`, and the generator will not resume even if there is more code.
|
Earlier, I said that generators use `yield` instead of `return`, but in fact you can include a return statement. If it is encountered, it will raise a `StopIteration`, and the generator will not resume even if there is more code.
|
||||||
|
|
||||||
|
```Python
|
||||||
>>> def generator():
|
>>> def generator():
|
||||||
... yield 1
|
... yield 1
|
||||||
... return 2
|
... return 2
|
||||||
|
@ -88,6 +102,7 @@ Earlier, I said that generators use `yield` instead of `return`, but in fact you
|
||||||
File "<stdin>", line 1, in <module>
|
File "<stdin>", line 1, in <module>
|
||||||
StopIteration
|
StopIteration
|
||||||
>>>
|
>>>
|
||||||
|
```
|
||||||
|
|
||||||
In general, I don't like to use `return` in generators. I prefer to `break` from their internal loops and conclude naturally.
|
In general, I don't like to use `return` in generators. I prefer to `break` from their internal loops and conclude naturally.
|
||||||
|
|
||||||
|
@ -106,6 +121,7 @@ In general, I don't like to use `return` in generators. I prefer to `break` from
|
||||||
|
|
||||||
Suppose you're getting data from an imaginary website which sends you items in groups of 100. You want to let the user loop over every item without having to worry about the groups themselves.
|
Suppose you're getting data from an imaginary website which sends you items in groups of 100. You want to let the user loop over every item without having to worry about the groups themselves.
|
||||||
|
|
||||||
|
```Python
|
||||||
def item_generator(url):
|
def item_generator(url):
|
||||||
page = 0
|
page = 0
|
||||||
while True:
|
while True:
|
||||||
|
@ -130,13 +146,15 @@ Suppose you're getting data from an imaginary website which sends you items in g
|
||||||
comments = item_generator('http://website.com/user/voussoir/comments')
|
comments = item_generator('http://website.com/user/voussoir/comments')
|
||||||
for comment in comments:
|
for comment in comments:
|
||||||
print(comment.body)
|
print(comment.body)
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#### Sqlite3 fetch generator
|
#### Sqlite3 fetch generator
|
||||||
|
|
||||||
This is one that I almost always include in my program when I'm doing lots of sqlite work. Sqlite cursors don't allow you to simply do a for-loop over the results of a SELECT, so this generator is very handy:
|
This is one that I almost always include when I'm doing lots of sqlite work. Sqlite cursors don't allow you to simply do a for-loop over the results of a SELECT, and doing `fetchall` on a large query can be very memory-heavy, so this generator is very handy:
|
||||||
|
|
||||||
|
```Python
|
||||||
def fetch_generator(cur):
|
def fetch_generator(cur):
|
||||||
while True:
|
while True:
|
||||||
item = cur.fetchone()
|
item = cur.fetchone()
|
||||||
|
@ -147,11 +165,12 @@ This is one that I almost always include in my program when I'm doing lots of sq
|
||||||
cur.execute('SELECT * FROM table')
|
cur.execute('SELECT * FROM table')
|
||||||
for item in fetch_generator(cur):
|
for item in fetch_generator(cur):
|
||||||
print(item)
|
print(item)
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# Further reading
|
# Further reading
|
||||||
|
|
||||||
[Stack Overflow - What are the main uses for `yield from`?](http://stackoverflow.com/questions/9708902/in-practice-what-are-the-main-uses-for-the-new-yield-from-syntax-in-python-3) -- If you like recursive functions, how about recursive generators? The only time I've ever used this is to [iterate over a tree's nodes](https://github.com/voussoir/reddit/blob/2069c3bd731cc8f90401ee49a9fc4d0dbf436cfc/Prawtimestamps/timesearch.py#L756-L761).
|
[Stack Overflow - What are the main uses for `yield from`?](http://stackoverflow.com/questions/9708902/in-practice-what-are-the-main-uses-for-the-new-yield-from-syntax-in-python-3) — If you like recursive functions, how about recursive generators?
|
||||||
|
|
||||||
[Stack Overflow - Python generator `send` function purpose?](http://stackoverflow.com/questions/19302530/python-generator-send-function-purpose) -- This quickly dives out of "quick tips" territory.
|
[Stack Overflow - Python generator `send` function purpose?](http://stackoverflow.com/questions/19302530/python-generator-send-function-purpose) — This quickly dives out of "quick tips" territory.
|
||||||
|
|
|
@ -5,6 +5,7 @@ When using Tkinter alone, you can only embed .gif images in your interface. PIL
|
||||||
|
|
||||||
Requires `pip install pillow`
|
Requires `pip install pillow`
|
||||||
|
|
||||||
|
```Python
|
||||||
import PIL.Image
|
import PIL.Image
|
||||||
import PIL.ImageTk
|
import PIL.ImageTk
|
||||||
import tkinter
|
import tkinter
|
||||||
|
@ -15,6 +16,7 @@ Requires `pip install pillow`
|
||||||
label = tkinter.Label(t, image=image_tk)
|
label = tkinter.Label(t, image=image_tk)
|
||||||
label.image_reference = image_tk
|
label.image_reference = image_tk
|
||||||
label.pack()
|
label.pack()
|
||||||
|
```
|
||||||
|
|
||||||
You must store the `image_tk` somewhere, such as an attribute of the label it belongs to. Otherwise, it gets [prematurely garbage-collected](http://effbot.org/pyfaq/why-do-my-tkinter-images-not-appear.htm).
|
You must store the `image_tk` somewhere, such as an attribute of the label it belongs to. Otherwise, it gets [prematurely garbage-collected](http://effbot.org/pyfaq/why-do-my-tkinter-images-not-appear.htm).
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
import math
|
import math
|
||||||
import random
|
import random
|
||||||
|
import shutil
|
||||||
import string
|
import string
|
||||||
import threading
|
import threading
|
||||||
import time
|
import time
|
||||||
|
@ -12,7 +13,7 @@ import tkinter
|
||||||
# 0, 90, 180, 270
|
# 0, 90, 180, 270
|
||||||
# ░▒▓
|
# ░▒▓
|
||||||
|
|
||||||
SCREEN_WIDTH = 114
|
# get_terminal_size() returns (columns, lines). The screen width is the
# columns field; index 1 would be the number of lines.
SCREEN_WIDTH = shutil.get_terminal_size().columns - 6
|
||||||
|
|
||||||
DEFAULT_LINE = {
|
DEFAULT_LINE = {
|
||||||
'character': '#',
|
'character': '#',
|
||||||
|
@ -27,7 +28,7 @@ DEFAULT_LINE = {
|
||||||
variables = {
|
variables = {
|
||||||
'clock': 0,
|
'clock': 0,
|
||||||
'frames':[],
|
'frames':[],
|
||||||
'delay': 0.01,
|
'delay': 0.02,
|
||||||
'lines':[
|
'lines':[
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
8
Templates/unittester.py
Normal file
8
Templates/unittester.py
Normal file
|
@ -0,0 +1,8 @@
|
||||||
|
import unittest
|
||||||
|
|
||||||
|
class Tests(unittest.TestCase):
    '''
    Skeleton test case whose only test does nothing.
    '''
    def test_something(self):
        return None


if __name__ == '__main__':
    unittest.main()
|
15
ThreadQueue/test.py
Normal file
15
ThreadQueue/test.py
Normal file
|
@ -0,0 +1,15 @@
|
||||||
|
import time
|
||||||
|
import threadqueue
|
||||||
|
import random
|
||||||
|
import threading
|
||||||
|
# Manual exercise of ThreadQueue: four workers, with print as the
# post-processor for return values.
t = threadqueue.ThreadQueue(4, print)
main_thr = threading.current_thread().ident


def f():
    '''
    Sleep for 1-10 seconds, ask the queue to run a call on behalf of
    main_thr that prints whether it is executing on that thread, then raise
    ValueError.
    '''
    mysleep = random.randint(1, 10)
    time.sleep(mysleep)

    def report():
        print(threading.current_thread().ident==main_thr)

    t.behalf(main_thr, report)
    raise ValueError()
    # Unreachable while the raise above is in place.
    return mysleep


for _ in range(20):
    t.add(f)

# Drain the generator so that every queued call is processed.
for _ in t.run():
    pass
|
61
ThreadQueue/threadqueue.py
Normal file
61
ThreadQueue/threadqueue.py
Normal file
|
@ -0,0 +1,61 @@
|
||||||
|
import threading
|
||||||
|
import time
|
||||||
|
|
||||||
|
class ThreadQueue:
    '''
    Runs queued function calls on a limited number of worker threads.

    Calls are queued with `add`. Iterating `run` on the managing thread
    starts workers as slots become free, yields each worker's return value,
    and executes any calls that other threads have delegated to the managing
    thread through `behalf`.
    '''
    # Seconds that `run` sleeps after a pass in which it found nothing to
    # yield or execute, so that waiting on the workers does not spin the CPU.
    IDLE_DELAY = 0.005

    def __init__(self, thread_count, post_processor=None):
        '''
        thread_count:
            Maximum number of worker threads alive at once. Must be at
            least 1, otherwise `run` could never start a queued call.

        post_processor:
            Optional callable. It is invoked on the worker thread with each
            return value before that value is queued for `run` to yield.

        Raises ValueError if thread_count is less than 1.
        '''
        if thread_count < 1:
            raise ValueError('thread_count must be at least 1')

        self.thread_count = thread_count
        self.post_processor = post_processor
        # Values returned by finished calls, waiting to be yielded by `run`.
        self._returns = []
        # Worker threads that have been started and not yet pruned.
        self._threads = []
        # Zero-argument callables waiting for a free worker slot.
        self._lambdas = []
        # Maps thread ident -> list of pending calls delegated to that thread.
        self._behalfs = {}
        self.hold_open = False

    def _post_process(self, returned_value):
        '''
        Pass the value to the post_processor, if any, then queue it so that
        `run` can yield it.
        '''
        if self.post_processor is not None:
            self.post_processor(returned_value)
        self._returns.append(returned_value)

    def add(self, function, *function_args, **function_kwargs):
        '''
        Queue `function(*function_args, **function_kwargs)` to be executed
        on a worker thread once `run` is being iterated.
        '''
        lam = lambda: self._post_process(function(*function_args, **function_kwargs))
        self._lambdas.append(lam)

    def behalf(self, thread_id, f, *args, **kwargs):
        '''
        Have the thread whose ident is `thread_id` execute
        `f(*args, **kwargs)`, block until it has done so, and return the
        result. The target thread must be calling `run_behalfs` (which `run`
        does on every pass) for the call to be executed.

        If the call raises an Exception on the target thread, the same
        exception is raised here as well.
        '''
        if thread_id == threading.current_thread().ident:
            # Nobody else can service a request addressed to ourselves, so
            # waiting on the event would block forever. Run it directly.
            return f(*args, **kwargs)

        event = threading.Event()
        call = {
            'f': f,
            'args': args,
            'kwargs': kwargs,
            'event': event,
            'return': None,
            'exception': None,
        }
        self._behalfs.setdefault(thread_id, []).append(call)
        event.wait()
        if call['exception'] is not None:
            raise call['exception']
        return call['return']

    def run_behalfs(self):
        '''
        Execute every call that other threads have delegated to the current
        thread via `behalf`, in the order they were requested.

        An exception raised by a delegated call propagates out of this
        method; the thread waiting on that call is released first.
        '''
        calls = self._behalfs.get(threading.current_thread().ident, [])
        while calls:
            call = calls.pop(0)
            try:
                call['return'] = call['f'](*call['args'], **call['kwargs'])
            except Exception as exc:
                call['exception'] = exc
                raise
            finally:
                # Always wake the waiter, even on failure, so that it is not
                # left blocked on an event that would never be set.
                call['event'].set()

    def run_queue(self):
        '''
        Forget the workers that have finished, then start queued calls on
        new threads until thread_count are alive or the queue is empty.
        '''
        self._threads = [thread for thread in self._threads if thread.is_alive()]
        while self._lambdas and len(self._threads) < self.thread_count:
            lam = self._lambdas.pop(0)
            thread = threading.Thread(target=lam)
            thread.start()
            self._threads.append(thread)

    def run(self, hold_open=False):
        '''
        Generator that drives the queue from the calling thread, yielding
        the return value of each finished call.

        It keeps going while any call is queued or any worker is still
        tracked. If hold_open is True it also keeps going when there is no
        work, until self.hold_open is set to False.
        '''
        self.hold_open = hold_open
        while self.hold_open or self._threads or self._lambdas:
            self.run_queue()

            me = threading.current_thread().ident
            had_work = bool(self._returns) or bool(self._behalfs.get(me))

            while self._returns:
                yield self._returns.pop(0)
            self.run_behalfs()

            if not had_work:
                # Nothing to hand out on this pass, so give the workers the
                # CPU instead of polling in a tight loop.
                time.sleep(self.IDLE_DELAY)
|
|
@ -38,7 +38,7 @@ def threaded_dl(urls, thread_count, filename_format=None):
|
||||||
if filename_format != os.devnull:
|
if filename_format != os.devnull:
|
||||||
if '{' not in filename_format and len(urls) > 1:
|
if '{' not in filename_format and len(urls) > 1:
|
||||||
filename_format += '_{index}'
|
filename_format += '_{index}'
|
||||||
if '{extension}' not in filename_format:
|
if '{extension}' not in filename_format and '{basename}' not in filename_format:
|
||||||
filename_format += '{extension}'
|
filename_format += '{extension}'
|
||||||
now = int(time.time())
|
now = int(time.time())
|
||||||
for (index, url) in enumerate(urls):
|
for (index, url) in enumerate(urls):
|
||||||
|
|
|
@ -15,8 +15,18 @@ from voussoirkit import safeprint
|
||||||
from voussoirkit import spinal
|
from voussoirkit import spinal
|
||||||
|
|
||||||
|
|
||||||
def fileswith(filepattern, terms, do_regex=False, do_glob=False):
|
def fileswith(
|
||||||
search_terms = [term.lower() for term in terms]
|
filepattern,
|
||||||
|
terms,
|
||||||
|
case_sensitive=False,
|
||||||
|
do_regex=False,
|
||||||
|
do_glob=False,
|
||||||
|
inverse=False,
|
||||||
|
match_any=False,
|
||||||
|
):
|
||||||
|
|
||||||
|
if not case_sensitive:
|
||||||
|
terms = [term.lower() for term in terms]
|
||||||
|
|
||||||
def term_matches(text, term):
|
def term_matches(text, term):
|
||||||
return (
|
return (
|
||||||
|
@ -25,6 +35,8 @@ def fileswith(filepattern, terms, do_regex=False, do_glob=False):
|
||||||
(do_glob and fnmatch.fnmatch(text, term))
|
(do_glob and fnmatch.fnmatch(text, term))
|
||||||
)
|
)
|
||||||
|
|
||||||
|
anyall = any if match_any else all
|
||||||
|
|
||||||
|
|
||||||
generator = spinal.walk_generator(depth_first=False, yield_directories=True)
|
generator = spinal.walk_generator(depth_first=False, yield_directories=True)
|
||||||
for filepath in generator:
|
for filepath in generator:
|
||||||
|
@ -35,8 +47,12 @@ def fileswith(filepattern, terms, do_regex=False, do_glob=False):
|
||||||
try:
|
try:
|
||||||
with handle:
|
with handle:
|
||||||
for (index, line) in enumerate(handle):
|
for (index, line) in enumerate(handle):
|
||||||
if all(term_matches(line, term) for term in terms):
|
if not case_sensitive:
|
||||||
line = '%d | %s' % (index, line.strip())
|
compare_line = line.lower()
|
||||||
|
else:
|
||||||
|
compare_line = line
|
||||||
|
if inverse ^ anyall(term_matches(compare_line, term) for term in terms):
|
||||||
|
line = '%d | %s' % (index+1, line.strip())
|
||||||
matches.append(line)
|
matches.append(line)
|
||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
|
@ -50,8 +66,11 @@ def fileswith_argparse(args):
|
||||||
return fileswith(
|
return fileswith(
|
||||||
filepattern=args.filepattern,
|
filepattern=args.filepattern,
|
||||||
terms=args.search_terms,
|
terms=args.search_terms,
|
||||||
|
case_sensitive=args.case_sensitive,
|
||||||
do_glob=args.do_glob,
|
do_glob=args.do_glob,
|
||||||
do_regex=args.do_regex,
|
do_regex=args.do_regex,
|
||||||
|
inverse=args.inverse,
|
||||||
|
match_any=args.match_any,
|
||||||
)
|
)
|
||||||
|
|
||||||
def main(argv):
|
def main(argv):
|
||||||
|
@ -59,8 +78,11 @@ def main(argv):
|
||||||
|
|
||||||
parser.add_argument('filepattern')
|
parser.add_argument('filepattern')
|
||||||
parser.add_argument('search_terms', nargs='+', default=None)
|
parser.add_argument('search_terms', nargs='+', default=None)
|
||||||
|
parser.add_argument('--any', dest='match_any', action='store_true')
|
||||||
|
parser.add_argument('--case', dest='case_sensitive', action='store_true')
|
||||||
parser.add_argument('--regex', dest='do_regex', action='store_true')
|
parser.add_argument('--regex', dest='do_regex', action='store_true')
|
||||||
parser.add_argument('--glob', dest='do_glob', action='store_true')
|
parser.add_argument('--glob', dest='do_glob', action='store_true')
|
||||||
|
parser.add_argument('--inverse', dest='inverse', action='store_true')
|
||||||
parser.set_defaults(func=fileswith_argparse)
|
parser.set_defaults(func=fileswith_argparse)
|
||||||
|
|
||||||
args = parser.parse_args(argv)
|
args = parser.parse_args(argv)
|
||||||
|
|
|
@ -13,6 +13,7 @@ def search(
|
||||||
case_sensitive=False,
|
case_sensitive=False,
|
||||||
do_regex=False,
|
do_regex=False,
|
||||||
do_glob=False,
|
do_glob=False,
|
||||||
|
inverse=False,
|
||||||
local_only=False,
|
local_only=False,
|
||||||
match_any=False,
|
match_any=False,
|
||||||
):
|
):
|
||||||
|
@ -23,10 +24,8 @@ def search(
|
||||||
(do_glob and fnmatch.fnmatch(text, term))
|
(do_glob and fnmatch.fnmatch(text, term))
|
||||||
)
|
)
|
||||||
|
|
||||||
if case_sensitive:
|
if not case_sensitive:
|
||||||
search_terms = terms
|
terms = [term.lower() for term in terms]
|
||||||
else:
|
|
||||||
search_terms = [term.lower() for term in terms]
|
|
||||||
|
|
||||||
anyall = any if match_any else all
|
anyall = any if match_any else all
|
||||||
|
|
||||||
|
@ -40,7 +39,8 @@ def search(
|
||||||
if not case_sensitive:
|
if not case_sensitive:
|
||||||
basename = basename.lower()
|
basename = basename.lower()
|
||||||
|
|
||||||
if anyall(term_matches(basename, term) for term in search_terms):
|
matches = anyall(term_matches(basename, term) for term in terms)
|
||||||
|
if matches ^ inverse:
|
||||||
safeprint.safeprint(filepath.absolute_path)
|
safeprint.safeprint(filepath.absolute_path)
|
||||||
|
|
||||||
|
|
||||||
|
@ -50,6 +50,7 @@ def search_argparse(args):
|
||||||
case_sensitive=args.case_sensitive,
|
case_sensitive=args.case_sensitive,
|
||||||
do_glob=args.do_glob,
|
do_glob=args.do_glob,
|
||||||
do_regex=args.do_regex,
|
do_regex=args.do_regex,
|
||||||
|
inverse=args.inverse,
|
||||||
local_only=args.local_only,
|
local_only=args.local_only,
|
||||||
match_any=args.match_any,
|
match_any=args.match_any,
|
||||||
)
|
)
|
||||||
|
@ -59,10 +60,11 @@ def main(argv):
|
||||||
|
|
||||||
parser.add_argument('search_terms', nargs='+', default=None)
|
parser.add_argument('search_terms', nargs='+', default=None)
|
||||||
parser.add_argument('--any', dest='match_any', action='store_true')
|
parser.add_argument('--any', dest='match_any', action='store_true')
|
||||||
|
parser.add_argument('--case', dest='case_sensitive', action='store_true')
|
||||||
parser.add_argument('--regex', dest='do_regex', action='store_true')
|
parser.add_argument('--regex', dest='do_regex', action='store_true')
|
||||||
parser.add_argument('--glob', dest='do_glob', action='store_true')
|
parser.add_argument('--glob', dest='do_glob', action='store_true')
|
||||||
parser.add_argument('--case', dest='case_sensitive', action='store_true')
|
|
||||||
parser.add_argument('--local', dest='local_only', action='store_true')
|
parser.add_argument('--local', dest='local_only', action='store_true')
|
||||||
|
parser.add_argument('--inverse', dest='inverse', action='store_true')
|
||||||
parser.set_defaults(func=search_argparse)
|
parser.set_defaults(func=search_argparse)
|
||||||
|
|
||||||
args = parser.parse_args(argv)
|
args = parser.parse_args(argv)
|
||||||
|
|
Loading…
Reference in a new issue