This commit is contained in:
Ethan Dalool 2017-02-18 17:06:55 -08:00
parent fa2c2bda76
commit 64de5c94bb
14 changed files with 351 additions and 149 deletions

View file

@ -133,6 +133,7 @@ import requests
import shutil import shutil
import sqlite3 import sqlite3
import sys import sys
import threading
import time import time
## import tkinter ## import tkinter
import urllib.parse import urllib.parse
@ -143,6 +144,7 @@ from voussoirkit import downloady
from voussoirkit import fusker from voussoirkit import fusker
from voussoirkit import treeclass from voussoirkit import treeclass
from voussoirkit import pathtree from voussoirkit import pathtree
sys.path.append('C:\\git\\else\\threadqueue'); import threadqueue
DOWNLOAD_CHUNK = 16 * bytestring.KIBIBYTE DOWNLOAD_CHUNK = 16 * bytestring.KIBIBYTE
FILENAME_BADCHARS = '/\\:*?"<>|' FILENAME_BADCHARS = '/\\:*?"<>|'
@ -184,6 +186,7 @@ SKIPPABLE_FILETYPES = [
'.pdf', '.pdf',
'.png', '.png',
'.rar', '.rar',
'.sfv',
'.srt', '.srt',
'.tar', '.tar',
'.ttf', '.ttf',
@ -237,7 +240,7 @@ class Walker:
''' '''
This class manages the extraction and saving of URLs, given a starting root url. This class manages the extraction and saving of URLs, given a starting root url.
''' '''
def __init__(self, root_url, databasename=None, fullscan=False): def __init__(self, root_url, databasename=None, fullscan=False, threads=1):
if not root_url.endswith('/'): if not root_url.endswith('/'):
root_url += '/' root_url += '/'
if '://' not in root_url.split('.')[0]: if '://' not in root_url.split('.')[0]:
@ -255,6 +258,8 @@ class Walker:
self.cur = self.sql.cursor() self.cur = self.sql.cursor()
db_init(self.sql, self.cur) db_init(self.sql, self.cur)
self.thread_queue = threadqueue.ThreadQueue(threads)
self._main_thread = threading.current_thread().ident
self.fullscan = bool(fullscan) self.fullscan = bool(fullscan)
self.queue = collections.deque() self.queue = collections.deque()
self.seen_directories = set() self.seen_directories = set()
@ -326,10 +331,15 @@ class Walker:
skippable = any(urll.endswith(ext) for ext in SKIPPABLE_FILETYPES) skippable = any(urll.endswith(ext) for ext in SKIPPABLE_FILETYPES)
if skippable: if skippable:
write('Skipping "%s" due to extension.' % url) write('Skipping "%s" due to extension.' % url)
self.smart_insert(url=url, commit=False) #self.smart_insert(url=url, commit=False)
#return {'url': url, 'commit': False}
self.thread_queue.behalf(self._main_thread, self.smart_insert, url=url, commit=False)
return return
self.cur.execute('SELECT * FROM urls WHERE url == ?', [url]) skippable = lambda: self.cur.execute('SELECT * FROM urls WHERE url == ?', [url]).fetchone()
skippable = self.cur.fetchone() is not None skippable = self.thread_queue.behalf(self._main_thread, skippable)
#print(skippable)
skippable = skippable is not None
#skippable = self.cur.fetchone() is not None
if skippable: if skippable:
write('Skipping "%s" since we already have it.' % url) write('Skipping "%s" since we already have it.' % url)
return return
@ -359,28 +369,34 @@ class Walker:
if href in self.seen_directories: if href in self.seen_directories:
continue continue
else: else:
self.queue.append(href) #self.queue.append(href)
self.thread_queue.add(self.process_url, href)
added += 1 added += 1
write('Queued %d urls' % added) write('Queued %d urls' % added)
else: else:
# This is not an index page, so save it. # This is not an index page, so save it.
self.smart_insert(head=head, commit=False) #self.smart_insert(head=head, commit=False)
self.thread_queue.behalf(self._main_thread, self.smart_insert, head=head, commit=False)
#return {'head': head, 'commit': False}
def walk(self, url=None): def walk(self, url=None):
''' '''
Given a starting URL (defaults to self.root_url), continually extract Given a starting URL (defaults to self.root_url), continually extract
links from the page and repeat. links from the page and repeat.
''' '''
self.queue.appendleft(url) #self.queue.appendleft(url)
try: self.thread_queue.add(self.process_url, url)
while len(self.queue) > 0: for return_value in self.thread_queue.run(hold_open=False):
url = self.queue.popleft() pass
self.process_url(url) #try:
line = '{:,} Remaining'.format(len(self.queue)) # while len(self.queue) > 0:
write(line) # url = self.queue.popleft()
except: # self.process_url(url)
self.sql.commit() # line = '{:,} Remaining'.format(len(self.queue))
raise # write(line)
#except:
# self.sql.commit()
# raise
self.sql.commit() self.sql.commit()
## ## ## ##
## WALKER ########################################################################################## ## WALKER ##########################################################################################
@ -584,7 +600,7 @@ def write(line, file_handle=None, **kwargs):
## COMMANDLINE FUNCTIONS ########################################################################### ## COMMANDLINE FUNCTIONS ###########################################################################
## ## ## ##
def digest(root_url, databasename=None, fullscan=False): def digest(root_url, databasename=None, fullscan=False, threads=1):
if root_url in ('!clipboard', '!c'): if root_url in ('!clipboard', '!c'):
root_url = get_clipboard() root_url = get_clipboard()
write('From clipboard: %s' % root_url) write('From clipboard: %s' % root_url)
@ -592,6 +608,7 @@ def digest(root_url, databasename=None, fullscan=False):
databasename=databasename, databasename=databasename,
fullscan=fullscan, fullscan=fullscan,
root_url=root_url, root_url=root_url,
threads=threads,
) )
walker.walk() walker.walk()
@ -600,6 +617,7 @@ def digest_argparse(args):
databasename=args.databasename, databasename=args.databasename,
fullscan=args.fullscan, fullscan=args.fullscan,
root_url=args.root_url, root_url=args.root_url,
threads=int(args.threads),
) )
def download( def download(
@ -818,8 +836,7 @@ def measure(databasename, fullscan=False, new_only=False, threads=4):
if threads is None: if threads is None:
threads = 1 threads = 1
threadpool = concurrent.futures.ThreadPoolExecutor(threads) thread_queue = threadqueue.ThreadQueue(threads)
thread_promises = []
try: try:
for fetch in items: for fetch in items:
@ -827,8 +844,7 @@ def measure(databasename, fullscan=False, new_only=False, threads=4):
if fullscan or new_only: if fullscan or new_only:
url = fetch[SQL_URL] url = fetch[SQL_URL]
promise = threadpool.submit(do_head, url, raise_for_status=False) thread_queue.add(do_head, url, raise_for_status=False)
thread_promises.append(promise)
elif size is None: elif size is None:
# Unmeasured and no intention to measure. # Unmeasured and no intention to measure.
@ -837,16 +853,15 @@ def measure(databasename, fullscan=False, new_only=False, threads=4):
else: else:
totalsize += size totalsize += size
for head in promise_results(thread_promises): for head in thread_queue.run():
fetch = smart_insert(sql, cur, head=head, commit=True) fetch = smart_insert(sql, cur, head=head, commit=False)
size = fetch[SQL_CONTENT_LENGTH] size = fetch[SQL_CONTENT_LENGTH]
if size is None: if size is None:
write('"%s" is not revealing Content-Length' % url) write('"%s" is not revealing Content-Length' % url)
size = 0 size = 0
totalsize += size totalsize += size
except (Exception, KeyboardInterrupt): except (Exception, KeyboardInterrupt):
for promise in thread_promises: sql.commit()
promise.cancel()
raise raise
sql.commit() sql.commit()
@ -938,6 +953,7 @@ def main(argv):
p_digest.add_argument('root_url') p_digest.add_argument('root_url')
p_digest.add_argument('-db', '--database', dest='databasename', default=None) p_digest.add_argument('-db', '--database', dest='databasename', default=None)
p_digest.add_argument('-f', '--fullscan', dest='fullscan', action='store_true') p_digest.add_argument('-f', '--fullscan', dest='fullscan', action='store_true')
p_digest.add_argument('-t', '--threads', dest='threads', default=1)
p_digest.set_defaults(func=digest_argparse) p_digest.set_defaults(func=digest_argparse)
p_download = subparsers.add_parser('download') p_download = subparsers.add_parser('download')

View file

@ -45,6 +45,10 @@ class Path:
def exists(self): def exists(self):
return os.path.exists(self.absolute_path) return os.path.exists(self.absolute_path)
@property
def extension(self):
    '''
    This path's file extension with the leading dot removed,
    or '' when there is no extension.
    '''
    (base, extension) = os.path.splitext(self.absolute_path)
    return extension.lstrip('.')
@property @property
def is_dir(self): def is_dir(self):
return os.path.isdir(self.absolute_path) return os.path.isdir(self.absolute_path)
@ -62,6 +66,11 @@ class Path:
raise TypeError('subpath must be a string') raise TypeError('subpath must be a string')
return Path(os.path.join(self.absolute_path, subpath)) return Path(os.path.join(self.absolute_path, subpath))
def listdir(self):
    '''
    Return the children of this directory as Path objects
    (built with with_child).
    '''
    return [self.with_child(name) for name in os.listdir(self.absolute_path)]
@property @property
def normcase(self): def normcase(self):
return os.path.normcase(self.absolute_path) return os.path.normcase(self.absolute_path)
@ -90,6 +99,15 @@ class Path:
backsteps = os.sep.join('..' for x in range(backsteps)) backsteps = os.sep.join('..' for x in range(backsteps))
return self.absolute_path.replace(common.absolute_path, backsteps) return self.absolute_path.replace(common.absolute_path, backsteps)
def replace_extension(self, extension):
    '''
    Return a new Path with this path's extension replaced. The new
    extension may be given with or without the leading dot; an empty
    string removes the extension entirely.
    '''
    # Keep only the final component so '.txt', 'txt', and 'tar.txt'
    # all resolve to 'txt'.
    extension = extension.rsplit('.', 1)[-1]
    (base, _) = os.path.splitext(self.absolute_path)
    if extension == '':
        return Path(base)
    return Path(base + '.' + extension)
@property @property
def size(self): def size(self):
if self.is_file: if self.is_file:
@ -105,6 +123,7 @@ class Path:
return self.join(os.path.basename(basename)) return self.join(os.path.basename(basename))
def common_path(paths, fallback): def common_path(paths, fallback):
''' '''
Given a list of file paths, determine the deepest path which all Given a list of file paths, determine the deepest path which all
@ -171,9 +190,10 @@ def get_path_casing(path):
except IndexError: except IndexError:
return input_path.absolute_path return input_path.absolute_path
imaginary_portion = input_path.normcase imaginary_portion = input_path.absolute_path
real_portion = os.path.normcase(cased) imaginary_portion = imaginary_portion[len(cased):]
imaginary_portion = imaginary_portion.replace(real_portion, '') #real_portion = os.path.normcase(cased)
#imaginary_portion = imaginary_portion.replace(real_portion, '')
imaginary_portion = imaginary_portion.lstrip(os.sep) imaginary_portion = imaginary_portion.lstrip(os.sep)
cased = os.path.join(cased, imaginary_portion) cased = os.path.join(cased, imaginary_portion)
cased = cased.rstrip(os.sep) cased = cased.rstrip(os.sep)

View file

@ -0,0 +1,10 @@
import pathclass
import unittest
class Tests(unittest.TestCase):
    def test_something(self):
        '''
        get_path_casing should repair the casing of path components that
        exist on disk, and pass nonexistent components through as given.
        '''
        cases = [
            ('C:\\users', 'C:\\Users'),
            ('C:\\users\\Nonexist', 'C:\\Users\\Nonexist'),
        ]
        for (given, expected) in cases:
            self.assertEqual(expected, pathclass.get_path_casing(given))

if __name__ == '__main__':
    unittest.main()

View file

@ -1,51 +1,77 @@
Continue Continue
======== ========
Discards the current iteration, and restarts the loop using the next item. Skips the rest of the current iteration, and starts the next one.
>>> for x in range(6): ```Python
... if x == 3: >>> for x in range(6):
... continue ... if x == 3:
... print(x) ... continue
... ... print(x)
0 ...
1 0
2 1
4 2
5 4
5
```
```Python
while len(directory_queue) > 0:
directory = directory_queue.popleft()
try:
filenames = os.listdir(directory)
except PermissionError:
continue
for filename in filenames:
...
```
####Continue is great for cleaning code with lots of conditions: ####Continue is great for cleaning code with lots of conditions:
#####Without continue: #####Without continue:
Nested:
for submission in submissions: ```Python
if submission.author is not None: for submission in submissions:
if submission.over_18 is False: if submission.author is not None:
if 'suggestion' in submission.title.lower(): if not submission.over_18:
print('Found:', submission.id) if 'suggestion' in submission.title.lower():
print('Found:', submission.id)
```
&nbsp; or all grouped up:
for submission in submissions: ```Python
if submission.author is not None and submission.over_18 is False and 'suggestion' in submission.title.lower(): for submission in submissions:
print('Found:', submission.id) if (
submission.author is not None
and not submission.over_18
and 'suggestion' in submission.title.lower()
):
print('Found:', submission.id)
```
#####With continue: #####With continue:
for submission in submissions: ```Python
if submission.author is None: for submission in submissions:
continue if submission.author is None:
if submission.over_18: continue
continue
if 'suggestion' not in submission.title.lower():
continue
print('Found:', submission.id) if submission.over_18:
continue
if 'suggestion' not in submission.title.lower():
continue
print('Found:', submission.id)
```
The mentality changes from "keep only the items with the right properties" to "discard the items with the wrong properties". Notice that all of the checks are the opposite of the originals. The mentality changes from "keep only the items with the right properties" to "discard the items with the wrong properties", and the result is the same.

View file

@ -12,22 +12,27 @@ Generators are a type of iterable that create their contents on-the-fly. Unlike
Writing a generator looks like writing a function, but instead of `return`, you use `yield`. The object which is yielded is what you'll get when you do a loop over the generator. This one lets you count to a billion: Writing a generator looks like writing a function, but instead of `return`, you use `yield`. The object which is yielded is what you'll get when you do a loop over the generator. This one lets you count to a billion:
def billion(): ```Python
x = 0 def billion():
while x < 1000000000: x = 0
yield x while x < 1000000000:
x += 1 yield x
x += 1
```
I purposely used a `while` loop instead of `for x in range()` to show the extra work.
Note that, unlike a `return` statement, you can include more code after a `yield` statement. Also notice that generators keep track of their internal state -- the `billion` generator has an `x` that it increments every time you loop over it. You can imagine the code pausing after the `yield` line, and resuming when you come back for the next cycle. Try this with some extra print statements to help visualize. Note that, unlike a `return` statement, you can include more code after a `yield` statement. Also notice that generators keep track of their internal state -- the `billion` generator has an `x` that it increments every time you loop over it. You can imagine the code pausing after the `yield` line, and resuming when you come back for the next cycle. Try this with some extra print statements to help visualize.
Generators can also take arguments. Here's a generator that counts to a custom amount: Generators can also take arguments. Here's a generator that counts to a custom amount:
def count_to(y): ```Python
x = 0 def count_to(y):
while x < y: x = 0
yield x while x < y:
x += 1 yield x
x += 1
```
&nbsp; &nbsp;
@ -35,23 +40,31 @@ Generators can also take arguments. Here's a generator that counts to a custom a
Although generators look like functions when you're writing them, they feel more like objects when using them. Remember that generators don't calculate their contents until they are actually used in a loop, so simply doing: Although generators look like functions when you're writing them, they feel more like objects when using them. Remember that generators don't calculate their contents until they are actually used in a loop, so simply doing:
numbers = count_to(100) ```Python
numbers = count_to(100)
```
does **not** create a list of 100 numbers. It creates a new instance of the generator that is ready to be iterated over, like this: does **not** create a list of 100 numbers. It creates a new instance of the generator that is ready to be iterated over, like this:
numbers = count_to(100) ```Python
for number in numbers: numbers = count_to(100)
print(number) for number in numbers:
print(number)
```
or this: or this:
for number in count_to(100): ```Python
print(number) for number in count_to(100):
print(number)
```
This should remind you of: This should remind you of:
for number in range(100): ```Python
print(number) for number in range(100):
print(number)
```
because the `range` class behaves a lot like a generator ([but not exactly](http://stackoverflow.com/a/13092317)). because the `range` class behaves a lot like a generator ([but not exactly](http://stackoverflow.com/a/13092317)).
@ -66,28 +79,30 @@ To get a single item from a generator without looping, use `next(generator)`.
# StopIteration # StopIteration
Generators pause and resume a lot, but they still flow like normal functions. As long as there is no endless `while` loop inside, they'll come to an end at some point. When a generator is all finished, it will raise a `StopIteration` exception every time you try to do `next()`. Luckily, `for` loops will detect this automatically and stop themselves. Generators pause and resume a lot, but they still flow like normal functions. As long as there is no endless `while` loop inside, they'll come to an end at some point. When a generator is all finished, it will raise a `StopIteration` exception every time you try to do `next()` on it. Luckily, `for` loops will detect this automatically and stop themselves.
Earlier, I said that generators use `yield` instead of `return`, but in fact you can include a return statement. If it is encountered, it will raise a `StopIteration`, and the generator will not resume even if there is more code. Earlier, I said that generators use `yield` instead of `return`, but in fact you can include a return statement. If it is encountered, it will raise a `StopIteration`, and the generator will not resume even if there is more code.
>>> def generator(): ```Python
... yield 1 >>> def generator():
... return 2 ... yield 1
... yield 3 ... return 2
... ... yield 3
>>> ...
>>> g = generator() >>>
>>> next(g) >>> g = generator()
1 >>> next(g)
>>> next(g) 1
Traceback (most recent call last): >>> next(g)
File "<stdin>", line 1, in <module> Traceback (most recent call last):
StopIteration: 2 File "<stdin>", line 1, in <module>
>>> next(g) StopIteration: 2
Traceback (most recent call last): >>> next(g)
File "<stdin>", line 1, in <module> Traceback (most recent call last):
StopIteration File "<stdin>", line 1, in <module>
>>> StopIteration
>>>
```
In general, I don't like to use `return` in generators. I prefer to `break` from their internal loops and conclude naturally. In general, I don't like to use `return` in generators. I prefer to `break` from their internal loops and conclude naturally.
@ -106,52 +121,56 @@ In general, I don't like to use `return` in generators. I prefer to `break` from
Suppose you're getting data from an imaginary website which sends you items in groups of 100. You want to let the user loop over every item without having to worry about the groups themselves. Suppose you're getting data from an imaginary website which sends you items in groups of 100. You want to let the user loop over every item without having to worry about the groups themselves.
def item_generator(url): ```Python
page = 0 def item_generator(url):
while True: page = 0
# get_items is a pretend method that collects the 100 items from that page while True:
batch = get_items(url, page=page) # get_items is a pretend method that collects the 100 items from that page
batch = get_items(url, page=page)
if len(batch) == 0: if len(batch) == 0:
# for this imaginary website, the batch will be empty when that page # for this imaginary website, the batch will be empty when that page
# doesn't have any items on it. # doesn't have any items on it.
break break
for item in batch: for item in batch:
# by yielding individual items, the user can just do a for loop # by yielding individual items, the user can just do a for loop
# over this generator and get them all one by one. # over this generator and get them all one by one.
yield item yield item
page += 1 page += 1
# When the while loop breaks, we reach the end of the function body, # When the while loop breaks, we reach the end of the function body,
# concluding the generator. # concluding the generator.
comments = item_generator('http://website.com/user/voussoir/comments') comments = item_generator('http://website.com/user/voussoir/comments')
for comment in comments: for comment in comments:
print(comment.body) print(comment.body)
```
&nbsp; &nbsp;
#### Sqlite3 fetch generator #### Sqlite3 fetch generator
This is one that I almost always include in my program when I'm doing lots of sqlite work. Sqlite cursors don't allow you to simply do a for-loop over the results of a SELECT, so this generator is very handy: This is one that I almost always include when I'm doing lots of sqlite work. Sqlite cursors don't allow you to simply do a for-loop over the results of a SELECT, and doing `fetchall` on a large query can be very memory-heavy, so this generator is very handy:
def fetch_generator(cur): ```Python
while True: def fetch_generator(cur):
item = cur.fetchone() while True:
if item is None: item = cur.fetchone()
break if item is None:
yield item break
yield item
cur.execute('SELECT * FROM table') cur.execute('SELECT * FROM table')
for item in fetch_generator(cur): for item in fetch_generator(cur):
print(item) print(item)
```
&nbsp; &nbsp;
# Further reading # Further reading
[Stack Overflow - What are the main uses for `yield from`?](http://stackoverflow.com/questions/9708902/in-practice-what-are-the-main-uses-for-the-new-yield-from-syntax-in-python-3) -- If you like recursive functions, how about recursive generators? The only time I've ever used this is to [iterate over a tree's nodes](https://github.com/voussoir/reddit/blob/2069c3bd731cc8f90401ee49a9fc4d0dbf436cfc/Prawtimestamps/timesearch.py#L756-L761). [Stack Overflow - What are the main uses for `yield from`?](http://stackoverflow.com/questions/9708902/in-practice-what-are-the-main-uses-for-the-new-yield-from-syntax-in-python-3) &mdash; If you like recursive functions, how about recursive generators?
[Stack Overflow - Python generator `send` function purpose?](http://stackoverflow.com/questions/19302530/python-generator-send-function-purpose) -- This quickly dives out of "quick tips" territory. [Stack Overflow - Python generator `send` function purpose?](http://stackoverflow.com/questions/19302530/python-generator-send-function-purpose) &mdash; This quickly dives out of "quick tips" territory.

View file

@ -5,16 +5,18 @@ When using Tkinter alone, you can only embed .gif images in your interface. PIL
Requires `pip install pillow` Requires `pip install pillow`
import PIL.Image ```Python
import PIL.ImageTk import PIL.Image
import tkinter import PIL.ImageTk
import tkinter
t = tkinter.Tk() t = tkinter.Tk()
image = PIL.Image.open('filename.png') image = PIL.Image.open('filename.png')
image_tk = PIL.ImageTk.PhotoImage(image) image_tk = PIL.ImageTk.PhotoImage(image)
label = tkinter.Label(t, image=image_tk) label = tkinter.Label(t, image=image_tk)
label.image_reference = image_tk label.image_reference = image_tk
label.pack() label.pack()
```
You must store the `image_tk` somewhere, such as an attribute of the label it belongs to. Otherwise, it gets [prematurely garbage-collected](http://effbot.org/pyfaq/why-do-my-tkinter-images-not-appear.htm). You must store the `image_tk` somewhere, such as an attribute of the label it belongs to. Otherwise, it gets [prematurely garbage-collected](http://effbot.org/pyfaq/why-do-my-tkinter-images-not-appear.htm).

View file

@ -1,5 +1,6 @@
import math import math
import random import random
import shutil
import string import string
import threading import threading
import time import time
@ -12,7 +13,7 @@ import tkinter
# 0, 90, 180, 270 # 0, 90, 180, 270
# ░▒▓ # ░▒▓
SCREEN_WIDTH = 114 SCREEN_WIDTH = shutil.get_terminal_size()[1] - 6
DEFAULT_LINE = { DEFAULT_LINE = {
'character': '#', 'character': '#',
@ -27,7 +28,7 @@ DEFAULT_LINE = {
variables = { variables = {
'clock': 0, 'clock': 0,
'frames':[], 'frames':[],
'delay': 0.01, 'delay': 0.02,
'lines':[ 'lines':[
] ]
} }

8
Templates/unittester.py Normal file
View file

@ -0,0 +1,8 @@
import unittest
class Tests(unittest.TestCase):
    def test_something(self):
        '''Template placeholder -- replace with a real test.'''
        return

if __name__ == '__main__':
    unittest.main()

15
ThreadQueue/test.py Normal file
View file

@ -0,0 +1,15 @@
import time
import threadqueue
import random
import threading
# Exercise the ThreadQueue with four workers; `print` is the post_processor,
# so every job's return value gets printed as it completes.
t = threadqueue.ThreadQueue(4, print)
main_thr = threading.current_thread().ident

def f():
    '''
    Sleep a random few seconds, run a callable on the main thread's behalf,
    then raise to exercise the queue's handling of worker exceptions.
    '''
    mysleep = random.randint(1, 10)
    time.sleep(mysleep)
    # The lambda runs on the main thread while we block, so this prints True.
    t.behalf(main_thr, lambda: print(threading.current_thread().ident == main_thr))
    raise ValueError()
    # NOTE(review): unreachable while the raise above is present; kept so the
    # raise can be commented out to test the success path instead.
    return mysleep

# Previously a list comprehension was used purely for its side effects;
# a plain loop says what we mean.
for x in range(20):
    t.add(f)
list(t.run())

View file

@ -0,0 +1,61 @@
import threading
import time
class ThreadQueue:
    '''
    Run queued jobs on a bounded pool of worker threads and yield their
    return values from `run`. Worker threads may also ask another thread
    (typically the main thread) to execute a call on their behalf via
    `behalf`, which is useful for serializing access to resources that are
    not thread-safe (e.g. sqlite connections).
    '''
    def __init__(self, thread_count, post_processor=None):
        '''
        thread_count:
            Maximum number of worker threads alive at once.

        post_processor:
            Optional callable applied to every job's return value on the
            worker thread, before the value is staged for `run` to yield.
        '''
        self.thread_count = thread_count
        self.post_processor = post_processor
        # Return values waiting to be yielded by `run`.
        self._returns = []
        # Worker threads believed to be alive (pruned in run_queue).
        self._threads = []
        # Zero-argument callables waiting for a free worker slot.
        self._lambdas = []
        # Maps thread ident -> list of pending cross-thread calls (`behalf`).
        self._behalfs = {}
        self.hold_open = False

    def _post_process(self, returned_value):
        '''
        Apply the post_processor (if any) and stage the value for `run`.
        Executes on the worker thread.
        '''
        # NOTE: if post_processor raises, this worker thread dies and the
        # return value is lost; `run` continues without it.
        if self.post_processor is not None:
            self.post_processor(returned_value)
        self._returns.append(returned_value)

    def add(self, function, *function_args, **function_kwargs):
        '''
        Queue `function(*function_args, **function_kwargs)` to execute on a
        worker thread the next time `run` has a free slot.
        '''
        lam = lambda: self._post_process(function(*function_args, **function_kwargs))
        self._lambdas.append(lam)

    def behalf(self, thread_id, f, *args, **kwargs):
        '''
        Ask the thread identified by `thread_id` to execute
        `f(*args, **kwargs)` on our behalf, block until it has done so, and
        return the result.

        The target thread must be running `run` (or otherwise calling
        `run_behalfs` regularly) or this call is never serviced and blocks
        forever. For the same reason, never call this from the target
        thread itself.
        '''
        self._behalfs.setdefault(thread_id, [])
        event = threading.Event()
        call = {'f': f, 'args': args, 'kwargs': kwargs, 'event': event, 'return': None}
        self._behalfs[thread_id].append(call)
        event.wait()
        return call['return']

    def run_behalfs(self):
        '''
        Execute every `behalf` call addressed to the current thread and wake
        the threads waiting on them. Returns the number of calls executed.
        '''
        calls = self._behalfs.get(threading.current_thread().ident, [])
        count = 0
        while calls:
            call = calls.pop(0)
            # Store the result BEFORE setting the event so the waiting
            # thread is guaranteed to see it when event.wait() returns.
            call['return'] = call['f'](*call['args'], **call['kwargs'])
            call['event'].set()
            count += 1
        return count

    def run_queue(self):
        '''
        Discard finished worker threads and start new ones from the pending
        lambdas, up to thread_count.
        '''
        self._threads = [thread for thread in self._threads if thread.is_alive()]
        threads_needed = self.thread_count - len(self._threads)
        # range() of a non-positive number is empty, so no guard is needed.
        for x in range(max(threads_needed, 0)):
            if len(self._lambdas) == 0:
                break
            lam = self._lambdas.pop(0)
            thread = threading.Thread(target=lam)
            thread.start()
            self._threads.append(thread)

    def run(self, hold_open=False):
        '''
        Generator. Keep the worker pool fed and yield each job's
        (post-processed) return value as it arrives.

        hold_open:
            If True, keep looping even when there are no live threads or
            pending jobs, so more work can be `add`ed later. Set
            `self.hold_open = False` to let the loop conclude.
        '''
        self.hold_open = hold_open
        while self.hold_open or self._threads or self._lambdas:
            self.run_queue()
            did_work = len(self._returns) > 0
            while self._returns:
                yield self._returns.pop(0)
            did_work = (self.run_behalfs() > 0) or did_work
            if not did_work:
                # Sleep only on idle passes: stays responsive to `behalf`
                # calls without busy-spinning a CPU core while waiting.
                time.sleep(0.005)

View file

@ -38,7 +38,7 @@ def threaded_dl(urls, thread_count, filename_format=None):
if filename_format != os.devnull: if filename_format != os.devnull:
if '{' not in filename_format and len(urls) > 1: if '{' not in filename_format and len(urls) > 1:
filename_format += '_{index}' filename_format += '_{index}'
if '{extension}' not in filename_format: if '{extension}' not in filename_format and '{basename}' not in filename_format:
filename_format += '{extension}' filename_format += '{extension}'
now = int(time.time()) now = int(time.time())
for (index, url) in enumerate(urls): for (index, url) in enumerate(urls):

View file

@ -15,8 +15,18 @@ from voussoirkit import safeprint
from voussoirkit import spinal from voussoirkit import spinal
def fileswith(filepattern, terms, do_regex=False, do_glob=False): def fileswith(
search_terms = [term.lower() for term in terms] filepattern,
terms,
case_sensitive=False,
do_regex=False,
do_glob=False,
inverse=False,
match_any=False,
):
if not case_sensitive:
terms = [term.lower() for term in terms]
def term_matches(text, term): def term_matches(text, term):
return ( return (
@ -25,6 +35,8 @@ def fileswith(filepattern, terms, do_regex=False, do_glob=False):
(do_glob and fnmatch.fnmatch(text, term)) (do_glob and fnmatch.fnmatch(text, term))
) )
anyall = any if match_any else all
generator = spinal.walk_generator(depth_first=False, yield_directories=True) generator = spinal.walk_generator(depth_first=False, yield_directories=True)
for filepath in generator: for filepath in generator:
@ -35,8 +47,12 @@ def fileswith(filepattern, terms, do_regex=False, do_glob=False):
try: try:
with handle: with handle:
for (index, line) in enumerate(handle): for (index, line) in enumerate(handle):
if all(term_matches(line, term) for term in terms): if not case_sensitive:
line = '%d | %s' % (index, line.strip()) compare_line = line.lower()
else:
compare_line = line
if inverse ^ anyall(term_matches(compare_line, term) for term in terms):
line = '%d | %s' % (index+1, line.strip())
matches.append(line) matches.append(line)
except: except:
pass pass
@ -50,8 +66,11 @@ def fileswith_argparse(args):
return fileswith( return fileswith(
filepattern=args.filepattern, filepattern=args.filepattern,
terms=args.search_terms, terms=args.search_terms,
case_sensitive=args.case_sensitive,
do_glob=args.do_glob, do_glob=args.do_glob,
do_regex=args.do_regex, do_regex=args.do_regex,
inverse=args.inverse,
match_any=args.match_any,
) )
def main(argv): def main(argv):
@ -59,8 +78,11 @@ def main(argv):
parser.add_argument('filepattern') parser.add_argument('filepattern')
parser.add_argument('search_terms', nargs='+', default=None) parser.add_argument('search_terms', nargs='+', default=None)
parser.add_argument('--any', dest='match_any', action='store_true')
parser.add_argument('--case', dest='case_sensitive', action='store_true')
parser.add_argument('--regex', dest='do_regex', action='store_true') parser.add_argument('--regex', dest='do_regex', action='store_true')
parser.add_argument('--glob', dest='do_glob', action='store_true') parser.add_argument('--glob', dest='do_glob', action='store_true')
parser.add_argument('--inverse', dest='inverse', action='store_true')
parser.set_defaults(func=fileswith_argparse) parser.set_defaults(func=fileswith_argparse)
args = parser.parse_args(argv) args = parser.parse_args(argv)

View file

@ -13,6 +13,7 @@ def search(
case_sensitive=False, case_sensitive=False,
do_regex=False, do_regex=False,
do_glob=False, do_glob=False,
inverse=False,
local_only=False, local_only=False,
match_any=False, match_any=False,
): ):
@ -23,10 +24,8 @@ def search(
(do_glob and fnmatch.fnmatch(text, term)) (do_glob and fnmatch.fnmatch(text, term))
) )
if case_sensitive: if not case_sensitive:
search_terms = terms terms = [term.lower() for term in terms]
else:
search_terms = [term.lower() for term in terms]
anyall = any if match_any else all anyall = any if match_any else all
@ -40,7 +39,8 @@ def search(
if not case_sensitive: if not case_sensitive:
basename = basename.lower() basename = basename.lower()
if anyall(term_matches(basename, term) for term in search_terms): matches = anyall(term_matches(basename, term) for term in terms)
if matches ^ inverse:
safeprint.safeprint(filepath.absolute_path) safeprint.safeprint(filepath.absolute_path)
@ -50,6 +50,7 @@ def search_argparse(args):
case_sensitive=args.case_sensitive, case_sensitive=args.case_sensitive,
do_glob=args.do_glob, do_glob=args.do_glob,
do_regex=args.do_regex, do_regex=args.do_regex,
inverse=args.inverse,
local_only=args.local_only, local_only=args.local_only,
match_any=args.match_any, match_any=args.match_any,
) )
@ -59,10 +60,11 @@ def main(argv):
parser.add_argument('search_terms', nargs='+', default=None) parser.add_argument('search_terms', nargs='+', default=None)
parser.add_argument('--any', dest='match_any', action='store_true') parser.add_argument('--any', dest='match_any', action='store_true')
parser.add_argument('--case', dest='case_sensitive', action='store_true')
parser.add_argument('--regex', dest='do_regex', action='store_true') parser.add_argument('--regex', dest='do_regex', action='store_true')
parser.add_argument('--glob', dest='do_glob', action='store_true') parser.add_argument('--glob', dest='do_glob', action='store_true')
parser.add_argument('--case', dest='case_sensitive', action='store_true')
parser.add_argument('--local', dest='local_only', action='store_true') parser.add_argument('--local', dest='local_only', action='store_true')
parser.add_argument('--inverse', dest='inverse', action='store_true')
parser.set_defaults(func=search_argparse) parser.set_defaults(func=search_argparse)
args = parser.parse_args(argv) args = parser.parse_args(argv)