else
This commit is contained in:
parent
fa2c2bda76
commit
64de5c94bb
14 changed files with 351 additions and 149 deletions
|
@ -133,6 +133,7 @@ import requests
|
|||
import shutil
|
||||
import sqlite3
|
||||
import sys
|
||||
import threading
|
||||
import time
|
||||
## import tkinter
|
||||
import urllib.parse
|
||||
|
@ -143,6 +144,7 @@ from voussoirkit import downloady
|
|||
from voussoirkit import fusker
|
||||
from voussoirkit import treeclass
|
||||
from voussoirkit import pathtree
|
||||
sys.path.append('C:\\git\\else\\threadqueue'); import threadqueue
|
||||
|
||||
DOWNLOAD_CHUNK = 16 * bytestring.KIBIBYTE
|
||||
FILENAME_BADCHARS = '/\\:*?"<>|'
|
||||
|
@ -184,6 +186,7 @@ SKIPPABLE_FILETYPES = [
|
|||
'.pdf',
|
||||
'.png',
|
||||
'.rar',
|
||||
'.sfv',
|
||||
'.srt',
|
||||
'.tar',
|
||||
'.ttf',
|
||||
|
@ -237,7 +240,7 @@ class Walker:
|
|||
'''
|
||||
This class manages the extraction and saving of URLs, given a starting root url.
|
||||
'''
|
||||
def __init__(self, root_url, databasename=None, fullscan=False):
|
||||
def __init__(self, root_url, databasename=None, fullscan=False, threads=1):
|
||||
if not root_url.endswith('/'):
|
||||
root_url += '/'
|
||||
if '://' not in root_url.split('.')[0]:
|
||||
|
@ -255,6 +258,8 @@ class Walker:
|
|||
self.cur = self.sql.cursor()
|
||||
db_init(self.sql, self.cur)
|
||||
|
||||
self.thread_queue = threadqueue.ThreadQueue(threads)
|
||||
self._main_thread = threading.current_thread().ident
|
||||
self.fullscan = bool(fullscan)
|
||||
self.queue = collections.deque()
|
||||
self.seen_directories = set()
|
||||
|
@ -326,10 +331,15 @@ class Walker:
|
|||
skippable = any(urll.endswith(ext) for ext in SKIPPABLE_FILETYPES)
|
||||
if skippable:
|
||||
write('Skipping "%s" due to extension.' % url)
|
||||
self.smart_insert(url=url, commit=False)
|
||||
#self.smart_insert(url=url, commit=False)
|
||||
#return {'url': url, 'commit': False}
|
||||
self.thread_queue.behalf(self._main_thread, self.smart_insert, url=url, commit=False)
|
||||
return
|
||||
self.cur.execute('SELECT * FROM urls WHERE url == ?', [url])
|
||||
skippable = self.cur.fetchone() is not None
|
||||
skippable = lambda: self.cur.execute('SELECT * FROM urls WHERE url == ?', [url]).fetchone()
|
||||
skippable = self.thread_queue.behalf(self._main_thread, skippable)
|
||||
#print(skippable)
|
||||
skippable = skippable is not None
|
||||
#skippable = self.cur.fetchone() is not None
|
||||
if skippable:
|
||||
write('Skipping "%s" since we already have it.' % url)
|
||||
return
|
||||
|
@ -359,28 +369,34 @@ class Walker:
|
|||
if href in self.seen_directories:
|
||||
continue
|
||||
else:
|
||||
self.queue.append(href)
|
||||
#self.queue.append(href)
|
||||
self.thread_queue.add(self.process_url, href)
|
||||
added += 1
|
||||
write('Queued %d urls' % added)
|
||||
else:
|
||||
# This is not an index page, so save it.
|
||||
self.smart_insert(head=head, commit=False)
|
||||
#self.smart_insert(head=head, commit=False)
|
||||
self.thread_queue.behalf(self._main_thread, self.smart_insert, head=head, commit=False)
|
||||
#return {'head': head, 'commit': False}
|
||||
|
||||
def walk(self, url=None):
|
||||
'''
|
||||
Given a starting URL (defaults to self.root_url), continually extract
|
||||
links from the page and repeat.
|
||||
'''
|
||||
self.queue.appendleft(url)
|
||||
try:
|
||||
while len(self.queue) > 0:
|
||||
url = self.queue.popleft()
|
||||
self.process_url(url)
|
||||
line = '{:,} Remaining'.format(len(self.queue))
|
||||
write(line)
|
||||
except:
|
||||
self.sql.commit()
|
||||
raise
|
||||
#self.queue.appendleft(url)
|
||||
self.thread_queue.add(self.process_url, url)
|
||||
for return_value in self.thread_queue.run(hold_open=False):
|
||||
pass
|
||||
#try:
|
||||
# while len(self.queue) > 0:
|
||||
# url = self.queue.popleft()
|
||||
# self.process_url(url)
|
||||
# line = '{:,} Remaining'.format(len(self.queue))
|
||||
# write(line)
|
||||
#except:
|
||||
# self.sql.commit()
|
||||
# raise
|
||||
self.sql.commit()
|
||||
## ##
|
||||
## WALKER ##########################################################################################
|
||||
|
@ -584,7 +600,7 @@ def write(line, file_handle=None, **kwargs):
|
|||
|
||||
## COMMANDLINE FUNCTIONS ###########################################################################
|
||||
## ##
|
||||
def digest(root_url, databasename=None, fullscan=False):
|
||||
def digest(root_url, databasename=None, fullscan=False, threads=1):
|
||||
if root_url in ('!clipboard', '!c'):
|
||||
root_url = get_clipboard()
|
||||
write('From clipboard: %s' % root_url)
|
||||
|
@ -592,6 +608,7 @@ def digest(root_url, databasename=None, fullscan=False):
|
|||
databasename=databasename,
|
||||
fullscan=fullscan,
|
||||
root_url=root_url,
|
||||
threads=threads,
|
||||
)
|
||||
walker.walk()
|
||||
|
||||
|
@ -600,6 +617,7 @@ def digest_argparse(args):
|
|||
databasename=args.databasename,
|
||||
fullscan=args.fullscan,
|
||||
root_url=args.root_url,
|
||||
threads=int(args.threads),
|
||||
)
|
||||
|
||||
def download(
|
||||
|
@ -818,8 +836,7 @@ def measure(databasename, fullscan=False, new_only=False, threads=4):
|
|||
if threads is None:
|
||||
threads = 1
|
||||
|
||||
threadpool = concurrent.futures.ThreadPoolExecutor(threads)
|
||||
thread_promises = []
|
||||
thread_queue = threadqueue.ThreadQueue(threads)
|
||||
|
||||
try:
|
||||
for fetch in items:
|
||||
|
@ -827,8 +844,7 @@ def measure(databasename, fullscan=False, new_only=False, threads=4):
|
|||
|
||||
if fullscan or new_only:
|
||||
url = fetch[SQL_URL]
|
||||
promise = threadpool.submit(do_head, url, raise_for_status=False)
|
||||
thread_promises.append(promise)
|
||||
thread_queue.add(do_head, url, raise_for_status=False)
|
||||
|
||||
elif size is None:
|
||||
# Unmeasured and no intention to measure.
|
||||
|
@ -837,16 +853,15 @@ def measure(databasename, fullscan=False, new_only=False, threads=4):
|
|||
else:
|
||||
totalsize += size
|
||||
|
||||
for head in promise_results(thread_promises):
|
||||
fetch = smart_insert(sql, cur, head=head, commit=True)
|
||||
for head in thread_queue.run():
|
||||
fetch = smart_insert(sql, cur, head=head, commit=False)
|
||||
size = fetch[SQL_CONTENT_LENGTH]
|
||||
if size is None:
|
||||
write('"%s" is not revealing Content-Length' % url)
|
||||
size = 0
|
||||
totalsize += size
|
||||
except (Exception, KeyboardInterrupt):
|
||||
for promise in thread_promises:
|
||||
promise.cancel()
|
||||
sql.commit()
|
||||
raise
|
||||
|
||||
sql.commit()
|
||||
|
@ -938,6 +953,7 @@ def main(argv):
|
|||
p_digest.add_argument('root_url')
|
||||
p_digest.add_argument('-db', '--database', dest='databasename', default=None)
|
||||
p_digest.add_argument('-f', '--fullscan', dest='fullscan', action='store_true')
|
||||
p_digest.add_argument('-t', '--threads', dest='threads', default=1)
|
||||
p_digest.set_defaults(func=digest_argparse)
|
||||
|
||||
p_download = subparsers.add_parser('download')
|
||||
|
|
|
@ -45,6 +45,10 @@ class Path:
|
|||
def exists(self):
|
||||
return os.path.exists(self.absolute_path)
|
||||
|
||||
@property
def extension(self):
    '''
    The extension of this path's final component with leading dots
    stripped, or an empty string when there is no extension.
    '''
    (_, dotted_suffix) = os.path.splitext(self.absolute_path)
    return dotted_suffix.lstrip('.')
|
||||
|
||||
@property
|
||||
def is_dir(self):
|
||||
return os.path.isdir(self.absolute_path)
|
||||
|
@ -62,6 +66,11 @@ class Path:
|
|||
raise TypeError('subpath must be a string')
|
||||
return Path(os.path.join(self.absolute_path, subpath))
|
||||
|
||||
def listdir(self):
    '''
    Return a list containing self.with_child(name) for every entry name
    that os.listdir reports for this path.
    '''
    return [self.with_child(name) for name in os.listdir(self.absolute_path)]
|
||||
|
||||
@property
|
||||
def normcase(self):
|
||||
return os.path.normcase(self.absolute_path)
|
||||
|
@ -90,6 +99,15 @@ class Path:
|
|||
backsteps = os.sep.join('..' for x in range(backsteps))
|
||||
return self.absolute_path.replace(common.absolute_path, backsteps)
|
||||
|
||||
def replace_extension(self, extension):
    '''
    Return a new Path whose extension has been swapped for the given one.

    Only the text after the last dot of `extension` is used, so 'txt',
    '.txt' and 'a.txt' all behave the same. When that text is empty, the
    returned Path simply has the current extension removed.
    '''
    new_suffix = extension.rsplit('.', 1)[-1]
    (stem, _) = os.path.splitext(self.absolute_path)

    if new_suffix != '':
        stem = stem + '.' + new_suffix

    return Path(stem)
|
||||
|
||||
@property
|
||||
def size(self):
|
||||
if self.is_file:
|
||||
|
@ -105,6 +123,7 @@ class Path:
|
|||
return self.join(os.path.basename(basename))
|
||||
|
||||
|
||||
|
||||
def common_path(paths, fallback):
|
||||
'''
|
||||
Given a list of file paths, determine the deepest path which all
|
||||
|
@ -171,9 +190,10 @@ def get_path_casing(path):
|
|||
except IndexError:
|
||||
return input_path.absolute_path
|
||||
|
||||
imaginary_portion = input_path.normcase
|
||||
real_portion = os.path.normcase(cased)
|
||||
imaginary_portion = imaginary_portion.replace(real_portion, '')
|
||||
imaginary_portion = input_path.absolute_path
|
||||
imaginary_portion = imaginary_portion[len(cased):]
|
||||
#real_portion = os.path.normcase(cased)
|
||||
#imaginary_portion = imaginary_portion.replace(real_portion, '')
|
||||
imaginary_portion = imaginary_portion.lstrip(os.sep)
|
||||
cased = os.path.join(cased, imaginary_portion)
|
||||
cased = cased.rstrip(os.sep)
|
||||
|
|
10
Pathclass/test_pathclass.py
Normal file
10
Pathclass/test_pathclass.py
Normal file
|
@ -0,0 +1,10 @@
|
|||
import pathclass
|
||||
import unittest
|
||||
|
||||
class Tests(unittest.TestCase):
|
||||
def test_something(self):
|
||||
self.assertEqual('C:\\Users', pathclass.get_path_casing('C:\\users'))
|
||||
self.assertEqual('C:\\Users\\Nonexist', pathclass.get_path_casing('C:\\users\\Nonexist'))
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
|
@ -1,9 +1,10 @@
|
|||
Continue
|
||||
========
|
||||
|
||||
Discards the current iteration, and restarts the loop using the next item.
|
||||
Skips the rest of the current iteration, and starts the next one.
|
||||
|
||||
|
||||
```Python
|
||||
>>> for x in range(6):
|
||||
... if x == 3:
|
||||
... continue
|
||||
|
@ -14,38 +15,63 @@ Discards the current iteration, and restarts the loop using the next item.
|
|||
2
|
||||
4
|
||||
5
|
||||
```
|
||||
|
||||
```Python
|
||||
while len(directory_queue) > 0:
|
||||
directory = directory_queue.popleft()
|
||||
try:
|
||||
filenames = os.listdir(directory)
|
||||
except PermissionError:
|
||||
continue
|
||||
|
||||
for filename in filenames:
|
||||
...
|
||||
```
|
||||
|
||||
|
||||
#### Continue is great for cleaning code with lots of conditions:
|
||||
|
||||
##### Without continue:
|
||||
|
||||
Nested:
|
||||
|
||||
```Python
|
||||
for submission in submissions:
|
||||
if submission.author is not None:
|
||||
if submission.over_18 is False:
|
||||
if not submission.over_18:
|
||||
if 'suggestion' in submission.title.lower():
|
||||
print('Found:', submission.id)
|
||||
```
|
||||
|
||||
|
||||
or all grouped up:
|
||||
|
||||
```Python
|
||||
for submission in submissions:
|
||||
if submission.author is not None and submission.over_18 is False and 'suggestion' in submission.title.lower():
|
||||
if (
|
||||
submission.author is not None
|
||||
and not submission.over_18
|
||||
and 'suggestion' in submission.title.lower()
|
||||
):
|
||||
print('Found:', submission.id)
|
||||
```
|
||||
|
||||
|
||||
|
||||
##### With continue:
|
||||
|
||||
```Python
|
||||
for submission in submissions:
|
||||
if submission.author is None:
|
||||
continue
|
||||
|
||||
if submission.over_18:
|
||||
continue
|
||||
|
||||
if 'suggestion' not in submission.title.lower():
|
||||
continue
|
||||
|
||||
print('Found:', submission.id)
|
||||
```
|
||||
|
||||
The mentality changes from "keep only the items with the right properties" to "discard the items with the wrong properties".
|
||||
Notice that all of the checks are the opposite of the originals. The mentality changes from "keep only the items with the right properties" to "discard the items with the wrong properties", and the result is the same.
|
||||
|
|
|
@ -12,22 +12,27 @@ Generators are a type of iterable that create their contents on-the-fly. Unlike
|
|||
|
||||
Writing a generator looks like writing a function, but instead of `return`, you use `yield`. The object which is yielded is what you'll get when you do a loop over the generator. This one lets you count to a billion:
|
||||
|
||||
```Python
|
||||
def billion():
|
||||
x = 0
|
||||
while x < 1000000000:
|
||||
yield x
|
||||
x += 1
|
||||
```
|
||||
|
||||
I purposely used a `while` loop instead of `for x in range()` to show the extra work.
|
||||
|
||||
Note that, unlike a `return` statement, you can include more code after a `yield` statement. Also notice that generators keep track of their internal state -- the `billion` generator has an `x` that it increments every time you loop over it. You can imagine the code pausing after the `yield` line, and resuming when you come back for the next cycle. Try this with some extra print statements to help visualize.
|
||||
|
||||
Generators can also take arguments. Here's a generator that counts to a custom amount:
|
||||
|
||||
```Python
|
||||
def count_to(y):
|
||||
x = 0
|
||||
while x < y:
|
||||
yield x
|
||||
x += 1
|
||||
|
||||
```
|
||||
|
||||
|
||||
|
||||
|
@ -35,23 +40,31 @@ Generators can also take arguments. Here's a generator that counts to a custom a
|
|||
|
||||
Although generators look like functions when you're writing them, they feel more like objects when using them. Remember that generators don't calculate their contents until they are actually used in a loop, so simply doing:
|
||||
|
||||
```Python
|
||||
numbers = count_to(100)
|
||||
```
|
||||
|
||||
does **not** create a list of 100 numbers. It creates a new instance of the generator that is ready to be iterated over, like this:
|
||||
|
||||
```Python
|
||||
numbers = count_to(100)
|
||||
for number in numbers:
|
||||
print(number)
|
||||
```
|
||||
|
||||
or this:
|
||||
|
||||
```Python
|
||||
for number in count_to(100):
|
||||
print(number)
|
||||
```
|
||||
|
||||
This should remind you of:
|
||||
|
||||
```Python
|
||||
for number in range(100):
|
||||
print(number)
|
||||
```
|
||||
|
||||
because the `range` class behaves a lot like a generator ([but not exactly](http://stackoverflow.com/a/13092317)).
|
||||
|
||||
|
@ -66,10 +79,11 @@ To get a single item from a generator without looping, use `next(generator)`.
|
|||
|
||||
# StopIteration
|
||||
|
||||
Generators pause and resume a lot, but they still flow like normal functions. As long as there is no endless `while` loop inside, they'll come to an end at some point. When a generator is all finished, it will raise a `StopIteration` exception every time you try to do `next()`. Luckily, `for` loops will detect this automatically and stop themselves.
|
||||
Generators pause and resume a lot, but they still flow like normal functions. As long as there is no endless `while` loop inside, they'll come to an end at some point. When a generator is all finished, it will raise a `StopIteration` exception every time you try to do `next()` on it. Luckily, `for` loops will detect this automatically and stop themselves.
|
||||
|
||||
Earlier, I said that generators use `yield` instead of `return`, but in fact you can include a return statement. If it is encountered, it will raise a `StopIteration`, and the generator will not resume even if there is more code.
|
||||
|
||||
```Python
|
||||
>>> def generator():
|
||||
... yield 1
|
||||
... return 2
|
||||
|
@ -88,6 +102,7 @@ Earlier, I said that generators use `yield` instead of `return`, but in fact you
|
|||
File "<stdin>", line 1, in <module>
|
||||
StopIteration
|
||||
>>>
|
||||
```
|
||||
|
||||
In general, I don't like to use `return` in generators. I prefer to `break` from their internal loops and conclude naturally.
|
||||
|
||||
|
@ -106,6 +121,7 @@ In general, I don't like to use `return` in generators. I prefer to `break` from
|
|||
|
||||
Suppose you're getting data from an imaginary website which sends you items in groups of 100. You want to let the user loop over every item without having to worry about the groups themselves.
|
||||
|
||||
```Python
|
||||
def item_generator(url):
|
||||
page = 0
|
||||
while True:
|
||||
|
@ -130,13 +146,15 @@ Suppose you're getting data from an imaginary website which sends you items in g
|
|||
comments = item_generator('http://website.com/user/voussoir/comments')
|
||||
for comment in comments:
|
||||
print(comment.body)
|
||||
```
|
||||
|
||||
|
||||
|
||||
#### Sqlite3 fetch generator
|
||||
|
||||
This is one that I almost always include in my program when I'm doing lots of sqlite work. Sqlite cursors don't allow you to simply do a for-loop over the results of a SELECT, so this generator is very handy:
|
||||
This is one that I almost always include when I'm doing lots of sqlite work. Sqlite cursors don't allow you to simply do a for-loop over the results of a SELECT, and doing `fetchall` on a large query can be very memory-heavy, so this generator is very handy:
|
||||
|
||||
```Python
|
||||
def fetch_generator(cur):
|
||||
while True:
|
||||
item = cur.fetchone()
|
||||
|
@ -147,11 +165,12 @@ This is one that I almost always include in my program when I'm doing lots of sq
|
|||
cur.execute('SELECT * FROM table')
|
||||
for item in fetch_generator(cur):
|
||||
print(item)
|
||||
```
|
||||
|
||||
|
||||
|
||||
# Further reading
|
||||
|
||||
[Stack Overflow - What are the main uses for `yield from`?](http://stackoverflow.com/questions/9708902/in-practice-what-are-the-main-uses-for-the-new-yield-from-syntax-in-python-3) -- If you like recursive functions, how about recursive generators? The only time I've ever used this is to [iterate over a tree's nodes](https://github.com/voussoir/reddit/blob/2069c3bd731cc8f90401ee49a9fc4d0dbf436cfc/Prawtimestamps/timesearch.py#L756-L761).
|
||||
[Stack Overflow - What are the main uses for `yield from`?](http://stackoverflow.com/questions/9708902/in-practice-what-are-the-main-uses-for-the-new-yield-from-syntax-in-python-3) — If you like recursive functions, how about recursive generators?
|
||||
|
||||
[Stack Overflow - Python generator `send` function purpose?](http://stackoverflow.com/questions/19302530/python-generator-send-function-purpose) -- This quickly dives out of "quick tips" territory.
|
||||
[Stack Overflow - Python generator `send` function purpose?](http://stackoverflow.com/questions/19302530/python-generator-send-function-purpose) — This quickly dives out of "quick tips" territory.
|
||||
|
|
|
@ -5,6 +5,7 @@ When using Tkinter alone, you can only embed .gif images in your interface. PIL
|
|||
|
||||
Requires `pip install pillow`
|
||||
|
||||
```Python
|
||||
import PIL.Image
|
||||
import PIL.ImageTk
|
||||
import tkinter
|
||||
|
@ -15,6 +16,7 @@ Requires `pip install pillow`
|
|||
label = tkinter.Label(t, image=image_tk)
|
||||
label.image_reference = image_tk
|
||||
label.pack()
|
||||
```
|
||||
|
||||
You must store the `image_tk` somewhere, such as an attribute of the label it belongs to. Otherwise, it gets [prematurely garbage-collected](http://effbot.org/pyfaq/why-do-my-tkinter-images-not-appear.htm).
|
||||
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
import math
|
||||
import math
|
||||
import random
|
||||
import shutil
|
||||
import string
|
||||
import threading
|
||||
import time
|
||||
|
@ -12,7 +13,7 @@ import tkinter
|
|||
# 0, 90, 180, 270
|
||||
# ░▒▓
|
||||
|
||||
SCREEN_WIDTH = 114
|
||||
# get_terminal_size() returns (columns, lines); the screen width is the
# columns field, not index 1 (which is the number of lines).
SCREEN_WIDTH = shutil.get_terminal_size().columns - 6
|
||||
|
||||
DEFAULT_LINE = {
|
||||
'character': '#',
|
||||
|
@ -27,7 +28,7 @@ DEFAULT_LINE = {
|
|||
variables = {
|
||||
'clock': 0,
|
||||
'frames':[],
|
||||
'delay': 0.01,
|
||||
'delay': 0.02,
|
||||
'lines':[
|
||||
]
|
||||
}
|
||||
|
|
8
Templates/unittester.py
Normal file
8
Templates/unittester.py
Normal file
|
@ -0,0 +1,8 @@
|
|||
import unittest
|
||||
|
||||
class Tests(unittest.TestCase):
|
||||
def test_something(self):
|
||||
pass
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
15
ThreadQueue/test.py
Normal file
15
ThreadQueue/test.py
Normal file
|
@ -0,0 +1,15 @@
|
|||
import time
|
||||
import threadqueue
|
||||
import random
|
||||
import threading
|
||||
t = threadqueue.ThreadQueue(4, print)
|
||||
main_thr = threading.current_thread().ident
|
||||
def f():
|
||||
mysleep = random.randint(1, 10)
|
||||
time.sleep(mysleep)
|
||||
t.behalf(main_thr, lambda: print(threading.current_thread().ident==main_thr))
|
||||
raise ValueError()
|
||||
return mysleep
|
||||
|
||||
[t.add(f) for x in range(20)]
|
||||
list(t.run())
|
61
ThreadQueue/threadqueue.py
Normal file
61
ThreadQueue/threadqueue.py
Normal file
|
@ -0,0 +1,61 @@
|
|||
import threading
|
||||
import time
|
||||
|
||||
class ThreadQueue:
    '''
    Run queued callables on a bounded number of worker threads and hand
    their return values back through the `run` generator.

    A thread can also ask another thread to execute a call for it with
    `behalf`. Those requests are serviced by `run_behalfs`, which `run`
    invokes on every pass of its loop, so in practice the servicing thread
    is the one that is iterating `run`.
    '''
    # Seconds to sleep after a pass of `run` that found nothing to do, so the
    # polling loop does not spin at full speed while the workers are busy.
    _IDLE_SLEEP = 0.001

    def __init__(self, thread_count, post_processor=None):
        '''
        thread_count:
            Maximum number of worker threads alive at the same time.

        post_processor:
            Optional callable which is invoked with each return value, on
            the worker thread, before the value is queued for `run`.
        '''
        self.thread_count = thread_count
        self.post_processor = post_processor
        self._returns = []
        self._threads = []
        self._lambdas = []
        self._behalfs = {}
        self.hold_open = False

    def _post_process(self, returned_value):
        '''
        Pass the value to the post_processor, if any, then queue it so that
        `run` can yield it.
        '''
        if self.post_processor is not None:
            self.post_processor(returned_value)
        self._returns.append(returned_value)

    def add(self, function, *function_args, **function_kwargs):
        '''
        Queue a call of `function` with the given arguments. It will be
        started on its own thread by `run_queue` when a slot is available.
        '''
        lam = lambda: self._post_process(function(*function_args, **function_kwargs))
        self._lambdas.append(lam)

    def behalf(self, thread_id, f, *args, **kwargs):
        '''
        Have the thread identified by `thread_id` call f(*args, **kwargs),
        block until it has done so, and return the result.

        If the call raises an Exception, that exception is re-raised here,
        in the requesting thread.
        '''
        if thread_id == threading.current_thread().ident:
            # Nobody else could service a request addressed to ourselves, so
            # waiting on the event would block forever. Just make the call.
            return f(*args, **kwargs)

        event = threading.Event()
        call = {
            'f': f,
            'args': args,
            'kwargs': kwargs,
            'event': event,
            'return': None,
            'exception': None,
        }
        self._behalfs.setdefault(thread_id, []).append(call)
        event.wait()
        if call['exception'] is not None:
            raise call['exception']
        return call['return']

    def run_behalfs(self):
        '''
        Execute every pending `behalf` request that is addressed to the
        current thread and wake up the threads waiting on them.
        '''
        calls = self._behalfs.get(threading.current_thread().ident, [])
        while calls:
            call = calls.pop(0)
            try:
                call['return'] = call['f'](*call['args'], **call['kwargs'])
            except Exception as exc:
                # Deliver the failure to the requester instead of raising it
                # in the servicing thread.
                call['exception'] = exc
            finally:
                # Always release the waiter, otherwise a failing call would
                # leave the requesting thread blocked forever.
                call['event'].set()

    def run_queue(self):
        '''
        Forget the threads which have finished, then start queued calls on
        new threads until `thread_count` threads are alive or the queue is
        empty.
        '''
        self._threads = [thread for thread in self._threads if thread.is_alive()]
        threads_needed = self.thread_count - len(self._threads)
        # range() of a non-positive number is empty, so no guard is needed.
        for x in range(threads_needed):
            if len(self._lambdas) == 0:
                break
            lam = self._lambdas.pop(0)
            thread = threading.Thread(target=lam)
            thread.start()
            self._threads.append(thread)

    def run(self, hold_open=False):
        '''
        Generator which starts the queued calls and yields their return
        values as they become available, servicing `behalf` requests
        addressed to the iterating thread along the way.

        hold_open:
            If True, keep looping even when there is no work left, until
            the `hold_open` attribute is set back to False.
        '''
        self.hold_open = hold_open
        while self.hold_open or self._threads or self._lambdas:
            self.run_queue()

            did_work = False
            while self._returns:
                did_work = True
                yield self._returns.pop(0)

            if self._behalfs.get(threading.current_thread().ident):
                did_work = True
            self.run_behalfs()

            if not did_work:
                time.sleep(self._IDLE_SLEEP)
|
|
@ -38,7 +38,7 @@ def threaded_dl(urls, thread_count, filename_format=None):
|
|||
if filename_format != os.devnull:
|
||||
if '{' not in filename_format and len(urls) > 1:
|
||||
filename_format += '_{index}'
|
||||
if '{extension}' not in filename_format:
|
||||
if '{extension}' not in filename_format and '{basename}' not in filename_format:
|
||||
filename_format += '{extension}'
|
||||
now = int(time.time())
|
||||
for (index, url) in enumerate(urls):
|
||||
|
|
|
@ -15,8 +15,18 @@ from voussoirkit import safeprint
|
|||
from voussoirkit import spinal
|
||||
|
||||
|
||||
def fileswith(filepattern, terms, do_regex=False, do_glob=False):
|
||||
search_terms = [term.lower() for term in terms]
|
||||
def fileswith(
|
||||
filepattern,
|
||||
terms,
|
||||
case_sensitive=False,
|
||||
do_regex=False,
|
||||
do_glob=False,
|
||||
inverse=False,
|
||||
match_any=False,
|
||||
):
|
||||
|
||||
if not case_sensitive:
|
||||
terms = [term.lower() for term in terms]
|
||||
|
||||
def term_matches(text, term):
|
||||
return (
|
||||
|
@ -25,6 +35,8 @@ def fileswith(filepattern, terms, do_regex=False, do_glob=False):
|
|||
(do_glob and fnmatch.fnmatch(text, term))
|
||||
)
|
||||
|
||||
anyall = any if match_any else all
|
||||
|
||||
|
||||
generator = spinal.walk_generator(depth_first=False, yield_directories=True)
|
||||
for filepath in generator:
|
||||
|
@ -35,8 +47,12 @@ def fileswith(filepattern, terms, do_regex=False, do_glob=False):
|
|||
try:
|
||||
with handle:
|
||||
for (index, line) in enumerate(handle):
|
||||
if all(term_matches(line, term) for term in terms):
|
||||
line = '%d | %s' % (index, line.strip())
|
||||
if not case_sensitive:
|
||||
compare_line = line.lower()
|
||||
else:
|
||||
compare_line = line
|
||||
if inverse ^ anyall(term_matches(compare_line, term) for term in terms):
|
||||
line = '%d | %s' % (index+1, line.strip())
|
||||
matches.append(line)
|
||||
except:
|
||||
pass
|
||||
|
@ -50,8 +66,11 @@ def fileswith_argparse(args):
|
|||
return fileswith(
|
||||
filepattern=args.filepattern,
|
||||
terms=args.search_terms,
|
||||
case_sensitive=args.case_sensitive,
|
||||
do_glob=args.do_glob,
|
||||
do_regex=args.do_regex,
|
||||
inverse=args.inverse,
|
||||
match_any=args.match_any,
|
||||
)
|
||||
|
||||
def main(argv):
|
||||
|
@ -59,8 +78,11 @@ def main(argv):
|
|||
|
||||
parser.add_argument('filepattern')
|
||||
parser.add_argument('search_terms', nargs='+', default=None)
|
||||
parser.add_argument('--any', dest='match_any', action='store_true')
|
||||
parser.add_argument('--case', dest='case_sensitive', action='store_true')
|
||||
parser.add_argument('--regex', dest='do_regex', action='store_true')
|
||||
parser.add_argument('--glob', dest='do_glob', action='store_true')
|
||||
parser.add_argument('--inverse', dest='inverse', action='store_true')
|
||||
parser.set_defaults(func=fileswith_argparse)
|
||||
|
||||
args = parser.parse_args(argv)
|
||||
|
|
|
@ -13,6 +13,7 @@ def search(
|
|||
case_sensitive=False,
|
||||
do_regex=False,
|
||||
do_glob=False,
|
||||
inverse=False,
|
||||
local_only=False,
|
||||
match_any=False,
|
||||
):
|
||||
|
@ -23,10 +24,8 @@ def search(
|
|||
(do_glob and fnmatch.fnmatch(text, term))
|
||||
)
|
||||
|
||||
if case_sensitive:
|
||||
search_terms = terms
|
||||
else:
|
||||
search_terms = [term.lower() for term in terms]
|
||||
if not case_sensitive:
|
||||
terms = [term.lower() for term in terms]
|
||||
|
||||
anyall = any if match_any else all
|
||||
|
||||
|
@ -40,7 +39,8 @@ def search(
|
|||
if not case_sensitive:
|
||||
basename = basename.lower()
|
||||
|
||||
if anyall(term_matches(basename, term) for term in search_terms):
|
||||
matches = anyall(term_matches(basename, term) for term in terms)
|
||||
if matches ^ inverse:
|
||||
safeprint.safeprint(filepath.absolute_path)
|
||||
|
||||
|
||||
|
@ -50,6 +50,7 @@ def search_argparse(args):
|
|||
case_sensitive=args.case_sensitive,
|
||||
do_glob=args.do_glob,
|
||||
do_regex=args.do_regex,
|
||||
inverse=args.inverse,
|
||||
local_only=args.local_only,
|
||||
match_any=args.match_any,
|
||||
)
|
||||
|
@ -59,10 +60,11 @@ def main(argv):
|
|||
|
||||
parser.add_argument('search_terms', nargs='+', default=None)
|
||||
parser.add_argument('--any', dest='match_any', action='store_true')
|
||||
parser.add_argument('--case', dest='case_sensitive', action='store_true')
|
||||
parser.add_argument('--regex', dest='do_regex', action='store_true')
|
||||
parser.add_argument('--glob', dest='do_glob', action='store_true')
|
||||
parser.add_argument('--case', dest='case_sensitive', action='store_true')
|
||||
parser.add_argument('--local', dest='local_only', action='store_true')
|
||||
parser.add_argument('--inverse', dest='inverse', action='store_true')
|
||||
parser.set_defaults(func=search_argparse)
|
||||
|
||||
args = parser.parse_args(argv)
|
||||
|
|
Loading…
Reference in a new issue