Ethan Dalool 2018-09-30 01:48:41 -07:00
parent 9b8db0d8ef
commit 86040cc447
13 changed files with 164 additions and 38 deletions

View file

@ -0,0 +1,13 @@
#NoEnv ; Recommended for performance and compatibility with future AutoHotkey releases.
SendMode Input ; Recommended for new scripts due to its superior speed and reliability.
SetWorkingDir %A_ScriptDir% ; Ensures a consistent starting directory.
; Shift-T causes the mousewheel to scroll down.
; I used this to throw lots of dosh in Killing Floor.
MButton::
While GetKeyState("MButton", "P")
{
Click WheelDown
Sleep 20
}
Return

View file

@ -0,0 +1,3 @@
javascript:
Array.from(document.getElementsByTagName("Video")).forEach(function(e){e.loop=true;e.play()})

View file

@ -8,9 +8,9 @@ var seen_urls = new Set();
var image_height = 200;
var video_height = 300;
var audio_width = 1000;
var IMAGE_TYPES = ["\\.jpg", "\\.jpeg", "\\.jpg", "\\.bmp", "\\.tiff", "\\.tif", "\\.bmp", "\\.gif", "\\.png", "reddituploads\.com"].join("|");
var IMAGE_TYPES = ["\\.jpg", "\\.jpeg", "\\.jpg", "\\.bmp", "\\.tiff", "\\.tif", "\\.bmp", "\\.gif", "\\.png", "reddituploads\.com", "\\.webp", "drscdn\\.500px\\.org\\/photo"].join("|");
var AUDIO_TYPES = ["\\.aac", "\\.flac", "\\.mp3", "\\.m4a", "\\.ogg", "\\.opus", "\\.wav"].join("|");
var VIDEO_TYPES = ["\\.mp4", "\\.m4v", "\\.webm", "\\.ogv"].join("|");
var VIDEO_TYPES = ["\\.mp4", "\\.m4v", "\\.mkv", "\\.webm", "\\.ogv"].join("|");
IMAGE_TYPES = new RegExp(IMAGE_TYPES, "i");
AUDIO_TYPES = new RegExp(AUDIO_TYPES, "i");
VIDEO_TYPES = new RegExp(VIDEO_TYPES, "i");
@ -135,7 +135,15 @@ function create_odi_div(url)
{
var div = null;
var paramless_url = url.split("?")[0];
var basename = decodeURI(get_basename(url));
try
{
var basename = decodeURI(get_basename(url));
}
catch (exc)
{
console.error(exc);
return;
}
if (paramless_url.match(IMAGE_TYPES))
{

Javascript/unrowspan.js Normal file
View file

@ -0,0 +1,26 @@
javascript:
function unrowspan(tbody)
{
    var rows = tbody.children;
    for (var row_index = 0; row_index < rows.length; row_index += 1)
    {
        var row = rows[row_index];
        var columns = row.children;
        for (var column_index = 0; column_index < columns.length; column_index += 1)
        {
            var column = columns[column_index];
            var span = column.rowSpan;
            column.rowSpan = 1;
            for (var i = 1; i < span; i += 1)
            {
                var before = rows[row_index+i].children[column_index];
                console.log("Put " + column.innerText + " before " + column_index + " - " + before.innerText);
                rows[row_index+i].insertBefore(column.cloneNode(true), before);
            }
        }
    }
}

var tbodies = Array.from(document.getElementsByTagName("tbody"));
tbodies.forEach(unrowspan);

View file

@ -144,7 +144,7 @@ from voussoirkit import downloady
from voussoirkit import fusker
from voussoirkit import treeclass
from voussoirkit import pathtree
sys.path.append('C:\\git\\else\\threadqueue'); import threadqueue
sys.path.append('D:\\git\\else\\threadqueue'); import threadqueue
DOWNLOAD_CHUNK = 16 * bytestring.KIBIBYTE
FILENAME_BADCHARS = '/\\:*?"<>|'
@ -285,6 +285,9 @@ class Walker:
continue
href = urllib.parse.urljoin(response.url, href)
if href.startswith('javascript:'):
continue
if not href.startswith(self.root_url):
# Don't go to other sites or parent directories.
continue
@ -321,10 +324,10 @@ class Walker:
# We already picked this up at some point
return
if not url.startswith(self.root_url):
# Don't follow external links or parent directory.
write('Skipping "%s" due to external url.' % url)
return
# if not url.startswith(self.root_url):
# # Don't follow external links or parent directory.
# write('Skipping "%s" due to external url.' % url)
# return
urll = url.lower()
if self.fullscan is False:
@ -385,19 +388,15 @@ class Walker:
links from the page and repeat.
'''
#self.queue.appendleft(url)
self.thread_queue.add(self.process_url, url)
for return_value in self.thread_queue.run(hold_open=False):
pass
#try:
# while len(self.queue) > 0:
# url = self.queue.popleft()
# self.process_url(url)
# line = '{:,} Remaining'.format(len(self.queue))
# write(line)
#except:
# self.sql.commit()
# raise
self.sql.commit()
try:
self.thread_queue.add(self.process_url, url)
for return_value in self.thread_queue.run(hold_open=False):
pass
except KeyboardInterrupt:
self.sql.commit()
raise
else:
self.sql.commit()
## ##
## WALKER ##########################################################################################
@ -447,7 +446,12 @@ def do_head(url, raise_for_status=True):
def do_request(message, method, url, raise_for_status=True):
form = '{message:>4s}: {url} : {status}'
write(form.format(message=message, url=url, status=''))
response = method(url)
while True:
try:
response = method(url)
break
except requests.exceptions.ConnectionError:
pass
write(form.format(message=message, url=url, status=response.status_code))
if raise_for_status:
response.raise_for_status()

QuickID/quickid.py Normal file
View file

@ -0,0 +1,57 @@
'''
This module is designed to provide a GOOD ENOUGH means of identifying duplicate
files very quickly, so that more in-depth checks can be done on likely matches.
'''
import hashlib
import os
import sys

SEEK_END = 2
CHUNK_SIZE = 2 * 2**20
FORMAT = '{size}_{chunk_size}_{hash}'

def equal(handle1, handle2, *args, **kwargs):
    size1 = handle1.seek(0, SEEK_END)
    size2 = handle2.seek(0, SEEK_END)
    handle1.seek(0)
    handle2.seek(0)

    if size1 != size2:
        return False

    return quickid(handle1, *args, **kwargs) == quickid(handle2, *args, **kwargs)

def equal_file(filename1, filename2, *args, **kwargs):
    filename1 = os.path.abspath(filename1)
    filename2 = os.path.abspath(filename2)
    with open(filename1, 'rb') as handle1, open(filename2, 'rb') as handle2:
        return equal(handle1, handle2, *args, **kwargs)

def quickid(handle, hashclass=None, chunk_size=None):
    if hashclass is None:
        hashclass = hashlib.md5
    if chunk_size is None:
        chunk_size = CHUNK_SIZE

    hasher = hashclass()
    size = handle.seek(0, SEEK_END)
    handle.seek(0)

    if size <= 2 * chunk_size:
        hasher.update(handle.read())
    else:
        hasher.update(handle.read(chunk_size))
        handle.seek(-1 * chunk_size, SEEK_END)
        hasher.update(handle.read())

    return FORMAT.format(size=size, chunk_size=chunk_size, hash=hasher.hexdigest())

def quickid_file(filename, *args, **kwargs):
    filename = os.path.abspath(filename)
    with open(filename, 'rb') as handle:
        return quickid(handle, *args, **kwargs)

def main(argv):
    print(quickid_file(argv[0]))

if __name__ == '__main__':
    main(sys.argv[1:])
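A usage sketch for the module above, with hypothetical file paths; since only the file size plus the first and last chunk_size bytes are hashed, a match is a strong hint rather than proof, which is the module's stated intent:

import quickid

# 'photo1.jpg' and 'photo2.jpg' are placeholder paths for illustration.
print(quickid.quickid_file('photo1.jpg'))
# prints something like '4194304_2097152_<md5 hexdigest>'

if quickid.equal_file('photo1.jpg', 'photo2.jpg'):
    print('Likely duplicates -- worth a full byte-for-byte comparison.')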

View file

@ -2,13 +2,13 @@ Generators
==========
# What are they
## What are they
Generators are a type of iterable that create their contents on-the-fly. Unlike a list, whose entire contents are available before beginning any loops or manipulations, generators don't know how many items they will produce or when they will stop. They can even go on forever.
Generators are a type of iterable that create their contents on the fly. Unlike a list, whose entire contents are available before beginning any loops or manipulations, generators don't know how many items they will produce or when they will stop. They can even go on forever.
&nbsp;
# Writing one
## Writing one
Writing a generator looks like writing a function, but instead of `return`, you use `yield`. The object which is yielded is what you'll get when you do a loop over the generator. This one lets you count to a billion:
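The count_to example itself falls outside this hunk; a minimal sketch of what it presumably looks like (the name comes from the hunk header below, the body is assumed):

def count_to(y):
    # Produce the numbers 1 through y, one at a time, on demand.
    x = 1
    while x <= y:
        yield x
        x += 1

for x in count_to(1000000000):
    print(x)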
@ -36,7 +36,7 @@ def count_to(y):
&nbsp;
# Using one
## Using one
Although generators look like functions when you're writing them, they feel more like objects when using them. Remember that generators don't calculate their contents until they are actually used in a loop, so simply doing:
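The snippet that this sentence introduces is outside the hunk; the point, presumably, is that merely calling the generator function does no work yet (a sketch reusing count_to from above):

numbers = count_to(1000000000)
print(numbers)        # <generator object count_to at 0x...> -- nothing computed yet
print(next(numbers))  # 1, produced only when asked for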
@ -77,7 +77,7 @@ To get a single item from a generator without looping, use `next(generator)`.
&nbsp;
# StopIteration
## StopIteration
Generators pause and resume a lot, but they still flow like normal functions. As long as there is no endless `while` loop inside, they'll come to an end at some point. When a generator is all finished, it will raise a `StopIteration` exception every time you try to do `next()` on it. Luckily, `for` loops will detect this automatically and stop themselves.
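A small sketch of that behavior:

def letters():
    yield 'a'
    yield 'b'

g = letters()
print(next(g))  # 'a'
print(next(g))  # 'b'
print(next(g))  # raises StopIteration; a for loop would catch this and stop quietly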
@ -108,16 +108,16 @@ In general, I don't like to use `return` in generators. I prefer to `break` from
&nbsp;
# Minor notes
## Minor notes
- You cannot access the items of a generator by index, because only one item exists at a time. Once you do a loop over a generator, those items are gone forever unless you kept them somewhere else.
&nbsp;
# More examples
## More examples
#### Yielding individual items from batches
### Yielding individual items from batches
Suppose you're getting data from an imaginary website which sends you items in groups of 100. You want to let the user loop over every item without having to worry about the groups themselves.
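The document's own example is outside this hunk; a self-contained sketch of the idea, where get_batch is a stand-in for the imaginary website rather than a real API:

def get_batch(page):
    # Stand-in for the website call; pretend each page holds up to 100 items.
    all_comments = ['comment %d' % i for i in range(250)]
    return all_comments[page * 100 : (page + 1) * 100]

def get_comments():
    page = 0
    while True:
        batch = get_batch(page)
        if not batch:
            return
        for comment in batch:
            yield comment
        page += 1

comments = get_comments()
for comment in comments:
    print(comment)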
@ -150,7 +150,7 @@ for comment in comments:
&nbsp;
#### Sqlite3 fetch generator
### Sqlite3 fetch generator
This is one that I almost always include when I'm doing lots of sqlite work. Sqlite cursors don't allow you to simply do a for-loop over the results of a SELECT, and doing `fetchall` on a large query can be very memory-heavy, so this generator is very handy:
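The generator itself lies outside the hunk; it presumably looks something like this sketch, built only on cursor.fetchone() and shown here with an in-memory database so it runs standalone:

import sqlite3

def fetch_generator(cur):
    # Yield one row at a time instead of fetchall-ing the whole result set.
    while True:
        fetch = cur.fetchone()
        if fetch is None:
            break
        yield fetch

sql = sqlite3.connect(':memory:')  # stand-in database for the sketch
cur = sql.cursor()
cur.execute('CREATE TABLE demo(a)')
cur.executemany('INSERT INTO demo VALUES(?)', [(1,), (2,), (3,)])
cur.execute('SELECT * FROM demo')
for item in fetch_generator(cur):
    print(item)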
@ -169,8 +169,8 @@ for item in fetch_generator(cur):
&nbsp;
# Further reading
## Further reading
[Stack Overflow - What are the main uses for `yield from`?](http://stackoverflow.com/questions/9708902/in-practice-what-are-the-main-uses-for-the-new-yield-from-syntax-in-python-3) &mdash; If you like recursive functions, how about recursive generators?
[Stack Overflow - What are the main uses for `yield from`?](http://stackoverflow.com/questions/9708902/in-practice-what-are-the-main-uses-for-the-new-yield-from-syntax-in-python-3) -- If you like recursive functions, how about recursive generators?
[Stack Overflow - Python generator `send` function purpose?](http://stackoverflow.com/questions/19302530/python-generator-send-function-purpose) &mdash; This quickly dives out of "quick tips" territory.
[Stack Overflow - Python generator `send` function purpose?](http://stackoverflow.com/questions/19302530/python-generator-send-function-purpose) -- This quickly dives out of "quick tips" territory.

View file

@ -90,6 +90,9 @@ def update_filler(pairs, where_key):
where_value = pairs.pop(where_key)
if isinstance(where_value, tuple):
(where_value, pairs[where_key]) = where_value
if isinstance(where_value, dict):
where_value = where_value['old']
pairs[where_key] = where_value['new']
if len(pairs) == 0:
raise ValueError('No pairs left after where_key.')

View file

@ -13,7 +13,7 @@ from voussoirkit import ratelimiter
logging.basicConfig(level=logging.CRITICAL)
log = logging.getLogger(__name__)
CHUNK_SIZE = 256 * bytestring.KIBIBYTE
CHUNK_SIZE = 2 * bytestring.MIBIBYTE
# Number of bytes to read and write at a time
HASH_CLASS = hashlib.md5

View file

@ -19,7 +19,7 @@ def main(argv):
args.func(args)
if __name__ == '__main__':
main(sys.argv[1:])
raise SystemExit(main(sys.argv[1:]))
###############################################################################
###############################################################################
@ -47,4 +47,4 @@ def main(argv):
args.func(args)
if __name__ == '__main__':
main(sys.argv[1:])
raise SystemExit(main(sys.argv[1:]))

View file

@ -35,6 +35,16 @@ def brename(transformation):
return
loop(pairs, dry=False)
def excise(s, mark_left, mark_right):
'''
Remove the text between the left and right landmarks, inclusive, returning
the rest of the text.
excise('What a wonderful day [soundtrack].mp3', ' [', ']') ->
returns 'What a wonderful day.mp3'
'''
return s.split(mark_left)[0] + s.split(mark_right)[-1]
def longest_length(li):
longest = 0
for item in li:

View file

@ -25,6 +25,7 @@ for (lineindex, line) in enumerate(lines):
words = line.split(' ')
for (wordindex, word) in enumerate(words):
word = word.replace('.', ',')
if not (':' in word and ',' in word):
continue

View file

@ -5,6 +5,7 @@ import glob
import os
import sys
from voussoirkit import clipext
from voussoirkit import safeprint
def touch(glob_pattern):
@ -18,6 +19,6 @@ def touch(glob_pattern):
os.utime(filename)
if __name__ == '__main__':
glob_patterns = sys.argv[1:]
glob_patterns = [clipext.resolve(x).strip() for x in sys.argv[1:]]
for glob_pattern in glob_patterns:
touch(glob_pattern)