else
This commit is contained in:
parent 3d0a3dc746
commit 5e88950156
19 changed files with 1355 additions and 278 deletions
BIN .GitImages/quicktips_imagetk.png Normal file
Binary file not shown.
After  Width: | Height: | Size: 98 KiB
82 BaseNumber/basenumber.py Normal file

@@ -0,0 +1,82 @@
import string

ALPHABET = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'

def from_base(number, base, alphabet=None):
    # Check the type first, so a non-numeric base raises the intended TypeError
    # instead of failing inside the comparison.
    if not isinstance(base, int):
        raise TypeError('base must be an int.')
    if base < 2:
        raise ValueError('base must be >= 2.')

    if base == 10:
        return number

    if alphabet is None:
        alphabet = ALPHABET
    number = str(number)
    alphabet = alphabet[:base]

    if number.count('.') > 1:
        raise ValueError('Too many decimal points')

    mixed_case = any(c in string.ascii_uppercase for c in alphabet) and \
                 any(c in string.ascii_lowercase for c in alphabet)
    if not mixed_case:
        alphabet = alphabet.upper()
        number = number.upper()

    char_set = set(number.replace('.', '', 1))
    alpha_set = set(alphabet)
    differences = char_set.difference(alpha_set)
    if len(differences) > 0:
        raise ValueError('Unknown characters for base', base, differences)
    alpha_dict = {character: index for (index, character) in enumerate(alphabet)}

    try:
        decimal_pos = number.index('.')
    except ValueError:
        decimal_pos = len(number)

    result = 0
    for (index, character) in enumerate(number):
        if index == decimal_pos:
            continue
        power = (decimal_pos - index)
        if index < decimal_pos:
            # Digits left of the point sit one place lower than their raw distance.
            power -= 1
        value = alpha_dict[character] * (base ** power)
        #print(value)
        result += value
    return result

def to_base(number, base, decimal_places=10, alphabet=None):
    if not isinstance(base, int):
        raise TypeError('base must be an int.')
    if base < 2:
        raise ValueError('base must be >= 2.')

    if base == 10:
        return str(number)

    if alphabet is None:
        alphabet = ALPHABET

    if base > len(alphabet):
        raise ValueError('Not enough symbols in alphabet for base %d' % base)

    result = ''
    whole_portion = int(number)
    float_portion = number - whole_portion
    while whole_portion > 0:
        (whole_portion, remainder) = divmod(whole_portion, base)
        result = alphabet[remainder] + result
    if result == '':
        # Without this, to_base(0, b) and pure fractions would have no leading digit.
        result = alphabet[0]
    if float_portion != 0:
        result += '.'
        for x in range(decimal_places):
            float_portion *= base
            whole = int(float_portion)
            float_portion -= whole
            result += alphabet[whole]

    return result
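A quick round-trip check of the two functions above (an illustrative interactive session, not part of the commit; values assume the default ALPHABET):

    >>> to_base(255, 16)
    'FF'
    >>> from_base('FF', 16)
    255
    >>> from_base(to_base(3.5, 2), 2)
    3.5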

@@ -10,6 +10,7 @@ except:
    pass

def close_enough(a, b):
    #print(a, b)
    for (a_channel, b_channel) in zip(a, b):
        if abs(a_channel - b_channel) > close_enough_threshold:
            return False

@@ -17,15 +18,26 @@ def close_enough(a, b):

def deletterbox(filename):
    image = Image.open(filename)
    (base, ext) = os.path.splitext(filename)
    for x in range(4):
        image = trim_top(image)
        image = image.rotate(90, expand=True)
        (base, ext) = os.path.splitext(filename)
        filename = base + 'X' + ext
        print('size', image.size)
        #image.save('%s_%d%s' % (base, x, ext))

        rotated = image.rotate(90, expand=True)
        # There is currently a bug in PIL which causes rotated images
        # to have a 1 px black border on the top and left
        if rotated.size != image.size:
            rotated = rotated.crop([1, 1, rotated.size[0], rotated.size[1]])

        image = rotated
        print()
    filename = base + '_crop' + ext
    image.save(filename, quality=100)

def trim_top(image):
    letterbox_color = image.getpixel((0, 0))
    print('letterbox color', letterbox_color)
    for y in range(image.size[1]):
        solid = True
        for x in range(image.size[0]):

@@ -33,12 +45,12 @@ def trim_top(image):
            #print(pixel)
            if not close_enough(letterbox_color, pixel):
                solid = False
                #print(y,pixel)
                print('broke at', y, pixel)
                break
        if not solid:
            break
    bounds = (0, y, image.size[0], image.size[1])
    print(bounds)
    print('bounds', bounds)
    image = image.crop(bounds)
    return image
Binary file not shown.
Before  Width: | Height: | Size: 343 KiB    After  Width: | Height: | Size: 429 KiB

@@ -10,6 +10,11 @@ KERNEL_EDGE_DETECTION_H = [
    [-2, 0, 2],
    [-2, 0, 2],
]
KERNEL_EDGE_DETECTION_V = [
    [-2, -2, 2],  # likely a typo for [-2, -2, -2]; a vertical-edge kernel mirrors the top and bottom rows
    [0, 0, 0],
    [2, 2, 2],
]
def index_to_xy(index, width):
    (y, x) = divmod(index, width)
    return (x, y)

@@ -17,6 +22,15 @@ def index_to_xy(index, width):
def xy_to_index(x, y, width):
    return (y * width) + x

def add(image_a, image_b):
    pixels_a = image_a.getdata()
    pixels_b = image_b.getdata()
    assert len(pixels_a) == len(pixels_b)
    pixels_c = [a + b for (a, b) in zip(pixels_a, pixels_b)]
    new_image = PIL.Image.new('L', (image_a.size))
    new_image.putdata(pixels_c, 1, 0)
    return new_image

def apply_filter(old_image, kernel):
    kernel_height = len(kernel)
    kernel_width = len(kernel[0])

@@ -49,6 +63,8 @@ def apply_filter(old_image, kernel):
            if subject_y < 0 or subject_y >= image_height:
                continue
            for (kernel_x, kernel_entry) in enumerate(kernel_row):
                if kernel_entry == 0:
                    continue
                subject_x = x - (kernel_center[0] - kernel_x)
                if subject_x < 0 or subject_x >= image_width:
                    continue

@@ -61,8 +77,8 @@ def apply_filter(old_image, kernel):
        operation_avg = abs(operation_sum / operation_denominator)
        #n_operation_avg = int(map_range(operation_avg, lower, upper, 0, 255))
        if index % 4096 == 0:
            print(x, y, operation_sum, operation_denominator, operation_avg)
        #print(y, '/', image_height)
        #print(x, y, operation_sum, operation_denominator, operation_avg)
        print(y, '/', image_height)
        new_pixels[index] = operation_avg

    #print(new_pixels)

@@ -91,7 +107,10 @@ def map_range(x, old_low, old_high, new_low, new_high):
    return y

if __name__ == '__main__':
    i = PIL.Image.open('ear.jpg')
    i = PIL.Image.open('icon.jpg')
    i = i.convert('L')
    i = apply_filter(apply_filter(i, KERNEL_GAUSSIAN_BLUR), KERNEL_EDGE_DETECTION_H)
    i.save('ear.png')
    i = apply_filter(i, KERNEL_GAUSSIAN_BLUR)
    a = apply_filter(i, KERNEL_EDGE_DETECTION_H)
    b = apply_filter(i, KERNEL_EDGE_DETECTION_V)
    i = add(a, b)
    i.save('icon.png')
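The add() function above combines the horizontal and vertical edge responses by plain per-pixel summation, which can exceed the 0-255 range of an 'L' image. A minimal sketch of the usual alternative, gradient magnitude, assuming the same PIL 'L' inputs (the helper name gradient_magnitude is mine, not the commit's):

    import math
    import PIL.Image

    def gradient_magnitude(image_h, image_v):
        # Combine two edge-response images as sqrt(h^2 + v^2), clamped to 8 bits.
        pixels_h = image_h.getdata()
        pixels_v = image_v.getdata()
        assert len(pixels_h) == len(pixels_v)
        combined = [min(255, int(math.hypot(h, v))) for (h, v) in zip(pixels_h, pixels_v)]
        new_image = PIL.Image.new('L', image_h.size)
        new_image.putdata(combined)
        return new_image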

263 Javascript/reddit_live_new.html Normal file

@@ -0,0 +1,263 @@
<!DOCTYPE html>
<html>

<head>
    <title>/new</title>
</head>

<body>
    <div id="control_panel">
        <input type="text" id="subreddit_field" placeholder="learnpython">
        <button id="start_button" onclick="start()">Start</button>
        <a id="browser_link"></a>
        <button id="clear_button" onclick="clear_workspace()">Clear workspace</button>
    </div>

    <div id="workspace">
    </div>
</body>

</html>


<style>
html
{
    background-color: #1b1c18;
}
body
{
    background-color: #272822;
    margin-left: 10%;
    margin-right: 10%;
    padding: 5px;
}
#control_panel
{
    background-color: #284142;
    padding: 5px;
}
#workspace
{
}

a
{
    color: #ddd;
    text-decoration: none;
    font-family: sans-serif;
}

a:hover
{
    text-decoration: underline;
}

.submission
{
    padding: 10px;
    padding-top: 20px;
    padding-bottom: 20px;

    margin: 10px;
    margin-top: 20px;
    margin-bottom: 20px;

    box-shadow: 5px 5px 10px 0px rgba(0,0,0,0.5);
    background-color: #284142;
}
</style>


<script type="text/javascript">
/*
Thanks Joe Marini for the tab focus code
http://www.html5rocks.com/en/tutorials/pagevisibility/intro/
*/

var CHECK_DELAY = 30 * 1000;
var WORKSPACE = document.getElementById("workspace");

var HTTPClient = function()
{
    /* Thanks ttgagne http://stackoverflow.com/a/22076667 */
    var asynchronous = true;
    this.get = function(url, callback)
    {
        var request = new XMLHttpRequest();
        request.onreadystatechange = function()
        {
            if (request.readyState == 4 && request.status == 200)
            {
                callback(request.responseText);
            }
        }
        request.open("GET", url, asynchronous);
        //request.withCredentials = true;
        request.send(null);
    }
}

function apply_to_page(text)
{
    var j = JSON.parse(text);
    var submissions = j["data"]["children"];
    submissions.reverse(); // newest last
    var new_items = 0;
    for (var index = 0; index < submissions.length; index += 1)
    {
        var submission = submissions[index]["data"];
        if (done_ids.has(submission["id"]))
        {
            continue;
        }

        done_ids.add(submission["id"]);

        if (first_loop)
        {
            continue;
        }

        new_items += 1;
        var div = document.createElement("div");
        div.className = "submission";

        var anchor = document.createElement("a");
        anchor.innerHTML = "/r/" + submission["subreddit"] + " - " + submission["title"];
        anchor.href = "https://reddit.com/r/" + submission["subreddit"] + "/comments/" + submission["id"];
        anchor.target = "_blank";

        var timestamp = document.createElement("span");
        // created_utc is epoch seconds; Date expects milliseconds.
        var submission_time = new Date(submission["created_utc"] * 1000);
        timestamp.innerHTML = "" + submission_time.getHours() + ":" + submission_time.getMinutes();

        div.appendChild(anchor);
        //WORKSPACE.insertBefore(div, WORKSPACE.firstChild);
        WORKSPACE.appendChild(div);
    }
    console.log("+" + new_items);
    if (new_items > 0 && !page_focused_cached)
    {
        unread_notification_count += new_items;
        update_title();
    }
    first_loop = false;
}

function check_forever()
{
    clearTimeout(check_timer);
    check_once();
    check_timer = setTimeout(check_forever, CHECK_DELAY);
}

function check_once()
{
    console.log("checking");
    if (subreddit == "")
    {
        console.log("no subreddit");
        return;
    }
    var url = "https://api.reddit.com/r/" + subreddit + "/new.json";
    session.get(url, apply_to_page);
}

function clear_workspace()
{
    while (WORKSPACE.children.length > 0)
    {
        WORKSPACE.removeChild(WORKSPACE.firstChild);
    }
}

function on_focus_change()
{
    if (page_focused_fresh())
    {
        unread_notification_count = 0;
        update_title();
    }
}

function page_focused_fresh()
{
    var property = visibility_property();
    if (!property)
    {
        page_focused_cached = true;
        return true;
    }
    else
    {
        page_focused_cached = !document[property];
    }
    return page_focused_cached;
}

function start()
{
    console.log("start");
    first_loop = true;
    clear_workspace();
    var field = document.getElementById("subreddit_field");
    var text = field.value;
    text = text.replace("/r/", "").replace("r/", "");
    subreddit = text;
    var link = document.getElementById("browser_link");
    var url = "https://reddit.com/r/" + subreddit + "/new";
    link.href = url;
    link.innerHTML = url;
    update_title();
    check_forever();
}

function update_title()
{
    var title = subreddit + "/new";
    if (unread_notification_count > 0)
    {
        title = "(" + unread_notification_count + ") " + title;
    }
    document.title = title;
}

function visibility_property()
{
    var prefixes = ["webkit","moz","ms","o"];

    if ("hidden" in document)
    {
        return "hidden";
    }

    for (var i = 0; i < prefixes.length; i++)
    {
        hidden_attribute = prefixes[i] + "Hidden";
        if ((hidden_attribute) in document)
            return hidden_attribute;
    }

    return null;
}

var done_ids = new Set();
var first_loop = true;

var unread_notification_count = 0;
var subreddit = "";
var check_timer = null;

var page_focused_cached;
page_focused_fresh();

var my_visibility_property = visibility_property();
if (my_visibility_property)
{
    var my_event_name = my_visibility_property.replace(/[H|h]idden/,'') + 'visibilitychange';
    document.addEventListener(my_event_name, on_focus_change);
}

var session = new HTTPClient();
</script>

@@ -1,10 +1,13 @@
Open Dir DL
===========

- 2016 07 04
    - Added new argparse command "tree"

- 2016 02 08
    - Fixed bug where server:port urls did not create db files.
    - Moved db commits to only happen at the end of a digest.

Requires `pip install beautifulsoup4`

See inside opendirdl.py for usage instructions.

2016 02 08
- Fixed bug where server:port urls did not create db files.
- Moved db commits to only happen at the end of a digest.
See inside opendirdl.py for usage instructions.

@@ -4,14 +4,14 @@ downloads open directories

The basics:
1. Create a database of the directory's files with
> opendirdl digest http://website.com/directory/
    > opendirdl digest http://website.com/directory/
2. Enable and disable the files you are interested in with
> opendirdl remove_pattern ".*"
> opendirdl keep_pattern "Daft%20Punk"
> opendirdl remove_pattern "folder\.jpg"
    > opendirdl remove_pattern ".*"
    > opendirdl keep_pattern "Daft%20Punk"
    > opendirdl remove_pattern "folder\.jpg"
    Note the percent-encoded string.
3. Download the enabled files with
> opendirdl download database.db
    > opendirdl download database.db

Specifics:

@@ -52,13 +52,13 @@ keep_pattern:
    Enable URLs which match a regex pattern. Matches are based on the percent-
    encoded strings!

    > opendirdl keep_pattern database.db ".*"
    > opendirdl keep_pattern website.com.db ".*"

remove_pattern:
    Disable URLs which match a regex pattern. Matches are based on the percent-
    encoded strings!

    > opendirdl remove_pattern database.db ".*"
    > opendirdl remove_pattern website.com.db ".*"

list_basenames:
    List enabled URLs in order of their base filename. This makes it easier to

@@ -76,13 +76,27 @@ list_basenames:
measure:
    Sum up the filesizes of all Enabled URLs.

    > opendirdl measure database.db <flags>
    > opendirdl measure website.com.db <flags>

    flags:
    -f | --fullscan:
        When included, perform HEAD requests when a file's size is not known.
        If this flag is not included, and some file's size is unknown, you will
        receive a printed note.

tree:
    Print the file / folder tree.

    > opendirdl tree website.com.db <flags>

    flags:
    -o "x.txt" | --outputfile "x.txt":
        Output the results to a file instead of stdout. This is useful if the
        filenames contain special characters that crash Python, or are so long
        that the console becomes unreadable.

        If the filename ends with ".html", the webpage will use collapsible
        boxes rather than plain text.
'''

@@ -91,10 +105,14 @@ measure:
# time importing them usually.
import sys

# Please consult my github repo for these files
# https://github.com/voussoir/else
sys.path.append('C:\\git\\else\\ratelimiter'); import ratelimiter
sys.path.append('C:\\git\\else\\bytestring'); import bytestring

import argparse
## ~import bs4
import collections
## ~import hashlib
import os
## ~import re

@@ -108,6 +126,8 @@ FILENAME_BADCHARS = '/\\:*?"<>|'

TERMINAL_WIDTH = shutil.get_terminal_size().columns

DOWNLOAD_CHUNK = 16 * bytestring.KIBIBYTE

# When doing a basic scan, we will not send HEAD requests to URLs that end in these strings,
# because they're probably files.
# This isn't meant to be a comprehensive filetype library, but it covers enough of the

@@ -152,23 +172,43 @@ SKIPPABLE_FILETYPES = [
]
SKIPPABLE_FILETYPES = set(x.lower() for x in SKIPPABLE_FILETYPES)

BYTE = 1
KIBIBYTE = 1024 * BYTE
MIBIBYTE = 1024 * KIBIBYTE
GIBIBYTE = 1024 * MIBIBYTE
TEBIBYTE = 1024 * GIBIBYTE
SIZE_UNITS = (TEBIBYTE, GIBIBYTE, MIBIBYTE, KIBIBYTE, BYTE)
# oh shit
HTML_TREE_HEADER = '''
<meta charset="UTF-8">

UNIT_STRINGS = {
    BYTE: 'b',
    KIBIBYTE: 'KiB',
    MIBIBYTE: 'MiB',
    GIBIBYTE: 'GiB',
    TEBIBYTE: 'TiB',
<script type="text/javascript">
function collapse(id)
{
    div = document.getElementById(id);
    if (div.style.display != "none")
    {
        div.style.display = "none";
    }
    else
    {
        div.style.display = "block";
    }
}
</script>

DOWNLOAD_CHUNK = 2 * KIBIBYTE

<style>
*
{
    font-family: Consolas;
}
button
{
    display: block;
}
div
{
    padding: 10px;
    padding-left: 15px;
    margin-bottom: 10px;
    border: 1px solid #000;
}
</style>
'''

DB_INIT = '''
CREATE TABLE IF NOT EXISTS urls(

@@ -202,8 +242,7 @@ class Downloader:
        # If they aren't, it's the user's fault.
        self.cur.execute('SELECT url FROM urls LIMIT 1')
        url = self.cur.fetchone()[0]
        # returns (root, path, filename). Keep root.
        outputdir = url_to_filepath(url)[0]
        outputdir = url_to_filepath(url)['root']
        self.outputdir = outputdir

    def download(self, overwrite=False, bytespersecond=None):

@@ -216,13 +255,13 @@ class Downloader:
                break
            url = fetch[SQL_URL]

            ''' Creating the Path '''
            (root, folder, basename) = url_to_filepath(url)
            ''' Creating the permanent and temporary filenames '''
            url_filepath = url_to_filepath(url)
            # Ignore this value of `root`, because we might have a custom outputdir.
            root = self.outputdir
            folder = os.path.join(root, folder)
            root = url_filepath['root']
            folder = os.path.join(root, url_filepath['folder'])
            os.makedirs(folder, exist_ok=True)
            fullname = os.path.join(folder, basename)
            fullname = os.path.join(folder, url_filepath['filename'])
            temporary_basename = hashit(url, 16) + '.oddltemporary'
            temporary_fullname = os.path.join(folder, temporary_basename)

@@ -252,6 +291,89 @@ class Generic:
    def __init__(self, **kwargs):
        for kwarg in kwargs:
            setattr(self, kwarg, kwargs[kwarg])


class TreeNode:
    def __init__(self, identifier, data, parent=None):
        assert isinstance(identifier, str)
        assert '\\' not in identifier
        self.identifier = identifier
        self.data = data
        self.parent = parent
        self.children = {}

    def __getitem__(self, key):
        return self.children[key]

    def __repr__(self):
        return 'TreeNode %s' % self.abspath()

    def abspath(self):
        node = self
        nodes = [node]
        while node.parent is not None:
            node = node.parent
            nodes.append(node)
        nodes.reverse()
        nodes = [node.identifier for node in nodes]
        return '\\'.join(nodes)

    def add_child(self, other_node, overwrite_parent=False):
        self.check_child_availability(other_node.identifier)
        if other_node.parent is not None and not overwrite_parent:
            raise ValueError('That node already has a parent. Try `overwrite_parent=True`')

        other_node.parent = self
        self.children[other_node.identifier] = other_node
        return other_node

    def check_child_availability(self, identifier):
        if ':' in identifier:
            raise Exception('Only roots may have a colon')
        if identifier in self.children:
            raise Exception('Node %s already has child %s' % (self.identifier, identifier))

    def detach(self):
        del self.parent.children[self.identifier]
        self.parent = None

    def listnodes(self, customsort=None):
        items = list(self.children.items())
        if customsort is None:
            items.sort(key=lambda x: x[0].lower())
        else:
            items.sort(key=customsort)
        return [item[1] for item in items]

    def merge_other(self, othertree, otherroot=None):
        newroot = None
        if ':' in othertree.identifier:
            if otherroot is None:
                raise Exception('Must specify a new name for the other tree\'s root')
            else:
                newroot = otherroot
        else:
            newroot = othertree.identifier
        othertree.identifier = newroot
        othertree.parent = self
        self.check_child_availability(newroot)
        self.children[newroot] = othertree

    def printtree(self, customsort=None):
        for node in self.walk(customsort):
            print(node.abspath())

    def sorted_children(self):
        keys = sorted(self.children.keys())
        for key in keys:
            yield (key, self.children[key])

    def walk(self, customsort=None):
        yield self
        for child in self.listnodes(customsort=customsort):
            #print(child)
            #print(child.listnodes())
            yield from child.walk(customsort=customsort)
## ##
## GENERIC #########################################################################################
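A small usage sketch of the TreeNode class added above (identifiers and data are illustrative only):

    root = TreeNode('example.com', data={'name': 'example.com', 'item_type': 'directory'})
    docs = root.add_child(TreeNode('docs', data={'name': 'docs', 'item_type': 'directory'}))
    docs.add_child(TreeNode('readme.txt', data={'name': 'readme.txt', 'item_type': 'file', 'size': 512}))
    root.printtree()
    # example.com
    # example.com\docs
    # example.com\docs\readme.txt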

@@ -264,7 +386,7 @@ class Walker:
            walkurl += '/'
        self.walkurl = walkurl
        if databasename is None or databasename == "":
            self.domain = url_to_filepath(walkurl)[0]
            self.domain = url_to_filepath(walkurl)['root']
            databasename = self.domain + '.db'
            databasename = databasename.replace(':', '')
        self.databasename = databasename

@@ -275,7 +397,7 @@ class Walker:
        db_init(self.sql, self.cur)

        self.fullscan = bool(fullscan)
        self.queue = []
        self.queue = collections.deque()
        self.seen_directories = set()

    def smart_insert(self, url=None, head=None, commit=True):

@@ -301,7 +423,8 @@ class Walker:
            if not href.startswith(self.walkurl):
                # Don't go to other sites or parent directories.
                continue
            if 'C=' in href and 'O=' in href:
            #if 'C=' in href and 'O=' in href:
            if any(sorter in href for sorter in ('?C=', '?O=', '?M=', '?D=', '?N=', '?S=')):
                # Alternative sort modes for index pages.
                continue
            if href.endswith('desktop.ini'):

@@ -376,12 +499,12 @@ class Walker:
                    self.smart_insert(head=head, commit=False)

    def walk(self, url=None):
        self.queue.append(url)
        self.queue.appendleft(url)
        try:
            while len(self.queue) > 0:
                # Popping from right helps keep the queue short because it handles the files
                # early.
                url = self.queue.pop(-1)
                url = self.queue.popleft()
                self.process_url(url)
                line = '{:,} Remaining'.format(len(self.queue))
                print(line)

@@ -395,16 +518,6 @@ class Walker:

## GENERAL FUNCTIONS ###############################################################################
## ##
def bytes_to_unit_string(bytes):
    size_unit = 1
    for unit in SIZE_UNITS:
        if bytes >= unit:
            size_unit = unit
            break
    size_unit_string = UNIT_STRINGS[size_unit]
    size_string = '%.3f %s' % ((bytes / size_unit), size_unit_string)
    return size_string

def db_init(sql, cur):
    lines = DB_INIT.split(';')
    for line in lines:
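The removed bytes_to_unit_string above duplicated voussoir's bytestring module, which this commit imports instead; presumably bytestring.bytestring produces equivalent strings (the output below is an assumption based on the removed '%.3f %s' format, not taken from the commit):

    >>> import bytestring
    >>> bytestring.bytestring(1048576)
    '1.000 MiB'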

@@ -419,20 +532,19 @@ def dict_to_file(jdict, filename):
    filehandle.write(text)
    filehandle.close()

def do_get(url):
def do_get(url, raise_for_status=True):
    # Forward the flag so callers like measure() can actually suppress raise_for_status.
    return do_request('GET', requests.get, url, raise_for_status=raise_for_status)

def do_head(url):
def do_head(url, raise_for_status=True):
    return do_request('HEAD', requests.head, url, raise_for_status=raise_for_status)

def do_request(message, method, url):
    import sys
def do_request(message, method, url, raise_for_status=True):
    message = '{message:>4s}: {url} : '.format(message=message, url=url)
    safeprint(message, end='')
    sys.stdout.flush()
    safeprint(message, end='', flush=True)
    response = method(url)
    safeprint(response.status_code)
    response.raise_for_status()
    if raise_for_status:
        response.raise_for_status()
    return response

def download_file(url, filehandle, hookfunction=None, headers={}, bytespersecond=None):

@@ -511,7 +623,8 @@ def safeprint(text, **kwargs):

def smart_insert(sql, cur, url=None, head=None, commit=True):
    '''
    INSERT or UPDATE the appropriate entry.
    INSERT or UPDATE the appropriate entry, or DELETE if the head
    shows a 403 / 404.
    '''
    if bool(url) is bool(head):
        raise ValueError('One and only one of `url` or `head` is necessary.')

@@ -523,21 +636,28 @@ def smart_insert(sql, cur, url=None, head=None, commit=True):

    elif head is not None:
        # When doing a full scan, we get a Response object.
        url = head.url
        content_length = head.headers.get('Content-Length', None)
        if content_length is not None:
            content_length = int(content_length)
        content_type = head.headers.get('Content-Type', None)
        if head.status_code in [403, 404]:
            cur.execute('DELETE FROM urls WHERE url == ?', [url])
            if commit:
                sql.commit()
            return (url, None, 0, None, 0)
        else:
            url = head.url
            content_length = head.headers.get('Content-Length', None)
            if content_length is not None:
                content_length = int(content_length)
            content_type = head.headers.get('Content-Type', None)

    basename = url_to_filepath(url)[2]
    basename = url_to_filepath(url)['filename']
    basename = urllib.parse.unquote(basename)
    do_download = True

    cur.execute('SELECT * FROM urls WHERE url == ?', [url])
    existing_entry = cur.fetchone()
    is_new = existing_entry is None

    data = (url, basename, content_length, content_type, do_download)
    if is_new:

        cur.execute('INSERT INTO urls VALUES(?, ?, ?, ?, ?)', data)
    else:
        command = '''

@@ -547,6 +667,7 @@ def smart_insert(sql, cur, url=None, head=None, commit=True):
            WHERE url == ?
        '''
        cur.execute(command, [content_length, content_type, url])

    if commit:
        sql.commit()
    return data

@@ -554,6 +675,7 @@ def smart_insert(sql, cur, url=None, head=None, commit=True):
def url_to_filepath(text):
    text = urllib.parse.unquote(text)
    parts = urllib.parse.urlsplit(text)
    scheme = parts.scheme
    root = parts.netloc
    (folder, filename) = os.path.split(parts.path)
    while folder.startswith('/'):

@@ -566,42 +688,58 @@ def url_to_filepath(text):
    # ...but Files are not.
    filename = filepath_sanitize(filename)

    return (root, folder, filename)
    result = {
        'scheme': scheme,
        'root': root,
        'folder': folder,
        'filename': filename,
    }
    return result
## ##
## GENERAL FUNCTIONS ###############################################################################

## COMMANDLINE FUNCTIONS ###########################################################################
## ##
def digest(args):
    fullscan = args.fullscan
    if isinstance(fullscan, str):
        fullscan = bool(eval(fullscan))
    walkurl = args.walkurl
    if walkurl == '!clipboard':
def digest(databasename, walkurl, fullscan=False):
    if walkurl in ('!clipboard', '!c'):
        walkurl = get_clipboard()
        safeprint('From clipboard: %s' % walkurl)
    walker = Walker(
        databasename=args.databasename,
        databasename=databasename,
        fullscan=fullscan,
        walkurl=walkurl,
    )
    walker.walk()

def download(args):
    bytespersecond = args.bytespersecond
def digest_argparse(args):
    return digest(
        databasename=args.databasename,
        walkurl=args.walkurl,
        fullscan=args.fullscan,
    )

def download(databasename, outputdir=None, overwrite=False, bytespersecond=None):
    if isinstance(bytespersecond, str):
        bytespersecond = eval(bytespersecond)

    downloader = Downloader(
        databasename=args.databasename,
        outputdir=args.outputdir,
        databasename=databasename,
        outputdir=outputdir,
    )
    downloader.download(
        bytespersecond=bytespersecond,
        overwrite=args.overwrite,
        overwrite=overwrite,
    )

def download_argparse(args):
    return download(
        databasename=args.databasename,
        outputdir=args.outputdir,
        overwrite=args.overwrite,
        bytespersecond=args.bytespersecond,
    )

def filter_pattern(databasename, regex, action='keep', *trash):
    '''
    When `action` is 'keep', then any URLs matching the regex will have their
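The refactor in this hunk splits each command into a plain function plus a thin *_argparse shim, so the logic is importable without the CLI. For example, one could now call digest directly from Python (illustrative, assuming opendirdl is on the import path):

    import opendirdl
    opendirdl.digest('website.com.db', 'http://website.com/directory/', fullscan=True)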

@@ -653,15 +791,12 @@ def keep_pattern(args):
        regex=args.regex,
    )

def list_basenames(args):
def list_basenames(databasename, outputfile=None):
    '''
    Given a database, print the entries in order of the file basenames.
    This makes it easier to find interesting titles without worrying about
    what directory they're in.
    '''
    databasename = args.databasename
    outputfile = args.outputfile

    sql = sqlite3.connect(databasename)
    cur = sql.cursor()
    cur.execute('SELECT basename FROM urls WHERE do_download == 1 ORDER BY LENGTH(basename) DESC LIMIT 1')
@@ -691,14 +826,18 @@ def list_basenames(args):
    if outputfile:
        outputfile.close()

def measure(args):
def list_basenames_argparse(args):
    return list_basenames(
        databasename=args.databasename,
        outputfile=args.outputfile,
    )

def measure(databasename, fullscan=False):
    '''
    Given a database, print the sum of all Content-Lengths.
    If `fullscan`, then URLs with no Content-Length will be
    HEAD requested, and the result will be saved back into the file.
    '''
    databasename = args.databasename
    fullscan = args.fullscan
    if isinstance(fullscan, str):
        fullscan = bool(fullscan)

@@ -708,25 +847,29 @@ def measure(args):
    cur2 = sql.cursor()
    cur2.execute('SELECT * FROM urls WHERE do_download == 1')
    filecount = 0
    files_without_size = 0
    unmeasured_file_count = 0
    try:
        while True:
            fetch = cur2.fetchone()
            if fetch is None:
                break

            size = fetch[SQL_CONTENT_LENGTH]
            if size is None:
                if fullscan:
                    url = fetch[SQL_URL]
                    head = do_head(url)
                    fetch = smart_insert(sql, cur1, head=head, commit=False)
                    size = fetch[SQL_CONTENT_LENGTH]
                    if size is None:
                        safeprint('"%s" is not revealing Content-Length' % url)
                        size = 0
                else:
                    files_without_size += 1

            if fullscan:
                url = fetch[SQL_URL]
                head = do_head(url, raise_for_status=False)
                fetch = smart_insert(sql, cur1, head=head, commit=False)
                size = fetch[SQL_CONTENT_LENGTH]
                if size is None:
                    safeprint('"%s" is not revealing Content-Length' % url)
                    size = 0

            elif fetch[SQL_CONTENT_LENGTH] is None:
                unmeasured_file_count += 1
                size = 0

            totalsize += size
            filecount += 1
    except:

@@ -734,14 +877,20 @@ def measure(args):
        raise

    sql.commit()
    short_string = bytes_to_unit_string(totalsize)
    short_string = bytestring.bytestring(totalsize)
    totalsize_string = '{} ({:,} bytes) in {:,} files'.format(short_string, totalsize, filecount)
    print(totalsize_string)
    if files_without_size > 0:
        print('Note: %d files do not have a stored Content-Length.' % files_without_size)
    if unmeasured_file_count > 0:
        print('Note: %d files do not have a stored Content-Length.' % unmeasured_file_count)
        print('Run `measure` with `-f` or `--fullscan` to HEAD request those files.')
    return totalsize

def measure_argparse(args):
    return measure(
        databasename=args.databasename,
        fullscan=args.fullscan,
    )

def remove_pattern(args):
    '''
    See `filter_pattern`.
@@ -751,6 +900,160 @@ def remove_pattern(args):
        databasename=args.databasename,
        regex=args.regex,
    )

def tree(databasename, output_filename=None):
    sql = sqlite3.connect(databasename)
    cur = sql.cursor()
    cur.execute('SELECT * FROM urls WHERE do_download == 1')
    items = cur.fetchall()
    if len(items) == 0:
        return

    items.sort(key=lambda x: x[SQL_URL])

    path_parts = url_to_filepath(items[0][SQL_URL])
    root_identifier = path_parts['root']
    #print('Root', root_identifier)
    root_data = {'name': root_identifier, 'item_type': 'directory'}
    tree = TreeNode(identifier=root_identifier, data=root_data)
    node_map = {}

    unmeasured_file_count = 0

    for item in items:
        path = url_to_filepath(item[SQL_URL])
        scheme = path['scheme']
        path = '\\'.join([path['root'], path['folder'], path['filename']])
        parts = path.split('\\')
        for (index, part) in enumerate(parts):
            index += 1
            this_path = '/'.join(parts[:index])
            parent_path = '/'.join(parts[:index-1])
            #safeprint('this:' + this_path)
            #safeprint('parent:' + parent_path)
            #input()
            data = {
                'name': part,
                'url': scheme + '://' + this_path,
            }
            if index == len(parts):
                data['item_type'] = 'file'
                if item[SQL_CONTENT_LENGTH]:
                    data['size'] = item[SQL_CONTENT_LENGTH]
                else:
                    unmeasured_file_count += 1
                    data['size'] = 0
            else:
                data['item_type'] = 'directory'

            # Ensure this item is in a node of its own
            this_node = node_map.get(this_path, None)
            if this_node:
                # This ID was detected as a parent of a previous iteration
                # Now we're actually filling it in.
                this_node.data = data
            else:
                this_node = TreeNode(this_path, data)
                node_map[this_path] = this_node

            # Attach this node to the parent.
            if parent_path == root_identifier:
                try:
                    tree.add_child(this_node)
                except:
                    pass
            else:
                parent_node = node_map.get(parent_path, None)
                if not parent_node:
                    parent_node = TreeNode(parent_path, data=None)
                    node_map[parent_path] = parent_node
                try:
                    parent_node.add_child(this_node)
                except:
                    pass
                this_node.parent = parent_node
            #print(this_node.data)

    def write(line, outfile=None):
        if outfile is None:
            safeprint(line)
        else:
            outfile.write(line + '\n')

    def recursive_get_size(node):
        size = node.data.get('size', 0)
        if size:
            # Files have this attribute, dirs don't
            return size

        for child in node.children.values():
            size += recursive_get_size(child)
        node.data['size'] = size
        return size

    def recursive_print_node(node, depth=0, outfile=None):
        if use_html:
            if node.data['item_type'] == 'directory':
                div_id = hashit(node.identifier, 16)
                line = '<button onclick="collapse(\'{div_id}\')">{name} ({size})</button>'
                line += '<div id="{div_id}">'
                line = line.format(
                    div_id=div_id,
                    name=node.data['name'],
                    size=bytestring.bytestring(node.data['size']),
                )
            else:
                line = '<a href="{url}">{name} ({size})</a><br>'
                line = line.format(
                    url=node.data['url'],
                    name=node.data['name'],
                    size=bytestring.bytestring(node.data['size']),
                )
        else:
            line = '{space}{bar}{name} : ({size})'
            line = line.format(
                space='| '*(depth-1),
                bar='|---' if depth > 0 else '',
                name=node.data['name'],
                size=bytestring.bytestring(node.data['size'])
            )
        write(line, outfile)

        for (key, child) in node.sorted_children():
            recursive_print_node(child, depth+1, outfile=outfile)

        if node.data['item_type'] == 'directory':
            if use_html:
                write('</div>', outfile)
            else:
                # This helps put some space between sibling directories
                write('| ' * (depth), outfile)

    recursive_get_size(tree)
    output_file = None
    # Guard against output_filename=None, which would crash the .lower() call.
    use_html = output_filename is not None and output_filename.lower().endswith('.html')

    if output_filename is not None:
        output_file = open(output_filename, 'w', encoding='utf-8')

    if use_html:
        write(HTML_TREE_HEADER, outfile=output_file)

    recursive_print_node(tree, outfile=output_file)
    if unmeasured_file_count > 0:
        write('Note: %d files do not have a stored Content-Length.' % unmeasured_file_count, outfile=output_file)
        write('Run `measure` with `-f` or `--fullscan` to HEAD request those files.', outfile=output_file)

    if output_file is not None:
        output_file.close()
    return tree

def tree_argparse(args):
    return tree(
        databasename=args.databasename,
        output_filename=args.outputfile,
    )
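For reference, the plain-text branch of recursive_print_node above produces output of this shape (hypothetical database contents; sizes formatted by bytestring.bytestring):

    website.com : (429.000 KiB)
    |---folder : (429.000 KiB)
    | |---image.jpg : (429.000 KiB)
    |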

## ##
## COMMANDLINE FUNCTIONS ###########################################################################

@@ -765,15 +1068,15 @@ if __name__ == '__main__':
    p_digest = subparsers.add_parser('digest')
    p_digest.add_argument('walkurl')
    p_digest.add_argument('-db', '--database', dest='databasename', default=None)
    p_digest.add_argument('-f', '--fullscan', action='store_true')
    p_digest.set_defaults(func=digest)
    p_digest.add_argument('-f', '--fullscan', dest='fullscan', action='store_true')
    p_digest.set_defaults(func=digest_argparse)

    p_download = subparsers.add_parser('download')
    p_download.add_argument('databasename')
    p_download.add_argument('-o', '--outputdir', dest='outputdir', default=None)
    p_download.add_argument('-ow', '--overwrite', dest='overwrite', default=False)
    p_download.add_argument('-bps', '--bytespersecond', dest='bytespersecond', default=None)
    p_download.set_defaults(func=download)
    p_download.add_argument('-ow', '--overwrite', dest='overwrite', action='store_true')
    p_download.set_defaults(func=download_argparse)

    p_keep_pattern = subparsers.add_parser('keep_pattern')
    p_keep_pattern.add_argument('databasename')

@@ -782,18 +1085,23 @@ if __name__ == '__main__':

    p_list_basenames = subparsers.add_parser('list_basenames')
    p_list_basenames.add_argument('databasename')
    p_list_basenames.add_argument('outputfile', nargs='?', default=None)
    p_list_basenames.set_defaults(func=list_basenames)
    p_list_basenames.add_argument('-o', '--outputfile', dest='outputfile', default=None)
    p_list_basenames.set_defaults(func=list_basenames_argparse)

    p_measure = subparsers.add_parser('measure')
    p_measure.add_argument('databasename')
    p_measure.add_argument('-f', '--fullscan', action='store_true')
    p_measure.set_defaults(func=measure)
    p_measure.add_argument('-f', '--fullscan', dest='fullscan', action='store_true')
    p_measure.set_defaults(func=measure_argparse)

    p_remove_pattern = subparsers.add_parser('remove_pattern')
    p_remove_pattern.add_argument('databasename')
    p_remove_pattern.add_argument('regex')
    p_remove_pattern.set_defaults(func=remove_pattern)

    p_tree = subparsers.add_parser('tree')
    p_tree.add_argument('databasename')
    p_tree.add_argument('-o', '--outputfile', dest='outputfile', default=None)
    p_tree.set_defaults(func=tree_argparse)

    args = parser.parse_args()
    args.func(args)
Binary file not shown.

@@ -1,3 +1,8 @@





import datetime
import os
import PIL.Image

@@ -10,31 +15,44 @@ ID_LENGTH = 22
VALID_TAG_CHARS = string.ascii_lowercase + string.digits + '_-'
MAX_TAG_NAME_LENGTH = 32

SQL_LASTID_COLUMNCOUNT = 2
SQL_LASTID_TAB = 0
SQL_LASTID_ID = 1
SQL_LASTID_COLUMNS = [
    'table',
    'last_id',
]

SQL_PHOTO_COLUMNCOUNT = 8
SQL_PHOTO_ID = 0
SQL_PHOTO_FILEPATH = 1
SQL_PHOTO_EXTENSION = 2
SQL_PHOTO_WIDTH = 3
SQL_PHOTO_HEIGHT = 4
SQL_PHOTO_AREA = 5
SQL_PHOTO_BYTES = 6
SQL_PHOTO_CREATED = 7
SQL_PHOTO_COLUMNS = [
    'id',
    'filepath',
    'extension',
    'width',
    'height',
    'ratio',
    'area',
    'bytes',
    'created',
]

SQL_PHOTOTAG_COLUMNCOUNT = 2
SQL_PHOTOTAG_PHOTOID = 0
SQL_PHOTOTAG_TAGID = 1
SQL_PHOTOTAG_COLUMNS = [
    'photoid',
    'tagid',
]

SQL_SYN_COLUMNCOUNT = 2
SQL_SYN_NAME = 0
SQL_SYN_MASTER = 1
SQL_SYN_COLUMNS = [
    'name',
    'master',
]

SQL_TAG_COLUMNS = [
    'id',
    'name',
]

SQL_LASTID = {key:index for (index, key) in enumerate(SQL_LASTID_COLUMNS)}
SQL_PHOTO = {key:index for (index, key) in enumerate(SQL_PHOTO_COLUMNS)}
SQL_PHOTOTAG = {key:index for (index, key) in enumerate(SQL_PHOTOTAG_COLUMNS)}
SQL_SYN = {key:index for (index, key) in enumerate(SQL_SYN_COLUMNS)}
SQL_TAG = {key:index for (index, key) in enumerate(SQL_TAG_COLUMNS)}
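The dict comprehensions above replace the old hand-numbered index constants with name-to-position lookups generated from the column lists. Note that inserting 'ratio' into SQL_PHOTO_COLUMNS shifts 'created' from index 7 to 8, which the generated dict tracks automatically (illustrative):

    >>> SQL_PHOTO['width']
    3
    >>> SQL_PHOTO['created']
    8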

SQL_TAG_COLUMNCOUNT = 2
SQL_TAG_ID = 0
SQL_TAG_NAME = 1

DB_INIT = '''
CREATE TABLE IF NOT EXISTS photos(

@@ -43,6 +61,7 @@ CREATE TABLE IF NOT EXISTS photos(
    extension TEXT,
    width INT,
    height INT,
    ratio REAL,
    area INT,
    bytes INT,
    created INT

@@ -51,6 +70,10 @@ CREATE TABLE IF NOT EXISTS tags(
    id TEXT,
    name TEXT
);
CREATE TABLE IF NOT EXISTS albums(
    albumid TEXT,
    photoid TEXT
);
CREATE TABLE IF NOT EXISTS photo_tag_rel(
    photoid TEXT,
    tagid TEXT

@@ -76,15 +99,6 @@ CREATE INDEX IF NOT EXISTS index_tagrel_tagid on photo_tag_rel(tagid);
CREATE INDEX IF NOT EXISTS index_tagsyn_name on tag_synonyms(name);
'''

def assert_lower(*args):
    previous = args[0]
    for element in args[1:]:
        if element is None:
            continue
        if element < previous:
            raise ValueError('Min and Max out of order')
        previous = element

def basex(number, base, alphabet='0123456789abcdefghijklmnopqrstuvwxyz'):
    '''
    Converts an integer to a different base string.

@@ -131,12 +145,8 @@ def is_xor(*args):
    '''
    return [bool(a) for a in args].count(True) == 1

def min_max_query_builder(name, sign, value):