diff --git a/.GitImages/quicktips_imagetk.png b/.GitImages/quicktips_imagetk.png new file mode 100644 index 0000000..c275d05 Binary files /dev/null and b/.GitImages/quicktips_imagetk.png differ diff --git a/BaseNumber/basenumber.py b/BaseNumber/basenumber.py new file mode 100644 index 0000000..1043a68 --- /dev/null +++ b/BaseNumber/basenumber.py @@ -0,0 +1,82 @@ +import string + +ALPHABET = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz' + +def from_base(number, base, alphabet=None): + if base < 2: + raise ValueError('base must be >= 2.') + if not isinstance(base, int): + raise TypeError('base must be an int.') + + if base == 10: + return number + + if alphabet is None: + alphabet = ALPHABET + number = str(number) + alphabet = alphabet[:base] + + if number.count('.') > 1: + raise ValueError('Too many decimal points') + + mixed_case = any(c in string.ascii_uppercase for c in alphabet) and \ + any(c in string.ascii_lowercase for c in alphabet) + if not mixed_case: + alphabet = alphabet.upper() + number = number.upper() + + char_set = set(number.replace('.', '', 1)) + alpha_set = set(alphabet) + differences = char_set.difference(alpha_set) + if len(differences) > 0: + raise ValueError('Unknown characters for base', base, differences) + alpha_dict = {character:index for (index, character) in enumerate(alphabet)} + + try: + decimal_pos = number.index('.') + except ValueError: + decimal_pos = len(number) + + + result = 0 + for (index, character) in enumerate(number): + if index == decimal_pos: + continue + power = (decimal_pos - index) + if index < decimal_pos: + power -= 1 + value = alpha_dict[character] * (base ** power) + #print(value) + result += value + return result + +def to_base(number, base, decimal_places=10, alphabet=None): + if base < 2: + raise ValueError('base must be >= 2.') + if not isinstance(base, int): + raise TypeError('base must be an int.') + + if base == 10: + return str(number) + + if alphabet is None: + alphabet = ALPHABET + + if 
base > len(alphabet): + raise ValueError('Not enough symbols in alphabet for base %d' % base) + + result = '' + whole_portion = int(number) + float_portion = number - whole_portion + while whole_portion > 0: + (whole_portion, remainder) = divmod(whole_portion, base) + result = alphabet[remainder] + result + if float_portion != 0: + result += '.' + for x in range(decimal_places): + float_portion *= base + whole = int(float_portion) + float_portion -= whole + result += alphabet[whole] + + return result diff --git a/DeLetterbox/deletterbox.py b/DeLetterbox/deletterbox.py index 706bdec..b75a622 100644 --- a/DeLetterbox/deletterbox.py +++ b/DeLetterbox/deletterbox.py @@ -10,6 +10,7 @@ except: pass def close_enough(a, b): + #print(a, b) for (a_channel, b_channel) in zip(a, b): if abs(a_channel - b_channel) > close_enough_threshold: return False @@ -17,15 +18,26 @@ def close_enough(a, b): def deletterbox(filename): image = Image.open(filename) + (base, ext) = os.path.splitext(filename) for x in range(4): image = trim_top(image) - image = image.rotate(90, expand=True) - (base, ext) = os.path.splitext(filename) - filename = base + 'X' + ext + print('size', image.size) + #image.save('%s_%d%s' % (base, x, ext)) + + rotated = image.rotate(90, expand=True) + # There is currently a bug in PIL which causes rotated images + # to have a 1 px black border on the top and left + if rotated.size != image.size: + rotated = rotated.crop([1, 1, rotated.size[0], rotated.size[1]]) + + image = rotated + print() + filename = base + '_crop' + ext image.save(filename, quality=100) def trim_top(image): letterbox_color = image.getpixel((0, 0)) + print('letterbox color', letterbox_color) for y in range(image.size[1]): solid = True for x in range(image.size[0]): @@ -33,12 +45,12 @@ def trim_top(image): #print(pixel) if not close_enough(letterbox_color, pixel): solid = False - #print(y,pixel) + print('broke at', y,pixel) break if not solid: break bounds = (0, y, image.size[0], image.size[1]) - 
print(bounds) + print('bounds', bounds) image = image.crop(bounds) return image diff --git a/ImageFilters/ear.png b/ImageFilters/ear.png index e5aab4e..29fed5e 100644 Binary files a/ImageFilters/ear.png and b/ImageFilters/ear.png differ diff --git a/ImageFilters/imagefilters.py b/ImageFilters/imagefilters.py index 3b8e093..9bd3819 100644 --- a/ImageFilters/imagefilters.py +++ b/ImageFilters/imagefilters.py @@ -10,6 +10,11 @@ KERNEL_EDGE_DETECTION_H = [ [-2, 0, 2], [-2, 0, 2], ] +KERNEL_EDGE_DETECTION_V = [ + [-2, -2, 2], + [0, 0, 0], + [2, 2, 2], +] def index_to_xy(index, width): (y, x) = divmod(index, width) return (x, y) @@ -17,6 +22,15 @@ def index_to_xy(index, width): def xy_to_index(x, y, width): return (y * width) + x +def add(image_a, image_b): + pixels_a = image_a.getdata() + pixels_b = image_b.getdata() + assert len(pixels_a) == len(pixels_b) + pixels_c = [a + b for (a, b) in zip(pixels_a, pixels_b)] + new_image = PIL.Image.new('L', (image_a.size)) + new_image.putdata(pixels_c, 1, 0) + return new_image + def apply_filter(old_image, kernel): kernel_height = len(kernel) kernel_width = len(kernel[0]) @@ -49,6 +63,8 @@ def apply_filter(old_image, kernel): if subject_y < 0 or subject_y >= image_height: continue for (kernel_x, kernel_entry) in enumerate(kernel_row): + if kernel_entry == 0: + continue subject_x = x - (kernel_center[0] - kernel_x) if subject_x < 0 or subject_x >= image_width: continue @@ -61,8 +77,8 @@ def apply_filter(old_image, kernel): operation_avg = abs(operation_sum / operation_denominator) #n_operation_avg = int(map_range(operation_avg, lower, upper, 0, 255)) if index % 4096 == 0: - print(x, y, operation_sum, operation_denominator, operation_avg) - #print(y, '/', image_height) + #print(x, y, operation_sum, operation_denominator, operation_avg) + print(y, '/', image_height) new_pixels[index] = operation_avg #print(new_pixels) @@ -91,7 +107,10 @@ def map_range(x, old_low, old_high, new_low, new_high): return y if __name__ == '__main__': - i = 
PIL.Image.open('ear.jpg') + i = PIL.Image.open('icon.jpg') i = i.convert('L') - i = apply_filter(apply_filter(i, KERNEL_GAUSSIAN_BLUR), KERNEL_EDGE_DETECTION_H) - i.save('ear.png') \ No newline at end of file + i = apply_filter(i, KERNEL_GAUSSIAN_BLUR) + a = apply_filter(i, KERNEL_EDGE_DETECTION_H) + b = apply_filter(i, KERNEL_EDGE_DETECTION_V) + i = add(a, b) + i.save('icon.png') \ No newline at end of file diff --git a/Javascript/reddit_live_new.html b/Javascript/reddit_live_new.html new file mode 100644 index 0000000..f8a23b7 --- /dev/null +++ b/Javascript/reddit_live_new.html @@ -0,0 +1,263 @@ + + + + + /new + + + +
+ + + + +
+ +
+
+ + + + + + + + + \ No newline at end of file diff --git a/OpenDirDL/README.md b/OpenDirDL/README.md index b2bc901..771a32c 100644 --- a/OpenDirDL/README.md +++ b/OpenDirDL/README.md @@ -1,10 +1,13 @@ Open Dir DL =========== +- 2016 07 04 + - Added new argparse command "tree" + +- 2016 02 08 + - Fixed bug where server:port urls did not create db files. + - Moved db commits to only happen at the end of a digest. + Requires `pip install beautifulsoup4` -See inside opendirdl.py for usage instructions. - - 2016 02 08 - - Fixed bug where server:port urls did not create db files. - - Moved db commits to only happen at the end of a digest. \ No newline at end of file +See inside opendirdl.py for usage instructions. \ No newline at end of file diff --git a/OpenDirDL/opendirdl.py b/OpenDirDL/opendirdl.py index 9078dca..e27cb48 100644 --- a/OpenDirDL/opendirdl.py +++ b/OpenDirDL/opendirdl.py @@ -4,14 +4,14 @@ downloads open directories The basics: 1. Create a database of the directory's files with - > opendirdl digest http://website.com/directory/ + > opendirdl digest http://website.com/directory/ 2. Enable and disable the files you are interested in with - > opendirdl remove_pattern ".*" - > opendirdl keep_pattern "Daft%20Punk" - > opendirdl remove_pattern "folder\.jpg" + > opendirdl remove_pattern ".*" + > opendirdl keep_pattern "Daft%20Punk" + > opendirdl remove_pattern "folder\.jpg" Note the percent-encoded string. 3. Download the enabled files with - > opendirdl download database.db + > opendirdl download database.db Specifics: @@ -52,13 +52,13 @@ keep_pattern: Enable URLs which match a regex pattern. Matches are based on the percent- encoded strings! - > opendirdl keep_pattern database.db ".*" + > opendirdl keep_pattern website.com.db ".*" remove_pattern: Disable URLs which match a regex pattern. Matches are based on the percent- encoded strings! 
- > opendirdl remove_pattern database.db ".*" + > opendirdl remove_pattern website.com.db ".*" list_basenames: List enabled URLs in order of their base filename. This makes it easier to @@ -76,13 +76,27 @@ list_basenames: measure: Sum up the filesizes of all Enabled URLs. - > opendirdl measure database.db + > opendirdl measure website.com.db flags: -f | --fullscan: When included, perform HEAD requests when a file's size is not known. If this flag is not included, and some file's size is unkown, you will receive a printed note. + +tree: + Print the file / folder tree. + + > opendirdl tree website.com.db + + flags: + -o "x.txt" | --outputfile "x.txt": + Output the results to a file instead of stdout. This is useful if the + filenames contain special characters that crash Python, or are so long + that the console becomes unreadable. + + If the filename ends with ".html", the webpage will use collapsible + boxes rather than plain text. ''' @@ -91,10 +105,14 @@ measure: # time importing them usually. import sys +# Please consult my github repo for these files +# https://github.com/voussoir/else sys.path.append('C:\\git\\else\\ratelimiter'); import ratelimiter +sys.path.append('C:\\git\\else\\bytestring'); import bytestring import argparse ## ~import bs4 +import collections ## ~import hashlib import os ## ~import re @@ -108,6 +126,8 @@ FILENAME_BADCHARS = '/\\:*?"<>|' TERMINAL_WIDTH = shutil.get_terminal_size().columns +DOWNLOAD_CHUNK = 16 * bytestring.KIBIBYTE + # When doing a basic scan, we will not send HEAD requests to URLs that end in these strings, # because they're probably files. 
# This isn't meant to be a comprehensive filetype library, but it covers enough of the @@ -152,23 +172,43 @@ SKIPPABLE_FILETYPES = [ ] SKIPPABLE_FILETYPES = set(x.lower() for x in SKIPPABLE_FILETYPES) -BYTE = 1 -KIBIBYTE = 1024 * BYTE -MIBIBYTE = 1024 * KIBIBYTE -GIBIBYTE = 1024 * MIBIBYTE -TEBIBYTE = 1024 * GIBIBYTE -SIZE_UNITS = (TEBIBYTE, GIBIBYTE, MIBIBYTE, KIBIBYTE, BYTE) +# oh shit +HTML_TREE_HEADER = ''' + -UNIT_STRINGS = { - BYTE: 'b', - KIBIBYTE: 'KiB', - MIBIBYTE: 'MiB', - GIBIBYTE: 'GiB', - TEBIBYTE: 'TiB', + -DOWNLOAD_CHUNK = 2 * KIBIBYTE - + +''' DB_INIT = ''' CREATE TABLE IF NOT EXISTS urls( @@ -202,8 +242,7 @@ class Downloader: # If they aren't, it's the user's fault. self.cur.execute('SELECT url FROM urls LIMIT 1') url = self.cur.fetchone()[0] - # returns (root, path, filename). Keep root. - outputdir = url_to_filepath(url)[0] + outputdir = url_to_filepath(url)['root'] self.outputdir = outputdir def download(self, overwrite=False, bytespersecond=None): @@ -216,13 +255,13 @@ class Downloader: break url = fetch[SQL_URL] - ''' Creating the Path ''' - (root, folder, basename) = url_to_filepath(url) + ''' Creating the permanent and temporary filenames ''' + url_filepath = url_to_filepath(url) # Ignore this value of `root`, because we might have a custom outputdir. 
- root = self.outputdir - folder = os.path.join(root, folder) + root = url_filepath['root'] + folder = os.path.join(root, url_filepath['folder']) os.makedirs(folder, exist_ok=True) - fullname = os.path.join(folder, basename) + fullname = os.path.join(folder, url_filepath['filename']) temporary_basename = hashit(url, 16) + '.oddltemporary' temporary_fullname = os.path.join(folder, temporary_basename) @@ -252,6 +291,89 @@ class Generic: def __init__(self, **kwargs): for kwarg in kwargs: setattr(self, kwarg, kwargs[kwarg]) + + +class TreeNode: + def __init__(self, identifier, data, parent=None): + assert isinstance(identifier, str) + assert '\\' not in identifier + self.identifier = identifier + self.data = data + self.parent = parent + self.children = {} + + def __getitem__(self, key): + return self.children[key] + + def __repr__(self): + return 'TreeNode %s' % self.abspath() + + def abspath(self): + node = self + nodes = [node] + while node.parent is not None: + node = node.parent + nodes.append(node) + nodes.reverse() + nodes = [node.identifier for node in nodes] + return '\\'.join(nodes) + + def add_child(self, other_node, overwrite_parent=False): + self.check_child_availability(other_node.identifier) + if other_node.parent is not None and not overwrite_parent: + raise ValueError('That node already has a parent. 
Try `overwrite_parent=True`') + + other_node.parent = self + self.children[other_node.identifier] = other_node + return other_node + + def check_child_availability(self, identifier): + if ':' in identifier: + raise Exception('Only roots may have a colon') + if identifier in self.children: + raise Exception('Node %s already has child %s' % (self.identifier, identifier)) + + def detach(self): + del self.parent.children[self.identifier] + self.parent = None + + def listnodes(self, customsort=None): + items = list(self.children.items()) + if customsort is None: + items.sort(key=lambda x: x[0].lower()) + else: + items.sort(key=customsort) + return [item[1] for item in items] + + def merge_other(self, othertree, otherroot=None): + newroot = None + if ':' in othertree.identifier: + if otherroot is None: + raise Exception('Must specify a new name for the other tree\'s root') + else: + newroot = otherroot + else: + newroot = othertree.identifier + othertree.identifier = newroot + othertree.parent = self + self.check_child_availability(newroot) + self.children[newroot] = othertree + + def printtree(self, customsort=None): + for node in self.walk(customsort): + print(node.abspath()) + + def sorted_children(self): + keys = sorted(self.children.keys()) + for key in keys: + yield (key, self.children[key]) + + def walk(self, customsort=None): + yield self + for child in self.listnodes(customsort=customsort): + #print(child) + #print(child.listnodes()) + yield from child.walk(customsort=customsort) ## ## ## GENERIC ######################################################################################### @@ -264,7 +386,7 @@ class Walker: walkurl += '/' self.walkurl = walkurl if databasename is None or databasename == "": - self.domain = url_to_filepath(walkurl)[0] + self.domain = url_to_filepath(walkurl)['root'] databasename = self.domain + '.db' databasename = databasename.replace(':', '') self.databasename = databasename @@ -275,7 +397,7 @@ class Walker: db_init(self.sql, 
self.cur) self.fullscan = bool(fullscan) - self.queue = [] + self.queue = collections.deque() self.seen_directories = set() def smart_insert(self, url=None, head=None, commit=True): @@ -301,7 +423,8 @@ class Walker: if not href.startswith(self.walkurl): # Don't go to other sites or parent directories. continue - if 'C=' in href and 'O=' in href: + #if 'C=' in href and 'O=' in href: + if any(sorter in href for sorter in ('?C=', '?O=', '?M=', '?D=', '?N=', '?S=')): # Alternative sort modes for index pages. continue if href.endswith('desktop.ini'): @@ -376,12 +499,12 @@ class Walker: self.smart_insert(head=head, commit=False) def walk(self, url=None): - self.queue.append(url) + self.queue.appendleft(url) try: while len(self.queue) > 0: # Popping from right helps keep the queue short because it handles the files # early. - url = self.queue.pop(-1) + url = self.queue.popleft() self.process_url(url) line = '{:,} Remaining'.format(len(self.queue)) print(line) @@ -395,16 +518,6 @@ class Walker: ## GENERAL FUNCTIONS ############################################################################### ## ## -def bytes_to_unit_string(bytes): - size_unit = 1 - for unit in SIZE_UNITS: - if bytes >= unit: - size_unit = unit - break - size_unit_string = UNIT_STRINGS[size_unit] - size_string = '%.3f %s' % ((bytes / size_unit), size_unit_string) - return size_string - def db_init(sql, cur): lines = DB_INIT.split(';') for line in lines: @@ -419,20 +532,19 @@ def dict_to_file(jdict, filename): filehandle.write(text) filehandle.close() -def do_get(url): +def do_get(url, raise_for_status=True): return do_request('GET', requests.get, url) -def do_head(url): +def do_head(url, raise_for_status=True): return do_request('HEAD', requests.head, url) -def do_request(message, method, url): - import sys +def do_request(message, method, url, raise_for_status=True): message = '{message:>4s}: {url} : '.format(message=message, url=url) - safeprint(message, end='') - sys.stdout.flush() + safeprint(message, 
end='', flush=True) response = method(url) safeprint(response.status_code) - response.raise_for_status() + if raise_for_status: + response.raise_for_status() return response def download_file(url, filehandle, hookfunction=None, headers={}, bytespersecond=None): @@ -511,7 +623,8 @@ def safeprint(text, **kwargs): def smart_insert(sql, cur, url=None, head=None, commit=True): ''' - INSERT or UPDATE the appropriate entry. + INSERT or UPDATE the appropriate entry, or DELETE if the head + shows a 403 / 404. ''' if bool(url) is bool(head): raise ValueError('One and only one of `url` or `head` is necessary.') @@ -523,21 +636,28 @@ def smart_insert(sql, cur, url=None, head=None, commit=True): elif head is not None: # When doing a full scan, we get a Response object. - url = head.url - content_length = head.headers.get('Content-Length', None) - if content_length is not None: - content_length = int(content_length) - content_type = head.headers.get('Content-Type', None) + if head.status_code in [403, 404]: + cur.execute('DELETE FROM urls WHERE url == ?', [url]) + if commit: + sql.commit() + return (url, None, 0, None, 0) + else: + url = head.url + content_length = head.headers.get('Content-Length', None) + if content_length is not None: + content_length = int(content_length) + content_type = head.headers.get('Content-Type', None) - basename = url_to_filepath(url)[2] + basename = url_to_filepath(url)['filename'] basename = urllib.parse.unquote(basename) do_download = True + cur.execute('SELECT * FROM urls WHERE url == ?', [url]) existing_entry = cur.fetchone() is_new = existing_entry is None + data = (url, basename, content_length, content_type, do_download) if is_new: - cur.execute('INSERT INTO urls VALUES(?, ?, ?, ?, ?)', data) else: command = ''' @@ -547,6 +667,7 @@ def smart_insert(sql, cur, url=None, head=None, commit=True): WHERE url == ? 
''' cur.execute(command, [content_length, content_type, url]) + if commit: sql.commit() return data @@ -554,6 +675,7 @@ def smart_insert(sql, cur, url=None, head=None, commit=True): def url_to_filepath(text): text = urllib.parse.unquote(text) parts = urllib.parse.urlsplit(text) + scheme = parts.scheme root = parts.netloc (folder, filename) = os.path.split(parts.path) while folder.startswith('/'): @@ -566,42 +688,58 @@ def url_to_filepath(text): # ...but Files are not. filename = filepath_sanitize(filename) - return (root, folder, filename) + result = { + 'scheme': scheme, + 'root': root, + 'folder': folder, + 'filename': filename, + } + return result ## ## ## GENERAL FUNCTIONS ############################################################################### ## COMMANDLINE FUNCTIONS ########################################################################### ## ## -def digest(args): - fullscan = args.fullscan - if isinstance(fullscan, str): - fullscan = bool(eval(fullscan)) - walkurl = args.walkurl - if walkurl == '!clipboard': +def digest(databasename, walkurl, fullscan=False): + if walkurl in ('!clipboard', '!c'): walkurl = get_clipboard() safeprint('From clipboard: %s' % walkurl) walker = Walker( - databasename=args.databasename, + databasename=databasename, fullscan=fullscan, walkurl=walkurl, ) walker.walk() -def download(args): - bytespersecond = args.bytespersecond +def digest_argparse(args): + return digest( + databasename=args.databasename, + walkurl=args.walkurl, + fullscan=args.fullscan, + ) + +def download(databasename, outputdir=None, overwrite=False, bytespersecond=None): if isinstance(bytespersecond, str): bytespersecond = eval(bytespersecond) downloader = Downloader( - databasename=args.databasename, - outputdir=args.outputdir, + databasename=databasename, + outputdir=outputdir, ) downloader.download( bytespersecond=bytespersecond, - overwrite=args.overwrite, + overwrite=overwrite, ) +def download_argparse(args): + return download( + 
databasename=args.databasename, + outputdir=args.outputdir, + overwrite=args.overwrite, + bytespersecond=args.bytespersecond, + ) + def filter_pattern(databasename, regex, action='keep', *trash): ''' When `action` is 'keep', then any URLs matching the regex will have their @@ -653,15 +791,12 @@ def keep_pattern(args): regex=args.regex, ) -def list_basenames(args): +def list_basenames(databasename, outputfile=None): ''' Given a database, print the entries in order of the file basenames. This makes it easier to find interesting titles without worrying about what directory they're in. ''' - databasename = args.databasename - outputfile = args.outputfile - sql = sqlite3.connect(databasename) cur = sql.cursor() cur.execute('SELECT basename FROM urls WHERE do_download == 1 ORDER BY LENGTH(basename) DESC LIMIT 1') @@ -691,14 +826,18 @@ def list_basenames(args): if outputfile: outputfile.close() -def measure(args): +def list_basenames_argparse(args): + return list_basenames( + databasename=args.databasename, + outputfile=args.outputfile, + ) + +def measure(databasename, fullscan=False): ''' Given a database, print the sum of all Content-Lengths. If `fullscan`, then URLs with no Content-Length will be HEAD requested, and the result will be saved back into the file. 
''' - databasename = args.databasename - fullscan = args.fullscan if isinstance(fullscan, str): fullscan = bool(fullscan) @@ -708,25 +847,29 @@ def measure(args): cur2 = sql.cursor() cur2.execute('SELECT * FROM urls WHERE do_download == 1') filecount = 0 - files_without_size = 0 + unmeasured_file_count = 0 try: while True: fetch = cur2.fetchone() if fetch is None: break + size = fetch[SQL_CONTENT_LENGTH] - if size is None: - if fullscan: - url = fetch[SQL_URL] - head = do_head(url) - fetch = smart_insert(sql, cur1, head=head, commit=False) - size = fetch[SQL_CONTENT_LENGTH] - if size is None: - safeprint('"%s" is not revealing Content-Length' % url) - size = 0 - else: - files_without_size += 1 + + if fullscan: + url = fetch[SQL_URL] + head = do_head(url, raise_for_status=False) + fetch = smart_insert(sql, cur1, head=head, commit=False) + size = fetch[SQL_CONTENT_LENGTH] + if size is None: + safeprint('"%s" is not revealing Content-Length' % url) size = 0 + + + elif fetch[SQL_CONTENT_LENGTH] is None: + unmeasured_file_count += 1 + size = 0 + totalsize += size filecount += 1 except: @@ -734,14 +877,20 @@ def measure(args): raise sql.commit() - short_string = bytes_to_unit_string(totalsize) + short_string = bytestring.bytestring(totalsize) totalsize_string = '{} ({:,} bytes) in {:,} files'.format(short_string, totalsize, filecount) print(totalsize_string) - if files_without_size > 0: - print('Note: %d files do not have a stored Content-Length.' % files_without_size) + if unmeasured_file_count > 0: + print('Note: %d files do not have a stored Content-Length.' % unmeasured_file_count) print('Run `measure` with `-f` or `--fullscan` to HEAD request those files.') return totalsize +def measure_argparse(args): + return measure( + databasename=args.databasename, + fullscan=args.fullscan, + ) + def remove_pattern(args): ''' See `filter_pattern`. 
@@ -751,6 +900,160 @@ def remove_pattern(args): databasename=args.databasename, regex=args.regex, ) + +def tree(databasename, output_filename=None): + sql = sqlite3.connect(databasename) + cur = sql.cursor() + cur.execute('SELECT * FROM urls WHERE do_download == 1') + items = cur.fetchall() + if len(items) == 0: + return + + items.sort(key=lambda x: x[SQL_URL]) + + path_parts = url_to_filepath(items[0][SQL_URL]) + root_identifier = path_parts['root'] + #print('Root', root_identifier) + root_data = {'name': root_identifier, 'item_type': 'directory'} + tree = TreeNode(identifier=root_identifier, data=root_data) + node_map = {} + + unmeasured_file_count = 0 + + for item in items: + path = url_to_filepath(item[SQL_URL]) + scheme = path['scheme'] + path = '\\'.join([path['root'], path['folder'], path['filename']]) + parts = path.split('\\') + for (index, part) in enumerate(parts): + index += 1 + this_path = '/'.join(parts[:index]) + parent_path = '/'.join(parts[:index-1]) + #safeprint('this:' + this_path) + #safeprint('parent:' + parent_path) + #input() + data = { + 'name': part, + 'url': scheme + '://' + this_path, + } + if index == len(parts): + data['item_type'] = 'file' + if item[SQL_CONTENT_LENGTH]: + data['size'] = item[SQL_CONTENT_LENGTH] + else: + unmeasured_file_count += 1 + data['size'] = 0 + else: + data['item_type'] = 'directory' + + + # Ensure this comment is in a node of its own + this_node = node_map.get(this_path, None) + if this_node: + # This ID was detected as a parent of a previous iteration + # Now we're actually filling it in. + this_node.data = data + else: + this_node = TreeNode(this_path, data) + node_map[this_path] = this_node + + # Attach this node to the parent. 
+ if parent_path == root_identifier: + try: + tree.add_child(this_node) + except: + pass + else: + parent_node = node_map.get(parent_path, None) + if not parent_node: + parent_node = TreeNode(parent_path, data=None) + node_map[parent_path] = parent_node + try: + parent_node.add_child(this_node) + except: + pass + this_node.parent = parent_node + #print(this_node.data) + + def write(line, outfile=None): + if outfile is None: + safeprint(line) + else: + outfile.write(line + '\n') + + def recursive_get_size(node): + size = node.data.get('size', 0) + if size: + # Files have this attribute, dirs don't + return size + + for child in node.children.values(): + size += recursive_get_size(child) + node.data['size'] = size + return size + + def recursive_print_node(node, depth=0, outfile=None): + if use_html: + if node.data['item_type'] == 'directory': + div_id = hashit(node.identifier, 16) + line = '' + line += '
' + line = line.format( + div_id=div_id, + name=node.data['name'], + size=bytestring.bytestring(node.data['size']), + ) + else: + line = '{name} ({size})
' + line = line.format( + url=node.data['url'], + name=node.data['name'], + size=bytestring.bytestring(node.data['size']), + ) + else: + line = '{space}{bar}{name} : ({size})' + line = line.format( + space='| '*(depth-1), + bar='|---' if depth > 0 else '', + name=node.data['name'], + size=bytestring.bytestring(node.data['size']) + ) + write(line, outfile) + + for (key, child) in node.sorted_children(): + recursive_print_node(child, depth+1, outfile=outfile) + + if node.data['item_type'] == 'directory': + if use_html: + write('
', outfile) + else: + # This helps put some space between sibling directories + write('| ' * (depth), outfile) + + recursive_get_size(tree) + use_html = output_filename.lower().endswith('.html') + + if output_filename is not None: + output_file = open(output_filename, 'w', encoding='utf-8') + + if use_html: + write(HTML_TREE_HEADER, outfile=output_file) + + recursive_print_node(tree, outfile=output_file) + if unmeasured_file_count > 0: + write('Note: %d files do not have a stored Content-Length.' % unmeasured_file_count, outfile=output_file) + write('Run `measure` with `-f` or `--fullscan` to HEAD request those files.', outfile=output_file) + + if output_file is not None: + output_file.close() + return tree + +def tree_argparse(args): + return tree( + databasename=args.databasename, + output_filename=args.outputfile, + ) + ## ## ## COMMANDLINE FUNCTIONS ########################################################################### @@ -765,15 +1068,15 @@ if __name__ == '__main__': p_digest = subparsers.add_parser('digest') p_digest.add_argument('walkurl') p_digest.add_argument('-db', '--database', dest='databasename', default=None) - p_digest.add_argument('-f', '--fullscan', action='store_true') - p_digest.set_defaults(func=digest) + p_digest.add_argument('-f', '--fullscan', dest='fullscan', action='store_true') + p_digest.set_defaults(func=digest_argparse) p_download = subparsers.add_parser('download') p_download.add_argument('databasename') p_download.add_argument('-o', '--outputdir', dest='outputdir', default=None) - p_download.add_argument('-ow', '--overwrite', dest='overwrite', default=False) p_download.add_argument('-bps', '--bytespersecond', dest='bytespersecond', default=None) - p_download.set_defaults(func=download) + p_download.add_argument('-ow', '--overwrite', dest='overwrite', action='store_true') + p_download.set_defaults(func=download_argparse) p_keep_pattern = subparsers.add_parser('keep_pattern') p_keep_pattern.add_argument('databasename') @@ -782,18 
+1085,23 @@ if __name__ == '__main__': p_list_basenames = subparsers.add_parser('list_basenames') p_list_basenames.add_argument('databasename') - p_list_basenames.add_argument('outputfile', nargs='?', default=None) - p_list_basenames.set_defaults(func=list_basenames) + p_list_basenames.add_argument('-o', '--outputfile', dest='outputfile', default=None) + p_list_basenames.set_defaults(func=list_basenames_argparse) p_measure = subparsers.add_parser('measure') p_measure.add_argument('databasename') - p_measure.add_argument('-f', '--fullscan', action='store_true') - p_measure.set_defaults(func=measure) + p_measure.add_argument('-f', '--fullscan', dest='fullscan', action='store_true') + p_measure.set_defaults(func=measure_argparse) p_remove_pattern = subparsers.add_parser('remove_pattern') p_remove_pattern.add_argument('databasename') p_remove_pattern.add_argument('regex') p_remove_pattern.set_defaults(func=remove_pattern) + p_tree = subparsers.add_parser('tree') + p_tree.add_argument('databasename') + p_tree.add_argument('-o', '--outputfile', dest='outputfile', default=None) + p_tree.set_defaults(func=tree_argparse) + args = parser.parse_args() args.func(args) diff --git a/Phototagger/phototagger.db b/Phototagger/phototagger.db index 32988fa..478cb04 100644 Binary files a/Phototagger/phototagger.db and b/Phototagger/phototagger.db differ diff --git a/Phototagger/phototagger.py b/Phototagger/phototagger.py index 642fec6..4a3812d 100644 --- a/Phototagger/phototagger.py +++ b/Phototagger/phototagger.py @@ -1,3 +1,8 @@ + + + + + import datetime import os import PIL.Image @@ -10,31 +15,44 @@ ID_LENGTH = 22 VALID_TAG_CHARS = string.ascii_lowercase + string.digits + '_-' MAX_TAG_NAME_LENGTH = 32 -SQL_LASTID_COLUMNCOUNT = 2 -SQL_LASTID_TAB = 0 -SQL_LASTID_ID = 1 +SQL_LASTID_COLUMNS = [ + 'table', + 'last_id', +] -SQL_PHOTO_COLUMNCOUNT = 8 -SQL_PHOTO_ID = 0 -SQL_PHOTO_FILEPATH = 1 -SQL_PHOTO_EXTENSION = 2 -SQL_PHOTO_WIDTH = 3 -SQL_PHOTO_HEIGHT = 4 -SQL_PHOTO_AREA = 5 
-SQL_PHOTO_BYTES = 6 -SQL_PHOTO_CREATED = 7 +SQL_PHOTO_COLUMNS = [ + 'id', + 'filepath', + 'extension', + 'width', + 'height', + 'ratio', + 'area', + 'bytes', + 'created', +] -SQL_PHOTOTAG_COLUMNCOUNT = 2 -SQL_PHOTOTAG_PHOTOID = 0 -SQL_PHOTOTAG_TAGID = 1 +SQL_PHOTOTAG_COLUMNS = [ + 'photoid', + 'tagid', +] -SQL_SYN_COLUMNCOUNT = 2 -SQL_SYN_NAME = 0 -SQL_SYN_MASTER = 1 +SQL_SYN_COLUMNS = [ + 'name', + 'master', +] + +SQL_TAG_COLUMNS = [ + 'id', + 'name', +] + +SQL_LASTID = {key:index for (index, key) in enumerate(SQL_LASTID_COLUMNS)} +SQL_PHOTO = {key:index for (index, key) in enumerate(SQL_PHOTO_COLUMNS)} +SQL_PHOTOTAG = {key:index for (index, key) in enumerate(SQL_PHOTOTAG_COLUMNS)} +SQL_SYN = {key:index for (index, key) in enumerate(SQL_SYN_COLUMNS)} +SQL_TAG = {key:index for (index, key) in enumerate(SQL_TAG_COLUMNS)} -SQL_TAG_COLUMNCOUNT = 2 -SQL_TAG_ID = 0 -SQL_TAG_NAME = 1 DB_INIT = ''' CREATE TABLE IF NOT EXISTS photos( @@ -43,6 +61,7 @@ CREATE TABLE IF NOT EXISTS photos( extension TEXT, width INT, height INT, + ratio REAL, area INT, bytes INT, created INT @@ -51,6 +70,10 @@ CREATE TABLE IF NOT EXISTS tags( id TEXT, name TEXT ); +CREATE TABLE IF NOT EXISTS albums( + albumid TEXT, + photoid TEXT + ); CREATE TABLE IF NOT EXISTS photo_tag_rel( photoid TEXT, tagid TEXT @@ -76,15 +99,6 @@ CREATE INDEX IF NOT EXISTS index_tagrel_tagid on photo_tag_rel(tagid); CREATE INDEX IF NOT EXISTS index_tagsyn_name on tag_synonyms(name); ''' -def assert_lower(*args): - previous = args[0] - for element in args[1:]: - if element is None: - continue - if element < previous: - raise ValueError('Min and Max out of order') - previous = element - def basex(number, base, alphabet='0123456789abcdefghijklmnopqrstuvwxyz'): ''' Converts an integer to a different base string. 
@@ -131,12 +145,8 @@ def is_xor(*args): ''' return [bool(a) for a in args].count(True) == 1 -def min_max_query_builder(name, sign, value): - if value is None: - return - value = str(int(value)) - name = normalize_tagname(name) - return ' '.join([name, sign, value]) +def min_max_query_builder(name, comparator, value): + return ' '.join([name, comparator, value]) def normalize_tagname(tagname): ''' @@ -207,6 +217,9 @@ class PhotoDB: photos. Photos may be selected by which tags they contain. Entries contain a unique ID and a name. + albums: + Rows represent the inclusion of a photo in an album + photo_tag_rel: Rows represent a Photo's ownership of a particular Tag. @@ -253,8 +266,6 @@ class PhotoDB: Raises NoSuchTag and NoSuchPhoto as appropriate. ''' tag = self.get_tag(tagid=tagid, tagname=tagname, resolve_synonyms=True) - if tag is None: - raise_nosuchtag(tagid=tagid, tagname=tagname) self.cur.execute('SELECT * FROM photo_tag_rel WHERE photoid == ? AND tagid == ?', [photoid, tag.id]) if self.cur.fetchone() is not None: @@ -293,7 +304,7 @@ class PhotoDB: temp_cur = self.sql.cursor() temp_cur.execute('SELECT * FROM photo_tag_rel WHERE tagid == ?', [oldtag.id]) for relationship in fetch_generator(temp_cur): - photoid = relationship[SQL_PHOTOTAG_PHOTOID] + photoid = relationship[SQL_PHOTOTAG['photoid']] self.cur.execute('SELECT * FROM photo_tag_rel WHERE tagid == ?', [mastertag.id]) if self.cur.fetchone() is not None: continue @@ -307,6 +318,48 @@ class PhotoDB: self.new_tag_synonym(oldtag.name, mastertag.name, commit=False) self.sql.commit() + def delete_photo(self, photoid): + ''' + Delete a photo and its relation to any tags and albums. 
+ ''' + photo = self.get_photo_by_id(photoid) + if photo is None: + raise NoSuchPhoto(photoid) + self.cur.execute('DELETE FROM photos WHERE id == ?', [photoid]) + self.cur.execute('DELETE FROM photo_tag_rel WHERE photoid == ?', [photoid]) + self.sql.commit() + + def delete_tag(self, tagid=None, tagname=None): + ''' + Delete a tag, its synonyms, and its relation to any photos. + ''' + + tag = self.get_tag(tagid=tagid, tagname=tagname, resolve_synonyms=False) + + if tag is None: + message = 'Is it a synonym?' + raise_nosuchtag(tagid=tagid, tagname=tagname, comment=message) + + self.cur.execute('DELETE FROM tags WHERE id == ?', [tag.id]) + self.cur.execute('DELETE FROM photo_tag_rel WHERE tagid == ?', [tag.id]) + self.cur.execute('DELETE FROM tag_synonyms WHERE mastername == ?', [tag.name]) + self.sql.commit() + + def delete_tag_synonym(self, tagname): + ''' + Delete a tag synonym. + This will have no effect on photos or other synonyms because + they always resolve to the master tag before application. + ''' + tagname = normalize_tagname(tagname) + self.cur.execute('SELECT * FROM tag_synonyms WHERE name == ?', [tagname]) + fetch = self.cur.fetchone() + if fetch is None: + raise NoSuchSynonym(tagname) + + self.cur.execute('DELETE FROM tag_synonyms WHERE name == ?', [tagname]) + self.sql.commit() + def generate_id(self, table): ''' Create a new ID number that is unique to the given table. @@ -330,7 +383,7 @@ class PhotoDB: new_id = 1 else: # Use database value - new_id = int(fetch[SQL_LASTID_ID]) + 1 + new_id = int(fetch[SQL_LASTID['last_id']]) + 1 do_update = True new_id_s = str(new_id).rjust(self.id_length, '0') @@ -341,6 +394,10 @@ class PhotoDB: self._last_ids[table] = new_id return new_id_s + @not_implemented + def get_album_by_id(self, albumid): + return + def get_photo_by_id(self, photoid): ''' Return this Photo object, or None if it does not exist. @@ -433,30 +490,99 @@ class PhotoDB: If False, Photos need only comply with the `tag_musts`. 
If True, Photos need to comply with both `tag_musts` and `tag_mays`. ''' + maximums = {key:int(val) for (key, val) in maximums.items()} + minimums = {key:int(val) for (key, val) in minimums.items()} + + # Raise for cases where the minimum > maximum + for (maxkey, maxval) in maximums.items(): + if maxkey not in minimums: + continue + minval = minimums[maxkey] + if minval > maxval: + raise ValueError('Impossible min-max for %s' % maxkey) + conditions = [] - minmaxers = {'<=':maximums, '>=': minimums} + minmaxers = {'<=': maximums, '>=': minimums} + + # Convert the min-max parameters into query strings for (comparator, minmaxer) in minmaxers.items(): for (field, value) in minmaxer.items(): if field not in Photo.int_properties: raise ValueError('Unknown Photo property: %s' % field) + + value = str(value) query = min_max_query_builder(field, comparator, value) conditions.append(query) + if extension is not None: if isinstance(extension, str): extension = [extension] - # Don't inject me bro + + # Normalize to prevent injections extension = [normalize_tagname(e) for e in extension] extension = ['extension == "%s"' % e for e in extension] extension = ' OR '.join(extension) extension = '(%s)' % extension conditions.append(extension) - conditions = [query for query in conditions if query is not None] + + def setify(l): + return set(self.get_tag_by_name(t) for t in l) if l else set() + tag_musts = setify(tag_musts) + tag_mays = setify(tag_mays) + tag_forbids = setify(tag_forbids) + + base = '%s EXISTS (SELECT 1 FROM photo_tag_rel WHERE photo_tag_rel.photoid == photos.id AND photo_tag_rel.tagid %s %s)' + for tag in tag_musts: + query = base % ('', '==', '"%s"' % tag.id) + conditions.append(query) + + if tag_forbid_unspecified and len(tag_mays) > 0: + acceptable = tag_mays.union(tag_musts) + acceptable = ['"%s"' % t.id for t in acceptable] + acceptable = ', '.join(acceptable) + query = base % ('NOT', 'NOT IN', '(%s)' % acceptable) + conditions.append(query) + + for tag in 
tag_forbids: + query = base % ('NOT', '==', '"%s"' % tag.id) + conditions.append(query) + if len(conditions) == 0: raise ValueError('No search query provided') - conditions = ' AND '.join(conditions) - print(conditions) - query = 'SELECT * FROM photos WHERE %s' % conditions + conditions = [query for query in conditions if query is not None] + conditions = ['(%s)' % c for c in conditions] + conditions = ' AND '.join(conditions) + conditions = 'WHERE %s' % conditions + + + query = 'SELECT * FROM photos %s' % conditions + print(query) + temp_cur = self.sql.cursor() + temp_cur.execute(query) + acceptable_tags = tag_musts.union(tag_mays) + while True: + fetch = temp_cur.fetchone() + if fetch is None: + break + + photo = self.tuple_to_photo(fetch) + + # if any(forbid in photo.tags for forbid in tag_forbids): + # print('Forbidden') + # continue + + # if tag_forbid_unspecified: + # if any(tag not in acceptable_tags for tag in photo.tags): + # print('Forbid unspecified') + # continue + + # if any(must not in photo.tags for must in tag_musts): + # print('No must') + # continue + + yield photo + def get_tag(self, tagid=None, tagname=None, resolve_synonyms=True): ''' @@ -469,13 +595,13 @@ class PhotoDB: return self.get_tag_by_id(tagid) elif tagname is not None: return self.get_tag_by_name(tagname, resolve_synonyms=resolve_synonyms) - return None + raise_nosuchtag(tagid=tagid, tagname=tagname) def get_tag_by_id(self, tagid): self.cur.execute('SELECT * FROM tags WHERE id == ?', [tagid]) tag = self.cur.fetchone() if tag is None: - return None + return raise_nosuchtag(tagid=tagid) tag = self.tuple_to_tag(tag) return tag @@ -489,14 +615,14 @@ class PhotoDB: self.cur.execute('SELECT * FROM tag_synonyms WHERE name == ?', [tagname]) fetch = self.cur.fetchone() if fetch is not None: - mastertagname = fetch[SQL_SYN_MASTER] + mastertagname = fetch[SQL_SYN['master']] tag = self.get_tag_by_name(mastertagname) return tag self.cur.execute('SELECT * FROM tags WHERE name == ?', [tagname]) 
fetch = self.cur.fetchone() if fetch is None: - return None + raise_nosuchtag(tagname=tagname) tag = self.tuple_to_tag(fetch) return tag @@ -508,14 +634,14 @@ class PhotoDB: temp_cur = self.sql.cursor() temp_cur.execute('SELECT * FROM photo_tag_rel WHERE photoid == ?', [photoid]) tags = fetch_generator(temp_cur) - tagobjects = [] + tagobjects = set() for tag in tags: - tagid = tag[SQL_PHOTOTAG_TAGID] + tagid = tag[SQL_PHOTOTAG['tagid']] tagobj = self.get_tag_by_id(tagid) - tagobjects.append(tagobj) + tagobjects.add(tagobj) return tagobjects - def new_photo(self, filename, tags=[], allow_duplicates=False): + def new_photo(self, filename, tags=None, allow_duplicates=False): ''' Given a filepath, determine its attributes and create a new Photo object in the database. Tags may be applied now or later. @@ -539,22 +665,26 @@ class PhotoDB: extension = normalize_tagname(extension) (width, height) = image.size area = width * height + ratio = width / height bytes = os.path.getsize(filename) created = int(getnow()) photoid = self.generate_id('photos') - data = [None] * SQL_PHOTO_COLUMNCOUNT - data[SQL_PHOTO_ID] = photoid - data[SQL_PHOTO_FILEPATH] = filename - data[SQL_PHOTO_EXTENSION] = extension - data[SQL_PHOTO_WIDTH] = width - data[SQL_PHOTO_HEIGHT] = height - data[SQL_PHOTO_AREA] = area - data[SQL_PHOTO_BYTES] = bytes - data[SQL_PHOTO_CREATED] = created + data = [None] * len(SQL_PHOTO_COLUMNS) + data[SQL_PHOTO['id']] = photoid + data[SQL_PHOTO['filepath']] = filename + data[SQL_PHOTO['extension']] = extension + data[SQL_PHOTO['width']] = width + data[SQL_PHOTO['height']] = height + data[SQL_PHOTO['area']] = area + data[SQL_PHOTO['ratio']] = ratio + data[SQL_PHOTO['bytes']] = bytes + data[SQL_PHOTO['created']] = created photo = self.tuple_to_photo(data) - self.cur.execute('INSERT INTO photos VALUES(?, ?, ?, ?, ?, ?, ?, ?)', data) + self.cur.execute('INSERT INTO photos VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?)', data) + + tags = tags or [] for tag in tags: try: 
self.apply_photo_tag(photoid, tagname=tag, commit=False) @@ -570,8 +700,11 @@ class PhotoDB: Register a new tag in the database and return the Tag object. ''' tagname = normalize_tagname(tagname) - if self.get_tag_by_name(tagname) is not None: - raise TagExists(tagname) + try: + self.get_tag_by_name(tagname) + TagExists(tagname) + except NoSuchTag: + pass tagid = self.generate_id('tags') self.cur.execute('INSERT INTO tags VALUES(?, ?)', [tagid, tagname]) self.sql.commit() @@ -613,49 +746,8 @@ class PhotoDB: exe = self.cur.execute exe('SELECT * FROM photo_tag_rel WHERE photoid == ? AND tagid == ?', [photoid, tag.id]) fetch = self.cur.fetchone() - return fetch is not None - - def remove_photo(self, photoid): - ''' - Delete a photo and its relation to any tags. - ''' - photo = self.get_photo_by_id(photoid) - if photo is None: - raise NoSuchPhoto(photoid) - self.cur.execute('DELETE FROM photos WHERE id == ?', [photoid]) - self.cur.execute('DELETE FROM photo_tag_rel WHERE photoid == ?', [photoid]) - self.sql.commit() - - def remove_tag(self, tagid=None, tagname=None): - ''' - Delete a tag, its synonyms, and its relation to any photos. - ''' - - tag = self.get_tag(tagid=tagid, tagname=tagname, resolve_synonyms=False) - - if tag is None: - message = 'Is it a synonym?' - raise_nosuchtag(tagid=tagid, tagname=tagname, comment=message) - - self.cur.execute('DELETE FROM tags WHERE id == ?', [tag.id]) - self.cur.execute('DELETE FROM photo_tag_rel WHERE tagid == ?', [tag.id]) - self.cur.execute('DELETE FROM tag_synonyms WHERE mastername == ?', [tag.name]) - self.sql.commit() - - def remove_tag_synonym(self, tagname): - ''' - Delete a tag synonym. - This will have no effect on photos or other synonyms because - they always resolve to the master tag before application. 
- ''' - tagname = normalize_tagname(tagname) - self.cur.execute('SELECT * FROM tag_synonyms WHERE name == ?', [tagname]) - fetch = self.cur.fetchone() - if fetch is None: - raise NoSuchSynonym(tagname) - - self.cur.execute('DELETE FROM tag_synonyms WHERE name == ?', [tagname]) - self.sql.commit() + has_tag = fetch is not None + return has_tag @not_implemented def rename_tag(self, tagname, newname, apply_to_synonyms): @@ -666,19 +758,18 @@ class PhotoDB: Given a tuple like the ones from an sqlite query, create a Photo object. ''' - photoid = tu[SQL_PHOTO_ID] + photoid = tu[SQL_PHOTO['id']] tags = self.get_tags_by_photo(photoid) photo = Photo( photodb = self, photoid = photoid, - filepath = tu[SQL_PHOTO_FILEPATH], - extension = tu[SQL_PHOTO_EXTENSION], - width = tu[SQL_PHOTO_WIDTH], - height = tu[SQL_PHOTO_HEIGHT], - area = tu[SQL_PHOTO_AREA], - created = tu[SQL_PHOTO_CREATED], - bytes = tu[SQL_PHOTO_BYTES], + filepath = tu[SQL_PHOTO['filepath']], + extension = tu[SQL_PHOTO['extension']], + width = tu[SQL_PHOTO['width']], + height = tu[SQL_PHOTO['height']], + created = tu[SQL_PHOTO['created']], + bytes = tu[SQL_PHOTO['bytes']], tags = tags, ) return photo @@ -690,8 +781,8 @@ class PhotoDB: ''' tag = Tag( photodb = self, - tagid = tu[SQL_TAG_ID], - name = tu[SQL_TAG_NAME] + tagid = tu[SQL_TAG['id']], + name = tu[SQL_TAG['name']] ) return tag @@ -710,20 +801,23 @@ class Photo: extension, width, height, - area, bytes, created, - tags=[], + tags=None, ): + if tags is None: + tags = [] + self.photodb = photodb self.id = photoid self.filepath = filepath self.extension = extension - self.width = width - self.height = height - self.area = area - self.bytes = bytes - self.created = created + self.width = int(width) + self.height = int(height) + self.ratio = self.width / self.height + self.area = self.width * self.height + self.bytes = int(bytes) + self.created = int(created) self.tags = tags def __eq__(self, other): @@ -741,8 +835,6 @@ class Photo: 'extension={extension}, ', 
'width={width}, ', 'height={height}, ', - 'area={area}, ', - 'bytes={bytes} ', 'created={created})' ) r = ''.join(r) diff --git a/QuickTips/generators.md b/QuickTips/generators.md new file mode 100644 index 0000000..f2bfd40 --- /dev/null +++ b/QuickTips/generators.md @@ -0,0 +1,81 @@ +Generators +========== + + +# What are they + +Generators are a type of iterable that creates its contents on-the-fly. Unlike a list, whose entire contents are available before beginning any loops or manipulations, generators don't know how many items they will produce or when they will stop. + + +# Writing one + +Writing a generator looks like writing a function, but instead of `return`, you use `yield`. The object which is yielded is what you'll get when you do a loop over the generator. This generator lets you count to a billion: + + def billion(): + x = 0 + while x < 1000000000: + yield x + x += 1 + +Note that, unlike a `return` statement, you can include more code after a `yield` statement. Also notice that generators keep track of their internal state. The `billion` generator has an `x` that it increments every time you loop over it. + + +# Using one + +Although generators look like a function when you're writing them, they feel more like objects when using them. Remember that generators don't calculate their contents until they are actually used in a loop, so simply doing: + + numbers = billion() + +does **not** create a list of a billion numbers. It creates a new instance of the generator that is ready to be iterated over, like this: + + numbers = billion() + for number in numbers: + print(number) + +This might remind you of: + + for number in range(1000000000): + print(number) + +because `range` behaves in a similar way: it is not technically a generator, but it is a lazy sequence that computes each number on demand instead of storing them all. + + +Generators are excellent for cases where using a list is infeasible or unnecessary. If you wanted to count to a billion using a list, you would first have to create a list of every number, which is a huge waste of time and memory. 
With a generator, the item is created, used, and trashed. + +To get a single item from a generator without looping, use `next(generator)`. + + +# StopIteration + +When a generator is all finished, it will raise a `StopIteration` exception every time you try to do `next()`. `for` loops will detect this automatically and stop themselves. + + +# More examples + +Suppose you're getting data from an imaginary website which sends you items in groups of 100. You want to let the user loop over every item without having to worry about the groups themselves. + + def item_generator(url): + page = 0 + while True: + # get_items is a pretend method that collects the 100 items from that page + batch = get_items(url, page=page) + + if not batch: + # for this imaginary website, the batch will be empty when that page + # doesn't have any items on it. + break + + for item in batch: + # by yielding individual items, the user can just do a for loop + # over this generator and get them all one by one. + yield item + + page += 1 + + # When the while loop breaks, we reach the end of the function body, + # and a StopIteration will be raised and handled automatically, + # ending the for-loop. + + comments = item_generator('http://website.com/user/voussoir/comments') + for comment in comments: + print(comment.body) diff --git a/QuickTips/tkinter images.md b/QuickTips/tkinter images.md new file mode 100644 index 0000000..f1f3f7f --- /dev/null +++ b/QuickTips/tkinter images.md @@ -0,0 +1,21 @@ +Tkinter Images +============== + +When using Tkinter alone, you can only embed .gif images in your interface. PIL provides a `PhotoImage` class that lets you embed other supported file types. 
+ +Requires `pip install pillow` + + import PIL.Image + import PIL.ImageTk + import tkinter + + t = tkinter.Tk() + image = PIL.Image.open('filename.png') + image_tk = PIL.ImageTk.PhotoImage(image) + label = tkinter.Label(t, image=image_tk) + label.image_reference = image_tk + label.pack() + +You must store the `image_tk` somewhere, such as an attribute of the label it belongs to. Otherwise, it gets [prematurely garbage-collected](http://effbot.org/pyfaq/why-do-my-tkinter-images-not-appear.htm). + +![Screenshot](/../master/.GitImages/quicktips_imagetk.png?raw=true) \ No newline at end of file diff --git a/RateMeter/speedtest.py b/RateMeter/speedtest.py index 8a2d923..456a324 100644 --- a/RateMeter/speedtest.py +++ b/RateMeter/speedtest.py @@ -2,9 +2,13 @@ import bytestring import downloady import ratemeter import requests +import sys import time -URL = 'http://cdn.speedof.me/sample32768k.bin?r=0.881750426312' +if len(sys.argv) == 2: + URL = sys.argv[1] +else: + URL = 'http://cdn.speedof.me/sample32768k.bin?r=0.881750426312' METER = ratemeter.RateMeter(span=5) METER_2 = ratemeter.RateMeter(span=None) class G: diff --git a/ServerReference/favicon.ico b/ServerReference/favicon.ico new file mode 100644 index 0000000..e26e4cf Binary files /dev/null and b/ServerReference/favicon.ico differ diff --git a/ServerReference/files/Stats.wav b/ServerReference/files/Stats.wav new file mode 100644 index 0000000..eb33126 Binary files /dev/null and b/ServerReference/files/Stats.wav differ diff --git a/ServerReference/simpleserver.py b/ServerReference/simpleserver.py index 6a9d2af..dd008a8 100644 --- a/ServerReference/simpleserver.py +++ b/ServerReference/simpleserver.py @@ -1,15 +1,104 @@ import http.server +import mimetypes import os +import urllib.parse +import random +import sys + +sys.path.append('C:\\git\\else\\Bytestring') +import bytestring + +sys.path.append('C:\\git\\else\\Ratelimiter') +import ratelimiter + f = open('favicon.png', 'rb') FAVI = f.read() f.close() +CWD = 
os.getcwd() + +# The paths which the user may access +# Attempting to access anything outside will 403 +OKAY_PATHS = set(x.lower() for x in ['/files', '/favicon.ico']) +OPENDIR_TEMPLATE = ''' + + + + + +{entries} +
+ + + +''' + + +class Multipart: + def __init__(stream, boundary): + self.parts = [] + +class Path: + def __init__(self, path): + path = path.replace('\\', '/') + if len(path) == 0 or path[0] != '/': + path = '/' + path + self.path = path + + def __repr__(self): + return 'Path(%s)' % self.path + + def __str__(self): + return self.path + + def anchor(self, display_name=None): + if display_name is None: + display_name = self.basename + if self.is_dir: + # Folder emoji + icon = '\U0001F4C1' + else: + # Diamond emoji, because there's not one for files. + icon = '\U0001F48E' + return '{icon} {display}'.format(full=self.path, icon=icon, display=display_name) + + @property + def basename(self): + return os.path.basename(self.path) + + @property + def is_dir(self): + return os.path.isdir(self.os_path) + + @property + def os_path(self): + abspath = os.path.join(CWD, self.relative_path) + #print(abspath) + return abspath + + @property + def parent(self): + parts = self.path.split('/')[:-1] + parts = '/'.join(parts) + return Path(parts) + + @property + def relative_path(self): + return self.path.lstrip('/') + + @property + def size(self): + if self.is_dir: + return -1 + return os.path.getsize(self.os_path) + + def table_row(self, display_name=None, shaded=False): + form = '{anchor}{size}' + bg = 'ddd' if shaded else 'fff'; + size = bytestring.bytestring(self.size) if self.size != -1 else '' + row = form.format(bg=bg, anchor=self.anchor(display_name=display_name), size=size) + return row -# The paths of the root folder which the user may access -# Attempting to access any other files in the root folder -# will 403 -OKAY_BASE_PATHS = set(x.lower() for x in ['/', '/favicon.ico']) -FORBIDDEN_PATHS = set(x.lower() for x in ['/admin']) class RequestHandler(http.server.BaseHTTPRequestHandler): def write(self, string): @@ -19,29 +108,123 @@ class RequestHandler(http.server.BaseHTTPRequestHandler): def read_filebytes(self, path): #print(path) - if os.path.isfile(path): - f = 
open(path, 'rb') + if os.path.isfile(path.relative_path): + f = open(path.relative_path, 'rb') fr = f.read() f.close() return fr - if os.path.isdir(path): - return self.read_filebytes(os.path.join(path, 'index.html')) + + if os.path.isdir(path.relative_path): + text = generate_opendir(path) + text = text.encode('utf-8') + return text + self.send_error(404) - return b'' + return bytes() def do_GET(self): #print(dir(self)) path = self.path.lower() - if os.path.dirname(path) in FORBIDDEN_PATHS: - self.send_error(403, 'Forbidden path!') - return - if path not in OKAY_BASE_PATHS and (os.path.dirname(path) == '/'): - self.send_error(403, 'Stop that!') + path = urllib.parse.unquote(path).rstrip('/') + + error = path_validation(path) + if error: + self.send_error(*error) return - path = os.path.join(os.getcwd(), path[1:]) + + path = Path(path) + + self.send_response(200) + mime = mimetypes.guess_type(path.path)[0] + if mime is not None: + #print(mime) + self.send_header('Content-type', mime) + self.end_headers() + d = self.read_filebytes(path) self.write(d) -server = http.server.HTTPServer(('', 80), RequestHandler) + # def do_POST(self): + # path = self.path.lower() + # path = urllib.parse.unquote(path).rstrip('/') + + # error = path_validation(path) + # if error: + # self.send_error(*error) + # return + + # path = Path(path) + # content_type = self.headers.get('Content-Type', '') + # if not any (req in content_type for req in ['multipart/form-data', 'boundary=']): + # self.send_error(400, 'Bad request') + # return + + # boundary = content_type.split('boundary=')[1] + # boundary = boundary.split(';')[0] + # boundary = boundary.strip() + # print('B:', self.headers.get_boundary()) + # print('F:', self.headers.get_filename()) + + # incoming_size = int(self.headers.get('Content-Length', 0)) + # received_bytes = 0 + # remaining_bytes = incoming_size + # while remaining_bytes > 0: + # chunk_size = min(remaining_bytes, 16*1024) + # chunk = self.rfile.read(chunk_size) + # 
remaining_bytes -= chunk_size + # received_bytes += chunk_size + # print(chunk) + # self.send_response(200) + # self.send_header('Content-Type', 'text/html') + # self.end_headers() + # print(dir(self.request)) + # self.write('Thanks') + +def generate_opendir(path): + print('Listdir:', path) + items = os.listdir(path.relative_path) + items = [os.path.join(path.relative_path, f) for f in items] + directories = [] + files = [] + for item in items: + #item = item.lstrip('/') + if os.path.isdir(item): + directories.append(item) + else: + files.append(item) + directories.sort(key=str.lower) + files.sort(key=str.lower) + files = directories + files + #print(files) + files = [Path(f) for f in files] + entries = [] + if not any(okay == path.path for okay in OKAY_PATHS): + # If the path actually equals a okay_path, then we shouldn't + # let them step up because that would be outisde the okay area. + entries.append(path.parent.table_row(display_name='up')) + shaded = True + for f in files: + entry = f.table_row(shaded=shaded) + entries.append(entry) + shaded = not shaded + entries = '\n'.join(entries) + text = OPENDIR_TEMPLATE.format(entries=entries) + return text + +def generate_random_filename(original_filename='', length=8): + import random + bits = length * 4 + bits = random.getrandbits(bits) + identifier = '{:x}'.format(bits).rjust(length, '0') + return identifier + +def path_validation(path): + if '..' 
in path: + return (403, 'I\'m not going to play games with you.') + if not any(path.startswith(okay) for okay in OKAY_PATHS): + self.send_error(403, 'Stop that!') + return + +server = http.server.HTTPServer(('', 32768), RequestHandler) print('server starting') -server.serve_forever() \ No newline at end of file +server.serve_forever() diff --git a/SinWave/sinwave.py b/SinWave/sinwave.py index 94b0ee1..3453ba5 100644 --- a/SinWave/sinwave.py +++ b/SinWave/sinwave.py @@ -252,19 +252,23 @@ def regrid_frames(): def unregister_line(line): variables['lines'].remove(line) -t = tkinter.Tk() +def main(): + t = tkinter.Tk() -frame_add = tkinter.Frame(t) -entry_add = tkinter.Entry(frame_add) -entry_add.grid(row=0, column=0) -tkinter.Button(frame_add, text='+', command=create_line_frame).grid(row=0, column=1) -frame_add.grid(row=0, column=0) + frame_add = tkinter.Frame(t) + entry_add = tkinter.Entry(frame_add) + entry_add.grid(row=0, column=0) + tkinter.Button(frame_add, text='+', command=create_line_frame).grid(row=0, column=1) + frame_add.grid(row=0, column=0) -frame_delay = tkinter.Frame(t) -tkinter.Label(frame_delay, text='Speed:') -thread = threading.Thread(target=print_loop) -thread.daemon=True -thread.start() + frame_delay = tkinter.Frame(t) + tkinter.Label(frame_delay, text='Speed:') + thread = threading.Thread(target=print_loop) + thread.daemon=True + thread.start() -create_line_frame([0]) -t.mainloop() \ No newline at end of file + create_line_frame([0]) + t.mainloop() + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/SpinalTap/spinal.py b/SpinalTap/spinal.py index 9608e7e..b97994b 100644 --- a/SpinalTap/spinal.py +++ b/SpinalTap/spinal.py @@ -65,6 +65,10 @@ class FilePath: def __repr__(self): return 'FilePath(%s)' % repr(self.path) + @property + def basename(self): + return os.path.basename(self.path) + @property def isdir(self): return self.type_getter('_isdir', stat.S_ISDIR) @@ -602,7 +606,7 @@ def str_to_fp(path): return path def 
walk_generator( - path, + path='.', callback_exclusion=None, callback_verbose=None, exclude_directories=None, diff --git a/TotalDL/totaldl.py b/TotalDL/totaldl.py index 6f0b1fb..5d64262 100644 --- a/TotalDL/totaldl.py +++ b/TotalDL/totaldl.py @@ -279,6 +279,7 @@ def handle_vidble(url, customname=None): name = image.split('/')[-1] localname = '{folder}\\{index}_{name}'.format(folder=folder, index=index, name=name) image = 'https://vidble.com' + image + image = image.replace('_med', '') download_file(image, localname) else: localname = url.split('/')[-1]