etiquette/phototagger.py

1327 lines
44 KiB
Python
Raw Normal View History

2016-12-16 23:45:46 +00:00
import bcrypt
2016-09-18 08:33:46 +00:00
import collections
import copy
import json
2016-09-18 08:33:46 +00:00
import logging
import os
import random
import sqlite3
import string
import time
import warnings
2016-11-06 04:24:43 +00:00
import constants
import decorators
import exceptions
2016-11-06 04:24:43 +00:00
import helpers
import objects
2016-11-06 04:24:43 +00:00
# pip install
# https://raw.githubusercontent.com/voussoir/else/master/_voussoirkit/voussoirkit.zip
from voussoirkit import spinal
2016-09-18 08:33:46 +00:00
# Configure the root logger at import time so records of level DEBUG and above are emitted.
logging.basicConfig(level=logging.DEBUG)
2016-11-27 09:06:11 +00:00
# Raise the 'PIL.PngImagePlugin' logger to WARNING so its lower-level records are dropped.
logging.getLogger('PIL.PngImagePlugin').setLevel(logging.WARNING)
2016-09-18 08:33:46 +00:00
2016-11-29 04:18:44 +00:00
# Note: Setting user_version pragma in init sequence is safe because it only
# happens after the out-of-date check occurs, so no chance of accidentally
# overwriting it.
2016-12-20 22:54:23 +00:00
DATABASE_VERSION = 4
2016-09-18 08:33:46 +00:00
DB_INIT = '''
PRAGMA count_changes = OFF;
PRAGMA cache_size = 10000;
2016-11-06 04:24:43 +00:00
PRAGMA user_version = {user_version};
2016-09-18 08:33:46 +00:00
CREATE TABLE IF NOT EXISTS albums(
id TEXT,
title TEXT,
2016-10-30 01:46:23 +00:00
description TEXT,
associated_directory TEXT COLLATE NOCASE
2016-09-18 08:33:46 +00:00
);
CREATE TABLE IF NOT EXISTS photos(
id TEXT,
2016-10-30 01:46:23 +00:00
filepath TEXT COLLATE NOCASE,
2016-10-30 21:42:09 +00:00
override_filename TEXT COLLATE NOCASE,
2016-09-18 08:33:46 +00:00
extension TEXT,
width INT,
height INT,
ratio REAL,
area INT,
2016-10-10 03:50:13 +00:00
duration INT,
2016-09-18 08:33:46 +00:00
bytes INT,
created INT,
2016-11-29 04:18:44 +00:00
thumbnail TEXT,
2016-12-20 22:54:23 +00:00
tagged_at INT,
author_id TEXT
2016-09-18 08:33:46 +00:00
);
CREATE TABLE IF NOT EXISTS tags(
id TEXT,
name TEXT
);
CREATE TABLE IF NOT EXISTS album_photo_rel(
albumid TEXT,
photoid TEXT
);
CREATE TABLE IF NOT EXISTS photo_tag_rel(
photoid TEXT,
tagid TEXT
);
CREATE TABLE IF NOT EXISTS tag_group_rel(
parentid TEXT,
memberid TEXT
);
CREATE TABLE IF NOT EXISTS tag_synonyms(
name TEXT,
mastername TEXT
);
CREATE TABLE IF NOT EXISTS id_numbers(
tab TEXT,
last_id TEXT
);
2016-12-16 23:45:46 +00:00
CREATE TABLE IF NOT EXISTS users(
id TEXT,
username TEXT COLLATE NOCASE,
password BLOB,
created INT
);
2016-09-18 08:33:46 +00:00
-- Album
CREATE INDEX IF NOT EXISTS index_album_id on albums(id);
CREATE INDEX IF NOT EXISTS index_albumrel_albumid on album_photo_rel(albumid);
CREATE INDEX IF NOT EXISTS index_albumrel_photoid on album_photo_rel(photoid);
-- Photo
CREATE INDEX IF NOT EXISTS index_photo_id on photos(id);
2016-10-30 01:46:23 +00:00
CREATE INDEX IF NOT EXISTS index_photo_path on photos(filepath COLLATE NOCASE);
2016-10-30 21:42:09 +00:00
CREATE INDEX IF NOT EXISTS index_photo_fakepath on photos(override_filename COLLATE NOCASE);
2016-09-18 08:33:46 +00:00
CREATE INDEX IF NOT EXISTS index_photo_created on photos(created);
2016-10-18 05:13:12 +00:00
CREATE INDEX IF NOT EXISTS index_photo_extension on photos(extension);
2016-12-20 22:54:23 +00:00
CREATE INDEX IF NOT EXISTS index_photo_author on photos(author_id);
2016-09-18 08:33:46 +00:00
-- Tag
CREATE INDEX IF NOT EXISTS index_tag_id on tags(id);
CREATE INDEX IF NOT EXISTS index_tag_name on tags(name);
-- Photo-tag relation
CREATE INDEX IF NOT EXISTS index_tagrel_photoid on photo_tag_rel(photoid);
CREATE INDEX IF NOT EXISTS index_tagrel_tagid on photo_tag_rel(tagid);
-- Tag-synonym relation
CREATE INDEX IF NOT EXISTS index_tagsyn_name on tag_synonyms(name);
-- Tag-group relation
CREATE INDEX IF NOT EXISTS index_grouprel_parentid on tag_group_rel(parentid);
CREATE INDEX IF NOT EXISTS index_grouprel_memberid on tag_group_rel(memberid);
2016-12-16 23:45:46 +00:00
-- User
CREATE INDEX IF NOT EXISTS index_user_id on users(id);
CREATE INDEX IF NOT EXISTS index_user_username on users(username COLLATE NOCASE);
2016-11-06 04:24:43 +00:00
'''.format(user_version=DATABASE_VERSION)
2016-09-18 08:33:46 +00:00
2016-10-30 01:46:23 +00:00
def _helper_filenamefilter(subject, terms):
basename = subject.lower()
return all(term in basename for term in terms)
def binding_filler(column_names, values, require_all=True):
    '''
    Build the placeholder string and the ordered binding list for an INSERT.

    column_names: the table's columns, in table order.
    values: a dictionary of {column: value}. It is copied, never modified.
    require_all: when True, a column that is absent from `values` raises
        ValueError. When False, absent columns are bound to None.

    Returns a tuple (qmarks, bindings) where qmarks looks like '?, ?, ?' and
    bindings holds the values in the same order as `column_names`.
    '''
    filled = values.copy()
    absent = [column for column in column_names if column not in filled]
    if absent:
        if require_all:
            # Report the first missing column, in table order.
            raise ValueError('Missing column "%s"' % absent[0])
        for column in absent:
            filled[column] = None

    placeholders = ', '.join(['?'] * len(column_names))
    ordered = [filled[column] for column in column_names]
    return (placeholders, ordered)
2016-09-18 08:33:46 +00:00
def operate(operand_stack, operator_stack):
    '''
    Pop one operator from `operator_stack`, apply it to the operand(s) on top
    of `operand_stack`, and push the result back onto `operand_stack`.

    'NOT' consumes one operand. 'AND' and 'OR' consume two, the most recently
    pushed operand being the right-hand side.

    Raises ValueError for any other operator. The operator is validated
    before any operand is popped, so a bad operator leaves the operand stack
    untouched.
    '''
    operator = operator_stack.pop()
    if operator == 'NOT':
        operand = operand_stack.pop()
        # Operands are 0/1 values (or bools), so xor with 1 inverts them.
        value = operand ^ 1
    elif operator in ('AND', 'OR'):
        right = operand_stack.pop()
        left = operand_stack.pop()
        if operator == 'OR':
            value = left | right
        else:
            value = left & right
    else:
        raise ValueError('Unsupported operator: %r' % (operator,))
    operand_stack.append(value)
def raise_no_such_thing(exception_class, thing_id=None, thing_name=None, comment=''):
    '''
    Raise `exception_class` with a message describing what was not found.

    The message is 'ID: <id>. <comment>' when `thing_id` is given, otherwise
    'Name: <name>. <comment>' when `thing_name` is given, otherwise an empty
    string. This function never returns.
    '''
    if thing_id is None and thing_name is None:
        raise exception_class('')

    if thing_id is not None:
        (label, subject) = ('ID', thing_id)
    else:
        (label, subject) = ('Name', thing_name)
    raise exception_class('%s: %s. %s' % (label, subject, comment))
def searchfilter_expression(photo_tags, expression, frozen_children, token_normalizer, warn_bad_tags):
    '''
    Evaluate a boolean tag expression such as
    'family AND (animals OR vacation)' against one photo's tags.

    photo_tags: iterable of objects with a `name` attribute.
    expression: the expression string. Hyphens are replaced by spaces and
        parentheses are split into their own tokens.
    frozen_children: mapping where a normalized tag token is the key and the
        value is the collection of options that satisfy that tag.
    token_normalizer: callable applied to every non-operator token before it
        is looked up in `frozen_children`.
    warn_bad_tags: when a token raises KeyError during normalization or
        lookup, True issues a warning and makes the whole expression fail,
        False raises exceptions.NoSuchTag.

    Returns False for an empty expression or a bad tag in warning mode.
    Otherwise returns the final operand, which is a bool or a 0/1 int.
    '''
    photo_tags = set(tag.name for tag in photo_tags)
    operator_stack = collections.deque()
    operand_stack = collections.deque()

    expression = expression.replace('-', ' ')
    expression = expression.strip()
    if not expression:
        return False
    expression = expression.replace('(', ' ( ')
    expression = expression.replace(')', ' ) ')
    # split() with no separator collapses whitespace runs and never yields
    # empty tokens, so no manual space-squeezing loop is needed.
    tokens = expression.split()

    has_operand = False
    can_shortcircuit = False

    for token in tokens:
        # While short-circuiting, skip everything up to the closing paren.
        # NOTE(review): can_shortcircuit is only recomputed when an operator
        # token is reached, so once it is set every later token except ')'
        # is skipped. Confirm that is intended for expressions that continue
        # after the short-circuited group.
        if can_shortcircuit and token != ')':
            continue

        if token not in constants.EXPRESSION_OPERATORS:
            try:
                token = token_normalizer(token)
                value = any(option in photo_tags for option in frozen_children[token])
            except KeyError:
                if warn_bad_tags:
                    warnings.warn(constants.WARNING_NO_SUCH_TAG.format(tag=token))
                else:
                    raise exceptions.NoSuchTag(token)
                return False
            operand_stack.append(value)
            if has_operand:
                # An operand following another operand means an operator is
                # pending between them, so resolve it immediately.
                operate(operand_stack, operator_stack)
            has_operand = True
            continue

        if token == '(':
            has_operand = False

        if token == ')':
            if not can_shortcircuit:
                while operator_stack[-1] != '(':
                    operate(operand_stack, operator_stack)
                # Discard the matching '('. When short-circuiting, it was
                # already discarded at the moment the short circuit began.
                operator_stack.pop()
            has_operand = True
            continue

        can_shortcircuit = (
            has_operand and
            (
                (operand_stack[-1] == 0 and token == 'AND') or
                (operand_stack[-1] == 1 and token == 'OR')
            )
        )
        if can_shortcircuit:
            if operator_stack and operator_stack[-1] == '(':
                operator_stack.pop()
            continue

        operator_stack.append(token)

    while len(operand_stack) > 1 or len(operator_stack) > 0:
        operate(operand_stack, operator_stack)
    return operand_stack.pop()
def searchfilter_must_may_forbid(photo_tags, tag_musts, tag_mays, tag_forbids, frozen_children):
    '''
    Decide whether a photo passes the must / may / forbid tag filters.

    A tag counts as present when any of its options in `frozen_children`
    appears in `photo_tags`.

    tag_musts: every tag must be present.
    tag_mays: at least one tag must be present.
    tag_forbids: no tag may be present.

    A filter that is empty or None is not considered. Returns True if the
    photo passes all of the filters, otherwise False.
    '''
    def is_present(tag):
        return any(option in photo_tags for option in frozen_children[tag])

    if tag_musts:
        for must in tag_musts:
            if not is_present(must):
                return False

    if tag_mays and not any(is_present(may) for may in tag_mays):
        return False

    if tag_forbids and any(is_present(forbid) for forbid in tag_forbids):
        return False

    return True
def tag_export_easybake(tags, depth=0):
    '''
    Render the tags and all of their descendants as newline-separated text.

    Each tag contributes its qualified name ('parent.child'), followed by one
    'qualified+synonym' line per synonym, followed by the lines of its
    children. The qualified name is stored on each tag object as the
    attribute `string`.

    Returns a single string, which is empty when `tags` is empty.
    '''
    baked = []
    for tag in tags:
        # Top-level tags have not been given a qualified name by a parent.
        if not hasattr(tag, 'string'):
            tag.string = tag.name

        children = tag.children()
        synonyms = tag.synonyms()

        baked.append(tag.string)
        baked.extend(tag.string + '+' + synonym for synonym in synonyms)

        # Qualify the children before recursing so that they inherit the path.
        for child in children:
            child.string = tag.string + '.' + child.name
        subtree = tag_export_easybake(children, depth=depth+1)
        if subtree != '':
            baked.append(subtree)

    return '\n'.join(baked)
def tag_export_json(tags):
    '''
    Return a nested dictionary of {tag name: {child name: {...}}} describing
    the given tags and all of their descendants. Leaf tags map to an empty
    dictionary.
    '''
    def subtree(tag):
        return {child.name: subtree(child) for child in tag.children()}

    return {tag.name: subtree(tag) for tag in tags}
def tag_export_qualname_map(tags):
    '''
    Return a dictionary mapping every tag name and synonym to the qualified
    name ('parent.child') of the tag it belongs to, built from the output of
    tag_export_easybake.
    '''
    baked = tag_export_easybake(tags)
    qualname_map = {}
    for line in baked.split('\n'):
        if not line:
            continue
        # 'a.b+syn' -> key 'syn', value 'a.b'. Plain 'a.b' -> key 'b'.
        shortname = line.split('.')[-1].split('+')[-1]
        qualname = line.partition('+')[0]
        qualname_map[shortname] = qualname
    return qualname_map
def tag_export_stdout(tags, depth=0):
    '''
    Print the tags as an indented tree. Each tag is followed by its synonyms,
    indented one level deeper, and then by its children. A blank line is
    printed after every tag that has no parent.
    '''
    indent = ' ' * depth
    synonym_indent = ' ' * (depth + 1)
    for tag in tags:
        children = tag.children()
        synonyms = tag.synonyms()

        print(indent + str(tag))
        for synonym in synonyms:
            print(synonym_indent + synonym)

        tag_export_stdout(children, depth=depth+1)

        if tag.parent() is None:
            print()
2016-11-06 04:24:43 +00:00
@decorators.time_me
def tag_export_totally_flat(tags):
    '''
    Return a dictionary in which every tag reachable through `walk_children`
    of the given tags, and every synonym of those tags, is a key. The value
    is the list produced by that tag's own `walk_children`.
    '''
    flat = {}
    for tag in tags:
        for descendant in tag.walk_children():
            members = list(descendant.walk_children())
            flat[descendant] = members
            # Synonyms share the very same list object as their master tag.
            for synonym in descendant.synonyms():
                flat[synonym] = members
    return flat
####################################################################################################
####################################################################################################
class PDBAlbumMixin:
    '''
    Album lookup and creation methods, to be mixed into PhotoDB.
    '''
    def get_album(self, id):
        '''
        Look up a single album by its ID via `get_thing_by_id`.
        '''
        return self.get_thing_by_id('album', id)

    def get_album_by_path(self, filepath):
        '''
        Return the album with the `associated_directory` of this value, NOT case-sensitive.

        The path is made absolute before it is compared.
        Raises exceptions.NoSuchAlbum if no album has that directory.
        '''
        filepath = os.path.abspath(filepath)
        self.cur.execute('SELECT * FROM albums WHERE associated_directory == ?', [filepath])
        fetch = self.cur.fetchone()
        if fetch is None:
            raise exceptions.NoSuchAlbum(filepath)
        return self.get_album(fetch[constants.SQL_ALBUM['id']])

    def get_albums(self):
        '''
        Yield every album, as produced by `get_things`.
        '''
        yield from self.get_things(thing_type='album')

    def new_album(
            self,
            title=None,
            description=None,
            *,
            associated_directory=None,
            commit=True,
            photos=None
        ):
        '''
        Create a new album. Photos can be added now or later.

        title, description: strings. None is treated as an empty string.
        associated_directory: optional path, stored in absolute form.
        commit: if True, commit the database after the album is created.
        photos: optional iterable of values accepted by `get_photo`.

        Raises TypeError if the title or description is not a string.
        Returns the new Album object.
        '''
        title = title or ''
        description = description or ''
        # Validate before generate_id so that a rejected call does not
        # consume an ID.
        if not isinstance(title, str):
            raise TypeError('Title must be string, not %s' % type(title))
        if not isinstance(description, str):
            raise TypeError('Description must be string, not %s' % type(description))

        if associated_directory is not None:
            associated_directory = os.path.abspath(associated_directory)

        # Albums share the tag table's ID counter
        albumid = self.generate_id('tags')

        data = {
            'id': albumid,
            'title': title,
            'description': description,
            'associated_directory': associated_directory,
        }
        (qmarks, bindings) = binding_filler(constants.SQL_ALBUM_COLUMNS, data)
        query = 'INSERT INTO albums VALUES(%s)' % qmarks
        self.cur.execute(query, bindings)

        album = objects.Album(self, data)
        if photos:
            for photo in photos:
                photo = self.get_photo(photo)
                # Defer the commit so the whole album is written at once.
                album.add_photo(photo, commit=False)

        if commit:
            self.log.debug('Committing - new Album')
            self.commit()
        return album
class PDBPhotoMixin:
    '''
    Photo lookup, creation, cleanup and search methods, to be mixed into
    PhotoDB.
    '''
    def get_photo(self, photoid):
        '''
        Look up a single photo by its ID via `get_thing_by_id`.
        '''
        return self.get_thing_by_id('photo', photoid)

    def get_photo_by_path(self, filepath):
        '''
        Return the Photo whose `filepath` matches the absolute form of the
        given path.

        Raises exceptions.NoSuchPhoto, via raise_no_such_thing, if there is
        no such photo.
        '''
        filepath = os.path.abspath(filepath)
        self.cur.execute('SELECT * FROM photos WHERE filepath == ?', [filepath])
        fetch = self.cur.fetchone()
        if fetch is None:
            raise_no_such_thing(exceptions.NoSuchPhoto, thing_name=filepath)
        photo = objects.Photo(self, fetch)
        return photo

    def get_photos_by_recent(self, count=None):
        '''
        Yield photo objects in order of creation time, newest first.

        count: the maximum number of photos to yield. None means no limit,
            and a value of zero or less yields nothing.
        '''
        if count is not None and count <= 0:
            return
        # We're going to use a second cursor because the first one may
        # get used for something else, deactivating this query.
        temp_cur = self.sql.cursor()
        temp_cur.execute('SELECT * FROM photos ORDER BY created DESC')
        while True:
            fetch = temp_cur.fetchone()
            if fetch is None:
                break
            photo = objects.Photo(self, fetch)

            yield photo

            if count is None:
                continue
            count -= 1
            if count <= 0:
                break

    def new_photo(
            self,
            filename,
            *,
            allow_duplicates=False,
            author=None,
            commit=True,
            do_metadata=True,
            do_thumbnail=True,
            tags=None
        ):
        '''
        Given a filepath, determine its attributes and create a new Photo object in the
        database. Tags may be applied now or later.

        If `allow_duplicates` is False, we will first check the database for any files
        with the same path and raise exceptions.PhotoExists if found.

        author: a User object belonging to this photodb, a user ID, or None
            for a photo without an author.

        Raises ValueError if `author` is a User from a different photodb.

        Returns the Photo object.
        '''
        filename = os.path.abspath(filename)
        assert os.path.isfile(filename)
        if not allow_duplicates:
            try:
                existing = self.get_photo_by_path(filename)
            except exceptions.NoSuchPhoto:
                pass
            else:
                exc = exceptions.PhotoExists(filename, existing)
                exc.photo = existing
                raise exc

        if author is None:
            # Without this branch author_id stayed unbound and the default
            # call raised UnboundLocalError when building the row below.
            author_id = None
        elif isinstance(author, objects.User):
            if author.photodb != self:
                raise ValueError('That user does not belong to this photodb')
            author_id = author.id
        else:
            # Just to confirm
            author_id = self.get_user(id=author).id

        extension = os.path.splitext(filename)[1]
        extension = extension.replace('.', '')
        extension = self.normalize_tagname(extension)
        created = int(helpers.now())
        photoid = self.generate_id('photos')

        data = {
            'id': photoid,
            'filepath': filename,
            'override_filename': None,
            'extension': extension,
            'created': created,
            'tagged_at': None,
            'author_id': author_id,
            # These will be filled in during the metadata stage.
            'bytes': None,
            'width': None,
            'height': None,
            'area': None,
            'ratio': None,
            'duration': None,
            'thumbnail': None,
        }

        (qmarks, bindings) = binding_filler(constants.SQL_PHOTO_COLUMNS, data)
        query = 'INSERT INTO photos VALUES(%s)' % qmarks
        self.cur.execute(query, bindings)
        photo = objects.Photo(self, data)

        if do_metadata:
            photo.reload_metadata(commit=False)
        if do_thumbnail:
            photo.generate_thumbnail(commit=False)

        tags = tags or []
        tags = [self.get_tag(tag) for tag in tags]
        for tag in tags:
            photo.add_tag(tag, commit=False)

        if commit:
            self.log.debug('Commiting - new_photo')
            self.commit()
        return photo

    def purge_deleted_files(self):
        '''
        Remove Photo entries if their corresponding file is no longer found.
        '''
        photos = self.get_photos_by_recent()
        for photo in photos:
            if os.path.exists(photo.real_filepath):
                continue
            photo.delete()

    def purge_empty_albums(self):
        '''
        Delete every album that has neither children nor photos.
        '''
        albums = self.get_albums()
        for album in albums:
            if album.children() or album.photos():
                continue
            album.delete()

    def search(
            self,
            *,
            area=None,
            width=None,
            height=None,
            ratio=None,
            bytes=None,
            duration=None,

            created=None,
            extension=None,
            extension_not=None,
            filename=None,
            has_tags=None,
            mimetype=None,
            tag_musts=None,
            tag_mays=None,
            tag_forbids=None,
            tag_expression=None,

            warn_bad_tags=False,
            limit=None,
            offset=None,
            orderby=None
        ):
        '''
        PHOTO PROPERTIES
        area, width, height, ratio, bytes, duration:
            A hyphen_range string representing min and max. Or just a number for lower bound.

        TAGS AND FILTERS
        created:
            A hyphen_range string representing min and max. Or just a number for lower bound.

        extension:
            A string or list of strings of acceptable file extensions.

        extension_not:
            A string or list of strings of unacceptable file extensions.

        filename:
            A string or list of strings which will be split into words. The file's basename
            must include every word, NOT case-sensitive.

        has_tags:
            If True, require that the Photo has >=1 tag.
            If False, require that the Photo has no tags.
            If None, not considered.

        mimetype:
            A string or list of strings of acceptable mimetypes. 'image', 'video', ...

        tag_musts:
            A list of tag names or Tag objects.
            Photos MUST have ALL tags in this list.

        tag_mays:
            A list of tag names or Tag objects.
            Photos MUST have AT LEAST ONE tag in this list.

        tag_forbids:
            A list of tag names or Tag objects.
            Photos MUST NOT have ANY tag in the list.

        tag_expression:
            A string like 'family AND (animals OR vacation)' to filter by.
            Can NOT be used with the must, may, forbid style search.

        QUERY OPTIONS
        warn_bad_tags:
            If a tag is not found, issue a warning but continue the search.
            Otherwise, a exceptions.NoSuchTag exception would be raised.

        limit:
            The maximum number of *successful* results to yield.

        offset:
            How many *successful* results to skip before we start yielding.

        orderby:
            A list of strings like ['ratio DESC', 'created ASC'] to sort
            and subsort the results.
            Descending is assumed if not provided.

        This is a generator: argument errors such as exceptions.NotExclusive
        are raised when iteration starts, not when search() is called.
        '''
        start_time = time.time()
        maximums = {}
        minimums = {}
        helpers._minmax('area', area, minimums, maximums)
        helpers._minmax('created', created, minimums, maximums)
        helpers._minmax('width', width, minimums, maximums)
        helpers._minmax('height', height, minimums, maximums)
        helpers._minmax('ratio', ratio, minimums, maximums)
        helpers._minmax('bytes', bytes, minimums, maximums)
        helpers._minmax('duration', duration, minimums, maximums)

        orderby = orderby or []

        extension = helpers._normalize_extensions(extension)
        extension_not = helpers._normalize_extensions(extension_not)
        mimetype = helpers._normalize_extensions(mimetype)

        if filename is not None:
            if not isinstance(filename, str):
                filename = ' '.join(filename)
            filename = set(term.lower() for term in filename.strip().split(' '))

        if (tag_musts or tag_mays or tag_forbids) and tag_expression:
            raise exceptions.NotExclusive('Expression filter cannot be used with musts, mays, forbids')

        tag_musts = helpers._setify_tags(photodb=self, tags=tag_musts, warn_bad_tags=warn_bad_tags)
        tag_mays = helpers._setify_tags(photodb=self, tags=tag_mays, warn_bad_tags=warn_bad_tags)
        tag_forbids = helpers._setify_tags(photodb=self, tags=tag_forbids, warn_bad_tags=warn_bad_tags)

        query = 'SELECT * FROM photos'
        orderby = [helpers._orderby(o) for o in orderby]
        orderby = [o for o in orderby if o]
        if orderby:
            # Rows with NULL in a sort column are excluded from the results.
            whereable_columns = [o[0] for o in orderby if o[0] != 'RANDOM()']
            whereable_columns = [column + ' IS NOT NULL' for column in whereable_columns]
            if whereable_columns:
                query += ' WHERE '
                query += ' AND '.join(whereable_columns)
            orderby = [' '.join(o) for o in orderby]
            orderby = ', '.join(orderby)
            query += ' ORDER BY %s' % orderby
        else:
            query += ' ORDER BY created DESC'
        self.log.debug(query)
        generator = helpers.select_generator(self.sql, query)

        # To lighten the amount of database reading here, `frozen_children` is a dict where
        # EVERY tag in the db is a key, and the value is a list of ALL ITS NESTED CHILDREN.
        # This representation is memory inefficient, but it is faster than repeated
        # database lookups
        is_must_may_forbid = bool(tag_musts or tag_mays or tag_forbids)
        is_tagsearch = is_must_may_forbid or tag_expression
        if is_tagsearch:
            if self._cached_frozen_children:
                frozen_children = self._cached_frozen_children
            else:
                frozen_children = self.export_tags(tag_export_totally_flat)
                self._cached_frozen_children = frozen_children

        photos_received = 0
        for fetch in generator:
            photo = objects.Photo(self, fetch)

            if extension and photo.extension not in extension:
                continue

            if extension_not and photo.extension in extension_not:
                continue

            if mimetype and photo.mimetype() not in mimetype:
                continue

            if filename and not _helper_filenamefilter(subject=photo.basename, terms=filename):
                continue

            # A photo whose value is missing (or otherwise falsy) fails any
            # range filter on that column.
            if any(
                    not fetch[constants.SQL_PHOTO[key]] or
                    fetch[constants.SQL_PHOTO[key]] > value for (key, value) in maximums.items()
                ):
                continue

            if any(
                    not fetch[constants.SQL_PHOTO[key]] or
                    fetch[constants.SQL_PHOTO[key]] < value for (key, value) in minimums.items()
                ):
                continue

            if (has_tags is not None) or is_tagsearch:
                photo_tags = photo.tags()

                if has_tags is False and len(photo_tags) > 0:
                    continue

                if has_tags is True and len(photo_tags) == 0:
                    continue

                photo_tags = set(photo_tags)

            if tag_expression:
                success = searchfilter_expression(
                    photo_tags=photo_tags,
                    expression=tag_expression,
                    frozen_children=frozen_children,
                    token_normalizer=self.normalize_tagname,
                    warn_bad_tags=warn_bad_tags,
                )
                if not success:
                    continue
            elif is_must_may_forbid:
                success = searchfilter_must_may_forbid(
                    photo_tags=photo_tags,
                    tag_musts=tag_musts,
                    tag_mays=tag_mays,
                    tag_forbids=tag_forbids,
                    frozen_children=frozen_children,
                )
                if not success:
                    continue

            # Offset and limit count only photos that passed every filter.
            if offset is not None and offset > 0:
                offset -= 1
                continue

            if limit is not None and photos_received >= limit:
                break

            photos_received += 1
            yield photo

        end_time = time.time()
        self.log.debug('Search took %s seconds', end_time - start_time)
class PDBTagMixin:
    '''
    Tag lookup, creation, export and name normalization methods, to be mixed
    into PhotoDB.
    '''
    def export_tags(self, exporter=tag_export_stdout, specific_tag=None):
        '''
        Send tags to the function `exporter` and return whatever it returns.

        With no `specific_tag`, the exporter receives every tag that has no
        parent, sorted by name. Otherwise it receives a list containing only
        that tag. Recursion is to be handled by the exporter.
        '''
        if specific_tag is not None:
            return exporter([self.get_tag(specific_tag)])

        # Exhaust the generator before calling parent() on any of the tags.
        every_tag = list(self.get_tags())
        toplevel = [tag for tag in every_tag if tag.parent() is None]
        toplevel.sort(key=lambda tag: tag.name)
        return exporter(toplevel)

    def get_tag(self, name=None, id=None):
        '''
        Redirect to get_tag_by_id or get_tag_by_name after xor-checking the parameters.
        '''
        if not helpers.is_xor(id, name):
            raise exceptions.NotExclusive('One and only one of `id`, `name` must be passed.')

        if id is not None:
            return self.get_tag_by_id(id)
        if name is not None:
            return self.get_tag_by_name(name)
        raise_no_such_thing(exceptions.NoSuchTag, thing_id=id, thing_name=name)

    def get_tag_by_id(self, id):
        '''
        Look up a single tag by its ID via `get_thing_by_id`.
        '''
        return self.get_thing_by_id('tag', thing_id=id)

    def get_tag_by_name(self, tagname):
        '''
        Return the Tag with this name, following synonyms to their master tag.

        `tagname` may be a Tag object, a plain name, or a qualified name such
        as 'parent.child+synonym', of which only 'child' is used.

        Raises exceptions.NoSuchTag, via raise_no_such_thing, if the name is
        neither a tag nor a synonym.
        '''
        if isinstance(tagname, objects.Tag):
            tagname = tagname.name

        # Keep what follows the last '.', then drop anything from the first '+'.
        shortname = tagname.rpartition('.')[2].partition('+')[0]
        candidate = self.normalize_tagname(shortname)

        while True:
            # Return if it's a toplevel, or resolve the synonym and try that.
            self.cur.execute('SELECT * FROM tags WHERE name == ?', [candidate])
            tag_row = self.cur.fetchone()
            if tag_row is not None:
                return objects.Tag(self, tag_row)

            self.cur.execute('SELECT * FROM tag_synonyms WHERE name == ?', [candidate])
            synonym_row = self.cur.fetchone()
            if synonym_row is None:
                # was not a top tag or synonym
                raise_no_such_thing(exceptions.NoSuchTag, thing_name=candidate)
            candidate = synonym_row[constants.SQL_SYN['master']]

    def get_tags(self):
        '''
        Yield every tag, as produced by `get_things`.
        '''
        yield from self.get_things(thing_type='tag')

    def new_tag(self, tagname, *, commit=True):
        '''
        Register a new tag and return the Tag object.

        Raises exceptions.TagExists if the normalized name already resolves
        to a tag or synonym.
        '''
        tagname = self.normalize_tagname(tagname)

        name_is_taken = True
        try:
            self.get_tag_by_name(tagname)
        except exceptions.NoSuchTag:
            name_is_taken = False
        if name_is_taken:
            raise exceptions.TagExists(tagname)

        tagid = self.generate_id('tags')
        # The cached tag tree no longer reflects the database.
        self._cached_frozen_children = None
        row = [tagid, tagname]
        self.cur.execute('INSERT INTO tags VALUES(?, ?)', row)
        if commit:
            self.log.debug('Commiting - new_tag')
            self.commit()
        return objects.Tag(self, [tagid, tagname])

    def normalize_tagname(self, tagname):
        '''
        Tag names can only consist of characters defined in the config.
        The given tagname is lowercased, gets its spaces and hyphens
        replaced by underscores, and is stripped of any not-whitelisted
        characters.

        Raises exceptions.TagTooShort or exceptions.TagTooLong if the result
        falls outside the configured length limits.
        '''
        allowed = self.config['valid_tag_chars']
        cleaned = tagname.lower().replace('-', '_').replace(' ', '_')
        cleaned = ''.join(character for character in cleaned if character in allowed)

        length = len(cleaned)
        if length < self.config['min_tag_name_length']:
            raise exceptions.TagTooShort(cleaned)
        if length > self.config['max_tag_name_length']:
            raise exceptions.TagTooLong(cleaned)
        return cleaned
2016-09-18 08:33:46 +00:00
2016-12-16 23:45:46 +00:00
class PDBUserMixin:
    '''
    User lookup, login and registration methods, to be mixed into PhotoDB.
    '''
    def generate_user_id(self):
        '''
        User IDs are randomized instead of integers like the other objects,
        so they get their own method.

        Returns a string of `config['id_length']` digits and uppercase
        letters that is not yet present in the users table.
        Raises Exception if 20 attempts in a row collide with existing IDs.
        '''
        possible = string.digits + string.ascii_uppercase
        for retry in range(20):
            user_id = [random.choice(possible) for x in range(self.config['id_length'])]
            user_id = ''.join(user_id)

            self.cur.execute('SELECT * FROM users WHERE id == ?', [user_id])
            if self.cur.fetchone() is None:
                break
        else:
            # The loop ran out of retries without finding a free ID.
            raise Exception('Failed to create user id after 20 tries.')

        return user_id

    def get_user(self, username=None, id=None):
        '''
        Return the User with this username or this ID.

        Raises exceptions.NotExclusive unless exactly one of `username`, `id`
        is passed, and exceptions.NoSuchUser if there is no matching user.
        '''
        if not helpers.is_xor(id, username):
            raise exceptions.NotExclusive('One and only one of `id`, `username` must be passed.')

        if username is not None:
            self.cur.execute('SELECT * FROM users WHERE username == ?', [username])
        else:
            self.cur.execute('SELECT * FROM users WHERE id == ?', [id])

        fetch = self.cur.fetchone()
        if fetch is not None:
            return objects.User(self, fetch)

        # Report whichever identifier was searched for. Previously a failed
        # lookup by ID raised NoSuchUser(None).
        raise exceptions.NoSuchUser(username if username is not None else id)

    def login(self, user_id, password):
        '''
        Return the User with this ID if the password matches the stored hash.

        `password` may be str or bytes. str is encoded as UTF-8.
        Raises exceptions.WrongLogin for an unknown ID or a wrong password.
        '''
        self.cur.execute('SELECT * FROM users WHERE id == ?', [user_id])
        fetch = self.cur.fetchone()

        if fetch is None:
            raise exceptions.WrongLogin()

        stored_password = fetch[constants.SQL_USER['password']]

        if not isinstance(password, bytes):
            password = password.encode('utf-8')

        success = bcrypt.checkpw(password, stored_password)
        if not success:
            raise exceptions.WrongLogin()

        return objects.User(self, fetch)

    def register_user(self, username, password, commit=True):
        '''
        Create a new user and return the User object.

        `password` may be str or bytes. str is encoded as UTF-8, and the
        minimum length applies to the encoded bytes. Only the bcrypt hash of
        the password is stored.

        Raises exceptions.UsernameTooShort, UsernameTooLong,
        InvalidUsernameChars, PasswordTooShort or UserExists when the
        corresponding check fails.
        '''
        if len(username) < self.config['min_username_length']:
            raise exceptions.UsernameTooShort(username)

        if len(username) > self.config['max_username_length']:
            raise exceptions.UsernameTooLong(username)

        badchars = [c for c in username if c not in self.config['valid_username_chars']]
        if badchars:
            raise exceptions.InvalidUsernameChars(badchars)

        if not isinstance(password, bytes):
            password = password.encode('utf-8')

        if len(password) < self.config['min_password_length']:
            raise exceptions.PasswordTooShort

        self.cur.execute('SELECT * FROM users WHERE username == ?', [username])
        if self.cur.fetchone() is not None:
            raise exceptions.UserExists(username)

        user_id = self.generate_user_id()
        hashed_password = bcrypt.hashpw(password, bcrypt.gensalt())
        created = int(helpers.now())

        data = {
            'id': user_id,
            'username': username,
            'password': hashed_password,
            'created': created,
        }

        (qmarks, bindings) = binding_filler(constants.SQL_USER_COLUMNS, data)
        query = 'INSERT INTO users VALUES(%s)' % qmarks
        self.cur.execute(query, bindings)

        if commit:
            self.log.debug('Committing - register user')
            self.commit()

        return objects.User(self, data)
2016-12-16 23:45:46 +00:00
class PhotoDB(PDBAlbumMixin, PDBPhotoMixin, PDBTagMixin, PDBUserMixin):
2016-09-18 08:33:46 +00:00
'''
This class represents an SQLite3 database containing the following tables:
albums:
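Rows represent albums, which are named collections of photos.
Entries contain a unique ID, a title, a description, and optionally
an associated directory.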
2016-10-10 03:50:13 +00:00
photos:
2016-09-18 08:33:46 +00:00
Rows represent image files on the local disk.
Entries contain a unique ID, the image's filepath, and metadata
like dimensions and filesize.
tags:
Rows represent labels, which can be applied to an arbitrary number of
photos. Photos may be selected by which tags they contain.
Entries contain a unique ID and a name.
photo_tag_rel:
Rows represent a Photo's ownership of a particular Tag.
tag_synonyms:
Rows represent relationships between two tag names, so that they both
resolve to the same Tag object when selected. Entries contain the
subordinate name and master name.
The master name MUST also exist in the `tags` table.
If a new synonym is created referring to another synonym, the master name
will be resolved and used instead, so a synonym never points to another synonym.
Tag objects will ALWAYS represent the master tag.
Note that the entries in this table do not contain ID numbers.
The rationale here is that "coco" is a synonym for "chocolate" regardless
of the "chocolate" tag's ID, and that if a tag is renamed, its synonyms
do not necessarily follow.
The `rename` method of Tag objects includes a parameter
`apply_to_synonyms` if you do want them to follow.
'''
2016-11-06 04:24:43 +00:00
def __init__(self, data_directory=None):
    '''
    Open the database inside `data_directory`, creating the directory, the
    database, the config file and the thumbnail directory if they do not
    exist yet.

    data_directory: where everything is stored. None selects
        constants.DEFAULT_DATADIR.

    Raises SystemExit if an existing database reports a user_version
    different from DATABASE_VERSION.
    '''
    if data_directory is None:
        data_directory = constants.DEFAULT_DATADIR

    # DATA DIR PREP
    normalized_directory = helpers.normalize_filepath(data_directory, allowed='/\\')
    self.data_directory = os.path.abspath(normalized_directory)
    os.makedirs(self.data_directory, exist_ok=True)

    # DATABASE
    self.database_abspath = os.path.join(self.data_directory, 'phototagger.db')
    # This has to be checked before connecting, because connecting creates the file.
    database_preexisting = os.path.exists(self.database_abspath)
    self.sql = sqlite3.connect(self.database_abspath)
    self.cur = self.sql.cursor()

    if database_preexisting:
        self.cur.execute('PRAGMA user_version')
        found_version = self.cur.fetchone()[0]
        if found_version != DATABASE_VERSION:
            raise SystemExit(
                constants.ERROR_DATABASE_OUTOFDATE.format(
                    current=found_version,
                    new=DATABASE_VERSION,
                )
            )

    # Only after the version check may the init script set user_version.
    for statement in DB_INIT.split(';'):
        self.cur.execute(statement)

    # CONFIG
    self.config_abspath = os.path.join(self.data_directory, 'config.json')
    self.config = copy.deepcopy(constants.DEFAULT_CONFIGURATION)
    if not os.path.isfile(self.config_abspath):
        # First run: write the defaults out so that the user can edit them.
        with open(self.config_abspath, 'w') as handle:
            serialized = json.dumps(self.config, indent=4, sort_keys=True)
            handle.write(serialized)
    else:
        with open(self.config_abspath, 'r') as handle:
            user_config = json.load(handle)
        # Keys from the user's file override the defaults.
        self.config.update(user_config)

    # THUMBNAIL DIRECTORY
    thumbnail_directory = os.path.join(self.data_directory, 'site_thumbnails')
    self.thumbnail_directory = os.path.abspath(thumbnail_directory)
    os.makedirs(self.thumbnail_directory, exist_ok=True)

    # OTHER
    self.log = logging.getLogger(__name__)
    self.on_commit_queue = []
    self._cached_frozen_children = None
2016-09-18 08:33:46 +00:00
def __repr__(self):
    '''
    Return a string of the form PhotoDB(data_directory='...').
    '''
    return 'PhotoDB(data_directory={datadir})'.format(datadir=repr(self.data_directory))
2016-09-18 08:33:46 +00:00
2016-10-10 03:50:13 +00:00
def _uncache(self):
    '''
    Discard the cached frozen-children value by resetting it to None.
    '''
    self._cached_frozen_children = None
2016-10-18 05:13:12 +00:00
def commit(self):
    '''
    Run every task in `on_commit_queue`, then commit the SQL transaction.

    Each task is a dict with an 'action' callable and optional 'args'
    (list) and 'kwargs' (dict). Tasks are taken from the end of the queue,
    so the most recently queued task runs first. The queue is empty when
    this method returns.
    '''
    while self.on_commit_queue:
        task = self.on_commit_queue.pop()
        self.log.debug('Running on-commit task %s', task)
        args = task.get('args', [])
        kwargs = task.get('kwargs', {})
        task['action'](*args, **kwargs)
    self.sql.commit()
2016-12-14 23:10:24 +00:00
def digest_directory(
        self,
        directory,
        *,
        exclude_directories=None,
        exclude_filenames=None,
        commit=True
    ):
    '''
    Create an album, and add the directory's contents to it recursively.

    If a Photo object already exists for a file, it will be added to the
    correct album.

    directory:
        Path of the directory to digest.
    exclude_directories, exclude_filenames:
        Passed to spinal.walk_generator. When None, the config values
        'digest_exclude_dirs' / 'digest_exclude_files' are used.
    commit:
        If True, commit once after the whole walk has finished.

    Returns the album associated with `directory` itself.
    Raises ValueError if `directory` is not a directory.
    '''
    if not os.path.isdir(directory):
        raise ValueError('Not a directory: %s' % directory)
    if exclude_directories is None:
        exclude_directories = self.config['digest_exclude_dirs']
    if exclude_filenames is None:
        exclude_filenames = self.config['digest_exclude_files']

    directory = spinal.str_to_fp(directory)
    directory.correct_case()
    generator = spinal.walk_generator(
        directory,
        exclude_directories=exclude_directories,
        exclude_filenames=exclude_filenames,
        yield_style='nested',
    )

    try:
        album = self.get_album_by_path(directory.absolute_path)
    except exceptions.NoSuchAlbum:
        album = self.new_album(
            associated_directory=directory.absolute_path,
            commit=False,
            title=directory.basename,
        )

    # Albums seen so far, keyed by absolute path. Seeded with the root so
    # that the root never goes looking for a parent album.
    albums = {directory.absolute_path: album}
    for (current_location, _directories, files) in generator:
        current_album = albums.get(current_location.absolute_path, None)
        if current_album is None:
            try:
                current_album = self.get_album_by_path(current_location.absolute_path)
            except exceptions.NoSuchAlbum:
                current_album = self.new_album(
                    associated_directory=current_location.absolute_path,
                    commit=False,
                    title=current_location.basename,
                )
                self.log.debug('Created %s', current_album.title)
            albums[current_location.absolute_path] = current_album

            # The lookup assumes the walk yields a directory before its
            # subdirectories, so the parent album is already registered.
            parent = albums[current_location.parent.absolute_path]
            try:
                parent.add(current_album, commit=False)
            except exceptions.GroupExists:
                # Already a child of this parent; nothing to do.
                pass

        for filepath in files:
            try:
                photo = self.new_photo(filepath.absolute_path, commit=False)
            except exceptions.PhotoExists as e:
                # Reuse the existing Photo carried by the exception.
                photo = e.photo
            current_album.add_photo(photo, commit=False)

    if commit:
        self.log.debug('Commiting - digest')
        self.commit()
    return album
2016-10-10 03:50:13 +00:00
def digest_new_files(
        self,
        directory,
        exclude_directories=None,
        exclude_filenames=None,
        recurse=False,
        commit=True
    ):
    '''
    Walk the directory and add new files as Photos.
    Does NOT create or modify any albums like `digest_directory` does.

    directory:
        Path of the directory to walk.
    exclude_directories, exclude_filenames:
        Passed to spinal.walk_generator. When None, the config values
        'digest_exclude_dirs' / 'digest_exclude_files' are used.
    recurse:
        Passed to spinal.walk_generator.
    commit:
        If True, commit once after the whole walk has finished.

    Raises ValueError if `directory` is not a directory.
    '''
    if not os.path.isdir(directory):
        raise ValueError('Not a directory: %s' % directory)
    if exclude_directories is None:
        exclude_directories = self.config['digest_exclude_dirs']
    if exclude_filenames is None:
        exclude_filenames = self.config['digest_exclude_files']

    directory = spinal.str_to_fp(directory)
    generator = spinal.walk_generator(
        directory,
        exclude_directories=exclude_directories,
        exclude_filenames=exclude_filenames,
        recurse=recurse,
        yield_style='flat',
    )
    for filepath in generator:
        filepath = filepath.absolute_path
        try:
            self.get_photo_by_path(filepath)
        except exceptions.NoSuchPhoto:
            # This is what we want.
            pass
        else:
            # A Photo already exists for this file; skip it.
            continue
        self.new_photo(filepath, commit=False)

    if commit:
        self.log.debug('Committing - digest_new_files')
        self.commit()
2016-09-18 08:33:46 +00:00
def easybake(self, ebstring):
    '''
    Easily create tags, groups, and synonyms with a string like
    "group1.group2.tag+synonym"
    "family.parents.dad+father"
    etc

    A string of the form "oldname=newname" renames an existing tag
    instead. Renaming and assigning a synonym cannot be combined.

    Returns a list of (action, detail) tuples describing what was done,
    where action is one of 'existing_tag', 'new_tag', 'join_group',
    'new_synonym', 'rename'. Returns None if the string is empty after
    stripping whitespace and the characters ".+=".

    Raises ValueError if the string contains both "=" and "+", or more
    than one of either.
    '''
    output_notes = []

    def create_or_get(name):
        # Fetch the tag called `name`, creating it if it does not exist,
        # and record which of the two happened.
        self.log.debug('Create or get tag %s', name)
        try:
            item = self.get_tag(name)
            note = ('existing_tag', item.qualified_name())
        except exceptions.NoSuchTag:
            item = self.new_tag(name)
            note = ('new_tag', item.qualified_name())
        output_notes.append(note)
        return item

    ebstring = ebstring.strip()
    ebstring = ebstring.strip('.+=')
    if ebstring == '':
        return None

    if '=' in ebstring and '+' in ebstring:
        raise ValueError('Cannot rename and assign synonym at once')

    rename_parts = ebstring.split('=')
    if len(rename_parts) == 2:
        (ebstring, rename_to) = rename_parts
    elif len(rename_parts) == 1:
        ebstring = rename_parts[0]
        rename_to = None
    else:
        raise ValueError('Too many equals signs')

    create_parts = ebstring.split('+')
    if len(create_parts) == 2:
        (tag, synonym) = create_parts
    elif len(create_parts) == 1:
        tag = create_parts[0]
        synonym = None
    else:
        raise ValueError('Too many plus signs')

    if not tag:
        return None

    if rename_to:
        tag = self.get_tag(tag)
        # Build the note before renaming so it shows the old name.
        note = ('rename', '%s=%s' % (tag.name, rename_to))
        tag.rename(rename_to)
        output_notes.append(note)
    else:
        tag_parts = tag.split('.')
        tags = [create_or_get(t) for t in tag_parts]
        # Each tag joins the group named immediately to its left.
        for (higher, lower) in zip(tags, tags[1:]):
            try:
                lower.join_group(higher)
            except exceptions.GroupExists:
                pass
            else:
                note = ('join_group', '%s.%s' % (higher.name, lower.name))
                output_notes.append(note)
        # A synonym, if any, attaches to the rightmost tag.
        tag = tags[-1]

    if synonym:
        try:
            tag.add_synonym(synonym)
        except exceptions.TagExists:
            pass
        else:
            note = ('new_synonym', '%s+%s' % (tag.name, synonym))
            output_notes.append(note)
            self.log.debug('New syn %s', synonym)
    return output_notes
def generate_id(self, table):
    '''
    Create a new ID number that is unique to the given table.

    Note that while this method may INSERT / UPDATE, it does not commit.
    We'll wait for that to happen in whoever is calling us, so we know the
    ID is actually used.

    table:
        One of 'photos', 'tags', 'groups' (case-insensitive).

    Returns the new ID as a string, left-padded with zeros to the config
    value 'id_length'.
    Raises ValueError for any other table name.
    '''
    table = table.lower()
    if table not in ['photos', 'tags', 'groups']:
        raise ValueError('Invalid table requested: %s.' % table)

    self.cur.execute('SELECT last_id FROM id_numbers WHERE tab == ?', [table])
    fetch = self.cur.fetchone()
    if fetch is None:
        # No row yet for this table: start counting at 1.
        new_id_int = 1
    else:
        # Continue from the value stored in the database.
        new_id_int = int(fetch[0]) + 1

    new_id = str(new_id_int).rjust(self.config['id_length'], '0')
    if fetch is None:
        self.cur.execute('INSERT INTO id_numbers VALUES(?, ?)', [table, new_id])
    else:
        self.cur.execute('UPDATE id_numbers SET last_id = ? WHERE tab == ?', [new_id, table])
    return new_id
def get_thing_by_id(self, thing_type, thing_id):
    '''
    Fetch a single object of the given type by its ID.

    thing_type:
        A key of _THING_CLASSES ('album', 'photo', 'tag', 'user').
    thing_id:
        The ID to look up, or an instance of the corresponding class, in
        which case its `.id` is used.

    Returns an instance of the class registered for `thing_type`.
    If no row has that ID, the result of raise_no_such_thing is returned;
    presumably that helper raises the registered exception (confirm
    against its definition).
    '''
    thing_map = _THING_CLASSES[thing_type]

    if isinstance(thing_id, thing_map['class']):
        thing_id = thing_id.id

    # The table name comes from the internal map, not from the caller, so
    # only the ID needs to be a bound parameter.
    query = 'SELECT * FROM %s WHERE id == ?' % thing_map['table']
    self.cur.execute(query, [thing_id])
    thing = self.cur.fetchone()
    if thing is None:
        return raise_no_such_thing(thing_map['exception'], thing_id=thing_id)
    thing = thing_map['class'](self, thing)
    return thing
def get_things(self, thing_type, orderby=None):
    '''
    Yield every object of the given type.

    thing_type:
        A key of _THING_CLASSES ('album', 'photo', 'tag', 'user').
    orderby:
        Optional SQL ORDER BY expression, inserted into the query as-is.
    '''
    thing_map = _THING_CLASSES[thing_type]

    if orderby:
        # NOTE(review): `orderby` is interpolated directly into the SQL
        # because ORDER BY cannot take a bound parameter. Callers must
        # never pass untrusted input here.
        self.cur.execute('SELECT * FROM %s ORDER BY %s' % (thing_map['table'], orderby))
    else:
        self.cur.execute('SELECT * FROM %s' % thing_map['table'])

    # Fetch all rows before yielding. Presumably this is because self.cur
    # is shared and could be reused while the generator is suspended
    # (TODO confirm).
    things = self.cur.fetchall()
    for thing in things:
        thing = thing_map['class'](self, row_tuple=thing)
        yield thing
2016-12-17 01:59:43 +00:00
# Lookup table used by get_thing_by_id / get_things: for each thing type,
# the class to instantiate, the exception for a missing ID, and the SQL
# table that stores the rows.
_THING_CLASSES = {
    'album':
    {
        'class': objects.Album,
        'exception': exceptions.NoSuchAlbum,
        'table': 'albums',
    },
    'photo':
    {
        'class': objects.Photo,
        'exception': exceptions.NoSuchPhoto,
        'table': 'photos',
    },
    'tag':
    {
        'class': objects.Tag,
        'exception': exceptions.NoSuchTag,
        'table': 'tags',
    },
    'user':
    {
        'class': objects.User,
        'exception': exceptions.NoSuchUser,
        'table': 'users',
    },
}
2016-09-18 08:33:46 +00:00
# When run as a script, open the PhotoDB with its default data directory
# and print its repr.
if __name__ == '__main__':
    p = PhotoDB()
    print(p)