Move thumbnail blobs into the database.

I've moved the thumbnails around many times over the course of this
project, and hopefully it won't happen too many more times. Once the
database has tens of thousands of items, the thumbnails become the
biggest headache on the disk. Backing up, restoring, and sharding files
per directory are slower and more effortful with separate files. Storing
them in the db makes the db a larger file, but this is disk space that
was already being used anyway. Now it's simpler and has atomic
transactions.
This commit is contained in:
voussoir 2023-01-27 17:34:32 -08:00
parent d5ac0b1717
commit e4f686c86f
11 changed files with 155 additions and 70 deletions

View file

@ -41,7 +41,7 @@ ffmpeg = _load_ffmpeg()
# Database ######################################################################################### # Database #########################################################################################
DATABASE_VERSION = 23 DATABASE_VERSION = 24
DB_INIT = ''' DB_INIT = '''
CREATE TABLE IF NOT EXISTS albums( CREATE TABLE IF NOT EXISTS albums(
@ -79,7 +79,6 @@ CREATE TABLE IF NOT EXISTS photos(
duration INT, duration INT,
bytes INT, bytes INT,
created INT, created INT,
thumbnail TEXT,
tagged_at INT, tagged_at INT,
author_id INT, author_id INT,
searchhidden BOOLEAN, searchhidden BOOLEAN,
@ -106,7 +105,7 @@ CREATE INDEX IF NOT EXISTS index_photos_basename on photos(basename COLLATE NOCA
CREATE INDEX IF NOT EXISTS index_photos_created on photos(created); CREATE INDEX IF NOT EXISTS index_photos_created on photos(created);
CREATE INDEX IF NOT EXISTS index_photos_extension on photos(extension); CREATE INDEX IF NOT EXISTS index_photos_extension on photos(extension);
CREATE INDEX IF NOT EXISTS index_photos_author_id on photos(author_id); CREATE INDEX IF NOT EXISTS index_photos_author_id on photos(author_id);
CREATE INDEX IF NOT EXISTS index_photos_searchhidden on photos(searchhidden); CREATE INDEX IF NOT EXISTS index_photos_searchhidden_created on photos(searchhidden, created);
---------------------------------------------------------------------------------------------------- ----------------------------------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS tags( CREATE TABLE IF NOT EXISTS tags(
id INT PRIMARY KEY NOT NULL, id INT PRIMARY KEY NOT NULL,
@ -177,6 +176,14 @@ CREATE INDEX IF NOT EXISTS index_photo_tag_rel_photoid on photo_tag_rel(photoid)
CREATE INDEX IF NOT EXISTS index_photo_tag_rel_tagid on photo_tag_rel(tagid); CREATE INDEX IF NOT EXISTS index_photo_tag_rel_tagid on photo_tag_rel(tagid);
CREATE INDEX IF NOT EXISTS index_photo_tag_rel_photoid_tagid on photo_tag_rel(photoid, tagid); CREATE INDEX IF NOT EXISTS index_photo_tag_rel_photoid_tagid on photo_tag_rel(photoid, tagid);
---------------------------------------------------------------------------------------------------- ----------------------------------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS photo_thumbnails(
photoid INT PRIMARY KEY NOT NULL,
thumbnail BLOB NOT NULL,
created INT NOT NULL,
FOREIGN KEY(photoid) REFERENCES photos(id)
);
CREATE INDEX IF NOT EXISTS index_photo_thumbnails_photoid on photo_thumbnails(photoid);
----------------------------------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS tag_group_rel( CREATE TABLE IF NOT EXISTS tag_group_rel(
parentid INT NOT NULL, parentid INT NOT NULL,
memberid INT NOT NULL, memberid INT NOT NULL,
@ -189,11 +196,12 @@ CREATE INDEX IF NOT EXISTS index_tag_group_rel_parentid on tag_group_rel(parenti
CREATE INDEX IF NOT EXISTS index_tag_group_rel_memberid on tag_group_rel(memberid); CREATE INDEX IF NOT EXISTS index_tag_group_rel_memberid on tag_group_rel(memberid);
---------------------------------------------------------------------------------------------------- ----------------------------------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS tag_synonyms( CREATE TABLE IF NOT EXISTS tag_synonyms(
name TEXT NOT NULL, name TEXT PRIMARY KEY NOT NULL,
mastername TEXT NOT NULL, mastername TEXT NOT NULL,
created INT created INT,
); );
CREATE INDEX IF NOT EXISTS index_tag_synonyms_name on tag_synonyms(name); CREATE INDEX IF NOT EXISTS index_tag_synonyms_name on tag_synonyms(name);
CREATE INDEX IF NOT EXISTS index_tag_synonyms_mastername on tag_synonyms(mastername);
''' '''
SQL_COLUMNS = sqlhelpers.extract_table_column_map(DB_INIT) SQL_COLUMNS = sqlhelpers.extract_table_column_map(DB_INIT)

View file

@ -2,8 +2,30 @@ import functools
import time import time
import warnings import warnings
from voussoirkit import sentinel
from . import exceptions from . import exceptions
NOT_CACHED = sentinel.Sentinel('not cached')
def cache_until_commit(method):
    '''
    Decorate an object method so that its return value is remembered on the
    instance and reused for as long as the photodb's last_commit_id has not
    changed since the value was computed.

    The value and the commit id it was computed under are stored as instance
    attributes named after the decorated method. The call arguments are not
    part of the cache, so a remembered value is returned regardless of the
    arguments passed on later calls.
    '''
    value_attr = f'_cached_{method.__name__}'
    commit_attr = f'_cached_{method.__name__}_commit_id'

    @functools.wraps(method)
    def wrapped(self, *args, **kwargs):
        remembered = getattr(self, value_attr, NOT_CACHED)
        if remembered is not NOT_CACHED:
            # Only trust the remembered value if no commit has happened
            # since it was stored.
            remembered_commit = getattr(self, commit_attr, NOT_CACHED)
            if remembered_commit == self._photodb.last_commit_id:
                return remembered

        fresh = method(self, *args, **kwargs)
        setattr(self, value_attr, fresh)
        # Read the commit id after the call, not before, so the stamp matches
        # the state the value was stored under.
        setattr(self, commit_attr, self._photodb.last_commit_id)
        return fresh

    return wrapped
def required_feature(features): def required_feature(features):
''' '''
Declare that the photodb or object method requires certain 'enable_*' Declare that the photodb or object method requires certain 'enable_*'

View file

@ -8,6 +8,7 @@ import hashlib
import os import os
import PIL.Image import PIL.Image
import re import re
import tempfile
import typing import typing
import zipstream import zipstream
@ -253,7 +254,7 @@ def generate_image_thumbnail(*args, trusted_file=False, **kwargs) -> PIL.Image:
finally: finally:
PIL.Image.MAX_IMAGE_PIXELS = _max_pixels PIL.Image.MAX_IMAGE_PIXELS = _max_pixels
def generate_video_thumbnail(filepath, outfile, width, height, **special) -> PIL.Image: def generate_video_thumbnail(filepath, width, height, **special) -> PIL.Image:
if not os.path.isfile(filepath): if not os.path.isfile(filepath):
raise FileNotFoundError(filepath) raise FileNotFoundError(filepath)
probe = constants.ffmpeg.probe(filepath) probe = constants.ffmpeg.probe(filepath)
@ -277,14 +278,17 @@ def generate_video_thumbnail(filepath, outfile, width, height, **special) -> PIL
else: else:
timestamp = 2 timestamp = 2
outfile = tempfile.NamedTemporaryFile(suffix='.jpg', delete=False)
constants.ffmpeg.thumbnail( constants.ffmpeg.thumbnail(
filepath, filepath,
outfile=outfile, outfile=outfile.name,
quality=2, quality=2,
size=size, size=size,
time=timestamp, time=timestamp,
) )
return True outfile.close()
image = PIL.Image.open(outfile.name)
return image
def get_mimetype(extension) -> typing.Optional[str]: def get_mimetype(extension) -> typing.Optional[str]:
extension = extension.strip('.') extension = extension.strip('.')

View file

@ -20,6 +20,7 @@ from voussoirkit import dotdict
from voussoirkit import expressionmatch from voussoirkit import expressionmatch
from voussoirkit import gentools from voussoirkit import gentools
from voussoirkit import hms from voussoirkit import hms
from voussoirkit import imagetools
from voussoirkit import pathclass from voussoirkit import pathclass
from voussoirkit import sentinel from voussoirkit import sentinel
from voussoirkit import spinal from voussoirkit import spinal
@ -921,7 +922,8 @@ class Photo(ObjectBase):
self.aspectratio = db_row['aspectratio'] self.aspectratio = db_row['aspectratio']
self.bitrate = db_row['bitrate'] self.bitrate = db_row['bitrate']
self.thumbnail = self.normalize_thumbnail(db_row['thumbnail']) # self.thumbnail = db_row['thumbnail_image']
self._has_thumbnail = None
self.tagged_at_unix = db_row['tagged_at'] self.tagged_at_unix = db_row['tagged_at']
self._tagged_at_dt = None self._tagged_at_dt = None
self.searchhidden = db_row['searchhidden'] self.searchhidden = db_row['searchhidden']
@ -934,16 +936,6 @@ class Photo(ObjectBase):
def __str__(self): def __str__(self):
return f'Photo:{self.id}:{self.basename}' return f'Photo:{self.id}:{self.basename}'
def normalize_thumbnail(self, thumbnail) -> pathclass.Path:
if thumbnail is None:
return None
thumbnail = self.photodb.thumbnail_directory.join(thumbnail)
if not thumbnail.is_file:
return None
return thumbnail
@staticmethod @staticmethod
def normalize_override_filename(override_filename) -> typing.Optional[str]: def normalize_override_filename(override_filename) -> typing.Optional[str]:
''' '''
@ -1082,6 +1074,7 @@ class Photo(ObjectBase):
log.info('Deleting %s.', self) log.info('Deleting %s.', self)
self.photodb.delete(table='photo_tag_rel', pairs={'photoid': self.id}) self.photodb.delete(table='photo_tag_rel', pairs={'photoid': self.id})
self.photodb.delete(table='album_photo_rel', pairs={'photoid': self.id}) self.photodb.delete(table='album_photo_rel', pairs={'photoid': self.id})
self.photodb.delete(table='photo_thumbnails', pairs={'photoid': self.id})
self.photodb.delete(table=Photo, pairs={'id': self.id}) self.photodb.delete(table=Photo, pairs={'id': self.id})
if delete_file and self.real_path.exists: if delete_file and self.real_path.exists:
@ -1096,11 +1089,6 @@ class Photo(ObjectBase):
'action': action, 'action': action,
'args': [self.real_path], 'args': [self.real_path],
}) })
if self.thumbnail and self.thumbnail.is_file:
self.photodb.on_commit_queue.append({
'action': action,
'args': [self.thumbnail],
})
self._uncache() self._uncache()
self.deleted = True self.deleted = True
@ -1113,15 +1101,14 @@ class Photo(ObjectBase):
@decorators.required_feature('photo.generate_thumbnail') @decorators.required_feature('photo.generate_thumbnail')
@worms.atomic @worms.atomic
def generate_thumbnail(self, trusted_file=False, **special) -> pathclass.Path: def generate_thumbnail(self, trusted_file=False, **special):
''' '''
special: special:
For images, you can provide `max_width` and/or `max_height` to For images, you can provide `max_width` and/or `max_height` to
override the config file. override the config file.
For videos, you can provide a `timestamp` to take the thumbnail at. For videos, you can provide a `timestamp` to take the thumbnail at.
''' '''
hopeful_filepath = self.make_thumbnail_filepath() image = None
return_filepath = None
if self.simple_mimetype == 'image': if self.simple_mimetype == 'image':
log.info('Thumbnailing %s.', self.real_path.absolute_path) log.info('Thumbnailing %s.', self.real_path.absolute_path)
@ -1133,44 +1120,27 @@ class Photo(ObjectBase):
trusted_file=trusted_file, trusted_file=trusted_file,
) )
except (OSError, ValueError): except (OSError, ValueError):
traceback.print_exc() log.warning(traceback.format_exc())
else: return
hopeful_filepath.parent.makedirs(exist_ok=True)
image.save(hopeful_filepath.absolute_path, quality=50)
return_filepath = hopeful_filepath
elif self.simple_mimetype == 'video' and constants.ffmpeg: elif self.simple_mimetype == 'video' and constants.ffmpeg:
log.info('Thumbnailing %s.', self.real_path.absolute_path) log.info('Thumbnailing %s.', self.real_path.absolute_path)
try: try:
hopeful_filepath.parent.makedirs(exist_ok=True) image = helpers.generate_video_thumbnail(
success = helpers.generate_video_thumbnail(
self.real_path.absolute_path, self.real_path.absolute_path,
outfile=hopeful_filepath.absolute_path,
width=self.photodb.config['thumbnail_width'], width=self.photodb.config['thumbnail_width'],
height=self.photodb.config['thumbnail_height'], height=self.photodb.config['thumbnail_height'],
**special **special
) )
if success:
return_filepath = hopeful_filepath
except Exception: except Exception:
log.warning(traceback.format_exc()) log.warning(traceback.format_exc())
return
if return_filepath != self.thumbnail: if image is None:
if return_filepath is None: return
store_as = None
else:
store_as = return_filepath.relative_to(self.photodb.thumbnail_directory)
data = {
'id': self.id,
'thumbnail': store_as,
}
self.photodb.update(table=Photo, pairs=data, where_key='id')
self.thumbnail = return_filepath
self._uncache() self.set_thumbnail(image)
return image
self.__reinit__()
return self.thumbnail
def get_containing_albums(self) -> set[Album]: def get_containing_albums(self) -> set[Album]:
''' '''
@ -1183,6 +1153,7 @@ class Photo(ObjectBase):
albums = set(self.photodb.get_albums_by_id(album_ids)) albums = set(self.photodb.get_albums_by_id(album_ids))
return albums return albums
@decorators.cache_until_commit
def get_tags(self) -> set: def get_tags(self) -> set:
''' '''
Return the tags assigned to this Photo. Return the tags assigned to this Photo.
@ -1194,6 +1165,11 @@ class Photo(ObjectBase):
tags = set(self.photodb.get_tags_by_id(tag_ids)) tags = set(self.photodb.get_tags_by_id(tag_ids))
return tags return tags
def get_thumbnail(self):
    '''
    Look up this Photo's row in the photo_thumbnails table and return the
    value of its thumbnail column, as produced by photodb.select_one_value.

    NOTE(review): presumably None when the photo has no thumbnail row --
    confirm against select_one_value.
    '''
    return self.photodb.select_one_value(
        'SELECT thumbnail FROM photo_thumbnails WHERE photoid = ?',
        [self.id],
    )
# Will add -> Tag/False when forward references are supported. # Will add -> Tag/False when forward references are supported.
def has_tag(self, tag, *, check_children=True): def has_tag(self, tag, *, check_children=True):
''' '''
@ -1223,6 +1199,12 @@ class Photo(ObjectBase):
return tag_by_id[tag_id] return tag_by_id[tag_id]
def has_thumbnail(self) -> bool:
    '''
    Return whether a row for this Photo exists in the photo_thumbnails table.

    The answer is remembered in self._has_thumbnail, so the database is only
    queried while that attribute is still None.
    '''
    if self._has_thumbnail is None:
        self._has_thumbnail = self.photodb.exists(
            'SELECT 1 FROM photo_thumbnails WHERE photoid = ?',
            [self.id],
        )
    return self._has_thumbnail
def jsonify(self, include_albums=True, include_tags=True, minimal=False) -> dict: def jsonify(self, include_albums=True, include_tags=True, minimal=False) -> dict:
j = { j = {
'type': 'photo', 'type': 'photo',
@ -1237,7 +1219,7 @@ class Photo(ObjectBase):
'duration_string': self.duration_string, 'duration_string': self.duration_string,
'duration': self.duration, 'duration': self.duration,
'bytes_string': self.bytes_string, 'bytes_string': self.bytes_string,
'has_thumbnail': bool(self.thumbnail), 'has_thumbnail': self.has_thumbnail(),
'created': self.created_unix, 'created': self.created_unix,
'filename': self.basename, 'filename': self.basename,
'mimetype': self.mimetype, 'mimetype': self.mimetype,
@ -1372,8 +1354,6 @@ class Photo(ObjectBase):
} }
self.photodb.update(table=Photo, pairs=data, where_key='id') self.photodb.update(table=Photo, pairs=data, where_key='id')
# self._uncache()
@decorators.required_feature('photo.edit') @decorators.required_feature('photo.edit')
@worms.atomic @worms.atomic
def relocate(self, new_filepath) -> None: def relocate(self, new_filepath) -> None:
@ -1444,6 +1424,12 @@ class Photo(ObjectBase):
} }
self.photodb.update(table=Photo, pairs=data, where_key='id') self.photodb.update(table=Photo, pairs=data, where_key='id')
@decorators.required_feature('photo.edit')
@worms.atomic
def remove_thumbnail(self) -> None:
    '''
    Delete this Photo's row from the photo_thumbnails table and record on the
    instance that it no longer has a thumbnail.
    '''
    where = {'photoid': self.id}
    self.photodb.delete(table='photo_thumbnails', pairs=where)
    # Keep the has_thumbnail() cache in step with the table.
    self._has_thumbnail = False
@decorators.required_feature('photo.edit') @decorators.required_feature('photo.edit')
@worms.atomic @worms.atomic
def rename_file(self, new_filename, *, move=False) -> None: def rename_file(self, new_filename, *, move=False) -> None:
@ -1552,6 +1538,25 @@ class Photo(ObjectBase):
self.photodb.update(table=Photo, pairs=data, where_key='id') self.photodb.update(table=Photo, pairs=data, where_key='id')
self.searchhidden = searchhidden self.searchhidden = searchhidden
@decorators.required_feature('photo.edit')
@worms.atomic
def set_thumbnail(self, image):
    '''
    Encode the given PIL image as a JPEG at quality 50 and store it as this
    Photo's row in the photo_thumbnails table, replacing any existing row.

    Returns the encoded bytes that were stored.

    Raises TypeError if image is not a PIL.Image.Image.
    '''
    if not isinstance(image, PIL.Image.Image):
        raise TypeError(image)

    jpeg_bytes = imagetools.save_to_bytes(image, format='jpeg', quality=50)
    row = {
        'photoid': self.id,
        'thumbnail': jpeg_bytes,
        'created': timetools.now().timestamp(),
    }

    # photoid is the primary key, so an existing row must be updated rather
    # than inserted again.
    already_stored = self.photodb.exists(
        'SELECT 1 FROM photo_thumbnails WHERE photoid = ?',
        [self.id],
    )
    if already_stored:
        self.photodb.update(table='photo_thumbnails', pairs=row, where_key='photoid')
    else:
        self.photodb.insert(table='photo_thumbnails', pairs=row)

    # Keep the has_thumbnail() cache in step with the table.
    self._has_thumbnail = True
    return jpeg_bytes
@property @property
def tagged_at(self) -> datetime.datetime: def tagged_at(self) -> datetime.datetime:
if self._tagged_at_dt is not None: if self._tagged_at_dt is not None:

View file

@ -368,7 +368,6 @@ class PDBPhotoMixin:
'width': None, 'width': None,
'height': None, 'height': None,
'duration': None, 'duration': None,
'thumbnail': None,
} }
self.insert(table=objects.Photo, pairs=data) self.insert(table=objects.Photo, pairs=data)

View file

@ -61,15 +61,30 @@ def get_file(photo_id, basename=None):
else: else:
return common.send_file(photo.real_path.absolute_path, override_mimetype=photo.mimetype) return common.send_file(photo.real_path.absolute_path, override_mimetype=photo.mimetype)
@site.route('/thumbnail/<photo_id>') @site.route('/photo/<photo_id>/thumbnail')
def get_thumbnail(photo_id): @site.route('/photo/<photo_id>/thumbnail/<basename>')
@flasktools.cached_endpoint(max_age=common.BROWSER_CACHE_DURATION)
def get_thumbnail(photo_id, basename=None):
photo_id = photo_id.split('.')[0] photo_id = photo_id.split('.')[0]
photo = common.P_photo(photo_id, response_type='html') photo = common.P_photo(photo_id, response_type='html')
if photo.thumbnail: blob = photo.get_thumbnail()
path = photo.thumbnail if blob is None:
else: return flask.abort(404)
flask.abort(404, 'That file doesnt have a thumbnail')
return common.send_file(path) outgoing_headers = {
'Content-Type': 'image/jpeg',
}
response = flask.Response(
blob,
status=200,
headers=outgoing_headers,
)
return response
# if photo.thumbnail:
# path = photo.thumbnail
# else:
# flask.abort(404, 'That file doesnt have a thumbnail')
# return common.send_file(path)
# Photo create and delete ########################################################################## # Photo create and delete ##########################################################################
@ -186,7 +201,8 @@ def post_photo_refresh_metadata_core(photo_ids):
photo.reload_metadata() photo.reload_metadata()
except pathclass.NotFile: except pathclass.NotFile:
flask.abort(404) flask.abort(404)
if photo.thumbnail is None:
if not photo.has_thumbnail() or photo.simple_mimetype == 'image':
try: try:
photo.generate_thumbnail() photo.generate_thumbnail()
except Exception: except Exception:

View file

@ -205,7 +205,7 @@ function create(photo, view)
let thumbnail_src; let thumbnail_src;
if (photo.has_thumbnail) if (photo.has_thumbnail)
{ {
thumbnail_src = `/thumbnail/${photo.id}.jpg`; thumbnail_src = `/photo/${photo.id}/thumbnail/${photo.id}.jpg`;
} }
else else
{ {

View file

@ -254,7 +254,7 @@ const ALBUM_ID = undefined;
</div> <!-- #album_metadata --> </div> <!-- #album_metadata -->
<div id="album_thumbnail"> <div id="album_thumbnail">
{%- if album.thumbnail_photo %} {%- if album.thumbnail_photo %}
<img src="/thumbnail/{{album.thumbnail_photo.id}}.jpg"/> <img src="/photo/{{album.thumbnail_photo.id}}/thumbnail/{{album.thumbnail_photo.id}}.jpg"/>
{% endif -%} {% endif -%}
</div> </div>
</div> </div>

View file

@ -22,7 +22,7 @@ draggable=true
<a class="album_card_thumbnail" href="/album/{{album.id}}{{viewparam}}" draggable="false"> <a class="album_card_thumbnail" href="/album/{{album.id}}{{viewparam}}" draggable="false">
{% endif %} {% endif %}
{% if album.thumbnail_photo %} {% if album.thumbnail_photo %}
{% set thumbnail_src = "/thumbnail/" ~ album.thumbnail_photo.id ~ ".jpg" %} {% set thumbnail_src = "/photo/" ~ album.thumbnail_photo.id ~ "/thumbnail/" ~ album.thumbnail_photo.id ~ ".jpg" %}
{% else %} {% else %}
{% set thumbnail_src = "/static/basic_thumbnails/album.png" %} {% set thumbnail_src = "/static/basic_thumbnails/album.png" %}
{% endif %} {% endif %}
@ -160,8 +160,8 @@ draggable="true"
</span> </span>
{% if view == "grid" %} {% if view == "grid" %}
{% if photo.thumbnail %} {% if photo.has_thumbnail() %}
{% set thumbnail_src = "/thumbnail/" ~ photo.id ~ ".jpg" %} {% set thumbnail_src = "/photo/" ~ photo.id ~ "/thumbnail/" ~ photo.id ~ ".jpg" %}
{% else %} {% else %}
{% set thumbnail_src = {% set thumbnail_src =
thumbnails.get(photo.extension, "") or thumbnails.get(photo.extension, "") or

View file

@ -259,7 +259,7 @@
src="{{photo|file_link}}" src="{{photo|file_link}}"
controls controls
preload=none preload=none
{%if photo.thumbnail%}poster="/thumbnail/{{photo.id}}.jpg"{%endif%} {%if photo.has_thumbnail()%}poster="/photo/{{photo.id}}/thumbnail/{{photo.id}}.jpg"{%endif%}
></video> ></video>
{% elif photo.simple_mimetype == "audio" %} {% elif photo.simple_mimetype == "audio" %}

View file

@ -999,6 +999,37 @@ def upgrade_22_to_23(photodb):
''' '''
m.go() m.go()
def upgrade_23_to_24(photodb):
    '''
    In this version, thumbnail files were moved into the database as BLOBs.

    Creates the photo_thumbnails table and its index, copies every thumbnail
    file that still exists on disk into that table, and then drops the
    thumbnail column from the photos table.
    '''
    thumbnail_directory = photodb.data_directory.with_child(etiquette.constants.DEFAULT_THUMBDIR)

    photodb.execute('''
    CREATE TABLE IF NOT EXISTS photo_thumbnails(
        photoid INT PRIMARY KEY NOT NULL,
        thumbnail BLOB NOT NULL,
        created INT NOT NULL,
        FOREIGN KEY(photoid) REFERENCES photos(id)
    )
    ''')
    photodb.execute('''
    CREATE INDEX IF NOT EXISTS index_photo_thumbnails_photoid on photo_thumbnails(photoid)
    ''')

    rows = photodb.select('SELECT id, thumbnail FROM photos WHERE thumbnail IS NOT NULL')
    for (photoid, relative_path) in rows:
        # The old column held a path that is joined onto the thumbnail
        # directory to locate the file.
        thumbnail_file = thumbnail_directory.join(relative_path)
        if thumbnail_file.is_file:
            print(thumbnail_file)
            # The file's modification time stands in for the creation time
            # of the thumbnail.
            migrated = {
                'photoid': photoid,
                'thumbnail': thumbnail_file.read('rb'),
                'created': thumbnail_file.stat.st_mtime,
            }
            photodb.insert(table='photo_thumbnails', pairs=migrated)

    photodb.execute('ALTER TABLE photos DROP COLUMN thumbnail')
def upgrade_all(data_directory): def upgrade_all(data_directory):
''' '''
Given the directory containing a phototagger database, apply all of the Given the directory containing a phototagger database, apply all of the