Move thumbnail blobs into the database.

I've moved the thumbnails around many times over the course of this project,
and hopefully it won't happen many more times. Once the database has
tens of thousands of items, the thumbnails become the biggest
headache on the disk. Backing up, restoring, and sharding files per
directory are slower and more effortful with separate files. Storing them in
the db makes the db a larger file, but this is disk space that was already
being used anyway. Now it's simpler and has atomic transactions.
This commit is contained in:
voussoir 2023-01-27 17:34:32 -08:00
parent d5ac0b1717
commit e4f686c86f
11 changed files with 155 additions and 70 deletions

View file

@ -41,7 +41,7 @@ ffmpeg = _load_ffmpeg()
# Database #########################################################################################
DATABASE_VERSION = 23
DATABASE_VERSION = 24
DB_INIT = '''
CREATE TABLE IF NOT EXISTS albums(
@ -79,7 +79,6 @@ CREATE TABLE IF NOT EXISTS photos(
duration INT,
bytes INT,
created INT,
thumbnail TEXT,
tagged_at INT,
author_id INT,
searchhidden BOOLEAN,
@ -106,7 +105,7 @@ CREATE INDEX IF NOT EXISTS index_photos_basename on photos(basename COLLATE NOCA
CREATE INDEX IF NOT EXISTS index_photos_created on photos(created);
CREATE INDEX IF NOT EXISTS index_photos_extension on photos(extension);
CREATE INDEX IF NOT EXISTS index_photos_author_id on photos(author_id);
CREATE INDEX IF NOT EXISTS index_photos_searchhidden on photos(searchhidden);
CREATE INDEX IF NOT EXISTS index_photos_searchhidden_created on photos(searchhidden, created);
----------------------------------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS tags(
id INT PRIMARY KEY NOT NULL,
@ -177,6 +176,14 @@ CREATE INDEX IF NOT EXISTS index_photo_tag_rel_photoid on photo_tag_rel(photoid)
CREATE INDEX IF NOT EXISTS index_photo_tag_rel_tagid on photo_tag_rel(tagid);
CREATE INDEX IF NOT EXISTS index_photo_tag_rel_photoid_tagid on photo_tag_rel(photoid, tagid);
----------------------------------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS photo_thumbnails(
photoid INT PRIMARY KEY NOT NULL,
thumbnail BLOB NOT NULL,
created INT NOT NULL,
FOREIGN KEY(photoid) REFERENCES photos(id)
);
CREATE INDEX IF NOT EXISTS index_photo_thumbnails_photoid on photo_thumbnails(photoid);
----------------------------------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS tag_group_rel(
parentid INT NOT NULL,
memberid INT NOT NULL,
@ -189,11 +196,12 @@ CREATE INDEX IF NOT EXISTS index_tag_group_rel_parentid on tag_group_rel(parenti
CREATE INDEX IF NOT EXISTS index_tag_group_rel_memberid on tag_group_rel(memberid);
----------------------------------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS tag_synonyms(
name TEXT NOT NULL,
name TEXT PRIMARY KEY NOT NULL,
mastername TEXT NOT NULL,
created INT
created INT,
);
CREATE INDEX IF NOT EXISTS index_tag_synonyms_name on tag_synonyms(name);
CREATE INDEX IF NOT EXISTS index_tag_synonyms_mastername on tag_synonyms(mastername);
'''
SQL_COLUMNS = sqlhelpers.extract_table_column_map(DB_INIT)

View file

@ -2,8 +2,30 @@ import functools
import time
import warnings
from voussoirkit import sentinel
from . import exceptions
NOT_CACHED = sentinel.Sentinel('not cached')
def cache_until_commit(method):
    '''
    Decorate an object method so that its return value is remembered on the
    instance and reused for as long as the photodb's last_commit_id stays
    the same.

    The value, and the commit id it was computed under, are stored as
    attributes on the instance, named after the wrapped method.

    Note that the arguments are not part of the cache key: while the commit
    id is unchanged, a repeat call returns the stored value regardless of
    the args / kwargs it is given.
    '''
    value_attr = f'_cached_{method.__name__}'
    commit_attr = f'_cached_{method.__name__}_commit_id'

    @functools.wraps(method)
    def wrapped(self, *args, **kwargs):
        remembered = getattr(self, value_attr, NOT_CACHED)
        if remembered is not NOT_CACHED:
            remembered_commit = getattr(self, commit_attr, NOT_CACHED)
            if remembered_commit == self._photodb.last_commit_id:
                return remembered

        result = method(self, *args, **kwargs)
        setattr(self, value_attr, result)
        # Read the commit id after the call, so that it reflects the state
        # the database is in once the method has finished.
        setattr(self, commit_attr, self._photodb.last_commit_id)
        return result

    return wrapped
def required_feature(features):
'''
Declare that the photodb or object method requires certain 'enable_*'

View file

@ -8,6 +8,7 @@ import hashlib
import os
import PIL.Image
import re
import tempfile
import typing
import zipstream
@ -253,7 +254,7 @@ def generate_image_thumbnail(*args, trusted_file=False, **kwargs) -> PIL.Image:
finally:
PIL.Image.MAX_IMAGE_PIXELS = _max_pixels
def generate_video_thumbnail(filepath, outfile, width, height, **special) -> PIL.Image:
def generate_video_thumbnail(filepath, width, height, **special) -> PIL.Image:
if not os.path.isfile(filepath):
raise FileNotFoundError(filepath)
probe = constants.ffmpeg.probe(filepath)
@ -277,14 +278,17 @@ def generate_video_thumbnail(filepath, outfile, width, height, **special) -> PIL
else:
timestamp = 2
outfile = tempfile.NamedTemporaryFile(suffix='.jpg', delete=False)
constants.ffmpeg.thumbnail(
filepath,
outfile=outfile,
outfile=outfile.name,
quality=2,
size=size,
time=timestamp,
)
return True
outfile.close()
image = PIL.Image.open(outfile.name)
return image
def get_mimetype(extension) -> typing.Optional[str]:
extension = extension.strip('.')

View file

@ -20,6 +20,7 @@ from voussoirkit import dotdict
from voussoirkit import expressionmatch
from voussoirkit import gentools
from voussoirkit import hms
from voussoirkit import imagetools
from voussoirkit import pathclass
from voussoirkit import sentinel
from voussoirkit import spinal
@ -921,7 +922,8 @@ class Photo(ObjectBase):
self.aspectratio = db_row['aspectratio']
self.bitrate = db_row['bitrate']
self.thumbnail = self.normalize_thumbnail(db_row['thumbnail'])
# self.thumbnail = db_row['thumbnail_image']
self._has_thumbnail = None
self.tagged_at_unix = db_row['tagged_at']
self._tagged_at_dt = None
self.searchhidden = db_row['searchhidden']
@ -934,16 +936,6 @@ class Photo(ObjectBase):
def __str__(self):
return f'Photo:{self.id}:{self.basename}'
def normalize_thumbnail(self, thumbnail) -> pathclass.Path:
    '''
    Resolve a stored thumbnail value against the photodb's thumbnail
    directory.

    Returns the joined path when it is a file. Returns None when the given
    value is None or when the joined path is not a file.
    '''
    if thumbnail is not None:
        candidate = self.photodb.thumbnail_directory.join(thumbnail)
        if candidate.is_file:
            return candidate
    return None
@staticmethod
def normalize_override_filename(override_filename) -> typing.Optional[str]:
'''
@ -1082,6 +1074,7 @@ class Photo(ObjectBase):
log.info('Deleting %s.', self)
self.photodb.delete(table='photo_tag_rel', pairs={'photoid': self.id})
self.photodb.delete(table='album_photo_rel', pairs={'photoid': self.id})
self.photodb.delete(table='photo_thumbnails', pairs={'photoid': self.id})
self.photodb.delete(table=Photo, pairs={'id': self.id})
if delete_file and self.real_path.exists:
@ -1096,11 +1089,6 @@ class Photo(ObjectBase):
'action': action,
'args': [self.real_path],
})
if self.thumbnail and self.thumbnail.is_file:
self.photodb.on_commit_queue.append({
'action': action,
'args': [self.thumbnail],
})
self._uncache()
self.deleted = True
@ -1113,15 +1101,14 @@ class Photo(ObjectBase):
@decorators.required_feature('photo.generate_thumbnail')
@worms.atomic
def generate_thumbnail(self, trusted_file=False, **special) -> pathclass.Path:
def generate_thumbnail(self, trusted_file=False, **special):
'''
special:
For images, you can provide `max_width` and/or `max_height` to
override the config file.
For videos, you can provide a `timestamp` to take the thumbnail at.
'''
hopeful_filepath = self.make_thumbnail_filepath()
return_filepath = None
image = None
if self.simple_mimetype == 'image':
log.info('Thumbnailing %s.', self.real_path.absolute_path)
@ -1133,44 +1120,27 @@ class Photo(ObjectBase):
trusted_file=trusted_file,
)
except (OSError, ValueError):
traceback.print_exc()
else:
hopeful_filepath.parent.makedirs(exist_ok=True)
image.save(hopeful_filepath.absolute_path, quality=50)
return_filepath = hopeful_filepath
log.warning(traceback.format_exc())
return
elif self.simple_mimetype == 'video' and constants.ffmpeg:
log.info('Thumbnailing %s.', self.real_path.absolute_path)
try:
hopeful_filepath.parent.makedirs(exist_ok=True)
success = helpers.generate_video_thumbnail(
image = helpers.generate_video_thumbnail(
self.real_path.absolute_path,
outfile=hopeful_filepath.absolute_path,
width=self.photodb.config['thumbnail_width'],
height=self.photodb.config['thumbnail_height'],
**special
)
if success:
return_filepath = hopeful_filepath
except Exception:
log.warning(traceback.format_exc())
return
if return_filepath != self.thumbnail:
if return_filepath is None:
store_as = None
else:
store_as = return_filepath.relative_to(self.photodb.thumbnail_directory)
data = {
'id': self.id,
'thumbnail': store_as,
}
self.photodb.update(table=Photo, pairs=data, where_key='id')
self.thumbnail = return_filepath
if image is None:
return
self._uncache()
self.__reinit__()
return self.thumbnail
self.set_thumbnail(image)
return image
def get_containing_albums(self) -> set[Album]:
'''
@ -1183,6 +1153,7 @@ class Photo(ObjectBase):
albums = set(self.photodb.get_albums_by_id(album_ids))
return albums
@decorators.cache_until_commit
def get_tags(self) -> set:
'''
Return the tags assigned to this Photo.
@ -1194,6 +1165,11 @@ class Photo(ObjectBase):
tags = set(self.photodb.get_tags_by_id(tag_ids))
return tags
def get_thumbnail(self):
    '''
    Return the value of the thumbnail column from this photo's row in the
    photo_thumbnails table, as given by photodb.select_one_value.

    NOTE(review): presumably this is None when the photo has no thumbnail
    row -- confirm against select_one_value.
    '''
    return self.photodb.select_one_value(
        'SELECT thumbnail FROM photo_thumbnails WHERE photoid = ?',
        [self.id],
    )
# Will add -> Tag/False when forward references are supported.
def has_tag(self, tag, *, check_children=True):
'''
@ -1223,6 +1199,12 @@ class Photo(ObjectBase):
return tag_by_id[tag_id]
def has_thumbnail(self) -> bool:
    '''
    Return whether this photo has a row in the photo_thumbnails table.

    The answer is remembered in self._has_thumbnail after the first lookup,
    so later calls do not query the database again.
    '''
    if self._has_thumbnail is None:
        self._has_thumbnail = self.photodb.exists(
            'SELECT 1 FROM photo_thumbnails WHERE photoid = ?',
            [self.id],
        )
    return self._has_thumbnail
def jsonify(self, include_albums=True, include_tags=True, minimal=False) -> dict:
j = {
'type': 'photo',
@ -1237,7 +1219,7 @@ class Photo(ObjectBase):
'duration_string': self.duration_string,
'duration': self.duration,
'bytes_string': self.bytes_string,
'has_thumbnail': bool(self.thumbnail),
'has_thumbnail': self.has_thumbnail(),
'created': self.created_unix,
'filename': self.basename,
'mimetype': self.mimetype,
@ -1372,8 +1354,6 @@ class Photo(ObjectBase):
}
self.photodb.update(table=Photo, pairs=data, where_key='id')
# self._uncache()
@decorators.required_feature('photo.edit')
@worms.atomic
def relocate(self, new_filepath) -> None:
@ -1444,6 +1424,12 @@ class Photo(ObjectBase):
}
self.photodb.update(table=Photo, pairs=data, where_key='id')
@decorators.required_feature('photo.edit')
@worms.atomic
def remove_thumbnail(self) -> None:
    '''
    Delete this photo's row from the photo_thumbnails table and record on
    the instance that it no longer has a thumbnail.
    '''
    self.photodb.delete(
        table='photo_thumbnails',
        pairs={'photoid': self.id},
    )
    # Keep the answer remembered by has_thumbnail in step with the database.
    self._has_thumbnail = False
@decorators.required_feature('photo.edit')
@worms.atomic
def rename_file(self, new_filename, *, move=False) -> None:
@ -1552,6 +1538,25 @@ class Photo(ObjectBase):
self.photodb.update(table=Photo, pairs=data, where_key='id')
self.searchhidden = searchhidden
@decorators.required_feature('photo.edit')
@worms.atomic
def set_thumbnail(self, image):
    '''
    Store the given image as this photo's thumbnail.

    The image is encoded with imagetools.save_to_bytes as a jpeg at
    quality 50. If the photo already has a row in photo_thumbnails, that
    row is updated, otherwise a new row is inserted.

    Raises TypeError if image is not a PIL.Image.Image.

    Returns the encoded data that was written to the database.
    '''
    if not isinstance(image, PIL.Image.Image):
        raise TypeError(image)

    blob = imagetools.save_to_bytes(image, format='jpeg', quality=50)
    pairs = {
        'photoid': self.id,
        'thumbnail': blob,
        'created': timetools.now().timestamp(),
    }

    already_stored = self.photodb.exists(
        'SELECT 1 FROM photo_thumbnails WHERE photoid = ?',
        [self.id],
    )
    if already_stored:
        self.photodb.update(table='photo_thumbnails', pairs=pairs, where_key='photoid')
    else:
        self.photodb.insert(table='photo_thumbnails', pairs=pairs)

    # Keep the answer remembered by has_thumbnail in step with the database.
    self._has_thumbnail = True
    return blob
@property
def tagged_at(self) -> datetime.datetime:
if self._tagged_at_dt is not None:

View file

@ -368,7 +368,6 @@ class PDBPhotoMixin:
'width': None,
'height': None,
'duration': None,
'thumbnail': None,
}
self.insert(table=objects.Photo, pairs=data)

View file

@ -61,15 +61,30 @@ def get_file(photo_id, basename=None):
else:
return common.send_file(photo.real_path.absolute_path, override_mimetype=photo.mimetype)
@site.route('/thumbnail/<photo_id>')
def get_thumbnail(photo_id):
@site.route('/photo/<photo_id>/thumbnail')
@site.route('/photo/<photo_id>/thumbnail/<basename>')
@flasktools.cached_endpoint(max_age=common.BROWSER_CACHE_DURATION)
def get_thumbnail(photo_id, basename=None):
photo_id = photo_id.split('.')[0]
photo = common.P_photo(photo_id, response_type='html')
if photo.thumbnail:
path = photo.thumbnail
else:
flask.abort(404, 'That file doesnt have a thumbnail')
return common.send_file(path)
blob = photo.get_thumbnail()
if blob is None:
return flask.abort(404)
outgoing_headers = {
'Content-Type': 'image/jpeg',
}
response = flask.Response(
blob,
status=200,
headers=outgoing_headers,
)
return response
# if photo.thumbnail:
# path = photo.thumbnail
# else:
# flask.abort(404, 'That file doesnt have a thumbnail')
# return common.send_file(path)
# Photo create and delete ##########################################################################
@ -186,7 +201,8 @@ def post_photo_refresh_metadata_core(photo_ids):
photo.reload_metadata()
except pathclass.NotFile:
flask.abort(404)
if photo.thumbnail is None:
if not photo.has_thumbnail() or photo.simple_mimetype == 'image':
try:
photo.generate_thumbnail()
except Exception:

View file

@ -205,7 +205,7 @@ function create(photo, view)
let thumbnail_src;
if (photo.has_thumbnail)
{
thumbnail_src = `/thumbnail/${photo.id}.jpg`;
thumbnail_src = `/photo/${photo.id}/thumbnail/${photo.id}.jpg`;
}
else
{

View file

@ -254,7 +254,7 @@ const ALBUM_ID = undefined;
</div> <!-- #album_metadata -->
<div id="album_thumbnail">
{%- if album.thumbnail_photo %}
<img src="/thumbnail/{{album.thumbnail_photo.id}}.jpg"/>
<img src="/photo/{{album.thumbnail_photo.id}}/thumbnail/{{album.thumbnail_photo.id}}.jpg"/>
{% endif -%}
</div>
</div>

View file

@ -22,7 +22,7 @@ draggable=true
<a class="album_card_thumbnail" href="/album/{{album.id}}{{viewparam}}" draggable="false">
{% endif %}
{% if album.thumbnail_photo %}
{% set thumbnail_src = "/thumbnail/" ~ album.thumbnail_photo.id ~ ".jpg" %}
{% set thumbnail_src = "/photo/" ~ album.thumbnail_photo.id ~ "/thumbnail/" ~ album.thumbnail_photo.id ~ ".jpg" %}
{% else %}
{% set thumbnail_src = "/static/basic_thumbnails/album.png" %}
{% endif %}
@ -160,8 +160,8 @@ draggable="true"
</span>
{% if view == "grid" %}
{% if photo.thumbnail %}
{% set thumbnail_src = "/thumbnail/" ~ photo.id ~ ".jpg" %}
{% if photo.has_thumbnail() %}
{% set thumbnail_src = "/photo/" ~ photo.id ~ "/thumbnail/" ~ photo.id ~ ".jpg" %}
{% else %}
{% set thumbnail_src =
thumbnails.get(photo.extension, "") or

View file

@ -259,7 +259,7 @@
src="{{photo|file_link}}"
controls
preload=none
{%if photo.thumbnail%}poster="/thumbnail/{{photo.id}}.jpg"{%endif%}
{%if photo.has_thumbnail()%}poster="/photo/{{photo.id}}/thumbnail/{{photo.id}}.jpg"{%endif%}
></video>
{% elif photo.simple_mimetype == "audio" %}

View file

@ -999,6 +999,37 @@ def upgrade_22_to_23(photodb):
'''
m.go()
def upgrade_23_to_24(photodb):
    '''
    In this version, thumbnail files were moved into the database as BLOBs.

    Creates the photo_thumbnails table and its index, copies every thumbnail
    file that still exists on disk into that table, and finally drops the
    thumbnail column from the photos table.
    '''
    thumbs_dir = photodb.data_directory.with_child(etiquette.constants.DEFAULT_THUMBDIR)

    photodb.execute('''
    CREATE TABLE IF NOT EXISTS photo_thumbnails(
        photoid INT PRIMARY KEY NOT NULL,
        thumbnail BLOB NOT NULL,
        created INT NOT NULL,
        FOREIGN KEY(photoid) REFERENCES photos(id)
    )
    ''')
    photodb.execute('''
    CREATE INDEX IF NOT EXISTS index_photo_thumbnails_photoid on photo_thumbnails(photoid)
    ''')

    rows = photodb.select('SELECT id, thumbnail FROM photos WHERE thumbnail IS NOT NULL')
    for (photo_id, stored_path) in rows:
        thumb_file = thumbs_dir.join(stored_path)
        # Rows whose thumbnail file is not on disk get no photo_thumbnails row.
        if thumb_file.is_file:
            print(thumb_file)
            photodb.insert(
                table='photo_thumbnails',
                pairs={
                    'photoid': photo_id,
                    'thumbnail': thumb_file.read('rb'),
                    # The file's modification time stands in for the
                    # thumbnail's creation time.
                    'created': thumb_file.stat.st_mtime,
                },
            )

    # The old column is only dropped after the copy loop has finished.
    photodb.execute('ALTER TABLE photos DROP COLUMN thumbnail')
def upgrade_all(data_directory):
'''
Given the directory containing a phototagger database, apply all of the