From 35c29e6778737ed1bc36b297b66013209cb504b2 Mon Sep 17 00:00:00 2001
From: Ethan Dalool
Date: Thu, 27 Oct 2022 20:53:20 -0700
Subject: [PATCH] Completely hardcode mimetypes, improve mimetype search query.

---
Review amendment: helpers.get_mimetype now lowercases the extension before
looking it up, so that it agrees with the COLLATE NOCASE extension filter this
patch adds to photodb.py. Its return annotation now describes the
(type, subtype) tuple that objects.py consumes, and it has a docstring.

 etiquette/constants.py | 82 ++++++++++++++++++++++++++++++++++--------
 etiquette/helpers.py   | 19 +++++-----
 etiquette/objects.py   |  8 +++--
 etiquette/photodb.py   |  6 ++--
 4 files changed, 82 insertions(+), 33 deletions(-)

diff --git a/etiquette/constants.py b/etiquette/constants.py
index a70b32a..a42ee70 100644
--- a/etiquette/constants.py
+++ b/etiquette/constants.py
@@ -215,24 +215,76 @@ FILENAME_BADCHARS = '\\/:*?<>|"'
 
 USER_ID_CHARACTERS = string.digits + string.ascii_uppercase
 
-ADDITIONAL_MIMETYPES = {
-    '7z': 'archive',
-    'gz': 'archive',
-    'rar': 'archive',
+MIMETYPES = {
+    '7z': ('archive', '7z'),
+    'gz': ('archive', 'gz'),
+    'rar': ('archive', 'rar'),
+    'tar': ('archive', 'tar'),
+    'zip': ('archive', 'zip'),
 
-    'aac': 'audio/aac',
-    'ac3': 'audio/ac3',
-    'dts': 'audio/dts',
-    'm4a': 'audio/mp4',
-    'opus': 'audio/ogg',
+    'aac': ('audio', 'aac'),
+    'ac3': ('audio', 'ac3'),
+    'aif': ('audio', 'x-aiff'),
+    'aifc': ('audio', 'x-aiff'),
+    'aiff': ('audio', 'x-aiff'),
+    'au': ('audio', 'basic'),
+    'dts': ('audio', 'dts'),
+    'm4a': ('audio', 'mp4'),
+    'mp2': ('audio', 'mpeg'),
+    'mp3': ('audio', 'mpeg'),
+    'opus': ('audio', 'ogg'),
+    'snd': ('audio', 'basic'),
+    'wav': ('audio', 'x-wav'),
 
-    'mkv': 'video/x-matroska',
+    'bmp': ('image', 'x-ms-bmp'),
+    'gif': ('image', 'gif'),
+    'ico': ('image', 'vnd.microsoft.icon'),
+    'ief': ('image', 'ief'),
+    'jpe': ('image', 'jpeg'),
+    'jpeg': ('image', 'jpeg'),
+    'jpg': ('image', 'jpeg'),
+    'png': ('image', 'png'),
+    'svg': ('image', 'svg+xml'),
+    'tif': ('image', 'tiff'),
+    'tiff': ('image', 'tiff'),
 
-    'ass': 'text/plain',
-    'md': 'text/plain',
-    'nfo': 'text/plain',
-    'rst': 'text/plain',
-    'srt': 'text/plain',
+    'ass': ('text', 'plain'),
+    'bat': ('text', 'plain'),
+    'c': ('text', 'plain'),
+    'css': ('text', 'css'),
+    'csv': ('text', 'csv'),
+    'etx': ('text', 'x-setext'),
+    'h': ('text', 'plain'),
+    'htm': ('text', 'html'),
+    'html': ('text', 'html'),
+    'js': ('text', 'javascript'),
+    'json': ('text', 'json'),
+    'ksh': ('text', 'plain'),
+    'md': ('text', 'plain'),
+    'nfo': ('text', 'plain'),
+    'pl': ('text', 'plain'),
+    'py': ('text', 'x-python'),
+    'rst': ('text', 'plain'),
+    'rtx': ('text', 'richtext'),
+    'sgm': ('text', 'x-sgml'),
+    'sgml': ('text', 'x-sgml'),
+    'srt': ('text', 'plain'),
+    'tsv': ('text', 'tab-separated-values'),
+    'txt': ('text', 'plain'),
+    'vcf': ('text', 'x-vcard'),
+    'xml': ('text', 'xml'),
+
+    'avi': ('video', 'x-msvideo'),
+    'm1v': ('video', 'mpeg'),
+    'mkv': ('video', 'x-matroska'),
+    'mov': ('video', 'quicktime'),
+    'mp4': ('video', 'mp4'),
+    'mpa': ('video', 'mpeg'),
+    'mpe': ('video', 'mpeg'),
+    'mpeg': ('video', 'mpeg'),
+    'mpg': ('video', 'mpeg'),
+    'qt': ('video', 'quicktime'),
+    'webm': ('video', 'webm'),
 }
 
 # Photodb ##########################################################################################
diff --git a/etiquette/helpers.py b/etiquette/helpers.py
index 269ca0f..72bc996 100644
--- a/etiquette/helpers.py
+++ b/etiquette/helpers.py
@@ -5,7 +5,6 @@ codebase but don't deserve to be methods of any class.
 import bs4
 import datetime
 import hashlib
-import mimetypes
 import os
 import re
 import PIL.Image
@@ -286,16 +285,14 @@ def generate_video_thumbnail(filepath, outfile, width, height, **special) -> PIL
     )
     return True
 
-def get_mimetype(filepath) -> typing.Optional[str]:
-    '''
-    Extension to mimetypes.guess_type which uses my
-    constants.ADDITIONAL_MIMETYPES.
-    '''
-    extension = os.path.splitext(filepath)[1].replace('.', '')
-    mimetype = constants.ADDITIONAL_MIMETYPES.get(extension, None)
-    if mimetype is None:
-        mimetype = mimetypes.guess_type(filepath)[0]
-    return mimetype
+def get_mimetype(extension) -> typing.Optional[typing.Tuple[str, str]]:
+    '''
+    Return the (type, subtype) pair from constants.MIMETYPES for this
+    extension, or None if the extension is not in the table. The lookup
+    ignores case and any surrounding dots.
+    '''
+    extension = extension.strip('.').lower()
+    return constants.MIMETYPES.get(extension, None)
 
 def hash_photoset(photos) -> str:
     '''
diff --git a/etiquette/objects.py b/etiquette/objects.py
index 0459f78..e3af592 100644
--- a/etiquette/objects.py
+++ b/etiquette/objects.py
@@ -926,12 +926,14 @@ class Photo(ObjectBase):
         # self._mimetype vars to help memoize, which needs to be None-capable.
         # So although I normally like using @property, this is less lines of
         # code and less indirection really.
-        self.mimetype = helpers.get_mimetype(self.real_path.basename)
+        mime = helpers.get_mimetype(self.real_path.extension.no_dot)
 
-        if self.mimetype is None:
+        if mime is None:
             self.simple_mimetype = None
+            self.mimetype = None
         else:
-            self.simple_mimetype = self.mimetype.split('/')[0]
+            self.simple_mimetype = mime[0]
+            self.mimetype = '/'.join(mime)
 
     def _uncache(self):
         self.photodb.caches[Photo].remove(self.id)
diff --git a/etiquette/photodb.py b/etiquette/photodb.py
index 7b6193e..5459c08 100644
--- a/etiquette/photodb.py
+++ b/etiquette/photodb.py
@@ -725,7 +725,8 @@ class PDBPhotoMixin:
             bindings.extend(extension_not)
 
         if mimetype:
-            notnulls.add('extension')
+            extensions = {extension for (extension, (typ, subtyp)) in constants.MIMETYPES.items() if typ in mimetype}
+            wheres.append(f'extension IN {sqlhelpers.listify(extensions)} COLLATE NOCASE')
 
         if within_directory:
             patterns = {d.absolute_path.rstrip(os.sep) for d in within_directory}
@@ -808,9 +809,6 @@ class PDBPhotoMixin:
         for row in generator:
             photo = self.get_cached_instance(objects.Photo, row)
 
-            if mimetype and photo.simple_mimetype not in mimetype:
-                continue
-
             if filename_tree and not filename_tree.evaluate(photo.basename.lower()):
                 continue
 