Improve MMF search speed by pre-filtering photo IDs
Not perfect yet, but it is an improvement: instead of wasting time checking every photo, pre-search the photo_tag_rel table for the IDs of photos that match the must / may / forbid (MMF) tags.
This commit is contained in:
parent
fbf73ac515
commit
5b7c05c39d
3 changed files with 153 additions and 71 deletions
|
@ -321,6 +321,9 @@ def select_generator(sql, query, bindings=None):
|
|||
break
|
||||
yield fetch
|
||||
|
||||
def sql_listify(items):
    '''
    Render `items` as a parenthesized, comma-separated list of double-quoted
    values suitable for placing after IN / NOT IN in a query.

    ['a', 'b'] -> '("a", "b")'

    items:
        Any iterable, including a generator. Each element is converted with
        str(). An empty iterable produces '()'.

    Double quote characters inside an element are doubled so that the
    element cannot terminate its own quoted token early.
    '''
    quoted = ('"%s"' % str(item).replace('"', '""') for item in items)
    return '(%s)' % ', '.join(quoted)
|
||||
|
||||
def truthystring(s):
|
||||
'''
|
||||
Convert strings to True, False, or None based on the options presented
|
||||
|
|
|
@ -707,38 +707,11 @@ class PDBPhotoMixin:
|
|||
notnulls.append('bytes')
|
||||
if duration:
|
||||
notnulls.append('duration')
|
||||
query = searchhelpers.build_query(orderby, notnulls)
|
||||
print(query)
|
||||
generator = helpers.select_generator(self.sql, query)
|
||||
|
||||
if orderby is None:
|
||||
giveback_orderby = None
|
||||
else:
|
||||
giveback_orderby = [term.replace('RANDOM()', 'random') for term in orderby]
|
||||
if give_back_parameters:
|
||||
parameters = {
|
||||
'area': area,
|
||||
'width': width,
|
||||
'height': height,
|
||||
'ratio': ratio,
|
||||
'bytes': bytes,
|
||||
'duration': duration,
|
||||
'authors': authors,
|
||||
'created': created,
|
||||
'extension': extension,
|
||||
'extension_not': extension_not,
|
||||
'filename': filename,
|
||||
'has_tags': has_tags,
|
||||
'mimetype': mimetype,
|
||||
'tag_musts': tag_musts,
|
||||
'tag_mays': tag_mays,
|
||||
'tag_forbids': tag_forbids,
|
||||
'tag_expression': tag_expression,
|
||||
'limit': limit,
|
||||
'offset': offset,
|
||||
'orderby': giveback_orderby,
|
||||
}
|
||||
yield parameters
|
||||
|
||||
# FROZEN CHILDREN
|
||||
# To lighten the amount of database reading here, `frozen_children` is a dict where
|
||||
|
@ -773,6 +746,44 @@ class PDBPhotoMixin:
|
|||
filename_tree = expressionmatch.ExpressionTree.parse(filename)
|
||||
filename_tree.map(lambda x: x.lower())
|
||||
|
||||
if give_back_parameters:
|
||||
parameters = {
|
||||
'area': area,
|
||||
'width': width,
|
||||
'height': height,
|
||||
'ratio': ratio,
|
||||
'bytes': bytes,
|
||||
'duration': duration,
|
||||
'authors': authors,
|
||||
'created': created,
|
||||
'extension': extension,
|
||||
'extension_not': extension_not,
|
||||
'filename': filename,
|
||||
'has_tags': has_tags,
|
||||
'mimetype': mimetype,
|
||||
'tag_musts': tag_musts,
|
||||
'tag_mays': tag_mays,
|
||||
'tag_forbids': tag_forbids,
|
||||
'tag_expression': tag_expression,
|
||||
'limit': limit,
|
||||
'offset': offset,
|
||||
'orderby': giveback_orderby,
|
||||
}
|
||||
yield parameters
|
||||
|
||||
if is_must_may_forbid:
|
||||
mmf_results = searchhelpers.mmf_photoids(self, tag_musts, tag_mays, tag_forbids, frozen_children)
|
||||
#print('mmf accept:', mmf_results)
|
||||
else:
|
||||
mmf_results = None
|
||||
|
||||
if mmf_results is not None and mmf_results['photoids'] == set():
|
||||
generator = []
|
||||
else:
|
||||
query = searchhelpers.build_query(orderby, notnulls, minimums, maximums, mmf_results=mmf_results)
|
||||
print(query[:200])
|
||||
generator = helpers.select_generator(self.sql, query)
|
||||
|
||||
photos_received = 0
|
||||
|
||||
# LET'S GET STARTED
|
||||
|
@ -813,21 +824,21 @@ class PDBPhotoMixin:
|
|||
#print('Failed filename')
|
||||
continue
|
||||
|
||||
if any(
|
||||
fetch[constants.SQL_PHOTO[key]] is None or
|
||||
fetch[constants.SQL_PHOTO[key]] > value
|
||||
for (key, value) in maximums.items()
|
||||
):
|
||||
#print('Failed maximums')
|
||||
continue
|
||||
# if any(
|
||||
# fetch[constants.SQL_PHOTO[key]] is None or
|
||||
# fetch[constants.SQL_PHOTO[key]] > value
|
||||
# for (key, value) in maximums.items()
|
||||
# ):
|
||||
# #print('Failed maximums')
|
||||
# continue
|
||||
|
||||
if any(
|
||||
fetch[constants.SQL_PHOTO[key]] is None or
|
||||
fetch[constants.SQL_PHOTO[key]] < value
|
||||
for (key, value) in minimums.items()
|
||||
):
|
||||
#print('Failed minimums')
|
||||
continue
|
||||
# if any(
|
||||
# fetch[constants.SQL_PHOTO[key]] is None or
|
||||
# fetch[constants.SQL_PHOTO[key]] < value
|
||||
# for (key, value) in minimums.items()
|
||||
# ):
|
||||
# #print('Failed minimums')
|
||||
# continue
|
||||
|
||||
if (has_tags is not None) or is_tagsearch:
|
||||
photo_tags = set(photo.tags())
|
||||
|
@ -848,17 +859,22 @@ class PDBPhotoMixin:
|
|||
if not success:
|
||||
#print('Failed tag expression')
|
||||
continue
|
||||
|
||||
elif is_must_may_forbid:
|
||||
success = searchfilter_must_may_forbid(
|
||||
photo_tags=photo_tags,
|
||||
tag_musts=tag_musts,
|
||||
tag_mays=tag_mays,
|
||||
tag_forbids=tag_forbids,
|
||||
frozen_children=frozen_children,
|
||||
)
|
||||
if not success:
|
||||
#print('Failed tag mmf')
|
||||
continue
|
||||
pass
|
||||
# if photo.id not in mmf_results:
|
||||
# #print('Failed tag mmf')
|
||||
# continue
|
||||
# success = searchfilter_must_may_forbid(
|
||||
# photo_tags=photo_tags,
|
||||
# tag_musts=tag_musts,
|
||||
# tag_mays=tag_mays,
|
||||
# tag_forbids=tag_forbids,
|
||||
# frozen_children=frozen_children,
|
||||
# )
|
||||
# if not success:
|
||||
# #print('Failed tag mmf')
|
||||
# continue
|
||||
|
||||
if offset > 0:
|
||||
offset -= 1
|
||||
|
|
|
@ -5,34 +5,45 @@ from . import objects
|
|||
|
||||
from voussoirkit import expressionmatch
|
||||
|
||||
def build_query(orderby, notnulls, minimums, maximums, mmf_results=None):
    '''
    Assemble the SELECT statement that fetches rows from the photos table.

    orderby:
        A list of 'column-direction' strings, e.g. ['bytes-asc'].
        If falsy, the rows are sorted by created DESC.
        Every sort column other than RANDOM() is also required to be
        IS NOT NULL.
    notnulls:
        A list of column names that must be IS NOT NULL. May be None.
        This list is not modified.
    minimums, maximums:
        Dicts of {column: value} producing `column >= value` and
        `column <= value` conditions. May be None.
    mmf_results:
        Optional dict with the keys 'operator' and 'photoids'. When given,
        the id column is restricted with `id <operator> (<photoids>)`.

    Returns the complete query as a single string.
    '''
    if orderby:
        sorters = [term.split('-') for term in orderby]
    else:
        sorters = [('created', 'DESC')]

    # Collect the NOT NULL columns in a fresh list so the caller's `notnulls`
    # is left untouched, and skip repeats so that a column which is both
    # required and sorted on only produces one condition.
    requested = list(notnulls or [])
    requested.extend(column for (column, direction) in sorters if column != 'RANDOM()')
    required = []
    for column in requested:
        if column not in required:
            required.append(column)

    wheres = []

    if mmf_results:
        id_list = helpers.sql_listify(mmf_results['photoids'])
        wheres.append('id %s %s' % (mmf_results['operator'], id_list))

    for column in required:
        wheres.append(column + ' IS NOT NULL')

    for (column, value) in (minimums or {}).items():
        wheres.append(column + ' >= ' + str(value))

    for (column, value) in (maximums or {}).items():
        wheres.append(column + ' <= ' + str(value))

    ## Assemble

    query = ['SELECT * FROM photos']

    if wheres:
        query.append('WHERE ' + ' AND '.join(wheres))

    # `sorters` is never empty because of the created DESC fallback above.
    query.append('ORDER BY ' + ', '.join(' '.join(sorter) for sorter in sorters))

    return ' '.join(query)
|
||||
|
||||
def get_user(photodb, username_or_id):
|
||||
|
@ -90,6 +101,58 @@ def minmax(key, value, minimums, maximums, warning_bag=None):
|
|||
if high is not None:
|
||||
maximums[key] = high
|
||||
|
||||
def _photoids_with_tag(cur, tag, frozen_children):
    '''
    Return the set of photoid values in photo_tag_rel whose tagid is the id
    of any tag in frozen_children[tag].
    '''
    tag_ids = [str(child.id) for child in frozen_children[tag]]
    photo_ids = set()
    # The ids are bound as parameters instead of being formatted into the
    # SQL text. They are sent in batches because SQLite limits the number of
    # bound variables per statement (999 in older builds).
    batch_size = 500
    for start in range(0, len(tag_ids), batch_size):
        batch = tag_ids[start:start + batch_size]
        placeholders = ', '.join('?' * len(batch))
        query = 'SELECT photoid FROM photo_tag_rel WHERE tagid IN (%s)' % placeholders
        cur.execute(query, batch)
        photo_ids.update(row[0] for row in cur.fetchall())
    return photo_ids

def mmf_photoids(photodb, tag_musts, tag_mays, tag_forbids, frozen_children):
    '''
    Pre-compute which photo ids can satisfy a must / may / forbid tag search
    by reading the photo_tag_rel table, so that the main photo query can be
    restricted to them.

    photodb:
        Object whose `sql` attribute provides cursor().
    tag_musts, tag_mays, tag_forbids:
        Collections of tags, or None / empty.
    frozen_children:
        Dict mapping each searched tag to the tags whose ids count as a match
        for it (presumably the tag itself plus its descendants -- confirm
        against the code that builds it).

    Returns:
        None when no restriction on photo ids is needed: either no tags were
        given, or only forbids were given and no photo carries any of them.
        Otherwise a dict {'operator': 'IN' or 'NOT IN', 'photoids': set}.
        An 'IN' result with an empty set means that no photo can match.
    '''
    if not (tag_musts or tag_mays or tag_forbids):
        return None

    cur = photodb.sql.cursor()

    # None means that no must / may filter has been applied yet. This is kept
    # distinct from an empty set, which means a filter was applied and no
    # photo survived it.
    candidates = None

    if tag_mays:
        # A photo qualifies by having any one of the may tags.
        candidates = set()
        for tag in tag_mays:
            candidates.update(_photoids_with_tag(cur, tag, frozen_children))

    if tag_musts:
        for tag in tag_musts:
            matches = _photoids_with_tag(cur, tag, frozen_children)
            if candidates is None:
                candidates = matches
            else:
                candidates = candidates.intersection(matches)
            if not candidates:
                break

    if candidates is not None:
        if tag_forbids:
            for tag in tag_forbids:
                if not candidates:
                    break
                candidates = candidates.difference(_photoids_with_tag(cur, tag, frozen_children))
        return {'operator': 'IN', 'photoids': candidates}

    # Only forbids were given: describe the photos to exclude instead.
    forbidden = set()
    for tag in tag_forbids:
        forbidden.update(_photoids_with_tag(cur, tag, frozen_children))

    if not forbidden:
        # Nothing to exclude, so the search does not need an id restriction.
        return None

    return {'operator': 'NOT IN', 'photoids': forbidden}
|
||||
|
||||
def normalize_authors(authors, photodb, warning_bag=None):
|
||||
'''
|
||||
Either:
|
||||
|
|
Loading…
Reference in a new issue