Improve MMF search speed by pre-filtering photo IDs
Not perfected yet, but it's an improvement. Instead of wasting time checking every photo, pre-search for the matching photo IDs via the photo_tag_rel table.
This commit is contained in:
parent
fbf73ac515
commit
5b7c05c39d
3 changed files with 153 additions and 71 deletions
|
@ -321,6 +321,9 @@ def select_generator(sql, query, bindings=None):
|
||||||
break
|
break
|
||||||
yield fetch
|
yield fetch
|
||||||
|
|
||||||
|
def sql_listify(items):
    '''
    Render `items` as a parenthesized, comma-separated list of double-quoted
    values for splicing into an SQL statement, e.g. `("a", "b")`.

    items:
        Any iterable. Each element is converted with str(). An empty
        iterable produces `()`.

    Double quotes inside a value are escaped by doubling them, so a value
    cannot terminate its own quoted token early. Values without double
    quotes are rendered exactly as before.

    NOTE(review): this emits double-quoted tokens, which standard SQL reads
    as identifiers rather than string literals -- confirm the target database
    accepts them as values, or prefer bound parameters where possible.
    '''
    quoted = ('"%s"' % str(item).replace('"', '""') for item in items)
    return '(%s)' % ', '.join(quoted)
|
||||||
|
|
||||||
def truthystring(s):
|
def truthystring(s):
|
||||||
'''
|
'''
|
||||||
Convert strings to True, False, or None based on the options presented
|
Convert strings to True, False, or None based on the options presented
|
||||||
|
|
|
@ -707,38 +707,11 @@ class PDBPhotoMixin:
|
||||||
notnulls.append('bytes')
|
notnulls.append('bytes')
|
||||||
if duration:
|
if duration:
|
||||||
notnulls.append('duration')
|
notnulls.append('duration')
|
||||||
query = searchhelpers.build_query(orderby, notnulls)
|
|
||||||
print(query)
|
|
||||||
generator = helpers.select_generator(self.sql, query)
|
|
||||||
|
|
||||||
if orderby is None:
|
if orderby is None:
|
||||||
giveback_orderby = None
|
giveback_orderby = None
|
||||||
else:
|
else:
|
||||||
giveback_orderby = [term.replace('RANDOM()', 'random') for term in orderby]
|
giveback_orderby = [term.replace('RANDOM()', 'random') for term in orderby]
|
||||||
if give_back_parameters:
|
|
||||||
parameters = {
|
|
||||||
'area': area,
|
|
||||||
'width': width,
|
|
||||||
'height': height,
|
|
||||||
'ratio': ratio,
|
|
||||||
'bytes': bytes,
|
|
||||||
'duration': duration,
|
|
||||||
'authors': authors,
|
|
||||||
'created': created,
|
|
||||||
'extension': extension,
|
|
||||||
'extension_not': extension_not,
|
|
||||||
'filename': filename,
|
|
||||||
'has_tags': has_tags,
|
|
||||||
'mimetype': mimetype,
|
|
||||||
'tag_musts': tag_musts,
|
|
||||||
'tag_mays': tag_mays,
|
|
||||||
'tag_forbids': tag_forbids,
|
|
||||||
'tag_expression': tag_expression,
|
|
||||||
'limit': limit,
|
|
||||||
'offset': offset,
|
|
||||||
'orderby': giveback_orderby,
|
|
||||||
}
|
|
||||||
yield parameters
|
|
||||||
|
|
||||||
# FROZEN CHILDREN
|
# FROZEN CHILDREN
|
||||||
# To lighten the amount of database reading here, `frozen_children` is a dict where
|
# To lighten the amount of database reading here, `frozen_children` is a dict where
|
||||||
|
@ -773,6 +746,44 @@ class PDBPhotoMixin:
|
||||||
filename_tree = expressionmatch.ExpressionTree.parse(filename)
|
filename_tree = expressionmatch.ExpressionTree.parse(filename)
|
||||||
filename_tree.map(lambda x: x.lower())
|
filename_tree.map(lambda x: x.lower())
|
||||||
|
|
||||||
|
if give_back_parameters:
|
||||||
|
parameters = {
|
||||||
|
'area': area,
|
||||||
|
'width': width,
|
||||||
|
'height': height,
|
||||||
|
'ratio': ratio,
|
||||||
|
'bytes': bytes,
|
||||||
|
'duration': duration,
|
||||||
|
'authors': authors,
|
||||||
|
'created': created,
|
||||||
|
'extension': extension,
|
||||||
|
'extension_not': extension_not,
|
||||||
|
'filename': filename,
|
||||||
|
'has_tags': has_tags,
|
||||||
|
'mimetype': mimetype,
|
||||||
|
'tag_musts': tag_musts,
|
||||||
|
'tag_mays': tag_mays,
|
||||||
|
'tag_forbids': tag_forbids,
|
||||||
|
'tag_expression': tag_expression,
|
||||||
|
'limit': limit,
|
||||||
|
'offset': offset,
|
||||||
|
'orderby': giveback_orderby,
|
||||||
|
}
|
||||||
|
yield parameters
|
||||||
|
|
||||||
|
if is_must_may_forbid:
|
||||||
|
mmf_results = searchhelpers.mmf_photoids(self, tag_musts, tag_mays, tag_forbids, frozen_children)
|
||||||
|
#print('mmf accept:', mmf_results)
|
||||||
|
else:
|
||||||
|
mmf_results = None
|
||||||
|
|
||||||
|
if mmf_results is not None and mmf_results['photoids'] == set():
|
||||||
|
generator = []
|
||||||
|
else:
|
||||||
|
query = searchhelpers.build_query(orderby, notnulls, minimums, maximums, mmf_results=mmf_results)
|
||||||
|
print(query[:200])
|
||||||
|
generator = helpers.select_generator(self.sql, query)
|
||||||
|
|
||||||
photos_received = 0
|
photos_received = 0
|
||||||
|
|
||||||
# LET'S GET STARTED
|
# LET'S GET STARTED
|
||||||
|
@ -813,21 +824,21 @@ class PDBPhotoMixin:
|
||||||
#print('Failed filename')
|
#print('Failed filename')
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if any(
|
# if any(
|
||||||
fetch[constants.SQL_PHOTO[key]] is None or
|
# fetch[constants.SQL_PHOTO[key]] is None or
|
||||||
fetch[constants.SQL_PHOTO[key]] > value
|
# fetch[constants.SQL_PHOTO[key]] > value
|
||||||
for (key, value) in maximums.items()
|
# for (key, value) in maximums.items()
|
||||||
):
|
# ):
|
||||||
#print('Failed maximums')
|
# #print('Failed maximums')
|
||||||
continue
|
# continue
|
||||||
|
|
||||||
if any(
|
# if any(
|
||||||
fetch[constants.SQL_PHOTO[key]] is None or
|
# fetch[constants.SQL_PHOTO[key]] is None or
|
||||||
fetch[constants.SQL_PHOTO[key]] < value
|
# fetch[constants.SQL_PHOTO[key]] < value
|
||||||
for (key, value) in minimums.items()
|
# for (key, value) in minimums.items()
|
||||||
):
|
# ):
|
||||||
#print('Failed minimums')
|
# #print('Failed minimums')
|
||||||
continue
|
# continue
|
||||||
|
|
||||||
if (has_tags is not None) or is_tagsearch:
|
if (has_tags is not None) or is_tagsearch:
|
||||||
photo_tags = set(photo.tags())
|
photo_tags = set(photo.tags())
|
||||||
|
@ -848,17 +859,22 @@ class PDBPhotoMixin:
|
||||||
if not success:
|
if not success:
|
||||||
#print('Failed tag expression')
|
#print('Failed tag expression')
|
||||||
continue
|
continue
|
||||||
|
|
||||||
elif is_must_may_forbid:
|
elif is_must_may_forbid:
|
||||||
success = searchfilter_must_may_forbid(
|
pass
|
||||||
photo_tags=photo_tags,
|
# if photo.id not in mmf_results:
|
||||||
tag_musts=tag_musts,
|
# #print('Failed tag mmf')
|
||||||
tag_mays=tag_mays,
|
# continue
|
||||||
tag_forbids=tag_forbids,
|
# success = searchfilter_must_may_forbid(
|
||||||
frozen_children=frozen_children,
|
# photo_tags=photo_tags,
|
||||||
)
|
# tag_musts=tag_musts,
|
||||||
if not success:
|
# tag_mays=tag_mays,
|
||||||
#print('Failed tag mmf')
|
# tag_forbids=tag_forbids,
|
||||||
continue
|
# frozen_children=frozen_children,
|
||||||
|
# )
|
||||||
|
# if not success:
|
||||||
|
# #print('Failed tag mmf')
|
||||||
|
# continue
|
||||||
|
|
||||||
if offset > 0:
|
if offset > 0:
|
||||||
offset -= 1
|
offset -= 1
|
||||||
|
|
|
@ -5,34 +5,45 @@ from . import objects
|
||||||
|
|
||||||
from voussoirkit import expressionmatch
|
from voussoirkit import expressionmatch
|
||||||
|
|
||||||
def build_query(orderby, notnulls, minimums=None, maximums=None, mmf_results=None):
    '''
    Assemble the `SELECT * FROM photos ...` statement used by the photo search.

    orderby:
        A list of 'column-direction' strings such as 'created-DESC', or a
        falsy value to fall back to `created DESC`. Every ordered column
        except 'RANDOM()' is also required to be NOT NULL.
    notnulls:
        A list of column names that must be NOT NULL. The caller's list is
        left untouched; a copy is extended with the orderby columns.
    minimums, maximums:
        Dicts of {column: value} which become `column >= value` and
        `column <= value` conditions. Values are inserted with str().
        May be omitted or None when there are no bounds.
    mmf_results:
        None, or a dict with the keys 'operator' ('IN' or 'NOT IN') and
        'photoids' (an iterable of photo ids), which becomes an
        `id <operator> (...)` condition.

    Returns the query as a single string.
    '''
    # Copy so that repeated calls don't keep growing the caller's list.
    notnulls = list(notnulls or [])
    minimums = minimums or {}
    maximums = maximums or {}

    query = ['SELECT * FROM photos']
    wheres = []

    if mmf_results:
        photoids = helpers.sql_listify(mmf_results['photoids'])
        wheres.append('id %s %s' % (mmf_results['operator'], photoids))

    if orderby:
        orderby = [o.split('-') for o in orderby]
    else:
        orderby = [('created', 'DESC')]

    # Rows with NULL in a sorted column are excluded. RANDOM() is an
    # expression rather than a column, so it gets no such condition.
    notnulls.extend(column for (column, direction) in orderby if column != 'RANDOM()')

    wheres.extend(column + ' IS NOT NULL' for column in notnulls)
    wheres.extend(column + ' >= ' + str(value) for (column, value) in minimums.items())
    wheres.extend(column + ' <= ' + str(value) for (column, value) in maximums.items())

    ## Assemble

    if wheres:
        query.append('WHERE ' + ' AND '.join(wheres))

    # orderby is never empty at this point because of the default above.
    query.append('ORDER BY ' + ', '.join(' '.join(o) for o in orderby))

    return ' '.join(query)
|
||||||
|
|
||||||
def get_user(photodb, username_or_id):
|
def get_user(photodb, username_or_id):
|
||||||
|
@ -90,6 +101,58 @@ def minmax(key, value, minimums, maximums, warning_bag=None):
|
||||||
if high is not None:
|
if high is not None:
|
||||||
maximums[key] = high
|
maximums[key] = high
|
||||||
|
|
||||||
|
def _photoids_with_any_tag(cur, tags):
    '''
    Return the set of photoids that photo_tag_rel links to at least one of
    the given tags.
    '''
    tag_ids = [tag.id for tag in tags]
    photo_ids = set()
    # Ids are bound as parameters instead of being pasted into the SQL text.
    # They are sent in batches because SQLite caps the number of bound
    # variables per statement (assumed >= 500 -- TODO confirm for the
    # deployed SQLite build).
    batch_size = 500
    for start in range(0, len(tag_ids), batch_size):
        batch = tag_ids[start:start + batch_size]
        placeholders = ', '.join('?' * len(batch))
        query = 'SELECT photoid FROM photo_tag_rel WHERE tagid IN (%s)' % placeholders
        cur.execute(query, batch)
        photo_ids.update(fetch[0] for fetch in cur.fetchall())
    return photo_ids


def mmf_photoids(photodb, tag_musts, tag_mays, tag_forbids, frozen_children):
    '''
    Pre-compute which photo ids can satisfy a must / may / forbid tag search
    by reading the photo_tag_rel table, so the main search does not have to
    inspect the tags of every photo.

    photodb:
        Object whose `sql` attribute provides `cursor()`.
        Presumably a sqlite3 connection, since '?' placeholders are used
        -- verify against the caller.
    tag_musts, tag_mays, tag_forbids:
        Iterables of tags, or falsy values when unused.
    frozen_children:
        Dict mapping each tag to an iterable of tags (objects with `.id`)
        whose presence on a photo counts as that tag being present.

    Returns None when no tag constraints were given at all. Otherwise
    returns a dict {'operator': ..., 'photoids': set}:
    - 'IN': only the listed photos qualify. This is used whenever musts or
      mays were given; an empty set means nothing can match.
    - 'NOT IN': only forbids were given, and the listed photos are the ones
      to exclude. An empty set here means every photo qualifies, so callers
      should look at the operator before treating an empty set as "no
      matches".
    '''
    if not (tag_musts or tag_mays or tag_forbids):
        return None

    cur = photodb.sql.cursor()

    # None means "no positive constraint seen yet". That is different from
    # an empty set, which means "constraints were applied and nothing
    # survived". Keeping the two apart prevents a failed must/may from
    # being mistaken for a forbid-only search.
    results = None

    if tag_mays:
        # A photo needs at least one of the mays.
        results = set()
        for tag in tag_mays:
            results.update(_photoids_with_any_tag(cur, frozen_children[tag]))

    if tag_musts:
        # A photo needs every one of the musts.
        for tag in tag_musts:
            photo_ids = _photoids_with_any_tag(cur, frozen_children[tag])
            if results is None:
                results = photo_ids
            else:
                results = results.intersection(photo_ids)
            if not results:
                break

    if results is not None and not results:
        # Nothing satisfies the musts / mays, so forbids can't change that.
        return {'operator': 'IN', 'photoids': set()}

    if not tag_forbids:
        return {'operator': 'IN', 'photoids': results}

    if results is None:
        # Forbid-only search: report the photos to exclude.
        forbidden = set()
        for tag in tag_forbids:
            forbidden.update(_photoids_with_any_tag(cur, frozen_children[tag]))
        return {'operator': 'NOT IN', 'photoids': forbidden}

    for tag in tag_forbids:
        results = results.difference(_photoids_with_any_tag(cur, frozen_children[tag]))
        if not results:
            break

    return {'operator': 'IN', 'photoids': results}
|
||||||
|
|
||||||
def normalize_authors(authors, photodb, warning_bag=None):
|
def normalize_authors(authors, photodb, warning_bag=None):
|
||||||
'''
|
'''
|
||||||
Either:
|
Either:
|
||||||
|
|
Loading…
Reference in a new issue