Improve MMF search speed by pre-filtering photo IDs
Not perfected yet, but it's an improvement: instead of wasting time checking every photo against the must/may/forbid (MMF) tags, pre-search for the matching photo IDs via the photo_tag_rel table.
This commit is contained in:
		
							parent
							
								
									fbf73ac515
								
							
						
					
					
						commit
						5b7c05c39d
					
				
					 3 changed files with 153 additions and 71 deletions
				
			
		|  | @ -321,6 +321,9 @@ def select_generator(sql, query, bindings=None): | ||||||
|             break |             break | ||||||
|         yield fetch |         yield fetch | ||||||
| 
 | 
 | ||||||
def sql_listify(items):
    '''
    Render `items` as a parenthesized, comma-separated list of SQL string
    literals, suitable for splicing after `IN` / `NOT IN`.

    Each item is converted with str(), wrapped in single quotes, and has any
    embedded single quote doubled so that it cannot terminate the literal
    early. Single quotes are used because double quotes denote identifiers in
    SQL; SQLite only treats a double-quoted token as a string when it does not
    happen to match a column name.

    items: any iterable (a generator is fine; it is consumed once).

    Returns a string such as ('a', 'b'), or () when `items` is empty.
    '''
    literals = ("'%s'" % str(item).replace("'", "''") for item in items)
    return '(%s)' % ', '.join(literals)
|  | 
 | ||||||
| def truthystring(s): | def truthystring(s): | ||||||
|     ''' |     ''' | ||||||
|     Convert strings to True, False, or None based on the options presented |     Convert strings to True, False, or None based on the options presented | ||||||
|  |  | ||||||
|  | @ -707,38 +707,11 @@ class PDBPhotoMixin: | ||||||
|             notnulls.append('bytes') |             notnulls.append('bytes') | ||||||
|         if duration: |         if duration: | ||||||
|             notnulls.append('duration') |             notnulls.append('duration') | ||||||
|         query = searchhelpers.build_query(orderby, notnulls) |  | ||||||
|         print(query) |  | ||||||
|         generator = helpers.select_generator(self.sql, query) |  | ||||||
| 
 | 
 | ||||||
|         if orderby is None: |         if orderby is None: | ||||||
|             giveback_orderby = None |             giveback_orderby = None | ||||||
|         else: |         else: | ||||||
|             giveback_orderby = [term.replace('RANDOM()', 'random') for term in orderby] |             giveback_orderby = [term.replace('RANDOM()', 'random') for term in orderby] | ||||||
|         if give_back_parameters: |  | ||||||
|             parameters = { |  | ||||||
|                 'area': area, |  | ||||||
|                 'width': width, |  | ||||||
|                 'height': height, |  | ||||||
|                 'ratio': ratio, |  | ||||||
|                 'bytes': bytes, |  | ||||||
|                 'duration': duration, |  | ||||||
|                 'authors': authors, |  | ||||||
|                 'created': created, |  | ||||||
|                 'extension': extension, |  | ||||||
|                 'extension_not': extension_not, |  | ||||||
|                 'filename': filename, |  | ||||||
|                 'has_tags': has_tags, |  | ||||||
|                 'mimetype': mimetype, |  | ||||||
|                 'tag_musts': tag_musts, |  | ||||||
|                 'tag_mays': tag_mays, |  | ||||||
|                 'tag_forbids': tag_forbids, |  | ||||||
|                 'tag_expression': tag_expression, |  | ||||||
|                 'limit': limit, |  | ||||||
|                 'offset': offset, |  | ||||||
|                 'orderby': giveback_orderby, |  | ||||||
|             } |  | ||||||
|             yield parameters |  | ||||||
| 
 | 
 | ||||||
|         # FROZEN CHILDREN |         # FROZEN CHILDREN | ||||||
|         # To lighten the amount of database reading here, `frozen_children` is a dict where |         # To lighten the amount of database reading here, `frozen_children` is a dict where | ||||||
|  | @ -773,6 +746,44 @@ class PDBPhotoMixin: | ||||||
|             filename_tree = expressionmatch.ExpressionTree.parse(filename) |             filename_tree = expressionmatch.ExpressionTree.parse(filename) | ||||||
|             filename_tree.map(lambda x: x.lower()) |             filename_tree.map(lambda x: x.lower()) | ||||||
| 
 | 
 | ||||||
|  |         if give_back_parameters: | ||||||
|  |             parameters = { | ||||||
|  |                 'area': area, | ||||||
|  |                 'width': width, | ||||||
|  |                 'height': height, | ||||||
|  |                 'ratio': ratio, | ||||||
|  |                 'bytes': bytes, | ||||||
|  |                 'duration': duration, | ||||||
|  |                 'authors': authors, | ||||||
|  |                 'created': created, | ||||||
|  |                 'extension': extension, | ||||||
|  |                 'extension_not': extension_not, | ||||||
|  |                 'filename': filename, | ||||||
|  |                 'has_tags': has_tags, | ||||||
|  |                 'mimetype': mimetype, | ||||||
|  |                 'tag_musts': tag_musts, | ||||||
|  |                 'tag_mays': tag_mays, | ||||||
|  |                 'tag_forbids': tag_forbids, | ||||||
|  |                 'tag_expression': tag_expression, | ||||||
|  |                 'limit': limit, | ||||||
|  |                 'offset': offset, | ||||||
|  |                 'orderby': giveback_orderby, | ||||||
|  |             } | ||||||
|  |             yield parameters | ||||||
|  | 
 | ||||||
|  |         if is_must_may_forbid: | ||||||
|  |             mmf_results = searchhelpers.mmf_photoids(self, tag_musts, tag_mays, tag_forbids, frozen_children) | ||||||
|  |             #print('mmf accept:', mmf_results) | ||||||
|  |         else: | ||||||
|  |             mmf_results = None | ||||||
|  | 
 | ||||||
|  |         if mmf_results is not None and mmf_results['photoids'] == set(): | ||||||
|  |             generator = [] | ||||||
|  |         else: | ||||||
|  |             query = searchhelpers.build_query(orderby, notnulls, minimums, maximums, mmf_results=mmf_results) | ||||||
|  |             print(query[:200]) | ||||||
|  |             generator = helpers.select_generator(self.sql, query) | ||||||
|  | 
 | ||||||
|         photos_received = 0 |         photos_received = 0 | ||||||
| 
 | 
 | ||||||
|         # LET'S GET STARTED |         # LET'S GET STARTED | ||||||
|  | @ -813,21 +824,21 @@ class PDBPhotoMixin: | ||||||
|                 #print('Failed filename') |                 #print('Failed filename') | ||||||
|                 continue |                 continue | ||||||
| 
 | 
 | ||||||
|             if any( |             # if any( | ||||||
|                     fetch[constants.SQL_PHOTO[key]] is None or |             #         fetch[constants.SQL_PHOTO[key]] is None or | ||||||
|                     fetch[constants.SQL_PHOTO[key]] > value |             #         fetch[constants.SQL_PHOTO[key]] > value | ||||||
|                     for (key, value) in maximums.items() |             #         for (key, value) in maximums.items() | ||||||
|                 ): |             #     ): | ||||||
|                 #print('Failed maximums') |             #     #print('Failed maximums') | ||||||
|                 continue |             #     continue | ||||||
| 
 | 
 | ||||||
|             if any( |             # if any( | ||||||
|                     fetch[constants.SQL_PHOTO[key]] is None or |             #         fetch[constants.SQL_PHOTO[key]] is None or | ||||||
|                     fetch[constants.SQL_PHOTO[key]] < value |             #         fetch[constants.SQL_PHOTO[key]] < value | ||||||
|                     for (key, value) in minimums.items() |             #         for (key, value) in minimums.items() | ||||||
|                 ): |             #     ): | ||||||
|                 #print('Failed minimums') |             #     #print('Failed minimums') | ||||||
|                 continue |             #     continue | ||||||
| 
 | 
 | ||||||
|             if (has_tags is not None) or is_tagsearch: |             if (has_tags is not None) or is_tagsearch: | ||||||
|                 photo_tags = set(photo.tags()) |                 photo_tags = set(photo.tags()) | ||||||
|  | @ -848,17 +859,22 @@ class PDBPhotoMixin: | ||||||
|                     if not success: |                     if not success: | ||||||
|                         #print('Failed tag expression') |                         #print('Failed tag expression') | ||||||
|                         continue |                         continue | ||||||
|  | 
 | ||||||
|                 elif is_must_may_forbid: |                 elif is_must_may_forbid: | ||||||
|                     success = searchfilter_must_may_forbid( |                     pass | ||||||
|                         photo_tags=photo_tags, |                     # if photo.id not in mmf_results: | ||||||
|                         tag_musts=tag_musts, |                     #     #print('Failed tag mmf') | ||||||
|                         tag_mays=tag_mays, |                     #     continue | ||||||
|                         tag_forbids=tag_forbids, |                     # success = searchfilter_must_may_forbid( | ||||||
|                         frozen_children=frozen_children, |                     #     photo_tags=photo_tags, | ||||||
|                     ) |                     #     tag_musts=tag_musts, | ||||||
|                     if not success: |                     #     tag_mays=tag_mays, | ||||||
|                         #print('Failed tag mmf') |                     #     tag_forbids=tag_forbids, | ||||||
|                         continue |                     #     frozen_children=frozen_children, | ||||||
|  |                     # ) | ||||||
|  |                     # if not success: | ||||||
|  |                     #     #print('Failed tag mmf') | ||||||
|  |                     #     continue | ||||||
| 
 | 
 | ||||||
|             if offset > 0: |             if offset > 0: | ||||||
|                 offset -= 1 |                 offset -= 1 | ||||||
|  |  | ||||||
|  | @ -5,34 +5,45 @@ from . import objects | ||||||
| 
 | 
 | ||||||
| from voussoirkit import expressionmatch | from voussoirkit import expressionmatch | ||||||
| 
 | 
 | ||||||
| def build_query(orderby, notnulls): | 
 | ||||||
|     query = 'SELECT * FROM photos' | def build_query(orderby, notnulls, minimums, maximums, mmf_results=None): | ||||||
|  |     query = ['SELECT * FROM photos'] | ||||||
|  |     wheres = [] | ||||||
|  | 
 | ||||||
|  |     if mmf_results: | ||||||
|  |         wheres.append('id %s %s' % (mmf_results['operator'], helpers.sql_listify(mmf_results['photoids']))) | ||||||
| 
 | 
 | ||||||
|     if orderby: |     if orderby: | ||||||
|         orderby = [o.split('-') for o in orderby] |         orderby = [o.split('-') for o in orderby] | ||||||
|         orderby_columns = [column for (column, sorter) in orderby if column != 'RANDOM()'] |  | ||||||
|     else: |     else: | ||||||
|         orderby_columns = [] |         orderby = [('created', 'DESC')] | ||||||
| 
 | 
 | ||||||
|     if notnulls: |     for (column, direction) in orderby: | ||||||
|         notnulls.extend(orderby_columns) |         if column != 'RANDOM()': | ||||||
|     elif orderby_columns: |             notnulls.append(column) | ||||||
|         notnulls = orderby_columns |  | ||||||
| 
 | 
 | ||||||
|     if notnulls: |     for column in notnulls: | ||||||
|         notnulls = [x + ' IS NOT NULL' for x in notnulls] |         wheres.append(column + ' IS NOT NULL') | ||||||
|         notnulls = ' AND '.join(notnulls) |  | ||||||
|         query += ' WHERE ' + notnulls |  | ||||||
|     if not orderby: |  | ||||||
|         query += ' ORDER BY created DESC' |  | ||||||
|         return query |  | ||||||
| 
 | 
 | ||||||
|     # Combine each column+sorter |     for (column, value) in minimums.items(): | ||||||
|     orderby = [' '.join(o) for o in orderby] |         wheres.append(column + ' >= ' + str(value)) | ||||||
| 
 | 
 | ||||||
|     # Combine everything |     for (column, value) in maximums.items(): | ||||||
|     orderby = ', '.join(orderby) |         wheres.append(column + ' <= ' + str(value)) | ||||||
|     query += ' ORDER BY %s' % orderby | 
 | ||||||
|  |     ## Assemble | ||||||
|  | 
 | ||||||
|  |     if wheres: | ||||||
|  |         wheres = 'WHERE '  + ' AND '.join(wheres) | ||||||
|  |         query.append(wheres) | ||||||
|  | 
 | ||||||
|  |     if orderby: | ||||||
|  |         orderby = [' '.join(o) for o in orderby] | ||||||
|  |         orderby = ', '.join(orderby) | ||||||
|  |         orderby = 'ORDER BY ' + orderby | ||||||
|  |         query.append(orderby) | ||||||
|  | 
 | ||||||
|  |     query = ' '.join(query) | ||||||
|     return query |     return query | ||||||
| 
 | 
 | ||||||
| def get_user(photodb, username_or_id): | def get_user(photodb, username_or_id): | ||||||
|  | @ -90,6 +101,58 @@ def minmax(key, value, minimums, maximums, warning_bag=None): | ||||||
|     if high is not None: |     if high is not None: | ||||||
|         maximums[key] = high |         maximums[key] = high | ||||||
| 
 | 
 | ||||||
def mmf_photoids(photodb, tag_musts, tag_mays, tag_forbids, frozen_children):
    '''
    Pre-compute, from the photo_tag_rel table, which photo IDs can satisfy a
    must / may / forbid tag search, so the main photo query can be restricted
    by ID instead of inspecting every photo.

    photodb: object whose `.sql` attribute provides `.cursor()`.
    tag_musts: tags which the photo must ALL have (or falsey).
    tag_mays: tags of which the photo must have AT LEAST ONE (or falsey).
    tag_forbids: tags of which the photo must have NONE (or falsey).
    frozen_children: dict mapping each searched tag to an iterable of tag
        objects with an `.id` attribute; a photo counts as having the tag if
        it has any of these. (Presumably the tag itself plus its descendants
        -- confirm against the caller.)

    Returns:
    - None when there is nothing to restrict by: no tags were given at all, or
      only forbids were given and no photo carries any of them.
    - {'operator': 'IN', 'photoids': set} when musts and/or mays were given.
      The set holds the only acceptable photos and may be empty, meaning the
      search cannot match anything.
    - {'operator': 'NOT IN', 'photoids': set} when only forbids were given.
      The (non-empty) set holds the photos to exclude.
    '''
    if not (tag_musts or tag_mays or tag_forbids):
        return None

    cur = photodb.sql.cursor()

    # Values are bound as parameters rather than formatted into the SQL text.
    # They are bound in chunks because SQLite caps the number of `?`
    # placeholders per statement (999 by default in older builds).
    chunk_size = 500

    def photoids_tagged(tag):
        # IDs are stringified to keep the comparison semantics of the previous
        # implementation, which embedded them as quoted string literals.
        tagids = [str(child.id) for child in frozen_children[tag]]
        found = set()
        for start in range(0, len(tagids), chunk_size):
            chunk = tagids[start:start + chunk_size]
            placeholders = ', '.join('?' * len(chunk))
            query = 'SELECT photoid FROM photo_tag_rel WHERE tagid IN (%s)' % placeholders
            cur.execute(query, chunk)
            found.update(fetch[0] for fetch in cur.fetchall())
        return found

    # `results` stays None until a positive constraint (may / must) has been
    # applied. This is deliberately distinct from an empty set, which means
    # "a positive constraint was applied and nothing satisfies it".
    results = None

    if tag_mays:
        results = set()
        for tag in tag_mays:
            results.update(photoids_tagged(tag))

    if tag_musts:
        for tag in tag_musts:
            photo_ids = photoids_tagged(tag)
            if results is None:
                results = photo_ids
            else:
                results = results.intersection(photo_ids)
            if not results:
                # Nothing can satisfy the search; no point querying further.
                break

    if results is not None:
        if tag_forbids and results:
            for tag in tag_forbids:
                results = results.difference(photoids_tagged(tag))
                if not results:
                    break
        return {'operator': 'IN', 'photoids': results}

    # Only forbids were given: collect the photos to exclude.
    forbidden = set()
    for tag in tag_forbids:
        forbidden.update(photoids_tagged(tag))

    if not forbidden:
        # No photo carries a forbidden tag, so there is nothing to exclude.
        # An empty exclusion set must not be mistaken for "no results".
        return None

    return {'operator': 'NOT IN', 'photoids': forbidden}
|  | 
 | ||||||
| def normalize_authors(authors, photodb, warning_bag=None): | def normalize_authors(authors, photodb, warning_bag=None): | ||||||
|     ''' |     ''' | ||||||
|     Either: |     Either: | ||||||
|  |  | ||||||
		Loading…
	
		Reference in a new issue