Much better search

This commit is contained in:
Ethan Dalool 2017-05-06 18:29:42 -07:00
parent ad6f4b0d01
commit 24dcbfb658

View file

@ -1,93 +1,212 @@
import argparse
import fnmatch
import itertools
import os
import re
import stat
import sys
import traceback
from voussoirkit import clipext
from voussoirkit import expressionmatch
from voussoirkit import pathclass
from voussoirkit import safeprint
from voussoirkit import spinal
# Thanks georg
# http://stackoverflow.com/a/13443424
# STDIN_MODE is 'pipe' when standard input is a FIFO (something was piped into
# this program) and 'terminal' otherwise.
# sys.stdin can be None, closed, or replaced by an object without a real file
# descriptor; in those cases fileno() / fstat() raise, and we fall back to
# 'terminal' instead of failing at import time.
try:
    _stdin_is_fifo = stat.S_ISFIFO(os.fstat(sys.stdin.fileno()).st_mode)
except (AttributeError, OSError, ValueError):
    _stdin_is_fifo = False
STDIN_MODE = 'pipe' if _stdin_is_fifo else 'terminal'
def all_terms_match(search_text, terms, match_function):
    '''
    Return True if search_text satisfies every non-empty term group.

    terms is a dict with the keys 'yes_all', 'yes_any', 'not_all' and
    'not_any'. An empty group imposes no restriction. match_function is
    called as match_function(search_text, term) for individual terms.
    '''
    def outcomes(candidates):
        # Lazy on purpose, so any() / all() can stop at the first decisive term.
        return (match_function(search_text, candidate) for candidate in candidates)

    # Every one of these terms has to match.
    required = terms['yes_all']
    if required and not all(outcomes(required)):
        return False

    # At least one of these terms has to match.
    alternatives = terms['yes_any']
    if alternatives and not any(outcomes(alternatives)):
        return False

    # Rejected only when all of these terms match together.
    joint_exclusions = terms['not_all']
    if joint_exclusions and all(outcomes(joint_exclusions)):
        return False

    # Rejected as soon as any one of these terms matches.
    exclusions = terms['not_any']
    if exclusions and any(outcomes(exclusions)):
        return False

    return True
# Generator that matches either the lines of `text` or, when `text` is None,
# the entries produced by spinal.walk_generator, against the four term groups
# (yes_all, yes_any, not_all, not_any) and yields the text of each match.
# When content_args is given, matching entries are opened as files and their
# contents are searched with those nested arguments.
# NOTE(review): this span appears to be a flattened commit diff -- superseded
# and replacement lines are interleaved without +/- markers and indentation
# was lost, so it is not runnable exactly as shown.
def search(
terms,
*,
yes_all=None,
yes_any=None,
not_all=None,
not_any=None,
case_sensitive=False,
content_args=None,
do_expression=False,
do_glob=False,
do_regex=False,
inverse=False,
line_numbers=False,
local_only=False,
match_any=False,
text=None,
):
# NOTE(review): the next definition (taking `text`) looks like the superseded
# signature; a second definition taking `line` appears further down.
def term_matches(text, term):
# Collect the four term groups under fixed keys.
terms = {
'yes_all': yes_all,
'yes_any': yes_any,
'not_all': not_all,
'not_any': not_any
}
# Replace every falsy group (such as None) with an empty list.
terms = {k: (v or []) for (k, v) in terms.items()}
#print(terms, content_args)
# Refuse to run when every group is empty and no content search was requested.
if all(v == [] for v in terms.values()) and not content_args:
raise ValueError('No terms supplied')
# Test a single term against a single piece of text. The `line` variants
# below match this parameter name; the `text` variants presumably belong to
# the superseded definition -- confirm.
def term_matches(line, term):
if not case_sensitive:
text = text.lower()
line = line.lower()
# In expression mode the term is an object exposing evaluate().
if do_expression:
return term.evaluate(line)
# Plain substring test first, then regex and glob when those are enabled.
return (
(term in text) or
(do_regex and re.search(term, text)) or
(do_glob and fnmatch.fnmatch(text, term)) or
(do_expression and term.evaluate(text))
(term in line) or
(do_regex and re.search(term, line)) or
(do_glob and fnmatch.fnmatch(line, term))
)
# NOTE(review): list-based handling of `terms` follows; presumably superseded
# by the dict-based handling after it -- confirm.
if not case_sensitive:
terms = [term.lower() for term in terms]
if do_expression:
terms = ' '.join(terms)
terms = [expressionmatch.ExpressionTree.parse(terms)]
# The value still needs to be a list so the upcoming any() / all()
# receives an iterable as it expects. It just happens to be 1 tree.
# Join each non-empty group into one expression string and parse it into a
# single ExpressionTree, stored as a one-element list.
trees = {}
for (key, value) in terms.items():
if value == []:
trees[key] = []
continue
tree = ' '.join(value)
tree = expressionmatch.ExpressionTree.parse(tree)
if not case_sensitive:
tree.map(str.lower)
trees[key] = [tree]
terms = trees
anyall = any if match_any else all
# Outside expression mode, lowercase the plain terms for case-insensitive search.
elif not case_sensitive:
terms = {k: [x.lower() for x in v] for (k, v) in terms.items()}
# Without explicit text, search what spinal.walk_generator produces;
# otherwise search the individual lines of the text.
if text is None:
walk = spinal.walk_generator(
search_objects = spinal.walk_generator(
depth_first=False,
recurse=not local_only,
yield_directories=True,
)
lines = ((filepath.basename, filepath.absolute_path) for filepath in walk)
else:
lines = text.splitlines()
search_objects = text.splitlines()
for line in lines:
if isinstance(line, tuple):
(line, printout) = line
# index is zero-based; one is added where it is shown as a line number.
for (index, search_object) in enumerate(search_objects):
# Path objects are matched on their basename and reported by absolute path.
if isinstance(search_object, pathclass.Path):
search_text = search_object.basename
result_text = search_object.absolute_path
else:
printout = line
matches = anyall(term_matches(line, term) for term in terms)
if matches ^ inverse:
safeprint.safeprint(printout)
search_text = search_object
result_text = search_object
if line_numbers:
result_text = '%d | %s' % (index+1, result_text)
if all_terms_match(search_text, terms, term_matches):
if not content_args:
yield result_text
else:
# Content search: only regular files are opened; anything else is skipped.
filepath = pathclass.Path(search_object)
if not filepath.is_file:
continue
# Any failure while reading the file as UTF-8 is reported with a traceback
# and the file is skipped.
try:
with open(filepath.absolute_path, 'r', encoding='utf-8') as handle:
text = handle.read()
except:
safeprint.safeprint(filepath.absolute_path)
traceback.print_exc()
continue
# Search the file contents with the nested arguments, always with line numbers.
# Note that this writes into the caller's content_args dict.
content_args['text'] = text
content_args['line_numbers'] = True
results = search(**content_args)
# Materialise the nested results so files without hits yield nothing at all.
results = list(results)
if not results:
continue
# Emit the file path, then its matching lines, then an empty separator string.
yield filepath.absolute_path
yield from results
yield ''
def argparse_to_dict(args):
    '''
    Convert a parsed argument namespace into the keyword arguments for search.

    An explicit text argument is passed through clipext.resolve. When no text
    was given and STDIN_MODE is 'pipe', clipext.resolve('!i') is used instead.
    A nested content_args namespace, if present and not None, is converted
    recursively.
    '''
    if args.text is not None:
        text = clipext.resolve(args.text)
    elif STDIN_MODE == 'pipe':
        text = clipext.resolve('!i')
    else:
        text = None

    # Namespaces without a content_args attribute are treated like None.
    nested = getattr(args, 'content_args', None)
    content_args = None if nested is None else argparse_to_dict(nested)

    options = {}
    for name in ('yes_all', 'yes_any', 'not_all', 'not_any', 'case_sensitive'):
        options[name] = getattr(args, name)
    options['content_args'] = content_args
    for name in ('do_expression', 'do_glob', 'do_regex', 'local_only', 'line_numbers'):
        options[name] = getattr(args, name)
    options['text'] = text
    return options
# Command-line handler: runs search() with the parsed arguments and prints
# every result, optionally followed by a count.
# NOTE(review): the `return search(...)` call directly below looks like the
# superseded side of the commit diff; the generator loop after it is
# presumably the current body -- confirm.
def search_argparse(args):
return search(
terms=args.search_terms,
case_sensitive=args.case_sensitive,
do_glob=args.do_glob,
do_regex=args.do_regex,
inverse=args.inverse,
local_only=args.local_only,
match_any=args.match_any,
text=args.text if args.text is None else clipext.resolve(args.text),
)
# Convert the namespace to keyword arguments and print each result as it is
# produced, counting them along the way.
generator = search(**argparse_to_dict(args))
result_count = 0
for result in generator:
safeprint.safeprint(result)
result_count += 1
# Report the number of printed results only when --count was given.
if args.show_count:
print('%d items.' % result_count)
# Build the argument parser, split argv around '--content', parse each part
# with the same parser and dispatch to the handler stored in args.func.
# NOTE(review): this span appears to interleave superseded and current lines
# of a commit diff (two parse_args calls, repeated --regex / --glob options).
def main(argv):
parser = argparse.ArgumentParser()
# NOTE(review): the next two arguments (search_terms, match_any) are presumably
# the superseded definitions; `--any` is defined again below with dest='yes_any'.
parser.add_argument('search_terms', nargs='+', default=None)
parser.add_argument('--any', dest='match_any', action='store_true')
# The padding is inserted to guarantee that --content is not the first
# argument. Because if it were, we wouldn't know if we have
# [pre, '--content'] or ['--content', post], etc. and I don't want to
# actually check the values.
# Note that insert() modifies the list object passed in by the caller.
argv.insert(0, 'padding')
grouper = itertools.groupby(argv, lambda x: x == '--content')
halves = [list(group) for (key, group) in grouper]
# halves looks like [pre, '--content', post]
name_args = halves[0]
# Pop the padding
name_args.pop(0)
# Every group after the first '--content' group is flattened into one list.
content_args = [item for chunk in halves[2:] for item in chunk]
# Bare positional terms and --all share the dest 'yes_all'.
parser.add_argument('yes_all', nargs='*', default=None)
parser.add_argument('--all', dest='yes_all', nargs='+')
parser.add_argument('--any', dest='yes_any', nargs='+')
parser.add_argument('--not_all', dest='not_all', nargs='+')
parser.add_argument('--not_any', dest='not_any', nargs='+')
parser.add_argument('--case', dest='case_sensitive', action='store_true')
parser.add_argument('--regex', dest='do_regex', action='store_true')
parser.add_argument('--glob', dest='do_glob', action='store_true')
parser.add_argument('--content', dest='do_content', action='store_true')
parser.add_argument('--count', dest='show_count', action='store_true')
parser.add_argument('--expression', dest='do_expression', action='store_true')
parser.add_argument('--glob', dest='do_glob', action='store_true')
parser.add_argument('--line_numbers', dest='line_numbers', action='store_true')
parser.add_argument('--local', dest='local_only', action='store_true')
parser.add_argument('--inverse', dest='inverse', action='store_true')
parser.add_argument('--regex', dest='do_regex', action='store_true')
parser.add_argument('--text', dest='text', default=None)
parser.set_defaults(func=search_argparse)
# NOTE(review): parsing the full argv is presumably the superseded call;
# parsing name_args (everything before '--content') looks like the current one.
args = parser.parse_args(argv)
args = parser.parse_args(name_args)
# The arguments after '--content' are parsed with the same parser and attached
# to the main namespace as content_args (None when there are none).
if content_args:
args.content_args = parser.parse_args(content_args)
else:
args.content_args = None
args.func(args)
if __name__ == '__main__':