Make better use of generators when searching file text.
This commit is contained in:
parent
cb226265a3
commit
389f22faff
1 changed file with 24 additions and 8 deletions
30
search.py
30
search.py
|
@ -50,22 +50,36 @@ def all_terms_match(search_text, terms, match_function):
|
||||||
)
|
)
|
||||||
return matches
|
return matches
|
||||||
|
|
||||||
|
def is_iterable(something):
|
||||||
|
try:
|
||||||
|
iter(something)
|
||||||
|
return True
|
||||||
|
except TypeError:
|
||||||
|
return False
|
||||||
|
|
||||||
def search_contents_generic(filepath, content_args):
|
def search_contents_generic(filepath, content_args):
|
||||||
|
# We first test 1 MB of the file to see if it is text rather than binary.
|
||||||
try:
|
try:
|
||||||
text = filepath.read('r')
|
handle = filepath.open('r')
|
||||||
|
handle.read(2 ** 20)
|
||||||
except UnicodeDecodeError:
|
except UnicodeDecodeError:
|
||||||
try:
|
try:
|
||||||
text = filepath.read('r', encoding='utf-8')
|
handle.close()
|
||||||
|
handle = filepath.open('r', encoding='utf-8')
|
||||||
|
handle.read(2 ** 20)
|
||||||
except UnicodeDecodeError:
|
except UnicodeDecodeError:
|
||||||
#safeprint.safeprint(filepath.absolute_path)
|
log.debug('%s could not be read with encoding=utf-8.', filepath)
|
||||||
#traceback.print_exc()
|
|
||||||
return
|
return
|
||||||
except Exception:
|
except Exception:
|
||||||
safeprint.safeprint(filepath.absolute_path)
|
safeprint.safeprint(filepath.absolute_path)
|
||||||
traceback.print_exc()
|
traceback.print_exc()
|
||||||
return
|
return
|
||||||
|
|
||||||
content_args['text'] = text
|
# We keep the lines as a generator instead of using readlines,
|
||||||
|
# which makes a list.
|
||||||
|
handle.seek(0)
|
||||||
|
lines = (line.rstrip('\r\n') for line in handle)
|
||||||
|
content_args['text'] = lines
|
||||||
content_args['line_numbers'] = True
|
content_args['line_numbers'] = True
|
||||||
|
|
||||||
results = search(**content_args)
|
results = search(**content_args)
|
||||||
|
@ -171,10 +185,12 @@ def search(
|
||||||
recurse=not local_only,
|
recurse=not local_only,
|
||||||
yield_directories=True,
|
yield_directories=True,
|
||||||
)
|
)
|
||||||
elif isinstance(text, (list, tuple)) or inspect.isgenerator(text):
|
elif isinstance(text, str):
|
||||||
|
search_objects = text.splitlines()
|
||||||
|
elif is_iterable(text):
|
||||||
search_objects = text
|
search_objects = text
|
||||||
else:
|
else:
|
||||||
search_objects = text.splitlines()
|
raise TypeError(f'Don\'t know how to search text={text}')
|
||||||
|
|
||||||
for (index, search_object) in enumerate(search_objects):
|
for (index, search_object) in enumerate(search_objects):
|
||||||
# if index % 10 == 0:
|
# if index % 10 == 0:
|
||||||
|
|
Loading…
Reference in a new issue