Search for header elements directly instead of scanning descendants.

This commit is contained in:
Ethan Dalool 2020-02-03 22:40:48 -08:00
parent edbd1e9996
commit 3e254bddf1

View file

@@ -1019,7 +1019,6 @@ class Epub:
current_level = None current_level = None
current_list = toc.ol current_list = toc.ol
toc_line_index = 1 toc_line_index = 1
HEADER_TAGS = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']
spine = self.get_spine_order(linear_only=linear_only) spine = self.get_spine_order(linear_only=linear_only)
spine = [s for s in spine if s != nav_id] spine = [s for s in spine if s != nav_id]
@@ -1028,9 +1027,7 @@ class Epub:
file_path = self.get_filepath(file_id) file_path = self.get_filepath(file_id)
soup = self.read_file(file_id, soup=True) soup = self.read_file(file_id, soup=True)
for header in soup.descendants: for header in soup.find_all(re.compile(r'^h[1-6]$')):
if header.name not in HEADER_TAGS:
continue
# 'hX' -> X # 'hX' -> X
level = int(header.name[1]) level = int(header.name[1])
if max_level is not None and level > max_level: if max_level is not None and level > max_level: