Create a regex pattern for exactly the needed headers, with no post-filtering.

This commit is contained in:
Ethan Dalool 2020-02-03 23:07:02 -08:00
parent 3e254bddf1
commit 3cfb8030a7

View file

@@ -1004,6 +1004,15 @@ class Epub:
return r return r
return r.ol return r.ol
# Official HTML headers only go up to 6.
if max_level is None:
max_level = 6
elif max_level < 1:
raise ValueError('max_level must be >= 1.')
header_pattern = re.compile(rf'^h[1-{max_level}]$')
nav_id = self.get_nav() nav_id = self.get_nav()
if nav_id: if nav_id:
nav_filepath = self.get_filepath(nav_id) nav_filepath = self.get_filepath(nav_id)
@@ -1027,11 +1036,9 @@ class Epub:
file_path = self.get_filepath(file_id) file_path = self.get_filepath(file_id)
soup = self.read_file(file_id, soup=True) soup = self.read_file(file_id, soup=True)
for header in soup.find_all(re.compile(r'^h[1-6]$')): for header in soup.find_all(header_pattern):
# 'hX' -> X # 'hX' -> X
level = int(header.name[1]) level = int(header.name[1])
if max_level is not None and level > max_level:
continue
header['id'] = f'toc_{toc_line_index}' header['id'] = f'toc_{toc_line_index}'
toc_line_index += 1 toc_line_index += 1