Add method generate_toc and commandline command.
This commit is contained in:
parent
365c28bfdb
commit
9e020eaa11
1 changed files with 187 additions and 0 deletions
187
epubfile.py
187
epubfile.py
|
@ -1,3 +1,4 @@
|
||||||
|
import copy
|
||||||
import mimetypes
|
import mimetypes
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
|
@ -834,6 +835,166 @@ class Epub:
|
||||||
self.fix_interlinking_text(id, rename_map)
|
self.fix_interlinking_text(id, rename_map)
|
||||||
self.fix_interlinking_ncx(rename_map)
|
self.fix_interlinking_ncx(rename_map)
|
||||||
|
|
||||||
|
def _set_nav_toc(self, nav_id, new_toc):
    '''
    Write the intermediate `new_toc` tree (an <ol> of <li> nodes built by
    generate_toc, each carrying temporary 'text', 'nav_anchor' and
    'ncx_anchor' attributes) into the nav document's <nav epub:type="toc">.
    '''
    # Convert each intermediate <li> into proper nav markup: an <a> child
    # holding the label and href, then strip the temporary attributes.
    for li in new_toc.find_all('li'):
        href = li['nav_anchor']
        atag = new_toc.new_tag('a')
        atag.append(li['text'])
        atag['href'] = href
        li.insert(0, atag)
        # bs4's Tag.__delitem__ is a no-op for missing attributes, so this
        # is safe even when only one of nav/ncx anchors was generated.
        del li['nav_anchor']
        del li['ncx_anchor']
        del li['text']
    soup = self.read_file(nav_id, soup=True)
    toc = soup.find('nav', {'epub:type': 'toc'})
    if not toc:
        # The nav document has no toc nav element yet; create one at the
        # top of <body>.
        toc = soup.new_tag('nav')
        toc['epub:type'] = 'toc'
        soup.body.insert(0, toc)
    if toc.ol:
        # Discard the previous table of contents before installing ours.
        toc.ol.extract()
    toc.append(new_toc.ol)
    self.write_file(nav_id, soup)
||||||
|
def _set_ncx_toc(self, ncx_id, new_toc):
    '''
    Write the intermediate `new_toc` tree (an <ol> of <li> nodes built by
    generate_toc, each carrying temporary 'text' and 'ncx_anchor'
    attributes) into the NCX document's <navMap> as nested <navPoint>s.
    '''
    # playOrder must be a document-wide increasing counter, shared across
    # the whole recursion via `nonlocal`.
    play_order = 1
    def li_to_navpoint(li):
        # result:
        # <navPoint id="navPoint{X}" playOrder="{X}">
        #   <navLabel>
        #     <text>{text}</text>
        #   </navLabel>
        #   <content src="{ncx_anchor}" />
        #   {children}
        # </navPoint>
        nonlocal play_order
        navpoint = new_toc.new_tag('navPoint', id=f'navPoint{play_order}', playOrder=play_order)
        play_order += 1
        label = new_toc.new_tag('navLabel')
        text = new_toc.new_tag('text')
        text.append(li['text'])
        label.append(text)
        navpoint.append(label)

        content = new_toc.new_tag('content', src=li['ncx_anchor'])
        navpoint.append(content)

        # A nested <ol> under this <li> holds the sub-entries; recurse on
        # each child <li> so they become child <navPoint>s.
        children = li.ol.children if li.ol else []
        children = [li_to_navpoint(li) for li in children]
        for child in children:
            navpoint.append(child)
        return navpoint

    # xml because we have to preserve the casing on navMap.
    soup = bs4.BeautifulSoup(self.read_file(ncx_id), 'xml')
    navmap = soup.navMap
    # Empty out the existing navMap. list() because extracting while
    # iterating the live .children generator would skip nodes.
    for child in list(navmap.children):
        child.extract()
    # Replace each top-level <li> in place with its converted <navPoint>,
    # so new_toc.ol ends up holding only navPoints.
    for li in list(new_toc.ol.children):
        navpoint = li_to_navpoint(li)
        li.insert_before(navpoint)
        li.extract()
    # Move the finished navPoints into the NCX's navMap.
    for navpoint in list(new_toc.ol.children):
        navmap.append(navpoint)
    self.write_file(ncx_id, soup)
||||||
|
def generate_toc(self, max_level=None, linear_only=True):
    '''
    Generate the table of contents (toc.nav and nav.xhtml) by collecting
    <h1>..<h6> throughout all of the text documents.

    max_level:
        If not None, only headers h1..h{max_level} become toc entries.
    linear_only:
        If True, only spine items marked linear are scanned.

    Side effect: every collected header is given an id="toc_X" attribute
    and the text files are rewritten so the anchors resolve.
    '''
    def new_list(root=False):
        # A fresh <ol>. With root=True, return the whole soup (so we can
        # keep calling .new_tag on it); otherwise just the <ol> tag.
        r = bs4.BeautifulSoup('<ol></ol>', 'html.parser')
        if root:
            return r
        return r.ol

    nav_id = self.get_nav()
    if nav_id:
        nav_filepath = self.get_filepath(nav_id)

    ncx_id = self.get_ncx()
    if ncx_id:
        ncx_filepath = self.get_filepath(ncx_id)

    # Nowhere to write a toc to; nothing to do.
    if not nav_id and not ncx_id:
        return

    toc = new_list(root=True)
    # current_level / current_list track the header depth and the <ol> we
    # are appending into; None until the first header is seen.
    current_level = None
    current_list = toc.ol
    toc_line_index = 1
    HEADER_TAGS = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']

    spine = self.get_spine_order(linear_only=linear_only)
    # The nav document's own headers must not appear in the toc.
    spine = [s for s in spine if s != nav_id]

    for file_id in spine:
        file_path = self.get_filepath(file_id)
        soup = self.read_file(file_id, soup=True)

        for header in soup.descendants:
            if header.name not in HEADER_TAGS:
                continue
            # 'hX' -> X
            level = int(header.name[1])
            if max_level is not None and level > max_level:
                continue

            # NOTE(review): this overwrites any pre-existing id on the
            # header — confirm no internal links rely on those ids.
            header['id'] = f'toc_{toc_line_index}'
            toc_line_index += 1

            # Temporary attributes consumed later by _set_nav_toc /
            # _set_ncx_toc; the anchors are relative to each toc file.
            toc_line = toc.new_tag('li')
            toc_line['text'] = header.text
            if nav_id:
                relative = file_path.relative_to(nav_filepath.parent, simple=True)
                toc_line['nav_anchor'] = f'{relative}#{header["id"]}'
            if ncx_id:
                relative = file_path.relative_to(ncx_filepath.parent, simple=True)
                toc_line['ncx_anchor'] = f'{relative}#{header["id"]}'

            if current_level is None:
                current_level = level

            while level < current_level:
                current_level -= 1
                # Because the sub-<ol> are actually a child of the last
                # <li> of the previous <ol>, we must .parent twice.
                current_list = current_list.parent
                if current_list.name == 'li':
                    current_list = current_list.parent
                # If the file has headers in a non-ascending order, like an
                # h4 and then an h1, then backstepping too far will take us
                # out of the list. So at that point we can just snap
                # current_level and start using the root list again.
                if current_list == toc:
                    current_level = level
                    current_list = toc.ol

            if level > current_level:
                current_level = level
                # In order to properly render nested <ol>, you're supposed
                # to make the new <ol> a child of the last <li> of the
                # previous <ol>.
                # Don't worry, .children can never be empty because on the
                # first <li> this condition can never occur, and new <ol>s
                # always receive a child right after being created.
                _l = new_list()
                list(current_list.children)[-1].append(_l)
                current_list = _l

            current_list.append(toc_line)

        # We have to save the id="toc_X" that we gave to all the headers.
        self.write_file(file_id, soup)

    # copy.copy because both setters destructively consume the tree.
    if nav_id:
        self._set_nav_toc(nav_id, copy.copy(toc))

    if ncx_id:
        self._set_ncx_toc(ncx_id, copy.copy(toc))
||||||
|
|
||||||
def move_nav_to_end(self):
|
def move_nav_to_end(self):
|
||||||
'''
|
'''
|
||||||
Move the nav.xhtml file to the end and set linear=no.
|
Move the nav.xhtml file to the end and set linear=no.
|
||||||
|
@ -944,6 +1105,19 @@ covercomesfirst:
|
||||||
first, otherwise some /a/image.jpg will always be before /images/cover.jpg.
|
first, otherwise some /a/image.jpg will always be before /images/cover.jpg.
|
||||||
'''.strip(),
|
'''.strip(),
|
||||||
|
|
||||||
|
'generate_toc':
|
||||||
|
'''
|
||||||
|
generate_toc:
|
||||||
|
Regenerate the toc.ncx and nav.xhtml based on headers in the files.
|
||||||
|
|
||||||
|
> epubfile.py generate_toc book.epub <flags>
|
||||||
|
|
||||||
|
flags:
|
||||||
|
--max_level X:
|
||||||
|
Only generate toc entries for headers up to level X.
|
||||||
|
That is, h1, h2, ... hX.
|
||||||
|
''',
|
||||||
|
|
||||||
'holdit':
|
'holdit':
|
||||||
'''
|
'''
|
||||||
holdit:
|
holdit:
|
||||||
|
@ -1050,6 +1224,14 @@ def covercomesfirst_argparse(args):
|
||||||
book = Epub.open(epub)
|
book = Epub.open(epub)
|
||||||
covercomesfirst(book)
|
covercomesfirst(book)
|
||||||
|
|
||||||
|
def generate_toc_argparse(args):
    '''
    Commandline entry point: regenerate the toc.ncx / nav.xhtml of every
    epub matched by the glob patterns in args.epubs, saving each in place.

    args.max_level, if given, is parsed as an int and limits toc entries
    to headers h1..h{max_level}.
    '''
    epubs = [epub for pattern in args.epubs for epub in glob.glob(pattern)]
    # (Fixed: dropped the unused `books = []` local copied over from
    # holdit_argparse — nothing was ever appended to it here.)
    for epub in epubs:
        book = Epub.open(epub)
        book.generate_toc(max_level=int(args.max_level) if args.max_level else None)
        book.save(epub)
||||||
def holdit_argparse(args):
|
def holdit_argparse(args):
|
||||||
epubs = [epub for pattern in args.epubs for epub in glob.glob(pattern)]
|
epubs = [epub for pattern in args.epubs for epub in glob.glob(pattern)]
|
||||||
books = []
|
books = []
|
||||||
|
@ -1153,6 +1335,11 @@ def main(argv):
|
||||||
p_covercomesfirst.add_argument('epubs', nargs='+', default=[])
|
p_covercomesfirst.add_argument('epubs', nargs='+', default=[])
|
||||||
p_covercomesfirst.set_defaults(func=covercomesfirst_argparse)
|
p_covercomesfirst.set_defaults(func=covercomesfirst_argparse)
|
||||||
|
|
||||||
|
p_generate_toc = subparsers.add_parser('generate_toc')
|
||||||
|
p_generate_toc.add_argument('epubs', nargs='+', default=[])
|
||||||
|
p_generate_toc.add_argument('--max_level', dest='max_level', default=None)
|
||||||
|
p_generate_toc.set_defaults(func=generate_toc_argparse)
|
||||||
|
|
||||||
p_holdit = subparsers.add_parser('holdit')
|
p_holdit = subparsers.add_parser('holdit')
|
||||||
p_holdit.add_argument('epubs', nargs='+', default=[])
|
p_holdit.add_argument('epubs', nargs='+', default=[])
|
||||||
p_holdit.set_defaults(func=holdit_argparse)
|
p_holdit.set_defaults(func=holdit_argparse)
|
||||||
|
|
Loading…
Reference in a new issue