Add, improve some comments, docstrings, exception texts.
This commit is contained in:
parent
973731f797
commit
fd73ecfe2e
1 changed files with 19 additions and 4 deletions
23
epubfile.py
23
epubfile.py
|
@ -258,6 +258,7 @@ def make_meta_item(content=None, attrs=None):
|
||||||
meta_item = f'<meta>{content}</meta>'
|
meta_item = f'<meta>{content}</meta>'
|
||||||
else:
|
else:
|
||||||
meta_item = f'<meta/>'
|
meta_item = f'<meta/>'
|
||||||
|
# 'html.parser' just for having the simplest output.
|
||||||
meta_item = bs4.BeautifulSoup(meta_item, 'html.parser')
|
meta_item = bs4.BeautifulSoup(meta_item, 'html.parser')
|
||||||
if attrs:
|
if attrs:
|
||||||
meta_item.attrs.update(attrs)
|
meta_item.attrs.update(attrs)
|
||||||
|
@ -302,6 +303,7 @@ class NotInSpine(EpubfileException):
|
||||||
class Epub:
|
class Epub:
|
||||||
def __init__(self, directory, _original_epub_filepath=None):
|
def __init__(self, directory, _original_epub_filepath=None):
|
||||||
if isinstance(directory, tempfile.TemporaryDirectory):
|
if isinstance(directory, tempfile.TemporaryDirectory):
|
||||||
|
# Hold a reference so the tempdir doesn't clean up.
|
||||||
self._tempdir_reference = directory
|
self._tempdir_reference = directory
|
||||||
directory = directory.name
|
directory = directory.name
|
||||||
|
|
||||||
|
@ -385,7 +387,8 @@ class Epub:
|
||||||
def read_container_xml(self):
|
def read_container_xml(self):
|
||||||
container_xml_path = self.root_directory.join('META-INF/container.xml')
|
container_xml_path = self.root_directory.join('META-INF/container.xml')
|
||||||
container = open(container_xml_path.absolute_path, 'r', encoding='utf-8')
|
container = open(container_xml_path.absolute_path, 'r', encoding='utf-8')
|
||||||
# 'xml' and 'html.parser' seem about even here except that html.parser doesn't self-close.
|
# 'xml' and 'html.parser' seem about even here except that html.parser
|
||||||
|
# doesn't self-close.
|
||||||
container = bs4.BeautifulSoup(container, 'xml')
|
container = bs4.BeautifulSoup(container, 'xml')
|
||||||
return container
|
return container
|
||||||
|
|
||||||
|
@ -400,11 +403,13 @@ class Epub:
|
||||||
# the whole doc. 'lxml' wraps the content in <html><body> and also
|
# the whole doc. 'lxml' wraps the content in <html><body> and also
|
||||||
# botches the metas so it's not any better than html.parser.
|
# botches the metas so it's not any better than html.parser.
|
||||||
self.opf = bs4.BeautifulSoup(rootfile_xml, 'html.parser')
|
self.opf = bs4.BeautifulSoup(rootfile_xml, 'html.parser')
|
||||||
|
|
||||||
# Let's fix those metas.
|
# Let's fix those metas.
|
||||||
metas = self.opf.select('meta')
|
metas = self.opf.select('meta')
|
||||||
for meta in metas:
|
for meta in metas:
|
||||||
neighbor = meta.next
|
neighbor = meta.next
|
||||||
if neighbor.parent != meta.parent:
|
if neighbor.parent != meta.parent:
|
||||||
|
# This happens on the last meta: its neighbor lies outside of the meta's parent element.
|
||||||
break
|
break
|
||||||
if not isinstance(neighbor, bs4.element.NavigableString):
|
if not isinstance(neighbor, bs4.element.NavigableString):
|
||||||
continue
|
continue
|
||||||
|
@ -447,7 +452,7 @@ class Epub:
|
||||||
elif isinstance(content, bytes):
|
elif isinstance(content, bytes):
|
||||||
handle = open(filepath.absolute_path, 'wb')
|
handle = open(filepath.absolute_path, 'wb')
|
||||||
else:
|
else:
|
||||||
raise TypeError(type(content))
|
raise TypeError(f'content should be str or bytes, not {type(content)}.')
|
||||||
|
|
||||||
with handle:
|
with handle:
|
||||||
handle.write(content)
|
handle.write(content)
|
||||||
|
@ -491,7 +496,7 @@ class Epub:
|
||||||
|
|
||||||
def open_file(self, id, mode):
|
def open_file(self, id, mode):
|
||||||
if mode not in ('r', 'w'):
|
if mode not in ('r', 'w'):
|
||||||
raise ValueError(f'Mode {mode} should be either r or w.')
|
raise ValueError(f'mode should be either r or w, not {mode}.')
|
||||||
|
|
||||||
filepath = self.get_filepath(id)
|
filepath = self.get_filepath(id)
|
||||||
mime = self.opf.manifest.find('item', {'id': id})['media-type']
|
mime = self.opf.manifest.find('item', {'id': id})['media-type']
|
||||||
|
@ -898,6 +903,9 @@ class Epub:
|
||||||
self.fix_interlinking_ncx(rename_map)
|
self.fix_interlinking_ncx(rename_map)
|
||||||
|
|
||||||
def _set_nav_toc(self, nav_id, new_toc):
|
def _set_nav_toc(self, nav_id, new_toc):
|
||||||
|
'''
|
||||||
|
Write the table of contents created by `generate_toc` to the nav file.
|
||||||
|
'''
|
||||||
for li in new_toc.find_all('li'):
|
for li in new_toc.find_all('li'):
|
||||||
href = li['nav_anchor']
|
href = li['nav_anchor']
|
||||||
atag = new_toc.new_tag('a')
|
atag = new_toc.new_tag('a')
|
||||||
|
@ -919,6 +927,9 @@ class Epub:
|
||||||
self.write_file(nav_id, soup)
|
self.write_file(nav_id, soup)
|
||||||
|
|
||||||
def _set_ncx_toc(self, ncx_id, new_toc):
|
def _set_ncx_toc(self, ncx_id, new_toc):
|
||||||
|
'''
|
||||||
|
Write the table of contents created by `generate_toc` to the ncx file.
|
||||||
|
'''
|
||||||
play_order = 1
|
play_order = 1
|
||||||
def li_to_navpoint(li):
|
def li_to_navpoint(li):
|
||||||
# result:
|
# result:
|
||||||
|
@ -964,6 +975,10 @@ class Epub:
|
||||||
'''
|
'''
|
||||||
Generate the table of contents (toc.ncx and nav.xhtml) by collecting
|
Generate the table of contents (toc.ncx and nav.xhtml) by collecting
|
||||||
<h1>..<h6> throughout all of the text documents.
|
<h1>..<h6> throughout all of the text documents.
|
||||||
|
|
||||||
|
max_level: If provided, only collect the headers from h1..hX, inclusive.
|
||||||
|
|
||||||
|
linear_only: Ignore spine items that are marked as linear=no.
|
||||||
'''
|
'''
|
||||||
def new_list(root=False):
|
def new_list(root=False):
|
||||||
r = bs4.BeautifulSoup('<ol></ol>', 'html.parser')
|
r = bs4.BeautifulSoup('<ol></ol>', 'html.parser')
|
||||||
|
@ -1058,7 +1073,7 @@ class Epub:
|
||||||
|
|
||||||
def move_nav_to_end(self):
|
def move_nav_to_end(self):
|
||||||
'''
|
'''
|
||||||
Move the nav.xhtml file to the end and set linear=no.
|
Move the nav.xhtml file to the end and set its linear=no.
|
||||||
'''
|
'''
|
||||||
nav = self.get_nav()
|
nav = self.get_nav()
|
||||||
if not nav:
|
if not nav:
|
||||||
|
|
Loading…
Reference in a new issue