Add function promote_xhtml_headers.
It isn't used anywhere yet, but it is added for symmetry with demote_xhtml_headers.
This commit is contained in:
parent
59776e5b4e
commit
9411e8cf80
1 changed file with 33 additions and 20 deletions
53
epubfile.py
53
epubfile.py
|
@ -156,26 +156,6 @@ def extract_epub(epub_filepath, directory):
|
|||
with zipfile.ZipFile(epub_filepath.absolute_path, 'r') as z:
|
||||
z.extractall(directory.absolute_path)
|
||||
|
||||
def demote_xhtml_headers(xhtml, return_soup=False):
    '''
    Push every <h1> through <h5> element down one level (h1 becomes h2,
    ..., h5 becomes h6).

    xhtml may be a string or a bs4.BeautifulSoup; a soup is converted to
    its string form before the substitutions run.

    Returns the rewritten markup as a string, or, if return_soup is true,
    that markup parsed with html5lib into a new bs4.BeautifulSoup.
    '''
    markup = str(xhtml) if isinstance(xhtml, bs4.BeautifulSoup) else xhtml

    # Listed from h5 up to h1 so that a header which has just been demoted
    # is not matched again by a later substitution.
    header_shifts = [
        (r'<h5([^>]*?>.*?)</h5>', r'<h6\1</h6>'),
        (r'<h4([^>]*?>.*?)</h4>', r'<h5\1</h5>'),
        (r'<h3([^>]*?>.*?)</h3>', r'<h4\1</h4>'),
        (r'<h2([^>]*?>.*?)</h2>', r'<h3\1</h3>'),
        (r'<h1([^>]*?>.*?)</h1>', r'<h2\1</h2>'),
    ]
    for (pattern, substitute) in header_shifts:
        # DOTALL lets a header's content span multiple lines.
        markup = re.sub(pattern, substitute, markup, flags=re.DOTALL)

    if not return_soup:
        return markup

    return bs4.BeautifulSoup(markup, 'html5lib')
|
||||
|
||||
def fix_xhtml(xhtml, return_soup=False):
|
||||
if isinstance(xhtml, bs4.BeautifulSoup):
|
||||
soup = xhtml
|
||||
|
@ -228,6 +208,39 @@ def fix_xhtml(xhtml, return_soup=False):
|
|||
return soup
|
||||
return str(soup)
|
||||
|
||||
def xhtml_replacements(xhtml, replacements, return_soup=False):
    '''
    Apply a sequence of regex substitutions to a piece of XHTML.

    xhtml may be a string or a bs4.BeautifulSoup; a soup is converted to
    its string form before the substitutions run.

    replacements is an iterable of (pattern, replacement) pairs. They are
    applied one after another, in the order given, each with re.DOTALL.

    Returns the rewritten markup as a string, or, if return_soup is true,
    that markup parsed with html5lib into a new bs4.BeautifulSoup.
    '''
    markup = str(xhtml) if isinstance(xhtml, bs4.BeautifulSoup) else xhtml

    for (pattern, substitute) in replacements:
        # Each substitution operates on the output of the previous one.
        markup = re.sub(pattern, substitute, markup, flags=re.DOTALL)

    if not return_soup:
        return markup

    return bs4.BeautifulSoup(markup, 'html5lib')
|
||||
|
||||
def demote_xhtml_headers(xhtml, return_soup=False):
    '''
    Push every <h1> through <h5> element down one level (h1 becomes h2,
    ..., h5 becomes h6).

    The substitutions are handed to xhtml_replacements, which performs
    them and produces the return value; xhtml and return_soup are passed
    through to it unchanged.
    '''
    # Listed from h5 up to h1 so that a header which has just been demoted
    # is not matched again by a later substitution.
    demotions = [
        (r'<h5([^>]*?>.*?)</h5>', r'<h6\1</h6>'),
        (r'<h4([^>]*?>.*?)</h4>', r'<h5\1</h5>'),
        (r'<h3([^>]*?>.*?)</h3>', r'<h4\1</h4>'),
        (r'<h2([^>]*?>.*?)</h2>', r'<h3\1</h3>'),
        (r'<h1([^>]*?>.*?)</h1>', r'<h2\1</h2>'),
    ]
    result = xhtml_replacements(xhtml, demotions, return_soup=return_soup)
    return result
|
||||
|
||||
def promote_xhtml_headers(xhtml, return_soup=False):
    '''
    Pull every <h2> through <h6> element up one level (h2 becomes h1,
    ..., h6 becomes h5).

    The substitutions are handed to xhtml_replacements, which performs
    them and produces the return value; xhtml and return_soup are passed
    through to it unchanged.
    '''
    # Listed from h2 down to h6 so that a header which has just been
    # promoted is not matched again by a later substitution.
    promotions = [
        (r'<h2([^>]*?>.*?)</h2>', r'<h1\1</h1>'),
        (r'<h3([^>]*?>.*?)</h3>', r'<h2\1</h2>'),
        (r'<h4([^>]*?>.*?)</h4>', r'<h3\1</h3>'),
        (r'<h5([^>]*?>.*?)</h5>', r'<h4\1</h4>'),
        (r'<h6([^>]*?>.*?)</h6>', r'<h5\1</h5>'),
    ]
    result = xhtml_replacements(xhtml, promotions, return_soup=return_soup)
    return result
|
||||
|
||||
def get_directory_for_mimetype(mime):
|
||||
directory = (
|
||||
MIMETYPE_DIRECTORIES.get(mime) or
|
||||
|
|
Loading…
Reference in a new issue