Add function promote_xhtml_headers.
It isn't used anywhere yet, but it may as well exist as the counterpart to demote_xhtml_headers.
This commit is contained in:
parent
59776e5b4e
commit
9411e8cf80
1 changed files with 33 additions and 20 deletions
53
epubfile.py
53
epubfile.py
|
@ -156,26 +156,6 @@ def extract_epub(epub_filepath, directory):
|
||||||
with zipfile.ZipFile(epub_filepath.absolute_path, 'r') as z:
|
with zipfile.ZipFile(epub_filepath.absolute_path, 'r') as z:
|
||||||
z.extractall(directory.absolute_path)
|
z.extractall(directory.absolute_path)
|
||||||
|
|
||||||
def demote_xhtml_headers(xhtml, return_soup=False):
    '''
    Shift every h1-h5 heading in the document down one level.

    xhtml:
        Either a string of markup or a bs4.BeautifulSoup object. A soup is
        converted to a string before the substitutions run.

    return_soup:
        If True, the rewritten markup is parsed with html5lib and returned
        as a bs4.BeautifulSoup. Otherwise the rewritten string is returned.
    '''
    markup = str(xhtml) if isinstance(xhtml, bs4.BeautifulSoup) else xhtml

    # Deepest level first: once h5 has become h6, the later h4 -> h5 rule
    # cannot touch it again, so each heading moves exactly one step.
    # There is no rule for h6, so existing h6 headings are left alone.
    level_shifts = (
        (r'<h5([^>]*?>.*?)</h5>', r'<h6\1</h6>'),
        (r'<h4([^>]*?>.*?)</h4>', r'<h5\1</h5>'),
        (r'<h3([^>]*?>.*?)</h3>', r'<h4\1</h4>'),
        (r'<h2([^>]*?>.*?)</h2>', r'<h3\1</h3>'),
        (r'<h1([^>]*?>.*?)</h1>', r'<h2\1</h2>'),
    )
    for (pattern, substitution) in level_shifts:
        # DOTALL lets a heading's content span multiple lines.
        markup = re.sub(pattern, substitution, markup, flags=re.DOTALL)

    if not return_soup:
        return markup

    return bs4.BeautifulSoup(markup, 'html5lib')
|
|
||||||
|
|
||||||
def fix_xhtml(xhtml, return_soup=False):
|
def fix_xhtml(xhtml, return_soup=False):
|
||||||
if isinstance(xhtml, bs4.BeautifulSoup):
|
if isinstance(xhtml, bs4.BeautifulSoup):
|
||||||
soup = xhtml
|
soup = xhtml
|
||||||
|
@ -228,6 +208,39 @@ def fix_xhtml(xhtml, return_soup=False):
|
||||||
return soup
|
return soup
|
||||||
return str(soup)
|
return str(soup)
|
||||||
|
|
||||||
|
def xhtml_replacements(xhtml, replacements, return_soup=False):
    '''
    Apply a sequence of regex substitutions to a piece of markup.

    xhtml:
        Either a string of markup or a bs4.BeautifulSoup object. A soup is
        converted to a string before the substitutions run.

    replacements:
        An iterable of (pattern, replacement) pairs. They are applied in
        order with re.sub, each one operating on the output of the previous.

    return_soup:
        If True, the rewritten markup is parsed with html5lib and returned
        as a bs4.BeautifulSoup. Otherwise the rewritten string is returned.
    '''
    # re.sub needs text, so serialize a soup first.
    markup = str(xhtml) if isinstance(xhtml, bs4.BeautifulSoup) else xhtml

    for (pattern, substitution) in replacements:
        # DOTALL lets `.` cross newlines, so a match can span several lines.
        markup = re.sub(pattern, substitution, markup, flags=re.DOTALL)

    if not return_soup:
        return markup

    return bs4.BeautifulSoup(markup, 'html5lib')
|
||||||
|
|
||||||
|
def demote_xhtml_headers(xhtml, return_soup=False):
    '''
    Shift every h1-h5 heading in the document down one level.

    The xhtml and return_soup arguments are passed straight through to
    xhtml_replacements, which performs the substitutions and decides the
    return type.
    '''
    # Deepest level first: once h5 has become h6, the later h4 -> h5 rule
    # cannot touch it again, so each heading moves exactly one step.
    # There is no rule for h6, so existing h6 headings are left alone.
    level_shifts = (
        (r'<h5([^>]*?>.*?)</h5>', r'<h6\1</h6>'),
        (r'<h4([^>]*?>.*?)</h4>', r'<h5\1</h5>'),
        (r'<h3([^>]*?>.*?)</h3>', r'<h4\1</h4>'),
        (r'<h2([^>]*?>.*?)</h2>', r'<h3\1</h3>'),
        (r'<h1([^>]*?>.*?)</h1>', r'<h2\1</h2>'),
    )
    demoted = xhtml_replacements(xhtml, level_shifts, return_soup=return_soup)
    return demoted
|
||||||
|
|
||||||
|
def promote_xhtml_headers(xhtml, return_soup=False):
    '''
    Shift every h2-h6 heading in the document up one level.

    The xhtml and return_soup arguments are passed straight through to
    xhtml_replacements, which performs the substitutions and decides the
    return type.
    '''
    # Shallowest level first: once h2 has become h1, the later h3 -> h2 rule
    # produces h2 elements that no remaining rule will touch, so each heading
    # moves exactly one step. There is no rule for h1, so existing h1
    # headings are left alone.
    level_shifts = (
        (r'<h2([^>]*?>.*?)</h2>', r'<h1\1</h1>'),
        (r'<h3([^>]*?>.*?)</h3>', r'<h2\1</h2>'),
        (r'<h4([^>]*?>.*?)</h4>', r'<h3\1</h3>'),
        (r'<h5([^>]*?>.*?)</h5>', r'<h4\1</h4>'),
        (r'<h6([^>]*?>.*?)</h6>', r'<h5\1</h5>'),
    )
    promoted = xhtml_replacements(xhtml, level_shifts, return_soup=return_soup)
    return promoted
|
||||||
|
|
||||||
def get_directory_for_mimetype(mime):
|
def get_directory_for_mimetype(mime):
|
||||||
directory = (
|
directory = (
|
||||||
MIMETYPE_DIRECTORIES.get(mime) or
|
MIMETYPE_DIRECTORIES.get(mime) or
|
||||||
|
|
Loading…
Reference in a new issue