def xhtml_replacements(xhtml, replacements, return_soup=False):
    """
    Apply a sequence of regex substitutions to an XHTML document.

    xhtml:
        Either a string or a bs4.BeautifulSoup object. Soup objects are
        converted to their string form before the substitutions run.

    replacements:
        An iterable of (pattern, replacement) pairs. Each pair is passed to
        re.sub with re.DOTALL, in order, and every substitution operates on
        the output of the previous one, so the order of the pairs matters.

    return_soup:
        If True, the resulting text is parsed with html5lib and returned as a
        bs4.BeautifulSoup object. Otherwise the resulting string is returned.
    """
    # Plain strings skip the soup check so the common case never needs to
    # look at bs4 at all.
    if not isinstance(xhtml, str) and isinstance(xhtml, bs4.BeautifulSoup):
        xhtml = str(xhtml)

    for (re_from, re_to) in replacements:
        # DOTALL lets `.` cross newlines, since element content frequently
        # spans multiple lines.
        xhtml = re.sub(re_from, re_to, xhtml, flags=re.DOTALL)

    if return_soup:
        return bs4.BeautifulSoup(xhtml, 'html5lib')

    return xhtml

def demote_xhtml_headers(xhtml, return_soup=False):
    """
    Push every heading one level down: h1 becomes h2, h2 becomes h3, and so
    on through h5 becoming h6. There is no rule for h6, so existing h6
    elements are left as they are.

    Attributes and inner content of each heading are carried over unchanged
    by the capture group.

    See xhtml_replacements for the meaning of xhtml and return_soup.
    """
    # Deepest level first. If h1 -> h2 ran first, the following h2 -> h3 rule
    # would match the headings that were just rewritten and demote them twice.
    replacements = [
        (r'<h5([^>]*?>.*?)</h5>', r'<h6\1</h6>'),
        (r'<h4([^>]*?>.*?)</h4>', r'<h5\1</h5>'),
        (r'<h3([^>]*?>.*?)</h3>', r'<h4\1</h4>'),
        (r'<h2([^>]*?>.*?)</h2>', r'<h3\1</h3>'),
        (r'<h1([^>]*?>.*?)</h1>', r'<h2\1</h2>'),
    ]
    return xhtml_replacements(xhtml, replacements, return_soup=return_soup)

def promote_xhtml_headers(xhtml, return_soup=False):
    """
    Pull every heading one level up: h2 becomes h1, h3 becomes h2, and so on
    through h6 becoming h5. There is no rule for h1, so existing h1 elements
    are left as they are.

    Attributes and inner content of each heading are carried over unchanged
    by the capture group.

    See xhtml_replacements for the meaning of xhtml and return_soup.
    """
    # Shallowest level first. If h6 -> h5 ran first, the following h5 -> h4
    # rule would match the headings that were just rewritten and promote them
    # twice.
    replacements = [
        (r'<h2([^>]*?>.*?)</h2>', r'<h1\1</h1>'),
        (r'<h3([^>]*?>.*?)</h3>', r'<h2\1</h2>'),
        (r'<h4([^>]*?>.*?)</h4>', r'<h3\1</h3>'),
        (r'<h5([^>]*?>.*?)</h5>', r'<h4\1</h4>'),
        (r'<h6([^>]*?>.*?)</h6>', r'<h5\1</h5>'),
    ]
    return xhtml_replacements(xhtml, replacements, return_soup=return_soup)