From 205bbf61a2dc66095cd6bea09abe0b822b7abf4e Mon Sep 17 00:00:00 2001 From: Ethan Dalool Date: Tue, 7 Jan 2020 18:23:44 -0800 Subject: [PATCH] Add remove_header_br. --- cleanerupper/plugin.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/cleanerupper/plugin.py b/cleanerupper/plugin.py index 2be0373..692c065 100644 --- a/cleanerupper/plugin.py +++ b/cleanerupper/plugin.py @@ -64,6 +64,16 @@ def bring_punctuation_into_italics(html): html = re.sub('\\{punct}'.format(**locals()), '{punct}'.format(**locals()), html) return html +@html_cleaner +def remove_header_br(html): + html = re.sub(r'

([^\n]+?)\s*
\s*([^\n]+?)

', r'

\1 \2

', html) + html = re.sub(r'

([^\n]+?)\s*
\s*([^\n]+?)

', r'

\1 \2

', html) + html = re.sub(r'

([^\n]+?)\s*
\s*([^\n]+?)

', r'

\1 \2

', html) + html = re.sub(r'

([^\n]+?)\s*
\s*([^\n]+?)

', r'

\1 \2

', html) + html = re.sub(r'
([^\n]+?)\s*
\s*([^\n]+?)
', r'
\1 \2
', html) + html = re.sub(r'
([^\n]+?)\s*
\s*([^\n]+?)
', r'
\1 \2
', html) + return html + @html_cleaner def remove_misc_strings(html): html = html.replace('epub:type="pagebreak"', '')