Add more classes to delete (in remove_unwanted_classes_ids) and to convert to real tags (in replace_classes_real_tags).

This commit is contained in:
Ethan Dalool 2019-12-29 13:08:31 -08:00
parent 3ad862a9d0
commit b890aba570

View file

@ -195,6 +195,7 @@ def remove_unwanted_classes_ids(soup):
PATTERNS = [ PATTERNS = [
r'big\d+', r'big\d+',
r'blnonindent\d*', r'blnonindent\d*',
r'bodyMatter',
r'c\d+', r'c\d+',
r'calibre_?\d*', r'calibre_?\d*',
r'calibre_pb_\d+', r'calibre_pb_\d+',
@ -217,6 +218,7 @@ def remove_unwanted_classes_ids(soup):
r'page_?\d+', r'page_?\d+',
r'page_top_padding', r'page_top_padding',
r'pagebreak', r'pagebreak',
r'para',
r'pgepubid\d*', r'pgepubid\d*',
r'right', r'right',
r'section', r'section',
@ -351,6 +353,10 @@ def replace_classes_real_tags(soup):
'div.blockquote': 'blockquote', 'div.blockquote': 'blockquote',
'div.center': 'center', 'div.center': 'center',
'div.center1': 'center', 'div.center1': 'center',
'div.ext': 'blockquote',
'div.extract': 'blockquote',
'div.p': 'p',
'div.pp': 'p',
'p.block': 'blockquote', 'p.block': 'blockquote',
'p.block1': 'blockquote', 'p.block1': 'blockquote',
'p.block2': 'blockquote', 'p.block2': 'blockquote',
@ -358,12 +364,18 @@ def replace_classes_real_tags(soup):
'p.blockquote': 'blockquote', 'p.blockquote': 'blockquote',
'p.center': 'center', 'p.center': 'center',
'p.center1': 'center', 'p.center1': 'center',
'p.p': 'p',
'p.pp': 'p',
'span.b': 'b', 'span.b': 'b',
'span.i': 'i', 'span.i': 'i',
'span.italic': 'i', 'span.italic': 'i',
'span.sc': 'small',
'span.small': 'small', 'span.small': 'small',
'span.small1': 'small', 'span.small1': 'small',
'span.smallcaps': 'small', 'span.smallcaps': 'small',
'span.smallCaps': 'small',
'span.strike': 'strike',
'span.under': 'u',
'span.underline': 'u', 'span.underline': 'u',
} }
for (selector, new_name) in replace.items(): for (selector, new_name) in replace.items():