diff --git a/cleanerupper/plugin.py b/cleanerupper/plugin.py index e821289..3d172f8 100644 --- a/cleanerupper/plugin.py +++ b/cleanerupper/plugin.py @@ -195,6 +195,7 @@ def remove_unwanted_classes_ids(soup): PATTERNS = [ r'big\d+', r'blnonindent\d*', + r'bodyMatter', r'c\d+', r'calibre_?\d*', r'calibre_pb_\d+', @@ -217,6 +218,7 @@ def remove_unwanted_classes_ids(soup): r'page_?\d+', r'page_top_padding', r'pagebreak', + r'para', r'pgepubid\d*', r'right', r'section', @@ -351,6 +353,10 @@ def replace_classes_real_tags(soup): 'div.blockquote': 'blockquote', 'div.center': 'center', 'div.center1': 'center', + 'div.ext': 'blockquote', + 'div.extract': 'blockquote', + 'div.p': 'p', + 'div.pp': 'p', 'p.block': 'blockquote', 'p.block1': 'blockquote', 'p.block2': 'blockquote', @@ -358,12 +364,18 @@ def replace_classes_real_tags(soup): 'p.blockquote': 'blockquote', 'p.center': 'center', 'p.center1': 'center', + 'p.p': 'p', + 'p.pp': 'p', 'span.b': 'b', 'span.i': 'i', 'span.italic': 'i', + 'span.sc': 'small', 'span.small': 'small', 'span.small1': 'small', 'span.smallcaps': 'small', + 'span.smallCaps': 'small', + 'span.strike': 'strike', + 'span.under': 'u', 'span.underline': 'u', } for (selector, new_name) in replace.items():