From ca1c5e63736c649365058f8ee741e2318804e460 Mon Sep 17 00:00:00 2001 From: Ethan Dalool Date: Sun, 5 Jan 2020 23:33:15 -0800 Subject: [PATCH] More classes. --- cleanerupper/plugin.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cleanerupper/plugin.py b/cleanerupper/plugin.py index 5ffb558..95ac67f 100644 --- a/cleanerupper/plugin.py +++ b/cleanerupper/plugin.py @@ -212,6 +212,7 @@ def remove_unwanted_classes_ids(soup): r'large', r'mbp_?pagebreak', r'morespaceabove', + r'noindent\d*', r'nonindent\d*', r'p_?[ivx]+', r'p_?\d+', @@ -223,10 +224,12 @@ def remove_unwanted_classes_ids(soup): r'pgepubid\d*', r'right', r'section', + r'space[Bb]reak', r'spaceabove', r'squeeze(\d+)?', r'stickupcaps', r'title', + r'xrefInternal', ] for tag in soup.descendants: if not isinstance(tag, bs4.element.Tag):