Add soup cleaner fix_reddit_links to rewrite reddit.com links to old.reddit.com.

Just in case I forget to type it as such in the document itself.
This commit is contained in:
Ethan Dalool 2020-05-20 09:16:31 -07:00
parent c7c581ad7a
commit 25d4f70377

View file

@ -556,6 +556,12 @@ def fix_classes(soup):
if get_innertext(element.nextSibling) == '(': if get_innertext(element.nextSibling) == '(':
fix_argument_call_classes(element) fix_argument_call_classes(element)
def fix_reddit_links(soup):
    '''
    Rewrite every <a> href that points at reddit.com (optionally prefixed
    with http://, https:// and/or www.) so it points at old.reddit.com.

    The soup is modified in place; nothing is returned. Anchors with a
    missing or empty href are left untouched.
    '''
    # The lookahead requires the host to end right after "reddit.com" (end of
    # string or a URL delimiter), so unrelated hosts that merely start with
    # that text, like reddit.community or reddit.com.example.org, are not
    # rewritten.
    pattern = re.compile(r'^(https?://)?(www\.)?reddit\.com(?=[/?#:]|$)')
    for a in soup.find_all('a'):
        href = a.get('href')
        if not href:
            continue
        # \1 keeps the original scheme; when the href had no scheme the group
        # is unmatched and expands to an empty string.
        a['href'] = pattern.sub(r'\1old.reddit.com', href)
# FINAL MARKDOWNS # FINAL MARKDOWNS
################################################################################ ################################################################################
def markdown( def markdown(
@ -574,21 +580,28 @@ def markdown(
css = cat_files(css) css = cat_files(css)
body = VMARKDOWN(md) body = VMARKDOWN(md)
if footnote_link_index != footnote_text_index: if footnote_link_index != footnote_text_index:
links = footnote_link_index-1 links = footnote_link_index-1
texts = footnote_text_index-1 texts = footnote_text_index-1
warnings.warn(f'There are {links} footnote links, but {texts} texts.') warnings.warn(f'There are {links} footnote links, but {texts} texts.')
html = HTML_TEMPLATE.format(css=css, body=body) html = HTML_TEMPLATE.format(css=css, body=body)
# HTML cleaning
html = html_replacements(html) html = html_replacements(html)
soup = bs4.BeautifulSoup(html, 'html.parser') soup = bs4.BeautifulSoup(html, 'html.parser')
# Soup cleaning
# Make sure to add_head_title before add_header_anchors so you don't get # Make sure to add_head_title before add_header_anchors so you don't get
# the paragraph symbol in the <title>. # the paragraph symbol in the <title>.
add_head_title(soup) add_head_title(soup)
add_header_anchors(soup) add_header_anchors(soup)
add_toc(soup) add_toc(soup)
fix_classes(soup) fix_classes(soup)
fix_reddit_links(soup)
if do_embed_images: if do_embed_images:
embed_images(soup, cache=image_cache) embed_images(soup, cache=image_cache)