voussoir.net/voussoir.net/writing/generate_site.py
2020-11-01 00:21:13 -07:00

484 lines
14 KiB
Python

import bs4
import etiquette
import html
import jinja2
import os
import pprint
import re
import subprocess
import vmarkdown
from voussoirkit import pathclass
from voussoirkit import spinal
from voussoirkit import winwhich
P = etiquette.photodb.PhotoDB(ephemeral=True)
P.log.setLevel(100)
WRITING_ROOTDIR = pathclass.Path(__file__).parent
GIT = winwhich.which('git')
ARTICLE_TEMPLATE = '''
[Back to writing](/writing)
{body}
---
[View this document's history]({github_history})
{commits}
'''
# HELPERS
################################################################################
def check_output(command):
return subprocess.check_output(command, stderr=subprocess.PIPE).decode('utf-8')
def write(path, content):
'''
open() and write the file, with validation that it is in the writing dir.
'''
path = pathclass.Path(path)
if path not in WRITING_ROOTDIR:
raise ValueError(path)
print(path.absolute_path)
f = path.open('w', encoding='utf-8')
f.write(content)
f.close()
# GIT
################################################################################
def git_repo_for_file(path):
path = pathclass.Path(path)
folder = path.parent
prev = None
while folder != prev:
if folder.with_child('.git').exists:
return folder
prev = folder
folder = folder.parent
raise Exception('No Git repo.')
def git_file_edited_date(path):
'''
Return the YYYY-MM-DD date of the most recent commit that touched this file,
ignoring commits marked as "[minor]".
'''
path = pathclass.Path(path)
repo = git_repo_for_file(path)
path = path.relative_to(repo, simple=True)
command = [
GIT,
'-C', repo.absolute_path,
'log',
'-1',
'--pretty=format:%ad',
'--date=short',
r'--grep=\[minor\]',
'--invert-grep',
'--',
path,
]
output = check_output(command)
return output
def git_file_commit_history(path):
'''
Return tuples like (hash, 'YYYY-MM-DD commit message') for all commits that
touched this file, most recent first.
This is used for "view this document's history".
'''
path = pathclass.Path(path)
repo = git_repo_for_file(path)
path = path.relative_to(repo, simple=True)
command = [
GIT,
'-C', repo.absolute_path,
'log',
'--follow',
'--pretty=format:%H %ad %s',
'--date=short',
'--',
path,
]
output = check_output(command)
lines = [line for line in output.splitlines() if line.strip()] #'*' in line]
lines = [re.sub(r'([\*\_\[\]\(\)\^])', r'\\\1', line) for line in lines]
lines = [line.split(' ', 1) for line in lines]
return lines
def git_file_published_date(path):
'''
Return the YYYY-MM-DD date of the commit where this file first appeared.
'''
path = pathclass.Path(path)
repo = git_repo_for_file(path)
path = path.relative_to(repo, simple=True)
command = [
GIT,
'-C', repo.absolute_path,
'log',
'--follow',
'--diff-filter=A',
'--pretty=format:%ad',
'--date=short',
'--',
path,
]
output = check_output(command)
return output
# SOUP
################################################################################
def soup_set_tag_links(soup):
'''
vmarkdown renders [tag:example] into
<a class="tag_link" data-qualname="example">, with no href. At this point,
let's add the href to voussoir.net.
'''
tag_links = soup.find_all('a', {'class': 'tag_link'})
for tag_link in tag_links:
tagname = tag_link['data-qualname'].split('.')[-1]
tag_link['href'] = f'/writing/tags/{tagname}'
tags = [a['data-qualname'] for a in tag_links]
return tags
def soup_adjust_relative_links(soup, md_file, repo_path):
'''
The markdown files are stored in article/article.md so if they contain a
relative link to some screenshot.png, naturally that file is
article/screenshot.png. But because of the nginx rules where we visit
/writing/article, the relative link thinks that it points to
/writing/screenshot.png which doesn't exist. So this function turns all
relative links into absolute links starting from /writing.
'''
folder = pathclass.Path(md_file.parent, force_sep='/')
def fixby(tagname, attribute):
links = soup.find_all(tagname)
for link in links:
href = link[attribute]
if '://' in href:
continue
if href.startswith('/'):
continue
if href.startswith('#'):
continue
href = folder.join(href)
href = '/' + href.relative_to(WRITING_ROOTDIR.parent, simple=True)
if not href.startswith('/writing/'):
raise ValueError('Somethings wrong')
link[attribute] = href
fixby('a', 'href')
fixby('img', 'src')
fixby('video', 'src')
fixby('audio', 'src')
fixby('source', 'src')
# ARTICLE
################################################################################
class Article:
def __init__(self, md_file):
self.md_file = pathclass.Path(md_file)
self.html_file = self.md_file.replace_extension('html')
self.web_path = self.md_file.parent.relative_to(WRITING_ROOTDIR, simple=True)
self.date = git_file_published_date(self.md_file)
self.edited = git_file_edited_date(self.md_file)
repo_path = git_repo_for_file(self.md_file)
relative_path = self.md_file.relative_to(repo_path, simple=True)
github_history = f'https://github.com/voussoir/voussoir.net/commits/master/{relative_path}'
commits = git_file_commit_history(self.md_file)
commits = [
f'- [{html.escape(line)}](https://github.com/voussoir/voussoir.net/commit/{hash})'
for (hash, line) in commits
]
commits = '\n'.join(commits)
md = vmarkdown.cat_file(self.md_file.absolute_path)
md = ARTICLE_TEMPLATE.format(
body=md,
github_history=github_history,
commits=commits,
)
self.soup = vmarkdown.markdown(
md,
css=WRITING_ROOTDIR.with_child('dark.css').absolute_path,
return_soup=True,
)
if self.soup.head.title:
self.title = self.soup.head.title.get_text()
else:
self.title = self.md_file.basename
self.tags = soup_set_tag_links(self.soup)
soup_adjust_relative_links(self.soup, self.md_file, repo_path)
def __repr__(self):
return f'Article:{self.title}'
# TAG INDEX
################################################################################
class Index:
def __init__(self):
self.articles = []
self.children = {}
def __str__(self):
return f'Index (articles={self.articles}) (children={self.children})'
def navigate(self, query, create=False):
dest = self
while query:
parent = query[0]
if create:
dest = dest.children.setdefault(parent, Index())
else:
dest = dest.children.get(parent)
if not dest:
return
query = query[1:]
return dest
def assign(self, query, articles):
self.navigate(query, create=True).articles = articles
def get(self, query):
dest = self.navigate(query, create=False)
if dest:
return dest.articles
return []
def remove_redundant(query):
seen = set()
newq = tuple()
for tag in query:
if tag in seen:
continue
newq += (tag,)
seen.add(tag)
seen.update(tag.walk_parents())
return newq
def permute(pool, query=tuple()):
if query:
query = remove_redundant(query)
if complete_tag_index.get(query):
return
articles = list(P.search(tag_musts=query))
if not articles:
return
articles = [ARTICLES[article.real_path] for article in articles]
# Only generate a page for this tag query if it contains different
# results from the previous query. For example, if an article has tags
# A, B, and C, but it is the only article with those tags, there's no
# reason to generate tag pages for /A, /A/B, /A/B/C, all of which have
# the same single result.
if len(query) > 1:
previous = query[:-1]
prevarticles = complete_tag_index.get(previous)
if set(articles) == set(prevarticles):
return
complete_tag_index.assign(query, articles)
for tag in pool:
rest = pool.copy()
rest.remove(tag)
q = query + (tag,)
permute(rest, q)
# RENDER FILES
################################################################################
def write_articles():
for article in ARTICLES.values():
if article.md_file.replace_extension('').basename != article.md_file.parent.basename:
print(f'Warning: {article} does not match folder name.')
for qualname in article.tags:
P.easybake(qualname)
P.new_photo(article.md_file.absolute_path, tags=article.tags)
html = str(article.soup)
write(article.html_file.absolute_path, html)
def make_tag_page(index, path):
path = [tag.name for tag in path]
parent = path[:-1]
parent = '/'.join(parent)
path = '/'.join(path)
page = jinja2.Template('''
<html>
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0"/>
<link rel="stylesheet" href="/writing/dark.css"/>
{% if path %}
<title>Articles tagged {{path}}</title>
{% else %}
<title>Articles by tag</title>
{% endif %}
</head>
<body>
<article>
<section style="grid-area:tagnav">
<p><a href="/writing">Back to writing</a></p>
{% if parent %}
<a href="/writing/tags/{{parent}}">Back to {{parent}}</a>
{% else %}
<a href="/writing/tags">Back to tags</a>
{% endif %}
</section>
{% if articles %}
<section style="grid-area:articles">
<h1>{{path}}</h1>
<ol class="article_list">
{% for article in articles %}
<li>
<a href="/writing/{{article.web_path}}">{{article.date}} - {{article.title|e}}</a>
</li>
{% endfor %}
</ol>
</section>
{% endif %}
{% if index.children %}
<section style="grid-area:refine">
<h1>Refine your query</h1>
<ul class="article_list">
{% for refine in children %}
<li>
{% if path %}
<a href="/writing/tags/{{path}}/{{refine}}">{{refine}}</a>
{% else %}
<a href="/writing/tags/{{refine}}">{{refine}}</a>
{% endif %}
</li>
{% endfor %}
</ul>
</section>
{% endif %}
</article>
</body>
</html>
''').render(
parent=parent,
index=index,
articles=sorted(index.articles, key=lambda a: a.date, reverse=True),
path=path,
children=sorted(tag.name for tag in index.children.keys()),
)
return page
def write_tag_pages(index, path=[]):
for (child_name, child_index) in index.children.items():
write_tag_pages(child_index, path=path+[child_name])
filepath = ['tags'] + [tag.name for tag in path] + ['index.html']
filepath = os.sep.join(filepath)
filepath = WRITING_ROOTDIR.join(filepath)
filepath.parent.makedirs(exist_ok=True)
page = make_tag_page(index, path)
write(filepath, page)
def write_writing_index():
page = jinja2.Template('''
<html>
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0"/>
<link rel="stylesheet" href="/writing/dark.css"/>
<title>Writing</title>
</head>
<body>
<article>
<h1>Writing</h1>
<ol class="article_list">
{% for article in articles %}
<li>
<a href="{{article.web_path}}">{{article.date}} - {{article.title|e}}</a>
</li>
{% endfor %}
</ol>
<p><a href="/writing/rss.xml">RSS</a></p>
<h2>Recently edited</h2>
<ol class="article_list">
{% for article in articles_edited %}
{% if article.edited and article.edited != article.date %}
<li>
<a href="{{article.web_path}}">{{article.edited}} - {{article.title|e}} ({{article.date}})</a>
</li>
{% endif %}
{% endfor %}
</ol>
<h2>The footer</h2>
<p>I greatly appreciate the time you have taken to visit my page. If you
have feedback, corrections, or tales of harrowing adventure, send an email
to writing@voussoir.net. If you want me to hold on to some of your dollars
for permanent safekeeping,
<a href="https://www.buymeacoffee.com/voussoir">click here</a>.</p>
</article>
</body>
</html>
''').render(
articles=sorted(ARTICLES.values(), key=lambda a: a.date, reverse=True),
articles_edited=sorted(ARTICLES.values(), key=lambda a: a.edited, reverse=True)
)
write(WRITING_ROOTDIR.with_child('index.html'), page)
def write_rss():
rss = jinja2.Template('''
<rss version="2.0">
<channel>
<title>voussoir.net/writing</title>
<link>https://voussoir.net/writing</link>
<description>voussoir's writing</description>
{% for article in articles %}
<item>
<title>{{article.title|e}}</title>
<link>https://voussoir.net/writing/{{article.web_path}}</link>
<pubDate>{{article.date}}</pubDate>
<description>
<![CDATA[
{{article.soup.article}}
]]>
</description>
</item>
{% endfor %}
</channel>
</rss>
'''.strip()).render(articles=sorted(ARTICLES.values(), key=lambda a: a.date, reverse=True))
write(WRITING_ROOTDIR.with_child('rss.xml'), rss)
# GO
################################################################################
ARTICLES = {
file: Article(file)
for file in spinal.walk_generator(WRITING_ROOTDIR)
if file.extension == 'md' and file.parent != WRITING_ROOTDIR
}
write_articles()
complete_tag_index = Index()
all_tags = set(P.get_tags())
permute(all_tags)
write_tag_pages(complete_tag_index)
write_writing_index()
write_rss()