diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..bfe05ac --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +voussoir.net/writing/**/*.html diff --git a/README.md b/README.md new file mode 100644 index 0000000..5b1cc06 --- /dev/null +++ b/README.md @@ -0,0 +1 @@ +https://voussoir.net diff --git a/voussoir.net/writing/css/dark.css b/voussoir.net/writing/css/dark.css new file mode 100644 index 0000000..218559f --- /dev/null +++ b/voussoir.net/writing/css/dark.css @@ -0,0 +1,197 @@ +:root +{ + --color_bodybg: #272822; + --color_codebg: rgba(255, 255, 255, 0.05); + --color_codeborder: rgba(255, 255, 255, 0.2); + --color_h1bg: #284142; + --color_htmlbg: #1b1c18; + --color_blockquotebg: rgba(0, 0, 0, 0.2); + --color_inlinecodebg: rgba(255, 255, 255, 0.1); + --color_link: #ae81ff; + --color_maintext: #ddd; +} + +* +{ + font-family: Verdana, sans-serif; + font-size: 10pt; + color: var(--color_maintext); +} + +h1, h2, h3, h4, h5 +{ + padding: 8px; +} +h2, h3, h4, h5 +{ + border-bottom: 1px solid var(--color_maintext); + /*background-color: var(--color_h1bg);*/ +} + +h1 {font-size: 2.5em;} h1 * {font-size: inherit;} +h2 {font-size: 1.8em;} h2 * {font-size: inherit;} +h3 {font-size: 1.5em;} h3 * {font-size: inherit;} +h4 {font-size: 1.2em;} h4 * {font-size: inherit;} +h5 {font-size: 1.0em;} h5 * {font-size: inherit;} + + +.header_anchor_link {display: none; font-size: 1.0em; text-decoration: none} +h1:hover > .header_anchor_link {display: initial;} +h2:hover > .header_anchor_link {display: initial;} +h3:hover > .header_anchor_link {display: initial;} +h4:hover > .header_anchor_link {display: initial;} +h5:hover > .header_anchor_link {display: initial;} + + +html +{ + padding-top: 30px; + padding-bottom: 30px; + background-color: var(--color_htmlbg); +} + +a +{ + color: var(--color_link); +} + + +body +{ + width: 80%; + min-width: 30em; + max-width: 70em; + margin: auto; + padding: 16px; + padding-bottom: 64px; + box-shadow: #000 0px 0px 40px -10px; + background-color: var(--color_bodybg); +} + +body * +{ + max-width: 100%; + word-wrap: break-word; +} + +blockquote +{ + background-color: var(--color_blockquotebg); + margin-inline-start: 0; + margin-inline-end: 0; + + padding: 8px; + padding-inline-start: 40px; + padding-inline-end: 40px; +} + +*:not(pre) > code +{ + background-color: var(--color_inlinecodebg); + border-radius: 3px; + line-height: 1.5; + padding-left: 4px; + padding-right: 4px; +} + +pre +{ + padding: 8px; + border: 1px solid var(--color_codeborder); + background-color: var(--color_codebg); + overflow-x: auto; +} + +code, +pre, +.highlight * +{ + font-family: monospace; +} + +/* +Thank yourichleland for pre-building this Monokai style. 
+https://github.com/richleland/pygments-css +*/ +:root +{ + --color_monokai_bg: #272822; + --color_monokai_purple: #ae81ff; + --color_monokai_green: #a6e22e; + --color_monokai_pink: #f92672; + --color_monokai_white: #f8f8f2; + --color_monokai_orange: #fd971f; + --color_monokai_yellow: #e6db74; + --color_monokai_blue: #66d9ef; +} +.highlight .hll { background-color: #49483e } +.highlight { background-color: var(--color_monokai_bg); color: var(--color_monokai_white) } +.highlight .c { color: #75715e } /* Comment */ +.highlight .err { color: #960050; background-color: #1e0010 } /* Error */ +.highlight .k { color: var(--color_monokai_pink) } /* Keyword */ +.highlight .l { color: var(--color_monokai_purple) } /* Literal */ +.highlight .n { color: var(--color_monokai_white) } /* Name */ +.highlight .o { color: var(--color_monokai_pink) } /* Operator */ +.highlight .p { color: var(--color_monokai_white) } /* Punctuation */ +.highlight .ch { color: #75715e } /* Comment.Hashbang */ +.highlight .cm { color: #75715e } /* Comment.Multiline */ +.highlight .cp { color: #75715e } /* Comment.Preproc */ +.highlight .cpf { color: #75715e } /* Comment.PreprocFile */ +.highlight .c1 { color: #75715e } /* Comment.Single */ +.highlight .cs { color: #75715e } /* Comment.Special */ +.highlight .gd { color: var(--color_monokai_pink) } /* Generic.Deleted */ +.highlight .ge { font-style: italic } /* Generic.Emph */ +.highlight .gi { color: var(--color_monokai_green) } /* Generic.Inserted */ +.highlight .gs { font-weight: bold } /* Generic.Strong */ +.highlight .gu { color: #75715e } /* Generic.Subheading */ +.highlight .kc { color: var(--color_monokai_blue) } /* Keyword.Constant */ +.highlight .kd { color: var(--color_monokai_blue) } /* Keyword.Declaration */ +.highlight .kn { color: var(--color_monokai_pink) } /* Keyword.Namespace */ +.highlight .kp { color: var(--color_monokai_blue) } /* Keyword.Pseudo */ +.highlight .kr { color: var(--color_monokai_blue) } /* Keyword.Reserved */ +.highlight .kt { color: var(--color_monokai_blue) } /* Keyword.Type */ +.highlight .ld { color: var(--color_monokai_yellow) } /* Literal.Date */ +.highlight .m { color: var(--color_monokai_purple) } /* Literal.Number */ +.highlight .s { color: var(--color_monokai_yellow) } /* Literal.String */ +.highlight .na { color: var(--color_monokai_white) } /* Name.Attribute */ +.highlight .narg {color: var(--color_monokai_orange) } /* Custom Name.Argument */ +.highlight .nb { color: var(--color_monokai_white) } /* Name.Builtin */ +.highlight .nc { color: var(--color_monokai_white) } /* Name.Class */ +.highlight .no { color: var(--color_monokai_blue) } /* Name.Constant */ +.highlight .nd { color: var(--color_monokai_green) } /* Name.Decorator */ +.highlight .ni { color: var(--color_monokai_white) } /* Name.Entity */ +.highlight .ne { color: var(--color_monokai_blue) } /* Name.Exception */ +.highlight .nf { color: var(--color_monokai_green) } /* Name.Function */ +.highlight .nl { color: var(--color_monokai_white) } /* Name.Label */ +.highlight .nn { color: var(--color_monokai_white) } /* Name.Namespace */ +.highlight .nx { color: var(--color_monokai_green) } /* Name.Other */ +.highlight .py { color: var(--color_monokai_white) } /* Name.Property */ +.highlight .nt { color: var(--color_monokai_pink) } /* Name.Tag */ +.highlight .nv { color: var(--color_monokai_white) } /* Name.Variable */ +.highlight .ow { color: var(--color_monokai_pink) } /* Operator.Word */ +.highlight .w { color: var(--color_monokai_white) } /* Text.Whitespace */ +.highlight .mb { 
color: var(--color_monokai_purple) } /* Literal.Number.Bin */ +.highlight .mf { color: var(--color_monokai_purple) } /* Literal.Number.Float */ +.highlight .mh { color: var(--color_monokai_purple) } /* Literal.Number.Hex */ +.highlight .mi { color: var(--color_monokai_purple) } /* Literal.Number.Integer */ +.highlight .mo { color: var(--color_monokai_purple) } /* Literal.Number.Oct */ +.highlight .sa { color: var(--color_monokai_white) } /* Literal.String.Affix */ +.highlight .sb { color: var(--color_monokai_yellow) } /* Literal.String.Backtick */ +.highlight .sc { color: var(--color_monokai_yellow) } /* Literal.String.Char */ +.highlight .dl { color: var(--color_monokai_yellow) } /* Literal.String.Delimiter */ +.highlight .sd { color: var(--color_monokai_yellow) } /* Literal.String.Doc */ +.highlight .s2 { color: var(--color_monokai_yellow) } /* Literal.String.Double */ +.highlight .se { color: var(--color_monokai_purple) } /* Literal.String.Escape */ +.highlight .sh { color: var(--color_monokai_yellow) } /* Literal.String.Heredoc */ +.highlight .si { color: var(--color_monokai_yellow) } /* Literal.String.Interpol */ +.highlight .sx { color: var(--color_monokai_yellow) } /* Literal.String.Other */ +.highlight .sr { color: var(--color_monokai_yellow) } /* Literal.String.Regex */ +.highlight .s1 { color: var(--color_monokai_yellow) } /* Literal.String.Single */ +.highlight .ss { color: var(--color_monokai_yellow) } /* Literal.String.Symbol */ +.highlight .bp { color: var(--color_monokai_white) } /* Name.Builtin.Pseudo */ +.highlight .fm { color: var(--color_monokai_blue) } /* Name.Function.Magic */ +.highlight .vc { color: var(--color_monokai_white) } /* Name.Variable.Class */ +.highlight .vg { color: var(--color_monokai_white) } /* Name.Variable.Global */ +.highlight .vi { color: var(--color_monokai_white) } /* Name.Variable.Instance */ +.highlight .vm { color: var(--color_monokai_white) } /* Name.Variable.Magic */ +.highlight .il { color: var(--color_monokai_purple) } /* Literal.Number.Integer.Long */ diff --git a/voussoir.net/writing/generate_site.py b/voussoir.net/writing/generate_site.py new file mode 100644 index 0000000..9e4e7ec --- /dev/null +++ b/voussoir.net/writing/generate_site.py @@ -0,0 +1,282 @@ +import os +import bs4 +import etiquette +import pprint +import vmarkdown +import jinja2 +import subprocess + +from voussoirkit import pathclass +from voussoirkit import spinal +from voussoirkit import winwhich + +P = etiquette.photodb.PhotoDB(ephemeral=True) +P.log.setLevel(100) + +writing_rootdir = pathclass.Path(__file__).parent + +def write(path, content): + path = pathclass.Path(path) + if path not in writing_rootdir: + raise ValueError(path) + print(path.absolute_path) + f = open(path.absolute_path, 'w', encoding='utf-8') + f.write(content) + f.close() + +GIT = winwhich.which('git') + +def git_repo_for_file(path): + path = pathclass.Path(path) + folder = path.parent + prev = None + while folder != prev: + if folder.with_child('.git').exists: + return folder + prev = folder + folder = folder.parent + raise Exception('No Git repo.') + +def git_file_date(path): + path = pathclass.Path(path) + repo = git_repo_for_file(path) + path = path.relative_to(repo, simple=True) + command = [ + GIT, + '-C', repo.absolute_path, + 'log', + '--diff-filter=A', + '--pretty=format:%ad', + '--date=short', + '--', path, + ] + # print(command) + output = subprocess.check_output(command, stderr=subprocess.PIPE).decode('utf-8') + return output + +class Article: + def __init__(self, md_file): + 
self.md_file = pathclass.Path(md_file) + self.html_file = self.md_file.replace_extension('html') + self.web_path = self.md_file.parent.relative_to(writing_rootdir, simple=True) + self.date = git_file_date(self.md_file) + + self.soup = vmarkdown.markdown( + self.md_file.absolute_path, + css=writing_rootdir.with_child('css').with_child('dark.css').absolute_path, + return_soup=True, + templates=writing_rootdir.with_child('headerfooter.md').absolute_path, + ) + self.title = self.soup.head.title.get_text() + + tag_links = self.soup.find_all('a', {'class': 'tag_link'}) + for tag_link in tag_links: + tagname = tag_link['data-qualname'].split('.')[-1] + tag_link['href'] = f'/writing/tags/{tagname}' + + self.tags = [a['data-qualname'] for a in tag_links] + + def __str__(self): + return f'Article({self.md_file.absolute_path})' + +ARTICLES = {file: Article(file) for file in spinal.walk_generator(writing_rootdir) if file.extension == 'md' and file.parent != writing_rootdir} + +def write_articles(): + for article in ARTICLES.values(): + if article.md_file.replace_extension('').basename != article.md_file.parent.basename: + print(f'Warning: {article} does not match folder name.') + + for qualname in article.tags: + P.easybake(qualname) + + P.new_photo(article.md_file.absolute_path, tags=article.tags) + html = str(article.soup) + write(article.html_file.absolute_path, html) + +class Index: + def __init__(self): + self.articles = [] + self.children = {} + + def navigate(self, query, create=False): + dest = self + while query: + parent = query[0] + if create: + dest = dest.children.setdefault(parent, Index()) + else: + dest = dest.children.get(parent) + if not dest: + return + query = query[1:] + return dest + + def assign(self, query, articles): + self.navigate(query, create=True).articles = articles + + def get(self, query): + dest = self.navigate(query, create=False) + if dest: + return dest.articles + return [] + +def remove_redundant(query): + seen = set() + newq = tuple() + for tag in query: + if tag in seen: + continue + newq += (tag,) + seen.add(tag) + seen.update(tag.walk_parents()) + return newq + +def permute(query, pool): + if query: + query = remove_redundant(query) + if complete_tag_index.get(query): + return + articles = list(P.search(tag_musts=query)) + if not articles: + return + articles = [ARTICLES[article.real_path] for article in articles] + + if len(query) > 1: + previous = query[:-1] + prevarticles = complete_tag_index.get(previous) + # print(f''' + # query={query}, + # docs={docs} + # previous={previous}, + # prevdocs={prevdocs}, + # ''') + if set(articles) == set(prevarticles): + return + s = str(query) + if 'python' in s and 'java' in s: + print('BAD', query, articles) + complete_tag_index.assign(query, articles) + # pprint.pprint(complete_tag_index) + # complete_tag_index[query] = docs + # print(query, pool, docs) + + for tag in pool: + rest = pool.copy() + rest.remove(tag) + q = query + (tag,) + permute(q, rest) + + +def maketagpage(index, path): + path = [tag.name for tag in path] + parent = path[:-1] + parent = '/'.join(parent) + path = '/'.join(path) + + page = jinja2.Template(''' + + + + + + + +
+

Back to writing

+ {% if parent %} + Back to {{parent}} + {% else %} + Back to tags + {% endif %} +
+ + {% if index.articles %} +
+

{{path}}

+ +
+ {% endif %} + + {% if index.children %} +
+

Refine your query

+ +
+ {% endif %} + + + ''').render( + parent=parent, + index=index, + path=path, + children=sorted(tag.name for tag in index.children.keys()), + ) + return page + +def outs(index, path=[]): + filepath = ['tags'] + [tag.name for tag in path] + ['index.html'] + for (child_name, child_index) in index.children.items(): + outs(child_index, path=path+[child_name]) + page = maketagpage(index, path) + filepath = os.sep.join(filepath) + filepath = writing_rootdir.join(filepath) + os.makedirs(filepath.parent.absolute_path, exist_ok=True) + write(filepath, page) + +def write_tag_pages(): + outs(complete_tag_index) + + +def write_writing_index(): + page = jinja2.Template(''' + + + + + + + +

Writing

+ + + ''').render( + articles=sorted(ARTICLES.values(), key=lambda a: a.date, reverse=True), + ) + write(writing_rootdir.with_child('index.html'), page) + +write_articles() +complete_tag_index = Index() +all_tags = set(P.get_tags()) +permute(tuple(), all_tags) +write_tag_pages() +write_writing_index() \ No newline at end of file diff --git a/voussoir.net/writing/headerfooter.md b/voussoir.net/writing/headerfooter.md new file mode 100644 index 0000000..a186c17 --- /dev/null +++ b/voussoir.net/writing/headerfooter.md @@ -0,0 +1,3 @@ +[Back to writing](/writing) + +{body} diff --git a/voussoir.net/writing/vmarkdown.py b/voussoir.net/writing/vmarkdown.py new file mode 100644 index 0000000..399a504 --- /dev/null +++ b/voussoir.net/writing/vmarkdown.py @@ -0,0 +1,667 @@ +import argparse +import base64 +import bs4 +import copy +import html +import mimetypes +import mistune +import os +import pygments +import pygments.formatters +import pygments.lexers +import pygments.token +import re +import requests +import string +import sys +import traceback + +from voussoirkit import pathclass + +HTML_TEMPLATE = ''' + + + + + + + + + +{body} + + +'''.strip() + +SLUG_CHARACTERS = string.ascii_lowercase + string.digits + '_' + + +class SyntaxHighlighting: + def block_code(self, text, lang): + inlinestyles = self.options.get('inlinestyles') or False + linenos = self.options.get('linenos') or False + return self._block_code(text, lang, inlinestyles, linenos) + + @staticmethod + def _block_code(text, lang, inlinestyles=False, linenos=False): + if not lang: + text = text.strip() + return f'
<pre>{mistune.escape(text)}</pre>\n'
+        try:
+            lexer = pygments.lexers.get_lexer_by_name(lang.lower(), stripall=True)
+            # if isinstance(lexer, pygments.lexers.PythonLexer):
+            #     lexer = pygments.lexers.PythonConsoleLexer()
+
+            # But wait! Why aren't you doing this:
+            # formatter = pygments.formatters.HtmlFormatter(
+            #     noclasses=inlinestyles,
+            #     linenos=linenos,
+            #     cssclass='highlight ' + (lang.lower() if lang else ''),
+            # )
+            # code = pygments.highlight(text, lexer, formatter).decode('utf-8')
+            # ??
+            elements = []
+            for (token, text) in lexer.get_tokens(text):
+                if text.isspace():
+                    elements.append(text)
+                    continue
+                css_class = pygments.token.STANDARD_TYPES.get(token, '')
+                element = f'<span class="{css_class}">{html.escape(text)}</span>'
+                elements.append(element)
+            code = ''.join(elements)
+
+            divclass = ['highlight']
+            if lang:
+                divclass.append(lang.lower())
+            divclass = ' '.join(divclass)
+
+            code = f'<div class="{divclass}"><pre>{code}</pre></div>'
+            # if lang:
+            #     code = code.replace('div class="highlight"', f'div class="highlight {lang.lower()}"')
+            # if linenos:
+            #     return f'<div class="highlight-wrapper">{code}</div>\n'
+            return code
+        except Exception:
+            traceback.print_exc()
+            return f'<pre>{mistune.escape(text)}</pre>\n'
+
+
+class VoussoirRenderer(
+    SyntaxHighlighting,
+    mistune.Renderer,
+    ):
+    pass
+
+class VoussoirGrammar(mistune.InlineGrammar):
+    larr = re.compile(r'<--')
+    rarr = re.compile(r'-->')
+    mdash = re.compile(r'--')
+    category_tag = re.compile(r'\[tag:([\w\.]+)\]')
+    text = re.compile(r'^[\s\S]+?(?=[\\<!\[_*`~-]|https?://| {2,}\n|$)')
+
+class VoussoirLexer(mistune.InlineLexer):
+    grammar_class = VoussoirGrammar
+    default_rules = copy.copy(mistune.InlineLexer.default_rules)
+    default_rules.insert(0, 'category_tag')
+    default_rules.insert(0, 'mdash')
+    default_rules.insert(0, 'rarr')
+    default_rules.insert(0, 'larr')
+
+    def output_category_tag(self, m):
+        qualname = m.group(1)
+        tagname = qualname.split('.')[-1]
+        return f'<a class="tag_link" data-qualname="{qualname}">[{tagname}]</a>'
+
+    def output_mdash(self, m):
+        return '—'
+
+    def output_rarr(self, m):
+        return '→'
+
+    def output_larr(self, m):
+        return '←'
+
+renderer = VoussoirRenderer()
+inline = VoussoirLexer(renderer)
+VMARKDOWN = mistune.Markdown(renderer=renderer, inline=inline)
+
+# GENERIC HELPERS
+################################################################################
+def cat_file(path):
+    if isinstance(path, pathclass.Path):
+        path = path.absolute_path
+    with open(path, 'r', encoding='utf-8') as f:
+        return f.read()
+
+def cat_files(paths):
+    if not paths:
+        return ''
+    if isinstance(paths, str):
+        return cat_file(paths)
+    content = [cat_file(path) for path in paths]
+    return '\n\n'.join(content)
+
+def dump_file(path):
+    with open(path, 'rb') as f:
+        return f.read()
+
+# SOUP HELPERS
+################################################################################
+def add_header_anchors(soup):
+    '''
+    Give each <hX> an <a> to link to it.
+    '''
+    header_pattern = re.compile(rf'^h[1-6]$')
+    used_slugs = set()
+
+    for header in soup.find_all(header_pattern):
+        slug = slugify(header.get_text())
+        slug = uniqify_slug(slug, used_slugs)
+
+        header['id'] = slug
+
+        new_a = soup.new_tag('a')
+        new_a['href'] = '#' + slug
+        new_a['class'] = 'header_anchor_link'
+        paragraph_symbol = chr(182)
+        new_a.append(f' ({paragraph_symbol})')
+        header.append(new_a)
+
+def add_toc(soup, max_level=None):
+    '''
+    Gather up all the header anchors and form a table of contents,
+    which will be placed below the first h1 on the page, if the page has an h1.
+    '''
+    first_h1 = soup.h1
+    if not first_h1:
+        return
+
+    def new_list(root=False):
+        r = bs4.BeautifulSoup('<ol></ol>', 'html.parser')
+        if root:
+            return r
+        return r.ol
+
+    # Official HTML headers only go up to 6.
+    if max_level is None:
+        max_level = 6
+
+    elif max_level < 1:
+        raise ValueError('max_level must be >= 1.')
+
+    header_pattern = re.compile(rf'^h[1-{max_level}]$')
+
+    toc = new_list(root=True)
+    toc.ol['id'] = 'table_of_contents'
+    toc.ol.append('Table of contents')
+    current_list = toc.ol
+    current_list['level'] = None
+
+    headers = soup.find_all(header_pattern)
+    for header in headers:
+        if header == first_h1:
+            continue
+        # 'hX' -> X
+        level = int(header.name[1])
+
+        toc_line = toc.new_tag('li')
+        toc_a = toc.new_tag('a')
+
+        toc_a.append(str(header.find(text=True)))
+        toc_a['href'] = f'#{header["id"]}'
+        toc_line.append(toc_a)
+
+        if current_list['level'] is None:
+            current_list['level'] = level
+
+        while level < current_list['level']:
+            # Because the sub-<ol>s are actually a child of the last
+            # <li> of the previous <ol>, we must .parent twice.
+            # The second .parent is conditional because if the current
+            # list is toc.ol, then parent is a Soup document object, and
+            # parenting again would be a mistake. We'll recover from
+            # this in just a moment.
+            current_list = current_list.parent
+            if current_list.name == 'li':
+                current_list = current_list.parent
+            # If the file has headers in a non-ascending order, like the
+            # first header is an h4 and then an h1 comes later, then
+            # this while loop would keep attempting to climb the .parent
+            # which would take us too far, off the top of the tree.
+            # So, if we reach `current_list == toc.ol` then we've
+            # reached the root and should stop climbing. At that point
+            # we can just snap current_level and use the root list again.
+            # In the resulting toc, that initial h4 would have the same
+            # toc depth as the later h1 since it never had parents.
+            if current_list == toc:
+                current_list['level'] = level
+                current_list = toc.ol
+
+        if level > current_list['level']:
+            # In order to properly render nested <ol>s, you're supposed
+            # to make the new <ol> a child of the last <li> of the
+            # previous <ol>. NOT a child of the prev <ol> directly.
+            # Don't worry, .children can never be empty because on the
+            # first <li> this condition can never occur, and new <ol>
                  s + # always receive a child right after being created. + _l = new_list() + _l['level'] = level + final_li = list(current_list.children)[-1] + final_li.append(_l) + current_list = _l + + current_list.append(toc_line) + + for ol in toc.find_all('ol'): + del ol['level'] + + first_h1.insert_after(toc.ol) + +def add_head_title(soup): + ''' + Add the element in <head> based on the text of the first <h1>. + ''' + first_h1 = soup.h1 + if not first_h1: + return + + text = get_innertext(first_h1) + title = soup.new_tag('title') + title.append(text) + soup.head.append(title) + +def embed_images(soup, cache=None): + ''' + Find <img> srcs and either download the url or load the local file, + and convert it to a data URI. + ''' + for element in soup.find_all('img'): + src = element['src'] + if cache is None: + cache = {} + if cache.get(src) is None: + print('Fetching %s' % src) + if src.startswith('https://') or src.startswith('http://'): + response = requests.get(src) + response.raise_for_status() + data = response.content + else: + data = dump_file(src) + data = base64.b64encode(data).decode('ascii') + mime = mimetypes.guess_type(src)[0] + mime = mime if mime is not None else '' + uri = f'data:{mime};base64,{data}' + cache[src] = uri + else: + uri = cache[src] + element['src'] = uri + +def get_innertext(element): + if isinstance(element, bs4.NavigableString): + return element.string + else: + return element.get_text() + +def next_element_sibling(element): + ''' + Like nextSibling but skips NavigableString. + ''' + while True: + element = element.nextSibling + if isinstance(element, bs4.NavigableString): + continue + return element + +def previous_element_sibling(element): + while True: + element = element.previousSibling + if isinstance(element, bs4.NavigableString): + continue + return element + +def remove_leading_empty_nodes(element): + ''' + Code <pre>s often start with an empty span, so this strips it off. + ''' + children = list(element.children) + while children: + if get_innertext(children[0]) == '': + children.pop(0).extract() + else: + break + +def slugify(text): + ''' + Filter text to contain only SLUG_CHARACTERS. + ''' + text = text.lower() + text = text.replace(' ', '_') + text = [c for c in text if c in SLUG_CHARACTERS] + text = ''.join(text) + return text + +def uniqify_slug(slug, used_slugs): + ''' + If the given slug has already been used, give it a trailing _2 or _3 etc. + ''' + count = 2 + try_slug = slug + while try_slug in used_slugs: + try_slug = f'{slug}_{count}' + count += 1 + slug = try_slug + used_slugs.add(slug) + return slug + +# HTML CLEANERS +################################################################################ +def html_replacements(html): + html = re.sub(r'<style>\s*</style>', '', html) + html = html.replace( + '<span class="o">>></span><span class="o">></span>', + '<span>>>></span>' + ) + html = html.replace( + '<span class="o">.</span><span class="o">.</span><span class="o">.</span>', + '<span>...</span>' + ) + return html + +# SOUP CLEANERS +################################################################################ +def fix_argument_call_classes(element): + ''' + Given a <span class="n"> pointing to a function being called, this fixes + the classes of all the keyword arguments from being plain names to being + argument names. 
+ ''' + # print('INPUT', repr(element)) + paren_depth = 0 + while True: + element = next_element_sibling(element) + # print(element, paren_depth) + innertext = element.get_text() + + if innertext == '(': + paren_depth += 1 + + if innertext == ')': + paren_depth -= 1 + + if 'n' in element['class']: + last_known_candidate = element + + if 'o' in element['class'] and innertext == '=': + last_known_candidate['class'].remove('n') + last_known_candidate['class'].append('narg') + + if paren_depth == 0: + break + +def fix_argument_def_classes(element): + ''' + Given a <span class="kd">def</span>, fix the function arguments so they are + a special color like they're SUPPOSED TO BE. + ''' + # print('INPUT', repr(element)) + do_color = True + while True: + element = next_element_sibling(element) + # print(element) + innertext = element.get_text() + if innertext == ')' and next_element_sibling(element).get_text() == ':': + break + + if innertext == '=': + do_color = False + + elif innertext == ',': + do_color = True + + elif do_color: + if 'n' in element['class']: + element['class'].remove('n') + element['class'].append('narg') + elif 'bp' in element['class']: + element['class'].remove('bp') + element['class'].append('narg') + elif 'o' in element['class'] and innertext in ('*', '**'): + # Fix *args, the star should not be operator colored. + element['class'].remove('o') + element['class'].append('n') + +def fix_repl_classes(element): + ''' + Given a <pre> element, this function detects that this pre contains a REPL + session when the first line starts with '>>>'. + + For REPL sessions, any elements on an input line (which start with '>>>' or + '...') keep their styles, while elements on output lines are stripped of + their styles. + + Of course you can confuse it by having an output which starts with '>>>' + but that's not the point okay? + ''' + remove_leading_empty_nodes(element) + children = list(element.children) + if not children: + return + + if get_innertext(children[0]) != '>>>': + return + + del_styles = None + for child in children: + if get_innertext(child).endswith('\n'): + del_styles = None + + elif del_styles is None: + del_styles = child.string not in ('>>>', '...') + + if isinstance(child, bs4.NavigableString): + continue + + if del_styles: + del child['class'] + +def fix_leading_pre_spaces(element): + ''' + I noticed this issue when using code blocks inside of a numbered list. + The first line would be okay but then the rest of the lines would be + +1 or +2 spaces indented. + So this looks for linebreaks inside code blocks, and removes additional + spaces that come after the linebreak. + ''' + return + children = list(element.children) + for child in children: + if isinstance(child, bs4.element.NavigableString): + text = get_innertext(child) + text = text.split('\n') + text = [text[0]] + [t.lstrip() for t in text[1:]] + text = '\n'.join(text) + child.replace_with(text) + +def fix_classes(soup): + ''' + Because pygments does not conform to my standards of beauty already! 
+ ''' + for element in soup.find_all('span', {'class': 'k'}): + if get_innertext(element) in ('def', 'class'): + element['class'] = ['kd'] + + for element in soup.find_all('span', {'class': 'bp'}): + if get_innertext(element) in ('None', 'True', 'False'): + element['class'] = ['m'] + + for element in soup.find_all('span', {'class': 'o'}): + if get_innertext(element) in ('.', '(', ')', '[', ']', '{', '}', ';', ','): + element['class'] = ['n'] + + for element in soup.find_all('pre'): + fix_repl_classes(element) + fix_leading_pre_spaces(element) + + for element in soup.find_all('span', {'class': 'kd'}): + if element.get_text() == 'def': + fix_argument_def_classes(element) + + for element in soup.find_all('span', {'class': 'n'}): + if get_innertext(element.nextSibling) == '(': + fix_argument_call_classes(element) + +# FINAL MARKDOWNS +################################################################################ +def markdown( + filename, + *, + css=None, + do_embed_images=False, + image_cache=None, + return_soup=False, + templates=None, + ): + body = cat_file(filename) + + if templates: + if isinstance(templates, str): + templates = [templates] + for template in templates: + template = cat_file(template) + body = template.replace('{body}', body) + + css = cat_files(css) + + body = VMARKDOWN(body) + html = HTML_TEMPLATE.format(css=css, body=body) + + html = html_replacements(html) + + soup = bs4.BeautifulSoup(html, 'html.parser') + # Make sure to add_head_title before add_header_anchors so you don't get + # the paragraph symbol in the <title>. + add_head_title(soup) + add_header_anchors(soup) + add_toc(soup) + fix_classes(soup) + if do_embed_images: + embed_images(soup, cache=image_cache) + + + if return_soup: + return soup + + html = str(soup) + return html + +def markdown_flask(core_filename, port, *args, **kwargs): + import flask + from flask import request + site = flask.Flask(__name__) + image_cache = {} + kwargs['image_cache'] = image_cache + core_filename = pathclass.Path(core_filename, force_sep='/') + if core_filename.is_dir: + cwd = core_filename + else: + cwd = pathclass.Path('.') + + def handle_path(path): + if path.extension == '.md': + return do_md_for(path) + + if path.is_dir: + atags = [] + for child in path.listdir(): + relative = child.relative_to(cwd, simple=True) + print(relative) + a = f'<p><a href="/{relative}">{child.basename}</a></p>' + atags.append(a) + page = '\n'.join(atags) + return page + + try: + content = open(path.absolute_path, 'rb').read() + except Exception as exc: + print(exc) + flask.abort(404) + else: + response = flask.make_response(content) + + mime = mimetypes.guess_type(path.absolute_path)[0] + if mime: + response.headers['Content-Type'] = mime + + return response + + def do_md_for(filename): + html = markdown(filename=filename, *args, **kwargs) + refresh = request.args.get('refresh', None) + if refresh is not None: + refresh = max(float(refresh), 1) + html += f'<script>setTimeout(function(){{window.location.reload()}}, {refresh * 1000})</script>' + return html + + @site.route('/') + def root(): + return handle_path(core_filename) + + @site.route('/<path:path>') + def other_file(path): + path = cwd.join(path) + if path not in cwd: + flask.abort(404) + return handle_path(path) + + site.run(host='0.0.0.0', port=port) + +# COMMAND LINE +################################################################################ +def markdown_argparse(args): + if args.output_filename: + md_file = pathclass.Path(args.md_filename) + output_file = 
pathclass.Path(args.output_filename) + if md_file == output_file: + raise ValueError('md file and output file are the same!') + + kwargs = { + 'filename': args.md_filename, + 'css': args.css, + 'do_embed_images': args.do_embed_images, + 'templates': args.template, + } + + if args.server: + return markdown_flask(core_filename=kwargs.pop('filename'), port=args.server, **kwargs) + + html = markdown(**kwargs) + + if args.output_filename: + f = open(args.output_filename, 'w', encoding='utf-8') + f.write(html) + f.close() + return + + print(html) + +def main(argv): + parser = argparse.ArgumentParser() + + parser.add_argument('md_filename') + parser.add_argument('--css', dest='css', action='append', default=None) + parser.add_argument('--template', dest='template', action='append', default=None) + parser.add_argument('--embed_images', dest='do_embed_images', action='store_true') + parser.add_argument('-o', '--output', dest='output_filename', default=None) + parser.add_argument('--server', dest='server', type=int, default=None) + parser.set_defaults(func=markdown_argparse) + + args = parser.parse_args(argv) + return args.func(args) + +if __name__ == '__main__': + raise SystemExit(main(sys.argv[1:]))
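Usage sketch for the CLI defined at the end of vmarkdown.py and for the site generator, based on the argparse flags and module-level calls in this commit. The article paths, CSS path, template path, and port number below are illustrative, not part of the commit:

    python vmarkdown.py myarticle/myarticle.md --css css/dark.css --template headerfooter.md -o myarticle/myarticle.html
    python vmarkdown.py myarticle/myarticle.md --server 4000
    python generate_site.py

In --server mode the markdown is re-rendered on every request, which is convenient while writing; generate_site.py renders every article, the tag pages, and the writing index in one pass.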