diff --git a/AESFile/aesfile.py b/AESFile/aesfile.py index e4470c8..a44b3c9 100644 --- a/AESFile/aesfile.py +++ b/AESFile/aesfile.py @@ -4,13 +4,9 @@ from Crypto.Cipher import AES import sys import os -try: - sys.path.append('C:\\git\\else\\Bytestring') - import bytestring -except ImportError: - # pip install - # https://raw.githubusercontent.com/voussoir/else/master/_voussoirkit/voussoirkit.zip - from voussoirkit import bytestring +# pip install +# https://raw.githubusercontent.com/voussoir/else/master/_voussoirkit/voussoirkit.zip +from voussoirkit import bytestring BLOCK_SIZE = 32 diff --git a/Downloady/downloady.py b/Downloady/downloady.py index 16b77f6..2d2d7ee 100644 --- a/Downloady/downloady.py +++ b/Downloady/downloady.py @@ -7,19 +7,11 @@ import time import urllib import warnings -try: - sys.path.append('C:\\git\\else\\Bytestring') - sys.path.append('C:\\git\\else\\clipext') - sys.path.append('C:\\git\\else\\ratelimiter') - import bytestring - import ratelimiter - import clipext -except ImportError: - # pip install - # https://raw.githubusercontent.com/voussoir/else/master/_voussoirkit/voussoirkit.zip - from voussoirkit import bytestring - from voussoirkit import ratelimiter - from voussoirkit import clipext +# pip install +# https://raw.githubusercontent.com/voussoir/else/master/_voussoirkit/voussoirkit.zip +from voussoirkit import bytestring +from voussoirkit import ratelimiter +from voussoirkit import clipext warnings.simplefilter('ignore') diff --git a/Instathief/instathief.py b/Instathief/instathief.py index 1d4f1d7..b297f0b 100644 --- a/Instathief/instathief.py +++ b/Instathief/instathief.py @@ -6,16 +6,10 @@ import os import requests import sys -try: - sys.path.append('C:\\git\\else\\Clipext') - sys.path.append('C:\\git\\else\\Downloady') - import clipext - import downloady -except ImportError: - # pip install - # https://raw.githubusercontent.com/voussoir/else/master/_voussoirkit/voussoirkit.zip - from voussoirkit import clipext - from voussoirkit import downloady +# pip install +# https://raw.githubusercontent.com/voussoir/else/master/_voussoirkit/voussoirkit.zip +from voussoirkit import clipext +from voussoirkit import downloady ''' ''' diff --git a/OpenDirDL/opendirdl.py b/OpenDirDL/opendirdl.py index 333f1b6..b595c13 100644 --- a/OpenDirDL/opendirdl.py +++ b/OpenDirDL/opendirdl.py @@ -136,16 +136,10 @@ import sys ## import tkinter import urllib.parse -try: - sys.path.append('C:\\git\\else\\Bytestring') - sys.path.append('C:\\git\\else\\Downloady') - import bytestring - import downloady -except ImportError: - # pip install - # https://raw.githubusercontent.com/voussoir/else/master/_voussoirkit/voussoirkit.zip - from voussoirkit import bytestring - from voussoirkit import downloady +# pip install +# https://raw.githubusercontent.com/voussoir/else/master/_voussoirkit/voussoirkit.zip +from voussoirkit import bytestring +from voussoirkit import downloady DOWNLOAD_CHUNK = 16 * bytestring.KIBIBYTE FILENAME_BADCHARS = '/\\:*?"<>|' @@ -158,6 +152,7 @@ UNKNOWN_SIZE_STRING = '???' # enough of the typical opendir to speed things up. SKIPPABLE_FILETYPES = [ '.3gp', + '.7z', '.aac', '.avi', '.bin', @@ -532,21 +527,24 @@ def build_file_tree(databasename): sql = sqlite3.connect(databasename) cur = sql.cursor() cur.execute('SELECT * FROM urls WHERE do_download == 1') - all_items = cur.fetchall() + fetch_all = cur.fetchall() sql.close() - if len(all_items) == 0: + if len(fetch_all) == 0: return path_form = '{domain}\\{folder}\\{filename}' - all_items = [ - { - 'url': item[SQL_URL], - 'size': item[SQL_CONTENT_LENGTH], - 'path_parts': path_form.format(**url_split(item[SQL_URL])).split('\\'), - } - for item in all_items - ] + all_items = [] + for item in fetch_all: + url = item[SQL_URL] + size = item[SQL_CONTENT_LENGTH] + path_parts = url_split(item[SQL_URL]) + path_parts = path_form.format(**path_parts) + #path_parts = urllib.parse.unquote(path_parts) + path_parts = path_parts.split('\\') + item = {'url': url, 'size': size, 'path_parts': path_parts} + all_items.append(item) + all_items.sort(key=lambda x: x['url']) root_data = { @@ -771,7 +769,7 @@ def smart_insert(sql, cur, url=None, head=None, commit=True): content_type = head.headers.get('Content-Type', None) basename = url_split(url)['filename'] - basename = urllib.parse.unquote(basename) + #basename = urllib.parse.unquote(basename) do_download = True cur.execute('SELECT * FROM urls WHERE url == ?', [url]) @@ -798,7 +796,7 @@ def url_split(url): ''' Given a url, return a dictionary of its components. ''' - url = urllib.parse.unquote(url) + #url = urllib.parse.unquote(url) parts = urllib.parse.urlsplit(url) if any(part == '' for part in [parts.scheme, parts.netloc]): raise ValueError('Not a valid URL') @@ -817,9 +815,9 @@ def url_split(url): result = { 'scheme': scheme, - 'domain': root, - 'folder': folder, - 'filename': filename, + 'domain': urllib.parse.unquote(root), + 'folder': urllib.parse.unquote(folder), + 'filename': urllib.parse.unquote(filename), } return result diff --git a/ServerReference/simpleserver.py b/ServerReference/simpleserver.py index 5442b1d..605362f 100644 --- a/ServerReference/simpleserver.py +++ b/ServerReference/simpleserver.py @@ -8,19 +8,11 @@ import socketserver import sys import types -try: - sys.path.append('C:\\git\\else\\Bytestring') - sys.path.append('C:\\git\\else\\Pathclass') - sys.path.append('C:\\git\\else\\Ratelimiter') - import bytestring - import pathclass - import ratelimiter -except ImportError: - # pip install - # https://raw.githubusercontent.com/voussoir/else/master/_voussoirkit/voussoirkit.zip - from voussoirkit import bytestring - from voussoirkit import pathclass - from voussoirkit import ratelimiter +# pip install +# https://raw.githubusercontent.com/voussoir/else/master/_voussoirkit/voussoirkit.zip +from voussoirkit import bytestring +from voussoirkit import pathclass +from voussoirkit import ratelimiter FILE_READ_CHUNK = bytestring.MIBIBYTE RATELIMITER = ratelimiter.Ratelimiter(16 * bytestring.MIBIBYTE) diff --git a/SnudownTest/.gitignore b/SnudownTest/.gitignore deleted file mode 100644 index 4945b7a..0000000 --- a/SnudownTest/.gitignore +++ /dev/null @@ -1,10 +0,0 @@ -build/ -dist/ -snudown.egg-info/ -src/html_entities.h -*.pyc -*.so -*.so.* -*.o -/fuzzing/bin -/fuzzing/testing diff --git a/SnudownTest/.gitmodules b/SnudownTest/.gitmodules deleted file mode 100644 index e909bd5..0000000 --- a/SnudownTest/.gitmodules +++ /dev/null @@ -1,4 +0,0 @@ -[submodule "gumbo_snudown"] - path = fuzzing/gumbo_snudown - url = git@github.com:JordanMilne/gumbo-parser.git - branch = markdown_validation diff --git a/SnudownTest/Python.h b/SnudownTest/Python.h deleted file mode 100644 index 2dd8290..0000000 --- a/SnudownTest/Python.h +++ /dev/null @@ -1,133 +0,0 @@ -#ifndef Py_PYTHON_H -#define Py_PYTHON_H -/* Since this is a "meta-include" file, no #ifdef __cplusplus / extern "C" { */ - -/* Include nearly all Python header files */ - -#include "patchlevel.h" -#include "pyconfig.h" -#include "pymacconfig.h" - -#include - -#ifndef UCHAR_MAX -#error "Something's broken. UCHAR_MAX should be defined in limits.h." -#endif - -#if UCHAR_MAX != 255 -#error "Python's source code assumes C's unsigned char is an 8-bit type." -#endif - -#if defined(__sgi) && defined(WITH_THREAD) && !defined(_SGI_MP_SOURCE) -#define _SGI_MP_SOURCE -#endif - -#include -#ifndef NULL -# error "Python.h requires that stdio.h define NULL." -#endif - -#include -#ifdef HAVE_ERRNO_H -#include -#endif -#include -#ifdef HAVE_UNISTD_H -#include -#endif - -/* For size_t? */ -#ifdef HAVE_STDDEF_H -#include -#endif - -/* CAUTION: Build setups should ensure that NDEBUG is defined on the - * compiler command line when building Python in release mode; else - * assert() calls won't be removed. - */ -#include - -#include "pyport.h" -#include "pymacro.h" - -#include "pyatomic.h" - -/* Debug-mode build with pymalloc implies PYMALLOC_DEBUG. - * PYMALLOC_DEBUG is in error if pymalloc is not in use. - */ -#if defined(Py_DEBUG) && defined(WITH_PYMALLOC) && !defined(PYMALLOC_DEBUG) -#define PYMALLOC_DEBUG -#endif -#if defined(PYMALLOC_DEBUG) && !defined(WITH_PYMALLOC) -#error "PYMALLOC_DEBUG requires WITH_PYMALLOC" -#endif -#include "pymath.h" -#include "pytime.h" -#include "pymem.h" - -#include "object.h" -#include "objimpl.h" -#include "typeslots.h" -#include "pyhash.h" - -#include "pydebug.h" - -#include "bytearrayobject.h" -#include "bytesobject.h" -#include "unicodeobject.h" -#include "longobject.h" -#include "longintrepr.h" -#include "boolobject.h" -#include "floatobject.h" -#include "complexobject.h" -#include "rangeobject.h" -#include "memoryobject.h" -#include "tupleobject.h" -#include "listobject.h" -#include "dictobject.h" -#include "enumobject.h" -#include "setobject.h" -#include "methodobject.h" -#include "moduleobject.h" -#include "funcobject.h" -#include "classobject.h" -#include "fileobject.h" -#include "pycapsule.h" -#include "traceback.h" -#include "sliceobject.h" -#include "cellobject.h" -#include "iterobject.h" -#include "genobject.h" -#include "descrobject.h" -#include "warnings.h" -#include "weakrefobject.h" -#include "structseq.h" -#include "namespaceobject.h" - -#include "codecs.h" -#include "pyerrors.h" - -#include "pystate.h" - -#include "pyarena.h" -#include "modsupport.h" -#include "pythonrun.h" -#include "ceval.h" -#include "sysmodule.h" -#include "intrcheck.h" -#include "import.h" - -#include "abstract.h" -#include "bltinmodule.h" - -#include "compile.h" -#include "eval.h" - -#include "pyctype.h" -#include "pystrtod.h" -#include "pystrcmp.h" -#include "dtoa.h" -#include "fileutils.h" -#include "pyfpe.h" - -#endif /* !Py_PYTHON_H */ diff --git a/SnudownTest/SECURITY.md b/SnudownTest/SECURITY.md deleted file mode 100644 index 4c40dbe..0000000 --- a/SnudownTest/SECURITY.md +++ /dev/null @@ -1,12 +0,0 @@ -For safety reasons, whenever you add or change something in Snudown, -you should add a few test-cases that demonstrate your change and do a -fuzzing run in `/fuzzing` by running `make afl`. Make sure you have `cmake` -installed and in your `PATH`! - -This uses [American Fuzzy Lop](http://lcamtuf.coredump.cx/afl/) and a -modified [Google Gumbo](https://github.com/google/gumbo-parser/) to ensure -there is no way to generate invalid HTML, and that there are no unsafe -memory operations. - -See [American Fuzzy Lop](http://lcamtuf.coredump.cx/afl/)'s instructions -for your platform to get started. diff --git a/SnudownTest/autolink.c b/SnudownTest/autolink.c deleted file mode 100644 index 8d0e39a..0000000 --- a/SnudownTest/autolink.c +++ /dev/null @@ -1,487 +0,0 @@ -/* - * Copyright (c) 2011, Vicent Marti - * - * Permission to use, copy, modify, and distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ - -#include "buffer.h" -#include "autolink.h" - -#include -#include -#include -#include - -#if defined(_WIN32) -#define strncasecmp _strnicmp -#endif - -int -sd_autolink_issafe(const uint8_t *link, size_t link_len) -{ - static const size_t valid_uris_count = 14; - static const char *valid_uris[] = { - "http://", "https://", "ftp://", "mailto://", - "/", "git://", "steam://", "irc://", "news://", "mumble://", - "ssh://", "ircs://", "ts3server://", "#" - }; - - size_t i; - - for (i = 0; i < valid_uris_count; ++i) { - size_t len = strlen(valid_uris[i]); - - if (link_len > len && - strncasecmp((char *)link, valid_uris[i], len) == 0 && - (isalnum(link[len]) || link[len] == '#' || link[len] == '/' || link[len] == '?')) - return 1; - } - - return 0; -} - -static size_t -autolink_delim(uint8_t *data, size_t link_end, size_t max_rewind, size_t size) -{ - uint8_t cclose, copen = 0; - size_t i; - - for (i = 0; i < link_end; ++i) - if (data[i] == '<') { - link_end = i; - break; - } - - while (link_end > 0) { - uint8_t c = data[link_end - 1]; - - if (c == 0) - break; - - if (strchr("?!.,", c) != NULL) - link_end--; - - else if (c == ';') { - size_t new_end = link_end - 2; - - while (new_end > 0 && isalpha(data[new_end])) - new_end--; - - if (new_end < link_end - 2 && data[new_end] == '&') - link_end = new_end; - else - link_end--; - } - else break; - } - - if (link_end == 0) - return 0; - - cclose = data[link_end - 1]; - - switch (cclose) { - case '"': copen = '"'; break; - case '\'': copen = '\''; break; - case ')': copen = '('; break; - case ']': copen = '['; break; - case '}': copen = '{'; break; - } - - if (copen != 0) { - size_t closing = 0; - size_t opening = 0; - size_t i = 0; - - /* Try to close the final punctuation sign in this same line; - * if we managed to close it outside of the URL, that means that it's - * not part of the URL. If it closes inside the URL, that means it - * is part of the URL. - * - * Examples: - * - * foo http://www.pokemon.com/Pikachu_(Electric) bar - * => http://www.pokemon.com/Pikachu_(Electric) - * - * foo (http://www.pokemon.com/Pikachu_(Electric)) bar - * => http://www.pokemon.com/Pikachu_(Electric) - * - * foo http://www.pokemon.com/Pikachu_(Electric)) bar - * => http://www.pokemon.com/Pikachu_(Electric)) - * - * (foo http://www.pokemon.com/Pikachu_(Electric)) bar - * => foo http://www.pokemon.com/Pikachu_(Electric) - */ - - while (i < link_end) { - if (data[i] == copen) - opening++; - else if (data[i] == cclose) - closing++; - - i++; - } - - if (closing != opening) - link_end--; - } - - return link_end; -} - -/* - * Checks that `prefix_char` occurs on a word boundary just before `data`, - * where `data` points to the character to search to the left of, and a word boundary - * is (currently) a whitespace character, punctuation, or the start of the string. - * Returns the length of the prefix. - */ -static int -check_reddit_autolink_prefix( - const uint8_t* data, - size_t max_rewind, - size_t max_lookbehind, - size_t size, - char prefix_char - ) -{ - /* Make sure this `/` is part of `/?r/` */ - if (size < 2 || max_rewind < 1 || data[-1] != prefix_char) - return 0; - - /* Not at the start of the buffer, no inlines to the immediate left of the `prefix_char` */ - if (max_rewind > 1) { - const char boundary = data[-2]; - if (boundary == '/') - return 2; - /** - * Here's where our lack of unicode-awareness bites us. We don't correctly - * match punctuation / whitespace characters for the boundary, because we - * reject valid cases like "。r/example" (note the fullwidth period.) - * - * A better implementation might try to rewind over bytes with the 8th bit set, try - * to decode them to a valid codepoint, then do a unicode-aware check on the codepoint. - */ - else if (ispunct(boundary) || isspace(boundary)) - return 1; - else - return 0; - } else if (max_lookbehind > 2) { - /* There's an inline element just left of the `prefix_char`, is it an escaped forward - * slash? bail out so we correctly handle stuff like "\/r/foo". This will also correctly - * allow "\\/r/foo". - */ - if (data[-2] == '/' && data[-3] == '\\') - return 0; - } - - /* Must be a new-style shortlink with nothing relevant to the left of it. */ - return 1; -} - -static size_t -check_domain(uint8_t *data, size_t size, int allow_short) -{ - size_t i, np = 0; - - if (!isalnum(data[0])) - return 0; - - for (i = 1; i < size - 1; ++i) { - if (data[i] == '.') np++; - else if (!isalnum(data[i]) && data[i] != '-') break; - } - - if (allow_short) { - /* We don't need a valid domain in the strict sense (with - * least one dot; so just make sure it's composed of valid - * domain characters and return the length of the the valid - * sequence. */ - return i; - } else { - /* a valid domain needs to have at least a dot. - * that's as far as we get */ - return np ? i : 0; - } -} - -size_t -sd_autolink__www( - size_t *rewind_p, - struct buf *link, - uint8_t *data, - size_t max_rewind, - size_t size, - unsigned int flags) -{ - size_t link_end; - - if (max_rewind > 0 && !ispunct(data[-1]) && !isspace(data[-1])) - return 0; - - if (size < 4 || memcmp(data, "www.", strlen("www.")) != 0) - return 0; - - link_end = check_domain(data, size, 0); - - if (link_end == 0) - return 0; - - while (link_end < size && !isspace(data[link_end])) - link_end++; - - link_end = autolink_delim(data, link_end, max_rewind, size); - - if (link_end == 0) - return 0; - - bufput(link, data, link_end); - *rewind_p = 0; - - return (int)link_end; -} - -size_t -sd_autolink__email( - size_t *rewind_p, - struct buf *link, - uint8_t *data, - size_t max_rewind, - size_t size, - unsigned int flags) -{ - size_t link_end, rewind; - int nb = 0, np = 0; - - for (rewind = 0; rewind < max_rewind; ++rewind) { - uint8_t c = data[-rewind - 1]; - - if (c == 0) - break; - - if (isalnum(c)) - continue; - - if (strchr(".+-_", c) != NULL) - continue; - - break; - } - - if (rewind == 0) - return 0; - - for (link_end = 0; link_end < size; ++link_end) { - uint8_t c = data[link_end]; - - if (isalnum(c)) - continue; - - if (c == '@') - nb++; - else if (c == '.' && link_end < size - 1) - np++; - else if (c != '-' && c != '_') - break; - } - - if (link_end < 2 || nb != 1 || np == 0) - return 0; - - link_end = autolink_delim(data, link_end, max_rewind, size); - - if (link_end == 0) - return 0; - - bufput(link, data - rewind, link_end + rewind); - *rewind_p = rewind; - - return link_end; -} - -size_t -sd_autolink__url( - size_t *rewind_p, - struct buf *link, - uint8_t *data, - size_t max_rewind, - size_t size, - unsigned int flags) -{ - size_t link_end, rewind = 0, domain_len; - - if (size < 4 || data[1] != '/' || data[2] != '/') - return 0; - - while (rewind < max_rewind && isalpha(data[-rewind - 1])) - rewind++; - - if (!sd_autolink_issafe(data - rewind, size + rewind)) - return 0; - - link_end = strlen("://"); - - domain_len = check_domain( - data + link_end, - size - link_end, - flags & SD_AUTOLINK_SHORT_DOMAINS); - - if (domain_len == 0) - return 0; - - link_end += domain_len; - while (link_end < size && !isspace(data[link_end])) - link_end++; - - link_end = autolink_delim(data, link_end, max_rewind, size); - - if (link_end == 0) - return 0; - - bufput(link, data - rewind, link_end + rewind); - *rewind_p = rewind; - - return link_end; -} - -size_t -sd_autolink__subreddit( - size_t *rewind_p, - struct buf *link, - uint8_t *data, - size_t max_rewind, - size_t max_lookbehind, - size_t size, - int *no_slash - ) -{ - /** - * This is meant to handle both r/foo and /r/foo style subreddit references. - * In a valid /?r/ link, `*data` will always point to the '/' after the first 'r'. - * In pseudo-regex, this matches something like: - * - * `(/|(?<=\b))r/(all-)?%subreddit%([-+]%subreddit%)*(/[\w\-/]*)?` - * where %subreddit% == `((t:)?\w{2,24}|reddit\.com)` - */ - size_t link_end; - size_t rewind; - int is_allminus = 0; - - rewind = check_reddit_autolink_prefix(data, max_rewind, max_lookbehind, size, 'r'); - if (!rewind) - return 0; - - /* offset to the "meat" of the link */ - link_end = strlen("/"); - - if (size >= link_end + 4 && strncasecmp((char*)data + link_end, "all-", 4) == 0) - is_allminus = 1; - - do { - size_t start = link_end; - int max_length = 24; - - /* special case: /r/reddit.com (only subreddit containing '.'). */ - if ( size >= link_end+10 && strncasecmp((char*)data+link_end, "reddit.com", 10) == 0 ) { - link_end += 10; - /* Make sure there are no trailing characters (don't do - * any autolinking for /r/reddit.commission) */ - max_length = 10; - } - - /* If not a special case, verify it begins with (t:)?[A-Za-z0-9] */ - else { - /* support autolinking to timereddits, /r/t:when (1 April 2012) */ - if ( size > link_end+2 && strncasecmp((char*)data+link_end, "t:", 2) == 0 ) - link_end += 2; /* Jump over the 't:' */ - - /* the first character of a subreddit name must be a letter or digit */ - if (!isalnum(data[link_end])) - return 0; - link_end += 1; - } - - /* consume valid characters ([A-Za-z0-9_]) until we run out */ - while (link_end < size && (isalnum(data[link_end]) || - data[link_end] == '_')) - link_end++; - - /* valid subreddit names are between 3 and 21 characters, with - * some subreddits having 2-character names. Don't bother with - * autolinking for anything outside this length range. - * (chksrname function in reddit/.../validator.py) */ - if ( link_end-start < 2 || link_end-start > max_length ) - return 0; - - /* If we are linking to a multireddit, continue */ - } while ( link_end < size && (data[link_end] == '+' || (is_allminus && data[link_end] == '-')) && link_end++ ); - - if (link_end < size && data[link_end] == '/') { - while (link_end < size && (isalnum(data[link_end]) || - data[link_end] == '_' || - data[link_end] == '/' || - data[link_end] == '-')) - link_end++; - } - - /* make the link */ - bufput(link, data - rewind, link_end + rewind); - - *no_slash = (rewind == 1); - *rewind_p = rewind; - - return link_end; -} - -size_t -sd_autolink__username( - size_t *rewind_p, - struct buf *link, - uint8_t *data, - size_t max_rewind, - size_t max_lookbehind, - size_t size, - int *no_slash - ) -{ - size_t link_end; - size_t rewind; - - if (size < 3) - return 0; - - rewind = check_reddit_autolink_prefix(data, max_rewind, max_lookbehind, size, 'u'); - if (!rewind) - return 0; - - link_end = strlen("/"); - - /* the first letter of a username must... well, be valid, we don't care otherwise */ - if (!isalnum(data[link_end]) && data[link_end] != '_' && data[link_end] != '-') - return 0; - link_end += 1; - - /* consume valid characters ([A-Za-z0-9_-/]) until we run out */ - while (link_end < size && (isalnum(data[link_end]) || - data[link_end] == '_' || - data[link_end] == '/' || - data[link_end] == '-')) - link_end++; - - /* make the link */ - bufput(link, data - rewind, link_end + rewind); - - *no_slash = (rewind == 1); - *rewind_p = rewind; - - return link_end; -} diff --git a/SnudownTest/autolink.h b/SnudownTest/autolink.h deleted file mode 100644 index 55b7aaa..0000000 --- a/SnudownTest/autolink.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright (c) 2011, Vicent Marti - * - * Permission to use, copy, modify, and distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ - -#ifndef UPSKIRT_AUTOLINK_H -#define UPSKIRT_AUTOLINK_H - -#include "buffer.h" - -#ifdef __cplusplus -extern "C" { -#endif - -enum { - SD_AUTOLINK_SHORT_DOMAINS = (1 << 0), -}; - -int -sd_autolink_issafe(const uint8_t *link, size_t link_len); - -size_t -sd_autolink__www(size_t *rewind_p, struct buf *link, - uint8_t *data, size_t max_rewind, size_t size, unsigned int flags); - -size_t -sd_autolink__email(size_t *rewind_p, struct buf *link, - uint8_t *data, size_t max_rewind, size_t size, unsigned int flags); - -size_t -sd_autolink__url(size_t *rewind_p, struct buf *link, - uint8_t *data, size_t max_rewind, size_t size, unsigned int flags); - -extern size_t -sd_autolink__subreddit(size_t *rewind_p, struct buf *link, uint8_t *data, - size_t max_rewind, size_t max_lookbehind, size_t size, int *no_slash); - -extern size_t -sd_autolink__username(size_t *rewind_p, struct buf *link, uint8_t *data, - size_t max_rewind, size_t max_lookbehind, size_t size, int *no_slash); - -#ifdef __cplusplus -} -#endif - -#endif - -/* vim: set filetype=c: */ diff --git a/SnudownTest/buffer.c b/SnudownTest/buffer.c deleted file mode 100644 index ab18948..0000000 --- a/SnudownTest/buffer.c +++ /dev/null @@ -1,236 +0,0 @@ -/* - * Copyright (c) 2008, Natacha Porté - * Copyright (c) 2011, Vicent Martí - * - * Permission to use, copy, modify, and distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ - -#define BUFFER_MAX_ALLOC_SIZE (1024 * 1024 * 16) //16mb - -#include "buffer.h" - -#include -#include -#include -#include - -/* MSVC compat */ -#if defined(_MSC_VER) -# define _buf_vsnprintf _vsnprintf -#else -# define _buf_vsnprintf vsnprintf -#endif - -int -bufprefix(const struct buf *buf, const char *prefix) -{ - size_t i; - assert(buf && buf->unit); - - for (i = 0; i < buf->size; ++i) { - if (prefix[i] == 0) - return 0; - - if (buf->data[i] != prefix[i]) - return buf->data[i] - prefix[i]; - } - - return 0; -} - -/* bufgrow: increasing the allocated size to the given value */ -int -bufgrow(struct buf *buf, size_t neosz) -{ - size_t neoasz; - void *neodata; - - assert(buf && buf->unit); - - if (neosz > BUFFER_MAX_ALLOC_SIZE) - return BUF_ENOMEM; - - if (buf->asize >= neosz) - return BUF_OK; - - neoasz = buf->asize + buf->unit; - while (neoasz < neosz) - neoasz += buf->unit; - - neodata = realloc(buf->data, neoasz); - if (!neodata) - return BUF_ENOMEM; - - buf->data = neodata; - buf->asize = neoasz; - return BUF_OK; -} - - -/* bufnew: allocation of a new buffer */ -struct buf * -bufnew(size_t unit) -{ - struct buf *ret; - ret = malloc(sizeof (struct buf)); - - if (ret) { - ret->data = 0; - ret->size = ret->asize = 0; - ret->unit = unit; - } - return ret; -} - -/* bufnullterm: NULL-termination of the string array */ -const char * -bufcstr(struct buf *buf) -{ - assert(buf && buf->unit); - - if (buf->size < buf->asize && buf->data[buf->size] == 0) - return (char *)buf->data; - - if (buf->size + 1 <= buf->asize || bufgrow(buf, buf->size + 1) == 0) { - buf->data[buf->size] = 0; - return (char *)buf->data; - } - - return NULL; -} - -/* bufprintf: formatted printing to a buffer */ -void -bufprintf(struct buf *buf, const char *fmt, ...) -{ - va_list ap; - int n; - - assert(buf && buf->unit); - - if (buf->size >= buf->asize && bufgrow(buf, buf->size + 1) < 0) - return; - va_start(ap, fmt); - n = _buf_vsnprintf((char *)buf->data + buf->size, buf->asize - buf->size, fmt, ap); - va_end(ap); - - if (n < 0) { -#ifdef _MSC_VER - va_start(ap, fmt); - n = _vscprintf(fmt, ap); - va_end(ap); -#else - return; -#endif - } - if ((size_t)n >= buf->asize - buf->size) { - if (bufgrow(buf, buf->size + n + 1) < 0) - return; - - va_start(ap, fmt); - n = _buf_vsnprintf((char *)buf->data + buf->size, buf->asize - buf->size, fmt, ap); - va_end(ap); - } - - if (n < 0) - return; - - buf->size += n; -} - -/* bufput: appends raw data to a buffer */ -void -bufput(struct buf *buf, const void *data, size_t len) -{ - assert(buf && buf->unit); - - if (buf->size + len > buf->asize && bufgrow(buf, buf->size + len) < 0) - return; - - memcpy(buf->data + buf->size, data, len); - buf->size += len; -} - -/* bufputs: appends a NUL-terminated string to a buffer */ -void -bufputs(struct buf *buf, const char *str) -{ - bufput(buf, str, strlen(str)); -} - - -/* bufputc: appends a single uint8_t to a buffer */ -void -bufputc(struct buf *buf, int c) -{ - assert(buf && buf->unit); - - if (buf->size + 1 > buf->asize && bufgrow(buf, buf->size + 1) < 0) - return; - - buf->data[buf->size] = c; - buf->size += 1; -} - -/* bufrelease: decrease the reference count and free the buffer if needed */ -void -bufrelease(struct buf *buf) -{ - if (!buf) - return; - - free(buf->data); - free(buf); -} - - -/* bufreset: frees internal data of the buffer */ -void -bufreset(struct buf *buf) -{ - if (!buf) - return; - - free(buf->data); - buf->data = NULL; - buf->size = buf->asize = 0; -} - -/* bufslurp: removes a given number of bytes from the head of the array */ -void -bufslurp(struct buf *buf, size_t len) -{ - assert(buf && buf->unit); - - if (len >= buf->size) { - buf->size = 0; - return; - } - - buf->size -= len; - memmove(buf->data, buf->data + len, buf->size); -} - -/* buftrucate: truncates the buffer at `size` */ -int -buftruncate(struct buf *buf, size_t size) -{ - if (buf->size < size || size < 0) { - /* bail out in debug mode so we can figure out why this happened */ - assert(0); - return BUF_EINVALIDIDX; - } - - buf->size = size; - return BUF_OK; -} diff --git a/SnudownTest/buffer.h b/SnudownTest/buffer.h deleted file mode 100644 index ab98ab6..0000000 --- a/SnudownTest/buffer.h +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Copyright (c) 2008, Natacha Porté - * Copyright (c) 2011, Vicent Martí - * - * Permission to use, copy, modify, and distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ - -#ifndef BUFFER_H__ -#define BUFFER_H__ - -#include -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif - -#if defined(_MSC_VER) -#define __attribute__(x) -#define inline -#endif - -typedef enum { - BUF_OK = 0, - BUF_ENOMEM = -1, - BUF_EINVALIDIDX = -2, -} buferror_t; - -/* struct buf: character array buffer */ -struct buf { - uint8_t *data; /* actual character data */ - size_t size; /* size of the string */ - size_t asize; /* allocated size (0 = volatile buffer) */ - size_t unit; /* reallocation unit size (0 = read-only buffer) */ -}; - -/* CONST_BUF: global buffer from a string litteral */ -#define BUF_STATIC(string) \ - { (uint8_t *)string, sizeof string -1, sizeof string, 0, 0 } - -/* VOLATILE_BUF: macro for creating a volatile buffer on the stack */ -#define BUF_VOLATILE(strname) \ - { (uint8_t *)strname, strlen(strname), 0, 0, 0 } - -/* BUFPUTSL: optimized bufputs of a string litteral */ -#define BUFPUTSL(output, literal) \ - bufput(output, literal, sizeof literal - 1) - -/* bufgrow: increasing the allocated size to the given value */ -int bufgrow(struct buf *, size_t); - -/* bufnew: allocation of a new buffer */ -struct buf *bufnew(size_t) __attribute__ ((malloc)); - -/* bufnullterm: NUL-termination of the string array (making a C-string) */ -const char *bufcstr(struct buf *); - -/* bufprefix: compare the beginning of a buffer with a string */ -int bufprefix(const struct buf *buf, const char *prefix); - -/* bufput: appends raw data to a buffer */ -void bufput(struct buf *, const void *, size_t); - -/* bufputs: appends a NUL-terminated string to a buffer */ -void bufputs(struct buf *, const char *); - -/* bufputc: appends a single char to a buffer */ -void bufputc(struct buf *, int); - -/* bufrelease: decrease the reference count and free the buffer if needed */ -void bufrelease(struct buf *); - -/* bufreset: frees internal data of the buffer */ -void bufreset(struct buf *); - -/* bufslurp: removes a given number of bytes from the head of the array */ -void bufslurp(struct buf *, size_t); - -/* bufprintf: formatted printing to a buffer */ -void bufprintf(struct buf *, const char *, ...) __attribute__ ((format (printf, 2, 3))); - -/* buftruncate: truncates the buffer at `size` */ -int buftruncate(struct buf *buf, size_t size); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/SnudownTest/build/buildlog.txt b/SnudownTest/build/buildlog.txt deleted file mode 100644 index 6e899a4..0000000 --- a/SnudownTest/build/buildlog.txt +++ /dev/null @@ -1,75 +0,0 @@ -running install -running bdist_egg -running egg_info -writing top-level names to snudown.egg-info\top_level.txt -writing dependency_links to snudown.egg-info\dependency_links.txt -writing snudown.egg-info\PKG-INFO -reading manifest file 'snudown.egg-info\SOURCES.txt' -writing manifest file 'snudown.egg-info\SOURCES.txt' -installing library code to build\bdist.win32\egg -running install_lib -running build_ext -gperf.exe src\html_entities.gperf --output-file=src\html_entities.h -building 'snudown' extension -creating build -creating build\temp.win32-3.4 -creating build\temp.win32-3.4\Release -creating build\temp.win32-3.4\Release\src -creating build\temp.win32-3.4\Release\html -D:\Visual Studio 1_0_0\VC\BIN\cl.exe /c /nologo /Ox /MD /W3 /GS- /DNDEBUG -Isrc -Ihtml -IC:\Python34\include -IC:\Python34\include /Tcsnudown.c /Fobuild\temp.win32-3.4\Release\snudown.obj -snudown.c -snudown.c(231) : warning C4087: 'PyInit_snudown' : declared with 'void' parameter list -c:\git\else\snudowntest\snudown.c(163) : warning C4700: uninitialized local variable 'options' used -c:\git\else\snudowntest\snudown.c(228) : warning C4715: 'PyInit_snudown' : not all control paths return a value -D:\Visual Studio 1_0_0\VC\BIN\cl.exe /c /nologo /Ox /MD /W3 /GS- /DNDEBUG -Isrc -Ihtml -IC:\Python34\include -IC:\Python34\include /Tcsrc/autolink.c /Fobuild\temp.win32-3.4\Release\src/autolink.obj -autolink.c -src/autolink.c(266) : warning C4146: unary minus operator applied to unsigned type, result still unsigned -src/autolink.c(325) : warning C4146: unary minus operator applied to unsigned type, result still unsigned -src/autolink.c(422) : warning C4018: '>' : signed/unsigned mismatch -D:\Visual Studio 1_0_0\VC\BIN\cl.exe /c /nologo /Ox /MD /W3 /GS- /DNDEBUG -Isrc -Ihtml -IC:\Python34\include -IC:\Python34\include /Tcsrc/buffer.c /Fobuild\temp.win32-3.4\Release\src/buffer.obj -buffer.c -src/buffer.c(124) : warning C4996: '_vsnprintf': This function or variable may be unsafe. Consider using _vsnprintf_s instead. To disable deprecation, use _CRT_SECURE_NO_WARNINGS. See online help for details. - D:\Visual Studio 1_0_0\VC\INCLUDE\stdio.h(363) : see declaration of '_vsnprintf' -src/buffer.c(141) : warning C4996: '_vsnprintf': This function or variable may be unsafe. Consider using _vsnprintf_s instead. To disable deprecation, use _CRT_SECURE_NO_WARNINGS. See online help for details. - D:\Visual Studio 1_0_0\VC\INCLUDE\stdio.h(363) : see declaration of '_vsnprintf' -D:\Visual Studio 1_0_0\VC\BIN\cl.exe /c /nologo /Ox /MD /W3 /GS- /DNDEBUG -Isrc -Ihtml -IC:\Python34\include -IC:\Python34\include /Tcsrc/markdown.c /Fobuild\temp.win32-3.4\Release\src/markdown.obj -markdown.c -c:\git\else\snudowntest\src\html_entities.h(32) : warning C4129: 's' : unrecognized character escape sequence -c:\git\else\snudowntest\src\html_entities.h(32) : warning C4129: 's' : unrecognized character escape sequence -c:\git\else\snudowntest\src\html_entities.h(32) : warning C4129: 'h' : unrecognized character escape sequence -src/markdown.c(2168) : warning C4018: '>' : signed/unsigned mismatch -D:\Visual Studio 1_0_0\VC\BIN\cl.exe /c /nologo /Ox /MD /W3 /GS- /DNDEBUG -Isrc -Ihtml -IC:\Python34\include -IC:\Python34\include /Tcsrc/stack.c /Fobuild\temp.win32-3.4\Release\src/stack.obj -stack.c -D:\Visual Studio 1_0_0\VC\BIN\cl.exe /c /nologo /Ox /MD /W3 /GS- /DNDEBUG -Isrc -Ihtml -IC:\Python34\include -IC:\Python34\include /Tchtml/houdini_href_e.c /Fobuild\temp.win32-3.4\Release\html/houdini_href_e.obj -houdini_href_e.c -D:\Visual Studio 1_0_0\VC\BIN\cl.exe /c /nologo /Ox /MD /W3 /GS- /DNDEBUG -Isrc -Ihtml -IC:\Python34\include -IC:\Python34\include /Tchtml/houdini_html_e.c /Fobuild\temp.win32-3.4\Release\html/houdini_html_e.obj -houdini_html_e.c -D:\Visual Studio 1_0_0\VC\BIN\cl.exe /c /nologo /Ox /MD /W3 /GS- /DNDEBUG -Isrc -Ihtml -IC:\Python34\include -IC:\Python34\include /Tchtml/html.c /Fobuild\temp.win32-3.4\Release\html/html.obj -html.c -D:\Visual Studio 1_0_0\VC\BIN\cl.exe /c /nologo /Ox /MD /W3 /GS- /DNDEBUG -Isrc -Ihtml -IC:\Python34\include -IC:\Python34\include /Tchtml/html_smartypants.c /Fobuild\temp.win32-3.4\Release\html/html_smartypants.obj -html_smartypants.c -html/html_smartypants.c(97) : warning C4996: '_snprintf': This function or variable may be unsafe. Consider using _snprintf_s instead. To disable deprecation, use _CRT_SECURE_NO_WARNINGS. See online help for details. - D:\Visual Studio 1_0_0\VC\INCLUDE\stdio.h(363) : see declaration of '_snprintf' -creating build\lib.win32-3.4 -D:\Visual Studio 1_0_0\VC\BIN\link.exe /DLL /nologo /INCREMENTAL:NO /LIBPATH:C:\Python34\libs /LIBPATH:C:\Python34\PCbuild /EXPORT:PyInit_snudown build\temp.win32-3.4\Release\snudown.obj build\temp.win32-3.4\Release\src/autolink.obj build\temp.win32-3.4\Release\src/buffer.obj build\temp.win32-3.4\Release\src/markdown.obj build\temp.win32-3.4\Release\src/stack.obj build\temp.win32-3.4\Release\html/houdini_href_e.obj build\temp.win32-3.4\Release\html/houdini_html_e.obj build\temp.win32-3.4\Release\html/html.obj build\temp.win32-3.4\Release\html/html_smartypants.obj /OUT:build\lib.win32-3.4\snudown.pyd /IMPLIB:build\temp.win32-3.4\Release\snudown.lib /MANIFESTFILE:build\temp.win32-3.4\Release\snudown.pyd.manifest - Creating library build\temp.win32-3.4\Release\snudown.lib and object build\temp.win32-3.4\Release\snudown.exp -creating build\bdist.win32 -creating build\bdist.win32\egg -copying build\lib.win32-3.4\snudown.pyd -> build\bdist.win32\egg -creating stub loader for snudown.pyd -creating build\bdist.win32\egg\EGG-INFO -copying snudown.egg-info\PKG-INFO -> build\bdist.win32\egg\EGG-INFO -copying snudown.egg-info\SOURCES.txt -> build\bdist.win32\egg\EGG-INFO -copying snudown.egg-info\dependency_links.txt -> build\bdist.win32\egg\EGG-INFO -copying snudown.egg-info\top_level.txt -> build\bdist.win32\egg\EGG-INFO -writing build\bdist.win32\egg\EGG-INFO\native_libs.txt -creating 'dist\snudown-1.4.0-py3.4-win32.egg' and adding 'build\bdist.win32\egg' to it -removing 'build\bdist.win32\egg' (and everything under it) -Processing snudown-1.4.0-py3.4-win32.egg -Removing c:\python34\lib\site-packages\snudown-1.4.0-py3.4-win32.egg -Copying snudown-1.4.0-py3.4-win32.egg to c:\python34\lib\site-packages -snudown 1.4.0 is already the active version in easy-install.pth - -Installed c:\python34\lib\site-packages\snudown-1.4.0-py3.4-win32.egg -Processing dependencies for snudown==1.4.0 -Finished processing dependencies for snudown==1.4.0 diff --git a/SnudownTest/build/lib.win32-3.4/snudown.pyd b/SnudownTest/build/lib.win32-3.4/snudown.pyd deleted file mode 100644 index 4c8b0cd..0000000 Binary files a/SnudownTest/build/lib.win32-3.4/snudown.pyd and /dev/null differ diff --git a/SnudownTest/build/temp.win32-3.4/Release/html/houdini_href_e.obj b/SnudownTest/build/temp.win32-3.4/Release/html/houdini_href_e.obj deleted file mode 100644 index 4f6ce78..0000000 Binary files a/SnudownTest/build/temp.win32-3.4/Release/html/houdini_href_e.obj and /dev/null differ diff --git a/SnudownTest/build/temp.win32-3.4/Release/html/houdini_html_e.obj b/SnudownTest/build/temp.win32-3.4/Release/html/houdini_html_e.obj deleted file mode 100644 index 3cb4b7b..0000000 Binary files a/SnudownTest/build/temp.win32-3.4/Release/html/houdini_html_e.obj and /dev/null differ diff --git a/SnudownTest/build/temp.win32-3.4/Release/html/html.obj b/SnudownTest/build/temp.win32-3.4/Release/html/html.obj deleted file mode 100644 index 0a48150..0000000 Binary files a/SnudownTest/build/temp.win32-3.4/Release/html/html.obj and /dev/null differ diff --git a/SnudownTest/build/temp.win32-3.4/Release/html/html_smartypants.obj b/SnudownTest/build/temp.win32-3.4/Release/html/html_smartypants.obj deleted file mode 100644 index 1c252dd..0000000 Binary files a/SnudownTest/build/temp.win32-3.4/Release/html/html_smartypants.obj and /dev/null differ diff --git a/SnudownTest/build/temp.win32-3.4/Release/snudown.exp b/SnudownTest/build/temp.win32-3.4/Release/snudown.exp deleted file mode 100644 index f7194b3..0000000 Binary files a/SnudownTest/build/temp.win32-3.4/Release/snudown.exp and /dev/null differ diff --git a/SnudownTest/build/temp.win32-3.4/Release/snudown.lib b/SnudownTest/build/temp.win32-3.4/Release/snudown.lib deleted file mode 100644 index 9df05e4..0000000 Binary files a/SnudownTest/build/temp.win32-3.4/Release/snudown.lib and /dev/null differ diff --git a/SnudownTest/build/temp.win32-3.4/Release/snudown.obj b/SnudownTest/build/temp.win32-3.4/Release/snudown.obj deleted file mode 100644 index a3b94d2..0000000 Binary files a/SnudownTest/build/temp.win32-3.4/Release/snudown.obj and /dev/null differ diff --git a/SnudownTest/build/temp.win32-3.4/Release/src/autolink.obj b/SnudownTest/build/temp.win32-3.4/Release/src/autolink.obj deleted file mode 100644 index 0aeb66d..0000000 Binary files a/SnudownTest/build/temp.win32-3.4/Release/src/autolink.obj and /dev/null differ diff --git a/SnudownTest/build/temp.win32-3.4/Release/src/buffer.obj b/SnudownTest/build/temp.win32-3.4/Release/src/buffer.obj deleted file mode 100644 index d11f538..0000000 Binary files a/SnudownTest/build/temp.win32-3.4/Release/src/buffer.obj and /dev/null differ diff --git a/SnudownTest/build/temp.win32-3.4/Release/src/markdown.obj b/SnudownTest/build/temp.win32-3.4/Release/src/markdown.obj deleted file mode 100644 index a0ab148..0000000 Binary files a/SnudownTest/build/temp.win32-3.4/Release/src/markdown.obj and /dev/null differ diff --git a/SnudownTest/build/temp.win32-3.4/Release/src/stack.obj b/SnudownTest/build/temp.win32-3.4/Release/src/stack.obj deleted file mode 100644 index 267494e..0000000 Binary files a/SnudownTest/build/temp.win32-3.4/Release/src/stack.obj and /dev/null differ diff --git a/SnudownTest/debian/changelog b/SnudownTest/debian/changelog deleted file mode 100644 index e8dce65..0000000 --- a/SnudownTest/debian/changelog +++ /dev/null @@ -1,145 +0,0 @@ -snudown (1.4.0) unstable; urgency=medium - - * autolink r/subreddit and u/user - * security: don't rewind over previous inlines when autolinking - * email autolinks re-enabled due to ^ - * more stringent character entity checks and sanitization - * properly handle URLs containing control characters - - -- Jordan Milne Mon, 01 Jun 2015 13:04:23 -0700 - -snudown (1.3.2) unstable; urgency=medium - - * fix alphanumeric-named entities - - -- Neil Williams Wed, 25 Feb 2015 13:32:41 -0800 - -snudown (1.3.1) unstable; urgency=medium - - * add missing entities to entity whitelist - - -- Neil Williams Tue, 24 Feb 2015 22:12:29 -0800 - -snudown (1.3.0) unstable; urgency=medium - - * validate html entities and escape unrecognized ones - - -- Neil Williams Tue, 24 Feb 2015 17:55:38 -0800 - -snudown (1.2.0) unstable; urgency=medium - - * security: fix rewind issues - * email autolinks disabled due to ^ - * security: fix table header OOM bomb - - -- Neil Williams Sat, 20 Sep 2014 11:59:34 -0700 - -snudown (1.1.6) unstable; urgency=low - - * add ts3server url scheme to whitelist - * redo html sanitization for wiki renderer - - -- Neil Williams Tue, 01 Apr 2014 17:12:50 -0700 - -snudown (1.1.5) unstable; urgency=low - - * bring path stuff into user/subreddit autolinking (multis, subpages etc.) - * make /u/ autolinking case sensitive - - -- Neil Williams Wed, 22 May 2013 16:09:31 -0700 - -snudown (1.1.4) unstable; urgency=low - - * make /r/ autolinking case sensitive - - -- Neil Williams Mon, 25 Feb 2013 23:27:10 -0800 - -snudown (1.1.3) unstable; urgency=low - - * add support for /r/all-minus - - -- Neil Williams Tue, 08 Jan 2013 12:55:40 -0800 - -snudown (1.1.2) unstable; urgency=low - - * don't close the toc div if there wasn't a toc :( - - -- Neil Williams Wed, 12 Dec 2012 17:38:05 -0800 - -snudown (1.1.1) unstable; urgency=low - - * minor code cleanup - * add a div around wiki table of contents for styling purposes - - -- Neil Williams Wed, 12 Dec 2012 13:47:49 -0800 - -snudown (1.1.0) unstable; urgency=low - - * add wiki variant of markdown syntax (allows links, and - some raw html) - - -- Neil Williams Wed, 05 Sep 2012 23:30:34 -0700 - -snudown (1.0.7) unstable; urgency=low - - * add python-setuptools to build-depends - - -- Neil Williams Thu, 09 Aug 2012 14:46:49 -0700 - -snudown (1.0.6) unstable; urgency=low - - * made subreddit autolinking more robust thanks to nandhp - * cleaned up packaging - * merged upstream fixes: - * fix blockquotes nested inside paragraphs - * improve parsing of continuous list items - * fix infinite loop parsing strikethrouhgs - - -- Neil Williams Thu, 09 Aug 2012 13:06:38 -0700 - -snudown (1.0.5) unstable; urgency=low - - * require a space between url and title - * merged upstream fixes: - * whitespace after tables prevent them from rendering - * escape html in contents of tables - - -- Neil Williams Thu, 23 Feb 2012 08:40:39 -0800 - -snudown (1.0.4) unstable; urgency=low - - * change username autolinking to /u/username - * properly handle backslash at end of message - - -- Neil Williams Thu, 26 Jan 2012 18:26:45 -0800 - -snudown (1.0.3) unstable; urgency=low - - * ~username auto-linking - * make table headers less strict - * correctly handle ) in link title text - * synced with upstream - * code clean-up - * utf-8 fixes - - -- Neil Williams Wed, 18 Jan 2012 15:20:35 -0800 - -snudown (1.0.2) unstable; urgency=low - - * synced up with upstream - * more safelink relaxation based on community requests - * fixed nesting unordered lists within ordered lists and vice versa - - -- Neil Williams Sat, 19 Nov 2011 17:16:47 -0800 - -snudown (1.0.1) unstable; urgency=low - - * new version, new package - - -- Neil Williams Thu, 17 Nov 2011 14:22:26 -0800 - -snudown (1.0.0) unstable; urgency=low - - * source package automatically created by stdeb 0.6.0+git - - -- Neil Williams Wed, 16 Nov 2011 10:36:53 -0800 diff --git a/SnudownTest/debian/compat b/SnudownTest/debian/compat deleted file mode 100644 index 7f8f011..0000000 --- a/SnudownTest/debian/compat +++ /dev/null @@ -1 +0,0 @@ -7 diff --git a/SnudownTest/debian/control b/SnudownTest/debian/control deleted file mode 100644 index 19a8bd8..0000000 --- a/SnudownTest/debian/control +++ /dev/null @@ -1,15 +0,0 @@ -Source: snudown -Maintainer: Neil Williams -Section: python -Priority: optional -Build-Depends: python-all-dev (>= 2.6.6-3), debhelper (>= 7), python-setuptools, gperf -Standards-Version: 3.9.3 -Homepage: https://github.com/reddit/snudown -Vcs-Git: git://github.com/reddit/snudown.git - -Package: python-snudown -Architecture: any -Depends: ${misc:Depends}, ${python:Depends}, ${shlibs:Depends} -Breaks: ${python:Breaks} -Description: reddit's python wrapper and customization of the Sundown Markdown interpreter. - diff --git a/SnudownTest/debian/copyright b/SnudownTest/debian/copyright deleted file mode 100644 index 3d301c3..0000000 --- a/SnudownTest/debian/copyright +++ /dev/null @@ -1,30 +0,0 @@ -Format: http://dep.debian.net/deps/dep5 -Upstream-Name: snudown -Source: https://github.com/reddit/snudown - -Files: * -Copyright: 2011-2012 Vicent Marti - 2011-2012 reddit Inc. -License: MIT - -Files: debian/* -Copyright: 2011-2012 reddit Inc. -License: MIT - -Files: test_snudown.py -Copyright: 2011-2012 reddit Inc. -License: MIT - -License: MIT - Permission to use, copy, modify, and distribute this software for any purpose - with or without fee is hereby granted, provided that the above copyright - notice and this permission notice appear in all copies. - . - THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH - REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY - AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, - INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM - LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR - OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR - PERFORMANCE OF THIS SOFTWARE. - diff --git a/SnudownTest/debian/rules b/SnudownTest/debian/rules deleted file mode 100644 index 945a5fe..0000000 --- a/SnudownTest/debian/rules +++ /dev/null @@ -1,9 +0,0 @@ -#!/usr/bin/make -f - -# This file was automatically generated by stdeb 0.6.0+git at -# Wed, 16 Nov 2011 10:36:53 -0800 - -%: - dh $@ --with python2 --buildsystem=python_distutils - - diff --git a/SnudownTest/debian/source/format b/SnudownTest/debian/source/format deleted file mode 100644 index 89ae9db..0000000 --- a/SnudownTest/debian/source/format +++ /dev/null @@ -1 +0,0 @@ -3.0 (native) diff --git a/SnudownTest/fuzzing/CMakeLists.txt b/SnudownTest/fuzzing/CMakeLists.txt deleted file mode 100644 index 5ed60de..0000000 --- a/SnudownTest/fuzzing/CMakeLists.txt +++ /dev/null @@ -1,37 +0,0 @@ -cmake_minimum_required(VERSION 2.8) - -set(HEADERS - ../html/houdini.h - ../html/html.h - ../src/autolink.h - ../src/buffer.h - ../src/html_blocks.h - ../src/html_entities.h - ../src/markdown.h - ../src/stack.h - ) -set(LIBRARY_SOURCES - ../html/houdini_href_e.c - ../html/houdini_html_e.c - ../html/html.c - ../html/html_smartypants.c - ../src/autolink.c - ../src/buffer.c - ../src/markdown.c - ../src/stack.c - ${HEADERS} - ) - -set(PROGRAM "snudown-validator") -set(PROGRAM_SOURCES - ${LIBRARY_SOURCES} - snudown-validator.c - ) - -include_directories(. ../src ../html ./build/gumbo_snudown/include ${CMAKE_CURRENT_BINARY_DIR}) -link_directories(${CMAKE_CURRENT_SOURCE_DIR}/build/gumbo_snudown/lib) - -add_executable(${PROGRAM} ${PROGRAM_SOURCES}) -target_link_libraries(${PROGRAM} gumbo) - -set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O3 -g -Wno-error=parentheses") diff --git a/SnudownTest/fuzzing/Makefile b/SnudownTest/fuzzing/Makefile deleted file mode 100644 index 5094b13..0000000 --- a/SnudownTest/fuzzing/Makefile +++ /dev/null @@ -1,62 +0,0 @@ -# Copyright (c) 2015, reddit inc. -# -# Permission to use, copy, modify, and distribute this software for any -# purpose with or without fee is hereby granted, provided that the above -# copyright notice and this permission notice appear in all copies. -# -# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES -# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF -# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR -# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN -# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF -# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - -all: gumbo_snudown snudown-validator - -.PHONY: all clean gumbo_snudown snudown-validator build_dir - -build_dir: - mkdir -p build - -# Our modified gumbo for finding security-relevant syntax issues -gumbo_snudown: build_dir - mkdir -p build/gumbo_snudown - git submodule update --recursive - @[ -f "${CURDIR}/gumbo_snudown/configure" ] || { \ - cd gumbo_snudown; \ - ./autogen.sh; \ - ./configure --prefix=$(CURDIR)/build/gumbo_snudown; \ - } - # Don't build this with AFL instrumentation, I'm assuming Google - # already ran their own fuzzer over their own parser... - $(MAKE) -C gumbo_snudown all install - -gperf_src: - cd ../src/ && gperf html_entities.gperf --output-file=html_entities.h - -# executable -snudown-validator: build_dir gumbo_snudown gperf_src - cd build && cmake .. -DCMAKE_C_COMPILER=$(AFL_PATH)/afl-gcc - $(MAKE) -C build all - -# stuff for fuzzing -gen_testcases: - mkdir -p testing/testcases - rm -f testing/testcases/test_default_*.md - python2.7 gen_testcases.py - -afl: gen_testcases snudown-validator - @[ -n "$(AFL_PATH)" ] || { echo '$$AFL_PATH not set'; false; } - @mkdir -p testing/afl_results - $(AFL_PATH)/afl-fuzz \ - -i testing/testcases \ - -o testing/afl_results \ - -t 100 \ - -m none \ - ./build/snudown-validator - -# housekeeping -clean: - rm -rf *.o - rm -rf build/ diff --git a/SnudownTest/fuzzing/gen_testcases.py b/SnudownTest/fuzzing/gen_testcases.py deleted file mode 100644 index f952192..0000000 --- a/SnudownTest/fuzzing/gen_testcases.py +++ /dev/null @@ -1,20 +0,0 @@ -#!/bin/env python - -# dump all of our testcases into a directory as separate files, like AFL -# wants. - -import os.path -import sys -import itertools - -sys.path.append("..") -import test_snudown - -cases = itertools.chain(test_snudown.cases.keys(), test_snudown.wiki_cases.keys()) -for i, md in enumerate(cases): - # skip huge testcases - if len(md) > 2048: - continue - test_path = os.path.join('testing', 'testcases', 'test_default_%d.md' % i) - with open(test_path, 'w') as f: - f.write(md) diff --git a/SnudownTest/fuzzing/snudown-validator.c b/SnudownTest/fuzzing/snudown-validator.c deleted file mode 100644 index 153e1c4..0000000 --- a/SnudownTest/fuzzing/snudown-validator.c +++ /dev/null @@ -1,226 +0,0 @@ -#include "markdown.h" -#include "html.h" -#include "buffer.h" - -#include -#include -#include -#include -#include -#include -#include - -#include - -#define READ_UNIT 1024 -#define OUTPUT_UNIT 64 - -#include "autolink.h" - -#define SNUDOWN_VERSION "1.3.2" - -enum snudown_renderer_mode { - RENDERER_USERTEXT = 0, - RENDERER_WIKI, - RENDERER_COUNT -}; - -struct snudown_renderopt { - struct html_renderopt html; - int nofollow; - const char *target; -}; - -struct snudown_renderer { - struct sd_markdown* main_renderer; - struct sd_markdown* toc_renderer; - struct module_state* state; - struct module_state* toc_state; -}; - -struct module_state { - struct sd_callbacks callbacks; - struct snudown_renderopt options; -}; - -static struct snudown_renderer sundown[RENDERER_COUNT]; - -static char* html_element_whitelist[] = {"tr", "th", "td", "table", "tbody", "thead", "tfoot", "caption", NULL}; -static char* html_attr_whitelist[] = {"colspan", "rowspan", "cellspacing", "cellpadding", "scope", NULL}; - -static struct module_state usertext_toc_state; -static struct module_state wiki_toc_state; -static struct module_state usertext_state; -static struct module_state wiki_state; - -static const unsigned int snudown_default_md_flags = - MKDEXT_NO_INTRA_EMPHASIS | - MKDEXT_SUPERSCRIPT | - MKDEXT_AUTOLINK | - MKDEXT_STRIKETHROUGH | - MKDEXT_TABLES; - -static const unsigned int snudown_default_render_flags = - HTML_SKIP_HTML | - HTML_SKIP_IMAGES | - HTML_SAFELINK | - HTML_ESCAPE | - HTML_USE_XHTML; - -static const unsigned int snudown_wiki_render_flags = - HTML_SKIP_HTML | - HTML_SAFELINK | - HTML_ALLOW_ELEMENT_WHITELIST | - HTML_ESCAPE | - HTML_USE_XHTML; - -static void -snudown_link_attr(struct buf *ob, const struct buf *link, void *opaque) -{ - struct snudown_renderopt *options = opaque; - - if (options->nofollow) - BUFPUTSL(ob, " rel=\"nofollow\""); - - if (options->target != NULL) { - BUFPUTSL(ob, " target=\""); - bufputs(ob, options->target); - bufputc(ob, '\"'); - } -} - -static struct sd_markdown* make_custom_renderer(struct module_state* state, - const unsigned int renderflags, - const unsigned int markdownflags, - int toc_renderer) { - if(toc_renderer) { - sdhtml_toc_renderer(&state->callbacks, - (struct html_renderopt *)&state->options); - } else { - sdhtml_renderer(&state->callbacks, - (struct html_renderopt *)&state->options, - renderflags); - } - - state->options.html.link_attributes = &snudown_link_attr; - state->options.html.html_element_whitelist = html_element_whitelist; - state->options.html.html_attr_whitelist = html_attr_whitelist; - - return sd_markdown_new( - markdownflags, - 16, - 64, - &state->callbacks, - &state->options - ); -} - -void init_default_renderer() { - sundown[RENDERER_USERTEXT].main_renderer = make_custom_renderer(&usertext_state, snudown_default_render_flags, snudown_default_md_flags, 0); - sundown[RENDERER_USERTEXT].toc_renderer = make_custom_renderer(&usertext_toc_state, snudown_default_render_flags, snudown_default_md_flags, 1); - sundown[RENDERER_USERTEXT].state = &usertext_state; - sundown[RENDERER_USERTEXT].toc_state = &usertext_toc_state; -} - -void init_wiki_renderer() { - sundown[RENDERER_WIKI].main_renderer = make_custom_renderer(&wiki_state, snudown_wiki_render_flags, snudown_default_md_flags, 0); - sundown[RENDERER_WIKI].toc_renderer = make_custom_renderer(&wiki_toc_state, snudown_wiki_render_flags, snudown_default_md_flags, 1); - sundown[RENDERER_WIKI].state = &wiki_state; - sundown[RENDERER_WIKI].toc_state = &wiki_toc_state; -} - -void -snudown_md(struct buf *ob, const uint8_t *document, size_t doc_size, int wiki_mode) -{ - int renderer = RENDERER_USERTEXT; - int enable_toc = 0; - struct snudown_renderer _snudown; - int nofollow = 0; - char* target = NULL; - char* toc_id_prefix = NULL; - unsigned int flags; - - if (wiki_mode) - renderer = RENDERER_WIKI; - - _snudown = sundown[renderer]; - - struct snudown_renderopt *options = &(_snudown.state->options); - options->nofollow = nofollow; - options->target = target; - - flags = options->html.flags; - - if (enable_toc) { - _snudown.toc_state->options.html.toc_id_prefix = toc_id_prefix; - sd_markdown_render(ob, document, doc_size, _snudown.toc_renderer); - _snudown.toc_state->options.html.toc_id_prefix = NULL; - - options->html.flags |= HTML_TOC; - } - - options->html.toc_id_prefix = toc_id_prefix; - - /* do the magic */ - sd_markdown_render(ob, document, doc_size, _snudown.main_renderer); - - options->html.toc_id_prefix = NULL; - options->html.flags = flags; -} -int -main(int argc, char **argv) -{ - init_default_renderer(); - init_wiki_renderer(); - - struct buf *ib, *ob; - int size_read = 0, wiki_mode = 0, i = 0, have_errors = 0; - - /* reading everything */ - ib = bufnew(READ_UNIT); - bufgrow(ib, READ_UNIT); - while ((size_read = fread(ib->data + ib->size, 1, ib->asize - ib->size, stdin)) > 0) { - ib->size += size_read; - bufgrow(ib, ib->size + READ_UNIT); - } - /* Render to a buffer, then print that out */ - ob = bufnew(OUTPUT_UNIT); - bufputs(ob, "\n"); - snudown_md(ob, ib->data, ib->size, wiki_mode); - bufputs(ob, "\n"); - - // Wiki mode explicitly allows unbalanced tags, need some way to exclude those - if (!wiki_mode) { - GumboOutput* output = gumbo_parse_with_options(&kGumboDefaultOptions, bufcstr(ob), ob->size); - - for (i=0; i < output->errors.length; ++i) { - // stupid "public" API I hacked in. - void* thing = output->errors.data[i]; - GumboErrorType type = gumbo_get_error_type(thing); - switch(type) { - case GUMBO_ERR_UTF8_INVALID: - case GUMBO_ERR_UTF8_NULL: - // Making sure the user gave us valid - // utf-8 or transforming it to valid - // utf-8 is outside the scope of snudown - continue; - default: - have_errors = 1; - printf("%s\n", GUMBO_ERROR_NAMES[type]); - printf("%s\n",gumbo_get_error_text(thing)); - printf("===============\n"); - break; - } - } - - if (have_errors) { - // gotta trigger a crash for AFL to catch it - assert(0); - } - - gumbo_destroy_output(&kGumboDefaultOptions, output); - } - bufrelease(ob); - bufrelease(ib); - return 0; -} diff --git a/SnudownTest/fuzzing/triageerrors.sh b/SnudownTest/fuzzing/triageerrors.sh deleted file mode 100644 index 0714aba..0000000 --- a/SnudownTest/fuzzing/triageerrors.sh +++ /dev/null @@ -1,2 +0,0 @@ -#!/bin/bash -find testing/afl_results/ -regextype posix-egrep -regex ".*/(crashes|hangs)/.*" | xargs -I '{}' ./validatemd.sh {} diff --git a/SnudownTest/fuzzing/validatemd.sh b/SnudownTest/fuzzing/validatemd.sh deleted file mode 100644 index 3df6c26..0000000 --- a/SnudownTest/fuzzing/validatemd.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/bash -echo "** ${1}" -./build/snudown-validator < $1 diff --git a/SnudownTest/gperf.exe b/SnudownTest/gperf.exe deleted file mode 100644 index ff10d02..0000000 --- a/SnudownTest/gperf.exe +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3f9266ea2d2bd19a503b5d2ec613e983c6ed9ea45ff6b5820b0681fd1b778d12 -size 103424 diff --git a/SnudownTest/houdini.h b/SnudownTest/houdini.h deleted file mode 100644 index b4954c0..0000000 --- a/SnudownTest/houdini.h +++ /dev/null @@ -1,37 +0,0 @@ -#ifndef HOUDINI_H__ -#define HOUDINI_H__ - -#include "buffer.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#ifdef HOUDINI_USE_LOCALE -# define _isxdigit(c) isxdigit(c) -# define _isdigit(c) isdigit(c) -#else -/* - * Helper _isdigit methods -- do not trust the current locale - * */ -# define _isxdigit(c) (strchr("0123456789ABCDEFabcdef", (c)) != NULL) -# define _isdigit(c) ((c) >= '0' && (c) <= '9') -#endif - -extern void houdini_escape_html(struct buf *ob, const uint8_t *src, size_t size); -extern void houdini_escape_html0(struct buf *ob, const uint8_t *src, size_t size, int secure); -extern void houdini_unescape_html(struct buf *ob, const uint8_t *src, size_t size); -extern void houdini_escape_xml(struct buf *ob, const uint8_t *src, size_t size); -extern void houdini_escape_uri(struct buf *ob, const uint8_t *src, size_t size); -extern void houdini_escape_url(struct buf *ob, const uint8_t *src, size_t size); -extern void houdini_escape_href(struct buf *ob, const uint8_t *src, size_t size); -extern void houdini_unescape_uri(struct buf *ob, const uint8_t *src, size_t size); -extern void houdini_unescape_url(struct buf *ob, const uint8_t *src, size_t size); -extern void houdini_escape_js(struct buf *ob, const uint8_t *src, size_t size); -extern void houdini_unescape_js(struct buf *ob, const uint8_t *src, size_t size); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/SnudownTest/houdini_href_e.c b/SnudownTest/houdini_href_e.c deleted file mode 100644 index 581df1f..0000000 --- a/SnudownTest/houdini_href_e.c +++ /dev/null @@ -1,116 +0,0 @@ -#include -#include -#include - -#include "houdini.h" - -#define ESCAPE_GROW_FACTOR(x) (((x) * 12) / 10) - -/* - * The following characters will not be escaped: - * - * -_.+!*'(),%#@?=;:/,+&$ alphanum - * - * Note that this character set is the addition of: - * - * - The characters which are safe to be in an URL - * - The characters which are *not* safe to be in - * an URL because they are RESERVED characters. - * - * We asume (lazily) that any RESERVED char that - * appears inside an URL is actually meant to - * have its native function (i.e. as an URL - * component/separator) and hence needs no escaping. - * - * There are two exceptions: the chacters & (amp) - * and ' (single quote) do not appear in the table. - * They are meant to appear in the URL as components, - * yet they require special HTML-entity escaping - * to generate valid HTML markup. - * - * All other characters will be escaped to %XX. - * - */ -static const char HREF_SAFE[] = { - 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 2, 2, 0, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; - -void -houdini_escape_href(struct buf *ob, const uint8_t *src, size_t size) -{ - static const char hex_chars[] = "0123456789ABCDEF"; - size_t i = 0, org; - char hex_str[3]; - - bufgrow(ob, ESCAPE_GROW_FACTOR(size)); - hex_str[0] = '%'; - - while (i < size) { - org = i; - /* Skip by characters that don't need special - * processing */ - while (i < size && HREF_SAFE[src[i]] == 1) - i++; - - if (i > org) - bufput(ob, src + org, i - org); - - /* escaping */ - if (i >= size) - break; - - /* throw out control characters */ - if (HREF_SAFE[src[i]] == 2) { - i++; - continue; - } - - switch (src[i]) { - /* amp appears all the time in URLs, but needs - * HTML-entity escaping to be inside an href */ - case '&': - BUFPUTSL(ob, "&"); - break; - - /* the single quote is a valid URL character - * according to the standard; it needs HTML - * entity escaping too */ - case '\'': - BUFPUTSL(ob, "'"); - break; - - /* the space can be escaped to %20 or a plus - * sign. we're going with the generic escape - * for now. the plus thing is more commonly seen - * when building GET strings */ -#if 0 - case ' ': - bufputc(ob, '+'); - break; -#endif - - /* every other character goes with a %XX escaping */ - default: - hex_str[1] = hex_chars[(src[i] >> 4) & 0xF]; - hex_str[2] = hex_chars[src[i] & 0xF]; - bufput(ob, hex_str, 3); - } - - i++; - } -} diff --git a/SnudownTest/houdini_html_e.c b/SnudownTest/houdini_html_e.c deleted file mode 100644 index 085c4bf..0000000 --- a/SnudownTest/houdini_html_e.c +++ /dev/null @@ -1,87 +0,0 @@ -#include -#include -#include - -#include "houdini.h" - -#define ESCAPE_GROW_FACTOR(x) (((x) * 12) / 10) /* this is very scientific, yes */ - -/** - * According to the OWASP rules: - * - * & --> & - * < --> < - * > --> > - * " --> " - * ' --> ' ' is not recommended - * / --> / forward slash is included as it helps end an HTML entity - * - */ -static const char HTML_ESCAPE_TABLE[] = { - 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 7, 7, 0, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 0, 0, 1, 0, 0, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 4, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 6, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; - -static const char *HTML_ESCAPES[] = { - "", - """, - "&", - "'", - "/", - "<", - ">", - "", // throw out control characters -}; - -void -houdini_escape_html0(struct buf *ob, const uint8_t *src, size_t size, int secure) -{ - size_t i = 0, org, esc = 0; - - bufgrow(ob, ESCAPE_GROW_FACTOR(size)); - - while (i < size) { - org = i; - while (i < size && (esc = HTML_ESCAPE_TABLE[src[i]]) == 0) - i++; - - if (i > org) - bufput(ob, src + org, i - org); - - /* escaping */ - if (i >= size) - break; - - /* The forward slash is only escaped in secure mode */ - if (src[i] == '/' && !secure) { - bufputc(ob, '/'); - } else if (HTML_ESCAPE_TABLE[src[i]] == 7) { - /* skip control characters */ - } else { - bufputs(ob, HTML_ESCAPES[esc]); - } - - i++; - } -} - -void -houdini_escape_html(struct buf *ob, const uint8_t *src, size_t size) -{ - houdini_escape_html0(ob, src, size, 1); -} - diff --git a/SnudownTest/html.c b/SnudownTest/html.c deleted file mode 100644 index eebccc1..0000000 --- a/SnudownTest/html.c +++ /dev/null @@ -1,790 +0,0 @@ -/* - * Copyright (c) 2009, Natacha Porté - * Copyright (c) 2011, Vicent Marti - * - * Permission to use, copy, modify, and distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ - -#include "markdown.h" -#include "html.h" - -#include -#include -#include -#include -#include - -#include "houdini.h" - -#define USE_XHTML(opt) (opt->flags & HTML_USE_XHTML) - -int -sdhtml_is_tag(const uint8_t *tag_data, size_t tag_size, const char *tagname) -{ - size_t i; - int closed = 0; - - if (tag_size < 3 || tag_data[0] != '<') - return HTML_TAG_NONE; - - i = 1; - - if (tag_data[i] == '/') { - closed = 1; - i++; - } - - for (; i < tag_size; ++i, ++tagname) { - if (*tagname == 0) - break; - - if (tag_data[i] != *tagname) - return HTML_TAG_NONE; - } - - if (i == tag_size) - return HTML_TAG_NONE; - - if (isspace(tag_data[i]) || tag_data[i] == '>') - return closed ? HTML_TAG_CLOSE : HTML_TAG_OPEN; - - return HTML_TAG_NONE; -} - -static inline void escape_html(struct buf *ob, const uint8_t *source, size_t length) -{ - houdini_escape_html0(ob, source, length, 0); -} - -static inline void escape_href(struct buf *ob, const uint8_t *source, size_t length) -{ - houdini_escape_href(ob, source, length); -} - -/******************** - * GENERIC RENDERER * - ********************/ -static int -rndr_autolink(struct buf *ob, const struct buf *link, enum mkd_autolink type, void *opaque) -{ - struct html_renderopt *options = opaque; - uint8_t offset = 0; - - if (!link || !link->size) - return 0; - - if ((options->flags & HTML_SAFELINK) != 0 && - !sd_autolink_issafe(link->data, link->size) && - type != MKDA_EMAIL) - return 0; - - BUFPUTSL(ob, "data + offset, link->size - offset); - - if (options->link_attributes) { - bufputc(ob, '\"'); - options->link_attributes(ob, link, opaque); - bufputc(ob, '>'); - } else { - BUFPUTSL(ob, "\">"); - } - - /* - * Pretty printing: if we get an email address as - * an actual URI, e.g. `mailto:foo@bar.com`, we don't - * want to print the `mailto:` prefix - */ - if (bufprefix(link, "mailto:") == 0) { - escape_html(ob, link->data + 7, link->size - 7); - } else { - escape_html(ob, link->data, link->size); - } - - BUFPUTSL(ob, ""); - - return 1; -} - -static void -rndr_blockcode(struct buf *ob, const struct buf *text, const struct buf *lang, void *opaque) -{ - if (ob->size) bufputc(ob, '\n'); - - if (lang && lang->size) { - size_t i, cls; - BUFPUTSL(ob, "
size; ++i, ++cls) {
-			while (i < lang->size && isspace(lang->data[i]))
-				i++;
-
-			if (i < lang->size) {
-				size_t org = i;
-				while (i < lang->size && !isspace(lang->data[i]))
-					i++;
-
-				if (lang->data[org] == '.')
-					org++;
-
-				if (cls) bufputc(ob, ' ');
-				escape_html(ob, lang->data + org, i - org);
-			}
-		}
-
-		BUFPUTSL(ob, "\">");
-	} else
-		BUFPUTSL(ob, "
");
-
-	if (text)
-		escape_html(ob, text->data, text->size);
-
-	BUFPUTSL(ob, "
\n"); -} - -static void -rndr_blockquote(struct buf *ob, const struct buf *text, void *opaque) -{ - if (ob->size) bufputc(ob, '\n'); - BUFPUTSL(ob, "
\n"); - if (text) bufput(ob, text->data, text->size); - BUFPUTSL(ob, "
\n"); -} - -static int -rndr_codespan(struct buf *ob, const struct buf *text, void *opaque) -{ - BUFPUTSL(ob, ""); - if (text) escape_html(ob, text->data, text->size); - BUFPUTSL(ob, ""); - return 1; -} - -static int -rndr_strikethrough(struct buf *ob, const struct buf *text, void *opaque) -{ - if (!text || !text->size) - return 0; - - BUFPUTSL(ob, ""); - bufput(ob, text->data, text->size); - BUFPUTSL(ob, ""); - return 1; -} - -static int -rndr_double_emphasis(struct buf *ob, const struct buf *text, void *opaque) -{ - if (!text || !text->size) - return 0; - - BUFPUTSL(ob, ""); - bufput(ob, text->data, text->size); - BUFPUTSL(ob, ""); - - return 1; -} - -static int -rndr_emphasis(struct buf *ob, const struct buf *text, void *opaque) -{ - if (!text || !text->size) return 0; - BUFPUTSL(ob, ""); - if (text) bufput(ob, text->data, text->size); - BUFPUTSL(ob, ""); - return 1; -} - -static int -rndr_linebreak(struct buf *ob, void *opaque) -{ - struct html_renderopt *options = opaque; - bufputs(ob, USE_XHTML(options) ? "
\n" : "
\n"); - return 1; -} - -static void -rndr_header(struct buf *ob, const struct buf *text, int level, void *opaque) -{ - struct html_renderopt *options = opaque; - - if (ob->size) - bufputc(ob, '\n'); - - if (options->flags & HTML_TOC) { - bufprintf(ob, "toc_id_prefix) { - bufputs(ob, options->toc_id_prefix); - } - bufprintf(ob, "toc_%d\">", options->toc_data.header_count++); - } else { - bufprintf(ob, "", level); - } - - if (text) bufput(ob, text->data, text->size); - bufprintf(ob, "\n", level); -} - -static int -rndr_link(struct buf *ob, const struct buf *link, const struct buf *title, const struct buf *content, void *opaque) -{ - struct html_renderopt *options = opaque; - - if (link != NULL && (options->flags & HTML_SAFELINK) != 0 && !sd_autolink_issafe(link->data, link->size)) - return 0; - - BUFPUTSL(ob, "size) - escape_href(ob, link->data, link->size); - - if (title && title->size) { - BUFPUTSL(ob, "\" title=\""); - escape_html(ob, title->data, title->size); - } - - if (options->link_attributes) { - bufputc(ob, '\"'); - options->link_attributes(ob, link, opaque); - bufputc(ob, '>'); - } else { - BUFPUTSL(ob, "\">"); - } - - if (content && content->size) bufput(ob, content->data, content->size); - BUFPUTSL(ob, ""); - return 1; -} - -static void -rndr_list(struct buf *ob, const struct buf *text, int flags, void *opaque) -{ - if (ob->size) bufputc(ob, '\n'); - bufput(ob, flags & MKD_LIST_ORDERED ? "
    \n" : "
      \n", 5); - if (text) bufput(ob, text->data, text->size); - bufput(ob, flags & MKD_LIST_ORDERED ? "
\n" : "\n", 6); -} - -static void -rndr_listitem(struct buf *ob, const struct buf *text, int flags, void *opaque) -{ - BUFPUTSL(ob, "
  • "); - if (text) { - size_t size = text->size; - while (size && text->data[size - 1] == '\n') - size--; - - bufput(ob, text->data, size); - } - BUFPUTSL(ob, "
  • \n"); -} - -static void -rndr_paragraph(struct buf *ob, const struct buf *text, void *opaque) -{ - struct html_renderopt *options = opaque; - size_t i = 0; - - if (ob->size) bufputc(ob, '\n'); - - if (!text || !text->size) - return; - - while (i < text->size && isspace(text->data[i])) i++; - - if (i == text->size) - return; - - BUFPUTSL(ob, "

    "); - if (options->flags & HTML_HARD_WRAP) { - size_t org; - while (i < text->size) { - org = i; - while (i < text->size && text->data[i] != '\n') - i++; - - if (i > org) - bufput(ob, text->data + org, i - org); - - /* - * do not insert a line break if this newline - * is the last character on the paragraph - */ - if (i >= text->size - 1) - break; - - rndr_linebreak(ob, opaque); - i++; - } - } else { - bufput(ob, &text->data[i], text->size - i); - } - BUFPUTSL(ob, "

    \n"); -} - -static void -rndr_raw_block(struct buf *ob, const struct buf *text, void *opaque) -{ - size_t org, sz; - if (!text) return; - sz = text->size; - while (sz > 0 && text->data[sz - 1] == '\n') sz--; - org = 0; - while (org < sz && text->data[org] == '\n') org++; - if (org >= sz) return; - if (ob->size) bufputc(ob, '\n'); - bufput(ob, text->data + org, sz - org); - bufputc(ob, '\n'); -} - -static int -rndr_triple_emphasis(struct buf *ob, const struct buf *text, void *opaque) -{ - if (!text || !text->size) return 0; - BUFPUTSL(ob, ""); - bufput(ob, text->data, text->size); - BUFPUTSL(ob, ""); - return 1; -} - -static void -rndr_hrule(struct buf *ob, void *opaque) -{ - struct html_renderopt *options = opaque; - if (ob->size) bufputc(ob, '\n'); - bufputs(ob, USE_XHTML(options) ? "
    \n" : "
    \n"); -} - -static int -rndr_image(struct buf *ob, const struct buf *link, const struct buf *title, const struct buf *alt, void *opaque) -{ - struct html_renderopt *options = opaque; - if (!link || !link->size) return 0; - - BUFPUTSL(ob, "data, link->size); - BUFPUTSL(ob, "\" alt=\""); - - if (alt && alt->size) - escape_html(ob, alt->data, alt->size); - - if (title && title->size) { - BUFPUTSL(ob, "\" title=\""); - escape_html(ob, title->data, title->size); } - - bufputs(ob, USE_XHTML(options) ? "\"/>" : "\">"); - return 1; -} - -static void -rndr_html_tag(struct buf *ob, const struct buf *text, void *opaque, - char* tagname, char** whitelist, int tagtype) -{ - size_t i, x, z, in_str = 0, seen_equals = 0, done = 0, done_attr = 0, reset = 0; - struct buf *attr; - struct buf *value; - char c; - - bufputc(ob, '<'); - - if(tagtype == HTML_TAG_CLOSE) { - bufputc(ob, '/'); - bufputs(ob, tagname); - bufputc(ob, '>'); - return; - } - - bufputs(ob, tagname); - i = 1 + strlen(tagname); - - attr = bufnew(16); - value = bufnew(16); - - for(; i < text->size && !done; i++) { - c = text->data[i]; - done = 0; - reset = 0; - done_attr = 0; - - switch(c) { - case '>': - done = 1; - break; - case '\'': - case '"': - if(!seen_equals) { - reset = 1; - } else if(!in_str) { - in_str = c; - } else if(in_str == c) { - in_str = 0; - done_attr = 1; - } else { - bufputc(value, c); - } - break; - case ' ': - if (in_str) { - bufputc(value, ' '); - } else { - reset = 1; - } - break; - case '=': - if(seen_equals) { - reset = 1; - break; - } - seen_equals = 1; - break; - default: - if(seen_equals && in_str || !seen_equals) { - bufputc(seen_equals ? value : attr, c); - } - break; - } - - if(done_attr) { - int valid = 0; - for(z = 0; whitelist[z]; z++) { - if(strlen(whitelist[z]) != attr->size) { - continue; - } - for(x = 0; x < attr->size; x++) { - if(tolower(whitelist[z][x]) != tolower(attr->data[x])) { - break; - } - } - if(x == attr->size) { - valid = 1; - break; - } - } - if(valid && value->size && attr->size) { - bufputc(ob, ' '); - escape_html(ob, attr->data, attr->size); - bufputs(ob, "=\""); - escape_html(ob, value->data, value->size); - bufputc(ob, '"'); - } - reset = 1; - } - - if(reset) { - seen_equals = 0; - in_str = 0; - bufreset(attr); - bufreset(value); - } - } - - bufrelease(attr); - bufrelease(value); - - bufputc(ob, '>'); -} - -static int -rndr_raw_html(struct buf *ob, const struct buf *text, void *opaque) -{ - struct html_renderopt *options = opaque; - char** whitelist = options->html_element_whitelist; - int i, tagtype; - - /* Items on the whitelist ignore all other flags and just output */ - if (((options->flags & HTML_ALLOW_ELEMENT_WHITELIST) != 0) && whitelist) { - for (i = 0; whitelist[i]; i++) { - tagtype = sdhtml_is_tag(text->data, text->size, whitelist[i]); - if (tagtype != HTML_TAG_NONE) { - rndr_html_tag(ob, text, opaque, - whitelist[i], - options->html_attr_whitelist, - tagtype); - return 1; - } - } - } - - /* HTML_ESCAPE overrides SKIP_HTML, SKIP_STYLE, SKIP_LINKS and SKIP_IMAGES - * It doens't see if there are any valid tags, just escape all of them. */ - if((options->flags & HTML_ESCAPE) != 0) { - escape_html(ob, text->data, text->size); - return 1; - } - - if ((options->flags & HTML_SKIP_HTML) != 0) - return 1; - - if ((options->flags & HTML_SKIP_STYLE) != 0 && - sdhtml_is_tag(text->data, text->size, "style")) - return 1; - - if ((options->flags & HTML_SKIP_LINKS) != 0 && - sdhtml_is_tag(text->data, text->size, "a")) - return 1; - - if ((options->flags & HTML_SKIP_IMAGES) != 0 && - sdhtml_is_tag(text->data, text->size, "img")) - return 1; - - bufput(ob, text->data, text->size); - return 1; -} - -static void -rndr_table(struct buf *ob, const struct buf *header, const struct buf *body, void *opaque) -{ - if (ob->size) bufputc(ob, '\n'); - BUFPUTSL(ob, "\n"); - if (header) - bufput(ob, header->data, header->size); - BUFPUTSL(ob, "\n"); - if (body) - bufput(ob, body->data, body->size); - BUFPUTSL(ob, "
    \n"); -} - -static void -rndr_tablerow(struct buf *ob, const struct buf *text, void *opaque) -{ - BUFPUTSL(ob, "\n"); - if (text) - bufput(ob, text->data, text->size); - BUFPUTSL(ob, "\n"); -} - -static void -rndr_tablecell(struct buf *ob, const struct buf *text, int flags, void *opaque, int col_span) -{ - if (flags & MKD_TABLE_HEADER) { - BUFPUTSL(ob, " 1) { - bufprintf(ob, " colspan=\"%d\" ", col_span); - } - - switch (flags & MKD_TABLE_ALIGNMASK) { - case MKD_TABLE_ALIGN_CENTER: - BUFPUTSL(ob, " align=\"center\">"); - break; - - case MKD_TABLE_ALIGN_L: - BUFPUTSL(ob, " align=\"left\">"); - break; - - case MKD_TABLE_ALIGN_R: - BUFPUTSL(ob, " align=\"right\">"); - break; - - default: - BUFPUTSL(ob, ">"); - } - - if (text) - bufput(ob, text->data, text->size); - - if (flags & MKD_TABLE_HEADER) { - BUFPUTSL(ob, "\n"); - } else { - BUFPUTSL(ob, "\n"); - } -} - -static int -rndr_superscript(struct buf *ob, const struct buf *text, void *opaque) -{ - if (!text || !text->size) return 0; - BUFPUTSL(ob, ""); - bufput(ob, text->data, text->size); - BUFPUTSL(ob, ""); - return 1; -} - -static void -rndr_normal_text(struct buf *ob, const struct buf *text, void *opaque) -{ - if (text) - escape_html(ob, text->data, text->size); -} - -static void -toc_header(struct buf *ob, const struct buf *text, int level, void *opaque) -{ - struct html_renderopt *options = opaque; - - /* set the level offset if this is the first header - * we're parsing for the document */ - if (options->toc_data.current_level == 0) { - BUFPUTSL(ob, "
    \n"); - options->toc_data.level_offset = level - 1; - } - level -= options->toc_data.level_offset; - - if (level > options->toc_data.current_level) { - while (level > options->toc_data.current_level) { - BUFPUTSL(ob, "
      \n
    • \n"); - options->toc_data.current_level++; - } - } else if (level < options->toc_data.current_level) { - BUFPUTSL(ob, "
    • \n"); - while (level < options->toc_data.current_level) { - BUFPUTSL(ob, "
    \n\n"); - options->toc_data.current_level--; - } - BUFPUTSL(ob,"
  • \n"); - } else { - BUFPUTSL(ob,"
  • \n
  • \n"); - } - - BUFPUTSL(ob, "toc_id_prefix) { - bufputs(ob, options->toc_id_prefix); - } - - bufprintf(ob, "toc_%d\">", options->toc_data.header_count++); - if (text) - escape_html(ob, text->data, text->size); - BUFPUTSL(ob, "\n"); -} - -static int -toc_link(struct buf *ob, const struct buf *link, const struct buf *title, const struct buf *content, void *opaque) -{ - if (content && content->size) - bufput(ob, content->data, content->size); - return 1; -} - -static void -reset_toc(struct buf *ob, void *opaque) -{ - struct html_renderopt *options = opaque; - - memset(&(options->toc_data), 0, sizeof(options->toc_data)); -} - -static void -toc_finalize(struct buf *ob, void *opaque) -{ - struct html_renderopt *options = opaque; - bool has_toc = false; - while (options->toc_data.current_level > 0) { - BUFPUTSL(ob, "
  • \n\n"); - options->toc_data.current_level--; - has_toc = true; - } - if(has_toc) { - BUFPUTSL(ob, "
    \n"); - } - reset_toc(ob, opaque); -} - -void -sdhtml_toc_renderer(struct sd_callbacks *callbacks, struct html_renderopt *options) -{ - static const struct sd_callbacks cb_default = { - NULL, - NULL, - NULL, - toc_header, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - - NULL, - rndr_codespan, - rndr_double_emphasis, - rndr_emphasis, - NULL, - NULL, - toc_link, - NULL, - rndr_triple_emphasis, - rndr_strikethrough, - rndr_superscript, - - NULL, - NULL, - - NULL, - toc_finalize, - }; - - memset(options, 0x0, sizeof(struct html_renderopt)); - options->flags = HTML_TOC | HTML_SKIP_HTML; - - memcpy(callbacks, &cb_default, sizeof(struct sd_callbacks)); -} - -void -sdhtml_renderer(struct sd_callbacks *callbacks, struct html_renderopt *options, unsigned int render_flags) -{ - static const struct sd_callbacks cb_default = { - rndr_blockcode, - rndr_blockquote, - rndr_raw_block, - rndr_header, - rndr_hrule, - rndr_list, - rndr_listitem, - rndr_paragraph, - rndr_table, - rndr_tablerow, - rndr_tablecell, - - rndr_autolink, - rndr_codespan, - rndr_double_emphasis, - rndr_emphasis, - rndr_image, - rndr_linebreak, - rndr_link, - rndr_raw_html, - rndr_triple_emphasis, - rndr_strikethrough, - rndr_superscript, - - NULL, - rndr_normal_text, - - NULL, - reset_toc, - }; - - /* Prepare the options pointer */ - memset(options, 0x0, sizeof(struct html_renderopt)); - options->flags = render_flags; - - /* Prepare the callbacks */ - memcpy(callbacks, &cb_default, sizeof(struct sd_callbacks)); - - if (render_flags & HTML_SKIP_IMAGES) - callbacks->image = NULL; - - if (render_flags & HTML_SKIP_LINKS) { - callbacks->link = NULL; - callbacks->autolink = NULL; - } - - if (render_flags & HTML_SKIP_HTML || render_flags & HTML_ESCAPE) - callbacks->blockhtml = NULL; -} diff --git a/SnudownTest/html.h b/SnudownTest/html.h deleted file mode 100644 index 59103b3..0000000 --- a/SnudownTest/html.h +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright (c) 2011, Vicent Marti - * - * Permission to use, copy, modify, and distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ - -#ifndef UPSKIRT_HTML_H -#define UPSKIRT_HTML_H - -#include "markdown.h" -#include "buffer.h" -#include - -#ifdef __cplusplus -extern "C" { -#endif - -struct html_renderopt { - struct { - int header_count; - int current_level; - int level_offset; - } toc_data; - - char* toc_id_prefix; - - unsigned int flags; - - char** html_element_whitelist; - char** html_attr_whitelist; - - /* extra callbacks */ - void (*link_attributes)(struct buf *ob, const struct buf *url, void *self); -}; - -typedef enum { - HTML_SKIP_HTML = (1 << 0), - HTML_SKIP_STYLE = (1 << 1), - HTML_SKIP_IMAGES = (1 << 2), - HTML_SKIP_LINKS = (1 << 3), - HTML_EXPAND_TABS = (1 << 4), - HTML_SAFELINK = (1 << 5), - HTML_TOC = (1 << 6), - HTML_HARD_WRAP = (1 << 7), - HTML_USE_XHTML = (1 << 8), - HTML_ESCAPE = (1 << 9), - HTML_ALLOW_ELEMENT_WHITELIST = (1 << 10), -} html_render_mode; - -typedef enum { - HTML_TAG_NONE = 0, - HTML_TAG_OPEN, - HTML_TAG_CLOSE, -} html_tag; - -int -sdhtml_is_tag(const uint8_t *tag_data, size_t tag_size, const char *tagname); - -extern void -sdhtml_renderer(struct sd_callbacks *callbacks, struct html_renderopt *options_ptr, unsigned int render_flags); - -extern void -sdhtml_toc_renderer(struct sd_callbacks *callbacks, struct html_renderopt *options_ptr); - -extern void -sdhtml_smartypants(struct buf *ob, const uint8_t *text, size_t size); - -#ifdef __cplusplus -} -#endif - -#endif - diff --git a/SnudownTest/html/houdini.h b/SnudownTest/html/houdini.h deleted file mode 100644 index b4954c0..0000000 --- a/SnudownTest/html/houdini.h +++ /dev/null @@ -1,37 +0,0 @@ -#ifndef HOUDINI_H__ -#define HOUDINI_H__ - -#include "buffer.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#ifdef HOUDINI_USE_LOCALE -# define _isxdigit(c) isxdigit(c) -# define _isdigit(c) isdigit(c) -#else -/* - * Helper _isdigit methods -- do not trust the current locale - * */ -# define _isxdigit(c) (strchr("0123456789ABCDEFabcdef", (c)) != NULL) -# define _isdigit(c) ((c) >= '0' && (c) <= '9') -#endif - -extern void houdini_escape_html(struct buf *ob, const uint8_t *src, size_t size); -extern void houdini_escape_html0(struct buf *ob, const uint8_t *src, size_t size, int secure); -extern void houdini_unescape_html(struct buf *ob, const uint8_t *src, size_t size); -extern void houdini_escape_xml(struct buf *ob, const uint8_t *src, size_t size); -extern void houdini_escape_uri(struct buf *ob, const uint8_t *src, size_t size); -extern void houdini_escape_url(struct buf *ob, const uint8_t *src, size_t size); -extern void houdini_escape_href(struct buf *ob, const uint8_t *src, size_t size); -extern void houdini_unescape_uri(struct buf *ob, const uint8_t *src, size_t size); -extern void houdini_unescape_url(struct buf *ob, const uint8_t *src, size_t size); -extern void houdini_escape_js(struct buf *ob, const uint8_t *src, size_t size); -extern void houdini_unescape_js(struct buf *ob, const uint8_t *src, size_t size); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/SnudownTest/html/houdini_href_e.c b/SnudownTest/html/houdini_href_e.c deleted file mode 100644 index 581df1f..0000000 --- a/SnudownTest/html/houdini_href_e.c +++ /dev/null @@ -1,116 +0,0 @@ -#include -#include -#include - -#include "houdini.h" - -#define ESCAPE_GROW_FACTOR(x) (((x) * 12) / 10) - -/* - * The following characters will not be escaped: - * - * -_.+!*'(),%#@?=;:/,+&$ alphanum - * - * Note that this character set is the addition of: - * - * - The characters which are safe to be in an URL - * - The characters which are *not* safe to be in - * an URL because they are RESERVED characters. - * - * We asume (lazily) that any RESERVED char that - * appears inside an URL is actually meant to - * have its native function (i.e. as an URL - * component/separator) and hence needs no escaping. - * - * There are two exceptions: the chacters & (amp) - * and ' (single quote) do not appear in the table. - * They are meant to appear in the URL as components, - * yet they require special HTML-entity escaping - * to generate valid HTML markup. - * - * All other characters will be escaped to %XX. - * - */ -static const char HREF_SAFE[] = { - 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 2, 2, 0, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; - -void -houdini_escape_href(struct buf *ob, const uint8_t *src, size_t size) -{ - static const char hex_chars[] = "0123456789ABCDEF"; - size_t i = 0, org; - char hex_str[3]; - - bufgrow(ob, ESCAPE_GROW_FACTOR(size)); - hex_str[0] = '%'; - - while (i < size) { - org = i; - /* Skip by characters that don't need special - * processing */ - while (i < size && HREF_SAFE[src[i]] == 1) - i++; - - if (i > org) - bufput(ob, src + org, i - org); - - /* escaping */ - if (i >= size) - break; - - /* throw out control characters */ - if (HREF_SAFE[src[i]] == 2) { - i++; - continue; - } - - switch (src[i]) { - /* amp appears all the time in URLs, but needs - * HTML-entity escaping to be inside an href */ - case '&': - BUFPUTSL(ob, "&"); - break; - - /* the single quote is a valid URL character - * according to the standard; it needs HTML - * entity escaping too */ - case '\'': - BUFPUTSL(ob, "'"); - break; - - /* the space can be escaped to %20 or a plus - * sign. we're going with the generic escape - * for now. the plus thing is more commonly seen - * when building GET strings */ -#if 0 - case ' ': - bufputc(ob, '+'); - break; -#endif - - /* every other character goes with a %XX escaping */ - default: - hex_str[1] = hex_chars[(src[i] >> 4) & 0xF]; - hex_str[2] = hex_chars[src[i] & 0xF]; - bufput(ob, hex_str, 3); - } - - i++; - } -} diff --git a/SnudownTest/html/houdini_html_e.c b/SnudownTest/html/houdini_html_e.c deleted file mode 100644 index 085c4bf..0000000 --- a/SnudownTest/html/houdini_html_e.c +++ /dev/null @@ -1,87 +0,0 @@ -#include -#include -#include - -#include "houdini.h" - -#define ESCAPE_GROW_FACTOR(x) (((x) * 12) / 10) /* this is very scientific, yes */ - -/** - * According to the OWASP rules: - * - * & --> & - * < --> < - * > --> > - * " --> " - * ' --> ' ' is not recommended - * / --> / forward slash is included as it helps end an HTML entity - * - */ -static const char HTML_ESCAPE_TABLE[] = { - 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 7, 7, 0, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 0, 0, 1, 0, 0, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 4, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 6, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; - -static const char *HTML_ESCAPES[] = { - "", - """, - "&", - "'", - "/", - "<", - ">", - "", // throw out control characters -}; - -void -houdini_escape_html0(struct buf *ob, const uint8_t *src, size_t size, int secure) -{ - size_t i = 0, org, esc = 0; - - bufgrow(ob, ESCAPE_GROW_FACTOR(size)); - - while (i < size) { - org = i; - while (i < size && (esc = HTML_ESCAPE_TABLE[src[i]]) == 0) - i++; - - if (i > org) - bufput(ob, src + org, i - org); - - /* escaping */ - if (i >= size) - break; - - /* The forward slash is only escaped in secure mode */ - if (src[i] == '/' && !secure) { - bufputc(ob, '/'); - } else if (HTML_ESCAPE_TABLE[src[i]] == 7) { - /* skip control characters */ - } else { - bufputs(ob, HTML_ESCAPES[esc]); - } - - i++; - } -} - -void -houdini_escape_html(struct buf *ob, const uint8_t *src, size_t size) -{ - houdini_escape_html0(ob, src, size, 1); -} - diff --git a/SnudownTest/html/html.c b/SnudownTest/html/html.c deleted file mode 100644 index eebccc1..0000000 --- a/SnudownTest/html/html.c +++ /dev/null @@ -1,790 +0,0 @@ -/* - * Copyright (c) 2009, Natacha Porté - * Copyright (c) 2011, Vicent Marti - * - * Permission to use, copy, modify, and distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ - -#include "markdown.h" -#include "html.h" - -#include -#include -#include -#include -#include - -#include "houdini.h" - -#define USE_XHTML(opt) (opt->flags & HTML_USE_XHTML) - -int -sdhtml_is_tag(const uint8_t *tag_data, size_t tag_size, const char *tagname) -{ - size_t i; - int closed = 0; - - if (tag_size < 3 || tag_data[0] != '<') - return HTML_TAG_NONE; - - i = 1; - - if (tag_data[i] == '/') { - closed = 1; - i++; - } - - for (; i < tag_size; ++i, ++tagname) { - if (*tagname == 0) - break; - - if (tag_data[i] != *tagname) - return HTML_TAG_NONE; - } - - if (i == tag_size) - return HTML_TAG_NONE; - - if (isspace(tag_data[i]) || tag_data[i] == '>') - return closed ? HTML_TAG_CLOSE : HTML_TAG_OPEN; - - return HTML_TAG_NONE; -} - -static inline void escape_html(struct buf *ob, const uint8_t *source, size_t length) -{ - houdini_escape_html0(ob, source, length, 0); -} - -static inline void escape_href(struct buf *ob, const uint8_t *source, size_t length) -{ - houdini_escape_href(ob, source, length); -} - -/******************** - * GENERIC RENDERER * - ********************/ -static int -rndr_autolink(struct buf *ob, const struct buf *link, enum mkd_autolink type, void *opaque) -{ - struct html_renderopt *options = opaque; - uint8_t offset = 0; - - if (!link || !link->size) - return 0; - - if ((options->flags & HTML_SAFELINK) != 0 && - !sd_autolink_issafe(link->data, link->size) && - type != MKDA_EMAIL) - return 0; - - BUFPUTSL(ob, "data + offset, link->size - offset); - - if (options->link_attributes) { - bufputc(ob, '\"'); - options->link_attributes(ob, link, opaque); - bufputc(ob, '>'); - } else { - BUFPUTSL(ob, "\">"); - } - - /* - * Pretty printing: if we get an email address as - * an actual URI, e.g. `mailto:foo@bar.com`, we don't - * want to print the `mailto:` prefix - */ - if (bufprefix(link, "mailto:") == 0) { - escape_html(ob, link->data + 7, link->size - 7); - } else { - escape_html(ob, link->data, link->size); - } - - BUFPUTSL(ob, ""); - - return 1; -} - -static void -rndr_blockcode(struct buf *ob, const struct buf *text, const struct buf *lang, void *opaque) -{ - if (ob->size) bufputc(ob, '\n'); - - if (lang && lang->size) { - size_t i, cls; - BUFPUTSL(ob, "
    size; ++i, ++cls) {
    -			while (i < lang->size && isspace(lang->data[i]))
    -				i++;
    -
    -			if (i < lang->size) {
    -				size_t org = i;
    -				while (i < lang->size && !isspace(lang->data[i]))
    -					i++;
    -
    -				if (lang->data[org] == '.')
    -					org++;
    -
    -				if (cls) bufputc(ob, ' ');
    -				escape_html(ob, lang->data + org, i - org);
    -			}
    -		}
    -
    -		BUFPUTSL(ob, "\">");
    -	} else
    -		BUFPUTSL(ob, "
    ");
    -
    -	if (text)
    -		escape_html(ob, text->data, text->size);
    -
    -	BUFPUTSL(ob, "
    \n"); -} - -static void -rndr_blockquote(struct buf *ob, const struct buf *text, void *opaque) -{ - if (ob->size) bufputc(ob, '\n'); - BUFPUTSL(ob, "
    \n"); - if (text) bufput(ob, text->data, text->size); - BUFPUTSL(ob, "
    \n"); -} - -static int -rndr_codespan(struct buf *ob, const struct buf *text, void *opaque) -{ - BUFPUTSL(ob, ""); - if (text) escape_html(ob, text->data, text->size); - BUFPUTSL(ob, ""); - return 1; -} - -static int -rndr_strikethrough(struct buf *ob, const struct buf *text, void *opaque) -{ - if (!text || !text->size) - return 0; - - BUFPUTSL(ob, ""); - bufput(ob, text->data, text->size); - BUFPUTSL(ob, ""); - return 1; -} - -static int -rndr_double_emphasis(struct buf *ob, const struct buf *text, void *opaque) -{ - if (!text || !text->size) - return 0; - - BUFPUTSL(ob, ""); - bufput(ob, text->data, text->size); - BUFPUTSL(ob, ""); - - return 1; -} - -static int -rndr_emphasis(struct buf *ob, const struct buf *text, void *opaque) -{ - if (!text || !text->size) return 0; - BUFPUTSL(ob, ""); - if (text) bufput(ob, text->data, text->size); - BUFPUTSL(ob, ""); - return 1; -} - -static int -rndr_linebreak(struct buf *ob, void *opaque) -{ - struct html_renderopt *options = opaque; - bufputs(ob, USE_XHTML(options) ? "
    \n" : "
    \n"); - return 1; -} - -static void -rndr_header(struct buf *ob, const struct buf *text, int level, void *opaque) -{ - struct html_renderopt *options = opaque; - - if (ob->size) - bufputc(ob, '\n'); - - if (options->flags & HTML_TOC) { - bufprintf(ob, "toc_id_prefix) { - bufputs(ob, options->toc_id_prefix); - } - bufprintf(ob, "toc_%d\">", options->toc_data.header_count++); - } else { - bufprintf(ob, "", level); - } - - if (text) bufput(ob, text->data, text->size); - bufprintf(ob, "\n", level); -} - -static int -rndr_link(struct buf *ob, const struct buf *link, const struct buf *title, const struct buf *content, void *opaque) -{ - struct html_renderopt *options = opaque; - - if (link != NULL && (options->flags & HTML_SAFELINK) != 0 && !sd_autolink_issafe(link->data, link->size)) - return 0; - - BUFPUTSL(ob, "size) - escape_href(ob, link->data, link->size); - - if (title && title->size) { - BUFPUTSL(ob, "\" title=\""); - escape_html(ob, title->data, title->size); - } - - if (options->link_attributes) { - bufputc(ob, '\"'); - options->link_attributes(ob, link, opaque); - bufputc(ob, '>'); - } else { - BUFPUTSL(ob, "\">"); - } - - if (content && content->size) bufput(ob, content->data, content->size); - BUFPUTSL(ob, ""); - return 1; -} - -static void -rndr_list(struct buf *ob, const struct buf *text, int flags, void *opaque) -{ - if (ob->size) bufputc(ob, '\n'); - bufput(ob, flags & MKD_LIST_ORDERED ? "
      \n" : "
        \n", 5); - if (text) bufput(ob, text->data, text->size); - bufput(ob, flags & MKD_LIST_ORDERED ? "
    \n" : "\n", 6); -} - -static void -rndr_listitem(struct buf *ob, const struct buf *text, int flags, void *opaque) -{ - BUFPUTSL(ob, "
  • "); - if (text) { - size_t size = text->size; - while (size && text->data[size - 1] == '\n') - size--; - - bufput(ob, text->data, size); - } - BUFPUTSL(ob, "
  • \n"); -} - -static void -rndr_paragraph(struct buf *ob, const struct buf *text, void *opaque) -{ - struct html_renderopt *options = opaque; - size_t i = 0; - - if (ob->size) bufputc(ob, '\n'); - - if (!text || !text->size) - return; - - while (i < text->size && isspace(text->data[i])) i++; - - if (i == text->size) - return; - - BUFPUTSL(ob, "

    "); - if (options->flags & HTML_HARD_WRAP) { - size_t org; - while (i < text->size) { - org = i; - while (i < text->size && text->data[i] != '\n') - i++; - - if (i > org) - bufput(ob, text->data + org, i - org); - - /* - * do not insert a line break if this newline - * is the last character on the paragraph - */ - if (i >= text->size - 1) - break; - - rndr_linebreak(ob, opaque); - i++; - } - } else { - bufput(ob, &text->data[i], text->size - i); - } - BUFPUTSL(ob, "

    \n"); -} - -static void -rndr_raw_block(struct buf *ob, const struct buf *text, void *opaque) -{ - size_t org, sz; - if (!text) return; - sz = text->size; - while (sz > 0 && text->data[sz - 1] == '\n') sz--; - org = 0; - while (org < sz && text->data[org] == '\n') org++; - if (org >= sz) return; - if (ob->size) bufputc(ob, '\n'); - bufput(ob, text->data + org, sz - org); - bufputc(ob, '\n'); -} - -static int -rndr_triple_emphasis(struct buf *ob, const struct buf *text, void *opaque) -{ - if (!text || !text->size) return 0; - BUFPUTSL(ob, ""); - bufput(ob, text->data, text->size); - BUFPUTSL(ob, ""); - return 1; -} - -static void -rndr_hrule(struct buf *ob, void *opaque) -{ - struct html_renderopt *options = opaque; - if (ob->size) bufputc(ob, '\n'); - bufputs(ob, USE_XHTML(options) ? "
    \n" : "
    \n"); -} - -static int -rndr_image(struct buf *ob, const struct buf *link, const struct buf *title, const struct buf *alt, void *opaque) -{ - struct html_renderopt *options = opaque; - if (!link || !link->size) return 0; - - BUFPUTSL(ob, "data, link->size); - BUFPUTSL(ob, "\" alt=\""); - - if (alt && alt->size) - escape_html(ob, alt->data, alt->size); - - if (title && title->size) { - BUFPUTSL(ob, "\" title=\""); - escape_html(ob, title->data, title->size); } - - bufputs(ob, USE_XHTML(options) ? "\"/>" : "\">"); - return 1; -} - -static void -rndr_html_tag(struct buf *ob, const struct buf *text, void *opaque, - char* tagname, char** whitelist, int tagtype) -{ - size_t i, x, z, in_str = 0, seen_equals = 0, done = 0, done_attr = 0, reset = 0; - struct buf *attr; - struct buf *value; - char c; - - bufputc(ob, '<'); - - if(tagtype == HTML_TAG_CLOSE) { - bufputc(ob, '/'); - bufputs(ob, tagname); - bufputc(ob, '>'); - return; - } - - bufputs(ob, tagname); - i = 1 + strlen(tagname); - - attr = bufnew(16); - value = bufnew(16); - - for(; i < text->size && !done; i++) { - c = text->data[i]; - done = 0; - reset = 0; - done_attr = 0; - - switch(c) { - case '>': - done = 1; - break; - case '\'': - case '"': - if(!seen_equals) { - reset = 1; - } else if(!in_str) { - in_str = c; - } else if(in_str == c) { - in_str = 0; - done_attr = 1; - } else { - bufputc(value, c); - } - break; - case ' ': - if (in_str) { - bufputc(value, ' '); - } else { - reset = 1; - } - break; - case '=': - if(seen_equals) { - reset = 1; - break; - } - seen_equals = 1; - break; - default: - if(seen_equals && in_str || !seen_equals) { - bufputc(seen_equals ? value : attr, c); - } - break; - } - - if(done_attr) { - int valid = 0; - for(z = 0; whitelist[z]; z++) { - if(strlen(whitelist[z]) != attr->size) { - continue; - } - for(x = 0; x < attr->size; x++) { - if(tolower(whitelist[z][x]) != tolower(attr->data[x])) { - break; - } - } - if(x == attr->size) { - valid = 1; - break; - } - } - if(valid && value->size && attr->size) { - bufputc(ob, ' '); - escape_html(ob, attr->data, attr->size); - bufputs(ob, "=\""); - escape_html(ob, value->data, value->size); - bufputc(ob, '"'); - } - reset = 1; - } - - if(reset) { - seen_equals = 0; - in_str = 0; - bufreset(attr); - bufreset(value); - } - } - - bufrelease(attr); - bufrelease(value); - - bufputc(ob, '>'); -} - -static int -rndr_raw_html(struct buf *ob, const struct buf *text, void *opaque) -{ - struct html_renderopt *options = opaque; - char** whitelist = options->html_element_whitelist; - int i, tagtype; - - /* Items on the whitelist ignore all other flags and just output */ - if (((options->flags & HTML_ALLOW_ELEMENT_WHITELIST) != 0) && whitelist) { - for (i = 0; whitelist[i]; i++) { - tagtype = sdhtml_is_tag(text->data, text->size, whitelist[i]); - if (tagtype != HTML_TAG_NONE) { - rndr_html_tag(ob, text, opaque, - whitelist[i], - options->html_attr_whitelist, - tagtype); - return 1; - } - } - } - - /* HTML_ESCAPE overrides SKIP_HTML, SKIP_STYLE, SKIP_LINKS and SKIP_IMAGES - * It doens't see if there are any valid tags, just escape all of them. */ - if((options->flags & HTML_ESCAPE) != 0) { - escape_html(ob, text->data, text->size); - return 1; - } - - if ((options->flags & HTML_SKIP_HTML) != 0) - return 1; - - if ((options->flags & HTML_SKIP_STYLE) != 0 && - sdhtml_is_tag(text->data, text->size, "style")) - return 1; - - if ((options->flags & HTML_SKIP_LINKS) != 0 && - sdhtml_is_tag(text->data, text->size, "a")) - return 1; - - if ((options->flags & HTML_SKIP_IMAGES) != 0 && - sdhtml_is_tag(text->data, text->size, "img")) - return 1; - - bufput(ob, text->data, text->size); - return 1; -} - -static void -rndr_table(struct buf *ob, const struct buf *header, const struct buf *body, void *opaque) -{ - if (ob->size) bufputc(ob, '\n'); - BUFPUTSL(ob, "\n"); - if (header) - bufput(ob, header->data, header->size); - BUFPUTSL(ob, "\n"); - if (body) - bufput(ob, body->data, body->size); - BUFPUTSL(ob, "
    \n"); -} - -static void -rndr_tablerow(struct buf *ob, const struct buf *text, void *opaque) -{ - BUFPUTSL(ob, "\n"); - if (text) - bufput(ob, text->data, text->size); - BUFPUTSL(ob, "\n"); -} - -static void -rndr_tablecell(struct buf *ob, const struct buf *text, int flags, void *opaque, int col_span) -{ - if (flags & MKD_TABLE_HEADER) { - BUFPUTSL(ob, " 1) { - bufprintf(ob, " colspan=\"%d\" ", col_span); - } - - switch (flags & MKD_TABLE_ALIGNMASK) { - case MKD_TABLE_ALIGN_CENTER: - BUFPUTSL(ob, " align=\"center\">"); - break; - - case MKD_TABLE_ALIGN_L: - BUFPUTSL(ob, " align=\"left\">"); - break; - - case MKD_TABLE_ALIGN_R: - BUFPUTSL(ob, " align=\"right\">"); - break; - - default: - BUFPUTSL(ob, ">"); - } - - if (text) - bufput(ob, text->data, text->size); - - if (flags & MKD_TABLE_HEADER) { - BUFPUTSL(ob, "\n"); - } else { - BUFPUTSL(ob, "\n"); - } -} - -static int -rndr_superscript(struct buf *ob, const struct buf *text, void *opaque) -{ - if (!text || !text->size) return 0; - BUFPUTSL(ob, ""); - bufput(ob, text->data, text->size); - BUFPUTSL(ob, ""); - return 1; -} - -static void -rndr_normal_text(struct buf *ob, const struct buf *text, void *opaque) -{ - if (text) - escape_html(ob, text->data, text->size); -} - -static void -toc_header(struct buf *ob, const struct buf *text, int level, void *opaque) -{ - struct html_renderopt *options = opaque; - - /* set the level offset if this is the first header - * we're parsing for the document */ - if (options->toc_data.current_level == 0) { - BUFPUTSL(ob, "
    \n"); - options->toc_data.level_offset = level - 1; - } - level -= options->toc_data.level_offset; - - if (level > options->toc_data.current_level) { - while (level > options->toc_data.current_level) { - BUFPUTSL(ob, "
      \n
    • \n"); - options->toc_data.current_level++; - } - } else if (level < options->toc_data.current_level) { - BUFPUTSL(ob, "
    • \n"); - while (level < options->toc_data.current_level) { - BUFPUTSL(ob, "
    \n\n"); - options->toc_data.current_level--; - } - BUFPUTSL(ob,"
  • \n"); - } else { - BUFPUTSL(ob,"
  • \n
  • \n"); - } - - BUFPUTSL(ob, "toc_id_prefix) { - bufputs(ob, options->toc_id_prefix); - } - - bufprintf(ob, "toc_%d\">", options->toc_data.header_count++); - if (text) - escape_html(ob, text->data, text->size); - BUFPUTSL(ob, "\n"); -} - -static int -toc_link(struct buf *ob, const struct buf *link, const struct buf *title, const struct buf *content, void *opaque) -{ - if (content && content->size) - bufput(ob, content->data, content->size); - return 1; -} - -static void -reset_toc(struct buf *ob, void *opaque) -{ - struct html_renderopt *options = opaque; - - memset(&(options->toc_data), 0, sizeof(options->toc_data)); -} - -static void -toc_finalize(struct buf *ob, void *opaque) -{ - struct html_renderopt *options = opaque; - bool has_toc = false; - while (options->toc_data.current_level > 0) { - BUFPUTSL(ob, "
  • \n\n"); - options->toc_data.current_level--; - has_toc = true; - } - if(has_toc) { - BUFPUTSL(ob, "
    \n"); - } - reset_toc(ob, opaque); -} - -void -sdhtml_toc_renderer(struct sd_callbacks *callbacks, struct html_renderopt *options) -{ - static const struct sd_callbacks cb_default = { - NULL, - NULL, - NULL, - toc_header, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - - NULL, - rndr_codespan, - rndr_double_emphasis, - rndr_emphasis, - NULL, - NULL, - toc_link, - NULL, - rndr_triple_emphasis, - rndr_strikethrough, - rndr_superscript, - - NULL, - NULL, - - NULL, - toc_finalize, - }; - - memset(options, 0x0, sizeof(struct html_renderopt)); - options->flags = HTML_TOC | HTML_SKIP_HTML; - - memcpy(callbacks, &cb_default, sizeof(struct sd_callbacks)); -} - -void -sdhtml_renderer(struct sd_callbacks *callbacks, struct html_renderopt *options, unsigned int render_flags) -{ - static const struct sd_callbacks cb_default = { - rndr_blockcode, - rndr_blockquote, - rndr_raw_block, - rndr_header, - rndr_hrule, - rndr_list, - rndr_listitem, - rndr_paragraph, - rndr_table, - rndr_tablerow, - rndr_tablecell, - - rndr_autolink, - rndr_codespan, - rndr_double_emphasis, - rndr_emphasis, - rndr_image, - rndr_linebreak, - rndr_link, - rndr_raw_html, - rndr_triple_emphasis, - rndr_strikethrough, - rndr_superscript, - - NULL, - rndr_normal_text, - - NULL, - reset_toc, - }; - - /* Prepare the options pointer */ - memset(options, 0x0, sizeof(struct html_renderopt)); - options->flags = render_flags; - - /* Prepare the callbacks */ - memcpy(callbacks, &cb_default, sizeof(struct sd_callbacks)); - - if (render_flags & HTML_SKIP_IMAGES) - callbacks->image = NULL; - - if (render_flags & HTML_SKIP_LINKS) { - callbacks->link = NULL; - callbacks->autolink = NULL; - } - - if (render_flags & HTML_SKIP_HTML || render_flags & HTML_ESCAPE) - callbacks->blockhtml = NULL; -} diff --git a/SnudownTest/html/html.h b/SnudownTest/html/html.h deleted file mode 100644 index 59103b3..0000000 --- a/SnudownTest/html/html.h +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright (c) 2011, Vicent Marti - * - * Permission to use, copy, modify, and distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ - -#ifndef UPSKIRT_HTML_H -#define UPSKIRT_HTML_H - -#include "markdown.h" -#include "buffer.h" -#include - -#ifdef __cplusplus -extern "C" { -#endif - -struct html_renderopt { - struct { - int header_count; - int current_level; - int level_offset; - } toc_data; - - char* toc_id_prefix; - - unsigned int flags; - - char** html_element_whitelist; - char** html_attr_whitelist; - - /* extra callbacks */ - void (*link_attributes)(struct buf *ob, const struct buf *url, void *self); -}; - -typedef enum { - HTML_SKIP_HTML = (1 << 0), - HTML_SKIP_STYLE = (1 << 1), - HTML_SKIP_IMAGES = (1 << 2), - HTML_SKIP_LINKS = (1 << 3), - HTML_EXPAND_TABS = (1 << 4), - HTML_SAFELINK = (1 << 5), - HTML_TOC = (1 << 6), - HTML_HARD_WRAP = (1 << 7), - HTML_USE_XHTML = (1 << 8), - HTML_ESCAPE = (1 << 9), - HTML_ALLOW_ELEMENT_WHITELIST = (1 << 10), -} html_render_mode; - -typedef enum { - HTML_TAG_NONE = 0, - HTML_TAG_OPEN, - HTML_TAG_CLOSE, -} html_tag; - -int -sdhtml_is_tag(const uint8_t *tag_data, size_t tag_size, const char *tagname); - -extern void -sdhtml_renderer(struct sd_callbacks *callbacks, struct html_renderopt *options_ptr, unsigned int render_flags); - -extern void -sdhtml_toc_renderer(struct sd_callbacks *callbacks, struct html_renderopt *options_ptr); - -extern void -sdhtml_smartypants(struct buf *ob, const uint8_t *text, size_t size); - -#ifdef __cplusplus -} -#endif - -#endif - diff --git a/SnudownTest/html/html_smartypants.c b/SnudownTest/html/html_smartypants.c deleted file mode 100644 index 4db8f02..0000000 --- a/SnudownTest/html/html_smartypants.c +++ /dev/null @@ -1,389 +0,0 @@ -/* - * Copyright (c) 2011, Vicent Marti - * - * Permission to use, copy, modify, and distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ - -#include "buffer.h" -#include "html.h" - -#include -#include -#include -#include - -#if defined(_WIN32) -#define snprintf _snprintf -#endif - -struct smartypants_data { - int in_squote; - int in_dquote; -}; - -static size_t smartypants_cb__ltag(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size); -static size_t smartypants_cb__dquote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size); -static size_t smartypants_cb__amp(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size); -static size_t smartypants_cb__period(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size); -static size_t smartypants_cb__number(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size); -static size_t smartypants_cb__dash(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size); -static size_t smartypants_cb__parens(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size); -static size_t smartypants_cb__squote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size); -static size_t smartypants_cb__backtick(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size); -static size_t smartypants_cb__escape(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size); - -static size_t (*smartypants_cb_ptrs[]) - (struct buf *, struct smartypants_data *, uint8_t, const uint8_t *, size_t) = -{ - NULL, /* 0 */ - smartypants_cb__dash, /* 1 */ - smartypants_cb__parens, /* 2 */ - smartypants_cb__squote, /* 3 */ - smartypants_cb__dquote, /* 4 */ - smartypants_cb__amp, /* 5 */ - smartypants_cb__period, /* 6 */ - smartypants_cb__number, /* 7 */ - smartypants_cb__ltag, /* 8 */ - smartypants_cb__backtick, /* 9 */ - smartypants_cb__escape, /* 10 */ -}; - -static const uint8_t smartypants_cb_chars[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 4, 0, 0, 0, 5, 3, 2, 0, 0, 0, 0, 1, 6, 0, - 0, 7, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 0, 0, 0, - 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; - -static inline int -word_boundary(uint8_t c) -{ - return c == 0 || isspace(c) || ispunct(c); -} - -static int -smartypants_quotes(struct buf *ob, uint8_t previous_char, uint8_t next_char, uint8_t quote, int *is_open) -{ - char ent[8]; - - if (*is_open && !word_boundary(next_char)) - return 0; - - if (!(*is_open) && !word_boundary(previous_char)) - return 0; - - snprintf(ent, sizeof(ent), "&%c%cquo;", (*is_open) ? 'r' : 'l', quote); - *is_open = !(*is_open); - bufputs(ob, ent); - return 1; -} - -static size_t -smartypants_cb__squote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size) -{ - if (size >= 2) { - uint8_t t1 = tolower(text[1]); - - if (t1 == '\'') { - if (smartypants_quotes(ob, previous_char, size >= 3 ? text[2] : 0, 'd', &smrt->in_dquote)) - return 1; - } - - if ((t1 == 's' || t1 == 't' || t1 == 'm' || t1 == 'd') && - (size == 3 || word_boundary(text[2]))) { - BUFPUTSL(ob, "’"); - return 0; - } - - if (size >= 3) { - uint8_t t2 = tolower(text[2]); - - if (((t1 == 'r' && t2 == 'e') || - (t1 == 'l' && t2 == 'l') || - (t1 == 'v' && t2 == 'e')) && - (size == 4 || word_boundary(text[3]))) { - BUFPUTSL(ob, "’"); - return 0; - } - } - } - - if (smartypants_quotes(ob, previous_char, size > 0 ? text[1] : 0, 's', &smrt->in_squote)) - return 0; - - bufputc(ob, text[0]); - return 0; -} - -static size_t -smartypants_cb__parens(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size) -{ - if (size >= 3) { - uint8_t t1 = tolower(text[1]); - uint8_t t2 = tolower(text[2]); - - if (t1 == 'c' && t2 == ')') { - BUFPUTSL(ob, "©"); - return 2; - } - - if (t1 == 'r' && t2 == ')') { - BUFPUTSL(ob, "®"); - return 2; - } - - if (size >= 4 && t1 == 't' && t2 == 'm' && text[3] == ')') { - BUFPUTSL(ob, "™"); - return 3; - } - } - - bufputc(ob, text[0]); - return 0; -} - -static size_t -smartypants_cb__dash(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size) -{ - if (size >= 3 && text[1] == '-' && text[2] == '-') { - BUFPUTSL(ob, "—"); - return 2; - } - - if (size >= 2 && text[1] == '-') { - BUFPUTSL(ob, "–"); - return 1; - } - - bufputc(ob, text[0]); - return 0; -} - -static size_t -smartypants_cb__amp(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size) -{ - if (size >= 6 && memcmp(text, """, 6) == 0) { - if (smartypants_quotes(ob, previous_char, size >= 7 ? text[6] : 0, 'd', &smrt->in_dquote)) - return 5; - } - - if (size >= 4 && memcmp(text, "�", 4) == 0) - return 3; - - bufputc(ob, '&'); - return 0; -} - -static size_t -smartypants_cb__period(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size) -{ - if (size >= 3 && text[1] == '.' && text[2] == '.') { - BUFPUTSL(ob, "…"); - return 2; - } - - if (size >= 5 && text[1] == ' ' && text[2] == '.' && text[3] == ' ' && text[4] == '.') { - BUFPUTSL(ob, "…"); - return 4; - } - - bufputc(ob, text[0]); - return 0; -} - -static size_t -smartypants_cb__backtick(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size) -{ - if (size >= 2 && text[1] == '`') { - if (smartypants_quotes(ob, previous_char, size >= 3 ? text[2] : 0, 'd', &smrt->in_dquote)) - return 1; - } - - return 0; -} - -static size_t -smartypants_cb__number(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size) -{ - if (word_boundary(previous_char) && size >= 3) { - if (text[0] == '1' && text[1] == '/' && text[2] == '2') { - if (size == 3 || word_boundary(text[3])) { - BUFPUTSL(ob, "½"); - return 2; - } - } - - if (text[0] == '1' && text[1] == '/' && text[2] == '4') { - if (size == 3 || word_boundary(text[3]) || - (size >= 5 && tolower(text[3]) == 't' && tolower(text[4]) == 'h')) { - BUFPUTSL(ob, "¼"); - return 2; - } - } - - if (text[0] == '3' && text[1] == '/' && text[2] == '4') { - if (size == 3 || word_boundary(text[3]) || - (size >= 6 && tolower(text[3]) == 't' && tolower(text[4]) == 'h' && tolower(text[5]) == 's')) { - BUFPUTSL(ob, "¾"); - return 2; - } - } - } - - bufputc(ob, text[0]); - return 0; -} - -static size_t -smartypants_cb__dquote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size) -{ - if (!smartypants_quotes(ob, previous_char, size > 0 ? text[1] : 0, 'd', &smrt->in_dquote)) - BUFPUTSL(ob, """); - - return 0; -} - -static size_t -smartypants_cb__ltag(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size) -{ - static const char *skip_tags[] = { - "pre", "code", "var", "samp", "kbd", "math", "script", "style" - }; - static const size_t skip_tags_count = 8; - - size_t tag, i = 0; - - while (i < size && text[i] != '>') - i++; - - for (tag = 0; tag < skip_tags_count; ++tag) { - if (sdhtml_is_tag(text, size, skip_tags[tag]) == HTML_TAG_OPEN) - break; - } - - if (tag < skip_tags_count) { - for (;;) { - while (i < size && text[i] != '<') - i++; - - if (i == size) - break; - - if (sdhtml_is_tag(text + i, size - i, skip_tags[tag]) == HTML_TAG_CLOSE) - break; - - i++; - } - - while (i < size && text[i] != '>') - i++; - } - - bufput(ob, text, i + 1); - return i; -} - -static size_t -smartypants_cb__escape(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size) -{ - if (size < 2) - return 0; - - switch (text[1]) { - case '\\': - case '"': - case '\'': - case '.': - case '-': - case '`': - bufputc(ob, text[1]); - return 1; - - default: - bufputc(ob, '\\'); - return 0; - } -} - -#if 0 -static struct { - uint8_t c0; - const uint8_t *pattern; - const uint8_t *entity; - int skip; -} smartypants_subs[] = { - { '\'', "'s>", "’", 0 }, - { '\'', "'t>", "’", 0 }, - { '\'', "'re>", "’", 0 }, - { '\'', "'ll>", "’", 0 }, - { '\'', "'ve>", "’", 0 }, - { '\'', "'m>", "’", 0 }, - { '\'', "'d>", "’", 0 }, - { '-', "--", "—", 1 }, - { '-', "<->", "–", 0 }, - { '.', "...", "…", 2 }, - { '.', ". . .", "…", 4 }, - { '(', "(c)", "©", 2 }, - { '(', "(r)", "®", 2 }, - { '(', "(tm)", "™", 3 }, - { '3', "<3/4>", "¾", 2 }, - { '3', "<3/4ths>", "¾", 2 }, - { '1', "<1/2>", "½", 2 }, - { '1', "<1/4>", "¼", 2 }, - { '1', "<1/4th>", "¼", 2 }, - { '&', "�", 0, 3 }, -}; -#endif - -void -sdhtml_smartypants(struct buf *ob, const uint8_t *text, size_t size) -{ - size_t i; - struct smartypants_data smrt = {0, 0}; - - if (!text) - return; - - bufgrow(ob, size); - - for (i = 0; i < size; ++i) { - size_t org; - uint8_t action = 0; - - org = i; - while (i < size && (action = smartypants_cb_chars[text[i]]) == 0) - i++; - - if (i > org) - bufput(ob, text + org, i - org); - - if (i < size) { - i += smartypants_cb_ptrs[(int)action] - (ob, &smrt, i ? text[i - 1] : 0, text + i, size - i); - } - } -} - - diff --git a/SnudownTest/html_block_names.txt b/SnudownTest/html_block_names.txt deleted file mode 100644 index a41d7d1..0000000 --- a/SnudownTest/html_block_names.txt +++ /dev/null @@ -1,25 +0,0 @@ -## -p -dl -h1 -h2 -h3 -h4 -h5 -h6 -ol -ul -del -div -ins -pre -form -math -table -figure -iframe -script -style -fieldset -noscript -blockquote diff --git a/SnudownTest/html_blocks.h b/SnudownTest/html_blocks.h deleted file mode 100644 index 09a758f..0000000 --- a/SnudownTest/html_blocks.h +++ /dev/null @@ -1,206 +0,0 @@ -/* C code produced by gperf version 3.0.3 */ -/* Command-line: gperf -N find_block_tag -H hash_block_tag -C -c -E --ignore-case html_block_names.txt */ -/* Computed positions: -k'1-2' */ - -#if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \ - && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \ - && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \ - && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \ - && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \ - && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \ - && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \ - && ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \ - && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \ - && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \ - && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \ - && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \ - && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \ - && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \ - && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \ - && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \ - && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \ - && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \ - && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \ - && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \ - && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \ - && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \ - && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126)) -/* The character set is not based on ISO-646. */ -error "gperf generated tables don't work with this execution character set. Please report a bug to ." -#endif - -/* maximum key range = 37, duplicates = 0 */ - -#ifndef GPERF_DOWNCASE -#define GPERF_DOWNCASE 1 -static unsigned char gperf_downcase[256] = - { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, - 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, - 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, - 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, - 60, 61, 62, 63, 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, - 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, - 122, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, - 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, - 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, - 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, - 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, - 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, - 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, - 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, - 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, - 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, - 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, - 255 - }; -#endif - -#ifndef GPERF_CASE_STRNCMP -#define GPERF_CASE_STRNCMP 1 -static int -gperf_case_strncmp (s1, s2, n) - register const char *s1; - register const char *s2; - register unsigned int n; -{ - for (; n > 0;) - { - unsigned char c1 = gperf_downcase[(unsigned char)*s1++]; - unsigned char c2 = gperf_downcase[(unsigned char)*s2++]; - if (c1 != 0 && c1 == c2) - { - n--; - continue; - } - return (int)c1 - (int)c2; - } - return 0; -} -#endif - -#ifdef __GNUC__ -__inline -#else -#ifdef __cplusplus -inline -#endif -#endif -static unsigned int -hash_block_tag (str, len) - register const char *str; - register unsigned int len; -{ - static const unsigned char asso_values[] = - { - 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, - 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, - 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, - 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, - 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, - 8, 30, 25, 20, 15, 10, 38, 38, 38, 38, - 38, 38, 38, 38, 38, 38, 0, 38, 0, 38, - 5, 5, 5, 15, 0, 38, 38, 0, 15, 10, - 0, 38, 38, 15, 0, 5, 38, 38, 38, 38, - 38, 38, 38, 38, 38, 38, 38, 38, 0, 38, - 0, 38, 5, 5, 5, 15, 0, 38, 38, 0, - 15, 10, 0, 38, 38, 15, 0, 5, 38, 38, - 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, - 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, - 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, - 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, - 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, - 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, - 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, - 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, - 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, - 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, - 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, - 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, - 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, - 38, 38, 38, 38, 38, 38, 38 - }; - register int hval = len; - - switch (hval) - { - default: - hval += asso_values[(unsigned char)str[1]+1]; - /*FALLTHROUGH*/ - case 1: - hval += asso_values[(unsigned char)str[0]]; - break; - } - return hval; -} - -#ifdef __GNUC__ -__inline -#ifdef __GNUC_STDC_INLINE__ -__attribute__ ((__gnu_inline__)) -#endif -#endif -const char * -find_block_tag (str, len) - register const char *str; - register unsigned int len; -{ - enum - { - TOTAL_KEYWORDS = 24, - MIN_WORD_LENGTH = 1, - MAX_WORD_LENGTH = 10, - MIN_HASH_VALUE = 1, - MAX_HASH_VALUE = 37 - }; - - static const char * const wordlist[] = - { - "", - "p", - "dl", - "div", - "math", - "table", - "", - "ul", - "del", - "form", - "blockquote", - "figure", - "ol", - "fieldset", - "", - "h1", - "", - "h6", - "pre", - "", "", - "script", - "h5", - "noscript", - "", - "style", - "iframe", - "h4", - "ins", - "", "", "", - "h3", - "", "", "", "", - "h2" - }; - - if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH) - { - register int key = hash_block_tag (str, len); - - if (key <= MAX_HASH_VALUE && key >= 0) - { - register const char *s = wordlist[key]; - - if ((((unsigned char)*str ^ (unsigned char)*s) & ~32) == 0 && !gperf_case_strncmp (str, s, len) && s[len] == '\0') - return s; - } - } - return 0; -} diff --git a/SnudownTest/html_entities.gperf b/SnudownTest/html_entities.gperf deleted file mode 100644 index f94e3c9..0000000 --- a/SnudownTest/html_entities.gperf +++ /dev/null @@ -1,292 +0,0 @@ -%language=ANSI-C -%define lookup-function-name is_allowed_named_entity -%compare-strncmp -%readonly-tables -%define hash-function-name hash_html_entity -%enum -%includes -%{ -#include - -/* Parsers tend to choke on entities with values greater than this */ -const u_int32_t MAX_NUM_ENTITY_VAL = 0x10ffff; -/* Any numeric entity longer than this is obviously above MAX_NUM_ENTITY_VAL - * used to avoid dealing with overflows. */ -const size_t MAX_NUM_ENTITY_LEN = 7; - -inline int is_valid_numeric_entity(uint32_t entity_val) -{ - /* Some XML parsers will choke on entities with certain - * values (mostly control characters.) - * - * According to lxml these are all problematic: - * - * [xrange(0, 8), - * xrange(11, 12), - * xrange(14, 31), - * xrange(55296, 57343), - * xrange(65534, 65535)] - */ - return (entity_val > 8 - && (entity_val != 11 && entity_val != 12) - && (entity_val < 14 || entity_val > 31) - && (entity_val < 55296 || entity_val > 57343) - && (entity_val != 65534 && entity_val != 65535) - && entity_val <= MAX_NUM_ENTITY_VAL); -} - -%} -%% -Æ -Á - -À -Α -Å -à -Ä -Β -Ç -Χ -‡ -Δ -Ð -É -Ê -È -Ε -Η -Ë -Γ -Í -Î -Ì -Ι -Ï -Κ -Λ -Μ -Ñ -Ν -Œ -Ó -Ô -Ò -Ω -Ο -Ø -Õ -Ö -Φ -Π -″ -Ψ -Ρ -Š -Σ -Þ -Τ -Θ -Ú -Û -Ù -Υ -Ü -Ξ -Ý -Ÿ -Ζ -á -â -´ -æ -à -ℵ -α -& -∧ -∠ -' -å -≈ -ã -ä -„ -β -¦ -• -∩ -ç -¸ -¢ -χ -ˆ -♣ -≅ -© -↵ -∪ -¤ -⇓ -† -↓ -° -δ -♦ -÷ -é -ê -è -∅ -  -  -ε -≡ -η -ð -ë -€ -∃ -ƒ -∀ -½ -¼ -¾ -⁄ -γ -≥ -> -⇔ -↔ -♥ -… -í -î -¡ -ì -ℑ -∞ -∫ -ι -¿ -∈ -ï -κ -⇐ -λ -⟨ -« -← -⌈ -“ -≤ -⌊ -∗ -◊ -‎ -‹ -‘ -< -¯ -— -µ -· -− -μ -∇ -  -– -≠ -∋ -¬ -∉ -⊄ -ñ -ν -ó -ô -œ -ò -‾ -ω -ο -⊕ -∨ -ª -º -ø -õ -⊗ -ö -¶ -∂ -‰ -⊥ -φ -π -ϖ -± -£ -′ -∏ -∝ -ψ -" -⇒ -√ -⟩ -» -→ -⌉ -” -ℜ -® -⌋ -ρ -‏ -› -’ -‚ -š -⋅ -§ -­ -σ -ς -∼ -♠ -⊂ -⊆ -∑ -¹ -² -³ -⊃ -⊇ -ß -τ -∴ -θ -ϑ -  -þ -˜ -× -™ -⇑ -ú -↑ -û -ù -¨ -ϒ -υ -ü -℘ -ξ -ý -¥ -ÿ -ζ -‍ -‌ diff --git a/SnudownTest/html_smartypants.c b/SnudownTest/html_smartypants.c deleted file mode 100644 index 4db8f02..0000000 --- a/SnudownTest/html_smartypants.c +++ /dev/null @@ -1,389 +0,0 @@ -/* - * Copyright (c) 2011, Vicent Marti - * - * Permission to use, copy, modify, and distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ - -#include "buffer.h" -#include "html.h" - -#include -#include -#include -#include - -#if defined(_WIN32) -#define snprintf _snprintf -#endif - -struct smartypants_data { - int in_squote; - int in_dquote; -}; - -static size_t smartypants_cb__ltag(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size); -static size_t smartypants_cb__dquote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size); -static size_t smartypants_cb__amp(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size); -static size_t smartypants_cb__period(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size); -static size_t smartypants_cb__number(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size); -static size_t smartypants_cb__dash(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size); -static size_t smartypants_cb__parens(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size); -static size_t smartypants_cb__squote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size); -static size_t smartypants_cb__backtick(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size); -static size_t smartypants_cb__escape(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size); - -static size_t (*smartypants_cb_ptrs[]) - (struct buf *, struct smartypants_data *, uint8_t, const uint8_t *, size_t) = -{ - NULL, /* 0 */ - smartypants_cb__dash, /* 1 */ - smartypants_cb__parens, /* 2 */ - smartypants_cb__squote, /* 3 */ - smartypants_cb__dquote, /* 4 */ - smartypants_cb__amp, /* 5 */ - smartypants_cb__period, /* 6 */ - smartypants_cb__number, /* 7 */ - smartypants_cb__ltag, /* 8 */ - smartypants_cb__backtick, /* 9 */ - smartypants_cb__escape, /* 10 */ -}; - -static const uint8_t smartypants_cb_chars[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 4, 0, 0, 0, 5, 3, 2, 0, 0, 0, 0, 1, 6, 0, - 0, 7, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 0, 0, 0, - 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; - -static inline int -word_boundary(uint8_t c) -{ - return c == 0 || isspace(c) || ispunct(c); -} - -static int -smartypants_quotes(struct buf *ob, uint8_t previous_char, uint8_t next_char, uint8_t quote, int *is_open) -{ - char ent[8]; - - if (*is_open && !word_boundary(next_char)) - return 0; - - if (!(*is_open) && !word_boundary(previous_char)) - return 0; - - snprintf(ent, sizeof(ent), "&%c%cquo;", (*is_open) ? 'r' : 'l', quote); - *is_open = !(*is_open); - bufputs(ob, ent); - return 1; -} - -static size_t -smartypants_cb__squote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size) -{ - if (size >= 2) { - uint8_t t1 = tolower(text[1]); - - if (t1 == '\'') { - if (smartypants_quotes(ob, previous_char, size >= 3 ? text[2] : 0, 'd', &smrt->in_dquote)) - return 1; - } - - if ((t1 == 's' || t1 == 't' || t1 == 'm' || t1 == 'd') && - (size == 3 || word_boundary(text[2]))) { - BUFPUTSL(ob, "’"); - return 0; - } - - if (size >= 3) { - uint8_t t2 = tolower(text[2]); - - if (((t1 == 'r' && t2 == 'e') || - (t1 == 'l' && t2 == 'l') || - (t1 == 'v' && t2 == 'e')) && - (size == 4 || word_boundary(text[3]))) { - BUFPUTSL(ob, "’"); - return 0; - } - } - } - - if (smartypants_quotes(ob, previous_char, size > 0 ? text[1] : 0, 's', &smrt->in_squote)) - return 0; - - bufputc(ob, text[0]); - return 0; -} - -static size_t -smartypants_cb__parens(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size) -{ - if (size >= 3) { - uint8_t t1 = tolower(text[1]); - uint8_t t2 = tolower(text[2]); - - if (t1 == 'c' && t2 == ')') { - BUFPUTSL(ob, "©"); - return 2; - } - - if (t1 == 'r' && t2 == ')') { - BUFPUTSL(ob, "®"); - return 2; - } - - if (size >= 4 && t1 == 't' && t2 == 'm' && text[3] == ')') { - BUFPUTSL(ob, "™"); - return 3; - } - } - - bufputc(ob, text[0]); - return 0; -} - -static size_t -smartypants_cb__dash(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size) -{ - if (size >= 3 && text[1] == '-' && text[2] == '-') { - BUFPUTSL(ob, "—"); - return 2; - } - - if (size >= 2 && text[1] == '-') { - BUFPUTSL(ob, "–"); - return 1; - } - - bufputc(ob, text[0]); - return 0; -} - -static size_t -smartypants_cb__amp(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size) -{ - if (size >= 6 && memcmp(text, """, 6) == 0) { - if (smartypants_quotes(ob, previous_char, size >= 7 ? text[6] : 0, 'd', &smrt->in_dquote)) - return 5; - } - - if (size >= 4 && memcmp(text, "�", 4) == 0) - return 3; - - bufputc(ob, '&'); - return 0; -} - -static size_t -smartypants_cb__period(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size) -{ - if (size >= 3 && text[1] == '.' && text[2] == '.') { - BUFPUTSL(ob, "…"); - return 2; - } - - if (size >= 5 && text[1] == ' ' && text[2] == '.' && text[3] == ' ' && text[4] == '.') { - BUFPUTSL(ob, "…"); - return 4; - } - - bufputc(ob, text[0]); - return 0; -} - -static size_t -smartypants_cb__backtick(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size) -{ - if (size >= 2 && text[1] == '`') { - if (smartypants_quotes(ob, previous_char, size >= 3 ? text[2] : 0, 'd', &smrt->in_dquote)) - return 1; - } - - return 0; -} - -static size_t -smartypants_cb__number(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size) -{ - if (word_boundary(previous_char) && size >= 3) { - if (text[0] == '1' && text[1] == '/' && text[2] == '2') { - if (size == 3 || word_boundary(text[3])) { - BUFPUTSL(ob, "½"); - return 2; - } - } - - if (text[0] == '1' && text[1] == '/' && text[2] == '4') { - if (size == 3 || word_boundary(text[3]) || - (size >= 5 && tolower(text[3]) == 't' && tolower(text[4]) == 'h')) { - BUFPUTSL(ob, "¼"); - return 2; - } - } - - if (text[0] == '3' && text[1] == '/' && text[2] == '4') { - if (size == 3 || word_boundary(text[3]) || - (size >= 6 && tolower(text[3]) == 't' && tolower(text[4]) == 'h' && tolower(text[5]) == 's')) { - BUFPUTSL(ob, "¾"); - return 2; - } - } - } - - bufputc(ob, text[0]); - return 0; -} - -static size_t -smartypants_cb__dquote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size) -{ - if (!smartypants_quotes(ob, previous_char, size > 0 ? text[1] : 0, 'd', &smrt->in_dquote)) - BUFPUTSL(ob, """); - - return 0; -} - -static size_t -smartypants_cb__ltag(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size) -{ - static const char *skip_tags[] = { - "pre", "code", "var", "samp", "kbd", "math", "script", "style" - }; - static const size_t skip_tags_count = 8; - - size_t tag, i = 0; - - while (i < size && text[i] != '>') - i++; - - for (tag = 0; tag < skip_tags_count; ++tag) { - if (sdhtml_is_tag(text, size, skip_tags[tag]) == HTML_TAG_OPEN) - break; - } - - if (tag < skip_tags_count) { - for (;;) { - while (i < size && text[i] != '<') - i++; - - if (i == size) - break; - - if (sdhtml_is_tag(text + i, size - i, skip_tags[tag]) == HTML_TAG_CLOSE) - break; - - i++; - } - - while (i < size && text[i] != '>') - i++; - } - - bufput(ob, text, i + 1); - return i; -} - -static size_t -smartypants_cb__escape(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size) -{ - if (size < 2) - return 0; - - switch (text[1]) { - case '\\': - case '"': - case '\'': - case '.': - case '-': - case '`': - bufputc(ob, text[1]); - return 1; - - default: - bufputc(ob, '\\'); - return 0; - } -} - -#if 0 -static struct { - uint8_t c0; - const uint8_t *pattern; - const uint8_t *entity; - int skip; -} smartypants_subs[] = { - { '\'', "'s>", "’", 0 }, - { '\'', "'t>", "’", 0 }, - { '\'', "'re>", "’", 0 }, - { '\'', "'ll>", "’", 0 }, - { '\'', "'ve>", "’", 0 }, - { '\'', "'m>", "’", 0 }, - { '\'', "'d>", "’", 0 }, - { '-', "--", "—", 1 }, - { '-', "<->", "–", 0 }, - { '.', "...", "…", 2 }, - { '.', ". . .", "…", 4 }, - { '(', "(c)", "©", 2 }, - { '(', "(r)", "®", 2 }, - { '(', "(tm)", "™", 3 }, - { '3', "<3/4>", "¾", 2 }, - { '3', "<3/4ths>", "¾", 2 }, - { '1', "<1/2>", "½", 2 }, - { '1', "<1/4>", "¼", 2 }, - { '1', "<1/4th>", "¼", 2 }, - { '&', "�", 0, 3 }, -}; -#endif - -void -sdhtml_smartypants(struct buf *ob, const uint8_t *text, size_t size) -{ - size_t i; - struct smartypants_data smrt = {0, 0}; - - if (!text) - return; - - bufgrow(ob, size); - - for (i = 0; i < size; ++i) { - size_t org; - uint8_t action = 0; - - org = i; - while (i < size && (action = smartypants_cb_chars[text[i]]) == 0) - i++; - - if (i > org) - bufput(ob, text + org, i - org); - - if (i < size) { - i += smartypants_cb_ptrs[(int)action] - (ob, &smrt, i ? text[i - 1] : 0, text + i, size - i); - } - } -} - - diff --git a/SnudownTest/markdown.c b/SnudownTest/markdown.c deleted file mode 100644 index abe4a1d..0000000 --- a/SnudownTest/markdown.c +++ /dev/null @@ -1,2661 +0,0 @@ -/* markdown.c - generic markdown parser */ - -/* - * Copyright (c) 2009, Natacha Porté - * Copyright (c) 2011, Vicent Marti - * - * Permission to use, copy, modify, and distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ - -#include "markdown.h" -#include "stack.h" - -#include -#include -#include -#include - -#if defined(_WIN32) -#define strncasecmp _strnicmp -#endif - -#define REF_TABLE_SIZE 8 - -#define BUFFER_BLOCK 0 -#define BUFFER_SPAN 1 - -#define MKD_LI_END 8 /* internal list flag */ - -#define gperf_case_strncmp(s1, s2, n) strncasecmp(s1, s2, n) -#define GPERF_DOWNCASE 1 -#define GPERF_CASE_STRNCMP 1 -#include "html_blocks.h" -#include "html_entities.h" - -/*************** - * LOCAL TYPES * - ***************/ - -/* link_ref: reference to a link */ -struct link_ref { - unsigned int id; - - struct buf *link; - struct buf *title; - - struct link_ref *next; -}; - -/* char_trigger: function pointer to render active chars */ -/* returns the number of chars taken care of */ -/* data is the pointer of the beginning of the span */ -/* offset is the number of valid chars before data */ -struct sd_markdown; -typedef size_t -(*char_trigger)(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size); - -static size_t char_emphasis(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size); -static size_t char_linebreak(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size); -static size_t char_codespan(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size); -static size_t char_escape(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size); -static size_t char_entity(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size); -static size_t char_langle_tag(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size); -static size_t char_autolink_url(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size); -static size_t char_autolink_email(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size); -static size_t char_autolink_www(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size); -static size_t char_autolink_subreddit_or_username(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size); -static size_t char_link(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size); -static size_t char_superscript(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size); - -enum markdown_char_t { - MD_CHAR_NONE = 0, - MD_CHAR_EMPHASIS, - MD_CHAR_CODESPAN, - MD_CHAR_LINEBREAK, - MD_CHAR_LINK, - MD_CHAR_LANGLE, - MD_CHAR_ESCAPE, - MD_CHAR_ENTITITY, - MD_CHAR_AUTOLINK_URL, - MD_CHAR_AUTOLINK_EMAIL, - MD_CHAR_AUTOLINK_WWW, - MD_CHAR_AUTOLINK_SUBREDDIT_OR_USERNAME, - MD_CHAR_SUPERSCRIPT, -}; - -static char_trigger markdown_char_ptrs[] = { - NULL, - &char_emphasis, - &char_codespan, - &char_linebreak, - &char_link, - &char_langle_tag, - &char_escape, - &char_entity, - &char_autolink_url, - &char_autolink_email, - &char_autolink_www, - &char_autolink_subreddit_or_username, - &char_superscript, -}; - -/* render • structure containing one particular render */ -struct sd_markdown { - struct sd_callbacks cb; - void *opaque; - - struct link_ref *refs[REF_TABLE_SIZE]; - uint8_t active_char[256]; - struct stack work_bufs[2]; - unsigned int ext_flags; - size_t max_nesting; - size_t max_table_cols; - int in_link_body; -}; - -/*************************** - * HELPER FUNCTIONS * - ***************************/ - -static inline struct buf * -rndr_newbuf(struct sd_markdown *rndr, int type) -{ - static const size_t buf_size[2] = {256, 64}; - struct buf *work = NULL; - struct stack *pool = &rndr->work_bufs[type]; - - if (pool->size < pool->asize && - pool->item[pool->size] != NULL) { - work = pool->item[pool->size++]; - work->size = 0; - } else { - work = bufnew(buf_size[type]); - stack_push(pool, work); - } - - return work; -} - -static inline void -rndr_popbuf(struct sd_markdown *rndr, int type) -{ - rndr->work_bufs[type].size--; -} - -static void -unscape_text(struct buf *ob, struct buf *src) -{ - size_t i = 0, org; - while (i < src->size) { - org = i; - while (i < src->size && src->data[i] != '\\') - i++; - - if (i > org) - bufput(ob, src->data + org, i - org); - - if (i + 1 >= src->size) - break; - - bufputc(ob, src->data[i + 1]); - i += 2; - } -} - -static unsigned int -hash_link_ref(const uint8_t *link_ref, size_t length) -{ - size_t i; - unsigned int hash = 0; - - for (i = 0; i < length; ++i) - hash = tolower(link_ref[i]) + (hash << 6) + (hash << 16) - hash; - - return hash; -} - -static struct link_ref * -add_link_ref( - struct link_ref **references, - const uint8_t *name, size_t name_size) -{ - struct link_ref *ref = calloc(1, sizeof(struct link_ref)); - - if (!ref) - return NULL; - - ref->id = hash_link_ref(name, name_size); - ref->next = references[ref->id % REF_TABLE_SIZE]; - - references[ref->id % REF_TABLE_SIZE] = ref; - return ref; -} - -static struct link_ref * -find_link_ref(struct link_ref **references, uint8_t *name, size_t length) -{ - unsigned int hash = hash_link_ref(name, length); - struct link_ref *ref = NULL; - - ref = references[hash % REF_TABLE_SIZE]; - - while (ref != NULL) { - if (ref->id == hash) - return ref; - - ref = ref->next; - } - - return NULL; -} - -static void -free_link_refs(struct link_ref **references) -{ - size_t i; - - for (i = 0; i < REF_TABLE_SIZE; ++i) { - struct link_ref *r = references[i]; - struct link_ref *next; - - while (r) { - next = r->next; - bufrelease(r->link); - bufrelease(r->title); - free(r); - r = next; - } - } -} - -/* - * Check whether a char is a Markdown space. - - * Right now we only consider spaces the actual - * space and a newline: tabs and carriage returns - * are filtered out during the preprocessing phase. - * - * If we wanted to actually be UTF-8 compliant, we - * should instead extract an Unicode codepoint from - * this character and check for space properties. - */ -static inline int -_isspace(int c) -{ - return c == ' ' || c == '\n'; -} - -/**************************** - * INLINE PARSING FUNCTIONS * - ****************************/ - -/* is_mail_autolink • looks for the address part of a mail autolink and '>' */ -/* this is less strict than the original markdown e-mail address matching */ -static size_t -is_mail_autolink(uint8_t *data, size_t size) -{ - size_t i = 0, nb = 0; - - /* address is assumed to be: [-@._a-zA-Z0-9]+ with exactly one '@' */ - for (i = 0; i < size; ++i) { - if (isalnum(data[i])) - continue; - - switch (data[i]) { - case '@': - nb++; - - case '-': - case '.': - case '_': - break; - - case '>': - return (nb == 1) ? i + 1 : 0; - - default: - return 0; - } - } - - return 0; -} - -/* tag_length • returns the length of the given tag, or 0 is it's not valid */ -static size_t -tag_length(uint8_t *data, size_t size, enum mkd_autolink *autolink) -{ - size_t i, j; - - /* a valid tag can't be shorter than 3 chars */ - if (size < 3) return 0; - - /* begins with a '<' optionally followed by '/', followed by letter or number */ - if (data[0] != '<') return 0; - i = (data[1] == '/') ? 2 : 1; - - if (!isalnum(data[i])) - return 0; - - /* scheme test */ - *autolink = MKDA_NOT_AUTOLINK; - - /* try to find the beginning of an URI */ - while (i < size && (isalnum(data[i]) || data[i] == '.' || data[i] == '+' || data[i] == '-')) - i++; - - if (i > 1 && data[i] == '@') { - if ((j = is_mail_autolink(data + i, size - i)) != 0) { - *autolink = MKDA_EMAIL; - return i + j; - } - } - - if (i > 2 && data[i] == ':') { - *autolink = MKDA_NORMAL; - i++; - } - - /* completing autolink test: no whitespace or ' or " */ - if (i >= size) - *autolink = MKDA_NOT_AUTOLINK; - - else if (*autolink) { - j = i; - - while (i < size) { - if (data[i] == '\\') i += 2; - else if (data[i] == '>' || data[i] == '\'' || - data[i] == '"' || data[i] == ' ' || data[i] == '\n') - break; - else i++; - } - - if (i >= size) return 0; - if (i > j && data[i] == '>') return i + 1; - /* one of the forbidden chars has been found */ - *autolink = MKDA_NOT_AUTOLINK; - } - - /* looking for sometinhg looking like a tag end */ - while (i < size && data[i] != '>') i++; - if (i >= size) return 0; - return i + 1; -} - -/* parse_inline • parses inline markdown elements */ -static void -parse_inline(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size) -{ - size_t i = 0, end = 0, last_special = 0; - uint8_t action = 0; - struct buf work = { 0, 0, 0, 0 }; - - if (rndr->work_bufs[BUFFER_SPAN].size + - rndr->work_bufs[BUFFER_BLOCK].size > rndr->max_nesting) - return; - - while (i < size) { - /* copying inactive chars into the output */ - while (end < size && (action = rndr->active_char[data[end]]) == 0) { - end++; - } - - if (rndr->cb.normal_text) { - work.data = data + i; - work.size = end - i; - rndr->cb.normal_text(ob, &work, rndr->opaque); - } - else - bufput(ob, data + i, end - i); - - if (end >= size) break; - i = end; - - end = markdown_char_ptrs[(int)action](ob, rndr, data + i, i - last_special, i, size - i); - if (!end) /* no action from the callback */ - end = i + 1; - else { - i += end; - last_special = end = i; - } - } -} - -/* find_emph_char • looks for the next emph uint8_t, skipping other constructs */ -static size_t -find_emph_char(uint8_t *data, size_t size, uint8_t c) -{ - size_t i = 1; - - while (i < size) { - while (i < size && data[i] != c && data[i] != '`' && data[i] != '[') - i++; - - if (i == size) - return 0; - - if (data[i] == c) - return i; - - /* not counting escaped chars */ - if (i && data[i - 1] == '\\') { - i++; continue; - } - - if (data[i] == '`') { - size_t span_nb = 0, bt; - size_t tmp_i = 0; - - /* counting the number of opening backticks */ - while (i < size && data[i] == '`') { - i++; span_nb++; - } - - if (i >= size) return 0; - - /* finding the matching closing sequence */ - bt = 0; - while (i < size && bt < span_nb) { - if (!tmp_i && data[i] == c) tmp_i = i; - if (data[i] == '`') bt++; - else bt = 0; - i++; - } - - if (i >= size) return tmp_i; - } - /* skipping a link */ - else if (data[i] == '[') { - size_t tmp_i = 0; - uint8_t cc; - - i++; - while (i < size && data[i] != ']') { - if (!tmp_i && data[i] == c) tmp_i = i; - i++; - } - - i++; - while (i < size && (data[i] == ' ' || data[i] == '\n')) - i++; - - if (i >= size) - return tmp_i; - - switch (data[i]) { - case '[': - cc = ']'; break; - - case '(': - cc = ')'; break; - - default: - if (tmp_i) - return tmp_i; - else - continue; - } - - i++; - while (i < size && data[i] != cc) { - if (!tmp_i && data[i] == c) tmp_i = i; - i++; - } - - if (i >= size) - return tmp_i; - - i++; - } - } - - return 0; -} - -/* parse_emph1 • parsing single emphase */ -/* closed by a symbol not preceded by whitespace and not followed by symbol */ -static size_t -parse_emph1(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, uint8_t c) -{ - size_t i = 0, len; - struct buf *work = 0; - int r; - - if (!rndr->cb.emphasis) return 0; - - /* skipping one symbol if coming from emph3 */ - if (size > 1 && data[0] == c && data[1] == c) i = 1; - - while (i < size) { - len = find_emph_char(data + i, size - i, c); - if (!len) return 0; - i += len; - if (i >= size) return 0; - - if (data[i] == c && !_isspace(data[i - 1])) { - if ((rndr->ext_flags & MKDEXT_NO_INTRA_EMPHASIS) && (c == '_')) { - if (!(i + 1 == size || _isspace(data[i + 1]) || ispunct(data[i + 1]))) - continue; - } - - work = rndr_newbuf(rndr, BUFFER_SPAN); - parse_inline(work, rndr, data, i); - r = rndr->cb.emphasis(ob, work, rndr->opaque); - rndr_popbuf(rndr, BUFFER_SPAN); - return r ? i + 1 : 0; - } - } - - return 0; -} - -/* parse_emph2 • parsing single emphase */ -static size_t -parse_emph2(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, uint8_t c) -{ - int (*render_method)(struct buf *ob, const struct buf *text, void *opaque); - size_t i = 0, len; - struct buf *work = 0; - int r; - - render_method = (c == '~') ? rndr->cb.strikethrough : rndr->cb.double_emphasis; - - if (!render_method) - return 0; - - while (i < size) { - len = find_emph_char(data + i, size - i, c); - if (!len) return 0; - i += len; - - if (i + 1 < size && data[i] == c && data[i + 1] == c && i && !_isspace(data[i - 1])) { - work = rndr_newbuf(rndr, BUFFER_SPAN); - parse_inline(work, rndr, data, i); - r = render_method(ob, work, rndr->opaque); - rndr_popbuf(rndr, BUFFER_SPAN); - return r ? i + 2 : 0; - } - i++; - } - return 0; -} - -/* parse_emph3 • parsing single emphase */ -/* finds the first closing tag, and delegates to the other emph */ -static size_t -parse_emph3(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, uint8_t c) -{ - size_t i = 0, len; - int r; - - while (i < size) { - len = find_emph_char(data + i, size - i, c); - if (!len) return 0; - i += len; - - /* skip whitespace preceded symbols */ - if (data[i] != c || _isspace(data[i - 1])) - continue; - - if (i + 2 < size && data[i + 1] == c && data[i + 2] == c && rndr->cb.triple_emphasis) { - /* triple symbol found */ - struct buf *work = rndr_newbuf(rndr, BUFFER_SPAN); - - parse_inline(work, rndr, data, i); - r = rndr->cb.triple_emphasis(ob, work, rndr->opaque); - rndr_popbuf(rndr, BUFFER_SPAN); - return r ? i + 3 : 0; - - } else if (i + 1 < size && data[i + 1] == c) { - /* double symbol found, handing over to emph1 */ - len = parse_emph1(ob, rndr, data - 2, size + 2, c); - if (!len) return 0; - else return len - 2; - - } else { - /* single symbol found, handing over to emph2 */ - len = parse_emph2(ob, rndr, data - 1, size + 1, c); - if (!len) return 0; - else return len - 1; - } - } - return 0; -} - -/* char_emphasis • single and double emphasis parsing */ -static size_t -char_emphasis(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size) -{ - uint8_t c = data[0]; - size_t ret; - - if (size > 2 && data[1] != c) { - /* whitespace cannot follow an opening emphasis; - * strikethrough only takes two characters '~~' */ - if (c == '~' || _isspace(data[1]) || (ret = parse_emph1(ob, rndr, data + 1, size - 1, c)) == 0) - return 0; - - return ret + 1; - } - - if (size > 3 && data[1] == c && data[2] != c) { - if (_isspace(data[2]) || (ret = parse_emph2(ob, rndr, data + 2, size - 2, c)) == 0) - return 0; - - return ret + 2; - } - - if (size > 4 && data[1] == c && data[2] == c && data[3] != c) { - if (c == '~' || _isspace(data[3]) || (ret = parse_emph3(ob, rndr, data + 3, size - 3, c)) == 0) - return 0; - - return ret + 3; - } - - return 0; -} - - -/* char_linebreak • '\n' preceded by two spaces (assuming linebreak != 0) */ -static size_t -char_linebreak(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size) -{ - if (max_rewind < 2 || data[-1] != ' ' || data[-2] != ' ') - return 0; - - /* removing the last space from ob and rendering */ - while (ob->size && ob->data[ob->size - 1] == ' ') - ob->size--; - - return rndr->cb.linebreak(ob, rndr->opaque) ? 1 : 0; -} - - -/* char_codespan • '`' parsing a code span (assuming codespan != 0) */ -static size_t -char_codespan(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size) -{ - size_t end, nb = 0, i, f_begin, f_end; - - /* counting the number of backticks in the delimiter */ - while (nb < size && data[nb] == '`') - nb++; - - /* finding the next delimiter */ - i = 0; - for (end = nb; end < size && i < nb; end++) { - if (data[end] == '`') i++; - else i = 0; - } - - if (i < nb && end >= size) - return 0; /* no matching delimiter */ - - /* trimming outside whitespaces */ - f_begin = nb; - while (f_begin < end && data[f_begin] == ' ') - f_begin++; - - f_end = end - nb; - while (f_end > nb && data[f_end-1] == ' ') - f_end--; - - /* real code span */ - if (f_begin < f_end) { - struct buf work = { data + f_begin, f_end - f_begin, 0, 0 }; - if (!rndr->cb.codespan(ob, &work, rndr->opaque)) - end = 0; - } else { - if (!rndr->cb.codespan(ob, 0, rndr->opaque)) - end = 0; - } - - return end; -} - - -/* char_escape • '\\' backslash escape */ -static size_t -char_escape(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size) -{ - static const char *escape_chars = "\\`*_{}[]()#+-.!:|&<>/^~"; - struct buf work = { 0, 0, 0, 0 }; - - if (size > 1) { - if (strchr(escape_chars, data[1]) == NULL) - return 0; - - if (rndr->cb.normal_text) { - work.data = data + 1; - work.size = 1; - rndr->cb.normal_text(ob, &work, rndr->opaque); - } - else bufputc(ob, data[1]); - } else if (size == 1) { - bufputc(ob, data[0]); - } - - return 2; -} - -/* char_entity • '&' escaped when it doesn't belong to an entity */ -static size_t -char_entity(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size) -{ - size_t end = 1; - size_t content_start; - size_t content_end; - struct buf work = { 0, 0, 0, 0 }; - int numeric = 0; - int hex = 0; - int entity_base; - uint32_t entity_val; - - if (end < size && data[end] == '#') { - numeric = 1; - end++; - } - - if (end < size && numeric && tolower(data[end]) == 'x') { - hex = 1; - end++; - } - - content_start = end; - - while (end < size) { - const char c = data[end]; - if (hex) { - if (!isxdigit(c)) break; - } else if (numeric) { - if (!isdigit(c)) break; - } else if (!isalnum(c)) { - break; - } - end++; - } - - content_end = end; - - if (end > content_start && end < size && data[end] == ';') - end++; /* well-formed entity */ - else - return 0; /* not an entity */ - - /* way too long to be a valid numeric entity */ - if (numeric && content_end - content_start > MAX_NUM_ENTITY_LEN) - return 0; - - /* Validate the entity's contents */ - if (numeric) { - if (hex) - entity_base = 16; - else - entity_base = 10; - - // This is ok because it'll stop once it hits the ';' - entity_val = strtol((char*)data + content_start, NULL, entity_base); - if (!is_valid_numeric_entity(entity_val)) - return 0; - } else { - if (!is_allowed_named_entity((const char *)data, end)) - return 0; - } - - if (rndr->cb.entity) { - work.data = data; - work.size = end; - rndr->cb.entity(ob, &work, rndr->opaque); - } else { - /* Necessary so we can normalize `>` to `>` */ - bufputc(ob, '&'); - if (numeric) - bufputc(ob, '#'); - if (hex) - bufputc(ob, 'x'); - bufput(ob, data + content_start, end - content_start); - } - - return end; -} - -/* char_langle_tag • '<' when tags or autolinks are allowed */ -static size_t -char_langle_tag(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size) -{ - enum mkd_autolink altype = MKDA_NOT_AUTOLINK; - size_t end = tag_length(data, size, &altype); - struct buf work = { data, end, 0, 0 }; - int ret = 0; - - if (end > 2) { - if (rndr->cb.autolink && altype != MKDA_NOT_AUTOLINK) { - struct buf *u_link = rndr_newbuf(rndr, BUFFER_SPAN); - work.data = data + 1; - work.size = end - 2; - unscape_text(u_link, &work); - ret = rndr->cb.autolink(ob, u_link, altype, rndr->opaque); - rndr_popbuf(rndr, BUFFER_SPAN); - } - else if (rndr->cb.raw_html_tag) - ret = rndr->cb.raw_html_tag(ob, &work, rndr->opaque); - } - - if (!ret) return 0; - else return end; -} - -static size_t -char_autolink_www(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size) -{ - struct buf *link, *link_url, *link_text; - size_t link_len, rewind; - - if (!rndr->cb.link || rndr->in_link_body) - return 0; - - link = rndr_newbuf(rndr, BUFFER_SPAN); - - if ((link_len = sd_autolink__www(&rewind, link, data, max_rewind, size, 0)) > 0) { - link_url = rndr_newbuf(rndr, BUFFER_SPAN); - BUFPUTSL(link_url, "http://"); - bufput(link_url, link->data, link->size); - - buftruncate(ob, ob->size - rewind); - if (rndr->cb.normal_text) { - link_text = rndr_newbuf(rndr, BUFFER_SPAN); - rndr->cb.normal_text(link_text, link, rndr->opaque); - rndr->cb.link(ob, link_url, NULL, link_text, rndr->opaque); - rndr_popbuf(rndr, BUFFER_SPAN); - } else { - rndr->cb.link(ob, link_url, NULL, link, rndr->opaque); - } - rndr_popbuf(rndr, BUFFER_SPAN); - } - - rndr_popbuf(rndr, BUFFER_SPAN); - return link_len; -} - -static size_t -char_autolink_subreddit_or_username(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size) -{ - struct buf *link, *link_text, *link_url; - size_t link_len, rewind; - int no_slash; - - if (!rndr->cb.autolink || rndr->in_link_body) - return 0; - - link = rndr_newbuf(rndr, BUFFER_SPAN); - - link_len = sd_autolink__subreddit(&rewind, link, data, max_rewind, max_lookbehind, size, &no_slash); - if (link_len == 0) - link_len = sd_autolink__username(&rewind, link, data, max_rewind, max_lookbehind, size, &no_slash); - - /* Found either a user or subreddit link */ - if (link_len > 0) { - link_url = rndr_newbuf(rndr, BUFFER_SPAN); - if (no_slash) - bufputc(link_url, '/'); - bufput(link_url, link->data, link->size); - - buftruncate(ob, ob->size - rewind); - if (rndr->cb.normal_text) { - link_text = rndr_newbuf(rndr, BUFFER_SPAN); - rndr->cb.normal_text(link_text, link, rndr->opaque); - rndr->cb.link(ob, link_url, NULL, link_text, rndr->opaque); - rndr_popbuf(rndr, BUFFER_SPAN); - } else { - rndr->cb.link(ob, link_url, NULL, link, rndr->opaque); - } - rndr_popbuf(rndr, BUFFER_SPAN); - } - rndr_popbuf(rndr, BUFFER_SPAN); - - return link_len; -} - -static size_t -char_autolink_email(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size) -{ - struct buf *link; - size_t link_len, rewind; - - if (!rndr->cb.autolink || rndr->in_link_body) - return 0; - - link = rndr_newbuf(rndr, BUFFER_SPAN); - - if ((link_len = sd_autolink__email(&rewind, link, data, max_rewind, size, 0)) > 0) { - buftruncate(ob, ob->size - rewind); - rndr->cb.autolink(ob, link, MKDA_EMAIL, rndr->opaque); - } - - rndr_popbuf(rndr, BUFFER_SPAN); - return link_len; -} - -static size_t -char_autolink_url(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size) -{ - struct buf *link; - size_t link_len, rewind; - - if (!rndr->cb.autolink || rndr->in_link_body) - return 0; - - link = rndr_newbuf(rndr, BUFFER_SPAN); - - if ((link_len = sd_autolink__url(&rewind, link, data, max_rewind, size, 0)) > 0) { - buftruncate(ob, ob->size - rewind); - rndr->cb.autolink(ob, link, MKDA_NORMAL, rndr->opaque); - } - - rndr_popbuf(rndr, BUFFER_SPAN); - return link_len; -} - -/* char_link • '[': parsing a link or an image */ -static size_t -char_link(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size) -{ - int is_img = (max_rewind && data[-1] == '!'), level; - size_t i = 1, txt_e, link_b = 0, link_e = 0, title_b = 0, title_e = 0; - struct buf *content = 0; - struct buf *link = 0; - struct buf *title = 0; - struct buf *u_link = 0; - size_t org_work_size = rndr->work_bufs[BUFFER_SPAN].size; - int text_has_nl = 0, ret = 0; - int in_title = 0, qtype = 0; - - /* checking whether the correct renderer exists */ - if ((is_img && !rndr->cb.image) || (!is_img && !rndr->cb.link)) - goto cleanup; - - /* looking for the matching closing bracket */ - for (level = 1; i < size; i++) { - if (data[i] == '\n') - text_has_nl = 1; - - else if (data[i - 1] == '\\') - continue; - - else if (data[i] == '[') - level++; - - else if (data[i] == ']') { - level--; - if (level <= 0) - break; - } - } - - if (i >= size) - goto cleanup; - - txt_e = i; - i++; - - /* skip any amount of whitespace or newline */ - /* (this is much more laxist than original markdown syntax) */ - while (i < size && _isspace(data[i])) - i++; - - /* inline style link */ - if (i < size && data[i] == '(') { - /* skipping initial whitespace */ - i++; - - while (i < size && _isspace(data[i])) - i++; - - link_b = i; - - /* looking for link end: ' " ) */ - while (i < size) { - if (data[i] == '\\') i += 2; - else if (data[i] == ')') break; - else if (i >= 1 && _isspace(data[i-1]) && (data[i] == '\'' || data[i] == '"')) break; - else i++; - } - - if (i >= size) goto cleanup; - link_e = i; - - /* looking for title end if present */ - if (data[i] == '\'' || data[i] == '"') { - qtype = data[i]; - in_title = 1; - i++; - title_b = i; - - while (i < size) { - if (data[i] == '\\') i += 2; - else if (data[i] == qtype) {in_title = 0; i++;} - else if ((data[i] == ')') && !in_title) break; - else i++; - } - - if (i >= size) goto cleanup; - - /* skipping whitespaces after title */ - title_e = i - 1; - while (title_e > title_b && _isspace(data[title_e])) - title_e--; - - /* checking for closing quote presence */ - if (data[title_e] != '\'' && data[title_e] != '"') { - title_b = title_e = 0; - link_e = i; - } - } - - /* remove whitespace at the end of the link */ - while (link_e > link_b && _isspace(data[link_e - 1])) - link_e--; - - /* remove optional angle brackets around the link */ - if (data[link_b] == '<') link_b++; - if (data[link_e - 1] == '>') link_e--; - - /* building escaped link and title */ - if (link_e > link_b) { - link = rndr_newbuf(rndr, BUFFER_SPAN); - bufput(link, data + link_b, link_e - link_b); - } - - if (title_e > title_b) { - title = rndr_newbuf(rndr, BUFFER_SPAN); - bufput(title, data + title_b, title_e - title_b); - } - - i++; - } - - /* reference style link */ - else if (i < size && data[i] == '[') { - struct buf id = { 0, 0, 0, 0 }; - struct link_ref *lr; - - /* looking for the id */ - i++; - link_b = i; - while (i < size && data[i] != ']') i++; - if (i >= size) goto cleanup; - link_e = i; - - /* finding the link_ref */ - if (link_b == link_e) { - if (text_has_nl) { - struct buf *b = rndr_newbuf(rndr, BUFFER_SPAN); - size_t j; - - for (j = 1; j < txt_e; j++) { - if (data[j] != '\n') - bufputc(b, data[j]); - else if (data[j - 1] != ' ') - bufputc(b, ' '); - } - - id.data = b->data; - id.size = b->size; - } else { - id.data = data + 1; - id.size = txt_e - 1; - } - } else { - id.data = data + link_b; - id.size = link_e - link_b; - } - - lr = find_link_ref(rndr->refs, id.data, id.size); - if (!lr) - goto cleanup; - - /* keeping link and title from link_ref */ - link = lr->link; - title = lr->title; - i++; - } - - /* shortcut reference style link */ - else { - struct buf id = { 0, 0, 0, 0 }; - struct link_ref *lr; - - /* crafting the id */ - if (text_has_nl) { - struct buf *b = rndr_newbuf(rndr, BUFFER_SPAN); - size_t j; - - for (j = 1; j < txt_e; j++) { - if (data[j] != '\n') - bufputc(b, data[j]); - else if (data[j - 1] != ' ') - bufputc(b, ' '); - } - - id.data = b->data; - id.size = b->size; - } else { - id.data = data + 1; - id.size = txt_e - 1; - } - - /* finding the link_ref */ - lr = find_link_ref(rndr->refs, id.data, id.size); - if (!lr) - goto cleanup; - - /* keeping link and title from link_ref */ - link = lr->link; - title = lr->title; - - /* rewinding the whitespace */ - i = txt_e + 1; - } - - /* building content: img alt is escaped, link content is parsed */ - if (txt_e > 1) { - content = rndr_newbuf(rndr, BUFFER_SPAN); - if (is_img) { - bufput(content, data + 1, txt_e - 1); - } else { - /* disable autolinking when parsing inline the - * content of a link */ - rndr->in_link_body = 1; - parse_inline(content, rndr, data + 1, txt_e - 1); - rndr->in_link_body = 0; - } - } - - if (link) { - u_link = rndr_newbuf(rndr, BUFFER_SPAN); - unscape_text(u_link, link); - } else { - goto cleanup; - } - - /* calling the relevant rendering function */ - if (is_img) { - if (ob->size && ob->data[ob->size - 1] == '!') - ob->size -= 1; - - ret = rndr->cb.image(ob, u_link, title, content, rndr->opaque); - } else { - ret = rndr->cb.link(ob, u_link, title, content, rndr->opaque); - } - - /* cleanup */ -cleanup: - rndr->work_bufs[BUFFER_SPAN].size = (int)org_work_size; - return ret ? i : 0; -} - -static size_t -char_superscript(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size) -{ - size_t sup_start, sup_len; - struct buf *sup; - - if (!rndr->cb.superscript) - return 0; - - if (size < 2) - return 0; - - if (data[1] == '(') { - sup_start = sup_len = 2; - - while (sup_len < size && data[sup_len] != ')' && data[sup_len - 1] != '\\') - sup_len++; - - if (sup_len == size) - return 0; - } else { - sup_start = sup_len = 1; - - while (sup_len < size && !_isspace(data[sup_len])) - sup_len++; - } - - if (sup_len - sup_start == 0) - return (sup_start == 2) ? 3 : 0; - - sup = rndr_newbuf(rndr, BUFFER_SPAN); - parse_inline(sup, rndr, data + sup_start, sup_len - sup_start); - rndr->cb.superscript(ob, sup, rndr->opaque); - rndr_popbuf(rndr, BUFFER_SPAN); - - return (sup_start == 2) ? sup_len + 1 : sup_len; -} - -/********************************* - * BLOCK-LEVEL PARSING FUNCTIONS * - *********************************/ - -/* is_empty • returns the line length when it is empty, 0 otherwise */ -static size_t -is_empty(uint8_t *data, size_t size) -{ - size_t i; - - for (i = 0; i < size && data[i] != '\n'; i++) - if (data[i] != ' ') - return 0; - - return i + 1; -} - -/* is_hrule • returns whether a line is a horizontal rule */ -static int -is_hrule(uint8_t *data, size_t size) -{ - size_t i = 0, n = 0; - uint8_t c; - - /* skipping initial spaces */ - if (size < 3) return 0; - if (data[0] == ' ') { i++; - if (data[1] == ' ') { i++; - if (data[2] == ' ') { i++; } } } - - /* looking at the hrule uint8_t */ - if (i + 2 >= size - || (data[i] != '*' && data[i] != '-' && data[i] != '_')) - return 0; - c = data[i]; - - /* the whole line must be the char or whitespace */ - while (i < size && data[i] != '\n') { - if (data[i] == c) n++; - else if (data[i] != ' ') - return 0; - - i++; - } - - return n >= 3; -} - -/* check if a line begins with a code fence; return the - * width of the code fence */ -static size_t -prefix_codefence(uint8_t *data, size_t size) -{ - size_t i = 0, n = 0; - uint8_t c; - - /* skipping initial spaces */ - if (size < 3) return 0; - if (data[0] == ' ') { i++; - if (data[1] == ' ') { i++; - if (data[2] == ' ') { i++; } } } - - /* looking at the hrule uint8_t */ - if (i + 2 >= size || !(data[i] == '~' || data[i] == '`')) - return 0; - - c = data[i]; - - /* the whole line must be the uint8_t or whitespace */ - while (i < size && data[i] == c) { - n++; i++; - } - - if (n < 3) - return 0; - - return i; -} - -/* check if a line is a code fence; return its size if it is */ -static size_t -is_codefence(uint8_t *data, size_t size, struct buf *syntax) -{ - size_t i = 0, syn_len = 0; - uint8_t *syn_start; - - i = prefix_codefence(data, size); - if (i == 0) - return 0; - - while (i < size && data[i] == ' ') - i++; - - syn_start = data + i; - - if (i < size && data[i] == '{') { - i++; syn_start++; - - while (i < size && data[i] != '}' && data[i] != '\n') { - syn_len++; i++; - } - - if (i == size || data[i] != '}') - return 0; - - /* strip all whitespace at the beginning and the end - * of the {} block */ - while (syn_len > 0 && _isspace(syn_start[0])) { - syn_start++; syn_len--; - } - - while (syn_len > 0 && _isspace(syn_start[syn_len - 1])) - syn_len--; - - i++; - } else { - while (i < size && !_isspace(data[i])) { - syn_len++; i++; - } - } - - if (syntax) { - syntax->data = syn_start; - syntax->size = syn_len; - } - - while (i < size && data[i] != '\n') { - if (!_isspace(data[i])) - return 0; - - i++; - } - - return i + 1; -} - -/* is_atxheader • returns whether the line is a hash-prefixed header */ -static int -is_atxheader(struct sd_markdown *rndr, uint8_t *data, size_t size) -{ - if (data[0] != '#') - return 0; - - if (rndr->ext_flags & MKDEXT_SPACE_HEADERS) { - size_t level = 0; - - while (level < size && level < 6 && data[level] == '#') - level++; - - if (level < size && data[level] != ' ') - return 0; - } - - return 1; -} - -/* is_headerline • returns whether the line is a setext-style hdr underline */ -static int -is_headerline(uint8_t *data, size_t size) -{ - size_t i = 0; - - /* test of level 1 header */ - if (data[i] == '=') { - for (i = 1; i < size && data[i] == '='; i++); - while (i < size && data[i] == ' ') i++; - return (i >= size || data[i] == '\n') ? 1 : 0; } - - /* test of level 2 header */ - if (data[i] == '-') { - for (i = 1; i < size && data[i] == '-'; i++); - while (i < size && data[i] == ' ') i++; - return (i >= size || data[i] == '\n') ? 2 : 0; } - - return 0; -} - -static int -is_next_headerline(uint8_t *data, size_t size) -{ - size_t i = 0; - - while (i < size && data[i] != '\n') - i++; - - if (++i >= size) - return 0; - - return is_headerline(data + i, size - i); -} - -/* prefix_quote • returns blockquote prefix length */ -static size_t -prefix_quote(uint8_t *data, size_t size) -{ - size_t i = 0; - if (i < size && data[i] == ' ') i++; - if (i < size && data[i] == ' ') i++; - if (i < size && data[i] == ' ') i++; - - if (i < size && data[i] == '>') { - if (i + 1 < size && data[i + 1] == ' ') - return i + 2; - - return i + 1; - } - - return 0; -} - -/* prefix_code • returns prefix length for block code*/ -static size_t -prefix_code(uint8_t *data, size_t size) -{ - if (size > 3 && data[0] == ' ' && data[1] == ' ' - && data[2] == ' ' && data[3] == ' ') return 4; - - return 0; -} - -/* prefix_oli • returns ordered list item prefix */ -static size_t -prefix_oli(uint8_t *data, size_t size) -{ - size_t i = 0; - - if (i < size && data[i] == ' ') i++; - if (i < size && data[i] == ' ') i++; - if (i < size && data[i] == ' ') i++; - - if (i >= size || data[i] < '0' || data[i] > '9') - return 0; - - while (i < size && data[i] >= '0' && data[i] <= '9') - i++; - - if (i + 1 >= size || data[i] != '.' || data[i + 1] != ' ') - return 0; - - if (is_next_headerline(data + i, size - i)) - return 0; - - return i + 2; -} - -/* prefix_uli • returns ordered list item prefix */ -static size_t -prefix_uli(uint8_t *data, size_t size) -{ - size_t i = 0; - - if (i < size && data[i] == ' ') i++; - if (i < size && data[i] == ' ') i++; - if (i < size && data[i] == ' ') i++; - - if (i + 1 >= size || - (data[i] != '*' && data[i] != '+' && data[i] != '-') || - data[i + 1] != ' ') - return 0; - - if (is_next_headerline(data + i, size - i)) - return 0; - - return i + 2; -} - - -/* parse_block • parsing of one block, returning next uint8_t to parse */ -static void parse_block(struct buf *ob, struct sd_markdown *rndr, - uint8_t *data, size_t size); - - -/* parse_blockquote • handles parsing of a blockquote fragment */ -static size_t -parse_blockquote(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size) -{ - size_t beg, end = 0, pre, work_size = 0; - uint8_t *work_data = 0; - struct buf *out = 0; - - out = rndr_newbuf(rndr, BUFFER_BLOCK); - beg = 0; - while (beg < size) { - for (end = beg + 1; end < size && data[end - 1] != '\n'; end++); - - pre = prefix_quote(data + beg, end - beg); - - if (pre) - beg += pre; /* skipping prefix */ - - /* empty line followed by non-quote line */ - else if (is_empty(data + beg, end - beg) && - (end >= size || (prefix_quote(data + end, size - end) == 0 && - !is_empty(data + end, size - end)))) - break; - - if (beg < end) { /* copy into the in-place working buffer */ - /* bufput(work, data + beg, end - beg); */ - if (!work_data) - work_data = data + beg; - else if (data + beg != work_data + work_size) - memmove(work_data + work_size, data + beg, end - beg); - work_size += end - beg; - } - beg = end; - } - - parse_block(out, rndr, work_data, work_size); - if (rndr->cb.blockquote) - rndr->cb.blockquote(ob, out, rndr->opaque); - rndr_popbuf(rndr, BUFFER_BLOCK); - return end; -} - -static size_t -parse_htmlblock(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int do_render); - -/* parse_blockquote • handles parsing of a regular paragraph */ -static size_t -parse_paragraph(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size) -{ - size_t i = 0, end = 0; - int level = 0; - struct buf work = { data, 0, 0, 0 }; - - while (i < size) { - for (end = i + 1; end < size && data[end - 1] != '\n'; end++) /* empty */; - - if (prefix_quote(data + i, end - i) != 0) { - end = i; - break; - } - - if (is_empty(data + i, size - i)) - break; - - if ((level = is_headerline(data + i, size - i)) != 0) - break; - - if (is_atxheader(rndr, data + i, size - i) || - is_hrule(data + i, size - i) || - prefix_quote(data + i, size - i)) { - end = i; - break; - } - - /* - * Early termination of a paragraph with the same logic - * as Markdown 1.0.0. If this logic is applied, the - * Markdown 1.0.3 test suite won't pass cleanly - * - * :: If the first character in a new line is not a letter, - * let's check to see if there's some kind of block starting - * here - */ - if ((rndr->ext_flags & MKDEXT_LAX_SPACING) && !isalnum(data[i])) { - if (prefix_oli(data + i, size - i) || - prefix_uli(data + i, size - i)) { - end = i; - break; - } - - /* see if an html block starts here */ - if (data[i] == '<' && rndr->cb.blockhtml && - parse_htmlblock(ob, rndr, data + i, size - i, 0)) { - end = i; - break; - } - - /* see if a code fence starts here */ - if ((rndr->ext_flags & MKDEXT_FENCED_CODE) != 0 && - is_codefence(data + i, size - i, NULL) != 0) { - end = i; - break; - } - } - - i = end; - } - - work.size = i; - while (work.size && data[work.size - 1] == '\n') - work.size--; - - if (!level) { - struct buf *tmp = rndr_newbuf(rndr, BUFFER_BLOCK); - parse_inline(tmp, rndr, work.data, work.size); - if (rndr->cb.paragraph) - rndr->cb.paragraph(ob, tmp, rndr->opaque); - rndr_popbuf(rndr, BUFFER_BLOCK); - } else { - struct buf *header_work; - - if (work.size) { - size_t beg; - i = work.size; - work.size -= 1; - - while (work.size && data[work.size] != '\n') - work.size -= 1; - - beg = work.size + 1; - while (work.size && data[work.size - 1] == '\n') - work.size -= 1; - - if (work.size > 0) { - struct buf *tmp = rndr_newbuf(rndr, BUFFER_BLOCK); - parse_inline(tmp, rndr, work.data, work.size); - - if (rndr->cb.paragraph) - rndr->cb.paragraph(ob, tmp, rndr->opaque); - - rndr_popbuf(rndr, BUFFER_BLOCK); - work.data += beg; - work.size = i - beg; - } - else work.size = i; - } - - header_work = rndr_newbuf(rndr, BUFFER_SPAN); - parse_inline(header_work, rndr, work.data, work.size); - - if (rndr->cb.header) - rndr->cb.header(ob, header_work, (int)level, rndr->opaque); - - rndr_popbuf(rndr, BUFFER_SPAN); - } - - return end; -} - -/* parse_fencedcode • handles parsing of a block-level code fragment */ -static size_t -parse_fencedcode(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size) -{ - size_t beg, end; - struct buf *work = 0; - struct buf lang = { 0, 0, 0, 0 }; - - beg = is_codefence(data, size, &lang); - if (beg == 0) return 0; - - work = rndr_newbuf(rndr, BUFFER_BLOCK); - - while (beg < size) { - size_t fence_end; - struct buf fence_trail = { 0, 0, 0, 0 }; - - fence_end = is_codefence(data + beg, size - beg, &fence_trail); - if (fence_end != 0 && fence_trail.size == 0) { - beg += fence_end; - break; - } - - for (end = beg + 1; end < size && data[end - 1] != '\n'; end++); - - if (beg < end) { - /* verbatim copy to the working buffer, - escaping entities */ - if (is_empty(data + beg, end - beg)) - bufputc(work, '\n'); - else bufput(work, data + beg, end - beg); - } - beg = end; - } - - if (work->size && work->data[work->size - 1] != '\n') - bufputc(work, '\n'); - - if (rndr->cb.blockcode) - rndr->cb.blockcode(ob, work, lang.size ? &lang : NULL, rndr->opaque); - - rndr_popbuf(rndr, BUFFER_BLOCK); - return beg; -} - -static size_t -parse_blockcode(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size) -{ - size_t beg, end, pre; - struct buf *work = 0; - - work = rndr_newbuf(rndr, BUFFER_BLOCK); - - beg = 0; - while (beg < size) { - for (end = beg + 1; end < size && data[end - 1] != '\n'; end++) {}; - pre = prefix_code(data + beg, end - beg); - - if (pre) - beg += pre; /* skipping prefix */ - else if (!is_empty(data + beg, end - beg)) - /* non-empty non-prefixed line breaks the pre */ - break; - - if (beg < end) { - /* verbatim copy to the working buffer, - escaping entities */ - if (is_empty(data + beg, end - beg)) - bufputc(work, '\n'); - else bufput(work, data + beg, end - beg); - } - beg = end; - } - - while (work->size && work->data[work->size - 1] == '\n') - work->size -= 1; - - bufputc(work, '\n'); - - if (rndr->cb.blockcode) - rndr->cb.blockcode(ob, work, NULL, rndr->opaque); - - rndr_popbuf(rndr, BUFFER_BLOCK); - return beg; -} - -/* parse_listitem • parsing of a single list item */ -/* assuming initial prefix is already removed */ -static size_t -parse_listitem(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int *flags) -{ - struct buf *work = 0, *inter = 0; - size_t beg = 0, end, pre, sublist = 0, orgpre = 0, i; - int in_empty = 0, has_inside_empty = 0, in_fence = 0; - - /* keeping track of the first indentation prefix */ - while (orgpre < 3 && orgpre < size && data[orgpre] == ' ') - orgpre++; - - beg = prefix_uli(data, size); - if (!beg) - beg = prefix_oli(data, size); - - if (!beg) - return 0; - - /* skipping to the beginning of the following line */ - end = beg; - while (end < size && data[end - 1] != '\n') - end++; - - /* getting working buffers */ - work = rndr_newbuf(rndr, BUFFER_SPAN); - inter = rndr_newbuf(rndr, BUFFER_SPAN); - - /* putting the first line into the working buffer */ - bufput(work, data + beg, end - beg); - beg = end; - - /* process the following lines */ - while (beg < size) { - size_t has_next_uli = 0, has_next_oli = 0; - - end++; - - while (end < size && data[end - 1] != '\n') - end++; - - /* process an empty line */ - if (is_empty(data + beg, end - beg)) { - in_empty = 1; - beg = end; - continue; - } - - /* calculating the indentation */ - i = 0; - while (i < 4 && beg + i < end && data[beg + i] == ' ') - i++; - - pre = i; - - if (rndr->ext_flags & MKDEXT_FENCED_CODE) { - if (is_codefence(data + beg + i, end - beg - i, NULL) != 0) - in_fence = !in_fence; - } - - /* Only check for new list items if we are **not** inside - * a fenced code block */ - if (!in_fence) { - has_next_uli = prefix_uli(data + beg + i, end - beg - i); - has_next_oli = prefix_oli(data + beg + i, end - beg - i); - } - - /* checking for ul/ol switch */ - if (in_empty && ( - ((*flags & MKD_LIST_ORDERED) && has_next_uli) || - (!(*flags & MKD_LIST_ORDERED) && has_next_oli))){ - *flags |= MKD_LI_END; - break; /* the following item must have same list type */ - } - - /* checking for a new item */ - if ((has_next_uli && !is_hrule(data + beg + i, end - beg - i)) || has_next_oli) { - if (in_empty) - has_inside_empty = 1; - - if (pre == orgpre) /* the following item must have */ - break; /* the same indentation */ - - if (!sublist) - sublist = work->size; - } - /* joining only indented stuff after empty lines; - * note that now we only require 1 space of indentation - * to continue a list */ - else if (in_empty && pre == 0) { - *flags |= MKD_LI_END; - break; - } - else if (in_empty) { - bufputc(work, '\n'); - has_inside_empty = 1; - } - - in_empty = 0; - - /* adding the line without prefix into the working buffer */ - bufput(work, data + beg + i, end - beg - i); - beg = end; - } - - /* render of li contents */ - if (has_inside_empty) - *flags |= MKD_LI_BLOCK; - - if (*flags & MKD_LI_BLOCK) { - /* intermediate render of block li */ - if (sublist && sublist < work->size) { - parse_block(inter, rndr, work->data, sublist); - parse_block(inter, rndr, work->data + sublist, work->size - sublist); - } - else - parse_block(inter, rndr, work->data, work->size); - } else { - /* intermediate render of inline li */ - if (sublist && sublist < work->size) { - parse_inline(inter, rndr, work->data, sublist); - parse_block(inter, rndr, work->data + sublist, work->size - sublist); - } - else - parse_inline(inter, rndr, work->data, work->size); - } - - /* render of li itself */ - if (rndr->cb.listitem) - rndr->cb.listitem(ob, inter, *flags, rndr->opaque); - - rndr_popbuf(rndr, BUFFER_SPAN); - rndr_popbuf(rndr, BUFFER_SPAN); - return beg; -} - - -/* parse_list • parsing ordered or unordered list block */ -static size_t -parse_list(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int flags) -{ - struct buf *work = 0; - size_t i = 0, j; - - work = rndr_newbuf(rndr, BUFFER_BLOCK); - - while (i < size) { - j = parse_listitem(work, rndr, data + i, size - i, &flags); - i += j; - - if (!j || (flags & MKD_LI_END)) - break; - } - - if (rndr->cb.list) - rndr->cb.list(ob, work, flags, rndr->opaque); - rndr_popbuf(rndr, BUFFER_BLOCK); - return i; -} - -/* parse_atxheader • parsing of atx-style headers */ -static size_t -parse_atxheader(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size) -{ - size_t level = 0; - size_t i, end, skip; - - while (level < size && level < 6 && data[level] == '#') - level++; - - for (i = level; i < size && data[i] == ' '; i++); - - for (end = i; end < size && data[end] != '\n'; end++); - skip = end; - - while (end && data[end - 1] == '#') - end--; - - while (end && data[end - 1] == ' ') - end--; - - if (end > i) { - struct buf *work = rndr_newbuf(rndr, BUFFER_SPAN); - - parse_inline(work, rndr, data + i, end - i); - - if (rndr->cb.header) - rndr->cb.header(ob, work, (int)level, rndr->opaque); - - rndr_popbuf(rndr, BUFFER_SPAN); - } - - return skip; -} - - -/* htmlblock_end • checking end of HTML block : [ \t]*\n[ \t*]\n */ -/* returns the length on match, 0 otherwise */ -static size_t -htmlblock_end_tag( - const char *tag, - size_t tag_len, - struct sd_markdown *rndr, - uint8_t *data, - size_t size) -{ - size_t i, w; - - /* checking if tag is a match */ - if (tag_len + 3 >= size || - strncasecmp((char *)data + 2, tag, tag_len) != 0 || - data[tag_len + 2] != '>') - return 0; - - /* checking white lines */ - i = tag_len + 3; - w = 0; - if (i < size && (w = is_empty(data + i, size - i)) == 0) - return 0; /* non-blank after tag */ - i += w; - w = 0; - - if (i < size) - w = is_empty(data + i, size - i); - - return i + w; -} - -static size_t -htmlblock_end(const char *curtag, - struct sd_markdown *rndr, - uint8_t *data, - size_t size, - int start_of_line) -{ - size_t tag_size = strlen(curtag); - size_t i = 1, end_tag; - int block_lines = 0; - - while (i < size) { - i++; - while (i < size && !(data[i - 1] == '<' && data[i] == '/')) { - if (data[i] == '\n') - block_lines++; - - i++; - } - - /* If we are only looking for unindented tags, skip the tag - * if it doesn't follow a newline. - * - * The only exception to this is if the tag is still on the - * initial line; in that case it still counts as a closing - * tag - */ - if (start_of_line && block_lines > 0 && data[i - 2] != '\n') - continue; - - if (i + 2 + tag_size >= size) - break; - - end_tag = htmlblock_end_tag(curtag, tag_size, rndr, data + i - 1, size - i + 1); - if (end_tag) - return i + end_tag - 1; - } - - return 0; -} - - -/* parse_htmlblock • parsing of inline HTML block */ -static size_t -parse_htmlblock(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int do_render) -{ - size_t i, j = 0, tag_end; - const char *curtag = NULL; - struct buf work = { data, 0, 0, 0 }; - - /* identification of the opening tag */ - if (size < 2 || data[0] != '<') - return 0; - - i = 1; - while (i < size && data[i] != '>' && data[i] != ' ') - i++; - - if (i < size) - curtag = find_block_tag((char *)data + 1, (int)i - 1); - - /* handling of special cases */ - if (!curtag) { - - /* HTML comment, laxist form */ - if (size > 5 && data[1] == '!' && data[2] == '-' && data[3] == '-') { - i = 5; - - while (i < size && !(data[i - 2] == '-' && data[i - 1] == '-' && data[i] == '>')) - i++; - - i++; - - if (i < size) - j = is_empty(data + i, size - i); - - if (j) { - work.size = i + j; - if (do_render && rndr->cb.blockhtml) - rndr->cb.blockhtml(ob, &work, rndr->opaque); - return work.size; - } - } - - /* HR, which is the only self-closing block tag considered */ - if (size > 4 && (data[1] == 'h' || data[1] == 'H') && (data[2] == 'r' || data[2] == 'R')) { - i = 3; - while (i < size && data[i] != '>') - i++; - - if (i + 1 < size) { - i++; - j = is_empty(data + i, size - i); - if (j) { - work.size = i + j; - if (do_render && rndr->cb.blockhtml) - rndr->cb.blockhtml(ob, &work, rndr->opaque); - return work.size; - } - } - } - - /* no special case recognised */ - return 0; - } - - /* looking for an unindented matching closing tag */ - /* followed by a blank line */ - tag_end = htmlblock_end(curtag, rndr, data, size, 1); - - /* if not found, trying a second pass looking for indented match */ - /* but not if tag is "ins" or "del" (following original Markdown.pl) */ - if (!tag_end && strcmp(curtag, "ins") != 0 && strcmp(curtag, "del") != 0) { - tag_end = htmlblock_end(curtag, rndr, data, size, 0); - } - - if (!tag_end) - return 0; - - /* the end of the block has been found */ - work.size = tag_end; - if (do_render && rndr->cb.blockhtml) - rndr->cb.blockhtml(ob, &work, rndr->opaque); - - return tag_end; -} - -static void -parse_table_row( - struct buf *ob, - struct sd_markdown *rndr, - uint8_t *data, - size_t size, - size_t columns, - int *col_data, - int header_flag) -{ - size_t i = 0, col, cols_left; - struct buf *row_work = 0; - - if (!rndr->cb.table_cell || !rndr->cb.table_row) - return; - - row_work = rndr_newbuf(rndr, BUFFER_SPAN); - - if (i < size && data[i] == '|') - i++; - - for (col = 0; col < columns && i < size; ++col) { - size_t cell_start, cell_end; - struct buf *cell_work; - - cell_work = rndr_newbuf(rndr, BUFFER_SPAN); - - while (i < size && _isspace(data[i])) - i++; - - cell_start = i; - - while (i < size && data[i] != '|') - i++; - - cell_end = i - 1; - - while (cell_end > cell_start && _isspace(data[cell_end])) - cell_end--; - - parse_inline(cell_work, rndr, data + cell_start, 1 + cell_end - cell_start); - rndr->cb.table_cell(row_work, cell_work, col_data[col] | header_flag, rndr->opaque, 0); - - rndr_popbuf(rndr, BUFFER_SPAN); - i++; - } - - cols_left = columns - col; - if (cols_left > 0) { - struct buf empty_cell = { 0, 0, 0, 0 }; - rndr->cb.table_cell(row_work, &empty_cell, col_data[col] | header_flag, rndr->opaque, cols_left); - } - - rndr->cb.table_row(ob, row_work, rndr->opaque); - - rndr_popbuf(rndr, BUFFER_SPAN); -} - -static size_t -parse_table_header( - struct buf *ob, - struct sd_markdown *rndr, - uint8_t *data, - size_t size, - size_t *columns, - int **column_data) -{ - int pipes; - size_t i = 0, col, header_end, under_end; - - pipes = 0; - while (i < size && data[i] != '\n') - if (data[i++] == '|') - pipes++; - - if (i == size || pipes == 0) - return 0; - - header_end = i; - - while (header_end > 0 && _isspace(data[header_end - 1])) - header_end--; - - if (data[0] == '|') - pipes--; - - if (header_end && data[header_end - 1] == '|') - pipes--; - - if (pipes + 1 > rndr->max_table_cols) - return 0; - - *columns = pipes + 1; - *column_data = calloc(*columns, sizeof(int)); - - /* Parse the header underline */ - i++; - if (i < size && data[i] == '|') - i++; - - under_end = i; - while (under_end < size && data[under_end] != '\n') - under_end++; - - for (col = 0; col < *columns && i < under_end; ++col) { - size_t dashes = 0; - - while (i < under_end && data[i] == ' ') - i++; - - if (data[i] == ':') { - i++; (*column_data)[col] |= MKD_TABLE_ALIGN_L; - dashes++; - } - - while (i < under_end && data[i] == '-') { - i++; dashes++; - } - - if (i < under_end && data[i] == ':') { - i++; (*column_data)[col] |= MKD_TABLE_ALIGN_R; - dashes++; - } - - while (i < under_end && data[i] == ' ') - i++; - - if (i < under_end && data[i] != '|') - break; - - if (dashes < 1) - break; - - i++; - } - - if (col < *columns) - return 0; - - parse_table_row( - ob, rndr, data, - header_end, - *columns, - *column_data, - MKD_TABLE_HEADER - ); - - return under_end + 1; -} - -static size_t -parse_table( - struct buf *ob, - struct sd_markdown *rndr, - uint8_t *data, - size_t size) -{ - size_t i; - - struct buf *header_work = 0; - struct buf *body_work = 0; - - size_t columns; - int *col_data = NULL; - - header_work = rndr_newbuf(rndr, BUFFER_SPAN); - body_work = rndr_newbuf(rndr, BUFFER_BLOCK); - - i = parse_table_header(header_work, rndr, data, size, &columns, &col_data); - if (i > 0) { - - while (i < size) { - size_t row_start; - int pipes = 0; - - row_start = i; - - while (i < size && data[i] != '\n') - if (data[i++] == '|') - pipes++; - - if (pipes == 0 || i == size) { - i = row_start; - break; - } - - parse_table_row( - body_work, - rndr, - data + row_start, - i - row_start, - columns, - col_data, 0 - ); - - i++; - } - - if (rndr->cb.table) - rndr->cb.table(ob, header_work, body_work, rndr->opaque); - } - - free(col_data); - rndr_popbuf(rndr, BUFFER_SPAN); - rndr_popbuf(rndr, BUFFER_BLOCK); - return i; -} - -/* parse_block • parsing of one block, returning next uint8_t to parse */ -static void -parse_block(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size) -{ - size_t beg, end, i; - uint8_t *txt_data; - beg = 0; - - if (rndr->work_bufs[BUFFER_SPAN].size + - rndr->work_bufs[BUFFER_BLOCK].size > rndr->max_nesting) - return; - - while (beg < size) { - txt_data = data + beg; - end = size - beg; - - if (is_atxheader(rndr, txt_data, end)) - beg += parse_atxheader(ob, rndr, txt_data, end); - - else if (data[beg] == '<' && rndr->cb.blockhtml && - (i = parse_htmlblock(ob, rndr, txt_data, end, 1)) != 0) - beg += i; - - else if ((i = is_empty(txt_data, end)) != 0) - beg += i; - - else if (is_hrule(txt_data, end)) { - if (rndr->cb.hrule) - rndr->cb.hrule(ob, rndr->opaque); - - while (beg < size && data[beg] != '\n') - beg++; - - beg++; - } - - else if ((rndr->ext_flags & MKDEXT_FENCED_CODE) != 0 && - (i = parse_fencedcode(ob, rndr, txt_data, end)) != 0) - beg += i; - - else if ((rndr->ext_flags & MKDEXT_TABLES) != 0 && - (i = parse_table(ob, rndr, txt_data, end)) != 0) - beg += i; - - else if (prefix_quote(txt_data, end)) - beg += parse_blockquote(ob, rndr, txt_data, end); - - else if (prefix_code(txt_data, end)) - beg += parse_blockcode(ob, rndr, txt_data, end); - - else if (prefix_uli(txt_data, end)) - beg += parse_list(ob, rndr, txt_data, end, 0); - - else if (prefix_oli(txt_data, end)) - beg += parse_list(ob, rndr, txt_data, end, MKD_LIST_ORDERED); - - else - beg += parse_paragraph(ob, rndr, txt_data, end); - } -} - - - -/********************* - * REFERENCE PARSING * - *********************/ - -/* is_ref • returns whether a line is a reference or not */ -static int -is_ref(const uint8_t *data, size_t beg, size_t end, size_t *last, struct link_ref **refs) -{ -/* int n; */ - size_t i = 0; - size_t id_offset, id_end; - size_t link_offset, link_end; - size_t title_offset, title_end; - size_t line_end; - - /* up to 3 optional leading spaces */ - if (beg + 3 >= end) return 0; - if (data[beg] == ' ') { i = 1; - if (data[beg + 1] == ' ') { i = 2; - if (data[beg + 2] == ' ') { i = 3; - if (data[beg + 3] == ' ') return 0; } } } - i += beg; - - /* id part: anything but a newline between brackets */ - if (data[i] != '[') return 0; - i++; - id_offset = i; - while (i < end && data[i] != '\n' && data[i] != '\r' && data[i] != ']') - i++; - if (i >= end || data[i] != ']') return 0; - id_end = i; - - /* spacer: colon (space | tab)* newline? (space | tab)* */ - i++; - if (i >= end || data[i] != ':') return 0; - i++; - while (i < end && data[i] == ' ') i++; - if (i < end && (data[i] == '\n' || data[i] == '\r')) { - i++; - if (i < end && data[i] == '\r' && data[i - 1] == '\n') i++; } - while (i < end && data[i] == ' ') i++; - if (i >= end) return 0; - - /* link: whitespace-free sequence, optionally between angle brackets */ - if (data[i] == '<') - i++; - - link_offset = i; - - while (i < end && data[i] != ' ' && data[i] != '\n' && data[i] != '\r') - i++; - - if (data[i - 1] == '>') link_end = i - 1; - else link_end = i; - - /* optional spacer: (space | tab)* (newline | '\'' | '"' | '(' ) */ - while (i < end && data[i] == ' ') i++; - if (i < end && data[i] != '\n' && data[i] != '\r' - && data[i] != '\'' && data[i] != '"' && data[i] != '(') - return 0; - line_end = 0; - /* computing end-of-line */ - if (i >= end || data[i] == '\r' || data[i] == '\n') line_end = i; - if (i + 1 < end && data[i] == '\n' && data[i + 1] == '\r') - line_end = i + 1; - - /* optional (space|tab)* spacer after a newline */ - if (line_end) { - i = line_end + 1; - while (i < end && data[i] == ' ') i++; } - - /* optional title: any non-newline sequence enclosed in '"() - alone on its line */ - title_offset = title_end = 0; - if (i + 1 < end - && (data[i] == '\'' || data[i] == '"' || data[i] == '(')) { - i++; - title_offset = i; - /* looking for EOL */ - while (i < end && data[i] != '\n' && data[i] != '\r') i++; - if (i + 1 < end && data[i] == '\n' && data[i + 1] == '\r') - title_end = i + 1; - else title_end = i; - /* stepping back */ - i -= 1; - while (i > title_offset && data[i] == ' ') - i -= 1; - if (i > title_offset - && (data[i] == '\'' || data[i] == '"' || data[i] == ')')) { - line_end = title_end; - title_end = i; } } - - if (!line_end || link_end == link_offset) - return 0; /* garbage after the link empty link */ - - /* a valid ref has been found, filling-in return structures */ - if (last) - *last = line_end; - - if (refs) { - struct link_ref *ref; - - ref = add_link_ref(refs, data + id_offset, id_end - id_offset); - if (!ref) - return 0; - - ref->link = bufnew(link_end - link_offset); - bufput(ref->link, data + link_offset, link_end - link_offset); - - if (title_end > title_offset) { - ref->title = bufnew(title_end - title_offset); - bufput(ref->title, data + title_offset, title_end - title_offset); - } - } - - return 1; -} - -static void expand_tabs(struct buf *ob, const uint8_t *line, size_t size) -{ - size_t i = 0, tab = 0; - - while (i < size) { - size_t org = i; - - while (i < size && line[i] != '\t') { - i++; tab++; - } - - if (i > org) - bufput(ob, line + org, i - org); - - if (i >= size) - break; - - do { - bufputc(ob, ' '); tab++; - } while (tab % 4); - - i++; - } -} - -/********************** - * EXPORTED FUNCTIONS * - **********************/ - -struct sd_markdown * -sd_markdown_new( - unsigned int extensions, - size_t max_nesting, - size_t max_table_cols, - const struct sd_callbacks *callbacks, - void *opaque) -{ - struct sd_markdown *md = NULL; - - assert(max_nesting > 0 && max_table_cols > 0 && callbacks); - - md = malloc(sizeof(struct sd_markdown)); - if (!md) - return NULL; - - memcpy(&md->cb, callbacks, sizeof(struct sd_callbacks)); - - stack_init(&md->work_bufs[BUFFER_BLOCK], 4); - stack_init(&md->work_bufs[BUFFER_SPAN], 8); - - memset(md->active_char, 0x0, 256); - - if (md->cb.emphasis || md->cb.double_emphasis || md->cb.triple_emphasis) { - md->active_char['*'] = MD_CHAR_EMPHASIS; - md->active_char['_'] = MD_CHAR_EMPHASIS; - if (extensions & MKDEXT_STRIKETHROUGH) - md->active_char['~'] = MD_CHAR_EMPHASIS; - } - - if (md->cb.codespan) - md->active_char['`'] = MD_CHAR_CODESPAN; - - if (md->cb.linebreak) - md->active_char['\n'] = MD_CHAR_LINEBREAK; - - if (md->cb.image || md->cb.link) - md->active_char['['] = MD_CHAR_LINK; - - md->active_char['<'] = MD_CHAR_LANGLE; - md->active_char['\\'] = MD_CHAR_ESCAPE; - md->active_char['&'] = MD_CHAR_ENTITITY; - - if (extensions & MKDEXT_AUTOLINK) { - if (!(extensions & MKDEXT_NO_EMAIL_AUTOLINK)) - md->active_char['@'] = MD_CHAR_AUTOLINK_EMAIL; - md->active_char[':'] = MD_CHAR_AUTOLINK_URL; - md->active_char['w'] = MD_CHAR_AUTOLINK_WWW; - md->active_char['/'] = MD_CHAR_AUTOLINK_SUBREDDIT_OR_USERNAME; - } - - if (extensions & MKDEXT_SUPERSCRIPT) - md->active_char['^'] = MD_CHAR_SUPERSCRIPT; - - /* Extension data */ - md->ext_flags = extensions; - md->opaque = opaque; - md->max_nesting = max_nesting; - md->max_table_cols = max_table_cols; - md->in_link_body = 0; - - return md; -} - -void -sd_markdown_render(struct buf *ob, const uint8_t *document, size_t doc_size, struct sd_markdown *md) -{ -#define MARKDOWN_GROW(x) ((x) + ((x) >> 1)) - static const char UTF8_BOM[] = {0xEF, 0xBB, 0xBF}; - - struct buf *text; - size_t beg, end; - - text = bufnew(64); - if (!text) - return; - - /* Preallocate enough space for our buffer to avoid expanding while copying */ - bufgrow(text, doc_size); - - /* reset the references table */ - memset(&md->refs, 0x0, REF_TABLE_SIZE * sizeof(void *)); - - /* first pass: looking for references, copying everything else */ - beg = 0; - - /* Skip a possible UTF-8 BOM, even though the Unicode standard - * discourages having these in UTF-8 documents */ - if (doc_size >= 3 && memcmp(document, UTF8_BOM, 3) == 0) - beg += 3; - - while (beg < doc_size) /* iterating over lines */ - if (is_ref(document, beg, doc_size, &end, md->refs)) - beg = end; - else { /* skipping to the next line */ - end = beg; - while (end < doc_size && document[end] != '\n' && document[end] != '\r') - end++; - - /* adding the line body if present */ - if (end > beg) - expand_tabs(text, document + beg, end - beg); - - while (end < doc_size && (document[end] == '\n' || document[end] == '\r')) { - /* add one \n per newline */ - if (document[end] == '\n' || (end + 1 < doc_size && document[end + 1] != '\n')) - bufputc(text, '\n'); - end++; - } - - beg = end; - } - - /* pre-grow the output buffer to minimize allocations */ - bufgrow(ob, MARKDOWN_GROW(text->size)); - - /* second pass: actual rendering */ - if (md->cb.doc_header) - md->cb.doc_header(ob, md->opaque); - - if (text->size) { - /* adding a final newline if not already present */ - if (text->data[text->size - 1] != '\n' && text->data[text->size - 1] != '\r') - bufputc(text, '\n'); - - parse_block(ob, md, text->data, text->size); - } - - if (md->cb.doc_footer) - md->cb.doc_footer(ob, md->opaque); - - /* clean-up */ - bufrelease(text); - free_link_refs(md->refs); - - assert(md->work_bufs[BUFFER_SPAN].size == 0); - assert(md->work_bufs[BUFFER_BLOCK].size == 0); -} - -void -sd_markdown_free(struct sd_markdown *md) -{ - size_t i; - - for (i = 0; i < (size_t)md->work_bufs[BUFFER_SPAN].asize; ++i) - bufrelease(md->work_bufs[BUFFER_SPAN].item[i]); - - for (i = 0; i < (size_t)md->work_bufs[BUFFER_BLOCK].asize; ++i) - bufrelease(md->work_bufs[BUFFER_BLOCK].item[i]); - - stack_free(&md->work_bufs[BUFFER_SPAN]); - stack_free(&md->work_bufs[BUFFER_BLOCK]); - - free(md); -} - -void -sd_version(int *ver_major, int *ver_minor, int *ver_revision) -{ - *ver_major = SUNDOWN_VER_MAJOR; - *ver_minor = SUNDOWN_VER_MINOR; - *ver_revision = SUNDOWN_VER_REVISION; -} - -/* vim: set filetype=c: */ diff --git a/SnudownTest/markdown.h b/SnudownTest/markdown.h deleted file mode 100644 index 00d50dc..0000000 --- a/SnudownTest/markdown.h +++ /dev/null @@ -1,140 +0,0 @@ -/* markdown.h - generic markdown parser */ - -/* - * Copyright (c) 2009, Natacha Porté - * - * Permission to use, copy, modify, and distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ - -#ifndef UPSKIRT_MARKDOWN_H -#define UPSKIRT_MARKDOWN_H - -#include "buffer.h" -#include "autolink.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#define SUNDOWN_VERSION "1.16.0" -#define SUNDOWN_VER_MAJOR 1 -#define SUNDOWN_VER_MINOR 16 -#define SUNDOWN_VER_REVISION 0 - -/******************** - * TYPE DEFINITIONS * - ********************/ - -/* mkd_autolink - type of autolink */ -enum mkd_autolink { - MKDA_NOT_AUTOLINK, /* used internally when it is not an autolink*/ - MKDA_NORMAL, /* normal http/http/ftp/mailto/etc link */ - MKDA_EMAIL, /* e-mail link without explit mailto: */ -}; - -enum mkd_tableflags { - MKD_TABLE_ALIGN_L = 1, - MKD_TABLE_ALIGN_R = 2, - MKD_TABLE_ALIGN_CENTER = 3, - MKD_TABLE_ALIGNMASK = 3, - MKD_TABLE_HEADER = 4 -}; - -enum mkd_extensions { - MKDEXT_NO_INTRA_EMPHASIS = (1 << 0), - MKDEXT_TABLES = (1 << 1), - MKDEXT_FENCED_CODE = (1 << 2), - MKDEXT_AUTOLINK = (1 << 3), - MKDEXT_STRIKETHROUGH = (1 << 4), - MKDEXT_SPACE_HEADERS = (1 << 6), - MKDEXT_SUPERSCRIPT = (1 << 7), - MKDEXT_LAX_SPACING = (1 << 8), - MKDEXT_NO_EMAIL_AUTOLINK = (1 << 9), -}; - -/* sd_callbacks - functions for rendering parsed data */ -struct sd_callbacks { - /* block level callbacks - NULL skips the block */ - void (*blockcode)(struct buf *ob, const struct buf *text, const struct buf *lang, void *opaque); - void (*blockquote)(struct buf *ob, const struct buf *text, void *opaque); - void (*blockhtml)(struct buf *ob,const struct buf *text, void *opaque); - void (*header)(struct buf *ob, const struct buf *text, int level, void *opaque); - void (*hrule)(struct buf *ob, void *opaque); - void (*list)(struct buf *ob, const struct buf *text, int flags, void *opaque); - void (*listitem)(struct buf *ob, const struct buf *text, int flags, void *opaque); - void (*paragraph)(struct buf *ob, const struct buf *text, void *opaque); - void (*table)(struct buf *ob, const struct buf *header, const struct buf *body, void *opaque); - void (*table_row)(struct buf *ob, const struct buf *text, void *opaque); - void (*table_cell)(struct buf *ob, const struct buf *text, int flags, void *opaque, int col_span); - - - /* span level callbacks - NULL or return 0 prints the span verbatim */ - int (*autolink)(struct buf *ob, const struct buf *link, enum mkd_autolink type, void *opaque); - int (*codespan)(struct buf *ob, const struct buf *text, void *opaque); - int (*double_emphasis)(struct buf *ob, const struct buf *text, void *opaque); - int (*emphasis)(struct buf *ob, const struct buf *text, void *opaque); - int (*image)(struct buf *ob, const struct buf *link, const struct buf *title, const struct buf *alt, void *opaque); - int (*linebreak)(struct buf *ob, void *opaque); - int (*link)(struct buf *ob, const struct buf *link, const struct buf *title, const struct buf *content, void *opaque); - int (*raw_html_tag)(struct buf *ob, const struct buf *tag, void *opaque); - int (*triple_emphasis)(struct buf *ob, const struct buf *text, void *opaque); - int (*strikethrough)(struct buf *ob, const struct buf *text, void *opaque); - int (*superscript)(struct buf *ob, const struct buf *text, void *opaque); - - /* low level callbacks - NULL copies input directly into the output */ - void (*entity)(struct buf *ob, const struct buf *entity, void *opaque); - void (*normal_text)(struct buf *ob, const struct buf *text, void *opaque); - - /* header and footer */ - void (*doc_header)(struct buf *ob, void *opaque); - void (*doc_footer)(struct buf *ob, void *opaque); -}; - -struct sd_markdown; - -/********* - * FLAGS * - *********/ - -/* list/listitem flags */ -#define MKD_LIST_ORDERED 1 -#define MKD_LI_BLOCK 2 /*
  • containing block data */ - -/********************** - * EXPORTED FUNCTIONS * - **********************/ - -extern struct sd_markdown * -sd_markdown_new( - unsigned int extensions, - size_t max_nesting, - size_t max_table_cols, - const struct sd_callbacks *callbacks, - void *opaque); - -extern void -sd_markdown_render(struct buf *ob, const uint8_t *document, size_t doc_size, struct sd_markdown *md); - -extern void -sd_markdown_free(struct sd_markdown *md); - -extern void -sd_version(int *major, int *minor, int *revision); - -#ifdef __cplusplus -} -#endif - -#endif - -/* vim: set filetype=c: */ diff --git a/SnudownTest/setup.py b/SnudownTest/setup.py deleted file mode 100644 index ffec52d..0000000 --- a/SnudownTest/setup.py +++ /dev/null @@ -1,56 +0,0 @@ -from distutils.spawn import find_executable -from setuptools import setup, Extension -from setuptools.command.build_ext import build_ext - -import re -import os -import subprocess -import fnmatch - -def c_files_in(directory): - paths = [] - names = os.listdir(directory) - for f in fnmatch.filter(names, '*.c'): - paths.append(os.path.join(directory, f)) - return paths - - -def process_gperf_file(gperf_file, output_file): - if not find_executable("gperf"): - raise Exception("Couldn't find `gperf`, is it installed?") - assert os.path.exists(gperf_file) - command = "gperf.exe "+ gperf_file+ " --output-file=" + output_file - print(command) - #subprocess.check_call(command) - -version = None -version_re = re.compile(r'^#define\s+SNUDOWN_VERSION\s+"([^"]+)"$') -with open('snudown.c', 'r') as f: - for line in f: - m = version_re.match(line) - if m: - version = m.group(1) -assert version - - -class GPerfingBuildExt(build_ext): - def run(self): - process_gperf_file("src\\html_entities.gperf", "src\\html_entities.h") - build_ext.run(self) - -setup( - name='snudown', - version=version, - author='Vicent Marti', - author_email='vicent@github.com', - license='MIT', - test_suite="test_snudown.test_snudown", - cmdclass={'build_ext': GPerfingBuildExt,}, - ext_modules=[ - Extension( - name='snudown', - sources=['snudown.c'] + c_files_in('src/') + c_files_in('html/'), - include_dirs=['src', 'html'] - ) - ], -) diff --git a/SnudownTest/snudown - Copy.c b/SnudownTest/snudown - Copy.c deleted file mode 100644 index e268f66..0000000 --- a/SnudownTest/snudown - Copy.c +++ /dev/null @@ -1,212 +0,0 @@ -#define PY_SSIZE_T_CLEAN -#include - -#include "markdown.h" -#include "html.h" -#include "autolink.h" - -#define SNUDOWN_VERSION "1.4.0" - -enum snudown_renderer_mode { - RENDERER_USERTEXT = 0, - RENDERER_WIKI, - RENDERER_COUNT -}; - -struct snudown_renderopt { - struct html_renderopt html; - int nofollow; - const char *target; -}; - -struct snudown_renderer { - struct sd_markdown* main_renderer; - struct sd_markdown* toc_renderer; - struct module_state* state; - struct module_state* toc_state; -}; - -struct module_state { - struct sd_callbacks callbacks; - struct snudown_renderopt options; -}; - -static struct snudown_renderer sundown[RENDERER_COUNT]; - -static char* html_element_whitelist[] = {"tr", "th", "td", "table", "tbody", "thead", "tfoot", "caption", NULL}; -static char* html_attr_whitelist[] = {"colspan", "rowspan", "cellspacing", "cellpadding", "scope", NULL}; - -static struct module_state usertext_toc_state; -static struct module_state wiki_toc_state; -static struct module_state usertext_state; -static struct module_state wiki_state; - -/* The module doc strings */ -PyDoc_STRVAR(snudown_module__doc__, "When does the narwhal bacon? At Sundown."); -PyDoc_STRVAR(snudown_md__doc__, "Render a Markdown document"); - -static const unsigned int snudown_default_md_flags = - MKDEXT_NO_INTRA_EMPHASIS | - MKDEXT_SUPERSCRIPT | - MKDEXT_AUTOLINK | - MKDEXT_STRIKETHROUGH | - MKDEXT_TABLES; - -static const unsigned int snudown_default_render_flags = - HTML_SKIP_HTML | - HTML_SKIP_IMAGES | - HTML_SAFELINK | - HTML_ESCAPE | - HTML_USE_XHTML; - -static const unsigned int snudown_wiki_render_flags = - HTML_SKIP_HTML | - HTML_SAFELINK | - HTML_ALLOW_ELEMENT_WHITELIST | - HTML_ESCAPE | - HTML_USE_XHTML; - -static void -snudown_link_attr(struct buf *ob, const struct buf *link, void *opaque) -{ - struct snudown_renderopt *options = opaque; - - if (options->nofollow) - BUFPUTSL(ob, " rel=\"nofollow\""); - - if (options->target != NULL) { - BUFPUTSL(ob, " target=\""); - bufputs(ob, options->target); - bufputc(ob, '\"'); - } -} - -static struct sd_markdown* make_custom_renderer(struct module_state* state, - const unsigned int renderflags, - const unsigned int markdownflags, - int toc_renderer) { - if(toc_renderer) { - sdhtml_toc_renderer(&state->callbacks, - (struct html_renderopt *)&state->options); - } else { - sdhtml_renderer(&state->callbacks, - (struct html_renderopt *)&state->options, - renderflags); - } - - state->options.html.link_attributes = &snudown_link_attr; - state->options.html.html_element_whitelist = html_element_whitelist; - state->options.html.html_attr_whitelist = html_attr_whitelist; - - return sd_markdown_new( - markdownflags, - 16, - 64, - &state->callbacks, - &state->options - ); -} - -void init_default_renderer(PyObject *module) { - PyModule_AddIntConstant(module, "RENDERER_USERTEXT", RENDERER_USERTEXT); - sundown[RENDERER_USERTEXT].main_renderer = make_custom_renderer(&usertext_state, snudown_default_render_flags, snudown_default_md_flags, 0); - sundown[RENDERER_USERTEXT].toc_renderer = make_custom_renderer(&usertext_toc_state, snudown_default_render_flags, snudown_default_md_flags, 1); - sundown[RENDERER_USERTEXT].state = &usertext_state; - sundown[RENDERER_USERTEXT].toc_state = &usertext_toc_state; -} - -void init_wiki_renderer(PyObject *module) { - PyModule_AddIntConstant(module, "RENDERER_WIKI", RENDERER_WIKI); - sundown[RENDERER_WIKI].main_renderer = make_custom_renderer(&wiki_state, snudown_wiki_render_flags, snudown_default_md_flags, 0); - sundown[RENDERER_WIKI].toc_renderer = make_custom_renderer(&wiki_toc_state, snudown_wiki_render_flags, snudown_default_md_flags, 1); - sundown[RENDERER_WIKI].state = &wiki_state; - sundown[RENDERER_WIKI].toc_state = &wiki_toc_state; -} - -static PyObject * -snudown_md(PyObject *self, PyObject *args, PyObject *kwargs) -{ - static char *kwlist[] = {"text", "nofollow", "target", "toc_id_prefix", "renderer", "enable_toc", NULL}; - - struct buf ib, *ob; - PyObject *py_result; - const char* result_text; - int renderer = RENDERER_USERTEXT; - int enable_toc = 0; - struct snudown_renderer _snudown; - int nofollow = 0; - char* target = NULL; - char* toc_id_prefix = NULL; - unsigned int flags; - - memset(&ib, 0x0, sizeof(struct buf)); - - /* Parse arguments */ - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|izzii", kwlist, - &ib.data, &ib.size, &nofollow, - &target, &toc_id_prefix, &renderer, &enable_toc)) { - return NULL; - } - - if (renderer < 0 || renderer >= RENDERER_COUNT) { - PyErr_SetString(PyExc_ValueError, "Invalid renderer"); - return NULL; - } - - _snudown = sundown[renderer]; - - struct snudown_renderopt *options = &(_snudown.state->options); - options->nofollow = nofollow; - options->target = target; - - /* Output buffer */ - ob = bufnew(128); - - flags = options->html.flags; - - if (enable_toc) { - _snudown.toc_state->options.html.toc_id_prefix = toc_id_prefix; - sd_markdown_render(ob, ib.data, ib.size, _snudown.toc_renderer); - _snudown.toc_state->options.html.toc_id_prefix = NULL; - - options->html.flags |= HTML_TOC; - } - - options->html.toc_id_prefix = toc_id_prefix; - - /* do the magic */ - sd_markdown_render(ob, ib.data, ib.size, _snudown.main_renderer); - - options->html.toc_id_prefix = NULL; - options->html.flags = flags; - - /* make a Python string */ - result_text = ""; - if (ob->data) - result_text = (const char*)ob->data; - py_result = Py_BuildValue("s#", result_text, (int)ob->size); - - /* Cleanup */ - bufrelease(ob); - return py_result; -} - -static PyMethodDef snudown_methods[] = { - {"markdown", (PyCFunction) snudown_md, METH_VARARGS | METH_KEYWORDS, snudown_md__doc__}, - {NULL, NULL, 0, NULL} /* Sentinel */ -}; - -PyMODINIT_FUNC initsnudown(void) -{ - PyObject *module; - - module = Py_InitModule3("snudown", snudown_methods, snudown_module__doc__); - if (module == NULL) - return; - - init_default_renderer(module); - init_wiki_renderer(module); - - /* Version */ - PyModule_AddStringConstant(module, "__version__", SNUDOWN_VERSION); -} diff --git a/SnudownTest/snudown-validator.c b/SnudownTest/snudown-validator.c deleted file mode 100644 index 153e1c4..0000000 --- a/SnudownTest/snudown-validator.c +++ /dev/null @@ -1,226 +0,0 @@ -#include "markdown.h" -#include "html.h" -#include "buffer.h" - -#include -#include -#include -#include -#include -#include -#include - -#include - -#define READ_UNIT 1024 -#define OUTPUT_UNIT 64 - -#include "autolink.h" - -#define SNUDOWN_VERSION "1.3.2" - -enum snudown_renderer_mode { - RENDERER_USERTEXT = 0, - RENDERER_WIKI, - RENDERER_COUNT -}; - -struct snudown_renderopt { - struct html_renderopt html; - int nofollow; - const char *target; -}; - -struct snudown_renderer { - struct sd_markdown* main_renderer; - struct sd_markdown* toc_renderer; - struct module_state* state; - struct module_state* toc_state; -}; - -struct module_state { - struct sd_callbacks callbacks; - struct snudown_renderopt options; -}; - -static struct snudown_renderer sundown[RENDERER_COUNT]; - -static char* html_element_whitelist[] = {"tr", "th", "td", "table", "tbody", "thead", "tfoot", "caption", NULL}; -static char* html_attr_whitelist[] = {"colspan", "rowspan", "cellspacing", "cellpadding", "scope", NULL}; - -static struct module_state usertext_toc_state; -static struct module_state wiki_toc_state; -static struct module_state usertext_state; -static struct module_state wiki_state; - -static const unsigned int snudown_default_md_flags = - MKDEXT_NO_INTRA_EMPHASIS | - MKDEXT_SUPERSCRIPT | - MKDEXT_AUTOLINK | - MKDEXT_STRIKETHROUGH | - MKDEXT_TABLES; - -static const unsigned int snudown_default_render_flags = - HTML_SKIP_HTML | - HTML_SKIP_IMAGES | - HTML_SAFELINK | - HTML_ESCAPE | - HTML_USE_XHTML; - -static const unsigned int snudown_wiki_render_flags = - HTML_SKIP_HTML | - HTML_SAFELINK | - HTML_ALLOW_ELEMENT_WHITELIST | - HTML_ESCAPE | - HTML_USE_XHTML; - -static void -snudown_link_attr(struct buf *ob, const struct buf *link, void *opaque) -{ - struct snudown_renderopt *options = opaque; - - if (options->nofollow) - BUFPUTSL(ob, " rel=\"nofollow\""); - - if (options->target != NULL) { - BUFPUTSL(ob, " target=\""); - bufputs(ob, options->target); - bufputc(ob, '\"'); - } -} - -static struct sd_markdown* make_custom_renderer(struct module_state* state, - const unsigned int renderflags, - const unsigned int markdownflags, - int toc_renderer) { - if(toc_renderer) { - sdhtml_toc_renderer(&state->callbacks, - (struct html_renderopt *)&state->options); - } else { - sdhtml_renderer(&state->callbacks, - (struct html_renderopt *)&state->options, - renderflags); - } - - state->options.html.link_attributes = &snudown_link_attr; - state->options.html.html_element_whitelist = html_element_whitelist; - state->options.html.html_attr_whitelist = html_attr_whitelist; - - return sd_markdown_new( - markdownflags, - 16, - 64, - &state->callbacks, - &state->options - ); -} - -void init_default_renderer() { - sundown[RENDERER_USERTEXT].main_renderer = make_custom_renderer(&usertext_state, snudown_default_render_flags, snudown_default_md_flags, 0); - sundown[RENDERER_USERTEXT].toc_renderer = make_custom_renderer(&usertext_toc_state, snudown_default_render_flags, snudown_default_md_flags, 1); - sundown[RENDERER_USERTEXT].state = &usertext_state; - sundown[RENDERER_USERTEXT].toc_state = &usertext_toc_state; -} - -void init_wiki_renderer() { - sundown[RENDERER_WIKI].main_renderer = make_custom_renderer(&wiki_state, snudown_wiki_render_flags, snudown_default_md_flags, 0); - sundown[RENDERER_WIKI].toc_renderer = make_custom_renderer(&wiki_toc_state, snudown_wiki_render_flags, snudown_default_md_flags, 1); - sundown[RENDERER_WIKI].state = &wiki_state; - sundown[RENDERER_WIKI].toc_state = &wiki_toc_state; -} - -void -snudown_md(struct buf *ob, const uint8_t *document, size_t doc_size, int wiki_mode) -{ - int renderer = RENDERER_USERTEXT; - int enable_toc = 0; - struct snudown_renderer _snudown; - int nofollow = 0; - char* target = NULL; - char* toc_id_prefix = NULL; - unsigned int flags; - - if (wiki_mode) - renderer = RENDERER_WIKI; - - _snudown = sundown[renderer]; - - struct snudown_renderopt *options = &(_snudown.state->options); - options->nofollow = nofollow; - options->target = target; - - flags = options->html.flags; - - if (enable_toc) { - _snudown.toc_state->options.html.toc_id_prefix = toc_id_prefix; - sd_markdown_render(ob, document, doc_size, _snudown.toc_renderer); - _snudown.toc_state->options.html.toc_id_prefix = NULL; - - options->html.flags |= HTML_TOC; - } - - options->html.toc_id_prefix = toc_id_prefix; - - /* do the magic */ - sd_markdown_render(ob, document, doc_size, _snudown.main_renderer); - - options->html.toc_id_prefix = NULL; - options->html.flags = flags; -} -int -main(int argc, char **argv) -{ - init_default_renderer(); - init_wiki_renderer(); - - struct buf *ib, *ob; - int size_read = 0, wiki_mode = 0, i = 0, have_errors = 0; - - /* reading everything */ - ib = bufnew(READ_UNIT); - bufgrow(ib, READ_UNIT); - while ((size_read = fread(ib->data + ib->size, 1, ib->asize - ib->size, stdin)) > 0) { - ib->size += size_read; - bufgrow(ib, ib->size + READ_UNIT); - } - /* Render to a buffer, then print that out */ - ob = bufnew(OUTPUT_UNIT); - bufputs(ob, "\n"); - snudown_md(ob, ib->data, ib->size, wiki_mode); - bufputs(ob, "\n"); - - // Wiki mode explicitly allows unbalanced tags, need some way to exclude those - if (!wiki_mode) { - GumboOutput* output = gumbo_parse_with_options(&kGumboDefaultOptions, bufcstr(ob), ob->size); - - for (i=0; i < output->errors.length; ++i) { - // stupid "public" API I hacked in. - void* thing = output->errors.data[i]; - GumboErrorType type = gumbo_get_error_type(thing); - switch(type) { - case GUMBO_ERR_UTF8_INVALID: - case GUMBO_ERR_UTF8_NULL: - // Making sure the user gave us valid - // utf-8 or transforming it to valid - // utf-8 is outside the scope of snudown - continue; - default: - have_errors = 1; - printf("%s\n", GUMBO_ERROR_NAMES[type]); - printf("%s\n",gumbo_get_error_text(thing)); - printf("===============\n"); - break; - } - } - - if (have_errors) { - // gotta trigger a crash for AFL to catch it - assert(0); - } - - gumbo_destroy_output(&kGumboDefaultOptions, output); - } - bufrelease(ob); - bufrelease(ib); - return 0; -} diff --git a/SnudownTest/snudown.c b/SnudownTest/snudown.c deleted file mode 100644 index 8cbacaf..0000000 --- a/SnudownTest/snudown.c +++ /dev/null @@ -1,232 +0,0 @@ -#define PY_SSIZE_T_CLEAN -#include - -#include "markdown.h" -#include "html.h" -#include "autolink.h" - -#define SNUDOWN_VERSION "1.4.0" - -enum snudown_renderer_mode { - RENDERER_USERTEXT = 0, - RENDERER_WIKI, - RENDERER_COUNT -}; - -struct snudown_renderopt { - struct html_renderopt html; - int nofollow; - const char *target; -}; - -struct snudown_renderer { - struct sd_markdown* main_renderer; - struct sd_markdown* toc_renderer; - struct module_state* state; - struct module_state* toc_state; -}; - -struct module_state { - struct sd_callbacks callbacks; - struct snudown_renderopt options; -}; - -static struct snudown_renderer sundown[RENDERER_COUNT]; - -static char* html_element_whitelist[] = {"tr", "th", "td", "table", "tbody", "thead", "tfoot", "caption", NULL}; -static char* html_attr_whitelist[] = {"colspan", "rowspan", "cellspacing", "cellpadding", "scope", NULL}; - -static struct module_state usertext_toc_state; -static struct module_state wiki_toc_state; -static struct module_state usertext_state; -static struct module_state wiki_state; - -/* The module doc strings */ -PyDoc_STRVAR(snudown_module__doc__, "When does the narwhal bacon? At Sundown."); -PyDoc_STRVAR(snudown_md__doc__, "Render a Markdown document"); - -static const unsigned int snudown_default_md_flags = - MKDEXT_NO_INTRA_EMPHASIS | - MKDEXT_SUPERSCRIPT | - MKDEXT_AUTOLINK | - MKDEXT_STRIKETHROUGH | - MKDEXT_TABLES; - -static const unsigned int snudown_default_render_flags = - HTML_SKIP_HTML | - HTML_SKIP_IMAGES | - HTML_SAFELINK | - HTML_ESCAPE | - HTML_USE_XHTML; - -static const unsigned int snudown_wiki_render_flags = - HTML_SKIP_HTML | - HTML_SAFELINK | - HTML_ALLOW_ELEMENT_WHITELIST | - HTML_ESCAPE | - HTML_USE_XHTML; - -static void -snudown_link_attr(struct buf *ob, const struct buf *link, void *opaque) -{ - struct snudown_renderopt *options = opaque; - - if (options->nofollow) - BUFPUTSL(ob, " rel=\"nofollow\""); - - if (options->target != NULL) { - BUFPUTSL(ob, " target=\""); - bufputs(ob, options->target); - bufputc(ob, '\"'); - } -} - -static struct sd_markdown* make_custom_renderer(struct module_state* state, - const unsigned int renderflags, - const unsigned int markdownflags, - int toc_renderer) { - if(toc_renderer) { - sdhtml_toc_renderer(&state->callbacks, - (struct html_renderopt *)&state->options); - } else { - sdhtml_renderer(&state->callbacks, - (struct html_renderopt *)&state->options, - renderflags); - } - - state->options.html.link_attributes = &snudown_link_attr; - state->options.html.html_element_whitelist = html_element_whitelist; - state->options.html.html_attr_whitelist = html_attr_whitelist; - - return sd_markdown_new( - markdownflags, - 16, - 64, - &state->callbacks, - &state->options - ); -} - -void init_default_renderer(PyObject *module) { - PyModule_AddIntConstant(module, "RENDERER_USERTEXT", RENDERER_USERTEXT); - sundown[RENDERER_USERTEXT].main_renderer = make_custom_renderer(&usertext_state, snudown_default_render_flags, snudown_default_md_flags, 0); - sundown[RENDERER_USERTEXT].toc_renderer = make_custom_renderer(&usertext_toc_state, snudown_default_render_flags, snudown_default_md_flags, 1); - sundown[RENDERER_USERTEXT].state = &usertext_state; - sundown[RENDERER_USERTEXT].toc_state = &usertext_toc_state; -} - -void init_wiki_renderer(PyObject *module) { - PyModule_AddIntConstant(module, "RENDERER_WIKI", RENDERER_WIKI); - sundown[RENDERER_WIKI].main_renderer = make_custom_renderer(&wiki_state, snudown_wiki_render_flags, snudown_default_md_flags, 0); - sundown[RENDERER_WIKI].toc_renderer = make_custom_renderer(&wiki_toc_state, snudown_wiki_render_flags, snudown_default_md_flags, 1); - sundown[RENDERER_WIKI].state = &wiki_state; - sundown[RENDERER_WIKI].toc_state = &wiki_toc_state; -} - -static PyObject * -snudown_md(PyObject *self, PyObject *args, PyObject *kwargs) -{ - struct snudown_renderopt *options; - static char *kwlist[] = {"text", "nofollow", "target", "toc_id_prefix", "renderer", "enable_toc", NULL}; - - struct buf ib, *ob; - PyObject *py_result; - const char* result_text; - int renderer = RENDERER_USERTEXT; - int enable_toc = 0; - struct snudown_renderer _snudown; - int nofollow = 0; - char* target = NULL; - char* toc_id_prefix = NULL; - unsigned int flags; - - memset(&ib, 0x0, sizeof(struct buf)); - - /* Parse arguments */ - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|izzii", kwlist, - &ib.data, &ib.size, &nofollow, - &target, &toc_id_prefix, &renderer, &enable_toc)) { - return NULL; - } - - if (renderer < 0 || renderer >= RENDERER_COUNT) { - PyErr_SetString(PyExc_ValueError, "Invalid renderer"); - return NULL; - }; - - _snudown = sundown[renderer]; - - - - - _snudown.state->options; - options->nofollow = nofollow; - options->target = target; - - /* Output buffer */ - ob = bufnew(128); - - flags = options->html.flags; - - if (enable_toc) { - _snudown.toc_state->options.html.toc_id_prefix = toc_id_prefix; - sd_markdown_render(ob, ib.data, ib.size, _snudown.toc_renderer); - _snudown.toc_state->options.html.toc_id_prefix = NULL; - - options->html.flags |= HTML_TOC; - } - - options->html.toc_id_prefix = toc_id_prefix; - - /* do the magic */ - sd_markdown_render(ob, ib.data, ib.size, _snudown.main_renderer); - - options->html.toc_id_prefix = NULL; - options->html.flags = flags; - - /* make a Python string */ - result_text = ""; - if (ob->data) - result_text = (const char*)ob->data; - py_result = Py_BuildValue("s#", result_text, (int)ob->size); - - /* Cleanup */ - bufrelease(ob); - return py_result; -} - - -static PyMethodDef snudown_methods[] = { - {"markdown", (PyCFunction) snudown_md, METH_VARARGS | METH_KEYWORDS, snudown_md__doc__}, - {NULL, NULL, 0, NULL} /* Sentinel */ -}; -PyMODINIT_FUNC PyInit_snudown(void) -{ - PyObject *module; - - struct PyModuleDef wtf = { - PyModuleDef_HEAD_INIT, - "snudown", /* m_name */ - "This is snudown", /* m_doc */ - -1, /* m_size */ - snudown_methods, /* m_methods */ - NULL, /* m_reload */ - NULL, /* m_traverse */ - NULL, /* m_clear */ - NULL, /* m_free */ - }; - //module = Py_InitModule3("snudown", snudown_methods, snudown_module__doc__); - module = PyModule_Create(&wtf); - if (module == NULL) - return Py_BuildValue(""); - - init_default_renderer(module); - init_wiki_renderer(module); - - /* Version */ - PyModule_AddStringConstant(module, "__version__", SNUDOWN_VERSION); -}; -void initsnudown(void) -{ - (void) PyInit_snudown("snudown", snudown_methods); -}; \ No newline at end of file diff --git a/SnudownTest/src/autolink.c b/SnudownTest/src/autolink.c deleted file mode 100644 index 8d0e39a..0000000 --- a/SnudownTest/src/autolink.c +++ /dev/null @@ -1,487 +0,0 @@ -/* - * Copyright (c) 2011, Vicent Marti - * - * Permission to use, copy, modify, and distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ - -#include "buffer.h" -#include "autolink.h" - -#include -#include -#include -#include - -#if defined(_WIN32) -#define strncasecmp _strnicmp -#endif - -int -sd_autolink_issafe(const uint8_t *link, size_t link_len) -{ - static const size_t valid_uris_count = 14; - static const char *valid_uris[] = { - "http://", "https://", "ftp://", "mailto://", - "/", "git://", "steam://", "irc://", "news://", "mumble://", - "ssh://", "ircs://", "ts3server://", "#" - }; - - size_t i; - - for (i = 0; i < valid_uris_count; ++i) { - size_t len = strlen(valid_uris[i]); - - if (link_len > len && - strncasecmp((char *)link, valid_uris[i], len) == 0 && - (isalnum(link[len]) || link[len] == '#' || link[len] == '/' || link[len] == '?')) - return 1; - } - - return 0; -} - -static size_t -autolink_delim(uint8_t *data, size_t link_end, size_t max_rewind, size_t size) -{ - uint8_t cclose, copen = 0; - size_t i; - - for (i = 0; i < link_end; ++i) - if (data[i] == '<') { - link_end = i; - break; - } - - while (link_end > 0) { - uint8_t c = data[link_end - 1]; - - if (c == 0) - break; - - if (strchr("?!.,", c) != NULL) - link_end--; - - else if (c == ';') { - size_t new_end = link_end - 2; - - while (new_end > 0 && isalpha(data[new_end])) - new_end--; - - if (new_end < link_end - 2 && data[new_end] == '&') - link_end = new_end; - else - link_end--; - } - else break; - } - - if (link_end == 0) - return 0; - - cclose = data[link_end - 1]; - - switch (cclose) { - case '"': copen = '"'; break; - case '\'': copen = '\''; break; - case ')': copen = '('; break; - case ']': copen = '['; break; - case '}': copen = '{'; break; - } - - if (copen != 0) { - size_t closing = 0; - size_t opening = 0; - size_t i = 0; - - /* Try to close the final punctuation sign in this same line; - * if we managed to close it outside of the URL, that means that it's - * not part of the URL. If it closes inside the URL, that means it - * is part of the URL. - * - * Examples: - * - * foo http://www.pokemon.com/Pikachu_(Electric) bar - * => http://www.pokemon.com/Pikachu_(Electric) - * - * foo (http://www.pokemon.com/Pikachu_(Electric)) bar - * => http://www.pokemon.com/Pikachu_(Electric) - * - * foo http://www.pokemon.com/Pikachu_(Electric)) bar - * => http://www.pokemon.com/Pikachu_(Electric)) - * - * (foo http://www.pokemon.com/Pikachu_(Electric)) bar - * => foo http://www.pokemon.com/Pikachu_(Electric) - */ - - while (i < link_end) { - if (data[i] == copen) - opening++; - else if (data[i] == cclose) - closing++; - - i++; - } - - if (closing != opening) - link_end--; - } - - return link_end; -} - -/* - * Checks that `prefix_char` occurs on a word boundary just before `data`, - * where `data` points to the character to search to the left of, and a word boundary - * is (currently) a whitespace character, punctuation, or the start of the string. - * Returns the length of the prefix. - */ -static int -check_reddit_autolink_prefix( - const uint8_t* data, - size_t max_rewind, - size_t max_lookbehind, - size_t size, - char prefix_char - ) -{ - /* Make sure this `/` is part of `/?r/` */ - if (size < 2 || max_rewind < 1 || data[-1] != prefix_char) - return 0; - - /* Not at the start of the buffer, no inlines to the immediate left of the `prefix_char` */ - if (max_rewind > 1) { - const char boundary = data[-2]; - if (boundary == '/') - return 2; - /** - * Here's where our lack of unicode-awareness bites us. We don't correctly - * match punctuation / whitespace characters for the boundary, because we - * reject valid cases like "。r/example" (note the fullwidth period.) - * - * A better implementation might try to rewind over bytes with the 8th bit set, try - * to decode them to a valid codepoint, then do a unicode-aware check on the codepoint. - */ - else if (ispunct(boundary) || isspace(boundary)) - return 1; - else - return 0; - } else if (max_lookbehind > 2) { - /* There's an inline element just left of the `prefix_char`, is it an escaped forward - * slash? bail out so we correctly handle stuff like "\/r/foo". This will also correctly - * allow "\\/r/foo". - */ - if (data[-2] == '/' && data[-3] == '\\') - return 0; - } - - /* Must be a new-style shortlink with nothing relevant to the left of it. */ - return 1; -} - -static size_t -check_domain(uint8_t *data, size_t size, int allow_short) -{ - size_t i, np = 0; - - if (!isalnum(data[0])) - return 0; - - for (i = 1; i < size - 1; ++i) { - if (data[i] == '.') np++; - else if (!isalnum(data[i]) && data[i] != '-') break; - } - - if (allow_short) { - /* We don't need a valid domain in the strict sense (with - * least one dot; so just make sure it's composed of valid - * domain characters and return the length of the the valid - * sequence. */ - return i; - } else { - /* a valid domain needs to have at least a dot. - * that's as far as we get */ - return np ? i : 0; - } -} - -size_t -sd_autolink__www( - size_t *rewind_p, - struct buf *link, - uint8_t *data, - size_t max_rewind, - size_t size, - unsigned int flags) -{ - size_t link_end; - - if (max_rewind > 0 && !ispunct(data[-1]) && !isspace(data[-1])) - return 0; - - if (size < 4 || memcmp(data, "www.", strlen("www.")) != 0) - return 0; - - link_end = check_domain(data, size, 0); - - if (link_end == 0) - return 0; - - while (link_end < size && !isspace(data[link_end])) - link_end++; - - link_end = autolink_delim(data, link_end, max_rewind, size); - - if (link_end == 0) - return 0; - - bufput(link, data, link_end); - *rewind_p = 0; - - return (int)link_end; -} - -size_t -sd_autolink__email( - size_t *rewind_p, - struct buf *link, - uint8_t *data, - size_t max_rewind, - size_t size, - unsigned int flags) -{ - size_t link_end, rewind; - int nb = 0, np = 0; - - for (rewind = 0; rewind < max_rewind; ++rewind) { - uint8_t c = data[-rewind - 1]; - - if (c == 0) - break; - - if (isalnum(c)) - continue; - - if (strchr(".+-_", c) != NULL) - continue; - - break; - } - - if (rewind == 0) - return 0; - - for (link_end = 0; link_end < size; ++link_end) { - uint8_t c = data[link_end]; - - if (isalnum(c)) - continue; - - if (c == '@') - nb++; - else if (c == '.' && link_end < size - 1) - np++; - else if (c != '-' && c != '_') - break; - } - - if (link_end < 2 || nb != 1 || np == 0) - return 0; - - link_end = autolink_delim(data, link_end, max_rewind, size); - - if (link_end == 0) - return 0; - - bufput(link, data - rewind, link_end + rewind); - *rewind_p = rewind; - - return link_end; -} - -size_t -sd_autolink__url( - size_t *rewind_p, - struct buf *link, - uint8_t *data, - size_t max_rewind, - size_t size, - unsigned int flags) -{ - size_t link_end, rewind = 0, domain_len; - - if (size < 4 || data[1] != '/' || data[2] != '/') - return 0; - - while (rewind < max_rewind && isalpha(data[-rewind - 1])) - rewind++; - - if (!sd_autolink_issafe(data - rewind, size + rewind)) - return 0; - - link_end = strlen("://"); - - domain_len = check_domain( - data + link_end, - size - link_end, - flags & SD_AUTOLINK_SHORT_DOMAINS); - - if (domain_len == 0) - return 0; - - link_end += domain_len; - while (link_end < size && !isspace(data[link_end])) - link_end++; - - link_end = autolink_delim(data, link_end, max_rewind, size); - - if (link_end == 0) - return 0; - - bufput(link, data - rewind, link_end + rewind); - *rewind_p = rewind; - - return link_end; -} - -size_t -sd_autolink__subreddit( - size_t *rewind_p, - struct buf *link, - uint8_t *data, - size_t max_rewind, - size_t max_lookbehind, - size_t size, - int *no_slash - ) -{ - /** - * This is meant to handle both r/foo and /r/foo style subreddit references. - * In a valid /?r/ link, `*data` will always point to the '/' after the first 'r'. - * In pseudo-regex, this matches something like: - * - * `(/|(?<=\b))r/(all-)?%subreddit%([-+]%subreddit%)*(/[\w\-/]*)?` - * where %subreddit% == `((t:)?\w{2,24}|reddit\.com)` - */ - size_t link_end; - size_t rewind; - int is_allminus = 0; - - rewind = check_reddit_autolink_prefix(data, max_rewind, max_lookbehind, size, 'r'); - if (!rewind) - return 0; - - /* offset to the "meat" of the link */ - link_end = strlen("/"); - - if (size >= link_end + 4 && strncasecmp((char*)data + link_end, "all-", 4) == 0) - is_allminus = 1; - - do { - size_t start = link_end; - int max_length = 24; - - /* special case: /r/reddit.com (only subreddit containing '.'). */ - if ( size >= link_end+10 && strncasecmp((char*)data+link_end, "reddit.com", 10) == 0 ) { - link_end += 10; - /* Make sure there are no trailing characters (don't do - * any autolinking for /r/reddit.commission) */ - max_length = 10; - } - - /* If not a special case, verify it begins with (t:)?[A-Za-z0-9] */ - else { - /* support autolinking to timereddits, /r/t:when (1 April 2012) */ - if ( size > link_end+2 && strncasecmp((char*)data+link_end, "t:", 2) == 0 ) - link_end += 2; /* Jump over the 't:' */ - - /* the first character of a subreddit name must be a letter or digit */ - if (!isalnum(data[link_end])) - return 0; - link_end += 1; - } - - /* consume valid characters ([A-Za-z0-9_]) until we run out */ - while (link_end < size && (isalnum(data[link_end]) || - data[link_end] == '_')) - link_end++; - - /* valid subreddit names are between 3 and 21 characters, with - * some subreddits having 2-character names. Don't bother with - * autolinking for anything outside this length range. - * (chksrname function in reddit/.../validator.py) */ - if ( link_end-start < 2 || link_end-start > max_length ) - return 0; - - /* If we are linking to a multireddit, continue */ - } while ( link_end < size && (data[link_end] == '+' || (is_allminus && data[link_end] == '-')) && link_end++ ); - - if (link_end < size && data[link_end] == '/') { - while (link_end < size && (isalnum(data[link_end]) || - data[link_end] == '_' || - data[link_end] == '/' || - data[link_end] == '-')) - link_end++; - } - - /* make the link */ - bufput(link, data - rewind, link_end + rewind); - - *no_slash = (rewind == 1); - *rewind_p = rewind; - - return link_end; -} - -size_t -sd_autolink__username( - size_t *rewind_p, - struct buf *link, - uint8_t *data, - size_t max_rewind, - size_t max_lookbehind, - size_t size, - int *no_slash - ) -{ - size_t link_end; - size_t rewind; - - if (size < 3) - return 0; - - rewind = check_reddit_autolink_prefix(data, max_rewind, max_lookbehind, size, 'u'); - if (!rewind) - return 0; - - link_end = strlen("/"); - - /* the first letter of a username must... well, be valid, we don't care otherwise */ - if (!isalnum(data[link_end]) && data[link_end] != '_' && data[link_end] != '-') - return 0; - link_end += 1; - - /* consume valid characters ([A-Za-z0-9_-/]) until we run out */ - while (link_end < size && (isalnum(data[link_end]) || - data[link_end] == '_' || - data[link_end] == '/' || - data[link_end] == '-')) - link_end++; - - /* make the link */ - bufput(link, data - rewind, link_end + rewind); - - *no_slash = (rewind == 1); - *rewind_p = rewind; - - return link_end; -} diff --git a/SnudownTest/src/autolink.h b/SnudownTest/src/autolink.h deleted file mode 100644 index 55b7aaa..0000000 --- a/SnudownTest/src/autolink.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright (c) 2011, Vicent Marti - * - * Permission to use, copy, modify, and distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ - -#ifndef UPSKIRT_AUTOLINK_H -#define UPSKIRT_AUTOLINK_H - -#include "buffer.h" - -#ifdef __cplusplus -extern "C" { -#endif - -enum { - SD_AUTOLINK_SHORT_DOMAINS = (1 << 0), -}; - -int -sd_autolink_issafe(const uint8_t *link, size_t link_len); - -size_t -sd_autolink__www(size_t *rewind_p, struct buf *link, - uint8_t *data, size_t max_rewind, size_t size, unsigned int flags); - -size_t -sd_autolink__email(size_t *rewind_p, struct buf *link, - uint8_t *data, size_t max_rewind, size_t size, unsigned int flags); - -size_t -sd_autolink__url(size_t *rewind_p, struct buf *link, - uint8_t *data, size_t max_rewind, size_t size, unsigned int flags); - -extern size_t -sd_autolink__subreddit(size_t *rewind_p, struct buf *link, uint8_t *data, - size_t max_rewind, size_t max_lookbehind, size_t size, int *no_slash); - -extern size_t -sd_autolink__username(size_t *rewind_p, struct buf *link, uint8_t *data, - size_t max_rewind, size_t max_lookbehind, size_t size, int *no_slash); - -#ifdef __cplusplus -} -#endif - -#endif - -/* vim: set filetype=c: */ diff --git a/SnudownTest/src/buffer.c b/SnudownTest/src/buffer.c deleted file mode 100644 index ab18948..0000000 --- a/SnudownTest/src/buffer.c +++ /dev/null @@ -1,236 +0,0 @@ -/* - * Copyright (c) 2008, Natacha Porté - * Copyright (c) 2011, Vicent Martí - * - * Permission to use, copy, modify, and distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ - -#define BUFFER_MAX_ALLOC_SIZE (1024 * 1024 * 16) //16mb - -#include "buffer.h" - -#include -#include -#include -#include - -/* MSVC compat */ -#if defined(_MSC_VER) -# define _buf_vsnprintf _vsnprintf -#else -# define _buf_vsnprintf vsnprintf -#endif - -int -bufprefix(const struct buf *buf, const char *prefix) -{ - size_t i; - assert(buf && buf->unit); - - for (i = 0; i < buf->size; ++i) { - if (prefix[i] == 0) - return 0; - - if (buf->data[i] != prefix[i]) - return buf->data[i] - prefix[i]; - } - - return 0; -} - -/* bufgrow: increasing the allocated size to the given value */ -int -bufgrow(struct buf *buf, size_t neosz) -{ - size_t neoasz; - void *neodata; - - assert(buf && buf->unit); - - if (neosz > BUFFER_MAX_ALLOC_SIZE) - return BUF_ENOMEM; - - if (buf->asize >= neosz) - return BUF_OK; - - neoasz = buf->asize + buf->unit; - while (neoasz < neosz) - neoasz += buf->unit; - - neodata = realloc(buf->data, neoasz); - if (!neodata) - return BUF_ENOMEM; - - buf->data = neodata; - buf->asize = neoasz; - return BUF_OK; -} - - -/* bufnew: allocation of a new buffer */ -struct buf * -bufnew(size_t unit) -{ - struct buf *ret; - ret = malloc(sizeof (struct buf)); - - if (ret) { - ret->data = 0; - ret->size = ret->asize = 0; - ret->unit = unit; - } - return ret; -} - -/* bufnullterm: NULL-termination of the string array */ -const char * -bufcstr(struct buf *buf) -{ - assert(buf && buf->unit); - - if (buf->size < buf->asize && buf->data[buf->size] == 0) - return (char *)buf->data; - - if (buf->size + 1 <= buf->asize || bufgrow(buf, buf->size + 1) == 0) { - buf->data[buf->size] = 0; - return (char *)buf->data; - } - - return NULL; -} - -/* bufprintf: formatted printing to a buffer */ -void -bufprintf(struct buf *buf, const char *fmt, ...) -{ - va_list ap; - int n; - - assert(buf && buf->unit); - - if (buf->size >= buf->asize && bufgrow(buf, buf->size + 1) < 0) - return; - va_start(ap, fmt); - n = _buf_vsnprintf((char *)buf->data + buf->size, buf->asize - buf->size, fmt, ap); - va_end(ap); - - if (n < 0) { -#ifdef _MSC_VER - va_start(ap, fmt); - n = _vscprintf(fmt, ap); - va_end(ap); -#else - return; -#endif - } - if ((size_t)n >= buf->asize - buf->size) { - if (bufgrow(buf, buf->size + n + 1) < 0) - return; - - va_start(ap, fmt); - n = _buf_vsnprintf((char *)buf->data + buf->size, buf->asize - buf->size, fmt, ap); - va_end(ap); - } - - if (n < 0) - return; - - buf->size += n; -} - -/* bufput: appends raw data to a buffer */ -void -bufput(struct buf *buf, const void *data, size_t len) -{ - assert(buf && buf->unit); - - if (buf->size + len > buf->asize && bufgrow(buf, buf->size + len) < 0) - return; - - memcpy(buf->data + buf->size, data, len); - buf->size += len; -} - -/* bufputs: appends a NUL-terminated string to a buffer */ -void -bufputs(struct buf *buf, const char *str) -{ - bufput(buf, str, strlen(str)); -} - - -/* bufputc: appends a single uint8_t to a buffer */ -void -bufputc(struct buf *buf, int c) -{ - assert(buf && buf->unit); - - if (buf->size + 1 > buf->asize && bufgrow(buf, buf->size + 1) < 0) - return; - - buf->data[buf->size] = c; - buf->size += 1; -} - -/* bufrelease: decrease the reference count and free the buffer if needed */ -void -bufrelease(struct buf *buf) -{ - if (!buf) - return; - - free(buf->data); - free(buf); -} - - -/* bufreset: frees internal data of the buffer */ -void -bufreset(struct buf *buf) -{ - if (!buf) - return; - - free(buf->data); - buf->data = NULL; - buf->size = buf->asize = 0; -} - -/* bufslurp: removes a given number of bytes from the head of the array */ -void -bufslurp(struct buf *buf, size_t len) -{ - assert(buf && buf->unit); - - if (len >= buf->size) { - buf->size = 0; - return; - } - - buf->size -= len; - memmove(buf->data, buf->data + len, buf->size); -} - -/* buftrucate: truncates the buffer at `size` */ -int -buftruncate(struct buf *buf, size_t size) -{ - if (buf->size < size || size < 0) { - /* bail out in debug mode so we can figure out why this happened */ - assert(0); - return BUF_EINVALIDIDX; - } - - buf->size = size; - return BUF_OK; -} diff --git a/SnudownTest/src/buffer.h b/SnudownTest/src/buffer.h deleted file mode 100644 index ab98ab6..0000000 --- a/SnudownTest/src/buffer.h +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Copyright (c) 2008, Natacha Porté - * Copyright (c) 2011, Vicent Martí - * - * Permission to use, copy, modify, and distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ - -#ifndef BUFFER_H__ -#define BUFFER_H__ - -#include -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif - -#if defined(_MSC_VER) -#define __attribute__(x) -#define inline -#endif - -typedef enum { - BUF_OK = 0, - BUF_ENOMEM = -1, - BUF_EINVALIDIDX = -2, -} buferror_t; - -/* struct buf: character array buffer */ -struct buf { - uint8_t *data; /* actual character data */ - size_t size; /* size of the string */ - size_t asize; /* allocated size (0 = volatile buffer) */ - size_t unit; /* reallocation unit size (0 = read-only buffer) */ -}; - -/* CONST_BUF: global buffer from a string litteral */ -#define BUF_STATIC(string) \ - { (uint8_t *)string, sizeof string -1, sizeof string, 0, 0 } - -/* VOLATILE_BUF: macro for creating a volatile buffer on the stack */ -#define BUF_VOLATILE(strname) \ - { (uint8_t *)strname, strlen(strname), 0, 0, 0 } - -/* BUFPUTSL: optimized bufputs of a string litteral */ -#define BUFPUTSL(output, literal) \ - bufput(output, literal, sizeof literal - 1) - -/* bufgrow: increasing the allocated size to the given value */ -int bufgrow(struct buf *, size_t); - -/* bufnew: allocation of a new buffer */ -struct buf *bufnew(size_t) __attribute__ ((malloc)); - -/* bufnullterm: NUL-termination of the string array (making a C-string) */ -const char *bufcstr(struct buf *); - -/* bufprefix: compare the beginning of a buffer with a string */ -int bufprefix(const struct buf *buf, const char *prefix); - -/* bufput: appends raw data to a buffer */ -void bufput(struct buf *, const void *, size_t); - -/* bufputs: appends a NUL-terminated string to a buffer */ -void bufputs(struct buf *, const char *); - -/* bufputc: appends a single char to a buffer */ -void bufputc(struct buf *, int); - -/* bufrelease: decrease the reference count and free the buffer if needed */ -void bufrelease(struct buf *); - -/* bufreset: frees internal data of the buffer */ -void bufreset(struct buf *); - -/* bufslurp: removes a given number of bytes from the head of the array */ -void bufslurp(struct buf *, size_t); - -/* bufprintf: formatted printing to a buffer */ -void bufprintf(struct buf *, const char *, ...) __attribute__ ((format (printf, 2, 3))); - -/* buftruncate: truncates the buffer at `size` */ -int buftruncate(struct buf *buf, size_t size); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/SnudownTest/src/html_blocks.h b/SnudownTest/src/html_blocks.h deleted file mode 100644 index 09a758f..0000000 --- a/SnudownTest/src/html_blocks.h +++ /dev/null @@ -1,206 +0,0 @@ -/* C code produced by gperf version 3.0.3 */ -/* Command-line: gperf -N find_block_tag -H hash_block_tag -C -c -E --ignore-case html_block_names.txt */ -/* Computed positions: -k'1-2' */ - -#if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \ - && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \ - && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \ - && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \ - && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \ - && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \ - && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \ - && ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \ - && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \ - && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \ - && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \ - && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \ - && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \ - && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \ - && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \ - && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \ - && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \ - && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \ - && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \ - && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \ - && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \ - && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \ - && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126)) -/* The character set is not based on ISO-646. */ -error "gperf generated tables don't work with this execution character set. Please report a bug to ." -#endif - -/* maximum key range = 37, duplicates = 0 */ - -#ifndef GPERF_DOWNCASE -#define GPERF_DOWNCASE 1 -static unsigned char gperf_downcase[256] = - { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, - 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, - 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, - 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, - 60, 61, 62, 63, 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, - 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, - 122, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, - 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, - 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, - 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, - 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, - 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, - 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, - 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, - 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, - 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, - 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, - 255 - }; -#endif - -#ifndef GPERF_CASE_STRNCMP -#define GPERF_CASE_STRNCMP 1 -static int -gperf_case_strncmp (s1, s2, n) - register const char *s1; - register const char *s2; - register unsigned int n; -{ - for (; n > 0;) - { - unsigned char c1 = gperf_downcase[(unsigned char)*s1++]; - unsigned char c2 = gperf_downcase[(unsigned char)*s2++]; - if (c1 != 0 && c1 == c2) - { - n--; - continue; - } - return (int)c1 - (int)c2; - } - return 0; -} -#endif - -#ifdef __GNUC__ -__inline -#else -#ifdef __cplusplus -inline -#endif -#endif -static unsigned int -hash_block_tag (str, len) - register const char *str; - register unsigned int len; -{ - static const unsigned char asso_values[] = - { - 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, - 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, - 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, - 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, - 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, - 8, 30, 25, 20, 15, 10, 38, 38, 38, 38, - 38, 38, 38, 38, 38, 38, 0, 38, 0, 38, - 5, 5, 5, 15, 0, 38, 38, 0, 15, 10, - 0, 38, 38, 15, 0, 5, 38, 38, 38, 38, - 38, 38, 38, 38, 38, 38, 38, 38, 0, 38, - 0, 38, 5, 5, 5, 15, 0, 38, 38, 0, - 15, 10, 0, 38, 38, 15, 0, 5, 38, 38, - 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, - 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, - 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, - 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, - 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, - 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, - 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, - 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, - 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, - 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, - 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, - 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, - 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, - 38, 38, 38, 38, 38, 38, 38 - }; - register int hval = len; - - switch (hval) - { - default: - hval += asso_values[(unsigned char)str[1]+1]; - /*FALLTHROUGH*/ - case 1: - hval += asso_values[(unsigned char)str[0]]; - break; - } - return hval; -} - -#ifdef __GNUC__ -__inline -#ifdef __GNUC_STDC_INLINE__ -__attribute__ ((__gnu_inline__)) -#endif -#endif -const char * -find_block_tag (str, len) - register const char *str; - register unsigned int len; -{ - enum - { - TOTAL_KEYWORDS = 24, - MIN_WORD_LENGTH = 1, - MAX_WORD_LENGTH = 10, - MIN_HASH_VALUE = 1, - MAX_HASH_VALUE = 37 - }; - - static const char * const wordlist[] = - { - "", - "p", - "dl", - "div", - "math", - "table", - "", - "ul", - "del", - "form", - "blockquote", - "figure", - "ol", - "fieldset", - "", - "h1", - "", - "h6", - "pre", - "", "", - "script", - "h5", - "noscript", - "", - "style", - "iframe", - "h4", - "ins", - "", "", "", - "h3", - "", "", "", "", - "h2" - }; - - if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH) - { - register int key = hash_block_tag (str, len); - - if (key <= MAX_HASH_VALUE && key >= 0) - { - register const char *s = wordlist[key]; - - if ((((unsigned char)*str ^ (unsigned char)*s) & ~32) == 0 && !gperf_case_strncmp (str, s, len) && s[len] == '\0') - return s; - } - } - return 0; -} diff --git a/SnudownTest/src/html_entities.gperf b/SnudownTest/src/html_entities.gperf deleted file mode 100644 index 543103d..0000000 --- a/SnudownTest/src/html_entities.gperf +++ /dev/null @@ -1,292 +0,0 @@ -%language=ANSI-C -%define lookup-function-name is_allowed_named_entity -%compare-strncmp -%readonly-tables -%define hash-function-name hash_html_entity -%enum -%includes -%{ -#include - -/* Parsers tend to choke on entities with values greater than this */ -const u_int32_t max_num_entity_val = 0x10ffff; -/* Any numeric entity longer than this is obviously above max_num_entity_val - * used to avoid dealing with overflows. */ -const size_t MAX_NUM_ENTITY_LEN = 7; - -inline int is_valid_numeric_entity(uint32_t entity_val) -{ - /* Some XML parsers will choke on entities with certain - * values (mostly control characters.) - * - * According to lxml these are all problematic: - * - * [xrange(0, 8), - * xrange(11, 12), - * xrange(14, 31), - * xrange(55296, 57343), - * xrange(65534, 65535)] - */ - return (entity_val > 8 - && (entity_val != 11 && entity_val != 12) - && (entity_val < 14 || entity_val > 31) - && (entity_val < 55296 || entity_val > 57343) - && (entity_val != 65534 && entity_val != 65535) - && entity_val <= max_num_entity_val); -} - -%} -%% -Æ -Á - -À -Α -Å -à -Ä -Β -Ç -Χ -‡ -Δ -Ð -É -Ê -È -Ε -Η -Ë -Γ -Í -Î -Ì -Ι -Ï -Κ -Λ -Μ -Ñ -Ν -Œ -Ó -Ô -Ò -Ω -Ο -Ø -Õ -Ö -Φ -Π -″ -Ψ -Ρ -Š -Σ -Þ -Τ -Θ -Ú -Û -Ù -Υ -Ü -Ξ -Ý -Ÿ -Ζ -á -â -´ -æ -à -ℵ -α -& -∧ -∠ -' -å -≈ -ã -ä -„ -β -¦ -• -∩ -ç -¸ -¢ -χ -ˆ -♣ -≅ -© -↵ -∪ -¤ -⇓ -† -↓ -° -δ -♦ -÷ -é -ê -è -∅ -  -  -ε -≡ -η -ð -ë -€ -∃ -ƒ -∀ -½ -¼ -¾ -⁄ -γ -≥ -> -⇔ -↔ -♥ -… -í -î -¡ -ì -ℑ -∞ -∫ -ι -¿ -∈ -ï -κ -⇐ -λ -⟨ -« -← -⌈ -“ -≤ -⌊ -∗ -◊ -‎ -‹ -‘ -< -¯ -— -µ -· -− -μ -∇ -  -– -≠ -∋ -¬ -∉ -⊄ -ñ -ν -ó -ô -œ -ò -‾ -ω -ο -⊕ -∨ -ª -º -ø -õ -⊗ -ö -¶ -∂ -‰ -⊥ -φ -π -ϖ -± -£ -′ -∏ -∝ -ψ -" -⇒ -√ -⟩ -» -→ -⌉ -” -ℜ -® -⌋ -ρ -‏ -› -’ -‚ -š -⋅ -§ -­ -σ -ς -∼ -♠ -⊂ -⊆ -∑ -¹ -² -³ -⊃ -⊇ -ß -τ -∴ -θ -ϑ -  -þ -˜ -× -™ -⇑ -ú -↑ -û -ù -¨ -ϒ -υ -ü -℘ -ξ -ý -¥ -ÿ -ζ -‍ -‌ diff --git a/SnudownTest/src/markdown.c b/SnudownTest/src/markdown.c deleted file mode 100644 index abe4a1d..0000000 --- a/SnudownTest/src/markdown.c +++ /dev/null @@ -1,2661 +0,0 @@ -/* markdown.c - generic markdown parser */ - -/* - * Copyright (c) 2009, Natacha Porté - * Copyright (c) 2011, Vicent Marti - * - * Permission to use, copy, modify, and distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ - -#include "markdown.h" -#include "stack.h" - -#include -#include -#include -#include - -#if defined(_WIN32) -#define strncasecmp _strnicmp -#endif - -#define REF_TABLE_SIZE 8 - -#define BUFFER_BLOCK 0 -#define BUFFER_SPAN 1 - -#define MKD_LI_END 8 /* internal list flag */ - -#define gperf_case_strncmp(s1, s2, n) strncasecmp(s1, s2, n) -#define GPERF_DOWNCASE 1 -#define GPERF_CASE_STRNCMP 1 -#include "html_blocks.h" -#include "html_entities.h" - -/*************** - * LOCAL TYPES * - ***************/ - -/* link_ref: reference to a link */ -struct link_ref { - unsigned int id; - - struct buf *link; - struct buf *title; - - struct link_ref *next; -}; - -/* char_trigger: function pointer to render active chars */ -/* returns the number of chars taken care of */ -/* data is the pointer of the beginning of the span */ -/* offset is the number of valid chars before data */ -struct sd_markdown; -typedef size_t -(*char_trigger)(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size); - -static size_t char_emphasis(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size); -static size_t char_linebreak(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size); -static size_t char_codespan(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size); -static size_t char_escape(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size); -static size_t char_entity(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size); -static size_t char_langle_tag(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size); -static size_t char_autolink_url(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size); -static size_t char_autolink_email(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size); -static size_t char_autolink_www(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size); -static size_t char_autolink_subreddit_or_username(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size); -static size_t char_link(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size); -static size_t char_superscript(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size); - -enum markdown_char_t { - MD_CHAR_NONE = 0, - MD_CHAR_EMPHASIS, - MD_CHAR_CODESPAN, - MD_CHAR_LINEBREAK, - MD_CHAR_LINK, - MD_CHAR_LANGLE, - MD_CHAR_ESCAPE, - MD_CHAR_ENTITITY, - MD_CHAR_AUTOLINK_URL, - MD_CHAR_AUTOLINK_EMAIL, - MD_CHAR_AUTOLINK_WWW, - MD_CHAR_AUTOLINK_SUBREDDIT_OR_USERNAME, - MD_CHAR_SUPERSCRIPT, -}; - -static char_trigger markdown_char_ptrs[] = { - NULL, - &char_emphasis, - &char_codespan, - &char_linebreak, - &char_link, - &char_langle_tag, - &char_escape, - &char_entity, - &char_autolink_url, - &char_autolink_email, - &char_autolink_www, - &char_autolink_subreddit_or_username, - &char_superscript, -}; - -/* render • structure containing one particular render */ -struct sd_markdown { - struct sd_callbacks cb; - void *opaque; - - struct link_ref *refs[REF_TABLE_SIZE]; - uint8_t active_char[256]; - struct stack work_bufs[2]; - unsigned int ext_flags; - size_t max_nesting; - size_t max_table_cols; - int in_link_body; -}; - -/*************************** - * HELPER FUNCTIONS * - ***************************/ - -static inline struct buf * -rndr_newbuf(struct sd_markdown *rndr, int type) -{ - static const size_t buf_size[2] = {256, 64}; - struct buf *work = NULL; - struct stack *pool = &rndr->work_bufs[type]; - - if (pool->size < pool->asize && - pool->item[pool->size] != NULL) { - work = pool->item[pool->size++]; - work->size = 0; - } else { - work = bufnew(buf_size[type]); - stack_push(pool, work); - } - - return work; -} - -static inline void -rndr_popbuf(struct sd_markdown *rndr, int type) -{ - rndr->work_bufs[type].size--; -} - -static void -unscape_text(struct buf *ob, struct buf *src) -{ - size_t i = 0, org; - while (i < src->size) { - org = i; - while (i < src->size && src->data[i] != '\\') - i++; - - if (i > org) - bufput(ob, src->data + org, i - org); - - if (i + 1 >= src->size) - break; - - bufputc(ob, src->data[i + 1]); - i += 2; - } -} - -static unsigned int -hash_link_ref(const uint8_t *link_ref, size_t length) -{ - size_t i; - unsigned int hash = 0; - - for (i = 0; i < length; ++i) - hash = tolower(link_ref[i]) + (hash << 6) + (hash << 16) - hash; - - return hash; -} - -static struct link_ref * -add_link_ref( - struct link_ref **references, - const uint8_t *name, size_t name_size) -{ - struct link_ref *ref = calloc(1, sizeof(struct link_ref)); - - if (!ref) - return NULL; - - ref->id = hash_link_ref(name, name_size); - ref->next = references[ref->id % REF_TABLE_SIZE]; - - references[ref->id % REF_TABLE_SIZE] = ref; - return ref; -} - -static struct link_ref * -find_link_ref(struct link_ref **references, uint8_t *name, size_t length) -{ - unsigned int hash = hash_link_ref(name, length); - struct link_ref *ref = NULL; - - ref = references[hash % REF_TABLE_SIZE]; - - while (ref != NULL) { - if (ref->id == hash) - return ref; - - ref = ref->next; - } - - return NULL; -} - -static void -free_link_refs(struct link_ref **references) -{ - size_t i; - - for (i = 0; i < REF_TABLE_SIZE; ++i) { - struct link_ref *r = references[i]; - struct link_ref *next; - - while (r) { - next = r->next; - bufrelease(r->link); - bufrelease(r->title); - free(r); - r = next; - } - } -} - -/* - * Check whether a char is a Markdown space. - - * Right now we only consider spaces the actual - * space and a newline: tabs and carriage returns - * are filtered out during the preprocessing phase. - * - * If we wanted to actually be UTF-8 compliant, we - * should instead extract an Unicode codepoint from - * this character and check for space properties. - */ -static inline int -_isspace(int c) -{ - return c == ' ' || c == '\n'; -} - -/**************************** - * INLINE PARSING FUNCTIONS * - ****************************/ - -/* is_mail_autolink • looks for the address part of a mail autolink and '>' */ -/* this is less strict than the original markdown e-mail address matching */ -static size_t -is_mail_autolink(uint8_t *data, size_t size) -{ - size_t i = 0, nb = 0; - - /* address is assumed to be: [-@._a-zA-Z0-9]+ with exactly one '@' */ - for (i = 0; i < size; ++i) { - if (isalnum(data[i])) - continue; - - switch (data[i]) { - case '@': - nb++; - - case '-': - case '.': - case '_': - break; - - case '>': - return (nb == 1) ? i + 1 : 0; - - default: - return 0; - } - } - - return 0; -} - -/* tag_length • returns the length of the given tag, or 0 is it's not valid */ -static size_t -tag_length(uint8_t *data, size_t size, enum mkd_autolink *autolink) -{ - size_t i, j; - - /* a valid tag can't be shorter than 3 chars */ - if (size < 3) return 0; - - /* begins with a '<' optionally followed by '/', followed by letter or number */ - if (data[0] != '<') return 0; - i = (data[1] == '/') ? 2 : 1; - - if (!isalnum(data[i])) - return 0; - - /* scheme test */ - *autolink = MKDA_NOT_AUTOLINK; - - /* try to find the beginning of an URI */ - while (i < size && (isalnum(data[i]) || data[i] == '.' || data[i] == '+' || data[i] == '-')) - i++; - - if (i > 1 && data[i] == '@') { - if ((j = is_mail_autolink(data + i, size - i)) != 0) { - *autolink = MKDA_EMAIL; - return i + j; - } - } - - if (i > 2 && data[i] == ':') { - *autolink = MKDA_NORMAL; - i++; - } - - /* completing autolink test: no whitespace or ' or " */ - if (i >= size) - *autolink = MKDA_NOT_AUTOLINK; - - else if (*autolink) { - j = i; - - while (i < size) { - if (data[i] == '\\') i += 2; - else if (data[i] == '>' || data[i] == '\'' || - data[i] == '"' || data[i] == ' ' || data[i] == '\n') - break; - else i++; - } - - if (i >= size) return 0; - if (i > j && data[i] == '>') return i + 1; - /* one of the forbidden chars has been found */ - *autolink = MKDA_NOT_AUTOLINK; - } - - /* looking for sometinhg looking like a tag end */ - while (i < size && data[i] != '>') i++; - if (i >= size) return 0; - return i + 1; -} - -/* parse_inline • parses inline markdown elements */ -static void -parse_inline(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size) -{ - size_t i = 0, end = 0, last_special = 0; - uint8_t action = 0; - struct buf work = { 0, 0, 0, 0 }; - - if (rndr->work_bufs[BUFFER_SPAN].size + - rndr->work_bufs[BUFFER_BLOCK].size > rndr->max_nesting) - return; - - while (i < size) { - /* copying inactive chars into the output */ - while (end < size && (action = rndr->active_char[data[end]]) == 0) { - end++; - } - - if (rndr->cb.normal_text) { - work.data = data + i; - work.size = end - i; - rndr->cb.normal_text(ob, &work, rndr->opaque); - } - else - bufput(ob, data + i, end - i); - - if (end >= size) break; - i = end; - - end = markdown_char_ptrs[(int)action](ob, rndr, data + i, i - last_special, i, size - i); - if (!end) /* no action from the callback */ - end = i + 1; - else { - i += end; - last_special = end = i; - } - } -} - -/* find_emph_char • looks for the next emph uint8_t, skipping other constructs */ -static size_t -find_emph_char(uint8_t *data, size_t size, uint8_t c) -{ - size_t i = 1; - - while (i < size) { - while (i < size && data[i] != c && data[i] != '`' && data[i] != '[') - i++; - - if (i == size) - return 0; - - if (data[i] == c) - return i; - - /* not counting escaped chars */ - if (i && data[i - 1] == '\\') { - i++; continue; - } - - if (data[i] == '`') { - size_t span_nb = 0, bt; - size_t tmp_i = 0; - - /* counting the number of opening backticks */ - while (i < size && data[i] == '`') { - i++; span_nb++; - } - - if (i >= size) return 0; - - /* finding the matching closing sequence */ - bt = 0; - while (i < size && bt < span_nb) { - if (!tmp_i && data[i] == c) tmp_i = i; - if (data[i] == '`') bt++; - else bt = 0; - i++; - } - - if (i >= size) return tmp_i; - } - /* skipping a link */ - else if (data[i] == '[') { - size_t tmp_i = 0; - uint8_t cc; - - i++; - while (i < size && data[i] != ']') { - if (!tmp_i && data[i] == c) tmp_i = i; - i++; - } - - i++; - while (i < size && (data[i] == ' ' || data[i] == '\n')) - i++; - - if (i >= size) - return tmp_i; - - switch (data[i]) { - case '[': - cc = ']'; break; - - case '(': - cc = ')'; break; - - default: - if (tmp_i) - return tmp_i; - else - continue; - } - - i++; - while (i < size && data[i] != cc) { - if (!tmp_i && data[i] == c) tmp_i = i; - i++; - } - - if (i >= size) - return tmp_i; - - i++; - } - } - - return 0; -} - -/* parse_emph1 • parsing single emphase */ -/* closed by a symbol not preceded by whitespace and not followed by symbol */ -static size_t -parse_emph1(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, uint8_t c) -{ - size_t i = 0, len; - struct buf *work = 0; - int r; - - if (!rndr->cb.emphasis) return 0; - - /* skipping one symbol if coming from emph3 */ - if (size > 1 && data[0] == c && data[1] == c) i = 1; - - while (i < size) { - len = find_emph_char(data + i, size - i, c); - if (!len) return 0; - i += len; - if (i >= size) return 0; - - if (data[i] == c && !_isspace(data[i - 1])) { - if ((rndr->ext_flags & MKDEXT_NO_INTRA_EMPHASIS) && (c == '_')) { - if (!(i + 1 == size || _isspace(data[i + 1]) || ispunct(data[i + 1]))) - continue; - } - - work = rndr_newbuf(rndr, BUFFER_SPAN); - parse_inline(work, rndr, data, i); - r = rndr->cb.emphasis(ob, work, rndr->opaque); - rndr_popbuf(rndr, BUFFER_SPAN); - return r ? i + 1 : 0; - } - } - - return 0; -} - -/* parse_emph2 • parsing single emphase */ -static size_t -parse_emph2(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, uint8_t c) -{ - int (*render_method)(struct buf *ob, const struct buf *text, void *opaque); - size_t i = 0, len; - struct buf *work = 0; - int r; - - render_method = (c == '~') ? rndr->cb.strikethrough : rndr->cb.double_emphasis; - - if (!render_method) - return 0; - - while (i < size) { - len = find_emph_char(data + i, size - i, c); - if (!len) return 0; - i += len; - - if (i + 1 < size && data[i] == c && data[i + 1] == c && i && !_isspace(data[i - 1])) { - work = rndr_newbuf(rndr, BUFFER_SPAN); - parse_inline(work, rndr, data, i); - r = render_method(ob, work, rndr->opaque); - rndr_popbuf(rndr, BUFFER_SPAN); - return r ? i + 2 : 0; - } - i++; - } - return 0; -} - -/* parse_emph3 • parsing single emphase */ -/* finds the first closing tag, and delegates to the other emph */ -static size_t -parse_emph3(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, uint8_t c) -{ - size_t i = 0, len; - int r; - - while (i < size) { - len = find_emph_char(data + i, size - i, c); - if (!len) return 0; - i += len; - - /* skip whitespace preceded symbols */ - if (data[i] != c || _isspace(data[i - 1])) - continue; - - if (i + 2 < size && data[i + 1] == c && data[i + 2] == c && rndr->cb.triple_emphasis) { - /* triple symbol found */ - struct buf *work = rndr_newbuf(rndr, BUFFER_SPAN); - - parse_inline(work, rndr, data, i); - r = rndr->cb.triple_emphasis(ob, work, rndr->opaque); - rndr_popbuf(rndr, BUFFER_SPAN); - return r ? i + 3 : 0; - - } else if (i + 1 < size && data[i + 1] == c) { - /* double symbol found, handing over to emph1 */ - len = parse_emph1(ob, rndr, data - 2, size + 2, c); - if (!len) return 0; - else return len - 2; - - } else { - /* single symbol found, handing over to emph2 */ - len = parse_emph2(ob, rndr, data - 1, size + 1, c); - if (!len) return 0; - else return len - 1; - } - } - return 0; -} - -/* char_emphasis • single and double emphasis parsing */ -static size_t -char_emphasis(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size) -{ - uint8_t c = data[0]; - size_t ret; - - if (size > 2 && data[1] != c) { - /* whitespace cannot follow an opening emphasis; - * strikethrough only takes two characters '~~' */ - if (c == '~' || _isspace(data[1]) || (ret = parse_emph1(ob, rndr, data + 1, size - 1, c)) == 0) - return 0; - - return ret + 1; - } - - if (size > 3 && data[1] == c && data[2] != c) { - if (_isspace(data[2]) || (ret = parse_emph2(ob, rndr, data + 2, size - 2, c)) == 0) - return 0; - - return ret + 2; - } - - if (size > 4 && data[1] == c && data[2] == c && data[3] != c) { - if (c == '~' || _isspace(data[3]) || (ret = parse_emph3(ob, rndr, data + 3, size - 3, c)) == 0) - return 0; - - return ret + 3; - } - - return 0; -} - - -/* char_linebreak • '\n' preceded by two spaces (assuming linebreak != 0) */ -static size_t -char_linebreak(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size) -{ - if (max_rewind < 2 || data[-1] != ' ' || data[-2] != ' ') - return 0; - - /* removing the last space from ob and rendering */ - while (ob->size && ob->data[ob->size - 1] == ' ') - ob->size--; - - return rndr->cb.linebreak(ob, rndr->opaque) ? 1 : 0; -} - - -/* char_codespan • '`' parsing a code span (assuming codespan != 0) */ -static size_t -char_codespan(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size) -{ - size_t end, nb = 0, i, f_begin, f_end; - - /* counting the number of backticks in the delimiter */ - while (nb < size && data[nb] == '`') - nb++; - - /* finding the next delimiter */ - i = 0; - for (end = nb; end < size && i < nb; end++) { - if (data[end] == '`') i++; - else i = 0; - } - - if (i < nb && end >= size) - return 0; /* no matching delimiter */ - - /* trimming outside whitespaces */ - f_begin = nb; - while (f_begin < end && data[f_begin] == ' ') - f_begin++; - - f_end = end - nb; - while (f_end > nb && data[f_end-1] == ' ') - f_end--; - - /* real code span */ - if (f_begin < f_end) { - struct buf work = { data + f_begin, f_end - f_begin, 0, 0 }; - if (!rndr->cb.codespan(ob, &work, rndr->opaque)) - end = 0; - } else { - if (!rndr->cb.codespan(ob, 0, rndr->opaque)) - end = 0; - } - - return end; -} - - -/* char_escape • '\\' backslash escape */ -static size_t -char_escape(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size) -{ - static const char *escape_chars = "\\`*_{}[]()#+-.!:|&<>/^~"; - struct buf work = { 0, 0, 0, 0 }; - - if (size > 1) { - if (strchr(escape_chars, data[1]) == NULL) - return 0; - - if (rndr->cb.normal_text) { - work.data = data + 1; - work.size = 1; - rndr->cb.normal_text(ob, &work, rndr->opaque); - } - else bufputc(ob, data[1]); - } else if (size == 1) { - bufputc(ob, data[0]); - } - - return 2; -} - -/* char_entity • '&' escaped when it doesn't belong to an entity */ -static size_t -char_entity(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size) -{ - size_t end = 1; - size_t content_start; - size_t content_end; - struct buf work = { 0, 0, 0, 0 }; - int numeric = 0; - int hex = 0; - int entity_base; - uint32_t entity_val; - - if (end < size && data[end] == '#') { - numeric = 1; - end++; - } - - if (end < size && numeric && tolower(data[end]) == 'x') { - hex = 1; - end++; - } - - content_start = end; - - while (end < size) { - const char c = data[end]; - if (hex) { - if (!isxdigit(c)) break; - } else if (numeric) { - if (!isdigit(c)) break; - } else if (!isalnum(c)) { - break; - } - end++; - } - - content_end = end; - - if (end > content_start && end < size && data[end] == ';') - end++; /* well-formed entity */ - else - return 0; /* not an entity */ - - /* way too long to be a valid numeric entity */ - if (numeric && content_end - content_start > MAX_NUM_ENTITY_LEN) - return 0; - - /* Validate the entity's contents */ - if (numeric) { - if (hex) - entity_base = 16; - else - entity_base = 10; - - // This is ok because it'll stop once it hits the ';' - entity_val = strtol((char*)data + content_start, NULL, entity_base); - if (!is_valid_numeric_entity(entity_val)) - return 0; - } else { - if (!is_allowed_named_entity((const char *)data, end)) - return 0; - } - - if (rndr->cb.entity) { - work.data = data; - work.size = end; - rndr->cb.entity(ob, &work, rndr->opaque); - } else { - /* Necessary so we can normalize `>` to `>` */ - bufputc(ob, '&'); - if (numeric) - bufputc(ob, '#'); - if (hex) - bufputc(ob, 'x'); - bufput(ob, data + content_start, end - content_start); - } - - return end; -} - -/* char_langle_tag • '<' when tags or autolinks are allowed */ -static size_t -char_langle_tag(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size) -{ - enum mkd_autolink altype = MKDA_NOT_AUTOLINK; - size_t end = tag_length(data, size, &altype); - struct buf work = { data, end, 0, 0 }; - int ret = 0; - - if (end > 2) { - if (rndr->cb.autolink && altype != MKDA_NOT_AUTOLINK) { - struct buf *u_link = rndr_newbuf(rndr, BUFFER_SPAN); - work.data = data + 1; - work.size = end - 2; - unscape_text(u_link, &work); - ret = rndr->cb.autolink(ob, u_link, altype, rndr->opaque); - rndr_popbuf(rndr, BUFFER_SPAN); - } - else if (rndr->cb.raw_html_tag) - ret = rndr->cb.raw_html_tag(ob, &work, rndr->opaque); - } - - if (!ret) return 0; - else return end; -} - -static size_t -char_autolink_www(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size) -{ - struct buf *link, *link_url, *link_text; - size_t link_len, rewind; - - if (!rndr->cb.link || rndr->in_link_body) - return 0; - - link = rndr_newbuf(rndr, BUFFER_SPAN); - - if ((link_len = sd_autolink__www(&rewind, link, data, max_rewind, size, 0)) > 0) { - link_url = rndr_newbuf(rndr, BUFFER_SPAN); - BUFPUTSL(link_url, "http://"); - bufput(link_url, link->data, link->size); - - buftruncate(ob, ob->size - rewind); - if (rndr->cb.normal_text) { - link_text = rndr_newbuf(rndr, BUFFER_SPAN); - rndr->cb.normal_text(link_text, link, rndr->opaque); - rndr->cb.link(ob, link_url, NULL, link_text, rndr->opaque); - rndr_popbuf(rndr, BUFFER_SPAN); - } else { - rndr->cb.link(ob, link_url, NULL, link, rndr->opaque); - } - rndr_popbuf(rndr, BUFFER_SPAN); - } - - rndr_popbuf(rndr, BUFFER_SPAN); - return link_len; -} - -static size_t -char_autolink_subreddit_or_username(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size) -{ - struct buf *link, *link_text, *link_url; - size_t link_len, rewind; - int no_slash; - - if (!rndr->cb.autolink || rndr->in_link_body) - return 0; - - link = rndr_newbuf(rndr, BUFFER_SPAN); - - link_len = sd_autolink__subreddit(&rewind, link, data, max_rewind, max_lookbehind, size, &no_slash); - if (link_len == 0) - link_len = sd_autolink__username(&rewind, link, data, max_rewind, max_lookbehind, size, &no_slash); - - /* Found either a user or subreddit link */ - if (link_len > 0) { - link_url = rndr_newbuf(rndr, BUFFER_SPAN); - if (no_slash) - bufputc(link_url, '/'); - bufput(link_url, link->data, link->size); - - buftruncate(ob, ob->size - rewind); - if (rndr->cb.normal_text) { - link_text = rndr_newbuf(rndr, BUFFER_SPAN); - rndr->cb.normal_text(link_text, link, rndr->opaque); - rndr->cb.link(ob, link_url, NULL, link_text, rndr->opaque); - rndr_popbuf(rndr, BUFFER_SPAN); - } else { - rndr->cb.link(ob, link_url, NULL, link, rndr->opaque); - } - rndr_popbuf(rndr, BUFFER_SPAN); - } - rndr_popbuf(rndr, BUFFER_SPAN); - - return link_len; -} - -static size_t -char_autolink_email(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size) -{ - struct buf *link; - size_t link_len, rewind; - - if (!rndr->cb.autolink || rndr->in_link_body) - return 0; - - link = rndr_newbuf(rndr, BUFFER_SPAN); - - if ((link_len = sd_autolink__email(&rewind, link, data, max_rewind, size, 0)) > 0) { - buftruncate(ob, ob->size - rewind); - rndr->cb.autolink(ob, link, MKDA_EMAIL, rndr->opaque); - } - - rndr_popbuf(rndr, BUFFER_SPAN); - return link_len; -} - -static size_t -char_autolink_url(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size) -{ - struct buf *link; - size_t link_len, rewind; - - if (!rndr->cb.autolink || rndr->in_link_body) - return 0; - - link = rndr_newbuf(rndr, BUFFER_SPAN); - - if ((link_len = sd_autolink__url(&rewind, link, data, max_rewind, size, 0)) > 0) { - buftruncate(ob, ob->size - rewind); - rndr->cb.autolink(ob, link, MKDA_NORMAL, rndr->opaque); - } - - rndr_popbuf(rndr, BUFFER_SPAN); - return link_len; -} - -/* char_link • '[': parsing a link or an image */ -static size_t -char_link(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size) -{ - int is_img = (max_rewind && data[-1] == '!'), level; - size_t i = 1, txt_e, link_b = 0, link_e = 0, title_b = 0, title_e = 0; - struct buf *content = 0; - struct buf *link = 0; - struct buf *title = 0; - struct buf *u_link = 0; - size_t org_work_size = rndr->work_bufs[BUFFER_SPAN].size; - int text_has_nl = 0, ret = 0; - int in_title = 0, qtype = 0; - - /* checking whether the correct renderer exists */ - if ((is_img && !rndr->cb.image) || (!is_img && !rndr->cb.link)) - goto cleanup; - - /* looking for the matching closing bracket */ - for (level = 1; i < size; i++) { - if (data[i] == '\n') - text_has_nl = 1; - - else if (data[i - 1] == '\\') - continue; - - else if (data[i] == '[') - level++; - - else if (data[i] == ']') { - level--; - if (level <= 0) - break; - } - } - - if (i >= size) - goto cleanup; - - txt_e = i; - i++; - - /* skip any amount of whitespace or newline */ - /* (this is much more laxist than original markdown syntax) */ - while (i < size && _isspace(data[i])) - i++; - - /* inline style link */ - if (i < size && data[i] == '(') { - /* skipping initial whitespace */ - i++; - - while (i < size && _isspace(data[i])) - i++; - - link_b = i; - - /* looking for link end: ' " ) */ - while (i < size) { - if (data[i] == '\\') i += 2; - else if (data[i] == ')') break; - else if (i >= 1 && _isspace(data[i-1]) && (data[i] == '\'' || data[i] == '"')) break; - else i++; - } - - if (i >= size) goto cleanup; - link_e = i; - - /* looking for title end if present */ - if (data[i] == '\'' || data[i] == '"') { - qtype = data[i]; - in_title = 1; - i++; - title_b = i; - - while (i < size) { - if (data[i] == '\\') i += 2; - else if (data[i] == qtype) {in_title = 0; i++;} - else if ((data[i] == ')') && !in_title) break; - else i++; - } - - if (i >= size) goto cleanup; - - /* skipping whitespaces after title */ - title_e = i - 1; - while (title_e > title_b && _isspace(data[title_e])) - title_e--; - - /* checking for closing quote presence */ - if (data[title_e] != '\'' && data[title_e] != '"') { - title_b = title_e = 0; - link_e = i; - } - } - - /* remove whitespace at the end of the link */ - while (link_e > link_b && _isspace(data[link_e - 1])) - link_e--; - - /* remove optional angle brackets around the link */ - if (data[link_b] == '<') link_b++; - if (data[link_e - 1] == '>') link_e--; - - /* building escaped link and title */ - if (link_e > link_b) { - link = rndr_newbuf(rndr, BUFFER_SPAN); - bufput(link, data + link_b, link_e - link_b); - } - - if (title_e > title_b) { - title = rndr_newbuf(rndr, BUFFER_SPAN); - bufput(title, data + title_b, title_e - title_b); - } - - i++; - } - - /* reference style link */ - else if (i < size && data[i] == '[') { - struct buf id = { 0, 0, 0, 0 }; - struct link_ref *lr; - - /* looking for the id */ - i++; - link_b = i; - while (i < size && data[i] != ']') i++; - if (i >= size) goto cleanup; - link_e = i; - - /* finding the link_ref */ - if (link_b == link_e) { - if (text_has_nl) { - struct buf *b = rndr_newbuf(rndr, BUFFER_SPAN); - size_t j; - - for (j = 1; j < txt_e; j++) { - if (data[j] != '\n') - bufputc(b, data[j]); - else if (data[j - 1] != ' ') - bufputc(b, ' '); - } - - id.data = b->data; - id.size = b->size; - } else { - id.data = data + 1; - id.size = txt_e - 1; - } - } else { - id.data = data + link_b; - id.size = link_e - link_b; - } - - lr = find_link_ref(rndr->refs, id.data, id.size); - if (!lr) - goto cleanup; - - /* keeping link and title from link_ref */ - link = lr->link; - title = lr->title; - i++; - } - - /* shortcut reference style link */ - else { - struct buf id = { 0, 0, 0, 0 }; - struct link_ref *lr; - - /* crafting the id */ - if (text_has_nl) { - struct buf *b = rndr_newbuf(rndr, BUFFER_SPAN); - size_t j; - - for (j = 1; j < txt_e; j++) { - if (data[j] != '\n') - bufputc(b, data[j]); - else if (data[j - 1] != ' ') - bufputc(b, ' '); - } - - id.data = b->data; - id.size = b->size; - } else { - id.data = data + 1; - id.size = txt_e - 1; - } - - /* finding the link_ref */ - lr = find_link_ref(rndr->refs, id.data, id.size); - if (!lr) - goto cleanup; - - /* keeping link and title from link_ref */ - link = lr->link; - title = lr->title; - - /* rewinding the whitespace */ - i = txt_e + 1; - } - - /* building content: img alt is escaped, link content is parsed */ - if (txt_e > 1) { - content = rndr_newbuf(rndr, BUFFER_SPAN); - if (is_img) { - bufput(content, data + 1, txt_e - 1); - } else { - /* disable autolinking when parsing inline the - * content of a link */ - rndr->in_link_body = 1; - parse_inline(content, rndr, data + 1, txt_e - 1); - rndr->in_link_body = 0; - } - } - - if (link) { - u_link = rndr_newbuf(rndr, BUFFER_SPAN); - unscape_text(u_link, link); - } else { - goto cleanup; - } - - /* calling the relevant rendering function */ - if (is_img) { - if (ob->size && ob->data[ob->size - 1] == '!') - ob->size -= 1; - - ret = rndr->cb.image(ob, u_link, title, content, rndr->opaque); - } else { - ret = rndr->cb.link(ob, u_link, title, content, rndr->opaque); - } - - /* cleanup */ -cleanup: - rndr->work_bufs[BUFFER_SPAN].size = (int)org_work_size; - return ret ? i : 0; -} - -static size_t -char_superscript(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size) -{ - size_t sup_start, sup_len; - struct buf *sup; - - if (!rndr->cb.superscript) - return 0; - - if (size < 2) - return 0; - - if (data[1] == '(') { - sup_start = sup_len = 2; - - while (sup_len < size && data[sup_len] != ')' && data[sup_len - 1] != '\\') - sup_len++; - - if (sup_len == size) - return 0; - } else { - sup_start = sup_len = 1; - - while (sup_len < size && !_isspace(data[sup_len])) - sup_len++; - } - - if (sup_len - sup_start == 0) - return (sup_start == 2) ? 3 : 0; - - sup = rndr_newbuf(rndr, BUFFER_SPAN); - parse_inline(sup, rndr, data + sup_start, sup_len - sup_start); - rndr->cb.superscript(ob, sup, rndr->opaque); - rndr_popbuf(rndr, BUFFER_SPAN); - - return (sup_start == 2) ? sup_len + 1 : sup_len; -} - -/********************************* - * BLOCK-LEVEL PARSING FUNCTIONS * - *********************************/ - -/* is_empty • returns the line length when it is empty, 0 otherwise */ -static size_t -is_empty(uint8_t *data, size_t size) -{ - size_t i; - - for (i = 0; i < size && data[i] != '\n'; i++) - if (data[i] != ' ') - return 0; - - return i + 1; -} - -/* is_hrule • returns whether a line is a horizontal rule */ -static int -is_hrule(uint8_t *data, size_t size) -{ - size_t i = 0, n = 0; - uint8_t c; - - /* skipping initial spaces */ - if (size < 3) return 0; - if (data[0] == ' ') { i++; - if (data[1] == ' ') { i++; - if (data[2] == ' ') { i++; } } } - - /* looking at the hrule uint8_t */ - if (i + 2 >= size - || (data[i] != '*' && data[i] != '-' && data[i] != '_')) - return 0; - c = data[i]; - - /* the whole line must be the char or whitespace */ - while (i < size && data[i] != '\n') { - if (data[i] == c) n++; - else if (data[i] != ' ') - return 0; - - i++; - } - - return n >= 3; -} - -/* check if a line begins with a code fence; return the - * width of the code fence */ -static size_t -prefix_codefence(uint8_t *data, size_t size) -{ - size_t i = 0, n = 0; - uint8_t c; - - /* skipping initial spaces */ - if (size < 3) return 0; - if (data[0] == ' ') { i++; - if (data[1] == ' ') { i++; - if (data[2] == ' ') { i++; } } } - - /* looking at the hrule uint8_t */ - if (i + 2 >= size || !(data[i] == '~' || data[i] == '`')) - return 0; - - c = data[i]; - - /* the whole line must be the uint8_t or whitespace */ - while (i < size && data[i] == c) { - n++; i++; - } - - if (n < 3) - return 0; - - return i; -} - -/* check if a line is a code fence; return its size if it is */ -static size_t -is_codefence(uint8_t *data, size_t size, struct buf *syntax) -{ - size_t i = 0, syn_len = 0; - uint8_t *syn_start; - - i = prefix_codefence(data, size); - if (i == 0) - return 0; - - while (i < size && data[i] == ' ') - i++; - - syn_start = data + i; - - if (i < size && data[i] == '{') { - i++; syn_start++; - - while (i < size && data[i] != '}' && data[i] != '\n') { - syn_len++; i++; - } - - if (i == size || data[i] != '}') - return 0; - - /* strip all whitespace at the beginning and the end - * of the {} block */ - while (syn_len > 0 && _isspace(syn_start[0])) { - syn_start++; syn_len--; - } - - while (syn_len > 0 && _isspace(syn_start[syn_len - 1])) - syn_len--; - - i++; - } else { - while (i < size && !_isspace(data[i])) { - syn_len++; i++; - } - } - - if (syntax) { - syntax->data = syn_start; - syntax->size = syn_len; - } - - while (i < size && data[i] != '\n') { - if (!_isspace(data[i])) - return 0; - - i++; - } - - return i + 1; -} - -/* is_atxheader • returns whether the line is a hash-prefixed header */ -static int -is_atxheader(struct sd_markdown *rndr, uint8_t *data, size_t size) -{ - if (data[0] != '#') - return 0; - - if (rndr->ext_flags & MKDEXT_SPACE_HEADERS) { - size_t level = 0; - - while (level < size && level < 6 && data[level] == '#') - level++; - - if (level < size && data[level] != ' ') - return 0; - } - - return 1; -} - -/* is_headerline • returns whether the line is a setext-style hdr underline */ -static int -is_headerline(uint8_t *data, size_t size) -{ - size_t i = 0; - - /* test of level 1 header */ - if (data[i] == '=') { - for (i = 1; i < size && data[i] == '='; i++); - while (i < size && data[i] == ' ') i++; - return (i >= size || data[i] == '\n') ? 1 : 0; } - - /* test of level 2 header */ - if (data[i] == '-') { - for (i = 1; i < size && data[i] == '-'; i++); - while (i < size && data[i] == ' ') i++; - return (i >= size || data[i] == '\n') ? 2 : 0; } - - return 0; -} - -static int -is_next_headerline(uint8_t *data, size_t size) -{ - size_t i = 0; - - while (i < size && data[i] != '\n') - i++; - - if (++i >= size) - return 0; - - return is_headerline(data + i, size - i); -} - -/* prefix_quote • returns blockquote prefix length */ -static size_t -prefix_quote(uint8_t *data, size_t size) -{ - size_t i = 0; - if (i < size && data[i] == ' ') i++; - if (i < size && data[i] == ' ') i++; - if (i < size && data[i] == ' ') i++; - - if (i < size && data[i] == '>') { - if (i + 1 < size && data[i + 1] == ' ') - return i + 2; - - return i + 1; - } - - return 0; -} - -/* prefix_code • returns prefix length for block code*/ -static size_t -prefix_code(uint8_t *data, size_t size) -{ - if (size > 3 && data[0] == ' ' && data[1] == ' ' - && data[2] == ' ' && data[3] == ' ') return 4; - - return 0; -} - -/* prefix_oli • returns ordered list item prefix */ -static size_t -prefix_oli(uint8_t *data, size_t size) -{ - size_t i = 0; - - if (i < size && data[i] == ' ') i++; - if (i < size && data[i] == ' ') i++; - if (i < size && data[i] == ' ') i++; - - if (i >= size || data[i] < '0' || data[i] > '9') - return 0; - - while (i < size && data[i] >= '0' && data[i] <= '9') - i++; - - if (i + 1 >= size || data[i] != '.' || data[i + 1] != ' ') - return 0; - - if (is_next_headerline(data + i, size - i)) - return 0; - - return i + 2; -} - -/* prefix_uli • returns ordered list item prefix */ -static size_t -prefix_uli(uint8_t *data, size_t size) -{ - size_t i = 0; - - if (i < size && data[i] == ' ') i++; - if (i < size && data[i] == ' ') i++; - if (i < size && data[i] == ' ') i++; - - if (i + 1 >= size || - (data[i] != '*' && data[i] != '+' && data[i] != '-') || - data[i + 1] != ' ') - return 0; - - if (is_next_headerline(data + i, size - i)) - return 0; - - return i + 2; -} - - -/* parse_block • parsing of one block, returning next uint8_t to parse */ -static void parse_block(struct buf *ob, struct sd_markdown *rndr, - uint8_t *data, size_t size); - - -/* parse_blockquote • handles parsing of a blockquote fragment */ -static size_t -parse_blockquote(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size) -{ - size_t beg, end = 0, pre, work_size = 0; - uint8_t *work_data = 0; - struct buf *out = 0; - - out = rndr_newbuf(rndr, BUFFER_BLOCK); - beg = 0; - while (beg < size) { - for (end = beg + 1; end < size && data[end - 1] != '\n'; end++); - - pre = prefix_quote(data + beg, end - beg); - - if (pre) - beg += pre; /* skipping prefix */ - - /* empty line followed by non-quote line */ - else if (is_empty(data + beg, end - beg) && - (end >= size || (prefix_quote(data + end, size - end) == 0 && - !is_empty(data + end, size - end)))) - break; - - if (beg < end) { /* copy into the in-place working buffer */ - /* bufput(work, data + beg, end - beg); */ - if (!work_data) - work_data = data + beg; - else if (data + beg != work_data + work_size) - memmove(work_data + work_size, data + beg, end - beg); - work_size += end - beg; - } - beg = end; - } - - parse_block(out, rndr, work_data, work_size); - if (rndr->cb.blockquote) - rndr->cb.blockquote(ob, out, rndr->opaque); - rndr_popbuf(rndr, BUFFER_BLOCK); - return end; -} - -static size_t -parse_htmlblock(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int do_render); - -/* parse_blockquote • handles parsing of a regular paragraph */ -static size_t -parse_paragraph(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size) -{ - size_t i = 0, end = 0; - int level = 0; - struct buf work = { data, 0, 0, 0 }; - - while (i < size) { - for (end = i + 1; end < size && data[end - 1] != '\n'; end++) /* empty */; - - if (prefix_quote(data + i, end - i) != 0) { - end = i; - break; - } - - if (is_empty(data + i, size - i)) - break; - - if ((level = is_headerline(data + i, size - i)) != 0) - break; - - if (is_atxheader(rndr, data + i, size - i) || - is_hrule(data + i, size - i) || - prefix_quote(data + i, size - i)) { - end = i; - break; - } - - /* - * Early termination of a paragraph with the same logic - * as Markdown 1.0.0. If this logic is applied, the - * Markdown 1.0.3 test suite won't pass cleanly - * - * :: If the first character in a new line is not a letter, - * let's check to see if there's some kind of block starting - * here - */ - if ((rndr->ext_flags & MKDEXT_LAX_SPACING) && !isalnum(data[i])) { - if (prefix_oli(data + i, size - i) || - prefix_uli(data + i, size - i)) { - end = i; - break; - } - - /* see if an html block starts here */ - if (data[i] == '<' && rndr->cb.blockhtml && - parse_htmlblock(ob, rndr, data + i, size - i, 0)) { - end = i; - break; - } - - /* see if a code fence starts here */ - if ((rndr->ext_flags & MKDEXT_FENCED_CODE) != 0 && - is_codefence(data + i, size - i, NULL) != 0) { - end = i; - break; - } - } - - i = end; - } - - work.size = i; - while (work.size && data[work.size - 1] == '\n') - work.size--; - - if (!level) { - struct buf *tmp = rndr_newbuf(rndr, BUFFER_BLOCK); - parse_inline(tmp, rndr, work.data, work.size); - if (rndr->cb.paragraph) - rndr->cb.paragraph(ob, tmp, rndr->opaque); - rndr_popbuf(rndr, BUFFER_BLOCK); - } else { - struct buf *header_work; - - if (work.size) { - size_t beg; - i = work.size; - work.size -= 1; - - while (work.size && data[work.size] != '\n') - work.size -= 1; - - beg = work.size + 1; - while (work.size && data[work.size - 1] == '\n') - work.size -= 1; - - if (work.size > 0) { - struct buf *tmp = rndr_newbuf(rndr, BUFFER_BLOCK); - parse_inline(tmp, rndr, work.data, work.size); - - if (rndr->cb.paragraph) - rndr->cb.paragraph(ob, tmp, rndr->opaque); - - rndr_popbuf(rndr, BUFFER_BLOCK); - work.data += beg; - work.size = i - beg; - } - else work.size = i; - } - - header_work = rndr_newbuf(rndr, BUFFER_SPAN); - parse_inline(header_work, rndr, work.data, work.size); - - if (rndr->cb.header) - rndr->cb.header(ob, header_work, (int)level, rndr->opaque); - - rndr_popbuf(rndr, BUFFER_SPAN); - } - - return end; -} - -/* parse_fencedcode • handles parsing of a block-level code fragment */ -static size_t -parse_fencedcode(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size) -{ - size_t beg, end; - struct buf *work = 0; - struct buf lang = { 0, 0, 0, 0 }; - - beg = is_codefence(data, size, &lang); - if (beg == 0) return 0; - - work = rndr_newbuf(rndr, BUFFER_BLOCK); - - while (beg < size) { - size_t fence_end; - struct buf fence_trail = { 0, 0, 0, 0 }; - - fence_end = is_codefence(data + beg, size - beg, &fence_trail); - if (fence_end != 0 && fence_trail.size == 0) { - beg += fence_end; - break; - } - - for (end = beg + 1; end < size && data[end - 1] != '\n'; end++); - - if (beg < end) { - /* verbatim copy to the working buffer, - escaping entities */ - if (is_empty(data + beg, end - beg)) - bufputc(work, '\n'); - else bufput(work, data + beg, end - beg); - } - beg = end; - } - - if (work->size && work->data[work->size - 1] != '\n') - bufputc(work, '\n'); - - if (rndr->cb.blockcode) - rndr->cb.blockcode(ob, work, lang.size ? &lang : NULL, rndr->opaque); - - rndr_popbuf(rndr, BUFFER_BLOCK); - return beg; -} - -static size_t -parse_blockcode(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size) -{ - size_t beg, end, pre; - struct buf *work = 0; - - work = rndr_newbuf(rndr, BUFFER_BLOCK); - - beg = 0; - while (beg < size) { - for (end = beg + 1; end < size && data[end - 1] != '\n'; end++) {}; - pre = prefix_code(data + beg, end - beg); - - if (pre) - beg += pre; /* skipping prefix */ - else if (!is_empty(data + beg, end - beg)) - /* non-empty non-prefixed line breaks the pre */ - break; - - if (beg < end) { - /* verbatim copy to the working buffer, - escaping entities */ - if (is_empty(data + beg, end - beg)) - bufputc(work, '\n'); - else bufput(work, data + beg, end - beg); - } - beg = end; - } - - while (work->size && work->data[work->size - 1] == '\n') - work->size -= 1; - - bufputc(work, '\n'); - - if (rndr->cb.blockcode) - rndr->cb.blockcode(ob, work, NULL, rndr->opaque); - - rndr_popbuf(rndr, BUFFER_BLOCK); - return beg; -} - -/* parse_listitem • parsing of a single list item */ -/* assuming initial prefix is already removed */ -static size_t -parse_listitem(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int *flags) -{ - struct buf *work = 0, *inter = 0; - size_t beg = 0, end, pre, sublist = 0, orgpre = 0, i; - int in_empty = 0, has_inside_empty = 0, in_fence = 0; - - /* keeping track of the first indentation prefix */ - while (orgpre < 3 && orgpre < size && data[orgpre] == ' ') - orgpre++; - - beg = prefix_uli(data, size); - if (!beg) - beg = prefix_oli(data, size); - - if (!beg) - return 0; - - /* skipping to the beginning of the following line */ - end = beg; - while (end < size && data[end - 1] != '\n') - end++; - - /* getting working buffers */ - work = rndr_newbuf(rndr, BUFFER_SPAN); - inter = rndr_newbuf(rndr, BUFFER_SPAN); - - /* putting the first line into the working buffer */ - bufput(work, data + beg, end - beg); - beg = end; - - /* process the following lines */ - while (beg < size) { - size_t has_next_uli = 0, has_next_oli = 0; - - end++; - - while (end < size && data[end - 1] != '\n') - end++; - - /* process an empty line */ - if (is_empty(data + beg, end - beg)) { - in_empty = 1; - beg = end; - continue; - } - - /* calculating the indentation */ - i = 0; - while (i < 4 && beg + i < end && data[beg + i] == ' ') - i++; - - pre = i; - - if (rndr->ext_flags & MKDEXT_FENCED_CODE) { - if (is_codefence(data + beg + i, end - beg - i, NULL) != 0) - in_fence = !in_fence; - } - - /* Only check for new list items if we are **not** inside - * a fenced code block */ - if (!in_fence) { - has_next_uli = prefix_uli(data + beg + i, end - beg - i); - has_next_oli = prefix_oli(data + beg + i, end - beg - i); - } - - /* checking for ul/ol switch */ - if (in_empty && ( - ((*flags & MKD_LIST_ORDERED) && has_next_uli) || - (!(*flags & MKD_LIST_ORDERED) && has_next_oli))){ - *flags |= MKD_LI_END; - break; /* the following item must have same list type */ - } - - /* checking for a new item */ - if ((has_next_uli && !is_hrule(data + beg + i, end - beg - i)) || has_next_oli) { - if (in_empty) - has_inside_empty = 1; - - if (pre == orgpre) /* the following item must have */ - break; /* the same indentation */ - - if (!sublist) - sublist = work->size; - } - /* joining only indented stuff after empty lines; - * note that now we only require 1 space of indentation - * to continue a list */ - else if (in_empty && pre == 0) { - *flags |= MKD_LI_END; - break; - } - else if (in_empty) { - bufputc(work, '\n'); - has_inside_empty = 1; - } - - in_empty = 0; - - /* adding the line without prefix into the working buffer */ - bufput(work, data + beg + i, end - beg - i); - beg = end; - } - - /* render of li contents */ - if (has_inside_empty) - *flags |= MKD_LI_BLOCK; - - if (*flags & MKD_LI_BLOCK) { - /* intermediate render of block li */ - if (sublist && sublist < work->size) { - parse_block(inter, rndr, work->data, sublist); - parse_block(inter, rndr, work->data + sublist, work->size - sublist); - } - else - parse_block(inter, rndr, work->data, work->size); - } else { - /* intermediate render of inline li */ - if (sublist && sublist < work->size) { - parse_inline(inter, rndr, work->data, sublist); - parse_block(inter, rndr, work->data + sublist, work->size - sublist); - } - else - parse_inline(inter, rndr, work->data, work->size); - } - - /* render of li itself */ - if (rndr->cb.listitem) - rndr->cb.listitem(ob, inter, *flags, rndr->opaque); - - rndr_popbuf(rndr, BUFFER_SPAN); - rndr_popbuf(rndr, BUFFER_SPAN); - return beg; -} - - -/* parse_list • parsing ordered or unordered list block */ -static size_t -parse_list(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int flags) -{ - struct buf *work = 0; - size_t i = 0, j; - - work = rndr_newbuf(rndr, BUFFER_BLOCK); - - while (i < size) { - j = parse_listitem(work, rndr, data + i, size - i, &flags); - i += j; - - if (!j || (flags & MKD_LI_END)) - break; - } - - if (rndr->cb.list) - rndr->cb.list(ob, work, flags, rndr->opaque); - rndr_popbuf(rndr, BUFFER_BLOCK); - return i; -} - -/* parse_atxheader • parsing of atx-style headers */ -static size_t -parse_atxheader(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size) -{ - size_t level = 0; - size_t i, end, skip; - - while (level < size && level < 6 && data[level] == '#') - level++; - - for (i = level; i < size && data[i] == ' '; i++); - - for (end = i; end < size && data[end] != '\n'; end++); - skip = end; - - while (end && data[end - 1] == '#') - end--; - - while (end && data[end - 1] == ' ') - end--; - - if (end > i) { - struct buf *work = rndr_newbuf(rndr, BUFFER_SPAN); - - parse_inline(work, rndr, data + i, end - i); - - if (rndr->cb.header) - rndr->cb.header(ob, work, (int)level, rndr->opaque); - - rndr_popbuf(rndr, BUFFER_SPAN); - } - - return skip; -} - - -/* htmlblock_end • checking end of HTML block : [ \t]*\n[ \t*]\n */ -/* returns the length on match, 0 otherwise */ -static size_t -htmlblock_end_tag( - const char *tag, - size_t tag_len, - struct sd_markdown *rndr, - uint8_t *data, - size_t size) -{ - size_t i, w; - - /* checking if tag is a match */ - if (tag_len + 3 >= size || - strncasecmp((char *)data + 2, tag, tag_len) != 0 || - data[tag_len + 2] != '>') - return 0; - - /* checking white lines */ - i = tag_len + 3; - w = 0; - if (i < size && (w = is_empty(data + i, size - i)) == 0) - return 0; /* non-blank after tag */ - i += w; - w = 0; - - if (i < size) - w = is_empty(data + i, size - i); - - return i + w; -} - -static size_t -htmlblock_end(const char *curtag, - struct sd_markdown *rndr, - uint8_t *data, - size_t size, - int start_of_line) -{ - size_t tag_size = strlen(curtag); - size_t i = 1, end_tag; - int block_lines = 0; - - while (i < size) { - i++; - while (i < size && !(data[i - 1] == '<' && data[i] == '/')) { - if (data[i] == '\n') - block_lines++; - - i++; - } - - /* If we are only looking for unindented tags, skip the tag - * if it doesn't follow a newline. - * - * The only exception to this is if the tag is still on the - * initial line; in that case it still counts as a closing - * tag - */ - if (start_of_line && block_lines > 0 && data[i - 2] != '\n') - continue; - - if (i + 2 + tag_size >= size) - break; - - end_tag = htmlblock_end_tag(curtag, tag_size, rndr, data + i - 1, size - i + 1); - if (end_tag) - return i + end_tag - 1; - } - - return 0; -} - - -/* parse_htmlblock • parsing of inline HTML block */ -static size_t -parse_htmlblock(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int do_render) -{ - size_t i, j = 0, tag_end; - const char *curtag = NULL; - struct buf work = { data, 0, 0, 0 }; - - /* identification of the opening tag */ - if (size < 2 || data[0] != '<') - return 0; - - i = 1; - while (i < size && data[i] != '>' && data[i] != ' ') - i++; - - if (i < size) - curtag = find_block_tag((char *)data + 1, (int)i - 1); - - /* handling of special cases */ - if (!curtag) { - - /* HTML comment, laxist form */ - if (size > 5 && data[1] == '!' && data[2] == '-' && data[3] == '-') { - i = 5; - - while (i < size && !(data[i - 2] == '-' && data[i - 1] == '-' && data[i] == '>')) - i++; - - i++; - - if (i < size) - j = is_empty(data + i, size - i); - - if (j) { - work.size = i + j; - if (do_render && rndr->cb.blockhtml) - rndr->cb.blockhtml(ob, &work, rndr->opaque); - return work.size; - } - } - - /* HR, which is the only self-closing block tag considered */ - if (size > 4 && (data[1] == 'h' || data[1] == 'H') && (data[2] == 'r' || data[2] == 'R')) { - i = 3; - while (i < size && data[i] != '>') - i++; - - if (i + 1 < size) { - i++; - j = is_empty(data + i, size - i); - if (j) { - work.size = i + j; - if (do_render && rndr->cb.blockhtml) - rndr->cb.blockhtml(ob, &work, rndr->opaque); - return work.size; - } - } - } - - /* no special case recognised */ - return 0; - } - - /* looking for an unindented matching closing tag */ - /* followed by a blank line */ - tag_end = htmlblock_end(curtag, rndr, data, size, 1); - - /* if not found, trying a second pass looking for indented match */ - /* but not if tag is "ins" or "del" (following original Markdown.pl) */ - if (!tag_end && strcmp(curtag, "ins") != 0 && strcmp(curtag, "del") != 0) { - tag_end = htmlblock_end(curtag, rndr, data, size, 0); - } - - if (!tag_end) - return 0; - - /* the end of the block has been found */ - work.size = tag_end; - if (do_render && rndr->cb.blockhtml) - rndr->cb.blockhtml(ob, &work, rndr->opaque); - - return tag_end; -} - -static void -parse_table_row( - struct buf *ob, - struct sd_markdown *rndr, - uint8_t *data, - size_t size, - size_t columns, - int *col_data, - int header_flag) -{ - size_t i = 0, col, cols_left; - struct buf *row_work = 0; - - if (!rndr->cb.table_cell || !rndr->cb.table_row) - return; - - row_work = rndr_newbuf(rndr, BUFFER_SPAN); - - if (i < size && data[i] == '|') - i++; - - for (col = 0; col < columns && i < size; ++col) { - size_t cell_start, cell_end; - struct buf *cell_work; - - cell_work = rndr_newbuf(rndr, BUFFER_SPAN); - - while (i < size && _isspace(data[i])) - i++; - - cell_start = i; - - while (i < size && data[i] != '|') - i++; - - cell_end = i - 1; - - while (cell_end > cell_start && _isspace(data[cell_end])) - cell_end--; - - parse_inline(cell_work, rndr, data + cell_start, 1 + cell_end - cell_start); - rndr->cb.table_cell(row_work, cell_work, col_data[col] | header_flag, rndr->opaque, 0); - - rndr_popbuf(rndr, BUFFER_SPAN); - i++; - } - - cols_left = columns - col; - if (cols_left > 0) { - struct buf empty_cell = { 0, 0, 0, 0 }; - rndr->cb.table_cell(row_work, &empty_cell, col_data[col] | header_flag, rndr->opaque, cols_left); - } - - rndr->cb.table_row(ob, row_work, rndr->opaque); - - rndr_popbuf(rndr, BUFFER_SPAN); -} - -static size_t -parse_table_header( - struct buf *ob, - struct sd_markdown *rndr, - uint8_t *data, - size_t size, - size_t *columns, - int **column_data) -{ - int pipes; - size_t i = 0, col, header_end, under_end; - - pipes = 0; - while (i < size && data[i] != '\n') - if (data[i++] == '|') - pipes++; - - if (i == size || pipes == 0) - return 0; - - header_end = i; - - while (header_end > 0 && _isspace(data[header_end - 1])) - header_end--; - - if (data[0] == '|') - pipes--; - - if (header_end && data[header_end - 1] == '|') - pipes--; - - if (pipes + 1 > rndr->max_table_cols) - return 0; - - *columns = pipes + 1; - *column_data = calloc(*columns, sizeof(int)); - - /* Parse the header underline */ - i++; - if (i < size && data[i] == '|') - i++; - - under_end = i; - while (under_end < size && data[under_end] != '\n') - under_end++; - - for (col = 0; col < *columns && i < under_end; ++col) { - size_t dashes = 0; - - while (i < under_end && data[i] == ' ') - i++; - - if (data[i] == ':') { - i++; (*column_data)[col] |= MKD_TABLE_ALIGN_L; - dashes++; - } - - while (i < under_end && data[i] == '-') { - i++; dashes++; - } - - if (i < under_end && data[i] == ':') { - i++; (*column_data)[col] |= MKD_TABLE_ALIGN_R; - dashes++; - } - - while (i < under_end && data[i] == ' ') - i++; - - if (i < under_end && data[i] != '|') - break; - - if (dashes < 1) - break; - - i++; - } - - if (col < *columns) - return 0; - - parse_table_row( - ob, rndr, data, - header_end, - *columns, - *column_data, - MKD_TABLE_HEADER - ); - - return under_end + 1; -} - -static size_t -parse_table( - struct buf *ob, - struct sd_markdown *rndr, - uint8_t *data, - size_t size) -{ - size_t i; - - struct buf *header_work = 0; - struct buf *body_work = 0; - - size_t columns; - int *col_data = NULL; - - header_work = rndr_newbuf(rndr, BUFFER_SPAN); - body_work = rndr_newbuf(rndr, BUFFER_BLOCK); - - i = parse_table_header(header_work, rndr, data, size, &columns, &col_data); - if (i > 0) { - - while (i < size) { - size_t row_start; - int pipes = 0; - - row_start = i; - - while (i < size && data[i] != '\n') - if (data[i++] == '|') - pipes++; - - if (pipes == 0 || i == size) { - i = row_start; - break; - } - - parse_table_row( - body_work, - rndr, - data + row_start, - i - row_start, - columns, - col_data, 0 - ); - - i++; - } - - if (rndr->cb.table) - rndr->cb.table(ob, header_work, body_work, rndr->opaque); - } - - free(col_data); - rndr_popbuf(rndr, BUFFER_SPAN); - rndr_popbuf(rndr, BUFFER_BLOCK); - return i; -} - -/* parse_block • parsing of one block, returning next uint8_t to parse */ -static void -parse_block(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size) -{ - size_t beg, end, i; - uint8_t *txt_data; - beg = 0; - - if (rndr->work_bufs[BUFFER_SPAN].size + - rndr->work_bufs[BUFFER_BLOCK].size > rndr->max_nesting) - return; - - while (beg < size) { - txt_data = data + beg; - end = size - beg; - - if (is_atxheader(rndr, txt_data, end)) - beg += parse_atxheader(ob, rndr, txt_data, end); - - else if (data[beg] == '<' && rndr->cb.blockhtml && - (i = parse_htmlblock(ob, rndr, txt_data, end, 1)) != 0) - beg += i; - - else if ((i = is_empty(txt_data, end)) != 0) - beg += i; - - else if (is_hrule(txt_data, end)) { - if (rndr->cb.hrule) - rndr->cb.hrule(ob, rndr->opaque); - - while (beg < size && data[beg] != '\n') - beg++; - - beg++; - } - - else if ((rndr->ext_flags & MKDEXT_FENCED_CODE) != 0 && - (i = parse_fencedcode(ob, rndr, txt_data, end)) != 0) - beg += i; - - else if ((rndr->ext_flags & MKDEXT_TABLES) != 0 && - (i = parse_table(ob, rndr, txt_data, end)) != 0) - beg += i; - - else if (prefix_quote(txt_data, end)) - beg += parse_blockquote(ob, rndr, txt_data, end); - - else if (prefix_code(txt_data, end)) - beg += parse_blockcode(ob, rndr, txt_data, end); - - else if (prefix_uli(txt_data, end)) - beg += parse_list(ob, rndr, txt_data, end, 0); - - else if (prefix_oli(txt_data, end)) - beg += parse_list(ob, rndr, txt_data, end, MKD_LIST_ORDERED); - - else - beg += parse_paragraph(ob, rndr, txt_data, end); - } -} - - - -/********************* - * REFERENCE PARSING * - *********************/ - -/* is_ref • returns whether a line is a reference or not */ -static int -is_ref(const uint8_t *data, size_t beg, size_t end, size_t *last, struct link_ref **refs) -{ -/* int n; */ - size_t i = 0; - size_t id_offset, id_end; - size_t link_offset, link_end; - size_t title_offset, title_end; - size_t line_end; - - /* up to 3 optional leading spaces */ - if (beg + 3 >= end) return 0; - if (data[beg] == ' ') { i = 1; - if (data[beg + 1] == ' ') { i = 2; - if (data[beg + 2] == ' ') { i = 3; - if (data[beg + 3] == ' ') return 0; } } } - i += beg; - - /* id part: anything but a newline between brackets */ - if (data[i] != '[') return 0; - i++; - id_offset = i; - while (i < end && data[i] != '\n' && data[i] != '\r' && data[i] != ']') - i++; - if (i >= end || data[i] != ']') return 0; - id_end = i; - - /* spacer: colon (space | tab)* newline? (space | tab)* */ - i++; - if (i >= end || data[i] != ':') return 0; - i++; - while (i < end && data[i] == ' ') i++; - if (i < end && (data[i] == '\n' || data[i] == '\r')) { - i++; - if (i < end && data[i] == '\r' && data[i - 1] == '\n') i++; } - while (i < end && data[i] == ' ') i++; - if (i >= end) return 0; - - /* link: whitespace-free sequence, optionally between angle brackets */ - if (data[i] == '<') - i++; - - link_offset = i; - - while (i < end && data[i] != ' ' && data[i] != '\n' && data[i] != '\r') - i++; - - if (data[i - 1] == '>') link_end = i - 1; - else link_end = i; - - /* optional spacer: (space | tab)* (newline | '\'' | '"' | '(' ) */ - while (i < end && data[i] == ' ') i++; - if (i < end && data[i] != '\n' && data[i] != '\r' - && data[i] != '\'' && data[i] != '"' && data[i] != '(') - return 0; - line_end = 0; - /* computing end-of-line */ - if (i >= end || data[i] == '\r' || data[i] == '\n') line_end = i; - if (i + 1 < end && data[i] == '\n' && data[i + 1] == '\r') - line_end = i + 1; - - /* optional (space|tab)* spacer after a newline */ - if (line_end) { - i = line_end + 1; - while (i < end && data[i] == ' ') i++; } - - /* optional title: any non-newline sequence enclosed in '"() - alone on its line */ - title_offset = title_end = 0; - if (i + 1 < end - && (data[i] == '\'' || data[i] == '"' || data[i] == '(')) { - i++; - title_offset = i; - /* looking for EOL */ - while (i < end && data[i] != '\n' && data[i] != '\r') i++; - if (i + 1 < end && data[i] == '\n' && data[i + 1] == '\r') - title_end = i + 1; - else title_end = i; - /* stepping back */ - i -= 1; - while (i > title_offset && data[i] == ' ') - i -= 1; - if (i > title_offset - && (data[i] == '\'' || data[i] == '"' || data[i] == ')')) { - line_end = title_end; - title_end = i; } } - - if (!line_end || link_end == link_offset) - return 0; /* garbage after the link empty link */ - - /* a valid ref has been found, filling-in return structures */ - if (last) - *last = line_end; - - if (refs) { - struct link_ref *ref; - - ref = add_link_ref(refs, data + id_offset, id_end - id_offset); - if (!ref) - return 0; - - ref->link = bufnew(link_end - link_offset); - bufput(ref->link, data + link_offset, link_end - link_offset); - - if (title_end > title_offset) { - ref->title = bufnew(title_end - title_offset); - bufput(ref->title, data + title_offset, title_end - title_offset); - } - } - - return 1; -} - -static void expand_tabs(struct buf *ob, const uint8_t *line, size_t size) -{ - size_t i = 0, tab = 0; - - while (i < size) { - size_t org = i; - - while (i < size && line[i] != '\t') { - i++; tab++; - } - - if (i > org) - bufput(ob, line + org, i - org); - - if (i >= size) - break; - - do { - bufputc(ob, ' '); tab++; - } while (tab % 4); - - i++; - } -} - -/********************** - * EXPORTED FUNCTIONS * - **********************/ - -struct sd_markdown * -sd_markdown_new( - unsigned int extensions, - size_t max_nesting, - size_t max_table_cols, - const struct sd_callbacks *callbacks, - void *opaque) -{ - struct sd_markdown *md = NULL; - - assert(max_nesting > 0 && max_table_cols > 0 && callbacks); - - md = malloc(sizeof(struct sd_markdown)); - if (!md) - return NULL; - - memcpy(&md->cb, callbacks, sizeof(struct sd_callbacks)); - - stack_init(&md->work_bufs[BUFFER_BLOCK], 4); - stack_init(&md->work_bufs[BUFFER_SPAN], 8); - - memset(md->active_char, 0x0, 256); - - if (md->cb.emphasis || md->cb.double_emphasis || md->cb.triple_emphasis) { - md->active_char['*'] = MD_CHAR_EMPHASIS; - md->active_char['_'] = MD_CHAR_EMPHASIS; - if (extensions & MKDEXT_STRIKETHROUGH) - md->active_char['~'] = MD_CHAR_EMPHASIS; - } - - if (md->cb.codespan) - md->active_char['`'] = MD_CHAR_CODESPAN; - - if (md->cb.linebreak) - md->active_char['\n'] = MD_CHAR_LINEBREAK; - - if (md->cb.image || md->cb.link) - md->active_char['['] = MD_CHAR_LINK; - - md->active_char['<'] = MD_CHAR_LANGLE; - md->active_char['\\'] = MD_CHAR_ESCAPE; - md->active_char['&'] = MD_CHAR_ENTITITY; - - if (extensions & MKDEXT_AUTOLINK) { - if (!(extensions & MKDEXT_NO_EMAIL_AUTOLINK)) - md->active_char['@'] = MD_CHAR_AUTOLINK_EMAIL; - md->active_char[':'] = MD_CHAR_AUTOLINK_URL; - md->active_char['w'] = MD_CHAR_AUTOLINK_WWW; - md->active_char['/'] = MD_CHAR_AUTOLINK_SUBREDDIT_OR_USERNAME; - } - - if (extensions & MKDEXT_SUPERSCRIPT) - md->active_char['^'] = MD_CHAR_SUPERSCRIPT; - - /* Extension data */ - md->ext_flags = extensions; - md->opaque = opaque; - md->max_nesting = max_nesting; - md->max_table_cols = max_table_cols; - md->in_link_body = 0; - - return md; -} - -void -sd_markdown_render(struct buf *ob, const uint8_t *document, size_t doc_size, struct sd_markdown *md) -{ -#define MARKDOWN_GROW(x) ((x) + ((x) >> 1)) - static const char UTF8_BOM[] = {0xEF, 0xBB, 0xBF}; - - struct buf *text; - size_t beg, end; - - text = bufnew(64); - if (!text) - return; - - /* Preallocate enough space for our buffer to avoid expanding while copying */ - bufgrow(text, doc_size); - - /* reset the references table */ - memset(&md->refs, 0x0, REF_TABLE_SIZE * sizeof(void *)); - - /* first pass: looking for references, copying everything else */ - beg = 0; - - /* Skip a possible UTF-8 BOM, even though the Unicode standard - * discourages having these in UTF-8 documents */ - if (doc_size >= 3 && memcmp(document, UTF8_BOM, 3) == 0) - beg += 3; - - while (beg < doc_size) /* iterating over lines */ - if (is_ref(document, beg, doc_size, &end, md->refs)) - beg = end; - else { /* skipping to the next line */ - end = beg; - while (end < doc_size && document[end] != '\n' && document[end] != '\r') - end++; - - /* adding the line body if present */ - if (end > beg) - expand_tabs(text, document + beg, end - beg); - - while (end < doc_size && (document[end] == '\n' || document[end] == '\r')) { - /* add one \n per newline */ - if (document[end] == '\n' || (end + 1 < doc_size && document[end + 1] != '\n')) - bufputc(text, '\n'); - end++; - } - - beg = end; - } - - /* pre-grow the output buffer to minimize allocations */ - bufgrow(ob, MARKDOWN_GROW(text->size)); - - /* second pass: actual rendering */ - if (md->cb.doc_header) - md->cb.doc_header(ob, md->opaque); - - if (text->size) { - /* adding a final newline if not already present */ - if (text->data[text->size - 1] != '\n' && text->data[text->size - 1] != '\r') - bufputc(text, '\n'); - - parse_block(ob, md, text->data, text->size); - } - - if (md->cb.doc_footer) - md->cb.doc_footer(ob, md->opaque); - - /* clean-up */ - bufrelease(text); - free_link_refs(md->refs); - - assert(md->work_bufs[BUFFER_SPAN].size == 0); - assert(md->work_bufs[BUFFER_BLOCK].size == 0); -} - -void -sd_markdown_free(struct sd_markdown *md) -{ - size_t i; - - for (i = 0; i < (size_t)md->work_bufs[BUFFER_SPAN].asize; ++i) - bufrelease(md->work_bufs[BUFFER_SPAN].item[i]); - - for (i = 0; i < (size_t)md->work_bufs[BUFFER_BLOCK].asize; ++i) - bufrelease(md->work_bufs[BUFFER_BLOCK].item[i]); - - stack_free(&md->work_bufs[BUFFER_SPAN]); - stack_free(&md->work_bufs[BUFFER_BLOCK]); - - free(md); -} - -void -sd_version(int *ver_major, int *ver_minor, int *ver_revision) -{ - *ver_major = SUNDOWN_VER_MAJOR; - *ver_minor = SUNDOWN_VER_MINOR; - *ver_revision = SUNDOWN_VER_REVISION; -} - -/* vim: set filetype=c: */ diff --git a/SnudownTest/src/markdown.h b/SnudownTest/src/markdown.h deleted file mode 100644 index 00d50dc..0000000 --- a/SnudownTest/src/markdown.h +++ /dev/null @@ -1,140 +0,0 @@ -/* markdown.h - generic markdown parser */ - -/* - * Copyright (c) 2009, Natacha Porté - * - * Permission to use, copy, modify, and distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ - -#ifndef UPSKIRT_MARKDOWN_H -#define UPSKIRT_MARKDOWN_H - -#include "buffer.h" -#include "autolink.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#define SUNDOWN_VERSION "1.16.0" -#define SUNDOWN_VER_MAJOR 1 -#define SUNDOWN_VER_MINOR 16 -#define SUNDOWN_VER_REVISION 0 - -/******************** - * TYPE DEFINITIONS * - ********************/ - -/* mkd_autolink - type of autolink */ -enum mkd_autolink { - MKDA_NOT_AUTOLINK, /* used internally when it is not an autolink*/ - MKDA_NORMAL, /* normal http/http/ftp/mailto/etc link */ - MKDA_EMAIL, /* e-mail link without explit mailto: */ -}; - -enum mkd_tableflags { - MKD_TABLE_ALIGN_L = 1, - MKD_TABLE_ALIGN_R = 2, - MKD_TABLE_ALIGN_CENTER = 3, - MKD_TABLE_ALIGNMASK = 3, - MKD_TABLE_HEADER = 4 -}; - -enum mkd_extensions { - MKDEXT_NO_INTRA_EMPHASIS = (1 << 0), - MKDEXT_TABLES = (1 << 1), - MKDEXT_FENCED_CODE = (1 << 2), - MKDEXT_AUTOLINK = (1 << 3), - MKDEXT_STRIKETHROUGH = (1 << 4), - MKDEXT_SPACE_HEADERS = (1 << 6), - MKDEXT_SUPERSCRIPT = (1 << 7), - MKDEXT_LAX_SPACING = (1 << 8), - MKDEXT_NO_EMAIL_AUTOLINK = (1 << 9), -}; - -/* sd_callbacks - functions for rendering parsed data */ -struct sd_callbacks { - /* block level callbacks - NULL skips the block */ - void (*blockcode)(struct buf *ob, const struct buf *text, const struct buf *lang, void *opaque); - void (*blockquote)(struct buf *ob, const struct buf *text, void *opaque); - void (*blockhtml)(struct buf *ob,const struct buf *text, void *opaque); - void (*header)(struct buf *ob, const struct buf *text, int level, void *opaque); - void (*hrule)(struct buf *ob, void *opaque); - void (*list)(struct buf *ob, const struct buf *text, int flags, void *opaque); - void (*listitem)(struct buf *ob, const struct buf *text, int flags, void *opaque); - void (*paragraph)(struct buf *ob, const struct buf *text, void *opaque); - void (*table)(struct buf *ob, const struct buf *header, const struct buf *body, void *opaque); - void (*table_row)(struct buf *ob, const struct buf *text, void *opaque); - void (*table_cell)(struct buf *ob, const struct buf *text, int flags, void *opaque, int col_span); - - - /* span level callbacks - NULL or return 0 prints the span verbatim */ - int (*autolink)(struct buf *ob, const struct buf *link, enum mkd_autolink type, void *opaque); - int (*codespan)(struct buf *ob, const struct buf *text, void *opaque); - int (*double_emphasis)(struct buf *ob, const struct buf *text, void *opaque); - int (*emphasis)(struct buf *ob, const struct buf *text, void *opaque); - int (*image)(struct buf *ob, const struct buf *link, const struct buf *title, const struct buf *alt, void *opaque); - int (*linebreak)(struct buf *ob, void *opaque); - int (*link)(struct buf *ob, const struct buf *link, const struct buf *title, const struct buf *content, void *opaque); - int (*raw_html_tag)(struct buf *ob, const struct buf *tag, void *opaque); - int (*triple_emphasis)(struct buf *ob, const struct buf *text, void *opaque); - int (*strikethrough)(struct buf *ob, const struct buf *text, void *opaque); - int (*superscript)(struct buf *ob, const struct buf *text, void *opaque); - - /* low level callbacks - NULL copies input directly into the output */ - void (*entity)(struct buf *ob, const struct buf *entity, void *opaque); - void (*normal_text)(struct buf *ob, const struct buf *text, void *opaque); - - /* header and footer */ - void (*doc_header)(struct buf *ob, void *opaque); - void (*doc_footer)(struct buf *ob, void *opaque); -}; - -struct sd_markdown; - -/********* - * FLAGS * - *********/ - -/* list/listitem flags */ -#define MKD_LIST_ORDERED 1 -#define MKD_LI_BLOCK 2 /*
  • containing block data */ - -/********************** - * EXPORTED FUNCTIONS * - **********************/ - -extern struct sd_markdown * -sd_markdown_new( - unsigned int extensions, - size_t max_nesting, - size_t max_table_cols, - const struct sd_callbacks *callbacks, - void *opaque); - -extern void -sd_markdown_render(struct buf *ob, const uint8_t *document, size_t doc_size, struct sd_markdown *md); - -extern void -sd_markdown_free(struct sd_markdown *md); - -extern void -sd_version(int *major, int *minor, int *revision); - -#ifdef __cplusplus -} -#endif - -#endif - -/* vim: set filetype=c: */ diff --git a/SnudownTest/src/stack.c b/SnudownTest/src/stack.c deleted file mode 100644 index ce069ff..0000000 --- a/SnudownTest/src/stack.c +++ /dev/null @@ -1,81 +0,0 @@ -#include "stack.h" -#include - -int -stack_grow(struct stack *st, size_t new_size) -{ - void **new_st; - - if (st->asize >= new_size) - return 0; - - new_st = realloc(st->item, new_size * sizeof(void *)); - if (new_st == NULL) - return -1; - - memset(new_st + st->asize, 0x0, - (new_size - st->asize) * sizeof(void *)); - - st->item = new_st; - st->asize = new_size; - - if (st->size > new_size) - st->size = new_size; - - return 0; -} - -void -stack_free(struct stack *st) -{ - if (!st) - return; - - free(st->item); - - st->item = NULL; - st->size = 0; - st->asize = 0; -} - -int -stack_init(struct stack *st, size_t initial_size) -{ - st->item = NULL; - st->size = 0; - st->asize = 0; - - if (!initial_size) - initial_size = 8; - - return stack_grow(st, initial_size); -} - -void * -stack_pop(struct stack *st) -{ - if (!st->size) - return NULL; - - return st->item[--st->size]; -} - -int -stack_push(struct stack *st, void *item) -{ - if (stack_grow(st, st->size * 2) < 0) - return -1; - - st->item[st->size++] = item; - return 0; -} - -void * -stack_top(struct stack *st) -{ - if (!st->size) - return NULL; - - return st->item[st->size - 1]; -} - diff --git a/SnudownTest/src/stack.h b/SnudownTest/src/stack.h deleted file mode 100644 index 08ff030..0000000 --- a/SnudownTest/src/stack.h +++ /dev/null @@ -1,29 +0,0 @@ -#ifndef STACK_H__ -#define STACK_H__ - -#include - -#ifdef __cplusplus -extern "C" { -#endif - -struct stack { - void **item; - size_t size; - size_t asize; -}; - -void stack_free(struct stack *); -int stack_grow(struct stack *, size_t); -int stack_init(struct stack *, size_t); - -int stack_push(struct stack *, void *); - -void *stack_pop(struct stack *); -void *stack_top(struct stack *); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/SnudownTest/stack.c b/SnudownTest/stack.c deleted file mode 100644 index ce069ff..0000000 --- a/SnudownTest/stack.c +++ /dev/null @@ -1,81 +0,0 @@ -#include "stack.h" -#include - -int -stack_grow(struct stack *st, size_t new_size) -{ - void **new_st; - - if (st->asize >= new_size) - return 0; - - new_st = realloc(st->item, new_size * sizeof(void *)); - if (new_st == NULL) - return -1; - - memset(new_st + st->asize, 0x0, - (new_size - st->asize) * sizeof(void *)); - - st->item = new_st; - st->asize = new_size; - - if (st->size > new_size) - st->size = new_size; - - return 0; -} - -void -stack_free(struct stack *st) -{ - if (!st) - return; - - free(st->item); - - st->item = NULL; - st->size = 0; - st->asize = 0; -} - -int -stack_init(struct stack *st, size_t initial_size) -{ - st->item = NULL; - st->size = 0; - st->asize = 0; - - if (!initial_size) - initial_size = 8; - - return stack_grow(st, initial_size); -} - -void * -stack_pop(struct stack *st) -{ - if (!st->size) - return NULL; - - return st->item[--st->size]; -} - -int -stack_push(struct stack *st, void *item) -{ - if (stack_grow(st, st->size * 2) < 0) - return -1; - - st->item[st->size++] = item; - return 0; -} - -void * -stack_top(struct stack *st) -{ - if (!st->size) - return NULL; - - return st->item[st->size - 1]; -} - diff --git a/SnudownTest/stack.h b/SnudownTest/stack.h deleted file mode 100644 index 08ff030..0000000 --- a/SnudownTest/stack.h +++ /dev/null @@ -1,29 +0,0 @@ -#ifndef STACK_H__ -#define STACK_H__ - -#include - -#ifdef __cplusplus -extern "C" { -#endif - -struct stack { - void **item; - size_t size; - size_t asize; -}; - -void stack_free(struct stack *); -int stack_grow(struct stack *, size_t); -int stack_init(struct stack *, size_t); - -int stack_push(struct stack *, void *); - -void *stack_pop(struct stack *); -void *stack_top(struct stack *); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/SnudownTest/stdint.h b/SnudownTest/stdint.h deleted file mode 100644 index 6423fc8..0000000 --- a/SnudownTest/stdint.h +++ /dev/null @@ -1,199 +0,0 @@ -/* stdint.h standard header */ -#pragma once -#ifndef _STDINT -#define _STDINT -#ifndef RC_INVOKED -#include - -/* NB: assumes - byte has 8 bits - long is 32 bits - pointer can convert to and from long long - long long is longest type - */ - -_C_STD_BEGIN - /* TYPE DEFINITIONS */ -typedef signed char int8_t; -typedef short int16_t; -typedef int int32_t; - -typedef unsigned char uint8_t; -typedef unsigned short uint16_t; -typedef unsigned int uint32_t; - -typedef signed char int_least8_t; -typedef short int_least16_t; -typedef int int_least32_t; - -typedef unsigned char uint_least8_t; -typedef unsigned short uint_least16_t; -typedef unsigned int uint_least32_t; - -typedef char int_fast8_t; -typedef int int_fast16_t; -typedef int int_fast32_t; - -typedef unsigned char uint_fast8_t; -typedef unsigned int uint_fast16_t; -typedef unsigned int uint_fast32_t; - -#ifndef _INTPTR_T_DEFINED - #define _INTPTR_T_DEFINED - #ifdef _WIN64 -typedef __int64 intptr_t; - #else /* _WIN64 */ -typedef _W64 int intptr_t; - #endif /* _WIN64 */ -#endif /* _INTPTR_T_DEFINED */ - -#ifndef _UINTPTR_T_DEFINED - #define _UINTPTR_T_DEFINED - #ifdef _WIN64 -typedef unsigned __int64 uintptr_t; - #else /* _WIN64 */ -typedef _W64 unsigned int uintptr_t; - #endif /* _WIN64 */ -#endif /* _UINTPTR_T_DEFINED */ - -typedef _Longlong int64_t; -typedef _ULonglong uint64_t; - -typedef _Longlong int_least64_t; -typedef _ULonglong uint_least64_t; - -typedef _Longlong int_fast64_t; -typedef _ULonglong uint_fast64_t; - -typedef _Longlong intmax_t; -typedef _ULonglong uintmax_t; - - /* LIMIT MACROS */ -#define INT8_MIN (-0x7f - _C2) -#define INT16_MIN (-0x7fff - _C2) -#define INT32_MIN (-0x7fffffff - _C2) - -#define INT8_MAX 0x7f -#define INT16_MAX 0x7fff -#define INT32_MAX 0x7fffffff -#define UINT8_MAX 0xff -#define UINT16_MAX 0xffff -#define UINT32_MAX 0xffffffff - -#define INT_LEAST8_MIN (-0x7f - _C2) -#define INT_LEAST16_MIN (-0x7fff - _C2) -#define INT_LEAST32_MIN (-0x7fffffff - _C2) - -#define INT_LEAST8_MAX 0x7f -#define INT_LEAST16_MAX 0x7fff -#define INT_LEAST32_MAX 0x7fffffff -#define UINT_LEAST8_MAX 0xff -#define UINT_LEAST16_MAX 0xffff -#define UINT_LEAST32_MAX 0xffffffff - -#define INT_FAST8_MIN (-0x7f - _C2) -#define INT_FAST16_MIN (-0x7fff - _C2) -#define INT_FAST32_MIN (-0x7fffffff - _C2) - -#define INT_FAST8_MAX 0x7f -#define INT_FAST16_MAX 0x7fff -#define INT_FAST32_MAX 0x7fffffff -#define UINT_FAST8_MAX 0xff -#define UINT_FAST16_MAX 0xffff -#define UINT_FAST32_MAX 0xffffffff - - #if _INTPTR == 0 || _INTPTR == 1 -#define INTPTR_MAX 0x7fffffff -#define INTPTR_MIN (-INTPTR_MAX - _C2) -#define UINTPTR_MAX 0xffffffff - - #else /* _INTPTR == 2 */ -#define INTPTR_MIN (-_LLONG_MAX - _C2) -#define INTPTR_MAX _LLONG_MAX -#define UINTPTR_MAX _ULLONG_MAX -#endif /* _INTPTR */ - -#define INT8_C(x) (x) -#define INT16_C(x) (x) -#define INT32_C(x) ((x) + (INT32_MAX - INT32_MAX)) - -#define UINT8_C(x) (x) -#define UINT16_C(x) (x) -#define UINT32_C(x) ((x) + (UINT32_MAX - UINT32_MAX)) - -#ifdef _WIN64 - #define PTRDIFF_MIN INT64_MIN - #define PTRDIFF_MAX INT64_MAX -#else /* _WIN64 */ - #define PTRDIFF_MIN INT32_MIN - #define PTRDIFF_MAX INT32_MAX -#endif /* _WIN64 */ - -#define SIG_ATOMIC_MIN INT32_MIN -#define SIG_ATOMIC_MAX INT32_MAX - -#ifndef SIZE_MAX - #ifdef _WIN64 - #define SIZE_MAX UINT64_MAX - #else /* _WIN64 */ - #define SIZE_MAX UINT32_MAX - #endif /* _WIN64 */ -#endif /* SIZE_MAX */ - -#define WCHAR_MIN 0x0000 -#define WCHAR_MAX 0xffff - -#define WINT_MIN 0x0000 -#define WINT_MAX 0xffff - - #define INT64_MIN (-0x7fffffffffffffff - _C2) - #define INT64_MAX 0x7fffffffffffffff - #define UINT64_MAX 0xffffffffffffffffU - - #define INT_LEAST64_MIN (-0x7fffffffffffffff - _C2) - #define INT_LEAST64_MAX 0x7fffffffffffffff - #define UINT_LEAST64_MAX 0xffffffffffffffffU - - #define INT_FAST64_MIN (-0x7fffffffffffffff - _C2) - #define INT_FAST64_MAX 0x7fffffffffffffff - #define UINT_FAST64_MAX 0xffffffffffffffffU - - #define INTMAX_MIN (-0x7fffffffffffffff - _C2) - #define INTMAX_MAX 0x7fffffffffffffff - #define UINTMAX_MAX 0xffffffffffffffffU - -#define INT64_C(x) ((x) + (INT64_MAX - INT64_MAX)) -#define UINT64_C(x) ((x) + (UINT64_MAX - UINT64_MAX)) -#define INTMAX_C(x) INT64_C(x) -#define UINTMAX_C(x) UINT64_C(x) -_C_STD_END -#endif /* RC_INVOKED */ -#endif /* _STDINT */ - - #if defined(_STD_USING) -using _CSTD int8_t; using _CSTD int16_t; -using _CSTD int32_t; using _CSTD int64_t; - -using _CSTD uint8_t; using _CSTD uint16_t; -using _CSTD uint32_t; using _CSTD uint64_t; - -using _CSTD int_least8_t; using _CSTD int_least16_t; -using _CSTD int_least32_t; using _CSTD int_least64_t; -using _CSTD uint_least8_t; using _CSTD uint_least16_t; -using _CSTD uint_least32_t; using _CSTD uint_least64_t; - -using _CSTD intmax_t; using _CSTD uintmax_t; - -using _CSTD uintptr_t; -using _CSTD intptr_t; - -using _CSTD int_fast8_t; using _CSTD int_fast16_t; -using _CSTD int_fast32_t; using _CSTD int_fast64_t; -using _CSTD uint_fast8_t; using _CSTD uint_fast16_t; -using _CSTD uint_fast32_t; using _CSTD uint_fast64_t; - #endif /* defined(_STD_USING) */ - -/* - * Copyright (c) 1992-2009 by P.J. Plauger. ALL RIGHTS RESERVED. - * Consult your license regarding permissions and restrictions. -V5.20:0009 */ diff --git a/SnudownTest/sundown.def b/SnudownTest/sundown.def deleted file mode 100644 index 7cd41bb..0000000 --- a/SnudownTest/sundown.def +++ /dev/null @@ -1,20 +0,0 @@ -LIBRARY SUNDOWN -EXPORTS - sdhtml_renderer - sdhtml_toc_renderer - sdhtml_smartypants - bufgrow - bufnew - bufcstr - bufprefix - bufput - bufputs - bufputc - bufrelease - bufreset - bufslurp - bufprintf - sd_markdown_new - sd_markdown_render - sd_markdown_free - sd_version \ No newline at end of file diff --git a/SnudownTest/test_snudown.py b/SnudownTest/test_snudown.py deleted file mode 100644 index fa9568f..0000000 --- a/SnudownTest/test_snudown.py +++ /dev/null @@ -1,461 +0,0 @@ -#!/usr/bin/python -# -*- coding: utf-8 -*- - -import snudown -import unittest -import itertools -import cStringIO as StringIO - - -cases = { - '': '', - 'http://www.reddit.com': - '

    http://www.reddit.com

    \n', - - 'http://www.reddit.com/a\x00b': - '

    http://www.reddit.com/ab

    \n', - - 'foo@example.com': - '

    foo@example.com

    \n', - - '[foo](http://en.wikipedia.org/wiki/Link_(film\))': - '

    foo

    \n', - - '(http://tsfr.org)': - '

    (http://tsfr.org)

    \n', - - '[A link with a /r/subreddit in it](/lol)': - '

    A link with a /r/subreddit in it

    \n', - - '[A link with a http://www.url.com in it](/lol)': - '

    A link with a http://www.url.com in it

    \n', - - '[Empty Link]()': - '

    [Empty Link]()

    \n', - - 'http://en.wikipedia.org/wiki/café_racer': - '

    http://en.wikipedia.org/wiki/café_racer

    \n', - - '#####################################################hi': - '
    ###############################################hi
    \n', - - '[foo](http://bar\nbar)': - '

    foo

    \n', - - '/r/test': - '

    /r/test

    \n', - - 'Words words /r/test words': - '

    Words words /r/test words

    \n', - - '/r/': - '

    /r/

    \n', - - r'escaped \/r/test': - '

    escaped /r/test

    \n', - - 'ampersands http://www.google.com?test&blah': - '

    ampersands http://www.google.com?test&blah

    \n', - - '[_regular_ link with nesting](/test)': - '

    regular link with nesting

    \n', - - ' www.a.co?with&test': - '

    www.a.co?with&test

    \n', - - r'Normal^superscript': - '

    Normalsuperscript

    \n', - - r'Escape\^superscript': - '

    Escape^superscript

    \n', - - r'~~normal strikethrough~~': - '

    normal strikethrough

    \n', - - r'\~~escaped strikethrough~~': - '

    ~~escaped strikethrough~~

    \n', - - 'anywhere\x03, you': - '

    anywhere, you

    \n', - - '[Test](//test)': - '

    Test

    \n', - - '[Test](//#test)': - '

    Test

    \n', - - '[Test](#test)': - '

    Test

    \n', - - '[Test](git://github.com)': - '

    Test

    \n', - - '[Speculation](//?)': - '

    Speculation

    \n', - - '/r/sr_with_underscores': - '

    /r/sr_with_underscores

    \n', - - '[Test](///#test)': - '

    Test

    \n', - - '/r/multireddit+test+yay': - '

    /r/multireddit+test+yay

    \n', - - '': - '

    <test>

    \n', - - 'words_with_underscores': - '

    words_with_underscores

    \n', - - 'words*with*asterisks': - '

    wordswithasterisks

    \n', - - '~test': - '

    ~test

    \n', - - '/u/test': - '

    /u/test

    \n', - - '/u/test/m/test test': - '

    /u/test/m/test test

    \n', - - '/U/nope': - '

    /U/nope

    \n', - - '/r/test/m/test test': - '

    /r/test/m/test test

    \n', - - '/r/test/w/test test': - '

    /r/test/w/test test

    \n', - - '/r/test/comments/test test': - '

    /r/test/comments/test test

    \n', - - '/u/test/commentscommentscommentscommentscommentscommentscomments/test test': - '

    /u/test/commentscommentscommentscommentscommentscommentscomments/test test

    \n', - - 'a /u/reddit': - '

    a /u/reddit

    \n', - - 'u/reddit': - '

    u/reddit

    \n', - - 'a u/reddit': - '

    a u/reddit

    \n', - - 'a u/reddit/foobaz': - '

    a u/reddit/foobaz

    \n', - - 'foo:u/reddit': - '

    foo:u/reddit

    \n', - - 'fuu/reddit': - '

    fuu/reddit

    \n', - - # Don't treat unicode punctuation as a word boundary for now - u'a。u/reddit'.encode('utf8'): - u'

    a。u/reddit

    \n'.encode('utf8'), - - '\\/u/me': - '

    /u/me

    \n', - - '\\\\/u/me': - '

    \\/u/me

    \n', - - '\\u/me': - '

    \\u/me

    \n', - - '\\\\u/me': - '

    \\u/me

    \n', - - 'u\\/me': - '

    u/me

    \n', - - '*u/me*': - '

    u/me

    \n', - - 'foo^u/me': - '

    foou/me

    \n', - - '*foo*u/me': - '

    foou/me

    \n', - - 'u/me': - '

    u/me

    \n', - - '/u/me': - '

    /u/me

    \n', - - 'u/m': - '

    u/m

    \n', - - '/u/m': - '

    /u/m

    \n', - - '/f/oobar': - '

    /f/oobar

    \n', - - 'f/oobar': - '

    f/oobar

    \n', - - '/r/test/commentscommentscommentscommentscommentscommentscomments/test test': - '

    /r/test/commentscommentscommentscommentscommentscommentscomments/test test

    \n', - - 'blah \\': - '

    blah \\

    \n', - - '/r/whatever: fork': - '

    /r/whatever: fork

    \n', - - '/r/t:timereddit': - '

    /r/t:timereddit

    \n', - - '/r/reddit.com': - '

    /r/reddit.com

    \n', - - '/r/not.cool': - '

    /r/not.cool

    \n', - - '/r/very+clever+multireddit+reddit.com+t:fork+yay': - '

    /r/very+clever+multireddit+reddit.com+t:fork+yay

    \n', - - '/r/t:heatdeathoftheuniverse': - '

    /r/t:heatdeathoftheuniverse

    \n', - - '/r/all-minus-something': - '

    /r/all-minus-something

    \n', - - '/r/notall-minus': - '

    /r/notall-minus

    \n', - - 'a /r/reddit.com': - '

    a /r/reddit.com

    \n', - - 'a r/reddit.com': - '

    a r/reddit.com

    \n', - - 'foo:r/reddit.com': - '

    foo:r/reddit.com

    \n', - - 'foobar/reddit.com': - '

    foobar/reddit.com

    \n', - - u'a。r/reddit.com'.encode('utf8'): - u'

    a。r/reddit.com

    \n'.encode('utf8'), - - '/R/reddit.com': - '

    /R/reddit.com

    \n', - - '/r/irc://foo.bar/': - '

    /r/irc://foo.bar/

    \n', - - '/r/t:irc//foo.bar/': - '

    /r/t:irc//foo.bar/

    \n', - - '/r/all-irc://foo.bar/': - '

    /r/all-irc://foo.bar/

    \n', - - '/r/foo+irc://foo.bar/': - '

    /r/foo+irc://foo.bar/

    \n', - - '/r/www.example.com': - '

    /r/www.example.com

    \n', - - '.http://reddit.com': - '

    .http://reddit.com

    \n', - - '[r://](/aa)': - '

    r://http://reddit.com/

    \n', - - '/u/http://www.reddit.com/user/reddit': - '

    /u/http://www.reddit.com/user/reddit

    \n', - - 'www.http://example.com/': - '

    www.http://example.com/

    \n', - - ('|' * 5) + '\n' + ('-|' * 5) + '\n|\n': - '\n\n' + ('\n' * 4) + '\n\n\n\n\n
    \n', - - ('|' * 2) + '\n' + ('-|' * 2) + '\n|\n': - '\n\n' + ('\n' * 1) + '\n\n\n\n\n
    \n', - - ('|' * 65) + '\n' + ('-|' * 65) + '\n|\n': - '\n\n' + ('\n' * 64) + '\n\n\n\n\n
    \n', - - ('|' * 66) + '\n' + ('-|' * 66) + '\n|\n': - '

    ' + ('|' * 66) + '\n' + ('-|' * 66) + '\n|' + '

    \n', - - 'ϑ': - '

    ϑ

    \n', - - '&foobar;': - '

    &foobar;

    \n', - - ' ': - '

    &nbsp

    \n', - - '&#foobar;': - '

    &#foobar;

    \n', - - 'oobar;': - '

    &#xfoobar;

    \n', - - '�': - '

    &#9999999999;

    \n', - - 'c': - '

    c

    \n', - - '~': - '

    ~

    \n', - - '~': - '

    ~

    \n', - - '½': - '

    ½

    \n', - - 'aaa½aaa': - '

    aaa½aaa

    \n', - - '&': - '

    &

    \n', - - '&;': - '

    &;

    \n', - - '&#;': - '

    &#;

    \n', - - '&#;': - '

    &#;

    \n', - - '&#x;': - '

    &#x;

    \n', -} - -# Test that every numeric entity is encoded as -# it should be. -ILLEGAL_NUMERIC_ENTS = frozenset(itertools.chain( - xrange(0, 9), - xrange(11, 13), - xrange(14, 32), - xrange(55296, 57344), - xrange(65534, 65536), -)) - -ent_test_key = '' -ent_test_val = '' -for i in xrange(65550): - ent_testcase = '&#%d;&#x%x;' % (i, i) - ent_test_key += ent_testcase - if i in ILLEGAL_NUMERIC_ENTS: - ent_test_val += ent_testcase.replace('&', '&') - else: - ent_test_val += ent_testcase - -cases[ent_test_key] = '

    %s

    \n' % ent_test_val - -wiki_cases = { - '': - '

    \n', - - '
    ': - '

    \n', - - '
    ': - '

    \n', - - '
    ': - '

    \n', - - '
    ': - '

    \n', - - '
    ': - '

    \n', - - '
    ': - '

    \n', - - '
    ': - '

    \n', - - '
    ': - '

    \n', - - '
    ': - '

    \n', - - '
    ': - '

    \n', - - '
    ': - '

    \n', - - '
    ': - '

    \n', - - '
    ': - '

    \n', - - '
    ': - '

    \n', - - '
    ': - '

    \n', - - '
    ': - '

    \n', - - '
    ': - '

    \n', - - '
    ': - '

    \n', - - '
    ': - '

    \n', - - '
    ': - '

    \n', -} - -class SnudownTestCase(unittest.TestCase): - def __init__(self, renderer=snudown.RENDERER_USERTEXT): - self.renderer = renderer - unittest.TestCase.__init__(self) - - def runTest(self): - output = snudown.markdown(self.input, renderer=self.renderer) - - for i, (a, b) in enumerate(zip(repr(self.expected_output), - repr(output))): - if a != b: - io = StringIO.StringIO() - print >> io, "TEST FAILED:" - print >> io, " input: %s" % repr(self.input) - print >> io, " expected: %s" % repr(self.expected_output) - print >> io, " actual: %s" % repr(output) - print >> io, " %s" % (' ' * i + '^') - self.fail(io.getvalue()) - - - -def test_snudown(): - suite = unittest.TestSuite() - - for input, expected_output in wiki_cases.iteritems(): - case = SnudownTestCase(renderer=snudown.RENDERER_WIKI) - case.input = input - case.expected_output = expected_output - suite.addTest(case) - - for input, expected_output in cases.iteritems(): - case = SnudownTestCase() - case.input = input - case.expected_output = expected_output - suite.addTest(case) - - return suite diff --git a/SpinalTap/README.md b/SpinalTap/README.md index 7648f9a..c4b8589 100644 --- a/SpinalTap/README.md +++ b/SpinalTap/README.md @@ -3,18 +3,21 @@ Spinal A couple of tools for copying files and directories. - 2016 03 02 +- 2016 11 27 + - Renamed the `copy_file` parameter `callback` to `callback_progress` for clarity. + +- 2016 03 02 - Fixed issue where the copy's path casing was based on the input string and not the path's actual casing (since Windows doesn't care). - Change the returned written_bytes to 0 if the file did not need to be copied. This is better for tracking how much actually happens during each backup. - Fixed encode errors caused by callback_v1's print statement. - 2016 03 03 +- 2016 03 03 - Moved directory / filename exclusion logic into the walk_generator so the caller doesn't need to worry about it. - walk_generator now yields absolute filenames since copy_dir no longer needs to process exclusions, and that was the only reason walk_generator used to yield them in parts. - 2016 03 04 +- 2016 03 04 - Created a FilePath class to cache os.stat data, which should reduce the number of unecessary filesystem calls. - 2016 03 18 +- 2016 03 18 - Added `glob.escape` to `get_path_casing`. - Added callbacks for some extra debug output. \ No newline at end of file diff --git a/SpinalTap/spinal.py b/SpinalTap/spinal.py index 8b95fad..1dbb329 100644 --- a/SpinalTap/spinal.py +++ b/SpinalTap/spinal.py @@ -1,33 +1,20 @@ import collections -import glob import hashlib -import json import logging import os import shutil -import stat -import string import sys -import time -try: - sys.path.append('C:\\git\\else\\Bytestring') - sys.path.append('C:\\git\\else\\Pathclass') - sys.path.append('C:\\git\\else\\Ratelimiter') - import bytestring - import pathclass - import ratelimiter -except ImportError: - # pip install - # https://raw.githubusercontent.com/voussoir/else/master/_voussoirkit/voussoirkit.zip - from voussoirkit import bytestring - from voussoirkit import pathclass - from voussoirkit import ratelimiter +# pip install +# https://raw.githubusercontent.com/voussoir/else/master/_voussoirkit/voussoirkit.zip +from voussoirkit import bytestring +from voussoirkit import pathclass +from voussoirkit import ratelimiter logging.basicConfig(level=logging.CRITICAL) log = logging.getLogger(__name__) -CHUNK_SIZE = 128 * bytestring.KIBIBYTE +CHUNK_SIZE = 256 * bytestring.KIBIBYTE # Number of bytes to read and write at a time HASH_CLASS = hashlib.md5 @@ -53,7 +40,7 @@ class SpinalError(Exception): class ValidationError(Exception): pass -def callback_exclusion(name, path_type): +def callback_exclusion_v1(name, path_type): ''' Example of an exclusion callback function. ''' @@ -98,12 +85,12 @@ def copy(source, file_args=None, file_kwargs=None, dir_args=None, dir_kwargs=Non def copy_dir( source, destination=None, - destination_new_root=None, bytes_per_second=None, callback_directory=None, callback_exclusion=None, callback_file=None, callback_permission_denied=None, + destination_new_root=None, dry_run=False, exclude_directories=None, exclude_filenames=None, @@ -123,13 +110,6 @@ def copy_dir( The directory in which copied files are placed. Alternatively, use destination_new_root. - destination_new_root: - Determine the destination path by calling - `new_root(source, destination_new_root)`. - Thus, this path acts as a root and the rest of the path is matched. - - `destination` and `destination_new_root` are mutually exclusive. - bytes_per_second: Restrict file copying to this many bytes per second. Can be an integer or an existing Ratelimiter object. @@ -139,8 +119,8 @@ def copy_dir( callback_directory: This function will be called after each file copy with three parameters: - name of file copied, number of bytes written to destination so far, - total bytes needed (from precalcsize). + name of file copied, number of bytes written to destination directory + so far, total bytes needed (based on precalcsize). If `precalcsize` is False, this function will receive written bytes for both written and total, showing 100% always. @@ -163,6 +143,13 @@ def copy_dir( Default = None + destination_new_root: + Determine the destination path by calling + `new_root(source, destination_new_root)`. + Thus, this path acts as a root and the rest of the path is matched. + + `destination` and `destination_new_root` are mutually exclusive. + dry_run: Do everything except the actual file copying. @@ -179,8 +166,8 @@ def copy_dir( Default = None files_per_second: - Maximum number of files to be processed per second. Helps to keep CPU usage - low. + Maximum number of files to be processed per second. Helps to keep CPU + usage low. Default = None @@ -207,9 +194,9 @@ def copy_dir( ''' # Prepare parameters if not is_xor(destination, destination_new_root): - m = 'One and only one of `destination` and ' - m += '`destination_new_root` can be passed.' - raise ValueError(m) + message = 'One and only one of `destination` and ' + message += '`destination_new_root` can be passed.' + raise ValueError(message) source = str_to_fp(source) @@ -244,7 +231,7 @@ def copy_dir( exclude_directories=exclude_directories, exclude_filenames=exclude_filenames, ) - for (source_abspath) in walker: + for source_abspath in walker: # Terminology: # abspath: C:\folder\subfolder\filename.txt # location: C:\folder\subfolder @@ -267,7 +254,7 @@ def copy_dir( source_abspath, destination_abspath, bytes_per_second=bytes_per_second, - callback=callback_file, + callback_progress=callback_file, callback_permission_denied=callback_permission_denied, dry_run=dry_run, overwrite_old=overwrite_old, @@ -292,7 +279,7 @@ def copy_file( destination=None, destination_new_root=None, bytes_per_second=None, - callback=None, + callback_progress=None, callback_permission_denied=None, callback_validate_hash=None, dry_run=False, @@ -321,14 +308,6 @@ def copy_file( Default = None - callback: - If provided, this function will be called after writing - each CHUNK_SIZE bytes to destination with three parameters: - the Path object being copied, number of bytes written so far, - total number of bytes needed. - - Default = None - callback_permission_denied: If provided, this function will be called when a source file denies read access, with the file path and the exception object as parameters. @@ -338,6 +317,14 @@ def copy_file( Default = None + callback_progress: + If provided, this function will be called after writing + each CHUNK_SIZE bytes to destination with three parameters: + the Path object being copied, number of bytes written so far, + total number of bytes needed. + + Default = None + callback_validate_hash: Passed directly into `verify_hash` @@ -365,9 +352,9 @@ def copy_file( ''' # Prepare parameters if not is_xor(destination, destination_new_root): - m = 'One and only one of `destination` and ' - m += '`destination_new_root` can be passed' - raise ValueError(m) + message = 'One and only one of `destination` and ' + message += '`destination_new_root` can be passed' + raise ValueError(message) source = str_to_fp(source) @@ -379,10 +366,10 @@ def copy_file( destination = new_root(source, destination_new_root) destination = str_to_fp(destination) - callback = callback or do_nothing + callback_progress = callback_progress or do_nothing if destination.is_dir: - raise DestinationIsDirectory(destination) + destination = destination.with_child(source.basename) bytes_per_second = limiter_or_none(bytes_per_second) @@ -397,8 +384,8 @@ def copy_file( # Copy if dry_run: - if callback is not None: - callback(destination, 0, 0) + if callback_progress is not None: + callback_progress(destination, 0, 0) return [destination, 0] source_bytes = source.size @@ -435,7 +422,7 @@ def copy_file( if bytes_per_second is not None: bytes_per_second.limit(data_bytes) - callback(destination, written_bytes, source_bytes) + callback_progress(destination, written_bytes, source_bytes) # Fin log.debug('Closing source handle.') @@ -539,7 +526,7 @@ def verify_hash(path, known_size, known_hash, callback=None): path object, bytes ingested so far, bytes total ''' path = str_to_fp(path) - log.debug('Validating hash for "%s" against %s' % (path.absolute_path, known_hash)) + log.debug('Validating hash for "%s" against %s', path.absolute_path, known_hash) file_size = os.path.getsize(path.absolute_path) if file_size != known_size: raise ValidationError('File size %d != known size %d' % (file_size, known_size)) @@ -565,6 +552,7 @@ def verify_hash(path, known_size, known_hash, callback=None): def walk_generator( path='.', callback_exclusion=None, + callback_permission_denied=None, exclude_directories=None, exclude_filenames=None, recurse=True, @@ -613,6 +601,7 @@ def walk_generator( exclude_filenames = set() callback_exclusion = callback_exclusion or do_nothing + callback_permission_denied = callback_permission_denied or do_nothing exclude_filenames = {normalize(f) for f in exclude_filenames} exclude_directories = {normalize(f) for f in exclude_directories} @@ -636,9 +625,14 @@ def walk_generator( # Thank you for your cooperation. while len(directory_queue) > 0: current_location = directory_queue.popleft() - log.debug('listdir: %s' % current_location.absolute_path) - contents = os.listdir(current_location.absolute_path) - log.debug('received %d items' % len(contents)) + log.debug('listdir: %s', current_location.absolute_path) + try: + contents = os.listdir(current_location.absolute_path) + except PermissionError as exception: + callback_permission_denied(current_location, exception) + continue + + log.debug('received %d items', len(contents)) directories = [] files = [] diff --git a/Templates/changelog.md b/Templates/changelog.md new file mode 100644 index 0000000..847a19a --- /dev/null +++ b/Templates/changelog.md @@ -0,0 +1,15 @@ +### Changelog + +- **[addition]** A new feature was added. +- **[bugfix]** Incorrect behavior was fixed. +- **[change]** An existing feature was slightly modified or parameters were renamed. +- **[cleanup]** Code was improved, comments were added, or other changes with minor impact on the interface. +- **[release]** A new version of the program has been released. +- **[removal]** An old feature was removed. + +  + +- YYYY MM DD + - **[addition]** Test + - **[bugfix]** Ping Pong + - **[removal]** Entries are grouped by label diff --git a/Templates/flask/templates/root.html b/Templates/flask/templates/root.html index 92449bd..872be2b 100644 --- a/Templates/flask/templates/root.html +++ b/Templates/flask/templates/root.html @@ -17,12 +17,13 @@ body, a + {{header.make_header()}}

    Welcome to my flask site

    - + diff --git a/Templates/flask/templates/template.html b/Templates/flask/templates/template.html index 8dacc5a..68f5b3f 100644 --- a/Templates/flask/templates/template.html +++ b/Templates/flask/templates/template.html @@ -16,8 +16,8 @@

    test

    - + diff --git a/ThreadedDL/threaded_dl.py b/ThreadedDL/threaded_dl.py index 5781c89..d069992 100644 --- a/ThreadedDL/threaded_dl.py +++ b/ThreadedDL/threaded_dl.py @@ -3,16 +3,10 @@ import sys import threading import time -try: - sys.path.append('C:\\git\\else\\Clipext') - sys.path.append('C:\\git\\else\\Downloady') - import clipext - import downloady -except ImportError: - # pip install - # https://raw.githubusercontent.com/voussoir/else/master/_voussoirkit/voussoirkit.zip - from voussoirkit import bytestring - from voussoirkit import pathclass +# pip install +# https://raw.githubusercontent.com/voussoir/else/master/_voussoirkit/voussoirkit.zip +from voussoirkit import clipext +from voussoirkit import downloady def remove_finished(threads): threads = [t for t in threads if t.is_alive()] diff --git a/Toolbox/bitrate_chart.py b/Toolbox/bitrate_chart.py index bb11139..b143f29 100644 --- a/Toolbox/bitrate_chart.py +++ b/Toolbox/bitrate_chart.py @@ -19,9 +19,10 @@ 16384 kbps | 2.000 MiB | 120.000 MiB | 3.516 GiB | 7.031 GiB | 10.547 GiB | 14.062 GiB ''' import sys -sys.path.append('C:\\git\\else\\bytestring') -import bytestring import kbps + +from voussoirkit import bytestring + times = ['01', '1:00', '30:00', '1:00:00', '1:30:00', '2:00:00'] rates = [128, 256, 320, 500, 640, 738, 1024, 2048, 3072, 4096, 6144, 8192, 12288, 16384, 2330.17, 4660.34] @@ -35,7 +36,7 @@ for r in rates: l = [] l.append('%d kbps' % r) for t in times: - l.append(bytestring.bytestring(kbps.calc(kbps.hms_s(t), r))) + l.append(kbps.kbps(time=t, kbps=r)) l = ' | '.join(l) table.append(l) diff --git a/Toolbox/brename.py b/Toolbox/brename.py new file mode 100644 index 0000000..b2426e7 --- /dev/null +++ b/Toolbox/brename.py @@ -0,0 +1,61 @@ +import os +import sys + + +def brename(transformation): + old = os.listdir() + if 're.' in transformation: + import re + new = [eval(transformation) for x in old] + pairs = [] + for (x, y) in zip(old, new): + if x == y: + continue + pairs.append((x, y)) + if not loop(pairs, dry=True): + print('Nothing to replace') + return + print('Is this correct? y/n') + if input('>').lower() not in ('y', 'yes', 'yeehaw'): + return + loop(pairs, dry=False) + +def longest_length(li): + longest = 0 + for item in li: + longest = max(longest, len(item)) + return longest + +def loop(pairs, dry=False): + has_content = False + for (x, y) in pairs: + if dry: + line = '{old}\n{new}\n' + line = line.format(old=x, new=y) + #print(line.encode('utf-8')) + print(line) + has_content = True + else: + os.rename(x, y) + return has_content + +def title(text): + (text, extension) = os.path.splitext(text) + text = text.title() + if ' ' in text: + (first, rest) = text.split(' ', 1) + else: + (first, rest) = (text, '') + rest = ' %s ' % rest + for article in ['The', 'A', 'An', 'At', 'To', 'In', 'Of', 'From', 'And']: + article = ' %s ' % article + rest = rest.replace(article, article.lower()) + rest = rest.strip() + if rest != '': + rest = ' ' + rest + text = first + rest + extension + return text + +if __name__ == '__main__': + transformation = sys.argv[1] + brename(transformation) \ No newline at end of file diff --git a/Toolbox/breplace.py b/Toolbox/breplace.py new file mode 100644 index 0000000..2dc1bcc --- /dev/null +++ b/Toolbox/breplace.py @@ -0,0 +1,7 @@ +import brename +import sys + +replace_from = sys.argv[1] +replace_to = sys.argv[2] +command = 'x.replace("{f}", "{t}")'.format(f=replace_from, t=replace_to) +brename.brename(command) \ No newline at end of file diff --git a/Toolbox/clipboard.py b/Toolbox/clipboard.py index 640280c..3eec048 100644 --- a/Toolbox/clipboard.py +++ b/Toolbox/clipboard.py @@ -3,7 +3,7 @@ import sys if len(sys.argv) > 1: - sys.path.append('C:\\git\\else\\Clipext'); import clipext + from voussoirkit import clipext stuff = clipext.resolve(sys.argv[1]) pyperclip.copy(stuff) else: diff --git a/Toolbox/filepull.py b/Toolbox/filepull.py index 6ffdd98..44474d6 100644 --- a/Toolbox/filepull.py +++ b/Toolbox/filepull.py @@ -1,8 +1,7 @@ import os import sys -sys.path.append('C:\\git\\else\\spinaltap') -import spinal +from voussoirkit import spinal def main(): files = list(spinal.walk_generator()) diff --git a/Toolbox/fileswith.py b/Toolbox/fileswith.py index 69b935c..a3b90b2 100644 --- a/Toolbox/fileswith.py +++ b/Toolbox/fileswith.py @@ -2,7 +2,8 @@ import fnmatch import glob import re import sys -sys.path.append('C:\\git\\else\\spinaltap'); import spinal + +from voussoirkit import spinal filepattern = sys.argv[1] searchpattern = sys.argv[2] diff --git a/Toolbox/kbps.py b/Toolbox/kbps.py index 00461b0..395cbe9 100644 --- a/Toolbox/kbps.py +++ b/Toolbox/kbps.py @@ -1,11 +1,7 @@ +import argparse import sys -try: - sys.path.append('C:\\git\\else\\Bytestring') - import bytestring -except ImportError: - # pip install - # https://raw.githubusercontent.com/voussoir/else/master/_voussoirkit/voussoirkit.zip - from vousoirkit import bytestring + +from voussoirkit import bytestring def hms_s(hms): hms = hms.split(':') @@ -20,13 +16,50 @@ def hms_s(hms): seconds += int(hms[0]) return seconds -def calc(seconds, kbps): - final_kilobits = kbps * seconds - final_bytes = final_kilobits * 128 - return final_bytes +def s_hms(s): + (minutes, seconds) = divmod(s, 60) + (hours, minutes) = divmod(minutes, 60) + return '%02d:%02d:%02d' % (hours, minutes, seconds) + +def kbps(time=None, size=None, kbps=None): + if [time, size, kbps].count(None) != 1: + raise ValueError('Incorrect number of unknowns') + + if size is None: + seconds = hms_s(time) + kibs = int(kbps) / 8 + size = kibs * 1024 + size *= seconds + out = bytestring.bytestring(size) + return out + + if time is None: + size = bytestring.parsebytes(size) + kilobits = size / 128 + time = kilobits / int(kbps) + return s_hms(time) + + if kbps is None: + seconds = hms_s(time) + size = bytestring.parsebytes(size) + kibs = size / 1024 + kilobits = kibs * 8 + kbps = kilobits / seconds + return int(kbps) + +def example_argparse(args): + print(kbps(time=args.time, size=args.size, kbps=args.kbps)) + +def main(argv): + parser = argparse.ArgumentParser() + + parser.add_argument('-t', '--time', dest='time', default=None) + parser.add_argument('-s', '--size', dest='size', default=None) + parser.add_argument('-k', '--kbps', dest='kbps', default=None) + parser.set_defaults(func=example_argparse) + + args = parser.parse_args(argv) + args.func(args) if __name__ == '__main__': - length = sys.argv[1] # HH:MM:SS - kbps = int(sys.argv[2]) - seconds = hms_s(length) - print(bytestring.bytestring(calc(seconds, kbps))) \ No newline at end of file + main(sys.argv[1:]) diff --git a/Toolbox/kbpsr.py b/Toolbox/kbpsr.py index 81bfaff..e317213 100644 --- a/Toolbox/kbpsr.py +++ b/Toolbox/kbpsr.py @@ -1,12 +1,8 @@ import sys -try: - sys.path.append('C:\\git\\else\\Bytestring') - import bytestring -except ImportError: - # pip install - # https://raw.githubusercontent.com/voussoir/else/master/_voussoirkit/voussoirkit.zip - from vousoirkit import bytestring +# pip install +# https://raw.githubusercontent.com/voussoir/else/master/_voussoirkit/voussoirkit.zip +from vousoirkit import bytestring def hms_s(hms): hms = hms.split(':') diff --git a/Toolbox/rejpg.py b/Toolbox/rejpg.py index 927caa1..42b6791 100644 --- a/Toolbox/rejpg.py +++ b/Toolbox/rejpg.py @@ -9,8 +9,7 @@ import sys PIL.ImageFile.LOAD_TRUNCATED_IMAGES = True if '/r' in sys.argv: - sys.path.append('C:\\git\\else\\spinaltap') - import spinal + from voussoirkit import spinal walker = spinal.walk_generator() files = list(walker) files = [f.absolute_path for f in files] diff --git a/Toolbox/subtitle_shift.py b/Toolbox/subtitle_shift.py new file mode 100644 index 0000000..e554c76 --- /dev/null +++ b/Toolbox/subtitle_shift.py @@ -0,0 +1,66 @@ +''' +Usage: + +Shift all subtitles 10 seconds forward: +> subtitle_shift file.srt +10 + +Shift all subtitles 10 seconds backward: +> subtitle_shift file.srt -10 + +This will produce "file_correct.srt" with the new timestamps. +''' + +import os +import sys +filename = sys.argv[1] +offset = float(sys.argv[2]) +f = open(filename, 'r') + +lines = [l.strip() for l in f.readlines()] +for (lineindex, line) in enumerate(lines): + changed = False + + if '-->' not in line: + continue + + words = line.split(' ') + for (wordindex, word) in enumerate(words): + if not (':' in word and ',' in word): + continue + + if not word.replace(':', '').replace(',', '').isdigit(): + continue + + # 1.) 01:23:45,678 --> 02:34:56,789 | our input + # 2.) 01:23:45:678 --> 02:34:56:789 | comma to colon + # 3.) 5025.678 --> 9296.789 | split by colon and sum + # 4.) 5035.678 --> 9306.789 | add offset + # 5.) 01:23:55.678 --> 02:35:06.789 | reformat + # 6.) 01:23:55,678 --> 02:35:06,789 | period to comma + word = word.replace(',', ':') + (hours, minutes, seconds, mili) = [int(x) for x in word.split(':')] + seconds = (3600 * hours) + (60 * minutes) + (seconds) + (mili / 1000) + + seconds += offset + (hours, seconds) = divmod(seconds, 3600) + (minutes, seconds) = divmod(seconds, 60) + + if hours < 0: + raise Exception('Negative time') + + word = '%02d:%02d:%06.3f' % (hours, minutes, seconds) + word = word.replace('.', ',') + changed = True + words[wordindex] = word + + if changed: + line = ' '.join(words) + print(line) + lines[lineindex] = line + +lines = '\n'.join(lines) +(name, extension) = os.path.splitext(filename) +newname = name + '_correct' + extension +x = open(newname, 'w') +x.write(lines) +x.close() \ No newline at end of file diff --git a/YoutubeChannelDownloader/static/common.css b/YoutubeChannelDownloader/static/common.css deleted file mode 100644 index c25663e..0000000 --- a/YoutubeChannelDownloader/static/common.css +++ /dev/null @@ -1,32 +0,0 @@ -body -{ - display: flex; - flex-direction: column; - background-color:#fff; - margin: 8px; -} -#header -{ - display: flex; - flex-direction: row; - justify-content: center; - align-content: center; - margin-bottom: 4px; -} -.header_element -{ - display: flex; - justify-content: center; - flex: 1; - background-color: rgba(0, 0, 0, 0.1); -} -.header_element:hover -{ - background-color: rgba(0, 0, 0, 0.2); -} -#content_body -{ - flex: 0 0 auto; - display: flex; - flex-direction: row; -} diff --git a/YoutubeChannelDownloader/static/common.js b/YoutubeChannelDownloader/static/common.js deleted file mode 100644 index a4797c8..0000000 --- a/YoutubeChannelDownloader/static/common.js +++ /dev/null @@ -1,83 +0,0 @@ -function post_example(key, value, callback) -{ - var url = "/postexample"; - data = new FormData(); - data.append(key, value); - return post(url, data, callback); -} - -function null_callback() -{ - return; -} - -function post(url, data, callback) -{ - var request = new XMLHttpRequest(); - request.answer = null; - request.onreadystatechange = function() - { - if (request.readyState == 4) - { - var text = request.responseText; - if (callback != null) - { - console.log(text); - callback(JSON.parse(text)); - } - } - }; - var asynchronous = true; - request.open("POST", url, asynchronous); - request.send(data); -} - -function bind_box_to_button(box, button) -{ - box.onkeydown=function() - { - if (event.keyCode == 13) - { - button.click(); - } - }; -} -function entry_with_history_hook(box, button) -{ - //console.log(event.keyCode); - if (box.entry_history === undefined) - {box.entry_history = [];} - if (box.entry_history_pos === undefined) - {box.entry_history_pos = -1;} - if (event.keyCode == 13) - { - /* Enter */ - box.entry_history.push(box.value); - button.click(); - box.value = ""; - } - else if (event.keyCode == 38) - { - - /* Up arrow */ - if (box.entry_history.length == 0) - {return} - if (box.entry_history_pos == -1) - { - box.entry_history_pos = box.entry_history.length - 1; - } - else if (box.entry_history_pos > 0) - { - box.entry_history_pos -= 1; - } - box.value = box.entry_history[box.entry_history_pos]; - } - else if (event.keyCode == 27) - { - box.value = ""; - } - else - { - box.entry_history_pos = -1; - } -} diff --git a/YoutubeChannelDownloader/static/favicon.png b/YoutubeChannelDownloader/static/favicon.png deleted file mode 100644 index 7140f04..0000000 Binary files a/YoutubeChannelDownloader/static/favicon.png and /dev/null differ diff --git a/YoutubeChannelDownloader/templates/channel.html b/YoutubeChannelDownloader/templates/channel.html deleted file mode 100644 index efef3ce..0000000 --- a/YoutubeChannelDownloader/templates/channel.html +++ /dev/null @@ -1,145 +0,0 @@ - - - - {% import "header.html" as header %} - {{channel['name']}} - - - - - - - - - -{{header.make_header()}} -
    - - - All - Pending - Ignored - Downloaded - {% for video in videos %} - - {% if video['download'] == "downloaded" %} -
    - {% elif video['download'] == "ignored" %} -
    - {% else %} -
    - {% endif %} - {{video['title']}} -
    - -
    - {% if video['download'] == "downloaded" %} - - {% elif video['download'] == "ignored" %} - - {% else %} - - - {% endif %} -
    -
    -
    - {% endfor %} -
    - - - - - diff --git a/YoutubeChannelDownloader/templates/channels.html b/YoutubeChannelDownloader/templates/channels.html deleted file mode 100644 index 6c3a691..0000000 --- a/YoutubeChannelDownloader/templates/channels.html +++ /dev/null @@ -1,67 +0,0 @@ - - - - {% import "header.html" as header %} - Channels - - - - - - - - - -{{header.make_header()}} -
    - - - {% for channel in channels %} - {% if channel['has_pending'] %} -
    - {% else %} -
    - {% endif %} - {{channel['name']}} -
    - {% endfor %} -
    - - - - - diff --git a/YoutubeChannelDownloader/templates/header.html b/YoutubeChannelDownloader/templates/header.html deleted file mode 100644 index fa3564a..0000000 --- a/YoutubeChannelDownloader/templates/header.html +++ /dev/null @@ -1,6 +0,0 @@ -{% macro make_header() %} - -{% endmacro %} \ No newline at end of file diff --git a/YoutubeChannelDownloader/templates/root.html b/YoutubeChannelDownloader/templates/root.html deleted file mode 100644 index 5dfa283..0000000 --- a/YoutubeChannelDownloader/templates/root.html +++ /dev/null @@ -1,28 +0,0 @@ - - - - - - - {% import "header.html" as header %} - Flasksite - - - - - - Manage channels - - - - - diff --git a/YoutubeChannelDownloader/ycdl.py b/YoutubeChannelDownloader/ycdl.py deleted file mode 100644 index 90e893f..0000000 --- a/YoutubeChannelDownloader/ycdl.py +++ /dev/null @@ -1,214 +0,0 @@ -import os -import sqlite3 -import ytapi - -# AVAILABLE FORMATTERS: -# url, id -YOUTUBE_DL_COMMAND = 'touch {id}.ytqueue' - -SQL_CHANNEL_COLUMNS = [ - 'id', - 'name', - 'directory', -] - -SQL_VIDEO_COLUMNS = [ - 'id', - 'published', - 'author_id', - 'title', - 'description', - 'thumbnail', - 'download', -] - -SQL_CHANNEL = {key:index for (index, key) in enumerate(SQL_CHANNEL_COLUMNS)} -SQL_VIDEO = {key:index for (index, key) in enumerate(SQL_VIDEO_COLUMNS)} - -DATABASE_VERSION = 1 -DB_INIT = ''' -PRAGMA count_changes = OFF; -PRAGMA cache_size = 10000; -PRAGMA user_version = {user_version}; -CREATE TABLE IF NOT EXISTS channels( - id TEXT, - name TEXT, - directory TEXT COLLATE NOCASE -); -CREATE TABLE IF NOT EXISTS videos( - id TEXT, - published INT, - author_id TEXT, - title TEXT, - description TEXT, - thumbnail TEXT, - download TEXT -); - - -CREATE INDEX IF NOT EXISTS index_channel_id on channels(id); -CREATE INDEX IF NOT EXISTS index_video_id on videos(id); -CREATE INDEX IF NOT EXISTS index_video_published on videos(published); -CREATE INDEX IF NOT EXISTS index_video_download on videos(download); - -'''.format(user_version=DATABASE_VERSION) - -DEFAULT_DBNAME = 'ycdl.db' - -ERROR_DATABASE_OUTOFDATE = 'Database is out-of-date. {current} should be {new}' - -def verify_is_abspath(path): - ''' - TO DO: Determine whether this is actually correct. - ''' - if os.path.abspath(path) != path: - raise ValueError('Not an abspath') - -class YCDL: - def __init__(self, youtube, database_filename=None): - self.youtube = youtube - if database_filename is None: - database_filename = DEFAULT_DBNAME - - existing_database = os.path.exists(database_filename) - self.sql = sqlite3.connect(database_filename) - self.cur = self.sql.cursor() - - if existing_database: - self.cur.execute('PRAGMA user_version') - existing_version = self.cur.fetchone()[0] - if existing_version != DATABASE_VERSION: - message = ERROR_DATABASE_OUTOFDATE - message = message.format(current=existing_version, new=DATABASE_VERSION) - print(message) - raise SystemExit - - statements = DB_INIT.split(';') - for statement in statements: - self.cur.execute(statement) - - def add_channel(self, channel_id, name=None, download_directory=None, get_videos=True, commit=False): - if self.get_channel(channel_id) is not None: - return - - if name is None: - name = self.youtube.get_user_name(channel_id) - - data = [None] * len(SQL_CHANNEL) - data[SQL_CHANNEL['id']] = channel_id - data[SQL_CHANNEL['name']] = name - if download_directory is not None: - verify_is_abspath(download_directory) - data[SQL_CHANNEL['directory']] = download_directory - - self.cur.execute('INSERT INTO channels VALUES(?, ?, ?)', data) - if get_videos: - self.refresh_channel(channel_id, commit=False) - if commit: - self.sql.commit() - - def channel_has_pending(self, channel_id): - self.cur.execute('SELECT * FROM videos WHERE author_id == ? AND download == "pending"', [channel_id]) - return self.cur.fetchone() is not None - - def channel_directory(self, channel_id): - self.cur.execute('SELECT * FROM channels WHERE id == ?', [channel_id]) - fetch = self.cur.fetchone() - if fetch is None: - return None - return fetch[SQL_CHANNEL['directory']] - - def download_video(self, video, force=False): - if not isinstance(video, ytapi.Video): - video = self.youtube.get_video(video) - - self.add_channel(video.author_id, get_videos=False, commit=False) - status = self.insert_video(video, commit=True) - - if status['row'][SQL_VIDEO['download']] != 'pending' and not force: - print('That video does not need to be downloaded.') - return - - download_directory = self.channel_directory(video.author_id) - download_directory = download_directory or os.getcwd() - - current_directory = os.getcwd() - os.makedirs(download_directory, exist_ok=True) - os.chdir(download_directory) - url = 'https://www.youtube.com/watch?v={id}'.format(id=video.id) - command = YOUTUBE_DL_COMMAND.format(url=url, id=video.id) - os.system(command) - os.chdir(current_directory) - - self.cur.execute('UPDATE videos SET download = "downloaded" WHERE id == ?', [video.id]) - self.sql.commit() - - def get_channel(self, channel_id): - self.cur.execute('SELECT * FROM channels WHERE id == ?', [channel_id]) - fetch = self.cur.fetchone() - if not fetch: - return None - fetch = {key: fetch[SQL_CHANNEL[key]] for key in SQL_CHANNEL} - return fetch - - def get_channels(self): - self.cur.execute('SELECT * FROM channels') - channels = self.cur.fetchall() - channels = [{key: channel[SQL_CHANNEL[key]] for key in SQL_CHANNEL} for channel in channels] - channels.sort(key=lambda x: x['name'].lower()) - return channels - - def get_videos_by_channel(self, channel_id): - self.cur.execute('SELECT * FROM videos WHERE author_id == ?', [channel_id]) - videos = self.cur.fetchall() - if not videos: - return [] - videos = [{key: video[SQL_VIDEO[key]] for key in SQL_VIDEO} for video in videos] - videos.sort(key=lambda x: x['published'], reverse=True) - return videos - - def mark_video_state(self, video_id, state, commit=True): - ''' - Mark the video as ignored, pending, or downloaded. - ''' - if state not in ['ignored', 'pending', 'downloaded']: - raise ValueError(state) - self.cur.execute('SELECT * FROM videos WHERE id == ?', [video_id]) - if self.cur.fetchone() is None: - raise KeyError(video_id) - self.cur.execute('UPDATE videos SET download = ? WHERE id == ?', [state, video_id]) - if commit: - self.sql.commit() - - def refresh_channel(self, channel_id, force=True, commit=True): - video_generator = self.youtube.get_user_videos(uid=channel_id) - for video in video_generator: - status = self.insert_video(video, commit=False) - if not force and not status['new']: - break - if commit: - self.sql.commit() - - def insert_video(self, video, commit=True): - if not isinstance(video, ytapi.Video): - video = self.youtube.get_video(video) - - self.add_channel(video.author_id, get_videos=False, commit=False) - self.cur.execute('SELECT * FROM videos WHERE id == ?', [video.id]) - fetch = self.cur.fetchone() - if fetch is not None: - return {'new': False, 'row': fetch} - - data = [None] * len(SQL_VIDEO) - data[SQL_VIDEO['id']] = video.id - data[SQL_VIDEO['published']] = video.published - data[SQL_VIDEO['author_id']] = video.author_id - data[SQL_VIDEO['title']] = video.title - data[SQL_VIDEO['description']] = video.description - data[SQL_VIDEO['thumbnail']] = video.thumbnail['url'] - data[SQL_VIDEO['download']] = 'pending' - - self.cur.execute('INSERT INTO videos VALUES(?, ?, ?, ?, ?, ?, ?)', data) - if commit: - self.sql.commit() - return {'new': True, 'row': data} diff --git a/YoutubeChannelDownloader/ycdl_easy.py b/YoutubeChannelDownloader/ycdl_easy.py deleted file mode 100644 index c78e8ab..0000000 --- a/YoutubeChannelDownloader/ycdl_easy.py +++ /dev/null @@ -1,6 +0,0 @@ -import ytapi -import ycdl -import bot - -youtube_core = ytapi.Youtube(bot.YOUTUBE_KEY) -youtube = ycdl.YCDL(youtube_core) diff --git a/YoutubeChannelDownloader/ycdl_launch.py b/YoutubeChannelDownloader/ycdl_launch.py deleted file mode 100644 index 8bca03b..0000000 --- a/YoutubeChannelDownloader/ycdl_launch.py +++ /dev/null @@ -1,29 +0,0 @@ -import gevent.monkey -gevent.monkey.patch_all() - -import ycdl_site -import gevent.pywsgi -import gevent.wsgi -import sys - -if len(sys.argv) == 2: - port = int(sys.argv[1]) -else: - port = 5000 - -if port == 443: - http = gevent.pywsgi.WSGIServer( - listener=('', port), - application=ycdl_site.site, - keyfile='https\\flasksite.key', - certfile='https\\flasksite.crt', - ) -else: - http = gevent.pywsgi.WSGIServer( - listener=('', port), - application=ycdl_site.site, - ) - - -print('Starting server') -http.serve_forever() diff --git a/YoutubeChannelDownloader/ycdl_site.py b/YoutubeChannelDownloader/ycdl_site.py deleted file mode 100644 index 523cf23..0000000 --- a/YoutubeChannelDownloader/ycdl_site.py +++ /dev/null @@ -1,213 +0,0 @@ -import flask -from flask import request -import json -import mimetypes -import os -import sqlite3 -import threading -import time - -import ytapi -import ycdl -import bot - -youtube_core = ytapi.Youtube(bot.YOUTUBE_KEY) -youtube = ycdl.YCDL(youtube_core) - -site = flask.Flask(__name__) -site.config.update( - SEND_FILE_MAX_AGE_DEFAULT=180, - TEMPLATES_AUTO_RELOAD=True, -) -site.jinja_env.add_extension('jinja2.ext.do') -site.debug = True - -download_queue = set() - -#################################################################################################### -#################################################################################################### -#################################################################################################### -#################################################################################################### - -#def handle_download_queue(): -# while True: -# if len(download_queue) > 0: -# item = download_queue.pop() -# youtube.download_video(item) -# time.sleep(2) -# -#DOWNLOAD_QUEUE_THREAD = threading.Thread(target=handle_download_queue) -#DOWNLOAD_QUEUE_THREAD.daemon = True -#DOWNLOAD_QUEUE_THREAD.start() - -def make_json_response(j, *args, **kwargs): - dumped = json.dumps(j) - response = flask.Response(dumped, *args, **kwargs) - response.headers['Content-Type'] = 'application/json;charset=utf-8' - return response - -def send_file(filepath): - ''' - Range-enabled file sending. - ''' - try: - file_size = os.path.getsize(filepath) - except FileNotFoundError: - flask.abort(404) - - outgoing_headers = {} - mimetype = mimetypes.guess_type(filepath)[0] - if mimetype is not None: - if 'text/' in mimetype: - mimetype += '; charset=utf-8' - outgoing_headers['Content-Type'] = mimetype - - if 'range' in request.headers: - desired_range = request.headers['range'].lower() - desired_range = desired_range.split('bytes=')[-1] - - int_helper = lambda x: int(x) if x.isdigit() else None - if '-' in desired_range: - (desired_min, desired_max) = desired_range.split('-') - range_min = int_helper(desired_min) - range_max = int_helper(desired_max) - else: - range_min = int_helper(desired_range) - - if range_min is None: - range_min = 0 - if range_max is None: - range_max = file_size - - # because ranges are 0-indexed - range_max = min(range_max, file_size - 1) - range_min = max(range_min, 0) - - range_header = 'bytes {min}-{max}/{outof}'.format( - min=range_min, - max=range_max, - outof=file_size, - ) - outgoing_headers['Content-Range'] = range_header - status = 206 - else: - range_max = file_size - 1 - range_min = 0 - status = 200 - - outgoing_headers['Accept-Ranges'] = 'bytes' - outgoing_headers['Content-Length'] = (range_max - range_min) + 1 - - if request.method == 'HEAD': - outgoing_data = bytes() - else: - outgoing_data = helpers.read_filebytes(filepath, range_min=range_min, range_max=range_max) - - response = flask.Response( - outgoing_data, - status=status, - headers=outgoing_headers, - ) - return response - -def truthystring(s): - if isinstance(s, (bool, int)) or s is None: - return s - s = s.lower() - if s in {'1', 'true', 't', 'yes', 'y', 'on'}: - return True - if s in {'null', 'none'}: - return None - return False - - -#################################################################################################### -#################################################################################################### -#################################################################################################### -#################################################################################################### - -@site.route('/') -def root(): - return flask.render_template('root.html') - -@site.route('/channels') -def get_channels(): - channels = youtube.get_channels() - for channel in channels: - channel['has_pending'] = youtube.channel_has_pending(channel['id']) - return flask.render_template('channels.html', channels=channels) - -@site.route('/channel/') -@site.route('/channel//') -def get_channel(channel_id, download_filter=None): - channel = youtube.get_channel(channel_id) - if channel is None: - flask.abort(404) - videos = youtube.get_videos_by_channel(channel_id) - if download_filter is not None: - videos = [video for video in videos if video['download'] == download_filter] - return flask.render_template('channel.html', channel=channel, videos=videos) - -@site.route('/favicon.ico') -@site.route('/favicon.png') -def favicon(): - filename = os.path.join('static', 'favicon.png') - return flask.send_file(filename) - -@site.route('/static/') -def get_static(filename): - filename = filename.replace('\\', os.sep) - filename = filename.replace('/', os.sep) - filename = os.path.join('static', filename) - return flask.send_file(filename) - -@site.route('/mark_video_state', methods=['POST']) -def post_mark_video_state(): - if 'video_id' not in request.form or 'state' not in request.form: - flask.abort(400) - video_id = request.form['video_id'] - state = request.form['state'] - try: - youtube.mark_video_state(video_id, state) - except KeyError: - flask.abort(404) - except ValueError: - flask.abort(400) - return make_json_response({}) - -@site.route('/refresh_channel', methods=['POST']) -def post_refresh_channel(): - if 'channel_id' not in request.form: - flask.abort(400) - channel_id = request.form['channel_id'] - force = request.form.get('force', False) - force = truthystring(force) - print('Refresh channel', channel_id) - youtube.refresh_channel(channel_id, force=force) - return make_json_response({}) - -@site.route('/refresh_all_channels', methods=['POST']) -def post_refresh_all_channels(): - force = request.form.get('force', False) - force = truthystring(force) - for channel in youtube.get_channels(): - print('Refresh channel', channel['id']) - youtube.refresh_channel(channel['id'], force=force) - return make_json_response({}) - -@site.route('/start_download', methods=['POST']) -def post_start_download(): - if 'video_id' not in request.form: - flask.abort(400) - video_id = request.form['video_id'] - video_info = youtube_core.get_video([video_id]) - if video_info == []: - flask.abort(404) - for video in video_info: - #download_queue.add(video) - youtube.download_video(video) - #print(video) - return make_json_response({}) - -if __name__ == '__main__': - pass diff --git a/YoutubeChannelDownloader/ytapi.py b/YoutubeChannelDownloader/ytapi.py deleted file mode 100644 index a6c3237..0000000 --- a/YoutubeChannelDownloader/ytapi.py +++ /dev/null @@ -1,102 +0,0 @@ -import apiclient.discovery -import datetime -import sqlite3 - -class Video: - def __init__(self, snippet): - self.id = snippet['id'] - - snippet = snippet['snippet'] - self.title = snippet['title'] or '[untitled]' - self.description = snippet['description'] - self.author_id = snippet['channelId'] - self.author_name = snippet['channelTitle'] - # Something like '2016-10-01T21:00:01' - self.published_string = snippet['publishedAt'] - published = snippet['publishedAt'] - published = published.split('.')[0] - published = datetime.datetime.strptime(published, '%Y-%m-%dT%H:%M:%S') - self.published = published.timestamp() - - thumbnails = snippet['thumbnails'] - best_thumbnail = max(thumbnails, key=lambda x: thumbnails[x]['width'] * thumbnails[x]['height']) - self.thumbnail = thumbnails[best_thumbnail] - - -class Youtube: - def __init__(self, key): - youtube = apiclient.discovery.build( - developerKey=key, - serviceName='youtube', - version='v3', - ) - self.youtube = youtube - - def get_user_name(self, uid): - user = self.youtube.channels().list(part='snippet', id=uid).execute() - return user['items'][0]['snippet']['title'] - - def get_user_videos(self, username=None, uid=None): - if username: - user = self.youtube.channels().list(part='contentDetails', forUsername=username).execute() - else: - user = self.youtube.channels().list(part='contentDetails', id=uid).execute() - upload_playlist = user['items'][0]['contentDetails']['relatedPlaylists']['uploads'] - page_token = None - while True: - items = self.youtube.playlistItems().list( - maxResults=50, - pageToken=page_token, - part='contentDetails', - playlistId=upload_playlist, - ).execute() - page_token = items.get('nextPageToken', None) - new = [item['contentDetails']['videoId'] for item in items['items']] - count = len(new) - new = self.get_video(new) - new.sort(key=lambda x: x.published, reverse=True) - yield from new - #print('Found %d more, %d total' % (count, len(videos))) - if page_token is None or count < 50: - break - - def get_video(self, video_ids): - if isinstance(video_ids, str): - singular = True - video_ids = [video_ids] - else: - singular = False - video_ids = chunk_sequence(video_ids, 50) - results = [] - for chunk in video_ids: - chunk = ','.join(chunk) - data = self.youtube.videos().list(part='snippet', id=chunk).execute() - items = data['items'] - results += items - #print('Found %d more, %d total' % (len(items), len(results))) - results = [Video(snippet) for snippet in results] - if singular and len(results) == 1: - return results[0] - return results - - -def chunk_sequence(sequence, chunk_length, allow_incomplete=True): - """Given a sequence, divide it into sequences of length `chunk_length`. - - :param allow_incomplete: If True, allow the final chunk to be shorter if the - given sequence is not an exact multiple of `chunk_length`. - If False, the incomplete chunk will be discarded. - """ - (complete, leftover) = divmod(len(sequence), chunk_length) - if not allow_incomplete: - leftover = 0 - - chunk_count = complete + min(leftover, 1) - - chunks = [] - for x in range(chunk_count): - left = chunk_length * x - right = left + chunk_length - chunks.append(sequence[left:right]) - - return chunks diff --git a/YoutubeChannelDownloader/ytqueue.py b/YoutubeChannelDownloader/ytqueue.py deleted file mode 100644 index 067f2a4..0000000 --- a/YoutubeChannelDownloader/ytqueue.py +++ /dev/null @@ -1,20 +0,0 @@ -''' -I was having trouble making my Flask server perform the youtube-dl without -slowing down and clogging up the other site activities. So instead I'll just -have the server export ytqueue files, which this script will download -as a separate process. -''' -import os -import time - -YOUTUBE_DL = 'youtube-dlw https://www.youtube.com/watch?v={id}' - -while True: - print(time.strftime('%H:%M:%S'), 'Looking for files.') - queue = [f for f in os.listdir() if f.endswith('.ytqueue')] - for filename in queue: - yt_id = filename.split('.')[0] - command = YOUTUBE_DL.format(id=yt_id) - os.system(command) - os.remove(filename) - time.sleep(10) \ No newline at end of file diff --git a/_voussoirkit/voussoirkit.py b/_voussoirkit/voussoirkit.py index 895c582..fd1c306 100644 --- a/_voussoirkit/voussoirkit.py +++ b/_voussoirkit/voussoirkit.py @@ -39,7 +39,7 @@ import setuptools setuptools.setup( author='voussoir', name='{package}', - version='0.0.1', + version='0.0.2', description='', py_modules=[{py_modules}], ) @@ -68,4 +68,4 @@ shutil.rmtree('dist') shutil.rmtree(PACKAGE) shutil.rmtree(glob.glob('*.egg-info')[0]) os.remove('setup.py') -os.rename(glob.glob('*.zip')[0], 'voussoirkit.zip') \ No newline at end of file +os.rename(glob.glob('*.zip')[0], 'voussoirkit.zip') diff --git a/_voussoirkit/voussoirkit.zip b/_voussoirkit/voussoirkit.zip index 78d4fe9..6213275 100644 Binary files a/_voussoirkit/voussoirkit.zip and b/_voussoirkit/voussoirkit.zip differ