else
This commit is contained in:
parent
4e1880f9a0
commit
2e337c77c5
106 changed files with 317 additions and 14925 deletions
|
@ -4,13 +4,9 @@ from Crypto.Cipher import AES
|
|||
import sys
|
||||
import os
|
||||
|
||||
try:
|
||||
sys.path.append('C:\\git\\else\\Bytestring')
|
||||
import bytestring
|
||||
except ImportError:
|
||||
# pip install
|
||||
# https://raw.githubusercontent.com/voussoir/else/master/_voussoirkit/voussoirkit.zip
|
||||
from voussoirkit import bytestring
|
||||
# pip install
|
||||
# https://raw.githubusercontent.com/voussoir/else/master/_voussoirkit/voussoirkit.zip
|
||||
from voussoirkit import bytestring
|
||||
|
||||
|
||||
BLOCK_SIZE = 32
|
||||
|
|
|
@ -7,19 +7,11 @@ import time
|
|||
import urllib
|
||||
import warnings
|
||||
|
||||
try:
|
||||
sys.path.append('C:\\git\\else\\Bytestring')
|
||||
sys.path.append('C:\\git\\else\\clipext')
|
||||
sys.path.append('C:\\git\\else\\ratelimiter')
|
||||
import bytestring
|
||||
import ratelimiter
|
||||
import clipext
|
||||
except ImportError:
|
||||
# pip install
|
||||
# https://raw.githubusercontent.com/voussoir/else/master/_voussoirkit/voussoirkit.zip
|
||||
from voussoirkit import bytestring
|
||||
from voussoirkit import ratelimiter
|
||||
from voussoirkit import clipext
|
||||
# pip install
|
||||
# https://raw.githubusercontent.com/voussoir/else/master/_voussoirkit/voussoirkit.zip
|
||||
from voussoirkit import bytestring
|
||||
from voussoirkit import ratelimiter
|
||||
from voussoirkit import clipext
|
||||
|
||||
warnings.simplefilter('ignore')
|
||||
|
||||
|
|
|
@ -6,16 +6,10 @@ import os
|
|||
import requests
|
||||
import sys
|
||||
|
||||
try:
|
||||
sys.path.append('C:\\git\\else\\Clipext')
|
||||
sys.path.append('C:\\git\\else\\Downloady')
|
||||
import clipext
|
||||
import downloady
|
||||
except ImportError:
|
||||
# pip install
|
||||
# https://raw.githubusercontent.com/voussoir/else/master/_voussoirkit/voussoirkit.zip
|
||||
from voussoirkit import clipext
|
||||
from voussoirkit import downloady
|
||||
# pip install
|
||||
# https://raw.githubusercontent.com/voussoir/else/master/_voussoirkit/voussoirkit.zip
|
||||
from voussoirkit import clipext
|
||||
from voussoirkit import downloady
|
||||
|
||||
|
||||
''' '''
|
||||
|
|
|
@ -136,16 +136,10 @@ import sys
|
|||
## import tkinter
|
||||
import urllib.parse
|
||||
|
||||
try:
|
||||
sys.path.append('C:\\git\\else\\Bytestring')
|
||||
sys.path.append('C:\\git\\else\\Downloady')
|
||||
import bytestring
|
||||
import downloady
|
||||
except ImportError:
|
||||
# pip install
|
||||
# https://raw.githubusercontent.com/voussoir/else/master/_voussoirkit/voussoirkit.zip
|
||||
from voussoirkit import bytestring
|
||||
from voussoirkit import downloady
|
||||
# pip install
|
||||
# https://raw.githubusercontent.com/voussoir/else/master/_voussoirkit/voussoirkit.zip
|
||||
from voussoirkit import bytestring
|
||||
from voussoirkit import downloady
|
||||
|
||||
DOWNLOAD_CHUNK = 16 * bytestring.KIBIBYTE
|
||||
FILENAME_BADCHARS = '/\\:*?"<>|'
|
||||
|
@ -158,6 +152,7 @@ UNKNOWN_SIZE_STRING = '???'
|
|||
# enough of the typical opendir to speed things up.
|
||||
SKIPPABLE_FILETYPES = [
|
||||
'.3gp',
|
||||
'.7z',
|
||||
'.aac',
|
||||
'.avi',
|
||||
'.bin',
|
||||
|
@ -532,21 +527,24 @@ def build_file_tree(databasename):
|
|||
sql = sqlite3.connect(databasename)
|
||||
cur = sql.cursor()
|
||||
cur.execute('SELECT * FROM urls WHERE do_download == 1')
|
||||
all_items = cur.fetchall()
|
||||
fetch_all = cur.fetchall()
|
||||
sql.close()
|
||||
|
||||
if len(all_items) == 0:
|
||||
if len(fetch_all) == 0:
|
||||
return
|
||||
|
||||
path_form = '{domain}\\{folder}\\{filename}'
|
||||
all_items = [
|
||||
{
|
||||
'url': item[SQL_URL],
|
||||
'size': item[SQL_CONTENT_LENGTH],
|
||||
'path_parts': path_form.format(**url_split(item[SQL_URL])).split('\\'),
|
||||
}
|
||||
for item in all_items
|
||||
]
|
||||
all_items = []
|
||||
for item in fetch_all:
|
||||
url = item[SQL_URL]
|
||||
size = item[SQL_CONTENT_LENGTH]
|
||||
path_parts = url_split(item[SQL_URL])
|
||||
path_parts = path_form.format(**path_parts)
|
||||
#path_parts = urllib.parse.unquote(path_parts)
|
||||
path_parts = path_parts.split('\\')
|
||||
item = {'url': url, 'size': size, 'path_parts': path_parts}
|
||||
all_items.append(item)
|
||||
|
||||
all_items.sort(key=lambda x: x['url'])
|
||||
|
||||
root_data = {
|
||||
|
@ -771,7 +769,7 @@ def smart_insert(sql, cur, url=None, head=None, commit=True):
|
|||
content_type = head.headers.get('Content-Type', None)
|
||||
|
||||
basename = url_split(url)['filename']
|
||||
basename = urllib.parse.unquote(basename)
|
||||
#basename = urllib.parse.unquote(basename)
|
||||
do_download = True
|
||||
|
||||
cur.execute('SELECT * FROM urls WHERE url == ?', [url])
|
||||
|
@ -798,7 +796,7 @@ def url_split(url):
|
|||
'''
|
||||
Given a url, return a dictionary of its components.
|
||||
'''
|
||||
url = urllib.parse.unquote(url)
|
||||
#url = urllib.parse.unquote(url)
|
||||
parts = urllib.parse.urlsplit(url)
|
||||
if any(part == '' for part in [parts.scheme, parts.netloc]):
|
||||
raise ValueError('Not a valid URL')
|
||||
|
@ -817,9 +815,9 @@ def url_split(url):
|
|||
|
||||
result = {
|
||||
'scheme': scheme,
|
||||
'domain': root,
|
||||
'folder': folder,
|
||||
'filename': filename,
|
||||
'domain': urllib.parse.unquote(root),
|
||||
'folder': urllib.parse.unquote(folder),
|
||||
'filename': urllib.parse.unquote(filename),
|
||||
}
|
||||
return result
|
||||
|
||||
|
|
|
@ -8,19 +8,11 @@ import socketserver
|
|||
import sys
|
||||
import types
|
||||
|
||||
try:
|
||||
sys.path.append('C:\\git\\else\\Bytestring')
|
||||
sys.path.append('C:\\git\\else\\Pathclass')
|
||||
sys.path.append('C:\\git\\else\\Ratelimiter')
|
||||
import bytestring
|
||||
import pathclass
|
||||
import ratelimiter
|
||||
except ImportError:
|
||||
# pip install
|
||||
# https://raw.githubusercontent.com/voussoir/else/master/_voussoirkit/voussoirkit.zip
|
||||
from voussoirkit import bytestring
|
||||
from voussoirkit import pathclass
|
||||
from voussoirkit import ratelimiter
|
||||
# pip install
|
||||
# https://raw.githubusercontent.com/voussoir/else/master/_voussoirkit/voussoirkit.zip
|
||||
from voussoirkit import bytestring
|
||||
from voussoirkit import pathclass
|
||||
from voussoirkit import ratelimiter
|
||||
|
||||
FILE_READ_CHUNK = bytestring.MIBIBYTE
|
||||
RATELIMITER = ratelimiter.Ratelimiter(16 * bytestring.MIBIBYTE)
|
||||
|
|
10
SnudownTest/.gitignore
vendored
10
SnudownTest/.gitignore
vendored
|
@ -1,10 +0,0 @@
|
|||
build/
|
||||
dist/
|
||||
snudown.egg-info/
|
||||
src/html_entities.h
|
||||
*.pyc
|
||||
*.so
|
||||
*.so.*
|
||||
*.o
|
||||
/fuzzing/bin
|
||||
/fuzzing/testing
|
4
SnudownTest/.gitmodules
vendored
4
SnudownTest/.gitmodules
vendored
|
@ -1,4 +0,0 @@
|
|||
[submodule "gumbo_snudown"]
|
||||
path = fuzzing/gumbo_snudown
|
||||
url = git@github.com:JordanMilne/gumbo-parser.git
|
||||
branch = markdown_validation
|
|
@ -1,133 +0,0 @@
|
|||
#ifndef Py_PYTHON_H
|
||||
#define Py_PYTHON_H
|
||||
/* Since this is a "meta-include" file, no #ifdef __cplusplus / extern "C" { */
|
||||
|
||||
/* Include nearly all Python header files */
|
||||
|
||||
#include "patchlevel.h"
|
||||
#include "pyconfig.h"
|
||||
#include "pymacconfig.h"
|
||||
|
||||
#include <limits.h>
|
||||
|
||||
#ifndef UCHAR_MAX
|
||||
#error "Something's broken. UCHAR_MAX should be defined in limits.h."
|
||||
#endif
|
||||
|
||||
#if UCHAR_MAX != 255
|
||||
#error "Python's source code assumes C's unsigned char is an 8-bit type."
|
||||
#endif
|
||||
|
||||
#if defined(__sgi) && defined(WITH_THREAD) && !defined(_SGI_MP_SOURCE)
|
||||
#define _SGI_MP_SOURCE
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
#ifndef NULL
|
||||
# error "Python.h requires that stdio.h define NULL."
|
||||
#endif
|
||||
|
||||
#include <string.h>
|
||||
#ifdef HAVE_ERRNO_H
|
||||
#include <errno.h>
|
||||
#endif
|
||||
#include <stdlib.h>
|
||||
#ifdef HAVE_UNISTD_H
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
/* For size_t? */
|
||||
#ifdef HAVE_STDDEF_H
|
||||
#include <stddef.h>
|
||||
#endif
|
||||
|
||||
/* CAUTION: Build setups should ensure that NDEBUG is defined on the
|
||||
* compiler command line when building Python in release mode; else
|
||||
* assert() calls won't be removed.
|
||||
*/
|
||||
#include <assert.h>
|
||||
|
||||
#include "pyport.h"
|
||||
#include "pymacro.h"
|
||||
|
||||
#include "pyatomic.h"
|
||||
|
||||
/* Debug-mode build with pymalloc implies PYMALLOC_DEBUG.
|
||||
* PYMALLOC_DEBUG is in error if pymalloc is not in use.
|
||||
*/
|
||||
#if defined(Py_DEBUG) && defined(WITH_PYMALLOC) && !defined(PYMALLOC_DEBUG)
|
||||
#define PYMALLOC_DEBUG
|
||||
#endif
|
||||
#if defined(PYMALLOC_DEBUG) && !defined(WITH_PYMALLOC)
|
||||
#error "PYMALLOC_DEBUG requires WITH_PYMALLOC"
|
||||
#endif
|
||||
#include "pymath.h"
|
||||
#include "pytime.h"
|
||||
#include "pymem.h"
|
||||
|
||||
#include "object.h"
|
||||
#include "objimpl.h"
|
||||
#include "typeslots.h"
|
||||
#include "pyhash.h"
|
||||
|
||||
#include "pydebug.h"
|
||||
|
||||
#include "bytearrayobject.h"
|
||||
#include "bytesobject.h"
|
||||
#include "unicodeobject.h"
|
||||
#include "longobject.h"
|
||||
#include "longintrepr.h"
|
||||
#include "boolobject.h"
|
||||
#include "floatobject.h"
|
||||
#include "complexobject.h"
|
||||
#include "rangeobject.h"
|
||||
#include "memoryobject.h"
|
||||
#include "tupleobject.h"
|
||||
#include "listobject.h"
|
||||
#include "dictobject.h"
|
||||
#include "enumobject.h"
|
||||
#include "setobject.h"
|
||||
#include "methodobject.h"
|
||||
#include "moduleobject.h"
|
||||
#include "funcobject.h"
|
||||
#include "classobject.h"
|
||||
#include "fileobject.h"
|
||||
#include "pycapsule.h"
|
||||
#include "traceback.h"
|
||||
#include "sliceobject.h"
|
||||
#include "cellobject.h"
|
||||
#include "iterobject.h"
|
||||
#include "genobject.h"
|
||||
#include "descrobject.h"
|
||||
#include "warnings.h"
|
||||
#include "weakrefobject.h"
|
||||
#include "structseq.h"
|
||||
#include "namespaceobject.h"
|
||||
|
||||
#include "codecs.h"
|
||||
#include "pyerrors.h"
|
||||
|
||||
#include "pystate.h"
|
||||
|
||||
#include "pyarena.h"
|
||||
#include "modsupport.h"
|
||||
#include "pythonrun.h"
|
||||
#include "ceval.h"
|
||||
#include "sysmodule.h"
|
||||
#include "intrcheck.h"
|
||||
#include "import.h"
|
||||
|
||||
#include "abstract.h"
|
||||
#include "bltinmodule.h"
|
||||
|
||||
#include "compile.h"
|
||||
#include "eval.h"
|
||||
|
||||
#include "pyctype.h"
|
||||
#include "pystrtod.h"
|
||||
#include "pystrcmp.h"
|
||||
#include "dtoa.h"
|
||||
#include "fileutils.h"
|
||||
#include "pyfpe.h"
|
||||
|
||||
#endif /* !Py_PYTHON_H */
|
|
@ -1,12 +0,0 @@
|
|||
For safety reasons, whenever you add or change something in Snudown,
|
||||
you should add a few test-cases that demonstrate your change and do a
|
||||
fuzzing run in `/fuzzing` by running `make afl`. Make sure you have `cmake`
|
||||
installed and in your `PATH`!
|
||||
|
||||
This uses [American Fuzzy Lop](http://lcamtuf.coredump.cx/afl/) and a
|
||||
modified [Google Gumbo](https://github.com/google/gumbo-parser/) to ensure
|
||||
there is no way to generate invalid HTML, and that there are no unsafe
|
||||
memory operations.
|
||||
|
||||
See [American Fuzzy Lop](http://lcamtuf.coredump.cx/afl/)'s instructions
|
||||
for your platform to get started.
|
|
@ -1,487 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2011, Vicent Marti
|
||||
*
|
||||
* Permission to use, copy, modify, and distribute this software for any
|
||||
* purpose with or without fee is hereby granted, provided that the above
|
||||
* copyright notice and this permission notice appear in all copies.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "buffer.h"
|
||||
#include "autolink.h"
|
||||
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <ctype.h>
|
||||
|
||||
#if defined(_WIN32)
|
||||
#define strncasecmp _strnicmp
|
||||
#endif
|
||||
|
||||
/*
 * sd_autolink_issafe: report whether `link` begins with a whitelisted prefix.
 *
 * Returns 1 when one of the known prefixes matches case-insensitively, the
 * link is strictly longer than that prefix, and the byte right after the
 * prefix is alphanumeric, '#', '/' or '?'. Returns 0 otherwise.
 */
int
sd_autolink_issafe(const uint8_t *link, size_t link_len)
{
    static const char *const safe_prefixes[] = {
        "http://", "https://", "ftp://", "mailto://",
        "/", "git://", "steam://", "irc://", "news://", "mumble://",
        "ssh://", "ircs://", "ts3server://", "#"
    };
    const size_t prefix_count = sizeof(safe_prefixes) / sizeof(safe_prefixes[0]);
    size_t idx;

    for (idx = 0; idx < prefix_count; ++idx) {
        const char *prefix = safe_prefixes[idx];
        size_t prefix_len = strlen(prefix);
        uint8_t next;

        /* The link must carry at least one byte beyond the prefix. */
        if (link_len <= prefix_len)
            continue;

        if (strncasecmp((char *)link, prefix, prefix_len) != 0)
            continue;

        next = link[prefix_len];
        if (isalnum(next) || next == '#' || next == '/' || next == '?')
            return 1;
    }

    return 0;
}
|
||||
|
||||
/*
 * autolink_delim: trim trailing delimiters from a candidate link.
 *
 * The candidate is data[0 .. link_end). It is first cut at the first '<'.
 * Then trailing '?', '!', '.', ',' characters and trailing "&name;" entities
 * are stripped. Finally, if the last remaining character is a closing quote
 * or bracket whose count differs from the count of its opening counterpart
 * inside the candidate, that one character is dropped.
 *
 * Returns the trimmed length, or 0 when nothing is left.
 * `max_rewind` and `size` are not used by this function.
 */
static size_t
autolink_delim(uint8_t *data, size_t link_end, size_t max_rewind, size_t size)
{
    uint8_t cclose, copen = 0;
    size_t i;

    (void)max_rewind;
    (void)size;

    for (i = 0; i < link_end; ++i) {
        if (data[i] == '<') {
            link_end = i;
            break;
        }
    }

    while (link_end > 0) {
        uint8_t c = data[link_end - 1];

        if (c == 0)
            break;

        if (strchr("?!.,", c) != NULL) {
            link_end--;
        } else if (c == ';') {
            size_t new_end;

            /* A ';' at index 0 cannot terminate an entity. Handle it here so
             * that `link_end - 2` below never wraps around. */
            if (link_end < 2) {
                link_end--;
                continue;
            }

            new_end = link_end - 2;

            while (new_end > 0 && isalpha(data[new_end]))
                new_end--;

            /* Strip a whole "&name;" entity, otherwise only the ';'. */
            if (new_end < link_end - 2 && data[new_end] == '&')
                link_end = new_end;
            else
                link_end--;
        } else {
            break;
        }
    }

    if (link_end == 0)
        return 0;

    cclose = data[link_end - 1];

    switch (cclose) {
    case '"':  copen = '"'; break;
    case '\'': copen = '\''; break;
    case ')':  copen = '('; break;
    case ']':  copen = '['; break;
    case '}':  copen = '{'; break;
    }

    if (copen != 0) {
        size_t closing = 0;
        size_t opening = 0;
        size_t j;

        /* Try to close the final punctuation sign in this same line;
         * if we managed to close it outside of the URL, that means that it's
         * not part of the URL. If it closes inside the URL, that means it
         * is part of the URL.
         *
         * Examples:
         *
         *  foo http://www.pokemon.com/Pikachu_(Electric) bar
         *      => http://www.pokemon.com/Pikachu_(Electric)
         *
         *  foo (http://www.pokemon.com/Pikachu_(Electric)) bar
         *      => http://www.pokemon.com/Pikachu_(Electric)
         *
         *  foo http://www.pokemon.com/Pikachu_(Electric)) bar
         *      => http://www.pokemon.com/Pikachu_(Electric))
         *
         *  (foo http://www.pokemon.com/Pikachu_(Electric)) bar
         *      => foo http://www.pokemon.com/Pikachu_(Electric)
         */
        for (j = 0; j < link_end; ++j) {
            if (data[j] == copen)
                opening++;
            else if (data[j] == cclose)
                closing++;
        }

        if (closing != opening)
            link_end--;
    }

    return link_end;
}
|
||||
|
||||
/*
 * Checks that `prefix_char` occurs on a word boundary just before `data`,
 * where `data` points to the character to search to the left of, and a word
 * boundary is (currently) a whitespace character, punctuation, or the start
 * of the string.
 *
 * `max_rewind` bounds how many bytes before `data` are inspected as ordinary
 * text; `max_lookbehind` additionally gates the escaped-slash check below.
 *
 * Returns the length of the prefix: 2 when a '/' precedes `prefix_char`,
 * 1 when only `prefix_char` belongs to the link, 0 when there is no match.
 */
static int
check_reddit_autolink_prefix(
    const uint8_t* data,
    size_t max_rewind,
    size_t max_lookbehind,
    size_t size,
    char prefix_char
)
{
    /* Make sure this `/` is part of `/?r/` */
    if (size < 2 || max_rewind < 1 || data[-1] != prefix_char)
        return 0;

    /* Not at the start of the buffer, no inlines to the immediate left of the `prefix_char` */
    if (max_rewind > 1) {
        /* Kept unsigned: the <ctype.h> classifiers are only defined for
         * values representable as unsigned char (or EOF), so a plain
         * (possibly signed) char must not be passed for bytes >= 0x80. */
        const uint8_t boundary = data[-2];

        if (boundary == '/')
            return 2;

        /**
         * Here's where our lack of unicode-awareness bites us. We don't correctly
         * match punctuation / whitespace characters for the boundary, because we
         * reject valid cases like "。r/example" (note the fullwidth period.)
         *
         * A better implementation might try to rewind over bytes with the 8th bit set, try
         * to decode them to a valid codepoint, then do a unicode-aware check on the codepoint.
         */
        if (ispunct(boundary) || isspace(boundary))
            return 1;

        return 0;
    } else if (max_lookbehind > 2) {
        /* There's an inline element just left of the `prefix_char`, is it an escaped forward
         * slash? bail out so we correctly handle stuff like "\/r/foo". This will also correctly
         * allow "\\/r/foo".
         */
        if (data[-2] == '/' && data[-3] == '\\')
            return 0;
    }

    /* Must be a new-style shortlink with nothing relevant to the left of it. */
    return 1;
}
|
||||
|
||||
/*
 * check_domain: measure the run of domain-like characters at the start of `data`.
 *
 * The first byte must be alphanumeric. Subsequent bytes up to, but not
 * including, the last byte of the `size`-byte window are accepted while they
 * are alphanumeric, '-' or '.'.
 *
 * Returns the index where scanning stopped. When `allow_short` is zero, at
 * least one '.' must have been seen, otherwise 0 is returned. An empty window
 * (`size == 0`) or a non-alphanumeric first byte always yields 0.
 */
static size_t
check_domain(uint8_t *data, size_t size, int allow_short)
{
    size_t i, np = 0;

    /* Guard the empty window: data[0] would be out of range and
     * `size - 1` below would wrap around to SIZE_MAX. */
    if (size == 0 || !isalnum(data[0]))
        return 0;

    for (i = 1; i < size - 1; ++i) {
        if (data[i] == '.')
            np++;
        else if (!isalnum(data[i]) && data[i] != '-')
            break;
    }

    if (allow_short) {
        /* We don't need a valid domain in the strict sense (with
         * at least one dot); so just make sure it's composed of valid
         * domain characters and return the length of the valid
         * sequence. */
        return i;
    }

    /* a valid domain needs to have at least a dot.
     * that's as far as we get */
    return np ? i : 0;
}
|
||||
|
||||
/*
 * sd_autolink__www: try to autolink a bare "www." address starting at `data`.
 *
 * When `max_rewind` is non-zero, the byte before `data` must be punctuation
 * or whitespace. The text must start with "www." and pass check_domain()
 * with at least one dot. The link then extends to the next whitespace byte
 * and is trimmed with autolink_delim().
 *
 * On success the link text is appended to `link`, `*rewind_p` is set to 0
 * and the link length is returned. On failure 0 is returned.
 * `flags` is not used by this function.
 */
size_t
sd_autolink__www(
    size_t *rewind_p,
    struct buf *link,
    uint8_t *data,
    size_t max_rewind,
    size_t size,
    unsigned int flags)
{
    size_t end;

    if (max_rewind > 0) {
        uint8_t previous = data[-1];

        if (!ispunct(previous) && !isspace(previous))
            return 0;
    }

    if (size < 4)
        return 0;

    if (memcmp(data, "www.", strlen("www.")) != 0)
        return 0;

    end = check_domain(data, size, 0);
    if (end == 0)
        return 0;

    /* Extend the link up to the next whitespace byte. */
    for (; end < size; ++end) {
        if (isspace(data[end]))
            break;
    }

    end = autolink_delim(data, end, max_rewind, size);
    if (end == 0)
        return 0;

    bufput(link, data, end);
    *rewind_p = 0;

    return (int)end;
}
|
||||
|
||||
/*
 * sd_autolink__email: try to autolink an e-mail address around `data`.
 *
 * Scans backwards from `data` (at most `max_rewind` bytes) over
 * alphanumerics and ".+-_" to find the start of the address, then forwards
 * over alphanumerics, '@', '.', '-' and '_'. The forward part must be at
 * least two bytes long, contain exactly one '@' and at least one '.' that is
 * not the last byte of the window. The result is trimmed with
 * autolink_delim().
 *
 * On success the address (including the rewound part) is appended to `link`,
 * `*rewind_p` receives the number of bytes rewound, and the forward length is
 * returned. On failure 0 is returned. `flags` is not used by this function.
 */
size_t
sd_autolink__email(
    size_t *rewind_p,
    struct buf *link,
    uint8_t *data,
    size_t max_rewind,
    size_t size,
    unsigned int flags)
{
    size_t link_end, rewind;
    int nb = 0, np = 0;

    for (rewind = 0; rewind < max_rewind; ++rewind) {
        /* Step backwards with pointer subtraction. Indexing with
         * `-rewind - 1` would negate an unsigned size_t and only work
         * through address wrap-around. */
        uint8_t c = *(data - rewind - 1);

        if (c == 0)
            break;

        if (isalnum(c))
            continue;

        if (strchr(".+-_", c) != NULL)
            continue;

        break;
    }

    if (rewind == 0)
        return 0;

    for (link_end = 0; link_end < size; ++link_end) {
        uint8_t c = data[link_end];

        if (isalnum(c))
            continue;

        if (c == '@')
            nb++;
        else if (c == '.' && link_end < size - 1)
            np++;
        else if (c != '-' && c != '_')
            break;
    }

    if (link_end < 2 || nb != 1 || np == 0)
        return 0;

    link_end = autolink_delim(data, link_end, max_rewind, size);

    if (link_end == 0)
        return 0;

    bufput(link, data - rewind, link_end + rewind);
    *rewind_p = rewind;

    return link_end;
}
|
||||
|
||||
size_t
|
||||
sd_autolink__url(
|
||||
size_t *rewind_p,
|
||||
struct buf *link,
|
||||
uint8_t *data,
|
||||
size_t max_rewind,
|
||||
size_t size,
|
||||
unsigned int flags)
|
||||
{
|
||||
size_t link_end, rewind = 0, domain_len;
|
||||
|
||||
if (size < 4 || data[1] != '/' || data[2] != '/')
|
||||
return 0;
|
||||
|
||||
while (rewind < max_rewind && isalpha(data[-rewind - 1]))
|
||||
rewind++;
|
||||
|
||||
if (!sd_autolink_issafe(data - rewind, size + rewind))
|
||||
return 0;
|
||||
|
||||
link_end = strlen("://");
|
||||
|
||||
domain_len = check_domain(
|
||||
data + link_end,
|
||||
size - link_end,
|
||||
flags & SD_AUTOLINK_SHORT_DOMAINS);
|
||||
|
||||
if (domain_len == 0)
|
||||
return 0;
|
||||
|
||||
link_end += domain_len;
|
||||
while (link_end < size && !isspace(data[link_end]))
|
||||
link_end++;
|
||||
|
||||
link_end = autolink_delim(data, link_end, max_rewind, size);
|
||||
|
||||
if (link_end == 0)
|
||||
return 0;
|
||||
|
||||
bufput(link, data - rewind, link_end + rewind);
|
||||
*rewind_p = rewind;
|
||||
|
||||
return link_end;
|
||||
}
|
||||
|
||||
/**
 * sd_autolink__subreddit: try to autolink an r/foo or /r/foo reference.
 *
 * In a valid /?r/ link, `*data` will always point to the '/' after the first 'r'.
 * In pseudo-regex, this matches something like:
 *
 * `(/|(?<=\b))r/(all-)?%subreddit%([-+]%subreddit%)*(/[\w\-/]*)?`
 * where %subreddit% == `((t:)?\w{2,24}|reddit\.com)`
 *
 * On success the link (including the rewound prefix) is appended to `link`,
 * `*rewind_p` receives the prefix length, `*no_slash` is set to 1 when the
 * prefix had no leading '/', and the forward length is returned.
 * On failure 0 is returned.
 */
size_t
sd_autolink__subreddit(
    size_t *rewind_p,
    struct buf *link,
    uint8_t *data,
    size_t max_rewind,
    size_t max_lookbehind,
    size_t size,
    int *no_slash
)
{
    size_t link_end;
    size_t rewind;
    int is_allminus = 0;

    rewind = check_reddit_autolink_prefix(data, max_rewind, max_lookbehind, size, 'r');
    if (!rewind)
        return 0;

    /* offset to the "meat" of the link */
    link_end = strlen("/");

    if (size >= link_end + 4 && strncasecmp((char*)data + link_end, "all-", 4) == 0)
        is_allminus = 1;

    /* One pass per subreddit name; multireddits chain names with '+'
     * (or '-' after "all-"). */
    for (;;) {
        size_t start = link_end;
        size_t max_length = 24;

        /* special case: /r/reddit.com (only subreddit containing '.'). */
        if (size >= link_end + 10 && strncasecmp((char*)data + link_end, "reddit.com", 10) == 0) {
            link_end += 10;
            /* Make sure there are no trailing characters (don't do
             * any autolinking for /r/reddit.commission) */
            max_length = 10;
        }
        /* If not a special case, verify it begins with (t:)?[A-Za-z0-9] */
        else {
            /* support autolinking to timereddits, /r/t:when (1 April 2012) */
            if (size > link_end + 2 && strncasecmp((char*)data + link_end, "t:", 2) == 0)
                link_end += 2; /* Jump over the 't:' */

            /* the first character of a subreddit name must be a letter or
             * digit. The bounds check matters after a trailing '+'/'-'
             * separator, where link_end can equal size. */
            if (link_end >= size || !isalnum(data[link_end]))
                return 0;
            link_end += 1;
        }

        /* consume valid characters ([A-Za-z0-9_]) until we run out */
        while (link_end < size && (isalnum(data[link_end]) ||
                            data[link_end] == '_'))
            link_end++;

        /* Only names of 2..max_length characters are autolinked.
         * (chksrname function in reddit/.../validator.py) */
        if (link_end - start < 2 || link_end - start > max_length)
            return 0;

        /* If we are linking to a multireddit, skip the separator and continue */
        if (link_end < size && (data[link_end] == '+' || (is_allminus && data[link_end] == '-')))
            link_end++;
        else
            break;
    }

    if (link_end < size && data[link_end] == '/') {
        while (link_end < size && (isalnum(data[link_end]) ||
                            data[link_end] == '_' ||
                            data[link_end] == '/' ||
                            data[link_end] == '-'))
            link_end++;
    }

    /* make the link */
    bufput(link, data - rewind, link_end + rewind);

    *no_slash = (rewind == 1);
    *rewind_p = rewind;

    return link_end;
}
|
||||
|
||||
/*
 * sd_autolink__username: try to autolink a u/name or /u/name reference.
 *
 * Requires at least three bytes in the window and a valid 'u' prefix as
 * judged by check_reddit_autolink_prefix(). The byte after the '/' must be
 * alphanumeric, '_' or '-'; the link then extends over alphanumerics, '_',
 * '/' and '-'.
 *
 * On success the link (including the rewound prefix) is appended to `link`,
 * `*rewind_p` receives the prefix length, `*no_slash` is set to 1 when the
 * prefix had no leading '/', and the forward length is returned.
 * On failure 0 is returned.
 */
size_t
sd_autolink__username(
    size_t *rewind_p,
    struct buf *link,
    uint8_t *data,
    size_t max_rewind,
    size_t max_lookbehind,
    size_t size,
    int *no_slash
)
{
    size_t pos;
    size_t prefix_len;
    uint8_t first;

    if (size < 3)
        return 0;

    prefix_len = check_reddit_autolink_prefix(data, max_rewind, max_lookbehind, size, 'u');
    if (prefix_len == 0)
        return 0;

    pos = strlen("/");

    /* the first letter of a username must... well, be valid, we don't care otherwise */
    first = data[pos];
    if (!(isalnum(first) || first == '_' || first == '-'))
        return 0;
    pos++;

    /* consume valid characters ([A-Za-z0-9_-/]) until we run out */
    for (; pos < size; ++pos) {
        uint8_t ch = data[pos];

        if (!(isalnum(ch) || ch == '_' || ch == '/' || ch == '-'))
            break;
    }

    /* make the link */
    bufput(link, data - prefix_len, pos + prefix_len);

    *no_slash = (prefix_len == 1);
    *rewind_p = prefix_len;

    return pos;
}
|
|
@ -1,59 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2011, Vicent Marti
|
||||
*
|
||||
* Permission to use, copy, modify, and distribute this software for any
|
||||
* purpose with or without fee is hereby granted, provided that the above
|
||||
* copyright notice and this permission notice appear in all copies.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef UPSKIRT_AUTOLINK_H
|
||||
#define UPSKIRT_AUTOLINK_H
|
||||
|
||||
#include "buffer.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
enum {
|
||||
SD_AUTOLINK_SHORT_DOMAINS = (1 << 0),
|
||||
};
|
||||
|
||||
int
|
||||
sd_autolink_issafe(const uint8_t *link, size_t link_len);
|
||||
|
||||
size_t
|
||||
sd_autolink__www(size_t *rewind_p, struct buf *link,
|
||||
uint8_t *data, size_t max_rewind, size_t size, unsigned int flags);
|
||||
|
||||
size_t
|
||||
sd_autolink__email(size_t *rewind_p, struct buf *link,
|
||||
uint8_t *data, size_t max_rewind, size_t size, unsigned int flags);
|
||||
|
||||
size_t
|
||||
sd_autolink__url(size_t *rewind_p, struct buf *link,
|
||||
uint8_t *data, size_t max_rewind, size_t size, unsigned int flags);
|
||||
|
||||
extern size_t
|
||||
sd_autolink__subreddit(size_t *rewind_p, struct buf *link, uint8_t *data,
|
||||
size_t max_rewind, size_t max_lookbehind, size_t size, int *no_slash);
|
||||
|
||||
extern size_t
|
||||
sd_autolink__username(size_t *rewind_p, struct buf *link, uint8_t *data,
|
||||
size_t max_rewind, size_t max_lookbehind, size_t size, int *no_slash);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
/* vim: set filetype=c: */
|
|
@ -1,236 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2008, Natacha Porté
|
||||
* Copyright (c) 2011, Vicent Martí
|
||||
*
|
||||
* Permission to use, copy, modify, and distribute this software for any
|
||||
* purpose with or without fee is hereby granted, provided that the above
|
||||
* copyright notice and this permission notice appear in all copies.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*/
|
||||
|
||||
#define BUFFER_MAX_ALLOC_SIZE (1024 * 1024 * 16) //16mb
|
||||
|
||||
#include "buffer.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
|
||||
/* MSVC compat */
|
||||
#if defined(_MSC_VER)
|
||||
# define _buf_vsnprintf _vsnprintf
|
||||
#else
|
||||
# define _buf_vsnprintf vsnprintf
|
||||
#endif
|
||||
|
||||
int
|
||||
bufprefix(const struct buf *buf, const char *prefix)
|
||||
{
|
||||
size_t i;
|
||||
assert(buf && buf->unit);
|
||||
|
||||
for (i = 0; i < buf->size; ++i) {
|
||||
if (prefix[i] == 0)
|
||||
return 0;
|
||||
|
||||
if (buf->data[i] != prefix[i])
|
||||
return buf->data[i] - prefix[i];
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* bufgrow: increasing the allocated size to the given value */
|
||||
int
|
||||
bufgrow(struct buf *buf, size_t neosz)
|
||||
{
|
||||
size_t neoasz;
|
||||
void *neodata;
|
||||
|
||||
assert(buf && buf->unit);
|
||||
|
||||
if (neosz > BUFFER_MAX_ALLOC_SIZE)
|
||||
return BUF_ENOMEM;
|
||||
|
||||
if (buf->asize >= neosz)
|
||||
return BUF_OK;
|
||||
|
||||
neoasz = buf->asize + buf->unit;
|
||||
while (neoasz < neosz)
|
||||
neoasz += buf->unit;
|
||||
|
||||
neodata = realloc(buf->data, neoasz);
|
||||
if (!neodata)
|
||||
return BUF_ENOMEM;
|
||||
|
||||
buf->data = neodata;
|
||||
buf->asize = neoasz;
|
||||
return BUF_OK;
|
||||
}
|
||||
|
||||
|
||||
/* bufnew: allocation of a new buffer */
|
||||
struct buf *
|
||||
bufnew(size_t unit)
|
||||
{
|
||||
struct buf *ret;
|
||||
ret = malloc(sizeof (struct buf));
|
||||
|
||||
if (ret) {
|
||||
ret->data = 0;
|
||||
ret->size = ret->asize = 0;
|
||||
ret->unit = unit;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* bufnullterm: NULL-termination of the string array */
|
||||
const char *
|
||||
bufcstr(struct buf *buf)
|
||||
{
|
||||
assert(buf && buf->unit);
|
||||
|
||||
if (buf->size < buf->asize && buf->data[buf->size] == 0)
|
||||
return (char *)buf->data;
|
||||
|
||||
if (buf->size + 1 <= buf->asize || bufgrow(buf, buf->size + 1) == 0) {
|
||||
buf->data[buf->size] = 0;
|
||||
return (char *)buf->data;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* bufprintf: formatted printing to a buffer */
|
||||
void
|
||||
bufprintf(struct buf *buf, const char *fmt, ...)
|
||||
{
|
||||
va_list ap;
|
||||
int n;
|
||||
|
||||
assert(buf && buf->unit);
|
||||
|
||||
if (buf->size >= buf->asize && bufgrow(buf, buf->size + 1) < 0)
|
||||
return;
|
||||
va_start(ap, fmt);
|
||||
n = _buf_vsnprintf((char *)buf->data + buf->size, buf->asize - buf->size, fmt, ap);
|
||||
va_end(ap);
|
||||
|
||||
if (n < 0) {
|
||||
#ifdef _MSC_VER
|
||||
va_start(ap, fmt);
|
||||
n = _vscprintf(fmt, ap);
|
||||
va_end(ap);
|
||||
#else
|
||||
return;
|
||||
#endif
|
||||
}
|
||||
if ((size_t)n >= buf->asize - buf->size) {
|
||||
if (bufgrow(buf, buf->size + n + 1) < 0)
|
||||
return;
|
||||
|
||||
va_start(ap, fmt);
|
||||
n = _buf_vsnprintf((char *)buf->data + buf->size, buf->asize - buf->size, fmt, ap);
|
||||
va_end(ap);
|
||||
}
|
||||
|
||||
if (n < 0)
|
||||
return;
|
||||
|
||||
buf->size += n;
|
||||
}
|
||||
|
||||
/* bufput: appends raw data to a buffer */
|
||||
void
|
||||
bufput(struct buf *buf, const void *data, size_t len)
|
||||
{
|
||||
assert(buf && buf->unit);
|
||||
|
||||
if (buf->size + len > buf->asize && bufgrow(buf, buf->size + len) < 0)
|
||||
return;
|
||||
|
||||
memcpy(buf->data + buf->size, data, len);
|
||||
buf->size += len;
|
||||
}
|
||||
|
||||
/*
 * bufputs: append a NUL-terminated string to a buffer.
 * The terminating NUL itself is not appended.
 */
void
bufputs(struct buf *buf, const char *str)
{
	size_t length = strlen(str);

	bufput(buf, str, length);
}
|
||||
|
||||
|
||||
/* bufputc: appends a single uint8_t to a buffer */
|
||||
void
|
||||
bufputc(struct buf *buf, int c)
|
||||
{
|
||||
assert(buf && buf->unit);
|
||||
|
||||
if (buf->size + 1 > buf->asize && bufgrow(buf, buf->size + 1) < 0)
|
||||
return;
|
||||
|
||||
buf->data[buf->size] = c;
|
||||
buf->size += 1;
|
||||
}
|
||||
|
||||
/* bufrelease: decrease the reference count and free the buffer if needed */
|
||||
void
|
||||
bufrelease(struct buf *buf)
|
||||
{
|
||||
if (!buf)
|
||||
return;
|
||||
|
||||
free(buf->data);
|
||||
free(buf);
|
||||
}
|
||||
|
||||
|
||||
/* bufreset: frees internal data of the buffer */
|
||||
void
|
||||
bufreset(struct buf *buf)
|
||||
{
|
||||
if (!buf)
|
||||
return;
|
||||
|
||||
free(buf->data);
|
||||
buf->data = NULL;
|
||||
buf->size = buf->asize = 0;
|
||||
}
|
||||
|
||||
/* bufslurp: removes a given number of bytes from the head of the array */
|
||||
void
|
||||
bufslurp(struct buf *buf, size_t len)
|
||||
{
|
||||
assert(buf && buf->unit);
|
||||
|
||||
if (len >= buf->size) {
|
||||
buf->size = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
buf->size -= len;
|
||||
memmove(buf->data, buf->data + len, buf->size);
|
||||
}
|
||||
|
||||
/* buftrucate: truncates the buffer at `size` */
|
||||
int
|
||||
buftruncate(struct buf *buf, size_t size)
|
||||
{
|
||||
if (buf->size < size || size < 0) {
|
||||
/* bail out in debug mode so we can figure out why this happened */
|
||||
assert(0);
|
||||
return BUF_EINVALIDIDX;
|
||||
}
|
||||
|
||||
buf->size = size;
|
||||
return BUF_OK;
|
||||
}
|
|
@ -1,100 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2008, Natacha Porté
|
||||
* Copyright (c) 2011, Vicent Martí
|
||||
*
|
||||
* Permission to use, copy, modify, and distribute this software for any
|
||||
* purpose with or without fee is hereby granted, provided that the above
|
||||
* copyright notice and this permission notice appear in all copies.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef BUFFER_H__
#define BUFFER_H__

#include <stddef.h>
#include <stdarg.h>
#include <stdint.h>
#include <string.h> /* strlen, used by BUF_VOLATILE */

#ifdef __cplusplus
extern "C" {
#endif

#if defined(_MSC_VER)
#define __attribute__(x)
#define inline
#endif

/* buferror_t: status codes returned by bufgrow and buftruncate */
typedef enum {
	BUF_OK = 0,
	BUF_ENOMEM = -1,
	BUF_EINVALIDIDX = -2,
} buferror_t;

/* struct buf: character array buffer */
struct buf {
	uint8_t *data;		/* actual character data */
	size_t size;		/* size of the string */
	size_t asize;		/* allocated size (0 = volatile buffer) */
	size_t unit;		/* reallocation unit size (0 = read-only buffer) */
};

/*
 * BUF_STATIC: initializer for a read-only buffer backed by a string literal.
 * One initializer per member of struct buf: data, size, asize, unit.
 */
#define BUF_STATIC(string) \
	{ (uint8_t *)string, sizeof string - 1, sizeof string, 0 }

/*
 * BUF_VOLATILE: initializer for a volatile buffer wrapping an existing
 * NUL-terminated string (asize == 0, unit == 0).
 */
#define BUF_VOLATILE(strname) \
	{ (uint8_t *)strname, strlen(strname), 0, 0 }

/* BUFPUTSL: optimized bufputs of a string literal */
#define BUFPUTSL(output, literal) \
	bufput(output, literal, sizeof literal - 1)

/* bufgrow: increasing the allocated size to the given value */
int bufgrow(struct buf *, size_t);

/* bufnew: allocation of a new buffer */
struct buf *bufnew(size_t) __attribute__ ((malloc));

/* bufcstr: NUL-termination of the string array (making a C-string) */
const char *bufcstr(struct buf *);

/* bufprefix: compare the beginning of a buffer with a string */
int bufprefix(const struct buf *buf, const char *prefix);

/* bufput: appends raw data to a buffer */
void bufput(struct buf *, const void *, size_t);

/* bufputs: appends a NUL-terminated string to a buffer */
void bufputs(struct buf *, const char *);

/* bufputc: appends a single char to a buffer */
void bufputc(struct buf *, int);

/* bufrelease: frees the buffer and its data */
void bufrelease(struct buf *);

/* bufreset: frees internal data of the buffer */
void bufreset(struct buf *);

/* bufslurp: removes a given number of bytes from the head of the array */
void bufslurp(struct buf *, size_t);

/* bufprintf: formatted printing to a buffer */
void bufprintf(struct buf *, const char *, ...) __attribute__ ((format (printf, 2, 3)));

/* buftruncate: truncates the buffer at `size` */
int buftruncate(struct buf *buf, size_t size);

#ifdef __cplusplus
}
#endif

#endif
|
|
@ -1,75 +0,0 @@
|
|||
running install
|
||||
running bdist_egg
|
||||
running egg_info
|
||||
writing top-level names to snudown.egg-info\top_level.txt
|
||||
writing dependency_links to snudown.egg-info\dependency_links.txt
|
||||
writing snudown.egg-info\PKG-INFO
|
||||
reading manifest file 'snudown.egg-info\SOURCES.txt'
|
||||
writing manifest file 'snudown.egg-info\SOURCES.txt'
|
||||
installing library code to build\bdist.win32\egg
|
||||
running install_lib
|
||||
running build_ext
|
||||
gperf.exe src\html_entities.gperf --output-file=src\html_entities.h
|
||||
building 'snudown' extension
|
||||
creating build
|
||||
creating build\temp.win32-3.4
|
||||
creating build\temp.win32-3.4\Release
|
||||
creating build\temp.win32-3.4\Release\src
|
||||
creating build\temp.win32-3.4\Release\html
|
||||
D:\Visual Studio 1_0_0\VC\BIN\cl.exe /c /nologo /Ox /MD /W3 /GS- /DNDEBUG -Isrc -Ihtml -IC:\Python34\include -IC:\Python34\include /Tcsnudown.c /Fobuild\temp.win32-3.4\Release\snudown.obj
|
||||
snudown.c
|
||||
snudown.c(231) : warning C4087: 'PyInit_snudown' : declared with 'void' parameter list
|
||||
c:\git\else\snudowntest\snudown.c(163) : warning C4700: uninitialized local variable 'options' used
|
||||
c:\git\else\snudowntest\snudown.c(228) : warning C4715: 'PyInit_snudown' : not all control paths return a value
|
||||
D:\Visual Studio 1_0_0\VC\BIN\cl.exe /c /nologo /Ox /MD /W3 /GS- /DNDEBUG -Isrc -Ihtml -IC:\Python34\include -IC:\Python34\include /Tcsrc/autolink.c /Fobuild\temp.win32-3.4\Release\src/autolink.obj
|
||||
autolink.c
|
||||
src/autolink.c(266) : warning C4146: unary minus operator applied to unsigned type, result still unsigned
|
||||
src/autolink.c(325) : warning C4146: unary minus operator applied to unsigned type, result still unsigned
|
||||
src/autolink.c(422) : warning C4018: '>' : signed/unsigned mismatch
|
||||
D:\Visual Studio 1_0_0\VC\BIN\cl.exe /c /nologo /Ox /MD /W3 /GS- /DNDEBUG -Isrc -Ihtml -IC:\Python34\include -IC:\Python34\include /Tcsrc/buffer.c /Fobuild\temp.win32-3.4\Release\src/buffer.obj
|
||||
buffer.c
|
||||
src/buffer.c(124) : warning C4996: '_vsnprintf': This function or variable may be unsafe. Consider using _vsnprintf_s instead. To disable deprecation, use _CRT_SECURE_NO_WARNINGS. See online help for details.
|
||||
D:\Visual Studio 1_0_0\VC\INCLUDE\stdio.h(363) : see declaration of '_vsnprintf'
|
||||
src/buffer.c(141) : warning C4996: '_vsnprintf': This function or variable may be unsafe. Consider using _vsnprintf_s instead. To disable deprecation, use _CRT_SECURE_NO_WARNINGS. See online help for details.
|
||||
D:\Visual Studio 1_0_0\VC\INCLUDE\stdio.h(363) : see declaration of '_vsnprintf'
|
||||
D:\Visual Studio 1_0_0\VC\BIN\cl.exe /c /nologo /Ox /MD /W3 /GS- /DNDEBUG -Isrc -Ihtml -IC:\Python34\include -IC:\Python34\include /Tcsrc/markdown.c /Fobuild\temp.win32-3.4\Release\src/markdown.obj
|
||||
markdown.c
|
||||
c:\git\else\snudowntest\src\html_entities.h(32) : warning C4129: 's' : unrecognized character escape sequence
|
||||
c:\git\else\snudowntest\src\html_entities.h(32) : warning C4129: 's' : unrecognized character escape sequence
|
||||
c:\git\else\snudowntest\src\html_entities.h(32) : warning C4129: 'h' : unrecognized character escape sequence
|
||||
src/markdown.c(2168) : warning C4018: '>' : signed/unsigned mismatch
|
||||
D:\Visual Studio 1_0_0\VC\BIN\cl.exe /c /nologo /Ox /MD /W3 /GS- /DNDEBUG -Isrc -Ihtml -IC:\Python34\include -IC:\Python34\include /Tcsrc/stack.c /Fobuild\temp.win32-3.4\Release\src/stack.obj
|
||||
stack.c
|
||||
D:\Visual Studio 1_0_0\VC\BIN\cl.exe /c /nologo /Ox /MD /W3 /GS- /DNDEBUG -Isrc -Ihtml -IC:\Python34\include -IC:\Python34\include /Tchtml/houdini_href_e.c /Fobuild\temp.win32-3.4\Release\html/houdini_href_e.obj
|
||||
houdini_href_e.c
|
||||
D:\Visual Studio 1_0_0\VC\BIN\cl.exe /c /nologo /Ox /MD /W3 /GS- /DNDEBUG -Isrc -Ihtml -IC:\Python34\include -IC:\Python34\include /Tchtml/houdini_html_e.c /Fobuild\temp.win32-3.4\Release\html/houdini_html_e.obj
|
||||
houdini_html_e.c
|
||||
D:\Visual Studio 1_0_0\VC\BIN\cl.exe /c /nologo /Ox /MD /W3 /GS- /DNDEBUG -Isrc -Ihtml -IC:\Python34\include -IC:\Python34\include /Tchtml/html.c /Fobuild\temp.win32-3.4\Release\html/html.obj
|
||||
html.c
|
||||
D:\Visual Studio 1_0_0\VC\BIN\cl.exe /c /nologo /Ox /MD /W3 /GS- /DNDEBUG -Isrc -Ihtml -IC:\Python34\include -IC:\Python34\include /Tchtml/html_smartypants.c /Fobuild\temp.win32-3.4\Release\html/html_smartypants.obj
|
||||
html_smartypants.c
|
||||
html/html_smartypants.c(97) : warning C4996: '_snprintf': This function or variable may be unsafe. Consider using _snprintf_s instead. To disable deprecation, use _CRT_SECURE_NO_WARNINGS. See online help for details.
|
||||
D:\Visual Studio 1_0_0\VC\INCLUDE\stdio.h(363) : see declaration of '_snprintf'
|
||||
creating build\lib.win32-3.4
|
||||
D:\Visual Studio 1_0_0\VC\BIN\link.exe /DLL /nologo /INCREMENTAL:NO /LIBPATH:C:\Python34\libs /LIBPATH:C:\Python34\PCbuild /EXPORT:PyInit_snudown build\temp.win32-3.4\Release\snudown.obj build\temp.win32-3.4\Release\src/autolink.obj build\temp.win32-3.4\Release\src/buffer.obj build\temp.win32-3.4\Release\src/markdown.obj build\temp.win32-3.4\Release\src/stack.obj build\temp.win32-3.4\Release\html/houdini_href_e.obj build\temp.win32-3.4\Release\html/houdini_html_e.obj build\temp.win32-3.4\Release\html/html.obj build\temp.win32-3.4\Release\html/html_smartypants.obj /OUT:build\lib.win32-3.4\snudown.pyd /IMPLIB:build\temp.win32-3.4\Release\snudown.lib /MANIFESTFILE:build\temp.win32-3.4\Release\snudown.pyd.manifest
|
||||
Creating library build\temp.win32-3.4\Release\snudown.lib and object build\temp.win32-3.4\Release\snudown.exp
|
||||
creating build\bdist.win32
|
||||
creating build\bdist.win32\egg
|
||||
copying build\lib.win32-3.4\snudown.pyd -> build\bdist.win32\egg
|
||||
creating stub loader for snudown.pyd
|
||||
creating build\bdist.win32\egg\EGG-INFO
|
||||
copying snudown.egg-info\PKG-INFO -> build\bdist.win32\egg\EGG-INFO
|
||||
copying snudown.egg-info\SOURCES.txt -> build\bdist.win32\egg\EGG-INFO
|
||||
copying snudown.egg-info\dependency_links.txt -> build\bdist.win32\egg\EGG-INFO
|
||||
copying snudown.egg-info\top_level.txt -> build\bdist.win32\egg\EGG-INFO
|
||||
writing build\bdist.win32\egg\EGG-INFO\native_libs.txt
|
||||
creating 'dist\snudown-1.4.0-py3.4-win32.egg' and adding 'build\bdist.win32\egg' to it
|
||||
removing 'build\bdist.win32\egg' (and everything under it)
|
||||
Processing snudown-1.4.0-py3.4-win32.egg
|
||||
Removing c:\python34\lib\site-packages\snudown-1.4.0-py3.4-win32.egg
|
||||
Copying snudown-1.4.0-py3.4-win32.egg to c:\python34\lib\site-packages
|
||||
snudown 1.4.0 is already the active version in easy-install.pth
|
||||
|
||||
Installed c:\python34\lib\site-packages\snudown-1.4.0-py3.4-win32.egg
|
||||
Processing dependencies for snudown==1.4.0
|
||||
Finished processing dependencies for snudown==1.4.0
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -1,145 +0,0 @@
|
|||
snudown (1.4.0) unstable; urgency=medium
|
||||
|
||||
* autolink r/subreddit and u/user
|
||||
* security: don't rewind over previous inlines when autolinking
|
||||
* email autolinks re-enabled due to ^
|
||||
* more stringent character entity checks and sanitization
|
||||
* properly handle URLs containing control characters
|
||||
|
||||
-- Jordan Milne <jordan.milne@reddit.com> Mon, 01 Jun 2015 13:04:23 -0700
|
||||
|
||||
snudown (1.3.2) unstable; urgency=medium
|
||||
|
||||
* fix alphanumeric-named entities
|
||||
|
||||
-- Neil Williams <neil@reddit.com> Wed, 25 Feb 2015 13:32:41 -0800
|
||||
|
||||
snudown (1.3.1) unstable; urgency=medium
|
||||
|
||||
* add missing entities to entity whitelist
|
||||
|
||||
-- Neil Williams <neil@reddit.com> Tue, 24 Feb 2015 22:12:29 -0800
|
||||
|
||||
snudown (1.3.0) unstable; urgency=medium
|
||||
|
||||
* validate html entities and escape unrecognized ones
|
||||
|
||||
-- Neil Williams <neil@reddit.com> Tue, 24 Feb 2015 17:55:38 -0800
|
||||
|
||||
snudown (1.2.0) unstable; urgency=medium
|
||||
|
||||
* security: fix rewind issues
|
||||
* email autolinks disabled due to ^
|
||||
* security: fix table header OOM bomb
|
||||
|
||||
-- Neil Williams <neil@reddit.com> Sat, 20 Sep 2014 11:59:34 -0700
|
||||
|
||||
snudown (1.1.6) unstable; urgency=low
|
||||
|
||||
* add ts3server url scheme to whitelist
|
||||
* redo html sanitization for wiki renderer
|
||||
|
||||
-- Neil Williams <neil@reddit.com> Tue, 01 Apr 2014 17:12:50 -0700
|
||||
|
||||
snudown (1.1.5) unstable; urgency=low
|
||||
|
||||
* bring path stuff into user/subreddit autolinking (multis, subpages etc.)
|
||||
* make /u/ autolinking case sensitive
|
||||
|
||||
-- Neil Williams <neil@reddit.com> Wed, 22 May 2013 16:09:31 -0700
|
||||
|
||||
snudown (1.1.4) unstable; urgency=low
|
||||
|
||||
* make /r/ autolinking case sensitive
|
||||
|
||||
-- Neil Williams <neil@reddit.com> Mon, 25 Feb 2013 23:27:10 -0800
|
||||
|
||||
snudown (1.1.3) unstable; urgency=low
|
||||
|
||||
* add support for /r/all-minus
|
||||
|
||||
-- Neil Williams <neil@reddit.com> Tue, 08 Jan 2013 12:55:40 -0800
|
||||
|
||||
snudown (1.1.2) unstable; urgency=low
|
||||
|
||||
* don't close the toc div if there wasn't a toc :(
|
||||
|
||||
-- Neil Williams <neil@reddit.com> Wed, 12 Dec 2012 17:38:05 -0800
|
||||
|
||||
snudown (1.1.1) unstable; urgency=low
|
||||
|
||||
* minor code cleanup
|
||||
* add a div around wiki table of contents for styling purposes
|
||||
|
||||
-- Neil Williams <neil@reddit.com> Wed, 12 Dec 2012 13:47:49 -0800
|
||||
|
||||
snudown (1.1.0) unstable; urgency=low
|
||||
|
||||
* add wiki variant of markdown syntax (allows links, and
|
||||
some raw html)
|
||||
|
||||
-- Neil Williams <neil@reddit.com> Wed, 05 Sep 2012 23:30:34 -0700
|
||||
|
||||
snudown (1.0.7) unstable; urgency=low
|
||||
|
||||
* add python-setuptools to build-depends
|
||||
|
||||
-- Neil Williams <neil@reddit.com> Thu, 09 Aug 2012 14:46:49 -0700
|
||||
|
||||
snudown (1.0.6) unstable; urgency=low
|
||||
|
||||
* made subreddit autolinking more robust thanks to nandhp
|
||||
* cleaned up packaging
|
||||
* merged upstream fixes:
|
||||
* fix blockquotes nested inside paragraphs
|
||||
* improve parsing of continuous list items
|
||||
* fix infinite loop parsing strikethroughs
|
||||
|
||||
-- Neil Williams <neil@reddit.com> Thu, 09 Aug 2012 13:06:38 -0700
|
||||
|
||||
snudown (1.0.5) unstable; urgency=low
|
||||
|
||||
* require a space between url and title
|
||||
* merged upstream fixes:
|
||||
* whitespace after tables prevent them from rendering
|
||||
* escape html in contents of tables
|
||||
|
||||
-- Neil Williams <neil@reddit.com> Thu, 23 Feb 2012 08:40:39 -0800
|
||||
|
||||
snudown (1.0.4) unstable; urgency=low
|
||||
|
||||
* change username autolinking to /u/username
|
||||
* properly handle backslash at end of message
|
||||
|
||||
-- Neil Williams <neil@reddit.com> Thu, 26 Jan 2012 18:26:45 -0800
|
||||
|
||||
snudown (1.0.3) unstable; urgency=low
|
||||
|
||||
* ~username auto-linking
|
||||
* make table headers less strict
|
||||
* correctly handle ) in link title text
|
||||
* synced with upstream
|
||||
* code clean-up
|
||||
* utf-8 fixes
|
||||
|
||||
-- Neil Williams <neil@reddit.com> Wed, 18 Jan 2012 15:20:35 -0800
|
||||
|
||||
snudown (1.0.2) unstable; urgency=low
|
||||
|
||||
* synced up with upstream
|
||||
* more safelink relaxation based on community requests
|
||||
* fixed nesting unordered lists within ordered lists and vice versa
|
||||
|
||||
-- Neil Williams <neil@reddit.com> Sat, 19 Nov 2011 17:16:47 -0800
|
||||
|
||||
snudown (1.0.1) unstable; urgency=low
|
||||
|
||||
* new version, new package
|
||||
|
||||
-- Neil Williams <neil@reddit.com> Thu, 17 Nov 2011 14:22:26 -0800
|
||||
|
||||
snudown (1.0.0) unstable; urgency=low
|
||||
|
||||
* source package automatically created by stdeb 0.6.0+git
|
||||
|
||||
-- Neil Williams <neil@reddit.com> Wed, 16 Nov 2011 10:36:53 -0800
|
|
@ -1 +0,0 @@
|
|||
7
|
|
@ -1,15 +0,0 @@
|
|||
Source: snudown
|
||||
Maintainer: Neil Williams <neil@reddit.com>
|
||||
Section: python
|
||||
Priority: optional
|
||||
Build-Depends: python-all-dev (>= 2.6.6-3), debhelper (>= 7), python-setuptools, gperf
|
||||
Standards-Version: 3.9.3
|
||||
Homepage: https://github.com/reddit/snudown
|
||||
Vcs-Git: git://github.com/reddit/snudown.git
|
||||
|
||||
Package: python-snudown
|
||||
Architecture: any
|
||||
Depends: ${misc:Depends}, ${python:Depends}, ${shlibs:Depends}
|
||||
Breaks: ${python:Breaks}
|
||||
Description: reddit's python wrapper and customization of the Sundown Markdown interpreter.
|
||||
|
|
@ -1,30 +0,0 @@
|
|||
Format: http://dep.debian.net/deps/dep5
|
||||
Upstream-Name: snudown
|
||||
Source: https://github.com/reddit/snudown
|
||||
|
||||
Files: *
|
||||
Copyright: 2011-2012 Vicent Marti
|
||||
2011-2012 reddit Inc.
|
||||
License: MIT
|
||||
|
||||
Files: debian/*
|
||||
Copyright: 2011-2012 reddit Inc.
|
||||
License: MIT
|
||||
|
||||
Files: test_snudown.py
|
||||
Copyright: 2011-2012 reddit Inc.
|
||||
License: MIT
|
||||
|
||||
License: MIT
|
||||
Permission to use, copy, modify, and distribute this software for any purpose
|
||||
with or without fee is hereby granted, provided that the above copyright
|
||||
notice and this permission notice appear in all copies.
|
||||
.
|
||||
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
|
||||
REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
|
||||
AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
|
||||
INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
|
||||
LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
|
||||
OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
|
||||
PERFORMANCE OF THIS SOFTWARE.
|
||||
|
|
@ -1,9 +0,0 @@
|
|||
#!/usr/bin/make -f
|
||||
|
||||
# This file was automatically generated by stdeb 0.6.0+git at
|
||||
# Wed, 16 Nov 2011 10:36:53 -0800
|
||||
|
||||
%:
|
||||
dh $@ --with python2 --buildsystem=python_distutils
|
||||
|
||||
|
|
@ -1 +0,0 @@
|
|||
3.0 (native)
|
|
@ -1,37 +0,0 @@
|
|||
cmake_minimum_required(VERSION 2.8)

# Declare the project explicitly and enable only C. Without a project()
# call CMake adds an implicit one that also enables (and probes for) C++.
project(snudown-validator C)

# Headers are listed so they are tracked alongside the sources.
set(HEADERS
    ../html/houdini.h
    ../html/html.h
    ../src/autolink.h
    ../src/buffer.h
    ../src/html_blocks.h
    ../src/html_entities.h
    ../src/markdown.h
    ../src/stack.h
)

# The snudown library sources, compiled straight into the validator.
set(LIBRARY_SOURCES
    ../html/houdini_href_e.c
    ../html/houdini_html_e.c
    ../html/html.c
    ../html/html_smartypants.c
    ../src/autolink.c
    ../src/buffer.c
    ../src/markdown.c
    ../src/stack.c
    ${HEADERS}
)

set(PROGRAM "snudown-validator")
set(PROGRAM_SOURCES
    ${LIBRARY_SOURCES}
    snudown-validator.c
)

# gumbo is expected to be installed under ./build/gumbo_snudown.
include_directories(. ../src ../html ./build/gumbo_snudown/include ${CMAKE_CURRENT_BINARY_DIR})
link_directories(${CMAKE_CURRENT_SOURCE_DIR}/build/gumbo_snudown/lib)

add_executable(${PROGRAM} ${PROGRAM_SOURCES})
target_link_libraries(${PROGRAM} gumbo)

set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O3 -g -Wno-error=parentheses")
|
|
@ -1,62 +0,0 @@
|
|||
# Copyright (c) 2015, reddit inc.
|
||||
#
|
||||
# Permission to use, copy, modify, and distribute this software for any
|
||||
# purpose with or without fee is hereby granted, provided that the above
|
||||
# copyright notice and this permission notice appear in all copies.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
|
||||
all: gumbo_snudown snudown-validator

# None of these targets name a file they create, so mark all of them phony.
.PHONY: all clean gumbo_snudown snudown-validator build_dir gperf_src gen_testcases afl

build_dir:
	mkdir -p build

# Our modified gumbo for finding security-relevant syntax issues
gumbo_snudown: build_dir
	mkdir -p build/gumbo_snudown
	git submodule update --recursive
	@[ -f "${CURDIR}/gumbo_snudown/configure" ] || { \
		cd gumbo_snudown; \
		./autogen.sh; \
		./configure --prefix=$(CURDIR)/build/gumbo_snudown; \
	}
	# Don't build this with AFL instrumentation, I'm assuming Google
	# already ran their own fuzzer over their own parser...
	$(MAKE) -C gumbo_snudown all install

# Regenerate the entity lookup header from its gperf description
gperf_src:
	cd ../src/ && gperf html_entities.gperf --output-file=html_entities.h

# executable
# The compiler path is built from AFL_PATH, so fail early with a clear
# message when it is unset instead of handing cmake "/afl-gcc".
snudown-validator: build_dir gumbo_snudown gperf_src
	@[ -n "$(AFL_PATH)" ] || { echo '$$AFL_PATH not set'; false; }
	cd build && cmake .. -DCMAKE_C_COMPILER=$(AFL_PATH)/afl-gcc
	$(MAKE) -C build all

# stuff for fuzzing
gen_testcases:
	mkdir -p testing/testcases
	rm -f testing/testcases/test_default_*.md
	python2.7 gen_testcases.py

afl: gen_testcases snudown-validator
	@[ -n "$(AFL_PATH)" ] || { echo '$$AFL_PATH not set'; false; }
	@mkdir -p testing/afl_results
	$(AFL_PATH)/afl-fuzz \
		-i testing/testcases \
		-o testing/afl_results \
		-t 100 \
		-m none \
		./build/snudown-validator

# housekeeping
clean:
	rm -rf *.o
	rm -rf build/
|
|
@ -1,20 +0,0 @@
|
|||
#!/usr/bin/env python

# Dump all of our testcases into a directory as separate files, like AFL
# wants. Paths are relative to the current working directory, so run this
# from the directory that contains testing/.

import itertools
import os.path
import sys

# test_snudown is expected one directory above the working directory.
sys.path.append("..")
import test_snudown

# Inputs longer than this many characters are not written out.
MAX_CASE_LENGTH = 2048

cases = itertools.chain(test_snudown.cases.keys(), test_snudown.wiki_cases.keys())
for i, md in enumerate(cases):
    # skip huge testcases (their index is still consumed, so numbering of
    # the written files may have gaps)
    if len(md) > MAX_CASE_LENGTH:
        continue
    test_path = os.path.join('testing', 'testcases', 'test_default_%d.md' % i)
    with open(test_path, 'w') as f:
        f.write(md)
|
|
@ -1,226 +0,0 @@
|
|||
#include "markdown.h"
|
||||
#include "html.h"
|
||||
#include "buffer.h"
|
||||
|
||||
#include <ctype.h>
|
||||
#include <errno.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <gumbo.h>
|
||||
|
||||
#define READ_UNIT 1024
|
||||
#define OUTPUT_UNIT 64
|
||||
|
||||
#include "autolink.h"
|
||||
|
||||
#define SNUDOWN_VERSION "1.3.2"
|
||||
|
||||
enum snudown_renderer_mode {
|
||||
RENDERER_USERTEXT = 0,
|
||||
RENDERER_WIKI,
|
||||
RENDERER_COUNT
|
||||
};
|
||||
|
||||
struct snudown_renderopt {
|
||||
struct html_renderopt html;
|
||||
int nofollow;
|
||||
const char *target;
|
||||
};
|
||||
|
||||
struct snudown_renderer {
|
||||
struct sd_markdown* main_renderer;
|
||||
struct sd_markdown* toc_renderer;
|
||||
struct module_state* state;
|
||||
struct module_state* toc_state;
|
||||
};
|
||||
|
||||
struct module_state {
|
||||
struct sd_callbacks callbacks;
|
||||
struct snudown_renderopt options;
|
||||
};
|
||||
|
||||
static struct snudown_renderer sundown[RENDERER_COUNT];
|
||||
|
||||
static char* html_element_whitelist[] = {"tr", "th", "td", "table", "tbody", "thead", "tfoot", "caption", NULL};
|
||||
static char* html_attr_whitelist[] = {"colspan", "rowspan", "cellspacing", "cellpadding", "scope", NULL};
|
||||
|
||||
static struct module_state usertext_toc_state;
|
||||
static struct module_state wiki_toc_state;
|
||||
static struct module_state usertext_state;
|
||||
static struct module_state wiki_state;
|
||||
|
||||
static const unsigned int snudown_default_md_flags =
|
||||
MKDEXT_NO_INTRA_EMPHASIS |
|
||||
MKDEXT_SUPERSCRIPT |
|
||||
MKDEXT_AUTOLINK |
|
||||
MKDEXT_STRIKETHROUGH |
|
||||
MKDEXT_TABLES;
|
||||
|
||||
static const unsigned int snudown_default_render_flags =
|
||||
HTML_SKIP_HTML |
|
||||
HTML_SKIP_IMAGES |
|
||||
HTML_SAFELINK |
|
||||
HTML_ESCAPE |
|
||||
HTML_USE_XHTML;
|
||||
|
||||
static const unsigned int snudown_wiki_render_flags =
|
||||
HTML_SKIP_HTML |
|
||||
HTML_SAFELINK |
|
||||
HTML_ALLOW_ELEMENT_WHITELIST |
|
||||
HTML_ESCAPE |
|
||||
HTML_USE_XHTML;
|
||||
|
||||
static void
|
||||
snudown_link_attr(struct buf *ob, const struct buf *link, void *opaque)
|
||||
{
|
||||
struct snudown_renderopt *options = opaque;
|
||||
|
||||
if (options->nofollow)
|
||||
BUFPUTSL(ob, " rel=\"nofollow\"");
|
||||
|
||||
if (options->target != NULL) {
|
||||
BUFPUTSL(ob, " target=\"");
|
||||
bufputs(ob, options->target);
|
||||
bufputc(ob, '\"');
|
||||
}
|
||||
}
|
||||
|
||||
static struct sd_markdown* make_custom_renderer(struct module_state* state,
|
||||
const unsigned int renderflags,
|
||||
const unsigned int markdownflags,
|
||||
int toc_renderer) {
|
||||
if(toc_renderer) {
|
||||
sdhtml_toc_renderer(&state->callbacks,
|
||||
(struct html_renderopt *)&state->options);
|
||||
} else {
|
||||
sdhtml_renderer(&state->callbacks,
|
||||
(struct html_renderopt *)&state->options,
|
||||
renderflags);
|
||||
}
|
||||
|
||||
state->options.html.link_attributes = &snudown_link_attr;
|
||||
state->options.html.html_element_whitelist = html_element_whitelist;
|
||||
state->options.html.html_attr_whitelist = html_attr_whitelist;
|
||||
|
||||
return sd_markdown_new(
|
||||
markdownflags,
|
||||
16,
|
||||
64,
|
||||
&state->callbacks,
|
||||
&state->options
|
||||
);
|
||||
}
|
||||
|
||||
void init_default_renderer() {
|
||||
sundown[RENDERER_USERTEXT].main_renderer = make_custom_renderer(&usertext_state, snudown_default_render_flags, snudown_default_md_flags, 0);
|
||||
sundown[RENDERER_USERTEXT].toc_renderer = make_custom_renderer(&usertext_toc_state, snudown_default_render_flags, snudown_default_md_flags, 1);
|
||||
sundown[RENDERER_USERTEXT].state = &usertext_state;
|
||||
sundown[RENDERER_USERTEXT].toc_state = &usertext_toc_state;
|
||||
}
|
||||
|
||||
void init_wiki_renderer() {
|
||||
sundown[RENDERER_WIKI].main_renderer = make_custom_renderer(&wiki_state, snudown_wiki_render_flags, snudown_default_md_flags, 0);
|
||||
sundown[RENDERER_WIKI].toc_renderer = make_custom_renderer(&wiki_toc_state, snudown_wiki_render_flags, snudown_default_md_flags, 1);
|
||||
sundown[RENDERER_WIKI].state = &wiki_state;
|
||||
sundown[RENDERER_WIKI].toc_state = &wiki_toc_state;
|
||||
}
|
||||
|
||||
void
|
||||
snudown_md(struct buf *ob, const uint8_t *document, size_t doc_size, int wiki_mode)
|
||||
{
|
||||
int renderer = RENDERER_USERTEXT;
|
||||
int enable_toc = 0;
|
||||
struct snudown_renderer _snudown;
|
||||
int nofollow = 0;
|
||||
char* target = NULL;
|
||||
char* toc_id_prefix = NULL;
|
||||
unsigned int flags;
|
||||
|
||||
if (wiki_mode)
|
||||
renderer = RENDERER_WIKI;
|
||||
|
||||
_snudown = sundown[renderer];
|
||||
|
||||
struct snudown_renderopt *options = &(_snudown.state->options);
|
||||
options->nofollow = nofollow;
|
||||
options->target = target;
|
||||
|
||||
flags = options->html.flags;
|
||||
|
||||
if (enable_toc) {
|
||||
_snudown.toc_state->options.html.toc_id_prefix = toc_id_prefix;
|
||||
sd_markdown_render(ob, document, doc_size, _snudown.toc_renderer);
|
||||
_snudown.toc_state->options.html.toc_id_prefix = NULL;
|
||||
|
||||
options->html.flags |= HTML_TOC;
|
||||
}
|
||||
|
||||
options->html.toc_id_prefix = toc_id_prefix;
|
||||
|
||||
/* do the magic */
|
||||
sd_markdown_render(ob, document, doc_size, _snudown.main_renderer);
|
||||
|
||||
options->html.toc_id_prefix = NULL;
|
||||
options->html.flags = flags;
|
||||
}
|
||||
int
|
||||
main(int argc, char **argv)
|
||||
{
|
||||
init_default_renderer();
|
||||
init_wiki_renderer();
|
||||
|
||||
struct buf *ib, *ob;
|
||||
int size_read = 0, wiki_mode = 0, i = 0, have_errors = 0;
|
||||
|
||||
/* reading everything */
|
||||
ib = bufnew(READ_UNIT);
|
||||
bufgrow(ib, READ_UNIT);
|
||||
while ((size_read = fread(ib->data + ib->size, 1, ib->asize - ib->size, stdin)) > 0) {
|
||||
ib->size += size_read;
|
||||
bufgrow(ib, ib->size + READ_UNIT);
|
||||
}
|
||||
/* Render to a buffer, then print that out */
|
||||
ob = bufnew(OUTPUT_UNIT);
|
||||
bufputs(ob, "<!DOCTYPE html><html><body>\n");
|
||||
snudown_md(ob, ib->data, ib->size, wiki_mode);
|
||||
bufputs(ob, "</body></html>\n");
|
||||
|
||||
// Wiki mode explicitly allows unbalanced tags, need some way to exclude those
|
||||
if (!wiki_mode) {
|
||||
GumboOutput* output = gumbo_parse_with_options(&kGumboDefaultOptions, bufcstr(ob), ob->size);
|
||||
|
||||
for (i=0; i < output->errors.length; ++i) {
|
||||
// stupid "public" API I hacked in.
|
||||
void* thing = output->errors.data[i];
|
||||
GumboErrorType type = gumbo_get_error_type(thing);
|
||||
switch(type) {
|
||||
case GUMBO_ERR_UTF8_INVALID:
|
||||
case GUMBO_ERR_UTF8_NULL:
|
||||
// Making sure the user gave us valid
|
||||
// utf-8 or transforming it to valid
|
||||
// utf-8 is outside the scope of snudown
|
||||
continue;
|
||||
default:
|
||||
have_errors = 1;
|
||||
printf("%s\n", GUMBO_ERROR_NAMES[type]);
|
||||
printf("%s\n",gumbo_get_error_text(thing));
|
||||
printf("===============\n");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (have_errors) {
|
||||
// gotta trigger a crash for AFL to catch it
|
||||
assert(0);
|
||||
}
|
||||
|
||||
gumbo_destroy_output(&kGumboDefaultOptions, output);
|
||||
}
|
||||
bufrelease(ob);
|
||||
bufrelease(ib);
|
||||
return 0;
|
||||
}
|
|
@ -1,2 +0,0 @@
|
|||
#!/bin/bash
# Re-run every AFL crash/hang sample through validatemd.sh.
# NUL-delimited hand-off (-print0 / -0) keeps xargs from interpreting quotes,
# backslashes or whitespace that may appear in a sample's file name.
find testing/afl_results/ -regextype posix-egrep -regex ".*/(crashes|hangs)/.*" -print0 | xargs -0 -I '{}' ./validatemd.sh '{}'
|
|
@ -1,3 +0,0 @@
|
|||
#!/bin/bash
# Feed one sample file ($1) to the validator binary on stdin.
echo "** ${1}"
# Quote the path: unquoted, a name containing spaces or glob characters makes
# the redirection fail ("ambiguous redirect") or read the wrong file.
./build/snudown-validator < "$1"
|
|
@ -1,3 +0,0 @@
|
|||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:3f9266ea2d2bd19a503b5d2ec613e983c6ed9ea45ff6b5820b0681fd1b778d12
|
||||
size 103424
|
|
@ -1,37 +0,0 @@
|
|||
#ifndef HOUDINI_H__
|
||||
#define HOUDINI_H__
|
||||
|
||||
#include "buffer.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#ifdef HOUDINI_USE_LOCALE
#   define _isxdigit(c) isxdigit(c)
#   define _isdigit(c) isdigit(c)
#else
/*
 * Locale-independent digit tests -- do not trust the current locale.
 *
 * strchr() treats the terminating NUL as part of the searched string, so a
 * bare strchr() lookup would report '\0' as a hex digit; reject it first.
 * Both macros evaluate their argument more than once.
 */
#   define _isxdigit(c) ((c) != '\0' && strchr("0123456789ABCDEFabcdef", (c)) != NULL)
#   define _isdigit(c) ((c) >= '0' && (c) <= '9')
#endif
|
||||
|
||||
extern void houdini_escape_html(struct buf *ob, const uint8_t *src, size_t size);
|
||||
extern void houdini_escape_html0(struct buf *ob, const uint8_t *src, size_t size, int secure);
|
||||
extern void houdini_unescape_html(struct buf *ob, const uint8_t *src, size_t size);
|
||||
extern void houdini_escape_xml(struct buf *ob, const uint8_t *src, size_t size);
|
||||
extern void houdini_escape_uri(struct buf *ob, const uint8_t *src, size_t size);
|
||||
extern void houdini_escape_url(struct buf *ob, const uint8_t *src, size_t size);
|
||||
extern void houdini_escape_href(struct buf *ob, const uint8_t *src, size_t size);
|
||||
extern void houdini_unescape_uri(struct buf *ob, const uint8_t *src, size_t size);
|
||||
extern void houdini_unescape_url(struct buf *ob, const uint8_t *src, size_t size);
|
||||
extern void houdini_escape_js(struct buf *ob, const uint8_t *src, size_t size);
|
||||
extern void houdini_unescape_js(struct buf *ob, const uint8_t *src, size_t size);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
|
@ -1,116 +0,0 @@
|
|||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "houdini.h"
|
||||
|
||||
#define ESCAPE_GROW_FACTOR(x) (((x) * 12) / 10)
|
||||
|
||||
/*
|
||||
* The following characters will not be escaped:
|
||||
*
|
||||
* -_.+!*'(),%#@?=;:/,+&$ alphanum
|
||||
*
|
||||
* Note that this character set is the addition of:
|
||||
*
|
||||
* - The characters which are safe to be in an URL
|
||||
* - The characters which are *not* safe to be in
|
||||
* an URL because they are RESERVED characters.
|
||||
*
|
||||
* We assume (lazily) that any RESERVED char that
|
||||
* appears inside an URL is actually meant to
|
||||
* have its native function (i.e. as an URL
|
||||
* component/separator) and hence needs no escaping.
|
||||
*
|
||||
* There are two exceptions: the characters & (amp)
|
||||
* and ' (single quote) do not appear in the table.
|
||||
* They are meant to appear in the URL as components,
|
||||
* yet they require special HTML-entity escaping
|
||||
* to generate valid HTML markup.
|
||||
*
|
||||
* All other characters will be escaped to %XX.
|
||||
*
|
||||
*/
|
||||
static const char HREF_SAFE[] = {
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 2, 2, 0, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
|
||||
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
};
|
||||
|
||||
void
|
||||
houdini_escape_href(struct buf *ob, const uint8_t *src, size_t size)
|
||||
{
|
||||
static const char hex_chars[] = "0123456789ABCDEF";
|
||||
size_t i = 0, org;
|
||||
char hex_str[3];
|
||||
|
||||
bufgrow(ob, ESCAPE_GROW_FACTOR(size));
|
||||
hex_str[0] = '%';
|
||||
|
||||
while (i < size) {
|
||||
org = i;
|
||||
/* Skip by characters that don't need special
|
||||
* processing */
|
||||
while (i < size && HREF_SAFE[src[i]] == 1)
|
||||
i++;
|
||||
|
||||
if (i > org)
|
||||
bufput(ob, src + org, i - org);
|
||||
|
||||
/* escaping */
|
||||
if (i >= size)
|
||||
break;
|
||||
|
||||
/* throw out control characters */
|
||||
if (HREF_SAFE[src[i]] == 2) {
|
||||
i++;
|
||||
continue;
|
||||
}
|
||||
|
||||
switch (src[i]) {
|
||||
/* amp appears all the time in URLs, but needs
|
||||
* HTML-entity escaping to be inside an href */
|
||||
case '&':
|
||||
BUFPUTSL(ob, "&");
|
||||
break;
|
||||
|
||||
/* the single quote is a valid URL character
|
||||
* according to the standard; it needs HTML
|
||||
* entity escaping too */
|
||||
case '\'':
|
||||
BUFPUTSL(ob, "'");
|
||||
break;
|
||||
|
||||
/* the space can be escaped to %20 or a plus
|
||||
* sign. we're going with the generic escape
|
||||
* for now. the plus thing is more commonly seen
|
||||
* when building GET strings */
|
||||
#if 0
|
||||
case ' ':
|
||||
bufputc(ob, '+');
|
||||
break;
|
||||
#endif
|
||||
|
||||
/* every other character goes with a %XX escaping */
|
||||
default:
|
||||
hex_str[1] = hex_chars[(src[i] >> 4) & 0xF];
|
||||
hex_str[2] = hex_chars[src[i] & 0xF];
|
||||
bufput(ob, hex_str, 3);
|
||||
}
|
||||
|
||||
i++;
|
||||
}
|
||||
}
|
|
@ -1,87 +0,0 @@
|
|||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "houdini.h"
|
||||
|
||||
#define ESCAPE_GROW_FACTOR(x) (((x) * 12) / 10) /* this is very scientific, yes */
|
||||
|
||||
/**
 * According to the OWASP rules:
 *
 * & --> &amp;
 * < --> &lt;
 * > --> &gt;
 * " --> &quot;
 * ' --> &#x27;     &apos; is not recommended
 * / --> &#x2F;     forward slash is included as it helps end an HTML entity
 *
 */
|
||||
static const char HTML_ESCAPE_TABLE[] = {
|
||||
7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 7, 7, 0, 7, 7,
|
||||
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
|
||||
0, 0, 1, 0, 0, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 4,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 6, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
};
|
||||
|
||||
/*
 * Replacement text for each class stored in HTML_ESCAPE_TABLE.
 * The index is the table value for the byte being escaped.
 */
static const char *HTML_ESCAPES[] = {
    "",         /* 0: no escaping needed (never emitted) */
    "&quot;",   /* 1: double quote */
    "&amp;",    /* 2: ampersand */
    "&#39;",    /* 3: single quote */
    "&#47;",    /* 4: forward slash (secure mode only) */
    "&lt;",     /* 5: less-than */
    "&gt;",     /* 6: greater-than */
    "",         /* 7: control characters are thrown out */
};
|
||||
|
||||
void
|
||||
houdini_escape_html0(struct buf *ob, const uint8_t *src, size_t size, int secure)
|
||||
{
|
||||
size_t i = 0, org, esc = 0;
|
||||
|
||||
bufgrow(ob, ESCAPE_GROW_FACTOR(size));
|
||||
|
||||
while (i < size) {
|
||||
org = i;
|
||||
while (i < size && (esc = HTML_ESCAPE_TABLE[src[i]]) == 0)
|
||||
i++;
|
||||
|
||||
if (i > org)
|
||||
bufput(ob, src + org, i - org);
|
||||
|
||||
/* escaping */
|
||||
if (i >= size)
|
||||
break;
|
||||
|
||||
/* The forward slash is only escaped in secure mode */
|
||||
if (src[i] == '/' && !secure) {
|
||||
bufputc(ob, '/');
|
||||
} else if (HTML_ESCAPE_TABLE[src[i]] == 7) {
|
||||
/* skip control characters */
|
||||
} else {
|
||||
bufputs(ob, HTML_ESCAPES[esc]);
|
||||
}
|
||||
|
||||
i++;
|
||||
}
|
||||
}
|
||||
|
||||
/* HTML-escape `src` into `ob` in secure mode (forward slashes are escaped too). */
void
houdini_escape_html(struct buf *ob, const uint8_t *src, size_t size)
{
    const int secure = 1;

    houdini_escape_html0(ob, src, size, secure);
}
|
||||
|
|
@ -1,790 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2009, Natacha Porté
|
||||
* Copyright (c) 2011, Vicent Marti
|
||||
*
|
||||
* Permission to use, copy, modify, and distribute this software for any
|
||||
* purpose with or without fee is hereby granted, provided that the above
|
||||
* copyright notice and this permission notice appear in all copies.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "markdown.h"
|
||||
#include "html.h"
|
||||
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <ctype.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
#include "houdini.h"
|
||||
|
||||
#define USE_XHTML(opt) (opt->flags & HTML_USE_XHTML)
|
||||
|
||||
int
|
||||
sdhtml_is_tag(const uint8_t *tag_data, size_t tag_size, const char *tagname)
|
||||
{
|
||||
size_t i;
|
||||
int closed = 0;
|
||||
|
||||
if (tag_size < 3 || tag_data[0] != '<')
|
||||
return HTML_TAG_NONE;
|
||||
|
||||
i = 1;
|
||||
|
||||
if (tag_data[i] == '/') {
|
||||
closed = 1;
|
||||
i++;
|
||||
}
|
||||
|
||||
for (; i < tag_size; ++i, ++tagname) {
|
||||
if (*tagname == 0)
|
||||
break;
|
||||
|
||||
if (tag_data[i] != *tagname)
|
||||
return HTML_TAG_NONE;
|
||||
}
|
||||
|
||||
if (i == tag_size)
|
||||
return HTML_TAG_NONE;
|
||||
|
||||
if (isspace(tag_data[i]) || tag_data[i] == '>')
|
||||
return closed ? HTML_TAG_CLOSE : HTML_TAG_OPEN;
|
||||
|
||||
return HTML_TAG_NONE;
|
||||
}
|
||||
|
||||
/* HTML-escape `length` bytes of `source` into `ob`, leaving '/' unescaped. */
static inline void
escape_html(struct buf *ob, const uint8_t *source, size_t length)
{
    const int secure = 0;

    houdini_escape_html0(ob, source, length, secure);
}
|
||||
|
||||
/* Escape `length` bytes of `source` for use as an href value; thin wrapper
 * over houdini_escape_href(). */
static inline void
escape_href(struct buf *ob, const uint8_t *source, size_t length)
{
    houdini_escape_href(ob, source, length);
}
|
||||
|
||||
/********************
|
||||
* GENERIC RENDERER *
|
||||
********************/
|
||||
static int
|
||||
rndr_autolink(struct buf *ob, const struct buf *link, enum mkd_autolink type, void *opaque)
|
||||
{
|
||||
struct html_renderopt *options = opaque;
|
||||
uint8_t offset = 0;
|
||||
|
||||
if (!link || !link->size)
|
||||
return 0;
|
||||
|
||||
if ((options->flags & HTML_SAFELINK) != 0 &&
|
||||
!sd_autolink_issafe(link->data, link->size) &&
|
||||
type != MKDA_EMAIL)
|
||||
return 0;
|
||||
|
||||
BUFPUTSL(ob, "<a href=\"");
|
||||
if (type == MKDA_EMAIL)
|
||||
BUFPUTSL(ob, "mailto:");
|
||||
escape_href(ob, link->data + offset, link->size - offset);
|
||||
|
||||
if (options->link_attributes) {
|
||||
bufputc(ob, '\"');
|
||||
options->link_attributes(ob, link, opaque);
|
||||
bufputc(ob, '>');
|
||||
} else {
|
||||
BUFPUTSL(ob, "\">");
|
||||
}
|
||||
|
||||
/*
|
||||
* Pretty printing: if we get an email address as
|
||||
* an actual URI, e.g. `mailto:foo@bar.com`, we don't
|
||||
* want to print the `mailto:` prefix
|
||||
*/
|
||||
if (bufprefix(link, "mailto:") == 0) {
|
||||
escape_html(ob, link->data + 7, link->size - 7);
|
||||
} else {
|
||||
escape_html(ob, link->data, link->size);
|
||||
}
|
||||
|
||||
BUFPUTSL(ob, "</a>");
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void
|
||||
rndr_blockcode(struct buf *ob, const struct buf *text, const struct buf *lang, void *opaque)
|
||||
{
|
||||
if (ob->size) bufputc(ob, '\n');
|
||||
|
||||
if (lang && lang->size) {
|
||||
size_t i, cls;
|
||||
BUFPUTSL(ob, "<pre><code class=\"");
|
||||
|
||||
for (i = 0, cls = 0; i < lang->size; ++i, ++cls) {
|
||||
while (i < lang->size && isspace(lang->data[i]))
|
||||
i++;
|
||||
|
||||
if (i < lang->size) {
|
||||
size_t org = i;
|
||||
while (i < lang->size && !isspace(lang->data[i]))
|
||||
i++;
|
||||
|
||||
if (lang->data[org] == '.')
|
||||
org++;
|
||||
|
||||
if (cls) bufputc(ob, ' ');
|
||||
escape_html(ob, lang->data + org, i - org);
|
||||
}
|
||||
}
|
||||
|
||||
BUFPUTSL(ob, "\">");
|
||||
} else
|
||||
BUFPUTSL(ob, "<pre><code>");
|
||||
|
||||
if (text)
|
||||
escape_html(ob, text->data, text->size);
|
||||
|
||||
BUFPUTSL(ob, "</code></pre>\n");
|
||||
}
|
||||
|
||||
static void
|
||||
rndr_blockquote(struct buf *ob, const struct buf *text, void *opaque)
|
||||
{
|
||||
if (ob->size) bufputc(ob, '\n');
|
||||
BUFPUTSL(ob, "<blockquote>\n");
|
||||
if (text) bufput(ob, text->data, text->size);
|
||||
BUFPUTSL(ob, "</blockquote>\n");
|
||||
}
|
||||
|
||||
static int
|
||||
rndr_codespan(struct buf *ob, const struct buf *text, void *opaque)
|
||||
{
|
||||
BUFPUTSL(ob, "<code>");
|
||||
if (text) escape_html(ob, text->data, text->size);
|
||||
BUFPUTSL(ob, "</code>");
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int
|
||||
rndr_strikethrough(struct buf *ob, const struct buf *text, void *opaque)
|
||||
{
|
||||
if (!text || !text->size)
|
||||
return 0;
|
||||
|
||||
BUFPUTSL(ob, "<del>");
|
||||
bufput(ob, text->data, text->size);
|
||||
BUFPUTSL(ob, "</del>");
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int
|
||||
rndr_double_emphasis(struct buf *ob, const struct buf *text, void *opaque)
|
||||
{
|
||||
if (!text || !text->size)
|
||||
return 0;
|
||||
|
||||
BUFPUTSL(ob, "<strong>");
|
||||
bufput(ob, text->data, text->size);
|
||||
BUFPUTSL(ob, "</strong>");
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int
|
||||
rndr_emphasis(struct buf *ob, const struct buf *text, void *opaque)
|
||||
{
|
||||
if (!text || !text->size) return 0;
|
||||
BUFPUTSL(ob, "<em>");
|
||||
if (text) bufput(ob, text->data, text->size);
|
||||
BUFPUTSL(ob, "</em>");
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Emit a line break, self-closed when HTML_USE_XHTML is set. Always returns 1. */
static int
rndr_linebreak(struct buf *ob, void *opaque)
{
    struct html_renderopt *options = opaque;

    if (USE_XHTML(options))
        bufputs(ob, "<br/>\n");
    else
        bufputs(ob, "<br>\n");

    return 1;
}
|
||||
|
||||
static void
|
||||
rndr_header(struct buf *ob, const struct buf *text, int level, void *opaque)
|
||||
{
|
||||
struct html_renderopt *options = opaque;
|
||||
|
||||
if (ob->size)
|
||||
bufputc(ob, '\n');
|
||||
|
||||
if (options->flags & HTML_TOC) {
|
||||
bufprintf(ob, "<h%d id=\"", level);
|
||||
if (options->toc_id_prefix) {
|
||||
bufputs(ob, options->toc_id_prefix);
|
||||
}
|
||||
bufprintf(ob, "toc_%d\">", options->toc_data.header_count++);
|
||||
} else {
|
||||
bufprintf(ob, "<h%d>", level);
|
||||
}
|
||||
|
||||
if (text) bufput(ob, text->data, text->size);
|
||||
bufprintf(ob, "</h%d>\n", level);
|
||||
}
|
||||
|
||||
static int
|
||||
rndr_link(struct buf *ob, const struct buf *link, const struct buf *title, const struct buf *content, void *opaque)
|
||||
{
|
||||
struct html_renderopt *options = opaque;
|
||||
|
||||
if (link != NULL && (options->flags & HTML_SAFELINK) != 0 && !sd_autolink_issafe(link->data, link->size))
|
||||
return 0;
|
||||
|
||||
BUFPUTSL(ob, "<a href=\"");
|
||||
|
||||
if (link && link->size)
|
||||
escape_href(ob, link->data, link->size);
|
||||
|
||||
if (title && title->size) {
|
||||
BUFPUTSL(ob, "\" title=\"");
|
||||
escape_html(ob, title->data, title->size);
|
||||
}
|
||||
|
||||
if (options->link_attributes) {
|
||||
bufputc(ob, '\"');
|
||||
options->link_attributes(ob, link, opaque);
|
||||
bufputc(ob, '>');
|
||||
} else {
|
||||
BUFPUTSL(ob, "\">");
|
||||
}
|
||||
|
||||
if (content && content->size) bufput(ob, content->data, content->size);
|
||||
BUFPUTSL(ob, "</a>");
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void
|
||||
rndr_list(struct buf *ob, const struct buf *text, int flags, void *opaque)
|
||||
{
|
||||
if (ob->size) bufputc(ob, '\n');
|
||||
bufput(ob, flags & MKD_LIST_ORDERED ? "<ol>\n" : "<ul>\n", 5);
|
||||
if (text) bufput(ob, text->data, text->size);
|
||||
bufput(ob, flags & MKD_LIST_ORDERED ? "</ol>\n" : "</ul>\n", 6);
|
||||
}
|
||||
|
||||
static void
|
||||
rndr_listitem(struct buf *ob, const struct buf *text, int flags, void *opaque)
|
||||
{
|
||||
BUFPUTSL(ob, "<li>");
|
||||
if (text) {
|
||||
size_t size = text->size;
|
||||
while (size && text->data[size - 1] == '\n')
|
||||
size--;
|
||||
|
||||
bufput(ob, text->data, size);
|
||||
}
|
||||
BUFPUTSL(ob, "</li>\n");
|
||||
}
|
||||
|
||||
static void
|
||||
rndr_paragraph(struct buf *ob, const struct buf *text, void *opaque)
|
||||
{
|
||||
struct html_renderopt *options = opaque;
|
||||
size_t i = 0;
|
||||
|
||||
if (ob->size) bufputc(ob, '\n');
|
||||
|
||||
if (!text || !text->size)
|
||||
return;
|
||||
|
||||
while (i < text->size && isspace(text->data[i])) i++;
|
||||
|
||||
if (i == text->size)
|
||||
return;
|
||||
|
||||
BUFPUTSL(ob, "<p>");
|
||||
if (options->flags & HTML_HARD_WRAP) {
|
||||
size_t org;
|
||||
while (i < text->size) {
|
||||
org = i;
|
||||
while (i < text->size && text->data[i] != '\n')
|
||||
i++;
|
||||
|
||||
if (i > org)
|
||||
bufput(ob, text->data + org, i - org);
|
||||
|
||||
/*
|
||||
* do not insert a line break if this newline
|
||||
* is the last character on the paragraph
|
||||
*/
|
||||
if (i >= text->size - 1)
|
||||
break;
|
||||
|
||||
rndr_linebreak(ob, opaque);
|
||||
i++;
|
||||
}
|
||||
} else {
|
||||
bufput(ob, &text->data[i], text->size - i);
|
||||
}
|
||||
BUFPUTSL(ob, "</p>\n");
|
||||
}
|
||||
|
||||
static void
|
||||
rndr_raw_block(struct buf *ob, const struct buf *text, void *opaque)
|
||||
{
|
||||
size_t org, sz;
|
||||
if (!text) return;
|
||||
sz = text->size;
|
||||
while (sz > 0 && text->data[sz - 1] == '\n') sz--;
|
||||
org = 0;
|
||||
while (org < sz && text->data[org] == '\n') org++;
|
||||
if (org >= sz) return;
|
||||
if (ob->size) bufputc(ob, '\n');
|
||||
bufput(ob, text->data + org, sz - org);
|
||||
bufputc(ob, '\n');
|
||||
}
|
||||
|
||||
static int
|
||||
rndr_triple_emphasis(struct buf *ob, const struct buf *text, void *opaque)
|
||||
{
|
||||
if (!text || !text->size) return 0;
|
||||
BUFPUTSL(ob, "<strong><em>");
|
||||
bufput(ob, text->data, text->size);
|
||||
BUFPUTSL(ob, "</em></strong>");
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void
|
||||
rndr_hrule(struct buf *ob, void *opaque)
|
||||
{
|
||||
struct html_renderopt *options = opaque;
|
||||
if (ob->size) bufputc(ob, '\n');
|
||||
bufputs(ob, USE_XHTML(options) ? "<hr/>\n" : "<hr>\n");
|
||||
}
|
||||
|
||||
static int
|
||||
rndr_image(struct buf *ob, const struct buf *link, const struct buf *title, const struct buf *alt, void *opaque)
|
||||
{
|
||||
struct html_renderopt *options = opaque;
|
||||
if (!link || !link->size) return 0;
|
||||
|
||||
BUFPUTSL(ob, "<img src=\"");
|
||||
escape_href(ob, link->data, link->size);
|
||||
BUFPUTSL(ob, "\" alt=\"");
|
||||
|
||||
if (alt && alt->size)
|
||||
escape_html(ob, alt->data, alt->size);
|
||||
|
||||
if (title && title->size) {
|
||||
BUFPUTSL(ob, "\" title=\"");
|
||||
escape_html(ob, title->data, title->size); }
|
||||
|
||||
bufputs(ob, USE_XHTML(options) ? "\"/>" : "\">");
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void
|
||||
rndr_html_tag(struct buf *ob, const struct buf *text, void *opaque,
|
||||
char* tagname, char** whitelist, int tagtype)
|
||||
{
|
||||
size_t i, x, z, in_str = 0, seen_equals = 0, done = 0, done_attr = 0, reset = 0;
|
||||
struct buf *attr;
|
||||
struct buf *value;
|
||||
char c;
|
||||
|
||||
bufputc(ob, '<');
|
||||
|
||||
if(tagtype == HTML_TAG_CLOSE) {
|
||||
bufputc(ob, '/');
|
||||
bufputs(ob, tagname);
|
||||
bufputc(ob, '>');
|
||||
return;
|
||||
}
|
||||
|
||||
bufputs(ob, tagname);
|
||||
i = 1 + strlen(tagname);
|
||||
|
||||
attr = bufnew(16);
|
||||
value = bufnew(16);
|
||||
|
||||
for(; i < text->size && !done; i++) {
|
||||
c = text->data[i];
|
||||
done = 0;
|
||||
reset = 0;
|
||||
done_attr = 0;
|
||||
|
||||
switch(c) {
|
||||
case '>':
|
||||
done = 1;
|
||||
break;
|
||||
case '\'':
|
||||
case '"':
|
||||
if(!seen_equals) {
|
||||
reset = 1;
|
||||
} else if(!in_str) {
|
||||
in_str = c;
|
||||
} else if(in_str == c) {
|
||||
in_str = 0;
|
||||
done_attr = 1;
|
||||
} else {
|
||||
bufputc(value, c);
|
||||
}
|
||||
break;
|
||||
case ' ':
|
||||
if (in_str) {
|
||||
bufputc(value, ' ');
|
||||
} else {
|
||||
reset = 1;
|
||||
}
|
||||
break;
|
||||
case '=':
|
||||
if(seen_equals) {
|
||||
reset = 1;
|
||||
break;
|
||||
}
|
||||
seen_equals = 1;
|
||||
break;
|
||||
default:
|
||||
if(seen_equals && in_str || !seen_equals) {
|
||||
bufputc(seen_equals ? value : attr, c);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
if(done_attr) {
|
||||
int valid = 0;
|
||||
for(z = 0; whitelist[z]; z++) {
|
||||
if(strlen(whitelist[z]) != attr->size) {
|
||||
continue;
|
||||
}
|
||||
for(x = 0; x < attr->size; x++) {
|
||||
if(tolower(whitelist[z][x]) != tolower(attr->data[x])) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if(x == attr->size) {
|
||||
valid = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if(valid && value->size && attr->size) {
|
||||
bufputc(ob, ' ');
|
||||
escape_html(ob, attr->data, attr->size);
|
||||
bufputs(ob, "=\"");
|
||||
escape_html(ob, value->data, value->size);
|
||||
bufputc(ob, '"');
|
||||
}
|
||||
reset = 1;
|
||||
}
|
||||
|
||||
if(reset) {
|
||||
seen_equals = 0;
|
||||
in_str = 0;
|
||||
bufreset(attr);
|
||||
bufreset(value);
|
||||
}
|
||||
}
|
||||
|
||||
bufrelease(attr);
|
||||
bufrelease(value);
|
||||
|
||||
bufputc(ob, '>');
|
||||
}
|
||||
|
||||
static int
|
||||
rndr_raw_html(struct buf *ob, const struct buf *text, void *opaque)
|
||||
{
|
||||
struct html_renderopt *options = opaque;
|
||||
char** whitelist = options->html_element_whitelist;
|
||||
int i, tagtype;
|
||||
|
||||
/* Items on the whitelist ignore all other flags and just output */
|
||||
if (((options->flags & HTML_ALLOW_ELEMENT_WHITELIST) != 0) && whitelist) {
|
||||
for (i = 0; whitelist[i]; i++) {
|
||||
tagtype = sdhtml_is_tag(text->data, text->size, whitelist[i]);
|
||||
if (tagtype != HTML_TAG_NONE) {
|
||||
rndr_html_tag(ob, text, opaque,
|
||||
whitelist[i],
|
||||
options->html_attr_whitelist,
|
||||
tagtype);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* HTML_ESCAPE overrides SKIP_HTML, SKIP_STYLE, SKIP_LINKS and SKIP_IMAGES
|
||||
* It doens't see if there are any valid tags, just escape all of them. */
|
||||
if((options->flags & HTML_ESCAPE) != 0) {
|
||||
escape_html(ob, text->data, text->size);
|
||||
return 1;
|
||||
}
|
||||
|
||||
if ((options->flags & HTML_SKIP_HTML) != 0)
|
||||
return 1;
|
||||
|
||||
if ((options->flags & HTML_SKIP_STYLE) != 0 &&
|
||||
sdhtml_is_tag(text->data, text->size, "style"))
|
||||
return 1;
|
||||
|
||||
if ((options->flags & HTML_SKIP_LINKS) != 0 &&
|
||||
sdhtml_is_tag(text->data, text->size, "a"))
|
||||
return 1;
|
||||
|
||||
if ((options->flags & HTML_SKIP_IMAGES) != 0 &&
|
||||
sdhtml_is_tag(text->data, text->size, "img"))
|
||||
return 1;
|
||||
|
||||
bufput(ob, text->data, text->size);
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void
|
||||
rndr_table(struct buf *ob, const struct buf *header, const struct buf *body, void *opaque)
|
||||
{
|
||||
if (ob->size) bufputc(ob, '\n');
|
||||
BUFPUTSL(ob, "<table><thead>\n");
|
||||
if (header)
|
||||
bufput(ob, header->data, header->size);
|
||||
BUFPUTSL(ob, "</thead><tbody>\n");
|
||||
if (body)
|
||||
bufput(ob, body->data, body->size);
|
||||
BUFPUTSL(ob, "</tbody></table>\n");
|
||||
}
|
||||
|
||||
static void
|
||||
rndr_tablerow(struct buf *ob, const struct buf *text, void *opaque)
|
||||
{
|
||||
BUFPUTSL(ob, "<tr>\n");
|
||||
if (text)
|
||||
bufput(ob, text->data, text->size);
|
||||
BUFPUTSL(ob, "</tr>\n");
|
||||
}
|
||||
|
||||
static void
|
||||
rndr_tablecell(struct buf *ob, const struct buf *text, int flags, void *opaque, int col_span)
|
||||
{
|
||||
if (flags & MKD_TABLE_HEADER) {
|
||||
BUFPUTSL(ob, "<th");
|
||||
} else {
|
||||
BUFPUTSL(ob, "<td");
|
||||
}
|
||||
|
||||
if (col_span > 1) {
|
||||
bufprintf(ob, " colspan=\"%d\" ", col_span);
|
||||
}
|
||||
|
||||
switch (flags & MKD_TABLE_ALIGNMASK) {
|
||||
case MKD_TABLE_ALIGN_CENTER:
|
||||
BUFPUTSL(ob, " align=\"center\">");
|
||||
break;
|
||||
|
||||
case MKD_TABLE_ALIGN_L:
|
||||
BUFPUTSL(ob, " align=\"left\">");
|
||||
break;
|
||||
|
||||
case MKD_TABLE_ALIGN_R:
|
||||
BUFPUTSL(ob, " align=\"right\">");
|
||||
break;
|
||||
|
||||
default:
|
||||
BUFPUTSL(ob, ">");
|
||||
}
|
||||
|
||||
if (text)
|
||||
bufput(ob, text->data, text->size);
|
||||
|
||||
if (flags & MKD_TABLE_HEADER) {
|
||||
BUFPUTSL(ob, "</th>\n");
|
||||
} else {
|
||||
BUFPUTSL(ob, "</td>\n");
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
rndr_superscript(struct buf *ob, const struct buf *text, void *opaque)
|
||||
{
|
||||
if (!text || !text->size) return 0;
|
||||
BUFPUTSL(ob, "<sup>");
|
||||
bufput(ob, text->data, text->size);
|
||||
BUFPUTSL(ob, "</sup>");
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void
|
||||
rndr_normal_text(struct buf *ob, const struct buf *text, void *opaque)
|
||||
{
|
||||
if (text)
|
||||
escape_html(ob, text->data, text->size);
|
||||
}
|
||||
|
||||
static void
|
||||
toc_header(struct buf *ob, const struct buf *text, int level, void *opaque)
|
||||
{
|
||||
struct html_renderopt *options = opaque;
|
||||
|
||||
/* set the level offset if this is the first header
|
||||
* we're parsing for the document */
|
||||
if (options->toc_data.current_level == 0) {
|
||||
BUFPUTSL(ob, "<div class=\"toc\">\n");
|
||||
options->toc_data.level_offset = level - 1;
|
||||
}
|
||||
level -= options->toc_data.level_offset;
|
||||
|
||||
if (level > options->toc_data.current_level) {
|
||||
while (level > options->toc_data.current_level) {
|
||||
BUFPUTSL(ob, "<ul>\n<li>\n");
|
||||
options->toc_data.current_level++;
|
||||
}
|
||||
} else if (level < options->toc_data.current_level) {
|
||||
BUFPUTSL(ob, "</li>\n");
|
||||
while (level < options->toc_data.current_level) {
|
||||
BUFPUTSL(ob, "</ul>\n</li>\n");
|
||||
options->toc_data.current_level--;
|
||||
}
|
||||
BUFPUTSL(ob,"<li>\n");
|
||||
} else {
|
||||
BUFPUTSL(ob,"</li>\n<li>\n");
|
||||
}
|
||||
|
||||
BUFPUTSL(ob, "<a href=\"#");
|
||||
|
||||
if (options->toc_id_prefix) {
|
||||
bufputs(ob, options->toc_id_prefix);
|
||||
}
|
||||
|
||||
bufprintf(ob, "toc_%d\">", options->toc_data.header_count++);
|
||||
if (text)
|
||||
escape_html(ob, text->data, text->size);
|
||||
BUFPUTSL(ob, "</a>\n");
|
||||
}
|
||||
|
||||
static int
|
||||
toc_link(struct buf *ob, const struct buf *link, const struct buf *title, const struct buf *content, void *opaque)
|
||||
{
|
||||
if (content && content->size)
|
||||
bufput(ob, content->data, content->size);
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void
|
||||
reset_toc(struct buf *ob, void *opaque)
|
||||
{
|
||||
struct html_renderopt *options = opaque;
|
||||
|
||||
memset(&(options->toc_data), 0, sizeof(options->toc_data));
|
||||
}
|
||||
|
||||
static void
|
||||
toc_finalize(struct buf *ob, void *opaque)
|
||||
{
|
||||
struct html_renderopt *options = opaque;
|
||||
bool has_toc = false;
|
||||
while (options->toc_data.current_level > 0) {
|
||||
BUFPUTSL(ob, "</li>\n</ul>\n");
|
||||
options->toc_data.current_level--;
|
||||
has_toc = true;
|
||||
}
|
||||
if(has_toc) {
|
||||
BUFPUTSL(ob, "</div>\n");
|
||||
}
|
||||
reset_toc(ob, opaque);
|
||||
}
|
||||
|
||||
void
|
||||
sdhtml_toc_renderer(struct sd_callbacks *callbacks, struct html_renderopt *options)
|
||||
{
|
||||
static const struct sd_callbacks cb_default = {
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
toc_header,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
|
||||
NULL,
|
||||
rndr_codespan,
|
||||
rndr_double_emphasis,
|
||||
rndr_emphasis,
|
||||
NULL,
|
||||
NULL,
|
||||
toc_link,
|
||||
NULL,
|
||||
rndr_triple_emphasis,
|
||||
rndr_strikethrough,
|
||||
rndr_superscript,
|
||||
|
||||
NULL,
|
||||
NULL,
|
||||
|
||||
NULL,
|
||||
toc_finalize,
|
||||
};
|
||||
|
||||
memset(options, 0x0, sizeof(struct html_renderopt));
|
||||
options->flags = HTML_TOC | HTML_SKIP_HTML;
|
||||
|
||||
memcpy(callbacks, &cb_default, sizeof(struct sd_callbacks));
|
||||
}
|
||||
|
||||
void
|
||||
sdhtml_renderer(struct sd_callbacks *callbacks, struct html_renderopt *options, unsigned int render_flags)
|
||||
{
|
||||
static const struct sd_callbacks cb_default = {
|
||||
rndr_blockcode,
|
||||
rndr_blockquote,
|
||||
rndr_raw_block,
|
||||
rndr_header,
|
||||
rndr_hrule,
|
||||
rndr_list,
|
||||
rndr_listitem,
|
||||
rndr_paragraph,
|
||||
rndr_table,
|
||||
rndr_tablerow,
|
||||
rndr_tablecell,
|
||||
|
||||
rndr_autolink,
|
||||
rndr_codespan,
|
||||
rndr_double_emphasis,
|
||||
rndr_emphasis,
|
||||
rndr_image,
|
||||
rndr_linebreak,
|
||||
rndr_link,
|
||||
rndr_raw_html,
|
||||
rndr_triple_emphasis,
|
||||
rndr_strikethrough,
|
||||
rndr_superscript,
|
||||
|
||||
NULL,
|
||||
rndr_normal_text,
|
||||
|
||||
NULL,
|
||||
reset_toc,
|
||||
};
|
||||
|
||||
/* Prepare the options pointer */
|
||||
memset(options, 0x0, sizeof(struct html_renderopt));
|
||||
options->flags = render_flags;
|
||||
|
||||
/* Prepare the callbacks */
|
||||
memcpy(callbacks, &cb_default, sizeof(struct sd_callbacks));
|
||||
|
||||
if (render_flags & HTML_SKIP_IMAGES)
|
||||
callbacks->image = NULL;
|
||||
|
||||
if (render_flags & HTML_SKIP_LINKS) {
|
||||
callbacks->link = NULL;
|
||||
callbacks->autolink = NULL;
|
||||
}
|
||||
|
||||
if (render_flags & HTML_SKIP_HTML || render_flags & HTML_ESCAPE)
|
||||
callbacks->blockhtml = NULL;
|
||||
}
|
|
@ -1,83 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2011, Vicent Marti
|
||||
*
|
||||
* Permission to use, copy, modify, and distribute this software for any
|
||||
* purpose with or without fee is hereby granted, provided that the above
|
||||
* copyright notice and this permission notice appear in all copies.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef UPSKIRT_HTML_H
|
||||
#define UPSKIRT_HTML_H
|
||||
|
||||
#include "markdown.h"
|
||||
#include "buffer.h"
|
||||
#include <stdlib.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Per-renderer state and configuration handed to every callback as `opaque`. */
struct html_renderopt {
    /* table-of-contents bookkeeping */
    struct {
        int header_count;   /* running counter used for the toc_<n> ids */
        int current_level;  /* TOC list nesting depth currently open */
        int level_offset;   /* subtracted from header levels; set by the first TOC header */
    } toc_data;

    /* optional string written before "toc_<n>" in ids and anchors; may be NULL */
    char* toc_id_prefix;

    /* bitwise OR of html_render_mode values */
    unsigned int flags;

    /* NULL-terminated arrays of permitted raw-HTML tag and attribute names */
    char** html_element_whitelist;
    char** html_attr_whitelist;

    /* extra callbacks */
    /* optional; invoked after the href value is closed, to append attributes */
    void (*link_attributes)(struct buf *ob, const struct buf *url, void *self);
};
|
||||
|
||||
/* Bit flags stored in html_renderopt.flags; combine them with bitwise OR. */
typedef enum {
	HTML_SKIP_HTML               = 0x001,	/* drop raw HTML */
	HTML_SKIP_STYLE              = 0x002,	/* drop raw <style> tags */
	HTML_SKIP_IMAGES             = 0x004,	/* no image callback; drop raw <img> tags */
	HTML_SKIP_LINKS              = 0x008,	/* no link/autolink callbacks; drop raw <a> tags */
	HTML_EXPAND_TABS             = 0x010,
	HTML_SAFELINK                = 0x020,	/* reject links failing sd_autolink_issafe() */
	HTML_TOC                     = 0x040,	/* give headers "toc_%d" ids */
	HTML_HARD_WRAP               = 0x080,	/* newlines inside paragraphs become line breaks */
	HTML_USE_XHTML               = 0x100,	/* self-close void tags, e.g. <br/> */
	HTML_ESCAPE                  = 0x200,	/* escape raw HTML instead of emitting it */
	HTML_ALLOW_ELEMENT_WHITELIST = 0x400,	/* let whitelisted raw tags through */
} html_render_mode;
|
||||
|
||||
/* Result of sdhtml_is_tag(): no match, an opening tag, or a closing tag. */
typedef enum {
	HTML_TAG_NONE  = 0,
	HTML_TAG_OPEN  = 1,
	HTML_TAG_CLOSE = 2,
} html_tag;
|
||||
|
||||
int
|
||||
sdhtml_is_tag(const uint8_t *tag_data, size_t tag_size, const char *tagname);
|
||||
|
||||
extern void
|
||||
sdhtml_renderer(struct sd_callbacks *callbacks, struct html_renderopt *options_ptr, unsigned int render_flags);
|
||||
|
||||
extern void
|
||||
sdhtml_toc_renderer(struct sd_callbacks *callbacks, struct html_renderopt *options_ptr);
|
||||
|
||||
extern void
|
||||
sdhtml_smartypants(struct buf *ob, const uint8_t *text, size_t size);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
|
@ -1,37 +0,0 @@
|
|||
#ifndef HOUDINI_H__
|
||||
#define HOUDINI_H__
|
||||
|
||||
#include "buffer.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#ifdef HOUDINI_USE_LOCALE
#	define _isxdigit(c) isxdigit(c)
#	define _isdigit(c) isdigit(c)
#else
/*
 * Helper _isdigit methods -- do not trust the current locale.
 *
 * strchr() treats the terminating NUL as part of the string, so a bare
 * strchr() lookup would report '\0' as a hex digit; it is rejected
 * explicitly. Both macros evaluate their argument more than once, so do
 * not pass expressions with side effects.
 */
#	define _isxdigit(c) ((c) != '\0' && strchr("0123456789ABCDEFabcdef", (c)) != NULL)
#	define _isdigit(c) ((c) >= '0' && (c) <= '9')
#endif
|
||||
|
||||
extern void houdini_escape_html(struct buf *ob, const uint8_t *src, size_t size);
|
||||
extern void houdini_escape_html0(struct buf *ob, const uint8_t *src, size_t size, int secure);
|
||||
extern void houdini_unescape_html(struct buf *ob, const uint8_t *src, size_t size);
|
||||
extern void houdini_escape_xml(struct buf *ob, const uint8_t *src, size_t size);
|
||||
extern void houdini_escape_uri(struct buf *ob, const uint8_t *src, size_t size);
|
||||
extern void houdini_escape_url(struct buf *ob, const uint8_t *src, size_t size);
|
||||
extern void houdini_escape_href(struct buf *ob, const uint8_t *src, size_t size);
|
||||
extern void houdini_unescape_uri(struct buf *ob, const uint8_t *src, size_t size);
|
||||
extern void houdini_unescape_url(struct buf *ob, const uint8_t *src, size_t size);
|
||||
extern void houdini_escape_js(struct buf *ob, const uint8_t *src, size_t size);
|
||||
extern void houdini_unescape_js(struct buf *ob, const uint8_t *src, size_t size);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
|
@ -1,116 +0,0 @@
|
|||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "houdini.h"
|
||||
|
||||
#define ESCAPE_GROW_FACTOR(x) (((x) * 12) / 10)
|
||||
|
||||
/*
|
||||
* The following characters will not be escaped:
|
||||
*
|
||||
* -_.+!*'(),%#@?=;:/,+&$ alphanum
|
||||
*
|
||||
* Note that this character set is the addition of:
|
||||
*
|
||||
* - The characters which are safe to be in an URL
|
||||
* - The characters which are *not* safe to be in
|
||||
* an URL because they are RESERVED characters.
|
||||
*
|
||||
* We assume (lazily) that any RESERVED char that
|
||||
* appears inside an URL is actually meant to
|
||||
* have its native function (i.e. as an URL
|
||||
* component/separator) and hence needs no escaping.
|
||||
*
|
||||
* There are two exceptions: the characters & (amp)
|
||||
* and ' (single quote) do not appear in the table.
|
||||
* They are meant to appear in the URL as components,
|
||||
* yet they require special HTML-entity escaping
|
||||
* to generate valid HTML markup.
|
||||
*
|
||||
* All other characters will be escaped to %XX.
|
||||
*
|
||||
*/
|
||||
/*
 * Per-byte classification used by houdini_escape_href():
 *   1 = copy the byte through unchanged
 *   2 = control character, dropped from the output
 *   0 = needs escaping (%XX, or an HTML entity for '&' and '\'')
 */
static const char HREF_SAFE[] = {
	/* 0x00-0x0F */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 2, 2, 0, 2, 2,
	/* 0x10-0x1F */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0x20-0x2F */ 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x30-0x3F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
	/* 0x40-0x4F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x50-0x5F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
	/* 0x60-0x6F */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x70-0x7F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
	/* 0x80-0x8F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x90-0x9F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xA0-0xAF */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xB0-0xBF */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xC0-0xCF */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xD0-0xDF */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xE0-0xEF */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xF0-0xFF */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
|
||||
|
||||
void
|
||||
houdini_escape_href(struct buf *ob, const uint8_t *src, size_t size)
|
||||
{
|
||||
static const char hex_chars[] = "0123456789ABCDEF";
|
||||
size_t i = 0, org;
|
||||
char hex_str[3];
|
||||
|
||||
bufgrow(ob, ESCAPE_GROW_FACTOR(size));
|
||||
hex_str[0] = '%';
|
||||
|
||||
while (i < size) {
|
||||
org = i;
|
||||
/* Skip by characters that don't need special
|
||||
* processing */
|
||||
while (i < size && HREF_SAFE[src[i]] == 1)
|
||||
i++;
|
||||
|
||||
if (i > org)
|
||||
bufput(ob, src + org, i - org);
|
||||
|
||||
/* escaping */
|
||||
if (i >= size)
|
||||
break;
|
||||
|
||||
/* throw out control characters */
|
||||
if (HREF_SAFE[src[i]] == 2) {
|
||||
i++;
|
||||
continue;
|
||||
}
|
||||
|
||||
switch (src[i]) {
|
||||
/* amp appears all the time in URLs, but needs
|
||||
* HTML-entity escaping to be inside an href */
|
||||
case '&':
|
||||
BUFPUTSL(ob, "&");
|
||||
break;
|
||||
|
||||
/* the single quote is a valid URL character
|
||||
* according to the standard; it needs HTML
|
||||
* entity escaping too */
|
||||
case '\'':
|
||||
BUFPUTSL(ob, "'");
|
||||
break;
|
||||
|
||||
/* the space can be escaped to %20 or a plus
|
||||
* sign. we're going with the generic escape
|
||||
* for now. the plus thing is more commonly seen
|
||||
* when building GET strings */
|
||||
#if 0
|
||||
case ' ':
|
||||
bufputc(ob, '+');
|
||||
break;
|
||||
#endif
|
||||
|
||||
/* every other character goes with a %XX escaping */
|
||||
default:
|
||||
hex_str[1] = hex_chars[(src[i] >> 4) & 0xF];
|
||||
hex_str[2] = hex_chars[src[i] & 0xF];
|
||||
bufput(ob, hex_str, 3);
|
||||
}
|
||||
|
||||
i++;
|
||||
}
|
||||
}
|
|
@ -1,87 +0,0 @@
|
|||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "houdini.h"
|
||||
|
||||
#define ESCAPE_GROW_FACTOR(x) (((x) * 12) / 10) /* this is very scientific, yes */
|
||||
|
||||
/**
|
||||
* According to the OWASP rules:
|
||||
*
|
||||
* & --> &amp;
|
||||
* < --> &lt;
|
||||
* > --> &gt;
|
||||
* " --> &quot;
|
||||
* ' --> &#x27;     &apos; is not recommended
|
||||
* / --> &#x2F;     forward slash is included as it helps end an HTML entity
|
||||
*
|
||||
*/
|
||||
/*
 * Maps every byte value to an index into HTML_ESCAPES:
 *   0 = copy through unchanged
 *   1..6 = '"', '&', '\'', '/', '<', '>' respectively
 *   7 = control character, thrown out
 */
static const char HTML_ESCAPE_TABLE[] = {
	/* 0x00-0x0F */ 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 7, 7, 0, 7, 7,
	/* 0x10-0x1F */ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
	/* 0x20-0x2F */ 0, 0, 1, 0, 0, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 4,
	/* 0x30-0x3F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 6, 0,
	/* 0x40-0x4F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x50-0x5F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x60-0x6F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x70-0x7F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x80-0x8F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x90-0x9F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xA0-0xAF */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xB0-0xBF */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xC0-0xCF */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xD0-0xDF */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xE0-0xEF */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xF0-0xFF */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
|
||||
|
||||
/*
 * Replacement text for each escape class in HTML_ESCAPE_TABLE, indexed by
 * class number. Classes 0 (no escaping) and 7 (control characters) map to
 * the empty string.
 */
static const char *HTML_ESCAPES[] = {
	"",		/* 0: not escaped */
	"&quot;",	/* 1: '"' */
	"&amp;",	/* 2: '&' */
	"&#39;",	/* 3: '\'' */
	"&#47;",	/* 4: '/' */
	"&lt;",		/* 5: '<' */
	"&gt;",		/* 6: '>' */
	"",		/* 7: throw out control characters */
};
|
||||
|
||||
void
|
||||
houdini_escape_html0(struct buf *ob, const uint8_t *src, size_t size, int secure)
|
||||
{
|
||||
size_t i = 0, org, esc = 0;
|
||||
|
||||
bufgrow(ob, ESCAPE_GROW_FACTOR(size));
|
||||
|
||||
while (i < size) {
|
||||
org = i;
|
||||
while (i < size && (esc = HTML_ESCAPE_TABLE[src[i]]) == 0)
|
||||
i++;
|
||||
|
||||
if (i > org)
|
||||
bufput(ob, src + org, i - org);
|
||||
|
||||
/* escaping */
|
||||
if (i >= size)
|
||||
break;
|
||||
|
||||
/* The forward slash is only escaped in secure mode */
|
||||
if (src[i] == '/' && !secure) {
|
||||
bufputc(ob, '/');
|
||||
} else if (HTML_ESCAPE_TABLE[src[i]] == 7) {
|
||||
/* skip control characters */
|
||||
} else {
|
||||
bufputs(ob, HTML_ESCAPES[esc]);
|
||||
}
|
||||
|
||||
i++;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * HTML-escape `src` into `ob` in secure mode, i.e. with the forward slash
 * escaped as well.
 */
void
houdini_escape_html(struct buf *ob, const uint8_t *src, size_t size)
{
	const int secure = 1;

	houdini_escape_html0(ob, src, size, secure);
}
|
||||
|
|
@ -1,790 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2009, Natacha Porté
|
||||
* Copyright (c) 2011, Vicent Marti
|
||||
*
|
||||
* Permission to use, copy, modify, and distribute this software for any
|
||||
* purpose with or without fee is hereby granted, provided that the above
|
||||
* copyright notice and this permission notice appear in all copies.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "markdown.h"
|
||||
#include "html.h"
|
||||
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <ctype.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
#include "houdini.h"
|
||||
|
||||
#define USE_XHTML(opt) (opt->flags & HTML_USE_XHTML)
|
||||
|
||||
int
|
||||
sdhtml_is_tag(const uint8_t *tag_data, size_t tag_size, const char *tagname)
|
||||
{
|
||||
size_t i;
|
||||
int closed = 0;
|
||||
|
||||
if (tag_size < 3 || tag_data[0] != '<')
|
||||
return HTML_TAG_NONE;
|
||||
|
||||
i = 1;
|
||||
|
||||
if (tag_data[i] == '/') {
|
||||
closed = 1;
|
||||
i++;
|
||||
}
|
||||
|
||||
for (; i < tag_size; ++i, ++tagname) {
|
||||
if (*tagname == 0)
|
||||
break;
|
||||
|
||||
if (tag_data[i] != *tagname)
|
||||
return HTML_TAG_NONE;
|
||||
}
|
||||
|
||||
if (i == tag_size)
|
||||
return HTML_TAG_NONE;
|
||||
|
||||
if (isspace(tag_data[i]) || tag_data[i] == '>')
|
||||
return closed ? HTML_TAG_CLOSE : HTML_TAG_OPEN;
|
||||
|
||||
return HTML_TAG_NONE;
|
||||
}
|
||||
|
||||
/* HTML-escape `source` into `ob` in non-secure mode ('/' is left unescaped). */
static inline void escape_html(struct buf *ob, const uint8_t *source, size_t length)
{
	const int secure = 0;

	houdini_escape_html0(ob, source, length, secure);
}
|
||||
|
||||
/* Escape `source` into `ob` for use as an href/src attribute value. */
static inline void escape_href(struct buf *ob, const uint8_t *source, size_t length)
{
	houdini_escape_href(ob, source, length);
}
|
||||
|
||||
/********************
|
||||
* GENERIC RENDERER *
|
||||
********************/
|
||||
static int
|
||||
rndr_autolink(struct buf *ob, const struct buf *link, enum mkd_autolink type, void *opaque)
|
||||
{
|
||||
struct html_renderopt *options = opaque;
|
||||
uint8_t offset = 0;
|
||||
|
||||
if (!link || !link->size)
|
||||
return 0;
|
||||
|
||||
if ((options->flags & HTML_SAFELINK) != 0 &&
|
||||
!sd_autolink_issafe(link->data, link->size) &&
|
||||
type != MKDA_EMAIL)
|
||||
return 0;
|
||||
|
||||
BUFPUTSL(ob, "<a href=\"");
|
||||
if (type == MKDA_EMAIL)
|
||||
BUFPUTSL(ob, "mailto:");
|
||||
escape_href(ob, link->data + offset, link->size - offset);
|
||||
|
||||
if (options->link_attributes) {
|
||||
bufputc(ob, '\"');
|
||||
options->link_attributes(ob, link, opaque);
|
||||
bufputc(ob, '>');
|
||||
} else {
|
||||
BUFPUTSL(ob, "\">");
|
||||
}
|
||||
|
||||
/*
|
||||
* Pretty printing: if we get an email address as
|
||||
* an actual URI, e.g. `mailto:foo@bar.com`, we don't
|
||||
* want to print the `mailto:` prefix
|
||||
*/
|
||||
if (bufprefix(link, "mailto:") == 0) {
|
||||
escape_html(ob, link->data + 7, link->size - 7);
|
||||
} else {
|
||||
escape_html(ob, link->data, link->size);
|
||||
}
|
||||
|
||||
BUFPUTSL(ob, "</a>");
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void
|
||||
rndr_blockcode(struct buf *ob, const struct buf *text, const struct buf *lang, void *opaque)
|
||||
{
|
||||
if (ob->size) bufputc(ob, '\n');
|
||||
|
||||
if (lang && lang->size) {
|
||||
size_t i, cls;
|
||||
BUFPUTSL(ob, "<pre><code class=\"");
|
||||
|
||||
for (i = 0, cls = 0; i < lang->size; ++i, ++cls) {
|
||||
while (i < lang->size && isspace(lang->data[i]))
|
||||
i++;
|
||||
|
||||
if (i < lang->size) {
|
||||
size_t org = i;
|
||||
while (i < lang->size && !isspace(lang->data[i]))
|
||||
i++;
|
||||
|
||||
if (lang->data[org] == '.')
|
||||
org++;
|
||||
|
||||
if (cls) bufputc(ob, ' ');
|
||||
escape_html(ob, lang->data + org, i - org);
|
||||
}
|
||||
}
|
||||
|
||||
BUFPUTSL(ob, "\">");
|
||||
} else
|
||||
BUFPUTSL(ob, "<pre><code>");
|
||||
|
||||
if (text)
|
||||
escape_html(ob, text->data, text->size);
|
||||
|
||||
BUFPUTSL(ob, "</code></pre>\n");
|
||||
}
|
||||
|
||||
static void
|
||||
rndr_blockquote(struct buf *ob, const struct buf *text, void *opaque)
|
||||
{
|
||||
if (ob->size) bufputc(ob, '\n');
|
||||
BUFPUTSL(ob, "<blockquote>\n");
|
||||
if (text) bufput(ob, text->data, text->size);
|
||||
BUFPUTSL(ob, "</blockquote>\n");
|
||||
}
|
||||
|
||||
static int
|
||||
rndr_codespan(struct buf *ob, const struct buf *text, void *opaque)
|
||||
{
|
||||
BUFPUTSL(ob, "<code>");
|
||||
if (text) escape_html(ob, text->data, text->size);
|
||||
BUFPUTSL(ob, "</code>");
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int
|
||||
rndr_strikethrough(struct buf *ob, const struct buf *text, void *opaque)
|
||||
{
|
||||
if (!text || !text->size)
|
||||
return 0;
|
||||
|
||||
BUFPUTSL(ob, "<del>");
|
||||
bufput(ob, text->data, text->size);
|
||||
BUFPUTSL(ob, "</del>");
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int
|
||||
rndr_double_emphasis(struct buf *ob, const struct buf *text, void *opaque)
|
||||
{
|
||||
if (!text || !text->size)
|
||||
return 0;
|
||||
|
||||
BUFPUTSL(ob, "<strong>");
|
||||
bufput(ob, text->data, text->size);
|
||||
BUFPUTSL(ob, "</strong>");
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int
|
||||
rndr_emphasis(struct buf *ob, const struct buf *text, void *opaque)
|
||||
{
|
||||
if (!text || !text->size) return 0;
|
||||
BUFPUTSL(ob, "<em>");
|
||||
if (text) bufput(ob, text->data, text->size);
|
||||
BUFPUTSL(ob, "</em>");
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Emit a line break: "<br/>" in XHTML mode, "<br>" otherwise; always returns 1. */
static int
rndr_linebreak(struct buf *ob, void *opaque)
{
	struct html_renderopt *opts = opaque;

	if (USE_XHTML(opts))
		bufputs(ob, "<br/>\n");
	else
		bufputs(ob, "<br>\n");

	return 1;
}
|
||||
|
||||
static void
|
||||
rndr_header(struct buf *ob, const struct buf *text, int level, void *opaque)
|
||||
{
|
||||
struct html_renderopt *options = opaque;
|
||||
|
||||
if (ob->size)
|
||||
bufputc(ob, '\n');
|
||||
|
||||
if (options->flags & HTML_TOC) {
|
||||
bufprintf(ob, "<h%d id=\"", level);
|
||||
if (options->toc_id_prefix) {
|
||||
bufputs(ob, options->toc_id_prefix);
|
||||
}
|
||||
bufprintf(ob, "toc_%d\">", options->toc_data.header_count++);
|
||||
} else {
|
||||
bufprintf(ob, "<h%d>", level);
|
||||
}
|
||||
|
||||
if (text) bufput(ob, text->data, text->size);
|
||||
bufprintf(ob, "</h%d>\n", level);
|
||||
}
|
||||
|
||||
static int
|
||||
rndr_link(struct buf *ob, const struct buf *link, const struct buf *title, const struct buf *content, void *opaque)
|
||||
{
|
||||
struct html_renderopt *options = opaque;
|
||||
|
||||
if (link != NULL && (options->flags & HTML_SAFELINK) != 0 && !sd_autolink_issafe(link->data, link->size))
|
||||
return 0;
|
||||
|
||||
BUFPUTSL(ob, "<a href=\"");
|
||||
|
||||
if (link && link->size)
|
||||
escape_href(ob, link->data, link->size);
|
||||
|
||||
if (title && title->size) {
|
||||
BUFPUTSL(ob, "\" title=\"");
|
||||
escape_html(ob, title->data, title->size);
|
||||
}
|
||||
|
||||
if (options->link_attributes) {
|
||||
bufputc(ob, '\"');
|
||||
options->link_attributes(ob, link, opaque);
|
||||
bufputc(ob, '>');
|
||||
} else {
|
||||
BUFPUTSL(ob, "\">");
|
||||
}
|
||||
|
||||
if (content && content->size) bufput(ob, content->data, content->size);
|
||||
BUFPUTSL(ob, "</a>");
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void
|
||||
rndr_list(struct buf *ob, const struct buf *text, int flags, void *opaque)
|
||||
{
|
||||
if (ob->size) bufputc(ob, '\n');
|
||||
bufput(ob, flags & MKD_LIST_ORDERED ? "<ol>\n" : "<ul>\n", 5);
|
||||
if (text) bufput(ob, text->data, text->size);
|
||||
bufput(ob, flags & MKD_LIST_ORDERED ? "</ol>\n" : "</ul>\n", 6);
|
||||
}
|
||||
|
||||
static void
|
||||
rndr_listitem(struct buf *ob, const struct buf *text, int flags, void *opaque)
|
||||
{
|
||||
BUFPUTSL(ob, "<li>");
|
||||
if (text) {
|
||||
size_t size = text->size;
|
||||
while (size && text->data[size - 1] == '\n')
|
||||
size--;
|
||||
|
||||
bufput(ob, text->data, size);
|
||||
}
|
||||
BUFPUTSL(ob, "</li>\n");
|
||||
}
|
||||
|
||||
static void
|
||||
rndr_paragraph(struct buf *ob, const struct buf *text, void *opaque)
|
||||
{
|
||||
struct html_renderopt *options = opaque;
|
||||
size_t i = 0;
|
||||
|
||||
if (ob->size) bufputc(ob, '\n');
|
||||
|
||||
if (!text || !text->size)
|
||||
return;
|
||||
|
||||
while (i < text->size && isspace(text->data[i])) i++;
|
||||
|
||||
if (i == text->size)
|
||||
return;
|
||||
|
||||
BUFPUTSL(ob, "<p>");
|
||||
if (options->flags & HTML_HARD_WRAP) {
|
||||
size_t org;
|
||||
while (i < text->size) {
|
||||
org = i;
|
||||
while (i < text->size && text->data[i] != '\n')
|
||||
i++;
|
||||
|
||||
if (i > org)
|
||||
bufput(ob, text->data + org, i - org);
|
||||
|
||||
/*
|
||||
* do not insert a line break if this newline
|
||||
* is the last character on the paragraph
|
||||
*/
|
||||
if (i >= text->size - 1)
|
||||
break;
|
||||
|
||||
rndr_linebreak(ob, opaque);
|
||||
i++;
|
||||
}
|
||||
} else {
|
||||
bufput(ob, &text->data[i], text->size - i);
|
||||
}
|
||||
BUFPUTSL(ob, "</p>\n");
|
||||
}
|
||||
|
||||
static void
|
||||
rndr_raw_block(struct buf *ob, const struct buf *text, void *opaque)
|
||||
{
|
||||
size_t org, sz;
|
||||
if (!text) return;
|
||||
sz = text->size;
|
||||
while (sz > 0 && text->data[sz - 1] == '\n') sz--;
|
||||
org = 0;
|
||||
while (org < sz && text->data[org] == '\n') org++;
|
||||
if (org >= sz) return;
|
||||
if (ob->size) bufputc(ob, '\n');
|
||||
bufput(ob, text->data + org, sz - org);
|
||||
bufputc(ob, '\n');
|
||||
}
|
||||
|
||||
static int
|
||||
rndr_triple_emphasis(struct buf *ob, const struct buf *text, void *opaque)
|
||||
{
|
||||
if (!text || !text->size) return 0;
|
||||
BUFPUTSL(ob, "<strong><em>");
|
||||
bufput(ob, text->data, text->size);
|
||||
BUFPUTSL(ob, "</em></strong>");
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void
|
||||
rndr_hrule(struct buf *ob, void *opaque)
|
||||
{
|
||||
struct html_renderopt *options = opaque;
|
||||
if (ob->size) bufputc(ob, '\n');
|
||||
bufputs(ob, USE_XHTML(options) ? "<hr/>\n" : "<hr>\n");
|
||||
}
|
||||
|
||||
static int
|
||||
rndr_image(struct buf *ob, const struct buf *link, const struct buf *title, const struct buf *alt, void *opaque)
|
||||
{
|
||||
struct html_renderopt *options = opaque;
|
||||
if (!link || !link->size) return 0;
|
||||
|
||||
BUFPUTSL(ob, "<img src=\"");
|
||||
escape_href(ob, link->data, link->size);
|
||||
BUFPUTSL(ob, "\" alt=\"");
|
||||
|
||||
if (alt && alt->size)
|
||||
escape_html(ob, alt->data, alt->size);
|
||||
|
||||
if (title && title->size) {
|
||||
BUFPUTSL(ob, "\" title=\"");
|
||||
escape_html(ob, title->data, title->size); }
|
||||
|
||||
bufputs(ob, USE_XHTML(options) ? "\"/>" : "\">");
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void
|
||||
rndr_html_tag(struct buf *ob, const struct buf *text, void *opaque,
|
||||
char* tagname, char** whitelist, int tagtype)
|
||||
{
|
||||
size_t i, x, z, in_str = 0, seen_equals = 0, done = 0, done_attr = 0, reset = 0;
|
||||
struct buf *attr;
|
||||
struct buf *value;
|
||||
char c;
|
||||
|
||||
bufputc(ob, '<');
|
||||
|
||||
if(tagtype == HTML_TAG_CLOSE) {
|
||||
bufputc(ob, '/');
|
||||
bufputs(ob, tagname);
|
||||
bufputc(ob, '>');
|
||||
return;
|
||||
}
|
||||
|
||||
bufputs(ob, tagname);
|
||||
i = 1 + strlen(tagname);
|
||||
|
||||
attr = bufnew(16);
|
||||
value = bufnew(16);
|
||||
|
||||
for(; i < text->size && !done; i++) {
|
||||
c = text->data[i];
|
||||
done = 0;
|
||||
reset = 0;
|
||||
done_attr = 0;
|
||||
|
||||
switch(c) {
|
||||
case '>':
|
||||
done = 1;
|
||||
break;
|
||||
case '\'':
|
||||
case '"':
|
||||
if(!seen_equals) {
|
||||
reset = 1;
|
||||
} else if(!in_str) {
|
||||
in_str = c;
|
||||
} else if(in_str == c) {
|
||||
in_str = 0;
|
||||
done_attr = 1;
|
||||
} else {
|
||||
bufputc(value, c);
|
||||
}
|
||||
break;
|
||||
case ' ':
|
||||
if (in_str) {
|
||||
bufputc(value, ' ');
|
||||
} else {
|
||||
reset = 1;
|
||||
}
|
||||
break;
|
||||
case '=':
|
||||
if(seen_equals) {
|
||||
reset = 1;
|
||||
break;
|
||||
}
|
||||
seen_equals = 1;
|
||||
break;
|
||||
default:
|
||||
if(seen_equals && in_str || !seen_equals) {
|
||||
bufputc(seen_equals ? value : attr, c);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
if(done_attr) {
|
||||
int valid = 0;
|
||||
for(z = 0; whitelist[z]; z++) {
|
||||
if(strlen(whitelist[z]) != attr->size) {
|
||||
continue;
|
||||
}
|
||||
for(x = 0; x < attr->size; x++) {
|
||||
if(tolower(whitelist[z][x]) != tolower(attr->data[x])) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if(x == attr->size) {
|
||||
valid = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if(valid && value->size && attr->size) {
|
||||
bufputc(ob, ' ');
|
||||
escape_html(ob, attr->data, attr->size);
|
||||
bufputs(ob, "=\"");
|
||||
escape_html(ob, value->data, value->size);
|
||||
bufputc(ob, '"');
|
||||
}
|
||||
reset = 1;
|
||||
}
|
||||
|
||||
if(reset) {
|
||||
seen_equals = 0;
|
||||
in_str = 0;
|
||||
bufreset(attr);
|
||||
bufreset(value);
|
||||
}
|
||||
}
|
||||
|
||||
bufrelease(attr);
|
||||
bufrelease(value);
|
||||
|
||||
bufputc(ob, '>');
|
||||
}
|
||||
|
||||
static int
|
||||
rndr_raw_html(struct buf *ob, const struct buf *text, void *opaque)
|
||||
{
|
||||
struct html_renderopt *options = opaque;
|
||||
char** whitelist = options->html_element_whitelist;
|
||||
int i, tagtype;
|
||||
|
||||
/* Items on the whitelist ignore all other flags and just output */
|
||||
if (((options->flags & HTML_ALLOW_ELEMENT_WHITELIST) != 0) && whitelist) {
|
||||
for (i = 0; whitelist[i]; i++) {
|
||||
tagtype = sdhtml_is_tag(text->data, text->size, whitelist[i]);
|
||||
if (tagtype != HTML_TAG_NONE) {
|
||||
rndr_html_tag(ob, text, opaque,
|
||||
whitelist[i],
|
||||
options->html_attr_whitelist,
|
||||
tagtype);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* HTML_ESCAPE overrides SKIP_HTML, SKIP_STYLE, SKIP_LINKS and SKIP_IMAGES
|
||||
* It doens't see if there are any valid tags, just escape all of them. */
|
||||
if((options->flags & HTML_ESCAPE) != 0) {
|
||||
escape_html(ob, text->data, text->size);
|
||||
return 1;
|
||||
}
|
||||
|
||||
if ((options->flags & HTML_SKIP_HTML) != 0)
|
||||
return 1;
|
||||
|
||||
if ((options->flags & HTML_SKIP_STYLE) != 0 &&
|
||||
sdhtml_is_tag(text->data, text->size, "style"))
|
||||
return 1;
|
||||
|
||||
if ((options->flags & HTML_SKIP_LINKS) != 0 &&
|
||||
sdhtml_is_tag(text->data, text->size, "a"))
|
||||
return 1;
|
||||
|
||||
if ((options->flags & HTML_SKIP_IMAGES) != 0 &&
|
||||
sdhtml_is_tag(text->data, text->size, "img"))
|
||||
return 1;
|
||||
|
||||
bufput(ob, text->data, text->size);
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void
|
||||
rndr_table(struct buf *ob, const struct buf *header, const struct buf *body, void *opaque)
|
||||
{
|
||||
if (ob->size) bufputc(ob, '\n');
|
||||
BUFPUTSL(ob, "<table><thead>\n");
|
||||
if (header)
|
||||
bufput(ob, header->data, header->size);
|
||||
BUFPUTSL(ob, "</thead><tbody>\n");
|
||||
if (body)
|
||||
bufput(ob, body->data, body->size);
|
||||
BUFPUTSL(ob, "</tbody></table>\n");
|
||||
}
|
||||
|
||||
static void
|
||||
rndr_tablerow(struct buf *ob, const struct buf *text, void *opaque)
|
||||
{
|
||||
BUFPUTSL(ob, "<tr>\n");
|
||||
if (text)
|
||||
bufput(ob, text->data, text->size);
|
||||
BUFPUTSL(ob, "</tr>\n");
|
||||
}
|
||||
|
||||
static void
|
||||
rndr_tablecell(struct buf *ob, const struct buf *text, int flags, void *opaque, int col_span)
|
||||
{
|
||||
if (flags & MKD_TABLE_HEADER) {
|
||||
BUFPUTSL(ob, "<th");
|
||||
} else {
|
||||
BUFPUTSL(ob, "<td");
|
||||
}
|
||||
|
||||
if (col_span > 1) {
|
||||
bufprintf(ob, " colspan=\"%d\" ", col_span);
|
||||
}
|
||||
|
||||
switch (flags & MKD_TABLE_ALIGNMASK) {
|
||||
case MKD_TABLE_ALIGN_CENTER:
|
||||
BUFPUTSL(ob, " align=\"center\">");
|
||||
break;
|
||||
|
||||
case MKD_TABLE_ALIGN_L:
|
||||
BUFPUTSL(ob, " align=\"left\">");
|
||||
break;
|
||||
|
||||
case MKD_TABLE_ALIGN_R:
|
||||
BUFPUTSL(ob, " align=\"right\">");
|
||||
break;
|
||||
|
||||
default:
|
||||
BUFPUTSL(ob, ">");
|
||||
}
|
||||
|
||||
if (text)
|
||||
bufput(ob, text->data, text->size);
|
||||
|
||||
if (flags & MKD_TABLE_HEADER) {
|
||||
BUFPUTSL(ob, "</th>\n");
|
||||
} else {
|
||||
BUFPUTSL(ob, "</td>\n");
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
rndr_superscript(struct buf *ob, const struct buf *text, void *opaque)
|
||||
{
|
||||
if (!text || !text->size) return 0;
|
||||
BUFPUTSL(ob, "<sup>");
|
||||
bufput(ob, text->data, text->size);
|
||||
BUFPUTSL(ob, "</sup>");
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void
|
||||
rndr_normal_text(struct buf *ob, const struct buf *text, void *opaque)
|
||||
{
|
||||
if (text)
|
||||
escape_html(ob, text->data, text->size);
|
||||
}
|
||||
|
||||
static void
|
||||
toc_header(struct buf *ob, const struct buf *text, int level, void *opaque)
|
||||
{
|
||||
struct html_renderopt *options = opaque;
|
||||
|
||||
/* set the level offset if this is the first header
|
||||
* we're parsing for the document */
|
||||
if (options->toc_data.current_level == 0) {
|
||||
BUFPUTSL(ob, "<div class=\"toc\">\n");
|
||||
options->toc_data.level_offset = level - 1;
|
||||
}
|
||||
level -= options->toc_data.level_offset;
|
||||
|
||||
if (level > options->toc_data.current_level) {
|
||||
while (level > options->toc_data.current_level) {
|
||||
BUFPUTSL(ob, "<ul>\n<li>\n");
|
||||
options->toc_data.current_level++;
|
||||
}
|
||||
} else if (level < options->toc_data.current_level) {
|
||||
BUFPUTSL(ob, "</li>\n");
|
||||
while (level < options->toc_data.current_level) {
|
||||
BUFPUTSL(ob, "</ul>\n</li>\n");
|
||||
options->toc_data.current_level--;
|
||||
}
|
||||
BUFPUTSL(ob,"<li>\n");
|
||||
} else {
|
||||
BUFPUTSL(ob,"</li>\n<li>\n");
|
||||
}
|
||||
|
||||
BUFPUTSL(ob, "<a href=\"#");
|
||||
|
||||
if (options->toc_id_prefix) {
|
||||
bufputs(ob, options->toc_id_prefix);
|
||||
}
|
||||
|
||||
bufprintf(ob, "toc_%d\">", options->toc_data.header_count++);
|
||||
if (text)
|
||||
escape_html(ob, text->data, text->size);
|
||||
BUFPUTSL(ob, "</a>\n");
|
||||
}
|
||||
|
||||
static int
|
||||
toc_link(struct buf *ob, const struct buf *link, const struct buf *title, const struct buf *content, void *opaque)
|
||||
{
|
||||
if (content && content->size)
|
||||
bufput(ob, content->data, content->size);
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void
|
||||
reset_toc(struct buf *ob, void *opaque)
|
||||
{
|
||||
struct html_renderopt *options = opaque;
|
||||
|
||||
memset(&(options->toc_data), 0, sizeof(options->toc_data));
|
||||
}
|
||||
|
||||
static void
|
||||
toc_finalize(struct buf *ob, void *opaque)
|
||||
{
|
||||
struct html_renderopt *options = opaque;
|
||||
bool has_toc = false;
|
||||
while (options->toc_data.current_level > 0) {
|
||||
BUFPUTSL(ob, "</li>\n</ul>\n");
|
||||
options->toc_data.current_level--;
|
||||
has_toc = true;
|
||||
}
|
||||
if(has_toc) {
|
||||
BUFPUTSL(ob, "</div>\n");
|
||||
}
|
||||
reset_toc(ob, opaque);
|
||||
}
|
||||
|
||||
void
|
||||
sdhtml_toc_renderer(struct sd_callbacks *callbacks, struct html_renderopt *options)
|
||||
{
|
||||
static const struct sd_callbacks cb_default = {
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
toc_header,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
|
||||
NULL,
|
||||
rndr_codespan,
|
||||
rndr_double_emphasis,
|
||||
rndr_emphasis,
|
||||
NULL,
|
||||
NULL,
|
||||
toc_link,
|
||||
NULL,
|
||||
rndr_triple_emphasis,
|
||||
rndr_strikethrough,
|
||||
rndr_superscript,
|
||||
|
||||
NULL,
|
||||
NULL,
|
||||
|
||||
NULL,
|
||||
toc_finalize,
|
||||
};
|
||||
|
||||
memset(options, 0x0, sizeof(struct html_renderopt));
|
||||
options->flags = HTML_TOC | HTML_SKIP_HTML;
|
||||
|
||||
memcpy(callbacks, &cb_default, sizeof(struct sd_callbacks));
|
||||
}
|
||||
|
||||
void
|
||||
sdhtml_renderer(struct sd_callbacks *callbacks, struct html_renderopt *options, unsigned int render_flags)
|
||||
{
|
||||
static const struct sd_callbacks cb_default = {
|
||||
rndr_blockcode,
|
||||
rndr_blockquote,
|
||||
rndr_raw_block,
|
||||
rndr_header,
|
||||
rndr_hrule,
|
||||
rndr_list,
|
||||
rndr_listitem,
|
||||
rndr_paragraph,
|
||||
rndr_table,
|
||||
rndr_tablerow,
|
||||
rndr_tablecell,
|
||||
|
||||
rndr_autolink,
|
||||
rndr_codespan,
|
||||
rndr_double_emphasis,
|
||||
rndr_emphasis,
|
||||
rndr_image,
|
||||
rndr_linebreak,
|
||||
rndr_link,
|
||||
rndr_raw_html,
|
||||
rndr_triple_emphasis,
|
||||
rndr_strikethrough,
|
||||
rndr_superscript,
|
||||
|
||||
NULL,
|
||||
rndr_normal_text,
|
||||
|
||||
NULL,
|
||||
reset_toc,
|
||||
};
|
||||
|
||||
/* Prepare the options pointer */
|
||||
memset(options, 0x0, sizeof(struct html_renderopt));
|
||||
options->flags = render_flags;
|
||||
|
||||
/* Prepare the callbacks */
|
||||
memcpy(callbacks, &cb_default, sizeof(struct sd_callbacks));
|
||||
|
||||
if (render_flags & HTML_SKIP_IMAGES)
|
||||
callbacks->image = NULL;
|
||||
|
||||
if (render_flags & HTML_SKIP_LINKS) {
|
||||
callbacks->link = NULL;
|
||||
callbacks->autolink = NULL;
|
||||
}
|
||||
|
||||
if (render_flags & HTML_SKIP_HTML || render_flags & HTML_ESCAPE)
|
||||
callbacks->blockhtml = NULL;
|
||||
}
|
|
@ -1,83 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2011, Vicent Marti
|
||||
*
|
||||
* Permission to use, copy, modify, and distribute this software for any
|
||||
* purpose with or without fee is hereby granted, provided that the above
|
||||
* copyright notice and this permission notice appear in all copies.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef UPSKIRT_HTML_H
|
||||
#define UPSKIRT_HTML_H
|
||||
|
||||
#include "markdown.h"
|
||||
#include "buffer.h"
|
||||
#include <stdlib.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
struct html_renderopt {
|
||||
struct {
|
||||
int header_count;
|
||||
int current_level;
|
||||
int level_offset;
|
||||
} toc_data;
|
||||
|
||||
char* toc_id_prefix;
|
||||
|
||||
unsigned int flags;
|
||||
|
||||
char** html_element_whitelist;
|
||||
char** html_attr_whitelist;
|
||||
|
||||
/* extra callbacks */
|
||||
void (*link_attributes)(struct buf *ob, const struct buf *url, void *self);
|
||||
};
|
||||
|
||||
typedef enum {
|
||||
HTML_SKIP_HTML = (1 << 0),
|
||||
HTML_SKIP_STYLE = (1 << 1),
|
||||
HTML_SKIP_IMAGES = (1 << 2),
|
||||
HTML_SKIP_LINKS = (1 << 3),
|
||||
HTML_EXPAND_TABS = (1 << 4),
|
||||
HTML_SAFELINK = (1 << 5),
|
||||
HTML_TOC = (1 << 6),
|
||||
HTML_HARD_WRAP = (1 << 7),
|
||||
HTML_USE_XHTML = (1 << 8),
|
||||
HTML_ESCAPE = (1 << 9),
|
||||
HTML_ALLOW_ELEMENT_WHITELIST = (1 << 10),
|
||||
} html_render_mode;
|
||||
|
||||
typedef enum {
|
||||
HTML_TAG_NONE = 0,
|
||||
HTML_TAG_OPEN,
|
||||
HTML_TAG_CLOSE,
|
||||
} html_tag;
|
||||
|
||||
int
|
||||
sdhtml_is_tag(const uint8_t *tag_data, size_t tag_size, const char *tagname);
|
||||
|
||||
extern void
|
||||
sdhtml_renderer(struct sd_callbacks *callbacks, struct html_renderopt *options_ptr, unsigned int render_flags);
|
||||
|
||||
extern void
|
||||
sdhtml_toc_renderer(struct sd_callbacks *callbacks, struct html_renderopt *options_ptr);
|
||||
|
||||
extern void
|
||||
sdhtml_smartypants(struct buf *ob, const uint8_t *text, size_t size);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
|
@ -1,389 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2011, Vicent Marti
|
||||
*
|
||||
* Permission to use, copy, modify, and distribute this software for any
|
||||
* purpose with or without fee is hereby granted, provided that the above
|
||||
* copyright notice and this permission notice appear in all copies.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "buffer.h"
|
||||
#include "html.h"
|
||||
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <ctype.h>
|
||||
|
||||
#if defined(_WIN32)
|
||||
#define snprintf _snprintf
|
||||
#endif
|
||||
|
||||
struct smartypants_data {
|
||||
int in_squote;
|
||||
int in_dquote;
|
||||
};
|
||||
|
||||
static size_t smartypants_cb__ltag(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
|
||||
static size_t smartypants_cb__dquote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
|
||||
static size_t smartypants_cb__amp(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
|
||||
static size_t smartypants_cb__period(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
|
||||
static size_t smartypants_cb__number(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
|
||||
static size_t smartypants_cb__dash(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
|
||||
static size_t smartypants_cb__parens(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
|
||||
static size_t smartypants_cb__squote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
|
||||
static size_t smartypants_cb__backtick(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
|
||||
static size_t smartypants_cb__escape(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
|
||||
|
||||
static size_t (*smartypants_cb_ptrs[])
|
||||
(struct buf *, struct smartypants_data *, uint8_t, const uint8_t *, size_t) =
|
||||
{
|
||||
NULL, /* 0 */
|
||||
smartypants_cb__dash, /* 1 */
|
||||
smartypants_cb__parens, /* 2 */
|
||||
smartypants_cb__squote, /* 3 */
|
||||
smartypants_cb__dquote, /* 4 */
|
||||
smartypants_cb__amp, /* 5 */
|
||||
smartypants_cb__period, /* 6 */
|
||||
smartypants_cb__number, /* 7 */
|
||||
smartypants_cb__ltag, /* 8 */
|
||||
smartypants_cb__backtick, /* 9 */
|
||||
smartypants_cb__escape, /* 10 */
|
||||
};
|
||||
|
||||
static const uint8_t smartypants_cb_chars[] = {
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 4, 0, 0, 0, 5, 3, 2, 0, 0, 0, 0, 1, 6, 0,
|
||||
0, 7, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 0, 0, 0,
|
||||
9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
};
|
||||
|
||||
/*
 * word_boundary: tell whether `c` can delimit a word.
 *
 * Returns 1 for NUL, whitespace and punctuation, 0 for anything else.
 * Callers in this file pass 0 when there is no neighbouring character,
 * so the start and end of the input count as boundaries.
 */
static inline int
word_boundary(uint8_t c)
{
    if (c == 0)
        return 1;

    return isspace(c) || ispunct(c);
}
|
||||
|
||||
/*
 * smartypants_quotes: try to emit an opening or closing quote entity.
 *
 * `*is_open` records whether a quote of this kind is currently open.
 * A closing quote is only produced when `next_char` is a word boundary,
 * an opening quote only when `previous_char` is one.  `quote` is the
 * letter placed in the entity name ('s' or 'd' at the call sites in
 * this file), giving "&lsquo;", "&rdquo;" and so on.
 *
 * Returns 1 when an entity was written to `ob` (and `*is_open` was
 * toggled), 0 when nothing was written.
 */
static int
smartypants_quotes(struct buf *ob, uint8_t previous_char, uint8_t next_char, uint8_t quote, int *is_open)
{
    char entity[8];
    const int closing = (*is_open != 0);

    /* the character that matters depends on which side of the word we are on */
    if (!word_boundary(closing ? next_char : previous_char))
        return 0;

    snprintf(entity, sizeof(entity), "&%c%cquo;", closing ? 'r' : 'l', quote);
    *is_open = !closing;
    bufputs(ob, entity);
    return 1;
}
|
||||
|
||||
static size_t
|
||||
smartypants_cb__squote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
|
||||
{
|
||||
if (size >= 2) {
|
||||
uint8_t t1 = tolower(text[1]);
|
||||
|
||||
if (t1 == '\'') {
|
||||
if (smartypants_quotes(ob, previous_char, size >= 3 ? text[2] : 0, 'd', &smrt->in_dquote))
|
||||
return 1;
|
||||
}
|
||||
|
||||
if ((t1 == 's' || t1 == 't' || t1 == 'm' || t1 == 'd') &&
|
||||
(size == 3 || word_boundary(text[2]))) {
|
||||
BUFPUTSL(ob, "’");
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (size >= 3) {
|
||||
uint8_t t2 = tolower(text[2]);
|
||||
|
||||
if (((t1 == 'r' && t2 == 'e') ||
|
||||
(t1 == 'l' && t2 == 'l') ||
|
||||
(t1 == 'v' && t2 == 'e')) &&
|
||||
(size == 4 || word_boundary(text[3]))) {
|
||||
BUFPUTSL(ob, "’");
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (smartypants_quotes(ob, previous_char, size > 0 ? text[1] : 0, 's', &smrt->in_squote))
|
||||
return 0;
|
||||
|
||||
bufputc(ob, text[0]);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * smartypants_cb__parens: handle a `(` found at text[0].
 *
 * Recognises "(c)", "(r)" and "(tm)"; the first two letters after the
 * parenthesis are compared case-insensitively.  On a match the symbol is
 * written to `ob` and the number of bytes consumed after text[0] is
 * returned (2 or 3).  Otherwise the parenthesis is copied and 0 is
 * returned.
 */
static size_t
smartypants_cb__parens(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
    if (size >= 3) {
        const uint8_t first = tolower(text[1]);
        const uint8_t second = tolower(text[2]);

        if (second == ')') {
            if (first == 'c') {
                BUFPUTSL(ob, "©");
                return 2;
            }

            if (first == 'r') {
                BUFPUTSL(ob, "®");
                return 2;
            }
        } else if (first == 't' && second == 'm' && size >= 4 && text[3] == ')') {
            BUFPUTSL(ob, "™");
            return 3;
        }
    }

    bufputc(ob, text[0]);
    return 0;
}
|
||||
|
||||
/*
 * smartypants_cb__dash: handle a `-` found at text[0].
 *
 * "---" is replaced by the long dash and "--" by the short one; the
 * return value is the number of bytes consumed after text[0] (2 or 1).
 * A single dash is copied unchanged and 0 is returned.
 */
static size_t
smartypants_cb__dash(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
    const int double_dash = (size >= 2 && text[1] == '-');

    if (double_dash && size >= 3 && text[2] == '-') {
        BUFPUTSL(ob, "—");
        return 2;
    }

    if (double_dash) {
        BUFPUTSL(ob, "–");
        return 1;
    }

    bufputc(ob, text[0]);
    return 0;
}
|
||||
|
||||
static size_t
|
||||
smartypants_cb__amp(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
|
||||
{
|
||||
if (size >= 6 && memcmp(text, """, 6) == 0) {
|
||||
if (smartypants_quotes(ob, previous_char, size >= 7 ? text[6] : 0, 'd', &smrt->in_dquote))
|
||||
return 5;
|
||||
}
|
||||
|
||||
if (size >= 4 && memcmp(text, "�", 4) == 0)
|
||||
return 3;
|
||||
|
||||
bufputc(ob, '&');
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * smartypants_cb__period: handle a `.` found at text[0].
 *
 * "..." and ". . ." are both replaced by a single ellipsis; the return
 * value is the number of bytes consumed after text[0] (2 or 4).  Any
 * other period is copied unchanged and 0 is returned.
 */
static size_t
smartypants_cb__period(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
    size_t consumed = 0;

    if (size >= 3 && text[1] == '.' && text[2] == '.')
        consumed = 2;
    else if (size >= 5 && memcmp(text + 1, " . .", 4) == 0)
        consumed = 4;

    if (consumed == 0) {
        bufputc(ob, text[0]);
        return 0;
    }

    BUFPUTSL(ob, "…");
    return consumed;
}
|
||||
|
||||
static size_t
|
||||
smartypants_cb__backtick(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
|
||||
{
|
||||
if (size >= 2 && text[1] == '`') {
|
||||
if (smartypants_quotes(ob, previous_char, size >= 3 ? text[2] : 0, 'd', &smrt->in_dquote))
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * smartypants_cb__number: handle a digit found at text[0].
 *
 * When the digit follows a word boundary, the fractions "1/2", "1/4" and
 * "3/4" are replaced by the matching symbol, provided the fraction ends
 * the input or is followed by a word boundary.  "1/4" may also be
 * followed by "th" and "3/4" by "ths" (case-insensitive); the suffix is
 * left in place.  A replaced fraction consumes 2 bytes after text[0].
 * Otherwise the digit is copied unchanged and 0 is returned.
 */
static size_t
smartypants_cb__number(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
    if (size >= 3 && text[1] == '/' && word_boundary(previous_char)) {
        /* true when nothing word-like follows the three fraction bytes */
        const int ends_here = (size == 3) || word_boundary(text[3]);

        if (text[0] == '1' && text[2] == '2') {
            if (ends_here) {
                BUFPUTSL(ob, "½");
                return 2;
            }
        } else if (text[0] == '1' && text[2] == '4') {
            if (ends_here ||
                (size >= 5 && tolower(text[3]) == 't' && tolower(text[4]) == 'h')) {
                BUFPUTSL(ob, "¼");
                return 2;
            }
        } else if (text[0] == '3' && text[2] == '4') {
            if (ends_here ||
                (size >= 6 && tolower(text[3]) == 't' && tolower(text[4]) == 'h' && tolower(text[5]) == 's')) {
                BUFPUTSL(ob, "¾");
                return 2;
            }
        }
    }

    bufputc(ob, text[0]);
    return 0;
}
|
||||
|
||||
static size_t
|
||||
smartypants_cb__dquote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
|
||||
{
|
||||
if (!smartypants_quotes(ob, previous_char, size > 0 ? text[1] : 0, 'd', &smrt->in_dquote))
|
||||
BUFPUTSL(ob, """);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static size_t
|
||||
smartypants_cb__ltag(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
|
||||
{
|
||||
static const char *skip_tags[] = {
|
||||
"pre", "code", "var", "samp", "kbd", "math", "script", "style"
|
||||
};
|
||||
static const size_t skip_tags_count = 8;
|
||||
|
||||
size_t tag, i = 0;
|
||||
|
||||
while (i < size && text[i] != '>')
|
||||
i++;
|
||||
|
||||
for (tag = 0; tag < skip_tags_count; ++tag) {
|
||||
if (sdhtml_is_tag(text, size, skip_tags[tag]) == HTML_TAG_OPEN)
|
||||
break;
|
||||
}
|
||||
|
||||
if (tag < skip_tags_count) {
|
||||
for (;;) {
|
||||
while (i < size && text[i] != '<')
|
||||
i++;
|
||||
|
||||
if (i == size)
|
||||
break;
|
||||
|
||||
if (sdhtml_is_tag(text + i, size - i, skip_tags[tag]) == HTML_TAG_CLOSE)
|
||||
break;
|
||||
|
||||
i++;
|
||||
}
|
||||
|
||||
while (i < size && text[i] != '>')
|
||||
i++;
|
||||
}
|
||||
|
||||
bufput(ob, text, i + 1);
|
||||
return i;
|
||||
}
|
||||
|
||||
/*
 * smartypants_cb__escape: handle a backslash found at text[0].
 *
 * A backslash followed by one of  \  "  '  .  -  `  writes that
 * character literally and consumes it (return value 1), which keeps it
 * from being processed by the other callbacks.
 *
 * In every other case, including a backslash that is the last byte of
 * the input, the backslash itself is copied and 0 is returned, so no
 * input character is lost.
 */
static size_t
smartypants_cb__escape(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
    if (size < 2) {
        /* trailing backslash: nothing to escape, keep it as-is */
        bufputc(ob, '\\');
        return 0;
    }

    switch (text[1]) {
    case '\\':
    case '"':
    case '\'':
    case '.':
    case '-':
    case '`':
        bufputc(ob, text[1]);
        return 1;

    default:
        bufputc(ob, '\\');
        return 0;
    }
}
|
||||
|
||||
#if 0
|
||||
static struct {
|
||||
uint8_t c0;
|
||||
const uint8_t *pattern;
|
||||
const uint8_t *entity;
|
||||
int skip;
|
||||
} smartypants_subs[] = {
|
||||
{ '\'', "'s>", "’", 0 },
|
||||
{ '\'', "'t>", "’", 0 },
|
||||
{ '\'', "'re>", "’", 0 },
|
||||
{ '\'', "'ll>", "’", 0 },
|
||||
{ '\'', "'ve>", "’", 0 },
|
||||
{ '\'', "'m>", "’", 0 },
|
||||
{ '\'', "'d>", "’", 0 },
|
||||
{ '-', "--", "—", 1 },
|
||||
{ '-', "<->", "–", 0 },
|
||||
{ '.', "...", "…", 2 },
|
||||
{ '.', ". . .", "…", 4 },
|
||||
{ '(', "(c)", "©", 2 },
|
||||
{ '(', "(r)", "®", 2 },
|
||||
{ '(', "(tm)", "™", 3 },
|
||||
{ '3', "<3/4>", "¾", 2 },
|
||||
{ '3', "<3/4ths>", "¾", 2 },
|
||||
{ '1', "<1/2>", "½", 2 },
|
||||
{ '1', "<1/4>", "¼", 2 },
|
||||
{ '1', "<1/4th>", "¼", 2 },
|
||||
{ '&', "�", 0, 3 },
|
||||
};
|
||||
#endif
|
||||
|
||||
void
|
||||
sdhtml_smartypants(struct buf *ob, const uint8_t *text, size_t size)
|
||||
{
|
||||
size_t i;
|
||||
struct smartypants_data smrt = {0, 0};
|
||||
|
||||
if (!text)
|
||||
return;
|
||||
|
||||
bufgrow(ob, size);
|
||||
|
||||
for (i = 0; i < size; ++i) {
|
||||
size_t org;
|
||||
uint8_t action = 0;
|
||||
|
||||
org = i;
|
||||
while (i < size && (action = smartypants_cb_chars[text[i]]) == 0)
|
||||
i++;
|
||||
|
||||
if (i > org)
|
||||
bufput(ob, text + org, i - org);
|
||||
|
||||
if (i < size) {
|
||||
i += smartypants_cb_ptrs[(int)action]
|
||||
(ob, &smrt, i ? text[i - 1] : 0, text + i, size - i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1,25 +0,0 @@
|
|||
##
|
||||
p
|
||||
dl
|
||||
h1
|
||||
h2
|
||||
h3
|
||||
h4
|
||||
h5
|
||||
h6
|
||||
ol
|
||||
ul
|
||||
del
|
||||
div
|
||||
ins
|
||||
pre
|
||||
form
|
||||
math
|
||||
table
|
||||
figure
|
||||
iframe
|
||||
script
|
||||
style
|
||||
fieldset
|
||||
noscript
|
||||
blockquote
|
|
@ -1,206 +0,0 @@
|
|||
/* C code produced by gperf version 3.0.3 */
|
||||
/* Command-line: gperf -N find_block_tag -H hash_block_tag -C -c -E --ignore-case html_block_names.txt */
|
||||
/* Computed positions: -k'1-2' */
|
||||
|
||||
#if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \
|
||||
&& ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \
|
||||
&& (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \
|
||||
&& ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \
|
||||
&& ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \
|
||||
&& ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \
|
||||
&& ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \
|
||||
&& ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \
|
||||
&& ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \
|
||||
&& ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \
|
||||
&& ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \
|
||||
&& ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \
|
||||
&& ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \
|
||||
&& ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \
|
||||
&& ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \
|
||||
&& ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \
|
||||
&& ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \
|
||||
&& ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \
|
||||
&& ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \
|
||||
&& ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \
|
||||
&& ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \
|
||||
&& ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \
|
||||
&& ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126))
|
||||
/* The character set is not based on ISO-646. */
|
||||
error "gperf generated tables don't work with this execution character set. Please report a bug to <bug-gnu-gperf@gnu.org>."
|
||||
#endif
|
||||
|
||||
/* maximum key range = 37, duplicates = 0 */
|
||||
|
||||
#ifndef GPERF_DOWNCASE
|
||||
#define GPERF_DOWNCASE 1
|
||||
static unsigned char gperf_downcase[256] =
|
||||
{
|
||||
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
|
||||
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
|
||||
30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
|
||||
45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
|
||||
60, 61, 62, 63, 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106,
|
||||
107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121,
|
||||
122, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104,
|
||||
105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
|
||||
120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134,
|
||||
135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
|
||||
150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164,
|
||||
165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
|
||||
180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194,
|
||||
195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209,
|
||||
210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224,
|
||||
225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
|
||||
240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254,
|
||||
255
|
||||
};
|
||||
#endif
|
||||
|
||||
#ifndef GPERF_CASE_STRNCMP
|
||||
#define GPERF_CASE_STRNCMP 1
|
||||
static int
|
||||
gperf_case_strncmp (s1, s2, n)
|
||||
register const char *s1;
|
||||
register const char *s2;
|
||||
register unsigned int n;
|
||||
{
|
||||
for (; n > 0;)
|
||||
{
|
||||
unsigned char c1 = gperf_downcase[(unsigned char)*s1++];
|
||||
unsigned char c2 = gperf_downcase[(unsigned char)*s2++];
|
||||
if (c1 != 0 && c1 == c2)
|
||||
{
|
||||
n--;
|
||||
continue;
|
||||
}
|
||||
return (int)c1 - (int)c2;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef __GNUC__
|
||||
__inline
|
||||
#else
|
||||
#ifdef __cplusplus
|
||||
inline
|
||||
#endif
|
||||
#endif
|
||||
static unsigned int
|
||||
hash_block_tag (str, len)
|
||||
register const char *str;
|
||||
register unsigned int len;
|
||||
{
|
||||
static const unsigned char asso_values[] =
|
||||
{
|
||||
38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
|
||||
38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
|
||||
38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
|
||||
38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
|
||||
38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
|
||||
8, 30, 25, 20, 15, 10, 38, 38, 38, 38,
|
||||
38, 38, 38, 38, 38, 38, 0, 38, 0, 38,
|
||||
5, 5, 5, 15, 0, 38, 38, 0, 15, 10,
|
||||
0, 38, 38, 15, 0, 5, 38, 38, 38, 38,
|
||||
38, 38, 38, 38, 38, 38, 38, 38, 0, 38,
|
||||
0, 38, 5, 5, 5, 15, 0, 38, 38, 0,
|
||||
15, 10, 0, 38, 38, 15, 0, 5, 38, 38,
|
||||
38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
|
||||
38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
|
||||
38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
|
||||
38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
|
||||
38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
|
||||
38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
|
||||
38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
|
||||
38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
|
||||
38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
|
||||
38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
|
||||
38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
|
||||
38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
|
||||
38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
|
||||
38, 38, 38, 38, 38, 38, 38
|
||||
};
|
||||
register int hval = len;
|
||||
|
||||
switch (hval)
|
||||
{
|
||||
default:
|
||||
hval += asso_values[(unsigned char)str[1]+1];
|
||||
/*FALLTHROUGH*/
|
||||
case 1:
|
||||
hval += asso_values[(unsigned char)str[0]];
|
||||
break;
|
||||
}
|
||||
return hval;
|
||||
}
|
||||
|
||||
#ifdef __GNUC__
|
||||
__inline
|
||||
#ifdef __GNUC_STDC_INLINE__
|
||||
__attribute__ ((__gnu_inline__))
|
||||
#endif
|
||||
#endif
|
||||
const char *
|
||||
find_block_tag (str, len)
|
||||
register const char *str;
|
||||
register unsigned int len;
|
||||
{
|
||||
enum
|
||||
{
|
||||
TOTAL_KEYWORDS = 24,
|
||||
MIN_WORD_LENGTH = 1,
|
||||
MAX_WORD_LENGTH = 10,
|
||||
MIN_HASH_VALUE = 1,
|
||||
MAX_HASH_VALUE = 37
|
||||
};
|
||||
|
||||
static const char * const wordlist[] =
|
||||
{
|
||||
"",
|
||||
"p",
|
||||
"dl",
|
||||
"div",
|
||||
"math",
|
||||
"table",
|
||||
"",
|
||||
"ul",
|
||||
"del",
|
||||
"form",
|
||||
"blockquote",
|
||||
"figure",
|
||||
"ol",
|
||||
"fieldset",
|
||||
"",
|
||||
"h1",
|
||||
"",
|
||||
"h6",
|
||||
"pre",
|
||||
"", "",
|
||||
"script",
|
||||
"h5",
|
||||
"noscript",
|
||||
"",
|
||||
"style",
|
||||
"iframe",
|
||||
"h4",
|
||||
"ins",
|
||||
"", "", "",
|
||||
"h3",
|
||||
"", "", "", "",
|
||||
"h2"
|
||||
};
|
||||
|
||||
if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
|
||||
{
|
||||
register int key = hash_block_tag (str, len);
|
||||
|
||||
if (key <= MAX_HASH_VALUE && key >= 0)
|
||||
{
|
||||
register const char *s = wordlist[key];
|
||||
|
||||
if ((((unsigned char)*str ^ (unsigned char)*s) & ~32) == 0 && !gperf_case_strncmp (str, s, len) && s[len] == '\0')
|
||||
return s;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
|
@ -1,292 +0,0 @@
|
|||
%language=ANSI-C
|
||||
%define lookup-function-name is_allowed_named_entity
|
||||
%compare-strncmp
|
||||
%readonly-tables
|
||||
%define hash-function-name hash_html_entity
|
||||
%enum
|
||||
%includes
|
||||
%{
|
||||
#include <stdlib.h>
|
||||
|
||||
/* Parsers tend to choke on entities with values greater than this */
const uint32_t MAX_NUM_ENTITY_VAL = 0x10ffff;
/* Any numeric entity longer than this is obviously above MAX_NUM_ENTITY_VAL
 * used to avoid dealing with overflows. */
const size_t MAX_NUM_ENTITY_LEN = 7;

/*
 * is_valid_numeric_entity: tell whether a numeric character reference
 * with value `entity_val` is safe to emit.
 *
 * Some XML parsers will choke on entities with certain values (mostly
 * control characters).  According to lxml these are all problematic;
 * every range below is rejected inclusive of both ends:
 *
 *     0 - 8
 *     11 - 12
 *     14 - 31
 *     55296 - 57343   (0xD800 - 0xDFFF)
 *     65534 - 65535   (0xFFFE - 0xFFFF)
 *
 * Values above MAX_NUM_ENTITY_VAL are rejected as well.
 *
 * Returns 1 when the value is acceptable, 0 otherwise.
 *
 * Declared `static inline` so the translation unit that includes this
 * code always has a definition to call, even when the compiler chooses
 * not to inline.
 */
static inline int is_valid_numeric_entity(uint32_t entity_val)
{
    return (entity_val > 8
        && (entity_val != 11 && entity_val != 12)
        && (entity_val < 14 || entity_val > 31)
        && (entity_val < 55296 || entity_val > 57343)
        && (entity_val != 65534 && entity_val != 65535)
        && entity_val <= MAX_NUM_ENTITY_VAL);
}
|
||||
|
||||
%}
|
||||
%%
|
||||
Æ
|
||||
Á
|
||||
Â
|
||||
À
|
||||
Α
|
||||
Å
|
||||
Ã
|
||||
Ä
|
||||
Β
|
||||
Ç
|
||||
Χ
|
||||
‡
|
||||
Δ
|
||||
Ð
|
||||
É
|
||||
Ê
|
||||
È
|
||||
Ε
|
||||
Η
|
||||
Ë
|
||||
Γ
|
||||
Í
|
||||
Î
|
||||
Ì
|
||||
Ι
|
||||
Ï
|
||||
Κ
|
||||
Λ
|
||||
Μ
|
||||
Ñ
|
||||
Ν
|
||||
Œ
|
||||
Ó
|
||||
Ô
|
||||
Ò
|
||||
Ω
|
||||
Ο
|
||||
Ø
|
||||
Õ
|
||||
Ö
|
||||
Φ
|
||||
Π
|
||||
″
|
||||
Ψ
|
||||
Ρ
|
||||
Š
|
||||
Σ
|
||||
Þ
|
||||
Τ
|
||||
Θ
|
||||
Ú
|
||||
Û
|
||||
Ù
|
||||
Υ
|
||||
Ü
|
||||
Ξ
|
||||
Ý
|
||||
Ÿ
|
||||
Ζ
|
||||
á
|
||||
â
|
||||
´
|
||||
æ
|
||||
à
|
||||
ℵ
|
||||
α
|
||||
&
|
||||
∧
|
||||
∠
|
||||
'
|
||||
å
|
||||
≈
|
||||
ã
|
||||
ä
|
||||
„
|
||||
β
|
||||
¦
|
||||
•
|
||||
∩
|
||||
ç
|
||||
¸
|
||||
¢
|
||||
χ
|
||||
ˆ
|
||||
♣
|
||||
≅
|
||||
©
|
||||
↵
|
||||
∪
|
||||
¤
|
||||
⇓
|
||||
†
|
||||
↓
|
||||
°
|
||||
δ
|
||||
♦
|
||||
÷
|
||||
é
|
||||
ê
|
||||
è
|
||||
∅
|
||||
 
|
||||
 
|
||||
ε
|
||||
≡
|
||||
η
|
||||
ð
|
||||
ë
|
||||
€
|
||||
∃
|
||||
ƒ
|
||||
∀
|
||||
½
|
||||
¼
|
||||
¾
|
||||
⁄
|
||||
γ
|
||||
≥
|
||||
>
|
||||
⇔
|
||||
↔
|
||||
♥
|
||||
…
|
||||
í
|
||||
î
|
||||
¡
|
||||
ì
|
||||
ℑ
|
||||
∞
|
||||
∫
|
||||
ι
|
||||
¿
|
||||
∈
|
||||
ï
|
||||
κ
|
||||
⇐
|
||||
λ
|
||||
⟨
|
||||
«
|
||||
←
|
||||
⌈
|
||||
“
|
||||
≤
|
||||
⌊
|
||||
∗
|
||||
◊
|
||||
‎
|
||||
‹
|
||||
‘
|
||||
<
|
||||
¯
|
||||
—
|
||||
µ
|
||||
·
|
||||
−
|
||||
μ
|
||||
∇
|
||||
|
||||
–
|
||||
≠
|
||||
∋
|
||||
¬
|
||||
∉
|
||||
⊄
|
||||
ñ
|
||||
ν
|
||||
ó
|
||||
ô
|
||||
œ
|
||||
ò
|
||||
‾
|
||||
ω
|
||||
ο
|
||||
⊕
|
||||
∨
|
||||
ª
|
||||
º
|
||||
ø
|
||||
õ
|
||||
⊗
|
||||
ö
|
||||
¶
|
||||
∂
|
||||
‰
|
||||
⊥
|
||||
φ
|
||||
π
|
||||
ϖ
|
||||
±
|
||||
£
|
||||
′
|
||||
∏
|
||||
∝
|
||||
ψ
|
||||
"
|
||||
⇒
|
||||
√
|
||||
⟩
|
||||
»
|
||||
→
|
||||
⌉
|
||||
”
|
||||
ℜ
|
||||
®
|
||||
⌋
|
||||
ρ
|
||||
‏
|
||||
›
|
||||
’
|
||||
‚
|
||||
š
|
||||
⋅
|
||||
§
|
||||
­
|
||||
σ
|
||||
ς
|
||||
∼
|
||||
♠
|
||||
⊂
|
||||
⊆
|
||||
∑
|
||||
¹
|
||||
²
|
||||
³
|
||||
⊃
|
||||
⊇
|
||||
ß
|
||||
τ
|
||||
∴
|
||||
θ
|
||||
ϑ
|
||||
 
|
||||
þ
|
||||
˜
|
||||
×
|
||||
™
|
||||
⇑
|
||||
ú
|
||||
↑
|
||||
û
|
||||
ù
|
||||
¨
|
||||
ϒ
|
||||
υ
|
||||
ü
|
||||
℘
|
||||
ξ
|
||||
ý
|
||||
¥
|
||||
ÿ
|
||||
ζ
|
||||
‍
|
||||
‌
|
|
@ -1,389 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2011, Vicent Marti
|
||||
*
|
||||
* Permission to use, copy, modify, and distribute this software for any
|
||||
* purpose with or without fee is hereby granted, provided that the above
|
||||
* copyright notice and this permission notice appear in all copies.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "buffer.h"
|
||||
#include "html.h"
|
||||
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <ctype.h>
|
||||
|
||||
#if defined(_WIN32)
|
||||
#define snprintf _snprintf
|
||||
#endif
|
||||
|
||||
struct smartypants_data {
|
||||
int in_squote;
|
||||
int in_dquote;
|
||||
};
|
||||
|
||||
static size_t smartypants_cb__ltag(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
|
||||
static size_t smartypants_cb__dquote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
|
||||
static size_t smartypants_cb__amp(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
|
||||
static size_t smartypants_cb__period(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
|
||||
static size_t smartypants_cb__number(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
|
||||
static size_t smartypants_cb__dash(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
|
||||
static size_t smartypants_cb__parens(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
|
||||
static size_t smartypants_cb__squote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
|
||||
static size_t smartypants_cb__backtick(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
|
||||
static size_t smartypants_cb__escape(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
|
||||
|
||||
static size_t (*smartypants_cb_ptrs[])
|
||||
(struct buf *, struct smartypants_data *, uint8_t, const uint8_t *, size_t) =
|
||||
{
|
||||
NULL, /* 0 */
|
||||
smartypants_cb__dash, /* 1 */
|
||||
smartypants_cb__parens, /* 2 */
|
||||
smartypants_cb__squote, /* 3 */
|
||||
smartypants_cb__dquote, /* 4 */
|
||||
smartypants_cb__amp, /* 5 */
|
||||
smartypants_cb__period, /* 6 */
|
||||
smartypants_cb__number, /* 7 */
|
||||
smartypants_cb__ltag, /* 8 */
|
||||
smartypants_cb__backtick, /* 9 */
|
||||
smartypants_cb__escape, /* 10 */
|
||||
};
|
||||
|
||||
static const uint8_t smartypants_cb_chars[] = {
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 4, 0, 0, 0, 5, 3, 2, 0, 0, 0, 0, 1, 6, 0,
|
||||
0, 7, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 0, 0, 0,
|
||||
9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
};
|
||||
|
||||
/*
 * word_boundary: tell whether `c` can delimit a word.
 *
 * Returns 1 for NUL, whitespace and punctuation, 0 for anything else.
 * Callers in this file pass 0 when there is no neighbouring character,
 * so the start and end of the input count as boundaries.
 */
static inline int
word_boundary(uint8_t c)
{
    if (c == 0)
        return 1;

    return isspace(c) || ispunct(c);
}
|
||||
|
||||
/*
 * smartypants_quotes: try to emit an opening or closing quote entity.
 *
 * `*is_open` records whether a quote of this kind is currently open.
 * A closing quote is only produced when `next_char` is a word boundary,
 * an opening quote only when `previous_char` is one.  `quote` is the
 * letter placed in the entity name ('s' or 'd' at the call sites in
 * this file), giving "&lsquo;", "&rdquo;" and so on.
 *
 * Returns 1 when an entity was written to `ob` (and `*is_open` was
 * toggled), 0 when nothing was written.
 */
static int
smartypants_quotes(struct buf *ob, uint8_t previous_char, uint8_t next_char, uint8_t quote, int *is_open)
{
    char entity[8];
    const int closing = (*is_open != 0);

    /* the character that matters depends on which side of the word we are on */
    if (!word_boundary(closing ? next_char : previous_char))
        return 0;

    snprintf(entity, sizeof(entity), "&%c%cquo;", closing ? 'r' : 'l', quote);
    *is_open = !closing;
    bufputs(ob, entity);
    return 1;
}
|
||||
|
||||
static size_t
|
||||
smartypants_cb__squote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
|
||||
{
|
||||
if (size >= 2) {
|
||||
uint8_t t1 = tolower(text[1]);
|
||||
|
||||
if (t1 == '\'') {
|
||||
if (smartypants_quotes(ob, previous_char, size >= 3 ? text[2] : 0, 'd', &smrt->in_dquote))
|
||||
return 1;
|
||||
}
|
||||
|
||||
if ((t1 == 's' || t1 == 't' || t1 == 'm' || t1 == 'd') &&
|
||||
(size == 3 || word_boundary(text[2]))) {
|
||||
BUFPUTSL(ob, "’");
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (size >= 3) {
|
||||
uint8_t t2 = tolower(text[2]);
|
||||
|
||||
if (((t1 == 'r' && t2 == 'e') ||
|
||||
(t1 == 'l' && t2 == 'l') ||
|
||||
(t1 == 'v' && t2 == 'e')) &&
|
||||
(size == 4 || word_boundary(text[3]))) {
|
||||
BUFPUTSL(ob, "’");
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (smartypants_quotes(ob, previous_char, size > 0 ? text[1] : 0, 's', &smrt->in_squote))
|
||||
return 0;
|
||||
|
||||
bufputc(ob, text[0]);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * smartypants_cb__parens: handle a `(` found at text[0].
 *
 * Recognises "(c)", "(r)" and "(tm)"; the first two letters after the
 * parenthesis are compared case-insensitively.  On a match the symbol is
 * written to `ob` and the number of bytes consumed after text[0] is
 * returned (2 or 3).  Otherwise the parenthesis is copied and 0 is
 * returned.
 */
static size_t
smartypants_cb__parens(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
    if (size >= 3) {
        const uint8_t first = tolower(text[1]);
        const uint8_t second = tolower(text[2]);

        if (second == ')') {
            if (first == 'c') {
                BUFPUTSL(ob, "©");
                return 2;
            }

            if (first == 'r') {
                BUFPUTSL(ob, "®");
                return 2;
            }
        } else if (first == 't' && second == 'm' && size >= 4 && text[3] == ')') {
            BUFPUTSL(ob, "™");
            return 3;
        }
    }

    bufputc(ob, text[0]);
    return 0;
}
|
||||
|
||||
/*
 * Callback for '-': "---" becomes an em dash, "--" an en dash, and a lone
 * dash is copied through.
 *
 * Returns the number of input bytes consumed in addition to text[0].
 */
static size_t
smartypants_cb__dash(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
	const int double_dash = (size >= 2 && text[1] == '-');

	if (!double_dash) {
		bufputc(ob, text[0]);
		return 0;
	}

	/* at least "--"; a third dash upgrades it to an em dash */
	if (size >= 3 && text[2] == '-') {
		BUFPUTSL(ob, "—");
		return 2;
	}

	BUFPUTSL(ob, "–");
	return 1;
}
|
||||
|
||||
static size_t
|
||||
smartypants_cb__amp(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
|
||||
{
|
||||
if (size >= 6 && memcmp(text, """, 6) == 0) {
|
||||
if (smartypants_quotes(ob, previous_char, size >= 7 ? text[6] : 0, 'd', &smrt->in_dquote))
|
||||
return 5;
|
||||
}
|
||||
|
||||
if (size >= 4 && memcmp(text, "�", 4) == 0)
|
||||
return 3;
|
||||
|
||||
bufputc(ob, '&');
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Callback for '.': "..." and ". . ." both collapse into an ellipsis;
 * a lone period is copied through.
 *
 * Returns the number of input bytes consumed in addition to text[0].
 */
static size_t
smartypants_cb__period(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
	/* text[0] is the period that triggered the callback; compare the tail */
	const int tight = (size >= 3 && memcmp(text + 1, "..", 2) == 0);

	if (tight) {
		BUFPUTSL(ob, "…");
		return 2;
	}

	if (size >= 5 && memcmp(text + 1, " . .", 4) == 0) {
		BUFPUTSL(ob, "…");
		return 4;
	}

	bufputc(ob, text[0]);
	return 0;
}
|
||||
|
||||
static size_t
|
||||
smartypants_cb__backtick(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
|
||||
{
|
||||
if (size >= 2 && text[1] == '`') {
|
||||
if (smartypants_quotes(ob, previous_char, size >= 3 ? text[2] : 0, 'd', &smrt->in_dquote))
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Callback for the digits '1' and '3': converts the stand-alone fractions
 * 1/2, 1/4 (also "1/4th") and 3/4 (also "3/4ths") into single fraction
 * characters. The fraction must start after a word boundary.
 *
 * Returns the number of input bytes consumed in addition to text[0].
 */
static size_t
smartypants_cb__number(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
	if (word_boundary(previous_char) && size >= 3 && text[1] == '/') {
		const uint8_t numerator = text[0];
		const uint8_t denominator = text[2];

		if (numerator == '1' && denominator == '2') {
			if (size == 3 || word_boundary(text[3])) {
				BUFPUTSL(ob, "½");
				return 2;
			}
		} else if (numerator == '1' && denominator == '4') {
			/* plain "1/4" or the ordinal spelling "1/4th" */
			if (size == 3 || word_boundary(text[3]) ||
				(size >= 5 && tolower(text[3]) == 't' && tolower(text[4]) == 'h')) {
				BUFPUTSL(ob, "¼");
				return 2;
			}
		} else if (numerator == '3' && denominator == '4') {
			/* plain "3/4" or the plural ordinal "3/4ths" */
			if (size == 3 || word_boundary(text[3]) ||
				(size >= 6 && tolower(text[3]) == 't' && tolower(text[4]) == 'h' && tolower(text[5]) == 's')) {
				BUFPUTSL(ob, "¾");
				return 2;
			}
		}
	}

	bufputc(ob, text[0]);
	return 0;
}
|
||||
|
||||
static size_t
|
||||
smartypants_cb__dquote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
|
||||
{
|
||||
if (!smartypants_quotes(ob, previous_char, size > 0 ? text[1] : 0, 'd', &smrt->in_dquote))
|
||||
BUFPUTSL(ob, """);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static size_t
|
||||
smartypants_cb__ltag(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
|
||||
{
|
||||
static const char *skip_tags[] = {
|
||||
"pre", "code", "var", "samp", "kbd", "math", "script", "style"
|
||||
};
|
||||
static const size_t skip_tags_count = 8;
|
||||
|
||||
size_t tag, i = 0;
|
||||
|
||||
while (i < size && text[i] != '>')
|
||||
i++;
|
||||
|
||||
for (tag = 0; tag < skip_tags_count; ++tag) {
|
||||
if (sdhtml_is_tag(text, size, skip_tags[tag]) == HTML_TAG_OPEN)
|
||||
break;
|
||||
}
|
||||
|
||||
if (tag < skip_tags_count) {
|
||||
for (;;) {
|
||||
while (i < size && text[i] != '<')
|
||||
i++;
|
||||
|
||||
if (i == size)
|
||||
break;
|
||||
|
||||
if (sdhtml_is_tag(text + i, size - i, skip_tags[tag]) == HTML_TAG_CLOSE)
|
||||
break;
|
||||
|
||||
i++;
|
||||
}
|
||||
|
||||
while (i < size && text[i] != '>')
|
||||
i++;
|
||||
}
|
||||
|
||||
bufput(ob, text, i + 1);
|
||||
return i;
|
||||
}
|
||||
|
||||
/*
 * Callback for a backslash: a backslash followed by one of \ " ' . - `
 * emits that character literally (bypassing its substitution) and drops
 * the backslash. Any other backslash is copied through.
 *
 * Returns the number of input bytes consumed in addition to text[0].
 */
static size_t
smartypants_cb__escape(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
	if (size < 2) {
		/* lone backslash at the end of input: keep it instead of dropping it */
		if (size == 1)
			bufputc(ob, text[0]);
		return 0;
	}

	switch (text[1]) {
	case '\\':
	case '"':
	case '\'':
	case '.':
	case '-':
	case '`':
		bufputc(ob, text[1]);
		return 1;

	default:
		bufputc(ob, '\\');
		return 0;
	}
}
|
||||
|
||||
#if 0
|
||||
static struct {
|
||||
uint8_t c0;
|
||||
const uint8_t *pattern;
|
||||
const uint8_t *entity;
|
||||
int skip;
|
||||
} smartypants_subs[] = {
|
||||
{ '\'', "'s>", "’", 0 },
|
||||
{ '\'', "'t>", "’", 0 },
|
||||
{ '\'', "'re>", "’", 0 },
|
||||
{ '\'', "'ll>", "’", 0 },
|
||||
{ '\'', "'ve>", "’", 0 },
|
||||
{ '\'', "'m>", "’", 0 },
|
||||
{ '\'', "'d>", "’", 0 },
|
||||
{ '-', "--", "—", 1 },
|
||||
{ '-', "<->", "–", 0 },
|
||||
{ '.', "...", "…", 2 },
|
||||
{ '.', ". . .", "…", 4 },
|
||||
{ '(', "(c)", "©", 2 },
|
||||
{ '(', "(r)", "®", 2 },
|
||||
{ '(', "(tm)", "™", 3 },
|
||||
{ '3', "<3/4>", "¾", 2 },
|
||||
{ '3', "<3/4ths>", "¾", 2 },
|
||||
{ '1', "<1/2>", "½", 2 },
|
||||
{ '1', "<1/4>", "¼", 2 },
|
||||
{ '1', "<1/4th>", "¼", 2 },
|
||||
{ '&', "�", 0, 3 },
|
||||
};
|
||||
#endif
|
||||
|
||||
void
|
||||
sdhtml_smartypants(struct buf *ob, const uint8_t *text, size_t size)
|
||||
{
|
||||
size_t i;
|
||||
struct smartypants_data smrt = {0, 0};
|
||||
|
||||
if (!text)
|
||||
return;
|
||||
|
||||
bufgrow(ob, size);
|
||||
|
||||
for (i = 0; i < size; ++i) {
|
||||
size_t org;
|
||||
uint8_t action = 0;
|
||||
|
||||
org = i;
|
||||
while (i < size && (action = smartypants_cb_chars[text[i]]) == 0)
|
||||
i++;
|
||||
|
||||
if (i > org)
|
||||
bufput(ob, text + org, i - org);
|
||||
|
||||
if (i < size) {
|
||||
i += smartypants_cb_ptrs[(int)action]
|
||||
(ob, &smrt, i ? text[i - 1] : 0, text + i, size - i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
File diff suppressed because it is too large
Load diff
|
@ -1,140 +0,0 @@
|
|||
/* markdown.h - generic markdown parser */
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009, Natacha Porté
|
||||
*
|
||||
* Permission to use, copy, modify, and distribute this software for any
|
||||
* purpose with or without fee is hereby granted, provided that the above
|
||||
* copyright notice and this permission notice appear in all copies.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef UPSKIRT_MARKDOWN_H
|
||||
#define UPSKIRT_MARKDOWN_H
|
||||
|
||||
#include "buffer.h"
|
||||
#include "autolink.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define SUNDOWN_VERSION "1.16.0"
|
||||
#define SUNDOWN_VER_MAJOR 1
|
||||
#define SUNDOWN_VER_MINOR 16
|
||||
#define SUNDOWN_VER_REVISION 0
|
||||
|
||||
/********************
|
||||
* TYPE DEFINITIONS *
|
||||
********************/
|
||||
|
||||
/* mkd_autolink - type of autolink */
|
||||
enum mkd_autolink {
|
||||
MKDA_NOT_AUTOLINK, /* used internally when it is not an autolink */
|
||||
MKDA_NORMAL, /* normal http/https/ftp/mailto/etc link */
|
||||
MKDA_EMAIL, /* e-mail link without explicit mailto: */
|
||||
};
|
||||
|
||||
enum mkd_tableflags {
|
||||
MKD_TABLE_ALIGN_L = 1,
|
||||
MKD_TABLE_ALIGN_R = 2,
|
||||
MKD_TABLE_ALIGN_CENTER = 3,
|
||||
MKD_TABLE_ALIGNMASK = 3,
|
||||
MKD_TABLE_HEADER = 4
|
||||
};
|
||||
|
||||
enum mkd_extensions {
|
||||
MKDEXT_NO_INTRA_EMPHASIS = (1 << 0),
|
||||
MKDEXT_TABLES = (1 << 1),
|
||||
MKDEXT_FENCED_CODE = (1 << 2),
|
||||
MKDEXT_AUTOLINK = (1 << 3),
|
||||
MKDEXT_STRIKETHROUGH = (1 << 4),
|
||||
MKDEXT_SPACE_HEADERS = (1 << 6),
|
||||
MKDEXT_SUPERSCRIPT = (1 << 7),
|
||||
MKDEXT_LAX_SPACING = (1 << 8),
|
||||
MKDEXT_NO_EMAIL_AUTOLINK = (1 << 9),
|
||||
};
|
||||
|
||||
/* sd_callbacks - functions for rendering parsed data */
|
||||
struct sd_callbacks {
|
||||
/* block level callbacks - NULL skips the block */
|
||||
void (*blockcode)(struct buf *ob, const struct buf *text, const struct buf *lang, void *opaque);
|
||||
void (*blockquote)(struct buf *ob, const struct buf *text, void *opaque);
|
||||
void (*blockhtml)(struct buf *ob,const struct buf *text, void *opaque);
|
||||
void (*header)(struct buf *ob, const struct buf *text, int level, void *opaque);
|
||||
void (*hrule)(struct buf *ob, void *opaque);
|
||||
void (*list)(struct buf *ob, const struct buf *text, int flags, void *opaque);
|
||||
void (*listitem)(struct buf *ob, const struct buf *text, int flags, void *opaque);
|
||||
void (*paragraph)(struct buf *ob, const struct buf *text, void *opaque);
|
||||
void (*table)(struct buf *ob, const struct buf *header, const struct buf *body, void *opaque);
|
||||
void (*table_row)(struct buf *ob, const struct buf *text, void *opaque);
|
||||
void (*table_cell)(struct buf *ob, const struct buf *text, int flags, void *opaque, int col_span);
|
||||
|
||||
|
||||
/* span level callbacks - NULL or return 0 prints the span verbatim */
|
||||
int (*autolink)(struct buf *ob, const struct buf *link, enum mkd_autolink type, void *opaque);
|
||||
int (*codespan)(struct buf *ob, const struct buf *text, void *opaque);
|
||||
int (*double_emphasis)(struct buf *ob, const struct buf *text, void *opaque);
|
||||
int (*emphasis)(struct buf *ob, const struct buf *text, void *opaque);
|
||||
int (*image)(struct buf *ob, const struct buf *link, const struct buf *title, const struct buf *alt, void *opaque);
|
||||
int (*linebreak)(struct buf *ob, void *opaque);
|
||||
int (*link)(struct buf *ob, const struct buf *link, const struct buf *title, const struct buf *content, void *opaque);
|
||||
int (*raw_html_tag)(struct buf *ob, const struct buf *tag, void *opaque);
|
||||
int (*triple_emphasis)(struct buf *ob, const struct buf *text, void *opaque);
|
||||
int (*strikethrough)(struct buf *ob, const struct buf *text, void *opaque);
|
||||
int (*superscript)(struct buf *ob, const struct buf *text, void *opaque);
|
||||
|
||||
/* low level callbacks - NULL copies input directly into the output */
|
||||
void (*entity)(struct buf *ob, const struct buf *entity, void *opaque);
|
||||
void (*normal_text)(struct buf *ob, const struct buf *text, void *opaque);
|
||||
|
||||
/* header and footer */
|
||||
void (*doc_header)(struct buf *ob, void *opaque);
|
||||
void (*doc_footer)(struct buf *ob, void *opaque);
|
||||
};
|
||||
|
||||
struct sd_markdown;
|
||||
|
||||
/*********
|
||||
* FLAGS *
|
||||
*********/
|
||||
|
||||
/* list/listitem flags */
|
||||
#define MKD_LIST_ORDERED 1
|
||||
#define MKD_LI_BLOCK 2 /* <li> containing block data */
|
||||
|
||||
/**********************
|
||||
* EXPORTED FUNCTIONS *
|
||||
**********************/
|
||||
|
||||
extern struct sd_markdown *
|
||||
sd_markdown_new(
|
||||
unsigned int extensions,
|
||||
size_t max_nesting,
|
||||
size_t max_table_cols,
|
||||
const struct sd_callbacks *callbacks,
|
||||
void *opaque);
|
||||
|
||||
extern void
|
||||
sd_markdown_render(struct buf *ob, const uint8_t *document, size_t doc_size, struct sd_markdown *md);
|
||||
|
||||
extern void
|
||||
sd_markdown_free(struct sd_markdown *md);
|
||||
|
||||
extern void
|
||||
sd_version(int *major, int *minor, int *revision);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
/* vim: set filetype=c: */
|
|
@ -1,56 +0,0 @@
|
|||
from distutils.spawn import find_executable
|
||||
from setuptools import setup, Extension
|
||||
from setuptools.command.build_ext import build_ext
|
||||
|
||||
import re
|
||||
import os
|
||||
import subprocess
|
||||
import fnmatch
|
||||
|
||||
def c_files_in(directory):
    """Return the paths of the ``*.c`` files directly inside *directory*.

    Each path is *directory* joined with the file name; sub-directories
    are not searched.
    """
    return [
        os.path.join(directory, name)
        for name in fnmatch.filter(os.listdir(directory), '*.c')
    ]
|
||||
|
||||
|
||||
def process_gperf_file(gperf_file, output_file):
    """Build and print the gperf command line for *gperf_file*.

    Raises ``Exception`` when no ``gperf`` executable is found, and fails
    an assertion when *gperf_file* does not exist.

    NOTE(review): the actual invocation is commented out, so this only
    prints the command; *output_file* is not produced here.
    """
    gperf_path = find_executable("gperf")
    if not gperf_path:
        raise Exception("Couldn't find `gperf`, is it installed?")

    assert os.path.exists(gperf_file)

    command = " ".join(["gperf.exe", gperf_file, "--output-file=" + output_file])
    print(command)
    #subprocess.check_call(command)
|
||||
|
||||
# Read the package version from the SNUDOWN_VERSION #define in snudown.c so
# the number is maintained in one place. If several lines match, the last
# one wins.
version = None
version_re = re.compile(r'^#define\s+SNUDOWN_VERSION\s+"([^"]+)"$')
with open('snudown.c', 'r') as f:
    for line in f:
        m = version_re.match(line)
        if m:
            version = m.group(1)

# Explicit check instead of `assert`, which is stripped under `python -O`
# and would let setup() run with version=None.
if not version:
    raise RuntimeError("Couldn't find SNUDOWN_VERSION in snudown.c")
|
||||
|
||||
|
||||
class GPerfingBuildExt(build_ext):
    """``build_ext`` command that runs the gperf step before compiling."""

    def run(self):
        # os.path.join yields the same backslash paths on Windows and valid
        # paths on other platforms.
        process_gperf_file(
            os.path.join("src", "html_entities.gperf"),
            os.path.join("src", "html_entities.h"),
        )
        build_ext.run(self)
|
||||
|
||||
setup(
|
||||
name='snudown',
|
||||
version=version,
|
||||
author='Vicent Marti',
|
||||
author_email='vicent@github.com',
|
||||
license='MIT',
|
||||
test_suite="test_snudown.test_snudown",
|
||||
cmdclass={'build_ext': GPerfingBuildExt,},
|
||||
ext_modules=[
|
||||
Extension(
|
||||
name='snudown',
|
||||
sources=['snudown.c'] + c_files_in('src/') + c_files_in('html/'),
|
||||
include_dirs=['src', 'html']
|
||||
)
|
||||
],
|
||||
)
|
|
@ -1,212 +0,0 @@
|
|||
#define PY_SSIZE_T_CLEAN
|
||||
#include <Python.h>
|
||||
|
||||
#include "markdown.h"
|
||||
#include "html.h"
|
||||
#include "autolink.h"
|
||||
|
||||
#define SNUDOWN_VERSION "1.4.0"
|
||||
|
||||
enum snudown_renderer_mode {
|
||||
RENDERER_USERTEXT = 0,
|
||||
RENDERER_WIKI,
|
||||
RENDERER_COUNT
|
||||
};
|
||||
|
||||
struct snudown_renderopt {
|
||||
struct html_renderopt html;
|
||||
int nofollow;
|
||||
const char *target;
|
||||
};
|
||||
|
||||
struct snudown_renderer {
|
||||
struct sd_markdown* main_renderer;
|
||||
struct sd_markdown* toc_renderer;
|
||||
struct module_state* state;
|
||||
struct module_state* toc_state;
|
||||
};
|
||||
|
||||
struct module_state {
|
||||
struct sd_callbacks callbacks;
|
||||
struct snudown_renderopt options;
|
||||
};
|
||||
|
||||
static struct snudown_renderer sundown[RENDERER_COUNT];
|
||||
|
||||
static char* html_element_whitelist[] = {"tr", "th", "td", "table", "tbody", "thead", "tfoot", "caption", NULL};
|
||||
static char* html_attr_whitelist[] = {"colspan", "rowspan", "cellspacing", "cellpadding", "scope", NULL};
|
||||
|
||||
static struct module_state usertext_toc_state;
|
||||
static struct module_state wiki_toc_state;
|
||||
static struct module_state usertext_state;
|
||||
static struct module_state wiki_state;
|
||||
|
||||
/* The module doc strings */
|
||||
PyDoc_STRVAR(snudown_module__doc__, "When does the narwhal bacon? At Sundown.");
|
||||
PyDoc_STRVAR(snudown_md__doc__, "Render a Markdown document");
|
||||
|
||||
static const unsigned int snudown_default_md_flags =
|
||||
MKDEXT_NO_INTRA_EMPHASIS |
|
||||
MKDEXT_SUPERSCRIPT |
|
||||
MKDEXT_AUTOLINK |
|
||||
MKDEXT_STRIKETHROUGH |
|
||||
MKDEXT_TABLES;
|
||||
|
||||
static const unsigned int snudown_default_render_flags =
|
||||
HTML_SKIP_HTML |
|
||||
HTML_SKIP_IMAGES |
|
||||
HTML_SAFELINK |
|
||||
HTML_ESCAPE |
|
||||
HTML_USE_XHTML;
|
||||
|
||||
static const unsigned int snudown_wiki_render_flags =
|
||||
HTML_SKIP_HTML |
|
||||
HTML_SAFELINK |
|
||||
HTML_ALLOW_ELEMENT_WHITELIST |
|
||||
HTML_ESCAPE |
|
||||
HTML_USE_XHTML;
|
||||
|
||||
static void
|
||||
snudown_link_attr(struct buf *ob, const struct buf *link, void *opaque)
|
||||
{
|
||||
struct snudown_renderopt *options = opaque;
|
||||
|
||||
if (options->nofollow)
|
||||
BUFPUTSL(ob, " rel=\"nofollow\"");
|
||||
|
||||
if (options->target != NULL) {
|
||||
BUFPUTSL(ob, " target=\"");
|
||||
bufputs(ob, options->target);
|
||||
bufputc(ob, '\"');
|
||||
}
|
||||
}
|
||||
|
||||
static struct sd_markdown* make_custom_renderer(struct module_state* state,
|
||||
const unsigned int renderflags,
|
||||
const unsigned int markdownflags,
|
||||
int toc_renderer) {
|
||||
if(toc_renderer) {
|
||||
sdhtml_toc_renderer(&state->callbacks,
|
||||
(struct html_renderopt *)&state->options);
|
||||
} else {
|
||||
sdhtml_renderer(&state->callbacks,
|
||||
(struct html_renderopt *)&state->options,
|
||||
renderflags);
|
||||
}
|
||||
|
||||
state->options.html.link_attributes = &snudown_link_attr;
|
||||
state->options.html.html_element_whitelist = html_element_whitelist;
|
||||
state->options.html.html_attr_whitelist = html_attr_whitelist;
|
||||
|
||||
return sd_markdown_new(
|
||||
markdownflags,
|
||||
16,
|
||||
64,
|
||||
&state->callbacks,
|
||||
&state->options
|
||||
);
|
||||
}
|
||||
|
||||
/*
 * Export RENDERER_USERTEXT on `module` and build the main and TOC
 * renderers for the user-text slot of the global `sundown` table.
 */
void init_default_renderer(PyObject *module) {
	struct snudown_renderer *slot = &sundown[RENDERER_USERTEXT];

	PyModule_AddIntConstant(module, "RENDERER_USERTEXT", RENDERER_USERTEXT);

	slot->main_renderer = make_custom_renderer(&usertext_state, snudown_default_render_flags, snudown_default_md_flags, 0);
	slot->toc_renderer = make_custom_renderer(&usertext_toc_state, snudown_default_render_flags, snudown_default_md_flags, 1);
	slot->state = &usertext_state;
	slot->toc_state = &usertext_toc_state;
}
|
||||
|
||||
/*
 * Export RENDERER_WIKI on `module` and build the main and TOC renderers
 * for the wiki slot of the global `sundown` table.
 */
void init_wiki_renderer(PyObject *module) {
	struct snudown_renderer *slot = &sundown[RENDERER_WIKI];

	PyModule_AddIntConstant(module, "RENDERER_WIKI", RENDERER_WIKI);

	slot->main_renderer = make_custom_renderer(&wiki_state, snudown_wiki_render_flags, snudown_default_md_flags, 0);
	slot->toc_renderer = make_custom_renderer(&wiki_toc_state, snudown_wiki_render_flags, snudown_default_md_flags, 1);
	slot->state = &wiki_state;
	slot->toc_state = &wiki_toc_state;
}
|
||||
|
||||
/*
 * snudown.markdown(text, nofollow=0, target=None, toc_id_prefix=None,
 *                  renderer=RENDERER_USERTEXT, enable_toc=0)
 *
 * Renders `text` with the selected renderer and returns the HTML as a
 * Python string. When enable_toc is true, the table of contents is
 * rendered first into the same output buffer.
 *
 * Returns NULL with ValueError set for an out-of-range renderer, and NULL
 * with MemoryError set when the output buffer cannot be allocated.
 */
static PyObject *
snudown_md(PyObject *self, PyObject *args, PyObject *kwargs)
{
	static char *kwlist[] = {"text", "nofollow", "target", "toc_id_prefix", "renderer", "enable_toc", NULL};

	struct buf ib, *ob;
	PyObject *py_result;
	const char* result_text;
	int renderer = RENDERER_USERTEXT;
	int enable_toc = 0;
	struct snudown_renderer _snudown;
	struct snudown_renderopt *options;
	int nofollow = 0;
	char* target = NULL;
	char* toc_id_prefix = NULL;
	unsigned int flags;

	memset(&ib, 0x0, sizeof(struct buf));

	/* Parse arguments */
	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|izzii", kwlist,
				&ib.data, &ib.size, &nofollow,
				&target, &toc_id_prefix, &renderer, &enable_toc)) {
		return NULL;
	}

	if (renderer < 0 || renderer >= RENDERER_COUNT) {
		PyErr_SetString(PyExc_ValueError, "Invalid renderer");
		return NULL;
	}

	_snudown = sundown[renderer];

	/* Output buffer; checked before any shared renderer state is touched */
	ob = bufnew(128);
	if (ob == NULL)
		return PyErr_NoMemory();

	/* The options live in shared module state: set per-call values here
	 * and restore them after rendering. */
	options = &(_snudown.state->options);
	options->nofollow = nofollow;
	options->target = target;

	flags = options->html.flags;

	if (enable_toc) {
		_snudown.toc_state->options.html.toc_id_prefix = toc_id_prefix;
		sd_markdown_render(ob, ib.data, ib.size, _snudown.toc_renderer);
		_snudown.toc_state->options.html.toc_id_prefix = NULL;

		options->html.flags |= HTML_TOC;
	}

	options->html.toc_id_prefix = toc_id_prefix;

	/* do the magic */
	sd_markdown_render(ob, ib.data, ib.size, _snudown.main_renderer);

	options->html.toc_id_prefix = NULL;
	options->html.flags = flags;
	/* `target` points into the argument objects; don't keep it past the call */
	options->target = NULL;

	/* make a Python string */
	result_text = "";
	if (ob->data)
		result_text = (const char*)ob->data;
	/* PY_SSIZE_T_CLEAN is defined in this file, so the length argument of
	 * "s#" must be a Py_ssize_t; passing an int through varargs is
	 * undefined where the two types differ in size. */
	py_result = Py_BuildValue("s#", result_text, (Py_ssize_t)ob->size);

	/* Cleanup */
	bufrelease(ob);
	return py_result;
}
|
||||
|
||||
static PyMethodDef snudown_methods[] = {
|
||||
{"markdown", (PyCFunction) snudown_md, METH_VARARGS | METH_KEYWORDS, snudown_md__doc__},
|
||||
{NULL, NULL, 0, NULL} /* Sentinel */
|
||||
};
|
||||
|
||||
PyMODINIT_FUNC initsnudown(void)
|
||||
{
|
||||
PyObject *module;
|
||||
|
||||
module = Py_InitModule3("snudown", snudown_methods, snudown_module__doc__);
|
||||
if (module == NULL)
|
||||
return;
|
||||
|
||||
init_default_renderer(module);
|
||||
init_wiki_renderer(module);
|
||||
|
||||
/* Version */
|
||||
PyModule_AddStringConstant(module, "__version__", SNUDOWN_VERSION);
|
||||
}
|
|
@ -1,226 +0,0 @@
|
|||
#include "markdown.h"
|
||||
#include "html.h"
|
||||
#include "buffer.h"
|
||||
|
||||
#include <ctype.h>
|
||||
#include <errno.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <gumbo.h>
|
||||
|
||||
#define READ_UNIT 1024
|
||||
#define OUTPUT_UNIT 64
|
||||
|
||||
#include "autolink.h"
|
||||
|
||||
#define SNUDOWN_VERSION "1.3.2"
|
||||
|
||||
enum snudown_renderer_mode {
|
||||
RENDERER_USERTEXT = 0,
|
||||
RENDERER_WIKI,
|
||||
RENDERER_COUNT
|
||||
};
|
||||
|
||||
struct snudown_renderopt {
|
||||
struct html_renderopt html;
|
||||
int nofollow;
|
||||
const char *target;
|
||||
};
|
||||
|
||||
struct snudown_renderer {
|
||||
struct sd_markdown* main_renderer;
|
||||
struct sd_markdown* toc_renderer;
|
||||
struct module_state* state;
|
||||
struct module_state* toc_state;
|
||||
};
|
||||
|
||||
struct module_state {
|
||||
struct sd_callbacks callbacks;
|
||||
struct snudown_renderopt options;
|
||||
};
|
||||
|
||||
static struct snudown_renderer sundown[RENDERER_COUNT];
|
||||
|
||||
static char* html_element_whitelist[] = {"tr", "th", "td", "table", "tbody", "thead", "tfoot", "caption", NULL};
|
||||
static char* html_attr_whitelist[] = {"colspan", "rowspan", "cellspacing", "cellpadding", "scope", NULL};
|
||||
|
||||
static struct module_state usertext_toc_state;
|
||||
static struct module_state wiki_toc_state;
|
||||
static struct module_state usertext_state;
|
||||
static struct module_state wiki_state;
|
||||
|
||||
static const unsigned int snudown_default_md_flags =
|
||||
MKDEXT_NO_INTRA_EMPHASIS |
|
||||
MKDEXT_SUPERSCRIPT |
|
||||
MKDEXT_AUTOLINK |
|
||||
MKDEXT_STRIKETHROUGH |
|
||||
MKDEXT_TABLES;
|
||||
|
||||
static const unsigned int snudown_default_render_flags =
|
||||
HTML_SKIP_HTML |
|
||||
HTML_SKIP_IMAGES |
|
||||
HTML_SAFELINK |
|
||||
HTML_ESCAPE |
|
||||
HTML_USE_XHTML;
|
||||
|
||||
static const unsigned int snudown_wiki_render_flags =
|
||||
HTML_SKIP_HTML |
|
||||
HTML_SAFELINK |
|
||||
HTML_ALLOW_ELEMENT_WHITELIST |
|
||||
HTML_ESCAPE |
|
||||
HTML_USE_XHTML;
|
||||
|
||||
static void
|
||||
snudown_link_attr(struct buf *ob, const struct buf *link, void *opaque)
|
||||
{
|
||||
struct snudown_renderopt *options = opaque;
|
||||
|
||||
if (options->nofollow)
|
||||
BUFPUTSL(ob, " rel=\"nofollow\"");
|
||||
|
||||
if (options->target != NULL) {
|
||||
BUFPUTSL(ob, " target=\"");
|
||||
bufputs(ob, options->target);
|
||||
bufputc(ob, '\"');
|
||||
}
|
||||
}
|
||||
|
||||
static struct sd_markdown* make_custom_renderer(struct module_state* state,
|
||||
const unsigned int renderflags,
|
||||
const unsigned int markdownflags,
|
||||
int toc_renderer) {
|
||||
if(toc_renderer) {
|
||||
sdhtml_toc_renderer(&state->callbacks,
|
||||
(struct html_renderopt *)&state->options);
|
||||
} else {
|
||||
sdhtml_renderer(&state->callbacks,
|
||||
(struct html_renderopt *)&state->options,
|
||||
renderflags);
|
||||
}
|
||||
|
||||
state->options.html.link_attributes = &snudown_link_attr;
|
||||
state->options.html.html_element_whitelist = html_element_whitelist;
|
||||
state->options.html.html_attr_whitelist = html_attr_whitelist;
|
||||
|
||||
return sd_markdown_new(
|
||||
markdownflags,
|
||||
16,
|
||||
64,
|
||||
&state->callbacks,
|
||||
&state->options
|
||||
);
|
||||
}
|
||||
|
||||
void init_default_renderer() {
|
||||
sundown[RENDERER_USERTEXT].main_renderer = make_custom_renderer(&usertext_state, snudown_default_render_flags, snudown_default_md_flags, 0);
|
||||
sundown[RENDERER_USERTEXT].toc_renderer = make_custom_renderer(&usertext_toc_state, snudown_default_render_flags, snudown_default_md_flags, 1);
|
||||
sundown[RENDERER_USERTEXT].state = &usertext_state;
|
||||
sundown[RENDERER_USERTEXT].toc_state = &usertext_toc_state;
|
||||
}
|
||||
|
||||
void init_wiki_renderer() {
|
||||
sundown[RENDERER_WIKI].main_renderer = make_custom_renderer(&wiki_state, snudown_wiki_render_flags, snudown_default_md_flags, 0);
|
||||
sundown[RENDERER_WIKI].toc_renderer = make_custom_renderer(&wiki_toc_state, snudown_wiki_render_flags, snudown_default_md_flags, 1);
|
||||
sundown[RENDERER_WIKI].state = &wiki_state;
|
||||
sundown[RENDERER_WIKI].toc_state = &wiki_toc_state;
|
||||
}
|
||||
|
||||
void
|
||||
snudown_md(struct buf *ob, const uint8_t *document, size_t doc_size, int wiki_mode)
|
||||
{
|
||||
int renderer = RENDERER_USERTEXT;
|
||||
int enable_toc = 0;
|
||||
struct snudown_renderer _snudown;
|
||||
int nofollow = 0;
|
||||
char* target = NULL;
|
||||
char* toc_id_prefix = NULL;
|
||||
unsigned int flags;
|
||||
|
||||
if (wiki_mode)
|
||||
renderer = RENDERER_WIKI;
|
||||
|
||||
_snudown = sundown[renderer];
|
||||
|
||||
struct snudown_renderopt *options = &(_snudown.state->options);
|
||||
options->nofollow = nofollow;
|
||||
options->target = target;
|
||||
|
||||
flags = options->html.flags;
|
||||
|
||||
if (enable_toc) {
|
||||
_snudown.toc_state->options.html.toc_id_prefix = toc_id_prefix;
|
||||
sd_markdown_render(ob, document, doc_size, _snudown.toc_renderer);
|
||||
_snudown.toc_state->options.html.toc_id_prefix = NULL;
|
||||
|
||||
options->html.flags |= HTML_TOC;
|
||||
}
|
||||
|
||||
options->html.toc_id_prefix = toc_id_prefix;
|
||||
|
||||
/* do the magic */
|
||||
sd_markdown_render(ob, document, doc_size, _snudown.main_renderer);
|
||||
|
||||
options->html.toc_id_prefix = NULL;
|
||||
options->html.flags = flags;
|
||||
}
|
||||
int
|
||||
main(int argc, char **argv)
|
||||
{
|
||||
init_default_renderer();
|
||||
init_wiki_renderer();
|
||||
|
||||
struct buf *ib, *ob;
|
||||
int size_read = 0, wiki_mode = 0, i = 0, have_errors = 0;
|
||||
|
||||
/* reading everything */
|
||||
ib = bufnew(READ_UNIT);
|
||||
bufgrow(ib, READ_UNIT);
|
||||
while ((size_read = fread(ib->data + ib->size, 1, ib->asize - ib->size, stdin)) > 0) {
|
||||
ib->size += size_read;
|
||||
bufgrow(ib, ib->size + READ_UNIT);
|
||||
}
|
||||
/* Render to a buffer, then print that out */
|
||||
ob = bufnew(OUTPUT_UNIT);
|
||||
bufputs(ob, "<!DOCTYPE html><html><body>\n");
|
||||
snudown_md(ob, ib->data, ib->size, wiki_mode);
|
||||
bufputs(ob, "</body></html>\n");
|
||||
|
||||
// Wiki mode explicitly allows unbalanced tags, need some way to exclude those
|
||||
if (!wiki_mode) {
|
||||
GumboOutput* output = gumbo_parse_with_options(&kGumboDefaultOptions, bufcstr(ob), ob->size);
|
||||
|
||||
for (i=0; i < output->errors.length; ++i) {
|
||||
// stupid "public" API I hacked in.
|
||||
void* thing = output->errors.data[i];
|
||||
GumboErrorType type = gumbo_get_error_type(thing);
|
||||
switch(type) {
|
||||
case GUMBO_ERR_UTF8_INVALID:
|
||||
case GUMBO_ERR_UTF8_NULL:
|
||||
// Making sure the user gave us valid
|
||||
// utf-8 or transforming it to valid
|
||||
// utf-8 is outside the scope of snudown
|
||||
continue;
|
||||
default:
|
||||
have_errors = 1;
|
||||
printf("%s\n", GUMBO_ERROR_NAMES[type]);
|
||||
printf("%s\n",gumbo_get_error_text(thing));
|
||||
printf("===============\n");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (have_errors) {
|
||||
// gotta trigger a crash for AFL to catch it
|
||||
assert(0);
|
||||
}
|
||||
|
||||
gumbo_destroy_output(&kGumboDefaultOptions, output);
|
||||
}
|
||||
bufrelease(ob);
|
||||
bufrelease(ib);
|
||||
return 0;
|
||||
}
|
|
@ -1,232 +0,0 @@
|
|||
#define PY_SSIZE_T_CLEAN
|
||||
#include <Python.h>
|
||||
|
||||
#include "markdown.h"
|
||||
#include "html.h"
|
||||
#include "autolink.h"
|
||||
|
||||
#define SNUDOWN_VERSION "1.4.0"
|
||||
|
||||
enum snudown_renderer_mode {
|
||||
RENDERER_USERTEXT = 0,
|
||||
RENDERER_WIKI,
|
||||
RENDERER_COUNT
|
||||
};
|
||||
|
||||
struct snudown_renderopt {
|
||||
struct html_renderopt html;
|
||||
int nofollow;
|
||||
const char *target;
|
||||
};
|
||||
|
||||
struct snudown_renderer {
|
||||
struct sd_markdown* main_renderer;
|
||||
struct sd_markdown* toc_renderer;
|
||||
struct module_state* state;
|
||||
struct module_state* toc_state;
|
||||
};
|
||||
|
||||
struct module_state {
|
||||
struct sd_callbacks callbacks;
|
||||
struct snudown_renderopt options;
|
||||
};
|
||||
|
||||
static struct snudown_renderer sundown[RENDERER_COUNT];
|
||||
|
||||
static char* html_element_whitelist[] = {"tr", "th", "td", "table", "tbody", "thead", "tfoot", "caption", NULL};
|
||||
static char* html_attr_whitelist[] = {"colspan", "rowspan", "cellspacing", "cellpadding", "scope", NULL};
|
||||
|
||||
static struct module_state usertext_toc_state;
|
||||
static struct module_state wiki_toc_state;
|
||||
static struct module_state usertext_state;
|
||||
static struct module_state wiki_state;
|
||||
|
||||
/* The module doc strings */
|
||||
PyDoc_STRVAR(snudown_module__doc__, "When does the narwhal bacon? At Sundown.");
|
||||
PyDoc_STRVAR(snudown_md__doc__, "Render a Markdown document");
|
||||
|
||||
static const unsigned int snudown_default_md_flags =
|
||||
MKDEXT_NO_INTRA_EMPHASIS |
|
||||
MKDEXT_SUPERSCRIPT |
|
||||
MKDEXT_AUTOLINK |
|
||||
MKDEXT_STRIKETHROUGH |
|
||||
MKDEXT_TABLES;
|
||||
|
||||
static const unsigned int snudown_default_render_flags =
|
||||
HTML_SKIP_HTML |
|
||||
HTML_SKIP_IMAGES |
|
||||
HTML_SAFELINK |
|
||||
HTML_ESCAPE |
|
||||
HTML_USE_XHTML;
|
||||
|
||||
static const unsigned int snudown_wiki_render_flags =
|
||||
HTML_SKIP_HTML |
|
||||
HTML_SAFELINK |
|
||||
HTML_ALLOW_ELEMENT_WHITELIST |
|
||||
HTML_ESCAPE |
|
||||
HTML_USE_XHTML;
|
||||
|
||||
static void
|
||||
snudown_link_attr(struct buf *ob, const struct buf *link, void *opaque)
|
||||
{
|
||||
struct snudown_renderopt *options = opaque;
|
||||
|
||||
if (options->nofollow)
|
||||
BUFPUTSL(ob, " rel=\"nofollow\"");
|
||||
|
||||
if (options->target != NULL) {
|
||||
BUFPUTSL(ob, " target=\"");
|
||||
bufputs(ob, options->target);
|
||||
bufputc(ob, '\"');
|
||||
}
|
||||
}
|
||||
|
||||
static struct sd_markdown* make_custom_renderer(struct module_state* state,
|
||||
const unsigned int renderflags,
|
||||
const unsigned int markdownflags,
|
||||
int toc_renderer) {
|
||||
if(toc_renderer) {
|
||||
sdhtml_toc_renderer(&state->callbacks,
|
||||
(struct html_renderopt *)&state->options);
|
||||
} else {
|
||||
sdhtml_renderer(&state->callbacks,
|
||||
(struct html_renderopt *)&state->options,
|
||||
renderflags);
|
||||
}
|
||||
|
||||
state->options.html.link_attributes = &snudown_link_attr;
|
||||
state->options.html.html_element_whitelist = html_element_whitelist;
|
||||
state->options.html.html_attr_whitelist = html_attr_whitelist;
|
||||
|
||||
return sd_markdown_new(
|
||||
markdownflags,
|
||||
16,
|
||||
64,
|
||||
&state->callbacks,
|
||||
&state->options
|
||||
);
|
||||
}
|
||||
|
||||
/*
 * init_default_renderer: publish the RENDERER_USERTEXT constant on `module`
 * and populate the sundown[RENDERER_USERTEXT] slot with a main renderer, a
 * TOC renderer and pointers to their respective state objects.
 */
void init_default_renderer(PyObject *module) {
	struct snudown_renderer *slot = &sundown[RENDERER_USERTEXT];

	PyModule_AddIntConstant(module, "RENDERER_USERTEXT", RENDERER_USERTEXT);

	slot->main_renderer = make_custom_renderer(&usertext_state,
			snudown_default_render_flags, snudown_default_md_flags, 0);
	slot->toc_renderer = make_custom_renderer(&usertext_toc_state,
			snudown_default_render_flags, snudown_default_md_flags, 1);
	slot->state = &usertext_state;
	slot->toc_state = &usertext_toc_state;
}
|
||||
|
||||
/*
 * init_wiki_renderer: publish the RENDERER_WIKI constant on `module` and
 * populate the sundown[RENDERER_WIKI] slot.  Uses the wiki render flags with
 * the default markdown extension flags.
 */
void init_wiki_renderer(PyObject *module) {
	struct snudown_renderer *slot = &sundown[RENDERER_WIKI];

	PyModule_AddIntConstant(module, "RENDERER_WIKI", RENDERER_WIKI);

	slot->main_renderer = make_custom_renderer(&wiki_state,
			snudown_wiki_render_flags, snudown_default_md_flags, 0);
	slot->toc_renderer = make_custom_renderer(&wiki_toc_state,
			snudown_wiki_render_flags, snudown_default_md_flags, 1);
	slot->state = &wiki_state;
	slot->toc_state = &wiki_toc_state;
}
|
||||
|
||||
/*
 * snudown_md: implementation of the Python-level `markdown()` function.
 *
 * Keyword arguments (see kwlist): text, nofollow, target, toc_id_prefix,
 * renderer, enable_toc.  Only `text` is required.
 *
 * Renders `text` with the renderer slot selected by `renderer`.  When
 * `enable_toc` is non-zero the TOC renderer runs first into the same output
 * buffer and HTML_TOC is switched on for the main pass.  The per-call option
 * fields that are modified (toc_id_prefix, html flags) are restored before
 * returning.
 *
 * Returns a new Python str, or NULL with an exception set:
 *   - ValueError when `renderer` is outside [0, RENDERER_COUNT)
 *   - MemoryError when the output buffer cannot be allocated
 *   - whatever PyArg_ParseTupleAndKeywords raises for bad arguments
 */
static PyObject *
snudown_md(PyObject *self, PyObject *args, PyObject *kwargs)
{
	static char *kwlist[] = {"text", "nofollow", "target", "toc_id_prefix", "renderer", "enable_toc", NULL};

	struct snudown_renderopt *options;
	struct snudown_renderer _snudown;
	struct buf ib, *ob;
	Py_buffer text;
	PyObject *py_result;
	const char *result_text;
	int renderer = RENDERER_USERTEXT;
	int enable_toc = 0;
	int nofollow = 0;
	char *target = NULL;
	char *toc_id_prefix = NULL;
	unsigned int flags;

	memset(&ib, 0x0, sizeof(struct buf));

	/*
	 * Parse arguments.  "s*" fills a Py_buffer, so the length type does not
	 * depend on whether PY_SSIZE_T_CLEAN was defined before Python.h.
	 */
	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|izzii", kwlist,
				&text, &nofollow,
				&target, &toc_id_prefix, &renderer, &enable_toc)) {
		return NULL;
	}

	if (renderer < 0 || renderer >= RENDERER_COUNT) {
		PyBuffer_Release(&text);
		PyErr_SetString(PyExc_ValueError, "Invalid renderer");
		return NULL;
	}

	ib.data = (uint8_t *)text.buf;
	ib.size = (size_t)text.len;

	_snudown = sundown[renderer];

	/* Point at the selected renderer's option block before touching it. */
	options = &_snudown.state->options;
	options->nofollow = nofollow;
	options->target = target;

	/* Output buffer */
	ob = bufnew(128);
	if (ob == NULL) {
		PyBuffer_Release(&text);
		return PyErr_NoMemory();
	}

	/* Remember the flags so the HTML_TOC tweak below can be undone. */
	flags = options->html.flags;

	if (enable_toc) {
		_snudown.toc_state->options.html.toc_id_prefix = toc_id_prefix;
		sd_markdown_render(ob, ib.data, ib.size, _snudown.toc_renderer);
		_snudown.toc_state->options.html.toc_id_prefix = NULL;

		options->html.flags |= HTML_TOC;
	}

	options->html.toc_id_prefix = toc_id_prefix;

	/* do the magic */
	sd_markdown_render(ob, ib.data, ib.size, _snudown.main_renderer);

	options->html.toc_id_prefix = NULL;
	options->html.flags = flags;

	/* make a Python string; an untouched buffer has data == NULL */
	result_text = ob->data ? (const char *)ob->data : "";
	py_result = PyUnicode_FromStringAndSize(result_text, (Py_ssize_t)ob->size);

	/* Cleanup */
	bufrelease(ob);
	PyBuffer_Release(&text);
	return py_result;
}
|
||||
|
||||
|
||||
static PyMethodDef snudown_methods[] = {
|
||||
{"markdown", (PyCFunction) snudown_md, METH_VARARGS | METH_KEYWORDS, snudown_md__doc__},
|
||||
{NULL, NULL, 0, NULL} /* Sentinel */
|
||||
};
|
||||
PyMODINIT_FUNC PyInit_snudown(void)
|
||||
{
|
||||
PyObject *module;
|
||||
|
||||
struct PyModuleDef wtf = {
|
||||
PyModuleDef_HEAD_INIT,
|
||||
"snudown", /* m_name */
|
||||
"This is snudown", /* m_doc */
|
||||
-1, /* m_size */
|
||||
snudown_methods, /* m_methods */
|
||||
NULL, /* m_reload */
|
||||
NULL, /* m_traverse */
|
||||
NULL, /* m_clear */
|
||||
NULL, /* m_free */
|
||||
};
|
||||
//module = Py_InitModule3("snudown", snudown_methods, snudown_module__doc__);
|
||||
module = PyModule_Create(&wtf);
|
||||
if (module == NULL)
|
||||
return Py_BuildValue("");
|
||||
|
||||
init_default_renderer(module);
|
||||
init_wiki_renderer(module);
|
||||
|
||||
/* Version */
|
||||
PyModule_AddStringConstant(module, "__version__", SNUDOWN_VERSION);
|
||||
};
|
||||
/*
 * initsnudown: legacy-named entry point that simply forwards to
 * PyInit_snudown().  PyInit_snudown takes no arguments, so none are passed.
 * NOTE(review): the returned module reference is discarded here -- confirm
 * whether anything still calls this symbol.
 */
void initsnudown(void)
{
	(void) PyInit_snudown();
}
|
|
@ -1,487 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2011, Vicent Marti
|
||||
*
|
||||
* Permission to use, copy, modify, and distribute this software for any
|
||||
* purpose with or without fee is hereby granted, provided that the above
|
||||
* copyright notice and this permission notice appear in all copies.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "buffer.h"
|
||||
#include "autolink.h"
|
||||
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <ctype.h>
|
||||
|
||||
#if defined(_WIN32)
|
||||
#define strncasecmp _strnicmp
|
||||
#endif
|
||||
|
||||
/*
 * sd_autolink_issafe: report whether `link` starts with one of the accepted
 * prefixes (compared case-insensitively) and is followed by at least one
 * more byte that is alphanumeric, '#', '/' or '?'.
 *
 * Returns 1 when the link is accepted, 0 otherwise.
 */
int
sd_autolink_issafe(const uint8_t *link, size_t link_len)
{
	static const char *const schemes[] = {
		"http://", "https://", "ftp://", "mailto://",
		"/", "git://", "steam://", "irc://", "news://", "mumble://",
		"ssh://", "ircs://", "ts3server://", "#"
	};
	const size_t scheme_count = sizeof(schemes) / sizeof(schemes[0]);
	size_t idx;

	for (idx = 0; idx < scheme_count; idx++) {
		const char *scheme = schemes[idx];
		size_t scheme_len = strlen(scheme);
		uint8_t next;

		/* The link must extend past the prefix. */
		if (link_len <= scheme_len)
			continue;

		if (strncasecmp((const char *)link, scheme, scheme_len) != 0)
			continue;

		next = link[scheme_len];
		if (isalnum(next) || next == '#' || next == '/' || next == '?')
			return 1;
	}

	return 0;
}
|
||||
|
||||
/*
 * autolink_delim: trim a candidate link of `link_end` bytes starting at
 * `data` and return the adjusted length (0 when nothing is left).
 *
 *  - the link is cut at the first '<';
 *  - trailing '?', '!', '.', ',' are dropped;
 *  - a trailing "&name;" sequence is dropped as a whole, a lone ';' singly;
 *  - a trailing closing quote/bracket is dropped when it is unbalanced
 *    within the link.
 *
 * `max_rewind` and `size` are accepted for signature symmetry with the
 * callers but are not used.
 */
static size_t
autolink_delim(uint8_t *data, size_t link_end, size_t max_rewind, size_t size)
{
	uint8_t cclose, copen = 0;
	size_t i;

	(void)max_rewind;
	(void)size;

	for (i = 0; i < link_end; ++i)
		if (data[i] == '<') {
			link_end = i;
			break;
		}

	while (link_end > 0) {
		uint8_t c = data[link_end - 1];

		/* strchr() would match the terminator, so stop on NUL explicitly */
		if (c == 0)
			break;

		if (strchr("?!.,", c) != NULL)
			link_end--;

		else if (c == ';') {
			/*
			 * Look backwards for "&name;".  With fewer than two bytes
			 * there is no room for an entity, and `link_end - 2` would
			 * wrap around, so treat the ';' as plain punctuation.
			 */
			size_t new_end = (link_end >= 2) ? link_end - 2 : 0;

			while (new_end > 0 && isalpha(data[new_end]))
				new_end--;

			if (link_end >= 2 && new_end < link_end - 2 && data[new_end] == '&')
				link_end = new_end;
			else
				link_end--;
		}
		else break;
	}

	if (link_end == 0)
		return 0;

	cclose = data[link_end - 1];

	switch (cclose) {
	case '"':	copen = '"'; break;
	case '\'':	copen = '\''; break;
	case ')':	copen = '('; break;
	case ']':	copen = '['; break;
	case '}':	copen = '{'; break;
	}

	if (copen != 0) {
		size_t closing = 0;
		size_t opening = 0;
		size_t pos;

		/* Try to close the final punctuation sign in this same line;
		 * if we managed to close it outside of the URL, that means that it's
		 * not part of the URL. If it closes inside the URL, that means it
		 * is part of the URL.
		 *
		 * Examples:
		 *
		 *	foo http://www.pokemon.com/Pikachu_(Electric) bar
		 *		=> http://www.pokemon.com/Pikachu_(Electric)
		 *
		 *	foo (http://www.pokemon.com/Pikachu_(Electric)) bar
		 *		=> http://www.pokemon.com/Pikachu_(Electric)
		 *
		 *	foo http://www.pokemon.com/Pikachu_(Electric)) bar
		 *		=> http://www.pokemon.com/Pikachu_(Electric))
		 *
		 *	(foo http://www.pokemon.com/Pikachu_(Electric)) bar
		 *		=> foo http://www.pokemon.com/Pikachu_(Electric)
		 */

		for (pos = 0; pos < link_end; pos++) {
			if (data[pos] == copen)
				opening++;
			else if (data[pos] == cclose)
				closing++;
		}

		/* Unbalanced: the trailing closer belongs to the surrounding text. */
		if (closing != opening)
			link_end--;
	}

	return link_end;
}
|
||||
|
||||
/*
 * Checks that `prefix_char` occurs on a word boundary just before `data`,
 * where `data` points to the character to search to the left of, and a word boundary
 * is (currently) a whitespace character, punctuation, or the start of the string.
 *
 * Returns the length of the prefix: 2 when the prefix is "/<prefix_char>",
 * 1 when it is the bare `prefix_char`, and 0 when there is no valid prefix.
 */
static int
check_reddit_autolink_prefix(
	const uint8_t* data,
	size_t max_rewind,
	size_t max_lookbehind,
	size_t size,
	char prefix_char
)
{
	/* Make sure this `/` is part of `/?r/` */
	if (size < 2 || max_rewind < 1 || data[-1] != prefix_char)
		return 0;

	/* Not at the start of the buffer, no inlines to the immediate left of the `prefix_char` */
	if (max_rewind > 1) {
		/*
		 * Keep the byte unsigned: handing a negative value to the
		 * <ctype.h> classifiers is undefined behaviour.
		 */
		const uint8_t boundary = data[-2];

		if (boundary == '/')
			return 2;

		/**
		 * Here's where our lack of unicode-awareness bites us. We don't correctly
		 * match punctuation / whitespace characters for the boundary, because we
		 * reject valid cases like "。r/example" (note the fullwidth period.)
		 *
		 * A better implementation might try to rewind over bytes with the 8th bit set, try
		 * to decode them to a valid codepoint, then do a unicode-aware check on the codepoint.
		 */
		if (ispunct(boundary) || isspace(boundary))
			return 1;

		return 0;
	} else if (max_lookbehind > 2) {
		/* There's an inline element just left of the `prefix_char`, is it an escaped forward
		 * slash? bail out so we correctly handle stuff like "\/r/foo". This will also correctly
		 * allow "\\/r/foo".
		 */
		if (data[-2] == '/' && data[-3] == '\\')
			return 0;
	}

	/* Must be a new-style shortlink with nothing relevant to the left of it. */
	return 1;
}
|
||||
|
||||
/*
 * check_domain: measure the run of domain-like characters at `data`.
 *
 * The first byte must be alphanumeric.  Following bytes may be
 * alphanumeric, '-' or '.'; scanning stops at the first other byte or at
 * index `size - 1` (the final byte is never examined).
 *
 * Returns the scanned length.  When `allow_short` is zero the run must
 * contain at least one '.', otherwise 0 is returned.  An empty input
 * yields 0.
 */
static size_t
check_domain(uint8_t *data, size_t size, int allow_short)
{
	size_t i, np = 0;

	/* Guard the data[0] read and the `size - 1` bound below. */
	if (size == 0)
		return 0;

	if (!isalnum(data[0]))
		return 0;

	for (i = 1; i < size - 1; ++i) {
		if (data[i] == '.') np++;
		else if (!isalnum(data[i]) && data[i] != '-') break;
	}

	if (allow_short) {
		/* We don't need a valid domain in the strict sense (with at
		 * least one dot); so just make sure it's composed of valid
		 * domain characters and return the length of the valid
		 * sequence. */
		return i;
	} else {
		/* a valid domain needs to have at least a dot.
		 * that's as far as we get */
		return np ? i : 0;
	}
}
|
||||
|
||||
/*
 * sd_autolink__www: try to autolink text beginning with "www." at `data`.
 *
 * The byte before `data` (when `max_rewind` allows looking at it) must be
 * punctuation or whitespace.  On success the link text is appended to
 * `link`, `*rewind_p` is set to 0 and the link length is returned;
 * otherwise 0 is returned.  `flags` is not used.
 */
size_t
sd_autolink__www(
	size_t *rewind_p,
	struct buf *link,
	uint8_t *data,
	size_t max_rewind,
	size_t size,
	unsigned int flags)
{
	size_t end;

	if (max_rewind > 0) {
		uint8_t prev = data[-1];

		if (!ispunct(prev) && !isspace(prev))
			return 0;
	}

	if (size < 4)
		return 0;

	if (memcmp(data, "www.", strlen("www.")) != 0)
		return 0;

	/* A dotted domain is mandatory here (allow_short = 0). */
	end = check_domain(data, size, 0);
	if (end == 0)
		return 0;

	/* Extend up to the next whitespace byte. */
	for (; end < size; end++) {
		if (isspace(data[end]))
			break;
	}

	end = autolink_delim(data, end, max_rewind, size);
	if (end == 0)
		return 0;

	bufput(link, data, end);
	*rewind_p = 0;

	return (int)end;
}
|
||||
|
||||
/*
 * sd_autolink__email: try to autolink an e-mail address around `data`.
 *
 * Walks backwards (at most `max_rewind` bytes) over alphanumerics and
 * ".+-_", then forwards over alphanumerics, '@', '.', '-' and '_'.  The
 * forward part must be at least two bytes long, contain exactly one '@'
 * and at least one '.' that is not the very last byte of the input.
 *
 * On success the address (including the rewound part) is appended to
 * `link`, `*rewind_p` receives the rewind length and the forward length is
 * returned; otherwise 0 is returned.  `flags` is not used.
 */
size_t
sd_autolink__email(
	size_t *rewind_p,
	struct buf *link,
	uint8_t *data,
	size_t max_rewind,
	size_t size,
	unsigned int flags)
{
	size_t link_end, rewind;
	int at_count = 0, dot_count = 0;

	rewind = 0;
	while (rewind < max_rewind) {
		uint8_t c = *(data - rewind - 1);

		/* NUL is tested first because strchr() would match the terminator */
		if (c == 0)
			break;

		if (!isalnum(c) && strchr(".+-_", c) == NULL)
			break;

		rewind++;
	}

	if (rewind == 0)
		return 0;

	for (link_end = 0; link_end < size; ++link_end) {
		uint8_t c = data[link_end];

		if (isalnum(c) || c == '-' || c == '_')
			continue;

		if (c == '@') {
			at_count++;
			continue;
		}

		/* a dot only counts (and is only allowed) before the final byte */
		if (c == '.' && link_end < size - 1) {
			dot_count++;
			continue;
		}

		break;
	}

	if (link_end < 2 || at_count != 1 || dot_count == 0)
		return 0;

	link_end = autolink_delim(data, link_end, max_rewind, size);
	if (link_end == 0)
		return 0;

	bufput(link, data - rewind, link_end + rewind);
	*rewind_p = rewind;

	return link_end;
}
|
||||
|
||||
size_t
|
||||
sd_autolink__url(
|
||||
size_t *rewind_p,
|
||||
struct buf *link,
|
||||
uint8_t *data,
|
||||
size_t max_rewind,
|
||||
size_t size,
|
||||
unsigned int flags)
|
||||
{
|
||||
size_t link_end, rewind = 0, domain_len;
|
||||
|
||||
if (size < 4 || data[1] != '/' || data[2] != '/')
|
||||
return 0;
|
||||
|
||||
while (rewind < max_rewind && isalpha(data[-rewind - 1]))
|
||||
rewind++;
|
||||
|
||||
if (!sd_autolink_issafe(data - rewind, size + rewind))
|
||||
return 0;
|
||||
|
||||
link_end = strlen("://");
|
||||
|
||||
domain_len = check_domain(
|
||||
data + link_end,
|
||||
size - link_end,
|
||||
flags & SD_AUTOLINK_SHORT_DOMAINS);
|
||||
|
||||
if (domain_len == 0)
|
||||
return 0;
|
||||
|
||||
link_end += domain_len;
|
||||
while (link_end < size && !isspace(data[link_end]))
|
||||
link_end++;
|
||||
|
||||
link_end = autolink_delim(data, link_end, max_rewind, size);
|
||||
|
||||
if (link_end == 0)
|
||||
return 0;
|
||||
|
||||
bufput(link, data - rewind, link_end + rewind);
|
||||
*rewind_p = rewind;
|
||||
|
||||
return link_end;
|
||||
}
|
||||
|
||||
/*
 * sd_autolink__subreddit: try to autolink an r/foo or /r/foo reference.
 *
 * On success the reference (including the rewound prefix) is appended to
 * `link`, `*rewind_p` receives the prefix length, `*no_slash` is set to 1
 * when the reference had no leading slash, and the length from `data`
 * onwards is returned.  Returns 0 when there is no valid reference.
 */
size_t
sd_autolink__subreddit(
	size_t *rewind_p,
	struct buf *link,
	uint8_t *data,
	size_t max_rewind,
	size_t max_lookbehind,
	size_t size,
	int *no_slash
)
{
	/**
	 * This is meant to handle both r/foo and /r/foo style subreddit references.
	 * In a valid /?r/ link, `*data` will always point to the '/' after the first 'r'.
	 * In pseudo-regex, this matches something like:
	 *
	 * `(/|(?<=\b))r/(all-)?%subreddit%([-+]%subreddit%)*(/[\w\-/]*)?`
	 * where %subreddit% == `((t:)?\w{2,24}|reddit\.com)`
	 */
	size_t link_end;
	size_t rewind;
	int is_allminus = 0;

	rewind = check_reddit_autolink_prefix(data, max_rewind, max_lookbehind, size, 'r');
	if (!rewind)
		return 0;

	/* offset to the "meat" of the link */
	link_end = strlen("/");

	if (size >= link_end + 4 && strncasecmp((char*)data + link_end, "all-", 4) == 0)
		is_allminus = 1;

	do {
		size_t start = link_end;
		size_t max_length = 24;

		/* special case: /r/reddit.com (only subreddit containing '.'). */
		if ( size >= link_end+10 && strncasecmp((char*)data+link_end, "reddit.com", 10) == 0 ) {
			link_end += 10;
			/* Make sure there are no trailing characters (don't do
			 * any autolinking for /r/reddit.commission) */
			max_length = 10;
		}

		/* If not a special case, verify it begins with (t:)?[A-Za-z0-9] */
		else {
			/* support autolinking to timereddits, /r/t:when (1 April 2012) */
			if ( size > link_end+2 && strncasecmp((char*)data+link_end, "t:", 2) == 0 )
				link_end += 2;  /* Jump over the 't:' */

			/* the first character of a subreddit name must be a letter or digit;
			 * after a trailing '+' or '-' there may be no byte left to read, so
			 * check the bound before touching data[link_end] */
			if (link_end >= size || !isalnum(data[link_end]))
				return 0;
			link_end += 1;
		}

		/* consume valid characters ([A-Za-z0-9_]) until we run out */
		while (link_end < size && (isalnum(data[link_end]) ||
							data[link_end] == '_'))
			link_end++;

		/* valid subreddit names are between 3 and 21 characters, with
		 * some subreddits having 2-character names. Don't bother with
		 * autolinking for anything outside this length range.
		 * (chksrname function in reddit/.../validator.py) */
		if ( link_end-start < 2 || link_end-start > max_length )
			return 0;

		/* If we are linking to a multireddit, continue.  The trailing
		 * `link_end++` steps over the separator and is always non-zero. */
	} while ( link_end < size && (data[link_end] == '+' || (is_allminus && data[link_end] == '-')) && link_end++ );

	/* optional trailing path */
	if (link_end < size && data[link_end] == '/') {
		while (link_end < size && (isalnum(data[link_end]) ||
							data[link_end] == '_' ||
							data[link_end] == '/' ||
							data[link_end] == '-'))
			link_end++;
	}

	/* make the link */
	bufput(link, data - rewind, link_end + rewind);

	*no_slash = (rewind == 1);
	*rewind_p = rewind;

	return link_end;
}
|
||||
|
||||
/*
 * sd_autolink__username: try to autolink a u/name or /u/name reference,
 * where `data` points at the '/' that follows the 'u'.
 *
 * On success the reference (including the rewound prefix) is appended to
 * `link`, `*rewind_p` receives the prefix length, `*no_slash` is set to 1
 * when the reference had no leading slash, and the length from `data`
 * onwards is returned.  Returns 0 when there is no valid reference.
 */
size_t
sd_autolink__username(
	size_t *rewind_p,
	struct buf *link,
	uint8_t *data,
	size_t max_rewind,
	size_t max_lookbehind,
	size_t size,
	int *no_slash
)
{
	size_t link_end;
	size_t rewind;
	uint8_t first;

	if (size < 3)
		return 0;

	rewind = check_reddit_autolink_prefix(data, max_rewind, max_lookbehind, size, 'u');
	if (rewind == 0)
		return 0;

	link_end = strlen("/");

	/* the first letter of a username must... well, be valid, we don't care otherwise */
	first = data[link_end];
	if (!(isalnum(first) || first == '_' || first == '-'))
		return 0;
	link_end += 1;

	/* consume valid characters ([A-Za-z0-9_-/]) until we run out */
	for (; link_end < size; link_end++) {
		uint8_t c = data[link_end];

		if (!isalnum(c) && c != '_' && c != '/' && c != '-')
			break;
	}

	/* make the link */
	bufput(link, data - rewind, link_end + rewind);

	*no_slash = (rewind == 1);
	*rewind_p = rewind;

	return link_end;
}
|
|
@ -1,59 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2011, Vicent Marti
|
||||
*
|
||||
* Permission to use, copy, modify, and distribute this software for any
|
||||
* purpose with or without fee is hereby granted, provided that the above
|
||||
* copyright notice and this permission notice appear in all copies.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef UPSKIRT_AUTOLINK_H
#define UPSKIRT_AUTOLINK_H

#include "buffer.h"

#ifdef __cplusplus
extern "C" {
#endif

/* Flags accepted by the sd_autolink__* functions that take `flags`. */
enum {
	/* also accept domains that contain no dot */
	SD_AUTOLINK_SHORT_DOMAINS = (1 << 0),
};

/* Returns 1 when `link` begins with an accepted prefix, 0 otherwise. */
int sd_autolink_issafe(const uint8_t *link, size_t link_len);

/*
 * Autolink matchers.  Each returns 0 when nothing matched; otherwise the
 * matched text is appended to `link`, `*rewind_p` receives the number of
 * bytes before `data` that belong to the link, and the number of bytes
 * consumed from `data` onwards is returned.
 */
size_t sd_autolink__www(size_t *rewind_p, struct buf *link,
	uint8_t *data, size_t max_rewind, size_t size, unsigned int flags);

size_t sd_autolink__email(size_t *rewind_p, struct buf *link,
	uint8_t *data, size_t max_rewind, size_t size, unsigned int flags);

size_t sd_autolink__url(size_t *rewind_p, struct buf *link,
	uint8_t *data, size_t max_rewind, size_t size, unsigned int flags);

/*
 * Reddit-style shortlinks.  `*no_slash` is set to 1 when the reference was
 * written without a leading slash.
 */
size_t sd_autolink__subreddit(size_t *rewind_p, struct buf *link, uint8_t *data,
	size_t max_rewind, size_t max_lookbehind, size_t size, int *no_slash);

size_t sd_autolink__username(size_t *rewind_p, struct buf *link, uint8_t *data,
	size_t max_rewind, size_t max_lookbehind, size_t size, int *no_slash);

#ifdef __cplusplus
}
#endif

#endif
|
||||
|
||||
/* vim: set filetype=c: */
|
|
@ -1,236 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2008, Natacha Porté
|
||||
* Copyright (c) 2011, Vicent Martí
|
||||
*
|
||||
* Permission to use, copy, modify, and distribute this software for any
|
||||
* purpose with or without fee is hereby granted, provided that the above
|
||||
* copyright notice and this permission notice appear in all copies.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*/
|
||||
|
||||
#define BUFFER_MAX_ALLOC_SIZE (1024 * 1024 * 16) //16mb
|
||||
|
||||
#include "buffer.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
|
||||
/* MSVC compat */
|
||||
#if defined(_MSC_VER)
|
||||
# define _buf_vsnprintf _vsnprintf
|
||||
#else
|
||||
# define _buf_vsnprintf vsnprintf
|
||||
#endif
|
||||
|
||||
int
|
||||
bufprefix(const struct buf *buf, const char *prefix)
|
||||
{
|
||||
size_t i;
|
||||
assert(buf && buf->unit);
|
||||
|
||||
for (i = 0; i < buf->size; ++i) {
|
||||
if (prefix[i] == 0)
|
||||
return 0;
|
||||
|
||||
if (buf->data[i] != prefix[i])
|
||||
return buf->data[i] - prefix[i];
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* bufgrow: increasing the allocated size to the given value */
|
||||
int
|
||||
bufgrow(struct buf *buf, size_t neosz)
|
||||
{
|
||||
size_t neoasz;
|
||||
void *neodata;
|
||||
|
||||
assert(buf && buf->unit);
|
||||
|
||||
if (neosz > BUFFER_MAX_ALLOC_SIZE)
|
||||
return BUF_ENOMEM;
|
||||
|
||||
if (buf->asize >= neosz)
|
||||
return BUF_OK;
|
||||
|
||||
neoasz = buf->asize + buf->unit;
|
||||
while (neoasz < neosz)
|
||||
neoasz += buf->unit;
|
||||
|
||||
neodata = realloc(buf->data, neoasz);
|
||||
if (!neodata)
|
||||
return BUF_ENOMEM;
|
||||
|
||||
buf->data = neodata;
|
||||
buf->asize = neoasz;
|
||||
return BUF_OK;
|
||||
}
|
||||
|
||||
|
||||
/* bufnew: allocation of a new buffer */
|
||||
struct buf *
|
||||
bufnew(size_t unit)
|
||||
{
|
||||
struct buf *ret;
|
||||
ret = malloc(sizeof (struct buf));
|
||||
|
||||
if (ret) {
|
||||
ret->data = 0;
|
||||
ret->size = ret->asize = 0;
|
||||
ret->unit = unit;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* bufnullterm: NULL-termination of the string array */
|
||||
const char *
|
||||
bufcstr(struct buf *buf)
|
||||
{
|
||||
assert(buf && buf->unit);
|
||||
|
||||
if (buf->size < buf->asize && buf->data[buf->size] == 0)
|
||||
return (char *)buf->data;
|
||||
|
||||
if (buf->size + 1 <= buf->asize || bufgrow(buf, buf->size + 1) == 0) {
|
||||
buf->data[buf->size] = 0;
|
||||
return (char *)buf->data;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* bufprintf: formatted printing to a buffer */
|
||||
void
|
||||
bufprintf(struct buf *buf, const char *fmt, ...)
|
||||
{
|
||||
va_list ap;
|
||||
int n;
|
||||
|
||||
assert(buf && buf->unit);
|
||||
|
||||
if (buf->size >= buf->asize && bufgrow(buf, buf->size + 1) < 0)
|
||||
return;
|
||||
va_start(ap, fmt);
|
||||
n = _buf_vsnprintf((char *)buf->data + buf->size, buf->asize - buf->size, fmt, ap);
|
||||
va_end(ap);
|
||||
|
||||
if (n < 0) {
|
||||
#ifdef _MSC_VER
|
||||
va_start(ap, fmt);
|
||||
n = _vscprintf(fmt, ap);
|
||||
va_end(ap);
|
||||
#else
|
||||
return;
|
||||
#endif
|
||||
}
|
||||
if ((size_t)n >= buf->asize - buf->size) {
|
||||
if (bufgrow(buf, buf->size + n + 1) < 0)
|
||||
return;
|
||||
|
||||
va_start(ap, fmt);
|
||||
n = _buf_vsnprintf((char *)buf->data + buf->size, buf->asize - buf->size, fmt, ap);
|
||||
va_end(ap);
|
||||
}
|
||||
|
||||
if (n < 0)
|
||||
return;
|
||||
|
||||
buf->size += n;
|
||||
}
|
||||
|
||||
/* bufput: appends raw data to a buffer */
|
||||
void
|
||||
bufput(struct buf *buf, const void *data, size_t len)
|
||||
{
|
||||
assert(buf && buf->unit);
|
||||
|
||||
if (buf->size + len > buf->asize && bufgrow(buf, buf->size + len) < 0)
|
||||
return;
|
||||
|
||||
memcpy(buf->data + buf->size, data, len);
|
||||
buf->size += len;
|
||||
}
|
||||
|
||||
/* bufputs: append a NUL-terminated string (without its terminator) to a buffer */
void
bufputs(struct buf *buf, const char *str)
{
	size_t len = strlen(str);

	bufput(buf, str, len);
}
|
||||
|
||||
|
||||
/* bufputc: appends a single uint8_t to a buffer */
|
||||
void
|
||||
bufputc(struct buf *buf, int c)
|
||||
{
|
||||
assert(buf && buf->unit);
|
||||
|
||||
if (buf->size + 1 > buf->asize && bufgrow(buf, buf->size + 1) < 0)
|
||||
return;
|
||||
|
||||
buf->data[buf->size] = c;
|
||||
buf->size += 1;
|
||||
}
|
||||
|
||||
/* bufrelease: decrease the reference count and free the buffer if needed */
|
||||
void
|
||||
bufrelease(struct buf *buf)
|
||||
{
|
||||
if (!buf)
|
||||
return;
|
||||
|
||||
free(buf->data);
|
||||
free(buf);
|
||||
}
|
||||
|
||||
|
||||
/* bufreset: frees internal data of the buffer */
|
||||
void
|
||||
bufreset(struct buf *buf)
|
||||
{
|
||||
if (!buf)
|
||||
return;
|
||||
|
||||
free(buf->data);
|
||||
buf->data = NULL;
|
||||
buf->size = buf->asize = 0;
|
||||
}
|
||||
|
||||
/* bufslurp: removes a given number of bytes from the head of the array */
|
||||
void
|
||||
bufslurp(struct buf *buf, size_t len)
|
||||
{
|
||||
assert(buf && buf->unit);
|
||||
|
||||
if (len >= buf->size) {
|
||||
buf->size = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
buf->size -= len;
|
||||
memmove(buf->data, buf->data + len, buf->size);
|
||||
}
|
||||
|
||||
/* buftrucate: truncates the buffer at `size` */
|
||||
int
|
||||
buftruncate(struct buf *buf, size_t size)
|
||||
{
|
||||
if (buf->size < size || size < 0) {
|
||||
/* bail out in debug mode so we can figure out why this happened */
|
||||
assert(0);
|
||||
return BUF_EINVALIDIDX;
|
||||
}
|
||||
|
||||
buf->size = size;
|
||||
return BUF_OK;
|
||||
}
|
|
@ -1,100 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2008, Natacha Porté
|
||||
* Copyright (c) 2011, Vicent Martí
|
||||
*
|
||||
* Permission to use, copy, modify, and distribute this software for any
|
||||
* purpose with or without fee is hereby granted, provided that the above
|
||||
* copyright notice and this permission notice appear in all copies.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef BUFFER_H__
#define BUFFER_H__

#include <stddef.h>
#include <stdarg.h>
#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

/* MSVC compat: drop GCC attributes and the `inline` keyword */
#if defined(_MSC_VER)
#define __attribute__(x)
#define inline
#endif

/* Status codes returned by bufgrow() and buftruncate() */
typedef enum {
	BUF_OK = 0,
	BUF_ENOMEM = -1,
	BUF_EINVALIDIDX = -2,
} buferror_t;

/* struct buf: character array buffer */
struct buf {
	uint8_t *data;		/* actual character data */
	size_t size;	/* size of the string */
	size_t asize;	/* allocated size (0 = volatile buffer) */
	size_t unit;	/* reallocation unit size (0 = read-only buffer) */
};

/*
 * BUF_STATIC: initializer for a read-only buffer wrapping a string literal.
 * One value per struct member: data, size, asize, unit.
 */
#define BUF_STATIC(string) \
	{ (uint8_t *)string, sizeof string - 1, sizeof string, 0 }

/*
 * BUF_VOLATILE: initializer for a volatile buffer on the stack wrapping an
 * existing C string (requires <string.h> for strlen).
 */
#define BUF_VOLATILE(strname) \
	{ (uint8_t *)strname, strlen(strname), 0, 0 }

/* BUFPUTSL: optimized bufputs of a string literal */
#define BUFPUTSL(output, literal) \
	bufput(output, literal, sizeof literal - 1)

/* bufgrow: increasing the allocated size to the given value */
int bufgrow(struct buf *, size_t);

/* bufnew: allocation of a new buffer */
struct buf *bufnew(size_t) __attribute__ ((malloc));

/* bufcstr: NUL-termination of the string array (making a C-string) */
const char *bufcstr(struct buf *);

/* bufprefix: compare the beginning of a buffer with a string */
int bufprefix(const struct buf *buf, const char *prefix);

/* bufput: appends raw data to a buffer */
void bufput(struct buf *, const void *, size_t);

/* bufputs: appends a NUL-terminated string to a buffer */
void bufputs(struct buf *, const char *);

/* bufputc: appends a single char to a buffer */
void bufputc(struct buf *, int);

/* bufrelease: frees the buffer and its data */
void bufrelease(struct buf *);

/* bufreset: frees internal data of the buffer */
void bufreset(struct buf *);

/* bufslurp: removes a given number of bytes from the head of the array */
void bufslurp(struct buf *, size_t);

/* bufprintf: formatted printing to a buffer */
void bufprintf(struct buf *, const char *, ...) __attribute__ ((format (printf, 2, 3)));

/* buftruncate: truncates the buffer at `size` */
int buftruncate(struct buf *buf, size_t size);

#ifdef __cplusplus
}
#endif

#endif
|
|
@ -1,206 +0,0 @@
|
|||
/* C code produced by gperf version 3.0.3 */
|
||||
/* Command-line: gperf -N find_block_tag -H hash_block_tag -C -c -E --ignore-case html_block_names.txt */
|
||||
/* Computed positions: -k'1-2' */
|
||||
|
||||
#if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \
|
||||
&& ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \
|
||||
&& (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \
|
||||
&& ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \
|
||||
&& ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \
|
||||
&& ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \
|
||||
&& ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \
|
||||
&& ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \
|
||||
&& ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \
|
||||
&& ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \
|
||||
&& ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \
|
||||
&& ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \
|
||||
&& ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \
|
||||
&& ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \
|
||||
&& ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \
|
||||
&& ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \
|
||||
&& ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \
|
||||
&& ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \
|
||||
&& ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \
|
||||
&& ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \
|
||||
&& ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \
|
||||
&& ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \
|
||||
&& ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126))
|
||||
/* The character set is not based on ISO-646. */
|
||||
error "gperf generated tables don't work with this execution character set. Please report a bug to <bug-gnu-gperf@gnu.org>."
|
||||
#endif
|
||||
|
||||
/* maximum key range = 37, duplicates = 0 */
|
||||
|
||||
#ifndef GPERF_DOWNCASE
|
||||
#define GPERF_DOWNCASE 1
|
||||
static unsigned char gperf_downcase[256] =
|
||||
{
|
||||
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
|
||||
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
|
||||
30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
|
||||
45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
|
||||
60, 61, 62, 63, 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106,
|
||||
107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121,
|
||||
122, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104,
|
||||
105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
|
||||
120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134,
|
||||
135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
|
||||
150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164,
|
||||
165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
|
||||
180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194,
|
||||
195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209,
|
||||
210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224,
|
||||
225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
|
||||
240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254,
|
||||
255
|
||||
};
|
||||
#endif
|
||||
|
||||
#ifndef GPERF_CASE_STRNCMP
|
||||
#define GPERF_CASE_STRNCMP 1
|
||||
static int
|
||||
gperf_case_strncmp (s1, s2, n)
|
||||
register const char *s1;
|
||||
register const char *s2;
|
||||
register unsigned int n;
|
||||
{
|
||||
for (; n > 0;)
|
||||
{
|
||||
unsigned char c1 = gperf_downcase[(unsigned char)*s1++];
|
||||
unsigned char c2 = gperf_downcase[(unsigned char)*s2++];
|
||||
if (c1 != 0 && c1 == c2)
|
||||
{
|
||||
n--;
|
||||
continue;
|
||||
}
|
||||
return (int)c1 - (int)c2;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
 * Perfect-hash function for the HTML block tag names.
 *
 * The hash is len plus table values selected by the first byte and, when
 * len is greater than 1, by the second byte + 1. Letters of either case map
 * to the same table value, so the result is case-insensitive. Bytes that
 * cannot start a known keyword contribute 38, which pushes the result above
 * the largest valid hash (37).
 *
 * str must provide at least one readable byte, and two when len != 1; the
 * caller is expected to reject len == 0 before calling.
 *
 * NOTE(review): gperf output; the K&R parameter list and the `register`
 * storage class were replaced because they are rejected by C23 / C++17.
 */
#ifdef __GNUC__
__inline
#else
#ifdef __cplusplus
inline
#endif
#endif
static unsigned int
hash_block_tag (const char *str, unsigned int len)
{
  /* 257 entries: the second byte is looked up at index (byte + 1). */
  static const unsigned char asso_values[] =
    {
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
       8, 30, 25, 20, 15, 10, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38,  0, 38,  0, 38,
       5,  5,  5, 15,  0, 38, 38,  0, 15, 10,
       0, 38, 38, 15,  0,  5, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38,  0, 38,
       0, 38,  5,  5,  5, 15,  0, 38, 38,  0,
      15, 10,  0, 38, 38, 15,  0,  5, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38
    };
  int hval = len;

  switch (hval)
    {
      default:
        /* Two or more bytes: the second byte also takes part. */
        hval += asso_values[(unsigned char)str[1]+1];
      /*FALLTHROUGH*/
      case 1:
        hval += asso_values[(unsigned char)str[0]];
        break;
    }
  return hval;
}
|
||||
|
||||
/*
 * Look up an HTML block-level tag name, ignoring ASCII case.
 *
 * str/len describe the candidate name; only the first len bytes are read,
 * so the input does not have to be NUL-terminated. Names shorter than 1 or
 * longer than 10 bytes are rejected without hashing.
 *
 * Returns a pointer to the matching (lowercase) entry of the internal word
 * list, or 0 when the name is not one of the 24 known block tags.
 *
 * NOTE(review): gperf output; the K&R parameter list and the `register`
 * storage class were replaced because they are rejected by C23 / C++17.
 */
#ifdef __GNUC__
__inline
#ifdef __GNUC_STDC_INLINE__
__attribute__ ((__gnu_inline__))
#endif
#endif
const char *
find_block_tag (const char *str, unsigned int len)
{
  enum
    {
      TOTAL_KEYWORDS = 24,
      MIN_WORD_LENGTH = 1,
      MAX_WORD_LENGTH = 10,
      MIN_HASH_VALUE = 1,
      MAX_HASH_VALUE = 37
    };

  /* Indexed by hash value; "" marks hash values no keyword maps to. */
  static const char * const wordlist[] =
    {
      "",
      "p",
      "dl",
      "div",
      "math",
      "table",
      "",
      "ul",
      "del",
      "form",
      "blockquote",
      "figure",
      "ol",
      "fieldset",
      "",
      "h1",
      "",
      "h6",
      "pre",
      "", "",
      "script",
      "h5",
      "noscript",
      "",
      "style",
      "iframe",
      "h4",
      "ins",
      "", "", "",
      "h3",
      "", "", "", "",
      "h2"
    };

  if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
    {
      int key = hash_block_tag (str, len);

      if (key <= MAX_HASH_VALUE && key >= 0)
        {
          const char *s = wordlist[key];

          /* Cheap first-byte test (equal once the ASCII case bit, 32, is
             masked out) before the full comparison; s[len] == '\0' makes
             sure the table entry is not longer than the input. */
          if ((((unsigned char)*str ^ (unsigned char)*s) & ~32) == 0 && !gperf_case_strncmp (str, s, len) && s[len] == '\0')
            return s;
        }
    }
  return 0;
}
|
|
@ -1,292 +0,0 @@
|
|||
%language=ANSI-C
|
||||
%define lookup-function-name is_allowed_named_entity
|
||||
%compare-strncmp
|
||||
%readonly-tables
|
||||
%define hash-function-name hash_html_entity
|
||||
%enum
|
||||
%includes
|
||||
%{
|
||||
#include <stdlib.h>
|
||||
|
||||
/* Parsers tend to choke on entities with values greater than this */
const uint32_t max_num_entity_val = 0x10ffff;

/* Any numeric entity longer than this is obviously above max_num_entity_val;
 * used to avoid dealing with overflows. */
const size_t MAX_NUM_ENTITY_LEN = 7;

/*
 * Return non-zero when entity_val is safe to emit as a numeric character
 * reference, 0 otherwise.
 *
 * Some XML parsers choke on certain values (mostly control characters).
 * The following inclusive ranges are rejected:
 *
 *     0 .. 8
 *     11 .. 12
 *     14 .. 31
 *     55296 .. 57343   (0xD800 .. 0xDFFF)
 *     65534 .. 65535   (0xFFFE .. 0xFFFF)
 *     anything above max_num_entity_val
 *
 * What remains is 9, 10, 13, 32..55295, 57344..65533 and 65536..0x10ffff.
 *
 * Declared `static inline`: a plain `inline` definition provides no external
 * definition under C99 semantics, so a call that is not inlined would fail
 * to link.
 * NOTE(review): assumes no other translation unit relies on an external
 * definition of this function -- confirm.
 */
static inline int is_valid_numeric_entity(uint32_t entity_val)
{
	return (entity_val > 8
		&& (entity_val != 11 && entity_val != 12)
		&& (entity_val < 14 || entity_val > 31)
		&& (entity_val < 55296 || entity_val > 57343)
		&& (entity_val != 65534 && entity_val != 65535)
		&& entity_val <= max_num_entity_val);
}
|
||||
|
||||
%}
|
||||
%%
|
||||
Æ
|
||||
Á
|
||||
Â
|
||||
À
|
||||
Α
|
||||
Å
|
||||
Ã
|
||||
Ä
|
||||
Β
|
||||
Ç
|
||||
Χ
|
||||
‡
|
||||
Δ
|
||||
Ð
|
||||
É
|
||||
Ê
|
||||
È
|
||||
Ε
|
||||
Η
|
||||
Ë
|
||||
Γ
|
||||
Í
|
||||
Î
|
||||
Ì
|
||||
Ι
|
||||
Ï
|
||||
Κ
|
||||
Λ
|
||||
Μ
|
||||
Ñ
|
||||
Ν
|
||||
Œ
|
||||
Ó
|
||||
Ô
|
||||
Ò
|
||||
Ω
|
||||
Ο
|
||||
Ø
|
||||
Õ
|
||||
Ö
|
||||
Φ
|
||||
Π
|
||||
″
|
||||
Ψ
|
||||
Ρ
|
||||
Š
|
||||
Σ
|
||||
Þ
|
||||
Τ
|
||||
Θ
|
||||
Ú
|
||||
Û
|
||||
Ù
|
||||
Υ
|
||||
Ü
|
||||
Ξ
|
||||
Ý
|
||||
Ÿ
|
||||
Ζ
|
||||
á
|
||||
â
|
||||
´
|
||||
æ
|
||||
à
|
||||
ℵ
|
||||
α
|
||||
&
|
||||
∧
|
||||
∠
|
||||
'
|
||||
å
|
||||
≈
|
||||
ã
|
||||
ä
|
||||
„
|
||||
β
|
||||
¦
|
||||
•
|
||||
∩
|
||||
ç
|
||||
¸
|
||||
¢
|
||||
χ
|
||||
ˆ
|
||||
♣
|
||||
≅
|
||||
©
|
||||
↵
|
||||
∪
|
||||
¤
|
||||
⇓
|
||||
†
|
||||
↓
|
||||
°
|
||||
δ
|
||||
♦
|
||||
÷
|
||||
é
|
||||
ê
|
||||
è
|
||||
∅
|
||||
 
|
||||
 
|
||||
ε
|
||||
≡
|
||||
η
|
||||
ð
|
||||
ë
|
||||
€
|
||||
∃
|
||||
ƒ
|
||||
∀
|
||||
½
|
||||
¼
|
||||
¾
|
||||
⁄
|
||||
γ
|
||||
≥
|
||||
>
|
||||
⇔
|
||||
↔
|
||||
♥
|
||||
…
|
||||
í
|
||||
î
|
||||
¡
|
||||
ì
|
||||
ℑ
|
||||
∞
|
||||
∫
|
||||
ι
|
||||
¿
|
||||
∈
|
||||
ï
|
||||
κ
|
||||
⇐
|
||||
λ
|
||||
⟨
|
||||
«
|
||||
←
|
||||
⌈
|
||||
“
|
||||
≤
|
||||
⌊
|
||||
∗
|
||||
◊
|
||||
‎
|
||||
‹
|
||||
‘
|
||||
<
|
||||
¯
|
||||
—
|
||||
µ
|
||||
·
|
||||
−
|
||||
μ
|
||||
∇
|
||||
|
||||
–
|
||||
≠
|
||||
∋
|
||||
¬
|
||||
∉
|
||||
⊄
|
||||
ñ
|
||||
ν
|
||||
ó
|
||||
ô
|
||||
œ
|
||||
ò
|
||||
‾
|
||||
ω
|
||||
ο
|
||||
⊕
|
||||
∨
|
||||
ª
|
||||
º
|
||||
ø
|
||||
õ
|
||||
⊗
|
||||
ö
|
||||
¶
|
||||
∂
|
||||
‰
|
||||
⊥
|
||||
φ
|
||||
π
|
||||
ϖ
|
||||
±
|
||||
£
|
||||
′
|
||||
∏
|
||||
∝
|
||||
ψ
|
||||
"
|
||||
⇒
|
||||
√
|
||||
⟩
|
||||
»
|
||||
→
|
||||
⌉
|
||||
”
|
||||
ℜ
|
||||
®
|
||||
⌋
|
||||
ρ
|
||||
‏
|
||||
›
|
||||
’
|
||||
‚
|
||||
š
|
||||
⋅
|
||||
§
|
||||
­
|
||||
σ
|
||||
ς
|
||||
∼
|
||||
♠
|
||||
⊂
|
||||
⊆
|
||||
∑
|
||||
¹
|
||||
²
|
||||
³
|
||||
⊃
|
||||
⊇
|
||||
ß
|
||||
τ
|
||||
∴
|
||||
θ
|
||||
ϑ
|
||||
 
|
||||
þ
|
||||
˜
|
||||
×
|
||||
™
|
||||
⇑
|
||||
ú
|
||||
↑
|
||||
û
|
||||
ù
|
||||
¨
|
||||
ϒ
|
||||
υ
|
||||
ü
|
||||
℘
|
||||
ξ
|
||||
ý
|
||||
¥
|
||||
ÿ
|
||||
ζ
|
||||
‍
|
||||
‌
|
File diff suppressed because it is too large
Load diff
|
@ -1,140 +0,0 @@
|
|||
/* markdown.h - generic markdown parser */
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009, Natacha Porté
|
||||
*
|
||||
* Permission to use, copy, modify, and distribute this software for any
|
||||
* purpose with or without fee is hereby granted, provided that the above
|
||||
* copyright notice and this permission notice appear in all copies.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef UPSKIRT_MARKDOWN_H
|
||||
#define UPSKIRT_MARKDOWN_H
|
||||
|
||||
#include "buffer.h"
|
||||
#include "autolink.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define SUNDOWN_VERSION "1.16.0"
|
||||
#define SUNDOWN_VER_MAJOR 1
|
||||
#define SUNDOWN_VER_MINOR 16
|
||||
#define SUNDOWN_VER_REVISION 0
|
||||
|
||||
/********************
|
||||
* TYPE DEFINITIONS *
|
||||
********************/
|
||||
|
||||
/* mkd_autolink - type of autolink */
enum mkd_autolink {
	MKDA_NOT_AUTOLINK,	/* used internally when it is not an autolink */
	MKDA_NORMAL,		/* normal http/https/ftp/mailto/etc link */
	MKDA_EMAIL		/* e-mail link without explicit mailto: */
};
|
||||
|
||||
/* mkd_tableflags - per-cell table flags.
 * The two low bits carry the alignment (extract it with
 * MKD_TABLE_ALIGNMASK); MKD_TABLE_HEADER is an independent bit. */
enum mkd_tableflags {
	MKD_TABLE_ALIGN_L = 1,
	MKD_TABLE_ALIGN_R = 2,
	MKD_TABLE_ALIGN_CENTER = MKD_TABLE_ALIGN_L | MKD_TABLE_ALIGN_R,
	MKD_TABLE_ALIGNMASK = MKD_TABLE_ALIGN_L | MKD_TABLE_ALIGN_R,
	MKD_TABLE_HEADER = 4
};
|
||||
|
||||
/* mkd_extensions - bit flags selecting optional parser extensions;
 * OR them together for the `extensions` argument of sd_markdown_new().
 * Bit 5 (1 << 5) is not assigned to any flag in this header. */
enum mkd_extensions {
	MKDEXT_NO_INTRA_EMPHASIS = (1 << 0),
	MKDEXT_TABLES = (1 << 1),
	MKDEXT_FENCED_CODE = (1 << 2),
	MKDEXT_AUTOLINK = (1 << 3),
	MKDEXT_STRIKETHROUGH = (1 << 4),
	MKDEXT_SPACE_HEADERS = (1 << 6),
	MKDEXT_SUPERSCRIPT = (1 << 7),
	MKDEXT_LAX_SPACING = (1 << 8),
	MKDEXT_NO_EMAIL_AUTOLINK = (1 << 9)
};
|
||||
|
||||
/*
 * sd_callbacks - table of renderer hooks invoked for parsed data.
 *
 * Every hook receives the output buffer `ob` first and the caller-supplied
 * `opaque` pointer among its trailing arguments. Member order is part of
 * the binary layout and must not change.
 */
struct sd_callbacks {
	/* ---- block level: a NULL hook skips the block ---- */
	void (*blockcode)(struct buf *ob, const struct buf *text,
			const struct buf *lang, void *opaque);
	void (*blockquote)(struct buf *ob, const struct buf *text, void *opaque);
	void (*blockhtml)(struct buf *ob,const struct buf *text, void *opaque);
	void (*header)(struct buf *ob, const struct buf *text, int level,
			void *opaque);
	void (*hrule)(struct buf *ob, void *opaque);
	void (*list)(struct buf *ob, const struct buf *text, int flags,
			void *opaque);
	void (*listitem)(struct buf *ob, const struct buf *text, int flags,
			void *opaque);
	void (*paragraph)(struct buf *ob, const struct buf *text, void *opaque);
	void (*table)(struct buf *ob, const struct buf *header,
			const struct buf *body, void *opaque);
	void (*table_row)(struct buf *ob, const struct buf *text, void *opaque);
	void (*table_cell)(struct buf *ob, const struct buf *text, int flags,
			void *opaque, int col_span);

	/* ---- span level: a NULL hook, or a hook returning 0, prints the
	 * span verbatim ---- */
	int (*autolink)(struct buf *ob, const struct buf *link,
			enum mkd_autolink type, void *opaque);
	int (*codespan)(struct buf *ob, const struct buf *text, void *opaque);
	int (*double_emphasis)(struct buf *ob, const struct buf *text,
			void *opaque);
	int (*emphasis)(struct buf *ob, const struct buf *text, void *opaque);
	int (*image)(struct buf *ob, const struct buf *link,
			const struct buf *title, const struct buf *alt,
			void *opaque);
	int (*linebreak)(struct buf *ob, void *opaque);
	int (*link)(struct buf *ob, const struct buf *link,
			const struct buf *title, const struct buf *content,
			void *opaque);
	int (*raw_html_tag)(struct buf *ob, const struct buf *tag, void *opaque);
	int (*triple_emphasis)(struct buf *ob, const struct buf *text,
			void *opaque);
	int (*strikethrough)(struct buf *ob, const struct buf *text,
			void *opaque);
	int (*superscript)(struct buf *ob, const struct buf *text, void *opaque);

	/* ---- low level: a NULL hook copies the input straight to the
	 * output ---- */
	void (*entity)(struct buf *ob, const struct buf *entity, void *opaque);
	void (*normal_text)(struct buf *ob, const struct buf *text, void *opaque);

	/* ---- document header and footer ---- */
	void (*doc_header)(struct buf *ob, void *opaque);
	void (*doc_footer)(struct buf *ob, void *opaque);
};
|
||||
|
||||
struct sd_markdown;
|
||||
|
||||
/*********
|
||||
* FLAGS *
|
||||
*********/
|
||||
|
||||
/* list/listitem flags */
|
||||
#define MKD_LIST_ORDERED 1
|
||||
#define MKD_LI_BLOCK 2 /* <li> containing block data */
|
||||
|
||||
/**********************
|
||||
* EXPORTED FUNCTIONS *
|
||||
**********************/
|
||||
|
||||
extern struct sd_markdown *
|
||||
sd_markdown_new(
|
||||
unsigned int extensions,
|
||||
size_t max_nesting,
|
||||
size_t max_table_cols,
|
||||
const struct sd_callbacks *callbacks,
|
||||
void *opaque);
|
||||
|
||||
extern void
|
||||
sd_markdown_render(struct buf *ob, const uint8_t *document, size_t doc_size, struct sd_markdown *md);
|
||||
|
||||
extern void
|
||||
sd_markdown_free(struct sd_markdown *md);
|
||||
|
||||
extern void
|
||||
sd_version(int *major, int *minor, int *revision);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
/* vim: set filetype=c: */
|
|
@ -1,81 +0,0 @@
|
|||
#include "stack.h"
|
||||
#include <string.h>
|
||||
|
||||
int
|
||||
stack_grow(struct stack *st, size_t new_size)
|
||||
{
|
||||
void **new_st;
|
||||
|
||||
if (st->asize >= new_size)
|
||||
return 0;
|
||||
|
||||
new_st = realloc(st->item, new_size * sizeof(void *));
|
||||
if (new_st == NULL)
|
||||
return -1;
|
||||
|
||||
memset(new_st + st->asize, 0x0,
|
||||
(new_size - st->asize) * sizeof(void *));
|
||||
|
||||
st->item = new_st;
|
||||
st->asize = new_size;
|
||||
|
||||
if (st->size > new_size)
|
||||
st->size = new_size;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void
|
||||
stack_free(struct stack *st)
|
||||
{
|
||||
if (!st)
|
||||
return;
|
||||
|
||||
free(st->item);
|
||||
|
||||
st->item = NULL;
|
||||
st->size = 0;
|
||||
st->asize = 0;
|
||||
}
|
||||
|
||||
int
|
||||
stack_init(struct stack *st, size_t initial_size)
|
||||
{
|
||||
st->item = NULL;
|
||||
st->size = 0;
|
||||
st->asize = 0;
|
||||
|
||||
if (!initial_size)
|
||||
initial_size = 8;
|
||||
|
||||
return stack_grow(st, initial_size);
|
||||
}
|
||||
|
||||
void *
|
||||
stack_pop(struct stack *st)
|
||||
{
|
||||
if (!st->size)
|
||||
return NULL;
|
||||
|
||||
return st->item[--st->size];
|
||||
}
|
||||
|
||||
int
|
||||
stack_push(struct stack *st, void *item)
|
||||
{
|
||||
if (stack_grow(st, st->size * 2) < 0)
|
||||
return -1;
|
||||
|
||||
st->item[st->size++] = item;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void *
|
||||
stack_top(struct stack *st)
|
||||
{
|
||||
if (!st->size)
|
||||
return NULL;
|
||||
|
||||
return st->item[st->size - 1];
|
||||
}
|
||||
|
|
@ -1,29 +0,0 @@
|
|||
#ifndef STACK_H__
|
||||
#define STACK_H__
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* stack - growable LIFO array of untyped pointers */
struct stack {
	void **item;	/* item array; NULL while nothing is allocated */
	size_t size;	/* number of items currently on the stack */
	size_t asize;	/* number of slots allocated in item */
};
|
||||
|
||||
void stack_free(struct stack *);
|
||||
int stack_grow(struct stack *, size_t);
|
||||
int stack_init(struct stack *, size_t);
|
||||
|
||||
int stack_push(struct stack *, void *);
|
||||
|
||||
void *stack_pop(struct stack *);
|
||||
void *stack_top(struct stack *);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
|
@ -1,81 +0,0 @@
|
|||
#include "stack.h"
|
||||
#include <string.h>
|
||||
|
||||
int
|
||||
stack_grow(struct stack *st, size_t new_size)
|
||||
{
|
||||
void **new_st;
|
||||
|
||||
if (st->asize >= new_size)
|
||||
return 0;
|
||||
|
||||
new_st = realloc(st->item, new_size * sizeof(void *));
|
||||
if (new_st == NULL)
|
||||
return -1;
|
||||
|
||||
memset(new_st + st->asize, 0x0,
|
||||
(new_size - st->asize) * sizeof(void *));
|
||||
|
||||
st->item = new_st;
|
||||
st->asize = new_size;
|
||||
|
||||
if (st->size > new_size)
|
||||
st->size = new_size;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void
|
||||
stack_free(struct stack *st)
|
||||
{
|
||||
if (!st)
|
||||
return;
|
||||
|
||||
free(st->item);
|
||||
|
||||
st->item = NULL;
|
||||
st->size = 0;
|
||||
st->asize = 0;
|
||||
}
|
||||
|
||||
int
|
||||
stack_init(struct stack *st, size_t initial_size)
|
||||
{
|
||||
st->item = NULL;
|
||||
st->size = 0;
|
||||
st->asize = 0;
|
||||
|
||||
if (!initial_size)
|
||||
initial_size = 8;
|
||||
|
||||
return stack_grow(st, initial_size);
|
||||
}
|
||||
|
||||
void *
|
||||
stack_pop(struct stack *st)
|
||||
{
|
||||
if (!st->size)
|
||||
return NULL;
|
||||
|
||||
return st->item[--st->size];
|
||||
}
|
||||
|
||||
int
|
||||
stack_push(struct stack *st, void *item)
|
||||
{
|
||||
if (stack_grow(st, st->size * 2) < 0)
|
||||
return -1;
|
||||
|
||||
st->item[st->size++] = item;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void *
|
||||
stack_top(struct stack *st)
|
||||
{
|
||||
if (!st->size)
|
||||
return NULL;
|
||||
|
||||
return st->item[st->size - 1];
|
||||
}
|
||||
|
|
@ -1,29 +0,0 @@
|
|||
#ifndef STACK_H__
|
||||
#define STACK_H__
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* stack - growable LIFO array of untyped pointers */
struct stack {
	void **item;	/* item array; NULL while nothing is allocated */
	size_t size;	/* number of items currently on the stack */
	size_t asize;	/* number of slots allocated in item */
};
|
||||
|
||||
void stack_free(struct stack *);
|
||||
int stack_grow(struct stack *, size_t);
|
||||
int stack_init(struct stack *, size_t);
|
||||
|
||||
int stack_push(struct stack *, void *);
|
||||
|
||||
void *stack_pop(struct stack *);
|
||||
void *stack_top(struct stack *);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
|
@ -1,199 +0,0 @@
|
|||
/* stdint.h standard header */
|
||||
#pragma once
|
||||
#ifndef _STDINT
|
||||
#define _STDINT
|
||||
#ifndef RC_INVOKED
|
||||
#include <yvals.h>
|
||||
|
||||
/* NB: assumes
|
||||
byte has 8 bits
|
||||
long is 32 bits
|
||||
pointer can convert to and from long long
|
||||
long long is longest type
|
||||
*/
|
||||
|
||||
_C_STD_BEGIN
|
||||
/* TYPE DEFINITIONS */
|
||||
typedef signed char int8_t;
|
||||
typedef short int16_t;
|
||||
typedef int int32_t;
|
||||
|
||||
typedef unsigned char uint8_t;
|
||||
typedef unsigned short uint16_t;
|
||||
typedef unsigned int uint32_t;
|
||||
|
||||
typedef signed char int_least8_t;
|
||||
typedef short int_least16_t;
|
||||
typedef int int_least32_t;
|
||||
|
||||
typedef unsigned char uint_least8_t;
|
||||
typedef unsigned short uint_least16_t;
|
||||
typedef unsigned int uint_least32_t;
|
||||
|
||||
typedef char int_fast8_t;
|
||||
typedef int int_fast16_t;
|
||||
typedef int int_fast32_t;
|
||||
|
||||
typedef unsigned char uint_fast8_t;
|
||||
typedef unsigned int uint_fast16_t;
|
||||
typedef unsigned int uint_fast32_t;
|
||||
|
||||
#ifndef _INTPTR_T_DEFINED
|
||||
#define _INTPTR_T_DEFINED
|
||||
#ifdef _WIN64
|
||||
typedef __int64 intptr_t;
|
||||
#else /* _WIN64 */
|
||||
typedef _W64 int intptr_t;
|
||||
#endif /* _WIN64 */
|
||||
#endif /* _INTPTR_T_DEFINED */
|
||||
|
||||
#ifndef _UINTPTR_T_DEFINED
|
||||
#define _UINTPTR_T_DEFINED
|
||||
#ifdef _WIN64
|
||||
typedef unsigned __int64 uintptr_t;
|
||||
#else /* _WIN64 */
|
||||
typedef _W64 unsigned int uintptr_t;
|
||||
#endif /* _WIN64 */
|
||||
#endif /* _UINTPTR_T_DEFINED */
|
||||
|
||||
typedef _Longlong int64_t;
|
||||
typedef _ULonglong uint64_t;
|
||||
|
||||
typedef _Longlong int_least64_t;
|
||||
typedef _ULonglong uint_least64_t;
|
||||
|
||||
typedef _Longlong int_fast64_t;
|
||||
typedef _ULonglong uint_fast64_t;
|
||||
|
||||
typedef _Longlong intmax_t;
|
||||
typedef _ULonglong uintmax_t;
|
||||
|
||||
/* LIMIT MACROS */
|
||||
#define INT8_MIN (-0x7f - _C2)
|
||||
#define INT16_MIN (-0x7fff - _C2)
|
||||
#define INT32_MIN (-0x7fffffff - _C2)
|
||||
|
||||
#define INT8_MAX 0x7f
|
||||
#define INT16_MAX 0x7fff
|
||||
#define INT32_MAX 0x7fffffff
|
||||
#define UINT8_MAX 0xff
|
||||
#define UINT16_MAX 0xffff
|
||||
#define UINT32_MAX 0xffffffff
|
||||
|
||||
#define INT_LEAST8_MIN (-0x7f - _C2)
|
||||
#define INT_LEAST16_MIN (-0x7fff - _C2)
|
||||
#define INT_LEAST32_MIN (-0x7fffffff - _C2)
|
||||
|
||||
#define INT_LEAST8_MAX 0x7f
|
||||
#define INT_LEAST16_MAX 0x7fff
|
||||
#define INT_LEAST32_MAX 0x7fffffff
|
||||
#define UINT_LEAST8_MAX 0xff
|
||||
#define UINT_LEAST16_MAX 0xffff
|
||||
#define UINT_LEAST32_MAX 0xffffffff
|
||||
|
||||
#define INT_FAST8_MIN (-0x7f - _C2)
|
||||
#define INT_FAST16_MIN (-0x7fff - _C2)
|
||||
#define INT_FAST32_MIN (-0x7fffffff - _C2)
|
||||
|
||||
#define INT_FAST8_MAX 0x7f
|
||||
#define INT_FAST16_MAX 0x7fff
|
||||
#define INT_FAST32_MAX 0x7fffffff
|
||||
#define UINT_FAST8_MAX 0xff
|
||||
#define UINT_FAST16_MAX 0xffff
|
||||
#define UINT_FAST32_MAX 0xffffffff
|
||||
|
||||
#if _INTPTR == 0 || _INTPTR == 1
|
||||
#define INTPTR_MAX 0x7fffffff
|
||||
#define INTPTR_MIN (-INTPTR_MAX - _C2)
|
||||
#define UINTPTR_MAX 0xffffffff
|
||||
|
||||
#else /* _INTPTR == 2 */
|
||||
#define INTPTR_MIN (-_LLONG_MAX - _C2)
|
||||
#define INTPTR_MAX _LLONG_MAX
|
||||
#define UINTPTR_MAX _ULLONG_MAX
|
||||
#endif /* _INTPTR */
|
||||
|
||||
#define INT8_C(x) (x)
|
||||
#define INT16_C(x) (x)
|
||||
#define INT32_C(x) ((x) + (INT32_MAX - INT32_MAX))
|
||||
|
||||
#define UINT8_C(x) (x)
|
||||
#define UINT16_C(x) (x)
|
||||
#define UINT32_C(x) ((x) + (UINT32_MAX - UINT32_MAX))
|
||||
|
||||
#ifdef _WIN64
|
||||
#define PTRDIFF_MIN INT64_MIN
|
||||
#define PTRDIFF_MAX INT64_MAX
|
||||
#else /* _WIN64 */
|
||||
#define PTRDIFF_MIN INT32_MIN
|
||||
#define PTRDIFF_MAX INT32_MAX
|
||||
#endif /* _WIN64 */
|
||||
|
||||
#define SIG_ATOMIC_MIN INT32_MIN
|
||||
#define SIG_ATOMIC_MAX INT32_MAX
|
||||
|
||||
#ifndef SIZE_MAX
|
||||
#ifdef _WIN64
|
||||
#define SIZE_MAX UINT64_MAX
|
||||
#else /* _WIN64 */
|
||||
#define SIZE_MAX UINT32_MAX
|
||||
#endif /* _WIN64 */
|
||||
#endif /* SIZE_MAX */
|
||||
|
||||
#define WCHAR_MIN 0x0000
|
||||
#define WCHAR_MAX 0xffff
|
||||
|
||||
#define WINT_MIN 0x0000
|
||||
#define WINT_MAX 0xffff
|
||||
|
||||
#define INT64_MIN (-0x7fffffffffffffff - _C2)
|
||||
#define INT64_MAX 0x7fffffffffffffff
|
||||
#define UINT64_MAX 0xffffffffffffffffU
|
||||
|
||||
#define INT_LEAST64_MIN (-0x7fffffffffffffff - _C2)
|
||||
#define INT_LEAST64_MAX 0x7fffffffffffffff
|
||||
#define UINT_LEAST64_MAX 0xffffffffffffffffU
|
||||
|
||||
#define INT_FAST64_MIN (-0x7fffffffffffffff - _C2)
|
||||
#define INT_FAST64_MAX 0x7fffffffffffffff
|
||||
#define UINT_FAST64_MAX 0xffffffffffffffffU
|
||||
|
||||
#define INTMAX_MIN (-0x7fffffffffffffff - _C2)
|
||||
#define INTMAX_MAX 0x7fffffffffffffff
|
||||
#define UINTMAX_MAX 0xffffffffffffffffU
|
||||
|
||||
#define INT64_C(x) ((x) + (INT64_MAX - INT64_MAX))
|
||||
#define UINT64_C(x) ((x) + (UINT64_MAX - UINT64_MAX))
|
||||
#define INTMAX_C(x) INT64_C(x)
|
||||
#define UINTMAX_C(x) UINT64_C(x)
|
||||
_C_STD_END
|
||||
#endif /* RC_INVOKED */
|
||||
#endif /* _STDINT */
|
||||
|
||||
#if defined(_STD_USING)
|
||||
using _CSTD int8_t; using _CSTD int16_t;
|
||||
using _CSTD int32_t; using _CSTD int64_t;
|
||||
|
||||
using _CSTD uint8_t; using _CSTD uint16_t;
|
||||
using _CSTD uint32_t; using _CSTD uint64_t;
|
||||
|
||||
using _CSTD int_least8_t; using _CSTD int_least16_t;
|
||||
using _CSTD int_least32_t; using _CSTD int_least64_t;
|
||||
using _CSTD uint_least8_t; using _CSTD uint_least16_t;
|
||||
using _CSTD uint_least32_t; using _CSTD uint_least64_t;
|
||||
|
||||
using _CSTD intmax_t; using _CSTD uintmax_t;
|
||||
|
||||
using _CSTD uintptr_t;
|
||||
using _CSTD intptr_t;
|
||||
|
||||
using _CSTD int_fast8_t; using _CSTD int_fast16_t;
|
||||
using _CSTD int_fast32_t; using _CSTD int_fast64_t;
|
||||
using _CSTD uint_fast8_t; using _CSTD uint_fast16_t;
|
||||
using _CSTD uint_fast32_t; using _CSTD uint_fast64_t;
|
||||
#endif /* defined(_STD_USING) */
|
||||
|
||||
/*
|
||||
* Copyright (c) 1992-2009 by P.J. Plauger. ALL RIGHTS RESERVED.
|
||||
* Consult your license regarding permissions and restrictions.
|
||||
V5.20:0009 */
|
|
@ -1,20 +0,0 @@
|
|||
LIBRARY SUNDOWN
|
||||
EXPORTS
|
||||
sdhtml_renderer
|
||||
sdhtml_toc_renderer
|
||||
sdhtml_smartypants
|
||||
bufgrow
|
||||
bufnew
|
||||
bufcstr
|
||||
bufprefix
|
||||
bufput
|
||||
bufputs
|
||||
bufputc
|
||||
bufrelease
|
||||
bufreset
|
||||
bufslurp
|
||||
bufprintf
|
||||
sd_markdown_new
|
||||
sd_markdown_render
|
||||
sd_markdown_free
|
||||
sd_version
|
|
@ -1,461 +0,0 @@
|
|||
#!/usr/bin/python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import snudown
|
||||
import unittest
|
||||
import itertools
|
||||
import cStringIO as StringIO
|
||||
|
||||
|
||||
cases = {
|
||||
'': '',
|
||||
'http://www.reddit.com':
|
||||
'<p><a href="http://www.reddit.com">http://www.reddit.com</a></p>\n',
|
||||
|
||||
'http://www.reddit.com/a\x00b':
|
||||
'<p><a href="http://www.reddit.com/ab">http://www.reddit.com/ab</a></p>\n',
|
||||
|
||||
'foo@example.com':
|
||||
'<p><a href="mailto:foo@example.com">foo@example.com</a></p>\n',
|
||||
|
||||
'[foo](http://en.wikipedia.org/wiki/Link_(film\))':
|
||||
'<p><a href="http://en.wikipedia.org/wiki/Link_(film)">foo</a></p>\n',
|
||||
|
||||
'(http://tsfr.org)':
|
||||
'<p>(<a href="http://tsfr.org">http://tsfr.org</a>)</p>\n',
|
||||
|
||||
'[A link with a /r/subreddit in it](/lol)':
|
||||
'<p><a href="/lol">A link with a /r/subreddit in it</a></p>\n',
|
||||
|
||||
'[A link with a http://www.url.com in it](/lol)':
|
||||
'<p><a href="/lol">A link with a http://www.url.com in it</a></p>\n',
|
||||
|
||||
'[Empty Link]()':
|
||||
'<p>[Empty Link]()</p>\n',
|
||||
|
||||
'http://en.wikipedia.org/wiki/café_racer':
|
||||
'<p><a href="http://en.wikipedia.org/wiki/caf%C3%A9_racer">http://en.wikipedia.org/wiki/café_racer</a></p>\n',
|
||||
|
||||
'#####################################################hi':
|
||||
'<h6>###############################################hi</h6>\n',
|
||||
|
||||
'[foo](http://bar\nbar)':
|
||||
'<p><a href="http://bar%0Abar">foo</a></p>\n',
|
||||
|
||||
'/r/test':
|
||||
'<p><a href="/r/test">/r/test</a></p>\n',
|
||||
|
||||
'Words words /r/test words':
|
||||
'<p>Words words <a href="/r/test">/r/test</a> words</p>\n',
|
||||
|
||||
'/r/':
|
||||
'<p>/r/</p>\n',
|
||||
|
||||
r'escaped \/r/test':
|
||||
'<p>escaped /r/test</p>\n',
|
||||
|
||||
'ampersands http://www.google.com?test&blah':
|
||||
'<p>ampersands <a href="http://www.google.com?test&blah">http://www.google.com?test&blah</a></p>\n',
|
||||
|
||||
'[_regular_ link with nesting](/test)':
|
||||
'<p><a href="/test"><em>regular</em> link with nesting</a></p>\n',
|
||||
|
||||
' www.a.co?with&test':
|
||||
'<p><a href="http://www.a.co?with&test">www.a.co?with&test</a></p>\n',
|
||||
|
||||
r'Normal^superscript':
|
||||
'<p>Normal<sup>superscript</sup></p>\n',
|
||||
|
||||
r'Escape\^superscript':
|
||||
'<p>Escape^superscript</p>\n',
|
||||
|
||||
r'~~normal strikethrough~~':
|
||||
'<p><del>normal strikethrough</del></p>\n',
|
||||
|
||||
r'\~~escaped strikethrough~~':
|
||||
'<p>~~escaped strikethrough~~</p>\n',
|
||||
|
||||
'anywhere\x03, you':
|
||||
'<p>anywhere, you</p>\n',
|
||||
|
||||
'[Test](//test)':
|
||||
'<p><a href="//test">Test</a></p>\n',
|
||||
|
||||
'[Test](//#test)':
|
||||
'<p><a href="//#test">Test</a></p>\n',
|
||||
|
||||
'[Test](#test)':
|
||||
'<p><a href="#test">Test</a></p>\n',
|
||||
|
||||
'[Test](git://github.com)':
|
||||
'<p><a href="git://github.com">Test</a></p>\n',
|
||||
|
||||
'[Speculation](//?)':
|
||||
'<p><a href="//?">Speculation</a></p>\n',
|
||||
|
||||
'/r/sr_with_underscores':
|
||||
'<p><a href="/r/sr_with_underscores">/r/sr_with_underscores</a></p>\n',
|
||||
|
||||
'[Test](///#test)':
|
||||
'<p><a href="///#test">Test</a></p>\n',
|
||||
|
||||
'/r/multireddit+test+yay':
|
||||
'<p><a href="/r/multireddit+test+yay">/r/multireddit+test+yay</a></p>\n',
|
||||
|
||||
'<test>':
|
||||
'<p><test></p>\n',
|
||||
|
||||
'words_with_underscores':
|
||||
'<p>words_with_underscores</p>\n',
|
||||
|
||||
'words*with*asterisks':
|
||||
'<p>words<em>with</em>asterisks</p>\n',
|
||||
|
||||
'~test':
|
||||
'<p>~test</p>\n',
|
||||
|
||||
'/u/test':
|
||||
'<p><a href="/u/test">/u/test</a></p>\n',
|
||||
|
||||
'/u/test/m/test test':
|
||||
'<p><a href="/u/test/m/test">/u/test/m/test</a> test</p>\n',
|
||||
|
||||
'/U/nope':
|
||||
'<p>/U/nope</p>\n',
|
||||
|
||||
'/r/test/m/test test':
|
||||
'<p><a href="/r/test/m/test">/r/test/m/test</a> test</p>\n',
|
||||
|
||||
'/r/test/w/test test':
|
||||
'<p><a href="/r/test/w/test">/r/test/w/test</a> test</p>\n',
|
||||
|
||||
'/r/test/comments/test test':
|
||||
'<p><a href="/r/test/comments/test">/r/test/comments/test</a> test</p>\n',
|
||||
|
||||
'/u/test/commentscommentscommentscommentscommentscommentscomments/test test':
|
||||
'<p><a href="/u/test/commentscommentscommentscommentscommentscommentscomments/test">/u/test/commentscommentscommentscommentscommentscommentscomments/test</a> test</p>\n',
|
||||
|
||||
'a /u/reddit':
|
||||
'<p>a <a href="/u/reddit">/u/reddit</a></p>\n',
|
||||
|
||||
'u/reddit':
|
||||
'<p><a href="/u/reddit">u/reddit</a></p>\n',
|
||||
|
||||
'a u/reddit':
|
||||
'<p>a <a href="/u/reddit">u/reddit</a></p>\n',
|
||||
|
||||
'a u/reddit/foobaz':
|
||||
'<p>a <a href="/u/reddit/foobaz">u/reddit/foobaz</a></p>\n',
|
||||
|
||||
'foo:u/reddit':
|
||||
'<p>foo:<a href="/u/reddit">u/reddit</a></p>\n',
|
||||
|
||||
'fuu/reddit':
|
||||
'<p>fuu/reddit</p>\n',
|
||||
|
||||
# Don't treat unicode punctuation as a word boundary for now
|
||||
u'a。u/reddit'.encode('utf8'):
|
||||
u'<p>a。u/reddit</p>\n'.encode('utf8'),
|
||||
|
||||
'\\/u/me':
|
||||
'<p>/u/me</p>\n',
|
||||
|
||||
'\\\\/u/me':
|
||||
'<p>\\<a href="/u/me">/u/me</a></p>\n',
|
||||
|
||||
'\\u/me':
|
||||
'<p>\\<a href="/u/me">u/me</a></p>\n',
|
||||
|
||||
'\\\\u/me':
|
||||
'<p>\\<a href="/u/me">u/me</a></p>\n',
|
||||
|
||||
'u\\/me':
|
||||
'<p>u/me</p>\n',
|
||||
|
||||
'*u/me*':
|
||||
'<p><em><a href="/u/me">u/me</a></em></p>\n',
|
||||
|
||||
'foo^u/me':
|
||||
'<p>foo<sup><a href="/u/me">u/me</a></sup></p>\n',
|
||||
|
||||
'*foo*u/me':
|
||||
'<p><em>foo</em><a href="/u/me">u/me</a></p>\n',
|
||||
|
||||
'u/me':
|
||||
'<p><a href="/u/me">u/me</a></p>\n',
|
||||
|
||||
'/u/me':
|
||||
'<p><a href="/u/me">/u/me</a></p>\n',
|
||||
|
||||
'u/m':
|
||||
'<p>u/m</p>\n',
|
||||
|
||||
'/u/m':
|
||||
'<p>/u/m</p>\n',
|
||||
|
||||
'/f/oobar':
|
||||
'<p>/f/oobar</p>\n',
|
||||
|
||||
'f/oobar':
|
||||
'<p>f/oobar</p>\n',
|
||||
|
||||
'/r/test/commentscommentscommentscommentscommentscommentscomments/test test':
|
||||
'<p><a href="/r/test/commentscommentscommentscommentscommentscommentscomments/test">/r/test/commentscommentscommentscommentscommentscommentscomments/test</a> test</p>\n',
|
||||
|
||||
'blah \\':
|
||||
'<p>blah \\</p>\n',
|
||||
|
||||
'/r/whatever: fork':
|
||||
'<p><a href="/r/whatever">/r/whatever</a>: fork</p>\n',
|
||||
|
||||
'/r/t:timereddit':
|
||||
'<p><a href="/r/t:timereddit">/r/t:timereddit</a></p>\n',
|
||||
|
||||
'/r/reddit.com':
|
||||
'<p><a href="/r/reddit.com">/r/reddit.com</a></p>\n',
|
||||
|
||||
'/r/not.cool':
|
||||
'<p><a href="/r/not">/r/not</a>.cool</p>\n',
|
||||
|
||||
'/r/very+clever+multireddit+reddit.com+t:fork+yay':
|
||||
'<p><a href="/r/very+clever+multireddit+reddit.com+t:fork+yay">/r/very+clever+multireddit+reddit.com+t:fork+yay</a></p>\n',
|
||||
|
||||
'/r/t:heatdeathoftheuniverse':
|
||||
'<p><a href="/r/t:heatdeathoftheuniverse">/r/t:heatdeathoftheuniverse</a></p>\n',
|
||||
|
||||
'/r/all-minus-something':
|
||||
'<p><a href="/r/all-minus-something">/r/all-minus-something</a></p>\n',
|
||||
|
||||
'/r/notall-minus':
|
||||
'<p><a href="/r/notall">/r/notall</a>-minus</p>\n',
|
||||
|
||||
'a /r/reddit.com':
|
||||
'<p>a <a href="/r/reddit.com">/r/reddit.com</a></p>\n',
|
||||
|
||||
'a r/reddit.com':
|
||||
'<p>a <a href="/r/reddit.com">r/reddit.com</a></p>\n',
|
||||
|
||||
'foo:r/reddit.com':
|
||||
'<p>foo:<a href="/r/reddit.com">r/reddit.com</a></p>\n',
|
||||
|
||||
'foobar/reddit.com':
|
||||
'<p>foobar/reddit.com</p>\n',
|
||||
|
||||
u'a。r/reddit.com'.encode('utf8'):
|
||||
u'<p>a。r/reddit.com</p>\n'.encode('utf8'),
|
||||
|
||||
'/R/reddit.com':
|
||||
'<p>/R/reddit.com</p>\n',
|
||||
|
||||
'/r/irc://foo.bar/':
|
||||
'<p><a href="/r/irc">/r/irc</a>://foo.bar/</p>\n',
|
||||
|
||||
'/r/t:irc//foo.bar/':
|
||||
'<p><a href="/r/t:irc//foo">/r/t:irc//foo</a>.bar/</p>\n',
|
||||
|
||||
'/r/all-irc://foo.bar/':
|
||||
'<p><a href="/r/all-irc">/r/all-irc</a>://foo.bar/</p>\n',
|
||||
|
||||
'/r/foo+irc://foo.bar/':
|
||||
'<p><a href="/r/foo+irc">/r/foo+irc</a>://foo.bar/</p>\n',
|
||||
|
||||
'/r/www.example.com':
|
||||
'<p><a href="/r/www">/r/www</a>.example.com</p>\n',
|
||||
|
||||
'.http://reddit.com':
|
||||
'<p>.<a href="http://reddit.com">http://reddit.com</a></p>\n',
|
||||
|
||||
'[r://<http://reddit.com/>](/aa)':
|
||||
'<p><a href="/aa">r://<a href="http://reddit.com/">http://reddit.com/</a></a></p>\n',
|
||||
|
||||
'/u/http://www.reddit.com/user/reddit':
|
||||
'<p><a href="/u/http">/u/http</a>://<a href="http://www.reddit.com/user/reddit">www.reddit.com/user/reddit</a></p>\n',
|
||||
|
||||
'www.http://example.com/':
|
||||
'<p><a href="http://www.http://example.com/">www.http://example.com/</a></p>\n',
|
||||
|
||||
('|' * 5) + '\n' + ('-|' * 5) + '\n|\n':
|
||||
'<table><thead>\n<tr>\n' + ('<th></th>\n' * 4) + '</tr>\n</thead><tbody>\n<tr>\n<td colspan="4" ></td>\n</tr>\n</tbody></table>\n',
|
||||
|
||||
('|' * 2) + '\n' + ('-|' * 2) + '\n|\n':
|
||||
'<table><thead>\n<tr>\n' + ('<th></th>\n' * 1) + '</tr>\n</thead><tbody>\n<tr>\n<td></td>\n</tr>\n</tbody></table>\n',
|
||||
|
||||
('|' * 65) + '\n' + ('-|' * 65) + '\n|\n':
|
||||
'<table><thead>\n<tr>\n' + ('<th></th>\n' * 64) + '</tr>\n</thead><tbody>\n<tr>\n<td colspan="64" ></td>\n</tr>\n</tbody></table>\n',
|
||||
|
||||
('|' * 66) + '\n' + ('-|' * 66) + '\n|\n':
|
||||
'<p>' + ('|' * 66) + '\n' + ('-|' * 66) + '\n|' + '</p>\n',
|
||||
|
||||
'ϑ':
|
||||
'<p>ϑ</p>\n',
|
||||
|
||||
'&foobar;':
|
||||
'<p>&foobar;</p>\n',
|
||||
|
||||
' ':
|
||||
'<p>&nbsp</p>\n',
|
||||
|
||||
'&#foobar;':
|
||||
'<p>&#foobar;</p>\n',
|
||||
|
||||
'oobar;':
|
||||
'<p>&#xfoobar;</p>\n',
|
||||
|
||||
'�':
|
||||
'<p>&#9999999999;</p>\n',
|
||||
|
||||
'c':
|
||||
'<p>c</p>\n',
|
||||
|
||||
'~':
|
||||
'<p>~</p>\n',
|
||||
|
||||
'~':
|
||||
'<p>~</p>\n',
|
||||
|
||||
'½':
|
||||
'<p>½</p>\n',
|
||||
|
||||
'aaa½aaa':
|
||||
'<p>aaa½aaa</p>\n',
|
||||
|
||||
'&':
|
||||
'<p>&</p>\n',
|
||||
|
||||
'&;':
|
||||
'<p>&;</p>\n',
|
||||
|
||||
'&#;':
|
||||
'<p>&#;</p>\n',
|
||||
|
||||
'&#;':
|
||||
'<p>&#;</p>\n',
|
||||
|
||||
'&#x;':
|
||||
'<p>&#x;</p>\n',
|
||||
}
|
||||
|
||||
# Test that every numeric entity is encoded as
|
||||
# it should be.
|
||||
ILLEGAL_NUMERIC_ENTS = frozenset(itertools.chain(
|
||||
xrange(0, 9),
|
||||
xrange(11, 13),
|
||||
xrange(14, 32),
|
||||
xrange(55296, 57344),
|
||||
xrange(65534, 65536),
|
||||
))
|
||||
|
||||
ent_test_key = ''
|
||||
ent_test_val = ''
|
||||
for i in xrange(65550):
|
||||
ent_testcase = '&#%d;&#x%x;' % (i, i)
|
||||
ent_test_key += ent_testcase
|
||||
if i in ILLEGAL_NUMERIC_ENTS:
|
||||
ent_test_val += ent_testcase.replace('&', '&')
|
||||
else:
|
||||
ent_test_val += ent_testcase
|
||||
|
||||
cases[ent_test_key] = '<p>%s</p>\n' % ent_test_val
|
||||
|
||||
wiki_cases = {
|
||||
'<table scope="foo"bar>':
|
||||
'<p><table scope="foo"></p>\n',
|
||||
|
||||
'<table scope="foo"bar colspan="2">':
|
||||
'<p><table scope="foo" colspan="2"></p>\n',
|
||||
|
||||
'<table scope="foo" colspan="2"bar>':
|
||||
'<p><table scope="foo" colspan="2"></p>\n',
|
||||
|
||||
'<table scope="foo">':
|
||||
'<p><table scope="foo"></p>\n',
|
||||
|
||||
'<table scop="foo">':
|
||||
'<p><table></p>\n',
|
||||
|
||||
'<table ff= scope="foo">':
|
||||
'<p><table scope="foo"></p>\n',
|
||||
|
||||
'<table colspan= scope="foo">':
|
||||
'<p><table scope="foo"></p>\n',
|
||||
|
||||
'<table scope=ff"foo">':
|
||||
'<p><table scope="foo"></p>\n',
|
||||
|
||||
'<table scope="foo" test="test">':
|
||||
'<p><table scope="foo"></p>\n',
|
||||
|
||||
'<table scope="foo" longervalue="testing test" scope="test">':
|
||||
'<p><table scope="foo" scope="test"></p>\n',
|
||||
|
||||
'<table scope=`"foo">':
|
||||
'<p><table scope="foo"></p>\n',
|
||||
|
||||
'<table scope="foo bar">':
|
||||
'<p><table scope="foo bar"></p>\n',
|
||||
|
||||
'<table scope=\'foo colspan="foo">':
|
||||
'<p><table></p>\n',
|
||||
|
||||
'<table scope=\'foo\' colspan="foo">':
|
||||
'<p><table scope="foo" colspan="foo"></p>\n',
|
||||
|
||||
'<table scope=>':
|
||||
'<p><table></p>\n',
|
||||
|
||||
'<table scope= colspan="test" scope=>':
|
||||
'<p><table colspan="test"></p>\n',
|
||||
|
||||
'<table colspan="\'test">':
|
||||
'<p><table colspan="'test"></p>\n',
|
||||
|
||||
'<table scope="foo" colspan="2">':
|
||||
'<p><table scope="foo" colspan="2"></p>\n',
|
||||
|
||||
'<table scope="foo" colspan="2" ff="test">':
|
||||
'<p><table scope="foo" colspan="2"></p>\n',
|
||||
|
||||
'<table ff="test" scope="foo" colspan="2" colspan=>':
|
||||
'<p><table scope="foo" colspan="2"></p>\n',
|
||||
|
||||
' <table colspan=\'\'\' a="" \' scope="foo">':
|
||||
'<p><table scope="foo"></p>\n',
|
||||
}
|
||||
|
||||
class SnudownTestCase(unittest.TestCase):
|
||||
def __init__(self, renderer=snudown.RENDERER_USERTEXT):
|
||||
self.renderer = renderer
|
||||
unittest.TestCase.__init__(self)
|
||||
|
||||
def runTest(self):
|
||||
output = snudown.markdown(self.input, renderer=self.renderer)
|
||||
|
||||
for i, (a, b) in enumerate(zip(repr(self.expected_output),
|
||||
repr(output))):
|
||||
if a != b:
|
||||
io = StringIO.StringIO()
|
||||
print >> io, "TEST FAILED:"
|
||||
print >> io, " input: %s" % repr(self.input)
|
||||
print >> io, " expected: %s" % repr(self.expected_output)
|
||||
print >> io, " actual: %s" % repr(output)
|
||||
print >> io, " %s" % (' ' * i + '^')
|
||||
self.fail(io.getvalue())
|
||||
|
||||
|
||||
|
||||
def test_snudown():
|
||||
suite = unittest.TestSuite()
|
||||
|
||||
for input, expected_output in wiki_cases.iteritems():
|
||||
case = SnudownTestCase(renderer=snudown.RENDERER_WIKI)
|
||||
case.input = input
|
||||
case.expected_output = expected_output
|
||||
suite.addTest(case)
|
||||
|
||||
for input, expected_output in cases.iteritems():
|
||||
case = SnudownTestCase()
|
||||
case.input = input
|
||||
case.expected_output = expected_output
|
||||
suite.addTest(case)
|
||||
|
||||
return suite
|
|
@ -3,18 +3,21 @@ Spinal
|
|||
|
||||
A couple of tools for copying files and directories.
|
||||
|
||||
2016 03 02
|
||||
- 2016 11 27
|
||||
- Renamed the `copy_file` parameter `callback` to `callback_progress` for clarity.
|
||||
|
||||
- 2016 03 02
|
||||
- Fixed issue where the copy's path casing was based on the input string and not the path's actual casing (since Windows doesn't care).
|
||||
- Change the returned written_bytes to 0 if the file did not need to be copied. This is better for tracking how much actually happens during each backup.
|
||||
- Fixed encode errors caused by callback_v1's print statement.
|
||||
|
||||
2016 03 03
|
||||
- 2016 03 03
|
||||
- Moved directory / filename exclusion logic into the walk_generator so the caller doesn't need to worry about it.
|
||||
- walk_generator now yields absolute filenames since copy_dir no longer needs to process exclusions, and that was the only reason walk_generator used to yield them in parts.
|
||||
|
||||
2016 03 04
|
||||
- 2016 03 04
|
||||
- Created a FilePath class to cache os.stat data, which should reduce the number of unnecessary filesystem calls.
|
||||
|
||||
2016 03 18
|
||||
- 2016 03 18
|
||||
- Added `glob.escape` to `get_path_casing`.
|
||||
- Added callbacks for some extra debug output.
|
|
@ -1,33 +1,20 @@
|
|||
import collections
|
||||
import glob
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import shutil
|
||||
import stat
|
||||
import string
|
||||
import sys
|
||||
import time
|
||||
|
||||
try:
|
||||
sys.path.append('C:\\git\\else\\Bytestring')
|
||||
sys.path.append('C:\\git\\else\\Pathclass')
|
||||
sys.path.append('C:\\git\\else\\Ratelimiter')
|
||||
import bytestring
|
||||
import pathclass
|
||||
import ratelimiter
|
||||
except ImportError:
|
||||
# pip install
|
||||
# https://raw.githubusercontent.com/voussoir/else/master/_voussoirkit/voussoirkit.zip
|
||||
from voussoirkit import bytestring
|
||||
from voussoirkit import pathclass
|
||||
from voussoirkit import ratelimiter
|
||||
# pip install
|
||||
# https://raw.githubusercontent.com/voussoir/else/master/_voussoirkit/voussoirkit.zip
|
||||
from voussoirkit import bytestring
|
||||
from voussoirkit import pathclass
|
||||
from voussoirkit import ratelimiter
|
||||
|
||||
logging.basicConfig(level=logging.CRITICAL)
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
CHUNK_SIZE = 128 * bytestring.KIBIBYTE
|
||||
CHUNK_SIZE = 256 * bytestring.KIBIBYTE
|
||||
# Number of bytes to read and write at a time
|
||||
|
||||
HASH_CLASS = hashlib.md5
|
||||
|
@ -53,7 +40,7 @@ class SpinalError(Exception):
|
|||
class ValidationError(Exception):
|
||||
pass
|
||||
|
||||
def callback_exclusion(name, path_type):
|
||||
def callback_exclusion_v1(name, path_type):
|
||||
'''
|
||||
Example of an exclusion callback function.
|
||||
'''
|
||||
|
@ -98,12 +85,12 @@ def copy(source, file_args=None, file_kwargs=None, dir_args=None, dir_kwargs=Non
|
|||
def copy_dir(
|
||||
source,
|
||||
destination=None,
|
||||
destination_new_root=None,
|
||||
bytes_per_second=None,
|
||||
callback_directory=None,
|
||||
callback_exclusion=None,
|
||||
callback_file=None,
|
||||
callback_permission_denied=None,
|
||||
destination_new_root=None,
|
||||
dry_run=False,
|
||||
exclude_directories=None,
|
||||
exclude_filenames=None,
|
||||
|
@ -123,13 +110,6 @@ def copy_dir(
|
|||
The directory in which copied files are placed. Alternatively, use
|
||||
destination_new_root.
|
||||
|
||||
destination_new_root:
|
||||
Determine the destination path by calling
|
||||
`new_root(source, destination_new_root)`.
|
||||
Thus, this path acts as a root and the rest of the path is matched.
|
||||
|
||||
`destination` and `destination_new_root` are mutually exclusive.
|
||||
|
||||
bytes_per_second:
|
||||
Restrict file copying to this many bytes per second. Can be an integer
|
||||
or an existing Ratelimiter object.
|
||||
|
@ -139,8 +119,8 @@ def copy_dir(
|
|||
|
||||
callback_directory:
|
||||
This function will be called after each file copy with three parameters:
|
||||
name of file copied, number of bytes written to destination so far,
|
||||
total bytes needed (from precalcsize).
|
||||
name of file copied, number of bytes written to destination directory
|
||||
so far, total bytes needed (based on precalcsize).
|
||||
If `precalcsize` is False, this function will receive written bytes
|
||||
for both written and total, showing 100% always.
|
||||
|
||||
|
@ -163,6 +143,13 @@ def copy_dir(
|
|||
|
||||
Default = None
|
||||
|
||||
destination_new_root:
|
||||
Determine the destination path by calling
|
||||
`new_root(source, destination_new_root)`.
|
||||
Thus, this path acts as a root and the rest of the path is matched.
|
||||
|
||||
`destination` and `destination_new_root` are mutually exclusive.
|
||||
|
||||
dry_run:
|
||||
Do everything except the actual file copying.
|
||||
|
||||
|
@ -179,8 +166,8 @@ def copy_dir(
|
|||
Default = None
|
||||
|
||||
files_per_second:
|
||||
Maximum number of files to be processed per second. Helps to keep CPU usage
|
||||
low.
|
||||
Maximum number of files to be processed per second. Helps to keep CPU
|
||||
usage low.
|
||||
|
||||
Default = None
|
||||
|
||||
|
@ -207,9 +194,9 @@ def copy_dir(
|
|||
'''
|
||||
# Prepare parameters
|
||||
if not is_xor(destination, destination_new_root):
|
||||
m = 'One and only one of `destination` and '
|
||||
m += '`destination_new_root` can be passed.'
|
||||
raise ValueError(m)
|
||||
message = 'One and only one of `destination` and '
|
||||
message += '`destination_new_root` can be passed.'
|
||||
raise ValueError(message)
|
||||
|
||||
source = str_to_fp(source)
|
||||
|
||||
|
@ -244,7 +231,7 @@ def copy_dir(
|
|||
exclude_directories=exclude_directories,
|
||||
exclude_filenames=exclude_filenames,
|
||||
)
|
||||
for (source_abspath) in walker:
|
||||
for source_abspath in walker:
|
||||
# Terminology:
|
||||
# abspath: C:\folder\subfolder\filename.txt
|
||||
# location: C:\folder\subfolder
|
||||
|
@ -267,7 +254,7 @@ def copy_dir(
|
|||
source_abspath,
|
||||
destination_abspath,
|
||||
bytes_per_second=bytes_per_second,
|
||||
callback=callback_file,
|
||||
callback_progress=callback_file,
|
||||
callback_permission_denied=callback_permission_denied,
|
||||
dry_run=dry_run,
|
||||
overwrite_old=overwrite_old,
|
||||
|
@ -292,7 +279,7 @@ def copy_file(
|
|||
destination=None,
|
||||
destination_new_root=None,
|
||||
bytes_per_second=None,
|
||||
callback=None,
|
||||
callback_progress=None,
|
||||
callback_permission_denied=None,
|
||||
callback_validate_hash=None,
|
||||
dry_run=False,
|
||||
|
@ -321,14 +308,6 @@ def copy_file(
|
|||
|
||||
Default = None
|
||||
|
||||
callback:
|
||||
If provided, this function will be called after writing
|
||||
each CHUNK_SIZE bytes to destination with three parameters:
|
||||
the Path object being copied, number of bytes written so far,
|
||||
total number of bytes needed.
|
||||
|
||||
Default = None
|
||||
|
||||
callback_permission_denied:
|
||||
If provided, this function will be called when a source file denies
|
||||
read access, with the file path and the exception object as parameters.
|
||||
|
@ -338,6 +317,14 @@ def copy_file(
|
|||
|
||||
Default = None
|
||||
|
||||
callback_progress:
|
||||
If provided, this function will be called after writing
|
||||
each CHUNK_SIZE bytes to destination with three parameters:
|
||||
the Path object being copied, number of bytes written so far,
|
||||
total number of bytes needed.
|
||||
|
||||
Default = None
|
||||
|
||||
callback_validate_hash:
|
||||
Passed directly into `verify_hash`
|
||||
|
||||
|
@ -365,9 +352,9 @@ def copy_file(
|
|||
'''
|
||||
# Prepare parameters
|
||||
if not is_xor(destination, destination_new_root):
|
||||
m = 'One and only one of `destination` and '
|
||||
m += '`destination_new_root` can be passed'
|
||||
raise ValueError(m)
|
||||
message = 'One and only one of `destination` and '
|
||||
message += '`destination_new_root` can be passed'
|
||||
raise ValueError(message)
|
||||
|
||||
source = str_to_fp(source)
|
||||
|
||||
|
@ -379,10 +366,10 @@ def copy_file(
|
|||
destination = new_root(source, destination_new_root)
|
||||
destination = str_to_fp(destination)
|
||||
|
||||
callback = callback or do_nothing
|
||||
callback_progress = callback_progress or do_nothing
|
||||
|
||||
if destination.is_dir:
|
||||
raise DestinationIsDirectory(destination)
|
||||
destination = destination.with_child(source.basename)
|
||||
|
||||
bytes_per_second = limiter_or_none(bytes_per_second)
|
||||
|
||||
|
@ -397,8 +384,8 @@ def copy_file(
|
|||
|
||||
# Copy
|
||||
if dry_run:
|
||||
if callback is not None:
|
||||
callback(destination, 0, 0)
|
||||
if callback_progress is not None:
|
||||
callback_progress(destination, 0, 0)
|
||||
return [destination, 0]
|
||||
|
||||
source_bytes = source.size
|
||||
|
@ -435,7 +422,7 @@ def copy_file(
|
|||
if bytes_per_second is not None:
|
||||
bytes_per_second.limit(data_bytes)
|
||||
|
||||
callback(destination, written_bytes, source_bytes)
|
||||
callback_progress(destination, written_bytes, source_bytes)
|
||||
|
||||
# Fin
|
||||
log.debug('Closing source handle.')
|
||||
|
@ -539,7 +526,7 @@ def verify_hash(path, known_size, known_hash, callback=None):
|
|||
path object, bytes ingested so far, bytes total
|
||||
'''
|
||||
path = str_to_fp(path)
|
||||
log.debug('Validating hash for "%s" against %s' % (path.absolute_path, known_hash))
|
||||
log.debug('Validating hash for "%s" against %s', path.absolute_path, known_hash)
|
||||
file_size = os.path.getsize(path.absolute_path)
|
||||
if file_size != known_size:
|
||||
raise ValidationError('File size %d != known size %d' % (file_size, known_size))
|
||||
|
@ -565,6 +552,7 @@ def verify_hash(path, known_size, known_hash, callback=None):
|
|||
def walk_generator(
|
||||
path='.',
|
||||
callback_exclusion=None,
|
||||
callback_permission_denied=None,
|
||||
exclude_directories=None,
|
||||
exclude_filenames=None,
|
||||
recurse=True,
|
||||
|
@ -613,6 +601,7 @@ def walk_generator(
|
|||
exclude_filenames = set()
|
||||
|
||||
callback_exclusion = callback_exclusion or do_nothing
|
||||
callback_permission_denied = callback_permission_denied or do_nothing
|
||||
|
||||
exclude_filenames = {normalize(f) for f in exclude_filenames}
|
||||
exclude_directories = {normalize(f) for f in exclude_directories}
|
||||
|
@ -636,9 +625,14 @@ def walk_generator(
|
|||
# Thank you for your cooperation.
|
||||
while len(directory_queue) > 0:
|
||||
current_location = directory_queue.popleft()
|
||||
log.debug('listdir: %s' % current_location.absolute_path)
|
||||
contents = os.listdir(current_location.absolute_path)
|
||||
log.debug('received %d items' % len(contents))
|
||||
log.debug('listdir: %s', current_location.absolute_path)
|
||||
try:
|
||||
contents = os.listdir(current_location.absolute_path)
|
||||
except PermissionError as exception:
|
||||
callback_permission_denied(current_location, exception)
|
||||
continue
|
||||
|
||||
log.debug('received %d items', len(contents))
|
||||
|
||||
directories = []
|
||||
files = []
|
||||
|
|
15
Templates/changelog.md
Normal file
15
Templates/changelog.md
Normal file
|
@ -0,0 +1,15 @@
|
|||
### Changelog
|
||||
|
||||
- **[addition]** A new feature was added.
|
||||
- **[bugfix]** Incorrect behavior was fixed.
|
||||
- **[change]** An existing feature was slightly modified or parameters were renamed.
|
||||
- **[cleanup]** Code was improved, comments were added, or other changes with minor impact on the interface.
|
||||
- **[release]** A new version of the program has been released.
|
||||
- **[removal]** An old feature was removed.
|
||||
|
||||
|
||||
|
||||
- YYYY MM DD
|
||||
- **[addition]** Test
|
||||
- **[bugfix]** Ping Pong
|
||||
- **[removal]** Entries are grouped by label
|
|
@ -17,12 +17,13 @@ body, a
|
|||
</style>
|
||||
</head>
|
||||
|
||||
|
||||
<body>
|
||||
{{header.make_header()}}
|
||||
<p>Welcome to my flask site</p>
|
||||
</body>
|
||||
|
||||
</html>
|
||||
|
||||
<script type="text/javascript">
|
||||
</script>
|
||||
</html>
|
||||
|
|
|
@ -16,8 +16,8 @@
|
|||
<p>test</p>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
|
||||
|
||||
<script type="text/javascript">
|
||||
</script>
|
||||
</html>
|
||||
|
|
|
@ -3,16 +3,10 @@ import sys
|
|||
import threading
|
||||
import time
|
||||
|
||||
try:
|
||||
sys.path.append('C:\\git\\else\\Clipext')
|
||||
sys.path.append('C:\\git\\else\\Downloady')
|
||||
import clipext
|
||||
import downloady
|
||||
except ImportError:
|
||||
# pip install
|
||||
# https://raw.githubusercontent.com/voussoir/else/master/_voussoirkit/voussoirkit.zip
|
||||
from voussoirkit import bytestring
|
||||
from voussoirkit import pathclass
|
||||
# pip install
|
||||
# https://raw.githubusercontent.com/voussoir/else/master/_voussoirkit/voussoirkit.zip
|
||||
from voussoirkit import clipext
|
||||
from voussoirkit import downloady
|
||||
|
||||
def remove_finished(threads):
|
||||
threads = [t for t in threads if t.is_alive()]
|
||||
|
|
|
@ -19,9 +19,10 @@
|
|||
16384 kbps | 2.000 MiB | 120.000 MiB | 3.516 GiB | 7.031 GiB | 10.547 GiB | 14.062 GiB
|
||||
'''
|
||||
import sys
|
||||
sys.path.append('C:\\git\\else\\bytestring')
|
||||
import bytestring
|
||||
import kbps
|
||||
|
||||
from voussoirkit import bytestring
|
||||
|
||||
times = ['01', '1:00', '30:00', '1:00:00', '1:30:00', '2:00:00']
|
||||
rates = [128, 256, 320, 500, 640, 738, 1024, 2048, 3072, 4096, 6144, 8192, 12288, 16384, 2330.17, 4660.34]
|
||||
|
||||
|
@ -35,7 +36,7 @@ for r in rates:
|
|||
l = []
|
||||
l.append('%d kbps' % r)
|
||||
for t in times:
|
||||
l.append(bytestring.bytestring(kbps.calc(kbps.hms_s(t), r)))
|
||||
l.append(kbps.kbps(time=t, kbps=r))
|
||||
l = ' | '.join(l)
|
||||
table.append(l)
|
||||
|
||||
|
|
61
Toolbox/brename.py
Normal file
61
Toolbox/brename.py
Normal file
|
@ -0,0 +1,61 @@
|
|||
import os
|
||||
import sys
|
||||
|
||||
|
||||
def brename(transformation):
    '''
    Interactively batch-rename the entries of the current working directory.

    transformation:
        A Python expression, given as a string. It is evaluated once per
        directory entry with the entry's name bound to `x`, and the result
        is used as that entry's new name. The `re` module and this module's
        globals are available to the expression.

    The proposed renames are printed first. Nothing is renamed unless the
    user answers the confirmation prompt affirmatively.
    '''
    # Imported unconditionally so it can be handed to eval explicitly.
    import re

    old = os.listdir()

    # SECURITY: `transformation` is executed with eval. It must only ever come
    # from the person running the tool, never from untrusted input.
    #
    # A comprehension body is its own scope (prior to Python 3.12), so a bare
    # eval() there cannot see names that are local to this function, such as
    # `re`. Pass every name the expression may need explicitly.
    namespace = {'re': re, 'transformation': transformation}
    new = [eval(transformation, globals(), dict(namespace, x=x)) for x in old]

    # Entries whose name would not change are not worth showing or renaming.
    pairs = [(x, y) for (x, y) in zip(old, new) if x != y]

    if not loop(pairs, dry=True):
        print('Nothing to replace')
        return

    print('Is this correct? y/n')
    if input('>').lower() not in ('y', 'yes', 'yeehaw'):
        return

    loop(pairs, dry=False)
|
||||
|
||||
def longest_length(li):
    '''
    Return the length of the longest item in `li`, or 0 if `li` is empty.
    '''
    return max((len(item) for item in li), default=0)
|
||||
|
||||
def loop(pairs, dry=False):
    '''
    Preview or perform a series of renames.

    pairs:
        An iterable of (old_name, new_name) tuples.
    dry:
        If True, print each old / new name pair and rename nothing.
        If False, rename each old_name to new_name with os.rename.

    Returns True if a dry run printed at least one pair, otherwise False.
    A real (non-dry) run always returns False.
    '''
    if not dry:
        for (old_name, new_name) in pairs:
            os.rename(old_name, new_name)
        return False

    printed_any = False
    for (old_name, new_name) in pairs:
        print('{old}\n{new}\n'.format(old=old_name, new=new_name))
        printed_any = True
    return printed_any
|
||||
|
||||
def title(text):
    '''
    Title-case a filename, leaving its extension untouched.

    Every word of the stem is capitalized with str.title. Common short words
    (the, a, an, at, to, in, of, from, and) are then lowercased again, except
    when they are the first word.
    '''
    (stem, extension) = os.path.splitext(text)
    (first, _, remainder) = stem.title().partition(' ')

    # Surround with spaces so that the first and last words of the remainder
    # can be matched as whole, space-delimited words like all the others.
    remainder = ' %s ' % remainder
    for article in ['The', 'A', 'An', 'At', 'To', 'In', 'Of', 'From', 'And']:
        padded_article = ' %s ' % article
        remainder = remainder.replace(padded_article, padded_article.lower())
    remainder = remainder.strip()

    if remainder == '':
        return first + extension
    return first + ' ' + remainder + extension
|
||||
|
||||
if __name__ == '__main__':
    # Give a usage hint rather than an IndexError traceback when the
    # transformation expression was not supplied.
    if len(sys.argv) < 2:
        print('Usage: brename.py "<python expression using x>"')
        raise SystemExit(1)
    brename(sys.argv[1])
|
7
Toolbox/breplace.py
Normal file
7
Toolbox/breplace.py
Normal file
|
@ -0,0 +1,7 @@
|
|||
import brename
|
||||
import sys
|
||||
|
||||
# Usage: breplace.py <replace_from> <replace_to>
# Renames every entry of the current directory, replacing each occurrence of
# replace_from in its name with replace_to.
replace_from = sys.argv[1]
replace_to = sys.argv[2]
# The arguments are embedded with repr() so that quotes and backslashes in
# them survive as literal text instead of breaking or altering the expression
# that brename will evaluate.
command = 'x.replace({f!r}, {t!r})'.format(f=replace_from, t=replace_to)
brename.brename(command)
|
|
@ -3,7 +3,7 @@ import sys
|
|||
|
||||
|
||||
if len(sys.argv) > 1:
|
||||
sys.path.append('C:\\git\\else\\Clipext'); import clipext
|
||||
from voussoirkit import clipext
|
||||
stuff = clipext.resolve(sys.argv[1])
|
||||
pyperclip.copy(stuff)
|
||||
else:
|
||||
|
|
|
@ -1,8 +1,7 @@
|
|||
import os
|
||||
import sys
|
||||
|
||||
sys.path.append('C:\\git\\else\\spinaltap')
|
||||
import spinal
|
||||
from voussoirkit import spinal
|
||||
|
||||
def main():
|
||||
files = list(spinal.walk_generator())
|
||||
|
|
|
@ -2,7 +2,8 @@ import fnmatch
|
|||
import glob
|
||||
import re
|
||||
import sys
|
||||
sys.path.append('C:\\git\\else\\spinaltap'); import spinal
|
||||
|
||||
from voussoirkit import spinal
|
||||
|
||||
filepattern = sys.argv[1]
|
||||
searchpattern = sys.argv[2]
|
||||
|
|
|
@ -1,11 +1,7 @@
|
|||
import argparse
|
||||
import sys
|
||||
try:
|
||||
sys.path.append('C:\\git\\else\\Bytestring')
|
||||
import bytestring
|
||||
except ImportError:
|
||||
# pip install
|
||||
# https://raw.githubusercontent.com/voussoir/else/master/_voussoirkit/voussoirkit.zip
|
||||
from vousoirkit import bytestring
|
||||
|
||||
from voussoirkit import bytestring
|
||||
|
||||
def hms_s(hms):
|
||||
hms = hms.split(':')
|
||||
|
@ -20,13 +16,50 @@ def hms_s(hms):
|
|||
seconds += int(hms[0])
|
||||
return seconds
|
||||
|
||||
def calc(seconds, kbps):
|
||||
final_kilobits = kbps * seconds
|
||||
final_bytes = final_kilobits * 128
|
||||
return final_bytes
|
||||
def s_hms(s):
    '''
    Format a number of seconds as a zero-padded 'HH:MM:SS' string.
    Fractional parts are truncated by the %d formatting.
    '''
    (whole_minutes, seconds) = divmod(s, 60)
    (hours, minutes) = divmod(whole_minutes, 60)
    fields = (hours, minutes, seconds)
    return '%02d:%02d:%02d' % fields
|
||||
|
||||
def kbps(time=None, size=None, kbps=None):
    '''
    Given any two of duration, file size, and bitrate, compute the third.

    time:
        The duration, as a string understood by `hms_s` (such as HH:MM:SS).
    size:
        The file size, as a string understood by `bytestring.parsebytes`.
    kbps:
        The bitrate in kilobits per second, as a number or numeric string.
        One kilobit is treated as 128 bytes.

    Exactly one of the three must be None; that is the value computed.

    Returns:
        size unknown: the size as formatted by `bytestring.bytestring`.
        time unknown: the duration as an 'HH:MM:SS' string.
        kbps unknown: the bitrate as an int.

    Raises ValueError if the number of None arguments is not exactly one.
    '''
    if [time, size, kbps].count(None) != 1:
        raise ValueError('Incorrect number of unknowns')

    if kbps is not None:
        # float rather than int, so that fractional bitrates such as 2330.17
        # are neither truncated nor rejected when given as strings.
        kbps = float(kbps)

    if size is None:
        seconds = hms_s(time)
        kibs = kbps / 8
        size = kibs * 1024
        size *= seconds
        return bytestring.bytestring(size)

    if time is None:
        size = bytestring.parsebytes(size)
        kilobits = size / 128
        return s_hms(kilobits / kbps)

    # Only the bitrate remains unknown.
    seconds = hms_s(time)
    size = bytestring.parsebytes(size)
    kibs = size / 1024
    kilobits = kibs * 8
    return int(kilobits / seconds)
|
||||
|
||||
def example_argparse(args):
    '''
    Print the result of kbps() for the parsed command-line arguments.

    args: a namespace with `time`, `size` and `kbps` attributes.
    '''
    print(kbps(time=args.time, size=args.size, kbps=args.kbps))
|
||||
|
||||
def main(argv):
    '''
    Parse `argv` (the command line without the program name) and run the
    calculation. Each of -t/--time, -s/--size and -k/--kbps defaults to None
    when omitted.
    '''
    parser = argparse.ArgumentParser()

    parser.add_argument('-t', '--time', dest='time', default=None)
    parser.add_argument('-s', '--size', dest='size', default=None)
    parser.add_argument('-k', '--kbps', dest='kbps', default=None)
    parser.set_defaults(func=example_argparse)

    args = parser.parse_args(argv)
    args.func(args)
|
||||
|
||||
if __name__ == '__main__':
|
||||
length = sys.argv[1] # HH:MM:SS
|
||||
kbps = int(sys.argv[2])
|
||||
seconds = hms_s(length)
|
||||
print(bytestring.bytestring(calc(seconds, kbps)))
|
||||
main(sys.argv[1:])
|
||||
|
|
|
@ -1,12 +1,8 @@
|
|||
import sys
|
||||
|
||||
try:
|
||||
sys.path.append('C:\\git\\else\\Bytestring')
|
||||
import bytestring
|
||||
except ImportError:
|
||||
# pip install
|
||||
# https://raw.githubusercontent.com/voussoir/else/master/_voussoirkit/voussoirkit.zip
|
||||
from vousoirkit import bytestring
|
||||
# pip install
|
||||
# https://raw.githubusercontent.com/voussoir/else/master/_voussoirkit/voussoirkit.zip
|
||||
from voussoirkit import bytestring
|
||||
|
||||
def hms_s(hms):
|
||||
hms = hms.split(':')
|
||||
|
|
|
@ -9,8 +9,7 @@ import sys
|
|||
PIL.ImageFile.LOAD_TRUNCATED_IMAGES = True
|
||||
|
||||
if '/r' in sys.argv:
|
||||
sys.path.append('C:\\git\\else\\spinaltap')
|
||||
import spinal
|
||||
from voussoirkit import spinal
|
||||
walker = spinal.walk_generator()
|
||||
files = list(walker)
|
||||
files = [f.absolute_path for f in files]
|
||||
|
|
66
Toolbox/subtitle_shift.py
Normal file
66
Toolbox/subtitle_shift.py
Normal file
|
@ -0,0 +1,66 @@
|
|||
'''
Usage:

Shift all subtitles 10 seconds forward:
> subtitle_shift file.srt +10

Shift all subtitles 10 seconds backward:
> subtitle_shift file.srt -10

This will produce "file_correct.srt" with the new timestamps.
'''

import os
import sys


def shift_timestamp(word, offset):
    '''
    Shift one timestamp of the form 'HH:MM:SS,mmm' by `offset` seconds and
    return it in the same form.

    The arithmetic is done in whole milliseconds so that the seconds field
    can never be rounded up to 60.

    Raises ValueError('Negative time') if the shifted time is below zero.
    '''
    # 1.) 01:23:45,678 | our input
    # 2.) 01:23:45:678 | comma to colon
    # 3.) 5025678      | split by colon and sum, in milliseconds
    # 4.) 5035678      | add offset
    # 5.) 01:23:55,678 | reformat
    fields = [int(x) for x in word.replace(',', ':').split(':')]
    (hours, minutes, seconds, millis) = fields
    total_millis = ((3600 * hours) + (60 * minutes) + seconds) * 1000 + millis
    total_millis += int(round(offset * 1000))

    if total_millis < 0:
        raise ValueError('Negative time')

    (total_seconds, millis) = divmod(total_millis, 1000)
    (hours, seconds) = divmod(total_seconds, 3600)
    (minutes, seconds) = divmod(seconds, 60)
    return '%02d:%02d:%02d,%03d' % (hours, minutes, seconds, millis)


def shift_line(line, offset):
    '''
    Shift every timestamp on a '-->' line by `offset` seconds.

    Returns (line, changed). `changed` is True when at least one timestamp
    was found and rewritten; lines without '-->' are returned untouched.
    '''
    if '-->' not in line:
        return (line, False)

    changed = False
    words = line.split(' ')
    for (wordindex, word) in enumerate(words):
        if not (':' in word and ',' in word):
            continue

        if not word.replace(':', '').replace(',', '').isdigit():
            continue

        words[wordindex] = shift_timestamp(word, offset)
        changed = True

    if changed:
        line = ' '.join(words)
    return (line, changed)


def main(argv):
    '''
    argv[0] is the subtitle filename and argv[1] is the offset in seconds.
    Each shifted line is printed, and the result is written next to the
    input with '_correct' inserted before the extension.
    '''
    filename = argv[0]
    offset = float(argv[1])

    with open(filename, 'r') as handle:
        lines = [line.strip() for line in handle]

    for (lineindex, line) in enumerate(lines):
        (line, changed) = shift_line(line, offset)
        if changed:
            print(line)
            lines[lineindex] = line

    (name, extension) = os.path.splitext(filename)
    newname = name + '_correct' + extension
    with open(newname, 'w') as handle:
        handle.write('\n'.join(lines))


if __name__ == '__main__':
    main(sys.argv[1:])
|
|
@ -1,32 +0,0 @@
|
|||
body
|
||||
{
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
background-color:#fff;
|
||||
margin: 8px;
|
||||
}
|
||||
#header
|
||||
{
|
||||
display: flex;
|
||||
flex-direction: row;
|
||||
justify-content: center;
|
||||
align-content: center;
|
||||
margin-bottom: 4px;
|
||||
}
|
||||
.header_element
|
||||
{
|
||||
display: flex;
|
||||
justify-content: center;
|
||||
flex: 1;
|
||||
background-color: rgba(0, 0, 0, 0.1);
|
||||
}
|
||||
.header_element:hover
|
||||
{
|
||||
background-color: rgba(0, 0, 0, 0.2);
|
||||
}
|
||||
#content_body
|
||||
{
|
||||
flex: 0 0 auto;
|
||||
display: flex;
|
||||
flex-direction: row;
|
||||
}
|
|
@ -1,83 +0,0 @@
|
|||
// POST a single key/value pair as form data to /postexample.
// `callback` is handed to post() unchanged.
function post_example(key, value, callback)
{
    var url = "/postexample";
    // Declared with var so it does not leak into the global scope.
    var data = new FormData();
    data.append(key, value);
    return post(url, data, callback);
}
|
||||
|
||||
// A callback that does nothing, for callers that do not need the response.
function null_callback()
{
    return;
}
|
||||
|
||||
// Send `data` to `url` with an asynchronous POST request.
// When the request completes (readyState 4) and `callback` is not null, the
// response text is logged and then passed to `callback` after JSON.parse.
// Nothing is returned.
function post(url, data, callback)
{
    var request = new XMLHttpRequest();
    request.answer = null;
    request.onreadystatechange = function()
    {
        if (request.readyState == 4)
        {
            var text = request.responseText;
            if (callback != null)
            {
                console.log(text);
                // NOTE(review): JSON.parse throws on a non-JSON response body.
                callback(JSON.parse(text));
            }
        }
    };
    var asynchronous = true;
    request.open("POST", url, asynchronous);
    request.send(data);
}
|
||||
|
||||
// Make pressing Enter (keyCode 13) inside `box` click `button`.
function bind_box_to_button(box, button)
{
    box.onkeydown = function(e)
    {
        // Prefer the event passed to the handler; fall back to the global
        // event object where the browser does not pass one.
        e = e || window.event;
        if (e.keyCode == 13)
        {
            button.click();
        }
    };
}
|
||||
// Key handler that gives `box` a command history.
// Reads the key from the global `event` object, so it is meant to be called
// while a key event is being handled.
//   Enter  (13): store box.value in the history, click `button`, clear box.
//   Up     (38): step backwards through the history, stopping at the oldest.
//   Escape (27): clear the box.
//   Other keys : leave history browsing, so the next Up starts at the newest.
// The history is kept on the box as `entry_history` / `entry_history_pos`.
function entry_with_history_hook(box, button)
{
    //console.log(event.keyCode);
    if (box.entry_history === undefined)
    {
        box.entry_history = [];
    }
    if (box.entry_history_pos === undefined)
    {
        box.entry_history_pos = -1;
    }

    if (event.keyCode == 13)
    {
        /* Enter */
        box.entry_history.push(box.value);
        button.click();
        box.value = "";
        // Submitting ends history browsing; otherwise Up, Enter, Up would
        // continue from the old position instead of the newest entry.
        box.entry_history_pos = -1;
    }
    else if (event.keyCode == 38)
    {
        /* Up arrow */
        if (box.entry_history.length == 0)
        {
            return;
        }
        if (box.entry_history_pos == -1)
        {
            box.entry_history_pos = box.entry_history.length - 1;
        }
        else if (box.entry_history_pos > 0)
        {
            box.entry_history_pos -= 1;
        }
        box.value = box.entry_history[box.entry_history_pos];
    }
    else if (event.keyCode == 27)
    {
        /* Escape */
        box.value = "";
    }
    else
    {
        box.entry_history_pos = -1;
    }
}
|
Binary file not shown.
Before Width: | Height: | Size: 448 B |
|
@ -1,145 +0,0 @@
|
|||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
{% import "header.html" as header %}
|
||||
<title>{{channel['name']}}</title>
|
||||
<meta charset="UTF-8">
|
||||
<link rel="stylesheet" href="/static/common.css">
|
||||
<script src="/static/common.js"></script>
|
||||
|
||||
<style>
|
||||
#content_body
|
||||
{
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
}
|
||||
|
||||
.video_card_downloaded,
|
||||
.video_card_ignored,
|
||||
.video_card_pending
|
||||
{
|
||||
position: relative;
|
||||
margin: 8px;
|
||||
padding: 10px;
|
||||
border-radius: 4px;
|
||||
border: 1px solid #000;
|
||||
}
|
||||
.video_card_pending
|
||||
{
|
||||
background-color: #ffffaa;
|
||||
}
|
||||
.video_card_ignored
|
||||
{
|
||||
background-color: #ffc886;
|
||||
}
|
||||
.video_card_downloaded
|
||||
{
|
||||
background-color: #aaffaa;
|
||||
}
|
||||
|
||||
.action_toolbox
|
||||
{
|
||||
float: right;
|
||||
display: inline-flex;
|
||||
flex-direction: column;
|
||||
position: relative;
|
||||
}
|
||||
.video_action_dropdown
|
||||
{
|
||||
z-index: 1;
|
||||
background-color: #fff;
|
||||
padding: 4px;
|
||||
border: 1px solid #000;
|
||||
position: absolute;
|
||||
top: 100%;
|
||||
right: 0;
|
||||
display: none;
|
||||
flex-direction: column;
|
||||
}
|
||||
.refresh_button
|
||||
{
|
||||
width: 10%;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
|
||||
|
||||
<body>
|
||||
{{header.make_header()}}
|
||||
<div id="content_body">
|
||||
<button class="refresh_button" onclick="refresh_channel('{{channel['id']}}', false, null_callback)">Refresh new videos</button>
|
||||
<button class="refresh_button" onclick="refresh_channel('{{channel['id']}}', true, null_callback)">Refresh everything</button>
|
||||
<span><a href="/channel/{{channel['id']}}">All</a></span>
|
||||
<span><a href="/channel/{{channel['id']}}/pending">Pending</a></span>
|
||||
<span><a href="/channel/{{channel['id']}}/ignored">Ignored</a></span>
|
||||
<span><a href="/channel/{{channel['id']}}/downloaded">Downloaded</a></span>
|
||||
{% for video in videos %}
|
||||
|
||||
{% if video['download'] == "downloaded" %}
|
||||
<div class="video_card_downloaded">
|
||||
{% elif video['download'] == "ignored" %}
|
||||
<div class="video_card_ignored">
|
||||
{% else %}
|
||||
<div class="video_card_pending">
|
||||
{% endif %}
|
||||
<a href="https://www.youtube.com/watch?v={{video['id']}}">{{video['title']}}</a>
|
||||
<div class="action_toolbox">
|
||||
<button class="video_action_dropdown_toggle" onclick="toggle_dropdown(this.nextElementSibling)">Actions</button>
|
||||
<div class="video_action_dropdown">
|
||||
{% if video['download'] == "downloaded" %}
|
||||
<button class="video_action_ignore" onclick="mark_video_state('{{video['id']}}', 'pending', null_callback); toggle_dropdown(this.parentElement);">Revert to Pending</button>
|
||||
{% elif video['download'] == "ignored" %}
|
||||
<button class="video_action_ignore" onclick="mark_video_state('{{video['id']}}', 'pending', null_callback); toggle_dropdown(this.parentElement);">Revert to Pending</button>
|
||||
{% else %}
|
||||
<button class="video_action_download" onclick="start_download('{{video['id']}}', null_callback); toggle_dropdown(this.parentElement);">Download</button>
|
||||
<button class="video_action_ignore" onclick="mark_video_state('{{video['id']}}', 'ignored', null_callback); toggle_dropdown(this.parentElement);">Ignore</button>
|
||||
{% endif %}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
{% endfor %}
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
|
||||
|
||||
<script type="text/javascript">
|
||||
// POST to /refresh_channel with the fields "channel_id" and "force".
// `callback` is handed to post() unchanged.
function refresh_channel(channel_id, force, callback)
{
    var url = "/refresh_channel";
    // Declared with var so it does not leak into the global scope.
    var data = new FormData();
    data.append("channel_id", channel_id);
    data.append("force", force);
    return post(url, data, callback);
}
|
||||
|
||||
// POST to /mark_video_state with the fields "video_id" and "state".
// `callback` is handed to post() unchanged.
function mark_video_state(video_id, state, callback)
{
    var url = "/mark_video_state";
    // Declared with var so it does not leak into the global scope.
    var data = new FormData();
    data.append("video_id", video_id);
    data.append("state", state);
    return post(url, data, callback);
}
|
||||
// POST to /start_download with the field "video_id".
// `callback` is handed to post() unchanged.
function start_download(video_id, callback)
{
    var url = "/start_download";
    // Declared with var so it does not leak into the global scope.
    var data = new FormData();
    data.append("video_id", video_id);
    return post(url, data, callback);
}
|
||||
|
||||
|
||||
// Show `dropdown` as inline-flex if it is not already, otherwise hide it.
function toggle_dropdown(dropdown)
{
    if (dropdown.style.display != "inline-flex")
    {
        dropdown.style.display = "inline-flex";
    }
    else
    {
        dropdown.style.display = "none";
    }
}
|
||||
</script>
|
|
@ -1,67 +0,0 @@
|
|||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
{% import "header.html" as header %}
|
||||
<title>Channels</title>
|
||||
<meta charset="UTF-8">
|
||||
<link rel="stylesheet" href="/static/common.css">
|
||||
<script src="/static/common.js"></script>
|
||||
|
||||
<style>
|
||||
#content_body
|
||||
{
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
}
|
||||
.channel_card_downloaded,
|
||||
.channel_card_pending
|
||||
{
|
||||
margin: 8px;
|
||||
padding: 10px;
|
||||
border-radius: 4px;
|
||||
border: 1px solid #000;
|
||||
}
|
||||
.channel_card_pending
|
||||
{
|
||||
background-color: #ffffaa;
|
||||
}
|
||||
.channel_card_downloaded
|
||||
{
|
||||
background-color: #aaffaa;
|
||||
}
|
||||
.refresh_button
|
||||
{
|
||||
width: 10%;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
|
||||
|
||||
<body>
|
||||
{{header.make_header()}}
|
||||
<div id="content_body">
|
||||
<button class="refresh_button" onclick="refresh_all_channels(false)">Refresh new videos</button>
|
||||
<button class="refresh_button" onclick="refresh_all_channels(true)">Refresh everything</button>
|
||||
{% for channel in channels %}
|
||||
{% if channel['has_pending'] %}
|
||||
<div class="channel_card_pending">
|
||||
{% else %}
|
||||
<div class="channel_card_downloaded">
|
||||
{% endif %}
|
||||
<a href="/channel/{{channel['id']}}">{{channel['name']}}</a>
|
||||
</div>
|
||||
{% endfor %}
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
|
||||
|
||||
<script type="text/javascript">
|
||||
// POST to /refresh_all_channels with the field "force".
// The response is discarded via null_callback.
function refresh_all_channels(force)
{
    var url = "/refresh_all_channels";
    // Declared with var so it does not leak into the global scope.
    var data = new FormData();
    data.append("force", force);
    return post(url, data, null_callback);
}
|
||||
</script>
|
|
@ -1,6 +0,0 @@
|
|||
{% macro make_header() %}
|
||||
<div id="header">
|
||||
<a class="header_element" href="/">Home</a>
|
||||
<a class="header_element" href="/channels">Channels</a>
|
||||
</div>
|
||||
{% endmacro %}
|
|
@ -1,28 +0,0 @@
|
|||
<!DOCTYPE html>
|
||||
<html>
|
||||
|
||||
<style>
|
||||
body, a
|
||||
{
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
justify-content: center;
|
||||
align-items: center;
|
||||
}
|
||||
</style>
|
||||
|
||||
<head>
|
||||
{% import "header.html" as header %}
|
||||
<title>Flasksite</title>
|
||||
<meta charset="UTF-8">
|
||||
<link rel="stylesheet" href="/static/common.css">
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<a href='/channels'>Manage channels</a>
|
||||
</body>
|
||||
|
||||
</html>
|
||||
|
||||
<script type="text/javascript">
|
||||
</script>
|
|
@ -1,214 +0,0 @@
|
|||
import os
|
||||
import sqlite3
|
||||
import ytapi
|
||||
|
||||
# Shell command template run once per video download.
# AVAILABLE FORMATTERS:
# url, id
YOUTUBE_DL_COMMAND = 'touch {id}.ytqueue'

# Column order of each table. This must match the CREATE TABLE statements in
# DB_INIT because rows are inserted and read positionally.
SQL_CHANNEL_COLUMNS = [
    'id',
    'name',
    'directory',
]

SQL_VIDEO_COLUMNS = [
    'id',
    'published',
    'author_id',
    'title',
    'description',
    'thumbnail',
    'download',
]

# Column name -> index within a fetched row.
SQL_CHANNEL = {key:index for (index, key) in enumerate(SQL_CHANNEL_COLUMNS)}
SQL_VIDEO = {key:index for (index, key) in enumerate(SQL_VIDEO_COLUMNS)}

# Stored in the database as PRAGMA user_version.
DATABASE_VERSION = 1
DB_INIT = '''
PRAGMA count_changes = OFF;
PRAGMA cache_size = 10000;
PRAGMA user_version = {user_version};
CREATE TABLE IF NOT EXISTS channels(
    id TEXT,
    name TEXT,
    directory TEXT COLLATE NOCASE
);
CREATE TABLE IF NOT EXISTS videos(
    id TEXT,
    published INT,
    author_id TEXT,
    title TEXT,
    description TEXT,
    thumbnail TEXT,
    download TEXT
);


CREATE INDEX IF NOT EXISTS index_channel_id on channels(id);
CREATE INDEX IF NOT EXISTS index_video_id on videos(id);
CREATE INDEX IF NOT EXISTS index_video_published on videos(published);
CREATE INDEX IF NOT EXISTS index_video_download on videos(download);

'''.format(user_version=DATABASE_VERSION)

DEFAULT_DBNAME = 'ycdl.db'

ERROR_DATABASE_OUTOFDATE = 'Database is out-of-date. {current} should be {new}'
|
||||
|
||||
def verify_is_abspath(path):
    '''
    Raise ValueError unless `path` is exactly equal to os.path.abspath(path).

    Because abspath also normalizes the path, an absolute path that is not
    in normalized form is rejected too.
    TO DO: Determine whether this is actually correct.
    '''
    if os.path.abspath(path) != path:
        raise ValueError('Not an abspath')
|
||||
|
||||
class YCDL:
    '''
    Keeps channels and their videos in an SQLite database, recording for
    each video whether it is 'pending', 'ignored' or 'downloaded'.
    '''
    def __init__(self, youtube, database_filename=None):
        '''
        youtube: the API object used for lookups. This class calls its
            get_user_name, get_video and get_user_videos methods.
        database_filename: path of the SQLite file; DEFAULT_DBNAME if None.

        If the file already exists and its user_version differs from
        DATABASE_VERSION, a message is printed and SystemExit is raised.
        '''
        self.youtube = youtube
        if database_filename is None:
            database_filename = DEFAULT_DBNAME

        # Must be checked before connecting, because connecting creates
        # the file.
        existing_database = os.path.exists(database_filename)
        self.sql = sqlite3.connect(database_filename)
        self.cur = self.sql.cursor()

        if existing_database:
            self.cur.execute('PRAGMA user_version')
            existing_version = self.cur.fetchone()[0]
            if existing_version != DATABASE_VERSION:
                message = ERROR_DATABASE_OUTOFDATE
                message = message.format(current=existing_version, new=DATABASE_VERSION)
                print(message)
                raise SystemExit

        statements = DB_INIT.split(';')
        for statement in statements:
            self.cur.execute(statement)

    def add_channel(self, channel_id, name=None, download_directory=None, get_videos=True, commit=False):
        '''
        Insert a channel row unless one with this id already exists.

        name: looked up through the API object when None.
        download_directory: if given, must pass verify_is_abspath.
        get_videos: if True, also refresh the channel's video list.
        commit: if True, commit the transaction at the end.
        '''
        if self.get_channel(channel_id) is not None:
            return

        if name is None:
            name = self.youtube.get_user_name(channel_id)

        data = [None] * len(SQL_CHANNEL)
        data[SQL_CHANNEL['id']] = channel_id
        data[SQL_CHANNEL['name']] = name
        if download_directory is not None:
            verify_is_abspath(download_directory)
        data[SQL_CHANNEL['directory']] = download_directory

        self.cur.execute('INSERT INTO channels VALUES(?, ?, ?)', data)
        if get_videos:
            self.refresh_channel(channel_id, commit=False)
        if commit:
            self.sql.commit()

    def channel_has_pending(self, channel_id):
        '''
        Return True if the channel has at least one video marked 'pending'.
        '''
        # The state is bound as a parameter rather than written as a
        # double-quoted literal, which SQL treats as an identifier.
        self.cur.execute(
            'SELECT * FROM videos WHERE author_id == ? AND download == ?',
            [channel_id, 'pending']
        )
        return self.cur.fetchone() is not None

    def channel_directory(self, channel_id):
        '''
        Return the channel's stored download directory, or None if the
        channel is unknown (the stored value itself may also be None).
        '''
        self.cur.execute('SELECT * FROM channels WHERE id == ?', [channel_id])
        fetch = self.cur.fetchone()
        if fetch is None:
            return None
        return fetch[SQL_CHANNEL['directory']]

    def download_video(self, video, force=False):
        '''
        Run YOUTUBE_DL_COMMAND for the video inside its channel's download
        directory (or the current directory when none is stored), then mark
        the video 'downloaded' and commit.

        video: a ytapi.Video, or a value accepted by the API object's
            get_video method.
        force: run the command even if the video is not 'pending'.
        '''
        if not isinstance(video, ytapi.Video):
            video = self.youtube.get_video(video)

        self.add_channel(video.author_id, get_videos=False, commit=False)
        status = self.insert_video(video, commit=True)

        if status['row'][SQL_VIDEO['download']] != 'pending' and not force:
            print('That video does not need to be downloaded.')
            return

        download_directory = self.channel_directory(video.author_id)
        download_directory = download_directory or os.getcwd()

        current_directory = os.getcwd()
        os.makedirs(download_directory, exist_ok=True)
        os.chdir(download_directory)
        try:
            url = 'https://www.youtube.com/watch?v={id}'.format(id=video.id)
            command = YOUTUBE_DL_COMMAND.format(url=url, id=video.id)
            # NOTE(review): the command goes through the shell and its exit
            # status is ignored, so a failed command is still recorded as
            # downloaded below.
            os.system(command)
        finally:
            # Always return to the directory we started in.
            os.chdir(current_directory)

        self.cur.execute('UPDATE videos SET download = ? WHERE id == ?', ['downloaded', video.id])
        self.sql.commit()

    def get_channel(self, channel_id):
        '''
        Return the channel as a dict keyed by column name, or None if there
        is no such channel.
        '''
        self.cur.execute('SELECT * FROM channels WHERE id == ?', [channel_id])
        fetch = self.cur.fetchone()
        if not fetch:
            return None
        fetch = {key: fetch[SQL_CHANNEL[key]] for key in SQL_CHANNEL}
        return fetch

    def get_channels(self):
        '''
        Return every channel as a dict keyed by column name, sorted
        case-insensitively by name.
        '''
        self.cur.execute('SELECT * FROM channels')
        channels = self.cur.fetchall()
        channels = [{key: channel[SQL_CHANNEL[key]] for key in SQL_CHANNEL} for channel in channels]
        channels.sort(key=lambda x: x['name'].lower())
        return channels

    def get_videos_by_channel(self, channel_id):
        '''
        Return the channel's videos as dicts keyed by column name, with the
        highest `published` value first. Returns [] if there are none.
        '''
        self.cur.execute('SELECT * FROM videos WHERE author_id == ?', [channel_id])
        videos = self.cur.fetchall()
        if not videos:
            return []
        videos = [{key: video[SQL_VIDEO[key]] for key in SQL_VIDEO} for video in videos]
        videos.sort(key=lambda x: x['published'], reverse=True)
        return videos

    def mark_video_state(self, video_id, state, commit=True):
        '''
        Mark the video as ignored, pending, or downloaded.

        Raises ValueError for any other state and KeyError if the video is
        not in the database.
        '''
        if state not in ['ignored', 'pending', 'downloaded']:
            raise ValueError(state)
        self.cur.execute('SELECT * FROM videos WHERE id == ?', [video_id])
        if self.cur.fetchone() is None:
            raise KeyError(video_id)
        self.cur.execute('UPDATE videos SET download = ? WHERE id == ?', [state, video_id])
        if commit:
            self.sql.commit()

    def refresh_channel(self, channel_id, force=True, commit=True):
        '''
        Insert the videos yielded by the API object for this channel.

        force: if False, stop at the first video that is already stored.
        commit: if True, commit once after the loop.
        '''
        video_generator = self.youtube.get_user_videos(uid=channel_id)
        for video in video_generator:
            status = self.insert_video(video, commit=False)
            if not force and not status['new']:
                break
        if commit:
            self.sql.commit()

    def insert_video(self, video, commit=True):
        '''
        Store the video as 'pending' unless it is already in the database,
        adding its channel first if necessary.

        video: a ytapi.Video, or a value accepted by the API object's
            get_video method.

        Returns {'new': bool, 'row': row}, where row is the stored row for
        an existing video or the newly inserted values otherwise.
        '''
        if not isinstance(video, ytapi.Video):
            video = self.youtube.get_video(video)

        self.add_channel(video.author_id, get_videos=False, commit=False)
        self.cur.execute('SELECT * FROM videos WHERE id == ?', [video.id])
        fetch = self.cur.fetchone()
        if fetch is not None:
            return {'new': False, 'row': fetch}

        data = [None] * len(SQL_VIDEO)
        data[SQL_VIDEO['id']] = video.id
        data[SQL_VIDEO['published']] = video.published
        data[SQL_VIDEO['author_id']] = video.author_id
        data[SQL_VIDEO['title']] = video.title
        data[SQL_VIDEO['description']] = video.description
        data[SQL_VIDEO['thumbnail']] = video.thumbnail['url']
        data[SQL_VIDEO['download']] = 'pending'

        self.cur.execute('INSERT INTO videos VALUES(?, ?, ?, ?, ?, ?, ?)', data)
        if commit:
            self.sql.commit()
        return {'new': True, 'row': data}
|
|
@ -1,6 +0,0 @@
|
|||
import ytapi
|
||||
import ycdl
|
||||
import bot
|
||||
|
||||
# Build the API object from the key held in the bot module, then wrap it in
# the database-backed YCDL object.
youtube_core = ytapi.Youtube(bot.YOUTUBE_KEY)
youtube = ycdl.YCDL(youtube_core)
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Reference in a new issue