else
This commit is contained in:
parent
9dd6bc8c02
commit
80966b7fb8
66 changed files with 13955 additions and 50 deletions
54
.gitignore
vendored
54
.gitignore
vendored
|
@ -1,34 +1,36 @@
|
|||
Classifieds/
|
||||
|
||||
# Windows image file caches
|
||||
Thumbs.db
|
||||
ehthumbs.db
|
||||
|
||||
# Folder config file
|
||||
Desktop.ini
|
||||
|
||||
# Recycle Bin used on file shares
|
||||
$RECYCLE.BIN/
|
||||
|
||||
# Windows Installer files
|
||||
*.cab
|
||||
*.msi
|
||||
*.msm
|
||||
*.msp
|
||||
|
||||
# =========================
|
||||
# Operating System Files
|
||||
# =========================
|
||||
|
||||
# OSX
|
||||
# =========================
|
||||
|
||||
AwfulCrateBox/
|
||||
Classifieds/
|
||||
|
||||
# Windows image file caches
|
||||
Thumbs.db
|
||||
ehthumbs.db
|
||||
|
||||
# Folder config file
|
||||
Desktop.ini
|
||||
|
||||
# Recycle Bin used on file shares
|
||||
$RECYCLE.BIN/
|
||||
|
||||
# Windows Installer files
|
||||
*.cab
|
||||
*.msi
|
||||
*.msm
|
||||
*.msp
|
||||
|
||||
# =========================
|
||||
# Operating System Files
|
||||
# =========================
|
||||
|
||||
# OSX
|
||||
# =========================
|
||||
|
||||
.DS_Store
|
||||
.AppleDouble
|
||||
.LSOverride
|
||||
|
||||
# Icon must end with two \r
|
||||
Icon
|
||||
Icon
|
||||
|
||||
|
||||
# Thumbnails
|
||||
._*
|
||||
|
|
11
AHK/rapid_t.ahk
Normal file
11
AHK/rapid_t.ahk
Normal file
|
@ -0,0 +1,11 @@
|
|||
#NoEnv ; Recommended for performance and compatibility with future AutoHotkey releases.
|
||||
SendMode Input ; Recommended for new scripts due to its superior speed and reliability.
|
||||
SetWorkingDir %A_ScriptDir% ; Ensures a consistent starting directory.
|
||||
|
||||
+T::
|
||||
While GetKeyState("t", "P")
|
||||
{
|
||||
Click WheelDown
|
||||
Sleep 20
|
||||
}
|
||||
Return
|
|
@ -3,6 +3,7 @@ import sys
|
|||
|
||||
imagename = sys.argv[1]
|
||||
image = Image.open(imagename)
|
||||
image = image.convert('RGBA')
|
||||
w = image.size[0] - 1
|
||||
h = image.size[1] - 1
|
||||
for i in range(5):
|
||||
|
|
|
@ -64,7 +64,8 @@
|
|||
import binascii
|
||||
import sys
|
||||
from PIL import Image
|
||||
|
||||
import os
|
||||
print(os.getcwd())
|
||||
try:
|
||||
INPUTFILE = sys.argv[1]
|
||||
except:
|
||||
|
|
|
@ -33,7 +33,7 @@ HELP_SENTENCE = '''
|
|||
---------------------------------------------------------------
|
||||
'''[1:-1] % (DEFAULT_SENTENCE)
|
||||
|
||||
def make_password(length=None, allowpunctuation=False, allowdigits=False):
|
||||
def make_password(length=None, allowpunctuation=False, allowdigits=False, digits_only=False, binary=False):
|
||||
'''
|
||||
Returns a string of length `length` consisting of a random selection
|
||||
of uppercase and lowercase letters, as well as punctuation and digits
|
||||
|
@ -41,12 +41,17 @@ def make_password(length=None, allowpunctuation=False, allowdigits=False):
|
|||
'''
|
||||
if length is None:
|
||||
length = DEFAULT_LENGTH
|
||||
|
||||
s = string.ascii_letters
|
||||
if allowpunctuation is True:
|
||||
s += string.punctuation
|
||||
if allowdigits is True:
|
||||
s += string.digits
|
||||
|
||||
if digits_only is False and binary is False:
|
||||
s = string.ascii_letters
|
||||
if allowpunctuation is True:
|
||||
s += string.punctuation
|
||||
if allowdigits is True:
|
||||
s += string.digits
|
||||
elif digits_only:
|
||||
s = string.digits
|
||||
elif binary:
|
||||
s = '01'
|
||||
|
||||
password = ''.join([random.choice(s) for x in range(length)])
|
||||
return password
|
||||
|
@ -100,7 +105,9 @@ if __name__ == '__main__':
|
|||
if mode == 'password':
|
||||
punc = 'p' in args
|
||||
digi = 'd' in args
|
||||
print(make_password(length, punc, digi))
|
||||
digi_only = 'dd' in args
|
||||
binary = 'b' in args
|
||||
print(make_password(length, punc, digi, digi_only, binary))
|
||||
|
||||
elif mode == 'sentence':
|
||||
if argc == 3:
|
||||
|
|
|
@ -16,7 +16,7 @@ def quadratic_formula(a, b, c):
|
|||
discriminant = math.sqrt(discriminant)
|
||||
b *= -1
|
||||
possible = (b + discriminant, b - discriminant)
|
||||
possible = [x / (2*a) for x in possible]
|
||||
possible = tuple(x / (2*a) for x in possible)
|
||||
return possible
|
||||
|
||||
def time_to_known_distance(velocity, distance, acceleration):
|
||||
|
@ -29,9 +29,11 @@ def time_to_known_distance(velocity, distance, acceleration):
|
|||
return min(possible)
|
||||
|
||||
def make_throw(starting_x, starting_y, starting_velocity, thrown_angle):
|
||||
# We don't track smallest_y because it's going to be 0!
|
||||
global smallest_x
|
||||
global largest_x
|
||||
global largest_y
|
||||
|
||||
upward = thrown_angle in range(1, 179, 1) or thrown_angle in range(-181, -359, -1)
|
||||
upward = -1 if upward else 1
|
||||
|
||||
|
@ -40,13 +42,6 @@ def make_throw(starting_x, starting_y, starting_velocity, thrown_angle):
|
|||
cos = math.cos(rads)
|
||||
tan = math.tan(rads)
|
||||
|
||||
throw = {'angle': thrown_angle}
|
||||
throw['horizontal_component'] = starting_velocity * cos * -upward
|
||||
throw['vertical_component'] = starting_velocity * sin * upward
|
||||
#print(thrown_angle, starting_velocity, throw['horizontal_component'])
|
||||
throw['hang_time'] = time_to_known_distance(throw['vertical_component'], starting_y, acceleration=9.8)
|
||||
throw['distance'] = throw['hang_time'] * throw['horizontal_component']
|
||||
|
||||
def parabola(x):
|
||||
# 100% credit goes to wikipedia authors
|
||||
# https://en.wikipedia.org/wiki/Projectile_motion#Parabolic_equation
|
||||
|
@ -56,15 +51,19 @@ def make_throw(starting_x, starting_y, starting_velocity, thrown_angle):
|
|||
y = left - (numerator / denominator)
|
||||
return y
|
||||
|
||||
throw = {'angle': thrown_angle}
|
||||
throw['parabola'] = parabola
|
||||
throw['horizontal_component'] = starting_velocity * cos * -upward
|
||||
throw['vertical_component'] = starting_velocity * sin * upward
|
||||
throw['hang_time'] = time_to_known_distance(throw['vertical_component'], starting_y, acceleration=9.8)
|
||||
throw['distance'] = throw['hang_time'] * throw['horizontal_component']
|
||||
throw['parabola_points'] = []
|
||||
#print(throw['vertical_component'], throw['hang_time'])
|
||||
|
||||
y = 1
|
||||
x = starting_x
|
||||
backwards = (thrown_angle in range(90, 270)) or (thrown_angle in range(-90, -270, -1))
|
||||
while y > 0:
|
||||
y = throw['parabola'](x) + starting_y
|
||||
y = parabola(x) + starting_y
|
||||
if y < 0:
|
||||
# To keep a smooth floor of 0, rescale the active x so that
|
||||
# it looks like it continues in the right direction underground.
|
||||
|
@ -77,7 +76,7 @@ def make_throw(starting_x, starting_y, starting_velocity, thrown_angle):
|
|||
if (smallest_x is None or x < smallest_x): smallest_x = math.floor(x)
|
||||
if (largest_x is None or x > largest_x): largest_x = math.ceil(x)
|
||||
if (largest_y is None or y > largest_y): largest_y = math.ceil(y)
|
||||
throw['parabola_points'].append([int(x), int(y)])
|
||||
throw['parabola_points'].append((int(x), int(y)))
|
||||
if backwards:
|
||||
x -= PLOT_STEP_X
|
||||
else:
|
||||
|
@ -141,12 +140,12 @@ for (index, t) in enumerate(throws):
|
|||
point_a = None
|
||||
for pointindex in range(len(t['parabola_points']) - 1):
|
||||
if point_a is None:
|
||||
point_a = t['parabola_points'][pointindex][:]
|
||||
point_a = list(t['parabola_points'][pointindex])
|
||||
point_a[0] = (round(point_a[0])) + abs(smallest_x) + PLOT_PAD_LEFT
|
||||
point_a[1] = (largest_y - round(point_a[1]))
|
||||
else:
|
||||
point_a = point_b
|
||||
point_b = t['parabola_points'][pointindex + 1][:]
|
||||
point_b = list(t['parabola_points'][pointindex + 1])
|
||||
point_b[0] = (round(point_b[0])) + abs(smallest_x) + PLOT_PAD_LEFT
|
||||
point_b[1] = (largest_y - round(point_b[1]))
|
||||
try:
|
||||
|
|
|
@ -1,3 +1,3 @@
|
|||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:889e3073049ed9d4b0dff0414ab769fbd9cac821177446ef9b5df08d34c8e2f5
|
||||
size 8042
|
||||
oid sha256:67e8ae6342a582acff7c6fa0904f24c5265a0bb666b132cb505dc56728bfedf9
|
||||
size 8054
|
||||
|
|
10
SnudownTest/.gitignore
vendored
Normal file
10
SnudownTest/.gitignore
vendored
Normal file
|
@ -0,0 +1,10 @@
|
|||
build/
|
||||
dist/
|
||||
snudown.egg-info/
|
||||
src/html_entities.h
|
||||
*.pyc
|
||||
*.so
|
||||
*.so.*
|
||||
*.o
|
||||
/fuzzing/bin
|
||||
/fuzzing/testing
|
4
SnudownTest/.gitmodules
vendored
Normal file
4
SnudownTest/.gitmodules
vendored
Normal file
|
@ -0,0 +1,4 @@
|
|||
[submodule "gumbo_snudown"]
|
||||
path = fuzzing/gumbo_snudown
|
||||
url = git@github.com:JordanMilne/gumbo-parser.git
|
||||
branch = markdown_validation
|
133
SnudownTest/Python.h
Normal file
133
SnudownTest/Python.h
Normal file
|
@ -0,0 +1,133 @@
|
|||
#ifndef Py_PYTHON_H
|
||||
#define Py_PYTHON_H
|
||||
/* Since this is a "meta-include" file, no #ifdef __cplusplus / extern "C" { */
|
||||
|
||||
/* Include nearly all Python header files */
|
||||
|
||||
#include "patchlevel.h"
|
||||
#include "pyconfig.h"
|
||||
#include "pymacconfig.h"
|
||||
|
||||
#include <limits.h>
|
||||
|
||||
#ifndef UCHAR_MAX
|
||||
#error "Something's broken. UCHAR_MAX should be defined in limits.h."
|
||||
#endif
|
||||
|
||||
#if UCHAR_MAX != 255
|
||||
#error "Python's source code assumes C's unsigned char is an 8-bit type."
|
||||
#endif
|
||||
|
||||
#if defined(__sgi) && defined(WITH_THREAD) && !defined(_SGI_MP_SOURCE)
|
||||
#define _SGI_MP_SOURCE
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
#ifndef NULL
|
||||
# error "Python.h requires that stdio.h define NULL."
|
||||
#endif
|
||||
|
||||
#include <string.h>
|
||||
#ifdef HAVE_ERRNO_H
|
||||
#include <errno.h>
|
||||
#endif
|
||||
#include <stdlib.h>
|
||||
#ifdef HAVE_UNISTD_H
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
/* For size_t? */
|
||||
#ifdef HAVE_STDDEF_H
|
||||
#include <stddef.h>
|
||||
#endif
|
||||
|
||||
/* CAUTION: Build setups should ensure that NDEBUG is defined on the
|
||||
* compiler command line when building Python in release mode; else
|
||||
* assert() calls won't be removed.
|
||||
*/
|
||||
#include <assert.h>
|
||||
|
||||
#include "pyport.h"
|
||||
#include "pymacro.h"
|
||||
|
||||
#include "pyatomic.h"
|
||||
|
||||
/* Debug-mode build with pymalloc implies PYMALLOC_DEBUG.
|
||||
* PYMALLOC_DEBUG is in error if pymalloc is not in use.
|
||||
*/
|
||||
#if defined(Py_DEBUG) && defined(WITH_PYMALLOC) && !defined(PYMALLOC_DEBUG)
|
||||
#define PYMALLOC_DEBUG
|
||||
#endif
|
||||
#if defined(PYMALLOC_DEBUG) && !defined(WITH_PYMALLOC)
|
||||
#error "PYMALLOC_DEBUG requires WITH_PYMALLOC"
|
||||
#endif
|
||||
#include "pymath.h"
|
||||
#include "pytime.h"
|
||||
#include "pymem.h"
|
||||
|
||||
#include "object.h"
|
||||
#include "objimpl.h"
|
||||
#include "typeslots.h"
|
||||
#include "pyhash.h"
|
||||
|
||||
#include "pydebug.h"
|
||||
|
||||
#include "bytearrayobject.h"
|
||||
#include "bytesobject.h"
|
||||
#include "unicodeobject.h"
|
||||
#include "longobject.h"
|
||||
#include "longintrepr.h"
|
||||
#include "boolobject.h"
|
||||
#include "floatobject.h"
|
||||
#include "complexobject.h"
|
||||
#include "rangeobject.h"
|
||||
#include "memoryobject.h"
|
||||
#include "tupleobject.h"
|
||||
#include "listobject.h"
|
||||
#include "dictobject.h"
|
||||
#include "enumobject.h"
|
||||
#include "setobject.h"
|
||||
#include "methodobject.h"
|
||||
#include "moduleobject.h"
|
||||
#include "funcobject.h"
|
||||
#include "classobject.h"
|
||||
#include "fileobject.h"
|
||||
#include "pycapsule.h"
|
||||
#include "traceback.h"
|
||||
#include "sliceobject.h"
|
||||
#include "cellobject.h"
|
||||
#include "iterobject.h"
|
||||
#include "genobject.h"
|
||||
#include "descrobject.h"
|
||||
#include "warnings.h"
|
||||
#include "weakrefobject.h"
|
||||
#include "structseq.h"
|
||||
#include "namespaceobject.h"
|
||||
|
||||
#include "codecs.h"
|
||||
#include "pyerrors.h"
|
||||
|
||||
#include "pystate.h"
|
||||
|
||||
#include "pyarena.h"
|
||||
#include "modsupport.h"
|
||||
#include "pythonrun.h"
|
||||
#include "ceval.h"
|
||||
#include "sysmodule.h"
|
||||
#include "intrcheck.h"
|
||||
#include "import.h"
|
||||
|
||||
#include "abstract.h"
|
||||
#include "bltinmodule.h"
|
||||
|
||||
#include "compile.h"
|
||||
#include "eval.h"
|
||||
|
||||
#include "pyctype.h"
|
||||
#include "pystrtod.h"
|
||||
#include "pystrcmp.h"
|
||||
#include "dtoa.h"
|
||||
#include "fileutils.h"
|
||||
#include "pyfpe.h"
|
||||
|
||||
#endif /* !Py_PYTHON_H */
|
12
SnudownTest/SECURITY.md
Normal file
12
SnudownTest/SECURITY.md
Normal file
|
@ -0,0 +1,12 @@
|
|||
For safety reasons, whenever you add or change something in Snudown,
|
||||
you should add a few test-cases that demonstrate your change and do a
|
||||
fuzzing run in `/fuzzing` by running `make afl`. Make sure you have `cmake`
|
||||
installed and in your `PATH`!
|
||||
|
||||
This uses [American Fuzzy Lop](http://lcamtuf.coredump.cx/afl/) and a
|
||||
modified [Google Gumbo](https://github.com/google/gumbo-parser/) to ensure
|
||||
there is no way to generate invalid HTML, and that there are no unsafe
|
||||
memory operations.
|
||||
|
||||
See [American Fuzzy Lop](http://lcamtuf.coredump.cx/afl/)'s instructions
|
||||
for your platform to get started.
|
487
SnudownTest/autolink.c
Normal file
487
SnudownTest/autolink.c
Normal file
|
@ -0,0 +1,487 @@
|
|||
/*
|
||||
* Copyright (c) 2011, Vicent Marti
|
||||
*
|
||||
* Permission to use, copy, modify, and distribute this software for any
|
||||
* purpose with or without fee is hereby granted, provided that the above
|
||||
* copyright notice and this permission notice appear in all copies.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "buffer.h"
|
||||
#include "autolink.h"
|
||||
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <ctype.h>
|
||||
|
||||
#if defined(_WIN32)
|
||||
#define strncasecmp _strnicmp
|
||||
#endif
|
||||
|
||||
/*
 * sd_autolink_issafe: report whether a link starts with a whitelisted prefix.
 *
 * Returns 1 when `link` begins (case-insensitively) with one of the
 * prefixes in the table below AND the byte right after that prefix is
 * alphanumeric, '#', '/' or '?'.  Returns 0 otherwise, including when the
 * link is not strictly longer than the matching prefix.
 */
int
sd_autolink_issafe(const uint8_t *link, size_t link_len)
{
	static const char *const prefixes[] = {
		"http://", "https://", "ftp://", "mailto://",
		"/", "git://", "steam://", "irc://", "news://", "mumble://",
		"ssh://", "ircs://", "ts3server://", "#"
	};
	const size_t prefix_count = sizeof(prefixes) / sizeof(prefixes[0]);
	size_t idx;

	for (idx = 0; idx < prefix_count; idx++) {
		const char *prefix = prefixes[idx];
		const size_t prefix_len = strlen(prefix);
		uint8_t next;

		/* there must be at least one byte after the prefix */
		if (link_len <= prefix_len)
			continue;

		if (strncasecmp((char *)link, prefix, prefix_len) != 0)
			continue;

		next = link[prefix_len];
		if (isalnum(next) || next == '#' || next == '/' || next == '?')
			return 1;
	}

	return 0;
}
|
||||
|
||||
/*
 * autolink_delim: trim a candidate link so it does not swallow trailing
 * punctuation.
 *
 * data       - start of the candidate link
 * link_end   - tentative length of the link in bytes
 * max_rewind - unused here; kept so all autolink helpers share a shape
 * size       - unused here; kept for the same reason
 *
 * Returns the adjusted link length, or 0 when nothing is left.
 */
static size_t
autolink_delim(uint8_t *data, size_t link_end, size_t max_rewind, size_t size)
{
	uint8_t cclose, copen = 0;
	size_t i;

	(void)max_rewind;
	(void)size;

	/* a '<' always terminates the link */
	for (i = 0; i < link_end; ++i)
		if (data[i] == '<') {
			link_end = i;
			break;
		}

	/* strip trailing punctuation and trailing HTML entities */
	while (link_end > 0) {
		uint8_t c = data[link_end - 1];

		if (c == 0)
			break;

		if (strchr("?!.,", c) != NULL)
			link_end--;

		else if (c == ';') {
			/*
			 * Look for an "&name;" entity ending here.  At least two
			 * bytes are needed before computing `link_end - 2`;
			 * otherwise the subtraction wraps around and data[] is
			 * read far out of range.  A lone ';' is simply dropped.
			 */
			if (link_end < 2) {
				link_end--;
				continue;
			}

			{
				size_t new_end = link_end - 2;

				while (new_end > 0 && isalpha(data[new_end]))
					new_end--;

				if (new_end < link_end - 2 && data[new_end] == '&')
					link_end = new_end;
				else
					link_end--;
			}
		}
		else break;
	}

	if (link_end == 0)
		return 0;

	cclose = data[link_end - 1];

	switch (cclose) {
	case '"':	copen = '"'; break;
	case '\'':	copen = '\''; break;
	case ')':	copen = '('; break;
	case ']':	copen = '['; break;
	case '}':	copen = '{'; break;
	}

	if (copen != 0) {
		size_t closing = 0;
		size_t opening = 0;

		/* Try to close the final punctuation sign in this same line;
		 * if we managed to close it outside of the URL, that means that it's
		 * not part of the URL. If it closes inside the URL, that means it
		 * is part of the URL.
		 *
		 * Examples:
		 *
		 *	foo http://www.pokemon.com/Pikachu_(Electric) bar
		 *		=> http://www.pokemon.com/Pikachu_(Electric)
		 *
		 *	foo (http://www.pokemon.com/Pikachu_(Electric)) bar
		 *		=> http://www.pokemon.com/Pikachu_(Electric)
		 *
		 *	foo http://www.pokemon.com/Pikachu_(Electric)) bar
		 *		=> http://www.pokemon.com/Pikachu_(Electric))
		 *
		 *	(foo http://www.pokemon.com/Pikachu_(Electric)) bar
		 *		=> foo http://www.pokemon.com/Pikachu_(Electric)
		 */

		/* when copen == cclose (quotes) every hit counts as an opening */
		for (i = 0; i < link_end; ++i) {
			if (data[i] == copen)
				opening++;
			else if (data[i] == cclose)
				closing++;
		}

		/* unbalanced: the closing character is not part of the link */
		if (closing != opening)
			link_end--;
	}

	return link_end;
}
|
||||
|
||||
/*
 * Checks that `prefix_char` occurs on a word boundary just before `data`,
 * where `data` points to the character to search to the left of, and a word boundary
 * is (currently) a whitespace character, punctuation, or the start of the string.
 *
 * data           - points at the '/' that follows the prefix character
 * max_rewind     - how many bytes before `data` may be claimed by the link
 * max_lookbehind - how many bytes before `data` may be inspected at all
 * size           - number of bytes available at `data`
 * prefix_char    - the expected prefix letter (e.g. 'r' or 'u')
 *
 * Returns the length of the prefix: 2 for "/r", 1 for a bare "r",
 * or 0 when there is no valid prefix.
 */
static int
check_reddit_autolink_prefix(
	const uint8_t* data,
	size_t max_rewind,
	size_t max_lookbehind,
	size_t size,
	char prefix_char
	)
{
	/* Make sure this `/` is part of `/?r/` */
	if (size < 2 || max_rewind < 1 || data[-1] != prefix_char)
		return 0;

	/* Not at the start of the buffer, no inlines to the immediate left of the `prefix_char` */
	if (max_rewind > 1) {
		/*
		 * Keep the byte unsigned: passing a negative value (a byte
		 * >= 0x80 stored in a signed char) to ispunct()/isspace()
		 * is undefined behaviour.
		 */
		const uint8_t boundary = data[-2];
		if (boundary == '/')
			return 2;
		/**
		 * Here's where our lack of unicode-awareness bites us. We don't correctly
		 * match punctuation / whitespace characters for the boundary, because we
		 * reject valid cases like "。r/example" (note the fullwidth period.)
		 *
		 * A better implementation might try to rewind over bytes with the 8th bit set, try
		 * to decode them to a valid codepoint, then do a unicode-aware check on the codepoint.
		 */
		else if (ispunct(boundary) || isspace(boundary))
			return 1;
		else
			return 0;
	} else if (max_lookbehind > 2) {
		/* There's an inline element just left of the `prefix_char`, is it an escaped forward
		 * slash? bail out so we correctly handle stuff like "\/r/foo". This will also correctly
		 * allow "\\/r/foo".
		 */
		if (data[-2] == '/' && data[-3] == '\\')
			return 0;
	}

	/* Must be a new-style shortlink with nothing relevant to the left of it. */
	return 1;
}
|
||||
|
||||
/*
 * check_domain: measure the run of domain-like characters at `data`.
 *
 * data        - start of the candidate domain
 * size        - number of bytes available at `data`
 * allow_short - non-zero to accept a domain without any dot
 *
 * The first byte must be alphanumeric; subsequent bytes may be
 * alphanumeric, '-' or '.'.  The scan never looks at the final byte of
 * the input (the loop bound is `size - 1`).
 *
 * Returns the length of the run, or 0 when the input is empty, does not
 * start with an alphanumeric byte, or (unless `allow_short`) has no dot.
 */
static size_t
check_domain(uint8_t *data, size_t size, int allow_short)
{
	size_t i, np = 0;

	/* empty input: avoid reading data[0] and wrapping `size - 1` */
	if (size == 0)
		return 0;

	if (!isalnum(data[0]))
		return 0;

	for (i = 1; i < size - 1; ++i) {
		if (data[i] == '.') np++;
		else if (!isalnum(data[i]) && data[i] != '-') break;
	}

	if (allow_short) {
		/* We don't need a valid domain in the strict sense (with
		 * at least one dot); so just make sure it's composed of valid
		 * domain characters and return the length of the valid
		 * sequence. */
		return i;
	} else {
		/* a valid domain needs to have at least a dot.
		 * that's as far as we get */
		return np ? i : 0;
	}
}
|
||||
|
||||
/*
 * sd_autolink__www: try to autolink a bare "www." domain starting at `data`.
 *
 * rewind_p   - out: number of bytes before `data` that belong to the link
 *              (always 0 for this matcher)
 * link       - out: buffer that receives the link text
 * data       - points at the candidate "www."
 * max_rewind - number of bytes available before `data`
 * size       - number of bytes available at `data`
 * flags      - unused by this matcher
 *
 * Returns the number of bytes consumed from `data`, or 0 when there is
 * no link here.
 */
size_t
sd_autolink__www(
	size_t *rewind_p,
	struct buf *link,
	uint8_t *data,
	size_t max_rewind,
	size_t size,
	unsigned int flags)
{
	size_t link_end;

	(void)flags;

	/* the link must start the input or follow punctuation / whitespace */
	if (max_rewind > 0 && !ispunct(data[-1]) && !isspace(data[-1]))
		return 0;

	if (size < 4 || memcmp(data, "www.", strlen("www.")) != 0)
		return 0;

	link_end = check_domain(data, size, 0);

	if (link_end == 0)
		return 0;

	/* the link runs up to the next whitespace byte */
	while (link_end < size && !isspace(data[link_end]))
		link_end++;

	link_end = autolink_delim(data, link_end, max_rewind, size);

	if (link_end == 0)
		return 0;

	bufput(link, data, link_end);
	*rewind_p = 0;

	/* return the size_t as-is; narrowing through int could truncate it */
	return link_end;
}
|
||||
|
||||
/*
 * sd_autolink__email: try to autolink an e-mail address around `data`.
 *
 * rewind_p   - out: number of bytes before `data` that belong to the link
 * link       - out: buffer that receives the link text
 * data       - current scan position; the local part is searched for to
 *              its left, the rest of the address from `data` onwards
 * max_rewind - number of bytes available before `data`
 * size       - number of bytes available at `data`
 * flags      - unused by this matcher
 *
 * Returns the number of bytes consumed from `data`, or 0 when there is
 * no link here.
 */
size_t
sd_autolink__email(
	size_t *rewind_p,
	struct buf *link,
	uint8_t *data,
	size_t max_rewind,
	size_t size,
	unsigned int flags)
{
	size_t link_end, rewind;
	int nb = 0, np = 0;

	(void)flags;

	/* walk left over the local part: alphanumerics plus ". + - _" */
	for (rewind = 0; rewind < max_rewind; ++rewind) {
		/*
		 * Step the pointer backwards instead of indexing with
		 * `-rewind - 1`: negating a size_t yields a huge unsigned
		 * offset rather than a negative index.
		 */
		uint8_t c = *(data - rewind - 1);

		if (c == 0)
			break;

		if (isalnum(c))
			continue;

		if (strchr(".+-_", c) != NULL)
			continue;

		break;
	}

	if (rewind == 0)
		return 0;

	/* walk right, counting '@' signs and dots that are not the last byte */
	for (link_end = 0; link_end < size; ++link_end) {
		uint8_t c = data[link_end];

		if (isalnum(c))
			continue;

		if (c == '@')
			nb++;
		else if (c == '.' && link_end < size - 1)
			np++;
		else if (c != '-' && c != '_')
			break;
	}

	/* need exactly one '@' and at least one dot */
	if (link_end < 2 || nb != 1 || np == 0)
		return 0;

	link_end = autolink_delim(data, link_end, max_rewind, size);

	if (link_end == 0)
		return 0;

	bufput(link, data - rewind, link_end + rewind);
	*rewind_p = rewind;

	return link_end;
}
|
||||
|
||||
size_t
|
||||
sd_autolink__url(
|
||||
size_t *rewind_p,
|
||||
struct buf *link,
|
||||
uint8_t *data,
|
||||
size_t max_rewind,
|
||||
size_t size,
|
||||
unsigned int flags)
|
||||
{
|
||||
size_t link_end, rewind = 0, domain_len;
|
||||
|
||||
if (size < 4 || data[1] != '/' || data[2] != '/')
|
||||
return 0;
|
||||
|
||||
while (rewind < max_rewind && isalpha(data[-rewind - 1]))
|
||||
rewind++;
|
||||
|
||||
if (!sd_autolink_issafe(data - rewind, size + rewind))
|
||||
return 0;
|
||||
|
||||
link_end = strlen("://");
|
||||
|
||||
domain_len = check_domain(
|
||||
data + link_end,
|
||||
size - link_end,
|
||||
flags & SD_AUTOLINK_SHORT_DOMAINS);
|
||||
|
||||
if (domain_len == 0)
|
||||
return 0;
|
||||
|
||||
link_end += domain_len;
|
||||
while (link_end < size && !isspace(data[link_end]))
|
||||
link_end++;
|
||||
|
||||
link_end = autolink_delim(data, link_end, max_rewind, size);
|
||||
|
||||
if (link_end == 0)
|
||||
return 0;
|
||||
|
||||
bufput(link, data - rewind, link_end + rewind);
|
||||
*rewind_p = rewind;
|
||||
|
||||
return link_end;
|
||||
}
|
||||
|
||||
/**
 * sd_autolink__subreddit: try to autolink an r/foo or /r/foo reference.
 *
 * In a valid /?r/ link, `*data` will always point to the '/' after the first 'r'.
 * In pseudo-regex, this matches something like:
 *
 * `(/|(?<=\b))r/(all-)?%subreddit%([-+]%subreddit%)*(/[\w\-/]*)?`
 * where %subreddit% == `((t:)?\w{2,24}|reddit\.com)`
 *
 * rewind_p       - out: number of bytes before `data` that belong to the link
 * link           - out: buffer that receives the link text
 * data           - points at the '/' following the 'r'
 * max_rewind     - number of bytes before `data` that may be claimed
 * max_lookbehind - number of bytes before `data` that may be inspected
 * size           - number of bytes available at `data`
 * no_slash       - out: 1 when the link was written without a leading '/'
 *
 * Returns the number of bytes consumed from `data`, or 0 when there is
 * no link here.
 */
size_t
sd_autolink__subreddit(
	size_t *rewind_p,
	struct buf *link,
	uint8_t *data,
	size_t max_rewind,
	size_t max_lookbehind,
	size_t size,
	int *no_slash
	)
{
	size_t link_end;
	size_t rewind;
	int is_allminus = 0;

	rewind = check_reddit_autolink_prefix(data, max_rewind, max_lookbehind, size, 'r');
	if (!rewind)
		return 0;

	/* offset to the "meat" of the link */
	link_end = strlen("/");

	/* "/r/all-foo-bar": only then is '-' accepted as a name separator */
	if (size >= link_end + 4 && strncasecmp((char*)data + link_end, "all-", 4) == 0)
		is_allminus = 1;

	/* one iteration per subreddit name in a (possibly multi-) reddit */
	for (;;) {
		size_t start = link_end;
		int max_length = 24;

		/* special case: /r/reddit.com (only subreddit containing '.'). */
		if ( size >= link_end+10 && strncasecmp((char*)data+link_end, "reddit.com", 10) == 0 ) {
			link_end += 10;
			/* Make sure there are no trailing characters (don't do
			 * any autolinking for /r/reddit.commission) */
			max_length = 10;
		}

		/* If not a special case, verify it begins with (t:)?[A-Za-z0-9] */
		else {
			/* support autolinking to timereddits, /r/t:when (1 April 2012) */
			if ( size > link_end+2 && strncasecmp((char*)data+link_end, "t:", 2) == 0 )
				link_end += 2;  /* Jump over the 't:' */

			/*
			 * The first character of a subreddit name must be a letter
			 * or digit.  The bounds check matters after a trailing
			 * '+' / '-' separator: link_end may then equal `size`, and
			 * data[link_end] would be one byte past the input.
			 */
			if (link_end >= size || !isalnum(data[link_end]))
				return 0;
			link_end += 1;
		}

		/* consume valid characters ([A-Za-z0-9_]) until we run out */
		while (link_end < size && (isalnum(data[link_end]) ||
							data[link_end] == '_'))
			link_end++;

		/* Only names of 2 to `max_length` bytes (counted from `start`,
		 * so including any "t:" prefix) are autolinked; don't bother
		 * with anything outside this length range.
		 * (chksrname function in reddit/.../validator.py) */
		if ( link_end-start < 2 || link_end-start > max_length )
			return 0;

		/* If we are linking to a multireddit, continue */
		if (link_end >= size)
			break;
		if (data[link_end] != '+' && !(is_allminus && data[link_end] == '-'))
			break;

		/* step over the separator and parse the next name */
		link_end++;
	}

	/* optional trailing path: /[A-Za-z0-9_/-]* */
	if (link_end < size && data[link_end] == '/') {
		while (link_end < size && (isalnum(data[link_end]) ||
							data[link_end] == '_' ||
							data[link_end] == '/' ||
							data[link_end] == '-'))
			link_end++;
	}

	/* make the link */
	bufput(link, data - rewind, link_end + rewind);

	*no_slash = (rewind == 1);
	*rewind_p = rewind;

	return link_end;
}
|
||||
|
||||
/*
 * sd_autolink__username: try to autolink a u/name or /u/name reference.
 *
 * `data` points at the '/' that follows the 'u'.  On success the link
 * text (including the prefix to the left of `data`) is appended to
 * `link`, `*rewind_p` receives the prefix length, `*no_slash` is 1 when
 * the prefix was a bare "u", and the number of bytes consumed from
 * `data` is returned.  Returns 0 when there is no link here.
 */
size_t
sd_autolink__username(
	size_t *rewind_p,
	struct buf *link,
	uint8_t *data,
	size_t max_rewind,
	size_t max_lookbehind,
	size_t size,
	int *no_slash
	)
{
	size_t pos;
	size_t prefix_len;
	uint8_t first;

	if (size < 3)
		return 0;

	prefix_len = check_reddit_autolink_prefix(data, max_rewind, max_lookbehind, size, 'u');
	if (prefix_len == 0)
		return 0;

	/* skip the '/' that `data` points at */
	pos = strlen("/");

	/* the first byte of the name must be one of [A-Za-z0-9_-] */
	first = data[pos];
	if (!(isalnum(first) || first == '_' || first == '-'))
		return 0;

	/* then take every following byte from [A-Za-z0-9_/-] */
	for (pos++; pos < size; pos++) {
		const uint8_t ch = data[pos];

		if (!isalnum(ch) && ch != '_' && ch != '/' && ch != '-')
			break;
	}

	/* emit the prefix plus everything consumed */
	bufput(link, data - prefix_len, pos + prefix_len);

	*no_slash = (prefix_len == 1);
	*rewind_p = prefix_len;

	return pos;
}
|
59
SnudownTest/autolink.h
Normal file
59
SnudownTest/autolink.h
Normal file
|
@ -0,0 +1,59 @@
|
|||
/*
|
||||
* Copyright (c) 2011, Vicent Marti
|
||||
*
|
||||
* Permission to use, copy, modify, and distribute this software for any
|
||||
* purpose with or without fee is hereby granted, provided that the above
|
||||
* copyright notice and this permission notice appear in all copies.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef UPSKIRT_AUTOLINK_H
|
||||
#define UPSKIRT_AUTOLINK_H
|
||||
|
||||
#include "buffer.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
enum {
|
||||
SD_AUTOLINK_SHORT_DOMAINS = (1 << 0),
|
||||
};
|
||||
|
||||
int
|
||||
sd_autolink_issafe(const uint8_t *link, size_t link_len);
|
||||
|
||||
size_t
|
||||
sd_autolink__www(size_t *rewind_p, struct buf *link,
|
||||
uint8_t *data, size_t max_rewind, size_t size, unsigned int flags);
|
||||
|
||||
size_t
|
||||
sd_autolink__email(size_t *rewind_p, struct buf *link,
|
||||
uint8_t *data, size_t max_rewind, size_t size, unsigned int flags);
|
||||
|
||||
size_t
|
||||
sd_autolink__url(size_t *rewind_p, struct buf *link,
|
||||
uint8_t *data, size_t max_rewind, size_t size, unsigned int flags);
|
||||
|
||||
extern size_t
|
||||
sd_autolink__subreddit(size_t *rewind_p, struct buf *link, uint8_t *data,
|
||||
size_t max_rewind, size_t max_lookbehind, size_t size, int *no_slash);
|
||||
|
||||
extern size_t
|
||||
sd_autolink__username(size_t *rewind_p, struct buf *link, uint8_t *data,
|
||||
size_t max_rewind, size_t max_lookbehind, size_t size, int *no_slash);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
/* vim: set filetype=c: */
|
236
SnudownTest/buffer.c
Normal file
236
SnudownTest/buffer.c
Normal file
|
@ -0,0 +1,236 @@
|
|||
/*
|
||||
* Copyright (c) 2008, Natacha Porté
|
||||
* Copyright (c) 2011, Vicent Martí
|
||||
*
|
||||
* Permission to use, copy, modify, and distribute this software for any
|
||||
* purpose with or without fee is hereby granted, provided that the above
|
||||
* copyright notice and this permission notice appear in all copies.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*/
|
||||
|
||||
#define BUFFER_MAX_ALLOC_SIZE (1024 * 1024 * 16) //16mb
|
||||
|
||||
#include "buffer.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
|
||||
/* MSVC compat */
|
||||
#if defined(_MSC_VER)
|
||||
# define _buf_vsnprintf _vsnprintf
|
||||
#else
|
||||
# define _buf_vsnprintf vsnprintf
|
||||
#endif
|
||||
|
||||
int
|
||||
bufprefix(const struct buf *buf, const char *prefix)
|
||||
{
|
||||
size_t i;
|
||||
assert(buf && buf->unit);
|
||||
|
||||
for (i = 0; i < buf->size; ++i) {
|
||||
if (prefix[i] == 0)
|
||||
return 0;
|
||||
|
||||
if (buf->data[i] != prefix[i])
|
||||
return buf->data[i] - prefix[i];
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* bufgrow: increasing the allocated size to the given value */
|
||||
int
|
||||
bufgrow(struct buf *buf, size_t neosz)
|
||||
{
|
||||
size_t neoasz;
|
||||
void *neodata;
|
||||
|
||||
assert(buf && buf->unit);
|
||||
|
||||
if (neosz > BUFFER_MAX_ALLOC_SIZE)
|
||||
return BUF_ENOMEM;
|
||||
|
||||
if (buf->asize >= neosz)
|
||||
return BUF_OK;
|
||||
|
||||
neoasz = buf->asize + buf->unit;
|
||||
while (neoasz < neosz)
|
||||
neoasz += buf->unit;
|
||||
|
||||
neodata = realloc(buf->data, neoasz);
|
||||
if (!neodata)
|
||||
return BUF_ENOMEM;
|
||||
|
||||
buf->data = neodata;
|
||||
buf->asize = neoasz;
|
||||
return BUF_OK;
|
||||
}
|
||||
|
||||
|
||||
/* bufnew: allocation of a new buffer */
|
||||
struct buf *
|
||||
bufnew(size_t unit)
|
||||
{
|
||||
struct buf *ret;
|
||||
ret = malloc(sizeof (struct buf));
|
||||
|
||||
if (ret) {
|
||||
ret->data = 0;
|
||||
ret->size = ret->asize = 0;
|
||||
ret->unit = unit;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* bufnullterm: NULL-termination of the string array */
|
||||
const char *
|
||||
bufcstr(struct buf *buf)
|
||||
{
|
||||
assert(buf && buf->unit);
|
||||
|
||||
if (buf->size < buf->asize && buf->data[buf->size] == 0)
|
||||
return (char *)buf->data;
|
||||
|
||||
if (buf->size + 1 <= buf->asize || bufgrow(buf, buf->size + 1) == 0) {
|
||||
buf->data[buf->size] = 0;
|
||||
return (char *)buf->data;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* bufprintf: formatted printing to a buffer */
|
||||
void
|
||||
bufprintf(struct buf *buf, const char *fmt, ...)
|
||||
{
|
||||
va_list ap;
|
||||
int n;
|
||||
|
||||
assert(buf && buf->unit);
|
||||
|
||||
if (buf->size >= buf->asize && bufgrow(buf, buf->size + 1) < 0)
|
||||
return;
|
||||
va_start(ap, fmt);
|
||||
n = _buf_vsnprintf((char *)buf->data + buf->size, buf->asize - buf->size, fmt, ap);
|
||||
va_end(ap);
|
||||
|
||||
if (n < 0) {
|
||||
#ifdef _MSC_VER
|
||||
va_start(ap, fmt);
|
||||
n = _vscprintf(fmt, ap);
|
||||
va_end(ap);
|
||||
#else
|
||||
return;
|
||||
#endif
|
||||
}
|
||||
if ((size_t)n >= buf->asize - buf->size) {
|
||||
if (bufgrow(buf, buf->size + n + 1) < 0)
|
||||
return;
|
||||
|
||||
va_start(ap, fmt);
|
||||
n = _buf_vsnprintf((char *)buf->data + buf->size, buf->asize - buf->size, fmt, ap);
|
||||
va_end(ap);
|
||||
}
|
||||
|
||||
if (n < 0)
|
||||
return;
|
||||
|
||||
buf->size += n;
|
||||
}
|
||||
|
||||
/* bufput: appends raw data to a buffer */
|
||||
void
|
||||
bufput(struct buf *buf, const void *data, size_t len)
|
||||
{
|
||||
assert(buf && buf->unit);
|
||||
|
||||
if (buf->size + len > buf->asize && bufgrow(buf, buf->size + len) < 0)
|
||||
return;
|
||||
|
||||
memcpy(buf->data + buf->size, data, len);
|
||||
buf->size += len;
|
||||
}
|
||||
|
||||
/*
 * bufputs: appends a NUL-terminated string to a buffer.
 * The terminator itself is not copied.
 */
void
bufputs(struct buf *buf, const char *str)
{
	size_t length = strlen(str);

	bufput(buf, str, length);
}
|
||||
|
||||
|
||||
/* bufputc: appends a single uint8_t to a buffer */
|
||||
void
|
||||
bufputc(struct buf *buf, int c)
|
||||
{
|
||||
assert(buf && buf->unit);
|
||||
|
||||
if (buf->size + 1 > buf->asize && bufgrow(buf, buf->size + 1) < 0)
|
||||
return;
|
||||
|
||||
buf->data[buf->size] = c;
|
||||
buf->size += 1;
|
||||
}
|
||||
|
||||
/* bufrelease: decrease the reference count and free the buffer if needed */
|
||||
void
|
||||
bufrelease(struct buf *buf)
|
||||
{
|
||||
if (!buf)
|
||||
return;
|
||||
|
||||
free(buf->data);
|
||||
free(buf);
|
||||
}
|
||||
|
||||
|
||||
/* bufreset: frees internal data of the buffer */
|
||||
void
|
||||
bufreset(struct buf *buf)
|
||||
{
|
||||
if (!buf)
|
||||
return;
|
||||
|
||||
free(buf->data);
|
||||
buf->data = NULL;
|
||||
buf->size = buf->asize = 0;
|
||||
}
|
||||
|
||||
/* bufslurp: removes a given number of bytes from the head of the array */
|
||||
void
|
||||
bufslurp(struct buf *buf, size_t len)
|
||||
{
|
||||
assert(buf && buf->unit);
|
||||
|
||||
if (len >= buf->size) {
|
||||
buf->size = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
buf->size -= len;
|
||||
memmove(buf->data, buf->data + len, buf->size);
|
||||
}
|
||||
|
||||
/* buftrucate: truncates the buffer at `size` */
|
||||
int
|
||||
buftruncate(struct buf *buf, size_t size)
|
||||
{
|
||||
if (buf->size < size || size < 0) {
|
||||
/* bail out in debug mode so we can figure out why this happened */
|
||||
assert(0);
|
||||
return BUF_EINVALIDIDX;
|
||||
}
|
||||
|
||||
buf->size = size;
|
||||
return BUF_OK;
|
||||
}
|
100
SnudownTest/buffer.h
Normal file
100
SnudownTest/buffer.h
Normal file
|
@ -0,0 +1,100 @@
|
|||
/*
|
||||
* Copyright (c) 2008, Natacha Porté
|
||||
* Copyright (c) 2011, Vicent Martí
|
||||
*
|
||||
* Permission to use, copy, modify, and distribute this software for any
|
||||
* purpose with or without fee is hereby granted, provided that the above
|
||||
* copyright notice and this permission notice appear in all copies.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef BUFFER_H__
|
||||
#define BUFFER_H__
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdarg.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#define __attribute__(x)
|
||||
#define inline
|
||||
#endif
|
||||
|
||||
typedef enum {
|
||||
BUF_OK = 0,
|
||||
BUF_ENOMEM = -1,
|
||||
BUF_EINVALIDIDX = -2,
|
||||
} buferror_t;
|
||||
|
||||
/* struct buf: character array buffer */
|
||||
struct buf {
|
||||
uint8_t *data; /* actual character data */
|
||||
size_t size; /* size of the string */
|
||||
size_t asize; /* allocated size (0 = volatile buffer) */
|
||||
size_t unit; /* reallocation unit size (0 = read-only buffer) */
|
||||
};
|
||||
|
||||
/* BUF_STATIC: initializer for a global read-only buffer wrapping a string
 * literal. Fields in order: data, size (without the terminator), asize
 * (with the terminator), unit (0 = read-only buffer). */
#define BUF_STATIC(string) \
	{ (uint8_t *)(string), sizeof(string) - 1, sizeof(string), 0 }

/* BUF_VOLATILE: initializer for a volatile buffer on the stack wrapping an
 * existing C string (asize 0 = volatile buffer, unit 0 = read-only buffer).
 * The expansion calls strlen(), so <string.h> must be included by the user. */
#define BUF_VOLATILE(strname) \
	{ (uint8_t *)(strname), strlen(strname), 0, 0 }
|
||||
|
||||
/* BUFPUTSL: optimized bufputs of a string literal */
|
||||
#define BUFPUTSL(output, literal) \
|
||||
bufput(output, literal, sizeof literal - 1)
|
||||
|
||||
/* bufgrow: increasing the allocated size to the given value */
|
||||
int bufgrow(struct buf *, size_t);
|
||||
|
||||
/* bufnew: allocation of a new buffer */
|
||||
struct buf *bufnew(size_t) __attribute__ ((malloc));
|
||||
|
||||
/* bufnullterm: NUL-termination of the string array (making a C-string) */
|
||||
const char *bufcstr(struct buf *);
|
||||
|
||||
/* bufprefix: compare the beginning of a buffer with a string */
|
||||
int bufprefix(const struct buf *buf, const char *prefix);
|
||||
|
||||
/* bufput: appends raw data to a buffer */
|
||||
void bufput(struct buf *, const void *, size_t);
|
||||
|
||||
/* bufputs: appends a NUL-terminated string to a buffer */
|
||||
void bufputs(struct buf *, const char *);
|
||||
|
||||
/* bufputc: appends a single char to a buffer */
|
||||
void bufputc(struct buf *, int);
|
||||
|
||||
/* bufrelease: frees the buffer and its data (does nothing when passed NULL) */
|
||||
void bufrelease(struct buf *);
|
||||
|
||||
/* bufreset: frees internal data of the buffer */
|
||||
void bufreset(struct buf *);
|
||||
|
||||
/* bufslurp: removes a given number of bytes from the head of the array */
|
||||
void bufslurp(struct buf *, size_t);
|
||||
|
||||
/* bufprintf: formatted printing to a buffer */
|
||||
void bufprintf(struct buf *, const char *, ...) __attribute__ ((format (printf, 2, 3)));
|
||||
|
||||
/* buftruncate: truncates the buffer at `size` */
|
||||
int buftruncate(struct buf *buf, size_t size);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
145
SnudownTest/debian/changelog
Normal file
145
SnudownTest/debian/changelog
Normal file
|
@ -0,0 +1,145 @@
|
|||
snudown (1.4.0) unstable; urgency=medium
|
||||
|
||||
* autolink r/subreddit and u/user
|
||||
* security: don't rewind over previous inlines when autolinking
|
||||
* email autolinks re-enabled due to ^
|
||||
* more stringent character entity checks and sanitization
|
||||
* properly handle URLs containing control characters
|
||||
|
||||
-- Jordan Milne <jordan.milne@reddit.com> Mon, 01 Jun 2015 13:04:23 -0700
|
||||
|
||||
snudown (1.3.2) unstable; urgency=medium
|
||||
|
||||
* fix alphanumeric-named entities
|
||||
|
||||
-- Neil Williams <neil@reddit.com> Wed, 25 Feb 2015 13:32:41 -0800
|
||||
|
||||
snudown (1.3.1) unstable; urgency=medium
|
||||
|
||||
* add missing entities to entity whitelist
|
||||
|
||||
-- Neil Williams <neil@reddit.com> Tue, 24 Feb 2015 22:12:29 -0800
|
||||
|
||||
snudown (1.3.0) unstable; urgency=medium
|
||||
|
||||
* validate html entities and escape unrecognized ones
|
||||
|
||||
-- Neil Williams <neil@reddit.com> Tue, 24 Feb 2015 17:55:38 -0800
|
||||
|
||||
snudown (1.2.0) unstable; urgency=medium
|
||||
|
||||
* security: fix rewind issues
|
||||
* email autolinks disabled due to ^
|
||||
* security: fix table header OOM bomb
|
||||
|
||||
-- Neil Williams <neil@reddit.com> Sat, 20 Sep 2014 11:59:34 -0700
|
||||
|
||||
snudown (1.1.6) unstable; urgency=low
|
||||
|
||||
* add ts3server url scheme to whitelist
|
||||
* redo html sanitization for wiki renderer
|
||||
|
||||
-- Neil Williams <neil@reddit.com> Tue, 01 Apr 2014 17:12:50 -0700
|
||||
|
||||
snudown (1.1.5) unstable; urgency=low
|
||||
|
||||
* bring path stuff into user/subreddit autolinking (multis, subpages etc.)
|
||||
* make /u/ autolinking case sensitive
|
||||
|
||||
-- Neil Williams <neil@reddit.com> Wed, 22 May 2013 16:09:31 -0700
|
||||
|
||||
snudown (1.1.4) unstable; urgency=low
|
||||
|
||||
* make /r/ autolinking case sensitive
|
||||
|
||||
-- Neil Williams <neil@reddit.com> Mon, 25 Feb 2013 23:27:10 -0800
|
||||
|
||||
snudown (1.1.3) unstable; urgency=low
|
||||
|
||||
* add support for /r/all-minus
|
||||
|
||||
-- Neil Williams <neil@reddit.com> Tue, 08 Jan 2013 12:55:40 -0800
|
||||
|
||||
snudown (1.1.2) unstable; urgency=low
|
||||
|
||||
* don't close the toc div if there wasn't a toc :(
|
||||
|
||||
-- Neil Williams <neil@reddit.com> Wed, 12 Dec 2012 17:38:05 -0800
|
||||
|
||||
snudown (1.1.1) unstable; urgency=low
|
||||
|
||||
* minor code cleanup
|
||||
* add a div around wiki table of contents for styling purposes
|
||||
|
||||
-- Neil Williams <neil@reddit.com> Wed, 12 Dec 2012 13:47:49 -0800
|
||||
|
||||
snudown (1.1.0) unstable; urgency=low
|
||||
|
||||
* add wiki variant of markdown syntax (allows links, and
|
||||
some raw html)
|
||||
|
||||
-- Neil Williams <neil@reddit.com> Wed, 05 Sep 2012 23:30:34 -0700
|
||||
|
||||
snudown (1.0.7) unstable; urgency=low
|
||||
|
||||
* add python-setuptools to build-depends
|
||||
|
||||
-- Neil Williams <neil@reddit.com> Thu, 09 Aug 2012 14:46:49 -0700
|
||||
|
||||
snudown (1.0.6) unstable; urgency=low
|
||||
|
||||
* made subreddit autolinking more robust thanks to nandhp
|
||||
* cleaned up packaging
|
||||
* merged upstream fixes:
|
||||
* fix blockquotes nested inside paragraphs
|
||||
* improve parsing of continuous list items
|
||||
* fix infinite loop parsing strikethrouhgs
|
||||
|
||||
-- Neil Williams <neil@reddit.com> Thu, 09 Aug 2012 13:06:38 -0700
|
||||
|
||||
snudown (1.0.5) unstable; urgency=low
|
||||
|
||||
* require a space between url and title
|
||||
* merged upstream fixes:
|
||||
* whitespace after tables prevent them from rendering
|
||||
* escape html in contents of tables
|
||||
|
||||
-- Neil Williams <neil@reddit.com> Thu, 23 Feb 2012 08:40:39 -0800
|
||||
|
||||
snudown (1.0.4) unstable; urgency=low
|
||||
|
||||
* change username autolinking to /u/username
|
||||
* properly handle backslash at end of message
|
||||
|
||||
-- Neil Williams <neil@reddit.com> Thu, 26 Jan 2012 18:26:45 -0800
|
||||
|
||||
snudown (1.0.3) unstable; urgency=low
|
||||
|
||||
* ~username auto-linking
|
||||
* make table headers less strict
|
||||
* correctly handle ) in link title text
|
||||
* synced with upstream
|
||||
* code clean-up
|
||||
* utf-8 fixes
|
||||
|
||||
-- Neil Williams <neil@reddit.com> Wed, 18 Jan 2012 15:20:35 -0800
|
||||
|
||||
snudown (1.0.2) unstable; urgency=low
|
||||
|
||||
* synced up with upstream
|
||||
* more safelink relaxation based on community requests
|
||||
* fixed nesting unordered lists within ordered lists and vice versa
|
||||
|
||||
-- Neil Williams <neil@reddit.com> Sat, 19 Nov 2011 17:16:47 -0800
|
||||
|
||||
snudown (1.0.1) unstable; urgency=low
|
||||
|
||||
* new version, new package
|
||||
|
||||
-- Neil Williams <neil@reddit.com> Thu, 17 Nov 2011 14:22:26 -0800
|
||||
|
||||
snudown (1.0.0) unstable; urgency=low
|
||||
|
||||
* source package automatically created by stdeb 0.6.0+git
|
||||
|
||||
-- Neil Williams <neil@reddit.com> Wed, 16 Nov 2011 10:36:53 -0800
|
1
SnudownTest/debian/compat
Normal file
1
SnudownTest/debian/compat
Normal file
|
@ -0,0 +1 @@
|
|||
7
|
15
SnudownTest/debian/control
Normal file
15
SnudownTest/debian/control
Normal file
|
@ -0,0 +1,15 @@
|
|||
Source: snudown
|
||||
Maintainer: Neil Williams <neil@reddit.com>
|
||||
Section: python
|
||||
Priority: optional
|
||||
Build-Depends: python-all-dev (>= 2.6.6-3), debhelper (>= 7), python-setuptools, gperf
|
||||
Standards-Version: 3.9.3
|
||||
Homepage: https://github.com/reddit/snudown
|
||||
Vcs-Git: git://github.com/reddit/snudown.git
|
||||
|
||||
Package: python-snudown
|
||||
Architecture: any
|
||||
Depends: ${misc:Depends}, ${python:Depends}, ${shlibs:Depends}
|
||||
Breaks: ${python:Breaks}
|
||||
Description: reddit's python wrapper and customization of the Sundown Markdown interpreter.
|
||||
|
30
SnudownTest/debian/copyright
Normal file
30
SnudownTest/debian/copyright
Normal file
|
@ -0,0 +1,30 @@
|
|||
Format: http://dep.debian.net/deps/dep5
|
||||
Upstream-Name: snudown
|
||||
Source: https://github.com/reddit/snudown
|
||||
|
||||
Files: *
|
||||
Copyright: 2011-2012 Vicent Marti
|
||||
2011-2012 reddit Inc.
|
||||
License: MIT
|
||||
|
||||
Files: debian/*
|
||||
Copyright: 2011-2012 reddit Inc.
|
||||
License: MIT
|
||||
|
||||
Files: test_snudown.py
|
||||
Copyright: 2011-2012 reddit Inc.
|
||||
License: MIT
|
||||
|
||||
License: MIT
|
||||
Permission to use, copy, modify, and distribute this software for any purpose
|
||||
with or without fee is hereby granted, provided that the above copyright
|
||||
notice and this permission notice appear in all copies.
|
||||
.
|
||||
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
|
||||
REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
|
||||
AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
|
||||
INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
|
||||
LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
|
||||
OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
|
||||
PERFORMANCE OF THIS SOFTWARE.
|
||||
|
9
SnudownTest/debian/rules
Normal file
9
SnudownTest/debian/rules
Normal file
|
@ -0,0 +1,9 @@
|
|||
#!/usr/bin/make -f
|
||||
|
||||
# This file was automatically generated by stdeb 0.6.0+git at
|
||||
# Wed, 16 Nov 2011 10:36:53 -0800
|
||||
|
||||
%:
|
||||
dh $@ --with python2 --buildsystem=python_distutils
|
||||
|
||||
|
1
SnudownTest/debian/source/format
Normal file
1
SnudownTest/debian/source/format
Normal file
|
@ -0,0 +1 @@
|
|||
3.0 (native)
|
37
SnudownTest/fuzzing/CMakeLists.txt
Normal file
37
SnudownTest/fuzzing/CMakeLists.txt
Normal file
|
@ -0,0 +1,37 @@
|
|||
cmake_minimum_required(VERSION 2.8)
|
||||
|
||||
set(HEADERS
|
||||
../html/houdini.h
|
||||
../html/html.h
|
||||
../src/autolink.h
|
||||
../src/buffer.h
|
||||
../src/html_blocks.h
|
||||
../src/html_entities.h
|
||||
../src/markdown.h
|
||||
../src/stack.h
|
||||
)
|
||||
set(LIBRARY_SOURCES
|
||||
../html/houdini_href_e.c
|
||||
../html/houdini_html_e.c
|
||||
../html/html.c
|
||||
../html/html_smartypants.c
|
||||
../src/autolink.c
|
||||
../src/buffer.c
|
||||
../src/markdown.c
|
||||
../src/stack.c
|
||||
${HEADERS}
|
||||
)
|
||||
|
||||
set(PROGRAM "snudown-validator")
|
||||
set(PROGRAM_SOURCES
|
||||
${LIBRARY_SOURCES}
|
||||
snudown-validator.c
|
||||
)
|
||||
|
||||
include_directories(. ../src ../html ./build/gumbo_snudown/include ${CMAKE_CURRENT_BINARY_DIR})
|
||||
link_directories(${CMAKE_CURRENT_SOURCE_DIR}/build/gumbo_snudown/lib)
|
||||
|
||||
add_executable(${PROGRAM} ${PROGRAM_SOURCES})
|
||||
target_link_libraries(${PROGRAM} gumbo)
|
||||
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O3 -g -Wno-error=parentheses")
|
62
SnudownTest/fuzzing/Makefile
Normal file
62
SnudownTest/fuzzing/Makefile
Normal file
|
@ -0,0 +1,62 @@
|
|||
# Copyright (c) 2015, reddit inc.
|
||||
#
|
||||
# Permission to use, copy, modify, and distribute this software for any
|
||||
# purpose with or without fee is hereby granted, provided that the above
|
||||
# copyright notice and this permission notice appear in all copies.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
|
||||
all: gumbo_snudown snudown-validator
|
||||
|
||||
.PHONY: all clean gumbo_snudown snudown-validator build_dir
|
||||
|
||||
build_dir:
|
||||
mkdir -p build
|
||||
|
||||
# Our modified gumbo for finding security-relevant syntax issues
|
||||
gumbo_snudown: build_dir
|
||||
mkdir -p build/gumbo_snudown
|
||||
git submodule update --recursive
|
||||
@[ -f "${CURDIR}/gumbo_snudown/configure" ] || { \
|
||||
cd gumbo_snudown; \
|
||||
./autogen.sh; \
|
||||
./configure --prefix=$(CURDIR)/build/gumbo_snudown; \
|
||||
}
|
||||
# Don't build this with AFL instrumentation, I'm assuming Google
|
||||
# already ran their own fuzzer over their own parser...
|
||||
$(MAKE) -C gumbo_snudown all install
|
||||
|
||||
gperf_src:
|
||||
cd ../src/ && gperf html_entities.gperf --output-file=html_entities.h
|
||||
|
||||
# executable
|
||||
snudown-validator: build_dir gumbo_snudown gperf_src
|
||||
cd build && cmake .. -DCMAKE_C_COMPILER=$(AFL_PATH)/afl-gcc
|
||||
$(MAKE) -C build all
|
||||
|
||||
# stuff for fuzzing
|
||||
gen_testcases:
|
||||
mkdir -p testing/testcases
|
||||
rm -f testing/testcases/test_default_*.md
|
||||
python2.7 gen_testcases.py
|
||||
|
||||
afl: gen_testcases snudown-validator
|
||||
@[ -n "$(AFL_PATH)" ] || { echo '$$AFL_PATH not set'; false; }
|
||||
@mkdir -p testing/afl_results
|
||||
$(AFL_PATH)/afl-fuzz \
|
||||
-i testing/testcases \
|
||||
-o testing/afl_results \
|
||||
-t 100 \
|
||||
-m none \
|
||||
./build/snudown-validator
|
||||
|
||||
# housekeeping
|
||||
clean:
|
||||
rm -rf *.o
|
||||
rm -rf build/
|
20
SnudownTest/fuzzing/gen_testcases.py
Normal file
20
SnudownTest/fuzzing/gen_testcases.py
Normal file
|
@ -0,0 +1,20 @@
|
|||
#!/usr/bin/env python

# Dump all of our testcases into a directory as separate files, like AFL
# wants. Kept compatible with Python 2.

import itertools
import os
import os.path
import sys

# test_snudown is looked up in the parent of the current working directory
# (presumably the script is run from fuzzing/ -- confirm against the caller).
sys.path.append("..")
import test_snudown

# testcases longer than this are skipped
MAX_CASE_LEN = 2048
OUT_DIR = os.path.join('testing', 'testcases')

# create the output directory when the script is run on its own
if not os.path.isdir(OUT_DIR):
    os.makedirs(OUT_DIR)

cases = itertools.chain(test_snudown.cases.keys(), test_snudown.wiki_cases.keys())
for i, md in enumerate(cases):
    # skip huge testcases
    if len(md) > MAX_CASE_LEN:
        continue
    # the index counts skipped cases too, so file numbers may have gaps
    test_path = os.path.join(OUT_DIR, 'test_default_%d.md' % i)
    with open(test_path, 'w') as f:
        f.write(md)
|
226
SnudownTest/fuzzing/snudown-validator.c
Normal file
226
SnudownTest/fuzzing/snudown-validator.c
Normal file
|
@ -0,0 +1,226 @@
|
|||
#include "markdown.h"
|
||||
#include "html.h"
|
||||
#include "buffer.h"
|
||||
|
||||
#include <ctype.h>
|
||||
#include <errno.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <gumbo.h>
|
||||
|
||||
#define READ_UNIT 1024
|
||||
#define OUTPUT_UNIT 64
|
||||
|
||||
#include "autolink.h"
|
||||
|
||||
#define SNUDOWN_VERSION "1.3.2"
|
||||
|
||||
enum snudown_renderer_mode {
|
||||
RENDERER_USERTEXT = 0,
|
||||
RENDERER_WIKI,
|
||||
RENDERER_COUNT
|
||||
};
|
||||
|
||||
struct snudown_renderopt {
|
||||
struct html_renderopt html;
|
||||
int nofollow;
|
||||
const char *target;
|
||||
};
|
||||
|
||||
struct snudown_renderer {
|
||||
struct sd_markdown* main_renderer;
|
||||
struct sd_markdown* toc_renderer;
|
||||
struct module_state* state;
|
||||
struct module_state* toc_state;
|
||||
};
|
||||
|
||||
struct module_state {
|
||||
struct sd_callbacks callbacks;
|
||||
struct snudown_renderopt options;
|
||||
};
|
||||
|
||||
static struct snudown_renderer sundown[RENDERER_COUNT];
|
||||
|
||||
static char* html_element_whitelist[] = {"tr", "th", "td", "table", "tbody", "thead", "tfoot", "caption", NULL};
|
||||
static char* html_attr_whitelist[] = {"colspan", "rowspan", "cellspacing", "cellpadding", "scope", NULL};
|
||||
|
||||
static struct module_state usertext_toc_state;
|
||||
static struct module_state wiki_toc_state;
|
||||
static struct module_state usertext_state;
|
||||
static struct module_state wiki_state;
|
||||
|
||||
static const unsigned int snudown_default_md_flags =
|
||||
MKDEXT_NO_INTRA_EMPHASIS |
|
||||
MKDEXT_SUPERSCRIPT |
|
||||
MKDEXT_AUTOLINK |
|
||||
MKDEXT_STRIKETHROUGH |
|
||||
MKDEXT_TABLES;
|
||||
|
||||
static const unsigned int snudown_default_render_flags =
|
||||
HTML_SKIP_HTML |
|
||||
HTML_SKIP_IMAGES |
|
||||
HTML_SAFELINK |
|
||||
HTML_ESCAPE |
|
||||
HTML_USE_XHTML;
|
||||
|
||||
static const unsigned int snudown_wiki_render_flags =
|
||||
HTML_SKIP_HTML |
|
||||
HTML_SAFELINK |
|
||||
HTML_ALLOW_ELEMENT_WHITELIST |
|
||||
HTML_ESCAPE |
|
||||
HTML_USE_XHTML;
|
||||
|
||||
static void
|
||||
snudown_link_attr(struct buf *ob, const struct buf *link, void *opaque)
|
||||
{
|
||||
struct snudown_renderopt *options = opaque;
|
||||
|
||||
if (options->nofollow)
|
||||
BUFPUTSL(ob, " rel=\"nofollow\"");
|
||||
|
||||
if (options->target != NULL) {
|
||||
BUFPUTSL(ob, " target=\"");
|
||||
bufputs(ob, options->target);
|
||||
bufputc(ob, '\"');
|
||||
}
|
||||
}
|
||||
|
||||
static struct sd_markdown* make_custom_renderer(struct module_state* state,
|
||||
const unsigned int renderflags,
|
||||
const unsigned int markdownflags,
|
||||
int toc_renderer) {
|
||||
if(toc_renderer) {
|
||||
sdhtml_toc_renderer(&state->callbacks,
|
||||
(struct html_renderopt *)&state->options);
|
||||
} else {
|
||||
sdhtml_renderer(&state->callbacks,
|
||||
(struct html_renderopt *)&state->options,
|
||||
renderflags);
|
||||
}
|
||||
|
||||
state->options.html.link_attributes = &snudown_link_attr;
|
||||
state->options.html.html_element_whitelist = html_element_whitelist;
|
||||
state->options.html.html_attr_whitelist = html_attr_whitelist;
|
||||
|
||||
return sd_markdown_new(
|
||||
markdownflags,
|
||||
16,
|
||||
64,
|
||||
&state->callbacks,
|
||||
&state->options
|
||||
);
|
||||
}
|
||||
|
||||
void init_default_renderer() {
|
||||
sundown[RENDERER_USERTEXT].main_renderer = make_custom_renderer(&usertext_state, snudown_default_render_flags, snudown_default_md_flags, 0);
|
||||
sundown[RENDERER_USERTEXT].toc_renderer = make_custom_renderer(&usertext_toc_state, snudown_default_render_flags, snudown_default_md_flags, 1);
|
||||
sundown[RENDERER_USERTEXT].state = &usertext_state;
|
||||
sundown[RENDERER_USERTEXT].toc_state = &usertext_toc_state;
|
||||
}
|
||||
|
||||
void init_wiki_renderer() {
|
||||
sundown[RENDERER_WIKI].main_renderer = make_custom_renderer(&wiki_state, snudown_wiki_render_flags, snudown_default_md_flags, 0);
|
||||
sundown[RENDERER_WIKI].toc_renderer = make_custom_renderer(&wiki_toc_state, snudown_wiki_render_flags, snudown_default_md_flags, 1);
|
||||
sundown[RENDERER_WIKI].state = &wiki_state;
|
||||
sundown[RENDERER_WIKI].toc_state = &wiki_toc_state;
|
||||
}
|
||||
|
||||
void
|
||||
snudown_md(struct buf *ob, const uint8_t *document, size_t doc_size, int wiki_mode)
|
||||
{
|
||||
int renderer = RENDERER_USERTEXT;
|
||||
int enable_toc = 0;
|
||||
struct snudown_renderer _snudown;
|
||||
int nofollow = 0;
|
||||
char* target = NULL;
|
||||
char* toc_id_prefix = NULL;
|
||||
unsigned int flags;
|
||||
|
||||
if (wiki_mode)
|
||||
renderer = RENDERER_WIKI;
|
||||
|
||||
_snudown = sundown[renderer];
|
||||
|
||||
struct snudown_renderopt *options = &(_snudown.state->options);
|
||||
options->nofollow = nofollow;
|
||||
options->target = target;
|
||||
|
||||
flags = options->html.flags;
|
||||
|
||||
if (enable_toc) {
|
||||
_snudown.toc_state->options.html.toc_id_prefix = toc_id_prefix;
|
||||
sd_markdown_render(ob, document, doc_size, _snudown.toc_renderer);
|
||||
_snudown.toc_state->options.html.toc_id_prefix = NULL;
|
||||
|
||||
options->html.flags |= HTML_TOC;
|
||||
}
|
||||
|
||||
options->html.toc_id_prefix = toc_id_prefix;
|
||||
|
||||
/* do the magic */
|
||||
sd_markdown_render(ob, document, doc_size, _snudown.main_renderer);
|
||||
|
||||
options->html.toc_id_prefix = NULL;
|
||||
options->html.flags = flags;
|
||||
}
|
||||
int
|
||||
main(int argc, char **argv)
|
||||
{
|
||||
init_default_renderer();
|
||||
init_wiki_renderer();
|
||||
|
||||
struct buf *ib, *ob;
|
||||
int size_read = 0, wiki_mode = 0, i = 0, have_errors = 0;
|
||||
|
||||
/* reading everything */
|
||||
ib = bufnew(READ_UNIT);
|
||||
bufgrow(ib, READ_UNIT);
|
||||
while ((size_read = fread(ib->data + ib->size, 1, ib->asize - ib->size, stdin)) > 0) {
|
||||
ib->size += size_read;
|
||||
bufgrow(ib, ib->size + READ_UNIT);
|
||||
}
|
||||
/* Render to a buffer, then print that out */
|
||||
ob = bufnew(OUTPUT_UNIT);
|
||||
bufputs(ob, "<!DOCTYPE html><html><body>\n");
|
||||
snudown_md(ob, ib->data, ib->size, wiki_mode);
|
||||
bufputs(ob, "</body></html>\n");
|
||||
|
||||
// Wiki mode explicitly allows unbalanced tags, need some way to exclude those
|
||||
if (!wiki_mode) {
|
||||
GumboOutput* output = gumbo_parse_with_options(&kGumboDefaultOptions, bufcstr(ob), ob->size);
|
||||
|
||||
for (i=0; i < output->errors.length; ++i) {
|
||||
// stupid "public" API I hacked in.
|
||||
void* thing = output->errors.data[i];
|
||||
GumboErrorType type = gumbo_get_error_type(thing);
|
||||
switch(type) {
|
||||
case GUMBO_ERR_UTF8_INVALID:
|
||||
case GUMBO_ERR_UTF8_NULL:
|
||||
// Making sure the user gave us valid
|
||||
// utf-8 or transforming it to valid
|
||||
// utf-8 is outside the scope of snudown
|
||||
continue;
|
||||
default:
|
||||
have_errors = 1;
|
||||
printf("%s\n", GUMBO_ERROR_NAMES[type]);
|
||||
printf("%s\n",gumbo_get_error_text(thing));
|
||||
printf("===============\n");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (have_errors) {
|
||||
// gotta trigger a crash for AFL to catch it
|
||||
assert(0);
|
||||
}
|
||||
|
||||
gumbo_destroy_output(&kGumboDefaultOptions, output);
|
||||
}
|
||||
bufrelease(ob);
|
||||
bufrelease(ib);
|
||||
return 0;
|
||||
}
|
2
SnudownTest/fuzzing/triageerrors.sh
Normal file
2
SnudownTest/fuzzing/triageerrors.sh
Normal file
|
@ -0,0 +1,2 @@
|
|||
#!/bin/bash
# Replay every input AFL stored under a crashes/ or hangs/ directory through
# validatemd.sh. find -exec passes each path as one verbatim argument, so
# names containing quotes, backslashes or blanks are not mangled.
find testing/afl_results/ -regextype posix-egrep -regex ".*/(crashes|hangs)/.*" -exec ./validatemd.sh {} \;
|
3
SnudownTest/fuzzing/validatemd.sh
Normal file
3
SnudownTest/fuzzing/validatemd.sh
Normal file
|
@ -0,0 +1,3 @@
|
|||
#!/bin/bash
# Print the name of the test case given as $1, then feed that file to the
# validator on stdin. The path is quoted so blanks or glob characters in it
# do not break the redirection.
echo "** ${1}"
./build/snudown-validator < "$1"
|
3
SnudownTest/gperf.exe
Normal file
3
SnudownTest/gperf.exe
Normal file
|
@ -0,0 +1,3 @@
|
|||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:3f9266ea2d2bd19a503b5d2ec613e983c6ed9ea45ff6b5820b0681fd1b778d12
|
||||
size 103424
|
37
SnudownTest/houdini.h
Normal file
37
SnudownTest/houdini.h
Normal file
|
@ -0,0 +1,37 @@
|
|||
#ifndef HOUDINI_H__
|
||||
#define HOUDINI_H__
|
||||
|
||||
#include "buffer.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#ifdef HOUDINI_USE_LOCALE
#	define _isxdigit(c) isxdigit(c)
#	define _isdigit(c) isdigit(c)
#else
/*
 * Helper _isdigit methods -- do not trust the current locale.
 *
 * Plain range checks are used rather than strchr() on a digit string:
 * strchr() also matches the terminating NUL, which would classify a 0 byte
 * as a hex digit. Both macros evaluate their argument more than once.
 */
#	define _isdigit(c) ((c) >= '0' && (c) <= '9')
#	define _isxdigit(c) (_isdigit(c) || \
		((c) >= 'a' && (c) <= 'f') || \
		((c) >= 'A' && (c) <= 'F'))
#endif
|
||||
|
||||
extern void houdini_escape_html(struct buf *ob, const uint8_t *src, size_t size);
|
||||
extern void houdini_escape_html0(struct buf *ob, const uint8_t *src, size_t size, int secure);
|
||||
extern void houdini_unescape_html(struct buf *ob, const uint8_t *src, size_t size);
|
||||
extern void houdini_escape_xml(struct buf *ob, const uint8_t *src, size_t size);
|
||||
extern void houdini_escape_uri(struct buf *ob, const uint8_t *src, size_t size);
|
||||
extern void houdini_escape_url(struct buf *ob, const uint8_t *src, size_t size);
|
||||
extern void houdini_escape_href(struct buf *ob, const uint8_t *src, size_t size);
|
||||
extern void houdini_unescape_uri(struct buf *ob, const uint8_t *src, size_t size);
|
||||
extern void houdini_unescape_url(struct buf *ob, const uint8_t *src, size_t size);
|
||||
extern void houdini_escape_js(struct buf *ob, const uint8_t *src, size_t size);
|
||||
extern void houdini_unescape_js(struct buf *ob, const uint8_t *src, size_t size);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
116
SnudownTest/houdini_href_e.c
Normal file
116
SnudownTest/houdini_href_e.c
Normal file
|
@ -0,0 +1,116 @@
|
|||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "houdini.h"
|
||||
|
||||
#define ESCAPE_GROW_FACTOR(x) (((x) * 12) / 10)
|
||||
|
||||
/*
|
||||
* The following characters will not be escaped:
|
||||
*
|
||||
* -_.+!*'(),%#@?=;:/,+&$ alphanum
|
||||
*
|
||||
* Note that this character set is the addition of:
|
||||
*
|
||||
* - The characters which are safe to be in an URL
|
||||
* - The characters which are *not* safe to be in
|
||||
* an URL because they are RESERVED characters.
|
||||
*
|
||||
* We assume (lazily) that any RESERVED char that
|
||||
* appears inside an URL is actually meant to
|
||||
* have its native function (i.e. as an URL
|
||||
* component/separator) and hence needs no escaping.
|
||||
*
|
||||
* There are two exceptions: the characters & (amp)
|
||||
* and ' (single quote) do not appear in the table.
|
||||
* They are meant to appear in the URL as components,
|
||||
* yet they require special HTML-entity escaping
|
||||
* to generate valid HTML markup.
|
||||
*
|
||||
* All other characters will be escaped to %XX.
|
||||
*
|
||||
*/
|
||||
/*
 * Per-byte classification consulted by houdini_escape_href():
 *   1 - copied to the output unchanged
 *   2 - control character, dropped from the output
 *   0 - needs escaping ('&' and '\'' as HTML entities, the rest as %XX)
 * TAB, LF and CR are class 0, not class 2.
 */
static const char HREF_SAFE[256] = {
	/* 0x00 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 2, 2, 0, 2, 2,
	/* 0x10 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0x20 */ 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
	/* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
	/* 0x60 */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
	/* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xA0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xB0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xC0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xD0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xE0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xF0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
|
||||
|
||||
void
|
||||
houdini_escape_href(struct buf *ob, const uint8_t *src, size_t size)
|
||||
{
|
||||
static const char hex_chars[] = "0123456789ABCDEF";
|
||||
size_t i = 0, org;
|
||||
char hex_str[3];
|
||||
|
||||
bufgrow(ob, ESCAPE_GROW_FACTOR(size));
|
||||
hex_str[0] = '%';
|
||||
|
||||
while (i < size) {
|
||||
org = i;
|
||||
/* Skip by characters that don't need special
|
||||
* processing */
|
||||
while (i < size && HREF_SAFE[src[i]] == 1)
|
||||
i++;
|
||||
|
||||
if (i > org)
|
||||
bufput(ob, src + org, i - org);
|
||||
|
||||
/* escaping */
|
||||
if (i >= size)
|
||||
break;
|
||||
|
||||
/* throw out control characters */
|
||||
if (HREF_SAFE[src[i]] == 2) {
|
||||
i++;
|
||||
continue;
|
||||
}
|
||||
|
||||
switch (src[i]) {
|
||||
/* amp appears all the time in URLs, but needs
|
||||
* HTML-entity escaping to be inside an href */
|
||||
case '&':
|
||||
BUFPUTSL(ob, "&");
|
||||
break;
|
||||
|
||||
/* the single quote is a valid URL character
|
||||
* according to the standard; it needs HTML
|
||||
* entity escaping too */
|
||||
case '\'':
|
||||
BUFPUTSL(ob, "'");
|
||||
break;
|
||||
|
||||
/* the space can be escaped to %20 or a plus
|
||||
* sign. we're going with the generic escape
|
||||
* for now. the plus thing is more commonly seen
|
||||
* when building GET strings */
|
||||
#if 0
|
||||
case ' ':
|
||||
bufputc(ob, '+');
|
||||
break;
|
||||
#endif
|
||||
|
||||
/* every other character goes with a %XX escaping */
|
||||
default:
|
||||
hex_str[1] = hex_chars[(src[i] >> 4) & 0xF];
|
||||
hex_str[2] = hex_chars[src[i] & 0xF];
|
||||
bufput(ob, hex_str, 3);
|
||||
}
|
||||
|
||||
i++;
|
||||
}
|
||||
}
|
87
SnudownTest/houdini_html_e.c
Normal file
87
SnudownTest/houdini_html_e.c
Normal file
|
@ -0,0 +1,87 @@
|
|||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "houdini.h"
|
||||
|
||||
#define ESCAPE_GROW_FACTOR(x) (((x) * 12) / 10) /* this is very scientific, yes */
|
||||
|
||||
/**
|
||||
* According to the OWASP rules:
|
||||
*
|
||||
* & --> &amp;
|
||||
* < --> &lt;
|
||||
* > --> &gt;
|
||||
* " --> &quot;
|
||||
* ' --> &#39; &apos; is not recommended
|
||||
* / --> &#47; forward slash is included as it helps end an HTML entity
|
||||
*
|
||||
*/
|
||||
/*
 * Maps each byte to an index into HTML_ESCAPES:
 *   0 - byte is copied unchanged
 *   1 '"'   2 '&'   3 '\''   4 '/'   5 '<'   6 '>'
 *   7 - control character (everything below 0x20 except TAB, LF, CR)
 */
static const char HTML_ESCAPE_TABLE[256] = {
	/* 0x00 */ 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 7, 7, 0, 7, 7,
	/* 0x10 */ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
	/* 0x20 */ 0, 0, 1, 0, 0, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 4,
	/* 0x30 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 6, 0,
	/* 0x40 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x60 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x70 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xA0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xB0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xC0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xD0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xE0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xF0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
|
||||
|
||||
/*
 * Replacement text for each class in HTML_ESCAPE_TABLE.  The entries
 * must be HTML entities: emitting the raw character would defeat the
 * escaping entirely.
 */
static const char *HTML_ESCAPES[] = {
	"",        /* 0: not escaped (never looked up) */
	"&quot;",  /* 1: '"'  */
	"&amp;",   /* 2: '&'  */
	"&#39;",   /* 3: '\'' */
	"&#47;",   /* 4: '/'  */
	"&lt;",    /* 5: '<'  */
	"&gt;",    /* 6: '>'  */
	"",        /* 7: control characters are thrown out */
};
|
||||
|
||||
void
|
||||
houdini_escape_html0(struct buf *ob, const uint8_t *src, size_t size, int secure)
|
||||
{
|
||||
size_t i = 0, org, esc = 0;
|
||||
|
||||
bufgrow(ob, ESCAPE_GROW_FACTOR(size));
|
||||
|
||||
while (i < size) {
|
||||
org = i;
|
||||
while (i < size && (esc = HTML_ESCAPE_TABLE[src[i]]) == 0)
|
||||
i++;
|
||||
|
||||
if (i > org)
|
||||
bufput(ob, src + org, i - org);
|
||||
|
||||
/* escaping */
|
||||
if (i >= size)
|
||||
break;
|
||||
|
||||
/* The forward slash is only escaped in secure mode */
|
||||
if (src[i] == '/' && !secure) {
|
||||
bufputc(ob, '/');
|
||||
} else if (HTML_ESCAPE_TABLE[src[i]] == 7) {
|
||||
/* skip control characters */
|
||||
} else {
|
||||
bufputs(ob, HTML_ESCAPES[esc]);
|
||||
}
|
||||
|
||||
i++;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * HTML-escape `src` into `ob` in "secure" mode, i.e. with the forward
 * slash escaped as well.
 */
void
houdini_escape_html(struct buf *ob, const uint8_t *src, size_t size)
{
	const int secure = 1;

	houdini_escape_html0(ob, src, size, secure);
}
|
||||
|
790
SnudownTest/html.c
Normal file
790
SnudownTest/html.c
Normal file
|
@ -0,0 +1,790 @@
|
|||
/*
|
||||
* Copyright (c) 2009, Natacha Porté
|
||||
* Copyright (c) 2011, Vicent Marti
|
||||
*
|
||||
* Permission to use, copy, modify, and distribute this software for any
|
||||
* purpose with or without fee is hereby granted, provided that the above
|
||||
* copyright notice and this permission notice appear in all copies.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "markdown.h"
|
||||
#include "html.h"
|
||||
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <ctype.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
#include "houdini.h"
|
||||
|
||||
#define USE_XHTML(opt) (opt->flags & HTML_USE_XHTML)
|
||||
|
||||
int
|
||||
sdhtml_is_tag(const uint8_t *tag_data, size_t tag_size, const char *tagname)
|
||||
{
|
||||
size_t i;
|
||||
int closed = 0;
|
||||
|
||||
if (tag_size < 3 || tag_data[0] != '<')
|
||||
return HTML_TAG_NONE;
|
||||
|
||||
i = 1;
|
||||
|
||||
if (tag_data[i] == '/') {
|
||||
closed = 1;
|
||||
i++;
|
||||
}
|
||||
|
||||
for (; i < tag_size; ++i, ++tagname) {
|
||||
if (*tagname == 0)
|
||||
break;
|
||||
|
||||
if (tag_data[i] != *tagname)
|
||||
return HTML_TAG_NONE;
|
||||
}
|
||||
|
||||
if (i == tag_size)
|
||||
return HTML_TAG_NONE;
|
||||
|
||||
if (isspace(tag_data[i]) || tag_data[i] == '>')
|
||||
return closed ? HTML_TAG_CLOSE : HTML_TAG_OPEN;
|
||||
|
||||
return HTML_TAG_NONE;
|
||||
}
|
||||
|
||||
/* HTML-escape `length` bytes of `source` into `ob`, leaving '/' unescaped. */
static inline void
escape_html(struct buf *ob, const uint8_t *source, size_t length)
{
	const int secure = 0;

	houdini_escape_html0(ob, source, length, secure);
}
|
||||
|
||||
/* Escape `length` bytes of `source` for use as an href/src value in `ob`. */
static inline void
escape_href(struct buf *ob, const uint8_t *source, size_t length)
{
	houdini_escape_href(ob, source, length);
}
|
||||
|
||||
/********************
|
||||
* GENERIC RENDERER *
|
||||
********************/
|
||||
static int
|
||||
rndr_autolink(struct buf *ob, const struct buf *link, enum mkd_autolink type, void *opaque)
|
||||
{
|
||||
struct html_renderopt *options = opaque;
|
||||
uint8_t offset = 0;
|
||||
|
||||
if (!link || !link->size)
|
||||
return 0;
|
||||
|
||||
if ((options->flags & HTML_SAFELINK) != 0 &&
|
||||
!sd_autolink_issafe(link->data, link->size) &&
|
||||
type != MKDA_EMAIL)
|
||||
return 0;
|
||||
|
||||
BUFPUTSL(ob, "<a href=\"");
|
||||
if (type == MKDA_EMAIL)
|
||||
BUFPUTSL(ob, "mailto:");
|
||||
escape_href(ob, link->data + offset, link->size - offset);
|
||||
|
||||
if (options->link_attributes) {
|
||||
bufputc(ob, '\"');
|
||||
options->link_attributes(ob, link, opaque);
|
||||
bufputc(ob, '>');
|
||||
} else {
|
||||
BUFPUTSL(ob, "\">");
|
||||
}
|
||||
|
||||
/*
|
||||
* Pretty printing: if we get an email address as
|
||||
* an actual URI, e.g. `mailto:foo@bar.com`, we don't
|
||||
* want to print the `mailto:` prefix
|
||||
*/
|
||||
if (bufprefix(link, "mailto:") == 0) {
|
||||
escape_html(ob, link->data + 7, link->size - 7);
|
||||
} else {
|
||||
escape_html(ob, link->data, link->size);
|
||||
}
|
||||
|
||||
BUFPUTSL(ob, "</a>");
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void
|
||||
rndr_blockcode(struct buf *ob, const struct buf *text, const struct buf *lang, void *opaque)
|
||||
{
|
||||
if (ob->size) bufputc(ob, '\n');
|
||||
|
||||
if (lang && lang->size) {
|
||||
size_t i, cls;
|
||||
BUFPUTSL(ob, "<pre><code class=\"");
|
||||
|
||||
for (i = 0, cls = 0; i < lang->size; ++i, ++cls) {
|
||||
while (i < lang->size && isspace(lang->data[i]))
|
||||
i++;
|
||||
|
||||
if (i < lang->size) {
|
||||
size_t org = i;
|
||||
while (i < lang->size && !isspace(lang->data[i]))
|
||||
i++;
|
||||
|
||||
if (lang->data[org] == '.')
|
||||
org++;
|
||||
|
||||
if (cls) bufputc(ob, ' ');
|
||||
escape_html(ob, lang->data + org, i - org);
|
||||
}
|
||||
}
|
||||
|
||||
BUFPUTSL(ob, "\">");
|
||||
} else
|
||||
BUFPUTSL(ob, "<pre><code>");
|
||||
|
||||
if (text)
|
||||
escape_html(ob, text->data, text->size);
|
||||
|
||||
BUFPUTSL(ob, "</code></pre>\n");
|
||||
}
|
||||
|
||||
static void
|
||||
rndr_blockquote(struct buf *ob, const struct buf *text, void *opaque)
|
||||
{
|
||||
if (ob->size) bufputc(ob, '\n');
|
||||
BUFPUTSL(ob, "<blockquote>\n");
|
||||
if (text) bufput(ob, text->data, text->size);
|
||||
BUFPUTSL(ob, "</blockquote>\n");
|
||||
}
|
||||
|
||||
static int
|
||||
rndr_codespan(struct buf *ob, const struct buf *text, void *opaque)
|
||||
{
|
||||
BUFPUTSL(ob, "<code>");
|
||||
if (text) escape_html(ob, text->data, text->size);
|
||||
BUFPUTSL(ob, "</code>");
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int
|
||||
rndr_strikethrough(struct buf *ob, const struct buf *text, void *opaque)
|
||||
{
|
||||
if (!text || !text->size)
|
||||
return 0;
|
||||
|
||||
BUFPUTSL(ob, "<del>");
|
||||
bufput(ob, text->data, text->size);
|
||||
BUFPUTSL(ob, "</del>");
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int
|
||||
rndr_double_emphasis(struct buf *ob, const struct buf *text, void *opaque)
|
||||
{
|
||||
if (!text || !text->size)
|
||||
return 0;
|
||||
|
||||
BUFPUTSL(ob, "<strong>");
|
||||
bufput(ob, text->data, text->size);
|
||||
BUFPUTSL(ob, "</strong>");
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int
|
||||
rndr_emphasis(struct buf *ob, const struct buf *text, void *opaque)
|
||||
{
|
||||
if (!text || !text->size) return 0;
|
||||
BUFPUTSL(ob, "<em>");
|
||||
if (text) bufput(ob, text->data, text->size);
|
||||
BUFPUTSL(ob, "</em>");
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
 * Emit a line break followed by a newline: "<br/>" when HTML_USE_XHTML
 * is set, "<br>" otherwise.  Always returns 1.
 */
static int
rndr_linebreak(struct buf *ob, void *opaque)
{
	struct html_renderopt *options = opaque;
	const char *tag;

	if (USE_XHTML(options))
		tag = "<br/>\n";
	else
		tag = "<br>\n";

	bufputs(ob, tag);
	return 1;
}
|
||||
|
||||
static void
|
||||
rndr_header(struct buf *ob, const struct buf *text, int level, void *opaque)
|
||||
{
|
||||
struct html_renderopt *options = opaque;
|
||||
|
||||
if (ob->size)
|
||||
bufputc(ob, '\n');
|
||||
|
||||
if (options->flags & HTML_TOC) {
|
||||
bufprintf(ob, "<h%d id=\"", level);
|
||||
if (options->toc_id_prefix) {
|
||||
bufputs(ob, options->toc_id_prefix);
|
||||
}
|
||||
bufprintf(ob, "toc_%d\">", options->toc_data.header_count++);
|
||||
} else {
|
||||
bufprintf(ob, "<h%d>", level);
|
||||
}
|
||||
|
||||
if (text) bufput(ob, text->data, text->size);
|
||||
bufprintf(ob, "</h%d>\n", level);
|
||||
}
|
||||
|
||||
static int
|
||||
rndr_link(struct buf *ob, const struct buf *link, const struct buf *title, const struct buf *content, void *opaque)
|
||||
{
|
||||
struct html_renderopt *options = opaque;
|
||||
|
||||
if (link != NULL && (options->flags & HTML_SAFELINK) != 0 && !sd_autolink_issafe(link->data, link->size))
|
||||
return 0;
|
||||
|
||||
BUFPUTSL(ob, "<a href=\"");
|
||||
|
||||
if (link && link->size)
|
||||
escape_href(ob, link->data, link->size);
|
||||
|
||||
if (title && title->size) {
|
||||
BUFPUTSL(ob, "\" title=\"");
|
||||
escape_html(ob, title->data, title->size);
|
||||
}
|
||||
|
||||
if (options->link_attributes) {
|
||||
bufputc(ob, '\"');
|
||||
options->link_attributes(ob, link, opaque);
|
||||
bufputc(ob, '>');
|
||||
} else {
|
||||
BUFPUTSL(ob, "\">");
|
||||
}
|
||||
|
||||
if (content && content->size) bufput(ob, content->data, content->size);
|
||||
BUFPUTSL(ob, "</a>");
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void
|
||||
rndr_list(struct buf *ob, const struct buf *text, int flags, void *opaque)
|
||||
{
|
||||
if (ob->size) bufputc(ob, '\n');
|
||||
bufput(ob, flags & MKD_LIST_ORDERED ? "<ol>\n" : "<ul>\n", 5);
|
||||
if (text) bufput(ob, text->data, text->size);
|
||||
bufput(ob, flags & MKD_LIST_ORDERED ? "</ol>\n" : "</ul>\n", 6);
|
||||
}
|
||||
|
||||
static void
|
||||
rndr_listitem(struct buf *ob, const struct buf *text, int flags, void *opaque)
|
||||
{
|
||||
BUFPUTSL(ob, "<li>");
|
||||
if (text) {
|
||||
size_t size = text->size;
|
||||
while (size && text->data[size - 1] == '\n')
|
||||
size--;
|
||||
|
||||
bufput(ob, text->data, size);
|
||||
}
|
||||
BUFPUTSL(ob, "</li>\n");
|
||||
}
|
||||
|
||||
static void
|
||||
rndr_paragraph(struct buf *ob, const struct buf *text, void *opaque)
|
||||
{
|
||||
struct html_renderopt *options = opaque;
|
||||
size_t i = 0;
|
||||
|
||||
if (ob->size) bufputc(ob, '\n');
|
||||
|
||||
if (!text || !text->size)
|
||||
return;
|
||||
|
||||
while (i < text->size && isspace(text->data[i])) i++;
|
||||
|
||||
if (i == text->size)
|
||||
return;
|
||||
|
||||
BUFPUTSL(ob, "<p>");
|
||||
if (options->flags & HTML_HARD_WRAP) {
|
||||
size_t org;
|
||||
while (i < text->size) {
|
||||
org = i;
|
||||
while (i < text->size && text->data[i] != '\n')
|
||||
i++;
|
||||
|
||||
if (i > org)
|
||||
bufput(ob, text->data + org, i - org);
|
||||
|
||||
/*
|
||||
* do not insert a line break if this newline
|
||||
* is the last character on the paragraph
|
||||
*/
|
||||
if (i >= text->size - 1)
|
||||
break;
|
||||
|
||||
rndr_linebreak(ob, opaque);
|
||||
i++;
|
||||
}
|
||||
} else {
|
||||
bufput(ob, &text->data[i], text->size - i);
|
||||
}
|
||||
BUFPUTSL(ob, "</p>\n");
|
||||
}
|
||||
|
||||
static void
|
||||
rndr_raw_block(struct buf *ob, const struct buf *text, void *opaque)
|
||||
{
|
||||
size_t org, sz;
|
||||
if (!text) return;
|
||||
sz = text->size;
|
||||
while (sz > 0 && text->data[sz - 1] == '\n') sz--;
|
||||
org = 0;
|
||||
while (org < sz && text->data[org] == '\n') org++;
|
||||
if (org >= sz) return;
|
||||
if (ob->size) bufputc(ob, '\n');
|
||||
bufput(ob, text->data + org, sz - org);
|
||||
bufputc(ob, '\n');
|
||||
}
|
||||
|
||||
static int
|
||||
rndr_triple_emphasis(struct buf *ob, const struct buf *text, void *opaque)
|
||||
{
|
||||
if (!text || !text->size) return 0;
|
||||
BUFPUTSL(ob, "<strong><em>");
|
||||
bufput(ob, text->data, text->size);
|
||||
BUFPUTSL(ob, "</em></strong>");
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void
|
||||
rndr_hrule(struct buf *ob, void *opaque)
|
||||
{
|
||||
struct html_renderopt *options = opaque;
|
||||
if (ob->size) bufputc(ob, '\n');
|
||||
bufputs(ob, USE_XHTML(options) ? "<hr/>\n" : "<hr>\n");
|
||||
}
|
||||
|
||||
static int
|
||||
rndr_image(struct buf *ob, const struct buf *link, const struct buf *title, const struct buf *alt, void *opaque)
|
||||
{
|
||||
struct html_renderopt *options = opaque;
|
||||
if (!link || !link->size) return 0;
|
||||
|
||||
BUFPUTSL(ob, "<img src=\"");
|
||||
escape_href(ob, link->data, link->size);
|
||||
BUFPUTSL(ob, "\" alt=\"");
|
||||
|
||||
if (alt && alt->size)
|
||||
escape_html(ob, alt->data, alt->size);
|
||||
|
||||
if (title && title->size) {
|
||||
BUFPUTSL(ob, "\" title=\"");
|
||||
escape_html(ob, title->data, title->size); }
|
||||
|
||||
bufputs(ob, USE_XHTML(options) ? "\"/>" : "\">");
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void
|
||||
rndr_html_tag(struct buf *ob, const struct buf *text, void *opaque,
|
||||
char* tagname, char** whitelist, int tagtype)
|
||||
{
|
||||
size_t i, x, z, in_str = 0, seen_equals = 0, done = 0, done_attr = 0, reset = 0;
|
||||
struct buf *attr;
|
||||
struct buf *value;
|
||||
char c;
|
||||
|
||||
bufputc(ob, '<');
|
||||
|
||||
if(tagtype == HTML_TAG_CLOSE) {
|
||||
bufputc(ob, '/');
|
||||
bufputs(ob, tagname);
|
||||
bufputc(ob, '>');
|
||||
return;
|
||||
}
|
||||
|
||||
bufputs(ob, tagname);
|
||||
i = 1 + strlen(tagname);
|
||||
|
||||
attr = bufnew(16);
|
||||
value = bufnew(16);
|
||||
|
||||
for(; i < text->size && !done; i++) {
|
||||
c = text->data[i];
|
||||
done = 0;
|
||||
reset = 0;
|
||||
done_attr = 0;
|
||||
|
||||
switch(c) {
|
||||
case '>':
|
||||
done = 1;
|
||||
break;
|
||||
case '\'':
|
||||
case '"':
|
||||
if(!seen_equals) {
|
||||
reset = 1;
|
||||
} else if(!in_str) {
|
||||
in_str = c;
|
||||
} else if(in_str == c) {
|
||||
in_str = 0;
|
||||
done_attr = 1;
|
||||
} else {
|
||||
bufputc(value, c);
|
||||
}
|
||||
break;
|
||||
case ' ':
|
||||
if (in_str) {
|
||||
bufputc(value, ' ');
|
||||
} else {
|
||||
reset = 1;
|
||||
}
|
||||
break;
|
||||
case '=':
|
||||
if(seen_equals) {
|
||||
reset = 1;
|
||||
break;
|
||||
}
|
||||
seen_equals = 1;
|
||||
break;
|
||||
default:
|
||||
if(seen_equals && in_str || !seen_equals) {
|
||||
bufputc(seen_equals ? value : attr, c);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
if(done_attr) {
|
||||
int valid = 0;
|
||||
for(z = 0; whitelist[z]; z++) {
|
||||
if(strlen(whitelist[z]) != attr->size) {
|
||||
continue;
|
||||
}
|
||||
for(x = 0; x < attr->size; x++) {
|
||||
if(tolower(whitelist[z][x]) != tolower(attr->data[x])) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if(x == attr->size) {
|
||||
valid = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if(valid && value->size && attr->size) {
|
||||
bufputc(ob, ' ');
|
||||
escape_html(ob, attr->data, attr->size);
|
||||
bufputs(ob, "=\"");
|
||||
escape_html(ob, value->data, value->size);
|
||||
bufputc(ob, '"');
|
||||
}
|
||||
reset = 1;
|
||||
}
|
||||
|
||||
if(reset) {
|
||||
seen_equals = 0;
|
||||
in_str = 0;
|
||||
bufreset(attr);
|
||||
bufreset(value);
|
||||
}
|
||||
}
|
||||
|
||||
bufrelease(attr);
|
||||
bufrelease(value);
|
||||
|
||||
bufputc(ob, '>');
|
||||
}
|
||||
|
||||
static int
|
||||
rndr_raw_html(struct buf *ob, const struct buf *text, void *opaque)
|
||||
{
|
||||
struct html_renderopt *options = opaque;
|
||||
char** whitelist = options->html_element_whitelist;
|
||||
int i, tagtype;
|
||||
|
||||
/* Items on the whitelist ignore all other flags and just output */
|
||||
if (((options->flags & HTML_ALLOW_ELEMENT_WHITELIST) != 0) && whitelist) {
|
||||
for (i = 0; whitelist[i]; i++) {
|
||||
tagtype = sdhtml_is_tag(text->data, text->size, whitelist[i]);
|
||||
if (tagtype != HTML_TAG_NONE) {
|
||||
rndr_html_tag(ob, text, opaque,
|
||||
whitelist[i],
|
||||
options->html_attr_whitelist,
|
||||
tagtype);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* HTML_ESCAPE overrides SKIP_HTML, SKIP_STYLE, SKIP_LINKS and SKIP_IMAGES
|
||||
* It doens't see if there are any valid tags, just escape all of them. */
|
||||
if((options->flags & HTML_ESCAPE) != 0) {
|
||||
escape_html(ob, text->data, text->size);
|
||||
return 1;
|
||||
}
|
||||
|
||||
if ((options->flags & HTML_SKIP_HTML) != 0)
|
||||
return 1;
|
||||
|
||||
if ((options->flags & HTML_SKIP_STYLE) != 0 &&
|
||||
sdhtml_is_tag(text->data, text->size, "style"))
|
||||
return 1;
|
||||
|
||||
if ((options->flags & HTML_SKIP_LINKS) != 0 &&
|
||||
sdhtml_is_tag(text->data, text->size, "a"))
|
||||
return 1;
|
||||
|
||||
if ((options->flags & HTML_SKIP_IMAGES) != 0 &&
|
||||
sdhtml_is_tag(text->data, text->size, "img"))
|
||||
return 1;
|
||||
|
||||
bufput(ob, text->data, text->size);
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void
|
||||
rndr_table(struct buf *ob, const struct buf *header, const struct buf *body, void *opaque)
|
||||
{
|
||||
if (ob->size) bufputc(ob, '\n');
|
||||
BUFPUTSL(ob, "<table><thead>\n");
|
||||
if (header)
|
||||
bufput(ob, header->data, header->size);
|
||||
BUFPUTSL(ob, "</thead><tbody>\n");
|
||||
if (body)
|
||||
bufput(ob, body->data, body->size);
|
||||
BUFPUTSL(ob, "</tbody></table>\n");
|
||||
}
|
||||
|
||||
static void
|
||||
rndr_tablerow(struct buf *ob, const struct buf *text, void *opaque)
|
||||
{
|
||||
BUFPUTSL(ob, "<tr>\n");
|
||||
if (text)
|
||||
bufput(ob, text->data, text->size);
|
||||
BUFPUTSL(ob, "</tr>\n");
|
||||
}
|
||||
|
||||
static void
|
||||
rndr_tablecell(struct buf *ob, const struct buf *text, int flags, void *opaque, int col_span)
|
||||
{
|
||||
if (flags & MKD_TABLE_HEADER) {
|
||||
BUFPUTSL(ob, "<th");
|
||||
} else {
|
||||
BUFPUTSL(ob, "<td");
|
||||
}
|
||||
|
||||
if (col_span > 1) {
|
||||
bufprintf(ob, " colspan=\"%d\" ", col_span);
|
||||
}
|
||||
|
||||
switch (flags & MKD_TABLE_ALIGNMASK) {
|
||||
case MKD_TABLE_ALIGN_CENTER:
|
||||
BUFPUTSL(ob, " align=\"center\">");
|
||||
break;
|
||||
|
||||
case MKD_TABLE_ALIGN_L:
|
||||
BUFPUTSL(ob, " align=\"left\">");
|
||||
break;
|
||||
|
||||
case MKD_TABLE_ALIGN_R:
|
||||
BUFPUTSL(ob, " align=\"right\">");
|
||||
break;
|
||||
|
||||
default:
|
||||
BUFPUTSL(ob, ">");
|
||||
}
|
||||
|
||||
if (text)
|
||||
bufput(ob, text->data, text->size);
|
||||
|
||||
if (flags & MKD_TABLE_HEADER) {
|
||||
BUFPUTSL(ob, "</th>\n");
|
||||
} else {
|
||||
BUFPUTSL(ob, "</td>\n");
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
rndr_superscript(struct buf *ob, const struct buf *text, void *opaque)
|
||||
{
|
||||
if (!text || !text->size) return 0;
|
||||
BUFPUTSL(ob, "<sup>");
|
||||
bufput(ob, text->data, text->size);
|
||||
BUFPUTSL(ob, "</sup>");
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void
|
||||
rndr_normal_text(struct buf *ob, const struct buf *text, void *opaque)
|
||||
{
|
||||
if (text)
|
||||
escape_html(ob, text->data, text->size);
|
||||
}
|
||||
|
||||
static void
|
||||
toc_header(struct buf *ob, const struct buf *text, int level, void *opaque)
|
||||
{
|
||||
struct html_renderopt *options = opaque;
|
||||
|
||||
/* set the level offset if this is the first header
|
||||
* we're parsing for the document */
|
||||
if (options->toc_data.current_level == 0) {
|
||||
BUFPUTSL(ob, "<div class=\"toc\">\n");
|
||||
options->toc_data.level_offset = level - 1;
|
||||
}
|
||||
level -= options->toc_data.level_offset;
|
||||
|
||||
if (level > options->toc_data.current_level) {
|
||||
while (level > options->toc_data.current_level) {
|
||||
BUFPUTSL(ob, "<ul>\n<li>\n");
|
||||
options->toc_data.current_level++;
|
||||
}
|
||||
} else if (level < options->toc_data.current_level) {
|
||||
BUFPUTSL(ob, "</li>\n");
|
||||
while (level < options->toc_data.current_level) {
|
||||
BUFPUTSL(ob, "</ul>\n</li>\n");
|
||||
options->toc_data.current_level--;
|
||||
}
|
||||
BUFPUTSL(ob,"<li>\n");
|
||||
} else {
|
||||
BUFPUTSL(ob,"</li>\n<li>\n");
|
||||
}
|
||||
|
||||
BUFPUTSL(ob, "<a href=\"#");
|
||||
|
||||
if (options->toc_id_prefix) {
|
||||
bufputs(ob, options->toc_id_prefix);
|
||||
}
|
||||
|
||||
bufprintf(ob, "toc_%d\">", options->toc_data.header_count++);
|
||||
if (text)
|
||||
escape_html(ob, text->data, text->size);
|
||||
BUFPUTSL(ob, "</a>\n");
|
||||
}
|
||||
|
||||
static int
|
||||
toc_link(struct buf *ob, const struct buf *link, const struct buf *title, const struct buf *content, void *opaque)
|
||||
{
|
||||
if (content && content->size)
|
||||
bufput(ob, content->data, content->size);
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void
|
||||
reset_toc(struct buf *ob, void *opaque)
|
||||
{
|
||||
struct html_renderopt *options = opaque;
|
||||
|
||||
memset(&(options->toc_data), 0, sizeof(options->toc_data));
|
||||
}
|
||||
|
||||
static void
|
||||
toc_finalize(struct buf *ob, void *opaque)
|
||||
{
|
||||
struct html_renderopt *options = opaque;
|
||||
bool has_toc = false;
|
||||
while (options->toc_data.current_level > 0) {
|
||||
BUFPUTSL(ob, "</li>\n</ul>\n");
|
||||
options->toc_data.current_level--;
|
||||
has_toc = true;
|
||||
}
|
||||
if(has_toc) {
|
||||
BUFPUTSL(ob, "</div>\n");
|
||||
}
|
||||
reset_toc(ob, opaque);
|
||||
}
|
||||
|
||||
void
|
||||
sdhtml_toc_renderer(struct sd_callbacks *callbacks, struct html_renderopt *options)
|
||||
{
|
||||
static const struct sd_callbacks cb_default = {
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
toc_header,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
|
||||
NULL,
|
||||
rndr_codespan,
|
||||
rndr_double_emphasis,
|
||||
rndr_emphasis,
|
||||
NULL,
|
||||
NULL,
|
||||
toc_link,
|
||||
NULL,
|
||||
rndr_triple_emphasis,
|
||||
rndr_strikethrough,
|
||||
rndr_superscript,
|
||||
|
||||
NULL,
|
||||
NULL,
|
||||
|
||||
NULL,
|
||||
toc_finalize,
|
||||
};
|
||||
|
||||
memset(options, 0x0, sizeof(struct html_renderopt));
|
||||
options->flags = HTML_TOC | HTML_SKIP_HTML;
|
||||
|
||||
memcpy(callbacks, &cb_default, sizeof(struct sd_callbacks));
|
||||
}
|
||||
|
||||
void
|
||||
sdhtml_renderer(struct sd_callbacks *callbacks, struct html_renderopt *options, unsigned int render_flags)
|
||||
{
|
||||
static const struct sd_callbacks cb_default = {
|
||||
rndr_blockcode,
|
||||
rndr_blockquote,
|
||||
rndr_raw_block,
|
||||
rndr_header,
|
||||
rndr_hrule,
|
||||
rndr_list,
|
||||
rndr_listitem,
|
||||
rndr_paragraph,
|
||||
rndr_table,
|
||||
rndr_tablerow,
|
||||
rndr_tablecell,
|
||||
|
||||
rndr_autolink,
|
||||
rndr_codespan,
|
||||
rndr_double_emphasis,
|
||||
rndr_emphasis,
|
||||
rndr_image,
|
||||
rndr_linebreak,
|
||||
rndr_link,
|
||||
rndr_raw_html,
|
||||
rndr_triple_emphasis,
|
||||
rndr_strikethrough,
|
||||
rndr_superscript,
|
||||
|
||||
NULL,
|
||||
rndr_normal_text,
|
||||
|
||||
NULL,
|
||||
reset_toc,
|
||||
};
|
||||
|
||||
/* Prepare the options pointer */
|
||||
memset(options, 0x0, sizeof(struct html_renderopt));
|
||||
options->flags = render_flags;
|
||||
|
||||
/* Prepare the callbacks */
|
||||
memcpy(callbacks, &cb_default, sizeof(struct sd_callbacks));
|
||||
|
||||
if (render_flags & HTML_SKIP_IMAGES)
|
||||
callbacks->image = NULL;
|
||||
|
||||
if (render_flags & HTML_SKIP_LINKS) {
|
||||
callbacks->link = NULL;
|
||||
callbacks->autolink = NULL;
|
||||
}
|
||||
|
||||
if (render_flags & HTML_SKIP_HTML || render_flags & HTML_ESCAPE)
|
||||
callbacks->blockhtml = NULL;
|
||||
}
|
83
SnudownTest/html.h
Normal file
83
SnudownTest/html.h
Normal file
|
@ -0,0 +1,83 @@
|
|||
/*
|
||||
* Copyright (c) 2011, Vicent Marti
|
||||
*
|
||||
* Permission to use, copy, modify, and distribute this software for any
|
||||
* purpose with or without fee is hereby granted, provided that the above
|
||||
* copyright notice and this permission notice appear in all copies.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef UPSKIRT_HTML_H
|
||||
#define UPSKIRT_HTML_H
|
||||
|
||||
#include "markdown.h"
|
||||
#include "buffer.h"
|
||||
#include <stdlib.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Options and running state shared by the HTML renderer callbacks. */
struct html_renderopt {
	/* table-of-contents bookkeeping */
	struct {
		int header_count;   /* number of header anchors emitted so far */
		int current_level;  /* current nesting depth of the TOC lists */
		int level_offset;   /* level of the first header seen, minus one */
	} toc_data;

	/* optional string prepended to every "toc_<n>" anchor; may be NULL */
	char *toc_id_prefix;

	/* bitwise OR of html_render_mode values */
	unsigned int flags;

	/* NULL-terminated lists of element / attribute names allowed through */
	char **html_element_whitelist;
	char **html_attr_whitelist;

	/* extra callbacks */
	void (*link_attributes)(struct buf *ob, const struct buf *url, void *self);
};
|
||||
|
||||
/* Bit flags accepted in html_renderopt.flags. */
typedef enum {
	HTML_SKIP_HTML               = 0x001,
	HTML_SKIP_STYLE              = 0x002,
	HTML_SKIP_IMAGES             = 0x004,
	HTML_SKIP_LINKS              = 0x008,
	HTML_EXPAND_TABS             = 0x010,
	HTML_SAFELINK                = 0x020,
	HTML_TOC                     = 0x040,
	HTML_HARD_WRAP               = 0x080,
	HTML_USE_XHTML               = 0x100,
	HTML_ESCAPE                  = 0x200,
	HTML_ALLOW_ELEMENT_WHITELIST = 0x400,
} html_render_mode;
|
||||
|
||||
/* Result of sdhtml_is_tag(). */
typedef enum {
	HTML_TAG_NONE  = 0,  /* not the requested tag */
	HTML_TAG_OPEN  = 1,  /* opening tag */
	HTML_TAG_CLOSE = 2,  /* closing tag */
} html_tag;
|
||||
|
||||
int
|
||||
sdhtml_is_tag(const uint8_t *tag_data, size_t tag_size, const char *tagname);
|
||||
|
||||
extern void
|
||||
sdhtml_renderer(struct sd_callbacks *callbacks, struct html_renderopt *options_ptr, unsigned int render_flags);
|
||||
|
||||
extern void
|
||||
sdhtml_toc_renderer(struct sd_callbacks *callbacks, struct html_renderopt *options_ptr);
|
||||
|
||||
extern void
|
||||
sdhtml_smartypants(struct buf *ob, const uint8_t *text, size_t size);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
37
SnudownTest/html/houdini.h
Normal file
37
SnudownTest/html/houdini.h
Normal file
|
@ -0,0 +1,37 @@
|
|||
#ifndef HOUDINI_H__
|
||||
#define HOUDINI_H__
|
||||
|
||||
#include "buffer.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#ifdef HOUDINI_USE_LOCALE
#	define _isxdigit(c) isxdigit(c)
#	define _isdigit(c) isdigit(c)
#else
/*
 * Helper _isdigit methods -- do not trust the current locale.
 *
 * strchr() treats the terminating NUL as part of the string, so NUL has
 * to be rejected explicitly or _isxdigit('\0') would be true.
 * Both macros evaluate their argument more than once.
 */
#	define _isxdigit(c) ((c) != '\0' && strchr("0123456789ABCDEFabcdef", (c)) != NULL)
#	define _isdigit(c) ((c) >= '0' && (c) <= '9')
#endif
|
||||
|
||||
extern void houdini_escape_html(struct buf *ob, const uint8_t *src, size_t size);
|
||||
extern void houdini_escape_html0(struct buf *ob, const uint8_t *src, size_t size, int secure);
|
||||
extern void houdini_unescape_html(struct buf *ob, const uint8_t *src, size_t size);
|
||||
extern void houdini_escape_xml(struct buf *ob, const uint8_t *src, size_t size);
|
||||
extern void houdini_escape_uri(struct buf *ob, const uint8_t *src, size_t size);
|
||||
extern void houdini_escape_url(struct buf *ob, const uint8_t *src, size_t size);
|
||||
extern void houdini_escape_href(struct buf *ob, const uint8_t *src, size_t size);
|
||||
extern void houdini_unescape_uri(struct buf *ob, const uint8_t *src, size_t size);
|
||||
extern void houdini_unescape_url(struct buf *ob, const uint8_t *src, size_t size);
|
||||
extern void houdini_escape_js(struct buf *ob, const uint8_t *src, size_t size);
|
||||
extern void houdini_unescape_js(struct buf *ob, const uint8_t *src, size_t size);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
116
SnudownTest/html/houdini_href_e.c
Normal file
116
SnudownTest/html/houdini_href_e.c
Normal file
|
@ -0,0 +1,116 @@
|
|||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "houdini.h"
|
||||
|
||||
#define ESCAPE_GROW_FACTOR(x) (((x) * 12) / 10)
|
||||
|
||||
/*
|
||||
* The following characters will not be escaped:
|
||||
*
|
||||
* -_.+!*'(),%#@?=;:/,+&$ alphanum
|
||||
*
|
||||
* Note that this character set is the addition of:
|
||||
*
|
||||
* - The characters which are safe to be in an URL
|
||||
* - The characters which are *not* safe to be in
|
||||
* an URL because they are RESERVED characters.
|
||||
*
|
||||
* We assume (lazily) that any RESERVED char that
|
||||
* appears inside an URL is actually meant to
|
||||
* have its native function (i.e. as an URL
|
||||
* component/separator) and hence needs no escaping.
|
||||
*
|
||||
* There are two exceptions: the characters & (amp)
|
||||
* and ' (single quote) do not appear in the table.
|
||||
* They are meant to appear in the URL as components,
|
||||
* yet they require special HTML-entity escaping
|
||||
* to generate valid HTML markup.
|
||||
*
|
||||
* All other characters will be escaped to %XX.
|
||||
*
|
||||
*/
|
||||
/*
 * Per-byte classification used by houdini_escape_href():
 *   1 -- copied through unchanged
 *   2 -- control character, dropped from the output
 *   0 -- needs escaping (HTML entity or %XX)
 *
 * Only the ASCII half is spelled out; bytes 0x80-0xFF are left to the
 * implicit zero initialisation and therefore fall in class 0.
 */
static const char HREF_SAFE[256] = {
	/* 0x00 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 2, 2, 0, 2, 2,
	/* 0x10 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0x20 */ 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
	/* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
	/* 0x60 */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
};
|
||||
|
||||
void
|
||||
houdini_escape_href(struct buf *ob, const uint8_t *src, size_t size)
|
||||
{
|
||||
static const char hex_chars[] = "0123456789ABCDEF";
|
||||
size_t i = 0, org;
|
||||
char hex_str[3];
|
||||
|
||||
bufgrow(ob, ESCAPE_GROW_FACTOR(size));
|
||||
hex_str[0] = '%';
|
||||
|
||||
while (i < size) {
|
||||
org = i;
|
||||
/* Skip by characters that don't need special
|
||||
* processing */
|
||||
while (i < size && HREF_SAFE[src[i]] == 1)
|
||||
i++;
|
||||
|
||||
if (i > org)
|
||||
bufput(ob, src + org, i - org);
|
||||
|
||||
/* escaping */
|
||||
if (i >= size)
|
||||
break;
|
||||
|
||||
/* throw out control characters */
|
||||
if (HREF_SAFE[src[i]] == 2) {
|
||||
i++;
|
||||
continue;
|
||||
}
|
||||
|
||||
switch (src[i]) {
|
||||
/* amp appears all the time in URLs, but needs
|
||||
* HTML-entity escaping to be inside an href */
|
||||
case '&':
|
||||
BUFPUTSL(ob, "&");
|
||||
break;
|
||||
|
||||
/* the single quote is a valid URL character
|
||||
* according to the standard; it needs HTML
|
||||
* entity escaping too */
|
||||
case '\'':
|
||||
BUFPUTSL(ob, "'");
|
||||
break;
|
||||
|
||||
/* the space can be escaped to %20 or a plus
|
||||
* sign. we're going with the generic escape
|
||||
* for now. the plus thing is more commonly seen
|
||||
* when building GET strings */
|
||||
#if 0
|
||||
case ' ':
|
||||
bufputc(ob, '+');
|
||||
break;
|
||||
#endif
|
||||
|
||||
/* every other character goes with a %XX escaping */
|
||||
default:
|
||||
hex_str[1] = hex_chars[(src[i] >> 4) & 0xF];
|
||||
hex_str[2] = hex_chars[src[i] & 0xF];
|
||||
bufput(ob, hex_str, 3);
|
||||
}
|
||||
|
||||
i++;
|
||||
}
|
||||
}
|
87
SnudownTest/html/houdini_html_e.c
Normal file
87
SnudownTest/html/houdini_html_e.c
Normal file
|
@ -0,0 +1,87 @@
|
|||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "houdini.h"
|
||||
|
||||
#define ESCAPE_GROW_FACTOR(x) (((x) * 12) / 10) /* this is very scientific, yes */
|
||||
|
||||
/**
|
||||
* According to the OWASP rules:
|
||||
*
|
||||
* & --> &amp;
|
||||
* < --> &lt;
|
||||
* > --> &gt;
|
||||
* " --> &quot;
|
||||
* ' --> &#x27;     &apos; is not recommended
|
||||
* / --> &#x2F;     forward slash is included as it helps end an HTML entity
|
||||
*
|
||||
*/
|
||||
/*
 * Maps every input byte to an index into HTML_ESCAPES.
 *
 *   0 -- no escaping needed, the byte is copied through
 *   1..6 -- '"', '&', '\'', '/', '<', '>' respectively
 *   7 -- control character, dropped from the output
 *
 * Only the first four rows contain non-zero entries; bytes 0x40-0xFF rely
 * on the implicit zero initialisation.
 */
static const char HTML_ESCAPE_TABLE[256] = {
	/* 0x00 */ 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 7, 7, 0, 7, 7,
	/* 0x10 */ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
	/* 0x20 */ 0, 0, 1, 0, 0, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 4,
	/* 0x30 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 6, 0,
};
|
||||
|
||||
/*
 * Replacement text for each escape class produced by HTML_ESCAPE_TABLE.
 * The array index is the table value; classes 0 and 7 map to the empty
 * string (class 0 is never looked up, class 7 drops the byte).
 *
 * The quote and slash use decimal character references (&#39; is the same
 * character as &#x27;, &#47; the same as &#x2F;).
 */
static const char *HTML_ESCAPES[] = {
	"",
	"&quot;",
	"&amp;",
	"&#39;",
	"&#47;",
	"&lt;",
	"&gt;",
	"", /* throw out control characters */
};
|
||||
|
||||
void
|
||||
houdini_escape_html0(struct buf *ob, const uint8_t *src, size_t size, int secure)
|
||||
{
|
||||
size_t i = 0, org, esc = 0;
|
||||
|
||||
bufgrow(ob, ESCAPE_GROW_FACTOR(size));
|
||||
|
||||
while (i < size) {
|
||||
org = i;
|
||||
while (i < size && (esc = HTML_ESCAPE_TABLE[src[i]]) == 0)
|
||||
i++;
|
||||
|
||||
if (i > org)
|
||||
bufput(ob, src + org, i - org);
|
||||
|
||||
/* escaping */
|
||||
if (i >= size)
|
||||
break;
|
||||
|
||||
/* The forward slash is only escaped in secure mode */
|
||||
if (src[i] == '/' && !secure) {
|
||||
bufputc(ob, '/');
|
||||
} else if (HTML_ESCAPE_TABLE[src[i]] == 7) {
|
||||
/* skip control characters */
|
||||
} else {
|
||||
bufputs(ob, HTML_ESCAPES[esc]);
|
||||
}
|
||||
|
||||
i++;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * HTML-escape `src` into `ob` in secure mode, i.e. with the forward slash
 * escaped as well.
 */
void
houdini_escape_html(struct buf *ob, const uint8_t *src, size_t size)
{
	const int secure_mode = 1;

	houdini_escape_html0(ob, src, size, secure_mode);
}
|
||||
|
790
SnudownTest/html/html.c
Normal file
790
SnudownTest/html/html.c
Normal file
|
@ -0,0 +1,790 @@
|
|||
/*
|
||||
* Copyright (c) 2009, Natacha Porté
|
||||
* Copyright (c) 2011, Vicent Marti
|
||||
*
|
||||
* Permission to use, copy, modify, and distribute this software for any
|
||||
* purpose with or without fee is hereby granted, provided that the above
|
||||
* copyright notice and this permission notice appear in all copies.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "markdown.h"
|
||||
#include "html.h"
|
||||
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <ctype.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
#include "houdini.h"
|
||||
|
||||
/* Non-zero when the render options ask for XHTML-style self-closing tags.
 * The argument is parenthesized so any pointer expression can be passed. */
#define USE_XHTML(opt) ((opt)->flags & HTML_USE_XHTML)
|
||||
|
||||
int
|
||||
sdhtml_is_tag(const uint8_t *tag_data, size_t tag_size, const char *tagname)
|
||||
{
|
||||
size_t i;
|
||||
int closed = 0;
|
||||
|
||||
if (tag_size < 3 || tag_data[0] != '<')
|
||||
return HTML_TAG_NONE;
|
||||
|
||||
i = 1;
|
||||
|
||||
if (tag_data[i] == '/') {
|
||||
closed = 1;
|
||||
i++;
|
||||
}
|
||||
|
||||
for (; i < tag_size; ++i, ++tagname) {
|
||||
if (*tagname == 0)
|
||||
break;
|
||||
|
||||
if (tag_data[i] != *tagname)
|
||||
return HTML_TAG_NONE;
|
||||
}
|
||||
|
||||
if (i == tag_size)
|
||||
return HTML_TAG_NONE;
|
||||
|
||||
if (isspace(tag_data[i]) || tag_data[i] == '>')
|
||||
return closed ? HTML_TAG_CLOSE : HTML_TAG_OPEN;
|
||||
|
||||
return HTML_TAG_NONE;
|
||||
}
|
||||
|
||||
/* HTML-escape `length` bytes of `source` into `ob` without escaping '/'. */
static inline void
escape_html(struct buf *ob, const uint8_t *source, size_t length)
{
	const int secure_mode = 0;

	houdini_escape_html0(ob, source, length, secure_mode);
}
|
||||
|
||||
/* Escape `length` bytes of `source` for use as an href/src attribute value. */
static inline void
escape_href(struct buf *ob, const uint8_t *source, size_t length)
{
	houdini_escape_href(ob, source, length);
}
|
||||
|
||||
/********************
|
||||
* GENERIC RENDERER *
|
||||
********************/
|
||||
static int
|
||||
rndr_autolink(struct buf *ob, const struct buf *link, enum mkd_autolink type, void *opaque)
|
||||
{
|
||||
struct html_renderopt *options = opaque;
|
||||
uint8_t offset = 0;
|
||||
|
||||
if (!link || !link->size)
|
||||
return 0;
|
||||
|
||||
if ((options->flags & HTML_SAFELINK) != 0 &&
|
||||
!sd_autolink_issafe(link->data, link->size) &&
|
||||
type != MKDA_EMAIL)
|
||||
return 0;
|
||||
|
||||
BUFPUTSL(ob, "<a href=\"");
|
||||
if (type == MKDA_EMAIL)
|
||||
BUFPUTSL(ob, "mailto:");
|
||||
escape_href(ob, link->data + offset, link->size - offset);
|
||||
|
||||
if (options->link_attributes) {
|
||||
bufputc(ob, '\"');
|
||||
options->link_attributes(ob, link, opaque);
|
||||
bufputc(ob, '>');
|
||||
} else {
|
||||
BUFPUTSL(ob, "\">");
|
||||
}
|
||||
|
||||
/*
|
||||
* Pretty printing: if we get an email address as
|
||||
* an actual URI, e.g. `mailto:foo@bar.com`, we don't
|
||||
* want to print the `mailto:` prefix
|
||||
*/
|
||||
if (bufprefix(link, "mailto:") == 0) {
|
||||
escape_html(ob, link->data + 7, link->size - 7);
|
||||
} else {
|
||||
escape_html(ob, link->data, link->size);
|
||||
}
|
||||
|
||||
BUFPUTSL(ob, "</a>");
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void
|
||||
rndr_blockcode(struct buf *ob, const struct buf *text, const struct buf *lang, void *opaque)
|
||||
{
|
||||
if (ob->size) bufputc(ob, '\n');
|
||||
|
||||
if (lang && lang->size) {
|
||||
size_t i, cls;
|
||||
BUFPUTSL(ob, "<pre><code class=\"");
|
||||
|
||||
for (i = 0, cls = 0; i < lang->size; ++i, ++cls) {
|
||||
while (i < lang->size && isspace(lang->data[i]))
|
||||
i++;
|
||||
|
||||
if (i < lang->size) {
|
||||
size_t org = i;
|
||||
while (i < lang->size && !isspace(lang->data[i]))
|
||||
i++;
|
||||
|
||||
if (lang->data[org] == '.')
|
||||
org++;
|
||||
|
||||
if (cls) bufputc(ob, ' ');
|
||||
escape_html(ob, lang->data + org, i - org);
|
||||
}
|
||||
}
|
||||
|
||||
BUFPUTSL(ob, "\">");
|
||||
} else
|
||||
BUFPUTSL(ob, "<pre><code>");
|
||||
|
||||
if (text)
|
||||
escape_html(ob, text->data, text->size);
|
||||
|
||||
BUFPUTSL(ob, "</code></pre>\n");
|
||||
}
|
||||
|
||||
static void
|
||||
rndr_blockquote(struct buf *ob, const struct buf *text, void *opaque)
|
||||
{
|
||||
if (ob->size) bufputc(ob, '\n');
|
||||
BUFPUTSL(ob, "<blockquote>\n");
|
||||
if (text) bufput(ob, text->data, text->size);
|
||||
BUFPUTSL(ob, "</blockquote>\n");
|
||||
}
|
||||
|
||||
static int
|
||||
rndr_codespan(struct buf *ob, const struct buf *text, void *opaque)
|
||||
{
|
||||
BUFPUTSL(ob, "<code>");
|
||||
if (text) escape_html(ob, text->data, text->size);
|
||||
BUFPUTSL(ob, "</code>");
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int
|
||||
rndr_strikethrough(struct buf *ob, const struct buf *text, void *opaque)
|
||||
{
|
||||
if (!text || !text->size)
|
||||
return 0;
|
||||
|
||||
BUFPUTSL(ob, "<del>");
|
||||
bufput(ob, text->data, text->size);
|
||||
BUFPUTSL(ob, "</del>");
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int
|
||||
rndr_double_emphasis(struct buf *ob, const struct buf *text, void *opaque)
|
||||
{
|
||||
if (!text || !text->size)
|
||||
return 0;
|
||||
|
||||
BUFPUTSL(ob, "<strong>");
|
||||
bufput(ob, text->data, text->size);
|
||||
BUFPUTSL(ob, "</strong>");
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int
|
||||
rndr_emphasis(struct buf *ob, const struct buf *text, void *opaque)
|
||||
{
|
||||
if (!text || !text->size) return 0;
|
||||
BUFPUTSL(ob, "<em>");
|
||||
if (text) bufput(ob, text->data, text->size);
|
||||
BUFPUTSL(ob, "</em>");
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Emit a line break, self-closed when HTML_USE_XHTML is set. Returns 1. */
static int
rndr_linebreak(struct buf *ob, void *opaque)
{
	struct html_renderopt *opts = opaque;
	const char *markup = "<br>\n";

	if (USE_XHTML(opts))
		markup = "<br/>\n";

	bufputs(ob, markup);
	return 1;
}
|
||||
|
||||
static void
|
||||
rndr_header(struct buf *ob, const struct buf *text, int level, void *opaque)
|
||||
{
|
||||
struct html_renderopt *options = opaque;
|
||||
|
||||
if (ob->size)
|
||||
bufputc(ob, '\n');
|
||||
|
||||
if (options->flags & HTML_TOC) {
|
||||
bufprintf(ob, "<h%d id=\"", level);
|
||||
if (options->toc_id_prefix) {
|
||||
bufputs(ob, options->toc_id_prefix);
|
||||
}
|
||||
bufprintf(ob, "toc_%d\">", options->toc_data.header_count++);
|
||||
} else {
|
||||
bufprintf(ob, "<h%d>", level);
|
||||
}
|
||||
|
||||
if (text) bufput(ob, text->data, text->size);
|
||||
bufprintf(ob, "</h%d>\n", level);
|
||||
}
|
||||
|
||||
static int
|
||||
rndr_link(struct buf *ob, const struct buf *link, const struct buf *title, const struct buf *content, void *opaque)
|
||||
{
|
||||
struct html_renderopt *options = opaque;
|
||||
|
||||
if (link != NULL && (options->flags & HTML_SAFELINK) != 0 && !sd_autolink_issafe(link->data, link->size))
|
||||
return 0;
|
||||
|
||||
BUFPUTSL(ob, "<a href=\"");
|
||||
|
||||
if (link && link->size)
|
||||
escape_href(ob, link->data, link->size);
|
||||
|
||||
if (title && title->size) {
|
||||
BUFPUTSL(ob, "\" title=\"");
|
||||
escape_html(ob, title->data, title->size);
|
||||
}
|
||||
|
||||
if (options->link_attributes) {
|
||||
bufputc(ob, '\"');
|
||||
options->link_attributes(ob, link, opaque);
|
||||
bufputc(ob, '>');
|
||||
} else {
|
||||
BUFPUTSL(ob, "\">");
|
||||
}
|
||||
|
||||
if (content && content->size) bufput(ob, content->data, content->size);
|
||||
BUFPUTSL(ob, "</a>");
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void
|
||||
rndr_list(struct buf *ob, const struct buf *text, int flags, void *opaque)
|
||||
{
|
||||
if (ob->size) bufputc(ob, '\n');
|
||||
bufput(ob, flags & MKD_LIST_ORDERED ? "<ol>\n" : "<ul>\n", 5);
|
||||
if (text) bufput(ob, text->data, text->size);
|
||||
bufput(ob, flags & MKD_LIST_ORDERED ? "</ol>\n" : "</ul>\n", 6);
|
||||
}
|
||||
|
||||
static void
|
||||
rndr_listitem(struct buf *ob, const struct buf *text, int flags, void *opaque)
|
||||
{
|
||||
BUFPUTSL(ob, "<li>");
|
||||
if (text) {
|
||||
size_t size = text->size;
|
||||
while (size && text->data[size - 1] == '\n')
|
||||
size--;
|
||||
|
||||
bufput(ob, text->data, size);
|
||||
}
|
||||
BUFPUTSL(ob, "</li>\n");
|
||||
}
|
||||
|
||||
static void
|
||||
rndr_paragraph(struct buf *ob, const struct buf *text, void *opaque)
|
||||
{
|
||||
struct html_renderopt *options = opaque;
|
||||
size_t i = 0;
|
||||
|
||||
if (ob->size) bufputc(ob, '\n');
|
||||
|
||||
if (!text || !text->size)
|
||||
return;
|
||||
|
||||
while (i < text->size && isspace(text->data[i])) i++;
|
||||
|
||||
if (i == text->size)
|
||||
return;
|
||||
|
||||
BUFPUTSL(ob, "<p>");
|
||||
if (options->flags & HTML_HARD_WRAP) {
|
||||
size_t org;
|
||||
while (i < text->size) {
|
||||
org = i;
|
||||
while (i < text->size && text->data[i] != '\n')
|
||||
i++;
|
||||
|
||||
if (i > org)
|
||||
bufput(ob, text->data + org, i - org);
|
||||
|
||||
/*
|
||||
* do not insert a line break if this newline
|
||||
* is the last character on the paragraph
|
||||
*/
|
||||
if (i >= text->size - 1)
|
||||
break;
|
||||
|
||||
rndr_linebreak(ob, opaque);
|
||||
i++;
|
||||
}
|
||||
} else {
|
||||
bufput(ob, &text->data[i], text->size - i);
|
||||
}
|
||||
BUFPUTSL(ob, "</p>\n");
|
||||
}
|
||||
|
||||
static void
|
||||
rndr_raw_block(struct buf *ob, const struct buf *text, void *opaque)
|
||||
{
|
||||
size_t org, sz;
|
||||
if (!text) return;
|
||||
sz = text->size;
|
||||
while (sz > 0 && text->data[sz - 1] == '\n') sz--;
|
||||
org = 0;
|
||||
while (org < sz && text->data[org] == '\n') org++;
|
||||
if (org >= sz) return;
|
||||
if (ob->size) bufputc(ob, '\n');
|
||||
bufput(ob, text->data + org, sz - org);
|
||||
bufputc(ob, '\n');
|
||||
}
|
||||
|
||||
static int
|
||||
rndr_triple_emphasis(struct buf *ob, const struct buf *text, void *opaque)
|
||||
{
|
||||
if (!text || !text->size) return 0;
|
||||
BUFPUTSL(ob, "<strong><em>");
|
||||
bufput(ob, text->data, text->size);
|
||||
BUFPUTSL(ob, "</em></strong>");
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void
|
||||
rndr_hrule(struct buf *ob, void *opaque)
|
||||
{
|
||||
struct html_renderopt *options = opaque;
|
||||
if (ob->size) bufputc(ob, '\n');
|
||||
bufputs(ob, USE_XHTML(options) ? "<hr/>\n" : "<hr>\n");
|
||||
}
|
||||
|
||||
static int
|
||||
rndr_image(struct buf *ob, const struct buf *link, const struct buf *title, const struct buf *alt, void *opaque)
|
||||
{
|
||||
struct html_renderopt *options = opaque;
|
||||
if (!link || !link->size) return 0;
|
||||
|
||||
BUFPUTSL(ob, "<img src=\"");
|
||||
escape_href(ob, link->data, link->size);
|
||||
BUFPUTSL(ob, "\" alt=\"");
|
||||
|
||||
if (alt && alt->size)
|
||||
escape_html(ob, alt->data, alt->size);
|
||||
|
||||
if (title && title->size) {
|
||||
BUFPUTSL(ob, "\" title=\"");
|
||||
escape_html(ob, title->data, title->size); }
|
||||
|
||||
bufputs(ob, USE_XHTML(options) ? "\"/>" : "\">");
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void
|
||||
rndr_html_tag(struct buf *ob, const struct buf *text, void *opaque,
|
||||
char* tagname, char** whitelist, int tagtype)
|
||||
{
|
||||
size_t i, x, z, in_str = 0, seen_equals = 0, done = 0, done_attr = 0, reset = 0;
|
||||
struct buf *attr;
|
||||
struct buf *value;
|
||||
char c;
|
||||
|
||||
bufputc(ob, '<');
|
||||
|
||||
if(tagtype == HTML_TAG_CLOSE) {
|
||||
bufputc(ob, '/');
|
||||
bufputs(ob, tagname);
|
||||
bufputc(ob, '>');
|
||||
return;
|
||||
}
|
||||
|
||||
bufputs(ob, tagname);
|
||||
i = 1 + strlen(tagname);
|
||||
|
||||
attr = bufnew(16);
|
||||
value = bufnew(16);
|
||||
|
||||
for(; i < text->size && !done; i++) {
|
||||
c = text->data[i];
|
||||
done = 0;
|
||||
reset = 0;
|
||||
done_attr = 0;
|
||||
|
||||
switch(c) {
|
||||
case '>':
|
||||
done = 1;
|
||||
break;
|
||||
case '\'':
|
||||
case '"':
|
||||
if(!seen_equals) {
|
||||
reset = 1;
|
||||
} else if(!in_str) {
|
||||
in_str = c;
|
||||
} else if(in_str == c) {
|
||||
in_str = 0;
|
||||
done_attr = 1;
|
||||
} else {
|
||||
bufputc(value, c);
|
||||
}
|
||||
break;
|
||||
case ' ':
|
||||
if (in_str) {
|
||||
bufputc(value, ' ');
|
||||
} else {
|
||||
reset = 1;
|
||||
}
|
||||
break;
|
||||
case '=':
|
||||
if(seen_equals) {
|
||||
reset = 1;
|
||||
break;
|
||||
}
|
||||
seen_equals = 1;
|
||||
break;
|
||||
default:
|
||||
if(seen_equals && in_str || !seen_equals) {
|
||||
bufputc(seen_equals ? value : attr, c);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
if(done_attr) {
|
||||
int valid = 0;
|
||||
for(z = 0; whitelist[z]; z++) {
|
||||
if(strlen(whitelist[z]) != attr->size) {
|
||||
continue;
|
||||
}
|
||||
for(x = 0; x < attr->size; x++) {
|
||||
if(tolower(whitelist[z][x]) != tolower(attr->data[x])) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if(x == attr->size) {
|
||||
valid = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if(valid && value->size && attr->size) {
|
||||
bufputc(ob, ' ');
|
||||
escape_html(ob, attr->data, attr->size);
|
||||
bufputs(ob, "=\"");
|
||||
escape_html(ob, value->data, value->size);
|
||||
bufputc(ob, '"');
|
||||
}
|
||||
reset = 1;
|
||||
}
|
||||
|
||||
if(reset) {
|
||||
seen_equals = 0;
|
||||
in_str = 0;
|
||||
bufreset(attr);
|
||||
bufreset(value);
|
||||
}
|
||||
}
|
||||
|
||||
bufrelease(attr);
|
||||
bufrelease(value);
|
||||
|
||||
bufputc(ob, '>');
|
||||
}
|
||||
|
||||
static int
|
||||
rndr_raw_html(struct buf *ob, const struct buf *text, void *opaque)
|
||||
{
|
||||
struct html_renderopt *options = opaque;
|
||||
char** whitelist = options->html_element_whitelist;
|
||||
int i, tagtype;
|
||||
|
||||
/* Items on the whitelist ignore all other flags and just output */
|
||||
if (((options->flags & HTML_ALLOW_ELEMENT_WHITELIST) != 0) && whitelist) {
|
||||
for (i = 0; whitelist[i]; i++) {
|
||||
tagtype = sdhtml_is_tag(text->data, text->size, whitelist[i]);
|
||||
if (tagtype != HTML_TAG_NONE) {
|
||||
rndr_html_tag(ob, text, opaque,
|
||||
whitelist[i],
|
||||
options->html_attr_whitelist,
|
||||
tagtype);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* HTML_ESCAPE overrides SKIP_HTML, SKIP_STYLE, SKIP_LINKS and SKIP_IMAGES
|
||||
* It doens't see if there are any valid tags, just escape all of them. */
|
||||
if((options->flags & HTML_ESCAPE) != 0) {
|
||||
escape_html(ob, text->data, text->size);
|
||||
return 1;
|
||||
}
|
||||
|
||||
if ((options->flags & HTML_SKIP_HTML) != 0)
|
||||
return 1;
|
||||
|
||||
if ((options->flags & HTML_SKIP_STYLE) != 0 &&
|
||||
sdhtml_is_tag(text->data, text->size, "style"))
|
||||
return 1;
|
||||
|
||||
if ((options->flags & HTML_SKIP_LINKS) != 0 &&
|
||||
sdhtml_is_tag(text->data, text->size, "a"))
|
||||
return 1;
|
||||
|
||||
if ((options->flags & HTML_SKIP_IMAGES) != 0 &&
|
||||
sdhtml_is_tag(text->data, text->size, "img"))
|
||||
return 1;
|
||||
|
||||
bufput(ob, text->data, text->size);
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void
|
||||
rndr_table(struct buf *ob, const struct buf *header, const struct buf *body, void *opaque)
|
||||
{
|
||||
if (ob->size) bufputc(ob, '\n');
|
||||
BUFPUTSL(ob, "<table><thead>\n");
|
||||
if (header)
|
||||
bufput(ob, header->data, header->size);
|
||||
BUFPUTSL(ob, "</thead><tbody>\n");
|
||||
if (body)
|
||||
bufput(ob, body->data, body->size);
|
||||
BUFPUTSL(ob, "</tbody></table>\n");
|
||||
}
|
||||
|
||||
static void
|
||||
rndr_tablerow(struct buf *ob, const struct buf *text, void *opaque)
|
||||
{
|
||||
BUFPUTSL(ob, "<tr>\n");
|
||||
if (text)
|
||||
bufput(ob, text->data, text->size);
|
||||
BUFPUTSL(ob, "</tr>\n");
|
||||
}
|
||||
|
||||
static void
|
||||
rndr_tablecell(struct buf *ob, const struct buf *text, int flags, void *opaque, int col_span)
|
||||
{
|
||||
if (flags & MKD_TABLE_HEADER) {
|
||||
BUFPUTSL(ob, "<th");
|
||||
} else {
|
||||
BUFPUTSL(ob, "<td");
|
||||
}
|
||||
|
||||
if (col_span > 1) {
|
||||
bufprintf(ob, " colspan=\"%d\" ", col_span);
|
||||
}
|
||||
|
||||
switch (flags & MKD_TABLE_ALIGNMASK) {
|
||||
case MKD_TABLE_ALIGN_CENTER:
|
||||
BUFPUTSL(ob, " align=\"center\">");
|
||||
break;
|
||||
|
||||
case MKD_TABLE_ALIGN_L:
|
||||
BUFPUTSL(ob, " align=\"left\">");
|
||||
break;
|
||||
|
||||
case MKD_TABLE_ALIGN_R:
|
||||
BUFPUTSL(ob, " align=\"right\">");
|
||||
break;
|
||||
|
||||
default:
|
||||
BUFPUTSL(ob, ">");
|
||||
}
|
||||
|
||||
if (text)
|
||||
bufput(ob, text->data, text->size);
|
||||
|
||||
if (flags & MKD_TABLE_HEADER) {
|
||||
BUFPUTSL(ob, "</th>\n");
|
||||
} else {
|
||||
BUFPUTSL(ob, "</td>\n");
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
rndr_superscript(struct buf *ob, const struct buf *text, void *opaque)
|
||||
{
|
||||
if (!text || !text->size) return 0;
|
||||
BUFPUTSL(ob, "<sup>");
|
||||
bufput(ob, text->data, text->size);
|
||||
BUFPUTSL(ob, "</sup>");
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void
|
||||
rndr_normal_text(struct buf *ob, const struct buf *text, void *opaque)
|
||||
{
|
||||
if (text)
|
||||
escape_html(ob, text->data, text->size);
|
||||
}
|
||||
|
||||
static void
|
||||
toc_header(struct buf *ob, const struct buf *text, int level, void *opaque)
|
||||
{
|
||||
struct html_renderopt *options = opaque;
|
||||
|
||||
/* set the level offset if this is the first header
|
||||
* we're parsing for the document */
|
||||
if (options->toc_data.current_level == 0) {
|
||||
BUFPUTSL(ob, "<div class=\"toc\">\n");
|
||||
options->toc_data.level_offset = level - 1;
|
||||
}
|
||||
level -= options->toc_data.level_offset;
|
||||
|
||||
if (level > options->toc_data.current_level) {
|
||||
while (level > options->toc_data.current_level) {
|
||||
BUFPUTSL(ob, "<ul>\n<li>\n");
|
||||
options->toc_data.current_level++;
|
||||
}
|
||||
} else if (level < options->toc_data.current_level) {
|
||||
BUFPUTSL(ob, "</li>\n");
|
||||
while (level < options->toc_data.current_level) {
|
||||
BUFPUTSL(ob, "</ul>\n</li>\n");
|
||||
options->toc_data.current_level--;
|
||||
}
|
||||
BUFPUTSL(ob,"<li>\n");
|
||||
} else {
|
||||
BUFPUTSL(ob,"</li>\n<li>\n");
|
||||
}
|
||||
|
||||
BUFPUTSL(ob, "<a href=\"#");
|
||||
|
||||
if (options->toc_id_prefix) {
|
||||
bufputs(ob, options->toc_id_prefix);
|
||||
}
|
||||
|
||||
bufprintf(ob, "toc_%d\">", options->toc_data.header_count++);
|
||||
if (text)
|
||||
escape_html(ob, text->data, text->size);
|
||||
BUFPUTSL(ob, "</a>\n");
|
||||
}
|
||||
|
||||
static int
|
||||
toc_link(struct buf *ob, const struct buf *link, const struct buf *title, const struct buf *content, void *opaque)
|
||||
{
|
||||
if (content && content->size)
|
||||
bufput(ob, content->data, content->size);
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void
|
||||
reset_toc(struct buf *ob, void *opaque)
|
||||
{
|
||||
struct html_renderopt *options = opaque;
|
||||
|
||||
memset(&(options->toc_data), 0, sizeof(options->toc_data));
|
||||
}
|
||||
|
||||
static void
|
||||
toc_finalize(struct buf *ob, void *opaque)
|
||||
{
|
||||
struct html_renderopt *options = opaque;
|
||||
bool has_toc = false;
|
||||
while (options->toc_data.current_level > 0) {
|
||||
BUFPUTSL(ob, "</li>\n</ul>\n");
|
||||
options->toc_data.current_level--;
|
||||
has_toc = true;
|
||||
}
|
||||
if(has_toc) {
|
||||
BUFPUTSL(ob, "</div>\n");
|
||||
}
|
||||
reset_toc(ob, opaque);
|
||||
}
|
||||
|
||||
void
|
||||
sdhtml_toc_renderer(struct sd_callbacks *callbacks, struct html_renderopt *options)
|
||||
{
|
||||
static const struct sd_callbacks cb_default = {
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
toc_header,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
|
||||
NULL,
|
||||
rndr_codespan,
|
||||
rndr_double_emphasis,
|
||||
rndr_emphasis,
|
||||
NULL,
|
||||
NULL,
|
||||
toc_link,
|
||||
NULL,
|
||||
rndr_triple_emphasis,
|
||||
rndr_strikethrough,
|
||||
rndr_superscript,
|
||||
|
||||
NULL,
|
||||
NULL,
|
||||
|
||||
NULL,
|
||||
toc_finalize,
|
||||
};
|
||||
|
||||
memset(options, 0x0, sizeof(struct html_renderopt));
|
||||
options->flags = HTML_TOC | HTML_SKIP_HTML;
|
||||
|
||||
memcpy(callbacks, &cb_default, sizeof(struct sd_callbacks));
|
||||
}
|
||||
|
||||
void
|
||||
sdhtml_renderer(struct sd_callbacks *callbacks, struct html_renderopt *options, unsigned int render_flags)
|
||||
{
|
||||
static const struct sd_callbacks cb_default = {
|
||||
rndr_blockcode,
|
||||
rndr_blockquote,
|
||||
rndr_raw_block,
|
||||
rndr_header,
|
||||
rndr_hrule,
|
||||
rndr_list,
|
||||
rndr_listitem,
|
||||
rndr_paragraph,
|
||||
rndr_table,
|
||||
rndr_tablerow,
|
||||
rndr_tablecell,
|
||||
|
||||
rndr_autolink,
|
||||
rndr_codespan,
|
||||
rndr_double_emphasis,
|
||||
rndr_emphasis,
|
||||
rndr_image,
|
||||
rndr_linebreak,
|
||||
rndr_link,
|
||||
rndr_raw_html,
|
||||
rndr_triple_emphasis,
|
||||
rndr_strikethrough,
|
||||
rndr_superscript,
|
||||
|
||||
NULL,
|
||||
rndr_normal_text,
|
||||
|
||||
NULL,
|
||||
reset_toc,
|
||||
};
|
||||
|
||||
/* Prepare the options pointer */
|
||||
memset(options, 0x0, sizeof(struct html_renderopt));
|
||||
options->flags = render_flags;
|
||||
|
||||
/* Prepare the callbacks */
|
||||
memcpy(callbacks, &cb_default, sizeof(struct sd_callbacks));
|
||||
|
||||
if (render_flags & HTML_SKIP_IMAGES)
|
||||
callbacks->image = NULL;
|
||||
|
||||
if (render_flags & HTML_SKIP_LINKS) {
|
||||
callbacks->link = NULL;
|
||||
callbacks->autolink = NULL;
|
||||
}
|
||||
|
||||
if (render_flags & HTML_SKIP_HTML || render_flags & HTML_ESCAPE)
|
||||
callbacks->blockhtml = NULL;
|
||||
}
|
83
SnudownTest/html/html.h
Normal file
83
SnudownTest/html/html.h
Normal file
|
@ -0,0 +1,83 @@
|
|||
/*
|
||||
* Copyright (c) 2011, Vicent Marti
|
||||
*
|
||||
* Permission to use, copy, modify, and distribute this software for any
|
||||
* purpose with or without fee is hereby granted, provided that the above
|
||||
* copyright notice and this permission notice appear in all copies.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef UPSKIRT_HTML_H
|
||||
#define UPSKIRT_HTML_H
|
||||
|
||||
#include "markdown.h"
|
||||
#include "buffer.h"
|
||||
#include <stdlib.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Options and per-document state shared by the HTML renderer callbacks. */
struct html_renderopt {
	/* table-of-contents bookkeeping */
	struct {
		int header_count;   /* running header counter, used in "toc_<n>" ids */
		int current_level;  /* nesting depth tracked while emitting the TOC */
		int level_offset;   /* subtracted from each header level in the TOC */
	} toc_data;

	/* optional string written in front of every "toc_<n>" id */
	char *toc_id_prefix;

	/* render flags, a combination of HTML_* bits */
	unsigned int flags;

	/* NULL-terminated lists of allowed raw HTML element / attribute names */
	char **html_element_whitelist;
	char **html_attr_whitelist;

	/* extra callbacks */
	void (*link_attributes)(struct buf *ob, const struct buf *url, void *self);
};
|
||||
|
||||
/* Render flags; each value is a distinct bit so flags can be OR-ed together. */
typedef enum {
	HTML_SKIP_HTML               = 0x001,
	HTML_SKIP_STYLE              = 0x002,
	HTML_SKIP_IMAGES             = 0x004,
	HTML_SKIP_LINKS              = 0x008,
	HTML_EXPAND_TABS             = 0x010,
	HTML_SAFELINK                = 0x020,
	HTML_TOC                     = 0x040,
	HTML_HARD_WRAP               = 0x080,
	HTML_USE_XHTML               = 0x100,
	HTML_ESCAPE                  = 0x200,
	HTML_ALLOW_ELEMENT_WHITELIST = 0x400,
} html_render_mode;
|
||||
|
||||
/* Result of sdhtml_is_tag(): no match, an opening tag, or a closing tag. */
typedef enum {
	HTML_TAG_NONE  = 0,
	HTML_TAG_OPEN  = 1,
	HTML_TAG_CLOSE = 2,
} html_tag;
|
||||
|
||||
/* Classify `tag_data` against `tagname`; returns an html_tag value. */
int
sdhtml_is_tag(const uint8_t *tag_data, size_t tag_size, const char *tagname);

/* Set up `callbacks` and `options_ptr` for the standard HTML renderer. */
extern void
sdhtml_renderer(struct sd_callbacks *callbacks,
	struct html_renderopt *options_ptr,
	unsigned int render_flags);

/* Set up `callbacks` and `options_ptr` for the table-of-contents renderer. */
extern void
sdhtml_toc_renderer(struct sd_callbacks *callbacks,
	struct html_renderopt *options_ptr);

/* SmartyPants pass over `size` bytes of `text`, written to `ob`. */
extern void
sdhtml_smartypants(struct buf *ob, const uint8_t *text, size_t size);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
389
SnudownTest/html/html_smartypants.c
Normal file
389
SnudownTest/html/html_smartypants.c
Normal file
|
@ -0,0 +1,389 @@
|
|||
/*
|
||||
* Copyright (c) 2011, Vicent Marti
|
||||
*
|
||||
* Permission to use, copy, modify, and distribute this software for any
|
||||
* purpose with or without fee is hereby granted, provided that the above
|
||||
* copyright notice and this permission notice appear in all copies.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "buffer.h"
|
||||
#include "html.h"
|
||||
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <ctype.h>
|
||||
|
||||
#if defined(_WIN32)
|
||||
#define snprintf _snprintf
|
||||
#endif
|
||||
|
||||
/* Quote state shared by the callbacks during one smartypants pass. */
struct smartypants_data {
	int in_squote;	/* toggled each time a single curly quote is emitted */
	int in_dquote;	/* toggled each time a double curly quote is emitted */
};
|
||||
|
||||
static size_t smartypants_cb__ltag(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
|
||||
static size_t smartypants_cb__dquote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
|
||||
static size_t smartypants_cb__amp(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
|
||||
static size_t smartypants_cb__period(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
|
||||
static size_t smartypants_cb__number(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
|
||||
static size_t smartypants_cb__dash(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
|
||||
static size_t smartypants_cb__parens(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
|
||||
static size_t smartypants_cb__squote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
|
||||
static size_t smartypants_cb__backtick(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
|
||||
static size_t smartypants_cb__escape(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
|
||||
|
||||
static size_t (*smartypants_cb_ptrs[])
|
||||
(struct buf *, struct smartypants_data *, uint8_t, const uint8_t *, size_t) =
|
||||
{
|
||||
NULL, /* 0 */
|
||||
smartypants_cb__dash, /* 1 */
|
||||
smartypants_cb__parens, /* 2 */
|
||||
smartypants_cb__squote, /* 3 */
|
||||
smartypants_cb__dquote, /* 4 */
|
||||
smartypants_cb__amp, /* 5 */
|
||||
smartypants_cb__period, /* 6 */
|
||||
smartypants_cb__number, /* 7 */
|
||||
smartypants_cb__ltag, /* 8 */
|
||||
smartypants_cb__backtick, /* 9 */
|
||||
smartypants_cb__escape, /* 10 */
|
||||
};
|
||||
|
||||
/* Maps each input byte to an index into smartypants_cb_ptrs (0 = copy the
 * byte through unchanged). One row per 16 byte values. */
static const uint8_t smartypants_cb_chars[256] = {
	/* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x20 */ 0, 0, 4, 0, 0, 0, 5, 3, 2, 0, 0, 0, 0, 1, 6, 0,	/* " & ' ( - . */
	/* 0x30 */ 0, 7, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0,	/* 1 3 < */
	/* 0x40 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 0, 0, 0,	/* backslash */
	/* 0x60 */ 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* backtick */
	/* 0x70 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xA0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xB0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xC0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xD0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xE0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xF0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
|
||||
|
||||
/* Returns 1 when c is NUL, whitespace or punctuation, 0 otherwise. */
static inline int
word_boundary(uint8_t c)
{
	if (c == 0)
		return 1;

	return isspace(c) || ispunct(c);
}
|
||||
|
||||
/*
 * Try to emit an opening or closing curly-quote entity.
 *
 * quote selects the entity family ('s' or 'd' at the call sites), and
 * *is_open tracks whether a quote of that family is currently open.
 * A closing quote requires next_char to be a word boundary; an opening
 * quote requires previous_char to be one. On success the entity is
 * appended to ob, *is_open is toggled and 1 is returned; otherwise
 * nothing is written and 0 is returned.
 */
static int
smartypants_quotes(struct buf *ob, uint8_t previous_char, uint8_t next_char, uint8_t quote, int *is_open)
{
	char entity[8];
	const int closing = (*is_open != 0);

	if (closing) {
		if (!word_boundary(next_char))
			return 0;
	} else if (!word_boundary(previous_char)) {
		return 0;
	}

	snprintf(entity, sizeof(entity), "&%c%cquo;", closing ? 'r' : 'l', quote);
	*is_open = !closing;
	bufputs(ob, entity);
	return 1;
}
|
||||
|
||||
static size_t
|
||||
smartypants_cb__squote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
|
||||
{
|
||||
if (size >= 2) {
|
||||
uint8_t t1 = tolower(text[1]);
|
||||
|
||||
if (t1 == '\'') {
|
||||
if (smartypants_quotes(ob, previous_char, size >= 3 ? text[2] : 0, 'd', &smrt->in_dquote))
|
||||
return 1;
|
||||
}
|
||||
|
||||
if ((t1 == 's' || t1 == 't' || t1 == 'm' || t1 == 'd') &&
|
||||
(size == 3 || word_boundary(text[2]))) {
|
||||
BUFPUTSL(ob, "’");
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (size >= 3) {
|
||||
uint8_t t2 = tolower(text[2]);
|
||||
|
||||
if (((t1 == 'r' && t2 == 'e') ||
|
||||
(t1 == 'l' && t2 == 'l') ||
|
||||
(t1 == 'v' && t2 == 'e')) &&
|
||||
(size == 4 || word_boundary(text[3]))) {
|
||||
BUFPUTSL(ob, "’");
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (smartypants_quotes(ob, previous_char, size > 0 ? text[1] : 0, 's', &smrt->in_squote))
|
||||
return 0;
|
||||
|
||||
bufputc(ob, text[0]);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Callback for '(' at text[0]: converts (c), (r) and (tm), matched
 * case-insensitively on the letters, to &copy;, &reg; and &trade;.
 * Otherwise copies the parenthesis. Returns the number of extra input
 * bytes consumed after text[0].
 */
static size_t
smartypants_cb__parens(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
	if (size >= 3) {
		uint8_t t1 = tolower(text[1]);
		uint8_t t2 = tolower(text[2]);

		if (t1 == 'c' && t2 == ')') {
			BUFPUTSL(ob, "&copy;");
			return 2;
		}

		if (t1 == 'r' && t2 == ')') {
			BUFPUTSL(ob, "&reg;");
			return 2;
		}

		if (size >= 4 && t1 == 't' && t2 == 'm' && text[3] == ')') {
			BUFPUTSL(ob, "&trade;");
			return 3;
		}
	}

	bufputc(ob, text[0]);
	return 0;
}
|
||||
|
||||
/*
 * Callback for '-' at text[0]: "---" becomes &mdash;, "--" becomes
 * &ndash;, and a lone dash is copied. Returns the number of extra input
 * bytes consumed after text[0].
 */
static size_t
smartypants_cb__dash(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
	if (size >= 3 && text[1] == '-' && text[2] == '-') {
		BUFPUTSL(ob, "&mdash;");
		return 2;
	}

	if (size >= 2 && text[1] == '-') {
		BUFPUTSL(ob, "&ndash;");
		return 1;
	}

	bufputc(ob, text[0]);
	return 0;
}
|
||||
|
||||
static size_t
|
||||
smartypants_cb__amp(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
|
||||
{
|
||||
if (size >= 6 && memcmp(text, """, 6) == 0) {
|
||||
if (smartypants_quotes(ob, previous_char, size >= 7 ? text[6] : 0, 'd', &smrt->in_dquote))
|
||||
return 5;
|
||||
}
|
||||
|
||||
if (size >= 4 && memcmp(text, "�", 4) == 0)
|
||||
return 3;
|
||||
|
||||
bufputc(ob, '&');
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Callback for '.' at text[0]: "..." and ". . ." become &hellip;, and a
 * lone period is copied. Returns the number of extra input bytes
 * consumed after text[0].
 */
static size_t
smartypants_cb__period(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
	if (size >= 3 && text[1] == '.' && text[2] == '.') {
		BUFPUTSL(ob, "&hellip;");
		return 2;
	}

	if (size >= 5 && text[1] == ' ' && text[2] == '.' && text[3] == ' ' && text[4] == '.') {
		BUFPUTSL(ob, "&hellip;");
		return 4;
	}

	bufputc(ob, text[0]);
	return 0;
}
|
||||
|
||||
static size_t
|
||||
smartypants_cb__backtick(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
|
||||
{
|
||||
if (size >= 2 && text[1] == '`') {
|
||||
if (smartypants_quotes(ob, previous_char, size >= 3 ? text[2] : 0, 'd', &smrt->in_dquote))
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Callback for '1' or '3' at text[0]: at a word boundary, converts 1/2,
 * 1/4 (also when followed by "th") and 3/4 (also when followed by "ths")
 * to &frac12;, &frac14; and &frac34;. Only the three fraction bytes are
 * consumed, so a "th"/"ths" suffix stays in the output. Otherwise the
 * digit is copied. Returns the number of extra input bytes consumed
 * after text[0].
 */
static size_t
smartypants_cb__number(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
	if (word_boundary(previous_char) && size >= 3) {
		if (text[0] == '1' && text[1] == '/' && text[2] == '2') {
			if (size == 3 || word_boundary(text[3])) {
				BUFPUTSL(ob, "&frac12;");
				return 2;
			}
		}

		if (text[0] == '1' && text[1] == '/' && text[2] == '4') {
			if (size == 3 || word_boundary(text[3]) ||
				(size >= 5 && tolower(text[3]) == 't' && tolower(text[4]) == 'h')) {
				BUFPUTSL(ob, "&frac14;");
				return 2;
			}
		}

		if (text[0] == '3' && text[1] == '/' && text[2] == '4') {
			if (size == 3 || word_boundary(text[3]) ||
				(size >= 6 && tolower(text[3]) == 't' && tolower(text[4]) == 'h' && tolower(text[5]) == 's')) {
				BUFPUTSL(ob, "&frac34;");
				return 2;
			}
		}
	}

	bufputc(ob, text[0]);
	return 0;
}
|
||||
|
||||
static size_t
|
||||
smartypants_cb__dquote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
|
||||
{
|
||||
if (!smartypants_quotes(ob, previous_char, size > 0 ? text[1] : 0, 'd', &smrt->in_dquote))
|
||||
BUFPUTSL(ob, """);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static size_t
|
||||
smartypants_cb__ltag(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
|
||||
{
|
||||
static const char *skip_tags[] = {
|
||||
"pre", "code", "var", "samp", "kbd", "math", "script", "style"
|
||||
};
|
||||
static const size_t skip_tags_count = 8;
|
||||
|
||||
size_t tag, i = 0;
|
||||
|
||||
while (i < size && text[i] != '>')
|
||||
i++;
|
||||
|
||||
for (tag = 0; tag < skip_tags_count; ++tag) {
|
||||
if (sdhtml_is_tag(text, size, skip_tags[tag]) == HTML_TAG_OPEN)
|
||||
break;
|
||||
}
|
||||
|
||||
if (tag < skip_tags_count) {
|
||||
for (;;) {
|
||||
while (i < size && text[i] != '<')
|
||||
i++;
|
||||
|
||||
if (i == size)
|
||||
break;
|
||||
|
||||
if (sdhtml_is_tag(text + i, size - i, skip_tags[tag]) == HTML_TAG_CLOSE)
|
||||
break;
|
||||
|
||||
i++;
|
||||
}
|
||||
|
||||
while (i < size && text[i] != '>')
|
||||
i++;
|
||||
}
|
||||
|
||||
bufput(ob, text, i + 1);
|
||||
return i;
|
||||
}
|
||||
|
||||
/*
 * Callback for a backslash at text[0]: a backslash followed by one of
 * \ " ' . - ` emits that character literally and consumes it, so no
 * typographic conversion is applied to it. Any other backslash,
 * including one at the very end of the input, is copied through.
 * Returns the number of extra input bytes consumed after text[0].
 */
static size_t
smartypants_cb__escape(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
	if (size < 2) {
		/* trailing backslash: keep it instead of dropping it */
		bufputc(ob, '\\');
		return 0;
	}

	switch (text[1]) {
	case '\\':
	case '"':
	case '\'':
	case '.':
	case '-':
	case '`':
		bufputc(ob, text[1]);
		return 1;

	default:
		bufputc(ob, '\\');
		return 0;
	}
}
|
||||
|
||||
#if 0
/* Disabled reference table of the substitutions implemented by the
 * callbacks above: trigger byte, pattern, replacement entity, skip count. */
static struct {
	uint8_t c0;
	const uint8_t *pattern;
	const uint8_t *entity;
	int skip;
} smartypants_subs[] = {
	{ '\'', "'s>", "&rsquo;", 0 },
	{ '\'', "'t>", "&rsquo;", 0 },
	{ '\'', "'re>", "&rsquo;", 0 },
	{ '\'', "'ll>", "&rsquo;", 0 },
	{ '\'', "'ve>", "&rsquo;", 0 },
	{ '\'', "'m>", "&rsquo;", 0 },
	{ '\'', "'d>", "&rsquo;", 0 },
	{ '-', "--", "&mdash;", 1 },
	{ '-', "<->", "&ndash;", 0 },
	{ '.', "...", "&hellip;", 2 },
	{ '.', ". . .", "&hellip;", 4 },
	{ '(', "(c)", "&copy;", 2 },
	{ '(', "(r)", "&reg;", 2 },
	{ '(', "(tm)", "&trade;", 3 },
	{ '3', "<3/4>", "&frac34;", 2 },
	{ '3', "<3/4ths>", "&frac34;", 2 },
	{ '1', "<1/2>", "&frac12;", 2 },
	{ '1', "<1/4>", "&frac14;", 2 },
	{ '1', "<1/4th>", "&frac14;", 2 },
	{ '&', "&#0;", 0, 3 },
};
#endif
|
||||
|
||||
void
|
||||
sdhtml_smartypants(struct buf *ob, const uint8_t *text, size_t size)
|
||||
{
|
||||
size_t i;
|
||||
struct smartypants_data smrt = {0, 0};
|
||||
|
||||
if (!text)
|
||||
return;
|
||||
|
||||
bufgrow(ob, size);
|
||||
|
||||
for (i = 0; i < size; ++i) {
|
||||
size_t org;
|
||||
uint8_t action = 0;
|
||||
|
||||
org = i;
|
||||
while (i < size && (action = smartypants_cb_chars[text[i]]) == 0)
|
||||
i++;
|
||||
|
||||
if (i > org)
|
||||
bufput(ob, text + org, i - org);
|
||||
|
||||
if (i < size) {
|
||||
i += smartypants_cb_ptrs[(int)action]
|
||||
(ob, &smrt, i ? text[i - 1] : 0, text + i, size - i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
25
SnudownTest/html_block_names.txt
Normal file
25
SnudownTest/html_block_names.txt
Normal file
|
@ -0,0 +1,25 @@
|
|||
##
|
||||
p
|
||||
dl
|
||||
h1
|
||||
h2
|
||||
h3
|
||||
h4
|
||||
h5
|
||||
h6
|
||||
ol
|
||||
ul
|
||||
del
|
||||
div
|
||||
ins
|
||||
pre
|
||||
form
|
||||
math
|
||||
table
|
||||
figure
|
||||
iframe
|
||||
script
|
||||
style
|
||||
fieldset
|
||||
noscript
|
||||
blockquote
|
206
SnudownTest/html_blocks.h
Normal file
206
SnudownTest/html_blocks.h
Normal file
|
@ -0,0 +1,206 @@
|
|||
/* C code produced by gperf version 3.0.3 */
|
||||
/* Command-line: gperf -N find_block_tag -H hash_block_tag -C -c -E --ignore-case html_block_names.txt */
|
||||
/* Computed positions: -k'1-2' */
|
||||
|
||||
#if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \
|
||||
&& ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \
|
||||
&& (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \
|
||||
&& ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \
|
||||
&& ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \
|
||||
&& ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \
|
||||
&& ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \
|
||||
&& ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \
|
||||
&& ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \
|
||||
&& ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \
|
||||
&& ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \
|
||||
&& ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \
|
||||
&& ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \
|
||||
&& ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \
|
||||
&& ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \
|
||||
&& ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \
|
||||
&& ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \
|
||||
&& ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \
|
||||
&& ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \
|
||||
&& ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \
|
||||
&& ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \
|
||||
&& ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \
|
||||
&& ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126))
|
||||
/* The character set is not based on ISO-646. */
|
||||
error "gperf generated tables don't work with this execution character set. Please report a bug to <bug-gnu-gperf@gnu.org>."
|
||||
#endif
|
||||
|
||||
/* maximum key range = 37, duplicates = 0 */
|
||||
|
||||
#ifndef GPERF_DOWNCASE
#define GPERF_DOWNCASE 1
/* Byte-to-lowercase map: identity for every value except 65..90
 * ('A'..'Z'), which map to 97..122 ('a'..'z'). 16 entries per row. */
static unsigned char gperf_downcase[256] =
  {
      0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,
     16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,
     32,  33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,
     48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,
     64,  97,  98,  99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
    112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122,  91,  92,  93,  94,  95,
     96,  97,  98,  99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
    112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
    128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
    144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
    160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
    176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
    192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
    208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
    224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
    240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255
  };
#endif
|
||||
|
||||
#ifndef GPERF_CASE_STRNCMP
|
||||
#define GPERF_CASE_STRNCMP 1
|
||||
static int
|
||||
gperf_case_strncmp (s1, s2, n)
|
||||
register const char *s1;
|
||||
register const char *s2;
|
||||
register unsigned int n;
|
||||
{
|
||||
for (; n > 0;)
|
||||
{
|
||||
unsigned char c1 = gperf_downcase[(unsigned char)*s1++];
|
||||
unsigned char c2 = gperf_downcase[(unsigned char)*s2++];
|
||||
if (c1 != 0 && c1 == c2)
|
||||
{
|
||||
n--;
|
||||
continue;
|
||||
}
|
||||
return (int)c1 - (int)c2;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef __GNUC__
__inline
#else
#ifdef __cplusplus
inline
#endif
#endif
/*
 * gperf hash over the first one or two bytes of str plus its length.
 * str[1] is read only when len is not 1. The table has 257 entries
 * because the second byte is looked up at index str[1] + 1.
 *
 * Written with a prototype instead of the K&R-style definition gperf
 * emitted. NOTE(review): regenerate with -L ANSI-C to keep this form.
 */
static unsigned int
hash_block_tag (const char *str, unsigned int len)
{
  static const unsigned char asso_values[] =
    {
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
       8, 30, 25, 20, 15, 10, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38,  0, 38,  0, 38,
       5,  5,  5, 15,  0, 38, 38,  0, 15, 10,
       0, 38, 38, 15,  0,  5, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38,  0, 38,
       0, 38,  5,  5,  5, 15,  0, 38, 38,  0,
      15, 10,  0, 38, 38, 15,  0,  5, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38
    };
  int hval = len;

  switch (hval)
    {
      default:
        hval += asso_values[(unsigned char)str[1]+1];
      /*FALLTHROUGH*/
      case 1:
        hval += asso_values[(unsigned char)str[0]];
        break;
    }
  return hval;
}
|
||||
|
||||
#ifdef __GNUC__
__inline
#ifdef __GNUC_STDC_INLINE__
__attribute__ ((__gnu_inline__))
#endif
#endif
/*
 * Case-insensitive lookup of str[0..len) in the block-tag keyword set.
 * Returns the matching entry of wordlist, or 0 when str is not a keyword.
 *
 * Written with a prototype instead of the K&R-style definition gperf
 * emitted. NOTE(review): regenerate with -L ANSI-C to keep this form.
 */
const char *
find_block_tag (const char *str, unsigned int len)
{
  enum
    {
      TOTAL_KEYWORDS = 24,
      MIN_WORD_LENGTH = 1,
      MAX_WORD_LENGTH = 10,
      MIN_HASH_VALUE = 1,
      MAX_HASH_VALUE = 37
    };

  /* indexed by hash value; "" marks an unused slot */
  static const char * const wordlist[] =
    {
      "",
      "p",
      "dl",
      "div",
      "math",
      "table",
      "",
      "ul",
      "del",
      "form",
      "blockquote",
      "figure",
      "ol",
      "fieldset",
      "",
      "h1",
      "",
      "h6",
      "pre",
      "", "",
      "script",
      "h5",
      "noscript",
      "",
      "style",
      "iframe",
      "h4",
      "ins",
      "", "", "",
      "h3",
      "", "", "", "",
      "h2"
    };

  if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
    {
      int key = hash_block_tag (str, len);

      if (key <= MAX_HASH_VALUE && key >= 0)
        {
          const char *s = wordlist[key];

          /* cheap first-byte test (ignoring the case bit) before the full compare */
          if ((((unsigned char)*str ^ (unsigned char)*s) & ~32) == 0 && !gperf_case_strncmp (str, s, len) && s[len] == '\0')
            return s;
        }
    }
  return 0;
}
|
292
SnudownTest/html_entities.gperf
Normal file
292
SnudownTest/html_entities.gperf
Normal file
|
@ -0,0 +1,292 @@
|
|||
%language=ANSI-C
|
||||
%define lookup-function-name is_allowed_named_entity
|
||||
%compare-strncmp
|
||||
%readonly-tables
|
||||
%define hash-function-name hash_html_entity
|
||||
%enum
|
||||
%includes
|
||||
%{
|
||||
#include <stdlib.h>
|
||||
|
||||
/* Parsers tend to choke on entities with values greater than this */
const uint32_t MAX_NUM_ENTITY_VAL = 0x10ffff;
/* Any numeric entity longer than this is obviously above MAX_NUM_ENTITY_VAL;
 * used to avoid dealing with overflows. */
const size_t MAX_NUM_ENTITY_LEN = 7;

/*
 * Returns 1 when entity_val is accepted as a numeric character
 * reference, 0 otherwise.
 *
 * Some XML parsers will choke on entities with certain values (mostly
 * control characters). The values rejected here, all bounds inclusive:
 *
 *   0 - 8
 *   11 - 12
 *   14 - 31
 *   55296 - 57343   (0xD800 - 0xDFFF)
 *   65534 - 65535   (0xFFFE - 0xFFFF)
 *   anything above MAX_NUM_ENTITY_VAL
 *
 * Declared static inline so the definition is usable even when the
 * compiler does not inline the call (a bare C99 `inline` definition
 * provides no external symbol).
 */
static inline int is_valid_numeric_entity(uint32_t entity_val)
{
	return (entity_val > 8
			&& (entity_val != 11 && entity_val != 12)
			&& (entity_val < 14 || entity_val > 31)
			&& (entity_val < 55296 || entity_val > 57343)
			&& (entity_val != 65534 && entity_val != 65535)
			&& entity_val <= MAX_NUM_ENTITY_VAL);
}
|
||||
|
||||
%}
|
||||
%%
|
||||
&AElig;
&Aacute;
&Acirc;
&Agrave;
&Alpha;
&Aring;
&Atilde;
&Auml;
&Beta;
&Ccedil;
&Chi;
&Dagger;
&Delta;
&ETH;
&Eacute;
&Ecirc;
&Egrave;
&Epsilon;
&Eta;
&Euml;
&Gamma;
&Iacute;
&Icirc;
&Igrave;
&Iota;
&Iuml;
&Kappa;
&Lambda;
&Mu;
&Ntilde;
&Nu;
&OElig;
&Oacute;
&Ocirc;
&Ograve;
&Omega;
&Omicron;
&Oslash;
&Otilde;
&Ouml;
&Phi;
&Pi;
&Prime;
&Psi;
&Rho;
&Scaron;
&Sigma;
&THORN;
&Tau;
&Theta;
&Uacute;
&Ucirc;
&Ugrave;
&Upsilon;
&Uuml;
&Xi;
&Yacute;
&Yuml;
&Zeta;
&aacute;
&acirc;
&acute;
&aelig;
&agrave;
&alefsym;
&alpha;
&amp;
&and;
&ang;
&apos;
&aring;
&asymp;
&atilde;
&auml;
&bdquo;
&beta;
&brvbar;
&bull;
&cap;
&ccedil;
&cedil;
&cent;
&chi;
&circ;
&clubs;
&cong;
&copy;
&crarr;
&cup;
&curren;
&dArr;
&dagger;
&darr;
&deg;
&delta;
&diams;
&divide;
&eacute;
&ecirc;
&egrave;
&empty;
&emsp;
&ensp;
&epsilon;
&equiv;
&eta;
&eth;
&euml;
&euro;
&exist;
&fnof;
&forall;
&frac12;
&frac14;
&frac34;
&frasl;
&gamma;
&ge;
&gt;
&hArr;
&harr;
&hearts;
&hellip;
&iacute;
&icirc;
&iexcl;
&igrave;
&image;
&infin;
&int;
&iota;
&iquest;
&isin;
&iuml;
&kappa;
&lArr;
&lambda;
&lang;
&laquo;
&larr;
&lceil;
&ldquo;
&le;
&lfloor;
&lowast;
&loz;
&lrm;
&lsaquo;
&lsquo;
&lt;
&macr;
&mdash;
&micro;
&middot;
&minus;
&mu;
&nabla;
&nbsp;
&ndash;
&ne;
&ni;
&not;
&notin;
&nsub;
&ntilde;
&nu;
&oacute;
&ocirc;
&oelig;
&ograve;
&oline;
&omega;
&omicron;
&oplus;
&or;
&ordf;
&ordm;
&oslash;
&otilde;
&otimes;
&ouml;
&para;
&part;
&permil;
&perp;
&phi;
&pi;
&piv;
&plusmn;
&pound;
&prime;
&prod;
&prop;
&psi;
&quot;
&rArr;
&radic;
&rang;
&raquo;
&rarr;
&rceil;
&rdquo;
&real;
&reg;
&rfloor;
&rho;
&rlm;
&rsaquo;
&rsquo;
&sbquo;
&scaron;
&sdot;
&sect;
&shy;
&sigma;
&sigmaf;
&sim;
&spades;
&sub;
&sube;
&sum;
&sup1;
&sup2;
&sup3;
&sup;
&supe;
&szlig;
&tau;
&there4;
&theta;
&thetasym;
&thinsp;
&thorn;
&tilde;
&times;
&trade;
&uArr;
&uacute;
&uarr;
&ucirc;
&ugrave;
&uml;
&upsih;
&upsilon;
&uuml;
&weierp;
&xi;
&yacute;
&yen;
&yuml;
&zeta;
&zwj;
&zwnj;
|
389
SnudownTest/html_smartypants.c
Normal file
389
SnudownTest/html_smartypants.c
Normal file
|
@ -0,0 +1,389 @@
|
|||
/*
|
||||
* Copyright (c) 2011, Vicent Marti
|
||||
*
|
||||
* Permission to use, copy, modify, and distribute this software for any
|
||||
* purpose with or without fee is hereby granted, provided that the above
|
||||
* copyright notice and this permission notice appear in all copies.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "buffer.h"
|
||||
#include "html.h"
|
||||
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <ctype.h>
|
||||
|
||||
#if defined(_WIN32)
|
||||
#define snprintf _snprintf
|
||||
#endif
|
||||
|
||||
/* Quote state shared by the callbacks during one smartypants pass. */
struct smartypants_data {
	int in_squote;	/* toggled each time a single curly quote is emitted */
	int in_dquote;	/* toggled each time a double curly quote is emitted */
};
|
||||
|
||||
static size_t smartypants_cb__ltag(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
|
||||
static size_t smartypants_cb__dquote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
|
||||
static size_t smartypants_cb__amp(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
|
||||
static size_t smartypants_cb__period(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
|
||||
static size_t smartypants_cb__number(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
|
||||
static size_t smartypants_cb__dash(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
|
||||
static size_t smartypants_cb__parens(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
|
||||
static size_t smartypants_cb__squote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
|
||||
static size_t smartypants_cb__backtick(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
|
||||
static size_t smartypants_cb__escape(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
|
||||
|
||||
static size_t (*smartypants_cb_ptrs[])
|
||||
(struct buf *, struct smartypants_data *, uint8_t, const uint8_t *, size_t) =
|
||||
{
|
||||
NULL, /* 0 */
|
||||
smartypants_cb__dash, /* 1 */
|
||||
smartypants_cb__parens, /* 2 */
|
||||
smartypants_cb__squote, /* 3 */
|
||||
smartypants_cb__dquote, /* 4 */
|
||||
smartypants_cb__amp, /* 5 */
|
||||
smartypants_cb__period, /* 6 */
|
||||
smartypants_cb__number, /* 7 */
|
||||
smartypants_cb__ltag, /* 8 */
|
||||
smartypants_cb__backtick, /* 9 */
|
||||
smartypants_cb__escape, /* 10 */
|
||||
};
|
||||
|
||||
/* Maps each input byte to an index into smartypants_cb_ptrs (0 = copy the
 * byte through unchanged). One row per 16 byte values. */
static const uint8_t smartypants_cb_chars[256] = {
	/* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x20 */ 0, 0, 4, 0, 0, 0, 5, 3, 2, 0, 0, 0, 0, 1, 6, 0,	/* " & ' ( - . */
	/* 0x30 */ 0, 7, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0,	/* 1 3 < */
	/* 0x40 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 0, 0, 0,	/* backslash */
	/* 0x60 */ 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* backtick */
	/* 0x70 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xA0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xB0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xC0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xD0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xE0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xF0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
|
||||
|
||||
/* Returns 1 when c is NUL, whitespace or punctuation, 0 otherwise. */
static inline int
word_boundary(uint8_t c)
{
	if (c == 0)
		return 1;

	return isspace(c) || ispunct(c);
}
|
||||
|
||||
/*
 * Try to emit an opening or closing curly-quote entity.
 *
 * quote selects the entity family ('s' or 'd' at the call sites), and
 * *is_open tracks whether a quote of that family is currently open.
 * A closing quote requires next_char to be a word boundary; an opening
 * quote requires previous_char to be one. On success the entity is
 * appended to ob, *is_open is toggled and 1 is returned; otherwise
 * nothing is written and 0 is returned.
 */
static int
smartypants_quotes(struct buf *ob, uint8_t previous_char, uint8_t next_char, uint8_t quote, int *is_open)
{
	char entity[8];
	const int closing = (*is_open != 0);

	if (closing) {
		if (!word_boundary(next_char))
			return 0;
	} else if (!word_boundary(previous_char)) {
		return 0;
	}

	snprintf(entity, sizeof(entity), "&%c%cquo;", closing ? 'r' : 'l', quote);
	*is_open = !closing;
	bufputs(ob, entity);
	return 1;
}
|
||||
|
||||
static size_t
|
||||
smartypants_cb__squote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
|
||||
{
|
||||
if (size >= 2) {
|
||||
uint8_t t1 = tolower(text[1]);
|
||||
|
||||
if (t1 == '\'') {
|
||||
if (smartypants_quotes(ob, previous_char, size >= 3 ? text[2] : 0, 'd', &smrt->in_dquote))
|
||||
return 1;
|
||||
}
|
||||
|
||||
if ((t1 == 's' || t1 == 't' || t1 == 'm' || t1 == 'd') &&
|
||||
(size == 3 || word_boundary(text[2]))) {
|
||||
BUFPUTSL(ob, "’");
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (size >= 3) {
|
||||
uint8_t t2 = tolower(text[2]);
|
||||
|
||||
if (((t1 == 'r' && t2 == 'e') ||
|
||||
(t1 == 'l' && t2 == 'l') ||
|
||||
(t1 == 'v' && t2 == 'e')) &&
|
||||
(size == 4 || word_boundary(text[3]))) {
|
||||
BUFPUTSL(ob, "’");
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (smartypants_quotes(ob, previous_char, size > 0 ? text[1] : 0, 's', &smrt->in_squote))
|
||||
return 0;
|
||||
|
||||
bufputc(ob, text[0]);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Callback for '(' at text[0]: converts (c), (r) and (tm), matched
 * case-insensitively on the letters, to &copy;, &reg; and &trade;.
 * Otherwise copies the parenthesis. Returns the number of extra input
 * bytes consumed after text[0].
 */
static size_t
smartypants_cb__parens(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
	if (size >= 3) {
		uint8_t t1 = tolower(text[1]);
		uint8_t t2 = tolower(text[2]);

		if (t1 == 'c' && t2 == ')') {
			BUFPUTSL(ob, "&copy;");
			return 2;
		}

		if (t1 == 'r' && t2 == ')') {
			BUFPUTSL(ob, "&reg;");
			return 2;
		}

		if (size >= 4 && t1 == 't' && t2 == 'm' && text[3] == ')') {
			BUFPUTSL(ob, "&trade;");
			return 3;
		}
	}

	bufputc(ob, text[0]);
	return 0;
}
|
||||
|
||||
/*
 * Callback for '-' at text[0]: "---" becomes &mdash;, "--" becomes
 * &ndash;, and a lone dash is copied. Returns the number of extra input
 * bytes consumed after text[0].
 */
static size_t
smartypants_cb__dash(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
	if (size >= 3 && text[1] == '-' && text[2] == '-') {
		BUFPUTSL(ob, "&mdash;");
		return 2;
	}

	if (size >= 2 && text[1] == '-') {
		BUFPUTSL(ob, "&ndash;");
		return 1;
	}

	bufputc(ob, text[0]);
	return 0;
}
|
||||
|
||||
static size_t
|
||||
smartypants_cb__amp(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
|
||||
{
|
||||
if (size >= 6 && memcmp(text, """, 6) == 0) {
|
||||
if (smartypants_quotes(ob, previous_char, size >= 7 ? text[6] : 0, 'd', &smrt->in_dquote))
|
||||
return 5;
|
||||
}
|
||||
|
||||
if (size >= 4 && memcmp(text, "�", 4) == 0)
|
||||
return 3;
|
||||
|
||||
bufputc(ob, '&');
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Callback for '.' at text[0]: "..." and ". . ." become &hellip;, and a
 * lone period is copied. Returns the number of extra input bytes
 * consumed after text[0].
 */
static size_t
smartypants_cb__period(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
	if (size >= 3 && text[1] == '.' && text[2] == '.') {
		BUFPUTSL(ob, "&hellip;");
		return 2;
	}

	if (size >= 5 && text[1] == ' ' && text[2] == '.' && text[3] == ' ' && text[4] == '.') {
		BUFPUTSL(ob, "&hellip;");
		return 4;
	}

	bufputc(ob, text[0]);
	return 0;
}
|
||||
|
||||
static size_t
|
||||
smartypants_cb__backtick(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
|
||||
{
|
||||
if (size >= 2 && text[1] == '`') {
|
||||
if (smartypants_quotes(ob, previous_char, size >= 3 ? text[2] : 0, 'd', &smrt->in_dquote))
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Digit handler: turns the fractions 1/2, 1/4 and 3/4 into their single
 * character forms when they start at a word boundary and are followed by
 * the end of input or a word boundary ("1/4th" and "3/4ths" are accepted
 * too). Any other digit is copied through.
 *
 * Returns the number of input bytes consumed in addition to text[0].
 */
static size_t
smartypants_cb__number(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
	if (word_boundary(previous_char) && size >= 3 && text[1] == '/') {
		const uint8_t numerator = text[0];
		const uint8_t denominator = text[2];

		if (numerator == '1' && denominator == '2') {
			if (size == 3 || word_boundary(text[3])) {
				BUFPUTSL(ob, "½");
				return 2;
			}
		} else if (numerator == '1' && denominator == '4') {
			if (size == 3 || word_boundary(text[3]) ||
			    (size >= 5 && tolower(text[3]) == 't' && tolower(text[4]) == 'h')) {
				BUFPUTSL(ob, "¼");
				return 2;
			}
		} else if (numerator == '3' && denominator == '4') {
			if (size == 3 || word_boundary(text[3]) ||
			    (size >= 6 && tolower(text[3]) == 't' && tolower(text[4]) == 'h' && tolower(text[5]) == 's')) {
				BUFPUTSL(ob, "¾");
				return 2;
			}
		}
	}

	bufputc(ob, text[0]);
	return 0;
}
|
||||
|
||||
static size_t
|
||||
smartypants_cb__dquote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
|
||||
{
|
||||
if (!smartypants_quotes(ob, previous_char, size > 0 ? text[1] : 0, 'd', &smrt->in_dquote))
|
||||
BUFPUTSL(ob, """);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static size_t
|
||||
smartypants_cb__ltag(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
|
||||
{
|
||||
static const char *skip_tags[] = {
|
||||
"pre", "code", "var", "samp", "kbd", "math", "script", "style"
|
||||
};
|
||||
static const size_t skip_tags_count = 8;
|
||||
|
||||
size_t tag, i = 0;
|
||||
|
||||
while (i < size && text[i] != '>')
|
||||
i++;
|
||||
|
||||
for (tag = 0; tag < skip_tags_count; ++tag) {
|
||||
if (sdhtml_is_tag(text, size, skip_tags[tag]) == HTML_TAG_OPEN)
|
||||
break;
|
||||
}
|
||||
|
||||
if (tag < skip_tags_count) {
|
||||
for (;;) {
|
||||
while (i < size && text[i] != '<')
|
||||
i++;
|
||||
|
||||
if (i == size)
|
||||
break;
|
||||
|
||||
if (sdhtml_is_tag(text + i, size - i, skip_tags[tag]) == HTML_TAG_CLOSE)
|
||||
break;
|
||||
|
||||
i++;
|
||||
}
|
||||
|
||||
while (i < size && text[i] != '>')
|
||||
i++;
|
||||
}
|
||||
|
||||
bufput(ob, text, i + 1);
|
||||
return i;
|
||||
}
|
||||
|
||||
/*
 * Backslash handler: "\x" where x is one of \ " ' . - ` writes x alone and
 * consumes it, which keeps x away from the other callbacks. Before any
 * other character the backslash is copied through. A backslash that is the
 * last input byte produces no output.
 *
 * Returns the number of input bytes consumed in addition to text[0].
 */
static size_t
smartypants_cb__escape(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
	uint8_t escaped;

	if (size < 2)
		return 0;

	escaped = text[1];

	if (escaped == '\\' || escaped == '"' || escaped == '\'' ||
	    escaped == '.' || escaped == '-' || escaped == '`') {
		bufputc(ob, escaped);
		return 1;
	}

	bufputc(ob, '\\');
	return 0;
}
|
||||
|
||||
#if 0
/*
 * Table form of the substitutions, compiled out by the surrounding #if 0;
 * the callback functions in this file are what actually runs.
 *
 * Fields: first character of the pattern, the pattern, the replacement
 * text (0 for none) and a skip count.
 */
static struct {
	uint8_t c0;
	const uint8_t *pattern;
	const uint8_t *entity;
	int skip;
} smartypants_subs[] = {
	{ '\'', "'s>", "’", 0 },
	{ '\'', "'t>", "’", 0 },
	{ '\'', "'re>", "’", 0 },
	{ '\'', "'ll>", "’", 0 },
	{ '\'', "'ve>", "’", 0 },
	{ '\'', "'m>", "’", 0 },
	{ '\'', "'d>", "’", 0 },
	{ '-', "--", "—", 1 },
	{ '-', "<->", "–", 0 },
	{ '.', "...", "…", 2 },
	{ '.', ". . .", "…", 4 },
	{ '(', "(c)", "©", 2 },
	{ '(', "(r)", "®", 2 },
	{ '(', "(tm)", "™", 3 },
	{ '3', "<3/4>", "¾", 2 },
	{ '3', "<3/4ths>", "¾", 2 },
	{ '1', "<1/2>", "½", 2 },
	{ '1', "<1/4>", "¼", 2 },
	{ '1', "<1/4th>", "¼", 2 },
	/* NUL character reference: four bytes, no replacement */
	{ '&', "&#0;", 0, 3 },
};
#endif
|
||||
|
||||
void
|
||||
sdhtml_smartypants(struct buf *ob, const uint8_t *text, size_t size)
|
||||
{
|
||||
size_t i;
|
||||
struct smartypants_data smrt = {0, 0};
|
||||
|
||||
if (!text)
|
||||
return;
|
||||
|
||||
bufgrow(ob, size);
|
||||
|
||||
for (i = 0; i < size; ++i) {
|
||||
size_t org;
|
||||
uint8_t action = 0;
|
||||
|
||||
org = i;
|
||||
while (i < size && (action = smartypants_cb_chars[text[i]]) == 0)
|
||||
i++;
|
||||
|
||||
if (i > org)
|
||||
bufput(ob, text + org, i - org);
|
||||
|
||||
if (i < size) {
|
||||
i += smartypants_cb_ptrs[(int)action]
|
||||
(ob, &smrt, i ? text[i - 1] : 0, text + i, size - i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
2661
SnudownTest/markdown.c
Normal file
2661
SnudownTest/markdown.c
Normal file
File diff suppressed because it is too large
Load diff
140
SnudownTest/markdown.h
Normal file
140
SnudownTest/markdown.h
Normal file
|
@ -0,0 +1,140 @@
|
|||
/* markdown.h - generic markdown parser */
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009, Natacha Porté
|
||||
*
|
||||
* Permission to use, copy, modify, and distribute this software for any
|
||||
* purpose with or without fee is hereby granted, provided that the above
|
||||
* copyright notice and this permission notice appear in all copies.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef UPSKIRT_MARKDOWN_H
|
||||
#define UPSKIRT_MARKDOWN_H
|
||||
|
||||
#include "buffer.h"
|
||||
#include "autolink.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define SUNDOWN_VERSION "1.16.0"
|
||||
#define SUNDOWN_VER_MAJOR 1
|
||||
#define SUNDOWN_VER_MINOR 16
|
||||
#define SUNDOWN_VER_REVISION 0
|
||||
|
||||
/********************
|
||||
* TYPE DEFINITIONS *
|
||||
********************/
|
||||
|
||||
/* mkd_autolink - kind of autolink passed to the autolink callback */
enum mkd_autolink {
	MKDA_NOT_AUTOLINK = 0,	/* used internally when it is not an autolink */
	MKDA_NORMAL = 1,	/* regular URL-style link (http, ftp, mailto, ...) */
	MKDA_EMAIL = 2		/* e-mail address without an explicit mailto: */
};
|
||||
|
||||
/* mkd_tableflags - bits of the `flags` argument of the table_cell callback */
enum mkd_tableflags {
	MKD_TABLE_ALIGN_L = 0x1,
	MKD_TABLE_ALIGN_R = 0x2,
	/* centre is both alignment bits set */
	MKD_TABLE_ALIGN_CENTER = (MKD_TABLE_ALIGN_L | MKD_TABLE_ALIGN_R),
	/* mask selecting the two alignment bits */
	MKD_TABLE_ALIGNMASK = 0x3,
	MKD_TABLE_HEADER = 0x4
};
|
||||
|
||||
/*
 * mkd_extensions - one bit per optional parser feature.
 * Bit 5 (0x020) has no name in this header.
 */
enum mkd_extensions {
	MKDEXT_NO_INTRA_EMPHASIS = 0x001,
	MKDEXT_TABLES            = 0x002,
	MKDEXT_FENCED_CODE       = 0x004,
	MKDEXT_AUTOLINK          = 0x008,
	MKDEXT_STRIKETHROUGH     = 0x010,
	MKDEXT_SPACE_HEADERS     = 0x040,
	MKDEXT_SUPERSCRIPT       = 0x080,
	MKDEXT_LAX_SPACING       = 0x100,
	MKDEXT_NO_EMAIL_AUTOLINK = 0x200
};
|
||||
|
||||
/*
 * sd_callbacks - functions for rendering parsed data.
 *
 * Each callback writes into `ob` and receives the caller-supplied `opaque`
 * pointer. Member order is part of the binary layout; do not reorder.
 */
struct sd_callbacks {
	/* block level callbacks - a NULL entry skips the block */
	void (*blockcode)(struct buf *ob, const struct buf *text, const struct buf *lang, void *opaque);
	void (*blockquote)(struct buf *ob, const struct buf *text, void *opaque);
	void (*blockhtml)(struct buf *ob, const struct buf *text, void *opaque);
	void (*header)(struct buf *ob, const struct buf *text, int level, void *opaque);
	void (*hrule)(struct buf *ob, void *opaque);
	void (*list)(struct buf *ob, const struct buf *text, int flags, void *opaque);
	void (*listitem)(struct buf *ob, const struct buf *text, int flags, void *opaque);
	void (*paragraph)(struct buf *ob, const struct buf *text, void *opaque);
	void (*table)(struct buf *ob, const struct buf *header, const struct buf *body, void *opaque);
	void (*table_row)(struct buf *ob, const struct buf *text, void *opaque);
	/* note: col_span comes after opaque in this callback */
	void (*table_cell)(struct buf *ob, const struct buf *text, int flags, void *opaque, int col_span);

	/* span level callbacks - a NULL entry or a 0 return prints the span verbatim */
	int (*autolink)(struct buf *ob, const struct buf *link, enum mkd_autolink type, void *opaque);
	int (*codespan)(struct buf *ob, const struct buf *text, void *opaque);
	int (*double_emphasis)(struct buf *ob, const struct buf *text, void *opaque);
	int (*emphasis)(struct buf *ob, const struct buf *text, void *opaque);
	int (*image)(struct buf *ob, const struct buf *link, const struct buf *title, const struct buf *alt, void *opaque);
	int (*linebreak)(struct buf *ob, void *opaque);
	int (*link)(struct buf *ob, const struct buf *link, const struct buf *title, const struct buf *content, void *opaque);
	int (*raw_html_tag)(struct buf *ob, const struct buf *tag, void *opaque);
	int (*triple_emphasis)(struct buf *ob, const struct buf *text, void *opaque);
	int (*strikethrough)(struct buf *ob, const struct buf *text, void *opaque);
	int (*superscript)(struct buf *ob, const struct buf *text, void *opaque);

	/* low level callbacks - a NULL entry copies the input directly into the output */
	void (*entity)(struct buf *ob, const struct buf *entity, void *opaque);
	void (*normal_text)(struct buf *ob, const struct buf *text, void *opaque);

	/* document header and footer */
	void (*doc_header)(struct buf *ob, void *opaque);
	void (*doc_footer)(struct buf *ob, void *opaque);
};
|
||||
|
||||
struct sd_markdown;
|
||||
|
||||
/*********
|
||||
* FLAGS *
|
||||
*********/
|
||||
|
||||
/* list/listitem flags */
|
||||
#define MKD_LIST_ORDERED 1
|
||||
#define MKD_LI_BLOCK 2 /* <li> containing block data */
|
||||
|
||||
/**********************
|
||||
* EXPORTED FUNCTIONS *
|
||||
**********************/
|
||||
|
||||
/*
 * Creates a parser object.
 *   extensions     - feature bits (presumably a combination of
 *                    enum mkd_extensions values; confirm in markdown.c)
 *   max_nesting    - nesting limit
 *   max_table_cols - table column limit
 *   callbacks      - rendering callbacks
 *   opaque         - pointer handed back to every callback
 */
struct sd_markdown *
sd_markdown_new(unsigned int extensions, size_t max_nesting, size_t max_table_cols,
	const struct sd_callbacks *callbacks, void *opaque);

/* Renders `doc_size` bytes of `document` into `ob` using parser `md`. */
void
sd_markdown_render(struct buf *ob, const uint8_t *document, size_t doc_size, struct sd_markdown *md);

/* Releases a parser obtained from sd_markdown_new(). */
void
sd_markdown_free(struct sd_markdown *md);

/* Stores the library version numbers in the three out-parameters. */
void
sd_version(int *major, int *minor, int *revision);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
/* vim: set filetype=c: */
|
56
SnudownTest/setup.py
Normal file
56
SnudownTest/setup.py
Normal file
|
@ -0,0 +1,56 @@
|
|||
from distutils.spawn import find_executable
|
||||
from setuptools import setup, Extension
|
||||
from setuptools.command.build_ext import build_ext
|
||||
|
||||
import re
|
||||
import os
|
||||
import subprocess
|
||||
import fnmatch
|
||||
|
||||
def c_files_in(directory):
    """Return the paths of the ``*.c`` files directly inside *directory*.

    Each path is *directory* joined with the file name; sub-directories are
    not searched and the order follows ``os.listdir``.
    """
    c_names = fnmatch.filter(os.listdir(directory), '*.c')
    return [os.path.join(directory, name) for name in c_names]
|
||||
|
||||
|
||||
def process_gperf_file(gperf_file, output_file):
    """Run ``gperf`` on *gperf_file* and write the result to *output_file*.

    Raises:
        Exception: if no ``gperf`` executable is found on ``PATH``.
        IOError: if *gperf_file* does not exist.
        subprocess.CalledProcessError: if ``gperf`` exits with a non-zero
            status.
    """
    import shutil  # local import keeps this function self-contained

    gperf = shutil.which("gperf")
    if not gperf:
        raise Exception("Couldn't find `gperf`, is it installed?")
    if not os.path.exists(gperf_file):
        # explicit check: an ``assert`` would be stripped under ``python -O``
        raise IOError("gperf input file not found: " + gperf_file)

    # Run the executable that was actually located (not a hard-coded
    # "gperf.exe") and pass an argument list so that paths containing
    # spaces need no shell quoting.
    command = [gperf, gperf_file, "--output-file=" + output_file]
    print(" ".join(command))
    subprocess.check_call(command)
|
||||
|
||||
# Read the package version from the SNUDOWN_VERSION #define in snudown.c so
# that the C module and the package metadata cannot drift apart.
version_re = re.compile(r'^#define\s+SNUDOWN_VERSION\s+"([^"]+)"$')
version = None
with open('snudown.c', 'r') as f:
    # if the define appeared more than once, the last occurrence wins
    for m in filter(None, map(version_re.match, f)):
        version = m.group(1)
assert version
|
||||
|
||||
|
||||
class GPerfingBuildExt(build_ext):
    """``build_ext`` variant that processes the gperf entity file first."""

    def run(self):
        """Call ``process_gperf_file`` for the HTML entity table, then build.

        The paths are assembled with ``os.path.join`` so that they are valid
        on every platform, not only where ``\\`` is the path separator.
        """
        process_gperf_file(
            os.path.join("src", "html_entities.gperf"),
            os.path.join("src", "html_entities.h"),
        )
        build_ext.run(self)
|
||||
|
||||
# The single C extension: the Python binding plus every C file of the
# bundled markdown parser (src/) and HTML renderer (html/).
_snudown_sources = ['snudown.c'] + c_files_in('src/') + c_files_in('html/')
_snudown_extension = Extension(
    name='snudown',
    sources=_snudown_sources,
    include_dirs=['src', 'html']
)

setup(
    name='snudown',
    version=version,
    author='Vicent Marti',
    author_email='vicent@github.com',
    license='MIT',
    test_suite="test_snudown.test_snudown",
    cmdclass={'build_ext': GPerfingBuildExt},
    ext_modules=[_snudown_extension],
)
|
212
SnudownTest/snudown - Copy.c
Normal file
212
SnudownTest/snudown - Copy.c
Normal file
|
@ -0,0 +1,212 @@
|
|||
#define PY_SSIZE_T_CLEAN
|
||||
#include <Python.h>
|
||||
|
||||
#include "markdown.h"
|
||||
#include "html.h"
|
||||
#include "autolink.h"
|
||||
|
||||
#define SNUDOWN_VERSION "1.4.0"
|
||||
|
||||
enum snudown_renderer_mode {
|
||||
RENDERER_USERTEXT = 0,
|
||||
RENDERER_WIKI,
|
||||
RENDERER_COUNT
|
||||
};
|
||||
|
||||
struct snudown_renderopt {
|
||||
struct html_renderopt html;
|
||||
int nofollow;
|
||||
const char *target;
|
||||
};
|
||||
|
||||
struct snudown_renderer {
|
||||
struct sd_markdown* main_renderer;
|
||||
struct sd_markdown* toc_renderer;
|
||||
struct module_state* state;
|
||||
struct module_state* toc_state;
|
||||
};
|
||||
|
||||
struct module_state {
|
||||
struct sd_callbacks callbacks;
|
||||
struct snudown_renderopt options;
|
||||
};
|
||||
|
||||
static struct snudown_renderer sundown[RENDERER_COUNT];
|
||||
|
||||
static char* html_element_whitelist[] = {"tr", "th", "td", "table", "tbody", "thead", "tfoot", "caption", NULL};
|
||||
static char* html_attr_whitelist[] = {"colspan", "rowspan", "cellspacing", "cellpadding", "scope", NULL};
|
||||
|
||||
static struct module_state usertext_toc_state;
|
||||
static struct module_state wiki_toc_state;
|
||||
static struct module_state usertext_state;
|
||||
static struct module_state wiki_state;
|
||||
|
||||
/* The module doc strings */
|
||||
PyDoc_STRVAR(snudown_module__doc__, "When does the narwhal bacon? At Sundown.");
|
||||
PyDoc_STRVAR(snudown_md__doc__, "Render a Markdown document");
|
||||
|
||||
static const unsigned int snudown_default_md_flags =
|
||||
MKDEXT_NO_INTRA_EMPHASIS |
|
||||
MKDEXT_SUPERSCRIPT |
|
||||
MKDEXT_AUTOLINK |
|
||||
MKDEXT_STRIKETHROUGH |
|
||||
MKDEXT_TABLES;
|
||||
|
||||
static const unsigned int snudown_default_render_flags =
|
||||
HTML_SKIP_HTML |
|
||||
HTML_SKIP_IMAGES |
|
||||
HTML_SAFELINK |
|
||||
HTML_ESCAPE |
|
||||
HTML_USE_XHTML;
|
||||
|
||||
static const unsigned int snudown_wiki_render_flags =
|
||||
HTML_SKIP_HTML |
|
||||
HTML_SAFELINK |
|
||||
HTML_ALLOW_ELEMENT_WHITELIST |
|
||||
HTML_ESCAPE |
|
||||
HTML_USE_XHTML;
|
||||
|
||||
static void
|
||||
snudown_link_attr(struct buf *ob, const struct buf *link, void *opaque)
|
||||
{
|
||||
struct snudown_renderopt *options = opaque;
|
||||
|
||||
if (options->nofollow)
|
||||
BUFPUTSL(ob, " rel=\"nofollow\"");
|
||||
|
||||
if (options->target != NULL) {
|
||||
BUFPUTSL(ob, " target=\"");
|
||||
bufputs(ob, options->target);
|
||||
bufputc(ob, '\"');
|
||||
}
|
||||
}
|
||||
|
||||
static struct sd_markdown* make_custom_renderer(struct module_state* state,
|
||||
const unsigned int renderflags,
|
||||
const unsigned int markdownflags,
|
||||
int toc_renderer) {
|
||||
if(toc_renderer) {
|
||||
sdhtml_toc_renderer(&state->callbacks,
|
||||
(struct html_renderopt *)&state->options);
|
||||
} else {
|
||||
sdhtml_renderer(&state->callbacks,
|
||||
(struct html_renderopt *)&state->options,
|
||||
renderflags);
|
||||
}
|
||||
|
||||
state->options.html.link_attributes = &snudown_link_attr;
|
||||
state->options.html.html_element_whitelist = html_element_whitelist;
|
||||
state->options.html.html_attr_whitelist = html_attr_whitelist;
|
||||
|
||||
return sd_markdown_new(
|
||||
markdownflags,
|
||||
16,
|
||||
64,
|
||||
&state->callbacks,
|
||||
&state->options
|
||||
);
|
||||
}
|
||||
|
||||
/* Exposes RENDERER_USERTEXT on `module` and builds the user-text renderers. */
void init_default_renderer(PyObject *module) {
	struct snudown_renderer *slot = &sundown[RENDERER_USERTEXT];

	PyModule_AddIntConstant(module, "RENDERER_USERTEXT", RENDERER_USERTEXT);

	slot->main_renderer = make_custom_renderer(&usertext_state, snudown_default_render_flags, snudown_default_md_flags, 0);
	slot->toc_renderer = make_custom_renderer(&usertext_toc_state, snudown_default_render_flags, snudown_default_md_flags, 1);
	slot->state = &usertext_state;
	slot->toc_state = &usertext_toc_state;
}
|
||||
|
||||
/* Exposes RENDERER_WIKI on `module` and builds the wiki renderers. */
void init_wiki_renderer(PyObject *module) {
	struct snudown_renderer *slot = &sundown[RENDERER_WIKI];

	PyModule_AddIntConstant(module, "RENDERER_WIKI", RENDERER_WIKI);

	slot->main_renderer = make_custom_renderer(&wiki_state, snudown_wiki_render_flags, snudown_default_md_flags, 0);
	slot->toc_renderer = make_custom_renderer(&wiki_toc_state, snudown_wiki_render_flags, snudown_default_md_flags, 1);
	slot->state = &wiki_state;
	slot->toc_state = &wiki_toc_state;
}
|
||||
|
||||
/*
 * snudown.markdown(text, nofollow=0, target=None, toc_id_prefix=None,
 *                  renderer=RENDERER_USERTEXT, enable_toc=0)
 *
 * Renders `text` with the selected renderer and returns the HTML as a
 * Python string. With enable_toc set, the table of contents is rendered
 * into the same output first.
 *
 * Returns NULL with ValueError set for an unknown renderer, and NULL with
 * MemoryError set when the output buffer cannot be allocated.
 */
static PyObject *
snudown_md(PyObject *self, PyObject *args, PyObject *kwargs)
{
	static char *kwlist[] = {"text", "nofollow", "target", "toc_id_prefix", "renderer", "enable_toc", NULL};

	struct snudown_renderopt *options;
	struct snudown_renderer _snudown;
	struct buf *ob;
	PyObject *py_result;
	const char *result_text;
	/* PY_SSIZE_T_CLEAN is defined, so "s#" reads and writes Py_ssize_t */
	const char *text = NULL;
	Py_ssize_t text_len = 0;
	int renderer = RENDERER_USERTEXT;
	int enable_toc = 0;
	int nofollow = 0;
	char *target = NULL;
	char *toc_id_prefix = NULL;
	unsigned int flags;

	/* Parse arguments */
	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|izzii", kwlist,
				&text, &text_len, &nofollow,
				&target, &toc_id_prefix, &renderer, &enable_toc)) {
		return NULL;
	}

	if (renderer < 0 || renderer >= RENDERER_COUNT) {
		PyErr_SetString(PyExc_ValueError, "Invalid renderer");
		return NULL;
	}

	_snudown = sundown[renderer];

	options = &(_snudown.state->options);
	options->nofollow = nofollow;
	options->target = target;

	/* Output buffer */
	ob = bufnew(128);
	if (ob == NULL)
		return PyErr_NoMemory();

	/* the renderer state is shared between calls: remember the flags so
	 * the temporary HTML_TOC bit can be removed again */
	flags = options->html.flags;

	if (enable_toc) {
		_snudown.toc_state->options.html.toc_id_prefix = toc_id_prefix;
		sd_markdown_render(ob, (const uint8_t *)text, (size_t)text_len, _snudown.toc_renderer);
		_snudown.toc_state->options.html.toc_id_prefix = NULL;

		options->html.flags |= HTML_TOC;
	}

	options->html.toc_id_prefix = toc_id_prefix;

	/* do the magic */
	sd_markdown_render(ob, (const uint8_t *)text, (size_t)text_len, _snudown.main_renderer);

	options->html.toc_id_prefix = NULL;
	options->html.flags = flags;

	/* make a Python string */
	result_text = "";
	if (ob->data)
		result_text = (const char*)ob->data;
	py_result = Py_BuildValue("s#", result_text, (Py_ssize_t)ob->size);

	/* Cleanup */
	bufrelease(ob);
	return py_result;
}
|
||||
|
||||
static PyMethodDef snudown_methods[] = {
|
||||
{"markdown", (PyCFunction) snudown_md, METH_VARARGS | METH_KEYWORDS, snudown_md__doc__},
|
||||
{NULL, NULL, 0, NULL} /* Sentinel */
|
||||
};
|
||||
|
||||
PyMODINIT_FUNC initsnudown(void)
|
||||
{
|
||||
PyObject *module;
|
||||
|
||||
module = Py_InitModule3("snudown", snudown_methods, snudown_module__doc__);
|
||||
if (module == NULL)
|
||||
return;
|
||||
|
||||
init_default_renderer(module);
|
||||
init_wiki_renderer(module);
|
||||
|
||||
/* Version */
|
||||
PyModule_AddStringConstant(module, "__version__", SNUDOWN_VERSION);
|
||||
}
|
226
SnudownTest/snudown-validator.c
Normal file
226
SnudownTest/snudown-validator.c
Normal file
|
@ -0,0 +1,226 @@
|
|||
#include "markdown.h"
|
||||
#include "html.h"
|
||||
#include "buffer.h"
|
||||
|
||||
#include <ctype.h>
|
||||
#include <errno.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <gumbo.h>
|
||||
|
||||
#define READ_UNIT 1024
|
||||
#define OUTPUT_UNIT 64
|
||||
|
||||
#include "autolink.h"
|
||||
|
||||
#define SNUDOWN_VERSION "1.3.2"
|
||||
|
||||
enum snudown_renderer_mode {
|
||||
RENDERER_USERTEXT = 0,
|
||||
RENDERER_WIKI,
|
||||
RENDERER_COUNT
|
||||
};
|
||||
|
||||
struct snudown_renderopt {
|
||||
struct html_renderopt html;
|
||||
int nofollow;
|
||||
const char *target;
|
||||
};
|
||||
|
||||
struct snudown_renderer {
|
||||
struct sd_markdown* main_renderer;
|
||||
struct sd_markdown* toc_renderer;
|
||||
struct module_state* state;
|
||||
struct module_state* toc_state;
|
||||
};
|
||||
|
||||
struct module_state {
|
||||
struct sd_callbacks callbacks;
|
||||
struct snudown_renderopt options;
|
||||
};
|
||||
|
||||
static struct snudown_renderer sundown[RENDERER_COUNT];
|
||||
|
||||
static char* html_element_whitelist[] = {"tr", "th", "td", "table", "tbody", "thead", "tfoot", "caption", NULL};
|
||||
static char* html_attr_whitelist[] = {"colspan", "rowspan", "cellspacing", "cellpadding", "scope", NULL};
|
||||
|
||||
static struct module_state usertext_toc_state;
|
||||
static struct module_state wiki_toc_state;
|
||||
static struct module_state usertext_state;
|
||||
static struct module_state wiki_state;
|
||||
|
||||
static const unsigned int snudown_default_md_flags =
|
||||
MKDEXT_NO_INTRA_EMPHASIS |
|
||||
MKDEXT_SUPERSCRIPT |
|
||||
MKDEXT_AUTOLINK |
|
||||
MKDEXT_STRIKETHROUGH |
|
||||
MKDEXT_TABLES;
|
||||
|
||||
static const unsigned int snudown_default_render_flags =
|
||||
HTML_SKIP_HTML |
|
||||
HTML_SKIP_IMAGES |
|
||||
HTML_SAFELINK |
|
||||
HTML_ESCAPE |
|
||||
HTML_USE_XHTML;
|
||||
|
||||
static const unsigned int snudown_wiki_render_flags =
|
||||
HTML_SKIP_HTML |
|
||||
HTML_SAFELINK |
|
||||
HTML_ALLOW_ELEMENT_WHITELIST |
|
||||
HTML_ESCAPE |
|
||||
HTML_USE_XHTML;
|
||||
|
||||
static void
|
||||
snudown_link_attr(struct buf *ob, const struct buf *link, void *opaque)
|
||||
{
|
||||
struct snudown_renderopt *options = opaque;
|
||||
|
||||
if (options->nofollow)
|
||||
BUFPUTSL(ob, " rel=\"nofollow\"");
|
||||
|
||||
if (options->target != NULL) {
|
||||
BUFPUTSL(ob, " target=\"");
|
||||
bufputs(ob, options->target);
|
||||
bufputc(ob, '\"');
|
||||
}
|
||||
}
|
||||
|
||||
static struct sd_markdown* make_custom_renderer(struct module_state* state,
|
||||
const unsigned int renderflags,
|
||||
const unsigned int markdownflags,
|
||||
int toc_renderer) {
|
||||
if(toc_renderer) {
|
||||
sdhtml_toc_renderer(&state->callbacks,
|
||||
(struct html_renderopt *)&state->options);
|
||||
} else {
|
||||
sdhtml_renderer(&state->callbacks,
|
||||
(struct html_renderopt *)&state->options,
|
||||
renderflags);
|
||||
}
|
||||
|
||||
state->options.html.link_attributes = &snudown_link_attr;
|
||||
state->options.html.html_element_whitelist = html_element_whitelist;
|
||||
state->options.html.html_attr_whitelist = html_attr_whitelist;
|
||||
|
||||
return sd_markdown_new(
|
||||
markdownflags,
|
||||
16,
|
||||
64,
|
||||
&state->callbacks,
|
||||
&state->options
|
||||
);
|
||||
}
|
||||
|
||||
void init_default_renderer() {
|
||||
sundown[RENDERER_USERTEXT].main_renderer = make_custom_renderer(&usertext_state, snudown_default_render_flags, snudown_default_md_flags, 0);
|
||||
sundown[RENDERER_USERTEXT].toc_renderer = make_custom_renderer(&usertext_toc_state, snudown_default_render_flags, snudown_default_md_flags, 1);
|
||||
sundown[RENDERER_USERTEXT].state = &usertext_state;
|
||||
sundown[RENDERER_USERTEXT].toc_state = &usertext_toc_state;
|
||||
}
|
||||
|
||||
void init_wiki_renderer() {
|
||||
sundown[RENDERER_WIKI].main_renderer = make_custom_renderer(&wiki_state, snudown_wiki_render_flags, snudown_default_md_flags, 0);
|
||||
sundown[RENDERER_WIKI].toc_renderer = make_custom_renderer(&wiki_toc_state, snudown_wiki_render_flags, snudown_default_md_flags, 1);
|
||||
sundown[RENDERER_WIKI].state = &wiki_state;
|
||||
sundown[RENDERER_WIKI].toc_state = &wiki_toc_state;
|
||||
}
|
||||
|
||||
void
|
||||
snudown_md(struct buf *ob, const uint8_t *document, size_t doc_size, int wiki_mode)
|
||||
{
|
||||
int renderer = RENDERER_USERTEXT;
|
||||
int enable_toc = 0;
|
||||
struct snudown_renderer _snudown;
|
||||
int nofollow = 0;
|
||||
char* target = NULL;
|
||||
char* toc_id_prefix = NULL;
|
||||
unsigned int flags;
|
||||
|
||||
if (wiki_mode)
|
||||
renderer = RENDERER_WIKI;
|
||||
|
||||
_snudown = sundown[renderer];
|
||||
|
||||
struct snudown_renderopt *options = &(_snudown.state->options);
|
||||
options->nofollow = nofollow;
|
||||
options->target = target;
|
||||
|
||||
flags = options->html.flags;
|
||||
|
||||
if (enable_toc) {
|
||||
_snudown.toc_state->options.html.toc_id_prefix = toc_id_prefix;
|
||||
sd_markdown_render(ob, document, doc_size, _snudown.toc_renderer);
|
||||
_snudown.toc_state->options.html.toc_id_prefix = NULL;
|
||||
|
||||
options->html.flags |= HTML_TOC;
|
||||
}
|
||||
|
||||
options->html.toc_id_prefix = toc_id_prefix;
|
||||
|
||||
/* do the magic */
|
||||
sd_markdown_render(ob, document, doc_size, _snudown.main_renderer);
|
||||
|
||||
options->html.toc_id_prefix = NULL;
|
||||
options->html.flags = flags;
|
||||
}
|
||||
int
|
||||
main(int argc, char **argv)
|
||||
{
|
||||
init_default_renderer();
|
||||
init_wiki_renderer();
|
||||
|
||||
struct buf *ib, *ob;
|
||||
int size_read = 0, wiki_mode = 0, i = 0, have_errors = 0;
|
||||
|
||||
/* reading everything */
|
||||
ib = bufnew(READ_UNIT);
|
||||
bufgrow(ib, READ_UNIT);
|
||||
while ((size_read = fread(ib->data + ib->size, 1, ib->asize - ib->size, stdin)) > 0) {
|
||||
ib->size += size_read;
|
||||
bufgrow(ib, ib->size + READ_UNIT);
|
||||
}
|
||||
/* Render to a buffer, then print that out */
|
||||
ob = bufnew(OUTPUT_UNIT);
|
||||
bufputs(ob, "<!DOCTYPE html><html><body>\n");
|
||||
snudown_md(ob, ib->data, ib->size, wiki_mode);
|
||||
bufputs(ob, "</body></html>\n");
|
||||
|
||||
// Wiki mode explicitly allows unbalanced tags, need some way to exclude those
|
||||
if (!wiki_mode) {
|
||||
GumboOutput* output = gumbo_parse_with_options(&kGumboDefaultOptions, bufcstr(ob), ob->size);
|
||||
|
||||
for (i=0; i < output->errors.length; ++i) {
|
||||
// stupid "public" API I hacked in.
|
||||
void* thing = output->errors.data[i];
|
||||
GumboErrorType type = gumbo_get_error_type(thing);
|
||||
switch(type) {
|
||||
case GUMBO_ERR_UTF8_INVALID:
|
||||
case GUMBO_ERR_UTF8_NULL:
|
||||
// Making sure the user gave us valid
|
||||
// utf-8 or transforming it to valid
|
||||
// utf-8 is outside the scope of snudown
|
||||
continue;
|
||||
default:
|
||||
have_errors = 1;
|
||||
printf("%s\n", GUMBO_ERROR_NAMES[type]);
|
||||
printf("%s\n",gumbo_get_error_text(thing));
|
||||
printf("===============\n");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (have_errors) {
|
||||
// gotta trigger a crash for AFL to catch it
|
||||
assert(0);
|
||||
}
|
||||
|
||||
gumbo_destroy_output(&kGumboDefaultOptions, output);
|
||||
}
|
||||
bufrelease(ob);
|
||||
bufrelease(ib);
|
||||
return 0;
|
||||
}
|
232
SnudownTest/snudown.c
Normal file
232
SnudownTest/snudown.c
Normal file
|
@ -0,0 +1,232 @@
|
|||
#define PY_SSIZE_T_CLEAN
|
||||
#include <Python.h>
|
||||
|
||||
#include "markdown.h"
|
||||
#include "html.h"
|
||||
#include "autolink.h"
|
||||
|
||||
#define SNUDOWN_VERSION "1.4.0"
|
||||
|
||||
enum snudown_renderer_mode {
|
||||
RENDERER_USERTEXT = 0,
|
||||
RENDERER_WIKI,
|
||||
RENDERER_COUNT
|
||||
};
|
||||
|
||||
struct snudown_renderopt {
|
||||
struct html_renderopt html;
|
||||
int nofollow;
|
||||
const char *target;
|
||||
};
|
||||
|
||||
struct snudown_renderer {
|
||||
struct sd_markdown* main_renderer;
|
||||
struct sd_markdown* toc_renderer;
|
||||
struct module_state* state;
|
||||
struct module_state* toc_state;
|
||||
};
|
||||
|
||||
struct module_state {
|
||||
struct sd_callbacks callbacks;
|
||||
struct snudown_renderopt options;
|
||||
};
|
||||
|
||||
static struct snudown_renderer sundown[RENDERER_COUNT];
|
||||
|
||||
static char* html_element_whitelist[] = {"tr", "th", "td", "table", "tbody", "thead", "tfoot", "caption", NULL};
|
||||
static char* html_attr_whitelist[] = {"colspan", "rowspan", "cellspacing", "cellpadding", "scope", NULL};
|
||||
|
||||
static struct module_state usertext_toc_state;
|
||||
static struct module_state wiki_toc_state;
|
||||
static struct module_state usertext_state;
|
||||
static struct module_state wiki_state;
|
||||
|
||||
/* The module doc strings */
|
||||
PyDoc_STRVAR(snudown_module__doc__, "When does the narwhal bacon? At Sundown.");
|
||||
PyDoc_STRVAR(snudown_md__doc__, "Render a Markdown document");
|
||||
|
||||
static const unsigned int snudown_default_md_flags =
|
||||
MKDEXT_NO_INTRA_EMPHASIS |
|
||||
MKDEXT_SUPERSCRIPT |
|
||||
MKDEXT_AUTOLINK |
|
||||
MKDEXT_STRIKETHROUGH |
|
||||
MKDEXT_TABLES;
|
||||
|
||||
static const unsigned int snudown_default_render_flags =
|
||||
HTML_SKIP_HTML |
|
||||
HTML_SKIP_IMAGES |
|
||||
HTML_SAFELINK |
|
||||
HTML_ESCAPE |
|
||||
HTML_USE_XHTML;
|
||||
|
||||
static const unsigned int snudown_wiki_render_flags =
|
||||
HTML_SKIP_HTML |
|
||||
HTML_SAFELINK |
|
||||
HTML_ALLOW_ELEMENT_WHITELIST |
|
||||
HTML_ESCAPE |
|
||||
HTML_USE_XHTML;
|
||||
|
||||
static void
|
||||
snudown_link_attr(struct buf *ob, const struct buf *link, void *opaque)
|
||||
{
|
||||
struct snudown_renderopt *options = opaque;
|
||||
|
||||
if (options->nofollow)
|
||||
BUFPUTSL(ob, " rel=\"nofollow\"");
|
||||
|
||||
if (options->target != NULL) {
|
||||
BUFPUTSL(ob, " target=\"");
|
||||
bufputs(ob, options->target);
|
||||
bufputc(ob, '\"');
|
||||
}
|
||||
}
|
||||
|
||||
static struct sd_markdown* make_custom_renderer(struct module_state* state,
|
||||
const unsigned int renderflags,
|
||||
const unsigned int markdownflags,
|
||||
int toc_renderer) {
|
||||
if(toc_renderer) {
|
||||
sdhtml_toc_renderer(&state->callbacks,
|
||||
(struct html_renderopt *)&state->options);
|
||||
} else {
|
||||
sdhtml_renderer(&state->callbacks,
|
||||
(struct html_renderopt *)&state->options,
|
||||
renderflags);
|
||||
}
|
||||
|
||||
state->options.html.link_attributes = &snudown_link_attr;
|
||||
state->options.html.html_element_whitelist = html_element_whitelist;
|
||||
state->options.html.html_attr_whitelist = html_attr_whitelist;
|
||||
|
||||
return sd_markdown_new(
|
||||
markdownflags,
|
||||
16,
|
||||
64,
|
||||
&state->callbacks,
|
||||
&state->options
|
||||
);
|
||||
}
|
||||
|
||||
/* Exposes RENDERER_USERTEXT on `module` and builds the user-text renderers. */
void init_default_renderer(PyObject *module) {
	struct snudown_renderer *slot = &sundown[RENDERER_USERTEXT];

	PyModule_AddIntConstant(module, "RENDERER_USERTEXT", RENDERER_USERTEXT);

	slot->main_renderer = make_custom_renderer(&usertext_state, snudown_default_render_flags, snudown_default_md_flags, 0);
	slot->toc_renderer = make_custom_renderer(&usertext_toc_state, snudown_default_render_flags, snudown_default_md_flags, 1);
	slot->state = &usertext_state;
	slot->toc_state = &usertext_toc_state;
}
|
||||
|
||||
/* Exposes RENDERER_WIKI on `module` and builds the wiki renderers. */
void init_wiki_renderer(PyObject *module) {
	struct snudown_renderer *slot = &sundown[RENDERER_WIKI];

	PyModule_AddIntConstant(module, "RENDERER_WIKI", RENDERER_WIKI);

	slot->main_renderer = make_custom_renderer(&wiki_state, snudown_wiki_render_flags, snudown_default_md_flags, 0);
	slot->toc_renderer = make_custom_renderer(&wiki_toc_state, snudown_wiki_render_flags, snudown_default_md_flags, 1);
	slot->state = &wiki_state;
	slot->toc_state = &wiki_toc_state;
}
|
||||
|
||||
/*
 * snudown.markdown(text, nofollow=0, target=None, toc_id_prefix=None,
 *                  renderer=RENDERER_USERTEXT, enable_toc=0)
 *
 * Renders `text` with the selected renderer and returns the HTML as a
 * Python string. With enable_toc set, the table of contents is rendered
 * into the same output first.
 *
 * Returns NULL with ValueError set for an unknown renderer, and NULL with
 * MemoryError set when the output buffer cannot be allocated.
 */
static PyObject *
snudown_md(PyObject *self, PyObject *args, PyObject *kwargs)
{
	struct snudown_renderopt *options;
	static char *kwlist[] = {"text", "nofollow", "target", "toc_id_prefix", "renderer", "enable_toc", NULL};

	struct snudown_renderer _snudown;
	struct buf *ob;
	PyObject *py_result;
	const char *result_text;
	/* PY_SSIZE_T_CLEAN is defined, so "s#" reads and writes Py_ssize_t */
	const char *text = NULL;
	Py_ssize_t text_len = 0;
	int renderer = RENDERER_USERTEXT;
	int enable_toc = 0;
	int nofollow = 0;
	char *target = NULL;
	char *toc_id_prefix = NULL;
	unsigned int flags;

	/* Parse arguments */
	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|izzii", kwlist,
				&text, &text_len, &nofollow,
				&target, &toc_id_prefix, &renderer, &enable_toc)) {
		return NULL;
	}

	if (renderer < 0 || renderer >= RENDERER_COUNT) {
		PyErr_SetString(PyExc_ValueError, "Invalid renderer");
		return NULL;
	}

	_snudown = sundown[renderer];

	/* must be assigned before use: every access below goes through it */
	options = &(_snudown.state->options);
	options->nofollow = nofollow;
	options->target = target;

	/* Output buffer */
	ob = bufnew(128);
	if (ob == NULL)
		return PyErr_NoMemory();

	/* the renderer state is shared between calls: remember the flags so
	 * the temporary HTML_TOC bit can be removed again */
	flags = options->html.flags;

	if (enable_toc) {
		_snudown.toc_state->options.html.toc_id_prefix = toc_id_prefix;
		sd_markdown_render(ob, (const uint8_t *)text, (size_t)text_len, _snudown.toc_renderer);
		_snudown.toc_state->options.html.toc_id_prefix = NULL;

		options->html.flags |= HTML_TOC;
	}

	options->html.toc_id_prefix = toc_id_prefix;

	/* do the magic */
	sd_markdown_render(ob, (const uint8_t *)text, (size_t)text_len, _snudown.main_renderer);

	options->html.toc_id_prefix = NULL;
	options->html.flags = flags;

	/* make a Python string */
	result_text = "";
	if (ob->data)
		result_text = (const char*)ob->data;
	py_result = Py_BuildValue("s#", result_text, (Py_ssize_t)ob->size);

	/* Cleanup */
	bufrelease(ob);
	return py_result;
}
|
||||
|
||||
|
||||
static PyMethodDef snudown_methods[] = {
|
||||
{"markdown", (PyCFunction) snudown_md, METH_VARARGS | METH_KEYWORDS, snudown_md__doc__},
|
||||
{NULL, NULL, 0, NULL} /* Sentinel */
|
||||
};
|
||||
PyMODINIT_FUNC PyInit_snudown(void)
|
||||
{
|
||||
PyObject *module;
|
||||
|
||||
struct PyModuleDef wtf = {
|
||||
PyModuleDef_HEAD_INIT,
|
||||
"snudown", /* m_name */
|
||||
"This is snudown", /* m_doc */
|
||||
-1, /* m_size */
|
||||
snudown_methods, /* m_methods */
|
||||
NULL, /* m_reload */
|
||||
NULL, /* m_traverse */
|
||||
NULL, /* m_clear */
|
||||
NULL, /* m_free */
|
||||
};
|
||||
//module = Py_InitModule3("snudown", snudown_methods, snudown_module__doc__);
|
||||
module = PyModule_Create(&wtf);
|
||||
if (module == NULL)
|
||||
return Py_BuildValue("");
|
||||
|
||||
init_default_renderer(module);
|
||||
init_wiki_renderer(module);
|
||||
|
||||
/* Version */
|
||||
PyModule_AddStringConstant(module, "__version__", SNUDOWN_VERSION);
|
||||
};
|
||||
/*
 * Legacy-named entry point that simply forwards to PyInit_snudown().
 * PyInit_snudown takes no parameters, so it is called without any; the
 * returned module object is intentionally discarded.
 */
void initsnudown(void)
{
	(void) PyInit_snudown();
}
|
487
SnudownTest/src/autolink.c
Normal file
487
SnudownTest/src/autolink.c
Normal file
|
@ -0,0 +1,487 @@
|
|||
/*
|
||||
* Copyright (c) 2011, Vicent Marti
|
||||
*
|
||||
* Permission to use, copy, modify, and distribute this software for any
|
||||
* purpose with or without fee is hereby granted, provided that the above
|
||||
* copyright notice and this permission notice appear in all copies.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "buffer.h"
|
||||
#include "autolink.h"
|
||||
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <ctype.h>
|
||||
|
||||
#if defined(_WIN32)
|
||||
#define strncasecmp _strnicmp
|
||||
#endif
|
||||
|
||||
/*
 * Decide whether `link` (not necessarily NUL-terminated, `link_len` bytes)
 * starts with one of the whitelisted prefixes, compared case-insensitively.
 *
 * A prefix only counts when at least one more byte follows it and that
 * byte is alphanumeric or one of '#', '/', '?'.
 *
 * Returns 1 when the link is considered safe, 0 otherwise.
 */
int
sd_autolink_issafe(const uint8_t *link, size_t link_len)
{
	static const char *const schemes[] = {
		"http://", "https://", "ftp://", "mailto://",
		"/", "git://", "steam://", "irc://", "news://", "mumble://",
		"ssh://", "ircs://", "ts3server://", "#"
	};
	const size_t scheme_count = sizeof(schemes) / sizeof(schemes[0]);
	size_t idx;

	for (idx = 0; idx < scheme_count; idx++) {
		const char *scheme = schemes[idx];
		size_t scheme_len = strlen(scheme);
		uint8_t next;

		/* need the whole prefix plus one following byte */
		if (link_len <= scheme_len)
			continue;

		if (strncasecmp((char *)link, scheme, scheme_len) != 0)
			continue;

		next = link[scheme_len];
		if (isalnum(next) || next == '#' || next == '/' || next == '?')
			return 1;
	}

	return 0;
}
|
||||
|
||||
/*
 * Trim a candidate autolink so that trailing punctuation, a trailing HTML
 * entity, or an unbalanced closing bracket/quote is not part of the link.
 *
 *   data      start of the candidate link
 *   link_end  candidate length in bytes
 *   max_rewind, size
 *             unused here; kept so all autolink helpers share a signature
 *
 * Returns the trimmed length, or 0 when nothing is left.
 */
static size_t
autolink_delim(uint8_t *data, size_t link_end, size_t max_rewind, size_t size)
{
	uint8_t cclose, copen = 0;
	size_t i;

	(void)max_rewind;
	(void)size;

	/* a '<' always terminates the link */
	for (i = 0; i < link_end; ++i)
		if (data[i] == '<') {
			link_end = i;
			break;
		}

	while (link_end > 0) {
		uint8_t c = data[link_end - 1];

		if (c == 0)
			break;

		if (strchr("?!.,", c) != NULL)
			link_end--;

		else if (c == ';') {
			size_t new_end;

			/* A lone ';' cannot close an entity.  Handling it here
			 * also keeps `link_end - 2` below from wrapping around
			 * and indexing outside the buffer. */
			if (link_end < 2) {
				link_end--;
				continue;
			}

			new_end = link_end - 2;

			while (new_end > 0 && isalpha(data[new_end]))
				new_end--;

			/* "&name;" -> drop the whole entity, otherwise just the ';' */
			if (new_end < link_end - 2 && data[new_end] == '&')
				link_end = new_end;
			else
				link_end--;
		}
		else break;
	}

	if (link_end == 0)
		return 0;

	cclose = data[link_end - 1];

	switch (cclose) {
	case '"':	copen = '"'; break;
	case '\'':	copen = '\''; break;
	case ')':	copen = '('; break;
	case ']':	copen = '['; break;
	case '}':	copen = '{'; break;
	}

	if (copen != 0) {
		size_t closing = 0;
		size_t opening = 0;

		/* Try to close the final punctuation sign in this same line;
		 * if we managed to close it outside of the URL, that means that it's
		 * not part of the URL. If it closes inside the URL, that means it
		 * is part of the URL.
		 *
		 * Examples:
		 *
		 *	foo http://www.pokemon.com/Pikachu_(Electric) bar
		 *		=> http://www.pokemon.com/Pikachu_(Electric)
		 *
		 *	foo (http://www.pokemon.com/Pikachu_(Electric)) bar
		 *		=> http://www.pokemon.com/Pikachu_(Electric)
		 *
		 *	foo http://www.pokemon.com/Pikachu_(Electric)) bar
		 *		=> http://www.pokemon.com/Pikachu_(Electric))
		 *
		 *	(foo http://www.pokemon.com/Pikachu_(Electric)) bar
		 *		=> foo http://www.pokemon.com/Pikachu_(Electric)
		 */

		for (i = 0; i < link_end; ++i) {
			if (data[i] == copen)
				opening++;
			else if (data[i] == cclose)
				closing++;
		}

		if (closing != opening)
			link_end--;
	}

	return link_end;
}
|
||||
|
||||
/*
 * Checks that `prefix_char` occurs on a word boundary just before `data`,
 * where `data` points to the character to search to the left of, and a word boundary
 * is (currently) a whitespace character, punctuation, or the start of the string.
 *
 *   data            points at the '/' that follows the prefix character
 *   max_rewind      bytes that may be read to the left of `data`
 *   max_lookbehind  bytes available to the left of `data` for the escape check
 *   size            bytes available from `data` onwards
 *   prefix_char     the expected prefix letter ('r' or 'u')
 *
 * Returns the length of the prefix: 2 when a leading '/' precedes the
 * prefix character, 1 when only the prefix character is present, and 0
 * when there is no valid prefix.
 */
static int
check_reddit_autolink_prefix(
	const uint8_t* data,
	size_t max_rewind,
	size_t max_lookbehind,
	size_t size,
	char prefix_char
	)
{
	/* Make sure this `/` is part of `/?r/` */
	if (size < 2 || max_rewind < 1 || data[-1] != prefix_char)
		return 0;

	/* Not at the start of the buffer, no inlines to the immediate left of the `prefix_char` */
	if (max_rewind > 1) {
		/* Keep the byte unsigned: the <ctype.h> classifiers are only
		 * defined for values representable as unsigned char (or EOF),
		 * and a plain `char` may be negative for bytes >= 0x80. */
		const uint8_t boundary = data[-2];
		if (boundary == '/')
			return 2;
		/**
		 * Here's where our lack of unicode-awareness bites us. We don't correctly
		 * match punctuation / whitespace characters for the boundary, because we
		 * reject valid cases like "。r/example" (note the fullwidth period.)
		 *
		 * A better implementation might try to rewind over bytes with the 8th bit set, try
		 * to decode them to a valid codepoint, then do a unicode-aware check on the codepoint.
		 */
		else if (ispunct(boundary) || isspace(boundary))
			return 1;
		else
			return 0;
	} else if (max_lookbehind > 2) {
		/* There's an inline element just left of the `prefix_char`, is it an escaped forward
		 * slash? bail out so we correctly handle stuff like "\/r/foo". This will also correctly
		 * allow "\\/r/foo".
		 */
		if (data[-2] == '/' && data[-3] == '\\')
			return 0;
	}

	/* Must be a new-style shortlink with nothing relevant to the left of it. */
	return 1;
}
|
||||
|
||||
/*
 * Measure the run of domain-name characters at the start of `data`.
 *
 * The first byte must be alphanumeric; after it, letters, digits, '-' and
 * '.' are accepted.  The final byte of the buffer is never examined (the
 * scan stops at `size - 1`).
 *
 * With `allow_short` set, the length of the run is returned as-is.
 * Otherwise the run must contain at least one dot, else 0 is returned.
 */
static size_t
check_domain(uint8_t *data, size_t size, int allow_short)
{
	size_t pos = 1;
	size_t dots = 0;

	if (!isalnum(data[0]))
		return 0;

	while (pos < size - 1) {
		uint8_t ch = data[pos];

		if (ch == '.')
			dots++;
		else if (ch != '-' && !isalnum(ch))
			break;

		pos++;
	}

	/* A strict domain needs at least one dot; short domains only need
	 * to be made of valid domain characters. */
	if (!allow_short && dots == 0)
		return 0;

	return pos;
}
|
||||
|
||||
/*
 * Try to autolink a bare "www." address starting at `data`.
 *
 * The match is rejected unless the byte before `data` (when one is
 * available) is punctuation or whitespace, `data` begins with "www.", and
 * a dotted domain follows.  On success the link text is appended to
 * `link`, `*rewind_p` is set to 0 and the number of bytes consumed is
 * returned; otherwise 0 is returned.  `flags` is not used.
 */
size_t
sd_autolink__www(
	size_t *rewind_p,
	struct buf *link,
	uint8_t *data,
	size_t max_rewind,
	size_t size,
	unsigned int flags)
{
	size_t end;

	/* must start on a word boundary */
	if (max_rewind > 0) {
		uint8_t prev = data[-1];

		if (!ispunct(prev) && !isspace(prev))
			return 0;
	}

	if (size < 4 || memcmp(data, "www.", strlen("www.")) != 0)
		return 0;

	end = check_domain(data, size, 0);
	if (end == 0)
		return 0;

	/* the link runs up to the next whitespace */
	for (; end < size; end++) {
		if (isspace(data[end]))
			break;
	}

	end = autolink_delim(data, end, max_rewind, size);
	if (end == 0)
		return 0;

	bufput(link, data, end);
	*rewind_p = 0;

	return (int)end;
}
|
||||
|
||||
/*
 * Try to autolink an e-mail address; `data` points into the address and
 * up to `max_rewind` bytes to its left may be inspected.
 *
 * The local part is collected by walking left over [A-Za-z0-9.+-_]; the
 * remainder is collected by walking right over alphanumerics, '@', '.',
 * '-' and '_'.  The right-hand side must contain exactly one '@' and at
 * least one '.' that is not the last byte of the buffer.
 *
 * On success the full address is appended to `link`, `*rewind_p` receives
 * the number of bytes taken from the left, and the number of bytes
 * consumed to the right is returned; otherwise 0.  `flags` is not used.
 */
size_t
sd_autolink__email(
	size_t *rewind_p,
	struct buf *link,
	uint8_t *data,
	size_t max_rewind,
	size_t size,
	unsigned int flags)
{
	size_t tail = 0, head = 0;
	int at_signs = 0, dots = 0;

	/* walk left over the local part */
	while (head < max_rewind) {
		uint8_t c = data[-head - 1];

		if (c == 0)
			break;

		if (!isalnum(c) && strchr(".+-_", c) == NULL)
			break;

		head++;
	}

	if (head == 0)
		return 0;

	/* walk right over "@domain" */
	while (tail < size) {
		uint8_t c = data[tail];

		if (!isalnum(c)) {
			if (c == '@')
				at_signs++;
			else if (c == '.' && tail < size - 1)
				dots++;
			else if (c != '-' && c != '_')
				break;
		}

		tail++;
	}

	if (tail < 2 || at_signs != 1 || dots == 0)
		return 0;

	tail = autolink_delim(data, tail, max_rewind, size);
	if (tail == 0)
		return 0;

	bufput(link, data - head, tail + head);
	*rewind_p = head;

	return tail;
}
|
||||
|
||||
size_t
|
||||
sd_autolink__url(
|
||||
size_t *rewind_p,
|
||||
struct buf *link,
|
||||
uint8_t *data,
|
||||
size_t max_rewind,
|
||||
size_t size,
|
||||
unsigned int flags)
|
||||
{
|
||||
size_t link_end, rewind = 0, domain_len;
|
||||
|
||||
if (size < 4 || data[1] != '/' || data[2] != '/')
|
||||
return 0;
|
||||
|
||||
while (rewind < max_rewind && isalpha(data[-rewind - 1]))
|
||||
rewind++;
|
||||
|
||||
if (!sd_autolink_issafe(data - rewind, size + rewind))
|
||||
return 0;
|
||||
|
||||
link_end = strlen("://");
|
||||
|
||||
domain_len = check_domain(
|
||||
data + link_end,
|
||||
size - link_end,
|
||||
flags & SD_AUTOLINK_SHORT_DOMAINS);
|
||||
|
||||
if (domain_len == 0)
|
||||
return 0;
|
||||
|
||||
link_end += domain_len;
|
||||
while (link_end < size && !isspace(data[link_end]))
|
||||
link_end++;
|
||||
|
||||
link_end = autolink_delim(data, link_end, max_rewind, size);
|
||||
|
||||
if (link_end == 0)
|
||||
return 0;
|
||||
|
||||
bufput(link, data - rewind, link_end + rewind);
|
||||
*rewind_p = rewind;
|
||||
|
||||
return link_end;
|
||||
}
|
||||
|
||||
/*
 * Try to autolink a subreddit reference.
 *
 * This is meant to handle both r/foo and /r/foo style subreddit references.
 * In a valid /?r/ link, `*data` will always point to the '/' after the first 'r'.
 * In pseudo-regex, this matches something like:
 *
 * `(/|(?<=\b))r/(all-)?%subreddit%([-+]%subreddit%)*(/[\w\-/]*)?`
 * where %subreddit% == `((t:)?\w{2,24}|reddit\.com)`
 *
 * On success the link text (including the prefix to the left of `data`)
 * is appended to `link`, `*rewind_p` receives the prefix length,
 * `*no_slash` is set to 1 when the reference had no leading '/', and the
 * number of bytes consumed from `data` onwards is returned.  Returns 0
 * when there is no match.
 */
size_t
sd_autolink__subreddit(
	size_t *rewind_p,
	struct buf *link,
	uint8_t *data,
	size_t max_rewind,
	size_t max_lookbehind,
	size_t size,
	int *no_slash
	)
{
	size_t link_end;
	size_t rewind;
	int is_allminus = 0;

	rewind = check_reddit_autolink_prefix(data, max_rewind, max_lookbehind, size, 'r');
	if (!rewind)
		return 0;

	/* offset to the "meat" of the link */
	link_end = strlen("/");

	if (size >= link_end + 4 && strncasecmp((char*)data + link_end, "all-", 4) == 0)
		is_allminus = 1;

	for (;;) {
		size_t start = link_end;
		int max_length = 24;

		/* special case: /r/reddit.com (only subreddit containing '.'). */
		if ( size >= link_end+10 && strncasecmp((char*)data+link_end, "reddit.com", 10) == 0 ) {
			link_end += 10;
			/* Make sure there are no trailing characters (don't do
			 * any autolinking for /r/reddit.commission) */
			max_length = 10;
		}

		/* If not a special case, verify it begins with (t:)?[A-Za-z0-9] */
		else {
			/* support autolinking to timereddits, /r/t:when (1 April 2012) */
			if ( size > link_end+2 && strncasecmp((char*)data+link_end, "t:", 2) == 0 )
				link_end += 2;  /* Jump over the 't:' */

			/* The first character of a subreddit name must be a letter
			 * or digit.  After a trailing '+' / '-' separator `link_end`
			 * can equal `size`, so check the bound before reading. */
			if (link_end >= size || !isalnum(data[link_end]))
				return 0;
			link_end += 1;
		}

		/* consume valid characters ([A-Za-z0-9_]) until we run out */
		while (link_end < size && (isalnum(data[link_end]) ||
							data[link_end] == '_'))
			link_end++;

		/* Don't bother autolinking names shorter than 2 characters or
		 * longer than `max_length`.
		 * (chksrname function in reddit/.../validator.py) */
		if ( link_end-start < 2 || link_end-start > max_length )
			return 0;

		/* If we are linking to a multireddit, step over the separator
		 * and parse the next name; otherwise we are done. */
		if (link_end < size &&
			(data[link_end] == '+' || (is_allminus && data[link_end] == '-')))
			link_end++;
		else
			break;
	}

	/* optional trailing path: /[A-Za-z0-9_/-]* */
	if (link_end < size && data[link_end] == '/') {
		while (link_end < size && (isalnum(data[link_end]) ||
							data[link_end] == '_' ||
							data[link_end] == '/' ||
							data[link_end] == '-'))
			link_end++;
	}

	/* make the link */
	bufput(link, data - rewind, link_end + rewind);

	*no_slash = (rewind == 1);
	*rewind_p = rewind;

	return link_end;
}
|
||||
|
||||
/*
 * Try to autolink a user reference (u/name or /u/name); `data` points at
 * the '/' that follows the 'u'.
 *
 * On success the link text (including the prefix to the left of `data`)
 * is appended to `link`, `*rewind_p` receives the prefix length,
 * `*no_slash` is set to 1 when the reference had no leading '/', and the
 * number of bytes consumed from `data` onwards is returned.  Returns 0
 * when there is no match.
 */
size_t
sd_autolink__username(
	size_t *rewind_p,
	struct buf *link,
	uint8_t *data,
	size_t max_rewind,
	size_t max_lookbehind,
	size_t size,
	int *no_slash
	)
{
	size_t prefix_len;
	size_t end;
	uint8_t first;

	if (size < 3)
		return 0;

	prefix_len = check_reddit_autolink_prefix(data, max_rewind, max_lookbehind, size, 'u');
	if (prefix_len == 0)
		return 0;

	end = strlen("/");

	/* the first letter of a username must... well, be valid, we don't care otherwise */
	first = data[end];
	if (!(isalnum(first) || first == '_' || first == '-'))
		return 0;

	/* consume valid characters ([A-Za-z0-9_-/]) until we run out */
	for (end += 1; end < size; end++) {
		uint8_t c = data[end];

		if (!isalnum(c) && c != '_' && c != '/' && c != '-')
			break;
	}

	/* make the link */
	bufput(link, data - prefix_len, end + prefix_len);

	*no_slash = (prefix_len == 1);
	*rewind_p = prefix_len;

	return end;
}
|
59
SnudownTest/src/autolink.h
Normal file
59
SnudownTest/src/autolink.h
Normal file
|
@ -0,0 +1,59 @@
|
|||
/*
|
||||
* Copyright (c) 2011, Vicent Marti
|
||||
*
|
||||
* Permission to use, copy, modify, and distribute this software for any
|
||||
* purpose with or without fee is hereby granted, provided that the above
|
||||
* copyright notice and this permission notice appear in all copies.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef UPSKIRT_AUTOLINK_H
|
||||
#define UPSKIRT_AUTOLINK_H
|
||||
|
||||
#include "buffer.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Bit flags accepted by the autolink helpers' `flags` argument. */
enum {
	/* accept domains without a dot when checking scheme:// URLs */
	SD_AUTOLINK_SHORT_DOMAINS = (1 << 0)
};
|
||||
|
||||
int
|
||||
sd_autolink_issafe(const uint8_t *link, size_t link_len);
|
||||
|
||||
size_t
|
||||
sd_autolink__www(size_t *rewind_p, struct buf *link,
|
||||
uint8_t *data, size_t max_rewind, size_t size, unsigned int flags);
|
||||
|
||||
size_t
|
||||
sd_autolink__email(size_t *rewind_p, struct buf *link,
|
||||
uint8_t *data, size_t max_rewind, size_t size, unsigned int flags);
|
||||
|
||||
size_t
|
||||
sd_autolink__url(size_t *rewind_p, struct buf *link,
|
||||
uint8_t *data, size_t max_rewind, size_t size, unsigned int flags);
|
||||
|
||||
extern size_t
|
||||
sd_autolink__subreddit(size_t *rewind_p, struct buf *link, uint8_t *data,
|
||||
size_t max_rewind, size_t max_lookbehind, size_t size, int *no_slash);
|
||||
|
||||
extern size_t
|
||||
sd_autolink__username(size_t *rewind_p, struct buf *link, uint8_t *data,
|
||||
size_t max_rewind, size_t max_lookbehind, size_t size, int *no_slash);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
/* vim: set filetype=c: */
|
236
SnudownTest/src/buffer.c
Normal file
236
SnudownTest/src/buffer.c
Normal file
|
@ -0,0 +1,236 @@
|
|||
/*
|
||||
* Copyright (c) 2008, Natacha Porté
|
||||
* Copyright (c) 2011, Vicent Martí
|
||||
*
|
||||
* Permission to use, copy, modify, and distribute this software for any
|
||||
* purpose with or without fee is hereby granted, provided that the above
|
||||
* copyright notice and this permission notice appear in all copies.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*/
|
||||
|
||||
#define BUFFER_MAX_ALLOC_SIZE (1024 * 1024 * 16) //16mb
|
||||
|
||||
#include "buffer.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
|
||||
/* MSVC compat */
|
||||
#if defined(_MSC_VER)
|
||||
# define _buf_vsnprintf _vsnprintf
|
||||
#else
|
||||
# define _buf_vsnprintf vsnprintf
|
||||
#endif
|
||||
|
||||
int
|
||||
bufprefix(const struct buf *buf, const char *prefix)
|
||||
{
|
||||
size_t i;
|
||||
assert(buf && buf->unit);
|
||||
|
||||
for (i = 0; i < buf->size; ++i) {
|
||||
if (prefix[i] == 0)
|
||||
return 0;
|
||||
|
||||
if (buf->data[i] != prefix[i])
|
||||
return buf->data[i] - prefix[i];
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* bufgrow: increasing the allocated size to the given value */
|
||||
int
|
||||
bufgrow(struct buf *buf, size_t neosz)
|
||||
{
|
||||
size_t neoasz;
|
||||
void *neodata;
|
||||
|
||||
assert(buf && buf->unit);
|
||||
|
||||
if (neosz > BUFFER_MAX_ALLOC_SIZE)
|
||||
return BUF_ENOMEM;
|
||||
|
||||
if (buf->asize >= neosz)
|
||||
return BUF_OK;
|
||||
|
||||
neoasz = buf->asize + buf->unit;
|
||||
while (neoasz < neosz)
|
||||
neoasz += buf->unit;
|
||||
|
||||
neodata = realloc(buf->data, neoasz);
|
||||
if (!neodata)
|
||||
return BUF_ENOMEM;
|
||||
|
||||
buf->data = neodata;
|
||||
buf->asize = neoasz;
|
||||
return BUF_OK;
|
||||
}
|
||||
|
||||
|
||||
/* bufnew: allocation of a new buffer */
|
||||
struct buf *
|
||||
bufnew(size_t unit)
|
||||
{
|
||||
struct buf *ret;
|
||||
ret = malloc(sizeof (struct buf));
|
||||
|
||||
if (ret) {
|
||||
ret->data = 0;
|
||||
ret->size = ret->asize = 0;
|
||||
ret->unit = unit;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* bufnullterm: NULL-termination of the string array */
|
||||
const char *
|
||||
bufcstr(struct buf *buf)
|
||||
{
|
||||
assert(buf && buf->unit);
|
||||
|
||||
if (buf->size < buf->asize && buf->data[buf->size] == 0)
|
||||
return (char *)buf->data;
|
||||
|
||||
if (buf->size + 1 <= buf->asize || bufgrow(buf, buf->size + 1) == 0) {
|
||||
buf->data[buf->size] = 0;
|
||||
return (char *)buf->data;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* bufprintf: formatted printing to a buffer */
|
||||
void
|
||||
bufprintf(struct buf *buf, const char *fmt, ...)
|
||||
{
|
||||
va_list ap;
|
||||
int n;
|
||||
|
||||
assert(buf && buf->unit);
|
||||
|
||||
if (buf->size >= buf->asize && bufgrow(buf, buf->size + 1) < 0)
|
||||
return;
|
||||
va_start(ap, fmt);
|
||||
n = _buf_vsnprintf((char *)buf->data + buf->size, buf->asize - buf->size, fmt, ap);
|
||||
va_end(ap);
|
||||
|
||||
if (n < 0) {
|
||||
#ifdef _MSC_VER
|
||||
va_start(ap, fmt);
|
||||
n = _vscprintf(fmt, ap);
|
||||
va_end(ap);
|
||||
#else
|
||||
return;
|
||||
#endif
|
||||
}
|
||||
if ((size_t)n >= buf->asize - buf->size) {
|
||||
if (bufgrow(buf, buf->size + n + 1) < 0)
|
||||
return;
|
||||
|
||||
va_start(ap, fmt);
|
||||
n = _buf_vsnprintf((char *)buf->data + buf->size, buf->asize - buf->size, fmt, ap);
|
||||
va_end(ap);
|
||||
}
|
||||
|
||||
if (n < 0)
|
||||
return;
|
||||
|
||||
buf->size += n;
|
||||
}
|
||||
|
||||
/* bufput: appends raw data to a buffer */
|
||||
void
|
||||
bufput(struct buf *buf, const void *data, size_t len)
|
||||
{
|
||||
assert(buf && buf->unit);
|
||||
|
||||
if (buf->size + len > buf->asize && bufgrow(buf, buf->size + len) < 0)
|
||||
return;
|
||||
|
||||
memcpy(buf->data + buf->size, data, len);
|
||||
buf->size += len;
|
||||
}
|
||||
|
||||
/*
 * bufputs: append a NUL-terminated string to a buffer; the terminator
 * itself is not copied.
 */
void
bufputs(struct buf *buf, const char *str)
{
	size_t len = strlen(str);

	bufput(buf, str, len);
}
|
||||
|
||||
|
||||
/* bufputc: appends a single uint8_t to a buffer */
|
||||
void
|
||||
bufputc(struct buf *buf, int c)
|
||||
{
|
||||
assert(buf && buf->unit);
|
||||
|
||||
if (buf->size + 1 > buf->asize && bufgrow(buf, buf->size + 1) < 0)
|
||||
return;
|
||||
|
||||
buf->data[buf->size] = c;
|
||||
buf->size += 1;
|
||||
}
|
||||
|
||||
/* bufrelease: decrease the reference count and free the buffer if needed */
|
||||
void
|
||||
bufrelease(struct buf *buf)
|
||||
{
|
||||
if (!buf)
|
||||
return;
|
||||
|
||||
free(buf->data);
|
||||
free(buf);
|
||||
}
|
||||
|
||||
|
||||
/* bufreset: frees internal data of the buffer */
|
||||
void
|
||||
bufreset(struct buf *buf)
|
||||
{
|
||||
if (!buf)
|
||||
return;
|
||||
|
||||
free(buf->data);
|
||||
buf->data = NULL;
|
||||
buf->size = buf->asize = 0;
|
||||
}
|
||||
|
||||
/* bufslurp: removes a given number of bytes from the head of the array */
|
||||
void
|
||||
bufslurp(struct buf *buf, size_t len)
|
||||
{
|
||||
assert(buf && buf->unit);
|
||||
|
||||
if (len >= buf->size) {
|
||||
buf->size = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
buf->size -= len;
|
||||
memmove(buf->data, buf->data + len, buf->size);
|
||||
}
|
||||
|
||||
/* buftrucate: truncates the buffer at `size` */
|
||||
int
|
||||
buftruncate(struct buf *buf, size_t size)
|
||||
{
|
||||
if (buf->size < size || size < 0) {
|
||||
/* bail out in debug mode so we can figure out why this happened */
|
||||
assert(0);
|
||||
return BUF_EINVALIDIDX;
|
||||
}
|
||||
|
||||
buf->size = size;
|
||||
return BUF_OK;
|
||||
}
|
100
SnudownTest/src/buffer.h
Normal file
100
SnudownTest/src/buffer.h
Normal file
|
@ -0,0 +1,100 @@
|
|||
/*
|
||||
* Copyright (c) 2008, Natacha Porté
|
||||
* Copyright (c) 2011, Vicent Martí
|
||||
*
|
||||
* Permission to use, copy, modify, and distribute this software for any
|
||||
* purpose with or without fee is hereby granted, provided that the above
|
||||
* copyright notice and this permission notice appear in all copies.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef BUFFER_H__
|
||||
#define BUFFER_H__
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdarg.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#define __attribute__(x)
|
||||
#define inline
|
||||
#endif
|
||||
|
||||
/* Status codes returned by the buffer functions. */
typedef enum {
	BUF_OK = 0,            /* success */
	BUF_ENOMEM = -1,       /* allocation failed or size limit exceeded */
	BUF_EINVALIDIDX = -2   /* index/size argument out of range */
} buferror_t;
|
||||
|
||||
/* struct buf: growable byte array used for all input and output text */
struct buf {
	/* actual character data */
	uint8_t *data;
	/* size of the string */
	size_t size;
	/* allocated size (0 = volatile buffer) */
	size_t asize;
	/* reallocation unit size (0 = read-only buffer) */
	size_t unit;
};
|
||||
|
||||
/* BUF_STATIC: initializer for a read-only buffer wrapping a string literal.
 * Fields, in order: data, size (without the NUL), asize, unit (0 = read-only). */
#define BUF_STATIC(string) \
	{ (uint8_t *)string, sizeof string -1, sizeof string, 0 }

/* BUF_VOLATILE: initializer for a volatile buffer on the stack wrapping an
 * existing C string (asize 0 = volatile, unit 0 = read-only); needs <string.h> */
#define BUF_VOLATILE(strname) \
	{ (uint8_t *)strname, strlen(strname), 0, 0 }

/* BUFPUTSL: optimized bufputs of a string literal */
#define BUFPUTSL(output, literal) \
	bufput(output, literal, sizeof literal - 1)
|
||||
|
||||
/* bufgrow: increasing the allocated size to the given value */
|
||||
int bufgrow(struct buf *, size_t);
|
||||
|
||||
/* bufnew: allocation of a new buffer */
|
||||
struct buf *bufnew(size_t) __attribute__ ((malloc));
|
||||
|
||||
/* bufcstr: NUL-termination of the string array (making a C-string) */
|
||||
const char *bufcstr(struct buf *);
|
||||
|
||||
/* bufprefix: compare the beginning of a buffer with a string */
|
||||
int bufprefix(const struct buf *buf, const char *prefix);
|
||||
|
||||
/* bufput: appends raw data to a buffer */
|
||||
void bufput(struct buf *, const void *, size_t);
|
||||
|
||||
/* bufputs: appends a NUL-terminated string to a buffer */
|
||||
void bufputs(struct buf *, const char *);
|
||||
|
||||
/* bufputc: appends a single char to a buffer */
|
||||
void bufputc(struct buf *, int);
|
||||
|
||||
/* bufrelease: frees the buffer and its data (NULL is ignored) */
|
||||
void bufrelease(struct buf *);
|
||||
|
||||
/* bufreset: frees internal data of the buffer */
|
||||
void bufreset(struct buf *);
|
||||
|
||||
/* bufslurp: removes a given number of bytes from the head of the array */
|
||||
void bufslurp(struct buf *, size_t);
|
||||
|
||||
/* bufprintf: formatted printing to a buffer */
|
||||
void bufprintf(struct buf *, const char *, ...) __attribute__ ((format (printf, 2, 3)));
|
||||
|
||||
/* buftruncate: truncates the buffer at `size` */
|
||||
int buftruncate(struct buf *buf, size_t size);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
206
SnudownTest/src/html_blocks.h
Normal file
206
SnudownTest/src/html_blocks.h
Normal file
|
@ -0,0 +1,206 @@
|
|||
/* C code produced by gperf version 3.0.3 */
|
||||
/* Command-line: gperf -N find_block_tag -H hash_block_tag -C -c -E --ignore-case html_block_names.txt */
|
||||
/* Computed positions: -k'1-2' */
|
||||
|
||||
/* The generated hash tables assume an ISO-646 (ASCII) based execution
 * character set; refuse to compile anywhere else. */
#if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \
      && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \
      && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \
      && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \
      && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \
      && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \
      && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \
      && ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \
      && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \
      && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \
      && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \
      && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \
      && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \
      && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \
      && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \
      && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \
      && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \
      && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \
      && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \
      && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \
      && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \
      && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \
      && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126))
/* The character set is not based on ISO-646.  */
#error "gperf generated tables don't work with this execution character set. Please report a bug to <bug-gnu-gperf@gnu.org>."
#endif
|
||||
|
||||
/* maximum key range = 37, duplicates = 0 */
|
||||
|
||||
#ifndef GPERF_DOWNCASE
#define GPERF_DOWNCASE 1
/* ASCII case-folding table: maps 'A'..'Z' (65..90) to 'a'..'z' (97..122)
 * and every other byte value to itself.  Laid out 16 entries per row. */
static unsigned char gperf_downcase[256] =
  {
      0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,
     16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,
     32,  33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,
     48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,
     64,  97,  98,  99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
    112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122,  91,  92,  93,  94,  95,
     96,  97,  98,  99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
    112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
    128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
    144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
    160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
    176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
    192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
    208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
    224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
    240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255
  };
#endif
|
||||
|
||||
#ifndef GPERF_CASE_STRNCMP
|
||||
#define GPERF_CASE_STRNCMP 1
|
||||
static int
|
||||
gperf_case_strncmp (s1, s2, n)
|
||||
register const char *s1;
|
||||
register const char *s2;
|
||||
register unsigned int n;
|
||||
{
|
||||
for (; n > 0;)
|
||||
{
|
||||
unsigned char c1 = gperf_downcase[(unsigned char)*s1++];
|
||||
unsigned char c2 = gperf_downcase[(unsigned char)*s2++];
|
||||
if (c1 != 0 && c1 == c2)
|
||||
{
|
||||
n--;
|
||||
continue;
|
||||
}
|
||||
return (int)c1 - (int)c2;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef __GNUC__
|
||||
__inline
|
||||
#else
|
||||
#ifdef __cplusplus
|
||||
inline
|
||||
#endif
|
||||
#endif
|
||||
/*
 * hash_block_tag - perfect hash over the block-level tag names.
 *
 * The hash is len + asso_values[str[0]] (+ asso_values[str[1] + 1] when
 * len > 1).  Upper- and lower-case letters share the same association
 * value, so the hash is case-insensitive.  Bytes that cannot start a
 * known tag map to 38, which pushes the result above the largest valid
 * slot (37) so the caller rejects it.
 *
 * str must point to at least len readable bytes and len must be >= 1.
 *
 * Defined with a prototype rather than a K&R identifier list so it is
 * valid C++ and C23.
 */
static unsigned int
hash_block_tag (const char *str, unsigned int len)
{
  /* 257 entries: the second character is looked up at index c + 1 */
  static const unsigned char asso_values[] =
    {
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
       8, 30, 25, 20, 15, 10, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38,  0, 38,  0, 38,
       5,  5,  5, 15,  0, 38, 38,  0, 15, 10,
       0, 38, 38, 15,  0,  5, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38,  0, 38,
       0, 38,  5,  5,  5, 15,  0, 38, 38,  0,
      15, 10,  0, 38, 38, 15,  0,  5, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38
    };
  int hval = len;

  switch (hval)
    {
      default:
        hval += asso_values[(unsigned char)str[1]+1];
      /*FALLTHROUGH*/
      case 1:
        hval += asso_values[(unsigned char)str[0]];
        break;
    }
  return hval;
}
|
||||
|
||||
#ifdef __GNUC__
|
||||
__inline
|
||||
#ifdef __GNUC_STDC_INLINE__
|
||||
__attribute__ ((__gnu_inline__))
|
||||
#endif
|
||||
#endif
|
||||
/*
 * find_block_tag - case-insensitive lookup of a block-level tag name.
 *
 * str/len describe the candidate name (it need not be NUL-terminated).
 * Returns a pointer to the canonical lower-case name stored in the
 * word list when str is one of the 24 known tags, or 0 otherwise.
 *
 * Defined with a prototype rather than a K&R identifier list so it is
 * valid C++ and C23.
 */
const char *
find_block_tag (const char *str, unsigned int len)
{
  enum
    {
      TOTAL_KEYWORDS = 24,
      MIN_WORD_LENGTH = 1,
      MAX_WORD_LENGTH = 10,
      MIN_HASH_VALUE = 1,
      MAX_HASH_VALUE = 37
    };

  /* indexed by hash_block_tag(); "" marks an unused slot */
  static const char * const wordlist[] =
    {
      "",
      "p",
      "dl",
      "div",
      "math",
      "table",
      "",
      "ul",
      "del",
      "form",
      "blockquote",
      "figure",
      "ol",
      "fieldset",
      "",
      "h1",
      "",
      "h6",
      "pre",
      "", "",
      "script",
      "h5",
      "noscript",
      "",
      "style",
      "iframe",
      "h4",
      "ins",
      "", "", "",
      "h3",
      "", "", "", "",
      "h2"
    };

  if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
    {
      int key = hash_block_tag (str, len);

      if (key <= MAX_HASH_VALUE && key >= 0)
        {
          const char *s = wordlist[key];

          /* cheap first-byte test (ignoring the 0x20 case bit), then a
             full case-insensitive compare, then make sure the stored
             word is not longer than the candidate */
          if ((((unsigned char)*str ^ (unsigned char)*s) & ~32) == 0 && !gperf_case_strncmp (str, s, len) && s[len] == '\0')
            return s;
        }
    }
  return 0;
}
|
292
SnudownTest/src/html_entities.gperf
Normal file
292
SnudownTest/src/html_entities.gperf
Normal file
|
@ -0,0 +1,292 @@
|
|||
%language=ANSI-C
|
||||
%define lookup-function-name is_allowed_named_entity
|
||||
%compare-strncmp
|
||||
%readonly-tables
|
||||
%define hash-function-name hash_html_entity
|
||||
%enum
|
||||
%includes
|
||||
%{
|
||||
#include <stdlib.h>
|
||||
|
||||
/* Parsers tend to choke on entities with values greater than this */
const uint32_t max_num_entity_val = 0x10ffff;
/* Any numeric entity longer than this is obviously above max_num_entity_val;
 * checking the length first avoids dealing with overflows. */
const size_t MAX_NUM_ENTITY_LEN = 7;

/*
 * is_valid_numeric_entity - tell whether a numeric character reference
 * value is safe to emit.
 *
 * Some XML parsers will choke on entities with certain values (mostly
 * control characters).  The values rejected here are, inclusive on both
 * ends:
 *
 *	0 .. 8
 *	11 .. 12
 *	14 .. 31
 *	55296 .. 57343	(0xD800 .. 0xDFFF)
 *	65534 .. 65535	(0xFFFE .. 0xFFFF)
 *	anything above max_num_entity_val
 *
 * Returns non-zero when entity_val is acceptable, 0 otherwise.
 *
 * Declared `static inline`: a plain `inline` definition provides no
 * external definition under C99 rules, so a call the compiler chooses
 * not to inline would fail to link.
 */
static inline int is_valid_numeric_entity(uint32_t entity_val)
{
	return (entity_val > 8
		&& (entity_val != 11 && entity_val != 12)
		&& (entity_val < 14 || entity_val > 31)
		&& (entity_val < 55296 || entity_val > 57343)
		&& (entity_val != 65534 && entity_val != 65535)
		&& entity_val <= max_num_entity_val);
}
|
||||
|
||||
%}
|
||||
%%
|
||||
Æ
|
||||
Á
|
||||
Â
|
||||
À
|
||||
Α
|
||||
Å
|
||||
Ã
|
||||
Ä
|
||||
Β
|
||||
Ç
|
||||
Χ
|
||||
‡
|
||||
Δ
|
||||
Ð
|
||||
É
|
||||
Ê
|
||||
È
|
||||
Ε
|
||||
Η
|
||||
Ë
|
||||
Γ
|
||||
Í
|
||||
Î
|
||||
Ì
|
||||
Ι
|
||||
Ï
|
||||
Κ
|
||||
Λ
|
||||
Μ
|
||||
Ñ
|
||||
Ν
|
||||
Œ
|
||||
Ó
|
||||
Ô
|
||||
Ò
|
||||
Ω
|
||||
Ο
|
||||
Ø
|
||||
Õ
|
||||
Ö
|
||||
Φ
|
||||
Π
|
||||
″
|
||||
Ψ
|
||||
Ρ
|
||||
Š
|
||||
Σ
|
||||
Þ
|
||||
Τ
|
||||
Θ
|
||||
Ú
|
||||
Û
|
||||
Ù
|
||||
Υ
|
||||
Ü
|
||||
Ξ
|
||||
Ý
|
||||
Ÿ
|
||||
Ζ
|
||||
á
|
||||
â
|
||||
´
|
||||
æ
|
||||
à
|
||||
ℵ
|
||||
α
|
||||
&
|
||||
∧
|
||||
∠
|
||||
'
|
||||
å
|
||||
≈
|
||||
ã
|
||||
ä
|
||||
„
|
||||
β
|
||||
¦
|
||||
•
|
||||
∩
|
||||
ç
|
||||
¸
|
||||
¢
|
||||
χ
|
||||
ˆ
|
||||
♣
|
||||
≅
|
||||
©
|
||||
↵
|
||||
∪
|
||||
¤
|
||||
⇓
|
||||
†
|
||||
↓
|
||||
°
|
||||
δ
|
||||
♦
|
||||
÷
|
||||
é
|
||||
ê
|
||||
è
|
||||
∅
|
||||
 
|
||||
 
|
||||
ε
|
||||
≡
|
||||
η
|
||||
ð
|
||||
ë
|
||||
€
|
||||
∃
|
||||
ƒ
|
||||
∀
|
||||
½
|
||||
¼
|
||||
¾
|
||||
⁄
|
||||
γ
|
||||
≥
|
||||
>
|
||||
⇔
|
||||
↔
|
||||
♥
|
||||
…
|
||||
í
|
||||
î
|
||||
¡
|
||||
ì
|
||||
ℑ
|
||||
∞
|
||||
∫
|
||||
ι
|
||||
¿
|
||||
∈
|
||||
ï
|
||||
κ
|
||||
⇐
|
||||
λ
|
||||
⟨
|
||||
«
|
||||
←
|
||||
⌈
|
||||
“
|
||||
≤
|
||||
⌊
|
||||
∗
|
||||
◊
|
||||
‎
|
||||
‹
|
||||
‘
|
||||
<
|
||||
¯
|
||||
—
|
||||
µ
|
||||
·
|
||||
−
|
||||
μ
|
||||
∇
|
||||
|
||||
–
|
||||
≠
|
||||
∋
|
||||
¬
|
||||
∉
|
||||
⊄
|
||||
ñ
|
||||
ν
|
||||
ó
|
||||
ô
|
||||
œ
|
||||
ò
|
||||
‾
|
||||
ω
|
||||
ο
|
||||
⊕
|
||||
∨
|
||||
ª
|
||||
º
|
||||
ø
|
||||
õ
|
||||
⊗
|
||||
ö
|
||||
¶
|
||||
∂
|
||||
‰
|
||||
⊥
|
||||
φ
|
||||
π
|
||||
ϖ
|
||||
±
|
||||
£
|
||||
′
|
||||
∏
|
||||
∝
|
||||
ψ
|
||||
"
|
||||
⇒
|
||||
√
|
||||
⟩
|
||||
»
|
||||
→
|
||||
⌉
|
||||
”
|
||||
ℜ
|
||||
®
|
||||
⌋
|
||||
ρ
|
||||
‏
|
||||
›
|
||||
’
|
||||
‚
|
||||
š
|
||||
⋅
|
||||
§
|
||||
­
|
||||
σ
|
||||
ς
|
||||
∼
|
||||
♠
|
||||
⊂
|
||||
⊆
|
||||
∑
|
||||
¹
|
||||
²
|
||||
³
|
||||
⊃
|
||||
⊇
|
||||
ß
|
||||
τ
|
||||
∴
|
||||
θ
|
||||
ϑ
|
||||
 
|
||||
þ
|
||||
˜
|
||||
×
|
||||
™
|
||||
⇑
|
||||
ú
|
||||
↑
|
||||
û
|
||||
ù
|
||||
¨
|
||||
ϒ
|
||||
υ
|
||||
ü
|
||||
℘
|
||||
ξ
|
||||
ý
|
||||
¥
|
||||
ÿ
|
||||
ζ
|
||||
‍
|
||||
‌
|
2661
SnudownTest/src/markdown.c
Normal file
2661
SnudownTest/src/markdown.c
Normal file
File diff suppressed because it is too large
Load diff
140
SnudownTest/src/markdown.h
Normal file
140
SnudownTest/src/markdown.h
Normal file
|
@ -0,0 +1,140 @@
|
|||
/* markdown.h - generic markdown parser */
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009, Natacha Porté
|
||||
*
|
||||
* Permission to use, copy, modify, and distribute this software for any
|
||||
* purpose with or without fee is hereby granted, provided that the above
|
||||
* copyright notice and this permission notice appear in all copies.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef UPSKIRT_MARKDOWN_H
|
||||
#define UPSKIRT_MARKDOWN_H
|
||||
|
||||
#include "buffer.h"
|
||||
#include "autolink.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define SUNDOWN_VERSION "1.16.0"
|
||||
#define SUNDOWN_VER_MAJOR 1
|
||||
#define SUNDOWN_VER_MINOR 16
|
||||
#define SUNDOWN_VER_REVISION 0
|
||||
|
||||
/********************
|
||||
* TYPE DEFINITIONS *
|
||||
********************/
|
||||
|
||||
/* mkd_autolink - type of autolink */
|
||||
enum mkd_autolink {
|
||||
MKDA_NOT_AUTOLINK, /* used internally when it is not an autolink */
|
||||
MKDA_NORMAL, /* normal http/ftp/mailto/etc link */
|
||||
MKDA_EMAIL, /* e-mail link without explicit mailto: */
|
||||
};
|
||||
|
||||
enum mkd_tableflags { /* presumably the `flags` value given to the table_cell callback - TODO confirm */
|
||||
MKD_TABLE_ALIGN_L = 1,
|
||||
MKD_TABLE_ALIGN_R = 2,
|
||||
MKD_TABLE_ALIGN_CENTER = 3, /* same value as ALIGN_L | ALIGN_R */
|
||||
MKD_TABLE_ALIGNMASK = 3, /* mask covering the two alignment bits */
|
||||
MKD_TABLE_HEADER = 4 /* separate bit, outside the alignment mask */
|
||||
};
|
||||
|
||||
enum mkd_extensions { /* one bit per extension; presumably OR-ed into the `extensions` argument of sd_markdown_new - TODO confirm */
|
||||
MKDEXT_NO_INTRA_EMPHASIS = (1 << 0),
|
||||
MKDEXT_TABLES = (1 << 1),
|
||||
MKDEXT_FENCED_CODE = (1 << 2),
|
||||
MKDEXT_AUTOLINK = (1 << 3),
|
||||
MKDEXT_STRIKETHROUGH = (1 << 4),
|
||||
MKDEXT_SPACE_HEADERS = (1 << 6), /* note: bit 5 is not assigned in this enum */
|
||||
MKDEXT_SUPERSCRIPT = (1 << 7),
|
||||
MKDEXT_LAX_SPACING = (1 << 8),
|
||||
MKDEXT_NO_EMAIL_AUTOLINK = (1 << 9),
|
||||
};
|
||||
|
||||
/* sd_callbacks - functions for rendering parsed data */
|
||||
struct sd_callbacks { /* every callback takes the output buffer `ob` first and an `opaque` user pointer */
|
||||
/* block level callbacks - NULL skips the block */
|
||||
void (*blockcode)(struct buf *ob, const struct buf *text, const struct buf *lang, void *opaque);
|
||||
void (*blockquote)(struct buf *ob, const struct buf *text, void *opaque);
|
||||
void (*blockhtml)(struct buf *ob,const struct buf *text, void *opaque);
|
||||
void (*header)(struct buf *ob, const struct buf *text, int level, void *opaque);
|
||||
void (*hrule)(struct buf *ob, void *opaque);
|
||||
void (*list)(struct buf *ob, const struct buf *text, int flags, void *opaque);
|
||||
void (*listitem)(struct buf *ob, const struct buf *text, int flags, void *opaque);
|
||||
void (*paragraph)(struct buf *ob, const struct buf *text, void *opaque);
|
||||
void (*table)(struct buf *ob, const struct buf *header, const struct buf *body, void *opaque);
|
||||
void (*table_row)(struct buf *ob, const struct buf *text, void *opaque);
|
||||
void (*table_cell)(struct buf *ob, const struct buf *text, int flags, void *opaque, int col_span); /* NOTE: col_span comes after opaque here; in every other callback opaque is last */
|
||||
|
||||
|
||||
/* span level callbacks - NULL or return 0 prints the span verbatim */
|
||||
int (*autolink)(struct buf *ob, const struct buf *link, enum mkd_autolink type, void *opaque);
|
||||
int (*codespan)(struct buf *ob, const struct buf *text, void *opaque);
|
||||
int (*double_emphasis)(struct buf *ob, const struct buf *text, void *opaque);
|
||||
int (*emphasis)(struct buf *ob, const struct buf *text, void *opaque);
|
||||
int (*image)(struct buf *ob, const struct buf *link, const struct buf *title, const struct buf *alt, void *opaque);
|
||||
int (*linebreak)(struct buf *ob, void *opaque);
|
||||
int (*link)(struct buf *ob, const struct buf *link, const struct buf *title, const struct buf *content, void *opaque);
|
||||
int (*raw_html_tag)(struct buf *ob, const struct buf *tag, void *opaque);
|
||||
int (*triple_emphasis)(struct buf *ob, const struct buf *text, void *opaque);
|
||||
int (*strikethrough)(struct buf *ob, const struct buf *text, void *opaque);
|
||||
int (*superscript)(struct buf *ob, const struct buf *text, void *opaque);
|
||||
|
||||
/* low level callbacks - NULL copies input directly into the output */
|
||||
void (*entity)(struct buf *ob, const struct buf *entity, void *opaque);
|
||||
void (*normal_text)(struct buf *ob, const struct buf *text, void *opaque);
|
||||
|
||||
/* header and footer */
|
||||
void (*doc_header)(struct buf *ob, void *opaque);
|
||||
void (*doc_footer)(struct buf *ob, void *opaque);
|
||||
};
|
||||
|
||||
struct sd_markdown;
|
||||
|
||||
/*********
|
||||
* FLAGS *
|
||||
*********/
|
||||
|
||||
/* list/listitem flags */
|
||||
#define MKD_LIST_ORDERED 1
|
||||
#define MKD_LI_BLOCK 2 /* <li> containing block data */
|
||||
|
||||
/**********************
|
||||
* EXPORTED FUNCTIONS *
|
||||
**********************/
|
||||
|
||||
extern struct sd_markdown *
|
||||
sd_markdown_new(
|
||||
unsigned int extensions,
|
||||
size_t max_nesting,
|
||||
size_t max_table_cols,
|
||||
const struct sd_callbacks *callbacks,
|
||||
void *opaque);
|
||||
|
||||
extern void
|
||||
sd_markdown_render(struct buf *ob, const uint8_t *document, size_t doc_size, struct sd_markdown *md);
|
||||
|
||||
extern void
|
||||
sd_markdown_free(struct sd_markdown *md);
|
||||
|
||||
extern void
|
||||
sd_version(int *major, int *minor, int *revision);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
/* vim: set filetype=c: */
|
81
SnudownTest/src/stack.c
Normal file
81
SnudownTest/src/stack.c
Normal file
|
@ -0,0 +1,81 @@
|
|||
#include "stack.h"
|
||||
#include <string.h>
|
||||
|
||||
int
|
||||
stack_grow(struct stack *st, size_t new_size)
|
||||
{
|
||||
void **new_st;
|
||||
|
||||
if (st->asize >= new_size)
|
||||
return 0;
|
||||
|
||||
new_st = realloc(st->item, new_size * sizeof(void *));
|
||||
if (new_st == NULL)
|
||||
return -1;
|
||||
|
||||
memset(new_st + st->asize, 0x0,
|
||||
(new_size - st->asize) * sizeof(void *));
|
||||
|
||||
st->item = new_st;
|
||||
st->asize = new_size;
|
||||
|
||||
if (st->size > new_size)
|
||||
st->size = new_size;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void
|
||||
stack_free(struct stack *st)
|
||||
{
|
||||
if (!st)
|
||||
return;
|
||||
|
||||
free(st->item);
|
||||
|
||||
st->item = NULL;
|
||||
st->size = 0;
|
||||
st->asize = 0;
|
||||
}
|
||||
|
||||
int
|
||||
stack_init(struct stack *st, size_t initial_size)
|
||||
{
|
||||
st->item = NULL;
|
||||
st->size = 0;
|
||||
st->asize = 0;
|
||||
|
||||
if (!initial_size)
|
||||
initial_size = 8;
|
||||
|
||||
return stack_grow(st, initial_size);
|
||||
}
|
||||
|
||||
void *
|
||||
stack_pop(struct stack *st)
|
||||
{
|
||||
if (!st->size)
|
||||
return NULL;
|
||||
|
||||
return st->item[--st->size];
|
||||
}
|
||||
|
||||
int
|
||||
stack_push(struct stack *st, void *item)
|
||||
{
|
||||
if (stack_grow(st, st->size * 2) < 0)
|
||||
return -1;
|
||||
|
||||
st->item[st->size++] = item;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void *
|
||||
stack_top(struct stack *st)
|
||||
{
|
||||
if (!st->size)
|
||||
return NULL;
|
||||
|
||||
return st->item[st->size - 1];
|
||||
}
|
||||
|
29
SnudownTest/src/stack.h
Normal file
29
SnudownTest/src/stack.h
Normal file
|
@ -0,0 +1,29 @@
|
|||
#ifndef STACK_H__
|
||||
#define STACK_H__
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
struct stack { /* growable array of untyped pointers used as a LIFO */
|
||||
void **item; /* storage for the pushed pointers */
|
||||
size_t size; /* number of items currently on the stack */
|
||||
size_t asize; /* number of slots allocated in item */
|
||||
};
|
||||
|
||||
void stack_free(struct stack *);
|
||||
int stack_grow(struct stack *, size_t);
|
||||
int stack_init(struct stack *, size_t);
|
||||
|
||||
int stack_push(struct stack *, void *);
|
||||
|
||||
void *stack_pop(struct stack *);
|
||||
void *stack_top(struct stack *);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
81
SnudownTest/stack.c
Normal file
81
SnudownTest/stack.c
Normal file
|
@ -0,0 +1,81 @@
|
|||
#include "stack.h"
|
||||
#include <string.h>
|
||||
|
||||
int
|
||||
stack_grow(struct stack *st, size_t new_size)
|
||||
{
|
||||
void **new_st;
|
||||
|
||||
if (st->asize >= new_size)
|
||||
return 0;
|
||||
|
||||
new_st = realloc(st->item, new_size * sizeof(void *));
|
||||
if (new_st == NULL)
|
||||
return -1;
|
||||
|
||||
memset(new_st + st->asize, 0x0,
|
||||
(new_size - st->asize) * sizeof(void *));
|
||||
|
||||
st->item = new_st;
|
||||
st->asize = new_size;
|
||||
|
||||
if (st->size > new_size)
|
||||
st->size = new_size;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void
|
||||
stack_free(struct stack *st)
|
||||
{
|
||||
if (!st)
|
||||
return;
|
||||
|
||||
free(st->item);
|
||||
|
||||
st->item = NULL;
|
||||
st->size = 0;
|
||||
st->asize = 0;
|
||||
}
|
||||
|
||||
int
|
||||
stack_init(struct stack *st, size_t initial_size)
|
||||
{
|
||||
st->item = NULL;
|
||||
st->size = 0;
|
||||
st->asize = 0;
|
||||
|
||||
if (!initial_size)
|
||||
initial_size = 8;
|
||||
|
||||
return stack_grow(st, initial_size);
|
||||
}
|
||||
|
||||
void *
|
||||
stack_pop(struct stack *st)
|
||||
{
|
||||
if (!st->size)
|
||||
return NULL;
|
||||
|
||||
return st->item[--st->size];
|
||||
}
|
||||
|
||||
int
|
||||
stack_push(struct stack *st, void *item)
|
||||
{
|
||||
if (stack_grow(st, st->size * 2) < 0)
|
||||
return -1;
|
||||
|
||||
st->item[st->size++] = item;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void *
|
||||
stack_top(struct stack *st)
|
||||
{
|
||||
if (!st->size)
|
||||
return NULL;
|
||||
|
||||
return st->item[st->size - 1];
|
||||
}
|
||||
|
29
SnudownTest/stack.h
Normal file
29
SnudownTest/stack.h
Normal file
|
@ -0,0 +1,29 @@
|
|||
#ifndef STACK_H__
|
||||
#define STACK_H__
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
struct stack { /* growable array of untyped pointers used as a LIFO */
|
||||
void **item; /* storage for the pushed pointers */
|
||||
size_t size; /* number of items currently on the stack */
|
||||
size_t asize; /* number of slots allocated in item */
|
||||
};
|
||||
|
||||
void stack_free(struct stack *);
|
||||
int stack_grow(struct stack *, size_t);
|
||||
int stack_init(struct stack *, size_t);
|
||||
|
||||
int stack_push(struct stack *, void *);
|
||||
|
||||
void *stack_pop(struct stack *);
|
||||
void *stack_top(struct stack *);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
199
SnudownTest/stdint.h
Normal file
199
SnudownTest/stdint.h
Normal file
|
@ -0,0 +1,199 @@
|
|||
/* stdint.h standard header */
|
||||
#pragma once
|
||||
#ifndef _STDINT
|
||||
#define _STDINT
|
||||
#ifndef RC_INVOKED
|
||||
#include <yvals.h>
|
||||
|
||||
/* NB: assumes
|
||||
byte has 8 bits
|
||||
long is 32 bits
|
||||
pointer can convert to and from long long
|
||||
long long is longest type
|
||||
*/
|
||||
|
||||
_C_STD_BEGIN
|
||||
/* TYPE DEFINITIONS */
|
||||
typedef signed char int8_t;
|
||||
typedef short int16_t;
|
||||
typedef int int32_t;
|
||||
|
||||
typedef unsigned char uint8_t;
|
||||
typedef unsigned short uint16_t;
|
||||
typedef unsigned int uint32_t;
|
||||
|
||||
typedef signed char int_least8_t;
|
||||
typedef short int_least16_t;
|
||||
typedef int int_least32_t;
|
||||
|
||||
typedef unsigned char uint_least8_t;
|
||||
typedef unsigned short uint_least16_t;
|
||||
typedef unsigned int uint_least32_t;
|
||||
|
||||
typedef char int_fast8_t;
|
||||
typedef int int_fast16_t;
|
||||
typedef int int_fast32_t;
|
||||
|
||||
typedef unsigned char uint_fast8_t;
|
||||
typedef unsigned int uint_fast16_t;
|
||||
typedef unsigned int uint_fast32_t;
|
||||
|
||||
#ifndef _INTPTR_T_DEFINED
|
||||
#define _INTPTR_T_DEFINED
|
||||
#ifdef _WIN64
|
||||
typedef __int64 intptr_t;
|
||||
#else /* _WIN64 */
|
||||
typedef _W64 int intptr_t;
|
||||
#endif /* _WIN64 */
|
||||
#endif /* _INTPTR_T_DEFINED */
|
||||
|
||||
#ifndef _UINTPTR_T_DEFINED
|
||||
#define _UINTPTR_T_DEFINED
|
||||
#ifdef _WIN64
|
||||
typedef unsigned __int64 uintptr_t;
|
||||
#else /* _WIN64 */
|
||||
typedef _W64 unsigned int uintptr_t;
|
||||
#endif /* _WIN64 */
|
||||
#endif /* _UINTPTR_T_DEFINED */
|
||||
|
||||
typedef _Longlong int64_t;
|
||||
typedef _ULonglong uint64_t;
|
||||
|
||||
typedef _Longlong int_least64_t;
|
||||
typedef _ULonglong uint_least64_t;
|
||||
|
||||
typedef _Longlong int_fast64_t;
|
||||
typedef _ULonglong uint_fast64_t;
|
||||
|
||||
typedef _Longlong intmax_t;
|
||||
typedef _ULonglong uintmax_t;
|
||||
|
||||
/* LIMIT MACROS */
|
||||
#define INT8_MIN (-0x7f - _C2)
|
||||
#define INT16_MIN (-0x7fff - _C2)
|
||||
#define INT32_MIN (-0x7fffffff - _C2)
|
||||
|
||||
#define INT8_MAX 0x7f
|
||||
#define INT16_MAX 0x7fff
|
||||
#define INT32_MAX 0x7fffffff
|
||||
#define UINT8_MAX 0xff
|
||||
#define UINT16_MAX 0xffff
|
||||
#define UINT32_MAX 0xffffffff
|
||||
|
||||
#define INT_LEAST8_MIN (-0x7f - _C2)
|
||||
#define INT_LEAST16_MIN (-0x7fff - _C2)
|
||||
#define INT_LEAST32_MIN (-0x7fffffff - _C2)
|
||||
|
||||
#define INT_LEAST8_MAX 0x7f
|
||||
#define INT_LEAST16_MAX 0x7fff
|
||||
#define INT_LEAST32_MAX 0x7fffffff
|
||||
#define UINT_LEAST8_MAX 0xff
|
||||
#define UINT_LEAST16_MAX 0xffff
|
||||
#define UINT_LEAST32_MAX 0xffffffff
|
||||
|
||||
#define INT_FAST8_MIN (-0x7f - _C2)
|
||||
#define INT_FAST16_MIN (-0x7fff - _C2)
|
||||
#define INT_FAST32_MIN (-0x7fffffff - _C2)
|
||||
|
||||
#define INT_FAST8_MAX 0x7f
|
||||
#define INT_FAST16_MAX 0x7fff
|
||||
#define INT_FAST32_MAX 0x7fffffff
|
||||
#define UINT_FAST8_MAX 0xff
|
||||
#define UINT_FAST16_MAX 0xffff
|
||||
#define UINT_FAST32_MAX 0xffffffff
|
||||
|
||||
#if _INTPTR == 0 || _INTPTR == 1
|
||||
#define INTPTR_MAX 0x7fffffff
|
||||
#define INTPTR_MIN (-INTPTR_MAX - _C2)
|
||||
#define UINTPTR_MAX 0xffffffff
|
||||
|
||||
#else /* _INTPTR == 2 */
|
||||
#define INTPTR_MIN (-_LLONG_MAX - _C2)
|
||||
#define INTPTR_MAX _LLONG_MAX
|
||||
#define UINTPTR_MAX _ULLONG_MAX
|
||||
#endif /* _INTPTR */
|
||||
|
||||
#define INT8_C(x) (x)
|
||||
#define INT16_C(x) (x)
|
||||
#define INT32_C(x) ((x) + (INT32_MAX - INT32_MAX))
|
||||
|
||||
#define UINT8_C(x) (x)
|
||||
#define UINT16_C(x) (x)
|
||||
#define UINT32_C(x) ((x) + (UINT32_MAX - UINT32_MAX))
|
||||
|
||||
#ifdef _WIN64
|
||||
#define PTRDIFF_MIN INT64_MIN
|
||||
#define PTRDIFF_MAX INT64_MAX
|
||||
#else /* _WIN64 */
|
||||
#define PTRDIFF_MIN INT32_MIN
|
||||
#define PTRDIFF_MAX INT32_MAX
|
||||
#endif /* _WIN64 */
|
||||
|
||||
#define SIG_ATOMIC_MIN INT32_MIN
|
||||
#define SIG_ATOMIC_MAX INT32_MAX
|
||||
|
||||
#ifndef SIZE_MAX
|
||||
#ifdef _WIN64
|
||||
#define SIZE_MAX UINT64_MAX
|
||||
#else /* _WIN64 */
|
||||
#define SIZE_MAX UINT32_MAX
|
||||
#endif /* _WIN64 */
|
||||
#endif /* SIZE_MAX */
|
||||
|
||||
#define WCHAR_MIN 0x0000
|
||||
#define WCHAR_MAX 0xffff
|
||||
|
||||
#define WINT_MIN 0x0000
|
||||
#define WINT_MAX 0xffff
|
||||
|
||||
#define INT64_MIN (-0x7fffffffffffffff - _C2)
|
||||
#define INT64_MAX 0x7fffffffffffffff
|
||||
#define UINT64_MAX 0xffffffffffffffffU
|
||||
|
||||
#define INT_LEAST64_MIN (-0x7fffffffffffffff - _C2)
|
||||
#define INT_LEAST64_MAX 0x7fffffffffffffff
|
||||
#define UINT_LEAST64_MAX 0xffffffffffffffffU
|
||||
|
||||
#define INT_FAST64_MIN (-0x7fffffffffffffff - _C2)
|
||||
#define INT_FAST64_MAX 0x7fffffffffffffff
|
||||
#define UINT_FAST64_MAX 0xffffffffffffffffU
|
||||
|
||||
#define INTMAX_MIN (-0x7fffffffffffffff - _C2)
|
||||
#define INTMAX_MAX 0x7fffffffffffffff
|
||||
#define UINTMAX_MAX 0xffffffffffffffffU
|
||||
|
||||
#define INT64_C(x) ((x) + (INT64_MAX - INT64_MAX))
|
||||
#define UINT64_C(x) ((x) + (UINT64_MAX - UINT64_MAX))
|
||||
#define INTMAX_C(x) INT64_C(x)
|
||||
#define UINTMAX_C(x) UINT64_C(x)
|
||||
_C_STD_END
|
||||
#endif /* RC_INVOKED */
|
||||
#endif /* _STDINT */
|
||||
|
||||
#if defined(_STD_USING)
|
||||
using _CSTD int8_t; using _CSTD int16_t;
|
||||
using _CSTD int32_t; using _CSTD int64_t;
|
||||
|
||||
using _CSTD uint8_t; using _CSTD uint16_t;
|
||||
using _CSTD uint32_t; using _CSTD uint64_t;
|
||||
|
||||
using _CSTD int_least8_t; using _CSTD int_least16_t;
|
||||
using _CSTD int_least32_t; using _CSTD int_least64_t;
|
||||
using _CSTD uint_least8_t; using _CSTD uint_least16_t;
|
||||
using _CSTD uint_least32_t; using _CSTD uint_least64_t;
|
||||
|
||||
using _CSTD intmax_t; using _CSTD uintmax_t;
|
||||
|
||||
using _CSTD uintptr_t;
|
||||
using _CSTD intptr_t;
|
||||
|
||||
using _CSTD int_fast8_t; using _CSTD int_fast16_t;
|
||||
using _CSTD int_fast32_t; using _CSTD int_fast64_t;
|
||||
using _CSTD uint_fast8_t; using _CSTD uint_fast16_t;
|
||||
using _CSTD uint_fast32_t; using _CSTD uint_fast64_t;
|
||||
#endif /* defined(_STD_USING) */
|
||||
|
||||
/*
|
||||
* Copyright (c) 1992-2009 by P.J. Plauger. ALL RIGHTS RESERVED.
|
||||
* Consult your license regarding permissions and restrictions.
|
||||
V5.20:0009 */
|
20
SnudownTest/sundown.def
Normal file
20
SnudownTest/sundown.def
Normal file
|
@ -0,0 +1,20 @@
|
|||
LIBRARY SUNDOWN
|
||||
EXPORTS
|
||||
sdhtml_renderer
|
||||
sdhtml_toc_renderer
|
||||
sdhtml_smartypants
|
||||
bufgrow
|
||||
bufnew
|
||||
bufcstr
|
||||
bufprefix
|
||||
bufput
|
||||
bufputs
|
||||
bufputc
|
||||
bufrelease
|
||||
bufreset
|
||||
bufslurp
|
||||
bufprintf
|
||||
sd_markdown_new
|
||||
sd_markdown_render
|
||||
sd_markdown_free
|
||||
sd_version
|
461
SnudownTest/test_snudown.py
Normal file
461
SnudownTest/test_snudown.py
Normal file
|
@ -0,0 +1,461 @@
|
|||
#!/usr/bin/python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import snudown
|
||||
import unittest
|
||||
import itertools
|
||||
import cStringIO as StringIO
|
||||
|
||||
|
||||
cases = {
|
||||
'': '',
|
||||
'http://www.reddit.com':
|
||||
'<p><a href="http://www.reddit.com">http://www.reddit.com</a></p>\n',
|
||||
|
||||
'http://www.reddit.com/a\x00b':
|
||||
'<p><a href="http://www.reddit.com/ab">http://www.reddit.com/ab</a></p>\n',
|
||||
|
||||
'foo@example.com':
|
||||
'<p><a href="mailto:foo@example.com">foo@example.com</a></p>\n',
|
||||
|
||||
'[foo](http://en.wikipedia.org/wiki/Link_(film\))':
|
||||
'<p><a href="http://en.wikipedia.org/wiki/Link_(film)">foo</a></p>\n',
|
||||
|
||||
'(http://tsfr.org)':
|
||||
'<p>(<a href="http://tsfr.org">http://tsfr.org</a>)</p>\n',
|
||||
|
||||
'[A link with a /r/subreddit in it](/lol)':
|
||||
'<p><a href="/lol">A link with a /r/subreddit in it</a></p>\n',
|
||||
|
||||
'[A link with a http://www.url.com in it](/lol)':
|
||||
'<p><a href="/lol">A link with a http://www.url.com in it</a></p>\n',
|
||||
|
||||
'[Empty Link]()':
|
||||
'<p>[Empty Link]()</p>\n',
|
||||
|
||||
'http://en.wikipedia.org/wiki/café_racer':
|
||||
'<p><a href="http://en.wikipedia.org/wiki/caf%C3%A9_racer">http://en.wikipedia.org/wiki/café_racer</a></p>\n',
|
||||
|
||||
'#####################################################hi':
|
||||
'<h6>###############################################hi</h6>\n',
|
||||
|
||||
'[foo](http://bar\nbar)':
|
||||
'<p><a href="http://bar%0Abar">foo</a></p>\n',
|
||||
|
||||
'/r/test':
|
||||
'<p><a href="/r/test">/r/test</a></p>\n',
|
||||
|
||||
'Words words /r/test words':
|
||||
'<p>Words words <a href="/r/test">/r/test</a> words</p>\n',
|
||||
|
||||
'/r/':
|
||||
'<p>/r/</p>\n',
|
||||
|
||||
r'escaped \/r/test':
|
||||
'<p>escaped /r/test</p>\n',
|
||||
|
||||
'ampersands http://www.google.com?test&blah':
|
||||
'<p>ampersands <a href="http://www.google.com?test&blah">http://www.google.com?test&blah</a></p>\n',
|
||||
|
||||
'[_regular_ link with nesting](/test)':
|
||||
'<p><a href="/test"><em>regular</em> link with nesting</a></p>\n',
|
||||
|
||||
' www.a.co?with&test':
|
||||
'<p><a href="http://www.a.co?with&test">www.a.co?with&test</a></p>\n',
|
||||
|
||||
r'Normal^superscript':
|
||||
'<p>Normal<sup>superscript</sup></p>\n',
|
||||
|
||||
r'Escape\^superscript':
|
||||
'<p>Escape^superscript</p>\n',
|
||||
|
||||
r'~~normal strikethrough~~':
|
||||
'<p><del>normal strikethrough</del></p>\n',
|
||||
|
||||
r'\~~escaped strikethrough~~':
|
||||
'<p>~~escaped strikethrough~~</p>\n',
|
||||
|
||||
'anywhere\x03, you':
|
||||
'<p>anywhere, you</p>\n',
|
||||
|
||||
'[Test](//test)':
|
||||
'<p><a href="//test">Test</a></p>\n',
|
||||
|
||||
'[Test](//#test)':
|
||||
'<p><a href="//#test">Test</a></p>\n',
|
||||
|
||||
'[Test](#test)':
|
||||
'<p><a href="#test">Test</a></p>\n',
|
||||
|
||||
'[Test](git://github.com)':
|
||||
'<p><a href="git://github.com">Test</a></p>\n',
|
||||
|
||||
'[Speculation](//?)':
|
||||
'<p><a href="//?">Speculation</a></p>\n',
|
||||
|
||||
'/r/sr_with_underscores':
|
||||
'<p><a href="/r/sr_with_underscores">/r/sr_with_underscores</a></p>\n',
|
||||
|
||||
'[Test](///#test)':
|
||||
'<p><a href="///#test">Test</a></p>\n',
|
||||
|
||||
'/r/multireddit+test+yay':
|
||||
'<p><a href="/r/multireddit+test+yay">/r/multireddit+test+yay</a></p>\n',
|
||||
|
||||
'<test>':
|
||||
'<p><test></p>\n',
|
||||
|
||||
'words_with_underscores':
|
||||
'<p>words_with_underscores</p>\n',
|
||||
|
||||
'words*with*asterisks':
|
||||
'<p>words<em>with</em>asterisks</p>\n',
|
||||
|
||||
'~test':
|
||||
'<p>~test</p>\n',
|
||||
|
||||
'/u/test':
|
||||
'<p><a href="/u/test">/u/test</a></p>\n',
|
||||
|
||||
'/u/test/m/test test':
|
||||
'<p><a href="/u/test/m/test">/u/test/m/test</a> test</p>\n',
|
||||
|
||||
'/U/nope':
|
||||
'<p>/U/nope</p>\n',
|
||||
|
||||
'/r/test/m/test test':
|
||||
'<p><a href="/r/test/m/test">/r/test/m/test</a> test</p>\n',
|
||||
|
||||
'/r/test/w/test test':
|
||||
'<p><a href="/r/test/w/test">/r/test/w/test</a> test</p>\n',
|
||||
|
||||
'/r/test/comments/test test':
|
||||
'<p><a href="/r/test/comments/test">/r/test/comments/test</a> test</p>\n',
|
||||
|
||||
'/u/test/commentscommentscommentscommentscommentscommentscomments/test test':
|
||||
'<p><a href="/u/test/commentscommentscommentscommentscommentscommentscomments/test">/u/test/commentscommentscommentscommentscommentscommentscomments/test</a> test</p>\n',
|
||||
|
||||
'a /u/reddit':
|
||||
'<p>a <a href="/u/reddit">/u/reddit</a></p>\n',
|
||||
|
||||
'u/reddit':
|
||||
'<p><a href="/u/reddit">u/reddit</a></p>\n',
|
||||
|
||||
'a u/reddit':
|
||||
'<p>a <a href="/u/reddit">u/reddit</a></p>\n',
|
||||
|
||||
'a u/reddit/foobaz':
|
||||
'<p>a <a href="/u/reddit/foobaz">u/reddit/foobaz</a></p>\n',
|
||||
|
||||
'foo:u/reddit':
|
||||
'<p>foo:<a href="/u/reddit">u/reddit</a></p>\n',
|
||||
|
||||
'fuu/reddit':
|
||||
'<p>fuu/reddit</p>\n',
|
||||
|
||||
# Don't treat unicode punctuation as a word boundary for now
|
||||
u'a。u/reddit'.encode('utf8'):
|
||||
u'<p>a。u/reddit</p>\n'.encode('utf8'),
|
||||
|
||||
'\\/u/me':
|
||||
'<p>/u/me</p>\n',
|
||||
|
||||
'\\\\/u/me':
|
||||
'<p>\\<a href="/u/me">/u/me</a></p>\n',
|
||||
|
||||
'\\u/me':
|
||||
'<p>\\<a href="/u/me">u/me</a></p>\n',
|
||||
|
||||
'\\\\u/me':
|
||||
'<p>\\<a href="/u/me">u/me</a></p>\n',
|
||||
|
||||
'u\\/me':
|
||||
'<p>u/me</p>\n',
|
||||
|
||||
'*u/me*':
|
||||
'<p><em><a href="/u/me">u/me</a></em></p>\n',
|
||||
|
||||
'foo^u/me':
|
||||
'<p>foo<sup><a href="/u/me">u/me</a></sup></p>\n',
|
||||
|
||||
'*foo*u/me':
|
||||
'<p><em>foo</em><a href="/u/me">u/me</a></p>\n',
|
||||
|
||||
'u/me':
|
||||
'<p><a href="/u/me">u/me</a></p>\n',
|
||||
|
||||
'/u/me':
|
||||
'<p><a href="/u/me">/u/me</a></p>\n',
|
||||
|
||||
'u/m':
|
||||
'<p>u/m</p>\n',
|
||||
|
||||
'/u/m':
|
||||
'<p>/u/m</p>\n',
|
||||
|
||||
'/f/oobar':
|
||||
'<p>/f/oobar</p>\n',
|
||||
|
||||
'f/oobar':
|
||||
'<p>f/oobar</p>\n',
|
||||
|
||||
'/r/test/commentscommentscommentscommentscommentscommentscomments/test test':
|
||||
'<p><a href="/r/test/commentscommentscommentscommentscommentscommentscomments/test">/r/test/commentscommentscommentscommentscommentscommentscomments/test</a> test</p>\n',
|
||||
|
||||
'blah \\':
|
||||
'<p>blah \\</p>\n',
|
||||
|
||||
'/r/whatever: fork':
|
||||
'<p><a href="/r/whatever">/r/whatever</a>: fork</p>\n',
|
||||
|
||||
'/r/t:timereddit':
|
||||
'<p><a href="/r/t:timereddit">/r/t:timereddit</a></p>\n',
|
||||
|
||||
'/r/reddit.com':
|
||||
'<p><a href="/r/reddit.com">/r/reddit.com</a></p>\n',
|
||||
|
||||
'/r/not.cool':
|
||||
'<p><a href="/r/not">/r/not</a>.cool</p>\n',
|
||||
|
||||
'/r/very+clever+multireddit+reddit.com+t:fork+yay':
|
||||
'<p><a href="/r/very+clever+multireddit+reddit.com+t:fork+yay">/r/very+clever+multireddit+reddit.com+t:fork+yay</a></p>\n',
|
||||
|
||||
'/r/t:heatdeathoftheuniverse':
|
||||
'<p><a href="/r/t:heatdeathoftheuniverse">/r/t:heatdeathoftheuniverse</a></p>\n',
|
||||
|
||||
'/r/all-minus-something':
|
||||
'<p><a href="/r/all-minus-something">/r/all-minus-something</a></p>\n',
|
||||
|
||||
'/r/notall-minus':
|
||||
'<p><a href="/r/notall">/r/notall</a>-minus</p>\n',
|
||||
|
||||
'a /r/reddit.com':
|
||||
'<p>a <a href="/r/reddit.com">/r/reddit.com</a></p>\n',
|
||||
|
||||
'a r/reddit.com':
|
||||
'<p>a <a href="/r/reddit.com">r/reddit.com</a></p>\n',
|
||||
|
||||
'foo:r/reddit.com':
|
||||
'<p>foo:<a href="/r/reddit.com">r/reddit.com</a></p>\n',
|
||||
|
||||
'foobar/reddit.com':
|
||||
'<p>foobar/reddit.com</p>\n',
|
||||
|
||||
u'a。r/reddit.com'.encode('utf8'):
|
||||
u'<p>a。r/reddit.com</p>\n'.encode('utf8'),
|
||||
|
||||
'/R/reddit.com':
|
||||
'<p>/R/reddit.com</p>\n',
|
||||
|
||||
'/r/irc://foo.bar/':
|
||||
'<p><a href="/r/irc">/r/irc</a>://foo.bar/</p>\n',
|
||||
|
||||
'/r/t:irc//foo.bar/':
|
||||
'<p><a href="/r/t:irc//foo">/r/t:irc//foo</a>.bar/</p>\n',
|
||||
|
||||
'/r/all-irc://foo.bar/':
|
||||
'<p><a href="/r/all-irc">/r/all-irc</a>://foo.bar/</p>\n',
|
||||
|
||||
'/r/foo+irc://foo.bar/':
|
||||
'<p><a href="/r/foo+irc">/r/foo+irc</a>://foo.bar/</p>\n',
|
||||
|
||||
'/r/www.example.com':
|
||||
'<p><a href="/r/www">/r/www</a>.example.com</p>\n',
|
||||
|
||||
'.http://reddit.com':
|
||||
'<p>.<a href="http://reddit.com">http://reddit.com</a></p>\n',
|
||||
|
||||
'[r://<http://reddit.com/>](/aa)':
|
||||
'<p><a href="/aa">r://<a href="http://reddit.com/">http://reddit.com/</a></a></p>\n',
|
||||
|
||||
'/u/http://www.reddit.com/user/reddit':
|
||||
'<p><a href="/u/http">/u/http</a>://<a href="http://www.reddit.com/user/reddit">www.reddit.com/user/reddit</a></p>\n',
|
||||
|
||||
'www.http://example.com/':
|
||||
'<p><a href="http://www.http://example.com/">www.http://example.com/</a></p>\n',
|
||||
|
||||
('|' * 5) + '\n' + ('-|' * 5) + '\n|\n':
|
||||
'<table><thead>\n<tr>\n' + ('<th></th>\n' * 4) + '</tr>\n</thead><tbody>\n<tr>\n<td colspan="4" ></td>\n</tr>\n</tbody></table>\n',
|
||||
|
||||
('|' * 2) + '\n' + ('-|' * 2) + '\n|\n':
|
||||
'<table><thead>\n<tr>\n' + ('<th></th>\n' * 1) + '</tr>\n</thead><tbody>\n<tr>\n<td></td>\n</tr>\n</tbody></table>\n',
|
||||
|
||||
('|' * 65) + '\n' + ('-|' * 65) + '\n|\n':
|
||||
'<table><thead>\n<tr>\n' + ('<th></th>\n' * 64) + '</tr>\n</thead><tbody>\n<tr>\n<td colspan="64" ></td>\n</tr>\n</tbody></table>\n',
|
||||
|
||||
('|' * 66) + '\n' + ('-|' * 66) + '\n|\n':
|
||||
'<p>' + ('|' * 66) + '\n' + ('-|' * 66) + '\n|' + '</p>\n',
|
||||
|
||||
'ϑ':
|
||||
'<p>ϑ</p>\n',
|
||||
|
||||
'&foobar;':
|
||||
'<p>&foobar;</p>\n',
|
||||
|
||||
' ':
|
||||
'<p>&nbsp</p>\n',
|
||||
|
||||
'&#foobar;':
|
||||
'<p>&#foobar;</p>\n',
|
||||
|
||||
'oobar;':
|
||||
'<p>&#xfoobar;</p>\n',
|
||||
|
||||
'�':
|
||||
'<p>&#9999999999;</p>\n',
|
||||
|
||||
'c':
|
||||
'<p>c</p>\n',
|
||||
|
||||
'~':
|
||||
'<p>~</p>\n',
|
||||
|
||||
'~':
|
||||
'<p>~</p>\n',
|
||||
|
||||
'½':
|
||||
'<p>½</p>\n',
|
||||
|
||||
'aaa½aaa':
|
||||
'<p>aaa½aaa</p>\n',
|
||||
|
||||
'&':
|
||||
'<p>&</p>\n',
|
||||
|
||||
'&;':
|
||||
'<p>&;</p>\n',
|
||||
|
||||
'&#;':
|
||||
'<p>&#;</p>\n',
|
||||
|
||||
'&#;':
|
||||
'<p>&#;</p>\n',
|
||||
|
||||
'&#x;':
|
||||
'<p>&#x;</p>\n',
|
||||
}
|
||||
|
||||
# Test that every numeric entity is encoded as
|
||||
# it should be.
|
||||
# Code points whose numeric character references must NOT be passed through:
# 0-8, 11-12, 14-31, the range 55296-57343 and 65534-65535.  For these the
# expected output has the leading '&' escaped.
ILLEGAL_NUMERIC_ENTS = frozenset(itertools.chain(
    xrange(0, 9),
    xrange(11, 13),
    xrange(14, 32),
    xrange(55296, 57344),
    xrange(65534, 65536),
))

# Build one big test case containing the decimal and the hexadecimal
# reference for every code point below 65550.  The pieces are collected in
# lists and joined once, rather than growing two strings with ``+=``.
ent_test_key_parts = []
ent_test_val_parts = []
for i in xrange(65550):
    ent_testcase = '&#%d;&#x%x;' % (i, i)
    ent_test_key_parts.append(ent_testcase)
    if i in ILLEGAL_NUMERIC_ENTS:
        # An illegal reference is expected back with its ampersand escaped,
        # so it shows up as literal text instead of as an entity.
        ent_testcase = ent_testcase.replace('&', '&amp;')
    ent_test_val_parts.append(ent_testcase)

ent_test_key = ''.join(ent_test_key_parts)
ent_test_val = ''.join(ent_test_val_parts)

cases[ent_test_key] = '<p>%s</p>\n' % ent_test_val
|
||||
|
||||
wiki_cases = {
|
||||
'<table scope="foo"bar>':
|
||||
'<p><table scope="foo"></p>\n',
|
||||
|
||||
'<table scope="foo"bar colspan="2">':
|
||||
'<p><table scope="foo" colspan="2"></p>\n',
|
||||
|
||||
'<table scope="foo" colspan="2"bar>':
|
||||
'<p><table scope="foo" colspan="2"></p>\n',
|
||||
|
||||
'<table scope="foo">':
|
||||
'<p><table scope="foo"></p>\n',
|
||||
|
||||
'<table scop="foo">':
|
||||
'<p><table></p>\n',
|
||||
|
||||
'<table ff= scope="foo">':
|
||||
'<p><table scope="foo"></p>\n',
|
||||
|
||||
'<table colspan= scope="foo">':
|
||||
'<p><table scope="foo"></p>\n',
|
||||
|
||||
'<table scope=ff"foo">':
|
||||
'<p><table scope="foo"></p>\n',
|
||||
|
||||
'<table scope="foo" test="test">':
|
||||
'<p><table scope="foo"></p>\n',
|
||||
|
||||
'<table scope="foo" longervalue="testing test" scope="test">':
|
||||
'<p><table scope="foo" scope="test"></p>\n',
|
||||
|
||||
'<table scope=`"foo">':
|
||||
'<p><table scope="foo"></p>\n',
|
||||
|
||||
'<table scope="foo bar">':
|
||||
'<p><table scope="foo bar"></p>\n',
|
||||
|
||||
'<table scope=\'foo colspan="foo">':
|
||||
'<p><table></p>\n',
|
||||
|
||||
'<table scope=\'foo\' colspan="foo">':
|
||||
'<p><table scope="foo" colspan="foo"></p>\n',
|
||||
|
||||
'<table scope=>':
|
||||
'<p><table></p>\n',
|
||||
|
||||
'<table scope= colspan="test" scope=>':
|
||||
'<p><table colspan="test"></p>\n',
|
||||
|
||||
'<table colspan="\'test">':
|
||||
'<p><table colspan="'test"></p>\n',
|
||||
|
||||
'<table scope="foo" colspan="2">':
|
||||
'<p><table scope="foo" colspan="2"></p>\n',
|
||||
|
||||
'<table scope="foo" colspan="2" ff="test">':
|
||||
'<p><table scope="foo" colspan="2"></p>\n',
|
||||
|
||||
'<table ff="test" scope="foo" colspan="2" colspan=>':
|
||||
'<p><table scope="foo" colspan="2"></p>\n',
|
||||
|
||||
' <table colspan=\'\'\' a="" \' scope="foo">':
|
||||
'<p><table scope="foo"></p>\n',
|
||||
}
|
||||
|
||||
class SnudownTestCase(unittest.TestCase):
    """Render ``self.input`` with snudown and compare it to ``self.expected_output``.

    ``input`` and ``expected_output`` are not constructor arguments; the
    suite builder assigns them on the instance before the case is run.
    """

    def __init__(self, renderer=snudown.RENDERER_USERTEXT):
        unittest.TestCase.__init__(self)
        self.renderer = renderer

    def runTest(self):
        """Fail with a caret-annotated report at the first differing character."""
        actual = snudown.markdown(self.input, renderer=self.renderer)
        expected_repr = repr(self.expected_output)
        actual_repr = repr(actual)

        # The comparison is done on the repr() of both strings so the caret
        # printed below lines up with what is shown in the report.
        first_diff = next(
            (pos
             for pos, (want, got) in enumerate(zip(expected_repr, actual_repr))
             if want != got),
            None)
        if first_diff is None:
            return

        report = [
            "TEST FAILED:",
            " input: %s" % repr(self.input),
            " expected: %s" % expected_repr,
            " actual: %s" % actual_repr,
            " %s" % (' ' * first_diff + '^'),
        ]
        self.fail(''.join(line + '\n' for line in report))
|
||||
|
||||
|
||||
|
||||
def test_snudown():
    """Return a TestSuite with one SnudownTestCase per table entry.

    The ``wiki_cases`` entries are added first and use the wiki renderer;
    the ``cases`` entries follow and use the test case's default renderer.
    """
    suite = unittest.TestSuite()

    def add_table(table, **case_kwargs):
        # One test case per (markdown source, expected HTML) pair.
        for source_text, rendered in table.iteritems():
            case = SnudownTestCase(**case_kwargs)
            case.input = source_text
            case.expected_output = rendered
            suite.addTest(case)

    add_table(wiki_cases, renderer=snudown.RENDERER_WIKI)
    add_table(cases)

    return suite
|
153
Symlinker/symlinker.py
Normal file
153
Symlinker/symlinker.py
Normal file
|
@ -0,0 +1,153 @@
|
|||
import os
|
||||
import sys
|
||||
import string
|
||||
import tkinter
|
||||
import time
|
||||
import traceback
|
||||
|
||||
# time.clock() was removed in Python 3.8.  Where it is missing, install a
# wall-clock replacement that counts from this point, so the elapsed-time
# report at the bottom of the script keeps working.
if not hasattr(time, 'clock'):
    _script_start = time.perf_counter()
    time.clock = lambda: time.perf_counter() - _script_start

# Prime the clock; the final report reads it again.
time.clock()

# Display names in the order they appear in the dropdown.
LINKTYPES_L = ['Symbolic file', 'Hardlink file', 'Symbolic dir', 'Junction dir']

# Display name -> flag inserted into the ``mklink`` command line.
LINKTYPES = {
    'Symbolic file': '',
    'Hardlink file': '/H',
    'Symbolic dir': '/D',
    'Junction dir': '/J',
}

# Each list holds both the display names and the matching flags, so a
# membership test works with either spelling.
LINKTYPES_DIR = ['Symbolic dir', 'Junction dir', LINKTYPES['Symbolic dir'], LINKTYPES['Junction dir']]
LINKTYPES_FILE = ['Symbolic file', 'Hardlink file', LINKTYPES['Symbolic file'], LINKTYPES['Hardlink file']]

# Seconds to wait after the last edit of the path entry before it is checked.
TRACER_AUTOVERIFY_DELAY = 0.5

# Colours used for the dropdown text and the action button.
COLOR_BLACK = '#000'
COLOR_YELLOW = '#aa0'
COLOR_GREEN = '#0a0'
COLOR_RED = '#a00'
|
||||
|
||||
def assert_linktypes(linktype, symbolpath, actualpath):
    """Raise TypeError when ``linktype`` does not suit what ``actualpath`` is.

    An existing directory needs a member of LINKTYPES_DIR and an existing
    file needs a member of LINKTYPES_FILE.  A path that is neither is
    accepted without any check.  ``symbolpath`` is not inspected.
    """
    if os.path.isdir(actualpath):
        mismatch = linktype not in LINKTYPES_DIR
    elif os.path.isfile(actualpath):
        mismatch = linktype not in LINKTYPES_FILE
    else:
        mismatch = False

    if not mismatch:
        return

    template = 'Invalid linktype {linktype} for target path {target}'
    raise TypeError(template.format(linktype=repr(linktype), target=repr(actualpath)))
|
||||
|
||||
def mklink(linktype, symbolpath, actualpath):
    """Create a link at ``symbolpath`` pointing to ``actualpath`` via ``mklink``.

    ``linktype`` may be either a flag ('', '/H', '/D', '/J') or one of the
    display names used as keys of LINKTYPES; display names are translated
    to their flag before the command is built.

    Both paths are made absolute.  For directory links a small text file
    recording the two paths is written into the target directory.

    Returns True when the link was created, and False when the link type
    does not suit the target or the ``mklink`` command reports failure.
    """
    # assert_linktypes() accepts display names as well as flags, so accept
    # both here too; otherwise a display name would end up verbatim in the
    # command line and mislabel the info file.
    linktype = LINKTYPES.get(linktype, linktype)

    symbolpath = os.path.abspath(symbolpath)
    actualpath = os.path.abspath(actualpath)
    try:
        assert_linktypes(linktype, symbolpath, actualpath)
    except TypeError:
        traceback.print_exc()
        return False

    command = 'mklink {linktype} "{symbolpath}" "{actualpath}"'
    command = command.format(linktype=linktype,
                             symbolpath=symbolpath,
                             actualpath=actualpath)
    # Only the echo is filtered to printable characters; the command that is
    # executed is left untouched.
    print(''.join(c for c in command if c in string.printable))
    status_code = os.system(command)
    if status_code != 0:
        return False

    if linktype in LINKTYPES_DIR:
        symtype = 'symlink' if linktype == '/D' else 'junction'
        info_name = symtype + time.strftime('_%Y%m%d-%H%M%S.txt')
        info_path = os.path.join(actualpath, info_name)
        # The context manager closes the file even if a write fails.
        with open(info_path, 'w') as symlink_info:
            symlink_info.write('actual: ' + actualpath)
            symlink_info.write('\n')
            symlink_info.write(symtype + ': ' + symbolpath)

    return True
|
||||
|
||||
|
||||
class LinkGUI:
    """Small Tk window for creating links with mklink().

    The window has an entry for the actual (target) path, an entry for the
    symbol (link) path, a dropdown with the link type and a button that
    creates the link.  Edits to the actual path are checked after a short
    pause, and the dropdown is coloured (and, if needed, switched) to match
    whether the path is a file or a directory.

    Note that the constructor enters the Tk main loop and therefore only
    returns once the window is closed.
    """

    def __init__(self):
        self.t = tkinter.Tk()

        # State of the delayed check of the actual-path entry.
        self.tracer_nextautoverify = 0              # earliest time a check may run
        self.tracer_lastkeystroke_verified = False  # latest edit already checked?
        self.tracer_activewaiter = False            # is an after() callback pending?

        self.stringvar_actualpath = tkinter.StringVar()
        self.stringvar_dropdown = tkinter.StringVar()
        self.label_actualpath = tkinter.Label(self.t, text='Actual path:')
        self.label_symbolpath = tkinter.Label(self.t, text='Symbol path:')
        self.entry_actualpath = tkinter.Entry(self.t, width=70, textvariable=self.stringvar_actualpath)
        self.entry_symbolpath = tkinter.Entry(self.t, width=70)
        self.dropdown_linktype = tkinter.OptionMenu(self.t, self.stringvar_dropdown, *LINKTYPES_L)
        self.dropdown_linktype.configure(width=15)
        self.button_do_it = tkinter.Button(self.t, text='Do it.', command=self.do_it)

        # The traces are registered before the initial values are set, so
        # setting the starting path below already schedules a first check.
        self.stringvar_actualpath.trace('w', self.tracewatcher)
        self.stringvar_dropdown.trace('w', lambda *bb: self.tracer_verify_colors(False))
        self.stringvar_actualpath.set(os.getcwd())
        self.entry_symbolpath.insert(0, os.getcwd())

        self.label_actualpath.grid(row=0, column=0, sticky='e')
        self.label_symbolpath.grid(row=1, column=0, sticky='e')
        self.entry_actualpath.grid(row=0, column=1, sticky='ew')
        self.entry_symbolpath.grid(row=1, column=1, sticky='ew')
        self.dropdown_linktype.grid(row=2, column=0)
        self.button_do_it.grid(row=2, column=1, sticky='e')

        self.t.grid_columnconfigure(1, weight=1)
        self.t.mainloop()

    def do_it(self, *bb):
        """Create the link described by the widgets and colour the button.

        The button turns green on success and red on failure.
        """
        linktype = LINKTYPES.get(self.stringvar_dropdown.get())
        if linktype is None:
            # The dropdown starts out empty until the first automatic check
            # has run; report that as a failure instead of raising KeyError
            # inside the Tk callback.
            self.button_do_it.configure(bg=COLOR_RED)
            return

        actualpath = self.entry_actualpath.get()
        symbolpath = self.entry_symbolpath.get()
        status = mklink(linktype, actualpath=actualpath, symbolpath=symbolpath)
        if status is False:
            self.button_do_it.configure(bg=COLOR_RED)
        else:
            self.button_do_it.configure(bg=COLOR_GREEN)

    def tracewatcher(self, *bb):
        """Trace callback for the actual-path variable: (re)arm the delayed check."""
        self.tracer_lastkeystroke_verified = False
        self.tracer_nextautoverify = time.time() + TRACER_AUTOVERIFY_DELAY
        # Start a waiter only if none is pending; a pending one will see the
        # pushed-back deadline when it fires and simply wait again.
        if self.tracer_activewaiter is False:
            self.tracer_verify()

    def tracer_verify(self):
        """Check the actual path once the deadline has passed, else wait again."""
        now = time.time()
        if self.tracer_lastkeystroke_verified is True:
            return
        if now < self.tracer_nextautoverify:
            delay = int(TRACER_AUTOVERIFY_DELAY * 1000)
            self.t.after(delay, self.tracer_verify)
            self.tracer_activewaiter = True
            # Yellow marks the dropdown as "check pending".
            self.dropdown_linktype.config(fg=COLOR_YELLOW)
            return
        self.tracer_lastkeystroke_verified = True
        self.tracer_activewaiter = False

        self.tracer_verify_colors(set_for_me=True)

    def tracer_verify_colors(self, set_for_me=False, *bb):
        """Colour the dropdown according to the actual path and selection.

        Green means the selected link type suits the path (file or
        directory); black means it does not, or that the path is neither an
        existing file nor an existing directory.  With ``set_for_me`` true,
        an unsuitable selection is replaced by the symbolic variant for the
        path's kind.
        """
        path = self.stringvar_actualpath.get()
        linktype = self.stringvar_dropdown.get()

        if os.path.isfile(path):
            suitable, fallback = LINKTYPES_FILE, 'Symbolic file'
        elif os.path.isdir(path):
            suitable, fallback = LINKTYPES_DIR, 'Symbolic dir'
        else:
            self.dropdown_linktype.config(fg=COLOR_BLACK)
            return

        if set_for_me and linktype not in suitable:
            # Writing the variable fires its trace, which calls this method
            # again (without set_for_me) and applies the colour.
            self.stringvar_dropdown.set(fallback)
            return

        color = COLOR_GREEN if linktype in suitable else COLOR_BLACK
        self.dropdown_linktype.config(fg=color)
|
||||
|
||||
# Manual invocations kept for reference.
# NOTE(review): the LINKTYPE_* names used below are not defined anywhere in
# this script; mklink() is called elsewhere with the flag strings stored in
# LINKTYPES.
#mklink(LINKTYPE_SYMBOLIC_DIR, 'examples\\symbolic_dir', 'examples\\actual_dir')
#mklink(LINKTYPE_JUNCTION_DIR, 'examples\\junction_dir', 'examples\\actual_dir')
#mklink(LINKTYPE_SYMBOLIC_FILE, 'examples\\symbolic_file.txt', 'examples\\actual_file.txt')
#mklink(LINKTYPE_HARDLINK_FILE, 'examples\\hardlink_file.txt', 'examples\\actual_file.txt')

# Constructing the GUI runs the Tk main loop, so the line after it is only
# reached once the window has been closed.
linker = LinkGUI()
# Report the clock reading, rounded to milliseconds.
print('[ {0} elapsed ]'.format(round(time.clock(), 3)))
|
||||
|
1
gitnotes.txt
Normal file
1
gitnotes.txt
Normal file
|
@ -0,0 +1 @@
|
|||
git filter-branch -f --index-filter "git rm -r --cached --ignore-unmatch SubredditBirthdays\sql.db" HEAD
|
Loading…
Reference in a new issue