else/SnudownTest/autolink.c

/*
 * Copyright (c) 2011, Vicent Marti
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include "buffer.h"
#include "autolink.h"

#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <ctype.h>

#if defined(_WIN32)
#define strncasecmp	_strnicmp
#endif

/*
 * Reports whether `link` starts with one of the whitelisted URI prefixes.
 *
 * A prefix only counts when it matches case-insensitively AND the link
 * continues past it with a byte that is alphanumeric, '#', '/' or '?'.
 *
 * link:     bytes of the candidate link (only the first `link_len` are used)
 * link_len: number of bytes available in `link`
 *
 * Returns 1 when the link is accepted, 0 otherwise.
 */
int
sd_autolink_issafe(const uint8_t *link, size_t link_len)
{
	static const char *const schemes[] = {
		"http://", "https://", "ftp://", "mailto://",
		"/", "git://", "steam://", "irc://", "news://", "mumble://",
		"ssh://", "ircs://", "ts3server://", "#"
	};
	const size_t scheme_count = sizeof(schemes) / sizeof(schemes[0]);
	size_t idx;

	for (idx = 0; idx < scheme_count; idx++) {
		const char *scheme = schemes[idx];
		const size_t scheme_len = strlen(scheme);
		uint8_t next;

		/* the link has to extend beyond the prefix itself */
		if (link_len <= scheme_len)
			continue;

		if (strncasecmp((const char *)link, scheme, scheme_len) != 0)
			continue;

		/* first byte after the prefix decides whether we accept */
		next = link[scheme_len];
		if (isalnum(next) || next == '#' || next == '/' || next == '?')
			return 1;
	}

	return 0;
}

/*
 * Trims a candidate link so that it does not swallow trailing delimiters.
 *
 * Starting from the first `link_end` bytes of `data`, this:
 *   1. cuts the link at the first '<', if any;
 *   2. repeatedly drops a trailing '?', '!', '.', ',' and a trailing ';'
 *      (a trailing "&name;" style entity is dropped as a whole);
 *   3. drops one trailing closing quote/bracket when the number of matching
 *      opening and closing characters inside the link differs.
 *
 * data:       start of the candidate link
 * link_end:   tentative length of the link, in bytes
 * max_rewind: unused; kept so the signature stays unchanged
 * size:       unused; kept so the signature stays unchanged
 *
 * Returns the adjusted link length, or 0 if nothing is left.
 */
static size_t
autolink_delim(uint8_t *data, size_t link_end, size_t max_rewind, size_t size)
{
	uint8_t cclose, copen = 0;
	size_t i;

	(void)max_rewind;
	(void)size;

	/* a '<' always terminates the link */
	for (i = 0; i < link_end; ++i)
		if (data[i] == '<') {
			link_end = i;
			break;
		}

	while (link_end > 0) {
		uint8_t c = data[link_end - 1];

		if (c == 0)
			break;

		if (strchr("?!.,", c) != NULL)
			link_end--;

		else if (c == ';') {
			size_t new_end;

			/* A lone ';' cannot close an entity. Bail out before
			 * computing `link_end - 2`, which would wrap around
			 * and make the scan below read in front of `data`. */
			if (link_end < 2) {
				link_end--;
				continue;
			}

			new_end = link_end - 2;

			/* walk back over the (alphabetic) entity name */
			while (new_end > 0 && isalpha(data[new_end]))
				new_end--;

			/* only treat it as an entity if a non-empty name is
			 * preceded by '&'; otherwise just drop the ';' */
			if (new_end < link_end - 2 && data[new_end] == '&')
				link_end = new_end;
			else
				link_end--;
		}
		else break;
	}

	if (link_end == 0)
		return 0;

	cclose = data[link_end - 1];

	switch (cclose) {
	case '"':	copen = '"'; break;
	case '\'':	copen = '\''; break;
	case ')':	copen = '('; break;
	case ']':	copen = '['; break;
	case '}':	copen = '{'; break;
	default:	break;
	}

	if (copen != 0) {
		size_t closing = 0;
		size_t opening = 0;

		/* Try to close the final punctuation sign in this same line;
		 * if we managed to close it outside of the URL, that means that it's
		 * not part of the URL. If it closes inside the URL, that means it
		 * is part of the URL.
		 *
		 * Examples:
		 *
		 *	foo http://www.pokemon.com/Pikachu_(Electric) bar
		 *		=> http://www.pokemon.com/Pikachu_(Electric)
		 *
		 *	foo (http://www.pokemon.com/Pikachu_(Electric)) bar
		 *		=> http://www.pokemon.com/Pikachu_(Electric)
		 *
		 *	foo http://www.pokemon.com/Pikachu_(Electric)) bar
		 *		=> http://www.pokemon.com/Pikachu_(Electric))
		 *
		 *	(foo http://www.pokemon.com/Pikachu_(Electric)) bar
		 *		=> foo http://www.pokemon.com/Pikachu_(Electric)
		 */

		/* For quotes `copen == cclose`, so every quote is counted as
		 * an opening one (the first branch wins). */
		for (i = 0; i < link_end; ++i) {
			if (data[i] == copen)
				opening++;
			else if (data[i] == cclose)
				closing++;
		}

		if (closing != opening)
			link_end--;
	}

	return link_end;
}

/*
 * Checks that `prefix_char` occurs on a word boundary just before `data`,
 * where `data` points to the character to search to the left of, and a word boundary
 * is (currently) a whitespace character, punctuation, or the start of the string.
 *
 * data:           points at the '/' that follows the prefix character
 * max_rewind:     number of bytes that may be inspected to the left of `data`
 * max_lookbehind: when `max_rewind` is exactly 1, `data[-2]` and `data[-3]`
 *                 are only inspected if this is greater than 2
 * size:           number of bytes available from `data` onwards
 * prefix_char:    the character expected at `data[-1]` (e.g. 'r' or 'u')
 *
 * Returns the length of the prefix: 2 when the prefix character is itself
 * preceded by '/', 1 when it stands alone on a boundary, 0 when there is
 * no valid prefix.
 */
static int
check_reddit_autolink_prefix(
	const uint8_t* data,
	size_t max_rewind,
	size_t max_lookbehind,
	size_t size,
	char prefix_char
	)
{
	/* Make sure this `/` is part of `/?r/` */
	if (size < 2 || max_rewind < 1 || data[-1] != prefix_char)
		return 0;

	/* Not at the start of the buffer, no inlines to the immediate left of the `prefix_char` */
	if (max_rewind > 1) {
		/* Keep the byte unsigned: handing a negative value (any byte with
		 * the 8th bit set, on platforms where plain `char` is signed) to
		 * the <ctype.h> classifiers is undefined behaviour. */
		const uint8_t boundary = data[-2];
		if (boundary == '/')
			return 2;
		/**
		 * Here's where our lack of unicode-awareness bites us. We don't correctly
		 * match punctuation / whitespace characters for the boundary, because we
		 * reject valid cases like "。r/example" (note the fullwidth period.)
		 *
		 * A better implementation might try to rewind over bytes with the 8th bit set, try
		 * to decode them to a valid codepoint, then do a unicode-aware check on the codepoint.
		 */
		else if (ispunct(boundary) || isspace(boundary))
			return 1;
		else
			return 0;
	} else if (max_lookbehind > 2) {
		/* There's an inline element just left of the `prefix_char`, is it an escaped forward
		 * slash? bail out so we correctly handle stuff like "\/r/foo". This will also correctly
		 * allow "\\/r/foo".
		 */
		if (data[-2] == '/' && data[-3] == '\\')
			return 0;
	}

	/* Must be a new-style shortlink with nothing relevant to the left of it. */
	return 1;
}

/*
 * Measures the run of domain-like characters at the start of `data`.
 *
 * The first byte must be alphanumeric; the following bytes may be
 * alphanumeric, '-' or '.'. The scan deliberately stops one byte short of
 * `size`, so the last available byte is never classified here.
 *
 * data:        start of the candidate domain
 * size:        number of bytes available in `data`
 * allow_short: when non-zero, a domain without any '.' is accepted
 *
 * Returns the length of the scanned run, or 0 when the input is empty, does
 * not start with an alphanumeric byte, or (with `allow_short` == 0) contains
 * no '.'.
 */
static size_t
check_domain(uint8_t *data, size_t size, int allow_short)
{
	size_t i, np = 0;

	/* Nothing to inspect. This also keeps `size - 1` below from wrapping. */
	if (size == 0 || !isalnum(data[0]))
		return 0;

	for (i = 1; i < size - 1; ++i) {
		if (data[i] == '.') np++;
		else if (!isalnum(data[i]) && data[i] != '-') break;
	}

	if (allow_short) {
		/* We don't need a valid domain in the strict sense (with at
		 * least one dot); so just make sure it's composed of valid
		 * domain characters and return the length of the valid
		 * sequence. */
		return i;
	} else {
		/* a valid domain needs to have at least a dot.
		 * that's as far as we get */
		return np ? i : 0;
	}
}

/*
 * Tries to recognise a "www." style link starting at `data`.
 *
 * The match is rejected unless `data` is at the very start (`max_rewind`
 * == 0) or the preceding byte is punctuation or whitespace, `data` begins
 * with the literal "www.", and check_domain() finds a domain containing at
 * least one dot. The link then extends up to the next whitespace byte and is
 * trimmed by autolink_delim().
 *
 * rewind_p:   out; set to 0 on success (nothing to the left is consumed)
 * link:       on success, the matched bytes are handed to bufput()
 * data:       current position in the text
 * max_rewind: number of bytes that may be inspected to the left of `data`
 * size:       number of bytes available from `data` onwards
 * flags:      unused by this function
 *
 * Returns the length of the link in bytes, or 0 when there is no match.
 */
size_t
sd_autolink__www(
	size_t *rewind_p,
	struct buf *link,
	uint8_t *data,
	size_t max_rewind,
	size_t size,
	unsigned int flags)
{
	size_t link_end;

	(void)flags;

	if (max_rewind > 0 && !ispunct(data[-1]) && !isspace(data[-1]))
		return 0;

	if (size < 4 || memcmp(data, "www.", strlen("www.")) != 0)
		return 0;

	link_end = check_domain(data, size, 0);

	if (link_end == 0)
		return 0;

	/* everything up to the next whitespace belongs to the candidate */
	while (link_end < size && !isspace(data[link_end]))
		link_end++;

	link_end = autolink_delim(data, link_end, max_rewind, size);

	if (link_end == 0)
		return 0;

	bufput(link, data, link_end);
	*rewind_p = 0;

	/* return the size_t as-is; squeezing it through `int` would mangle
	 * lengths that do not fit in an int */
	return link_end;
}

/*
 * Tries to recognise an e-mail address around `data`.
 *
 * The local part is collected by walking left from `data` over alphanumeric
 * bytes and '.', '+', '-', '_' (at most `max_rewind` bytes, stopping at a NUL
 * byte). The rest is collected by walking right from `data` over
 * alphanumerics, '@', '.', '-' and '_'. The forward part must contain exactly
 * one '@' and at least one '.' that is not the last available byte, so
 * `data` presumably points at the '@' itself (confirm against the caller).
 *
 * rewind_p:   out; set to the number of bytes consumed to the left of `data`
 * link:       on success, the full address is handed to bufput()
 * data:       current position in the text
 * max_rewind: number of bytes that may be inspected to the left of `data`
 * size:       number of bytes available from `data` onwards
 * flags:      unused by this function
 *
 * Returns the number of bytes of the address at/after `data`, or 0 when
 * there is no match.
 */
size_t
sd_autolink__email(
	size_t *rewind_p,
	struct buf *link,
	uint8_t *data,
	size_t max_rewind,
	size_t size,
	unsigned int flags)
{
	size_t link_end, rewind;
	int nb = 0, np = 0;

	(void)flags;

	for (rewind = 0; rewind < max_rewind; ++rewind) {
		/* Step backwards with pointer arithmetic: negating the unsigned
		 * `rewind` to build a "negative" index would wrap around to a
		 * huge offset instead. */
		uint8_t c = *(data - rewind - 1);

		if (c == 0)
			break;

		if (isalnum(c))
			continue;

		if (strchr(".+-_", c) != NULL)
			continue;

		break;
	}

	/* no local part in front of us */
	if (rewind == 0)
		return 0;

	for (link_end = 0; link_end < size; ++link_end) {
		uint8_t c = data[link_end];

		if (isalnum(c))
			continue;

		if (c == '@')
			nb++;
		else if (c == '.' && link_end < size - 1)
			np++;
		else if (c != '-' && c != '_')
			break;
	}

	/* need exactly one '@' and at least one counted dot */
	if (link_end < 2 || nb != 1 || np == 0)
		return 0;

	link_end = autolink_delim(data, link_end, max_rewind, size);

	if (link_end == 0)
		return 0;

	bufput(link, data - rewind, link_end + rewind);
	*rewind_p = rewind;

	return link_end;
}

/*
 * Tries to recognise a "scheme://domain/..." link around `data`.
 *
 * `data[1]` and `data[2]` must both be '/', so `data` presumably points at
 * the ':' of "://" (confirm against the caller). The scheme is collected by
 * walking left over alphabetic bytes (at most `max_rewind`), and the rewound
 * text must pass sd_autolink_issafe(). The domain after "://" is validated by
 * check_domain(); the link then extends to the next whitespace byte and is
 * trimmed by autolink_delim().
 *
 * rewind_p:   out; set to the number of scheme bytes to the left of `data`
 * link:       on success, scheme + rest of the link is handed to bufput()
 * data:       current position in the text
 * max_rewind: number of bytes that may be inspected to the left of `data`
 * size:       number of bytes available from `data` onwards
 * flags:      if SD_AUTOLINK_SHORT_DOMAINS is set, domains without a dot
 *             are accepted
 *
 * Returns the number of bytes of the link at/after `data`, or 0 when there
 * is no match.
 */
size_t
sd_autolink__url(
	size_t *rewind_p,
	struct buf *link,
	uint8_t *data,
	size_t max_rewind,
	size_t size,
	unsigned int flags)
{
	size_t link_end, rewind = 0, domain_len;

	if (size < 4 || data[1] != '/' || data[2] != '/')
		return 0;

	/* Step backwards with pointer arithmetic: negating the unsigned
	 * `rewind` to build a "negative" index would wrap around to a huge
	 * offset instead. */
	while (rewind < max_rewind && isalpha(*(data - rewind - 1)))
		rewind++;

	if (!sd_autolink_issafe(data - rewind, size + rewind))
		return 0;

	/* skip over "://" to reach the domain */
	link_end = strlen("://");

	domain_len = check_domain(
		data + link_end,
		size - link_end,
		flags & SD_AUTOLINK_SHORT_DOMAINS);

	if (domain_len == 0)
		return 0;

	link_end += domain_len;
	while (link_end < size && !isspace(data[link_end]))
		link_end++;

	link_end = autolink_delim(data, link_end, max_rewind, size);

	if (link_end == 0)
		return 0;

	bufput(link, data - rewind, link_end + rewind);
	*rewind_p = rewind;

	return link_end;
}

/*
 * Tries to recognise a subreddit reference (r/foo or /r/foo) around `data`.
 *
 * rewind_p:       out; set to the prefix length (1 for "r", 2 for "/r")
 * link:           on success, prefix + link bytes are handed to bufput()
 * data:           current position in the text, the '/' after the 'r'
 * max_rewind:     number of bytes that may be inspected to the left of `data`
 * max_lookbehind: forwarded to check_reddit_autolink_prefix()
 * size:           number of bytes available from `data` onwards
 * no_slash:       out; set to 1 when the reference had no leading '/'
 *
 * Returns the number of bytes of the link at/after `data`, or 0 when there
 * is no match (in which case none of the out parameters are written).
 */
size_t
sd_autolink__subreddit(
	size_t *rewind_p,
	struct buf *link,
	uint8_t *data,
	size_t max_rewind,
	size_t max_lookbehind,
	size_t size,
	int *no_slash
	)
{
	/**
	 * This is meant to handle both r/foo and /r/foo style subreddit references.
	 * In a valid /?r/ link, `*data` will always point to the '/' after the first 'r'.
	 * In pseudo-regex, this matches something like:
	 *
	 * `(/|(?<=\b))r/(all-)?%subreddit%([-+]%subreddit%)*(/[\w\-/]*)?`
	 * where %subreddit% == `((t:)?\w{2,24}|reddit\.com)`
	 */
	size_t link_end;
	size_t rewind;
	int is_allminus = 0;

	rewind = check_reddit_autolink_prefix(data, max_rewind, max_lookbehind, size, 'r');
	if (!rewind)
		return 0;

	/* offset to the "meat" of the link */
	link_end = strlen("/");

	/* "r/all-foo-bar": '-' only acts as a separator after a leading "all-" */
	if (size >= link_end + 4 && strncasecmp((char*)data + link_end, "all-", 4) == 0)
		is_allminus = 1;

	do {
		size_t start = link_end;
		int max_length = 24;

		/* special case: /r/reddit.com (only subreddit containing '.'). */
		if ( size >= link_end+10 && strncasecmp((char*)data+link_end, "reddit.com", 10) == 0 ) {
			link_end += 10;
			/* Make sure there are no trailing characters (don't do
			 * any autolinking for /r/reddit.commission) */
			max_length = 10;
		}

		/* If not a special case, verify it begins with (t:)?[A-Za-z0-9] */
		else {
			/* support autolinking to timereddits, /r/t:when (1 April 2012) */
			if ( size > link_end+2 && strncasecmp((char*)data+link_end, "t:", 2) == 0 )
				link_end += 2;  /* Jump over the 't:' */

			/* the first character of a subreddit name must be a letter or digit.
			 * After a trailing '+' / '-' separator has been consumed, `link_end`
			 * may already be at `size`; check the bound before reading so we
			 * never look one byte past the buffer. */
			if (link_end >= size || !isalnum(data[link_end]))
				return 0;
			link_end += 1;
		}

		/* consume valid characters ([A-Za-z0-9_]) until we run out */
		while (link_end < size && (isalnum(data[link_end]) ||
							data[link_end] == '_'))
			link_end++;

		/* Only names of 2 to `max_length` bytes (any "t:" prefix included
		 * in the count) are autolinked; anything else is rejected.
		 * (see the chksrname function in reddit/.../validator.py) */
		if ( link_end-start < 2 || link_end-start > max_length )
			return 0;

		/* If we are linking to a multireddit, continue. The trailing
		 * `link_end++` steps over the separator; it is always non-zero
		 * here, so it never ends the loop by itself. */
	} while ( link_end < size && (data[link_end] == '+' || (is_allminus && data[link_end] == '-')) && link_end++ );

	/* optional trailing path: /[A-Za-z0-9_/-]* */
	if (link_end < size && data[link_end] == '/') {
		while (link_end < size && (isalnum(data[link_end]) ||
									data[link_end] == '_' ||
									data[link_end] == '/' ||
									data[link_end] == '-'))
			link_end++;
	}

	/* make the link */
	bufput(link, data - rewind, link_end + rewind);

	*no_slash = (rewind == 1);
	*rewind_p = rewind;

	return link_end;
}

/*
 * Tries to recognise a user reference (u/name or /u/name) around `data`.
 *
 * rewind_p:       out; set to the prefix length (1 for "u", 2 for "/u")
 * link:           on success, prefix + link bytes are handed to bufput()
 * data:           current position in the text, the '/' after the 'u'
 * max_rewind:     number of bytes that may be inspected to the left of `data`
 * max_lookbehind: forwarded to check_reddit_autolink_prefix()
 * size:           number of bytes available from `data` onwards
 * no_slash:       out; set to 1 when the reference had no leading '/'
 *
 * Returns the number of bytes of the link at/after `data`, or 0 when there
 * is no match (in which case none of the out parameters are written).
 */
size_t
sd_autolink__username(
	size_t *rewind_p,
	struct buf *link,
	uint8_t *data,
	size_t max_rewind,
	size_t max_lookbehind,
	size_t size,
	int *no_slash
	)
{
	size_t prefix_len;
	size_t pos;
	uint8_t ch;

	/* need at least "/", one name byte and one more byte */
	if (size < 3)
		return 0;

	prefix_len = check_reddit_autolink_prefix(data, max_rewind, max_lookbehind, size, 'u');
	if (prefix_len == 0)
		return 0;

	/* step over the '/' that `data` points at */
	pos = 1;

	/* first byte of the name: [A-Za-z0-9_-] */
	ch = data[pos];
	if (!(isalnum(ch) || ch == '_' || ch == '-'))
		return 0;

	/* remaining bytes: [A-Za-z0-9_/-], stop at the first one that isn't */
	for (pos++; pos < size; pos++) {
		ch = data[pos];
		if (!isalnum(ch) && ch != '_' && ch != '/' && ch != '-')
			break;
	}

	/* emit prefix + name */
	bufput(link, data - prefix_len, pos + prefix_len);

	*no_slash = (prefix_len == 1);
	*rewind_p = prefix_len;

	return pos;
}
else 2015-11-20 12:08:53 +00:00			`/*`
			`* Copyright (c) 2011, Vicent Marti`
			`*`
			`* Permission to use, copy, modify, and distribute this software for any`
			`* purpose with or without fee is hereby granted, provided that the above`
			`* copyright notice and this permission notice appear in all copies.`
			`*`
			`* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES`
			`* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF`
			`* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR`
			`* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES`
			`* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN`
			`* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF`
			`* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.`
			`*/`

			`#include "buffer.h"`
			`#include "autolink.h"`

			`#include <string.h>`
			`#include <stdlib.h>`
			`#include <stdio.h>`
			`#include <ctype.h>`

			`#if defined(_WIN32)`
			`#define strncasecmp _strnicmp`
			`#endif`

			`int`
			`sd_autolink_issafe(const uint8_t *link, size_t link_len)`
			`{`
			`static const size_t valid_uris_count = 14;`
			`static const char *valid_uris[] = {`
			`"http://", "https://", "ftp://", "mailto://",`
			`"/", "git://", "steam://", "irc://", "news://", "mumble://",`
			`"ssh://", "ircs://", "ts3server://", "#"`
			`};`

			`size_t i;`

			`for (i = 0; i < valid_uris_count; ++i) {`
			`size_t len = strlen(valid_uris[i]);`

			`if (link_len > len &&`
			`strncasecmp((char *)link, valid_uris[i], len) == 0 &&`
			`(isalnum(link[len]) \|\| link[len] == '#' \|\| link[len] == '/' \|\| link[len] == '?'))`
			`return 1;`
			`}`

			`return 0;`
			`}`

			`static size_t`
			`autolink_delim(uint8_t *data, size_t link_end, size_t max_rewind, size_t size)`
			`{`
			`uint8_t cclose, copen = 0;`
			`size_t i;`

			`for (i = 0; i < link_end; ++i)`
			`if (data[i] == '<') {`
			`link_end = i;`
			`break;`
			`}`

			`while (link_end > 0) {`
			`uint8_t c = data[link_end - 1];`

			`if (c == 0)`
			`break;`

			`if (strchr("?!.,", c) != NULL)`
			`link_end--;`

			`else if (c == ';') {`
			`size_t new_end = link_end - 2;`

			`while (new_end > 0 && isalpha(data[new_end]))`
			`new_end--;`

			`if (new_end < link_end - 2 && data[new_end] == '&')`
			`link_end = new_end;`
			`else`
			`link_end--;`
			`}`
			`else break;`
			`}`

			`if (link_end == 0)`
			`return 0;`

			`cclose = data[link_end - 1];`

			`switch (cclose) {`
			`case '"': copen = '"'; break;`
			`case '\'': copen = '\''; break;`
			`case ')': copen = '('; break;`
			`case ']': copen = '['; break;`
			`case '}': copen = '{'; break;`
			`}`

			`if (copen != 0) {`
			`size_t closing = 0;`
			`size_t opening = 0;`
			`size_t i = 0;`

			`/* Try to close the final punctuation sign in this same line;`
			`* if we managed to close it outside of the URL, that means that it's`
			`* not part of the URL. If it closes inside the URL, that means it`
			`* is part of the URL.`
			`*`
			`* Examples:`
			`*`
			`* foo http://www.pokemon.com/Pikachu_(Electric) bar`
			`* => http://www.pokemon.com/Pikachu_(Electric)`
			`*`
			`* foo (http://www.pokemon.com/Pikachu_(Electric)) bar`
			`* => http://www.pokemon.com/Pikachu_(Electric)`
			`*`
			`* foo http://www.pokemon.com/Pikachu_(Electric)) bar`
			`* => http://www.pokemon.com/Pikachu_(Electric))`
			`*`
			`* (foo http://www.pokemon.com/Pikachu_(Electric)) bar`
			`* => foo http://www.pokemon.com/Pikachu_(Electric)`
			`*/`

			`while (i < link_end) {`
			`if (data[i] == copen)`
			`opening++;`
			`else if (data[i] == cclose)`
			`closing++;`

			`i++;`
			`}`

			`if (closing != opening)`
			`link_end--;`
			`}`

			`return link_end;`
			`}`

			`/*`
			* Checks that `prefix_char` occurs on a word boundary just before `data`,
			* where `data` points to the character to search to the left of, and a word boundary
			`* is (currently) a whitespace character, punctuation, or the start of the string.`
			`* Returns the length of the prefix.`
			`*/`
			`static int`
			`check_reddit_autolink_prefix(`
			`const uint8_t* data,`
			`size_t max_rewind,`
			`size_t max_lookbehind,`
			`size_t size,`
			`char prefix_char`
			`)`
			`{`
			/* Make sure this `/` is part of `/?r/` */
			`if (size < 2 \|\| max_rewind < 1 \|\| data[-1] != prefix_char)`
			`return 0;`

			/* Not at the start of the buffer, no inlines to the immediate left of the `prefix_char` */
			`if (max_rewind > 1) {`
			`const char boundary = data[-2];`
			`if (boundary == '/')`
			`return 2;`
			`/**`
			`* Here's where our lack of unicode-awareness bites us. We don't correctly`
			`* match punctuation / whitespace characters for the boundary, because we`
			`* reject valid cases like "。r/example" (note the fullwidth period.)`
			`*`
			`* A better implementation might try to rewind over bytes with the 8th bit set, try`
			`* to decode them to a valid codepoint, then do a unicode-aware check on the codepoint.`
			`*/`
			`else if (ispunct(boundary) \|\| isspace(boundary))`
			`return 1;`
			`else`
			`return 0;`
			`} else if (max_lookbehind > 2) {`
			/* There's an inline element just left of the `prefix_char`, is it an escaped forward
			`* slash? bail out so we correctly handle stuff like "\/r/foo". This will also correctly`
			`* allow "\\/r/foo".`
			`*/`
			`if (data[-2] == '/' && data[-3] == '\\')`
			`return 0;`
			`}`

			`/* Must be a new-style shortlink with nothing relevant to the left of it. */`
			`return 1;`
			`}`

			`static size_t`
			`check_domain(uint8_t *data, size_t size, int allow_short)`
			`{`
			`size_t i, np = 0;`

			`if (!isalnum(data[0]))`
			`return 0;`

			`for (i = 1; i < size - 1; ++i) {`
			`if (data[i] == '.') np++;`
			`else if (!isalnum(data[i]) && data[i] != '-') break;`
			`}`

			`if (allow_short) {`
			`/* We don't need a valid domain in the strict sense (with`
			`* least one dot; so just make sure it's composed of valid`
			`* domain characters and return the length of the the valid`
			`* sequence. */`
			`return i;`
			`} else {`
			`/* a valid domain needs to have at least a dot.`
			`* that's as far as we get */`
			`return np ? i : 0;`
			`}`
			`}`

			`size_t`
			`sd_autolink__www(`
			`size_t *rewind_p,`
			`struct buf *link,`
			`uint8_t *data,`
			`size_t max_rewind,`
			`size_t size,`
			`unsigned int flags)`
			`{`
			`size_t link_end;`

			`if (max_rewind > 0 && !ispunct(data[-1]) && !isspace(data[-1]))`
			`return 0;`

			`if (size < 4 \|\| memcmp(data, "www.", strlen("www.")) != 0)`
			`return 0;`

			`link_end = check_domain(data, size, 0);`

			`if (link_end == 0)`
			`return 0;`

			`while (link_end < size && !isspace(data[link_end]))`
			`link_end++;`

			`link_end = autolink_delim(data, link_end, max_rewind, size);`

			`if (link_end == 0)`
			`return 0;`

			`bufput(link, data, link_end);`
			`*rewind_p = 0;`

			`return (int)link_end;`
			`}`

			`size_t`
			`sd_autolink__email(`
			`size_t *rewind_p,`
			`struct buf *link,`
			`uint8_t *data,`
			`size_t max_rewind,`
			`size_t size,`
			`unsigned int flags)`
			`{`
			`size_t link_end, rewind;`
			`int nb = 0, np = 0;`

			`for (rewind = 0; rewind < max_rewind; ++rewind) {`
			`uint8_t c = data[-rewind - 1];`

			`if (c == 0)`
			`break;`

			`if (isalnum(c))`
			`continue;`

			`if (strchr(".+-_", c) != NULL)`
			`continue;`

			`break;`
			`}`

			`if (rewind == 0)`
			`return 0;`

			`for (link_end = 0; link_end < size; ++link_end) {`
			`uint8_t c = data[link_end];`

			`if (isalnum(c))`
			`continue;`

			`if (c == '@')`
			`nb++;`
			`else if (c == '.' && link_end < size - 1)`
			`np++;`
			`else if (c != '-' && c != '_')`
			`break;`
			`}`

			`if (link_end < 2 \|\| nb != 1 \|\| np == 0)`
			`return 0;`

			`link_end = autolink_delim(data, link_end, max_rewind, size);`

			`if (link_end == 0)`
			`return 0;`

			`bufput(link, data - rewind, link_end + rewind);`
			`*rewind_p = rewind;`

			`return link_end;`
			`}`

			`size_t`
			`sd_autolink__url(`
			`size_t *rewind_p,`
			`struct buf *link,`
			`uint8_t *data,`
			`size_t max_rewind,`
			`size_t size,`
			`unsigned int flags)`
			`{`
			`size_t link_end, rewind = 0, domain_len;`

			`if (size < 4 \|\| data[1] != '/' \|\| data[2] != '/')`
			`return 0;`

			`while (rewind < max_rewind && isalpha(data[-rewind - 1]))`
			`rewind++;`

			`if (!sd_autolink_issafe(data - rewind, size + rewind))`
			`return 0;`

			`link_end = strlen("://");`

			`domain_len = check_domain(`
			`data + link_end,`
			`size - link_end,`
			`flags & SD_AUTOLINK_SHORT_DOMAINS);`

			`if (domain_len == 0)`
			`return 0;`

			`link_end += domain_len;`
			`while (link_end < size && !isspace(data[link_end]))`
			`link_end++;`

			`link_end = autolink_delim(data, link_end, max_rewind, size);`

			`if (link_end == 0)`
			`return 0;`

			`bufput(link, data - rewind, link_end + rewind);`
			`*rewind_p = rewind;`

			`return link_end;`
			`}`

			`size_t`
			`sd_autolink__subreddit(`
			`size_t *rewind_p,`
			`struct buf *link,`
			`uint8_t *data,`
			`size_t max_rewind,`
			`size_t max_lookbehind,`
			`size_t size,`
			`int *no_slash`
			`)`
			`{`
			`/**`
			`* This is meant to handle both r/foo and /r/foo style subreddit references.`
			* In a valid /?r/ link, `*data` will always point to the '/' after the first 'r'.
			`* In pseudo-regex, this matches something like:`
			`*`
			* `(/\|(?<=\b))r/(all-)?%subreddit%([-+]%subreddit%)(/[\w\-/])?`
			* where %subreddit% == `((t:)?\w{2,24}\|reddit\.com)`
			`*/`
			`size_t link_end;`
			`size_t rewind;`
			`int is_allminus = 0;`

			`rewind = check_reddit_autolink_prefix(data, max_rewind, max_lookbehind, size, 'r');`
			`if (!rewind)`
			`return 0;`

			`/* offset to the "meat" of the link */`
			`link_end = strlen("/");`

			`if (size >= link_end + 4 && strncasecmp((char*)data + link_end, "all-", 4) == 0)`
			`is_allminus = 1;`

			`do {`
			`size_t start = link_end;`
			`int max_length = 24;`

			`/* special case: /r/reddit.com (only subreddit containing '.'). */`
			`if ( size >= link_end+10 && strncasecmp((char*)data+link_end, "reddit.com", 10) == 0 ) {`
			`link_end += 10;`
			`/* Make sure there are no trailing characters (don't do`
			`* any autolinking for /r/reddit.commission) */`
			`max_length = 10;`
			`}`

			`/* If not a special case, verify it begins with (t:)?[A-Za-z0-9] */`
			`else {`
			`/* support autolinking to timereddits, /r/t:when (1 April 2012) */`
			`if ( size > link_end+2 && strncasecmp((char*)data+link_end, "t:", 2) == 0 )`
			`link_end += 2; /* Jump over the 't:' */`

			`/* the first character of a subreddit name must be a letter or digit */`
			`if (!isalnum(data[link_end]))`
			`return 0;`
			`link_end += 1;`
			`}`

			`/* consume valid characters ([A-Za-z0-9_]) until we run out */`
			`while (link_end < size && (isalnum(data[link_end]) \|\|`
			`data[link_end] == '_'))`
			`link_end++;`

			`/* valid subreddit names are between 3 and 21 characters, with`
			`* some subreddits having 2-character names. Don't bother with`
			`* autolinking for anything outside this length range.`
			`* (chksrname function in reddit/.../validator.py) */`
			`if ( link_end-start < 2 \|\| link_end-start > max_length )`
			`return 0;`

			`/* If we are linking to a multireddit, continue */`
			`} while ( link_end < size && (data[link_end] == '+' \|\| (is_allminus && data[link_end] == '-')) && link_end++ );`

			`if (link_end < size && data[link_end] == '/') {`
			`while (link_end < size && (isalnum(data[link_end]) \|\|`
			`data[link_end] == '_' \|\|`
			`data[link_end] == '/' \|\|`
			`data[link_end] == '-'))`
			`link_end++;`
			`}`

			`/* make the link */`
			`bufput(link, data - rewind, link_end + rewind);`

			`*no_slash = (rewind == 1);`
			`*rewind_p = rewind;`

			`return link_end;`
			`}`

			`size_t`
			`sd_autolink__username(`
			`size_t *rewind_p,`
			`struct buf *link,`
			`uint8_t *data,`
			`size_t max_rewind,`
			`size_t max_lookbehind,`
			`size_t size,`
			`int *no_slash`
			`)`
			`{`
			`size_t link_end;`
			`size_t rewind;`

			`if (size < 3)`
			`return 0;`

			`rewind = check_reddit_autolink_prefix(data, max_rewind, max_lookbehind, size, 'u');`
			`if (!rewind)`
			`return 0;`

			`link_end = strlen("/");`

			`/* the first letter of a username must... well, be valid, we don't care otherwise */`
			`if (!isalnum(data[link_end]) && data[link_end] != '_' && data[link_end] != '-')`
			`return 0;`
			`link_end += 1;`

			`/* consume valid characters ([A-Za-z0-9_-/]) until we run out */`
			`while (link_end < size && (isalnum(data[link_end]) \|\|`
			`data[link_end] == '_' \|\|`
			`data[link_end] == '/' \|\|`
			`data[link_end] == '-'))`
			`link_end++;`

			`/* make the link */`
			`bufput(link, data - rewind, link_end + rewind);`

			`*no_slash = (rewind == 1);`
			`*rewind_p = rewind;`

			`return link_end;`
			`}`