#!/usr/bin/python # -*- coding: utf-8 -*- import snudown import unittest import itertools import cStringIO as StringIO cases = { '': '', 'http://www.reddit.com': '
\n', 'http://www.reddit.com/a\x00b': '\n', 'foo@example.com': '\n', '[foo](http://en.wikipedia.org/wiki/Link_(film\))': '\n', '(http://tsfr.org)': '\n', '[A link with a /r/subreddit in it](/lol)': 'A link with a /r/subreddit in it
\n', '[A link with a http://www.url.com in it](/lol)': 'A link with a http://www.url.com in it
\n', '[Empty Link]()': '[Empty Link]()
\n', 'http://en.wikipedia.org/wiki/café_racer': 'http://en.wikipedia.org/wiki/café_racer
\n', '#####################################################hi': 'Words words /r/test words
\n', '/r/': '/r/
\n', r'escaped \/r/test': 'escaped /r/test
\n', 'ampersands http://www.google.com?test&blah': 'ampersands http://www.google.com?test&blah
\n', '[_regular_ link with nesting](/test)': '\n', ' www.a.co?with&test': '\n', r'Normal^superscript': 'Normalsuperscript
\n', r'Escape\^superscript': 'Escape^superscript
\n', r'~~normal strikethrough~~': 'normal strikethrough
~~escaped strikethrough~~
\n', 'anywhere\x03, you': 'anywhere, you
\n', '[Test](//test)': '\n', '[Test](//#test)': '\n', '[Test](#test)': '\n', '[Test](git://github.com)': '\n', '[Speculation](//?)': '\n', '/r/sr_with_underscores': '\n', '[Test](///#test)': '\n', '/r/multireddit+test+yay': '\n', '<test>
\n', 'words_with_underscores': 'words_with_underscores
\n', 'words*with*asterisks': 'wordswithasterisks
\n', '~test': '~test
\n', '/u/test': '\n', '/u/test/m/test test': '/u/test/m/test test
\n', '/U/nope': '/U/nope
\n', '/r/test/m/test test': '/r/test/m/test test
\n', '/r/test/w/test test': '/r/test/w/test test
\n', '/r/test/comments/test test': '\n', '/u/test/commentscommentscommentscommentscommentscommentscomments/test test': '/u/test/commentscommentscommentscommentscommentscommentscomments/test test
\n', 'a /u/reddit': '\n', 'u/reddit': '\n', 'a u/reddit': 'a u/reddit
\n', 'a u/reddit/foobaz': '\n', 'foo:u/reddit': 'foo:u/reddit
\n', 'fuu/reddit': 'fuu/reddit
\n', # Don't treat unicode punctuation as a word boundary for now u'a。u/reddit'.encode('utf8'): u'a。u/reddit
\n'.encode('utf8'), '\\/u/me': '/u/me
\n', '\\\\/u/me': '\\/u/me
\n', '\\u/me': '\\u/me
\n', '\\\\u/me': '\\u/me
\n', 'u\\/me': 'u/me
\n', '*u/me*': '\n', 'foo^u/me': 'foou/me
\n', '*foo*u/me': 'foou/me
\n', 'u/me': '\n', '/u/me': '\n', 'u/m': 'u/m
\n', '/u/m': '/u/m
\n', '/f/oobar': '/f/oobar
\n', 'f/oobar': 'f/oobar
\n', '/r/test/commentscommentscommentscommentscommentscommentscomments/test test': '/r/test/commentscommentscommentscommentscommentscommentscomments/test test
\n', 'blah \\': 'blah \\
\n', '/r/whatever: fork': '/r/whatever: fork
\n', '/r/t:timereddit': '\n', '/r/reddit.com': '\n', '/r/not.cool': '/r/not.cool
\n', '/r/very+clever+multireddit+reddit.com+t:fork+yay': '/r/very+clever+multireddit+reddit.com+t:fork+yay
\n', '/r/t:heatdeathoftheuniverse': '\n', '/r/all-minus-something': '\n', '/r/notall-minus': '/r/notall-minus
\n', 'a /r/reddit.com': '\n', 'a r/reddit.com': '\n', 'foo:r/reddit.com': 'foo:r/reddit.com
\n', 'foobar/reddit.com': 'foobar/reddit.com
\n', u'a。r/reddit.com'.encode('utf8'): u'a。r/reddit.com
\n'.encode('utf8'), '/R/reddit.com': '/R/reddit.com
\n', '/r/irc://foo.bar/': '/r/irc://foo.bar/
\n', '/r/t:irc//foo.bar/': '/r/t:irc//foo.bar/
\n', '/r/all-irc://foo.bar/': '/r/all-irc://foo.bar/
\n', '/r/foo+irc://foo.bar/': '/r/foo+irc://foo.bar/
\n', '/r/www.example.com': '/r/www.example.com
\n', '.http://reddit.com': '\n', '[r:///u/http://www.reddit.com/user/reddit
\n', 'www.http://example.com/': '\n', ('|' * 5) + '\n' + ('-|' * 5) + '\n|\n': '\n' * 4) + ' | |||
---|---|---|---|
\n |
\n' * 1) + ' |
---|
\n |
\n' * 64) + ' | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
\n |
' + ('|' * 66) + '\n' + ('-|' * 66) + '\n|' + '
\n', 'ϑ': 'ϑ
\n', '&foobar;': '&foobar;
\n', ' ': ' 
\n', 'foobar;': '&#foobar;
\n', 'oobar;': 'oobar;
\n', '': '�
\n', 'c': 'c
\n', '~': '~
\n', '~': '~
\n', '½': '½
\n', 'aaa½aaa': 'aaa½aaa
\n', '&': '&
\n', '&;': '&;
\n', '': '&#;
\n', '': '&#;
\n', '': '&#x;
\n', } # Test that every numeric entity is encoded as # it should be. ILLEGAL_NUMERIC_ENTS = frozenset(itertools.chain( xrange(0, 9), xrange(11, 13), xrange(14, 32), xrange(55296, 57344), xrange(65534, 65536), )) ent_test_key = '' ent_test_val = '' for i in xrange(65550): ent_testcase = '%d;%x;' % (i, i) ent_test_key += ent_testcase if i in ILLEGAL_NUMERIC_ENTS: ent_test_val += ent_testcase.replace('&', '&') else: ent_test_val += ent_testcase cases[ent_test_key] = '%s
\n' % ent_test_val wiki_cases = { '