else/SnudownTest/html_entities.gperf

293 lines
2.9 KiB
Plaintext

%language=ANSI-C
%define lookup-function-name is_allowed_named_entity
%compare-strncmp
%readonly-tables
%define hash-function-name hash_html_entity
%enum
%includes
%{
#include <stdint.h>
#include <stdlib.h>
/* Largest value a numeric entity may carry; parsers tend to choke on
 * entities with values greater than this. */
const uint32_t MAX_NUM_ENTITY_VAL = 0x10ffff;

/* Any numeric entity longer than this is necessarily above
 * MAX_NUM_ENTITY_VAL (0x10ffff is 6 hex digits / 7 decimal digits), so
 * over-long input can be rejected up front instead of dealing with integer
 * overflow while accumulating its value.
 * NOTE(review): presumably counted in digits -- confirm against the caller. */
const size_t MAX_NUM_ENTITY_LEN = 7;

/*
 * Report whether a numeric character reference value is safe to emit.
 *
 * Some XML parsers choke on entities with certain values (mostly control
 * characters). The original note attributes this list to lxml; all bounds
 * below are inclusive:
 *
 *   0 - 8, 11 - 12, 14 - 31      control characters
 *   55296 - 57343 (0xD800-0xDFFF) surrogate range
 *   65534 - 65535 (0xFFFE-0xFFFF)
 *   anything above MAX_NUM_ENTITY_VAL
 *
 * What remains is exactly the XML 1.0 "Char" production, which is spelled
 * out below as an accept-list.
 *
 * entity_val: the already-parsed numeric value of the entity.
 * Returns 1 when the value is allowed, 0 otherwise.
 *
 * Declared `static inline`: a plain `inline` definition provides no external
 * definition in C99/C11, so a call that is not inlined fails to link.
 */
static inline int is_valid_numeric_entity(uint32_t entity_val)
{
	return entity_val == 0x9	/* tab */
	    || entity_val == 0xA	/* line feed */
	    || entity_val == 0xD	/* carriage return */
	    || (entity_val >= 0x20 && entity_val <= 0xD7FF)
	    || (entity_val >= 0xE000 && entity_val <= 0xFFFD)
	    || (entity_val >= 0x10000 && entity_val <= MAX_NUM_ENTITY_VAL);
}
%}
%%
# Named HTML entities recognized by the generated lookup function
# (is_allowed_named_entity). Each keyword is the complete entity text,
# including the leading '&' and the trailing ';'.
&AElig;
&Aacute;
&Acirc;
&Agrave;
&Alpha;
&Aring;
&Atilde;
&Auml;
&Beta;
&Ccedil;
&Chi;
&Dagger;
&Delta;
&ETH;
&Eacute;
&Ecirc;
&Egrave;
&Epsilon;
&Eta;
&Euml;
&Gamma;
&Iacute;
&Icirc;
&Igrave;
&Iota;
&Iuml;
&Kappa;
&Lambda;
&Mu;
&Ntilde;
&Nu;
&OElig;
&Oacute;
&Ocirc;
&Ograve;
&Omega;
&Omicron;
&Oslash;
&Otilde;
&Ouml;
&Phi;
&Pi;
&Prime;
&Psi;
&Rho;
&Scaron;
&Sigma;
&THORN;
&Tau;
&Theta;
&Uacute;
&Ucirc;
&Ugrave;
&Upsilon;
&Uuml;
&Xi;
&Yacute;
&Yuml;
&Zeta;
&aacute;
&acirc;
&acute;
&aelig;
&agrave;
&alefsym;
&alpha;
&amp;
&and;
&ang;
&apos;
&aring;
&asymp;
&atilde;
&auml;
&bdquo;
&beta;
&brvbar;
&bull;
&cap;
&ccedil;
&cedil;
&cent;
&chi;
&circ;
&clubs;
&cong;
&copy;
&crarr;
&cup;
&curren;
&dArr;
&dagger;
&darr;
&deg;
&delta;
&diams;
&divide;
&eacute;
&ecirc;
&egrave;
&empty;
&emsp;
&ensp;
&epsilon;
&equiv;
&eta;
&eth;
&euml;
&euro;
&exist;
&fnof;
&forall;
&frac12;
&frac14;
&frac34;
&frasl;
&gamma;
&ge;
&gt;
&hArr;
&harr;
&hearts;
&hellip;
&iacute;
&icirc;
&iexcl;
&igrave;
&image;
&infin;
&int;
&iota;
&iquest;
&isin;
&iuml;
&kappa;
&lArr;
&lambda;
&lang;
&laquo;
&larr;
&lceil;
&ldquo;
&le;
&lfloor;
&lowast;
&loz;
&lrm;
&lsaquo;
&lsquo;
&lt;
&macr;
&mdash;
&micro;
&middot;
&minus;
&mu;
&nabla;
&nbsp;
&ndash;
&ne;
&ni;
&not;
&notin;
&nsub;
&ntilde;
&nu;
&oacute;
&ocirc;
&oelig;
&ograve;
&oline;
&omega;
&omicron;
&oplus;
&or;
&ordf;
&ordm;
&oslash;
&otilde;
&otimes;
&ouml;
&para;
&part;
&permil;
&perp;
&phi;
&pi;
&piv;
&plusmn;
&pound;
&prime;
&prod;
&prop;
&psi;
&quot;
&rArr;
&radic;
&rang;
&raquo;
&rarr;
&rceil;
&rdquo;
&real;
&reg;
&rfloor;
&rho;
&rlm;
&rsaquo;
&rsquo;
&sbquo;
&scaron;
&sdot;
&sect;
&shy;
&sigma;
&sigmaf;
&sim;
&spades;
&sub;
&sube;
&sum;
&sup1;
&sup2;
&sup3;
&sup;
&supe;
&szlig;
&tau;
&there4;
&theta;
&thetasym;
&thinsp;
&thorn;
&tilde;
&times;
&trade;
&uArr;
&uacute;
&uarr;
&ucirc;
&ugrave;
&uml;
&upsih;
&upsilon;
&uuml;
&weierp;
&xi;
&yacute;
&yen;
&yuml;
&zeta;
&zwj;
&zwnj;