292 lines
2.9 KiB
Text
292 lines
2.9 KiB
Text
%language=ANSI-C
|
|
%define lookup-function-name is_allowed_named_entity
|
|
%compare-strncmp
|
|
%readonly-tables
|
|
%define hash-function-name hash_html_entity
|
|
%enum
|
|
%includes
|
|
%{
|
|
#include <stdint.h>
#include <stdlib.h>
|
|
|
|
/* Parsers tend to choke on entities with values greater than this
 * (0x10ffff is the highest Unicode code point). */
const uint32_t MAX_NUM_ENTITY_VAL = 0x10ffff;

/* Any numeric entity with more digits than this is obviously above
 * MAX_NUM_ENTITY_VAL; used to avoid dealing with overflows. */
const size_t MAX_NUM_ENTITY_LEN = 7;

/*
 * Decide whether a numeric character reference with value `entity_val`
 * is acceptable.
 *
 * Some XML parsers choke on entities with certain values (mostly control
 * characters). The following values are rejected:
 *
 *   0..8, 11, 12, 14..31   control characters
 *   55296..57343           0xD800..0xDFFF
 *   65534, 65535           0xFFFE, 0xFFFF
 *   > MAX_NUM_ENTITY_VAL   beyond 0x10ffff
 *
 * Tab (9), line feed (10) and carriage return (13) are accepted.
 *
 * Returns 1 when the value is acceptable and 0 otherwise.
 *
 * The function is `static inline` rather than plain `inline`: under C99
 * semantics a plain `inline` definition emits no external symbol, so any
 * call the compiler chooses not to inline fails at link time.
 */
static inline int is_valid_numeric_entity(uint32_t entity_val)
{
    /* Below the space character only TAB, LF and CR are allowed. */
    if (entity_val < 32) {
        return entity_val == 9 || entity_val == 10 || entity_val == 13;
    }

    /* 0xD800..0xDFFF */
    if (entity_val >= 55296 && entity_val <= 57343) {
        return 0;
    }

    /* 0xFFFE and 0xFFFF */
    if (entity_val == 65534 || entity_val == 65535) {
        return 0;
    }

    return entity_val <= MAX_NUM_ENTITY_VAL;
}
|
|
|
|
%}
|
|
%%
|
|
Æ
|
|
Á
|
|
Â
|
|
À
|
|
Α
|
|
Å
|
|
Ã
|
|
Ä
|
|
Β
|
|
Ç
|
|
Χ
|
|
‡
|
|
Δ
|
|
Ð
|
|
É
|
|
Ê
|
|
È
|
|
Ε
|
|
Η
|
|
Ë
|
|
Γ
|
|
Í
|
|
Î
|
|
Ì
|
|
Ι
|
|
Ï
|
|
Κ
|
|
Λ
|
|
Μ
|
|
Ñ
|
|
Ν
|
|
Œ
|
|
Ó
|
|
Ô
|
|
Ò
|
|
Ω
|
|
Ο
|
|
Ø
|
|
Õ
|
|
Ö
|
|
Φ
|
|
Π
|
|
″
|
|
Ψ
|
|
Ρ
|
|
Š
|
|
Σ
|
|
Þ
|
|
Τ
|
|
Θ
|
|
Ú
|
|
Û
|
|
Ù
|
|
Υ
|
|
Ü
|
|
Ξ
|
|
Ý
|
|
Ÿ
|
|
Ζ
|
|
á
|
|
â
|
|
´
|
|
æ
|
|
à
|
|
ℵ
|
|
α
|
|
&
|
|
∧
|
|
∠
|
|
'
|
|
å
|
|
≈
|
|
ã
|
|
ä
|
|
„
|
|
β
|
|
¦
|
|
•
|
|
∩
|
|
ç
|
|
¸
|
|
¢
|
|
χ
|
|
ˆ
|
|
♣
|
|
≅
|
|
©
|
|
↵
|
|
∪
|
|
¤
|
|
⇓
|
|
†
|
|
↓
|
|
°
|
|
δ
|
|
♦
|
|
÷
|
|
é
|
|
ê
|
|
è
|
|
∅
|
|
 
|
|
 
|
|
ε
|
|
≡
|
|
η
|
|
ð
|
|
ë
|
|
€
|
|
∃
|
|
ƒ
|
|
∀
|
|
½
|
|
¼
|
|
¾
|
|
⁄
|
|
γ
|
|
≥
|
|
>
|
|
⇔
|
|
↔
|
|
♥
|
|
…
|
|
í
|
|
î
|
|
¡
|
|
ì
|
|
ℑ
|
|
∞
|
|
∫
|
|
ι
|
|
¿
|
|
∈
|
|
ï
|
|
κ
|
|
⇐
|
|
λ
|
|
⟨
|
|
«
|
|
←
|
|
⌈
|
|
“
|
|
≤
|
|
⌊
|
|
∗
|
|
◊
|
|
‎
|
|
‹
|
|
‘
|
|
<
|
|
¯
|
|
—
|
|
µ
|
|
·
|
|
−
|
|
μ
|
|
∇
|
|
|
|
–
|
|
≠
|
|
∋
|
|
¬
|
|
∉
|
|
⊄
|
|
ñ
|
|
ν
|
|
ó
|
|
ô
|
|
œ
|
|
ò
|
|
‾
|
|
ω
|
|
ο
|
|
⊕
|
|
∨
|
|
ª
|
|
º
|
|
ø
|
|
õ
|
|
⊗
|
|
ö
|
|
¶
|
|
∂
|
|
‰
|
|
⊥
|
|
φ
|
|
π
|
|
ϖ
|
|
±
|
|
£
|
|
′
|
|
∏
|
|
∝
|
|
ψ
|
|
"
|
|
⇒
|
|
√
|
|
⟩
|
|
»
|
|
→
|
|
⌉
|
|
”
|
|
ℜ
|
|
®
|
|
⌋
|
|
ρ
|
|
‏
|
|
›
|
|
’
|
|
‚
|
|
š
|
|
⋅
|
|
§
|
|
­
|
|
σ
|
|
ς
|
|
∼
|
|
♠
|
|
⊂
|
|
⊆
|
|
∑
|
|
¹
|
|
²
|
|
³
|
|
⊃
|
|
⊇
|
|
ß
|
|
τ
|
|
∴
|
|
θ
|
|
ϑ
|
|
 
|
|
þ
|
|
˜
|
|
×
|
|
™
|
|
⇑
|
|
ú
|
|
↑
|
|
û
|
|
ù
|
|
¨
|
|
ϒ
|
|
υ
|
|
ü
|
|
℘
|
|
ξ
|
|
ý
|
|
¥
|
|
ÿ
|
|
ζ
|
|
‍
|
|
‌
|