%language=ANSI-C
%define lookup-function-name is_allowed_named_entity
%compare-strncmp
%readonly-tables
%define hash-function-name hash_html_entity
%enum
%includes
%{
/*
 * gperf input describing the named HTML entities accepted by
 * is_allowed_named_entity(), plus helpers for validating numeric
 * entities. Each keyword below is a complete entity reference,
 * including the leading '&' and trailing ';'.
 */
#include <stddef.h>
#include <stdint.h>

/* Parsers tend to choke on entities with values greater than this. */
const uint32_t MAX_NUM_ENTITY_VAL = 0x10ffff;

/* Any numeric entity longer than this is obviously above
 * MAX_NUM_ENTITY_VAL; checking the length first avoids having to deal
 * with overflow while parsing the digits. */
const size_t MAX_NUM_ENTITY_LEN = 7;

/*
 * Returns non-zero when entity_val is a code point that is safe to emit
 * as a numeric entity, and 0 otherwise.
 *
 * Some XML parsers choke on entities with certain values (mostly
 * control characters). According to lxml the problematic values are
 * the following, all bounds inclusive:
 *
 *      0 ..     8
 *     11 ..    12
 *     14 ..    31
 *  55296 .. 57343   (0xD800 .. 0xDFFF)
 *  65534 .. 65535   (0xFFFE .. 0xFFFF)
 *
 * Anything above MAX_NUM_ENTITY_VAL is rejected as well.
 *
 * Declared `static inline` rather than plain `inline`: under C99 and
 * later, a plain `inline` definition provides no external definition,
 * so any call the compiler chooses not to inline fails at link time.
 */
static inline int
is_valid_numeric_entity(uint32_t entity_val)
{
	return (entity_val > 8
		&& (entity_val != 11 && entity_val != 12)
		&& (entity_val < 14 || entity_val > 31)
		&& (entity_val < 55296 || entity_val > 57343)
		&& (entity_val != 65534 && entity_val != 65535)
		&& entity_val <= MAX_NUM_ENTITY_VAL);
}
%}
%%
&AElig;
&Aacute;
&Acirc;
&Agrave;
&Alpha;
&Aring;
&Atilde;
&Auml;
&Beta;
&Ccedil;
&Chi;
&Dagger;
&Delta;
&ETH;
&Eacute;
&Ecirc;
&Egrave;
&Epsilon;
&Eta;
&Euml;
&Gamma;
&Iacute;
&Icirc;
&Igrave;
&Iota;
&Iuml;
&Kappa;
&Lambda;
&Mu;
&Ntilde;
&Nu;
&OElig;
&Oacute;
&Ocirc;
&Ograve;
&Omega;
&Omicron;
&Oslash;
&Otilde;
&Ouml;
&Phi;
&Pi;
&Prime;
&Psi;
&Rho;
&Scaron;
&Sigma;
&THORN;
&Tau;
&Theta;
&Uacute;
&Ucirc;
&Ugrave;
&Upsilon;
&Uuml;
&Xi;
&Yacute;
&Yuml;
&Zeta;
&aacute;
&acirc;
&acute;
&aelig;
&agrave;
&alefsym;
&alpha;
&amp;
&and;
&ang;
&apos;
&aring;
&asymp;
&atilde;
&auml;
&bdquo;
&beta;
&brvbar;
&bull;
&cap;
&ccedil;
&cedil;
&cent;
&chi;
&circ;
&clubs;
&cong;
&copy;
&crarr;
&cup;
&curren;
&dArr;
&dagger;
&darr;
&deg;
&delta;
&diams;
&divide;
&eacute;
&ecirc;
&egrave;
&empty;
&emsp;
&ensp;
&epsilon;
&equiv;
&eta;
&eth;
&euml;
&euro;
&exist;
&fnof;
&forall;
&frac12;
&frac14;
&frac34;
&frasl;
&gamma;
&ge;
&gt;
&hArr;
&harr;
&hearts;
&hellip;
&iacute;
&icirc;
&iexcl;
&igrave;
&image;
&infin;
&int;
&iota;
&iquest;
&isin;
&iuml;
&kappa;
&lArr;
&lambda;
&lang;
&laquo;
&larr;
&lceil;
&ldquo;
&le;
&lfloor;
&lowast;
&loz;
&lrm;
&lsaquo;
&lsquo;
&lt;
&macr;
&mdash;
&micro;
&middot;
&minus;
&mu;
&nabla;
&nbsp;
&ndash;
&ne;
&ni;
&not;
&notin;
&nsub;
&ntilde;
&nu;
&oacute;
&ocirc;
&oelig;
&ograve;
&oline;
&omega;
&omicron;
&oplus;
&or;
&ordf;
&ordm;
&oslash;
&otilde;
&otimes;
&ouml;
&para;
&part;
&permil;
&perp;
&phi;
&pi;
&piv;
&plusmn;
&pound;
&prime;
&prod;
&prop;
&psi;
&quot;
&rArr;
&radic;
&rang;
&raquo;
&rarr;
&rceil;
&rdquo;
&real;
&reg;
&rfloor;
&rho;
&rlm;
&rsaquo;
&rsquo;
&sbquo;
&scaron;
&sdot;
&sect;
&shy;
&sigma;
&sigmaf;
&sim;
&spades;
&sub;
&sube;
&sum;
&sup1;
&sup2;
&sup3;
&sup;
&supe;
&szlig;
&tau;
&there4;
&theta;
&thetasym;
&thinsp;
&thorn;
&tilde;
&times;
&trade;
&uArr;
&uacute;
&uarr;
&ucirc;
&ugrave;
&uml;
&upsih;
&upsilon;
&uuml;
&weierp;
&xi;
&yacute;
&yen;
&yuml;
&zeta;
&zwj;
&zwnj;