Fix handling of lone "(" with separate sentinels for ( and ).

master
Ethan Dalool 2017-05-06 18:32:02 -07:00
parent 53fcf8acd4
commit 6d52d32607
1 changed file with 32 additions and 4 deletions


@@ -9,6 +9,11 @@ UNARY_OPERATORS = {'NOT'}
 PRECEDENCE = ['NOT', 'AND', 'XOR', 'OR']
 OPERATORS = BINARY_OPERATORS | UNARY_OPERATORS
+# Sentinel values used for breaking up the tokens, so we don't have to use
+# the strings '(' and ')', which can get confused with user input.
+PAREN_OPEN = object()
+PAREN_CLOSE = object()
 DEFAULT_MATCH_FUNCTION = str.__contains__
 MESSAGE_WRITE_YOUR_OWN_MATCHER = '''
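Why object() sentinels rather than the literal strings: identity comparison can never collide with user input that happens to contain a parenthesis, while string equality can. A standalone sketch of the difference (not part of this commit):

PAREN_OPEN = object()
PAREN_CLOSE = object()

user_token = '('                     # literal text typed by the user
print(user_token == '(')             # True: string comparison collides with user input
print(user_token is PAREN_OPEN)      # False: identity never collides
print(PAREN_OPEN is PAREN_OPEN)      # True: only the sentinel matches itself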
@@ -58,6 +63,8 @@ class ExpressionTree:
         if self.token not in OPERATORS:
             t = self.token
             t = t.replace('"', '\\"')
+            t = t.replace('(', '\\(')
+            t = t.replace(')', '\\)')
             if ' ' in t:
                 t = '"%s"' % t
             return t
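The two new replace calls mirror the sentinels: when a tree is rendered back to a string, any parenthesis inside a token is escaped so it reads as literal text rather than grouping. A minimal sketch of that escaping (the token value is hypothetical):

t = 'weird(token)'
t = t.replace('"', '\\"')
t = t.replace('(', '\\(')
t = t.replace(')', '\\)')
print(t)   # weird\(token\)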
@@ -187,6 +194,25 @@ class ExpressionTree:
             children = (child.evaluate(text, match_function=match_function) for child in self.children)
             return operator_function(children)

+    def diagram(self):
+        if self.token is None:
+            return '""'
+
+        t = self.token
+        if ' ' in t:
+            t = '"%s"' % t
+        output = t
+        indent = 1
+        for child in self.children:
+            child = child.diagram()
+            for line in child.splitlines():
+                output += (' ' * indent)
+                output += line + '\n'
+                indent = len(t) + 1
+        output = output.strip()
+        return output
+
     def evaluate(self, text, match_function=None):
         if match_function is None:
             match_function = DEFAULT_MATCH_FUNCTION
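For reference, the matcher used when evaluate is called without one is plain substring containment, per DEFAULT_MATCH_FUNCTION above. A small usage sketch (the example strings are hypothetical):

match = str.__contains__             # DEFAULT_MATCH_FUNCTION
text = 'the quick brown fox'
print(match(text, 'quick'))          # True: token appears in the text
print(match(text, 'lazy'))           # False: token does not appear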
@@ -408,9 +434,9 @@ def sublist_tokens(tokens, _from_index=0, depth=0):
         token = tokens[index]
         #print(index, token)
         index += 1
-        if token == '(':
+        if token is PAREN_OPEN:
             (token, index) = sublist_tokens(tokens, _from_index=index, depth=depth+1)
-        if token == ')':
+        if token is PAREN_CLOSE:
             break
         final_tokens.append(token)
     if _from_index == 0:
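With the sentinels, sublist_tokens can group a flat token stream into nested lists by identity instead of comparing against '(' and ')'. A simplified, self-contained sketch of that grouping (assumed behavior, condensed from the function above):

PAREN_OPEN = object()
PAREN_CLOSE = object()

def group(tokens, index=0, depth=0):
    # PAREN_OPEN starts a recursive call, PAREN_CLOSE ends it.
    grouped = []
    while index < len(tokens):
        token = tokens[index]
        index += 1
        if token is PAREN_OPEN:
            (token, index) = group(tokens, index, depth + 1)
        elif token is PAREN_CLOSE:
            break
        grouped.append(token)
    if depth == 0:
        return grouped
    return (grouped, index)

tokens = [PAREN_OPEN, 'a', 'AND', 'b', PAREN_CLOSE, 'OR', 'c']
print(group(tokens))   # [['a', 'AND', 'b'], 'OR', 'c']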
@@ -440,18 +466,20 @@ def tokenize(expression):
     tokens = []
     for character in expression:
         if in_escape:
-            character = ESCAPE_SEQUENCES.get(character, '\\'+character)
+            #character = ESCAPE_SEQUENCES.get(character, '\\'+character)
             in_escape = False
         elif character in {'(', ')'} and not in_quotes:
             if character == '(':
+                sentinel = PAREN_OPEN
                 paren_depth += 1
             elif character == ')':
+                sentinel = PAREN_CLOSE
                 paren_depth -= 1
             if paren_depth >= 0:
                 tokens.append(''.join(current_word))
-                tokens.append(character)
+                tokens.append(sentinel)
                 current_word.clear()
                 continue
         else:
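End-to-end, the tokenizer now emits the sentinels for structural parentheses, so a lone "(" inside quoted user text can no longer be mistaken for a grouping marker. Hypothetical usage, assuming the module exposes tokenize and the sentinels (module name and exact token cleanup are assumptions, not shown in this hunk):

from expressionmatch import tokenize, PAREN_OPEN, PAREN_CLOSE

tokens = tokenize('(a AND b) OR "literal ( text"')
structural = [t for t in tokens if t is PAREN_OPEN or t is PAREN_CLOSE]
print(len(structural))   # 2: one PAREN_OPEN, one PAREN_CLOSE; the quoted "(" stays literal text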