From 6d52d326079be5bc1152ed831d9bbebd6bf05bee Mon Sep 17 00:00:00 2001
From: Ethan Dalool <edalool@yahoo.com>
Date: Sat, 6 May 2017 18:32:02 -0700
Subject: [PATCH] Fix handling of lone "(" with separate Sentinels for (, ).

---
 ExpressionMatch/expressionmatch.py | 36 ++++++++++++++++++++++++++----
 1 file changed, 32 insertions(+), 4 deletions(-)

diff --git a/ExpressionMatch/expressionmatch.py b/ExpressionMatch/expressionmatch.py
index 39da13b..fcebd15 100644
--- a/ExpressionMatch/expressionmatch.py
+++ b/ExpressionMatch/expressionmatch.py
@@ -9,6 +9,11 @@ UNARY_OPERATORS = {'NOT'}
 PRECEDENCE = ['NOT', 'AND', 'XOR', 'OR']
 OPERATORS = BINARY_OPERATORS | UNARY_OPERATORS
 
+# Sentinel values used for breaking up the tokens, so we don't have to use
+# the strings '(' and ')', which can be confused with user input.
+PAREN_OPEN = object()
+PAREN_CLOSE = object()
+
 DEFAULT_MATCH_FUNCTION = str.__contains__
 
 MESSAGE_WRITE_YOUR_OWN_MATCHER = '''
@@ -58,6 +63,8 @@ class ExpressionTree:
         if self.token not in OPERATORS:
             t = self.token
             t = t.replace('"', '\\"')
+            t = t.replace('(', '\\(')
+            t = t.replace(')', '\\)')
             if ' ' in t:
                 t = '"%s"' % t
             return t
@@ -187,6 +194,25 @@ class ExpressionTree:
         children = (child.evaluate(text, match_function=match_function) for child in self.children)
         return operator_function(children)
 
+    def diagram(self):
+        '''Render this node and its descendants as indented text.'''
+        if self.token is None:
+            return '""'
+        label = self.token
+        if ' ' in label:
+            label = '"%s"' % label
+
+        # The first child line follows the label after a single space; all
+        # later lines are padded so they line up underneath it.
+        pad = ' '
+        pieces = [label]
+        for subtree in self.children:
+            for row in subtree.diagram().splitlines():
+                pieces.append(pad + row + '\n')
+                pad = ' ' * (len(label) + 1)
+
+        return ''.join(pieces).strip()
+
     def evaluate(self, text, match_function=None):
         if match_function is None:
             match_function = DEFAULT_MATCH_FUNCTION
@@ -408,9 +434,9 @@ def sublist_tokens(tokens, _from_index=0, depth=0):
         token = tokens[index]
         #print(index, token)
         index += 1
-        if token == '(':
+        if token is PAREN_OPEN:
             (token, index) = sublist_tokens(tokens, _from_index=index, depth=depth+1)
-        if token == ')':
+        if token is PAREN_CLOSE:
             break
         final_tokens.append(token)
     if _from_index == 0:
@@ -440,18 +466,20 @@ def tokenize(expression):
     tokens = []
     for character in expression:
         if in_escape:
-            character = ESCAPE_SEQUENCES.get(character, '\\'+character)
+            #character = ESCAPE_SEQUENCES.get(character, '\\'+character)
             in_escape = False
 
         elif character in  {'(', ')'} and not in_quotes:
             if character == '(':
+                sentinel = PAREN_OPEN
                 paren_depth += 1
             elif character == ')':
+                sentinel = PAREN_CLOSE
                 paren_depth -= 1
 
             if paren_depth >= 0:
                 tokens.append(''.join(current_word))
-                tokens.append(character)
+                tokens.append(sentinel)
                 current_word.clear()
                 continue
             else: