Improve expressionmatch docstrings.
This commit is contained in:
parent
af5ec0b52e
commit
9e0509ae17
1 changed files with 64 additions and 24 deletions
|
@ -1,17 +1,39 @@
|
||||||
ESCAPE_SEQUENCES = {
|
'''
|
||||||
'\\': '\\',
|
This module provides the ExpressionTree class, which parses a query expression
|
||||||
'"': '"',
|
like "a AND (b OR c)" and then evaluates whether an input satisfies the query.
|
||||||
}
|
|
||||||
|
Basic usage:
|
||||||
|
tree = expressionmatch.ExpressionTree.parse('a AND (b OR c)')
|
||||||
|
tree.evaluate('a b')
|
||||||
|
tree.evaluate('a c')
|
||||||
|
tree.evaluate('b c')
|
||||||
|
|
||||||
|
The available operators are:
|
||||||
|
a AND b
|
||||||
|
a OR b
|
||||||
|
a XOR b
|
||||||
|
NOT a
|
||||||
|
|
||||||
|
where a and b can be single tokens or a parenthesized group of tokens.
|
||||||
|
|
||||||
|
The operators must be capitalized as seen and can be enclosed in quotes if you
|
||||||
|
need to literally match the word "AND", etc.
|
||||||
|
|
||||||
|
If the tokens contain spaces, they must be enclosed in quotation marks:
|
||||||
|
tree = expressionmatch.ExpressionTree.parse('"mark hamill" OR "harrison ford"')
|
||||||
|
'''
|
||||||
|
from voussoirkit import sentinel
|
||||||
|
|
||||||
BINARY_OPERATORS = {'AND', 'OR', 'XOR'}
|
BINARY_OPERATORS = {'AND', 'OR', 'XOR'}
|
||||||
UNARY_OPERATORS = {'NOT'}
|
UNARY_OPERATORS = {'NOT'}
|
||||||
PRECEDENCE = ['NOT', 'AND', 'XOR', 'OR']
|
PRECEDENCE = ['NOT', 'AND', 'XOR', 'OR']
|
||||||
OPERATORS = BINARY_OPERATORS | UNARY_OPERATORS
|
OPERATORS = BINARY_OPERATORS | UNARY_OPERATORS
|
||||||
|
|
||||||
# Sentinel values used for breaking up the tokens, so we don't have to use
|
# These sentinels help the parser distinguish between parens used for token
|
||||||
# strings '(' and ')' which can get confused with user input.
|
# grouping and parens that have been escaped by the user and should remain
|
||||||
PAREN_OPEN = object()
|
# as strings.
|
||||||
PAREN_CLOSE = object()
|
PAREN_OPEN = sentinel.Sentinel('PAREN_OPEN')
|
||||||
|
PAREN_CLOSE = sentinel.Sentinel('PAREN_CLOSE')
|
||||||
|
|
||||||
DEFAULT_MATCH_FUNCTION = str.__contains__
|
DEFAULT_MATCH_FUNCTION = str.__contains__
|
||||||
|
|
||||||
|
@ -51,6 +73,11 @@ class NoTokens(Exception):
|
||||||
|
|
||||||
class ExpressionTree:
|
class ExpressionTree:
|
||||||
def __init__(self, token, parent=None):
|
def __init__(self, token, parent=None):
|
||||||
|
'''
|
||||||
|
This constructor is for each individual node of the tree.
|
||||||
|
End-users should probably call ExpressionTree.parse instead of this
|
||||||
|
constructor.
|
||||||
|
'''
|
||||||
self.children = []
|
self.children = []
|
||||||
self.parent = parent
|
self.parent = parent
|
||||||
self.token = token
|
self.token = token
|
||||||
|
@ -92,7 +119,10 @@ class ExpressionTree:
|
||||||
return s
|
return s
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def parse(cls, tokens, spaces=0):
|
def parse(cls, tokens):
|
||||||
|
'''
|
||||||
|
Create an ExpressionTree from the given query string or list of tokens.
|
||||||
|
'''
|
||||||
if isinstance(tokens, str):
|
if isinstance(tokens, str):
|
||||||
tokens = tokenize(tokens)
|
tokens = tokenize(tokens)
|
||||||
|
|
||||||
|
@ -100,13 +130,13 @@ class ExpressionTree:
|
||||||
raise NoTokens()
|
raise NoTokens()
|
||||||
|
|
||||||
if isinstance(tokens[0], list):
|
if isinstance(tokens[0], list):
|
||||||
current = cls.parse(tokens[0], spaces=spaces+1)
|
current = cls.parse(tokens[0])
|
||||||
else:
|
else:
|
||||||
current = cls(token=tokens[0])
|
current = cls(token=tokens[0])
|
||||||
|
|
||||||
for token in tokens[1:]:
|
for token in tokens[1:]:
|
||||||
if isinstance(token, list):
|
if isinstance(token, list):
|
||||||
new = cls.parse(token, spaces=spaces+1)
|
new = cls.parse(token)
|
||||||
else:
|
else:
|
||||||
new = cls(token=token)
|
new = cls(token=token)
|
||||||
|
|
||||||
|
@ -223,6 +253,10 @@ class ExpressionTree:
|
||||||
def is_leaf(self):
|
def is_leaf(self):
|
||||||
return self.token not in OPERATORS
|
return self.token not in OPERATORS
|
||||||
|
|
||||||
|
@property
|
||||||
|
def is_root(self):
|
||||||
|
return self.parent is None
|
||||||
|
|
||||||
def map(self, function):
|
def map(self, function):
|
||||||
'''
|
'''
|
||||||
Apply this function to all of the operands.
|
Apply this function to all of the operands.
|
||||||
|
@ -260,11 +294,15 @@ class ExpressionTree:
|
||||||
if node.is_leaf:
|
if node.is_leaf:
|
||||||
yield node
|
yield node
|
||||||
|
|
||||||
|
|
||||||
def implied_tokens(tokens):
|
def implied_tokens(tokens):
|
||||||
'''
|
'''
|
||||||
1. If two operands are directly next to each other, or an operand is followed
|
This function returns a new list of tokens which has all of the implied
|
||||||
by a unary operator, it is implied that there is an AND between them.
|
tokens added explicitly and meaningless tokens removed, by the
|
||||||
|
following rules:
|
||||||
|
|
||||||
|
1. If two operands are directly next to each other, or an operand is
|
||||||
|
followed by a unary operator, it is implied that there is an AND
|
||||||
|
between them.
|
||||||
'1 2' -> '1 AND 2'
|
'1 2' -> '1 AND 2'
|
||||||
'1 NOT 2' -> '1 AND NOT 2'
|
'1 NOT 2' -> '1 AND NOT 2'
|
||||||
|
|
||||||
|
@ -436,7 +474,8 @@ def tokenize(expression):
|
||||||
Break the string into a list of tokens. Spaces are the delimiter unless
|
Break the string into a list of tokens. Spaces are the delimiter unless
|
||||||
they are inside quotation marks.
|
they are inside quotation marks.
|
||||||
|
|
||||||
Quotation marks and parentheses can be escaped by preceding with a backslash '\\'
|
Quotation marks and parentheses can be escaped by preceding with a
|
||||||
|
backslash '\\'.
|
||||||
|
|
||||||
Opening and closing parentheses are put into their own token unless
|
Opening and closing parentheses are put into their own token unless
|
||||||
escaped / quoted.
|
escaped / quoted.
|
||||||
|
@ -498,7 +537,7 @@ if __name__ == '__main__':
|
||||||
'[sci-fi] OR [pg-13]',
|
'[sci-fi] OR [pg-13]',
|
||||||
'([sci-fi] OR [war]) AND [r]',
|
'([sci-fi] OR [war]) AND [r]',
|
||||||
'[r] XOR [sci-fi]',
|
'[r] XOR [sci-fi]',
|
||||||
'"mark hamill" "harrison ford"',
|
'"[mark hamill]" "[harrison ford]"',
|
||||||
]
|
]
|
||||||
teststrings = {
|
teststrings = {
|
||||||
'Star Wars': '[harrison ford] [george lucas] [sci-fi] [pg] [carrie fisher] [mark hamill] [space]',
|
'Star Wars': '[harrison ford] [george lucas] [sci-fi] [pg] [carrie fisher] [mark hamill] [space]',
|
||||||
|
@ -506,12 +545,13 @@ if __name__ == '__main__':
|
||||||
'Indiana Jones': '[harrison ford] [steven spielberg] [adventure] [pg-13]',
|
'Indiana Jones': '[harrison ford] [steven spielberg] [adventure] [pg-13]',
|
||||||
'Apocalypse Now': '[harrison ford] [francis coppola] [r] [war] [drama]'
|
'Apocalypse Now': '[harrison ford] [francis coppola] [r] [war] [drama]'
|
||||||
}
|
}
|
||||||
for token in tests:
|
for test in tests:
|
||||||
print('start:', token)
|
print('start:', test)
|
||||||
token = tokenize(token)
|
tokens = tokenize(test)
|
||||||
print('implied:', token)
|
print('implied:', tokens)
|
||||||
e = ExpressionTree.parse(token)
|
etree = ExpressionTree.parse(tokens)
|
||||||
print('tree:', e)
|
print('tree:', etree)
|
||||||
|
print(etree.diagram())
|
||||||
for (name, teststring) in teststrings.items():
|
for (name, teststring) in teststrings.items():
|
||||||
print('Matches', name, ':', e.evaluate(teststring))
|
print('Matches', name, ':', etree.evaluate(teststring))
|
||||||
print()
|
print()
|
||||||
|
|
Loading…
Reference in a new issue