ShuntingYard.py: Improve tokenize() quote handling

Quoted arguments were not grouped into a single token, for two reasons.
First, the tokenizer paid no attention to quotes. Second, the postfix
output was stored in column 2 of the result table as a space-joined
string rather than a list, so splitting it again on return broke up any
argument containing whitespace.

Signed-off-by: Jan Lindemann <jan@janware.com>
This commit is contained in:
Jan Lindemann 2017-08-03 10:00:29 +02:00
commit 421ff284cb

View file

@ -1,8 +1,9 @@
from collections import namedtuple from collections import namedtuple
import re import re
import shlex
L, R = 'Left Right'.split() L, R = 'Left Right'.split()
ARG, KEYW, LPAREN, RPAREN = 'arg kw ( )'.split() ARG, KEYW, QUOTED, LPAREN, RPAREN = 'arg kw quoted ( )'.split()
class Operator: # export class Operator: # export
@ -36,6 +37,7 @@ class ShuntingYard(object): # export
def __init__(self, operators = None): def __init__(self, operators = None):
self.do_debug = False self.do_debug = False
#self.do_debug = True
self.__ops = {} self.__ops = {}
if operators is not None: if operators is not None:
for k, v in operators.iteritems(): for k, v in operators.iteritems():
@ -74,9 +76,10 @@ class ShuntingYard(object): # export
regex = regex[1:] regex = regex[1:]
scanner = re.Scanner([ scanner = re.Scanner([
(regex, lambda scanner,token:("kw", token)), (regex, lambda scanner,token:(KEYW, token)),
(r"[^\s()]+", lambda scanner,token:("arg", token)), (r"'[^']*'", lambda scanner,token:(QUOTED, token[1:-1])),
(r"\s+", None), # None == skip token. (r"[^\s()]+", lambda scanner,token:(ARG, token)),
(r"\s+", None), # None == skip token.
]) ])
tokens, remainder = scanner.scan(spec) tokens, remainder = scanner.scan(spec)
@ -87,7 +90,10 @@ class ShuntingYard(object): # export
return tokens return tokens
r = [] r = []
for e in tokens: for e in tokens:
r.append(e[1]) if e[0] == "quoted":
r.append(e[1][1:-1])
else:
r.append(e[1])
return r return r
@ -102,10 +108,10 @@ class ShuntingYard(object): # export
for toktype, token in tokenized: for toktype, token in tokenized:
self.debug("Checking token", token) self.debug("Checking token", token)
note = action = '' note = action = ''
if toktype == ARG: if toktype in [ ARG, QUOTED ]:
action = 'Add arg to output' action = 'Add arg to output'
outq.append(token) outq.append(token)
table.append( (token, action, ' '.join(outq), ' '.join(s[0] for s in stack), note) ) table.append( (token, action, outq, (s[0] for s in stack), note) )
elif toktype == KEYW: elif toktype == KEYW:
val = self.__ops[token] val = self.__ops[token]
t1, op1 = token, val t1, op1 = token, val
@ -129,9 +135,9 @@ class ShuntingYard(object): # export
else: else:
stack.pop() stack.pop()
action = '(Pop & discard "(")' action = '(Pop & discard "(")'
table.append( (v, action, ' '.join(outq), ' '.join(s[0] for s in stack), note) ) table.append( (v, action, outq, (s[0] for s in stack), note) )
break break
table.append( (v, action, ' '.join(outq), ' '.join(s[0] for s in stack), note) ) table.append( (v, action, (outq), (s[0] for s in stack), note) )
v = note = '' v = note = ''
else: else:
note = '' note = ''
@ -143,7 +149,7 @@ class ShuntingYard(object): # export
action = 'Push op token to stack' action = 'Push op token to stack'
else: else:
action = 'Discard ")"' action = 'Discard ")"'
table.append( (v, action, ' '.join(outq), ' '.join(s[0] for s in stack), note) ) table.append( (v, action, (outq), (s[0] for s in stack), note) )
note = 'Drain stack to output' note = 'Drain stack to output'
while stack: while stack:
v = '' v = ''
@ -151,7 +157,7 @@ class ShuntingYard(object): # export
action = '(Pop op)' action = '(Pop op)'
stack.pop() stack.pop()
outq.append(t2) outq.append(t2)
table.append( (v, action, ' '.join(outq), ' '.join(s[0] for s in stack), note) ) table.append( (v, action, outq, (s[0] for s in stack), note) )
v = note = '' v = note = ''
if self.do_debug: if self.do_debug:
maxcolwidths = [len(max(x, key=len)) for x in zip(*table)] maxcolwidths = [len(max(x, key=len)) for x in zip(*table)]
@ -159,7 +165,7 @@ class ShuntingYard(object): # export
print( ' '.join('{cell:^{width}}'.format(width=width, cell=cell) for (width, cell) in zip(maxcolwidths, row))) print( ' '.join('{cell:^{width}}'.format(width=width, cell=cell) for (width, cell) in zip(maxcolwidths, row)))
for row in table[1:]: for row in table[1:]:
print( ' '.join('{cell:<{width}}'.format(width=width, cell=cell) for (width, cell) in zip(maxcolwidths, row))) print( ' '.join('{cell:<{width}}'.format(width=width, cell=cell) for (width, cell) in zip(maxcolwidths, row)))
return table[-1][2].split() return table[-1][2]
def infix_to_postfix_orig(self, infix): def infix_to_postfix_orig(self, infix):
@ -207,11 +213,12 @@ class ShuntingYard(object): # export
def eval_postfix(self, postfixexpr): def eval_postfix(self, postfixexpr):
self.debug("postfix = ", postfixexpr)
vals = Stack() vals = Stack()
for token in postfixexpr: for token in postfixexpr:
self.debug("Checking token %s" % (token)) self.debug("Evaluating token %s" % (token))
if token not in self.__ops.keys(): if token not in self.__ops.keys():
vals.push(token) vals.push(token)
continue continue
@ -234,6 +241,8 @@ class ShuntingYard(object): # export
return infix return infix
postfix = self.infix_to_postfix(infix) postfix = self.infix_to_postfix(infix)
self.debug(infix, "-->", postfix) self.debug(infix, "-->", postfix)
for token in postfix:
self.debug("Token is %s" % (token))
return self.eval_postfix(postfix) return self.eval_postfix(postfix)
if __name__ == '__main__': if __name__ == '__main__':
@ -321,4 +330,3 @@ if __name__ == '__main__':
r = Matcher(obj).eval("name=hans and (not label~=worst)") r = Matcher(obj).eval("name=hans and (not label~=worst)")
self.debug("Result =", r) self.debug("Result =", r)