From 421ff284cba8ff7955398107144babd4a2e1a6bc Mon Sep 17 00:00:00 2001 From: Jan Lindemann Date: Thu, 3 Aug 2017 10:00:29 +0200 Subject: [PATCH] ShuntingYard.py: Improve tokenize() quote handling Quoted arguments were not grouped into one token, firstly, because the parser didn't pay any attention to them, and secondly, because the data structure carrying the postfixed string converted it back to being a string, not a list, in column 2 of the result table. Signed-off-by: Jan Lindemann --- tools/python/jwutils/algo/ShuntingYard.py | 36 ++++++++++++++--------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/tools/python/jwutils/algo/ShuntingYard.py b/tools/python/jwutils/algo/ShuntingYard.py index f1a5e7f..d30ce90 100644 --- a/tools/python/jwutils/algo/ShuntingYard.py +++ b/tools/python/jwutils/algo/ShuntingYard.py @@ -1,8 +1,9 @@ from collections import namedtuple import re +import shlex L, R = 'Left Right'.split() -ARG, KEYW, LPAREN, RPAREN = 'arg kw ( )'.split() +ARG, KEYW, QUOTED, LPAREN, RPAREN = 'arg kw quoted ( )'.split() class Operator: # export @@ -36,6 +37,7 @@ class ShuntingYard(object): # export def __init__(self, operators = None): self.do_debug = False + #self.do_debug = True self.__ops = {} if operators is not None: for k, v in operators.iteritems(): @@ -74,9 +76,10 @@ class ShuntingYard(object): # export regex = regex[1:] scanner = re.Scanner([ - (regex, lambda scanner,token:("kw", token)), - (r"[^\s()]+", lambda scanner,token:("arg", token)), - (r"\s+", None), # None == skip token. + (regex, lambda scanner,token:(KEYW, token)), + (r"'[^']*'", lambda scanner,token:(QUOTED, token[1:-1])), + (r"[^\s()]+", lambda scanner,token:(ARG, token)), + (r"\s+", None), # None == skip token. ]) tokens, remainder = scanner.scan(spec) @@ -87,7 +90,10 @@ class ShuntingYard(object): # export return tokens r = [] for e in tokens: - r.append(e[1]) + if e[0] == "quoted": + r.append(e[1][1:-1]) + else: + r.append(e[1]) return r @@ -102,10 +108,10 @@ class ShuntingYard(object): # export for toktype, token in tokenized: self.debug("Checking token", token) note = action = '' - if toktype == ARG: + if toktype in [ ARG, QUOTED ]: action = 'Add arg to output' outq.append(token) - table.append( (token, action, ' '.join(outq), ' '.join(s[0] for s in stack), note) ) + table.append( (token, action, outq, (s[0] for s in stack), note) ) elif toktype == KEYW: val = self.__ops[token] t1, op1 = token, val @@ -129,9 +135,9 @@ class ShuntingYard(object): # export else: stack.pop() action = '(Pop & discard "(")' - table.append( (v, action, ' '.join(outq), ' '.join(s[0] for s in stack), note) ) + table.append( (v, action, outq, (s[0] for s in stack), note) ) break - table.append( (v, action, ' '.join(outq), ' '.join(s[0] for s in stack), note) ) + table.append( (v, action, (outq), (s[0] for s in stack), note) ) v = note = '' else: note = '' @@ -143,7 +149,7 @@ class ShuntingYard(object): # export action = 'Push op token to stack' else: action = 'Discard ")"' - table.append( (v, action, ' '.join(outq), ' '.join(s[0] for s in stack), note) ) + table.append( (v, action, (outq), (s[0] for s in stack), note) ) note = 'Drain stack to output' while stack: v = '' @@ -151,7 +157,7 @@ class ShuntingYard(object): # export action = '(Pop op)' stack.pop() outq.append(t2) - table.append( (v, action, ' '.join(outq), ' '.join(s[0] for s in stack), note) ) + table.append( (v, action, outq, (s[0] for s in stack), note) ) v = note = '' if self.do_debug: maxcolwidths = [len(max(x, key=len)) for x in zip(*table)] @@ -159,7 +165,7 @@ class ShuntingYard(object): # export print( ' '.join('{cell:^{width}}'.format(width=width, cell=cell) for (width, cell) in zip(maxcolwidths, row))) for row in table[1:]: print( ' '.join('{cell:<{width}}'.format(width=width, cell=cell) for (width, cell) in zip(maxcolwidths, row))) - return table[-1][2].split() + return table[-1][2] def infix_to_postfix_orig(self, infix): @@ -207,11 +213,12 @@ class ShuntingYard(object): # export def eval_postfix(self, postfixexpr): + self.debug("postfix = ", postfixexpr) vals = Stack() for token in postfixexpr: - self.debug("Checking token %s" % (token)) + self.debug("Evaluating token %s" % (token)) if token not in self.__ops.keys(): vals.push(token) continue @@ -234,6 +241,8 @@ class ShuntingYard(object): # export return infix postfix = self.infix_to_postfix(infix) self.debug(infix, "-->", postfix) + for token in postfix: + self.debug("Token is %s" % (token)) return self.eval_postfix(postfix) if __name__ == '__main__': @@ -321,4 +330,3 @@ if __name__ == '__main__': r = Matcher(obj).eval("name=hans and (not label~=worst)") self.debug("Result =", r) -