ShuntingYard.py: Improve tokenize() quote handling

Quoted arguments were not grouped into one token, for two reasons. First, the tokenizer paid no attention to quotes. Second, the data structure carrying the postfix expression (column 2 of the result table) converted it back into a space-joined string instead of keeping it as a list, so any argument containing whitespace was broken apart again when that string was split.

Signed-off-by: Jan Lindemann <jan@janware.com>
2026-01-15 01:52:56 +01:00 · 2017-08-03 10:00:29 +02:00 · 2017-08-03 10:00:29 +02:00 · 421ff284cb
commit 421ff284cb
parent 36aac73b5d
1 changed file with 22 additions and 14 deletions
--- a/tools/python/jwutils/algo/ShuntingYard.py
+++ b/tools/python/jwutils/algo/ShuntingYard.py
@ -1,8 +1,9 @@
 from collections import namedtuple
 import re
 import shlex
 L, R = 'Left Right'.split()
-ARG, KEYW, LPAREN, RPAREN = 'arg kw ( )'.split()
+ARG, KEYW, QUOTED, LPAREN, RPAREN = 'arg kw quoted ( )'.split()
 class Operator: # export
@ -36,6 +37,7 @@ class ShuntingYard(object): # export
    def __init__(self, operators = None):
        self.do_debug = False
        #self.do_debug = True
        self.__ops = {}
        if operators is not None:
            for k, v in operators.iteritems():
@ -74,9 +76,10 @@ class ShuntingYard(object): # export
        regex = regex[1:]
        scanner = re.Scanner([
-          (regex,       lambda scanner,token:("kw", token)),
+          (regex,       lambda scanner,token:(KEYW,   token)),
-          (r"[^\s()]+",      lambda scanner,token:("arg", token)),
+          (r"'[^']*'",  lambda scanner,token:(QUOTED, token[1:-1])),
-          (r"\s+", None), # None == skip token.
+          (r"[^\s()]+", lambda scanner,token:(ARG,    token)),
          (r"\s+",      None), # None == skip token.
        ])
        tokens, remainder = scanner.scan(spec)
@ -87,7 +90,10 @@ class ShuntingYard(object): # export
        return tokens
        r = []
        for e in tokens:
-            r.append(e[1])
+            if e[0] == "quoted":
                r.append(e[1][1:-1])
            else:
                r.append(e[1])
        return r
@ -102,10 +108,10 @@ class ShuntingYard(object): # export
        for toktype, token in tokenized:
            self.debug("Checking token", token)
            note = action = ''
-            if toktype == ARG:
+            if toktype in [ ARG, QUOTED ]:
                action = 'Add arg to output'
                outq.append(token)
-                table.append( (token, action, ' '.join(outq), ' '.join(s[0] for s in stack), note) )
+                table.append( (token, action, outq, (s[0] for s in stack), note) )
            elif toktype == KEYW:
                val = self.__ops[token]
                t1, op1 = token, val
@ -129,9 +135,9 @@ class ShuntingYard(object): # export
                            else:
                                stack.pop()
                                action = '(Pop & discard "(")'
-                                table.append( (v, action, ' '.join(outq), ' '.join(s[0] for s in stack), note) )
+                                table.append( (v, action, outq, (s[0] for s in stack), note) )
                                break
-                        table.append( (v, action, ' '.join(outq), ' '.join(s[0] for s in stack), note) )
+                        table.append( (v, action, (outq), (s[0] for s in stack), note) )
                        v = note = ''
                    else:
                        note = ''
@ -143,7 +149,7 @@ class ShuntingYard(object): # export
                    action = 'Push op token to stack'
                else:
                    action = 'Discard ")"'
-                table.append( (v, action, ' '.join(outq), ' '.join(s[0] for s in stack), note) )
+                table.append( (v, action, (outq), (s[0] for s in stack), note) )
        note = 'Drain stack to output'
        while stack:
            v = ''
@ -151,7 +157,7 @@ class ShuntingYard(object): # export
            action = '(Pop op)'
            stack.pop()
            outq.append(t2)
-            table.append( (v, action, ' '.join(outq), ' '.join(s[0] for s in stack), note) )
+            table.append( (v, action, outq, (s[0] for s in stack), note) )
            v = note = ''
        if self.do_debug:
            maxcolwidths = [len(max(x, key=len)) for x in zip(*table)]
@ -159,7 +165,7 @@ class ShuntingYard(object): # export
            print( ' '.join('{cell:^{width}}'.format(width=width, cell=cell) for (width, cell) in zip(maxcolwidths, row)))
            for row in table[1:]:
                print( ' '.join('{cell:<{width}}'.format(width=width, cell=cell) for (width, cell) in zip(maxcolwidths, row)))
-        return table[-1][2].split()
+        return table[-1][2]
    def infix_to_postfix_orig(self, infix):
@ -207,11 +213,12 @@ class ShuntingYard(object): # export
    def eval_postfix(self, postfixexpr):
        self.debug("postfix = ", postfixexpr)
        vals = Stack()
        for token in postfixexpr:
-            self.debug("Checking token %s" % (token))
+            self.debug("Evaluating token %s" % (token))
            if token not in self.__ops.keys():
                vals.push(token)
                continue
@ -234,6 +241,8 @@ class ShuntingYard(object): # export
            return infix
        postfix = self.infix_to_postfix(infix)
        self.debug(infix, "-->", postfix)
        for token in postfix:
            self.debug("Token is %s" % (token))
        return self.eval_postfix(postfix)
 if __name__ == '__main__':
@ -321,4 +330,3 @@ if __name__ == '__main__':
    r = Matcher(obj).eval("name=hans and (not label~=worst)")
    self.debug("Result =", r)