# jw-python/tools/python/jwutils/algo/ShuntingYard.py
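"""Infix expression parsing and evaluation via the shunting-yard algorithm.

Operators are supplied by the caller as a mapping of token -> Operator
(callable, arity, precedence, associativity).  ShuntingYard tokenizes an
infix string, converts it to postfix (RPN) and evaluates it.
"""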


from collections import namedtuple
import re
import shlex
# --- python 2 / 3 compatibility stuff
try:
basestring
except NameError:
basestring = str
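# L/R mark operator associativity; the remaining constants are the token
# types produced by tokenize().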
L, R = 'Left Right'.split()
ARG, KEYW, QUOTED, LPAREN, RPAREN = 'arg kw quoted ( )'.split()
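# An Operator bundles the callable that evaluates it, its arity (nargs),
# its precedence and its associativity (L or R).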
class Operator: # export
def __init__(self, func=None, nargs=2, precedence=3, assoc=L):
self.func = func
self.nargs = nargs
self.prec = precedence
self.assoc = assoc
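# Simple LIFO stack used by infix_to_postfix_orig() and eval_postfix().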
class Stack:
    def __init__(self, itemlist=None):
        # use a fresh list per instance to avoid the shared mutable-default pitfall
        self.items = list(itemlist) if itemlist is not None else []
    def isEmpty(self):
        return self.items == []
    def peek(self):
        return self.items[-1]
def pop(self):
return self.items.pop()
def push(self, item):
self.items.append(item)
return 0
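# Core parser/evaluator: register operators (directly or by subclassing),
# then call eval() with an infix expression string.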
class ShuntingYard(object): # export
def __init__(self, operators = None):
self.do_debug = False
#self.do_debug = True
self.__ops = {}
if operators is not None:
for k, v in operators.items():
self.add_operator(k, v.func, v.nargs, v.prec, v.assoc)
    def debug(self, *args):
        if self.do_debug:
            msg = ' '.join(str(thing) for thing in args)
            if msg:
                print(msg)
def token_string(self):
r = ""
for k in sorted(self.__ops):
v = self.__ops[k]
buf = ", \"" + k
if v.nargs == 1:
                if k[-1].isalnum():
buf = buf + ' '
buf = buf + "xxx"
buf = buf + "\""
r = r + buf
if len(r):
return r[2:]
return r
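    # Split an infix specification into (type, value) token pairs.  Operator
    # names are matched first, then single-quoted strings (quotes stripped),
    # then bare arguments; whitespace is discarded.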
def tokenize(self, spec):
regex = ""
for k in self.__ops.keys():
regex = regex + "|" + re.escape(k)
regex = regex[1:]
scanner = re.Scanner([
(regex, lambda scanner,token:(KEYW, token)),
(r"'[^']*'", lambda scanner,token:(QUOTED, token[1:-1])),
(r"[^\s()]+", lambda scanner,token:(ARG, token)),
(r"\s+", None), # None == skip token.
])
tokens, remainder = scanner.scan(spec)
        if len(remainder) > 0:
            raise Exception("Failed to tokenize %r, remaining bit is %r" % (spec, remainder))
#self.debug(tokens)
        # The QUOTED scanner rule already strips the surrounding quotes,
        # so the (type, value) pairs can be returned as-is.
        return tokens
def add_operator(self, name, func, nargs, precedence, assoc):
self.__ops[name] = Operator(func, nargs, precedence, assoc)
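    # Shunting-yard conversion from infix to postfix (RPN): arguments go
    # straight to the output queue, operators are pushed onto a stack and
    # popped to the output according to precedence and associativity, and
    # parentheses delimit sub-expressions.  A trace table is built for
    # debugging; its last row holds the final RPN output.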
def infix_to_postfix(self, infix):
tokenized = self.tokenize(infix)
self.debug("tokenized = ", tokenized)
outq, stack = [], []
table = ['TOKEN,ACTION,RPN OUTPUT,OP STACK,NOTES'.split(',')]
for toktype, token in tokenized:
self.debug("Checking token", token)
note = action = ''
if toktype in [ ARG, QUOTED ]:
action = 'Add arg to output'
outq.append(token)
table.append( (token, action, outq, (s[0] for s in stack), note) )
elif toktype == KEYW:
val = self.__ops[token]
t1, op1 = token, val
v = t1
note = 'Pop ops from stack to output'
while stack:
t2, op2 = stack[-1]
if (op1.assoc == L and op1.prec <= op2.prec) or (op1.assoc == R and op1.prec < op2.prec):
if t1 != RPAREN:
if t2 != LPAREN:
stack.pop()
action = '(Pop op)'
outq.append(t2)
else:
break
else:
if t2 != LPAREN:
stack.pop()
action = '(Pop op)'
outq.append(t2)
else:
stack.pop()
action = '(Pop & discard "(")'
table.append( (v, action, outq, (s[0] for s in stack), note) )
break
table.append( (v, action, (outq), (s[0] for s in stack), note) )
v = note = ''
else:
note = ''
break
note = ''
note = ''
if t1 != RPAREN:
stack.append((token, val))
action = 'Push op token to stack'
else:
action = 'Discard ")"'
table.append( (v, action, (outq), (s[0] for s in stack), note) )
note = 'Drain stack to output'
while stack:
v = ''
t2, op2 = stack[-1]
action = '(Pop op)'
stack.pop()
outq.append(t2)
table.append( (v, action, outq, (s[0] for s in stack), note) )
v = note = ''
if self.do_debug:
maxcolwidths = [len(max(x, key=len)) for x in zip(*table)]
row = table[0]
print(' '.join('{cell:^{width}}'.format(width=width, cell=cell) for (width, cell) in zip(maxcolwidths, row)))
for row in table[1:]:
print(' '.join('{cell:<{width}}'.format(width=width, cell=cell) for (width, cell) in zip(maxcolwidths, row)))
return table[-1][2]
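    # Earlier, textbook-style conversion kept for reference; it compares
    # precedence only and ignores associativity.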
def infix_to_postfix_orig(self, infix):
s = Stack()
r = []
tokens = self.tokenize(infix)
for tokinfo in tokens:
self.debug(tokinfo)
toktype, token = tokinfo[0], tokinfo[1]
self.debug("Checking token ", token)
if token not in self.__ops.keys():
r.append(token)
continue
if token == '(':
s.push(token)
continue
if token == ')':
topToken = s.pop()
while topToken != '(':
r.append(topToken)
topToken = s.pop()
continue
while (not s.isEmpty()) and (self.__ops[s.peek()].prec >= self.__ops[token].prec):
#self.debug(token)
r.append(s.pop())
#self.debug(r)
s.push(token)
self.debug((s.peek()))
while not s.isEmpty():
opToken = s.pop()
r.append(opToken)
#self.debug(r)
return r
#return " ".join(r)
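    # Evaluate an RPN token list: operands are pushed onto a value stack;
    # each operator pops its nargs operands, applies its function and
    # pushes the result back.  The last remaining value is the result.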
def eval_postfix(self, postfixexpr):
self.debug("postfix = ", postfixexpr)
vals = Stack()
for token in postfixexpr:
self.debug("Evaluating token %s" % (token))
if token not in self.__ops.keys():
vals.push(token)
continue
op = self.__ops[token]
args = []
self.debug("Adding %d arguments" % (op.nargs))
for i in range(0, op.nargs):
self.debug("Adding argument %d" % (i))
args.append(vals.pop())
#self.debug("running %s(%s)" % (token, ', '.join(reversed(args))))
val = op.func(*reversed(args))
self.debug("%s(%s) = %s" % (token, ', '.join(map(str, reversed(args))), val))
vals.push(val)
return vals.pop()
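    # Convenience wrapper: convert an infix string to postfix and evaluate
    # it.  Non-string input is returned unchanged.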
def eval(self, infix):
if not isinstance(infix, basestring):
return infix
postfix = self.infix_to_postfix(infix)
self.debug(infix, "-->", postfix)
for token in postfix:
self.debug("Token is %s" % (token))
return self.eval_postfix(postfix)
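# A minimal usage sketch (the operator table below is illustrative, not part
# of this module):
#
#   ops = {
#       '+': Operator(lambda a, b: str(float(a) + float(b)), 2, 2, L),
#       '*': Operator(lambda a, b: str(float(a) * float(b)), 2, 3, L),
#       '(': Operator(None, 0, 9, L),
#       ')': Operator(None, 0, 0, L),
#   }
#   sy = ShuntingYard(ops)
#   sy.eval("1 + 2 * 3")   # expected to yield "7.0"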
if __name__ == '__main__':
# ------------- testbed calculator
    # string.atof no longer exists in Python 3; the float builtin is used instead.
class Calculator(ShuntingYard):
#def tokenize(self, string):
# return string.split()
        def f_mult(self, a, b):
            return str(float(a) * float(b))
        def f_div(self, a, b):
            return str(float(a) / float(b))
        def f_add(self, a, b):
            return str(float(a) + float(b))
        def f_sub(self, a, b):
            return str(float(a) - float(b))
def __init__(self):
Op = Operator
operators = {
'^': Op(None, 2, 4, R),
'*': Op(self.f_mult, 2, 3, L),
'/': Op(self.f_div, 2, 3, L),
'+': Op(self.f_add, 2, 2, L),
'-': Op(self.f_sub, 2, 2, L),
'(': Op(None, 0, 9, L),
')': Op(None, 0, 0, L),
}
super(Calculator, self).__init__(operators)
rr = Calculator().eval("( 2 * 3 + 4 * 5 ) / ( 5 - 3 )")
    print("Result =", rr)
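    # Expected result: (2*3 + 4*5) / (5 - 3) = 26 / 2 = 13.0, returned as the
    # string "13.0" since the Calculator callbacks work on strings.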
# ------------- testbed match object
Object = namedtuple("Object", [ "Name", "Label" ])
class Matcher(ShuntingYard):
        def f_is_name(self, a):
            if self.obj.Name == a:
                return "True"
            return "False"
        def f_matches_label(self, a):
            if re.match(a, self.obj.Label):
                return "True"
            return "False"
def f_is_not(self, a):
if a == "True":
return "False"
return "True"
def f_and(self, a_, b_):
a = a_ == "True"
b = b_ == "True"
if a and b:
return "True"
return "False"
def __init__(self, obj):
Op = Operator
operators = {
'(': Op(None, 2, 9, L),
')': Op(None, 2, 0, L),
'name=': Op(self.f_is_name, 1, 3, R),
'and': Op(self.f_and, 2, 3, L),
'label~=': Op(self.f_matches_label, 1, 3, R),
'False': Op(None, 0, 3, L),
'True': Op(None, 0, 3, L),
'not': Op(self.f_is_not, 1, 3, R),
}
            self.obj = obj
            super(Matcher, self).__init__(operators)
obj = Object("hans", "wurst")
r = Matcher(obj).eval("name=hans and (not label~=worst)")
    print("Result =", r)
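    # Expected result: name "hans" matches, label "wurst" does not match the
    # pattern "worst", so "not label~=worst" is "True" and the whole
    # expression evaluates to "True".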