grammar.py: Add grammar_unroll_alternatives()

Signed-off-by: Jan Lindemann <jan@janware.com>
This commit is contained in:
Jan Lindemann 2017-10-29 13:13:14 +01:00
commit cbdd343f85
2 changed files with 99 additions and 6 deletions

View file

@ -106,7 +106,7 @@ def cleanup_token(tok):
return tok
def tok2ctype(tok):
    """Classify a token as EBNF grammar syntax or target-language text.

    Returns ``t_grammar`` for the EBNF delimiter tokens (brackets, braces,
    angle brackets, parentheses, the ``?`` special-sequence mark) and for the
    alternative separator ``|``; every other token is ``t_target_lang``.

    The ``|`` entry is required by rules_unroll_alternatives(), which only
    treats a ``|`` component as a separator when its type is ``t_grammar``.
    """
    if tok in ('{', '}', '[', ']', '<', '>', '(', ')', '?', '|'):
        return t_grammar
    return t_target_lang
@ -222,6 +222,12 @@ class RuleComp:
class State:
def __init__(self):
    """Define the delimiter pairs and bring the state to its initial condition."""
    # Each pair is [opening token, closing token].  update() pushes the
    # matching pair onto self.things when a container opens and compares
    # the popped entry against it when the container closes.
    self.__pair_special = ['?', '?']
    self.__pair_comment = ['(*', '*)']
    self.__pair_group = ['(', ')']
    self.__pair_ext = ['<', '>']
    self.__pair_curly = ['{', '}']
    self.__pair_square = ['[', ']']
    self.reset()
def reset(self):
@ -234,6 +240,7 @@ class State:
self.production = None
self.rule = []
self.rules = []
self.things = []
def optional(self):
    """Tell whether the square or the curly bracket counter is non-zero.

    update() raises these counters on '[' / '{' and lowers them on
    ']' / '}', so a True result means a bracket of either kind is
    currently unbalanced.
    """
    both_balanced = self.square == 0 and self.curly == 0
    return not both_balanced
@ -242,30 +249,45 @@ class State:
if not self.in_comment:
if tok == '[':
self.square += 1
self.things.append(self.__pair_square)
elif tok == ']':
self.square -= 1
assert(self.things.pop() == self.__pair_square)
elif tok == '{':
self.curly += 1
self.things.append(self.__pair_curly)
elif tok == '}':
self.curly -= 1
assert(self.things.pop() == self.__pair_curly)
elif tok == '(':
self.group += 1
self.things.append(self.__pair_group)
elif tok == ')':
self.group -= 1
assert(self.things.pop() == self.__pair_group)
elif tok == '<':
self.ext += 1
self.things.append(self.__pair_ext)
elif tok == '>':
self.ext -= 1
assert(self.things.pop() == self.__pair_ext)
elif tok == '?':
self.in_special = not self.in_special
if not self.in_special:
self.in_special = True
self.things.append(self.__pair_special)
else:
self.in_special = False
assert(self.things.pop() == self.__pair_special)
elif tok == '(*':
self.in_comment = True
self.things.append(self.__pair_comment)
elif tok == '*)':
raise Exception("Unmatched closing EBNF comment mark", tok, "in line", line)
else:
if tok == '(*':
raise Exception("Nested EBNF comment", tok, "in line", line)
elif tok == '*)':
assert(self.things.pop() == self.__pair_comment)
self.in_comment = False
if self.curly < 0 or self.square < 0 or self.ext < 0 or self.group < 0:
@ -285,9 +307,9 @@ class State:
return self.ext > 0
def in_something(self):
    """Return the delimiter pair of the most recently opened container.

    update() pushes an ``[opener, closer]`` pair onto ``self.things`` for
    every container it enters and pops it again on the matching closer, so
    the last entry describes the container the current token sits in.

    Returns:
        The ``[opener, closer]`` list on top of ``self.things``, or ``None``
        when no container is open.  Callers test the result with
        ``is None`` and index it with ``[0]`` / ``[1]``.
    """
    if not self.things:
        return None
    return self.things[-1]
class Symbol:
@ -471,7 +493,7 @@ def grammar_parse_ebnf_tokens(tokens):
continue
if tok == ',':
continue
if tok == '|' and not state.in_something():
if tok == '|' and state.in_something() is None:
ruleset.append(rule)
rule = []
continue
@ -697,6 +719,73 @@ def grammar_unroll_options(grammar):
grammar[tok].rules = rules_unroll_options(p.rules)
return grammar
def rules_unroll_alternatives(rules):
    """Expand a bracketed alternative into one rule per alternative.

    Every rule is a list of components exposing ``token``, ``tp`` and
    ``line``.  A rule without a ``|`` component is passed through unchanged.
    Otherwise the first grammar-level ``|`` is located, the container it sits
    in is taken from ``State.in_something()``, and the rule is replaced by
    one rule per alternative, each built as::

        components before the opener + one alternative + components after the closer

    The container's own delimiters are not part of the result.

    Only the first container holding an alternative is unrolled per rule and
    per call; a rule with several such containers needs repeated calls.

    NOTE(review): the opener/closer are found by scanning for the nearest
    matching token, without depth counting, and the content is split on every
    ``|``.  Containers nested inside the one being unrolled are therefore
    presumably not handled correctly -- confirm against split_list_by().

    Args:
        rules: list of rules (lists of components).

    Returns:
        A new list of rules with the alternatives unrolled.

    Raises:
        Exception: if a grammar-level ``|`` is found outside any container,
            or the container's opener or closer cannot be found in the rule.
    """
    unrolled = []
    slog(INFO, "unrolling alternatives in", format_rules(rules))
    sep = RuleComp('|')
    for rule in rules:
        if sep not in rule:
            unrolled.append(rule)
            continue
        state = State()
        end = len(rule) - 1
        first = last = -1
        for i, c in enumerate(rule):
            state.update(c.token, line=c.line)
            if c.token != '|' or c.tp != t_grammar:
                slog(INFO, "checking token", c.token, "of type", c.tp, "at position", i)
                continue
            slog(INFO, "found token at position", i)
            container = state.in_something()
            slog(INFO, "thing delimiters are", container)
            if container is None:
                raise Exception("Alternative in line", c.line, "at rule position", i, "outside container:", format_rule(rule))
            # Walk backwards from the separator to the container's opener.
            for k in range(i - 1, -1, -1):
                slog(INFO, "comparing token", rule[k].token, "at position", k, "against opener", container[0])
                if rule[k].token == container[0]:
                    first = k
                    break
            if first == -1:
                raise Exception("Alternative in line", c.line, "missing previous element:", format_rule(rule))
            # Walk forwards from the separator to the container's closer.
            for k in range(i, end + 1):
                slog(INFO, "comparing token", rule[k].token, "at position", k, "against closer", container[1])
                if rule[k].token == container[1]:
                    last = k
                    break
            # "Not found" leaves last at -1; it can never equal i, because
            # rule[i] is the separator itself.
            if last == -1:
                raise Exception("Alternative in line", c.line, "missing next element:", format_rule(rule))
            break # found what I wanted
        else:
            # The rule matched the separator component but holds no '|' of
            # grammar type, so there is nothing to unroll.
            unrolled.append(rule)
            continue
        # first may legitimately be 0: the rule can start with the opener.
        head = rule[:first]
        mid = rule[first + 1:last]
        # Slice to the very end of the rule; stopping at index `end` would
        # drop the rule's final component.
        tail = rule[last + 1:]
        slog(INFO, "first =", first, "last =", last, "end =", end)
        slog(INFO, "head = ", format_rule(head))
        slog(INFO, "mid = ", format_rule(mid))
        slog(INFO, "tail = ", format_rule(tail))
        for m in split_list_by(mid, sep):
            unrolled.append(head + m + tail)
    return unrolled
def grammar_unroll_alternatives(grammar):
    """Unroll the alternatives of every production in *grammar*, in place.

    Each value of the mapping has its ``rules`` attribute replaced by the
    result of rules_unroll_alternatives().

    Args:
        grammar: mapping from symbol token to a production object with a
            ``rules`` attribute.

    Returns:
        The same *grammar* object, to allow chaining.
    """
    # values() exists on both Python 2 and Python 3 mappings, unlike
    # iteritems(); only attributes of the values change, never the keys, so
    # iterating while updating is safe.
    for p in grammar.values():
        p.rules = rules_unroll_alternatives(p.rules)
    return grammar
def step_out(grammar, terminals, orphans, lexicals, tok, depth, checked = None, found = None):
if checked is None:
checked = set()
@ -1357,6 +1446,7 @@ class GrammarCmd(jwutils.Cmd):
p.add_argument('-l', '--unroll-lists', help='unroll EBNF lists', action='store_true', default=False)
p.add_argument('-e', '--fix-extensions', help='fix EBNF prefix extensions (' + '|'.join(fix_extensions_mode) + ')', default=mode_concat)
p.add_argument('-o', '--unroll-options', help='unroll EBNF options', action='store_true', default=False)
p.add_argument('-a', '--unroll-alternatives', help='unroll EBNF alternatives', action='store_true', default=False)
p.add_argument('--check-symbols', help='check symbols, comma-separated or "all"', nargs='?', default='')
p.add_argument('-t', '--trim-symbols', help='trim grammar tree at symbol', nargs='?', default='')
p.add_argument('-c', '--cut-symbols', help='cut grammar tree at symbol', nargs='?', default='')
@ -1366,6 +1456,8 @@ class GrammarCmd(jwutils.Cmd):
if args.fix_extensions not in fix_extensions_mode:
raise Exception("Illegal argument ", args.fix_extensions, "to --fix-extensions")
grammar = grammar_fix_extensions(grammar, args.fix_extensions)
if args.unroll_alternatives:
grammar = grammar_unroll_alternatives(grammar)
if args.unroll_lists:
grammar = grammar_unroll_lists(grammar)
if args.unroll_options: