diff --git a/test/grammar/Makefile b/test/grammar/Makefile
index 3799c43..fce45d6 100644
--- a/test/grammar/Makefile
+++ b/test/grammar/Makefile
@@ -21,6 +21,7 @@ GENERATE = python ./$(GENERATE_PY) --log-level $(GENERATE_LOG_LEVEL
 	--fix-extensions $(FIX_EXTENSIONS) \
 	--unroll-lists \
 	--unroll-options \
+	--unroll-alternatives \
 	$(CHECK_SYMBOLS) \
 	--trim-symbols=$(shell echo $(TRIM_SYMBOLS) | sed 's/ */,/g') \
 	$(CREATE_EXTRA_ARGS)
diff --git a/tools/python/jwutils/grammar.py b/tools/python/jwutils/grammar.py
index bf4b73f..5efb297 100644
--- a/tools/python/jwutils/grammar.py
+++ b/tools/python/jwutils/grammar.py
@@ -106,7 +106,7 @@ def cleanup_token(tok):
     return tok
 
 def tok2ctype(tok):
-    if tok in [ '{', '}', '[', ']', '<', '>', '(', ')', '?' ]:
+    if tok in [ '{', '}', '[', ']', '<', '>', '(', ')', '?', '|' ]:
         return t_grammar
     return t_target_lang
 
@@ -222,6 +222,12 @@ class RuleComp:
 class State:
 
     def __init__(self):
+        self.__pair_square = ['[', ']']
+        self.__pair_curly = ['{', '}']
+        self.__pair_ext = ['<', '>']
+        self.__pair_group = ['(', ')']
+        self.__pair_comment = ['(*', '*)']
+        self.__pair_special = ['?', '?']
         self.reset()
 
     def reset(self):
@@ -234,6 +240,15 @@ class State:
         self.production = None
         self.rule = []
         self.rules = []
+        self.things = []
+
+    def __leave(self, pair, tok, line):
+        # Pop the innermost open construct and verify it matches the closing
+        # token. Not an assert: the pop() must also happen under "python -O".
+        if not self.things:
+            raise Exception("Unbalanced closing token", tok, "in line", line)
+        if self.things.pop() != pair:
+            raise Exception("Mismatched closing token", tok, "in line", line)
 
     def optional(self):
         return self.square != 0 or self.curly != 0
@@ -242,30 +257,45 @@ class State:
         if not self.in_comment:
             if tok == '[':
                 self.square += 1
+                self.things.append(self.__pair_square)
             elif tok == ']':
                 self.square -= 1
+                self.__leave(self.__pair_square, tok, line)
             elif tok == '{':
                 self.curly += 1
+                self.things.append(self.__pair_curly)
             elif tok == '}':
                 self.curly -= 1
+                self.__leave(self.__pair_curly, tok, line)
             elif tok == '(':
                 self.group += 1
+                self.things.append(self.__pair_group)
             elif tok == ')':
                 self.group -= 1
+                self.__leave(self.__pair_group, tok, line)
             elif tok == '<':
                 self.ext += 1
+                self.things.append(self.__pair_ext)
             elif tok == '>':
                 self.ext -= 1
+                self.__leave(self.__pair_ext, tok, line)
             elif tok == '?':
-                self.in_special = not self.in_special
+                if not self.in_special:
+                    self.in_special = True
+                    self.things.append(self.__pair_special)
+                else:
+                    self.in_special = False
+                    self.__leave(self.__pair_special, tok, line)
             elif tok == '(*':
                 self.in_comment = True
+                self.things.append(self.__pair_comment)
             elif tok == '*)':
                 raise Exception("Unmatched closing EBNF comment mark", tok, "in line", line)
         else:
             if tok == '(*':
                 raise Exception("Nested EBNF comment", tok, "in line", line)
             elif tok == '*)':
+                self.__leave(self.__pair_comment, tok, line)
                 self.in_comment = False
 
         if self.curly < 0 or self.square < 0 or self.ext < 0 or self.group < 0:
@@ -285,9 +315,9 @@ class State:
         return self.ext > 0
 
     def in_something(self):
-        if self.square > 0 or self.curly > 0 or self.group > 0 or self.ext > 0 or self.in_comment or self.in_special:
-            return True
-        return False
+        if len(self.things) == 0:
+            return None
+        return self.things[-1]
 
 class Symbol:
 
@@ -471,7 +501,7 @@ def grammar_parse_ebnf_tokens(tokens):
             continue
         if tok == ',':
             continue
-        if tok == '|' and not state.in_something():
+        if tok == '|' and state.in_something() is None:
             ruleset.append(rule)
             rule = []
             continue
@@ -697,6 +727,79 @@ def grammar_unroll_options(grammar):
         grammar[tok].rules = rules_unroll_options(p.rules)
     return grammar
 
+def rules_unroll_alternatives(rules):
+    # Split every rule containing a grammar-level '|' inside a bracket pair
+    # into one rule per alternative: head + alternative + tail, where head
+    # and tail are the components outside the enclosing bracket pair.
+    # Only the bracket pair around the first '|' of a rule is expanded;
+    # rules without '|' are copied unchanged.
+    r = []
+    found = False
+    slog(INFO, "unrolling alternatives in", format_rules(rules))
+    sep = RuleComp('|')
+    for rule in rules:
+        if not sep in rule:
+            r.append(rule)
+            continue
+        found = True
+        state = State()
+        end = len(rule) - 1
+        first = last = -1
+        for i, c in enumerate(rule):
+            state.update(c.token, line=c.line)
+            if c.token != '|' or c.tp != t_grammar:
+                slog(INFO, "checking token", c.token, "of type", c.tp, "at position", i)
+                continue
+            slog(INFO, "found token at position", i)
+            container = state.in_something()
+            slog(INFO, "thing delimiters are", container)
+            if container is None:
+                raise Exception("Alternative in line", c.line, "at rule position", i, "outside container:", format_rule(rule))
+            first = last = -1
+            k = i - 1
+            while k >= 0:
+                prev = rule[k]
+                slog(INFO, "comparing token", rule[k].token, "at position", k, "against opener", container[0])
+                if prev.token == container[0]:
+                    first = k
+                    break
+                k -= 1
+            if first == -1:
+                raise Exception("Alternative in line", c.line, "missing previous element:", format_rule(rule))
+            k = i
+            while k <= end:
+                nxt = rule[k]
+                slog(INFO, "comparing token", rule[k].token, "at position", k, "against closer", container[1])
+                if nxt.token == container[1]:
+                    last = k
+                    break
+                k += 1
+            if last == -1: # closer not found, "last" still has its initial value
+                raise Exception("Alternative in line", c.line, "missing next element:", format_rule(rule))
+            break # found what I wanted
+        assert(first >= 0) # the opener may be the very first component
+        assert(last > 0)
+        assert(last <= end)
+        head = rule[0:first]
+        mid = rule[first+1:last]
+        tail = rule[last+1:] # "end" is the last index, so slice to the end of the rule
+        slog(INFO, "first =", first, "last =", last, "end =", end)
+        slog(INFO, "head = ", format_rule(head))
+        slog(INFO, "mid = ", format_rule(mid))
+        slog(INFO, "tail = ", format_rule(tail))
+        for m in split_list_by(mid, sep):
+            unrolled_rule = head + m + tail
+            r.append(unrolled_rule)
+    #if found:
+    #    return rules_unroll_alternatives(r)
+    return r
+
+def grammar_unroll_alternatives(grammar):
+    # Apply rules_unroll_alternatives() to the rules of every grammar entry, in place.
+    for tok, p in grammar.iteritems():
+        grammar[tok].rules = rules_unroll_alternatives(p.rules)
+    return grammar
+
 def step_out(grammar, terminals, orphans, lexicals, tok, depth, checked = None, found = None):
     if checked is None:
         checked = set()
@@ -1357,6 +1460,7 @@ class GrammarCmd(jwutils.Cmd):
         p.add_argument('-l', '--unroll-lists', help='unroll EBNF lists', action='store_true', default=False)
         p.add_argument('-e', '--fix-extensions', help='fix EBNF prefix extensions (' + '|'.join(fix_extensions_mode) + ')', default=mode_concat)
         p.add_argument('-o', '--unroll-options', help='unroll EBNF options', action='store_true', default=False)
+        p.add_argument('-a', '--unroll-alternatives', help='unroll EBNF alternatives', action='store_true', default=False)
         p.add_argument('--check-symbols', help='check symbols, comma-separated or "all"', nargs='?', default='')
         p.add_argument('-t', '--trim-symbols', help='trim grammar tree at symbol', nargs='?', default='')
         p.add_argument('-c', '--cut-symbols', help='cut grammar tree at symbol', nargs='?', default='')
@@ -1366,6 +1470,8 @@ class GrammarCmd(jwutils.Cmd):
         if args.fix_extensions not in fix_extensions_mode:
             raise Exception("Illegal argument ", args.fix_extensions, "to --fix-extensions")
         grammar = grammar_fix_extensions(grammar, args.fix_extensions)
+        if args.unroll_alternatives:
+            grammar = grammar_unroll_alternatives(grammar)
         if args.unroll_lists:
             grammar = grammar_unroll_lists(grammar)
         if args.unroll_options: