mirror of
ssh://git.janware.com/srv/git/janware/proj/jw-python
synced 2026-01-15 09:53:32 +01:00
grammar.py: Add grammar_unroll_alternatives()
Signed-off-by: Jan Lindemann <jan@janware.com>
This commit is contained in:
parent
432d78cdc5
commit
cbdd343f85
2 changed files with 99 additions and 6 deletions
|
|
@ -21,6 +21,7 @@ GENERATE = python ./$(GENERATE_PY) --log-level $(GENERATE_LOG_LEVEL
|
||||||
--fix-extensions $(FIX_EXTENSIONS) \
|
--fix-extensions $(FIX_EXTENSIONS) \
|
||||||
--unroll-lists \
|
--unroll-lists \
|
||||||
--unroll-options \
|
--unroll-options \
|
||||||
|
--unroll-alternatives \
|
||||||
$(CHECK_SYMBOLS) \
|
$(CHECK_SYMBOLS) \
|
||||||
--trim-symbols=$(shell echo $(TRIM_SYMBOLS) | sed 's/ */,/g') \
|
--trim-symbols=$(shell echo $(TRIM_SYMBOLS) | sed 's/ */,/g') \
|
||||||
$(CREATE_EXTRA_ARGS)
|
$(CREATE_EXTRA_ARGS)
|
||||||
|
|
|
||||||
|
|
@ -106,7 +106,7 @@ def cleanup_token(tok):
|
||||||
return tok
|
return tok
|
||||||
|
|
||||||
def tok2ctype(tok):
|
def tok2ctype(tok):
|
||||||
if tok in [ '{', '}', '[', ']', '<', '>', '(', ')', '?' ]:
|
if tok in [ '{', '}', '[', ']', '<', '>', '(', ')', '?', '|' ]:
|
||||||
return t_grammar
|
return t_grammar
|
||||||
return t_target_lang
|
return t_target_lang
|
||||||
|
|
||||||
|
|
@ -222,6 +222,12 @@ class RuleComp:
|
||||||
class State:
|
class State:
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
|
self.__pair_square = ['[', ']']
|
||||||
|
self.__pair_curly = ['{', '}']
|
||||||
|
self.__pair_ext = ['<', '>']
|
||||||
|
self.__pair_group = ['(', ')']
|
||||||
|
self.__pair_comment = ['(*', '*)']
|
||||||
|
self.__pair_special = ['?', '?']
|
||||||
self.reset()
|
self.reset()
|
||||||
|
|
||||||
def reset(self):
|
def reset(self):
|
||||||
|
|
@ -234,6 +240,7 @@ class State:
|
||||||
self.production = None
|
self.production = None
|
||||||
self.rule = []
|
self.rule = []
|
||||||
self.rules = []
|
self.rules = []
|
||||||
|
self.things = []
|
||||||
|
|
||||||
def optional(self):
|
def optional(self):
|
||||||
return self.square != 0 or self.curly != 0
|
return self.square != 0 or self.curly != 0
|
||||||
|
|
@ -242,30 +249,45 @@ class State:
|
||||||
if not self.in_comment:
|
if not self.in_comment:
|
||||||
if tok == '[':
|
if tok == '[':
|
||||||
self.square += 1
|
self.square += 1
|
||||||
|
self.things.append(self.__pair_square)
|
||||||
elif tok == ']':
|
elif tok == ']':
|
||||||
self.square -= 1
|
self.square -= 1
|
||||||
|
assert(self.things.pop() == self.__pair_square)
|
||||||
elif tok == '{':
|
elif tok == '{':
|
||||||
self.curly += 1
|
self.curly += 1
|
||||||
|
self.things.append(self.__pair_curly)
|
||||||
elif tok == '}':
|
elif tok == '}':
|
||||||
self.curly -= 1
|
self.curly -= 1
|
||||||
|
assert(self.things.pop() == self.__pair_curly)
|
||||||
elif tok == '(':
|
elif tok == '(':
|
||||||
self.group += 1
|
self.group += 1
|
||||||
|
self.things.append(self.__pair_group)
|
||||||
elif tok == ')':
|
elif tok == ')':
|
||||||
self.group -= 1
|
self.group -= 1
|
||||||
|
assert(self.things.pop() == self.__pair_group)
|
||||||
elif tok == '<':
|
elif tok == '<':
|
||||||
self.ext += 1
|
self.ext += 1
|
||||||
|
self.things.append(self.__pair_ext)
|
||||||
elif tok == '>':
|
elif tok == '>':
|
||||||
self.ext -= 1
|
self.ext -= 1
|
||||||
|
assert(self.things.pop() == self.__pair_ext)
|
||||||
elif tok == '?':
|
elif tok == '?':
|
||||||
self.in_special = not self.in_special
|
if not self.in_special:
|
||||||
|
self.in_special = True
|
||||||
|
self.things.append(self.__pair_special)
|
||||||
|
else:
|
||||||
|
self.in_special = False
|
||||||
|
assert(self.things.pop() == self.__pair_special)
|
||||||
elif tok == '(*':
|
elif tok == '(*':
|
||||||
self.in_comment = True
|
self.in_comment = True
|
||||||
|
self.things.append(self.__pair_comment)
|
||||||
elif tok == '*)':
|
elif tok == '*)':
|
||||||
raise Exception("Unmatched closing EBNF comment mark", tok, "in line", line)
|
raise Exception("Unmatched closing EBNF comment mark", tok, "in line", line)
|
||||||
else:
|
else:
|
||||||
if tok == '(*':
|
if tok == '(*':
|
||||||
raise Exception("Nested EBNF comment", tok, "in line", line)
|
raise Exception("Nested EBNF comment", tok, "in line", line)
|
||||||
elif tok == '*)':
|
elif tok == '*)':
|
||||||
|
assert(self.things.pop() == self.__pair_comment)
|
||||||
self.in_comment = False
|
self.in_comment = False
|
||||||
|
|
||||||
if self.curly < 0 or self.square < 0 or self.ext < 0 or self.group < 0:
|
if self.curly < 0 or self.square < 0 or self.ext < 0 or self.group < 0:
|
||||||
|
|
@ -285,9 +307,9 @@ class State:
|
||||||
return self.ext > 0
|
return self.ext > 0
|
||||||
|
|
||||||
def in_something(self):
|
def in_something(self):
|
||||||
if self.square > 0 or self.curly > 0 or self.group > 0 or self.ext > 0 or self.in_comment or self.in_special:
|
if len(self.things) == 0:
|
||||||
return True
|
return None
|
||||||
return False
|
return self.things[-1]
|
||||||
|
|
||||||
class Symbol:
|
class Symbol:
|
||||||
|
|
||||||
|
|
@ -471,7 +493,7 @@ def grammar_parse_ebnf_tokens(tokens):
|
||||||
continue
|
continue
|
||||||
if tok == ',':
|
if tok == ',':
|
||||||
continue
|
continue
|
||||||
if tok == '|' and not state.in_something():
|
if tok == '|' and state.in_something() is None:
|
||||||
ruleset.append(rule)
|
ruleset.append(rule)
|
||||||
rule = []
|
rule = []
|
||||||
continue
|
continue
|
||||||
|
|
@ -697,6 +719,73 @@ def grammar_unroll_options(grammar):
|
||||||
grammar[tok].rules = rules_unroll_options(p.rules)
|
grammar[tok].rules = rules_unroll_options(p.rules)
|
||||||
return grammar
|
return grammar
|
||||||
|
|
||||||
|
def rules_unroll_alternatives(rules):
    """Expand grouped EBNF alternatives into separate rules.

    Rules that do not contain the alternative separator '|' are passed
    through unchanged. For any other rule, the first separator of type
    t_grammar is located, together with the container (as reported by
    State.in_something()) that is open at that position. The rule is then
    cut into three parts:

      head -- everything before the container's opening delimiter
      mid  -- everything between the opening and closing delimiter
      tail -- everything after the closing delimiter

    mid is handed to split_list_by() with the separator, and for every
    piece m it returns, the rule head + m + tail is emitted. The container
    delimiters themselves are not part of the emitted rules.

    Only the container around the first grammar-level separator of each
    rule is processed per call.

    rules -- list of rules, each rule being a list of rule components
             providing the attributes token, tp and line

    Returns a new list of rules.

    Raises Exception if a separator is found outside of any container, or
    if the opening or closing delimiter of its container cannot be found
    in the rule.
    """
    r = []
    slog(INFO, "unrolling alternatives in", format_rules(rules))
    sep = RuleComp('|')
    for rule in rules:
        if sep not in rule:
            r.append(rule)
            continue
        state = State()
        end = len(rule) - 1
        first = last = -1
        for i, c in enumerate(rule):
            state.update(c.token, line=c.line)
            if c.token != '|' or c.tp != t_grammar:
                slog(INFO, "checking token", c.token, "of type", c.tp, "at position", i)
                continue
            slog(INFO, "found token at position", i)
            container = state.in_something()
            slog(INFO, "thing delimiters are", container)
            if container is None:
                raise Exception("Alternative in line", c.line, "at rule position", i, "outside container:", format_rule(rule))
            # Walk back from the separator to the nearest opening delimiter.
            # NOTE(review): the nearest match is taken without counting
            # nesting depth, so a nested container of the same kind between
            # the delimiter and the separator is presumably mismatched --
            # confirm whether such input can occur.
            for k in range(i - 1, -1, -1):
                slog(INFO, "comparing token", rule[k].token, "at position", k, "against opener", container[0])
                if rule[k].token == container[0]:
                    first = k
                    break
            if first == -1:
                raise Exception("Alternative in line", c.line, "missing previous element:", format_rule(rule))
            # Walk forward from the separator to the nearest closing delimiter.
            for k in range(i + 1, end + 1):
                slog(INFO, "comparing token", rule[k].token, "at position", k, "against closer", container[1])
                if rule[k].token == container[1]:
                    last = k
                    break
            # -1 means the loop above ran off the end of the rule. Comparing
            # against i can never trigger, because rule[i] is the separator.
            if last == -1:
                raise Exception("Alternative in line", c.line, "missing next element:", format_rule(rule))
            break # found what I wanted
        if first == -1:
            # The separator matched a component of the rule, but none of the
            # '|' tokens is of type t_grammar: nothing to unroll here.
            r.append(rule)
            continue
        # first may legitimately be 0 if the rule starts with the container.
        head = rule[:first]
        mid = rule[first + 1:last]
        # Open-ended slice: end is the index of the last component, so
        # slicing up to end would drop the final token of the rule.
        tail = rule[last + 1:]
        slog(INFO, "first =", first, "last =", last, "end =", end)
        slog(INFO, "head = ", format_rule(head))
        slog(INFO, "mid = ", format_rule(mid))
        slog(INFO, "tail = ", format_rule(tail))
        for m in split_list_by(mid, sep):
            r.append(head + m + tail)
    # NOTE(review): emitted rules may still contain further alternatives in
    # other containers; they are not unrolled by this single pass.
    return r
|
||||||
|
|
||||||
|
def grammar_unroll_alternatives(grammar):
    """Unroll grouped alternatives in every entry of the grammar.

    The rules attribute of each value in grammar is replaced by the result
    of rules_unroll_alternatives() applied to it.

    grammar -- mapping from token to an object carrying a rules attribute

    Returns the same grammar object, modified in place.
    """
    # items() instead of iteritems(): the latter only exists on Python 2
    # dictionaries, items() works on both Python 2 and 3. Only an attribute
    # of each value is replaced, the set of keys is left untouched, so
    # updating while iterating is safe.
    for tok, p in grammar.items():
        grammar[tok].rules = rules_unroll_alternatives(p.rules)
    return grammar
|
||||||
|
|
||||||
def step_out(grammar, terminals, orphans, lexicals, tok, depth, checked = None, found = None):
|
def step_out(grammar, terminals, orphans, lexicals, tok, depth, checked = None, found = None):
|
||||||
if checked is None:
|
if checked is None:
|
||||||
checked = set()
|
checked = set()
|
||||||
|
|
@ -1357,6 +1446,7 @@ class GrammarCmd(jwutils.Cmd):
|
||||||
p.add_argument('-l', '--unroll-lists', help='unroll EBNF lists', action='store_true', default=False)
|
p.add_argument('-l', '--unroll-lists', help='unroll EBNF lists', action='store_true', default=False)
|
||||||
p.add_argument('-e', '--fix-extensions', help='fix EBNF prefix extensions (' + '|'.join(fix_extensions_mode) + ')', default=mode_concat)
|
p.add_argument('-e', '--fix-extensions', help='fix EBNF prefix extensions (' + '|'.join(fix_extensions_mode) + ')', default=mode_concat)
|
||||||
p.add_argument('-o', '--unroll-options', help='unroll EBNF options', action='store_true', default=False)
|
p.add_argument('-o', '--unroll-options', help='unroll EBNF options', action='store_true', default=False)
|
||||||
|
p.add_argument('-a', '--unroll-alternatives', help='unroll EBNF alternatives', action='store_true', default=False)
|
||||||
p.add_argument('--check-symbols', help='check symbols, comma-separated or "all"', nargs='?', default='')
|
p.add_argument('--check-symbols', help='check symbols, comma-separated or "all"', nargs='?', default='')
|
||||||
p.add_argument('-t', '--trim-symbols', help='trim grammar tree at symbol', nargs='?', default='')
|
p.add_argument('-t', '--trim-symbols', help='trim grammar tree at symbol', nargs='?', default='')
|
||||||
p.add_argument('-c', '--cut-symbols', help='cut grammar tree at symbol', nargs='?', default='')
|
p.add_argument('-c', '--cut-symbols', help='cut grammar tree at symbol', nargs='?', default='')
|
||||||
|
|
@ -1366,6 +1456,8 @@ class GrammarCmd(jwutils.Cmd):
|
||||||
if args.fix_extensions not in fix_extensions_mode:
|
if args.fix_extensions not in fix_extensions_mode:
|
||||||
raise Exception("Illegal argument ", args.fix_extensions, "to --fix-extensions")
|
raise Exception("Illegal argument ", args.fix_extensions, "to --fix-extensions")
|
||||||
grammar = grammar_fix_extensions(grammar, args.fix_extensions)
|
grammar = grammar_fix_extensions(grammar, args.fix_extensions)
|
||||||
|
if args.unroll_alternatives:
|
||||||
|
grammar = grammar_unroll_alternatives(grammar)
|
||||||
if args.unroll_lists:
|
if args.unroll_lists:
|
||||||
grammar = grammar_unroll_lists(grammar)
|
grammar = grammar_unroll_lists(grammar)
|
||||||
if args.unroll_options:
|
if args.unroll_options:
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue