grammar.py: Fix duplicate and missing rules

- unroll_lists() sometimes introduced identical lists into a rule set
- unroll_options() sometimes chopped off a production's last rule

Signed-off-by: Jan Lindemann <jan@janware.com>
Jan Lindemann 2017-11-05 18:30:57 +01:00
commit a2e5f9e1c9
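
The duplicate-rule fix hinges on the remove_duplicate_rules() helper added
below; the missing-rule fix flushes a leftover newrule in a new else branch.
A minimal standalone sketch of the dedup behavior (made-up toy rules and a
hypothetical dedup_rules() name, not the project's data): rules here appear
to be plain lists of token strings, so `in` compares them by value and the
first occurrence of each rule is kept.

    def dedup_rules(rules):
        # keep the first occurrence of each rule; lists compare by value
        seen = []
        for rule in rules:
            if rule in seen:
                continue
            seen.append(rule)
        return seen

    # unrolling a comma-separated list rule can emit the same expansion twice
    rules = [['expr'], ['expr', '","', 'expr'], ['expr']]
    assert dedup_rules(rules) == [['expr'], ['expr', '","', 'expr']]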

@@ -418,6 +418,18 @@ def split_list_by_regex(l_, regex):
     l = copy.deepcopy(l_)
     return [list(x[1]) for x in itertools.groupby(l, lambda x: re.match(regex, x)) if not x[0]]

+def remove_duplicate_rules(rules):
+    r = []
+    for rule in rules:
+        if rule in r:
+            continue
+        r.append(rule)
+    slog(DEBUG, "rules after removing duplicates >")
+    for rule in rules:
+        slog(DEBUG, "-> " + format_rule(rule))
+    slog(DEBUG, "rules after removing duplicates <")
+    return r
+
 def grammar_tokenize_ebnf(content):
     r = []
     c = ''
@@ -627,6 +639,7 @@ def grammar_fix_extensions(grammar, mode):

 def grammar_unroll_lists(grammar):
     delimiters = [ '","', '";"', '"|"' ] # TODO: this could be a function parameter to make it generic
+    newrule = None
     for tok, p in grammar.iteritems():
         newrules = []
         for rule in p.rules:
@@ -677,13 +690,22 @@ def grammar_unroll_lists(grammar):
                     listrule.append(c)
                     continue
                 newrule.append(c)
+            slog(DEBUG, "appending " + format_rule(newrule))
             newrules.append(newrule)
-        grammar[tok].rules = newrules
+            newrule = None
+        else:
+            if newrule is not None:
+                slog(DEBUG, "appending " + format_rule(newrule))
+                newrules.append(newrule)
+                newrule = None
+        slog(DEBUG, "done processing rules for " + tok)
+        grammar[tok].rules = remove_duplicate_rules(newrules)
     return grammar

 def rules_unroll_options(rules):
     r = []
     found = False
+    newrule = None
     slog(DEBUG, "unrolling", format_rules(rules))
     for rule in rules:
         square = 0
@@ -733,13 +755,19 @@ def rules_unroll_options(rules):
                 break
         if not found:
             r.append(newrule)
+            newrule = None
+        else:
+            if newrule is not None:
+                slog(DEBUG, "appending " + format_rule(newrule))
+                r.append(newrule)
+                newrule = None
     if found:
         return rules_unroll_options(r)
     return r

 def grammar_unroll_options(grammar):
     for tok, p in grammar.iteritems():
-        grammar[tok].rules = rules_unroll_options(p.rules)
+        grammar[tok].rules = remove_duplicate_rules(rules_unroll_options(p.rules))
     return grammar

 def rules_unroll_alternatives(rules):
@@ -1208,6 +1236,7 @@ def grammar_create_y(grammar, opts):
     out += textwrap.dedent("""\
         %define parse.error verbose
+        // %define lr.type ielr
         %define api.pure full
         %param { struct context *context } { void *scanner }
         """)
@@ -1673,6 +1702,7 @@ class GrammarCmd(jwutils.Cmd):
             grammar = grammar_unroll_alternatives(grammar)
         if args.unroll_options:
             grammar = grammar_unroll_options(grammar)
+        #grammar['logical_expression'].dump(ERR)
         if len(args.check_symbols):
             check_symbols = []
             if args.check_symbols == 'all':