grammar.py: Reimplement configured types

Add function grammar_add_configured_types() and keep configured types as
symbols as part of the grammar like all the others

Signed-off-by: Jan Lindemann <jan@janware.com>
This commit is contained in:
Jan Lindemann 2017-11-05 16:34:00 +01:00
commit 07a328d9a8

View file

@ -27,6 +27,7 @@ p_terminal = "term"
p_literal = "literal" p_literal = "literal"
p_lexical = "lexical" p_lexical = "lexical"
p_special = "special" p_special = "special"
p_regex = "regex"
mode_unroll = "unroll" mode_unroll = "unroll"
mode_concat = "concat" mode_concat = "concat"
@ -361,7 +362,7 @@ class Symbol:
self.regex = tok2regex(self.token) self.regex = tok2regex(self.token)
self.is_lexical_element = False self.is_lexical_element = False
self.datatype = 'std::string' self.datatype = 'std::string'
elif tp == p_special or tp == p_lexical: elif tp == p_special or tp == p_lexical or tp == p_regex:
if len(self.rules): if len(self.rules):
self.dump(ERR) self.dump(ERR)
raise Exception("Tried to set symbol", self.token, "to special which has", len(self.rules), "rules") raise Exception("Tried to set symbol", self.token, "to special which has", len(self.rules), "rules")
@ -824,6 +825,19 @@ def grammar_replace_whitespace(grammar):
slog(INFO, "added symbol", newtok) slog(INFO, "added symbol", newtok)
return r return r
def grammar_add_configured_types(grammar, conf):
    """Insert one regex-typed Symbol per configured symbol into the grammar.

    For every entry found under the 'symbols' key of ``conf``, a new
    ``Symbol`` named after the entry is created, switched to type
    ``p_regex`` and given the entry's "regex" value as its ``regex``
    attribute.  The symbol is then stored in ``grammar`` under that name,
    replacing any symbol already registered there.

    grammar -- mapping of symbol name to Symbol; modified in place
    conf    -- configuration object, or None if no configuration was given

    Returns the same ``grammar`` object.  It is returned untouched when
    ``conf`` is None or has no 'symbols' entry.
    """
    configured = conf.get('symbols') if conf is not None else None
    if configured is not None:
        for name, entry in configured.iteritems():
            sym = Symbol(name)
            sym.set_type(p_regex)
            # the regular expression comes straight from the configuration
            sym.regex = entry["regex"].value()
            grammar[name] = sym
    return grammar
def step_out(grammar, terminals, orphans, lexicals, tok, depth, checked = None, found = None): def step_out(grammar, terminals, orphans, lexicals, tok, depth, checked = None, found = None):
if checked is None: if checked is None:
checked = set() checked = set()
@ -1070,20 +1084,6 @@ def grammar_create_ebnf(grammar, opts):
out += ' ' * indent + ' ;\n' out += ' ' * indent + ' ;\n'
return out return out
def symbols_from_config(conf, types = None):
    """Collect the names of configured symbols whose type is in ``types``.

    conf  -- configuration object providing get('symbols'), or None
    types -- collection of accepted type values; None or the string "all"
             selects both c_token and c_non_terminal

    Returns a set of symbol names.  The set is empty when ``conf`` is None
    or has no 'symbols' entry.
    """
    # Identity test: None is a singleton, and "== None" can be hijacked by
    # objects that define their own __eq__.
    if types is None or types == "all":
        types = [ c_token, c_non_terminal ]
    if conf is None:
        return set()
    symbols = conf.get('symbols')
    if symbols is None:
        return set()
    # each configured entry exposes its type through entry["type"].value()
    return set(k for k, v in symbols.iteritems() if v["type"].value() in types)
def format_token(sym, tp): def format_token(sym, tp):
return misc.pad('%token <' + sym + '>', 27) + misc.pad(sym, 20) + '/* ' + tp + ' */' return misc.pad('%token <' + sym + '>', 27) + misc.pad(sym, 20) + '/* ' + tp + ' */'
@ -1101,7 +1101,6 @@ def grammar_create_y(grammar, opts):
indent = '\t' * (spaces / 8) indent = '\t' * (spaces / 8)
conf = opts['config'] conf = opts['config']
conf_tokens = symbols_from_config(conf, [ c_token, c_non_terminal ])
out = "" out = ""
@ -1153,7 +1152,8 @@ def grammar_create_y(grammar, opts):
types = grammar_get_types(grammar) types = grammar_get_types(grammar)
for t in types.keys(): for t in types.keys():
if conf is not None and t in conf['symbols'].keys(): s = grammar[t]
if s.tp == p_regex:
continue continue
out += '\n\t' + opts['namespace'] + '::' + t + '_t *' + t + ';' out += '\n\t' + opts['namespace'] + '::' + t + '_t *' + t + ';'
out += '\n' out += '\n'
@ -1192,19 +1192,19 @@ def grammar_create_y(grammar, opts):
out += format_token(p.sym, t) +'\n' out += format_token(p.sym, t) +'\n'
# tokens from config # tokens from config
if conf is not None: for t, p in grammar.iteritems():
for k, t in conf['symbols'].iteritems(): if p.tp == p_regex:
slog(NOTICE, "adding token", k) out += format_token(t, "blah") + '\n'
out += format_token(k, "blah") + '\n'
# types # types
out += '\n' out += '\n'
for t, p in grammar.iteritems(): for t, p in grammar.iteritems():
if conf is not None and p.sym in conf['symbols'].keys(): if p.tp == p_regex:
continue continue
if p.tp == p_ruleset: if p.tp == p_ruleset:
out += misc.pad('%type <' + tok2sym(p.token) + '>', 40) + misc.pad(t, 35) + '/* ' + t + ' */' +'\n' out += misc.pad('%type <' + tok2sym(p.token) + '>', 40) + misc.pad(t, 35) + '/* ' + t + ' */' +'\n'
# options
out += textwrap.dedent("""\ out += textwrap.dedent("""\
%define parse.error verbose %define parse.error verbose
@ -1225,7 +1225,7 @@ def grammar_create_y(grammar, opts):
continue continue
if p.tp == p_special: if p.tp == p_special:
continue continue
if conf is not None and p.sym in conf['symbols'].keys(): if p.tp == p_regex:
continue continue
slog(INFO, "creating production for symbol", p.str()) slog(INFO, "creating production for symbol", p.str())
@ -1351,15 +1351,16 @@ def grammar_create_l(grammar, opts):
assert p.term[-1] in [ '"', "'" ], p.term assert p.term[-1] in [ '"', "'" ], p.term
out += re.escape(p.term[1:-1]) + ' { slog(PRI_NOTICE, "found terminal ' + p.sym + '"); return ' + p.sym + '; }\n' out += re.escape(p.term[1:-1]) + ' { slog(PRI_NOTICE, "found terminal ' + p.sym + '"); return ' + p.sym + '; }\n'
if conf is not None: for t, p in grammar.iteritems():
for k, v in conf['symbols'].iteritems(): if p.tp == p_regex:
lex_as = v.get('lex_as') c = conf['symbols'][t]
lex_as = c.get('lex_as')
if lex_as is not None: if lex_as is not None:
retval = lex_as.value() retval = lex_as.value()
else: else:
retval = k retval = t
regex = v['regex'].value() regex = c['regex'].value()
out += regex + ' { slog(PRI_NOTICE, "found regex \\"' + regex + '\\" for ' + k + '"); return ' + retval + '; }\n' out += regex + ' { slog(PRI_NOTICE, "found regex \\"' + regex + '\\" for ' + t + '"); return ' + retval + '; }\n'
#out += textwrap.dedent("""\ #out += textwrap.dedent("""\
# #
@ -1518,7 +1519,6 @@ def grammar_create_l(grammar, opts):
def grammar_create_h(grammar, opts): def grammar_create_h(grammar, opts):
out = "#ifndef " + opts['mip'] + '\n#define ' + opts['mip'] + '\n\n' out = "#ifndef " + opts['mip'] + '\n#define ' + opts['mip'] + '\n\n'
ns = opts['namespace'] ns = opts['namespace']
tokens = symbols_from_config(opts['config'], "all")
out += textwrap.dedent("""\ out += textwrap.dedent("""\
@ -1563,7 +1563,8 @@ def grammar_create_h(grammar, opts):
# struct forward declarations # struct forward declarations
for t, members in types.iteritems(): for t, members in types.iteritems():
if t in tokens: s = grammar[t]
if s.tp == p_regex:
continue continue
if len(members): if len(members):
out += '\nstruct ' + t + ';' out += '\nstruct ' + t + ';'
@ -1571,7 +1572,8 @@ def grammar_create_h(grammar, opts):
# struct / non-struct typedefs # struct / non-struct typedefs
for t, members in types.iteritems(): for t, members in types.iteritems():
if t in tokens: s = grammar[t]
if s.tp == p_regex:
continue continue
if not len(members): if not len(members):
out += '\ntypedef const char ' + t + '_t;' out += '\ntypedef const char ' + t + '_t;'
@ -1581,7 +1583,8 @@ def grammar_create_h(grammar, opts):
# struct definitions # struct definitions
for t, rules in types.iteritems(): for t, rules in types.iteritems():
if t in tokens: s = grammar[t]
if s.tp == p_regex:
continue continue
if not len(rules): if not len(rules):
continue continue
@ -1598,7 +1601,8 @@ def grammar_create_h(grammar, opts):
if rule.count(m) > 1: if rule.count(m) > 1:
idx += 1 idx += 1
suffix = '_' + str(idx) suffix = '_' + str(idx)
if m in tokens: ms = grammar[m]
if ms.tp == p_regex:
continue continue
p = grammar[m] p = grammar[m]
out += '\n\t\t' + p.datatype + ' *' + member_prefix + m + suffix + ';' out += '\n\t\t' + p.datatype + ' *' + member_prefix + m + suffix + ';'
@ -1655,6 +1659,11 @@ class GrammarCmd(jwutils.Cmd):
return p return p
def processGrammar(self, args, grammar): def processGrammar(self, args, grammar):
if args.config_file is not None:
config = serdes.read(args.config_file)
#config.dump(ERR)
grammar = grammar_add_configured_types(grammar, config)
if args.fix_extensions not in fix_extensions_mode: if args.fix_extensions not in fix_extensions_mode:
raise Exception("Illegal argument ", args.fix_extensions, "to --fix-extensions") raise Exception("Illegal argument ", args.fix_extensions, "to --fix-extensions")
grammar = grammar_fix_extensions(grammar, args.fix_extensions) grammar = grammar_fix_extensions(grammar, args.fix_extensions)
@ -1730,7 +1739,6 @@ class CmdCreate(DerivedGrammarCmd):
config = None config = None
if args.config_file is not None: if args.config_file is not None:
config = serdes.read(args.config_file) config = serdes.read(args.config_file)
config.dump(ERR)
# generated code breaks without this, not sure why # generated code breaks without this, not sure why
if ext == 'l': if ext == 'l':