diff --git a/tools/python/jwutils/grammar.py b/tools/python/jwutils/grammar.py index 6f0d094..e604383 100644 --- a/tools/python/jwutils/grammar.py +++ b/tools/python/jwutils/grammar.py @@ -27,6 +27,7 @@ p_terminal = "term" p_literal = "literal" p_lexical = "lexical" p_special = "special" +p_regex = "regex" mode_unroll = "unroll" mode_concat = "concat" @@ -361,7 +362,7 @@ class Symbol: self.regex = tok2regex(self.token) self.is_lexical_element = False self.datatype = 'std::string' - elif tp == p_special or tp == p_lexical: + elif tp == p_special or tp == p_lexical or tp == p_regex: if len(self.rules): self.dump(ERR) raise Exception("Tried to set symbol", self.token, "to special which has", len(self.rules), "rules") @@ -824,6 +825,19 @@ def grammar_replace_whitespace(grammar): slog(INFO, "added symbol", newtok) return r +def grammar_add_configured_types(grammar, conf): + if conf is None: + return grammar + symbols = conf.get('symbols') + if symbols is None: + return grammar + for t, c in symbols.iteritems(): + s = Symbol(t) + s.set_type(p_regex) + s.regex = c["regex"].value() + grammar[t] = s + return grammar + def step_out(grammar, terminals, orphans, lexicals, tok, depth, checked = None, found = None): if checked is None: checked = set() @@ -1070,20 +1084,6 @@ def grammar_create_ebnf(grammar, opts): out += ' ' * indent + ' ;\n' return out -def symbols_from_config(conf, types = None): - if types == None or types == "all": - types = [ c_token, c_non_terminal ] - r = set() - if conf is None: - return r - symbols = conf.get('symbols') - if symbols is None: - return r - for k, v in symbols.iteritems(): - if v["type"].value() in types: - r.add(k) - return r - def format_token(sym, tp): return misc.pad('%token <' + sym + '>', 27) + misc.pad(sym, 20) + '/* ' + tp + ' */' @@ -1101,7 +1101,6 @@ def grammar_create_y(grammar, opts): indent = '\t' * (spaces / 8) conf = opts['config'] - conf_tokens = symbols_from_config(conf, [ c_token, c_non_terminal ]) out = "" @@ -1153,7 +1152,8 @@ def grammar_create_y(grammar, opts): types = grammar_get_types(grammar) for t in types.keys(): - if conf is not None and t in conf['symbols'].keys(): + s = grammar[t] + if s.tp == p_regex: continue out += '\n\t' + opts['namespace'] + '::' + t + '_t *' + t + ';' out += '\n' @@ -1192,19 +1192,19 @@ def grammar_create_y(grammar, opts): out += format_token(p.sym, t) +'\n' # tokens from config - if conf is not None: - for k, t in conf['symbols'].iteritems(): - slog(NOTICE, "adding token", k) - out += format_token(k, "blah") + '\n' + for t, p in grammar.iteritems(): + if p.tp == p_regex: + out += format_token(t, "blah") + '\n' # types out += '\n' for t, p in grammar.iteritems(): - if conf is not None and p.sym in conf['symbols'].keys(): + if p.tp == p_regex: continue if p.tp == p_ruleset: out += misc.pad('%type <' + tok2sym(p.token) + '>', 40) + misc.pad(t, 35) + '/* ' + t + ' */' +'\n' + # options out += textwrap.dedent("""\ %define parse.error verbose @@ -1225,7 +1225,7 @@ def grammar_create_y(grammar, opts): continue if p.tp == p_special: continue - if conf is not None and p.sym in conf['symbols'].keys(): + if p.tp == p_regex: continue slog(INFO, "creating production for symbol", p.str()) @@ -1351,15 +1351,16 @@ def grammar_create_l(grammar, opts): assert p.term[-1] in [ '"', "'" ], p.term out += re.escape(p.term[1:-1]) + ' { slog(PRI_NOTICE, "found terminal ' + p.sym + '"); return ' + p.sym + '; }\n' - if conf is not None: - for k, v in conf['symbols'].iteritems(): - lex_as = v.get('lex_as') + for t, p in grammar.iteritems(): + if p.tp == p_regex: + c = conf['symbols'][t] + lex_as = c.get('lex_as') if lex_as is not None: retval = lex_as.value() else: - retval = k - regex = v['regex'].value() - out += regex + ' { slog(PRI_NOTICE, "found regex \\"' + regex + '\\" for ' + k + '"); return ' + retval + '; }\n' + retval = t + regex = c['regex'].value() + out += regex + ' { slog(PRI_NOTICE, "found regex \\"' + regex + '\\" for ' + t + '"); return ' + retval + '; }\n' #out += textwrap.dedent("""\ # @@ -1518,7 +1519,6 @@ def grammar_create_l(grammar, opts): def grammar_create_h(grammar, opts): out = "#ifndef " + opts['mip'] + '\n#define ' + opts['mip'] + '\n\n' ns = opts['namespace'] - tokens = symbols_from_config(opts['config'], "all") out += textwrap.dedent("""\ @@ -1563,7 +1563,8 @@ def grammar_create_h(grammar, opts): # struct forward declarations for t, members in types.iteritems(): - if t in tokens: + s = grammar[t] + if s.tp == p_regex: continue if len(members): out += '\nstruct ' + t + ';' @@ -1571,7 +1572,8 @@ def grammar_create_h(grammar, opts): # struct / non-struct typedefs for t, members in types.iteritems(): - if t in tokens: + s = grammar[t] + if s.tp == p_regex: continue if not len(members): out += '\ntypedef const char ' + t + '_t;' @@ -1581,7 +1583,8 @@ def grammar_create_h(grammar, opts): # struct definitions for t, rules in types.iteritems(): - if t in tokens: + s = grammar[t] + if s.tp == p_regex: continue if not len(rules): continue @@ -1598,7 +1601,8 @@ def grammar_create_h(grammar, opts): if rule.count(m) > 1: idx += 1 suffix = '_' + str(idx) - if m in tokens: + ms = grammar[m] + if ms.tp == p_regex: continue p = grammar[m] out += '\n\t\t' + p.datatype + ' *' + member_prefix + m + suffix + ';' @@ -1655,6 +1659,11 @@ class GrammarCmd(jwutils.Cmd): return p def processGrammar(self, args, grammar): + + if args.config_file is not None: + config = serdes.read(args.config_file) + #config.dump(ERR) + grammar = grammar_add_configured_types(grammar, config) if args.fix_extensions not in fix_extensions_mode: raise Exception("Illegal argument ", args.fix_extensions, "to --fix-extensions") grammar = grammar_fix_extensions(grammar, args.fix_extensions) @@ -1730,7 +1739,6 @@ class CmdCreate(DerivedGrammarCmd): config = None if args.config_file is not None: config = serdes.read(args.config_file) - config.dump(ERR) # generated code breaks without this, not sure why if ext == 'l':