mirror of
ssh://git.janware.com/srv/git/janware/proj/jw-python
synced 2026-01-15 01:52:56 +01:00
grammar.py: Reimplement configured types
Add function grammar_add_configured_types() and keep configured types as symbols, part of the grammar like all the others. Signed-off-by: Jan Lindemann <jan@janware.com>
This commit is contained in:
parent
c7ecfbff3a
commit
07a328d9a8
1 changed files with 43 additions and 35 deletions
|
|
@ -27,6 +27,7 @@ p_terminal = "term"
|
|||
p_literal = "literal"
|
||||
p_lexical = "lexical"
|
||||
p_special = "special"
|
||||
p_regex = "regex"
|
||||
|
||||
mode_unroll = "unroll"
|
||||
mode_concat = "concat"
|
||||
|
|
@ -361,7 +362,7 @@ class Symbol:
|
|||
self.regex = tok2regex(self.token)
|
||||
self.is_lexical_element = False
|
||||
self.datatype = 'std::string'
|
||||
elif tp == p_special or tp == p_lexical:
|
||||
elif tp == p_special or tp == p_lexical or tp == p_regex:
|
||||
if len(self.rules):
|
||||
self.dump(ERR)
|
||||
raise Exception("Tried to set symbol", self.token, "to special which has", len(self.rules), "rules")
|
||||
|
|
@ -824,6 +825,19 @@ def grammar_replace_whitespace(grammar):
|
|||
slog(INFO, "added symbol", newtok)
|
||||
return r
|
||||
|
||||
def grammar_add_configured_types(grammar, conf):
    """Merge config-declared symbols into *grammar* as p_regex symbols.

    Each entry under the config's 'symbols' section becomes a Symbol of
    type p_regex whose regex is taken from the entry's "regex" value,
    keyed in the grammar by its token name.  Existing grammar entries
    with the same token are overwritten.  Returns the (mutated) grammar.
    If *conf* is None or has no 'symbols' section, the grammar is
    returned untouched.
    """
    if conf is not None:
        configured = conf.get('symbols')
        if configured is not None:
            for token, entry in configured.iteritems():
                sym = Symbol(token)
                sym.set_type(p_regex)
                sym.regex = entry["regex"].value()
                grammar[token] = sym
    return grammar
|
||||
|
||||
def step_out(grammar, terminals, orphans, lexicals, tok, depth, checked = None, found = None):
|
||||
if checked is None:
|
||||
checked = set()
|
||||
|
|
@ -1070,20 +1084,6 @@ def grammar_create_ebnf(grammar, opts):
|
|||
out += ' ' * indent + ' ;\n'
|
||||
return out
|
||||
|
||||
def symbols_from_config(conf, types = None):
    """Return the set of symbol names from *conf* whose configured type
    is one of *types*.

    conf  -- parsed config object with a .get('symbols') mapping, or None
    types -- list of accepted type tags; None or "all" selects the
             default [ c_token, c_non_terminal ]

    Returns an empty set when *conf* is None or has no 'symbols' section.
    """
    # 'is None' (identity), not '== None': PEP 8 idiom, and immune to
    # argument objects with a custom __eq__.
    if types is None or types == "all":
        types = [ c_token, c_non_terminal ]
    r = set()
    if conf is None:
        return r
    symbols = conf.get('symbols')
    if symbols is None:
        return r
    for k, v in symbols.iteritems():
        # each config entry carries its type tag under "type"
        if v["type"].value() in types:
            r.add(k)
    return r
|
||||
|
||||
def format_token(sym, tp):
    """Render one bison '%token <sym> sym /* tp */' declaration line,
    column-aligned via misc.pad."""
    decl = '%token <' + sym + '>'
    comment = '/* ' + tp + ' */'
    return misc.pad(decl, 27) + misc.pad(sym, 20) + comment
|
||||
|
||||
|
|
@ -1101,7 +1101,6 @@ def grammar_create_y(grammar, opts):
|
|||
indent = '\t' * (spaces / 8)
|
||||
|
||||
conf = opts['config']
|
||||
conf_tokens = symbols_from_config(conf, [ c_token, c_non_terminal ])
|
||||
|
||||
out = ""
|
||||
|
||||
|
|
@ -1153,7 +1152,8 @@ def grammar_create_y(grammar, opts):
|
|||
|
||||
types = grammar_get_types(grammar)
|
||||
for t in types.keys():
|
||||
if conf is not None and t in conf['symbols'].keys():
|
||||
s = grammar[t]
|
||||
if s.tp == p_regex:
|
||||
continue
|
||||
out += '\n\t' + opts['namespace'] + '::' + t + '_t *' + t + ';'
|
||||
out += '\n'
|
||||
|
|
@ -1192,19 +1192,19 @@ def grammar_create_y(grammar, opts):
|
|||
out += format_token(p.sym, t) +'\n'
|
||||
|
||||
# tokens from config
|
||||
if conf is not None:
|
||||
for k, t in conf['symbols'].iteritems():
|
||||
slog(NOTICE, "adding token", k)
|
||||
out += format_token(k, "blah") + '\n'
|
||||
for t, p in grammar.iteritems():
|
||||
if p.tp == p_regex:
|
||||
out += format_token(t, "blah") + '\n'
|
||||
|
||||
# types
|
||||
out += '\n'
|
||||
for t, p in grammar.iteritems():
|
||||
if conf is not None and p.sym in conf['symbols'].keys():
|
||||
if p.tp == p_regex:
|
||||
continue
|
||||
if p.tp == p_ruleset:
|
||||
out += misc.pad('%type <' + tok2sym(p.token) + '>', 40) + misc.pad(t, 35) + '/* ' + t + ' */' +'\n'
|
||||
|
||||
# options
|
||||
out += textwrap.dedent("""\
|
||||
|
||||
%define parse.error verbose
|
||||
|
|
@ -1225,7 +1225,7 @@ def grammar_create_y(grammar, opts):
|
|||
continue
|
||||
if p.tp == p_special:
|
||||
continue
|
||||
if conf is not None and p.sym in conf['symbols'].keys():
|
||||
if p.tp == p_regex:
|
||||
continue
|
||||
slog(INFO, "creating production for symbol", p.str())
|
||||
|
||||
|
|
@ -1351,15 +1351,16 @@ def grammar_create_l(grammar, opts):
|
|||
assert p.term[-1] in [ '"', "'" ], p.term
|
||||
out += re.escape(p.term[1:-1]) + ' { slog(PRI_NOTICE, "found terminal ' + p.sym + '"); return ' + p.sym + '; }\n'
|
||||
|
||||
if conf is not None:
|
||||
for k, v in conf['symbols'].iteritems():
|
||||
lex_as = v.get('lex_as')
|
||||
for t, p in grammar.iteritems():
|
||||
if p.tp == p_regex:
|
||||
c = conf['symbols'][t]
|
||||
lex_as = c.get('lex_as')
|
||||
if lex_as is not None:
|
||||
retval = lex_as.value()
|
||||
else:
|
||||
retval = k
|
||||
regex = v['regex'].value()
|
||||
out += regex + ' { slog(PRI_NOTICE, "found regex \\"' + regex + '\\" for ' + k + '"); return ' + retval + '; }\n'
|
||||
retval = t
|
||||
regex = c['regex'].value()
|
||||
out += regex + ' { slog(PRI_NOTICE, "found regex \\"' + regex + '\\" for ' + t + '"); return ' + retval + '; }\n'
|
||||
|
||||
#out += textwrap.dedent("""\
|
||||
#
|
||||
|
|
@ -1518,7 +1519,6 @@ def grammar_create_l(grammar, opts):
|
|||
def grammar_create_h(grammar, opts):
|
||||
out = "#ifndef " + opts['mip'] + '\n#define ' + opts['mip'] + '\n\n'
|
||||
ns = opts['namespace']
|
||||
tokens = symbols_from_config(opts['config'], "all")
|
||||
|
||||
out += textwrap.dedent("""\
|
||||
|
||||
|
|
@ -1563,7 +1563,8 @@ def grammar_create_h(grammar, opts):
|
|||
|
||||
# struct forward declarations
|
||||
for t, members in types.iteritems():
|
||||
if t in tokens:
|
||||
s = grammar[t]
|
||||
if s.tp == p_regex:
|
||||
continue
|
||||
if len(members):
|
||||
out += '\nstruct ' + t + ';'
|
||||
|
|
@ -1571,7 +1572,8 @@ def grammar_create_h(grammar, opts):
|
|||
|
||||
# struct / non-struct typedefs
|
||||
for t, members in types.iteritems():
|
||||
if t in tokens:
|
||||
s = grammar[t]
|
||||
if s.tp == p_regex:
|
||||
continue
|
||||
if not len(members):
|
||||
out += '\ntypedef const char ' + t + '_t;'
|
||||
|
|
@ -1581,7 +1583,8 @@ def grammar_create_h(grammar, opts):
|
|||
|
||||
# struct definitions
|
||||
for t, rules in types.iteritems():
|
||||
if t in tokens:
|
||||
s = grammar[t]
|
||||
if s.tp == p_regex:
|
||||
continue
|
||||
if not len(rules):
|
||||
continue
|
||||
|
|
@ -1598,7 +1601,8 @@ def grammar_create_h(grammar, opts):
|
|||
if rule.count(m) > 1:
|
||||
idx += 1
|
||||
suffix = '_' + str(idx)
|
||||
if m in tokens:
|
||||
ms = grammar[m]
|
||||
if ms.tp == p_regex:
|
||||
continue
|
||||
p = grammar[m]
|
||||
out += '\n\t\t' + p.datatype + ' *' + member_prefix + m + suffix + ';'
|
||||
|
|
@ -1655,6 +1659,11 @@ class GrammarCmd(jwutils.Cmd):
|
|||
return p
|
||||
|
||||
def processGrammar(self, args, grammar):
|
||||
|
||||
if args.config_file is not None:
|
||||
config = serdes.read(args.config_file)
|
||||
#config.dump(ERR)
|
||||
grammar = grammar_add_configured_types(grammar, config)
|
||||
if args.fix_extensions not in fix_extensions_mode:
|
||||
raise Exception("Illegal argument ", args.fix_extensions, "to --fix-extensions")
|
||||
grammar = grammar_fix_extensions(grammar, args.fix_extensions)
|
||||
|
|
@ -1730,7 +1739,6 @@ class CmdCreate(DerivedGrammarCmd):
|
|||
config = None
|
||||
if args.config_file is not None:
|
||||
config = serdes.read(args.config_file)
|
||||
config.dump(ERR)
|
||||
|
||||
# generated code breaks without this, not sure why
|
||||
if ext == 'l':
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue