grammar.py and friends: Implement config file support

Signed-off-by: Jan Lindemann <jan@janware.com>
2026-01-15 01:52:56 +01:00 · 2017-11-02 08:54:39 +01:00 · 2017-11-02 08:54:39 +01:00 · 1a7a34f73c
commit 1a7a34f73c
parent 214c222002
6 changed files with 100 additions and 27 deletions
--- a/make/generate-flex-bison.mk
+++ b/make/generate-flex-bison.mk
@ -12,6 +12,10 @@ ifneq ($(CHECK_SYMBOLS),)
  OPT_CHECK_SYMBOLS  ?= --check-symbols='$(CHECK_SYMBOLS)'
 endif

+ifneq ($(GENERATE_CONFIG_FILE),)
+  OPT_CONFIG_FILE    ?= --config-file=$(GENERATE_CONFIG_FILE)
+endif
+
 GENERATED_STD          += $(FB_NAME).l $(FB_NAME).y $(FB_NAME).ebnf $(FB_COMMON_H)
 GENERATED              += $(FB_NAME)-dense.ebnf $(GENERATED_STD)
 GRAMMAR_INPUT        ?= $(FB_NAME)-input.ebnf
@ -25,15 +29,15 @@ FB_COMMON_H          ?= $(FB_HDRDIR)/$(FB_NAME).h

 INCLUDED_BY_GENERATED  += include/defs.h $(FB_COMMON_H) include/lex.$(FB_NAME).h include/$(FB_NAME).tab.h

-
 GENERATE_PY          ?= ./generate.py
 GENERATE             ?= python ./$(GENERATE_PY) --log-level $(GENERATE_LOG_LEVEL) create \
-				--fix-extensions $(FIX_EXTENSIONS) \
-				--unroll-lists \
-				--unroll-options \
+			--fix-extensions $(FIX_EXTENSIONS) \
+			--unroll-lists \
+			--unroll-options \
 				--unroll-alternatives \
 				--replace-whitespace \
 				$(OPT_CHECK_SYMBOLS) \
+				$(OPT_CONFIG_FILE) \
 				--trim-symbols=$(shell echo $(TRIM_SYMBOLS) | sed 's/  */,/g') \
 				--cut-symbols=$(shell echo $(CUT_SYMBOLS) | sed 's/  */,/g') \
 				--irrelevant-symbols=$(shell echo $(IRRELEVANT_SYMBOLS) | sed 's/  */,/g') \
--- a/test/grammar/Makefile
+++ b/test/grammar/Makefile
@ -4,6 +4,7 @@ EXE_ARGS              ?= grammartest.code
 PREREQ_BUILD          += ytools
 FB_NAME                = grammartest
 NAMESPACE_IN_GENERATED = gt
+GENERATE_CONFIG_FILE   = generate.conf

 include $(TOPDIR)/make/proj.mk
 include $(TOPDIR)/make/generate-flex-bison.mk
--- a/test/grammar/generate.conf
+++ b/test/grammar/generate.conf
@ -0,0 +1,9 @@
+[symbols]
+  [white_space[
+    regex = "[ \n\t\r]+" 
+  ]
+  [test[
+    dings = bums
+    regex = "bumsdings"
+  ]
+
--- a/test/grammar/include/defs.h
+++ b/test/grammar/include/defs.h
@ -7,6 +7,7 @@

 struct context {
 	int line;
+	int column;
 };

 union YYSTYPE;
--- a/test/grammar/main.cpp
+++ b/test/grammar/main.cpp
@ -12,6 +12,8 @@
 #include "include/defs.h"
 #include "include/grammartest.tab.h"

+extern int FB_SYM(debug);
+
 using namespace std;

 int main(int argc, const char *argv[])
@ -28,10 +30,14 @@ int main(int argc, const char *argv[])
 		return 1;
 	}

+	// TODO: Initialize this in a generated function
 	struct context context = {
-		line: 0
+		line: 1,
+		column: 0
 	};

+	FB_SYM(debug) = 1;
+
 	struct vp_scanner *scanner = FB_SYM(init_scanner)(content.c_str());
 	int status = FB_SYM(parse)(&context, FB_SYM(scanner_get_data)(scanner));
 	FB_SYM(cleanup_scanner)(scanner);
--- a/tools/python/jwutils/grammar.py
+++ b/tools/python/jwutils/grammar.py
@ -13,6 +13,9 @@ from abc import abstractmethod
 import os.path

 import jwutils
+#from jwutils.stree import StringTree, serdes
+import jwutils.stree.serdes as serdes
+import jwutils.stree.StringTree as StringTree

 from jwutils.log import *

@ -1062,6 +1065,18 @@ def grammar_create_ebnf(grammar, opts):
 	out += ' ' * indent + ' ;\n'
    return out

+def tokens_from_config(conf):  # collect the names of config symbols that define a 'regex' entry
+    r = set()  # result set of symbol names; returned empty when nothing qualifies
+    if conf is None:  # no config object was passed in
+        return r
+    symbols = conf.get('symbols')  # the 'symbols' section; presumably a StringTree node -- TODO confirm
+    if symbols is None:  # config has no 'symbols' section
+        return r
+    for k, v in symbols.iteritems():  # k: symbol name, v: that symbol's settings
+        if v.get('regex') is not None:  # only symbols that carry a regex are reported
+            r.add(k)
+    return r
+
 def format_token(sym, tp):  # build one '%token <sym> sym /* tp */' declaration line (no trailing newline)
    return misc.pad('%token <' + sym + '>', 27) + misc.pad(sym, 20) + '/* ' + tp + ' */'  # misc.pad presumably pads to the given column width -- TODO confirm

@ -1078,6 +1093,9 @@ def grammar_create_y(grammar, opts):
        spaces += 8
    indent = '\t' * (spaces / 8)

+    conf = opts['config']
+    tokens = tokens_from_config(conf)
+
    out = ""

    # preamble
@ -1099,21 +1117,22 @@ def grammar_create_y(grammar, opts):

    out += "\nusing namespace " + opts['namespace'] + ';\n'

+    #out += textwrap.dedent("""\
+    #    using namespace std;
+
+    #    namespace {
+
+    #    typedef vector<const char *> wrap_t;
+    #    const wrap_t curly_braces{ "{", "}" };
+    #    const wrap_t round_braces{ "(", ")" };
+
+    #    }
+
+    #    #ifdef __cplusplus
+    #    // extern "C" {
+    #    #endif
+
    out += textwrap.dedent("""\
-        using namespace std;
-
-        namespace {
-
-        typedef vector<const char *> wrap_t;
-        const wrap_t curly_braces{ "{", "}" };
-        const wrap_t round_braces{ "(", ")" };
-
-        }
-
-        #ifdef __cplusplus
-        // extern "C" {
-        #endif
-
        %}

    """)
@ -1125,6 +1144,8 @@ def grammar_create_y(grammar, opts):

    types = grammar_get_types(grammar)
    for t in types.keys():
+        if t in tokens:
+            continue
        out += '\n\t' + opts['namespace'] + '::' +  t + '_t *' + t + ';'
    out += '\n'

@ -1155,15 +1176,22 @@ def grammar_create_y(grammar, opts):
                continue
            out += format_token(p.sym, t) +'\n'

-    # regex tokens
+    # tokens from grammar
    out += '\n'
    for t, p in grammar.iteritems():
        if p.tp == p_literal:
            out += format_token(p.sym, t) +'\n'

+    # tokens from config
+    for k, t in conf['symbols'].iteritems():
+        slog(NOTICE, "adding token", k)
+        out += format_token(k, "blah") + '\n'
+
    # types
    out += '\n'
    for t, p in grammar.iteritems():
+        if p.sym in conf['symbols'].keys():
+            continue
        if p.tp == p_ruleset:
            out += misc.pad('%type <' + tok2sym(p.token) + '>', 40) + misc.pad(t, 35) + '/* ' + t + ' */' +'\n'

@ -1184,6 +1212,8 @@ def grammar_create_y(grammar, opts):
            continue
        if p.tp == p_special:
            continue
+        if p.sym in conf['symbols'].keys():
+            continue
        slog(INFO, "creating production for symbol", p.str())

        #if p.is_lexical_element is True:
@ -1202,6 +1232,7 @@ def grammar_create_y(grammar, opts):
            else:
                out += indent + "| " + format_yacc_rule(rule) + "\n"
            out += indent + "{" + "\n"
+            out += indent + "\t" + "$$ = new " + opts['namespace'] + '::' + t + ";\n"
            out += indent + "\t" + "$$->type = " + opts['namespace'] + '::' + t + "::t_" + str(n_rule) + ";\n"
            tokens = []
            for c in rule:
@ -1245,6 +1276,7 @@ def grammar_create_y(grammar, opts):
 def grammar_create_l(grammar, opts):

    ignore = ""
+    conf = opts['config']

    out = textwrap.dedent("""\
        %option reentrant
@ -1283,7 +1315,8 @@ def grammar_create_l(grammar, opts):

        %%

-        \\n { context->line++; }
+        \\n { context->line++; context->column = 0; REJECT; }
+        . { context->column++; REJECT; }

        """)

@ -1292,7 +1325,10 @@ def grammar_create_l(grammar, opts):
            # \. { return T_DOT; }
            assert p.term[0] in [ '"', "'" ], p.term
            assert p.term[-1] in [ '"', "'" ], p.term
-            out += re.escape(p.term[1:-1]) + ' { return ' + p.sym + '; }\n'
+            out += re.escape(p.term[1:-1]) + ' { slog(PRI_NOTICE, "found terminal ' + p.sym + '"); return ' + p.sym + '; }\n'
+
+    for k, v in conf['symbols'].iteritems():
+        out += v['regex'].value() + ' { slog(PRI_NOTICE, "found regex ' + k + '"); return ' + k + '; }\n'

    #out += textwrap.dedent("""\
    #
@ -1362,6 +1398,7 @@ def grammar_create_l(grammar, opts):

    out += textwrap.dedent("""\
        . {
+                slog(PRI_NOTICE, "returning character %c", yytext[0]);
        	return yytext[0];
        }

@ -1372,7 +1409,7 @@ def grammar_create_l(grammar, opts):
        void FB_SYM(error)(struct context *context, void *scanner, const char *msg)
        {
        	struct yyguts_t *yyg =(struct yyguts_t*)scanner;
-        	set_error(PRI_ERR, EINVAL, "%s at \\"%s\\" in line %d", msg, yytext, context->line);
+                set_error(PRI_ERR, EINVAL, "%s at \\"%s\\" in line %d:%d", msg, yytext, context->line, context->column);
        }

        int FB_SYM(wrap)(void *scanner)
@ -1427,6 +1464,7 @@ def grammar_create_l(grammar, opts):
 def grammar_create_h(grammar, opts):
    out = "#ifndef " + opts['mip'] + '\n#define ' + opts['mip'] + '\n\n'
    ns = opts['namespace']
+    tokens = tokens_from_config(opts['config'])

    if ns is not None:
        out += 'namespace ' + ns + '{\n\n'
@ -1435,12 +1473,16 @@ def grammar_create_h(grammar, opts):

    # struct forward declarations
    for t, members in types.iteritems():
+        if t in tokens:
+            continue
        if len(members):
            out += '\nstruct ' +  t + ';'
    out += '\n'

    # struct / non-struct typedefs
    for t, members in types.iteritems():
+        if t in tokens:
+            continue
        if not len(members):
            out += '\ntypedef const char ' +  t + '_t;'
            continue
@ -1449,6 +1491,8 @@ def grammar_create_h(grammar, opts):

    # struct definitions
    for t, rules in types.iteritems():
+        if t in tokens:
+            continue
        if not len(rules):
            continue
        out += '\n\nstruct ' +  t + ' {\n'
@ -1524,6 +1568,7 @@ class GrammarCmd(jwutils.Cmd):
        p.add_argument('-t', '--trim-symbols', help='trim grammar tree at symbol', nargs='?', default='')
        p.add_argument('-r', '--irrelevant-symbols', help='exclude symbol from output payload', nargs='?', default='')
        p.add_argument('-c', '--cut-symbols', help='cut grammar tree at symbol', nargs='?', default='')
+        p.add_argument('-f', '--config-file', help='config file', nargs='?', default=None)
        return p

    def processGrammar(self, args, grammar):
@ -1590,7 +1635,8 @@ class CmdCreate(DerivedGrammarCmd):
        return p

    def _run(self, args, grammar):
-        name, ext = os.path.splitext(args.output)[1]
+        name, ext = os.path.splitext(args.output)
+        ext = ext[1:]
        #cmd = getattr(sys.modules[__name__], 'create_' + re.sub(r'[-./]', '_', args.output))
        mip = None
        if ext == 'h':
@ -1598,6 +1644,11 @@ class CmdCreate(DerivedGrammarCmd):

        includes = args.includes.split(',')

+        config = None
+        if args.config_file is not None:
+            config = serdes.read(args.config_file)
+            config.dump(ERR)
+
        # generated code breaks without this, not sure why
        if ext == 'l':
            tmp = []
@ -1606,13 +1657,14 @@ class CmdCreate(DerivedGrammarCmd):
                    tmp.append(f)
            includes = tmp

-        cmd = getattr(sys.modules[__name__], 'grammar_create_' + ext)
-
        opts = {
            "namespace" : args.namespace,
            "includes"  : includes,
-            "mip"       : mip
+            "mip"       : mip,
+            "config"    : config
        }
+
+        cmd = getattr(sys.modules[__name__], 'grammar_create_' + ext)
        out = cmd(grammar, opts)
        print(out)