grammar.py and friends: Implement config file support

Signed-off-by: Jan Lindemann <jan@janware.com>
2026-01-15 09:53:32 +01:00 · 2017-11-02 08:54:39 +01:00 · 2017-11-02 08:54:39 +01:00 · 1a7a34f73c
commit 1a7a34f73c
parent 214c222002
6 changed files with 100 additions and 27 deletions
--- a/make/generate-flex-bison.mk
+++ b/make/generate-flex-bison.mk
@ -12,6 +12,10 @@ ifneq ($(CHECK_SYMBOLS),)
  OPT_CHECK_SYMBOLS  ?= --check-symbols='$(CHECK_SYMBOLS)'
 endif
 ifneq ($(GENERATE_CONFIG_FILE),)
  OPT_CONFIG_FILE    ?= --config-file=$(GENERATE_CONFIG_FILE)
 endif
 GENERATED_STD          += $(FB_NAME).l $(FB_NAME).y $(FB_NAME).ebnf $(FB_COMMON_H)
 GENERATED              += $(FB_NAME)-dense.ebnf $(GENERATED_STD)
 GRAMMAR_INPUT        ?= $(FB_NAME)-input.ebnf
@ -25,15 +29,15 @@ FB_COMMON_H          ?= $(FB_HDRDIR)/$(FB_NAME).h
 INCLUDED_BY_GENERATED  += include/defs.h $(FB_COMMON_H) include/lex.$(FB_NAME).h include/$(FB_NAME).tab.h
 GENERATE_PY          ?= ./generate.py
 GENERATE             ?= python ./$(GENERATE_PY) --log-level $(GENERATE_LOG_LEVEL) create \
-				--fix-extensions $(FIX_EXTENSIONS) \
+			--fix-extensions $(FIX_EXTENSIONS) \
-				--unroll-lists \
+			--unroll-lists \
-				--unroll-options \
+			--unroll-options \
 				--unroll-alternatives \
 				--replace-whitespace \
 				$(OPT_CHECK_SYMBOLS) \
 				$(OPT_CONFIG_FILE) \
 				--trim-symbols=$(shell echo $(TRIM_SYMBOLS) | sed 's/  */,/g') \
 				--cut-symbols=$(shell echo $(CUT_SYMBOLS) | sed 's/  */,/g') \
 				--irrelevant-symbols=$(shell echo $(IRRELEVANT_SYMBOLS) | sed 's/  */,/g') \
--- a/test/grammar/Makefile
+++ b/test/grammar/Makefile
@ -4,6 +4,7 @@ EXE_ARGS              ?= grammartest.code
 PREREQ_BUILD          += ytools
 FB_NAME                = grammartest
 NAMESPACE_IN_GENERATED = gt
 GENERATE_CONFIG_FILE   = generate.conf
 include $(TOPDIR)/make/proj.mk
 include $(TOPDIR)/make/generate-flex-bison.mk
--- a/test/grammar/generate.conf
+++ b/test/grammar/generate.conf
@ -0,0 +1,9 @@
 [symbols]
  [white_space[
    regex = "[ \n\t\r]+" 
  ]
  [test[
    dings = bums
    regex = "bumsdings"
  ]
--- a/test/grammar/include/defs.h
+++ b/test/grammar/include/defs.h
@ -7,6 +7,7 @@
 struct context {
 	int line;
 	int column;
 };
 union YYSTYPE;
--- a/test/grammar/main.cpp
+++ b/test/grammar/main.cpp
@ -12,6 +12,8 @@
 #include "include/defs.h"
 #include "include/grammartest.tab.h"
 extern int FB_SYM(debug);
 using namespace std;
 int main(int argc, const char *argv[])
@ -28,10 +30,14 @@ int main(int argc, const char *argv[])
 		return 1;
 	}
 	// TODO: Initialize this in a generated function
 	struct context context = {
-		line: 0
+		line: 1,
 		column: 0
 	};
 	FB_SYM(debug) = 1;
 	struct vp_scanner *scanner = FB_SYM(init_scanner)(content.c_str());
 	int status = FB_SYM(parse)(&context, FB_SYM(scanner_get_data)(scanner));
 	FB_SYM(cleanup_scanner)(scanner);
--- a/tools/python/jwutils/grammar.py
+++ b/tools/python/jwutils/grammar.py
@ -13,6 +13,9 @@ from abc import abstractmethod
 import os.path
 import jwutils
 #from jwutils.stree import StringTree, serdes
 import jwutils.stree.serdes as serdes
 import jwutils.stree.StringTree as StringTree
 from jwutils.log import *
@ -1062,6 +1065,18 @@ def grammar_create_ebnf(grammar, opts):
 	out += ' ' * indent + ' ;\n'
    return out
 def tokens_from_config(conf):
    """Return the set of symbol names in conf that carry a 'regex' entry.

    An empty set is returned when conf is None or when it has no
    'symbols' entry.
    """
    symbols = conf.get('symbols') if conf is not None else None
    if symbols is None:
        return set()
    # Only symbols that define a regex are treated as tokens.
    return set(name for name, entry in symbols.iteritems()
               if entry.get('regex') is not None)
 def format_token(sym, tp):
    # Build one '%token <sym>' declaration line: the declaration and the
    # symbol name each go through misc.pad (widths 27 and 20), followed by
    # tp wrapped in a C-style comment.
    declaration = misc.pad('%token <' + sym + '>', 27)
    name_column = misc.pad(sym, 20)
    trailer = '/* ' + tp + ' */'
    return declaration + name_column + trailer
@ -1078,6 +1093,9 @@ def grammar_create_y(grammar, opts):
        spaces += 8
    indent = '\t' * (spaces / 8)
    conf = opts['config']
    tokens = tokens_from_config(conf)
    out = ""
    # preamble
@ -1099,21 +1117,22 @@ def grammar_create_y(grammar, opts):
    out += "\nusing namespace " + opts['namespace'] + ';\n'
    #out += textwrap.dedent("""\
    #    using namespace std;
    #    namespace {
    #    typedef vector<const char *> wrap_t;
    #    const wrap_t curly_braces{ "{", "}" };
    #    const wrap_t round_braces{ "(", ")" };
    #    }
    #    #ifdef __cplusplus
    #    // extern "C" {
    #    #endif
    out += textwrap.dedent("""\
        using namespace std;
        namespace {
        typedef vector<const char *> wrap_t;
        const wrap_t curly_braces{ "{", "}" };
        const wrap_t round_braces{ "(", ")" };
        }
        #ifdef __cplusplus
        // extern "C" {
        #endif
        %}
    """)
@ -1125,6 +1144,8 @@ def grammar_create_y(grammar, opts):
    types = grammar_get_types(grammar)
    for t in types.keys():
        if t in tokens:
            continue
        out += '\n\t' + opts['namespace'] + '::' +  t + '_t *' + t + ';'
    out += '\n'
@ -1155,15 +1176,22 @@ def grammar_create_y(grammar, opts):
                continue
            out += format_token(p.sym, t) +'\n'
-    # regex tokens
+    # tokens from grammar
    out += '\n'
    for t, p in grammar.iteritems():
        if p.tp == p_literal:
            out += format_token(p.sym, t) +'\n'
    # tokens from config
    for k, t in conf['symbols'].iteritems():
        slog(NOTICE, "adding token", k)
        out += format_token(k, "blah") + '\n'
    # types
    out += '\n'
    for t, p in grammar.iteritems():
        if p.sym in conf['symbols'].keys():
            continue
        if p.tp == p_ruleset:
            out += misc.pad('%type <' + tok2sym(p.token) + '>', 40) + misc.pad(t, 35) + '/* ' + t + ' */' +'\n'
@ -1184,6 +1212,8 @@ def grammar_create_y(grammar, opts):
            continue
        if p.tp == p_special:
            continue
        if p.sym in conf['symbols'].keys():
            continue
        slog(INFO, "creating production for symbol", p.str())
        #if p.is_lexical_element is True:
@ -1202,6 +1232,7 @@ def grammar_create_y(grammar, opts):
            else:
                out += indent + "| " + format_yacc_rule(rule) + "\n"
            out += indent + "{" + "\n"
            out += indent + "\t" + "$$ = new " + opts['namespace'] + '::' + t + ";\n"
            out += indent + "\t" + "$$->type = " + opts['namespace'] + '::' + t + "::t_" + str(n_rule) + ";\n"
            tokens = []
            for c in rule:
@ -1245,6 +1276,7 @@ def grammar_create_y(grammar, opts):
 def grammar_create_l(grammar, opts):
    ignore = ""
    conf = opts['config']
    out = textwrap.dedent("""\
        %option reentrant
@ -1283,7 +1315,8 @@ def grammar_create_l(grammar, opts):
        %%
-        \\n { context->line++; }
+        \\n { context->line++; context->column = 0; REJECT; }
        . { context->column++; REJECT; }
        """)
@ -1292,7 +1325,10 @@ def grammar_create_l(grammar, opts):
            # \. { return T_DOT; }
            assert p.term[0] in [ '"', "'" ], p.term
            assert p.term[-1] in [ '"', "'" ], p.term
-            out += re.escape(p.term[1:-1]) + ' { return ' + p.sym + '; }\n'
+            out += re.escape(p.term[1:-1]) + ' { slog(PRI_NOTICE, "found terminal ' + p.sym + '"); return ' + p.sym + '; }\n'
    for k, v in conf['symbols'].iteritems():
        out += v['regex'].value() + ' { slog(PRI_NOTICE, "found regex ' + k + '"); return ' + k + '; }\n'
    #out += textwrap.dedent("""\
    #
@ -1362,6 +1398,7 @@ def grammar_create_l(grammar, opts):
    out += textwrap.dedent("""\
        . {
                slog(PRI_NOTICE, "returning character %c", yytext[0]);
        	return yytext[0];
        }
@ -1372,7 +1409,7 @@ def grammar_create_l(grammar, opts):
        void FB_SYM(error)(struct context *context, void *scanner, const char *msg)
        {
        	struct yyguts_t *yyg =(struct yyguts_t*)scanner;
-        	set_error(PRI_ERR, EINVAL, "%s at \\"%s\\" in line %d", msg, yytext, context->line);
+                set_error(PRI_ERR, EINVAL, "%s at \\"%s\\" in line %d:%d", msg, yytext, context->line, context->column);
        }
        int FB_SYM(wrap)(void *scanner)
@ -1427,6 +1464,7 @@ def grammar_create_l(grammar, opts):
 def grammar_create_h(grammar, opts):
    out = "#ifndef " + opts['mip'] + '\n#define ' + opts['mip'] + '\n\n'
    ns = opts['namespace']
    tokens = tokens_from_config(opts['config'])
    if ns is not None:
        out += 'namespace ' + ns + '{\n\n'
@ -1435,12 +1473,16 @@ def grammar_create_h(grammar, opts):
    # struct forward declarations
    for t, members in types.iteritems():
        if t in tokens:
            continue
        if len(members):
            out += '\nstruct ' +  t + ';'
    out += '\n'
    # struct / non-struct typedefs
    for t, members in types.iteritems():
        if t in tokens:
            continue
        if not len(members):
            out += '\ntypedef const char ' +  t + '_t;'
            continue
@ -1449,6 +1491,8 @@ def grammar_create_h(grammar, opts):
    # struct definitions
    for t, rules in types.iteritems():
        if t in tokens:
            continue
        if not len(rules):
            continue
        out += '\n\nstruct ' +  t + ' {\n'
@ -1524,6 +1568,7 @@ class GrammarCmd(jwutils.Cmd):
        p.add_argument('-t', '--trim-symbols', help='trim grammar tree at symbol', nargs='?', default='')
        p.add_argument('-r', '--irrelevant-symbols', help='exclude symbol from output payload', nargs='?', default='')
        p.add_argument('-c', '--cut-symbols', help='cut grammar tree at symbol', nargs='?', default='')
        p.add_argument('-f', '--config-file', help='config file', nargs='?', default=None)
        return p
    def processGrammar(self, args, grammar):
@ -1590,7 +1635,8 @@ class CmdCreate(DerivedGrammarCmd):
        return p
    def _run(self, args, grammar):
-        name, ext = os.path.splitext(args.output)[1]
+        name, ext = os.path.splitext(args.output)
        ext = ext[1:]
        #cmd = getattr(sys.modules[__name__], 'create_' + re.sub(r'[-./]', '_', args.output))
        mip = None
        if ext == 'h':
@ -1598,6 +1644,11 @@ class CmdCreate(DerivedGrammarCmd):
        includes = args.includes.split(',')
        config = None
        if args.config_file is not None:
            config = serdes.read(args.config_file)
            config.dump(ERR)
        # generated code breaks without this, not sure why
        if ext == 'l':
            tmp = []
@ -1606,13 +1657,14 @@ class CmdCreate(DerivedGrammarCmd):
                    tmp.append(f)
            includes = tmp
        cmd = getattr(sys.modules[__name__], 'grammar_create_' + ext)
        opts = {
            "namespace" : args.namespace,
            "includes"  : includes,
-            "mip"       : mip
+            "mip"       : mip,
            "config"    : config
        }
        cmd = getattr(sys.modules[__name__], 'grammar_create_' + ext)
        out = cmd(grammar, opts)
        print(out)