grammar.py et al: Centralize more code

More code is removed from the special parser directories and centralized into grammar.py, Cmd.py, and generate-flex-bison.mk. Signed-off-by: Jan Lindemann <jan@janware.com>
2026-01-15 09:53:32 +01:00 · 2017-10-29 18:25:55 +01:00 · 2017-10-29 18:25:55 +01:00 · 6297f10f55
commit 6297f10f55
parent fee94deb48
6 changed files with 210 additions and 177 deletions
--- a/tools/python/jwutils/grammar.py
+++ b/tools/python/jwutils/grammar.py
@ -10,6 +10,7 @@ import itertools
 import copy
 from collections import OrderedDict
 from abc import abstractmethod
+import os.path

 import jwutils

@ -914,7 +915,7 @@ def grammar_check(grammar, check_symbols = None):
        slog(INFO, "======= checking", tok)
        rr = step_out(grammar, terminals, orphans, lexicals, tok, 0, checked=set(), found=found)
        if rr == sys.maxint:
-            slog(ERR, "No way out for", tok, "in production", p.str())
+            slog(ERR, "No way out for", tok)
            exit(1)
        if not tok in grammar.keys():
            slog(ERR, "Token", tok, "has no production")
@ -1034,7 +1035,7 @@ def grammar_trim_symbols(grammar, symbols):

    return grammar

-def create_ebnf(grammar):
+def grammar_create_ebnf(grammar, opts):
    indent = 40
    slog(INFO, "creating ebnf from grammar of size", len(grammar))
    out = ''
@ -1049,7 +1050,7 @@ def create_ebnf(grammar):
 	out += ' ' * indent + ' ;\n'
    return out

-def create_yacc(grammar):
+def grammar_create_y(grammar, opts):
    indent = ' ' * 40
    width = 0
    for t, p in grammar.iteritems():
@ -1076,13 +1077,19 @@ def create_yacc(grammar):
        #include <vector>
        #include <string>

-        #include "include/defs.h"
-        #include "include/vhdl2017.h"
-        #include "include/lex.vhdl2017.h"
-        #include "include/vhdl2017.tab.h"
+    """)

+    for f in opts['includes']:
+        out += '#include "' + f + '"' + '\n'
+    #include "include/defs.h"
+    #include "include/vhdl2017.h"
+    #include "include/lex.vhdl2017.h"
+    #include "include/vhdl2017.tab.h"
+
+    out += "\nusing namespace " + opts['namespace'] + ';\n'
+
+    out += textwrap.dedent("""\
        using namespace std;
-        using namespace v2017;

        namespace {

@ -1107,7 +1114,7 @@ def create_yacc(grammar):

    types = grammar_get_types(grammar)
    for t in types.keys():
-        out += '\n\tv2017::' +  t + '_t *' + t + ';'
+        out += '\n\t' + opts['namespace'] + '::' +  t + '_t *' + t + ';'
    out += '\n'

    out += textwrap.dedent("""\
@ -1184,7 +1191,7 @@ def create_yacc(grammar):
            else:
                out += indent + "| " + format_yacc_rule(rule) + "\n"
            out += indent + "{" + "\n"
-            out += indent + "\t" + "$$->type = v2017::" + t + "::t_" + str(n_rule) + ";\n"
+            out += indent + "\t" + "$$->type = " + opts['namespace'] + '::' + t + "::t_" + str(n_rule) + ";\n"
            tokens = []
            for c in rule:
                if c.tp == t_target_lang:
@ -1222,7 +1229,7 @@ def create_yacc(grammar):

    return out + "\n"

-def create_lex(grammar):
+def grammar_create_l(grammar, opts):

    ignore = ""

@ -1233,13 +1240,19 @@ def create_lex(grammar):
        %{
        #include <slog.h>

-        #include "include/defs.h"
-        #include "include/vhdl2017.h"
+    """)

-        // #include "include/lex.vhdl2017.h"
-        #include "include/vhdl2017.tab.h"
+    for f in opts['includes']:
+        out += '#include "' + f + '"' + '\n'
+    #include "include/defs.h"
+    #include "include/vhdl2017.h"

-        using namespace v2017;
+    #// #include "include/lex.vhdl2017.h"
+    #include "include/vhdl2017.tab.h"
+
+    out += "\nusing namespace " + opts['namespace'] + ';\n'
+
+    out += textwrap.dedent("""\

        /* This is too late in the Flex generated file to work. Still lots of
         * prototypes are spat into it above it, which end up with C++ linkage, of
@ -1399,10 +1412,12 @@ def create_lex(grammar):

    return out

-def create_header(grammar, mip, namespace = None):
-    out = "#ifndef " + mip + '\n#define ' + mip + '\n\n'
-    if namespace is not None:
-        out += 'namespace ' + namespace + '{\n\n'
+def grammar_create_h(grammar, opts):
+    out = "#ifndef " + opts['mip'] + '\n#define ' + opts['mip'] + '\n\n'
+    ns = opts['namespace']
+
+    if ns is not None:
+        out += 'namespace ' + ns + '{\n\n'

    types = grammar_get_types(grammar)

@ -1463,9 +1478,9 @@ def create_header(grammar, mip, namespace = None):

    out += '\n'

-    if namespace is not None:
-        out += '\n} /* namespace ' + namespace + '*/'
-    out += '\n#endif /* #ifndef + ' + mip + ' */'
+    if ns is not None:
+        out += '\n} /* namespace ' + ns + '*/'
+    out += '\n#endif /* #ifndef + ' + opts['mip'] + ' */'

    return out

@ -1510,3 +1525,79 @@ class GrammarCmd(jwutils.Cmd):
        if args.replace_whitespace:
            grammar = grammar_replace_whitespace(grammar)
        return grammar
+
+# ------------------------------------------------- TODO: clean this up >
+
+class DerivedGrammarCmd(GrammarCmd):
+    """Base class for commands which read a grammar file and act on it.
+
+    run() reads the file named by args.input, parses it with _parse(),
+    passes the result through processGrammar() of the parent class and
+    hands the processed grammar to _run(), which subclasses implement.
+    """
+
+    def __init__(self, name, help):
+        super(DerivedGrammarCmd, self).__init__(name, help=help)
+
+    @abstractmethod
+    def _run(self, args, grammar):
+        """Act on the processed grammar.
+
+        args    -- the parsed command line arguments, as passed to run()
+        grammar -- the grammar as returned by processGrammar()
+        """
+        # The signature matches the call in run() and the overrides in
+        # the subclasses, which all take (args, grammar).
+        pass
+
+    def _parse(self, contents):
+        """Turn the text of the input file into a grammar (EBNF parser)."""
+        return grammar_parse_ebnf(contents)
+
+    def add_parser(self, parsers):
+        """Delegate to the parent's add_parser() and return its result."""
+        p = super(DerivedGrammarCmd, self).add_parser(parsers)
+        return p
+
+    def run(self, args):
+        """Load, parse and process the grammar in args.input, then call _run()."""
+        # NOTE(review): args.input is presumably defined by the parent's
+        # argument parser -- it is not added in this class.
+        with open(args.input, 'r') as infile:
+            contents = infile.read()
+        grammar = self._parse(contents)
+        grammar = super(DerivedGrammarCmd, self).processGrammar(args, grammar)
+        self._run(args, grammar)
+
+class CmdCreate(DerivedGrammarCmd):
+    """The "create" command: generate a file from the grammar.
+
+    The generator function is selected by the extension of the output
+    file name: "x.y" is produced by grammar_create_y(), "x.ebnf" by
+    grammar_create_ebnf(), and so on. The generated text is printed to
+    stdout.
+    """
+
+    def __init__(self):
+        super(CmdCreate, self).__init__("create", help="Create a file")
+
+    def add_parser(self, parsers):
+        """Add the output file, --namespace and --includes arguments."""
+        p = super(CmdCreate, self).add_parser(parsers)
+        p.add_argument("output", help="output file")
+        p.add_argument('--namespace', help='namespace of generated AST', default='parser')
+        p.add_argument('--includes', help='list of header files to be #included in C/C++ implementation files', default='')
+        return p
+
+    def _run(self, args, grammar):
+        """Generate the file type implied by args.output and print it.
+
+        Raises AttributeError if this module has no grammar_create_<ext>()
+        function for the extension of args.output.
+        """
+        # splitext() returns (root, ext), ext including the leading dot.
+        # Slicing the dot off works for extensions of any length, whereas
+        # unpacking the extension string only works for one-letter ones.
+        ext = os.path.splitext(args.output)[1][1:]
+
+        # Name of the include guard macro, only used for header files
+        mip = None
+        if ext == 'h':
+            mip = args.namespace + re.sub(r'[-./]', '_', args.output).upper()
+
+        # ''.split(',') is [''], so drop empty entries, otherwise the
+        # default --includes='' results in an '#include ""' line.
+        includes = [f.strip() for f in args.includes.split(',') if f.strip()]
+
+        # generated code breaks without this, not sure why
+        if ext == 'l':
+            includes = [f for f in includes if not re.match(r'.*lex\..*\.h', f)]
+
+        cmd = getattr(sys.modules[__name__], 'grammar_create_' + ext)
+
+        opts = {
+            "namespace" : args.namespace,
+            "includes"  : includes,
+            "mip"       : mip
+        }
+        out = cmd(grammar, opts)
+        print(out)
+
+class CmdCheck(DerivedGrammarCmd):
+    """The "check" command; its _run() does nothing with the grammar."""
+
+    def __init__(self):
+        super(CmdCheck, self).__init__("check", help="Check grammar")
+
+    def add_parser(self, parsers):
+        # No arguments beyond those the parent class sets up
+        return super(CmdCheck, self).add_parser(parsers)
+
+    def _run(self, args, grammar):
+        # Nothing is done here; the work happens in run() of the parent
+        return None