grammar.py et al: Centralize more code

More code is removed from the special parser directories and centralized
into grammar.py, Cmd.py, and generate-flex-bison.mk.

Signed-off-by: Jan Lindemann <jan@janware.com>
This commit is contained in:
Jan Lindemann 2017-10-29 18:25:55 +01:00
commit 6297f10f55
6 changed files with 210 additions and 177 deletions

View file

@ -10,6 +10,7 @@ import itertools
import copy
from collections import OrderedDict
from abc import abstractmethod
import os.path
import jwutils
@ -914,7 +915,7 @@ def grammar_check(grammar, check_symbols = None):
slog(INFO, "======= checking", tok)
rr = step_out(grammar, terminals, orphans, lexicals, tok, 0, checked=set(), found=found)
if rr == sys.maxint:
slog(ERR, "No way out for", tok, "in production", p.str())
slog(ERR, "No way out for", tok)
exit(1)
if not tok in grammar.keys():
slog(ERR, "Token", tok, "has no production")
@ -1034,7 +1035,7 @@ def grammar_trim_symbols(grammar, symbols):
return grammar
def create_ebnf(grammar):
def grammar_create_ebnf(grammar, opts):
indent = 40
slog(INFO, "creating ebnf from grammar of size", len(grammar))
out = ''
@ -1049,7 +1050,7 @@ def create_ebnf(grammar):
out += ' ' * indent + ' ;\n'
return out
def create_yacc(grammar):
def grammar_create_y(grammar, opts):
indent = ' ' * 40
width = 0
for t, p in grammar.iteritems():
@ -1076,13 +1077,19 @@ def create_yacc(grammar):
#include <vector>
#include <string>
#include "include/defs.h"
#include "include/vhdl2017.h"
#include "include/lex.vhdl2017.h"
#include "include/vhdl2017.tab.h"
""")
for f in opts['includes']:
out += '#include "' + f + '"' + '\n'
#include "include/defs.h"
#include "include/vhdl2017.h"
#include "include/lex.vhdl2017.h"
#include "include/vhdl2017.tab.h"
out += "\nusing namespace " + opts['namespace'] + ';\n'
out += textwrap.dedent("""\
using namespace std;
using namespace v2017;
namespace {
@ -1107,7 +1114,7 @@ def create_yacc(grammar):
types = grammar_get_types(grammar)
for t in types.keys():
out += '\n\tv2017::' + t + '_t *' + t + ';'
out += '\n\t' + opts['namespace'] + '::' + t + '_t *' + t + ';'
out += '\n'
out += textwrap.dedent("""\
@ -1184,7 +1191,7 @@ def create_yacc(grammar):
else:
out += indent + "| " + format_yacc_rule(rule) + "\n"
out += indent + "{" + "\n"
out += indent + "\t" + "$$->type = v2017::" + t + "::t_" + str(n_rule) + ";\n"
out += indent + "\t" + "$$->type = " + opts['namespace'] + '::' + t + "::t_" + str(n_rule) + ";\n"
tokens = []
for c in rule:
if c.tp == t_target_lang:
@ -1222,7 +1229,7 @@ def create_yacc(grammar):
return out + "\n"
def create_lex(grammar):
def grammar_create_l(grammar, opts):
ignore = ""
@ -1233,13 +1240,19 @@ def create_lex(grammar):
%{
#include <slog.h>
#include "include/defs.h"
#include "include/vhdl2017.h"
""")
// #include "include/lex.vhdl2017.h"
#include "include/vhdl2017.tab.h"
for f in opts['includes']:
out += '#include "' + f + '"' + '\n'
#include "include/defs.h"
#include "include/vhdl2017.h"
using namespace v2017;
#// #include "include/lex.vhdl2017.h"
#include "include/vhdl2017.tab.h"
out += "\nusing namespace " + opts['namespace'] + ';\n'
out += textwrap.dedent("""\
/* This is too late in the Flex generated file to work. Still lots of
* prototypes are spat into it above it, which end up with C++ linkage, of
@ -1399,10 +1412,12 @@ def create_lex(grammar):
return out
def create_header(grammar, mip, namespace = None):
out = "#ifndef " + mip + '\n#define ' + mip + '\n\n'
if namespace is not None:
out += 'namespace ' + namespace + '{\n\n'
def grammar_create_h(grammar, opts):
out = "#ifndef " + opts['mip'] + '\n#define ' + opts['mip'] + '\n\n'
ns = opts['namespace']
if ns is not None:
out += 'namespace ' + ns + '{\n\n'
types = grammar_get_types(grammar)
@ -1463,9 +1478,9 @@ def create_header(grammar, mip, namespace = None):
out += '\n'
if namespace is not None:
out += '\n} /* namespace ' + namespace + '*/'
out += '\n#endif /* #ifndef + ' + mip + ' */'
if ns is not None:
out += '\n} /* namespace ' + ns + '*/'
out += '\n#endif /* #ifndef + ' + opts['mip'] + ' */'
return out
@ -1510,3 +1525,79 @@ class GrammarCmd(jwutils.Cmd):
if args.replace_whitespace:
grammar = grammar_replace_whitespace(grammar)
return grammar
# ------------------------------------------------- TODO: clean this up >
class DerivedGrammarCmd(GrammarCmd):
    """Base class for sub-commands that load a grammar file and act on it.

    run() reads the file named by ``args.input``, parses it with _parse(),
    passes the result through GrammarCmd.processGrammar() and finally hands
    the processed grammar to the subclass' _run().
    """

    def __init__(self, name, help):
        super(DerivedGrammarCmd, self).__init__(name, help=help)

    @abstractmethod
    def _run(self, args, grammar):
        """Act on the processed grammar; to be implemented by subclasses.

        The signature matches the call made in run(): the parsed command
        line arguments first, then the processed grammar.
        """
        pass

    def _parse(self, contents):
        """Turn the text of the input file into a grammar.

        Delegates to grammar_parse_ebnf(); subclasses may override this to
        support other input formats.
        """
        return grammar_parse_ebnf(contents)

    def add_parser(self, parsers):
        """Register this command's argument parser and return it.

        Adds nothing on top of the base class.
        NOTE(review): presumably the base class registers the ``input``
        argument read by run() -- confirm against GrammarCmd.
        """
        p = super(DerivedGrammarCmd, self).add_parser(parsers)
        return p

    def run(self, args):
        """Load, parse and process the grammar, then dispatch to _run()."""
        with open(args.input, 'r') as infile:
            contents = infile.read()
        grammar = self._parse(contents)
        grammar = super(DerivedGrammarCmd, self).processGrammar(args, grammar)
        self._run(args, grammar)
class CmdCreate(DerivedGrammarCmd):
    """'create' sub-command: generate a source file from the grammar.

    The generator is selected by the extension of the ``output`` argument:
    extension ``x`` dispatches to the module-level function
    ``grammar_create_x(grammar, opts)``. The generated text is printed to
    stdout; ``output`` itself is only used to select the generator and to
    derive the include guard for headers.
    """

    def __init__(self):
        super(CmdCreate, self).__init__("create", help="Create a file")

    def add_parser(self, parsers):
        """Add the output / --namespace / --includes arguments."""
        p = super(CmdCreate, self).add_parser(parsers)
        p.add_argument("output", help="output file")
        p.add_argument('--namespace', help='namespace of generated AST', default='parser')
        p.add_argument('--includes', help='list of header files to be #included in C/C++ implementation files', default='')
        return p

    def _run(self, args, grammar):
        """Generate the file content for ``args.output`` and print it.

        Raises AttributeError if this module has no ``grammar_create_<ext>``
        function for the extension of ``args.output``.
        """
        # splitext() returns the extension with its leading dot; strip it so
        # that multi-character extensions such as ".ebnf" work as well.
        ext = os.path.splitext(args.output)[1][1:]

        # Only headers need an include guard ("multiple inclusion protection").
        mip = None
        if ext == 'h':
            mip = args.namespace + re.sub(r'[-./]', '_', args.output).upper()

        # An empty --includes value must yield no includes at all, rather
        # than a single empty file name (which would emit '#include ""').
        includes = [f for f in args.includes.split(',') if f]

        # generated code breaks without this, not sure why
        if ext == 'l':
            includes = [f for f in includes if not re.match(r'.*lex\..*\.h', f)]

        cmd = getattr(sys.modules[__name__], 'grammar_create_' + ext)
        opts = {
            "namespace" : args.namespace,
            "includes"  : includes,
            "mip"       : mip
        }
        out = cmd(grammar, opts)
        print(out)
class CmdCheck(DerivedGrammarCmd):
    """'check' sub-command.

    Adds no arguments and performs no work of its own in _run(); whatever
    happens for this command happens in the inherited run() before _run()
    is reached.
    """

    def __init__(self):
        super(CmdCheck, self).__init__("check", help="Check grammar")

    def add_parser(self, parsers):
        """Return the parser created by the base class, unmodified."""
        return super(CmdCheck, self).add_parser(parsers)

    def _run(self, args, grammar):
        """Intentionally a no-op."""
        return None