grammar.py and friends: Implement config file support

Signed-off-by: Jan Lindemann <jan@janware.com>
This commit is contained in:
Jan Lindemann 2017-11-02 08:54:39 +01:00
commit 1a7a34f73c
6 changed files with 100 additions and 27 deletions

View file

@ -12,6 +12,10 @@ ifneq ($(CHECK_SYMBOLS),)
OPT_CHECK_SYMBOLS ?= --check-symbols='$(CHECK_SYMBOLS)'
endif
ifneq ($(GENERATE_CONFIG_FILE),)
OPT_CONFIG_FILE ?= --config-file=$(GENERATE_CONFIG_FILE)
endif
GENERATED_STD += $(FB_NAME).l $(FB_NAME).y $(FB_NAME).ebnf $(FB_COMMON_H)
GENERATED += $(FB_NAME)-dense.ebnf $(GENERATED_STD)
GRAMMAR_INPUT ?= $(FB_NAME)-input.ebnf
@ -25,15 +29,15 @@ FB_COMMON_H ?= $(FB_HDRDIR)/$(FB_NAME).h
INCLUDED_BY_GENERATED += include/defs.h $(FB_COMMON_H) include/lex.$(FB_NAME).h include/$(FB_NAME).tab.h
GENERATE_PY ?= ./generate.py
GENERATE ?= python ./$(GENERATE_PY) --log-level $(GENERATE_LOG_LEVEL) create \
--fix-extensions $(FIX_EXTENSIONS) \
--unroll-lists \
--unroll-options \
--fix-extensions $(FIX_EXTENSIONS) \
--unroll-lists \
--unroll-options \
--unroll-alternatives \
--replace-whitespace \
$(OPT_CHECK_SYMBOLS) \
$(OPT_CONFIG_FILE) \
--trim-symbols=$(shell echo $(TRIM_SYMBOLS) | sed 's/ */,/g') \
--cut-symbols=$(shell echo $(CUT_SYMBOLS) | sed 's/ */,/g') \
--irrelevant-symbols=$(shell echo $(IRRELEVANT_SYMBOLS) | sed 's/ */,/g') \

View file

@ -4,6 +4,7 @@ EXE_ARGS ?= grammartest.code
PREREQ_BUILD += ytools
FB_NAME = grammartest
NAMESPACE_IN_GENERATED = gt
GENERATE_CONFIG_FILE = generate.conf
include $(TOPDIR)/make/proj.mk
include $(TOPDIR)/make/generate-flex-bison.mk

View file

@ -0,0 +1,9 @@
[symbols]
[white_space[
regex = "[ \n\t\r]+"
]
[test[
dings = bums
regex = "bumsdings"
]

View file

@ -7,6 +7,7 @@
struct context {
int line;
int column;
};
union YYSTYPE;

View file

@ -12,6 +12,8 @@
#include "include/defs.h"
#include "include/grammartest.tab.h"
extern int FB_SYM(debug);
using namespace std;
int main(int argc, const char *argv[])
@ -28,10 +30,14 @@ int main(int argc, const char *argv[])
return 1;
}
// TODO: Initialize this in a generated function
struct context context = {
line: 0
line: 1,
column: 0
};
FB_SYM(debug) = 1;
struct vp_scanner *scanner = FB_SYM(init_scanner)(content.c_str());
int status = FB_SYM(parse)(&context, FB_SYM(scanner_get_data)(scanner));
FB_SYM(cleanup_scanner)(scanner);

View file

@ -13,6 +13,9 @@ from abc import abstractmethod
import os.path
import jwutils
#from jwutils.stree import StringTree, serdes
import jwutils.stree.serdes as serdes
import jwutils.stree.StringTree as StringTree
from jwutils.log import *
@ -1062,6 +1065,18 @@ def grammar_create_ebnf(grammar, opts):
out += ' ' * indent + ' ;\n'
return out
def tokens_from_config(conf):
    """Collect the names of all symbols the config declares with a regex.

    Looks up the 'symbols' section of ``conf`` and returns the key of every
    entry for which ``get('regex')`` is not None.

    Args:
        conf: Parsed configuration, a mapping-like object providing
            ``get()``, or None if no configuration is available.

    Returns:
        A set of symbol names. The set is empty if ``conf`` is None or
        has no 'symbols' section.
    """
    if conf is None:
        return set()
    symbols = conf.get('symbols')
    if symbols is None:
        return set()
    # Prefer iteritems() where the container offers it (Python 2 mappings);
    # fall back to items() so containers without it, such as plain dicts on
    # Python 3, are handled as well.
    iter_pairs = getattr(symbols, 'iteritems', None)
    if iter_pairs is None:
        iter_pairs = symbols.items
    return set(k for k, v in iter_pairs() if v.get('regex') is not None)
def format_token(sym, tp):
    """Build one ``%token`` declaration line for ``sym``.

    The declaration and the symbol name are each passed through
    ``misc.pad()`` (with 27 and 20 respectively), followed by ``tp``
    wrapped in a C-style comment.
    """
    declaration = misc.pad('%token <' + sym + '>', 27)
    name_column = misc.pad(sym, 20)
    comment = '/* ' + tp + ' */'
    return declaration + name_column + comment
@ -1078,6 +1093,9 @@ def grammar_create_y(grammar, opts):
spaces += 8
indent = '\t' * (spaces / 8)
conf = opts['config']
tokens = tokens_from_config(conf)
out = ""
# preamble
@ -1099,21 +1117,22 @@ def grammar_create_y(grammar, opts):
out += "\nusing namespace " + opts['namespace'] + ';\n'
#out += textwrap.dedent("""\
# using namespace std;
# namespace {
# typedef vector<const char *> wrap_t;
# const wrap_t curly_braces{ "{", "}" };
# const wrap_t round_braces{ "(", ")" };
# }
# #ifdef __cplusplus
# // extern "C" {
# #endif
out += textwrap.dedent("""\
using namespace std;
namespace {
typedef vector<const char *> wrap_t;
const wrap_t curly_braces{ "{", "}" };
const wrap_t round_braces{ "(", ")" };
}
#ifdef __cplusplus
// extern "C" {
#endif
%}
""")
@ -1125,6 +1144,8 @@ def grammar_create_y(grammar, opts):
types = grammar_get_types(grammar)
for t in types.keys():
if t in tokens:
continue
out += '\n\t' + opts['namespace'] + '::' + t + '_t *' + t + ';'
out += '\n'
@ -1155,15 +1176,22 @@ def grammar_create_y(grammar, opts):
continue
out += format_token(p.sym, t) +'\n'
# regex tokens
# tokens from grammar
out += '\n'
for t, p in grammar.iteritems():
if p.tp == p_literal:
out += format_token(p.sym, t) +'\n'
# tokens from config
for k, t in conf['symbols'].iteritems():
slog(NOTICE, "adding token", k)
out += format_token(k, "blah") + '\n'
# types
out += '\n'
for t, p in grammar.iteritems():
if p.sym in conf['symbols'].keys():
continue
if p.tp == p_ruleset:
out += misc.pad('%type <' + tok2sym(p.token) + '>', 40) + misc.pad(t, 35) + '/* ' + t + ' */' +'\n'
@ -1184,6 +1212,8 @@ def grammar_create_y(grammar, opts):
continue
if p.tp == p_special:
continue
if p.sym in conf['symbols'].keys():
continue
slog(INFO, "creating production for symbol", p.str())
#if p.is_lexical_element is True:
@ -1202,6 +1232,7 @@ def grammar_create_y(grammar, opts):
else:
out += indent + "| " + format_yacc_rule(rule) + "\n"
out += indent + "{" + "\n"
out += indent + "\t" + "$$ = new " + opts['namespace'] + '::' + t + ";\n"
out += indent + "\t" + "$$->type = " + opts['namespace'] + '::' + t + "::t_" + str(n_rule) + ";\n"
tokens = []
for c in rule:
@ -1245,6 +1276,7 @@ def grammar_create_y(grammar, opts):
def grammar_create_l(grammar, opts):
ignore = ""
conf = opts['config']
out = textwrap.dedent("""\
%option reentrant
@ -1283,7 +1315,8 @@ def grammar_create_l(grammar, opts):
%%
\\n { context->line++; }
\\n { context->line++; context->column = 0; REJECT; }
. { context->column++; REJECT; }
""")
@ -1292,7 +1325,10 @@ def grammar_create_l(grammar, opts):
# \. { return T_DOT; }
assert p.term[0] in [ '"', "'" ], p.term
assert p.term[-1] in [ '"', "'" ], p.term
out += re.escape(p.term[1:-1]) + ' { return ' + p.sym + '; }\n'
out += re.escape(p.term[1:-1]) + ' { slog(PRI_NOTICE, "found terminal ' + p.sym + '"); return ' + p.sym + '; }\n'
for k, v in conf['symbols'].iteritems():
out += v['regex'].value() + ' { slog(PRI_NOTICE, "found regex ' + k + '"); return ' + k + '; }\n'
#out += textwrap.dedent("""\
#
@ -1362,6 +1398,7 @@ def grammar_create_l(grammar, opts):
out += textwrap.dedent("""\
. {
slog(PRI_NOTICE, "returning character %c", yytext[0]);
return yytext[0];
}
@ -1372,7 +1409,7 @@ def grammar_create_l(grammar, opts):
void FB_SYM(error)(struct context *context, void *scanner, const char *msg)
{
struct yyguts_t *yyg =(struct yyguts_t*)scanner;
set_error(PRI_ERR, EINVAL, "%s at \\"%s\\" in line %d", msg, yytext, context->line);
set_error(PRI_ERR, EINVAL, "%s at \\"%s\\" in line %d:%d", msg, yytext, context->line, context->column);
}
int FB_SYM(wrap)(void *scanner)
@ -1427,6 +1464,7 @@ def grammar_create_l(grammar, opts):
def grammar_create_h(grammar, opts):
out = "#ifndef " + opts['mip'] + '\n#define ' + opts['mip'] + '\n\n'
ns = opts['namespace']
tokens = tokens_from_config(opts['config'])
if ns is not None:
out += 'namespace ' + ns + '{\n\n'
@ -1435,12 +1473,16 @@ def grammar_create_h(grammar, opts):
# struct forward declarations
for t, members in types.iteritems():
if t in tokens:
continue
if len(members):
out += '\nstruct ' + t + ';'
out += '\n'
# struct / non-struct typedefs
for t, members in types.iteritems():
if t in tokens:
continue
if not len(members):
out += '\ntypedef const char ' + t + '_t;'
continue
@ -1449,6 +1491,8 @@ def grammar_create_h(grammar, opts):
# struct definitions
for t, rules in types.iteritems():
if t in tokens:
continue
if not len(rules):
continue
out += '\n\nstruct ' + t + ' {\n'
@ -1524,6 +1568,7 @@ class GrammarCmd(jwutils.Cmd):
p.add_argument('-t', '--trim-symbols', help='trim grammar tree at symbol', nargs='?', default='')
p.add_argument('-r', '--irrelevant-symbols', help='exclude symbol from output payload', nargs='?', default='')
p.add_argument('-c', '--cut-symbols', help='cut grammar tree at symbol', nargs='?', default='')
p.add_argument('-f', '--config-file', help='config file', nargs='?', default=None)
return p
def processGrammar(self, args, grammar):
@ -1590,7 +1635,8 @@ class CmdCreate(DerivedGrammarCmd):
return p
def _run(self, args, grammar):
name, ext = os.path.splitext(args.output)[1]
name, ext = os.path.splitext(args.output)
ext = ext[1:]
#cmd = getattr(sys.modules[__name__], 'create_' + re.sub(r'[-./]', '_', args.output))
mip = None
if ext == 'h':
@ -1598,6 +1644,11 @@ class CmdCreate(DerivedGrammarCmd):
includes = args.includes.split(',')
config = None
if args.config_file is not None:
config = serdes.read(args.config_file)
config.dump(ERR)
# generated code breaks without this, not sure why
if ext == 'l':
tmp = []
@ -1606,13 +1657,14 @@ class CmdCreate(DerivedGrammarCmd):
tmp.append(f)
includes = tmp
cmd = getattr(sys.modules[__name__], 'grammar_create_' + ext)
opts = {
"namespace" : args.namespace,
"includes" : includes,
"mip" : mip
"mip" : mip,
"config" : config
}
cmd = getattr(sys.modules[__name__], 'grammar_create_' + ext)
out = cmd(grammar, opts)
print(out)