From 1a7a34f73cca527d9e3a29557b42c543b5ca632e Mon Sep 17 00:00:00 2001
From: Jan Lindemann
Date: Thu, 2 Nov 2017 08:54:39 +0100
Subject: [PATCH] grammar.py and friends: Implement config file support

Signed-off-by: Jan Lindemann
---
 make/generate-flex-bison.mk     | 12 +++--
 test/grammar/Makefile           |  1 +
 test/grammar/generate.conf      |  9 ++++
 test/grammar/include/defs.h     |  1 +
 test/grammar/main.cpp           |  8 ++-
 tools/python/jwutils/grammar.py | 96 +++++++++++++++++++++++++--------
 6 files changed, 100 insertions(+), 27 deletions(-)
 create mode 100644 test/grammar/generate.conf

diff --git a/make/generate-flex-bison.mk b/make/generate-flex-bison.mk
index 5387554..5f05d7f 100644
--- a/make/generate-flex-bison.mk
+++ b/make/generate-flex-bison.mk
@@ -12,6 +12,10 @@ ifneq ($(CHECK_SYMBOLS),)
   OPT_CHECK_SYMBOLS ?= --check-symbols='$(CHECK_SYMBOLS)'
 endif
 
+ifneq ($(GENERATE_CONFIG_FILE),)
+  OPT_CONFIG_FILE ?= --config-file=$(GENERATE_CONFIG_FILE)
+endif
+
 GENERATED_STD += $(FB_NAME).l $(FB_NAME).y $(FB_NAME).ebnf $(FB_COMMON_H)
 GENERATED += $(FB_NAME)-dense.ebnf $(GENERATED_STD)
 GRAMMAR_INPUT ?= $(FB_NAME)-input.ebnf
@@ -25,15 +29,15 @@ FB_COMMON_H ?= $(FB_HDRDIR)/$(FB_NAME).h
 
 INCLUDED_BY_GENERATED += include/defs.h $(FB_COMMON_H) include/lex.$(FB_NAME).h include/$(FB_NAME).tab.h
 
-
 GENERATE_PY ?= ./generate.py
 GENERATE ?= python ./$(GENERATE_PY) --log-level $(GENERATE_LOG_LEVEL) create \
-	--fix-extensions $(FIX_EXTENSIONS) \
-	--unroll-lists \
-	--unroll-options \
+		--fix-extensions $(FIX_EXTENSIONS) \
+		--unroll-lists \
+		--unroll-options \
 		--unroll-alternatives \
 		--replace-whitespace \
 		$(OPT_CHECK_SYMBOLS) \
+		$(OPT_CONFIG_FILE) \
 		--trim-symbols=$(shell echo $(TRIM_SYMBOLS) | sed 's/ */,/g') \
 		--cut-symbols=$(shell echo $(CUT_SYMBOLS) | sed 's/ */,/g') \
 		--irrelevant-symbols=$(shell echo $(IRRELEVANT_SYMBOLS) | sed 's/ */,/g') \
diff --git a/test/grammar/Makefile b/test/grammar/Makefile
index fd991fc..9d687e8 100644
--- a/test/grammar/Makefile
+++ b/test/grammar/Makefile
@@ -4,6 +4,7 @@ EXE_ARGS ?= grammartest.code
 PREREQ_BUILD += ytools
 FB_NAME = grammartest
 NAMESPACE_IN_GENERATED = gt
+GENERATE_CONFIG_FILE = generate.conf
 
 include $(TOPDIR)/make/proj.mk
 include $(TOPDIR)/make/generate-flex-bison.mk
diff --git a/test/grammar/generate.conf b/test/grammar/generate.conf
new file mode 100644
index 0000000..6e5dfa0
--- /dev/null
+++ b/test/grammar/generate.conf
@@ -0,0 +1,9 @@
+[symbols]
+	[white_space[
+		regex = "[ \n\t\r]+"
+	]
+	[test[
+		dings = bums
+		regex = "bumsdings"
+	]
+
diff --git a/test/grammar/include/defs.h b/test/grammar/include/defs.h
index b677234..7ce3ba8 100644
--- a/test/grammar/include/defs.h
+++ b/test/grammar/include/defs.h
@@ -7,6 +7,7 @@
 
 struct context {
 	int line;
+	int column;
 };
 
 union YYSTYPE;
diff --git a/test/grammar/main.cpp b/test/grammar/main.cpp
index 88bf135..e25b430 100644
--- a/test/grammar/main.cpp
+++ b/test/grammar/main.cpp
@@ -12,6 +12,8 @@
 #include "include/defs.h"
 #include "include/grammartest.tab.h"
 
+extern int FB_SYM(debug);
+
 using namespace std;
 
 int main(int argc, const char *argv[])
@@ -28,10 +30,14 @@ int main(int argc, const char *argv[])
 		return 1;
 	}
 
+	// TODO: Initialize this in a generated function
 	struct context context = {
-		line: 0
+		line: 1,
+		column: 0
 	};
 
+	FB_SYM(debug) = 1;
+
 	struct vp_scanner *scanner = FB_SYM(init_scanner)(content.c_str());
 	int status = FB_SYM(parse)(&context, FB_SYM(scanner_get_data)(scanner));
 	FB_SYM(cleanup_scanner)(scanner);
diff --git a/tools/python/jwutils/grammar.py b/tools/python/jwutils/grammar.py
index de10305..1567521 100644
--- a/tools/python/jwutils/grammar.py
+++ b/tools/python/jwutils/grammar.py
@@ -13,6 +13,9 @@ from abc import abstractmethod
 import os.path
 
 import jwutils
+#from jwutils.stree import StringTree, serdes
+import jwutils.stree.serdes as serdes
+import jwutils.stree.StringTree as StringTree
 from jwutils.log import *
 
 
@@ -1062,6 +1065,18 @@ def grammar_create_ebnf(grammar, opts):
         out += ' ' * indent + ' ;\n'
     return out
 
+def tokens_from_config(conf):
+    r = set()
+    if conf is None:
+        return r
+    symbols = conf.get('symbols')
+    if symbols is None:
+        return r
+    for k, v in symbols.iteritems():
+        if v.get('regex') is not None:
+            r.add(k)
+    return r
+
 def format_token(sym, tp):
     return misc.pad('%token <' + sym + '>', 27) + misc.pad(sym, 20) + '/* ' + tp + ' */'
 
@@ -1078,6 +1093,9 @@ def grammar_create_y(grammar, opts):
         spaces += 8
     indent = '\t' * (spaces / 8)
 
+    conf = opts['config']
+    tokens = tokens_from_config(conf)
+
     out = ""
 
     # preamble
@@ -1099,21 +1117,22 @@ def grammar_create_y(grammar, opts):
 
     out += "\nusing namespace " + opts['namespace'] + ';\n'
 
+    #out += textwrap.dedent("""\
+    #    using namespace std;
+
+    #    namespace {
+
+    #    typedef vector wrap_t;
+    #    const wrap_t curly_braces{ "{", "}" };
+    #    const wrap_t round_braces{ "(", ")" };
+
+    #    }
+
+    #    #ifdef __cplusplus
+    #    // extern "C" {
+    #    #endif
+
     out += textwrap.dedent("""\
-        using namespace std;
-
-        namespace {
-
-        typedef vector wrap_t;
-        const wrap_t curly_braces{ "{", "}" };
-        const wrap_t round_braces{ "(", ")" };
-
-        }
-
-        #ifdef __cplusplus
-        // extern "C" {
-        #endif
-
         %}
         """)
 
@@ -1125,6 +1144,8 @@ def grammar_create_y(grammar, opts):
 
     types = grammar_get_types(grammar)
     for t in types.keys():
+        if t in tokens:
+            continue
         out += '\n\t' + opts['namespace'] + '::' + t + '_t *' + t + ';'
 
     out += '\n'
@@ -1155,15 +1176,22 @@ def grammar_create_y(grammar, opts):
             continue
         out += format_token(p.sym, t) +'\n'
 
-    # regex tokens
+    # tokens from grammar
     out += '\n'
    for t, p in grammar.iteritems():
         if p.tp == p_literal:
             out += format_token(p.sym, t) +'\n'
 
+    # tokens from config
+    for k, t in conf['symbols'].iteritems():
+        slog(NOTICE, "adding token", k)
+        out += format_token(k, "blah") + '\n'
+
     # types
     out += '\n'
     for t, p in grammar.iteritems():
+        if p.sym in conf['symbols'].keys():
+            continue
         if p.tp == p_ruleset:
             out += misc.pad('%type <' + tok2sym(p.token) + '>', 40) + misc.pad(t, 35) + '/* ' + t + ' */' +'\n'
 
@@ -1184,6 +1212,8 @@ def grammar_create_y(grammar, opts):
             continue
         if p.tp == p_special:
             continue
+        if p.sym in conf['symbols'].keys():
+            continue
 
         slog(INFO, "creating production for symbol", p.str())
         #if p.is_lexical_element is True:
@@ -1202,6 +1232,7 @@ def grammar_create_y(grammar, opts):
             else:
                 out += indent + "| " + format_yacc_rule(rule) + "\n"
             out += indent + "{" + "\n"
+            out += indent + "\t" + "$$ = new " + opts['namespace'] + '::' + t + ";\n"
             out += indent + "\t" + "$$->type = " + opts['namespace'] + '::' + t + "::t_" + str(n_rule) + ";\n"
             tokens = []
             for c in rule:
@@ -1245,6 +1276,7 @@ def grammar_create_y(grammar, opts):
 
 def grammar_create_l(grammar, opts):
     ignore = ""
+    conf = opts['config']
 
     out = textwrap.dedent("""\
         %option reentrant
@@ -1283,7 +1315,8 @@ def grammar_create_l(grammar, opts):
 
         %%
 
-        \\n { context->line++; }
+        \\n { context->line++; context->column = 0; REJECT; }
+        . { context->column++; REJECT; }
 
         """)
 
@@ -1292,7 +1325,10 @@ def grammar_create_l(grammar, opts):
         # \.			{ return T_DOT; }
         assert p.term[0] in [ '"', "'" ], p.term
         assert p.term[-1] in [ '"', "'" ], p.term
-        out += re.escape(p.term[1:-1]) + ' { return ' + p.sym + '; }\n'
+        out += re.escape(p.term[1:-1]) + ' { slog(PRI_NOTICE, "found terminal ' + p.sym + '"); return ' + p.sym + '; }\n'
+
+    for k, v in conf['symbols'].iteritems():
+        out += v['regex'].value() + ' { slog(PRI_NOTICE, "found regex ' + k + '"); return ' + k + '; }\n'
 
     #out += textwrap.dedent("""\
     #
@@ -1362,6 +1398,7 @@ def grammar_create_l(grammar, opts):
 
     out += textwrap.dedent("""\
         . {
+            slog(PRI_NOTICE, "returning character %c", yytext[0]);
             return yytext[0];
         }
 
@@ -1372,7 +1409,7 @@ def grammar_create_l(grammar, opts):
         void FB_SYM(error)(struct context *context, void *scanner, const char *msg)
         {
             struct yyguts_t *yyg =(struct yyguts_t*)scanner;
-            set_error(PRI_ERR, EINVAL, "%s at \\"%s\\" in line %d", msg, yytext, context->line);
+            set_error(PRI_ERR, EINVAL, "%s at \\"%s\\" in line %d:%d", msg, yytext, context->line, context->column);
         }
 
         int FB_SYM(wrap)(void *scanner)
@@ -1427,6 +1464,7 @@ def grammar_create_l(grammar, opts):
 def grammar_create_h(grammar, opts):
     out = "#ifndef " + opts['mip'] + '\n#define ' + opts['mip'] + '\n\n'
     ns = opts['namespace']
+    tokens = tokens_from_config(opts['config'])
 
     if ns is not None:
         out += 'namespace ' + ns + '{\n\n'
@@ -1435,12 +1473,16 @@ def grammar_create_h(grammar, opts):
 
     # struct forward declarations
     for t, members in types.iteritems():
+        if t in tokens:
+            continue
         if len(members):
             out += '\nstruct ' + t + ';'
     out += '\n'
 
     # struct / non-struct typedefs
     for t, members in types.iteritems():
+        if t in tokens:
+            continue
         if not len(members):
             out += '\ntypedef const char ' + t + '_t;'
             continue
@@ -1449,6 +1491,8 @@ def grammar_create_h(grammar, opts):
 
     # struct definitions
     for t, rules in types.iteritems():
+        if t in tokens:
+            continue
         if not len(rules):
             continue
         out += '\n\nstruct ' + t + ' {\n'
@@ -1524,6 +1568,7 @@ class GrammarCmd(jwutils.Cmd):
         p.add_argument('-t', '--trim-symbols', help='trim grammar tree at symbol', nargs='?', default='')
         p.add_argument('-r', '--irrelevant-symbols', help='exclude symbol from output payload', nargs='?', default='')
         p.add_argument('-c', '--cut-symbols', help='cut grammar tree at symbol', nargs='?', default='')
+        p.add_argument('-f', '--config-file', help='config file', nargs='?', default=None)
         return p
 
     def processGrammar(self, args, grammar):
@@ -1590,7 +1635,8 @@ class CmdCreate(DerivedGrammarCmd):
         return p
 
     def _run(self, args, grammar):
-        name, ext = os.path.splitext(args.output)[1]
+        name, ext = os.path.splitext(args.output)
+        ext = ext[1:]
         #cmd = getattr(sys.modules[__name__], 'create_' + re.sub(r'[-./]', '_', args.output))
         mip = None
         if ext == 'h':
@@ -1598,6 +1644,11 @@ class CmdCreate(DerivedGrammarCmd):
 
         includes = args.includes.split(',')
 
+        config = None
+        if args.config_file is not None:
+            config = serdes.read(args.config_file)
+            config.dump(ERR)
+
         # generated code breaks without this, not sure why
         if ext == 'l':
             tmp = []
@@ -1606,13 +1657,14 @@ class CmdCreate(DerivedGrammarCmd):
                 tmp.append(f)
             includes = tmp
 
-        cmd = getattr(sys.modules[__name__], 'grammar_create_' + ext)
-
         opts = {
             "namespace" : args.namespace,
             "includes" : includes,
-            "mip" : mip
+            "mip" : mip,
+            "config" : config
         }
+
+        cmd = getattr(sys.modules[__name__], 'grammar_create_' + ext)
         out = cmd(grammar, opts)
         print(out)
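
For reference: tokens_from_config() collects every entry under [symbols] that defines a
regex, and the generator then treats those names as lexer-provided tokens instead of
deriving structs and productions for them. A minimal stand-alone sketch of that
selection logic, using a plain Python dict in place of the StringTree that serdes.read()
actually returns (the dict layout is only an assumption for illustration):

    # Stand-in for the parsed test/grammar/generate.conf added by this patch.
    # The real code receives a jwutils StringTree from serdes.read(), not a dict.
    conf = {
        'symbols': {
            'white_space': {'regex': '[ \n\t\r]+'},
            'test': {'dings': 'bums', 'regex': 'bumsdings'},
        }
    }

    def tokens_from_config(conf):
        # Same selection rule as the patched grammar.py: keep only the
        # [symbols] entries that carry a regex.
        r = set()
        if conf is None:
            return r
        symbols = conf.get('symbols')
        if symbols is None:
            return r
        for k, v in symbols.items():  # grammar.py itself uses iteritems() (Python 2)
            if v.get('regex') is not None:
                r.add(k)
        return r

    print(tokens_from_config(conf))  # -> {'white_space', 'test'} (set order may vary)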