mirror of
ssh://git.janware.com/srv/git/janware/proj/jw-python
synced 2026-01-15 09:53:32 +01:00
grammar.py and friends: Implement config file support
Signed-off-by: Jan Lindemann <jan@janware.com>
This commit is contained in:
parent
214c222002
commit
1a7a34f73c
6 changed files with 100 additions and 27 deletions
|
|
@ -12,6 +12,10 @@ ifneq ($(CHECK_SYMBOLS),)
|
||||||
OPT_CHECK_SYMBOLS ?= --check-symbols='$(CHECK_SYMBOLS)'
|
OPT_CHECK_SYMBOLS ?= --check-symbols='$(CHECK_SYMBOLS)'
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifneq ($(GENERATE_CONFIG_FILE),)
|
||||||
|
OPT_CONFIG_FILE ?= --config-file=$(GENERATE_CONFIG_FILE)
|
||||||
|
endif
|
||||||
|
|
||||||
GENERATED_STD += $(FB_NAME).l $(FB_NAME).y $(FB_NAME).ebnf $(FB_COMMON_H)
|
GENERATED_STD += $(FB_NAME).l $(FB_NAME).y $(FB_NAME).ebnf $(FB_COMMON_H)
|
||||||
GENERATED += $(FB_NAME)-dense.ebnf $(GENERATED_STD)
|
GENERATED += $(FB_NAME)-dense.ebnf $(GENERATED_STD)
|
||||||
GRAMMAR_INPUT ?= $(FB_NAME)-input.ebnf
|
GRAMMAR_INPUT ?= $(FB_NAME)-input.ebnf
|
||||||
|
|
@ -25,15 +29,15 @@ FB_COMMON_H ?= $(FB_HDRDIR)/$(FB_NAME).h
|
||||||
|
|
||||||
INCLUDED_BY_GENERATED += include/defs.h $(FB_COMMON_H) include/lex.$(FB_NAME).h include/$(FB_NAME).tab.h
|
INCLUDED_BY_GENERATED += include/defs.h $(FB_COMMON_H) include/lex.$(FB_NAME).h include/$(FB_NAME).tab.h
|
||||||
|
|
||||||
|
|
||||||
GENERATE_PY ?= ./generate.py
|
GENERATE_PY ?= ./generate.py
|
||||||
GENERATE ?= python ./$(GENERATE_PY) --log-level $(GENERATE_LOG_LEVEL) create \
|
GENERATE ?= python ./$(GENERATE_PY) --log-level $(GENERATE_LOG_LEVEL) create \
|
||||||
--fix-extensions $(FIX_EXTENSIONS) \
|
--fix-extensions $(FIX_EXTENSIONS) \
|
||||||
--unroll-lists \
|
--unroll-lists \
|
||||||
--unroll-options \
|
--unroll-options \
|
||||||
--unroll-alternatives \
|
--unroll-alternatives \
|
||||||
--replace-whitespace \
|
--replace-whitespace \
|
||||||
$(OPT_CHECK_SYMBOLS) \
|
$(OPT_CHECK_SYMBOLS) \
|
||||||
|
$(OPT_CONFIG_FILE) \
|
||||||
--trim-symbols=$(shell echo $(TRIM_SYMBOLS) | sed 's/ */,/g') \
|
--trim-symbols=$(shell echo $(TRIM_SYMBOLS) | sed 's/ */,/g') \
|
||||||
--cut-symbols=$(shell echo $(CUT_SYMBOLS) | sed 's/ */,/g') \
|
--cut-symbols=$(shell echo $(CUT_SYMBOLS) | sed 's/ */,/g') \
|
||||||
--irrelevant-symbols=$(shell echo $(IRRELEVANT_SYMBOLS) | sed 's/ */,/g') \
|
--irrelevant-symbols=$(shell echo $(IRRELEVANT_SYMBOLS) | sed 's/ */,/g') \
|
||||||
|
|
|
||||||
|
|
@ -4,6 +4,7 @@ EXE_ARGS ?= grammartest.code
|
||||||
PREREQ_BUILD += ytools
|
PREREQ_BUILD += ytools
|
||||||
FB_NAME = grammartest
|
FB_NAME = grammartest
|
||||||
NAMESPACE_IN_GENERATED = gt
|
NAMESPACE_IN_GENERATED = gt
|
||||||
|
GENERATE_CONFIG_FILE = generate.conf
|
||||||
|
|
||||||
include $(TOPDIR)/make/proj.mk
|
include $(TOPDIR)/make/proj.mk
|
||||||
include $(TOPDIR)/make/generate-flex-bison.mk
|
include $(TOPDIR)/make/generate-flex-bison.mk
|
||||||
|
|
|
||||||
9
test/grammar/generate.conf
Normal file
9
test/grammar/generate.conf
Normal file
|
|
@ -0,0 +1,9 @@
|
||||||
|
[symbols]
|
||||||
|
[white_space[
|
||||||
|
regex = "[ \n\t\r]+"
|
||||||
|
]
|
||||||
|
[test[
|
||||||
|
dings = bums
|
||||||
|
regex = "bumsdings"
|
||||||
|
]
|
||||||
|
|
||||||
|
|
@ -7,6 +7,7 @@
|
||||||
|
|
||||||
struct context {
|
struct context {
|
||||||
int line;
|
int line;
|
||||||
|
int column;
|
||||||
};
|
};
|
||||||
|
|
||||||
union YYSTYPE;
|
union YYSTYPE;
|
||||||
|
|
|
||||||
|
|
@ -12,6 +12,8 @@
|
||||||
#include "include/defs.h"
|
#include "include/defs.h"
|
||||||
#include "include/grammartest.tab.h"
|
#include "include/grammartest.tab.h"
|
||||||
|
|
||||||
|
extern int FB_SYM(debug);
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
int main(int argc, const char *argv[])
|
int main(int argc, const char *argv[])
|
||||||
|
|
@ -28,10 +30,14 @@ int main(int argc, const char *argv[])
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: Initialize this in a generated function
|
||||||
struct context context = {
|
struct context context = {
|
||||||
line: 0
|
line: 1,
|
||||||
|
column: 0
|
||||||
};
|
};
|
||||||
|
|
||||||
|
FB_SYM(debug) = 1;
|
||||||
|
|
||||||
struct vp_scanner *scanner = FB_SYM(init_scanner)(content.c_str());
|
struct vp_scanner *scanner = FB_SYM(init_scanner)(content.c_str());
|
||||||
int status = FB_SYM(parse)(&context, FB_SYM(scanner_get_data)(scanner));
|
int status = FB_SYM(parse)(&context, FB_SYM(scanner_get_data)(scanner));
|
||||||
FB_SYM(cleanup_scanner)(scanner);
|
FB_SYM(cleanup_scanner)(scanner);
|
||||||
|
|
|
||||||
|
|
@ -13,6 +13,9 @@ from abc import abstractmethod
|
||||||
import os.path
|
import os.path
|
||||||
|
|
||||||
import jwutils
|
import jwutils
|
||||||
|
#from jwutils.stree import StringTree, serdes
|
||||||
|
import jwutils.stree.serdes as serdes
|
||||||
|
import jwutils.stree.StringTree as StringTree
|
||||||
|
|
||||||
from jwutils.log import *
|
from jwutils.log import *
|
||||||
|
|
||||||
|
|
@ -1062,6 +1065,18 @@ def grammar_create_ebnf(grammar, opts):
|
||||||
out += ' ' * indent + ' ;\n'
|
out += ' ' * indent + ' ;\n'
|
||||||
return out
|
return out
|
||||||
|
|
||||||
|
def tokens_from_config(conf):
    """Collect the names of the symbols that the config declares with a regex.

    Looks up the ``symbols`` entry of ``conf`` and returns the key of every
    child entry that has a ``regex`` value.

    :param conf: configuration object as stored in ``opts['config']``, or
                 None when no configuration is available
    :return: set of symbol names; empty if ``conf`` is None or has no
             ``symbols`` entry
    """
    symbols = conf.get('symbols') if conf is not None else None
    if symbols is None:
        return set()
    # Only entries carrying a 'regex' value are considered; others are ignored.
    return set(name for name, entry in symbols.iteritems()
               if entry.get('regex') is not None)
|
||||||
|
|
||||||
def format_token(sym, tp):
    """Build one ``%token`` declaration line for the generated grammar file.

    :param sym: symbol name, used both as the ``<...>`` tag and as the token name
    :param tp: text placed in the trailing ``/* ... */`` comment
    :return: the declaration as a string, without a trailing newline
    """
    declaration = misc.pad('%token <' + sym + '>', 27)
    name_column = misc.pad(sym, 20)
    return declaration + name_column + '/* ' + tp + ' */'
|
||||||
|
|
||||||
|
|
@ -1078,6 +1093,9 @@ def grammar_create_y(grammar, opts):
|
||||||
spaces += 8
|
spaces += 8
|
||||||
indent = '\t' * (spaces / 8)
|
indent = '\t' * (spaces / 8)
|
||||||
|
|
||||||
|
conf = opts['config']
|
||||||
|
tokens = tokens_from_config(conf)
|
||||||
|
|
||||||
out = ""
|
out = ""
|
||||||
|
|
||||||
# preamble
|
# preamble
|
||||||
|
|
@ -1099,21 +1117,22 @@ def grammar_create_y(grammar, opts):
|
||||||
|
|
||||||
out += "\nusing namespace " + opts['namespace'] + ';\n'
|
out += "\nusing namespace " + opts['namespace'] + ';\n'
|
||||||
|
|
||||||
|
#out += textwrap.dedent("""\
|
||||||
|
# using namespace std;
|
||||||
|
|
||||||
|
# namespace {
|
||||||
|
|
||||||
|
# typedef vector<const char *> wrap_t;
|
||||||
|
# const wrap_t curly_braces{ "{", "}" };
|
||||||
|
# const wrap_t round_braces{ "(", ")" };
|
||||||
|
|
||||||
|
# }
|
||||||
|
|
||||||
|
# #ifdef __cplusplus
|
||||||
|
# // extern "C" {
|
||||||
|
# #endif
|
||||||
|
|
||||||
out += textwrap.dedent("""\
|
out += textwrap.dedent("""\
|
||||||
using namespace std;
|
|
||||||
|
|
||||||
namespace {
|
|
||||||
|
|
||||||
typedef vector<const char *> wrap_t;
|
|
||||||
const wrap_t curly_braces{ "{", "}" };
|
|
||||||
const wrap_t round_braces{ "(", ")" };
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
// extern "C" {
|
|
||||||
#endif
|
|
||||||
|
|
||||||
%}
|
%}
|
||||||
|
|
||||||
""")
|
""")
|
||||||
|
|
@ -1125,6 +1144,8 @@ def grammar_create_y(grammar, opts):
|
||||||
|
|
||||||
types = grammar_get_types(grammar)
|
types = grammar_get_types(grammar)
|
||||||
for t in types.keys():
|
for t in types.keys():
|
||||||
|
if t in tokens:
|
||||||
|
continue
|
||||||
out += '\n\t' + opts['namespace'] + '::' + t + '_t *' + t + ';'
|
out += '\n\t' + opts['namespace'] + '::' + t + '_t *' + t + ';'
|
||||||
out += '\n'
|
out += '\n'
|
||||||
|
|
||||||
|
|
@ -1155,15 +1176,22 @@ def grammar_create_y(grammar, opts):
|
||||||
continue
|
continue
|
||||||
out += format_token(p.sym, t) +'\n'
|
out += format_token(p.sym, t) +'\n'
|
||||||
|
|
||||||
# regex tokens
|
# tokens from grammar
|
||||||
out += '\n'
|
out += '\n'
|
||||||
for t, p in grammar.iteritems():
|
for t, p in grammar.iteritems():
|
||||||
if p.tp == p_literal:
|
if p.tp == p_literal:
|
||||||
out += format_token(p.sym, t) +'\n'
|
out += format_token(p.sym, t) +'\n'
|
||||||
|
|
||||||
|
# tokens from config
|
||||||
|
for k, t in conf['symbols'].iteritems():
|
||||||
|
slog(NOTICE, "adding token", k)
|
||||||
|
out += format_token(k, "blah") + '\n'
|
||||||
|
|
||||||
# types
|
# types
|
||||||
out += '\n'
|
out += '\n'
|
||||||
for t, p in grammar.iteritems():
|
for t, p in grammar.iteritems():
|
||||||
|
if p.sym in conf['symbols'].keys():
|
||||||
|
continue
|
||||||
if p.tp == p_ruleset:
|
if p.tp == p_ruleset:
|
||||||
out += misc.pad('%type <' + tok2sym(p.token) + '>', 40) + misc.pad(t, 35) + '/* ' + t + ' */' +'\n'
|
out += misc.pad('%type <' + tok2sym(p.token) + '>', 40) + misc.pad(t, 35) + '/* ' + t + ' */' +'\n'
|
||||||
|
|
||||||
|
|
@ -1184,6 +1212,8 @@ def grammar_create_y(grammar, opts):
|
||||||
continue
|
continue
|
||||||
if p.tp == p_special:
|
if p.tp == p_special:
|
||||||
continue
|
continue
|
||||||
|
if p.sym in conf['symbols'].keys():
|
||||||
|
continue
|
||||||
slog(INFO, "creating production for symbol", p.str())
|
slog(INFO, "creating production for symbol", p.str())
|
||||||
|
|
||||||
#if p.is_lexical_element is True:
|
#if p.is_lexical_element is True:
|
||||||
|
|
@ -1202,6 +1232,7 @@ def grammar_create_y(grammar, opts):
|
||||||
else:
|
else:
|
||||||
out += indent + "| " + format_yacc_rule(rule) + "\n"
|
out += indent + "| " + format_yacc_rule(rule) + "\n"
|
||||||
out += indent + "{" + "\n"
|
out += indent + "{" + "\n"
|
||||||
|
out += indent + "\t" + "$$ = new " + opts['namespace'] + '::' + t + ";\n"
|
||||||
out += indent + "\t" + "$$->type = " + opts['namespace'] + '::' + t + "::t_" + str(n_rule) + ";\n"
|
out += indent + "\t" + "$$->type = " + opts['namespace'] + '::' + t + "::t_" + str(n_rule) + ";\n"
|
||||||
tokens = []
|
tokens = []
|
||||||
for c in rule:
|
for c in rule:
|
||||||
|
|
@ -1245,6 +1276,7 @@ def grammar_create_y(grammar, opts):
|
||||||
def grammar_create_l(grammar, opts):
|
def grammar_create_l(grammar, opts):
|
||||||
|
|
||||||
ignore = ""
|
ignore = ""
|
||||||
|
conf = opts['config']
|
||||||
|
|
||||||
out = textwrap.dedent("""\
|
out = textwrap.dedent("""\
|
||||||
%option reentrant
|
%option reentrant
|
||||||
|
|
@ -1283,7 +1315,8 @@ def grammar_create_l(grammar, opts):
|
||||||
|
|
||||||
%%
|
%%
|
||||||
|
|
||||||
\\n { context->line++; }
|
\\n { context->line++; context->column = 0; REJECT; }
|
||||||
|
. { context->column++; REJECT; }
|
||||||
|
|
||||||
""")
|
""")
|
||||||
|
|
||||||
|
|
@ -1292,7 +1325,10 @@ def grammar_create_l(grammar, opts):
|
||||||
# \. { return T_DOT; }
|
# \. { return T_DOT; }
|
||||||
assert p.term[0] in [ '"', "'" ], p.term
|
assert p.term[0] in [ '"', "'" ], p.term
|
||||||
assert p.term[-1] in [ '"', "'" ], p.term
|
assert p.term[-1] in [ '"', "'" ], p.term
|
||||||
out += re.escape(p.term[1:-1]) + ' { return ' + p.sym + '; }\n'
|
out += re.escape(p.term[1:-1]) + ' { slog(PRI_NOTICE, "found terminal ' + p.sym + '"); return ' + p.sym + '; }\n'
|
||||||
|
|
||||||
|
for k, v in conf['symbols'].iteritems():
|
||||||
|
out += v['regex'].value() + ' { slog(PRI_NOTICE, "found regex ' + k + '"); return ' + k + '; }\n'
|
||||||
|
|
||||||
#out += textwrap.dedent("""\
|
#out += textwrap.dedent("""\
|
||||||
#
|
#
|
||||||
|
|
@ -1362,6 +1398,7 @@ def grammar_create_l(grammar, opts):
|
||||||
|
|
||||||
out += textwrap.dedent("""\
|
out += textwrap.dedent("""\
|
||||||
. {
|
. {
|
||||||
|
slog(PRI_NOTICE, "returning character %c", yytext[0]);
|
||||||
return yytext[0];
|
return yytext[0];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -1372,7 +1409,7 @@ def grammar_create_l(grammar, opts):
|
||||||
void FB_SYM(error)(struct context *context, void *scanner, const char *msg)
|
void FB_SYM(error)(struct context *context, void *scanner, const char *msg)
|
||||||
{
|
{
|
||||||
struct yyguts_t *yyg =(struct yyguts_t*)scanner;
|
struct yyguts_t *yyg =(struct yyguts_t*)scanner;
|
||||||
set_error(PRI_ERR, EINVAL, "%s at \\"%s\\" in line %d", msg, yytext, context->line);
|
set_error(PRI_ERR, EINVAL, "%s at \\"%s\\" in line %d:%d", msg, yytext, context->line, context->column);
|
||||||
}
|
}
|
||||||
|
|
||||||
int FB_SYM(wrap)(void *scanner)
|
int FB_SYM(wrap)(void *scanner)
|
||||||
|
|
@ -1427,6 +1464,7 @@ def grammar_create_l(grammar, opts):
|
||||||
def grammar_create_h(grammar, opts):
|
def grammar_create_h(grammar, opts):
|
||||||
out = "#ifndef " + opts['mip'] + '\n#define ' + opts['mip'] + '\n\n'
|
out = "#ifndef " + opts['mip'] + '\n#define ' + opts['mip'] + '\n\n'
|
||||||
ns = opts['namespace']
|
ns = opts['namespace']
|
||||||
|
tokens = tokens_from_config(opts['config'])
|
||||||
|
|
||||||
if ns is not None:
|
if ns is not None:
|
||||||
out += 'namespace ' + ns + '{\n\n'
|
out += 'namespace ' + ns + '{\n\n'
|
||||||
|
|
@ -1435,12 +1473,16 @@ def grammar_create_h(grammar, opts):
|
||||||
|
|
||||||
# struct forward declarations
|
# struct forward declarations
|
||||||
for t, members in types.iteritems():
|
for t, members in types.iteritems():
|
||||||
|
if t in tokens:
|
||||||
|
continue
|
||||||
if len(members):
|
if len(members):
|
||||||
out += '\nstruct ' + t + ';'
|
out += '\nstruct ' + t + ';'
|
||||||
out += '\n'
|
out += '\n'
|
||||||
|
|
||||||
# struct / non-struct typedefs
|
# struct / non-struct typedefs
|
||||||
for t, members in types.iteritems():
|
for t, members in types.iteritems():
|
||||||
|
if t in tokens:
|
||||||
|
continue
|
||||||
if not len(members):
|
if not len(members):
|
||||||
out += '\ntypedef const char ' + t + '_t;'
|
out += '\ntypedef const char ' + t + '_t;'
|
||||||
continue
|
continue
|
||||||
|
|
@ -1449,6 +1491,8 @@ def grammar_create_h(grammar, opts):
|
||||||
|
|
||||||
# struct definitions
|
# struct definitions
|
||||||
for t, rules in types.iteritems():
|
for t, rules in types.iteritems():
|
||||||
|
if t in tokens:
|
||||||
|
continue
|
||||||
if not len(rules):
|
if not len(rules):
|
||||||
continue
|
continue
|
||||||
out += '\n\nstruct ' + t + ' {\n'
|
out += '\n\nstruct ' + t + ' {\n'
|
||||||
|
|
@ -1524,6 +1568,7 @@ class GrammarCmd(jwutils.Cmd):
|
||||||
p.add_argument('-t', '--trim-symbols', help='trim grammar tree at symbol', nargs='?', default='')
|
p.add_argument('-t', '--trim-symbols', help='trim grammar tree at symbol', nargs='?', default='')
|
||||||
p.add_argument('-r', '--irrelevant-symbols', help='exclude symbol from output payload', nargs='?', default='')
|
p.add_argument('-r', '--irrelevant-symbols', help='exclude symbol from output payload', nargs='?', default='')
|
||||||
p.add_argument('-c', '--cut-symbols', help='cut grammar tree at symbol', nargs='?', default='')
|
p.add_argument('-c', '--cut-symbols', help='cut grammar tree at symbol', nargs='?', default='')
|
||||||
|
p.add_argument('-f', '--config-file', help='config file', nargs='?', default=None)
|
||||||
return p
|
return p
|
||||||
|
|
||||||
def processGrammar(self, args, grammar):
|
def processGrammar(self, args, grammar):
|
||||||
|
|
@ -1590,7 +1635,8 @@ class CmdCreate(DerivedGrammarCmd):
|
||||||
return p
|
return p
|
||||||
|
|
||||||
def _run(self, args, grammar):
|
def _run(self, args, grammar):
|
||||||
name, ext = os.path.splitext(args.output)[1]
|
name, ext = os.path.splitext(args.output)
|
||||||
|
ext = ext[1:]
|
||||||
#cmd = getattr(sys.modules[__name__], 'create_' + re.sub(r'[-./]', '_', args.output))
|
#cmd = getattr(sys.modules[__name__], 'create_' + re.sub(r'[-./]', '_', args.output))
|
||||||
mip = None
|
mip = None
|
||||||
if ext == 'h':
|
if ext == 'h':
|
||||||
|
|
@ -1598,6 +1644,11 @@ class CmdCreate(DerivedGrammarCmd):
|
||||||
|
|
||||||
includes = args.includes.split(',')
|
includes = args.includes.split(',')
|
||||||
|
|
||||||
|
config = None
|
||||||
|
if args.config_file is not None:
|
||||||
|
config = serdes.read(args.config_file)
|
||||||
|
config.dump(ERR)
|
||||||
|
|
||||||
# generated code breaks without this, not sure why
|
# generated code breaks without this, not sure why
|
||||||
if ext == 'l':
|
if ext == 'l':
|
||||||
tmp = []
|
tmp = []
|
||||||
|
|
@ -1606,13 +1657,14 @@ class CmdCreate(DerivedGrammarCmd):
|
||||||
tmp.append(f)
|
tmp.append(f)
|
||||||
includes = tmp
|
includes = tmp
|
||||||
|
|
||||||
cmd = getattr(sys.modules[__name__], 'grammar_create_' + ext)
|
|
||||||
|
|
||||||
opts = {
|
opts = {
|
||||||
"namespace" : args.namespace,
|
"namespace" : args.namespace,
|
||||||
"includes" : includes,
|
"includes" : includes,
|
||||||
"mip" : mip
|
"mip" : mip,
|
||||||
|
"config" : config
|
||||||
}
|
}
|
||||||
|
|
||||||
|
cmd = getattr(sys.modules[__name__], 'grammar_create_' + ext)
|
||||||
out = cmd(grammar, opts)
|
out = cmd(grammar, opts)
|
||||||
print(out)
|
print(out)
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue