grammar.py et al: Centralize more code

More code is removed from the special parser directories and centralized
into grammar.py, Cmd.py, and generate-flex-bison.mk.

Signed-off-by: Jan Lindemann <jan@janware.com>
commit 6297f10f55
Jan Lindemann, 2017-10-29 18:25:55 +01:00
6 changed files with 210 additions and 177 deletions

.gitignore
@@ -13,3 +13,4 @@ jw-python
 *.secret
 local.mk
 .gdb_history
+*.pyc

make/generate-flex-bison.mk (new file)
@@ -0,0 +1,74 @@
+GENERATED_STD += $(FB_NAME).l $(FB_NAME).y $(FB_NAME).ebnf include/$(FB_NAME).h
+INCLUDED_BY_GENERATED += include/defs.h include/$(FB_NAME).h include/lex.$(FB_NAME).h include/$(FB_NAME).tab.h
+NAMESPACE_IN_GENERATED ?= $(FB_NAME)
+
+# These types are meant to be cut off the tree and turned into hand coded flex
+# regexes
+#TRIM_SYMBOLS = blah
+TRIM_SYMBOLS ?=
+
+GENERATE_LOG_LEVEL ?= notice
+FIX_EXTENSIONS ?= discard
+CHECK_SYMBOLS ?= all
+ifneq ($(CHECK_SYMBOLS),)
+OPT_CHECK_SYMBOLS ?= --check-symbols='$(CHECK_SYMBOLS)'
+endif
+
+GRAMMAR_INPUT ?= $(FB_NAME)-input.ebnf
+GENERATED = $(FB_NAME)-dense.ebnf $(GENERATED_STD)
+GENERATE_PY = ./generate.py
+GENERATE = python ./$(GENERATE_PY) --log-level $(GENERATE_LOG_LEVEL) create \
+	--fix-extensions $(FIX_EXTENSIONS) \
+	--unroll-lists \
+	--unroll-options \
+	--unroll-alternatives \
+	--replace-whitespace \
+	$(OPT_CHECK_SYMBOLS) \
+	--trim-symbols=$(shell echo $(TRIM_SYMBOLS) | sed 's/  */,/g') \
+	--namespace=$(NAMESPACE_IN_GENERATED) \
+	--includes=$(shell echo $(INCLUDED_BY_GENERATED) | sed 's/  */,/g') \
+	$(CREATE_EXTRA_ARGS)
+
+FB_NAME_PREFIX ?= $(FB_NAME)_
+FB_HDRDIR ?= include
+FB_BISON_OUT_EXT ?= cpp
+FB_FLEX_OUT_EXT ?= cpp
+FB_CASE_INSENSITIVE ?= true
+FB_SRC ?= $(filter %.y %.l,$(GENERATED))
+
+include $(TOPDIR)/make/proj.mk
+include $(MODDIR)/make/flex-bison.mk
+include $(MODDIR)/make/py-defs.mk
+
+all:
+debug-all:
+	GENERATE_LOG_LEVEL=debug make all 2>&1 | tee run.out
+
+generate: $(GENERATED)
+$(FB_NAME).y: include/$(FB_NAME).h
+lex.$(FB_NAME).c: $(FB_NAME).l
+
+check: $(GRAMMAR_INPUT) $(GENERATE_PY) Makefile
+	python ./$(GENERATE_PY) --log-level info check --fix-extensions unroll --unroll-lists --unroll-options $(OPT_CHECK_SYMBOLS) $<
+
+$(FB_NAME)-dense.ebnf: $(GRAMMAR_INPUT) $(GENERATE_PY)
+	python ./$(GENERATE_PY) --log-level $(GENERATE_LOG_LEVEL) create --fix-extensions keep $< $(FB_NAME).ebnf > $@.tmp
+	mv $@.tmp $@
+
+define generate_rule
+$(1): $$(GRAMMAR_INPUT) $$(GENERATE_PY) Makefile
+	$$(GENERATE) $$< $$(patsubst $(FB_NAME).%,$(FB_NAME).%,$$@) > $$@.tmp
+	mv $$@.tmp $$@
+endef
+$(foreach target,$(GENERATED_STD),$(eval $(call generate_rule,$(target))))
+
+clean.generated:
+	rm -f $(GENERATED)
+clean: clean.generated
+echo-generated:
+	@echo GENERATED = $(GENERATED)
+
+help:
+	$(GENERATE) --help
+
+expand-macros:
+	make 2>/dev/null | sed '/g++/ !d; s/g++\|gcc//; s/-o .*//' | xargs g++ -E -C | indent

Makefile (grammartest parser directory)
@@ -2,74 +2,9 @@ TOPDIR = ../..
 USE_PROJECT_LIB = true
 MEMBERS += local.a($(OBJ))
-GENERATED_STD = grammartest.l grammartest.y grammartest.ebnf include/grammartest.h
-# These types are meant to be cut off the tree and turned into hand coded flex
-# regexes
-#TRIM_SYMBOLS = blah
-TRIM_SYMBOLS =
-GENERATE_LOG_LEVEL ?= notice
-FIX_EXTENSIONS ?= discard
-CHECK_SYMBOLS ?= --check-symbols=all
-GRAMMAR_INPUT ?= grammartest-input.ebnf
-GENERATED = grammartest-dense.ebnf $(GENERATED_STD)
-GENERATE_PY = ./generate.py
-GENERATE = python ./$(GENERATE_PY) --log-level $(GENERATE_LOG_LEVEL) create \
-	--fix-extensions $(FIX_EXTENSIONS) \
-	--unroll-lists \
-	--unroll-options \
-	--unroll-alternatives \
-	--replace-whitespace \
-	$(CHECK_SYMBOLS) \
-	--trim-symbols=$(shell echo $(TRIM_SYMBOLS) | sed 's/  */,/g') \
-	$(CREATE_EXTRA_ARGS)
-CHECK_SYMBOLS ?= special_character
-FB_NAME_PREFIX ?= grammartest_
-FB_HDRDIR ?= include
-FB_BISON_OUT_EXT ?= cpp
-FB_FLEX_OUT_EXT ?= cpp
-FB_CASE_INSENSITIVE ?= true
-FB_SRC ?= $(filter %.y %.l,$(GENERATED))
+PREREQ_BUILD += ytools
+FB_NAME = grammartest
+NAMESPACE_IN_GENERATED = gt
 include $(TOPDIR)/make/proj.mk
-include $(MODDIR)/make/flex-bison.mk
-include $(MODDIR)/make/py-defs.mk
+include $(TOPDIR)/make/generate-flex-bison.mk
-all:
-debug-all:
-	GENERATE_LOG_LEVEL=debug make all 2>&1 | tee run.out
-generate: $(GENERATED)
-grammartest.y: include/grammartest.h
-lex.grammartest.c: grammartest.l
-check: $(GRAMMAR_INPUT) $(GENERATE_PY) Makefile
-	python ./$(GENERATE_PY) --log-level info check --fix-extensions unroll --unroll-lists --unroll-options --check-symbols='$(CHECK_SYMBOLS)' $<
-grammartest-dense.ebnf: $(GRAMMAR_INPUT) $(GENERATE_PY)
-	python ./$(GENERATE_PY) --log-level $(GENERATE_LOG_LEVEL) create --fix-extensions keep $< grammartest.ebnf > $@.tmp
-	mv $@.tmp $@
-define generate_rule
-$(1): $$(GRAMMAR_INPUT) $$(GENERATE_PY) Makefile
-	$$(GENERATE) $$< $$(patsubst grammartest.%,grammartest.%,$$@) > $$@.tmp
-	mv $$@.tmp $$@
-endef
-$(foreach target,$(GENERATED_STD),$(eval $(call generate_rule,$(target))))
-clean.generated:
-	rm -f $(GENERATED)
-clean: clean.generated
-echo-generated:
-	@echo GENERATED = $(GENERATED)
-help:
-	$(GENERATE) --help
-expand-macros:
-	make 2>/dev/null | sed '/g++/ !d; s/g++\|gcc//; s/-o .*//' | xargs g++ -E -C | indent

generate.py (grammartest parser directory)
@@ -1,79 +1,6 @@
 #!/usr/bin/python
 # -*- coding: utf-8 -*-
-from __future__ import print_function
-import argparse
-import sys
-import re
-import textwrap
-from collections import OrderedDict
-from abc import abstractmethod
 import jwutils
-from jwutils.log import *
-from jwutils import grammar
-
-base = 'grammartest'
-mip = '_JW_PYTHON_' + base + base.upper()
-namespace = base
-
-def create_grammartest_ebnf(grammar):
-    print(jwutils.grammar.create_ebnf(grammar))
-
-def create_grammartest_y(grammar):
-    print(jwutils.grammar.create_yacc(grammar))
-
-def create_grammartest_l(grammar):
-    print(jwutils.grammar.create_lex(grammar))
-
-def create_include_grammartest_h(grammar):
-    print(jwutils.grammar.create_header(grammar, mip=mip, namespace=namespace))
-
-class GrammarCmd(jwutils.grammar.GrammarCmd):
-    def __init__(self, name, help):
-        super(GrammarCmd, self).__init__(name, help=help)
-
-    @abstractmethod
-    def _run(self, grammar):
-        pass
-
-    def add_parser(self, parsers):
-        p = super(GrammarCmd, self).add_parser(parsers)
-        return p
-
-    def run(self, args):
-        with open(args.input, 'r') as infile:
-            contents = infile.read()
-        grammar = jwutils.grammar.grammar_parse_ebnf(contents)
-        grammar = super(GrammarCmd, self).processGrammar(args, grammar)
-        self._run(args, grammar)
-
-class CmdCreate(GrammarCmd):
-    def __init__(self):
-        super(CmdCreate, self).__init__("create", help="Create a file")
-
-    def add_parser(self, parsers):
-        p = super(CmdCreate, self).add_parser(parsers)
-        p.add_argument("output", help="output file")
-        return p
-
-    def _run(self, args, grammar):
-        cmd = getattr(sys.modules[__name__], 'create_' + re.sub(r'[-./]', '_', args.output))
-        cmd(grammar)
-
-class CmdCheck(GrammarCmd):
-    def __init__(self):
-        super(CmdCheck, self).__init__("check", help="Check grammar")
-
-    def add_parser(self, parsers):
-        p = super(CmdCheck, self).add_parser(parsers)
-        return p
-
-    def _run(self, args, grammar):
-        pass
-
-jwutils.run_sub_commands('generate Test parser files')
+jwutils.run_sub_commands('generate Test parser files', modules = ['jwutils.grammar'])
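With the per-directory boilerplate gone, generate.py reduces to handing control to jwutils.run_sub_commands(), which now pulls the create and check commands out of jwutils.grammar. Note that '__main__' is only scanned when it is listed explicitly (see the Cmd.py change below), so a parser directory that still wants a project-local command has to name both modules. A minimal sketch, assuming a made-up CmdDump that does not exist in the repository:

#!/usr/bin/python
# -*- coding: utf-8 -*-
import jwutils

# Hypothetical local command; the name 'dump' and its behavior are illustrative.
class CmdDump(jwutils.Cmd):
    def __init__(self):
        super(CmdDump, self).__init__("dump", help="Dump the grammar input verbatim")

    def add_parser(self, parsers):
        p = super(CmdDump, self).add_parser(parsers)
        p.add_argument("input", help="input file")
        return p

    def run(self, args):
        with open(args.input, 'r') as infile:
            print(infile.read())

# '__main__' must be listed as well, or CmdDump is never discovered.
jwutils.run_sub_commands('generate Test parser files',
                         modules = ['jwutils.grammar', '__main__'])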

Cmd.py (jwutils)
@@ -4,6 +4,7 @@ import os
 import sys
 import inspect
 import re
+import importlib
 
 import Object
 import log
@@ -29,23 +30,27 @@ class Cmd(Object.Object): # export
         r.set_defaults(func=self.run)
         return r
 
-def run_sub_commands(description = '', prefix = 'Cmd'): # export
-    classes = inspect.getmembers(sys.modules["__main__"], inspect.isclass)
+def run_sub_commands(description = '', filter = '^Cmd.*', modules=None): # export
     parser = argparse.ArgumentParser(usage=os.path.basename(sys.argv[0]) + ' [command] [options]',
                                      formatter_class=argparse.ArgumentDefaultsHelpFormatter, description=description)
     parser.add_argument('--log-level', help='Log level', default='notice')
     parser.add_argument('--log-flags', help='Log flags', default='stderr,position,prio,color')
     subparsers = parser.add_subparsers(title='Available commands', metavar='')
-    for (name, cls) in classes:
-        if not re.match('^Cmd.', name):
-            continue
-        if inspect.isabstract(cls):
-            continue
-        cls().add_parser(subparsers)
-    args = parser.parse_args()
+    if modules == None:
+        modules = [ '__main__' ]
+    for m in modules:
+        if m != '__main__':
+            importlib.import_module(m)
+        for name, c in inspect.getmembers(sys.modules[m], inspect.isclass):
+            if not re.match(filter, name):
+                continue
+            if inspect.isabstract(c):
+                continue
+            c().add_parser(subparsers)
+    args=parser.parse_args()
 
     log.set_level(args.log_level)
     log.set_flags(args.log_flags)
     args.func(args)
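run_sub_commands() thus switches from scanning __main__ for a fixed class-name prefix to scanning an explicit module list against a regex. The discovery it performs boils down to the following sketch (discover() is extracted here for illustration and is not itself a jwutils function):

import importlib
import inspect
import re
import sys

def discover(modules, filter = '^Cmd.*'):
    """Instantiate every concrete class whose name matches the filter."""
    found = []
    for m in modules:
        if m != '__main__':
            importlib.import_module(m)  # make sure sys.modules[m] exists
        for name, c in inspect.getmembers(sys.modules[m], inspect.isclass):
            if not re.match(filter, name):
                continue                # name does not match, e.g. not 'Cmd*'
            if inspect.isabstract(c):
                continue                # skip abstract bases such as GrammarCmd
            found.append(c())           # each instance later gets add_parser()
    return found

# e.g. discover(['jwutils.grammar']) returns CmdCreate and CmdCheck instances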

grammar.py (jwutils)
@@ -10,6 +10,7 @@ import itertools
 import copy
 from collections import OrderedDict
 from abc import abstractmethod
+import os.path
 
 import jwutils
@@ -914,7 +915,7 @@ def grammar_check(grammar, check_symbols = None):
         slog(INFO, "======= checking", tok)
         rr = step_out(grammar, terminals, orphans, lexicals, tok, 0, checked=set(), found=found)
         if rr == sys.maxint:
-            slog(ERR, "No way out for", tok, "in production", p.str())
+            slog(ERR, "No way out for", tok)
             exit(1)
         if not tok in grammar.keys():
             slog(ERR, "Token", tok, "has no production")
@@ -1034,7 +1035,7 @@ def grammar_trim_symbols(grammar, symbols):
     return grammar
 
-def create_ebnf(grammar):
+def grammar_create_ebnf(grammar, opts):
     indent = 40
     slog(INFO, "creating ebnf from grammar of size", len(grammar))
     out = ''
@@ -1049,7 +1050,7 @@ def create_ebnf(grammar):
         out += ' ' * indent + ' ;\n'
     return out
 
-def create_yacc(grammar):
+def grammar_create_y(grammar, opts):
     indent = ' ' * 40
     width = 0
     for t, p in grammar.iteritems():
@@ -1076,13 +1077,19 @@ def create_yacc(grammar):
     #include <vector>
     #include <string>
-
-    #include "include/defs.h"
-    #include "include/vhdl2017.h"
-    #include "include/lex.vhdl2017.h"
-    #include "include/vhdl2017.tab.h"
+    """)
+
+    for f in opts['includes']:
+        out += '#include "' + f + '"' + '\n'
 
+    out += "\nusing namespace " + opts['namespace'] + ';\n'
+    out += textwrap.dedent("""\
     using namespace std;
-    using namespace v2017;
 
     namespace {
@@ -1107,7 +1114,7 @@ def create_yacc(grammar):
     types = grammar_get_types(grammar)
     for t in types.keys():
-        out += '\n\tv2017::' + t + '_t *' + t + ';'
+        out += '\n\t' + opts['namespace'] + '::' + t + '_t *' + t + ';'
     out += '\n'
 
     out += textwrap.dedent("""\
@@ -1184,7 +1191,7 @@ def create_yacc(grammar):
             else:
                 out += indent + "| " + format_yacc_rule(rule) + "\n"
             out += indent + "{" + "\n"
-            out += indent + "\t" + "$$->type = v2017::" + t + "::t_" + str(n_rule) + ";\n"
+            out += indent + "\t" + "$$->type = " + opts['namespace'] + '::' + t + "::t_" + str(n_rule) + ";\n"
             tokens = []
            for c in rule:
                 if c.tp == t_target_lang:
@@ -1222,7 +1229,7 @@ def create_yacc(grammar):
     return out + "\n"
 
-def create_lex(grammar):
+def grammar_create_l(grammar, opts):
 
     ignore = ""
@@ -1233,13 +1240,19 @@ def create_lex(grammar):
     %{
 
     #include <slog.h>
-
-    #include "include/defs.h"
-    #include "include/vhdl2017.h"
-    // #include "include/lex.vhdl2017.h"
-    #include "include/vhdl2017.tab.h"
-
-    using namespace v2017;
+    """)
+
+    for f in opts['includes']:
+        out += '#include "' + f + '"' + '\n'
+
+    out += "\nusing namespace " + opts['namespace'] + ';\n'
+    out += textwrap.dedent("""\
 
     /* This is too late in the Flex generated file to work. Still lots of
      * prototypes are spat into it above it, which end up with C++ linkage, of
@@ -1399,10 +1412,12 @@ def create_lex(grammar):
     return out
 
-def create_header(grammar, mip, namespace = None):
-    out = "#ifndef " + mip + '\n#define ' + mip + '\n\n'
-    if namespace is not None:
-        out += 'namespace ' + namespace + '{\n\n'
+def grammar_create_h(grammar, opts):
+    out = "#ifndef " + opts['mip'] + '\n#define ' + opts['mip'] + '\n\n'
+    ns = opts['namespace']
+    if ns is not None:
+        out += 'namespace ' + ns + '{\n\n'
 
     types = grammar_get_types(grammar)
@@ -1463,9 +1478,9 @@ def create_header(grammar, mip, namespace = None):
     out += '\n'
 
-    if namespace is not None:
-        out += '\n} /* namespace ' + namespace + '*/'
-    out += '\n#endif /* #ifndef + ' + mip + ' */'
+    if ns is not None:
+        out += '\n} /* namespace ' + ns + '*/'
+    out += '\n#endif /* #ifndef + ' + opts['mip'] + ' */'
 
     return out
@@ -1510,3 +1525,79 @@ class GrammarCmd(jwutils.Cmd):
         if args.replace_whitespace:
             grammar = grammar_replace_whitespace(grammar)
         return grammar
+
+# ------------------------------------------------- TODO: clean this up >
+class DerivedGrammarCmd(GrammarCmd):
+    def __init__(self, name, help):
+        super(DerivedGrammarCmd, self).__init__(name, help=help)
+
+    @abstractmethod
+    def _run(self, grammar):
+        pass
+
+    def _parse(self, contents):
+        return grammar_parse_ebnf(contents)
+
+    def add_parser(self, parsers):
+        p = super(DerivedGrammarCmd, self).add_parser(parsers)
+        return p
+
+    def run(self, args):
+        with open(args.input, 'r') as infile:
+            contents = infile.read()
+        grammar = self._parse(contents)
+        grammar = super(DerivedGrammarCmd, self).processGrammar(args, grammar)
+        self._run(args, grammar)
+
+class CmdCreate(DerivedGrammarCmd):
+    def __init__(self):
+        super(CmdCreate, self).__init__("create", help="Create a file")
+
+    def add_parser(self, parsers):
+        p = super(CmdCreate, self).add_parser(parsers)
+        p.add_argument("output", help="output file")
+        p.add_argument('--namespace', help='namespace of generated AST', default='parser')
+        p.add_argument('--includes', help='list of header files to be #included in C/C++ implementation files', default='')
+        return p
+
+    def _run(self, args, grammar):
+        ext = os.path.splitext(args.output)[1].lstrip('.')
+        #cmd = getattr(sys.modules[__name__], 'create_' + re.sub(r'[-./]', '_', args.output))
+        mip = None
+        if ext == 'h':
+            mip = args.namespace + re.sub(r'[-./]', '_', args.output).upper()
+        includes = args.includes.split(',')
+        # generated code breaks without this, not sure why
+        if ext == 'l':
+            tmp = []
+            for f in includes:
+                if not re.match(r'.*lex\..*\.h', f):
+                    tmp.append(f)
+            includes = tmp
+        cmd = getattr(sys.modules[__name__], 'grammar_create_' + ext)
+        opts = {
+            "namespace" : args.namespace,
+            "includes" : includes,
+            "mip" : mip
+        }
+        out = cmd(grammar, opts)
+        print(out)
+
+class CmdCheck(DerivedGrammarCmd):
+    def __init__(self):
+        super(CmdCheck, self).__init__("check", help="Check grammar")
+
+    def add_parser(self, parsers):
+        p = super(CmdCheck, self).add_parser(parsers)
+        return p
+
+    def _run(self, args, grammar):
+        pass
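CmdCreate._run() dispatches on the extension of the requested output file: grammar_create_<ext> is looked up by name in the module, so supporting a new output format only requires adding a matching grammar_create_* function. A standalone sketch of that mapping (pick_generator is illustrative, not part of grammar.py):

import os.path

def pick_generator(output):
    # 'include/grammartest.h' -> '.h' -> 'h' -> 'grammar_create_h'
    ext = os.path.splitext(output)[1].lstrip('.')
    return 'grammar_create_' + ext

assert pick_generator('grammartest.y') == 'grammar_create_y'
assert pick_generator('grammartest.l') == 'grammar_create_l'
assert pick_generator('grammartest.ebnf') == 'grammar_create_ebnf'
assert pick_generator('include/grammartest.h') == 'grammar_create_h'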