From 6297f10f559a50f4f9cb78947e0b5e5c4bc717cf Mon Sep 17 00:00:00 2001 From: Jan Lindemann Date: Sun, 29 Oct 2017 18:25:55 +0100 Subject: [PATCH] grammar.py et al: Centralize more code More code is removed from the special parser directories and centralized into grammar.py, Cmd.py, and generate-flex-bison.mk. Signed-off-by: Jan Lindemann --- .gitignore | 1 + make/generate-flex-bison.mk | 74 +++++++++++++++++ test/grammar/Makefile | 75 ++--------------- test/grammar/generate.py | 75 +---------------- tools/python/jwutils/Cmd.py | 25 +++--- tools/python/jwutils/grammar.py | 137 ++++++++++++++++++++++++++------ 6 files changed, 210 insertions(+), 177 deletions(-) create mode 100644 make/generate-flex-bison.mk diff --git a/.gitignore b/.gitignore index 466337c..f07d092 100644 --- a/.gitignore +++ b/.gitignore @@ -13,3 +13,4 @@ jw-python *.secret local.mk .gdb_history +*.pyc diff --git a/make/generate-flex-bison.mk b/make/generate-flex-bison.mk new file mode 100644 index 0000000..d32abce --- /dev/null +++ b/make/generate-flex-bison.mk @@ -0,0 +1,74 @@ +GENERATED_STD += $(FB_NAME).l $(FB_NAME).y $(FB_NAME).ebnf include/$(FB_NAME).h +INCLUDED_BY_GENERATED += include/defs.h include/$(FB_NAME).h include/lex.$(FB_NAME).h include/$(FB_NAME).tab.h +NAMESPACE_IN_GENERATED ?= $(FB_NAME) + +# These types are meant to be cut off the tree and turned into hand coded flex +# regexes +#TRIM_SYMBOLS = blah +TRIM_SYMBOLS ?= +GENERATE_LOG_LEVEL ?= notice +FIX_EXTENSIONS ?= discard +CHECK_SYMBOLS ?= all +ifneq ($(CHECK_SYMBOLS),) + OPT_CHECK_SYMBOLS ?= --check-symbols='$(CHECK_SYMBOLS)' +endif + +GRAMMAR_INPUT ?= $(FB_NAME)-input.ebnf +GENERATED = $(FB_NAME)-dense.ebnf $(GENERATED_STD) +GENERATE_PY = ./generate.py +GENERATE = python ./$(GENERATE_PY) --log-level $(GENERATE_LOG_LEVEL) create \ + --fix-extensions $(FIX_EXTENSIONS) \ + --unroll-lists \ + --unroll-options \ + --unroll-alternatives \ + --replace-whitespace \ + $(OPT_CHECK_SYMBOLS) \ + --trim-symbols=$(shell echo $(TRIM_SYMBOLS) 
| sed 's/ */,/g') \ + --namespace=$(NAMESPACE_IN_GENERATED) \ + --includes=$(shell echo $(INCLUDED_BY_GENERATED) | sed 's/ */,/g') \ + $(CREATE_EXTRA_ARGS) +FB_NAME_PREFIX ?= $(FB_NAME)_ +FB_HDRDIR ?= include +FB_BISON_OUT_EXT ?= cpp +FB_FLEX_OUT_EXT ?= cpp +FB_CASE_INSENSITIVE ?= true +FB_SRC ?= $(filter %.y %.l,$(GENERATED)) + +include $(TOPDIR)/make/proj.mk +include $(MODDIR)/make/flex-bison.mk +include $(MODDIR)/make/py-defs.mk + +all: +debug-all: + GENERATE_LOG_LEVEL=debug make all 2>&1 | tee run.out + +generate: $(GENERATED) + +$(FB_NAME).y: include/$(FB_NAME).h +lex.$(FB_NAME).c: $(FB_NAME).l + +check: $(GRAMMAR_INPUT) $(GENERATE_PY) Makefile + python ./$(GENERATE_PY) --log-level info check --fix-extensions unroll --unroll-lists --unroll-options $(OPT_CHECK_SYMBOLS) $< + +$(FB_NAME)-dense.ebnf: $(GRAMMAR_INPUT) $(GENERATE_PY) + python ./$(GENERATE_PY) --log-level $(GENERATE_LOG_LEVEL) create --fix-extensions keep $< $(FB_NAME).ebnf > $@.tmp + mv $@.tmp $@ + +define generate_rule +$(1): $$(GRAMMAR_INPUT) $$(GENERATE_PY) Makefile + $$(GENERATE) $$< $$(patsubst $(FB_NAME).%,$(FB_NAME).%,$$@) > $$@.tmp + mv $$@.tmp $$@ +endef +$(foreach target,$(GENERATED_STD),$(eval $(call generate_rule,$(target)))) + +clean.generated: + rm -f $(GENERATED) +clean: clean.generated +echo-generated: + @echo GENERATED = $(GENERATED) + +help: + $(GENERATE) --help + +expand-macros: + make 2>/dev/null | sed '/g++/ !d; s/g++\|gcc//; s/-o .*//' | xargs g++ -E -C | indent diff --git a/test/grammar/Makefile b/test/grammar/Makefile index af7b5a1..a565ca1 100644 --- a/test/grammar/Makefile +++ b/test/grammar/Makefile @@ -1,75 +1,10 @@ TOPDIR = ../.. 
-USE_PROJECT_LIB = true +USE_PROJECT_LIB = true MEMBERS += local.a($(OBJ)) - -GENERATED_STD = grammartest.l grammartest.y grammartest.ebnf include/grammartest.h - -# These types are meant to be cut off the tree and turned into hand coded flex -# regexes -#TRIM_SYMBOLS = blah -TRIM_SYMBOLS = -GENERATE_LOG_LEVEL ?= notice -FIX_EXTENSIONS ?= discard -CHECK_SYMBOLS ?= --check-symbols=all - - -GRAMMAR_INPUT ?= grammartest-input.ebnf -GENERATED = grammartest-dense.ebnf $(GENERATED_STD) -GENERATE_PY = ./generate.py -GENERATE = python ./$(GENERATE_PY) --log-level $(GENERATE_LOG_LEVEL) create \ - --fix-extensions $(FIX_EXTENSIONS) \ - --unroll-lists \ - --unroll-options \ - --unroll-alternatives \ - --replace-whitespace \ - $(CHECK_SYMBOLS) \ - --trim-symbols=$(shell echo $(TRIM_SYMBOLS) | sed 's/ */,/g') \ - $(CREATE_EXTRA_ARGS) -CHECK_SYMBOLS ?= special_character - -FB_NAME_PREFIX ?= grammartest_ -FB_HDRDIR ?= include -FB_BISON_OUT_EXT ?= cpp -FB_FLEX_OUT_EXT ?= cpp -FB_CASE_INSENSITIVE ?= true -FB_SRC ?= $(filter %.y %.l,$(GENERATED)) +PREREQ_BUILD += ytools +FB_NAME = grammartest +NAMESPACE_IN_GENERATED = gt include $(TOPDIR)/make/proj.mk -include $(MODDIR)/make/flex-bison.mk -include $(MODDIR)/make/py-defs.mk - -all: -debug-all: - GENERATE_LOG_LEVEL=debug make all 2>&1 | tee run.out - -generate: $(GENERATED) - -grammartest.y: include/grammartest.h -lex.grammartest.c: grammartest.l - -check: $(GRAMMAR_INPUT) $(GENERATE_PY) Makefile - python ./$(GENERATE_PY) --log-level info check --fix-extensions unroll --unroll-lists --unroll-options --check-symbols='$(CHECK_SYMBOLS)' $< - -grammartest-dense.ebnf: $(GRAMMAR_INPUT) $(GENERATE_PY) - python ./$(GENERATE_PY) --log-level $(GENERATE_LOG_LEVEL) create --fix-extensions keep $< grammartest.ebnf > $@.tmp - mv $@.tmp $@ - -define generate_rule -$(1): $$(GRAMMAR_INPUT) $$(GENERATE_PY) Makefile - $$(GENERATE) $$< $$(patsubst grammartest.%,grammartest.%,$$@) > $$@.tmp - mv $$@.tmp $$@ -endef -$(foreach target,$(GENERATED_STD),$(eval 
$(call generate_rule,$(target)))) - -clean.generated: - rm -f $(GENERATED) -clean: clean.generated -echo-generated: - @echo GENERATED = $(GENERATED) - -help: - $(GENERATE) --help - -expand-macros: - make 2>/dev/null | sed '/g++/ !d; s/g++\|gcc//; s/-o .*//' | xargs g++ -E -C | indent +include $(TOPDIR)/make/generate-flex-bison.mk diff --git a/test/grammar/generate.py b/test/grammar/generate.py index 14c0664..c2f7b18 100644 --- a/test/grammar/generate.py +++ b/test/grammar/generate.py @@ -1,79 +1,6 @@ #!/usr/bin/python # -*- coding: utf-8 -*- -from __future__ import print_function -import argparse -import sys -import re -import textwrap -from collections import OrderedDict -from abc import abstractmethod - import jwutils -from jwutils.log import * -from jwutils import grammar - -base = 'grammartest' -mip = '_JW_PYTHON_' + base + base.upper() -namespace = base - -def create_grammartest_ebnf(grammar): - print(jwutils.grammar.create_ebnf(grammar)) - -def create_grammartest_y(grammar): - print(jwutils.grammar.create_yacc(grammar)) - -def create_grammartest_l(grammar): - print(jwutils.grammar.create_lex(grammar)) - -def create_include_grammartest_h(grammar): - print(jwutils.grammar.create_header(grammar, mip=mip, namespace=namespace)) - -class GrammarCmd(jwutils.grammar.GrammarCmd): - - def __init__(self, name, help): - super(GrammarCmd, self).__init__(name, help=help) - - @abstractmethod - def _run(self, grammar): - pass - - def add_parser(self, parsers): - p = super(GrammarCmd, self).add_parser(parsers) - return p - - def run(self, args): - with open(args.input, 'r') as infile: - contents = infile.read() - grammar = jwutils.grammar.grammar_parse_ebnf(contents) - grammar = super(GrammarCmd, self).processGrammar(args, grammar) - self._run(args, grammar) - -class CmdCreate(GrammarCmd): - - def __init__(self): - super(CmdCreate, self).__init__("create", help="Create a file") - - def add_parser(self, parsers): - p = super(CmdCreate, self).add_parser(parsers) - 
p.add_argument("output", help="output file") - return p - - def _run(self, args, grammar): - cmd = getattr(sys.modules[__name__], 'create_' + re.sub(r'[-./]', '_', args.output)) - cmd(grammar) - -class CmdCheck(GrammarCmd): - - def __init__(self): - super(CmdCheck, self).__init__("check", help="Check grammar") - - def add_parser(self, parsers): - p = super(CmdCheck, self).add_parser(parsers) - return p - - def _run(self, args, grammar): - pass - -jwutils.run_sub_commands('generate Test parser files') +jwutils.run_sub_commands('generate Test parser files', modules = ['jwutils.grammar']) diff --git a/tools/python/jwutils/Cmd.py b/tools/python/jwutils/Cmd.py index 70e7aa5..a6e91f4 100644 --- a/tools/python/jwutils/Cmd.py +++ b/tools/python/jwutils/Cmd.py @@ -4,6 +4,7 @@ import os import sys import inspect import re +import importlib import Object import log @@ -29,23 +30,27 @@ class Cmd(Object.Object): # export r.set_defaults(func=self.run) return r -def run_sub_commands(description = '', prefix = 'Cmd'): # export +def run_sub_commands(description = '', filter = '^Cmd.*', modules=None): # export - classes = inspect.getmembers(sys.modules["__main__"], inspect.isclass) parser = argparse.ArgumentParser(usage=os.path.basename(sys.argv[0]) + ' [command] [options]', formatter_class=argparse.ArgumentDefaultsHelpFormatter, description=description) parser.add_argument('--log-level', help='Log level', default='notice') parser.add_argument('--log-flags', help='Log flags', default='stderr,position,prio,color') subparsers = parser.add_subparsers(title='Available commands', metavar='') - for (name, cls) in classes: - if not re.match('^Cmd.', name): - continue - if inspect.isabstract(cls): - continue - cls().add_parser(subparsers) - - args=parser.parse_args() + if modules == None: + modules = [ '__main__' ] + for m in modules: + if m != '__main__': + importlib.import_module(m) + for name, c in inspect.getmembers(sys.modules[m], inspect.isclass): + if not re.match(filter, name): + 
continue + if inspect.isabstract(c): + continue + c().add_parser(subparsers) + args = parser.parse_args() log.set_level(args.log_level) log.set_flags(args.log_flags) + args.func(args) diff --git a/tools/python/jwutils/grammar.py b/tools/python/jwutils/grammar.py index 197c9c9..f29eede 100644 --- a/tools/python/jwutils/grammar.py +++ b/tools/python/jwutils/grammar.py @@ -10,6 +10,7 @@ import itertools import copy from collections import OrderedDict from abc import abstractmethod +import os.path import jwutils @@ -914,7 +915,7 @@ def grammar_check(grammar, check_symbols = None): slog(INFO, "======= checking", tok) rr = step_out(grammar, terminals, orphans, lexicals, tok, 0, checked=set(), found=found) if rr == sys.maxint: - slog(ERR, "No way out for", tok, "in production", p.str()) + slog(ERR, "No way out for", tok) exit(1) if not tok in grammar.keys(): slog(ERR, "Token", tok, "has no production") @@ -1034,7 +1035,7 @@ def grammar_trim_symbols(grammar, symbols): return grammar -def create_ebnf(grammar): +def grammar_create_ebnf(grammar, opts): indent = 40 slog(INFO, "creating ebnf from grammar of size", len(grammar)) out = '' @@ -1049,7 +1050,7 @@ def create_ebnf(grammar): out += ' ' * indent + ' ;\n' return out -def create_yacc(grammar): +def grammar_create_y(grammar, opts): indent = ' ' * 40 width = 0 for t, p in grammar.iteritems(): @@ -1076,13 +1077,19 @@ def create_yacc(grammar): #include #include - #include "include/defs.h" - #include "include/vhdl2017.h" - #include "include/lex.vhdl2017.h" - #include "include/vhdl2017.tab.h" + """) + for f in opts['includes']: + out += '#include "' + f + '"' + '\n' + #include "include/defs.h" + #include "include/vhdl2017.h" + #include "include/lex.vhdl2017.h" + #include "include/vhdl2017.tab.h" + + out += "\nusing namespace " + opts['namespace'] + ';\n' + + out += textwrap.dedent("""\ using namespace std; - using namespace v2017; namespace { @@ -1107,7 +1114,7 @@ def create_yacc(grammar): types = grammar_get_types(grammar) for 
t in types.keys(): - out += '\n\tv2017::' + t + '_t *' + t + ';' + out += '\n\t' + opts['namespace'] + '::' + t + '_t *' + t + ';' out += '\n' out += textwrap.dedent("""\ @@ -1184,7 +1191,7 @@ def create_yacc(grammar): else: out += indent + "| " + format_yacc_rule(rule) + "\n" out += indent + "{" + "\n" - out += indent + "\t" + "$$->type = v2017::" + t + "::t_" + str(n_rule) + ";\n" + out += indent + "\t" + "$$->type = " + opts['namespace'] + '::' + t + "::t_" + str(n_rule) + ";\n" tokens = [] for c in rule: if c.tp == t_target_lang: @@ -1222,7 +1229,7 @@ def create_yacc(grammar): return out + "\n" -def create_lex(grammar): +def grammar_create_l(grammar, opts): ignore = "" @@ -1233,13 +1240,19 @@ def create_lex(grammar): %{ #include - #include "include/defs.h" - #include "include/vhdl2017.h" + """) - // #include "include/lex.vhdl2017.h" - #include "include/vhdl2017.tab.h" + for f in opts['includes']: + out += '#include "' + f + '"' + '\n' + #include "include/defs.h" + #include "include/vhdl2017.h" - using namespace v2017; + #// #include "include/lex.vhdl2017.h" + #include "include/vhdl2017.tab.h" + + out += "\nusing namespace " + opts['namespace'] + ';\n' + + out += textwrap.dedent("""\ /* This is too late in the Flex generated file to work. 
Still lots of * prototypes are spat into it above it, which end up with C++ linkage, of @@ -1399,10 +1412,12 @@ def create_lex(grammar): return out -def create_header(grammar, mip, namespace = None): - out = "#ifndef " + mip + '\n#define ' + mip + '\n\n' - if namespace is not None: - out += 'namespace ' + namespace + '{\n\n' +def grammar_create_h(grammar, opts): + out = "#ifndef " + opts['mip'] + '\n#define ' + opts['mip'] + '\n\n' + ns = opts['namespace'] + + if ns is not None: + out += 'namespace ' + ns + '{\n\n' types = grammar_get_types(grammar) @@ -1463,9 +1478,9 @@ def create_header(grammar, mip, namespace = None): out += '\n' - if namespace is not None: - out += '\n} /* namespace ' + namespace + '*/' - out += '\n#endif /* #ifndef + ' + mip + ' */' + if ns is not None: + out += '\n} /* namespace ' + ns + '*/' + out += '\n#endif /* #ifndef + ' + opts['mip'] + ' */' return out @@ -1510,3 +1525,79 @@ class GrammarCmd(jwutils.Cmd): if args.replace_whitespace: grammar = grammar_replace_whitespace(grammar) return grammar + +# ------------------------------------------------- TODO: clean this up > + +class DerivedGrammarCmd(GrammarCmd): + + def __init__(self, name, help): + super(DerivedGrammarCmd, self).__init__(name, help=help) + + @abstractmethod + def _run(self, grammar): + pass + + def _parse(self, contents): + return grammar_parse_ebnf(contents) + + def add_parser(self, parsers): + p = super(DerivedGrammarCmd, self).add_parser(parsers) + return p + + def run(self, args): + with open(args.input, 'r') as infile: + contents = infile.read() + grammar = self._parse(contents) + grammar = super(DerivedGrammarCmd, self).processGrammar(args, grammar) + self._run(args, grammar) + +class CmdCreate(DerivedGrammarCmd): + + def __init__(self): + super(CmdCreate, self).__init__("create", help="Create a file") + + def add_parser(self, parsers): + p = super(CmdCreate, self).add_parser(parsers) + p.add_argument("output", help="output file") + p.add_argument('--namespace', 
help='namespace of generated AST', default='parser') + p.add_argument('--includes', help='list of header files to be #included in C/C++ implementation files', default='') + return p + + def _run(self, args, grammar): + name, ext = os.path.splitext(args.output) + #cmd = getattr(sys.modules[__name__], 'create_' + re.sub(r'[-./]', '_', args.output)) + mip = None + if ext == '.h': + mip = args.namespace + re.sub(r'[-./]', '_', args.output).upper() + + includes = args.includes.split(',') + + # generated code breaks without this, not sure why + if ext == '.l': + tmp = [] + for f in includes: + if not re.match('.*lex\..*\.h', f): + tmp.append(f) + includes = tmp + + cmd = getattr(sys.modules[__name__], 'grammar_create_' + ext[1:]) + + opts = { + "namespace" : args.namespace, + "includes" : includes, + "mip" : mip + } + out = cmd(grammar, opts) + print(out) + +class CmdCheck(DerivedGrammarCmd): + + def __init__(self): + super(CmdCheck, self).__init__("check", help="Check grammar") + + def add_parser(self, parsers): + p = super(CmdCheck, self).add_parser(parsers) + return p + + def _run(self, args, grammar): + pass