mirror of
ssh://git.janware.com/srv/git/janware/proj/jw-python
synced 2026-01-15 01:52:56 +01:00
grammar.py et al: Centralize more code
More code is removed from the special parser directories and centralized into grammar.py, Cmd.py, and generate-flex-bison.mk. Signed-off-by: Jan Lindemann <jan@janware.com>
This commit is contained in:
parent
fee94deb48
commit
6297f10f55
6 changed files with 210 additions and 177 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
|
@ -13,3 +13,4 @@ jw-python
|
|||
*.secret
|
||||
local.mk
|
||||
.gdb_history
|
||||
*.pyc
|
||||
|
|
|
|||
74
make/generate-flex-bison.mk
Normal file
74
make/generate-flex-bison.mk
Normal file
|
|
@ -0,0 +1,74 @@
|
|||
GENERATED_STD += $(FB_NAME).l $(FB_NAME).y $(FB_NAME).ebnf include/$(FB_NAME).h
|
||||
INCLUDED_BY_GENERATED += include/defs.h include/$(FB_NAME).h include/lex.$(FB_NAME).h include/$(FB_NAME).tab.h
|
||||
NAMESPACE_IN_GENERATED ?= $(FB_NAME)
|
||||
|
||||
# These types are meant to be cut off the tree and turned into hand coded flex
|
||||
# regexes
|
||||
#TRIM_SYMBOLS = blah
|
||||
TRIM_SYMBOLS ?=
|
||||
GENERATE_LOG_LEVEL ?= notice
|
||||
FIX_EXTENSIONS ?= discard
|
||||
CHECK_SYMBOLS ?= all
|
||||
ifneq ($(CHECK_SYMBOLS),)
|
||||
OPT_CHECK_SYMBOLS ?= --check-symbols='$(CHECK_SYMBOLS)'
|
||||
endif
|
||||
|
||||
GRAMMAR_INPUT ?= $(FB_NAME)-input.ebnf
|
||||
GENERATED = $(FB_NAME)-dense.ebnf $(GENERATED_STD)
|
||||
GENERATE_PY = ./generate.py
|
||||
# Helpers for turning a whitespace-separated make list into a
# comma-separated one without forking a shell. A literal comma or space
# cannot be written directly inside a function call, hence the variables.
fb_comma := ,
fb_empty :=
fb_space := $(fb_empty) $(fb_empty)
# $(call fb_join_commas,LIST): strip collapses every run of whitespace to a
# single space, subst then replaces each space with a comma. An empty list
# yields an empty string.
fb_join_commas = $(subst $(fb_space),$(fb_comma),$(strip $(1)))

# Command line prefix for the "create" sub-command of the generator script.
# Kept recursively expanded (=) on purpose: the variables it references may
# still be overridden after this point, so they must be resolved at use time.
# The input grammar and the output name are appended by the rules using it.
GENERATE = python ./$(GENERATE_PY) --log-level $(GENERATE_LOG_LEVEL) create \
    --fix-extensions $(FIX_EXTENSIONS) \
    --unroll-lists \
    --unroll-options \
    --unroll-alternatives \
    --replace-whitespace \
    $(OPT_CHECK_SYMBOLS) \
    --trim-symbols=$(call fb_join_commas,$(TRIM_SYMBOLS)) \
    --namespace=$(NAMESPACE_IN_GENERATED) \
    --includes=$(call fb_join_commas,$(INCLUDED_BY_GENERATED)) \
    $(CREATE_EXTRA_ARGS)
|
||||
FB_NAME_PREFIX ?= $(FB_NAME)_
|
||||
FB_HDRDIR ?= include
|
||||
FB_BISON_OUT_EXT ?= cpp
|
||||
FB_FLEX_OUT_EXT ?= cpp
|
||||
FB_CASE_INSENSITIVE ?= true
|
||||
FB_SRC ?= $(filter %.y %.l,$(GENERATED))
|
||||
|
||||
include $(TOPDIR)/make/proj.mk
|
||||
include $(MODDIR)/make/flex-bison.mk
|
||||
include $(MODDIR)/make/py-defs.mk
|
||||
|
||||
all:
|
||||
# Run the default build with the generator's log level raised to "debug" and
# keep a copy of the combined stdout/stderr in run.out.
# $(MAKE) instead of a literal "make" keeps -j/jobserver and -n working in
# the sub-make. The log level is passed through the environment, which the
# "GENERATE_LOG_LEVEL ?=" default picks up.
.PHONY: debug-all
debug-all:
	GENERATE_LOG_LEVEL=debug $(MAKE) all 2>&1 | tee run.out
|
||||
|
||||
generate: $(GENERATED)
|
||||
|
||||
$(FB_NAME).y: include/$(FB_NAME).h
|
||||
lex.$(FB_NAME).c: $(FB_NAME).l
|
||||
|
||||
check: $(GRAMMAR_INPUT) $(GENERATE_PY) Makefile
|
||||
python ./$(GENERATE_PY) --log-level info check --fix-extensions unroll --unroll-lists --unroll-options $(OPT_CHECK_SYMBOLS) $<
|
||||
|
||||
$(FB_NAME)-dense.ebnf: $(GRAMMAR_INPUT) $(GENERATE_PY)
|
||||
python ./$(GENERATE_PY) --log-level $(GENERATE_LOG_LEVEL) create --fix-extensions keep $< $(FB_NAME).ebnf > $@.tmp
|
||||
mv $@.tmp $@
|
||||
|
||||
# Rule template for one generated file; $(1) is the target path.
# The target is rebuilt whenever the input grammar, the generator script or
# the Makefile changes. The generator prints to stdout, so the output goes to
# a temporary file first and is renamed only after the generator succeeded --
# a failed run never leaves a truncated but up-to-date looking target.
# $$-escaped references are expanded when the rule runs, not when the
# template is instantiated. The target name itself is passed to the
# generator as the "output" argument.
define generate_rule
$(1): $$(GRAMMAR_INPUT) $$(GENERATE_PY) Makefile
	$$(GENERATE) $$< $$@ > $$@.tmp
	mv -f $$@.tmp $$@
endef
# Instantiate the template once per file listed in GENERATED_STD.
$(foreach target,$(GENERATED_STD),$(eval $(call generate_rule,$(target))))
|
||||
|
||||
clean.generated:
|
||||
rm -f $(GENERATED)
|
||||
clean: clean.generated
|
||||
echo-generated:
|
||||
@echo GENERATED = $(GENERATED)
|
||||
|
||||
help:
|
||||
$(GENERATE) --help
|
||||
|
||||
# Debugging aid: run the default build, keep only the output lines that
# mention g++, strip the compiler name and the "-o ..." part, and feed what
# is left to "g++ -E -C" so the preprocessed sources (comments kept) can be
# inspected after being piped through indent.
# $(MAKE) instead of a literal "make" keeps flags and the jobserver intact.
.PHONY: expand-macros
expand-macros:
	$(MAKE) 2>/dev/null | sed '/g++/ !d; s/g++\|gcc//; s/-o .*//' | xargs g++ -E -C | indent
|
||||
|
|
@ -1,75 +1,10 @@
|
|||
TOPDIR = ../..
|
||||
|
||||
USE_PROJECT_LIB = true
|
||||
USE_PROJECT_LIB = true
|
||||
MEMBERS += local.a($(OBJ))
|
||||
|
||||
GENERATED_STD = grammartest.l grammartest.y grammartest.ebnf include/grammartest.h
|
||||
|
||||
# These types are meant to be cut off the tree and turned into hand coded flex
|
||||
# regexes
|
||||
#TRIM_SYMBOLS = blah
|
||||
TRIM_SYMBOLS =
|
||||
GENERATE_LOG_LEVEL ?= notice
|
||||
FIX_EXTENSIONS ?= discard
|
||||
CHECK_SYMBOLS ?= --check-symbols=all
|
||||
|
||||
|
||||
GRAMMAR_INPUT ?= grammartest-input.ebnf
|
||||
GENERATED = grammartest-dense.ebnf $(GENERATED_STD)
|
||||
GENERATE_PY = ./generate.py
|
||||
GENERATE = python ./$(GENERATE_PY) --log-level $(GENERATE_LOG_LEVEL) create \
|
||||
--fix-extensions $(FIX_EXTENSIONS) \
|
||||
--unroll-lists \
|
||||
--unroll-options \
|
||||
--unroll-alternatives \
|
||||
--replace-whitespace \
|
||||
$(CHECK_SYMBOLS) \
|
||||
--trim-symbols=$(shell echo $(TRIM_SYMBOLS) | sed 's/ */,/g') \
|
||||
$(CREATE_EXTRA_ARGS)
|
||||
CHECK_SYMBOLS ?= special_character
|
||||
|
||||
FB_NAME_PREFIX ?= grammartest_
|
||||
FB_HDRDIR ?= include
|
||||
FB_BISON_OUT_EXT ?= cpp
|
||||
FB_FLEX_OUT_EXT ?= cpp
|
||||
FB_CASE_INSENSITIVE ?= true
|
||||
FB_SRC ?= $(filter %.y %.l,$(GENERATED))
|
||||
PREREQ_BUILD += ytools
|
||||
FB_NAME = grammartest
|
||||
NAMESPACE_IN_GENERATED = gt
|
||||
|
||||
include $(TOPDIR)/make/proj.mk
|
||||
include $(MODDIR)/make/flex-bison.mk
|
||||
include $(MODDIR)/make/py-defs.mk
|
||||
|
||||
all:
|
||||
debug-all:
|
||||
GENERATE_LOG_LEVEL=debug make all 2>&1 | tee run.out
|
||||
|
||||
generate: $(GENERATED)
|
||||
|
||||
grammartest.y: include/grammartest.h
|
||||
lex.grammartest.c: grammartest.l
|
||||
|
||||
check: $(GRAMMAR_INPUT) $(GENERATE_PY) Makefile
|
||||
python ./$(GENERATE_PY) --log-level info check --fix-extensions unroll --unroll-lists --unroll-options --check-symbols='$(CHECK_SYMBOLS)' $<
|
||||
|
||||
grammartest-dense.ebnf: $(GRAMMAR_INPUT) $(GENERATE_PY)
|
||||
python ./$(GENERATE_PY) --log-level $(GENERATE_LOG_LEVEL) create --fix-extensions keep $< grammartest.ebnf > $@.tmp
|
||||
mv $@.tmp $@
|
||||
|
||||
define generate_rule
|
||||
$(1): $$(GRAMMAR_INPUT) $$(GENERATE_PY) Makefile
|
||||
$$(GENERATE) $$< $$(patsubst grammartest.%,grammartest.%,$$@) > $$@.tmp
|
||||
mv $$@.tmp $$@
|
||||
endef
|
||||
$(foreach target,$(GENERATED_STD),$(eval $(call generate_rule,$(target))))
|
||||
|
||||
clean.generated:
|
||||
rm -f $(GENERATED)
|
||||
clean: clean.generated
|
||||
echo-generated:
|
||||
@echo GENERATED = $(GENERATED)
|
||||
|
||||
help:
|
||||
$(GENERATE) --help
|
||||
|
||||
expand-macros:
|
||||
make 2>/dev/null | sed '/g++/ !d; s/g++\|gcc//; s/-o .*//' | xargs g++ -E -C | indent
|
||||
include $(TOPDIR)/make/generate-flex-bison.mk
|
||||
|
|
|
|||
|
|
@ -1,79 +1,6 @@
|
|||
#!/usr/bin/python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from __future__ import print_function
|
||||
import argparse
|
||||
import sys
|
||||
import re
|
||||
import textwrap
|
||||
from collections import OrderedDict
|
||||
from abc import abstractmethod
|
||||
|
||||
import jwutils
|
||||
|
||||
from jwutils.log import *
|
||||
from jwutils import grammar
|
||||
|
||||
base = 'grammartest'
|
||||
mip = '_JW_PYTHON_' + base + base.upper()
|
||||
namespace = base
|
||||
|
||||
def create_grammartest_ebnf(grammar):
|
||||
print(jwutils.grammar.create_ebnf(grammar))
|
||||
|
||||
def create_grammartest_y(grammar):
|
||||
print(jwutils.grammar.create_yacc(grammar))
|
||||
|
||||
def create_grammartest_l(grammar):
|
||||
print(jwutils.grammar.create_lex(grammar))
|
||||
|
||||
def create_include_grammartest_h(grammar):
|
||||
print(jwutils.grammar.create_header(grammar, mip=mip, namespace=namespace))
|
||||
|
||||
class GrammarCmd(jwutils.grammar.GrammarCmd):
|
||||
|
||||
def __init__(self, name, help):
|
||||
super(GrammarCmd, self).__init__(name, help=help)
|
||||
|
||||
@abstractmethod
|
||||
def _run(self, grammar):
|
||||
pass
|
||||
|
||||
def add_parser(self, parsers):
|
||||
p = super(GrammarCmd, self).add_parser(parsers)
|
||||
return p
|
||||
|
||||
def run(self, args):
|
||||
with open(args.input, 'r') as infile:
|
||||
contents = infile.read()
|
||||
grammar = jwutils.grammar.grammar_parse_ebnf(contents)
|
||||
grammar = super(GrammarCmd, self).processGrammar(args, grammar)
|
||||
self._run(args, grammar)
|
||||
|
||||
class CmdCreate(GrammarCmd):
|
||||
|
||||
def __init__(self):
|
||||
super(CmdCreate, self).__init__("create", help="Create a file")
|
||||
|
||||
def add_parser(self, parsers):
|
||||
p = super(CmdCreate, self).add_parser(parsers)
|
||||
p.add_argument("output", help="output file")
|
||||
return p
|
||||
|
||||
def _run(self, args, grammar):
|
||||
cmd = getattr(sys.modules[__name__], 'create_' + re.sub(r'[-./]', '_', args.output))
|
||||
cmd(grammar)
|
||||
|
||||
class CmdCheck(GrammarCmd):
|
||||
|
||||
def __init__(self):
|
||||
super(CmdCheck, self).__init__("check", help="Check grammar")
|
||||
|
||||
def add_parser(self, parsers):
|
||||
p = super(CmdCheck, self).add_parser(parsers)
|
||||
return p
|
||||
|
||||
def _run(self, args, grammar):
|
||||
pass
|
||||
|
||||
jwutils.run_sub_commands('generate Test parser files')
|
||||
jwutils.run_sub_commands('generate Test parser files', modules = ['jwutils.grammar'])
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ import os
|
|||
import sys
|
||||
import inspect
|
||||
import re
|
||||
import importlib
|
||||
|
||||
import Object
|
||||
import log
|
||||
|
|
@ -29,23 +30,27 @@ class Cmd(Object.Object): # export
|
|||
r.set_defaults(func=self.run)
|
||||
return r
|
||||
|
||||
def run_sub_commands(description = '', prefix = 'Cmd'): # export
|
||||
def run_sub_commands(description = '', filter = '^Cmd.*', modules=None): # export
|
||||
|
||||
classes = inspect.getmembers(sys.modules["__main__"], inspect.isclass)
|
||||
parser = argparse.ArgumentParser(usage=os.path.basename(sys.argv[0]) + ' [command] [options]',
|
||||
formatter_class=argparse.ArgumentDefaultsHelpFormatter, description=description)
|
||||
parser.add_argument('--log-level', help='Log level', default='notice')
|
||||
parser.add_argument('--log-flags', help='Log flags', default='stderr,position,prio,color')
|
||||
subparsers = parser.add_subparsers(title='Available commands', metavar='')
|
||||
|
||||
for (name, cls) in classes:
|
||||
if not re.match('^Cmd.', name):
|
||||
continue
|
||||
if inspect.isabstract(cls):
|
||||
continue
|
||||
cls().add_parser(subparsers)
|
||||
|
||||
args=parser.parse_args()
|
||||
if modules == None:
|
||||
modules = [ '__main__' ]
|
||||
for m in modules:
|
||||
if m != '__main__':
|
||||
importlib.import_module(m)
|
||||
for name, c in inspect.getmembers(sys.modules[m], inspect.isclass):
|
||||
if not re.match(filter, name):
|
||||
continue
|
||||
if inspect.isabstract(c):
|
||||
continue
|
||||
c().add_parser(subparsers)
|
||||
args = parser.parse_args()
|
||||
log.set_level(args.log_level)
|
||||
log.set_flags(args.log_flags)
|
||||
|
||||
args.func(args)
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@ import itertools
|
|||
import copy
|
||||
from collections import OrderedDict
|
||||
from abc import abstractmethod
|
||||
import os.path
|
||||
|
||||
import jwutils
|
||||
|
||||
|
|
@ -914,7 +915,7 @@ def grammar_check(grammar, check_symbols = None):
|
|||
slog(INFO, "======= checking", tok)
|
||||
rr = step_out(grammar, terminals, orphans, lexicals, tok, 0, checked=set(), found=found)
|
||||
if rr == sys.maxint:
|
||||
slog(ERR, "No way out for", tok, "in production", p.str())
|
||||
slog(ERR, "No way out for", tok)
|
||||
exit(1)
|
||||
if not tok in grammar.keys():
|
||||
slog(ERR, "Token", tok, "has no production")
|
||||
|
|
@ -1034,7 +1035,7 @@ def grammar_trim_symbols(grammar, symbols):
|
|||
|
||||
return grammar
|
||||
|
||||
def create_ebnf(grammar):
|
||||
def grammar_create_ebnf(grammar, opts):
|
||||
indent = 40
|
||||
slog(INFO, "creating ebnf from grammar of size", len(grammar))
|
||||
out = ''
|
||||
|
|
@ -1049,7 +1050,7 @@ def create_ebnf(grammar):
|
|||
out += ' ' * indent + ' ;\n'
|
||||
return out
|
||||
|
||||
def create_yacc(grammar):
|
||||
def grammar_create_y(grammar, opts):
|
||||
indent = ' ' * 40
|
||||
width = 0
|
||||
for t, p in grammar.iteritems():
|
||||
|
|
@ -1076,13 +1077,19 @@ def create_yacc(grammar):
|
|||
#include <vector>
|
||||
#include <string>
|
||||
|
||||
#include "include/defs.h"
|
||||
#include "include/vhdl2017.h"
|
||||
#include "include/lex.vhdl2017.h"
|
||||
#include "include/vhdl2017.tab.h"
|
||||
""")
|
||||
|
||||
for f in opts['includes']:
|
||||
out += '#include "' + f + '"' + '\n'
|
||||
#include "include/defs.h"
|
||||
#include "include/vhdl2017.h"
|
||||
#include "include/lex.vhdl2017.h"
|
||||
#include "include/vhdl2017.tab.h"
|
||||
|
||||
out += "\nusing namespace " + opts['namespace'] + ';\n'
|
||||
|
||||
out += textwrap.dedent("""\
|
||||
using namespace std;
|
||||
using namespace v2017;
|
||||
|
||||
namespace {
|
||||
|
||||
|
|
@ -1107,7 +1114,7 @@ def create_yacc(grammar):
|
|||
|
||||
types = grammar_get_types(grammar)
|
||||
for t in types.keys():
|
||||
out += '\n\tv2017::' + t + '_t *' + t + ';'
|
||||
out += '\n\t' + opts['namespace'] + '::' + t + '_t *' + t + ';'
|
||||
out += '\n'
|
||||
|
||||
out += textwrap.dedent("""\
|
||||
|
|
@ -1184,7 +1191,7 @@ def create_yacc(grammar):
|
|||
else:
|
||||
out += indent + "| " + format_yacc_rule(rule) + "\n"
|
||||
out += indent + "{" + "\n"
|
||||
out += indent + "\t" + "$$->type = v2017::" + t + "::t_" + str(n_rule) + ";\n"
|
||||
out += indent + "\t" + "$$->type = " + opts['namespace'] + '::' + t + "::t_" + str(n_rule) + ";\n"
|
||||
tokens = []
|
||||
for c in rule:
|
||||
if c.tp == t_target_lang:
|
||||
|
|
@ -1222,7 +1229,7 @@ def create_yacc(grammar):
|
|||
|
||||
return out + "\n"
|
||||
|
||||
def create_lex(grammar):
|
||||
def grammar_create_l(grammar, opts):
|
||||
|
||||
ignore = ""
|
||||
|
||||
|
|
@ -1233,13 +1240,19 @@ def create_lex(grammar):
|
|||
%{
|
||||
#include <slog.h>
|
||||
|
||||
#include "include/defs.h"
|
||||
#include "include/vhdl2017.h"
|
||||
""")
|
||||
|
||||
// #include "include/lex.vhdl2017.h"
|
||||
#include "include/vhdl2017.tab.h"
|
||||
for f in opts['includes']:
|
||||
out += '#include "' + f + '"' + '\n'
|
||||
#include "include/defs.h"
|
||||
#include "include/vhdl2017.h"
|
||||
|
||||
using namespace v2017;
|
||||
#// #include "include/lex.vhdl2017.h"
|
||||
#include "include/vhdl2017.tab.h"
|
||||
|
||||
out += "\nusing namespace " + opts['namespace'] + ';\n'
|
||||
|
||||
out += textwrap.dedent("""\
|
||||
|
||||
/* This is too late in the Flex generated file to work. Still lots of
|
||||
* prototypes are spat into it above it, which end up with C++ linkage, of
|
||||
|
|
@ -1399,10 +1412,12 @@ def create_lex(grammar):
|
|||
|
||||
return out
|
||||
|
||||
def create_header(grammar, mip, namespace = None):
|
||||
out = "#ifndef " + mip + '\n#define ' + mip + '\n\n'
|
||||
if namespace is not None:
|
||||
out += 'namespace ' + namespace + '{\n\n'
|
||||
def grammar_create_h(grammar, opts):
|
||||
out = "#ifndef " + opts['mip'] + '\n#define ' + opts['mip'] + '\n\n'
|
||||
ns = opts['namespace']
|
||||
|
||||
if ns is not None:
|
||||
out += 'namespace ' + ns + '{\n\n'
|
||||
|
||||
types = grammar_get_types(grammar)
|
||||
|
||||
|
|
@ -1463,9 +1478,9 @@ def create_header(grammar, mip, namespace = None):
|
|||
|
||||
out += '\n'
|
||||
|
||||
if namespace is not None:
|
||||
out += '\n} /* namespace ' + namespace + '*/'
|
||||
out += '\n#endif /* #ifndef + ' + mip + ' */'
|
||||
if ns is not None:
|
||||
out += '\n} /* namespace ' + ns + '*/'
|
||||
out += '\n#endif /* #ifndef + ' + opts['mip'] + ' */'
|
||||
|
||||
return out
|
||||
|
||||
|
|
@ -1510,3 +1525,79 @@ class GrammarCmd(jwutils.Cmd):
|
|||
if args.replace_whitespace:
|
||||
grammar = grammar_replace_whitespace(grammar)
|
||||
return grammar
|
||||
|
||||
# ------------------------------------------------- TODO: clean this up >
|
||||
|
||||
class DerivedGrammarCmd(GrammarCmd):
    """Base class for sub-commands that read a grammar file and act on it.

    run() reads the file named by ``args.input``, parses it with _parse(),
    passes the result through GrammarCmd.processGrammar() and hands the
    processed grammar to the subclass hook _run().
    """

    def __init__(self, name, help):
        """Register the sub-command under ``name`` with help text ``help``."""
        super(DerivedGrammarCmd, self).__init__(name, help=help)

    @abstractmethod
    def _run(self, args, grammar):
        """Subclass hook, called by run() with the processed grammar.

        The signature matches the call in run() and the overrides in the
        subclasses: ``args`` is the parsed command line, ``grammar`` the
        grammar returned by processGrammar().
        """
        pass

    def _parse(self, contents):
        """Parse the text of a grammar file; EBNF by default.

        Kept as a separate method so that a subclass can substitute a
        different input parser.
        """
        return grammar_parse_ebnf(contents)

    def add_parser(self, parsers):
        """Create this command's argument parser via the base class."""
        p = super(DerivedGrammarCmd, self).add_parser(parsers)
        return p

    def run(self, args):
        """Entry point of the sub-command: read, parse, process, dispatch."""
        # NOTE(review): args.input is presumably declared by
        # GrammarCmd.add_parser(), which is not visible here -- confirm.
        with open(args.input, 'r') as infile:
            contents = infile.read()
        grammar = self._parse(contents)
        grammar = super(DerivedGrammarCmd, self).processGrammar(args, grammar)
        self._run(args, grammar)
|
||||
|
||||
class CmdCreate(DerivedGrammarCmd):
    """Sub-command ``create``: generate one output file from the grammar.

    The kind of file is selected by the extension of the ``output``
    argument: extension ``xyz`` dispatches to the module-level function
    ``grammar_create_xyz(grammar, opts)``. The generated text is printed to
    stdout.
    """

    def __init__(self):
        super(CmdCreate, self).__init__("create", help="Create a file")

    def add_parser(self, parsers):
        """Add the ``output`` argument and the code generation options."""
        p = super(CmdCreate, self).add_parser(parsers)
        p.add_argument("output", help="output file")
        p.add_argument('--namespace', help='namespace of generated AST', default='parser')
        p.add_argument('--includes', help='list of header files to be #included in C/C++ implementation files', default='')
        return p

    def _run(self, args, grammar):
        """Look up the generator for the output's extension and print its result.

        Raises AttributeError if this module has no ``grammar_create_<ext>``
        function for the extension of ``args.output``.
        """
        # splitext() returns (root, '.ext'); only the extension without its
        # leading dot is needed. Taking it this way works for extensions of
        # any length (e.g. '.ebnf'), not just single-letter ones.
        ext = os.path.splitext(args.output)[1][1:]

        # The include guard macro is only needed for generated headers.
        mip = None
        if ext == 'h':
            mip = args.namespace + re.sub(r'[-./]', '_', args.output).upper()

        # Drop empty entries, so that an empty --includes (the default) does
        # not turn into a single '#include ""' line.
        includes = [f for f in args.includes.split(',') if f]

        # generated code breaks without this, not sure why
        if ext == 'l':
            includes = [f for f in includes if not re.match(r'.*lex\..*\.h', f)]

        cmd = getattr(sys.modules[__name__], 'grammar_create_' + ext)

        opts = {
            "namespace" : args.namespace,
            "includes"  : includes,
            "mip"       : mip
        }
        out = cmd(grammar, opts)
        print(out)
|
||||
|
||||
class CmdCheck(DerivedGrammarCmd):
    """Sub-command ``check``: read and process the grammar, generate nothing.

    NOTE(review): the actual checking presumably happens inside the shared
    grammar processing done by the base class before _run() is called --
    confirm against GrammarCmd.processGrammar().
    """

    def __init__(self):
        super(CmdCheck, self).__init__("check", help="Check grammar")

    def add_parser(self, parsers):
        """No options beyond those of the base class."""
        return super(CmdCheck, self).add_parser(parsers)

    def _run(self, args, grammar):
        """Nothing left to do once the grammar has been processed."""
        return None
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue