mirror of
ssh://git.janware.com/srv/git/janware/proj/jw-python
synced 2026-01-15 09:53:32 +01:00
grammar.py: Add support for --irrelevant-symbols
Symbols passed to --irrelevant-symbols are flagged as non-payload, i.e. they are not meant to be represented in the AST resulting from parsing. Also, add pad() to misc.py, replacing the private __pad() helper used by slog(). Signed-off-by: Jan Lindemann <jan@janware.com>
This commit is contained in:
parent
6297f10f55
commit
8c5934725c
4 changed files with 43 additions and 19 deletions
|
|
@ -4,8 +4,9 @@ NAMESPACE_IN_GENERATED ?= $(FB_NAME)
|
||||||
|
|
||||||
# These types are meant to be cut off the tree and turned into hand coded flex
|
# These types are meant to be cut off the tree and turned into hand coded flex
|
||||||
# regexes
|
# regexes
|
||||||
#TRIM_SYMBOLS = blah
|
|
||||||
TRIM_SYMBOLS ?=
|
TRIM_SYMBOLS ?=
|
||||||
|
CUT_SYMBOLS ?=
|
||||||
|
IRRELEVANT_SYMBOLS ?=
|
||||||
GENERATE_LOG_LEVEL ?= notice
|
GENERATE_LOG_LEVEL ?= notice
|
||||||
FIX_EXTENSIONS ?= discard
|
FIX_EXTENSIONS ?= discard
|
||||||
CHECK_SYMBOLS ?= all
|
CHECK_SYMBOLS ?= all
|
||||||
|
|
@ -24,6 +25,8 @@ GENERATE = python ./$(GENERATE_PY) --log-level $(GENERATE_LOG_LEVEL
|
||||||
--replace-whitespace \
|
--replace-whitespace \
|
||||||
$(OPT_CHECK_SYMBOLS) \
|
$(OPT_CHECK_SYMBOLS) \
|
||||||
--trim-symbols=$(shell echo $(TRIM_SYMBOLS) | sed 's/ */,/g') \
|
--trim-symbols=$(shell echo $(TRIM_SYMBOLS) | sed 's/ */,/g') \
|
||||||
|
--cut-symbols=$(shell echo $(CUT_SYMBOLS) | sed 's/ */,/g') \
|
||||||
|
--irrelevant-symbols=$(shell echo $(IRRELEVANT_SYMBOLS) | sed 's/ */,/g') \
|
||||||
--namespace=$(NAMESPACE_IN_GENERATED) \
|
--namespace=$(NAMESPACE_IN_GENERATED) \
|
||||||
--includes=$(shell echo $(INCLUDED_BY_GENERATED) | sed 's/ */,/g') \
|
--includes=$(shell echo $(INCLUDED_BY_GENERATED) | sed 's/ */,/g') \
|
||||||
$(CREATE_EXTRA_ARGS)
|
$(CREATE_EXTRA_ARGS)
|
||||||
|
|
|
||||||
|
|
@ -138,7 +138,7 @@ def tok2sym(tok):
|
||||||
if term is not None:
|
if term is not None:
|
||||||
if term in special_terminals.keys():
|
if term in special_terminals.keys():
|
||||||
return "T_" + special_terminals[term].upper()
|
return "T_" + special_terminals[term].upper()
|
||||||
return "T_" + term.upper()
|
return "T_" + re.sub('[^a-zA-Z0-9]', '_', term).upper()
|
||||||
return tok
|
return tok
|
||||||
|
|
||||||
def tok2regex(tok):
|
def tok2regex(tok):
|
||||||
|
|
@ -319,6 +319,7 @@ class Symbol:
|
||||||
|
|
||||||
def __init__(self, token, tp = None, rules = None):
|
def __init__(self, token, tp = None, rules = None):
|
||||||
self.reset(token, tp, rules)
|
self.reset(token, tp, rules)
|
||||||
|
self.set_is_payload(True)
|
||||||
|
|
||||||
def reset(self, token, tp = None, rules = None):
|
def reset(self, token, tp = None, rules = None):
|
||||||
if tp == None:
|
if tp == None:
|
||||||
|
|
@ -339,6 +340,9 @@ class Symbol:
|
||||||
self.rules = rules
|
self.rules = rules
|
||||||
self.set_type(tp)
|
self.set_type(tp)
|
||||||
|
|
||||||
|
def set_is_payload(self, onoff):
    """Record whether this symbol counts as payload.

    onoff -- value stored verbatim in self.is_payload
    """
    self.is_payload = onoff
|
||||||
|
|
||||||
def set_type(self, tp):
|
def set_type(self, tp):
|
||||||
if tp == p_ruleset:
|
if tp == p_ruleset:
|
||||||
self.term = None
|
self.term = None
|
||||||
|
|
@ -551,6 +555,8 @@ def grammar_get_types(grammar):
|
||||||
pp = grammar[c.token]
|
pp = grammar[c.token]
|
||||||
if pp.tp is p_terminal:
|
if pp.tp is p_terminal:
|
||||||
continue
|
continue
|
||||||
|
if not pp.is_payload:
|
||||||
|
continue
|
||||||
members.append(tok2sym(c.token))
|
members.append(tok2sym(c.token))
|
||||||
if True or len(members):
|
if True or len(members):
|
||||||
rules.append(members)
|
rules.append(members)
|
||||||
|
|
@ -1035,6 +1041,12 @@ def grammar_trim_symbols(grammar, symbols):
|
||||||
|
|
||||||
return grammar
|
return grammar
|
||||||
|
|
||||||
|
# flag symbols as non-payload
|
||||||
|
def grammar_irrelevant_symbols(grammar, symbols):
    """Flag the given symbols of the grammar as non-payload.

    grammar -- mapping of token name to symbol object; each listed symbol
               has its set_is_payload(False) called
    symbols -- iterable of token names; empty names are skipped

    Returns the grammar that was passed in, its symbols are modified in
    place. Raises KeyError if a listed symbol is not part of the grammar.
    """
    for name in symbols:
        # Splitting a comma-separated option value leaves empty strings
        # behind for leading, trailing or doubled commas; ignore those
        # instead of failing with KeyError('')
        if not name:
            continue
        if name not in grammar:
            raise KeyError('irrelevant symbol "%s" is not part of the grammar' % name)
        grammar[name].set_is_payload(False)

    return grammar
|
||||||
def grammar_create_ebnf(grammar, opts):
|
def grammar_create_ebnf(grammar, opts):
|
||||||
indent = 40
|
indent = 40
|
||||||
slog(INFO, "creating ebnf from grammar of size", len(grammar))
|
slog(INFO, "creating ebnf from grammar of size", len(grammar))
|
||||||
|
|
@ -1050,6 +1062,9 @@ def grammar_create_ebnf(grammar, opts):
|
||||||
out += ' ' * indent + ' ;\n'
|
out += ' ' * indent + ' ;\n'
|
||||||
return out
|
return out
|
||||||
|
|
||||||
|
def format_token(sym, tp):
    """Build one '%token' declaration line, without trailing newline.

    sym -- symbol name, used both as the <...> tag and as the token name
    tp  -- text placed into the trailing /* ... */ comment; the callers in
           grammar_create_y() pass the grammar key of the symbol here

    The tag is padded to 27 and the name to 20 characters with misc.pad(),
    so that names and comments line up in columns.
    """
    declaration = misc.pad('%token <' + sym + '>', 27)
    name = misc.pad(sym, 20)
    comment = '/* ' + tp + ' */'
    return declaration + name + comment
|
||||||
|
|
||||||
def grammar_create_y(grammar, opts):
|
def grammar_create_y(grammar, opts):
|
||||||
indent = ' ' * 40
|
indent = ' ' * 40
|
||||||
width = 0
|
width = 0
|
||||||
|
|
@ -1134,7 +1149,7 @@ def grammar_create_y(grammar, opts):
|
||||||
out += '\n'
|
out += '\n'
|
||||||
for t, p in grammar.iteritems():
|
for t, p in grammar.iteritems():
|
||||||
if p.tp == p_terminal:
|
if p.tp == p_terminal:
|
||||||
out += '%token <' + p.sym + '> ' + p.sym + (20 - len(p.sym)) * ' ' + '/* ' + t + ' */' +'\n'
|
out += format_token(p.sym, t) +'\n'
|
||||||
|
|
||||||
# special tokens
|
# special tokens
|
||||||
out += '\n'
|
out += '\n'
|
||||||
|
|
@ -1142,19 +1157,19 @@ def grammar_create_y(grammar, opts):
|
||||||
if p.tp == p_special:
|
if p.tp == p_special:
|
||||||
if p.token == '?': # TODO: why is this among the symbols anyway?
|
if p.token == '?': # TODO: why is this among the symbols anyway?
|
||||||
continue
|
continue
|
||||||
out += '%token <' + p.sym + '> ' + p.sym + (20 - len(p.sym)) * ' ' + '/* ' + t + ' */' +'\n'
|
out += format_token(p.sym, t) +'\n'
|
||||||
|
|
||||||
# regex tokens
|
# regex tokens
|
||||||
out += '\n'
|
out += '\n'
|
||||||
for t, p in grammar.iteritems():
|
for t, p in grammar.iteritems():
|
||||||
if p.tp == p_literal:
|
if p.tp == p_literal:
|
||||||
out += '%token <' + p.sym + '> ' + p.sym + (20 - len(p.sym)) * ' ' + '/* ' + t + ' */' +'\n'
|
out += format_token(p.sym, t) +'\n'
|
||||||
|
|
||||||
# types
|
# types
|
||||||
out += '\n'
|
out += '\n'
|
||||||
for t, p in grammar.iteritems():
|
for t, p in grammar.iteritems():
|
||||||
if p.tp == p_ruleset:
|
if p.tp == p_ruleset:
|
||||||
out += '%type <' + tok2sym(p.token) + '> ' + t + (40 - len(t)) * ' ' + '/* ' + t + ' */' +'\n'
|
out += misc.pad('%type <' + tok2sym(p.token) + '>', 40) + misc.pad(t, 35) + '/* ' + t + ' */' +'\n'
|
||||||
|
|
||||||
out += textwrap.dedent("""\
|
out += textwrap.dedent("""\
|
||||||
|
|
||||||
|
|
@ -1207,6 +1222,8 @@ def grammar_create_y(grammar, opts):
|
||||||
# continue
|
# continue
|
||||||
if p.tp not in [ p_ruleset ]:
|
if p.tp not in [ p_ruleset ]:
|
||||||
continue
|
continue
|
||||||
|
if not p.is_payload:
|
||||||
|
continue
|
||||||
tp = tok2name(c.token)
|
tp = tok2name(c.token)
|
||||||
suffix = ''
|
suffix = ''
|
||||||
if tokens.count(c.token) > 1:
|
if tokens.count(c.token) > 1:
|
||||||
|
|
@ -1499,6 +1516,7 @@ class GrammarCmd(jwutils.Cmd):
|
||||||
p.add_argument('-w', '--replace-whitespace', help='replace white space in tokens by underscore characters', action='store_true', default=False)
|
p.add_argument('-w', '--replace-whitespace', help='replace white space in tokens by underscore characters', action='store_true', default=False)
|
||||||
p.add_argument('--check-symbols', help='check symbols, comma-separated or "all"', nargs='?', default='')
|
p.add_argument('--check-symbols', help='check symbols, comma-separated or "all"', nargs='?', default='')
|
||||||
p.add_argument('-t', '--trim-symbols', help='trim grammar tree at symbol', nargs='?', default='')
|
p.add_argument('-t', '--trim-symbols', help='trim grammar tree at symbol', nargs='?', default='')
|
||||||
|
p.add_argument('-r', '--irrelevant-symbols', help='exclude symbol from output payload', nargs='?', default='')
|
||||||
p.add_argument('-c', '--cut-symbols', help='cut grammar tree at symbol', nargs='?', default='')
|
p.add_argument('-c', '--cut-symbols', help='cut grammar tree at symbol', nargs='?', default='')
|
||||||
return p
|
return p
|
||||||
|
|
||||||
|
|
@ -1518,12 +1536,14 @@ class GrammarCmd(jwutils.Cmd):
|
||||||
args.check_symbols = ''
|
args.check_symbols = ''
|
||||||
check_symbols = args.check_symbols.split()
|
check_symbols = args.check_symbols.split()
|
||||||
grammar_check(grammar, check_symbols)
|
grammar_check(grammar, check_symbols)
|
||||||
|
if args.replace_whitespace:
|
||||||
|
grammar = grammar_replace_whitespace(grammar)
|
||||||
if len(args.trim_symbols):
|
if len(args.trim_symbols):
|
||||||
grammar = grammar_trim_symbols(grammar, args.trim_symbols.split(','))
|
grammar = grammar_trim_symbols(grammar, args.trim_symbols.split(','))
|
||||||
if len(args.cut_symbols):
|
if len(args.cut_symbols):
|
||||||
grammar = grammar_cut_symbols(grammar, args.cut_symbols.split(','))
|
grammar = grammar_cut_symbols(grammar, args.cut_symbols.split(','))
|
||||||
if args.replace_whitespace:
|
if len(args.irrelevant_symbols):
|
||||||
grammar = grammar_replace_whitespace(grammar)
|
grammar = grammar_irrelevant_symbols(grammar, args.irrelevant_symbols.split(','))
|
||||||
return grammar
|
return grammar
|
||||||
|
|
||||||
# ------------------------------------------------- TODO: clean this up >
|
# ------------------------------------------------- TODO: clean this up >
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,7 @@ import syslog
|
||||||
import sys
|
import sys
|
||||||
import inspect
|
import inspect
|
||||||
from os.path import basename
|
from os.path import basename
|
||||||
|
import misc
|
||||||
|
|
||||||
EMERG = syslog.LOG_EMERG
|
EMERG = syslog.LOG_EMERG
|
||||||
ALERT = syslog.LOG_ALERT
|
ALERT = syslog.LOG_ALERT
|
||||||
|
|
@ -63,15 +64,6 @@ prio_colors = {
|
||||||
EMERG : [ CONSOLE_FONT_BOLD + CONSOLE_FONT_MAGENTA, CONSOLE_FONT_OFF ],
|
EMERG : [ CONSOLE_FONT_BOLD + CONSOLE_FONT_MAGENTA, CONSOLE_FONT_OFF ],
|
||||||
}
|
}
|
||||||
|
|
||||||
# Pad token with spaces up to a width of total_size characters. A token that
# is already total_size characters long or longer is returned unchanged, it
# is never truncated. The padding is prepended if right_align is true and
# appended otherwise.
def __pad(token, total_size, right_align = False):
|
|
||||||
# number of spaces missing to reach total_size
add = total_size - len(token)
|
|
||||||
# token already fills the requested width (or exceeds it)
if add <= 0:
|
|
||||||
return token
|
|
||||||
space = ' ' * add
|
|
||||||
if right_align:
|
|
||||||
# padding in front pushes the token to the right edge
return space + token
|
|
||||||
# default: token on the left, padding behind it
return token + space
|
|
||||||
|
|
||||||
def get_caller_pos(up = 1):
|
def get_caller_pos(up = 1):
|
||||||
assert(up == 1) # TODO: implement this
|
assert(up == 1) # TODO: implement this
|
||||||
caller_frame = inspect.currentframe().f_back.f_back
|
caller_frame = inspect.currentframe().f_back.f_back
|
||||||
|
|
@ -95,7 +87,7 @@ def slog(prio, *args, **kwargs): # export
|
||||||
name, line = kwargs['caller']
|
name, line = kwargs['caller']
|
||||||
else:
|
else:
|
||||||
name, line = get_caller_pos(1)
|
name, line = get_caller_pos(1)
|
||||||
msg += __pad(name, 20) + '[' + __pad(str(line), 4, True) + ']'
|
msg += misc.pad(name, 20) + '[' + misc.pad(str(line), 4, True) + ']'
|
||||||
|
|
||||||
if f_color in flags:
|
if f_color in flags:
|
||||||
color_on, color_off = prio_colors[prio]
|
color_on, color_off = prio_colors[prio]
|
||||||
|
|
|
||||||
|
|
@ -6,3 +6,12 @@ def silentremove(filename): #export
|
||||||
except OSError as e:
|
except OSError as e:
|
||||||
if e.errno != errno.ENOENT:
|
if e.errno != errno.ENOENT:
|
||||||
raise # re-raise exception if a different error occurred
|
raise # re-raise exception if a different error occurred
|
||||||
|
|
||||||
|
def pad(token, total_size, right_align = False):
    """Pad token with spaces to a width of at least total_size characters.

    token       -- string to pad
    total_size  -- minimum width of the result; a token that is already at
                   least this long is returned unchanged, never truncated
    right_align -- if true, the padding is inserted in front of the token,
                   otherwise it is appended

    Returns the padded string.
    """
    # rjust() / ljust() fill with spaces by default and hand the string back
    # unchanged if it is already wide enough, which is exactly the contract
    # described above
    if right_align:
        return token.rjust(total_size)
    return token.ljust(total_size)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue