grammar.py: Add support for --irrelevant-symbols

Arguments to --irrelevant-symbols are not meant to be represented in the
AST resulting from parsing.

Also, add pad() to misc.py.

Signed-off-by: Jan Lindemann <jan@janware.com>
This commit is contained in:
Jan Lindemann 2017-10-30 10:00:25 +01:00
commit 8c5934725c
4 changed files with 43 additions and 19 deletions

View file

@ -4,8 +4,9 @@ NAMESPACE_IN_GENERATED ?= $(FB_NAME)
# These types are meant to be cut off the tree and turned into hand coded flex # These types are meant to be cut off the tree and turned into hand coded flex
# regexes # regexes
#TRIM_SYMBOLS = blah
TRIM_SYMBOLS ?= TRIM_SYMBOLS ?=
CUT_SYMBOLS ?=
IRRELEVANT_SYMBOLS ?=
GENERATE_LOG_LEVEL ?= notice GENERATE_LOG_LEVEL ?= notice
FIX_EXTENSIONS ?= discard FIX_EXTENSIONS ?= discard
CHECK_SYMBOLS ?= all CHECK_SYMBOLS ?= all
@ -24,6 +25,8 @@ GENERATE = python ./$(GENERATE_PY) --log-level $(GENERATE_LOG_LEVEL
--replace-whitespace \ --replace-whitespace \
$(OPT_CHECK_SYMBOLS) \ $(OPT_CHECK_SYMBOLS) \
--trim-symbols=$(shell echo $(TRIM_SYMBOLS) | sed 's/ */,/g') \ --trim-symbols=$(shell echo $(TRIM_SYMBOLS) | sed 's/ */,/g') \
--cut-symbols=$(shell echo $(CUT_SYMBOLS) | sed 's/ */,/g') \
--irrelevant-symbols=$(shell echo $(IRRELEVANT_SYMBOLS) | sed 's/ */,/g') \
--namespace=$(NAMESPACE_IN_GENERATED) \ --namespace=$(NAMESPACE_IN_GENERATED) \
--includes=$(shell echo $(INCLUDED_BY_GENERATED) | sed 's/ */,/g') \ --includes=$(shell echo $(INCLUDED_BY_GENERATED) | sed 's/ */,/g') \
$(CREATE_EXTRA_ARGS) $(CREATE_EXTRA_ARGS)

View file

@ -138,7 +138,7 @@ def tok2sym(tok):
if term is not None: if term is not None:
if term in special_terminals.keys(): if term in special_terminals.keys():
return "T_" + special_terminals[term].upper() return "T_" + special_terminals[term].upper()
return "T_" + term.upper() return "T_" + re.sub('[^a-zA-Z0-9]', '_', term).upper()
return tok return tok
def tok2regex(tok): def tok2regex(tok):
@ -319,6 +319,7 @@ class Symbol:
def __init__(self, token, tp = None, rules = None): def __init__(self, token, tp = None, rules = None):
self.reset(token, tp, rules) self.reset(token, tp, rules)
self.set_is_payload(True)
def reset(self, token, tp = None, rules = None): def reset(self, token, tp = None, rules = None):
if tp == None: if tp == None:
@ -339,6 +340,9 @@ class Symbol:
self.rules = rules self.rules = rules
self.set_type(tp) self.set_type(tp)
def set_is_payload(self, onoff):
    # Record whether this symbol counts as payload. Symbols whose flag is
    # false are skipped by the "if not <symbol>.is_payload: continue" checks
    # in grammar_get_types() and grammar_create_y().
    self.is_payload = onoff
def set_type(self, tp): def set_type(self, tp):
if tp == p_ruleset: if tp == p_ruleset:
self.term = None self.term = None
@ -551,6 +555,8 @@ def grammar_get_types(grammar):
pp = grammar[c.token] pp = grammar[c.token]
if pp.tp is p_terminal: if pp.tp is p_terminal:
continue continue
if not pp.is_payload:
continue
members.append(tok2sym(c.token)) members.append(tok2sym(c.token))
if True or len(members): if True or len(members):
rules.append(members) rules.append(members)
@ -1035,6 +1041,12 @@ def grammar_trim_symbols(grammar, symbols):
return grammar return grammar
# flag symbols as non-payload
def grammar_irrelevant_symbols(grammar, symbols):
    """Flag the given symbols as non-payload.

    grammar -- mapping from symbol name to symbol object; each affected
               object has set_is_payload(False) called on it
    symbols -- iterable of symbol names, typically the result of splitting
               a comma-separated command line argument

    Returns the same grammar object, modified in place.

    Raises KeyError if a non-empty name is not a key of grammar.
    """
    for name in symbols:
        if not name:
            # empty entries result from stray commas ("a,,b" or "a,") in
            # the comma-separated option value; there is nothing to flag
            continue
        try:
            symbol = grammar[name]
        except KeyError:
            # still a KeyError, but one that tells the user what went wrong
            raise KeyError("irrelevant symbol '%s' is not defined in the grammar" % name)
        symbol.set_is_payload(False)
    return grammar
def grammar_create_ebnf(grammar, opts): def grammar_create_ebnf(grammar, opts):
indent = 40 indent = 40
slog(INFO, "creating ebnf from grammar of size", len(grammar)) slog(INFO, "creating ebnf from grammar of size", len(grammar))
@ -1050,6 +1062,9 @@ def grammar_create_ebnf(grammar, opts):
out += ' ' * indent + ' ;\n' out += ' ' * indent + ' ;\n'
return out return out
def format_token(sym, tp):
    """Build one '%token' declaration line (without trailing newline).

    sym -- symbol name; used both inside the angle brackets and as the
           declared token name
    tp  -- text placed in the trailing C comment (the callers in
           grammar_create_y() pass the grammar token here)
    """
    declaration = misc.pad('%token <' + sym + '>', 27)
    name_column = misc.pad(sym, 20)
    comment = '/* ' + tp + ' */'
    return declaration + name_column + comment
def grammar_create_y(grammar, opts): def grammar_create_y(grammar, opts):
indent = ' ' * 40 indent = ' ' * 40
width = 0 width = 0
@ -1134,7 +1149,7 @@ def grammar_create_y(grammar, opts):
out += '\n' out += '\n'
for t, p in grammar.iteritems(): for t, p in grammar.iteritems():
if p.tp == p_terminal: if p.tp == p_terminal:
out += '%token <' + p.sym + '> ' + p.sym + (20 - len(p.sym)) * ' ' + '/* ' + t + ' */' +'\n' out += format_token(p.sym, t) +'\n'
# special tokens # special tokens
out += '\n' out += '\n'
@ -1142,19 +1157,19 @@ def grammar_create_y(grammar, opts):
if p.tp == p_special: if p.tp == p_special:
if p.token == '?': # TODO: why is this among the symbols anyway? if p.token == '?': # TODO: why is this among the symbols anyway?
continue continue
out += '%token <' + p.sym + '> ' + p.sym + (20 - len(p.sym)) * ' ' + '/* ' + t + ' */' +'\n' out += format_token(p.sym, t) +'\n'
# regex tokens # regex tokens
out += '\n' out += '\n'
for t, p in grammar.iteritems(): for t, p in grammar.iteritems():
if p.tp == p_literal: if p.tp == p_literal:
out += '%token <' + p.sym + '> ' + p.sym + (20 - len(p.sym)) * ' ' + '/* ' + t + ' */' +'\n' out += format_token(p.sym, t) +'\n'
# types # types
out += '\n' out += '\n'
for t, p in grammar.iteritems(): for t, p in grammar.iteritems():
if p.tp == p_ruleset: if p.tp == p_ruleset:
out += '%type <' + tok2sym(p.token) + '> ' + t + (40 - len(t)) * ' ' + '/* ' + t + ' */' +'\n' out += misc.pad('%type <' + tok2sym(p.token) + '>', 40) + misc.pad(t, 35) + '/* ' + t + ' */' +'\n'
out += textwrap.dedent("""\ out += textwrap.dedent("""\
@ -1207,6 +1222,8 @@ def grammar_create_y(grammar, opts):
# continue # continue
if p.tp not in [ p_ruleset ]: if p.tp not in [ p_ruleset ]:
continue continue
if not p.is_payload:
continue
tp = tok2name(c.token) tp = tok2name(c.token)
suffix = '' suffix = ''
if tokens.count(c.token) > 1: if tokens.count(c.token) > 1:
@ -1499,6 +1516,7 @@ class GrammarCmd(jwutils.Cmd):
p.add_argument('-w', '--replace-whitespace', help='replace white space in tokens by underscore characters', action='store_true', default=False) p.add_argument('-w', '--replace-whitespace', help='replace white space in tokens by underscore characters', action='store_true', default=False)
p.add_argument('--check-symbols', help='check symbols, comma-separated or "all"', nargs='?', default='') p.add_argument('--check-symbols', help='check symbols, comma-separated or "all"', nargs='?', default='')
p.add_argument('-t', '--trim-symbols', help='trim grammar tree at symbol', nargs='?', default='') p.add_argument('-t', '--trim-symbols', help='trim grammar tree at symbol', nargs='?', default='')
p.add_argument('-r', '--irrelevant-symbols', help='exclude symbol from output payload', nargs='?', default='')
p.add_argument('-c', '--cut-symbols', help='cut grammar tree at symbol', nargs='?', default='') p.add_argument('-c', '--cut-symbols', help='cut grammar tree at symbol', nargs='?', default='')
return p return p
@ -1518,12 +1536,14 @@ class GrammarCmd(jwutils.Cmd):
args.check_symbols = '' args.check_symbols = ''
check_symbols = args.check_symbols.split() check_symbols = args.check_symbols.split()
grammar_check(grammar, check_symbols) grammar_check(grammar, check_symbols)
if args.replace_whitespace:
grammar = grammar_replace_whitespace(grammar)
if len(args.trim_symbols): if len(args.trim_symbols):
grammar = grammar_trim_symbols(grammar, args.trim_symbols.split(',')) grammar = grammar_trim_symbols(grammar, args.trim_symbols.split(','))
if len(args.cut_symbols): if len(args.cut_symbols):
grammar = grammar_cut_symbols(grammar, args.cut_symbols.split(',')) grammar = grammar_cut_symbols(grammar, args.cut_symbols.split(','))
if args.replace_whitespace: if len(args.irrelevant_symbols):
grammar = grammar_replace_whitespace(grammar) grammar = grammar_irrelevant_symbols(grammar, args.irrelevant_symbols.split(','))
return grammar return grammar
# ------------------------------------------------- TODO: clean this up > # ------------------------------------------------- TODO: clean this up >

View file

@ -3,6 +3,7 @@ import syslog
import sys import sys
import inspect import inspect
from os.path import basename from os.path import basename
import misc
EMERG = syslog.LOG_EMERG EMERG = syslog.LOG_EMERG
ALERT = syslog.LOG_ALERT ALERT = syslog.LOG_ALERT
@ -63,15 +64,6 @@ prio_colors = {
EMERG : [ CONSOLE_FONT_BOLD + CONSOLE_FONT_MAGENTA, CONSOLE_FONT_OFF ], EMERG : [ CONSOLE_FONT_BOLD + CONSOLE_FONT_MAGENTA, CONSOLE_FONT_OFF ],
} }
def __pad(token, total_size, right_align = False):
    """Pad token with spaces to a width of at least total_size characters.

    token       -- string to pad
    total_size  -- minimum width of the result
    right_align -- if true, the spaces go in front of token, otherwise
                   behind it

    Returns token unchanged if it is already total_size characters or
    longer; it is never truncated.
    """
    # str.rjust() / str.ljust() pad with spaces by default and hand back the
    # string as is when it is already wide enough
    if right_align:
        return token.rjust(total_size)
    return token.ljust(total_size)
def get_caller_pos(up = 1): def get_caller_pos(up = 1):
assert(up == 1) # TODO: implement this assert(up == 1) # TODO: implement this
caller_frame = inspect.currentframe().f_back.f_back caller_frame = inspect.currentframe().f_back.f_back
@ -95,7 +87,7 @@ def slog(prio, *args, **kwargs): # export
name, line = kwargs['caller'] name, line = kwargs['caller']
else: else:
name, line = get_caller_pos(1) name, line = get_caller_pos(1)
msg += __pad(name, 20) + '[' + __pad(str(line), 4, True) + ']' msg += misc.pad(name, 20) + '[' + misc.pad(str(line), 4, True) + ']'
if f_color in flags: if f_color in flags:
color_on, color_off = prio_colors[prio] color_on, color_off = prio_colors[prio]

View file

@ -6,3 +6,12 @@ def silentremove(filename): #export
except OSError as e: except OSError as e:
if e.errno != errno.ENOENT: if e.errno != errno.ENOENT:
raise # re-raise exception if a different error occurred raise # re-raise exception if a different error occurred
def pad(token, total_size, right_align = False):
    """Pad token with spaces to a width of at least total_size characters.

    token       -- string to pad
    total_size  -- minimum width of the result
    right_align -- if true, the spaces go in front of token, otherwise
                   behind it

    Returns token unchanged if it is already total_size characters or
    longer; it is never truncated.
    """
    # str.rjust() / str.ljust() pad with spaces by default and hand back the
    # string as is when it is already wide enough
    if right_align:
        return token.rjust(total_size)
    return token.ljust(total_size)