mirror of
ssh://git.janware.com/srv/git/janware/proj/jw-python
synced 2026-01-15 01:52:56 +01:00
grammar.py and friends: Make list parsing run through
For the first time, parsing runs through without a syntax error. No usable AST is produced yet: strings are not returned from the lexer, and the AST lists aren't really lists.

TEXT:="Hello world!"; had to be excluded from the example, because I don't see how it could be parsed with the given syntax. There is a special sequence, "all visible characters", but any lexer regex I could think of will also match the types defining "alphabetic character" and return the respective tokens (e.g. T_A), or vice versa, depending on the rule order in the lexer input file. I suppose the only sensible way to handle this is to define "all visible characters" by defining tokens for the missing characters, and then to use them alongside T_A ... T_Z or their derived types.

Signed-off-by: Jan Lindemann <jan@janware.com>
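The rule-order ambiguity described above can be sketched outside of flex. Below is a minimal Python simulation of a lexer that, like flex, prefers the longest match and breaks ties by rule order; the rule names are illustrative (only T_A appears in the message above) and are not taken from the generated .l file.

    import re

    # Hypothetical rules in lexer-file order. POSIX [[:print:]] is
    # rewritten as the ASCII printable range [ -~] for Python's re.
    RULES = [
        ("T_A", re.compile(r"A")),
        ("ALL_CHARACTERS", re.compile(r"[ -~]")),
    ]

    def next_token(text, pos=0):
        """Longest match wins; on equal length, the earlier rule wins."""
        best_name, best_lexeme = None, ""
        for name, rx in RULES:
            m = rx.match(text, pos)
            if m and len(m.group()) > len(best_lexeme):
                best_name, best_lexeme = name, m.group()
        if best_name is None:
            raise SyntaxError("no rule matches at position %d" % pos)
        return best_name, best_lexeme

    print(next_token("A"))  # ('T_A', 'A') -- ALL_CHARACTERS never sees 'A'
    print(next_token("!"))  # ('ALL_CHARACTERS', '!')

Swapping the two rules makes ALL_CHARACTERS shadow T_A instead, which is why a catch-all "all visible characters" token cannot coexist with per-character tokens unless the missing characters get tokens of their own.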
This commit is contained in:
parent 6ca2eeef61
commit a3a8313ce8

6 changed files with 61 additions and 30 deletions
@@ -1,10 +1,13 @@
TOPDIR = ../..

-include local.mk

EXE_ARGS ?= grammartest.code
PREREQ_BUILD += ytools
FB_NAME = grammartest
NAMESPACE_IN_GENERATED = gt
GENERATE_CONFIG_FILE = generate.conf
IRRELEVANT_SYMBOLS ?= white_space

include $(TOPDIR)/make/proj.mk
include $(TOPDIR)/make/generate-flex-bison.mk
@@ -1,8 +1,18 @@
[symbols]

[white_space[
type = token
regex = "[ \n\t\r]+"
]

+[all_characters[
+type = non-terminal
+regex = "[[:print:]]"
+#lex_as = yytext[0]
+]

+[test[
+type = token
+dings = bums
+regex = "bumsdings"
+]
@@ -2,7 +2,7 @@
program = 'PROGRAM', white space, identifier, white space,
          'BEGIN', white space,
          { assignment, ";", white space },
-         'END.' ;
+         'END.', [ white space ];
identifier = alphabetic character, { alphabetic character | digit } ;
number = [ "-" ], digit, { digit } ;
string = '"' , { all characters }, '"' ;
@@ -6,5 +6,4 @@ BEGIN
C:=A;
D123:=B34A;
BABOON:=GIRAFFE;
-TEXT:="Hello world!";
END.
@@ -34,6 +34,9 @@ mode_keep = "keep"
mode_discard = "discard"
fix_extensions_mode = [ mode_unroll, mode_concat, mode_keep, mode_discard ]

+c_token = "token"
+c_non_terminal = "non-terminal"

member_prefix = ''

special_terminals = {
@@ -638,23 +641,25 @@ def grammar_unroll_lists(grammar):
            if c.token == '}':
                if len(listrule) == 0:
                    raise Exception("Rule of production", p.name, "contains empty list:", format_rule(rule))
                name = ""
                delpos = []
                name = "list"
                for i, rule in enumerate(listrule):
                    if rule.token in delimiters:
                        delpos.append(i)
                        continue
                    if rule.tp != t_target_lang:
                        continue
                    name += tok2name(rule.token) + "_"
                    name += "_" + tok2name(rule.token)

                # not really: there are lists without delimiters, too
                #if len(delpos) != 1:
                #    p.dump(ERR)
                #    raise Exception("need exactly one delimiter in list rule:", format_rule(listrule))

                name = name + "my_list"
                newrule.append(RuleComp(name, t_target_lang))
                listrule.insert(0, RuleComp('(', t_grammar))
                listrule.insert(0, RuleComp(name, t_target_lang)) # enable iteration
                listrule.append(RuleComp(')', t_grammar))
                p = Symbol(name, rules=[[], listrule])
                #p = Symbol(name)
                #p.rules = [ [], listrule ]
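In effect, the hunk above rewrites an EBNF repetition into a left-recursive helper production: the fresh symbol is prepended to its own rule ("enable iteration") and paired with an empty alternative (rules=[[], listrule]). A simplified, self-contained sketch of that shape, with RuleComp reduced to plain strings and an illustrative generated name:

    # Simplified sketch: an EBNF repetition { assignment ";" white_space }
    # becomes   list : /* empty */ | list assignment ';' white_space
    # mirroring rules=[[], listrule] with the symbol prepended above.
    def unroll_list(name, items):
        listrule = [name] + list(items)   # prepend the symbol itself
        return [[], listrule]             # empty alternative | recursion

    name = "assignment_list"              # illustrative, not the real name
    for rule in unroll_list(name, ["assignment", "';'", "white_space"]):
        print(name, ":", " ".join(rule) or "/* empty */")
    # assignment_list : /* empty */
    # assignment_list : assignment_list assignment ';' white_space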
@@ -1065,7 +1070,9 @@ def grammar_create_ebnf(grammar, opts):
        out += ' ' * indent + ' ;\n'
    return out

-def tokens_from_config(conf):
+def symbols_from_config(conf, types = None):
+    if types == None or types == "all":
+        types = [ c_token, c_non_terminal ]
    r = set()
    if conf is None:
        return r
@@ -1073,7 +1080,7 @@ def tokens_from_config(conf):
    if symbols is None:
        return r
    for k, v in symbols.iteritems():
-        if v.get('regex') is not None:
+        if v["type"].value() in types:
            r.add(k)
    return r
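Read together, the two hunks above turn tokens_from_config into a type-filtered symbols_from_config. A condensed, runnable approximation; the real config wraps values (hence v["type"].value() above), while plain dicts are used here:

    # Condensed stand-in for symbols_from_config after this commit.
    def symbols_from_config(conf, types=None):
        if types is None or types == "all":
            types = ["token", "non-terminal"]
        r = set()
        if conf is None:
            return r
        symbols = conf.get("symbols")
        if symbols is None:
            return r
        for k, v in symbols.items():
            if v["type"] in types:        # real code: v["type"].value()
                r.add(k)
        return r

    conf = {"symbols": {"white_space": {"type": "token"},
                        "all_characters": {"type": "non-terminal"}}}
    print(sorted(symbols_from_config(conf, ["token"])))   # ['white_space']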
@@ -1094,7 +1101,7 @@ def grammar_create_y(grammar, opts):
    indent = '\t' * (spaces / 8)

    conf = opts['config']
-    tokens = tokens_from_config(conf)
+    conf_tokens = symbols_from_config(conf, [ c_token, c_non_terminal ])

    out = ""

@@ -1144,7 +1151,7 @@ def grammar_create_y(grammar, opts):

    types = grammar_get_types(grammar)
    for t in types.keys():
-        if t in tokens:
+        if conf is not None and t in conf['symbols'].keys():
            continue
        out += '\n\t' + opts['namespace'] + '::' + t + '_t *' + t + ';'
    out += '\n'
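For illustration, a hand-run of the member-emitting line above, using the namespace gt set via NAMESPACE_IN_GENERATED in the Makefile; "identifier" is an illustrative symbol name, not necessarily one the generator produces:

    # What out += '\n\t' + opts['namespace'] + '::' + t + '_t *' + t + ';'
    # appends for one symbol.
    opts = {"namespace": "gt"}
    t = "identifier"
    print("\t" + opts["namespace"] + "::" + t + "_t *" + t + ";")
    # output:   gt::identifier_t *identifier;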
@@ -1183,6 +1190,7 @@ def grammar_create_y(grammar, opts):
        out += format_token(p.sym, t) +'\n'

    # tokens from config
    if conf is not None:
        for k, t in conf['symbols'].iteritems():
+            slog(NOTICE, "adding token", k)
            out += format_token(k, "blah") + '\n'
@@ -1190,7 +1198,7 @@ def grammar_create_y(grammar, opts):
    # types
    out += '\n'
    for t, p in grammar.iteritems():
-        if p.sym in conf['symbols'].keys():
+        if conf is not None and p.sym in conf['symbols'].keys():
            continue
        if p.tp == p_ruleset:
            out += misc.pad('%type <' + tok2sym(p.token) + '>', 40) + misc.pad(t, 35) + '/* ' + t + ' */' +'\n'
@@ -1212,7 +1220,7 @@ def grammar_create_y(grammar, opts):
            continue
        if p.tp == p_special:
            continue
-        if p.sym in conf['symbols'].keys():
+        if conf is not None and p.sym in conf['symbols'].keys():
            continue
        slog(INFO, "creating production for symbol", p.str())

@@ -1240,10 +1248,12 @@ def grammar_create_y(grammar, opts):
                tokens.append(c.token)
        idx = 0
        for c in rule:
-            n += 1
            if c.tp == t_grammar:
                s.update(c.token, 0)
                continue
+            if c.token in tokens:
+                continue
+            n += 1
            p = grammar[c.token]
            #if is_terminal(c.token) is not None:
            #    continue
@@ -1327,8 +1337,15 @@ def grammar_create_l(grammar, opts):
        assert p.term[-1] in [ '"', "'" ], p.term
        out += re.escape(p.term[1:-1]) + ' { slog(PRI_NOTICE, "found terminal ' + p.sym + '"); return ' + p.sym + '; }\n'

    if conf is not None:
        for k, v in conf['symbols'].iteritems():
-            out += v['regex'].value() + ' { slog(PRI_NOTICE, "found regex ' + k + '"); return ' + k + '; }\n'
+            lex_as = v.get('lex_as')
+            if lex_as is not None:
+                retval = lex_as.value()
+            else:
+                retval = k
+            regex = v['regex'].value()
+            out += regex + ' { slog(PRI_NOTICE, "found regex \\"' + regex + '\\" for ' + k + '"); return ' + retval + '; }\n'

    #out += textwrap.dedent("""\
    #
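A hand-run of the new emission line above for the white_space entry from generate.conf; simplified in that the wrapped config values are plain strings here, and lex_as is unset, as in the shipped config:

    # Reproduces the out += line above for one config entry.
    k, regex, lex_as = "white_space", r"[ \n\t\r]+", None
    retval = lex_as if lex_as is not None else k
    print(regex + ' { slog(PRI_NOTICE, "found regex \\"' + regex
          + '\\" for ' + k + '"); return ' + retval + '; }')
    # [ \n\t\r]+ { slog(PRI_NOTICE, "found regex \"[ \n\t\r]+\" for white_space"); return white_space; }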
@@ -1464,7 +1481,7 @@ def grammar_create_l(grammar, opts):
def grammar_create_h(grammar, opts):
    out = "#ifndef " + opts['mip'] + '\n#define ' + opts['mip'] + '\n\n'
    ns = opts['namespace']
-    tokens = tokens_from_config(opts['config'])
+    tokens = symbols_from_config(opts['config'], "all")

    if ns is not None:
        out += 'namespace ' + ns + '{\n\n'
@@ -1508,6 +1525,8 @@ def grammar_create_h(grammar, opts):
            if rule.count(m) > 1:
                idx += 1
                suffix = '_' + str(idx)
            if m in tokens:
                continue
            p = grammar[m]
            out += '\n\t\t' + p.datatype + ' *' + member_prefix + m + suffix + ';'
        out += '\n\t};'
@@ -1575,10 +1594,10 @@ class GrammarCmd(jwutils.Cmd):
        if args.fix_extensions not in fix_extensions_mode:
            raise Exception("Illegal argument ", args.fix_extensions, "to --fix-extensions")
        grammar = grammar_fix_extensions(grammar, args.fix_extensions)
-        if args.unroll_alternatives:
-            grammar = grammar_unroll_alternatives(grammar)
        if args.unroll_lists:
            grammar = grammar_unroll_lists(grammar)
+        if args.unroll_alternatives:
+            grammar = grammar_unroll_alternatives(grammar)
        if args.unroll_options:
            grammar = grammar_unroll_options(grammar)
        if len(args.check_symbols):