From c2c409ed4b87b2a9fbb6c38c3b843f0f1838a5f0 Mon Sep 17 00:00:00 2001
From: Jan Lindemann <jan@janware.com>
Date: Sun, 12 Nov 2017 16:08:26 +0100
Subject: [PATCH] Move grammar-related stuff into package jw-grammar

Signed-off-by: Jan Lindemann <jan@janware.com>
---
 make/generate-flex-bison.mk         |   83 --
 test/grammar/Makefile               |   14 -
 test/grammar/generate.conf          |   20 -
 test/grammar/generate.py            |    6 -
 test/grammar/grammartest-input.ebnf |   16 -
 test/grammar/grammartest.code       |    9 -
 test/grammar/main.cpp               |   30 -
 tools/python/jwutils/grammar.py     | 1826 ---------------------------
 8 files changed, 2004 deletions(-)
 delete mode 100644 make/generate-flex-bison.mk
 delete mode 100644 test/grammar/Makefile
 delete mode 100644 test/grammar/generate.conf
 delete mode 100644 test/grammar/generate.py
 delete mode 100644 test/grammar/grammartest-input.ebnf
 delete mode 100644 test/grammar/grammartest.code
 delete mode 100644 test/grammar/main.cpp
 delete mode 100644 tools/python/jwutils/grammar.py

diff --git a/make/generate-flex-bison.mk b/make/generate-flex-bison.mk
deleted file mode 100644
index 6f3f20c..0000000
--- a/make/generate-flex-bison.mk
+++ /dev/null
@@ -1,83 +0,0 @@
-NAMESPACE_IN_GENERATED ?= $(FB_NAME)
-
-# These types are meant to be cut off the tree and turned into hand coded flex
-# regexes
-TRIM_SYMBOLS         ?=
-CUT_SYMBOLS          ?=
-IRRELEVANT_SYMBOLS   ?=
-GENERATE_LOG_LEVEL   ?= notice
-FIX_EXTENSIONS       ?= discard
-CHECK_SYMBOLS        ?= all
-ifneq ($(CHECK_SYMBOLS),)
-  OPT_CHECK_SYMBOLS  ?= --check-symbols='$(CHECK_SYMBOLS)'
-endif
-
-ifneq ($(GENERATE_CONFIG_FILE),)
-  OPT_CONFIG_FILE    ?= --config-file=$(GENERATE_CONFIG_FILE)
-endif
-
-GENERATED_STD          += $(FB_NAME).l $(FB_NAME).y $(FB_NAME).ebnf $(FB_COMMON_H)
-GENERATED              += $(FB_NAME)-dense.ebnf $(GENERATED_STD)
-GRAMMAR_INPUT        ?= $(FB_NAME)-input.ebnf
-FB_NAME_PREFIX       ?= $(FB_NAME)_
-FB_HDRDIR            ?= include
-FB_BISON_OUT_EXT     ?= cpp
-FB_FLEX_OUT_EXT      ?= cpp
-FB_CASE_INSENSITIVE  ?= true
-FB_SRC               ?= $(filter %.y %.l,$(GENERATED))
-FB_COMMON_H          ?= $(FB_HDRDIR)/$(FB_NAME).h
-
-INCLUDED_BY_GENERATED  += $(FB_COMMON_H) include/lex.$(FB_NAME).h include/$(FB_NAME).tab.h
-
-GENERATE_PY          ?= ./generate.py
-GENERATE             ?= python ./$(GENERATE_PY) --log-level $(GENERATE_LOG_LEVEL) create \
-			--fix-extensions $(FIX_EXTENSIONS) \
-			--unroll-lists \
-			--unroll-options \
-			--unroll-alternatives \
-			--replace-whitespace \
-			$(OPT_CHECK_SYMBOLS) \
-			$(OPT_CONFIG_FILE) \
-			--trim-symbols=$(shell echo $(TRIM_SYMBOLS) | sed 's/  */,/g') \
-			--cut-symbols=$(shell echo $(CUT_SYMBOLS) | sed 's/  */,/g') \
-			--irrelevant-symbols=$(shell echo $(IRRELEVANT_SYMBOLS) | sed 's/  */,/g') \
-			--namespace=$(NAMESPACE_IN_GENERATED) \
-			--includes=$(shell echo $(INCLUDED_BY_GENERATED) | sed 's/  */,/g') \
-			$(CREATE_EXTRA_ARGS)
-include $(TOPDIR)/make/proj.mk
-include $(MODDIR)/make/flex-bison.mk
-include $(MODDIR)/make/py-defs.mk
-
-all:
-debug-all:
-	GENERATE_LOG_LEVEL=debug make all 2>&1 | tee run.out
-
-generate: $(GENERATED)
-#$(FB_NAME).y: $(FB_COMMON_H) lex.$(FB_NAME).$(FB_FLEX_OUT_EXT)
-#lex.$(FB_NAME).$(FB_FLEX_OUT_EXT): $(FB_NAME).l
-
-check: $(GRAMMAR_INPUT) $(GENERATE_PY) Makefile
-	python ./$(GENERATE_PY) --log-level info check --fix-extensions unroll --unroll-lists --unroll-options $(OPT_CHECK_SYMBOLS) $<
-
-$(FB_NAME)-dense.ebnf: $(GRAMMAR_INPUT) $(GENERATE_PY)
-	python ./$(GENERATE_PY) --log-level $(GENERATE_LOG_LEVEL) create --fix-extensions keep $< $(FB_NAME).ebnf > $@.tmp
-	mv $@.tmp $@
-
-define generate_rule
-$(1): $$(GRAMMAR_INPUT) $$(GENERATE_PY) Makefile $(GENERATE_CONFIG_FILE)
-	$$(GENERATE) $$< $$(patsubst $(FB_NAME).%,$(FB_NAME).%,$$@) > $$@.tmp
-	mv $$@.tmp $$@
-endef
-$(foreach target,$(GENERATED_STD),$(eval $(call generate_rule,$(target))))
-
-clean.generated:
-	rm -f $(GENERATED)
-clean: clean.generated
-echo-generated:
-	@echo GENERATED = $(GENERATED)
-
-help:
-	$(GENERATE) --help
-
-expand-macros:
-	make 2>/dev/null | sed '/g++/ !d; s/g++\|gcc//; s/-o .*//' | xargs g++ -E -C | indent
diff --git a/test/grammar/Makefile b/test/grammar/Makefile
deleted file mode 100644
index 8c57a37..0000000
--- a/test/grammar/Makefile
+++ /dev/null
@@ -1,14 +0,0 @@
-TOPDIR = ../..
-
--include local.mk
-
-EXE_ARGS              ?= grammartest.code
-PREREQ_BUILD          += ytools
-FB_NAME                = grammartest
-NAMESPACE_IN_GENERATED = gt
-GENERATE_CONFIG_FILE   = generate.conf
-IRRELEVANT_SYMBOLS    ?= white_space
-
-include $(TOPDIR)/make/proj.mk
-include $(TOPDIR)/make/generate-flex-bison.mk
-include $(MODDIR)/make/exe.mk
diff --git a/test/grammar/generate.conf b/test/grammar/generate.conf
deleted file mode 100644
index 8bc1e28..0000000
--- a/test/grammar/generate.conf
+++ /dev/null
@@ -1,20 +0,0 @@
-[symbols]
-
-  [white_space[
-    type = token
-    lex_extra_action = "if memchr(yytext, '\n', yyleng) context->line++;"
-    regex = "[ \n\t\r]+" 
-  ]
-
-  [all_characters[
-    type = non-terminal
-    regex = "[[:print:]]"
-    #lex_as = yytext[0]
-  ]
-
-  [test[
-    type = token
-    dings = bums
-    regex = "bumsdings"
-  ]
-
diff --git a/test/grammar/generate.py b/test/grammar/generate.py
deleted file mode 100644
index c2f7b18..0000000
--- a/test/grammar/generate.py
+++ /dev/null
@@ -1,6 +0,0 @@
-#!/usr/bin/python
-# -*- coding: utf-8 -*-
-
-import jwutils
-
-jwutils.run_sub_commands('generate Test parser files', modules = ['jwutils.grammar'])
diff --git a/test/grammar/grammartest-input.ebnf b/test/grammar/grammartest-input.ebnf
deleted file mode 100644
index e06d923..0000000
--- a/test/grammar/grammartest-input.ebnf
+++ /dev/null
@@ -1,16 +0,0 @@
- (* a simple program syntax in EBNF − Wikipedia *)
- program = 'PROGRAM', white space, identifier, white space, 
-            'BEGIN', white space, 
-            { assignment, ";", white space }, 
-            'END.', [ white space ];
- identifier = alphabetic character, { alphabetic character | digit } ;
- number = [ "-" ], digit, { digit } ;
- string = '"' , { all characters }, '"' ;
- assignment = identifier , ":=" , ( number | identifier | string ) ;
- alphabetic character = "A" | "B" | "C" | "D" | "E" | "F" | "G"
-                      | "H" | "I" | "J" | "K" | "L" | "M" | "N"
-                      | "O" | "P" | "Q" | "R" | "S" | "T" | "U"
-                      | "V" | "W" | "X" | "Y" | "Z" ;
- digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ;
- white space = ? white space characters ? ;
- all characters = ? all visible characters ? ;
diff --git a/test/grammar/grammartest.code b/test/grammar/grammartest.code
deleted file mode 100644
index b6f8c4d..0000000
--- a/test/grammar/grammartest.code
+++ /dev/null
@@ -1,9 +0,0 @@
-PROGRAM DEMO1
-BEGIN
-  A:=3;
-  B:=45;
-  H:=-100023;
-  C:=A;
-  D123:=B34A;
-  BABOON:=GIRAFFE;
-END.
diff --git a/test/grammar/main.cpp b/test/grammar/main.cpp
deleted file mode 100644
index 719f520..0000000
--- a/test/grammar/main.cpp
+++ /dev/null
@@ -1,30 +0,0 @@
-#include <string>
-
-#include <slog.h>
-#include <YMisc.h>
-
-#include "include/grammartest.h"
-
-using namespace std;
-
-int main(int argc, const char *argv[])
-{
-	if (argc < 2) {
-		fprintf(stderr, "usage: %s input-file\n", filenotdir(argv[0]));
-		return 1;
-	}
-
-	const char *path = argv[1];
-	string content;
-	if (YMisc::suck_in_file(path, content)<0) {
-		slog(PRI_ERR, "failed to read [%s] (%s)", path, err());
-		return 1;
-	}
-
-	if (FB_SYM(create_ast)(content.c_str())<0) {
-		slog(PRI_ERR, "failed to create AST from [%s] (%s)", path, err());
-		return 1;
-	}
-
-	return 0;
-}
diff --git a/tools/python/jwutils/grammar.py b/tools/python/jwutils/grammar.py
deleted file mode 100644
index c775e65..0000000
--- a/tools/python/jwutils/grammar.py
+++ /dev/null
@@ -1,1826 +0,0 @@
-#!/usr/bin/python
-# -*- coding: utf-8 -*-
-
-import argparse
-import sys
-import re
-import lxml.etree as ET
-import textwrap
-import itertools
-import copy
-from collections import OrderedDict
-from abc import abstractmethod
-import os.path
-
-import jwutils
-#from jwutils.stree import StringTree, serdes
-import jwutils.stree.serdes as serdes
-import jwutils.stree.StringTree as StringTree
-
-from jwutils.log import *
-
-t_grammar      = "grammar"
-t_target_lang  = "target"
-
-p_ruleset  = "ruleset"
-p_terminal = "term"
-p_literal  = "literal"
-p_lexical  = "lexical"
-p_special  = "special"
-p_regex    = "regex"
-
-mode_unroll  = "unroll"
-mode_concat  = "concat"
-mode_keep    = "keep"
-mode_discard = "discard"
-fix_extensions_mode = [ mode_unroll, mode_concat, mode_keep, mode_discard ]
-
-c_token      = "token"
-c_non_terminal = "non-terminal"
-
-member_prefix = ''
-
-special_terminals = {
-    "`"   : "BACKTICK",
-    "^"   : "CARET",
-    "<"   : "LT",
-    "<<"  : "LEFT_SHIFT",
-    "<="  : "LTE",
-    "<=>" : "SPACE_SHIP",
-    "<>"  : "NE",
-    "="   : "EQ",
-    "=>"  : "EG",
-    ">"   : "GT",
-    ">="  : "GE",
-    ">>"  : "RIGHT_SHIFT",
-    "|"   : "PIPE",
-    "_"   : "UNDERSCORE",
-    ","   : "COMMA",
-    ";"   : "SEMICOLON",
-    ":"   : "COLON",
-    ":="  : "DEFINE",
-    "?"   : "QM",
-    "?<"  : "QM_LT",
-    "?<=" : "QM_LE",
-    "?="  : "QM_EQ",
-    "?>"  : "QM_GT",
-    "?>=" : "QM_GE",
-    "??"  : "QM_QM",
-    "?/=" : "QM_DIV_EQ",
-    "/"   : "DIV",
-    "/="  : "DIV_EQ",
-    "."   : "DOT",
-    "\""  : "DQUOTE",
-    "'"   : "QUOTE",
-    "("   : "LPAREN",
-    ")"   : "RPAREN",
-    "["   : "LBRACKET",
-    "]"   : "RBRACKET",
-    "@"   : "AT",
-    "*"   : "ASTERISK",
-    "**"  : "DASTERISK",
-    "\\"  : "BACKSLASH",
-    "&"   : "AMPERSAND",
-    "#"   : "NUMBER_SIGN",
-    "+"   : "PLUS",
-    "-"   : "MINUS"
-}
-
-token_regexes = {
-    "PSL_Property_Declaration"   : "property[ \t]+[^;]+;",
-    "PSL_Sequence_Declaration"   : "sequence[ \t]+[^;]+;",
-    "PSL_Clock_Declaration"      : "default[ \t]+clock[ \t]+[^;]+;",
-    "PSL_Directive"              : "([^;]+:)*(assert|assume|restrict|restrict!|cover|fairness|strong_fairness)[ \t]+[^;]+;",
-    "PSL_Verification_Unit"      : "(vunit|vpkg|vprop|vmode)[^{]*{[^}]*}",
-}
-
-quotechars = [ '"', "'" ]
-
-def dump(obj):
-  for c, v in obj.iteritems():
-    slog(INFO, "obj.%s = %s (=> %s)" % (str(type(c)), str(c), str(v)))
-
-def dump_grammar(prio, grammar):
-    caller = get_caller_pos()
-    for t, p in grammar.iteritems():
-        p.dump(prio, caller=caller)
-
-def cleanup_token(tok):
-    tok = tok.strip()
-    if len(tok) == 0:
-        return None
-    if tok[0] == "'" and tok[-1] == "'":
-        tok = '"' + tok[1:-1] + '"'
-    return tok
-
-def tok2ctype(tok):
-    if tok in [ '{', '}', '[', ']', '<', '>', '(', ')', '?', '|' ]:
-        return t_grammar
-    return t_target_lang
-
-def is_terminal(tok):
-    size = len(tok)
-    if size < 2:
-        return None
-    first = tok[0]
-    last = tok[-1]
-    if (not first in quotechars) and (not last in quotechars):
-        return None
-    if first != last:
-        raise Exception('Token >"' + tok + '"< isn\'t symmetrically enclosed in quotes')
-    return tok[1:-1]
-
-def tok2name(tok):
-    tok = cleanup_token(tok)
-    term = is_terminal(tok)
-    if term is not None:
-        if term in special_terminals.keys():
-           return special_terminals[term]
-        return term
-    return tok
-
-def tok2sym(tok):
-    tok = cleanup_token(tok)
-    term = is_terminal(tok)
-    if term is not None:
-        if term in special_terminals.keys():
-           return "T_" + special_terminals[term].upper()
-        return "T_" + re.sub('[^a-zA-Z0-9]', '_', term).upper()
-    return tok
-
-def tok2regex(tok):
-    if tok in token_regexes.keys():
-        return token_regexes[tok]
-    return re.escape(tok)
-
-def format_rule(rule):
-    return ' '.join(c.str() for c in rule)
-
-def format_rules(rules):
-    return ', '.join(format_rule(rule) for rule in rules)
-
-def format_ebnf_rule(grammar, rule):
-    r = ""
-    last = None
-    for comp in rule:
-        if last is not None:
-            if comp.tp == t_grammar:
-                if last.tp == t_grammar:
-                    pass
-                else:
-                    if comp.token in [ '[', '(', '{', '<' ]:
-                        r += ','
-            else:
-                if last.tp == t_grammar:
-                    if comp.token in [ ']', ')', '}', '>' ]:
-                        r += ','
-                else:
-                        r += ','
-        r += ' ' + comp.token
-        last = comp
-    if len(r) == 0:
-        return r
-    return r.strip()
-
-def format_yacc_rule(rule):
-    r = ''
-    for c in rule:
-        if c.tp != t_target_lang:
-            slog(DEBUG, "ignoring non-target-language token", c.token, "in rule")
-            continue
-        r += tok2sym(c.token) + ' '
-    return r[:-1]
-
-class SourceElement:
-
-    def __init__(self, token, line):
-        self.token = token
-        self.line = line
-
-class RuleComp:
-
-    def __init__(self, token, tp = None, line=-1):
-        assert(token is not None)
-        # assert(token != '|')
-        self.token = token
-        if tp is None:
-            tp = tok2ctype(token)
-        self.tp = tp
-        slog(INFO, "creating rule component >" + self.str() + "<")
-        assert(token != "{ assignment")
-        self.line = line
-
-    def __eq__(self, rhs):
-        if self.token != rhs.token:
-            return False
-        if self.tp != rhs.tp:
-            return False
-        return True
-
-    def __ne__(self, rhs):
-        return not self.__eq__(rhs)
-
-    def str(self):
-        tp = 'u'
-        if self.tp == t_grammar:
-            tp = 'g'
-        elif self.tp == t_target_lang:
-            tp = 'l'
-        else:
-            tp = self.tp
-        return "{" + tp + ": " + self.token + "}"
-
-class State:
-
-    def __init__(self):
-        self.__pair_square = ['[', ']']
-        self.__pair_curly = ['{', '}']
-        self.__pair_ext = ['<', '>']
-        self.__pair_group = ['(', ')']
-        self.__pair_comment = ['(*', '*)']
-        self.__pair_special = ['?', '?']
-        self.reset() 
-
-    def reset(self):
-        self.curly = 0
-        self.square = 0
-        self.ext = 0
-        self.group = 0
-        self.in_comment = False
-        self.in_special = False
-        self.production = None
-        self.rule = []
-        self.rules = []
-        self.things = []
-
-    def optional(self):
-        return self.square != 0 or self.curly != 0
-
-    def update(self, tok, line):
-        if not self.in_comment:
-            if tok == '[':
-                self.square += 1
-                self.things.append(self.__pair_square)
-            elif tok == ']':
-                self.square -= 1
-                assert(self.things.pop() == self.__pair_square)
-            elif tok == '{':
-                self.curly += 1
-                self.things.append(self.__pair_curly)
-            elif tok == '}':
-                self.curly -= 1
-                assert(self.things.pop() == self.__pair_curly)
-            elif tok == '(':
-                self.group += 1
-                self.things.append(self.__pair_group)
-            elif tok == ')':
-                self.group -= 1
-                assert(self.things.pop() == self.__pair_group)
-            elif tok == '<':
-                self.ext += 1
-                self.things.append(self.__pair_ext)
-            elif tok == '>':
-                self.ext -= 1
-                assert(self.things.pop() == self.__pair_ext)
-            elif tok == '?':
-                if not self.in_special:
-                    self.in_special = True
-                    self.things.append(self.__pair_special)
-                else:
-                    self.in_special = False
-                    assert(self.things.pop() == self.__pair_special)
-            elif tok == '(*':
-                self.in_comment = True
-                self.things.append(self.__pair_comment)
-            elif tok == '*)':
-                raise Exception("Unmatched closing EBNF comment mark", tok, "in line", line)
-        else:
-            if tok == '(*':
-                raise Exception("Nested EBNF comment", tok, "in line", line)
-            elif tok == '*)':
-                assert(self.things.pop() == self.__pair_comment)
-                self.in_comment = False
-
-        if self.curly < 0 or self.square < 0 or self.ext < 0 or self.group < 0:
-            raise Exception("Unbalanced BNF bracket", tok, "in line", line)
-        return self.optional()
-
-    def in_list(self):
-        return self.curly > 0
-
-    def in_option(self):
-        return self.square > 0
-
-    def in_group(self):
-        return self.group > 0
-
-    def in_ext(self):
-        return self.ext > 0
-
-    def in_something(self):
-        if len(self.things) == 0:
-            return None
-        return self.things[-1]
-
-    def is_optional(self):
-        return self.in_list() or self.in_option()
-
-class Symbol:
-
-    def __init__(self, token, tp = None, rules = None):
-        self.reset(token, tp, rules)
-        self.set_is_payload(True)
-
-    def reset(self, token, tp = None, rules = None):
-        if tp == None:
-            if is_terminal(token) is not None:
-                tp = p_terminal
-            else:
-                tp = p_ruleset
-        self.tp = tp
-        self.token = token
-        self.name = tok2name(token)
-        self.sym = tok2sym(token)
-        self.term = None
-        self.regex = None
-        self.is_lexical_element = False
-        self.rules = []
-        self.datatype = None
-        if rules is not None:
-            self.rules = rules
-        self.set_type(tp)
-
-    def set_is_payload(self, onoff):
-        self.is_payload = onoff
-
-    def set_type(self, tp):
-        if tp == p_ruleset:
-            self.term = None
-            self.regex = None
-            self.is_lexical_element = False
-            self.datatype = self.token + '_t'
-        elif tp == p_literal:
-            assert(len(self.rules) == 0)
-            self.term = None
-            self.regex = tok2regex(self.token)
-            self.is_lexical_element = False
-            self.datatype = 'std::string'
-        elif tp == p_special or tp == p_lexical or tp == p_regex:
-            if len(self.rules):
-                self.dump(ERR)
-                raise Exception("Tried to set symbol", self.token, "to special which has", len(self.rules), "rules")
-            self.term = None
-            self.regex = None
-            self.is_lexical_element = True
-            self.datatype = 'std::string'
-        elif tp == p_terminal:
-            if len(self.rules):
-                slog(ERR, "rules = ", self.rules)
-                self.dump(ERR)
-                raise Exception("Tried to set symbol", self.token, "to terminal which has", len(self.rules), "rules")
-            self.term = self.token
-            self.regex = tok2regex(self.token)
-            self.is_lexical_element = False
-            self.datatype = None
-        else:
-            self.dump()
-            raise Exception("Tried to set symbol to unknown type", tp)
-        self.tp = tp
-
-    def str(self):
-        r = self.name + ' = ' + format_rules(self.rules)
-        return r
-
-    def equals(self, rhs):
-        for k, v in self.__dict__.iteritems():
-            if (not k in rhs.__dict__) or self.__dict__[k] != rhs.__dict__[k]:
-                slog(WARNING, k, self.__dict__[k], rhs.__dict__[k])
-                return False
-        return True
-
-    def dump(self, prio = NOTICE, msg="", caller=None):
-        if caller is None:
-            caller = get_caller_pos(1)
-        slog(prio, ",----------------", msg, caller=caller)
-        slog(prio, "| type          =", self.tp, caller=caller)
-        slog(prio, "| name          =", self.name, caller=caller)
-        slog(prio, "| token         =", self.token, caller=caller)
-        slog(prio, "| sym           =", self.sym, caller=caller)
-        slog(prio, "| term          =", self.term, caller=caller)
-        slog(prio, "| regex         =", self.regex, caller=caller)
-        slog(prio, "| datatype      =", self.datatype, caller=caller)
-        slog(prio, "| is_lexical_element =", self.is_lexical_element, caller=caller)
-        slog(prio, "| rules         =", format_rules(self.rules), caller=caller)
-        slog(prio, "`----------------", msg, caller=caller)
-
-def split_list_by(l_, tok):
-    l = copy.deepcopy(l_)
-    return [list(x[1]) for x in itertools.groupby(l, lambda x: x==tok) if not x[0]]
-
-def split_list_by_regex(l_, regex):
-    l = copy.deepcopy(l_)
-    return [list(x[1]) for x in itertools.groupby(l, lambda x: re.match(regex, x)) if not x[0]]
-
-def remove_duplicate_rules(rules):
-    r = []
-    for rule in rules:
-        if rule in r:
-            continue
-        r.append(rule)
-    slog(DEBUG, "rules after removing duplicates >")
-    for rule in rules:
-        slog(DEBUG, "-> " + format_rule(rule))
-    slog(DEBUG, "rules after removing duplicates <")
-    return r
-
-def grammar_tokenize_ebnf(content):
-    r = []
-    c = ''
-    l = 0
-    in_comment = False
-    in_quote = None
-    for line in content.splitlines(True):
-        end = len(line) - 1
-        l += 1
-        tok = ''
-        p = -1
-        while p < end:
-            p += 1
-            if p < end and in_quote == None:
-                cand = line[p:p+2]
-                if cand == '(*':
-                    if in_comment:
-                        raise Exception("Syntax error in line", l, ": spurious comment closure")
-                    in_comment = True
-                    p += 1
-                    continue
-                elif cand == '*)':
-                    if not in_comment:
-                        raise Exception("Syntax error in line", l, ": spurious comment opener")
-                    in_comment = False
-                    p += 1
-                    continue
-            if in_comment:
-                continue
-            c = line[p]
-            if c in [ '"', "'" ]:
-                if in_quote is None:
-                    in_quote = c
-                else:
-                    if in_quote == c:
-                        in_quote = None
-            if in_quote is not None:
-                tok += c
-                continue
-            if c in [ '(', ')', '[', ']', '{', '}', ',', ';', '=', '?', '|', '\n' ]:
-                tok = tok.strip()
-                if len(tok):
-                    r.append((tok, l))
-                tok = ''
-                if not c.isspace():
-                    r.append((c, l))
-                continue
-            tok += c
-
-    tok = tok.strip()
-    if len(tok):
-        r.append((tok, l))
-    return r
-
-def grammar_add_symbol(grammar, tok, rules):
-    assert(tok is not None)
-    if tok in grammar.keys():
-        s = grammar[tok]
-    else:
-        s = Symbol(tok, rules=rules)
-        grammar[tok] = s
-    if rules is not None:
-        slog(NOTICE, "Adding rules for symbol", tok, ":", format_rules(rules))
-        for rule in rules:
-            if not rule in s.rules:
-                s.rules.append(rule)
-        grammar[tok] = s
-
-def grammar_parse_ebnf_tokens(tokens):
-    grammar = OrderedDict()
-    state = State()
-    lhs = None
-    last = None
-    ruleset = []
-    rule = []
-    terminals = []
-    specials = []
-    for tok, line in tokens:
-        try:
-            state.update(tok, line)
-            if tok == '=':
-                lhs = last
-                continue
-            last = tok
-            if tok == ';':
-                ruleset.append(rule)
-                grammar_add_symbol(grammar, lhs, ruleset)
-                ruleset = []
-                rule = []
-                lhs = None
-                continue
-            if tok == ',':
-                continue
-            if tok == '|' and state.in_something() is None:
-                ruleset.append(rule)
-                rule = []
-                continue
-            if is_terminal(tok) and tok not in terminals:
-                terminals.append(tok)
-            elif state.in_special and tok not in specials:
-                specials.append(tok)
-            if lhs is not None:
-                rule.append(RuleComp(tok, line=line))
-        except Exception as err:
-            for t in tokens:
-                slog(ERR, t)
-            slog(ERR, "Unexpected error in line", line, ":", str(err))
-            raise
-            exit(1)
-    for s in terminals:
-        grammar_add_symbol(grammar, s, None)
-        grammar[s].set_type(p_terminal)
-    for s in specials:
-        slog(INFO, "found special sequence symbol", s)
-        grammar_add_symbol(grammar, s, None)
-        grammar[s].set_type(p_special)
-
-    return grammar
-
-def grammar_parse_ebnf(content_):
-    tokens = grammar_tokenize_ebnf(content_)
-    grammar = grammar_parse_ebnf_tokens(tokens)
-    return grammar
-
-def grammar_get_types(grammar):
-    types = dict()
-    for t, p in grammar.iteritems():
-        if not len(p.rules):
-            continue
-        if p.term is not None:
-            continue
-        ruleno = 1
-        rules = []
-        for rule in p.rules:
-            members = []
-            for c in rule:
-                if c.tp != t_target_lang:
-                    continue
-                if not c.token in grammar.keys():
-                    p.dump(ERR)
-                    raise Exception("Can't make type from unknown token \"" + c.token + "\" in rule", format_rule(rule))
-                pp = grammar[c.token]
-                if pp.tp is p_terminal:
-                    continue
-                if not pp.is_payload:
-                    continue
-                members.append(tok2sym(c.token))
-            if True or len(members):
-                rules.append(members)
-        if t in types.keys():
-            raise Exception("Tried to add type", t, "twice")
-        types[t] = rules
-    return types
-
-def grammar_fix_extensions(grammar, mode):
-    for tok, p in grammar.iteritems():
-        newrules = []
-        for rule in p.rules:
-            newrule = []
-            prefix = ""
-            paren = 0
-            for c in rule:
-                if c.tp == t_grammar and c.token in ['<', '>']:
-                    if c.token == '<':
-                        paren += 1
-                    elif c.token == '>':
-                        paren -= 1
-                    if paren <= 1: # don't add first level of control chars
-                        continue
-                    newrule.append(c)
-                    continue
-                if paren > 0:
-                    assert(len(c.token) != 0)
-                    prefix += '_' + c.token
-                    continue
-                if len(prefix) > 0:
-                    prefix = prefix[1:]
-                    slog(INFO, "Found prefix", prefix)
-                    if mode == mode_keep:
-                        newrule.append(RuleComp('<'))
-                        newrule.append(RuleComp(prefix, t_target_lang))
-                        newrule.append(RuleComp('>'))
-                        newrule.append(c)
-                    elif mode == mode_discard:
-                        prefix = ''
-                        continue
-                    elif mode in [ mode_unroll, mode_concat ]:
-                        combined = RuleComp(c.token, c.tp)
-                        combined.token = prefix + c.token
-                        prefix = ''
-                        newrule.append(combined)
-                        slog(INFO, "Appended new rule return value", combined.token)
-                        if mode == mode_unroll:
-                            if combined.token in grammar.keys():
-                                continue
-                            grammar[combined.token] = Symbol(combined.token, rules=[[c]])
-                    else:
-                        raise Exception("Invalid prefix mode", mode)
-                    prefix = ''
-                    continue
-                newrule.append(c)
-            if len(prefix): # undigested prefix, since it was the last
-                newrule.append(RuleComp(prefix[1:], t_target_lang))
-            newrules.append(newrule)
-        grammar[tok].rules = newrules # TODO: not sure if this could be done on iterator only
-    return grammar # TODO: not sure if this is necessary
-
-def grammar_unroll_lists(grammar):
-    delimiters = [ '","', '";"', '"|"' ] # TODO: this could be a function parameter to make it generic
-    newrule = None
-    for tok, p in grammar.iteritems():
-        newrules = []
-        for rule in p.rules:
-            newrule = []
-            listrule = []
-            prefix = None
-            s = State()
-            slog(INFO, "----------------- list-unrolling rule", format_rule(rule))
-            for c in rule:
-                s.update(c.token, c.line)
-                if c.token == '{':
-                    continue
-                if c.token == '}':
-                    if len(listrule) == 0:
-                        raise Exception("Rule of production", p.name, "contains empty list:", format_rule(rule))
-                    delpos = []
-                    name = "list"
-                    for i, rule in enumerate(listrule):
-                        if rule.token in delimiters:
-                            delpos.append(i)
-                            continue
-                        if rule.tp != t_target_lang:
-                            continue
-                        name += "_" + tok2name(rule.token)
-
-                    # not really: there are lists without delimiters, too
-                    #if len(delpos) != 1:
-                    #    p.dump(ERR)
-                    #    raise Exception("need exactly one delimiter in list rule:", format_rule(listrule))
-
-                    newrule.append(RuleComp(name, t_target_lang))
-                    listrule.insert(0, RuleComp('(', t_grammar))
-                    listrule.insert(0, RuleComp(name, t_target_lang)) # enable iteration
-                    listrule.append(RuleComp(')', t_grammar))
-                    #p = Symbol(name, rules=[[], listrule])
-                    p = Symbol(name, rules=[listrule])
-                    #p = Symbol(name)
-                    #p.rules = [ [], listrule ]
-                    listrule = []
-                    if name not in grammar.keys():
-                        grammar[name] = p
-                        continue
-                    if not p.equals(grammar[name]):
-                        p.dump(ERR, "old list production")
-                        p.dump(ERR, "new list production")
-                        raise Exception("List production expands to already taken name", name)
-                    continue
-                if s.in_list():
-                    listrule.append(c)
-                    continue
-                newrule.append(c)
-            slog(DEBUG, "appending " + format_rule(newrule))
-            newrules.append(newrule)
-            newrule = None
-        else:
-            if newrule is not None:
-                slog(DEBUG, "appending " + format_rule(newrule))
-                newrules.append(newrule)
-                newrule = None
-        slog(DEBUG, "done processing rules for " + tok)
-        grammar[tok].rules = remove_duplicate_rules(newrules)
-    return grammar
-
-def rules_unroll_options(rules):
-    r = []
-    found = False
-    newrule = None
-    slog(DEBUG, "unrolling", format_rules(rules))
-    for rule in rules:
-        square = 0
-        option = []
-        newrule = []
-        for i, c in enumerate(rule):
-            if c.tp == t_grammar:
-                if c.token == '[':
-                    square += 1
-                elif c.token == ']':
-                    square -= 1
-                if square == 1:
-                    continue
-            if square >= 1:
-                option.append(c)
-                continue
-            slog(DEBUG, "square =", square)
-            assert(square == 0)
-            n = len(option)
-            if n == 0:
-                newrule.append(c)
-                continue
-            # first without option
-            replaced = newrule[:]
-            tail = rule[i+1:len(rule)]
-            slog(DEBUG, "i                      = ", i)
-            slog(DEBUG, "n                      = ", n)
-            slog(DEBUG, "rule                   = ", format_rule(rule))
-            slog(DEBUG, "tail                   = ", format_rule(tail))
-            slog(DEBUG, ",-------------------------")
-            slog(DEBUG, "head                   = ", format_rule(replaced))
-            replaced.extend(tail)
-            slog(DEBUG, "head + tail            = ", format_rule(replaced))
-            r.append(replaced)
-            # then with option inserted
-            for unrolled in rules_unroll_options([ option ]):
-                replaced = newrule[:]
-                slog(DEBUG, ",-------------------------")
-                slog(DEBUG, "head                   = ", format_rule(replaced))
-                slog(DEBUG, "unrolled               = ", format_rule(unrolled))
-                replaced.extend(unrolled)
-                slog(DEBUG, "head + unrolled        =", format_rule(replaced))
-                replaced.extend(tail)
-                slog(DEBUG, "head + unrolled + tail =", format_rule(replaced))
-                r.append(replaced)
-            found = True
-            break
-        if not found:
-            r.append(newrule)
-            newrule = None
-    else:
-        if newrule is not None:
-            slog(DEBUG, "appending " + format_rule(newrule))
-            r.append(newrule)
-            newrule = None
-    if found:
-        return rules_unroll_options(r)
-    return r
-
-def grammar_unroll_options(grammar):
-    for tok, p in grammar.iteritems():
-        grammar[tok].rules = remove_duplicate_rules(rules_unroll_options(p.rules))
-    return grammar
-
-def rules_unroll_alternatives(rules):
-    r = []
-    found = False
-    slog(INFO, "unrolling alternatives in", format_rules(rules))
-    sep = RuleComp('|')
-    for rule in rules:
-        if not sep in rule:
-            r.append(rule)
-            continue
-        found = True
-        state = State()
-        end = len(rule) - 1
-        first = last = -1
-        for i, c in enumerate(rule):
-            state.update(c.token, line=c.line)
-            if c.token != '|' or c.tp != t_grammar:
-                slog(INFO, "checking token", c.token, "of type", c.tp, "at position", i)
-                continue
-            slog(INFO, "found token at position", i)
-            container = state.in_something()
-            slog(INFO, "thing delimiters are", container)
-            if container is None:
-                raise Exception("Alternative in line", c.line, "at rule position", i, "outside container:", format_rule(rule))
-            first = last = -1
-            k = i - 1
-            while k >= 0:
-                prev = rule[k]
-                slog(INFO, "comparing token", rule[k].token, "at position", k, "against opener", container[0])
-                if prev.token == container[0]:
-                    first = k
-                    break
-                k -= 1
-            if first == -1:
-                raise Exception("Alternative in line", c.line, "missing previous element:", format_rule(rule))
-            k = i
-            while k <= end:
-                nxt = rule[k]
-                slog(INFO, "comparing token", rule[k].token, "at position", k, "against closer", container[1])
-                if nxt.token == container[1]:
-                    last = k
-                    break
-                k += 1
-            if last == i:
-                raise Exception("Alternative in line", c.line, "missing next element:", format_rule(rule))
-            break # found what I wanted
-        assert(first > 0)
-        assert(last > 0)
-        assert(last <= end)
-        head = rule[0:first]
-        mid = rule[first+1:last]
-        tail = rule[last+1:end]
-        slog(INFO, "first =", first, "last =", last, "end =", end)
-        slog(INFO, "head = ", format_rule(head))
-        slog(INFO, "mid  = ", format_rule(mid))
-        slog(INFO, "tail = ", format_rule(tail))
-        for m in split_list_by(mid, sep):
-            unrolled_rule = head + m + tail
-            r.append(unrolled_rule)
-    #if found:
-    #    return rules_unroll_alternatives(r)
-    return r
-
-def grammar_unroll_alternatives(grammar):
-    for tok, p in grammar.iteritems():
-        grammar[tok].rules = rules_unroll_alternatives(p.rules)
-    return grammar
-
-def grammar_replace_whitespace(grammar):
-    r = OrderedDict()
-    for tok, s in grammar.iteritems():
-        newrules = []
-        for rule in s.rules:
-            newrule = []
-            for c in rule:
-                newc = RuleComp(c.token.replace(' ', '_'), tp=c.tp, line=c.line)
-                newrule.append(newc)
-            newrules.append(newrule)
-        newtok = tok.replace(' ', '_')
-        s.reset(newtok, tp=s.tp, rules=newrules)
-        r[newtok] = s
-        slog(INFO, "added symbol", newtok)
-    return r
-
-def grammar_add_configured_types(grammar, conf):
-    if conf is None:
-        return grammar
-    symbols = conf.get('symbols')
-    if symbols is None:
-        return grammar
-    for t, c in symbols.iteritems():
-        s = Symbol(t)
-        s.set_type(p_regex)
-        s.regex = c["regex"].value()
-        grammar[t] = s
-    return grammar 
-
-def step_out(grammar, terminals, orphans, lexicals, tok, depth, checked = None, found = None):
-    if checked is None:
-        checked = set()
-    if found is None:
-        found = dict()
-    indent = ' ' * depth * 2
-    if tok in found.keys():
-        slog(INFO, indent + " + found cached", tok, "with depth", found[tok])
-        return found[tok]
-    slog(INFO, indent + " + " + tok)
-    indent = indent + "  "
-    if tok in terminals:
-        found[tok] = 1
-        slog(INFO, indent + " + found terminal", tok, "with depth", found[tok])
-        return 1
-    if tok in orphans:
-        found[tok] = 1
-        slog(INFO, indent + " + found orphan", tok, "with depth", found[tok])
-        return 1
-    #if tok in lexicals:
-    #    found[tok] = 1
-    #    slog(INFO, indent + " + found lexical element", tok, "with depth", found[tok])
-    #    return 1
-    if tok in checked:
-        slog(INFO, indent, "token", tok, "is among checked", ' '.join(checked))
-        return sys.maxint
-
-    slog(INFO, indent, "checked =", ' '.join(checked))
-    checked.add(tok)
-    if tok not in grammar.keys():
-        slog(ERR, "tried to validate unknown token \"" + tok + "\"")
-        return sys.maxint
-    p = grammar[tok]
-    r = sys.maxint
-    slog(INFO, indent, p.token, "has", len(p.rules), "rules")
-    only_optional = True
-    for rule in p.rules:
-        slog(INFO, indent, "testing rule", format_rule(rule))
-        if tok in [ c.token for c in rule ]:
-            continue
-        mn = sys.maxint
-        mx = 0
-        s = State()
-        for c in rule:
-            slog(INFO, indent, "testing token", c.token)
-            if c.tp == t_grammar and s.update(c.token, 0):
-                continue
-            if c.tp != t_target_lang:
-                slog(INFO, indent, "  token", c.token, "is not a VHDL token")
-                continue
-            only_optional = False
-            # same "found" argument in next call?
-            rr = step_out(grammar, terminals, orphans, lexicals, c.token, depth + 1, checked.copy(), found)
-            slog(INFO, indent, "  token", c.token, "needs", rr, "steps to escape, mn=", mn, "mx=", mx)
-            if rr == sys.maxint or rr is None:
-                slog(INFO, indent, "  got error for token", c.token)
-                mn = sys.maxint
-                mx = 0
-                break
-            if rr > mx:
-                slog(INFO, indent, "  adjusting mx to", rr)
-                mx = rr
-            if rr < mn:
-                slog(INFO, indent, "  adjusting mn to", rr)
-                mn = rr
-        if mn == sys.maxint or mx == 0: # unusable as escape route
-            slog(INFO, indent, "  unusable as escape route for " + tok + ":", format_rule(rule))
-            continue
-        slog(INFO, indent, "after checking all rules, mx is", mx)
-        if mx < r:
-            slog(INFO, indent, "setting return value to max", mx)
-            r = mx
-    if only_optional:
-        slog(INFO, indent, tok, "has only optional rules, accepting")
-        r = 0
-    if r != sys.maxint:
-        r += 1
-        slog(INFO, indent, "found way out for", tok, "at depth", depth, "with", r, "steps")
-        found[tok] = r
-    slog(INFO, indent, "returning", r, "for token", tok)
-    return r
-
-def grammar_check(grammar, check_symbols = None):
-    terminals = {tok for tok, p in grammar.iteritems() if p.term is not None}
-    orphans   = {tok for tok, p in grammar.iteritems() if p.token not in grammar}
-    lexicals  = {tok for tok, p in grammar.iteritems() if p.is_lexical_element is True}
-    elements  = set()
-    if check_symbols is None:
-        check_symbols = []
-    if len(check_symbols) == 0:
-        for tok, p in grammar.iteritems():
-            if p.is_lexical_element:
-                elements.add(tok)
-                continue
-            for rule in p.rules:
-                for c in rule:
-                    if c.tp == t_grammar:
-                        continue
-                    elements.add(c.token)
-        check_symbols = sorted(list(elements))
-    found = dict()
-    for tok in check_symbols:
-        slog(INFO, "======= checking", tok)
-        rr = step_out(grammar, terminals, orphans, lexicals, tok, 0, checked=set(), found=found)
-        if rr == sys.maxint:
-            slog(ERR, "No way out for", tok)
-            exit(1)
-        if not tok in grammar.keys():
-            slog(ERR, "Token", tok, "has no production")
-            exit(1)
-        slog(INFO, tok, "->", str(rr))
-
-def grammar_lhss_map(grammar):
-    r = dict()
-    for t in grammar.keys():
-        r[t] = set()
-    for t, p in grammar.iteritems():
-        for rule in p.rules:
-            for c in rule:
-                if c.tp == t_target_lang:
-                    r[c.token].add(t)
-    return r
-
-def do_grammar_lhss(dmap, stop, rhs, buf, recursive):
-    lhss = dmap[rhs]
-    for lhs in lhss:
-        if lhs in buf:
-            continue
-        buf.add(lhs)
-        if lhs in stop:
-            slog(INFO, "    symbol", lhs, "is among stop symbols, stopping recursion")
-            continue
-        if recursive:
-            do_grammar_lhss(dmap, stop, lhs, buf, recursive)
-
-def grammar_lhss(dmap, stop, symbols, recursive = False):
-    r = set()
-    for s in symbols:
-        if s in r:
-            continue
-        do_grammar_lhss(dmap, stop, s, r, recursive)
-    return r
-
-def do_grammar_rhss(grammar, stop, sym, buf):
-    p = grammar[sym]
-    for rule in p.rules:
-        for c in rule:
-            if c.tp != t_target_lang:
-                continue
-            if c.token in stop:
-                continue
-            if c.token in buf:
-                continue
-            buf.add(c.token)
-            do_grammar_rhss(grammar, stop, c.token, buf)
-
-def grammar_rhss(grammar, stop, symbols):
-    r = set()
-    for s in symbols:
-        if s in r:
-            continue
-        do_grammar_rhss(grammar, stop, s, r)
-    return r
-
-def grammar_symbol_in_use(grammar, dmap, stop, checked, sym):
-    if sym in stop:
-        return False
-    # Does this have to be recursive?
-    defined = grammar_lhss(dmap, stop, set([sym]))
-    slog(INFO, "    symbol", sym, "defines:", ', '.join(defined))
-    if not len(defined):
-        return True
-    for d in defined:
-        if d in stop:
-            continue
-        if d in checked:
-            continue
-        checked.add(d)
-        if grammar_symbol_in_use(grammar, dmap, stop, checked, d):
-            return True
-    return False
-
-def do_grammar_unused(grammar, dmap, doomed):
-    r = set(doomed)
-    rhss = grammar_rhss(grammar, set(), doomed)
-    for rhs in rhss:
-        slog(INFO, "+++ checking if symbol", rhs, "is in use >>")
-        if not grammar_symbol_in_use(grammar, dmap, doomed, set(), rhs):
-            slog(INFO, "  symbol", rhs, "is not in use")
-            r.add(rhs)
-        slog(INFO, "+++ checking if symbol", rhs, "is in use (yes) <<")
-    return r
-
-def grammar_unused(grammar, dmap, doomed):
-    r = set(doomed)
-    while True:
-        unused = do_grammar_unused(grammar, dmap, r)
-        slog(INFO, "unused:", ', '.join(unused))
-        slog(INFO, "r:     ", ', '.join(r))
-        if unused == r:
-            break
-        r |= unused
-    return r
-
-# eradicate symbols from tree
-def grammar_cut_symbols(grammar, symbols):
-    slog(INFO, "-------- removing symbols:", ', '.join(symbols))
-    dmap = grammar_lhss_map(grammar)
-    unused = grammar_unused(grammar, dmap, symbols)
-    for s in unused:
-        slog(INFO, " + removing symbol", s)
-        del grammar[s]
-    return grammar
-
-# make symbol an empty literal production
-def grammar_trim_symbols(grammar, symbols):
-    grammar_cut_symbols(grammar, symbols)
-    for s in symbols:
-        slog(INFO, " + adding empty production for symbol", s)
-        p = Symbol(s)
-        p.set_type(p_literal)
-        grammar[s] = p
-
-    return grammar
-
-# flag symbols as non-payload
-def grammar_irrelevant_symbols(grammar, symbols):
-    for s in symbols:
-        grammar[s].set_is_payload(False)
-
-    return grammar
-def grammar_create_ebnf(grammar, opts):
-    indent = 40
-    slog(INFO, "creating ebnf from grammar of size", len(grammar))
-    out = ''
-    for t, p in grammar.iteritems():
-        slog(INFO, "formatting rule", t)
-        if not len(p.rules):
-            slog(INFO, "ignoring " + t + " (has no rules)\n")
-            continue
-        out += t + ' ' * (indent - len(t)) + " = " + format_ebnf_rule(grammar, p.rules[0]) + '\n'
-        for rule in p.rules[1:]:
-            out += ' ' * indent + " | " + format_ebnf_rule(grammar, rule) + '\n'
-	out += ' ' * indent + ' ;\n'
-    return out
-
-def format_token(sym, tp):
-    return misc.pad('%token <' + sym + '>', 27) + misc.pad(sym, 20) + '/* ' + tp + ' */'
-
-def grammar_create_y(grammar, opts):
-    indent = ' ' * 40
-    width = 0
-    for t, p in grammar.iteritems():
-        if p.term is not None:
-            continue
-        if len(t) > width:
-            width = len(t)
-    spaces = 0
-    while spaces < width:
-        spaces += 8
-    indent = '\t' * (spaces / 8)
-
-    conf = opts['config']
-
-    out = ""
-
-    # preamble
-    out += textwrap.dedent("""\
-        %{
-        #include <stdio.h>
-        #include <string.h>
-        #include <assert.h>
-        #include <stdlib.h>
-        #include <stdarg.h>
-
-        #include <vector>
-        #include <string>
-
-        #include <slog.h>
-
-    """)
-
-    for f in opts['includes']:
-        out += '#include "' + f + '"' + '\n'
-
-    out += "\nusing namespace " + opts['namespace'] + ';\n'
-
-    #out += textwrap.dedent("""\
-    #    using namespace std;
-
-    #    namespace {
-
-    #    typedef vector<const char *> wrap_t;
-    #    const wrap_t curly_braces{ "{", "}" };
-    #    const wrap_t round_braces{ "(", ")" };
-
-    #    }
-
-    #    #ifdef __cplusplus
-    #    // extern "C" {
-    #    #endif
-
-    out += textwrap.dedent("""\
-        %}
-
-    """)
-
-    # types
-    out += textwrap.dedent("""\
-        %union {
-    """)
-
-    types = grammar_get_types(grammar)
-    for t in types.keys():
-        s = grammar[t]
-        if s.tp == p_regex:
-            continue
-        out += '\n\t' + opts['namespace'] + '::' +  t + '_t *' + t + ';'
-    out += '\n'
-
-    out += textwrap.dedent("""\
-        }
-
-    """)
-
-    # yydecl
-    out += textwrap.dedent("""\
-        %{
-        // int FB_SYM(lex)(YYSTYPE *lval, struct vprun *vprun, void *scanner);
-        YY_DECL;
-        %}
-    """)
-
-    # terminal tokens
-    out += '\n'
-    for t, p in grammar.iteritems():
-        if p.tp == p_terminal:
-            out += format_token(p.sym, t) +'\n'
-
-    # special tokens
-    out += '\n'
-    for t, p in grammar.iteritems():
-        if p.tp == p_special:
-            if p.token == '?': # TODO: why is this among the symbols anyway?
-                continue
-            out += format_token(p.sym, t) +'\n'
-
-    # tokens from grammar
-    out += '\n'
-    for t, p in grammar.iteritems():
-        if p.tp == p_literal:
-            out += format_token(p.sym, t) +'\n'
-
-    # tokens from config
-    for t, p in grammar.iteritems():
-        if p.tp == p_regex:
-            out += format_token(t, "blah") + '\n'
-
-    # types
-    out += '\n'
-    for t, p in grammar.iteritems():
-        if p.tp == p_regex:
-            continue
-        if p.tp == p_ruleset:
-            out += misc.pad('%type <' + tok2sym(p.token) + '>', 40) + misc.pad(t, 35) + '/* ' + t + ' */' +'\n'
-
-    # options
-    out += textwrap.dedent("""\
-
-        %define parse.error verbose
-        // %define lr.type ielr
-        %define api.pure full
-        %param { struct context *context } { void *scanner }
-    """)
-
-    if opts['start'] is not None:
-        out += "%start " + opts['start']
-
-    # productions
-    out += '\n%%\n\n'
-    for t, p in grammar.iteritems():
-
-        if not len(p.rules):
-            continue
-        if p.tp == p_terminal:
-            continue
-        if p.tp == p_special:
-            continue
-        if p.tp == p_regex:
-            continue
-        slog(INFO, "creating production for symbol", p.str())
-
-        #if p.is_lexical_element is True:
-        #    continue
-        if len(p.rules) == 0:
-            raise Exception("Symbol ", p.str(), "has no rules")
-        first = True
-        n_rule = 0
-        for rule in p.rules:
-            n_rule += 1
-            n = 0
-            s = State()
-            if first:
-                out += t + ":" + (spaces - (len(t) + 1)) * ' ' + format_yacc_rule(rule) + "\n"
-                first = False
-            else:
-                out += indent + "| " + format_yacc_rule(rule) + "\n"
-            out += indent + "{" + "\n"
-            out += indent + "\t" + 'slog(PRI_NOTICE, "stack size = %d, %d / %d, %d", yyssp - &yyssa[0], yyss - &yyssa[0], yyvsp - &yyvsa[0], yyvs - &yyvsa[0]);\n'
-            out += indent + "\t" + "$$ = new " + opts['namespace'] + '::' + t + ";\n"
-            out += indent + "\t" + "$$->type = " + opts['namespace'] + '::' + t + "::t_" + str(n_rule) + ";\n"
-            tokens = []
-            for c in rule:
-                if c.tp == t_target_lang:
-                    tokens.append(c.token)
-            idx = 0
-            for c in rule:
-                if c.tp == t_grammar:
-                    s.update(c.token, 0)
-                    continue
-                if c.token in tokens:
-                    continue
-                n += 1
-                p = grammar[c.token]
-                #if is_terminal(c.token) is not None:
-                #    continue
-                if p.tp not in [ p_ruleset ]:
-                    continue
-                if not p.is_payload:
-                    continue
-                tp = tok2name(c.token)
-                suffix = ''
-                if tokens.count(c.token) > 1:
-                    idx += 1
-                    suffix = '_' + str(idx)
-                out += indent + "\t" + \
-                    "$$->data.r" + str(n_rule) + '.' + member_prefix + tp + suffix + \
-                    " = new " + p.datatype + "(*$" + str(n) + ");\n"
-            out += indent + "}" + "\n"
-        out += indent + ";\n\n"
-
-    # tail
-    out += '\n%%\n\n'
-
-    out += textwrap.dedent("""
-        #ifdef __cplusplus
-        // } /* extern "C" */
-        #endif
-    """)
-
-    return out + "\n"
-
-def grammar_create_l(grammar, opts):
-
-    ignore = ""
-    conf = opts['config']
-
-    out = textwrap.dedent("""\
-        %option reentrant
-        %option bison-bridge
-
-        %{
-        #include <slog.h>
-
-        #define YY_USER_ACTION \\
-            context->first_line = context->last_line; \\
-            context->first_column = context->last_column; \\
-            for(int i = 0; yytext[i] != '\\0'; i++) { \\
-                if(yytext[i] == '\\n') { \\
-                    context->last_line++; \\
-                    context->last_column = 0; \\
-                } else { \\
-                    context->last_column++; \\
-                } \\
-            }
-    """)
-
-    for f in opts['includes']:
-        out += '#include "' + f + '"' + '\n'
-
-    out += "\nusing namespace " + opts['namespace'] + ';\n'
-
-    #out += textwrap.dedent("""\
-
-    #    /* This is too late in the Flex generated file to work. Still lots of
-    #     * prototypes are spat into it above it, which end up with C++ linkage, of
-    #     * course, causing the linkages to be inconsistent to the functions below this
-    #     * extern "C". Only way I found was to use C++ is to use it on Bison only, and
-    #     * have Flex use C instead. */
-    #    #ifdef __cplusplus
-    #    // extern "C" {
-    #    #endif
-
-    #    #ifdef _REMOVE_ME
-    #    static void get_string(YYSTYPE *yylval_param, yyscan_t yyscanner, int skip);
-    #    static void get_based_string(YYSTYPE *yylval_param, yyscan_t yyscanner, int skip);
-    #    #endif
-
-    #    %}
-
-    out += textwrap.dedent("""\
-        %}
-
-        %%
-
-        """)
-
-    for t, p in grammar.iteritems():
-        if p.term is not None:
-            # \. { return T_DOT; }
-            assert p.term[0] in [ '"', "'" ], p.term
-            assert p.term[-1] in [ '"', "'" ], p.term
-            out += re.escape(p.term[1:-1]) + ' { slog(PRI_NOTICE, "found terminal ' + p.sym + '"); return ' + p.sym + '; }\n'
-
-    for t, p in grammar.iteritems():
-        if p.tp == p_regex:
-            c = conf['symbols'][t]
-            lex_as = c.get('lex_as')
-            if lex_as is not None:
-                retval = lex_as.value()
-            else:
-                retval = t
-            regex = c['regex'].value()
-            out += regex + ' { slog(PRI_NOTICE, "found regex \\"' + regex + '\\" for ' + t + '"); return ' + retval + '; }\n'
-
-    #out += textwrap.dedent("""\
-    #
-    #    %{/* basic_identifier */%}
-    #    %{/* extended_identifier */%}
-    #    %{/* based_integer */%}
-    #    %{/* bit_value */%}
-    #    %{/* numeric_literal */%}
-    #    %{/* enumeration_literal */%}
-    #    %{/* string_literal */%}
-    #    %{/* bit_string_literal */%}
-    #    %{/* character_literal */%}
-    #    %{/* graphic_character */%}
-    #    %{/* basic_character */%}
-    #    %{/* integer */%}
-    #
-    #	""")
-
-    ignore += textwrap.dedent("""\
-
-        %{ /* not sure how to handle literals >> */ %}
-        \\"[ \\!#-~]*\\" |
-        \\'[0-1]\\' {
-        	// get_string(yylval_param, yyscanner, 1);
-        	/* Gets a string excluding " or ' */
-        	int skip = 1;
-        	int i;
-
-        	for (i=skip; yytext[i]!='"' && yytext[i]!='\\'' && yytext[i]!=0; i++);
-        	yytext[i] = 0;
-        	YYSTYPE *lv = FB_SYM(get_lval(yyscanner));
-        	lv->txt=(char *)malloc(i+1);
-        	strcpy(lv->txt, yytext+skip);
-
-        	return STRING;
-        }
-
-        #[0-9a-f]*# {
-        	// get_based_string(yylval_param, yyscanner, 1); /* skip leading # */
-        	/* Gets a string excluding # */
-        	int i;
-        	int skip = 1;
-
-        	for(i=skip; yytext[i] !='#' && yytext[i]!=0; i++);
-        	yytext[i] = 0;
-        	YYSTYPE *lv = FB_SYM(get_lval(yyscanner));
-        	lv->txt = (char *)malloc(i+1);
-        	strcpy(lv->txt, yytext + skip);
-
-        	return BASED;
-        }
-
-        [a-zA-Z_$][a-zA-Z0-9_$.]* {
-        	YYSTYPE *lv = FB_SYM(get_lval(yyscanner));
-        	lv->txt=(char *)malloc(strlen(yytext)+1);
-        	strcpy(lv->txt, yytext);
-        	return NAME;
-        }
-
-        [0-9]+ {
-        	YYSTYPE *lv = FB_SYM(get_lval(yyscanner));
-        	sscanf(yytext, "%d", &lv->n);
-        	return NATURAL;
-        }
-
-        """)
-
-    out += textwrap.dedent("""\
-        . {
-                slog(PRI_NOTICE, "returning character '%c'", yytext[0]);
-        	return yytext[0];
-        }
-
-        %{/* not sure how to handle literals << */%}
-
-        %%
-
-        void FB_SYM(error)(struct context *context, void *scanner, const char *msg)
-        {
-        	struct yyguts_t *yyg =(struct yyguts_t*)scanner;
-                set_error(PRI_ERR, EINVAL, "%s at \\"%s\\" in line %d:%d", msg, yytext, context->last_line, context->last_column);
-        }
-
-        int FB_SYM(wrap)(void *scanner)
-        {
-        	return 1;
-        }
-
-        struct vp_scanner {
-        	YY_BUFFER_STATE buf;
-        	void *scanner;
-        	char *str;
-        };
-
-        /* utilities which need to be placed here, because I can't find
-         * yylex_init() / _destroy() in any generated header file (??) */
-        struct vp_scanner *FB_SYM(init_scanner)(const char *str)
-        {
-        	struct vp_scanner *r = (struct vp_scanner *)calloc(1, sizeof(*r));
-
-        	yylex_init(&r->scanner);
-        	r->str = strdup(str);
-        	r->buf = yy_scan_string(r->str, r->scanner);
-        	FB_SYM(set_extra)(r, r->scanner);
-        	// yyset_in(stdin, r->scanner);
-        	// yyset_out(stdout, r->scanner);
-        	return r;
-        }
-
-        void *FB_SYM(scanner_get_data)(const struct vp_scanner *scanner)
-        {
-        	return scanner->scanner;
-        }
-
-        void FB_SYM(cleanup_scanner)(struct vp_scanner *scanner)
-        {
-        	free(scanner->str);
-        	yy_delete_buffer(scanner->buf, scanner->scanner);
-        	yylex_destroy(scanner->scanner);
-        	free(scanner);
-        }
-
-        int FB_SYM(create_ast)(const char *str)
-        {
-        	// TODO: Initialize this in a generated function
-        	struct context context = {
-        		first_line: 1,
-        		last_line: 1,
-        		first_column: 0,
-        		last_column: 0
-        	};
-        
-        	FB_SYM(debug) = 1;
-        
-        	struct vp_scanner *scanner = FB_SYM(init_scanner)(str);
-        	int status = FB_SYM(parse)(&context, FB_SYM(scanner_get_data)(scanner));
-        	FB_SYM(cleanup_scanner)(scanner);
-        	if (status) {
-        		slog(PRI_ERR, "failed to parse (%s)", err());
-        		return -1;
-        	}
-
-        	return 0;
-        }
-
-	""")
-
-    #    #ifdef __cplusplus
-    #    // } // extern "C"
-    #    #endif
-    #
-    #	""")
-
-    return out
-
-def grammar_create_h(grammar, opts):
-    out = "#ifndef " + opts['mip'] + '\n#define ' + opts['mip'] + '\n\n'
-    ns = opts['namespace']
-
-    out += textwrap.dedent("""\
-    
-        #define YY_NO_INPUT
-        #define YY_NO_UNPUT
-        // #define YY_NO_UNISTD_H
-
-        struct context {
-        	int first_line;
-        	int last_line;
-        	int first_column;
-        	int last_column;
-        };
-
-        union YYSTYPE;
-
-        #ifdef __cplusplus
-        extern "C" {
-        #endif
-
-        struct vp_scanner;
-
-        struct vp_scanner *FB_SYM(init_scanner)(const char *str);
-        void *FB_SYM(scanner_get_data)(const struct vp_scanner *scanner);
-        void FB_SYM(cleanup_scanner)(struct vp_scanner *scanner);
-        int FB_SYM(create_ast)(const char *str);
-
-        void FB_SYM(error)(struct context *context, void *scanner, const char *s);
-
-        #ifdef __cplusplus
-        } // extern "C"
-        #endif
-
-        #define YY_DECL int FB_SYM(lex)(YYSTYPE *yylval_param, struct context *context, void *yyscanner)
-
-	""")
-
-    if ns is not None:
-        out += 'namespace ' + ns + '{\n\n'
-
-    types = grammar_get_types(grammar)
-
-    # struct forward declarations
-    for t, members in types.iteritems():
-        s = grammar[t]
-        if s.tp == p_regex:
-            continue
-        if len(members):
-            out += '\nstruct ' +  t + ';'
-    out += '\n'
-
-    # struct / non-struct typedefs
-    for t, members in types.iteritems():
-        s = grammar[t]
-        if s.tp == p_regex:
-            continue
-        if not len(members):
-            out += '\ntypedef const char ' +  t + '_t;'
-            continue
-        out += '\ntypedef struct ' +  t + ' ' + t + '_t;'
-    out += '\n'
-
-    # struct definitions
-    for t, rules in types.iteritems():
-        s = grammar[t]
-        if s.tp == p_regex:
-            continue
-        if not len(rules):
-            continue
-        out += '\n\nstruct ' +  t + ' {\n'
-
-        # rule structs
-        n = 0
-        for rule in rules:
-            n += 1
-            idx = 0
-            out += '\n\tstruct ' + 'r' + str(n) + '_t {'
-            for m in rule:
-                suffix = ''
-                if rule.count(m) > 1:
-                    idx += 1
-                    suffix = '_' + str(idx)
-                ms = grammar[m]
-                if ms.tp == p_regex:
-                    continue
-                p = grammar[m]
-                out += '\n\t\t' + p.datatype + ' *' + member_prefix + m + suffix + ';'
-            out += '\n\t};'
-
-        # type enum
-        n = 0
-        out += '\n\n\tenum {'
-        for rule in rules:
-            n += 1
-            out += '\n\t\tt_' + str(n) + ','
-        out += '\n\t} type;'
-        out += '\n'
-
-        # data union
-        n = 0
-        out += '\n\tunion {'
-        for rule in rules:
-            n += 1
-            out += '\n\t\tstruct ' + 'r' + str(n) + '_t r' + str(n) + ';'
-        out += '\n\t} data;'
-
-        # struct done
-        out += '\n};'
-
-    out += '\n'
-
-    if ns is not None:
-        out += '\n} /* namespace ' + ns + '*/'
-
-    out += '\n\n#endif /* #ifndef + ' + opts['mip'] + ' */'
-
-    return out
-
-def grammar_fix_list_recursion(grammar):
-    dead_end = set()
-    for kl, l in grammar.iteritems():
-        if not re.match('^list_', kl):
-            continue
-        for ks, s in grammar.iteritems():
-            for rule in s.rules:
-                state = State()
-                for c in rule:
-                    state.update(c.token, c.line)
-                    if c.token == kl:
-                        if state.is_optional():
-                            continue
-                        dead_end.add(c.token)
-    for t in dead_end:
-        grammar[t].rules.insert(0, [])
-    return grammar
-
-class GrammarCmd(jwutils.Cmd):
-
-    def __init__(self, name, help):
-        super(GrammarCmd, self).__init__(name, help=help)
-
-    def add_parser(self, parsers):
-        p = super(GrammarCmd, self).add_parser(parsers)
-        p.add_argument("input", help="input file")
-        p.add_argument('-l', '--unroll-lists', help='unroll EBNF lists', action='store_true', default=False)
-        p.add_argument('-e', '--fix-extensions', help='fix EBNF prefix extensions (' + '|'.join(fix_extensions_mode) + ')', default=mode_concat)
-        p.add_argument('-o', '--unroll-options', help='unroll EBNF options', action='store_true', default=False)
-        p.add_argument('-a', '--unroll-alternatives', help='unroll EBNF alternatives', action='store_true', default=False)
-        p.add_argument('-w', '--replace-whitespace', help='replace white space in tokens by underscore characters', action='store_true', default=False)
-        p.add_argument('--check-symbols', help='check symbols, comma-separated or "all"', nargs='?', default='')
-        p.add_argument('-t', '--trim-symbols', help='trim grammar tree at symbol', nargs='?', default='')
-        p.add_argument('-r', '--irrelevant-symbols', help='exclude symbol from output payload', nargs='?', default='')
-        p.add_argument('-c', '--cut-symbols', help='cut grammar tree at symbol', nargs='?', default='')
-        p.add_argument('-s', '--start-symbols', help='use start-symbols', nargs='?', default=None)
-        p.add_argument('-f', '--config-file', help='config file', nargs='?', default=None)
-        return p
-
-    def processGrammar(self, args, grammar):
-
-        if args.config_file is not None:
-            config = serdes.read(args.config_file)
-            #config.dump(ERR)
-            grammar = grammar_add_configured_types(grammar, config)
-        if args.fix_extensions not in fix_extensions_mode:
-            raise Exception("Illegal argument ", args.fix_extensions, "to --fix-extensions")
-        grammar = grammar_fix_extensions(grammar, args.fix_extensions)
-        if args.unroll_lists:
-            grammar = grammar_unroll_lists(grammar)
-        if args.unroll_alternatives:
-            grammar = grammar_unroll_alternatives(grammar)
-        if args.unroll_options:
-            grammar = grammar_unroll_options(grammar)
-        grammar = grammar_fix_list_recursion(grammar)
-        #grammar['logical_expression'].dump(ERR)
-        if len(args.check_symbols):
-            check_symbols = []
-            if args.check_symbols == 'all':
-                args.check_symbols = ''
-            check_symbols = args.check_symbols.split()
-            grammar_check(grammar, check_symbols)
-        if args.replace_whitespace:
-            grammar = grammar_replace_whitespace(grammar)
-        if len(args.trim_symbols):
-            grammar = grammar_trim_symbols(grammar, args.trim_symbols.split(','))
-        if len(args.cut_symbols):
-            grammar = grammar_cut_symbols(grammar, args.cut_symbols.split(','))
-        if len(args.irrelevant_symbols):
-            grammar = grammar_irrelevant_symbols(grammar, args.irrelevant_symbols.split(','))
-        return grammar
-
-# ------------------------------------------------- TODO: clean this up >
-
-class DerivedGrammarCmd(GrammarCmd):
-
-    def __init__(self, name, help):
-        super(DerivedGrammarCmd, self).__init__(name, help=help)
-
-    @abstractmethod
-    def _run(self, grammar):
-        pass
-
-    def _parse(self, contents):
-        return grammar_parse_ebnf(contents)
-
-    def add_parser(self, parsers):
-        p = super(DerivedGrammarCmd, self).add_parser(parsers)
-        return p
-
-    def run(self, args):
-        with open(args.input, 'r') as infile:
-            contents = infile.read()
-        grammar = self._parse(contents)
-        grammar = super(DerivedGrammarCmd, self).processGrammar(args, grammar)
-        self._run(args, grammar)
-
-class CmdCreate(DerivedGrammarCmd):
-
-    def __init__(self):
-        super(CmdCreate, self).__init__("create", help="Create a file")
-
-    def add_parser(self, parsers):
-        p = super(CmdCreate, self).add_parser(parsers)
-        p.add_argument("output", help="output file")
-        p.add_argument('--namespace', help='namespace of generated AST', default='parser')
-        p.add_argument('--includes', help='list of header files to be #included in C/C++ implementation files', default='')
-        return p
-
-    def _run(self, args, grammar):
-        name, ext = os.path.splitext(args.output)
-        ext = ext[1:]
-        #cmd = getattr(sys.modules[__name__], 'create_' + re.sub(r'[-./]', '_', args.output))
-        mip = None
-        if ext == 'h':
-            mip = args.namespace + re.sub(r'[-./]', '_', args.output).upper()
-
-        includes = args.includes.split(',')
-
-        config = None
-        if args.config_file is not None:
-            config = serdes.read(args.config_file)
-
-        # generated code breaks without this, not sure why
-        if ext == 'l':
-            tmp = []
-            for f in includes:
-                if not re.match('.*lex\..*\.h', f):
-                    tmp.append(f)
-            includes = tmp
-
-        opts = {
-            "namespace" : args.namespace,
-            "includes"  : includes,
-            "mip"       : mip,
-            "config"    : config,
-            "start"     : args.start_symbols
-        }
-
-        cmd = getattr(sys.modules[__name__], 'grammar_create_' + ext)
-        out = cmd(grammar, opts)
-        print(out)
-
-class CmdCheck(DerivedGrammarCmd):
-
-    def __init__(self):
-        super(CmdCheck, self).__init__("check", help="Check grammar")
-
-    def add_parser(self, parsers):
-        p = super(CmdCheck, self).add_parser(parsers)
-        return p
-
-    def _run(self, args, grammar):
-        pass