jw-python/scripts/process-text-files.py
Jan Lindemann a85f0929c6 Consistently replace def run() by async def run()
Signed-off-by: Jan Lindemann <jan@janware.com>
2019-12-20 08:41:58 +01:00

591 lines
23 KiB
Python

#!/usr/bin/python3
# -*- coding: utf-8 -*-
from __future__ import print_function
import re
import argparse
from abc import abstractmethod
import os
from shutil import copyfile
import subprocess
import jwutils
from jwutils.log import *
def _regexify(l):
    """Build an alternation regex matching any of the given literal suffixes.

    Each entry is escaped with re.escape (so '.' and any other regex
    metacharacter match literally — the original '\\.'-only replacement
    missed characters like '+') and anchored at end-of-string, then the
    alternatives are joined with '|'.
    """
    return '|'.join(re.escape(x) + '$' for x in l)
# File-extension groups used to classify C/C++ sources (set literals are
# the idiomatic and faster spelling of set([...])).
_exts_h = {'.h', '.H', '.hxx', '.HXX', '.hpp'}
_exts_cpp = {'.cpp', '.CPP', '.c', '.C', '.cxx', '.CXX'}
# All C/C++ translation-unit extensions, headers and implementations alike.
_exts_h_cpp = _exts_h | _exts_cpp
# ----------------------------------------------------------------------------------- Base class
class Cmd(jwutils.Cmd):
    """Base class for all text-processing sub-commands in this script."""
    def __init__(self, name, help):
        # No pattern->replacement table until _init() loads one from
        # --replace-patterns-from.
        self.replacements = None
        super(Cmd, self).__init__(name, help=help)
@staticmethod
def _replace_pattern(line, src, target, context=None):
    """Literal substring replacement; *context* is accepted only for
    signature symmetry with the other replacement callbacks."""
    replaced = line.replace(src, target)
    return replaced
@staticmethod
def _indent_pattern(data, src, target, context=None):
    """Align a separator pattern (e.g. '=') to a fixed column across *data*.

    *src*/*target* are unused; all options come from *context*:
      indent              target column for the left-hand side (default 30)
      pattern             regex splitting each line (default '=')
      right-align-match   right-justify the matched separator to this width
      left-pad-match      compensate padding against the separator's spaces
      skip-lhs-pattern    pass through lines whose LHS matches this regex
      require-lhs-pattern only align lines whose LHS matches this regex
      skip-short          leave inputs with fewer lines untouched
      min-assignments     leave inputs with fewer aligned lines untouched
    Returns the realigned text, or *data* unchanged when a skip rule fires.
    """
    # Defaults; each may be overridden through *context*.
    indent = 30
    pattern = '='
    right_align_match = 0
    skip_lhs_pattern = None
    require_lhs_pattern = None
    skip_short = None
    min_assignments = None
    left_pad_match = 1
    if context is not None:
        if 'indent' in context:
            indent = context['indent']
        if 'pattern' in context:
            pattern = context['pattern']
        if 'right-align-match' in context:
            right_align_match = context['right-align-match']
        if 'left-pad-match' in context:
            left_pad_match = context['left-pad-match']
        if 'skip-lhs-pattern' in context:
            skip_lhs_pattern = context['skip-lhs-pattern']
        if 'require-lhs-pattern' in context:
            require_lhs_pattern = context['require-lhs-pattern']
        if 'skip-short' in context:
            skip_short = context['skip-short']
        if 'min-assignments' in context:
            min_assignments = context['min-assignments']
    r = ''
    assignments=0
    lines = data.splitlines()
    # Short inputs are left untouched entirely.
    if skip_short is not None and len(lines) < skip_short:
        return data
    for line in iter(lines):
        #slog(NOTICE, "indenting pattern", pattern, "of", line)
        parts = re.split(pattern, line)
        # Pass through lines without the pattern, or filtered by LHS rules.
        if (
            len(parts) < 2
            or (skip_lhs_pattern is not None and re.match(skip_lhs_pattern, parts[0]))
            or (require_lhs_pattern is not None and not re.match(require_lhs_pattern, parts[0]))
        ):
            r += line + '\n'
            continue
        #slog(NOTICE, "split into", parts)
        if right_align_match > 0:
            parts[1] = parts[1].rjust(right_align_match)
        # Left-hand side padded out to the requested column.
        lhs = parts[0].rstrip().ljust(indent)
        if len(parts) > 2:
            p2_stripped = parts[2].strip()
            if len(p2_stripped) or len(parts) > 3:
                parts[2] = ' ' + p2_stripped
        if left_pad_match > 0:
            # NOTE(review): nesting reconstructed from a whitespace-mangled
            # source — pad grows by one space for each non-space character
            # at the end of lhs, then shrinks by one for each leading space
            # of the separator; confirm against the original layout.
            pad = ''
            #slog(NOTICE, "working off {} >{}<".format(left_pad_match, lhs[-left_pad_match:]))
            for c in lhs[-left_pad_match:]:
                if c != ' ':
                    pad += ' '
            for c in parts[1]:
                if c != ' ':
                    break
                if len(pad):
                    pad = pad[:-1]
            #slog(NOTICE, " pad for {}=>{}<".format(line, pad))
            lhs += pad
        r += lhs + ''.join(parts[1:]) + '\n'
        assignments += 1
    # Too few aligned lines: return the input unchanged.
    if min_assignments is not None and assignments < min_assignments:
        return data
    return r
@staticmethod
def _cpp_indent_macros(data, src, target, context=None):
    """Re-indent C/C++ preprocessor directives in *data* by nesting level.

    *src*/*target* are unused; *context* may carry:
      spaces            indent width per nesting level (default 2)
      indent-macro      column for the macro name of a #define
      indent-definiton  column for the macro definition (key as spelled)
      indent-comment    column for a trailing // or /* comment
      skip-outer        treat the outermost conditional (an include guard)
                        as level 0 instead of indenting its contents
    Returns the re-indented text.
    """
    def __ctx(context, key, default):
        # Context lookup with a default; tolerates context=None.
        if context is not None and key in context:
            return context[key]
        return default
    def format_line(context, indent, directive, rhs):
        # Render one '#<directive> <rhs>' line at the given nesting level.
        if indent < 0:
            indent = 0
        spaces = __ctx(context, 'spaces', 2)
        indent_macro = __ctx(context, 'indent-macro', 0)
        indent_def = __ctx(context, 'indent-definiton', 40)
        indent_comment = __ctx(context, 'indent-comment', 70)
        rhs = rhs.strip()
        directive = directive.strip()
        r = '#' + ' ' * indent * spaces + directive
        if len(rhs) == 0:
            return r + '\n'
        r += ' '
        if indent_macro > 1:
            r = r.ljust(indent_macro - 1)
        if not directive in [ 'define' ]:
            return (r + rhs).rstrip() + '\n'
        # Split a #define into macro name (with optional parameter list),
        # definition and trailing comment.
        m = re.match(r"^\s*(\w+(\([^)]*\))*)(\s+(.*))*", rhs)
        if m is None:
            raise Exception("invalid rhs", rhs)
        macro = m.group(1)
        def_and_comment = m.group(4)
        r += macro
        if def_and_comment is None:
            return r.rstrip() + '\n'
        r += ' '
        if indent_def > 1:
            r = r.ljust(indent_def - 1)
        parts = re.split(r"(//|/\*)", def_and_comment)
        if len(parts) <= 1:
            return r + def_and_comment + '\n'
        if len(parts) == 2:
            raise Exception("Failed to dissect definition + comment", def_and_comment)
        return (r + parts[0].strip()).ljust(indent_comment) + parts[1].strip() + ' ' + ''.join(parts[2:]).strip() + '\n'
    skip_outer = True
    if context is not None:
        if 'skip-outer' in context:
            skip_outer = context['skip-outer']
    r = ''
    lnum = 0
    # BUG FIX: use the skip_outer computed above instead of indexing
    # context['skip-outer'] directly, which crashed when context was None
    # or lacked the key (and made the skip_outer local dead code).
    level = -1 if skip_outer else 0
    lines = data.splitlines()
    for line in iter(lines):
        lnum += 1
        m = re.match(r"^\s*#\s*(\w+)(\W*)(.*)", line)
        if m is None:
            # Not a preprocessor line: copy through unchanged.
            r += line + '\n'
            continue
        directive = m.group(1)
        rhs = (m.group(2) + m.group(3)).strip()
        if directive is None or rhs is None:
            raise Exception("Syntax error in line", lnum, ":", line)
        if directive in [ "if", "ifdef", "ifndef" ]:
            r += format_line(context, level, directive, rhs)
            level += 1
            continue
        if directive in [ "else" ]:
            # #else belongs to the enclosing #if, one level out.
            r += format_line(context, level - 1, directive, rhs)
            continue
        if directive in [ "endif" ]:
            level -= 1
            r += format_line(context, level, directive, rhs)
            continue
        r += format_line(context, level, directive, rhs)
    return r
@staticmethod
def _cleanup_spaces(data, src, target, context=None):
    """Strip trailing whitespace from every line and drop trailing blank
    lines; every remaining line ends with exactly one newline.
    *src*/*target*/*context* are unused (callback signature symmetry)."""
    kept = data.splitlines()
    # Discard blank (whitespace-only) lines at the end of the input.
    while kept and not kept[-1].strip():
        kept.pop()
    return ''.join(text.rstrip() + '\n' for text in kept)
@staticmethod
def _replace_cpp_symbol(data, src, target, context=None):
    """Replace every whole-word occurrence of C/C++ identifier *src* with
    *target* in *data*.

    The word boundary is any non-identifier character or line start/end;
    substitution loops until a fixed point because one pass consumes the
    delimiter shared by adjacent occurrences ('foo foo foo').
    Returns the rewritten text; on a bad pattern the input is returned
    unchanged after logging.
    """
    stopc = "^a-zA-Z0-9_"
    stopa = "(^|[" + stopc + "])"
    stope = "([" + stopc + "]|$)"
    f = stopa + src + stope
    t = "\\1" + target + "\\2"
    try:
        while True:
            r = re.sub(f, t, data, flags=re.MULTILINE)
            if r == data:
                break
            data = r
    # BUG FIX: was a bare 'except:' (swallowed everything, including
    # KeyboardInterrupt); only a malformed pattern is expected here.
    except re.error:
        slog(ERR, "failed to replace", f, "by", t, "in", data)
        return data
    return r
def _replace_in_file(self, path, replacements, func=None, backup='rep', context=None):
    """Apply *func* to the whole content of *path* for every (src, target)
    pair in *replacements*.

    The result is written to a temporary sibling file first.  When nothing
    changed the temp file is removed and False is returned.  When *backup*
    is None the temp file replaces *path* atomically via rename; otherwise
    *path* is overwritten and '<path>.<backup>' is kept.
    Returns True when the file content changed.
    """
    if func is None:
        func = self._replace_pattern
    tmp_ext = backup
    if tmp_ext is None:
        # BUG FIX: this read 'tmp_ext = tmp', but 'tmp' is not defined yet
        # at this point — a guaranteed NameError whenever backup=None.
        # Use a literal temp extension instead.
        tmp_ext = 'tmp'
    tmp = path + '.' + tmp_ext
    changed = False
    with open(path) as infile, open(tmp, 'w') as outfile:
        data = infile.read()
        for src, target in replacements.items():
            odata = data
            data = func(data, src, target, context)
            if data != odata:
                changed = True
        outfile.write(data)
    if not changed:
        os.unlink(tmp)
        return False
    if backup is None:
        os.rename(tmp, path)
    else:
        # NOTE(review): the kept '.<backup>' file holds the NEW content
        # (the temp file is copied over *path*), not the original —
        # preserved as-is since existing workflows may rely on it.
        copyfile(tmp, path)
    return True
def _replace_in_string(self, string, replacements, func=None):
    """Apply every (src, target) pair in *replacements* to each line of
    *string* via *func* (default: plain substring replacement) and return
    the joined result.

    BUG FIX: the previous version split with splitlines() and concatenated
    the processed lines back without any separator, silently dropping all
    newlines; keepends=True preserves the original line boundaries.
    """
    if func is None:
        func = self._replace_pattern
    out = []
    for line in string.splitlines(keepends=True):
        for src, target in replacements.items():
            line = func(line, src, target)
        out.append(line)
    return "".join(out)
def _add_namespace_to_header(self, data, ns_new):
    """Wrap the declarations of a header in namespace *ns_new*.

    NOTE(review): unfinished — the scan raises as soon as an existing
    namespace is found, 'old'/'ns_cur' are never used, the include-guard
    symbol is extracted into 'classname' but discarded, and no namespace
    is ever actually inserted.
    """
    lines = data.splitlines()
    old = None
    ns_cur = []
    for line in iter(lines):
        # A line that changes under this substitution opens a namespace.
        match = re.sub('^ *namespace[ \t]*([^ ]+)[ \t]*{.*', '\\1', line)
        if match != line:
            #ns_cur = match(blah, bl
            raise Exception("Name space addition is not yet implemented")
        # Extract the include-guard symbol (currently unused).
        classname = re.sub('^ *#[ \t]*ifndef[\t ]+([^ ]+)($|[ \t/;])', '\\1', line)
def _fix_include_guard(self, prefix, path):
    """Rewrite the include guard of header *path* to the canonical symbol
    derived from *prefix* and the file name (CamelCase and '.'/'-' become
    underscores, collapsed and upper-cased).

    Returns False for non-header paths; raises when no existing
    '#ifndef X' / '#define X' guard pair is found.  Rewrites *path* in
    place (via a '.ig' temp file) only when something changed.
    """
    dir, name = os.path.split(path)
    if len(name) == 0:
        return False
    trunc, ext = os.path.splitext(name)
    if ext not in _exts_h:
        return False
    # Build the canonical guard token from the file name.
    tok = re.sub('([A-Z])', '_\\1', name)
    tok = re.sub('[.-]', '_', tok)
    ig = prefix + '_' + tok
    ig = re.sub('__', '_', ig)
    ig = ig.upper()
    # find first old ig
    with open(path, 'r') as f:
        odata = f.read()
        lines = odata.splitlines()
    old = None
    cand = None
    # Pass 1: a guard is an '#ifndef X' immediately followed by '#define X'.
    for line in iter(lines):
        if old is None:
            if cand is None:
                m = re.match('^\s*#\s*ifndef\s+(\w+)', line)
                if m is not None:
                    cand = m.group(1)
            else:
                m = re.match('^\s*#\s*define\s+(\w+).*', line)
                if m is None:
                    # Not a define right after the ifndef: restart the scan.
                    cand = None
                    continue
                cand = None
                old = m.group(1)
                break
    if old is None:
        # TODO: add anyway at beginning and end
        raise Exception('No multiple inclusion preventer found in', path, ', not implemented')
    ndata = ''
    level = 0
    found = -1
    num=0
    # Pass 2: replace the guard's #ifndef/#define/#endif with the canonical
    # symbol, tracking conditional nesting depth in 'level'; 'found' holds
    # the nesting level of the guard once its #ifndef has been rewritten.
    for line in iter(lines):
        num += 1
        if re.match('^\s*#\s*(ifdef|ifndef|if)\s+[a-zA-Z0-9_(]', line):
            level += 1
            # Only the first #ifndef (the guard) is rewritten.
            if found >= 0 or re.match('^\s*#\s*ifndef\s+(\w+)', line) is None:
                ndata += line + '\n'
                continue
            found = level
            ndata += '#ifndef ' + ig + '\n'
            continue
        elif re.match('^\s*#\s*define\s+' + old + '($|\s+)', line):
            # Rewrite the matching #define only at the guard's level.
            if found != level:
                ndata += line + '\n'
                continue
            ndata += '#define ' + ig + '\n'
            continue
        elif re.match('^\s*#\s*endif($|\s.*)', line):
            level -= 1
            # The #endif closing the guard brings us back below its level.
            if found != level + 1:
                ndata += line + '\n'
                continue
            ndata += '#endif // ' + ig + '\n'
            continue
        ndata += line + '\n'
    if odata != ndata:
        slog(NOTICE, "+ fix include guards in", path)
        tmp = path + '.ig'
        with open(tmp, 'w') as f:
            f.write(ndata)
        os.rename(tmp, path)
def add_parser(self, parsers):
    """Register this command's argparse sub-parser and attach the options
    shared by every text-processing command."""
    parser = super(Cmd, self).add_parser(parsers)
    parser.add_argument('-r', "--root", help="Point in file system from which to start search", default='.')
    parser.add_argument("--name-regex", help="Regular expression to select input file names", default=None)
    parser.add_argument("--replace-patterns-from", help="File with patterns to replace, side by side, divided by '->'", default=None)
    parser.add_argument("--backup", help="Backup extension", default='rep')
    parser.add_argument('-g', '--git', help="Use git mv for renaming files", action='store_true', default=False)
    return parser
def _init(self, args):
    """Load the pattern->replacement table from --replace-patterns-from.

    Populates self.replacements (dict) when the option is given; each line
    is 'src->target'.  ROBUSTNESS FIX: lines without a '->' separator
    (e.g. a trailing blank line in the pattern file) previously raised
    IndexError on s[1]; they are now skipped.
    """
    if args.replace_patterns_from is not None:
        self.replacements = dict()
        with open(args.replace_patterns_from) as infile:
            for line in infile:
                s = re.split('->', line)
                if len(s) < 2:
                    continue
                self.replacements[s[0]] = s[1].rstrip('\n')
        #slog(NOTICE, "replacements =", self.replacements)
# overriding
async def run(self, args):
    """Walk --root, collect the (dir, name) pairs whose file name matches
    --name-regex, and hand them to process()."""
    self._init(args)
    files = []
    if args.name_regex is not None:
        matcher = re.compile(args.name_regex)
        for root, dirs, names in os.walk(args.root):
            files.extend((root, name) for name in names if matcher.search(name))
    self.process(args, files)
@abstractmethod
def process(self, args, files):
    """Handle the collected (dir, name) file pairs; subclasses implement."""
    pass
# ----------------------------------------------------------------------------------- commands
class CmdReplacePatterns(Cmd):
    """Command: apply the literal pattern replacements to matching files."""
    def __init__(self):
        super(CmdReplacePatterns, self).__init__("replace-patterns", "Replace patterns in files")
    def process(self, args, files):
        # Without a loaded replacement table there is nothing to do.
        if self.replacements is None:
            return
        for directory, filename in files:
            self._replace_in_file(directory + '/' + filename, self.replacements, func=self._replace_pattern)
class CmdCleanupSpaces(Cmd):
    """Command: strip trailing whitespace and trailing blank lines."""
    def __init__(self):
        super(CmdCleanupSpaces, self).__init__("cleanup-spaces", "Remove trailing empty lines")
    def add_parser(self, parsers):
        return super(CmdCleanupSpaces, self).add_parser(parsers)
    def process(self, args, files):
        slog(NOTICE, "Cleaning up unnecessary space in", len(files), "files:")
        context = dict()
        dummy = {"blah": "blub"} # just a dummy to use _replace_in_file, TODO: obviate the need
        for directory, filename in files:
            path = directory + '/' + filename
            if self._replace_in_file(path, dummy, func=self._cleanup_spaces, context=context):
                slog(NOTICE, "+ purged spaces :", path)
class CmdMkIndentEquals(Cmd):
    """Command: align '=' assignments in makefiles to a fixed column."""
    def __init__(self):
        super(CmdMkIndentEquals, self).__init__("mk-indent", "Indent and beautify makefiles")
    def add_parser(self, parsers):
        p = super(CmdMkIndentEquals, self).add_parser(parsers)
        p.add_argument('-e', "--equal-pos", help="Columns number of equal sign", type=int, default=40)
        p.add_argument("--skip-short", help="Don't change makefiles with less lines of code", type=int, default=6)
        p.add_argument("--min-assignments", help="Don't change makefiles with less assignment statements", type=int, default=4)
        return p
    def process(self, args, files):
        slog(NOTICE, "Beautifying", len(files), "makefiles:")
        right_align_match = 2
        left_pad_match = 1
        # Alignment options handed down to Cmd._indent_pattern.
        context = {
            "indent": args.equal_pos - right_align_match,
            "pattern": "([?+:]*=|::=)",
            "right-align-match": right_align_match,
            "left-pad-match": left_pad_match,
            "skip-lhs-pattern": "[^A-Za-z0-9_# ]",
            "require-lhs-pattern": "^[ #]*[A-Za-z0-9_]",
            "skip-short": args.skip_short,
            "min-assignments": args.min_assignments,
        }
        dummy = {"blah": "blub"} # just a dummy to use _replace_in_file, TODO: obviate the need
        for directory, filename in files:
            path = directory + '/' + filename
            if self._replace_in_file(path, dummy, func=self._cleanup_spaces):
                slog(NOTICE, "+ purged spaces :", path)
            if self._replace_in_file(path, dummy, func=self._indent_pattern, context=context):
                slog(NOTICE, "+ aligned equals :", path)
class CmdCppFixIncludeGuard(Cmd):
    """Command: rewrite header include guards to the canonical symbol."""
    def __init__(self):
        super(CmdCppFixIncludeGuard, self).__init__("cpp-fix-include-guard", "Standardize include guard in header files")
    def add_parser(self, parsers):
        p = super(CmdCppFixIncludeGuard, self).add_parser(parsers)
        p.add_argument('-P', '--ig-prefix', help="Prefix to include guard", default='')
        # Default the file selection to header extensions.
        p.set_defaults(name_regex=_regexify(_exts_h))
        return p
    def process(self, args, files):
        slog(NOTICE, "Fixing include guard in", len(files), "header files:")
        for directory, filename in files:
            self._fix_include_guard(args.ig_prefix, directory + '/' + filename)
class CmdCppReplaceSymbols(Cmd):
    """Command: replace whole C/C++ symbols in sources, optionally renaming
    files whose basename equals a replaced symbol."""
    def __init__(self):
        super(CmdCppReplaceSymbols, self).__init__("cpp-replace-symbols", "Replace C++ symbols in files")
    def add_parser(self, parsers):
        p = super(CmdCppReplaceSymbols, self).add_parser(parsers)
        p.add_argument('-F', '--rename-files', help="Rename source files, too", action='store_true', default=False)
        p.add_argument('-P', '--ig-prefix', help="Prefix to include guard", default='')
        return p
    # overriding
    async def run(self, args):
        """Like Cmd.run, but defaults the file set to C/C++/script sources
        when no --name-regex is given."""
        if args.name_regex is not None:
            # BUG FIX: Cmd.run is a coroutine; without 'await' this returned
            # an un-awaited coroutine object and the base implementation
            # never executed.
            return await super(CmdCppReplaceSymbols, self).run(args)
        self._init(args)
        files = []
        exts = _exts_h_cpp | set([ '.sh', '.py' ])
        for root, dirs, names in os.walk(args.root):
            for name in names:
                trunc, ext = os.path.splitext(name)
                if ext in exts:
                    files.append((root, name))
        self.process(args, files)
    # overriding
    def _init(self, args):
        """Also collect the lower-cased pattern names used to decide which
        files are candidates for renaming."""
        r = super(CmdCppReplaceSymbols, self)._init(args)
        self.file_truncs = set()
        if self.replacements is not None:
            for patt in self.replacements:
                self.file_truncs.add(patt.lower())
        return r
    def process(self, args, files):
        """Replace symbols inside each file; with --rename-files also rename
        C/C++ files named after a replaced symbol and fix their guards."""
        for dir, name in files:
            path = dir + '/' + name
            if self.replacements is not None:
                self._replace_in_file(path, self.replacements, func=self._replace_cpp_symbol)
        if args.rename_files:
            for dir, name in files:
                trunc, ext = os.path.splitext(name)
                if not ext in _exts_h_cpp:
                    continue
                # file_truncs is empty when no replacements were loaded, so
                # the .items() below is only reached with a valid table.
                if not trunc.lower() in self.file_truncs:
                    continue
                for patt, repl in self.replacements.items():
                    if patt == trunc:
                        path = dir + '/' + name
                        new_path = dir + '/' + repl + ext
                        assert(new_path != path)
                        slog(NOTICE, "renaming", path, "->", new_path)
                        if args.git:
                            subprocess.call(['git', 'mv', path, new_path])
                        else:
                            os.rename(path, new_path)
                        self._fix_include_guard(args.ig_prefix, new_path)
class CmdCppIndentMacros(Cmd):
    """Command: re-indent preprocessor directives in C/C++ files."""
    def __init__(self):
        super(CmdCppIndentMacros, self).__init__("cpp-indent-macros", "Indent and beautify C/C++ preprocessor macros")
    def add_parser(self, parsers):
        p = super(CmdCppIndentMacros, self).add_parser(parsers)
        p.add_argument('-w', "--spaces", help="Number of spaces per indentation level", type=int, default=2)
        p.add_argument('-s', "--skip-outer", help="Skip the outmost macro (read multiple-inclusion guards)", action='store_true', default=True)
        p.add_argument('-S', "--skip-outer-name-regex", help="Regex for file names that --skip-outer should be applied to", default=_regexify(_exts_h))
        return p
    def process(self, args, files):
        slog(NOTICE, "Beautifying", len(files), "C++ files:")
        context = {"spaces": args.spaces}
        dummy = {"blah": "blub"} # just a dummy to use _replace_in_file, TODO: obviate the need
        for directory, filename in files:
            path = directory + '/' + filename
            if self._replace_in_file(path, dummy, func=self._cleanup_spaces):
                slog(NOTICE, "+ purged spaces :", path)
            # Headers (per the name regex) keep their outer include guard
            # at indentation level 0.
            context["skip-outer"] = bool(args.skip_outer and re.search(args.skip_outer_name_regex, path))
            if self._replace_in_file(path, dummy, func=self._cpp_indent_macros, context=context):
                slog(NOTICE, "+ indented C++ macros :", path)
class CmdCppAddNamespace(Cmd):
    """Command: enclose C++ declarations in a namespace (work in progress —
    the underlying helpers are not fully implemented)."""
    def __init__(self):
        super(CmdCppAddNamespace, self).__init__("cpp-add-namespace", "Enclose C++ classes in namespace")
    def add_parser(self, parsers):
        p = super(CmdCppAddNamespace, self).add_parser(parsers)
        p.add_argument('-n', '--namespace', help="Namespace", default=None)
        p.add_argument('-p', '--package', help="Package", default=None)
        return p
    # overriding
    async def run(self, args):
        """Like Cmd.run, but defaults the file set to C/C++/script sources
        when no --name-regex is given."""
        if args.name_regex is not None:
            # BUG FIX: Cmd.run is a coroutine and must be awaited; returning
            # it un-awaited meant the base implementation never executed.
            return await super(CmdCppAddNamespace, self).run(args)
        self._init(args)
        files = []
        exts = _exts_h_cpp | set([ '.sh', '.py' ])
        for root, dirs, names in os.walk(args.root):
            for name in names:
                trunc, ext = os.path.splitext(name)
                if ext in exts:
                    files.append((root, name))
        self.process(args, files)
    # overriding
    def _init(self, args):
        """Also collect the lower-cased pattern names (mirrors
        CmdCppReplaceSymbols._init)."""
        r = super(CmdCppAddNamespace, self)._init(args)
        self.file_truncs = set()
        if self.replacements is not None:
            for patt in self.replacements:
                self.file_truncs.add(patt.lower())
        return r
    def process(self, args, files):
        """Rewrite each file with the namespace applied, writing changed
        content to a temp/backup sibling file."""
        if args.namespace:
            for dir, name in files:
                path = dir + '/' + name
                with open(path) as infile:
                    data = odata = infile.read()
                trunc, ext = os.path.splitext(name)
                if ext in _exts_h:
                    # BUG FIX: 'namespace' was an undefined name here; the
                    # CLI value lives in args.namespace.
                    data = self._add_namespace_to_header(data, args.namespace)
                elif ext in _exts_cpp:
                    # NOTE(review): _add_using_namespace is not defined
                    # anywhere in this file — this branch raises
                    # AttributeError when hit; left as a visible gap.
                    data = self._add_using_namespace(data, args.namespace)
                if data == odata:
                    continue
                tmp = path + '.' + ('rep' if args.backup is None else args.backup)
                with open(tmp, 'w') as outfile:
                    outfile.write(data)
jwutils.run_sub_commands('Process text files')