jw-python/scripts/process-text-files.py

304 lines
11 KiB
Python
Raw Normal View History

#!/usr/bin/python2
# -*- coding: utf-8 -*-
from __future__ import print_function
import re
import argparse
from abc import abstractmethod
import os
from shutil import copyfile
import subprocess
import jwutils
from jwutils.log import *
# File-name extensions (both case variants) treated as C/C++ header files.
_exts_h = {'.h', '.H', '.hxx', '.HXX'}
# File-name extensions (both case variants) treated as C/C++ source files.
_exts_cpp = {'.cpp', '.CPP', '.c', '.C', '.cxx', '.CXX'}
# Every extension that counts as C/C++ code, header or source.
_exts_h_cpp = _exts_h.union(_exts_cpp)
class Cmd(jwutils.Cmd):
    """Common base class of the text-processing sub-commands in this script.

    It owns the replacement table read from ``--replace-patterns-from``
    (``self.replacements``: dict of source -> target, or None when the
    option was not given), collects the files to work on and offers helpers
    for rewriting file contents.  Subclasses implement ``process()``.
    """

    def __init__(self, name, help):
        """Forward ``name`` and ``help`` to the ``jwutils.Cmd`` constructor."""
        # Initialised before the base constructor runs, as the original did.
        self.replacements = None
        super(Cmd, self).__init__(name, help=help)

    @staticmethod
    def _replace_pattern(line, src, target):
        """Return ``line`` with every literal occurrence of ``src`` replaced."""
        return line.replace(src, target)

    @staticmethod
    def _replace_cpp_symbol(data, src, target):
        """Replace ``src`` by ``target`` wherever it stands alone as a symbol.

        An occurrence only counts when it is neither directly preceded nor
        directly followed by a character from ``[a-zA-Z0-9_]``.  ``src`` is
        inserted into the regular expression unescaped and ``target`` is used
        as an ``re.sub`` replacement template.

        The text is rewritten in one pass.  Look-around assertions do not
        consume the delimiting characters, so adjacent symbols are all found
        without re-scanning, and a target that itself contains the source
        symbol (``Foo`` -> ``ns::Foo``) can no longer loop forever.

        Returns the rewritten text, or ``data`` unchanged when the pattern or
        the template is rejected by the ``re`` module (the failure is logged).
        """
        if not src:
            # An empty pattern cannot name a symbol; it would match between
            # any two delimiter characters.
            return data
        ident = "a-zA-Z0-9_"
        pattern = "(?<![" + ident + "])(?:" + src + ")(?![" + ident + "])"
        try:
            return re.sub(pattern, target, data)
        except re.error:
            slog(ERR, "failed to replace", pattern, "by", target, "in", data)
            return data

    def _replace_in_file(self, path, replacements, func=None, backup='rep'):
        """Apply all ``replacements`` to the file at ``path``.

        Args:
            path: file to rewrite in place.
            replacements: dict mapping source to target.
            func: callable ``(data, src, target) -> data`` doing one
                replacement on the whole file content; defaults to
                ``_replace_pattern``.
            backup: extension of the side file the new content is written to
                before it replaces ``path``.  With a string, that side file
                is kept next to ``path``; with None it is moved over ``path``.

        Returns:
            True when at least one replacement altered the content and the
            file was rewritten, False otherwise.  Unchanged files are not
            touched and no side file is created for them.
        """
        if func is None:
            func = self._replace_pattern
        with open(path) as infile:
            data = infile.read()
        changed = False
        # .items() works on Python 2 and 3 alike.
        for src, target in replacements.items():
            before = data
            data = func(data, src, target)
            if data != before:
                changed = True
        if not changed:
            return False
        # Same fallback extension CmdAddCppNamespace.process uses.
        tmp = path + '.' + ('rep' if backup is None else backup)
        with open(tmp, 'w') as outfile:
            outfile.write(data)
        if backup is None:
            os.rename(tmp, path)
        else:
            copyfile(tmp, path)
        return True

    def _replace_in_string(self, string, replacements, func=None):
        """Apply all ``replacements`` to ``string``, one line at a time.

        ``func`` has the signature ``(line, src, target) -> line`` and
        defaults to ``_replace_pattern``.  Line endings are kept, so a
        multi-line input stays multi-line.
        """
        if func is None:
            func = self._replace_pattern
        pieces = []
        # keepends=True: the original joined the lines without any separator.
        for line in string.splitlines(True):
            for src, target in replacements.items():
                line = func(line, src, target)
            pieces.append(line)
        return "".join(pieces)

    def _add_namespace_to_header(self, data, ns_new):
        """Unfinished: prepare header text ``data`` for namespace ``ns_new``.

        Only the safety check exists so far: a header that already opens a
        namespace is rejected.  Wrapping the content in ``ns_new`` is not
        implemented, so ``data`` is returned unchanged.

        Raises:
            Exception: when a line of ``data`` opens a namespace.
        """
        ns_open = re.compile('^ *namespace[ \t]*([^ ]+)[ \t]*{.*')
        for line in data.splitlines():
            if ns_open.match(line):
                raise Exception("Name space addition is not yet implemented")
        # TODO: enclose the declarations in ns_new.
        return data

    def _fix_multiple_inclusion_preventer(self, prefix, path):
        """Rename the include guard of the header at ``path`` after its file name.

        The new guard is ``prefix`` + '_' + file name, with an underscore
        inserted before every capital letter, dots turned into underscores,
        double underscores collapsed once and the result upper-cased.  The
        first ``#ifndef`` in the file is taken to be the current guard; its
        ``#ifndef`` / ``#define`` lines and the ``#endif`` closing it are
        rewritten, everything else is copied through.

        Returns:
            False when ``path`` has no file name or is not a header
            (extension not in ``_exts_h``), True after rewriting the file.

        Raises:
            Exception: when the file contains no ``#ifndef`` line.
        """
        name = os.path.basename(path)
        if not name:
            return False
        if os.path.splitext(name)[1] not in _exts_h:
            return False
        tok = re.sub('([A-Z])', '_\\1', name)
        tok = tok.replace('.', '_')
        mip = (prefix + '_' + tok).replace('__', '_').upper()

        with open(path, 'r') as f:
            lines = f.read().splitlines()

        tail = '($|[ \t/;])'
        # Capture only the macro name; re.sub on the whole line used to drag
        # trailing comment text into the name.
        first_ifndef = re.compile(
            '^[ \t]*#[ \t]*ifndef[ \t]+([A-Za-z_][A-Za-z0-9_]*)' + tail)
        old = None
        for line in lines:
            found = first_ifndef.match(line)
            if found:
                old = found.group(1)
                break
        if old is None:
            # TODO: add anyway at beginning and end
            raise Exception('No multiple inclusion preventer found in '
                            + path + ', not implemented')

        old_re = re.escape(old)
        # Any conditional opener (#if, #ifdef, #ifndef) nests one level deeper.
        opens_cond = re.compile('^[ \t]*#[ \t]*if(n?def)?(?![A-Za-z0-9_])')
        old_ifndef = re.compile('^[ \t]*#[ \t]*ifndef[ \t]+' + old_re + tail)
        old_define = re.compile('^[ \t]*#[ \t]*define[ \t]+' + old_re + tail)
        closes_cond = re.compile('^[ \t]*#[ \t]*endif' + tail)

        out = []
        depth = 0
        guard_depth = None  # nesting depth at which the guard was opened
        for line in lines:
            if opens_cond.match(line):
                depth += 1
                if old_ifndef.match(line):
                    if guard_depth is None:
                        guard_depth = depth
                    out.append('#ifndef ' + mip)
                    continue
            elif old_define.match(line):
                out.append('#define ' + mip)
                continue
            elif closes_cond.match(line):
                closes_guard = guard_depth is not None and depth == guard_depth
                depth -= 1
                if closes_guard:
                    guard_depth = None
                    out.append('#endif /* ' + mip + ' */')
                    continue
            out.append(line)

        tmp = path + '.mip'
        with open(tmp, 'w') as f:
            f.write('\n'.join(out) + '\n')
        os.rename(tmp, path)
        return True

    def add_parser(self, parsers):
        """Add the options shared by all sub-commands and return the parser."""
        p = super(Cmd, self).add_parser(parsers)
        p.add_argument('-r', "--root", help="Point in file system from which to start search", default='.')
        p.add_argument("--name-regex", help="Regular expression to select input file names", default=None)
        p.add_argument("--replace-patterns-from", help="File with patterns to replace, side by side, divided by '->'", default=None)
        p.add_argument("--backup", help="Backup extension", default='rep')
        p.add_argument('-g', '--git', help="Use git mv for renaming files", action='store_true', default=False)
        return p

    def _init(self, args):
        """Load ``self.replacements`` from ``args.replace_patterns_from``.

        Each line has the form ``source->target``; only the first two
        '->'-separated fields are used.  Blank lines are skipped and lines
        without '->' are logged and skipped.  Nothing happens when the
        option was not given.
        """
        if args.replace_patterns_from is None:
            return
        self.replacements = dict()
        with open(args.replace_patterns_from) as infile:
            for line in infile:
                fields = line.rstrip('\r\n').split('->')
                if len(fields) < 2:
                    if line.strip():
                        slog(ERR, "ignoring replacement line without '->':", line)
                    continue
                self.replacements[fields[0]] = fields[1]

    # overriding
    def run(self, args):
        """Collect the files matching ``--name-regex`` and process them.

        Without ``--name-regex`` the file list stays empty; ``process()`` is
        called in either case.
        """
        self._init(args)
        slog(NOTICE, "running")
        files = []
        if args.name_regex is not None:
            for root, dirs, names in os.walk(args.root):
                for name in names:
                    if re.match(args.name_regex, name):
                        files.append((root, name))
        self.process(args, files)

    @abstractmethod
    def process(self, args, files):
        """Handle ``files``, a list of ``(directory, file name)`` tuples."""
        pass
class CmdReplacePatterns(Cmd):
    """Sub-command ``replace-patterns``: literal text replacement in files."""

    def __init__(self):
        super(CmdReplacePatterns, self).__init__("replace-patterns", "Replace patterns in files")

    def process(self, args, files):
        """Run the loaded replacements over each ``(directory, name)`` entry.

        Does nothing when no replacement table was loaded.
        """
        table = self.replacements
        if table is None:
            return
        for entry in files:
            folder, filename = entry
            target_path = folder + '/' + filename
            self._replace_in_file(target_path, table, func=self._replace_pattern)
class CmdReplaceCppSymbols(Cmd):
    """Sub-command ``replace-cpp-symbols``: rename C++ symbols in files.

    Optionally also renames source files whose base name equals a replaced
    symbol and adjusts the include guard of the renamed headers.
    """

    def __init__(self):
        super(CmdReplaceCppSymbols, self).__init__("replace-cpp-symbols", "Replace C++ symbols in files")

    def add_parser(self, parsers):
        """Add the options of this sub-command on top of the shared ones."""
        p = super(CmdReplaceCppSymbols, self).add_parser(parsers)
        p.add_argument('-F', '--rename-files', help="Rename source files, too", action='store_true', default=False)
        p.add_argument('-P', '--mip-prefix', help="Prefix to multiple-inclusion preventer", default='')
        return p

    # overriding
    def run(self, args):
        """Select the input files and process them.

        With ``--name-regex`` the base-class selection is used; otherwise
        every file below ``--root`` with a C/C++, ``.sh`` or ``.py``
        extension is taken.
        """
        if args.name_regex is not None:
            return super(CmdReplaceCppSymbols, self).run(args)
        self._init(args)
        slog(NOTICE, "running")
        files = []
        exts = _exts_h_cpp | set(['.sh', '.py'])
        for root, dirs, names in os.walk(args.root):
            for name in names:
                trunc, ext = os.path.splitext(name)
                if ext in exts:
                    files.append((root, name))
        self.process(args, files)

    # overriding
    def _init(self, args):
        """Load the replacements and record their lower-cased source names."""
        r = super(CmdReplaceCppSymbols, self)._init(args)
        self.file_truncs = set()
        if self.replacements is not None:
            for patt in self.replacements:
                self.file_truncs.add(patt.lower())
        return r

    def process(self, args, files):
        """Replace symbols in ``files`` and, with ``-F``, rename matching files.

        ``files`` is a list of ``(directory, file name)`` tuples.  A C/C++
        file is renamed when its base name (without extension) is exactly
        one of the replaced symbols; the include guard of the renamed file
        is then fixed up.  A replacement that maps a name to itself is
        skipped, and so is a file whose ``git mv`` fails.
        """
        if self.replacements is None:
            # No table loaded: nothing to replace and nothing to rename.
            return
        for folder, name in files:
            self._replace_in_file(os.path.join(folder, name), self.replacements,
                                  func=self._replace_cpp_symbol)
        if not args.rename_files:
            return
        for folder, name in files:
            trunc, ext = os.path.splitext(name)
            if ext not in _exts_h_cpp:
                continue
            if trunc.lower() not in self.file_truncs:
                continue
            # Case-sensitive lookup; replaces the former scan over all items.
            repl = self.replacements.get(trunc)
            if repl is None:
                continue
            path = os.path.join(folder, name)
            new_path = os.path.join(folder, repl + ext)
            if new_path == path:
                # Identity replacement: nothing to rename.
                continue
            slog(NOTICE, "renaming", path, "->", new_path)
            if args.git:
                status = subprocess.call(['git', 'mv', path, new_path])
                if status != 0:
                    # The file was not moved, so there is no new_path to fix.
                    slog(ERR, "git mv failed for", path, "with exit status", status)
                    continue
            else:
                os.rename(path, new_path)
            self._fix_multiple_inclusion_preventer(args.mip_prefix, new_path)
class CmdAddCppNamespace(Cmd):
    """Sub-command ``add-cpp-namespace``: enclose C++ classes in a namespace.

    NOTE(review): this command looks unfinished -- see the notes in
    ``process()``.
    """

    def __init__(self):
        super(CmdAddCppNamespace, self).__init__("add-cpp-namespace", "Enclose C++ classes in namespace")

    def add_parser(self, parsers):
        """Add the options of this sub-command on top of the shared ones."""
        p = super(CmdAddCppNamespace, self).add_parser(parsers)
        p.add_argument('-n', '--namespace', help="Namespace", default=None)
        p.add_argument('-p', '--package', help="Package", default=None)
        return p

    # overriding
    def run(self, args):
        """Select the input files and process them.

        With ``--name-regex`` the base-class selection is used; otherwise
        every file below ``--root`` with a C/C++, ``.sh`` or ``.py``
        extension is taken.
        """
        if args.name_regex is not None:
            return super(CmdAddCppNamespace, self).run(args)
        self._init(args)
        slog(NOTICE, "running")
        files = []
        exts = _exts_h_cpp | set(['.sh', '.py'])
        for root, dirs, names in os.walk(args.root):
            for name in names:
                trunc, ext = os.path.splitext(name)
                if ext in exts:
                    files.append((root, name))
        self.process(args, files)

    # overriding
    def _init(self, args):
        """Load the replacements and record their lower-cased source names."""
        r = super(CmdAddCppNamespace, self)._init(args)
        self.file_truncs = set()
        if self.replacements is not None:
            for patt in self.replacements:
                self.file_truncs.add(patt.lower())
        return r

    def process(self, args, files):
        """Write a namespace-adjusted copy next to every changed file.

        ``files`` is a list of ``(directory, file name)`` tuples.  Headers go
        through ``_add_namespace_to_header``, C/C++ sources through
        ``_add_using_namespace``; other files are left alone.  Nothing
        happens without ``--namespace``.
        """
        namespace = args.namespace  # was referenced as an undefined bare name
        if not namespace:
            return
        for folder, name in files:
            path = folder + '/' + name
            with open(path) as infile:
                odata = infile.read()
            ext = os.path.splitext(name)[1]
            if ext in _exts_h:
                data = self._add_namespace_to_header(odata, namespace)
            elif ext in _exts_cpp:
                # NOTE(review): _add_using_namespace is not defined anywhere
                # in this file -- presumably still to be written; confirm.
                data = self._add_using_namespace(odata, namespace)
            else:
                continue
            # A helper that returns nothing counts as "no change"; writing
            # None to the file would raise.
            if data is None or data == odata:
                continue
            tmp = path + '.' + ('rep' if args.backup is None else args.backup)
            with open(tmp, 'w') as outfile:
                outfile.write(data)
            # NOTE(review): the result stays in the side file; it is never
            # moved over `path` here -- confirm whether that is intended.
# Script entry point: hand over to jwutils' sub-command runner.
# NOTE(review): presumably this finds the Cmd subclasses defined above and
# dispatches to the one named on the command line, with the string used as
# the program description -- confirm against jwutils.
jwutils.run_sub_commands('process text files')