From e0d48b2dc753b891e774d65e4e74aeb88058e5a8 Mon Sep 17 00:00:00 2001 From: Jan Lindemann Date: Wed, 20 Dec 2017 20:08:23 +0100 Subject: [PATCH] Add process-text-files.py Signed-off-by: Jan Lindemann --- scripts/process-text-files.py | 241 ++++++++++++++++++++++++++++++++++ 1 file changed, 241 insertions(+) create mode 100644 scripts/process-text-files.py diff --git a/scripts/process-text-files.py b/scripts/process-text-files.py new file mode 100644 index 0000000..f5e5f17 --- /dev/null +++ b/scripts/process-text-files.py @@ -0,0 +1,241 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- + +from __future__ import print_function +import re +import argparse +from abc import abstractmethod +import os +from shutil import copyfile +import subprocess + +import jwutils +from jwutils.log import * + +_exts_h = set([ '.h', '.H', '.hxx', '.HXX']) +_exts_cpp = _exts_h | set([ '.cpp', '.CPP', '.c', '.C', '.cxx', '.CXX' ]) + +class Cmd(jwutils.Cmd): + + def __init__(self, name, help): + self.replacements = None + super(Cmd, self).__init__(name, help=help) + + @staticmethod + def _replace_pattern(line, src, target): + return line.replace(src, target) + + @staticmethod + def _replace_cpp_symbol(data, src, target): + stopc = "^a-zA-Z0-9_" + stopa = "(^|[" + stopc + "])" + stope = "([" + stopc + "]|$)" + #stopc2 = stopc.replace("\\", "\\\\") + # return re.sub("(^|[" + stopc + "])" + src + "([" + stopc + "]|$)", "\1" + target + "\2", data) + #f = "(^|[" + stopc + "])" + src + "([" + stopc + "]|$)" + f = stopa + src + stope + t = "\\1" + target + "\\2" + done = False + try: + while True: + #slog(WARNING, "replacing", f, "by", t) + r = re.sub(f, t, data, flags=re.MULTILINE) + if r == data: + break + data = r + except: + slog(ERR, "failed to replace", f, "by", t, "in", data) + return data + #if r != data: + # slog(NOTICE, " replaced ", f, "->", t) + # slog(NOTICE, " resulted in ", data, "->", r) + return r + + def _replace_in_file(self, path, replacements, func=None, backup='rep'): + if func is None: + func = self._replace_pattern + tmp = path + '.' + backup + changed = False + with open(path) as infile, open(tmp, 'w') as outfile: + data = infile.read() + for src, target in replacements.iteritems(): + odata = data + #data = data.replace(src, target) + data = func(data, src, target) + if data != odata: + #slog(NOTICE, "changed", odata, "to", data, "in", path) + changed = True + outfile.write(data) + if not changed: + return False + + if backup is None: + os.rename(tmp, path) + else: + copyfile(tmp, path) + return True + + def _replace_in_string(self, string, replacements, func=None): + r = "" + if func is None: + func = self._replace_pattern + for line in iter(string.splitlines()): + for src, target in replacements.iteritems(): + line = func(line, src, target) + r = r + line + return r + + def _fix_multiple_inclusion_preventer(self, prefix, path): + dir, name = os.path.split(path) + if len(name) == 0: + return False + trunc, ext = os.path.splitext(name) + if ext not in _exts_h: + return False + tok = re.sub('([A-Z])', '_\\1', name) + tok = re.sub('\.', '_', tok) + mip = prefix + '_' + tok + mip = re.sub('__', '_', mip) + mip = mip.upper() + # find first old mip + with open(path, 'r') as f: + data = f.read() + lines = data.splitlines() + old = None + for line in iter(lines): + old = re.sub('^ *#[ \t]*ifndef[\t ]+([^ ]+)($|[ \t/;])', '\\1', line) + if old == line: + continue + #slog(NOTICE, "found MIP", old, "in", line) + break + if old is None: + # TODO: add anyway at beginning and end + raise Exception('No multiple inclusion preventer found in', path, ', not implemented') + data = '' + level = 0 + for line in iter(lines): + if re.match('^ *#[ \t]*if.?def[\t ]', line): + level += 1 + if re.match('^ *#[ \t]*ifndef[\t ]+' + old + '($|[ \t/;])', line): + data += '#ifndef ' + mip + '\n' + continue + elif re.match('^ *#[ \t]*define[\t ]+' + old + '($|[ \t/;])', line): + data += '#define ' + mip + '\n' + continue + elif re.match('^ *#[ \t]*endif($|[ \t/;])', line): + level -= 1 + if level == 0: + data += '#endif /* ' + mip + ' */' + '\n' + continue + data += line + '\n' + tmp = path + '.mip' + with open(tmp, 'w') as f: + f.write(data) + os.rename(tmp, path) + + def add_parser(self, parsers): + p = super(Cmd, self).add_parser(parsers) + p.add_argument("--regex", help="Specify regular expression by input file", default=None) + p.add_argument("--replace-patterns-from", help="File with patterns to replace, side by side, divided by '->'", default=None) + p.add_argument('-r', "--root", help="Point in file system from which to start search", default='.') + p.add_argument("--backup", help="Backup extension", default='rep') + p.add_argument('-g', '--git', help="Use git mv for renaming files", action='store_true', default=False) + return p + + def _init(self, args): + if args.replace_patterns_from is not None: + self.replacements = dict() + with open(args.replace_patterns_from) as infile: + for line in infile: + s = re.split('->', line) + self.replacements[s[0]] = s[1].rstrip('\n') + #slog(NOTICE, "replacements =", self.replacements) + + # overriding + def run(self, args): + self._init(args) + slog(NOTICE, "running") + files = [] + if args.regex is not None: + for root, dirs, names in os.walk(args.root): + for name in names: + if re.match(args.regex, name): + files.append((root, name)) + self.process(args, files) + + @abstractmethod + def process(self, args, files): + pass + + +class CmdReplacePatterns(Cmd): + + def __init__(self): + super(CmdReplacePatterns, self).__init__("replace-patterns", "Replace patterns in files") + + def process(self, args, files): + for dir, name in files: + if self.replacements is not None: + path = dir + '/' + name + self._replace_in_file(path, self.replacements, func=self._replace_pattern) + +class CmdReplaceCppSymbols(Cmd): + + def __init__(self): + super(CmdReplaceCppSymbols, self).__init__("replace-cpp-symbols", "Replace C++ symbols in files") + + def add_parser(self, parsers): + p = super(CmdReplaceCppSymbols, self).add_parser(parsers) + p.add_argument('-F', '--rename-files', help="Rename source files, too", action='store_true', default=False) + p.add_argument('-P', '--mip-prefix', help="Prefix to multiple-inclusion preventer", default='') + return p + + # overriding + def run(self, args): + if args.regex is not None: + return super(CmdReplaceCppSymbols, self).run(args) + self._init(args) + slog(NOTICE, "running") + files = [] + exts = _exts_cpp | set([ '.sh', '.py' ]) + for root, dirs, names in os.walk(args.root): + for name in names: + trunc, ext = os.path.splitext(name) + if ext in exts: + files.append((root, name)) + self.process(args, files) + + # overriding + def _init(self, args): + r = super(CmdReplaceCppSymbols, self)._init(args) + self.file_truncs = set() + if self.replacements is not None: + for patt in self.replacements: + self.file_truncs.add(patt.lower()) + return r + + def process(self, args, files): + for dir, name in files: + path = dir + '/' + name + if self.replacements is not None: + self._replace_in_file(path, self.replacements, func=self._replace_cpp_symbol) + if args.rename_files: + for dir, name in files: + trunc, ext = os.path.splitext(name) + if not ext in _exts_cpp: + continue + if not trunc.lower() in self.file_truncs: + continue + for patt, repl in self.replacements.iteritems(): + if patt.lower() == trunc.lower(): + path = dir + '/' + name + new_path = dir + '/' + repl + ext + assert(new_path != path) + slog(NOTICE, "renaming", path, "->", new_path) + if args.git: + subprocess.call(['git', 'mv', path, new_path]) + else: + os.rename(path, new_path) + self._fix_multiple_inclusion_preventer(args.mip_prefix, new_path) + +jwutils.run_sub_commands('process text files')