# -*- coding: utf-8 -*-

# Build glue added alongside this module in tools/python/jwutils/db/Makefile:
#   TOPDIR = ../../../..
#   include $(TOPDIR)/make/proj.mk
#   include $(JWBDIR)/make/py-mod.mk

import io
import os, re, textwrap, json, csv
from tabulate import tabulate # type: ignore

from jwutils.log import *

def rows_pretty(rows): # export
    """Render rows as pretty-printed JSON, one document per row.

    ``rows`` is either a list of dicts or a single dict. Keys are sorted and
    values the json module cannot encode natively are converted with
    ``str()``. The per-row documents are joined with newlines.
    """
    if isinstance(rows, dict):
        rows = [rows]
    return '\n'.join(
        json.dumps(row, sort_keys=True, indent=4, default=str) for row in rows
    )

def rows_duplicates(rows, log_prio=INFO, caller=None): # export
    """Return the indices of rows that repeat an earlier row.

    Two rows are duplicates when they have the same columns with equal
    values. The first occurrence is never reported. Indices come back in
    descending order, so a caller can ``del rows[i]`` for each of them
    without shifting the indices still to be processed.

    ``log_prio`` and ``caller`` are accepted but currently unused.
    """
    duplicates = []
    # Walk from the back; every row is compared against *all* rows before
    # it, including its direct predecessor.
    for last in range(len(rows) - 1, 0, -1):
        if any(rows[last] == rows[i] for i in range(last)):
            duplicates.append(last)
    return duplicates

def rows_remove(rows, callback=None, candidates=None, log_prio=INFO, caller=None): # export
    """Delete rows from ``rows`` in place.

    A row is removed when ``callback(row)`` is truthy. Without a callback,
    ``candidates`` (a list of dicts) is used instead: a row is removed when
    every one of its columns is present with an equal value in at least one
    candidate. Each removal is logged with ``log_prio``.

    Raises ``Exception`` if neither ``callback`` nor ``candidates`` is given.
    """

    def __is_remove_candidate(row):
        for remove_row in candidates:
            for col, val in row.items():
                if col not in remove_row:
                    break
                if val != remove_row[col]:
                    break
            else:
                # No column of the row disagreed with this candidate
                return True
        return False

    if caller is None:
        caller = get_caller_pos()
    if callback is None:
        if candidates is None:
            raise Exception('No criterion to remove rows')
        callback = __is_remove_candidate
    remove = [index for index, row in enumerate(rows) if callback(row)]
    # Delete back to front so the remaining indices stay valid
    for index in reversed(remove):
        slog(log_prio, f'Removing row {rows[index]}', caller=caller)
        del rows[index]

def rows_select(rows, rules): # export
    """Return the rows matched by at least one rule.

    A rule is a dict mapping column names to regular expressions, or a
    tuple/list whose first element is such a dict (the shape consumed by
    ``rows_rewrite_regex``). A rule matches a row when ``re.search`` succeeds
    for every listed column. Each row is returned at most once, in input
    order.
    """
    ret = []
    for row in rows:
        for rule in rules:
            # Accept (search, rewrite) pairs as well as bare search dicts
            search_rule = rule[0] if isinstance(rule, (tuple, list)) else rule
            for col_name, expr in search_rule.items():
                if not re.search(expr, row[col_name]):
                    break
            else:
                ret.append(row)
                break
    return ret

def rows_rewrite_regex(rows, rules): # export
    """Rewrite column values in place for rows matching a rule.

    Each rule is a pair ``(search, rewrite)``: ``search`` maps column names
    to regular expressions that must all match (``re.search``), ``rewrite``
    maps column names to the values to assign when they do. Every rule is
    tried against every row, in order. A failing rule is logged and the
    exception re-raised.
    """
    for row in rows:
        for rule in rules:
            try:
                for col_name, expr in rule[0].items():
                    if not re.search(expr, row[col_name]):
                        break
                else:
                    for exec_col_name, exec_val in rule[1].items():
                        slog(INFO, f'Rewriting {row} {row.get(exec_col_name)} -> {exec_val}')
                        row[exec_col_name] = exec_val
            except Exception as e:
                slog(ERR, f'Failed to run rule {rule} against {row} ({e})')
                raise

def rows_check_not_null(rows, keys, log_prio=WARNING, buf=None, stat_key=None, throw=True, caller=None): # export
    """Check that the given columns are present and not ``None`` in every row.

    ``keys`` is a column name or a list of column names. Each offending row
    is logged with ``log_prio`` and collected once, at its first missing key.
    If ``buf`` is passed it is cleared and reused as the collection list.
    With ``stat_key`` set, a ranking of how many offending rows share each
    value of that column is logged, most frequent first.

    Returns the list of offending rows. Raises ``Exception`` if any row
    offends and ``throw`` is true.
    """
    if isinstance(keys, str):
        keys = [keys]
    if caller is None:
        caller = get_caller_pos()
    count = 0
    stats = dict()
    if buf is None:
        buf = []
    else:
        buf.clear()
    for row in rows:
        for key in keys:
            if row.get(key) is None:
                slog(log_prio, f'{key} is missing in row {row}', caller=caller)
                buf.append(row)
                if stat_key is not None:
                    # .get(): the statistics column may itself be absent
                    stat_val = row.get(stat_key)
                    stats[stat_val] = stats.get(stat_val, 0) + 1
                count += 1
                break
    if count > 0:
        if stat_key is not None:
            ranking = reversed(sorted(stats.items(), key=lambda item: item[1]))
            for i, (k, v) in enumerate(ranking, start=1):
                # str(): None (and other types) reject the '<23' format spec
                slog(ERR, f'{i:>3}. {str(k):<23}: {v}', caller=caller)
        if throw:
            raise Exception(f'Found {count} rows violating null-constraint for keys {keys}')
    return buf