def fix_supervision_status(dburi, name=None, namespace='tsh'):
    """Backfill the 'supervision_status' metadata key on primary series.

    With `name`, only that series is considered; otherwise every primary
    series in the namespace.  Series already carrying a
    'supervision_status' key are skipped.  For the others the status is
    recorded in metadata and, for handcrafted/unsupervised series, the
    upstream copy is deleted to reclaim space.

    NOTE(review): `status` is read below but never assigned anywhere in
    this function -- as written this raises NameError on the first
    series lacking a 'supervision_status'.  The status-computation step
    (presumably derived from the upstream series' existence/content)
    appears to be missing; confirm against version history before use.
    """
    engine = create_engine(find_dburi(dburi))
    tsh = timeseries(namespace)
    if name:
        series = [name]
    else:
        # only primary series carry a supervision status
        series = [
            name for name, stype in tsh.list_series(engine).items()
            if stype == 'primary'
        ]
    categories = defaultdict(list)
    bar = tqdm.tqdm(range(len(series)))
    for name in series:
        meta = tsh.metadata(engine, name)
        if 'supervision_status' in meta:
            continue
        categories[status].append(name)
        meta['supervision_status'] = status
        with engine.begin() as cn:
            tsh.update_metadata(cn, name, meta, internal=True)
            # reclaim space
            if status in ('handcrafted', 'unsupervised'):
                if tsh.upstream.exists(cn, name):
                    tsh.upstream.delete(cn, name)
        bar.update()
    bar.close()
    print('unsupervised', len(categories['unsupervised']))
    print('handcrafted', len(categories['handcrafted']))
    print('supervised', len(categories['supervised']))
def view(db_uri, handler, debug=False):
    """visualize time series through the web"""
    dburi = find_dburi(db_uri)
    addr = host()
    port = int(getenv('TSVIEW_PORT', 5678))
    shandler = tshclass(handler)
    if debug:
        # run the webapp in the foreground
        kickoff(addr, port, dburi, shandler)
        return
    # run the webapp in a background daemon thread, then point a browser
    # at it and block on stdin to keep the process (and thread) alive
    webapp = Thread(
        name='tsview.webapp',
        target=kickoff,
        kwargs={
            'host': addr,
            'port': port,
            'dburi': dburi,
            'handler': shandler
        }
    )
    webapp.daemon = True
    webapp.start()
    webbrowser.open('http://{ipaddr}:{port}/tsview'.format(ipaddr=addr, port=port))
    input()
def check(db_uri, series=None, namespace='tsh'):
    "coherence checks of the db"
    e = create_engine(find_dburi(db_uri))
    tsh = timeseries(namespace)
    if series is None:
        series = tsh.list_series(e)
    else:
        series = [series]

    for idx, s in enumerate(series):
        t0 = time()
        with e.begin() as cn:
            hist = tsh.history(cn, s)
        start, end = None, None
        mon = True
        for ts in hist.values():
            cmin = ts.index.min()
            cmax = ts.index.max()
            start = min(start or cmin, cmin)
            end = max(end or cmax, cmax)
            # bug fix: aggregate monotonicity over *all* revisions --
            # previously the flag was overwritten on each iteration and
            # only reflected the last revision
            mon = mon and ts.index.is_monotonic_increasing
        # compare the recomputed interval against the stored one
        ival = tsh.interval(e, s)
        if ival.left != start:
            print(' start:', s, f'{ival.left} != {start}')
        if ival.right != end:
            print(' end:', s, f'{ival.right} != {end}')
        monmsg = '' if mon else 'non-monotonic'
        print(
            idx, s,
            'inserts={}, read-time={} {}'.format(len(hist), time() - t0, monmsg)
        )
def info(db_uri, namespace='tsh'):
    """show global statistics of the repository"""
    engine = create_engine(find_dburi(db_uri))
    # avoid shadowing the function name with the stats mapping
    stats = timeseries(namespace).info(engine)
    stats['serie names'] = ', '.join(stats['serie names'])
    print(INFOFMT.format(**stats))
def verify_aliases(dburi, only=None, namespace='tsh'):
    " verify aliases wholesale (all or per type using --only) "
    # pick every alias table, or just the one asked for
    if only is not None:
        assert only in TABLES
        tables = [only]
    else:
        tables = TABLES
    engine = create_engine(find_dburi(dburi))
    tsh = tsio.timeseries(namespace=namespace)
    for table in tables:
        # the outliers table names its series column differently
        if table == 'outliers':
            colname = 'serie'
        else:
            colname = 'alias'
        rows = engine.execute(
            f'select distinct {colname} from "{namespace}-alias"."{table}"'
        ).fetchall()
        for row in rows:
            aname = row[0]
            try:
                series = tsh.get(engine, aname)
            except tsio.AliasError as err:
                print(err)
                continue
            if not series.index.is_monotonic_increasing:
                print(aname, 'is non monotonic')
            print(aname, len(series))
def audit_aliases(dburi, alias=None, namespace='tsh'):
    " perform a visual audit of aliases "
    engine = create_engine(find_dburi(dburi))
    aliases = []
    if alias:
        # verify
        if _alias_kind(engine, namespace, alias) is not None:
            aliases.append(alias)
    else:
        # collect every known alias of each kind
        for kind in ('priority', 'arithmetic'):
            rows = engine.execute(
                f'select distinct alias from "{namespace}-alias".{kind}'
            ).fetchall()
            aliases.extend(aname for aname, in rows)
    tsh = tsio.timeseries(namespace=namespace)
    trees = [
        helpers.buildtree(engine, tsh, aname, [])
        for aname in aliases
    ]
    # now, display shit
    for tree in trees:
        print('-' * 70)
        helpers.showtree(tree)
def remove_alias(dburi, alias_type, alias, namespace='tsh'):
    "remove single alias"
    # fix: docstring typo ("singe" -> "single")
    engine = create_engine(find_dburi(dburi))
    # alias tables live in the "<namespace>-alias" schema, one table per
    # alias type; schema/table names cannot be bound parameters, but the
    # alias value itself is passed as a proper bound parameter
    table = '"{}-alias".{}'.format(namespace, alias_type)
    sql = "delete from {} where alias = %(alias)s".format(table)
    with engine.begin() as cn:
        cn.execute(sql, alias=alias)
def history(db_uri, seriename,
            from_insertion_date, to_insertion_date,
            from_value_date, to_value_date,
            diff, json,
            namespace='tsh'):
    """show a serie full history """
    engine = create_engine(find_dburi(db_uri))
    tsh = timeseries(namespace)
    with engine.begin() as cn:
        hist = tsh.history(
            cn, seriename,
            from_insertion_date, to_insertion_date,
            from_value_date, to_value_date,
            diffmode=diff
        )
    if not json:
        # human-readable dump, one series per insertion date
        for idate in hist:
            print(hist[idate])
        return
    # json dump: insertion date -> {value date -> value}
    payload = {}
    for idate, ts in hist.items():
        payload[str(idate)] = {
            str(vdate): val
            for vdate, val in ts.to_dict().items()
        }
    print(dumps(payload))
def test_formula(db_uri, formula, pdbshell=False, namespace='tsh'):
    """evaluate a formula against the db and print the resulting series"""
    engine = create_engine(find_dburi(db_uri))
    tsh = timeseries(namespace)
    result = tsh.eval_formula(engine, formula)
    print(result)
    if pdbshell:
        # drop into an interactive debugger to inspect the result
        import ipdb
        ipdb.set_trace()
def migrate_dot_four_to_dot_five(dburi, namespace='tsh'):
    """move the alias tables into the main namespace schema and drop the
    then-empty "<namespace>-alias" schema
    """
    engine = create_engine(find_dburi(dburi))
    # one transaction per table move
    for table in ('outliers', 'arithmetic', 'priority'):
        with engine.begin() as cn:
            cn.execute(
                f'alter table "{namespace}-alias".{table} set schema {namespace} '
            )
    with engine.begin() as cn:
        cn.execute(f'drop schema "{namespace}-alias"')
def export_aliases(dburi, aliases, namespace='tsh'):
    """Export the given aliases and all their dependencies as csv files.

    Walks the dependency tree of each alias, collects every reachable
    series by kind, then writes three files to the current working
    directory: primary.csv (bare series names), arith.csv and prio.csv
    (full alias definitions pulled from the alias tables).
    """
    engine = create_engine(find_dburi(dburi))
    tsh = tsio.timeseries(namespace=namespace)
    trees = []
    for alias in aliases:
        trees.append(helpers.buildtree(engine, tsh, alias, []))

    data = {'primary': set(), 'arithmetic': set(), 'priority': set()}

    def collect(tree):
        # leaves are plain series names (str); unresolvable ones are
        # tagged with an 'unknown' prefix by the tree builder and skipped
        if isinstance(tree, str):
            if tree.startswith('unknown'):
                print(f'skipping {tree}')
            else:
                data['primary'].add(tree)
            return
        # internal nodes map (alias, kind) pairs to their subtrees
        for (alias, kind), subtrees in tree.items():
            data[kind].add(alias)
            for subtree in subtrees:
                collect(subtree)

    for tree in trees:
        collect(tree)

    Path('primary.csv').write_bytes('\n'.join(data['primary']).encode('utf-8'))

    # arithmetic aliases: dump the defining rows verbatim
    arith = []
    for name in data['arithmetic']:
        out = engine.execute(
            f'select alias, serie, coefficient, fillopt '
            f'from "{namespace}-alias".arithmetic '
            f'where alias = %(name)s '
            f'order by alias',
            name=name
        ).fetchall()
        arith.extend(dict(row) for row in out)
    df = pd.DataFrame(arith)
    df.to_csv(
        Path('arith.csv'),
        columns=('alias', 'serie', 'coefficient', 'fillopt'),
        index=False
    )

    # priority aliases: keep the priority ordering in the export
    prio = []
    for name in data['priority']:
        out = engine.execute(
            f'select alias, serie, priority, coefficient, prune '
            f'from "{namespace}-alias".priority '
            f'where alias = %(name)s '
            f'order by alias, priority asc',
            name=name
        ).fetchall()
        prio.extend(dict(row) for row in out)
    df = pd.DataFrame(prio)
    df.to_csv(
        Path('prio.csv'),
        columns=('alias', 'serie', 'priority', 'coefficient', 'prune'),
        index=False
    )
def get(db_uri, seriename, json, namespace='tsh'):
    """show a serie in its current state """
    engine = create_engine(find_dburi(db_uri))
    tsh = timeseries(namespace)
    series = tsh.get(engine, seriename)
    if json:
        print(series.to_json())
        return
    # show every row, capped at 3 columns
    display_opts = ('display.max_rows', None, 'display.max_columns', 3)
    with pd.option_context(*display_opts):
        print(series)
def rename(db_uri, mapfile, namespace='tsh'):
    """rename series by providing a map file (csv format)

    map file header must be `old,new`
    """
    mapping = {
        row.old: row.new
        for row in pd.read_csv(mapfile).itertuples()
    }
    engine = create_engine(find_dburi(db_uri))
    tsh = timeseries(namespace)
    # one transaction per rename
    for oldname, newname in mapping.items():
        print('rename', oldname, '->', newname)
        with engine.begin() as cn:
            tsh.rename(cn, oldname, newname)
def reset_aliases(dburi, only=None, namespace='tsh'):
    " remove aliases wholesale (all or per type using --only) "
    # pick every alias table, or just the one asked for
    if only is not None:
        assert only in TABLES
        tables = [only]
    else:
        tables = TABLES
    engine = create_engine(find_dburi(dburi))
    # one transaction per table
    for table in tables:
        with engine.begin() as cn:
            cn.execute(f'delete from "{namespace}-alias"."{table}"')
def ingest_formulas(dburi, formula_file, strict=False, namespace='tsh'):
    """ingest a csv file of formulas

    Must be a two-columns file with a header "name,text"
    """
    engine = create_engine(find_dburi(dburi))
    formulas = pd.read_csv(formula_file)
    tsh = timeseries(namespace)
    # register everything within a single transaction
    with engine.begin() as cn:
        for row in formulas.itertuples():
            print('ingesting', row.name)
            tsh.register_formula(cn, row.name, row.text, strict)
def typecheck_formula(db_uri, pdbshell=False, namespace='tsh'):
    """typecheck every registered formula

    NOTE(review): `pdbshell` is accepted but unused here -- kept for
    signature compatibility with the other commands.
    """
    engine = create_engine(find_dburi(db_uri))
    tsh = timeseries(namespace)
    interp = Interpreter(engine, tsh, {})
    for name, kind in tsh.list_series(engine).items():
        if kind != 'formula':
            continue
        parsed = parse(tsh.formula(engine, name))
        print(name, f'`{parsed[0]}`')
        typecheck(parsed, env=interp.env)
def list_mismatch(db_uri, namespace='tsh'):
    """list series known upstream but absent from the main store"""
    engine = create_engine(find_dburi(db_uri))
    tsh = timeseries(namespace)
    known = set(tsh.list_series(engine))
    upstream = set(tsh.upstream.list_series(engine))
    missing = upstream - known
    if not missing:
        print('no mismatch')
        return
    print(f'found {len(missing)} series in upstream')
    for name in sorted(missing):
        # sanity check: the series really exists upstream only
        assert (False, True) == (
            tsh.exists(engine, name),
            tsh.upstream.exists(engine, name)
        )
        print(name)
def delete(db_uri, series=None, deletefile=None, namespace='tsh'):
    """delete series by providing a one-column file (csv format)

    file header must be `name`
    """
    if not (series or deletefile):
        print('You must provide a series name _or_ a csv file path')
        return
    if deletefile:
        targets = [row.name for row in pd.read_csv(deletefile).itertuples()]
    else:
        targets = [series]
    engine = create_engine(find_dburi(db_uri))
    delete_series(engine, targets, namespace)
def update_metadata(dburi, reset=False, namespace='tsh'):
    """Recompute the metadata of every formula series.

    With `reset`, the non-internal metadata keys of each formula are
    first wiped (written straight into the formula table).  Then each
    formula's metadata is rebuilt from the series it references.
    Formulas whose references cannot be resolved are reported at the end
    as failures; those yielding incomplete metadata are reported as todo.
    """
    engine = create_engine(find_dburi(dburi))
    tsh = timeseries(namespace)
    if reset:
        for name, kind in tsh.list_series(engine).items():
            if kind != 'formula':
                continue
            # reset: keep only the keys *not* in the internal set
            meta = tsh.metadata(engine, name)
            if meta:
                meta = {
                    k: v for k, v in meta.items()
                    if k not in tsh.metakeys
                }
            else:
                meta = {}
            # write the pruned metadata directly into the formula table
            sql = (f'update "{namespace}".formula '
                   'set metadata = %(metadata)s '
                   'where name = %(name)s')
            print('reset', name, 'to', meta)
            with engine.begin() as cn:
                cn.execute(
                    sql,
                    metadata=json.dumps(meta),
                    name=name
                )

    todo = []
    errors = []

    def justdoit():
        # rebuild each formula's metadata from its referenced series
        for name, kind in tsh.list_series(engine).items():
            if kind != 'formula':
                continue
            print(name)
            tree = parse(tsh.formula(engine, name))
            smap = tsh.find_series(engine, tree)
            try:
                meta = tsh.filter_metadata(smap)
            except ValueError as err:
                errors.append((name, err))
                continue
            if not meta or 'index_dtype' not in meta:
                # incomplete metadata: flag for manual follow-up
                todo.append(name)
                print(' -> todo')
                continue
            tsh.update_metadata(engine, name, meta)

    justdoit()
    print('TODO', todo)
    print('FAIL', errors)
def log(db_uri, limit, series, from_insertion_date=None, to_insertion_date=None, namespace='tsh'):
    """show revision history of entire repository or series"""
    engine = create_engine(find_dburi(db_uri))
    tsh = timeseries(namespace)
    revisions = tsh.log(
        engine, series,
        limit=limit,
        fromdate=from_insertion_date,
        todate=to_insertion_date
    )
    # one formatted revision per paragraph
    for rev in revisions:
        print(format_rev(rev))
        print()
def drop_alias_tables(db_uri, drop=False, namespace='tsh'):
    """fold the legacy outliers definitions into the referencing
    formulas (as clip operations), then optionally drop the legacy
    alias tables

    Nothing is dropped unless `drop` is true.
    """
    engine = create_engine(find_dburi(db_uri))

    # convert outliers to clip operator
    # bug fix: the outliers lookup hard-coded the `tsh` schema while the
    # drop statements below honor `namespace`; also avoid shadowing the
    # min/max builtins in the comprehension
    elts = {
        name: (low, high)
        for name, low, high in engine.execute(
            f'select serie, min, max from "{namespace}".outliers'
        ).fetchall()
    }
    tsh = timeseries(namespace)

    # find the formulas referencing at least one outliers-bearing series
    rewriteme = []
    for name, kind in tsh.list_series(engine).items():
        if kind != 'formula':
            continue
        tree = parse(tsh.formula(engine, name))
        smap = tsh.find_series(engine, tree)
        for sname in smap:
            if sname in elts:
                rewriteme.append((name, tree))
                break

    for name, tree in rewriteme:
        tree2 = rewrite(tree, elts)
        print(name)
        print(serialize(tree))
        print('->')
        print(serialize(tree2))
        print()
        tsh.register_formula(engine, name, serialize(tree2), update=True)

    if not drop:
        print('DID NOT DROP the tables')
        print('pass --drop to really drop them')
        return

    with engine.begin() as cn:
        cn.execute(f'drop table if exists "{namespace}".arithmetic')
        cn.execute(f'drop table if exists "{namespace}".priority')
        cn.execute(f'drop table if exists "{namespace}".outliers')
def fix_slice(db_uri, really=False, namespace='tsh'):
    """rewrite formulas using the old slice form (dry-run unless --really)"""
    engine = create_engine(find_dburi(db_uri))
    tsh = timeseries(namespace)
    for name, kind in tsh.list_series(engine).items():
        if kind != 'formula':
            continue
        # parse+serialize -> normalization step
        source = serialize(parse(tsh.formula(engine, name)))
        rewritten = serialize(rewrite_slice(parse(source)))
        if source == rewritten:
            continue
        print('rewritten', name)
        print(' was', source)
        print(' ->', rewritten)
        if really:
            tsh.register_formula(engine, name, rewritten, update=True)
    if not really:
        print('UNCHANGED. To apply changes, pass --really')
def register_priorities(dburi, priority_file, override=False):
    " register priorities timeseries aliases "
    eng = create_engine(find_dburi(dburi))
    # single transaction for the whole registration
    with eng.begin() as cn:
        db.register_priority(cn, priority_file, override)
def register_arithmetic(dburi, arithmetic_file, override):
    " register arithmetic timeseries aliases "
    eng = create_engine(find_dburi(dburi))
    # single transaction for the whole registration
    with eng.begin() as cn:
        db.register_arithmetic(cn, arithmetic_file, override)
def register_outliers(dburi, outliers_file, override):
    " register outlier definitions "
    eng = create_engine(find_dburi(dburi))
    # single transaction for the whole registration
    with eng.begin() as cn:
        db.register_outliers(cn, outliers_file, override)
def init_db(db_uri, namespace):
    "initialize the formula part of a timeseries history schema"
    eng = create_engine(find_dburi(db_uri))
    formula_schema(namespace).create(eng)
def shell(db_uri, namespace='tsh'):
    """Drop into a pdb prompt for interactive exploration.

    The locals `e` (db engine) and `tsh` (timeseries handler) are bound
    on purpose so they are available at the debugger prompt.
    """
    e = create_engine(find_dburi(db_uri))
    tsh = timeseries(namespace)
    import pdb
    pdb.set_trace()
def init_db(db_uri, reset=False, namespace='tsh'):
    """initialize an new db.

    NOTE(review): `reset` is accepted but never used below -- the schema
    is always created, never dropped/recreated; confirm intended
    behavior (docstring also has a typo: "an new").
    """
    engine = create_engine(find_dburi(db_uri))
    schem = tshistory.schema.tsschema(namespace)
    schem.create(engine)