def audit_aliases(dburi, alias=None, namespace='tsh'):
    " perform a visual audit of aliases "
    engine = create_engine(find_dburi(dburi))

    aliases = []
    if alias:
        # verify the given alias actually exists before auditing it
        if _alias_kind(engine, namespace, alias) is not None:
            aliases.append(alias)
    else:
        for kind in ('priority', 'arithmetic'):
            aliases += [
                alias for alias, in engine.execute(
                    f'select distinct alias from "{namespace}-alias".{kind}'
                ).fetchall()
            ]

    tsh = tsio.timeseries(namespace=namespace)
    trees = []
    for alias in aliases:
        trees.append(helpers.buildtree(engine, tsh, alias, []))

    # display the resulting dependency trees
    for tree in trees:
        print('-' * 70)
        helpers.showtree(tree)
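# Example invocation (a sketch, not part of the original module; the database
# URI is hypothetical and is resolved through find_dburi as above):
#
#   audit_aliases('postgresql://localhost/tsdb', alias='my_alias')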
def verify_aliases(dburi, only=None, namespace='tsh'):
    " verify aliases wholesale (all or per type using --only) "
    if only is None:
        tables = TABLES
    else:
        assert only in TABLES
        tables = [only]

    engine = create_engine(find_dburi(dburi))
    tsh = tsio.timeseries(namespace=namespace)

    for table in tables:
        colname = 'serie' if table == 'outliers' else 'alias'
        for row in engine.execute(
                f'select distinct {colname} from "{namespace}-alias"."{table}"'
        ).fetchall():
            name = row[0]
            try:
                series = tsh.get(engine, name)
            except tsio.AliasError as err:
                print(err)
            else:
                if not series.index.is_monotonic_increasing:
                    print(name, 'is non monotonic')
                print(name, len(series))
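# Usage sketch: restrict the check to a single alias table. This assumes
# 'priority' is one of the entries of TABLES (defined elsewhere in this
# module); the database URI is hypothetical.
#
#   verify_aliases('postgresql://localhost/tsdb', only='priority')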
def export_aliases(dburi, aliases, namespace='tsh'):
    engine = create_engine(find_dburi(dburi))
    tsh = tsio.timeseries(namespace=namespace)

    trees = []
    for alias in aliases:
        trees.append(helpers.buildtree(engine, tsh, alias, []))

    data = {'primary': set(), 'arithmetic': set(), 'priority': set()}

    def collect(tree):
        # leaves are plain series names; nodes map (alias, kind) -> subtrees
        if isinstance(tree, str):
            if tree.startswith('unknown'):
                print(f'skipping {tree}')
            else:
                data['primary'].add(tree)
            return
        for (alias, kind), subtrees in tree.items():
            data[kind].add(alias)
            for subtree in subtrees:
                collect(subtree)

    for tree in trees:
        collect(tree)

    Path('primary.csv').write_bytes(
        '\n'.join(data['primary']).encode('utf-8')
    )

    arith = []
    for name in data['arithmetic']:
        out = engine.execute(
            f'select alias, serie, coefficient, fillopt '
            f'from "{namespace}-alias".arithmetic '
            f'where alias = %(name)s '
            f'order by alias',
            name=name
        ).fetchall()
        arith.extend(dict(row) for row in out)

    df = pd.DataFrame(arith)
    df.to_csv(
        Path('arith.csv'),
        columns=('alias', 'serie', 'coefficient', 'fillopt'),
        index=False
    )

    prio = []
    for name in data['priority']:
        out = engine.execute(
            f'select alias, serie, priority, coefficient, prune '
            f'from "{namespace}-alias".priority '
            f'where alias = %(name)s '
            f'order by alias, priority asc',
            name=name
        ).fetchall()
        prio.extend(dict(row) for row in out)

    df = pd.DataFrame(prio)
    df.to_csv(
        Path('prio.csv'),
        columns=('alias', 'serie', 'priority', 'coefficient', 'prune'),
        index=False
    )
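# Round-trip sketch (assumed usage, not part of the original module): the CSV
# files written above can be fed back to the register_* helpers defined below,
# using a SQLAlchemy transaction as the connection:
#
#   engine = create_engine(find_dburi(dburi))
#   with engine.begin() as cn:
#       register_arithmetic(cn, 'arith.csv')
#       register_priority(cn, 'prio.csv')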
def register_arithmetic(cn, path, override=False):
    df = pd.read_csv(path)
    aliases = np.unique(df['alias'].dropna())
    tsh = tsio.timeseries()

    for alias in aliases:
        sub_df = df[df['alias'] == alias]
        # per-alias maps: serie -> coefficient and serie -> fill option
        map_coef = {
            row.serie: row.coefficient
            for row in sub_df.itertuples()
        }
        map_fillopt = {}
        for row in sub_df.itertuples():
            if not pd.isnull(row.fillopt):
                map_fillopt[row.serie] = row.fillopt
        tsh.build_arithmetic(cn, alias, map_coef, map_fillopt, override)
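# Illustrative layout of the CSV expected by register_arithmetic (columns match
# the read_csv usage above and the arith.csv written by export_aliases; series
# names, coefficients and the fill option value are made up):
#
#   alias,serie,coefficient,fillopt
#   alias_spread,serie_a,1.0,
#   alias_spread,serie_b,-1.0,ffill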
def register_priority(cn, path, override=False):
    df = pd.read_csv(path)
    aliases = np.unique(df['alias'].dropna())
    tsh = tsio.timeseries()

    for alias in aliases:
        sub_df = df[df['alias'] == alias]
        sub_df = sub_df.sort_values(by='priority')
        # series names in priority order, plus per-alias prune/coefficient maps
        list_names = sub_df['serie']
        map_prune = {}
        map_coef = {}
        for row in sub_df.itertuples():
            if not pd.isnull(row.prune):
                map_prune[row.serie] = row.prune
            if not pd.isnull(row.coefficient):
                map_coef[row.serie] = row.coefficient
        tsh.build_priority(cn, alias, list_names, map_prune, map_coef, override)
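# Illustrative layout of the CSV expected by register_priority (columns match
# the prio.csv written by export_aliases; rows of each alias are ordered by the
# priority column before being passed to build_priority; values are made up):
#
#   alias,serie,priority,coefficient,prune
#   alias_best,serie_realized,0,1.0,
#   alias_best,serie_forecast,1,1.0,3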
def register_outliers(cn, path, override=False):
    df = pd.read_csv(path)
    tsh = tsio.timeseries()

    for row in df.itertuples():
        tsh.add_bounds(cn, row.serie, row.min, row.max)
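# Illustrative layout of the CSV expected by register_outliers (columns serie,
# min and max, as read by the itertuples loop above; values are made up):
#
#   serie,min,max
#   serie_a,0,100
#   serie_b,-50,50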
def tsh(request, engine):
    return timeseries()
def shell(db_uri, namespace='tsh'):
    e = create_engine(find_dburi(db_uri))
    tsh = tsio.timeseries(namespace=namespace)
    import pdb
    pdb.set_trace()