def reset_metadata(env: Environment):
    """Reset metadata, all or selectively.

    Drops all snapflow metadata tables (and their id sequences) from the
    environment's metadata database.

    NOTE: intentionally disabled via the raise below until selective reset
    is implemented (see TODO); the DDL beneath is the intended behavior.
    """
    # TODO: support selective reset; remove the guard once implemented.
    raise NotImplementedError
    with env.session_scope() as sess:
        # Metadata tables all share the configured prefix.
        for table in (
            "pipe_log",
            "data_resource_log",
            "data_resource_metadata",
            "data_set_metadata",
            "stored_data_resource_metadata",
        ):
            sess.execute(
                f"drop table {SNAPFLOW_METADATA_TABLE_PREFIX}{table} cascade;"
            )
        # BUG FIX: id sequences are sequences, not tables — `drop table`
        # on them fails in Postgres; use `drop sequence`.
        for seq in ("pipe_log_id_seq", "data_resource_log_id_seq"):
            sess.execute(
                f"drop sequence {SNAPFLOW_METADATA_TABLE_PREFIX}{seq} cascade;"
            )
def logs(env: Environment):
    """Show log of Pipes on DataBlocks.

    Prints one table row per DataBlockLog of each PipeLog (newest pipe runs
    first); pipe runs with no block logs get a single "nothing to do" row.
    """
    with env.session_scope() as sess:
        query = sess.query(PipeLog).order_by(PipeLog.updated_at.desc())
        drls = []
        for dfl in query:
            if dfl.data_block_logs:
                for drl in dfl.data_block_logs:
                    r = [
                        dfl.started_at.strftime("%F %T"),
                        dfl.node_key,
                        drl.direction.display,
                        drl.data_block_id,
                    ]
                    drls.append(r)
            else:
                drls.append(
                    [
                        # BUG FIX: was "%F %t" (%t is a tab / platform-defined),
                        # a typo for "%T" — now consistent with the branch above.
                        dfl.started_at.strftime("%F %T"),
                        f"{dfl.node_key} nothing to do",
                        "-",
                        "-",
                    ]
                )
        headers = [
            "Started",
            "Pipe",
            "Direction",
            "DataBlock",
        ]
        echo_table(headers, drls)
def list_nodes(env: Environment):
    """Print a table of nodes with run count, last run time and block count."""
    with env.session_scope() as sess:
        query = (
            sess.query(
                PipeLog.node_key,
                func.count(PipeLog.id),
                func.max(PipeLog.started_at),
                func.count(DataBlockLog.id),
            )
            .join(PipeLog.data_block_logs)
            .group_by(PipeLog.node_key)
            .all()
        )
        headers = [
            "Node key",
            "Run count",
            "Last run at",
            "block count",
        ]
        # BUG FIX: the query yields 4 columns but only 3 were unpacked
        # (ValueError at runtime) and the declared "block count" column
        # was never emitted.
        rows = [
            (node_key, run_count, last_run.strftime("%F %T"), block_count)
            for node_key, run_count, last_run, block_count in query
        ]
        echo_table(headers, rows)
def list_data_blocks(env: Environment):
    """Print a table of all non-deleted DataBlocks, oldest first."""
    with env.session_scope() as sess:
        blocks = (
            sess.query(DataBlockMetadata)
            .filter(~DataBlockMetadata.deleted)
            .order_by(DataBlockMetadata.created_at)
        )
        headers = [
            "ID",
            "Nominal schema",
            "Created by node",
            "# Records",
            "Stored",
        ]
        rows = []
        for block in blocks:
            rows.append(
                [
                    block.id,
                    block.nominal_schema_key,
                    block.created_by_node_key,
                    block.record_count,
                    block.stored_data_blocks.count(),
                ]
            )
        echo_table(headers, rows)
def test_env_init():
    """Environment bootstrapping: module registration, schema/pipe lookup,
    and default storage/runtime counts."""
    from . import _test_module

    # --- Module / component registration ---
    env = Environment("_test", metadata_storage="sqlite://", initial_modules=[])
    with env.session_scope() as sess:
        assert len(env.get_module_order()) == 1
        env.add_module(_test_module)
        expected_order = [env.get_local_module().name, _test_module.name]
        assert env.get_module_order() == expected_order
        assert env.get_schema("TestSchema", sess) is _test_module.schemas.TestSchema
        assert env.get_pipe("test_sql") is _test_module.pipes.test_sql
        # --- Runtime / storage registration ---
        env.add_storage("postgresql://test")
        assert len(env.storages) == 2  # added plus default local memory
        assert len(env.runtimes) == 3  # added plus default local python
        # TODO