def reset_metadata(env: "Environment"):
    """Reset metadata, all or selectively.

    Intended to drop the snapflow metadata tables (and their id sequences)
    so the store can be rebuilt from scratch.

    NOTE: currently disabled — raises NotImplementedError unconditionally,
    so everything below the raise is unreachable draft code kept as a sketch.

    Args:
        env: project Environment providing the metadata API.

    Raises:
        NotImplementedError: always (feature not implemented yet).
    """
    # TODO: implement selective reset; remove the raise once the drops below
    # are verified against the live schema.
    raise NotImplementedError
    # --- unreachable draft below this point ---
    # NOTE(review): the *_id_seq objects look like Postgres sequences, not
    # tables — "drop table" on them will likely fail; confirm and switch to
    # "drop sequence" before enabling this code path.
    with env.get_metadata_api().begin():
        for suffix in (
            "snap_log",
            "snap_log_id_seq",
            "data_resource_log",
            "data_resource_log_id_seq",
            "data_resource_metadata",
            "data_set_metadata",
            "stored_data_resource_metadata",
        ):
            env.md_api.execute(
                f"drop table {SNAPFLOW_METADATA_TABLE_PREFIX}{suffix} cascade;"
            )
def logs(env: Environment):
    """Show log of Snaps on DataBlocks.

    Prints one table row per DataBlockLog of every SnapLog (newest first);
    a SnapLog with no block logs gets a single "nothing to do" placeholder
    row.

    Args:
        env: project Environment providing the metadata API.
    """
    with env.get_metadata_api().begin():
        query = env.md_api.execute(select(SnapLog).order_by(SnapLog.updated_at.desc()))
        drls = []
        for dfl in query:
            if dfl.data_block_logs:
                for drl in dfl.data_block_logs:
                    r = [
                        dfl.started_at.strftime("%F %T"),
                        dfl.node_key,
                        drl.direction.display,
                        drl.data_block_id,
                    ]
                    drls.append(r)
            else:
                drls.append(
                    [
                        # Fixed: was "%F %t" — lowercase %t is a tab character
                        # in strftime, not the time; use %T to match the
                        # branch above.
                        dfl.started_at.strftime("%F %T"),
                        f"{dfl.node_key} nothing to do",
                        "-",
                        "-",
                    ]
                )
        headers = [
            "Started",
            "_Snap",
            "Direction",
            "DataBlock",
        ]
        echo_table(headers, drls)
def list_data_blocks(env: Environment):
    """Print a table of all non-deleted DataBlocks, oldest first.

    Columns: block id, nominal schema key, creating node key, record count,
    and the number of stored copies.

    Args:
        env: project Environment providing the metadata API.
    """
    headers = [
        "ID",
        "Nominal schema",
        "Created by node",
        "# Records",
        "Stored",
    ]
    with env.get_metadata_api().begin():
        blocks = env.md_api.execute(
            select(DataBlockMetadata)
            .filter(~DataBlockMetadata.deleted)
            .order_by(DataBlockMetadata.created_at)
        )
        rows = []
        for block in blocks:
            rows.append(
                [
                    block.id,
                    block.nominal_schema_key,
                    block.created_by_node_key,
                    block.record_count,
                    block.stored_data_blocks.count(),
                ]
            )
        echo_table(headers, rows)
def list_nodes(env: Environment):
    """Print per-node run statistics from the SnapLog table.

    For each node key: number of runs, timestamp of the most recent run,
    and the number of associated DataBlockLog rows.

    Args:
        env: project Environment providing the metadata API.
    """
    with env.get_metadata_api().begin():
        query = env.md_api.execute(
            select(
                SnapLog.node_key,
                # NOTE(review): after the join, count(SnapLog.id) counts
                # joined block-log rows rather than distinct runs — consider
                # func.count(func.distinct(SnapLog.id)); confirm intent.
                func.count(SnapLog.id),
                func.max(SnapLog.started_at),
                func.count(DataBlockLog.id),
            )
            .join(SnapLog.data_block_logs)
            .group_by(SnapLog.node_key)
        ).all()
        headers = [
            "Node key",
            "Run count",
            "Last run at",
            "block count",
        ]
        # Bug fix: the query yields 4 columns but the original comprehension
        # unpacked only 3 (ValueError at runtime) and never emitted the
        # block count, leaving the 4th header column empty.
        rows = [
            (key, run_count, last_run.strftime("%F %T"), block_count)
            for key, run_count, last_run, block_count in query
        ]
        echo_table(headers, rows)