def extract_lineage(
    catalog: Catalog,
    visited_query: DmlVisitor,
    source: CatSource,
    parsed: Parsed,
    start_time,
    end_time,
) -> JobExecution:
    """Record one job execution and its column lineage edges in the catalog.

    A job named ``parsed.name`` is added for ``source`` with the query text
    stored under the ``"query"`` context key, followed by a job execution
    with status ``SUCCESS``. The entries of ``visited_query.source_columns``
    and ``visited_query.target_columns`` are then paired positionally, and
    every column of a source entry gets a lineage edge to the paired target.

    Args:
        catalog: Catalog that receives the job, the execution and the edges.
        visited_query: Visitor exposing ``source_columns`` and
            ``target_columns``.
        source: Catalog source the job is added for.
        parsed: Parsed statement providing ``name`` and ``query``.
        start_time: Passed through as the execution's ``started_at``.
        end_time: Passed through as the execution's ``ended_at``.

    Returns:
        The job execution returned by ``catalog.add_job_execution``.
    """
    job = catalog.add_job(
        name=parsed.name,
        source=source,
        context={"query": parsed.query},
    )
    job_execution = catalog.add_job_execution(
        job=job,
        started_at=start_time,
        ended_at=end_time,
        status=JobExecutionStatus.SUCCESS,
    )

    # zip() stops at the shorter sequence, so unpaired entries are skipped.
    column_pairs = zip(visited_query.source_columns, visited_query.target_columns)
    for origin, destination in column_pairs:
        for origin_column in origin.columns:
            edge = catalog.add_column_lineage(
                origin_column, destination, job_execution.id, {}
            )
            logging.debug("Added {}".format(edge))

    return job_execution
def create_graph(catalog: Catalog, visited_queries: List[DmlVisitor]) -> DbGraph:
    """Record lineage for each visited query and return a loaded graph.

    For every query a job and a ``SUCCESS`` job execution (stamped with the
    current time for both start and end) are added to the catalog. Source
    and target columns are paired positionally and one lineage edge is added
    per pair. The ids of the jobs that produced at least one edge are handed
    to ``DbGraph``, which is loaded before being returned.

    Args:
        catalog: Catalog that receives the jobs, executions and edges.
        visited_queries: Visitors exposing ``name``, ``source_columns`` and
            ``target_columns``.

    Returns:
        The ``DbGraph`` built from ``catalog`` and the collected job ids,
        after ``load()`` has been called on it.
    """
    log_source = LogMixin()
    recorded_job_ids = set()

    for visited in visited_queries:
        job = catalog.add_job(visited.name, {})
        job_execution = catalog.add_job_execution(
            job,
            datetime.now(),
            datetime.now(),
            JobExecutionStatus.SUCCESS,
        )

        # zip() stops at the shorter sequence, so unpaired columns are skipped.
        column_pairs = zip(visited.source_columns, visited.target_columns)
        for origin, destination in column_pairs:
            edge = catalog.add_column_lineage(
                origin, destination, job_execution.id, {}
            )
            # The id is recorded per edge, so a query without any column
            # pair contributes no job id to the graph.
            recorded_job_ids.add(job.id)
            log_source.logger.debug("Added {}".format(edge))

    graph = DbGraph(catalog, recorded_job_ids)
    graph.load()
    return graph