import logging

import pendulum

# AIRFLOW_RAW, AIRFLOW_IMPORT, RAW_GLOB, and Fact are project-level names
# assumed to be defined elsewhere in this package.


def ingest(preprocess):
    """Yield a helper that runs the ETL for one input file, then drop the tables it created."""
    new_tables = set()

    def _ingest(file_path):
        # File names are expected to have a 22-character prefix, a separator,
        # then a YYYY_MM_DD date suffix.
        raw_path = AIRFLOW_RAW / f"{file_path.name[:22]}.tsv"
        ti = preprocess(raw_path)
        file_config = ti.xcom_pull(key="config", task_ids="init")
        extract_table_name = file_config["extract_table"]
        load_table_name = file_config["load_table"]
        date = pendulum.from_format(file_path.stem[23:], "YYYY_MM_DD").naive()
        table_name = f"fact.session_{date.format('YYYY_MM_DD')}"
        new_tables.update([table_name, extract_table_name, load_table_name])
        Fact.etl(date, file_path.name, extract_table_name, load_table_name)

    yield _ingest
    # Teardown: drop every table registered during the run.
    Fact.remove_tables(*new_tables)
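# A minimal usage sketch, not from the source: the yield-then-teardown shape of
# ingest suggests a pytest fixture (registered with @pytest.fixture), with
# preprocess a companion fixture returning the TaskInstance of the
# preprocessing task. The test name and file name below are hypothetical; the
# file name simply matches the slicing above (a 22-character prefix, a
# separator, then a YYYY_MM_DD suffix).
from pathlib import Path


def test_ingest_builds_fact_table(ingest):
    ingest(Path("session_data_extract_a_2021_01_01.tsv"))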
def clean_callable(**kwargs):
    """Remove generated preprocessed files and fail the DAG if any previous task failed."""
    task_instance = kwargs["ti"]
    file_config = task_instance.xcom_pull(key="config", task_ids="init")
    file_stem = file_config["file_stem"]
    # Delete every preprocessed file generated for this run.
    for file_path in AIRFLOW_IMPORT.glob(f"{file_stem}{RAW_GLOB}"):
        if file_path.exists():
            logging.info(f"Removing {file_path}.")
            file_path.unlink()
    extract_table_name = file_config["extract_table"]
    load_table_name = file_config["load_table"]
    Fact.remove_tables(extract_table_name, load_table_name)
    # Propagate failure: if any other task in this DAG run failed, fail the run.
    for upstream_instance in kwargs["dag_run"].get_task_instances():
        if (upstream_instance.current_state() == State.FAILED
                and upstream_instance.task_id != task_instance.task_id):
            raise Exception(
                f"Failing this DAG run because upstream task {upstream_instance.task_id} failed."
            )
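# A hedged wiring sketch, not from the source: clean_callable is the kind of
# callable usually attached as the terminal task of the DAG with
# trigger_rule="all_done", so the cleanup runs even when upstream tasks fail
# and the failure check above can then re-raise. The dag variable is assumed
# to be defined nearby; on Airflow 1.x the import path is
# airflow.operators.python_operator and provide_context=True is required.
from airflow.operators.python import PythonOperator
from airflow.utils.state import State

clean = PythonOperator(
    task_id="clean",
    python_callable=clean_callable,
    trigger_rule="all_done",
    dag=dag,
)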