# Example #1
def ingest(preprocess):
    """Fixture factory: yield an ``_ingest`` helper that runs the ETL for a
    raw file, then drop every table it created on teardown.

    ``preprocess`` is a callable that stages the raw file and returns a task
    instance whose XCom holds the file config pushed by the ``init`` task.
    """
    # Accumulates every table name touched by _ingest so teardown can drop
    # them all in one call.
    new_tables = set()

    def _ingest(file_path):
        # First 22 characters of the name identify the raw TSV
        # (prefix before the date suffix) — TODO confirm naming scheme.
        raw_path = AIRFLOW_RAW / f"{file_path.name[:22]}.tsv"
        ti = preprocess(raw_path)
        # BUGFIX: use keyword arguments. Positionally, xcom_pull's first two
        # parameters are task_ids and dag_id, so ("config", "init") meant
        # task_ids="config", dag_id="init" — not the intended
        # key="config", task_ids="init" (as used correctly elsewhere in
        # this file).
        file_config = ti.xcom_pull(key="config", task_ids="init")
        extract_table_name = file_config["extract_table"]
        load_table_name = file_config["load_table"]
        # The date is encoded after the 22-char prefix plus one separator.
        date = pendulum.from_format(file_path.stem[23:], "YYYY_MM_DD").naive()
        table_name = f"fact.session_{date.format('YYYY_MM_DD')}"
        new_tables.update([table_name, extract_table_name, load_table_name])
        Fact.etl(date, file_path.name, extract_table_name, load_table_name)

    yield _ingest

    # Teardown: drop all tables registered by _ingest calls.
    Fact.remove_tables(*new_tables)
# Example #2
def clean_callable(**kwargs):
    """Remove generated preprocessed files and staging tables, and fail the
    DAG run if any upstream task failed.

    Expects Airflow context kwargs: ``ti`` (this task's instance) and
    ``dag_run``. Raises ``Exception`` when another task instance in the run
    is in the FAILED state.
    """
    task_instance = kwargs["ti"]
    file_config = task_instance.xcom_pull(key="config", task_ids="init")

    file_stem = file_config["file_stem"]

    for file_path in AIRFLOW_IMPORT.glob(f"{file_stem}{RAW_GLOB}"):
        # glob only yields existing paths; the exists() check guards the rare
        # race where the file disappears between glob and unlink.
        if file_path.exists():
            # Lazy %-style args: the message is only formatted if emitted.
            logging.info("Removing %s.", file_path)
            file_path.unlink()

    extract_table_name = file_config["extract_table"]
    load_table_name = file_config["load_table"]
    Fact.remove_tables(extract_table_name, load_table_name)

    # FIX: use a distinct loop variable. The original reused the name
    # ``task_instance``, shadowing the binding from kwargs["ti"] above and
    # forcing a redundant re-fetch via kwargs["task_instance"] inside the
    # loop condition.
    for upstream_ti in kwargs["dag_run"].get_task_instances():
        if (upstream_ti.current_state() == State.FAILED
                and upstream_ti.task_id != task_instance.task_id):
            raise Exception(
                f"Failing this DAG run, because task upstream {upstream_ti.task_id} failed. "
            )