示例#1
0
def load_subdag(parent_dag_name, child_dag_name, def_args):
    """Build the sub-DAG ``<parent_dag_name>.<child_dag_name>``.

    The sub-DAG is scheduled ``@hourly`` and chains four tasks one after
    another: a sensor on the external DAG ``gridu_dag``, a bash task that
    removes the run file, a Python task that prints a pulled value, and a
    bash task that touches a ``finished_<ts_nodash>`` file.

    Args:
        parent_dag_name: first half of the sub-DAG id.
        child_dag_name: second half of the sub-DAG id.
        def_args: forwarded to the DAG as ``default_args``.

    Returns:
        The assembled ``DAG`` object.
    """
    subdag_id = f'{parent_dag_name}.{child_dag_name}'

    with DAG(dag_id=subdag_id,
             schedule_interval='@hourly',
             default_args=def_args) as dag_subdag:
        # NOTE(review): external_task_id=None presumably makes the sensor
        # wait on the whole external DAG run rather than one task - confirm
        # against the Airflow version in use.
        dag_sensor = ExternalTaskSensor(
            task_id='wait_for_dag',
            external_dag_id='gridu_dag',
            external_task_id=None,
            execution_delta=timedelta(minutes=5),
            allowed_states=['success'],
        )
        # path_to_run_file is resolved when this function runs, not when the
        # task executes (plain f-string, not a Jinja template).
        run_file_cleanup = BashOperator(
            task_id='remove_a_file',
            bash_command=f'rm {path_to_run_file}',
        )
        result_printer = PythonOperator(
            task_id='print_a_result',
            python_callable=print_pulled_value,
        )
        # {{ ts_nodash }} is left for Airflow to render at execution time.
        finish_marker = BashOperator(
            task_id='create_a_file',
            bash_command='touch finished_{{ ts_nodash }}',
        )

        # Strictly linear pipeline: sensor -> cleanup -> print -> marker.
        dag_sensor.set_downstream(run_file_cleanup)
        run_file_cleanup.set_downstream(result_printer)
        result_printer.set_downstream(finish_marker)

    return dag_subdag
from airflow import DAG
from airflow.operators.dummy_operator import DummyOperator
from airflow.operators.python_operator import PythonOperator, BranchPythonOperator
from airflow.operators.bash_operator import BashOperator
from airflow.operators.postgres_operator import PostgresOperator, PostgresHook
from airflow.utils.trigger_rule import TriggerRule
from operators.countoperator import PostgreSQLCountRowsOperator

# Shared task settings: owner, start date, e-mail notification flags and the
# retry policy. Presumably handed to a DAG as its default_args - the call
# site is not visible here, confirm before relying on it.
default_args = dict(
    owner='Olya',
    start_date=datetime(2020, 10, 1),
    email=['*****@*****.**'],
    email_on_failure=True,
    email_on_retry=False,
    retries=1,
    retry_delay=timedelta(minutes=5),
)


def print_process_start(**op_kwargs):
    """Print a one-line notice that table processing has started.

    Reads the optional ``dag_id`` and ``database`` keyword arguments; a
    missing key is rendered as ``None`` in the message. Returns nothing.
    """
    dag_name = op_kwargs.get("dag_id")
    db_name = op_kwargs.get("database")
    message = f'{dag_name} start processing tables in database: {db_name}.'
    print(message)


def check_table_exist(sql_to_get_schema, sql_to_check_table_exist, table_name):
    """ callable function to get schema name and after that check if table exist """
    hook = PostgresHook('airflow_course_postgres')
    # get schema name
    query = hook.get_records(sql=sql_to_get_schema)
    schema = None