def load_subdag(parent_dag_name, child_dag_name, def_args):
    """Build and return the sub-DAG '<parent>.<child>'.

    The sub-DAG waits for a run of 'gridu_dag' to reach the 'success'
    state, removes the trigger file, prints the value pulled by
    ``print_pulled_value`` and finally creates a timestamped marker file.

    :param parent_dag_name: dag_id of the parent DAG (sub-DAG ids must be
        prefixed with it, hence the f-string below)
    :param child_dag_name: task_id of the SubDagOperator in the parent
    :param def_args: default_args dict passed through to the sub-DAG
    :return: the configured DAG object
    """
    subdag = DAG(
        dag_id=f'{parent_dag_name}.{child_dag_name}',
        default_args=def_args,
        schedule_interval='@hourly',
    )

    with subdag:
        # Poke until the external DAG run, offset by 5 minutes, succeeds.
        # external_task_id=None means the whole DAG run is monitored.
        sensor = ExternalTaskSensor(
            task_id='wait_for_dag',
            external_dag_id='gridu_dag',
            execution_delta=timedelta(minutes=5),
            external_task_id=None,
            allowed_states=['success'],
        )

        # path_to_run_file is a module-level value defined elsewhere in this file.
        cleanup = BashOperator(
            task_id='remove_a_file',
            bash_command=f'rm {path_to_run_file}',
        )

        report = PythonOperator(
            task_id='print_a_result',
            python_callable=print_pulled_value,
        )

        # Marker file whose name embeds the run timestamp (Jinja ts_nodash).
        marker = BashOperator(
            task_id='create_a_file',
            bash_command='touch finished_{{ ts_nodash }}',
        )

        sensor >> cleanup >> report >> marker

    return subdag
# Fix: datetime/timedelta were used below (and timedelta in load_subdag)
# without ever being imported; ExternalTaskSensor used by load_subdag was
# also missing from the import block.
from datetime import datetime, timedelta

from airflow import DAG
from airflow.operators.dummy_operator import DummyOperator
from airflow.operators.python_operator import PythonOperator, BranchPythonOperator
from airflow.operators.bash_operator import BashOperator
from airflow.operators.postgres_operator import PostgresOperator, PostgresHook
from airflow.sensors.external_task_sensor import ExternalTaskSensor
from airflow.utils.trigger_rule import TriggerRule

from operators.countoperator import PostgreSQLCountRowsOperator

# Task-level defaults shared by every task in this file's DAGs.
default_args = {
    'owner': 'Olya',
    'start_date': datetime(2020, 10, 1),
    'email': ['*****@*****.**'],
    'email_on_failure': True,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=5)
}


def print_process_start(**op_kwargs):
    """Print a start-of-processing message for the given DAG/database.

    Reads 'dag_id' and 'database' from op_kwargs; a missing key is rendered
    as None rather than raising.
    """
    print(
        f'{op_kwargs.get("dag_id", None)} start processing tables in database: {op_kwargs.get("database", None)}.'
    )


def check_table_exist(sql_to_get_schema, sql_to_check_table_exist, table_name):
    """ callable function to get schema name and after that check if table exist """
    hook = PostgresHook('airflow_course_postgres')
    # get schema name
    # NOTE(review): the remainder of this function is outside the visible
    # chunk; the body below reproduces only what is in view.
    query = hook.get_records(sql=sql_to_get_schema)
    schema = None