# Branching example DAG: a BranchPythonOperator randomly selects exactly one
# of four branches; the remaining branches are skipped and a join task fans
# everything back in.

# Midnight, seven days before "today" (dynamic — recomputed on each parse).
seven_days_ago = datetime.combine(
    datetime.today() - timedelta(7),
    datetime.min.time(),
)

args = {
    'owner': 'airflow',
    'start_date': seven_days_ago,
}

dag = DAG(
    dag_id='example_branch_operator',
    default_args=args,
    schedule_interval="@daily",
)

# NOTE(review): `cmd` is not referenced anywhere in this chunk — possibly a
# leftover; confirm before removing.
cmd = 'ls -l'

# Entry point of the DAG.
run_this_first = DummyOperator(task_id='run_this_first', dag=dag)

options = ['branch_a', 'branch_b', 'branch_c', 'branch_d']

# The callable returns the task_id of the single branch to follow.
branching = BranchPythonOperator(
    task_id='branching',
    python_callable=lambda: random.choice(options),
    dag=dag,
)
run_this_first >> branching

# 'one_success' lets the join fire even though three of the four upstream
# branches are skipped by the branch operator.
join = DummyOperator(task_id='join', trigger_rule='one_success', dag=dag)

# Wire each branch: branching -> <option> -> follow_<option> -> join.
for option in options:
    branch_task = DummyOperator(task_id=option, dag=dag)
    follow_task = DummyOperator(task_id='follow_' + option, dag=dag)
    branching >> branch_task >> follow_task >> join
# [START push_data_into_datastore_task]
def task_push_data_into_datastore(**context):
    """Push the fetched resource data into the CKAN datastore.

    Reads the resource description and the CKAN connection settings from
    the DAG run params and delegates the upload to
    ``load_resource_via_api``.

    :param context: Airflow task context (``provide_context=True``).
        ``context["params"]`` is expected to carry ``resource`` and
        ``ckan_config`` dicts — TODO confirm against the triggering caller.
    :return: whatever ``load_resource_via_api`` returns.
    """
    logging.info("Loading resource via API")
    params = context["params"]
    resource_dict = params.get("resource", {})
    # Hoist the ckan_config lookup once instead of fetching it per field.
    ckan_config = params.get("ckan_config", {})
    return load_resource_via_api(
        resource_dict,
        ckan_config.get("api_key"),
        ckan_config.get("site_url"),
    )


push_data_into_datastore_task = PythonOperator(
    task_id="push_data_into_datastore",
    provide_context=True,
    python_callable=task_push_data_into_datastore,
    # Run as long as no upstream failed; upstreams may legitimately skip.
    trigger_rule="none_failed_or_skipped",
    dag=dag,
    doc_md=dedent("""\
    #### create new datastore table
    This task pushes the data into datastore on newly created or
    existing datastore table.
    """),
)
# [END push_data_into_datastore_task]

# [SET WORKFLOW ]
check_schema_task.set_upstream(fetch_and_read_data_task)
create_datastore_table_task.set_upstream(check_schema_task)
push_data_into_datastore_task.set_upstream(
    [create_datastore_table_task, check_schema_task])
# [END WORKFLOW]