dag = DAG('hello_world3', default_args=default_args, schedule_interval=None, catchup=False)


def print_context():
    print("Hello yoo")
    with open('/storage/hello-world-example/some1.txt', 'a') as a:
        a.write("Executed now2 yo 1" + str(datetime.now()))
    time.sleep(10)
    return 'Whatever you return gets printed in the logs'


def print_context2():
    with open('/storage/hello-world-example/some1.txt', 'a') as a:
        a.write("Executed now2 yo 2" + str(datetime.now()))
    time.sleep(15)
    return 'Whatever you return gets printed in the logs'


t1 = PythonOperator(task_id='print_the_context1', python_callable=print_context, dag=dag)
t2 = PythonOperator(task_id='print_the_context2', python_callable=print_context2, dag=dag)

t2.set_upstream(t1)
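# A note on the last line above: set_upstream is the legacy dependency API.
# A minimal equivalent using the bitshift syntax (same t1/t2 objects; declare
# the edge once, not both ways):
# t1 >> t2   # same effect as t2.set_upstream(t1)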
    'retries': 1,
    'retry_delay': timedelta(minutes=1),
}

with DAG(
        'air_quality_index',
        default_args=default_args,
        schedule_interval='@hourly',
) as dag:
    task_1 = PythonOperator(task_id='create_table',
                            provide_context=True,
                            python_callable=create_statement,
                            op_kwargs={
                                'host': 'localhost',
                                'dbname': 'postgres',
                                'user': '******',
                                'password': '******',
                                'statement': create_statment
                            },
                            dag=dag)
    task_2 = PythonOperator(task_id='get_data_from_api',
                            provide_context=True,
                            python_callable=get_data_api,
                            dag=dag)
    task_3 = PythonOperator(task_id='load_json_file',
                            provide_context=True,
                            python_callable=load_json_file,
                            dag=dag)
        username=conn_conf_dict[conn_id].login,
        password=conn_conf_dict[conn_id].password,
        table=table_name,
        ufile_path=UFILE_PATH % (db_name, table_name),
        query=query,
        m=18 if table_name == 'channel_response_code' else 20),
    dag=dag,
)

# check table
check_table = PythonOperator(
    task_id='check_table_{}'.format(hive_table_name),
    priority_weight=priority_weight_nm,
    python_callable=run_check_table,
    provide_context=True,
    op_kwargs={
        'db_name': db_name,
        'table_name': table_name,
        'conn_id': conn_id,
        'hive_table_name': hive_table_name
    },
    dag=dag)

# add partitions
add_partitions = HiveOperator(
    task_id='add_partitions_{}'.format(hive_table_name),
    priority_weight=priority_weight_nm,
    hql='''
        ALTER TABLE {table} ADD IF NOT EXISTS
        PARTITION (dt = '{{{{ tomorrow_ds }}}}', hour = '{{{{ execution_date.strftime("%H") }}}}')
    '''.format(table=hive_table_name),
    schema=HIVE_DB,
    dag=dag)
    from (
        select 1 as star_rating, 'Very bad' as rating_title union all
        select 2 as star_rating, 'Bad' as rating_title union all
        select 3 as star_rating, 'Not good' as rating_title union all
        select 4 as star_rating, 'Good' as rating_title union all
        select 5 as star_rating, 'VeryExcellent' as rating_title
    ) r;
    """
    cur.execute(query)
    conn.commit()


create_calendar = PythonOperator(task_id="fill_calendar",
                                 dag=dag,
                                 python_callable=fill_calendar,
                                 op_kwargs={"dwh_conn_id": "dwh"})
create_rating = PythonOperator(task_id="fill_rating",
                               dag=dag,
                               python_callable=fill_rating,
                               op_kwargs={"dwh_conn_id": "dwh"})
start_operator = DummyOperator(task_id='Begin_execution', dag=dag)
end_operator = DummyOperator(task_id='Stop_execution', dag=dag)

start_operator >> create_calendar >> end_operator
start_operator >> create_rating >> end_operator
    # note: schedule_interval and max_active_runs are DAG-level arguments;
    # placed inside default_args they are not applied to the DAG itself
    'schedule_interval': '@daily',
    'max_active_runs': 1,
    'retries': 3,
    'retry_delay': timedelta(minutes=1)
}

DAG_ID = 'covid'

with DAG(dag_id=DAG_ID, default_args=DEFAULT_ARGS) as dag:
    with open('/usr/local/airflow/dags/covid/sqls/ddl_covid_timeseries.sql', 'r') as f:
        sql = f.read()

    create_table_if_not_exists = PostgresOperator(task_id='check_table',
                                                  postgres_conn_id='postgres_staging',
                                                  sql=sql)
    extract = PythonOperator(
        task_id='extract',
        python_callable=get_covid_data,
        op_kwargs={'uri': 'https://api.covid19india.org/v4/timeseries.json'},
        provide_context=True
    )
    transform = PythonOperator(
        task_id='transform',
        python_callable=json_to_df,
        op_kwargs={'start_date': '2020-01-01', 'end_date': '9999-12-31'},
        provide_context=True
    )
    stage_load = PythonOperator(
        task_id='stage_load',
        python_callable=load_to_stage_area,
        provide_context=True
    'start_date': datetime(2020, 1, 27),
}

dag = DAG(
    dag_id='my_second_dag',
    default_args=args,
    schedule_interval=timedelta(minutes=150),
)


def _print_exec_date(**context):
    print("This is my execution date: " + str(context["execution_date"]))


print_execution_date = PythonOperator(
    task_id="print_execution_date",
    python_callable=_print_exec_date,
    provide_context=True,
    dag=dag,
)

the_end = DummyOperator(
    task_id='the_end',
    dag=dag,
)

for i in (1, 5, 10):
    # Each wait task needs dag=dag and its own dependency edges; wiring the
    # chain only once after the loop would link just the last task created.
    wait = BashOperator(task_id=f"wait_{i}", bash_command=f"sleep {i}", dag=dag)
    print_execution_date >> wait >> the_end
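# An alternative to wiring each edge inside the loop: collect the tasks in a
# list and fan out in one statement. Airflow accepts a list on either side of
# >>, creating one edge per element. A sketch assuming the same dag,
# print_execution_date, and the_end objects (task_ids suffixed so they don't
# clash with the loop above):
waits = [
    BashOperator(task_id=f"wait_list_{i}", bash_command=f"sleep {i}", dag=dag)
    for i in (1, 5, 10)
]
print_execution_date >> waits >> the_end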
    if test_mode:
        print(" 'foo' was passed in via test={} command : kwargs[params][foo] \
              = {}".format(test_mode, params["foo"]))
    # Print out the value of "miff", passed in below via the Python Operator
    print(" 'miff' was passed in via task params = {}".format(params["miff"]))
    return 1


my_templated_command = """
echo " 'foo was passed in via Airflow CLI Test command with value {{ params.foo }} "
echo " 'miff was passed in via BashOperator with value {{ params.miff }} "
"""

run_this = PythonOperator(
    task_id='run_this',
    python_callable=my_py_command,
    params={"miff": "agg"},
    dag=dag,
)

also_run_this = BashOperator(
    task_id='also_run_this',
    bash_command=my_templated_command,
    params={"miff": "agg"},
    dag=dag,
)


def print_env_vars(test_mode):
    """
    Print out the "foo" param passed in via
    `airflow tasks test example_passing_params_via_test_command env_var_test_task <date>
import datetime
import logging

from airflow import DAG
from airflow.operators.python_operator import PythonOperator


def hello_world():
    logging.info("Hello Ben!")


dag = DAG('Exercise1', start_date=datetime.datetime.now())

greet_task = PythonOperator(task_id="hello_world_task",
                            python_callable=hello_world,
                            dag=dag)
# Imports assumed by this snippet (Airflow 1.x module paths).
import datetime as dt

from airflow import DAG
from airflow.operators.bash_operator import BashOperator
from airflow.operators.python_operator import PythonOperator


def greet():
    print('Trying to write to the file')
    with open('./greet.txt', 'a+', encoding='utf8') as f:
        now = dt.datetime.now()
        t = now.strftime("%Y-%m-%d %H:%M")
        f.write(str(t) + '\n')
    return 'Greeted'


def respond():
    return 'Greet Responded Again'


default_args = {
    'owner': 'airflow',
    'start_date': dt.datetime(2018, 9, 24, 10, 00, 00),
    'concurrency': 1,
    'retries': 0
}

with DAG('my_simple_dag',
         default_args=default_args,
         schedule_interval='*/10 * * * *',
         ) as dag:
    opr_hello = BashOperator(task_id='say_Hi',
                             bash_command='echo "Hi!!"')
    opr_greet = PythonOperator(task_id='greet',
                               python_callable=greet)
    opr_sleep = BashOperator(task_id='sleep_me',
                             bash_command='sleep 5')
    opr_respond = PythonOperator(task_id='respond',
                                 python_callable=respond)

    opr_hello >> opr_greet >> opr_sleep >> opr_respond
    parameters={
        "first_parameter": "a_value",
        "second_parameter": "18"
    },
    # parameters="resources/parameter.json",  # you can also pass a path to a JSON file containing your params
    jenkins_connection_id="your_jenkins_connection"  # the connection must be configured first
)


def grab_artifact_from_jenkins(**context):
    """
    Grab an artifact from the previous job.
    The python-jenkins library doesn't expose a method for that,
    but it's entirely possible to build the request manually.
    """
    hook = JenkinsHook("your_jenkins_connection")
    jenkins_server = hook.get_jenkins_server()
    url = context['task_instance'].xcom_pull(task_ids='trigger_job')
    # The JenkinsJobTriggerOperator stores the job URL in the XCom variable
    # corresponding to the task. You can then use it to access things or to
    # get the job number. The URL looks like:
    # http://jenkins_url/job/job_name/job_number/
    url = url + "artifact/myartifact.xml"  # or any other artifact name
    request = Request(url)
    response = jenkins_server.jenkins_open(request)
    return response  # we store the artifact content in an XCom variable for later use


artifact_grabber = PythonOperator(
    task_id='artifact_grabber',
    python_callable=grab_artifact_from_jenkins)

job_trigger >> artifact_grabber
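# The artifact grabber works because JenkinsJobTriggerOperator pushes the job
# URL to XCom. A minimal sketch of that push/pull mechanic between two
# PythonOperators (task names and the dag object here are illustrative, not
# part of the snippet above):
def xcom_producer(**context):
    # A python_callable's return value is pushed to XCom under 'return_value'.
    return "http://jenkins_url/job/job_name/42/"


def xcom_consumer(**context):
    # xcom_pull with only task_ids fetches that 'return_value'.
    url = context['task_instance'].xcom_pull(task_ids='xcom_producer')
    print("pulled: " + url)


xcom_push_task = PythonOperator(task_id='xcom_producer',
                                python_callable=xcom_producer,
                                provide_context=True, dag=dag)
xcom_pull_task = PythonOperator(task_id='xcom_consumer',
                                python_callable=xcom_consumer,
                                provide_context=True, dag=dag)
xcom_push_task >> xcom_pull_task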
# -------------------------------------------------------------------------------
# dag
# these args will get passed on to each operator
# you can override them on a per-task basis during operator initialization
default_args = {
    'owner': 'TongYu',
    'catchup': False,  # note: catchup is a DAG-level argument; it is also set on the DAG below
    'start_date': trans_utc_datetime('0:00:00'),
}

dag = DAG(
    'valuation_date_update_dag',
    catchup=False,
    default_args=default_args,
    schedule_interval='0,1 17 * * *',
    dagrun_timeout=timedelta(minutes=10),
    description='valuation date manager dag')

# ----------------------------------------
PythonOperator(
    task_id='valuation_date_update_task',
    python_callable=set_valuation_date,
    execution_timeout=timedelta(minutes=10),
    dag=dag)

PythonOperator(
    task_id='calendar_import_year_update_task',
    python_callable=set_calendar_import_year,
    execution_timeout=timedelta(minutes=10),
    dag=dag)
def csvToJson():
    df = pd.read_csv('/home/demilsonfayika/dirty-data.csv')
    for i, r in df.iterrows():
        print(r['name'])
    df.to_json('anglo.json', orient='records')


default_args = {
    'owner': 'Demilson',
    'start_date': dt.datetime(2020, 12, 15),
    'retries': 1,
    'retry_delay': dt.timedelta(minutes=5),
}

with DAG('MyCSVDAG',
         default_args=default_args,
         schedule_interval=dt.timedelta(minutes=5),  # '0 * * * *',
         ) as dag:
    print_starting = BashOperator(task_id='starting',
                                  bash_command='echo "I am reading the CSV now....."')
    csvJson = PythonOperator(task_id='convertCSVtoJson',
                             python_callable=csvToJson)

    print_starting >> csvJson
object=f"rocket_launches/ds={ds}", mime_type='application/json') def _print_stats(ds, **context): gcloud_storage_hook = GoogleCloudStorageHook() tmp_file_handle = NamedTemporaryFile(delete=True) gcloud_storage_hook.download(bucket="nice_bucket", object=f"rocket_launches/ds={ds}", filename=tmp_file_handle.name) data = json.load(tmp_file_handle) rockets_launched = [launch["name"] for launch in data["launches"]] rockets_str = "" if rockets_launched: rockets_str = f" ({' & '.join(rockets_launched)})" print( f"{len(rockets_launched)} rocket launch(es) on {ds}{rockets_str}.") download_rocket_launches = PythonOperator( task_id="download_rocket_launches", python_callable=_download_rocket_launches, provide_context=True, dag=dag, ) print_stats = PythonOperator(task_id="print_stats", python_callable=_print_stats, provide_context=True, dag=dag) download_rocket_launches >> print_stats
task_id="create_oldest", dag=dag, sql=""" BEGIN; DROP TABLE IF EXISTS older_riders; CREATE TABLE older_riders AS ( SELECT * FROM trips WHERE birthyear > 0 AND birthyear <= 1945 ); COMMIT; """, postgres_conn_id="redshift" ) log_oldest_task = PythonOperator( task_id="log_oldest", dag=dag, python_callable=log_oldest ) create_youngest_task = PostgresOperator( task_id="create_youngest", dag=dag, sql=""" BEGIN; DROP TABLE IF EXISTS younger_riders; CREATE TABLE younger_riders AS ( SELECT * FROM trips WHERE birthyear > 2000 ); COMMIT; """,
def transform_codes_to_parquet(**kwargs):
    # ti is the Task Instance
    ti = kwargs['ti']
    cluster_id = ti.xcom_pull(task_ids='create_cluster')
    cluster_dns = emr.get_cluster_dns(cluster_id)
    headers = emr.create_spark_session(cluster_dns, 'spark')
    session_url = emr.wait_for_idle_session(cluster_dns, headers)
    statement_response = emr.submit_statement(
        session_url, '/root/airflow/dags/transform/codes.scala')
    emr.track_statement_progress(cluster_dns, statement_response.headers)
    emr.kill_spark_session(session_url)


# Define the individual tasks using Python Operators
create_cluster = PythonOperator(task_id='create_cluster',
                                python_callable=create_emr,
                                dag=dag)

wait_for_cluster_completion = PythonOperator(
    task_id='wait_for_cluster_completion',
    python_callable=wait_for_completion,
    dag=dag)

transform_movies = PythonOperator(task_id='transform_codes',
                                  python_callable=transform_codes_to_parquet,
                                  dag=dag)

terminate_cluster = PythonOperator(task_id='terminate_cluster',
                                   python_callable=terminate_emr,
                                   trigger_rule='all_done',
                                   dag=dag)
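# terminate_cluster uses trigger_rule='all_done' so the EMR cluster is torn
# down whether the transform succeeded or failed. A minimal sketch of that
# cleanup pattern with placeholder callables (names are illustrative, not
# from this pipeline; assumes the same dag object):
def do_work(**kwargs):
    raise RuntimeError("simulated failure")  # upstream task fails...


def cleanup(**kwargs):
    print("cleanup runs anyway")  # ...but the cleanup still executes


work = PythonOperator(task_id='work', python_callable=do_work, dag=dag)
always_cleanup = PythonOperator(task_id='always_cleanup',
                                python_callable=cleanup,
                                # 'all_done': run once all upstream tasks have
                                # finished, regardless of success or failure
                                trigger_rule='all_done',
                                dag=dag)
work >> always_cleanup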
def MonthlyPipeline():
    MONTHLY_RELEASE_TRIGGER = '15 17 * * 4#3'

    def MonthlyGenerateTestArgs(**kwargs):
        """Loads the configuration that will be used for this Iteration."""
        conf = kwargs['dag_run'].conf
        if conf is None:
            conf = dict()

        # If version is overridden then we should use it; otherwise we use its
        # default or monthly value.
        version = conf.get('VERSION') or istio_common_dag.GetVariableOrDefault(
            'monthly-version', None)
        if not version or version == 'INVALID':
            raise ValueError('version needs to be provided')
        Variable.set('monthly-version', 'INVALID')

        # GCS_MONTHLY_STAGE_PATH is of the form 'prerelease/{version}'
        gcs_path = 'prerelease/%s' % (version)

        branch = conf.get('BRANCH') or istio_common_dag.GetVariableOrDefault(
            'monthly-branch', None)
        if not branch or branch == 'INVALID':
            raise ValueError('branch needs to be provided')
        Variable.set('monthly-branch', 'INVALID')

        commit = conf.get('COMMIT') or branch
        mfest_commit = conf.get('MFEST_COMMIT') or branch

        default_conf = environment_config.GetDefaultAirflowConfig(
            branch=branch,
            commit=commit,
            gcs_path=gcs_path,
            mfest_commit=mfest_commit,
            pipeline_type='monthly',
            verify_consistency='true',
            version=version)

        config_settings = dict()
        for name in default_conf:  # dict.iterkeys() is Python 2 only; iterate the dict directly
            config_settings[name] = conf.get(name) or default_conf[name]

        # These are the extra params that are passed to the dags for monthly release
        monthly_conf = dict()
        monthly_conf['DOCKER_HUB'] = 'istio'
        monthly_conf['GCR_RELEASE_DEST'] = 'istio-io'
        monthly_conf['GCS_GITHUB_PATH'] = 'istio-secrets/github.txt.enc'
        monthly_conf['RELEASE_PROJECT_ID'] = 'istio-io'
        # GCS_MONTHLY_RELEASE_PATH is of the form 'istio-release/releases/{version}'
        monthly_conf['GCS_MONTHLY_RELEASE_PATH'] = 'istio-release/releases/%s' % (version)
        for name in monthly_conf:
            config_settings[name] = conf.get(name) or monthly_conf[name]

        testMonthlyConfigSettings(config_settings)
        return config_settings

    def ReportMonthlySuccessful(task_instance, **kwargs):
        del kwargs

    dag, tasks, addAirflowBashOperator = istio_common_dag.MakeCommonDag(
        MonthlyGenerateTestArgs,
        'istio_monthly_dag',
        schedule_interval=MONTHLY_RELEASE_TRIGGER,
        extra_param_lst=monthly_extra_params)

    addAirflowBashOperator('release_push_github_docker_template',
                           'github_and_docker_release',
                           need_commit=True)
    addAirflowBashOperator('release_tag_github_template',
                           'github_tag_repos',
                           need_commit=True)

    mark_monthly_complete = PythonOperator(
        task_id='mark_monthly_complete',
        python_callable=ReportMonthlySuccessful,
        provide_context=True,
        dag=dag,
    )
    tasks['mark_monthly_complete'] = mark_monthly_complete

    # tasks['generate_workflow_args']
    tasks['get_git_commit'].set_upstream(tasks['generate_workflow_args'])
    tasks['run_cloud_builder'].set_upstream(tasks['get_git_commit'])
    tasks['run_release_qualification_tests'].set_upstream(
        tasks['run_cloud_builder'])
    tasks['modify_values_helm'].set_upstream(
        tasks['run_release_qualification_tests'])
    tasks['copy_files_for_release'].set_upstream(tasks['modify_values_helm'])
    tasks['github_and_docker_release'].set_upstream(
        tasks['copy_files_for_release'])
    tasks['github_tag_repos'].set_upstream(tasks['github_and_docker_release'])
    tasks['mark_monthly_complete'].set_upstream(tasks['github_tag_repos'])

    return dag
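# The pipeline above gates each run on Airflow Variables (monthly-version,
# monthly-branch) and resets them to 'INVALID' after reading, so a stale
# value cannot leak into the next run. A minimal sketch of that one-shot
# variable idiom (the variable name 'my-param' is illustrative):
from airflow.models import Variable


def consume_one_shot_param():
    value = Variable.get('my-param', default_var=None)
    if not value or value == 'INVALID':
        raise ValueError('my-param needs to be provided')
    # Invalidate immediately so the next run must supply a fresh value.
    Variable.set('my-param', 'INVALID')
    return value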
    if not os.path.exists(outdir):
        os.mkdir(outdir)
    output_df.to_csv(str(outdir + '/predout.csv'))


def pull_test_data():
    fetch_test_data = ast.literal_eval(Variable.get("fetch_test_data"))
    response = invoke_endpoint(fetch_test_data)
    output_dict = ast.literal_eval(response['1:Fetch Housing Price Test Data']['message'])
    output_df = pd.DataFrame(output_dict)
    outdir = '/storage/housing_price_prediction'
    if not os.path.exists(outdir):
        os.mkdir(outdir)
    output_df.to_csv(str(outdir + '/test.csv'))


t1 = PythonOperator(
    task_id='testing_data_processing',
    python_callable=testing_data_processing,
    dag=dag)

t2 = PythonOperator(
    task_id='testing_model',
    python_callable=testing_model,
    dag=dag)

t3 = PythonOperator(
    task_id='pull_predicted_labels',
    python_callable=pull_predicted_labels,
    dag=dag)

t4 = PythonOperator(
    task_id='pull_test_data',
    python_callable=pull_test_data,
    dag=dag)
    dag_ids = dag.dag_id

    msg = [{
        "dag": dag,
        "db": "opay_dw",
        "table": "{dag_name}".format(dag_name=dag_ids),
        "partition": "country_code=NG/dt={pt}".format(pt=ds),
        "timeout": "3000"
    }]

    TaskTimeoutMonitor().set_task_monitor(msg)


task_timeout_monitor = PythonOperator(task_id='task_timeout_monitor',
                                      python_callable=fun_task_timeout_monitor,
                                      provide_context=True,
                                      dag=dag)

##----------------------------------------- variables ---------------------------------------##
db_name = "opay_dw"
table_name = "dm_opay_user_trans_portrait_df"
hdfs_path = "oss://opay-datalake/opay/opay_dw/" + table_name


def dm_opay_user_trans_portrait_df_sql_task(ds):
    HQL = '''
    set mapred.max.split.size=1000000;
    set hive.exec.dynamic.partition.mode=nonstrict;
    set hive.exec.parallel=true;
    poke_interval=10,
    timeout=300)


def transform_func(**kwargs):
    folder_path = FSHook(conn_id=FILE_CONNECTION_ID).get_path()
    file_path = f"{folder_path}/{FILE_NAME}"
    destination_file = f"{folder_path}/{OUTPUT_TRANSFORM_FILE}"
    df = pd.read_csv(file_path, header=0, encoding='ISO-8859-1')
    df.to_csv(destination_file, index=False)
    os.remove(file_path)
    return destination_file


transform_process = PythonOperator(dag=dag,
                                   task_id='transform_process',
                                   python_callable=transform_func,
                                   provide_context=True)


def insert_process(**kwargs):
    ti = kwargs['ti']
    source_file = ti.xcom_pull(task_ids='transform_process')
    db_connection = MySqlHook('airflow_db').get_sqlalchemy_engine()
    df = pd.read_csv(source_file)
    with db_connection.begin() as transaction:
        transaction.execute("DELETE FROM test.confirmed_table WHERE 1=1")
        df.to_sql("confirmed_table", con=transaction, schema="test",
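# insert_process makes the load idempotent: it deletes existing rows and
# re-inserts inside a single SQLAlchemy transaction, so a retried task never
# double-loads. A standalone sketch of that pattern against SQLite (engine
# URL and table name are illustrative):
import pandas as pd
from sqlalchemy import create_engine, text

engine = create_engine("sqlite:///demo.db")
df = pd.DataFrame({"id": [1, 2], "value": ["a", "b"]})
df.head(0).to_sql("confirmed_table", con=engine,
                  if_exists="replace", index=False)  # ensure the table exists

# If to_sql fails, the DELETE rolls back too, keeping reruns idempotent.
with engine.begin() as transaction:
    transaction.execute(text("DELETE FROM confirmed_table WHERE 1=1"))
    df.to_sql("confirmed_table", con=transaction,
              if_exists="append", index=False)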
# Instructions
# Define a function that uses the python logger to log a message. Then finish
# filling in the details of the DAG down below. Once you've done that, run the
# "/opt/airflow/start.sh" command to start the web server. Once the Airflow
# web server is ready, open the Airflow UI using the "Access Airflow" button.
# Turn your DAG "On", and then Run your DAG. If you get stuck, you can take a
# look at the solution file or the video walkthrough on the next page.

import datetime
import logging

from airflow import DAG
from airflow.operators.python_operator import PythonOperator


#
# TODO: Define a function for the PythonOperator to call and have it log something
#
def my_function():
    logging.info('This is my first pipeline')


dag = DAG('lesson1.exercise1', start_date=datetime.datetime.now())

#
# TODO: Uncomment the operator below and replace the arguments labeled <REPLACE> below
#
greet_task = PythonOperator(task_id="task1",
                            python_callable=my_function,
                            dag=dag)
dag = DAG(
    dag_id="my_simple_dag",
    start_date=datetime(year=2020, month=1, day=1, hour=12, minute=1, second=1),
    schedule_interval="@yearly",
    max_active_runs=1,
)

opr_hello = BashOperator(
    task_id="say_Hi",
    bash_command='echo "Hi!!"',
    dag=dag,
)

opr_greet = PythonOperator(
    task_id="greet",
    python_callable=greet,
    dag=dag,
)

opr_sleep = BashOperator(
    task_id="sleep_me",
    bash_command="sleep 5",
    dag=dag,
)

opr_respond = PythonOperator(
    task_id="respond",
    python_callable=respond,
    dag=dag,
)

opr_spark = PythonOperator(
dag = DAG('lesson2.exercise3',
          start_date=datetime.datetime(2018, 1, 1, 0, 0, 0, 0),
          end_date=datetime.datetime(2018, 12, 1, 0, 0, 0, 0),
          schedule_interval='@monthly',
          max_active_runs=1)

create_trips_table = PostgresOperator(
    task_id="create_trips_table",
    dag=dag,
    postgres_conn_id="redshift",
    sql=sql_statements.CREATE_TRIPS_TABLE_SQL)

copy_trips_task = PythonOperator(
    task_id='load_trips_from_s3_to_redshift',
    dag=dag,
    python_callable=load_trip_data_to_redshift,
    provide_context=True,  # provide context to our Python Operator
)

create_stations_table = PostgresOperator(
    task_id="create_stations_table",
    dag=dag,
    postgres_conn_id="redshift",
    sql=sql_statements.CREATE_STATIONS_TABLE_SQL,
)

copy_stations_task = PythonOperator(
    task_id='load_stations_from_s3_to_redshift',
    dag=dag,
    python_callable=load_station_data_to_redshift,
)
with DAG("store_dag", default_args=default_args, schedule_interval='@daily', template_searchpath=['/usr/local/airflow/sql_files'], catchup=False) as dag: # task1: check if the source file exists in the input directory # note that the file in airflow container. t1 = BashOperator( task_id="check_file_exists", bash_command="shasum ~/store_files_airflow/raw_store_transactions.csv", retries=1, retry_delay=timedelta(seconds=15)) # task 2: clean data (remove special characters) t2 = PythonOperator(task_id="clean_raw_csv", python_callable=data_cleaner) # task 3: create table t3 = MySqlOperator(task_id="create_mysql_table", mysql_conn_id="mysql_conn", sql="create_table.sql") # task 4: insert cleaned data into table t4 = MySqlOperator(task_id="insert_into_table", mysql_conn_id="mysql_conn", sql="insert_into_table.sql", dag=dag) # task 5: calculate store-wise and location-wise profit (yesterday) and save results as csv t5 = MySqlOperator(task_id="select_from_table", mysql_conn_id="mysql_conn",
api_tse_cand_contas_extraction = DivulgacaoCandContasTSEProcessor(
    db_uri=MONGO_URI, election_year=TSE_YEAR_DATA)

args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': airflow.utils.dates.days_ago(730),
    'email': ['*****@*****.**'],
    'email_on_failure': True,
    'email_on_retry': False,
    'retries': 1
}

dag = DAG(dag_id='import_candidate_extra_data',
          default_args=args,
          schedule_interval=None,
          dagrun_timeout=timedelta(minutes=60))

start_import = DummyOperator(task_id='start_import', dag=dag)

import_candidate_extra_data = PythonOperator(
    task_id='import_extra_data_candidate',
    python_callable=api_tse_cand_contas_extraction.run,
    dag=dag)

import_candidate_extra_data.set_upstream(start_import)

if __name__ == '__main__':
    dag.cli()
    data.rename(columns=COLUMNS).to_sql('muertes',
                                        con=connection,
                                        schema='test',
                                        if_exists='append',
                                        index=False)
    os.remove(full_path)
    logger.info(f"Rows inserted {len(data.index)}")


dag = DAG('muertos',
          description='Muertos',
          default_args={
              'owner': 'jsique',
              'depends_on_past': False,
              'max_active_runs': 1,
              'start_date': days_ago(5)
          },
          schedule_interval='0 1 * * *',
          catchup=False)

sensor = FileSensor(task_id="file_sensor_task_muertos",
                    dag=dag,
                    filepath='time_series_covid19_deaths_global.csv',
                    fs_conn_id=FILE_CONNECTION_NAME,
                    poke_interval=10,
                    timeout=600)

etl = PythonOperator(task_id="dead_etl",
                     provide_context=True,
                     python_callable=etl_process,
                     dag=dag)

sensor >> etl
    df = pd.DataFrame.from_dict(records, orient='index')
    df['reviewText'] = df['reviewText'].apply(lambda x: x.replace('|', ''))
    df.to_csv(output_path, sep='|', index=False)


with DAG(
        dag_id="create_fact_review_table",
        schedule_interval="@daily",
        default_args=default_args,
        catchup=False) as dag:

    # Step 1: Unzip and store as csv
    unzip_file_store_as_csv = PythonOperator(
        task_id='unzip_file_store_as_csv',
        python_callable=unzip_to_csv,
        op_kwargs={'input_path': '/usr/local/airflow/dags/files/reviews_Musical_Instruments.json.gz',
                   'output_path': '/usr/local/airflow/dags/files/review_data.csv'}
    )

    # Step 2: Move the csv file to hdfs storage
    move_to_hdfs = BashOperator(
        task_id="move_to_hdfs",
        bash_command="""
            hdfs dfs -mkdir -p /fact_review && \
            hdfs dfs -put -f $AIRFLOW_HOME/dags/files/review_data.csv /fact_review
        """
    )

    # Step 3: Create a hive table on our sku_data
    creating_fact_table = HiveOperator(
        task_id="creating_fact_review_table",
if weekday == "Mon": person = "email_bob" elif weekday == "Wed": person = "email_alice" elif weekday == "Fri": person = "email_joe" else: person = "unkown" return person print_week_day = PythonOperator( task_id="print_week_day", python_callable=_get_weekday, provide_context=True, dag=dag, ) branching = BranchPythonOperator(task_id="branching", python_callable=_get_weekday, provide_context=True, dag=dag) join = DummyOperator(task_id="join", trigger_rule="none_failed", dag=dag) print_week_day >> branching persons = ["email_bob", "email_alice", "email_joe"] for person in persons: branching >> DummyOperator(task_id=person, dag=dag) >> join
# [END howto_operator_http_task_del_op]

# [START howto_operator_http_http_sensor_check]
task_http_sensor_check = HttpSensor(
    task_id='api_health_check',
    http_conn_id='rest-connection',
    endpoint='/',
    request_params={},
    # response_check=lambda response: "httpbin" in response.text,
    poke_interval=5,
    # on_failure_callback=notify_email,
    dag=dag,
)

# Task 3: Save JSON data locally
# save_and_transform = PythonOperator(
#     task_id="save_and_transform",
#     python_callable=transform_json,
#     provide_context=True,
# )

save_employee = PythonOperator(
    task_id="save_employee_transform",
    python_callable=save_emp_json,
    provide_context=True
)

task_http_sensor_check >> task_save_employee >> save_employee >> task_get_byid_employee
save_employee >> task_get_all_employee
# save_employee >> task_update_employee >> task_get_byid_employee
import sys

from airflow.operators.python_operator import PythonOperator
from datetime import datetime
from airflow import DAG

# business imports
sys.path.append('/home/airflow/python/')
from cl_cr_update import jira_update

dag = DAG('cl_cr_update',
          description='Run update of jira issues.',
          schedule_interval='*/5 * * * *',
          start_date=datetime(2019, 12, 7),
          catchup=False)

step_1 = PythonOperator(task_id='step_1', python_callable=jira_update, dag=dag)
    callback_url = conf_json['callback_url']
    input_json_dir = Path(conf_json['input_json_dir'])

    context['ti'].xcom_push(key='mount_volumes', value=mount_volumes)
    context['ti'].xcom_push(key='callback_url', value=callback_url)
    context['ti'].xcom_push(key='input_json_dir', value=input_json_dir)


def post_process(**context):
    print('start post process.')
    callback_url = context['ti'].xcom_pull(key='callback_url')
    requests.post(callback_url)


t1 = PythonOperator(
    task_id='pre_process',
    python_callable=pre_process,
    dag=dag
)

t3 = PythonOperator(
    task_id='post_process',
    python_callable=post_process,
    trigger_rule=TriggerRule.ALL_DONE,  # always run, even when an error occurs
    dag=dag
)

t2 = DockerOperatorEx(
    task_id='main_process',
    image='qunomon/eval_mnist_data_coverage:0.1',
    docker_url='unix://var/run/docker.sock',
    api_version='auto',