}

dag = DAG(
    dag_id='test_example_bash_operator',
    default_args=args,
    schedule_interval='0 0 * * *',  # daily at midnight
    dagrun_timeout=timedelta(minutes=60))

cmd = 'ls -l'  # defined but never used in this example
run_this_last = DummyOperator(task_id='run_this_last', dag=dag)

run_this = BashOperator(
    task_id='run_after_loop', bash_command='echo 1', dag=dag)
run_this.set_downstream(run_this_last)

# Generate three parallel tasks that must all finish before run_after_loop.
for i in range(3):
    task = BashOperator(
        task_id='runme_' + str(i),
        bash_command='echo "{{ task_instance_key_str }}" && sleep 1',
        dag=dag)
    task.set_downstream(run_this)

task = BashOperator(
    task_id='also_run_this',
    bash_command='echo "run_id={{ run_id }} | dag_run={{ dag_run }}"',
    dag=dag)
task.set_downstream(run_this_last)

if __name__ == "__main__":
    dag.cli()
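
The set_downstream calls above can also be written with Airflow's bitshift operators, as the later examples do. A minimal equivalent sketch:

# Equivalent dependency wiring using bitshift composition:
run_this >> run_this_last   # same as run_this.set_downstream(run_this_last)
task >> run_this_last       # same as task.set_downstream(run_this_last)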
Example #2
dag = DAG(
    dag_id='restart_streamer',
    default_args=args,
    schedule_interval='0 3 21 * *',  # 03:00 on the 21st of each month
    dagrun_timeout=timedelta(minutes=60),
    user_defined_macros={
        'yesterday_year': compute_yesterday_year,
        'src_dir': '/dest/colditz/tweetstreamer',
        'src_host': 'vm049',
        'real_user': '******'  # must be able to sudo to this user
    })
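
# Keys in user_defined_macros are exposed to Jinja templating in this DAG's
# templated fields; that is how {{real_user}}, {{src_host}} and {{src_dir}}
# resolve inside the bash_command strings below.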

kill_streamer = BashOperator(
    task_id='kill_streamer',
    bash_command='sudo -u {{real_user}} ssh {{src_host}} rm {{src_dir}}/DeleteToKill.txt ',
    dag=dag,
)

start_streamer = BashOperator(
    task_id='start_streamer',
    bash_command='sudo -u {{real_user}} ssh {{src_host}} "nohup bash {{src_dir}}/runstreamer.sh </dev/null > /dev/null 2>&1 & " ',
    dag=dag,
    on_success_callback=notify_success_email)

kill_streamer >> start_streamer

if __name__ == "__main__":
    dag.cli()
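
This snippet references two helpers, compute_yesterday_year and notify_success_email, that are defined outside the excerpt. A minimal sketch of plausible implementations (hypothetical; the originals are not shown):

from datetime import datetime, timedelta

from airflow.utils.email import send_email


def compute_yesterday_year():
    # Hypothetical: the year of the previous calendar day, per the macro name.
    return (datetime.utcnow() - timedelta(days=1)).year


def notify_success_email(context):
    # Hypothetical on_success_callback; Airflow passes the task context dict.
    send_email(
        to='ops@example.com',  # placeholder address
        subject='restart_streamer: streamer restarted',
        html_content='Task {} succeeded.'.format(context['task_instance'].task_id))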
Example #3
# Imports required by this snippet (Airflow 1.x module paths).
import airflow
from airflow import DAG
from airflow.operators.bash_operator import BashOperator
from airflow.operators.dummy_operator import DummyOperator
from airflow.operators.hive_operator import HiveOperator

args = {
    'owner': 'Airflow',
    'start_date': airflow.utils.dates.days_ago(1),
}

# DAG_NAME is referenced but not defined in the excerpt; a placeholder is used here.
DAG_NAME = 'weblog_pipeline'  # placeholder value, original not shown

dag_prjt_main = DAG(
    dag_id=DAG_NAME,
    default_args=args,
    schedule_interval='* * * * *'  # every minute; "@once" was the commented-out alternative
)

SQOOP_Task1 = BashOperator(
    task_id='Sqoop',
    bash_command='~/sqoop-1.4.7.bin__hadoop-2.6.0/bin/sqoop job --exec Sqoop_weblogdetails_test37',
    dag=dag_prjt_main)

hive_cmd = """use test1;
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;
set hive.exec.max.dynamic.partitions=1000;
insert into weblog_partiton_table partition(host)
select id, datevalue, ipaddress, url, responsecode, host
from weblog_external as a
where not exists (
    select b.id from weblog_partiton_table as b where a.id = b.id
);"""
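
# The NOT EXISTS guard makes the insert idempotent: rows whose id is already
# present in weblog_partiton_table are skipped, so re-running the task does not
# duplicate data. The dynamic-partition settings above let Hive create host=
# partitions on the fly during this insert.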

hive_part = HiveOperator(
    task_id='Hive',
    hive_cli_conn_id='hive_cli_default',
    hql=hive_cmd,
    dag=dag_prjt_main)

finish_task = DummyOperator(task_id="finaltask", dag=dag_prjt_main)

SQOOP_Task1 >> hive_part >> finish_task


if __name__ == '__main__':
    dag_prjt_main.cli()
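
The DAG's wiring can be sanity-checked without a scheduler. A minimal sketch, assuming the file above is importable as weblog_dag:

# Structural check only; nothing is executed.
from weblog_dag import dag_prjt_main  # hypothetical module name for the file above

assert {t.task_id for t in dag_prjt_main.tasks} == {'Sqoop', 'Hive', 'finaltask'}
assert set(dag_prjt_main.get_task('Sqoop').downstream_task_ids) == {'Hive'}
assert set(dag_prjt_main.get_task('Hive').downstream_task_ids) == {'finaltask'}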