# rsync_task = BashOperator(
#     task_id='rsync',
#     bash_command=as_user(rsync_command, USER),
#     params={'klustadir': KLUSTA_DIR,
#             'mansortdir': MANSORT_DIR,
#             'mansorthost': MANSORT_HOST},
#     dag=dag)

email_me = EmailOperator(
    task_id='email_me',
    to=default_args['email'],
    subject='%s is complete' % dag_id,
    html_content='You may now manually sort on NIAO',
    dag=dag)

make_kwd_task.set_upstream(make_klusta_dir_task)
phy_task.set_upstream(make_kwd_task)
# merge_events_task.set_upstream(phy_task)
clear_phy_task.set_upstream(phy_task)
make_kwik_bak_dir_task.set_upstream(phy_task)
mv_kwik_bak_task.set_upstream(make_kwik_bak_dir_task)
# make_mansort_dir_task.set_upstream(phy_task)
# rsync_task.set_upstream(clear_phy_task)
# rsync_task.set_upstream(mv_kwik_bak_task)
# rsync_task.set_upstream(make_mansort_dir_task)
# email_me.set_upstream(rsync_task)
email_me.set_upstream(mv_kwik_bak_task)
email_me.set_upstream(clear_phy_task)

globals()[dag_id] = dag
dag = DAG('vs', default_args=default_args, schedule_interval='@once')

chem1_pdb_prot1_pdb = BashOperator(
    task_id='chem1_pdb_prot1_pdb',
    bash_command="(cd /working-directory; virtualScreening.py -l chem1.pdb -o result -p prot1.pdb) ",
    dag=dag)

chem1_pdb_prot1_pdb_success_mail = EmailOperator(
    task_id="chem1_pdb_prot1_pdb_success_mail",
    to=[u'*****@*****.**'],
    subject="chem1_pdb_prot1_pdb success",
    html_content="chem1_pdb_prot1_pdb success",
    dag=dag)

chem1_pdb_prot1_pdb_success_mail.set_upstream(chem1_pdb_prot1_pdb)
# chem1_pdb_prot1_pdb.set_upstream( )

chem1_pdb_prot2_pdb = BashOperator(
    task_id='chem1_pdb_prot2_pdb',
    bash_command="(cd /working-directory; virtualScreening.py -l chem1.pdb -o result -p prot2.pdb) ",
    dag=dag)

chem1_pdb_prot2_pdb_success_mail = EmailOperator(
    task_id="chem1_pdb_prot2_pdb_success_mail",
    to=[u'*****@*****.**'],
    subject="chem1_pdb_prot2_pdb success",
    html_content="chem1_pdb_prot2_pdb success",
    dag=dag)
dag = DAG(dag_id="connect_to_monary_and_email_operator", default_args=default_args, params=params) def connect_to_monary_and_email_operator(ds, **kwargs): m = Monary() pipeline = [{"$group": {"_id": "$state", "totPop": {"$sum": "$pop"}}}] states, population = m.aggregate("zips", "data", pipeline, ["_id", "totPop"], ["string:2", "int64"]) strs = list(map(lambda x: x.decode("utf-8"), states)) result = list("%s: %d" % (state, pop) for (state, pop) in zip(strs, population)) print(result) run_this = PythonOperator( task_id="connect_to_monary_and_email_operator", provide_context=True, python_callable=connect_to_monary_and_email_operator, dag=dag, ) send_email_notification_flow_successful = EmailOperator( task_id="send_email_notification_flow_successful", to="*****@*****.**", subject="custom email from airflow", html_content="{{ params['foo'](execution_date) }}", params=params, dag=dag, ) send_email_notification_flow_successful.set_upstream(run_this)
# Branch: run source_count only on the first Sunday of the month
# (day of month <= 7 and weekday() == 6); otherwise skip to ignore_not_sunday.
branching = BranchPythonOperator(
    task_id='branching',
    python_callable=lambda: ('source_count'
                             if datetime.now().day <= 7 and datetime.today().weekday() == 6
                             else 'ignore_not_sunday'),
    dag=dag)
branching.set_upstream(run_this_first)

esucc = EmailOperator(
    task_id='email_success_' + dag.dag_id,
    to=email_addr,
    subject=dag.dag_id + ' [success] on ' + datetime.now().strftime('%Y-%m-%d'),
    html_content='Congratulations!',
    trigger_rule='all_success',
    dag=dag)

source_count = BashOperator(
    task_id='source_count',
    bash_command='/disk1/source_data_count; ./daily_table_count.sh > out.log ',
    dag=dag)
source_count.set_upstream(branching)
esucc.set_upstream(source_count)

ignore_not_sunday = DummyOperator(task_id='ignore_not_sunday', dag=dag)
ignore_not_sunday.set_upstream(branching)

join = DummyOperator(task_id='join', trigger_rule='all_success', dag=dag)
join << ignore_not_sunday
join << esucc
               'mansortdir': MANSORT_DIR},
    dag=dag)

email_me = EmailOperator(
    task_id='email_me',
    to=default_args['email'],
    subject='%s is complete' % dag_id,
    html_content='You may now manually sort on NIAO',
    dag=dag)

slack_it = SlackAPIPostOperator(
    task_id='slack_it',
    token=SLACK_TOKEN,
    text='%s is complete' % dag_id,
    channel='#ephys',
    dag=dag)

make_kwd_task.set_upstream(make_klusta_dir_task)
phy_task.set_upstream(make_kwd_task)
# merge_events_task.set_upstream(phy_task)
clear_phy_task.set_upstream(phy_task)
make_kwik_bak_dir_task.set_upstream(phy_task)
mv_kwik_bak_task.set_upstream(make_kwik_bak_dir_task)
# rsync_task.set_upstream(merge_events_task)
rsync_task.set_upstream(clear_phy_task)
rsync_task.set_upstream(mv_kwik_bak_task)
email_me.set_upstream(rsync_task)
slack_it.set_upstream(rsync_task)

globals()[dag_id] = dag
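# Assumed but not shown above: the Slack operator import and the token.
# Under the Airflow 1.x import paths these snippets appear to use, the
# operator is imported as below (the `slack` extra must be installed);
# SLACK_TOKEN would be loaded elsewhere in the original file, for example
# from the environment (illustrative only):
import os

from airflow.operators.slack_operator import SlackAPIPostOperator

SLACK_TOKEN = os.environ.get('SLACK_API_TOKEN')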
    params={'rasterdir': RASTER_DIR},
    on_success_callback=lambda c: set_perms(c['params']['rasterdir'],
                                            default_args['owner']),
    dag=dag)

make_raster_task = BashOperator(
    task_id='make_rasters',
    bash_command=make_raster_cmd,
    env={'PATH': ANACONDA_PATH},
    params={'postphydir': POSTPHY_DIR,
            'ecanalysispath': ECANALYSIS_PATH,
            'rasterdir': RASTER_DIR},
    dag=dag)

############ Report Completion
email_me = EmailOperator(
    task_id='email_me',
    to=default_args['email'],
    subject='%s is merged' % dag_id,
    html_content='You may commence analysis.',
    dag=dag)

rsync_task.set_upstream(make_postphy_dir_task)
merge_events_task.set_upstream(rsync_task)
kwik2pandas_task.set_upstream(merge_events_task)
email_me.set_upstream(kwik2pandas_task)
make_raster_dir_task.set_upstream(kwik2pandas_task)
make_raster_task.set_upstream(make_raster_dir_task)

globals()[dag_id] = dag
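# `set_perms` is defined elsewhere in the original file and is not part of
# Airflow. A minimal sketch of what such a callback helper could look like,
# assuming it simply hands ownership of the raster directory back to the DAG
# owner after the task succeeds (assumed implementation, not the original):
import os
import pwd


def set_perms(path, owner):
    """Recursively chown `path` to the given user, leaving the group as-is."""
    uid = pwd.getpwnam(owner).pw_uid
    os.chown(path, uid, -1)
    for root, dirs, files in os.walk(path):
        for name in dirs + files:
            os.chown(os.path.join(root, name), uid, -1)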
]

# copy table to bi
# bitables = ['hardware', 'hardwareios']
bitables = []

for table in tables:
    imp = BashOperator(
        task_id='import_' + table,
        bash_command=(
            '/disk1/bdl/etl/ETL/imp_mongo_doc_with_date_input.sh '
            '{table} {begin} {end} > /disk1/bdl/etl/ETL/log/{table}.log '
        ).format(table=table, begin='{{ ds }}', end='{{ tomorrow_ds }}'),
        dag=dag)
    if table in bitables:
        bimp = BashOperator(
            task_id='send_2_bi_' + table,
            bash_command=(
                '/disk1/bdl/etl/ETL/send_bi_impala_with_date_input.sh '
                '{table} {begin} {end} > /disk1/bdl/etl/ETL/log/BI/{table}.log '
            ).format(table=table, begin='{{ ds }}', end='{{ tomorrow_ds }}'),
            dag=dag)
        bimp.set_upstream(imp)
        esucc.set_upstream(bimp)
    else:
        esucc.set_upstream(imp)

imp_software = BashOperator(
    task_id='import_software',
    bash_command='/disk1/bdl/etl/ETL/imp_software_doc_with_date_input.sh '
                 '{{ ds }} {{ tomorrow_ds }} > /disk1/bdl/etl/ETL/log/software.log ',
    dag=dag)
esucc.set_upstream(imp_software)
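# The extract above starts at the closing bracket of a `tables` list and wires
# every import task into an `esucc` EmailOperator; both are defined earlier in
# the original file. `esucc` is presumably the same success-notification
# EmailOperator shown in the branching example above. A sketch of the list,
# with entry names inferred from the commented-out `bitables` line (assumed,
# not from the source):
tables = [
    'hardware',
    'hardwareios',
]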
    'owner': 'airflow',
    'start_date': datetime.now() - timedelta(seconds=10),
    'retries': 0
}

dag = DAG('Sales_Nov',
          default_args=default_args,
          start_date=datetime.now() - timedelta(seconds=10))

op1 = DummyOperator(task_id='File1_landing', dag=dag)
t1 = EmailOperator(task_id='Processing_File_1',
                   to='*****@*****.**',
                   subject="Airflow_report",
                   html_content="File 1 started",
                   dag=dag)

op2 = DummyOperator(task_id='File2_landing', dag=dag)
t2 = EmailOperator(task_id='Processing_File_2',
                   to='*****@*****.**',
                   subject="Airflow_report",
                   html_content="File 2 started",
                   dag=dag)

op3 = DummyOperator(task_id='Aggregating', dag=dag)
op4 = DummyOperator(task_id='Final_Table_Push', dag=dag)

t1.set_upstream(op1)
t2.set_upstream(op2)
op3.set_upstream(t1)
op3.set_upstream(t2)
op4.set_upstream(op3)
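# The Sales_Nov extract is nearly self-contained: it only lacks its imports
# and the opening line of the `default_args` dict. Under the Airflow 1.x
# import paths these examples appear to use, the missing header would look
# roughly like this (a sketch, not the original file):
from datetime import datetime, timedelta

from airflow import DAG
from airflow.operators.dummy_operator import DummyOperator
from airflow.operators.email_operator import EmailOperator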
    wrtr.writerow(['url', 'count'])
    wrtr.writerows(cntr.most_common(5))


simple_search = PythonOperator(task_id='search_twitter',
                               provide_context=True,
                               python_callable=search_twitter,
                               dag=dag,
                               params={'query': '#python'})

move_tweets_to_sqlite = PythonOperator(task_id='csv_to_sqlite',
                                       provide_context=True,
                                       python_callable=csv_to_sqlite,
                                       dag=dag)

id_popular = PythonOperator(task_id='identify_popular_links',
                            provide_context=True,
                            python_callable=identify_popular_links,
                            dag=dag)

email_links = EmailOperator(task_id='email_best_links',
                            to='*****@*****.**',
                            subject='Latest popular links',
                            html_content='Check out the latest!!',
                            files=['{}/latest_links.txt'.format(RAW_TWEET_DIR)],
                            dag=dag)

simple_search.set_downstream(move_tweets_to_sqlite)
id_popular.set_upstream(move_tweets_to_sqlite)
email_links.set_upstream(id_popular)
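# The two indented lines at the top of this extract are the tail of the
# `identify_popular_links` callable, which produces the latest_links.txt file
# that email_best_links attaches. A minimal sketch of such a function,
# assuming it tallies URLs from a previously written tweet CSV with
# collections.Counter; the input file name and column layout are hypothetical,
# not from the source:
import csv
from collections import Counter


def identify_popular_links(ds, **kwargs):
    cntr = Counter()
    # Hypothetical input produced by the earlier search_twitter task.
    with open('{}/tweets.csv'.format(RAW_TWEET_DIR)) as infile:
        for row in csv.reader(infile):
            for token in row:
                if token.startswith('http'):
                    cntr[token] += 1
    with open('{}/latest_links.txt'.format(RAW_TWEET_DIR), 'w') as outfile:
        wrtr = csv.writer(outfile)
        wrtr.writerow(['url', 'count'])
        wrtr.writerows(cntr.most_common(5))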