Exemplo n.º 1
0
from datetime import datetime

# DAG-level defaults; the fixed start date anchors scheduling.
default_args = {
    'start_date': datetime(2018, 3, 7),
}

# Manually-triggered DAG (schedule_interval=None) that checks BookMyShow for
# ticket availability and emails when tickets open up.
infinity_war_ticket_check_dag = DAG(
    'infinity_war_ticket_check_dag',
    default_args=default_args,
    schedule_interval=None,  # run only when triggered by hand
    catchup=False,
    max_active_runs=1,
)

# Poll the BookMyShow listing for the INOX Mantri IMAX venue.
t_0 = BMSOperator(
    task_id="check_tickets_for_inox_mantri_imax",
    site_url="https://in.bookmyshow.com/buytickets/avengers-infinity-war-3d-bengaluru/movie-bang-ET00074502-MT/",
    show_date="20180427",
    venue="INMB",
    dag=infinity_war_ticket_check_dag,
)

# Notify by email once the ticket-check task has succeeded.
t_1 = EmailOperator(
    task_id='email_for_inox_mantri_imax',
    trigger_rule=TriggerRule.ALL_SUCCESS,
    to='*****@*****.**',
    subject='Tickets available at INOX Mantri Mall for Avengers Infinity war',
    html_content='Tickets available at INOX Mantri Mall for Avengers Infinity war',
    dag=infinity_war_ticket_check_dag,
)

t_0 >> t_1  # same dependency as t_1.set_upstream(t_0)
Exemplo n.º 2
0
    ["/Users/ravimuthyala/AirflowSparkTestCode/receipts.csv"],
    'driver_memory': '1g',
    'executor_cores': 1,
    'num_executors': 1,
    'executor_memory': '1g'
}

# Submit the Spark job configured by spark_config (defined above).
spark_submit_operator = SparkSubmitOperator(
    task_id='Spark_Scala_Submit_Job',
    dag=dag,
    **spark_config,
)

# Success notification, sent after the Spark job completes.
emailNotify = EmailOperator(
    task_id='email_notification',
    to='*****@*****.**',
    subject='Spark Submit Job Alert',
    html_content='Airflow Spark Submit Job Done',
    dag=dag,
)

# Failure notification: ONE_FAILED fires as soon as any upstream task fails.
t1Failed = EmailOperator(
    task_id="SparkJobFailed",
    trigger_rule=TriggerRule.ONE_FAILED,
    to=["*****@*****.**"],
    subject="Spark job Failed",
    html_content='<h3>Spark job has failed</h3>',
    dag=dag,
)

# Wiring: python task -> spark job -> success mail; failure mail watches the spark job.
python_operator >> spark_submit_operator >> emailNotify
spark_submit_operator >> t1Failed

if __name__ == '__main__':
    dag.cli()
Exemplo n.º 3
0
)

# Load the harvested tweet CSV files into the SQL store.
move_tweets_to_sql = PythonOperator(
    task_id="csv_to_sql",
    python_callable=csv_to_sql,
    provide_context=True,  # pass the Airflow context dict to the callable
    dag=dag,
)

# Rank links by popularity from the loaded tweet data.
id_popular = PythonOperator(
    task_id="identify_popular_links",
    python_callable=identify_popular_links,
    provide_context=True,
    dag=dag,
)

# Mail out the freshly-computed top links, attaching the raw text file.
email_links = EmailOperator(
    task_id="email_best_links",
    to="*****@*****.**",
    subject="Latest popular links",
    html_content="Check out the latest!!",
    files=["{}/latest_links.txt".format(RAW_TWEET_DIR)],
    dag=dag,
)

# Pipeline: search -> load to SQL -> rank links -> email results.
simple_search >> move_tweets_to_sql >> id_popular >> email_links
Exemplo n.º 4
0

# Publish the run's ts_nodash identifier via XCom for downstream templating.
get_airflow_ts_nodash_id_task = PythonOperator(
    task_id='get_airflow_ts_nodash_id_task',
    python_callable=get_airflow_ts_nodash_id,
    dag=dag,
)

# Jinja template that pulls the id back out of XCom at render time.
ts_no_dash_id = '{{ ti.xcom_pull(task_ids="get_airflow_ts_nodash_id_task" , dag_id = "main_dag")}}'  # showing how to get xcom

bash_job_task = BashOperator(
    task_id="bash_job_task",
    bash_command="echo one",
    dag=dag,
)

# Alert mail; the XCom template in the subject is rendered per run.
email_task = EmailOperator(
    task_id='send_email',
    to=email_to,
    subject='Airflow Alert - ' + ts_no_dash_id,
    html_content=""" <h3>Email Test </h3> """,
    dag=dag,
)

spark_job_task = spark_submit(
    python_file=base_path + '/spark_job.py',
    job_name="spark",
    conn_id=None,
    env_var_size='SMALL',
    dag=dag,
)

# dummy -> xcom publisher -> {spark, bash} -> email -> {dummy, hello}
first_dummy_task >> get_airflow_ts_nodash_id_task
get_airflow_ts_nodash_id_task >> [spark_job_task, bash_job_task]
[spark_job_task, bash_job_task] >> email_task
email_task >> second_dummy_task
email_task >> hello_task
Exemplo n.º 5
0
    schema='my_hive_db',
    provide_context=True,
    dag=dag
)
# Wiring for the upstream stages: the row count gates stop_flow / cleaning.
create_source_id >> clean_data
count_data_rows >> [stop_flow, clean_data]


# Move cleaned Hive data into the relational store.
move_data_mysql = PythonOperator(
    task_id='move_data_mysql',
    # NOTE(review): callable is move_data_mssql behind a "mysql" task name —
    # confirm which database is actually the target.
    python_callable=tasks.move_data_mssql,
    templates_dict={'schema': 'my_hive_db'},
    provide_context=True,
    dag=dag,
)
clean_data >> move_data_mysql


# Completion notice; {{ ds }} renders the execution date at run time.
send_email = EmailOperator(
    task_id='send_email',
    to='*****@*****.**',
    subject='ingestion complete',
    html_content="Date: {{ ds }}",
    dag=dag,
)
move_data_mysql >> send_email




Exemplo n.º 6
0
    'retry_delay': timedelta(minutes=5),
}

# DAG definition: runs on a fixed four-hour cadence.
dag = DAG(
    'start_test_baise',
    default_args=default_args,
    schedule_interval=timedelta(hours=4),
)


def start_task():
    """Run the ``baise.py`` script from its own directory.

    Replaces the original ``os.chdir`` / ``os.system`` / ``os.chdir``-back
    sequence with a single ``subprocess.run(..., cwd=...)`` call: the working
    directory of the whole worker process is never mutated (the chdir dance is
    race-prone when tasks run concurrently in one process), and no shell is
    spawned for a plain argv command.  As before, the child's exit status is
    not checked.
    """
    import subprocess  # local import: keeps the module's import block untouched
    subprocess.run(['python3', 'baise.py'], cwd='/home/zluser/Desktop/airflow')


# Task that kicks off the baise batch script.
t1 = PythonOperator(
    task_id="test_baise",
    python_callable=start_task,
    dag=dag,
)

# Success mail; ONE_SUCCESS fires once any upstream task succeeds.
# time.asctime() with no argument formats the current local time, which is
# exactly time.asctime(time.localtime(time.time())).
t2 = EmailOperator(
    task_id="send_email",
    trigger_rule=TriggerRule.ONE_SUCCESS,
    to='*****@*****.**',
    subject='Baise task is ok',
    html_content='<h3>Hi,dear,your task is Completed Successfully! </h3>\n%s'
    % time.asctime(),
    dag=dag,
)
t1 >> t2
            ),
            two_day_rolling_avg as (
                SELECT AVG(a.state, b.state) as two_day_avg
                FROM yesterday_covid_data a
                JOIN yesterday_covid_data b 
                ON a.state = b.state
            )
            SELECT a.state, b.state, c.two_day_avg
            FROM yesterday_covid_data a
            JOIN today_covid_data b
            ON a.state=b.state
            JOIN two_day_rolling_avg c
            ON a.state=b.two_day_avg;''',
                               params={
                                   'today': today,
                                   'yesterday': yesterday
                               })

    # Email the team once all three Covid queries have completed.
    send_email = EmailOperator(
        task_id='send_email',
        to=email_to,
        subject='Covid Queries DAG',
        html_content='<p>The Covid queries DAG completed successfully. <p>')

    # Task wiring: t0 fans out to the three queries, all of which must
    # complete before the notification email goes out.
    t0 >> [query_1, query_2, query_3]
    query_1 >> send_email
    query_2 >> send_email
    query_3 >> send_email
Exemplo n.º 8
0
    ep = ExecutePreprocessor(timeout=21600)
    try:
        out = ep.preprocess(nb, {'metadata': {'path': notebook_dir}})
    except CellExecutionError:
        msg = 'Error executing the notebook "%s".\n\n' % notebook_path
        msg += 'See notebook "%s" for the traceback.' % notebook_path
        print(msg)
        raise
    finally:
        with open(notebook_path, mode='wt') as f:
            nbformat.write(nb, f)


# Execute the notebook through nb_task, impersonating `username`.
python_operator = PythonOperator(
    task_id='notebook_task',
    python_callable=nb_task,
    op_kwargs={'notebook_path': notebook_path},
    provide_context=True,
    run_as_user=username,
    dag=dag,
)

# Optional success notification, controlled by the email_on_success flag.
if email_on_success:
    email_operator = EmailOperator(
        task_id='email_task',
        to=emails_success.split(","),
        subject='{} completed successfully'.format(dag_id),
        html_content="<p>This job is successfully executed, to customize the email content, please edit dag_template.py</p>",
        dag=dag,
    )
    python_operator >> email_operator
Exemplo n.º 9
0
                     bash_command='echo "hello world!!"',
                     dag=dag)

# Read back the `dag` table from the Airflow metadata database.
task3 = MySqlOperator(mysql_conn_id='airflow_db',
                      task_id='basic_mysql',
                      sql="SELECT * FROM `dag`",
                      dag=dag)

# HTML body for the success mail.  The {{ ... }} placeholders are Jinja
# templates rendered by Airflow at task-run time; the %-formatting below only
# splices those template strings into the surrounding markup.
# Fix: "Instatnce" -> "Instance" (typo in the user-visible email body).
EMAIL_CONTENT = """

<ul>
    <li>Instance key: %s</li>
    <li>Owner: %s</li>
    <li>Host: %s</li>
</ul>

""" % (
    "{{ task_instance_key_str }}",
    "{{ task.owner}}",
    "{{ ti.hostname }}",
)

# Success mail (subject is Japanese: "Batch succeeded: run date {{ ds }}").
send_mail = EmailOperator(dag=dag,
                          task_id="send_mail",
                          to=["*****@*****.**"],
                          subject="バッチ成功: 実行日 {{ ds }}",
                          html_content=EMAIL_CONTENT)

# task1 -> task2 -> task3; the mail also waits only on task2.
task1.set_downstream(task2)
task2.set_downstream(task3)
send_mail.set_upstream(task2)