Example #1
 def test_mysql_operator_test_multi(self):
     sql = [
         "TRUNCATE TABLE test_airflow",
         "INSERT INTO test_airflow VALUES ('X')",
     ]
     from airflow.operators.mysql_operator import MySqlOperator
     t = MySqlOperator(
         task_id='mysql_operator_test_multi',
         mysql_conn_id='airflow_db',
         sql=sql, dag=self.dag)
     t.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True)
Example #2
 def test_mysql_operator_test(self):
     sql = """
     CREATE TABLE IF NOT EXISTS test_airflow (
         dummy VARCHAR(50)
     );
     """
     from airflow.operators.mysql_operator import MySqlOperator
     t = MySqlOperator(
         task_id='basic_mysql',
         sql=sql,
         dag=self.dag)
     t.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True)
Example #3
 def test_mysql_operator_test_multi(self):
     sql = [
         "CREATE TABLE IF NOT EXISTS test_airflow (dummy VARCHAR(50))",
         "TRUNCATE TABLE test_airflow",
         "INSERT INTO test_airflow VALUES ('X')",
     ]
     from airflow.operators.mysql_operator import MySqlOperator
     t = MySqlOperator(
         task_id='mysql_operator_test_multi',
         sql=sql,
         dag=self.dag,
     )
     t.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True)
Example #4
    def test_overwrite_schema(self):
        """
        Verifies option to overwrite connection schema
        """
        from airflow.operators.mysql_operator import MySqlOperator

        sql = "SELECT 1;"
        t = MySqlOperator(
            task_id='test_mysql_operator_test_schema_overwrite',
            sql=sql,
            dag=self.dag,
            database="foobar",
        )

        from _mysql_exceptions import OperationalError
        try:
            t.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE,
                  ignore_ti_state=True)
        except OperationalError as e:
            assert "Unknown database 'foobar'" in str(e)
Example #5
    def test_overwrite_schema(self):
        """
        Verifies option to overwrite connection schema
        """
        from airflow.operators.mysql_operator import MySqlOperator

        sql = "SELECT 1;"
        op = MySqlOperator(
            task_id='test_mysql_operator_test_schema_overwrite',
            sql=sql,
            dag=self.dag,
            database="foobar",
        )

        from _mysql_exceptions import OperationalError
        try:
            op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE,
                   ignore_ti_state=True)
        except OperationalError as e:
            assert "Unknown database 'foobar'" in str(e)
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=5),
    # 'queue': 'bash_queue',
    # 'pool': 'backfill',
    # 'priority_weight': 10,
    # 'end_date': datetime(2016, 1, 1),
}

dag = DAG('mysqltutorial', default_args=default_args, schedule_interval=timedelta(days=1))

# t1, t2  are examples of tasks created by instantiating operators


t1 = MySqlOperator(
    task_id='insert_PD',
    sql='call storedP1("2019-06-01","2019-06-30")',
    mysql_conn_id='xyz_id',
    database='dbname',
    dag=dag)

t2 = MySqlOperator(
    task_id='update_PD',
    sql='call storedP2("2019-06-01","2019-06-30")',
    mysql_conn_id='xyz_id',
    database='dbname',
    dag=dag)


t2.set_upstream(t1)
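
For reference, the same dependency can also be written with the bitshift operator, which is equivalent to the set_upstream call above:

# Equivalent to t2.set_upstream(t1): t1 runs before t2.
t1 >> t2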
	)



create_spotify_daily_top_50_global = MySqlOperator(
	task_id='create_spotify_daily_top_50_global',
	dag=dag,
	mysql_conn_id="airflow_mysql_db",
	sql="""
	CREATE TABLE IF NOT EXISTS spotify_daily_top_50_global 
	(
	artist_name VARCHAR(100),
	track_id VARCHAR(100),
	track_name VARCHAR(100),
	duration_ms INT,
	explicit BOOL,
	popularity INT,
	daily_rank INT,
	album_id VARCHAR(100),
	album_name VARCHAR(100),
	album_type VARCHAR(100),
	release_date VARCHAR(100),
	dt VARCHAR(100)
	) 
	; 
	"""
)

populate_spotify_daily_top_50_global = BashOperator(
	task_id='populate_spotify_daily_top_50_global',
	dag=dag,
Example #8
        connection.commit()
    curr.close()


dag = DAG('run_zomato_api',
          start_date=datetime.datetime.now()
          # max_active_runs = 1
          )

get_data_zomato_api = PythonOperator(task_id='get_data_zomato_api',
                                     python_callable=get_data_zomato_api,
                                     dag=dag)

create_tables_mysql = MySqlOperator(
    task_id='create_tables_mysql',
    mysql_conn_id='mysql_zomato',
    sql=sql_statements_zomato.create_all_tables,
    dag=dag)

# prep_json = PythonOperator(
# 			task_id = 'prep_json',
# 			python_callable = prep_json,
# 			provide_context = True,
# 			op_kwargs = {"result" : "{{ ti.xcom_pull(task_ids= 'get_data_zomato_api') }}"},
# 			dag = dag)

create_insert_url_details = PythonOperator(
    task_id='create_insert_url_details',
    python_callable=url_details_create_insert,
    dag=dag)
Example #9
         default_args=default_args,
         schedule_interval='@daily',
         template_searchpath=['/usr/local/airflow/sql_files'],
         catchup=True) as dag:

    t1 = BashOperator(
        task_id='check_file_exists',
        bash_command=
        'shasum ~/store_files_airflow/raw_store_transactions_2020-06-30.csv',
        retries=2,
        retry_delay=timedelta(seconds=15))

    t2 = PythonOperator(task_id='clean_raw_csv', python_callable=data_cleaner)

    t3 = MySqlOperator(task_id='create_mysql_table',
                       mysql_conn_id="mysql_conn",
                       sql="create_table.sql")

    t4 = MySqlOperator(task_id='insert_into_table',
                       mysql_conn_id="mysql_conn",
                       sql="insert_into_table.sql")

    t5 = MySqlOperator(task_id='select_from_table',
                       mysql_conn_id="mysql_conn",
                       sql="select_from_table.sql")

    t6 = BashOperator(
        task_id='move_file1',
        bash_command=
        'cat ~/store_files_airflow/location_wise_profit.csv && mv ~/store_files_airflow/location_wise_profit.csv ~/store_files_airflow/location_wise_profit_%s.csv'
        % yesterday_date)
Example #10
)


class CustomMySqlOperator(MySqlOperator):
    def execute(self, context):
        self.log.info('Executing: %s', self.sql)
        hook = MySqlHook(mysql_conn_id=self.mysql_conn_id,
                         schema=self.database)
        return hook.get_records(self.sql, parameters=self.parameters)
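
A minimal usage sketch for the CustomMySqlOperator defined above; the task_id and query are illustrative placeholders, not taken from the original DAG. Because execute() returns hook.get_records(), the fetched rows are pushed to XCom by default:

# Hypothetical usage of the custom operator above; task_id and SQL are placeholders.
fetch_rows = CustomMySqlOperator(
    task_id='fetch_rows',
    sql='SELECT id, status FROM jobs',
    mysql_conn_id=MYSQL_CONN_ID,
    database='etl',
    dag=dag,
)
# Downstream tasks can read the records via ti.xcom_pull(task_ids='fetch_rows').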



resume_replication = MySqlOperator(
    task_id='resume_replication',
    sql='START SLAVE;',
    mysql_conn_id=MYSQL_CONN_ID,
    database='etl',
    dag=dag,
    trigger_rule='all_done'
)


update_flat_obs = MySqlOperator(
    task_id='update_flat_obs',
    sql='flat_obs_v1.3.sql',
    mysql_conn_id=MYSQL_CONN_ID,
    database='etl',
    dag=dag
)


#
success_finish_operator = \
    BioWardrobeTriggerBasicAnalysisOperator(task_id='success_finish',
                                            trigger_rule=TriggerRule.ONE_SUCCESS,
                                            dag=dag)
success_finish_operator.set_upstream([cli_download_sra, url_download, download_local_operator, copy_from_biowardrobe])


#
#  A STEP
#
error_finish_operator = MySqlOperator(
    task_id="finish_with_error",
    mysql_conn_id=biowardrobe_connection_id,
    sql="""update labdata set libstatus=2000,
        libstatustxt="{{ ti.xcom_pull(task_ids=['download_sra','download_local', 'download_aria2'], key=None) }}"
        where uid='{{ ti.xcom_pull(task_ids='branch_download', key='uid') }}'""",
    trigger_rule=TriggerRule.ONE_FAILED,
    autocommit=True,
    dag=dag)
error_finish_operator.set_upstream([cli_download_sra, url_download, download_local_operator, copy_from_biowardrobe])


#
#      BIOWARDROBE DOWNLOAD TRIGGER
#


dag_t = DAG(
    dag_id='biowardrobe_download_trigger',
    default_args={
Example #12
         default_args=default_args,
         schedule_interval='@daily') as dag:

    check_file = BashOperator(task_id="check_file",
                              bash_command="shasum ~/ip_files/or.csv",
                              retries=2,
                              retry_delay=timedelta(seconds=15))

    pre_process = PythonOperator(task_id="pre_process",
                                 python_callable=pre_process)

    agg = PythonOperator(task_id="agg", python_callable=process_data)

    create_table = MySqlOperator(
        task_id="create_table",
        mysql_conn_id="mysql_db1",
        sql=
        "CREATE table IF NOT EXISTS aggre_res (stock_code varchar(100) NULL,descb varchar(100) NULL,country varchar(100) NULL,total_price varchar(100) NULL)"
    )
    insert = MySqlOperator(
        task_id='insert_db',
        mysql_conn_id="mysql_db1",
        sql=
        "LOAD DATA  INFILE '/var/lib/mysql-files/fin.csv' INTO TABLE aggre_res FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n' IGNORE 1 ROWS;"
    )

    email = EmailOperator(
        task_id='send_email',
        to='*****@*****.**',
        subject='Daily report generated',
        html_content=
        """ <h1>Congratulations! Your store reports are ready.</h1> """,
"""xcomの値を削除するDAG"""
"""delete_xcom_const.pyのSQLを用途に合わせて修正すること"""

from datetime import datetime
from airflow import DAG
from airflow.operators.mysql_operator import MySqlOperator
import pendulum
from function import const_delete_xcom as consts

DAG_NAME = 'dag_delete_xcom'
default_args = {
    'owner': 'terunrun',
    'depends_on_past': False,
    'start_date': datetime(2021, 2, 22, 0, 0, tzinfo=pendulum.timezone('Asia/Tokyo')),
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 0,
}
dag = DAG(DAG_NAME, schedule_interval=None, catchup=False, default_args=default_args)

# Task definitions
delete_xcom = MySqlOperator(
    task_id='delete_xcom',
    mysql_conn_id='airflow_db',
    sql=[consts.QUERY],
    dag=dag,
)
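
The const_delete_xcom module imported above is not shown in this example. A minimal sketch of what its QUERY constant might contain, assuming the intent is to purge old rows from the xcom table of the Airflow metadata database (column names follow the older, pre-2.3 metadata schema and the retention interval is a placeholder):

# function/const_delete_xcom.py -- hypothetical content, adjust to your use case.
QUERY = """
DELETE FROM xcom
WHERE execution_date < DATE_SUB(NOW(), INTERVAL 7 DAY);
"""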
Example #14
    'retries': 1,
    'retry_delay': timedelta(minutes=5),
    # 'queue': 'bash_queue',
    # 'pool': 'backfill',
    # 'priority_weight': 10,
    # 'end_date': datetime(2016, 1, 1),
    # 'wait_for_downstream': False,
    # 'dag': dag,
    # 'sla': timedelta(hours=2),
    # 'execution_timeout': timedelta(seconds=300),
    # 'on_failure_callback': some_function,
    # 'on_success_callback': some_other_function,
    # 'on_retry_callback': another_function,
    # 'sla_miss_callback': yet_another_function,
    # 'trigger_rule': 'all_success'
}

dag = DAG('mysql_ingest', default_args=default_args, schedule_interval=None)

mysql_task = MySqlOperator(task_id='mysql_ingest',
                           mysql_conn_id='mysql_test',
                           sql='select * from authors',
                           database='mysql',
                           dag=dag)

hello_task = HelloDBOperator(task_id='sample-task',
                             name='foo_bar',
                             mysql_conn_id='mysql_test',
                             database='mysql',
                             dag=dag)
Example #15
from airflow import DAG
from airflow.operators.hive_to_mysql import HiveToMySqlTransfer
from airflow.operators.mysql_operator import MySqlOperator
from airflow.utils.dates import days_ago
from datetime import datetime, timedelta

default_args = {
    'owner': 'airflow',
    'start_date': days_ago(2),
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=1)
}

dag = DAG(dag_id='hive_to_mysql',
          schedule_interval='@once',
          default_args=default_args)

t1 = MySqlOperator(task_id='drop_table',
                   sql='drop table if exists temp_drivers',
                   dag=dag)
t2 = HiveToMySqlTransfer(
    task_id='hive_to_mysql',
    mysql_table='temp_drivers',
    mysql_preoperator='create table temp_drivers(col_value varchar(256))',
    sql='select * from temp_drivers',
    dag=dag)

t1 >> t2
Example #16
    mysql_hook = MySqlHook(mysql_conn_id="mysql", schema="airflow_mdb")
    connection = mysql_hook.get_conn()
    cursor = connection.cursor()
    cursor.execute(request)
    tweets = cursor.fetchall()
    for tweet in tweets:
        print(tweet)
    #return tweets


with DAG(dag_id="twitter_dag",
         schedule_interval="*/10 * * * *",
         default_args=default_args) as dag:
    waiting_for_tweets = FileSensor(task_id="waiting_for_tweets",
                                    fs_conn_id="fs_tweet",
                                    filepath="data.csv",
                                    poke_interval=5)

    fetching_tweets = PythonOperator(task_id="fetching_tweets",
                                     python_callable=fetching_tweets)
    cleaning_tweets = PythonOperator(task_id="cleaning_tweets",
                                     python_callable=cleaning_tweets)
    storing_tweets = MySqlOperator(task_id="storing_tweets",
                                   sql=storing_tweets(),
                                   mysql_conn_id='mysql',
                                   autocommit=True,
                                   database='airflow_mdb')
    loading_tweets = PythonOperator(task_id="loading_tweets",
                                    python_callable=loading_tweets)

    waiting_for_tweets >> fetching_tweets >> cleaning_tweets >> storing_tweets >> loading_tweets
Example #17
                                   hive_cli_conn_id='beeline',
                                   dag=dag)

insertCategory_hiphop = HiveOperator(task_id='insertCategory_hiphop',
                                     hql=hiveSQL_insertCategory_hiphop,
                                     hive_cli_conn_id='beeline',
                                     dag=dag)

insertoverwrite_final = HiveOperator(task_id='insertoverwrite_final',
                                     hql=hiveSQL_insertoverwrite_final,
                                     hive_cli_conn_id='beeline',
                                     dag=dag)

createTableToMysql = MySqlOperator(mysql_conn_id='mysql',
                                   task_id='createTableToMysql',
                                   sql=hiveSQL_create_mysql,
                                   database='spotify_data',
                                   dag=dag)

exportToMysql = HiveToMySqlTransfer(
    task_id='exportToMysql',
    sql="SELECT * FROM final",
    mysql_preoperator='DELETE FROM spotify_data.titledata',
    mysql_table='spotify_data.titledata',
    mysql_conn_id='mysql',
    hiveserver2_conn_id='beeline',
    dag=dag)

########################################################### C O P Y ################################################################
i = 0
if os.path.exists('/home/airflow/requestData/playlist'):
Example #18
        FROM
            oride_dw_ods.ods_sqoop_base_data_driver_df dd
            left join online_time ot on ot.driver_id=dd.id
            left join driver_comment dc on dc.driver_id=dd.id
            left join order_data od on od.driver_id=dd.id
            left join oride_dw_ods.ods_sqoop_base_data_driver_group_df ddg on ddg.id=dd.group_id AND ddg.dt=dd.dt
            left join driver_app_version dav on dav.driver_id=dd.id
        WHERE
            dd.dt='{{ ds }}'
    """,
    schema='dashboard',
    dag=dag)

clear_driver_daily_summary = MySqlOperator(
    task_id='clear_driver_daily_summary',
    sql="""
        DELETE FROM data_driver_report WHERE dt='{{ ds }}'
    """,
    dag=dag)


def dirver_daily_summary_insert(ds, **kwargs):
    sql = """
        SELECT
                null as id,
                dt,
                driver_id,
                real_name,
                phone_number,
                group_id,
                nvl(group_name, ''),
                nvl(group_leader, ''),
for db_object in DATABASE_OBJECTS:

    cleanup = PythonOperator(
        task_id='cleanup_' + str(db_object["airflow_db_model"].__name__),
        python_callable=cleanup_function,
        params=db_object,
        provide_context=True,
        dag=dag
    )

    print_configuration.set_downstream(cleanup)


# SQL query builder
def custom_sql_query_builder(db_object):
    return "DELETE FROM %s WHERE   DATE(%s) <= DATE(CURRENT_DATE() - INTERVAL %d DAY);" % (db_object['airflow_tablename'], db_object['age_check_column'], max_retention_days)


## Task declaration for custom mysql tables
for db_object in CUSTOM_DATABASE_OBJECTS:

    celery_cleanup =  MySqlOperator(
        task_id='cleanup_' + str(db_object["airflow_tablename"]),
        sql=custom_sql_query_builder(db_object),
        mysql_conn_id='mysql_default',
        autocommit=True,
        database='airflow',
    )

    print_configuration.set_downstream(celery_cleanup)
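
The CUSTOM_DATABASE_OBJECTS list consumed by the loop above is not shown. A sketch of the shape that custom_sql_query_builder() appears to expect, with illustrative Celery result-backend tables and a placeholder retention value:

# Hypothetical configuration for the cleanup loop above; entries are illustrative.
max_retention_days = 30  # placeholder retention window

CUSTOM_DATABASE_OBJECTS = [
    {"airflow_tablename": "celery_taskmeta", "age_check_column": "date_done"},
    {"airflow_tablename": "celery_tasksetmeta", "age_check_column": "date_done"},
]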
Example #20
}

with DAG('store_dag',
         default_args=default_args,
         schedule_interval='@daily',
         template_searchpath=['/usr/local/airflow/sql_files'],
         catchup=False) as dag:
    t1 = BashOperator(
        task_id='check_file_exists',
        bash_command=
        'shasum /usr/local/airflow/store_files_airflow/raw_store_transactions.csv',
        retries=2,
        retry_delay=timedelta(seconds=15))
    t2 = PythonOperator(task_id='clean_raw_csv', python_callable=data_cleaner)
    t3 = MySqlOperator(task_id='create_mysql_table',
                       sql='create_table.sql',
                       mysql_conn_id='mysql_conn')
    t4 = MySqlOperator(task_id='insert_into_table',
                       sql='insert_into_table.sql',
                       mysql_conn_id='mysql_conn')
    t5 = MySqlOperator(task_id='select_from_table',
                       sql='select_from_table.sql',
                       mysql_conn_id='mysql_conn')
    t6 = BashOperator(
        task_id='move_location_profits',
        bash_command=
        'mv ~/store_files_airflow/location_wise_profit.csv ~/store_files_airflow/location_wise_profit_%s.csv'
        % yesterdays_date)
    t7 = BashOperator(
        task_id='move_store_profits',
        bash_command=
Example #21
default_args = {
    'owner': 'liyi',
    'start_date': airflow.utils.dates.days_ago(0),
    'retries': 1,
    'retry_delay': timedelta(minutes=5)
}

dag1 = DAG('mysql_create_from', default_args=default_args, description='A simple create_mysql_data DAG',
           schedule_interval=timedelta(minutes=10))
# print(dag1)
# print(type(dag1))
sql1 = ["INSERT INTO airflow_test.test1_from (date1) values (now())"]

t1 = MySqlOperator(
    mysql_conn_id='liyi',
    sql=sql1,
    task_id='mysql_create_from',
    dag=dag1
)

sql2 = ["""insert into airflow_test.test1_in(date1)
select * from airflow_test.test1_from ORDER BY date1 DESC LIMIT 1;"""]

t2 = MySqlOperator(
    mysql_conn_id='liyi',
    sql=sql2,
    task_id='mysql_insert_into',
    dag=dag1
)

t2.set_upstream(t1)
Example #22
)

sleep6 = BashOperator(
    task_id='sleep6',
    bash_command="sleep 6m",
    dag=dag,
)
sleep7 = BashOperator(
    task_id='sleep7',
    bash_command="sleep 8m",
    dag=dag,
)

build_hiv_summary_1 = MySqlOperator(
    task_id='build_hiv_summary_1',
    sql='call generate_flat_hiv_summary_vitals("build",11,500,1);',
    mysql_conn_id=MYSQL_CONN_ID,
    database='etl',
    dag=dag)

build_hiv_summary_2 = MySqlOperator(
    task_id='build_hiv_summary_2',
    sql='call generate_flat_hiv_summary_vitals("build",12,500,1);',
    mysql_conn_id=MYSQL_CONN_ID,
    database='etl',
    dag=dag)

build_hiv_summary_3 = MySqlOperator(
    task_id='build_hiv_summary_3',
    sql='call generate_flat_hiv_summary_vitals("build",13,500,1);',
    mysql_conn_id=MYSQL_CONN_ID,
    database='etl',
Example #23
from airflow import DAG
from airflow.operators.bash_operator import BashOperator
from airflow.operators.mysql_operator import MySqlOperator

default_args = {
	'owner': 'airflow',
	'start_date': dt.datetime(2020, 1, 1),
	'retries': 0
}

with DAG('dynamic_dag',
	default_args=default_args,
	schedule_interval='@daily',
	catchup=False) as dag:
	
	opr_end = BashOperator(task_id='opr_end', bash_command='echo "Done"')

	# Dynamic Definition of your DAG!!
	for counter in range(1, 4):
		task_id='opr_insert_' + str(counter)
		task_date=dt.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
		opr_insert = MySqlOperator(task_id=task_id, 
						sql="INSERT INTO tasks (id, timestamp) VALUES ('" + task_id + "_" + task_date  + "', '" + task_date + "');", 
						mysql_conn_id='mysql',
						autocommit=True,
						database='airflow_mdb')
		opr_insert >> opr_end
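
Building the INSERT statement by string concatenation works, but MySqlOperator also accepts a parameters argument that is handed to the underlying hook, so bind parameters can be used instead. A sketch of the same insert with %s placeholders, reusing the names from the loop above:

# Sketch: same insert using bind parameters rather than string concatenation.
opr_insert = MySqlOperator(task_id=task_id,
                           sql="INSERT INTO tasks (id, timestamp) VALUES (%s, %s);",
                           parameters=(task_id + "_" + task_date, task_date),
                           mysql_conn_id='mysql',
                           autocommit=True,
                           database='airflow_mdb')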



Example #24
# task 3 - python function
def WriteToFile():
    dir_path = '/home/karan/Attempt_ApacheAirflow/'
    file_name = 't3.log'
    full_path = dir_path + file_name
    f = open(full_path, "a+")
    f.write("T3 Time start: {}\n".format(str(datetime.now())))
    f.close()


t3 = PythonOperator(task_id='python_write_file',
                    depends_on_past=False,
                    python_callable=WriteToFile,
                    email=['*****@*****.**'],
                    email_on_failure=True,
                    dag=dag)

t3_complete = datetime.now()
t3_status_sql = """
insert into stage_status values (3, '{}', 'python_create_file', 'success'),
(2, '{}', 'create_file', 'success');
""".format(str(t3_complete), str(t2_complete))

t4 = MySqlOperator(task_id='Update_status_table',
                   sql=t3_status_sql,
                   mysql_conn_id='local_mysql',
                   owner='airflow',
                   dag=dag)

# Dependancy
t1 >> [t2, t3] >> t4
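
Note that t3_status_sql is built with datetime.now() when the DAG file is parsed, not when the task runs. A templated sketch that records the run timestamp instead, using Airflow's built-in ts variable (the table and connection names mirror the example above; the task gets a distinct id so both variants could coexist):

# Sketch: let Jinja fill in the execution timestamp at run time instead of parse time.
t4_templated = MySqlOperator(task_id='Update_status_table_templated',
                             sql="insert into stage_status values (3, '{{ ts }}', 'python_create_file', 'success');",
                             mysql_conn_id='local_mysql',
                             dag=dag)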
Example #25
        task_id='dwd_oride_driver_cheating_detection_hi_prev_hour_task',
        bucket_key='{hdfs_path_str}/dt={pt}/hour={hour}/_SUCCESS'.format(
            hdfs_path_str="oride/oride_dw/dwd_oride_driver_cheating_detection_hi/country_code=nal",
            pt='{{ds}}',
            hour='{{ execution_date.strftime("%H") }}'
        ),
        bucket_name='opay-datalake',
        poke_interval=60,  # when the dependency is not yet satisfied, re-check its status once a minute
        dag=dag
    )


clear_promoter_mysql_data = MySqlOperator(
    task_id='clear_promoter_mysql_data',
    sql="""
        DELETE FROM promoter_users_device WHERE dt='{{ ds_nodash }}' AND hour='{{ execution_date.hour }}';
        DELETE FROM promoter_data_hour WHERE day='{{ ds_nodash }}' and hour='{{ execution_date.hour}}';
    """,
    mysql_conn_id='opay_spread_mysql',
    dag=dag)

promoter_detail_to_msyql = HiveToMySqlTransfer(
    task_id='promoter_detail_to_msyql',
    sql="""
            SELECT
                null as id,
                t.code as code,
                t.bind_number as phone,
                t.bind_device as device_id,
                0 as register_time,
                t.bind_time as bind_time,
                {{ ds_nodash }},
Example #26
    start_date=datetime(2018, 8, 12, 19, 35),
    #end_date=datetime(2018,12,31),
    default_args=default_args,
    #concurrency=1,
    default_view='tree',
    orientation='TB')

# Task to print date
t1 = BashOperator(task_id='start_print_date', bash_command='date', dag=dag)

# Task to interact with MySQL
#
# MySQL Connection Info:
#   Conn ID:    mysql_cloudsql
#   Conn Type:  MySQL
#   Host:       <ip_address>
#   Login:      root
#   Password:   <password>
#
t2 = MySqlOperator(task_id='interact_with_mysql',
                   sql='create table ztable100 (cust_id int)',
                   mysql_conn_id='mysql_cloudsql',
                   autocommit=False,
                   database="zdatabase",
                   dag=dag)

# Create DAG by specifying upstream tasks
t2.set_upstream(t1)

#ZEND
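
The comment block above documents the mysql_cloudsql connection fields; one way to register such a connection without the UI is Airflow's AIRFLOW_CONN_<CONN_ID> environment-variable convention with a connection URI. A sketch, keeping the placeholders from the comment:

# Sketch: register the connection via an environment variable before Airflow starts.
import os
os.environ['AIRFLOW_CONN_MYSQL_CLOUDSQL'] = 'mysql://root:<password>@<ip_address>/zdatabase'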
    hive_cli_conn_id='spark_thrift',
    hql="alter table stg.teacher_contract_carport_time add PARTITION (etl_date='{{ macros.ds(ti) }}');\n ",
    dag=dag)


stg_ods_carport_time = HiveOperator(
    task_id='stg_ods_carport_time',
    hive_cli_conn_id='spark_thrift',
    hql='scripts/stg2ods.teacher_contract_carport_time_insert.sql',
    dag=dag
)

end = MySqlOperator(
    task_id='end',
    mysql_conn_id='etl_db',
    sql="INSERT INTO `signal` VALUES('{1}', '{0}') ON DUPLICATE KEY UPDATE `value`='{0}'; ".format("{{ macros.ds(ti) }}", "{{ dag.dag_id }}"),
    database='etl',
    dag=dag
)

start >> delpart_stg_bk_class_times >> stg_bk_class_times >> addpart_stg_bk_class_times >> stg_ods_bk_class_times >> wait

start >> delpart_stg_bk_class >> stg_bk_class >> addpart_stg_bk_class >> stg_ods_bk_class >> wait

start >> delpart_stg_bk_sign_in >> stg_bk_sign_in >> addpart_stg_bk_sign_in >> stg_ods_bk_sign_in >> wait

start >> delpart_stg_bk_subject >> stg_bk_subject >> addpart_stg_bk_subject >> stg_ods_bk_subject >> wait

start >> delpart_stg_live_course_details >> stg_live_course_details >> addpart_stg_live_course_details >> stg_ods_live_course_details >> wait

start >> delpart_stg_bk_subjectdet >> stg_bk_subjectdet >> addpart_stg_bk_subjectdet >> stg_ods_bk_subjectdet >> wait
Example #28
from pyspark.sql.types import FloatType
from bs4 import BeautifulSoup
from airflow import DAG
from airflow.operators.python_operator import PythonOperator
from airflow.operators.bash_operator import BashOperator
from airflow.operators.mysql_operator import MySqlOperator
from airflow.utils.dates import days_ago

args = {'owner': 'Tom'}
dag = DAG('DAG1',
          schedule_interval="@daily",
          start_date=days_ago(1),
          default_args=args)  #instantiating DAG

create_database = MySqlOperator(task_id='create_database',
                                mysql_conn="mysql_default",
                                sql="CREATE DATABASE IF NOT EXISTS dag_data;",
                                dag=dag)

#grabbing SQL password from file
with open('/home/tom/sql_password_json.json', 'r') as f:
    password = json.load(f)

SQL_PASSWORD = password['password']


def get_data():  #scrapes and cleans data from website
    URL = "http://books.toscrape.com/"
    request = requests.get(URL)
    content = request.content  #HTML content of website
    soup = BeautifulSoup(content, 'lxml')
    title_data = soup.find_all('article')
Example #29
    # 'sla_miss_callback': yet_another_function,
    # 'trigger_rule': 'all_success'
}
dag = DAG(
    'basic_mysql',
    default_args=default_args,
    description='A simple mysql operation',
    schedule_interval=timedelta(days=1),
)

# t1
t1 = BashOperator(
    task_id='print_date',
    bash_command='date',
    dag=dag,
)

# sql for t2
sql = """
    CREATE TABLE IF NOT EXISTS test_airflow (
        dummy VARCHAR(50)
    );
    """

t2 = MySqlOperator(task_id='insert_mysql',
                   sql=sql,
                   mysql_conn_id='local_instance',
                   dag=dag)

t1 >> t2
Example #30
default_args = {
    'owner': 'Airflow',
    'start_date': datetime(2019, 11, 30),
    'retries': 1,
    'retry_delay': timedelta(seconds=5)
}

with DAG('spotify_dag',
         default_args=default_args,
         schedule_interval='@daily',
         template_searchpath=['/usr/local/airflow/sql_files'],
         catchup=False) as dag:

    t1 = PythonOperator(
        task_id='get_data_from_spotify',
        python_callable=getDataFromSpotify,
    )

    t2 = MySqlOperator(
        task_id='create_mysql_table',
        mysql_conn_id="mysql_conn",
        sql="create_table.sql",
    )

    t3 = MySqlOperator(task_id='insert_mysql_table',
                       mysql_conn_id="mysql_conn",
                       sql="insert_into_table.sql")

t1 >> t2 >> t3
Example #31
        "email_list":EMAIL_LIST,
        "success_message":"Task completed successfully",
        "failure_message":"Task Failed"
},
trigger_rule='all_done'
)


t3 = MySqlOperator(
task_id='MySqlOperatorExample',
mysql_conn_id="server_55",
sql="create_table.sql",
on_success_callback=notify_email_success,
on_failure_callback=notify_email_failure,
params={
  "master":master,
  #"success_message":"Mysql Operator passed successfully",
  #"failure_message":"Mysql Operator did not pass successfully",
  #"success_file":['/root/airflow/dags/Airflow_Tutorial/sql_files/create_table.sql'],
  #"failure_file":['/root/airflow/dags/Airflow_Tutorial/sql_files/create_table.py'],
  "email_list":EMAIL_LIST
  },
dag=dag
)


t4 = EmailOperator(
task_id='EmailOperatorExample',
subject='Test: Email Operator',
cc='*****@*****.**',
bcc='*****@*****.**',
to='*****@*****.**',
Example #32
    params={
        "email_list": EMAIL_LIST,
        "task_success_msg": "Flag set to 0.",
        "task_failure_msg": "Failed to set flag back to 0"
    },
    dag=dag)

get_cp_past_5_runs = MySqlOperator(
    task_id='get_cp_past_5_runs',
    mysql_conn_id="mysql_server",
    on_failure_callback=notify_email_failure,
    params={
        "master_db_name":
        master_db_name,
        "app_db_name":
        app_db_name,
        "email_list":
        EMAIL_LIST,
        "task_success_msg":
        "SQL Script to generate score of past five runs successfully execcuted.",
        "task failure_msg":
        "Failed to execute SQL Script for generating score of past runs."
    },
    sql="cp_past_5_runs.sql",
    dag=dag)

email_status = EmailOperator(
    task_id='email_status',
    to=EMAIL_LIST,
    trigger_rule='all_done',
    subject='Execution Completed',
    html_content="The Decision Analytics has completed it's execution",
Example #33
    'retry_delay': timedelta(minutes=30),
    'max_active_runs': 1
}

dag = DAG(dag_id=DAG_ID,
          default_args=default_args,
          schedule_interval='0 2 * * *',
          dagrun_timeout=timedelta(minutes=60),
          catchup=False,
          template_searchpath=[
              '/usr/local/airflow/etl-scripts/flat_tables',
              '/usr/local/airflow/etl-scripts/calculated_tables',
              '/usr/local/airflow/etl-scripts/database_updates'
          ])

#update_surge_weekly
update_surge_weekly = MySqlOperator(
    task_id='update_surge_weekly',
    sql='call generate_surge_weekly_report_dataset_v2("sync",1,15000,20,true);',
    mysql_conn_id=MYSQL_CONN_ID,
    database='etl',
    dag=dag)

finish = DummyOperator(task_id='finish', dag=dag)

update_surge_weekly >> finish
#update_vitals >> finish

if __name__ == "__main__":
    dag.cli()
create_oride_orders_status = MySqlOperator(
    task_id='create_oride_orders_status',
    sql="""
        CREATE TABLE IF NOT EXISTS oride_orders_status_10min (
            `city_id` int(10) unsigned NOT NULL DEFAULT '0' COMMENT '城市ID',
            `city_name` varchar(64) NOT NULL DEFAULT '' COMMENT '城市名称',
            `serv_type` int(11) NOT NULL DEFAULT '-1' COMMENT '服务类型1专车2快车',
            `order_time` timestamp not null default '1970-01-02 00:00:00' comment 'time 10min',
            `daily` timestamp not null default '1970-01-02 00:00:00' comment 'time day',
            `orders` int unsigned not null default 0 comment '下单数',
            `orders_user` int unsigned not null default 0 comment '下单用户数',
            `orders_pick` int unsigned not null default 0 comment '订单业务接单数',
            `orders_pick_dserv` int unsigned not null default 0 comment '按司机业务接单数',
            `drivers_serv` int unsigned not null default 0 comment '在线司机数',
            `drivers_orderable` int(10) unsigned NOT NULL DEFAULT '0' COMMENT '可接单司机数',
            `orders_finish` int unsigned not null default 0 comment '订单业务完单数',
            `orders_finish_dserv` int unsigned not null default 0 comment '司机业务完单数',
            `orders_finish_create` int unsigned not null default 0 comment '订单业务基于创建时间的完单数',
            `avg_pick` int unsigned not null default 0 comment '订单业务平均应答时长(秒)take_time-create_time',
            `avg_pick_dserv` int unsigned not null default 0 comment '司机业务平均应答时长(秒)take_time-create_time',
            `avg_take` decimal(10,1) unsigned not null default '0.0' comment '订单业务平均接驾时长(分)pick_time-take_time',
            `avg_take_dserv` decimal(10,1) unsigned not null default '0.0' comment '司机业务平均接驾时长(分)pick_time-take_time',
            `not_sys_cancel_orders` int unsigned not null default 0 comment '订单业务应答后取消status = 6 and driver_id > 0 and cancel_role <> 3 and cancel_role <> 4',
            `not_sys_cancel_orders_dserv` int unsigned not null default 0 comment '司机业务应答后取消status = 6 and driver_id > 0 and cancel_role <> 3 and cancel_role <> 4',
            `picked_orders` int unsigned not null default 0 comment '订单业务成功接驾',
            `picked_orders_dserv` int unsigned not null default 0 comment '司机业务成接驾',
            `orders_accept` int unsigned not null default 0 comment '订单业务接单数',
            `orders_accept_dserv` int unsigned not null default 0 comment '司机业务接单数',
            `agg_orders_finish` int unsigned not null default 0 comment '订单业务累计完单数',
            `agg_orders_finish_dserv` int unsigned not null default 0 comment '司机业务累计完单数',
            primary key (`city_id`,`serv_type`,`order_time`)
        ) engine=innodb DEFAULT CHARSET=utf8;
    """,
    database='bi',
    mysql_conn_id='mysql_bi',
    dag=dag
)