Example #1
from datetime import datetime, timedelta

from airflow import DAG
from airflow.operators.bash_operator import BashOperator
from airflow.operators.python_operator import PythonOperator
from airflow.operators.mysql_operator import MySqlOperator
from airflow.operators.email_operator import EmailOperator
from datacleaner import data_cleaner

default_args = {
    'owner': 'Airflow',
    'depends_on_past': False,
    'start_date': datetime(2020, 2, 17),
    'retries': 1,
    'retry_delay': timedelta(seconds=5)
}

with DAG('store_dag12',
         default_args=default_args,
         schedule_interval='@daily',
         template_searchpath=['/usr/local/airflow/sql_files'],
         catchup=True) as dag:
    t1 = BashOperator(
        task_id='check_file_exists',
        bash_command='shasum ~/store_files_airflow/raw_store_transactions.csv',
        retries=2,
        retry_delay=timedelta(seconds=15))
    t2 = PythonOperator(task_id='clean_raw_csv', python_callable=data_cleaner)
    t3 = MySqlOperator(task_id='create_mysql_table',
                       mysql_conn_id="mysql_conn",
                       sql="create_table.sql")
    t4 = MySqlOperator(task_id='insert_into_table',
                       mysql_conn_id="mysql_conn",
                       sql="insert_into_table.sql")
    t1 >> t2 >> t3 >> t4
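t1 only verifies the raw file exists; the actual cleaning lives in the local datacleaner module, which the listing does not show. A minimal sketch of such a callable, assuming pandas and the same input path (the output filename and regex are illustrative):

import pandas as pd

def data_cleaner():
    # read the raw csv verified by t1, strip stray punctuation from the
    # text columns, and write a cleaned copy for the downstream SQL load
    df = pd.read_csv('~/store_files_airflow/raw_store_transactions.csv')
    for col in df.select_dtypes(include='object').columns:
        df[col] = df[col].str.replace(r'[^\w\s-]', '', regex=True)
    df.to_csv('~/store_files_airflow/clean_store_transactions.csv', index=False)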
Example #2
         default_args=default_args,
         schedule_interval='@daily',
         catchup=True) as dag:

    check_file = BashOperator(task_id="check_file",
                              bash_command="shasum ~/ip_files/or.csv",
                              retries=2,
                              retry_delay=timedelta(seconds=15))

    # note: this assignment rebinds the name pre_process from the callable to the task
    pre_process = PythonOperator(task_id="pre", python_callable=pre_process)

    groupbys = PythonOperator(task_id="aggre", python_callable=process_data)

    create_table = MySqlOperator(
        task_id='create_table',
        mysql_conn_id="mysql_db1",
        sql="CREATE TABLE IF NOT EXISTS aggre_res ("
            "stock_code varchar(100) NULL, "
            "descb varchar(100) NULL, "
            "country varchar(100) NULL, "
            "total_price varchar(100) NULL)")

    insert = MySqlOperator(
        task_id='insert_db',
        mysql_conn_id="mysql_db1",
        sql="LOAD DATA INFILE '/var/lib/mysql-files/fin.csv' "
            "INTO TABLE aggre_res "
            "FIELDS TERMINATED BY ',' "
            "LINES TERMINATED BY '\n' "
            "IGNORE 1 ROWS;")

    email = EmailOperator(
        task_id='send_email',
        to='*****@*****.**',
        subject='Daily report generated',
        html_content=
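The insert_db task relies on LOAD DATA INFILE, which reads from the MySQL server's filesystem and is gated by the server's secure_file_priv setting (NULL disables it entirely; an empty value allows any server-side path). A quick way to check it from Python over the same connection, sketched with the Airflow 1.x hook import:

from airflow.hooks.mysql_hook import MySqlHook

hook = MySqlHook(mysql_conn_id="mysql_db1")
print(hook.get_records("SHOW VARIABLES LIKE 'secure_file_priv'"))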
Example #3
l_load = [
    'dummy_table',
    'dummy_table2',
    'dummy_table3',
    'dummy_table4',
]

dag = DAG("dag_etl_sp", schedule_interval="@once", default_args=default_args)

conn_id_extract = "conn_test_source"
conn_id_transform = "conn_test_transform"
conn_id_load = "conn_test_load"

o_transform_sp = MySqlOperator(task_id='transform_sp',
                               sql='call test_transform.dummy_airflow_sp()',
                               mysql_conn_id=conn_id_transform,
                               dag=dag)

o_truncate_transform = MySqlOperator(
    task_id='truncate_transform',
    sql='call test_transform.truncate_dummy_data()',
    mysql_conn_id=conn_id_transform,
    dag=dag)

o_truncate_load = MySqlOperator(
    task_id='truncate_load',
    sql='call test_destination.truncate_dummy_data()',
    mysql_conn_id=conn_id_load,
    dag=dag)

o_completed = DummyOperator(task_id="Completed", dag=dag)
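The excerpt ends before any dependencies are declared, and the unused l_load list hints that per-table load operators were built in the elided portion. A purely hypothetical ordering of the visible tasks might be:

o_truncate_transform >> o_transform_sp >> o_truncate_load >> o_completed  # hypothetical chain; the source omits the real one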
Example #4
hive_insert_overwrite_final_address_data = HiveOperator(
    task_id='hive_insert_overwrite_final_address_data',
    hql=insertoverwrite_final_address_data,
    hive_cli_conn_id='beeline',
    dag=dag)

create_hive_table_final_address_data >> hive_insert_overwrite_final_address_data

# --------------------------------------------------------------------------------
# create table remote
# --------------------------------------------------------------------------------

create_table_remote = MySqlOperator(
    task_id='create_table_remote',
    sql="CREATE TABLE IF NOT EXISTS final_address_data(number varchar(255),street varchar(255),city varchar(255),postcode varchar(255),hash varchar(255),country varchar(255));",
    mysql_conn_id='mysql_default',
    database='db',
    dag=dag
)

hive_insert_overwrite_final_address_data >> create_table_remote

# --------------------------------------------------------------------------------
# clear table remote
# --------------------------------------------------------------------------------

delete_from_remote = MySqlOperator(
    task_id='delete_from_remote',
    sql="DELETE FROM final_address_data;",
    mysql_conn_id='mysql_default',
    database='db',
    dag=dag
)
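Because delete_from_remote clears the whole table, TRUNCATE TABLE is a common alternative: it is faster on large tables and resets AUTO_INCREMENT, at the cost of being DDL (implicit commit). The same task with that statement would be:

delete_from_remote = MySqlOperator(
    task_id='delete_from_remote',
    sql="TRUNCATE TABLE final_address_data;",  # alternative to DELETE FROM
    mysql_conn_id='mysql_default',
    database='db',
    dag=dag
)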
Example #5
def write_t3_log():  # hypothetical name; the enclosing def line is truncated in the source
    dir_path = '/home/karan/Attempt_ApacheAirflow/'
    file_name = 't3.log'
    full_path = dir_path + file_name
    # append a start timestamp so the subdag run can be traced in the log
    f = open(full_path, "a+")
    f.write("T3 Time start: {}\n".format(str(datetime.now())))
    f.close()


# Get sub dag
sub_dag, t2_complete, t3_complete = load_subdag('sample_sub_dag',
                                                'file_operation', default_args)

sub_dag_tasks = SubDagOperator(task_id='file_operation',
                               subdag=sub_dag,
                               dag=dag)

task_status_sql = """
insert into stage_status values (3, '{}', 'python_create_file', 'success'),
(2, '{}', 'create_file', 'success');
""".format(str(t3_complete), str(t2_complete))

# Mysql is installed locally
# Connection can be created using Airflow UI
t4 = MySqlOperator(task_id='Update_status_table',
                   sql=task_status_sql,
                   mysql_conn_id='local_mysql',
                   owner='airflow',
                   dag=dag)

# Dependancy
t1 >> sub_dag_tasks >> t4
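Formatting the timestamps directly into task_status_sql works, but MySqlOperator also accepts a parameters argument, which lets the driver handle quoting. A hedged rewrite of the same insert:

task_status_sql = """
insert into stage_status values (3, %s, 'python_create_file', 'success'),
(2, %s, 'create_file', 'success');
"""

t4 = MySqlOperator(task_id='Update_status_table',
                   sql=task_status_sql,
                   parameters=(str(t3_complete), str(t2_complete)),
                   mysql_conn_id='local_mysql',
                   dag=dag)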
Example #6
         catchup=False) as dag:

    # task1: check if the source file exists in the input directory
    # note that the file in airflow container.
    t1 = BashOperator(
        task_id="check_file_exists",
        bash_command="shasum ~/store_files_airflow/raw_store_transactions.csv",
        retries=1,
        retry_delay=timedelta(seconds=15))

    # task 2: clean data (remove special characters)
    t2 = PythonOperator(task_id="clean_raw_csv", python_callable=data_cleaner)

    # task 3: create table
    t3 = MySqlOperator(task_id="create_mysql_table",
                       mysql_conn_id="mysql_conn",
                       sql="create_table.sql")

    # task 4: insert cleaned data into table
    t4 = MySqlOperator(task_id="insert_into_table",
                       mysql_conn_id="mysql_conn",
                       sql="insert_into_table.sql")

    # task 5: calculate store-wise and location-wise profit (yesterday) and save results as csv
    t5 = MySqlOperator(task_id="select_from_table",
                       mysql_conn_id="mysql_conn",
                       sql="select_from_table.sql")

    yesterday_date = datetime.strftime(datetime.now() - timedelta(1),
                                       "%Y-%m-%d")
Example #7
    'email': ['*****@*****.**'],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=5),
    'catchup': False  # note: catchup is a DAG argument; setting it in default_args has no effect
}

dag = DAG('mysql2mysql', default_args=default_args, schedule_interval='@daily')

qry_truncate_staging = """
truncate st_adventure.st_contact
"""

t1 = MySqlOperator(sql=qry_truncate_staging,
                   mysql_conn_id='mysql_adventure',
                   task_id='truncating_staging',
                   dag=dag)

qry_populate_staging = """
    insert into st_adventure.st_contact
    (select
        ContactID ,
        FirstName ,
        MiddleName ,
        LastName ,
        EmailAddress ,
        Phone 
    from adventureworks.contact c)
"""

t2 = MySqlOperator(sql=qry_populate_staging,
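The call is cut off mid-argument-list; by analogy with t1, the remaining arguments presumably name the connection, task id, and dag (values hypothetical):

t2 = MySqlOperator(sql=qry_populate_staging,
                   mysql_conn_id='mysql_adventure',  # hypothetical: mirrors t1
                   task_id='populating_staging',     # hypothetical task_id
                   dag=dag)

t1 >> t2  # truncate the staging table before repopulating it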
Example #8
    'email_on_failure': False,
    'email_on_retry': False,
}

dag = DAG('airflow_logs',
          default_args=default_args,
          schedule_interval='0/1 * * * *')

task1 = BashOperator(task_id='print_date', bash_command='date', dag=dag)

task2 = BashOperator(task_id='print_hello-world',
                     bash_command='echo "hello world!!"',
                     dag=dag)

task3 = MySqlOperator(mysql_conn_id='airflow_db',
                      task_id='basic_mysql',
                      sql="SELECT * FROM `dag`",
                      dag=dag)

EMAIL_CONTENT = """

<ul>
    <li>Instance key: %s</li>
    <li>Owner: %s</li>
    <li>Host: %s</li>
</ul>

""" % (
    "{{ task_instance_key_str }}",
    "{{ task.owner}}",
    "{{ ti.hostname }}",
)
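The {{ ... }} placeholders pass through the % substitution as literal text and are only rendered by Jinja once EMAIL_CONTENT reaches a templated field. The truncated tail presumably feeds it to an EmailOperator; a sketch, assuming the 1.x email_operator import and an illustrative address:

notify = EmailOperator(
    task_id='send_report',        # hypothetical task
    to='someone@example.com',     # illustrative address
    subject='Task instance report',
    html_content=EMAIL_CONTENT,   # Jinja fills the {{ ... }} fields at run time
    dag=dag)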
Example #9
    ]
)


class CustomMySqlOperator(MySqlOperator):
    def execute(self, context):
        self.log.info('Executing: %s', self.sql)
        hook = MySqlHook(mysql_conn_id=self.mysql_conn_id,
                         schema=self.database)
        return hook.get_records(self.sql, parameters=self.parameters)
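Unlike the stock MySqlOperator, which executes the statement and discards the result, this subclass returns the rows, and Airflow pushes an operator's return value to XCom. A hedged usage sketch with a made-up query:

row_count = CustomMySqlOperator(
    task_id='count_flat_obs',             # hypothetical task
    sql='SELECT COUNT(*) FROM flat_obs',  # hypothetical query
    mysql_conn_id=MYSQL_CONN_ID,
    database='etl',
    dag=dag)
# downstream tasks can read the rows via ti.xcom_pull(task_ids='count_flat_obs')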


update_flat_obs = MySqlOperator(
    task_id='update_flat_obs',
    sql='flat_obs_v1.3.sql',
    mysql_conn_id=MYSQL_CONN_ID,
    database='etl',
    dag=dag
)



update_flat_orders = MySqlOperator(
    task_id='update_flat_orders',
    sql='flat_orders_v1.1.sql',
    mysql_conn_id=MYSQL_CONN_ID,
    database='etl',
    dag=dag
)

update_flat_lab_obs = MySqlOperator(
Example #10
import airflow
from datetime import datetime
from pytz import timezone  # assumption: pytz, inferred from the localize() call below
from airflow.models import DAG
from airflow.operators.mysql_operator import MySqlOperator
from airflow.operators.bash_operator import BashOperator

### CONSTANTS: DO NOT EDIT ###
## MYSQL CONNECTION
MYSQL_CONN_ID = 'amrs_slave_conn'

## DAG ID
DAG_ID = 'hiv_monthly_summary_daily_10pm'
### END TRIGGER RULES ###

nbo_timezone = timezone("Africa/Nairobi")
#start_date = nbo_timezone.localize(datetime.strptime('2019-06-25 20:00', '%Y-%m-%d %H:%M'))
start_date = datetime.strptime('2019-06-25', '%Y-%m-%d')

# Dag is returned by a factory method
dag = DAG(dag_id=DAG_ID,
          schedule_interval='30 22 * * *',
          start_date=start_date,
          catchup=False)

update_hiv_monthly_report_dataset = MySqlOperator(
    task_id='update_hiv_monthly_report_dataset',
    sql='call generate_hiv_monthly_report_dataset_v1_4("sync",1,100000,100,"2013-01-01");',
    mysql_conn_id=MYSQL_CONN_ID,
    database='etl',
    dag=dag)
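The report window is hard-coded to "2013-01-01". Because MySqlOperator's sql field is templated, the run's own execution date could be passed instead; the same call with {{ ds }} (a sketch, not from the source):

update_hiv_monthly_report_dataset = MySqlOperator(
    task_id='update_hiv_monthly_report_dataset',
    sql='call generate_hiv_monthly_report_dataset_v1_4("sync",1,100000,100,"{{ ds }}");',
    mysql_conn_id=MYSQL_CONN_ID,
    database='etl',
    dag=dag)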
Example #11
    'owner': 'sergio',
    'start_date': datetime(2021, 3, 1, tzinfo=local_tz)
}

with DAG('11-load-dimensions.py',
         default_args=default_args,
         schedule_interval=None,
         template_searchpath=cfg,
         catchup=False,
         is_paused_upon_creation=False) as dag:

    start = DummyOperator(task_id='start')

    truncate_tables = MySqlOperator(task_id='truncate_tables',
                                    mysql_conn_id='cool_car',
                                    sql='00-truncate_tables.sql',
                                    autocommit=True)

    load_branch = MySqlOperator(task_id='load_branch_office',
                                mysql_conn_id='cool_car',
                                sql='01-dim_branch_office.sql',
                                autocommit=True)

    load_car = MySqlOperator(task_id='load_car',
                             mysql_conn_id='cool_car',
                             sql='02-dim_car.sql',
                             autocommit=True)
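The excerpt ends before any dependencies are set; a plausible chain for the visible tasks (hypothetical, not from the source) would be:

    start >> truncate_tables >> load_branch >> load_car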