Example #1
def tracking_dependency_operator(self, task_id, dep):
    return SqlSensor(
        task_id=task_id,
        conn_id=self.conn_id,
        sql="SELECT created_date FROM %s.%s WHERE CREATED_DATE>'{{ ds }}' LIMIT 1" % (
            dep.schema, dep.table),
        **self.task_attributes
    )
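For reference, a minimal sketch of how a helper like this might be wired into a DAG. The DependencyTracker class, the TableDep tuple, the connection id 'etl_db', and the task_attributes values are assumptions for illustration, not part of the original snippet.

from collections import namedtuple
from datetime import datetime

from airflow import DAG
from airflow.operators.sensors import SqlSensor

# Illustrative dependency descriptor with the attributes the method expects.
TableDep = namedtuple('TableDep', ['schema', 'table'])


class DependencyTracker:
    def __init__(self, conn_id, dag):
        self.conn_id = conn_id
        # Shared kwargs applied to every generated sensor (values are assumptions).
        self.task_attributes = {'poke_interval': 300, 'timeout': 3600, 'dag': dag}

    def tracking_dependency_operator(self, task_id, dep):
        # Same pattern as the snippet above: poke until the dependency table
        # has at least one row created after the execution date.
        return SqlSensor(
            task_id=task_id,
            conn_id=self.conn_id,
            sql="SELECT created_date FROM %s.%s WHERE CREATED_DATE>'{{ ds }}' LIMIT 1" % (
                dep.schema, dep.table),
            **self.task_attributes
        )


dag = DAG('tracking_dependency_example',
          start_date=datetime(2019, 1, 1),
          schedule_interval='@daily')

tracker = DependencyTracker(conn_id='etl_db', dag=dag)
wait_orders = tracker.tracking_dependency_operator(
    'wait_orders', TableDep(schema='dw', table='orders'))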
Example #2
from airflow import DAG
from airflow.operators.dummy_operator import DummyOperator
from airflow.operators.sensors import SqlSensor
from airflow.operators.hive_operator import HiveOperator
from airflow.contrib.operators.spark_submit_operator import SparkSubmitOperator

from common.utils import generate_spark_args
from settings import default_args


dag = DAG('bi_sales_tmk_d', default_args=default_args,
          schedule_interval='0 4 * * *')

wait_dw_commerce_contract = SqlSensor(
    task_id='wait_dw_commerce_contract',
    conn_id='etl_db',
    sql="SELECT * FROM etl.signal WHERE `name`='dw_commerce_contract_d' AND `value`='{{ macros.ds(ti) }}';",
    dag=dag
)

wait_dw_student_basic = SqlSensor(
    task_id='wait_dw_student_basic',
    conn_id='etl_db',
    sql="SELECT * FROM etl.signal WHERE `name`='dw_student_basic_d' AND `value`='{{ macros.ds(ti) }}';",
    dag=dag
)

wait_dw_teaching_lesson = SqlSensor(
    task_id='wait_dw_teaching_lesson',
    conn_id='etl_db',
    sql="SELECT * FROM etl.signal WHERE `name`='dw_teaching_lesson_d' AND `value`='{{ macros.ds(ti) }}';",
    dag=dag
)
Example #3
from airflow import DAG
from airflow.operators.bash_operator import BashOperator
from airflow.operators.sensors import SqlSensor
from airflow.operators.hive_operator import HiveOperator
from airflow.operators.mysql_operator import MySqlOperator

from settings import default_args

# - staff information
# - admin, departments

dag = DAG('dw_staff_basic_d',
          default_args=default_args,
          schedule_interval='10 1 * * *')

start = SqlSensor(
    task_id='start',
    conn_id='src_main_db',
    sql=
    "SELECT * FROM restore_tracking.restore_log WHERE date(restored_time) = current_date;",
    dag=dag)

# admin
del_partiton_stg_admin = HiveOperator(
    task_id='del_partiton_stg_admin',
    hive_cli_conn_id='spark_thrift',
    hql=
    "alter table stg.newuuabc_admin drop if exists PARTITION (etl_date='{{ macros.ds(ti) }}');\n ",
    dag=dag)

src_stg_admin = BashOperator(
    task_id='src_stg_admin',
    bash_command=
    'dataship extract uuold_newuuabc.admin {{ macros.ds(ti) }} {{ macros.tomorrow_ds(ti) }}',
    dag=dag)
Example #4
        print(dag_run_obj.payload)
        return dag_run_obj


pub_id = 3
dag_ver = 'v4'
dag_id = 'PUB_{0}_{1}'.format(pub_id, dag_ver)
sub_id = 3
sub_id_ver = 'v1'
pubid_sql = 72

dag = DAG('PUB_3_v4', schedule_interval=None, default_args=default_args)
t0 = SqlSensor(
    task_id='sql_sensor_{}'.format(pub_id),
    conn_id='sd_batch1',
    sql=
    'select b.publishid from syncsystemobjectupdate a, syncpublish b where lower(a.objectdesc) = lower(b.objectdesc) and b.publishid = {}'
    .format(pubid_sql),
    dag=dag)
t1 = PythonOperator(task_id='python_{}_1'.format(pub_id),
                    python_callable=my_display_function,
                    op_kwargs={'phase': 'BUILD_DELTA_START'},
                    dag=dag)
t2 = BashOperator(
    task_id='builddelta_{}'.format(pub_id),
    pool='simba_build_delta',
    bash_command=
    'sh /x/home/dm_hdp_batch/test/projects/steam_donkey/scripts/delta_processing.sh ',
    dag=dag)
t3 = PythonOperator(task_id='python_{}_2'.format(pub_id),
                    python_callable=my_display_function,
                    dag=dag)
Example #5
default_args = {
    'depends_on_past': False,
    'start_date': datetime(d.year, d.month, d.day) - timedelta(days=7),
    'email': ['*****@*****.**'],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=5),
}

dag = DAG('sqlite_example_dag_' + user.replace('.', ''),
          default_args=default_args,
          schedule_interval=timedelta(days=1))

# t1, t2 and t3 are examples of tasks created by instantiating operators
t0 = SqlSensor(task_id='check_babynames_tables',
               # NOTE: SqlSensor also expects conn_id and sql; `delta` is a
               # TimeDeltaSensor argument, so this call looks incomplete as written.
               delta=timedelta(seconds=1),
               dag=dag)

t1 = BashOperator(task_id='print_date', bash_command='date', dag=dag)


def my_cool_function(ds=None, **kwargs):
    print "{}".format(ds)


t2 = PythonOperator(task_id='show_ds',
                    python_callable=my_cool_function,
                    retries=3,
                    provide_context=True,
                    dag=dag)
Example #6
from datetime import datetime, timedelta

from airflow import DAG
from airflow.operators.sensors import SqlSensor
from airflow.operators.python_operator import PythonOperator

defaultArgs = {
	"owner": "Nicholas",
	"retries": 1,
	"retry_delay": timedelta(minutes = 5),
	"start_date": datetime(2019, 10, 15),
	"depends_on_past": False,
	"email": ["*****@*****.**"],
	"email_on_failure": True
}

dag_id = "monthly_processing"
dag = DAG(dag_id = dag_id,
	 	default_args = defaultArgs,
	 	schedule_interval = timedelta(hours = 7))

sql_sensor = SqlSensor(
		task_id = "check_for_requests",
		conn_id = "insight",
		sql = "SELECT * FROM requests;",
		poke_interval = 30,
		dag = dag
	)

spark_submit_task = PythonOperator(
		task_id = "spark_job",
		python_callable = schedule,
		dag = dag
	)

sql_sensor >> spark_submit_task
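The `schedule` callable passed to spark_submit_task is not defined in the snippet above. A minimal placeholder, assuming the job is launched via spark-submit and that the master and application path are hypothetical:

import subprocess


def schedule():
    # Hypothetical stand-in for the `schedule` callable referenced above:
    # launch the monthly Spark job. The master and application path are
    # placeholders, not taken from the original example.
    subprocess.run(
        ['spark-submit', '--master', 'yarn', '/opt/jobs/monthly_processing.py'],
        check=True)

In the real file such a function would have to be defined above the PythonOperator that references it.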
Example #7
        "email_list":EMAIL_LIST,
        "task_success_msg":"Checking signal for running MigrateDatabase passed successfully.",
        "task_failure_msg":"Signal not received to run MigrateDatabase."
    },
    trigger_rule = 'all_success'
)'''
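
The `notify_email_failure` callback wired into check_signal below is not included in the listing. A minimal sketch, assuming it reads the per-task params and uses Airflow's send_email helper:

from airflow.utils.email import send_email


def notify_email_failure(context):
    # Hedged sketch of the failure callback used by check_signal below;
    # the real implementation is not shown in the listing.
    params = context['params']
    send_email(
        to=params['email_list'],
        subject='[Airflow] task {} failed'.format(context['task_instance'].task_id),
        html_content=params['task_failure_msg'])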

check_signal = SqlSensor(
    task_id='check_signal',
    conn_id='mysql_server',
    sql='SELECT flag FROM ' + master_db_name +
    '.md_check_signal where module="Decision Analytics" and task="Migrate Database"',
    dag=dag,
    timeout=300,
    poke_interval=10,
    soft_fail=True,
    on_failure_callback=notify_email_failure,
    params={
        "email_list": EMAIL_LIST,
        "task_success_msg":
        "Checking signal for running MigrateDatabase passed successfully.",
        "task_failure_msg": "Signal not received to run MigrateDatabase."
    },
    trigger_rule='all_success')

run_sp_fc_processing_to_history = BashOperator(
    task_id='run_sp_fc_processing_to_history',
    bash_command=PYTHON_LOCATION + ' ' +
    os.path.join(MIGRATE_DB_SCRIPT_LOCATION,
                 'main.py run_sp_fc_processing_to_history '),
    dag=dag,
)
Example #8
import os

from airflow import DAG
from airflow.operators.sensors import SqlSensor
from airflow.operators.python_operator import PythonOperator
from airflow.operators.http_operator import SimpleHttpOperator

from settings import default_args
from common.utils import on_prd, read_script
from common.mail import mail_files

current_dir = os.path.dirname(os.path.abspath(__file__))

dag = DAG('bi_tchops_pref_d',
          default_args=default_args,
          schedule_interval=on_prd('5 8 * * *'))

start = SqlSensor(
    task_id='start',
    conn_id='src_main_db',
    sql=
    "SELECT * FROM restore_tracking.restore_log WHERE date(restored_time) >= '{{ macros.ds(ti) }}';",
    dag=dag)

# gen_excel=SimpleHttpOperator(
#     task_id='gen_excel',
#     http_conn_id='taskhub_host',
#     endpoint='excel',
#     data=json.dumps([
#         {
#             "sql": read_script(current_dir, "scripts/rpt.export_student_to_onesmart.sql"),
#             "filename": "testx01"
#         },
#         {
#             "header": "a,b",
#             "sql": "SELECT id, flag from ods.newuuabc_student_user limit 11",
Example #9
                "id": "{}".format(account_id),
                "token": Variable.get("APPLICATION_ACCESS_TOKEN")
            },  # http params
            response_check=
            response_check,  # will retry based on default_args if it fails
            dag=export_account_dag)

        print("Created account processing DAG {}".format(
            export_account_dag.dag_id))

        # register the dynamically created DAG in the global namespace?
        globals()[export_account_task_name] = export_account_dag

    return account_ids


sensor = SqlSensor(
    task_id='account_creation_check',
    conn_id='account-database',
    poke_interval=600,  #do the select every 600 seconds, 5 minutes
    sql="SELECT id from accounts where created_at > '{{ds}}' LIMIT 1",
    dag=dag)

process_new_accounts_task = PythonOperator(
    task_id='process_new_accounts',
    provide_context=True,
    python_callable=process_new_accounts,
    dag=dag)

sensor >> process_new_accounts_task
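The `response_check` callable referenced in the dynamically built export DAGs above is not shown. A minimal sketch, assuming it receives a requests Response object (as HTTP operator/sensor response checks do) and only accepts a successful export; the exact contract of the export endpoint is an assumption:

def response_check(response):
    # Hedged sketch: treat any successful HTTP response as "export accepted".
    # The real check for the export endpoint is not shown in the listing.
    return response.status_code == 200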
Example #10
from airflow import DAG
from airflow.operators.bash_operator import BashOperator
from airflow.operators.dummy_operator import DummyOperator
from airflow.operators.sensors import SqlSensor
from airflow.operators.hive_operator import HiveOperator
from airflow.contrib.operators.spark_submit_operator import SparkSubmitOperator

from utils import generate_spark_args
from settings import default_args

dag = DAG('bi_monitor_schema_columns_d',
          default_args=default_args,
          schedule_interval='54 0 * * *')

wait_dw_schema_monitor = SqlSensor(
    task_id='wait_dw_schema_monitor',
    conn_id='etl_db',
    sql=
    "SELECT * FROM etl.signal WHERE `name`='dw_monitor_schema_d' AND `value`='{{ macros.ds(ti) }}';",
    dag=dag)

dw_dm_columns_changed = HiveOperator(
    task_id='dw_dm_columns_changed',
    hive_cli_conn_id='spark_thrift',
    hql='scripts/dm.monitor_schema_col_changed.sql',
    hiveconf_jinja_translate=True,
    dag=dag)

# Read from Spark the columns whose comments have changed; if there are any, write the ALTER statements to a fixed directory. If a column changed in any way other than its comment, report it by email.

end = DummyOperator(task_id='end', dag=dag)

wait_dw_schema_monitor >> dw_dm_columns_changed >> end
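Several of the examples above poll an etl.signal table for a (name, value) row that marks upstream completion. As a hedged sketch of the other side of that handshake, the upstream DAG could publish its signal with a MySqlOperator once its work is done; the table, columns, and the 'etl_db' connection id are taken from the sensors above, while `upstream_dag` is a placeholder for that DAG object:

from airflow.operators.mysql_operator import MySqlOperator

# Hypothetical publisher task: in the DAG that builds dw_monitor_schema_d,
# write the signal row that wait_dw_schema_monitor polls for.
publish_signal = MySqlOperator(
    task_id='publish_signal_dw_monitor_schema_d',
    mysql_conn_id='etl_db',
    sql="INSERT INTO etl.signal (`name`, `value`) "
        "VALUES ('dw_monitor_schema_d', '{{ ds }}');",
    dag=upstream_dag)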