Example 1
# dag, interval, interval_2, sql1/sql2 and query_params_01/query_params_02 are
# assumed to be defined earlier in the original file; only the task wiring is shown here.
dummy01 = DummyOperator(task_id='dummy01', dag=dag)
dummy02 = DummyOperator(task_id='dummy02', dag=dag)

task01 = get_big_query_task_model(
    task_id="qp03_01",
    dag=dag,
    sql=sql1,
    replace_field_name="target_date",
    gcp_conn_id="gree-anger-dev-bigquery",
    query_params=query_params_01,
    destination_dataset_table=destination_dataset_table)

task02 = get_big_query_task_model(
    task_id="qp03_02",
    dag=dag,
    sql=sql2,
    replace_field_name="dt",
    gcp_conn_id="gree-anger-dev-bigquery",
    write_disposition="WRITE_APPEND",
    query_params=query_params_02,
    destination_dataset_table=destination_dataset_table)
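
# get_big_query_task_model is a project-specific helper that is not shown in this
# snippet. The function below is a minimal, assumed sketch of what it might wrap,
# built on the Google provider's BigQueryExecuteQueryOperator; it is illustrative
# only, not the project's actual implementation.
from airflow.providers.google.cloud.operators.bigquery import BigQueryExecuteQueryOperator


def get_big_query_task_model_sketch(task_id, dag, sql, gcp_conn_id,
                                    destination_dataset_table,
                                    write_disposition="WRITE_TRUNCATE",
                                    replace_field_name=None, query_params=None):
    # replace_field_name / query_params handling is project-specific (presumably
    # used to substitute the target date into the SQL) and is omitted here.
    return BigQueryExecuteQueryOperator(
        task_id=task_id,
        dag=dag,
        sql=sql,
        gcp_conn_id=gcp_conn_id,
        destination_dataset_table=destination_dataset_table,
        write_disposition=write_disposition,
        use_legacy_sql=False,
    )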

tasks = [task01, task02]

from_date = datetime.strptime("2020-10-01", '%Y-%m-%d')
to_date = datetime.strptime("2020-12-30", '%Y-%m-%d')

execute_task = ExecuteTaskList(dag, tasks, from_date, to_date, interval)
execute_task.make_task_list().execute_parallel_task()
task_2_01 = get_big_query_task_model(
    task_id="TEST-Series-Task1",
    dag=dag,
    sql=sql1,
    replace_field_name="target_date",
    gcp_conn_id="gree-anger-dev-bigquery",
    query_params=query_params_01,
    destination_dataset_table=destination_dataset_table)

task_2_02 = get_big_query_task_model(
    task_id="TEST-Series-Task2",
    dag=dag,
    sql=sql2,
    replace_field_name="dt",
    gcp_conn_id="gree-anger-dev-bigquery",
    write_disposition="WRITE_APPEND",
    query_params=query_params_02,
    destination_dataset_table=destination_dataset_table)

task_list_2 = [task_2_01, task_2_02]

from_date_2 = datetime.strptime("2020-10-01", '%Y-%m-%d')
to_date_2 = datetime.strptime("2020-12-30", '%Y-%m-%d')
######### Serial execute_series_task processing task test END

execute_task = ExecuteTaskList(dag)
(execute_task
    .make_task_list(tasks, from_date, to_date, interval)
    .execute_series_task()
    .make_task_list(task_list_2, from_date_2, to_date_2, interval_2)
    .execute_series_task())
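
# ExecuteTaskList is also project-specific and not shown here. A minimal sketch,
# assuming make_task_list() expands the template tasks over the date range and the
# execute_* methods only differ in how they wire dependencies (an assumption for
# illustration, not the actual implementation):
class ExecuteTaskListSketch:
    def __init__(self, dag, tasks=None, from_date=None, to_date=None, interval=None):
        self.dag = dag
        self.tasks = tasks or []
        self.from_date = from_date
        self.to_date = to_date
        self.interval = interval  # assumed to behave like a timedelta step
        self.task_list = []

    def make_task_list(self, tasks=None, from_date=None, to_date=None, interval=None):
        # Arguments passed here override those given to the constructor, which is
        # why both calling styles used in the examples above work.
        self.tasks = tasks or self.tasks
        self.from_date = from_date or self.from_date
        self.to_date = to_date or self.to_date
        self.interval = interval or self.interval
        # The real helper presumably clones one task per date between from_date
        # and to_date; here we just keep the tasks as given.
        self.task_list = list(self.tasks)
        return self  # returning self enables method chaining

    def execute_parallel_task(self):
        # No dependencies are added, so Airflow can run the generated tasks in parallel.
        return self

    def execute_series_task(self):
        # Chain the generated tasks so each one waits for the previous run.
        for upstream, downstream in zip(self.task_list, self.task_list[1:]):
            upstream >> downstream
        return self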
Example 3
        "email_on_retry": False, # Trueにするとtaskがリトライが発生した際にメールが通知
        "retries": 1,
        "retry_delay": timedelta(minutes=10)}

schedule_interval = timedelta(days=1)

dag = DAG("TEST_PRJ-T001", default_args=original_args, schedule_interval=schedule_interval)

interval = create_interval("weekly", 1)
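
# create_interval is another project-specific helper. A hedged sketch, assuming it
# simply maps a unit name and a count to the step used when the task list is
# expanded from from_date to to_date (illustrative only):
def create_interval_sketch(unit, count):
    # timedelta is already in scope: it is used in the default_args above.
    steps = {"daily": timedelta(days=1), "weekly": timedelta(weeks=1)}
    return steps[unit] * count  # e.g. ("weekly", 1) -> timedelta(weeks=1)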

destination_dataset_table = "your_project_id.dataset.table${{ params.partition_date }}"
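
# The "$" suffix on destination_dataset_table is BigQuery's partition decorator and
# "{{ params.partition_date }}" is an Airflow Jinja template, so each generated task
# presumably writes into one daily partition. Hedged illustration of how the template
# resolves (plain Jinja, outside of Airflow; the partition_date value is assumed):
from jinja2 import Template

rendered_example = Template(destination_dataset_table).render(
    params={"partition_date": "20210101"})
# rendered_example == "your_project_id.dataset.table$20210101"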

task01 = get_big_query_task_model(
    task_id="task01",
    dag=dag,
    sql="SELECT * FROM `your_project_id.dataset.table` WHERE dt = '{{ params.target_date }}'",
    gcp_conn_id="your-conne-id",
    destination_dataset_table=destination_dataset_table)


tasks = [task01]

from_date = datetime.strptime("2021-01-01", '%Y-%m-%d')
to_date = datetime.strptime("2021-04-01", '%Y-%m-%d')



execute_task = ExecuteTaskList(dag, tasks, from_date, to_date, interval)
execute_task.make_task_list()
execute_task.execute_series_task()