Example #1
                    args.get("total-executor-cores"), row.job_id),
                dag=dag,
                on_failure_callback=failure_callback_wrapper(
                    row.owner.split(",") if row.owner != '' else None))
        md = md + "[{}:{}]  ".format(task_id, row.job_desc)
        job_dict[str(row.job_num)] = task_id

    # wire up upstream dependencies for jobs in this schedule that declare any
    for _, row in rows[(rows.schedule_id == d.schedule_id)
                       & (rows.dependent_jobs != '')].iterrows():
        dependent_jobs = row.dependent_jobs
        for dep_job_id in dependent_jobs.split(","):
            if "." in dep_job_id:
                ext_dag_id, ext_task_id, execution_delta = dep_job_id.split(
                    ".")
                try:
                    ext_task = dag.get_task('wait_for_{}_{}'.format(
                        ext_dag_id, ext_task_id))
                    dummy = dag.get_task('{}_{}_finish'.format(
                        ext_dag_id, ext_task_id))
                except Exception:
                    # dag.get_task failed: the sensor / finish pair for this
                    # external task has not been created yet, so build it now
                    ext_task = ExternalTaskSensor(
                        task_id='wait_for_{}_{}'.format(
                            ext_dag_id, ext_task_id),
                        external_dag_id=ext_dag_id,
                        external_task_id=ext_task_id,
                        execution_delta=datetime.timedelta(
                            minutes=int(execution_delta)),
                        dag=dag)
                    dummy = DummyOperator(task_id='{}_{}_finish'.format(
                        ext_dag_id, ext_task_id),
                                          dag=dag)
                ext_task >> dummy >> dag.get_task(
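The snippet above ends mid-expression, but the pattern it demonstrates is get-or-create: look a task up with dag.get_task and only build the ExternalTaskSensor / DummyOperator pair when the lookup fails. A minimal self-contained sketch of that pattern (hypothetical dag and task ids; assumes the classic airflow.operators.dummy_operator and airflow.sensors.external_task_sensor import paths) could look like this:

import datetime

from airflow import DAG
from airflow.exceptions import AirflowException
from airflow.operators.dummy_operator import DummyOperator
from airflow.sensors.external_task_sensor import ExternalTaskSensor

dag = DAG(dag_id='get_or_create_sensor_example',
          start_date=datetime.datetime(2021, 1, 1),
          schedule_interval=None)


def get_or_create_wait(dag, ext_dag_id, ext_task_id, delta_minutes):
    """Return the sensor/finish pair for an external task, creating it only once."""
    try:
        sensor = dag.get_task('wait_for_{}_{}'.format(ext_dag_id, ext_task_id))
        finish = dag.get_task('{}_{}_finish'.format(ext_dag_id, ext_task_id))
    except AirflowException:  # get_task raises when the task_id is unknown
        sensor = ExternalTaskSensor(
            task_id='wait_for_{}_{}'.format(ext_dag_id, ext_task_id),
            external_dag_id=ext_dag_id,
            external_task_id=ext_task_id,
            execution_delta=datetime.timedelta(minutes=delta_minutes),
            dag=dag)
        finish = DummyOperator(
            task_id='{}_{}_finish'.format(ext_dag_id, ext_task_id), dag=dag)
        sensor >> finish
    return sensor, finish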
Example #2
import json
from pathlib import Path

from airflow.providers.docker.operators.docker import DockerOperator
from airflow import DAG
from airflow.utils.dates import days_ago

default_args = {
    'start_date': days_ago(0),
}

dag = DAG(
    dag_id='{{project_name}}',
    default_args=default_args,
    description='Ploomber DAG ({{project_name}})',
    schedule_interval=None,
)

path_to_spec = Path(__file__).parent / '{{project_name}}.json'
spec = json.loads(path_to_spec.read_text())

# first pass: create one DockerOperator per task declared in the spec
for task in spec['tasks']:
    DockerOperator(image=spec['image'],
                   command=task['command'],
                   dag=dag,
                   task_id=task['name'])

# second pass: wire dependencies now that every task exists in the DAG
for task in spec['tasks']:
    t = dag.get_task(task['name'])

    for upstream in task['upstream']:
        t.set_upstream(dag.get_task(upstream))
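The two loops above expect the generated {{project_name}}.json to carry a top-level image plus a tasks list whose entries have name, command, and upstream fields. A hypothetical spec of that shape (image and task names invented for illustration):

spec = {
    "image": "my-project:latest",
    "tasks": [
        {"name": "get", "command": "ploomber task get", "upstream": []},
        {"name": "features", "command": "ploomber task features", "upstream": ["get"]},
        {"name": "fit", "command": "ploomber task fit", "upstream": ["features"]},
    ],
}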
Example #3
def assertDagDictEqual(self, source: dict, dag: DAG):
    """Assert that the DAG's task ids and downstream edges match ``source``."""
    assert dag.task_dict.keys() == source.keys()
    for task_id, downstream_list in source.items():
        assert dag.has_task(task_id)
        task = dag.get_task(task_id)
        assert task.downstream_task_ids == set(downstream_list)
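A hedged usage sketch of the helper above, exercised from a small unittest case against a two-task toy DAG (task ids are hypothetical; each key in the expected dict maps a task_id to its direct downstream task ids):

import datetime
import unittest

from airflow import DAG
from airflow.operators.dummy_operator import DummyOperator


class TestDagStructure(unittest.TestCase):
    def assertDagDictEqual(self, source: dict, dag: DAG):
        assert dag.task_dict.keys() == source.keys()
        for task_id, downstream_list in source.items():
            assert dag.has_task(task_id)
            task = dag.get_task(task_id)
            assert task.downstream_task_ids == set(downstream_list)

    def test_structure(self):
        dag = DAG(dag_id='toy',
                  start_date=datetime.datetime(2021, 1, 1),
                  schedule_interval=None)
        a = DummyOperator(task_id='a', dag=dag)
        b = DummyOperator(task_id='b', dag=dag)
        a >> b
        self.assertDagDictEqual({'a': ['b'], 'b': []}, dag)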
Example #4
                                    dag=dag)
            batchops[0].set_upstream(startop)

        # if we are in batch two or later, set the previous joiner as the upstream
        else:
            if len(batchops) > 0:
                batchops[-1].set_upstream(batchjoins[-1])

    # init the "real" worker that does the heavy lifting
    workerop = PythonOperator(task_id='dyn_task_{}'.format(thing_identifyer),
                              python_callable=process_thing,
                              dag=dag,
                              params={
                                  'thing_identifyer': thing_identifyer,
                                  'thing_action': thing_action
                              })

    # set the batch group as the upstream
    workerop.set_upstream(batchops[-1])
    j += 1

    # finish batch group by joining the tasks in the group into a dummy joiner
    if j == max_tasks:
        batchjoins.append(
            DummyOperator(task_id='join_{}'.format(len(batchjoins)), dag=dag))
        for task in batchops[-1].downstream_list:
            dag.get_task(task.task_id).set_downstream(batchjoins[-1])
        j = 0

    c += 1
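The fragment above is cut off at both ends (the surrounding loop and the startop, batchops, batchjoins, j, c and max_tasks variables are defined off-screen), so here is a compact, self-contained sketch of the underlying idea: fan dynamically generated worker tasks into fixed-size batches separated by DummyOperator joiners. All names here (things, process_thing, max_tasks) are hypothetical, and the classic airflow.operators.python_operator import path is assumed:

import datetime

from airflow import DAG
from airflow.operators.dummy_operator import DummyOperator
from airflow.operators.python_operator import PythonOperator


def process_thing(thing, **kwargs):
    print('processing', thing)


dag = DAG(dag_id='batched_dynamic_tasks',
          start_date=datetime.datetime(2021, 1, 1),
          schedule_interval=None)

start = DummyOperator(task_id='start', dag=dag)
things = ['a', 'b', 'c', 'd', 'e']  # hypothetical work items
max_tasks = 2                       # at most 2 workers run in parallel per batch

previous_join = start
for i in range(0, len(things), max_tasks):
    join = DummyOperator(task_id='join_{}'.format(i // max_tasks), dag=dag)
    for thing in things[i:i + max_tasks]:
        worker = PythonOperator(task_id='dyn_task_{}'.format(thing),
                                python_callable=process_thing,
                                op_kwargs={'thing': thing},
                                dag=dag)
        # every worker in a batch runs after the previous joiner and before the next
        previous_join >> worker >> join
    previous_join = join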
from datetime import datetime

from airflow import DAG
from airflow.models import Variable
from eai_graph_tools.airflow_data.dag_components.prediction_unb2017 import create_grid_prediction_dag
import pytest

cfg_name = "unit_test_deep_graph_embeddings_agg_gs_fext_deg_dim10_interval10"

dag = DAG(dag_id=cfg_name,
          default_args={'start_date': datetime.utcfromtimestamp(0)},
          start_date=datetime.utcfromtimestamp(0),
          schedule_interval=None)

create_grid_prediction_dag(dag,
                           cfg_file="eai_graph_tools/airflow_data/configs/configs_unb2017.ini",
                           cfg_name=cfg_name)

# look up the tasks that create_grid_prediction_dag registered on the DAG
create_training_dataset = dag.get_task("create_training_dataset")
create_inference_dataset = dag.get_task("create_inference_dataset")
train_graph_model = dag.get_task("train_graph_model")
create_graph_model_node_embeddings = dag.get_task("create_graph_model_node_embeddings")
infer_predictions = dag.get_task("predict")
create_interval_metrics = dag.get_task("create_interval_metrics")


def overwrite_out_dir_param(path, cfg_name=""):
    Variable.set(cfg_name + 'out_dir', path)


def get_out_dir(cfg_name=""):
    return Variable.get(cfg_name + 'out_dir', default_var='')
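A hedged usage sketch of the two helpers above (the path is hypothetical; Variable.set and Variable.get talk to the Airflow metadata database, so a configured backend is assumed):

# redirect the config's output to a scratch directory before exercising the DAG
overwrite_out_dir_param('/tmp/unit_test_out', cfg_name=cfg_name)
assert get_out_dir(cfg_name=cfg_name) == '/tmp/unit_test_out'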