Example #1
    def __init__(self,
                 dag_folder=None,
                 executor=None,
                 include_examples=configuration.conf.getboolean(
                     'core', 'LOAD_EXAMPLES')):

        # do not use default arg in signature, to fix import cycle on plugin load
        if executor is None:
            executor = GetDefaultExecutor()
        self.executor = executor
        dag_folder = dag_folder or settings.DAGS_FOLDER
        self.log.info("Filling up the DagBag from %s", dag_folder)
        self.dag_folder = dag_folder
        self.dags = {}
        # the file's last modified timestamp when we last read it
        self.file_last_changed = {}
        self.import_errors = {}
        self.has_logged = False

        # load the example DAGs shipped with Airflow
        if include_examples:
            example_dag_folder = os.path.join(os.path.dirname(__file__),
                                              'example_dags')
            self.collect_dags(example_dag_folder)
        # load DAGs from the given folder
        self.collect_dags(dag_folder)
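
For orientation, a minimal usage sketch of the constructor above (the folder path is a placeholder; assumes an Airflow 1.x install, where airflow.models exposes DagBag):

from airflow.models import DagBag

# Parse DAG files from a custom folder; skip the bundled example DAGs.
bag = DagBag(dag_folder='/path/to/dags', include_examples=False)
print(list(bag.dags))        # dag_ids that were successfully collected
print(bag.import_errors)     # file path -> error message for files that failed
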
Example #2
    def test_executors(self):
        from airflow.executors.test_plugin import PluginExecutor
        self.assertTrue(issubclass(PluginExecutor, BaseExecutor))

        from airflow.executors import GetDefaultExecutor
        self.assertTrue(issubclass(type(GetDefaultExecutor()), BaseExecutor))

        # test plugin executor import based on a name string (as it would be
        # set in airflow.cfg); this is not identical to the first assertion!
        from airflow.executors import _get_executor
        self.assertTrue(issubclass(type(_get_executor('test_plugin.PluginExecutor')), BaseExecutor))
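
The assertions above presuppose a plugin that registers PluginExecutor under the name test_plugin. A minimal sketch of such a plugin, with a placeholder executor class (this is not the real test plugin from Airflow's test suite):

from airflow.executors.base_executor import BaseExecutor
from airflow.plugins_manager import AirflowPlugin

class PluginExecutor(BaseExecutor):
    """Placeholder executor; inherits all behavior from BaseExecutor."""

class TestPlugin(AirflowPlugin):
    # Airflow 1.x exposes this as airflow.executors.test_plugin.PluginExecutor
    name = 'test_plugin'
    executors = [PluginExecutor]
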
Example #3
def create_subdag_operator(dag_parent, label, team):
    subdag, dependencies = create_subdag(dag_parent, label, team)

    # Since v1.10, Airflow forces the SubDagOperator to use the
    # SequentialExecutor by default, so we explicitly pass in the
    # executor configured in airflow.cfg.
    sd_op = SubDagOperator(task_id=label,
                           dag=dag_parent,
                           subdag=subdag,
                           executor=GetDefaultExecutor())
    return sd_op, dependencies
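
A hedged usage sketch for the helper above; create_subdag, the DAG arguments, and the label/team values are assumptions about the caller's context:

from datetime import datetime
from airflow import DAG

# Hypothetical parent DAG; the helper wires a SubDagOperator into it.
dag = DAG('etl_parent', schedule_interval='@daily', start_date=datetime(2019, 1, 1))
sd_op, dependencies = create_subdag_operator(dag, label='load_team_a', team='team_a')
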
Example #4
    @provide_session
    @apply_defaults
    def __init__(self, subdag, executor=GetDefaultExecutor(), *args, **kwargs):
        """
        Yo dawg. This runs a sub dag. By convention, a sub dag's dag_id
        should be prefixed by its parent and a dot. As in `parent.child`.

        :param subdag: the DAG object to run as a subdag of the current DAG.
        :type subdag: airflow.DAG
        :param dag: the parent DAG
        :type dag: airflow.DAG
        """
        import airflow.models
        dag = kwargs.get('dag') or airflow.models._CONTEXT_MANAGER_DAG
        if not dag:
            raise AirflowException('Please pass in the `dag` param or call '
                                   'within a DAG context manager')
        session = kwargs.pop('session')
        super(SubDagOperator, self).__init__(*args, **kwargs)

        # validate subdag name
        if dag.dag_id + '.' + kwargs['task_id'] != subdag.dag_id:
            raise AirflowException(
                "The subdag's dag_id should have the form "
                "'{{parent_dag_id}}.{{this_task_id}}'. Expected "
                "'{d}.{t}'; received '{rcvd}'.".format(d=dag.dag_id,
                                                       t=kwargs['task_id'],
                                                       rcvd=subdag.dag_id))

        # validate that subdag operator and subdag tasks don't have a
        # pool conflict
        if self.pool:
            conflicts = [t for t in subdag.tasks if t.pool == self.pool]
            if conflicts:
                # only query for pool conflicts if one may exist
                pool = (session.query(Pool).filter(Pool.slots == 1).filter(
                    Pool.pool == self.pool).first())
                if pool and any(t.pool == self.pool for t in subdag.tasks):
                    raise AirflowException(
                        'SubDagOperator {sd} and subdag task{plural} {t} both '
                        'use pool {p}, but the pool only has 1 slot. The '
                        'subdag tasks will never run.'.format(
                            sd=self.task_id,
                            plural='s' if len(conflicts) > 1 else '',
                            t=', '.join(t.task_id for t in conflicts),
                            p=self.pool))

        self.subdag = subdag
        self.executor = executor
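
To make the pool check above concrete, here is a sketch of the configuration it guards against; the pool and task names are hypothetical, and the check only fires if the pool exists in the metadata DB with exactly one slot:

from datetime import datetime
from airflow import DAG
from airflow.exceptions import AirflowException
from airflow.operators.dummy_operator import DummyOperator
from airflow.operators.subdag_operator import SubDagOperator

parent = DAG('parent', start_date=datetime(2019, 1, 1))
child = DAG('parent.child', start_date=datetime(2019, 1, 1))
# An inner task and the wrapper both claim the same 1-slot pool; the wrapper
# would hold the only slot while waiting, so 'work' could never start.
DummyOperator(task_id='work', dag=child, pool='one_slot_pool')
try:
    SubDagOperator(task_id='child', dag=parent, subdag=child, pool='one_slot_pool')
except AirflowException as err:
    print(err)
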
Example #5
def sub_dag_operator_with_default_executor(subdag, *args, **kwargs):
    return SubDagOperator(*args, subdag=subdag, executor=GetDefaultExecutor(), **kwargs)
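
A usage sketch for the wrapper; my_subdag and parent_dag are assumed to be defined in the surrounding module:

# The subdag is positional; the usual operator kwargs pass through **kwargs.
export_task = sub_dag_operator_with_default_executor(
    my_subdag, task_id='export', dag=parent_dag)
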
Example #6
    def sync_dags(self):
        GetDefaultExecutor().sync_dags()

Example #7
    def get_dag_tag(self):
        _, dag_tar, pod = GetDefaultExecutor().get_image_dag_info()
        return dag_tar, pod
Example #8
main_summary_export = SubDagOperator(
    subdag=export_to_parquet(
        table="moz-fx-data-shared-prod:telemetry_derived.main_summary_v4${{ds_nodash}}",
        static_partitions=["submission_date_s3={{ds_nodash}}"],
        arguments=[
            "--partition-by=sample_id",
            "--replace='{{ds_nodash}}' AS submission_date",
            "--maps-from-entries",
        ] + main_summary_bigint_columns,
        parent_dag_name=dag.dag_id,
        dag_name="main_summary_export",
        default_args=default_args,
        num_workers=40),
    task_id="main_summary_export",
    executor=GetDefaultExecutor(),
    dag=dag)

register_status(main_summary, "Main Summary", "A summary view of main pings.")

addons = bigquery_etl_query(task_id="addons",
                            destination_table="addons_v2",
                            dataset_id="telemetry_derived",
                            dag=dag)

addons_export = SubDagOperator(
    subdag=export_to_parquet(
        table="moz-fx-data-derived-datasets:telemetry_derived.addons_v2${{ds_nodash}}",
        static_partitions=["submission_date_s3={{ds_nodash}}"],
        arguments=[