def __init__(
        self,
        dag_folder=None,
        executor=None,
        include_examples=configuration.conf.getboolean('core', 'LOAD_EXAMPLES')):
    # do not use default arg in signature, to fix import cycle on plugin load
    if executor is None:
        executor = GetDefaultExecutor()
    self.executor = executor
    dag_folder = dag_folder or settings.DAGS_FOLDER
    self.log.info("Filling up the DagBag from %s", dag_folder)
    self.dag_folder = dag_folder
    self.dags = {}
    # the file's last-modified timestamp when we last read it
    self.file_last_changed = {}
    self.import_errors = {}
    self.has_logged = False

    # load the example DAGs shipped with Airflow
    if include_examples:
        example_dag_folder = os.path.join(
            os.path.dirname(__file__),
            'example_dags')
        self.collect_dags(example_dag_folder)
    # load DAGs from the configured DAG folder
    self.collect_dags(dag_folder)
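For context, a minimal sketch of how this constructor is typically exercised from user code, assuming a standard Airflow 1.10.x install where `airflow.models.DagBag` is importable; the DAG folder path below is a placeholder.

from airflow.models import DagBag

# Build a DagBag from a hypothetical DAG folder, skipping Airflow's bundled examples.
bag = DagBag(dag_folder="/opt/airflow/dags", include_examples=False)

print(list(bag.dags))       # dag_id -> DAG mapping collected from the folder
print(bag.import_errors)    # file path -> error message for files that failed to parse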
def test_executors(self):
    from airflow.executors.test_plugin import PluginExecutor
    self.assertTrue(issubclass(PluginExecutor, BaseExecutor))

    from airflow.executors import GetDefaultExecutor
    self.assertTrue(issubclass(type(GetDefaultExecutor()), BaseExecutor))

    # test plugin executor import based on a name string (as defined in airflow.cfg);
    # this is not identical to the first assertion!
    from airflow.executors import _get_executor
    self.assertTrue(issubclass(
        type(_get_executor('test_plugin.PluginExecutor')), BaseExecutor))
def create_subdag_operator(dag_parent, label, team):
    subdag, dependencies = create_subdag(dag_parent, label, team)
    # Since v1.10, Airflow forces the SequentialExecutor as the default
    # executor for the SubDagOperator, so we need to explicitly pass the
    # executor configured in airflow.cfg.
    sd_op = SubDagOperator(task_id=label, dag=dag_parent, subdag=subdag,
                           executor=GetDefaultExecutor())
    return sd_op, dependencies
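A possible call site for the helper above, assuming `create_subdag` returns a `(DAG, dependencies)` tuple as the snippet implies; the parent DAG id, schedule, and team label are hypothetical.

from datetime import datetime
from airflow import DAG

# Hypothetical parent DAG; id, start date, and schedule are placeholders.
parent_dag = DAG(
    dag_id="teams_pipeline",
    start_date=datetime(2020, 1, 1),
    schedule_interval="@daily",
)

# Returns the SubDagOperator plus whatever upstream dependencies
# create_subdag() computed for this team's subdag.
ingest_op, ingest_deps = create_subdag_operator(parent_dag, "ingest", team="data-eng")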
def __init__(self, subdag, executor=GetDefaultExecutor(), *args, **kwargs):
    """
    Yo dawg. This runs a sub dag. By convention, a sub dag's dag_id
    should be prefixed by its parent and a dot. As in `parent.child`.

    :param subdag: the DAG object to run as a subdag of the current DAG.
    :type subdag: airflow.DAG
    :param dag: the parent DAG
    :type dag: airflow.DAG
    """
    import airflow.models
    dag = kwargs.get('dag') or airflow.models._CONTEXT_MANAGER_DAG
    if not dag:
        raise AirflowException('Please pass in the `dag` param or call '
                               'within a DAG context manager')
    session = kwargs.pop('session')
    super(SubDagOperator, self).__init__(*args, **kwargs)

    # validate subdag name
    if dag.dag_id + '.' + kwargs['task_id'] != subdag.dag_id:
        raise AirflowException(
            "The subdag's dag_id should have the form "
            "'{{parent_dag_id}}.{{this_task_id}}'. Expected "
            "'{d}.{t}'; received '{rcvd}'.".format(
                d=dag.dag_id, t=kwargs['task_id'], rcvd=subdag.dag_id))

    # validate that subdag operator and subdag tasks don't have a
    # pool conflict
    if self.pool:
        conflicts = [t for t in subdag.tasks if t.pool == self.pool]
        if conflicts:
            # only query for pool conflicts if one may exist
            pool = (session
                    .query(Pool)
                    .filter(Pool.slots == 1)
                    .filter(Pool.pool == self.pool)
                    .first())
            if pool and any(t.pool == self.pool for t in subdag.tasks):
                raise AirflowException(
                    'SubDagOperator {sd} and subdag task{plural} {t} both '
                    'use pool {p}, but the pool only has 1 slot. The '
                    'subdag tasks will never run.'.format(
                        sd=self.task_id,
                        plural='s' if len(conflicts) > 1 else '',
                        t=', '.join(t.task_id for t in conflicts),
                        p=self.pool))

    self.subdag = subdag
    self.executor = executor
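To illustrate the `parent.child` naming rule this constructor enforces, a minimal sketch assuming Airflow 1.10.x where `GetDefaultExecutor` is importable; the dag_ids, task_ids, and dates are placeholders.

from datetime import datetime
from airflow import DAG
from airflow.executors import GetDefaultExecutor
from airflow.operators.dummy_operator import DummyOperator
from airflow.operators.subdag_operator import SubDagOperator

default_args = {"start_date": datetime(2020, 1, 1)}

parent = DAG("etl", default_args=default_args, schedule_interval="@daily")

# The subdag's dag_id must be "<parent_dag_id>.<task_id>" ("etl.load" here),
# otherwise __init__ raises the AirflowException shown above.
child = DAG("etl.load", default_args=default_args, schedule_interval="@daily")
DummyOperator(task_id="noop", dag=child)

load = SubDagOperator(
    task_id="load",
    subdag=child,
    dag=parent,
    # explicitly pass the executor from airflow.cfg instead of relying on the default
    executor=GetDefaultExecutor(),
)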
def sub_dag_operator_with_default_executor(subdag, *args, **kwargs):
    return SubDagOperator(subdag=subdag, executor=GetDefaultExecutor(),
                          *args, **kwargs)
def sync_dags(self):
    GetDefaultExecutor().sync_dags()
def get_dag_tag(self):
    _, dag_tar, pod = GetDefaultExecutor().get_image_dag_info()
    return dag_tar, pod
main_summary_export = SubDagOperator(
    subdag=export_to_parquet(
        table="moz-fx-data-shared-prod:telemetry_derived.main_summary_v4${{ds_nodash}}",
        static_partitions=["submission_date_s3={{ds_nodash}}"],
        arguments=[
            "--partition-by=sample_id",
            "--replace='{{ds_nodash}}' AS submission_date",
            "--maps-from-entries",
        ] + main_summary_bigint_columns,
        parent_dag_name=dag.dag_id,
        dag_name="main_summary_export",
        default_args=default_args,
        num_workers=40),
    task_id="main_summary_export",
    executor=GetDefaultExecutor(),
    dag=dag)

register_status(main_summary, "Main Summary", "A summary view of main pings.")

addons = bigquery_etl_query(
    task_id="addons",
    destination_table="addons_v2",
    dataset_id="telemetry_derived",
    dag=dag)

addons_export = SubDagOperator(
    subdag=export_to_parquet(
        table="moz-fx-data-derived-datasets:telemetry_derived.addons_v2${{ds_nodash}}",
        static_partitions=["submission_date_s3={{ds_nodash}}"],
        arguments=[