def test_generate_dummy_dag_code(self):
     generator = DAGGenerator()
     workflow = TestDAGGenerator.create_workflow()
     dag = generator.generator(workflow)
     self.assertIsNotNone(dag)
     self.assertFalse('DummyOperator' in dag)
     self.assertFalse('SendEventOperator' in dag)
 def test_generate_bash_dag_code(self):
     from datetime import datetime, timedelta
     default_args = {
         'owner': 'airflow',
         'depends_on_past': False,
         'start_date': datetime(2015, 12, 1),
         'email': ['*****@*****.**'],
         'email_on_failure': False,
         'email_on_retry': False,
         'retries': 1,
         'retry_delay': timedelta(minutes=5),
         'schedule_interval': None,
     }
     generator = DAGGenerator()
     workflow = TestDAGGenerator.create_bash_workflow()
     dag = generator.generator(workflow, 'aa', default_args)
     print("\n\n")
     print(dag)
 def test_generate_bash_dag_code(self):
     from datetime import datetime, timedelta
     default_args = {
         'owner': 'airflow',
         'depends_on_past': False,
         'start_date': datetime(2015, 12, 1),
         'email': ['*****@*****.**'],
         'email_on_failure': False,
         'email_on_retry': False,
         'retries': 1,
         'retry_delay': timedelta(minutes=5),
         'schedule_interval': None,
     }
     generator = DAGGenerator()
     workflow = TestDAGGenerator.create_bash_workflow()
     dag = generator.generator(workflow, 'aa', default_args)
     self.assertIsNotNone(dag)
     for i in range(3):
         self.assertTrue(
             "BashOperator(task_id='{0}_job', dag=dag, bash_command='echo \"{0} hello word!\"')".format(i) in dag)
Example #4
def _generate_airflow_file_text(ai_graph: AIGraph = default_graph(),
                                project_desc: ProjectDesc = ProjectDesc(),
                                dag_id=None) -> Optional[str]:
    """
    Submit ai flow to schedule.

    :param ai_graph: The ai graph constructed from project.
    :param project_desc: The project description.
    :return: Workflow id.
    """
    ex_workflow = generate_workflow(ai_graph, project_desc)
    for job in ex_workflow.jobs.values():
        register_job_meta(workflow_id=ex_workflow.workflow_id, job=job)
    _default_project.upload_project_package(ex_workflow)
    return DAGGenerator().generator(ex_workflow, dag_id)
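A minimal sketch of driving this helper, assuming the surrounding AI Flow project context (default graph, project package upload, job meta registration) is already configured; the dag_id and output path below are hypothetical:

# Hypothetical driver for _generate_airflow_file_text (a sketch, not the
# project's real entry point); the dag_id and output path are made up.
dag_code = _generate_airflow_file_text(ai_graph=default_graph(),
                                       project_desc=ProjectDesc(),
                                       dag_id='my_project.my_workflow')
if dag_code is not None:
    # Persist the generated DAG text so an Airflow scheduler can pick it up.
    with open('/tmp/my_project.my_workflow.py', 'w') as f:
        f.write(dag_code)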
 def test_generate_dummy_dag_code(self):
     generator = DAGGenerator()
     workflow = TestDAGGenerator.create_workflow()
     dag = generator.generator(workflow)
     print("\n\n")
     print(dag)
Example #6
 def test_generate_simple_bash_dag_code(self):
     generator = DAGGenerator()
     workflow = TestDAGGenerator.create_simple_bash_workflow()
     dag = generator.generator(workflow)
     self.assertIsNotNone(dag)
     print(dag)
class AirFlowScheduler(AbstractScheduler):
    def __init__(self, config: SchedulerConfig):
        super().__init__(config)
        self.dag_generator = DAGGenerator()
        self._airflow_client = None

    @classmethod
    def airflow_dag_id(cls, namespace, workflow_name):
        return '{}.{}'.format(namespace, workflow_name)

    @classmethod
    def dag_id_to_namespace_workflow(cls, dag_id: Text):
        namespace, workflow_name = dag_id.split('.', 1)
        return namespace, workflow_name

    @classmethod
    def parse_namespace_workflow_name(cls, dag_id: Text):
        index = dag_id.find('.')
        # The namespace is everything before the first dot; the workflow name is the rest.
        return dag_id[0:index], dag_id[index + 1:]

    @classmethod
    def airflow_state_to_state(cls, state):
        if State.SUCCESS == state:
            return job_meta.State.FINISHED
        elif State.FAILED == state:
            return job_meta.State.FAILED
        elif State.RUNNING == state:
            return job_meta.State.RUNNING
        elif State.KILLING == state:
            return job_meta.State.KILLING
        elif State.KILLED == state or State.SHUTDOWN == state:
            return job_meta.State.KILLED
        else:
            return job_meta.State.INIT

    @classmethod
    def dag_exist(cls, dag_id):
        with create_session() as session:
            dag = session.query(DagModel).filter(
                DagModel.dag_id == dag_id).first()
            return dag is not None

    @classmethod
    def dagrun_exist(cls, run_id):
        with create_session() as session:
            dag_run = session.query(DagRun).filter(
                DagRun.run_id == run_id).first()
            return dag_run is not None

    @property
    def airflow_client(self):
        if self._airflow_client is None:
            self._airflow_client = EventSchedulerClient(
                server_uri=self.config.notification_service_uri(),
                namespace=SCHEDULER_NAMESPACE)
        return self._airflow_client

    def submit_workflow(self,
                        workflow: Workflow,
                        project_desc: ProjectDesc,
                        args: Dict = None) -> WorkflowInfo:
        workflow_name = workflow.workflow_name
        dag_id = self.airflow_dag_id(project_desc.project_name,
                                     workflow.workflow_name)
        code_text = self.dag_generator.generator(workflow, dag_id, args)
        # Restore the original name in case the generator modified it on the workflow.
        workflow.workflow_name = workflow_name
        deploy_path = self.config.properties().get('airflow_deploy_path')
        if deploy_path is None:
            raise Exception("airflow_deploy_path config not set!")
        if not os.path.exists(deploy_path):
            os.makedirs(deploy_path)
        airflow_file_path = os.path.join(deploy_path, dag_id + '.py')
        if os.path.exists(airflow_file_path):
            os.remove(airflow_file_path)
        # Write the temp file into the deploy directory so the final rename
        # stays on one filesystem (os.rename cannot cross devices).
        with NamedTemporaryFile(mode='w+t',
                                prefix=dag_id,
                                suffix='.py',
                                dir=deploy_path,
                                delete=False) as f:
            f.write(code_text)
        os.rename(f.name, airflow_file_path)
        self.airflow_client.trigger_parse_dag(airflow_file_path)
        return WorkflowInfo(namespace=project_desc.project_name,
                            workflow_name=workflow.workflow_name)

    def delete_workflow(self, project_name: Text,
                        workflow_name: Text) -> Optional[WorkflowInfo]:
        dag_id = self.airflow_dag_id(project_name, workflow_name)
        if not self.dag_exist(dag_id):
            return None
        deploy_path = self.config.properties().get('airflow_deploy_path')
        if deploy_path is None:
            raise Exception("airflow_deploy_path config not set!")
        airflow_file_path = os.path.join(deploy_path, dag_id + '.py')
        if os.path.exists(airflow_file_path):
            os.remove(airflow_file_path)

        # stop all workflow executions
        self.kill_all_workflow_execution(project_name, workflow_name)

        # clean db meta
        with create_session() as session:
            dag = session.query(DagModel).filter(
                DagModel.dag_id == dag_id).first()
            session.query(DagTag).filter(DagTag.dag_id == dag_id).delete()
            session.query(DagModel).filter(DagModel.dag_id == dag_id).delete()
            session.query(DagCode).filter(
                DagCode.fileloc_hash == DagCode.dag_fileloc_hash(
                    dag.fileloc)).delete()
            session.query(SerializedDagModel).filter(
                SerializedDagModel.dag_id == dag_id).delete()
            session.query(DagRun).filter(DagRun.dag_id == dag_id).delete()
            session.query(TaskState).filter(
                TaskState.dag_id == dag_id).delete()
            session.query(TaskInstance).filter(
                TaskInstance.dag_id == dag_id).delete()
            session.query(TaskExecution).filter(
                TaskExecution.dag_id == dag_id).delete()
        return WorkflowInfo(namespace=project_name,
                            workflow_name=workflow_name)

    def pause_workflow_scheduling(self, project_name: Text,
                                  workflow_name: Text) -> WorkflowInfo:
        dag_id = self.airflow_dag_id(project_name, workflow_name)
        DagModel.get_dagmodel(dag_id=dag_id).set_is_paused(is_paused=True)
        return WorkflowInfo(namespace=project_name,
                            workflow_name=workflow_name)

    def resume_workflow_scheduling(self, project_name: Text,
                                   workflow_name: Text) -> WorkflowInfo:
        dag_id = self.airflow_dag_id(project_name, workflow_name)
        DagModel.get_dagmodel(dag_id=dag_id).set_is_paused(is_paused=False)
        return WorkflowInfo(namespace=project_name,
                            workflow_name=workflow_name)

    def get_workflow(self, project_name: Text,
                     workflow_name: Text) -> Optional[WorkflowInfo]:
        dag_id = self.airflow_dag_id(project_name, workflow_name)
        with create_session() as session:
            dag = session.query(DagModel).filter(
                DagModel.dag_id == dag_id).first()
            if dag is None:
                return None
            else:
                return WorkflowInfo(namespace=project_name,
                                    workflow_name=workflow_name)

    def list_workflows(self, project_name: Text) -> List[WorkflowInfo]:
        with create_session() as session:
            dag_list = session.query(DagModel).filter(
                DagModel.dag_id.startswith('{}.'.format(project_name))).all()
            if not dag_list:
                return []
            else:
                result = []
                for dag in dag_list:
                    _, workflow_name = self.parse_namespace_workflow_name(
                        dag.dag_id)
                    result.append(
                        WorkflowInfo(namespace=project_name,
                                     workflow_name=workflow_name))
                return result

    def start_new_workflow_execution(
            self, project_name: Text,
            workflow_name: Text) -> Optional[WorkflowExecutionInfo]:
        dag_id = self.airflow_dag_id(project_name, workflow_name)
        deploy_path = self.config.properties().get('airflow_deploy_path')
        if deploy_path is None:
            raise Exception("airflow_deploy_path config not set!")
        if not self.dag_exist(dag_id):
            return None
        context: ExecutionContext = self.airflow_client.schedule_dag(dag_id)
        return WorkflowExecutionInfo(workflow_info=WorkflowInfo(
            namespace=project_name, workflow_name=workflow_name),
                                     execution_id=context.dagrun_id,
                                     state=job_meta.State.INIT)

    def kill_all_workflow_execution(
            self, project_name: Text,
            workflow_name: Text) -> List[WorkflowExecutionInfo]:
        workflow_execution_list = self.list_workflow_executions(
            project_name, workflow_name)
        for we in workflow_execution_list:
            if we.state == job_meta.State.RUNNING:
                self.kill_workflow_execution(we.execution_id)
        return workflow_execution_list

    def kill_workflow_execution(
            self, execution_id: Text) -> Optional[WorkflowExecutionInfo]:
        with create_session() as session:
            dag_run = session.query(DagRun).filter(
                DagRun.run_id == execution_id).first()
            if dag_run is None:
                return None
            project_name, workflow_name = self.dag_id_to_namespace_workflow(
                dag_run.dag_id)
            context: ExecutionContext = ExecutionContext(execution_id)
            current_context = self.airflow_client.stop_dag_run(
                dag_run.dag_id, context)
            return WorkflowExecutionInfo(
                workflow_info=WorkflowInfo(namespace=project_name,
                                           workflow_name=workflow_name),
                execution_id=current_context.dagrun_id,
                state=job_meta.State.KILLING)

    def get_workflow_execution(
            self, execution_id: Text) -> Optional[WorkflowExecutionInfo]:
        with create_session() as session:
            dag_run = session.query(DagRun).filter(
                DagRun.run_id == execution_id).first()
            if dag_run is None:
                return None
            else:
                state = self.airflow_state_to_state(dag_run.state)
                project_name, workflow_name = self.dag_id_to_namespace_workflow(
                    dag_run.dag_id)
                return WorkflowExecutionInfo(workflow_info=WorkflowInfo(
                    namespace=project_name, workflow_name=workflow_name),
                                             execution_id=dag_run.run_id,
                                             state=state)

    def list_workflow_executions(
            self, project_name: Text,
            workflow_name: Text) -> List[WorkflowExecutionInfo]:
        dag_id = self.airflow_dag_id(project_name, workflow_name)
        with create_session() as session:
            dagrun_list = session.query(DagRun).filter(
                DagRun.dag_id == dag_id).all()
            if not dagrun_list:
                return []
            else:
                result = []
                for dagrun in dagrun_list:
                    state = self.airflow_state_to_state(dagrun.state)
                    result.append(
                        WorkflowExecutionInfo(workflow_info=WorkflowInfo(
                            namespace=project_name,
                            workflow_name=workflow_name),
                                              execution_id=dagrun.run_id,
                                              state=state))
                return result

    def start_job(self, job_name: Text,
                  execution_id: Text) -> Optional[JobInfo]:
        with create_session() as session:
            dag_run = session.query(DagRun).filter(
                DagRun.run_id == execution_id).first()
            if dag_run is None:
                return None
            if dag_run.state != State.RUNNING:
                raise Exception(
                    'execution: {} state: {} cannot trigger job.'.format(
                        execution_id, dag_run.state))
            task = dag_run.get_task_instance(job_name, session)
            if task is None:
                return None
            if task.state in State.unfinished:
                raise Exception('job: {} state: {} cannot start!'.format(
                    job_name, task.state))
            self.airflow_client.schedule_task(
                dag_id=dag_run.dag_id,
                task_id=job_name,
                action=SchedulingAction.START,
                context=ExecutionContext(dagrun_id=dag_run.run_id))
            project_name, workflow_name = self.dag_id_to_namespace_workflow(
                dag_run.dag_id)
            return JobInfo(
                job_name=job_name,
                state=self.airflow_state_to_state(task.state),
                workflow_execution=WorkflowExecutionInfo(
                    workflow_info=WorkflowInfo(namespace=project_name,
                                               workflow_name=workflow_name),
                    execution_id=dag_run.run_id,
                    state=self.airflow_state_to_state(dag_run.state)))

    def stop_job(self, job_name: Text,
                 execution_id: Text) -> Optional[JobInfo]:
        with create_session() as session:
            dag_run = session.query(DagRun).filter(
                DagRun.run_id == execution_id).first()
            if dag_run is None:
                return None
            if dag_run.state != State.RUNNING:
                raise Exception(
                    'execution: {} state: {} cannot trigger job.'.format(
                        execution_id, dag_run.state))
            task = dag_run.get_task_instance(job_name, session)
            if task is None:
                return None
            if task.state != State.RUNNING:
                raise Exception('job: {} state: {} cannot stop!'.format(
                    job_name, task.state))
            self.airflow_client.schedule_task(
                dag_id=dag_run.dag_id,
                task_id=job_name,
                action=SchedulingAction.STOP,
                context=ExecutionContext(dagrun_id=dag_run.run_id))
            project_name, workflow_name = self.dag_id_to_namespace_workflow(
                dag_run.dag_id)
            return JobInfo(
                job_name=job_name,
                state=self.airflow_state_to_state(task.state),
                workflow_execution=WorkflowExecutionInfo(
                    workflow_info=WorkflowInfo(namespace=project_name,
                                               workflow_name=workflow_name),
                    execution_id=dag_run.run_id,
                    state=self.airflow_state_to_state(dag_run.state)))

    def restart_job(self, job_name: Text,
                    execution_id: Text) -> Optional[JobInfo]:
        with create_session() as session:
            dag_run = session.query(DagRun).filter(
                DagRun.run_id == execution_id).first()
            if dag_run is None:
                return None
            if dag_run.state != State.RUNNING:
                raise Exception(
                    'execution: {} state: {} cannot trigger job.'.format(
                        execution_id, dag_run.state))
            task = dag_run.get_task_instance(job_name, session)
            if task is None:
                return None
            self.airflow_client.schedule_task(
                dag_id=dag_run.dag_id,
                task_id=job_name,
                action=SchedulingAction.RESTART,
                context=ExecutionContext(dagrun_id=dag_run.run_id))
            project_name, workflow_name = self.dag_id_to_namespace_workflow(
                dag_run.dag_id)
            return JobInfo(
                job_name=job_name,
                state=self.airflow_state_to_state(task.state),
                workflow_execution=WorkflowExecutionInfo(
                    workflow_info=WorkflowInfo(namespace=project_name,
                                               workflow_name=workflow_name),
                    execution_id=dag_run.run_id,
                    state=self.airflow_state_to_state(dag_run.state)))

    def get_job(self, job_name: Text, execution_id: Text) -> Optional[JobInfo]:
        with create_session() as session:
            dag_run = session.query(DagRun).filter(
                DagRun.run_id == execution_id).first()
            if dag_run is None:
                return None
            task = session.query(TaskInstance).filter(
                TaskInstance.dag_id == dag_run.dag_id,
                TaskInstance.execution_date == dag_run.execution_date,
                TaskInstance.task_id == job_name).first()
            if task is None:
                return None
            else:
                project_name, workflow_name = self.dag_id_to_namespace_workflow(
                    dag_run.dag_id)
                return JobInfo(job_name=job_name,
                               state=self.airflow_state_to_state(task.state),
                               workflow_execution=WorkflowExecutionInfo(
                                   workflow_info=WorkflowInfo(
                                       namespace=project_name,
                                       workflow_name=workflow_name),
                                   execution_id=dag_run.run_id,
                                   state=self.airflow_state_to_state(
                                       dag_run.state)))

    def list_jobs(self, execution_id: Text) -> List[JobInfo]:
        with create_session() as session:
            dag_run = session.query(DagRun).filter(
                DagRun.run_id == execution_id).first()
            if dag_run is None:
                return []
            task_list = session.query(TaskInstance).filter(
                TaskInstance.dag_id == dag_run.dag_id,
                TaskInstance.execution_date == dag_run.execution_date).all()
            if not task_list:
                return []
            else:
                result = []
                project_name, workflow_name = self.dag_id_to_namespace_workflow(
                    dag_run.dag_id)
                for task in task_list:
                    job = JobInfo(
                        job_name=task.task_id,
                        state=self.airflow_state_to_state(task.state),
                        workflow_execution=WorkflowExecutionInfo(
                            workflow_info=WorkflowInfo(
                                namespace=project_name,
                                workflow_name=workflow_name),
                            execution_id=dag_run.run_id,
                            state=self.airflow_state_to_state(dag_run.state)))
                    result.append(job)
                return result
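
A minimal usage sketch for AirFlowScheduler, assuming a SchedulerConfig whose properties() include 'airflow_deploy_path' and whose notification_service_uri() points at a running notification service; the workflow and project_desc objects are placeholders constructed elsewhere:

# Hypothetical end-to-end use of AirFlowScheduler (a sketch under the
# assumptions above; workflow and project_desc construction is elided).
scheduler = AirFlowScheduler(config)               # config: SchedulerConfig
scheduler.submit_workflow(workflow, project_desc)  # writes and parses the DAG file
execution = scheduler.start_new_workflow_execution(
    project_desc.project_name, workflow.workflow_name)
if execution is not None:
    info = scheduler.get_workflow_execution(execution.execution_id)
    print(info.state)
    scheduler.kill_workflow_execution(execution.execution_id)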