Example #1
    # Assumed imports (Airflow 2.x layout): DAG, TaskInstance from
    # airflow.models; DummyOperator from airflow.operators.dummy_operator;
    # create_session from airflow.utils.session.
    def _prepare_db(self):
        # Register the DAG in the metadata DB, then persist a TaskInstance
        # on its third try so the test has a known row to work against.
        dag = DAG(self.DAG_ID, start_date=self.DEFAULT_DATE)
        dag.sync_to_db()
        with create_session() as session:
            op = DummyOperator(task_id=self.TASK_ID, dag=dag)
            self.ti = TaskInstance(task=op, execution_date=self.DEFAULT_DATE)
            self.ti.try_number = 3
            session.merge(self.ti)
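
A test method can then read the prepared row back through the same session helper. This is a minimal sketch under the same assumptions as the snippet above (the test name and assertion are hypothetical):

    def test_prepared_task_instance(self):  # hypothetical test name
        self._prepare_db()
        with create_session() as session:
            ti = (
                session.query(TaskInstance)
                .filter(
                    TaskInstance.task_id == self.TASK_ID,
                    TaskInstance.execution_date == self.DEFAULT_DATE,
                )
                .one_or_none()
            )
            self.assertIsNotNone(ti)  # the merged TaskInstance was persisted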
Example #2
    # Assumed imports (Airflow 2.x layout): DAG, TaskInstance from
    # airflow.models; DummyOperator from airflow.operators.dummy_operator;
    # timezone from airflow.utils; create_session from airflow.utils.session.
    def _prepare_db(self):
        # Sync the DAG to the metadata DB and also bag it in the test app's
        # DagBag so web endpoints can resolve it by DAG id.
        dagbag = self.app.dag_bag  # pylint: disable=no-member
        dag = DAG(self.DAG_ID, start_date=timezone.parse(self.default_time))
        dag.sync_to_db()
        dagbag.bag_dag(dag=dag, root_dag=dag)
        with create_session() as session:
            self.ti = TaskInstance(
                task=DummyOperator(task_id=self.TASK_ID, dag=dag),
                execution_date=timezone.parse(self.default_time),
            )
            self.ti.try_number = 1
            session.merge(self.ti)
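
Snippets like these live inside a test class that supplies the attributes they reference. A minimal sketch of that scaffolding, where every name below (class name, attribute values, the test-app factory) is an assumption mirroring the code above:

import unittest

from airflow.www import app as application  # assumed source of the test app


class TestTaskInstanceView(unittest.TestCase):  # hypothetical class name
    DAG_ID = 'test_dag'
    TASK_ID = 'test_task'
    default_time = '2020-06-10T20:00:00+00:00'

    def setUp(self):
        # In Airflow 2.x, create_app(testing=True) attaches a DagBag to the
        # Flask app, which is what self.app.dag_bag reads in the snippet above.
        self.app = application.create_app(testing=True)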
import os
import subprocess
from datetime import datetime, timedelta

from airflow import DAG
from airflow.operators.python_operator import PythonOperator  # Airflow 1.10 layout

# DAG_ID, APACHE_SPARK_HOME, the OPS_NAME_* constants and log_string are
# assumed to be defined elsewhere in this module.


class ComputeEngineApplicationScheduler:
    """
    ComputeEngineApplicationScheduler: a scheduler for Compute Engine applications.
    """
    def __init__(self):
        self.dag = None

    def __airflow_dag_init(self):
        dag_default_args = {
            'owner': 'airflow',
            'depends_on_past': False,
            # Use a fixed start_date; a dynamic value such as datetime.today()
            # makes Airflow's scheduling non-deterministic.
            'start_date': datetime(2021, 1, 1),
            'email': ['*****@*****.**'],
            'email_on_failure': False,
            'email_on_retry': False,
            'retries': 1,
            'retry_delay': timedelta(minutes=5),
        }
        self.dag = DAG(
            DAG_ID,
            default_args=dag_default_args,
            description='Compute Engine Application DAG',
            schedule_interval=timedelta(days=1),
        )

    def create_dag(self):
        """
        create_dag: create the Airflow DAG for execution.
        """
        self.__airflow_dag_init()
        ops_static_data_processor = self.create_operator(OPS_NAME_STATICDATAPROCESSOR)
        ops_dynamic_data_processor = self.create_operator(OPS_NAME_DYNAMICDATAPROCESSOR)
        ops_core_data_generator = self.create_operator(OPS_NAME_COREDATAGENERATOR)
        [ops_static_data_processor, ops_dynamic_data_processor] >> ops_core_data_generator # pylint: disable=pointless-statement
        self.dag.sync_to_db()
        return self.dag

    @staticmethod
    def __apache_spark_submit(**kwargs):
        # opt_name arrives via op_kwargs; provide_context also passes the
        # Airflow task context into kwargs.
        opt_name = kwargs['opt_name']
        jar_dir = f"{os.environ['HOME']}/spark_playground/template/scala/target/scala-2.11"
        jar_name = "computeapplicationentrypoint_2.11-1.0.jar"
        try:
            submit_pipe = [
                f'{APACHE_SPARK_HOME}/spark-submit',
                '--class', 'com.huawei.compute.ComputeApplicationEntrypoint',
                '--master', 'local[*]',
                # '--driver-memory', '4g',
                # '--executor-memory', '4g',
                # '--executor-cores', '8',
                f'{jar_dir}/{jar_name}',
                opt_name,
            ]
            log_string(f"Submitting application with entrypoint: {opt_name}")
            log_string(f"Cmd parameters: {submit_pipe}")
            # subprocess.call returns the exit status instead of raising, so
            # only OS-level failures (e.g. a missing binary) reach the except
            # clause below.
            subprocess.call(submit_pipe)
        except Exception as err: # pylint: disable=broad-except
            log_string(f"Error when submitting application: {err}")
        log_string(f"Complete application submission with entrypoint: {opt_name}")

    def create_operator(self, opt_name):
        """
        create_operator: create an Airflow operator attached to the DAG.
        """
        opt = PythonOperator(
            task_id=opt_name,
            provide_context=True,  # Airflow 1.10 style; Airflow 2 passes context automatically
            python_callable=self.__apache_spark_submit,
            op_kwargs={'opt_name': opt_name},
            dag=self.dag)
        return opt
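
For Airflow to discover the DAG, a DAG file would instantiate the scheduler at module import time. A minimal sketch (the variable names are assumptions; Airflow looks for DAG objects in the module's globals):

scheduler = ComputeEngineApplicationScheduler()
dag = scheduler.create_dag()  # module-level DAG object, discoverable by Airflow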