Exemplo n.º 1
0
def register_deployed_dags_command(url: str = None,
                                   user: str = None,
                                   pwd: str = None,
                                   verbose: int = 0):
    """
    Store a DeployedDAG row for every DAG listed by Airflow that is not registered yet.

    The function waits for Airflow to answer (up to 20 attempts, 15 seconds
    apart), lists the DAGs through the client and saves the ones whose id is
    not already present among the DeployedDAG objects.

    :param str url: passed as first argument to the Airflow client
    :param str user: passed as second argument to the Airflow client
    :param str pwd: passed as third argument to the Airflow client
    :param int verbose: progress messages are printed only when this equals 1
    :return: False when Airflow never became reachable, True otherwise. True is
        returned even if the database write was rolled back (the error is
        logged); this is kept for backward compatibility with existing callers.
    :rtype: bool
    """
    # Full imports
    import logging as log
    import time

    # Partial imports
    from sqlalchemy.exc import DBAPIError, IntegrityError

    # Internal modules imports
    from cornflow_client.airflow.api import Airflow
    from ..models import DeployedDAG
    from cornflow_core.shared import db

    af_client = Airflow(url, user, pwd)
    max_attempts = 20
    attempts = 0
    while not af_client.is_alive() and attempts < max_attempts:
        attempts += 1
        if verbose == 1:
            print(f"Airflow is not reachable (attempt {attempts})")
        time.sleep(15)

    # One last check after the final sleep before giving up.
    if not af_client.is_alive():
        if verbose == 1:
            print("Airflow is not reachable")
        return False

    # A set keeps the "already registered" test cheap for each listed DAG.
    dags_registered = {dag.id for dag in DeployedDAG.get_all_objects()}

    response = af_client.get_model_dags()
    dag_list = response.json()["dags"]

    processed_dags = [
        DeployedDAG({
            "id": dag["dag_id"],
            "description": dag["description"]
        }) for dag in dag_list if dag["dag_id"] not in dags_registered
    ]

    saved = True
    try:
        # bulk_save_objects emits its INSERT statements right away, so it has
        # to be inside the try for the rollback/logging below to cover it.
        if processed_dags:
            db.session.bulk_save_objects(processed_dags)
        db.session.commit()
    except IntegrityError as e:
        # IntegrityError is a subclass of DBAPIError: it must be caught first.
        db.session.rollback()
        log.error(f"Integrity error on deployed dags register: {e}")
        saved = False
    except DBAPIError as e:
        db.session.rollback()
        log.error(f"Unknown error on deployed dags register: {e}")
        saved = False

    if verbose == 1:
        if not processed_dags:
            print("No new DAGs")
        elif saved:
            print(f"DAGs registered: {processed_dags}")
        else:
            # Do not claim success after a rollback.
            print("The new DAGs could not be registered")
    return True
Exemplo n.º 2
0
def update_schemas_command(url, user, pwd, verbose):
    """
    Ask Airflow to update the DAG schemas once the service is reachable.

    Airflow is polled up to 20 times, sleeping 15 seconds after each failed
    check, before the update request is sent.

    :param url: passed as first argument to the Airflow client
    :param user: passed as second argument to the Airflow client
    :param pwd: passed as third argument to the Airflow client
    :param verbose: progress messages are printed only when this equals 1
    :return: False when Airflow never became reachable; True otherwise,
        whatever the status code of the update request was
    :rtype: bool
    """
    import time

    from cornflow_client.airflow.api import Airflow

    def _report(message):
        # Single place for the verbosity check.
        if verbose == 1:
            print(message)

    airflow = Airflow(url, user, pwd)
    retry_limit = 20
    tries = 0
    while not airflow.is_alive() and tries < retry_limit:
        tries += 1
        _report(f"Airflow is not reachable (attempt {tries})")
        time.sleep(15)

    # Final check after the last sleep.
    if not airflow.is_alive():
        _report("Airflow is not reachable")
        return False

    outcome = airflow.update_schemas()
    if outcome.status_code == 200:
        _report("DAGs schemas updated")
    else:
        _report("The DAGs schemas were not updated properly")

    return True
Exemplo n.º 3
0
 def run_update_all_schemas_until_finished(self):
     """
     Trigger the update_all_schemas dag and block until it stops running.

     The dag run status is polled every 2 seconds and each observed state is
     printed. There is no timeout: the loop ends only when the reported state
     is something other than "running".

     :return: the Airflow client used to launch and poll the dag run
     """
     dag_name = "update_all_schemas"
     airflow = Airflow(url="http://localhost:8080", user="******", pwd="admin")
     launch = airflow.consume_dag_run(dag_name=dag_name, payload={})
     self.assertEqual(launch.status_code, 200)
     run_info = launch.json()
     while True:
         time.sleep(2)
         poll = airflow.get_dag_run_status(dag_name, run_info["dag_run_id"])
         state = poll.json()["state"]
         print("STATUS OF update_all_schemas: {}".format(state))
         if state != "running":
             break
     return airflow
Exemplo n.º 4
0
    def get(self, dag_name):
        """
        API method to get the input, output and config schemas for a given dag

        :param str dag_name: name of the dag whose schemas are requested
        :return: the value returned by the Airflow client's
            ``get_schemas_for_dag_name`` for this dag
        :raises NoPermission: (status code 403) when the permission check for
            the requesting user and this dag fails
        :raises AirflowError: when Airflow is not alive
        """
        user = Auth().get_user_from_header(request.headers)
        allowed = PermissionsDAG.check_if_has_permissions(user_id=user.id,
                                                          dag_id=dag_name)
        # Reject early so the Airflow path below is not nested.
        if not allowed:
            raise NoPermission(
                error="User does not have permission to access this dag",
                status_code=403,
            )

        airflow = Airflow.from_config(current_app.config)
        if not airflow.is_alive():
            log.error(
                "Airflow not accessible when getting schema {}".format(
                    dag_name))
            raise AirflowError(error="Airflow is not accessible")

        # Probe Airflow for the dag; the reply itself is not used.
        # NOTE(review): presumably the client raises if the dag is unknown - confirm.
        airflow.get_dag_info(dag_name)

        log.info("User gets schema {}".format(dag_name))
        # The probe went through: fetch the schemas.
        return airflow.get_schemas_for_dag_name(dag_name)
 def test_connect_from_config(self):
     """A client built with Airflow.from_config must report Airflow as alive."""
     settings = {
         "AIRFLOW_URL": "http://127.0.0.1:8080",
         "AIRFLOW_USER": "******",
         "AIRFLOW_PWD": "admin",
     }
     airflow = Airflow.from_config(settings)
     alive = airflow.is_alive()
     self.assertTrue(alive)
Exemplo n.º 6
0
 def post(self, idx):
     """
     Stop an execution: set its dag run to fail in Airflow and mark it as stopped.

     :param idx: ID of the execution to stop
     :return: a dictionary with a confirmation message and the HTTP status code 200
     :rtype: Tuple(dict, integer)
     :raises ObjectDoesNotExist: when no execution with this ID is found for the user
     :raises AirflowError: when Airflow is not alive
     """
     execution = ExecutionModel.get_one_object(user=self.get_user(),
                                               idx=idx)
     if execution is None:
         raise ObjectDoesNotExist()
     af_client = Airflow.from_config(current_app.config)
     if not af_client.is_alive():
         raise AirflowError(error="Airflow is not accessible")
     # The reply is not inspected, so it is not bound to a name.
     af_client.set_dag_run_to_fail(
         dag_name=execution.schema, dag_run_id=execution.dag_run_id)
     execution.update_state(EXEC_STATE_STOPPED)
     log.info(f"User {self.get_user()} stopped execution {idx}")
     return {"message": "The execution has been stopped"}, 200
Exemplo n.º 7
0
    def get(self, idx):
        """
        API method to get the status of the execution created by the user
        It requires authentication to be passed in the form of a token that has to be linked to
        an existing session (login) made by a user.

        Airflow is only queried when the stored state is running or unknown;
        in that case the state reported by Airflow is stored on the execution
        before it is returned.

        :param str idx:  ID of the execution
        :return: the execution and an integer with the HTTP status code (200)
        :raises ObjectDoesNotExist: when the execution is not found
        :raises AirflowError: when the execution has no dag run, Airflow is not
            alive or the status request fails; the execution state is updated
            before raising
        """
        execution = self.data_model.get_one_object(user=self.get_user(),
                                                   idx=idx)
        if execution is None:
            raise ObjectDoesNotExist()

        # Any other stored state is returned as is, without asking Airflow.
        must_ask_airflow = execution.state in [EXEC_STATE_RUNNING,
                                               EXEC_STATE_UNKNOWN]
        if not must_ask_airflow:
            return execution, 200

        def _abort(target, error, state=EXEC_STATE_UNKNOWN):
            # Store the failure state on the execution, then raise.
            message = EXECUTION_STATE_MESSAGE_DICT[state]
            target.update_state(state)
            raise AirflowError(error=error,
                               payload={"message": message, "state": state})

        run_id = execution.dag_run_id
        if not run_id:
            # Without a stored dag_run_id there is nothing to ask Airflow about.
            _abort(
                execution,
                state=EXEC_STATE_ERROR,
                error="The execution has no dag_run associated",
            )

        airflow = Airflow.from_config(current_app.config)
        if not airflow.is_alive():
            _abort(execution, "Airflow is not accessible")

        try:
            # TODO: get the dag_name from somewhere!
            reply = airflow.get_dag_run_status(dag_name=execution.schema,
                                               dag_run_id=run_id)
        except AirflowError as err:
            _abort(execution, f"Airflow responded with an error: {err}")

        # Airflow states without a mapping are stored as unknown.
        airflow_state = reply.json()["state"]
        new_state = AIRFLOW_TO_STATE_MAP.get(airflow_state, EXEC_STATE_UNKNOWN)
        execution.update_state(new_state)
        return execution, 200
Exemplo n.º 8
0
 def get(self):
     """
     Report the health of Airflow and of the database connection.

     Airflow is healthy when the client reports it alive; cornflow is healthy
     when a "SELECT 1" statement runs without raising.

     :return: a dictionary with the keys "cornflow_status" and "airflow_status"
     :rtype: dict
     """
     af_client = Airflow.from_config(current_app.config)
     airflow_status = (STATUS_HEALTHY
                       if af_client.is_alive() else STATUS_UNHEALTHY)
     try:
         db.engine.execute("SELECT 1")
     except Exception:
         # Any failure of the probe query counts as unhealthy.
         cornflow_status = STATUS_UNHEALTHY
     else:
         cornflow_status = STATUS_HEALTHY
     report = {
         "cornflow_status": cornflow_status,
         "airflow_status": airflow_status
     }
     return report
 def test_bad_connection(self):
     """A client pointed at port 8088 with a different password must not be alive."""
     unreachable = Airflow(
         url="http://127.0.0.1:8088",
         user="******",
         pwd="admin!",
     )
     alive = unreachable.is_alive()
     self.assertFalse(alive)
 def setUp(self):
     """Create the Airflow client stored on ``self.client``."""
     connection = {
         "url": "http://127.0.0.1:8080",
         "user": "******",
         "pwd": "admin",
     }
     self.client = Airflow(**connection)
class TestAirflowClient(TestCase):
    """
    Tests of the Airflow client against locally running services.

    The URLs used below are http://127.0.0.1:8080 for Airflow and
    http://127.0.0.1:5050/ for cornflow (used by test_run_dag).
    """

    def setUp(self):
        """Create the Airflow client shared by the tests."""
        self.client = Airflow(url="http://127.0.0.1:8080",
                              user="******",
                              pwd="admin")

    def test_alive(self):
        """The default client reports Airflow as alive."""
        self.assertTrue(self.client.is_alive())

    def test_connect_from_config(self):
        """A client built from a config mapping reports Airflow as alive."""
        client = Airflow.from_config({
            "AIRFLOW_URL": "http://127.0.0.1:8080",
            "AIRFLOW_USER": "******",
            "AIRFLOW_PWD": "admin",
        })
        self.assertTrue(client.is_alive())

    def test_bad_connection(self):
        """A client with a different port and password is not alive."""
        client = Airflow(url="http://127.0.0.1:8088",
                         user="******",
                         pwd="admin!")
        self.assertFalse(client.is_alive())

    def test_update_schemas(self):
        """update_schemas answers with status code 200."""
        response = self.client.update_schemas()
        self.assertEqual(200, response.status_code)

    def test_update_dag_registry(self):
        """update_dag_registry answers with status code 200."""
        response = self.client.update_dag_registry()
        self.assertEqual(200, response.status_code)

    def test_run_dag(self):
        """Create an execution without running it, launch it and check its state."""
        data = _load_file(PULP_EXAMPLE)
        cf_client = CornFlow(url="http://127.0.0.1:5050/")
        # Called for its effect on the client; the returned value is not needed.
        cf_client.login("user", "UserPassword1!")
        instance = cf_client.create_instance(data, "test_example",
                                             "test_description")
        execution = cf_client.create_execution(
            instance_id=instance["id"],
            config={
                "solver": "PULP_CBC_CMD",
                "timeLimit": 100
            },
            name="test_execution",
            description="execution_description",
            schema="solve_model_dag",
            run=False,
        )

        # Check that execution is not run
        status = cf_client.get_status(execution_id=execution["id"])
        self.assertEqual(-4, status["state"])

        # Run the execution
        response = self.client.run_dag(execution_id=execution["id"])
        self.assertEqual(200, response.status_code)
        self.assertIn("dag_run_id", response.json())

        # Check that is optimal
        # NOTE(review): fixed 10 second wait; the test fails if the run takes longer.
        time.sleep(10)
        status = cf_client.get_status(execution_id=execution["id"])
        self.assertEqual(1, status["state"])
Exemplo n.º 12
0
    def post(self, **kwargs):
        """
        API method to create a new execution linked to an already existing instance
        It requires authentication to be passed in the form of a token that has to be linked to
        an existing session (login) made by a user

        Unless the request carries the argument ``run=0``, the execution is
        also launched in Airflow and its dag_run_id is stored.

        :param kwargs: data of the execution to create; ``config`` is validated
          against the config schema of the dag and ``schema`` defaults to
          "solve_model_dag" when it is missing
        :return: the created execution and an integer with the HTTP status code (201)
        :raises ObjectDoesNotExist: when the instance of the execution is not found
        :raises AirflowError: when Airflow is not alive, the dag is paused or
          the request to run the dag fails; the execution state is updated
          before raising
        """
        # TODO: should validation be done even if the execution is not going to be run?
        # TODO: should the schema field be cross validated with the instance schema field?
        config = current_app.config

        def _raise_af_error(execution, error, state):
            # Log the problem, store the failure state and raise.
            log.error(error)
            execution.update_state(state)
            raise AirflowError(
                error=error,
                payload=dict(
                    message=EXECUTION_STATE_MESSAGE_DICT[state],
                    state=state,
                ),
            )

        kwargs.setdefault("schema", "solve_model_dag")
        # TODO: review the order of these two operations
        # Get dag config schema and validate it
        marshmallow_obj = get_schema(config, kwargs["schema"], "config")
        validate_and_continue(marshmallow_obj(), kwargs["config"])

        # The status code returned by post_list is not used.
        execution, _ = self.post_list(data=kwargs)
        instance = InstanceModel.get_one_object(user=self.get_user(),
                                                idx=execution.instance_id)

        if instance is None:
            raise ObjectDoesNotExist(
                error="The instance to solve does not exist")

        # this allows testing without airflow interaction:
        if request.args.get("run", "1") == "0":
            execution.update_state(EXEC_STATE_NOT_RUN)
            return execution, 201

        # We now try to launch the task in airflow
        af_client = Airflow.from_config(config)
        if not af_client.is_alive():
            _raise_af_error(execution, "Airflow is not accessible",
                            EXEC_STATE_ERROR_START)

        # ask airflow if dag_name exists
        schema = execution.schema
        schema_info = af_client.get_dag_info(schema)

        # Validate that instance and dag_name are compatible
        marshmallow_obj = get_schema(config, schema, INSTANCE_SCHEMA)
        validate_and_continue(marshmallow_obj(), instance.data)

        info = schema_info.json()
        if info["is_paused"]:
            _raise_af_error(execution,
                            "The dag exists but it is paused in airflow",
                            EXEC_STATE_ERROR_START)

        try:
            response = af_client.run_dag(execution.id, dag_name=schema)
        except AirflowError as err:
            _raise_af_error(
                execution,
                "Airflow responded with an error: {}".format(err),
                EXEC_STATE_ERROR,
            )

        # if we succeed, we register the dag_run_id in the execution table:
        af_data = response.json()
        execution.dag_run_id = af_data["dag_run_id"]
        execution.update_state(EXEC_STATE_RUNNING)
        log.info("User {} creates execution {}".format(self.get_user_id(),
                                                       execution.id))
        return execution, 201