Example No. 1
def reinit_airflow_sql_conn():
    from airflow.settings import configure_orm, configure_vars

    from dbnd._core.configuration.dbnd_config import config as dbnd_config

    configure_vars()
    # The webservers import this file from models.py with the default settings.
    configure_orm()
    # add query handler before every execute
    # this will print query, code line and stack trace
    if dbnd_config.getboolean("log", "sqlalchemy_trace"):
        from airflow import settings as airflow_settings
        from sqlalchemy import event

        from dbnd_airflow.db_utils import trace_sqlalchemy_query

        event.listen(airflow_settings.engine, "before_cursor_execute",
                     trace_sqlalchemy_query)

    # this will print query execution time
    from airflow import settings as airflow_settings
    from sqlalchemy import event

    from dbnd_airflow.db_utils import (
        profile_after_cursor_execute,
        profile_before_cursor_execute,
    )

    event.listen(airflow_settings.engine, "before_cursor_execute",
                 profile_before_cursor_execute)
    event.listen(airflow_settings.engine, "after_cursor_execute",
                 profile_after_cursor_execute)
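
Note: the handlers registered above follow SQLAlchemy's standard before_cursor_execute/after_cursor_execute signature. The real implementations live in dbnd_airflow.db_utils; the pair below is only an illustrative sketch of what such profiling hooks typically look like.

import logging
import time


def profile_before_cursor_execute(conn, cursor, statement, parameters, context, executemany):
    # Record the start time on the connection's info dict so the "after" hook can read it.
    conn.info.setdefault("query_start_time", []).append(time.monotonic())


def profile_after_cursor_execute(conn, cursor, statement, parameters, context, executemany):
    elapsed = time.monotonic() - conn.info["query_start_time"].pop()
    logging.getLogger("sqlalchemy.profiling").debug("Query took %.4fs: %s", elapsed, statement)
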
Example No. 2
 def setUpClass(cls):
     settings.configure_orm()
     cls.session = settings.Session
     with conf_vars({("api", "auth_backend"): "tests.test_utils.remote_user_api_auth_backend"}):
         cls.app = app.create_app(testing=True)
     # TODO: Add new role for each view to test permission.
     create_user(cls.app, username="******", role="Admin")
Example No. 3
def main():
    # Inline the airflow imports because they cause the global config to be loaded
    from airflow.utils import timezone
    from airflow import jobs
    from airflow.configuration import conf
    from airflow.settings import configure_orm, Session

    configure_orm(disable_connection_pool=True)

    base_job_model = jobs.BaseJob
    scheduler_health_check_threshold = timedelta(
        seconds=conf.getint('scheduler', 'scheduler_health_check_threshold')
    )

    latest_scheduler_heartbeat = None
    try:

        latest_scheduler_heartbeat = (
            Session.query(func.max(base_job_model.latest_heartbeat))
            .filter(base_job_model.state == 'running', base_job_model.job_type == 'SchedulerJob')
            .scalar()
        )
    except Exception:
        pass

    if not latest_scheduler_heartbeat:
        status_code = 1
    else:
        if timezone.utcnow() - latest_scheduler_heartbeat <= scheduler_health_check_threshold:
            status_code = 0
        else:
            status_code = 1

    return status_code
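
This helper only computes an exit status; a typical wrapper (an assumption, not part of the snippet) exits with it so a process supervisor or container health check can react:

import sys

if __name__ == '__main__':
    # 0 = scheduler heartbeat is recent, 1 = heartbeat missing or stale.
    sys.exit(main())
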
Example No. 4
 def setUpClass(cls):
     settings.configure_orm()
     cls.session = settings.Session
     cls.app = application.create_app(testing=True)
     cls.appbuilder = cls.app.appbuilder  # pylint: disable=no-member
     cls.app.config['WTF_CSRF_ENABLED'] = False
     cls.security_manager = cls.appbuilder.sm
     cls.delete_roles()
Example No. 5
 def setUp(self):
     self.app, self.appbuilder = application.create_app(session=Session, testing=True)
     self.app.config['SQLALCHEMY_DATABASE_URI'] = 'sqlite:///'
     self.app.config['SECRET_KEY'] = 'secret_key'
     self.app.config['CSRF_ENABLED'] = False
     self.app.config['WTF_CSRF_ENABLED'] = False
     self.client = self.app.test_client()
     settings.configure_orm()
     self.session = Session
Example No. 6
    def setUp(self):
        super().setUp()
        from airflow.www import app as application
        self.app, self.appbuilder = application.create_app(session=Session, testing=True)
        self.app.config['TESTING'] = True

        self.parser = cli.CLIFactory.get_parser()
        self.dagbag = DagBag(dag_folder=DEV_NULL, include_examples=True)
        settings.configure_orm()
        self.session = Session
Example No. 7
 def setUp(self):
     conf.load_test_config()
     self.app, self.appbuilder = application.create_app(session=Session, testing=True)
     self.app.config['SQLALCHEMY_DATABASE_URI'] = 'sqlite:///'
     self.app.config['SECRET_KEY'] = 'secret_key'
     self.app.config['CSRF_ENABLED'] = False
     self.app.config['WTF_CSRF_ENABLED'] = False
     self.client = self.app.test_client()
     settings.configure_orm()
     self.session = Session
Example No. 8
def set_airflow_db(sql_alchemy_conn: str, fernet_key: str):
    with set_env(
            AIRFLOW__CORE__SQL_ALCHEMY_CONN=sql_alchemy_conn,
            AIRFLOW__CORE__FERNET_KEY=fernet_key,
    ):
        settings.configure_vars()
        settings.configure_orm()
        assert repr(settings.engine.url) == sql_alchemy_conn
        yield
    settings.configure_vars()
    settings.configure_orm()
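
Because the function body yields, it is intended to be used as a context manager; the contextlib.contextmanager decorator is presumably applied in the source module but not shown in this excerpt. A hedged usage sketch with a throwaway SQLite URL:

from airflow import settings

# Assumes set_airflow_db is decorated with @contextlib.contextmanager.
with set_airflow_db("sqlite:////tmp/airflow.db", fernet_key=""):
    # Inside the block the ORM is bound to the temporary database.
    session = settings.Session()
    session.close()
# On exit, configure_vars()/configure_orm() rebind the ORM to the original configuration.
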
Example No. 9
 def setUpClass(cls):
     settings.configure_orm()
     cls.session = settings.Session
     cls.app = application.create_app(testing=True)
     cls.appbuilder = cls.app.appbuilder  # pylint: disable=no-member
     cls.app.config['WTF_CSRF_ENABLED'] = False
     cls.security_manager = cls.appbuilder.sm
     cls.role_admin = cls.security_manager.find_role('Admin')
     cls.user = cls.appbuilder.sm.add_user('admin', 'admin', 'user',
                                           '*****@*****.**', cls.role_admin,
                                           'general')
Example No. 10
 def test_sql_alchemy_invalid_connect_args(self, mock_create_engine,
                                           mock_sessionmaker,
                                           mock_scoped_session,
                                           mock_setup_event_handlers):
     config = {
         ('core', 'sql_alchemy_connect_args'): 'does.not.exist',
         ('core', 'sql_alchemy_pool_enabled'): 'False'
     }
     with self.assertRaises(AirflowConfigException):
         with conf_vars(config):
             settings.configure_orm()
Example No. 11
def mock_airflow_db() -> ContextManager[AirflowDb]:
    with tempfile.TemporaryDirectory() as temp_dir:
        test_db_path = os.path.join(temp_dir, 'airflow.db')
        sql_alchemy_conn = f'sqlite:///{test_db_path}'
        with set_env(AIRFLOW__CORE__SQL_ALCHEMY_CONN=sql_alchemy_conn):
            settings.configure_vars()
            settings.configure_orm()
            assert repr(settings.engine.url) == sql_alchemy_conn
            initdb()
            yield AirflowDb(sql_alchemy_conn=sql_alchemy_conn)
    settings.configure_vars()
    settings.configure_orm()
Example No. 12
 def test_configure_orm_with_default_values(self, mock_create_engine,
                                            mock_sessionmaker,
                                            mock_scoped_session,
                                            mock_setup_event_handlers):
     settings.configure_orm()
     mock_create_engine.assert_called_once_with(settings.SQL_ALCHEMY_CONN,
                                                connect_args={},
                                                encoding='utf-8',
                                                max_overflow=10,
                                                pool_pre_ping=True,
                                                pool_recycle=1800,
                                                pool_size=5)
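
The mock arguments here are presumably injected by a stack of mock.patch decorators on the test method, which the excerpt omits. A plausible arrangement (the patch targets are an assumption) is:

from unittest import mock

@mock.patch('airflow.settings.setup_event_handlers')
@mock.patch('airflow.settings.scoped_session')
@mock.patch('airflow.settings.sessionmaker')
@mock.patch('airflow.settings.create_engine')
def test_configure_orm_with_default_values(self, mock_create_engine, mock_sessionmaker,
                                           mock_scoped_session, mock_setup_event_handlers):
    ...  # body as in the example above

Note that mock.patch decorators are applied bottom-up, so the innermost patch (create_engine) maps to the first mock argument.
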
Example No. 13
def task_run(args, dag=None):
    """Runs a single task instance"""
    if dag:
        args.dag_id = dag.dag_id

    log = LoggingMixin().log

    # Load custom airflow config
    if args.cfg_path:
        with open(args.cfg_path, 'r') as conf_file:
            conf_dict = json.load(conf_file)

        if os.path.exists(args.cfg_path):
            os.remove(args.cfg_path)

        conf.read_dict(conf_dict, source=args.cfg_path)
        settings.configure_vars()

    # IMPORTANT, have to use the NullPool, otherwise, each "run" command may leave
    # behind multiple open sleeping connections while heartbeating, which could
    # easily exceed the database connection limit when
    # processing hundreds of simultaneous tasks.
    settings.configure_orm(disable_connection_pool=True)

    if not args.pickle and not dag:
        dag = get_dag(args)
    elif not dag:
        with db.create_session() as session:
            log.info('Loading pickle id %s', args.pickle)
            dag_pickle = session.query(DagPickle).filter(
                DagPickle.id == args.pickle).first()
            if not dag_pickle:
                raise AirflowException("Who hid the pickle!? [missing pickle]")
            dag = dag_pickle.pickle

    task = dag.get_task(task_id=args.task_id)
    ti = TaskInstance(task, args.execution_date)
    ti.refresh_from_db()

    ti.init_run_context(raw=args.raw)

    hostname = get_hostname()
    log.info("Running %s on host %s", ti, hostname)

    if args.interactive:
        _run(args, dag, ti)
    else:
        with redirect_stdout(ti.log, logging.INFO), redirect_stderr(
                ti.log, logging.WARN):
            _run(args, dag, ti)
    logging.shutdown()
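
As Examples 15 and 19 below confirm, disabling the pool makes configure_orm() create the engine with SQLAlchemy's NullPool, so every session checkout opens a fresh connection and closing it really closes it. A rough standalone equivalent (the connection URL is a placeholder):

from sqlalchemy import create_engine
from sqlalchemy.pool import NullPool

# Roughly what configure_orm(disable_connection_pool=True) does internally:
# no idle pooled connections are kept, so many parallel "run" commands
# cannot exhaust the database connection limit.
engine = create_engine("postgresql+psycopg2://airflow@localhost/airflow", poolclass=NullPool)
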
Example No. 14
 def factory():
     app = create_app(testing=True)
     app.config["WTF_CSRF_ENABLED"] = False
     settings.configure_orm()
     security_manager = app.appbuilder.sm  # pylint: disable=no-member
     if not security_manager.find_user(username='******'):
         security_manager.add_user(
             username='******',
             first_name='test',
             last_name='test',
             email='*****@*****.**',
             role=security_manager.find_role('Admin'),
             password='******',
         )
     return app
Example No. 15
 def test_sql_alchemy_connect_args(self, mock_create_engine,
                                   mock_sessionmaker, mock_scoped_session,
                                   mock_setup_event_handlers):
     config = {
         ('core', 'sql_alchemy_connect_args'):
         'tests.core.test_sqlalchemy_config.SQL_ALCHEMY_CONNECT_ARGS',
         ('core', 'sql_alchemy_pool_enabled'): 'False'
     }
     with conf_vars(config):
         settings.configure_orm()
         mock_create_engine.assert_called_once_with(
             settings.SQL_ALCHEMY_CONN,
             connect_args=SQL_ALCHEMY_CONNECT_ARGS,
             poolclass=NullPool,
             encoding='utf-8')
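
This test imports a module-level SQL_ALCHEMY_CONNECT_ARGS constant that the excerpt does not show; any dict reachable by the dotted path works, since the value is passed straight to create_engine(connect_args=...). A hypothetical definition:

# In tests/core/test_sqlalchemy_config.py (illustrative values only)
SQL_ALCHEMY_CONNECT_ARGS = {
    "check_same_thread": False,
}

The invalid-path case in Example No. 10 ('does.not.exist') fails for the same reason: the dotted path cannot be imported, so configure_orm() raises AirflowConfigException.
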
Example No. 16
def task_run(args, dag=None):
    """Runs a single task instance"""

    # Load custom airflow config
    if args.cfg_path:
        with open(args.cfg_path, 'r') as conf_file:
            conf_dict = json.load(conf_file)

        if os.path.exists(args.cfg_path):
            os.remove(args.cfg_path)

        conf.read_dict(conf_dict, source=args.cfg_path)
        settings.configure_vars()

    # IMPORTANT, have to use the NullPool, otherwise, each "run" command may leave
    # behind multiple open sleeping connections while heartbeating, which could
    # easily exceed the database connection limit when
    # processing hundreds of simultaneous tasks.
    settings.configure_orm(disable_connection_pool=True)

    if dag and args.pickle:
        raise AirflowException(
            "You cannot use the --pickle option when using DAG.cli() method.")
    elif args.pickle:
        print(f'Loading pickle id: {args.pickle}')
        dag = get_dag_by_pickle(args.pickle)
    elif not dag:
        dag = get_dag(args.subdir, args.dag_id)
    else:
        # Use DAG from parameter
        pass

    task = dag.get_task(task_id=args.task_id)
    ti = TaskInstance(task, args.execution_date)
    ti.refresh_from_db()

    ti.init_run_context(raw=args.raw)

    hostname = get_hostname()
    print(f"Running {ti} on host {hostname}")

    if args.interactive:
        _run_task_by_selected_method(args, dag, ti)
    else:
        with redirect_stdout(StreamLogWriter(ti.log, logging.INFO)), \
                redirect_stderr(StreamLogWriter(ti.log, logging.WARN)):
            _run_task_by_selected_method(args, dag, ti)
    logging.shutdown()
Example No. 17
def set_airflow_db(sql_alchemy_conn: Optional[str],
                   fernet_key: Optional[str]) -> ContextManager[AirflowDb]:
    env = {}
    if sql_alchemy_conn is not None:
        env['AIRFLOW__CORE__SQL_ALCHEMY_CONN'] = sql_alchemy_conn
    if fernet_key is not None:
        env['AIRFLOW__CORE__FERNET_KEY'] = fernet_key
    with set_env(**env):
        settings.configure_vars()
        settings.configure_orm()
        if sql_alchemy_conn is not None:
            assert str(
                settings.engine.url
            ) == sql_alchemy_conn, f'{settings.engine.url} != {sql_alchemy_conn}'
        yield AirflowDb(sql_alchemy_conn or settings.SQL_ALCHEMY_CONN)
    settings.configure_vars()
    settings.configure_orm()
Example No. 18
    def setUpClass(cls):
        settings.configure_orm()
        cls.session = settings.Session
        with conf_vars({
            ("api", "auth_backend"):
                "tests.test_utils.remote_user_api_auth_backend"
        }):
            cls.app = app.create_app(testing=True)

        create_user(
            cls.app,
            username="******",
            role_name="Test",
            permissions=[('can_read', 'Dag'), ('can_read', 'DagRun'),
                         ('can_read', 'Task')],
        )
        create_user(cls.app,
                    username="******",
                    role_name="TestNoPermissions")
Example No. 19
 def test_sql_alchemy_connect_args(self, mock_create_engine,
                                   mock_sessionmaker, mock_scoped_session,
                                   mock_setup_event_handlers):
     config = {
         (
             'core',
             'sql_alchemy_connect_args',
         ): 'tests.core.test_sqlalchemy_config.SQL_ALCHEMY_CONNECT_ARGS',
         ('core', 'sql_alchemy_pool_enabled'): 'False',
     }
     with conf_vars(config):
         settings.configure_orm()
         engine_args = {}
         if settings.SQL_ALCHEMY_CONN.startswith('mysql'):
             engine_args['isolation_level'] = 'READ COMMITTED'
         mock_create_engine.assert_called_once_with(
             settings.SQL_ALCHEMY_CONN,
             connect_args=SQL_ALCHEMY_CONNECT_ARGS,
             poolclass=NullPool,
             encoding='utf-8',
             **engine_args,
         )
Example No. 20
    def setUpClass(cls):
        settings.configure_orm()
        cls.session = settings.Session
        with conf_vars({
            ("api", "auth_backend"):
                "tests.test_utils.remote_user_api_auth_backend"
        }):
            cls.app = app.create_app(testing=True)

        create_user(
            cls.app,
            username="******",
            role_name="Test",
            permissions=[
                (permissions.ACTION_CAN_READ, permissions.RESOURCE_DAGS),
                (permissions.ACTION_CAN_READ, permissions.RESOURCE_DAG_RUN),
                (permissions.ACTION_CAN_READ, permissions.RESOURCE_TASK),
            ],
        )
        create_user(cls.app,
                    username="******",
                    role_name="TestNoPermissions")
Example No. 21
def task_run(args, dag=None):
    """Runs a single task instance"""

    # Load custom airflow config
    if args.cfg_path:
        with open(args.cfg_path, 'r') as conf_file:
            conf_dict = json.load(conf_file)

        if os.path.exists(args.cfg_path):
            os.remove(args.cfg_path)

        conf.read_dict(conf_dict, source=args.cfg_path)
        settings.configure_vars()

    # IMPORTANT, have to use the NullPool, otherwise, each "run" command may leave
    # behind multiple open sleeping connections while heartbeating, which could
    # easily exceed the database connection limit when
    # processing hundreds of simultaneous tasks.
    settings.configure_orm(disable_connection_pool=True)

    if dag and args.pickle:
        raise AirflowException("You cannot use the --pickle option when using DAG.cli() method.")
    elif args.pickle:
        print(f'Loading pickle id: {args.pickle}')
        dag = get_dag_by_pickle(args.pickle)
    elif not dag:
        dag = get_dag(args.subdir, args.dag_id)
    else:
        # Use DAG from parameter
        pass

    task = dag.get_task(task_id=args.task_id)
    ti = TaskInstance(task, args.execution_date)
    ti.init_run_context(raw=args.raw)

    hostname = get_hostname()

    print(f"Running {ti} on host {hostname}")

    if args.interactive:
        _run_task_by_selected_method(args, dag, ti)
    else:
        if settings.DONOT_MODIFY_HANDLERS:
            with redirect_stdout(StreamLogWriter(ti.log, logging.INFO)), \
                    redirect_stderr(StreamLogWriter(ti.log, logging.WARN)):
                _run_task_by_selected_method(args, dag, ti)
        else:
            # Get all the Handlers from 'airflow.task' logger
            # Add these handlers to the root logger so that we can get logs from
            # any custom loggers defined in the DAG
            airflow_logger_handlers = logging.getLogger('airflow.task').handlers
            root_logger = logging.getLogger()
            root_logger_handlers = root_logger.handlers

            # Remove all handlers from Root Logger to avoid duplicate logs
            for handler in root_logger_handlers:
                root_logger.removeHandler(handler)

            for handler in airflow_logger_handlers:
                root_logger.addHandler(handler)
            root_logger.setLevel(logging.getLogger('airflow.task').level)

            with redirect_stdout(StreamLogWriter(ti.log, logging.INFO)), \
                    redirect_stderr(StreamLogWriter(ti.log, logging.WARN)):
                _run_task_by_selected_method(args, dag, ti)

            # We need to restore the handlers to the loggers as celery worker process
            # can call this command multiple times,
            # so if we don't reset this then logs from next task would go to the wrong place
            for handler in airflow_logger_handlers:
                root_logger.removeHandler(handler)
            for handler in root_logger_handlers:
                root_logger.addHandler(handler)

    logging.shutdown()
Example No. 22
 def setUpClass(cls):
     settings.configure_orm()
     cls.session = settings.Session
     cls.app = app.create_app(testing=True)
Example No. 23
 def setUp(self):
     super().setUp()
     settings.configure_orm()
     self.session = Session
     self._cleanup()
Example No. 24
def configured_session():
    settings.configure_orm()
    return Session
Example No. 25
    def _run_file_processor(
        result_channel: MultiprocessingConnection,
        parent_channel: MultiprocessingConnection,
        file_path: str,
        pickle_dags: bool,
        dag_ids: Optional[List[str]],
        thread_name: str,
        callback_requests: List[CallbackRequest],
    ) -> None:
        """
        Process the given file.

        :param result_channel: the connection to use for passing back the result
        :type result_channel: multiprocessing.Connection
        :param parent_channel: the parent end of the channel to close in the child
        :type parent_channel: multiprocessing.Connection
        :param file_path: the file to process
        :type file_path: str
        :param pickle_dags: whether to pickle the DAGs found in the file and
            save them to the DB
        :type pickle_dags: bool
        :param dag_ids: if specified, only examine DAG ID's that are
            in this list
        :type dag_ids: list[str]
        :param thread_name: the name to use for the process that is launched
        :type thread_name: str
        :param callback_requests: failure callback to execute
        :type callback_requests: List[airflow.utils.callback_requests.CallbackRequest]
        :return: the process that was launched
        :rtype: multiprocessing.Process
        """
        # This helper runs in the newly created process
        log: logging.Logger = logging.getLogger("airflow.processor")

        # Since we share all open FDs from the parent, we need to close the parent side of the pipe here in
        # the child, else it won't get closed properly until we exit.
        log.info("Closing parent pipe")

        parent_channel.close()
        del parent_channel

        set_context(log, file_path)
        setproctitle(f"airflow scheduler - DagFileProcessor {file_path}")

        try:
            # redirect stdout/stderr to log
            with redirect_stdout(StreamLogWriter(
                    log, logging.INFO)), redirect_stderr(
                        StreamLogWriter(log,
                                        logging.WARN)), Stats.timer() as timer:
                # Re-configure the ORM engine as there are issues with multiple processes
                settings.configure_orm()

                # Change the thread name to differentiate log lines. This is
                # really a separate process, but changing the name of the
                # process doesn't work, so changing the thread name instead.
                threading.current_thread().name = thread_name

                log.info("Started process (PID=%s) to work on %s", os.getpid(),
                         file_path)
                dag_file_processor = DagFileProcessor(dag_ids=dag_ids, log=log)
                result: Tuple[int, int] = dag_file_processor.process_file(
                    file_path=file_path,
                    pickle_dags=pickle_dags,
                    callback_requests=callback_requests,
                )
                result_channel.send(result)
            log.info("Processing %s took %.3f seconds", file_path,
                     timer.duration)
        except Exception:  # pylint: disable=broad-except
            # Log exceptions through the logging framework.
            log.exception("Got an exception! Propagating...")
            raise
        finally:
            # We re-initialized the ORM within this Process above so we need to
            # tear it down manually here
            settings.dispose_orm()

            result_channel.close()
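
The configure/dispose pairing shown here is the general pattern for touching the Airflow ORM from a child process: engines and pooled connections must not be shared across a fork. A simplified standalone sketch (not the scheduler's actual code):

import multiprocessing

from sqlalchemy import text

from airflow import settings


def _child():
    # Rebuild the ORM inside the child and tear it down before exiting.
    settings.configure_orm()
    try:
        session = settings.Session()
        session.execute(text("SELECT 1"))
        session.close()
    finally:
        settings.dispose_orm()


if __name__ == "__main__":
    proc = multiprocessing.Process(target=_child)
    proc.start()
    proc.join()
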
Example No. 26
def task_run(args, dag=None):
    """Run a single task instance.

    Note that there must be at least one DagRun for this to start,
    i.e. it must have been scheduled and/or triggered previously.
    Alternatively, if you just need to run it for testing then use
    "airflow tasks test ..." command instead.
    """
    # Load custom airflow config

    if args.local and args.raw:
        raise AirflowException(
            "Option --raw and --local are mutually exclusive. "
            "Please remove one option to execute the command.")

    if args.raw:
        unsupported_options = [
            o for o in RAW_TASK_UNSUPPORTED_OPTION if getattr(args, o)
        ]

        if unsupported_options:
            unsupported_raw_task_flags = ', '.join(
                f'--{o}' for o in RAW_TASK_UNSUPPORTED_OPTION)
            unsupported_flags = ', '.join(f'--{o}'
                                          for o in unsupported_options)
            raise AirflowException(
                "Option --raw does not work with some of the other options on this command. "
                "You can't use --raw option and the following options: "
                f"{unsupported_raw_task_flags}. "
                f"You provided the option {unsupported_flags}. "
                "Delete it to execute the command.")
    if dag and args.pickle:
        raise AirflowException(
            "You cannot use the --pickle option when using DAG.cli() method.")
    if args.cfg_path:
        with open(args.cfg_path) as conf_file:
            conf_dict = json.load(conf_file)

        if os.path.exists(args.cfg_path):
            os.remove(args.cfg_path)

        conf.read_dict(conf_dict, source=args.cfg_path)
        settings.configure_vars()

    settings.MASK_SECRETS_IN_LOGS = True

    # IMPORTANT, have to use the NullPool, otherwise, each "run" command may leave
    # behind multiple open sleeping connections while heartbeating, which could
    # easily exceed the database connection limit when
    # processing hundreds of simultaneous tasks.
    settings.configure_orm(disable_connection_pool=True)

    if args.pickle:
        print(f'Loading pickle id: {args.pickle}')
        dag = get_dag_by_pickle(args.pickle)
    elif not dag:
        dag = get_dag(args.subdir, args.dag_id)
    else:
        # Use DAG from parameter
        pass
    task = dag.get_task(task_id=args.task_id)
    ti = _get_ti(task, args.execution_date_or_run_id)
    ti.init_run_context(raw=args.raw)

    hostname = get_hostname()

    print(f"Running {ti} on host {hostname}")

    if args.interactive:
        _run_task_by_selected_method(args, dag, ti)
    else:
        with _capture_task_logs(ti):
            _run_task_by_selected_method(args, dag, ti)
Example No. 27
    def setUpClass(cls) -> None:
        from airflow import settings

        settings.configure_orm()
Example No. 28
def session():
    settings.configure_orm()
    yield settings.Session
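
The bare yield suggests this is a pytest fixture whose decorator was stripped from the excerpt. A hedged reconstruction together with a trivial usage:

import pytest

from airflow import settings


@pytest.fixture
def session():
    settings.configure_orm()
    yield settings.Session


def test_session_is_configured(session):
    # The fixture hands tests the configured Session factory.
    assert session() is not None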