Example #1
    def log(self):
        """Return logger for a sensor instance object."""
        # The created log_id is used inside the smart sensor as the key to
        # fetch the corresponding in-memory log handler.
        si = self.si
        si.raw = False  # Otherwise set_context will fail
        log_id = "-".join([
            si.dag_id, si.task_id,
            si.execution_date.strftime("%Y_%m_%dT%H_%M_%S_%f"),
            str(si.try_number)
        ])
        logger = logging.getLogger(f'airflow.task.{log_id}')

        if not logger.handlers:
            handler = self.create_new_task_handler()
            logger.addHandler(handler)
            set_context(logger, si)

            line_break = "-" * 120
            logger.info(line_break)
            logger.info(
                "Processing sensor task %s in smart sensor service on host: %s",
                self.ti_key, get_hostname())
            logger.info(line_break)
        return logger
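
Since the logger is registered globally under its log_id, another component in the same process can fetch it again by rebuilding the same key. A hypothetical helper sketching that lookup (get_sensor_logger is illustrative, not part of Airflow):

import logging

def get_sensor_logger(si):
    # Illustrative only: rebuild the log_id used above to fetch the
    # per-sensor logger (and thus its in-memory handler) from the
    # global logging registry.
    log_id = "-".join([
        si.dag_id, si.task_id,
        si.execution_date.strftime("%Y_%m_%dT%H_%M_%S_%f"),
        str(si.try_number),
    ])
    return logging.getLogger(f'airflow.task.{log_id}')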
Example #2
    def _run_task_instance(self, ti, mark_success, pool, session=None):
        # set proper state and try number to keep logger in sync
        if isinstance(ti, SimpleTaskInstance):
            from airflow.models import TaskInstance

            dag = self.dag_bag.get_dag(ti.dag_id)
            task = dag.get_task(ti.task_id)
            ti = TaskInstance(task, ti.execution_date)
        ti.state = State.RUNNING
        ti._try_number += 1
        # save the state
        session.merge(ti)
        session.commit()
        # backward compatible with airflow loggers
        from airflow.utils.log import logging_mixin

        logging_mixin.set_context(logging.root, ti)

        try:
            ti._run_raw_task(mark_success=mark_success,
                             job_id=ti.job_id,
                             pool=pool)
        finally:
            for handler in logging.root.handlers:
                if handler.name == "task":
                    handler.close()
Example #3
    def test_file_task_handler(self):
        def task_callable(ti, **kwargs):
            ti.log.info("test")
        dag = DAG('dag_for_testing_file_task_handler', start_date=DEFAULT_DATE)
        task = PythonOperator(
            task_id='task_for_testing_file_log_handler',
            dag=dag,
            python_callable=task_callable,
            provide_context=True
        )
        ti = TaskInstance(task=task, execution_date=DEFAULT_DATE)

        logger = ti.log
        ti.log.disabled = False

        file_handler = next((handler for handler in logger.handlers
                             if handler.name == FILE_TASK_HANDLER), None)
        self.assertIsNotNone(file_handler)

        set_context(logger, ti)
        self.assertIsNotNone(file_handler.handler)
        # We expect set_context to generate a file locally.
        log_filename = file_handler.handler.baseFilename
        self.assertTrue(os.path.isfile(log_filename))
        self.assertTrue(log_filename.endswith("1.log"), log_filename)

        ti.run(ignore_ti_state=True)

        file_handler.flush()
        file_handler.close()

        self.assertTrue(hasattr(file_handler, 'read'))
        # The return value of read must be a tuple of two lists.
        logs, metadatas = file_handler.read(ti)
        self.assertTrue(isinstance(logs, list))
        self.assertTrue(isinstance(metadatas, list))
        self.assertEqual(len(logs), 1)
        self.assertEqual(len(logs), len(metadatas))
        self.assertTrue(isinstance(metadatas[0], dict))
        target_re = r'\n\[[^\]]+\] {test_log_handlers.py:\d+} INFO - test\n'

        # We should expect our log line from the callable above to appear in
        # the logs we read back
        six.assertRegex(
            self,
            logs[0],
            target_re,
            "Logs were " + str(logs)
        )

        # Remove the generated tmp log file.
        os.remove(log_filename)
Example #4
    def test_file_task_handler(self):
        def task_callable(ti, **kwargs):
            ti.log.info("test")
        dag = DAG('dag_for_testing_file_task_handler', start_date=DEFAULT_DATE)
        task = PythonOperator(
            task_id='task_for_testing_file_log_handler',
            dag=dag,
            python_callable=task_callable,
            provide_context=True
        )
        ti = TaskInstance(task=task, execution_date=DEFAULT_DATE)

        logger = ti.log
        ti.log.disabled = False

        file_handler = next((handler for handler in logger.handlers
                             if handler.name == FILE_TASK_HANDLER), None)
        self.assertIsNotNone(file_handler)

        set_context(logger, ti)
        self.assertIsNotNone(file_handler.handler)
        # We expect set_context to generate a file locally.
        log_filename = file_handler.handler.baseFilename
        self.assertTrue(os.path.isfile(log_filename))
        self.assertTrue(log_filename.endswith("1.log"), log_filename)

        ti.run(ignore_ti_state=True)

        file_handler.flush()
        file_handler.close()

        self.assertTrue(hasattr(file_handler, 'read'))
        # The return value of read must be a tuple of two lists.
        logs, metadatas = file_handler.read(ti)
        self.assertTrue(isinstance(logs, list))
        self.assertTrue(isinstance(metadatas, list))
        self.assertEqual(len(logs), 1)
        self.assertEqual(len(logs), len(metadatas))
        self.assertTrue(isinstance(metadatas[0], dict))
        target_re = r'\n\[[^\]]+\] {test_log_handlers.py:\d+} INFO - test\n'

        # We should expect our log line from the callable above to appear in
        # the logs we read back
        six.assertRegex(
            self,
            logs[0],
            target_re,
            "Logs were " + str(logs)
        )

        # Remove the generated tmp log file.
        os.remove(log_filename)
Example #5
    def test_formatter(self):
        dag = DAG('test_dag', start_date=DEFAULT_DATE)
        task = DummyOperator(task_id='test_task', dag=dag)
        ti = TaskInstance(task=task, execution_date=DEFAULT_DATE)

        logger = ti.log
        ti.log.disabled = False
        handler = next((handler for handler in logger.handlers if handler.name == TASK_HANDLER), None)
        self.assertIsNotNone(handler)

        # setting the expected value of the formatter
        expected_formatter_value = "test_dag-test_task:" + handler.formatter._fmt
        set_context(logger, ti)
        self.assertEqual(expected_formatter_value, handler.formatter._fmt)
Example #6
    def test_file_task_handler(self):
        def task_callable(ti, **kwargs):
            ti.log.info("test")

        dag = DAG('dag_for_testing_file_task_handler', start_date=DEFAULT_DATE)
        dag.create_dagrun(run_type=DagRunType.MANUAL, state=State.RUNNING, execution_date=DEFAULT_DATE)
        task = PythonOperator(
            task_id='task_for_testing_file_log_handler',
            dag=dag,
            python_callable=task_callable,
        )
        ti = TaskInstance(task=task, execution_date=DEFAULT_DATE)

        logger = ti.log
        ti.log.disabled = False

        file_handler = next(
            (handler for handler in logger.handlers if handler.name == FILE_TASK_HANDLER), None
        )
        assert file_handler is not None

        set_context(logger, ti)
        assert file_handler.handler is not None
        # We expect set_context to generate a file locally.
        log_filename = file_handler.handler.baseFilename
        assert os.path.isfile(log_filename)
        assert log_filename.endswith("1.log"), log_filename

        ti.run(ignore_ti_state=True)

        file_handler.flush()
        file_handler.close()

        assert hasattr(file_handler, 'read')
        # The return value of read must be a tuple of two lists.
        logs, metadatas = file_handler.read(ti)
        assert isinstance(logs, list)
        assert isinstance(metadatas, list)
        assert len(logs) == 1
        assert len(logs) == len(metadatas)
        assert isinstance(metadatas[0], dict)
        target_re = r'\n\[[^\]]+\] {test_log_handlers.py:\d+} INFO - test\n'

        # We should expect our log line from the callable above to appear in
        # the logs we read back
        assert re.search(target_re, logs[0][0][-1]), "Logs were " + str(logs)

        # Remove the generated tmp log file.
        os.remove(log_filename)
Example #7
    def test_set_context(self):
        handler1 = mock.MagicMock()
        handler2 = mock.MagicMock()
        parent = mock.MagicMock()
        parent.propagate = False
        parent.handlers = [handler1, ]
        log = mock.MagicMock()
        log.handlers = [handler2, ]
        log.parent = parent
        log.propagate = True

        value = "test"
        set_context(log, value)

        handler1.set_context.assert_called_with(value)
        handler2.set_context.assert_called_with(value)
Example #8
    def test_set_context(self):
        handler1 = mock.MagicMock()
        handler2 = mock.MagicMock()
        parent = mock.MagicMock()
        parent.propagate = False
        parent.handlers = [handler1, ]
        log = mock.MagicMock()
        log.handlers = [handler2, ]
        log.parent = parent
        log.propagate = True

        value = "test"
        set_context(log, value)

        handler1.set_context.assert_called_with(value)
        handler2.set_context.assert_called_with(value)
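
The two mocked tests above pin down the contract of set_context: every handler on the logger receives set_context(value), and the walk continues up the parent chain only while propagate is True (here parent.propagate is False, so the walk stops at parent). A minimal sketch of that contract, assuming handlers without a set_context method should simply be skipped (the real implementation lives in airflow.utils.log.logging_mixin):

import logging

def set_context(logger: logging.Logger, value) -> None:
    # Sketch of the behavior the mocks assert: apply the context to
    # every handler on this logger, then follow parent loggers while
    # `propagate` is True.
    while logger:
        for handler in logger.handlers:
            try:
                handler.set_context(value)
            except AttributeError:
                pass  # plain stdlib handlers have no set_context
        if not logger.propagate:
            break
        logger = logger.parent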
Example #9
    def test_file_task_handler_when_ti_value_is_invalid(self):
        def task_callable(ti, **kwargs):
            ti.log.info("test")

        dag = DAG('dag_for_testing_file_task_handler', start_date=DEFAULT_DATE)
        dag.create_dagrun(run_type=DagRunType.MANUAL, state=State.RUNNING, execution_date=DEFAULT_DATE)
        task = PythonOperator(
            task_id='task_for_testing_file_log_handler',
            dag=dag,
            python_callable=task_callable,
        )
        ti = TaskInstance(task=task, execution_date=DEFAULT_DATE)

        logger = ti.log
        ti.log.disabled = False

        file_handler = next(
            (handler for handler in logger.handlers if handler.name == FILE_TASK_HANDLER), None
        )
        assert file_handler is not None

        set_context(logger, ti)
        assert file_handler.handler is not None
        # We expect set_context to generate a file locally.
        log_filename = file_handler.handler.baseFilename
        assert os.path.isfile(log_filename)
        assert log_filename.endswith("1.log"), log_filename

        ti.run(ignore_ti_state=True)

        file_handler.flush()
        file_handler.close()

        assert hasattr(file_handler, 'read')
        # The return value of read must be a tuple of two lists.
        # Pass an invalid `try_number` to the read function.
        logs, metadatas = file_handler.read(ti, 0)
        assert isinstance(logs, list)
        assert isinstance(metadatas, list)
        assert len(logs) == 1
        assert len(logs) == len(metadatas)
        assert isinstance(metadatas[0], dict)
        assert logs[0][0][0] == "default_host"
        assert logs[0][0][1] == "Error fetching the logs. Try number 0 is invalid."

        # Remove the generated tmp log file.
        os.remove(log_filename)
Example #10
    def test_file_task_handler_running(self):
        def task_callable(ti, **kwargs):
            ti.log.info("test")

        dag = DAG('dag_for_testing_file_task_handler', start_date=DEFAULT_DATE)
        task = PythonOperator(
            task_id='task_for_testing_file_log_handler',
            dag=dag,
            python_callable=task_callable,
        )
        ti = TaskInstance(task=task, execution_date=DEFAULT_DATE)
        ti.try_number = 2
        ti.state = State.RUNNING

        logger = ti.log
        ti.log.disabled = False

        file_handler = next(
            (handler for handler in logger.handlers if handler.name == FILE_TASK_HANDLER), None
        )
        assert file_handler is not None

        set_context(logger, ti)
        assert file_handler.handler is not None
        # We expect set_context to generate a file locally.
        log_filename = file_handler.handler.baseFilename
        assert os.path.isfile(log_filename)
        assert log_filename.endswith("2.log"), log_filename

        logger.info("Test")

        # The return value of read must be a tuple of two lists.
        logs, metadatas = file_handler.read(ti)
        # Logs for running tasks should show up too.
        assert isinstance(logs, list)
        assert isinstance(metadatas, list)
        assert len(logs) == 2
        assert len(logs) == len(metadatas)
        assert isinstance(metadatas[0], dict)

        # Remove the generated tmp log file.
        os.remove(log_filename)
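
The "2.log" suffix asserted above comes from the per-try log path: the file task handler renders one file per task try. A sketch of that naming convention, assuming the default layout (the exact template is configurable via log_filename_template in airflow.cfg):

def render_log_filename(ti, try_number):
    # Assumed default layout: {dag_id}/{task_id}/{execution_date}/{try_number}.log
    return (
        f"{ti.dag_id}/{ti.task_id}/"
        f"{ti.execution_date.isoformat()}/{try_number}.log"
    )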
Example #11
    def test_file_task_handler_running(self):
        def task_callable(ti, **kwargs):
            ti.log.info("test")
        dag = DAG('dag_for_testing_file_task_handler', start_date=DEFAULT_DATE)
        task = PythonOperator(
            task_id='task_for_testing_file_log_handler',
            dag=dag,
            python_callable=task_callable,
            provide_context=True
        )
        ti = TaskInstance(task=task, execution_date=DEFAULT_DATE)
        ti.try_number = 2
        ti.state = State.RUNNING

        logger = ti.log
        ti.log.disabled = False

        file_handler = next((handler for handler in logger.handlers
                             if handler.name == FILE_TASK_HANDLER), None)
        self.assertIsNotNone(file_handler)

        set_context(logger, ti)
        self.assertIsNotNone(file_handler.handler)
        # We expect set_context to generate a file locally.
        log_filename = file_handler.handler.baseFilename
        self.assertTrue(os.path.isfile(log_filename))
        self.assertTrue(log_filename.endswith("2.log"), log_filename)

        logger.info("Test")

        # The return value of read must be a tuple of two lists.
        logs, metadatas = file_handler.read(ti)
        # Logs for running tasks should show up too.
        self.assertTrue(isinstance(logs, list))
        self.assertTrue(isinstance(metadatas, list))
        self.assertEqual(len(logs), 2)
        self.assertEqual(len(logs), len(metadatas))
        self.assertTrue(isinstance(metadatas[0], dict))

        # Remove the generated tmp log file.
        os.remove(log_filename)
Example #12
    def test_file_task_handler_running(self):
        def task_callable(ti, **kwargs):
            ti.log.info("test")

        dag = DAG('dag_for_testing_file_task_handler', start_date=DEFAULT_DATE)
        task = PythonOperator(task_id='task_for_testing_file_log_handler',
                              dag=dag,
                              python_callable=task_callable,
                              provide_context=True)
        ti = TaskInstance(task=task, execution_date=DEFAULT_DATE)
        ti.try_number = 2
        ti.state = State.RUNNING

        logger = ti.log
        ti.log.disabled = False

        file_handler = next((handler for handler in logger.handlers
                             if handler.name == FILE_TASK_HANDLER), None)
        self.assertIsNotNone(file_handler)

        set_context(logger, ti)
        self.assertIsNotNone(file_handler.handler)
        # We expect set_context to generate a file locally.
        log_filename = file_handler.handler.baseFilename
        self.assertTrue(os.path.isfile(log_filename))
        self.assertTrue(log_filename.endswith("2.log"), log_filename)

        logger.info("Test")

        # The return value of read must be a list.
        logs = file_handler.read(ti)
        self.assertTrue(isinstance(logs, list))
        # Logs for running tasks should show up too.
        self.assertEqual(len(logs), 2)

        # Remove the generated tmp log file.
        os.remove(log_filename)
Example #13
    def _run_file_processor(
        result_channel: MultiprocessingConnection,
        parent_channel: MultiprocessingConnection,
        file_path: str,
        pickle_dags: bool,
        dag_ids: Optional[List[str]],
        thread_name: str,
        callback_requests: List[CallbackRequest],
    ) -> None:
        """
        Process the given file.

        :param result_channel: the connection to use for passing back the result
        :type result_channel: multiprocessing.Connection
        :param parent_channel: the parent end of the channel to close in the child
        :type parent_channel: multiprocessing.Connection
        :param file_path: the file to process
        :type file_path: str
        :param pickle_dags: whether to pickle the DAGs found in the file and
            save them to the DB
        :type pickle_dags: bool
        :param dag_ids: if specified, only examine DAG ID's that are
            in this list
        :type dag_ids: list[str]
        :param thread_name: the name to use for the process that is launched
        :type thread_name: str
        :param callback_requests: failure callback to execute
        :type callback_requests: List[airflow.utils.callback_requests.CallbackRequest]
        """
        # This helper runs in the newly created process
        log: logging.Logger = logging.getLogger("airflow.processor")

        # Since we share all open FDs from the parent, we need to close the parent side of the pipe here in
        # the child, else it won't get closed properly until we exit.
        log.info("Closing parent pipe")

        parent_channel.close()
        del parent_channel

        set_context(log, file_path)
        setproctitle(f"airflow scheduler - DagFileProcessor {file_path}")

        try:
            # redirect stdout/stderr to log
            with redirect_stdout(StreamLogWriter(log, logging.INFO)), \
                    redirect_stderr(StreamLogWriter(log, logging.WARN)), \
                    Stats.timer() as timer:
                # Re-configure the ORM engine as there are issues with multiple processes
                settings.configure_orm()

                # Change the thread name to differentiate log lines. This is
                # really a separate process, but changing the name of the
                # process doesn't work, so changing the thread name instead.
                threading.current_thread().name = thread_name

                log.info("Started process (PID=%s) to work on %s", os.getpid(),
                         file_path)
                dag_file_processor = DagFileProcessor(dag_ids=dag_ids, log=log)
                result: Tuple[int, int] = dag_file_processor.process_file(
                    file_path=file_path,
                    pickle_dags=pickle_dags,
                    callback_requests=callback_requests,
                )
                result_channel.send(result)
            log.info("Processing %s took %.3f seconds", file_path,
                     timer.duration)
        except Exception:  # pylint: disable=broad-except
            # Log exceptions through the logging framework.
            log.exception("Got an exception! Propagating...")
            raise
        finally:
            # We re-initialized the ORM within this Process above so we need to
            # tear it down manually here
            settings.dispose_orm()

            result_channel.close()
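
For context, the parent side typically creates the pipe, launches this helper in a fresh process, and then closes its own copy of the sending end so that EOF propagates once the child exits. A hedged sketch of such a call site (launch_file_processor is illustrative; in Airflow 2.x the real logic lives in DagFileProcessorProcess.start):

import multiprocessing

def launch_file_processor(file_path, pickle_dags, dag_ids, thread_name, callback_requests):
    # Illustrative only: parent_channel can only receive, result_channel
    # can only send. The child closes its inherited parent_channel (as
    # shown above); the parent closes its copy of result_channel after
    # process.start() so only the child holds the sending end.
    parent_channel, result_channel = multiprocessing.Pipe(duplex=False)
    process = multiprocessing.Process(
        target=_run_file_processor,
        args=(result_channel, parent_channel, file_path,
              pickle_dags, dag_ids, thread_name, callback_requests),
        name=f"DagFileProcessor {thread_name}",
    )
    process.start()
    result_channel.close()
    return parent_channel, process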