Example No. 1
    def _find_zombies(self, session):
        """
        Find zombie task instances, i.e. tasks that haven't heartbeated for too long.
        :return: Zombie task instances in SimpleTaskInstance format.
        """
        now = timezone.utcnow()
        zombies = []
        if (now - self._last_zombie_query_time).total_seconds() \
                > self._zombie_query_interval:
            # to avoid circular imports
            from airflow.jobs import LocalTaskJob as LJ
            self.log.info("Finding 'running' jobs without a recent heartbeat")
            TI = airflow.models.TaskInstance
            limit_dttm = timezone.utcnow() - timedelta(
                seconds=self._zombie_threshold_secs)
            self.log.info("Failing jobs without heartbeat after %s", limit_dttm)

            tis = (
                session.query(TI)
                .join(LJ, TI.job_id == LJ.id)
                .filter(TI.state == State.RUNNING)
                .filter(
                    or_(
                        LJ.state != State.RUNNING,
                        LJ.latest_heartbeat < limit_dttm,
                    )
                ).all()
            )
            self._last_zombie_query_time = timezone.utcnow()
            for ti in tis:
                zombies.append(SimpleTaskInstance(ti))

        return zombies
Example No. 2
    def assert_tasks_on_executor(self, executor):
        # start the executor
        executor.start()

        success_command = ['true', 'some_parameter']
        fail_command = ['false', 'some_parameter']

        executor.execute_async(key='success', command=success_command)
        executor.execute_async(key='fail', command=fail_command)

        success_future = next(
            k for k, v in executor.futures.items() if v == 'success')
        fail_future = next(
            k for k, v in executor.futures.items() if v == 'fail')

        # wait for the futures to execute, with a timeout
        timeout = timezone.utcnow() + timedelta(seconds=30)
        while not (success_future.done() and fail_future.done()):
            if timezone.utcnow() > timeout:
                raise ValueError(
                    'The futures should have finished; there is probably '
                    'an error communicating with the Dask cluster.')

        # both tasks should have finished
        self.assertTrue(success_future.done())
        self.assertTrue(fail_future.done())

        # check task exceptions
        self.assertTrue(success_future.exception() is None)
        self.assertTrue(fail_future.exception() is not None)
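Example No. 3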
    def test_file_transfer_no_intermediate_dir_error_get(self):
        configuration.conf.set("core", "enable_xcom_pickling", "True")
        test_remote_file_content = \
            "This is remote file content \n which is also multiline " \
            "another line here \n this is last line. EOF"

        # create a test file remotely
        create_file_task = SSHOperator(
            task_id="test_create_file",
            ssh_hook=self.hook,
            command="echo '{0}' > {1}".format(test_remote_file_content,
                                              self.test_remote_filepath),
            do_xcom_push=True,
            dag=self.dag
        )
        self.assertIsNotNone(create_file_task)
        ti1 = TaskInstance(task=create_file_task, execution_date=timezone.utcnow())
        ti1.run()

        # Try to GET test file from remote
        # This should raise an error with "No such file" as the directory
        # does not exist
        with self.assertRaises(Exception) as error:
            get_test_task = SFTPOperator(
                task_id="test_sftp",
                ssh_hook=self.hook,
                local_filepath=self.test_local_filepath_int_dir,
                remote_filepath=self.test_remote_filepath,
                operation=SFTPOperation.GET,
                dag=self.dag
            )
            self.assertIsNotNone(get_test_task)
            ti2 = TaskInstance(task=get_test_task, execution_date=timezone.utcnow())
            ti2.run()
        self.assertIn('No such file', str(error.exception))
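Example No. 4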
    def heartbeat(self):
        """
        Override the scheduler heartbeat to determine when the test is complete
        """
        super(SchedulerMetricsJob, self).heartbeat()
        session = settings.Session()
        # Get all the relevant task instances
        TI = TaskInstance
        successful_tis = (
            session
            .query(TI)
            .filter(TI.dag_id.in_(DAG_IDS))
            .filter(TI.state.in_([State.SUCCESS]))
            .all()
        )
        session.commit()

        dagbag = DagBag(SUBDIR)
        dags = [dagbag.dags[dag_id] for dag_id in DAG_IDS]
        # the tasks in perf_dag_1 and perf_dag_2 have a daily schedule interval.
        num_task_instances = sum([(timezone.utcnow() - task.start_date).days
                                 for dag in dags for task in dag.tasks])

        if (len(successful_tis) == num_task_instances or
                (timezone.utcnow() - self.start_date).total_seconds() >
                MAX_RUNTIME_SECS):
            if len(successful_tis) == num_task_instances:
                self.log.info("All tasks processed! Printing stats.")
            else:
                self.log.info("Test timeout reached. "
                                 "Printing available stats.")
            self.print_stats()
            set_dags_paused_state(True)
            sys.exit()
Example No. 5
    def test_dask_executor_functions(self):
        executor = DaskExecutor(cluster_address=self.cluster.scheduler_address)

        # start the executor
        executor.start()

        success_command = 'echo 1'
        fail_command = 'exit 1'

        executor.execute_async(key='success', command=success_command)
        executor.execute_async(key='fail', command=fail_command)

        success_future = next(
            k for k, v in executor.futures.items() if v == 'success')
        fail_future = next(
            k for k, v in executor.futures.items() if v == 'fail')

        # wait for the futures to execute, with a timeout
        timeout = timezone.utcnow() + timedelta(seconds=30)
        while not (success_future.done() and fail_future.done()):
            if timezone.utcnow() > timeout:
                raise ValueError(
                    'The futures should have finished; there is probably '
                    'an error communicating with the Dask cluster.')

        # both tasks should have finished
        self.assertTrue(success_future.done())
        self.assertTrue(fail_future.done())

        # check task exceptions
        self.assertTrue(success_future.exception() is None)
        self.assertTrue(fail_future.exception() is not None)
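Example No. 6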
    def test_symlink_latest_log_directory(self):
        handler = FileProcessorHandler(base_log_folder=self.base_log_folder,
                                       filename_template=self.filename)
        handler.dag_dir = self.dag_dir

        date1 = (timezone.utcnow() + timedelta(days=1)).strftime("%Y-%m-%d")
        date2 = (timezone.utcnow() + timedelta(days=2)).strftime("%Y-%m-%d")

        p1 = os.path.join(self.base_log_folder, date1, "log1")
        p2 = os.path.join(self.base_log_folder, date1, "log2")

        if os.path.exists(p1):
            os.remove(p1)
        if os.path.exists(p2):
            os.remove(p2)

        link = os.path.join(self.base_log_folder, "latest")

        with freeze_time(date1):
            handler.set_context(filename=os.path.join(self.dag_dir, "log1"))
            self.assertTrue(os.path.islink(link))
            self.assertEqual(os.path.basename(os.readlink(link)), date1)
            self.assertTrue(os.path.exists(os.path.join(link, "log1")))

        with freeze_time(date2):
            handler.set_context(filename=os.path.join(self.dag_dir, "log2"))
            self.assertTrue(os.path.islink(link))
            self.assertEqual(os.path.basename(os.readlink(link)), date2)
            self.assertTrue(os.path.exists(os.path.join(link, "log2")))
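Example No. 7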
 def test_should_fail_before_reschedule_date_multiple(self, find_for_task_instance):
     find_for_task_instance.return_value = [
         self._get_task_reschedule(utcnow() - timedelta(minutes=19)),
         self._get_task_reschedule(utcnow() - timedelta(minutes=9)),
         self._get_task_reschedule(utcnow() + timedelta(minutes=1)),
     ]
     ti = self._get_task_instance(State.UP_FOR_RESCHEDULE)
     self.assertFalse(ReadyToRescheduleDep().is_met(ti=ti))
Example No. 8
 def poke(self, context):
     self.log.info('Poking until weekday is in %s, Today is %s',
                   self.week_day,
                   WeekDay(timezone.utcnow().isoweekday()).name)
     if self.use_task_execution_day:
         return context['execution_date'].isoweekday() in self._week_day_num
     else:
         return timezone.utcnow().isoweekday() in self._week_day_num
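This poke() belongs to a day-of-week sensor. As a rough usage sketch (assuming the DayOfWeekSensor shipped in airflow.contrib.sensors.weekday_sensor in Airflow 1.10.x and an illustrative DAG, neither of which comes from the source above), wiring it into a DAG could look like:

# Illustrative sketch only: gate a task until the run falls on a weekend.
from datetime import datetime

from airflow import DAG
from airflow.contrib.sensors.weekday_sensor import DayOfWeekSensor  # import path assumed (Airflow 1.10.x)
from airflow.operators.dummy_operator import DummyOperator

with DAG(dag_id='weekend_only_example', start_date=datetime(2019, 1, 1),
         schedule_interval='@daily') as dag:
    wait_for_weekend = DayOfWeekSensor(
        task_id='wait_for_weekend',
        week_day={'Saturday', 'Sunday'},
        use_task_execution_day=True,  # compare against execution_date rather than wall-clock time
        poke_interval=60 * 60,
    )
    run_report = DummyOperator(task_id='run_report')
    wait_for_weekend >> run_report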
Example No. 9
 def _print_stat(self):
     """
     Occasionally print out stats about how fast the files are getting processed
     """
     if ((timezone.utcnow() - self.last_stat_print_time).total_seconds() >
             self.print_stats_interval):
         if len(self._file_paths) > 0:
             self._log_file_processing_stats(self._file_paths)
         self.last_stat_print_time = timezone.utcnow()
Example No. 10
 def execute(self, context):
     started_at = timezone.utcnow()
     while not self.poke(context):
         if (timezone.utcnow() - started_at).total_seconds() > self.timeout:
             if self.soft_fail:
                 raise AirflowSkipException('Snap. Time is OUT.')
             else:
                 raise AirflowSensorTimeout('Snap. Time is OUT.')
         sleep(self.poke_interval)
     self.log.info("Success criteria met. Exiting.")
 def execute(self, context):
     started_at = timezone.utcnow()
     while not self.poke(context):
         if (timezone.utcnow() - started_at).total_seconds() > self.timeout:
             # If sensor is in soft fail mode but will be retried then
             # give it a chance and fail with timeout.
             # This gives the ability to set up non-blocking AND soft-fail sensors.
             if self.soft_fail and not context['ti'].is_eligible_to_retry():
                 self._do_skip_downstream_tasks(context)
                 raise AirflowSkipException('Snap. Time is OUT.')
             else:
                 raise AirflowSensorTimeout('Snap. Time is OUT.')
         sleep(self.poke_interval)
     self.log.info("Success criteria met. Exiting.")
Example No. 12
    def test_post_execute_hook(self):
        """
        Test that post_execute hook is called with the Operator's result.
        The result ('error') will cause an error to be raised and trapped.
        """

        class TestError(Exception):
            pass

        class TestOperator(PythonOperator):
            def post_execute(self, context, result):
                if result == 'error':
                    raise TestError('expected error.')

        dag = models.DAG(dag_id='test_post_execute_dag')
        task = TestOperator(
            task_id='test_operator',
            dag=dag,
            python_callable=lambda: 'error',
            owner='airflow',
            start_date=timezone.datetime(2017, 2, 1))
        ti = TI(task=task, execution_date=timezone.utcnow())

        with self.assertRaises(TestError):
            ti.run()
Example No. 13
    def test_xcom_pull_different_execution_date(self):
        """
        tests xcom fetch behavior with different execution dates, using
        both xcom_pull with "include_prior_dates" and without
        """
        key = 'xcom_key'
        value = 'xcom_value'

        dag = models.DAG(dag_id='test_xcom', schedule_interval='@monthly')
        task = DummyOperator(
            task_id='test_xcom',
            dag=dag,
            pool='test_xcom',
            owner='airflow',
            start_date=timezone.datetime(2016, 6, 2, 0, 0, 0))
        exec_date = timezone.utcnow()
        ti = TI(
            task=task, execution_date=exec_date)
        ti.run(mark_success=True)
        ti.xcom_push(key=key, value=value)
        self.assertEqual(ti.xcom_pull(task_ids='test_xcom', key=key), value)
        ti.run()
        exec_date += datetime.timedelta(days=1)
        ti = TI(
            task=task, execution_date=exec_date)
        ti.run()
        # We have set a new execution date (and did not pass in
        # 'include_prior_dates') which means this task should now have a cleared
        # xcom value
        self.assertEqual(ti.xcom_pull(task_ids='test_xcom', key=key), None)
        # We *should* get a value using 'include_prior_dates'
        self.assertEqual(ti.xcom_pull(task_ids='test_xcom',
                                      key=key,
                                      include_prior_dates=True),
                         value)
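The same push/pull semantics apply outside of tests. A minimal sketch of the pattern from inside PythonOperator callables (the task id 'push_task' is hypothetical; in Airflow 1.x the operator would need provide_context=True for the context kwargs to be passed):

# Illustrative sketch only: pushing and pulling an XCom from task callables.
def push_value(**context):
    context['ti'].xcom_push(key='xcom_key', value='xcom_value')


def pull_value(**context):
    # Without include_prior_dates only the current execution_date is searched;
    # with it, XComs from earlier execution dates are considered as well.
    current = context['ti'].xcom_pull(task_ids='push_task', key='xcom_key')
    any_run = context['ti'].xcom_pull(task_ids='push_task', key='xcom_key',
                                      include_prior_dates=True)
    return current, any_run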
Example No. 14
def health():
    """
    An endpoint helping check the health status of the Airflow instance,
    including metadatabase and scheduler.
    """
    session = settings.Session()
    BJ = jobs.BaseJob
    payload = {}
    scheduler_health_check_threshold = timedelta(seconds=conf.getint('scheduler',
                                                                     'scheduler_health_check_threshold'
                                                                     ))

    latest_scheduler_heartbeat = None
    payload['metadatabase'] = {'status': 'healthy'}
    try:
        latest_scheduler_heartbeat = session.query(func.max(BJ.latest_heartbeat)). \
            filter(BJ.state == 'running', BJ.job_type == 'SchedulerJob'). \
            scalar()
    except Exception:
        payload['metadatabase']['status'] = 'unhealthy'

    if not latest_scheduler_heartbeat:
        scheduler_status = 'unhealthy'
    else:
        if timezone.utcnow() - latest_scheduler_heartbeat <= scheduler_health_check_threshold:
            scheduler_status = 'healthy'
        else:
            scheduler_status = 'unhealthy'

    payload['scheduler'] = {'status': scheduler_status,
                            'latest_scheduler_heartbeat': str(latest_scheduler_heartbeat)}

    return wwwutils.json_response(payload)
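A quick way to exercise this endpoint is to poll it over HTTP and inspect the JSON it returns. A minimal sketch, assuming the requests library and a webserver at a hypothetical http://localhost:8080:

# Illustrative sketch only: polling the /health endpoint defined above.
import requests

resp = requests.get("http://localhost:8080/health")
payload = resp.json()

if (payload['metadatabase']['status'] == 'healthy'
        and payload['scheduler']['status'] == 'healthy'):
    print("Airflow is healthy; latest scheduler heartbeat:",
          payload['scheduler']['latest_scheduler_heartbeat'])
else:
    print("Airflow reports a problem:", payload)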
Example No. 15
    def _log_file_processing_stats(self, known_file_paths):
        """
        Print out stats about how files are getting processed.
        :param known_file_paths: a list of file paths that may contain Airflow
            DAG definitions
        :type known_file_paths: list[unicode]
        :return: None
        """

        # File Path: Path to the file containing the DAG definition
        # PID: PID associated with the process that's processing the file. May
        # be empty.
        # Runtime: If the process is currently running, how long it's been
        # running for in seconds.
        # Last Runtime: If the process ran before, how long did it take to
        # finish in seconds
        # Last Run: When the file finished processing in the previous run.
        headers = ["File Path",
                   "PID",
                   "Runtime",
                   "Last Runtime",
                   "Last Run"]

        rows = []
        for file_path in known_file_paths:
            last_runtime = self.get_last_runtime(file_path)
            processor_pid = self.get_pid(file_path)
            processor_start_time = self.get_start_time(file_path)
            runtime = ((timezone.utcnow() - processor_start_time).total_seconds()
                       if processor_start_time else None)
            last_run = self.get_last_finish_time(file_path)

            rows.append((file_path,
                         processor_pid,
                         runtime,
                         last_runtime,
                         last_run))

        # Sort by longest last runtime. (Can't sort None values in python3)
        rows = sorted(rows, key=lambda x: x[3] or 0.0)

        formatted_rows = []
        for file_path, pid, runtime, last_runtime, last_run in rows:
            formatted_rows.append((file_path,
                                   pid,
                                   "{:.2f}s".format(runtime)
                                   if runtime else None,
                                   "{:.2f}s".format(last_runtime)
                                   if last_runtime else None,
                                   last_run.strftime("%Y-%m-%dT%H:%M:%S")
                                   if last_run else None))
        log_str = ("\n" +
                   "=" * 80 +
                   "\n" +
                   "DAG File Processing Stats\n\n" +
                   tabulate(formatted_rows, headers=headers) +
                   "\n" +
                   "=" * 80)

        self.log.info(log_str)
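Example No. 16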
    def test_with_skip_in_branch_downstream_dependencies2(self):
        self.branch_op = BranchPythonOperator(task_id='make_choice',
                                              dag=self.dag,
                                              python_callable=lambda: 'branch_2')

        self.branch_op >> self.branch_1 >> self.branch_2
        self.branch_op >> self.branch_2
        self.dag.clear()

        dr = self.dag.create_dagrun(
            run_id="manual__",
            start_date=timezone.utcnow(),
            execution_date=DEFAULT_DATE,
            state=State.RUNNING
        )

        self.branch_op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)

        tis = dr.get_task_instances()
        for ti in tis:
            if ti.task_id == 'make_choice':
                self.assertEqual(ti.state, State.SUCCESS)
            elif ti.task_id == 'branch_1':
                self.assertEqual(ti.state, State.SKIPPED)
            elif ti.task_id == 'branch_2':
                self.assertEqual(ti.state, State.NONE)
            else:
                raise Exception
Example No. 17
    def test_xcom_pull_after_success(self):
        """
        tests xcom set/clear relative to a task in a 'success' rerun scenario
        """
        key = 'xcom_key'
        value = 'xcom_value'

        dag = models.DAG(dag_id='test_xcom', schedule_interval='@monthly')
        task = DummyOperator(
            task_id='test_xcom',
            dag=dag,
            pool='test_xcom',
            owner='airflow',
            start_date=timezone.datetime(2016, 6, 2, 0, 0, 0))
        exec_date = timezone.utcnow()
        ti = TI(
            task=task, execution_date=exec_date)
        ti.run(mark_success=True)
        ti.xcom_push(key=key, value=value)
        self.assertEqual(ti.xcom_pull(task_ids='test_xcom', key=key), value)
        ti.run()
        # The second run and assert is to handle AIRFLOW-131 (don't clear on
        # prior success)
        self.assertEqual(ti.xcom_pull(task_ids='test_xcom', key=key), value)

        # Test AIRFLOW-703: Xcom shouldn't be cleared if the task doesn't
        # execute, even if dependencies are ignored
        ti.run(ignore_all_deps=True, mark_success=True)
        self.assertEqual(ti.xcom_pull(task_ids='test_xcom', key=key), value)
        # Xcom IS finally cleared once task has executed
        ti.run(ignore_all_deps=True)
        self.assertEqual(ti.xcom_pull(task_ids='test_xcom', key=key), None)
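Example No. 18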
 def _make_dag_run(self):
     return self.dag.create_dagrun(
         run_id='manual__',
         start_date=timezone.utcnow(),
         execution_date=DEFAULT_DATE,
         state=State.RUNNING
     )
Example No. 19
    def test_file_transfer_no_intermediate_dir_error_put(self):
        configuration.conf.set("core", "enable_xcom_pickling", "True")
        test_local_file_content = \
            b"This is local file content \n which is multiline " \
            b"continuing....with other character\nanother line here \n this is last line"
        # create a test file locally
        with open(self.test_local_filepath, 'wb') as f:
            f.write(test_local_file_content)

        # Try to put test file to remote
        # This should raise an error with "No such file" as the directory
        # does not exist
        with self.assertRaises(Exception) as error:
            put_test_task = SFTPOperator(
                task_id="test_sftp",
                ssh_hook=self.hook,
                local_filepath=self.test_local_filepath,
                remote_filepath=self.test_remote_filepath_int_dir,
                operation=SFTPOperation.PUT,
                create_intermediate_dirs=False,
                dag=self.dag
            )
            self.assertIsNotNone(put_test_task)
            ti2 = TaskInstance(task=put_test_task, execution_date=timezone.utcnow())
            ti2.run()
        self.assertIn('No such file', str(error.exception))
Example No. 20
    def test_xcom_enable_pickle_type(self):
        json_obj = {"key": "value"}
        execution_date = timezone.utcnow()
        key = "xcom_test2"
        dag_id = "test_dag2"
        task_id = "test_task2"

        configuration.set("core", "enable_xcom_pickling", "True")

        XCom.set(key=key,
                 value=json_obj,
                 dag_id=dag_id,
                 task_id=task_id,
                 execution_date=execution_date)

        ret_value = XCom.get_one(key=key,
                                 dag_id=dag_id,
                                 task_id=task_id,
                                 execution_date=execution_date)

        self.assertEqual(ret_value, json_obj)

        session = settings.Session()
        ret_value = session.query(XCom).filter(XCom.key == key, XCom.dag_id == dag_id,
                                               XCom.task_id == task_id,
                                               XCom.execution_date == execution_date
                                               ).first().value

        self.assertEqual(ret_value, json_obj)
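Example No. 21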
    def test_reschedule_with_test_mode(self):
        sensor = self._make_sensor(
            return_value=None,
            poke_interval=10,
            timeout=25,
            mode='reschedule')
        sensor.poke = Mock(side_effect=[False])
        dr = self._make_dag_run()

        # poke returns False and AirflowRescheduleException is raised
        date1 = timezone.utcnow()
        with freeze_time(date1):
            for dt in self.dag.date_range(DEFAULT_DATE, end_date=DEFAULT_DATE):
                TaskInstance(sensor, dt).run(
                    ignore_ti_state=True,
                    test_mode=True)
        tis = dr.get_task_instances()
        self.assertEquals(len(tis), 2)
        for ti in tis:
            if ti.task_id == SENSOR_OP:
                # in test mode state is not modified
                self.assertEquals(ti.state, State.NONE)
                # in test mode no reschedule request is recorded
                task_reschedules = TaskReschedule.find_for_task_instance(ti)
                self.assertEquals(len(task_reschedules), 0)
            if ti.task_id == DUMMY_OP:
                self.assertEquals(ti.state, State.NONE)
Example No. 22
    def test_xcom_get_many(self):
        json_obj = {"key": "value"}
        execution_date = timezone.utcnow()
        key = "xcom_test4"
        dag_id1 = "test_dag4"
        task_id1 = "test_task4"
        dag_id2 = "test_dag5"
        task_id2 = "test_task5"

        configuration.set("core", "xcom_enable_pickling", "True")

        XCom.set(key=key,
                 value=json_obj,
                 dag_id=dag_id1,
                 task_id=task_id1,
                 execution_date=execution_date)

        XCom.set(key=key,
                 value=json_obj,
                 dag_id=dag_id2,
                 task_id=task_id2,
                 execution_date=execution_date)

        results = XCom.get_many(key=key,
                                execution_date=execution_date)

        for result in results:
            self.assertEqual(result.value, json_obj)
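Example No. 23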
    def execute(self, context):
        # If the DAG Run is externally triggered, then return without
        # skipping downstream tasks
        if context['dag_run'] and context['dag_run'].external_trigger:
            self.log.info("Externally triggered DAG_Run: allowing execution to proceed.")
            return

        now = timezone.utcnow()
        left_window = context['dag'].following_schedule(
            context['execution_date'])
        right_window = context['dag'].following_schedule(left_window)
        self.log.info(
            'Checking latest only with left_window: %s right_window: %s now: %s',
            left_window, right_window, now
        )

        if not left_window < now <= right_window:
            self.log.info('Not latest execution, skipping downstream.')

            downstream_tasks = context['task'].get_flat_relatives(upstream=False)
            self.log.debug("Downstream task_ids %s", downstream_tasks)

            if downstream_tasks:
                self.skip(context['dag_run'],
                          context['ti'].execution_date,
                          downstream_tasks)

            self.log.info('Done.')
        else:
            self.log.info('Latest, allowing execution to proceed.')
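Example No. 24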
    def test_soft_fail_with_reschedule(self):
        sensor = self._make_sensor(
            return_value=False,
            poke_interval=10,
            timeout=5,
            soft_fail=True,
            mode='reschedule')
        dr = self._make_dag_run()

        # first poke returns False and task is re-scheduled
        date1 = timezone.utcnow()
        with freeze_time(date1):
            self._run(sensor)
        tis = dr.get_task_instances()
        self.assertEquals(len(tis), 2)
        for ti in tis:
            if ti.task_id == SENSOR_OP:
                self.assertEquals(ti.state, State.NONE)
            if ti.task_id == DUMMY_OP:
                self.assertEquals(ti.state, State.NONE)

        # second poke returns False, timeout occurs
        date2 = date1 + timedelta(seconds=sensor.poke_interval)
        with freeze_time(date2):
            self._run(sensor)
        tis = dr.get_task_instances()
        self.assertEquals(len(tis), 2)
        for ti in tis:
            self.assertEquals(ti.state, State.SKIPPED)
Example No. 25
    def test_with_dag_run(self):
        value = False
        dag = DAG('shortcircuit_operator_test_with_dag_run',
                  default_args={
                       'owner': 'airflow',
                       'start_date': DEFAULT_DATE
                  },
                  schedule_interval=INTERVAL)
        short_op = ShortCircuitOperator(task_id='make_choice',
                                        dag=dag,
                                        python_callable=lambda: value)
        branch_1 = DummyOperator(task_id='branch_1', dag=dag)
        branch_1.set_upstream(short_op)
        branch_2 = DummyOperator(task_id='branch_2', dag=dag)
        branch_2.set_upstream(branch_1)
        upstream = DummyOperator(task_id='upstream', dag=dag)
        upstream.set_downstream(short_op)
        dag.clear()

        logging.error("Tasks {}".format(dag.tasks))
        dr = dag.create_dagrun(
            run_id="manual__",
            start_date=timezone.utcnow(),
            execution_date=DEFAULT_DATE,
            state=State.RUNNING
        )

        upstream.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)
        short_op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)

        tis = dr.get_task_instances()
        self.assertEqual(len(tis), 4)
        for ti in tis:
            if ti.task_id == 'make_choice':
                self.assertEquals(ti.state, State.SUCCESS)
            elif ti.task_id == 'upstream':
                self.assertEquals(ti.state, State.SUCCESS)
            elif ti.task_id == 'branch_1' or ti.task_id == 'branch_2':
                self.assertEquals(ti.state, State.SKIPPED)
            else:
                raise

        value = True
        dag.clear()
        dr.verify_integrity()
        upstream.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)
        short_op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)

        tis = dr.get_task_instances()
        self.assertEqual(len(tis), 4)
        for ti in tis:
            if ti.task_id == 'make_choice':
                self.assertEquals(ti.state, State.SUCCESS)
            elif ti.task_id == 'upstream':
                self.assertEquals(ti.state, State.SUCCESS)
            elif ti.task_id == 'branch_1' or ti.task_id == 'branch_2':
                self.assertEquals(ti.state, State.NONE)
            else:
                raise
Example No. 26
 def test_check_and_change_state_before_execution_dep_not_met(self):
     dag = models.DAG(dag_id='test_check_and_change_state_before_execution')
     task = DummyOperator(task_id='task', dag=dag, start_date=DEFAULT_DATE)
     task2 = DummyOperator(task_id='task2', dag=dag, start_date=DEFAULT_DATE)
     task >> task2
     ti = TI(
         task=task2, execution_date=timezone.utcnow())
     self.assertFalse(ti._check_and_change_state_before_execution())
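Example No. 27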
    def test_ok_with_custom_reschedule_exception(self):
        sensor = self._make_sensor(
            return_value=None,
            mode='reschedule')
        date1 = timezone.utcnow()
        date2 = date1 + timedelta(seconds=60)
        date3 = date1 + timedelta(seconds=120)
        sensor.poke = Mock(side_effect=[
            AirflowRescheduleException(date2),
            AirflowRescheduleException(date3),
            True,
        ])
        dr = self._make_dag_run()

        # first poke returns False and task is re-scheduled
        with freeze_time(date1):
            self._run(sensor)
        tis = dr.get_task_instances()
        self.assertEquals(len(tis), 2)
        for ti in tis:
            if ti.task_id == SENSOR_OP:
                # verify task is re-scheduled, i.e. state set to NONE
                self.assertEquals(ti.state, State.NONE)
                # verify one row in task_reschedule table
                task_reschedules = TaskReschedule.find_for_task_instance(ti)
                self.assertEquals(len(task_reschedules), 1)
                self.assertEquals(task_reschedules[0].start_date, date1)
                self.assertEquals(task_reschedules[0].reschedule_date, date2)
            if ti.task_id == DUMMY_OP:
                self.assertEquals(ti.state, State.NONE)

        # second poke returns False and task is re-scheduled
        with freeze_time(date2):
            self._run(sensor)
        tis = dr.get_task_instances()
        self.assertEquals(len(tis), 2)
        for ti in tis:
            if ti.task_id == SENSOR_OP:
                # verify task is re-scheduled, i.e. state set to NONE
                self.assertEquals(ti.state, State.NONE)
                # verify two rows in task_reschedule table
                task_reschedules = TaskReschedule.find_for_task_instance(ti)
                self.assertEquals(len(task_reschedules), 2)
                self.assertEquals(task_reschedules[1].start_date, date2)
                self.assertEquals(task_reschedules[1].reschedule_date, date3)
            if ti.task_id == DUMMY_OP:
                self.assertEquals(ti.state, State.NONE)

        # third poke returns True and task succeeds
        with freeze_time(date3):
            self._run(sensor)
        tis = dr.get_task_instances()
        self.assertEquals(len(tis), 2)
        for ti in tis:
            if ti.task_id == SENSOR_OP:
                self.assertEquals(ti.state, State.SUCCESS)
            if ti.task_id == DUMMY_OP:
                self.assertEquals(ti.state, State.NONE)
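Example No. 28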
    def test_s3_to_sftp_operation(self):
        # Setting
        configuration.conf.set("core", "enable_xcom_pickling", "True")
        test_remote_file_content = \
            "This is remote file content \n which is also multiline " \
            "another line here \n this is last line. EOF"

        # Test for creation of s3 bucket
        conn = boto3.client('s3')
        conn.create_bucket(Bucket=self.s3_bucket)
        self.assertTrue((self.s3_hook.check_for_bucket(self.s3_bucket)))

        with open(LOCAL_FILE_PATH, 'w') as f:
            f.write(test_remote_file_content)
        self.s3_hook.load_file(LOCAL_FILE_PATH, self.s3_key, bucket_name=BUCKET)

        # Check if object was created in s3
        objects_in_dest_bucket = conn.list_objects(Bucket=self.s3_bucket,
                                                   Prefix=self.s3_key)
        # there should be an object found, and there should only be one object found
        self.assertEqual(len(objects_in_dest_bucket['Contents']), 1)

        # the object found should be consistent with dest_key specified earlier
        self.assertEqual(objects_in_dest_bucket['Contents'][0]['Key'], self.s3_key)

        # get remote file to local
        run_task = S3ToSFTPOperator(
            s3_bucket=BUCKET,
            s3_key=S3_KEY,
            sftp_path=SFTP_PATH,
            sftp_conn_id=SFTP_CONN_ID,
            s3_conn_id=S3_CONN_ID,
            task_id=TASK_ID,
            dag=self.dag
        )
        self.assertIsNotNone(run_task)

        run_task.execute(None)

        # Check that the file is created remotely
        check_file_task = SSHOperator(
            task_id="test_check_file",
            ssh_hook=self.hook,
            command="cat {0}".format(self.sftp_path),
            do_xcom_push=True,
            dag=self.dag
        )
        self.assertIsNotNone(check_file_task)
        ti3 = TaskInstance(task=check_file_task, execution_date=timezone.utcnow())
        ti3.run()
        self.assertEqual(
            ti3.xcom_pull(task_ids='test_check_file', key='return_value').strip(),
            test_remote_file_content.encode('utf-8'))

        # Clean up after finishing with test
        conn.delete_object(Bucket=self.s3_bucket, Key=self.s3_key)
        conn.delete_bucket(Bucket=self.s3_bucket)
        self.assertFalse((self.s3_hook.check_for_bucket(self.s3_bucket)))
Example No. 29
    def test_ok_with_reschedule(self):
        sensor = self._make_sensor(
            return_value=None,
            poke_interval=10,
            timeout=25,
            mode='reschedule')
        sensor.poke = Mock(side_effect=[False, False, True])
        dr = self._make_dag_run()

        # first poke returns False and task is re-scheduled
        date1 = timezone.utcnow()
        with freeze_time(date1):
            self._run(sensor)
        tis = dr.get_task_instances()
        self.assertEquals(len(tis), 2)
        for ti in tis:
            if ti.task_id == SENSOR_OP:
                # verify task is re-scheduled, i.e. state set to UP_FOR_RESCHEDULE
                self.assertEquals(ti.state, State.UP_FOR_RESCHEDULE)
                # verify one row in task_reschedule table
                task_reschedules = TaskReschedule.find_for_task_instance(ti)
                self.assertEquals(len(task_reschedules), 1)
                self.assertEquals(task_reschedules[0].start_date, date1)
                self.assertEquals(task_reschedules[0].reschedule_date,
                                  date1 + timedelta(seconds=sensor.poke_interval))
            if ti.task_id == DUMMY_OP:
                self.assertEquals(ti.state, State.NONE)

        # second poke returns False and task is re-scheduled
        date2 = date1 + timedelta(seconds=sensor.poke_interval)
        with freeze_time(date2):
            self._run(sensor)
        tis = dr.get_task_instances()
        self.assertEquals(len(tis), 2)
        for ti in tis:
            if ti.task_id == SENSOR_OP:
                # verify task is re-scheduled, i.e. state set to UP_FOR_RESCHEDULE
                self.assertEquals(ti.state, State.UP_FOR_RESCHEDULE)
                # verify two rows in task_reschedule table
                task_reschedules = TaskReschedule.find_for_task_instance(ti)
                self.assertEquals(len(task_reschedules), 2)
                self.assertEquals(task_reschedules[1].start_date, date2)
                self.assertEquals(task_reschedules[1].reschedule_date,
                                  date2 + timedelta(seconds=sensor.poke_interval))
            if ti.task_id == DUMMY_OP:
                self.assertEquals(ti.state, State.NONE)

        # third poke returns True and task succeeds
        date3 = date2 + timedelta(seconds=sensor.poke_interval)
        with freeze_time(date3):
            self._run(sensor)
        tis = dr.get_task_instances()
        self.assertEquals(len(tis), 2)
        for ti in tis:
            if ti.task_id == SENSOR_OP:
                self.assertEquals(ti.state, State.SUCCESS)
            if ti.task_id == DUMMY_OP:
                self.assertEquals(ti.state, State.NONE)
Example No. 30
    def _refresh_dag_dir(self):
        """
        Refresh file paths from dag dir if we haven't done it for too long.
        """
        elapsed_time_since_refresh = (timezone.utcnow() -
                                      self.last_dag_dir_refresh_time).total_seconds()
        if elapsed_time_since_refresh > self.dag_dir_list_interval:
            # Build up a list of Python files that could contain DAGs
            self.log.info("Searching for files in %s", self._dag_directory)
            self._file_paths = list_py_file_paths(self._dag_directory)
            self.last_dag_dir_refresh_time = timezone.utcnow()
            self.log.info("There are %s files in %s", len(self._file_paths), self._dag_directory)
            self.set_file_paths(self._file_paths)

            try:
                self.log.debug("Removing old import errors")
                self.clear_nonexistent_import_errors()
            except Exception:
                self.log.exception("Error removing old import errors")
Example No. 31
    def update_state(self, session: Session = None) -> List[TI]:
        """
        Determines the overall state of the DagRun based on the state
        of its TaskInstances.

        :param session: Sqlalchemy ORM Session
        :type session: Session
        :return: ready_tis: the tis that can be scheduled in the current loop
        :rtype ready_tis: list[airflow.models.TaskInstance]
        """

        dag = self.get_dag()
        ready_tis: List[TI] = []
        tis = list(
            self.get_task_instances(session=session,
                                    state=State.task_states +
                                    (State.SHUTDOWN, )))
        self.log.debug("number of tis tasks for %s: %s task(s)", self,
                       len(tis))
        for ti in tis:
            ti.task = dag.get_task(ti.task_id)

        start_dttm = timezone.utcnow()
        unfinished_tasks = [t for t in tis if t.state in State.unfinished()]
        finished_tasks = [
            t for t in tis
            if t.state in State.finished() + [State.UPSTREAM_FAILED]
        ]
        none_depends_on_past = all(not t.task.depends_on_past
                                   for t in unfinished_tasks)
        none_task_concurrency = all(t.task.task_concurrency is None
                                    for t in unfinished_tasks)
        if unfinished_tasks:
            scheduleable_tasks = [
                ut for ut in unfinished_tasks
                if ut.state in SCHEDULEABLE_STATES
            ]
            self.log.debug("number of scheduleable tasks for %s: %s task(s)",
                           self, len(scheduleable_tasks))
            ready_tis, changed_tis = self._get_ready_tis(
                scheduleable_tasks, finished_tasks, session)
            self.log.debug("ready tis length for %s: %s task(s)", self,
                           len(ready_tis))
            if none_depends_on_past and none_task_concurrency:
                # small speed up
                are_runnable_tasks = ready_tis or self._are_premature_tis(
                    unfinished_tasks, finished_tasks, session) or changed_tis

        duration = (timezone.utcnow() - start_dttm)
        Stats.timing("dagrun.dependency-check.{}".format(self.dag_id),
                     duration)

        leaf_task_ids = {t.task_id for t in dag.leaves}
        leaf_tis = [ti for ti in tis if ti.task_id in leaf_task_ids]

        # if all roots finished and at least one failed, the run failed
        if not unfinished_tasks and any(
                leaf_ti.state in {State.FAILED, State.UPSTREAM_FAILED}
                for leaf_ti in leaf_tis):
            self.log.error('Marking run %s failed', self)
            self.set_state(State.FAILED)
            dag.handle_callback(self,
                                success=False,
                                reason='task_failure',
                                session=session)

        # if all leaves succeeded and no unfinished tasks, the run succeeded
        elif not unfinished_tasks and all(
                leaf_ti.state in {State.SUCCESS, State.SKIPPED}
                for leaf_ti in leaf_tis):
            self.log.info('Marking run %s successful', self)
            self.set_state(State.SUCCESS)
            dag.handle_callback(self,
                                success=True,
                                reason='success',
                                session=session)

        # if *all tasks* are deadlocked, the run failed
        elif (unfinished_tasks and none_depends_on_past
              and none_task_concurrency and not are_runnable_tasks):
            self.log.error('Deadlock; marking run %s failed', self)
            self.set_state(State.FAILED)
            dag.handle_callback(self,
                                success=False,
                                reason='all_tasks_deadlocked',
                                session=session)

        # finally, if the roots aren't done, the dag is still running
        else:
            self.set_state(State.RUNNING)

        self._emit_duration_stats_for_finished_state()

        # TODO: determine whether we want to use with_for_update to make sure to lock the run
        session.merge(self)
        session.commit()

        return ready_tis
Example No. 32
def task_instance_link(attr):
    """Generates a URL to the Graph view for a TaskInstance."""
    dag_id = attr.get('dag_id')
    task_id = attr.get('task_id')
    execution_date = attr.get('dag_run.execution_date') or attr.get('execution_date') or timezone.utcnow()
    url = url_for('Airflow.task', dag_id=dag_id, task_id=task_id, execution_date=execution_date.isoformat())
    url_root = url_for(
        'Airflow.graph', dag_id=dag_id, root=task_id, execution_date=execution_date.isoformat()
    )
    return Markup(
        """
        <span style="white-space: nowrap;">
        <a href="{url}">{task_id}</a>
        <a href="{url_root}" title="Filter on this task and upstream">
        <span class="material-icons" style="margin-left:0;"
            aria-hidden="true">filter_alt</span>
        </a>
        </span>
        """
    ).format(url=url, task_id=task_id, url_root=url_root)
Example No. 33
    def _execute(self):
        self._enable_task_listeners()
        self.task_runner = get_task_runner(self)

        def signal_handler(signum, frame):
            """Setting kill signal handler"""
            self.log.error("Received SIGTERM. Terminating subprocesses")
            self.task_runner.terminate()
            self.handle_task_exit(128 + signum)

        signal.signal(signal.SIGTERM, signal_handler)

        if not self.task_instance.check_and_change_state_before_execution(
                mark_success=self.mark_success,
                ignore_all_deps=self.ignore_all_deps,
                ignore_depends_on_past=self.ignore_depends_on_past,
                ignore_task_deps=self.ignore_task_deps,
                ignore_ti_state=self.ignore_ti_state,
                job_id=self.id,
                pool=self.pool,
                external_executor_id=self.external_executor_id,
        ):
            self.log.info("Task is not able to be run")
            return

        try:
            self.task_runner.start()

            heartbeat_time_limit = conf.getint(
                'scheduler', 'scheduler_zombie_task_threshold')

            # LocalTaskJob should not run callbacks, which are handled by TaskInstance._run_raw_task
            # 1. LocalTaskJob does not parse the DAG, thus it cannot run callbacks
            # 2. The run_as_user of LocalTaskJob is likely not the same as that of TaskInstance._run_raw_task.
            # When run_as_user is specified, the process owner of the LocalTaskJob must be sudoable.
            # It is not secure to run callbacks with sudoable users.

            # If _run_raw_task receives SIGKILL, scheduler will mark it as zombie and invoke callbacks
            # If LocalTaskJob receives SIGTERM, LocalTaskJob passes SIGTERM to _run_raw_task
            # If the state of task_instance is changed, LocalTaskJob sends SIGTERM to _run_raw_task
            while not self.terminating:
                # Monitor the task to see if it's done. Wait in a syscall
                # (`os.wait`) for as long as possible so we notice the
                # subprocess finishing as quickly as we can
                max_wait_time = max(
                    0,  # Make sure this value is never negative,
                    min(
                        (heartbeat_time_limit -
                         (timezone.utcnow() -
                          self.latest_heartbeat).total_seconds() * 0.75),
                        self.heartrate,
                    ),
                )

                return_code = self.task_runner.return_code(
                    timeout=max_wait_time)
                if return_code is not None:
                    self.handle_task_exit(return_code)
                    return

                self.heartbeat()

                # If it's been too long since we've heartbeat, then it's possible that
                # the scheduler rescheduled this task, so kill launched processes.
                # This can only really happen if the worker can't read the DB for a long time
                time_since_last_heartbeat = (
                    timezone.utcnow() - self.latest_heartbeat).total_seconds()
                if time_since_last_heartbeat > heartbeat_time_limit:
                    Stats.incr('local_task_job_prolonged_heartbeat_failure', 1,
                               1)
                    self.log.error("Heartbeat time limit exceeded!")
                    raise AirflowException(
                        f"Time since last heartbeat({time_since_last_heartbeat:.2f}s) exceeded limit "
                        f"({heartbeat_time_limit}s).")
        finally:
            self.on_kill()
Example No. 34
    def update_state(
        self,
        session: Session = NEW_SESSION,
        execute_callbacks: bool = True
    ) -> Tuple[List[TI], Optional[DagCallbackRequest]]:
        """
        Determines the overall state of the DagRun based on the state
        of its TaskInstances.

        :param session: Sqlalchemy ORM Session
        :param execute_callbacks: Should dag callbacks (success/failure, SLA etc) be invoked
            directly (default: true) or recorded as a pending request in the ``callback`` property
        :return: Tuple containing tis that can be scheduled in the current loop & `callback` that
            needs to be executed
        """
        # Callback to execute in case of Task Failures
        callback: Optional[DagCallbackRequest] = None

        start_dttm = timezone.utcnow()
        self.last_scheduling_decision = start_dttm
        with Stats.timer(f"dagrun.dependency-check.{self.dag_id}"):
            dag = self.get_dag()
            info = self.task_instance_scheduling_decisions(session)

            tis = info.tis
            schedulable_tis = info.schedulable_tis
            changed_tis = info.changed_tis
            finished_tis = info.finished_tis
            unfinished_tis = info.unfinished_tis

            none_depends_on_past = all(not t.task.depends_on_past
                                       for t in unfinished_tis)
            none_task_concurrency = all(t.task.max_active_tis_per_dag is None
                                        for t in unfinished_tis)
            none_deferred = all(t.state != State.DEFERRED
                                for t in unfinished_tis)

            if unfinished_tis and none_depends_on_past and none_task_concurrency and none_deferred:
                # small speed up
                are_runnable_tasks = (schedulable_tis
                                      or self._are_premature_tis(
                                          unfinished_tis, finished_tis,
                                          session) or changed_tis)

        leaf_task_ids = {t.task_id for t in dag.leaves}
        leaf_tis = [
            ti for ti in tis if ti.task_id in leaf_task_ids
            if ti.state != TaskInstanceState.REMOVED
        ]

        # if all roots finished and at least one failed, the run failed
        if not unfinished_tis and any(leaf_ti.state in State.failed_states
                                      for leaf_ti in leaf_tis):
            self.log.error('Marking run %s failed', self)
            self.set_state(DagRunState.FAILED)
            if execute_callbacks:
                dag.handle_callback(self,
                                    success=False,
                                    reason='task_failure',
                                    session=session)
            elif dag.has_on_failure_callback:
                callback = DagCallbackRequest(
                    full_filepath=dag.fileloc,
                    dag_id=self.dag_id,
                    run_id=self.run_id,
                    is_failure_callback=True,
                    msg='task_failure',
                )

        # if all leaves succeeded and no unfinished tasks, the run succeeded
        elif not unfinished_tis and all(leaf_ti.state in State.success_states
                                        for leaf_ti in leaf_tis):
            self.log.info('Marking run %s successful', self)
            self.set_state(DagRunState.SUCCESS)
            if execute_callbacks:
                dag.handle_callback(self,
                                    success=True,
                                    reason='success',
                                    session=session)
            elif dag.has_on_success_callback:
                callback = DagCallbackRequest(
                    full_filepath=dag.fileloc,
                    dag_id=self.dag_id,
                    run_id=self.run_id,
                    is_failure_callback=False,
                    msg='success',
                )

        # if *all tasks* are deadlocked, the run failed
        elif (unfinished_tis and none_depends_on_past and none_task_concurrency
              and none_deferred and not are_runnable_tasks):
            self.log.error('Deadlock; marking run %s failed', self)
            self.set_state(DagRunState.FAILED)
            if execute_callbacks:
                dag.handle_callback(self,
                                    success=False,
                                    reason='all_tasks_deadlocked',
                                    session=session)
            elif dag.has_on_failure_callback:
                callback = DagCallbackRequest(
                    full_filepath=dag.fileloc,
                    dag_id=self.dag_id,
                    run_id=self.run_id,
                    is_failure_callback=True,
                    msg='all_tasks_deadlocked',
                )

        # finally, if the roots aren't done, the dag is still running
        else:
            self.set_state(DagRunState.RUNNING)

        if self._state == DagRunState.FAILED or self._state == DagRunState.SUCCESS:
            msg = ("DagRun Finished: dag_id=%s, execution_date=%s, run_id=%s, "
                   "run_start_date=%s, run_end_date=%s, run_duration=%s, "
                   "state=%s, external_trigger=%s, run_type=%s, "
                   "data_interval_start=%s, data_interval_end=%s, dag_hash=%s")
            self.log.info(
                msg,
                self.dag_id,
                self.execution_date,
                self.run_id,
                self.start_date,
                self.end_date,
                (self.end_date - self.start_date).total_seconds()
                if self.start_date and self.end_date else None,
                self._state,
                self.external_trigger,
                self.run_type,
                self.data_interval_start,
                self.data_interval_end,
                self.dag_hash,
            )
            session.flush()

        self._emit_true_scheduling_delay_stats_for_finished_state(finished_tis)
        self._emit_duration_stats_for_finished_state()

        session.merge(self)
        # We do not flush here for performance reasons (it increases the query count by about 20)

        return schedulable_tis, callback
Example No. 35
    def collect_dags(
        self,
        dag_folder=None,
        only_if_updated=True,
        include_examples=configuration.conf.getboolean('core',
                                                       'LOAD_EXAMPLES'),
        safe_mode=configuration.conf.getboolean('core',
                                                'DAG_DISCOVERY_SAFE_MODE')):
        """
        Given a file path or a folder, this method looks for python modules,
        imports them and adds them to the dagbag collection.

        Note that if a ``.airflowignore`` file is found while processing
        the directory, it will behave much like a ``.gitignore``,
        ignoring files that match any of the regex patterns specified
        in the file.

        **Note**: The patterns in .airflowignore are treated as
        un-anchored regexes, not shell-like glob patterns.
        """
        start_dttm = timezone.utcnow()
        dag_folder = dag_folder or self.dag_folder
        # Used to store stats around DagBag processing
        stats = []
        FileLoadStat = namedtuple('FileLoadStat',
                                  "file duration dag_num task_num dags")

        dag_folder = correct_maybe_zipped(dag_folder)

        dags_by_name = {}

        for filepath in list_py_file_paths(dag_folder,
                                           safe_mode=safe_mode,
                                           include_examples=include_examples):
            try:
                ts = timezone.utcnow()
                found_dags = self.process_file(filepath,
                                               only_if_updated=only_if_updated,
                                               safe_mode=safe_mode)
                dag_ids = [dag.dag_id for dag in found_dags]
                dag_id_names = str(dag_ids)

                td = timezone.utcnow() - ts
                td = td.total_seconds() + (float(td.microseconds) / 1000000)
                dags_by_name[dag_id_names] = dag_ids
                stats.append(
                    FileLoadStat(
                        filepath.replace(dag_folder, ''),
                        td,
                        len(found_dags),
                        sum([len(dag.tasks) for dag in found_dags]),
                        dag_id_names,
                    ))
            except Exception as e:
                self.log.exception(e)
        Stats.gauge('collect_dags',
                    (timezone.utcnow() - start_dttm).total_seconds(), 1)
        Stats.gauge('dagbag_size', len(self.dags), 1)
        Stats.gauge('dagbag_import_errors', len(self.import_errors), 1)
        self.dagbag_stats = sorted(stats,
                                   key=lambda x: x.duration,
                                   reverse=True)
        for file_stat in self.dagbag_stats:
            dag_ids = dags_by_name[file_stat.dags]
            if file_stat.dag_num >= 1:
                # if we found multiple dags per file, the stat is 'dag_id1 _ dag_id2'
                dag_names = '_'.join(dag_ids)
                Stats.timing('dag.loading-duration.{}'.format(dag_names),
                             file_stat.duration)
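Example No. 36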
    def start(self):
        """
        Use multiple processes to parse and generate tasks for the
        DAGs in parallel. By processing them in separate processes,
        we can get parallelism and isolation from potentially harmful
        user code.
        """

        self.register_exit_signals()

        # Start a new process group
        os.setpgid(0, 0)

        self.log.info("Processing files using up to %s processes at a time ",
                      self._parallelism)
        self.log.info("Process each file at most once every %s seconds",
                      self._file_process_interval)
        self.log.info("Checking for new files in %s every %s seconds",
                      self._dag_directory, self.dag_dir_list_interval)

        # In sync mode we want timeout=None -- wait forever until a message is received
        if self._async_mode:
            poll_time = 0.0
        else:
            poll_time = None

        # Used to track how long it takes us to get once around every file in the DAG folder.
        self._parsing_start_time = timezone.utcnow()
        while True:
            loop_start_time = time.time()

            # pylint: disable=no-else-break
            if self._signal_conn.poll(poll_time):
                agent_signal = self._signal_conn.recv()
                self.log.debug("Received %s signal from DagFileProcessorAgent",
                               agent_signal)
                if agent_signal == DagParsingSignal.TERMINATE_MANAGER:
                    self.terminate()
                    break
                elif agent_signal == DagParsingSignal.END_MANAGER:
                    self.end()
                    sys.exit(os.EX_OK)
                elif agent_signal == DagParsingSignal.AGENT_HEARTBEAT:
                    # continue the loop to parse dags
                    pass
                elif isinstance(agent_signal, FailureCallbackRequest):
                    self._add_callback_to_queue(agent_signal)
                else:
                    raise AirflowException("Invalid message")
            elif not self._async_mode:
                # In "sync" mode we don't want to parse the DAGs until we
                # are told to (as that would open another connection to the
                # SQLite DB which isn't a good practice
                continue
            # pylint: enable=no-else-break
            self._refresh_dag_dir()
            self._find_zombies()  # pylint: disable=no-value-for-parameter

            self._kill_timed_out_processors()

            # Generate more file paths to process if we processed all the files
            # already.
            if not self._file_path_queue:
                self.emit_metrics()
                self.prepare_file_path_queue()

            self.start_new_processes()

            # Update number of loop iteration.
            self._num_run += 1

            simple_dags = self.collect_results()
            for simple_dag in simple_dags:
                self._signal_conn.send(simple_dag)

            if not self._async_mode:
                self.log.debug(
                    "Waiting for processors to finish since we're using sqlite"
                )
                # Wait until the running DAG processors are finished before
                # sending a DagParsingStat message back. This means the Agent
                # can tell we've got to the end of this iteration when it sees
                # this type of message
                self.wait_until_finished()

                # Collect anything else that has finished, but don't kick off any more processors
                simple_dags = self.collect_results()
                for simple_dag in simple_dags:
                    self._signal_conn.send(simple_dag)

            self._print_stat()

            all_files_processed = all(
                self.get_last_finish_time(x) is not None
                for x in self.file_paths)
            max_runs_reached = self.max_runs_reached()

            dag_parsing_stat = DagParsingStat(
                self._file_paths,
                max_runs_reached,
                all_files_processed,
            )
            self._signal_conn.send(dag_parsing_stat)

            if max_runs_reached:
                self.log.info(
                    "Exiting dag parsing loop as all files "
                    "have been processed %s times", self._max_runs)
                break

            if self._async_mode:
                loop_duration = time.time() - loop_start_time
                if loop_duration < 1:
                    poll_time = 1 - loop_duration
                else:
                    poll_time = 0.0
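
The tail of this loop is what keeps the async manager ticking roughly once per second: if an iteration finished early, the remainder of the second becomes the next poll timeout; if it ran long, the next poll returns immediately. A minimal standalone sketch of that timing rule (names here are illustrative, not Airflow's):

import time

def next_poll_time(loop_start, target_interval=1.0):
    """Block on the signal connection only for what is left of the target interval."""
    loop_duration = time.time() - loop_start
    # If the iteration already took longer than the target, poll without blocking.
    return max(target_interval - loop_duration, 0.0)

start = time.time()
time.sleep(0.3)                            # pretend one parsing-loop iteration took 0.3s
print(round(next_poll_time(start), 1))     # roughly 0.7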
Exemplo n.º 37
0
    def update_state(self, session=None):
        """
        Determines the overall state of the DagRun based on the state
        of its TaskInstances.

        :return: State
        """

        dag = self.get_dag()

        tis = self.get_task_instances(session=session)
        self.log.debug("Updating state for %s considering %s task(s)", self,
                       len(tis))

        for ti in list(tis):
            # skip in db?
            if ti.state == State.REMOVED:
                tis.remove(ti)
            else:
                ti.task = dag.get_task(ti.task_id)

        # pre-calculate
        # db is faster
        start_dttm = timezone.utcnow()
        unfinished_tasks = self.get_task_instances(state=State.unfinished(),
                                                   session=session)
        none_depends_on_past = all(not t.task.depends_on_past
                                   for t in unfinished_tasks)
        none_task_concurrency = all(t.task.task_concurrency is None
                                    for t in unfinished_tasks)
        # small speed up
        if unfinished_tasks and none_depends_on_past and none_task_concurrency:
            # TODO: this can actually get pretty slow: one task costs between 0.01-0.15s
            no_dependencies_met = True
            for ut in unfinished_tasks:
                # We need to flag upstream and check for changes because upstream
                # failures/re-schedules can result in deadlock false positives
                old_state = ut.state
                deps_met = ut.are_dependencies_met(dep_context=DepContext(
                    flag_upstream_failed=True,
                    ignore_in_retry_period=True,
                    ignore_in_reschedule_period=True),
                                                   session=session)
                if deps_met or old_state != ut.current_state(session=session):
                    no_dependencies_met = False
                    break

        duration = (timezone.utcnow() - start_dttm).total_seconds() * 1000
        Stats.timing("dagrun.dependency-check.{}".format(self.dag_id),
                     duration)

        root_ids = [t.task_id for t in dag.roots]
        roots = [t for t in tis if t.task_id in root_ids]

        # if all roots finished and at least one failed, the run failed
        if (not unfinished_tasks
                and any(r.state in (State.FAILED, State.UPSTREAM_FAILED)
                        for r in roots)):
            self.log.info('Marking run %s failed', self)
            self.set_state(State.FAILED)
            dag.handle_callback(self,
                                success=False,
                                reason='task_failure',
                                session=session)

        # if all roots succeeded and no unfinished tasks, the run succeeded
        elif not unfinished_tasks and all(
                r.state in (State.SUCCESS, State.SKIPPED) for r in roots):
            self.log.info('Marking run %s successful', self)
            self.set_state(State.SUCCESS)
            dag.handle_callback(self,
                                success=True,
                                reason='success',
                                session=session)

        # if *all tasks* are deadlocked, the run failed
        elif (unfinished_tasks and none_depends_on_past
              and none_task_concurrency and no_dependencies_met):
            self.log.info('Deadlock; marking run %s failed', self)
            self.set_state(State.FAILED)
            dag.handle_callback(self,
                                success=False,
                                reason='all_tasks_deadlocked',
                                session=session)

        # finally, if the roots aren't done, the dag is still running
        else:
            self.set_state(State.RUNNING)

        self._emit_duration_stats_for_finished_state()

        # todo: determine we want to use with_for_update to make sure to lock the run
        session.merge(self)
        session.commit()

        return self.state
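
The branching above reduces to a small decision table over the root task states plus a deadlock flag. A simplified, self-contained restatement (plain strings stand in for Airflow's State constants):

def decide_run_state(root_states, unfinished, deadlocked):
    """Simplified restatement of the branching in update_state above."""
    finished_failed = ("failed", "upstream_failed")
    finished_ok = ("success", "skipped")
    if not unfinished and any(s in finished_failed for s in root_states):
        return "failed"        # all roots done, at least one failed
    if not unfinished and all(s in finished_ok for s in root_states):
        return "success"       # all roots done and none failed
    if unfinished and deadlocked:
        return "failed"        # unfinished tasks exist but none can make progress
    return "running"

print(decide_run_state(["success", "failed"], unfinished=0, deadlocked=False))   # failed
print(decide_run_state(["success", "skipped"], unfinished=0, deadlocked=False))  # success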
Exemplo n.º 38
0
    def __init__(self,
                 dag_directory,
                 file_paths,
                 max_runs,
                 processor_factory,
                 signal_conn,
                 stat_queue,
                 result_queue,
                 async_mode=True):
        """
        :param dag_directory: Directory where DAG definitions are kept. All
        files in file_paths should be under this directory
        :type dag_directory: unicode
        :param file_paths: list of file paths that contain DAG definitions
        :type file_paths: list[unicode]
        :param max_runs: The number of times to parse and schedule each file. -1
        for unlimited.
        :type max_runs: int
        :param processor_factory: function that creates processors for DAG
        definition files. Arguments are (dag_definition_path)
        :type processor_factory: (unicode, unicode, list) -> (AbstractDagFileProcessor)
        :param signal_conn: connection to communicate signal with processor agent.
        :type signal_conn: Connection
        :param stat_queue: the queue to use for passing back parsing stat to agent.
        :type stat_queue: multiprocessing.Queue
        :param result_queue: the queue to use for passing back the result to agent.
        :type result_queue: multiprocessing.Queue
        :param async_mode: whether to start the manager in async mode
        :type async_mode: bool
        """
        self._file_paths = file_paths
        self._file_path_queue = []
        self._dag_directory = dag_directory
        self._max_runs = max_runs
        self._processor_factory = processor_factory
        self._signal_conn = signal_conn
        self._stat_queue = stat_queue
        self._result_queue = result_queue
        self._async_mode = async_mode

        self._parallelism = conf.getint('scheduler', 'max_threads')
        if 'sqlite' in conf.get('core', 'sql_alchemy_conn') and self._parallelism > 1:
            self.log.error("Cannot use more than 1 thread when using sqlite. "
                           "Setting parallelism to 1")
            self._parallelism = 1

        # Parse and schedule each file no faster than this interval.
        self._file_process_interval = conf.getint('scheduler',
                                                  'min_file_process_interval')
        # How often to print out DAG file processing stats to the log. Default to
        # 30 seconds.
        self.print_stats_interval = conf.getint('scheduler',
                                                'print_stats_interval')
        # How many seconds do we wait for tasks to heartbeat before marking them as zombies.
        self._zombie_threshold_secs = (
            conf.getint('scheduler', 'scheduler_zombie_task_threshold'))
        # Map from file path to the processor
        self._processors = {}
        # Map from file path to the last runtime
        self._last_runtime = {}
        # Map from file path to the last finish time
        self._last_finish_time = {}
        self._last_zombie_query_time = timezone.utcnow()
        # Last time that the DAG dir was traversed to look for files
        self.last_dag_dir_refresh_time = timezone.utcnow()
        # Last time stats were printed
        self.last_stat_print_time = timezone.datetime(2000, 1, 1)
        # TODO: Remove magic number
        self._zombie_query_interval = 10
        # Map from file path to the number of runs
        self._run_count = defaultdict(int)
        # Manager heartbeat key.
        self._heart_beat_key = 'heart-beat'

        # How often to scan the DAGs directory for new files. Default to 5 minutes.
        self.dag_dir_list_interval = conf.getint('scheduler',
                                                 'dag_dir_list_interval')

        self._log = logging.getLogger('airflow.processor_manager')

        signal.signal(signal.SIGINT, self._exit_gracefully)
        signal.signal(signal.SIGTERM, self._exit_gracefully)
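
One detail worth noting in this constructor is the SQLite guard: because SQLite allows only a single writer, the manager clamps max_threads to 1. A tiny sketch of the same check, with hypothetical connection strings:

def effective_parallelism(sql_alchemy_conn, requested):
    """SQLite supports a single writer, so anything above 1 gets clamped."""
    if 'sqlite' in sql_alchemy_conn and requested > 1:
        return 1
    return requested

print(effective_parallelism('sqlite:////tmp/airflow.db', 4))   # 1
print(effective_parallelism('postgresql://host/airflow', 4))   # 4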
Exemplo n.º 39
0
import airflow
from airflow.models import DAG
from airflow.operators.bash_operator import BashOperator
from airflow.operators.dummy_operator import DummyOperator
from airflow.utils import timezone
from datetime import datetime, timedelta

args = {
    'owner': 'airflow',
    'start_date': timezone.utcnow() - timedelta(hours=1)
}

dag = DAG(
    dag_id='lab_01',
    default_args=args,
    schedule_interval='0 * * * *',
    dagrun_timeout=timedelta(minutes=60),
)

start = DummyOperator(
    task_id='start',
    dag=dag,
)

step0 = DummyOperator(
    task_id='from_kafka_to_logstash',
    dag=dag,
)

step1 = DummyOperator(
    task_id='from_logstash_to_clickhouse',
    dag=dag,
)
0
 def set_state(self, state):
     if self._state != state:
         self._state = state
         self.end_date = timezone.utcnow() if self._state in State.finished() else None
Exemplo n.º 41
0
 def __init__(self, delta: datetime.timedelta):
     super().__init__(moment=timezone.utcnow() + delta)
Exemplo n.º 42
0
    def test_xcom_push_and_pull(self, mock_conn, mock_run_query):
        ti = TaskInstance(task=self.operator, execution_date=timezone.utcnow())
        ti.run()

        self.assertEqual(ti.xcom_pull(task_ids=MOCK_DATA['task_id']), str(MOCK_RESULT))
Exemplo n.º 43
0
    def prepare_file_path_queue(self):
        """Generate more file paths to process. Result are saved in _file_path_queue."""
        self._parsing_start_time = time.perf_counter()
        # If the file path is already being processed, or if a file was
        # processed recently, wait until the next batch
        file_paths_in_progress = self._processors.keys()
        now = timezone.utcnow()

        # Sort the file paths by the parsing order mode
        list_mode = conf.get("scheduler", "file_parsing_sort_mode")

        files_with_mtime = {}
        file_paths = []
        is_mtime_mode = list_mode == "modified_time"

        file_paths_recently_processed = []
        for file_path in self._file_paths:

            if is_mtime_mode:
                try:
                    files_with_mtime[file_path] = os.path.getmtime(file_path)
                except FileNotFoundError:
                    self.log.warning("Skipping processing of missing file: %s", file_path)
                    continue
                file_modified_time = timezone.make_aware(datetime.fromtimestamp(files_with_mtime[file_path]))
            else:
                file_paths.append(file_path)
                file_modified_time = None

            # Find file paths that were recently processed to exclude them
            # from being added to file_path_queue
            # unless they were modified recently and parsing mode is "modified_time"
            # in which case we don't honor "self._file_process_interval" (min_file_process_interval)
            last_finish_time = self.get_last_finish_time(file_path)
            if (
                last_finish_time is not None
                and (now - last_finish_time).total_seconds() < self._file_process_interval
                and not (is_mtime_mode and file_modified_time and (file_modified_time > last_finish_time))
            ):
                file_paths_recently_processed.append(file_path)

        # Sort file paths via last modified time
        if is_mtime_mode:
            file_paths = sorted(files_with_mtime, key=files_with_mtime.get, reverse=True)
        elif list_mode == "alphabetical":
            file_paths = sorted(file_paths)
        elif list_mode == "random_seeded_by_host":
            # Shuffle the list seeded by hostname so multiple schedulers can work on different
            # set of files. Since we set the seed, the sort order will remain same per host
            random.Random(get_hostname()).shuffle(file_paths)

        files_paths_at_run_limit = [
            file_path for file_path, stat in self._file_stats.items() if stat.run_count == self._max_runs
        ]

        file_paths_to_exclude = set(file_paths_in_progress).union(
            file_paths_recently_processed, files_paths_at_run_limit
        )

        # Do not convert the following list to set as set does not preserve the order
        # and we need to maintain the order of file_paths for `[scheduler] file_parsing_sort_mode`
        files_paths_to_queue = [
            file_path for file_path in file_paths if file_path not in file_paths_to_exclude
        ]

        for file_path, processor in self._processors.items():
            self.log.debug(
                "File path %s is still being processed (started: %s)",
                processor.file_path,
                processor.start_time.isoformat(),
            )

        self.log.debug("Queuing the following files for processing:\n\t%s", "\n\t".join(files_paths_to_queue))

        for file_path in files_paths_to_queue:
            if file_path not in self._file_stats:
                self._file_stats[file_path] = DagFileStat(
                    num_dags=0, import_errors=0, last_finish_time=None, last_duration=None, run_count=0
                )

        self._file_path_queue.extend(files_paths_to_queue)
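
The three file_parsing_sort_mode values above differ only in how the candidate list is ordered before queuing. A short standalone sketch of each ordering, using made-up file paths and mtimes:

import random

mtimes = {'/dags/a.py': 100.0, '/dags/b.py': 300.0, '/dags/c.py': 200.0}  # path -> mtime

by_mtime = sorted(mtimes, key=mtimes.get, reverse=True)   # newest file first
alphabetical = sorted(mtimes)                             # plain lexicographic order

seeded = list(mtimes)
random.Random('scheduler-host-1').shuffle(seeded)         # stable per host, differs across hosts

print(by_mtime)       # ['/dags/b.py', '/dags/c.py', '/dags/a.py']
print(alphabetical)   # ['/dags/a.py', '/dags/b.py', '/dags/c.py']
print(seeded)         # same order on every run for this seed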
Exemplo n.º 44
0
    def _log_file_processing_stats(self, known_file_paths):
        """
        Print out stats about how files are getting processed.

        :param known_file_paths: a list of file paths that may contain Airflow
            DAG definitions
        :return: None
        """
        # File Path: Path to the file containing the DAG definition
        # PID: PID associated with the process that's processing the file. May
        # be empty.
        # Runtime: If the process is currently running, how long it's been
        # running for in seconds.
        # Last Runtime: If the process ran before, how long did it take to
        # finish in seconds
        # Last Run: When the file finished processing in the previous run.
        headers = ["File Path", "PID", "Runtime", "# DAGs", "# Errors", "Last Runtime", "Last Run"]

        rows = []
        now = timezone.utcnow()
        for file_path in known_file_paths:
            last_runtime = self.get_last_runtime(file_path)
            num_dags = self.get_last_dag_count(file_path)
            num_errors = self.get_last_error_count(file_path)
            file_name = os.path.basename(file_path)
            file_name = os.path.splitext(file_name)[0].replace(os.sep, '.')

            processor_pid = self.get_pid(file_path)
            processor_start_time = self.get_start_time(file_path)
            runtime = (now - processor_start_time) if processor_start_time else None
            last_run = self.get_last_finish_time(file_path)
            if last_run:
                seconds_ago = (now - last_run).total_seconds()
                Stats.gauge(f'dag_processing.last_run.seconds_ago.{file_name}', seconds_ago)

            rows.append((file_path, processor_pid, runtime, num_dags, num_errors, last_runtime, last_run))

        # Sort by longest last runtime. (Can't sort None values in python3)
        rows = sorted(rows, key=lambda x: x[5] or 0.0)

        formatted_rows = []
        for file_path, pid, runtime, num_dags, num_errors, last_runtime, last_run in rows:
            formatted_rows.append(
                (
                    file_path,
                    pid,
                    f"{runtime.total_seconds():.2f}s" if runtime else None,
                    num_dags,
                    num_errors,
                    f"{last_runtime:.2f}s" if last_runtime else None,
                    last_run.strftime("%Y-%m-%dT%H:%M:%S") if last_run else None,
                )
            )
        log_str = (
            "\n"
            + "=" * 80
            + "\n"
            + "DAG File Processing Stats\n\n"
            + tabulate(formatted_rows, headers=headers)
            + "\n"
            + "=" * 80
        )

        self.log.info(log_str)
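
The table itself is rendered with the third-party tabulate package. A minimal sketch with invented rows shows the shape of the output this method logs:

from tabulate import tabulate

headers = ["File Path", "PID", "Runtime", "# DAGs", "# Errors", "Last Runtime", "Last Run"]
rows = [
    ("/dags/etl.py", 4242, "1.25s", 3, 0, "1.10s", "2021-06-01T12:00:00"),
    ("/dags/reports.py", None, None, 1, 1, "0.40s", "2021-06-01T11:59:30"),
]
print(tabulate(rows, headers=headers))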
Exemplo n.º 45
0
    def collect_dags(self,
                     dag_folder=None,
                     only_if_updated=True,
                     include_examples=conf.getboolean('core', 'LOAD_EXAMPLES'),
                     safe_mode=conf.getboolean('core',
                                               'DAG_DISCOVERY_SAFE_MODE')):
        """
        Given a file path or a folder, this method looks for python modules,
        imports them and adds them to the dagbag collection.

        Note that if a ``.airflowignore`` file is found while processing
        the directory, it will behave much like a ``.gitignore``,
        ignoring files that match any of the regex patterns specified
        in the file.

        **Note**: The patterns in .airflowignore are treated as
        un-anchored regexes, not shell-like glob patterns.
        """
        if self.store_serialized_dags:
            return

        self.log.info("Filling up the DagBag from %s", dag_folder)
        start_dttm = timezone.utcnow()
        dag_folder = dag_folder or self.dag_folder
        # Used to store stats around DagBag processing
        stats = []
        FileLoadStat = namedtuple('FileLoadStat',
                                  "file duration dag_num task_num dags")

        dag_folder = correct_maybe_zipped(dag_folder)

        for filepath in list_py_file_paths(dag_folder,
                                           safe_mode=safe_mode,
                                           include_examples=include_examples):
            try:
                ts = timezone.utcnow()
                found_dags = self.process_file(filepath,
                                               only_if_updated=only_if_updated,
                                               safe_mode=safe_mode)
                dag_ids = [dag.dag_id for dag in found_dags]
                dag_id_names = str(dag_ids)

                td = timezone.utcnow() - ts
                td = td.total_seconds()  # total_seconds() already includes the microsecond fraction
                stats.append(
                    FileLoadStat(
                        filepath.replace(settings.DAGS_FOLDER, ''),
                        td,
                        len(found_dags),
                        sum([len(dag.tasks) for dag in found_dags]),
                        dag_id_names,
                    ))
            except Exception as e:
                self.log.exception(e)
        Stats.gauge('collect_dags',
                    (timezone.utcnow() - start_dttm).total_seconds(), 1)
        Stats.gauge('dagbag_size', len(self.dags), 1)
        Stats.gauge('dagbag_import_errors', len(self.import_errors), 1)
        self.dagbag_stats = sorted(stats,
                                   key=lambda x: x.duration,
                                   reverse=True)
        for file_stat in self.dagbag_stats:
            # file_stat.file similar format: /subdir/dag_name.py
            # TODO: Remove for Airflow 2.0
            filename = file_stat.file.split('/')[-1].replace('.py', '')
            Stats.timing('dag.loading-duration.{}'.format(filename),
                         file_stat.duration)
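
The final loop derives a per-file metric name from the stat's file field (a path relative to the DAGs folder). A tiny standalone illustration of that transformation, with a made-up path:

file_stat_file = '/subdir/dag_name.py'             # FileLoadStat.file, relative to DAGS_FOLDER
filename = file_stat_file.split('/')[-1].replace('.py', '')
print('dag.loading-duration.{}'.format(filename))  # dag.loading-duration.dag_name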
Exemplo n.º 46
0
    def test_next_execution(self):
        # A scaffolding function
        def reset_dr_db(dag_id):
            session = Session()
            dr = session.query(models.DagRun).filter_by(dag_id=dag_id)
            dr.delete()
            session.commit()
            session.close()

        EXAMPLE_DAGS_FOLDER = os.path.join(
            os.path.dirname(
                os.path.dirname(
                    os.path.dirname(os.path.realpath(__file__))
                )
            ),
            "airflow/example_dags"
        )

        dagbag = models.DagBag(dag_folder=EXAMPLE_DAGS_FOLDER,
                               include_examples=False)
        dag_ids = ['example_bash_operator',  # schedule_interval is '0 0 * * *'
                   'latest_only',  # schedule_interval is timedelta(hours=4)
                   'example_python_operator',  # schedule_interval=None
                   'example_xcom']  # schedule_interval="@once"

        # The details below are determined by the schedule_interval of the example DAGs
        now = timezone.utcnow()
        next_execution_time_for_dag1 = pytz.utc.localize(
            datetime.combine(
                now.date() + timedelta(days=1),
                time(0)
            )
        )
        next_execution_time_for_dag2 = now + timedelta(hours=4)
        expected_output = [str(next_execution_time_for_dag1),
                           str(next_execution_time_for_dag2),
                           "None",
                           "None"]

        for i in range(len(dag_ids)):
            dag_id = dag_ids[i]

            # Clear dag runs so there is no execution history for each DAG
            reset_dr_db(dag_id)

            p = subprocess.Popen(["airflow", "next_execution", dag_id,
                                  "--subdir", EXAMPLE_DAGS_FOLDER],
                                 stdout=subprocess.PIPE)
            p.wait()
            stdout = []
            for line in p.stdout:
                stdout.append(str(line.decode("utf-8").rstrip()))

            # `next_execution` function is inapplicable if no execution record found
            # It prints `None` in such cases
            self.assertEqual(stdout[-1], "None")

            dag = dagbag.dags[dag_id]
            # Create a DagRun for each DAG, to prepare for next step
            dag.create_dagrun(
                run_id='manual__' + now.isoformat(),
                execution_date=now,
                start_date=now,
                state=State.FAILED
            )

            p = subprocess.Popen(["airflow", "next_execution", dag_id,
                                  "--subdir", EXAMPLE_DAGS_FOLDER],
                                 stdout=subprocess.PIPE)
            p.wait()
            stdout = []
            for line in p.stdout:
                stdout.append(str(line.decode("utf-8").rstrip()))
            self.assertEqual(stdout[-1], expected_output[i])

            reset_dr_db(dag_id)
Exemplo n.º 47
0
    def execute(self, context: Context):
        if isinstance(self.execution_date, datetime.datetime):
            parsed_execution_date = self.execution_date
        elif isinstance(self.execution_date, str):
            parsed_execution_date = timezone.parse(self.execution_date)
        else:
            parsed_execution_date = timezone.utcnow()

        if self.trigger_run_id:
            run_id = self.trigger_run_id
        else:
            run_id = DagRun.generate_run_id(DagRunType.MANUAL,
                                            parsed_execution_date)
        try:
            dag_run = trigger_dag(
                dag_id=self.trigger_dag_id,
                run_id=run_id,
                conf=self.conf,
                execution_date=parsed_execution_date,
                replace_microseconds=False,
            )

        except DagRunAlreadyExists as e:
            if self.reset_dag_run:
                self.log.info("Clearing %s on %s", self.trigger_dag_id,
                              parsed_execution_date)

                # Get target dag object and call clear()

                dag_model = DagModel.get_current(self.trigger_dag_id)
                if dag_model is None:
                    raise DagNotFound(
                        f"Dag id {self.trigger_dag_id} not found in DagModel")

                dag_bag = DagBag(dag_folder=dag_model.fileloc,
                                 read_dags_from_db=True)
                dag = dag_bag.get_dag(self.trigger_dag_id)
                dag.clear(start_date=parsed_execution_date,
                          end_date=parsed_execution_date)
                dag_run = DagRun.find(dag_id=dag.dag_id, run_id=run_id)[0]
            else:
                raise e
        if dag_run is None:
            raise RuntimeError("The dag_run should be set here!")
        # Store the execution date from the dag run (either created or found above) to
        # be used when creating the extra link on the webserver.
        ti = context['task_instance']
        ti.xcom_push(key=XCOM_EXECUTION_DATE_ISO,
                     value=dag_run.execution_date.isoformat())
        ti.xcom_push(key=XCOM_RUN_ID, value=dag_run.run_id)

        if self.wait_for_completion:
            # wait for dag to complete
            while True:
                self.log.info(
                    'Waiting for %s on %s to become allowed state %s ...',
                    self.trigger_dag_id,
                    dag_run.execution_date,
                    self.allowed_states,
                )
                time.sleep(self.poke_interval)

                dag_run.refresh_from_db()
                state = dag_run.state
                if state in self.failed_states:
                    raise AirflowException(
                        f"{self.trigger_dag_id} failed with failed states {state}"
                    )
                if state in self.allowed_states:
                    self.log.info("%s finished with allowed state %s",
                                  self.trigger_dag_id, state)
                    return
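
This execute() is the body of Airflow's TriggerDagRunOperator. As a hedged usage sketch (assuming an Airflow 2.x install and a downstream DAG named 'target_dag'; adjust parameter names if your version differs), a controller DAG might wire it up like this:

from datetime import datetime

from airflow.models import DAG
from airflow.operators.trigger_dagrun import TriggerDagRunOperator

dag = DAG(
    dag_id='controller_dag',
    start_date=datetime(2021, 1, 1),
    schedule_interval=None,
)

trigger = TriggerDagRunOperator(
    task_id='trigger_target',
    trigger_dag_id='target_dag',        # the DAG to kick off (assumed to exist)
    conf={'source': 'controller_dag'},  # handed to the triggered run
    reset_dag_run=True,                 # clear and re-run if the run already exists
    wait_for_completion=True,           # block until an allowed terminal state
    poke_interval=30,
    dag=dag,
)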
Exemplo n.º 48
0
    def _mark_multi_state(self,
                          operator,
                          poke_hash,
                          encoded_poke_context,
                          state,
                          session=None):
        """
        Mark state for multiple tasks in the task_instance table to a new state if they have
        the same signature as the poke_hash.

        :param operator: The sensor's operator class name.
        :param poke_hash: The hash code generated from sensor's poke context.
        :param encoded_poke_context: The raw encoded poke_context.
        :param state: Set multiple sensor tasks to this state.
        :param session: The sqlalchemy session.
        """
        def mark_state(ti, sensor_instance):
            ti.state = state
            sensor_instance.state = state
            if state in State.finished:
                ti.end_date = end_date
                ti.set_duration()

        SI = SensorInstance
        TI = TaskInstance

        count_marked = 0
        query_result = []
        try:
            query_result = (session.query(TI, SI).join(
                TI,
                and_(
                    TI.dag_id == SI.dag_id,
                    TI.task_id == SI.task_id,
                    TI.execution_date == SI.execution_date,
                ),
            ).filter(SI.state == State.SENSING).filter(
                SI.hashcode == poke_hash).filter(
                    SI.operator == operator).with_for_update().all())

            end_date = timezone.utcnow()
            for ti, sensor_instance in query_result:
                if sensor_instance.poke_context != encoded_poke_context:
                    continue

                ti.hostname = self.hostname
                if ti.state == State.SENSING:
                    mark_state(ti=ti, sensor_instance=sensor_instance)
                    count_marked += 1
                else:
                    # ti.state != State.SENSING
                    sensor_instance.state = ti.state

            session.commit()

        except Exception:  # pylint: disable=broad-except
            self.log.warning(
                "Exception _mark_multi_state in smart sensor for hashcode %s",
                str(poke_hash),  # cast to str in advance for highlighting
                exc_info=True,
            )
        self.log.info("Marked %s tasks out of %s to state %s", count_marked,
                      len(query_result), state)
Exemplo n.º 49
0
def date_range(start_date, end_date=None, num=None, delta=None):
    """
    Get a list of dates based on a start, end and delta. The delta
    can be anything that can be added to ``datetime.datetime``,
    or a cron expression as a ``str``.
    :param start_date: anchor date to start the series from
    :type start_date: datetime.datetime
    :param end_date: right boundary for the date range
    :type end_date: datetime.datetime
    :param num: alternatively to end_date, you can specify the number of
        entries you want in the range. This number can be negative,
        output will always be sorted regardless
    :type num: int
    >>> date_range(datetime(2016, 1, 1), datetime(2016, 1, 3), delta=timedelta(1))
    [datetime.datetime(2016, 1, 1, 0, 0), datetime.datetime(2016, 1, 2, 0, 0),
     datetime.datetime(2016, 1, 3, 0, 0)]
    >>> date_range(datetime(2016, 1, 1), datetime(2016, 1, 3), delta='0 0 * * *')
    [datetime.datetime(2016, 1, 1, 0, 0), datetime.datetime(2016, 1, 2, 0, 0),
     datetime.datetime(2016, 1, 3, 0, 0)]
    >>> date_range(datetime(2016, 1, 1), datetime(2016, 3, 3), delta="0 0 0 * *")
    [datetime.datetime(2016, 1, 1, 0, 0), datetime.datetime(2016, 2, 1, 0, 0),
     datetime.datetime(2016, 3, 1, 0, 0)]
    """
    if not delta:
        return []
    if end_date and start_date > end_date:
        raise Exception("Wait. start_date needs to be before end_date")
    if end_date and num:
        raise Exception("Wait. Either specify end_date OR num")
    if not end_date and not num:
        end_date = timezone.utcnow()

    delta_iscron = False
    tz = start_date.tzinfo
    if isinstance(delta, six.string_types):
        delta_iscron = True
        start_date = timezone.make_naive(start_date, tz)
        cron = croniter(delta, start_date)
    elif isinstance(delta, timedelta):
        delta = abs(delta)
    dates = []
    if end_date:
        if timezone.is_naive(start_date):
            end_date = timezone.make_naive(end_date, tz)
        while start_date <= end_date:
            if timezone.is_naive(start_date):
                dates.append(timezone.make_aware(start_date, tz))
            else:
                dates.append(start_date)

            if delta_iscron:
                start_date = cron.get_next(datetime)
            else:
                start_date += delta
    else:
        for _ in range(abs(num)):
            if timezone.is_naive(start_date):
                dates.append(timezone.make_aware(start_date, tz))
            else:
                dates.append(start_date)

            if delta_iscron:
                if num > 0:
                    start_date = cron.get_next(datetime)
                else:
                    start_date = cron.get_prev(datetime)
            else:
                if num > 0:
                    start_date += delta
                else:
                    start_date -= delta
    return sorted(dates)
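
The docstring's doctests only cover the end_date form; a couple of hedged invocations below show the num parameter as well. They assume this helper is importable from airflow.utils.dates, where it has historically lived; note that naive anchor dates come back timezone-aware via make_aware.

from datetime import datetime, timedelta

from airflow.utils.dates import date_range

# Three entries forward from the anchor date.
print(date_range(datetime(2016, 1, 1), num=3, delta=timedelta(days=1)))
# -> 2016-01-01, 2016-01-02, 2016-01-03

# A negative num walks backwards, but the list still comes back sorted ascending.
print(date_range(datetime(2016, 1, 1), num=-3, delta=timedelta(days=1)))
# -> 2015-12-30, 2015-12-31, 2016-01-01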
Exemplo n.º 50
0

import json
import unittest

from freezegun import freeze_time
from mock import patch

from airflow import AirflowException
from airflow.api.client.local_client import Client
from airflow import models
from airflow import settings
from airflow.utils import timezone
from airflow.utils.state import State

EXECDATE = timezone.utcnow()
EXECDATE_NOFRACTIONS = EXECDATE.replace(microsecond=0)
EXECDATE_ISO = EXECDATE_NOFRACTIONS.isoformat()


class TestLocalClient(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        super(TestLocalClient, cls).setUpClass()
        session = settings.Session()
        session.query(models.Pool).delete()
        session.commit()
        session.close()

    def setUp(self):
        super(TestLocalClient, self).setUp()
Exemplo n.º 51
0
 def test_clear(self):
     self.dag.clear(
         start_date=DEFAULT_DATE,
         end_date=timezone.utcnow())

from airflow.models.dag import DAG
from airflow.utils import timezone
from airflow.ti_deps.met_handlers.aiflow_met_handler import AIFlowMetHandler
from airflow.models.event import Event
from airflow.operators.send_event_operator import SendEventOperator
from airflow.operators.bash_operator import BashOperator

dag = DAG(dag_id='event_scheduler_example_bash_dag',
          start_date=timezone.utcnow(),
          schedule_interval="@once")
op_0 = BashOperator(task_id='0_job',
                    dag=dag,
                    bash_command='echo "0 hello word!"')
op_1 = BashOperator(task_id='1_job',
                    dag=dag,
                    bash_command='echo "1 hello word!"')
op_2 = SendEventOperator(task_id='2_job',
                         dag=dag,
                         uri='localhost:50051',
                         event=Event(key='key_1',
                                     value='value_1',
                                     event_type='UNDEFINED'))
op_3 = SendEventOperator(task_id='3_job',
                         dag=dag,
Exemplo n.º 53
0
    def get_dag(self, dag_id, session: Session = None):
        """
        Gets the DAG out of the dictionary, and refreshes it if expired

        :param dag_id: DAG Id
        :type dag_id: str
        """
        # Avoid circular import
        from airflow.models.dag import DagModel

        if self.read_dags_from_db:
            # Import here so that serialized dag is only imported when serialization is enabled
            from airflow.models.serialized_dag import SerializedDagModel

            if dag_id not in self.dags:
                # Load from DB if not (yet) in the bag
                self._add_dag_from_db(dag_id=dag_id, session=session)
                return self.dags.get(dag_id)

            # If DAG is in the DagBag, check the following
            # 1. if time has come to check if DAG is updated (controlled by min_serialized_dag_fetch_secs)
            # 2. check the last_updated column in SerializedDag table to see if Serialized DAG is updated
            # 3. if (2) is yes, fetch the Serialized DAG.
            min_serialized_dag_fetch_secs = timedelta(
                seconds=settings.MIN_SERIALIZED_DAG_FETCH_INTERVAL)
            if (dag_id in self.dags_last_fetched
                    and timezone.utcnow() > self.dags_last_fetched[dag_id] +
                    min_serialized_dag_fetch_secs):
                sd_last_updated_datetime = SerializedDagModel.get_last_updated_datetime(
                    dag_id=dag_id,
                    session=session,
                )
                if sd_last_updated_datetime and sd_last_updated_datetime > self.dags_last_fetched[
                        dag_id]:
                    self._add_dag_from_db(dag_id=dag_id, session=session)

            return self.dags.get(dag_id)

        # If asking for a known subdag, we want to refresh the parent
        dag = None
        root_dag_id = dag_id
        if dag_id in self.dags:
            dag = self.dags[dag_id]
            if dag.is_subdag:
                root_dag_id = dag.parent_dag.dag_id  # type: ignore

        # If DAG Model is absent, we can't check last_expired property. Is the DAG not yet synchronized?
        orm_dag = DagModel.get_current(root_dag_id, session=session)
        if not orm_dag:
            return self.dags.get(dag_id)

        # If the dag corresponding to root_dag_id is absent or expired
        is_missing = root_dag_id not in self.dags
        is_expired = orm_dag.last_expired and dag and dag.last_loaded < orm_dag.last_expired
        if is_missing or is_expired:
            # Reprocess source file
            found_dags = self.process_file(filepath=correct_maybe_zipped(
                orm_dag.fileloc),
                                           only_if_updated=False)

            # If the source file no longer exports `dag_id`, delete it from self.dags
            if found_dags and dag_id in [
                    found_dag.dag_id for found_dag in found_dags
            ]:
                return self.dags[dag_id]
            elif dag_id in self.dags:
                del self.dags[dag_id]
        return self.dags.get(dag_id)
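
The staleness check in the read_dags_from_db branch boils down to: hit the DB again only if our copy is older than the fetch interval and the serialized copy was updated after we last fetched it. A small sketch of that rule with invented timestamps:

from datetime import datetime, timedelta

def should_refetch(last_fetched, sd_last_updated, now, min_fetch_interval):
    """Re-fetch only when our copy is old enough AND the DB copy is newer."""
    if now <= last_fetched + min_fetch_interval:
        return False            # fetched too recently; skip the DB round trip
    return sd_last_updated is not None and sd_last_updated > last_fetched

now = datetime(2021, 6, 1, 12, 0, 30)
print(should_refetch(datetime(2021, 6, 1, 12, 0, 0),
                     datetime(2021, 6, 1, 12, 0, 20),
                     now, timedelta(seconds=10)))   # True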
Exemplo n.º 54
0
 def poke(self, context):
     self.log.info('Checking if the time (%s) has come', self.target_time)
     return timezone.make_naive(timezone.utcnow(),
                                self.dag.timezone).time() > self.target_time
Exemplo n.º 55
0
            def _per_task_process(key, ti, session=None):  # pylint: disable=too-many-return-statements
                ti.refresh_from_db(lock_for_update=True, session=session)

                task = self.dag.get_task(ti.task_id, include_subdags=True)
                ti.task = task

                ignore_depends_on_past = self.ignore_first_depends_on_past and ti.execution_date == (
                    start_date or ti.start_date)
                self.log.debug("Task instance to run %s state %s", ti,
                               ti.state)

                # The task was already marked successful or skipped by a
                # different Job. Don't rerun it.
                if ti.state == State.SUCCESS:
                    ti_status.succeeded.add(key)
                    self.log.debug("Task instance %s succeeded. Don't rerun.",
                                   ti)
                    ti_status.to_run.pop(key)
                    if key in ti_status.running:
                        ti_status.running.pop(key)
                    return
                elif ti.state == State.SKIPPED:
                    ti_status.skipped.add(key)
                    self.log.debug("Task instance %s skipped. Don't rerun.",
                                   ti)
                    ti_status.to_run.pop(key)
                    if key in ti_status.running:
                        ti_status.running.pop(key)
                    return

                # guard against externally modified tasks instances or
                # in case max concurrency has been reached at task runtime
                elif ti.state == State.NONE:
                    self.log.warning(
                        "FIXME: task instance %s state was set to None "
                        "externally. This should not happen", ti)
                    ti.set_state(State.SCHEDULED, session=session)
                if self.rerun_failed_tasks:
                    # Rerun failed tasks or upstreamed failed tasks
                    if ti.state in (State.FAILED, State.UPSTREAM_FAILED):
                        self.log.error("Task instance %s with state %s", ti,
                                       ti.state)
                        if key in ti_status.running:
                            ti_status.running.pop(key)
                        # Reset the failed task in backfill to scheduled state
                        ti.set_state(State.SCHEDULED, session=session)
                else:
                    # Default behaviour which works for subdag.
                    if ti.state in (State.FAILED, State.UPSTREAM_FAILED):
                        self.log.error("Task instance %s with state %s", ti,
                                       ti.state)
                        ti_status.failed.add(key)
                        ti_status.to_run.pop(key)
                        if key in ti_status.running:
                            ti_status.running.pop(key)
                        return

                backfill_context = DepContext(
                    deps=BACKFILL_QUEUED_DEPS,
                    ignore_depends_on_past=ignore_depends_on_past,
                    ignore_task_deps=self.ignore_task_deps,
                    flag_upstream_failed=True,
                )

                # Is the task runnable? -- then run it
                # the dependency checker can change states of tis
                if ti.are_dependencies_met(dep_context=backfill_context,
                                           session=session,
                                           verbose=self.verbose):
                    if executor.has_task(ti):
                        self.log.debug(
                            "Task Instance %s already in executor "
                            "waiting for queue to clear", ti)
                    else:
                        self.log.debug('Sending %s to executor', ti)
                        # Skip scheduled state, we are executing immediately
                        ti.state = State.QUEUED
                        ti.queued_by_job_id = self.id
                        ti.queued_dttm = timezone.utcnow()
                        session.merge(ti)

                        cfg_path = None
                        if self.executor_class in (
                                executor_constants.LOCAL_EXECUTOR,
                                executor_constants.SEQUENTIAL_EXECUTOR,
                        ):
                            cfg_path = tmp_configuration_copy()

                        executor.queue_task_instance(
                            ti,
                            mark_success=self.mark_success,
                            pickle_id=pickle_id,
                            ignore_task_deps=self.ignore_task_deps,
                            ignore_depends_on_past=ignore_depends_on_past,
                            pool=self.pool,
                            cfg_path=cfg_path,
                        )
                        ti_status.running[key] = ti
                        ti_status.to_run.pop(key)
                    session.commit()
                    return

                if ti.state == State.UPSTREAM_FAILED:
                    self.log.error("Task instance %s upstream failed", ti)
                    ti_status.failed.add(key)
                    ti_status.to_run.pop(key)
                    if key in ti_status.running:
                        ti_status.running.pop(key)
                    return

                # special case
                if ti.state == State.UP_FOR_RETRY:
                    self.log.debug(
                        "Task instance %s retry period not "
                        "expired yet", ti)
                    if key in ti_status.running:
                        ti_status.running.pop(key)
                    ti_status.to_run[key] = ti
                    return

                # special case
                if ti.state == State.UP_FOR_RESCHEDULE:
                    self.log.debug(
                        "Task instance %s reschedule period not "
                        "expired yet", ti)
                    if key in ti_status.running:
                        ti_status.running.pop(key)
                    ti_status.to_run[key] = ti
                    return

                # all remaining tasks
                self.log.debug('Adding %s to not_ready', ti)
                ti_status.not_ready.add(key)
Exemplo n.º 56
0
    def _retry_or_fail_task(self, sensor_work, error, session=None):
        """
        Change single task state for sensor task. For final state, set the end_date.
        Since the smart sensor takes care of all retries in one process, failed sensor tasks
        have logically experienced all retries, and the try_number should be set to max_tries.

        :param sensor_work: The sensor_work with exception.
        :type sensor_work: SensorWork
        :param error: The error message for this sensor_work.
        :type error: str.
        :param session: The sqlalchemy session.
        """
        def email_alert(task_instance, error_info):
            try:
                subject, html_content, _ = task_instance.get_email_subject_content(
                    error_info)
                email = sensor_work.execution_context.get('email')

                send_email(email, subject, html_content)
            except Exception:  # pylint: disable=broad-except
                sensor_work.log.warning("Exception alerting email.",
                                        exc_info=True)

        def handle_failure(sensor_work, ti):
            if sensor_work.execution_context.get(
                    'retries') and ti.try_number <= ti.max_tries:
                # retry
                ti.state = State.UP_FOR_RETRY
                if sensor_work.execution_context.get(
                        'email_on_retry'
                ) and sensor_work.execution_context.get('email'):
                    sensor_work.log.info("%s sending email alert for retry",
                                         sensor_work.ti_key)
                    email_alert(ti, error)
            else:
                ti.state = State.FAILED
                if sensor_work.execution_context.get(
                        'email_on_failure'
                ) and sensor_work.execution_context.get('email'):
                    sensor_work.log.info("%s sending email alert for failure",
                                         sensor_work.ti_key)
                    email_alert(ti, error)

        try:
            dag_id, task_id, execution_date = sensor_work.ti_key
            TI = TaskInstance
            SI = SensorInstance
            sensor_instance = (session.query(SI).filter(
                SI.dag_id == dag_id, SI.task_id == task_id,
                SI.execution_date == execution_date).with_for_update().first())

            if sensor_instance.hashcode != sensor_work.hashcode:
                # Return without setting state
                return

            ti = (session.query(TI).filter(
                TI.dag_id == dag_id, TI.task_id == task_id,
                TI.execution_date == execution_date).with_for_update().first())

            if ti:
                if ti.state == State.SENSING:
                    ti.hostname = self.hostname
                    handle_failure(sensor_work, ti)

                    sensor_instance.state = State.FAILED
                    ti.end_date = timezone.utcnow()
                    ti.set_duration()
                else:
                    sensor_instance.state = ti.state
                session.merge(sensor_instance)
                session.merge(ti)
                session.commit()

                sensor_work.log.info(
                    "Task %s got an error: %s. Set the state to failed. Exit.",
                    str(sensor_work.ti_key), error)
                sensor_work.close_sensor_logger()

        except AirflowException:
            sensor_work.log.warning("Exception on failing %s",
                                    sensor_work.ti_key,
                                    exc_info=True)
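
The retry-or-fail choice inside handle_failure reduces to a single comparison against max_tries. A simplified restatement with plain strings instead of Airflow's State constants:

def next_state(retries_configured, try_number, max_tries):
    """Simplified version of handle_failure's retry-or-fail choice above."""
    if retries_configured and try_number <= max_tries:
        return 'up_for_retry'
    return 'failed'

print(next_state(True, try_number=2, max_tries=3))    # up_for_retry
print(next_state(True, try_number=4, max_tries=3))    # failed
print(next_state(False, try_number=0, max_tries=3))   # failed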
Exemplo n.º 57
0
 def dt(attr):
     f = attr.get(attr_name)
     f = f.isoformat() if f else ''
     if timezone.utcnow().isoformat()[:4] == f[:4]:
         f = f[5:]
     return Markup("<nobr>{}</nobr>").format(f)
Exemplo n.º 58
0
    def manage_slas(self, dag: DAG, session: Session = None) -> None:
        """
        Finding all tasks that have SLAs defined, and sending alert emails
        where needed. New SLA misses are also recorded in the database.

        We are assuming that the scheduler runs often, so we only check for
        tasks that should have succeeded in the past hour.
        """
        self.log.info("Running SLA Checks for %s", dag.dag_id)
        if not any(isinstance(ti.sla, timedelta) for ti in dag.tasks):
            self.log.info(
                "Skipping SLA check for %s because no tasks in DAG have SLAs",
                dag)
            return

        qry = (session.query(
            TI.task_id,
            func.max(DR.execution_date).label('max_ti')).join(
                TI.dag_run).with_hint(
                    TI, 'USE INDEX (PRIMARY)', dialect_name='mysql').filter(
                        TI.dag_id == dag.dag_id).filter(
                            or_(TI.state == State.SUCCESS,
                                TI.state == State.SKIPPED)).filter(
                                    TI.task_id.in_(dag.task_ids)).group_by(
                                        TI.task_id).subquery('sq'))
        # get recorded SlaMiss
        recorded_slas_query = set(
            session.query(SlaMiss.dag_id, SlaMiss.task_id,
                          SlaMiss.execution_date).filter(
                              SlaMiss.dag_id == dag.dag_id,
                              SlaMiss.task_id.in_(dag.task_ids)))

        max_tis: Iterator[TI] = (session.query(TI).join(TI.dag_run).filter(
            TI.dag_id == dag.dag_id,
            TI.task_id == qry.c.task_id,
            DR.execution_date == qry.c.max_ti,
        ))

        ts = timezone.utcnow()

        for ti in max_tis:
            task = dag.get_task(ti.task_id)
            if not task.sla:
                continue

            if not isinstance(task.sla, timedelta):
                raise TypeError(
                    f"SLA is expected to be timedelta object, got "
                    f"{type(task.sla)} in {task.dag_id}:{task.task_id}")

            sla_misses = []
            next_info = dag.next_dagrun_info(dag.get_run_data_interval(
                ti.dag_run),
                                             restricted=False)
            if next_info is None:
                self.log.info(
                    "Skipping SLA check for %s because task does not have scheduled date",
                    ti)
            else:
                while next_info.logical_date < ts:
                    next_info = dag.next_dagrun_info(next_info.data_interval,
                                                     restricted=False)

                    if next_info is None:
                        break
                    if (ti.dag_id, ti.task_id,
                            next_info.logical_date) in recorded_slas_query:
                        break
                    if next_info.logical_date + task.sla < ts:

                        sla_miss = SlaMiss(
                            task_id=ti.task_id,
                            dag_id=ti.dag_id,
                            execution_date=next_info.logical_date,
                            timestamp=ts,
                        )
                        sla_misses.append(sla_miss)
            if sla_misses:
                session.add_all(sla_misses)
        session.commit()

        slas: List[SlaMiss] = (
            session.query(SlaMiss).filter(SlaMiss.notification_sent == False,
                                          SlaMiss.dag_id == dag.dag_id)  # noqa
            .all())
        if slas:
            sla_dates: List[datetime.datetime] = [
                sla.execution_date for sla in slas
            ]
            fetched_tis: List[TI] = (session.query(TI).filter(
                TI.state != State.SUCCESS, TI.execution_date.in_(sla_dates),
                TI.dag_id == dag.dag_id).all())
            blocking_tis: List[TI] = []
            for ti in fetched_tis:
                if ti.task_id in dag.task_ids:
                    ti.task = dag.get_task(ti.task_id)
                    blocking_tis.append(ti)
                else:
                    session.delete(ti)
                    session.commit()

            task_list = "\n".join(sla.task_id + ' on ' +
                                  sla.execution_date.isoformat()
                                  for sla in slas)
            blocking_task_list = "\n".join(ti.task_id + ' on ' +
                                           ti.execution_date.isoformat()
                                           for ti in blocking_tis)
            # Track whether email or any alert notification sent
            # We consider email or the alert callback as notifications
            email_sent = False
            notification_sent = False
            if dag.sla_miss_callback:
                # Execute the alert callback
                self.log.info('Calling SLA miss callback')
                try:
                    dag.sla_miss_callback(dag, task_list, blocking_task_list,
                                          slas, blocking_tis)
                    notification_sent = True
                except Exception:
                    self.log.exception(
                        "Could not call sla_miss_callback for DAG %s",
                        dag.dag_id)
            email_content = f"""\
            Here's a list of tasks that missed their SLAs:
            <pre><code>{task_list}\n</code></pre>
            Blocking tasks:
            <pre><code>{blocking_task_list}</code></pre>
            Airflow Webserver URL: {conf.get(section='webserver', key='base_url')}
            """

            tasks_missed_sla = []
            for sla in slas:
                try:
                    task = dag.get_task(sla.task_id)
                except TaskNotFound:
                    # task already deleted from DAG, skip it
                    self.log.warning(
                        "Task %s doesn't exist in DAG anymore, skipping SLA miss notification.",
                        sla.task_id)
                    continue
                tasks_missed_sla.append(task)

            emails: Set[str] = set()
            for task in tasks_missed_sla:
                if task.email:
                    if isinstance(task.email, str):
                        emails |= set(get_email_address_list(task.email))
                    elif isinstance(task.email, (list, tuple)):
                        emails |= set(task.email)
            if emails:
                try:
                    send_email(emails,
                               f"[airflow] SLA miss on DAG={dag.dag_id}",
                               email_content)
                    email_sent = True
                    notification_sent = True
                except Exception:
                    Stats.incr('sla_email_notification_failure')
                    self.log.exception(
                        "Could not send SLA Miss email notification for DAG %s",
                        dag.dag_id)
            # If we sent any notification, update the sla_miss table
            if notification_sent:
                for sla in slas:
                    sla.email_sent = email_sent
                    sla.notification_sent = True
                    session.merge(sla)
            session.commit()
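For context, a DAG-level sla_miss_callback compatible with the call made above could look roughly like the sketch below. The DAG id, schedule and logging target are illustrative placeholders, not part of the original snippet; only the callback signature (dag, task_list, blocking_task_list, slas, blocking_tis) comes from the code above.

# Minimal sketch of an sla_miss_callback; names and schedule are placeholders.
import logging

from airflow import DAG
from airflow.utils import timezone

log = logging.getLogger(__name__)


def notify_sla_miss(dag, task_list, blocking_task_list, slas, blocking_tis):
    # slas holds the SlaMiss rows; blocking_tis are the not-yet-successful
    # task instances sharing those execution dates.
    log.warning("SLA missed on DAG %s for tasks:\n%s", dag.dag_id, task_list)
    if blocking_task_list:
        log.warning("Blocked by:\n%s", blocking_task_list)


dag = DAG(
    dag_id="example_sla_dag",  # placeholder
    start_date=timezone.datetime(2021, 1, 1),
    schedule_interval="@daily",
    sla_miss_callback=notify_sla_miss,
)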
Exemplo n.º 59
0
    def heartbeat(self):
        """
        This should be periodically called by the scheduler. This method will
        kick off new processes to process DAG definition files and read the
        results from the finished processors.

        :return: a list of SimpleDags that were produced by processors that
        have finished since the last time this was called
        :rtype: list[SimpleDag]
        """
        finished_processors = {}
        """:type : dict[unicode, AbstractDagFileProcessor]"""
        running_processors = {}
        """:type : dict[unicode, AbstractDagFileProcessor]"""

        for file_path, processor in self._processors.items():
            if processor.done:
                self.log.debug("Processor for %s finished", file_path)
                now = timezone.utcnow()
                finished_processors[file_path] = processor
                self._last_runtime[file_path] = (
                    now - processor.start_time).total_seconds()
                self._last_finish_time[file_path] = now
                self._run_count[file_path] += 1
            else:
                running_processors[file_path] = processor
        self._processors = running_processors

        self.log.debug("%s/%s scheduler processes running",
                       len(self._processors), self._parallelism)

        self.log.debug("%s file paths queued for processing",
                       len(self._file_path_queue))

        # Collect all the DAGs that were found in the processed files
        simple_dags = []
        for file_path, processor in finished_processors.items():
            if processor.result is None:
                self.log.warning(
                    "Processor for %s exited with return code %s.",
                    processor.file_path, processor.exit_code)
            else:
                for simple_dag in processor.result:
                    simple_dags.append(simple_dag)

        # Generate more file paths to process if we processed all the files
        # already.
        if len(self._file_path_queue) == 0:
            # If the file path is already being processed, or if a file was
            # processed recently, wait until the next batch
            file_paths_in_progress = self._processors.keys()
            now = timezone.utcnow()
            file_paths_recently_processed = []
            for file_path in self._file_paths:
                last_finish_time = self.get_last_finish_time(file_path)
                if (last_finish_time is not None
                        and (now - last_finish_time).total_seconds() <
                        self._process_file_interval):
                    file_paths_recently_processed.append(file_path)

            files_paths_at_run_limit = [
                file_path for file_path, num_runs in self._run_count.items()
                if num_runs == self._max_runs
            ]

            files_paths_to_queue = list(
                set(self._file_paths) - set(file_paths_in_progress) -
                set(file_paths_recently_processed) -
                set(files_paths_at_run_limit))

            for file_path, processor in self._processors.items():
                self.log.debug(
                    "File path %s is still being processed (started: %s)",
                    processor.file_path, processor.start_time.isoformat())

            self.log.debug("Queuing the following files for processing:\n\t%s",
                           "\n\t".join(files_paths_to_queue))

            self._file_path_queue.extend(files_paths_to_queue)

        # Start more processors if we have enough slots and files to process
        while (self._parallelism - len(self._processors) > 0
               and len(self._file_path_queue) > 0):
            file_path = self._file_path_queue.pop(0)
            processor = self._processor_factory(file_path)

            processor.start()
            self.log.debug(
                "Started a process (PID: %s) to generate tasks for %s",
                processor.pid, file_path)
            self._processors[file_path] = processor

        # Update scheduler heartbeat count.
        self._run_count[self._heart_beat_key] += 1

        return simple_dags
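For context, a caller could drive this manager roughly as sketched below. The run_processing_loop name, the handle_simple_dags hook and the loop bounds are hypothetical; only the heartbeat() contract (returning a list of SimpleDag per call) comes from the method above.

import time


def run_processing_loop(manager, handle_simple_dags, heartbeats=10, poll_interval=1.0):
    # Hypothetical driver: 'manager' is assumed to expose the heartbeat()
    # method shown above; 'handle_simple_dags' is a caller-supplied hook.
    for _ in range(heartbeats):
        # Each heartbeat reaps finished file processors, queues and starts new
        # ones up to the parallelism limit, and returns any SimpleDags
        # collected from processors that finished since the last call.
        simple_dags = manager.heartbeat()
        if simple_dags:
            handle_simple_dags(simple_dags)
        time.sleep(poll_interval)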
Exemplo n.º 60
0
    def test_utcnow(self):
        now = timezone.utcnow()
        self.assertTrue(timezone.is_localized(now))
        self.assertEqual(now.replace(tzinfo=None),
                         now.astimezone(UTC).replace(tzinfo=None))