Exemplo n.º 1
0
    def test_handle_task_update(self):
        """Tests calling TaskManager.handle_task_update()"""

        task_id = 'task_1'
        task_name = 'My Task'
        agent_id = 'agent_1'
        task_1 = ImplementedTask(task_id, task_name, agent_id)

        when_launched = now()
        manager = TaskManager()
        manager.launch_tasks([task_1], when_launched)

        when_finished = datetime.timedelta(seconds=1)
        update_1 = job_test_utils.create_task_status_update(
            task_1.id,
            task_1.agent_id,
            TaskStatusUpdate.FINISHED,
            when=when_finished)
        manager.handle_task_update(update_1)

        self.assertTrue(task_1.has_ended)
        self.assertEqual(task_1._ended, when_finished)

        update_2 = job_test_utils.create_task_status_update(
            'task_2', 'New Agent', TaskStatusUpdate.RUNNING, when=now())
        manager.handle_task_update(update_2)  # Should ignore, no error
Exemplo n.º 2
0
    def test_job_exe_clean_task(self):
        """Tests the NodeManager where a cleanup task is returned to clean up a job execution"""

        when = now()
        node_mgr = NodeManager()
        node_mgr.register_agents([self.agent_1, self.agent_2])
        node_mgr.sync_with_database(scheduler_mgr.config)
        cleanup_mgr = CleanupManager()
        cleanup_mgr.update_nodes(node_mgr.get_nodes())
        tasks = node_mgr.get_next_tasks(when)

        task_mgr = TaskManager()
        # Complete initial cleanup tasks
        for task in tasks:
            task_mgr.launch_tasks([task], now())
            update = job_test_utils.create_task_status_update(
                task.id, task.agent_id, TaskStatusUpdate.FINISHED, now())
            task_mgr.handle_task_update(update)
            node_mgr.handle_task_update(update)

        # Mark image pull done to get rid of image tasks
        for node in node_mgr.get_nodes():
            node._image_pull_completed()
            node._update_state()

        job_exe = job_test_utils.create_running_job_exe(agent_id=self.agent_1,
                                                        node=self.node_1)
        # Add a job execution to clean up and get the cleanup task for it
        cleanup_mgr.add_job_execution(job_exe)
        tasks = node_mgr.get_next_tasks(when)
        self.assertEqual(len(tasks), 1)
        task = tasks[0]
        self.assertEqual(task.agent_id, self.agent_1.agent_id)
        self.assertFalse(task.is_initial_cleanup)
        self.assertEqual(len(task.job_exes), 1)
Exemplo n.º 3
0
class TestIngestJobType(TestCase):
    """Tests things related to the ingest job type"""

    fixtures = [
        'basic_errors.json', 'basic_job_errors.json', 'ingest_job_types.json',
        'ingest_errors.json'
    ]

    def setUp(self):
        django.setup()

        reset_error_cache()

        self.task_mgr = TaskManager()

    def test_timed_out_system_job_task(self):
        """Tests running through a job execution where a system job task times out"""

        ingest_job_type = Ingest.objects.get_ingest_job_type()
        ingest_job_type.max_tries = 1
        ingest_job_type.save()
        running_job_exe = job_test_utils.create_running_job_exe(
            agent_id='agent_1', job_type=ingest_job_type, num_exes=1)

        # Start job-task and then task times out
        when_launched = now() + timedelta(seconds=1)
        job_task_started = when_launched + timedelta(seconds=1)
        when_timed_out = job_task_started + timedelta(seconds=1)
        job_task = running_job_exe.start_next_task()
        self.task_mgr.launch_tasks([job_task], when_launched)
        update = job_test_utils.create_task_status_update(
            job_task.id, 'agent', TaskStatusUpdate.RUNNING, job_task_started)
        self.task_mgr.handle_task_update(update)
        running_job_exe.task_update(update)
        running_job_exe.execution_timed_out(job_task, when_timed_out)

        self.assertFalse(running_job_exe.is_finished()
                         )  # Not finished until killed task update arrives
        self.assertEqual(running_job_exe.status, 'FAILED')
        self.assertEqual(running_job_exe.error_category, 'SYSTEM')
        self.assertEqual(running_job_exe.error.name, 'ingest-timeout')
        self.assertEqual(running_job_exe.finished, when_timed_out)
        self.assertFalse(running_job_exe.is_next_task_ready())

        # Killed task update arrives, job execution is now finished
        job_task_kill = when_timed_out + timedelta(seconds=1)
        update = job_test_utils.create_task_status_update(
            job_task.id, 'agent', TaskStatusUpdate.KILLED, job_task_kill)
        self.task_mgr.handle_task_update(update)
        running_job_exe.task_update(update)
        self.assertTrue(running_job_exe.is_finished())
        self.assertEqual(running_job_exe.status, 'FAILED')
        self.assertEqual(running_job_exe.error_category, 'SYSTEM')
        self.assertEqual(running_job_exe.error.name, 'ingest-timeout')
        self.assertEqual(running_job_exe.finished, when_timed_out)
        self.assertFalse(running_job_exe.is_next_task_ready())
Exemplo n.º 4
0
class TestIngestJobType(TestCase):
    """Tests things related to the ingest job type"""

    fixtures = [
        'basic_errors.json', 'basic_job_errors.json', 'ingest_job_types.json',
        'ingest_errors.json'
    ]

    def setUp(self):
        django.setup()

        self.task_mgr = TaskManager()

    def test_timed_out_system_job_task(self):
        """Tests running through a job execution where a system job task times out"""

        ingest_job_type = Ingest.objects.get_ingest_job_type()
        ingest_job_type.max_tries = 1
        ingest_job_type.save()
        job = job_test_utils.create_job(job_type=ingest_job_type, num_exes=1)
        job_exe = job_test_utils.create_job_exe(job=job)
        running_job_exe = RunningJobExecution(job_exe)

        # Start job-task and then task times out
        when_launched = now() + timedelta(seconds=1)
        job_task_started = when_launched + timedelta(seconds=1)
        when_timed_out = job_task_started + timedelta(seconds=1)
        job_task = running_job_exe.start_next_task()
        self.task_mgr.launch_tasks([job_task], when_launched)
        update = job_test_utils.create_task_status_update(
            job_task.id, 'agent', TaskStatusUpdate.RUNNING, job_task_started)
        self.task_mgr.handle_task_update(update)
        running_job_exe.task_update(update)
        running_job_exe.execution_timed_out(job_task, when_timed_out)
        self.assertTrue(running_job_exe.is_finished())
        self.assertFalse(running_job_exe.is_next_task_ready())

        job_exe = JobExecution.objects.get(id=job_exe.id)
        self.assertEqual('FAILED', job_exe.status)
        self.assertEqual('ingest-timeout', job_exe.error.name)
        self.assertEqual(when_timed_out, job_exe.ended)
Exemplo n.º 5
0
    def test_pull_task_change_agent_id(self):
        """Tests the NodeManager where a node's agent ID changes during a pull task"""

        when = now()
        manager = NodeManager()
        manager.register_agents([self.agent_1, self.agent_2])
        manager.sync_with_database(scheduler_mgr.config)
        for node in manager.get_nodes():
            node._last_health_task = when
            node._initial_cleanup_completed()
            node._update_state()
        tasks = manager.get_next_tasks(when)

        task_mgr = TaskManager()
        task_2 = None
        for task in tasks:
            task_mgr.launch_tasks([task], when)
            if task.agent_id == self.agent_2.agent_id:
                task_2 = task

        # Node 2 changes agent ID to 3
        manager.lost_node(self.agent_2.agent_id)
        manager.register_agents([self.agent_3])
        manager.sync_with_database(scheduler_mgr.config)
        for node in manager.get_nodes():
            node._last_health_task = when
            node._initial_cleanup_completed()
            node._update_state()

        # Should get new Docker pull task for node 2
        tasks = manager.get_next_tasks(when)
        self.assertEqual(len(tasks), 1)
        new_task_2 = tasks[0]
        self.assertEqual(new_task_2.agent_id, self.agent_3.agent_id)

        # Task update comes back for original node 2 Docker pull task, manager should ignore with no exception
        update = job_test_utils.create_task_status_update(
            task_2.id, task_2.agent_id, TaskStatusUpdate.FAILED, when)
        task_mgr.handle_task_update(update)
        manager.handle_task_update(update)
Exemplo n.º 6
0
class TestNode(TestCase):
    def setUp(self):
        django.setup()

        self.node_agent = 'agent_1'
        self.node = node_test_utils.create_node(hostname='host_1',
                                                slave_id=self.node_agent)
        self.job_exe = job_test_utils.create_job_exe(node=self.node)
        self.task_mgr = TaskManager()

    @patch('scheduler.node.conditions.now')
    def test_generate_status_json(self, mock_now):
        """Tests calling generate_status_json() successfully"""

        right_now = now()
        mock_now.return_value = right_now
        num_job_exes = JOB_EXES_WARNING_THRESHOLD + 1

        node = Node(self.node_agent, self.node)
        node._conditions.handle_pull_task_failed()
        node._conditions.update_cleanup_count(num_job_exes)
        node._update_state()
        nodes_list = []
        node.generate_status_json(nodes_list)

        expected_results = [{
            'id':
            node.id,
            'hostname':
            node.hostname,
            'agent_id':
            self.node_agent,
            'is_active':
            True,
            'state': {
                'name': 'DEGRADED',
                'title': Node.DEGRADED.title,
                'description': Node.DEGRADED.description
            },
            'errors': [{
                'name': 'IMAGE_PULL',
                'title': NodeConditions.IMAGE_PULL_ERR.title,
                'description': NodeConditions.IMAGE_PULL_ERR.description,
                'started': datetime_to_string(right_now),
                'last_updated': datetime_to_string(right_now)
            }],
            'warnings': [{
                'name':
                'CLEANUP',
                'title':
                NodeConditions.CLEANUP_WARNING.title,
                'description':
                NodeConditions.CLEANUP_WARNING.description % num_job_exes,
                'started':
                datetime_to_string(right_now),
                'last_updated':
                datetime_to_string(right_now)
            }]
        }]
        self.assertListEqual(nodes_list, expected_results)

    def test_handle_failed_cleanup_task(self):
        """Tests handling failed cleanup task"""

        when = now()
        node = Node(self.node_agent, self.node)
        node._last_heath_task = when
        # Get initial cleanup task
        task = node.get_next_tasks(when)[0]
        self.assertTrue(task.id.startswith(CLEANUP_TASK_ID_PREFIX))
        task_1_id = task.id

        # Fail task after running and get different task next time
        self.task_mgr.launch_tasks([task], now())
        update = job_test_utils.create_task_status_update(
            task.id, task.agent_id, TaskStatusUpdate.RUNNING, now())
        self.task_mgr.handle_task_update(update)
        node.handle_task_update(update)
        update = job_test_utils.create_task_status_update(
            task.id, task.agent_id, TaskStatusUpdate.FAILED, now())
        self.task_mgr.handle_task_update(update)
        node.handle_task_update(update)

        # No new cleanup task right away
        tasks = node.get_next_tasks(when + datetime.timedelta(seconds=5))
        self.assertListEqual([], tasks)
        self.assertFalse(node._is_initial_cleanup_completed)

        # After error threshold, we should get new cleanup task
        new_time = when + Node.CLEANUP_ERR_THRESHOLD + datetime.timedelta(
            seconds=5)
        node._last_heath_task = new_time  # Get rid of health check task
        task = node.get_next_tasks(new_time)[0]
        self.assertNotEqual(task.id, task_1_id)
        self.assertTrue(task.id.startswith(CLEANUP_TASK_ID_PREFIX))

    def test_handle_initial_cleanup_task(self):
        """Tests handling the initial cleanup task"""

        when = now()
        node = Node(self.node_agent, self.node)
        node._last_heath_task = when

        # Get initial cleanup task
        task = node.get_next_tasks(when)[0]
        self.assertTrue(task.id.startswith(CLEANUP_TASK_ID_PREFIX))
        self.assertTrue(task.is_initial_cleanup)
        self.assertEqual(task.agent_id, self.node_agent)

        # Schedule initial cleanup and make sure no new task is ready
        self.task_mgr.launch_tasks([task], now())
        self.assertListEqual([], node.get_next_tasks(when))
        self.assertFalse(node._is_initial_cleanup_completed)

        # Complete initial clean up, verify no new cleanup task
        update = job_test_utils.create_task_status_update(
            task.id, task.agent_id, TaskStatusUpdate.RUNNING, now())
        self.task_mgr.handle_task_update(update)
        node.handle_task_update(update)
        update = job_test_utils.create_task_status_update(
            task.id, task.agent_id, TaskStatusUpdate.FINISHED, now())
        self.task_mgr.handle_task_update(update)
        node.handle_task_update(update)
        for task in node.get_next_tasks(when):
            self.assertFalse(task.id.startswith(CLEANUP_TASK_ID_PREFIX))
        self.assertTrue(node._is_initial_cleanup_completed)

    def test_handle_killed_cleanup_task(self):
        """Tests handling killed cleanup task"""

        when = now()
        node = Node(self.node_agent, self.node)
        # Get initial cleanup task
        task = node.get_next_tasks(when)[0]
        self.assertTrue(task.id.startswith(CLEANUP_TASK_ID_PREFIX))
        task_1_id = task.id

        # Kill task after running and get different task next time
        self.task_mgr.launch_tasks([task], now())
        update = job_test_utils.create_task_status_update(
            task.id, task.agent_id, TaskStatusUpdate.RUNNING, now())
        self.task_mgr.handle_task_update(update)
        node.handle_task_update(update)
        update = job_test_utils.create_task_status_update(
            task.id, task.agent_id, TaskStatusUpdate.KILLED, now())
        self.task_mgr.handle_task_update(update)
        node.handle_task_update(update)
        task = node.get_next_tasks(when)[0]
        self.assertTrue(task.id.startswith(CLEANUP_TASK_ID_PREFIX))
        self.assertNotEqual(task.id, task_1_id)
        self.assertFalse(node._is_initial_cleanup_completed)

    def test_handle_lost_cleanup_tasks(self):
        """Tests handling lost cleanup tasks"""

        when = now()
        node = Node(self.node_agent, self.node)
        # Get initial cleanup task
        task = node.get_next_tasks(when)[0]
        self.assertTrue(task.id.startswith(CLEANUP_TASK_ID_PREFIX))
        task_1_id = task.id

        # Lose task without scheduling and get same task again
        update = job_test_utils.create_task_status_update(
            task.id, task.agent_id, TaskStatusUpdate.LOST, now())
        node.handle_task_update(update)
        task = node.get_next_tasks(when)[0]
        self.assertTrue(task.id.startswith(CLEANUP_TASK_ID_PREFIX))
        self.assertEqual(task.id, task_1_id)
        self.assertFalse(node._is_initial_cleanup_completed)

        # Lose task with scheduling and get same task again
        self.task_mgr.launch_tasks([task], now())
        update = job_test_utils.create_task_status_update(
            task.id, task.agent_id, TaskStatusUpdate.LOST, now())
        self.task_mgr.handle_task_update(update)
        node.handle_task_update(update)
        task = node.get_next_tasks(when)[0]
        self.assertTrue(task.id.startswith(CLEANUP_TASK_ID_PREFIX))
        self.assertEqual(task.id, task_1_id)
        self.assertFalse(node._is_initial_cleanup_completed)

        # Lose task after running and get same task again
        self.task_mgr.launch_tasks([task], now())
        update = job_test_utils.create_task_status_update(
            task.id, task.agent_id, TaskStatusUpdate.RUNNING, now())
        self.task_mgr.handle_task_update(update)
        node.handle_task_update(update)
        update = job_test_utils.create_task_status_update(
            task.id, task.agent_id, TaskStatusUpdate.LOST, now())
        self.task_mgr.handle_task_update(update)
        node.handle_task_update(update)
        task = node.get_next_tasks(when)[0]
        self.assertTrue(task.id.startswith(CLEANUP_TASK_ID_PREFIX))
        self.assertEqual(task.id, task_1_id)
        self.assertFalse(node._is_initial_cleanup_completed)

    def test_handle_regular_cleanup_task(self):
        """Tests handling a regular cleanup task"""

        when = now()
        node = Node(self.node_agent, self.node)
        node._last_heath_task = when
        node._initial_cleanup_completed()
        node._image_pull_completed()
        node._update_state()

        # No task since there are no job executions to clean
        self.assertListEqual([], node.get_next_tasks(when))

        # Add job execution and complete task to clean it up
        job_exe = RunningJobExecution(self.job_exe)
        node.add_job_execution(job_exe)
        task = node.get_next_tasks(when)[0]
        self.assertTrue(task.id.startswith(CLEANUP_TASK_ID_PREFIX))
        self.assertFalse(task.is_initial_cleanup)
        self.assertListEqual(task.job_exes, [job_exe])
        self.task_mgr.launch_tasks([task], now())
        update = job_test_utils.create_task_status_update(
            task.id, task.agent_id, TaskStatusUpdate.RUNNING, now())
        self.task_mgr.handle_task_update(update)
        node.handle_task_update(update)
        update = job_test_utils.create_task_status_update(
            task.id, task.agent_id, TaskStatusUpdate.FINISHED, now())
        self.task_mgr.handle_task_update(update)
        node.handle_task_update(update)

        # No task since all job executions have been cleaned
        self.assertListEqual([], node.get_next_tasks(when))

    def test_paused_node_cleanup_task(self):
        """Tests not returning cleanup task when its node is paused"""

        when = now()
        paused_node = node_test_utils.create_node(hostname='host_1',
                                                  slave_id=self.node_agent)
        paused_node.is_paused = True
        node = Node(self.node_agent, paused_node)
        # Turn off health task
        node._last_heath_task = when
        # No task due to paused node
        self.assertListEqual([], node.get_next_tasks(when))

    def test_handle_failed_health_task(self):
        """Tests handling failed health task"""

        when = now()
        node = Node(self.node_agent, self.node)
        node._initial_cleanup_completed()
        node._image_pull_completed()
        node._update_state()
        # Get health task
        task = node.get_next_tasks(when)[0]
        task_1_id = task.id
        self.assertTrue(task.id.startswith(HEALTH_TASK_ID_PREFIX))

        # Fail task after running
        self.task_mgr.launch_tasks([task], now())
        update = job_test_utils.create_task_status_update(
            task.id, task.agent_id, TaskStatusUpdate.RUNNING, now())
        self.task_mgr.handle_task_update(update)
        node.handle_task_update(update)
        update = job_test_utils.create_task_status_update(
            task.id, task.agent_id, TaskStatusUpdate.FAILED, now())
        self.task_mgr.handle_task_update(update)
        node.handle_task_update(update)

        # Check node state
        self.assertEqual(node._state, Node.DEGRADED)
        self.assertTrue(NodeConditions.HEALTH_FAIL_ERR.name in
                        node._conditions._active_errors)

        # No new health task right away
        tasks = node.get_next_tasks(when + datetime.timedelta(seconds=5))
        self.assertListEqual([], tasks)
        self.assertFalse(node._conditions.is_health_check_normal)

        # After error threshold, we should get new health task
        new_time = when + Node.HEALTH_ERR_THRESHOLD + datetime.timedelta(
            seconds=5)
        task = node.get_next_tasks(new_time)[0]
        self.assertNotEqual(task.id, task_1_id)
        self.assertTrue(task.id.startswith(HEALTH_TASK_ID_PREFIX))

    def test_handle_failed_health_task_bad_daemon(self):
        """Tests handling a failed health task where the Docker daemon is bad"""

        when = now()
        node = Node(self.node_agent, self.node)
        node._initial_cleanup_completed()
        node._image_pull_completed()
        node._update_state()
        # Get health task
        task = node.get_next_tasks(when)[0]
        self.assertTrue(task.id.startswith(HEALTH_TASK_ID_PREFIX))

        # Fail task with bad daemon exit code
        self.task_mgr.launch_tasks([task], now())
        update = job_test_utils.create_task_status_update(
            task.id, task.agent_id, TaskStatusUpdate.RUNNING, now())
        self.task_mgr.handle_task_update(update)
        node.handle_task_update(update)
        update = job_test_utils.create_task_status_update(
            task.id,
            task.agent_id,
            TaskStatusUpdate.FAILED,
            now(),
            exit_code=HealthTask.BAD_DAEMON_CODE)
        self.task_mgr.handle_task_update(update)
        node.handle_task_update(update)

        # Check node state
        self.assertEqual(node._state, Node.DEGRADED)
        self.assertTrue(NodeConditions.BAD_DAEMON_ERR.name in
                        node._conditions._active_errors)

    def test_handle_failed_health_task_bad_logstash(self):
        """Tests handling a failed health task where logstash is unreachable"""

        when = now()
        node = Node(self.node_agent, self.node)
        node._initial_cleanup_completed()
        node._image_pull_completed()
        node._update_state()
        # Get health task
        task = node.get_next_tasks(when)[0]
        self.assertTrue(task.id.startswith(HEALTH_TASK_ID_PREFIX))

        # Fail task with bad logstash exit code
        self.task_mgr.launch_tasks([task], now())
        update = job_test_utils.create_task_status_update(
            task.id, task.agent_id, TaskStatusUpdate.RUNNING, now())
        self.task_mgr.handle_task_update(update)
        node.handle_task_update(update)
        update = job_test_utils.create_task_status_update(
            task.id,
            task.agent_id,
            TaskStatusUpdate.FAILED,
            now(),
            exit_code=HealthTask.BAD_LOGSTASH_CODE)
        self.task_mgr.handle_task_update(update)
        node.handle_task_update(update)

        # Check node state
        self.assertEqual(node._state, Node.DEGRADED)
        self.assertTrue(NodeConditions.BAD_LOGSTASH_ERR.name in
                        node._conditions._active_errors)

    def test_handle_failed_health_task_low_docker_space(self):
        """Tests handling a failed health task where Docker has low disk space"""

        when = now()
        node = Node(self.node_agent, self.node)
        node._initial_cleanup_completed()
        node._image_pull_completed()
        node._update_state()
        # Get health task
        task = node.get_next_tasks(when)[0]
        self.assertTrue(task.id.startswith(HEALTH_TASK_ID_PREFIX))

        # Fail task with low Docker space exit code
        self.task_mgr.launch_tasks([task], now())
        update = job_test_utils.create_task_status_update(
            task.id, task.agent_id, TaskStatusUpdate.RUNNING, now())
        self.task_mgr.handle_task_update(update)
        node.handle_task_update(update)
        update = job_test_utils.create_task_status_update(
            task.id,
            task.agent_id,
            TaskStatusUpdate.FAILED,
            now(),
            exit_code=HealthTask.LOW_DOCKER_SPACE_CODE)
        self.task_mgr.handle_task_update(update)
        node.handle_task_update(update)

        # Check node state
        self.assertEqual(node._state, Node.DEGRADED)
        self.assertTrue(NodeConditions.LOW_DOCKER_SPACE_ERR.name in
                        node._conditions._active_errors)

    def test_handle_successful_health_task(self):
        """Tests handling the health task successfully"""

        when = now()
        node = Node(self.node_agent, self.node)
        node._initial_cleanup_completed()
        node._image_pull_completed()
        node._update_state()

        # Get health task
        task = node.get_next_tasks(when)[0]
        self.assertTrue(task.id.startswith(HEALTH_TASK_ID_PREFIX))
        self.assertEqual(task.agent_id, self.node_agent)

        # Schedule health task and make sure no new task is ready
        self.task_mgr.launch_tasks([task], now())
        self.assertListEqual([], node.get_next_tasks(when))
        self.assertTrue(node._conditions.is_health_check_normal)

        # Complete pull task, verify no new task
        update = job_test_utils.create_task_status_update(
            task.id, task.agent_id, TaskStatusUpdate.RUNNING, now())
        self.task_mgr.handle_task_update(update)
        node.handle_task_update(update)
        update = job_test_utils.create_task_status_update(
            task.id, task.agent_id, TaskStatusUpdate.FINISHED, now())
        self.task_mgr.handle_task_update(update)
        node.handle_task_update(update)
        self.assertListEqual([], node.get_next_tasks(when))
        self.assertTrue(node._conditions.is_health_check_normal)

    def test_handle_killed_health_task(self):
        """Tests handling killed health task"""

        when = now()
        node = Node(self.node_agent, self.node)
        node._initial_cleanup_completed()
        node._image_pull_completed()
        node._update_state()
        # Get pull task
        task = node.get_next_tasks(when)[0]
        self.assertTrue(task.id.startswith(HEALTH_TASK_ID_PREFIX))
        task_1_id = task.id

        # Kill task after running and get different task next time
        self.task_mgr.launch_tasks([task], now())
        update = job_test_utils.create_task_status_update(
            task.id, task.agent_id, TaskStatusUpdate.RUNNING, now())
        self.task_mgr.handle_task_update(update)
        node.handle_task_update(update)
        update = job_test_utils.create_task_status_update(
            task.id, task.agent_id, TaskStatusUpdate.KILLED, now())
        self.task_mgr.handle_task_update(update)
        node.handle_task_update(update)
        task = node.get_next_tasks(when)[0]
        self.assertTrue(task.id.startswith(HEALTH_TASK_ID_PREFIX))
        self.assertNotEqual(task.id, task_1_id)
        self.assertTrue(node._conditions.is_health_check_normal)

    def test_handle_lost_health_task(self):
        """Tests handling lost health task"""

        when = now()
        node = Node(self.node_agent, self.node)
        node._initial_cleanup_completed()
        node._image_pull_completed()
        node._update_state()
        # Get pull task
        task = node.get_next_tasks(when)[0]
        self.assertTrue(task.id.startswith(HEALTH_TASK_ID_PREFIX))
        task_1_id = task.id
        self.assertIsNotNone(task)

        # Lose task without scheduling and get same task again
        update = job_test_utils.create_task_status_update(
            task.id, task.agent_id, TaskStatusUpdate.LOST, now())
        node.handle_task_update(update)
        task = node.get_next_tasks(when)[0]
        self.assertTrue(task.id.startswith(HEALTH_TASK_ID_PREFIX))
        self.assertEqual(task.id, task_1_id)
        self.assertTrue(node._conditions.is_health_check_normal)

        # Lose task with scheduling and get same task again
        self.task_mgr.launch_tasks([task], now())
        update = job_test_utils.create_task_status_update(
            task.id, task.agent_id, TaskStatusUpdate.LOST, now())
        self.task_mgr.handle_task_update(update)
        node.handle_task_update(update)
        task = node.get_next_tasks(when)[0]
        self.assertTrue(task.id.startswith(HEALTH_TASK_ID_PREFIX))
        self.assertEqual(task.id, task_1_id)
        self.assertTrue(node._conditions.is_health_check_normal)

        # Lose task after running and get same task again
        self.task_mgr.launch_tasks([task], now())
        update = job_test_utils.create_task_status_update(
            task.id, task.agent_id, TaskStatusUpdate.RUNNING, now())
        self.task_mgr.handle_task_update(update)
        node.handle_task_update(update)
        update = job_test_utils.create_task_status_update(
            task.id, task.agent_id, TaskStatusUpdate.LOST, now())
        self.task_mgr.handle_task_update(update)
        node.handle_task_update(update)
        task = node.get_next_tasks(when)[0]
        self.assertTrue(task.id.startswith(HEALTH_TASK_ID_PREFIX))
        self.assertEqual(task.id, task_1_id)
        self.assertTrue(node._conditions.is_health_check_normal)

    def test_handle_failed_pull_task(self):
        """Tests handling failed Docker pull task"""

        when = now()
        node = Node(self.node_agent, self.node)
        node._last_heath_task = when
        node._initial_cleanup_completed()
        node._update_state()
        # Get Docker pull task
        task = node.get_next_tasks(when)[0]
        task_1_id = task.id
        self.assertTrue(task.id.startswith(PULL_TASK_ID_PREFIX))

        # Fail task after running
        self.task_mgr.launch_tasks([task], now())
        update = job_test_utils.create_task_status_update(
            task.id, task.agent_id, TaskStatusUpdate.RUNNING, now())
        self.task_mgr.handle_task_update(update)
        node.handle_task_update(update)
        update = job_test_utils.create_task_status_update(
            task.id, task.agent_id, TaskStatusUpdate.FAILED, now())
        self.task_mgr.handle_task_update(update)
        node.handle_task_update(update)

        # No new pull task right away
        tasks = node.get_next_tasks(when + datetime.timedelta(seconds=5))
        self.assertListEqual([], tasks)
        self.assertFalse(node._is_image_pulled)

        # After error threshold, we should get new pull task
        new_time = when + Node.IMAGE_PULL_ERR_THRESHOLD + datetime.timedelta(
            seconds=5)
        node._last_heath_task = new_time  # Get rid of health check task
        task = node.get_next_tasks(new_time)[0]
        self.assertNotEqual(task.id, task_1_id)
        self.assertTrue(task.id.startswith(PULL_TASK_ID_PREFIX))

    def test_handle_successful_pull_task(self):
        """Tests handling the Docker pull task successfully"""

        when = now()
        node = Node(self.node_agent, self.node)
        node._last_heath_task = when
        node._initial_cleanup_completed()
        node._update_state()

        # Get Docker pull task
        task = node.get_next_tasks(when)[0]
        self.assertTrue(task.id.startswith(PULL_TASK_ID_PREFIX))
        self.assertEqual(task.agent_id, self.node_agent)

        # Schedule pull task and make sure no new task is ready
        self.task_mgr.launch_tasks([task], now())
        self.assertListEqual([], node.get_next_tasks(when))
        self.assertFalse(node._is_image_pulled)

        # Complete pull task, verify no new task
        update = job_test_utils.create_task_status_update(
            task.id, task.agent_id, TaskStatusUpdate.RUNNING, now())
        self.task_mgr.handle_task_update(update)
        node.handle_task_update(update)
        update = job_test_utils.create_task_status_update(
            task.id, task.agent_id, TaskStatusUpdate.FINISHED, now())
        self.task_mgr.handle_task_update(update)
        node.handle_task_update(update)
        self.assertListEqual([], node.get_next_tasks(when))
        self.assertTrue(node._is_image_pulled)
        # Node should now be ready
        self.assertEqual(node._state, Node.READY)

    def test_handle_killed_pull_task(self):
        """Tests handling killed cleanup task"""

        when = now()
        node = Node(self.node_agent, self.node)
        node._last_heath_task = when
        node._initial_cleanup_completed()
        node._update_state()
        # Get pull task
        task = node.get_next_tasks(when)[0]
        self.assertTrue(task.id.startswith(PULL_TASK_ID_PREFIX))
        task_1_id = task.id

        # Kill task after running and get different task next time
        self.task_mgr.launch_tasks([task], now())
        update = job_test_utils.create_task_status_update(
            task.id, task.agent_id, TaskStatusUpdate.RUNNING, now())
        self.task_mgr.handle_task_update(update)
        node.handle_task_update(update)
        update = job_test_utils.create_task_status_update(
            task.id, task.agent_id, TaskStatusUpdate.KILLED, now())
        self.task_mgr.handle_task_update(update)
        node.handle_task_update(update)
        task = node.get_next_tasks(when)[0]
        self.assertTrue(task.id.startswith(PULL_TASK_ID_PREFIX))
        self.assertNotEqual(task.id, task_1_id)
        self.assertFalse(node._is_image_pulled)

    def test_handle_lost_pull_task(self):
        """Tests handling lost pull task"""

        when = now()
        node = Node(self.node_agent, self.node)
        node._last_heath_task = when
        node._initial_cleanup_completed()
        node._update_state()
        # Get pull task
        task = node.get_next_tasks(when)[0]
        self.assertTrue(task.id.startswith(PULL_TASK_ID_PREFIX))
        task_1_id = task.id
        self.assertIsNotNone(task)

        # Lose task without scheduling and get same task again
        update = job_test_utils.create_task_status_update(
            task.id, task.agent_id, TaskStatusUpdate.LOST, now())
        node.handle_task_update(update)
        task = node.get_next_tasks(when)[0]
        self.assertTrue(task.id.startswith(PULL_TASK_ID_PREFIX))
        self.assertEqual(task.id, task_1_id)
        self.assertFalse(node._is_image_pulled)

        # Lose task with scheduling and get same task again
        self.task_mgr.launch_tasks([task], now())
        update = job_test_utils.create_task_status_update(
            task.id, task.agent_id, TaskStatusUpdate.LOST, now())
        self.task_mgr.handle_task_update(update)
        node.handle_task_update(update)
        task = node.get_next_tasks(when)[0]
        self.assertTrue(task.id.startswith(PULL_TASK_ID_PREFIX))
        self.assertEqual(task.id, task_1_id)
        self.assertFalse(node._is_image_pulled)

        # Lose task after running and get same task again
        self.task_mgr.launch_tasks([task], now())
        update = job_test_utils.create_task_status_update(
            task.id, task.agent_id, TaskStatusUpdate.RUNNING, now())
        self.task_mgr.handle_task_update(update)
        node.handle_task_update(update)
        update = job_test_utils.create_task_status_update(
            task.id, task.agent_id, TaskStatusUpdate.LOST, now())
        self.task_mgr.handle_task_update(update)
        node.handle_task_update(update)
        task = node.get_next_tasks(when)[0]
        self.assertTrue(task.id.startswith(PULL_TASK_ID_PREFIX))
        self.assertEqual(task.id, task_1_id)
        self.assertFalse(node._is_image_pulled)

    def test_paused_node_pull_task(self):
        """Tests not returning pull task when its node is paused"""

        when = now()
        paused_node = node_test_utils.create_node(hostname='host_1',
                                                  slave_id=self.node_agent)
        paused_node.is_paused = True
        node = Node(self.node_agent, paused_node)
        node._last_heath_task = when
        node._initial_cleanup_completed()
        node._update_state()
        tasks = node.get_next_tasks(when)
        # No task due to paused node
        self.assertListEqual([], tasks)

    def test_node_that_is_not_cleaned_yet_no_pull_task(self):
        """Tests not returning pull task when the node hasn't been cleaned up yet"""

        when = now()
        node = Node(self.node_agent, self.node)
        tasks = node.get_next_tasks(when)
        # No pull task due to node not cleaned yet
        for task in tasks:
            self.assertFalse(task.id.startswith(PULL_TASK_ID_PREFIX))
Exemplo n.º 7
0
class TestJobExecutionManager(TransactionTestCase):
    """Tests the JobExecutionManager class"""

    fixtures = ['basic_errors.json', 'basic_job_errors.json']

    def setUp(self):
        django.setup()

        # Clear error cache so tests work correctly
        reset_error_cache()

        self.agent_id = 'agent'
        self.node_model_1 = node_test_utils.create_node()
        self.job_exe_1 = job_test_utils.create_running_job_exe(agent_id=self.agent_id, node=self.node_model_1)
        self.node_model_2 = node_test_utils.create_node()
        self.job_exe_2 = job_test_utils.create_running_job_exe(agent_id=self.agent_id, node=self.node_model_2)

        self.task_mgr = TaskManager()
        self.job_exe_mgr = JobExecutionManager()

    def test_check_for_starvation(self):
        """Tests calling check_for_starvation() successfully"""

        self.job_exe_mgr.schedule_job_exes([self.job_exe_1, self.job_exe_2], [])

        # Start and complete first task of execution
        task_1_launched = now()
        task_1 = self.job_exe_1.start_next_task()
        self.task_mgr.launch_tasks([task_1], task_1_launched)
        task_1_started = task_1_launched + timedelta(seconds=1)
        update = job_test_utils.create_task_status_update(task_1.id, 'agent', TaskStatusUpdate.RUNNING, task_1_started)
        self.task_mgr.handle_task_update(update)
        self.job_exe_mgr.handle_task_update(update)
        task_1_completed = task_1_started + timedelta(seconds=10)
        update = job_test_utils.create_task_status_update(task_1.id, 'agent', TaskStatusUpdate.FINISHED,
                                                          task_1_completed)
        self.task_mgr.handle_task_update(update)
        self.job_exe_mgr.handle_task_update(update)

        # Check after the time threshold has passed and task 2 has still not been launched
        check_time = task_1_completed + RESOURCE_STARVATION_THRESHOLD + timedelta(seconds=1)
        finished_job_exes = self.job_exe_mgr.check_for_starvation(check_time)

        # Check that execution 1 was failed for starvation
        self.assertEqual(len(finished_job_exes), 1)
        starved_job_exe = finished_job_exes[0]
        self.assertEqual(starved_job_exe.id, self.job_exe_1.id)
        self.assertEqual(starved_job_exe.status, 'FAILED')
        self.assertEqual(starved_job_exe.error.name, 'resource-starvation')
        self.assertEqual(starved_job_exe.finished, check_time)

    def test_generate_status_json(self):
        """Tests calling generate_status_json() successfully"""

        self.job_exe_mgr.schedule_job_exes([self.job_exe_1, self.job_exe_2], [])
        json_dict = [{'id': self.node_model_1.id}, {'id': self.node_model_2.id}]
        self.job_exe_mgr.generate_status_json(json_dict, now())

        for node_dict in json_dict:
            self.assertEqual(node_dict['job_executions']['running']['total'], 1)

    def test_get_messages_for_canceled_job_exes(self):
        """Tests calling get_messages() successfully when canceled job_exes have been added"""

        job_exe_ends = []
        for _ in range(int(MAX_NUM * 2.5)):  # Should result in 3 messages
            job_exe = job_test_utils.create_job_exe()
            job_exe_ends.append(job_exe.create_canceled_job_exe_end_model(now()))

        self.job_exe_mgr.add_canceled_job_exes(job_exe_ends)
        messages = self.job_exe_mgr.get_messages()

        self.assertEqual(len(messages), 3)

    def test_handle_task_timeout(self):
        """Tests calling handle_task_timeout() successfully"""

        self.job_exe_mgr.schedule_job_exes([self.job_exe_1, self.job_exe_2], [])

        task = self.job_exe_1.start_next_task()
        self.job_exe_mgr.handle_task_timeout(task, now())

        self.assertEqual(self.job_exe_1.status, 'FAILED')

    def test_handle_task_update(self):
        """Tests calling handle_task_update() successfully"""

        self.job_exe_mgr.schedule_job_exes([self.job_exe_1, self.job_exe_2], [])

        # Start tasks
        task_1 = self.job_exe_1.start_next_task()
        task_1_started = now() - timedelta(minutes=5)
        update_1 = job_test_utils.create_task_status_update(task_1.id, 'agent', TaskStatusUpdate.RUNNING,
                                                            task_1_started)
        task_2 = self.job_exe_2.start_next_task()
        # Shortcut job exe 2 so that there is only one task to complete
        self.job_exe_2._remaining_tasks = []
        task_2_started = now() - timedelta(minutes=5)
        update_2 = job_test_utils.create_task_status_update(task_2.id, 'agent', TaskStatusUpdate.RUNNING,
                                                            task_2_started)

        # Job execution is not finished, so None should be returned and no message is available
        result = self.job_exe_mgr.handle_task_update(update_1)
        self.assertIsNone(result)
        result = self.job_exe_mgr.handle_task_update(update_2)
        self.assertIsNone(result)
        self.assertListEqual(self.job_exe_mgr.get_messages(), [])

        # Fail task 1 for job exe 1
        task_1_failed = task_1_started + timedelta(seconds=1)
        update_1 = job_test_utils.create_task_status_update(task_1.id, 'agent', TaskStatusUpdate.FAILED, task_1_failed,
                                                            exit_code=1)

        # Complete task 2 for job exe 2
        task_2_completed = task_2_started + timedelta(seconds=1)
        update_2 = job_test_utils.create_task_status_update(task_2.id, 'agent', TaskStatusUpdate.FINISHED,
                                                            task_2_completed)

        # Job executions are finished, so they should be returned and a create_job_exe_ends message, a failed_jobs
        # message, and a completed_jobs message is available
        result = self.job_exe_mgr.handle_task_update(update_1)
        self.assertEqual(self.job_exe_1.id, result.id)
        result = self.job_exe_mgr.handle_task_update(update_2)
        self.assertEqual(self.job_exe_2.id, result.id)

        messages = self.job_exe_mgr.get_messages()
        self.assertEqual(len(messages), 3)
        job_exe_ends_msg = messages[0]
        self.assertEqual(job_exe_ends_msg.type, 'create_job_exe_ends')
        self.assertEqual(job_exe_ends_msg._job_exe_ends[0].job_exe_id, self.job_exe_1.id)
        self.assertEqual(job_exe_ends_msg._job_exe_ends[1].job_exe_id, self.job_exe_2.id)
        completed_jobs_msg = messages[1]
        self.assertEqual(completed_jobs_msg.type, 'completed_jobs')
        self.assertEqual(completed_jobs_msg._completed_jobs[0].job_id, self.job_exe_2.job_id)
        failed_jobs_msg = messages[2]
        self.assertEqual(failed_jobs_msg.type, 'failed_jobs')
        self.assertEqual(failed_jobs_msg._failed_jobs.values()[0][0].job_id, self.job_exe_1.job_id)

    def test_init_with_database(self):
        """Tests calling init_with_database() successfully"""

        self.job_exe_mgr.init_with_database()

    def test_lost_node(self):
        """Tests calling lost_node() successfully"""

        self.job_exe_mgr.schedule_job_exes([self.job_exe_1, self.job_exe_2], [])

        task_1 = self.job_exe_1.start_next_task()
        task_1_started = now() - timedelta(minutes=5)
        update = job_test_utils.create_task_status_update(task_1.id, 'agent', TaskStatusUpdate.RUNNING, task_1_started)
        self.job_exe_mgr.handle_task_update(update)

        # Lose node and get lost task update
        self.job_exe_mgr.lost_node(self.node_model_1.id, now())
        update = job_test_utils.create_task_status_update(task_1.id, 'agent', TaskStatusUpdate.LOST, task_1_started)
        lost_job_exe = self.job_exe_mgr.handle_task_update(update)

        self.assertEqual(lost_job_exe.id, self.job_exe_1.id)
        self.assertEqual(lost_job_exe.status, 'FAILED')
        self.assertEqual(lost_job_exe.error.name, 'node-lost')

        # Make sure a create_job_exe_ends message and failed_jobs message exists for the lost job execution
        messages = self.job_exe_mgr.get_messages()
        self.assertEqual(len(messages), 2)
        job_exe_ends_msg = messages[0]
        self.assertEqual(job_exe_ends_msg.type, 'create_job_exe_ends')
        self.assertEqual(job_exe_ends_msg._job_exe_ends[0].job_exe_id, self.job_exe_1.id)
        failed_jobs_msg = messages[1]
        self.assertEqual(failed_jobs_msg.type, 'failed_jobs')
        self.assertTrue(get_builtin_error('node-lost').id in failed_jobs_msg._failed_jobs)
        self.assertEqual(failed_jobs_msg._failed_jobs.values()[0][0].job_id, self.job_exe_1.job_id)

    def test_schedule_job_exes(self):
        """Tests calling schedule_job_exes() successfully"""

        self.job_exe_mgr.schedule_job_exes([self.job_exe_1, self.job_exe_2], [])

        # Both executions should be in the manager and ready
        self.assertEqual(len(self.job_exe_mgr.get_running_job_exes()), 2)
        self.assertIsNotNone(self.job_exe_mgr.get_running_job_exe(self.job_exe_1.cluster_id))
        self.assertIsNotNone(self.job_exe_mgr.get_running_job_exe(self.job_exe_2.cluster_id))

    def test_sync_with_database(self):
        """Tests calling sync_with_database() successfully"""

        self.job_exe_mgr.schedule_job_exes([self.job_exe_1, self.job_exe_2], [])

        task_1 = self.job_exe_1.start_next_task()
        task_1_started = now() - timedelta(minutes=5)
        update = job_test_utils.create_task_status_update(task_1.id, 'agent', TaskStatusUpdate.RUNNING, task_1_started)
        self.job_exe_mgr.handle_task_update(update)

        # Cancel job_exe_1 and job_exe_2 and have manager sync with database
        Job.objects.update_jobs_to_canceled([self.job_exe_1._job_exe.job, self.job_exe_2._job_exe.job], now())
        finished_job_exes = self.job_exe_mgr.sync_with_database()

        self.assertEqual(self.job_exe_1.status, 'CANCELED')
        self.assertFalse(self.job_exe_1.is_finished())
        self.assertEqual(self.job_exe_2.status, 'CANCELED')
        self.assertTrue(self.job_exe_2.is_finished())

        # Only job_exe_2 is finished, job_exe_1 has a task to kill
        self.assertEqual(len(finished_job_exes), 1)
        self.assertEqual(finished_job_exes[0].id, self.job_exe_2.id)
        # Make sure a create_job_exe_ends message exists for job_exe_2
        message = self.job_exe_mgr.get_messages()[0]
        self.assertEqual(message.type, 'create_job_exe_ends')
        self.assertEqual(message._job_exe_ends[0].job_exe_id, self.job_exe_2.id)

        # Task killed for job_exe_1
        task_1_killed = task_1_started + timedelta(minutes=5)
        update = job_test_utils.create_task_status_update(task_1.id, 'agent', TaskStatusUpdate.KILLED, task_1_killed)
        self.job_exe_mgr.handle_task_update(update)

        # Make sure a create_job_exe_ends message exists for job_exe_1
        self.assertTrue(self.job_exe_1.is_finished())
        message = self.job_exe_mgr.get_messages()[0]
        self.assertEqual(message.type, 'create_job_exe_ends')
        self.assertEqual(message._job_exe_ends[0].job_exe_id, self.job_exe_1.id)
Exemplo n.º 8
0
class TestSystemTaskManager(TestCase):
    def setUp(self):
        django.setup()

        self.agent_id = 'agent_1'
        self.system_task_mgr = SystemTaskManager()
        self.task_mgr = TaskManager()

        # Make sure messaging service is "off" for these tests
        scheduler_mgr.config.num_message_handlers = 0

    def test_handle_completed_db_update_task(self):
        """Tests handling completed database update task"""

        # Get database update task
        when = now()
        self.assertFalse(self.system_task_mgr._is_db_update_completed)
        task = self.system_task_mgr.get_tasks_to_schedule(when)[0]
        self.assertTrue(task.id.startswith(DB_UPDATE_TASK_ID_PREFIX))
        task_1_id = task.id

        # Schedule database update task and make sure there are no more system tasks
        task.agent_id = self.agent_id
        self.task_mgr.launch_tasks([task], now())
        self.assertListEqual([],
                             self.system_task_mgr.get_tasks_to_schedule(now()))

        # Complete task, verify no new tasks
        update = job_test_utils.create_task_status_update(
            task.id, task.agent_id, TaskStatusUpdate.RUNNING, now())
        self.task_mgr.handle_task_update(update)
        self.system_task_mgr.handle_task_update(update)
        update = job_test_utils.create_task_status_update(
            task.id, task.agent_id, TaskStatusUpdate.FINISHED, now())
        self.task_mgr.handle_task_update(update)
        self.system_task_mgr.handle_task_update(update)
        self.assertListEqual([],
                             self.system_task_mgr.get_tasks_to_schedule(now()))
        self.assertTrue(self.system_task_mgr._is_db_update_completed)

    def test_handle_failed_db_update_task(self):
        """Tests handling failed database update task"""

        # Get database update task
        when = now()
        self.assertFalse(self.system_task_mgr._is_db_update_completed)
        task = self.system_task_mgr.get_tasks_to_schedule(when)[0]
        self.assertTrue(task.id.startswith(DB_UPDATE_TASK_ID_PREFIX))
        task_1_id = task.id

        # Fail task after running and get different task next time
        task.agent_id = self.agent_id
        self.task_mgr.launch_tasks([task], now())
        update = job_test_utils.create_task_status_update(
            task.id, task.agent_id, TaskStatusUpdate.RUNNING, now())
        self.task_mgr.handle_task_update(update)
        self.system_task_mgr.handle_task_update(update)
        update = job_test_utils.create_task_status_update(
            task.id, task.agent_id, TaskStatusUpdate.FAILED, now())
        self.task_mgr.handle_task_update(update)
        self.system_task_mgr.handle_task_update(update)

        # No new database update right away
        tasks = self.system_task_mgr.get_tasks_to_schedule(when +
                                                           datetime.timedelta(
                                                               seconds=5))
        self.assertListEqual([], tasks)
        self.assertFalse(self.system_task_mgr._is_db_update_completed)

        # After error threshold, we should get new database update task
        new_time = when + SystemTaskManager.DATABASE_UPDATE_ERR_THRESHOLD + datetime.timedelta(
            seconds=5)
        task = self.system_task_mgr.get_tasks_to_schedule(new_time)[0]
        self.assertNotEqual(task.id, task_1_id)
        self.assertTrue(task.id.startswith(DB_UPDATE_TASK_ID_PREFIX))
        self.assertFalse(self.system_task_mgr._is_db_update_completed)

    def test_handle_killed_db_update_task(self):
        """Tests handling killed database update task"""

        # Get database update task
        when = now()
        self.assertFalse(self.system_task_mgr._is_db_update_completed)
        task = self.system_task_mgr.get_tasks_to_schedule(when)[0]
        self.assertTrue(task.id.startswith(DB_UPDATE_TASK_ID_PREFIX))
        task_1_id = task.id

        # Kill task after running and get different task next time
        task.agent_id = self.agent_id
        self.task_mgr.launch_tasks([task], now())
        update = job_test_utils.create_task_status_update(
            task.id, task.agent_id, TaskStatusUpdate.RUNNING, now())
        self.task_mgr.handle_task_update(update)
        self.system_task_mgr.handle_task_update(update)
        update = job_test_utils.create_task_status_update(
            task.id, task.agent_id, TaskStatusUpdate.KILLED, now())
        self.task_mgr.handle_task_update(update)
        self.system_task_mgr.handle_task_update(update)
        task = self.system_task_mgr.get_tasks_to_schedule(when)[0]
        self.assertTrue(task.id.startswith(DB_UPDATE_TASK_ID_PREFIX))
        self.assertNotEqual(task.id, task_1_id)
        self.assertFalse(self.system_task_mgr._is_db_update_completed)

    def test_handle_lost_db_update_task(self):
        """Tests handling lost database update task"""

        # Get database update task
        when = now()
        self.assertFalse(self.system_task_mgr._is_db_update_completed)
        task = self.system_task_mgr.get_tasks_to_schedule(when)[0]
        self.assertTrue(task.id.startswith(DB_UPDATE_TASK_ID_PREFIX))
        task_1_id = task.id

        # Lose task after scheduling and get different task next time
        task.agent_id = self.agent_id
        self.task_mgr.launch_tasks([task], now())
        update = job_test_utils.create_task_status_update(
            task.id, task.agent_id, TaskStatusUpdate.LOST, now())
        self.task_mgr.handle_task_update(update)
        self.system_task_mgr.handle_task_update(update)
        task = self.system_task_mgr.get_tasks_to_schedule(when)[0]
        task_2_id = task.id
        self.assertTrue(task.id.startswith(DB_UPDATE_TASK_ID_PREFIX))
        self.assertNotEqual(task.id, task_1_id)
        self.assertFalse(self.system_task_mgr._is_db_update_completed)

        # Lose task after running and get different task next time
        task.agent_id = self.agent_id
        self.task_mgr.launch_tasks([task], now())
        update = job_test_utils.create_task_status_update(
            task.id, task.agent_id, TaskStatusUpdate.RUNNING, now())
        self.task_mgr.handle_task_update(update)
        self.system_task_mgr.handle_task_update(update)
        update = job_test_utils.create_task_status_update(
            task.id, task.agent_id, TaskStatusUpdate.LOST, now())
        self.task_mgr.handle_task_update(update)
        self.system_task_mgr.handle_task_update(update)
        task = self.system_task_mgr.get_tasks_to_schedule(when)[0]
        self.assertTrue(task.id.startswith(DB_UPDATE_TASK_ID_PREFIX))
        self.assertNotEqual(task.id, task_1_id)
        self.assertNotEqual(task.id, task_2_id)
        self.assertFalse(self.system_task_mgr._is_db_update_completed)