示例#1
0
    def test_job_exe_no_offers(self):
        """Tests the NodeManager where a node is running an exe and has not given offers to Scale in 1 hour.

        Expected behavior: The scheduler and DB are in sync and the node is still active
        """

        last_offer = now() - datetime.timedelta(hours=1)
        node_mgr = NodeManager()
        node_mgr.register_agents([self.agent_1])
        node_mgr.sync_with_database(scheduler_mgr.config)

        # Add job to node
        job_test_utils.create_running_job_exe(agent_id=self.agent_1,
                                              node=self.node_1)

        # Set last_offer_received to 1 hour ago
        Node.objects.filter(id=self.node_1.id).update(
            last_offer_received=last_offer)

        # This inspects what nodes are running jobs and what nodes need to be removed if they
        # have not sent offers in the last 5 minutes
        node_mgr.sync_with_database(scheduler_mgr.config)

        # Get the DB and Scheduler state and make sure they are consistent
        db_record = Node.objects.get(id=self.node_1.id)
        scheduler_record = node_mgr.get_node(self.agent_1.agent_id)

        # The third positional argument of assertEqual is the failure message, not an expected value,
        # so the active state has to be asserted separately from the DB/scheduler consistency check
        self.assertEqual(db_record.is_active, scheduler_record._is_active)
        self.assertTrue(db_record.is_active)
示例#2
0
    def test_lost_known_node(self, mock_get_slaves):
        """Tests that a node lost after the database sync stays in the NodeManager but is no longer online"""

        mock_get_slaves.return_value = self.slave_infos
        kept_agent, lost_agent = self.node_agent_1, self.node_agent_2

        node_mgr = NodeManager()
        node_mgr.register_agent_ids([kept_agent, lost_agent])
        node_mgr.sync_with_database('master_host', 5050)
        node_mgr.lost_node(lost_agent)

        # Both nodes are still tracked by the manager
        self.assertEqual(len(node_mgr.get_nodes()), 2)
        # Only the node that was reported lost is offline
        self.assertTrue(node_mgr.get_node(kept_agent).is_online)
        self.assertFalse(node_mgr.get_node(lost_agent).is_online)
示例#3
0
    def test_lost_unknown_node(self, mock_get_slaves):
        """Tests that a node lost before the first database sync does not appear in the NodeManager"""

        mock_get_slaves.return_value = self.slave_infos
        known_agent, unknown_agent = self.node_agent_1, self.node_agent_2

        node_mgr = NodeManager()
        node_mgr.register_agent_ids([known_agent, unknown_agent])
        # Node 2 is lost while the manager has not yet synced with the database
        node_mgr.lost_node(unknown_agent)
        node_mgr.sync_with_database('master_host', 5050)

        # Only node 1 should be left in the manager
        self.assertEqual(len(node_mgr.get_nodes()), 1)
        remaining = node_mgr.get_node(known_agent)
        self.assertEqual(remaining.hostname, self.node_1.hostname)
        self.assertTrue(remaining.is_online)
        self.assertIsNone(node_mgr.get_node(unknown_agent))
示例#4
0
    def test_successful_update(self):
        """Tests that syncing two registered agents with the database leaves two nodes in the manager"""

        node_mgr = NodeManager()
        registered_agents = [self.agent_1, self.agent_2]
        node_mgr.register_agents(registered_agents)
        node_mgr.sync_with_database(scheduler_mgr.config)

        # One node is expected per registered agent
        self.assertEqual(len(node_mgr.get_nodes()), 2)
示例#5
0
    def test_sync_with_renamed_node(self):
        """Tests that a database sync still succeeds after a node model's hostname is changed in the database"""

        node_mgr = NodeManager()
        node_mgr.register_agents([self.agent_1, self.agent_2])
        # First sync, before the hostname change
        node_mgr.sync_with_database(scheduler_mgr.config)

        # Rename node 1 in the database
        renamed_node = self.node_1
        renamed_node.hostname = 'new_host_1'
        renamed_node.save()

        # The test passes as long as this second sync does not raise
        node_mgr.sync_with_database(scheduler_mgr.config)
示例#6
0
    def test_get_initial_cleanup_tasks(self):
        """Tests that the manager returns one initial cleanup task per node once nodes exist"""

        timestamp = now()
        node_mgr = NodeManager()
        # No tasks yet due to no nodes
        self.assertListEqual(node_mgr.get_next_tasks(timestamp), [])

        node_mgr.register_agents([self.agent_1, self.agent_2])
        node_mgr.sync_with_database(scheduler_mgr.config)
        for registered_node in node_mgr.get_nodes():
            # presumably keeps a health task from being scheduled at this timestamp - TODO confirm
            registered_node._last_health_task = timestamp

        next_tasks = node_mgr.get_next_tasks(timestamp)
        self.assertEqual(len(next_tasks), 2)
        for cleanup_task in next_tasks:
            self.assertTrue(cleanup_task.is_initial_cleanup)
示例#7
0
    def test_successful_update(self, mock_get_slaves):
        """Tests that syncing two registered agent IDs with the database leaves two nodes in the manager"""

        mock_get_slaves.return_value = self.slave_infos

        node_mgr = NodeManager()
        registered_ids = [self.node_agent_1, self.node_agent_2]
        node_mgr.register_agent_ids(registered_ids)
        node_mgr.sync_with_database('master_host', 5050)

        # One node is expected per registered agent ID
        self.assertEqual(len(node_mgr.get_nodes()), 2)
示例#8
0
    def test_sync_with_renamed_node(self, mock_get_slaves):
        """Tests that a database sync still succeeds after a node model's hostname is changed in the database"""

        mock_get_slaves.return_value = self.slave_infos

        node_mgr = NodeManager()
        node_mgr.register_agent_ids([self.node_agent_1, self.node_agent_2])
        # First sync, before the hostname change
        node_mgr.sync_with_database('master_host', 5050)

        # Rename node 1 in the database
        renamed_node = self.node_1
        renamed_node.hostname = 'new_host_1'
        renamed_node.save()

        # The test passes as long as this second sync does not raise
        node_mgr.sync_with_database('master_host', 5050)
示例#9
0
    def test_get_initial_cleanup_tasks(self, mock_get_slaves):
        """Tests that the manager returns one initial cleanup task per node once nodes exist"""

        mock_get_slaves.return_value = self.slave_infos

        timestamp = now()
        node_mgr = NodeManager()
        # No tasks yet due to no nodes
        self.assertListEqual(node_mgr.get_next_tasks(timestamp), [])

        node_mgr.register_agent_ids([self.node_agent_1, self.node_agent_2])
        node_mgr.sync_with_database('master_host', 5050)
        for registered_node in node_mgr.get_nodes():
            # NOTE(review): attribute is spelled "_last_heath_task" (not "health") - confirm it matches the
            # attribute name used by the node class in this version of the code
            registered_node._last_heath_task = timestamp

        next_tasks = node_mgr.get_next_tasks(timestamp)
        self.assertEqual(len(next_tasks), 2)
        for cleanup_task in next_tasks:
            self.assertTrue(cleanup_task.is_initial_cleanup)
示例#10
0
    def test_get_pull_tasks(self):
        """Tests getting Docker pull tasks from the manager

        Both nodes have their initial cleanup marked as completed, after which the manager is expected to
        return one PullTask per node.
        """

        when = now()
        manager = NodeManager()
        manager.register_agents([self.agent_1, self.agent_2])
        manager.sync_with_database(scheduler_mgr.config)
        for node in manager.get_nodes():
            node._last_health_task = when
            # Mark initial cleanup as done and refresh the node state before asking for the next tasks
            node._initial_cleanup_completed()
            node._update_state()

        tasks = manager.get_next_tasks(when)
        self.assertEqual(len(tasks), 2)
        for task in tasks:
            # assertIsInstance reports the offending object and the expected type on failure
            self.assertIsInstance(task, PullTask)
示例#11
0
    def test_sync_node_model(self):
        """Tests that a node model change in the database (is_active set to False) reaches the manager on sync"""

        node_mgr = NodeManager()
        node_mgr.register_agents([self.agent_1, self.agent_2])
        # Initial sync
        node_mgr.sync_with_database(scheduler_mgr.config)

        # Deactivate node 1 in the database, then sync again
        self.node_1.is_active = False
        self.node_1.save()
        node_mgr.sync_with_database(scheduler_mgr.config)

        # Node 1 must still be present in the manager and must now be inactive
        matches = 0
        for scheduler_node in node_mgr.get_nodes():
            if scheduler_node.hostname != self.node_1.hostname:
                continue
            matches += 1
            self.assertFalse(scheduler_node.is_active)
        self.assertTrue(matches > 0)
示例#12
0
    def test_no_job_exe_no_offers(self):
        """Tests the NodeManager where a node is not running an exe and has not given offers to Scale in 1 hour.

        Expected behavior: The node is removed from the manager and the DB model is updated with is_active=False
        """

        one_hour_ago = now() - datetime.timedelta(hours=1)
        manager = NodeManager()
        manager.register_agents([self.agent_1])
        manager.sync_with_database(scheduler_mgr.config)

        # Back-date the last offer of node 1 to 1 hour ago
        node_query = Node.objects.filter(id=self.node_1.id)
        node_query.update(last_offer_received=one_hour_ago)

        # This inspects what nodes are running jobs and what nodes need to be removed if they
        # have not sent offers in the last 5 minutes
        manager.sync_with_database(scheduler_mgr.config)

        # Read back the DB state before checking the manager
        stored_node = Node.objects.get(id=self.node_1.id)

        self.assertIsNone(manager.get_node(self.agent_1.agent_id))
        self.assertEqual(stored_node.is_active, False)
示例#13
0
    def test_get_pull_tasks(self, mock_get_slaves):
        """Tests getting Docker pull tasks from the manager

        Both nodes have their initial cleanup marked as completed, after which the manager is expected to
        return one PullTask per node.
        """

        mock_get_slaves.return_value = self.slave_infos

        when = now()
        manager = NodeManager()
        manager.register_agent_ids([self.node_agent_1, self.node_agent_2])
        manager.sync_with_database('master_host', 5050)
        for node in manager.get_nodes():
            # NOTE(review): attribute is spelled "_last_heath_task" (not "health") - confirm it matches the
            # attribute name used by the node class in this version of the code
            node._last_heath_task = when
            # Mark initial cleanup as done and refresh the node state before asking for the next tasks
            node._initial_cleanup_completed()
            node._update_state()

        tasks = manager.get_next_tasks(when)
        self.assertEqual(len(tasks), 2)
        for task in tasks:
            # assertIsInstance reports the offending object and the expected type on failure
            self.assertIsInstance(task, PullTask)
示例#14
0
    def test_successful_update(self, mock_get_slaves):
        """Tests that syncing two registered agent IDs with the database leaves two nodes in the manager"""

        mock_get_slaves.return_value = self.slave_infos

        node_mgr = NodeManager()
        registered_ids = [self.node_agent_1, self.node_agent_2]
        node_mgr.register_agent_ids(registered_ids)
        node_mgr.sync_with_database('master_host', 5050)

        # One node is expected per registered agent ID
        self.assertEqual(len(node_mgr.get_nodes()), 2)
示例#15
0
    def test_sync_node_model(self, mock_get_slaves):
        """Tests that a node model change in the database (is_active set to False) reaches the manager on sync"""

        mock_get_slaves.return_value = self.slave_infos

        node_mgr = NodeManager()
        node_mgr.register_agent_ids([self.node_agent_1, self.node_agent_2])
        # Initial sync
        node_mgr.sync_with_database('master_host', 5050)

        # Deactivate node 1 in the database, then sync again
        self.node_1.is_active = False
        self.node_1.save()
        node_mgr.sync_with_database('master_host', 5050)

        # Node 1 must still be present in the manager and must now be inactive
        matches = 0
        for scheduler_node in node_mgr.get_nodes():
            if scheduler_node.hostname != self.node_1.hostname:
                continue
            matches += 1
            self.assertFalse(scheduler_node.is_active)
        self.assertTrue(matches > 0)
示例#16
0
    def test_lost_unknown_node(self):
        """Tests that a node lost before the first database sync does not appear in the NodeManager"""

        known_agent, unknown_agent = self.agent_1, self.agent_2

        node_mgr = NodeManager()
        node_mgr.register_agents([known_agent, unknown_agent])
        # Node 2 is lost while the manager has not yet synced with the database
        node_mgr.lost_node(unknown_agent.agent_id)
        node_mgr.sync_with_database(scheduler_mgr.config)

        # Only node 1 should be left in the manager
        self.assertEqual(len(node_mgr.get_nodes()), 1)
        remaining = node_mgr.get_node(known_agent.agent_id)
        self.assertEqual(remaining.hostname, self.node_1.hostname)
        self.assertTrue(remaining._is_online)
        self.assertIsNone(node_mgr.get_node(unknown_agent.agent_id))
示例#17
0
    def test_sync_and_remove_node_model(self):
        """Tests that a node that is both inactive in the database and lost is dropped from the manager on sync"""

        node_mgr = NodeManager()
        node_mgr.register_agents([self.agent_1, self.agent_2])
        # Initial sync
        node_mgr.sync_with_database(scheduler_mgr.config)

        # Node 1 becomes inactive in the database ...
        self.node_1.is_active = False
        self.node_1.save()

        # ... and is then lost
        node_mgr.lost_node(self.agent_1.agent_id)

        node_mgr.sync_with_database(scheduler_mgr.config)

        # No remaining node may carry node 1's hostname
        still_present = any(candidate.hostname == self.node_1.hostname for candidate in node_mgr.get_nodes())
        self.assertFalse(still_present)
示例#18
0
    def test_lost_known_node(self, mock_get_slaves):
        """Tests that a node lost after the database sync stays in the NodeManager but is no longer online"""

        mock_get_slaves.return_value = self.slave_infos
        kept_agent, lost_agent = self.node_agent_1, self.node_agent_2

        node_mgr = NodeManager()
        node_mgr.register_agent_ids([kept_agent, lost_agent])
        node_mgr.sync_with_database('master_host', 5050)
        node_mgr.lost_node(lost_agent)

        # Both nodes are still tracked by the manager
        self.assertEqual(len(node_mgr.get_nodes()), 2)
        # Only the node that was reported lost is offline
        self.assertTrue(node_mgr.get_node(kept_agent)._is_online)
        self.assertFalse(node_mgr.get_node(lost_agent)._is_online)
示例#19
0
    def test_lost_unknown_node(self, mock_get_slaves):
        """Tests that a node lost before the first database sync does not appear in the NodeManager"""

        mock_get_slaves.return_value = self.slave_infos
        known_agent, unknown_agent = self.node_agent_1, self.node_agent_2

        node_mgr = NodeManager()
        node_mgr.register_agent_ids([known_agent, unknown_agent])
        # Node 2 is lost while the manager has not yet synced with the database
        node_mgr.lost_node(unknown_agent)
        node_mgr.sync_with_database('master_host', 5050)

        # Only node 1 should be left in the manager
        self.assertEqual(len(node_mgr.get_nodes()), 1)
        remaining = node_mgr.get_node(known_agent)
        self.assertEqual(remaining.hostname, self.node_1.hostname)
        self.assertTrue(remaining._is_online)
        self.assertIsNone(node_mgr.get_node(unknown_agent))
示例#20
0
    def test_job_exe_clean_task(self):
        """Tests that the NodeManager returns a cleanup task for a job execution added to the CleanupManager"""

        timestamp = now()
        manager = NodeManager()
        manager.register_agents([self.agent_1, self.agent_2])
        manager.sync_with_database(scheduler_mgr.config)
        cleaner = CleanupManager()
        cleaner.update_nodes(manager.get_nodes())
        initial_tasks = manager.get_next_tasks(timestamp)

        launcher = TaskManager()
        # Launch every initial cleanup task and report it as finished to both managers
        for initial_task in initial_tasks:
            launcher.launch_tasks([initial_task], now())
            finished = job_test_utils.create_task_status_update(initial_task.id, initial_task.agent_id,
                                                                TaskStatusUpdate.FINISHED, now())
            launcher.handle_task_update(finished)
            manager.handle_task_update(finished)

        # Mark image pull done to get rid of image tasks
        for registered_node in manager.get_nodes():
            registered_node._image_pull_completed()
            registered_node._update_state()

        # Hand a running job execution on node 1 to the cleanup manager and fetch the resulting task
        running_exe = job_test_utils.create_running_job_exe(agent_id=self.agent_1, node=self.node_1)
        cleaner.add_job_execution(running_exe)
        cleanup_tasks = manager.get_next_tasks(timestamp)
        self.assertEqual(len(cleanup_tasks), 1)

        cleanup_task = cleanup_tasks[0]
        self.assertEqual(cleanup_task.agent_id, self.agent_1.agent_id)
        self.assertFalse(cleanup_task.is_initial_cleanup)
        self.assertEqual(len(cleanup_task.job_exes), 1)
示例#21
0
    def test_sync_and_remove_node_model(self, mock_get_slaves):
        """Tests that a node that is both inactive in the database and lost is dropped from the manager on sync"""

        mock_get_slaves.return_value = self.slave_infos

        node_mgr = NodeManager()
        node_mgr.register_agent_ids([self.node_agent_1, self.node_agent_2])
        # Initial sync
        node_mgr.sync_with_database('master_host', 5050)

        # Node 1 becomes inactive in the database ...
        self.node_1.is_active = False
        self.node_1.save()

        # ... and is then lost
        node_mgr.lost_node(self.node_agent_1)

        node_mgr.sync_with_database('master_host', 5050)

        # No remaining node may carry node 1's hostname
        still_present = any(candidate.hostname == self.node_1.hostname for candidate in node_mgr.get_nodes())
        self.assertFalse(still_present)
示例#22
0
    def test_pull_task_change_agent_id(self):
        """Tests the NodeManager where a node's agent ID changes during a pull task

        A task is launched for agent 2, agent 2 is then lost and agent 3 is registered in its place. The manager
        is expected to issue a new task for agent 3 and to accept a late status update for the old agent 2 task
        without raising.
        """

        when = now()
        manager = NodeManager()
        manager.register_agents([self.agent_1, self.agent_2])
        manager.sync_with_database(scheduler_mgr.config)
        for node in manager.get_nodes():
            node._last_health_task = when
            node._initial_cleanup_completed()
            node._update_state()
        tasks = manager.get_next_tasks(when)

        task_mgr = TaskManager()
        task_2 = None
        for task in tasks:
            task_mgr.launch_tasks([task], when)
            if task.agent_id == self.agent_2.agent_id:
                task_2 = task
        # Fail with a clear message here instead of an AttributeError on None when the update is built below
        self.assertIsNotNone(task_2, 'No task was returned for agent 2')

        # Node 2 changes agent ID to 3
        manager.lost_node(self.agent_2.agent_id)
        manager.register_agents([self.agent_3])
        manager.sync_with_database(scheduler_mgr.config)
        for node in manager.get_nodes():
            node._last_health_task = when
            node._initial_cleanup_completed()
            node._update_state()

        # Should get new Docker pull task for node 2
        tasks = manager.get_next_tasks(when)
        self.assertEqual(len(tasks), 1)
        new_task_2 = tasks[0]
        self.assertEqual(new_task_2.agent_id, self.agent_3.agent_id)

        # Task update comes back for original node 2 Docker pull task, manager should ignore with no exception
        update = job_test_utils.create_task_status_update(
            task_2.id, task_2.agent_id, TaskStatusUpdate.FAILED, when)
        task_mgr.handle_task_update(update)
        manager.handle_task_update(update)
示例#23
0
    def test_change_agent_id_with_inactive_node(self):
        """Tests the NodeManager where an inactive registered node comes back under a new agent ID"""

        node_mgr = NodeManager()
        node_mgr.register_agents([self.agent_1, self.agent_2])
        node_mgr.sync_with_database(scheduler_mgr.config)

        # Flag node 2 as inactive in the database and let the manager pick that up
        node_2_id = node_mgr.get_node(self.agent_2.agent_id).id
        Node.objects.filter(id=node_2_id).update(is_active=False)
        node_mgr.sync_with_database(scheduler_mgr.config)

        # Agent 2 disappears and agent 3 registers in its place
        node_mgr.lost_node(self.agent_2.agent_id)
        node_mgr.register_agents([self.agent_3])
        node_mgr.sync_with_database(scheduler_mgr.config)

        # Two nodes must be registered (agent 1 and agent 3), both online
        self.assertEqual(len(node_mgr.get_nodes()), 2)

        first_node = node_mgr.get_node(self.agent_1.agent_id)
        self.assertEqual(first_node.hostname, self.node_1.hostname)
        self.assertTrue(first_node._is_online)

        # The old agent ID no longer resolves to a node
        self.assertIsNone(node_mgr.get_node(self.agent_2.agent_id))

        # The node found under agent 3 has hostname host_2 and is still inactive
        moved_node = node_mgr.get_node(self.agent_3.agent_id)
        self.assertEqual(moved_node.hostname, 'host_2')
        self.assertTrue(moved_node._is_online)
        self.assertFalse(moved_node._is_active)
示例#24
0
    def test_change_agent_id(self):
        """Tests the NodeManager where a registered node comes back under a new agent ID"""

        node_mgr = NodeManager()
        node_mgr.register_agents([self.agent_1, self.agent_2])
        node_mgr.sync_with_database(scheduler_mgr.config)

        # Agent 2 disappears and agent 3 registers in its place
        node_mgr.lost_node(self.agent_2.agent_id)
        node_mgr.register_agents([self.agent_3])
        node_mgr.sync_with_database(scheduler_mgr.config)

        # Two nodes must be registered (agent 1 and agent 3), both online
        self.assertEqual(len(node_mgr.get_nodes()), 2)

        first_node = node_mgr.get_node(self.agent_1.agent_id)
        self.assertEqual(first_node.hostname, self.node_1.hostname)
        self.assertTrue(first_node._is_online)

        # The old agent ID no longer resolves to a node
        self.assertIsNone(node_mgr.get_node(self.agent_2.agent_id))

        # The node found under agent 3 has hostname host_2
        moved_node = node_mgr.get_node(self.agent_3.agent_id)
        self.assertEqual(moved_node.hostname, 'host_2')
        self.assertTrue(moved_node._is_online)
示例#25
0
    def test_lost_known_node(self):
        """Tests that a node lost after the database sync stays in the NodeManager but is no longer online"""

        kept_agent, lost_agent = self.agent_1, self.agent_2

        node_mgr = NodeManager()
        node_mgr.register_agents([kept_agent, lost_agent])
        node_mgr.sync_with_database(scheduler_mgr.config)
        node_mgr.lost_node(lost_agent.agent_id)

        # Both nodes are still tracked by the manager
        self.assertEqual(len(node_mgr.get_nodes()), 2)
        # Only the node that was reported lost is offline
        self.assertTrue(node_mgr.get_node(kept_agent.agent_id)._is_online)
        self.assertFalse(node_mgr.get_node(lost_agent.agent_id)._is_online)
示例#26
0
    def test_change_agent_id(self, mock_get_slaves):
        """Tests the NodeManager where a registered node comes back under a new agent ID"""

        mock_get_slaves.return_value = self.slave_infos

        node_mgr = NodeManager()
        node_mgr.register_agent_ids([self.node_agent_1, self.node_agent_2])
        node_mgr.sync_with_database('master_host', 5050)

        # Switch the mocked slave data to the updated set, then swap agent 2 for agent 3
        mock_get_slaves.return_value = self.slave_infos_updated
        node_mgr.lost_node(self.node_agent_2)
        node_mgr.register_agent_ids([self.node_agent_3])
        node_mgr.sync_with_database('master_host', 5050)

        # Two nodes must be registered (agent 1 and agent 3), both online
        self.assertEqual(len(node_mgr.get_nodes()), 2)

        first_node = node_mgr.get_node(self.node_agent_1)
        self.assertEqual(first_node.hostname, self.node_1.hostname)
        self.assertTrue(first_node.is_online)

        # The old agent ID no longer resolves to a node
        self.assertIsNone(node_mgr.get_node(self.node_agent_2))

        # The node found under agent 3 has hostname host_2
        moved_node = node_mgr.get_node(self.node_agent_3)
        self.assertEqual(moved_node.hostname, 'host_2')
        self.assertTrue(moved_node.is_online)
示例#27
0
    def test_change_agent_id_with_inactive_node(self, mock_get_slaves):
        """Tests the NodeManager where an inactive registered node comes back under a new agent ID"""

        mock_get_slaves.return_value = self.slave_infos

        node_mgr = NodeManager()
        node_mgr.register_agent_ids([self.node_agent_1, self.node_agent_2])
        node_mgr.sync_with_database('master_host', 5050)

        # Flag node 2 as inactive in the database and let the manager pick that up
        node_2_id = node_mgr.get_node(self.node_agent_2).id
        Node.objects.filter(id=node_2_id).update(is_active=False)
        node_mgr.sync_with_database('master_host', 5050)

        # Switch the mocked slave data to the updated set, then swap agent 2 for agent 3
        mock_get_slaves.return_value = self.slave_infos_updated
        node_mgr.lost_node(self.node_agent_2)
        node_mgr.register_agent_ids([self.node_agent_3])
        node_mgr.sync_with_database('master_host', 5050)

        # Two nodes must be registered (agent 1 and agent 3), both online
        self.assertEqual(len(node_mgr.get_nodes()), 2)

        first_node = node_mgr.get_node(self.node_agent_1)
        self.assertEqual(first_node.hostname, self.node_1.hostname)
        self.assertTrue(first_node._is_online)

        # The old agent ID no longer resolves to a node
        self.assertIsNone(node_mgr.get_node(self.node_agent_2))

        # The node found under agent 3 has hostname host_2 and is still inactive
        moved_node = node_mgr.get_node(self.node_agent_3)
        self.assertEqual(moved_node.hostname, 'host_2')
        self.assertTrue(moved_node._is_online)
        self.assertFalse(moved_node._is_active)