Exemplo n.º 1
0
    def test_update_all_cluster_resources(self):
        """Tests successfully updating the all cluster resources database in a cluster"""
        offers = [
            ResourceOffer('offer_1', self.agent_1.agent_id, self.framework_id,
                          NodeResources([Cpus(2.0), Mem(22048.0), Disk(1024.0)]), now(), None),
            ResourceOffer('offer_2', self.agent_2.agent_id, self.framework_id,
                          NodeResources([Cpus(25.0), Mem(2048.0), Disk(2048.0)]), now(), None),
            ResourceOffer('offer_3', self.agent_2.agent_id, self.framework_id,
                          NodeResources([Cpus(225.0), Mem(1024.0), Disk(22048.0)]), now(), None),
        ]
        resource_mgr.add_new_offers(offers)
        resource_mgr.refresh_agent_resources([], now())

        # No cluster resources record should exist before the update runs
        self.assertIsNone(ClusterResources.objects.first())

        resource_mgr.update_all_cluster_resources()

        # The update creates a record holding the totals across all offers
        resource_db = ClusterResources.objects.first()
        self.assertIsNotNone(resource_db)
        self.assertEqual(resource_db.cpus, 252.0)
        self.assertEqual(resource_db.mem, 25120.0)
        self.assertEqual(resource_db.disk, 25120.0)
        self.assertEqual(resource_db.gpus, 0.0)
Exemplo n.º 2
0
    def test_schedule_system_tasks(self):
        """Tests successfully calling perform_scheduling() when scheduling system tasks"""
        resources_1 = NodeResources([Cpus(2.0), Mem(1024.0), Disk(1024.0)])
        resources_2 = NodeResources([Cpus(25.0), Mem(2048.0), Disk(2048.0)])
        resource_mgr.add_new_offers([
            ResourceOffer('offer_1', self.agent_1.agent_id, self.framework_id, resources_1, now(), None),
            ResourceOffer('offer_2', self.agent_2.agent_id, self.framework_id, resources_2, now(), None),
        ])

        # Empty the queue so only system tasks are considered
        Queue.objects.all().delete()
        # Force a database update task to be scheduled
        system_task_mgr._is_db_update_completed = False
        # Request two message handler tasks
        Scheduler.objects.update(num_message_handlers=2)
        scheduler_mgr.sync_with_database()

        num_tasks = SchedulingManager().perform_scheduling(self._client, now())

        # One database update task plus two message handler tasks
        self.assertEqual(num_tasks, 3)
Exemplo n.º 3
0
    def test_canceled_queue_model(self):
        """Tests successfully calling perform_scheduling() when a queue model has been canceled"""
        resources_1 = NodeResources([Cpus(2.0), Mem(1024.0), Disk(1024.0)])
        resources_2 = NodeResources([Cpus(25.0), Mem(2048.0), Disk(2048.0)])
        resource_mgr.add_new_offers([
            ResourceOffer('offer_1', self.agent_1.agent_id, self.framework_id, resources_1, now(), None),
            ResourceOffer('offer_2', self.agent_2.agent_id, self.framework_id, resources_2, now(), None),
        ])
        # Cancel the first queue model before scheduling runs
        self.queue_1.is_canceled = True
        self.queue_1.save()

        num_tasks = SchedulingManager().perform_scheduling(self._client, now())

        # Only the non-canceled queued job execution is scheduled
        self.assertEqual(num_tasks, 1)
        # Both queue models get a job execution and both leave the queue
        self.assertEqual(JobExecution.objects.filter(job_id=self.queue_1.job_id).count(), 1)
        self.assertEqual(JobExecution.objects.filter(job_id=self.queue_2.job_id).count(), 1)
        self.assertEqual(Queue.objects.filter(id__in=[self.queue_1.id, self.queue_2.id]).count(), 0)
        # The job execution manager must emit a message for the canceled job execution
        self.assertTrue(any(message.type == 'create_job_exe_ends'
                            for message in job_exe_mgr.get_messages()))
Exemplo n.º 4
0
    def test_paused_job_type(self):
        """Tests calling perform_scheduling() when a job type is paused"""
        resource_mgr.add_new_offers([
            ResourceOffer('offer_1', self.agent_1.agent_id, self.framework_id,
                          NodeResources([Cpus(2.0), Mem(1024.0), Disk(1024.0)]), now(), None),
            ResourceOffer('offer_2', self.agent_2.agent_id, self.framework_id,
                          NodeResources([Cpus(25.0), Mem(2048.0), Disk(2048.0)]), now(), None),
        ])
        # Pause the job type behind queue_1 and let the scheduler pick it up
        self.queue_1.job_type.is_paused = True
        self.queue_1.job_type.save()
        job_type_mgr.sync_with_database()

        num_tasks = SchedulingManager().perform_scheduling(self._client, now())

        # Only the queued job execution whose type is not paused is scheduled
        self.assertEqual(num_tasks, 1)
        self.assertEqual(JobExecution.objects.filter(job_id=self.queue_1.job_id).count(), 0)
        self.assertEqual(JobExecution.objects.filter(job_id=self.queue_2.job_id).count(), 1)
        # The paused queue model remains in the queue
        self.assertEqual(Queue.objects.filter(id__in=[self.queue_1.id, self.queue_2.id]).count(), 1)
Exemplo n.º 5
0
    def test_paused_scheduler(self):
        """Tests calling perform_scheduling() with a paused scheduler"""
        resource_mgr.add_new_offers([
            ResourceOffer('offer_1', self.agent_1.agent_id, self.framework_id,
                          NodeResources([Cpus(2.0), Mem(1024.0), Disk(1024.0)]), now(), None),
            ResourceOffer('offer_2', self.agent_2.agent_id, self.framework_id,
                          NodeResources([Cpus(25.0), Mem(2048.0), Disk(2048.0)]), now(), None),
        ])
        Scheduler.objects.update(is_paused=True)
        scheduler_mgr.sync_with_database()
        # Push the paused state out to the nodes
        node_mgr.sync_with_database(scheduler_mgr.config)
        # Make sure system tasks don't get scheduled
        system_task_mgr._is_db_update_completed = False

        num_tasks = SchedulingManager().perform_scheduling(self._client, now())

        # Nothing is scheduled and both queue models remain queued
        self.assertEqual(num_tasks, 0)
        for queue in (self.queue_1, self.queue_2):
            self.assertEqual(JobExecution.objects.filter(job_id=queue.job_id).count(), 0)
        self.assertEqual(Queue.objects.filter(id__in=[self.queue_1.id, self.queue_2.id]).count(), 2)
Exemplo n.º 6
0
    def test_missing_job_types(self):
        """Tests calling perform_scheduling() when a queued job type has not been synced to the scheduler"""
        resource_mgr.add_new_offers([
            ResourceOffer('offer_1', self.agent_1.agent_id, self.framework_id,
                          NodeResources([Cpus(2.0), Mem(1024.0), Disk(1024.0)]), now(), None),
            ResourceOffer('offer_2', self.agent_2.agent_id, self.framework_id,
                          NodeResources([Cpus(25.0), Mem(2048.0), Disk(2048.0)]), now(), None),
        ])

        scheduling_manager = SchedulingManager()

        # Make the scheduler see no job types at all during this scheduling pass
        with patch('scheduler.scheduling.manager.job_type_mgr.get_job_types'
                   ) as mock_get_job_types:
            mock_get_job_types.return_value = {}
            num_tasks = scheduling_manager.perform_scheduling(self._client, now())

        # Nothing should be scheduled and the queue should be untouched
        self.assertEqual(num_tasks, 0)
        for queue in (self.queue_1, self.queue_2):
            self.assertEqual(JobExecution.objects.filter(job_id=queue.job_id).count(), 0)
        self.assertEqual(Queue.objects.filter(id__in=[self.queue_1.id, self.queue_2.id]).count(), 2)
Exemplo n.º 7
0
    def test_max_resources(self):
        """Tests successfully calculating the max resources in a cluster"""
        resource_mgr.add_new_offers([
            ResourceOffer('offer_1', self.agent_1.agent_id, self.framework_id,
                          NodeResources([Cpus(2.0), Mem(22048.0), Disk(1024.0)]), now(), None),
            ResourceOffer('offer_2', self.agent_2.agent_id, self.framework_id,
                          NodeResources([Cpus(25.0), Mem(2048.0), Disk(2048.0)]), now(), None),
            ResourceOffer('offer_3', self.agent_2.agent_id, self.framework_id,
                          NodeResources([Cpus(225.0), Mem(1024.0), Disk(22048.0)]), now(), None),
        ])
        resource_mgr.refresh_agent_resources([], now())

        # agent_2 holds offers 2 and 3, so the expected per-resource maximums are
        # cpus=25+225 and disk=2048+22048 from agent_2, mem=22048 from agent_1
        max_available = resource_mgr.get_max_available_resources()
        expected = NodeResources([Cpus(250.0), Mem(22048.0), Disk(24096.0)])
        self.assertTrue(max_available.is_equal(expected))
Exemplo n.º 8
0
    def test_missing_workspace(self):
        """Tests calling perform_scheduling() when a queued job's workspace has not been synced to the scheduler"""

        resource_mgr.add_new_offers([
            ResourceOffer('offer_1', self.agent_1.agent_id, self.framework_id,
                          NodeResources([Cpus(2.0), Mem(1024.0), Disk(1024.0)]), now(), None),
            ResourceOffer('offer_2', self.agent_2.agent_id, self.framework_id,
                          NodeResources([Cpus(25.0), Mem(2048.0), Disk(2048.0)]), now(), None),
        ])

        # Give each queued job an output workspace requirement
        for queue_id in (self.queue_1.id, self.queue_2.id):
            queue = Queue.objects.get(id=queue_id)
            config = queue.get_execution_configuration()
            config.set_output_workspaces({'my_output': 'my_workspace'})
            queue.configuration = config.get_dict()
            queue.save()

        scheduling_manager = SchedulingManager()

        # Make the scheduler see no workspaces at all during this scheduling pass
        with patch('scheduler.scheduling.manager.workspace_mgr.get_workspaces') as mock_get_workspaces:
            mock_get_workspaces.return_value = {}
            num_tasks = scheduling_manager.perform_scheduling(self._client, now())

        # Nothing should be scheduled and the queue should be untouched
        self.assertEqual(num_tasks, 0)
        for queue in (self.queue_1, self.queue_2):
            self.assertEqual(JobExecution.objects.filter(job_id=queue.job_id).count(), 0)
        self.assertEqual(Queue.objects.filter(id__in=[self.queue_1.id, self.queue_2.id]).count(), 2)
Exemplo n.º 9
0
    def test_job_type_limit(self):
        """Tests calling perform_scheduling() with a job type limit"""
        Queue.objects.all().delete()
        job_type_with_limit = job_test_utils.create_seed_job_type()
        job_type_with_limit.max_scheduled = 4
        job_type_with_limit.save()
        running_job_exe_1 = job_test_utils.create_running_job_exe(
            agent_id=self.agent_1.agent_id, job_type=job_type_with_limit, node=self.node_1)
        # Queue six more executions of the limited job type
        for _ in range(6):
            queue_test_utils.create_queue(job_type=job_type_with_limit)
        job_type_mgr.sync_with_database()
        # One job of this type is already running
        job_exe_mgr.schedule_job_exes([running_job_exe_1], [])

        resource_mgr.add_new_offers([
            ResourceOffer('offer_1', self.agent_1.agent_id, self.framework_id,
                          NodeResources([Cpus(0.0), Mem(1024.0), Disk(1024.0)]), now(), None),
            ResourceOffer('offer_2', self.agent_2.agent_id, self.framework_id,
                          NodeResources([Cpus(25.0), Mem(2048.0), Disk(2048.0)]), now(), None),
        ])

        num_tasks = SchedulingManager().perform_scheduling(self._client, now())
        # One is already running, should only be able to schedule 3 more
        self.assertEqual(num_tasks, 3)
Exemplo n.º 10
0
    def test_all_available_resources(self):
        """Tests successfully calculating the available resources in a cluster"""
        resource_mgr.add_new_offers([
            ResourceOffer('offer_1', self.agent_1.agent_id, self.framework_id,
                          NodeResources([Cpus(2.0), Mem(22048.0), Disk(1024.0)]), now(), None),
            ResourceOffer('offer_2', self.agent_2.agent_id, self.framework_id,
                          NodeResources([Cpus(25.0), Mem(2048.0), Disk(2048.0)]), now(), None),
            ResourceOffer('offer_3', self.agent_2.agent_id, self.framework_id,
                          NodeResources([Cpus(225.0), Mem(1024.0), Disk(22048.0)]), now(), None),
        ])
        resource_mgr.refresh_agent_resources([], now())

        # Totals summed across all three offers
        self.assertDictEqual(resource_mgr.get_all_available_resources(), {
            'mem': 25120.0,
            'gpus': 0.0,
            'disk': 25120.0,
            'cpus': 252.0
        })
Exemplo n.º 11
0
    def test_successful_schedule(self):
        """Tests successfully calling perform_scheduling()"""
        resource_mgr.add_new_offers([
            ResourceOffer('offer_1', self.agent_1.agent_id, self.framework_id,
                          NodeResources([Cpus(2.0), Mem(1024.0), Disk(1024.0)]), now(), None),
            ResourceOffer('offer_2', self.agent_2.agent_id, self.framework_id,
                          NodeResources([Cpus(25.0), Mem(2048.0), Disk(2048.0)]), now(), None),
        ])

        num_tasks = SchedulingManager().perform_scheduling(self._client, now())

        # Only the two smaller queued job executions fit within the offers
        self.assertEqual(num_tasks, 2)
        # Job execution models are created for the scheduled queue models...
        self.assertEqual(JobExecution.objects.filter(job_id=self.queue_1.job_id).count(), 1)
        self.assertEqual(JobExecution.objects.filter(job_id=self.queue_2.job_id).count(), 1)
        self.assertEqual(JobExecution.objects.filter(job_id=self.queue_large.job_id).count(), 0)
        # ...and the scheduled queue models are deleted
        self.assertEqual(Queue.objects.filter(id__in=[self.queue_1.id, self.queue_2.id]).count(), 0)
Exemplo n.º 12
0
    def test_get_queued_resources(self):
        """Tests successfully getting queued resource information"""
        # Three offers: one on agent_1, two on agent_2
        offer_1 = ResourceOffer(
            'offer_1', self.agent_1.agent_id, self.framework_id,
            NodeResources([Cpus(2.0), Mem(22048.0),
                           Disk(1024.0)]), now(), None)
        offer_2 = ResourceOffer(
            'offer_2', self.agent_2.agent_id, self.framework_id,
            NodeResources([Cpus(25.0), Mem(2048.0),
                           Disk(2048.0)]), now(), None)
        offer_3 = ResourceOffer(
            'offer_3', self.agent_2.agent_id, self.framework_id,
            NodeResources([Cpus(225.0),
                           Mem(1024.0),
                           Disk(22048.0)]), now(), None)
        resource_mgr.add_new_offers([offer_1, offer_2, offer_3])

        resource_mgr.refresh_agent_resources([], now())

        # No cluster resources record exists until the update below runs
        resource_db = ClusterResources.objects.first()

        self.assertIsNone(resource_db)

        resource_mgr.update_all_cluster_resources()

        resource_db = ClusterResources.objects.first()

        self.assertIsNotNone(resource_db)

        # The record holds the totals summed across all three offers
        self.assertEqual(resource_db.mem, 25120.0)
        self.assertEqual(resource_db.gpus, 0.0)
        self.assertEqual(resource_db.disk, 25120.0)
        self.assertEqual(resource_db.cpus, 252.0)

        queued_resources = resource_mgr.get_queued_resources()

        # Expected: the cluster totals above, plus queue counts and resource
        # sums per status (3 queued jobs presumably created in setUp — the
        # QUEUED totals of 3 cpus / 384 mem come from those fixtures)
        self.assertDictEqual(
            queued_resources, {
                "cluster_resources": {
                    'cpus': 252,
                    'disk': 25120,
                    'gpus': 0,
                    'mem': 25120
                },
                "queue_lengths": {
                    'PENDING': 0,
                    'QUEUED': 3,
                    'RUNNING': 0
                },
                "total_resources": {
                    'PENDING': {},
                    'QUEUED': {
                        'cpus': 3.0,
                        'mem': 384.0
                    },
                    'RUNNING': {}
                }
            })
Exemplo n.º 13
0
 def setUp(self):
     """Registers two agents and seeds the resource manager with one offer per agent"""
     django.setup()
     resource_mgr.clear()
     # Two distinct agents on two distinct hosts
     self.agent_1 = Agent('agent_1', 'host_1')
     self.agent_2 = Agent('agent_2', 'host_2')
     self.framework_id = '1234'
     # One resource offer per agent with known cpu/mem/disk totals
     offer_1 = ResourceOffer(
         'offer_1', self.agent_1.agent_id, self.framework_id,
         NodeResources([Cpus(2.0), Mem(1024.0),
                        Disk(1024.0)]), now(), None)
     offer_2 = ResourceOffer(
         'offer_2', self.agent_2.agent_id, self.framework_id,
         NodeResources([Cpus(25.0), Mem(2048.0),
                        Disk(2048.0)]), now(), None)
     resource_mgr.add_new_offers([offer_1, offer_2])
     # Refresh so the new offers are reflected in per-agent resources
     resource_mgr.refresh_agent_resources([], now())
Exemplo n.º 14
0
    def populate_queue_resources(apps, schema_editor):
        """Populates the new resources column on every existing queue model.

        Reads the legacy cpus/mem/disk columns from each queue row and writes
        the equivalent NodeResources JSON dict. The (apps, schema_editor)
        signature and apps.get_model() usage indicate this is a Django data
        migration function — TODO confirm against the enclosing migration.

        :param apps: The historical app registry supplied by the migration framework
        :param schema_editor: The schema editor supplied by the migration framework
        """
        from node.resources.node_resources import NodeResources
        from node.resources.resource import Cpus, Disk, Mem

        # Go through all of the queue models and populate their new resources columns
        Queue = apps.get_model('queue', 'Queue')
        total_count = Queue.objects.all().count()
        print 'Populating new resources field for %s queue models' % str(
            total_count)
        done_count = 0
        # Process in fixed-size batches to bound memory use on large queues
        batch_size = 1000
        while done_count < total_count:
            percent = (float(done_count) / float(total_count)) * 100.00
            print 'Completed %s of %s queue models (%f%%)' % (
                done_count, total_count, percent)
            batch_end = done_count + batch_size
            # Stable ordering by job_exe_id so batch slicing never skips rows
            for queue in Queue.objects.order_by(
                    'job_exe_id')[done_count:batch_end]:
                cpus = queue.cpus_required
                mem = queue.mem_required
                disk = queue.disk_total_required
                resources = NodeResources([Cpus(cpus), Mem(mem), Disk(disk)])
                # Store the resources as their JSON dict representation
                queue.resources = resources.get_json().get_dict()
                queue.save()
            done_count += batch_size
        print 'All %s queue models completed' % str(total_count)
Exemplo n.º 15
0
def create_queue(job_type=None, priority=1, timeout=3600, cpus_required=1.0, mem_required=512.0, disk_in_required=200.0,
                 disk_out_required=100.0, disk_total_required=300.0, gpus_required=0, queued=None):
    """Creates a queue model for unit testing

    :param job_type: The job type
    :type job_type: :class:`job.models.JobType`
    :param priority: The priority
    :type priority: int
    :param timeout: The timeout
    :type timeout: int
    :param cpus_required: The number of CPUs required
    :type cpus_required: float
    :param mem_required: The memory required in MiB
    :type mem_required: float
    :param disk_in_required: The input disk space required in MiB
    :type disk_in_required: float
    :param disk_out_required: The output disk space required in MiB
    :type disk_out_required: float
    :param disk_total_required: The total disk space required in MiB
    :type disk_total_required: float
    :param gpus_required: The number of GPUs required
    :type gpus_required: float
    :param queued: The time the execution was queued (defaults to the current time)
    :type queued: :class:`datetime.datetime`
    """

    # Bug fix: the old default of queued=timezone.now() was evaluated once at
    # module import time, so every call relying on the default shared the same
    # stale timestamp. Evaluate the default at call time instead.
    if queued is None:
        queued = timezone.now()

    job = job_test_utils.create_job(job_type=job_type, status='QUEUED')
    resources = NodeResources([Cpus(cpus_required), Mem(mem_required), Disk(disk_total_required), Gpus(gpus_required)])

    return Queue.objects.create(job_type=job.job_type, job=job, exe_num=job.num_exes, priority=priority,
                                timeout=timeout, input_file_size=disk_in_required,
                                interface=job.get_job_interface().get_dict(),
                                configuration=ExecutionConfiguration().get_dict(),
                                resources=resources.get_json().get_dict(), queued=queued)
Exemplo n.º 16
0
def job_get_resources(self):
    """Returns the resources required for this job

    Starts from the job type's base resources, then adds memory and disk
    computed as linear functions of the job's input file size.

    :returns: The required resources
    :rtype: :class:`node.resources.node_resources.NodeResources`
    """

    # Base requirements defined on the job type
    resources = self.job_type.get_resources()

    # Calculate memory required in MiB rounded up to the nearest whole MiB
    multiplier = self.job_type.mem_mult_required
    const = self.job_type.mem_const_required
    disk_in_required = self.disk_in_required
    if not disk_in_required:
        # Treat a missing (None) or zero input size as no input disk
        disk_in_required = 0.0
    # Linear model: mem = mult * input_size + const, floored at MIN_MEM
    memory_mb = long(math.ceil(multiplier * disk_in_required + const))
    memory_required = max(memory_mb, MIN_MEM)

    # Calculate output space required in MiB rounded up to the nearest whole MiB
    multiplier = self.job_type.disk_out_mult_required
    const = self.job_type.disk_out_const_required
    # Linear model: disk_out = mult * input_size + const, floored at MIN_DISK
    output_size_mb = long(math.ceil(multiplier * disk_in_required + const))
    disk_out_required = max(output_size_mb, MIN_DISK)

    # Total disk requirement is input size plus computed output space
    resources.add(
        NodeResources(
            [Mem(memory_required),
             Disk(disk_out_required + disk_in_required)]))
    return resources
Exemplo n.º 17
0
    def __init__(self, resources=None):
        """Constructor

        :param resources: The list of node resources
        :type resources: list
        """

        # Index the given resources by name, rejecting any non-scalar type
        self._resources = {}  # {Name: Resource}
        for resource in resources or []:
            if resource.resource_type != 'SCALAR':
                raise ScaleLogicBug(
                    'Resource type "%s" is not currently supported',
                    resource.resource_type)
            self._resources[resource.name] = resource

        # Guarantee the four standard resources are always present (zeroed)
        for name, resource_class in (('cpus', Cpus), ('mem', Mem),
                                     ('disk', Disk), ('gpus', Gpus)):
            if name not in self._resources:
                self._resources[name] = resource_class(0.0)
Exemplo n.º 18
0
    def test_node_with_new_agent_id(self):
        """Tests successfully calling perform_scheduling() when a node get a new agent ID"""
        # Simulate host 2 coming back under the new agent ID agent_3
        node_mgr.lost_node(self.agent_2)
        node_mgr.register_agents([self.agent_3])
        node_mgr.sync_with_database(scheduler_mgr.config)

        resources = NodeResources([Cpus(25.0), Mem(2048.0), Disk(2048.0)])
        resource_mgr.add_new_offers([ResourceOffer('offer', self.agent_3.agent_id,
                                                   self.framework_id, resources, now(), None)])

        num_tasks = SchedulingManager().perform_scheduling(self._client, now())

        # Both queued job executions fit on the new agent
        self.assertEqual(num_tasks, 2)
        # Every launched task must carry the new agent ID
        calls = self._client.method_calls
        # One call checks for the driver, the second launches the tasks
        self.assertEqual(2, len(calls))
        mesos_tasks = calls[1][1][1]
        for mesos_task in mesos_tasks:
            self.assertEqual(self.agent_3.agent_id,
                             mesos_task['agent_id']['value'])
Exemplo n.º 19
0
    def test_no_default_workspace(self, mock_taskinfo):
        """Tests calling perform_scheduling() when a queued job's workspace has not been synced to the scheduler"""
        mock_taskinfo.return_value = MagicMock()

        # NOTE: this (older) ResourceOffer signature takes no trailing argument
        offer_1 = ResourceOffer('offer_1', self.agent_1.agent_id, self.framework_id,
                                NodeResources([Cpus(2.0), Mem(1024.0), Disk(1024.0)]), now())
        offer_2 = ResourceOffer('offer_2', self.agent_2.agent_id, self.framework_id,
                                NodeResources([Cpus(25.0), Mem(2048.0), Disk(2048.0)]), now())
        resource_mgr.add_new_offers([offer_1, offer_2])

        # Add output data to the first queued job:
        # output data + no workspace defined = fail
        queue_1 = Queue.objects.get(id=self.queue_1.id)
        queue_1.get_job_interface().definition['output_data'] = [{'name': 'my_output', 'type': 'file'}]
        config = queue_1.get_execution_configuration()
        queue_1.configuration = config.get_dict()
        queue_1.save()
        # No output data + no workspace = pass
        queue_2 = Queue.objects.get(id=self.queue_2.id)
        config = queue_2.get_execution_configuration()
        queue_2.configuration = config.get_dict()
        queue_2.save()

        scheduling_manager = SchedulingManager()

        # Set a workspace on the manager
        with patch('scheduler.scheduling.manager.workspace_mgr.get_workspaces') as mock_get_workspaces:
            mock_get_workspaces.return_value = {
                'name': 'my_workspace',
                'title': 'My Workspace',
                'description': 'My workspaces',
                'is_active': True,
                'json_config': {'version': '1.0','broker': {'type': 'host','host_path': '/host/path'}},
            }
            num_tasks = scheduling_manager.perform_scheduling(self._driver, now())

        # Only queue_2 should be scheduled
        self.assertEqual(num_tasks, 1)
        self.assertEqual(JobExecution.objects.filter(job_id=self.queue_1.job_id).count(), 0)
        self.assertEqual(JobExecution.objects.filter(job_id=self.queue_2.job_id).count(), 1)
        # queue_1 remains queued since it could not be scheduled
        self.assertEqual(Queue.objects.filter(id__in=[self.queue_1.id, self.queue_2.id]).count(), 1)
Exemplo n.º 20
0
    def setUp(self):
        """Creates a scheduler, two registered agents, and three queued jobs for each test"""
        django.setup()

        reset_error_cache()

        self.framework_id = '1234'
        Scheduler.objects.initialize_scheduler()
        Scheduler.objects.update(
            num_message_handlers=0
        )  # Prevent message handler tasks from scheduling
        self._client = MagicMock()

        scheduler_mgr.sync_with_database()
        scheduler_mgr.update_from_mesos(framework_id=self.framework_id)
        resource_mgr.clear()
        job_exe_mgr.clear()

        # agent_3 shares host_2 so tests can simulate an agent-ID change
        self.agent_1 = Agent('agent_1', 'host_1')
        self.agent_2 = Agent('agent_2', 'host_2')
        self.agent_3 = Agent('agent_3', 'host_2')
        node_mgr.clear()
        node_mgr.register_agents([self.agent_1, self.agent_2])
        node_mgr.sync_with_database(scheduler_mgr.config)
        # Ignore initial cleanup, health check, and image pull tasks
        for node in node_mgr.get_nodes():
            node._last_health_task = now()
            node._initial_cleanup_completed()
            node._is_image_pulled = True
            node._update_state()
            if node.agent_id == 'agent_1':
                self.node_1_id = node.id
        cleanup_mgr.update_nodes(node_mgr.get_nodes())
        self.node_1 = Node.objects.get(id=self.node_1_id)
        # Ignore system tasks
        system_task_mgr._is_db_update_completed = True

        # Two small queued jobs that fit in the standard offers, plus one
        # large job that should never fit
        self.queue_1 = queue_test_utils.create_queue(cpus_required=4.0,
                                                     mem_required=1024.0,
                                                     disk_in_required=100.0,
                                                     disk_out_required=200.0,
                                                     disk_total_required=300.0)
        self.queue_2 = queue_test_utils.create_queue(cpus_required=8.0,
                                                     mem_required=512.0,
                                                     disk_in_required=400.0,
                                                     disk_out_required=45.0,
                                                     disk_total_required=445.0)
        self.queue_large = queue_test_utils.create_queue(
            resources=NodeResources([Cpus(
                125.0), Mem(12048.0), Disk(12048.0)]))

        job_type_mgr.sync_with_database()
Exemplo n.º 21
0
    def test_successful_mesos_sync(self, mock_dcos):
        """Tests doing a successful sync with mesos"""
        # Fake the mesos master response: a single agent with known totals
        mock_dcos.return_value.json.return_value = {
            'slaves': [{'id': 'agent_1',
                        'resources': {'cpus': 1.0, 'mem': 1024.0, 'disk': 1024.0}}]
        }

        host = host_address_from_mesos_url('http://leader.mesos:80/mesos')
        resource_mgr.sync_with_mesos(host)

        # The agent's total resources must match what mesos reported
        expected = NodeResources([Cpus(1.0), Mem(1024.0), Disk(1024.0)])
        total = resource_mgr._agent_resources['agent_1']._total_resources
        self.assertTrue(total.is_equal(expected))
Exemplo n.º 22
0
    def test_node_with_new_agent_id(self, mock_taskinfo):
        """Tests successfully calling perform_scheduling() when a node get a new agent ID"""
        mock_taskinfo.return_value = MagicMock()

        # Simulate host 2 coming back under the new agent ID agent_3
        node_mgr.lost_node(self.agent_2)
        node_mgr.register_agents([self.agent_3])
        node_mgr.sync_with_database(scheduler_mgr.config)

        resources = NodeResources([Cpus(25.0), Mem(2048.0), Disk(2048.0)])
        resource_mgr.add_new_offers(
            [ResourceOffer('offer', self.agent_3.agent_id, self.framework_id, resources, now())])

        num_tasks = SchedulingManager().perform_scheduling(self._driver, now())
        # Both queued job executions fit on the new agent
        self.assertEqual(num_tasks, 2)
        # Each launched mesos task must carry the new agent ID
        calls = self._driver.method_calls
        self.assertEqual(1, len(calls))
        for mesos_task in calls[0][1][1]:
            self.assertEqual(self.agent_3.agent_id, mesos_task.slave_id.value)
Exemplo n.º 23
0
    def _configure_regular_job(config, job_exe, job_type):
        """Configures the given execution as a regular (non-system) job by adding pre and post tasks,
        input/output mounts, etc

        :param config: The execution configuration
        :type config: :class:`job.configuration.json.execution.exe_config.ExecutionConfiguration`
        :param job_exe: The job execution model being scheduled
        :type job_exe: :class:`job.models.JobExecution`
        :param job_type: The job type model
        :type job_type: :class:`job.models.JobType`
        """

        config.create_tasks(['pull', 'pre', 'main', 'post'])
        config.add_to_task('pull', args=create_pull_command(job_type.docker_image))
        exe_env_vars = {'SCALE_JOB_ID': unicode(job_exe.job_id), 'SCALE_EXE_NUM': unicode(job_exe.exe_num)}
        config.add_to_task('pre', args=PRE_TASK_COMMAND_ARGS, env_vars=exe_env_vars)
        config.add_to_task('post', args=POST_TASK_COMMAND_ARGS, env_vars=exe_env_vars)

        # Input workspaces: read-only views for pre/main, read-write for post
        input_names = config.get_input_workspace_names()
        ro_inputs = {name: TaskWorkspace(name, MODE_RO) for name in input_names}
        rw_inputs = {name: TaskWorkspace(name, MODE_RW) for name in input_names}
        config.add_to_task('pre', workspaces=ro_inputs)
        config.add_to_task('main', workspaces=ro_inputs)
        # The post task may need to move input files as part of parse results
        config.add_to_task('post', workspaces=rw_inputs)

        # Output workspaces are only needed by the post task
        config.add_to_task('post', workspaces={name: TaskWorkspace(name, MODE_RW)
                                               for name in config.get_output_workspace_names()})

        # Mount the shared input/output volumes into each task
        input_mnt_name = 'scale_input_mount'
        output_mnt_name = 'scale_output_mount'
        input_vol_name = get_job_exe_input_vol_name(job_exe)
        output_vol_name = get_job_exe_output_vol_name(job_exe)
        in_vol_ro = Volume(input_vol_name, SCALE_JOB_EXE_INPUT_PATH, MODE_RO, is_host=False)
        in_vol_rw = Volume(input_vol_name, SCALE_JOB_EXE_INPUT_PATH, MODE_RW, is_host=False)
        out_vol_ro = Volume(output_vol_name, SCALE_JOB_EXE_OUTPUT_PATH, MODE_RO, is_host=False)
        out_vol_rw = Volume(output_vol_name, SCALE_JOB_EXE_OUTPUT_PATH, MODE_RW, is_host=False)
        config.add_to_task('pre', mount_volumes={input_mnt_name: in_vol_rw, output_mnt_name: out_vol_rw})
        config.add_to_task('main', mount_volumes={input_mnt_name: in_vol_ro, output_mnt_name: out_vol_rw})
        config.add_to_task('post', mount_volumes={output_mnt_name: out_vol_ro})

        # Configure output directory
        # TODO: original output dir and command arg replacement can be removed when Scale no longer supports old-style
        # job types
        output_dir_env = {'job_output_dir': SCALE_JOB_EXE_OUTPUT_PATH, 'OUTPUT_DIR': SCALE_JOB_EXE_OUTPUT_PATH}
        main_args = config._get_task_dict('main')['args']
        main_args = JobInterface._replace_command_parameters(main_args, output_dir_env)
        config.add_to_task('main', args=main_args, env_vars=output_dir_env)

        # Task resources: pull and pre get the full amount, then the requirement shrinks
        resources = job_exe.get_resources()
        config.add_to_task('pull', resources=resources)
        config.add_to_task('pre', resources=resources)
        # The main task no longer needs the disk space reserved for input files
        resources.subtract(NodeResources([Disk(job_exe.input_file_size)]))
        config.add_to_task('main', resources=resources)
        # The post task needs no disk space at all
        resources.remove_resource('disk')
        config.add_to_task('post', resources=resources)
Exemplo n.º 24
0
    def _configure_regular_job(config, job_exe, job_type, system_logging_level):
        """Configures the given execution as a regular (non-system) job by adding pre and post tasks,
        input/output mounts, etc

        :param config: The execution configuration
        :type config: :class:`job.execution.configuration.json.exe_config.ExecutionConfiguration`
        :param job_exe: The job execution model being scheduled
        :type job_exe: :class:`job.models.JobExecution`
        :param job_type: The job type model
        :type job_type: :class:`job.models.JobType`
        :param system_logging_level: The logging level to be passed in through environment
        :type system_logging_level: str
        """

        config.create_tasks(['pull', 'pre', 'main', 'post'])
        config.add_to_task('pull', args=create_pull_command(job_exe.docker_image))
        config.add_to_task('pre', args=PRE_TASK_COMMAND_ARGS)
        config.add_to_task('post', args=POST_TASK_COMMAND_ARGS)

        # Input workspaces: read-only views for pre/main, read-write for post
        input_names = config.get_input_workspace_names()
        ro_inputs = {name: TaskWorkspace(name, MODE_RO) for name in input_names}
        rw_inputs = {name: TaskWorkspace(name, MODE_RW) for name in input_names}
        config.add_to_task('pre', workspaces=ro_inputs)
        config.add_to_task('main', workspaces=ro_inputs)
        # The post task may need to move input files as part of parse results
        config.add_to_task('post', workspaces=rw_inputs)

        # Output workspaces are only needed by the post task
        config.add_to_task('post', workspaces={name: TaskWorkspace(name, MODE_RW)
                                               for name in config.get_output_workspace_names()})

        # Mount the shared input/output volumes into each task
        input_mnt_name = 'scale_input_mount'
        output_mnt_name = 'scale_output_mount'
        input_vol_name = get_job_exe_input_vol_name(job_exe)
        output_vol_name = get_job_exe_output_vol_name(job_exe)
        in_vol_ro = Volume(input_vol_name, SCALE_JOB_EXE_INPUT_PATH, MODE_RO, is_host=False)
        in_vol_rw = Volume(input_vol_name, SCALE_JOB_EXE_INPUT_PATH, MODE_RW, is_host=False)
        out_vol_ro = Volume(output_vol_name, SCALE_JOB_EXE_OUTPUT_PATH, MODE_RO, is_host=False)
        out_vol_rw = Volume(output_vol_name, SCALE_JOB_EXE_OUTPUT_PATH, MODE_RW, is_host=False)

        config.add_to_task('pre', mount_volumes={input_mnt_name: in_vol_rw, output_mnt_name: out_vol_rw},
                           env_vars={'SYSTEM_LOGGING_LEVEL': system_logging_level})
        config.add_to_task('main', mount_volumes={input_mnt_name: in_vol_ro, output_mnt_name: out_vol_rw})
        config.add_to_task('post', mount_volumes={output_mnt_name: out_vol_ro},
                           env_vars={'SYSTEM_LOGGING_LEVEL': system_logging_level})

        # Configure output directory
        # TODO: original output dir and command arg replacement can be removed when Scale no longer supports old-style
        # job types
        output_dir_env = {'job_output_dir': SCALE_JOB_EXE_OUTPUT_PATH, 'OUTPUT_DIR': SCALE_JOB_EXE_OUTPUT_PATH}
        main_args = config._get_task_dict('main')['args']

        # TODO: Remove old-style logic for command parameters inject when with v6
        if JobInterfaceSunset.is_seed_dict(job_type.manifest):
            main_args = environment_expansion(output_dir_env, main_args, remove_extras=True)
        else:
            main_args = JobInterface.replace_command_parameters(main_args, output_dir_env)
        config.add_to_task('main', args=main_args, env_vars=output_dir_env)

        # Task resources: pull and pre get the full amount, then the requirement shrinks
        resources = job_exe.get_resources()
        config.add_to_task('pull', resources=resources)
        config.add_to_task('pre', resources=resources)
        # The main task no longer needs the disk space reserved for input files
        resources.subtract(NodeResources([Disk(job_exe.input_file_size)]))
        config.add_to_task('main', resources=resources)
        # The post task needs no disk space at all
        resources.remove_resource('disk')
        config.add_to_task('post', resources=resources)
Exemplo n.º 25
0
    def test_add_allocated_offers(self):
        """Tests calling add_allocated_offers() when there are enough resources for everything"""

        node = MagicMock()
        node.hostname = 'host_1'
        node.id = 1
        health_task = HealthTask('1234', 'agent_1')
        pull_task = PullTask('1234', 'agent_1')
        node.is_ready_for_new_job = MagicMock(return_value=True)
        node.is_ready_for_next_job_task = MagicMock(return_value=True)
        node.get_next_tasks = MagicMock(return_value=[health_task, pull_task])

        offered = NodeResources([Cpus(100.0), Mem(500.0)])
        watermark = NodeResources([Cpus(100.0), Mem(500.0)])
        scheduling_node = SchedulingNode('agent_1', node, [], [],
                                         ResourceSet(offered, NodeResources(), watermark))

        job_exe_1 = job_test_utils.create_running_job_exe(
            agent_id=self.agent_id, resources=NodeResources([Cpus(1.0), Mem(10.0)]))
        job_exe_2 = job_test_utils.create_running_job_exe(
            agent_id=self.agent_id, resources=NodeResources([Cpus(2.0), Mem(20.0)]))

        # Total up everything the node tasks and job exe next tasks will require
        required = NodeResources()
        required.add(health_task.get_resources())
        required.add(pull_task.get_resources())
        required.add(job_exe_1.next_task().get_resources())
        required.add(job_exe_2.next_task().get_resources())
        expected_remaining = NodeResources()
        expected_remaining.add(offered)
        expected_remaining.subtract(required)

        # Allocate the node tasks and job exe tasks (there would never be queued job exes since they
        # would be scheduled before add_allocated_offers() was called)
        scheduling_node.accept_node_tasks(now(), [])
        scheduling_node.accept_job_exe_next_task(job_exe_1, [])
        scheduling_node.accept_job_exe_next_task(job_exe_2, [])
        self.assertEqual(len(scheduling_node.allocated_tasks), 2)
        self.assertEqual(len(scheduling_node._allocated_running_job_exes), 2)
        self.assertEqual(len(scheduling_node._allocated_queued_job_exes), 0)
        self.assertTrue(scheduling_node.allocated_resources.is_equal(required))

        # Offers provide more resources than we actually need
        offers = [ResourceOffer('offer_1', 'agent_1', '1234', NodeResources([Cpus(1.0)]), now(), None),
                  ResourceOffer('offer_2', 'agent_1', '1234', required, now(), None),
                  ResourceOffer('offer_3', 'agent_1', '1234',
                                NodeResources([Cpus(7.5), Mem(600.0), Disk(800.0)]), now(), None)]

        scheduling_node.add_allocated_offers(offers)
        self.assertListEqual(scheduling_node.allocated_offers, offers)
        # All allocated tasks and job exes should still be here
        self.assertEqual(len(scheduling_node.allocated_tasks), 2)
        self.assertEqual(len(scheduling_node._allocated_running_job_exes), 2)
        self.assertEqual(len(scheduling_node._allocated_queued_job_exes), 0)
        self.assertTrue(scheduling_node.allocated_resources.is_equal(required))
        self.assertTrue(scheduling_node._remaining_resources.is_equal(expected_remaining))