Example #1
def _test_bandwidth(claim):
    host = claim.tracker.host
    if not claim.tracker.pci_tracker:
        LOG.debug("pci_tracker is null.")
        return
    pci_pools = claim.tracker.pci_tracker.pci_stats.pools

    request_bandwidth = {}
    pre_request_metadata = claim.instance.get('metadata')
    if isinstance(pre_request_metadata, list):
        request_metadata = {}
        for metadata in pre_request_metadata:
            request_metadata[metadata['key']] = metadata['value']
    else:
        request_metadata = pre_request_metadata or {}
    for phy_net, __, bandwidth in _get_nw_info_from_metadata(
            claim, request_metadata):
        if phy_net in request_bandwidth:
            request_bandwidth[phy_net] += int(bandwidth)
        else:
            request_bandwidth[phy_net] = int(bandwidth)
    if len(request_bandwidth) == 0:
        return

    total_bandwidth = {}
    for pool in pci_pools:
        total_bandwidth[pool['physical_network']] = int(pool['bandwidths'])

    used_bandwidth = {}
    instance_list = conductor.API().instance_get_all_by_host(
        claim.context, host)
    for instance in instance_list:
        if 'deleting' == instance.get('task_state'):
            continue
        if claim.instance.get('uuid') == instance.get('uuid'):
            continue
        metadata_dict = {}
        for metadata in instance.get('metadata', []):
            metadata_dict[metadata['key']] = metadata['value']
        for phy_net, __, bandwidth in _get_nw_info_from_metadata(
                claim, metadata_dict):
            if phy_net in used_bandwidth:
                used_bandwidth[phy_net] += int(bandwidth)
            else:
                used_bandwidth[phy_net] = int(bandwidth)

    for phy_net, bandwidth in request_bandwidth.items():
        if phy_net not in total_bandwidth:
            raise exception.ComputeResourcesUnavailable(
                reason="Not Enough Bandwidth")
        free = total_bandwidth[phy_net] - used_bandwidth.get(phy_net, 0)
        if bandwidth > free:
            raise exception.ComputeResourcesUnavailable(
                reason="Not Enough Bandwidth")
Example #2
    def _claim_test(self, resources, limits=None):
        """Test if this claim can be satisfied given available resources and
        optional oversubscription limits

        This should be called before the compute node actually consumes the
        resources required to execute the claim.

        :param resources: available local compute node resources
        :param limits: optional per-resource oversubscription limits
        :raises: exception.ComputeResourcesUnavailable if any resource claim
            fails
        """
        if not limits:
            limits = {}

        # If an individual limit is None, the resource will be considered
        # unlimited:
        memory_mb_limit = limits.get('memory_mb')
        disk_gb_limit = limits.get('disk_gb')
        vcpus_limit = limits.get('vcpu')
        closids_limit = limits.get('closids')
        numa_topology_limit = limits.get('numa_topology')

        # WRS: Ensure print formats display even with None value.
        LOG.info(
            "Attempting claim on node %(node)s: "
            "memory %(memory_mb)s MB, "
            "disk %(disk_gb)s GB, vcpus %(vcpus)s CPU, "
            "closids %(closids)s", {
                'node': self.nodename,
                'memory_mb': self.memory_mb,
                'disk_gb': self.disk_gb,
                'vcpus': self.vcpus,
                'closids': self.closids,
            },
            instance=self.instance)

        reasons = [
            self._test_memory(resources, memory_mb_limit),
            self._test_disk(resources, disk_gb_limit),
            self._test_vcpus(resources, vcpus_limit)
        ]
        if utils.is_libvirt_compute(resources):
            reasons.extend([
                self._test_closids(resources, closids_limit),
                self._test_numa_topology(resources, numa_topology_limit),
                self._test_pci()
            ])
        reasons = [r for r in reasons if r is not None]
        if len(reasons) > 0:
            LOG.error('Claim unsuccessful on node %s: %s',
                      self.nodename,
                      "; ".join(reasons),
                      instance=self.instance)
            raise exception.ComputeResourcesUnavailable(
                reason="; ".join(reasons))

        # WRS: Log the claim attributes
        LOG.info('Claim successful on node %s: %s',
                 self.nodename,
                 self,
                 instance=self.instance)
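
The _claim_test variants on this page all share one pattern: each _test_*
helper returns None on success or a human-readable reason string on failure,
and the claim raises ComputeResourcesUnavailable with all reasons joined. A
self-contained sketch of that pattern (names here are illustrative, not
Nova's):

class ResourcesUnavailable(Exception):
    pass

def test_memory(free_mb, requested_mb):
    # Return None on success, a reason string on failure.
    if requested_mb > free_mb:
        return 'memory: requested %d MB, free %d MB' % (requested_mb, free_mb)
    return None

def test_disk(free_gb, requested_gb):
    if requested_gb > free_gb:
        return 'disk: requested %d GB, free %d GB' % (requested_gb, free_gb)
    return None

def claim_test(free_mb, free_gb, requested_mb, requested_gb):
    reasons = [test_memory(free_mb, requested_mb),
               test_disk(free_gb, requested_gb)]
    reasons = [r for r in reasons if r is not None]
    if reasons:
        raise ResourcesUnavailable("; ".join(reasons))

claim_test(2048, 20, 1024, 10)      # fits: no exception raised
try:
    claim_test(2048, 20, 4096, 40)  # fails on both resources
except ResourcesUnavailable as exc:
    assert 'memory' in str(exc) and 'disk' in str(exc)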
Example #3
    def _claim_test(self, resources, limits=None):
        """Test if this claim can be satisfied given available resources and
        optional oversubscription limits

        This should be called before the compute node actually consumes the
        resources required to execute the claim.

        :param resources: available local compute node resources
        :param limits: Optional limits to test, either dict or
            objects.SchedulerLimits
        :raises: exception.ComputeResourcesUnavailable if any resource claim
            fails
        """
        if not limits:
            limits = {}

        if isinstance(limits, objects.SchedulerLimits):
            limits = limits.to_dict()

        # If an individual limit is None, the resource will be considered
        # unlimited:
        numa_topology_limit = limits.get('numa_topology')

        reasons = [self._test_numa_topology(resources, numa_topology_limit),
                   self._test_pci()]
        reasons = [r for r in reasons if r is not None]
        if len(reasons) > 0:
            raise exception.ComputeResourcesUnavailable(
                reason="; ".join(reasons))

        LOG.info('Claim successful on node %s', self.nodename,
                 instance=self.instance)
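
Example #3 differs from Example #2 mainly in accepting either a plain dict or
an objects.SchedulerLimits. A self-contained sketch of that normalization
step, with a stand-in class for SchedulerLimits (for illustration only):

class SchedulerLimits(object):
    # Stand-in for nova.objects.SchedulerLimits.
    def __init__(self, **kwargs):
        self._values = kwargs

    def to_dict(self):
        return dict(self._values)

def normalize_limits(limits):
    if not limits:
        return {}
    if isinstance(limits, SchedulerLimits):
        return limits.to_dict()
    return limits

assert normalize_limits(None) == {}
assert normalize_limits({'memory_mb': 2048}) == {'memory_mb': 2048}
assert normalize_limits(SchedulerLimits(vcpu=4)) == {'vcpu': 4}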
Example #4
 def fake_instance_claim(_self, _context, _inst, nodename, *a, **kw):
     # Before triggering the reschedule to the other host, max out the
     # capacity on the alternate host.
     alt_nodename = 'host0' if nodename == 'host1' else 'host1'
     rp_uuid = self._get_provider_uuid_by_host(alt_nodename)
     inventories = self._get_provider_inventory(rp_uuid)
     # Fake some other consumer taking all of the VCPU on the alt host.
     # Since we set cpu_allocation_ratio=1.0 the total is the total
     # capacity for VCPU on the host.
     total_vcpu = inventories['VCPU']['total']
     alt_consumer = '7d32d0bc-af16-44b2-8019-a24925d76152'
     allocs = {
         'allocations': {
             rp_uuid: {
                 'resources': {
                     'VCPU': total_vcpu
                 }
             }
         },
         'project_id': self.api.project_id,
         'user_id': self.api.project_id
     }
     resp = self.placement_api.put('/allocations/%s' % alt_consumer,
                                   allocs,
                                   version='1.12')
     self.assertEqual(204, resp.status, resp.content)
     raise exception.ComputeResourcesUnavailable(reason='overhead!')
Example #5
    def _claim_test(self, resources, limits=None):
        """Test if this claim can be satisfied given available resources and
        optional oversubscription limits

        This should be called before the compute node actually consumes the
        resources required to execute the claim.

        :param resources: available local compute node resources
        :param limits: optional per-resource oversubscription limits
        :raises: exception.ComputeResourcesUnavailable if any resource claim
            fails
        """
        if not limits:
            limits = {}

        # If an individual limit is None, the resource will be considered
        # unlimited:
        memory_mb_limit = limits.get('memory_mb')
        disk_gb_limit = limits.get('disk_gb')

        msg = _("Attempting claim: memory %(memory_mb)d MB, disk %(disk_gb)d "
                "GB")
        params = {'memory_mb': self.memory_mb, 'disk_gb': self.disk_gb}
        LOG.audit(msg % params, instance=self.instance)

        reasons = [
            self._test_memory(resources, memory_mb_limit),
            self._test_disk(resources, disk_gb_limit),
            self._test_pci()
        ]
        reasons = reasons + self._test_ext_resources(limits)
        reasons = [r for r in reasons if r is not None]
        if len(reasons) > 0:
            raise exception.ComputeResourcesUnavailable(
                reason="; ".join(reasons))

        LOG.audit(_('Claim successful'), instance=self.instance)
Example #6
 def fake_instance_claim(_self, *args, **kwargs):
     self.attempts += 1
     if self.failed_host is None:
         # Set the failed_host value to the ResourceTracker.host value.
         self.failed_host = _self.host
         raise exception.ComputeResourcesUnavailable(
             reason='failure on host %s' % _self.host)
     return real_instance_claim(_self, *args, **kwargs)
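
Stubs like fake_instance_claim above are typically patched over the real
method so the first claim fails and the retry delegates to the original. A
generic, Nova-free sketch of that fail-once-then-delegate pattern using only
the standard library:

from unittest import mock

class Tracker(object):
    host = 'host1'

    def instance_claim(self, instance):
        return 'claim-for-%s' % instance

real_instance_claim = Tracker.instance_claim
state = {'failed_host': None, 'attempts': 0}

def fake_instance_claim(_self, *args, **kwargs):
    # Fail exactly once, remembering the host, then delegate to the
    # real method so a rescheduled attempt can succeed.
    state['attempts'] += 1
    if state['failed_host'] is None:
        state['failed_host'] = _self.host
        raise RuntimeError('failure on host %s' % _self.host)
    return real_instance_claim(_self, *args, **kwargs)

with mock.patch.object(Tracker, 'instance_claim', autospec=True,
                       side_effect=fake_instance_claim):
    tracker = Tracker()
    try:
        tracker.instance_claim('vm1')
    except RuntimeError:
        pass
    assert tracker.instance_claim('vm1') == 'claim-for-vm1'
assert state == {'failed_host': 'host1', 'attempts': 2}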
Example #7
    def instance_claim(self, context, instance_ref, limits=None):
        """Indicate that some resources are needed for an upcoming compute
        instance build operation.

        This should be called before the compute node is about to perform
        an instance build operation that will consume additional resources.

        :param context: security context
        :param instance_ref: instance to reserve resources for
        :param limits: Dict of oversubscription limits for memory, disk,
                       and CPUs.
        :returns: A Claim ticket representing the reserved resources.  It can
                  be used to revert the resource usage if an error occurs
                  during the instance build.
        """
        if self.disabled:
            # compute_driver doesn't support resource tracking, just
            # set the 'host' and node fields and continue the build:
            self._set_instance_host_and_node(context, instance_ref)
            return claims.NopClaim()

        # sanity checks:
        if instance_ref['host']:
            LOG.warning(_("Host field should not be set on the instance until "
                          "resources have been claimed."),
                        instance=instance_ref)

        if instance_ref['node']:
            LOG.warning(_("Node field should not be set on the instance "
                          "until resources have been claimed."),
                        instance=instance_ref)

        # get memory overhead required to build this instance:
        overhead = self.driver.estimate_instance_overhead(instance_ref)
        LOG.debug(
            _("Memory overhead for %(flavor)d MB instance; %(overhead)d "
              "MB"), {
                  'flavor': instance_ref['memory_mb'],
                  'overhead': overhead['memory_mb']
              })

        claim = claims.Claim(instance_ref, self, overhead=overhead)

        if claim.test(self.compute_node, limits):

            self._set_instance_host_and_node(context, instance_ref)

            # Mark resources in-use and update stats
            self._update_usage_from_instance(self.compute_node, instance_ref)

            elevated = context.elevated()
            # persist changes to the compute node:
            self._update(elevated, self.compute_node)

            return claim

        else:
            raise exception.ComputeResourcesUnavailable()
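
The disabled branch above returns claims.NopClaim(), a null object exposing
the same interface as a real claim, so callers never need to check whether
resource tracking is enabled. A minimal sketch of that pattern (illustrative,
not Nova's exact class):

class NopClaim(object):
    # Null object: same interface as a real claim, but every operation
    # is a no-op because nothing was actually reserved.
    def abort(self):
        pass

claim = NopClaim()
claim.abort()  # safe to call unconditionally in error-handling paths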
Example #8
 def fake_orig_claim(_self, context, instance, instance_type, nodename,
                     *args, **kwargs):
     if not claim_calls:
         claim_calls.append(nodename)
         raise exception.ComputeResourcesUnavailable(
             reason='Simulated claim failure')
     else:
         claim_calls.append(nodename)
         return orig_claim(_self, context, instance, instance_type,
                           nodename, *args, **kwargs)
Example #9
File: fake.py Project: wangyi4/stx-nova
 def spawn(self, context, instance, image_meta, injected_files,
           admin_password, network_info=None, block_device_info=None):
     if not self.rescheduled.get(instance.uuid, False):
         # We only reschedule on the first time something hits spawn().
         self.rescheduled[instance.uuid] = True
         raise exception.ComputeResourcesUnavailable(
             reason='FakeRescheduleDriver')
     super(FakeRescheduleDriver, self).spawn(
         context, instance, image_meta, injected_files,
         admin_password, network_info, block_device_info)
Example #10
    def _claim_test(self, resources, limits=None):
        """Test if this claim can be satisfied given available resources and
        optional oversubscription limits

        This should be called before the compute node actually consumes the
        resources required to execute the claim.

        :param resources: available local compute node resources
        :param limits: Optional limits to test, either dict or
            objects.SchedulerLimits
        :raises: exception.ComputeResourcesUnavailable if any resource claim
            fails
        """
        if not limits:
            limits = {}

        if isinstance(limits, objects.SchedulerLimits):
            limits = limits.to_dict()

        # If an individual limit is None, the resource will be considered
        # unlimited:
        memory_mb_limit = limits.get('memory_mb')
        disk_gb_limit = limits.get('disk_gb')
        vcpus_limit = limits.get('vcpu')
        numa_topology_limit = limits.get('numa_topology')

        LOG.info(
            "Attempting claim on node %(node)s: "
            "memory %(memory_mb)d MB, "
            "disk %(disk_gb)d GB, vcpus %(vcpus)d CPU", {
                'node': self.nodename,
                'memory_mb': self.memory_mb,
                'disk_gb': self.disk_gb,
                'vcpus': self.vcpus
            },
            instance=self.instance)

        reasons = [
            self._test_memory(resources, memory_mb_limit),
            self._test_disk(resources, disk_gb_limit),
            self._test_vcpus(resources, vcpus_limit),
            self._test_numa_topology(resources, numa_topology_limit),
            self._test_pci()
        ]
        reasons = [r for r in reasons if r is not None]
        if len(reasons) > 0:
            raise exception.ComputeResourcesUnavailable(
                reason="; ".join(reasons))

        LOG.info('Claim successful on node %s',
                 self.nodename,
                 instance=self.instance)
Example #11
    def resize_claim(self, context, instance, instance_type, limits=None):
        """Indicate that resources are needed for a resize operation to this
        compute host.

        :param context: security context
        :param instance: instance object to reserve resources for
        :param instance_type: new instance_type being resized to
        :param limits: Dict of oversubscription limits for memory, disk,
                       and CPUs.
        :returns: A Claim ticket representing the reserved resources.  It
                  should be used to finalize the resource claim or to free the
                  resources after the compute operation is finished.
        """
        if self.disabled:
            # compute_driver doesn't support resource tracking, just
            # generate the migration record and continue the resize:
            migration = self._create_migration(context, instance,
                                               instance_type)
            return claims.NopClaim(migration=migration)

        # get memory overhead required to build this instance:
        overhead = self.driver.estimate_instance_overhead(instance_type)
        LOG.debug(
            _("Memory overhead for %(flavor)d MB instance; %(overhead)d "
              "MB"), {
                  'flavor': instance_type['memory_mb'],
                  'overhead': overhead['memory_mb']
              })

        instance_ref = obj_base.obj_to_primitive(instance)
        claim = claims.ResizeClaim(instance_ref,
                                   instance_type,
                                   self,
                                   overhead=overhead)

        if claim.test(self.compute_node, limits):

            migration = self._create_migration(context, instance_ref,
                                               instance_type)
            claim.migration = migration

            # Mark the resources in-use for the resize landing on this
            # compute host:
            self._update_usage_from_migration(context, instance_ref,
                                              self.compute_node, migration)
            elevated = context.elevated()
            self._update(elevated, self.compute_node)

            return claim

        else:
            raise exception.ComputeResourcesUnavailable()
Example #12
 def wrap_prep_resize(_self, *args, **kwargs):
     # Poison the AZ query to blow up as if the cell conductor does not
     # have access to the API DB.
     self.agg_mock = self.useFixture(
         fixtures.MockPatch(
             'nova.objects.AggregateList.get_by_host',
             side_effect=oslo_db_exc.CantStartEngineError)).mock
     if self.rescheduled is None:
         # Track the first host that we rescheduled from.
         self.rescheduled = _self.host
         # Trigger a reschedule.
         raise exception.ComputeResourcesUnavailable(
             reason='test_migrate_reschedule_blocked_az_up_call')
     return original_prep_resize(_self, *args, **kwargs)
Example #13
    def _claim_test(self, resources, limits=None):
        """Test if this claim can be satisfied given available resources and
        optional oversubscription limits

        This should be called before the compute node actually consumes the
        resources required to execute the claim.

        :param resources: available local compute node resources
        :param limits: optional per-resource oversubscription limits
        :raises: exception.ComputeResourcesUnavailable if any resource claim
            fails
        """
        if not limits:
            limits = {}

        # If an individual limit is None, the resource will be considered
        # unlimited:
        memory_mb_limit = limits.get('memory_mb')
        disk_gb_limit = limits.get('disk_gb')
        vcpus_limit = limits.get('vcpu')
        numa_topology_limit = limits.get('numa_topology')

        LOG.info(
            "Attempting claim on node %(node)s: "
            "memory %(memory_mb)d MB, "
            "disk %(disk_gb)d GB, vcpus %(vcpus)d CPU", {
                'node': self.nodename,
                'memory_mb': self.memory_mb,
                'disk_gb': self.disk_gb,
                'vcpus': self.vcpus
            },
            instance=self.instance)

        reasons = [
            self._test_memory(resources, memory_mb_limit),
            self._test_disk(resources, disk_gb_limit),
            self._test_vcpus(resources, vcpus_limit),
            # FIXME: when this check fails, the corresponding log message is:
            # "Require both a host and instance NUMA topology to fit
            # instance on host."
            # (numa_fit_instance_to_host, /opt/stack/nova/nova/virt/hardware.py:1453)
            self._test_numa_topology(resources, numa_topology_limit),
            self._test_pci()
        ]
        reasons = [r for r in reasons if r is not None]
        if len(reasons) > 0:
            raise exception.ComputeResourcesUnavailable(
                reason="; ".join(reasons))

        LOG.info('Claim successful on node %s',
                 self.nodename,
                 instance=self.instance)
Example #14
    def _claim_test(self, resources, limits=None, filter_properties=None):
        """Test if this claim can be satisfied given available resources and
        optional oversubscription limits

        This should be called before the compute node actually consumes the
        resources required to execute the claim.

        :param resources: available local compute node resources
        :param limits: optional per-resource oversubscription limits
        :param filter_properties: optional scheduler filter properties, used
            by the instance group policy check
        :raises: exception.ComputeResourcesUnavailable if any resource claim
            fails
        """
        if not limits:
            limits = {}

        if not filter_properties:
            filter_properties = {}

        # If an individual limit is None, the resource will be considered
        # unlimited:
        memory_mb_limit = limits.get('memory_mb')
        disk_gb_limit = limits.get('disk_gb')
        vcpus_limit = limits.get('vcpu')
        numa_topology_limit = limits.get('numa_topology')

        LOG.info(_LI("Attempting claim: memory %(memory_mb)d MB, "
                     "disk %(disk_gb)d GB, vcpus %(vcpus)d CPU"), {
                         'memory_mb': self.memory_mb,
                         'disk_gb': self.disk_gb,
                         'vcpus': self.vcpus
                     },
                 instance=self.instance)

        reasons = [
            self._test_memory(resources, memory_mb_limit),
            self._test_disk(resources, disk_gb_limit),
            self._test_vcpus(resources, vcpus_limit),
            self._test_numa_topology(resources, numa_topology_limit),
            self._test_pci(),
            self._test_instance_group_policy(filter_properties)
        ]
        reasons = [r for r in reasons if r is not None]
        if len(reasons) > 0:
            raise exception.ComputeResourcesUnavailable(
                reason="; ".join(reasons))

        LOG.info(_LI('Claim successful'), instance=self.instance)
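
Example #14 extends the reason list with _test_instance_group_policy, which
is not shown here. A purely hypothetical sketch of what such an anti-affinity
check might look like, following the same None-or-reason convention:

def _test_instance_group_policy(filter_properties):
    # Hypothetical: the real check depends on project-specific scheduler
    # hints. Assume filter_properties may carry a 'group_info' dict.
    group_info = filter_properties.get('group_info')
    if not group_info or group_info.get('policy') != 'anti-affinity':
        return None
    if group_info.get('members_on_host'):
        return 'Anti-affinity server group member already on host'
    return None

assert _test_instance_group_policy({}) is None
assert _test_instance_group_policy(
    {'group_info': {'policy': 'anti-affinity',
                    'members_on_host': ['uuid-1']}}) is not None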
Example #15
    def instance_claim(self, context, instance_ref, limits=None):
        """Indicate that some resources are needed for an upcoming compute
        instance build operation.

        This should be called before the compute node is about to perform
        an instance build operation that will consume additional resources.

        :param context: security context
        :param instance_ref: instance to reserve resources for
        :param limits: Dict of oversubscription limits for memory, disk,
                       and CPUs.
        :returns: A Claim ticket representing the reserved resources.  It can
                  be used to revert the resource usage if an error occurs
                  during the instance build.
        """
        if self.disabled:
            # compute_driver doesn't support resource tracking, just
            # set the 'host' field and continue the build:
            instance_ref = self._set_instance_host(context,
                    instance_ref['uuid'])
            return claims.NopClaim()

        # sanity check:
        if instance_ref['host']:
            LOG.warning(_("Host field should be not be set on the instance "
                          "until resources have been claimed."),
                          instance=instance_ref)

        claim = claims.Claim(instance_ref, self)

        if claim.test(self.compute_node, limits):

            instance_ref = self._set_instance_host(context,
                    instance_ref['uuid'])

            # Mark resources in-use and update stats
            self._update_usage_from_instance(self.compute_node, instance_ref)

            # persist changes to the compute node:
            self._update(context, self.compute_node)

            return claim

        else:
            raise exception.ComputeResourcesUnavailable()
Example #16
    def _get_vgpu_info(self, allocations):
        """Get vGPU info basing on the allocations.

        :param allocations: Information about resources allocated to the
                            instance via placement, of the form returned by
                            SchedulerReportClient.get_allocations_for_consumer.
        :returns: Dictionary describing vGPU info if any vGPU is allocated;
                  None otherwise.
        :raises: exception.ComputeResourcesUnavailable if there are no
                 vGPUs available.
        """
        if not self._is_vgpu_allocated(allocations):
            return None

        # NOTE(jianghuaw): At the moment, we associate all vGPU resources with
        # the compute node regardless of which GPU group the vGPUs belong to,
        # so we need to search all GPU groups until we find one with remaining
        # capacity to supply one vGPU. Once we switch to nested resource
        # providers, the allocations will contain the resource provider that
        # represents a particular GPU group, and the GPU group and vGPU type
        # can be obtained directly from the resource provider's uuid. We can
        # then consider moving this function to vmops, as there will be no
        # need to query host stats to get all GPU groups.
        host_stats = self.host_state.get_host_stats(refresh=True)
        vgpu_stats = host_stats['vgpu_stats']
        for grp_uuid in vgpu_stats:
            if vgpu_stats[grp_uuid]['remaining'] > 0:
                # NOTE(jianghuaw): As XenServer only supports a single vGPU
                # per VM, the inventory data restricts `max_unit` to 1. If we
                # reach here, exactly one vGPU is allocated, so return the
                # GPU group uuid and vGPU type uuid of the first group that
                # still has remaining vGPUs.
                return dict(gpu_grp_uuid=grp_uuid,
                            vgpu_type_uuid=vgpu_stats[grp_uuid]['uuid'])
        # No remaining vGPU available: e.g. the vGPU resources have been used
        # by other instances, or the vGPUs have been disabled.
        raise exception.ComputeResourcesUnavailable(
            reason='vGPU resource is not available')
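
The loop above relies only on vgpu_stats mapping each GPU-group uuid to a
dict with at least 'remaining' and 'uuid' keys. A sketch of that assumed
shape, with the selection logic extracted (structure inferred from the code
above, not from XenAPI documentation):

vgpu_stats = {
    'grp-uuid-1': {'uuid': 'vgpu-type-uuid-a', 'remaining': 0},
    'grp-uuid-2': {'uuid': 'vgpu-type-uuid-b', 'remaining': 2},
}

def pick_vgpu_group(vgpu_stats):
    # Return the first group with remaining capacity, or None.
    for grp_uuid, stats in vgpu_stats.items():
        if stats['remaining'] > 0:
            return dict(gpu_grp_uuid=grp_uuid,
                        vgpu_type_uuid=stats['uuid'])
    return None

assert pick_vgpu_group(vgpu_stats) == {'gpu_grp_uuid': 'grp-uuid-2',
                                       'vgpu_type_uuid': 'vgpu-type-uuid-b'}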
Example #17
    def resize_claim(self, context, instance_ref, instance_type, limits=None):
        """Indicate that resources are needed for a resize operation to this
        compute host.

        :param context: security context
        :param instance_ref: instance to reserve resources for
        :param instance_type: new instance_type being resized to
        :param limits: Dict of oversubscription limits for memory, disk,
                       and CPUs.
        :returns: A Claim ticket representing the reserved resources.  It
                  should be used to finalize the resource claim or to free the
                  resources after the compute operation is finished.
        """
        if self.disabled:
            # compute_driver doesn't support resource tracking, just
            # generate the migration record and continue the resize:
            migration_ref = self._create_migration(context, instance_ref,
                                                   instance_type)
            return claims.NopClaim(migration=migration_ref)

        claim = claims.ResizeClaim(instance_ref, instance_type, self)

        if claim.test(self.compute_node, limits):

            migration_ref = self._create_migration(context, instance_ref,
                                                   instance_type)
            claim.migration = migration_ref

            # Mark the resources in-use for the resize landing on this
            # compute host:
            self._update_usage_from_migration(context, instance_ref,
                                              self.compute_node, migration_ref)
            elevated = context.elevated()
            self._update(elevated, self.compute_node)

            return claim

        else:
            raise exception.ComputeResourcesUnavailable()
Example #18
 def _compute_resources_unavailable(*args, **kwargs):
     raise exception.ComputeResourcesUnavailable(reason="fake-resource")
Example #19
 def __enter__(self):
     if not self.claim and not self.tracker.disabled:
         # insufficient resources to complete request
         raise exception.ComputeResourcesUnavailable()
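
Example #19 shows the enter side of the claim context manager; on exit, a
claim typically reverts its resource usage when the guarded block raises. A
minimal, self-contained sketch of that revert-on-exception behavior
(illustrative names, not Nova's exact implementation):

class MiniClaim(object):
    def __init__(self, available, requested):
        if requested > available:
            # insufficient resources to complete request
            raise RuntimeError('Not enough resources')
        self.requested = requested
        self.aborted = False

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        if exc_type is not None:
            self.abort()

    def abort(self):
        # Free the reserved resources after a failed build.
        self.aborted = True

claim = MiniClaim(available=4, requested=2)
try:
    with claim:
        raise RuntimeError('build failed after the claim succeeded')
except RuntimeError:
    pass
assert claim.aborted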