Example #1
    def _test_numa_topology(self, resources, limit):
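        """Re-run the CPU bind calculation for a resize and record the
        claimed NUMA topology and bind info on this claim.
        """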
        network_info = objects.InstanceInfoCache.get_by_instance_uuid(
            self.context, self.instance['uuid']).network_info
        self.instance['numa_topology'] = self.numa_topology or {}
        self.instance['vcpus'] = self.instance_type['vcpus']
        self.instance['memory_mb'] = self.memory_mb
        # check whether CPU and memory fit
        bind_info, instance_numa, _ = sched_utils.get_inst_cpu_bind_info(
            self.instance, self.tracker.host, network_info=network_info,
            action="resize")

        if instance_numa and instance_numa['cells'][0].get('mem'):
            cells = []
            for cell in instance_numa['cells']:
                cells.append(objects.InstanceNUMACell(
                    id=cell['id'], cpuset=set(cell['cpuset']),
                    memory=cell['mem']['total'],
                    pagesize=cell.get('pagesize')))

            format_inst_numa = objects.InstanceNUMATopology(
                cells=cells, instance_uuid=self.instance['uuid'])
            self.claimed_numa_topology = format_inst_numa
            self.bind_info = bind_info
            self.instance['numa_topology'] = format_inst_numa
        elif not instance_numa:
            return
        else:
            self.claimed_numa_topology = instance_numa
            self.bind_info = bind_info
            self.instance['numa_topology'] = instance_numa
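
All of these examples repeat the same conversion from the plain-dict topology returned by sched_utils.get_inst_cpu_bind_info into nova objects. Below is a minimal standalone sketch of that pattern (Python 3); the dataclass stand-ins and the sample input are hypothetical, chosen only to mirror the dict shape the examples index into (cell['id'], cell['cpuset'], cell['mem']['total'], cell.get('pagesize')).

    from dataclasses import dataclass, field

    # Stand-ins for nova's objects.InstanceNUMACell/InstanceNUMATopology,
    # defined here only so the sketch runs outside a nova tree.
    @dataclass
    class InstanceNUMACell:
        id: int
        cpuset: set
        memory: int
        pagesize: int = None

    @dataclass
    class InstanceNUMATopology:
        cells: list = field(default_factory=list)

    def to_numa_topology(instance_numa):
        # mirror the cell-by-cell conversion done in the examples
        cells = [InstanceNUMACell(id=cell['id'],
                                  cpuset=set(cell['cpuset']),
                                  memory=cell['mem']['total'],
                                  pagesize=cell.get('pagesize'))
                 for cell in instance_numa['cells']]
        return InstanceNUMATopology(cells=cells)

    # sample input mirroring the dict shape the examples rely on
    numa = {'cells': [{'id': 0, 'cpuset': [0, 1],
                       'mem': {'total': 2048}, 'pagesize': None}]}
    print(to_numa_topology(numa))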
Example #2
    def get_cpu_at_live_migration(self, context, instance, network_info,
                                  block_migration, migrate_data):
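        """Compute CPU binding for a live migration, record it in
        migrate_data and the instance's system metadata, and update the
        host's resource usage.
        """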
        # verify memory availability via the resource tracker
        self.test_mem_in_resource_tracker(context, instance)
        bind_info, instance_numa, _ = sched_utils.get_inst_cpu_bind_info(
            instance, self.host, network_info=network_info)
        if instance_numa and instance_numa['cells'][0].get('mem'):
            numa_id = instance_numa['cells'][0]['id']
        else:
            numa_id = None
        # check that the live migration record has already been created
        migrate_data['cpu'] = bind_info
        migrate_data['numa'] = numa_id
        self._create_live_migration_record(
            context, instance, block_migration, migrate_data)
        if instance_numa and instance_numa['cells'][0].get('mem'):
            cells = []
            for cell in instance_numa['cells']:
                cells.append(objects.InstanceNUMACell(
                    id=cell['id'], cpuset=set(cell['cpuset']),
                    memory=cell['mem']['total'],
                    pagesize=cell.get('pagesize')))

            format_inst_numa = objects.InstanceNUMATopology(
                cells=cells, instance_uuid=instance['uuid'])

            sys_meta = instance.system_metadata
            sys_meta['new_numa_topo'] = jsonutils.dumps(format_inst_numa)
            sys_meta['new_bind_info'] = jsonutils.dumps(bind_info)
            instance.system_metadata = sys_meta
            instance.save()

            instance.numa_topology = format_inst_numa
        # trigger update_resource
        self._update_usage_from_instance(context, self.compute_node, instance)
        # elevate the context if necessary
        elevated = context.elevated()
        # persist changes to the compute node:
        self._update(elevated, self.compute_node)

        return migrate_data
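
The system_metadata block above persists the new topology and bind info as JSON strings. A rough sketch of just that round trip, assuming oslo.serialization is installed and using plain dicts in place of the real nova objects:

    from oslo_serialization import jsonutils

    # plain-dict stand-ins for the topology and bind info computed above
    format_inst_numa = {'cells': [{'id': 0, 'cpuset': [0, 1],
                                   'memory': 2048, 'pagesize': None}]}
    bind_info = {0: [0], 1: [1]}

    sys_meta = {}
    # persist as JSON strings, as get_cpu_at_live_migration() does
    sys_meta['new_numa_topo'] = jsonutils.dumps(format_inst_numa)
    sys_meta['new_bind_info'] = jsonutils.dumps(bind_info)

    # later consumers read them back with loads()
    restored = jsonutils.loads(sys_meta['new_numa_topo'])
    assert restored['cells'][0]['memory'] == 2048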
Example #3
    def _test_core_bind(self, context, instance, resource_tracker):
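        """Recompute the instance's CPU bind info on this host, persist it
        to the database, and record the claimed NUMA topology.
        """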
        LOG.debug("get instance cpu bind info in _test_core_bind")
        filter_properties = {}
        inst_extra = objects.HuaweiInstanceExtra.get_by_instance_uuid(
            context, instance.uuid)
        if inst_extra:
            scheduler_hints = jsonutils.loads(inst_extra.scheduler_hints
                                              or '{}')
            stats = jsonutils.loads(inst_extra.stats or '{}')
        else:
            scheduler_hints = {}
            stats = {}
        filter_properties['scheduler_hints'] = scheduler_hints
        filter_properties['stats'] = stats
        pci_requests = objects.InstancePCIRequests.get_by_instance_uuid(
            context, instance['uuid'])
        if pci_requests:
            filter_properties['pci_requests'] = pci_requests

        bind_info, instance_numa, enable_ht = sched_utils.get_inst_cpu_bind_info(
            instance,
            resource_tracker.host,
            filter_properties=filter_properties)

        sched_utils.update_cpu_bind_info_to_db(bind_info, instance.uuid,
                                               instance_numa)

        if instance_numa and instance_numa['cells'][0].get('is_huawei'):
            cells = []
            for cell in instance_numa['cells']:
                cells.append(
                    objects.InstanceNUMACell(id=cell['id'],
                                             cpuset=set(cell['cpuset']),
                                             memory=cell['mem']['total'],
                                             pagesize=cell.get('pagesize')))

            format_inst_numa = objects.InstanceNUMATopology(cells=cells)
            self.claimed_numa_topology = format_inst_numa
            self.instance['numa_topology'] = format_inst_numa
Example #4
    def _schedule(self, context, request_spec, filter_properties):
        """Returns a list of hosts that meet the required specs,
        ordered by their fitness.
        """
        elevated = context.elevated()
        instance_properties = request_spec['instance_properties']
        instance_type = request_spec.get("instance_type", None)
        instance_uuids = request_spec.get("instance_uuids", None)

        LOG.debug("[HINTS] filter_properties=%s" % filter_properties)
        # query scheduler_hints from database, and skip what in the parameters.
        if instance_uuids:
            inst_extra = objects.HuaweiInstanceExtra.get_by_instance_uuid(
                context, instance_uuids[0])
            if inst_extra:
                scheduler_hints = jsonutils.loads(inst_extra.scheduler_hints
                                                  or '{}')
                stats = jsonutils.loads(inst_extra.stats or '{}')
            else:
                scheduler_hints = {}
                stats = {}

            LOG.debug("[HINTS] Got scheduler_hints via db. "
                      "scheduler_hints=%s" % scheduler_hints)
            filter_properties['scheduler_hints'] = scheduler_hints
            filter_properties['stats'] = stats
            instance_properties['stats'] = stats
        try:
            update_group_hosts = self._setup_instance_group(
                context, filter_properties)
        except exception.InstanceGroupNotFound:
            # the InstanceGroup was already checked in the API; it might
            # have been deleted during a migration or HA operation
            LOG.warning("ServerGroup %s doesn't exist",
                        scheduler_hints.get('group', "None"))
            update_group_hosts = False
        config_options = self._get_configuration_options()

        filter_properties.update({
            'context': context,
            'request_spec': request_spec,
            'config_options': config_options,
            'instance_type': instance_type
        })

        self.populate_filter_properties(request_spec, filter_properties)

        # Find our local list of acceptable hosts by repeatedly
        # filtering and weighing our options. Each time we choose a
        # host, we virtually consume resources on it so subsequent
        # selections can adjust accordingly.

        # Note: remember, we are using an iterator here. So only
        # traverse this list once. This can bite you if the hosts
        # are being scanned in a filter or weighing function.
        hosts = self._get_all_host_states(elevated)

        selected_hosts = []
        if instance_uuids:
            num_instances = len(instance_uuids)
        else:
            num_instances = request_spec.get('num_instances', 1)
        for num in xrange(num_instances):
            # NOTE: add a tracker for the filters
            tracker = HuaweiFilterTracker()
            filter_properties['__tracker'] = tracker

            # Filter local hosts based on requirements ...
            hosts = self.host_manager.get_filtered_hosts(hosts,
                                                         filter_properties,
                                                         index=num)
            if not hosts:
                # Can't get any more locally.
                break

            LOG.debug("Filtered %(hosts)s", {'hosts': hosts})

            weighed_hosts = self.host_manager.get_weighed_hosts(
                hosts, filter_properties)

            LOG.debug("Weighed %(hosts)s", {'hosts': weighed_hosts})

            scheduler_host_subset_size = CONF.scheduler_host_subset_size
            if scheduler_host_subset_size > len(weighed_hosts):
                scheduler_host_subset_size = len(weighed_hosts)
            if scheduler_host_subset_size < 1:
                scheduler_host_subset_size = 1

            chosen_host = random.choice(
                weighed_hosts[0:scheduler_host_subset_size])

            host_mapper = dict()
            for host in weighed_hosts:
                host_mapper[host.obj.host] = host

            if 'resize_prefer_to_same_host' in filter_properties:
                origin_host = filter_properties['resize_prefer_to_same_host']
                chosen_host = host_mapper.get(origin_host, chosen_host)

            migrate_host = filter_properties.get('migrate_host')
            if migrate_host:
                if migrate_host in host_mapper:
                    chosen_host = host_mapper.get(migrate_host)
                else:
                    # migrate_host is not in the filtered host list;
                    # NoValidHost will be raised
                    break

            selected_hosts.append(chosen_host)

            # Now consume the resources so the filter/weights
            # will change for the next instance.
            # NOTE () adding and deleting pci_requests is a temporary
            # fix to avoid DB access in consume_from_instance() while getting
            # pci_requests. The change can be removed once pci_requests is
            # part of the instance object that is passed into the scheduler
            # APIs
            pci_requests = filter_properties.get('pci_requests')
            if pci_requests:
                instance_properties['pci_requests'] = pci_requests

            if request_spec.get('instance_type'):
                instance_properties['numa_topology'] = \
                    hardware.numa_get_constraints(instance_type, {})
            self._update_instance_topology(instance_properties, chosen_host)

            try:
                bind_info, instance_numa, __ = utils.get_inst_cpu_bind_info(
                    instance_properties,
                    chosen_host.obj,
                    filter_properties=filter_properties)
            except exception.NovaException as ex:
                msg = ("Get cpu binding info on host %(host)s failed, the"
                       " host_numa_top is %(host_numa_top)s, "
                       "instance_properties is  %(instance_properties)s")
                params = {
                    'host': chosen_host.obj.host,
                    'host_numa_top': chosen_host.obj.numa_topology,
                    'instance_properties': instance_properties
                }
                # set bind_info and instance_numa to None
                bind_info = None
                instance_numa = None
                LOG.debug(msg, params)
                LOG.debug(ex.format_message())

            scheduler_hints = filter_properties.get('scheduler_hints', None)

            if instance_numa and instance_numa['cells'][0].get('is_huawei'):
                cells = []
                for cell in instance_numa['cells']:
                    cells.append(
                        objects.InstanceNUMACell(
                            id=cell['id'],
                            cpuset=set(cell['cpuset']),
                            memory=cell['mem']['total'],
                            pagesize=cell.get('pagesize')))

                format_inst_numa = objects.InstanceNUMATopology(cells=cells)
                instance_properties['numa_topology'] = format_inst_numa

            try:
                if isinstance(chosen_host.obj,
                              ironic_host_manager.IronicNodeState):
                    chosen_host.obj.consume_from_instance(instance_properties)
                else:
                    chosen_host.obj.consume_from_instance(
                        instance_properties, filter_properties)
            except exception.PciDeviceRequestFailed as e:
                # pop the chosen host in order to roll back its resources
                # in memory
                LOG.warning("consume get exception: %s", e.format_message())
                rollback_hosts = [chosen_host]
                self.host_manager.force_update_host_states(
                    context, rollback_hosts)

            if pci_requests:
                del instance_properties['pci_requests']
            if update_group_hosts is True:
                # NOTE(): Group details are serialized into a list now
                # that they are populated by the conductor, we need to
                # deserialize them
                if isinstance(filter_properties['group_hosts'], list):
                    filter_properties['group_hosts'] = set(
                        filter_properties['group_hosts'])

        self._check_fulfill_for_multiple_create(context, num_instances,
                                                selected_hosts)

        return selected_hosts
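
The host selection step in the middle of _schedule() is self-contained: clamp scheduler_host_subset_size to [1, len(weighed_hosts)], then pick randomly from the best-weighed slice. A minimal sketch of just that step, with made-up host names:

    import random

    def choose_host(weighed_hosts, subset_size):
        # clamp to [1, len(weighed_hosts)] as _schedule() does, then pick
        # randomly among the best-weighed hosts to spread load
        subset_size = max(1, min(subset_size, len(weighed_hosts)))
        return random.choice(weighed_hosts[:subset_size])

    print(choose_host(['hostA', 'hostB', 'hostC'], subset_size=2))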